-
Notifications
You must be signed in to change notification settings - Fork 41
/
UncurryFoldPartialData.hs
30 lines (27 loc) · 1.36 KB
/
UncurryFoldPartialData.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
{-# LANGUAGE DataKinds, FlexibleContexts, QuasiQuotes, TemplateHaskell, TypeApplications, TypeOperators #-}
module UncurryFoldPartialData where
import qualified Control.Foldl as L
import Data.Maybe (isNothing)
import Data.Vinyl.XRec (toHKD)
import Frames
import Pipes (Producer, (>->))
import qualified Pipes.Prelude as P
-- Data set from http://vincentarelbundock.github.io/Rdatasets/datasets.html
-- The prestige column has been left blank for rows whose "type" is
-- listed as "NA".
--
-- Template Haskell splice that declares the 'Row' record type used
-- throughout this module from the named CSV file. The @Income@ and
-- @Prestige@ field names referenced below are presumably generated
-- here from the file's column headers (confirm against the Frames
-- documentation for 'tableTypes').
tableTypes "Row" "test/data/prestigePartial.csv"
-- | Stream the prestige CSV as a pipes 'Producer' of 'Row'-shaped
-- records whose column functor is 'Maybe': any individual field of
-- any row may have failed to parse from the file, in which case it
-- is carried as 'Nothing'.
maybeRows
  :: MonadSafe m
  => Producer (Rec (Maybe :. ElField) (RecordColumns Row)) m ()
maybeRows = readTableMaybe csvPath
  where
    csvPath = "test/data/prestigePartial.csv"
-- | Count the rows whose prestige is unknown and compute the average
-- income over those rows, returned as @(count, average)@.
--
-- Rows whose income field is itself missing are discarded before the
-- fold, so they contribute to neither component. If no row
-- qualifies, the average is @0 / 0@.
incomeOfUnknownPrestige :: IO (Int, Double)
incomeOfUnknownPrestige =
  runSafeEffect (L.purely P.fold countAndMean unknownIncomes)
  where
    -- Incomes of the rows accepted by 'prestigeUnknown'; 'P.concat'
    -- flattens the 'Maybe' values, dropping every 'Nothing'.
    unknownIncomes =
      maybeRows
        >-> P.filter prestigeUnknown
        >-> P.map incomeOf
        >-> P.concat

    -- Sum and length are computed together in one pass over the
    -- stream and combined once it is exhausted.
    countAndMean :: L.Fold Double (Int, Double)
    countAndMean = summarise <$> L.sum <*> L.length

    summarise total n = (n, total / fromIntegral n)

    -- Project the income column out of a row and widen it to 'Double'.
    incomeOf :: Rec (Maybe :. ElField) (RecordColumns Row) -> Maybe Double
    incomeOf row = fromIntegral <$> toHKD (rget @Income row)
-- | 'True' exactly when the row's @Prestige@ field is 'Nothing',
-- i.e. no prestige value is available for that row.
prestigeUnknown :: Rec (Maybe :. ElField) (RecordColumns Row) -> Bool
prestigeUnknown row = isNothing (toHKD (rget @Prestige row))