summaryrefslogtreecommitdiffstats
path: root/src/Much/ParseMail.hs
diff options
context:
space:
mode:
authorKierán Meinhardt <kieran.meinhardt@gmail.com>2020-09-23 17:44:40 +0200
committerKierán Meinhardt <kieran.meinhardt@gmail.com>2020-09-23 17:44:40 +0200
commit8e92e6e11d2b3b0bfb5ac9d68f347219493e6380 (patch)
tree6484ca42d85ca89475e922f7b45039c116ebbf97 /src/Much/ParseMail.hs
parent6a6ad3aecd53ffd89101a0dee2b4ea576d4964d4 (diff)
split into library + executables
Diffstat (limited to 'src/Much/ParseMail.hs')
-rw-r--r--src/Much/ParseMail.hs312
1 files changed, 312 insertions, 0 deletions
diff --git a/src/Much/ParseMail.hs b/src/Much/ParseMail.hs
new file mode 100644
index 0000000..e12737a
--- /dev/null
+++ b/src/Much/ParseMail.hs
@@ -0,0 +1,312 @@
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE OverloadedStrings #-}
+
+module Much.ParseMail (readMail) where
+
+import qualified Data.Attoparsec.ByteString.Char8 as A8
+import qualified Data.ByteString as BS
+import qualified Data.ByteString.Char8 as BS8
+import qualified Data.ByteString.Lazy as LBS
+import qualified Data.CaseInsensitive as CI
+import qualified Data.Text as T
+import qualified Data.Text.Encoding as T
+import qualified Data.Text.Lazy as LT
+import qualified Data.Text.Lazy.Encoding as LT
+import qualified Network.Email.Header.Parser as P
+import qualified Network.Email.Header.Types as H
+import qualified Network.Mail.Mime as M
+import Codec.MIME.Parse
+import qualified Codec.MIME.QuotedPrintable as QP
+import Codec.MIME.Type
+import Control.Applicative
+import Data.Char
+
+
+
+-- TODO eventually we want our completely own Address, i.e. w/o M.Address
+data Address = Mailbox M.Address | Group T.Text [M.Address]
+ deriving (Show)
+
+
+
+readMail :: T.Text -> M.Mail
+readMail =
+ fromMIMEValue . parseMIMEMessage
+
+
+fromMIMEValue :: MIMEValue -> M.Mail
+fromMIMEValue val0 =
+ let m = foldr f (M.emptyMail $ M.Address Nothing "anonymous@localhost")
+ $ fromMIMEParams
+ $ mime_val_headers val0
+ in m { M.mailParts = [part val0] }
+ where
+
+ part val =
+ case mime_val_content val of
+ Single content ->
+ (:[]) $
+ M.Part
+ -- TODO actually check if we're utf-8 or ascii(?)
+ { M.partType = "text/plain; charset=utf-8"
+ , M.partEncoding = M.QuotedPrintableText
+ , M.partFilename = Nothing
+ , M.partHeaders = []
+ , M.partContent = LT.encodeUtf8 $ LT.fromStrict content
+ }
+ Multi vals ->
+ concatMap part vals
+
+ --f :: H.Header -> M.Mail -> M.Mail
+ f (k, v) m = case k of
+ "from" ->
+ m { M.mailFrom = case parseAddress (LBS.toStrict v) of
+ Left msg -> error msg
+ Right Nothing -> M.mailFrom m
+ Right (Just (Mailbox a)) -> a
+ Right (Just (Group _ _)) ->
+ error "cannot use group in from header"
+ }
+ "to" ->
+ m { M.mailTo =
+ mconcat $
+ map (\case
+ Mailbox a -> [a]
+ Group _ as -> as
+ ) $
+ either error id $
+ parseAddresses $
+ LBS.toStrict v
+ }
+ "cc" ->
+ m { M.mailCc =
+ mconcat $
+ map (\case
+ Mailbox a -> [a]
+ Group _ as -> as
+ ) $
+ either error id $
+ parseAddresses $
+ LBS.toStrict v
+ }
+ "bcc" ->
+ m { M.mailBcc =
+ mconcat $
+ map (\case
+ Mailbox a -> [a]
+ Group _ as -> as
+ ) $
+ either error id $
+ parseAddresses $
+ LBS.toStrict v
+ }
+ _ ->
+ m { M.mailHeaders =
+ ( CI.original k
+ , either
+ (const "I am made of stupid")
+ LT.toStrict
+ (LT.decodeUtf8' v)
+ ) :
+ M.mailHeaders m
+ }
+
+
+parseAddress :: BS.ByteString -> Either String (Maybe Address)
+parseAddress =
+ A8.parseOnly (P.cfws *> (Just <$> address <|> return Nothing) <* A8.endOfInput)
+
+
+parseAddresses :: BS.ByteString -> Either String [Address]
+parseAddresses =
+ A8.parseOnly (P.cfws *> address `A8.sepBy1` A8.char ',' <* A8.endOfInput)
+
+
+fromMIMEParams :: [MIMEParam] -> H.Headers
+fromMIMEParams =
+ map $ \(MIMEParam k v) ->
+ (CI.mk $ T.encodeUtf8 $ CI.original k, LT.encodeUtf8 $ LT.fromStrict v)
+
+
+-- TODO we should probably use email-header
+
+
+-- address = mailbox ; one addressee
+-- / group ; named list
+address :: A8.Parser Address
+address =
+ (A8.<?> "address") $
+ Mailbox <$> mailbox
+ <|>
+ group
+
+
+-- group = phrase ":" [#mailbox] ";"
+group :: A8.Parser Address
+group =
+ (A8.<?> "group") $
+ Group
+ <$> T.intercalate "," <$> phrase
+ <* A8.char ':'
+ <*> mailbox `A8.sepBy` A8.many1 (A8.char ',')
+ <* A8.char ';'
+
+
+-- mailbox = addr-spec ; simple address
+-- / phrase route-addr ; name & addr-spec
+mailbox :: A8.Parser M.Address
+mailbox =
+ (A8.<?> "mailbox") $
+ M.Address Nothing <$> addrSpec <|>
+ M.Address . Just . T.intercalate " " <$> A8.option [] phrase <*> routeAddr
+
+
+-- route-addr = "<" [route] addr-spec ">"
+routeAddr :: A8.Parser T.Text
+routeAddr =
+ (A8.<?> "routeAddr") $
+ P.cfws *>
+ A8.char '<' *>
+ -- TODO A8.option [] route <*>
+ addrSpec <*
+ A8.char '>'
+
+
+---- route = 1#("@" domain) ":" ; path-relative
+--route :: A8.Parser [T.Text]
+--route =
+-- (A8.<?> "route") $
+-- A8.many1 (A8.char '@' *> domain) <* A8.char ':'
+
+
+-- addr-spec = local-part "@" domain ; global address
+addrSpec :: A8.Parser T.Text
+addrSpec =
+ (A8.<?> "addrSpec") $ do
+ a <- localPart
+ b <- T.singleton <$> A8.char '@'
+ c <- domain
+ return $ a <> b <> c
+
+-- local-part = word *("." word) ; uninterpreted
+-- ; case-preserved
+localPart :: A8.Parser T.Text
+localPart =
+ (A8.<?> "localPart") $
+ T.intercalate "." <$> (word `A8.sepBy1` A8.char '.')
+
+
+-- domain = sub-domain *("." sub-domain)
+domain :: A8.Parser T.Text
+domain =
+ (A8.<?> "domain") $
+ T.intercalate "." <$> (subDomain `A8.sepBy1` A8.char '.')
+
+-- sub-domain = domain-ref / domain-literal
+subDomain :: A8.Parser T.Text
+subDomain =
+ (A8.<?> "subDomain") $
+ domainRef <|> domainLiteral
+
+-- domain-ref = atom ; symbolic reference
+domainRef :: A8.Parser T.Text
+domainRef =
+ (A8.<?> "domainRef") $
+ atom
+
+
+-- atom = 1*<any CHAR except specials, SPACE and CTLs>
+atom :: A8.Parser T.Text
+atom =
+ (A8.<?> "atom") $
+ P.cfws *>
+ (T.pack <$> A8.many1 (A8.satisfy $ A8.notInClass atomClass))
+
+
+-- domain-literal = "[" *(dtext / quoted-pair) "]"
+domainLiteral :: A8.Parser T.Text
+domainLiteral =
+ (A8.<?> "domainLiteral") $
+ T.pack <$>
+ (A8.char '[' *> A8.many' (dtext <|> quotedPair) <* A8.char ']')
+
+
+-- dtext = <any CHAR excluding "[", ; => may be folded
+-- "]", "\" & CR, & including
+-- linear-white-space>
+dtext :: A8.Parser Char
+dtext =
+ (A8.<?> "dtext") $
+ A8.satisfy (A8.notInClass "[]\\\CR")
+
+
+-- phrase = 1*word
+phrase :: A8.Parser [T.Text]
+phrase =
+ (A8.<?> "phrase") $
+ A8.many1 word
+
+
+-- qtext = <any CHAR excepting <">, ; => may be folded
+-- "\" & CR, and including
+-- linear-white-space>
+qtext :: A8.Parser Char
+qtext =
+ (A8.<?> "qtext") $
+ A8.satisfy (A8.notInClass "\"\\\CR")
+
+
+-- quoted-pair = "\" CHAR
+quotedPair :: A8.Parser Char
+quotedPair =
+ (A8.<?> "quotedPair") $
+ A8.char '\\' *> A8.anyChar
+
+
+-- quoted-string = <"> *(qtext/quoted-pair) <">; Regular qtext or
+-- ; quoted chars.
+quotedString :: A8.Parser T.Text
+quotedString =
+ (A8.<?> "quotedString") $
+ T.pack <$> (A8.char '"' *> A8.many' (qtext <|> quotedPair) <* A8.char '"')
+
+
+encodedWord :: A8.Parser T.Text
+encodedWord =
+ (A8.<?> "encodedWord") $ do
+ _ <- A8.string "=?"
+ _ <- A8.string "utf-8" -- TODO 1. CI, 2. other encodings
+ _ <- A8.string "?Q?"
+ w <- A8.manyTill A8.anyChar (A8.string "?=") -- TODO all of them
+ return
+ $ T.decodeUtf8
+ $ BS8.pack
+ $ QP.decode
+ -- ^ TODO this current doesn't decode
+ -- underscore to space
+ $ map (\c -> if c == '_' then ' ' else c)
+ $ w
+
+
+-- word = encoded-word / atom / quoted-string
+-- ^ TODO what's the correct term for that?
+word :: A8.Parser T.Text
+word =
+ (A8.<?> "word") $
+ encodedWord <|> atom <|> quotedString
+
+
+atomClass :: [Char]
+atomClass = specialClass ++ spaceClass ++ ctlClass
+
+
+specialClass :: [Char]
+specialClass = "()<>@,;:\\\".[]"
+
+
+spaceClass :: [Char]
+spaceClass = " "
+
+
+ctlClass :: [Char]
+ctlClass = map chr $ [0..31] ++ [127]