diff options
author | Kierán Meinhardt <kieran.meinhardt@gmail.com> | 2020-09-23 17:44:40 +0200 |
---|---|---|
committer | Kierán Meinhardt <kieran.meinhardt@gmail.com> | 2020-09-23 17:44:40 +0200 |
commit | 8e92e6e11d2b3b0bfb5ac9d68f347219493e6380 (patch) | |
tree | 6484ca42d85ca89475e922f7b45039c116ebbf97 /src/Much/ParseMail.hs | |
parent | 6a6ad3aecd53ffd89101a0dee2b4ea576d4964d4 (diff) |
split into library + executables
Diffstat (limited to 'src/Much/ParseMail.hs')
-rw-r--r-- | src/Much/ParseMail.hs | 312 |
1 files changed, 312 insertions, 0 deletions
diff --git a/src/Much/ParseMail.hs b/src/Much/ParseMail.hs new file mode 100644 index 0000000..e12737a --- /dev/null +++ b/src/Much/ParseMail.hs @@ -0,0 +1,312 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} + +module Much.ParseMail (readMail) where + +import qualified Data.Attoparsec.ByteString.Char8 as A8 +import qualified Data.ByteString as BS +import qualified Data.ByteString.Char8 as BS8 +import qualified Data.ByteString.Lazy as LBS +import qualified Data.CaseInsensitive as CI +import qualified Data.Text as T +import qualified Data.Text.Encoding as T +import qualified Data.Text.Lazy as LT +import qualified Data.Text.Lazy.Encoding as LT +import qualified Network.Email.Header.Parser as P +import qualified Network.Email.Header.Types as H +import qualified Network.Mail.Mime as M +import Codec.MIME.Parse +import qualified Codec.MIME.QuotedPrintable as QP +import Codec.MIME.Type +import Control.Applicative +import Data.Char + + + +-- TODO eventually we want our completely own Address, i.e. w/o M.Address +data Address = Mailbox M.Address | Group T.Text [M.Address] + deriving (Show) + + + +readMail :: T.Text -> M.Mail +readMail = + fromMIMEValue . parseMIMEMessage + + +fromMIMEValue :: MIMEValue -> M.Mail +fromMIMEValue val0 = + let m = foldr f (M.emptyMail $ M.Address Nothing "anonymous@localhost") + $ fromMIMEParams + $ mime_val_headers val0 + in m { M.mailParts = [part val0] } + where + + part val = + case mime_val_content val of + Single content -> + (:[]) $ + M.Part + -- TODO actually check if we're utf-8 or ascii(?) + { M.partType = "text/plain; charset=utf-8" + , M.partEncoding = M.QuotedPrintableText + , M.partFilename = Nothing + , M.partHeaders = [] + , M.partContent = LT.encodeUtf8 $ LT.fromStrict content + } + Multi vals -> + concatMap part vals + + --f :: H.Header -> M.Mail -> M.Mail + f (k, v) m = case k of + "from" -> + m { M.mailFrom = case parseAddress (LBS.toStrict v) of + Left msg -> error msg + Right Nothing -> M.mailFrom m + Right (Just (Mailbox a)) -> a + Right (Just (Group _ _)) -> + error "cannot use group in from header" + } + "to" -> + m { M.mailTo = + mconcat $ + map (\case + Mailbox a -> [a] + Group _ as -> as + ) $ + either error id $ + parseAddresses $ + LBS.toStrict v + } + "cc" -> + m { M.mailCc = + mconcat $ + map (\case + Mailbox a -> [a] + Group _ as -> as + ) $ + either error id $ + parseAddresses $ + LBS.toStrict v + } + "bcc" -> + m { M.mailBcc = + mconcat $ + map (\case + Mailbox a -> [a] + Group _ as -> as + ) $ + either error id $ + parseAddresses $ + LBS.toStrict v + } + _ -> + m { M.mailHeaders = + ( CI.original k + , either + (const "I am made of stupid") + LT.toStrict + (LT.decodeUtf8' v) + ) : + M.mailHeaders m + } + + +parseAddress :: BS.ByteString -> Either String (Maybe Address) +parseAddress = + A8.parseOnly (P.cfws *> (Just <$> address <|> return Nothing) <* A8.endOfInput) + + +parseAddresses :: BS.ByteString -> Either String [Address] +parseAddresses = + A8.parseOnly (P.cfws *> address `A8.sepBy1` A8.char ',' <* A8.endOfInput) + + +fromMIMEParams :: [MIMEParam] -> H.Headers +fromMIMEParams = + map $ \(MIMEParam k v) -> + (CI.mk $ T.encodeUtf8 $ CI.original k, LT.encodeUtf8 $ LT.fromStrict v) + + +-- TODO we should probably use email-header + + +-- address = mailbox ; one addressee +-- / group ; named list +address :: A8.Parser Address +address = + (A8.<?> "address") $ + Mailbox <$> mailbox + <|> + group + + +-- group = phrase ":" [#mailbox] ";" +group :: A8.Parser Address +group = + (A8.<?> "group") $ + Group + <$> T.intercalate "," <$> phrase + <* A8.char ':' + <*> mailbox `A8.sepBy` A8.many1 (A8.char ',') + <* A8.char ';' + + +-- mailbox = addr-spec ; simple address +-- / phrase route-addr ; name & addr-spec +mailbox :: A8.Parser M.Address +mailbox = + (A8.<?> "mailbox") $ + M.Address Nothing <$> addrSpec <|> + M.Address . Just . T.intercalate " " <$> A8.option [] phrase <*> routeAddr + + +-- route-addr = "<" [route] addr-spec ">" +routeAddr :: A8.Parser T.Text +routeAddr = + (A8.<?> "routeAddr") $ + P.cfws *> + A8.char '<' *> + -- TODO A8.option [] route <*> + addrSpec <* + A8.char '>' + + +---- route = 1#("@" domain) ":" ; path-relative +--route :: A8.Parser [T.Text] +--route = +-- (A8.<?> "route") $ +-- A8.many1 (A8.char '@' *> domain) <* A8.char ':' + + +-- addr-spec = local-part "@" domain ; global address +addrSpec :: A8.Parser T.Text +addrSpec = + (A8.<?> "addrSpec") $ do + a <- localPart + b <- T.singleton <$> A8.char '@' + c <- domain + return $ a <> b <> c + +-- local-part = word *("." word) ; uninterpreted +-- ; case-preserved +localPart :: A8.Parser T.Text +localPart = + (A8.<?> "localPart") $ + T.intercalate "." <$> (word `A8.sepBy1` A8.char '.') + + +-- domain = sub-domain *("." sub-domain) +domain :: A8.Parser T.Text +domain = + (A8.<?> "domain") $ + T.intercalate "." <$> (subDomain `A8.sepBy1` A8.char '.') + +-- sub-domain = domain-ref / domain-literal +subDomain :: A8.Parser T.Text +subDomain = + (A8.<?> "subDomain") $ + domainRef <|> domainLiteral + +-- domain-ref = atom ; symbolic reference +domainRef :: A8.Parser T.Text +domainRef = + (A8.<?> "domainRef") $ + atom + + +-- atom = 1*<any CHAR except specials, SPACE and CTLs> +atom :: A8.Parser T.Text +atom = + (A8.<?> "atom") $ + P.cfws *> + (T.pack <$> A8.many1 (A8.satisfy $ A8.notInClass atomClass)) + + +-- domain-literal = "[" *(dtext / quoted-pair) "]" +domainLiteral :: A8.Parser T.Text +domainLiteral = + (A8.<?> "domainLiteral") $ + T.pack <$> + (A8.char '[' *> A8.many' (dtext <|> quotedPair) <* A8.char ']') + + +-- dtext = <any CHAR excluding "[", ; => may be folded +-- "]", "\" & CR, & including +-- linear-white-space> +dtext :: A8.Parser Char +dtext = + (A8.<?> "dtext") $ + A8.satisfy (A8.notInClass "[]\\\CR") + + +-- phrase = 1*word +phrase :: A8.Parser [T.Text] +phrase = + (A8.<?> "phrase") $ + A8.many1 word + + +-- qtext = <any CHAR excepting <">, ; => may be folded +-- "\" & CR, and including +-- linear-white-space> +qtext :: A8.Parser Char +qtext = + (A8.<?> "qtext") $ + A8.satisfy (A8.notInClass "\"\\\CR") + + +-- quoted-pair = "\" CHAR +quotedPair :: A8.Parser Char +quotedPair = + (A8.<?> "quotedPair") $ + A8.char '\\' *> A8.anyChar + + +-- quoted-string = <"> *(qtext/quoted-pair) <">; Regular qtext or +-- ; quoted chars. +quotedString :: A8.Parser T.Text +quotedString = + (A8.<?> "quotedString") $ + T.pack <$> (A8.char '"' *> A8.many' (qtext <|> quotedPair) <* A8.char '"') + + +encodedWord :: A8.Parser T.Text +encodedWord = + (A8.<?> "encodedWord") $ do + _ <- A8.string "=?" + _ <- A8.string "utf-8" -- TODO 1. CI, 2. other encodings + _ <- A8.string "?Q?" + w <- A8.manyTill A8.anyChar (A8.string "?=") -- TODO all of them + return + $ T.decodeUtf8 + $ BS8.pack + $ QP.decode + -- ^ TODO this current doesn't decode + -- underscore to space + $ map (\c -> if c == '_' then ' ' else c) + $ w + + +-- word = encoded-word / atom / quoted-string +-- ^ TODO what's the correct term for that? +word :: A8.Parser T.Text +word = + (A8.<?> "word") $ + encodedWord <|> atom <|> quotedString + + +atomClass :: [Char] +atomClass = specialClass ++ spaceClass ++ ctlClass + + +specialClass :: [Char] +specialClass = "()<>@,;:\\\".[]" + + +spaceClass :: [Char] +spaceClass = " " + + +ctlClass :: [Char] +ctlClass = map chr $ [0..31] ++ [127] |