diff options
Diffstat (limited to 'Codec/MIME/Parse.hs')
-rw-r--r-- | Codec/MIME/Parse.hs | 296 |
1 files changed, 296 insertions, 0 deletions
diff --git a/Codec/MIME/Parse.hs b/Codec/MIME/Parse.hs new file mode 100644 index 0000000..f9dfeb2 --- /dev/null +++ b/Codec/MIME/Parse.hs @@ -0,0 +1,296 @@ +{-# LANGUAGE OverloadedStrings #-} +-------------------------------------------------------------------- +-- | +-- Module : Codec.MIME.Pare +-- Copyright : (c) 2006-2009, Galois, Inc. +-- License : BSD3 +-- +-- Maintainer: Sigbjorn Finne <sigbjorn.finne@gmail.com> +-- Stability : provisional +-- Portability: portable +-- +-- Parsing MIME content. +-- +-------------------------------------------------------------------- +module Codec.MIME.Parse + ( parseMIMEBody -- :: [(T.Text,T.Text)] -> T.Text -> MIMEValue + , parseMIMEType -- :: T.Text -> Maybe Type + , parseMIMEMessage -- :: T.Text -> MIMEValue + + , parseHeaders -- :: T.Text -> ([(T.Text,T.Text)], T.Text) + , parseMultipart -- :: Type -> T.Text -> (MIMEValue, T.Text) + , parseContentType -- :: T.Text -> Maybe Type + , splitMulti -- :: T.Text -> T.Text -> ([MIMEValue], T.Text) + , normalizeCRLF + ) where + +import Codec.MIME.Type +import Codec.MIME.Decode +import Control.Arrow(second) + +import Data.Char +import Data.Maybe +import qualified Data.List as L +import Debug.Trace ( trace ) +import qualified Data.Text as T +import Data.Monoid(Monoid(..), (<>)) + +enableTrace :: Bool +enableTrace = False + +doTrace :: String -> b -> b +doTrace | enableTrace = trace + | otherwise = \_ x -> x + + +parseMIMEBody :: [MIMEParam] -> T.Text -> MIMEValue +parseMIMEBody headers_in body = result { mime_val_headers = headers } + where + result = case mimeType mty of + Multipart{} -> fst (parseMultipart mty body) + Message{} -> fst (parseMultipart mty body) + _ -> nullMIMEValue { mime_val_type = mty + , mime_val_disp = parseContentDisp headers + , mime_val_content = Single (processBody headers body) + } + headers = [ MIMEParam (T.toLower k) v | (MIMEParam k v) <- headers_in ] + mty = fromMaybe defaultType + (parseContentType =<< lookupField "content-type" (paramPairs headers)) +defaultType :: Type +defaultType = Type { mimeType = Text "plain" + , mimeParams = [MIMEParam "charset" "us-ascii"] + } + +parseContentDisp :: [MIMEParam] -> Maybe Disposition +parseContentDisp headers = + (processDisp . dropFoldingWSP) =<< lookupField "content-disposition" (paramPairs headers) + where + processDisp t | T.null t = Nothing + | T.null bs = Just $ Disposition { dispType = toDispType (T.toLower as) + , dispParams = [] + } + | otherwise = Just $ Disposition { dispType = toDispType (T.toLower as) + , dispParams = processParams (parseParams bs) + } + where (as,bs) = T.break (\ch -> isSpace ch || ch == ';') t + + processParams = map procP + where + procP (MIMEParam as val) + | "name" == asl = Name val + | "filename" == asl = Filename val + | "creation-date" == asl = CreationDate val + | "modification-date" == asl = ModDate val + | "read-date" == asl = ReadDate val + | "size" == asl = Size val + | otherwise = OtherParam asl val + where asl = T.toLower as + + toDispType t = if t == "inline" then DispInline + else if t == "attachment" then DispAttachment + else if t == "form-data" then DispFormData + else DispOther t + +paramPairs :: [MIMEParam] -> [(T.Text, T.Text)] +paramPairs = map paramPair + where + paramPair (MIMEParam a b) = (a,b) + +processBody :: [MIMEParam] -> T.Text -> T.Text +processBody headers body = + case lookupField "content-transfer-encoding" $ paramPairs headers of + Nothing -> body + Just v -> T.pack $ decodeBody (T.unpack v) $ T.unpack body + +normalizeCRLF :: T.Text -> T.Text +normalizeCRLF t + | T.null t = "" + | "\r\n" `T.isPrefixOf` t = "\r\n" <> normalizeCRLF (T.drop 2 t) + | any (`T.isPrefixOf` t) ["\r", "\n"] = "\r\n" <> normalizeCRLF (T.drop 1 t) + | otherwise = let (a,b) = T.break (`elem` "\r\n") t in a <> normalizeCRLF b + +parseMIMEMessage :: T.Text -> MIMEValue +parseMIMEMessage entity = + case parseHeaders (normalizeCRLF entity) of + (as,bs) -> parseMIMEBody as bs + +parseHeaders :: T.Text -> ([MIMEParam], T.Text) +parseHeaders str = + case findFieldName "" str of + Left (nm, rs) -> parseFieldValue nm (dropFoldingWSP rs) + Right body -> ([],body) + where + findFieldName acc t + | T.null t = Right "" + | "\r\n" `T.isPrefixOf` t = Right $ T.drop 2 t + | ":" `T.isPrefixOf` t = Left (T.reverse $ T.dropWhile isHSpace acc, T.drop 1 t) + | otherwise = findFieldName (T.take 1 t <> acc) $ T.drop 1 t + + parseFieldValue nm xs + | T.null bs = ([MIMEParam nm as], "") + | otherwise = let (zs,ys) = parseHeaders bs in (MIMEParam nm as :zs, ys) + where + (as,bs) = takeUntilCRLF xs + +parseMultipart :: Type -> T.Text -> (MIMEValue, T.Text) +parseMultipart mty body = + case lookupField "boundary" (paramPairs $ mimeParams mty) of + Nothing -> doTrace ("Multipart mime type, " ++ T.unpack (showType mty) ++ + ", has no required boundary parameter. Defaulting to text/plain") $ + (nullMIMEValue{ mime_val_type = defaultType + , mime_val_disp = Nothing + , mime_val_content = Single body + }, "") + Just bnd -> (nullMIMEValue { mime_val_type = mty + , mime_val_disp = Nothing + , mime_val_content = Multi vals + }, rs) + where (vals,rs) = splitMulti bnd body + +splitMulti :: T.Text -> T.Text -> ([MIMEValue], T.Text) +splitMulti bnd body_in = + -- Note: we insert a CRLF if it looks as if the boundary string starts + -- right off the bat. No harm done if this turns out to be incorrect. + let body | "--" `T.isPrefixOf` body_in = "\r\n" <> body_in + | otherwise = body_in + in case untilMatch dashBoundary body of + Nothing -> mempty + Just xs | "--" `T.isPrefixOf` xs -> ([], T.drop 2 xs) + | otherwise -> splitMulti1 (dropTrailer xs) + + where + dashBoundary = ("\r\n--" <> bnd) + + splitMulti1 xs + | T.null as && T.null bs = ([], "") + | T.null bs = ([parseMIMEMessage as],"") + | T.isPrefixOf "--" bs = ([parseMIMEMessage as], dropTrailer bs) + | otherwise = let (zs,ys) = splitMulti1 (dropTrailer bs) + in ((parseMIMEMessage as) : zs,ys) + + where + (as,bs) = matchUntil dashBoundary xs + + dropTrailer xs + | "\r\n" `T.isPrefixOf` xs1 = T.drop 2 xs1 + | otherwise = xs1 -- hmm, flag an error? + where + xs1 = T.dropWhile isHSpace xs + +parseMIMEType :: T.Text -> Maybe Type +parseMIMEType = parseContentType + +parseContentType :: T.Text -> Maybe Type +parseContentType str + | T.null minor0 = doTrace ("unable to parse content-type: " ++ show str) $ Nothing + | otherwise = Just Type { mimeType = toType maj as + , mimeParams = parseParams (T.dropWhile isHSpace bs) + } + where + (maj, minor0) = T.break (=='/') (dropFoldingWSP str) + minor = T.drop 1 minor0 + (as, bs) = T.break (\ ch -> isHSpace ch || isTSpecial ch) minor + toType a b = case lookupField (T.toLower a) mediaTypes of + Just ctor -> ctor b + _ -> Other a b + +parseParams :: T.Text -> [MIMEParam] +parseParams t | T.null t = [] + | ';' == T.head t = let (nm_raw, vs0) = T.break (=='=') (dropFoldingWSP $ T.tail t) + nm = T.toLower nm_raw in + if T.null vs0 + then [] + else let vs = T.tail vs0 in + if not (T.null vs) && T.head vs == '"' + then let vs1 = T.tail vs + (val, zs0) = T.break (=='"') vs1 in + if T.null zs0 + then [MIMEParam nm val] + else MIMEParam nm val : parseParams (T.dropWhile isHSpace $ T.tail zs0) + else let (val, zs) = T.break (\ch -> isHSpace ch || isTSpecial ch) vs in + MIMEParam nm val : parseParams (T.dropWhile isHSpace zs) + | otherwise = doTrace ("Codec.MIME.Parse.parseParams: curious param value -- " ++ show t) [] + +mediaTypes :: [(T.Text, T.Text -> MIMEType)] +mediaTypes = + [ ("multipart", (Multipart . toMultipart)) + , ("application", Application) + , ("audio", Audio) + , ("image", Image) + , ("message", Message) + , ("model", Model) + , ("text", Text) + , ("video", Video) + ] + where toMultipart b = fromMaybe other (lookupField (T.toLower b) multipartTypes) + where other | T.isPrefixOf "x-" b = Extension b + | otherwise = OtherMulti b + +multipartTypes :: [(T.Text, Multipart)] +multipartTypes = + [ ("alternative", Alternative) + , ("byteranges", Byteranges) + , ("digest", Digest) + , ("encrypted", Encrypted) + , ("form-data", FormData) + , ("mixed", Mixed) + , ("parallel", Parallel) + , ("related", Related) + , ("signed", Signed) + ] + +untilMatch :: T.Text -> T.Text -> Maybe T.Text +untilMatch a b | T.null a = Just b + | T.null b = Nothing + | a `T.isPrefixOf` b = Just $ T.drop (T.length a) b + | otherwise = untilMatch a $ T.tail b + +matchUntil :: T.Text -> T.Text -> (T.Text, T.Text) +-- searching str; returning parts before str and after str +matchUntil str = second (T.drop $ T.length str) . T.breakOn str + +{- +matchUntil' :: T.Text -> T.Text -> (T.Text, T.Text) +matchUntil' _ "" = ("", "") +matchUntil' str xs + | T.null xs = mempty + -- slow, but it'll do for now. + | str `T.isPrefixOf` xs = ("", T.drop (T.length str) xs) + | otherwise = let (as,bs) = matchUntil' str $ T.tail xs in (T.take 1 xs <> as, bs) +-} + +isHSpace :: Char -> Bool +isHSpace c = c == ' ' || c == '\t' + +isTSpecial :: Char -> Bool +isTSpecial x = x `elem` "()<>@,;:\\\"/[]?=" -- " + +dropFoldingWSP :: T.Text -> T.Text +dropFoldingWSP t | T.null t = "" + | isHSpace (T.head t) = dropFoldingWSP $ T.tail t + | "\r\n" `T.isPrefixOf` t && not (T.null $ T.drop 2 t) && isHSpace (T.head $ T.drop 2 t) + = dropFoldingWSP $ T.drop 3 t + | otherwise = t + +takeUntilCRLF :: T.Text -> (T.Text, T.Text) +takeUntilCRLF str = go "" str + where + go acc t | T.null t = (T.reverse (T.dropWhile isHSpace acc), "") + | "\r\n" `T.isPrefixOf` t && not (T.null $ T.drop 2 t) && isHSpace (T.head $ T.drop 2 t) + = go (" " <> acc) (T.drop 3 t) + | "\r\n" `T.isPrefixOf` t && not (T.null $ T.drop 2 t) + = (T.reverse (T.dropWhile isHSpace acc), T.drop 2 t) + | otherwise = go (T.take 1 t <> acc) $ T.tail t + +-- case in-sensitive lookup of field names or attributes\/parameters. +lookupField :: T.Text -> [(T.Text,a)] -> Maybe a +lookupField n ns = + -- assume that inputs have been mostly normalized already + -- (i.e., lower-cased), but should the lookup fail fall back + -- to a second try where we do normalize before giving up. + case lookup n ns of + x@Just{} -> x + Nothing -> + let nl = T.toLower n in + fmap snd $ L.find ((nl==) . T.toLower . fst) ns + |