diff options
author | Iavor S. Diatchki <diatchki@galois.com> | 2007-12-21 11:39:47 -0800 |
---|---|---|
committer | Iavor S. Diatchki <diatchki@galois.com> | 2007-12-21 11:39:47 -0800 |
commit | b99baac33e68d5603d0aa9ef699460a7e6a15c1d (patch) | |
tree | f4b0a8bd1a49dec75caab397fddff2a194a0aa00 /MIME/Parse.hs |
Initial import.
Diffstat (limited to 'MIME/Parse.hs')
-rw-r--r-- | MIME/Parse.hs | 241 |
1 files changed, 241 insertions, 0 deletions
diff --git a/MIME/Parse.hs b/MIME/Parse.hs new file mode 100644 index 0000000..06fffc6 --- /dev/null +++ b/MIME/Parse.hs @@ -0,0 +1,241 @@ +{- | + Module : MIME.Parse + Copyright : (c) 2006 + + Maintainer : tse-dev-team@galois.com + Stability : unstable + Portability : GHC + + Parsing MIME content. +-} +module MIME.Parse + ( parseMIMEBody + , parseMIMEType + ) where + +import MIME.Type +import MIME.Decode + +import Data.Char +import Data.Maybe +import Data.List +import Debug.Trace ( trace ) + +parseMIMEBody :: [(String,String)] -> String -> MIMEValue +parseMIMEBody headers_in body = + case mimeType mty of + Multipart{} -> fst (parseMultipart mty body) + Message{} -> fst (parseMultipart mty body) + _ -> MIMEValue mty (parseContentDisp headers) + (Single (processBody headers body)) + + where headers = [ (map toLower k,v) | (k,v) <- headers_in ] + mty = fromMaybe defaultType + (parseContentType =<< lookup "content-type" headers) +defaultType :: Type +defaultType = Type { mimeType = Text "plain" + , mimeParams = [("charset", "us-ascii")] + } + +parseContentDisp :: [(String,String)] -> Maybe Disposition +parseContentDisp headers = + (processDisp . dropFoldingWSP) =<< lookup "content-disposition" headers + where + processDisp "" = Nothing + processDisp xs = Just $ + case break (\ch -> isSpace ch || ch == ';') xs of + (as,"") -> Disposition { dispType = toDispType (map toLower as) + , dispParams = [] + } + (as,bs) -> Disposition { dispType = toDispType (map toLower as) + , dispParams = processParams (parseParams bs) + } + + processParams = map procP + where + procP (as,val) + | "name" == asl = Name val + | "filename" == asl = Filename val + | "creation-date" == asl = CreationDate val + | "modification-date" == asl = ModDate val + | "read-date" == asl = ReadDate val + | "size" == asl = Size val + | otherwise = OtherParam (map toLower as) val + where asl = map toLower as + + toDispType t = case t of + "inline" -> DispInline + "attachment" -> DispAttachment + "form-data" -> DispFormData + _ -> DispOther t + + +processBody :: [(String,String)] -> String -> String +processBody headers body = + case lookup "content-transfer-encoding" headers of + Nothing -> body + Just v -> decodeBody v body + +parseMIMEMessage :: String -> MIMEValue +parseMIMEMessage entity = + case parseHeaders entity of + (as,bs) -> parseMIMEBody as bs + +parseHeaders :: String -> ([(String,String)], String) +parseHeaders str = + case findFieldName "" str of + Left (nm, rs) -> parseFieldValue nm (dropFoldingWSP rs) + Right body -> ([],body) + where + findFieldName _acc "" = Right "" + findFieldName _acc ('\r':'\n':xs) = Right xs + findFieldName acc (':':xs) = Left (reverse (dropWhile isHSpace acc), xs) + findFieldName acc (x:xs) = findFieldName (x:acc) xs + + parseFieldValue nm xs = + case takeUntilCRLF xs of + (as,"") -> ([(nm,as)],"") + (as,bs) -> let (zs,ys) = parseHeaders bs in ((nm,as):zs,ys) + +parseMultipart :: Type -> String -> (MIMEValue, String) +parseMultipart mty body = + case lookup "boundary" (mimeParams mty) of + Nothing -> trace ("Multipart mime type, " ++ showType mty ++ + ", has no required boundary parameter. Defaulting to text/plain") $ + (MIMEValue defaultType Nothing (Single body), "") + Just bnd -> (MIMEValue mty Nothing (Multi vals), rs) + where (vals,rs) = splitMulti bnd body + +splitMulti :: String -> String -> ([MIMEValue], String) +splitMulti bnd body_in = + -- Note: we insert a CRLF if it looks as if the boundary string starts + -- right off the bat. No harm done if this turns out to be incorrect. + let body = case body_in of + '-':'-':_ -> ('\r':'\n':body_in) + _ -> body_in + in case untilMatch dashBoundary body of + Nothing -> ([],"") + Just ('-':'-':xs) -> ([],xs) + Just xs -> splitMulti1 (dropTrailer xs) + + where + dashBoundary = ("\r\n--" ++ bnd) + + splitMulti1 xs = + case matchUntil dashBoundary xs of + ("","") -> ([],"") + (as,"") -> ([parseMIMEMessage as],"") + (as,'-':'-':bs) -> ([parseMIMEMessage as], dropTrailer bs) + (as,bs) -> let (zs,ys) = splitMulti1 (dropTrailer bs) + in ((parseMIMEMessage as) : zs,ys) + + dropTrailer xs = + case dropWhile isHSpace xs of + '\r':'\n':xs1 -> xs1 + xs1 -> xs1 -- hmm, flag an error? + +parseMIMEType :: String -> Maybe Type +parseMIMEType = parseContentType + +parseContentType :: String -> Maybe Type +parseContentType str = + case break (=='/') (dropFoldingWSP str) of + (maj,_:minor) -> + case break (\ ch -> isHSpace ch || isTSpecial ch) minor of + (as,bs) -> + Just Type { mimeType = toType maj as + , mimeParams = parseParams (dropWhile isHSpace bs) + } + _ -> trace ("unable to parse content-type: " ++ show str) $ Nothing + where + toType a b = case lookup (map toLower a) mediaTypes of + Just ctor -> ctor b + _ -> Other a b + + +parseParams :: String -> [(String,String)] +parseParams "" = [] +parseParams (';':xs) = + case break (=='=') (dropFoldingWSP xs) of + (nm,_:vs) -> + case vs of + '"':vs1 -> + case break (=='"') vs1 of + (val,"") -> [(nm,val)] + (val,_:zs) -> (nm,val):parseParams (dropWhile isHSpace zs) + _ -> case break (\ ch -> isHSpace ch || isTSpecial ch) vs of + (val,zs) -> (nm,val):parseParams (dropWhile isHSpace zs) + _ -> [] + +parseParams cs = trace ("curious: " ++ show cs) [] + +mediaTypes :: [(String, String -> MIMEType)] +mediaTypes = + [ ("multipart", (Multipart . toMultipart)) + , ("application", Application) + , ("audio", Audio) + , ("image", Image) + , ("message", Message) + , ("model", Model) + , ("text", Text) + , ("video", Video) + ] + where toMultipart b = fromMaybe other (lookup (map toLower b) multipartTypes) + where other = case b of + 'x':'-':_ -> Extension b + _ -> OtherMulti b + + +multipartTypes :: [(String, Multipart)] +multipartTypes = + [ ("alternative", Alternative) + , ("byteranges", Byteranges) + , ("digest", Digest) + , ("encrypted", Encrypted) + , ("form-data", FormData) + , ("mixed", Mixed) + , ("parallel", Parallel) + , ("related", Related) + , ("signed", Signed) + ] + + +untilMatch :: String -> String -> Maybe String +untilMatch str xs = go str xs + where go "" rs = Just rs + go _ "" = Nothing + go (a:as) (b:bs) = if a == b then go as bs else go str bs + +matchUntil :: String -> String -> (String, String) +matchUntil _ "" = ("", "") +matchUntil str xs + -- slow, but it'll do for now. + | str `isPrefixOf` xs = ("", drop (length str) xs) +matchUntil str (x:xs) = let (as,bs) = matchUntil str xs in (x:as,bs) + + + +isHSpace :: Char -> Bool +isHSpace c = c == ' ' || c == '\t' + +isTSpecial :: Char -> Bool +isTSpecial x = x `elem` "()<>@,;:\\\"/[]?=" + + +dropFoldingWSP :: String -> String +dropFoldingWSP "" = "" +dropFoldingWSP (x:xs) + | isHSpace x = dropFoldingWSP xs +dropFoldingWSP ('\r':'\n':x:xs) + | isHSpace x = dropFoldingWSP xs +dropFoldingWSP (x:xs) = x:xs + +takeUntilCRLF :: String -> (String, String) +takeUntilCRLF str = go "" str + where + go acc "" = (reverse (dropWhile isHSpace acc), "") + go acc ('\r':'\n':x:xs) + | isHSpace x = go (' ':acc) xs + | otherwise = (reverse (dropWhile isHSpace acc), x:xs) + go acc (x:xs) = go (x:acc) xs + |