summaryrefslogtreecommitdiffstats
path: root/Codec/MIME
diff options
context:
space:
mode:
Diffstat (limited to 'Codec/MIME')
-rw-r--r--Codec/MIME/Base64.hs141
-rw-r--r--Codec/MIME/Decode.hs56
-rw-r--r--Codec/MIME/Parse.hs241
-rw-r--r--Codec/MIME/QuotedPrintable.hs12
-rw-r--r--Codec/MIME/Type.hs166
-rw-r--r--Codec/MIME/Utils.hs30
6 files changed, 646 insertions, 0 deletions
diff --git a/Codec/MIME/Base64.hs b/Codec/MIME/Base64.hs
new file mode 100644
index 0000000..848e034
--- /dev/null
+++ b/Codec/MIME/Base64.hs
@@ -0,0 +1,141 @@
+{- |
+
+ Module : MIME.Parse
+ Copyright : (c) 2006
+
+ Maintainer :
+ Stability : unstable
+ Portability : GHC
+
+ Base64 decoding and encoding routines.
+-}
+module MIME.Base64
+ ( encodeRaw -- :: Bool -> String -> [Word8]
+ , encodeRawString -- :: Bool -> String -> String
+ , encodeRawPrim -- :: Bool -> Char -> Char -> [Word8] -> String
+
+ , formatOutput -- :: Int -> Maybe String -> String -> String
+
+ , decode -- :: String -> [Word8]
+ , decodeToString -- :: String -> String
+ , decodePrim -- :: Char -> Char -> String -> [Word8]
+ ) where
+
+import Data.Bits
+import Data.Char
+import Data.Word
+import Data.Maybe
+
+encodeRawString :: Bool -> String -> String
+encodeRawString trail xs = encodeRaw trail (map (fromIntegral.ord) xs)
+
+-- | 'formatOutput n mbLT str' formats 'str', splitting it
+-- into lines of length 'n'. The optional value lets you control what
+-- line terminator sequence to use; the default is CRLF (as per MIME.)
+formatOutput :: Int -> Maybe String -> String -> String
+formatOutput n mbTerm str
+ | n <= 0 = error ("formatOutput: negative line length " ++ show n)
+ | otherwise = chop n str
+ where
+ crlf :: String
+ crlf = fromMaybe "\r\n" mbTerm
+
+ chop _ "" = ""
+ chop i xs =
+ case splitAt i xs of
+ (as,"") -> as
+ (as,bs) -> as ++ crlf ++ chop i bs
+
+encodeRaw :: Bool -> [Word8] -> String
+encodeRaw trail bs = encodeRawPrim trail '+' '/' bs
+
+-- lets you control what non-alphanum characters to use
+-- (The base64url variation uses '*' and '-', for instance.)
+-- No support for mapping these to multiple characters in the output though.
+encodeRawPrim :: Bool -> Char -> Char -> [Word8] -> String
+encodeRawPrim trail ch62 ch63 ls = encoder ls
+ where
+ trailer xs ys
+ | not trail = xs
+ | otherwise = xs ++ ys
+ f = fromB64 ch62 ch63
+ encoder [] = []
+ encoder [x] = trailer (take 2 (encode3 f x 0 0 "")) "=="
+ encoder [x,y] = trailer (take 3 (encode3 f x y 0 "")) "="
+ encoder (x:y:z:ws) = encode3 f x y z (encoder ws)
+
+encode3 :: (Word8 -> Char) -> Word8 -> Word8 -> Word8 -> String -> String
+encode3 f a b c rs =
+ f (low6 (w24 `shiftR` 18)) :
+ f (low6 (w24 `shiftR` 12)) :
+ f (low6 (w24 `shiftR` 6)) :
+ f (low6 w24) : rs
+ where
+ w24 :: Word32
+ w24 = (fromIntegral a `shiftL` 16) +
+ (fromIntegral b `shiftL` 8) +
+ fromIntegral c
+
+decodeToString :: String -> String
+decodeToString str = map (chr.fromIntegral) $ decode str
+
+decode :: String -> [Word8]
+decode str = decodePrim '+' '/' str
+
+decodePrim :: Char -> Char -> String -> [Word8]
+decodePrim ch62 ch63 str = decoder $ takeUntilEnd str
+ where
+ takeUntilEnd "" = []
+ takeUntilEnd ('=':_) = []
+ takeUntilEnd (x:xs) =
+ case toB64 ch62 ch63 x of
+ Nothing -> takeUntilEnd xs
+ Just b -> b : takeUntilEnd xs
+
+decoder :: [Word8] -> [Word8]
+decoder [] = []
+decoder [x] = take 1 (decode4 x 0 0 0 [])
+decoder [x,y] = take 1 (decode4 x y 0 0 []) -- upper 4 bits of second val are known to be 0.
+decoder [x,y,z] = take 2 (decode4 x y z 0 [])
+decoder (x:y:z:w:xs) = decode4 x y z w (decoder xs)
+
+decode4 :: Word8 -> Word8 -> Word8 -> Word8 -> [Word8] -> [Word8]
+decode4 a b c d rs =
+ (lowByte (w24 `shiftR` 16)) :
+ (lowByte (w24 `shiftR` 8)) :
+ (lowByte w24) : rs
+ where
+ w24 :: Word32
+ w24 =
+ (fromIntegral a) `shiftL` 18 .|.
+ (fromIntegral b) `shiftL` 12 .|.
+ (fromIntegral c) `shiftL` 6 .|.
+ (fromIntegral d)
+
+toB64 :: Char -> Char -> Char -> Maybe Word8
+toB64 a b ch
+ | ch >= 'A' && ch <= 'Z' = Just (fromIntegral (ord ch - ord 'A'))
+ | ch >= 'a' && ch <= 'z' = Just (26 + fromIntegral (ord ch - ord 'a'))
+ | ch >= '0' && ch <= '9' = Just (52 + fromIntegral (ord ch - ord '0'))
+ | ch == a = Just 62
+ | ch == b = Just 63
+ | otherwise = Nothing
+
+fromB64 :: Char -> Char -> Word8 -> Char
+fromB64 ch62 ch63 x
+ | x < 26 = chr (ord 'A' + xi)
+ | x < 52 = chr (ord 'a' + (xi-26))
+ | x < 62 = chr (ord '0' + (xi-52))
+ | x == 62 = ch62
+ | x == 63 = ch63
+ | otherwise = error ("fromB64: index out of range " ++ show x)
+ where
+ xi :: Int
+ xi = fromIntegral x
+
+low6 :: Word32 -> Word8
+low6 x = fromIntegral (x .&. 0x3f)
+
+lowByte :: Word32 -> Word8
+lowByte x = (fromIntegral x) .&. 0xff
+
diff --git a/Codec/MIME/Decode.hs b/Codec/MIME/Decode.hs
new file mode 100644
index 0000000..f23454a
--- /dev/null
+++ b/Codec/MIME/Decode.hs
@@ -0,0 +1,56 @@
+module MIME.Decode where
+
+import Data.Char
+import MIME.QuotedPrintable as QP
+import MIME.Base64 as Base64
+
+decodeBody :: String -> String -> String
+decodeBody enc body =
+ case map toLower enc of
+ "base64" -> map (chr.fromIntegral) $ Base64.decode body
+ "quoted-printable" -> QP.decode body
+ _ -> body
+
+-- Decoding of RFC 2047's "encoded-words' production
+-- (as used in email-headers and some HTTP header cases
+-- (AtomPub's Slug: header))
+decodeWord :: String -> Maybe (String, String)
+decodeWord str =
+ case str of
+ '=':'?':xs ->
+ case dropLang $ break (\ch -> ch =='?' || ch == '*') xs of
+ (cs,_:x:'?':bs)
+ | isKnownCharset (map toLower cs) ->
+ case toLower x of
+ 'q' -> decodeQ cs (break (=='?') bs)
+ 'b' -> decodeB cs (break (=='?') bs)
+ _ -> Nothing
+ _ -> Nothing
+ _ -> Nothing
+ where
+ isKnownCharset cs = cs `elem` ["iso-8859-1", "us-ascii"]
+
+ -- ignore RFC 2231 extension of permitting a language tag to be supplied
+ -- after the charset.
+ dropLang (as,'*':bs) = (as,dropWhile (/='?') bs)
+ dropLang (as,bs) = (as,bs)
+
+ decodeQ cset (fs,'?':'=':rs) = Just (fromCharset cset (QP.decode fs),rs)
+ decodeQ _ _ = Nothing
+
+ decodeB cset (fs,'?':'=':rs) =
+ Just (fromCharset cset (Base64.decodeToString fs),rs)
+ decodeB _ _ = Nothing
+
+ fromCharset _cset cs = cs
+
+decodeWords :: String -> String
+decodeWords "" = ""
+decodeWords (x:xs)
+ | isSpace x = x : decodeWords xs
+ | otherwise =
+ case decodeWord (x:xs) of
+ Nothing -> x : decodeWords xs
+ Just (as,bs) -> as ++ decodeWords bs
+
+
diff --git a/Codec/MIME/Parse.hs b/Codec/MIME/Parse.hs
new file mode 100644
index 0000000..06fffc6
--- /dev/null
+++ b/Codec/MIME/Parse.hs
@@ -0,0 +1,241 @@
+{- |
+ Module : MIME.Parse
+ Copyright : (c) 2006
+
+ Maintainer : tse-dev-team@galois.com
+ Stability : unstable
+ Portability : GHC
+
+ Parsing MIME content.
+-}
+module MIME.Parse
+ ( parseMIMEBody
+ , parseMIMEType
+ ) where
+
+import MIME.Type
+import MIME.Decode
+
+import Data.Char
+import Data.Maybe
+import Data.List
+import Debug.Trace ( trace )
+
+parseMIMEBody :: [(String,String)] -> String -> MIMEValue
+parseMIMEBody headers_in body =
+ case mimeType mty of
+ Multipart{} -> fst (parseMultipart mty body)
+ Message{} -> fst (parseMultipart mty body)
+ _ -> MIMEValue mty (parseContentDisp headers)
+ (Single (processBody headers body))
+
+ where headers = [ (map toLower k,v) | (k,v) <- headers_in ]
+ mty = fromMaybe defaultType
+ (parseContentType =<< lookup "content-type" headers)
+defaultType :: Type
+defaultType = Type { mimeType = Text "plain"
+ , mimeParams = [("charset", "us-ascii")]
+ }
+
+parseContentDisp :: [(String,String)] -> Maybe Disposition
+parseContentDisp headers =
+ (processDisp . dropFoldingWSP) =<< lookup "content-disposition" headers
+ where
+ processDisp "" = Nothing
+ processDisp xs = Just $
+ case break (\ch -> isSpace ch || ch == ';') xs of
+ (as,"") -> Disposition { dispType = toDispType (map toLower as)
+ , dispParams = []
+ }
+ (as,bs) -> Disposition { dispType = toDispType (map toLower as)
+ , dispParams = processParams (parseParams bs)
+ }
+
+ processParams = map procP
+ where
+ procP (as,val)
+ | "name" == asl = Name val
+ | "filename" == asl = Filename val
+ | "creation-date" == asl = CreationDate val
+ | "modification-date" == asl = ModDate val
+ | "read-date" == asl = ReadDate val
+ | "size" == asl = Size val
+ | otherwise = OtherParam (map toLower as) val
+ where asl = map toLower as
+
+ toDispType t = case t of
+ "inline" -> DispInline
+ "attachment" -> DispAttachment
+ "form-data" -> DispFormData
+ _ -> DispOther t
+
+
+processBody :: [(String,String)] -> String -> String
+processBody headers body =
+ case lookup "content-transfer-encoding" headers of
+ Nothing -> body
+ Just v -> decodeBody v body
+
+parseMIMEMessage :: String -> MIMEValue
+parseMIMEMessage entity =
+ case parseHeaders entity of
+ (as,bs) -> parseMIMEBody as bs
+
+parseHeaders :: String -> ([(String,String)], String)
+parseHeaders str =
+ case findFieldName "" str of
+ Left (nm, rs) -> parseFieldValue nm (dropFoldingWSP rs)
+ Right body -> ([],body)
+ where
+ findFieldName _acc "" = Right ""
+ findFieldName _acc ('\r':'\n':xs) = Right xs
+ findFieldName acc (':':xs) = Left (reverse (dropWhile isHSpace acc), xs)
+ findFieldName acc (x:xs) = findFieldName (x:acc) xs
+
+ parseFieldValue nm xs =
+ case takeUntilCRLF xs of
+ (as,"") -> ([(nm,as)],"")
+ (as,bs) -> let (zs,ys) = parseHeaders bs in ((nm,as):zs,ys)
+
+parseMultipart :: Type -> String -> (MIMEValue, String)
+parseMultipart mty body =
+ case lookup "boundary" (mimeParams mty) of
+ Nothing -> trace ("Multipart mime type, " ++ showType mty ++
+ ", has no required boundary parameter. Defaulting to text/plain") $
+ (MIMEValue defaultType Nothing (Single body), "")
+ Just bnd -> (MIMEValue mty Nothing (Multi vals), rs)
+ where (vals,rs) = splitMulti bnd body
+
+splitMulti :: String -> String -> ([MIMEValue], String)
+splitMulti bnd body_in =
+ -- Note: we insert a CRLF if it looks as if the boundary string starts
+ -- right off the bat. No harm done if this turns out to be incorrect.
+ let body = case body_in of
+ '-':'-':_ -> ('\r':'\n':body_in)
+ _ -> body_in
+ in case untilMatch dashBoundary body of
+ Nothing -> ([],"")
+ Just ('-':'-':xs) -> ([],xs)
+ Just xs -> splitMulti1 (dropTrailer xs)
+
+ where
+ dashBoundary = ("\r\n--" ++ bnd)
+
+ splitMulti1 xs =
+ case matchUntil dashBoundary xs of
+ ("","") -> ([],"")
+ (as,"") -> ([parseMIMEMessage as],"")
+ (as,'-':'-':bs) -> ([parseMIMEMessage as], dropTrailer bs)
+ (as,bs) -> let (zs,ys) = splitMulti1 (dropTrailer bs)
+ in ((parseMIMEMessage as) : zs,ys)
+
+ dropTrailer xs =
+ case dropWhile isHSpace xs of
+ '\r':'\n':xs1 -> xs1
+ xs1 -> xs1 -- hmm, flag an error?
+
+parseMIMEType :: String -> Maybe Type
+parseMIMEType = parseContentType
+
+parseContentType :: String -> Maybe Type
+parseContentType str =
+ case break (=='/') (dropFoldingWSP str) of
+ (maj,_:minor) ->
+ case break (\ ch -> isHSpace ch || isTSpecial ch) minor of
+ (as,bs) ->
+ Just Type { mimeType = toType maj as
+ , mimeParams = parseParams (dropWhile isHSpace bs)
+ }
+ _ -> trace ("unable to parse content-type: " ++ show str) $ Nothing
+ where
+ toType a b = case lookup (map toLower a) mediaTypes of
+ Just ctor -> ctor b
+ _ -> Other a b
+
+
+parseParams :: String -> [(String,String)]
+parseParams "" = []
+parseParams (';':xs) =
+ case break (=='=') (dropFoldingWSP xs) of
+ (nm,_:vs) ->
+ case vs of
+ '"':vs1 ->
+ case break (=='"') vs1 of
+ (val,"") -> [(nm,val)]
+ (val,_:zs) -> (nm,val):parseParams (dropWhile isHSpace zs)
+ _ -> case break (\ ch -> isHSpace ch || isTSpecial ch) vs of
+ (val,zs) -> (nm,val):parseParams (dropWhile isHSpace zs)
+ _ -> []
+
+parseParams cs = trace ("curious: " ++ show cs) []
+
+mediaTypes :: [(String, String -> MIMEType)]
+mediaTypes =
+ [ ("multipart", (Multipart . toMultipart))
+ , ("application", Application)
+ , ("audio", Audio)
+ , ("image", Image)
+ , ("message", Message)
+ , ("model", Model)
+ , ("text", Text)
+ , ("video", Video)
+ ]
+ where toMultipart b = fromMaybe other (lookup (map toLower b) multipartTypes)
+ where other = case b of
+ 'x':'-':_ -> Extension b
+ _ -> OtherMulti b
+
+
+multipartTypes :: [(String, Multipart)]
+multipartTypes =
+ [ ("alternative", Alternative)
+ , ("byteranges", Byteranges)
+ , ("digest", Digest)
+ , ("encrypted", Encrypted)
+ , ("form-data", FormData)
+ , ("mixed", Mixed)
+ , ("parallel", Parallel)
+ , ("related", Related)
+ , ("signed", Signed)
+ ]
+
+
+untilMatch :: String -> String -> Maybe String
+untilMatch str xs = go str xs
+ where go "" rs = Just rs
+ go _ "" = Nothing
+ go (a:as) (b:bs) = if a == b then go as bs else go str bs
+
+matchUntil :: String -> String -> (String, String)
+matchUntil _ "" = ("", "")
+matchUntil str xs
+ -- slow, but it'll do for now.
+ | str `isPrefixOf` xs = ("", drop (length str) xs)
+matchUntil str (x:xs) = let (as,bs) = matchUntil str xs in (x:as,bs)
+
+
+
+isHSpace :: Char -> Bool
+isHSpace c = c == ' ' || c == '\t'
+
+isTSpecial :: Char -> Bool
+isTSpecial x = x `elem` "()<>@,;:\\\"/[]?="
+
+
+dropFoldingWSP :: String -> String
+dropFoldingWSP "" = ""
+dropFoldingWSP (x:xs)
+ | isHSpace x = dropFoldingWSP xs
+dropFoldingWSP ('\r':'\n':x:xs)
+ | isHSpace x = dropFoldingWSP xs
+dropFoldingWSP (x:xs) = x:xs
+
+takeUntilCRLF :: String -> (String, String)
+takeUntilCRLF str = go "" str
+ where
+ go acc "" = (reverse (dropWhile isHSpace acc), "")
+ go acc ('\r':'\n':x:xs)
+ | isHSpace x = go (' ':acc) xs
+ | otherwise = (reverse (dropWhile isHSpace acc), x:xs)
+ go acc (x:xs) = go (x:acc) xs
+
diff --git a/Codec/MIME/QuotedPrintable.hs b/Codec/MIME/QuotedPrintable.hs
new file mode 100644
index 0000000..514ce4e
--- /dev/null
+++ b/Codec/MIME/QuotedPrintable.hs
@@ -0,0 +1,12 @@
+module MIME.QuotedPrintable where
+
+import Data.Char
+
+decode :: String -> String
+decode "" = ""
+decode ('=':x1:x2:xs)
+ | isHexDigit x1 && isHexDigit x2 =
+ chr (digitToInt x1 * 16 + digitToInt x2) : decode xs
+decode ('=':xs) = '=':decode xs
+ -- make it explicit that we propagate other '=' occurrences.
+decode (x1:xs) = x1:decode xs
diff --git a/Codec/MIME/Type.hs b/Codec/MIME/Type.hs
new file mode 100644
index 0000000..e9266ec
--- /dev/null
+++ b/Codec/MIME/Type.hs
@@ -0,0 +1,166 @@
+{- |
+
+ Module : MIME.Type
+ Copyright : (c) 2006
+
+ Maintainer : tse-dev-team@galois.com
+ Stability : unstable
+ Portability : GHC
+
+ Representing MIME types and values.
+-}
+module MIME.Type where
+
+import Data.List ( concatMap, isSuffixOf )
+
+data Type
+ = Type
+ { mimeType :: MIMEType
+ , mimeParams :: [(String,String)]
+ } deriving ( Show, Ord, Eq )
+
+showType :: Type -> String
+showType t = showMIMEType (mimeType t) ++ showMIMEParams (mimeParams t)
+
+showMIMEParams :: [(String,String)] -> String
+showMIMEParams ps = concatMap showP ps
+ where
+ showP (a,b) = ';':a ++ '=':'"':b ++ "\""
+
+
+data MIMEType
+ = Application SubType
+ | Audio SubType
+ | Image SubType
+ | Message SubType
+ | Model SubType
+ | Multipart Multipart
+ | Text TextType
+ | Video SubType
+ | Other String SubType
+ deriving ( Show, Ord, Eq )
+
+showMIMEType :: MIMEType -> String
+showMIMEType t =
+ case t of
+ Application s -> "application/"++s
+ Audio s -> "audio/"++s
+ Image s -> "image/"++s
+ Message s -> "message/"++s
+ Model s -> "model/"++s
+ Multipart s -> "multipart/"++showMultipart s
+ Text s -> "text/"++s
+ Video s -> "video/"++s
+ Other a b -> a ++ '/':b
+
+-- | a (type, subtype) MIME pair.
+data MIMEPair
+ = MIMEPair String SubType
+ deriving ( Eq )
+
+showMIMEPair :: MIMEPair -> String
+showMIMEPair (MIMEPair a b) = a ++ '/':b
+
+-- | default subtype representation.
+type SubType = String
+
+-- | subtype for text content; currently just a string.
+type TextType = SubType
+
+subTypeString :: Type -> String
+subTypeString t =
+ case break (=='/') (showMIMEType (mimeType t)) of
+ (_,"") -> ""
+ (_,_:bs) -> bs
+
+majTypeString :: Type -> String
+majTypeString t =
+ case break (=='/') (showMIMEType (mimeType t)) of
+ (as,_) -> as
+
+data Multipart
+ = Alternative
+ | Byteranges
+ | Digest
+ | Encrypted
+ | FormData
+ | Mixed
+ | Parallel
+ | Related
+ | Signed
+ | Extension String -- ^ e.g., 'x-foo' (i.e., includes the 'x-' bit)
+ | OtherMulti String -- unrecognized\/uninterpreted.
+ -- (e.g., appledouble, voice-message, etc.)
+ deriving ( Show, Ord, Eq )
+
+isXmlBased :: Type -> Bool
+isXmlBased t =
+ case mimeType t of
+ Multipart{} -> False
+ _ -> "+xml" `isSuffixOf` subTypeString t
+
+isXmlType :: Type -> Bool
+isXmlType t = isXmlBased t ||
+ case mimeType t of
+ Application s -> s `elem` xml_media_types
+ Text s -> s `elem` xml_media_types
+ _ -> False
+ where
+ -- Note: xml-dtd isn't considered an XML type here.
+ xml_media_types :: [String]
+ xml_media_types =
+ [ "xml"
+ , "xml-external-parsed-entity"
+ ]
+
+
+showMultipart :: Multipart -> String
+showMultipart m =
+ case m of
+ Alternative -> "alternative"
+ Byteranges -> "byteranges"
+ Digest -> "digest"
+ Encrypted -> "encrypted"
+ FormData -> "form-data"
+ Mixed -> "mixed"
+ Parallel -> "parallel"
+ Related -> "related"
+ Signed -> "signed"
+ Extension e -> e
+ OtherMulti e -> e
+
+type Content = String
+
+data MIMEValue = MIMEValue {
+ mime_val_type :: Type,
+ mime_val_disp :: Maybe Disposition,
+ mime_val_content :: MIMEContent }
+ deriving ( Show, Eq )
+
+data MIMEContent
+ = Single Content
+ | Multi [MIMEValue]
+ deriving (Eq,Show)
+
+data Disposition
+ = Disposition
+ { dispType :: DispType
+ , dispParams :: [DispParam]
+ } deriving ( Show, Eq )
+
+data DispType
+ = DispInline
+ | DispAttachment
+ | DispFormData
+ | DispOther String
+ deriving ( Show, Eq)
+
+data DispParam
+ = Name String
+ | Filename String
+ | CreationDate String
+ | ModDate String
+ | ReadDate String
+ | Size String
+ | OtherParam String String
+ deriving ( Show, Eq)
diff --git a/Codec/MIME/Utils.hs b/Codec/MIME/Utils.hs
new file mode 100644
index 0000000..a5db2d9
--- /dev/null
+++ b/Codec/MIME/Utils.hs
@@ -0,0 +1,30 @@
+{- |
+ Module : MIME.Utils
+ Copyright : (c) 2007
+
+ Maintainer : tse-dev-team@galois.com
+ Stability : unstable
+ Portability : GHC
+
+ Extracting content from MIME values and types.
+-}
+module MIME.Utils
+ ( findMultipartNamed -- :: String -> MIMEValue -> Maybe MIMEValue
+ ) where
+
+import MIME.Type
+import Data.List ( find )
+import Control.Monad ( msum )
+
+-- | Given a parameter name, locate it within a MIME value,
+-- returning the corresponding (sub) MIME value.
+findMultipartNamed :: String -> MIMEValue -> Maybe MIMEValue
+findMultipartNamed nm mv =
+ case mime_val_content mv of
+ Multi ms -> msum (map (findMultipartNamed nm) ms)
+ Single {} -> do cd <- mime_val_disp mv
+ find (withDispName nm) (dispParams cd)
+ return mv
+ where withDispName a (Name b) = a == b
+ withDispName _ _ = False
+