From 35f0f40cfabeb49b468c6ae3c68fedded145a022 Mon Sep 17 00:00:00 2001 From: Don Stewart Date: Fri, 4 Jan 2008 16:23:04 -0800 Subject: Move MIME stuff into proper Codec.* namespace. Add copyrights where missing. --- Codec/MIME/Base64.hs | 141 ++++++++++++++++++++++++ Codec/MIME/Decode.hs | 56 ++++++++++ Codec/MIME/Parse.hs | 241 ++++++++++++++++++++++++++++++++++++++++++ Codec/MIME/QuotedPrintable.hs | 12 +++ Codec/MIME/Type.hs | 166 +++++++++++++++++++++++++++++ Codec/MIME/Utils.hs | 30 ++++++ LICENSE | 2 +- MIME/Base64.hs | 141 ------------------------ MIME/Decode.hs | 56 ---------- MIME/Parse.hs | 241 ------------------------------------------ MIME/QuotedPrintable.hs | 12 --- MIME/Type.hs | 166 ----------------------------- MIME/Utils.hs | 30 ------ mime.cabal | 14 +-- 14 files changed, 655 insertions(+), 653 deletions(-) create mode 100644 Codec/MIME/Base64.hs create mode 100644 Codec/MIME/Decode.hs create mode 100644 Codec/MIME/Parse.hs create mode 100644 Codec/MIME/QuotedPrintable.hs create mode 100644 Codec/MIME/Type.hs create mode 100644 Codec/MIME/Utils.hs delete mode 100644 MIME/Base64.hs delete mode 100644 MIME/Decode.hs delete mode 100644 MIME/Parse.hs delete mode 100644 MIME/QuotedPrintable.hs delete mode 100644 MIME/Type.hs delete mode 100644 MIME/Utils.hs diff --git a/Codec/MIME/Base64.hs b/Codec/MIME/Base64.hs new file mode 100644 index 0000000..848e034 --- /dev/null +++ b/Codec/MIME/Base64.hs @@ -0,0 +1,141 @@ +{- | + + Module : MIME.Parse + Copyright : (c) 2006 + + Maintainer : + Stability : unstable + Portability : GHC + + Base64 decoding and encoding routines. +-} +module MIME.Base64 + ( encodeRaw -- :: Bool -> String -> [Word8] + , encodeRawString -- :: Bool -> String -> String + , encodeRawPrim -- :: Bool -> Char -> Char -> [Word8] -> String + + , formatOutput -- :: Int -> Maybe String -> String -> String + + , decode -- :: String -> [Word8] + , decodeToString -- :: String -> String + , decodePrim -- :: Char -> Char -> String -> [Word8] + ) where + +import Data.Bits +import Data.Char +import Data.Word +import Data.Maybe + +encodeRawString :: Bool -> String -> String +encodeRawString trail xs = encodeRaw trail (map (fromIntegral.ord) xs) + +-- | 'formatOutput n mbLT str' formats 'str', splitting it +-- into lines of length 'n'. The optional value lets you control what +-- line terminator sequence to use; the default is CRLF (as per MIME.) +formatOutput :: Int -> Maybe String -> String -> String +formatOutput n mbTerm str + | n <= 0 = error ("formatOutput: negative line length " ++ show n) + | otherwise = chop n str + where + crlf :: String + crlf = fromMaybe "\r\n" mbTerm + + chop _ "" = "" + chop i xs = + case splitAt i xs of + (as,"") -> as + (as,bs) -> as ++ crlf ++ chop i bs + +encodeRaw :: Bool -> [Word8] -> String +encodeRaw trail bs = encodeRawPrim trail '+' '/' bs + +-- lets you control what non-alphanum characters to use +-- (The base64url variation uses '*' and '-', for instance.) +-- No support for mapping these to multiple characters in the output though. +encodeRawPrim :: Bool -> Char -> Char -> [Word8] -> String +encodeRawPrim trail ch62 ch63 ls = encoder ls + where + trailer xs ys + | not trail = xs + | otherwise = xs ++ ys + f = fromB64 ch62 ch63 + encoder [] = [] + encoder [x] = trailer (take 2 (encode3 f x 0 0 "")) "==" + encoder [x,y] = trailer (take 3 (encode3 f x y 0 "")) "=" + encoder (x:y:z:ws) = encode3 f x y z (encoder ws) + +encode3 :: (Word8 -> Char) -> Word8 -> Word8 -> Word8 -> String -> String +encode3 f a b c rs = + f (low6 (w24 `shiftR` 18)) : + f (low6 (w24 `shiftR` 12)) : + f (low6 (w24 `shiftR` 6)) : + f (low6 w24) : rs + where + w24 :: Word32 + w24 = (fromIntegral a `shiftL` 16) + + (fromIntegral b `shiftL` 8) + + fromIntegral c + +decodeToString :: String -> String +decodeToString str = map (chr.fromIntegral) $ decode str + +decode :: String -> [Word8] +decode str = decodePrim '+' '/' str + +decodePrim :: Char -> Char -> String -> [Word8] +decodePrim ch62 ch63 str = decoder $ takeUntilEnd str + where + takeUntilEnd "" = [] + takeUntilEnd ('=':_) = [] + takeUntilEnd (x:xs) = + case toB64 ch62 ch63 x of + Nothing -> takeUntilEnd xs + Just b -> b : takeUntilEnd xs + +decoder :: [Word8] -> [Word8] +decoder [] = [] +decoder [x] = take 1 (decode4 x 0 0 0 []) +decoder [x,y] = take 1 (decode4 x y 0 0 []) -- upper 4 bits of second val are known to be 0. +decoder [x,y,z] = take 2 (decode4 x y z 0 []) +decoder (x:y:z:w:xs) = decode4 x y z w (decoder xs) + +decode4 :: Word8 -> Word8 -> Word8 -> Word8 -> [Word8] -> [Word8] +decode4 a b c d rs = + (lowByte (w24 `shiftR` 16)) : + (lowByte (w24 `shiftR` 8)) : + (lowByte w24) : rs + where + w24 :: Word32 + w24 = + (fromIntegral a) `shiftL` 18 .|. + (fromIntegral b) `shiftL` 12 .|. + (fromIntegral c) `shiftL` 6 .|. + (fromIntegral d) + +toB64 :: Char -> Char -> Char -> Maybe Word8 +toB64 a b ch + | ch >= 'A' && ch <= 'Z' = Just (fromIntegral (ord ch - ord 'A')) + | ch >= 'a' && ch <= 'z' = Just (26 + fromIntegral (ord ch - ord 'a')) + | ch >= '0' && ch <= '9' = Just (52 + fromIntegral (ord ch - ord '0')) + | ch == a = Just 62 + | ch == b = Just 63 + | otherwise = Nothing + +fromB64 :: Char -> Char -> Word8 -> Char +fromB64 ch62 ch63 x + | x < 26 = chr (ord 'A' + xi) + | x < 52 = chr (ord 'a' + (xi-26)) + | x < 62 = chr (ord '0' + (xi-52)) + | x == 62 = ch62 + | x == 63 = ch63 + | otherwise = error ("fromB64: index out of range " ++ show x) + where + xi :: Int + xi = fromIntegral x + +low6 :: Word32 -> Word8 +low6 x = fromIntegral (x .&. 0x3f) + +lowByte :: Word32 -> Word8 +lowByte x = (fromIntegral x) .&. 0xff + diff --git a/Codec/MIME/Decode.hs b/Codec/MIME/Decode.hs new file mode 100644 index 0000000..f23454a --- /dev/null +++ b/Codec/MIME/Decode.hs @@ -0,0 +1,56 @@ +module MIME.Decode where + +import Data.Char +import MIME.QuotedPrintable as QP +import MIME.Base64 as Base64 + +decodeBody :: String -> String -> String +decodeBody enc body = + case map toLower enc of + "base64" -> map (chr.fromIntegral) $ Base64.decode body + "quoted-printable" -> QP.decode body + _ -> body + +-- Decoding of RFC 2047's "encoded-words' production +-- (as used in email-headers and some HTTP header cases +-- (AtomPub's Slug: header)) +decodeWord :: String -> Maybe (String, String) +decodeWord str = + case str of + '=':'?':xs -> + case dropLang $ break (\ch -> ch =='?' || ch == '*') xs of + (cs,_:x:'?':bs) + | isKnownCharset (map toLower cs) -> + case toLower x of + 'q' -> decodeQ cs (break (=='?') bs) + 'b' -> decodeB cs (break (=='?') bs) + _ -> Nothing + _ -> Nothing + _ -> Nothing + where + isKnownCharset cs = cs `elem` ["iso-8859-1", "us-ascii"] + + -- ignore RFC 2231 extension of permitting a language tag to be supplied + -- after the charset. + dropLang (as,'*':bs) = (as,dropWhile (/='?') bs) + dropLang (as,bs) = (as,bs) + + decodeQ cset (fs,'?':'=':rs) = Just (fromCharset cset (QP.decode fs),rs) + decodeQ _ _ = Nothing + + decodeB cset (fs,'?':'=':rs) = + Just (fromCharset cset (Base64.decodeToString fs),rs) + decodeB _ _ = Nothing + + fromCharset _cset cs = cs + +decodeWords :: String -> String +decodeWords "" = "" +decodeWords (x:xs) + | isSpace x = x : decodeWords xs + | otherwise = + case decodeWord (x:xs) of + Nothing -> x : decodeWords xs + Just (as,bs) -> as ++ decodeWords bs + + diff --git a/Codec/MIME/Parse.hs b/Codec/MIME/Parse.hs new file mode 100644 index 0000000..06fffc6 --- /dev/null +++ b/Codec/MIME/Parse.hs @@ -0,0 +1,241 @@ +{- | + Module : MIME.Parse + Copyright : (c) 2006 + + Maintainer : tse-dev-team@galois.com + Stability : unstable + Portability : GHC + + Parsing MIME content. +-} +module MIME.Parse + ( parseMIMEBody + , parseMIMEType + ) where + +import MIME.Type +import MIME.Decode + +import Data.Char +import Data.Maybe +import Data.List +import Debug.Trace ( trace ) + +parseMIMEBody :: [(String,String)] -> String -> MIMEValue +parseMIMEBody headers_in body = + case mimeType mty of + Multipart{} -> fst (parseMultipart mty body) + Message{} -> fst (parseMultipart mty body) + _ -> MIMEValue mty (parseContentDisp headers) + (Single (processBody headers body)) + + where headers = [ (map toLower k,v) | (k,v) <- headers_in ] + mty = fromMaybe defaultType + (parseContentType =<< lookup "content-type" headers) +defaultType :: Type +defaultType = Type { mimeType = Text "plain" + , mimeParams = [("charset", "us-ascii")] + } + +parseContentDisp :: [(String,String)] -> Maybe Disposition +parseContentDisp headers = + (processDisp . dropFoldingWSP) =<< lookup "content-disposition" headers + where + processDisp "" = Nothing + processDisp xs = Just $ + case break (\ch -> isSpace ch || ch == ';') xs of + (as,"") -> Disposition { dispType = toDispType (map toLower as) + , dispParams = [] + } + (as,bs) -> Disposition { dispType = toDispType (map toLower as) + , dispParams = processParams (parseParams bs) + } + + processParams = map procP + where + procP (as,val) + | "name" == asl = Name val + | "filename" == asl = Filename val + | "creation-date" == asl = CreationDate val + | "modification-date" == asl = ModDate val + | "read-date" == asl = ReadDate val + | "size" == asl = Size val + | otherwise = OtherParam (map toLower as) val + where asl = map toLower as + + toDispType t = case t of + "inline" -> DispInline + "attachment" -> DispAttachment + "form-data" -> DispFormData + _ -> DispOther t + + +processBody :: [(String,String)] -> String -> String +processBody headers body = + case lookup "content-transfer-encoding" headers of + Nothing -> body + Just v -> decodeBody v body + +parseMIMEMessage :: String -> MIMEValue +parseMIMEMessage entity = + case parseHeaders entity of + (as,bs) -> parseMIMEBody as bs + +parseHeaders :: String -> ([(String,String)], String) +parseHeaders str = + case findFieldName "" str of + Left (nm, rs) -> parseFieldValue nm (dropFoldingWSP rs) + Right body -> ([],body) + where + findFieldName _acc "" = Right "" + findFieldName _acc ('\r':'\n':xs) = Right xs + findFieldName acc (':':xs) = Left (reverse (dropWhile isHSpace acc), xs) + findFieldName acc (x:xs) = findFieldName (x:acc) xs + + parseFieldValue nm xs = + case takeUntilCRLF xs of + (as,"") -> ([(nm,as)],"") + (as,bs) -> let (zs,ys) = parseHeaders bs in ((nm,as):zs,ys) + +parseMultipart :: Type -> String -> (MIMEValue, String) +parseMultipart mty body = + case lookup "boundary" (mimeParams mty) of + Nothing -> trace ("Multipart mime type, " ++ showType mty ++ + ", has no required boundary parameter. Defaulting to text/plain") $ + (MIMEValue defaultType Nothing (Single body), "") + Just bnd -> (MIMEValue mty Nothing (Multi vals), rs) + where (vals,rs) = splitMulti bnd body + +splitMulti :: String -> String -> ([MIMEValue], String) +splitMulti bnd body_in = + -- Note: we insert a CRLF if it looks as if the boundary string starts + -- right off the bat. No harm done if this turns out to be incorrect. + let body = case body_in of + '-':'-':_ -> ('\r':'\n':body_in) + _ -> body_in + in case untilMatch dashBoundary body of + Nothing -> ([],"") + Just ('-':'-':xs) -> ([],xs) + Just xs -> splitMulti1 (dropTrailer xs) + + where + dashBoundary = ("\r\n--" ++ bnd) + + splitMulti1 xs = + case matchUntil dashBoundary xs of + ("","") -> ([],"") + (as,"") -> ([parseMIMEMessage as],"") + (as,'-':'-':bs) -> ([parseMIMEMessage as], dropTrailer bs) + (as,bs) -> let (zs,ys) = splitMulti1 (dropTrailer bs) + in ((parseMIMEMessage as) : zs,ys) + + dropTrailer xs = + case dropWhile isHSpace xs of + '\r':'\n':xs1 -> xs1 + xs1 -> xs1 -- hmm, flag an error? + +parseMIMEType :: String -> Maybe Type +parseMIMEType = parseContentType + +parseContentType :: String -> Maybe Type +parseContentType str = + case break (=='/') (dropFoldingWSP str) of + (maj,_:minor) -> + case break (\ ch -> isHSpace ch || isTSpecial ch) minor of + (as,bs) -> + Just Type { mimeType = toType maj as + , mimeParams = parseParams (dropWhile isHSpace bs) + } + _ -> trace ("unable to parse content-type: " ++ show str) $ Nothing + where + toType a b = case lookup (map toLower a) mediaTypes of + Just ctor -> ctor b + _ -> Other a b + + +parseParams :: String -> [(String,String)] +parseParams "" = [] +parseParams (';':xs) = + case break (=='=') (dropFoldingWSP xs) of + (nm,_:vs) -> + case vs of + '"':vs1 -> + case break (=='"') vs1 of + (val,"") -> [(nm,val)] + (val,_:zs) -> (nm,val):parseParams (dropWhile isHSpace zs) + _ -> case break (\ ch -> isHSpace ch || isTSpecial ch) vs of + (val,zs) -> (nm,val):parseParams (dropWhile isHSpace zs) + _ -> [] + +parseParams cs = trace ("curious: " ++ show cs) [] + +mediaTypes :: [(String, String -> MIMEType)] +mediaTypes = + [ ("multipart", (Multipart . toMultipart)) + , ("application", Application) + , ("audio", Audio) + , ("image", Image) + , ("message", Message) + , ("model", Model) + , ("text", Text) + , ("video", Video) + ] + where toMultipart b = fromMaybe other (lookup (map toLower b) multipartTypes) + where other = case b of + 'x':'-':_ -> Extension b + _ -> OtherMulti b + + +multipartTypes :: [(String, Multipart)] +multipartTypes = + [ ("alternative", Alternative) + , ("byteranges", Byteranges) + , ("digest", Digest) + , ("encrypted", Encrypted) + , ("form-data", FormData) + , ("mixed", Mixed) + , ("parallel", Parallel) + , ("related", Related) + , ("signed", Signed) + ] + + +untilMatch :: String -> String -> Maybe String +untilMatch str xs = go str xs + where go "" rs = Just rs + go _ "" = Nothing + go (a:as) (b:bs) = if a == b then go as bs else go str bs + +matchUntil :: String -> String -> (String, String) +matchUntil _ "" = ("", "") +matchUntil str xs + -- slow, but it'll do for now. + | str `isPrefixOf` xs = ("", drop (length str) xs) +matchUntil str (x:xs) = let (as,bs) = matchUntil str xs in (x:as,bs) + + + +isHSpace :: Char -> Bool +isHSpace c = c == ' ' || c == '\t' + +isTSpecial :: Char -> Bool +isTSpecial x = x `elem` "()<>@,;:\\\"/[]?=" + + +dropFoldingWSP :: String -> String +dropFoldingWSP "" = "" +dropFoldingWSP (x:xs) + | isHSpace x = dropFoldingWSP xs +dropFoldingWSP ('\r':'\n':x:xs) + | isHSpace x = dropFoldingWSP xs +dropFoldingWSP (x:xs) = x:xs + +takeUntilCRLF :: String -> (String, String) +takeUntilCRLF str = go "" str + where + go acc "" = (reverse (dropWhile isHSpace acc), "") + go acc ('\r':'\n':x:xs) + | isHSpace x = go (' ':acc) xs + | otherwise = (reverse (dropWhile isHSpace acc), x:xs) + go acc (x:xs) = go (x:acc) xs + diff --git a/Codec/MIME/QuotedPrintable.hs b/Codec/MIME/QuotedPrintable.hs new file mode 100644 index 0000000..514ce4e --- /dev/null +++ b/Codec/MIME/QuotedPrintable.hs @@ -0,0 +1,12 @@ +module MIME.QuotedPrintable where + +import Data.Char + +decode :: String -> String +decode "" = "" +decode ('=':x1:x2:xs) + | isHexDigit x1 && isHexDigit x2 = + chr (digitToInt x1 * 16 + digitToInt x2) : decode xs +decode ('=':xs) = '=':decode xs + -- make it explicit that we propagate other '=' occurrences. +decode (x1:xs) = x1:decode xs diff --git a/Codec/MIME/Type.hs b/Codec/MIME/Type.hs new file mode 100644 index 0000000..e9266ec --- /dev/null +++ b/Codec/MIME/Type.hs @@ -0,0 +1,166 @@ +{- | + + Module : MIME.Type + Copyright : (c) 2006 + + Maintainer : tse-dev-team@galois.com + Stability : unstable + Portability : GHC + + Representing MIME types and values. +-} +module MIME.Type where + +import Data.List ( concatMap, isSuffixOf ) + +data Type + = Type + { mimeType :: MIMEType + , mimeParams :: [(String,String)] + } deriving ( Show, Ord, Eq ) + +showType :: Type -> String +showType t = showMIMEType (mimeType t) ++ showMIMEParams (mimeParams t) + +showMIMEParams :: [(String,String)] -> String +showMIMEParams ps = concatMap showP ps + where + showP (a,b) = ';':a ++ '=':'"':b ++ "\"" + + +data MIMEType + = Application SubType + | Audio SubType + | Image SubType + | Message SubType + | Model SubType + | Multipart Multipart + | Text TextType + | Video SubType + | Other String SubType + deriving ( Show, Ord, Eq ) + +showMIMEType :: MIMEType -> String +showMIMEType t = + case t of + Application s -> "application/"++s + Audio s -> "audio/"++s + Image s -> "image/"++s + Message s -> "message/"++s + Model s -> "model/"++s + Multipart s -> "multipart/"++showMultipart s + Text s -> "text/"++s + Video s -> "video/"++s + Other a b -> a ++ '/':b + +-- | a (type, subtype) MIME pair. +data MIMEPair + = MIMEPair String SubType + deriving ( Eq ) + +showMIMEPair :: MIMEPair -> String +showMIMEPair (MIMEPair a b) = a ++ '/':b + +-- | default subtype representation. +type SubType = String + +-- | subtype for text content; currently just a string. +type TextType = SubType + +subTypeString :: Type -> String +subTypeString t = + case break (=='/') (showMIMEType (mimeType t)) of + (_,"") -> "" + (_,_:bs) -> bs + +majTypeString :: Type -> String +majTypeString t = + case break (=='/') (showMIMEType (mimeType t)) of + (as,_) -> as + +data Multipart + = Alternative + | Byteranges + | Digest + | Encrypted + | FormData + | Mixed + | Parallel + | Related + | Signed + | Extension String -- ^ e.g., 'x-foo' (i.e., includes the 'x-' bit) + | OtherMulti String -- unrecognized\/uninterpreted. + -- (e.g., appledouble, voice-message, etc.) + deriving ( Show, Ord, Eq ) + +isXmlBased :: Type -> Bool +isXmlBased t = + case mimeType t of + Multipart{} -> False + _ -> "+xml" `isSuffixOf` subTypeString t + +isXmlType :: Type -> Bool +isXmlType t = isXmlBased t || + case mimeType t of + Application s -> s `elem` xml_media_types + Text s -> s `elem` xml_media_types + _ -> False + where + -- Note: xml-dtd isn't considered an XML type here. + xml_media_types :: [String] + xml_media_types = + [ "xml" + , "xml-external-parsed-entity" + ] + + +showMultipart :: Multipart -> String +showMultipart m = + case m of + Alternative -> "alternative" + Byteranges -> "byteranges" + Digest -> "digest" + Encrypted -> "encrypted" + FormData -> "form-data" + Mixed -> "mixed" + Parallel -> "parallel" + Related -> "related" + Signed -> "signed" + Extension e -> e + OtherMulti e -> e + +type Content = String + +data MIMEValue = MIMEValue { + mime_val_type :: Type, + mime_val_disp :: Maybe Disposition, + mime_val_content :: MIMEContent } + deriving ( Show, Eq ) + +data MIMEContent + = Single Content + | Multi [MIMEValue] + deriving (Eq,Show) + +data Disposition + = Disposition + { dispType :: DispType + , dispParams :: [DispParam] + } deriving ( Show, Eq ) + +data DispType + = DispInline + | DispAttachment + | DispFormData + | DispOther String + deriving ( Show, Eq) + +data DispParam + = Name String + | Filename String + | CreationDate String + | ModDate String + | ReadDate String + | Size String + | OtherParam String String + deriving ( Show, Eq) diff --git a/Codec/MIME/Utils.hs b/Codec/MIME/Utils.hs new file mode 100644 index 0000000..a5db2d9 --- /dev/null +++ b/Codec/MIME/Utils.hs @@ -0,0 +1,30 @@ +{- | + Module : MIME.Utils + Copyright : (c) 2007 + + Maintainer : tse-dev-team@galois.com + Stability : unstable + Portability : GHC + + Extracting content from MIME values and types. +-} +module MIME.Utils + ( findMultipartNamed -- :: String -> MIMEValue -> Maybe MIMEValue + ) where + +import MIME.Type +import Data.List ( find ) +import Control.Monad ( msum ) + +-- | Given a parameter name, locate it within a MIME value, +-- returning the corresponding (sub) MIME value. +findMultipartNamed :: String -> MIMEValue -> Maybe MIMEValue +findMultipartNamed nm mv = + case mime_val_content mv of + Multi ms -> msum (map (findMultipartNamed nm) ms) + Single {} -> do cd <- mime_val_disp mv + find (withDispName nm) (dispParams cd) + return mv + where withDispName a (Name b) = a == b + withDispName _ _ = False + diff --git a/LICENSE b/LICENSE index e3d544f..3e57945 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) Galois, Inc. 2007 +Copyright (c) Galois, Inc. 2006-2008 All rights reserved. diff --git a/MIME/Base64.hs b/MIME/Base64.hs deleted file mode 100644 index 848e034..0000000 --- a/MIME/Base64.hs +++ /dev/null @@ -1,141 +0,0 @@ -{- | - - Module : MIME.Parse - Copyright : (c) 2006 - - Maintainer : - Stability : unstable - Portability : GHC - - Base64 decoding and encoding routines. --} -module MIME.Base64 - ( encodeRaw -- :: Bool -> String -> [Word8] - , encodeRawString -- :: Bool -> String -> String - , encodeRawPrim -- :: Bool -> Char -> Char -> [Word8] -> String - - , formatOutput -- :: Int -> Maybe String -> String -> String - - , decode -- :: String -> [Word8] - , decodeToString -- :: String -> String - , decodePrim -- :: Char -> Char -> String -> [Word8] - ) where - -import Data.Bits -import Data.Char -import Data.Word -import Data.Maybe - -encodeRawString :: Bool -> String -> String -encodeRawString trail xs = encodeRaw trail (map (fromIntegral.ord) xs) - --- | 'formatOutput n mbLT str' formats 'str', splitting it --- into lines of length 'n'. The optional value lets you control what --- line terminator sequence to use; the default is CRLF (as per MIME.) -formatOutput :: Int -> Maybe String -> String -> String -formatOutput n mbTerm str - | n <= 0 = error ("formatOutput: negative line length " ++ show n) - | otherwise = chop n str - where - crlf :: String - crlf = fromMaybe "\r\n" mbTerm - - chop _ "" = "" - chop i xs = - case splitAt i xs of - (as,"") -> as - (as,bs) -> as ++ crlf ++ chop i bs - -encodeRaw :: Bool -> [Word8] -> String -encodeRaw trail bs = encodeRawPrim trail '+' '/' bs - --- lets you control what non-alphanum characters to use --- (The base64url variation uses '*' and '-', for instance.) --- No support for mapping these to multiple characters in the output though. -encodeRawPrim :: Bool -> Char -> Char -> [Word8] -> String -encodeRawPrim trail ch62 ch63 ls = encoder ls - where - trailer xs ys - | not trail = xs - | otherwise = xs ++ ys - f = fromB64 ch62 ch63 - encoder [] = [] - encoder [x] = trailer (take 2 (encode3 f x 0 0 "")) "==" - encoder [x,y] = trailer (take 3 (encode3 f x y 0 "")) "=" - encoder (x:y:z:ws) = encode3 f x y z (encoder ws) - -encode3 :: (Word8 -> Char) -> Word8 -> Word8 -> Word8 -> String -> String -encode3 f a b c rs = - f (low6 (w24 `shiftR` 18)) : - f (low6 (w24 `shiftR` 12)) : - f (low6 (w24 `shiftR` 6)) : - f (low6 w24) : rs - where - w24 :: Word32 - w24 = (fromIntegral a `shiftL` 16) + - (fromIntegral b `shiftL` 8) + - fromIntegral c - -decodeToString :: String -> String -decodeToString str = map (chr.fromIntegral) $ decode str - -decode :: String -> [Word8] -decode str = decodePrim '+' '/' str - -decodePrim :: Char -> Char -> String -> [Word8] -decodePrim ch62 ch63 str = decoder $ takeUntilEnd str - where - takeUntilEnd "" = [] - takeUntilEnd ('=':_) = [] - takeUntilEnd (x:xs) = - case toB64 ch62 ch63 x of - Nothing -> takeUntilEnd xs - Just b -> b : takeUntilEnd xs - -decoder :: [Word8] -> [Word8] -decoder [] = [] -decoder [x] = take 1 (decode4 x 0 0 0 []) -decoder [x,y] = take 1 (decode4 x y 0 0 []) -- upper 4 bits of second val are known to be 0. -decoder [x,y,z] = take 2 (decode4 x y z 0 []) -decoder (x:y:z:w:xs) = decode4 x y z w (decoder xs) - -decode4 :: Word8 -> Word8 -> Word8 -> Word8 -> [Word8] -> [Word8] -decode4 a b c d rs = - (lowByte (w24 `shiftR` 16)) : - (lowByte (w24 `shiftR` 8)) : - (lowByte w24) : rs - where - w24 :: Word32 - w24 = - (fromIntegral a) `shiftL` 18 .|. - (fromIntegral b) `shiftL` 12 .|. - (fromIntegral c) `shiftL` 6 .|. - (fromIntegral d) - -toB64 :: Char -> Char -> Char -> Maybe Word8 -toB64 a b ch - | ch >= 'A' && ch <= 'Z' = Just (fromIntegral (ord ch - ord 'A')) - | ch >= 'a' && ch <= 'z' = Just (26 + fromIntegral (ord ch - ord 'a')) - | ch >= '0' && ch <= '9' = Just (52 + fromIntegral (ord ch - ord '0')) - | ch == a = Just 62 - | ch == b = Just 63 - | otherwise = Nothing - -fromB64 :: Char -> Char -> Word8 -> Char -fromB64 ch62 ch63 x - | x < 26 = chr (ord 'A' + xi) - | x < 52 = chr (ord 'a' + (xi-26)) - | x < 62 = chr (ord '0' + (xi-52)) - | x == 62 = ch62 - | x == 63 = ch63 - | otherwise = error ("fromB64: index out of range " ++ show x) - where - xi :: Int - xi = fromIntegral x - -low6 :: Word32 -> Word8 -low6 x = fromIntegral (x .&. 0x3f) - -lowByte :: Word32 -> Word8 -lowByte x = (fromIntegral x) .&. 0xff - diff --git a/MIME/Decode.hs b/MIME/Decode.hs deleted file mode 100644 index f23454a..0000000 --- a/MIME/Decode.hs +++ /dev/null @@ -1,56 +0,0 @@ -module MIME.Decode where - -import Data.Char -import MIME.QuotedPrintable as QP -import MIME.Base64 as Base64 - -decodeBody :: String -> String -> String -decodeBody enc body = - case map toLower enc of - "base64" -> map (chr.fromIntegral) $ Base64.decode body - "quoted-printable" -> QP.decode body - _ -> body - --- Decoding of RFC 2047's "encoded-words' production --- (as used in email-headers and some HTTP header cases --- (AtomPub's Slug: header)) -decodeWord :: String -> Maybe (String, String) -decodeWord str = - case str of - '=':'?':xs -> - case dropLang $ break (\ch -> ch =='?' || ch == '*') xs of - (cs,_:x:'?':bs) - | isKnownCharset (map toLower cs) -> - case toLower x of - 'q' -> decodeQ cs (break (=='?') bs) - 'b' -> decodeB cs (break (=='?') bs) - _ -> Nothing - _ -> Nothing - _ -> Nothing - where - isKnownCharset cs = cs `elem` ["iso-8859-1", "us-ascii"] - - -- ignore RFC 2231 extension of permitting a language tag to be supplied - -- after the charset. - dropLang (as,'*':bs) = (as,dropWhile (/='?') bs) - dropLang (as,bs) = (as,bs) - - decodeQ cset (fs,'?':'=':rs) = Just (fromCharset cset (QP.decode fs),rs) - decodeQ _ _ = Nothing - - decodeB cset (fs,'?':'=':rs) = - Just (fromCharset cset (Base64.decodeToString fs),rs) - decodeB _ _ = Nothing - - fromCharset _cset cs = cs - -decodeWords :: String -> String -decodeWords "" = "" -decodeWords (x:xs) - | isSpace x = x : decodeWords xs - | otherwise = - case decodeWord (x:xs) of - Nothing -> x : decodeWords xs - Just (as,bs) -> as ++ decodeWords bs - - diff --git a/MIME/Parse.hs b/MIME/Parse.hs deleted file mode 100644 index 06fffc6..0000000 --- a/MIME/Parse.hs +++ /dev/null @@ -1,241 +0,0 @@ -{- | - Module : MIME.Parse - Copyright : (c) 2006 - - Maintainer : tse-dev-team@galois.com - Stability : unstable - Portability : GHC - - Parsing MIME content. --} -module MIME.Parse - ( parseMIMEBody - , parseMIMEType - ) where - -import MIME.Type -import MIME.Decode - -import Data.Char -import Data.Maybe -import Data.List -import Debug.Trace ( trace ) - -parseMIMEBody :: [(String,String)] -> String -> MIMEValue -parseMIMEBody headers_in body = - case mimeType mty of - Multipart{} -> fst (parseMultipart mty body) - Message{} -> fst (parseMultipart mty body) - _ -> MIMEValue mty (parseContentDisp headers) - (Single (processBody headers body)) - - where headers = [ (map toLower k,v) | (k,v) <- headers_in ] - mty = fromMaybe defaultType - (parseContentType =<< lookup "content-type" headers) -defaultType :: Type -defaultType = Type { mimeType = Text "plain" - , mimeParams = [("charset", "us-ascii")] - } - -parseContentDisp :: [(String,String)] -> Maybe Disposition -parseContentDisp headers = - (processDisp . dropFoldingWSP) =<< lookup "content-disposition" headers - where - processDisp "" = Nothing - processDisp xs = Just $ - case break (\ch -> isSpace ch || ch == ';') xs of - (as,"") -> Disposition { dispType = toDispType (map toLower as) - , dispParams = [] - } - (as,bs) -> Disposition { dispType = toDispType (map toLower as) - , dispParams = processParams (parseParams bs) - } - - processParams = map procP - where - procP (as,val) - | "name" == asl = Name val - | "filename" == asl = Filename val - | "creation-date" == asl = CreationDate val - | "modification-date" == asl = ModDate val - | "read-date" == asl = ReadDate val - | "size" == asl = Size val - | otherwise = OtherParam (map toLower as) val - where asl = map toLower as - - toDispType t = case t of - "inline" -> DispInline - "attachment" -> DispAttachment - "form-data" -> DispFormData - _ -> DispOther t - - -processBody :: [(String,String)] -> String -> String -processBody headers body = - case lookup "content-transfer-encoding" headers of - Nothing -> body - Just v -> decodeBody v body - -parseMIMEMessage :: String -> MIMEValue -parseMIMEMessage entity = - case parseHeaders entity of - (as,bs) -> parseMIMEBody as bs - -parseHeaders :: String -> ([(String,String)], String) -parseHeaders str = - case findFieldName "" str of - Left (nm, rs) -> parseFieldValue nm (dropFoldingWSP rs) - Right body -> ([],body) - where - findFieldName _acc "" = Right "" - findFieldName _acc ('\r':'\n':xs) = Right xs - findFieldName acc (':':xs) = Left (reverse (dropWhile isHSpace acc), xs) - findFieldName acc (x:xs) = findFieldName (x:acc) xs - - parseFieldValue nm xs = - case takeUntilCRLF xs of - (as,"") -> ([(nm,as)],"") - (as,bs) -> let (zs,ys) = parseHeaders bs in ((nm,as):zs,ys) - -parseMultipart :: Type -> String -> (MIMEValue, String) -parseMultipart mty body = - case lookup "boundary" (mimeParams mty) of - Nothing -> trace ("Multipart mime type, " ++ showType mty ++ - ", has no required boundary parameter. Defaulting to text/plain") $ - (MIMEValue defaultType Nothing (Single body), "") - Just bnd -> (MIMEValue mty Nothing (Multi vals), rs) - where (vals,rs) = splitMulti bnd body - -splitMulti :: String -> String -> ([MIMEValue], String) -splitMulti bnd body_in = - -- Note: we insert a CRLF if it looks as if the boundary string starts - -- right off the bat. No harm done if this turns out to be incorrect. - let body = case body_in of - '-':'-':_ -> ('\r':'\n':body_in) - _ -> body_in - in case untilMatch dashBoundary body of - Nothing -> ([],"") - Just ('-':'-':xs) -> ([],xs) - Just xs -> splitMulti1 (dropTrailer xs) - - where - dashBoundary = ("\r\n--" ++ bnd) - - splitMulti1 xs = - case matchUntil dashBoundary xs of - ("","") -> ([],"") - (as,"") -> ([parseMIMEMessage as],"") - (as,'-':'-':bs) -> ([parseMIMEMessage as], dropTrailer bs) - (as,bs) -> let (zs,ys) = splitMulti1 (dropTrailer bs) - in ((parseMIMEMessage as) : zs,ys) - - dropTrailer xs = - case dropWhile isHSpace xs of - '\r':'\n':xs1 -> xs1 - xs1 -> xs1 -- hmm, flag an error? - -parseMIMEType :: String -> Maybe Type -parseMIMEType = parseContentType - -parseContentType :: String -> Maybe Type -parseContentType str = - case break (=='/') (dropFoldingWSP str) of - (maj,_:minor) -> - case break (\ ch -> isHSpace ch || isTSpecial ch) minor of - (as,bs) -> - Just Type { mimeType = toType maj as - , mimeParams = parseParams (dropWhile isHSpace bs) - } - _ -> trace ("unable to parse content-type: " ++ show str) $ Nothing - where - toType a b = case lookup (map toLower a) mediaTypes of - Just ctor -> ctor b - _ -> Other a b - - -parseParams :: String -> [(String,String)] -parseParams "" = [] -parseParams (';':xs) = - case break (=='=') (dropFoldingWSP xs) of - (nm,_:vs) -> - case vs of - '"':vs1 -> - case break (=='"') vs1 of - (val,"") -> [(nm,val)] - (val,_:zs) -> (nm,val):parseParams (dropWhile isHSpace zs) - _ -> case break (\ ch -> isHSpace ch || isTSpecial ch) vs of - (val,zs) -> (nm,val):parseParams (dropWhile isHSpace zs) - _ -> [] - -parseParams cs = trace ("curious: " ++ show cs) [] - -mediaTypes :: [(String, String -> MIMEType)] -mediaTypes = - [ ("multipart", (Multipart . toMultipart)) - , ("application", Application) - , ("audio", Audio) - , ("image", Image) - , ("message", Message) - , ("model", Model) - , ("text", Text) - , ("video", Video) - ] - where toMultipart b = fromMaybe other (lookup (map toLower b) multipartTypes) - where other = case b of - 'x':'-':_ -> Extension b - _ -> OtherMulti b - - -multipartTypes :: [(String, Multipart)] -multipartTypes = - [ ("alternative", Alternative) - , ("byteranges", Byteranges) - , ("digest", Digest) - , ("encrypted", Encrypted) - , ("form-data", FormData) - , ("mixed", Mixed) - , ("parallel", Parallel) - , ("related", Related) - , ("signed", Signed) - ] - - -untilMatch :: String -> String -> Maybe String -untilMatch str xs = go str xs - where go "" rs = Just rs - go _ "" = Nothing - go (a:as) (b:bs) = if a == b then go as bs else go str bs - -matchUntil :: String -> String -> (String, String) -matchUntil _ "" = ("", "") -matchUntil str xs - -- slow, but it'll do for now. - | str `isPrefixOf` xs = ("", drop (length str) xs) -matchUntil str (x:xs) = let (as,bs) = matchUntil str xs in (x:as,bs) - - - -isHSpace :: Char -> Bool -isHSpace c = c == ' ' || c == '\t' - -isTSpecial :: Char -> Bool -isTSpecial x = x `elem` "()<>@,;:\\\"/[]?=" - - -dropFoldingWSP :: String -> String -dropFoldingWSP "" = "" -dropFoldingWSP (x:xs) - | isHSpace x = dropFoldingWSP xs -dropFoldingWSP ('\r':'\n':x:xs) - | isHSpace x = dropFoldingWSP xs -dropFoldingWSP (x:xs) = x:xs - -takeUntilCRLF :: String -> (String, String) -takeUntilCRLF str = go "" str - where - go acc "" = (reverse (dropWhile isHSpace acc), "") - go acc ('\r':'\n':x:xs) - | isHSpace x = go (' ':acc) xs - | otherwise = (reverse (dropWhile isHSpace acc), x:xs) - go acc (x:xs) = go (x:acc) xs - diff --git a/MIME/QuotedPrintable.hs b/MIME/QuotedPrintable.hs deleted file mode 100644 index 514ce4e..0000000 --- a/MIME/QuotedPrintable.hs +++ /dev/null @@ -1,12 +0,0 @@ -module MIME.QuotedPrintable where - -import Data.Char - -decode :: String -> String -decode "" = "" -decode ('=':x1:x2:xs) - | isHexDigit x1 && isHexDigit x2 = - chr (digitToInt x1 * 16 + digitToInt x2) : decode xs -decode ('=':xs) = '=':decode xs - -- make it explicit that we propagate other '=' occurrences. -decode (x1:xs) = x1:decode xs diff --git a/MIME/Type.hs b/MIME/Type.hs deleted file mode 100644 index e9266ec..0000000 --- a/MIME/Type.hs +++ /dev/null @@ -1,166 +0,0 @@ -{- | - - Module : MIME.Type - Copyright : (c) 2006 - - Maintainer : tse-dev-team@galois.com - Stability : unstable - Portability : GHC - - Representing MIME types and values. --} -module MIME.Type where - -import Data.List ( concatMap, isSuffixOf ) - -data Type - = Type - { mimeType :: MIMEType - , mimeParams :: [(String,String)] - } deriving ( Show, Ord, Eq ) - -showType :: Type -> String -showType t = showMIMEType (mimeType t) ++ showMIMEParams (mimeParams t) - -showMIMEParams :: [(String,String)] -> String -showMIMEParams ps = concatMap showP ps - where - showP (a,b) = ';':a ++ '=':'"':b ++ "\"" - - -data MIMEType - = Application SubType - | Audio SubType - | Image SubType - | Message SubType - | Model SubType - | Multipart Multipart - | Text TextType - | Video SubType - | Other String SubType - deriving ( Show, Ord, Eq ) - -showMIMEType :: MIMEType -> String -showMIMEType t = - case t of - Application s -> "application/"++s - Audio s -> "audio/"++s - Image s -> "image/"++s - Message s -> "message/"++s - Model s -> "model/"++s - Multipart s -> "multipart/"++showMultipart s - Text s -> "text/"++s - Video s -> "video/"++s - Other a b -> a ++ '/':b - --- | a (type, subtype) MIME pair. -data MIMEPair - = MIMEPair String SubType - deriving ( Eq ) - -showMIMEPair :: MIMEPair -> String -showMIMEPair (MIMEPair a b) = a ++ '/':b - --- | default subtype representation. -type SubType = String - --- | subtype for text content; currently just a string. -type TextType = SubType - -subTypeString :: Type -> String -subTypeString t = - case break (=='/') (showMIMEType (mimeType t)) of - (_,"") -> "" - (_,_:bs) -> bs - -majTypeString :: Type -> String -majTypeString t = - case break (=='/') (showMIMEType (mimeType t)) of - (as,_) -> as - -data Multipart - = Alternative - | Byteranges - | Digest - | Encrypted - | FormData - | Mixed - | Parallel - | Related - | Signed - | Extension String -- ^ e.g., 'x-foo' (i.e., includes the 'x-' bit) - | OtherMulti String -- unrecognized\/uninterpreted. - -- (e.g., appledouble, voice-message, etc.) - deriving ( Show, Ord, Eq ) - -isXmlBased :: Type -> Bool -isXmlBased t = - case mimeType t of - Multipart{} -> False - _ -> "+xml" `isSuffixOf` subTypeString t - -isXmlType :: Type -> Bool -isXmlType t = isXmlBased t || - case mimeType t of - Application s -> s `elem` xml_media_types - Text s -> s `elem` xml_media_types - _ -> False - where - -- Note: xml-dtd isn't considered an XML type here. - xml_media_types :: [String] - xml_media_types = - [ "xml" - , "xml-external-parsed-entity" - ] - - -showMultipart :: Multipart -> String -showMultipart m = - case m of - Alternative -> "alternative" - Byteranges -> "byteranges" - Digest -> "digest" - Encrypted -> "encrypted" - FormData -> "form-data" - Mixed -> "mixed" - Parallel -> "parallel" - Related -> "related" - Signed -> "signed" - Extension e -> e - OtherMulti e -> e - -type Content = String - -data MIMEValue = MIMEValue { - mime_val_type :: Type, - mime_val_disp :: Maybe Disposition, - mime_val_content :: MIMEContent } - deriving ( Show, Eq ) - -data MIMEContent - = Single Content - | Multi [MIMEValue] - deriving (Eq,Show) - -data Disposition - = Disposition - { dispType :: DispType - , dispParams :: [DispParam] - } deriving ( Show, Eq ) - -data DispType - = DispInline - | DispAttachment - | DispFormData - | DispOther String - deriving ( Show, Eq) - -data DispParam - = Name String - | Filename String - | CreationDate String - | ModDate String - | ReadDate String - | Size String - | OtherParam String String - deriving ( Show, Eq) diff --git a/MIME/Utils.hs b/MIME/Utils.hs deleted file mode 100644 index a5db2d9..0000000 --- a/MIME/Utils.hs +++ /dev/null @@ -1,30 +0,0 @@ -{- | - Module : MIME.Utils - Copyright : (c) 2007 - - Maintainer : tse-dev-team@galois.com - Stability : unstable - Portability : GHC - - Extracting content from MIME values and types. --} -module MIME.Utils - ( findMultipartNamed -- :: String -> MIMEValue -> Maybe MIMEValue - ) where - -import MIME.Type -import Data.List ( find ) -import Control.Monad ( msum ) - --- | Given a parameter name, locate it within a MIME value, --- returning the corresponding (sub) MIME value. -findMultipartNamed :: String -> MIMEValue -> Maybe MIMEValue -findMultipartNamed nm mv = - case mime_val_content mv of - Multi ms -> msum (map (findMultipartNamed nm) ms) - Single {} -> do cd <- mime_val_disp mv - find (withDispName nm) (dispParams cd) - return mv - where withDispName a (Name b) = a == b - withDispName _ _ = False - diff --git a/mime.cabal b/mime.cabal index 1d77084..ac24c4c 100644 --- a/mime.cabal +++ b/mime.cabal @@ -1,5 +1,5 @@ name: mime -version: 0.1 +version: 0.2 synopsis: Working with MIME types. description: Working with MIME types. category: Web @@ -7,9 +7,7 @@ license: BSD3 license-file: LICENSE author: Galois Inc. maintainer: Galois Inc -Copyright: (c) 2007 Galois Inc. -extra-source-files: scripts/json-rpc.js -homepage: http://docserver/mime.git/ +Copyright: (c) 2006-2008 Galois Inc. cabal-version: >= 1.2.0 flag split-base @@ -20,6 +18,10 @@ library else build-depends: base < 3 - exposed-modules: MIME.Type, MIME.Parse, MIME.Utils - other-modules: MIME.Base64, MIME.Decode, MIME.QuotedPrintable + exposed-modules: Codec.MIME.Type + Codec.MIME.Parse + Codec.MIME.Utils + other-modules: Codec.MIME.Base64 + Codec.MIME.Decode + Codec.MIME.QuotedPrintable ghc-options: -Wall -O2 -- cgit v1.2.3