summaryrefslogtreecommitdiffstats
path: root/Regfish/Parser.hs
blob: be131e783c6ab465eb4966dff00f3b3057a82440 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}


module Regfish.Parser
    ( readZoneIO
    ) where

import qualified Data.Char as C
import qualified Data.List as L
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy.Char8 as LBS8
import qualified Data.Text as T
import Data.Bits (xor)
import Data.Maybe (fromMaybe)
import Regfish.Types
import Text.HandsomeSoup
import Text.XML.HXT.Core
import Text.Hex (decodeHex)


normalizeId :: String -> String
normalizeId = \case
    'a':'_':xs -> xs
    'r':'r':'_':xs -> drop 1 . dropWhile (/='_') $ xs
    xs -> xs


normalizeRR :: IOSLA (XIOState ()) XmlTree XmlTree
normalizeRR =
    processAttrl (changeAttrValue normalizeId `when` hasName "id")
        <<< (filterA $ neg $ hasAttr "nowrap")
        <<< (removeAttr "style")


readZoneIO :: LBS8.ByteString -> IO [RR]
readZoneIO b =
    concat <$> mapM (flip readRRs $ LBS8.unpack b)
        [ "NS"
        , "MX"
        , "A"
        , "AAAA"
        , "ALIAS"
        , "CNAME"
        , "TXT"
        , "SRV"
        ]


readRRs :: String -> String -> IO [RR]
readRRs rrtype s = do
    let --body = LBS8.unpack b -- $ r ^. W.responseBody
        doc = parseHtml s
        t = processTopDown normalizeRR
                  <<< removeAllWhiteSpace'
                  <<< css ("." ++ rrtype)
                  <<< doc

    -- t also contains <td>IN</td> and <td id="type">TXT</td>
    rrid    <- runX $ getAttrValue "id" <<< t
    rrname  <- runX $ css "#name" //> getText <<< t
    rrttl   <- runX $ css "#ttl"  //> getText <<< t
    rraux   <- runX $ css "#aux"  //> getText <<< t
    rrdata  <- runX $ css "#data" //> getText <<< decodeAllProtectedEmail <<< t
    return $ map (parse rrtype) $
        L.transpose [rrid, rrname, rrttl, rraux, rrdata]

  where

    -- TODO note that "0"-values should be different

    parse "A" [rrid, rrname, rrttl, rrdata] =
        RR rrid rrname (read rrttl) (A rrdata)

    parse "AAAA" [rrid, rrname, rrttl, rrdata] =
        RR rrid rrname (read rrttl) (AAAA rrdata)

    parse "ALIAS" [rrid, rrname, rrttl, rrdata] =
        RR rrid rrname (read rrttl) (ALIAS rrdata)

    parse "CNAME" [rrid, rrname, rrttl, rrdata] =
        RR rrid rrname (read rrttl) (CNAME rrdata)

    parse "MX" [rrid, rrname, rrttl, rraux, rrdata] =
        RR rrid rrname (read rrttl) (MX (read rraux) rrdata)

    parse "MX" [rrid, rrname, rrttl, rrdata] =
        RR rrid rrname (read rrttl) (MX 0 rrdata)

    parse "NS" [rrid, rrname, rrttl, rrdata] =
        RR rrid rrname (read rrttl) (NS rrdata)

    parse "NS" [rrid, rrname, rrdata] =
        RR rrid rrname 0 (NS rrdata)

    parse "TXT" [rrid, rrname, rrttl, rrdata] =
        RR rrid rrname (read rrttl) (TXT rrdata)

    parse "SRV" [rrid, rrname, rrttl, rrdata] =
        RR rrid rrname (read rrttl) (SRV rrdata)

    parse x y = errorBadArgument "readRRs.parse" (x, y)



decodeAllProtectedEmail :: ArrowXml a => a XmlTree XmlTree
decodeAllProtectedEmail =
    (
      processTopDown (
        (
          getAttrValue "data-cfemail"
          >>> arr (\x -> fromMaybe x (decodeEmail x))
          >>> mkText
        )
        `when` hasAttr "data-cfemail"
      )
      >>> processTopDown (
        replaceChildren (
          listA (deep getText) >>> arr concat
          >>> mkText
        )
        `when` hasAttrValue "id" (== "data")
      )
    ) `when` css ".__cf_email__"


decodeEmail :: String -> Maybe String
decodeEmail cfemail =
    case decodeHexString cfemail of
        Just (x:text) ->
            Just $ map (C.chr . (`xor` x)) text

        _ ->
            Nothing


decodeHexString :: String -> Maybe [Int]
decodeHexString =
    fmap (map fromIntegral . BS.unpack) . decodeHex . T.pack

removeAllWhiteSpace' :: ArrowXml a => a XmlTree XmlTree
removeAllWhiteSpace' =
    fromLA $ editNTreeA [isWhiteSpace' :-> Text.XML.HXT.Core.none]
    -- fromLA $ processBottomUp removeWhiteSpace'    -- less efficient
  where
    isWhiteSpace' = hasText (all isXmlSpaceChar')
    isXmlSpaceChar' = (`elem` (" \n\t\r\160" :: [Char]))


errorBadArgument :: Show a => String -> a -> b
errorBadArgument name x =
    error $ "Regfish.Parser." ++ name ++ ": bad argument: " ++ show x