summaryrefslogtreecommitdiffstats
path: root/Regfish/Parser.hs
blob: a1af5a5fc5061c1357ef6c6878954db13ea93882 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}


module Regfish.Parser
    ( readZoneIO
    ) where

import Control.Applicative
import qualified Data.List as L
import qualified Data.ByteString.Lazy.Char8 as LBS8
import Regfish.Types
import Text.HandsomeSoup
import Text.XML.HXT.Core


normalizeId :: String -> String
normalizeId = \case
    'a':'_':xs -> xs
    'r':'r':'_':xs -> drop 1 . dropWhile (/='_') $ xs
    xs -> xs


normalizeRR :: IOSLA (XIOState ()) XmlTree XmlTree
normalizeRR =
    processAttrl (changeAttrValue normalizeId `when` hasName "id")
        <<< (filterA $ neg $ hasAttr "nowrap")
        <<< (removeAttr "style")


readZoneIO :: LBS8.ByteString -> IO [RR]
readZoneIO b =
    concat <$> mapM (flip readRRs $ LBS8.unpack b)
        [ "A"
        , "AAAA"
        , "ALIAS"
        , "CNAME"
        , "MX"
        , "NS"
        , "TXT"
        , "SRV"
        ]


readRRs :: String -> String -> IO [RR]
readRRs rrtype s = do
    let --body = LBS8.unpack b -- $ r ^. W.responseBody
        doc = parseHtml s
        t = processTopDown normalizeRR
                  <<< removeAllWhiteSpace'
                  <<< css ("." ++ rrtype)
                  <<< doc

    -- t also contains <td>IN</td> and <td id="type">TXT</td>
    rrid    <- runX $ getAttrValue "id" <<< t
    rrname  <- runX $ css "#name" //> getText <<< t
    rrttl   <- runX $ css "#ttl"  //> getText <<< t
    rraux   <- runX $ css "#aux"  //> getText <<< t
    rrdata  <- runX $ css "#data" //> getText <<< t
    return $ map (parse rrtype) $
        L.transpose [rrid, rrname, rrttl, rraux, rrdata]

  where

    -- TODO note that "0"-values should be different

    parse "A" [rrid, rrname, rrttl, rrdata] =
        RR rrid rrname (read rrttl) (A rrdata)

    parse "AAAA" [rrid, rrname, rrttl, rrdata] =
        RR rrid rrname (read rrttl) (AAAA rrdata)

    parse "ALIAS" [rrid, rrname, rrttl, rrdata] =
        RR rrid rrname (read rrttl) (ALIAS rrdata)

    parse "CNAME" [rrid, rrname, rrttl, rrdata] =
        RR rrid rrname (read rrttl) (CNAME rrdata)

    parse "MX" [rrid, rrname, rrttl, rraux, rrdata] =
        RR rrid rrname (read rrttl) (MX (read rraux) rrdata)

    parse "MX" [rrid, rrname, rrttl, rrdata] =
        RR rrid rrname (read rrttl) (MX 0 rrdata)

    parse "NS" [rrid, rrname, rrttl, rrdata] =
        RR rrid rrname (read rrttl) (NS rrdata)

    parse "NS" [rrid, rrname, rrdata] =
        RR rrid rrname 0 (NS rrdata)

    parse "TXT" [rrid, rrname, rrttl, rrdata] =
        RR rrid rrname (read rrttl) (TXT rrdata)

    parse "SRV" [rrid, rrname, rrttl, rrdata] =
        RR rrid rrname (read rrttl) (SRV rrdata)

    parse x y = errorBadArgument "readRRs.parse" (x, y)




removeAllWhiteSpace' :: ArrowXml a => a XmlTree XmlTree
removeAllWhiteSpace' =
    fromLA $ editNTreeA [isWhiteSpace' :-> Text.XML.HXT.Core.none]
    -- fromLA $ processBottomUp removeWhiteSpace'    -- less efficient
  where
    isWhiteSpace' = hasText (all isXmlSpaceChar')
    isXmlSpaceChar' = (`elem` " \n\t\r\160")



errorBadArgument :: Show a => String -> a -> b
errorBadArgument name x =
    error $ "Regfish.Parser." ++ name ++ ": bad argument: " ++ show x