summaryrefslogtreecommitdiffstats
path: root/Regfish
diff options
context:
space:
mode:
authortv <tv@shackspace.de>2014-11-18 23:08:07 +0100
committertv <tv@shackspace.de>2014-11-18 23:08:07 +0100
commit8908c3bfaf10277de632c98e70ea9e988f0c7471 (patch)
tree771f2598739df6faeab6acf07c1815fe78ad4128 /Regfish
parent64ec0daf8937202092e887c80bc86f2e6e54fe7f (diff)
Scrape zone with hxt.
Diffstat (limited to 'Regfish')
-rw-r--r--Regfish/Parser.hs152
-rw-r--r--Regfish/Types.hs11
2 files changed, 93 insertions, 70 deletions
diff --git a/Regfish/Parser.hs b/Regfish/Parser.hs
index bc294b6..a1af5a5 100644
--- a/Regfish/Parser.hs
+++ b/Regfish/Parser.hs
@@ -3,96 +3,110 @@
module Regfish.Parser
- ( readZone
+ ( readZoneIO
) where
+import Control.Applicative
+import qualified Data.List as L
import qualified Data.ByteString.Lazy.Char8 as LBS8
import Regfish.Types
-import Text.HTML.TagSoup
+import Text.HandsomeSoup
+import Text.XML.HXT.Core
+
+
+normalizeId :: String -> String
+normalizeId = \case
+ 'a':'_':xs -> xs
+ 'r':'r':'_':xs -> drop 1 . dropWhile (/='_') $ xs
+ xs -> xs
+
+
+normalizeRR :: IOSLA (XIOState ()) XmlTree XmlTree
+normalizeRR =
+ processAttrl (changeAttrValue normalizeId `when` hasName "id")
+ <<< (filterA $ neg $ hasAttr "nowrap")
+ <<< (removeAttr "style")
+
+
+readZoneIO :: LBS8.ByteString -> IO [RR]
+readZoneIO b =
+ concat <$> mapM (flip readRRs $ LBS8.unpack b)
+ [ "A"
+ , "AAAA"
+ , "ALIAS"
+ , "CNAME"
+ , "MX"
+ , "NS"
+ , "TXT"
+ , "SRV"
+ ]
+
+
+readRRs :: String -> String -> IO [RR]
+readRRs rrtype s = do
+ let --body = LBS8.unpack b -- $ r ^. W.responseBody
+ doc = parseHtml s
+ t = processTopDown normalizeRR
+ <<< removeAllWhiteSpace'
+ <<< css ("." ++ rrtype)
+ <<< doc
+
+ -- t also contains <td>IN</td> and <td id="type">TXT</td>
+ rrid <- runX $ getAttrValue "id" <<< t
+ rrname <- runX $ css "#name" //> getText <<< t
+ rrttl <- runX $ css "#ttl" //> getText <<< t
+ rraux <- runX $ css "#aux" //> getText <<< t
+ rrdata <- runX $ css "#data" //> getText <<< t
+ return $ map (parse rrtype) $
+ L.transpose [rrid, rrname, rrttl, rraux, rrdata]
-
-readZone :: LBS8.ByteString -> [Record]
-readZone = parseRecords . dnszone
-
-
-parseRecords :: [Tag LBS8.ByteString] -> [Record]
-parseRecords =
- map toRecord . filter knownRecord . rec []
where
- knownRecord (_ : _ : _ : namespace : type_ : _) =
- namespace == "IN" && type_ `elem` ["MX", "A", "CNAME"]
- knownRecord x =
- errorBadArgument "parseRecords.knownRecord" x
+ -- TODO note that "0"-values should be different
- rec ys = \case
- (TagOpen "tr" attrs:xs) ->
- case getRecordId attrs of
- Just id_ ->
- let (r,_:xs') = span (/=TagClose "tr") xs
- in rec (parseRecord id_ r:ys) xs'
- _ ->
- let xs' = drop 1 $ dropWhile (/=TagClose "tr") xs
- in rec ys xs'
+ parse "A" [rrid, rrname, rrttl, rrdata] =
+ RR rrid rrname (read rrttl) (A rrdata)
- [] -> ys
+ parse "AAAA" [rrid, rrname, rrttl, rrdata] =
+ RR rrid rrname (read rrttl) (AAAA rrdata)
- x -> errorBadArgument "parseRecords.rec" x
+ parse "ALIAS" [rrid, rrname, rrttl, rrdata] =
+ RR rrid rrname (read rrttl) (ALIAS rrdata)
- getRecordId attrs =
- case maybe Nothing (Just . LBS8.unpack) $ lookup "id" attrs of
- Just ('a':'_':xs) -> Just (LBS8.pack xs)
- _ -> Nothing
+ parse "CNAME" [rrid, rrname, rrttl, rrdata] =
+ RR rrid rrname (read rrttl) (CNAME rrdata)
+ parse "MX" [rrid, rrname, rrttl, rraux, rrdata] =
+ RR rrid rrname (read rrttl) (MX (read rraux) rrdata)
+ parse "MX" [rrid, rrname, rrttl, rrdata] =
+ RR rrid rrname (read rrttl) (MX 0 rrdata)
-toRecord :: [LBS8.ByteString] -> Record
-toRecord xs = case map LBS8.unpack xs of
- [rrid, rrname, rrttl, "IN", rrtype, rraux, rrdata] ->
- Record rrid rrtype rrname (read rrttl) (read rraux) rrdata
- [rrid, rrname, rrttl, "IN", rrtype, rrdata] ->
- Record rrid rrtype rrname (read rrttl) 0 rrdata
- x ->
- errorBadArgument "toRecord" x
+ parse "NS" [rrid, rrname, rrttl, rrdata] =
+ RR rrid rrname (read rrttl) (NS rrdata)
+ parse "NS" [rrid, rrname, rrdata] =
+ RR rrid rrname 0 (NS rrdata)
-dnszone :: LBS8.ByteString -> [Tag LBS8.ByteString]
-dnszone =
- filter p . (dnszoneSoup . parseTags)
- where
- p = \case
- TagOpen "tr" _ -> True
- TagClose "tr" -> True
- TagText x
- | x /= "\n" -> True
- | otherwise -> False
- _ -> False
-
-
-parseRecord :: LBS8.ByteString -> [Tag LBS8.ByteString] -> [LBS8.ByteString]
-parseRecord id_ =
- (id_:) . add_def_ttl "0" . filter p . map f
- where
- f (TagText x) = x
- f x = errorBadArgument "parseRecord" x
- p x
- | x == "\160" = False
- | x == " " = False
- | otherwise = True
+ parse "TXT" [rrid, rrname, rrttl, rrdata] =
+ RR rrid rrname (read rrttl) (TXT rrdata)
+
+ parse "SRV" [rrid, rrname, rrttl, rrdata] =
+ RR rrid rrname (read rrttl) (SRV rrdata)
- add_def_ttl def_ttl (x:"IN":xs) = x : def_ttl : "IN" : xs
- add_def_ttl _ xs = xs
+ parse x y = errorBadArgument "readRRs.parse" (x, y)
-dnszoneSoup :: [Tag LBS8.ByteString] -> [Tag LBS8.ByteString]
-dnszoneSoup =
- takeWhile (\t -> not $ t ~== dnszoneClose) .
- dropWhile (\t -> not $ t ~== dnszoneOpen)
+
+removeAllWhiteSpace' :: ArrowXml a => a XmlTree XmlTree
+removeAllWhiteSpace' =
+ fromLA $ editNTreeA [isWhiteSpace' :-> Text.XML.HXT.Core.none]
+ -- fromLA $ processBottomUp removeWhiteSpace' -- less efficient
+ where
+ isWhiteSpace' = hasText (all isXmlSpaceChar')
+ isXmlSpaceChar' = (`elem` " \n\t\r\160")
-dnszoneOpen, dnszoneClose :: Tag LBS8.ByteString
-dnszoneOpen = TagOpen "table" [("id", "dnszone")]
-dnszoneClose = TagClose "table"
errorBadArgument :: Show a => String -> a -> b
diff --git a/Regfish/Types.hs b/Regfish/Types.hs
index cef2808..c453947 100644
--- a/Regfish/Types.hs
+++ b/Regfish/Types.hs
@@ -65,10 +65,19 @@ data RFState = RFState
makeLenses ''RFState
+data RR = RR String String Integer RRData
+ deriving (Show)
+
+
data RRData
= A String
- | MX Integer String
+ | AAAA String
+ | ALIAS String
| CNAME String
+ | MX Integer String
+ | NS String
+ | TXT String
+ | SRV String
deriving (Show)