summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authortv <tv@shackspace.de>2014-11-18 23:08:07 +0100
committertv <tv@shackspace.de>2014-11-18 23:08:07 +0100
commit8908c3bfaf10277de632c98e70ea9e988f0c7471 (patch)
tree771f2598739df6faeab6acf07c1815fe78ad4128
parent64ec0daf8937202092e887c80bc86f2e6e54fe7f (diff)
Scrape zone with hxt.
-rw-r--r--Regfish.hs26
-rw-r--r--Regfish/Parser.hs152
-rw-r--r--Regfish/Types.hs11
-rw-r--r--env.nix2
4 files changed, 116 insertions, 75 deletions
diff --git a/Regfish.hs b/Regfish.hs
index 4119d4d..a0c70db 100644
--- a/Regfish.hs
+++ b/Regfish.hs
@@ -14,6 +14,7 @@ module Regfish
, add
, edit
, del
+ , RRData(..)
) where
import Control.Lens
@@ -90,10 +91,21 @@ list =
-- TODO login and try again (once)
error "not logged in"
else do
- mapM_ pr $ readZone $ body
+ mapM_ pr =<< readZoneIO body
where
- pr (Record rrid rrtype rrname rrttl rraux rrdata) = do
- printf "%-8s %-24s\t%s\tIN\t%-5s\t%s\n"
+ pr (RR rrid rrname rrttl x) = case x of
+ A rrdata -> f "A" rrid rrname rrttl 0 rrdata
+ AAAA rrdata -> f "AAAA" rrid rrname rrttl 0 rrdata
+ ALIAS rrdata -> f "ALIAS" rrid rrname rrttl 0 rrdata
+ CNAME rrdata -> f "CNAME" rrid rrname rrttl 0 rrdata
+ MX rraux rrdata -> f "MX" rrid rrname rrttl rraux rrdata
+ NS rrdata -> f "NS" rrid rrname rrttl 0 rrdata
+ TXT rrdata -> f "TXT" rrid rrname rrttl 0 rrdata
+ SRV rrdata -> f "SRV" rrid rrname rrttl 0 rrdata
+
+ f :: String -> String -> String -> Integer -> Integer -> String -> IO ()
+ f rrtype rrid rrname rrttl rraux rrdata =
+ printf "%-8s %-24s\t%s\tin\t%-5s\t%s\n"
rrid
rrname
(if rrttl /= 0 then show rrttl else mempty)
@@ -177,8 +189,14 @@ isLoginPage =
undata :: RRData -> (String -> Integer -> String -> a) -> a
undata (A rrdata) f = f "A" 0 rrdata
-undata (MX rraux rrdata) f = f "MX" rraux rrdata
+undata (AAAA rrdata) f = f "AAAA" 0 rrdata
+undata (ALIAS rrdata) f = f "ALIAS" 0 rrdata
undata (CNAME rrdata) f = f "CNAME" 0 rrdata
+undata (MX rraux rrdata) f = f "MX" rraux rrdata
+undata (NS rrdata) f = f "NS" 0 rrdata
+undata (TXT rrdata) f = f "TXT" 0 rrdata
+undata (SRV rrdata) f = f "SRV" 0 rrdata
+
add :: RRname -> RRttl -> RRData -> IO (Either String (W.Response LBS8.ByteString))
add rrname rrttl rrdata =
diff --git a/Regfish/Parser.hs b/Regfish/Parser.hs
index bc294b6..a1af5a5 100644
--- a/Regfish/Parser.hs
+++ b/Regfish/Parser.hs
@@ -3,96 +3,110 @@
module Regfish.Parser
- ( readZone
+ ( readZoneIO
) where
+import Control.Applicative
+import qualified Data.List as L
import qualified Data.ByteString.Lazy.Char8 as LBS8
import Regfish.Types
-import Text.HTML.TagSoup
+import Text.HandsomeSoup
+import Text.XML.HXT.Core
+
+
+normalizeId :: String -> String
+normalizeId = \case
+ 'a':'_':xs -> xs
+ 'r':'r':'_':xs -> drop 1 . dropWhile (/='_') $ xs
+ xs -> xs
+
+
+normalizeRR :: IOSLA (XIOState ()) XmlTree XmlTree
+normalizeRR =
+ processAttrl (changeAttrValue normalizeId `when` hasName "id")
+ <<< (filterA $ neg $ hasAttr "nowrap")
+ <<< (removeAttr "style")
+
+
+readZoneIO :: LBS8.ByteString -> IO [RR]
+readZoneIO b =
+ concat <$> mapM (flip readRRs $ LBS8.unpack b)
+ [ "A"
+ , "AAAA"
+ , "ALIAS"
+ , "CNAME"
+ , "MX"
+ , "NS"
+ , "TXT"
+ , "SRV"
+ ]
+
+
+readRRs :: String -> String -> IO [RR]
+readRRs rrtype s = do
+ let --body = LBS8.unpack b -- $ r ^. W.responseBody
+ doc = parseHtml s
+ t = processTopDown normalizeRR
+ <<< removeAllWhiteSpace'
+ <<< css ("." ++ rrtype)
+ <<< doc
+
+ -- t also contains <td>IN</td> and <td id="type">TXT</td>
+ rrid <- runX $ getAttrValue "id" <<< t
+ rrname <- runX $ css "#name" //> getText <<< t
+ rrttl <- runX $ css "#ttl" //> getText <<< t
+ rraux <- runX $ css "#aux" //> getText <<< t
+ rrdata <- runX $ css "#data" //> getText <<< t
+ return $ map (parse rrtype) $
+ L.transpose [rrid, rrname, rrttl, rraux, rrdata]
-
-readZone :: LBS8.ByteString -> [Record]
-readZone = parseRecords . dnszone
-
-
-parseRecords :: [Tag LBS8.ByteString] -> [Record]
-parseRecords =
- map toRecord . filter knownRecord . rec []
where
- knownRecord (_ : _ : _ : namespace : type_ : _) =
- namespace == "IN" && type_ `elem` ["MX", "A", "CNAME"]
- knownRecord x =
- errorBadArgument "parseRecords.knownRecord" x
+ -- TODO note that "0"-values should be different
- rec ys = \case
- (TagOpen "tr" attrs:xs) ->
- case getRecordId attrs of
- Just id_ ->
- let (r,_:xs') = span (/=TagClose "tr") xs
- in rec (parseRecord id_ r:ys) xs'
- _ ->
- let xs' = drop 1 $ dropWhile (/=TagClose "tr") xs
- in rec ys xs'
+ parse "A" [rrid, rrname, rrttl, rrdata] =
+ RR rrid rrname (read rrttl) (A rrdata)
- [] -> ys
+ parse "AAAA" [rrid, rrname, rrttl, rrdata] =
+ RR rrid rrname (read rrttl) (AAAA rrdata)
- x -> errorBadArgument "parseRecords.rec" x
+ parse "ALIAS" [rrid, rrname, rrttl, rrdata] =
+ RR rrid rrname (read rrttl) (ALIAS rrdata)
- getRecordId attrs =
- case maybe Nothing (Just . LBS8.unpack) $ lookup "id" attrs of
- Just ('a':'_':xs) -> Just (LBS8.pack xs)
- _ -> Nothing
+ parse "CNAME" [rrid, rrname, rrttl, rrdata] =
+ RR rrid rrname (read rrttl) (CNAME rrdata)
+ parse "MX" [rrid, rrname, rrttl, rraux, rrdata] =
+ RR rrid rrname (read rrttl) (MX (read rraux) rrdata)
+ parse "MX" [rrid, rrname, rrttl, rrdata] =
+ RR rrid rrname (read rrttl) (MX 0 rrdata)
-toRecord :: [LBS8.ByteString] -> Record
-toRecord xs = case map LBS8.unpack xs of
- [rrid, rrname, rrttl, "IN", rrtype, rraux, rrdata] ->
- Record rrid rrtype rrname (read rrttl) (read rraux) rrdata
- [rrid, rrname, rrttl, "IN", rrtype, rrdata] ->
- Record rrid rrtype rrname (read rrttl) 0 rrdata
- x ->
- errorBadArgument "toRecord" x
+ parse "NS" [rrid, rrname, rrttl, rrdata] =
+ RR rrid rrname (read rrttl) (NS rrdata)
+ parse "NS" [rrid, rrname, rrdata] =
+ RR rrid rrname 0 (NS rrdata)
-dnszone :: LBS8.ByteString -> [Tag LBS8.ByteString]
-dnszone =
- filter p . (dnszoneSoup . parseTags)
- where
- p = \case
- TagOpen "tr" _ -> True
- TagClose "tr" -> True
- TagText x
- | x /= "\n" -> True
- | otherwise -> False
- _ -> False
-
-
-parseRecord :: LBS8.ByteString -> [Tag LBS8.ByteString] -> [LBS8.ByteString]
-parseRecord id_ =
- (id_:) . add_def_ttl "0" . filter p . map f
- where
- f (TagText x) = x
- f x = errorBadArgument "parseRecord" x
- p x
- | x == "\160" = False
- | x == " " = False
- | otherwise = True
+ parse "TXT" [rrid, rrname, rrttl, rrdata] =
+ RR rrid rrname (read rrttl) (TXT rrdata)
+
+ parse "SRV" [rrid, rrname, rrttl, rrdata] =
+ RR rrid rrname (read rrttl) (SRV rrdata)
- add_def_ttl def_ttl (x:"IN":xs) = x : def_ttl : "IN" : xs
- add_def_ttl _ xs = xs
+ parse x y = errorBadArgument "readRRs.parse" (x, y)
-dnszoneSoup :: [Tag LBS8.ByteString] -> [Tag LBS8.ByteString]
-dnszoneSoup =
- takeWhile (\t -> not $ t ~== dnszoneClose) .
- dropWhile (\t -> not $ t ~== dnszoneOpen)
+
+removeAllWhiteSpace' :: ArrowXml a => a XmlTree XmlTree
+removeAllWhiteSpace' =
+ fromLA $ editNTreeA [isWhiteSpace' :-> Text.XML.HXT.Core.none]
+ -- fromLA $ processBottomUp removeWhiteSpace' -- less efficient
+ where
+ isWhiteSpace' = hasText (all isXmlSpaceChar')
+ isXmlSpaceChar' = (`elem` " \n\t\r\160")
-dnszoneOpen, dnszoneClose :: Tag LBS8.ByteString
-dnszoneOpen = TagOpen "table" [("id", "dnszone")]
-dnszoneClose = TagClose "table"
errorBadArgument :: Show a => String -> a -> b
diff --git a/Regfish/Types.hs b/Regfish/Types.hs
index cef2808..c453947 100644
--- a/Regfish/Types.hs
+++ b/Regfish/Types.hs
@@ -65,10 +65,19 @@ data RFState = RFState
makeLenses ''RFState
+data RR = RR String String Integer RRData
+ deriving (Show)
+
+
data RRData
= A String
- | MX Integer String
+ | AAAA String
+ | ALIAS String
| CNAME String
+ | MX Integer String
+ | NS String
+ | TXT String
+ | SRV String
deriving (Show)
diff --git a/env.nix b/env.nix
index f0ea840..812bea7 100644
--- a/env.nix
+++ b/env.nix
@@ -22,7 +22,7 @@ let
[
acidState
cabalInstall
- tagsoup
+ HandsomeSoup
wreq
]
);