From 8908c3bfaf10277de632c98e70ea9e988f0c7471 Mon Sep 17 00:00:00 2001 From: tv Date: Tue, 18 Nov 2014 23:08:07 +0100 Subject: Scrape zone with hxt. --- Regfish.hs | 26 ++++++++-- Regfish/Parser.hs | 152 +++++++++++++++++++++++++++++------------------------- Regfish/Types.hs | 11 +++- env.nix | 2 +- 4 files changed, 116 insertions(+), 75 deletions(-) diff --git a/Regfish.hs b/Regfish.hs index 4119d4d..a0c70db 100644 --- a/Regfish.hs +++ b/Regfish.hs @@ -14,6 +14,7 @@ module Regfish , add , edit , del + , RRData(..) ) where import Control.Lens @@ -90,10 +91,21 @@ list = -- TODO login and try again (once) error "not logged in" else do - mapM_ pr $ readZone $ body + mapM_ pr =<< readZoneIO body where - pr (Record rrid rrtype rrname rrttl rraux rrdata) = do - printf "%-8s %-24s\t%s\tIN\t%-5s\t%s\n" + pr (RR rrid rrname rrttl x) = case x of + A rrdata -> f "A" rrid rrname rrttl 0 rrdata + AAAA rrdata -> f "AAAA" rrid rrname rrttl 0 rrdata + ALIAS rrdata -> f "ALIAS" rrid rrname rrttl 0 rrdata + CNAME rrdata -> f "CNAME" rrid rrname rrttl 0 rrdata + MX rraux rrdata -> f "MX" rrid rrname rrttl rraux rrdata + NS rrdata -> f "NS" rrid rrname rrttl 0 rrdata + TXT rrdata -> f "TXT" rrid rrname rrttl 0 rrdata + SRV rrdata -> f "SRV" rrid rrname rrttl 0 rrdata + + f :: String -> String -> String -> Integer -> Integer -> String -> IO () + f rrtype rrid rrname rrttl rraux rrdata = + printf "%-8s %-24s\t%s\tin\t%-5s\t%s\n" rrid rrname (if rrttl /= 0 then show rrttl else mempty) @@ -177,8 +189,14 @@ isLoginPage = undata :: RRData -> (String -> Integer -> String -> a) -> a undata (A rrdata) f = f "A" 0 rrdata -undata (MX rraux rrdata) f = f "MX" rraux rrdata +undata (AAAA rrdata) f = f "AAAA" 0 rrdata +undata (ALIAS rrdata) f = f "ALIAS" 0 rrdata undata (CNAME rrdata) f = f "CNAME" 0 rrdata +undata (MX rraux rrdata) f = f "MX" rraux rrdata +undata (NS rrdata) f = f "NS" 0 rrdata +undata (TXT rrdata) f = f "TXT" 0 rrdata +undata (SRV rrdata) f = f "SRV" 0 rrdata + add :: RRname -> RRttl -> RRData -> IO (Either String (W.Response LBS8.ByteString)) add rrname rrttl rrdata = diff --git a/Regfish/Parser.hs b/Regfish/Parser.hs index bc294b6..a1af5a5 100644 --- a/Regfish/Parser.hs +++ b/Regfish/Parser.hs @@ -3,96 +3,110 @@ module Regfish.Parser - ( readZone + ( readZoneIO ) where +import Control.Applicative +import qualified Data.List as L import qualified Data.ByteString.Lazy.Char8 as LBS8 import Regfish.Types -import Text.HTML.TagSoup +import Text.HandsomeSoup +import Text.XML.HXT.Core + + +normalizeId :: String -> String +normalizeId = \case + 'a':'_':xs -> xs + 'r':'r':'_':xs -> drop 1 . dropWhile (/='_') $ xs + xs -> xs + + +normalizeRR :: IOSLA (XIOState ()) XmlTree XmlTree +normalizeRR = + processAttrl (changeAttrValue normalizeId `when` hasName "id") + <<< (filterA $ neg $ hasAttr "nowrap") + <<< (removeAttr "style") + + +readZoneIO :: LBS8.ByteString -> IO [RR] +readZoneIO b = + concat <$> mapM (flip readRRs $ LBS8.unpack b) + [ "A" + , "AAAA" + , "ALIAS" + , "CNAME" + , "MX" + , "NS" + , "TXT" + , "SRV" + ] + + +readRRs :: String -> String -> IO [RR] +readRRs rrtype s = do + let --body = LBS8.unpack b -- $ r ^. W.responseBody + doc = parseHtml s + t = processTopDown normalizeRR + <<< removeAllWhiteSpace' + <<< css ("." ++ rrtype) + <<< doc + + -- t also contains IN and TXT + rrid <- runX $ getAttrValue "id" <<< t + rrname <- runX $ css "#name" //> getText <<< t + rrttl <- runX $ css "#ttl" //> getText <<< t + rraux <- runX $ css "#aux" //> getText <<< t + rrdata <- runX $ css "#data" //> getText <<< t + return $ map (parse rrtype) $ + L.transpose [rrid, rrname, rrttl, rraux, rrdata] - -readZone :: LBS8.ByteString -> [Record] -readZone = parseRecords . dnszone - - -parseRecords :: [Tag LBS8.ByteString] -> [Record] -parseRecords = - map toRecord . filter knownRecord . rec [] where - knownRecord (_ : _ : _ : namespace : type_ : _) = - namespace == "IN" && type_ `elem` ["MX", "A", "CNAME"] - knownRecord x = - errorBadArgument "parseRecords.knownRecord" x + -- TODO note that "0"-values should be different - rec ys = \case - (TagOpen "tr" attrs:xs) -> - case getRecordId attrs of - Just id_ -> - let (r,_:xs') = span (/=TagClose "tr") xs - in rec (parseRecord id_ r:ys) xs' - _ -> - let xs' = drop 1 $ dropWhile (/=TagClose "tr") xs - in rec ys xs' + parse "A" [rrid, rrname, rrttl, rrdata] = + RR rrid rrname (read rrttl) (A rrdata) - [] -> ys + parse "AAAA" [rrid, rrname, rrttl, rrdata] = + RR rrid rrname (read rrttl) (AAAA rrdata) - x -> errorBadArgument "parseRecords.rec" x + parse "ALIAS" [rrid, rrname, rrttl, rrdata] = + RR rrid rrname (read rrttl) (ALIAS rrdata) - getRecordId attrs = - case maybe Nothing (Just . LBS8.unpack) $ lookup "id" attrs of - Just ('a':'_':xs) -> Just (LBS8.pack xs) - _ -> Nothing + parse "CNAME" [rrid, rrname, rrttl, rrdata] = + RR rrid rrname (read rrttl) (CNAME rrdata) + parse "MX" [rrid, rrname, rrttl, rraux, rrdata] = + RR rrid rrname (read rrttl) (MX (read rraux) rrdata) + parse "MX" [rrid, rrname, rrttl, rrdata] = + RR rrid rrname (read rrttl) (MX 0 rrdata) -toRecord :: [LBS8.ByteString] -> Record -toRecord xs = case map LBS8.unpack xs of - [rrid, rrname, rrttl, "IN", rrtype, rraux, rrdata] -> - Record rrid rrtype rrname (read rrttl) (read rraux) rrdata - [rrid, rrname, rrttl, "IN", rrtype, rrdata] -> - Record rrid rrtype rrname (read rrttl) 0 rrdata - x -> - errorBadArgument "toRecord" x + parse "NS" [rrid, rrname, rrttl, rrdata] = + RR rrid rrname (read rrttl) (NS rrdata) + parse "NS" [rrid, rrname, rrdata] = + RR rrid rrname 0 (NS rrdata) -dnszone :: LBS8.ByteString -> [Tag LBS8.ByteString] -dnszone = - filter p . (dnszoneSoup . parseTags) - where - p = \case - TagOpen "tr" _ -> True - TagClose "tr" -> True - TagText x - | x /= "\n" -> True - | otherwise -> False - _ -> False - - -parseRecord :: LBS8.ByteString -> [Tag LBS8.ByteString] -> [LBS8.ByteString] -parseRecord id_ = - (id_:) . add_def_ttl "0" . filter p . map f - where - f (TagText x) = x - f x = errorBadArgument "parseRecord" x - p x - | x == "\160" = False - | x == " " = False - | otherwise = True + parse "TXT" [rrid, rrname, rrttl, rrdata] = + RR rrid rrname (read rrttl) (TXT rrdata) + + parse "SRV" [rrid, rrname, rrttl, rrdata] = + RR rrid rrname (read rrttl) (SRV rrdata) - add_def_ttl def_ttl (x:"IN":xs) = x : def_ttl : "IN" : xs - add_def_ttl _ xs = xs + parse x y = errorBadArgument "readRRs.parse" (x, y) -dnszoneSoup :: [Tag LBS8.ByteString] -> [Tag LBS8.ByteString] -dnszoneSoup = - takeWhile (\t -> not $ t ~== dnszoneClose) . - dropWhile (\t -> not $ t ~== dnszoneOpen) + +removeAllWhiteSpace' :: ArrowXml a => a XmlTree XmlTree +removeAllWhiteSpace' = + fromLA $ editNTreeA [isWhiteSpace' :-> Text.XML.HXT.Core.none] + -- fromLA $ processBottomUp removeWhiteSpace' -- less efficient + where + isWhiteSpace' = hasText (all isXmlSpaceChar') + isXmlSpaceChar' = (`elem` " \n\t\r\160") -dnszoneOpen, dnszoneClose :: Tag LBS8.ByteString -dnszoneOpen = TagOpen "table" [("id", "dnszone")] -dnszoneClose = TagClose "table" errorBadArgument :: Show a => String -> a -> b diff --git a/Regfish/Types.hs b/Regfish/Types.hs index cef2808..c453947 100644 --- a/Regfish/Types.hs +++ b/Regfish/Types.hs @@ -65,10 +65,19 @@ data RFState = RFState makeLenses ''RFState +data RR = RR String String Integer RRData + deriving (Show) + + data RRData = A String - | MX Integer String + | AAAA String + | ALIAS String | CNAME String + | MX Integer String + | NS String + | TXT String + | SRV String deriving (Show) diff --git a/env.nix b/env.nix index f0ea840..812bea7 100644 --- a/env.nix +++ b/env.nix @@ -22,7 +22,7 @@ let [ acidState cabalInstall - tagsoup + HandsomeSoup wreq ] ); -- cgit v1.2.3