diff options
author | makefu <github@syntax-fehler.de> | 2022-05-02 22:25:19 +0200 |
---|---|---|
committer | makefu <github@syntax-fehler.de> | 2022-05-02 22:57:31 +0200 |
commit | 5187d0ac208deb06eff3bafb7ffd2fc32286b46a (patch) | |
tree | 17e595c8c9aecb5b3d21f07bdee4527af0b9a4f9 /makefu/2configs | |
parent | bdd36774f5e0854553b13433ef85260c6c074b3e (diff) |
ma rss: deploy ratt job
Diffstat (limited to 'makefu/2configs')
-rw-r--r-- | makefu/2configs/deployment/rss/ebk.yml | 59 | ||||
-rwxr-xr-x | makefu/2configs/deployment/rss/ratt-hourly.sh | 28 | ||||
-rw-r--r-- | makefu/2configs/deployment/rss/ratt.nix | 26 | ||||
-rw-r--r-- | makefu/2configs/deployment/rss/rss.euer.krebsco.de.nix (renamed from makefu/2configs/deployment/rss.euer.krebsco.de.nix) | 6 | ||||
-rw-r--r-- | makefu/2configs/deployment/rss/urls | 5 |
5 files changed, 124 insertions, 0 deletions
diff --git a/makefu/2configs/deployment/rss/ebk.yml b/makefu/2configs/deployment/rss/ebk.yml new file mode 100644 index 000000000..3248f5c4e --- /dev/null +++ b/makefu/2configs/deployment/rss/ebk.yml @@ -0,0 +1,59 @@ +regex: https://www.ebay\-kleinanzeigen.de/s\-.* +selectors: + httpsettings: + cookie: {} + header: {} + useragent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) + Chrome/90.0.4430.72 Safari/537.36 + insecure: false + feed: + title: title + authorname: "" + authoremail: "" + item: + container: ul[id='srchrslt-adtable'] li[class='ad-listitem lazyload-item '] + title: | + title = sel:find("h2.text-module-begin"):first():text():gsub("^%s*(.-)%s*$", "%1") + print(title) + link: | + link = sel:find("a"):first():attr("href") + print("https://www.ebay-kleinanzeigen.de" .. link) + created: |- + created = "" + sel:find("div.aditem-main--top--right"):each(function(i, s) + created = s:text():gsub("^%s*(.-)%s*$", "%1") + end) + if created:match("Heute") then + time = created:gsub("^.*,", "") + print(os.date("%d.%m.%Y") .. time .. " CET") + return + end + if created:match("Gestern") then + time = created:gsub("^.*,", "") + print(os.date("%d.%m.%Y", os.time()-24*60*60) .. time .. " CET") + return + end + if created:match("\.") then + print(created .. " 00:00 CET") + return + end + createdformat: 02.01.2006 15:04 MST + description: |- + description = sel:find(".aditem-main--middle"):html() + place = sel:find(".aditem-main--top--left"):html() + print(description .. place) + content: "" + image: | + img = sel:find("div.imagebox"):first():attr("data-imgsrc") + if img ~= "" then + -- prepend host if needed + if not(img:match("https*:\/\/.*")) then + img = "https://www.ebay-kleinanzeigen.de" .. img + end + print(img) + end + nextpage: | + nextpage = sel:find("link[rel=next]"):attr("href") + print("https://www.ebay-kleinanzeigen.de" .. nextpage) + nextpagecount: 5 + sort: "" diff --git a/makefu/2configs/deployment/rss/ratt-hourly.sh b/makefu/2configs/deployment/rss/ratt-hourly.sh new file mode 100755 index 000000000..67f2529bd --- /dev/null +++ b/makefu/2configs/deployment/rss/ratt-hourly.sh @@ -0,0 +1,28 @@ +#!/bin/sh +set -eu +URLS=${1?must provide URLS file} +OUTFILE=${2:-all.xml} + +echo "init, writing to $OUTFILE" + +cat > "$OUTFILE" <<EOF +<?xml version="1.0" encoding="UTF-8"?> +<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/"> + <channel> + <title>makefu Ebay Kleinanzeigen</title> + <link>https://www.ebay-kleinanzeigen.de/</link> + <description>Feed for all kleinanzeigen</description> + <pubDate>$(date '+%a, %d %b %Y %H:%M:%S %z')</pubDate> +EOF +echo "looping through $URLS" +cat "$URLS" | while read line;do + echo "fetching $line" + ratt auto "$line" | \ + xmlstarlet sel -t -c "//item" >> "$OUTFILE" || : +done + +echo "close" +cat >> "$OUTFILE" <<EOF + </channel> +</rss> +EOF diff --git a/makefu/2configs/deployment/rss/ratt.nix b/makefu/2configs/deployment/rss/ratt.nix new file mode 100644 index 000000000..b794d9201 --- /dev/null +++ b/makefu/2configs/deployment/rss/ratt.nix @@ -0,0 +1,26 @@ +{ pkgs, lib, config, ... }: +let + fqdn = "rss.euer.krebsco.de"; + ratt-path = "/var/lib/ratt/"; + out-path = "${ratt-path}/all.xml"; +in { + systemd.tmpfiles.rules = ["d ${ratt-path} 0750 nginx nginx - -" ]; + systemd.services.run-ratt = { + enable = true; + path = with pkgs; [ "/nix/store/vhmzblnaav2lp4lwqdgm13l55qlm79mk-ratt-unstable-2022-01-11" xmlstarlet ]; + script = builtins.readFile ./ratt-hourly.sh; + scriptArgs = "${./urls} ${out-path}"; + + preStart = "install -v -m750 ${./ebk.yml} ${ratt-path}/ebk.yml"; # ratt requires the config file in the cwd + serviceConfig.User = "nginx"; + serviceConfig.WorkingDirectory= ratt-path; + startAt = "00/3:07"; # every 3 hours, fetch latest + }; + + services.nginx.virtualHosts."${fqdn}" = { + locations."=/ratt/all.xml" = { + alias = out-path; + }; + }; +} + diff --git a/makefu/2configs/deployment/rss.euer.krebsco.de.nix b/makefu/2configs/deployment/rss/rss.euer.krebsco.de.nix index 19f20f50f..e64a69d9c 100644 --- a/makefu/2configs/deployment/rss.euer.krebsco.de.nix +++ b/makefu/2configs/deployment/rss/rss.euer.krebsco.de.nix @@ -1,7 +1,9 @@ { pkgs, lib, config, ... }: let fqdn = "rss.euer.krebsco.de"; + ratt-path = "/var/lib/ratt/"; in { + systemd.tmpfiles.rules = ["d ${ratt-path} 0750 nginx nginx - -" ]; services.tt-rss = { enable = true; virtualHost = fqdn; @@ -19,6 +21,10 @@ in { services.nginx.virtualHosts."${fqdn}" = { enableACME = true; forceSSL = true; + locations."/ratt/" = { + alias = ratt-path; + extraConfig = "autoindex on;"; + }; }; } diff --git a/makefu/2configs/deployment/rss/urls b/makefu/2configs/deployment/rss/urls new file mode 100644 index 000000000..12d4c092a --- /dev/null +++ b/makefu/2configs/deployment/rss/urls @@ -0,0 +1,5 @@ +https://www.ebay-kleinanzeigen.de/s-muehlhausen/preis:0:45/duplo-eisenbahn/k0l9313r5 +https://www.ebay-kleinanzeigen.de/s-heimwerken/nein/muehlhausen/bohrmaschine/k0c84l9313r5+heimwerken.versand_s:nein +https://www.ebay-kleinanzeigen.de/s-stuttgart/zigbee/k0l9280 +https://www.ebay-kleinanzeigen.de/s-stuttgart/ikea-tradfri-fernbedienung/k0l9280 +https://www.ebay-kleinanzeigen.de/s-70378/d%C3%B6rrautomat/k0l9334r5 |