summary refs log tree commit diff stats
path: root/krebs/5pkgs/simple
diff options
context:
space:
mode:
author jeschli <jeschli@gmail.com> 2018-07-17 19:38:46 +0200
committer jeschli <jeschli@gmail.com> 2018-07-17 19:38:46 +0200
commit 113a6cb4192693d06651d8e51626c681accfc28b (patch)
tree 80c00b9a158e2b57039ae49799a9f04b1ebc6228 /krebs/5pkgs/simple
parent 8abbda9061c7fc3f04ca580a7435085cab8a8c83 (diff)
parent 74698c904460db569a3c6c8355c0b25a94c98800 (diff)
Merge remote-tracking branch 'origin/master'
Diffstat (limited to 'krebs/5pkgs/simple')
-rw-r--r-- krebs/5pkgs/simple/Reaktor/plugins.nix 25
1 files changed, 15 insertions, 10 deletions
diff --git a/krebs/5pkgs/simple/Reaktor/plugins.nix b/krebs/5pkgs/simple/Reaktor/plugins.nix
index 96e0af2d7..db578c457 100644
--- a/krebs/5pkgs/simple/Reaktor/plugins.nix
+++ b/krebs/5pkgs/simple/Reaktor/plugins.nix
@@ -121,21 +121,26 @@ rec {
pattern = "^.*(?P<args>http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+).*$$";
path = with pkgs; [ curl perl ];
script = pkgs.writePython3 "url-title" [ "beautifulsoup4" "lxml" ] ''
+ import cgi
import sys
import urllib.request
from bs4 import BeautifulSoup
try:
- soup = BeautifulSoup(urllib.request.urlopen(sys.argv[1]), "lxml")
- title = soup.find('title').string
-
- if title:
- if len(title) > 512:
- print('message to long, skipped')
- elif len(title.split('\n')) > 5:
- print('to many lines, skipped')
- else:
- print(title)
+ req = urllib.request.Request(sys.argv[1])
+ req.add_header('user-agent', 'Reaktor-url-title')
+ resp = urllib.request.urlopen(req)
+ if resp.headers['content-type'].find('text/html') >= 0:
+ soup = BeautifulSoup(resp.read(16000), "lxml")
+ title = soup.find('title').string
+
+ if len(title.split('\n')) > 5:
+ title = '\n'.join(title.split('\n')[:5])
+
+ print(title[:450])
+ else:
+ cd_header = resp.headers['content-disposition']
+ print(cgi.parse_header(cd_header)[1]['filename'])
except: # noqa: E722
pass
'';