diff options
author | jeschli <jeschli@gmail.com> | 2018-07-17 19:38:46 +0200 |
---|---|---|
committer | jeschli <jeschli@gmail.com> | 2018-07-17 19:38:46 +0200 |
commit | 113a6cb4192693d06651d8e51626c681accfc28b (patch) | |
tree | 80c00b9a158e2b57039ae49799a9f04b1ebc6228 /krebs/5pkgs | |
parent | 8abbda9061c7fc3f04ca580a7435085cab8a8c83 (diff) | |
parent | 74698c904460db569a3c6c8355c0b25a94c98800 (diff) |
Merge remote-tracking branch 'origin/master'
Diffstat (limited to 'krebs/5pkgs')
-rw-r--r-- | krebs/5pkgs/simple/Reaktor/plugins.nix | 25 |
1 files changed, 15 insertions, 10 deletions
```diff
diff --git a/krebs/5pkgs/simple/Reaktor/plugins.nix b/krebs/5pkgs/simple/Reaktor/plugins.nix
index 96e0af2d7..db578c457 100644
--- a/krebs/5pkgs/simple/Reaktor/plugins.nix
+++ b/krebs/5pkgs/simple/Reaktor/plugins.nix
@@ -121,21 +121,26 @@ rec {
     pattern = "^.*(?P<args>http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+).*$$";
     path = with pkgs; [ curl perl ];
     script = pkgs.writePython3 "url-title" [ "beautifulsoup4" "lxml" ] ''
+      import cgi
       import sys
       import urllib.request
       from bs4 import BeautifulSoup
 
       try:
-          soup = BeautifulSoup(urllib.request.urlopen(sys.argv[1]), "lxml")
-          title = soup.find('title').string
-
-          if title:
-              if len(title) > 512:
-                  print('message to long, skipped')
-              elif len(title.split('\n')) > 5:
-                  print('to many lines, skipped')
-              else:
-                  print(title)
+          req = urllib.request.Request(sys.argv[1])
+          req.add_header('user-agent', 'Reaktor-url-title')
+          resp = urllib.request.urlopen(req)
+          if resp.headers['content-type'].find('text/html') >= 0:
+              soup = BeautifulSoup(resp.read(16000), "lxml")
+              title = soup.find('title').string
+
+              if len(title.split('\n')) > 5:
+                  title = '\n'.join(title.split('\n')[:5])
+
+              print(title[:450])
+          else:
+              cd_header = resp.headers['content-disposition']
+              print(cgi.parse_header(cd_header)[1]['filename'])
       except:  # noqa: E722
           pass
     '';
```