rename knn to news :(

author: lassulus <lassulus@googlemail.com> 2014-01-10 00:03:38 +0100
committer: lassulus <lassulus@googlemail.com> 2014-01-10 00:03:38 +0100
commit: 4f7c7241e3e5bf0203269d1fa6298c85ab952db1 (patch)
tree: 4087f4292d418e63f815a89755fcef1e84082769 /news
parent: 55f6d44e551289a336bb922cc7c3fe70a5100774 (diff)
5 files changed, 388 insertions, 0 deletions
diff --git a/news/GfindFeeds4bot b/news/GfindFeeds4bot
new file mode 100755
index 00000000..a5439c5a
--- /dev/null
+++ b/news/GfindFeeds4bot
@@ -0,0 +1,17 @@
+#! /bin/sh
+# usage: GfindFeeds4bot QUERY
+set -euf
+
+export query="$1"
+export data="$(
+  curl -sS "https://www.google.com/uds/GfindFeeds?v=1.0&q=$query"
+)"
+
+node <<EOF
+  query = process.env.query
+  data = JSON.parse(process.env.data)
+
+  data.responseData.entries.forEach(function (entry, index) {
+    console.log(query + index + '|' + entry.url)
+  })
+EOF
diff --git a/news/controller.py b/news/controller.py
new file mode 100755
index 00000000..5277d626
--- /dev/null
+++ b/news/controller.py
@@ -0,0 +1,143 @@
+from time import sleep
+import irc.bot
+import _thread
+import rssbot
+import os
+import subprocess
+
+class NewsBot(irc.bot.SingleServerIRCBot):
+    def __init__(self, name, chans=['#news'], server='ire', port=6667, timeout=60):
+        irc.bot.SingleServerIRCBot.__init__(self, [(server, port)], name, name)
+        self.name = name
+        self.server = server
+        self.port = port
+        self.chans = chans
+        self.to = timeout
+
+    def start(self):
+        self.bot = _thread.start_new_thread(irc.bot.SingleServerIRCBot.start, (self,))
+
+    def on_welcome(self, connection, event):
+        for chan in self.chans:
+            connection.join(chan)
+
+    def send(self, target, string):
+        for line in string.split('\n'):
+            self.connection.privmsg(target, line)
+            sleep(1)
+
+    def on_privmsg(self, connection, event):
+        args_array = event.arguments[0].split()
+        answer = self.read_message(args_array)
+        self.send(event.source.nick, answer)
+
+    def on_pubmsg(self, connection, event):
+        args_array = event.arguments[0].split()
+        if len(args_array[0]) > 0 and args_array[0][:-1]==self.name:
+            answer = self.read_message(args_array[1:])
+            self.send(event.target, answer)
+
+    def on_invite(self, connection, event):
+        for chan in event.arguments:
+            connection.join(chan)
+
+    def read_message(self, args):
+        try:
+            if args[0] in [x for x in commands.__dict__.keys() if x.find('_')]:
+                func = getattr(commands, args[0])
+                return func(args)
+            else:
+                return 'command not found'
+        except:
+            return "mimimimi"
+
+
+
+class commands():
+    def add(args): 
+        bot = rssbot.RssBot(args[2], args[1], url_shortener=url_shortener)
+        bots[args[1]] = bot
+        bot.start()
+        return "bot " + args[1] + " added"
+
+    def delete(args):
+        bots[args[1]].stop()
+        del bots[args[1]]
+        return "bot " + args[1] + " deleted"
+
+    def rename(args):
+        if args[1] in bots:
+            if args[2] in bots:
+                return args[2] + ' already taken'
+            else:
+                bots[args[1]].connection.nick(args[2])
+                bots[args[1]].name = args[2]
+                bots[args[2]] = bots[args[1]]
+                del bots[args[1]]
+                return 'renamed ' + args[1] + ' in ' + args[2]
+        else:
+            return args[1] + ' does not exist'
+
+    def save(args):
+        output_buffer = ''
+        for bot in bots:
+            if bot.loop:
+                output_buffer += bot + '|' + bots[bot].url + '|' + ' '.join(bots[bot].channels) + '\n'
+
+        F = open(feedfile, "w")
+        F.writelines(output_buffer)
+        F.close()
+
+        return "bots saved to " + feedfile
+
+    def caps(args):
+        return ' '.join([x for x in commands.__dict__.keys() if x.find('_')])
+
+    def list(args):
+        output_buffer = ''
+        for bot in bots:
+            output_buffer += bot + ' url: ' + bots[bot].url + '\n'
+        return output_buffer
+
+    def info(args):
+        if args[1] in bots:
+            output_buffer = ''
+            for data in ['title', 'link', 'updated']:
+                if data in bots[args[1]].feed.feed:
+                    output_buffer += data + ': ' + bots[args[1]].feed.feed[data] + '\n'
+            output_buffer += 'lastnew: ' + bots[args[1]].lastnew.isoformat()
+            return output_buffer
+        else:
+            return 'bot not found'
+
+    def search(args):
+        output = subprocess.check_output(['./GfindFeeds4bot', args[1]]).decode()
+        return output
+
+feedfile = 'new_feeds'
+url_shortener = 'http://wall'
+init_channels = ['#news']
+
+if 'FEEDFILE' in os.environ:
+    feedfile = os.environ['FEEDFILE']
+
+if 'URLSHORT' in os.environ:
+    url_shortener = os.environ['URLSHORT']
+
+bots = {}
+knews = NewsBot('knews')
+
+#config file reading
+F = open(feedfile, "r")
+lines = F.readlines()
+F.close()
+
+for line in lines:
+    line = line.strip('\n')
+    linear = line.split('|')
+    bot = rssbot.RssBot(linear[1], linear[0], init_channels + linear[2].split(), url_shortener)
+    bot.start()
+    bots[linear[0]] = bot
+
+knews.start()
+
diff --git a/news/feeds b/news/feeds
new file mode 100644
index 00000000..50fe0667
--- /dev/null
+++ b/news/feeds
@@ -0,0 +1,2 @@
+HN|http://news.ycombinator.com/rss
+Fefe|http://blog.fefe.de/rss.xml
diff --git a/news/new_feeds b/news/new_feeds
new file mode 100644
index 00000000..97f2f762
--- /dev/null
+++ b/news/new_feeds
@@ -0,0 +1,110 @@
+faz_feui|http://www.faz.net/rss/aktuell/feuilleton/|#news
+catholic_news|http://feeds.feedburner.com/catholicnewsagency/dailynews|#news
+lisp|http://planet.lisp.org/rss20.xml|#news
+sciencemag|http://news.sciencemag.org/rss/current.xml|#news
+weechat|http://dev.weechat.org/feed/atom|#news
+nnewsg|http://www.net-news-global.net/rss/rssfeed.xml|#news
+ccc|http://www.ccc.de/rss/updates.rdf|#news
+danisch|http://www.danisch.de/blog/feed/|#news
+tinc|http://tinc-vpn.org/news/index.rss|#news
+ft_india|http://www.ft.com/rss/home/india|#news
+nasa_news|http://www.nasa-usa.de/rss/dyn/breaking_news.rss|#news
+GerForPol|http://www.german-foreign-policy.com/de/news/rss-2.0|#news
+un_me|http://www.un.org/apps/news/rss/rss_mideast.asp|#news
+shz_news|http://www.shz.de/nachrichten/newsticker/rss|#news
+wp_world|http://feeds.washingtonpost.com/rss/rss_blogpost|#news
+fbi_nat_press|http://www.fbi.gov/news/rss|#news
+dwn|http://deutsche-wirtschafts-nachrichten.de/feed/customfeed/|#news
+faz_politik|http://www.faz.net/rss/aktuell/politik/|#news
+spiegel_top|http://www.spiegel.de/schlagzeilen/tops/index.rss|#news
+presseportal|http://www.presseportal.de/rss/presseportal.rss2|#news
+telegraph_uk|http://www.telegraph.co.uk/news/uknews/rss|#news
+LtU|http://lambda-the-ultimate.org/rss.xml|#news
+vimperator|https://sites.google.com/a/vimperator.org/www/blog/posts.xml|#news
+schallurauch|http://feeds.feedburner.com/SchallUndRauch|#news
+add|kernel|#news
+anon|http://anoninsiders.net/feed/|#news
+4chan_status|http://status.4chan.org/feeds/posts/default?alt=rss|
+nytimes|http://rss.nytimes.com/services/xml/rss/nyt/World.xml|#news
+bbc|http://feeds.bbci.co.uk/news/rss.xml|#news
+bild|http://rss.bild.de/bild.xml|#news
+reddit_world|http://www.reddit.com/r/worldnews/.rss|#news
+fbi|http://www.fbi.gov/homepage/RSS|#news
+reddit_sci|http://www.reddit.com/r/science/.rss|#news
+geheimorganisation|http://geheimorganisation.org/feed/|#news
+eia_press|http://www.eia.gov/rss/press_rss.xml|#news
+nsa|http://www.nsa.gov/rss.shtml|#news
+travel_warnings|http://feeds.travel.state.gov/ca/travelwarnings-alerts|#news
+tigsource|http://www.tigsource.com/feed/|#news
+un_top|http://www.un.org/apps/news/rss/rss_top.asp|#news
+archlinux|http://www.archlinux.org/feeds/news/|#news
+sec-db|http://feeds.security-database.com/SecurityDatabaseToolsWatch|#news
+coinspotting|http://coinspotting.com/rss|#news
+fefe|http://blog.fefe.de/rss.xml|#news
+embargowatch|https://embargowatch.wordpress.com/feed/|#news
+spiegel_eil|http://www.spiegel.de/schlagzeilen/eilmeldungen/index.rss|#news
+aje|http://www.aljazeera.com/Services/Rss/?PostingId=2007731105943979989|#news
+gulli|http://ticker.gulli.com/rss/|#news
+us_math_society|http://www.ams.org/cgi-bin/content/news_items.cgi?rss=1|#news
+tagesschau|http://www.tagesschau.de/newsticker.rdf|#news
+fbi_news|http://www.fbi.gov/news/news_blog/rss.xml|#news
+bmj|http://www.bmj.com/rss|#news
+ft_me|http://www.ft.com/rss/home/middleeast|#news
+fbi_stories|http://www.fbi.gov/news/stories/all-stories/rss.xml|#news
+sz_wirtschaft|http://rss.sueddeutsche.de/rss/Wirtschaft|#news
+arbor|http://feeds2.feedburner.com/asert/|#news
+reddit_tech|http://www.reddit.com/r/technology/.rss|#news
+golem|http://www.golem.de/rss.php?feed=RSS1.0|#news
+heise|http://heise.de.feedsportal.com/c/35207/f/653902/index.rss|#news
+fbi_press|http://www.fbi.gov/news/current/rss.xml|#news
+ars|http://feeds.arstechnica.com/arstechnica/index?format=xml|#news
+cancer|http://feeds.feedburner.com/ncinewsreleases?format=xml|#news
+un_eu|http://www.un.org/apps/news/rss/rss_europe.asp|#news
+europa_ric|http://ec.europa.eu/research/infocentre/rss/infocentre-rss.xml|#news
+fedreserve|http://www.federalreserve.gov/feeds/press_all.xml|#news
+exploitdb|http://www.exploit-db.com/rss.xml|#news
+xkcd|https://xkcd.com/rss.xml|#news
+reddit_prog|http://reddit.com/r/programming/.rss|#news
+HN|http://news.ycombinator.com/rss|#news
+sz_wissen|http://suche.sueddeutsche.de/rss/Wissen|#news
+scmp|http://www.scmp.com/rss/91/feed|#news
+shackspace|http://shackspace.de/?feed=rss2|#news
+greenpeace|http://www.greenpeace.de/nachrichten/feed/rss2/|#news
+rt|http://rt.com/rss/news/|#news
+nasa_iotd|http://www.nasa-usa.de/rss/dyn/lg_image_of_the_day.rss|#news
+z0r|https://www.facebook.com/feeds/page.php?format=atom10&id=278857186139|#news
+stz|http://www.stuttgarter-zeitung.de/rss/topthemen.rss.feed|#news
+reuters|http://feeds.reuters.com/Reuters/worldNews|#news
+gmanet|http://www.gmanetwork.com/news/rss/news|#news
+un_am|http://www.un.org/apps/news/rss/rss_americas.asp|#news
+slashdot|http://rss.slashdot.org/Slashdot/slashdot|#news
+antirez|http://antirez.com/rss|#news
+telegraph_world|http://www.telegraph.co.uk/news/worldnews/rss|#news
+fvwm|http://freecode.com/projects/fvwm/releases.atom|#news
+eu_survei|http://www.eurosurveillance.org/public/RSSFeed/RSS.aspx|#news
+eia_today|http://www.eia.gov/rss/todayinenergy.xml|#news
+reddit_consp|http://reddit.com/r/conspiracy/.rss|#news
+ft_uk|http://www.ft.com/rss/home/uk|#news
+times|http://www.thetimes.co.uk/tto/news/rss|#news
+phys|http://phys.org/rss-feed/|#news
+stern|http://www.stern.de/feed/standard/all/|#news
+zdnet|http://www.zdnet.com/news/rss.xml|#news
+presse_polizei|http://www.presseportal.de/rss/polizei.rss2|#news
+torr_news|http://feed.torrentfreak.com/Torrentfreak/|#news
+faz_wirtschaft|http://www.faz.net/rss/aktuell/wirtschaft/|#news
+telegraph_finance|http://www.telegraph.co.uk/finance/rss|#news
+linuxinsider|http://www.linuxinsider.com/perl/syndication/rssfull.pl|#news
+telegraph_pol|http://www.telegraph.co.uk/news/politics/rss|#news
+lolmythesis|http://lolmythesis.com/rss|#news
+taz|http://taz.de/Themen-des-Tages/!p15;rss/|#news
+un_afr|http://www.un.org/apps/news/rss/rss_africa.asp|#news
+ft_us|http://www.ft.com/rss/home/us|#news
+telepolis|http://www.heise.de/tp/rss/news-atom.xml|#news
+ft_europe|http://www.ft.com/rss/home/europe|#news
+handelblatt|http://www.handelsblatt.com/contentexport/feed/schlagzeilen|#news
+rawstory|http://www.rawstory.com/rs/feed/|#news
+sz_politik|http://rss.sueddeutsche.de/rss/Politik|#news
+un_pac|http://www.un.org/apps/news/rss/rss_asiapac.asp|#news
+torr_bits|http://feeds.feedburner.com/TorrentfreakBits|#news
+ign|http://feeds.ign.com/ign/all|#news
+ft_asia|http://www.ft.com/rss/home/asia|#news
diff --git a/news/rssbot.py b/news/rssbot.py
new file mode 100755
index 00000000..87c58781
--- /dev/null
+++ b/news/rssbot.py
@@ -0,0 +1,116 @@
+#!/usr/bin/python
+import irc.bot
+from irc.client import IRC
+import feedparser
+import threading
+import math
+import re
+import subprocess
+from datetime import datetime
+from time import sleep
+
+class RssBot(irc.bot.SingleServerIRCBot):
+    def __init__(self, rss, name, chans=['#news'], url_shortener="http://localhost", server='ire', port=6667, timeout=60):
+        irc.bot.SingleServerIRCBot.__init__(self, [(server, port)], name, name)
+        self.url = rss
+        self.name = name
+        self.server = server
+        self.port = port
+        self.chans = chans
+        self.to = timeout
+        self.oldnews = []
+        self.sendqueue = []
+        self.loop = True
+        self.lastnew = datetime.now()
+        self.url_shortener = url_shortener
+
+        def better_loop(timeout=0.2):
+            while self.loop:
+                self.ircobj.process_once(timeout)
+        self.ircobj.process_forever = better_loop
+
+
+    def start(self):
+        self.upd_loop = threading.Thread(target=self.updateloop)
+        self.bot = threading.Thread(target=irc.bot.SingleServerIRCBot.start, args=(self,))
+        self.upd_loop.start()
+        self.bot.start()
+
+    def stop(self):
+        self.ircobj.disconnect_all()
+        self.loop = False
+        del self
+
+    def updateloop(self):
+        failcount=0
+        while True:
+          try:
+              self.feed = feedparser.parse(self.url)
+              for entry in self.feed.entries:
+                  self.oldnews.append(entry.link)
+              break
+          except:
+              print(self.name + ': rss timeout occured')
+              failcount+=1
+              if failcount>20:
+                  print(self.name + ' is broken, going to die')
+                  self.stop()
+                  return
+        while self.loop:
+            try:
+                self.feed = feedparser.parse(self.url)
+                for entry in self.feed.entries:
+                    if not entry.link in self.oldnews:
+                        #try:
+                        #    self.send(entry.title + " " + entry.link + " com: " + entry.comments)
+                        #except AttributeError:
+                        shorturl = self.shortenurl(entry.link)
+                        self.sendall(entry.title + ' ' + shorturl)
+                        self.oldnews.append(entry.link)
+                        self.lastnew = datetime.now()
+            except:
+                print(self.name + ': rss timeout occured')
+            sleep(self.to)
+
+    def shortenurl(self, url):
+      while True:
+          try:
+              shorturl = subprocess.check_output(["curl", "-sS", "-F", "uri=" + url, self.url_shortener]).decode().strip('\n').strip('\r') + '#' + url.partition('://')[2].partition('/')[0]
+              return shorturl
+          except:
+              print('url shortener error')
+              sleep(1)
+
+    def last(self, target, num):
+        for feed in [x for x in self.feed.entries][:num]:
+            self.send(target, feed.title + ' ' + self.shortenurl(feed.link))
+
+    def sendall(self, string):
+        for chan in self.channels:
+            self.send(chan, string)
+
+    def send(self, target, string):
+        if self.connection.connected:
+            for line in string.split('\n'):
+                if len(line) < 450:
+                    self.connection.privmsg(target, line)
+                    sleep(1)
+                else:
+                    space = 0
+                    for x in range(math.ceil(len(line)/400)):
+                        oldspace = space
+                        space = line.find(" ", (x+1)*400, (x+1)*400+50)
+                        self.connection.privmsg(target, line[oldspace:space])
+                        sleep(1)
+        else:
+            self.connection.reconnect()
+            sleep(1)
+            self.send(string)
+
+    def on_invite(self, connection, event):
+        for chan in event.arguments:
+            connection.join(chan)
+
+    def on_welcome(self, connection, event):
+        for chan in self.chans:
+            connection.join(chan)
author	lassulus <lassulus@googlemail.com>	2014-01-10 00:03:38 +0100
committer	lassulus <lassulus@googlemail.com>	2014-01-10 00:03:38 +0100
commit	4f7c7241e3e5bf0203269d1fa6298c85ab952db1 (patch)
tree	4087f4292d418e63f815a89755fcef1e84082769 /news
parent	55f6d44e551289a336bb922cc7c3fe70a5100774 (diff)