diff options
Diffstat (limited to 'knn')
| -rwxr-xr-x | knn/GfindFeeds4bot | 17 | ||||
| -rwxr-xr-x | knn/controller.py | 143 | ||||
| -rw-r--r-- | knn/feeds | 2 | ||||
| -rw-r--r-- | knn/new_feeds | 56 | ||||
| -rwxr-xr-x | knn/rssbot.py | 116 | 
5 files changed, 334 insertions, 0 deletions
# ===========================================================================
# diff --git a/knn/GfindFeeds4bot b/knn/GfindFeeds4bot
# new file mode 100755, index 00000000..a5439c5a
#
# POSIX sh helper that commands.search() below executes as ./GfindFeeds4bot.
# Reproduced verbatim; only the line breaks lost in the page dump are restored.
#
#   #! /bin/sh
#   # usage: GfindFeeds4bot QUERY
#   set -euf
#
#   export query="$1"
#   export data="$(
#     curl -sS "https://www.google.com/uds/GfindFeeds?v=1.0&q=$query"
#   )"
#
#   node <<EOF
#     query = process.env.query
#     data = JSON.parse(process.env.data)
#
#     data.responseData.entries.forEach(function (entry, index) {
#       console.log(query + index + '|' + entry.url)
#     })
#   EOF
# ===========================================================================
# diff --git a/knn/controller.py b/knn/controller.py
# new file mode 100755, index 00000000..5277d626
# ===========================================================================
"""Controller for a set of RSS-to-IRC bots.

At import time this module reads the feed file, starts one ``rssbot.RssBot``
per entry, and then starts a ``NewsBot`` that accepts management commands
(the functions of the ``commands`` class) over IRC.
"""
from time import sleep
import irc.bot
import _thread
import rssbot
import os
import subprocess


class NewsBot(irc.bot.SingleServerIRCBot):
    """IRC bot that runs management commands sent to it.

    A command is accepted either as a private message, or as a channel
    message whose first word is the bot's name plus one trailing character
    (for example ``knews: list``).
    """

    def __init__(self, name, chans=None, server='ire', port=6667, timeout=60):
        """Create the bot; no connection is made until ``start()``.

        Args:
            name: used both as nickname and real name.
            chans: channels joined on welcome; ``None`` means ``['#news']``.
            server: IRC server host.
            port: IRC server port.
            timeout: stored as ``self.to``; not read anywhere in this class.
        """
        irc.bot.SingleServerIRCBot.__init__(self, [(server, port)], name, name)
        self.name = name
        self.server = server
        self.port = port
        # A None sentinel replaces the former shared mutable default list.
        self.chans = ['#news'] if chans is None else chans
        self.to = timeout

    def start(self):
        """Run the IRC event loop in a background thread and return at once."""
        self.bot = _thread.start_new_thread(irc.bot.SingleServerIRCBot.start, (self,))

    def on_welcome(self, connection, event):
        """Join every configured channel once the server has welcomed us."""
        for chan in self.chans:
            connection.join(chan)

    def send(self, target, string):
        """Send ``string`` to ``target`` one line at a time, one per second.

        Empty lines are skipped: several commands return text that ends in a
        newline (or is empty), and an empty PRIVMSG carries no text.
        """
        for line in string.split('\n'):
            if not line:
                continue
            self.connection.privmsg(target, line)
            sleep(1)

    def on_privmsg(self, connection, event):
        """Treat a private message as a command and answer the sender."""
        words = event.arguments[0].split()
        answer = self.read_message(words)
        self.send(event.source.nick, answer)

    def on_pubmsg(self, connection, event):
        """Run a channel message as a command when it is addressed to us."""
        words = event.arguments[0].split()
        # A whitespace-only message splits into an empty list.
        if not words:
            return
        # The last character of the first word is ignored, so "knews:" and
        # "knews," both address the bot.
        if words[0][:-1] == self.name:
            answer = self.read_message(words[1:])
            self.send(event.target, answer)

    def on_invite(self, connection, event):
        """Join every channel named in an invite."""
        for chan in event.arguments:
            connection.join(chan)

    def read_message(self, args):
        """Dispatch ``args`` to the matching function of ``commands``.

        Args:
            args: the command words; ``args[0]`` is the command name and the
                whole list is handed to the command function.

        Returns:
            The command's reply text, ``'command not found'`` for an unknown
            name, or ``"mimimimi"`` when the command raised (including an
            empty ``args``).
        """
        try:
            if args[0] in _command_names():
                func = getattr(commands, args[0])
                return func(args)
            return 'command not found'
        except Exception as error:
            # Keep the historical reply but leave a trace of the real cause.
            print('command failed: ' + repr(error))
            return "mimimimi"


class commands():
    """Namespace of IRC commands.

    Every attribute whose name does not start with an underscore is callable
    from IRC.  Each command receives the full word list, so ``args[0]`` is
    the command name itself and real arguments start at ``args[1]``.
    """

    @staticmethod
    def add(args):
        """``add NAME URL``: create and start a feed bot called NAME."""
        name, url = args[1], args[2]
        # Overwriting the entry would leave the old bot running unreachable.
        if name in bots:
            return name + ' already taken'
        bot = rssbot.RssBot(url, name, url_shortener=url_shortener)
        bots[name] = bot
        bot.start()
        return "bot " + name + " added"

    @staticmethod
    def delete(args):
        """``delete NAME``: stop the bot NAME and forget it."""
        name = args[1]
        if name not in bots:
            return name + ' does not exist'
        bots[name].stop()
        del bots[name]
        return "bot " + name + " deleted"

    @staticmethod
    def rename(args):
        """``rename OLD NEW``: change a bot's nick and its registry key."""
        old, new = args[1], args[2]
        if old not in bots:
            return old + ' does not exist'
        if new in bots:
            return new + ' already taken'
        bots[old].connection.nick(new)
        bots[old].name = new
        bots[new] = bots[old]
        del bots[old]
        return 'renamed ' + old + ' in ' + new

    @staticmethod
    def save(args):
        """``save``: write all running bots to the feed file.

        One ``name|url|channels`` line is written per bot whose ``loop`` flag
        is still set.
        """
        # The flag lives on the bot object, not on its name (the dict key).
        lines = [
            name + '|' + bot.url + '|' + ' '.join(bot.channels) + '\n'
            for name, bot in bots.items()
            if bot.loop
        ]
        with open(feedfile, "w") as handle:
            handle.writelines(lines)
        return "bots saved to " + feedfile

    @staticmethod
    def caps(args):
        """``caps``: list the available command names."""
        return ' '.join(_command_names())

    @staticmethod
    def list(args):
        """``list``: one ``NAME url: URL`` line per known bot."""
        return ''.join(name + ' url: ' + bot.url + '\n' for name, bot in bots.items())

    @staticmethod
    def info(args):
        """``info NAME``: feed title/link/updated (when present) and lastnew."""
        if args[1] not in bots:
            return 'bot not found'
        bot = bots[args[1]]
        lines = [
            key + ': ' + bot.feed.feed[key]
            for key in ('title', 'link', 'updated')
            if key in bot.feed.feed
        ]
        lines.append('lastnew: ' + bot.lastnew.isoformat())
        return '\n'.join(lines)

    @staticmethod
    def search(args):
        """``search QUERY``: return the output of ./GfindFeeds4bot QUERY.

        QUERY comes from IRC users; it is passed as a single argv element and
        no shell is involved on this side.
        """
        output = subprocess.check_output(['./GfindFeeds4bot', args[1]]).decode()
        return output


def _command_names():
    """Return the names in ``commands`` that do not start with an underscore."""
    return [name for name in commands.__dict__ if not name.startswith('_')]


# Defaults, overridable through the FEEDFILE and URLSHORT environment variables.
feedfile = os.environ.get('FEEDFILE', 'new_feeds')
url_shortener = os.environ.get('URLSHORT', 'http://wall')
init_channels = ['#news']

bots = {}
knews = NewsBot('knews')

# config file reading: one "name|url|channels" entry per line
with open(feedfile, "r") as handle:
    lines = handle.readlines()

for line in lines:
    linear = line.strip('\n').split('|')
    # Skip blank or malformed lines instead of crashing during start-up.
    if len(linear) < 2:
        continue
    # The channel column is optional (knn/feeds has only name and url).
    extra_channels = linear[2].split() if len(linear) > 2 else []
    bot = rssbot.RssBot(linear[1], linear[0], init_channels + extra_channels, url_shortener)
    bot.start()
    bots[linear[0]] = bot

knews.start()

# ===========================================================================
# diff --git a/knn/feeds b/knn/feeds
# new file mode 100644, index 00000000..50fe0667
#
#   HN|http://news.ycombinator.com/rss
#   Fefe|http://blog.fefe.de/rss.xml
# ===========================================================================
# diff --git a/knn/new_feeds b/knn/new_feeds
# new file mode 100644, index 00000000..b1e77657
# First entries of the 56-line file; the remaining entries follow this span.
#
#   spiegel_top|http://www.spiegel.de/schlagzeilen/tops/index.rss|#news
#   wsj_jp|http://blogs.wsj.com/japanrealtime/feed?mod=WSJ_Japan_JapanRealTime|#news
#   ccc|http://www.ccc.de/rss/updates.rdf|#news
#   coinspotting|http://coinspotting.com/rss|#news
#   reddit_sci|http://www.reddit.com/r/science/.rss|#news
#   reddit_tech|http://www.reddit.com/r/technology/.rss|#news
#   scmp|http://www.scmp.com/rss/91/feed|#news
#   golem|http://www.golem.de/rss.php?feed=RSS1.0|#news
#   anon|http://anoninsiders.net/feed/|#news
#   wsj_kor|http://blogs.wsj.com/korearealtime/feed?mod=WSJ_korearealtimeRealTime|#news #test
# ===========================================================================
+faz_politik|http://www.faz.net/rss/aktuell/politik/|#news +reddit_prog|http://reddit.com/r/programming/|#news +gulli|http://ticker.gulli.com/rss/|#news +danisch|http://www.danisch.de/blog/feed/|#news +lisp|http://planet.lisp.org/rss20.xml|#news +wsj_in|http://blogs.wsj.com/indiarealtime/feed?mod=irt|#news #test +wsj_me|http://blogs.wsj.com/middleeast/feed?mod=middleeast|#news #test +lolmythesis|http://lolmythesis.com/rss|#news +exploitdb|http://www.exploit-db.com/rss.xml|#news +LtU|http://lambda-the-ultimate.org/rss.xml|#news +HN|http://news.ycombinator.com/rss|#news +weechat|http://dev.weechat.org/feed/atom|#news +faz_feui|http://www.faz.net/rss/aktuell/feuilleton/|#news +fvwm|http://freecode.com/projects/fvwm/releases.atom|#news +rawstory|http://www.rawstory.com/rs/feed/|#news +wsj_cn|http://blogs.wsj.com/chinarealtime/feed?mod=chinablog|#news +torr_news|http://feed.torrentfreak.com/Torrentfreak/|#news +xkcd|https://xkcd.com/rss.xml|#news +wp_world|http://feeds.washingtonpost.com/rss/rss_blogpost|#news +wsj_eu|http://blogs.wsj.com/emergingeurope/feed?mod=emergingeurope|#news #test +telepolis|http://www.heise.de/tp/rss/news-atom.xml|#news +wsj_sea|http://blogs.wsj.com/searealtime/feed?mod=WSJ_SEA_Blog|#news #test +embargowatch|https://embargowatch.wordpress.com/feed/|#news +aje|http://www.aljazeera.com/Services/Rss/?PostingId=2007731105943979989|#news +taz|http://taz.de/Themen-des-Tages/!p15;rss/|#news +reuters|http://feeds.reuters.com/Reuters/worldNews|#news +sec-db|http://feeds.security-database.com/SecurityDatabaseToolsWatch|#news +archlinux|http://www.archlinux.org/feeds/news/|#news +nnewsg|http://www.net-news-global.net/rss/rssfeed.xml|#news +faz_wirtschaft|http://www.faz.net/rss/aktuell/wirtschaft/|#news +arbor|http://feeds2.feedburner.com/asert/|#news +reddit_world|http://www.reddit.com/r/worldnews/.rss|#news +linuxinsider|http://www.linuxinsider.com/perl/syndication/rssfull.pl|#news 
# ===========================================================================
# knn/new_feeds (continued): the entries that share this span of the dump,
# reproduced verbatim.
#
#   spiegel_eil|http://www.spiegel.de/schlagzeilen/eilmeldungen/index.rss|#news
#   heise|http://heise.de.feedsportal.com/c/35207/f/653902/index.rss|#news
#   slashdot|http://rss.slashdot.org/Slashdot/slashdot|#news
#   antirez|http://antirez.com/rss|#news
#   sz|http://suche.sueddeutsche.de/?output=rss|#news
#   GerForPol|http://www.german-foreign-policy.com/de/news/rss-2.0|#news
#   schallurauch|http://feeds.feedburner.com/SchallUndRauch|#news
#   torr_bits|http://feeds.feedburner.com/TorrentfreakBits|#news
#   fefe|http://blog.fefe.de/rss.xml|#news
#   rt|http://rt.com/rss/news/|#news
#   ars|http://feeds.arstechnica.com/arstechnica/index?format=xml|#news
#   dwn|http://deutsche-wirtschafts-nachrichten.de/feed/customfeed/|#news
#   reddit_consp|http://reddit.com/r/conspiracy/.rss|#news
# ===========================================================================
# diff --git a/knn/rssbot.py b/knn/rssbot.py
# new file mode 100755, index 00000000..87c58781
# ===========================================================================
#!/usr/bin/python
"""IRC bot that announces new entries of a single feed."""
import irc.bot
from irc.client import IRC
import feedparser
import threading
import math
import re
import subprocess
from datetime import datetime
from time import sleep


class RssBot(irc.bot.SingleServerIRCBot):
    """Poll one feed and post every unseen entry to the joined channels.

    Two threads are started by ``start()``: one runs the IRC event loop, the
    other runs ``updateloop()``.  Both stop once ``self.loop`` is False.
    """

    def __init__(self, rss, name, chans=None, url_shortener="http://localhost", server='ire', port=6667, timeout=60):
        """Create the bot; nothing is fetched or connected until ``start()``.

        Args:
            rss: feed URL handed to ``feedparser.parse``.
            name: used both as nickname and real name.
            chans: channels joined on welcome; ``None`` means ``['#news']``.
            url_shortener: URL that ``shortenurl`` posts links to.
            server: IRC server host.
            port: IRC server port.
            timeout: seconds slept between two polls of the feed.
        """
        irc.bot.SingleServerIRCBot.__init__(self, [(server, port)], name, name)
        self.url = rss
        self.name = name
        self.server = server
        self.port = port
        # A None sentinel replaces the former shared mutable default list.
        self.chans = ['#news'] if chans is None else chans
        self.to = timeout
        self.oldnews = []
        self.sendqueue = []
        self.loop = True
        self.lastnew = datetime.now()
        self.url_shortener = url_shortener

        # Replace the library's endless event loop with one that ends when
        # self.loop is cleared, so stop() can terminate the IRC thread.
        def better_loop(timeout=0.2):
            while self.loop:
                self.ircobj.process_once(timeout)
        self.ircobj.process_forever = better_loop

    def start(self):
        """Start the feed-polling thread and the IRC thread."""
        self.upd_loop = threading.Thread(target=self.updateloop)
        self.bot = threading.Thread(target=irc.bot.SingleServerIRCBot.start, args=(self,))
        self.upd_loop.start()
        self.bot.start()

    def stop(self):
        """Disconnect from IRC and ask both worker loops to finish."""
        self.ircobj.disconnect_all()
        self.loop = False

    def updateloop(self):
        """Record the entries already in the feed, then announce new ones.

        The first phase fetches the feed and remembers every link without
        posting it; after more than 20 failures the bot stops itself.  The
        second phase polls every ``self.to`` seconds until ``self.loop`` is
        cleared and posts each entry whose link has not been seen.
        """
        failcount = 0
        while True:
            try:
                self.feed = feedparser.parse(self.url)
                # Collect first, store afterwards: a failure half-way through
                # must not leave duplicates behind for the retry.
                links = [entry.link for entry in self.feed.entries]
                self.oldnews.extend(links)
                break
            except Exception:
                print(self.name + ': rss timeout occured')
                failcount += 1
                if failcount > 20:
                    print(self.name + ' is broken, going to die')
                    self.stop()
                    return
                # Pause so the retries are not fired back to back.
                sleep(1)
        while self.loop:
            try:
                self.feed = feedparser.parse(self.url)
                for entry in self.feed.entries:
                    if entry.link not in self.oldnews:
                        shorturl = self.shortenurl(entry.link)
                        self.sendall(entry.title + ' ' + shorturl)
                        self.oldnews.append(entry.link)
                        self.lastnew = datetime.now()
            except Exception:
                print(self.name + ': rss timeout occured')
            sleep(self.to)

    def shortenurl(self, url):
        """Return the shortener's reply for ``url`` plus ``#<host of url>``.

        The link is posted with curl as form field ``uri``.  Failures are
        retried once per second while the bot is running; once the bot has
        been stopped the original ``url`` is returned so the calling thread
        can finish.
        """
        domain = url.partition('://')[2].partition('/')[0]
        while True:
            try:
                reply = subprocess.check_output(["curl", "-sS", "-F", "uri=" + url, self.url_shortener])
                return reply.decode().strip('\n').strip('\r') + '#' + domain
            except (subprocess.SubprocessError, OSError, UnicodeDecodeError):
                print('url shortener error')
                if not self.loop:
                    return url
                sleep(1)

    def last(self, target, num):
        """Send the first ``num`` entries of the last parsed feed to ``target``."""
        for entry in self.feed.entries[:num]:
            self.send(target, entry.title + ' ' + self.shortenurl(entry.link))

    def sendall(self, string):
        """Send ``string`` to every channel the bot is currently in."""
        for chan in self.channels:
            self.send(chan, string)

    def send(self, target, string):
        """Send ``string`` to ``target``, reconnecting first if necessary.

        Each line is sent separately, long lines are split by
        ``_split_line``, and one second is slept after every message.
        Nothing is sent once the bot has been stopped and disconnected.
        """
        # Iterative retry: the former recursive call lost the target argument
        # and could recurse without bound.
        while not self.connection.connected:
            if not self.loop:
                # stop() disconnected us on purpose; do not come back.
                return
            self.connection.reconnect()
            sleep(1)
        for line in string.split('\n'):
            for part in self._split_line(line):
                self.connection.privmsg(target, part)
                sleep(1)

    @staticmethod
    def _split_line(line, soft=400, hard=450):
        """Yield pieces of ``line`` that are each shorter than ``hard``.

        A piece is cut at the first space found at index ``soft`` or later;
        when no space lies in that window it is cut just below ``hard``.
        Empty input yields nothing.
        """
        while len(line) >= hard:
            cut = line.find(' ', soft, hard)
            if cut == -1:
                cut = hard - 1
            yield line[:cut]
            # Drop the space(s) the cut was made at.
            line = line[cut:].lstrip(' ')
        if line:
            yield line

    def on_invite(self, connection, event):
        """Join every channel named in an invite."""
        for chan in event.arguments:
            connection.join(chan)

    def on_welcome(self, connection, event):
        """Join every configured channel once the server has welcomed us."""
        for chan in self.chans:
            connection.join(chan)
