diff options
-rw-r--r-- | news/.gitignore | 1 | ||||
-rw-r--r-- | news/new_feeds | 13 | ||||
-rw-r--r-- | news/newsbot.js | 272 | ||||
-rw-r--r-- | news/newsbot.py | 32 | ||||
-rw-r--r-- | news/package.json | 32 |
5 files changed, 329 insertions, 21 deletions
diff --git a/news/.gitignore b/news/.gitignore new file mode 100644 index 00000000..07e6e472 --- /dev/null +++ b/news/.gitignore @@ -0,0 +1 @@ +/node_modules diff --git a/news/new_feeds b/news/new_feeds index f6025f7e..a5de8d72 100644 --- a/news/new_feeds +++ b/news/new_feeds @@ -18,7 +18,6 @@ bdt_pressemitteilungen|http://bundestag.de/service/rss/Bundestag_Presse.rss|#new bdt_wd|http://bundestag.de/service/rss/Bundestag_WD.rss|#news #bundestag bitcoinboard|http://bitcoinboard.net/feed/|#news #financial bitcoinpakistan|https://bitcoinspakistan.com/feed/|#news #financial -bmj|http://www.bmj.com/rss|#news businessweek|http://www.businessweek.com/feeds/homepage.rss|#news cancer|http://feeds.feedburner.com/ncinewsreleases?format=xml|#news carta|http://feeds2.feedburner.com/carta-standard-rss|#news @@ -44,7 +43,6 @@ csm|http://rss.csmonitor.com/feeds/csm|#news csm_world|http://rss.csmonitor.com/feeds/world|#news cyberguerrilla|https://www.cyberguerrilla.org/a/2012/?feed=rss2|#news danisch|http://www.danisch.de/blog/feed/|#news -dod|http://www.defense.gov/news/rss/|#news dwn|http://deutsche-wirtschafts-nachrichten.de/feed/customfeed/|#news ecat|http://ecat.com/feed|#news eia_press|http://www.eia.gov/rss/press_rss.xml|#news @@ -60,7 +58,6 @@ faz_feui|http://www.faz.net/rss/aktuell/feuilleton/|#news faz_politik|http://www.faz.net/rss/aktuell/politik/|#news faz_wirtschaft|http://www.faz.net/rss/aktuell/wirtschaft/|#news #financial fbi|http://www.fbi.gov/homepage/RSS|#news #bullerei -fbi_nat_press|http://www.fbi.gov/news/rss|#news #bullerei fbi_news|http://www.fbi.gov/news/news_blog/rss.xml|#news fbi_press|http://www.fbi.gov/news/current/rss.xml|#news #bullerei fbi_stories|http://www.fbi.gov/news/stories/all-stories/rss.xml|#news #bullerei @@ -75,7 +72,6 @@ GerForPol|http://www.german-foreign-policy.com/de/news/rss-2.0|#news gmanet|http://www.gmanetwork.com/news/rss/news|#news golem|http://www.golem.de/rss.php?feed=RSS1.0|#news google|http://news.google.com/?output=rss|#news -greenpeace|http://www.greenpeace.de/nachrichten/feed/rss2/|#news guardian_uk|http://feeds.theguardian.com/theguardian/uk-news/rss|#news gulli|http://ticker.gulli.com/rss/|#news handelsblatt|http://www.handelsblatt.com/contentexport/feed/schlagzeilen|#news #financial @@ -98,8 +94,6 @@ LtU|http://lambda-the-ultimate.org/rss.xml|#news lukepalmer|http://lukepalmer.wordpress.com/feed/|#news mit|http://web.mit.edu/newsoffice/rss-feeds.feed?type=rss|#news mongrel2_master|https://github.com/zedshaw/mongrel2/commits/master.atom|#news -nasa_iotd|http://www.nasa-usa.de/rss/dyn/lg_image_of_the_day.rss|#news -nasa_news|http://www.nasa-usa.de/rss/dyn/breaking_news.rss|#news nds|http://www.nachdenkseiten.de/?feed=atom|#news netzpolitik|https://netzpolitik.org/feed/|#news newsbtc|http://newsbtc.com/feed/|#news #financial @@ -122,7 +116,6 @@ reddit_4chan|http://www.reddit.com/r/4chan/new/.rss|#news #brainfuck reddit_anticonsum|http://www.reddit.com/r/Anticonsumption/new/.rss|#news reddit_btc|http://www.reddit.com/r/Bitcoin/new/.rss|#news #financial reddit_consp|http://reddit.com/r/conspiracy/.rss|#news -reddit_prog|http://reddit.com/r/programming/|#news reddit_sci|http://www.reddit.com/r/science/.rss|#news reddit_tech|http://www.reddit.com/r/technology/.rss|#news reddit_tpp|http://www.reddit.com/r/twitchplayspokemon/.rss|#news #tpp @@ -182,5 +175,9 @@ weechat|http://dev.weechat.org/feed/atom|#news wp_world|http://feeds.washingtonpost.com/rss/rss_blogpost|#news xkcd|https://xkcd.com/rss.xml|#news yahoo|http://news.yahoo.com/rss/|#news -z0r|https://www.facebook.com/feeds/page.php?format=atom10&id=278857186139|#news zdnet|http://www.zdnet.com/news/rss.xml|#news +reddit_prog|http://www.reddit.com/r/programming/new/.rss|#news +bmj|[object Object]|#news +dod|http://www.defense.gov/news/afps2.xml|#news +greenpeace|http://feeds.feedburner.com/GreenpeaceNews|#news +painload|https://github.com/krebscode/painload/commits/master.atom|#news diff --git a/news/newsbot.js b/news/newsbot.js new file mode 100644 index 00000000..18b5f780 --- /dev/null +++ b/news/newsbot.js @@ -0,0 +1,272 @@ +var IRC = require('irc') +var FeedParser = require('feedparser') +var Request = require('request') +var Parse = require('shell-quote').parse +var FS = require('fs') +var HTTP = require('http') +var FormData = require('form-data') +var URL = require('url') + +var irc_server = 'ire.retiolum' +var master_nick = 'knews' +var news_channel = '#news' +var feeds_file = 'new_feeds' +var feedbot_loop_delay = 60 * 1000 // [ms] +var feedbot_create_delay = 200 // [ms] +var url_shortener_host = 'go' + +var slaves = {} + +function main () { + var master = new IRC.Client(irc_server, master_nick, { + channels: [ news_channel ], + }) + + master.on('message' + news_channel, function (nick, text, message) { + if (is_talking_to(master_nick, text)) { + var request = parse_request(text) + if (request) { + return run_command(request.method, request.params, function (error, result) { + if (error) { + return master.say(news_channel, '4' + error) + } else { + return master.say(news_channel, result) + } + }) + } + } + }) + + master.once('registered', function () { + // read feeds file and create a feedbot for each entry + FS + .readFileSync(feeds_file) + .toString() + .split('\n') + //.filter((function () { + // var n = 2; + // return function () { + // return n-- > 0 + // } + //})()) + .filter(function (line) { + return line.length > 0 + }) + .forEach(function (line, i) { + var parts = line.split('|') + if (parts.length !== 3) { + console.log('bad new_feeds line ' + lines + ': ' + line) + return + } + + var nick = parts[0] + var uri = parts[1] + var channels = parts[2].split(' ') + + setTimeout(function () { + return create_feedbot(nick, uri, channels) + }, i*feedbot_create_delay) + }) + }) +} + +function create_feedbot (nick, uri, channels) { + var client = new IRC.Client(irc_server, nick, { + channels: channels, + autoRejoin: false, + }) + + slaves[nick] = { + client: client, + nick: nick, + uri: uri, + } + + // say text in every joined channel + function broadcast (text) { + Object.keys(client.chans).forEach(function (channel) { + client.say(channel, text) + }) + } + + function broadcast_new_item (item) { + return getShortLink(item.link, function (error, shortlink) { + return broadcast(item.title + ' ' + shortlink) + }) + } + + client.once('registered', loop_feedparser) + client.once('registered', deaf_myself) + + client.on('invite', function (channel, from, message) { + client.join(channel, null) + }) + + client.on('error', function (error) { + console.log('Error:', error) + }) + + // TODO stopping criteria + function loop_feedparser () { + try { + var request = Request(uri) + var feedparser = new FeedParser() + } catch (error) { + return broadcast('4' + error) + } + + request.on('error', function (error) { + broadcast('4request ' + error) + }) + request.on('response', function (response) { + if (response.statusCode !== 200) { + return this.emit('error', new Error('Bad status code')) + } + var output = response + switch (response.headers['content-encoding']) { + case 'gzip': + output = zlib.createGunzip() + response.pipe(output) + break + case 'deflate': + output = zlib.createInflate() + response.pipe(output) + break + } + this.pipe(feedparser) + }) + + var items = [] + + feedparser.on('error', function (error) { + broadcast('4feedparser ' + error) + return continue_loop() + }) + feedparser.on('readable', function () { + for (var item; item = this.read(); ) { + items.push(item) + } + }) + feedparser.on('end', function () { + + if (client.lastItems) { + items.forEach(function (item) { + if (!client.lastItems.hasOwnProperty(item.title)) { + broadcast_new_item(item) + } + }) + } + + client.lastItems = {} + items.forEach(function (item) { + client.lastItems[item.title] = true + }) + + return continue_loop() + }) + + function continue_loop () { + setTimeout(loop_feedparser, feedbot_loop_delay) + } + } + function deaf_myself () { + client.send('mode', nick, '+D') + } +} + +// return true if text "is talking to" my_nick +function is_talking_to (my_nick, text) { + return text.slice(0, my_nick.length) === my_nick + && text[my_nick.length] === ':' +} + +function parse_request (text) { + var parse = Parse(text) + return { + method: parse[1], + params: parse.slice(2), + } +} + +function run_command (methodname, params, callback) { + var method = methods[methodname] + if (method) { + return method(params, callback) + } else { + return callback(new Error('dunno what ' + methodname + ' is')); + } +} + +function getShortLink (link, callback) { + var form = new FormData() + try { + form.append('uri', link) + } catch (err) { + console.log('link:', link) + throw err + } + + var request = HTTP.request({ + method: 'post', + host: url_shortener_host, + path: '/', + headers: form.getHeaders(), + }) + form.pipe(request) + + request.on('response', function (response) { + var data = '' + response.on('data', function (chunk) { + data += chunk + }) + response.on('end', function () { + callback(null, data.replace(/\r\n$/,'') + '#' + URL.parse(link).host) + }) + }) +} + +var methods = {} +methods.add = function (params, callback) { + if (slaves.hasOwnProperty(params[0])) { + return callback(new Error('name already taken')) + } else { + create_feedbot(params[0], params[1], [news_channel]) + return callback(null) + } +} +methods.del = function (params, callback) { + var nick = params[0] + if (slaves.hasOwnProperty(nick)) { + var slave = slaves[nick] + slave.client.disconnect() + delete slaves[nick] + return callback(null) + } else { + return callback(new Error('botname not found')) + } +} +methods.save = function (params, callback) { + var feeds = Object.keys(slaves) + .map(function (nick) { + return slaves[nick] + }) + .map(function (slave) { + return [ + slave.nick, + slave.uri, + Object.keys(slave.client.chans).join(' '), + ].join('|') + }).join('\n') + '\n' + return FS.writeFile(feeds_file, feeds, function (error) { + if (error) { + return callback(error) + } else { + return callback(null, 'Feeds saved') + } + }) +} + + +if (require.main === module) { + main() +} diff --git a/news/newsbot.py b/news/newsbot.py index 8834851d..2f8bf635 100644 --- a/news/newsbot.py +++ b/news/newsbot.py @@ -17,7 +17,7 @@ from time import sleep ## Newsbot Controller Class class NewsBot(asybot): - def __init__(self, name, channels=['#test'], server='ire', port=6667, timeout=60, loglevel=logging.ERROR, url_shortener='http://wall'): + def __init__(self, name, channels=['#test'], server='ire', port=6667, timeout=60, loglevel=logging.ERROR, url_shortener='http://localhost'): asybot.__init__(self, server, port, name, channels, loglevel=loglevel) self.to = timeout self.url_shortener = url_shortener @@ -140,7 +140,9 @@ class RssBot(asybot): self.lastnew = datetime.now() self.url_shortener = url_shortener self.retry = True - self.on_nickinuse = lambda: None + + def on_nickinuse(*bla): + pass def start_rss(self): self.upd_loop = threading.Thread(target=self.updateloop) @@ -203,19 +205,19 @@ class RssBot(asybot): def send_msg(self, target, string): if self.connected: for line in string.split('\n'): - if len(line) < 450: - self.PRIVMSG(target, line) - else: - space = 0 - for x in range(math.ceil(len(line)/400)): - oldspace = space - space = line.find(" ", (x+1)*400, (x+1)*400+50) - self.PRIVMSG(target, line[oldspace:space]) + while len(line)>0: + if len(line) < 450: + self.PRIVMSG(target, line) + line = '' + else: + space = line.rfind(" ", 1, 450) + self.PRIVMSG(target, line[:space]) + line=line[space:] else: self.reconnect() while not self.connected: - sleep(10) print(self.nickname + ' waiting for reconnect') + sleep(10) self.send_msg(target, string) def on_invite(self, prefix, command, params, rest): @@ -223,12 +225,16 @@ class RssBot(asybot): self.push('JOIN ' + chan) self.channels.append(chan) + def on_welcome(self, prefix, command, params, rest): + asybot.on_welcome(self, prefix, command, params, rest) + self.push('MODE ' + self.nickname + ' +D') + feedfile = 'new_feeds' -url_shortener = 'http://wall' +url_shortener = 'http://go' init_channels = ['#news'] bots = {} -knews = NewsBot('knews', init_channels) +knews = NewsBot('knews', init_channels, url_shortener=url_shortener) #config file reading F = open(feedfile, "r") diff --git a/news/package.json b/news/package.json new file mode 100644 index 00000000..52c19177 --- /dev/null +++ b/news/package.json @@ -0,0 +1,32 @@ +{ + "name": "news", + "version": "0.0.0", + "description": "", + "main": "newsbot.js", + "dependencies": { + "feedparser": "*", + "form-data": "*", + "irc": "*", + "request": "*", + "shell-quote": "*" + }, + "devDependencies": {}, + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1" + }, + "repository": { + "type": "git", + "url": "https://github.com/krebscode/painload" + }, + "keywords": [ + "irc", + "news", + "feed" + ], + "author": "krebs", + "license": "WTFPLv2", + "bugs": { + "url": "https://github.com/krebscode/painload/issues" + }, + "homepage": "https://github.com/krebscode/painload" +} |