From 2c484daf3cb78deefdcfe2ef6cececaab397dace Mon Sep 17 00:00:00 2001 From: lassulus Date: Sun, 5 Feb 2017 00:23:44 +0100 Subject: l 2 monitoring client: remove dns query --- lass/2configs/monitoring/client.nix | 4 ---- 1 file changed, 4 deletions(-) (limited to 'lass/2configs/monitoring') diff --git a/lass/2configs/monitoring/client.nix b/lass/2configs/monitoring/client.nix index e879d6960..210fd2d10 100644 --- a/lass/2configs/monitoring/client.nix +++ b/lass/2configs/monitoring/client.nix @@ -29,10 +29,6 @@ with import ; '' [[inputs.net]] '' - '' - [[inputs.dns_query]] - servers = ["8.8.8.8"] - '' ]; }; systemd.services.telegraf.path = with pkgs; [ -- cgit v1.2.3 From 0ea991ffe9252041751e6e740c5166e164541928 Mon Sep 17 00:00:00 2001 From: lassulus Date: Sun, 5 Feb 2017 00:25:39 +0100 Subject: l 2 monitoring server: add ram & deadman alarm --- lass/2configs/monitoring/server.nix | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) (limited to 'lass/2configs/monitoring') diff --git a/lass/2configs/monitoring/server.nix b/lass/2configs/monitoring/server.nix index 2e1c15ca1..505cb7a17 100644 --- a/lass/2configs/monitoring/server.nix +++ b/lass/2configs/monitoring/server.nix @@ -1,9 +1,7 @@ {pkgs, config, ...}: with import ; { - services.influxdb = { - enable = true; - }; + services.influxdb.enable = true; services.influxdb.extraConfig = { meta.hostname = config.krebs.build.host.name; @@ -29,24 +27,39 @@ with import ; data="$(${pkgs.jq}/bin/jq -r .message)" export LOGNAME=prism-alarm ${pkgs.irc-announce}/bin/irc-announce \ - irc.freenode.org 6667 prism-alarm \#krebs-bots "$data" >/dev/null + ni.r 6667 prism-alarm \#retiolum "$data" >/dev/null ''; in { enable = true; + check_db = "telegraf_db"; alarms = { - test2 = '' - batch + cpu = '' + var data = batch |query(${"'''"} SELECT mean("usage_user") AS mean FROM "${config.lass.kapacitor.check_db}"."default"."cpu" ${"'''"}) - .every(3m) - .period(1m) + .period(10m) + .every(1m) + .groupBy('host') + data |alert() + .crit(lambda: "mean" > 90) + .exec('${echoToIrc}') + data |deadman(1.0,5m) + .stateChangesOnly() + .exec('${echoToIrc}') + ''; + ram = '' + var data = batch + |query(${"'''"} + SELECT mean("used_percent") AS mean + FROM "${config.lass.kapacitor.check_db}"."default"."mem" + ${"'''"}) + .period(10m) + .every(1m) .groupBy('host') - |alert() - .crit(lambda: "mean" > 90) - // Whenever we get an alert write it to a file. - .log('/tmp/alerts.log') + data |alert() + .crit(lambda: "mean" > 90) .exec('${echoToIrc}') ''; }; -- cgit v1.2.3 From 6ad79bd34eb62d916d5802536174bdb6c3fcd363 Mon Sep 17 00:00:00 2001 From: lassulus Date: Sun, 5 Feb 2017 12:15:17 +0100 Subject: l 2 monitoring client: use services.telegraf --- lass/2configs/monitoring/client.nix | 105 ++++++++---------------------------- 1 file changed, 22 insertions(+), 83 deletions(-) (limited to 'lass/2configs/monitoring') diff --git a/lass/2configs/monitoring/client.nix b/lass/2configs/monitoring/client.nix index 210fd2d10..d20ad475d 100644 --- a/lass/2configs/monitoring/client.nix +++ b/lass/2configs/monitoring/client.nix @@ -1,90 +1,29 @@ {pkgs, config, ...}: with import ; { - lass.telegraf = { + services.telegraf = { enable = true; - interval = "1s"; - - outputs = '' - [outputs.influxdb] - urls = ["http://prism:8086"] - database = "telegraf_db" - user_agent = "telegraf" - ''; - inputs = [ - '' - [cpu] - percpu = false - totalcpu = true - drop = ["cpu_time"] - '' - '' - [[inputs.mem]] - '' - '' - [[inputs.ping]] - urls = ["8.8.8.8"] - '' - '' - [[inputs.net]] - '' - ]; - }; - systemd.services.telegraf.path = with pkgs; [ - iputils - lm_sensors - ]; - - services.collectd = { - enable = true; - autoLoadPlugin = true; - extraConfig = '' - Hostname ${config.krebs.build.host.name} - LoadPlugin load - LoadPlugin disk - LoadPlugin memory - Interval 30.0 - - LoadPlugin interface - - Interface "*Link" - Interface "lo" - Interface "vboxnet*" - Interface "virbr*" - IgnoreSelected true - - - LoadPlugin df - - MountPoint "/nix/store" - FSType "tmpfs" - FSType "binfmt_misc" - FSType "debugfs" - FSType "mqueue" - FSType "hugetlbfs" - FSType "systemd-1" - FSType "cgroup" - FSType "securityfs" - FSType "ramfs" - FSType "proc" - FSType "devpts" - FSType "devtmpfs" - MountPoint "/var/lib/docker/devicemapper" - IgnoreSelected true - - - LoadPlugin cpu - - ReportByCpu true - ReportByState true - ValuesPercentage true - - - LoadPlugin network - - Server "prism" "25826" - - ''; + extraConfig = { + interval = "1s"; + outputs = { + influxdb = { + urls = ["http://prism:8086"]; + database = "telegraf_db"; + user_agent = "telegraf"; + }; + }; + inputs = { + cpu = { + percpu = false; + totalcpu = true; + }; + mem = {}; + ping = { + urls = ["8.8.8.8"]; + }; + net = {}; + }; + }; }; } -- cgit v1.2.3 From 53532e63f006479bc3ff57c93a2b5f52d4709545 Mon Sep 17 00:00:00 2001 From: lassulus Date: Sun, 5 Feb 2017 14:20:55 +0100 Subject: l 2 monitoring: remove broken ping statistics --- lass/2configs/monitoring/client.nix | 3 --- 1 file changed, 3 deletions(-) (limited to 'lass/2configs/monitoring') diff --git a/lass/2configs/monitoring/client.nix b/lass/2configs/monitoring/client.nix index d20ad475d..bd7d6acbb 100644 --- a/lass/2configs/monitoring/client.nix +++ b/lass/2configs/monitoring/client.nix @@ -19,9 +19,6 @@ with import ; totalcpu = true; }; mem = {}; - ping = { - urls = ["8.8.8.8"]; - }; net = {}; }; }; -- cgit v1.2.3 From a5bd0ad7742260db8893d8578950a0cfd0cbc62e Mon Sep 17 00:00:00 2001 From: lassulus Date: Sun, 5 Feb 2017 14:21:42 +0100 Subject: l 2 monitoring: set agent interval to 1s --- lass/2configs/monitoring/client.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lass/2configs/monitoring') diff --git a/lass/2configs/monitoring/client.nix b/lass/2configs/monitoring/client.nix index bd7d6acbb..b8c245215 100644 --- a/lass/2configs/monitoring/client.nix +++ b/lass/2configs/monitoring/client.nix @@ -5,7 +5,7 @@ with import ; enable = true; extraConfig = { - interval = "1s"; + agent.interval = "1s"; outputs = { influxdb = { urls = ["http://prism:8086"]; -- cgit v1.2.3 From f39df4913b225ec67ca0557e3b702323bcb2bf2b Mon Sep 17 00:00:00 2001 From: lassulus Date: Tue, 7 Feb 2017 17:10:34 +0100 Subject: l 2 monitoring server: use new kapacitor config --- lass/2configs/monitoring/server.nix | 66 ++++++++++++++++++++----------------- 1 file changed, 36 insertions(+), 30 deletions(-) (limited to 'lass/2configs/monitoring') diff --git a/lass/2configs/monitoring/server.nix b/lass/2configs/monitoring/server.nix index 505cb7a17..1b556c56b 100644 --- a/lass/2configs/monitoring/server.nix +++ b/lass/2configs/monitoring/server.nix @@ -22,6 +22,7 @@ with import ; lass.kapacitor = let + db = "telegraf_db"; echoToIrc = pkgs.writeDash "echo_irc" '' set -euf data="$(${pkgs.jq}/bin/jq -r .message)" @@ -31,37 +32,42 @@ with import ; ''; in { enable = true; - check_db = "telegraf_db"; alarms = { - cpu = '' - var data = batch - |query(${"'''"} - SELECT mean("usage_user") AS mean - FROM "${config.lass.kapacitor.check_db}"."default"."cpu" - ${"'''"}) - .period(10m) - .every(1m) - .groupBy('host') - data |alert() - .crit(lambda: "mean" > 90) - .exec('${echoToIrc}') - data |deadman(1.0,5m) - .stateChangesOnly() - .exec('${echoToIrc}') - ''; - ram = '' - var data = batch - |query(${"'''"} - SELECT mean("used_percent") AS mean - FROM "${config.lass.kapacitor.check_db}"."default"."mem" - ${"'''"}) - .period(10m) - .every(1m) - .groupBy('host') - data |alert() - .crit(lambda: "mean" > 90) - .exec('${echoToIrc}') - ''; + cpu = { + database = db; + text = '' + var data = batch + |query(${"'''"} + SELECT mean("usage_user") AS mean + FROM "${db}"."default"."cpu" + ${"'''"}) + .period(10m) + .every(1m) + .groupBy('host') + data |alert() + .crit(lambda: "mean" > 90) + .exec('${echoToIrc}') + data |deadman(1.0,5m) + .stateChangesOnly() + .exec('${echoToIrc}') + ''; + }; + ram = { + database = db; + text = '' + var data = batch + |query(${"'''"} + SELECT mean("used_percent") AS mean + FROM "${db}"."default"."mem" + ${"'''"}) + .period(10m) + .every(1m) + .groupBy('host') + data |alert() + .crit(lambda: "mean" > 90) + .exec('${echoToIrc}') + ''; + }; }; }; -- cgit v1.2.3 From 3085d190485d2b4e822bf4a507104ace155c52b3 Mon Sep 17 00:00:00 2001 From: lassulus Date: Tue, 7 Feb 2017 17:11:30 +0100 Subject: l 2 monitoring: disable influx http logging --- lass/2configs/monitoring/server.nix | 1 + 1 file changed, 1 insertion(+) (limited to 'lass/2configs/monitoring') diff --git a/lass/2configs/monitoring/server.nix b/lass/2configs/monitoring/server.nix index 1b556c56b..ff6e980cb 100644 --- a/lass/2configs/monitoring/server.nix +++ b/lass/2configs/monitoring/server.nix @@ -8,6 +8,7 @@ with import ; # meta.logging-enabled = true; http.bind-address = ":8086"; admin.bind-address = ":8083"; + http.log-enabled = false; monitoring = { enabled = false; # write-interval = "24h"; -- cgit v1.2.3 From 73140ed18358e25983b28874c220f8e882e5e95f Mon Sep 17 00:00:00 2001 From: lassulus Date: Tue, 7 Feb 2017 17:12:21 +0100 Subject: l 2 monitoring server: add kibana + elasticsearch --- lass/2configs/monitoring/server.nix | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'lass/2configs/monitoring') diff --git a/lass/2configs/monitoring/server.nix b/lass/2configs/monitoring/server.nix index ff6e980cb..f3d8026ab 100644 --- a/lass/2configs/monitoring/server.nix +++ b/lass/2configs/monitoring/server.nix @@ -72,15 +72,28 @@ with import ; }; }; - krebs.iptables.tables.filter.INPUT.rules = [ - { predicate = "-p tcp -i retiolum --dport 8086"; target = "ACCEPT"; } - { predicate = "-p tcp -i retiolum --dport 3000"; target = "ACCEPT"; } - { predicate = "-p udp -i retiolum --dport 25826"; target = "ACCEPT"; } - ]; services.grafana = { enable = true; addr = "0.0.0.0"; auth.anonymous.enable = true; security = import ; # { AdminUser = ""; adminPassword = ""} }; + + services.elasticsearch = { + enable = true; + listenAddress = "0.0.0.0"; + }; + + services.kibana = { + enable = true; + listenAddress = "0.0.0.0"; + }; + + krebs.iptables.tables.filter.INPUT.rules = [ + { predicate = "-p tcp -i retiolum --dport 8086"; target = "ACCEPT"; } + { predicate = "-p tcp -i retiolum --dport 3000"; target = "ACCEPT"; } + { predicate = "-p udp -i retiolum --dport 25826"; target = "ACCEPT"; } + { predicate = "-p tcp -i retiolum --dport 9200"; target = "ACCEPT"; } + { predicate = "-p tcp -i retiolum --dport 5601"; target = "ACCEPT"; } + ]; } -- cgit v1.2.3 From aaf1f55626f16c7f9dcb681eb2d34743345e0e29 Mon Sep 17 00:00:00 2001 From: lassulus Date: Tue, 7 Feb 2017 17:12:50 +0100 Subject: l 2 monitoring client: add journalbeat --- lass/2configs/monitoring/client.nix | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'lass/2configs/monitoring') diff --git a/lass/2configs/monitoring/client.nix b/lass/2configs/monitoring/client.nix index b8c245215..e2b7dcae6 100644 --- a/lass/2configs/monitoring/client.nix +++ b/lass/2configs/monitoring/client.nix @@ -23,4 +23,13 @@ with import ; }; }; }; + + services.journalbeat = { + enable = true; + extraConfig = '' + output.elasticsearch: + hosts: ["prism:9200"] + template.enabled: false + ''; + }; } -- cgit v1.2.3 From dc4dcb80d39d0429c108c2b2258d4074eede2122 Mon Sep 17 00:00:00 2001 From: lassulus Date: Tue, 7 Feb 2017 17:44:24 +0100 Subject: l 2 monitoring server: use krebs.kapacitor --- lass/2configs/monitoring/server.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lass/2configs/monitoring') diff --git a/lass/2configs/monitoring/server.nix b/lass/2configs/monitoring/server.nix index f3d8026ab..bbae4511e 100644 --- a/lass/2configs/monitoring/server.nix +++ b/lass/2configs/monitoring/server.nix @@ -21,7 +21,7 @@ with import ; }]; }; - lass.kapacitor = + krebs.kapacitor = let db = "telegraf_db"; echoToIrc = pkgs.writeDash "echo_irc" '' -- cgit v1.2.3