diff options
Diffstat (limited to 'lass/2configs/monitoring')
-rw-r--r-- | lass/2configs/monitoring/client.nix | 105 | ||||
-rw-r--r-- | lass/2configs/monitoring/server.nix | 83 |
2 files changed, 81 insertions, 107 deletions
diff --git a/lass/2configs/monitoring/client.nix b/lass/2configs/monitoring/client.nix index e879d6960..e2b7dcae6 100644 --- a/lass/2configs/monitoring/client.nix +++ b/lass/2configs/monitoring/client.nix @@ -1,94 +1,35 @@ {pkgs, config, ...}: with import <stockholm/lib>; { - lass.telegraf = { + services.telegraf = { enable = true; - interval = "1s"; - - outputs = '' - [outputs.influxdb] - urls = ["http://prism:8086"] - database = "telegraf_db" - user_agent = "telegraf" - ''; - inputs = [ - '' - [cpu] - percpu = false - totalcpu = true - drop = ["cpu_time"] - '' - '' - [[inputs.mem]] - '' - '' - [[inputs.ping]] - urls = ["8.8.8.8"] - '' - '' - [[inputs.net]] - '' - '' - [[inputs.dns_query]] - servers = ["8.8.8.8"] - '' - ]; + extraConfig = { + agent.interval = "1s"; + outputs = { + influxdb = { + urls = ["http://prism:8086"]; + database = "telegraf_db"; + user_agent = "telegraf"; + }; + }; + inputs = { + cpu = { + percpu = false; + totalcpu = true; + }; + mem = {}; + net = {}; + }; + }; }; - systemd.services.telegraf.path = with pkgs; [ - iputils - lm_sensors - ]; - services.collectd = { + services.journalbeat = { enable = true; - autoLoadPlugin = true; extraConfig = '' - Hostname ${config.krebs.build.host.name} - LoadPlugin load - LoadPlugin disk - LoadPlugin memory - Interval 30.0 - - LoadPlugin interface - <Plugin "interface"> - Interface "*Link" - Interface "lo" - Interface "vboxnet*" - Interface "virbr*" - IgnoreSelected true - </Plugin> - - LoadPlugin df - <Plugin "df"> - MountPoint "/nix/store" - FSType "tmpfs" - FSType "binfmt_misc" - FSType "debugfs" - FSType "mqueue" - FSType "hugetlbfs" - FSType "systemd-1" - FSType "cgroup" - FSType "securityfs" - FSType "ramfs" - FSType "proc" - FSType "devpts" - FSType "devtmpfs" - MountPoint "/var/lib/docker/devicemapper" - IgnoreSelected true - </Plugin> - - LoadPlugin cpu - <Plugin cpu> - ReportByCpu true - ReportByState true - ValuesPercentage true - </Plugin> - - LoadPlugin network - <Plugin "network"> - Server "prism" "25826" - </Plugin> + output.elasticsearch: + hosts: ["prism:9200"] + template.enabled: false ''; }; } diff --git a/lass/2configs/monitoring/server.nix b/lass/2configs/monitoring/server.nix index 2e1c15ca1..bbae4511e 100644 --- a/lass/2configs/monitoring/server.nix +++ b/lass/2configs/monitoring/server.nix @@ -1,15 +1,14 @@ {pkgs, config, ...}: with import <stockholm/lib>; { - services.influxdb = { - enable = true; - }; + services.influxdb.enable = true; services.influxdb.extraConfig = { meta.hostname = config.krebs.build.host.name; # meta.logging-enabled = true; http.bind-address = ":8086"; admin.bind-address = ":8083"; + http.log-enabled = false; monitoring = { enabled = false; # write-interval = "24h"; @@ -22,45 +21,79 @@ with import <stockholm/lib>; }]; }; - lass.kapacitor = + krebs.kapacitor = let + db = "telegraf_db"; echoToIrc = pkgs.writeDash "echo_irc" '' set -euf data="$(${pkgs.jq}/bin/jq -r .message)" export LOGNAME=prism-alarm ${pkgs.irc-announce}/bin/irc-announce \ - irc.freenode.org 6667 prism-alarm \#krebs-bots "$data" >/dev/null + ni.r 6667 prism-alarm \#retiolum "$data" >/dev/null ''; in { enable = true; alarms = { - test2 = '' - batch - |query(${"'''"} - SELECT mean("usage_user") AS mean - FROM "${config.lass.kapacitor.check_db}"."default"."cpu" - ${"'''"}) - .every(3m) - .period(1m) - .groupBy('host') - |alert() - .crit(lambda: "mean" > 90) - // Whenever we get an alert write it to a file. - .log('/tmp/alerts.log') - .exec('${echoToIrc}') - ''; + cpu = { + database = db; + text = '' + var data = batch + |query(${"'''"} + SELECT mean("usage_user") AS mean + FROM "${db}"."default"."cpu" + ${"'''"}) + .period(10m) + .every(1m) + .groupBy('host') + data |alert() + .crit(lambda: "mean" > 90) + .exec('${echoToIrc}') + data |deadman(1.0,5m) + .stateChangesOnly() + .exec('${echoToIrc}') + ''; + }; + ram = { + database = db; + text = '' + var data = batch + |query(${"'''"} + SELECT mean("used_percent") AS mean + FROM "${db}"."default"."mem" + ${"'''"}) + .period(10m) + .every(1m) + .groupBy('host') + data |alert() + .crit(lambda: "mean" > 90) + .exec('${echoToIrc}') + ''; + }; }; }; - krebs.iptables.tables.filter.INPUT.rules = [ - { predicate = "-p tcp -i retiolum --dport 8086"; target = "ACCEPT"; } - { predicate = "-p tcp -i retiolum --dport 3000"; target = "ACCEPT"; } - { predicate = "-p udp -i retiolum --dport 25826"; target = "ACCEPT"; } - ]; services.grafana = { enable = true; addr = "0.0.0.0"; auth.anonymous.enable = true; security = import <secrets/grafana_security.nix>; # { AdminUser = ""; adminPassword = ""} }; + + services.elasticsearch = { + enable = true; + listenAddress = "0.0.0.0"; + }; + + services.kibana = { + enable = true; + listenAddress = "0.0.0.0"; + }; + + krebs.iptables.tables.filter.INPUT.rules = [ + { predicate = "-p tcp -i retiolum --dport 8086"; target = "ACCEPT"; } + { predicate = "-p tcp -i retiolum --dport 3000"; target = "ACCEPT"; } + { predicate = "-p udp -i retiolum --dport 25826"; target = "ACCEPT"; } + { predicate = "-p tcp -i retiolum --dport 9200"; target = "ACCEPT"; } + { predicate = "-p tcp -i retiolum --dport 5601"; target = "ACCEPT"; } + ]; } |