diff options
Diffstat (limited to 'lass/2configs/monitoring')
-rw-r--r-- | lass/2configs/monitoring/client.nix | 94 | ||||
-rw-r--r-- | lass/2configs/monitoring/server.nix | 66 |
2 files changed, 160 insertions, 0 deletions
diff --git a/lass/2configs/monitoring/client.nix b/lass/2configs/monitoring/client.nix
new file mode 100644
index 000000000..e879d6960
--- /dev/null
+++ b/lass/2configs/monitoring/client.nix
@@ -0,0 +1,94 @@
+{pkgs, config, ...}: # Monitoring client: ships metrics to host "prism" via telegraf (HTTP 8086) and collectd (25826).
+with import <stockholm/lib>;
+{
+  lass.telegraf = {
+    enable = true;
+    interval = "1s"; # NOTE(review): presumably the telegraf collection interval; confirm against the lass.telegraf module
+
+
+    outputs = ''
+      [[outputs.influxdb]]
+        urls = ["http://prism:8086"]
+        database = "telegraf_db"
+        user_agent = "telegraf"
+    ''; # port 8086 matches http.bind-address in monitoring/server.nix
+    inputs = [ # one TOML snippet per telegraf input plugin
+      ''
+        [[inputs.cpu]]
+          percpu = false
+          totalcpu = true
+          fielddrop = ["time_*"]
+      ''
+      ''
+        [[inputs.mem]]
+      ''
+      ''
+        [[inputs.ping]]
+          urls = ["8.8.8.8"]
+      ''
+      ''
+        [[inputs.net]]
+      ''
+      ''
+        [[inputs.dns_query]]
+          servers = ["8.8.8.8"]
+      ''
+    ];
+  };
+  systemd.services.telegraf.path = with pkgs; [ # extra packages put on the telegraf unit's PATH
+    iputils # NOTE(review): presumably supplies the ping binary for inputs.ping; confirm
+    lm_sensors # NOTE(review): no sensors input is configured above; confirm this is still needed
+  ];
+
+  services.collectd = {
+    enable = true;
+    autoLoadPlugin = true;
+    extraConfig = ''
+      Hostname ${config.krebs.build.host.name}
+      LoadPlugin load
+      LoadPlugin disk
+      LoadPlugin memory
+      Interval 30.0
+
+      LoadPlugin interface
+      <Plugin "interface">
+        Interface "*Link"
+        Interface "lo"
+        Interface "vboxnet*"
+        Interface "virbr*"
+        IgnoreSelected true
+      </Plugin>
+
+      LoadPlugin df
+      <Plugin "df">
+        MountPoint "/nix/store"
+        FSType "tmpfs"
+        FSType "binfmt_misc"
+        FSType "debugfs"
+        FSType "mqueue"
+        FSType "hugetlbfs"
+        FSType "systemd-1"
+        FSType "cgroup"
+        FSType "securityfs"
+        FSType "ramfs"
+        FSType "proc"
+        FSType "devpts"
+        FSType "devtmpfs"
+        MountPoint "/var/lib/docker/devicemapper"
+        IgnoreSelected true
+      </Plugin>
+
+      LoadPlugin cpu
+      <Plugin cpu>
+        ReportByCpu true
+        ReportByState true
+        ValuesPercentage true
+      </Plugin>
+
+      LoadPlugin network
+      <Plugin "network">
+        Server "prism" "25826"
+      </Plugin>
+    ''; # port 25826 matches the collectd listener in monitoring/server.nix
+  };
+}
diff --git a/lass/2configs/monitoring/server.nix b/lass/2configs/monitoring/server.nix
new file mode 100644
index 000000000..2e1c15ca1
--- /dev/null
+++ b/lass/2configs/monitoring/server.nix
@@ -0,0 +1,66 @@
+{pkgs, config, ...}: # Monitoring server: InfluxDB with a collectd listener, one kapacitor alarm, and Grafana.
+with import <stockholm/lib>;
+{
+  services.influxdb = {
+    enable = true;
+  };
+
+  services.influxdb.extraConfig = {
+    meta.hostname = config.krebs.build.host.name;
+    # meta.logging-enabled = true;
+    http.bind-address = ":8086"; # telegraf clients write to this port
+    admin.bind-address = ":8083"; # no firewall rule below opens this port
+    monitoring = {
+      enabled = false;
+      # write-interval = "24h";
+    };
+    collectd = [{ # collectd listener; the port is opened for UDP on retiolum below
+      enabled = true;
+      typesdb = "${pkgs.collectd}/share/collectd/types.db";
+      database = "collectd_db";
+      port = 25826;
+    }];
+  };
+
+  lass.kapacitor =
+    let
+      echoToIrc = pkgs.writeDash "echo_irc" ''
+        set -euf
+        data="$(${pkgs.jq}/bin/jq -r .message)"
+        export LOGNAME=prism-alarm
+        ${pkgs.irc-announce}/bin/irc-announce \
+          irc.freenode.org 6667 prism-alarm \#krebs-bots "$data" >/dev/null
+      ''; # reads alert JSON on stdin and announces its .message field on IRC
+    in {
+      enable = true;
+      alarms = {
+        test2 = ''
+          batch
+            |query(${"'''"}
+              SELECT mean("usage_user") AS mean
+              FROM "${config.lass.kapacitor.check_db}"."default"."cpu"
+            ${"'''"})
+            .every(3m)
+            .period(1m)
+            .groupBy('host')
+            |alert()
+            .crit(lambda: "mean" > 90)
+            // Whenever we get an alert write it to a file.
+            .log('/tmp/alerts.log')
+            .exec('${echoToIrc}')
+        ''; # ${"'''"} emits a literal ''' (TICKscript string delimiter) from inside a Nix '' string
+      };
+    };
+
+  krebs.iptables.tables.filter.INPUT.rules = [ # every rule is limited to the retiolum interface
+    { predicate = "-p tcp -i retiolum --dport 8086"; target = "ACCEPT"; } # InfluxDB HTTP (http.bind-address)
+    { predicate = "-p tcp -i retiolum --dport 3000"; target = "ACCEPT"; } # NOTE(review): presumably Grafana's HTTP port; confirm
+    { predicate = "-p udp -i retiolum --dport 25826"; target = "ACCEPT"; } # collectd listener
+  ];
+  services.grafana = {
+    enable = true;
+    addr = "0.0.0.0";
+    auth.anonymous.enable = true; # NOTE(review): anonymous access while listening on all addresses; confirm this is intended
+    security = import <secrets/grafana_security.nix>; # expects { adminUser = "..."; adminPassword = "..."; }
+  };
+}