summary refs log tree commit diff stats
path: root/lass/2configs/monitoring
diff options
context:
space:
mode:
author tv <tv@krebsco.de> 2017-02-09 14:54:56 +0100
committer tv <tv@krebsco.de> 2017-02-09 14:54:56 +0100
commit 38d2ff961f2ad8d02ae6061952abe42e4de89f75 (patch)
tree a8573db9389ba5268a8fb83ddef6d92e3e1a7815 /lass/2configs/monitoring
parent ed406bd979609fd05f5846049f571f43e6512050 (diff)
parent 954477b8674156754cd51021d92885b456a04a5b (diff)
Merge remote-tracking branch 'prism/master'
Diffstat (limited to 'lass/2configs/monitoring')
-rw-r--r-- lass/2configs/monitoring/client.nix 105
-rw-r--r-- lass/2configs/monitoring/server.nix 83
2 files changed, 81 insertions, 107 deletions
diff --git a/lass/2configs/monitoring/client.nix b/lass/2configs/monitoring/client.nix
index e879d6960..e2b7dcae6 100644
--- a/lass/2configs/monitoring/client.nix
+++ b/lass/2configs/monitoring/client.nix
@@ -1,94 +1,35 @@
{pkgs, config, ...}:
with import <stockholm/lib>;
{
- lass.telegraf = {
+ services.telegraf = {
enable = true;
- interval = "1s";
-
- outputs = ''
- [outputs.influxdb]
- urls = ["http://prism:8086"]
- database = "telegraf_db"
- user_agent = "telegraf"
- '';
- inputs = [
- ''
- [cpu]
- percpu = false
- totalcpu = true
- drop = ["cpu_time"]
- ''
- ''
- [[inputs.mem]]
- ''
- ''
- [[inputs.ping]]
- urls = ["8.8.8.8"]
- ''
- ''
- [[inputs.net]]
- ''
- ''
- [[inputs.dns_query]]
- servers = ["8.8.8.8"]
- ''
- ];
+ extraConfig = {
+ agent.interval = "1s";
+ outputs = {
+ influxdb = {
+ urls = ["http://prism:8086"];
+ database = "telegraf_db";
+ user_agent = "telegraf";
+ };
+ };
+ inputs = {
+ cpu = {
+ percpu = false;
+ totalcpu = true;
+ };
+ mem = {};
+ net = {};
+ };
+ };
};
- systemd.services.telegraf.path = with pkgs; [
- iputils
- lm_sensors
- ];
- services.collectd = {
+ services.journalbeat = {
enable = true;
- autoLoadPlugin = true;
extraConfig = ''
- Hostname ${config.krebs.build.host.name}
- LoadPlugin load
- LoadPlugin disk
- LoadPlugin memory
- Interval 30.0
-
- LoadPlugin interface
- <Plugin "interface">
- Interface "*Link"
- Interface "lo"
- Interface "vboxnet*"
- Interface "virbr*"
- IgnoreSelected true
- </Plugin>
-
- LoadPlugin df
- <Plugin "df">
- MountPoint "/nix/store"
- FSType "tmpfs"
- FSType "binfmt_misc"
- FSType "debugfs"
- FSType "mqueue"
- FSType "hugetlbfs"
- FSType "systemd-1"
- FSType "cgroup"
- FSType "securityfs"
- FSType "ramfs"
- FSType "proc"
- FSType "devpts"
- FSType "devtmpfs"
- MountPoint "/var/lib/docker/devicemapper"
- IgnoreSelected true
- </Plugin>
-
- LoadPlugin cpu
- <Plugin cpu>
- ReportByCpu true
- ReportByState true
- ValuesPercentage true
- </Plugin>
-
- LoadPlugin network
- <Plugin "network">
- Server "prism" "25826"
- </Plugin>
+ output.elasticsearch:
+ hosts: ["prism:9200"]
+ template.enabled: false
'';
};
}
diff --git a/lass/2configs/monitoring/server.nix b/lass/2configs/monitoring/server.nix
index 2e1c15ca1..bbae4511e 100644
--- a/lass/2configs/monitoring/server.nix
+++ b/lass/2configs/monitoring/server.nix
@@ -1,15 +1,14 @@
{pkgs, config, ...}:
with import <stockholm/lib>;
{
- services.influxdb = {
- enable = true;
- };
+ services.influxdb.enable = true;
services.influxdb.extraConfig = {
meta.hostname = config.krebs.build.host.name;
# meta.logging-enabled = true;
http.bind-address = ":8086";
admin.bind-address = ":8083";
+ http.log-enabled = false;
monitoring = {
enabled = false;
# write-interval = "24h";
@@ -22,45 +21,79 @@ with import <stockholm/lib>;
}];
};
- lass.kapacitor =
+ krebs.kapacitor =
let
+ db = "telegraf_db";
echoToIrc = pkgs.writeDash "echo_irc" ''
set -euf
data="$(${pkgs.jq}/bin/jq -r .message)"
export LOGNAME=prism-alarm
${pkgs.irc-announce}/bin/irc-announce \
- irc.freenode.org 6667 prism-alarm \#krebs-bots "$data" >/dev/null
+ ni.r 6667 prism-alarm \#retiolum "$data" >/dev/null
'';
in {
enable = true;
alarms = {
- test2 = ''
- batch
- |query(${"'''"}
- SELECT mean("usage_user") AS mean
- FROM "${config.lass.kapacitor.check_db}"."default"."cpu"
- ${"'''"})
- .every(3m)
- .period(1m)
- .groupBy('host')
- |alert()
- .crit(lambda: "mean" > 90)
- // Whenever we get an alert write it to a file.
- .log('/tmp/alerts.log')
- .exec('${echoToIrc}')
- '';
+ cpu = {
+ database = db;
+ text = ''
+ var data = batch
+ |query(${"'''"}
+ SELECT mean("usage_user") AS mean
+ FROM "${db}"."default"."cpu"
+ ${"'''"})
+ .period(10m)
+ .every(1m)
+ .groupBy('host')
+ data |alert()
+ .crit(lambda: "mean" > 90)
+ .exec('${echoToIrc}')
+ data |deadman(1.0,5m)
+ .stateChangesOnly()
+ .exec('${echoToIrc}')
+ '';
+ };
+ ram = {
+ database = db;
+ text = ''
+ var data = batch
+ |query(${"'''"}
+ SELECT mean("used_percent") AS mean
+ FROM "${db}"."default"."mem"
+ ${"'''"})
+ .period(10m)
+ .every(1m)
+ .groupBy('host')
+ data |alert()
+ .crit(lambda: "mean" > 90)
+ .exec('${echoToIrc}')
+ '';
+ };
};
};
- krebs.iptables.tables.filter.INPUT.rules = [
- { predicate = "-p tcp -i retiolum --dport 8086"; target = "ACCEPT"; }
- { predicate = "-p tcp -i retiolum --dport 3000"; target = "ACCEPT"; }
- { predicate = "-p udp -i retiolum --dport 25826"; target = "ACCEPT"; }
- ];
services.grafana = {
enable = true;
addr = "0.0.0.0";
auth.anonymous.enable = true;
security = import <secrets/grafana_security.nix>; # { AdminUser = ""; adminPassword = ""}
};
+
+ services.elasticsearch = {
+ enable = true;
+ listenAddress = "0.0.0.0";
+ };
+
+ services.kibana = {
+ enable = true;
+ listenAddress = "0.0.0.0";
+ };
+
+ krebs.iptables.tables.filter.INPUT.rules = [
+ { predicate = "-p tcp -i retiolum --dport 8086"; target = "ACCEPT"; }
+ { predicate = "-p tcp -i retiolum --dport 3000"; target = "ACCEPT"; }
+ { predicate = "-p udp -i retiolum --dport 25826"; target = "ACCEPT"; }
+ { predicate = "-p tcp -i retiolum --dport 9200"; target = "ACCEPT"; }
+ { predicate = "-p tcp -i retiolum --dport 5601"; target = "ACCEPT"; }
+ ];
}