# path: root/makefu/3modules/netdata.nix
# blob: 3ed33643cbdb17b7ce9bea359b4764e7b948e55d
# NixOS module that enables netdata, wires a stream.conf secret into it and
# generates the python.d httpcheck/portcheck job files from module options.
{ config, lib, pkgs, ... }:

# fork of https://github.com/Mic92/dotfiles/blob/master/nixos/vms/modules/netdata.nix
with lib;
let
  # Shorthand for the values of this module's own options.
  cfg = config.makefu.netdata;
in
{
  options.makefu.netdata = {
    # Master switch; the module's `config` section is gated on this flag.
    enable = lib.mkEnableOption "netdata";

    # TODO only apikey from file, set remote host manually
    # Location of the stream configuration that is installed as a secret.
    stream.file = lib.mkOption {
      description = "path to stream data file";
      type = lib.types.str;
      # Looked up on the Nix search path; toString yields the path as a string.
      default = toString <secrets/netdata-stream.conf>;
    };
    # Streaming role of this host. The module compares the value against
    # "master" when deciding whether netdata's health checks are switched on.
    stream.role = mkOption {
      type = types.enum [ "master" "slave" ];
      default = "slave";
      description = "Whether this host acts as the streaming master or as a slave";
    };

    # HTTP probes keyed by job name; every entry is rendered as one job in
    # netdata/python.d/httpcheck.conf.
    httpcheck.checks = lib.mkOption {
      description = ''
        httpcheck plugin: https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/httpcheck/httpcheck.conf
      '';
      default = { };
      type = lib.types.attrsOf (lib.types.submodule {
        options = {
          # Address that is requested.
          url = lib.mkOption {
            description = "Url to check";
            example = "https://thalheim.io";
            type = lib.types.str;
          };
          # Optional body match; null leaves the regex line out of the job.
          regex = lib.mkOption {
            description = "Regex that is matched against the returned content";
            example = "My homepage";
            default = null;
            type = lib.types.nullOr lib.types.str;
          };
          # Status codes written to the job's status_accepted list.
          statusAccepted = lib.mkOption {
            description = "Expected http status code";
            example = [ 401 ];
            default = [ 200 ];
            type = lib.types.listOf lib.types.int;
          };
        };
      });
    };

    # TCP probes keyed by service name; every entry is rendered as one job in
    # netdata/python.d/portcheck.conf.
    portcheck.checks = lib.mkOption {
      description = ''
        portcheck plugin: https://github.com/netdata/netdata/tree/master/collectors/python.d.plugin/portcheck
      '';
      default = { };
      type = lib.types.attrsOf (lib.types.submodule {
        options = {
          # Target of the probe; the local host unless overridden.
          host = lib.mkOption {
            description = "Dns name/IP to check";
            default = "127.0.0.1";
            type = lib.types.str;
          };
          # Mandatory: there is no default port.
          port = lib.mkOption {
            description = "Tcp port number";
            type = lib.types.int;
          };
        };
      });
    };
  };
  config = mkIf cfg.enable {
    # Order netdata after secret.service and make it a hard dependency
    # (presumably the unit that installs krebs.secret.files -- confirm).
    systemd.services.netdata.after = [ "secret.service" ];
    systemd.services.netdata.requires = [ "secret.service" ];
    # Install the stream configuration as a secret owned by the netdata user
    # and point /etc/netdata/stream.conf at the installed file.
    krebs.secret.files.netdata-stream = {
      source-path = cfg.stream.file;
      path = "/run/secret/netdata-stream.conf";
      owner = { name = "netdata"; };
    };
    environment.etc."netdata/stream.conf" = {
      source = "/run/secret/netdata-stream.conf";
    };

    # Core netdata service settings: listen on all interfaces, log errors to
    # stderr, sample every 5 seconds, and add the Python packages listed in
    # python.extraPackages to netdata's Python environment.
    services.netdata = {
      enable = true;
      config = {
        global = {
          "bind to" = "0.0.0.0:19999 [::]:19999";
          "error log" = "stderr";
          "update every" = "5";
        };
        # The [health] section of netdata.conf spells this key `enabled`;
        # the former `enable` key was not a recognised option, so the
        # role-based toggle had no effect. Health checks run on the master only.
        health.enabled = if cfg.stream.role == "master" then "yes" else "no";
      };
      python.extraPackages = ps: [
        ps.psycopg2 ps.docker ps.dnspython
      ];
    };

    # Built-in port check against the first configured OpenSSH port.
    makefu.netdata.portcheck.checks.openssh = {
      port = lib.head config.services.openssh.ports;
    };

    # Open the TCP port netdata is bound to (see "bind to" in services.netdata).
    networking.firewall = {
      allowedTCPPorts = [ 19999 ];
    };

    # Render netdata/python.d/httpcheck.conf: a global update interval followed
    # by one YAML job per entry of cfg.httpcheck.checks.
    environment.etc."netdata/python.d/httpcheck.conf".text =
      let
        # YAML single-quoted scalar; an embedded ' has to be doubled, otherwise
        # a url or regex containing a quote would end the scalar early.
        yamlQuote = value: "'${lib.replaceStrings [ "'" ] [ "''" ] value}'";
      in
      ''
        update_every: 30
        ${lib.concatStringsSep "\n" (mapAttrsToList (site: options: ''
          ${site}:
            url: ${yamlQuote options.url}
            ${optionalString (options.regex != null) "regex: ${yamlQuote options.regex}"}
            status_accepted: [ ${lib.concatStringsSep ", " (map toString options.statusAccepted)} ]
        '') cfg.httpcheck.checks)}
      '';

    # Render netdata/python.d/portcheck.conf: one YAML job (host + port) per
    # entry of cfg.portcheck.checks, jobs separated by a blank line.
    environment.etc."netdata/python.d/portcheck.conf".text =
      let
        # A single job stanza, terminated by a newline.
        renderJob = service: check: ''
          ${service}:
            host: '${check.host}'
            port: ${toString check.port}
        '';
      in
      lib.concatStringsSep "\n" (lib.mapAttrsToList renderJob cfg.portcheck.checks) + "\n";
    # Restart netdata whenever the source of one of these /etc entries changes.
    systemd.services.netdata.restartTriggers = map
      (file: config.environment.etc.${file}.source)
      [
        "netdata/python.d/httpcheck.conf"
        "netdata/python.d/portcheck.conf"
        "netdata/stream.conf"
      ];

    # Health template for slow web services: warn/crit when the 3 minute
    # average response time exceeds both a multiple of the 1h average and an
    # absolute floor of 1000 (units: ms).
    # NOTE(review): the expressions use $1h_web_service_response_time, which is
    # not defined in this file -- confirm the template providing it is loaded.
    environment.etc."netdata/health.d/httpcheck.conf".text = ''
      # taken from the original but warn only if a request is at least 1000ms slow
      template: web_service_slow
      families: *
      on: httpcheck.responsetime
      lookup: average -3m unaligned of time
      units: ms
      every: 10s
      warn: ($this > ($1h_web_service_response_time * 4) && $this > 1000)
      crit: ($this > ($1h_web_service_response_time * 6) && $this > 1000)
      info: average response time over the last 3 minutes, compared to the average over the last hour
      delay: down 5m multiplier 1.5 max 1h
      options: no-clear-notification
      to: webmaster
    '';

  };
  # TODO: notification
  # environment.etc."netdata/health_alarm_notify.conf".source = "/run/keys/netdata-pushover.conf";

}