summaryrefslogtreecommitdiffstats
path: root/krebs/3modules/sync-containers3.nix
diff options
context:
space:
mode:
Diffstat (limited to 'krebs/3modules/sync-containers3.nix')
-rw-r--r--krebs/3modules/sync-containers3.nix343
1 files changed, 343 insertions, 0 deletions
diff --git a/krebs/3modules/sync-containers3.nix b/krebs/3modules/sync-containers3.nix
new file mode 100644
index 000000000..4a00b23ab
--- /dev/null
+++ b/krebs/3modules/sync-containers3.nix
@@ -0,0 +1,343 @@
+{ config, lib, pkgs, ... }: let
+ cfg = config.krebs.sync-containers3;
+ slib = pkgs.stockholm.lib;
+in {
+ options.krebs.sync-containers3 = {
+ inContainer = {
+ enable = lib.mkEnableOption "container config for syncing";
+ pubkey = lib.mkOption {
+ type = lib.types.str; # TODO ssh key
+ };
+ };
+ containers = lib.mkOption {
+ default = {};
+ type = lib.types.attrsOf (lib.types.submodule ({ config, ... }: {
+ options = {
+ name = lib.mkOption {
+ type = lib.types.str;
+ default = config._module.args.name;
+ };
+ sshKey = lib.mkOption {
+ type = slib.types.absolute-pathname;
+ };
+ luksKey = lib.mkOption {
+ type = slib.types.absolute-pathname;
+ default = config.sshKey;
+ };
+ ephemeral = lib.mkOption {
+ type = lib.types.bool;
+ default = false;
+ };
+ runContainer = lib.mkOption {
+ type = lib.types.bool;
+ default = true;
+ };
+ };
+ }));
+ };
+ };
+ config = lib.mkMerge [
+ (lib.mkIf (cfg.containers != {}) {
+
+ containers = lib.mapAttrs' (n: ctr: lib.nameValuePair ctr.name {
+ config = {
+ environment.systemPackages = [
+ pkgs.dhcpcd
+ pkgs.git
+ pkgs.jq
+ ];
+ networking.useDHCP = lib.mkForce true;
+ systemd.services.autoswitch = {
+ environment = {
+ NIX_REMOTE = "daemon";
+ };
+ wantedBy = [ "multi-user.target" ];
+ serviceConfig.ExecStart = pkgs.writers.writeDash "autoswitch" ''
+ set -efu
+ mkdir -p /var/state/var_src
+ ln -Tfrs /var/state/var_src /var/src
+ if test -e /var/src/nixos-config; then
+ /run/current-system/sw/bin/nixos-rebuild -I /var/src switch || :
+ fi
+ '';
+ unitConfig.X-StopOnRemoval = false;
+ };
+ };
+ autoStart = false;
+ enableTun = true;
+ ephemeral = ctr.ephemeral;
+ privateNetwork = true;
+ hostBridge = "ctr0";
+ bindMounts = {
+ "/var/lib/self/disk" = {
+ hostPath = "/var/lib/sync-containers3/${ctr.name}/disk";
+ isReadOnly = false;
+ };
+ "/var/state" = {
+ hostPath = "/var/lib/sync-containers3/${ctr.name}/state";
+ isReadOnly = false;
+ };
+ };
+ }) (lib.filterAttrs (_: ctr: ctr.runContainer) cfg.containers);
+
+ systemd.services = lib.foldr lib.recursiveUpdate {} (lib.flatten (map (ctr: [
+ { "${ctr.name}_syncer" = {
+ path = with pkgs; [
+ coreutils
+ consul
+ rsync
+ openssh
+ systemd
+ ];
+ startAt = "*:0/1";
+ serviceConfig = {
+ User = "${ctr.name}_container";
+ LoadCredential = [
+ "ssh_key:${ctr.sshKey}"
+ ];
+ ExecCondition = pkgs.writers.writeDash "${ctr.name}_checker" ''
+ set -efu
+ ! systemctl is-active --quiet container@${ctr.name}.service
+ '';
+ ExecStart = pkgs.writers.writeDash "${ctr.name}_syncer" ''
+ set -efux
+ consul lock sync_${ctr.name} ${pkgs.writers.writeDash "${ctr.name}-sync" ''
+ set -efux
+ if /run/wrappers/bin/ping -c 1 ${ctr.name}.r; then
+ nice --adjustment=30 rsync -a -e "ssh -i $CREDENTIALS_DIRECTORY/ssh_key" --timeout=30 container_sync@${ctr.name}.r:disk "$HOME"/disk
+ rm -f "$HOME"/incomplete
+ fi
+ ''}
+ '';
+ };
+ }; }
+ { "${ctr.name}_watcher" = lib.mkIf ctr.runContainer {
+ path = with pkgs; [
+ coreutils
+ consul
+ cryptsetup
+ curl
+ mount
+ util-linux
+ jq
+ retry
+ ];
+ serviceConfig = {
+ ExecStart = pkgs.writers.writeDash "${ctr.name}_watcher" ''
+ set -efux
+ while sleep 5; do
+ # get the payload
+ # check if the host reacted recently
+ case $(curl -s -o /dev/null --retry 10 --retry-delay 10 -w '%{http_code}' http://127.0.0.1:8500/v1/kv/containers/${ctr.name}) in
+ 404)
+ echo 'got 404 from kv, should kill the container'
+ break
+ ;;
+ 500)
+ echo 'got 500 from kv, will kill container'
+ break
+ ;;
+ 200)
+ # echo 'got 200 from kv, will check payload'
+ payload=$(consul kv get containers/${ctr.name}) || continue
+ export payload
+ if [ "$(jq -rn 'env.payload | fromjson.host')" = '${config.networking.hostName}' ]; then
+ # echo 'we are the host, trying to reach container'
+ if $(retry -t 10 -d 10 -- /run/wrappers/bin/ping -q -c 1 ${ctr.name}.r > /dev/null); then
+ # echo 'container is reachable, continueing'
+ continue
+ else
+ # echo 'container seems dead, killing'
+ break
+ fi
+ else
+ echo 'we are not host, killing container'
+ break
+ fi
+ ;;
+ *)
+ echo 'unknown state, continuing'
+ continue
+ ;;
+ esac
+ done
+ /run/current-system/sw/bin/nixos-container stop ${ctr.name} || :
+ umount /var/lib/sync-containers3/${ctr.name}/state || :
+ cryptsetup luksClose ${ctr.name} || :
+ '';
+ };
+ }; }
+ { "${ctr.name}_scheduler" = lib.mkIf ctr.runContainer {
+ wantedBy = [ "multi-user.target" ];
+ path = with pkgs; [
+ coreutils
+ consul
+ cryptsetup
+ mount
+ util-linux
+ curl
+ systemd
+ jq
+ retry
+ bc
+ ];
+ serviceConfig = {
+ Restart = "always";
+ RestartSec = "30s";
+ ExecStart = pkgs.writers.writeDash "${ctr.name}_scheduler" ''
+ set -efux
+ # get the payload
+ # check if the host reacted recently
+ case $(curl -s -o /dev/null --retry 10 -w '%{http_code}' http://127.0.0.1:8500/v1/kv/containers/${ctr.name}) in
+ 404)
+ # echo 'got 404 from kv, will create container'
+ ;;
+ 500)
+ # echo 'got 500 from kv, retrying again'
+ exit 0
+ ;;
+ 200)
+ # echo 'got 200 from kv, will check payload'
+ export payload=$(consul kv get containers/${ctr.name})
+ if [ "$(jq -rn 'env.payload | fromjson.host')" = '${config.networking.hostName}' ]; then
+ echo 'we are the host, starting container'
+ else
+ # echo 'we are not host, checking timestamp'
+ # if [ $(echo "$(date +%s) - $(jq -rn 'env.payload | fromjson.time') > 100" | bc) -eq 1 ]; then
+ if [ "$(jq -rn 'env.payload | fromjson.time | now - tonumber > 100')" = 'true' ]; then
+ echo 'last beacon is more than 100s ago, taking over'
+ else
+ # echo 'last beacon was recent. trying again'
+ exit 0
+ fi
+ fi
+ ;;
+ *)
+ echo 'unknown state, bailing out'
+ exit 0
+ ;;
+ esac
+ consul kv put containers/${ctr.name} "$(jq -cn '{host: "${config.networking.hostName}", time: now}')" >/dev/null
+ consul lock -verbose -monitor-retry=100 -timeout 30s -name container_${ctr.name} container_${ctr.name} ${pkgs.writers.writeBash "${ctr.name}-start" ''
+ set -efu
+ cryptsetup luksOpen --key-file ${ctr.luksKey} /var/lib/sync-containers3/${ctr.name}/disk ${ctr.name} || :
+ mkdir -p /var/lib/sync-containers3/${ctr.name}/state
+ mountpoint /var/lib/sync-containers3/${ctr.name}/state || mount /dev/mapper/${ctr.name} /var/lib/sync-containers3/${ctr.name}/state
+ /run/current-system/sw/bin/nixos-container start ${ctr.name}
+ # wait for system to become reachable for the first time
+ systemctl start ${ctr.name}_watcher.service
+ retry -t 10 -d 10 -- /run/wrappers/bin/ping -q -c 1 ${ctr.name}.r > /dev/null
+ while systemctl is-active container@${ctr.name}.service >/devnull && /run/wrappers/bin/ping -q -c 3 ${ctr.name}.r >/dev/null; do
+ consul kv put containers/${ctr.name} "$(jq -cn '{host: "${config.networking.hostName}", time: now}')" >/dev/null
+ sleep 10
+ done
+ ''}
+ '';
+ };
+ }; }
+ { "container@${ctr.name}" = lib.mkIf ctr.runContainer {
+ serviceConfig = {
+ ExecStop = pkgs.writers.writeDash "remove_interface" ''
+ ${pkgs.iproute2}/bin/ip link del vb-${ctr.name}
+ '';
+ };
+ }; }
+ ]) (lib.attrValues cfg.containers)));
+
+ systemd.timers = lib.mapAttrs' (n: ctr: lib.nameValuePair "${ctr.name}_syncer" {
+ timerConfig = {
+ RandomizedDelaySec = 100;
+ };
+ }) cfg.containers;
+
+ users.groups = lib.mapAttrs' (_: ctr: lib.nameValuePair "${ctr.name}_container" {
+ }) cfg.containers;
+ users.users = lib.mapAttrs' (_: ctr: lib.nameValuePair "${ctr.name}_container" ({
+ group = "${ctr.name}_container";
+ isNormalUser = true;
+ uid = slib.genid_uint31 "container_${ctr.name}";
+ home = "/var/lib/sync-containers3/${ctr.name}";
+ createHome = true;
+ homeMode = "705";
+ })) cfg.containers;
+
+ environment.systemPackages = lib.mapAttrsToList (_: ctr: (pkgs.writers.writeDashBin "${ctr.name}_init" ''
+ set -efux
+ export PATH=${lib.makeBinPath [
+ pkgs.coreutils
+ pkgs.cryptsetup
+ pkgs.libxfs.bin
+ ]}:$PATH
+ truncate -s 5G /var/lib/sync-containers3/${ctr.name}/disk
+ cryptsetup luksFormat /var/lib/sync-containers3/${ctr.name}/disk ${ctr.luksKey}
+ cryptsetup luksOpen --key-file ${ctr.luksKey} /var/lib/sync-containers3/${ctr.name}/disk ${ctr.name}
+ mkfs.xfs /dev/mapper/${ctr.name}
+ mkdir -p /var/lib/sync-containers3/${ctr.name}/state
+ mountpoint /var/lib/sync-containers3/${ctr.name}/state || mount /dev/mapper/${ctr.name} /var/lib/sync-containers3/${ctr.name}/state
+ /run/current-system/sw/bin/nixos-container start ${ctr.name}
+ /run/current-system/sw/bin/nixos-container run ${ctr.name} -- ${pkgs.writeDash "init" ''
+ mkdir -p /var/state
+ ''}
+ '')) cfg.containers;
+ })
+ (lib.mkIf (cfg.containers != {}) {
+ # networking
+
+ # needed because otherwise we lose local dns
+ environment.etc."resolv.conf".source = lib.mkForce "/run/systemd/resolve/resolv.conf";
+
+ boot.kernel.sysctl."net.ipv4.ip_forward" = lib.mkForce 1;
+ systemd.network.networks.ctr0 = {
+ name = "ctr0";
+ address = [
+ "10.233.0.1/24"
+ ];
+ networkConfig = {
+ # IPForward = "yes";
+ # IPMasquerade = "both";
+ ConfigureWithoutCarrier = true;
+ DHCPServer = "yes";
+ };
+ };
+ systemd.network.netdevs.ctr0.netdevConfig = {
+ Kind = "bridge";
+ Name = "ctr0";
+ };
+ networking.networkmanager.unmanaged = [ "ctr0" ];
+ krebs.iptables.tables.filter.INPUT.rules = [
+ { predicate = "-i ctr0"; target = "ACCEPT"; }
+ ];
+ krebs.iptables.tables.filter.FORWARD.rules = [
+ { predicate = "-i ctr0"; target = "ACCEPT"; }
+ { predicate = "-o ctr0"; target = "ACCEPT"; }
+ ];
+ krebs.iptables.tables.nat.POSTROUTING.rules = [
+ { v6 = false; predicate = "-s 10.233.0.0/24"; target = "MASQUERADE"; }
+ ];
+ })
+ (lib.mkIf cfg.inContainer.enable {
+ users.groups.container_sync = {};
+ users.users.container_sync = {
+ group = "container_sync";
+ uid = slib.genid_uint31 "container_sync";
+ isNormalUser = true;
+ home = "/var/lib/self";
+ createHome = true;
+ openssh.authorizedKeys.keys = [
+ cfg.inContainer.pubkey
+ ];
+ };
+
+ networking.useHostResolvConf = false;
+ networking.useNetworkd = true;
+ systemd.network = {
+ enable = true;
+ networks.eth0 = {
+ matchConfig.Name = "eth0";
+ DHCP = "yes";
+ dhcpV4Config.UseDNS = true;
+ };
+ };
+ })
+ ];
+}