diff --git a/README.md b/README.md index 87f064d..3504e4b 100644 --- a/README.md +++ b/README.md @@ -121,21 +121,33 @@ $ inv deploy --hosts build02 reboot --hosts build02 ``` ## Install/Fix system from Hetzner recovery mode -1. Install kexec image from Hetzner recovery system as described in [kexec.nix](roles/kexec.nix) and boot into it -2. Format and/or mount all filesystems to /mnt: +1. Copy your SSH key to the recovery system so that the kexec image can re-use it. + +```console +yourmachine> ssh-copy-id root@buildXX.nix-community.org +``` + +2. Download the kexec image and boot into it: + +```console +$ curl -L https://github.com/nix-community/nixos-images/releases/download/nixos-unstable/nixos-kexec-installer-x86_64-linux.tar.gz | tar -xzf- -C /root +$ /root/kexec/run +``` + +3. Format and/or mount all filesystems to /mnt: ```console $ inv format-disks --hosts buildXX --disks /dev/nvme0n1,/dev/nvme1n1 ``` -3. Setup secrets +4. Set up secrets ```console $ inv setup-secret --hosts buildXX ``` -4. Generate configuration and download to the repo +5. Generate configuration and download to the repo ```console $ nixos-generate-config --root /tmp @@ -143,7 +155,7 @@ $ nixos-generate-config --root /tmp $ scp buildXX.nix-community.org:/tmp/etc/nixos/hardware-configuration.nix buildXX/hardware-configuration.nix ``` -5. Build and install +6. 
Build and install ```console $ inv install-nixos --hosts buildXX diff --git a/flake.lock b/flake.lock index 477bed8..cf1d6de 100644 --- a/flake.lock +++ b/flake.lock @@ -23,11 +23,11 @@ ] }, "locked": { - "lastModified": 1670441596, - "narHash": "sha256-+T487QnluBT5F9tVk0chG/zzv+9zzTrx3o7rlOBK7ps=", + "lastModified": 1671322946, + "narHash": "sha256-J8Qj+ITV+eti+irTK9Zn2LZVYoIW2g7irPUckU8yZvU=", "owner": "hercules-ci", "repo": "flake-parts", - "rev": "8d0e2444ab05f79df93b70e5e497f8c708eb6b9b", + "rev": "3f7172646953bf86dad5953bc45f0edae62ac445", "type": "github" }, "original": { @@ -69,8 +69,8 @@ }, "original": { "owner": "NixOS", - "ref": "nixos-unstable-small", "repo": "nixpkgs", + "rev": "34274e6c8604be2d103606b11dae0ac2e3a0d584", "type": "github" } }, @@ -180,7 +180,8 @@ "nixpkgs-update": "nixpkgs-update", "nixpkgs-update-github-releases": "nixpkgs-update-github-releases", "nixpkgs-update-pypi-releases": "nixpkgs-update-pypi-releases", - "sops-nix": "sops-nix" + "sops-nix": "sops-nix", + "srvos": "srvos" } }, "sops-nix": { @@ -203,6 +204,26 @@ "repo": "sops-nix", "type": "github" } + }, + "srvos": { + "inputs": { + "nixpkgs": [ + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1671482743, + "narHash": "sha256-YqOjz4ZY++p6/siB0eygD0kFeYJwQgwfkz2W/d9JWkA=", + "owner": "numtide", + "repo": "srvos", + "rev": "dcd08ecab2efc069b0a3326415f740a927a1f023", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "srvos", + "type": "github" + } } }, "root": "root", diff --git a/flake.nix b/flake.nix index 8fc0395..4ab546d 100644 --- a/flake.nix +++ b/flake.nix @@ -11,7 +11,9 @@ ]; inputs = { - nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable-small"; + # FIXME: hercules ci is currently broken in latest nixpkgs + # nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable-small"; + nixpkgs.url = "github:NixOS/nixpkgs/34274e6c8604be2d103606b11dae0ac2e3a0d584"; nixpkgs-update.url = "github:ryantm/nixpkgs-update"; nixpkgs-update-github-releases.url = 
"github:ryantm/nixpkgs-update-github-releases"; nixpkgs-update-github-releases.flake = false; @@ -20,17 +22,17 @@ sops-nix.url = "github:Mic92/sops-nix"; sops-nix.inputs.nixpkgs.follows = "nixpkgs"; + srvos.url = "github:numtide/srvos"; + # not actually used when only the modules are consumed, but this way nothing will ever try to fetch srvos' own nixpkgs variant + srvos.inputs.nixpkgs.follows = "nixpkgs"; + flake-parts.url = "github:hercules-ci/flake-parts"; flake-parts.inputs.nixpkgs-lib.follows = "nixpkgs"; }; - outputs = { - self, - flake-parts, - ... - }: + outputs = inputs @ {flake-parts, ...}: flake-parts.lib.mkFlake - {inherit self;} + {inherit inputs;} { systems = ["x86_64-linux" "aarch64-linux" "x86_64-darwin" "aarch64-darwin"]; @@ -44,10 +46,14 @@ }; }; flake.nixosConfigurations = let - inherit (self.inputs.nixpkgs.lib) nixosSystem; + inherit (inputs.nixpkgs.lib) nixosSystem; common = [ - self.inputs.sops-nix.nixosModules.sops - { _module.args.inputs = self.inputs; } + { _module.args.inputs = inputs; } + inputs.sops-nix.nixosModules.sops + inputs.srvos.nixosModules.common + + inputs.srvos.nixosModules.telegraf + { networking.firewall.allowedTCPPorts = [ 9273 ]; } ]; in { "build01.nix-community.org" = nixosSystem { @@ -66,7 +72,7 @@ ++ [ (import ./build02/nixpkgs-update.nix { inherit - (self.inputs) + (inputs) nixpkgs-update nixpkgs-update-github-releases nixpkgs-update-pypi-releases diff --git a/roles/common.nix b/roles/common.nix index 69505e7..1c9e0e3 100644 --- a/roles/common.nix +++ b/roles/common.nix @@ -1,21 +1,12 @@ { pkgs, lib, config, ... }: { - imports = [ ./auto-upgrade.nix ./nix-daemon.nix ./security.nix ./sops-nix.nix - ./sshd.nix - ./telegraf.nix ./users.nix - ./zfs.nix - ]; - - environment.systemPackages = [ - # for quick activity overview - pkgs.htop ]; # Nicer interactive shell @@ -30,15 +21,6 @@ # Just disable it since we are using telegraf to monitor raid health. 
systemd.services.mdmonitor.enable = false; - # Make debugging failed units easier - systemd.extraConfig = '' - DefaultStandardOutput=journal - DefaultStandardError=journal - ''; - - # The nix-community is global :) - time.timeZone = "UTC"; - # speed-up evaluation & save disk space by disabling manpages documentation.enable = false; diff --git a/roles/kexec.nix b/roles/kexec.nix deleted file mode 100644 index 8b75c90..0000000 --- a/roles/kexec.nix +++ /dev/null @@ -1,28 +0,0 @@ -{ config, lib, pkgs, ... }: -# build with: -# nix-shell -p nixos-generators --run 'nixos-generate -o ./result -f kexec-bundle -c ./roles/kexec.nix' -{ - imports = [ - ./users.nix - ./sshd.nix - ]; - - # ttyAMA0 is consoles on aarch64 - boot.kernelParams = [ "console=ttyS0,115200n8" "console=ttyAMA0,115200n8" "console=tty0" ]; -} - -# Hetzner bootstrap from rescue system -# -#useradd -m -s /bin/bash foo -#install -d -m700 -o foo /nix -#su - foo -#curl -L https://nixos.org/nix/install | bash -#. /home/foo/.nix-profile/etc/profile.d/nix.sh -#git clone https://github.com/nix-community/infra && cd infra -#nix-shell -#nix-shell -p nixos-generators --run 'nixos-generate -o ./result -f kexec-bundle -c ./roles/kexec.nix' -#exit -#exit -#/home/foo/infra/result -#after reboot: -#$ systemctl stop autoreboot.timer diff --git a/roles/nix-daemon.nix b/roles/nix-daemon.nix index 4e01c9d..46edc0d 100644 --- a/roles/nix-daemon.nix +++ b/roles/nix-daemon.nix @@ -20,12 +20,6 @@ in settings.min-free = asGB 10; settings.max-free = asGB 200; - # avoid copying unecessary stuff over SSH - settings.builders-use-substitutes = true; - - # allow flakes - settings.experimental-features = "nix-command flakes"; - # users in trusted group are trusted by the nix-daemon settings.trusted-users = [ "@trusted" ]; diff --git a/roles/security.nix b/roles/security.nix index c378c7b..e27ec76 100644 --- a/roles/security.nix +++ b/roles/security.nix @@ -1,30 +1,9 @@ { config, pkgs, lib, ... 
}: - { # Make sure that the firewall is enabled, even if it's the default. networking.firewall.enable = true; - # Allow password-less sudo for wheel users - security.sudo.enable = true; - security.sudo.wheelNeedsPassword = false; - - # Dont let users create their own authorized keys files - services.openssh.authorizedKeysFiles = lib.mkForce [ - "/etc/ssh/authorized_keys.d/%u" - ]; - - services.openssh.kbdInteractiveAuthentication = false; - services.openssh.passwordAuthentication = false; - programs.ssh.knownHosts = { - github-rsa = { - extraHostNames = [ "github.com" ]; - publicKey = "ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAq2A7hRGmdnm9tUDbO9IDSwBK6TbQa+PXYPCPy6rbTrTtw7PHkccKrpp0yVhp5HdEIcKr6pLlVDBfOLX9QUsyCOV0wzfjIJNlGEYsdlLJizHhbn2mUjvSAHQqZETYP81eFzLQNnPHt4EVVUh7VfDESU84KezmD5QlWpXLmvU31/yMf+Se8xhHTvKSCZIFImWwoG6mbUoWf9nzpIoaSjB+weqqUUmpaaasXVal72J+UX2B+2RPW3RcT0eOzQgqlJL3RKrTJvdsjE3JEAvGq3lGHSZXy28G3skua2SmVi/w4yCE6gbODqnTWlg7+wC604ydGXA8VJiS5ap43JXiUFFAaQ=="; - }; - github-ed25519 = { - extraHostNames = [ "github.com" ]; - publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOMqqnkVzrm0SdG6UOoqKLsabgH5C9okWi0dh2l9GKJl"; - }; build01 = { hostNames = [ "build01.nix-community.org" ]; publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIElIQ54qAy7Dh63rBudYKdbzJHrrbrrMXLYl7Pkmk88H"; diff --git a/roles/sshd.nix b/roles/sshd.nix index d4d585c..e69de29 100644 --- a/roles/sshd.nix +++ b/roles/sshd.nix @@ -1,12 +0,0 @@ -{ config, lib, pkgs, ... }: -{ - services.openssh = { - enable = true; - passwordAuthentication = false; - useDns = false; - # unbind gnupg sockets if they exists - extraConfig = '' - StreamLocalBindUnlink yes - ''; - }; -} diff --git a/roles/telegraf.nix b/roles/telegraf.nix deleted file mode 100644 index c8734d9..0000000 --- a/roles/telegraf.nix +++ /dev/null @@ -1,128 +0,0 @@ -{ pkgs, lib, config, ... 
}: -let - isVM = lib.any (mod: mod == "xen-blkfront" || mod == "virtio_console") config.boot.initrd.kernelModules; -in -{ - networking.firewall.allowedTCPPorts = [ 9273 ]; - systemd.services.telegraf.path = [ pkgs.nvme-cli ]; - - services.telegraf = { - enable = true; - extraConfig = { - agent.interval = "60s"; - inputs = { - #syslog.server = "unixgram:///run/systemd/journal/syslog"; - #syslog.best_effort = true; - #syslog.syslog_standard = "RFC3164"; - prometheus.urls = lib.mkIf (config.services.promtail.enable) [ - # default promtail port - "http://localhost:9080/metrics" - ]; - prometheus.metric_version = 2; - kernel_vmstat = { }; - smart = lib.mkIf (!isVM) { - path = pkgs.writeShellScript "smartctl" '' - exec /run/wrappers/bin/sudo ${pkgs.smartmontools}/bin/smartctl "$@" - ''; - }; - mdstat = { }; - system = { }; - mem = { }; - file = [{ - data_format = "influx"; - file_tag = "name"; - files = [ "/var/log/telegraf/*" ]; - }] ++ lib.optional (lib.any (fs: fs == "ext4") config.boot.supportedFilesystems) { - name_override = "ext4_errors"; - files = [ "/sys/fs/ext4/*/errors_count" ]; - data_format = "value"; - }; - exec = [{ - ## Commands array - commands = (lib.optional (lib.any (fs: fs == "zfs") config.boot.supportedFilesystems) - (pkgs.writeScript "zpool-health" '' - #!${pkgs.gawk}/bin/awk -f - BEGIN { - while ("${pkgs.zfs}/bin/zpool status" | getline) { - if ($1 ~ /pool:/) { printf "zpool_status,name=%s ", $2 } - if ($1 ~ /state:/) { printf " state=\"%s\",", $2 } - if ($1 ~ /errors:/) { - if (index($2, "No")) printf "errors=0i\n"; else printf "errors=%di\n", $2 - } - } - } - '') - ) ++ ( - let - collectHosts = shares: fs: - if builtins.elem fs.fsType [ "nfs" "nfs3" "nfs4" ] then - shares // ( - let - # also match ipv6 addresses - group = builtins.match "\\[?([^\]]+)]?:([^:]+)$" fs.device; - host = builtins.head group; - path = builtins.elemAt group 1; - in - { - ${host} = (shares.${host} or [ ]) ++ [ path ]; - } - ) - else - shares; - nfsHosts = lib.foldl 
collectHosts { } (builtins.attrValues config.fileSystems); - in - lib.mapAttrsToList - (host: args: - (pkgs.writeScript "zpool-health" '' - #!${pkgs.gawk}/bin/awk -f - BEGIN { - for (i = 2; i < ARGC; i++) { - mounts[ARGV[i]] = 1 - } - while ("${pkgs.nfs-utils}/bin/showmount -e " ARGV[1] | getline) { - if (NR == 1) { continue } - if (mounts[$1] == 1) { - printf "nfs_export,host=%s,path=%s present=1\n", ARGV[1], $1 - } - delete mounts[$1] - } - for (mount in mounts) { - printf "nfs_export,host=%s,path=%s present=0\n", ARGV[1], $1 - } - } - '') + " ${host} ${builtins.concatStringsSep " " args}" - ) - nfsHosts - ); - data_format = "influx"; - }]; - systemd_units = { }; - swap = { }; - disk.tagdrop = { - fstype = [ "tmpfs" "ramfs" "devtmpfs" "devfs" "iso9660" "overlay" "aufs" "squashfs" ]; - device = [ "rpc_pipefs" "lxcfs" "nsfs" "borgfs" ]; - }; - diskio = { }; - }; - outputs.prometheus_client = { - listen = ":9273"; - metric_version = 2; - }; - }; - }; - security.sudo.extraRules = lib.mkIf (!isVM) [{ - users = [ "telegraf" ]; - commands = [{ - command = "${pkgs.smartmontools}/bin/smartctl"; - options = [ "NOPASSWD" ]; - }]; - }]; - # avoid logging sudo use - security.sudo.configFile = '' - Defaults:telegraf !syslog,!pam_session - ''; - # create dummy file to avoid telegraf errors - systemd.tmpfiles.rules = [ - "f /var/log/telegraf/dummy 0444 root root - -" - ]; -} diff --git a/roles/zfs.nix b/roles/zfs.nix deleted file mode 100644 index 91e485c..0000000 --- a/roles/zfs.nix +++ /dev/null @@ -1,13 +0,0 @@ -{ ... }: { - services.zfs = { - autoSnapshot.enable = true; - # defaults to 12, which is a bit much given how much data is written - autoSnapshot.monthly = 1; - autoScrub.enable = true; - }; - - # ZFS already has its own scheduler. Without this my(@Artturin) computer froze for a second when i nix build something. 
- services.udev.extraRules = '' - ACTION=="add|change", KERNEL=="sd[a-z]*[0-9]*|mmcblk[0-9]*p[0-9]*|nvme[0-9]*n[0-9]*p[0-9]*", ENV{ID_FS_TYPE}=="zfs_member", ATTR{../queue/scheduler}="none" - ''; -} diff --git a/tasks.py b/tasks.py index c8274d4..76bbffa 100644 --- a/tasks.py +++ b/tasks.py @@ -164,6 +164,29 @@ def deploy(c, hosts=""): deploy_nixos(get_hosts(hosts)) +@task +def build_local(c, hosts=""): + """ + Build all servers. Use inv build-local --hosts build01 to build a single server + """ + g = DeployGroup(get_hosts(hosts)) + + def build_local(h: DeployHost) -> None: + h.run_local( + [ + "nixos-rebuild", + "build", + "--option", + "accept-flake-config", + "true", + "--flake", + f".#{h.host}", + ] + ) + + g.run_function(build_local) + + def wait_for_port(host: str, port: int, shutdown: bool = False) -> None: import socket, time