Merge pull request from nix-community/ci

Pull in common configuration from srvos
This commit is contained in:
Jörg Thalheim 2022-12-23 08:13:39 +00:00 committed by GitHub
commit f1b566ca48
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 83 additions and 247 deletions

View file

@ -121,21 +121,33 @@ $ inv deploy --hosts build02 reboot --hosts build02
```
## Install/Fix system from Hetzner recovery mode
1. Install kexec image from Hetzner recovery system as described in [kexec.nix](roles/kexec.nix) and boot into it
2. Format and/or mount all filesystems to /mnt:
1. Copy your ssh key to the recovery system so that the kexec image can re-use it.
``` console
yourmachine> ssh-copy-id root@build0X.nix-community.org
```
2. Download and boot into kexec-image:
``` console
$ curl -L https://github.com/nix-community/nixos-images/releases/download/nixos-unstable/nixos-kexec-installer-x86_64-linux.tar.gz | tar -xzf- -C /root
$ /root/kexec/run
```
3. Format and/or mount all filesystems to /mnt:
```console
$ inv format-disks --hosts buildXX --disks /dev/nvme0n1,/dev/nvme1n1
```
3. Setup secrets
4. Setup secrets
```console
$ inv setup-secret --hosts buildXX
```
4. Generate configuration and download to the repo
5. Generate configuration and download to the repo
```console
$ nixos-generate-config --root /tmp
@ -143,7 +155,7 @@ $ nixos-generate-config --root /tmp
$ scp buildXX.nix-community.org:/tmp/etc/nixos/hardware-configuration.nix buildXX/hardware-configuration.nix
```
5. Build and install
6. Build and install
```console
$ inv install-nixos --hosts buildXX

31
flake.lock generated
View file

@ -23,11 +23,11 @@
]
},
"locked": {
"lastModified": 1670441596,
"narHash": "sha256-+T487QnluBT5F9tVk0chG/zzv+9zzTrx3o7rlOBK7ps=",
"lastModified": 1671322946,
"narHash": "sha256-J8Qj+ITV+eti+irTK9Zn2LZVYoIW2g7irPUckU8yZvU=",
"owner": "hercules-ci",
"repo": "flake-parts",
"rev": "8d0e2444ab05f79df93b70e5e497f8c708eb6b9b",
"rev": "3f7172646953bf86dad5953bc45f0edae62ac445",
"type": "github"
},
"original": {
@ -69,8 +69,8 @@
},
"original": {
"owner": "NixOS",
"ref": "nixos-unstable-small",
"repo": "nixpkgs",
"rev": "34274e6c8604be2d103606b11dae0ac2e3a0d584",
"type": "github"
}
},
@ -180,7 +180,8 @@
"nixpkgs-update": "nixpkgs-update",
"nixpkgs-update-github-releases": "nixpkgs-update-github-releases",
"nixpkgs-update-pypi-releases": "nixpkgs-update-pypi-releases",
"sops-nix": "sops-nix"
"sops-nix": "sops-nix",
"srvos": "srvos"
}
},
"sops-nix": {
@ -203,6 +204,26 @@
"repo": "sops-nix",
"type": "github"
}
},
"srvos": {
"inputs": {
"nixpkgs": [
"nixpkgs"
]
},
"locked": {
"lastModified": 1671482743,
"narHash": "sha256-YqOjz4ZY++p6/siB0eygD0kFeYJwQgwfkz2W/d9JWkA=",
"owner": "numtide",
"repo": "srvos",
"rev": "dcd08ecab2efc069b0a3326415f740a927a1f023",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "srvos",
"type": "github"
}
}
},
"root": "root",

View file

@ -11,7 +11,9 @@
];
inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable-small";
# FIXME: hercules ci is currently broken in latest nixpkgs
# nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable-small";
nixpkgs.url = "github:NixOS/nixpkgs/34274e6c8604be2d103606b11dae0ac2e3a0d584";
nixpkgs-update.url = "github:ryantm/nixpkgs-update";
nixpkgs-update-github-releases.url = "github:ryantm/nixpkgs-update-github-releases";
nixpkgs-update-github-releases.flake = false;
@ -20,17 +22,17 @@
sops-nix.url = "github:Mic92/sops-nix";
sops-nix.inputs.nixpkgs.follows = "nixpkgs";
srvos.url = "github:numtide/srvos";
# actually not used when using the modules, but then nothing will ever try to fetch this nixpkgs variant
srvos.inputs.nixpkgs.follows = "nixpkgs";
flake-parts.url = "github:hercules-ci/flake-parts";
flake-parts.inputs.nixpkgs-lib.follows = "nixpkgs";
};
outputs = {
self,
flake-parts,
...
}:
outputs = inputs @ {flake-parts, ...}:
flake-parts.lib.mkFlake
{inherit self;}
{inherit inputs;}
{
systems = ["x86_64-linux" "aarch64-linux" "x86_64-darwin" "aarch64-darwin"];
@ -44,10 +46,14 @@
};
};
flake.nixosConfigurations = let
inherit (self.inputs.nixpkgs.lib) nixosSystem;
inherit (inputs.nixpkgs.lib) nixosSystem;
common = [
self.inputs.sops-nix.nixosModules.sops
{ _module.args.inputs = self.inputs; }
{ _module.args.inputs = inputs; }
inputs.sops-nix.nixosModules.sops
inputs.srvos.nixosModules.common
inputs.srvos.nixosModules.telegraf
{ networking.firewall.allowedTCPPorts = [ 9273 ]; }
];
in {
"build01.nix-community.org" = nixosSystem {
@ -66,7 +72,7 @@
++ [
(import ./build02/nixpkgs-update.nix {
inherit
(self.inputs)
(inputs)
nixpkgs-update
nixpkgs-update-github-releases
nixpkgs-update-pypi-releases

View file

@ -1,21 +1,12 @@
{ pkgs, lib, config, ... }:
{
imports = [
./auto-upgrade.nix
./nix-daemon.nix
./security.nix
./sops-nix.nix
./sshd.nix
./telegraf.nix
./users.nix
./zfs.nix
];
environment.systemPackages = [
# for quick activity overview
pkgs.htop
];
# Nicer interactive shell
@ -30,15 +21,6 @@
# Just disable it since we are using telegraf to monitor raid health.
systemd.services.mdmonitor.enable = false;
# Make debugging failed units easier
systemd.extraConfig = ''
DefaultStandardOutput=journal
DefaultStandardError=journal
'';
# The nix-community is global :)
time.timeZone = "UTC";
# speed-up evaluation & save disk space by disabling manpages
documentation.enable = false;

View file

@ -1,28 +0,0 @@
{ config, lib, pkgs, ... }:
# build with:
# nix-shell -p nixos-generators --run 'nixos-generate -o ./result -f kexec-bundle -c ./roles/kexec.nix'
{
imports = [
./users.nix
./sshd.nix
];
# ttyAMA0 is the console device on aarch64
boot.kernelParams = [ "console=ttyS0,115200n8" "console=ttyAMA0,115200n8" "console=tty0" ];
}
# Hetzner bootstrap from rescue system
#
#useradd -m -s /bin/bash foo
#install -d -m700 -o foo /nix
#su - foo
#curl -L https://nixos.org/nix/install | bash
#. /home/foo/.nix-profile/etc/profile.d/nix.sh
#git clone https://github.com/nix-community/infra && cd infra
#nix-shell
#nix-shell -p nixos-generators --run 'nixos-generate -o ./result -f kexec-bundle -c ./roles/kexec.nix'
#exit
#exit
#/home/foo/infra/result
#after reboot:
#$ systemctl stop autoreboot.timer

View file

@ -20,12 +20,6 @@ in
settings.min-free = asGB 10;
settings.max-free = asGB 200;
# avoid copying unnecessary stuff over SSH
settings.builders-use-substitutes = true;
# allow flakes
settings.experimental-features = "nix-command flakes";
# users in trusted group are trusted by the nix-daemon
settings.trusted-users = [ "@trusted" ];

View file

@ -1,30 +1,9 @@
{ config, pkgs, lib, ... }:
{
# Make sure that the firewall is enabled, even if it's the default.
networking.firewall.enable = true;
# Allow password-less sudo for wheel users
security.sudo.enable = true;
security.sudo.wheelNeedsPassword = false;
# Don't let users create their own authorized keys files
services.openssh.authorizedKeysFiles = lib.mkForce [
"/etc/ssh/authorized_keys.d/%u"
];
services.openssh.kbdInteractiveAuthentication = false;
services.openssh.passwordAuthentication = false;
programs.ssh.knownHosts = {
github-rsa = {
extraHostNames = [ "github.com" ];
publicKey = "ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAq2A7hRGmdnm9tUDbO9IDSwBK6TbQa+PXYPCPy6rbTrTtw7PHkccKrpp0yVhp5HdEIcKr6pLlVDBfOLX9QUsyCOV0wzfjIJNlGEYsdlLJizHhbn2mUjvSAHQqZETYP81eFzLQNnPHt4EVVUh7VfDESU84KezmD5QlWpXLmvU31/yMf+Se8xhHTvKSCZIFImWwoG6mbUoWf9nzpIoaSjB+weqqUUmpaaasXVal72J+UX2B+2RPW3RcT0eOzQgqlJL3RKrTJvdsjE3JEAvGq3lGHSZXy28G3skua2SmVi/w4yCE6gbODqnTWlg7+wC604ydGXA8VJiS5ap43JXiUFFAaQ==";
};
github-ed25519 = {
extraHostNames = [ "github.com" ];
publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOMqqnkVzrm0SdG6UOoqKLsabgH5C9okWi0dh2l9GKJl";
};
build01 = {
hostNames = [ "build01.nix-community.org" ];
publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIElIQ54qAy7Dh63rBudYKdbzJHrrbrrMXLYl7Pkmk88H";

View file

@ -1,12 +0,0 @@
{ config, lib, pkgs, ... }:
{
services.openssh = {
enable = true;
passwordAuthentication = false;
useDns = false;
# unbind gnupg sockets if they exist
extraConfig = ''
StreamLocalBindUnlink yes
'';
};
}

View file

@ -1,128 +0,0 @@
# NixOS module that enables telegraf with a set of host-health inputs and
# exposes the collected metrics through a prometheus_client output on :9273.
{ pkgs, lib, config, ... }:
let
  # The host is treated as a VM when the initrd loads one of these
  # paravirtual drivers; SMART monitoring and its sudo rule are skipped then.
  isVM = lib.any (mod: mod == "xen-blkfront" || mod == "virtio_console") config.boot.initrd.kernelModules;
in
{
  # Matches the prometheus_client listen port configured below.
  networking.firewall.allowedTCPPorts = [ 9273 ];

  systemd.services.telegraf.path = [ pkgs.nvme-cli ];

  services.telegraf = {
    enable = true;
    extraConfig = {
      agent.interval = "60s";
      inputs = {
        #syslog.server = "unixgram:///run/systemd/journal/syslog";
        #syslog.best_effort = true;
        #syslog.syslog_standard = "RFC3164";
        prometheus.urls = lib.mkIf (config.services.promtail.enable) [
          # default promtail port
          "http://localhost:9080/metrics"
        ];
        prometheus.metric_version = 2;
        kernel_vmstat = { };
        # smartctl is invoked through sudo; the matching NOPASSWD rule is
        # declared in security.sudo.extraRules further down.
        smart = lib.mkIf (!isVM) {
          path = pkgs.writeShellScript "smartctl" ''
            exec /run/wrappers/bin/sudo ${pkgs.smartmontools}/bin/smartctl "$@"
          '';
        };
        mdstat = { };
        system = { };
        mem = { };
        file = [{
          data_format = "influx";
          file_tag = "name";
          files = [ "/var/log/telegraf/*" ];
        }] ++ lib.optional (lib.any (fs: fs == "ext4") config.boot.supportedFilesystems) {
          name_override = "ext4_errors";
          files = [ "/sys/fs/ext4/*/errors_count" ];
          data_format = "value";
        };
        exec = [{
          ## Commands array
          # First entry (only when zfs is a supported filesystem): emits one
          # zpool_status line per pool parsed from `zpool status`.
          commands = (lib.optional (lib.any (fs: fs == "zfs") config.boot.supportedFilesystems)
            (pkgs.writeScript "zpool-health" ''
              #!${pkgs.gawk}/bin/awk -f
              BEGIN {
                while ("${pkgs.zfs}/bin/zpool status" | getline) {
                  if ($1 ~ /pool:/) { printf "zpool_status,name=%s ", $2 }
                  if ($1 ~ /state:/) { printf " state=\"%s\",", $2 }
                  if ($1 ~ /errors:/) {
                    if (index($2, "No")) printf "errors=0i\n"; else printf "errors=%di\n", $2
                  }
                }
              }
            '')
          ) ++ (
            let
              # Fold step: group the export paths of all NFS mounts by server,
              # producing { "<host>" = [ "<path>" ... ]; }.
              collectHosts = shares: fs:
                if builtins.elem fs.fsType [ "nfs" "nfs3" "nfs4" ] then
                  shares // (
                    let
                      # also match ipv6 addresses
                      group = builtins.match "\\[?([^\]]+)]?:([^:]+)$" fs.device;
                      host = builtins.head group;
                      path = builtins.elemAt group 1;
                    in
                    {
                      ${host} = (shares.${host} or [ ]) ++ [ path ];
                    }
                  )
                else
                  shares;
              nfsHosts = lib.foldl collectHosts { } (builtins.attrValues config.fileSystems);
            in
            # One command per NFS server: "<script> <host> <path>...".
            lib.mapAttrsToList
              (host: args:
                (pkgs.writeScript "nfs-export-health" ''
                  #!${pkgs.gawk}/bin/awk -f
                  BEGIN {
                    for (i = 2; i < ARGC; i++) {
                      mounts[ARGV[i]] = 1
                    }
                    while ("${pkgs.nfs-utils}/bin/showmount -e " ARGV[1] | getline) {
                      if (NR == 1) { continue }
                      if (mounts[$1] == 1) {
                        printf "nfs_export,host=%s,path=%s present=1\n", ARGV[1], $1
                      }
                      delete mounts[$1]
                    }
                    # Whatever is left was expected but not exported by the server.
                    # Report the array key itself, not the stale $1 of the last line read.
                    for (mount in mounts) {
                      printf "nfs_export,host=%s,path=%s present=0\n", ARGV[1], mount
                    }
                  }
                '') + " ${host} ${builtins.concatStringsSep " " args}"
              )
              nfsHosts
          );
          data_format = "influx";
        }];
        systemd_units = { };
        swap = { };
        disk.tagdrop = {
          fstype = [ "tmpfs" "ramfs" "devtmpfs" "devfs" "iso9660" "overlay" "aufs" "squashfs" ];
          device = [ "rpc_pipefs" "lxcfs" "nsfs" "borgfs" ];
        };
        diskio = { };
      };
      outputs.prometheus_client = {
        listen = ":9273";
        metric_version = 2;
      };
    };
  };

  # Lets the telegraf user run smartctl without a password (used by inputs.smart).
  security.sudo.extraRules = lib.mkIf (!isVM) [{
    users = [ "telegraf" ];
    commands = [{
      command = "${pkgs.smartmontools}/bin/smartctl";
      options = [ "NOPASSWD" ];
    }];
  }];

  # avoid logging sudo use
  security.sudo.configFile = ''
    Defaults:telegraf !syslog,!pam_session
  '';

  # create dummy file to avoid telegraf errors
  systemd.tmpfiles.rules = [
    "f /var/log/telegraf/dummy 0444 root root - -"
  ];
}

View file

@ -1,13 +0,0 @@
{ ... }: {
services.zfs = {
autoSnapshot.enable = true;
# defaults to 12, which is a bit much given how much data is written
autoSnapshot.monthly = 1;
autoScrub.enable = true;
};
# ZFS already has its own scheduler. Without this, my (@Artturin) computer froze for a second whenever I ran nix build on something.
services.udev.extraRules = ''
ACTION=="add|change", KERNEL=="sd[a-z]*[0-9]*|mmcblk[0-9]*p[0-9]*|nvme[0-9]*n[0-9]*p[0-9]*", ENV{ID_FS_TYPE}=="zfs_member", ATTR{../queue/scheduler}="none"
'';
}

View file

@ -164,6 +164,29 @@ def deploy(c, hosts=""):
deploy_nixos(get_hosts(hosts))
@task
def build_local(c, hosts=""):
    """
    Build all servers. Use inv build-local --hosts build01 to build a single server
    """
    g = DeployGroup(get_hosts(hosts))

    # Per-host worker: builds that host's flake output on the local machine.
    def build_local(h: DeployHost) -> None:
        h.run_local(
            [
                "nixos-rebuild",
                "build",
                # accept the flake's own nix config without prompting
                "--option",
                "accept-flake-config",
                "true",
                "--flake",
                f".#{h.host}",
            ]
        )

    g.run_function(build_local)
def wait_for_port(host: str, port: int, shutdown: bool = False) -> None:
import socket, time