2021-10-21 11:09:52 +02:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
2021-10-24 01:04:22 +02:00
|
|
|
import json
|
2022-12-31 07:24:17 +01:00
|
|
|
import subprocess
|
|
|
|
import sys
|
|
|
|
from typing import Any, List
|
|
|
|
|
|
|
|
from deploykit import DeployGroup, DeployHost
|
|
|
|
from invoke import task
|
2021-10-21 11:09:52 +02:00
|
|
|
|
2023-01-09 10:07:27 +10:00
|
|
|
# Patterns passed to rsync as `--exclude` when the working tree is copied to
# a host: local tool caches, VCS metadata and nix build outputs.
RSYNC_EXCLUDES = [
    ".direnv",
    ".git",
    # Historical spelling, kept so existing behaviour is not narrowed.
    ".mypy-cache",
    # The directory mypy actually creates by default.
    ".mypy_cache",
    ".ruff_cache",
    ".terraform",
    "result*",
]
|
2022-02-04 09:27:45 +01:00
|
|
|
|
2021-10-21 11:09:52 +02:00
|
|
|
|
|
|
|
def deploy_nixos(hosts: List[DeployHost]) -> None:
    """
    Deploy to all hosts in parallel

    For each host the current directory is rsynced to /etc/nixos on the
    target (minus RSYNC_EXCLUDES) and `nixos-rebuild switch` is run there.
    """
    # Function-scope import: only this function needs shell quoting.
    from shlex import quote

    # The rsync command is handed to run_local as one string (presumably run
    # through a shell -- confirm against deploykit). Quote every pattern so a
    # glob such as "result*" reaches rsync verbatim instead of being expanded
    # against the local working directory, where extra matches would turn
    # into additional rsync source arguments.
    exclude_args = "".join(
        f" --exclude {quote(pattern)}" for pattern in RSYNC_EXCLUDES
    )

    def deploy(h: DeployHost) -> None:
        # Fall back to root when the host object carries no explicit user.
        target = f"{h.user or 'root'}@{h.host}"
        h.run_local(
            f"rsync {exclude_args} -vaF --delete -e ssh . {target}:/etc/nixos"
        )
        h.run("nixos-rebuild switch --option accept-flake-config true")

    DeployGroup(hosts).run_function(deploy)
|
|
|
|
|
|
|
|
|
2021-10-24 01:04:22 +02:00
|
|
|
def sfdisk_json(host: DeployHost, dev: str) -> List[Any]:
    """
    Return the partition entries that `sfdisk --json` reports for `dev` on `host`.
    """
    # Capture stdout so the JSON document can be parsed locally.
    result = host.run(f"sfdisk --json {dev}", stdout=subprocess.PIPE)
    table = json.loads(result.stdout)["partitiontable"]
    return table["partitions"]
|
2021-10-21 11:09:52 +02:00
|
|
|
|
2021-10-24 01:04:22 +02:00
|
|
|
|
|
|
|
def _format_disks(host: DeployHost, devices: List[str]) -> None:
    """
    Partition and format `devices` on `host` and mount the result under /mnt.

    With one device, /boot is a plain ext4 partition and the zpool has a
    single member. With two devices, /boot is an mdadm RAID1 (/dev/md127)
    and the zpool is a mirror. Any other device count is rejected.
    """
    assert (
        len(devices) == 1 or len(devices) == 2
    ), "we only support single devices or mirror raids at the moment"

    # format each disk as follows:
    # - partition 1 (type ef02) is the BIOS boot partition, needed for legacy (BIOS) boot
    # - partition 2 (2G) is for /boot
    # - partition 3 takes up the rest of the space and is for the system
    for device in devices:
        host.run(
            f"sgdisk -Z -n 1:2048:4095 -n 2:4096:+2G -N 3 -t 1:ef02 -t 2:8304 -t 3:8304 {device}"
        )

    if len(devices) == 2:
        # create mdadm raid for /boot and a mirrored zpool for the system
        boot_parts = []
        root_parts = []
        for dev in devices:
            partitions = sfdisk_json(host, dev)
            # /boot members use the device node reported by sfdisk ...
            boot_parts.append(partitions[1]["node"])
            # ... while zpool members use partuuids, as they are more stable
            # than device names
            root_parts.append(f"/dev/disk/by-partuuid/{partitions[2]['uuid'].lower()}")

        host.run(
            f"mdadm --create --verbose /dev/md127 --raid-devices=2 --level=1 {' '.join(boot_parts)}"
        )
        # NOTE(review): unlike the single-device pool below, this pool is
        # created without `-O atime=off` -- confirm whether that is intended.
        host.run(
            f"zpool create zroot -O acltype=posixacl -O xattr=sa -O compression=lz4 mirror {' '.join(root_parts)}"
        )
        boot = "/dev/md127"
    else:
        partitions = sfdisk_json(host, devices[0])
        boot = partitions[1]["node"]
        uuid = partitions[2]["uuid"].lower()
        root_part = f"/dev/disk/by-partuuid/{uuid}"
        host.run(
            f"zpool create zroot -O acltype=posixacl -O xattr=sa -O compression=lz4 -O atime=off {root_part}"
        )

    host.run("partprobe")
    host.run(f"mkfs.ext4 -F {boot}")

    # setup zfs dataset
    host.run("zfs create -o mountpoint=none zroot/root")
    host.run("zfs create -o mountpoint=legacy zroot/root/nixos")
    host.run("zfs create -o mountpoint=legacy zroot/root/home")

    # and finally mount
    host.run("mount -t zfs zroot/root/nixos /mnt")
    host.run("mkdir /mnt/home /mnt/boot")
    host.run("mount -t zfs zroot/root/home /mnt/home")
    # Mount whatever was formatted as /boot above: /dev/md127 only exists in
    # the two-device case, so it must not be hard-coded here.
    host.run(f"mount -t ext4 {boot} /mnt/boot")
|
2021-10-24 01:04:22 +02:00
|
|
|
|
|
|
|
|
2022-10-25 09:55:14 +02:00
|
|
|
@task
def update_sops_files(c):
    """
    Update all sops yaml and json files according to .sops.yaml rules
    """
    # Raw string: every backslash here is meant for the shell (line
    # continuations and the escaped `(`, `)` and `;` that find needs). In a
    # normal literal `\(`, `\)` and `\;` are invalid escape sequences, which
    # newer Python versions warn about.
    c.run(
        r"""
        find . \
            -type f \
            \( -iname '*.enc.json' -o -iname 'secrets.yaml' \) \
            -exec sops updatekeys --yes {} \;
        """
    )
|
|
|
|
|
2022-12-31 07:24:17 +01:00
|
|
|
|
2022-12-30 20:51:58 +01:00
|
|
|
@task
def scan_age_keys(c, host):
    """
    Scans for the host key via ssh and converts it to age
    """
    # Collect the public host keys as text; a failing ssh-keyscan aborts.
    keyscan = subprocess.run(
        ["ssh-keyscan", host], check=True, text=True, stdout=subprocess.PIPE
    )

    print("###### Age keys ######")

    # Pipe the scanned keys into ssh-to-age; its output is not captured.
    converter = ["nix", "run", "--inputs-from", ".#", "nixpkgs#ssh-to-age"]
    subprocess.run(converter, input=keyscan.stdout, text=True, check=True)
|
2022-12-30 20:51:58 +01:00
|
|
|
|
2022-10-25 09:55:14 +02:00
|
|
|
|
2021-10-24 01:04:22 +02:00
|
|
|
@task
def format_disks(c, hosts="", disks=""):
    """
    Format disks with zfs, i.e.: inv format-disks --hosts build02 --disks /dev/nvme0n1,/dev/nvme1n1
    """
    devices = [entry.strip() for entry in disks.split(",")]
    # "".split(",") is [""], and a trailing comma also yields a blank entry.
    # Reject those before any host is touched, instead of failing midway
    # after the first disk has already been wiped.
    if not all(devices):
        raise ValueError(
            "--disks must be a comma-separated list of non-empty device paths"
        )

    for h in get_hosts(hosts):
        _format_disks(h, devices)
|
2021-10-24 01:04:22 +02:00
|
|
|
|
|
|
|
|
|
|
|
@task
def setup_secret(c, hosts=""):
    """
    Setup SSH key and print age key for sops-nix
    """
    # Host keys copied from the running system into the tree under /mnt.
    key_names = ("ssh_host_rsa_key", "ssh_host_ed25519_key")
    print_age_key = "nix-shell -p ssh-to-age --run 'cat /etc/ssh/ssh_host_ed25519_key.pub | ssh-to-age'"

    for h in get_hosts(hosts):
        for key in key_names:
            h.run(f"install -m600 -D /etc/ssh/{key} /mnt/etc/ssh/{key}")
        print(h.host)
        h.run(print_age_key)
|
2021-10-24 01:04:22 +02:00
|
|
|
|
|
|
|
|
|
|
|
@task
def nixos_install(c, hosts=""):
    """
    Run NixOS install
    """
    clone_cmd = "nix-shell -p git --run 'git clone https://github.com/nix-community/infra && cd infra && nix-shell'"

    for h in get_hosts(hosts):
        h.run(clone_cmd)
        # The build attribute is named after the host without its domain.
        short_name = h.host.replace(".nix-community.org", "")
        install_cmd = f"cd /root/infra && nixos-install --system $(nix-build -A {short_name}-system)"
        h.run(install_cmd)
|
2021-10-24 01:04:22 +02:00
|
|
|
|
|
|
|
|
|
|
|
def get_hosts(hosts: str) -> List[DeployHost]:
    """
    Turn a comma-separated list of short host names into DeployHost objects.

    An empty string selects build01 through build04. Every host gets the
    `.nix-community.org` suffix and the `root` user.
    """
    if hosts == "":
        names = [f"build{number:02d}" for number in range(1, 5)]
    else:
        names = hosts.split(",")
    return [DeployHost(f"{name}.nix-community.org", user="root") for name in names]
|
2021-10-21 11:09:52 +02:00
|
|
|
|
|
|
|
|
|
|
|
@task
def deploy(c, hosts=""):
    """
    Deploy to all servers. Use inv deploy --hosts build01 to deploy to a single server
    """
    targets = get_hosts(hosts)
    deploy_nixos(targets)
|
|
|
|
|
|
|
|
|
2022-12-19 15:39:59 +01:00
|
|
|
@task
def build_local(c, hosts=""):
    """
    Build all servers. Use inv build-local --hosts build01 to build a single server
    """

    def build_one(h: DeployHost) -> None:
        # Runs on the local machine; the flake attribute is the full host name.
        command = [
            "nixos-rebuild",
            "build",
            "--option",
            "accept-flake-config",
            "true",
            "--flake",
            f".#{h.host}",
        ]
        h.run_local(command)

    DeployGroup(get_hosts(hosts)).run_function(build_one)
|
|
|
|
|
|
|
|
|
2021-10-21 11:09:52 +02:00
|
|
|
def wait_for_port(host: str, port: int, shutdown: bool = False) -> None:
    """
    Block until a TCP port changes to the awaited state.

    By default, return once a connection to (host, port) succeeds. With
    `shutdown=True`, return once a connection attempt fails. A dot is
    written to stdout after every unsuccessful poll.
    """
    import socket
    import time

    def tick(delay: float) -> None:
        # Pause, then emit one progress dot without buffering.
        time.sleep(delay)
        sys.stdout.write(".")
        sys.stdout.flush()

    while True:
        try:
            with socket.create_connection((host, port), timeout=1):
                if not shutdown:
                    # Port is reachable, which is what we waited for.
                    return
                # Still up while waiting for shutdown: poll again in a second.
                tick(1)
        except OSError:
            if shutdown:
                # Connection failed, so the host is considered down.
                return
            tick(0.01)
|
|
|
|
|
|
|
|
|
|
|
|
@task
def reboot(c, hosts=""):
    """
    Reboot hosts. example usage: inv reboot --hosts build01,build02
    """
    # First wait for the port to stop answering, then for it to answer again.
    phases = (("shutdown", True), ("start", False))

    for h in get_hosts(hosts):
        h.run("reboot &")

        for phase, going_down in phases:
            print(f"Wait for {h.host} to {phase}", end="")
            sys.stdout.flush()
            wait_for_port(h.host, h.port, shutdown=going_down)
            print("")
|
|
|
|
|
|
|
|
|
|
|
|
@task
def cleanup_gcroots(c, hosts=""):
    """
    Delete socket files under /nix/var/nix/gcroots/auto and restart nix-gc
    """
    group = DeployGroup(get_hosts(hosts))
    commands = (
        "find /nix/var/nix/gcroots/auto -type s -delete",
        "systemctl restart nix-gc",
    )
    for command in commands:
        group.run(command)
|