infra/tasks.py

176 lines
5.8 KiB
Python
Raw Normal View History

2021-10-21 11:09:52 +02:00
#!/usr/bin/env python3
from invoke import task
import sys
2021-10-24 01:04:22 +02:00
from typing import List, Any
2021-10-21 11:09:52 +02:00
from deploy_nixos import DeployHost, DeployGroup
2021-10-24 01:04:22 +02:00
import subprocess
import json
2021-10-21 11:09:52 +02:00
def deploy_nixos(hosts: List[DeployHost]) -> None:
    """
    Push the current directory to every host and switch it to the new
    NixOS configuration.

    The per-host work is handed to ``DeployGroup.run_function`` (described
    as running in parallel by the original author).
    """

    def deploy(h: DeployHost) -> None:
        # Mirror the local checkout (minus .git) into /etc/nixos on the target.
        destination = f"{h.user}@{h.host}:/etc/nixos"
        h.run_local(
            f"rsync --exclude='.git/' -vaF --delete -e ssh . {destination}",
        )

        # Each host keeps its configuration in a directory named after its
        # short hostname (the FQDN without the shared domain suffix).
        short_name = h.host.replace('.nix-community.org', '')
        config = f"/etc/nixos/{short_name}/configuration.nix"

        # nixpkgs is resolved on the target from the pinned sources that were
        # just synced to /etc/nixos/nix.
        nixpkgs = "$(nix-instantiate --eval -E '(import /etc/nixos/nix {}).path')"

        # FIXME: build03 has itself as a builder and deadlocks building packages.
        # Hence remote builders are disabled with an empty --builders value.
        h.run(
            f"nixos-rebuild switch --builders '' -I nixos-config={config} -I nixpkgs={nixpkgs}"
        )

    DeployGroup(hosts).run_function(deploy)
2021-10-24 01:04:22 +02:00
def sfdisk_json(host: DeployHost, dev: str) -> List[Any]:
    """
    Return the partition entries of ``dev`` as reported by ``sfdisk --json``
    executed on ``host``.

    The result is the ``partitiontable.partitions`` list of the parsed JSON.
    """
    # Capture stdout so the JSON document can be parsed locally.
    result = host.run(f"sfdisk --json {dev}", stdout=subprocess.PIPE)
    table = json.loads(result.stdout)["partitiontable"]
    return table["partitions"]
2021-10-21 11:09:52 +02:00
2021-10-24 01:04:22 +02:00
def _format_disks(host: DeployHost, devices: List[str]) -> None:
    """
    Partition ``devices`` on ``host``, create the /boot and ZFS root
    filesystems, and mount everything below /mnt.

    With one device, /boot lives directly on partition 2 and the pool is a
    single vdev. With two devices, /boot is an mdadm RAID1 (/dev/md127) over
    both second partitions and the pool is a ZFS mirror over both third
    partitions.

    Raises AssertionError when ``devices`` does not contain one or two entries.
    """
    assert len(devices) == 1 or len(devices) == 2, "we only support single devices or mirror raids at the moment"

    # Format every disk as follows:
    # - partition 1 is the BIOS boot partition, needed for legacy (BIOS) boot
    # - partition 2 is the /boot partition
    # - partition 3 takes up the rest of the space and is for the system
    for device in devices:
        host.run(f"sgdisk -Z -n 1:2048:4095 -n 2:4096:+2G -N 3 -t 1:ef02 -t 2:8304 -t 3:8304 {device}")

    if len(devices) == 2:
        # create mdadm raid for /boot with ext4
        boot_parts = []
        root_parts = []
        for dev in devices:
            partitions = sfdisk_json(host, dev)
            boot_parts.append(partitions[1]["node"])
            # use partuuids as they are more stable than device names
            root_parts.append(f"/dev/disk/by-partuuid/{partitions[2]['uuid'].lower()}")
        host.run(f"mdadm --create --verbose /dev/md127 --raid-devices=2 --level=1 {' '.join(boot_parts)}")
        # NOTE(review): unlike the single-disk pool below, this one is created
        # without `-O atime=off` -- confirm whether that difference is intended.
        host.run(f"zpool create zroot -O acltype=posixacl -O xattr=sa -O compression=lz4 mirror {' '.join(root_parts)}")
        boot = "/dev/md127"
    else:
        partitions = sfdisk_json(host, devices[0])
        boot = partitions[1]["node"]
        uuid = partitions[2]["uuid"].lower()
        root_part = f"/dev/disk/by-partuuid/{uuid}"
        host.run(f"zpool create zroot -O acltype=posixacl -O xattr=sa -O compression=lz4 -O atime=off {root_part}")

    host.run("partprobe")
    host.run(f"mkfs.ext4 -F {boot}")

    # setup zfs datasets
    host.run("zfs create -o mountpoint=none zroot/root")
    host.run("zfs create -o mountpoint=legacy zroot/root/nixos")
    host.run("zfs create -o mountpoint=legacy zroot/root/home")

    # and finally mount
    host.run("mount -t zfs zroot/root/nixos /mnt")
    host.run("mkdir /mnt/home /mnt/boot")
    host.run("mount -t zfs zroot/root/home /mnt/home")
    # Mount whichever device was formatted as /boot above. /dev/md127 only
    # exists in the two-disk layout, so it must not be hard-coded here.
    host.run(f"mount -t ext4 {boot} /mnt/boot")
@task
def format_disks(c, hosts="", disks=""):
    """
    Format disks with zfs, i.e.: inv format-disks --hosts build02 --disks /dev/nvme0n1,/dev/nvme1n1
    """
    # `disks` is a comma-separated device list; the same list is applied to
    # every selected host.
    devices = disks.split(",")
    for target in get_hosts(hosts):
        _format_disks(target, devices)
2021-10-24 01:04:22 +02:00
@task
def setup_secret(c, hosts=""):
    """
    Setup SSH key and print age key for sops-nix
    """
    host_keys = (
        "/etc/ssh/ssh_host_rsa_key",
        "/etc/ssh/ssh_host_ed25519_key",
    )
    for target in get_hosts(hosts):
        # Copy the running system's host keys to the same path below /mnt,
        # readable by the owner only.
        for key_path in host_keys:
            target.run(f"install -m600 -D {key_path} /mnt{key_path}")

        # Label the output with the host name, then print the age key derived
        # from the ed25519 public host key.
        print(target.host)
        target.run("nix-shell -p ssh-to-age --run 'cat /etc/ssh/ssh_host_ed25519_key.pub | ssh-to-age'")
@task
def nixos_install(c, hosts=""):
    """
    Run NixOS install
    """
    clone_cmd = "nix-shell -p git --run 'git clone https://github.com/nix-community/infra && cd infra && nix-shell'"
    for target in get_hosts(hosts):
        # Fetch the infra repository on the target first.
        target.run(clone_cmd)

        # The system attribute is "<short hostname>-system", where the short
        # hostname is the FQDN without the shared domain suffix.
        system_attr = target.host.replace('.nix-community.org', '') + "-system"
        target.run(f"cd /root/infra && nixos-install --system $(nix-build -A {system_attr})")
def get_hosts(hosts: str) -> List[DeployHost]:
    """
    Turn a comma-separated list of short host names into DeployHost objects.

    An empty string selects the default set build01..build04. Every name is
    suffixed with ".nix-community.org".
    """
    if hosts != "":
        names = hosts.split(",")
    else:
        names = [f"build{index:02d}" for index in range(1, 5)]
    return [DeployHost(f"{name}.nix-community.org") for name in names]
@task
def deploy(c, hosts = ""):
    """
    Deploy to all servers. Use inv deploy --hosts build01 to deploy to a single server
    """
    # An empty `hosts` value makes get_hosts() fall back to its default set.
    deploy_nixos(get_hosts(hosts))
def wait_for_port(host: str, port: int, shutdown: bool = False) -> None:
    """
    Block until the TCP port on ``host`` reaches the desired state.

    With ``shutdown`` false, return as soon as a connection succeeds.
    With ``shutdown`` true, return as soon as a connection attempt fails
    with OSError. A "." is written to stdout after every unsuccessful poll.
    There is no overall timeout.
    """
    import socket
    import time

    def tick(delay: float) -> None:
        # Pause, then emit one progress dot immediately.
        time.sleep(delay)
        sys.stdout.write(".")
        sys.stdout.flush()

    while True:
        try:
            with socket.create_connection((host, port), timeout=1):
                if not shutdown:
                    # Port is reachable: the host is up.
                    return
                # Still reachable while waiting for shutdown: poll again.
                tick(1)
        except OSError:
            if shutdown:
                # Port no longer reachable: the host went down.
                return
            # Not reachable yet while waiting for startup: retry quickly.
            tick(0.01)
@task
def reboot(c, hosts=""):
    """
    Reboot hosts. example usage: inv reboot --hosts build01,build02
    """
    # Hosts are handled one after another: trigger the reboot, wait for the
    # host's port to stop answering, then wait for it to answer again.
    phases = (("shutdown", True), ("start", False))
    for target in get_hosts(hosts):
        # Background the reboot so the remote command returns.
        target.run("reboot &")

        for label, going_down in phases:
            print(f"Wait for {target.host} to {label}", end="", flush=True)
            wait_for_port(target.host, target.port, shutdown=going_down)
            # Terminate the line of progress dots.
            print("")
@task
def cleanup_gcroots(c, hosts=""):
    # Remove entries below the automatic gcroots directory on the selected
    # hosts, then restart the nix-gc unit.
    # NOTE(review): `-type s` matches sockets; if symlinks were intended this
    # should be `-type l` -- confirm before changing.
    group = DeployGroup(get_hosts(hosts))
    for command in (
        "find /nix/var/nix/gcroots/auto -type s -delete",
        "systemctl restart nix-gc",
    ):
        group.run(command)
2022-01-15 13:38:22 +01:00
@task
def restart_cachix_deploy(c, hosts=""):
    # Restart the cachix-deploy-agent unit on every selected host.
    DeployGroup(get_hosts(hosts)).run("systemctl restart cachix-deploy-agent")