diff --git a/hosts/build02/nixpkgs-update-backup.nix b/hosts/build02/nixpkgs-update-backup.nix index 2cb2115..d877adb 100644 --- a/hosts/build02/nixpkgs-update-backup.nix +++ b/hosts/build02/nixpkgs-update-backup.nix @@ -11,6 +11,7 @@ name = "nixpkgs-update"; after = [ config.systemd.services.nixpkgs-update-delete-old-logs.name ]; paths = [ "/var/log/nixpkgs-update" ]; + startAt = "daily"; } ]; } diff --git a/modules/nixos/backup.nix b/modules/nixos/backup.nix index e9b816c..62dba8b 100644 --- a/modules/nixos/backup.nix +++ b/modules/nixos/backup.nix @@ -18,6 +18,12 @@ paths = lib.mkOption { type = lib.types.listOf lib.types.str; }; + startAt = lib.mkOption { + type = lib.types.enum [ + "daily" + "hourly" + ]; + }; }; } ); @@ -39,16 +45,15 @@ builtins.map (backup: { inherit (backup) name; value = { - inherit (backup) paths; + inherit (backup) paths startAt; repo = "u416406@u416406.your-storagebox.de:/./${config.networking.hostName}-${backup.name}"; encryption.mode = "none"; compression = "auto,zstd"; - startAt = "daily"; environment.BORG_RSH = "ssh -oPort=23 -i ${config.age.secrets.hetzner-borgbackup-ssh.path}"; preHook = "set -x"; postHook = '' cat > /var/log/telegraf/borgbackup-job-${backup.name}.service <<EOF - task,frequency=daily last_run=$(date +%s)i,state="$([[ $exitStatus == 0 ]] && echo ok || echo fail)" + task,frequency=${backup.startAt} last_run=$(date +%s)i,state="$([[ $exitStatus == 0 ]] && echo ok || echo fail)" EOF ''; prune.keep = { diff --git a/modules/nixos/github-org-backup.nix b/modules/nixos/github-org-backup.nix index 2267f68..f79cc4b 100644 --- a/modules/nixos/github-org-backup.nix +++ b/modules/nixos/github-org-backup.nix @@ -40,6 +40,7 @@ name = "github-org"; after = [ config.systemd.services.github-org-backup.name ]; paths = [ "/var/lib/github-org-backup" ]; + startAt = "daily"; } ]; } diff --git a/modules/nixos/monitoring/alert-rules.nix b/modules/nixos/monitoring/alert-rules.nix index d07f3b9..5864afe 100644 --- 
a/modules/nixos/monitoring/alert-rules.nix +++ b/modules/nixos/monitoring/alert-rules.nix @@ -12,6 +12,15 @@ annotations.description = "status of ${name} is unknown: no data for a day"; }) ) + // (lib.genAttrs + [ + ] + (name: { + expr = ''absent_over_time(task_last_run{name="${name}"}[1h])''; + for = "1h"; + annotations.description = "status of ${name} is unknown: no data for an hour"; + }) + ) // { Filesystem80percentFull.enable = false; @@ -21,6 +30,12 @@ annotations.description = "{{$labels.host}} device {{$labels.device}} on {{$labels.path}} got less than 5% space left on its filesystem"; }; + HourlyTaskNotRun = { + expr = ''time() - task_last_run{state="ok",frequency="hourly"} > 60 * 60''; + for = "1h"; + annotations.description = "{{$labels.host}}: {{$labels.name}} was not run in the last hour"; + }; + Load15.expr = lib.mkForce ''system_load15 / system_n_cpus{host!~"(build|darwin).*"} >= 2.0''; MatrixHookNotRunning = {