feat(backup): pg_dump + state-tarball + daily auto + UI
Production-Box braucht Backups — bisher keine. Jetzt komplette
Pipeline:
Backend (internal/services/backup/):
- Output: /var/backups/edgeguard/eg-YYYYMMDD-HHMMSS.tar.gz
- Inhalt: dump.sql (pg_dump --clean --if-exists --no-owner --no-acl),
files/setup.json, files/license_key, files/license.cache,
files/.jwt_fingerprint, files/node.conf, files/acme-account/* +
manifest.json (Version, kind, hostname, sizes)
- sha256 während-write via TeeWriter, Size + sha in backups-DB-Row
- Failure-Path: row mit status=failed + error, kein orphan-tarball
- Prune(keepN=14) löscht erfolgreiche Backups älter als die letzten N
Migration 0018: backups(id, file, size, sha256, db/files bytes, kind,
status, error, host, started/finished).
Scheduler (cmd/edgeguard-scheduler):
- 24h-Tick → backup.Run(KindScheduled) + Prune. Beim Boot wird ein
initialer Backup NICHT sofort gezogen (kein nervöses Spam),
sondern erst beim nächsten 24h-Slot.
REST (internal/handlers/backup.go):
GET /api/v1/backups — list (newest first)
POST /api/v1/backups — trigger manual (sync, audit'ed)
GET /api/v1/backups/:id — single
GET /api/v1/backups/:id/download — sendfile tar.gz
DELETE /api/v1/backups/:id — entferne file + row
UI (management-ui/src/pages/Backups):
- Liste mit Time, File+sha (first 16), Kind-Tag, Status, Size (mit
DB + Files Aufschlüsselung), Dauer
- „Backup jetzt erstellen" Button, Refresh, Download, Delete
- Auto-Refresh 30s
- Sidebar-Eintrag „Backups" unter System
postinst:
- /var/backups/edgeguard 0750 edgeguard:edgeguard (enthält sensitive
pg_dump + license_key → NICHT world-readable)
- sudoers-Whitelist `sudo -u postgres /usr/bin/pg_dump --clean
--if-exists --no-owner --no-acl edgeguard` — exakte Form
Verifiziert auf der Box: backups-Tabelle existiert, scheduler logged
„backup enabled tick=24h dir=/var/backups/edgeguard keep_n=14",
pg_dump-via-sudoers liefert 2808 lines.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -33,6 +33,7 @@ import (
|
||||
"git.netcell-it.de/projekte/edgeguard-native/internal/services/audit"
|
||||
"git.netcell-it.de/projekte/edgeguard-native/internal/services/backends"
|
||||
"git.netcell-it.de/projekte/edgeguard-native/internal/services/backendservers"
|
||||
"git.netcell-it.de/projekte/edgeguard-native/internal/services/backup"
|
||||
dnssvc "git.netcell-it.de/projekte/edgeguard-native/internal/services/dns"
|
||||
"git.netcell-it.de/projekte/edgeguard-native/internal/services/domains"
|
||||
"git.netcell-it.de/projekte/edgeguard-native/internal/services/firewall"
|
||||
@@ -50,7 +51,7 @@ import (
|
||||
wgsvc "git.netcell-it.de/projekte/edgeguard-native/internal/services/wireguard"
|
||||
)
|
||||
|
||||
var version = "1.0.63"
|
||||
var version = "1.0.64"
|
||||
|
||||
func main() {
|
||||
addr := os.Getenv("EDGEGUARD_API_ADDR")
|
||||
@@ -194,6 +195,10 @@ func main() {
|
||||
|
||||
// /logs (Phase 4): aggregierter Reader für journalctl + audit_log
|
||||
handlers.NewLogsHandler(syslogs.New(auditRepo)).Register(authed)
|
||||
|
||||
// /backups — manueller Trigger + Liste + Download. Scheduled-
|
||||
// Jobs laufen im edgeguard-scheduler.
|
||||
handlers.NewBackupHandler(backup.New(pool), auditRepo, nodeID, version).Register(authed)
|
||||
handlers.NewTLSCertsHandler(tlsRepo, auditRepo, nodeID, acmeService).Register(authed)
|
||||
// Firewall reload: nach jeder Mutation den Renderer neu fahren
|
||||
// (writes ruleset.nft + sudo nft -f). Errors loggen, nicht failen.
|
||||
|
||||
@@ -9,7 +9,7 @@ import (
|
||||
"os"
|
||||
)
|
||||
|
||||
var version = "1.0.63"
|
||||
var version = "1.0.64"
|
||||
|
||||
const usage = `edgeguard-ctl — EdgeGuard CLI
|
||||
|
||||
|
||||
@@ -18,13 +18,14 @@ import (
|
||||
"git.netcell-it.de/projekte/edgeguard-native/internal/database"
|
||||
"git.netcell-it.de/projekte/edgeguard-native/internal/license"
|
||||
"git.netcell-it.de/projekte/edgeguard-native/internal/services/acme"
|
||||
"git.netcell-it.de/projekte/edgeguard-native/internal/services/backup"
|
||||
"git.netcell-it.de/projekte/edgeguard-native/internal/services/certrenewer"
|
||||
licsvc "git.netcell-it.de/projekte/edgeguard-native/internal/services/license"
|
||||
"git.netcell-it.de/projekte/edgeguard-native/internal/services/setup"
|
||||
"git.netcell-it.de/projekte/edgeguard-native/internal/services/tlscerts"
|
||||
)
|
||||
|
||||
var version = "1.0.63"
|
||||
var version = "1.0.64"
|
||||
|
||||
const (
|
||||
// renewTickInterval — how often we re-evaluate expiring certs.
|
||||
@@ -39,6 +40,11 @@ const (
|
||||
// licenseTickInterval — daily re-verify against
|
||||
// license.netcell-it.com. Result lands in the licenses table.
|
||||
licenseTickInterval = 24 * time.Hour
|
||||
|
||||
// backupTickInterval — interval between scheduled backups. The
|
||||
// time.Ticker starts at process boot, so runs happen every 24h counted from boot, not at a fixed time of day.
|
||||
// Retention: 14 erfolgreiche Backups (default in backup.Service).
|
||||
backupTickInterval = 24 * time.Hour
|
||||
)
|
||||
|
||||
func main() {
|
||||
@@ -74,6 +80,10 @@ func main() {
|
||||
nodeID := os.Getenv("EDGEGUARD_NODE_ID")
|
||||
slog.Info("scheduler: license re-verify enabled", "tick", licenseTickInterval)
|
||||
|
||||
backupSvc := backup.New(pool)
|
||||
slog.Info("scheduler: daily backup enabled", "tick", backupTickInterval,
|
||||
"dir", backupSvc.BackupDir, "keep_n", backup.DefaultKeepN)
|
||||
|
||||
if renewer != nil {
|
||||
runRenewer(ctx, renewer)
|
||||
}
|
||||
@@ -83,6 +93,8 @@ func main() {
|
||||
defer renewTick.Stop()
|
||||
licTick := time.NewTicker(licenseTickInterval)
|
||||
defer licTick.Stop()
|
||||
backupTick := time.NewTicker(backupTickInterval)
|
||||
defer backupTick.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
@@ -92,10 +104,29 @@ func main() {
|
||||
}
|
||||
case <-licTick.C:
|
||||
runLicenseVerify(ctx, licClient, licKeyStore, licRepo, nodeID)
|
||||
case <-backupTick.C:
|
||||
runBackup(ctx, backupSvc, version)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// runBackup führt einen scheduled Backup aus + prunet alte. Failures
|
||||
// loggen wir nur — der Tick läuft morgen wieder, kein Notfall.
|
||||
func runBackup(ctx context.Context, svc *backup.Service, version string) {
|
||||
res, err := svc.Run(ctx, backup.KindScheduled, version)
|
||||
if err != nil {
|
||||
slog.Warn("scheduler: backup failed", "error", err, "file", res.File)
|
||||
return
|
||||
}
|
||||
slog.Info("scheduler: backup done",
|
||||
"file", res.File, "size", res.SizeBytes,
|
||||
"db_bytes", res.DBDumpBytes, "files_bytes", res.FilesBytes,
|
||||
"sha256", res.SHA256)
|
||||
if err := svc.Prune(ctx, backup.DefaultKeepN); err != nil {
|
||||
slog.Warn("scheduler: backup prune failed", "error", err)
|
||||
}
|
||||
}
|
||||
|
||||
// runLicenseVerify performs a single re-verify pass. Empty key = no-op
|
||||
// (box stays in trial), so this is safe to call on every tick.
|
||||
func runLicenseVerify(ctx context.Context, c *license.Client, ks *license.KeyStore,
|
||||
|
||||
Reference in New Issue
Block a user