feat(backup): Restore-Pfad — POST /backups/:id/restore + UI

backup.Service.Restore(id) schreibt /var/lib/edgeguard/restore.sh
und dispatcht via `sudo systemd-run --unit=edgeguard-restore.service`.
Skript-Ablauf:
  1. tar -xzf der Backup-Datei → /var/lib/edgeguard/restore-tmp
  2. state-files (setup.json/license/jwt/node.conf/acme-account) per
     cp -a zurück, chown edgeguard
  3. systemctl stop edgeguard-api + scheduler (DB-Connections freigeben)
  4. sudo -u postgres psql -f dump.sql (--clean droppt + recreated)
  5. edgeguard-ctl render-config (haproxy/nft/squid/unbound/chrony)
  6. systemctl start edgeguard-api + scheduler
  7. rm -rf restore-tmp + restore.sh

UI: pro Backup-Row neuer Restore-Button mit Popconfirm. Beim Trigger
zeigt sich das vertraute Fullscreen-Overlay (Klassen .update-modal*
re-used) mit 4 Steps (Extract / DB-Restore / Render / Restart) + Live-
Timer. Health-Poll alle 3s detektiert API-Restart + reload. Safety-
Timeout 3 min für große DB-Dumps.

postinst: sudoers für `systemd-run --unit=edgeguard-restore.service
--description=... --collect bash /var/lib/edgeguard/restore.sh` +
zugehöriges `systemctl reset-failed`. Pfad fix damit kein Wildcard
nötig wird.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Debian
2026-05-12 23:22:55 +02:00
parent 571f51ba9a
commit dbc14a24a4
11 changed files with 273 additions and 7 deletions

View File

@@ -36,9 +36,33 @@ func (h *BackupHandler) Register(rg *gin.RouterGroup) {
g.POST("", h.Trigger)
g.GET("/:id", h.Get)
g.GET("/:id/download", h.Download)
g.POST("/:id/restore", h.Restore)
g.DELETE("/:id", h.Delete)
}
// Restore handles POST /backups/:id/restore and kicks off a restore from an
// existing backup.
//
// The handler answers with 202 Accepted as soon as the service has dispatched
// the job; the restore itself runs in a transient systemd unit, and the UI
// polls /healthz to detect the API restart. Because a restore is a
// destructive change to the live database state, every accepted request is
// written to the audit trail (the audit call's result is discarded).
//
// Any error returned by the service is reported as 500 Internal Server Error.
// When parseID reports failure the handler returns without writing anything
// itself — presumably parseID has already sent the error response.
func (h *BackupHandler) Restore(c *gin.Context) {
	backupID, valid := parseID(c)
	if !valid {
		return
	}

	reqCtx := c.Request.Context()
	entry, restoreErr := h.Service.Restore(reqCtx, backupID)
	if restoreErr != nil {
		response.Err(c, http.StatusInternalServerError, restoreErr)
		return
	}

	auditDetails := gin.H{"id": backupID, "sha256": entry.SHA256}
	_ = h.Audit.Log(reqCtx, actorOf(c), "backup.restore", entry.File, auditDetails, h.NodeID)

	accepted := response.Envelope{
		Data:    gin.H{"status": "restoring", "file": entry.File, "id": backupID},
		Message: "Restore gestartet",
	}
	c.JSON(http.StatusAccepted, accepted)
}
func (h *BackupHandler) List(c *gin.Context) {
out, err := h.Service.List(c.Request.Context())
if err != nil {

View File

@@ -468,6 +468,102 @@ FROM backups WHERE id = $1`, id).Scan(
return &e, filepath.Join(s.BackupDir, e.File), nil
}
// Restore starts a full-system restore from an existing backup tarball.
//
// It follows the same pattern as the `/system/upgrade` path: a shell script
// is written to /var/lib/edgeguard/restore.sh and dispatched with
// `sudo systemd-run --unit=edgeguard-restore.service`. The script copies the
// node-local state files back, stops edgeguard-api and edgeguard-scheduler,
// loads the DB dump as the postgres user, re-renders the configs and starts
// the services again.
//
// Restore returns as soon as the unit has been dispatched; the restore itself
// runs in the background and the UI polls /healthz to detect the restart.
//
// It returns the backup's Entry on success. An error is returned when the
// backup cannot be looked up, its status is not "success", the tarball is
// missing on disk, the script cannot be written, or systemd-run fails.
func (s *Service) Restore(ctx context.Context, id int64) (*Entry, error) {
	e, path, err := s.Get(ctx, id)
	if err != nil {
		return nil, fmt.Errorf("backup not found: %w", err)
	}
	if e.Status != "success" {
		return nil, fmt.Errorf("backup is in status %q — cannot restore", e.Status)
	}
	if _, err := os.Stat(path); err != nil {
		return nil, fmt.Errorf("backup file missing on disk: %w", err)
	}

	const scriptPath = "/var/lib/edgeguard/restore.sh"

	// Write to a sibling file and rename it into place. bash reads a script
	// incrementally, so truncating restore.sh in place while an earlier
	// restore is still executing it could make that run continue with
	// garbage; a rename swaps in a new inode and leaves the old one intact.
	tmpScript := scriptPath + ".tmp"
	if err := os.WriteFile(tmpScript, []byte(restoreScript(e.File, path)), 0o755); err != nil {
		return nil, fmt.Errorf("write %s: %w", scriptPath, err)
	}
	if err := os.Rename(tmpScript, scriptPath); err != nil {
		_ = os.Remove(tmpScript) // best effort; the rename error is what matters
		return nil, fmt.Errorf("write %s: %w", scriptPath, err)
	}

	const unitName = "edgeguard-restore.service"

	// Best effort: presumably clears a leftover failed unit from an earlier
	// run so the unit name can be reused. Its failure is not fatal, so the
	// result is ignored.
	_ = exec.Command("sudo", "-n", "/usr/bin/systemctl",
		"reset-failed", unitName).Run()

	// NOTE: the argument vector must stay exactly as is — per the commit
	// description the sudoers rule pins this precise command line.
	cmd := exec.Command("sudo", "-n", "/usr/bin/systemd-run",
		"--unit="+unitName,
		"--description=EdgeGuard self-restore",
		"--collect",
		"bash", scriptPath)
	if out, err := cmd.CombinedOutput(); err != nil {
		// Nothing will run the script now; do not leave it lying around.
		_ = os.Remove(scriptPath)
		return nil, fmt.Errorf("systemd-run: %w (output: %q)", err, out)
	}
	return e, nil
}

// restoreScript renders the bash script that performs the restore of the
// backup named file from the tarball at the given path.
func restoreScript(file, tarball string) string {
	return fmt.Sprintf(`#!/bin/bash
# Generated by edgeguard-api — restore from %q
set -e
sleep 2  # let API return 202 first

TARBALL=%s
TMP=/var/lib/edgeguard/restore-tmp
STOPPED=0

# Runs on every exit path. Without it, any step failing after
# "systemctl stop" would (because of set -e) leave the API and the
# scheduler down and the temp dir behind, with no way to retry from
# the UI.
finish() {
  rc=$?
  trap - EXIT
  if [ "$STOPPED" = 1 ]; then
    # Restart edgeguard-api so the UI's /healthz poll sees the fresh
    # process again.
    echo "[restore] start services"
    systemctl start edgeguard-api edgeguard-scheduler || rc=$?
  fi
  rm -rf "$TMP" "$0"
  if [ "$rc" -eq 0 ]; then
    echo "[restore] complete"
  else
    echo "[restore] FAILED (exit $rc)" >&2
  fi
  exit "$rc"
}
trap finish EXIT

echo "[restore] extract $TARBALL → $TMP"
rm -rf "$TMP"
mkdir -p "$TMP"
tar -xzf "$TARBALL" -C "$TMP"

# 1) Restore node-local state files BEFORE the DB swap so a crash
#    mid-restore leaves the box in a state where the next API-start
#    sees the new keys/setup. DB will be partial but recoverable.
echo "[restore] state files"
for f in setup.json license_key license.cache trial.json .jwt_fingerprint node.conf; do
  if [ -f "$TMP/files/$f" ]; then
    cp -a "$TMP/files/$f" /var/lib/edgeguard/
  fi
done
if [ -d "$TMP/files/acme-account" ]; then
  mkdir -p /var/lib/edgeguard/acme-account
  cp -a "$TMP/files/acme-account/." /var/lib/edgeguard/acme-account/
fi
chown -R edgeguard:edgeguard /var/lib/edgeguard/setup.json \
  /var/lib/edgeguard/license_key /var/lib/edgeguard/license.cache \
  /var/lib/edgeguard/trial.json /var/lib/edgeguard/.jwt_fingerprint \
  /var/lib/edgeguard/node.conf /var/lib/edgeguard/acme-account 2>/dev/null || true

# 2) Stop API+scheduler so psql can DROP/CREATE tables without active
#    connections fighting the dump-restore.
echo "[restore] stop services"
STOPPED=1
systemctl stop edgeguard-api edgeguard-scheduler

# 3) Apply DB dump. pg_dump --clean emits DROP TABLE IF EXISTS so
#    we don't need to wipe the schema manually.
echo "[restore] psql -f dump.sql"
sudo -u postgres /usr/bin/psql --quiet -d edgeguard -f "$TMP/dump.sql"

# 4) Re-render configs from the freshly restored DB. Each renderer
#    triggers its own service reload — haproxy, nft, etc. so the
#    user-visible state matches DB-state immediately.
echo "[restore] render-config"
sudo -u edgeguard /usr/bin/edgeguard-ctl render-config || true

# 5) Services are started again, and temp files removed, by finish().
`, file, bashSingleQuote(tarball))
}

// bashSingleQuote wraps s in single quotes so bash treats it as one literal
// word; Go's %q is not safe here because bash still expands $, backticks and
// backslashes inside double quotes.
func bashSingleQuote(s string) string {
	quoted := make([]byte, 0, len(s)+2)
	quoted = append(quoted, '\'')
	for i := 0; i < len(s); i++ {
		if s[i] == '\'' {
			// Close the quote, emit an escaped quote, reopen.
			quoted = append(quoted, `'\''`...)
			continue
		}
		quoted = append(quoted, s[i])
	}
	quoted = append(quoted, '\'')
	return string(quoted)
}
// Delete entfernt File + DB-Row.
func (s *Service) Delete(ctx context.Context, id int64) error {
_, path, err := s.Get(ctx, id)