feat(backup): Restore-Pfad — POST /backups/:id/restore + UI

backup.Service.Restore(id) schreibt /var/lib/edgeguard/restore.sh
und dispatcht via `sudo systemd-run --unit=edgeguard-restore.service`.
Skript-Ablauf:
  1. tar -xzf der Backup-Datei → /var/lib/edgeguard/restore-tmp
  2. state-files (setup.json/license/jwt/node.conf/acme-account) per
     cp -a zurück, chown edgeguard
  3. systemctl stop edgeguard-api + scheduler (DB-Connections freigeben)
  4. sudo -u postgres psql -f dump.sql (--clean droppt + recreated)
  5. edgeguard-ctl render-config (haproxy/nft/squid/unbound/chrony)
  6. systemctl start edgeguard-api + scheduler
  7. rm -rf restore-tmp + restore.sh

UI: pro Backup-Row neuer Restore-Button mit Popconfirm. Beim Trigger
zeigt sich das vertraute Fullscreen-Overlay (Klassen .update-modal*
re-used) mit 4 Steps (Extract / DB-Restore / Render / Restart) + Live-
Timer. Health-Poll alle 3s detektiert API-Restart + reload. Safety-
Timeout 3 min für große DB-Dumps.

postinst: sudoers für `systemd-run --unit=edgeguard-restore.service
--description=... --collect bash /var/lib/edgeguard/restore.sh` +
zugehöriges `systemctl reset-failed`. Pfad fix damit kein Wildcard
nötig wird.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Debian
2026-05-12 23:22:55 +02:00
parent 571f51ba9a
commit dbc14a24a4
11 changed files with 273 additions and 7 deletions

View File

@@ -1 +1 @@
1.0.64
1.0.65

View File

@@ -51,7 +51,7 @@ import (
wgsvc "git.netcell-it.de/projekte/edgeguard-native/internal/services/wireguard"
)
var version = "1.0.64"
var version = "1.0.65"
func main() {
addr := os.Getenv("EDGEGUARD_API_ADDR")

View File

@@ -9,7 +9,7 @@ import (
"os"
)
var version = "1.0.64"
var version = "1.0.65"
const usage = `edgeguard-ctl — EdgeGuard CLI

View File

@@ -25,7 +25,7 @@ import (
"git.netcell-it.de/projekte/edgeguard-native/internal/services/tlscerts"
)
var version = "1.0.64"
var version = "1.0.65"
const (
// renewTickInterval — how often we re-evaluate expiring certs.

View File

@@ -36,9 +36,33 @@ func (h *BackupHandler) Register(rg *gin.RouterGroup) {
g.POST("", h.Trigger)
g.GET("/:id", h.Get)
g.GET("/:id/download", h.Download)
g.POST("/:id/restore", h.Restore)
g.DELETE("/:id", h.Delete)
}
// Restore kicks off a restore from an existing backup. The endpoint
// answers with 202 Accepted as soon as the service has dispatched the
// job; the restore itself runs in a transient systemd unit, and the UI
// polls the health endpoint to detect the API restart. The call is
// written to the audit trail because it is a destructive change to the
// live DB state.
func (h *BackupHandler) Restore(c *gin.Context) {
	backupID, valid := parseID(c)
	if !valid {
		// NOTE(review): presumably parseID has already written the error
		// response — confirm against its definition.
		return
	}

	entry, restoreErr := h.Service.Restore(c.Request.Context(), backupID)
	if restoreErr != nil {
		response.Err(c, http.StatusInternalServerError, restoreErr)
		return
	}

	// Best-effort audit entry: a logging failure does not change the
	// response, because the restore has already been dispatched.
	auditMeta := gin.H{"id": backupID, "sha256": entry.SHA256}
	_ = h.Audit.Log(c.Request.Context(), actorOf(c), "backup.restore",
		entry.File, auditMeta, h.NodeID)

	payload := gin.H{"status": "restoring", "file": entry.File, "id": backupID}
	c.JSON(http.StatusAccepted, response.Envelope{
		Data:    payload,
		Message: "Restore gestartet",
	})
}
func (h *BackupHandler) List(c *gin.Context) {
out, err := h.Service.List(c.Request.Context())
if err != nil {

View File

@@ -468,6 +468,102 @@ FROM backups WHERE id = $1`, id).Scan(
return &e, filepath.Join(s.BackupDir, e.File), nil
}
// Restore starts a full-system restore from an existing backup tarball.
//
// It follows the same pattern as the self-upgrade: a shell script is
// written to /var/lib/edgeguard/restore.sh and dispatched through
// `sudo systemd-run --unit=edgeguard-restore.service`. The script
// extracts the tarball, copies the node-local state files back, stops
// edgeguard-api and edgeguard-scheduler, applies the DB dump as the
// postgres user, re-renders the configs and starts the services again.
//
// Restore returns as soon as the unit has been dispatched; the restore
// itself runs in the background. It returns the backup entry on
// success, or an error if the backup is unknown, not in status
// "success", missing on disk, or if the script cannot be written or
// dispatched.
func (s *Service) Restore(ctx context.Context, id int64) (*Entry, error) {
	e, path, err := s.Get(ctx, id)
	if err != nil {
		return nil, fmt.Errorf("backup not found: %w", err)
	}
	if e.Status != "success" {
		return nil, fmt.Errorf("backup is in status %q — cannot restore", e.Status)
	}
	if _, err := os.Stat(path); err != nil {
		return nil, fmt.Errorf("backup file missing on disk: %w", err)
	}

	const scriptPath = "/var/lib/edgeguard/restore.sh"
	// The script runs as root, so nothing taken from the DB row may be
	// interpreted by the shell: the file name in the header comment is
	// Go-quoted (keeps it on one line) and the tarball path is wrapped
	// in POSIX single quotes (no $, backtick or backslash expansion).
	script := fmt.Sprintf(`#!/bin/bash
# Generated by edgeguard-api — restore from %q
set -e
sleep 2 # let API return 202 first

TARBALL=%s
TMP=/var/lib/edgeguard/restore-tmp

echo "[restore] extract $TARBALL → $TMP"
rm -rf "$TMP"
mkdir -p "$TMP"
tar -xzf "$TARBALL" -C "$TMP"

# 1) Restore node-local state files BEFORE the DB swap so a crash
#    mid-restore leaves the box in a state where the next API-start
#    sees the new keys/setup. DB will be partial but recoverable.
echo "[restore] state files"
for f in setup.json license_key license.cache trial.json .jwt_fingerprint node.conf; do
  if [ -f "$TMP/files/$f" ]; then
    cp -a "$TMP/files/$f" /var/lib/edgeguard/
  fi
done
if [ -d "$TMP/files/acme-account" ]; then
  mkdir -p /var/lib/edgeguard/acme-account
  cp -a "$TMP/files/acme-account/." /var/lib/edgeguard/acme-account/
fi
chown -R edgeguard:edgeguard /var/lib/edgeguard/setup.json \
  /var/lib/edgeguard/license_key /var/lib/edgeguard/license.cache \
  /var/lib/edgeguard/trial.json /var/lib/edgeguard/.jwt_fingerprint \
  /var/lib/edgeguard/node.conf /var/lib/edgeguard/acme-account 2>/dev/null || true

# 2) Stop API+scheduler so psql can DROP/CREATE tables without active
#    connections fighting the dump-restore. From here on the services
#    must come back even if a later step aborts the script under
#    set -e, hence the EXIT trap.
echo "[restore] stop services"
trap 'systemctl start edgeguard-api edgeguard-scheduler || true' EXIT
systemctl stop edgeguard-api edgeguard-scheduler

# 3) Apply DB dump. pg_dump --clean emits DROP TABLE IF EXISTS so
#    we don't need to wipe the schema manually.
echo "[restore] psql -f dump.sql"
sudo -u postgres /usr/bin/psql --quiet -d edgeguard -f "$TMP/dump.sql"

# 4) Re-render configs from the freshly restored DB. Each renderer
#    triggers its own service reload — haproxy, nft, etc. so the
#    user-visible state matches DB-state immediately.
echo "[restore] render-config"
sudo -u edgeguard /usr/bin/edgeguard-ctl render-config || true

# 5) Start edgeguard-api and the scheduler again so the UI's health
#    poll sees the API come back on a fresh connection.
echo "[restore] start services"
systemctl start edgeguard-api edgeguard-scheduler
trap - EXIT

rm -rf "$TMP" "$0"
echo "[restore] complete"
`, e.File, shellSingleQuote(path))

	if err := writeRestoreScript(scriptPath, []byte(script)); err != nil {
		return nil, fmt.Errorf("write %s: %w", scriptPath, err)
	}

	const unitName = "edgeguard-restore.service"
	// Best effort: clear a failed unit left over from an earlier run so
	// the unit name can be reused. The command fails when there is
	// nothing to reset, which is not an error for us.
	_ = exec.Command("sudo", "-n", "/usr/bin/systemctl",
		"reset-failed", unitName).Run()

	// The argument list must stay exactly in this form: the sudoers
	// rule whitelists this precise command line.
	cmd := exec.Command("sudo", "-n", "/usr/bin/systemd-run",
		"--unit="+unitName,
		"--description=EdgeGuard self-restore",
		"--collect",
		"bash", scriptPath)
	if out, err := cmd.CombinedOutput(); err != nil {
		// Include sudo/systemd-run diagnostics (e.g. "unit already
		// exists"), otherwise only the bare exit status is reported.
		if len(out) > 0 {
			return nil, fmt.Errorf("systemd-run: %w (output: %q)", err, out)
		}
		return nil, fmt.Errorf("systemd-run: %w", err)
	}
	return e, nil
}

// shellSingleQuote wraps s in POSIX single quotes so a shell treats it
// as one literal word; embedded single quotes are emitted as '\''.
func shellSingleQuote(s string) string {
	quoted := make([]byte, 0, len(s)+2)
	quoted = append(quoted, '\'')
	for i := 0; i < len(s); i++ {
		if s[i] == '\'' {
			quoted = append(quoted, `'\''`...)
			continue
		}
		quoted = append(quoted, s[i])
	}
	quoted = append(quoted, '\'')
	return string(quoted)
}

// writeRestoreScript writes content to path via a temp file in the same
// directory followed by a rename, so a bash process that is still
// reading a previous script never sees a half-written file.
func writeRestoreScript(path string, content []byte) error {
	tmp, err := os.CreateTemp(filepath.Dir(path), ".restore-*.sh")
	if err != nil {
		return err
	}
	tmpName := tmp.Name()

	_, werr := tmp.Write(content)
	if werr == nil {
		werr = tmp.Chmod(0o755)
	}
	if cerr := tmp.Close(); werr == nil {
		werr = cerr
	}
	if werr == nil {
		werr = os.Rename(tmpName, path)
	}
	if werr != nil {
		_ = os.Remove(tmpName) // best-effort cleanup of the temp file
		return werr
	}
	return nil
}
// Delete entfernt File + DB-Row.
func (s *Service) Delete(ctx context.Context, id int64) error {
_, path, err := s.Get(ctx, id)

View File

@@ -78,7 +78,7 @@ const NAV: NavSection[] = [
},
]
const VERSION = '1.0.64'
const VERSION = '1.0.65'
// Sidebar-Pattern 1:1 aus netcell-webpanel (enconf) übernommen:
// - <nav> als root, dunkler Gradient + Teal/Blue-Accent

View File

@@ -632,6 +632,20 @@
"failed": "Backup fehlgeschlagen",
"deleted": "Backup gelöscht",
"download": "Download",
"restore": "Wiederherstellen",
"restoreOk": "Restore starten",
"restoreDone": "Restore abgeschlossen — Seite wird neu geladen.",
"restoreFailed": "Restore fehlgeschlagen",
"restoreRunning": "Restore läuft …",
"restoreHint": "edgeguard-api startet automatisch neu; die UI lädt nach Restart neu.",
"confirmRestoreTitle": "Backup wiederherstellen?",
"confirmRestoreDesc": "DB-Inhalt wird durch {{file}} ersetzt + State-Dateien zurückgespielt. edgeguard-api + scheduler restarten. Aktuelle Änderungen seit dem Backup gehen verloren.",
"step": {
"extract": "Tar entpacken",
"psql": "DB-Restore (psql)",
"render": "Configs re-rendern",
"restart": "Services neu starten"
},
"downloadTooltip": "tar.gz herunterladen",
"refreshTooltip": "Liste neu laden",
"confirmDelete": "Backup {{file}} wirklich löschen?",

View File

@@ -632,6 +632,20 @@
"failed": "Backup failed",
"deleted": "Backup deleted",
"download": "Download",
"restore": "Restore",
"restoreOk": "Start restore",
"restoreDone": "Restore complete — page reloading.",
"restoreFailed": "Restore failed",
"restoreRunning": "Restore in progress …",
"restoreHint": "edgeguard-api will restart; the UI reloads after the restart.",
"confirmRestoreTitle": "Restore this backup?",
"confirmRestoreDesc": "Database content will be replaced with {{file}} + state files. edgeguard-api + scheduler will restart. Changes made since the backup are lost.",
"step": {
"extract": "Extract tar",
"psql": "DB restore (psql)",
"render": "Re-render configs",
"restart": "Restart services"
},
"downloadTooltip": "Download tar.gz",
"refreshTooltip": "Reload list",
"confirmDelete": "Really delete backup {{file}}?",

View File

@@ -1,4 +1,4 @@
import { useState } from 'react'
import { useEffect, useRef, useState } from 'react'
import {
Alert, Button, Card, Popconfirm, Space, Table, Tag, Tooltip, Typography, message,
} from 'antd'
@@ -9,6 +9,8 @@ import {
DatabaseOutlined,
DeleteOutlined,
ReloadOutlined,
RocketOutlined,
UndoOutlined,
} from '@ant-design/icons'
import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query'
import { useTranslation } from 'react-i18next'
@@ -87,6 +89,65 @@ export default function BackupsPage() {
onError: (e: Error) => msg.error(e.message),
})
// Restore overlay state: once a restore is confirmed we show a
// fullscreen overlay with a step indicator and poll the health endpoint
// until the API is reachable again (same approach as the update flow).
const [restoring, setRestoring] = useState<{ file: string } | null>(null)
const [restoreElapsed, setRestoreElapsed] = useState(0)
const tickRef = useRef<ReturnType<typeof setInterval> | null>(null)
const pollRef = useRef<ReturnType<typeof setInterval> | null>(null)
// Handle of the safety timeout. It is kept in a ref so it can be
// cancelled; otherwise it would force a page reload three minutes later
// even after the component has unmounted.
const safetyRef = useRef<ReturnType<typeof setTimeout> | null>(null)

// Stops the elapsed-seconds ticker, the health poll and the safety
// timeout. Only touches refs, so it is safe to call from any closure.
const clearRestoreTimers = () => {
  if (tickRef.current) clearInterval(tickRef.current)
  if (pollRef.current) clearInterval(pollRef.current)
  if (safetyRef.current) clearTimeout(safetyRef.current)
  tickRef.current = null
  pollRef.current = null
  safetyRef.current = null
}

// Unmount cleanup: no timer may outlive the page.
useEffect(() => () => {
  if (tickRef.current) clearInterval(tickRef.current)
  if (pollRef.current) clearInterval(pollRef.current)
  if (safetyRef.current) clearTimeout(safetyRef.current)
}, [])

const restore = useMutation({
  mutationFn: async (id: number) => {
    const r = await apiClient.post(`/backups/${id}/restore`)
    return isEnvelope(r.data) ? r.data.data : r.data
  },
  onError: (e: Error) => {
    // The request itself failed: close the overlay and stop all timers.
    setRestoring(null)
    clearRestoreTimers()
    msg.error(t('backups.restoreFailed') + ': ' + e.message)
  },
})

// Opens the overlay, starts the elapsed-seconds ticker and triggers the
// restore for the given backup row.
const startRestore = (b: Backup) => {
  // Defensive: never leave timers from an earlier run behind.
  clearRestoreTimers()
  setRestoring({ file: b.file })
  setRestoreElapsed(0)
  tickRef.current = setInterval(() => setRestoreElapsed((e) => e + 1), 1000)
  restore.mutate(b.id, {
    onSuccess: () => {
      // Poll /system/health until the API has gone down and come back,
      // then reload. A failed request counts as "API is down".
      let sawDown = false
      pollRef.current = setInterval(async () => {
        try {
          const res = await apiClient.get('/system/health')
          const v = isEnvelope(res.data) ? (res.data.data as { version: string }).version : ''
          if (sawDown && v) {
            clearRestoreTimers()
            setRestoring(null)
            msg.success(t('backups.restoreDone'))
            setTimeout(() => window.location.reload(), 1500)
          }
        } catch {
          sawDown = true
        }
      }, 3000)
      // Safety timeout of 3 minutes: a restore of a large DB can take
      // longer than an upgrade. After that we reload regardless.
      safetyRef.current = setTimeout(() => {
        clearRestoreTimers()
        setRestoring(null)
        window.location.reload()
      }, 180_000)
    },
  })
}
const download = (b: Backup) => {
// gin.FileAttachment liefert via Browser direkt; einfach
// Cookie-authentifiziert in eine versteckte Form öffnen.
@@ -153,7 +214,7 @@ export default function BackupsPage() {
render: (_, row) => <Text type="secondary">{fmtDuration(row.started_at, row.finished_at)}</Text>,
},
{
title: t('common.actions'), key: 'a', width: 200,
title: t('common.actions'), key: 'a', width: 280,
render: (_, row) => (
<Space size={4}>
<Tooltip title={t('backups.downloadTooltip')}>
@@ -164,6 +225,21 @@ export default function BackupsPage() {
disabled={row.status !== 'success'}
>{t('backups.download')}</Button>
</Tooltip>
<Popconfirm
title={t('backups.confirmRestoreTitle')}
description={t('backups.confirmRestoreDesc', { file: row.file })}
okText={t('backups.restoreOk')}
okButtonProps={{ danger: true }}
cancelText={t('common.cancel')}
onConfirm={() => startRestore(row)}
disabled={row.status !== 'success'}
>
<Button
size="small"
icon={<UndoOutlined />}
disabled={row.status !== 'success'}
>{t('backups.restore')}</Button>
</Popconfirm>
<Popconfirm
title={t('backups.confirmDelete', { file: row.file })}
onConfirm={() => del.mutate(row.id)}
@@ -222,6 +298,44 @@ export default function BackupsPage() {
pagination={{ pageSize: 25, showSizeChanger: true, pageSizeOptions: [25, 50, 100] }}
locale={{ emptyText: t('backups.empty') }}
/>
{restoring && (
<div className="update-modal-overlay">
<div className="update-modal">
<div className="update-modal__orbit">
<div className="update-modal__ring" />
<div className="update-modal__ring update-modal__ring--2" />
<div className="update-modal__dot" />
<div className="update-modal__dot update-modal__dot--2" />
<div className="update-modal__center">
<RocketOutlined className="update-modal__icon" />
</div>
</div>
<div className="update-modal__title">{t('backups.restoreRunning')}</div>
<div className="update-modal__version">{restoring.file}</div>
<div className="update-modal__steps">
<div className={`update-modal__step ${restoreElapsed < 5 ? 'update-modal__step--active' : 'update-modal__step--done'}`}>
<span className="update-modal__step-dot" />
<span>{t('backups.step.extract')}</span>
</div>
<div className={`update-modal__step ${restoreElapsed >= 5 && restoreElapsed < 15 ? 'update-modal__step--active' : restoreElapsed >= 15 ? 'update-modal__step--done' : ''}`}>
<span className="update-modal__step-dot" />
<span>{t('backups.step.psql')}</span>
</div>
<div className={`update-modal__step ${restoreElapsed >= 15 && restoreElapsed < 25 ? 'update-modal__step--active' : restoreElapsed >= 25 ? 'update-modal__step--done' : ''}`}>
<span className="update-modal__step-dot" />
<span>{t('backups.step.render')}</span>
</div>
<div className={`update-modal__step ${restoreElapsed >= 25 ? 'update-modal__step--active' : ''}`}>
<span className="update-modal__step-dot" />
<span>{t('backups.step.restart')}</span>
</div>
</div>
<div className="update-modal__timer">{restoreElapsed}s</div>
<div className="update-modal__hint">{t('backups.restoreHint')}</div>
</div>
</div>
)}
</div>
)
}

View File

@@ -98,6 +98,10 @@ edgeguard ALL=(postgres) NOPASSWD: /usr/bin/pg_dump --clean --if-exists --no-own
# Units anlegen darf.
edgeguard ALL=(root) NOPASSWD: /usr/bin/systemctl reset-failed edgeguard-upgrade.service
edgeguard ALL=(root) NOPASSWD: /usr/bin/systemd-run --unit=edgeguard-upgrade.service --description=EdgeGuard self-upgrade --collect bash /var/lib/edgeguard/upgrade.sh
# Backup-Restore: gleiche Pattern wie Upgrade — Skript landet immer
# unter /var/lib/edgeguard/restore.sh, Unit-Form ist fix.
edgeguard ALL=(root) NOPASSWD: /usr/bin/systemctl reset-failed edgeguard-restore.service
edgeguard ALL=(root) NOPASSWD: /usr/bin/systemd-run --unit=edgeguard-restore.service --description=EdgeGuard self-restore --collect bash /var/lib/edgeguard/restore.sh
SUDOERS
# ── Distro-Conf-Includes für die per-Service Renderer ─────────