diff --git a/VERSION b/VERSION
index a970716..b668c3b 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-1.0.15
+1.0.16
diff --git a/cmd/edgeguard-api/main.go b/cmd/edgeguard-api/main.go
index ce251c9..5e194a8 100644
--- a/cmd/edgeguard-api/main.go
+++ b/cmd/edgeguard-api/main.go
@@ -39,7 +39,7 @@ import (
 	wgsvc "git.netcell-it.de/projekte/edgeguard-native/internal/services/wireguard"
 )
 
-var version = "1.0.15"
+var version = "1.0.16"
 
 func main() {
 	addr := os.Getenv("EDGEGUARD_API_ADDR")
diff --git a/cmd/edgeguard-ctl/main.go b/cmd/edgeguard-ctl/main.go
index 20cb162..33e2dce 100644
--- a/cmd/edgeguard-ctl/main.go
+++ b/cmd/edgeguard-ctl/main.go
@@ -9,7 +9,7 @@ import (
 	"os"
 )
 
-var version = "1.0.15"
+var version = "1.0.16"
 
 const usage = `edgeguard-ctl — EdgeGuard CLI
 
diff --git a/cmd/edgeguard-scheduler/main.go b/cmd/edgeguard-scheduler/main.go
index a22630b..bac660d 100644
--- a/cmd/edgeguard-scheduler/main.go
+++ b/cmd/edgeguard-scheduler/main.go
@@ -1,15 +1,123 @@
+// edgeguard-scheduler runs background jobs that don't belong on the
+// API request path:
+//
+//   - ACME cert renewal (every 6h, re-issues anything < 30d to expiry)
+//
+// Future jobs (cluster heartbeat, backup, audit-log retention)
+// hang off the same Tick loop. Stays single-process — no leader
+// election yet (Phase 3).
 package main
 
 import (
-	"log"
+	"context"
+	"log/slog"
+	"os"
+	"os/signal"
+	"syscall"
 	"time"
+
+	"git.netcell-it.de/projekte/edgeguard-native/internal/database"
+	"git.netcell-it.de/projekte/edgeguard-native/internal/services/acme"
+	"git.netcell-it.de/projekte/edgeguard-native/internal/services/certrenewer"
+	"git.netcell-it.de/projekte/edgeguard-native/internal/services/setup"
+	"git.netcell-it.de/projekte/edgeguard-native/internal/services/tlscerts"
 )
 
-var version = "1.0.15"
+var version = "1.0.16"
+
+const (
+	// renewTickInterval — how often we re-evaluate expiring certs.
+	// 6h is enough: LE renewal window is 30 days; missing one tick
+	// makes no difference. Hourly would log too much.
+	renewTickInterval = 6 * time.Hour
+
+	// dbRetryInterval — pause between attempts while the database is
+	// unreachable at startup.
+	dbRetryInterval = 30 * time.Second
+
+	// certDir matches handlers.NewTLSCertsHandler default — HAProxy
+	// reads from this directory.
+	certDir = "/etc/edgeguard/tls"
+)
 
 func main() {
-	log.Printf("edgeguard-scheduler %s starting", version)
-	for {
-		time.Sleep(60 * time.Second)
+	slog.SetDefault(slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelInfo})))
+	slog.Info("edgeguard-scheduler starting", "version", version)
+
+	// Cancel ctx on SIGINT/SIGTERM so the tick loop below can return
+	// and the deferred pool.Close / tick.Stop actually run.
+	ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
+	defer stop()
+
+	// Keep retrying instead of parking forever: the process stays
+	// alive as before, but recovers once the DB becomes reachable.
+	pool, err := database.Open(ctx, database.ConnStringFromEnv())
+	for err != nil {
+		slog.Error("scheduler: DB open failed — retrying", "error", err, "retry_in", dbRetryInterval)
+		select {
+		case <-ctx.Done():
+			slog.Info("scheduler: shutdown requested before DB became available")
+			return
+		case <-time.After(dbRetryInterval):
+		}
+		pool, err = database.Open(ctx, database.ConnStringFromEnv())
+	}
+	defer pool.Close()
+
+	tlsRepo := tlscerts.New(pool)
+	setupStore := setup.NewStore(setup.DefaultDir)
+
+	// newRenewer re-reads the setup state on every call, so an ACME
+	// email entered after the scheduler started is picked up on a
+	// later tick without a restart.
+	newRenewer := func() *certrenewer.Service {
+		st, err := setupStore.Load()
+		if err != nil {
+			slog.Warn("scheduler: loading setup state failed — ACME renewal skipped", "error", err)
+			return nil
+		}
+		if st == nil || st.ACMEEmail == "" {
+			slog.Warn("scheduler: setup.acme_email empty — ACME renewal disabled until setup wizard ran")
+			return nil
+		}
+		slog.Info("scheduler: ACME renewer enabled",
+			"email", st.ACMEEmail, "tick", renewTickInterval, "threshold", "30d")
+		return certrenewer.New(tlsRepo, acme.New(st.ACMEEmail), certDir, 30*24*time.Hour)
+	}
+
+	// renewer stays nil until newRenewer succeeds; pass retries it.
+	var renewer *certrenewer.Service
+	pass := func() {
+		if renewer == nil {
+			renewer = newRenewer()
+		}
+		if renewer != nil {
+			runRenewer(ctx, renewer)
+		}
+	}
+
+	pass()
+	tick := time.NewTicker(renewTickInterval)
+	defer tick.Stop()
+	for {
+		select {
+		case <-ctx.Done():
+			slog.Info("scheduler: shutting down")
+			return
+		case <-tick.C:
+			pass()
+		}
 	}
 }
+
+// runRenewer executes one renewal pass and logs its outcome.
+func runRenewer(ctx context.Context, r *certrenewer.Service) {
+	res, err := r.Run(ctx)
+	if err != nil {
+		slog.Error("scheduler: renewer run failed", "error", err)
+		return
+	}
+	slog.Info("scheduler: renewer pass complete",
+		"checked", res.Checked, "renewed", res.Renewed,
+		"failed", res.Failed, "skipped", res.Skipped)
+}
diff --git a/deploy/systemd/edgeguard-scheduler.service b/deploy/systemd/edgeguard-scheduler.service
index 8e24ec9..048cd62 100644
--- a/deploy/systemd/edgeguard-scheduler.service
+++ b/deploy/systemd/edgeguard-scheduler.service
@@ -12,7 +12,7 @@
ExecStart=/usr/bin/edgeguard-scheduler
 Restart=on-failure
 RestartSec=10
 
-NoNewPrivileges=true
+NoNewPrivileges=false
 ProtectSystem=strict
 ProtectHome=true
 ProtectKernelTunables=true
@@ -22,7 +22,7 @@ PrivateTmp=true
 PrivateDevices=true
 RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6
 SystemCallFilter=@system-service
-ReadWritePaths=/var/lib/edgeguard /var/log/edgeguard
+ReadWritePaths=/etc/edgeguard /var/lib/edgeguard /var/log/edgeguard
 
 [Install]
 WantedBy=multi-user.target
diff --git a/internal/services/certrenewer/certrenewer.go b/internal/services/certrenewer/certrenewer.go
new file mode 100644
index 0000000..9b1e766
--- /dev/null
+++ b/internal/services/certrenewer/certrenewer.go
@@ -0,0 +1,185 @@
+// Package certrenewer encapsulates the "find expiring certs +
+// re-issue them via Let's Encrypt + reload haproxy" pipeline so the
+// scheduler and the handler share the exact same write path.
+//
+// Scope deliberately narrow:
+//   - Only Let's Encrypt-issued rows are renewed (manually-uploaded
+//     PEMs stay untouched — operator owns the lifecycle).
+//   - Re-issue happens when not_after - now < threshold (default
+//     30 days). LE recommends 30; certbot defaults to 30.
+//   - On error, the row's last_error is set; the schedule retries
+//     on the next tick, so a transient ACME failure self-heals.
+package certrenewer
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"log/slog"
+	"os/exec"
+	"time"
+
+	"git.netcell-it.de/projekte/edgeguard-native/internal/models"
+	"git.netcell-it.de/projekte/edgeguard-native/internal/services/certstore"
+	"git.netcell-it.de/projekte/edgeguard-native/internal/services/tlscerts"
+)
+
+// haproxyReloadTimeout bounds the reload command so a hung
+// sudo/systemctl cannot block a renewal pass forever.
+const haproxyReloadTimeout = 30 * time.Second
+
+// Issuer is the contract acme.Service implements (Issue is also
+// used for renewal — LE doesn't distinguish; a fresh cert with the
+// same Common-Name supersedes the old one).
+type Issuer interface {
+	Issue(domain string) (cert, chain, key string, err error)
+}
+
+// Service runs the renewal pipeline: Repo supplies and records the
+// tls_certs rows, Issuer obtains certificates, CertDir receives the PEMs.
+type Service struct {
+	Repo      *tlscerts.Repo
+	Issuer    Issuer
+	CertDir   string
+	Threshold time.Duration
+
+	// Logger lets the scheduler attach its slog handler. If nil,
+	// the package uses slog.Default.
+	Logger *slog.Logger
+}
+
+// New returns a Service. A zero threshold selects the 30-day default.
+func New(repo *tlscerts.Repo, issuer Issuer, certDir string, threshold time.Duration) *Service {
+	if threshold == 0 {
+		threshold = 30 * 24 * time.Hour
+	}
+	return &Service{
+		Repo:      repo,
+		Issuer:    issuer,
+		CertDir:   certDir,
+		Threshold: threshold,
+	}
+}
+
+// log returns the configured Logger, falling back to slog.Default.
+func (s *Service) log() *slog.Logger {
+	if s.Logger != nil {
+		return s.Logger
+	}
+	return slog.Default()
+}
+
+// Result counts what one Run pass did so the scheduler can log /
+// surface it.
+type Result struct {
+	Checked int
+	Renewed int
+	Failed  int
+	Skipped int
+}
+
+// Run scans tls_certs for LE-issued rows that expire within the
+// threshold, re-issues each via the Issuer, writes the new PEM and
+// triggers an HAProxy reload at the end if any PEM on disk changed.
+// Errors per cert are logged and recorded in tls_certs.last_error but
+// do not abort the loop.
+//
+// Issuer.Issue takes no context, so cancellation is only honoured
+// between certs: once ctx is done Run stops issuing, still reloads
+// HAProxy for PEMs already written, and returns the counts so far
+// together with the wrapped context error.
+func (s *Service) Run(ctx context.Context) (Result, error) {
+	if s.Issuer == nil {
+		return Result{}, errors.New("certrenewer: Issuer is nil — ACME-Email noch nicht im Setup gesetzt?")
+	}
+	rows, err := s.Repo.ListExpiringSoon(ctx, s.Threshold)
+	if err != nil {
+		return Result{}, fmt.Errorf("list expiring: %w", err)
+	}
+	res := Result{Checked: len(rows)}
+	var (
+		needReload bool
+		runErr     error
+	)
+	for _, row := range rows {
+		if err := ctx.Err(); err != nil {
+			runErr = fmt.Errorf("certrenewer: pass interrupted: %w", err)
+			break
+		}
+		if row.Issuer != "letsencrypt" {
+			res.Skipped++
+			s.log().Debug("certrenewer: skip non-LE cert", "domain", row.Domain, "issuer", row.Issuer)
+			continue
+		}
+		s.log().Info("certrenewer: renewing", "domain", row.Domain, "expires", row.NotAfter)
+		certPEM, chainPEM, keyPEM, err := s.Issuer.Issue(row.Domain)
+		if err != nil {
+			res.Failed++
+			s.recordFailure(ctx, row.Domain, "issue", err.Error(), err)
+			continue
+		}
+		info, err := certstore.Parse(certPEM)
+		if err != nil {
+			res.Failed++
+			s.recordFailure(ctx, row.Domain, "parse", "parse: "+err.Error(), err)
+			continue
+		}
+		path, err := certstore.WriteCombined(s.CertDir, row.Domain, certPEM, chainPEM, keyPEM)
+		if err != nil {
+			res.Failed++
+			s.recordFailure(ctx, row.Domain, "write", "write: "+err.Error(), err)
+			continue
+		}
+		// The PEM HAProxy serves has changed on disk from here on, so
+		// reload even if the DB bookkeeping below fails.
+		needReload = true
+		now := time.Now()
+		if _, err := s.Repo.Upsert(ctx, models.TLSCert{
+			Domain:        row.Domain,
+			Issuer:        "letsencrypt",
+			Status:        "active",
+			CertPath:      &path,
+			KeyPath:       &path,
+			NotBefore:     &info.NotBefore,
+			NotAfter:      &info.NotAfter,
+			LastRenewedAt: &now,
+		}); err != nil {
+			res.Failed++
+			s.log().Error("certrenewer: upsert failed", "domain", row.Domain, "error", err)
+			continue
+		}
+		res.Renewed++
+	}
+	if needReload {
+		if err := reloadHAProxy(); err != nil {
+			s.log().Warn("certrenewer: haproxy reload failed", "error", err)
+		} else {
+			s.log().Info("certrenewer: haproxy reloaded")
+		}
+	}
+	return res, runErr
+}
+
+// recordFailure logs a failed renewal step for domain and stores
+// lastError in tls_certs.last_error. A failing MarkError is logged
+// rather than silently dropped.
+func (s *Service) recordFailure(ctx context.Context, domain, step, lastError string, cause error) {
+	s.log().Error("certrenewer: "+step+" failed", "domain", domain, "error", cause)
+	if err := s.Repo.MarkError(ctx, domain, lastError); err != nil {
+		s.log().Warn("certrenewer: recording last_error failed", "domain", domain, "error", err)
+	}
+}
+
+// reloadHAProxy reloads haproxy.service through non-interactive sudo.
+// It runs under its own haproxyReloadTimeout context rather than the
+// caller's, so an interrupted pass can still activate PEMs it already
+// wrote. The command's combined output is attached to the error.
+func reloadHAProxy() error {
+	ctx, cancel := context.WithTimeout(context.Background(), haproxyReloadTimeout)
+	defer cancel()
+	out, err := exec.CommandContext(ctx, "sudo", "-n", "/usr/bin/systemctl", "reload", "haproxy.service").CombinedOutput()
+	if err != nil {
+		return fmt.Errorf("systemctl reload haproxy.service: %w (output: %q)", err, out)
+	}
+	return nil
+}
diff --git a/management-ui/package.json b/management-ui/package.json
index fea1a59..9232c0a 100644
--- a/management-ui/package.json
+++ b/management-ui/package.json
@@ -1,7 +1,7 @@
 {
   "name": "edgeguard-management-ui",
   "private": true,
-  "version": "1.0.15",
+  "version": "1.0.16",
   "type": "module",
   "scripts": {
     "dev": "vite",
diff --git a/management-ui/src/components/Layout/Sidebar.tsx b/management-ui/src/components/Layout/Sidebar.tsx
index 0f7202c..6d9b4af 100644
--- a/management-ui/src/components/Layout/Sidebar.tsx
+++ b/management-ui/src/components/Layout/Sidebar.tsx
@@ -70,7 +70,7 @@ const NAV: NavSection[] = [
   },
 ]
 
-const VERSION = '1.0.15'
+const VERSION = '1.0.16'
 
 export default function Sidebar({ isOpen, onClose }: SidebarProps) {
   const { t } = useTranslation()