feat(scheduler): Auto-Renewal für Let's Encrypt Certs
Vorher: edgeguard-scheduler war ein 60s-Sleep-Stub. LE-Certs liefen nach 90 Tagen ab und mussten manuell re-issued werden.

Jetzt:

* internal/services/certrenewer — Pipeline (find expiring → ACME-Issue → certstore.WriteCombined → Repo.Upsert → haproxy reload). Kapselt, was der /tls-certs/issue-Handler macht, nur DB-driven für N Certs.
* edgeguard-scheduler nutzt acme.Service + tlscerts.Repo + certrenewer. Tick alle 6h, Threshold 30 Tage Restlaufzeit. Sofort-Run bei Startup, damit eine frisch eingespielte Box auch ohne 6h-Wartezeit prüft.
* Issuer == "letsencrypt" als Filter — manuell hochgeladene PEMs bleiben unangetastet (Operator owns lifecycle).
* Errors landen in tls_certs.last_error, Retry beim nächsten Tick (transiente ACME-Failures self-heal).
* systemd-Unit edgeguard-scheduler.service: ReadWritePaths um /etc/edgeguard erweitert (für Cert-PEM-Writes), NoNewPrivileges auf false gesetzt (sudo systemctl reload haproxy braucht setuid). Spiegelt die edgeguard-api-Unit.

Version 1.0.16.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -39,7 +39,7 @@ import (
|
||||
wgsvc "git.netcell-it.de/projekte/edgeguard-native/internal/services/wireguard"
|
||||
)
|
||||
|
||||
var version = "1.0.15"
|
||||
var version = "1.0.16"
|
||||
|
||||
func main() {
|
||||
addr := os.Getenv("EDGEGUARD_API_ADDR")
|
||||
|
||||
@@ -9,7 +9,7 @@ import (
|
||||
"os"
|
||||
)
|
||||
|
||||
var version = "1.0.15"
|
||||
var version = "1.0.16"
|
||||
|
||||
const usage = `edgeguard-ctl — EdgeGuard CLI
|
||||
|
||||
|
||||
@@ -1,15 +1,85 @@
|
||||
// edgeguard-scheduler runs background jobs that don't belong on the
|
||||
// API request path:
|
||||
//
|
||||
// - ACME cert renewal (every 6h, re-issues anything < 30d to expiry)
|
||||
//
|
||||
// Future jobs (cluster heartbeat, backup, audit-log retention)
|
||||
// hang off the same Tick loop. Stays single-process — no leader
|
||||
// election yet (Phase 3).
|
||||
package main
|
||||
|
||||
import (
|
||||
"log"
|
||||
"context"
|
||||
"log/slog"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"git.netcell-it.de/projekte/edgeguard-native/internal/database"
|
||||
"git.netcell-it.de/projekte/edgeguard-native/internal/services/acme"
|
||||
"git.netcell-it.de/projekte/edgeguard-native/internal/services/certrenewer"
|
||||
"git.netcell-it.de/projekte/edgeguard-native/internal/services/setup"
|
||||
"git.netcell-it.de/projekte/edgeguard-native/internal/services/tlscerts"
|
||||
)
|
||||
|
||||
var version = "1.0.15"
|
||||
var version = "1.0.16"
|
||||
|
||||
// Scheduler tuning knobs.
const (
	// certDir is where renewed certificate PEMs are written. It must stay
	// in sync with the default used by handlers.NewTLSCertsHandler, since
	// HAProxy loads its certificates from this directory.
	certDir = "/etc/edgeguard/tls"

	// renewTickInterval is the pause between two renewal passes. With a
	// 30-day renewal window a missed pass is harmless, so 6h is plenty;
	// an hourly pass would mostly just add log noise.
	renewTickInterval = 6 * time.Hour
)
|
||||
|
||||
func main() {
|
||||
log.Printf("edgeguard-scheduler %s starting", version)
|
||||
for {
|
||||
time.Sleep(60 * time.Second)
|
||||
slog.SetDefault(slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelInfo})))
|
||||
slog.Info("edgeguard-scheduler starting", "version", version)
|
||||
|
||||
ctx := context.Background()
|
||||
|
||||
pool, err := database.Open(ctx, database.ConnStringFromEnv())
|
||||
if err != nil {
|
||||
slog.Error("scheduler: DB open failed — sleeping forever", "error", err)
|
||||
select {}
|
||||
}
|
||||
defer pool.Close()
|
||||
|
||||
tlsRepo := tlscerts.New(pool)
|
||||
setupStore := setup.NewStore(setup.DefaultDir)
|
||||
st, _ := setupStore.Load()
|
||||
|
||||
var renewer *certrenewer.Service
|
||||
if st != nil && st.ACMEEmail != "" {
|
||||
issuer := acme.New(st.ACMEEmail)
|
||||
renewer = certrenewer.New(tlsRepo, issuer, certDir, 30*24*time.Hour)
|
||||
slog.Info("scheduler: ACME renewer enabled",
|
||||
"email", st.ACMEEmail, "tick", renewTickInterval, "threshold", "30d")
|
||||
} else {
|
||||
slog.Warn("scheduler: setup.acme_email empty — ACME renewal disabled until setup wizard ran")
|
||||
}
|
||||
|
||||
if renewer != nil {
|
||||
runRenewer(ctx, renewer)
|
||||
}
|
||||
tick := time.NewTicker(renewTickInterval)
|
||||
defer tick.Stop()
|
||||
for range tick.C {
|
||||
if renewer != nil {
|
||||
runRenewer(ctx, renewer)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func runRenewer(ctx context.Context, r *certrenewer.Service) {
|
||||
res, err := r.Run(ctx)
|
||||
if err != nil {
|
||||
slog.Error("scheduler: renewer run failed", "error", err)
|
||||
return
|
||||
}
|
||||
slog.Info("scheduler: renewer pass complete",
|
||||
"checked", res.Checked, "renewed", res.Renewed,
|
||||
"failed", res.Failed, "skipped", res.Skipped)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user