feat(scheduler): Auto-Renewal für Let's Encrypt Certs
Vorher: edgeguard-scheduler war ein 60s-Sleep-Stub. LE-Certs liefen nach 90 Tagen ab und mussten manuell re-issued werden.

Jetzt:
* internal/services/certrenewer — Pipeline (find expiring → ACME-Issue → certstore.WriteCombined → Repo.Upsert → haproxy reload). Kapselt, was der /tls-certs/issue-Handler macht, nur DB-driven für N Certs.
* edgeguard-scheduler nutzt acme.Service + tlscerts.Repo + certrenewer. Tick alle 6h, Threshold 30 Tage Restlaufzeit. Sofort-Run bei Startup, damit eine frisch eingespielte Box auch ohne 6h-Wartezeit prüft.
* Issuer == "letsencrypt" als Filter — manuell hochgeladene PEMs bleiben unangetastet (Operator owns lifecycle).
* Errors landen in tls_certs.last_error, Retry beim nächsten Tick (transiente ACME-Failures self-heal).
* systemd-Unit edgeguard-scheduler.service: ReadWritePaths um /etc/edgeguard erweitert (für Cert-PEM-Writes), NoNewPrivileges auf false (sudo systemctl reload haproxy braucht setuid). Spiegelt edgeguard-api-Unit.

Version 1.0.16.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -39,7 +39,7 @@ import (
|
|||||||
wgsvc "git.netcell-it.de/projekte/edgeguard-native/internal/services/wireguard"
|
wgsvc "git.netcell-it.de/projekte/edgeguard-native/internal/services/wireguard"
|
||||||
)
|
)
|
||||||
|
|
||||||
var version = "1.0.15"
|
var version = "1.0.16"
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
addr := os.Getenv("EDGEGUARD_API_ADDR")
|
addr := os.Getenv("EDGEGUARD_API_ADDR")
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
)
|
)
|
||||||
|
|
||||||
var version = "1.0.15"
|
var version = "1.0.16"
|
||||||
|
|
||||||
const usage = `edgeguard-ctl — EdgeGuard CLI
|
const usage = `edgeguard-ctl — EdgeGuard CLI
|
||||||
|
|
||||||
|
|||||||
@@ -1,15 +1,85 @@
|
|||||||
|
// edgeguard-scheduler runs background jobs that don't belong on the
|
||||||
|
// API request path:
|
||||||
|
//
|
||||||
|
// - ACME cert renewal (every 6h, re-issues anything < 30d to expiry)
|
||||||
|
//
|
||||||
|
// Future jobs (cluster heartbeat, backup, audit-log retention)
|
||||||
|
// hang off the same Tick loop. Stays single-process — no leader
|
||||||
|
// election yet (Phase 3).
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"log"
|
"context"
|
||||||
|
"log/slog"
|
||||||
|
"os"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"git.netcell-it.de/projekte/edgeguard-native/internal/database"
|
||||||
|
"git.netcell-it.de/projekte/edgeguard-native/internal/services/acme"
|
||||||
|
"git.netcell-it.de/projekte/edgeguard-native/internal/services/certrenewer"
|
||||||
|
"git.netcell-it.de/projekte/edgeguard-native/internal/services/setup"
|
||||||
|
"git.netcell-it.de/projekte/edgeguard-native/internal/services/tlscerts"
|
||||||
)
|
)
|
||||||
|
|
||||||
var version = "1.0.15"
|
var version = "1.0.16"
|
||||||
|
|
||||||
|
const (
|
||||||
|
// renewTickInterval — how often we re-evaluate expiring certs.
|
||||||
|
// 6h is enough: LE renewal window is 30 days; missing one tick
|
||||||
|
// makes no difference. Hourly would log too much.
|
||||||
|
renewTickInterval = 6 * time.Hour
|
||||||
|
|
||||||
|
// certDir matches handlers.NewTLSCertsHandler default — HAProxy
|
||||||
|
// reads from this directory.
|
||||||
|
certDir = "/etc/edgeguard/tls"
|
||||||
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
log.Printf("edgeguard-scheduler %s starting", version)
|
slog.SetDefault(slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelInfo})))
|
||||||
for {
|
slog.Info("edgeguard-scheduler starting", "version", version)
|
||||||
time.Sleep(60 * time.Second)
|
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
pool, err := database.Open(ctx, database.ConnStringFromEnv())
|
||||||
|
if err != nil {
|
||||||
|
slog.Error("scheduler: DB open failed — sleeping forever", "error", err)
|
||||||
|
select {}
|
||||||
|
}
|
||||||
|
defer pool.Close()
|
||||||
|
|
||||||
|
tlsRepo := tlscerts.New(pool)
|
||||||
|
setupStore := setup.NewStore(setup.DefaultDir)
|
||||||
|
st, _ := setupStore.Load()
|
||||||
|
|
||||||
|
var renewer *certrenewer.Service
|
||||||
|
if st != nil && st.ACMEEmail != "" {
|
||||||
|
issuer := acme.New(st.ACMEEmail)
|
||||||
|
renewer = certrenewer.New(tlsRepo, issuer, certDir, 30*24*time.Hour)
|
||||||
|
slog.Info("scheduler: ACME renewer enabled",
|
||||||
|
"email", st.ACMEEmail, "tick", renewTickInterval, "threshold", "30d")
|
||||||
|
} else {
|
||||||
|
slog.Warn("scheduler: setup.acme_email empty — ACME renewal disabled until setup wizard ran")
|
||||||
|
}
|
||||||
|
|
||||||
|
if renewer != nil {
|
||||||
|
runRenewer(ctx, renewer)
|
||||||
|
}
|
||||||
|
tick := time.NewTicker(renewTickInterval)
|
||||||
|
defer tick.Stop()
|
||||||
|
for range tick.C {
|
||||||
|
if renewer != nil {
|
||||||
|
runRenewer(ctx, renewer)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func runRenewer(ctx context.Context, r *certrenewer.Service) {
|
||||||
|
res, err := r.Run(ctx)
|
||||||
|
if err != nil {
|
||||||
|
slog.Error("scheduler: renewer run failed", "error", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
slog.Info("scheduler: renewer pass complete",
|
||||||
|
"checked", res.Checked, "renewed", res.Renewed,
|
||||||
|
"failed", res.Failed, "skipped", res.Skipped)
|
||||||
|
}
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ ExecStart=/usr/bin/edgeguard-scheduler
|
|||||||
Restart=on-failure
|
Restart=on-failure
|
||||||
RestartSec=10
|
RestartSec=10
|
||||||
|
|
||||||
NoNewPrivileges=true
|
NoNewPrivileges=false
|
||||||
ProtectSystem=strict
|
ProtectSystem=strict
|
||||||
ProtectHome=true
|
ProtectHome=true
|
||||||
ProtectKernelTunables=true
|
ProtectKernelTunables=true
|
||||||
@@ -22,7 +22,7 @@ PrivateTmp=true
|
|||||||
PrivateDevices=true
|
PrivateDevices=true
|
||||||
RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6
|
RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6
|
||||||
SystemCallFilter=@system-service
|
SystemCallFilter=@system-service
|
||||||
ReadWritePaths=/var/lib/edgeguard /var/log/edgeguard
|
ReadWritePaths=/etc/edgeguard /var/lib/edgeguard /var/log/edgeguard
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
WantedBy=multi-user.target
|
WantedBy=multi-user.target
|
||||||
|
|||||||
144
internal/services/certrenewer/certrenewer.go
Normal file
144
internal/services/certrenewer/certrenewer.go
Normal file
@@ -0,0 +1,144 @@
|
|||||||
|
// Package certrenewer encapsulates the "find expiring certs +
|
||||||
|
// re-issue them via Let's Encrypt + reload haproxy" pipeline so the
|
||||||
|
// scheduler and the handler share the exact same write path.
|
||||||
|
//
|
||||||
|
// Scope deliberately narrow:
|
||||||
|
// - Only Let's Encrypt-issued rows are renewed (manually-uploaded
|
||||||
|
// PEMs stay untouched — operator owns the lifecycle).
|
||||||
|
// - Re-issue happens when not_after - now < threshold (default
|
||||||
|
// 30 days). LE recommends 30; certbot defaults to 30.
|
||||||
|
// - On error, the row's last_error is set; the schedule retries
|
||||||
|
// on the next tick, so a transient ACME failure self-heals.
|
||||||
|
package certrenewer
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"log/slog"
|
||||||
|
"os/exec"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"git.netcell-it.de/projekte/edgeguard-native/internal/models"
|
||||||
|
"git.netcell-it.de/projekte/edgeguard-native/internal/services/certstore"
|
||||||
|
"git.netcell-it.de/projekte/edgeguard-native/internal/services/tlscerts"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Issuer obtains a certificate for a single domain. It is the contract
// acme.Service implements. The same call serves first issuance and
// renewal alike: Let's Encrypt does not distinguish the two, and a
// fresh certificate for the same Common-Name supersedes the old one.
type Issuer interface {
	// Issue requests a certificate for domain and returns the
	// certificate, its chain and the private key as three strings,
	// or a non-nil error when issuance failed.
	Issue(domain string) (cert, chain, key string, err error)
}
|
||||||
|
|
||||||
|
type Service struct {
|
||||||
|
Repo *tlscerts.Repo
|
||||||
|
Issuer Issuer
|
||||||
|
CertDir string
|
||||||
|
Threshold time.Duration
|
||||||
|
|
||||||
|
// Logger lets the scheduler attach its slog handler. If nil,
|
||||||
|
// the package uses slog.Default.
|
||||||
|
Logger *slog.Logger
|
||||||
|
}
|
||||||
|
|
||||||
|
func New(repo *tlscerts.Repo, issuer Issuer, certDir string, threshold time.Duration) *Service {
|
||||||
|
if threshold == 0 {
|
||||||
|
threshold = 30 * 24 * time.Hour
|
||||||
|
}
|
||||||
|
return &Service{
|
||||||
|
Repo: repo,
|
||||||
|
Issuer: issuer,
|
||||||
|
CertDir: certDir,
|
||||||
|
Threshold: threshold,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Service) log() *slog.Logger {
|
||||||
|
if s.Logger != nil {
|
||||||
|
return s.Logger
|
||||||
|
}
|
||||||
|
return slog.Default()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Result summarises a single Run pass so the caller can log or surface
// what happened.
type Result struct {
	// Checked is the number of rows the repository reported as expiring
	// within the threshold, whatever their issuer.
	Checked int
	// Renewed counts certificates that were re-issued and stored.
	Renewed int
	// Failed counts certificates for which a renewal step failed.
	Failed int
	// Skipped counts rows left alone because their issuer is not
	// "letsencrypt".
	Skipped int
}
|
||||||
|
|
||||||
|
// Run scans tls_certs for LE-issued rows that expire within the
|
||||||
|
// threshold, re-issues each via the Issuer, writes the new PEM and
|
||||||
|
// triggers an HAProxy reload at the end if anything changed. Errors
|
||||||
|
// per cert are logged and recorded in tls_certs.last_error but do
|
||||||
|
// not abort the loop.
|
||||||
|
func (s *Service) Run(ctx context.Context) (Result, error) {
|
||||||
|
if s.Issuer == nil {
|
||||||
|
return Result{}, errors.New("certrenewer: Issuer is nil — ACME-Email noch nicht im Setup gesetzt?")
|
||||||
|
}
|
||||||
|
rows, err := s.Repo.ListExpiringSoon(ctx, s.Threshold)
|
||||||
|
if err != nil {
|
||||||
|
return Result{}, fmt.Errorf("list expiring: %w", err)
|
||||||
|
}
|
||||||
|
res := Result{Checked: len(rows)}
|
||||||
|
anyRenewed := false
|
||||||
|
for _, row := range rows {
|
||||||
|
if row.Issuer != "letsencrypt" {
|
||||||
|
res.Skipped++
|
||||||
|
s.log().Debug("certrenewer: skip non-LE cert", "domain", row.Domain, "issuer", row.Issuer)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
s.log().Info("certrenewer: renewing", "domain", row.Domain, "expires", row.NotAfter)
|
||||||
|
certPEM, chainPEM, keyPEM, err := s.Issuer.Issue(row.Domain)
|
||||||
|
if err != nil {
|
||||||
|
res.Failed++
|
||||||
|
s.log().Error("certrenewer: issue failed", "domain", row.Domain, "error", err)
|
||||||
|
_ = s.Repo.MarkError(ctx, row.Domain, err.Error())
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
info, err := certstore.Parse(certPEM)
|
||||||
|
if err != nil {
|
||||||
|
res.Failed++
|
||||||
|
_ = s.Repo.MarkError(ctx, row.Domain, "parse: "+err.Error())
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
path, err := certstore.WriteCombined(s.CertDir, row.Domain, certPEM, chainPEM, keyPEM)
|
||||||
|
if err != nil {
|
||||||
|
res.Failed++
|
||||||
|
_ = s.Repo.MarkError(ctx, row.Domain, "write: "+err.Error())
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
now := time.Now()
|
||||||
|
if _, err := s.Repo.Upsert(ctx, models.TLSCert{
|
||||||
|
Domain: row.Domain,
|
||||||
|
Issuer: "letsencrypt",
|
||||||
|
Status: "active",
|
||||||
|
CertPath: &path,
|
||||||
|
KeyPath: &path,
|
||||||
|
NotBefore: &info.NotBefore,
|
||||||
|
NotAfter: &info.NotAfter,
|
||||||
|
LastRenewedAt: &now,
|
||||||
|
}); err != nil {
|
||||||
|
res.Failed++
|
||||||
|
s.log().Error("certrenewer: upsert failed", "domain", row.Domain, "error", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
res.Renewed++
|
||||||
|
anyRenewed = true
|
||||||
|
}
|
||||||
|
if anyRenewed {
|
||||||
|
if err := reloadHAProxy(); err != nil {
|
||||||
|
s.log().Warn("certrenewer: haproxy reload failed", "error", err)
|
||||||
|
} else {
|
||||||
|
s.log().Info("certrenewer: haproxy reloaded")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return res, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// reloadHAProxy asks systemd, via non-interactive sudo, to reload
// haproxy.service so that HAProxy picks up freshly written
// certificates.
//
// The call is bounded by a one-minute timeout so that a hanging sudo or
// systemctl cannot stall the caller indefinitely. On failure the
// returned error wraps the underlying exec error and, when the command
// printed anything, includes its combined stdout/stderr so the reason
// (for example a sudo permission problem) shows up in the log.
func reloadHAProxy() error {
	ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
	defer cancel()

	out, err := exec.CommandContext(ctx,
		"sudo", "-n", "/usr/bin/systemctl", "reload", "haproxy.service",
	).CombinedOutput()
	if err == nil {
		return nil
	}
	if len(out) == 0 {
		return fmt.Errorf("reload haproxy: %w", err)
	}
	return fmt.Errorf("reload haproxy: %w: %q", err, out)
}
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"name": "edgeguard-management-ui",
|
"name": "edgeguard-management-ui",
|
||||||
"private": true,
|
"private": true,
|
||||||
"version": "1.0.15",
|
"version": "1.0.16",
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"dev": "vite",
|
"dev": "vite",
|
||||||
|
|||||||
@@ -70,7 +70,7 @@ const NAV: NavSection[] = [
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
const VERSION = '1.0.15'
|
const VERSION = '1.0.16'
|
||||||
|
|
||||||
export default function Sidebar({ isOpen, onClose }: SidebarProps) {
|
export default function Sidebar({ isOpen, onClose }: SidebarProps) {
|
||||||
const { t } = useTranslation()
|
const { t } = useTranslation()
|
||||||
|
|||||||
Reference in New Issue
Block a user