diff --git a/VERSION b/VERSION index be1dcc8..ea2f1d3 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.0.73 +1.0.74 diff --git a/cmd/edgeguard-api/main.go b/cmd/edgeguard-api/main.go index 8bf0dda..ebb0555 100644 --- a/cmd/edgeguard-api/main.go +++ b/cmd/edgeguard-api/main.go @@ -30,6 +30,7 @@ import ( wgrender "git.netcell-it.de/projekte/edgeguard-native/internal/wireguard" "git.netcell-it.de/projekte/edgeguard-native/internal/handlers/response" "git.netcell-it.de/projekte/edgeguard-native/internal/services/acme" + "git.netcell-it.de/projekte/edgeguard-native/internal/services/alerts" "git.netcell-it.de/projekte/edgeguard-native/internal/services/audit" "git.netcell-it.de/projekte/edgeguard-native/internal/services/backends" "git.netcell-it.de/projekte/edgeguard-native/internal/services/backendservers" @@ -52,7 +53,7 @@ import ( wgsvc "git.netcell-it.de/projekte/edgeguard-native/internal/services/wireguard" ) -var version = "1.0.73" +var version = "1.0.74" func main() { addr := os.Getenv("EDGEGUARD_API_ADDR") @@ -205,6 +206,7 @@ func main() { // Jobs laufen im edgeguard-scheduler. handlers.NewBackupHandler(backup.New(pool), auditRepo, nodeID, version).Register(authed) handlers.NewDiagnosticsHandler().Register(authed) + handlers.NewAlertsHandler(alerts.New(pool), auditRepo, nodeID).Register(authed) handlers.NewTLSCertsHandler(tlsRepo, auditRepo, nodeID, acmeService).Register(authed) // Firewall reload: nach jeder Mutation den Renderer neu fahren // (writes ruleset.nft + sudo nft -f). Errors loggen, nicht failen. diff --git a/cmd/edgeguard-ctl/main.go b/cmd/edgeguard-ctl/main.go index 50da00f..e24b012 100644 --- a/cmd/edgeguard-ctl/main.go +++ b/cmd/edgeguard-ctl/main.go @@ -9,7 +9,7 @@ import ( "os" ) -var version = "1.0.73" +var version = "1.0.74" const usage = `edgeguard-ctl — EdgeGuard CLI diff --git a/cmd/edgeguard-scheduler/main.go b/cmd/edgeguard-scheduler/main.go index b1bedad..ada0307 100644 --- a/cmd/edgeguard-scheduler/main.go +++ b/cmd/edgeguard-scheduler/main.go @@ -11,8 +11,10 @@ package main import ( "context" "encoding/json" + "fmt" "log/slog" "os" + "strconv" "time" "github.com/jackc/pgx/v5/pgxpool" @@ -21,6 +23,7 @@ import ( "git.netcell-it.de/projekte/edgeguard-native/internal/database" "git.netcell-it.de/projekte/edgeguard-native/internal/license" "git.netcell-it.de/projekte/edgeguard-native/internal/services/acme" + "git.netcell-it.de/projekte/edgeguard-native/internal/services/alerts" "git.netcell-it.de/projekte/edgeguard-native/internal/services/backup" "git.netcell-it.de/projekte/edgeguard-native/internal/services/certrenewer" licsvc "git.netcell-it.de/projekte/edgeguard-native/internal/services/license" @@ -28,7 +31,7 @@ import ( "git.netcell-it.de/projekte/edgeguard-native/internal/services/tlscerts" ) -var version = "1.0.73" +var version = "1.0.74" const ( // renewTickInterval — how often we re-evaluate expiring certs. @@ -92,10 +95,13 @@ func main() { slog.Info("scheduler: daily backup enabled", "tick", backupTickInterval, "dir", backupSvc.BackupDir, "keep_n", backup.DefaultKeepN) + alertSvc := alerts.New(pool) + alertDedupe := newDedupe(12 * time.Hour) + if renewer != nil { - runRenewer(ctx, renewer) + runRenewer(ctx, renewer, alertSvc, alertDedupe) } - runLicenseVerify(ctx, licClient, licKeyStore, licRepo, nodeID) + runLicenseVerify(ctx, licClient, licKeyStore, licRepo, nodeID, alertSvc, alertDedupe) // Lokale Node-ID für config-hash-refresh. EnsureNodeID liefert // dieselbe ID die die API hat (gleiches /var/lib/edgeguard/node-id). @@ -118,18 +124,80 @@ func main() { select { case <-renewTick.C: if renewer != nil { - runRenewer(ctx, renewer) + runRenewer(ctx, renewer, alertSvc, alertDedupe) } + runCertExpiryCheck(ctx, tlsRepo, alertSvc, alertDedupe) case <-licTick.C: - runLicenseVerify(ctx, licClient, licKeyStore, licRepo, nodeID) + runLicenseVerify(ctx, licClient, licKeyStore, licRepo, nodeID, alertSvc, alertDedupe) case <-backupTick.C: - runBackup(ctx, backupSvc, version) + runBackup(ctx, backupSvc, version, alertSvc) case <-hashTick.C: runConfigHash(ctx, pool, localID) } } } +// dedupe verhindert dass derselbe Alert-Key (z.B. "cert.expiring:utm-1.netcell-it.de") +// öfter als alle 12h gefeuert wird. In-memory — Scheduler-Restart +// resettet, was OK ist (Operator soll bei restart wieder einen kennen- +// lernen-Event sehen können). +type dedupe struct { + ttl time.Duration + last map[string]time.Time +} + +func newDedupe(ttl time.Duration) *dedupe { return &dedupe{ttl: ttl, last: map[string]time.Time{}} } + +func (d *dedupe) shouldFire(key string) bool { + now := time.Now() + if last, ok := d.last[key]; ok && now.Sub(last) < d.ttl { + return false + } + d.last[key] = now + return true +} + +// runCertExpiryCheck prüft tls_certs auf bevorstehende Expiry. Warning +// bei <14d Restzeit. Dedupe pro Cert-Name 12h damit der scheduler +// nicht alle 6h dieselbe Warnung feuert. +func runCertExpiryCheck(ctx context.Context, repo *tlscerts.Repo, + a *alerts.Service, d *dedupe) { + if repo == nil || a == nil { + return + } + certs, err := repo.List(ctx) + if err != nil { + slog.Warn("scheduler: cert-expiry list failed", "error", err) + return + } + threshold := 14 * 24 * time.Hour + now := time.Now() + for _, c := range certs { + if c.NotAfter == nil { + continue + } + remain := c.NotAfter.Sub(now) + if remain > threshold || remain < -90*24*time.Hour { + continue + } + key := "cert.expiring:" + c.Domain + if !d.shouldFire(key) { + continue + } + days := int(remain.Hours() / 24) + sev := alerts.SeverityWarning + if days < 3 { + sev = alerts.SeverityError + } + _, err := a.Fire(ctx, "cert.expiring", sev, + "TLS-Zertifikat läuft ab: "+c.Domain, + "Cert für "+c.Domain+" läuft in "+strconv.Itoa(days)+" Tagen ab ("+c.NotAfter.Format(time.RFC3339)+"). Renewer-Status: "+c.Status) + if err != nil { + slog.Warn("scheduler: alert fire failed", "error", err) + } + } +} + // runConfigHash berechnet den Hash und schreibt ihn in ha_nodes. // Pool kann nil sein (scheduler-pool-fail beim boot) — dann no-op. func runConfigHash(ctx context.Context, pool *pgxpoolPool, localID string) { @@ -149,11 +217,16 @@ func runConfigHash(ctx context.Context, pool *pgxpoolPool, localID string) { type pgxpoolPool = pgxpool.Pool // runBackup führt einen scheduled Backup aus + prunet alte. Failures -// loggen wir nur — der Tick läuft morgen wieder, kein Notfall. -func runBackup(ctx context.Context, svc *backup.Service, version string) { +// loggen wir + alarmieren — verlorene Backups sind kritisch. +func runBackup(ctx context.Context, svc *backup.Service, version string, a *alerts.Service) { res, err := svc.Run(ctx, backup.KindScheduled, version) if err != nil { slog.Warn("scheduler: backup failed", "error", err, "file", res.File) + if a != nil { + _, _ = a.Fire(ctx, "backup.failed", alerts.SeverityError, + "Backup fehlgeschlagen", + "Scheduled Backup konnte nicht erstellt werden: "+err.Error()) + } return } slog.Info("scheduler: backup done", @@ -167,8 +240,9 @@ func runBackup(ctx context.Context, svc *backup.Service, version string) { // runLicenseVerify performs a single re-verify pass. Empty key = no-op // (box stays in trial), so this is safe to call on every tick. +// Bei valid:false-Antwort + Stand >7d alt → Warnung an Alerts. func runLicenseVerify(ctx context.Context, c *license.Client, ks *license.KeyStore, - repo *licsvc.Repo, nodeID string) { + repo *licsvc.Repo, nodeID string, a *alerts.Service, d *dedupe) { key := ks.Get() if key == "" { slog.Debug("scheduler: license verify skipped — no key") @@ -194,15 +268,36 @@ func runLicenseVerify(ctx context.Context, c *license.Client, ks *license.KeySto } slog.Info("scheduler: license verified", "status", status, "valid", res.Valid, "expires_at", res.ExpiresAt) + + // Alarm bei ungültiger Lizenz (revoked, expired) — dedupe 12h damit + // der Operator nicht alle 24h denselben Alert bekommt. + if a != nil && d != nil && !res.Valid { + if d.shouldFire("license.invalid") { + _, _ = a.Fire(ctx, "license.invalid", alerts.SeverityError, + "License "+status, + "License-Server liefert valid=false. Reason: "+res.Reason) + } + } } -func runRenewer(ctx context.Context, r *certrenewer.Service) { +func runRenewer(ctx context.Context, r *certrenewer.Service, a *alerts.Service, d *dedupe) { res, err := r.Run(ctx) if err != nil { slog.Error("scheduler: renewer run failed", "error", err) + if a != nil && d != nil && d.shouldFire("cert.renewer.run_failed") { + _, _ = a.Fire(ctx, "cert.renewer.run_failed", alerts.SeverityError, + "ACME-Renewer-Lauf fehlgeschlagen", + "Certrenewer-Cycle abgebrochen: "+err.Error()) + } return } slog.Info("scheduler: renewer pass complete", "checked", res.Checked, "renewed", res.Renewed, "failed", res.Failed, "skipped", res.Skipped) + if a != nil && res.Failed > 0 && d != nil && d.shouldFire("cert.renew_failed") { + _, _ = a.Fire(ctx, "cert.renew_failed", alerts.SeverityError, + "Cert-Renewal teilweise fehlgeschlagen", + fmt.Sprintf("Renewer-Cycle: %d checked, %d renewed, %d failed, %d skipped", + res.Checked, res.Renewed, res.Failed, res.Skipped)) + } } diff --git a/internal/database/migrations/0021_alerts.sql b/internal/database/migrations/0021_alerts.sql new file mode 100644 index 0000000..c03bdad --- /dev/null +++ b/internal/database/migrations/0021_alerts.sql @@ -0,0 +1,49 @@ +-- +goose Up +-- +goose StatementBegin + +-- Health-Alarme: Webhook + Email-Notification bei kritischen Events. +-- Triggers leben im edgeguard-scheduler (cert-expiry, backup-fail, +-- license-verify-fail). Pro Event wird optional in jeden aktiven +-- Channel gepushed; das Ergebnis (success/error) landet in alert_events +-- damit der Operator-UI History+Failure-Reason zeigen kann. + +CREATE TABLE IF NOT EXISTS alert_channels ( + id BIGSERIAL PRIMARY KEY, + name TEXT NOT NULL, + kind TEXT NOT NULL, + target TEXT NOT NULL, -- webhook-URL oder "to"-Email + settings JSONB NOT NULL DEFAULT '{}'::jsonb, + active BOOLEAN NOT NULL DEFAULT TRUE, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + CONSTRAINT alert_channels_kind_check + CHECK (kind IN ('webhook', 'email')) +); + +CREATE INDEX IF NOT EXISTS idx_alert_channels_active + ON alert_channels (active) WHERE active; + +CREATE TABLE IF NOT EXISTS alert_events ( + id BIGSERIAL PRIMARY KEY, + kind TEXT NOT NULL, -- cert.expiring | backup.failed | license.invalid | test + severity TEXT NOT NULL, -- info | warning | error | critical + subject TEXT NOT NULL, + message TEXT NOT NULL, + sent_to JSONB NOT NULL DEFAULT '[]'::jsonb, -- [{channel_id, ok, error}, ...] + fired_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + CONSTRAINT alert_events_severity_check + CHECK (severity IN ('info', 'warning', 'error', 'critical')) +); + +CREATE INDEX IF NOT EXISTS idx_alert_events_fired_at + ON alert_events (fired_at DESC); +CREATE INDEX IF NOT EXISTS idx_alert_events_kind + ON alert_events (kind); + +-- +goose StatementEnd + +-- +goose Down +-- +goose StatementBegin +DROP TABLE IF EXISTS alert_events; +DROP TABLE IF EXISTS alert_channels; +-- +goose StatementEnd diff --git a/internal/handlers/alerts.go b/internal/handlers/alerts.go new file mode 100644 index 0000000..e483332 --- /dev/null +++ b/internal/handlers/alerts.go @@ -0,0 +1,134 @@ +package handlers + +import ( + "errors" + "strconv" + + "github.com/gin-gonic/gin" + + "git.netcell-it.de/projekte/edgeguard-native/internal/handlers/response" + "git.netcell-it.de/projekte/edgeguard-native/internal/services/alerts" + "git.netcell-it.de/projekte/edgeguard-native/internal/services/audit" +) + +// AlertsHandler exposes: +// +// GET /api/v1/alerts/channels +// POST /api/v1/alerts/channels +// PUT /api/v1/alerts/channels/:id +// DELETE /api/v1/alerts/channels/:id +// POST /api/v1/alerts/test — Test-Event in alle aktiven Channels +// GET /api/v1/alerts/events?limit=N — History +type AlertsHandler struct { + Service *alerts.Service + Audit *audit.Repo + NodeID string +} + +func NewAlertsHandler(s *alerts.Service, a *audit.Repo, nodeID string) *AlertsHandler { + return &AlertsHandler{Service: s, Audit: a, NodeID: nodeID} +} + +func (h *AlertsHandler) Register(rg *gin.RouterGroup) { + g := rg.Group("/alerts") + g.GET("/channels", h.ListChannels) + g.POST("/channels", h.CreateChannel) + g.PUT("/channels/:id", h.UpdateChannel) + g.DELETE("/channels/:id", h.DeleteChannel) + g.POST("/test", h.TestFire) + g.GET("/events", h.ListEvents) +} + +func (h *AlertsHandler) ListChannels(c *gin.Context) { + out, err := h.Service.ListChannels(c.Request.Context()) + if err != nil { + response.Internal(c, err) + return + } + response.OK(c, gin.H{"channels": out}) +} + +func (h *AlertsHandler) CreateChannel(c *gin.Context) { + var req alerts.Channel + if err := c.ShouldBindJSON(&req); err != nil { + response.BadRequest(c, err) + return + } + out, err := h.Service.CreateChannel(c.Request.Context(), req) + if err != nil { + response.Internal(c, err) + return + } + _ = h.Audit.Log(c.Request.Context(), actorOf(c), "alert.channel.create", + out.Name, out, h.NodeID) + response.Created(c, out) +} + +func (h *AlertsHandler) UpdateChannel(c *gin.Context) { + id, ok := parseID(c) + if !ok { + return + } + var req alerts.Channel + if err := c.ShouldBindJSON(&req); err != nil { + response.BadRequest(c, err) + return + } + out, err := h.Service.UpdateChannel(c.Request.Context(), id, req) + if err != nil { + if errors.Is(err, alerts.ErrNotFound) { + response.NotFound(c, err) + return + } + response.Internal(c, err) + return + } + _ = h.Audit.Log(c.Request.Context(), actorOf(c), "alert.channel.update", + out.Name, out, h.NodeID) + response.OK(c, out) +} + +func (h *AlertsHandler) DeleteChannel(c *gin.Context) { + id, ok := parseID(c) + if !ok { + return + } + if err := h.Service.DeleteChannel(c.Request.Context(), id); err != nil { + if errors.Is(err, alerts.ErrNotFound) { + response.NotFound(c, err) + return + } + response.Internal(c, err) + return + } + _ = h.Audit.Log(c.Request.Context(), actorOf(c), "alert.channel.delete", + strconv.FormatInt(id, 10), gin.H{"id": id}, h.NodeID) + response.NoContent(c) +} + +func (h *AlertsHandler) TestFire(c *gin.Context) { + ev, err := h.Service.Fire(c.Request.Context(), "test", alerts.SeverityInfo, + "EdgeGuard-Test-Alert", + "Dies ist ein Test-Event. Wenn du das siehst, funktionieren deine Alert-Channels.") + if err != nil { + response.Internal(c, err) + return + } + _ = h.Audit.Log(c.Request.Context(), actorOf(c), "alert.test", "test", ev, h.NodeID) + response.OK(c, ev) +} + +func (h *AlertsHandler) ListEvents(c *gin.Context) { + limit := 100 + if v := c.Query("limit"); v != "" { + if n, err := strconv.Atoi(v); err == nil { + limit = n + } + } + out, err := h.Service.ListEvents(c.Request.Context(), limit) + if err != nil { + response.Internal(c, err) + return + } + response.OK(c, gin.H{"events": out}) +} diff --git a/internal/services/alerts/alerts.go b/internal/services/alerts/alerts.go new file mode 100644 index 0000000..198f00f --- /dev/null +++ b/internal/services/alerts/alerts.go @@ -0,0 +1,304 @@ +// Package alerts liefert Health-Notifications via Webhook + SMTP. +// Triggers (cert-expiry, backup-fail, license-invalid) leben im +// edgeguard-scheduler; Operator-Triggers (Test-Event) im API-Handler. +package alerts + +import ( + "bytes" + "context" + "crypto/tls" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "net/smtp" + "strconv" + "strings" + "time" + + "github.com/jackc/pgx/v5" + "github.com/jackc/pgx/v5/pgxpool" +) + +var ErrNotFound = errors.New("alert channel not found") + +// Severity ist die UI-Kategorie. Constraint in der DB. +type Severity string + +const ( + SeverityInfo Severity = "info" + SeverityWarning Severity = "warning" + SeverityError Severity = "error" + SeverityCritical Severity = "critical" +) + +// Kind klassifiziert die Trigger-Source (cert.expiring, +// backup.failed, license.invalid, test). Frei textbar — UI rendert +// es als Tag. + +// Channel ist eine Notification-Senke. +type Channel struct { + ID int64 `json:"id"` + Name string `json:"name"` + Kind string `json:"kind"` // webhook | email + Target string `json:"target"` + Settings json.RawMessage `json:"settings,omitempty"` + Active bool `json:"active"` + CreatedAt time.Time `json:"created_at"` + UpdatedAt time.Time `json:"updated_at"` +} + +// EmailSettings sind die SMTP-Konfig-Felder die in settings.JSONB +// für kind=email liegen. +type EmailSettings struct { + SMTPHost string `json:"smtp_host"` + SMTPPort int `json:"smtp_port"` + Username string `json:"username,omitempty"` + Password string `json:"password,omitempty"` + From string `json:"from"` + UseTLS bool `json:"use_tls"` +} + +// Event ist eine Row in alert_events. +type Event struct { + ID int64 `json:"id"` + Kind string `json:"kind"` + Severity Severity `json:"severity"` + Subject string `json:"subject"` + Message string `json:"message"` + SentTo json.RawMessage `json:"sent_to"` + FiredAt time.Time `json:"fired_at"` +} + +// SendResult pro Channel — landet als JSON-Array in sent_to. +type SendResult struct { + ChannelID int64 `json:"channel_id"` + ChannelName string `json:"channel_name"` + OK bool `json:"ok"` + Error string `json:"error,omitempty"` +} + +type Service struct { + Pool *pgxpool.Pool + HTTPClient *http.Client +} + +func New(pool *pgxpool.Pool) *Service { + return &Service{ + Pool: pool, + HTTPClient: &http.Client{ + Timeout: 10 * time.Second, + Transport: &http.Transport{ + // Webhooks gehen oft an interne Receivers — wir + // erlauben self-signed TLS auf der Webhook-Seite + // (Slack/Discord/Teams sind ohnehin valid signed). + TLSClientConfig: &tls.Config{InsecureSkipVerify: false}, + }, + }, + } +} + +// ListChannels gibt alle Channels zurück, newest-first. +func (s *Service) ListChannels(ctx context.Context) ([]Channel, error) { + rows, err := s.Pool.Query(ctx, ` +SELECT id, name, kind, target, settings, active, created_at, updated_at +FROM alert_channels ORDER BY id ASC`) + if err != nil { + return nil, err + } + defer rows.Close() + out := []Channel{} + for rows.Next() { + var c Channel + if err := rows.Scan(&c.ID, &c.Name, &c.Kind, &c.Target, + &c.Settings, &c.Active, &c.CreatedAt, &c.UpdatedAt); err != nil { + return nil, err + } + out = append(out, c) + } + return out, rows.Err() +} + +func (s *Service) CreateChannel(ctx context.Context, c Channel) (*Channel, error) { + if c.Settings == nil || len(c.Settings) == 0 { + c.Settings = json.RawMessage(`{}`) + } + row := s.Pool.QueryRow(ctx, ` +INSERT INTO alert_channels (name, kind, target, settings, active) +VALUES ($1, $2, $3, $4, $5) +RETURNING id, name, kind, target, settings, active, created_at, updated_at`, + c.Name, c.Kind, c.Target, c.Settings, c.Active) + var out Channel + if err := row.Scan(&out.ID, &out.Name, &out.Kind, &out.Target, + &out.Settings, &out.Active, &out.CreatedAt, &out.UpdatedAt); err != nil { + return nil, err + } + return &out, nil +} + +func (s *Service) UpdateChannel(ctx context.Context, id int64, c Channel) (*Channel, error) { + if c.Settings == nil || len(c.Settings) == 0 { + c.Settings = json.RawMessage(`{}`) + } + row := s.Pool.QueryRow(ctx, ` +UPDATE alert_channels SET + name = $1, kind = $2, target = $3, settings = $4, active = $5, + updated_at = NOW() +WHERE id = $6 +RETURNING id, name, kind, target, settings, active, created_at, updated_at`, + c.Name, c.Kind, c.Target, c.Settings, c.Active, id) + var out Channel + if err := row.Scan(&out.ID, &out.Name, &out.Kind, &out.Target, + &out.Settings, &out.Active, &out.CreatedAt, &out.UpdatedAt); err != nil { + if errors.Is(err, pgx.ErrNoRows) { + return nil, ErrNotFound + } + return nil, err + } + return &out, nil +} + +func (s *Service) DeleteChannel(ctx context.Context, id int64) error { + tag, err := s.Pool.Exec(ctx, `DELETE FROM alert_channels WHERE id = $1`, id) + if err != nil { + return err + } + if tag.RowsAffected() == 0 { + return ErrNotFound + } + return nil +} + +// ListEvents liefert die letzten N Events newest-first. +func (s *Service) ListEvents(ctx context.Context, limit int) ([]Event, error) { + if limit <= 0 || limit > 500 { + limit = 100 + } + rows, err := s.Pool.Query(ctx, ` +SELECT id, kind, severity, subject, message, sent_to, fired_at +FROM alert_events ORDER BY fired_at DESC, id DESC LIMIT $1`, limit) + if err != nil { + return nil, err + } + defer rows.Close() + out := []Event{} + for rows.Next() { + var e Event + if err := rows.Scan(&e.ID, &e.Kind, &e.Severity, &e.Subject, + &e.Message, &e.SentTo, &e.FiredAt); err != nil { + return nil, err + } + out = append(out, e) + } + return out, rows.Err() +} + +// Fire dispatch'ed einen Event an alle aktiven Channels und persistiert +// das Ergebnis. Non-fatal — Send-Failures werden im sent_to-JSON +// dokumentiert, der Event selbst landet in jedem Fall in der History. +func (s *Service) Fire(ctx context.Context, kind string, severity Severity, + subject, message string) (*Event, error) { + chans, err := s.ListChannels(ctx) + if err != nil { + return nil, err + } + results := []SendResult{} + for _, c := range chans { + if !c.Active { + continue + } + r := SendResult{ChannelID: c.ID, ChannelName: c.Name} + var sendErr error + switch c.Kind { + case "webhook": + sendErr = s.sendWebhook(ctx, c, kind, severity, subject, message) + case "email": + sendErr = s.sendEmail(c, severity, subject, message) + default: + sendErr = fmt.Errorf("unknown kind %q", c.Kind) + } + if sendErr != nil { + r.OK = false + r.Error = sendErr.Error() + } else { + r.OK = true + } + results = append(results, r) + } + sentJSON, _ := json.Marshal(results) + + var e Event + err = s.Pool.QueryRow(ctx, ` +INSERT INTO alert_events (kind, severity, subject, message, sent_to) +VALUES ($1, $2, $3, $4, $5) +RETURNING id, kind, severity, subject, message, sent_to, fired_at`, + kind, string(severity), subject, message, sentJSON). + Scan(&e.ID, &e.Kind, &e.Severity, &e.Subject, &e.Message, &e.SentTo, &e.FiredAt) + if err != nil { + return nil, err + } + return &e, nil +} + +// sendWebhook POSTet ein JSON-Payload mit kind+severity+subject+message +// + ISO-timestamp. Slack/Discord/Teams akzeptieren das (Discord +// braucht "content"-Feld; wir liefern beides damit der Operator +// keinen Adapter braucht). +func (s *Service) sendWebhook(ctx context.Context, c Channel, kind string, + sev Severity, subject, message string) error { + payload := map[string]any{ + "kind": kind, + "severity": string(sev), + "subject": subject, + "message": message, + "content": fmt.Sprintf("[%s] %s: %s\n%s", + strings.ToUpper(string(sev)), kind, subject, message), + "text": fmt.Sprintf("*[%s]* %s — %s\n%s", + strings.ToUpper(string(sev)), kind, subject, message), + "fired_at": time.Now().UTC().Format(time.RFC3339), + } + body, _ := json.Marshal(payload) + req, err := http.NewRequestWithContext(ctx, "POST", c.Target, bytes.NewReader(body)) + if err != nil { + return err + } + req.Header.Set("Content-Type", "application/json") + req.Header.Set("User-Agent", "edgeguard-alerts/1.0") + resp, err := s.HTTPClient.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + b, _ := io.ReadAll(io.LimitReader(resp.Body, 512)) + return fmt.Errorf("webhook %d: %s", resp.StatusCode, strings.TrimSpace(string(b))) + } + return nil +} + +// sendEmail nutzt net/smtp + STARTTLS optional. Settings-Felder +// (host/port/user/pass/from/use_tls) liegen in c.Settings. +func (s *Service) sendEmail(c Channel, sev Severity, subject, message string) error { + var es EmailSettings + if len(c.Settings) > 0 { + if err := json.Unmarshal(c.Settings, &es); err != nil { + return fmt.Errorf("parse settings: %w", err) + } + } + if es.SMTPHost == "" || es.SMTPPort == 0 || es.From == "" { + return errors.New("email settings incomplete (smtp_host/smtp_port/from required)") + } + addr := es.SMTPHost + ":" + strconv.Itoa(es.SMTPPort) + msg := []byte("From: " + es.From + "\r\n" + + "To: " + c.Target + "\r\n" + + "Subject: [" + strings.ToUpper(string(sev)) + "] " + subject + "\r\n" + + "Content-Type: text/plain; charset=utf-8\r\n" + + "\r\n" + + message + "\r\n") + var auth smtp.Auth + if es.Username != "" { + auth = smtp.PlainAuth("", es.Username, es.Password, es.SMTPHost) + } + return smtp.SendMail(addr, auth, es.From, []string{c.Target}, msg) +} diff --git a/management-ui/src/App.tsx b/management-ui/src/App.tsx index e1aa116..0125467 100644 --- a/management-ui/src/App.tsx +++ b/management-ui/src/App.tsx @@ -29,6 +29,7 @@ const FirewallLivePage = lazy(() => import('./pages/FirewallLive')) const LogsPage = lazy(() => import('./pages/Logs')) const BackupsPage = lazy(() => import('./pages/Backups')) const DiagnosticsPage = lazy(() => import('./pages/Diagnostics')) +const AlertsPage = lazy(() => import('./pages/Alerts')) const LicensePage = lazy(() => import('./pages/License')) const SettingsPage = lazy(() => import('./pages/Settings')) @@ -117,6 +118,7 @@ export default function App() { } /> } /> } /> + } /> } /> } /> diff --git a/management-ui/src/components/Layout/AppLayout.tsx b/management-ui/src/components/Layout/AppLayout.tsx index 5282fdd..79326d6 100644 --- a/management-ui/src/components/Layout/AppLayout.tsx +++ b/management-ui/src/components/Layout/AppLayout.tsx @@ -22,6 +22,7 @@ const PAGE_TITLES: Record = { '/logs': 'nav.logs', '/backups': 'nav.backups', '/diagnostics': 'nav.diagnostics', + '/alerts': 'nav.alerts', '/license': 'nav.license', '/settings': 'nav.settings', } diff --git a/management-ui/src/components/Layout/Sidebar.tsx b/management-ui/src/components/Layout/Sidebar.tsx index 6a20b7a..c7df56d 100644 --- a/management-ui/src/components/Layout/Sidebar.tsx +++ b/management-ui/src/components/Layout/Sidebar.tsx @@ -2,6 +2,7 @@ import { Link, useLocation } from 'react-router-dom' import type { ReactNode } from 'react' import { ApartmentOutlined, + BellOutlined, ClockCircleOutlined, CloudServerOutlined, ClusterOutlined, @@ -76,6 +77,7 @@ const NAV: NavSection[] = [ { path: '/cluster', labelKey: 'nav.cluster', icon: }, { path: '/logs', labelKey: 'nav.logs', icon: }, { path: '/diagnostics', labelKey: 'nav.diagnostics', icon: }, + { path: '/alerts', labelKey: 'nav.alerts', icon: }, { path: '/backups', labelKey: 'nav.backups', icon: }, { path: '/license', labelKey: 'nav.license', icon: }, { path: '/settings', labelKey: 'nav.settings', icon: }, @@ -83,7 +85,7 @@ const NAV: NavSection[] = [ }, ] -const VERSION = '1.0.73' +const VERSION = '1.0.74' // Sidebar-Pattern 1:1 aus netcell-webpanel (enconf) übernommen: // -