feat(alerts): Health-Alarme via Webhook + Email-SMTP

Sidebar → System → Alarme.

Migration 0021: alert_channels (kind=webhook|email, target, settings,
active) + alert_events (kind, severity=info/warning/error/critical,
subject, message, sent_to JSONB).

internal/services/alerts/:
  - Fire(kind, severity, subject, message) — broadcastet an alle
    aktiven Channels + persistiert Event mit per-Channel-Result
    (ok/error) in sent_to.
  - Webhook-Sender: POST JSON {kind, severity, subject, message,
    content, text, fired_at}. Slack/Discord/Teams akzeptieren das
    out-of-the-box ohne Adapter (content + text-Felder gleichzeitig).
  - Email-Sender: net/smtp + STARTTLS optional. Settings (smtp_host,
    smtp_port, username/password, from, use_tls) liegen in
    channel.settings JSONB.

internal/handlers/alerts.go: CRUD + POST /alerts/test + GET
/alerts/events (history).

Scheduler-Trigger:
  - cert.expiring  — TLS-Cert <14 Tage Restzeit (12h-dedupe pro cert)
                     severity warning, <3 Tage → error
  - cert.renew_failed       — Renewer-Cycle hat fails
  - cert.renewer.run_failed — Renewer-Cycle abgebrochen
  - backup.failed  — Scheduled Backup error
  - license.invalid — License-Server liefert valid=false

In-process Dedupe (12h TTL, map[key]time.Time) verhindert dass
identische Alerts in Schleifen feuern.

UI (pages/Alerts): Tabs Channels (CRUD-Tabelle, Add-Modal mit
conditional-Email-Fields) + History (200 letzte Events mit
severity-Tag + per-Channel-Delivery-Status). Header-Button
„Test-Alert" feuert einen Test-Event in alle aktiven Channels.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Debian
2026-05-13 15:57:05 +02:00
parent 4a34629023
commit 81a8217493
13 changed files with 1012 additions and 14 deletions

View File

@@ -1 +1 @@
1.0.73
1.0.74

View File

@@ -30,6 +30,7 @@ import (
wgrender "git.netcell-it.de/projekte/edgeguard-native/internal/wireguard"
"git.netcell-it.de/projekte/edgeguard-native/internal/handlers/response"
"git.netcell-it.de/projekte/edgeguard-native/internal/services/acme"
"git.netcell-it.de/projekte/edgeguard-native/internal/services/alerts"
"git.netcell-it.de/projekte/edgeguard-native/internal/services/audit"
"git.netcell-it.de/projekte/edgeguard-native/internal/services/backends"
"git.netcell-it.de/projekte/edgeguard-native/internal/services/backendservers"
@@ -52,7 +53,7 @@ import (
wgsvc "git.netcell-it.de/projekte/edgeguard-native/internal/services/wireguard"
)
var version = "1.0.73"
var version = "1.0.74"
func main() {
addr := os.Getenv("EDGEGUARD_API_ADDR")
@@ -205,6 +206,7 @@ func main() {
// Jobs laufen im edgeguard-scheduler.
handlers.NewBackupHandler(backup.New(pool), auditRepo, nodeID, version).Register(authed)
handlers.NewDiagnosticsHandler().Register(authed)
handlers.NewAlertsHandler(alerts.New(pool), auditRepo, nodeID).Register(authed)
handlers.NewTLSCertsHandler(tlsRepo, auditRepo, nodeID, acmeService).Register(authed)
// Firewall reload: nach jeder Mutation den Renderer neu fahren
// (writes ruleset.nft + sudo nft -f). Errors loggen, nicht failen.

View File

@@ -9,7 +9,7 @@ import (
"os"
)
var version = "1.0.73"
var version = "1.0.74"
const usage = `edgeguard-ctl — EdgeGuard CLI

View File

@@ -11,8 +11,10 @@ package main
import (
"context"
"encoding/json"
"fmt"
"log/slog"
"os"
"strconv"
"time"
"github.com/jackc/pgx/v5/pgxpool"
@@ -21,6 +23,7 @@ import (
"git.netcell-it.de/projekte/edgeguard-native/internal/database"
"git.netcell-it.de/projekte/edgeguard-native/internal/license"
"git.netcell-it.de/projekte/edgeguard-native/internal/services/acme"
"git.netcell-it.de/projekte/edgeguard-native/internal/services/alerts"
"git.netcell-it.de/projekte/edgeguard-native/internal/services/backup"
"git.netcell-it.de/projekte/edgeguard-native/internal/services/certrenewer"
licsvc "git.netcell-it.de/projekte/edgeguard-native/internal/services/license"
@@ -28,7 +31,7 @@ import (
"git.netcell-it.de/projekte/edgeguard-native/internal/services/tlscerts"
)
var version = "1.0.73"
var version = "1.0.74"
const (
// renewTickInterval — how often we re-evaluate expiring certs.
@@ -92,10 +95,13 @@ func main() {
slog.Info("scheduler: daily backup enabled", "tick", backupTickInterval,
"dir", backupSvc.BackupDir, "keep_n", backup.DefaultKeepN)
alertSvc := alerts.New(pool)
alertDedupe := newDedupe(12 * time.Hour)
if renewer != nil {
runRenewer(ctx, renewer)
runRenewer(ctx, renewer, alertSvc, alertDedupe)
}
runLicenseVerify(ctx, licClient, licKeyStore, licRepo, nodeID)
runLicenseVerify(ctx, licClient, licKeyStore, licRepo, nodeID, alertSvc, alertDedupe)
// Lokale Node-ID für config-hash-refresh. EnsureNodeID liefert
// dieselbe ID die die API hat (gleiches /var/lib/edgeguard/node-id).
@@ -118,18 +124,80 @@ func main() {
select {
case <-renewTick.C:
if renewer != nil {
runRenewer(ctx, renewer)
runRenewer(ctx, renewer, alertSvc, alertDedupe)
}
runCertExpiryCheck(ctx, tlsRepo, alertSvc, alertDedupe)
case <-licTick.C:
runLicenseVerify(ctx, licClient, licKeyStore, licRepo, nodeID)
runLicenseVerify(ctx, licClient, licKeyStore, licRepo, nodeID, alertSvc, alertDedupe)
case <-backupTick.C:
runBackup(ctx, backupSvc, version)
runBackup(ctx, backupSvc, version, alertSvc)
case <-hashTick.C:
runConfigHash(ctx, pool, localID)
}
}
}
// dedupe suppresses repeated alerts: a given key (for example
// "cert.expiring:utm-1.netcell-it.de") is allowed to fire at most once
// per ttl. State is kept in memory only, so a scheduler restart resets
// it and the operator sees each still-active alert once more.
type dedupe struct {
	ttl  time.Duration
	last map[string]time.Time
}

// newDedupe returns an empty dedupe with the given suppression window.
func newDedupe(ttl time.Duration) *dedupe {
	return &dedupe{
		ttl:  ttl,
		last: make(map[string]time.Time),
	}
}

// shouldFire reports whether an alert for key may be sent now. When it
// returns true the current time is recorded for key, starting a new
// suppression window.
func (d *dedupe) shouldFire(key string) bool {
	now := time.Now()
	if prev, seen := d.last[key]; !seen || now.Sub(prev) >= d.ttl {
		d.last[key] = now
		return true
	}
	return false
}
// runCertExpiryCheck scans the certificates returned by repo.List and
// fires a "cert.expiring" alert for each one that is close to (or
// recently past) its NotAfter date.
//
// Certificates with more than 14 days left are skipped, as are those that
// expired more than 90 days ago and those without a NotAfter value.
// Severity is warning, escalating to error below 3 whole days. Alerts are
// de-duplicated per domain through d so that the periodic tick does not
// repeat the same warning within the dedupe TTL.
//
// A nil repo, alert service or dedupe turns the call into a no-op, which
// matches how the other scheduler jobs treat missing alert dependencies.
func runCertExpiryCheck(ctx context.Context, repo *tlscerts.Repo,
	a *alerts.Service, d *dedupe) {
	if repo == nil || a == nil || d == nil {
		return
	}
	certs, err := repo.List(ctx)
	if err != nil {
		slog.Warn("scheduler: cert-expiry list failed", "error", err)
		return
	}
	const (
		warnWindow = 14 * 24 * time.Hour // alert once less than this remains
		staleAfter = 90 * 24 * time.Hour // ignore certs expired longer than this
	)
	now := time.Now()
	for _, c := range certs {
		if c.NotAfter == nil {
			continue
		}
		remain := c.NotAfter.Sub(now)
		if remain > warnWindow || remain < -staleAfter {
			continue
		}
		if !d.shouldFire("cert.expiring:" + c.Domain) {
			continue
		}
		// Whole days, truncated toward zero; zero or negative once expired.
		days := int(remain.Hours() / 24)
		sev := alerts.SeverityWarning
		if days < 3 {
			sev = alerts.SeverityError
		}
		notAfter := c.NotAfter.Format(time.RFC3339)
		subject := "TLS-Zertifikat läuft ab: " + c.Domain
		body := "Cert für " + c.Domain + " läuft in " + strconv.Itoa(days) +
			" Tagen ab (" + notAfter + "). Renewer-Status: " + c.Status
		if remain < 0 {
			// Already expired: avoid the misleading "expires in -N days".
			subject = "TLS-Zertifikat abgelaufen: " + c.Domain
			body = "Cert für " + c.Domain + " ist seit " + strconv.Itoa(-days) +
				" Tagen abgelaufen (" + notAfter + "). Renewer-Status: " + c.Status
		}
		if _, err := a.Fire(ctx, "cert.expiring", sev, subject, body); err != nil {
			slog.Warn("scheduler: alert fire failed", "error", err)
		}
	}
}
// runConfigHash berechnet den Hash und schreibt ihn in ha_nodes.
// Pool kann nil sein (scheduler-pool-fail beim boot) — dann no-op.
func runConfigHash(ctx context.Context, pool *pgxpoolPool, localID string) {
@@ -149,11 +217,16 @@ func runConfigHash(ctx context.Context, pool *pgxpoolPool, localID string) {
type pgxpoolPool = pgxpool.Pool
// runBackup führt einen scheduled Backup aus + prunet alte. Failures
// loggen wir nur — der Tick läuft morgen wieder, kein Notfall.
func runBackup(ctx context.Context, svc *backup.Service, version string) {
// loggen wir + alarmieren — verlorene Backups sind kritisch.
func runBackup(ctx context.Context, svc *backup.Service, version string, a *alerts.Service) {
res, err := svc.Run(ctx, backup.KindScheduled, version)
if err != nil {
slog.Warn("scheduler: backup failed", "error", err, "file", res.File)
if a != nil {
_, _ = a.Fire(ctx, "backup.failed", alerts.SeverityError,
"Backup fehlgeschlagen",
"Scheduled Backup konnte nicht erstellt werden: "+err.Error())
}
return
}
slog.Info("scheduler: backup done",
@@ -167,8 +240,9 @@ func runBackup(ctx context.Context, svc *backup.Service, version string) {
// runLicenseVerify performs a single re-verify pass. Empty key = no-op
// (box stays in trial), so this is safe to call on every tick.
// Bei valid:false-Antwort + Stand >7d alt → Warnung an Alerts.
func runLicenseVerify(ctx context.Context, c *license.Client, ks *license.KeyStore,
repo *licsvc.Repo, nodeID string) {
repo *licsvc.Repo, nodeID string, a *alerts.Service, d *dedupe) {
key := ks.Get()
if key == "" {
slog.Debug("scheduler: license verify skipped — no key")
@@ -194,15 +268,36 @@ func runLicenseVerify(ctx context.Context, c *license.Client, ks *license.KeySto
}
slog.Info("scheduler: license verified",
"status", status, "valid", res.Valid, "expires_at", res.ExpiresAt)
// Alarm bei ungültiger Lizenz (revoked, expired) — dedupe 12h damit
// der Operator nicht alle 24h denselben Alert bekommt.
if a != nil && d != nil && !res.Valid {
if d.shouldFire("license.invalid") {
_, _ = a.Fire(ctx, "license.invalid", alerts.SeverityError,
"License "+status,
"License-Server liefert valid=false. Reason: "+res.Reason)
}
}
}
func runRenewer(ctx context.Context, r *certrenewer.Service) {
// runRenewer executes one certificate-renewal pass via r.Run and raises
// alerts for failures:
//
//   - "cert.renewer.run_failed" when the pass itself returns an error,
//   - "cert.renew_failed" when the pass completed but res.Failed > 0.
//
// Alerts are only sent when both a and d are non-nil, and each alert kind
// is rate-limited through d. A failure to deliver or persist an alert is
// logged (like in the cert-expiry check) instead of being dropped.
func runRenewer(ctx context.Context, r *certrenewer.Service, a *alerts.Service, d *dedupe) {
	alertsEnabled := a != nil && d != nil

	res, err := r.Run(ctx)
	if err != nil {
		slog.Error("scheduler: renewer run failed", "error", err)
		if alertsEnabled && d.shouldFire("cert.renewer.run_failed") {
			if _, fireErr := a.Fire(ctx, "cert.renewer.run_failed", alerts.SeverityError,
				"ACME-Renewer-Lauf fehlgeschlagen",
				"Certrenewer-Cycle abgebrochen: "+err.Error()); fireErr != nil {
				slog.Warn("scheduler: alert fire failed",
					"kind", "cert.renewer.run_failed", "error", fireErr)
			}
		}
		return
	}
	slog.Info("scheduler: renewer pass complete",
		"checked", res.Checked, "renewed", res.Renewed,
		"failed", res.Failed, "skipped", res.Skipped)
	if alertsEnabled && res.Failed > 0 && d.shouldFire("cert.renew_failed") {
		if _, fireErr := a.Fire(ctx, "cert.renew_failed", alerts.SeverityError,
			"Cert-Renewal teilweise fehlgeschlagen",
			fmt.Sprintf("Renewer-Cycle: %d checked, %d renewed, %d failed, %d skipped",
				res.Checked, res.Renewed, res.Failed, res.Skipped)); fireErr != nil {
			slog.Warn("scheduler: alert fire failed",
				"kind", "cert.renew_failed", "error", fireErr)
		}
	}
}

View File

@@ -0,0 +1,49 @@
-- +goose Up
-- +goose StatementBegin
-- Health alerts: webhook + email notifications for critical events.
-- Triggers live in edgeguard-scheduler (cert expiry, backup failure,
-- license verification failure). Each event is pushed to every active
-- channel; the per-channel outcome (success/error) is stored in
-- alert_events so the operator UI can show history and failure reasons.
--
-- alert_channels: one row per notification sink.
CREATE TABLE IF NOT EXISTS alert_channels (
id BIGSERIAL PRIMARY KEY,
name TEXT NOT NULL,
kind TEXT NOT NULL,
target TEXT NOT NULL, -- webhook URL or recipient ("to") email address
settings JSONB NOT NULL DEFAULT '{}'::jsonb,
active BOOLEAN NOT NULL DEFAULT TRUE,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
CONSTRAINT alert_channels_kind_check
CHECK (kind IN ('webhook', 'email'))
);
-- Partial index: only rows with active = TRUE are indexed.
CREATE INDEX IF NOT EXISTS idx_alert_channels_active
ON alert_channels (active) WHERE active;
-- alert_events: history of fired alerts together with the delivery
-- result for each channel the alert was sent to.
CREATE TABLE IF NOT EXISTS alert_events (
id BIGSERIAL PRIMARY KEY,
kind TEXT NOT NULL, -- e.g. cert.expiring | backup.failed | license.invalid | test (free text, no constraint)
severity TEXT NOT NULL, -- info | warning | error | critical (enforced by the CHECK below)
subject TEXT NOT NULL,
message TEXT NOT NULL,
sent_to JSONB NOT NULL DEFAULT '[]'::jsonb, -- array of per-channel results: [{channel_id, ok, error}, ...]
fired_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
CONSTRAINT alert_events_severity_check
CHECK (severity IN ('info', 'warning', 'error', 'critical'))
);
-- Supports the newest-first history listing.
CREATE INDEX IF NOT EXISTS idx_alert_events_fired_at
ON alert_events (fired_at DESC);
-- Supports filtering events by trigger kind.
CREATE INDEX IF NOT EXISTS idx_alert_events_kind
ON alert_events (kind);
-- +goose StatementEnd
-- +goose Down
-- +goose StatementBegin
DROP TABLE IF EXISTS alert_events;
DROP TABLE IF EXISTS alert_channels;
-- +goose StatementEnd

134
internal/handlers/alerts.go Normal file
View File

@@ -0,0 +1,134 @@
package handlers
import (
"errors"
"strconv"
"github.com/gin-gonic/gin"
"git.netcell-it.de/projekte/edgeguard-native/internal/handlers/response"
"git.netcell-it.de/projekte/edgeguard-native/internal/services/alerts"
"git.netcell-it.de/projekte/edgeguard-native/internal/services/audit"
)
// AlertsHandler serves the alert-channel CRUD endpoints, the test-alert
// trigger and the event history. Register mounts the routes below on the
// router group it is given; the /api/v1 prefix shown here comes from
// that group (NOTE(review): prefix is not set in this file — confirm).
//
//	GET    /api/v1/alerts/channels
//	POST   /api/v1/alerts/channels
//	PUT    /api/v1/alerts/channels/:id
//	DELETE /api/v1/alerts/channels/:id
//	POST   /api/v1/alerts/test            — fires a test event to all active channels
//	GET    /api/v1/alerts/events?limit=N  — event history
type AlertsHandler struct {
Service *alerts.Service
Audit *audit.Repo
NodeID string
}
// NewAlertsHandler builds an AlertsHandler from the alert service, the
// audit repository and the ID of the local node (passed through to every
// audit entry the handler writes).
func NewAlertsHandler(s *alerts.Service, a *audit.Repo, nodeID string) *AlertsHandler {
	h := new(AlertsHandler)
	h.Service = s
	h.Audit = a
	h.NodeID = nodeID
	return h
}
// Register mounts all alert endpoints under "/alerts" on rg.
func (h *AlertsHandler) Register(rg *gin.RouterGroup) {
	g := rg.Group("/alerts")

	routes := []struct {
		method, path string
		handler      gin.HandlerFunc
	}{
		{"GET", "/channels", h.ListChannels},
		{"POST", "/channels", h.CreateChannel},
		{"PUT", "/channels/:id", h.UpdateChannel},
		{"DELETE", "/channels/:id", h.DeleteChannel},
		{"POST", "/test", h.TestFire},
		{"GET", "/events", h.ListEvents},
	}
	for _, rt := range routes {
		g.Handle(rt.method, rt.path, rt.handler)
	}
}
// ListChannels responds with every configured alert channel under the
// "channels" key, or with an internal error if the lookup fails.
func (h *AlertsHandler) ListChannels(c *gin.Context) {
	channels, err := h.Service.ListChannels(c.Request.Context())
	switch {
	case err != nil:
		response.Internal(c, err)
	default:
		response.OK(c, gin.H{"channels": channels})
	}
}
// CreateChannel decodes an alerts.Channel from the JSON body, stores it
// and answers 201 with the created row.
//
// A body that cannot be decoded, or whose kind is not "webhook"/"email",
// is rejected with 400. Checking kind here keeps a client mistake from
// surfacing as a 500 caused by the database CHECK constraint.
func (h *AlertsHandler) CreateChannel(c *gin.Context) {
	var req alerts.Channel
	if err := c.ShouldBindJSON(&req); err != nil {
		response.BadRequest(c, err)
		return
	}
	if req.Kind != "webhook" && req.Kind != "email" {
		response.BadRequest(c, errors.New(`kind must be "webhook" or "email"`))
		return
	}
	out, err := h.Service.CreateChannel(c.Request.Context(), req)
	if err != nil {
		response.Internal(c, err)
		return
	}
	// Audit logging is best-effort. The settings blob is left out on
	// purpose: for email channels it carries the SMTP password, which
	// must not end up in the audit trail.
	_ = h.Audit.Log(c.Request.Context(), actorOf(c), "alert.channel.create",
		out.Name, gin.H{
			"id":     out.ID,
			"name":   out.Name,
			"kind":   out.Kind,
			"target": out.Target,
			"active": out.Active,
		}, h.NodeID)
	response.Created(c, out)
}
// UpdateChannel replaces the channel identified by the :id path
// parameter with the alerts.Channel decoded from the JSON body and
// answers 200 with the updated row.
//
// Responses: 400 for an undecodable body or a kind other than
// "webhook"/"email" (otherwise the database CHECK constraint would turn
// the mistake into a 500), 404 when the service reports
// alerts.ErrNotFound, 500 for any other service error. When parseID
// reports failure the handler returns without writing a response itself
// (presumably parseID already did — verify against its definition).
func (h *AlertsHandler) UpdateChannel(c *gin.Context) {
	id, ok := parseID(c)
	if !ok {
		return
	}
	var req alerts.Channel
	if err := c.ShouldBindJSON(&req); err != nil {
		response.BadRequest(c, err)
		return
	}
	if req.Kind != "webhook" && req.Kind != "email" {
		response.BadRequest(c, errors.New(`kind must be "webhook" or "email"`))
		return
	}
	out, err := h.Service.UpdateChannel(c.Request.Context(), id, req)
	if err != nil {
		if errors.Is(err, alerts.ErrNotFound) {
			response.NotFound(c, err)
			return
		}
		response.Internal(c, err)
		return
	}
	// Audit logging is best-effort. The settings blob is left out on
	// purpose: for email channels it carries the SMTP password, which
	// must not end up in the audit trail.
	_ = h.Audit.Log(c.Request.Context(), actorOf(c), "alert.channel.update",
		out.Name, gin.H{
			"id":     out.ID,
			"name":   out.Name,
			"kind":   out.Kind,
			"target": out.Target,
			"active": out.Active,
		}, h.NodeID)
	response.OK(c, out)
}
// DeleteChannel removes the channel identified by the :id path parameter.
// It answers 204 on success, 404 when the service reports
// alerts.ErrNotFound and 500 for any other error. A successful delete is
// recorded in the audit log (best-effort).
func (h *AlertsHandler) DeleteChannel(c *gin.Context) {
	id, ok := parseID(c)
	if !ok {
		return
	}
	ctx := c.Request.Context()
	switch err := h.Service.DeleteChannel(ctx, id); {
	case err == nil:
		// deleted — fall through to audit + response
	case errors.Is(err, alerts.ErrNotFound):
		response.NotFound(c, err)
		return
	default:
		response.Internal(c, err)
		return
	}
	auditTarget := strconv.FormatInt(id, 10)
	_ = h.Audit.Log(c.Request.Context(), actorOf(c), "alert.channel.delete",
		auditTarget, gin.H{"id": id}, h.NodeID)
	response.NoContent(c)
}
// TestFire sends an info-level event of kind "test" through the alert
// service and returns the resulting event, including its per-channel
// delivery results. The call is recorded in the audit log (best-effort).
func (h *AlertsHandler) TestFire(c *gin.Context) {
	const (
		testSubject = "EdgeGuard-Test-Alert"
		testMessage = "Dies ist ein Test-Event. Wenn du das siehst, funktionieren deine Alert-Channels."
	)
	ev, err := h.Service.Fire(c.Request.Context(), "test", alerts.SeverityInfo,
		testSubject, testMessage)
	if err != nil {
		response.Internal(c, err)
		return
	}
	_ = h.Audit.Log(c.Request.Context(), actorOf(c), "alert.test", "test", ev, h.NodeID)
	response.OK(c, ev)
}
// ListEvents responds with the alert history under the "events" key.
// The optional "limit" query parameter is forwarded to the service when
// it parses as an integer; otherwise the default of 100 is used.
func (h *AlertsHandler) ListEvents(c *gin.Context) {
	limit := 100
	raw := c.Query("limit")
	if parsed, convErr := strconv.Atoi(raw); raw != "" && convErr == nil {
		limit = parsed
	}
	events, err := h.Service.ListEvents(c.Request.Context(), limit)
	if err != nil {
		response.Internal(c, err)
		return
	}
	response.OK(c, gin.H{"events": events})
}

View File

@@ -0,0 +1,304 @@
// Package alerts liefert Health-Notifications via Webhook + SMTP.
// Triggers (cert-expiry, backup-fail, license-invalid) leben im
// edgeguard-scheduler; Operator-Triggers (Test-Event) im API-Handler.
package alerts
import (
	"bytes"
	"context"
	"crypto/tls"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"mime"
	"net"
	"net/http"
	"net/smtp"
	"strconv"
	"strings"
	"time"

	"github.com/jackc/pgx/v5"
	"github.com/jackc/pgx/v5/pgxpool"
)
// ErrNotFound is returned by UpdateChannel and DeleteChannel when no
// alert_channels row matches the given id.
var ErrNotFound = errors.New("alert channel not found")
// Severity is the category of an alert as shown in the UI. The set of
// allowed values is also enforced by a CHECK constraint on
// alert_events.severity.
type Severity string
// Allowed Severity values, in ascending order of urgency.
const (
SeverityInfo Severity = "info"
SeverityWarning Severity = "warning"
SeverityError Severity = "error"
SeverityCritical Severity = "critical"
)
// The event "kind" (a plain string — there is no dedicated Go type)
// classifies the trigger source, e.g. cert.expiring, backup.failed,
// license.invalid or test. It is free text; the UI renders it as a tag.
//
// Channel is a notification sink: one row of alert_channels.
type Channel struct {
ID int64 `json:"id"`
Name string `json:"name"`
Kind string `json:"kind"` // "webhook" or "email"
Target string `json:"target"` // webhook URL (kind=webhook) or recipient address (kind=email)
Settings json.RawMessage `json:"settings,omitempty"` // raw JSONB; decoded as EmailSettings for kind=email
Active bool `json:"active"` // inactive channels are skipped by Fire
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
}
// EmailSettings holds the SMTP configuration stored in the settings
// JSONB column of a channel with kind=email. sendEmail requires
// SMTPHost, SMTPPort and From to be set.
type EmailSettings struct {
SMTPHost string `json:"smtp_host"`
SMTPPort int `json:"smtp_port"`
Username string `json:"username,omitempty"` // empty means: send without SMTP AUTH
Password string `json:"password,omitempty"`
From string `json:"from"` // envelope sender and From header
UseTLS bool `json:"use_tls"` // operator's TLS preference for the SMTP session
}
// Event is one row of alert_events: a fired alert plus its delivery
// outcome.
type Event struct {
ID int64 `json:"id"`
Kind string `json:"kind"`
Severity Severity `json:"severity"`
Subject string `json:"subject"`
Message string `json:"message"`
SentTo json.RawMessage `json:"sent_to"` // JSON array of SendResult, one entry per channel tried
FiredAt time.Time `json:"fired_at"`
}
// SendResult is the delivery outcome for a single channel. Fire stores
// the results of all channels as a JSON array in alert_events.sent_to.
type SendResult struct {
ChannelID int64 `json:"channel_id"`
ChannelName string `json:"channel_name"`
OK bool `json:"ok"`
Error string `json:"error,omitempty"` // send error text; empty when OK is true
}
// Service persists alert channels and events and delivers alerts to the
// configured channels.
type Service struct {
Pool *pgxpool.Pool // database pool used for all alert_channels / alert_events queries
HTTPClient *http.Client // client used to POST webhook payloads
}
// New returns a Service backed by pool, with an HTTP client for webhook
// delivery that times out after 10 seconds.
func New(pool *pgxpool.Pool) *Service {
return &Service{
Pool: pool,
HTTPClient: &http.Client{
Timeout: 10 * time.Second,
Transport: &http.Transport{
// Certificate verification stays enabled (InsecureSkipVerify
// is false): webhook receivers presenting a self-signed or
// otherwise untrusted certificate are rejected.
// NOTE(review): an earlier comment here claimed self-signed
// TLS was allowed — the code never did that.
TLSClientConfig: &tls.Config{InsecureSkipVerify: false},
},
},
}
}
// ListChannels returns every alert channel (active or not), ordered by
// ascending id — i.e. oldest first. The result is a non-nil slice so it
// encodes as a JSON array even when no channels exist.
func (s *Service) ListChannels(ctx context.Context) ([]Channel, error) {
	rows, err := s.Pool.Query(ctx, `
SELECT id, name, kind, target, settings, active, created_at, updated_at
FROM alert_channels ORDER BY id ASC`)
	if err != nil {
		return nil, fmt.Errorf("list alert channels: %w", err)
	}
	defer rows.Close()

	out := []Channel{}
	for rows.Next() {
		var c Channel
		if err := rows.Scan(&c.ID, &c.Name, &c.Kind, &c.Target,
			&c.Settings, &c.Active, &c.CreatedAt, &c.UpdatedAt); err != nil {
			return nil, fmt.Errorf("scan alert channel: %w", err)
		}
		out = append(out, c)
	}
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("list alert channels: %w", err)
	}
	return out, nil
}
// CreateChannel inserts c into alert_channels and returns the stored row
// (with generated id and timestamps). The ID, CreatedAt and UpdatedAt
// fields of c are ignored.
//
// Missing settings — empty, or the JSON literal null as sent by clients
// that serialize an absent object — are stored as "{}" so the column
// always holds a JSON object.
func (s *Service) CreateChannel(ctx context.Context, c Channel) (*Channel, error) {
	if trimmed := bytes.TrimSpace(c.Settings); len(trimmed) == 0 || string(trimmed) == "null" {
		c.Settings = json.RawMessage(`{}`)
	}
	row := s.Pool.QueryRow(ctx, `
INSERT INTO alert_channels (name, kind, target, settings, active)
VALUES ($1, $2, $3, $4, $5)
RETURNING id, name, kind, target, settings, active, created_at, updated_at`,
		c.Name, c.Kind, c.Target, c.Settings, c.Active)
	var out Channel
	if err := row.Scan(&out.ID, &out.Name, &out.Kind, &out.Target,
		&out.Settings, &out.Active, &out.CreatedAt, &out.UpdatedAt); err != nil {
		return nil, err
	}
	return &out, nil
}
// UpdateChannel overwrites name, kind, target, settings and active of the
// channel with the given id, bumps updated_at and returns the stored row.
// It returns ErrNotFound when no row has that id.
//
// Missing settings — empty, or the JSON literal null as sent by clients
// that serialize an absent object — are stored as "{}" so the column
// always holds a JSON object.
func (s *Service) UpdateChannel(ctx context.Context, id int64, c Channel) (*Channel, error) {
	if trimmed := bytes.TrimSpace(c.Settings); len(trimmed) == 0 || string(trimmed) == "null" {
		c.Settings = json.RawMessage(`{}`)
	}
	row := s.Pool.QueryRow(ctx, `
UPDATE alert_channels SET
name = $1, kind = $2, target = $3, settings = $4, active = $5,
updated_at = NOW()
WHERE id = $6
RETURNING id, name, kind, target, settings, active, created_at, updated_at`,
		c.Name, c.Kind, c.Target, c.Settings, c.Active, id)
	var out Channel
	if err := row.Scan(&out.ID, &out.Name, &out.Kind, &out.Target,
		&out.Settings, &out.Active, &out.CreatedAt, &out.UpdatedAt); err != nil {
		if errors.Is(err, pgx.ErrNoRows) {
			return nil, ErrNotFound
		}
		return nil, err
	}
	return &out, nil
}
// DeleteChannel removes the channel with the given id. It returns
// ErrNotFound when the statement affected no row.
func (s *Service) DeleteChannel(ctx context.Context, id int64) error {
	const stmt = `DELETE FROM alert_channels WHERE id = $1`
	result, err := s.Pool.Exec(ctx, stmt, id)
	switch {
	case err != nil:
		return err
	case result.RowsAffected() == 0:
		return ErrNotFound
	}
	return nil
}
// ListEvents returns the most recent alert events, newest first. A limit
// outside 1..500 falls back to 100. The result is a non-nil slice so it
// encodes as a JSON array even when the history is empty.
func (s *Service) ListEvents(ctx context.Context, limit int) ([]Event, error) {
	const (
		defaultLimit = 100
		maxLimit     = 500
	)
	if limit < 1 || limit > maxLimit {
		limit = defaultLimit
	}
	rows, err := s.Pool.Query(ctx, `
SELECT id, kind, severity, subject, message, sent_to, fired_at
FROM alert_events ORDER BY fired_at DESC, id DESC LIMIT $1`, limit)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	events := []Event{}
	for rows.Next() {
		var ev Event
		scanErr := rows.Scan(&ev.ID, &ev.Kind, &ev.Severity, &ev.Subject,
			&ev.Message, &ev.SentTo, &ev.FiredAt)
		if scanErr != nil {
			return nil, scanErr
		}
		events = append(events, ev)
	}
	return events, rows.Err()
}
// Fire delivers an alert to every active channel and records it in
// alert_events together with the per-channel outcome.
//
// A failed delivery is not an error of Fire: it is captured in the
// event's sent_to array (ok=false plus the error text) and the remaining
// channels are still tried. Fire itself fails — and nothing is persisted
// — only when the channel list cannot be loaded, the results cannot be
// encoded, or the event row cannot be inserted.
//
// Channels are contacted sequentially, so the call takes as long as the
// sum of all deliveries.
func (s *Service) Fire(ctx context.Context, kind string, severity Severity,
	subject, message string) (*Event, error) {
	chans, err := s.ListChannels(ctx)
	if err != nil {
		return nil, err
	}

	// Non-nil on purpose: must encode as [] when no channel is active.
	results := make([]SendResult, 0, len(chans))
	for _, c := range chans {
		if !c.Active {
			continue
		}
		r := SendResult{ChannelID: c.ID, ChannelName: c.Name, OK: true}
		if sendErr := s.deliver(ctx, c, kind, severity, subject, message); sendErr != nil {
			r.OK = false
			r.Error = sendErr.Error()
		}
		results = append(results, r)
	}

	sentJSON, err := json.Marshal(results)
	if err != nil {
		return nil, fmt.Errorf("encode delivery results: %w", err)
	}
	var e Event
	err = s.Pool.QueryRow(ctx, `
INSERT INTO alert_events (kind, severity, subject, message, sent_to)
VALUES ($1, $2, $3, $4, $5)
RETURNING id, kind, severity, subject, message, sent_to, fired_at`,
		kind, string(severity), subject, message, sentJSON).
		Scan(&e.ID, &e.Kind, &e.Severity, &e.Subject, &e.Message, &e.SentTo, &e.FiredAt)
	if err != nil {
		return nil, err
	}
	return &e, nil
}

// deliver sends one alert to one channel using the sender that matches
// the channel kind.
func (s *Service) deliver(ctx context.Context, c Channel, kind string,
	severity Severity, subject, message string) error {
	switch c.Kind {
	case "webhook":
		return s.sendWebhook(ctx, c, kind, severity, subject, message)
	case "email":
		return s.sendEmail(c, severity, subject, message)
	default:
		return fmt.Errorf("unknown kind %q", c.Kind)
	}
}
// sendWebhook POSTs the alert as JSON to c.Target.
//
// The payload carries kind, severity, subject, message and an RFC 3339
// UTC timestamp (fired_at), plus two pre-rendered one-string variants of
// the alert: "content" and "text". Receivers that only look at one of
// those fields (the original author names Discord for "content") can
// thus be targeted without an adapter.
//
// Any non-2xx status is returned as an error that includes up to 512
// bytes of the response body.
func (s *Service) sendWebhook(ctx context.Context, c Channel, kind string,
	sev Severity, subject, message string) error {
	level := strings.ToUpper(string(sev))
	payload := map[string]any{
		"kind":     kind,
		"severity": string(sev),
		"subject":  subject,
		"message":  message,
		"content":  fmt.Sprintf("[%s] %s: %s\n%s", level, kind, subject, message),
		"text":     fmt.Sprintf("*[%s]* %s — %s\n%s", level, kind, subject, message),
		"fired_at": time.Now().UTC().Format(time.RFC3339),
	}
	body, err := json.Marshal(payload)
	if err != nil {
		return fmt.Errorf("encode webhook payload: %w", err)
	}
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.Target, bytes.NewReader(body))
	if err != nil {
		return err
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("User-Agent", "edgeguard-alerts/1.0")

	resp, err := s.HTTPClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		b, _ := io.ReadAll(io.LimitReader(resp.Body, 512))
		return fmt.Errorf("webhook %d: %s", resp.StatusCode, strings.TrimSpace(string(b)))
	}
	// Drain a bounded amount of the body so the transport can reuse the
	// connection; the content itself is irrelevant, errors are ignored.
	_, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, 64<<10))
	return nil
}
// sendEmail delivers the alert as a plain-text UTF-8 mail to c.Target,
// using the SMTP settings (host, port, username/password, from, use_tls)
// stored as JSON in c.Settings.
//
// Transport security: if the server advertises STARTTLS the session is
// always upgraded. If it does not, the mail is sent in clear text unless
// use_tls is set, in which case delivery fails instead of silently
// downgrading.
//
// The whole SMTP dialogue is bounded by a 30 s deadline so an
// unresponsive mail server cannot block the caller indefinitely.
//
// Errors: incomplete or unparsable settings, addresses containing line
// breaks, and any failure of the SMTP dialogue.
func (s *Service) sendEmail(c Channel, sev Severity, subject, message string) error {
	const smtpTimeout = 30 * time.Second

	var es EmailSettings
	if len(c.Settings) > 0 {
		if err := json.Unmarshal(c.Settings, &es); err != nil {
			return fmt.Errorf("parse settings: %w", err)
		}
	}
	if es.SMTPHost == "" || es.SMTPPort == 0 || es.From == "" {
		return errors.New("email settings incomplete (smtp_host/smtp_port/from required)")
	}
	// Addresses end up verbatim in headers; a line break would allow
	// injecting additional headers.
	if strings.ContainsAny(es.From+c.Target, "\r\n") {
		return errors.New("email address must not contain line breaks")
	}

	// Subject: flatten line breaks (header injection) and RFC 2047-encode
	// it, because alert subjects contain non-ASCII characters. Pure ASCII
	// subjects pass through unchanged.
	flat := strings.NewReplacer("\r", " ", "\n", " ").Replace(subject)
	subjectHdr := mime.QEncoding.Encode("utf-8",
		"["+strings.ToUpper(string(sev))+"] "+flat)

	msg := []byte("From: " + es.From + "\r\n" +
		"To: " + c.Target + "\r\n" +
		"Subject: " + subjectHdr + "\r\n" +
		"Date: " + time.Now().Format(time.RFC1123Z) + "\r\n" +
		"MIME-Version: 1.0\r\n" +
		"Content-Type: text/plain; charset=utf-8\r\n" +
		"\r\n" +
		message + "\r\n")

	addr := net.JoinHostPort(es.SMTPHost, strconv.Itoa(es.SMTPPort))
	conn, err := net.DialTimeout("tcp", addr, smtpTimeout)
	if err != nil {
		return fmt.Errorf("smtp dial: %w", err)
	}
	// One deadline for the complete dialogue; it also applies after the
	// STARTTLS upgrade because TLS wraps this same connection.
	if err := conn.SetDeadline(time.Now().Add(smtpTimeout)); err != nil {
		conn.Close()
		return fmt.Errorf("smtp deadline: %w", err)
	}
	client, err := smtp.NewClient(conn, es.SMTPHost)
	if err != nil {
		conn.Close()
		return fmt.Errorf("smtp greeting: %w", err)
	}
	defer client.Close()

	if ok, _ := client.Extension("STARTTLS"); ok {
		if err := client.StartTLS(&tls.Config{ServerName: es.SMTPHost}); err != nil {
			return fmt.Errorf("smtp starttls: %w", err)
		}
	} else if es.UseTLS {
		return errors.New("smtp server does not offer STARTTLS but use_tls is enabled")
	}

	if es.Username != "" {
		if ok, _ := client.Extension("AUTH"); !ok {
			return errors.New("smtp server does not support AUTH")
		}
		auth := smtp.PlainAuth("", es.Username, es.Password, es.SMTPHost)
		if err := client.Auth(auth); err != nil {
			return fmt.Errorf("smtp auth: %w", err)
		}
	}

	if err := client.Mail(es.From); err != nil {
		return fmt.Errorf("smtp mail from: %w", err)
	}
	if err := client.Rcpt(c.Target); err != nil {
		return fmt.Errorf("smtp rcpt to: %w", err)
	}
	w, err := client.Data()
	if err != nil {
		return fmt.Errorf("smtp data: %w", err)
	}
	if _, err := w.Write(msg); err != nil {
		return fmt.Errorf("smtp write body: %w", err)
	}
	if err := w.Close(); err != nil {
		return fmt.Errorf("smtp finish body: %w", err)
	}
	return client.Quit()
}

View File

@@ -29,6 +29,7 @@ const FirewallLivePage = lazy(() => import('./pages/FirewallLive'))
const LogsPage = lazy(() => import('./pages/Logs'))
const BackupsPage = lazy(() => import('./pages/Backups'))
const DiagnosticsPage = lazy(() => import('./pages/Diagnostics'))
const AlertsPage = lazy(() => import('./pages/Alerts'))
const LicensePage = lazy(() => import('./pages/License'))
const SettingsPage = lazy(() => import('./pages/Settings'))
@@ -117,6 +118,7 @@ export default function App() {
<Route path="/logs" element={<LogsPage />} />
<Route path="/backups" element={<BackupsPage />} />
<Route path="/diagnostics" element={<DiagnosticsPage />} />
<Route path="/alerts" element={<AlertsPage />} />
<Route path="/license" element={<LicensePage />} />
<Route path="/settings" element={<SettingsPage />} />
</Route>

View File

@@ -22,6 +22,7 @@ const PAGE_TITLES: Record<string, string> = {
'/logs': 'nav.logs',
'/backups': 'nav.backups',
'/diagnostics': 'nav.diagnostics',
'/alerts': 'nav.alerts',
'/license': 'nav.license',
'/settings': 'nav.settings',
}

View File

@@ -2,6 +2,7 @@ import { Link, useLocation } from 'react-router-dom'
import type { ReactNode } from 'react'
import {
ApartmentOutlined,
BellOutlined,
ClockCircleOutlined,
CloudServerOutlined,
ClusterOutlined,
@@ -76,6 +77,7 @@ const NAV: NavSection[] = [
{ path: '/cluster', labelKey: 'nav.cluster', icon: <ApartmentOutlined /> },
{ path: '/logs', labelKey: 'nav.logs', icon: <FileSearchOutlined /> },
{ path: '/diagnostics', labelKey: 'nav.diagnostics', icon: <ToolOutlined /> },
{ path: '/alerts', labelKey: 'nav.alerts', icon: <BellOutlined /> },
{ path: '/backups', labelKey: 'nav.backups', icon: <DatabaseOutlined /> },
{ path: '/license', labelKey: 'nav.license', icon: <CrownOutlined /> },
{ path: '/settings', labelKey: 'nav.settings', icon: <SettingOutlined /> },
@@ -83,7 +85,7 @@ const NAV: NavSection[] = [
},
]
const VERSION = '1.0.73'
const VERSION = '1.0.74'
// Sidebar-Pattern 1:1 aus netcell-webpanel (enconf) übernommen:
// - <nav> als root, dunkler Gradient + Teal/Blue-Accent

View File

@@ -22,6 +22,7 @@
"logs": "Logs",
"backups": "Backups",
"diagnostics": "Diagnose",
"alerts": "Alarme",
"license": "Lizenz",
"settings": "Einstellungen",
"section": {
@@ -682,6 +683,34 @@
"src": "Quell-IP"
}
},
"alerts": {
"title": "Health-Alarme",
"intro": "Notification-Channels für kritische Events. Webhook (Slack/Discord/Teams/Generic-HTTP) oder Email (SMTP). Triggers: cert.expiring (<14 d), cert.renew_failed, backup.failed, license.invalid.",
"scopeTitle": "Was triggert Alarme?",
"scopeDesc": "cert.expiring — TLS-Zertifikat <14 Tage Restzeit (dedupe 12h). cert.renew_failed — ACME-Renewer hat Fails. backup.failed — Scheduled Backup konnte nicht erstellt werden. license.invalid — License-Server liefert valid=false. Mehr Triggers folgen (Backend-Down, Disk-Usage).",
"tabs": { "channels": "Channels", "events": "History" },
"add": "Channel hinzufügen",
"addTitle": "Notification-Channel anlegen",
"editTitle": "Channel bearbeiten",
"test": "Test-Alert",
"testDone": "Test gesendet — {{ok}}/{{total}} Channels erfolgreich",
"emptyChannels": "Keine Channels. Lege einen Webhook oder eine Email an.",
"emptyEvents": "Noch keine Alarme — Triggers haben noch keinen Event gefeuert.",
"noChannels": "kein Channel aktiv",
"confirmDelete": "Channel {{name}} wirklich löschen?",
"col": {
"name": "Name",
"kind": "Typ",
"target": "Ziel",
"targetWebhook": "Webhook-URL",
"targetEmail": "Empfänger-Email",
"active": "Aktiv",
"time": "Zeit",
"severity": "Severity",
"subject": "Betreff",
"delivered": "Gesendet"
}
},
"diag": {
"title": "Diagnose",
"intro": "Operator-Tools direkt aus dem UI: ping, traceroute, DNS, HTTP-Probe, TCP-Connect. Alle Calls laufen authentifiziert auf dieser Box (nicht im Browser).",

View File

@@ -22,6 +22,7 @@
"logs": "Logs",
"backups": "Backups",
"diagnostics": "Diagnostics",
"alerts": "Alerts",
"license": "License",
"settings": "Settings",
"section": {
@@ -682,6 +683,34 @@
"src": "Source IP"
}
},
"alerts": {
"title": "Health alerts",
"intro": "Notification channels for critical events. Webhook (Slack/Discord/Teams/generic-HTTP) or email (SMTP). Triggers: cert.expiring (<14 d), cert.renew_failed, backup.failed, license.invalid.",
"scopeTitle": "What triggers alerts?",
"scopeDesc": "cert.expiring — TLS cert <14 days remaining (12 h dedupe). cert.renew_failed — ACME renewer cycle had failures. backup.failed — scheduled backup couldn't run. license.invalid — License server returns valid=false. More triggers coming (backend-down, disk usage).",
"tabs": { "channels": "Channels", "events": "History" },
"add": "Add channel",
"addTitle": "Add notification channel",
"editTitle": "Edit channel",
"test": "Test alert",
"testDone": "Test sent — {{ok}}/{{total}} channels OK",
"emptyChannels": "No channels. Add a webhook or an email.",
"emptyEvents": "No alerts yet — triggers haven't fired any events.",
"noChannels": "no active channel",
"confirmDelete": "Really delete channel {{name}}?",
"col": {
"name": "Name",
"kind": "Kind",
"target": "Target",
"targetWebhook": "Webhook URL",
"targetEmail": "Recipient email",
"active": "Active",
"time": "Time",
"severity": "Severity",
"subject": "Subject",
"delivered": "Delivered"
}
},
"diag": {
"title": "Diagnostics",
"intro": "Operator tools straight from the UI: ping, traceroute, DNS, HTTP probe, TCP connect. All calls run authenticated on this box (not in the browser).",

View File

@@ -0,0 +1,351 @@
import { useState } from 'react'
import {
Alert, Button, Card, Form, Input, InputNumber, Modal, Popconfirm, Select, Space, Switch, Table, Tabs, Tag, Tooltip, Typography, message,
} from 'antd'
import type { ColumnsType } from 'antd/es/table'
import {
BellOutlined, ExperimentOutlined, PlusOutlined,
} from '@ant-design/icons'
import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query'
import { useTranslation } from 'react-i18next'
import dayjs from 'dayjs'
import apiClient, { isEnvelope } from '../../api/client'
import PageHeader from '../../components/PageHeader'
const { Text } = Typography
/** A notification channel as exchanged with the /alerts/channels endpoints. */
interface Channel {
id: number
name: string
kind: 'webhook' | 'email'
/** Webhook URL (kind=webhook) or recipient address (kind=email). */
target: string
/** Kind-specific settings; for email channels the SMTP fields built by buildPayload. */
settings: Record<string, unknown>
active: boolean
}
/** Delivery outcome of one event for one channel (an entry of AlertEvent.sent_to). */
interface SendResult {
channel_id: number
channel_name: string
ok: boolean
/** Failure reason; optional. */
error?: string
}
/** One entry of the alert history as returned by /alerts/events and /alerts/test. */
interface AlertEvent {
id: number
kind: string
severity: 'info' | 'warning' | 'error' | 'critical'
subject: string
message: string
sent_to: SendResult[]
/** Timestamp as delivered by the API (string). */
fired_at: string
}
/** Flat shape of the add/edit form; buildPayload folds the SMTP fields into Channel.settings. */
interface ChannelFormValues {
name: string
kind: 'webhook' | 'email'
target: string
active: boolean
// Email-only fields, copied into settings.* when kind === 'email':
smtp_host?: string
smtp_port?: number
username?: string
password?: string
from?: string
use_tls?: boolean
}
/** Renders a severity as an upper-cased antd Tag, colour-coded per level. */
function sevTag(s: AlertEvent['severity']) {
  const palette: Record<string, string> = {
    info: 'blue',
    warning: 'orange',
    error: 'red',
    critical: 'magenta',
  }
  const color = palette[s]
  const label = s.toUpperCase()
  return <Tag color={color}>{label}</Tag>
}
/**
 * Alerts administration page.
 *
 * Shows two tabs: the configured notification channels (with add / edit /
 * delete) and the history of fired alert events. A header button fires a
 * test alert through POST /alerts/test. One shared form instance backs the
 * modal that is used for both creating and editing a channel.
 */
export default function AlertsPage() {
  const { t } = useTranslation()
  const qc = useQueryClient()

  // Channel list. Falls back to an empty list when the response is not an envelope.
  const channels = useQuery({
    queryKey: ['alerts', 'channels'],
    queryFn: async () => {
      const r = await apiClient.get('/alerts/channels')
      return isEnvelope(r.data) ? (r.data.data as { channels: Channel[] }).channels : []
    },
  })

  // Event history (latest 200), polled every 15 seconds.
  const events = useQuery({
    queryKey: ['alerts', 'events'],
    queryFn: async () => {
      const r = await apiClient.get('/alerts/events?limit=200')
      return isEnvelope(r.data) ? (r.data.data as { events: AlertEvent[] }).events : []
    },
    refetchInterval: 15_000,
  })

  // `edit` holds the channel being edited; `creating` marks the add flow.
  // The modal is open while either is set.
  const [edit, setEdit] = useState<Channel | null>(null)
  const [creating, setCreating] = useState(false)
  const [form] = Form.useForm<ChannelFormValues>()

  /**
   * Converts flat form values into the Channel body sent to the API.
   * SMTP options are only included for email channels; username and password
   * are left out when empty. `id` is always 0 because the target row is
   * identified by the request URL.
   */
  function buildPayload(v: ChannelFormValues): Channel {
    const settings: Record<string, unknown> = {}
    if (v.kind === 'email') {
      settings.smtp_host = v.smtp_host
      settings.smtp_port = v.smtp_port
      settings.from = v.from
      settings.use_tls = !!v.use_tls
      if (v.username) settings.username = v.username
      if (v.password) settings.password = v.password
    }
    return {
      id: 0,
      name: v.name,
      kind: v.kind,
      target: v.target,
      settings,
      active: v.active,
    }
  }

  const create = useMutation({
    mutationFn: async (v: ChannelFormValues) => {
      await apiClient.post('/alerts/channels', buildPayload(v))
    },
    onSuccess: () => {
      message.success(t('common.save'))
      setCreating(false)
      form.resetFields()
      qc.invalidateQueries({ queryKey: ['alerts', 'channels'] })
    },
    onError: (e: Error) => message.error(e.message),
  })

  const update = useMutation({
    mutationFn: async ({ id, v }: { id: number; v: ChannelFormValues }) => {
      await apiClient.put(`/alerts/channels/${id}`, buildPayload(v))
    },
    onSuccess: () => {
      message.success(t('common.save'))
      setEdit(null)
      form.resetFields()
      qc.invalidateQueries({ queryKey: ['alerts', 'channels'] })
    },
    onError: (e: Error) => message.error(e.message),
  })

  const del = useMutation({
    mutationFn: async (id: number) => { await apiClient.delete(`/alerts/channels/${id}`) },
    onSuccess: () => { qc.invalidateQueries({ queryKey: ['alerts', 'channels'] }) },
    onError: (e: Error) => message.error(e.message),
  })

  // Fires a test alert and reports how many channels accepted it.
  const testFire = useMutation({
    mutationFn: async () => {
      const r = await apiClient.post('/alerts/test')
      return isEnvelope(r.data) ? (r.data.data as AlertEvent) : null
    },
    onSuccess: (e: AlertEvent | null) => {
      // sent_to can be missing/null (the history column guards for that too),
      // so normalise before counting instead of dereferencing it directly.
      const results = e?.sent_to ?? []
      const total = results.length
      const ok = results.filter((r) => r.ok).length
      const text = t('alerts.testDone', { ok, total })
      // Do not present a partially or fully failed delivery as a success.
      if (ok < total) message.warning(text)
      else message.success(text)
      qc.invalidateQueries({ queryKey: ['alerts', 'events'] })
    },
    onError: (e: Error) => message.error(e.message),
  })

  const chanColumns: ColumnsType<Channel> = [
    { title: t('alerts.col.name'), dataIndex: 'name' },
    {
      title: t('alerts.col.kind'), dataIndex: 'kind', width: 110,
      render: (v: string) =>
        <Tag color={v === 'webhook' ? 'blue' : 'purple'}>{v}</Tag>,
    },
    {
      title: t('alerts.col.target'), dataIndex: 'target',
      render: (v: string) => <Text style={{ fontFamily: 'monospace', fontSize: 12 }}>{v}</Text>,
    },
    {
      title: t('alerts.col.active'), dataIndex: 'active', width: 80,
      // NOTE(review): 'an'/'aus' are hard-coded German labels, not routed through t().
      render: (v: boolean) =>
        v ? <Tag color="green">an</Tag> : <Tag>aus</Tag>,
    },
    {
      title: t('common.actions'), key: 'a', width: 180,
      render: (_, r) => (
        <Space size={4}>
          <Button size="small" onClick={() => {
            // Open the modal in edit mode and flatten settings.* into the form.
            setEdit(r)
            const settings = (r.settings ?? {}) as Record<string, unknown>
            form.setFieldsValue({
              name: r.name, kind: r.kind, target: r.target, active: r.active,
              smtp_host: settings.smtp_host as string,
              smtp_port: settings.smtp_port as number,
              username: settings.username as string,
              password: settings.password as string,
              from: settings.from as string,
              use_tls: settings.use_tls as boolean,
            })
          }}>{t('common.edit')}</Button>
          <Popconfirm title={t('alerts.confirmDelete', { name: r.name })}
            onConfirm={() => del.mutate(r.id)}>
            <Button size="small" danger>{t('common.delete')}</Button>
          </Popconfirm>
        </Space>
      ),
    },
  ]

  const evColumns: ColumnsType<AlertEvent> = [
    {
      title: t('alerts.col.time'), dataIndex: 'fired_at', width: 160,
      render: (v: string) =>
        <Text style={{ fontFamily: 'monospace', fontSize: 11 }}>
          {dayjs(v).format('YYYY-MM-DD HH:mm:ss')}
        </Text>,
    },
    {
      title: t('alerts.col.severity'), dataIndex: 'severity', width: 110,
      render: sevTag,
    },
    {
      title: t('alerts.col.kind'), dataIndex: 'kind', width: 180,
      render: (v: string) => <Tag>{v}</Tag>,
    },
    {
      title: t('alerts.col.subject'), dataIndex: 'subject',
      render: (s: string, r) => (
        <div>
          <div><Text strong>{s}</Text></div>
          <Text type="secondary" style={{ fontSize: 12, whiteSpace: 'pre-wrap' }}>
            {r.message}
          </Text>
        </div>
      ),
    },
    {
      title: t('alerts.col.delivered'), dataIndex: 'sent_to', width: 200,
      render: (rs: SendResult[]) => {
        if (!rs || rs.length === 0)
          return <Text type="secondary">{t('alerts.noChannels')}</Text>
        return (
          <Space size={4} wrap>
            {rs.map((r) => (
              <Tooltip key={r.channel_id} title={r.error || 'OK'}>
                <Tag color={r.ok ? 'green' : 'red'}>{r.channel_name}</Tag>
              </Tooltip>
            ))}
          </Space>
        )
      },
    },
  ]

  // Live value of the "kind" field; toggles the email-only form section.
  const kind = Form.useWatch('kind', form)

  return (
    <div>
      <PageHeader
        icon={<BellOutlined />}
        title={t('alerts.title')}
        subtitle={t('alerts.intro')}
        extra={
          <Space>
            <Button icon={<ExperimentOutlined />}
              onClick={() => testFire.mutate()}
              loading={testFire.isPending}>
              {t('alerts.test')}
            </Button>
          </Space>
        }
      />
      <Alert
        type="info"
        showIcon
        className="mb-16"
        message={t('alerts.scopeTitle')}
        description={t('alerts.scopeDesc')}
      />
      <Tabs items={[
        {
          key: 'channels',
          label: t('alerts.tabs.channels'),
          children: (
            <Card size="small" extra={
              <Button type="primary" size="small" icon={<PlusOutlined />}
                onClick={() => {
                  // Open the modal in create mode with sensible defaults.
                  setCreating(true)
                  form.resetFields()
                  form.setFieldsValue({ kind: 'webhook', active: true, smtp_port: 587, use_tls: true })
                }}>
                {t('alerts.add')}
              </Button>
            }>
              <Table size="small" rowKey="id" loading={channels.isFetching}
                dataSource={channels.data ?? []} columns={chanColumns}
                pagination={false}
                locale={{ emptyText: t('alerts.emptyChannels') }} />
            </Card>
          ),
        },
        {
          key: 'events',
          label: t('alerts.tabs.events'),
          children: (
            <Card size="small">
              {/* isLoading (initial load only): isFetching would flash the
                  spinner overlay on every 15 s background poll. */}
              <Table size="small" rowKey="id" loading={events.isLoading}
                dataSource={events.data ?? []} columns={evColumns}
                pagination={{ pageSize: 25 }}
                locale={{ emptyText: t('alerts.emptyEvents') }} />
            </Card>
          ),
        },
      ]} />
      <Modal
        title={edit ? t('alerts.editTitle') : t('alerts.addTitle')}
        open={edit !== null || creating}
        onCancel={() => { setEdit(null); setCreating(false); form.resetFields() }}
        onOk={() => form.submit()}
        width={560}
        confirmLoading={create.isPending || update.isPending}
      >
        <Form form={form} layout="vertical"
          onFinish={(v) => edit ? update.mutate({ id: edit.id, v }) : create.mutate(v)}>
          <Form.Item label={t('alerts.col.name')} name="name" rules={[{ required: true }]}>
            <Input placeholder="Ops-Slack / Oncall-Email" />
          </Form.Item>
          <Form.Item label={t('alerts.col.kind')} name="kind" rules={[{ required: true }]}>
            <Select options={[
              { value: 'webhook', label: 'Webhook (Slack/Discord/Teams/HTTP-Endpoint)' },
              { value: 'email', label: 'Email (SMTP)' },
            ]} />
          </Form.Item>
          <Form.Item label={kind === 'email' ? t('alerts.col.targetEmail') : t('alerts.col.targetWebhook')}
            name="target" rules={[{ required: true }]}>
            <Input placeholder={kind === 'email' ? 'oncall@example.com' : 'https://hooks.slack.com/services/...'} />
          </Form.Item>
          {kind === 'email' && (
            <>
              <Form.Item label="SMTP-Host" name="smtp_host" rules={[{ required: true }]}>
                <Input placeholder="smtp.gmail.com" />
              </Form.Item>
              <Form.Item label="SMTP-Port" name="smtp_port" rules={[{ required: true }]}>
                <InputNumber min={1} max={65535} style={{ width: '100%' }} />
              </Form.Item>
              <Form.Item label="From" name="from" rules={[{ required: true }]}>
                <Input placeholder="edgeguard@your-domain.tld" />
              </Form.Item>
              <Form.Item label="Username" name="username">
                <Input />
              </Form.Item>
              <Form.Item label="Password" name="password">
                <Input.Password />
              </Form.Item>
              <Form.Item label="STARTTLS" name="use_tls" valuePropName="checked">
                <Switch />
              </Form.Item>
            </>
          )}
          <Form.Item label={t('alerts.col.active')} name="active" valuePropName="checked">
            <Switch />
          </Form.Item>
        </Form>
      </Modal>
    </div>
  )
}