feat(dashboard): Operations-Dashboard mit Live-Health/Resources/Audit/HAProxy

Vorher: Dashboard war Counts + statische Cards. Jetzt operativer
Überblick — was läuft, was klemmt, was wurde gerade geändert.

Backend (4 neue Endpoints):
* GET /api/v1/system/services — systemctl show (ActiveState) für 8 Services
  (edgeguard-api, edgeguard-scheduler, haproxy, nftables, unbound, chrony,
  squid, postgresql). Inklusive ActiveEnterTimestamp.
* GET /api/v1/system/resources — /proc/loadavg, meminfo, statfs(/),
  nf_conntrack count+max, uptime.
* GET /api/v1/audit/recent?limit=N — letzte audit_log entries.
  audit-Repo bekommt ListRecent + Entry struct.
* GET /api/v1/haproxy/stats — parst die 'show stat'-CSV-Ausgabe von HAProxy
  über /run/haproxy/admin.sock (postinst fügt edgeguard zur Gruppe haproxy
  hinzu, damit der Socket lesbar ist; die Gruppe existiert nach apt install).

Frontend Dashboard rewrite:
* PageHeader + KPI-Strip (6 tiles, wie zuvor) — bleibt.
* Resources-Strip: Load (1/5/15) + Mem-Progress + Disk-Progress +
  Conntrack-Progress + Uptime.
* Service-Health-Grid: 8 Karten mit StatusDot + state.
* Recent-Activity-Card (audit-log): action-Tag + actor + subject +
  relative time.
* HAProxy-Backends-Card: backend/server + UP/DOWN-Tag + sessions +
  bytes_in/out + last_change_age.
* WireGuard live (handshake-age, traffic) — bleibt aus früherem
  Stand.
* Cluster + Firewall + SSL + Routing Cards — bleiben.
* Polling 10s für services/resources/haproxy, 15s für audit.

Plus: postinst usermod -a -G haproxy edgeguard für admin.sock
read-permission.

Version 1.0.43.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Debian
2026-05-11 07:46:39 +02:00
parent cc500139fc
commit c7b98f196e
14 changed files with 792 additions and 22 deletions

View File

@@ -0,0 +1,38 @@
package handlers
import (
"strconv"
"github.com/gin-gonic/gin"
"git.netcell-it.de/projekte/edgeguard-native/internal/handlers/response"
"git.netcell-it.de/projekte/edgeguard-native/internal/services/audit"
)
// AuditHandler serves the HTTP endpoints under /audit. It holds no
// state of its own beyond the repository it reads from.
type AuditHandler struct {
// Repo is the audit repository queried for log entries.
Repo *audit.Repo
}
// NewAuditHandler returns an AuditHandler that reads from repo.
func NewAuditHandler(repo *audit.Repo) *AuditHandler {
	handler := new(AuditHandler)
	handler.Repo = repo
	return handler
}
// Register mounts the audit routes on rg: GET <rg>/audit/recent is
// served by Recent.
func (h *AuditHandler) Register(rg *gin.RouterGroup) {
	rg.Group("/audit").GET("/recent", h.Recent)
}
// Recent returns the most recent audit_log entries — used by the
// dashboard's recent-activity card.
//
// The optional query parameter ?limit=N selects how many entries are
// requested from the repository. It defaults to 10. A value that is
// not an integer is ignored (the default applies); an integer outside
// 1–100 is clamped into that range so a client can neither pass a
// non-positive limit nor request an unbounded dump of the table.
//
// On success the response body is {"entries": [...]}; a repository
// error is reported through response.Internal.
func (h *AuditHandler) Recent(c *gin.Context) {
	const (
		defaultLimit = 10
		minLimit     = 1
		maxLimit     = 100
	)

	limit := defaultLimit
	if v := c.Query("limit"); v != "" {
		if n, err := strconv.Atoi(v); err == nil {
			limit = n
		}
	}
	// Enforce the documented range here instead of trusting the caller.
	switch {
	case limit < minLimit:
		limit = minLimit
	case limit > maxLimit:
		limit = maxLimit
	}

	rows, err := h.Repo.ListRecent(c.Request.Context(), limit)
	if err != nil {
		response.Internal(c, err)
		return
	}
	response.OK(c, gin.H{"entries": rows})
}

View File

@@ -0,0 +1,118 @@
package handlers
import (
"bufio"
"net"
"strconv"
"strings"
"time"
"github.com/gin-gonic/gin"
"git.netcell-it.de/projekte/edgeguard-native/internal/handlers/response"
)
// HAProxyStatsHandler exposes /api/v1/haproxy/stats — a parsed view
// of the haproxy runtime API ('show stat'). Used by the dashboard's
// backend-live-health card. Reads from the unix socket at
// /run/haproxy/admin.sock; postinst adds the edgeguard user to the
// haproxy group so the socket (mode 0660 root:haproxy) is readable.
//
// The type carries no fields; every request opens its own socket
// connection.
type HAProxyStatsHandler struct{}
// NewHAProxyStatsHandler returns a ready-to-register handler. The type
// has no fields, so there is nothing to configure.
func NewHAProxyStatsHandler() *HAProxyStatsHandler {
	return new(HAProxyStatsHandler)
}
// Register mounts the haproxy routes on rg: GET <rg>/haproxy/stats is
// served by Stats.
func (h *HAProxyStatsHandler) Register(rg *gin.RouterGroup) {
	rg.Group("/haproxy").GET("/stats", h.Stats)
}
// haproxyAdminSock is the path of the unix socket that Stats dials to
// reach haproxy's runtime API.
const haproxyAdminSock = "/run/haproxy/admin.sock"
// backendStat is one server inside one backend, parsed from haproxy's
// 'show stat' CSV. We only emit the fields the dashboard cares
// about — full CSV is ~80 columns of which 90% are noise here.
// The trailing comments name the CSV column each field is filled from.
type backendStat struct {
Backend string `json:"backend"` // pxname
Server string `json:"server"` // svname
Status string `json:"status"` // UP|DOWN|MAINT|...
Sessions int64 `json:"sessions"` // current sessions (scur)
BIn int64 `json:"bytes_in"` // bin
BOut int64 `json:"bytes_out"` // bout
LastChg int64 `json:"last_change_sec"` // seconds since last status change
Health string `json:"health,omitempty"` // check_status (e.g. L7OK)
}
// Stats sends 'show stat' to the haproxy runtime API on the admin
// socket and returns the per-server rows as {"backends": [...]}.
//
// The handler never answers with a 5xx. If the socket cannot be
// dialled or written to, or the reply cannot be read completely, it
// still responds 200 with whatever rows were parsed (possibly none)
// plus an "error" string, so the dashboard card degrades instead of
// turning red.
func (h *HAProxyStatsHandler) Stats(c *gin.Context) {
	conn, err := net.DialTimeout("unix", haproxyAdminSock, 2*time.Second)
	if err != nil {
		// Socket unreachable (haproxy down or no permission) → return
		// an empty list instead of a 500 so the dashboard stays usable.
		response.OK(c, gin.H{"backends": []backendStat{}, "error": err.Error()})
		return
	}
	defer conn.Close()

	// Bound the whole exchange. A failure to set the deadline is not
	// fatal by itself, so it is deliberately ignored.
	_ = conn.SetDeadline(time.Now().Add(3 * time.Second))
	if _, err := conn.Write([]byte("show stat\n")); err != nil {
		response.OK(c, gin.H{"backends": []backendStat{}, "error": err.Error()})
		return
	}

	// CSV format of the haproxy stats: the first line starts with
	// "# pxname,svname,..." — we use it to discover column indexes by
	// name because the column set can change between haproxy versions.
	colIdx := map[string]int{}

	// col returns the named column of one row, or "" when the header
	// did not announce that column or the row is too short. The
	// explicit ok-check matters: pxname is column 0, which is also the
	// zero value a plain map lookup yields for an unknown name, so the
	// two cases must not be conflated.
	col := func(fields []string, name string) string {
		i, ok := colIdx[name]
		if !ok || i >= len(fields) {
			return ""
		}
		return fields[i]
	}

	// Non-nil empty slice so the JSON is "[]" rather than "null".
	out := []backendStat{}
	scanner := bufio.NewScanner(conn)
	scanner.Buffer(make([]byte, 64*1024), 1024*1024)
	for scanner.Scan() {
		line := scanner.Text()
		if line == "" {
			continue
		}
		fields := strings.Split(line, ",")
		if strings.HasPrefix(line, "# ") {
			// Header — strip the "# " prefix from the first name.
			fields[0] = strings.TrimPrefix(fields[0], "# ")
			for i, name := range fields {
				// The header ends with a trailing comma, which yields
				// one empty name; it is of no use as a key.
				if name != "" {
					colIdx[name] = i
				}
			}
			continue
		}

		// Skip frontend rows + the "BACKEND" summary row — we want
		// the per-server view ("L4OK", "L7OK", etc.).
		svname := col(fields, "svname")
		pxname := col(fields, "pxname")
		if svname == "" || svname == "FRONTEND" || svname == "BACKEND" {
			continue
		}
		// Skip our internal stats listener.
		if pxname == "internal_stats" {
			continue
		}

		out = append(out, backendStat{
			Backend:  pxname,
			Server:   svname,
			Status:   col(fields, "status"),
			Sessions: parseInt64(col(fields, "scur")),
			BIn:      parseInt64(col(fields, "bin")),
			BOut:     parseInt64(col(fields, "bout")),
			LastChg:  parseInt64(col(fields, "lastchg")),
			Health:   col(fields, "check_status"),
		})
	}
	if err := scanner.Err(); err != nil {
		// Read failed part-way (deadline hit, oversized line, ...):
		// hand back what was parsed, but flag that it is incomplete.
		response.OK(c, gin.H{"backends": out, "error": err.Error()})
		return
	}
	response.OK(c, gin.H{"backends": out})
}
// safeAt returns fields[i] when i is a usable index and "" otherwise.
// Besides negative and too-large values, index 0 is also treated as
// unusable — 0 is what a map lookup yields for a missing key — so the
// first element can never be obtained through this helper.
func safeAt(fields []string, i int) string {
	if i > 0 && i < len(fields) {
		return fields[i]
	}
	return ""
}
// parseInt64 converts a base-10 string to int64 and discards the
// parse error: the caller gets whatever value strconv.ParseInt
// produced, which is 0 for empty or non-numeric input.
func parseInt64(s string) int64 {
	value, err := strconv.ParseInt(s, 10, 64)
	_ = err // deliberately ignored; the value is returned as-is
	return value
}

View File

@@ -1,14 +1,17 @@
package handlers
import (
"bufio"
"log/slog"
"net"
"net/http"
"os"
"os/exec"
"regexp"
"strconv"
"strings"
"syscall"
"time"
"github.com/gin-gonic/gin"
@@ -32,6 +35,135 @@ func (h *SystemHandler) Register(rg *gin.RouterGroup) {
g.GET("/package-versions", h.PackageVersions)
g.POST("/upgrade", h.Upgrade)
g.GET("/interfaces", h.Interfaces)
g.GET("/services", h.Services)
g.GET("/resources", h.Resources)
}
// servicesToCheck lists the systemd units shown on the dashboard's
// service-health grid. The slice order is the order the UI renders.
// Label is the name displayed to the user; Unit is the systemd unit
// that gets queried for its state.
var servicesToCheck = []struct{ Label, Unit string }{
	{Label: "edgeguard-api", Unit: "edgeguard-api"},
	{Label: "edgeguard-scheduler", Unit: "edgeguard-scheduler"},
	{Label: "haproxy", Unit: "haproxy"},
	{Label: "nftables", Unit: "nftables"},
	{Label: "unbound", Unit: "unbound"},
	{Label: "chrony", Unit: "chrony"},
	{Label: "squid", Unit: "squid"},
	{Label: "postgresql", Unit: "postgresql"},
}
// serviceStatus is the JSON shape of one entry in the services
// response: the configured label/unit pair plus what systemd reported
// for that unit.
type serviceStatus struct {
Label string `json:"label"` // display name from servicesToCheck
Unit string `json:"unit"` // systemd unit that was queried
Active bool `json:"active"` // true only when State is exactly "active"
State string `json:"state"` // active|inactive|failed|activating|...
Since string `json:"since,omitempty"` // ActiveEnterTimestamp
}
// Services reports the systemd state of every unit in servicesToCheck
// as {"services": [...]}, in the same order as that list.
//
// Each unit is queried with a single
// `systemctl show -p ActiveState,ActiveEnterTimestamp <unit>` call,
// which yields both the state and the "since" timestamp at once. The
// command is bound to the request context. If the call fails, the unit
// is still listed, with an empty State and Active=false.
func (h *SystemHandler) Services(c *gin.Context) {
	ctx := c.Request.Context()
	statuses := make([]serviceStatus, 0, len(servicesToCheck))

	for _, svc := range servicesToCheck {
		entry := serviceStatus{Label: svc.Label, Unit: svc.Unit}

		cmd := exec.CommandContext(ctx,
			"systemctl", "show", "-p", "ActiveState,ActiveEnterTimestamp",
			svc.Unit)
		output, err := cmd.Output()
		if err != nil {
			// Best-effort: keep the entry, just without state details.
			statuses = append(statuses, entry)
			continue
		}

		// Output is one KEY=VALUE pair per line.
		for _, prop := range strings.Split(string(output), "\n") {
			key, value, found := strings.Cut(prop, "=")
			if !found {
				continue
			}
			if key == "ActiveState" {
				entry.State = value
				entry.Active = value == "active"
			} else if key == "ActiveEnterTimestamp" {
				entry.Since = value
			}
		}
		statuses = append(statuses, entry)
	}

	response.OK(c, gin.H{"services": statuses})
}
// resources is the JSON shape of the box-level metrics response.
// Fields whose source could not be read or parsed stay at zero.
type resources struct {
LoadAvg1 float64 `json:"load_avg_1"` // first field of /proc/loadavg
LoadAvg5 float64 `json:"load_avg_5"` // second field of /proc/loadavg
LoadAvg15 float64 `json:"load_avg_15"` // third field of /proc/loadavg
MemTotalKB int64 `json:"mem_total_kb"` // MemTotal from /proc/meminfo
MemAvailKB int64 `json:"mem_avail_kb"` // MemAvailable from /proc/meminfo
MemUsedPct float64 `json:"mem_used_pct"` // (MemTotal - MemAvailable) / MemTotal * 100
DiskTotalGB float64 `json:"disk_total_gb"` // size of "/" in bytes / 1024^3
DiskFreeGB float64 `json:"disk_free_gb"` // statfs Bavail of "/" in bytes / 1024^3
DiskUsedPct float64 `json:"disk_used_pct"` // (total - free) / total * 100
ConntrackCnt int64 `json:"conntrack_count"` // nf_conntrack_count
ConntrackMax int64 `json:"conntrack_max"` // nf_conntrack_max
UptimeSec int64 `json:"uptime_sec"` // first field of /proc/uptime, truncated
BootTimeUnix int64 `json:"boot_time_unix"` // now (unix seconds) minus UptimeSec
}
// Resources collects the host metrics behind the dashboard's resources
// strip — load averages, memory, root-filesystem usage, conntrack table
// fill and uptime — and returns them as a single resources object.
//
// Every source is best-effort: when a file cannot be read or a value
// cannot be obtained, the affected fields stay at their zero value and
// the handler still answers successfully.
func (h *SystemHandler) Resources(c *gin.Context) {
	var res resources

	// Load averages: the first three fields of /proc/loadavg.
	if raw, err := os.ReadFile("/proc/loadavg"); err == nil {
		if parts := strings.Fields(string(raw)); len(parts) >= 3 {
			res.LoadAvg1, _ = strconv.ParseFloat(parts[0], 64)
			res.LoadAvg5, _ = strconv.ParseFloat(parts[1], 64)
			res.LoadAvg15, _ = strconv.ParseFloat(parts[2], 64)
		}
	}

	// Memory: pick MemTotal and MemAvailable out of /proc/meminfo.
	if raw, err := os.ReadFile("/proc/meminfo"); err == nil {
		sc := bufio.NewScanner(strings.NewReader(string(raw)))
		for sc.Scan() {
			cols := strings.Fields(sc.Text())
			if len(cols) < 2 {
				continue
			}
			kb, _ := strconv.ParseInt(cols[1], 10, 64)
			key := strings.TrimSuffix(cols[0], ":")
			if key == "MemTotal" {
				res.MemTotalKB = kb
			} else if key == "MemAvailable" {
				res.MemAvailKB = kb
			}
		}
		// Guard the division; without MemTotal the percentage stays 0.
		if res.MemTotalKB > 0 {
			usedKB := res.MemTotalKB - res.MemAvailKB
			res.MemUsedPct = float64(usedKB) * 100 / float64(res.MemTotalKB)
		}
	}

	// Disk: statfs on "/"; "free" is Bavail, not Bfree.
	var rootFS syscall.Statfs_t
	if err := syscall.Statfs("/", &rootFS); err == nil {
		const bytesPerGiB = 1024 * 1024 * 1024
		totalBytes := float64(rootFS.Blocks) * float64(rootFS.Bsize)
		freeBytes := float64(rootFS.Bavail) * float64(rootFS.Bsize)
		res.DiskTotalGB = totalBytes / bytesPerGiB
		res.DiskFreeGB = freeBytes / bytesPerGiB
		if totalBytes > 0 {
			res.DiskUsedPct = (totalBytes - freeBytes) * 100 / totalBytes
		}
	}

	// Conntrack: each file holds a single integer.
	if raw, err := os.ReadFile("/proc/sys/net/netfilter/nf_conntrack_count"); err == nil {
		res.ConntrackCnt, _ = strconv.ParseInt(strings.TrimSpace(string(raw)), 10, 64)
	}
	if raw, err := os.ReadFile("/proc/sys/net/netfilter/nf_conntrack_max"); err == nil {
		res.ConntrackMax, _ = strconv.ParseInt(strings.TrimSpace(string(raw)), 10, 64)
	}

	// Uptime: first field of /proc/uptime, in (fractional) seconds.
	// Boot time is derived from it rather than read separately.
	if raw, err := os.ReadFile("/proc/uptime"); err == nil {
		if parts := strings.Fields(string(raw)); len(parts) > 0 {
			if secs, err := strconv.ParseFloat(parts[0], 64); err == nil {
				res.UptimeSec = int64(secs)
				res.BootTimeUnix = time.Now().Unix() - res.UptimeSec
			}
		}
	}

	response.OK(c, res)
}
func (h *SystemHandler) Health(c *gin.Context) {