diff --git a/VERSION b/VERSION index 3116087..9d8637c 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.0.69 +1.0.70 diff --git a/cmd/edgeguard-api/main.go b/cmd/edgeguard-api/main.go index 69c548e..e9b241d 100644 --- a/cmd/edgeguard-api/main.go +++ b/cmd/edgeguard-api/main.go @@ -52,7 +52,7 @@ import ( wgsvc "git.netcell-it.de/projekte/edgeguard-native/internal/services/wireguard" ) -var version = "1.0.69" +var version = "1.0.70" func main() { addr := os.Getenv("EDGEGUARD_API_ADDR") @@ -131,7 +131,9 @@ func main() { ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) st, _ := setupStore.Load() if st != nil && st.Completed { - if _, err := cluster.EnsureSelfRegistered(ctx, clusterStore, st.FQDN, "primary"); err != nil { + // Auto-create /etc/edgeguard/node.conf falls fehlt. + _, _ = cluster.EnsureLocalConfig("") + if _, err := cluster.EnsureSelfRegistered(ctx, clusterStore, st.FQDN, "primary", version); err != nil { slog.Warn("self-register in ha_nodes failed", "error", err) } } diff --git a/cmd/edgeguard-ctl/main.go b/cmd/edgeguard-ctl/main.go index 20d02c3..072139a 100644 --- a/cmd/edgeguard-ctl/main.go +++ b/cmd/edgeguard-ctl/main.go @@ -9,7 +9,7 @@ import ( "os" ) -var version = "1.0.69" +var version = "1.0.70" const usage = `edgeguard-ctl — EdgeGuard CLI diff --git a/cmd/edgeguard-scheduler/main.go b/cmd/edgeguard-scheduler/main.go index 8640957..de2c364 100644 --- a/cmd/edgeguard-scheduler/main.go +++ b/cmd/edgeguard-scheduler/main.go @@ -25,7 +25,7 @@ import ( "git.netcell-it.de/projekte/edgeguard-native/internal/services/tlscerts" ) -var version = "1.0.69" +var version = "1.0.70" const ( // renewTickInterval — how often we re-evaluate expiring certs. 
// Patch metadata fused into the start of this source line (kept verbatim):
//
//	diff --git a/internal/cluster/local_config.go b/internal/cluster/local_config.go
//	new file mode 100644
//	index 0000000..fdde8a9
//	--- /dev/null
//	+++ b/internal/cluster/local_config.go
//	@@ -0,0 +1,203 @@

package cluster

// /etc/edgeguard/node.conf — node-local configuration that is NEVER
// replicated between cluster peers. It holds the identity values that
// make every node unique:
//
//	NODE_ID     unique UUID (auto-generated in EnsureNodeID; mirrored
//	            here for operator visibility)
//	HOSTNAME    `hostname -f`
//	MGMT_IP     management IP (the interface the API is exposed on;
//	            NEVER a VIP — if this box takes over a VIP, MGMT_IP
//	            stays unchanged on the node's own static IP)
//	ROLE        primary | secondary
//	PEER_HOSTS  comma-separated FQDNs of the other cluster peers
//	            (empty in single-node mode)
//
// The format is INI-style without sections — one `KEY=VALUE` line per
// entry. Comments start with `#`. Whitespace around `=` is trimmed.
//
// Postinst creates an empty/auto-populated file. The backup system
// INCLUDES this file (it is part of the node state); the cluster sync
// path explicitly leaves it OUT.

import (
	"bufio"
	"errors"
	"fmt"
	"net"
	"os"
	"path/filepath"
	"strings"
)

// DefaultLocalConfigPath is the node.conf location used when a caller
// passes an empty path.
const DefaultLocalConfigPath = "/etc/edgeguard/node.conf"

// LocalConfig is the in-memory form of node.conf; each field corresponds
// to one KEY=VALUE entry of the file.
type LocalConfig struct {
	NodeID    string   // NODE_ID
	Hostname  string   // HOSTNAME
	MgmtIP    string   // management IP of the node (IPv4/v6, without CIDR)
	Role      string   // "primary" | "secondary"
	PeerHosts []string // other cluster peers (FQDNs)
}

// LoadLocalConfig reads the file. If it does not exist it returns
// nil, nil — not an error (single-node default).
+func LoadLocalConfig(path string) (*LocalConfig, error) { + if path == "" { + path = DefaultLocalConfigPath + } + f, err := os.Open(path) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return nil, nil + } + return nil, err + } + defer f.Close() + c := &LocalConfig{} + sc := bufio.NewScanner(f) + for sc.Scan() { + line := strings.TrimSpace(sc.Text()) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + eq := strings.IndexByte(line, '=') + if eq < 0 { + continue + } + key := strings.TrimSpace(line[:eq]) + val := strings.TrimSpace(line[eq+1:]) + val = strings.Trim(val, `"`) + switch strings.ToUpper(key) { + case "NODE_ID": + c.NodeID = val + case "HOSTNAME": + c.Hostname = val + case "MGMT_IP": + c.MgmtIP = val + case "ROLE": + c.Role = strings.ToLower(val) + case "PEER_HOSTS": + for _, h := range strings.Split(val, ",") { + h = strings.TrimSpace(h) + if h != "" { + c.PeerHosts = append(c.PeerHosts, h) + } + } + } + } + return c, sc.Err() +} + +// SaveLocalConfig schreibt die Datei atomic + 0644 root:root. +// Aufrufer ist normalerweise edgeguard-ctl unter Operator-Privilegien. 
+func SaveLocalConfig(path string, c *LocalConfig) error { + if path == "" { + path = DefaultLocalConfigPath + } + if c == nil { + return errors.New("nil config") + } + if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { + return err + } + var b strings.Builder + b.WriteString("# Managed by edgeguard — node-local cluster identity.\n") + b.WriteString("# NIEMALS zwischen Cluster-Peers replizieren!\n") + b.WriteString("# Backup-System sichert diese Datei (Teil des Node-State).\n\n") + fmt.Fprintf(&b, "NODE_ID=%s\n", c.NodeID) + fmt.Fprintf(&b, "HOSTNAME=%s\n", c.Hostname) + fmt.Fprintf(&b, "MGMT_IP=%s\n", c.MgmtIP) + if c.Role == "" { + c.Role = "primary" + } + fmt.Fprintf(&b, "ROLE=%s\n", c.Role) + fmt.Fprintf(&b, "PEER_HOSTS=%s\n", strings.Join(c.PeerHosts, ",")) + + tmp := path + ".tmp" + if err := os.WriteFile(tmp, []byte(b.String()), 0o644); err != nil { + return err + } + return os.Rename(tmp, path) +} + +// EnsureLocalConfig liest die Datei; legt sie an wenn nicht vorhanden +// (autogeneriert NodeID via EnsureNodeID, Hostname via os.Hostname, +// MgmtIP via firstNonLoopbackIPv4). Schreibt nur dann zurück wenn +// vorher nichts da war ODER NodeID/Hostname noch leer waren. +func EnsureLocalConfig(path string) (*LocalConfig, error) { + if path == "" { + path = DefaultLocalConfigPath + } + c, err := LoadLocalConfig(path) + if err != nil { + return nil, err + } + if c == nil { + c = &LocalConfig{} + } + dirty := false + if c.NodeID == "" { + id, _ := EnsureNodeID("") + c.NodeID = id + dirty = true + } + if c.Hostname == "" { + h, _ := os.Hostname() + c.Hostname = h + dirty = true + } + if c.MgmtIP == "" { + c.MgmtIP = firstNonLoopbackIPv4() + dirty = true + } + if c.Role == "" { + c.Role = "primary" + dirty = true + } + if !dirty { + return c, nil + } + if err := SaveLocalConfig(path, c); err != nil { + // File-not-writable (z.B. dev box als nicht-root): nicht fatal. + // Caller bekommt trotzdem den in-memory config. 
+ return c, nil + } + return c, nil +} + +// firstNonLoopbackIPv4 sucht eine plausible MGMT_IP für den +// Default-Case. Operator überschreibt das in /etc/edgeguard/node.conf +// wenn die Box mehrere Interfaces hat und wir das falsche gepickt +// haben. +func firstNonLoopbackIPv4() string { + ifaces, err := net.Interfaces() + if err != nil { + return "" + } + for _, iface := range ifaces { + if iface.Flags&net.FlagLoopback != 0 { + continue + } + if iface.Flags&net.FlagUp == 0 { + continue + } + addrs, err := iface.Addrs() + if err != nil { + continue + } + for _, addr := range addrs { + ipnet, ok := addr.(*net.IPNet) + if !ok { + continue + } + ip4 := ipnet.IP.To4() + if ip4 == nil { + continue + } + return ip4.String() + } + } + return "" +} diff --git a/internal/cluster/store.go b/internal/cluster/store.go index aca6451..6a883cb 100644 --- a/internal/cluster/store.go +++ b/internal/cluster/store.go @@ -26,7 +26,8 @@ type Store struct { func NewStore(pool *pgxpool.Pool) *Store { return &Store{Pool: pool} } const baseSelect = ` -SELECT id, name, fqdn, api_url, public_ip, internal_ip, role, +SELECT id, name, fqdn, api_url, public_ip, internal_ip, mgmt_ip, role, + version, config_hash, status, last_seen, joined_at, created_at, updated_at FROM ha_nodes ` @@ -63,45 +64,57 @@ func (s *Store) Get(ctx context.Context, id string) (*models.HANode, error) { // UpsertSelf writes the local node's row using the database-side // ON CONFLICT DO UPDATE so the call is safe to make on every boot. // last_seen is also bumped — handy for the heartbeat-by-restart -// pattern even before periodic heartbeats land. +// pattern even before periodic heartbeats land. Phase-3-Felder +// (mgmt_ip, version, config_hash, status) werden mit COALESCE +// erhalten falls der Caller sie nicht setzt. 
// Column handling on conflict, as written in the statement below:
//   - name, fqdn, api_url, role, status and last_seen are always
//     overwritten with the passed values;
//   - public_ip, internal_ip, mgmt_ip, version and config_hash go through
//     COALESCE, so a NULL argument keeps the stored value (a nil pointer is
//     presumably sent as NULL — confirm against the driver);
//   - joined_at is only set on INSERT; it is not part of the UPDATE list.
//
// Note that status is NOT COALESCE-preserved: an empty n.Status is
// replaced by "online" before the query runs and then overwrites the row.
// The returned node is whatever scanNode reads from the RETURNING row.
func (s *Store) UpsertSelf(ctx context.Context, n models.HANode) (*models.HANode, error) {
	// One timestamp is used for both last_seen ($12) and joined_at ($13).
	now := time.Now().UTC()
	// The status column is NOT NULL with a CHECK constraint (migration
	// 0020), so an unset status is recorded as "online".
	if n.Status == "" {
		n.Status = "online"
	}
	row := s.Pool.QueryRow(ctx, `
INSERT INTO ha_nodes (id, name, fqdn, api_url, public_ip, internal_ip, mgmt_ip,
                      role, version, config_hash, status, last_seen, joined_at)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)
ON CONFLICT (id) DO UPDATE SET
  name        = EXCLUDED.name,
  fqdn        = EXCLUDED.fqdn,
  api_url     = EXCLUDED.api_url,
  public_ip   = COALESCE(EXCLUDED.public_ip,   ha_nodes.public_ip),
  internal_ip = COALESCE(EXCLUDED.internal_ip, ha_nodes.internal_ip),
  mgmt_ip     = COALESCE(EXCLUDED.mgmt_ip,     ha_nodes.mgmt_ip),
  role        = EXCLUDED.role,
  version     = COALESCE(EXCLUDED.version,     ha_nodes.version),
  config_hash = COALESCE(EXCLUDED.config_hash, ha_nodes.config_hash),
  status      = EXCLUDED.status,
  last_seen   = EXCLUDED.last_seen,
  updated_at  = NOW()
RETURNING id, name, fqdn, api_url, public_ip, internal_ip, mgmt_ip,
          role, version, config_hash, status,
          last_seen, joined_at, created_at, updated_at`,
		// Argument order must match $1..$13 above.
		n.ID, n.Name, n.FQDN, n.APIURL,
		n.PublicIP, n.InternalIP, n.MgmtIP,
		n.Role, n.Version, n.ConfigHash, n.Status,
		now, now,
	)
	return scanNode(row)
}

// EnsureSelfRegistered mints the node-id if needed, builds the row
// from setup.json + os.Hostname + node.conf, and upserts it. Called
// on edgeguard-api boot AFTER the DB pool is reachable.
//
// fqdn    = setup-store fqdn (preferred) or hostname.
-// apiURL = "https://" (HAProxy-fronted; v1 doesn't yet know if -// the operator runs on a non-default port). -func EnsureSelfRegistered(ctx context.Context, store *Store, fqdn string, role string) (*models.HANode, error) { +// apiURL = "https://". +// version = edgeguard-api-Version (für Drift-Banner). +// mgmtIP = aus /etc/edgeguard/node.conf wenn vorhanden, sonst Auto. +func EnsureSelfRegistered(ctx context.Context, store *Store, fqdn, role, version string) (*models.HANode, error) { id, err := EnsureNodeID("") if err != nil { // Even when persistence failed (read-only /var/lib in dev), // EnsureNodeID returns the in-memory id alongside the error // — so we can still register, but the id will rotate on - // every boot. Surface as warning to the caller; here we - // just keep going so the dev box doesn't stay un-registered. + // every boot. _ = err } if id == "" { @@ -114,12 +127,26 @@ func EnsureSelfRegistered(ctx context.Context, store *Store, fqdn string, role s if fqdn == "" { fqdn = host } + cfg, _ := LoadLocalConfig("") + var mgmtIP *string + if cfg != nil && cfg.MgmtIP != "" { + v := cfg.MgmtIP + mgmtIP = &v + } + var ver *string + if version != "" { + v := version + ver = &v + } n := models.HANode{ - ID: id, - Name: host, - FQDN: fqdn, - APIURL: "https://" + fqdn, - Role: role, + ID: id, + Name: host, + FQDN: fqdn, + APIURL: "https://" + fqdn, + MgmtIP: mgmtIP, + Role: role, + Version: ver, + Status: "online", } return store.UpsertSelf(ctx, n) } @@ -128,7 +155,8 @@ func scanNode(row interface{ Scan(...any) error }) (*models.HANode, error) { var n models.HANode if err := row.Scan( &n.ID, &n.Name, &n.FQDN, &n.APIURL, - &n.PublicIP, &n.InternalIP, &n.Role, + &n.PublicIP, &n.InternalIP, &n.MgmtIP, + &n.Role, &n.Version, &n.ConfigHash, &n.Status, &n.LastSeen, &n.JoinedAt, &n.CreatedAt, &n.UpdatedAt, ); err != nil { diff --git a/internal/database/migrations/0020_ha_nodes_ext.sql b/internal/database/migrations/0020_ha_nodes_ext.sql new file mode 100644 index 
-- +goose Up
-- +goose StatementBegin

-- ha_nodes extension for the cluster phase-3 foundation. These columns
-- make the cluster UI meaningful even in single-node mode and prepare
-- multi-node drift detection (analogous to mail-gateway).
--
--   version:     edgeguard-api version of the peer (from /healthz).
--   config_hash: deterministic hash over the replicable DB tables.
--                The drift banner triggers when nodes hold different values.
--   mgmt_ip:     management IP (not the VIP) — separate from public_ip + api_url.
--   status:      online | offline | joining | leaving | unknown.

ALTER TABLE ha_nodes
    ADD COLUMN IF NOT EXISTS version     TEXT,
    ADD COLUMN IF NOT EXISTS config_hash TEXT,
    ADD COLUMN IF NOT EXISTS mgmt_ip     INET,
    ADD COLUMN IF NOT EXISTS status      TEXT NOT NULL DEFAULT 'unknown';

-- Drop-then-add keeps the constraint definition current when the
-- migration is re-applied.
ALTER TABLE ha_nodes
    DROP CONSTRAINT IF EXISTS ha_nodes_status_check;
ALTER TABLE ha_nodes
    ADD CONSTRAINT ha_nodes_status_check
    CHECK (status IN ('online', 'offline', 'joining', 'leaving', 'unknown'));

CREATE INDEX IF NOT EXISTS idx_ha_nodes_status ON ha_nodes (status);

-- +goose StatementEnd

-- +goose Down
-- +goose StatementBegin
-- Reverts the Up step: constraint first, then the four added columns.
ALTER TABLE ha_nodes
    DROP CONSTRAINT IF EXISTS ha_nodes_status_check;
ALTER TABLE ha_nodes
    DROP COLUMN IF EXISTS status,
    DROP COLUMN IF EXISTS mgmt_ip,
    DROP COLUMN IF EXISTS config_hash,
    DROP COLUMN IF EXISTS version;
-- +goose StatementEnd

-- Patch metadata fused into this source line (kept verbatim).
-- Start of line (tail of the migration's diff header):
--   0000000..75980e5
--   --- /dev/null
--   +++ b/internal/database/migrations/0020_ha_nodes_ext.sql
--   @@ -0,0 +1,39 @@
-- End of line (start of the handlers/cluster.go diff):
--   diff --git a/internal/handlers/cluster.go b/internal/handlers/cluster.go
--   index 67b993c..30b50ee 100644
--   --- a/internal/handlers/cluster.go
--   +++ b/internal/handlers/cluster.go
--   @@ -1,18 +1,21 @@
--    package handlers
--
--    import (
--   +	"time"
--   +
--    	"github.com/gin-gonic/gin"
--
--    	"git.netcell-it.de/projekte/edgeguard-native/internal/cluster"
--    	"git.netcell-it.de/projekte/edgeguard-native/internal/handlers/response"
--   +	"git.netcell-it.de/projekte/edgeguard-native/internal/models"
--    )
--
--   -// ClusterHandler exposes
cluster-state endpoints. v1 is read-only: -// the UI shows the list of registered nodes but cluster-join + write -// operations land in Phase 3.1. +// ClusterHandler exposes cluster-state endpoints. v1 ist read-only; +// /status liefert eine strukturierte UI-Sicht (local + peers + health), +// /nodes bleibt als simpler list-endpoint für Tools/Scripts. type ClusterHandler struct { - Store *cluster.Store - LocalID string + Store *cluster.Store + LocalID string } func NewClusterHandler(store *cluster.Store, localID string) *ClusterHandler { @@ -22,6 +25,7 @@ func NewClusterHandler(store *cluster.Store, localID string) *ClusterHandler { func (h *ClusterHandler) Register(rg *gin.RouterGroup) { g := rg.Group("/cluster") g.GET("/nodes", h.ListNodes) + g.GET("/status", h.Status) } func (h *ClusterHandler) ListNodes(c *gin.Context) { @@ -30,8 +34,71 @@ func (h *ClusterHandler) ListNodes(c *gin.Context) { response.Internal(c, err) return } - response.OK(c, gin.H{ - "nodes": nodes, - "local_id": h.LocalID, - }) + response.OK(c, gin.H{"nodes": nodes, "local_id": h.LocalID}) +} + +// ClusterStatus ist die UI-zentrierte Sicht: local-Node hervorgehoben, +// peers separat, mode + health-flag. +type ClusterStatus struct { + LocalID string `json:"local_id"` + LocalNode *models.HANode `json:"local_node,omitempty"` + Peers []models.HANode `json:"peers"` + Mode string `json:"mode"` // "single-node" | "cluster" + Health string `json:"health"` // "ok" | "degraded" | "split-brain" + DriftFound bool `json:"drift_found"` + UpdatedAt time.Time `json:"updated_at"` +} + +// Status splittet alle Nodes in local + peers, berechnet mode + health. 
+func (h *ClusterHandler) Status(c *gin.Context) { + all, err := h.Store.List(c.Request.Context()) + if err != nil { + response.Internal(c, err) + return + } + out := ClusterStatus{ + LocalID: h.LocalID, + Peers: []models.HANode{}, + Mode: "single-node", + Health: "ok", + UpdatedAt: time.Now().UTC(), + } + var localHash *string + for i := range all { + n := all[i] + if n.ID == h.LocalID { + ln := n + out.LocalNode = &ln + localHash = ln.ConfigHash + continue + } + out.Peers = append(out.Peers, n) + } + if len(out.Peers) > 0 { + out.Mode = "cluster" + } + // Drift-Detection: jeder peer mit anderem config_hash als unser + // lokaler → Banner-Trigger im UI. + if localHash != nil && *localHash != "" { + for _, p := range out.Peers { + if p.ConfigHash == nil || *p.ConfigHash == "" { + continue + } + if *p.ConfigHash != *localHash { + out.DriftFound = true + out.Health = "degraded" + break + } + } + } + // Offline-Peers → degraded. + if !out.DriftFound { + for _, p := range out.Peers { + if p.Status != "online" { + out.Health = "degraded" + break + } + } + } + response.OK(c, out) } diff --git a/internal/models/ha_node.go b/internal/models/ha_node.go index a461748..e273fb1 100644 --- a/internal/models/ha_node.go +++ b/internal/models/ha_node.go @@ -2,18 +2,25 @@ package models import "time" +// HANode mirrort eine Row der ha_nodes-Tabelle. Erweitert in Migration +// 0020 um version/config_hash/mgmt_ip/status für Cluster-Phase-3- +// Drift-Detection + Health-State. 
type HANode struct {
	ID         string  `gorm:"column:id;primaryKey" json:"id"`
	Name       string  `gorm:"column:name" json:"name"`
	FQDN       string  `gorm:"column:fqdn;uniqueIndex" json:"fqdn"`
	APIURL     string  `gorm:"column:api_url" json:"api_url"`
	PublicIP   *string `gorm:"column:public_ip;type:inet" json:"public_ip,omitempty"`
	InternalIP *string `gorm:"column:internal_ip;type:inet" json:"internal_ip,omitempty"`
	// MgmtIP is the management IP (not the VIP), kept separate from
	// public_ip and api_url (per the notes in migration 0020).
	MgmtIP *string `gorm:"column:mgmt_ip;type:inet" json:"mgmt_ip,omitempty"`
	Role   string  `gorm:"column:role" json:"role"`
	// Version is the edgeguard-api version of the node (migration 0020).
	Version *string `gorm:"column:version" json:"version,omitempty"`
	// ConfigHash is a hash over the replicable DB tables; differing values
	// between nodes indicate configuration drift (migration 0020).
	ConfigHash *string `gorm:"column:config_hash" json:"config_hash,omitempty"`
	// Status is one of online | offline | joining | leaving | unknown,
	// enforced by the ha_nodes_status_check constraint (migration 0020).
	Status    string     `gorm:"column:status" json:"status"`
	LastSeen  *time.Time `gorm:"column:last_seen" json:"last_seen,omitempty"`
	JoinedAt  time.Time  `gorm:"column:joined_at" json:"joined_at"`
	CreatedAt time.Time  `gorm:"column:created_at" json:"created_at"`
	UpdatedAt time.Time  `gorm:"column:updated_at" json:"updated_at"`
}

// TableName returns the database table name for HANode, "ha_nodes".
func (HANode) TableName() string { return "ha_nodes" }

/*
Patch metadata fused into the end of this source line (start of the
Sidebar.tsx diff header, continued on the next source line), kept verbatim:

diff --git a/management-ui/src/components/Layout/Sidebar.tsx b/management-ui/src/components/Layout/Sidebar.tsx
index 1b8418e..804cd06 100644
--- a/management-ui/src/components/Layout/Sidebar.tsx
+++
*/
b/management-ui/src/components/Layout/Sidebar.tsx @@ -81,7 +81,7 @@ const NAV: NavSection[] = [ }, ] -const VERSION = '1.0.69' +const VERSION = '1.0.70' // Sidebar-Pattern 1:1 aus netcell-webpanel (enconf) übernommen: // -