feat(cluster): Phase 3 Foundation — node.conf + ha_nodes-Drift + UI
Code-Vorbereitung für Multi-Node, ohne dass eine zweite Box nötig ist.
Single-Node-Mode bleibt der Default; alles existiert und wird sichtbar,
sobald ein 2. Knoten joined (Phase 3.2 später).
Migration 0020:
ha_nodes += version (edgeguard-api-Version)
config_hash (drift-Detection-Hash)
mgmt_ip (Management-IP, niemals VIP)
status (online|offline|joining|leaving|unknown)
internal/cluster/local_config.go:
/etc/edgeguard/node.conf — INI-style, node-lokale Identität:
NODE_ID, HOSTNAME, MGMT_IP, ROLE, PEER_HOSTS. NIEMALS zwischen
Cluster-Peers replizieren. LoadLocalConfig / SaveLocalConfig /
EnsureLocalConfig (auto-Generierung beim ersten Boot).
MgmtIP-Default = firstNonLoopbackIPv4(); Operator kann
überschreiben (mehrere Interfaces).
internal/cluster/store.go:
- HANode-Model um die 4 neuen Felder erweitert
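- UpsertSelf nimmt jetzt mgmt_ip/version/config_hash/status; COALESCE
erhält die bestehenden Werte von mgmt_ip/version/config_hash, wenn der
Caller sie nicht setzt (status wird immer überschrieben, Default "online")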
- EnsureSelfRegistered-Signatur: + role + version-Argument
internal/handlers/cluster.go:
GET /api/v1/cluster/status — strukturierter Endpoint:
{local_id, local_node, peers[], mode, health, drift_found, updated_at}
GET /api/v1/cluster/nodes bleibt für Tools.
UI (pages/Cluster):
- Header zeigt Mode-Tag (Single-Node / Cluster) + Health-Tag (OK /
degraded / split-brain)
- Self-Card: Descriptions mit FQDN, Node-ID, Status, Role, Version,
MGMT-IP, API-URL, Config-Hash
- Peers-Tabelle nur wenn vorhanden, mit "drift"-Marker pro Row
- Drift-Alert-Banner wenn ein Peer einen anderen config_hash hat
- Single-Node-Mode Hinweis-Alert ("cluster-join kommt in 3.2")
postinst: leeres /etc/edgeguard/node.conf wird angelegt (chown
edgeguard); API auto-befüllt beim ersten boot.
main.go ruft EnsureLocalConfig + EnsureSelfRegistered mit version.
Verifiziert auf der Box (1.0.70):
- /etc/edgeguard/node.conf hat NODE_ID, HOSTNAME, MGMT_IP=89.163.205.6,
ROLE=primary
- ha_nodes-Row: status=online, version=1.0.70, mgmt_ip=89.163.205.6
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -52,7 +52,7 @@ import (
|
||||
wgsvc "git.netcell-it.de/projekte/edgeguard-native/internal/services/wireguard"
|
||||
)
|
||||
|
||||
var version = "1.0.69"
|
||||
var version = "1.0.70"
|
||||
|
||||
func main() {
|
||||
addr := os.Getenv("EDGEGUARD_API_ADDR")
|
||||
@@ -131,7 +131,9 @@ func main() {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
st, _ := setupStore.Load()
|
||||
if st != nil && st.Completed {
|
||||
if _, err := cluster.EnsureSelfRegistered(ctx, clusterStore, st.FQDN, "primary"); err != nil {
|
||||
// Auto-create /etc/edgeguard/node.conf falls fehlt.
|
||||
_, _ = cluster.EnsureLocalConfig("")
|
||||
if _, err := cluster.EnsureSelfRegistered(ctx, clusterStore, st.FQDN, "primary", version); err != nil {
|
||||
slog.Warn("self-register in ha_nodes failed", "error", err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,7 +9,7 @@ import (
|
||||
"os"
|
||||
)
|
||||
|
||||
var version = "1.0.69"
|
||||
var version = "1.0.70"
|
||||
|
||||
const usage = `edgeguard-ctl — EdgeGuard CLI
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@ import (
|
||||
"git.netcell-it.de/projekte/edgeguard-native/internal/services/tlscerts"
|
||||
)
|
||||
|
||||
var version = "1.0.69"
|
||||
var version = "1.0.70"
|
||||
|
||||
const (
|
||||
// renewTickInterval — how often we re-evaluate expiring certs.
|
||||
|
||||
203
internal/cluster/local_config.go
Normal file
203
internal/cluster/local_config.go
Normal file
@@ -0,0 +1,203 @@
|
||||
package cluster
|
||||
|
||||
// /etc/edgeguard/node.conf — node-lokale, NIEMALS zwischen Cluster-
|
||||
// Peers replizierte Konfiguration. Hält die Identitäts-Werte die jeden
|
||||
// Node einzigartig machen:
|
||||
//
|
||||
// NODE_ID eindeutige UUID (autogeneriert in EnsureNodeID; hier
|
||||
// gespiegelt für Operator-Sichtbarkeit)
|
||||
// HOSTNAME `hostname -f`
|
||||
// MGMT_IP Management-IP (Interface auf dem die API exposed wird;
|
||||
// NIE VIP — wenn diese Box ein VIP übernimmt, bleibt die
|
||||
// MGMT_IP unverändert auf der eigenen Static-IP)
|
||||
// ROLE primary | secondary
|
||||
// PEER_HOSTS comma-separated FQDNs der anderen Cluster-Peers
|
||||
// (leer im Single-Node-Mode)
|
||||
//
|
||||
// Format ist INI-style ohne sections — eine `KEY=VALUE`-Zeile pro
|
||||
// Eintrag. Kommentare mit `#`. Whitespace um `=` wird getrimmt.
|
||||
//
|
||||
// Postinst legt eine leere/auto-befüllte Datei an. Backup-System
|
||||
// INCLUDIERT diese Datei (sie ist Teil des Node-State); im Cluster-
|
||||
// Sync-Path bleibt sie aber explizit DRAUSSEN.
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const DefaultLocalConfigPath = "/etc/edgeguard/node.conf"
|
||||
|
||||
type LocalConfig struct {
|
||||
NodeID string
|
||||
Hostname string
|
||||
MgmtIP string // Management-IP des Nodes (IPv4/v6, ohne CIDR)
|
||||
Role string // "primary" | "secondary"
|
||||
PeerHosts []string // andere Cluster-Peers (FQDNs)
|
||||
}
|
||||
|
||||
// LoadLocalConfig liest die Datei. Wenn sie nicht existiert: returns
|
||||
// nil, nil — kein Fehler (single-node default).
|
||||
func LoadLocalConfig(path string) (*LocalConfig, error) {
|
||||
if path == "" {
|
||||
path = DefaultLocalConfigPath
|
||||
}
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
return nil, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
c := &LocalConfig{}
|
||||
sc := bufio.NewScanner(f)
|
||||
for sc.Scan() {
|
||||
line := strings.TrimSpace(sc.Text())
|
||||
if line == "" || strings.HasPrefix(line, "#") {
|
||||
continue
|
||||
}
|
||||
eq := strings.IndexByte(line, '=')
|
||||
if eq < 0 {
|
||||
continue
|
||||
}
|
||||
key := strings.TrimSpace(line[:eq])
|
||||
val := strings.TrimSpace(line[eq+1:])
|
||||
val = strings.Trim(val, `"`)
|
||||
switch strings.ToUpper(key) {
|
||||
case "NODE_ID":
|
||||
c.NodeID = val
|
||||
case "HOSTNAME":
|
||||
c.Hostname = val
|
||||
case "MGMT_IP":
|
||||
c.MgmtIP = val
|
||||
case "ROLE":
|
||||
c.Role = strings.ToLower(val)
|
||||
case "PEER_HOSTS":
|
||||
for _, h := range strings.Split(val, ",") {
|
||||
h = strings.TrimSpace(h)
|
||||
if h != "" {
|
||||
c.PeerHosts = append(c.PeerHosts, h)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return c, sc.Err()
|
||||
}
|
||||
|
||||
// SaveLocalConfig schreibt die Datei atomic + 0644 root:root.
|
||||
// Aufrufer ist normalerweise edgeguard-ctl unter Operator-Privilegien.
|
||||
func SaveLocalConfig(path string, c *LocalConfig) error {
|
||||
if path == "" {
|
||||
path = DefaultLocalConfigPath
|
||||
}
|
||||
if c == nil {
|
||||
return errors.New("nil config")
|
||||
}
|
||||
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
var b strings.Builder
|
||||
b.WriteString("# Managed by edgeguard — node-local cluster identity.\n")
|
||||
b.WriteString("# NIEMALS zwischen Cluster-Peers replizieren!\n")
|
||||
b.WriteString("# Backup-System sichert diese Datei (Teil des Node-State).\n\n")
|
||||
fmt.Fprintf(&b, "NODE_ID=%s\n", c.NodeID)
|
||||
fmt.Fprintf(&b, "HOSTNAME=%s\n", c.Hostname)
|
||||
fmt.Fprintf(&b, "MGMT_IP=%s\n", c.MgmtIP)
|
||||
if c.Role == "" {
|
||||
c.Role = "primary"
|
||||
}
|
||||
fmt.Fprintf(&b, "ROLE=%s\n", c.Role)
|
||||
fmt.Fprintf(&b, "PEER_HOSTS=%s\n", strings.Join(c.PeerHosts, ","))
|
||||
|
||||
tmp := path + ".tmp"
|
||||
if err := os.WriteFile(tmp, []byte(b.String()), 0o644); err != nil {
|
||||
return err
|
||||
}
|
||||
return os.Rename(tmp, path)
|
||||
}
|
||||
|
||||
// EnsureLocalConfig liest die Datei; legt sie an wenn nicht vorhanden
|
||||
// (autogeneriert NodeID via EnsureNodeID, Hostname via os.Hostname,
|
||||
// MgmtIP via firstNonLoopbackIPv4). Schreibt nur dann zurück wenn
|
||||
// vorher nichts da war ODER NodeID/Hostname noch leer waren.
|
||||
func EnsureLocalConfig(path string) (*LocalConfig, error) {
|
||||
if path == "" {
|
||||
path = DefaultLocalConfigPath
|
||||
}
|
||||
c, err := LoadLocalConfig(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if c == nil {
|
||||
c = &LocalConfig{}
|
||||
}
|
||||
dirty := false
|
||||
if c.NodeID == "" {
|
||||
id, _ := EnsureNodeID("")
|
||||
c.NodeID = id
|
||||
dirty = true
|
||||
}
|
||||
if c.Hostname == "" {
|
||||
h, _ := os.Hostname()
|
||||
c.Hostname = h
|
||||
dirty = true
|
||||
}
|
||||
if c.MgmtIP == "" {
|
||||
c.MgmtIP = firstNonLoopbackIPv4()
|
||||
dirty = true
|
||||
}
|
||||
if c.Role == "" {
|
||||
c.Role = "primary"
|
||||
dirty = true
|
||||
}
|
||||
if !dirty {
|
||||
return c, nil
|
||||
}
|
||||
if err := SaveLocalConfig(path, c); err != nil {
|
||||
// File-not-writable (z.B. dev box als nicht-root): nicht fatal.
|
||||
// Caller bekommt trotzdem den in-memory config.
|
||||
return c, nil
|
||||
}
|
||||
return c, nil
|
||||
}
|
||||
|
||||
// firstNonLoopbackIPv4 picks a plausible default MGMT_IP: the first IPv4
// address found on an interface that is up and not a loopback interface,
// in the order reported by net.Interfaces.
//
// Addresses that cannot serve as a management address are skipped:
// loopback, unspecified (0.0.0.0) and link-local (169.254.0.0/16)
// addresses. Returns "" when interfaces cannot be listed or no suitable
// address exists.
//
// This is only a heuristic. On a box with several interfaces the operator
// overrides the value in /etc/edgeguard/node.conf if the wrong one is picked.
func firstNonLoopbackIPv4() string {
	ifaces, err := net.Interfaces()
	if err != nil {
		return ""
	}
	for _, iface := range ifaces {
		if iface.Flags&net.FlagUp == 0 || iface.Flags&net.FlagLoopback != 0 {
			continue
		}
		addrs, err := iface.Addrs()
		if err != nil {
			// One unreadable interface must not hide the others.
			continue
		}
		for _, addr := range addrs {
			ipnet, ok := addr.(*net.IPNet)
			if !ok {
				continue
			}
			ip4 := ipnet.IP.To4()
			if ip4 == nil {
				continue // IPv6
			}
			if ip4.IsLoopback() || ip4.IsUnspecified() || ip4.IsLinkLocalUnicast() {
				continue
			}
			return ip4.String()
		}
	}
	return ""
}
|
||||
@@ -26,7 +26,8 @@ type Store struct {
|
||||
func NewStore(pool *pgxpool.Pool) *Store { return &Store{Pool: pool} }
|
||||
|
||||
const baseSelect = `
|
||||
SELECT id, name, fqdn, api_url, public_ip, internal_ip, role,
|
||||
SELECT id, name, fqdn, api_url, public_ip, internal_ip, mgmt_ip, role,
|
||||
version, config_hash, status,
|
||||
last_seen, joined_at, created_at, updated_at
|
||||
FROM ha_nodes
|
||||
`
|
||||
@@ -63,45 +64,57 @@ func (s *Store) Get(ctx context.Context, id string) (*models.HANode, error) {
|
||||
// UpsertSelf writes the local node's row using the database-side
|
||||
// ON CONFLICT DO UPDATE so the call is safe to make on every boot.
|
||||
// last_seen is also bumped — handy for the heartbeat-by-restart
|
||||
// pattern even before periodic heartbeats land.
|
||||
// pattern even before periodic heartbeats land. Phase-3-Felder
|
||||
// (mgmt_ip, version, config_hash) werden mit COALESCE
|
||||
// erhalten falls der Caller sie nicht setzt; status wird immer überschrieben (Default "online").
|
||||
func (s *Store) UpsertSelf(ctx context.Context, n models.HANode) (*models.HANode, error) {
|
||||
now := time.Now().UTC()
|
||||
if n.Status == "" {
|
||||
n.Status = "online"
|
||||
}
|
||||
row := s.Pool.QueryRow(ctx, `
|
||||
INSERT INTO ha_nodes (id, name, fqdn, api_url, public_ip, internal_ip, role, last_seen, joined_at)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
|
||||
INSERT INTO ha_nodes (id, name, fqdn, api_url, public_ip, internal_ip, mgmt_ip,
|
||||
role, version, config_hash, status, last_seen, joined_at)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)
|
||||
ON CONFLICT (id) DO UPDATE SET
|
||||
name = EXCLUDED.name,
|
||||
fqdn = EXCLUDED.fqdn,
|
||||
api_url = EXCLUDED.api_url,
|
||||
public_ip = COALESCE(EXCLUDED.public_ip, ha_nodes.public_ip),
|
||||
public_ip = COALESCE(EXCLUDED.public_ip, ha_nodes.public_ip),
|
||||
internal_ip = COALESCE(EXCLUDED.internal_ip, ha_nodes.internal_ip),
|
||||
role = EXCLUDED.role,
|
||||
last_seen = EXCLUDED.last_seen,
|
||||
updated_at = NOW()
|
||||
RETURNING id, name, fqdn, api_url, public_ip, internal_ip, role,
|
||||
mgmt_ip = COALESCE(EXCLUDED.mgmt_ip, ha_nodes.mgmt_ip),
|
||||
role = EXCLUDED.role,
|
||||
version = COALESCE(EXCLUDED.version, ha_nodes.version),
|
||||
config_hash = COALESCE(EXCLUDED.config_hash, ha_nodes.config_hash),
|
||||
status = EXCLUDED.status,
|
||||
last_seen = EXCLUDED.last_seen,
|
||||
updated_at = NOW()
|
||||
RETURNING id, name, fqdn, api_url, public_ip, internal_ip, mgmt_ip,
|
||||
role, version, config_hash, status,
|
||||
last_seen, joined_at, created_at, updated_at`,
|
||||
n.ID, n.Name, n.FQDN, n.APIURL,
|
||||
n.PublicIP, n.InternalIP, n.Role,
|
||||
n.PublicIP, n.InternalIP, n.MgmtIP,
|
||||
n.Role, n.Version, n.ConfigHash, n.Status,
|
||||
now, now,
|
||||
)
|
||||
return scanNode(row)
|
||||
}
|
||||
|
||||
// EnsureSelfRegistered mints the node-id if needed, builds the row
|
||||
// from setup.json + os.Hostname, and upserts it. Called on edgeguard-
|
||||
// api boot AFTER the DB pool is reachable.
|
||||
// from setup.json + os.Hostname + node.conf, and upserts it. Called
|
||||
// on edgeguard-api boot AFTER the DB pool is reachable.
|
||||
//
|
||||
// fqdn = setup-store fqdn (preferred) or hostname.
|
||||
// apiURL = "https://<fqdn>" (HAProxy-fronted; v1 doesn't yet know if
|
||||
// the operator runs on a non-default port).
|
||||
func EnsureSelfRegistered(ctx context.Context, store *Store, fqdn string, role string) (*models.HANode, error) {
|
||||
// apiURL = "https://<fqdn>".
|
||||
// version = edgeguard-api-Version (für Drift-Banner).
|
||||
// mgmtIP = aus /etc/edgeguard/node.conf wenn vorhanden, sonst Auto.
|
||||
func EnsureSelfRegistered(ctx context.Context, store *Store, fqdn, role, version string) (*models.HANode, error) {
|
||||
id, err := EnsureNodeID("")
|
||||
if err != nil {
|
||||
// Even when persistence failed (read-only /var/lib in dev),
|
||||
// EnsureNodeID returns the in-memory id alongside the error
|
||||
// — so we can still register, but the id will rotate on
|
||||
// every boot. Surface as warning to the caller; here we
|
||||
// just keep going so the dev box doesn't stay un-registered.
|
||||
// every boot.
|
||||
_ = err
|
||||
}
|
||||
if id == "" {
|
||||
@@ -114,12 +127,26 @@ func EnsureSelfRegistered(ctx context.Context, store *Store, fqdn string, role s
|
||||
if fqdn == "" {
|
||||
fqdn = host
|
||||
}
|
||||
cfg, _ := LoadLocalConfig("")
|
||||
var mgmtIP *string
|
||||
if cfg != nil && cfg.MgmtIP != "" {
|
||||
v := cfg.MgmtIP
|
||||
mgmtIP = &v
|
||||
}
|
||||
var ver *string
|
||||
if version != "" {
|
||||
v := version
|
||||
ver = &v
|
||||
}
|
||||
n := models.HANode{
|
||||
ID: id,
|
||||
Name: host,
|
||||
FQDN: fqdn,
|
||||
APIURL: "https://" + fqdn,
|
||||
Role: role,
|
||||
ID: id,
|
||||
Name: host,
|
||||
FQDN: fqdn,
|
||||
APIURL: "https://" + fqdn,
|
||||
MgmtIP: mgmtIP,
|
||||
Role: role,
|
||||
Version: ver,
|
||||
Status: "online",
|
||||
}
|
||||
return store.UpsertSelf(ctx, n)
|
||||
}
|
||||
@@ -128,7 +155,8 @@ func scanNode(row interface{ Scan(...any) error }) (*models.HANode, error) {
|
||||
var n models.HANode
|
||||
if err := row.Scan(
|
||||
&n.ID, &n.Name, &n.FQDN, &n.APIURL,
|
||||
&n.PublicIP, &n.InternalIP, &n.Role,
|
||||
&n.PublicIP, &n.InternalIP, &n.MgmtIP,
|
||||
&n.Role, &n.Version, &n.ConfigHash, &n.Status,
|
||||
&n.LastSeen, &n.JoinedAt,
|
||||
&n.CreatedAt, &n.UpdatedAt,
|
||||
); err != nil {
|
||||
|
||||
39
internal/database/migrations/0020_ha_nodes_ext.sql
Normal file
39
internal/database/migrations/0020_ha_nodes_ext.sql
Normal file
@@ -0,0 +1,39 @@
|
||||
-- +goose Up
|
||||
-- +goose StatementBegin
|
||||
|
||||
-- ha_nodes-Erweiterung für Cluster-Phase-3-Foundation. Diese Spalten
|
||||
-- machen die Cluster-UI aussagekräftig auch im Single-Node-Modus und
|
||||
-- bereiten Multi-Node-Drift-Detection vor (analog mail-gateway).
|
||||
--
|
||||
-- version: edgeguard-api-Version des Peers (aus /healthz).
|
||||
-- config_hash: deterministischer Hash über replizierbare DB-Tabellen.
|
||||
-- Drift-Banner triggert wenn nodes verschiedene Werte haben.
|
||||
-- mgmt_ip: Management-IP (nicht VIP) — separat von public_ip + api_url.
|
||||
-- status: online | offline | joining | leaving | unknown.
|
||||
|
||||
ALTER TABLE ha_nodes
|
||||
ADD COLUMN IF NOT EXISTS version TEXT,
|
||||
ADD COLUMN IF NOT EXISTS config_hash TEXT,
|
||||
ADD COLUMN IF NOT EXISTS mgmt_ip INET,
|
||||
ADD COLUMN IF NOT EXISTS status TEXT NOT NULL DEFAULT 'unknown';
|
||||
|
||||
ALTER TABLE ha_nodes
|
||||
DROP CONSTRAINT IF EXISTS ha_nodes_status_check;
|
||||
ALTER TABLE ha_nodes
|
||||
ADD CONSTRAINT ha_nodes_status_check
|
||||
CHECK (status IN ('online', 'offline', 'joining', 'leaving', 'unknown'));
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_ha_nodes_status ON ha_nodes (status);
|
||||
|
||||
-- +goose StatementEnd
|
||||
|
||||
-- +goose Down
|
||||
-- +goose StatementBegin
|
||||
ALTER TABLE ha_nodes
|
||||
DROP CONSTRAINT IF EXISTS ha_nodes_status_check;
|
||||
ALTER TABLE ha_nodes
|
||||
DROP COLUMN IF EXISTS status,
|
||||
DROP COLUMN IF EXISTS mgmt_ip,
|
||||
DROP COLUMN IF EXISTS config_hash,
|
||||
DROP COLUMN IF EXISTS version;
|
||||
-- +goose StatementEnd
|
||||
@@ -1,18 +1,21 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
"git.netcell-it.de/projekte/edgeguard-native/internal/cluster"
|
||||
"git.netcell-it.de/projekte/edgeguard-native/internal/handlers/response"
|
||||
"git.netcell-it.de/projekte/edgeguard-native/internal/models"
|
||||
)
|
||||
|
||||
// ClusterHandler exposes cluster-state endpoints. v1 is read-only:
|
||||
// the UI shows the list of registered nodes but cluster-join + write
|
||||
// operations land in Phase 3.1.
|
||||
// ClusterHandler exposes cluster-state endpoints. v1 ist read-only;
|
||||
// /status liefert eine strukturierte UI-Sicht (local + peers + health),
|
||||
// /nodes bleibt als simpler list-endpoint für Tools/Scripts.
|
||||
type ClusterHandler struct {
|
||||
Store *cluster.Store
|
||||
LocalID string
|
||||
Store *cluster.Store
|
||||
LocalID string
|
||||
}
|
||||
|
||||
func NewClusterHandler(store *cluster.Store, localID string) *ClusterHandler {
|
||||
@@ -22,6 +25,7 @@ func NewClusterHandler(store *cluster.Store, localID string) *ClusterHandler {
|
||||
func (h *ClusterHandler) Register(rg *gin.RouterGroup) {
|
||||
g := rg.Group("/cluster")
|
||||
g.GET("/nodes", h.ListNodes)
|
||||
g.GET("/status", h.Status)
|
||||
}
|
||||
|
||||
func (h *ClusterHandler) ListNodes(c *gin.Context) {
|
||||
@@ -30,8 +34,71 @@ func (h *ClusterHandler) ListNodes(c *gin.Context) {
|
||||
response.Internal(c, err)
|
||||
return
|
||||
}
|
||||
response.OK(c, gin.H{
|
||||
"nodes": nodes,
|
||||
"local_id": h.LocalID,
|
||||
})
|
||||
response.OK(c, gin.H{"nodes": nodes, "local_id": h.LocalID})
|
||||
}
|
||||
|
||||
// ClusterStatus is the UI-oriented view returned by GET /cluster/status:
|
||||
// the local node on its own, the peers as a separate list, plus mode, health and a drift flag.
|
||||
type ClusterStatus struct {
|
||||
LocalID string `json:"local_id"`
|
||||
LocalNode *models.HANode `json:"local_node,omitempty"`
|
||||
Peers []models.HANode `json:"peers"`
|
||||
Mode string `json:"mode"` // "single-node" | "cluster"
|
||||
Health string `json:"health"` // "ok" | "degraded" | "split-brain"
|
||||
DriftFound bool `json:"drift_found"`
|
||||
UpdatedAt time.Time `json:"updated_at"`
|
||||
}
|
||||
|
||||
// Status splittet alle Nodes in local + peers, berechnet mode + health.
|
||||
func (h *ClusterHandler) Status(c *gin.Context) {
|
||||
all, err := h.Store.List(c.Request.Context())
|
||||
if err != nil {
|
||||
response.Internal(c, err)
|
||||
return
|
||||
}
|
||||
out := ClusterStatus{
|
||||
LocalID: h.LocalID,
|
||||
Peers: []models.HANode{},
|
||||
Mode: "single-node",
|
||||
Health: "ok",
|
||||
UpdatedAt: time.Now().UTC(),
|
||||
}
|
||||
var localHash *string
|
||||
for i := range all {
|
||||
n := all[i]
|
||||
if n.ID == h.LocalID {
|
||||
ln := n
|
||||
out.LocalNode = &ln
|
||||
localHash = ln.ConfigHash
|
||||
continue
|
||||
}
|
||||
out.Peers = append(out.Peers, n)
|
||||
}
|
||||
if len(out.Peers) > 0 {
|
||||
out.Mode = "cluster"
|
||||
}
|
||||
// Drift-Detection: jeder peer mit anderem config_hash als unser
|
||||
// lokaler → Banner-Trigger im UI.
|
||||
if localHash != nil && *localHash != "" {
|
||||
for _, p := range out.Peers {
|
||||
if p.ConfigHash == nil || *p.ConfigHash == "" {
|
||||
continue
|
||||
}
|
||||
if *p.ConfigHash != *localHash {
|
||||
out.DriftFound = true
|
||||
out.Health = "degraded"
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
// Offline-Peers → degraded.
|
||||
if !out.DriftFound {
|
||||
for _, p := range out.Peers {
|
||||
if p.Status != "online" {
|
||||
out.Health = "degraded"
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
response.OK(c, out)
|
||||
}
|
||||
|
||||
@@ -2,18 +2,25 @@ package models
|
||||
|
||||
import "time"
|
||||
|
||||
// HANode mirrort eine Row der ha_nodes-Tabelle. Erweitert in Migration
|
||||
// 0020 um version/config_hash/mgmt_ip/status für Cluster-Phase-3-
|
||||
// Drift-Detection + Health-State.
|
||||
type HANode struct {
|
||||
ID string `gorm:"column:id;primaryKey" json:"id"`
|
||||
Name string `gorm:"column:name" json:"name"`
|
||||
FQDN string `gorm:"column:fqdn;uniqueIndex" json:"fqdn"`
|
||||
APIURL string `gorm:"column:api_url" json:"api_url"`
|
||||
PublicIP *string `gorm:"column:public_ip;type:inet" json:"public_ip,omitempty"`
|
||||
InternalIP *string `gorm:"column:internal_ip;type:inet" json:"internal_ip,omitempty"`
|
||||
Role string `gorm:"column:role" json:"role"`
|
||||
LastSeen *time.Time `gorm:"column:last_seen" json:"last_seen,omitempty"`
|
||||
JoinedAt time.Time `gorm:"column:joined_at" json:"joined_at"`
|
||||
CreatedAt time.Time `gorm:"column:created_at" json:"created_at"`
|
||||
UpdatedAt time.Time `gorm:"column:updated_at" json:"updated_at"`
|
||||
ID string `gorm:"column:id;primaryKey" json:"id"`
|
||||
Name string `gorm:"column:name" json:"name"`
|
||||
FQDN string `gorm:"column:fqdn;uniqueIndex" json:"fqdn"`
|
||||
APIURL string `gorm:"column:api_url" json:"api_url"`
|
||||
PublicIP *string `gorm:"column:public_ip;type:inet" json:"public_ip,omitempty"`
|
||||
InternalIP *string `gorm:"column:internal_ip;type:inet" json:"internal_ip,omitempty"`
|
||||
MgmtIP *string `gorm:"column:mgmt_ip;type:inet" json:"mgmt_ip,omitempty"`
|
||||
Role string `gorm:"column:role" json:"role"`
|
||||
Version *string `gorm:"column:version" json:"version,omitempty"`
|
||||
ConfigHash *string `gorm:"column:config_hash" json:"config_hash,omitempty"`
|
||||
Status string `gorm:"column:status" json:"status"`
|
||||
LastSeen *time.Time `gorm:"column:last_seen" json:"last_seen,omitempty"`
|
||||
JoinedAt time.Time `gorm:"column:joined_at" json:"joined_at"`
|
||||
CreatedAt time.Time `gorm:"column:created_at" json:"created_at"`
|
||||
UpdatedAt time.Time `gorm:"column:updated_at" json:"updated_at"`
|
||||
}
|
||||
|
||||
func (HANode) TableName() string { return "ha_nodes" }
|
||||
|
||||
@@ -81,7 +81,7 @@ const NAV: NavSection[] = [
|
||||
},
|
||||
]
|
||||
|
||||
const VERSION = '1.0.69'
|
||||
const VERSION = '1.0.70'
|
||||
|
||||
// Sidebar-Pattern 1:1 aus netcell-webpanel (enconf) übernommen:
|
||||
// - <nav> als root, dunkler Gradient + Teal/Blue-Accent
|
||||
|
||||
@@ -267,12 +267,37 @@
|
||||
},
|
||||
"cluster": {
|
||||
"title": "Cluster",
|
||||
"intro": "{{count}} Node(s) registriert. Multi-Node-Cluster (KeyDB Active-Active + PG Streaming Replication) folgt in einem späteren Release.",
|
||||
"intro": "{{count}} Node(s) registriert. Multi-Node (KeyDB-AA + PG-Streaming-Replication + Leader-Election) folgt schrittweise.",
|
||||
"id": "Node-ID",
|
||||
"fqdn": "FQDN",
|
||||
"role": "Rolle",
|
||||
"joinedAt": "Beigetreten",
|
||||
"self": "diese Node"
|
||||
"self": "diese Node",
|
||||
"drift": "Drift",
|
||||
"modeSingle": "Single-Node",
|
||||
"modeCluster": "Cluster",
|
||||
"health": {
|
||||
"ok": "OK",
|
||||
"degraded": "degraded",
|
||||
"split-brain": "split-brain"
|
||||
},
|
||||
"selfTitle": "Dieser Knoten",
|
||||
"noSelf": "Selbst-Registrierung in ha_nodes fehlgeschlagen — Setup-Wizard durchlaufen.",
|
||||
"peersTitle": "Peers ({{count}})",
|
||||
"singleNodeTitle": "Single-Node-Modus",
|
||||
"singleNodeDesc": "Nur diese Box bekannt. Zusätzlicher Knoten? `edgeguard-ctl cluster-join` (kommt in Phase 3.2).",
|
||||
"driftBanner": "Config-Drift erkannt",
|
||||
"driftBannerDesc": "Ein oder mehrere Peers haben einen anderen Config-Hash als dieser Node. Entweder stehen noch Änderungen in der Outbox, oder auf einem Peer wurde direkt in der DB editiert. Warte bis die Outbox leer ist oder starte Diagnostics.",
|
||||
"col": {
|
||||
"node": "Knoten",
|
||||
"status": "Status",
|
||||
"role": "Rolle",
|
||||
"apiUrl": "API-URL",
|
||||
"configHash": "Config-Hash",
|
||||
"version": "Version",
|
||||
"lastSeen": "Last seen",
|
||||
"mgmtIp": "MGMT-IP"
|
||||
}
|
||||
},
|
||||
"ssl": {
|
||||
"title": "SSL-Zertifikate",
|
||||
|
||||
@@ -267,12 +267,37 @@
|
||||
},
|
||||
"cluster": {
|
||||
"title": "Cluster",
|
||||
"intro": "{{count}} node(s) registered. Multi-node cluster (KeyDB Active-Active + PG streaming replication) coming in a later release.",
|
||||
"intro": "{{count}} node(s) registered. Multi-node (KeyDB-AA + PG streaming replication + leader election) lands in stages.",
|
||||
"id": "Node ID",
|
||||
"fqdn": "FQDN",
|
||||
"role": "Role",
|
||||
"joinedAt": "Joined",
|
||||
"self": "this node"
|
||||
"self": "this node",
|
||||
"drift": "Drift",
|
||||
"modeSingle": "Single node",
|
||||
"modeCluster": "Cluster",
|
||||
"health": {
|
||||
"ok": "OK",
|
||||
"degraded": "degraded",
|
||||
"split-brain": "split-brain"
|
||||
},
|
||||
"selfTitle": "This node",
|
||||
"noSelf": "Self-registration in ha_nodes failed — run the setup wizard.",
|
||||
"peersTitle": "Peers ({{count}})",
|
||||
"singleNodeTitle": "Single-node mode",
|
||||
"singleNodeDesc": "Only this box is known. To add another node: `edgeguard-ctl cluster-join` (coming in phase 3.2).",
|
||||
"driftBanner": "Config drift detected",
|
||||
"driftBannerDesc": "One or more peers have a different config hash than this node. Either changes are still in the outbox or a peer was edited directly in the DB. Wait for the outbox to drain or run diagnostics.",
|
||||
"col": {
|
||||
"node": "Node",
|
||||
"status": "Status",
|
||||
"role": "Role",
|
||||
"apiUrl": "API URL",
|
||||
"configHash": "Config hash",
|
||||
"version": "Version",
|
||||
"lastSeen": "Last seen",
|
||||
"mgmtIp": "MGMT IP"
|
||||
}
|
||||
},
|
||||
"ssl": {
|
||||
"title": "SSL certificates",
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
import { Card, Spin, Tag } from 'antd'
|
||||
import { Alert, Card, Descriptions, Space, Spin, Table, Tag, Typography } from 'antd'
|
||||
import type { ColumnsType } from 'antd/es/table'
|
||||
import { ApartmentOutlined } from '@ant-design/icons'
|
||||
import { useQuery } from '@tanstack/react-query'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import DataTable from '../../components/DataTable'
|
||||
import PageHeader from '../../components/PageHeader'
|
||||
|
||||
import apiClient, { isEnvelope } from '../../api/client'
|
||||
import PageHeader from '../../components/PageHeader'
|
||||
|
||||
const { Text } = Typography
|
||||
|
||||
interface HANode {
|
||||
id: string
|
||||
@@ -15,47 +16,98 @@ interface HANode {
|
||||
api_url: string
|
||||
public_ip?: string | null
|
||||
internal_ip?: string | null
|
||||
mgmt_ip?: string | null
|
||||
role: string
|
||||
version?: string | null
|
||||
config_hash?: string | null
|
||||
status: 'online' | 'offline' | 'joining' | 'leaving' | 'unknown'
|
||||
last_seen?: string | null
|
||||
joined_at: string
|
||||
created_at: string
|
||||
}
|
||||
|
||||
interface ClusterStatus {
|
||||
local_id: string
|
||||
local_node?: HANode | null
|
||||
peers: HANode[]
|
||||
mode: 'single-node' | 'cluster'
|
||||
health: 'ok' | 'degraded' | 'split-brain'
|
||||
drift_found: boolean
|
||||
updated_at: string
|
||||
}
|
||||
|
||||
interface ClusterPayload {
|
||||
nodes: HANode[]
|
||||
local_id: string
|
||||
function statusTag(s: HANode['status']) {
|
||||
switch (s) {
|
||||
case 'online': return <Tag color="green">online</Tag>
|
||||
case 'offline': return <Tag color="red">offline</Tag>
|
||||
case 'joining': return <Tag color="blue">joining</Tag>
|
||||
case 'leaving': return <Tag color="orange">leaving</Tag>
|
||||
default: return <Tag>unknown</Tag>
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Formats how long ago a node was last seen as a compact age string:
 * seconds ("12s"), minutes ("5m") or hours ("3h").
 *
 * Returns "—" when no timestamp is given or it cannot be parsed. A timestamp
 * in the future (peer clock ahead of the browser) is shown as "0s" instead of
 * a negative age. Values are rounded down, so the unit switches exactly at
 * 60s and 60m and "60s" / "60m" are never shown.
 */
function lastSeenRelative(iso?: string | null): string {
  if (!iso) return '—'
  const seenAt = new Date(iso).getTime()
  if (Number.isNaN(seenAt)) return '—'
  const ms = Math.max(0, Date.now() - seenAt)
  if (ms < 60_000) return `${Math.floor(ms / 1000)}s`
  if (ms < 3_600_000) return `${Math.floor(ms / 60_000)}m`
  return `${Math.floor(ms / 3_600_000)}h`
}
|
||||
|
||||
export default function ClusterPage() {
|
||||
const { t } = useTranslation()
|
||||
|
||||
const { data, isLoading } = useQuery({
|
||||
queryKey: ['cluster', 'nodes'],
|
||||
queryKey: ['cluster', 'status'],
|
||||
queryFn: async () => {
|
||||
const r = await apiClient.get('/cluster/nodes')
|
||||
if (isEnvelope(r.data)) return r.data.data as ClusterPayload
|
||||
return null
|
||||
const r = await apiClient.get('/cluster/status')
|
||||
return isEnvelope(r.data) ? (r.data.data as ClusterStatus) : null
|
||||
},
|
||||
refetchInterval: 30_000,
|
||||
})
|
||||
|
||||
if (isLoading) return <Spin />
|
||||
if (!data) return null
|
||||
|
||||
const columns: ColumnsType<HANode> = [
|
||||
const peerColumns: ColumnsType<HANode> = [
|
||||
{
|
||||
title: t('cluster.id'), dataIndex: 'id', key: 'id',
|
||||
render: (id: string) => (
|
||||
<span>
|
||||
<code>{id}</code>{' '}
|
||||
{id === data?.local_id && <Tag color="blue">{t('cluster.self')}</Tag>}
|
||||
</span>
|
||||
title: t('cluster.col.node'), key: 'node',
|
||||
render: (_, r) => (
|
||||
<div>
|
||||
<div><Text strong>{r.fqdn}</Text></div>
|
||||
<div><Text type="secondary" style={{ fontFamily: 'monospace', fontSize: 11 }}>{r.id}</Text></div>
|
||||
</div>
|
||||
),
|
||||
},
|
||||
{
|
||||
title: t('cluster.col.status'), dataIndex: 'status', width: 110,
|
||||
render: (s: HANode['status']) => statusTag(s),
|
||||
},
|
||||
{ title: t('cluster.col.role'), dataIndex: 'role', width: 110,
|
||||
render: (v: string) => <Tag color={v === 'primary' ? 'gold' : 'default'}>{v}</Tag> },
|
||||
{ title: t('cluster.col.apiUrl'), dataIndex: 'api_url', width: 240,
|
||||
render: (v: string) => <Text style={{ fontFamily: 'monospace', fontSize: 11 }}>{v}</Text> },
|
||||
{
|
||||
title: t('cluster.col.configHash'), dataIndex: 'config_hash', width: 160,
|
||||
render: (v: string | null | undefined) => {
|
||||
if (!v) return <Text type="secondary">—</Text>
|
||||
const localHash = data.local_node?.config_hash
|
||||
const drifts = localHash && v !== localHash
|
||||
return (
|
||||
<Space size={4}>
|
||||
<Text code style={{ fontSize: 11 }}>{v.slice(0, 12)}…</Text>
|
||||
{drifts && <Tag color="red">{t('cluster.drift')}</Tag>}
|
||||
</Space>
|
||||
)
|
||||
},
|
||||
},
|
||||
{ title: t('cluster.col.version'), dataIndex: 'version', width: 100,
|
||||
render: (v?: string | null) => v ? <Tag>{v}</Tag> : <Text type="secondary">—</Text> },
|
||||
{
|
||||
title: t('cluster.col.lastSeen'), dataIndex: 'last_seen', width: 100,
|
||||
render: (v?: string | null) => (
|
||||
<Text type="secondary" style={{ fontSize: 12 }}>{lastSeenRelative(v)}</Text>
|
||||
),
|
||||
},
|
||||
{ title: t('cluster.fqdn'), dataIndex: 'fqdn', key: 'fqdn' },
|
||||
{ title: t('cluster.role'), dataIndex: 'role', key: 'role' },
|
||||
{ title: t('cluster.joinedAt'), dataIndex: 'joined_at', key: 'joined_at',
|
||||
render: (s: string) => new Date(s).toLocaleString() },
|
||||
]
|
||||
|
||||
return (
|
||||
@@ -63,11 +115,92 @@ export default function ClusterPage() {
|
||||
<PageHeader
|
||||
icon={<ApartmentOutlined />}
|
||||
title={t('cluster.title')}
|
||||
subtitle={t('cluster.intro', { count: data?.nodes.length ?? 0 })}
|
||||
subtitle={t('cluster.intro', { count: 1 + data.peers.length })}
|
||||
extra={
|
||||
<Space>
|
||||
<Tag color={data.mode === 'cluster' ? 'blue' : 'default'}>
|
||||
{data.mode === 'cluster' ? t('cluster.modeCluster') : t('cluster.modeSingle')}
|
||||
</Tag>
|
||||
<Tag color={data.health === 'ok' ? 'green' : data.health === 'degraded' ? 'orange' : 'red'}>
|
||||
{t(`cluster.health.${data.health}`)}
|
||||
</Tag>
|
||||
</Space>
|
||||
}
|
||||
/>
|
||||
<Card size="small">
|
||||
<DataTable rowKey="id" columns={columns} dataSource={data?.nodes ?? []} />
|
||||
|
||||
{data.drift_found && (
|
||||
<Alert
|
||||
type="warning"
|
||||
showIcon
|
||||
banner
|
||||
className="mb-16"
|
||||
message={t('cluster.driftBanner')}
|
||||
description={t('cluster.driftBannerDesc')}
|
||||
/>
|
||||
)}
|
||||
|
||||
{data.mode === 'single-node' && (
|
||||
<Alert
|
||||
type="info"
|
||||
showIcon
|
||||
className="mb-16"
|
||||
message={t('cluster.singleNodeTitle')}
|
||||
description={t('cluster.singleNodeDesc')}
|
||||
/>
|
||||
)}
|
||||
|
||||
<Card size="small" title={t('cluster.selfTitle')} className="mb-16">
|
||||
{data.local_node ? (
|
||||
<Descriptions size="small" column={2} bordered>
|
||||
<Descriptions.Item label={t('cluster.col.node')} span={2}>
|
||||
<Space direction="vertical" size={2}>
|
||||
<Text strong>{data.local_node.fqdn}</Text>
|
||||
<Text type="secondary" style={{ fontFamily: 'monospace', fontSize: 11 }}>
|
||||
{data.local_node.id}
|
||||
</Text>
|
||||
</Space>
|
||||
</Descriptions.Item>
|
||||
<Descriptions.Item label={t('cluster.col.status')}>
|
||||
{statusTag(data.local_node.status)}
|
||||
</Descriptions.Item>
|
||||
<Descriptions.Item label={t('cluster.col.role')}>
|
||||
<Tag color={data.local_node.role === 'primary' ? 'gold' : 'default'}>
|
||||
{data.local_node.role}
|
||||
</Tag>
|
||||
</Descriptions.Item>
|
||||
<Descriptions.Item label={t('cluster.col.version')}>
|
||||
{data.local_node.version ? <Tag>{data.local_node.version}</Tag> : '—'}
|
||||
</Descriptions.Item>
|
||||
<Descriptions.Item label={t('cluster.col.mgmtIp')}>
|
||||
<Text style={{ fontFamily: 'monospace' }}>
|
||||
{data.local_node.mgmt_ip || '—'}
|
||||
</Text>
|
||||
</Descriptions.Item>
|
||||
<Descriptions.Item label={t('cluster.col.apiUrl')} span={2}>
|
||||
<Text style={{ fontFamily: 'monospace', fontSize: 12 }}>
|
||||
{data.local_node.api_url}
|
||||
</Text>
|
||||
</Descriptions.Item>
|
||||
<Descriptions.Item label={t('cluster.col.configHash')} span={2}>
|
||||
<Text code>{data.local_node.config_hash || '—'}</Text>
|
||||
</Descriptions.Item>
|
||||
</Descriptions>
|
||||
) : (
|
||||
<Text type="secondary">{t('cluster.noSelf')}</Text>
|
||||
)}
|
||||
</Card>
|
||||
|
||||
{data.peers.length > 0 && (
|
||||
<Card size="small" title={t('cluster.peersTitle', { count: data.peers.length })}>
|
||||
<Table
|
||||
size="small"
|
||||
rowKey="id"
|
||||
dataSource={data.peers}
|
||||
columns={peerColumns}
|
||||
pagination={false}
|
||||
/>
|
||||
</Card>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
@@ -60,6 +60,17 @@ case "$1" in
|
||||
# world-readable.
|
||||
install -d -m 0750 -o "$EG_USER" -g "$EG_USER" /var/backups/edgeguard
|
||||
|
||||
# node.conf holds this node's local cluster identity and must never be
# synced between peers. Only an empty placeholder owned by the service
# user is created here; the API is expected to fill it on first boot
# (NODE_ID, hostname, first non-loopback IPv4). The operator may edit it
# afterwards, e.g. to correct MGMT_IP on a box with several interfaces.
# An already existing file is left untouched.
[ -f /etc/edgeguard/node.conf ] || {
    : > /etc/edgeguard/node.conf
    chown "$EG_USER":"$EG_USER" /etc/edgeguard/node.conf
    chmod 0644 /etc/edgeguard/node.conf
}
|
||||
|
||||
# ── sudoers: HAProxy reload + (later) systemd-networkd reload
|
||||
# Damit edgeguard-api nach einer SSL- oder Netzwerk-Mutation
|
||||
# selbst reloaden kann ohne root zu sein. NOPASSWD ist auf
|
||||
|
||||
Reference in New Issue
Block a user