feat(cluster): (c) Phase-3 MVP — stable node-id + self-register + Cluster-Page

Minimal-Slice für Phase-3-Cluster:
* internal/cluster/node_id.go — stable UUID 'n-<16hex>' in
  /var/lib/edgeguard/node-id, idempotent über reboots.
* internal/cluster/store.go — ha_nodes-Repo (List/Get/UpsertSelf)
  via pgxpool. EnsureSelfRegistered upsertet die lokale Row beim
  Boot mit FQDN aus setup.json.
* internal/handlers/cluster.go — GET /api/v1/cluster/nodes liefert
  alle ha_nodes plus local_id (für UI-Highlighting).
* main.go: nach DB-Pool-Open wird EnsureSelfRegistered (nur wenn
  setup.completed) ausgeführt, ClusterHandler registriert.
* management-ui/src/pages/Cluster/index.tsx — Tabelle mit Node-ID,
  FQDN, Rolle, Beitrittszeit; eigene Node mit "diese Node"-Tag
  markiert. Sidebar-Eintrag + i18n de/en.

Bewusst NICHT in dieser Runde: cluster-init/cluster-join CLIs, KeyDB
Active-Active config-gen, PG streaming replication, mTLS zwischen
Peers, License-Leader-Election. Diese kommen mit dem ersten echten
Multi-Node-Test (Phase 3.1) — sonst entstünde Code, der sich nicht per
Smoke-Test prüfen lässt.

End-to-end-Smoke: setup → restart → ha_nodes hat 1 Row mit
fqdn=eg.example.com, /cluster/nodes liefert sie korrekt mit
local_id-Markierung.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Debian
2026-05-09 11:52:54 +02:00
parent 6525cb1a41
commit cb5691cf3c
10 changed files with 421 additions and 2 deletions

138
internal/cluster/store.go Normal file
View File

@@ -0,0 +1,138 @@
package cluster
import (
"context"
"errors"
"fmt"
"os"
"time"
"github.com/jackc/pgx/v5"
"github.com/jackc/pgx/v5/pgxpool"
"git.netcell-it.de/projekte/edgeguard-native/internal/models"
)
// ErrNotFound is returned by Store.Get when no ha_nodes row matches the
// requested id. Compare with errors.Is.
var ErrNotFound = errors.New("ha_node not found")
// Store wraps the ha_nodes table — used by the cluster handler and by
// EnsureSelfRegistered. v1 only ever has one row (the local node);
// the table is in place so Phase 3.1 multi-node lands without
// schema churn.
type Store struct {
// Pool is the pgx connection pool that every query of this Store runs on.
Pool *pgxpool.Pool
}
// NewStore returns a Store whose queries run on the given connection pool.
func NewStore(pool *pgxpool.Pool) *Store {
	st := new(Store)
	st.Pool = pool
	return st
}
// baseSelect is the shared SELECT prefix for reads from ha_nodes. The column
// order must stay in sync with the Scan destinations in scanNode; List and
// Get append their own ORDER BY / WHERE clause to it.
const baseSelect = `
SELECT id, name, fqdn, api_url, public_ip, internal_ip, role,
last_seen, joined_at, created_at, updated_at
FROM ha_nodes
`
// List returns every row of ha_nodes ordered by joined_at, oldest first.
//
// When the table is empty the result is an empty, non-nil slice. On a query,
// scan, or iteration error the returned slice is nil, so callers never see a
// partially populated result next to a non-nil error.
func (s *Store) List(ctx context.Context) ([]models.HANode, error) {
	rows, err := s.Pool.Query(ctx, baseSelect+" ORDER BY joined_at ASC")
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	out := make([]models.HANode, 0, 4)
	for rows.Next() {
		n, err := scanNode(rows)
		if err != nil {
			return nil, err
		}
		out = append(out, *n)
	}
	// rows.Next() returning false can mean "done" or "failed"; only rows.Err()
	// tells the two apart, and a failure must not leak the rows read so far.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return out, nil
}
// Get loads the ha_nodes row with the given id.
//
// It returns ErrNotFound when no such row exists; any other database or scan
// error is passed through unchanged.
func (s *Store) Get(ctx context.Context, id string) (*models.HANode, error) {
	node, err := scanNode(s.Pool.QueryRow(ctx, baseSelect+" WHERE id = $1", id))
	switch {
	case err == nil:
		return node, nil
	case errors.Is(err, pgx.ErrNoRows):
		// Translate the driver sentinel into the package-level one.
		return nil, ErrNotFound
	default:
		return nil, err
	}
}
// UpsertSelf inserts the local node's row or, if a row with the same id
// already exists, refreshes it in place through ON CONFLICT (id) DO UPDATE,
// which makes the call safe to repeat on every boot.
//
// On conflict name, fqdn, api_url and role are overwritten, public_ip and
// internal_ip are only replaced by non-NULL values, and joined_at is left
// untouched. last_seen is set to the current UTC time on every call, giving
// a heartbeat-by-restart signal before periodic heartbeats exist.
//
// The row as stored by the database is returned.
func (s *Store) UpsertSelf(ctx context.Context, n models.HANode) (*models.HANode, error) {
	const upsertSQL = `
INSERT INTO ha_nodes (id, name, fqdn, api_url, public_ip, internal_ip, role, last_seen, joined_at)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
ON CONFLICT (id) DO UPDATE SET
name = EXCLUDED.name,
fqdn = EXCLUDED.fqdn,
api_url = EXCLUDED.api_url,
public_ip = COALESCE(EXCLUDED.public_ip, ha_nodes.public_ip),
internal_ip = COALESCE(EXCLUDED.internal_ip, ha_nodes.internal_ip),
role = EXCLUDED.role,
last_seen = EXCLUDED.last_seen,
updated_at = NOW()
RETURNING id, name, fqdn, api_url, public_ip, internal_ip, role,
last_seen, joined_at, created_at, updated_at`

	// One timestamp feeds both last_seen ($8) and joined_at ($9).
	stamp := time.Now().UTC()
	args := []any{
		n.ID, n.Name, n.FQDN, n.APIURL,
		n.PublicIP, n.InternalIP, n.Role,
		stamp, stamp,
	}
	return scanNode(s.Pool.QueryRow(ctx, upsertSQL, args...))
}
// EnsureSelfRegistered obtains the node id via EnsureNodeID, builds the
// local node's row from the supplied fqdn and os.Hostname, and upserts it.
// Called on edgeguard-api boot AFTER the DB pool is reachable.
//
// Parameters:
//   - store: the ha_nodes store; a nil store (or nil pool) is rejected with
//     an error instead of panicking.
//   - fqdn:  setup-store fqdn (preferred); when empty the hostname is used.
//   - role:  stored verbatim in the role column.
//
// The row's api_url is "https://<fqdn>" (HAProxy-fronted; v1 doesn't yet
// know if the operator runs on a non-default port). If os.Hostname fails,
// the name falls back to "unknown".
//
// Returns the stored row, or an error when no node id could be derived or
// the upsert failed. Underlying causes are wrapped with %w.
func EnsureSelfRegistered(ctx context.Context, store *Store, fqdn string, role string) (*models.HANode, error) {
	if store == nil || store.Pool == nil {
		return nil, errors.New("cluster store is not initialised")
	}

	// Best effort by design: per the original author's note, EnsureNodeID can
	// hand back a usable in-memory id together with an error when persisting
	// it failed (e.g. read-only /var/lib in dev). In that case we still
	// register so the dev box doesn't stay un-registered, accepting that the
	// id may rotate on the next boot. Only a missing id is fatal.
	id, idErr := EnsureNodeID("")
	if id == "" {
		if idErr != nil {
			// Keep the root cause instead of dropping it.
			return nil, fmt.Errorf("could not derive node id: %w", idErr)
		}
		return nil, errors.New("could not derive node id")
	}

	host, err := os.Hostname()
	if err != nil {
		host = "unknown"
	}
	if fqdn == "" {
		fqdn = host
	}

	node, err := store.UpsertSelf(ctx, models.HANode{
		ID:     id,
		Name:   host,
		FQDN:   fqdn,
		APIURL: "https://" + fqdn,
		Role:   role,
	})
	if err != nil {
		return nil, fmt.Errorf("register node %q in ha_nodes: %w", id, err)
	}
	return node, nil
}
// scanNode reads one ha_nodes row into a freshly allocated models.HANode.
//
// row is anything exposing a pgx-style Scan method; in this file it is called
// with both the result of QueryRow and a Rows cursor. The destination order
// mirrors the column order of baseSelect and of UpsertSelf's RETURNING list.
// On a Scan error the node is discarded and nil is returned with the error.
func scanNode(row interface{ Scan(...any) error }) (*models.HANode, error) {
	node := new(models.HANode)
	dest := []any{
		&node.ID, &node.Name, &node.FQDN, &node.APIURL,
		&node.PublicIP, &node.InternalIP, &node.Role,
		&node.LastSeen, &node.JoinedAt,
		&node.CreatedAt, &node.UpdatedAt,
	}
	if err := row.Scan(dest...); err != nil {
		return nil, err
	}
	return node, nil
}