Code-Vorbereitung für Multi-Node, ohne dass eine zweite Box nötig ist.
Single-Node-Mode bleibt der Default; alles ist bereits vorhanden und wird
sichtbar, sobald ein zweiter Knoten dem Cluster beitritt (Phase 3.2, später).
Migration 0020:
ha_nodes += version (edgeguard-api-Version)
config_hash (drift-Detection-Hash)
mgmt_ip (Management-IP, niemals VIP)
status (online|offline|joining|leaving|unknown)
internal/cluster/local_config.go:
/etc/edgeguard/node.conf — INI-style, node-lokale Identität:
NODE_ID, HOSTNAME, MGMT_IP, ROLE, PEER_HOSTS. NIEMALS zwischen
Cluster-Peers replizieren. LoadLocalConfig / SaveLocalConfig /
EnsureLocalConfig (auto-Generierung beim ersten Boot).
MgmtIP-Default = firstNonLoopbackIPv4(); Operator kann
überschreiben (mehrere Interfaces).
internal/cluster/store.go:
- HANode-Model um die 4 neuen Felder erweitert
- UpsertSelf nimmt jetzt mgmt_ip/version/config_hash/status entgegen; COALESCE
erhält die gespeicherten Werte von mgmt_ip/version/config_hash, wenn der
Caller sie nicht setzt (ein leerer status wird zu "online")
- EnsureSelfRegistered-Signatur: + role + version-Argument
internal/handlers/cluster.go:
GET /api/v1/cluster/status — strukturierter Endpoint:
{local_id, local_node, peers[], mode, health, drift_found, updated_at}
GET /api/v1/cluster/nodes bleibt für Tools.
UI (pages/Cluster):
- Header zeigt Mode-Tag (Single-Node / Cluster) + Health-Tag (OK /
degraded / split-brain)
- Self-Card: Descriptions mit FQDN, Node-ID, Status, Role, Version,
MGMT-IP, API-URL, Config-Hash
- Peers-Tabelle nur wenn vorhanden, mit "drift"-Marker pro Row
- Drift-Alert-Banner wenn ein Peer einen anderen config_hash hat
- Single-Node-Mode Hinweis-Alert ("cluster-join kommt in 3.2")
postinst: Eine leere /etc/edgeguard/node.conf wird angelegt (chown
edgeguard); die API befüllt sie beim ersten Boot automatisch.
main.go ruft EnsureLocalConfig + EnsureSelfRegistered mit version.
Verifiziert auf der Box (1.0.70):
- /etc/edgeguard/node.conf hat NODE_ID, HOSTNAME, MGMT_IP=89.163.205.6,
ROLE=primary
- ha_nodes-Row: status=online, version=1.0.70, mgmt_ip=89.163.205.6
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
167 lines
4.8 KiB
Go
167 lines
4.8 KiB
Go
package cluster
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"os"
|
|
"time"
|
|
|
|
"github.com/jackc/pgx/v5"
|
|
"github.com/jackc/pgx/v5/pgxpool"
|
|
|
|
"git.netcell-it.de/projekte/edgeguard-native/internal/models"
|
|
)
|
|
|
|
// ErrNotFound is returned by Store.Get when no ha_nodes row matches the
// requested id.
var ErrNotFound = errors.New("ha_node not found")
|
|
|
|
// Store wraps the ha_nodes table — used by the cluster handler and by
|
|
// EnsureSelfRegistered. v1 only ever has one row (the local node);
|
|
// the table is in place so Phase 3.1 multi-node lands without
|
|
// schema churn.
|
|
type Store struct {
|
|
Pool *pgxpool.Pool
|
|
}
|
|
|
|
// NewStore returns a Store whose queries run against pool.
func NewStore(pool *pgxpool.Pool) *Store {
	return &Store{Pool: pool}
}
|
|
|
|
// baseSelect is the shared SELECT ... FROM ha_nodes prefix used by List and
// Get, which append their own ORDER BY / WHERE clause. The column order
// has to stay in sync with the destination order in scanNode.
const baseSelect = `
SELECT id, name, fqdn, api_url, public_ip, internal_ip, mgmt_ip, role,
       version, config_hash, status,
       last_seen, joined_at, created_at, updated_at
FROM ha_nodes
`
|
|
|
|
func (s *Store) List(ctx context.Context) ([]models.HANode, error) {
|
|
rows, err := s.Pool.Query(ctx, baseSelect+" ORDER BY joined_at ASC")
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer rows.Close()
|
|
out := make([]models.HANode, 0, 4)
|
|
for rows.Next() {
|
|
n, err := scanNode(rows)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
out = append(out, *n)
|
|
}
|
|
return out, rows.Err()
|
|
}
|
|
|
|
func (s *Store) Get(ctx context.Context, id string) (*models.HANode, error) {
|
|
row := s.Pool.QueryRow(ctx, baseSelect+" WHERE id = $1", id)
|
|
n, err := scanNode(row)
|
|
if err != nil {
|
|
if errors.Is(err, pgx.ErrNoRows) {
|
|
return nil, ErrNotFound
|
|
}
|
|
return nil, err
|
|
}
|
|
return n, nil
|
|
}
|
|
|
|
// UpsertSelf writes the local node's row using the database-side
|
|
// ON CONFLICT DO UPDATE so the call is safe to make on every boot.
|
|
// last_seen is also bumped — handy for the heartbeat-by-restart
|
|
// pattern even before periodic heartbeats land. Phase-3-Felder
|
|
// (mgmt_ip, version, config_hash, status) werden mit COALESCE
|
|
// erhalten falls der Caller sie nicht setzt.
|
|
func (s *Store) UpsertSelf(ctx context.Context, n models.HANode) (*models.HANode, error) {
|
|
now := time.Now().UTC()
|
|
if n.Status == "" {
|
|
n.Status = "online"
|
|
}
|
|
row := s.Pool.QueryRow(ctx, `
|
|
INSERT INTO ha_nodes (id, name, fqdn, api_url, public_ip, internal_ip, mgmt_ip,
|
|
role, version, config_hash, status, last_seen, joined_at)
|
|
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)
|
|
ON CONFLICT (id) DO UPDATE SET
|
|
name = EXCLUDED.name,
|
|
fqdn = EXCLUDED.fqdn,
|
|
api_url = EXCLUDED.api_url,
|
|
public_ip = COALESCE(EXCLUDED.public_ip, ha_nodes.public_ip),
|
|
internal_ip = COALESCE(EXCLUDED.internal_ip, ha_nodes.internal_ip),
|
|
mgmt_ip = COALESCE(EXCLUDED.mgmt_ip, ha_nodes.mgmt_ip),
|
|
role = EXCLUDED.role,
|
|
version = COALESCE(EXCLUDED.version, ha_nodes.version),
|
|
config_hash = COALESCE(EXCLUDED.config_hash, ha_nodes.config_hash),
|
|
status = EXCLUDED.status,
|
|
last_seen = EXCLUDED.last_seen,
|
|
updated_at = NOW()
|
|
RETURNING id, name, fqdn, api_url, public_ip, internal_ip, mgmt_ip,
|
|
role, version, config_hash, status,
|
|
last_seen, joined_at, created_at, updated_at`,
|
|
n.ID, n.Name, n.FQDN, n.APIURL,
|
|
n.PublicIP, n.InternalIP, n.MgmtIP,
|
|
n.Role, n.Version, n.ConfigHash, n.Status,
|
|
now, now,
|
|
)
|
|
return scanNode(row)
|
|
}
|
|
|
|
// EnsureSelfRegistered mints the node-id if needed, builds the row
|
|
// from setup.json + os.Hostname + node.conf, and upserts it. Called
|
|
// on edgeguard-api boot AFTER the DB pool is reachable.
|
|
//
|
|
// fqdn = setup-store fqdn (preferred) or hostname.
|
|
// apiURL = "https://<fqdn>".
|
|
// version = edgeguard-api-Version (für Drift-Banner).
|
|
// mgmtIP = aus /etc/edgeguard/node.conf wenn vorhanden, sonst Auto.
|
|
func EnsureSelfRegistered(ctx context.Context, store *Store, fqdn, role, version string) (*models.HANode, error) {
|
|
id, err := EnsureNodeID("")
|
|
if err != nil {
|
|
// Even when persistence failed (read-only /var/lib in dev),
|
|
// EnsureNodeID returns the in-memory id alongside the error
|
|
// — so we can still register, but the id will rotate on
|
|
// every boot.
|
|
_ = err
|
|
}
|
|
if id == "" {
|
|
return nil, fmt.Errorf("could not derive node id")
|
|
}
|
|
host, hostErr := os.Hostname()
|
|
if hostErr != nil {
|
|
host = "unknown"
|
|
}
|
|
if fqdn == "" {
|
|
fqdn = host
|
|
}
|
|
cfg, _ := LoadLocalConfig("")
|
|
var mgmtIP *string
|
|
if cfg != nil && cfg.MgmtIP != "" {
|
|
v := cfg.MgmtIP
|
|
mgmtIP = &v
|
|
}
|
|
var ver *string
|
|
if version != "" {
|
|
v := version
|
|
ver = &v
|
|
}
|
|
n := models.HANode{
|
|
ID: id,
|
|
Name: host,
|
|
FQDN: fqdn,
|
|
APIURL: "https://" + fqdn,
|
|
MgmtIP: mgmtIP,
|
|
Role: role,
|
|
Version: ver,
|
|
Status: "online",
|
|
}
|
|
return store.UpsertSelf(ctx, n)
|
|
}
|
|
|
|
func scanNode(row interface{ Scan(...any) error }) (*models.HANode, error) {
|
|
var n models.HANode
|
|
if err := row.Scan(
|
|
&n.ID, &n.Name, &n.FQDN, &n.APIURL,
|
|
&n.PublicIP, &n.InternalIP, &n.MgmtIP,
|
|
&n.Role, &n.Version, &n.ConfigHash, &n.Status,
|
|
&n.LastSeen, &n.JoinedAt,
|
|
&n.CreatedAt, &n.UpdatedAt,
|
|
); err != nil {
|
|
return nil, err
|
|
}
|
|
return &n, nil
|
|
}
|