feat(lifecycle): validate shared symlinks at daemon startup (A2)

Wire symlinks.ValidateAll into the lifecycle manager so the daemon
refuses to start if any configured account is missing one of the
shared-state symlinks or if a link diverges from the canonical target.

Previously, a missing link on a freshly deployed VM would silently
create a divergent state tree per account (duplicate JSONL transcripts,
broken undo history) — exactly the failure mode the symlinks package
(A1) was introduced to prevent.

The check runs once at startup before EnsureAllSessions, guarding a
single well-defined invariant: "every account home shares the same
projects/, file-history/ and session-env/ roots". No auto-heal on
divergence — we fail fast with an explicit error so the operator fixes
it manually rather than one account's state being overwritten.

Part of Phase 1 Chantier A — Failover robuste.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Ubuntu 2026-04-16 19:03:43 +00:00
parent 91091d7abf
commit e16e3526a0
3 changed files with 68 additions and 1 deletions

View file

@ -1,4 +1,31 @@
# Version actuelle : 0.3.5 # Version actuelle : 0.3.6
## [0.3.6] - 2026-04-16
**Type:** Patch — Phase 1 / Chantier A2 : validation des symlinks au startup
### Ajouté
- `Manager.ValidateSharedSymlinks()` : nouvelle méthode dans
`internal/lifecycle` qui agrège les `Home` de tous les comptes
configurés et délègue à `symlinks.ValidateAll`. Échoue dur si un
compte n'a pas de `home` défini ou si un lien est absent/divergent.
- `cmd/claude-failover/main.go` appelle cette validation **avant**
`EnsureAllSessions()` : un état partagé cassé ne laissera plus le
daemon démarrer et diverger silencieusement.
### Rationale
- Un opérateur qui copie la config sur une nouvelle VM ne peut plus
oublier les liens — le daemon refuse de démarrer jusqu'à ce qu'ils
soient corrects.
- Pas d'auto-heal sur divergence : on préfère un message d'erreur
explicite à un `rm -f` silencieux qui détruirait l'autre compte.
### Tests
- ✅ `go test ./...` : tous les packages PASS (incluant
`internal/lifecycle` et `internal/symlinks`).
### Fichiers modifiés
- `cmd/claude-failover/main.go` (+9)
- `internal/lifecycle/manager.go` (+31)
## [0.3.5] - 2026-04-16 ## [0.3.5] - 2026-04-16
**Type:** Patch — Phase 1 / Chantier A1 : package `internal/symlinks` **Type:** Patch — Phase 1 / Chantier A1 : package `internal/symlinks`

View file

@ -51,6 +51,15 @@ func main() {
// Initialise tmux client and lifecycle manager. // Initialise tmux client and lifecycle manager.
tmuxClient := tmux.NewExecClient() tmuxClient := tmux.NewExecClient()
lm := lifecycle.New(tmuxClient, s, cfg) lm := lifecycle.New(tmuxClient, s, cfg)
// Validate the shared-state symlinks BEFORE spawning any sessions. A
// missing or divergent link would silently fork transcripts between
// accounts and make failover destructive, so we fail fast here rather
// than after work is in flight.
if err := lm.ValidateSharedSymlinks(); err != nil {
log.Fatalf("shared symlinks validation failed: %v", err)
}
lm.EnsureAllSessions() lm.EnsureAllSessions()
// Block until SIGINT or SIGTERM. // Block until SIGINT or SIGTERM.

View file

@ -4,11 +4,13 @@ package lifecycle
import ( import (
"context" "context"
"fmt"
"log" "log"
"time" "time"
"forge.secuaas.ovh/olivier/claude-failover/internal/config" "forge.secuaas.ovh/olivier/claude-failover/internal/config"
"forge.secuaas.ovh/olivier/claude-failover/internal/state" "forge.secuaas.ovh/olivier/claude-failover/internal/state"
"forge.secuaas.ovh/olivier/claude-failover/internal/symlinks"
"forge.secuaas.ovh/olivier/claude-failover/internal/tmux" "forge.secuaas.ovh/olivier/claude-failover/internal/tmux"
) )
@ -47,6 +49,35 @@ func (m *Manager) Run(ctx context.Context) {
} }
} }
// ValidateSharedSymlinks checks the shared-state symlinks of every
// configured account. It gathers the Home of each account in
// m.config.Accounts and hands the resulting list, together with
// symlinks.RequiredShared, to symlinks.ValidateAll.
//
// It is meant to run once at daemon startup, before any sessions are
// (re)created, so that a broken shared-state layout stops the daemon
// instead of letting accounts drift apart while work is in flight.
//
// An error is returned when:
//   - no accounts are configured;
//   - an account has an empty Home (the first such account is reported);
//   - symlinks.ValidateAll reports a problem (its error is wrapped with %w,
//     so callers can still inspect it with errors.Is / errors.As).
//
// On success the number of validated account homes is logged and nil is
// returned. This method performs no link creation or repair itself; the
// exact checks are defined by symlinks.ValidateAll in internal/symlinks.
func (m *Manager) ValidateSharedSymlinks() error {
	configured := m.config.Accounts
	if len(configured) == 0 {
		return fmt.Errorf("[lifecycle] no accounts configured — cannot validate shared symlinks")
	}

	// Collect every account home up front; a single empty home aborts the
	// whole validation before anything is handed to the symlinks package.
	accountHomes := make([]string, 0, len(configured))
	for _, account := range configured {
		home := account.Home
		if home == "" {
			return fmt.Errorf("[lifecycle] account %q has empty home — refusing to continue", account.Name)
		}
		accountHomes = append(accountHomes, home)
	}

	validationErr := symlinks.ValidateAll(accountHomes, symlinks.RequiredShared)
	if validationErr != nil {
		return fmt.Errorf("shared symlinks invalid, refusing to start: %w", validationErr)
	}

	m.logger.Printf("[lifecycle] shared symlinks OK for %d account(s)", len(accountHomes))
	return nil
}
// EnsureAllSessions creates all configured sessions that are not yet present in tmux. // EnsureAllSessions creates all configured sessions that are not yet present in tmux.
// It is intended to be called once at daemon startup before Run is launched. // It is intended to be called once at daemon startup before Run is launched.
func (m *Manager) EnsureAllSessions() { func (m *Manager) EnsureAllSessions() {