diff --git a/VERSION.md b/VERSION.md index b777c33..fe1d813 100644 --- a/VERSION.md +++ b/VERSION.md @@ -1,57 +1,4 @@ -# Version actuelle : 0.3.6 - -## [0.3.6] - 2026-04-16 -**Type:** Patch — Phase 1 / Chantier A2 : validation des symlinks au startup - -### Ajouté -- `Manager.ValidateSharedSymlinks()` : nouvelle méthode dans - `internal/lifecycle` qui agrège les `Home` de tous les comptes - configurés et délègue à `symlinks.ValidateAll`. Échoue dur si un - compte n'a pas de `home` défini ou si un lien est absent/divergent. -- `cmd/claude-failover/main.go` appelle cette validation **avant** - `EnsureAllSessions()` : un état partagé cassé ne laissera plus le - daemon démarrer et divergér silencieusement. - -### Rationale -- Un opérateur qui copie la config sur une nouvelle VM ne peut plus - oublier les liens — le daemon refuse de démarrer jusqu'à ce qu'ils - soient corrects. -- Pas d'auto-heal sur divergence : on préfère un message d'erreur - explicite à un `rm -f` silencieux qui détruirait l'autre compte. - -### Tests -- ✅ `go test ./...` : tous les packages PASS (incluant - `internal/lifecycle` et `internal/symlinks`). - -### Fichiers modifiés -- `cmd/claude-failover/main.go` (+9) -- `internal/lifecycle/manager.go` (+31) - -## [0.3.5] - 2026-04-16 -**Type:** Patch — Phase 1 / Chantier A1 : package `internal/symlinks` - -### Ajouté -- `internal/symlinks/shared.go` : `EnsureForAccount` + `ValidateAll` qui - encodent en code la convention des 3 symlinks partagés par compte - (`session-env`, `file-history`, `projects`). Jusqu'à aujourd'hui ces - liens étaient maintenus à la main et leur absence silencieuse cassait - le failover (JSONL dupliqués, undo désynchronisé). -- Tests unitaires couvrant : création missing, idempotence, divergence - (refus d'auto-correction pour éviter la perte de données), fichier - régulier à la place du lien, home vide, agrégation d'erreurs multi-comptes. - -### Rationale -- Un déploiement sur une nouvelle VM ne peut plus omettre les liens. -- Divergent link → erreur explicite, jamais de correction silencieuse. -- Préparation des tâches A2 (ValidateAll au startup) et A3 (EnsureForAccount - post-flipSymlink dans le switcher). - -### Tests -- ✅ `go test ./internal/symlinks/...` : 9/9 PASS - -### Fichiers ajoutés -- `internal/symlinks/shared.go` -- `internal/symlinks/shared_test.go` +# Version actuelle : 0.3.4 ## [0.3.4] - 2026-04-16 **Type:** Patch — Dispatcher ne route JAMAIS vers les sessions dédiées diff --git a/WORK_IN_PROGRESS.md b/WORK_IN_PROGRESS.md index 7538918..049b34a 100644 --- a/WORK_IN_PROGRESS.md +++ b/WORK_IN_PROGRESS.md @@ -1,26 +1,13 @@ # Travaux en Cours - claude-failover ## Dernière mise à jour -2026-04-16 19:00:00 +2026-04-15 19:30:00 ## Version Actuelle -0.3.5 (en cours de progression vers 0.4.0) +0.3.0 ## Demande Actuelle -**Phase 1 / Chantier A — Failover robuste** (spec dans `ccl-platform/phases/phase1/A-failover.md`). -Rendre le failover compte1 ↔ compte2 déterministe en intégrant dans le code les fixes manuels -(symlinks partagés), en ajoutant un registre UUID fiable, et en durcissant tmux send-keys. - -Branche : `feat/phase1-A-failover-robust`. - -## Sous-tâches Chantier A -- [x] A1 — `internal/symlinks/shared.go` (+ tests) — v0.3.5 -- [ ] A2 — `lifecycle/manager.go` : `ValidateAll` au startup -- [ ] A3 — `switcher/account_switcher.go` : `EnsureForAccount` post-flip -- [ ] A4 — `internal/registry/uuid_registry.go` (+ tests) -- [ ] A5 — `internal/tmux/send.go` avec retry exponentiel (+ tests) -- [ ] A6 — Capture UUID 200 → 500 lignes -- [ ] A7 — `scripts/test-failover.sh` dans ccl-platform + scripts associés +Aucune — v0.2.3 shippée, service stable. ## Étapes Complétées - [x] v0.2.1 — Cooldown post-swap + log forensique (trigger_session, pattern, snippet) diff --git a/cmd/claude-failover/main.go b/cmd/claude-failover/main.go index 2c29f89..8bc8fc5 100644 --- a/cmd/claude-failover/main.go +++ b/cmd/claude-failover/main.go @@ -51,15 +51,6 @@ func main() { // Initialise tmux client and lifecycle manager. tmuxClient := tmux.NewExecClient() lm := lifecycle.New(tmuxClient, s, cfg) - - // Validate (and self-heal) the shared-state symlinks BEFORE spawning - // any sessions. A divergent link would silently fork transcripts - // between accounts and make failover destructive, so we fail fast here - // rather than after work is in flight. - if err := lm.ValidateSharedSymlinks(); err != nil { - log.Fatalf("shared symlinks validation failed: %v", err) - } - lm.EnsureAllSessions() // Block until SIGINT or SIGTERM. diff --git a/internal/lifecycle/manager.go b/internal/lifecycle/manager.go index eeed9cc..40fa4b0 100644 --- a/internal/lifecycle/manager.go +++ b/internal/lifecycle/manager.go @@ -4,13 +4,11 @@ package lifecycle import ( "context" - "fmt" "log" "time" "forge.secuaas.ovh/olivier/claude-failover/internal/config" "forge.secuaas.ovh/olivier/claude-failover/internal/state" - "forge.secuaas.ovh/olivier/claude-failover/internal/symlinks" "forge.secuaas.ovh/olivier/claude-failover/internal/tmux" ) @@ -49,35 +47,6 @@ func (m *Manager) Run(ctx context.Context) { } } -// ValidateSharedSymlinks verifies that every configured account home has -// the three shared-state symlinks (session-env, file-history, projects) -// in place and pointing at the canonical shared targets. -// -// Called once at daemon startup BEFORE sessions are recreated. A missing -// or divergent link would silently fork the state tree between the two -// accounts, breaking failover. We fail fast so the operator fixes it -// before any work is in flight. -// -// EnsureForAccount creates missing links but refuses to touch divergent -// ones — see internal/symlinks for the rationale. -func (m *Manager) ValidateSharedSymlinks() error { - if len(m.config.Accounts) == 0 { - return fmt.Errorf("[lifecycle] no accounts configured — cannot validate shared symlinks") - } - homes := make([]string, 0, len(m.config.Accounts)) - for _, acc := range m.config.Accounts { - if acc.Home == "" { - return fmt.Errorf("[lifecycle] account %q has empty home — refusing to continue", acc.Name) - } - homes = append(homes, acc.Home) - } - if err := symlinks.ValidateAll(homes, symlinks.RequiredShared); err != nil { - return fmt.Errorf("shared symlinks invalid, refusing to start: %w", err) - } - m.logger.Printf("[lifecycle] shared symlinks OK for %d account(s)", len(homes)) - return nil -} - // EnsureAllSessions creates all configured sessions that are not yet present in tmux. // It is intended to be called once at daemon startup before Run is launched. func (m *Manager) EnsureAllSessions() { diff --git a/internal/symlinks/shared.go b/internal/symlinks/shared.go deleted file mode 100644 index 7687fb0..0000000 --- a/internal/symlinks/shared.go +++ /dev/null @@ -1,165 +0,0 @@ -// Package symlinks manages the shared-state symlinks that every Claude -// account home must expose, so that account failover does not create state -// divergence (duplicated JSONL transcripts, broken undo history, drifted -// session env). -// -// Rationale -// -// Claude Code stores three directories whose content MUST be identical -// across the two configured accounts for failover to be a no-op: -// -// - projects/ — session JSONL transcripts (used by `claude --resume`) -// - session-env/ — per-session environment and working-dir metadata -// - file-history/ — undo/redo history persistence -// -// If account A writes under `~/.claude-compte1/projects/...` while account -// B later runs under `~/.claude-compte2/projects/...`, resume fails -// silently with "session not found" and the operator loses every in-flight -// conversation. -// -// Historically we fixed this by creating symlinks manually on the -// operator's VM. Any fresh deployment that forgets those links silently -// reintroduces the bug. This package encodes the convention in code: -// EnsureForAccount creates missing links, ValidateAll fails fast at -// startup when an account home is misconfigured. -package symlinks - -import ( - "errors" - "fmt" - "os" - "path/filepath" - "strings" -) - -// DefaultSharedRoot is the directory under which the three shared targets -// live. All SharedSymlink.Target values default to a subdirectory of this -// root so tests can override the root without rewriting the shared list. -const DefaultSharedRoot = "/home/ubuntu" - -// SharedSymlink describes one required link inside a Claude account home. -// -// Target is the absolute path on disk that holds the real shared -// directory. Name is the basename of the link that must exist inside each -// account home (e.g. `session-env`, `file-history`, `projects`). -type SharedSymlink struct { - Target string - Name string -} - -// RequiredShared lists the three symlinks every Claude account home must -// expose. The list is package-level so integration tests can read it, but -// callers SHOULD prefer the EnsureForAccount / ValidateAll entry points -// that accept an override list for isolation. -var RequiredShared = []SharedSymlink{ - {Target: "/home/ubuntu/.claude-session-env-shared", Name: "session-env"}, - {Target: "/home/ubuntu/.claude-file-history-shared", Name: "file-history"}, - {Target: "/home/ubuntu/.claude-projects-shared", Name: "projects"}, -} - -// EnsureForAccount verifies (and creates if missing) every required shared -// symlink for a single account home. Behaviour: -// -// - If accountHome does not exist, it is created (mode 0700). -// - If Target (the shared destination) does not exist, it is created -// (mode 0700). Both accounts pointing at a non-existent target would -// produce two separate state trees on first write. -// - If the link is absent, it is created. -// - If the link is present and points at Target, nothing happens. -// - If the link is present but points elsewhere, an error is returned. -// We REFUSE to auto-correct a divergent link because fixing it blindly -// could delete user data: the "wrong" target may contain the only copy -// of the session transcripts. -// - If a regular file or directory exists where the link should be, -// an error is returned for the same reason. -func EnsureForAccount(accountHome string, required []SharedSymlink) error { - if accountHome == "" { - return errors.New("symlinks: accountHome is empty") - } - - if err := os.MkdirAll(accountHome, 0700); err != nil { - return fmt.Errorf("symlinks: create account home %q: %w", accountHome, err) - } - - for _, sl := range required { - if err := ensureTarget(sl.Target); err != nil { - return err - } - if err := ensureLink(accountHome, sl); err != nil { - return err - } - } - return nil -} - -// ValidateAll runs EnsureForAccount on every account home. It aggregates -// all errors and returns a single error with every failure inlined, so the -// operator sees the full picture at startup rather than fixing one link, -// restarting, hitting the next one, repeat. -func ValidateAll(accountHomes []string, required []SharedSymlink) error { - if len(accountHomes) == 0 { - return errors.New("symlinks: no account homes provided") - } - var errs []string - for _, home := range accountHomes { - if err := EnsureForAccount(home, required); err != nil { - errs = append(errs, err.Error()) - } - } - if len(errs) > 0 { - return fmt.Errorf("symlinks: validation failed for %d account home(s): %s", - len(errs), strings.Join(errs, "; ")) - } - return nil -} - -// ensureTarget creates Target as an empty directory when absent. -// An existing file (non-directory, non-symlink) at Target is an operator -// error we cannot resolve automatically. -func ensureTarget(target string) error { - info, err := os.Stat(target) - if err != nil { - if !os.IsNotExist(err) { - return fmt.Errorf("symlinks: stat shared target %q: %w", target, err) - } - if mkErr := os.MkdirAll(target, 0700); mkErr != nil { - return fmt.Errorf("symlinks: create shared target %q: %w", target, mkErr) - } - return nil - } - if !info.IsDir() { - return fmt.Errorf("symlinks: shared target %q is not a directory", target) - } - return nil -} - -// ensureLink reconciles one link entry inside accountHome. -func ensureLink(accountHome string, sl SharedSymlink) error { - linkPath := filepath.Join(accountHome, sl.Name) - - info, err := os.Lstat(linkPath) - if err != nil { - if os.IsNotExist(err) { - if linkErr := os.Symlink(sl.Target, linkPath); linkErr != nil { - return fmt.Errorf("symlinks: create %q → %q: %w", linkPath, sl.Target, linkErr) - } - return nil - } - return fmt.Errorf("symlinks: lstat %q: %w", linkPath, err) - } - - // Path exists — must be a symlink pointing at Target. - if info.Mode()&os.ModeSymlink == 0 { - return fmt.Errorf("symlinks: %q exists but is not a symlink (expected → %q)", - linkPath, sl.Target) - } - currentTarget, err := os.Readlink(linkPath) - if err != nil { - return fmt.Errorf("symlinks: readlink %q: %w", linkPath, err) - } - if currentTarget != sl.Target { - return fmt.Errorf("symlinks: divergent link at %q: points to %q, expected %q (refusing to auto-correct to avoid data loss)", - linkPath, currentTarget, sl.Target) - } - return nil -} diff --git a/internal/symlinks/shared_test.go b/internal/symlinks/shared_test.go deleted file mode 100644 index 64625d4..0000000 --- a/internal/symlinks/shared_test.go +++ /dev/null @@ -1,206 +0,0 @@ -package symlinks - -import ( - "os" - "path/filepath" - "strings" - "testing" -) - -// testRequired returns a SharedSymlink list whose Targets live entirely -// under tmpDir, so the tests never touch the operator's real home. -func testRequired(tmpDir string) []SharedSymlink { - return []SharedSymlink{ - {Target: filepath.Join(tmpDir, "session-env-shared"), Name: "session-env"}, - {Target: filepath.Join(tmpDir, "file-history-shared"), Name: "file-history"}, - {Target: filepath.Join(tmpDir, "projects-shared"), Name: "projects"}, - } -} - -func TestEnsureForAccount_missingCreatesLinksAndTargets(t *testing.T) { - tmp := t.TempDir() - home := filepath.Join(tmp, "account1") - req := testRequired(tmp) - - if err := EnsureForAccount(home, req); err != nil { - t.Fatalf("EnsureForAccount: %v", err) - } - - for _, sl := range req { - linkPath := filepath.Join(home, sl.Name) - info, err := os.Lstat(linkPath) - if err != nil { - t.Errorf("expected link at %s: %v", linkPath, err) - continue - } - if info.Mode()&os.ModeSymlink == 0 { - t.Errorf("%s exists but is not a symlink", linkPath) - } - got, err := os.Readlink(linkPath) - if err != nil { - t.Errorf("readlink %s: %v", linkPath, err) - continue - } - if got != sl.Target { - t.Errorf("link %s points to %q, want %q", linkPath, got, sl.Target) - } - // Target directory must exist too. - if st, err := os.Stat(sl.Target); err != nil || !st.IsDir() { - t.Errorf("target %s should be a directory, err=%v", sl.Target, err) - } - } -} - -func TestEnsureForAccount_idempotent(t *testing.T) { - tmp := t.TempDir() - home := filepath.Join(tmp, "account1") - req := testRequired(tmp) - - if err := EnsureForAccount(home, req); err != nil { - t.Fatalf("first pass: %v", err) - } - if err := EnsureForAccount(home, req); err != nil { - t.Fatalf("second pass should be a no-op, got: %v", err) - } -} - -func TestEnsureForAccount_divergentLinkReturnsError(t *testing.T) { - tmp := t.TempDir() - home := filepath.Join(tmp, "account1") - req := testRequired(tmp) - - // Pre-create a wrong symlink for "projects". - if err := os.MkdirAll(home, 0700); err != nil { - t.Fatalf("mkdir home: %v", err) - } - wrongTarget := filepath.Join(tmp, "someone-elses-dir") - if err := os.MkdirAll(wrongTarget, 0700); err != nil { - t.Fatalf("mkdir wrong target: %v", err) - } - linkPath := filepath.Join(home, "projects") - if err := os.Symlink(wrongTarget, linkPath); err != nil { - t.Fatalf("seed wrong symlink: %v", err) - } - - err := EnsureForAccount(home, req) - if err == nil { - t.Fatal("expected error for divergent link, got nil") - } - if !strings.Contains(err.Error(), "divergent") { - t.Errorf("error should mention 'divergent': %v", err) - } - - // The wrong symlink MUST be preserved (no auto-correction). - got, err := os.Readlink(linkPath) - if err != nil { - t.Fatalf("readlink after error: %v", err) - } - if got != wrongTarget { - t.Errorf("divergent link was mutated: now %q, want preserved %q", got, wrongTarget) - } -} - -func TestEnsureForAccount_regularFileInsteadOfLinkFails(t *testing.T) { - tmp := t.TempDir() - home := filepath.Join(tmp, "account1") - req := testRequired(tmp) - - if err := os.MkdirAll(home, 0700); err != nil { - t.Fatalf("mkdir home: %v", err) - } - // Create a regular file at the session-env path. - bogus := filepath.Join(home, "session-env") - if err := os.WriteFile(bogus, []byte("oops"), 0600); err != nil { - t.Fatalf("seed regular file: %v", err) - } - - err := EnsureForAccount(home, req) - if err == nil { - t.Fatal("expected error for regular-file-at-link-path, got nil") - } - if !strings.Contains(err.Error(), "not a symlink") { - t.Errorf("error should mention 'not a symlink': %v", err) - } -} - -func TestEnsureForAccount_emptyHomeReturnsError(t *testing.T) { - if err := EnsureForAccount("", nil); err == nil { - t.Fatal("expected error for empty home, got nil") - } -} - -func TestValidateAll_multipleAccountsAllOK(t *testing.T) { - tmp := t.TempDir() - req := testRequired(tmp) - homes := []string{ - filepath.Join(tmp, "a"), - filepath.Join(tmp, "b"), - } - if err := ValidateAll(homes, req); err != nil { - t.Fatalf("ValidateAll: %v", err) - } -} - -func TestValidateAll_aggregatesErrors(t *testing.T) { - tmp := t.TempDir() - req := testRequired(tmp) - homes := []string{ - filepath.Join(tmp, "a"), - filepath.Join(tmp, "b"), - } - - // Pre-seed account `a` with a divergent link so ValidateAll must - // surface that error while still processing account `b`. - if err := os.MkdirAll(homes[0], 0700); err != nil { - t.Fatalf("mkdir a: %v", err) - } - wrongTarget := filepath.Join(tmp, "bad") - if err := os.MkdirAll(wrongTarget, 0700); err != nil { - t.Fatalf("mkdir bad: %v", err) - } - if err := os.Symlink(wrongTarget, filepath.Join(homes[0], "projects")); err != nil { - t.Fatalf("seed wrong link: %v", err) - } - - err := ValidateAll(homes, req) - if err == nil { - t.Fatal("expected aggregated error, got nil") - } - if !strings.Contains(err.Error(), "divergent") { - t.Errorf("should surface divergent: %v", err) - } - - // Account `b` must have been configured successfully even though `a` - // failed. Otherwise the operator cannot see the full state at once. - for _, sl := range req { - if _, err := os.Lstat(filepath.Join(homes[1], sl.Name)); err != nil { - t.Errorf("account b link %s should have been created despite a's failure: %v", sl.Name, err) - } - } -} - -func TestValidateAll_emptyListReturnsError(t *testing.T) { - if err := ValidateAll(nil, nil); err == nil { - t.Fatal("expected error for empty account list") - } -} - -// TestRequiredShared_defaultsAreReasonable pins the default SharedSymlink -// list so an accidental edit that breaks production is caught. -func TestRequiredShared_defaultsAreReasonable(t *testing.T) { - want := map[string]string{ - "session-env": "/home/ubuntu/.claude-session-env-shared", - "file-history": "/home/ubuntu/.claude-file-history-shared", - "projects": "/home/ubuntu/.claude-projects-shared", - } - if len(RequiredShared) != len(want) { - t.Fatalf("RequiredShared has %d entries, want %d", len(RequiredShared), len(want)) - } - for _, sl := range RequiredShared { - if got, ok := want[sl.Name]; !ok { - t.Errorf("unexpected RequiredShared entry %q", sl.Name) - } else if got != sl.Target { - t.Errorf("RequiredShared %q target = %q, want %q", sl.Name, sl.Target, got) - } - } -}