Phase 1 / A3 — EnsureSharedSymlinks après flipSymlink dans switcher #2

Open
olivier wants to merge 2 commits from feat/phase1-A3-switcher-symlinks into feat/phase1-A-failover-robust
3 changed files with 204 additions and 4 deletions
Showing only changes of commit 8eaf0bbd35 - Show all commits

View file

@ -1,4 +1,48 @@
# Version actuelle : 0.3.6 # Version actuelle : 0.3.7
## [0.3.7] - 2026-04-16
**Type:** Patch — Phase 1 / Chantier A3 : wire EnsureForAccount post-flip
### Ajouté
- `AccountSwitcher.executeSwitch` appelle désormais
`symlinks.EnsureForAccount(target.Home, ...)` **juste après** le flip
du lien principal `~/.claude`. Cela garantit que les 3 liens partagés
(`session-env`, `file-history`, `projects`) existent et pointent vers
les bonnes cibles sur le compte cible, même si celui-ci vient tout juste
d'être provisionné.
- `AccountSwitcher.sharedSymlinks` : override test-only (accepte une
liste `[]symlinks.SharedSymlink`). Défaut = `symlinks.RequiredShared`.
Les tests peuvent scoper la réconciliation dans un `t.TempDir()` pour
ne jamais toucher `/home/ubuntu/.claude-*-shared`.
- 2 tests unitaires :
- `TestFlipReconcilesSharedSymlinksOnTargetHome` : home cible vide →
les 3 liens sont créés après le flip et pointent vers les cibles canoniques.
- `TestFlipEnsureSymlinksFailureDoesNotAbortSwap` : lien divergent
créé à la main → `EnsureForAccount` renvoie une erreur, journalisée
en WARN, mais le swap se termine quand même (best-effort post-flip).
### Rationale
- Sans cet appel, un compte cible fraîchement provisionné n'aurait
pas encore ses 3 liens ; au premier `claude --resume`, Claude Code
écrirait dans `~/.claude/projects/` (privé) au lieu de
`/home/ubuntu/.claude-projects-shared` → transcripts dupliqués,
undo désynchronisé, resume silencieusement cassé.
- L'ensure est **best-effort** : une erreur est journalisée en WARN mais
NE bloque PAS le flip. Si on abandonnait à ce stade, on laisserait le daemon
dans un état incohérent (symlink déjà flippé mais `SetActiveAccount`
pas encore appelé).
- L'opérateur voit le WARN dans les logs et peut corriger la
divergence manuellement (ex. : lien pointant vers la mauvaise cible).
### Tests
- ✅ `go test ./...` : tous les packages PASS (incluant
`internal/switcher` et `internal/symlinks`).
- ✅ `go test -race ./internal/switcher/...` : PASS.
- ✅ `go vet ./...` : clean.
### Fichiers modifiés
- `internal/switcher/account_switcher.go`
- `internal/switcher/account_switcher_test.go`
## [0.3.6] - 2026-04-16 ## [0.3.6] - 2026-04-16
**Type:** Patch — Phase 1 / Chantier A2 : validation des symlinks au startup **Type:** Patch — Phase 1 / Chantier A2 : validation des symlinks au startup

View file

@ -17,6 +17,7 @@ import (
"forge.secuaas.ovh/olivier/claude-failover/internal/notify" "forge.secuaas.ovh/olivier/claude-failover/internal/notify"
"forge.secuaas.ovh/olivier/claude-failover/internal/quota" "forge.secuaas.ovh/olivier/claude-failover/internal/quota"
"forge.secuaas.ovh/olivier/claude-failover/internal/state" "forge.secuaas.ovh/olivier/claude-failover/internal/state"
"forge.secuaas.ovh/olivier/claude-failover/internal/symlinks"
"forge.secuaas.ovh/olivier/claude-failover/internal/tmux" "forge.secuaas.ovh/olivier/claude-failover/internal/tmux"
) )
@ -52,6 +53,11 @@ type AccountSwitcher struct {
// homeDir is the directory containing the .claude symlink. Overridable for tests. // homeDir is the directory containing the .claude symlink. Overridable for tests.
// When empty, os.UserHomeDir() is used. // When empty, os.UserHomeDir() is used.
homeDir string homeDir string
// sharedSymlinks is the list of shared-state links reconciled on the
// target account home after every flip. Overridable for tests so the
// suite never touches the operator's real /home/ubuntu/.claude-*
// shared directories. When nil, symlinks.RequiredShared is used.
sharedSymlinks []symlinks.SharedSymlink
} }
// New creates an AccountSwitcher. // New creates an AccountSwitcher.
@ -110,6 +116,16 @@ func (a *AccountSwitcher) executeSwitch(req quota.SwitchRequest) {
if err := a.flipSymlink(target.Home); err != nil { if err := a.flipSymlink(target.Home); err != nil {
a.logger.Printf("[switcher] flipSymlink error: %v", err) a.logger.Printf("[switcher] flipSymlink error: %v", err)
} }
// Best-effort: make sure the target account home exposes the three
// shared-state symlinks (session-env, file-history, projects). The main
// ~/.claude flip is already done, so an error here must NOT abort the
// swap — we just log it so the operator can investigate. Without this
// call, a fresh target account with no shared links would silently
// start writing into its own private projects/, session-env/ and
// file-history/ directories and diverge from the primary account's
// transcripts.
if err := symlinks.EnsureForAccount(target.Home, a.requiredShared()); err != nil {
a.logger.Printf("[switcher] WARN ensure shared symlinks for %q: %v", target.Home, err)
}
a.killAllPoolSessions() a.killAllPoolSessions()
a.recreatePoolSessions() a.recreatePoolSessions()
a.relaunchDedicatedSessions(target.Home) a.relaunchDedicatedSessions(target.Home)
@ -225,6 +241,16 @@ func (a *AccountSwitcher) saveAllSessions() {
}) })
} }
// requiredShared selects the shared-symlink list that is reconciled on the
// target account home once the main flip is done.
//
// A non-nil a.sharedSymlinks takes precedence; tests set it to a
// tmpdir-scoped list so they never touch /home/ubuntu/.claude-*-shared.
// When the field is nil, the package-level symlinks.RequiredShared list is
// returned instead.
func (a *AccountSwitcher) requiredShared() []symlinks.SharedSymlink {
	if a.sharedSymlinks == nil {
		return symlinks.RequiredShared
	}
	return a.sharedSymlinks
}
// resolveHomeDir returns the configured homeDir (test override) or the real // resolveHomeDir returns the configured homeDir (test override) or the real
// user home. Tests MUST set a.homeDir to a tmpdir to avoid clobbering the // user home. Tests MUST set a.homeDir to a tmpdir to avoid clobbering the
// production ~/.claude symlink. // production ~/.claude symlink.

View file

@ -1,6 +1,8 @@
package switcher package switcher
import ( import (
"os"
"path/filepath"
"strings" "strings"
"testing" "testing"
"time" "time"
@ -8,8 +10,19 @@ import (
"forge.secuaas.ovh/olivier/claude-failover/internal/config" "forge.secuaas.ovh/olivier/claude-failover/internal/config"
"forge.secuaas.ovh/olivier/claude-failover/internal/quota" "forge.secuaas.ovh/olivier/claude-failover/internal/quota"
"forge.secuaas.ovh/olivier/claude-failover/internal/state" "forge.secuaas.ovh/olivier/claude-failover/internal/state"
"forge.secuaas.ovh/olivier/claude-failover/internal/symlinks"
) )
// tmpShared builds the three shared-symlink entries (session-env,
// file-history, projects, in that order) with each target placed directly
// under tmpDir as "<name>-shared". Switcher tests use it so that nothing is
// ever written inside /home/ubuntu/.claude-*-shared.
func tmpShared(tmpDir string) []symlinks.SharedSymlink {
	names := []string{"session-env", "file-history", "projects"}
	shared := make([]symlinks.SharedSymlink, 0, len(names))
	for _, name := range names {
		shared = append(shared, symlinks.SharedSymlink{
			Target: filepath.Join(tmpDir, name+"-shared"),
			Name:   name,
		})
	}
	return shared
}
// mockTmux for switcher tests. // mockTmux for switcher tests.
type mockTmux struct { type mockTmux struct {
sessions map[string]bool sessions map[string]bool
@ -143,6 +156,9 @@ func TestKillAndRecreatePoolSessions(t *testing.T) {
// touches the real ~/.claude (regression: a reboot used to leave Claude // touches the real ~/.claude (regression: a reboot used to leave Claude
// Code unusable because the test had repointed ~/.claude to /tmp/...). // Code unusable because the test had repointed ~/.claude to /tmp/...).
a.homeDir = t.TempDir() a.homeDir = t.TempDir()
// Scope shared-symlink targets to a tmpdir so the post-flip ensure
// pass does not write inside /home/ubuntu/.claude-*-shared.
a.sharedSymlinks = tmpShared(t.TempDir())
a.executeSwitch(quota.SwitchRequest{From: "compte1"}) a.executeSwitch(quota.SwitchRequest{From: "compte1"})
// Active account must have changed. // Active account must have changed.
@ -186,10 +202,12 @@ func TestDedicatedRelaunchAfterSwap(t *testing.T) {
s := state.New("") s := state.New("")
s.SetActiveAccount("compte1") s.SetActiveAccount("compte1")
home1 := filepath.Join(t.TempDir(), "claude-1-xxxx")
home2 := filepath.Join(t.TempDir(), "claude-2-xxxx")
cfg := &config.Config{ cfg := &config.Config{
Accounts: []config.AccountConfig{ Accounts: []config.AccountConfig{
{Name: "compte1", Home: "/tmp/claude-1-xxxx"}, {Name: "compte1", Home: home1},
{Name: "compte2", Home: "/tmp/claude-2-xxxx"}, {Name: "compte2", Home: home2},
}, },
Pool: config.PoolConfig{ Pool: config.PoolConfig{
Dedicated: []config.DedicatedSession{{Name: "dedicated-1", Project: "/tmp"}}, Dedicated: []config.DedicatedSession{{Name: "dedicated-1", Project: "/tmp"}},
@ -199,6 +217,7 @@ func TestDedicatedRelaunchAfterSwap(t *testing.T) {
a := New(tc, s, cfg, make(chan quota.SwitchRequest), nil) a := New(tc, s, cfg, make(chan quota.SwitchRequest), nil)
a.homeDir = t.TempDir() a.homeDir = t.TempDir()
a.sharedSymlinks = tmpShared(t.TempDir())
a.executeSwitch(quota.SwitchRequest{From: "compte1"}) a.executeSwitch(quota.SwitchRequest{From: "compte1"})
// The relaunch must send a resume command on the dedicated session, // The relaunch must send a resume command on the dedicated session,
@ -213,10 +232,121 @@ func TestDedicatedRelaunchAfterSwap(t *testing.T) {
if relaunch == "" { if relaunch == "" {
t.Fatalf("expected dedicated-1 relaunch send-keys; got %v", tc.sendKeyCalls) t.Fatalf("expected dedicated-1 relaunch send-keys; got %v", tc.sendKeyCalls)
} }
if !strings.Contains(relaunch, "CLAUDE_CONFIG_DIR=/tmp/claude-2-xxxx") { if !strings.Contains(relaunch, "CLAUDE_CONFIG_DIR="+home2) {
t.Errorf("relaunch should set CLAUDE_CONFIG_DIR to target home; got %q", relaunch) t.Errorf("relaunch should set CLAUDE_CONFIG_DIR to target home; got %q", relaunch)
} }
if !strings.Contains(relaunch, "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee") { if !strings.Contains(relaunch, "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee") {
t.Errorf("relaunch should include captured UUID; got %q", relaunch) t.Errorf("relaunch should include captured UUID; got %q", relaunch)
} }
} }
// TestFlipReconcilesSharedSymlinksOnTargetHome covers A3: once the main
// ~/.claude flip is done, the switcher reconciles the shared-state symlinks
// (session-env / file-history / projects) on the TARGET account home.
//
// Scenario: the target home directory does not contain any of the links
// before the switch (a freshly-provisioned account that has never been
// flipped into). After executeSwitch, each entry of the configured shared
// list must exist inside the target home as a symlink whose destination is
// exactly that entry's Target.
func TestFlipReconcilesSharedSymlinksOnTargetHome(t *testing.T) {
	mock := newMockTmux()
	st := state.New("")
	st.SetActiveAccount("compte1")

	// The target home path is not created here, so no link can exist yet.
	targetHome := filepath.Join(t.TempDir(), "claude-compte2")
	sourceHome := filepath.Join(t.TempDir(), "claude-compte1")

	accounts := []config.AccountConfig{
		{Name: "compte1", Home: sourceHome},
		{Name: "compte2", Home: targetHome},
	}
	cfg := &config.Config{
		Accounts: accounts,
		Pool: config.PoolConfig{
			Autonomous: config.AutonomousConfig{Prefix: "ccl-auto-", Min: 0, Max: 0},
		},
	}

	a := New(mock, st, cfg, make(chan quota.SwitchRequest), nil)
	// Both overrides keep the test away from the real ~/.claude and the
	// real shared directories.
	a.homeDir = t.TempDir()
	shared := tmpShared(t.TempDir())
	a.sharedSymlinks = shared

	// Pre-condition: none of the links is present in targetHome.
	for _, link := range shared {
		_, err := os.Lstat(filepath.Join(targetHome, link.Name))
		if !os.IsNotExist(err) {
			t.Fatalf("pre-condition: %q should not exist yet (err=%v)", link.Name, err)
		}
	}

	a.executeSwitch(quota.SwitchRequest{From: "compte1"})

	// Post-condition: each link exists, is a symlink, and resolves (one
	// level) to the tmpdir-scoped target it was configured with.
	for _, link := range shared {
		linkPath := filepath.Join(targetHome, link.Name)

		info, err := os.Lstat(linkPath)
		switch {
		case err != nil:
			t.Errorf("expected link at %s after flip: %v", linkPath, err)
			continue
		case info.Mode()&os.ModeSymlink == 0:
			t.Errorf("%s exists but is not a symlink", linkPath)
			continue
		}

		dest, err := os.Readlink(linkPath)
		switch {
		case err != nil:
			t.Errorf("readlink %s: %v", linkPath, err)
		case dest != link.Target:
			t.Errorf("link %s points to %q, want %q", linkPath, dest, link.Target)
		}
	}
}
// TestFlipEnsureSymlinksFailureDoesNotAbortSwap verifies the A3 best-effort
// contract: when the post-flip shared-symlink reconcile hits a divergent
// pre-existing link, the flip and the swap MUST still complete. The reconcile
// is post-flip cleanup, not a gate on the failover itself — aborting there
// would leave the daemon in an inconsistent state (symlink flipped but active
// account not updated).
//
// Two things are asserted after executeSwitch:
//   - the active account moved to compte2 (the swap was not aborted);
//   - the hand-planted divergent link still points at its original
//     destination, so the test cannot pass vacuously if the reconcile step
//     were to silently overwrite the link instead of refusing.
func TestFlipEnsureSymlinksFailureDoesNotAbortSwap(t *testing.T) {
	tc := newMockTmux()
	s := state.New("")
	s.SetActiveAccount("compte1")

	targetHome := filepath.Join(t.TempDir(), "claude-compte2")
	if err := os.MkdirAll(targetHome, 0700); err != nil {
		t.Fatalf("mkdir target home: %v", err)
	}

	// Plant a divergent link at <targetHome>/session-env, pointing outside
	// the tmpdir-scoped shared root configured below. The symlinks package
	// is expected to refuse to auto-correct it (data-loss safeguard) and to
	// return an error, which the switcher must swallow with a WARN log.
	bogus := filepath.Join(t.TempDir(), "somewhere-else")
	if err := os.MkdirAll(bogus, 0700); err != nil {
		t.Fatalf("mkdir bogus: %v", err)
	}
	divergent := filepath.Join(targetHome, "session-env")
	if err := os.Symlink(bogus, divergent); err != nil {
		t.Fatalf("plant divergent link: %v", err)
	}

	cfg := &config.Config{
		Accounts: []config.AccountConfig{
			{Name: "compte1", Home: filepath.Join(t.TempDir(), "claude-compte1")},
			{Name: "compte2", Home: targetHome},
		},
		Pool: config.PoolConfig{
			Autonomous: config.AutonomousConfig{Prefix: "ccl-auto-", Min: 0, Max: 0},
		},
	}

	a := New(tc, s, cfg, make(chan quota.SwitchRequest), nil)
	a.homeDir = t.TempDir()
	a.sharedSymlinks = tmpShared(t.TempDir())

	a.executeSwitch(quota.SwitchRequest{From: "compte1"})

	// The swap must have completed despite the divergent-link error.
	if got := s.ActiveAccount(); got != "compte2" {
		t.Errorf("swap should complete even when ensure fails; active=%q want compte2", got)
	}

	// The divergent link must have been left alone: if it now points
	// somewhere else, the reconcile step overwrote operator data instead of
	// reporting the divergence.
	dest, err := os.Readlink(divergent)
	if err != nil {
		t.Fatalf("readlink %s after switch: %v", divergent, err)
	}
	if dest != bogus {
		t.Errorf("divergent link %s was rewritten to %q; want it left at %q", divergent, dest, bogus)
	}
}