fix(switcher+symlinks): rollback on ensure failure (Bug #1) + requiredShared contract test (Bug #10)
Bug #1 (CRITIQUE) — A3 flip+ensure inconsistency
- Before: an EnsureForAccount failure after the flip was WARN-only and SetActiveAccount still fired → the daemon declared the target active while the shared symlinks were absent/divergent → transcripts silently duplicated, resume broken.
- After: an ensure failure triggers a rollback flip to the previous account home. If the rollback succeeds → explicit error, ActiveAccount stays on the previous account. If the rollback ALSO fails → sticky partialSwap flag + ErrPartialSwap; all further swaps are refused until operator intervention (daemon restart).
- New public IsPartialSwap() for watchdog / health-check integration.

Bug #10 (MOYENNE) — requiredShared contract never exercised
- All existing tests override a.sharedSymlinks with tmpdir-scoped lists, so symlinks.RequiredShared itself was never tested. A rename or drop would pass every test but silently break prod failover.
- TestRequiredSharedIsCoherent asserts (no filesystem): 3 entries with the exact required names, absolute targets, and a single shared parent directory (an invariant EnsureForAccount depends on).

Tests:
- go test ./... PASS
- go test -race ./... PASS (no data race)
- 2 new switcher tests: TestFlipEnsureFailureTriggersRollback, TestFlipEnsureAndRollbackFailure
- 1 new symlinks test: TestRequiredSharedIsCoherent
- 1 obsolete test replaced: TestFlipEnsureSymlinksFailureDoesNotAbortSwap (it encoded the old, buggy best-effort behaviour)
This commit is contained in:
parent
8eaf0bbd35
commit
20063b1939
4 changed files with 356 additions and 24 deletions
|
|
@ -1,6 +1,7 @@
|
|||
package switcher
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
|
@ -302,25 +303,34 @@ func TestFlipReconcilesSharedSymlinksOnTargetHome(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
// TestFlipEnsureSymlinksFailureDoesNotAbortSwap verifies A3 best-effort:
|
||||
// if EnsureForAccount returns an error (here: a divergent pre-existing link
|
||||
// that the symlinks package refuses to auto-correct), the flip and the swap
|
||||
// MUST still complete. The shared symlink reconcile is post-flip cleanup,
|
||||
// not a gate on the failover itself — aborting here would leave the daemon
|
||||
// in an inconsistent state (symlink flipped but active account not updated).
|
||||
func TestFlipEnsureSymlinksFailureDoesNotAbortSwap(t *testing.T) {
|
||||
// TestFlipEnsureFailureTriggersRollback verifies the fix for the A3 bug
|
||||
// (flip+ensure inconsistency): if EnsureForAccount fails on the target home
|
||||
// after the ~/.claude flip, the switcher MUST NOT mark the target account
|
||||
// active. It must instead roll back the ~/.claude symlink to the previous
|
||||
// account's home, leaving the daemon in the pre-swap state so subsequent
|
||||
// session work keeps writing to the known-good shared state.
|
||||
//
|
||||
// Old (buggy) behaviour: ensure error was WARN-only, SetActiveAccount still
|
||||
// happened, dedicated sessions were relaunched against a target whose
|
||||
// /projects, /session-env, /file-history were missing or divergent →
|
||||
// transcripts duplicated silently, resume broke, undo history diverged.
|
||||
func TestFlipEnsureFailureTriggersRollback(t *testing.T) {
|
||||
tc := newMockTmux()
|
||||
|
||||
s := state.New("")
|
||||
s.SetActiveAccount("compte1")
|
||||
|
||||
previousHome := filepath.Join(t.TempDir(), "claude-compte1")
|
||||
targetHome := filepath.Join(t.TempDir(), "claude-compte2")
|
||||
if err := os.MkdirAll(previousHome, 0700); err != nil {
|
||||
t.Fatalf("mkdir previous home: %v", err)
|
||||
}
|
||||
if err := os.MkdirAll(targetHome, 0700); err != nil {
|
||||
t.Fatalf("mkdir target home: %v", err)
|
||||
}
|
||||
// Plant a divergent link at <targetHome>/session-env. The symlinks
|
||||
// package refuses to auto-correct this (data-loss safeguard) and will
|
||||
// return an error — which the switcher must swallow with a WARN log.
|
||||
// return an error, which must now trigger a rollback.
|
||||
bogus := filepath.Join(t.TempDir(), "somewhere-else")
|
||||
if err := os.MkdirAll(bogus, 0700); err != nil {
|
||||
t.Fatalf("mkdir bogus: %v", err)
|
||||
|
|
@ -331,7 +341,7 @@ func TestFlipEnsureSymlinksFailureDoesNotAbortSwap(t *testing.T) {
|
|||
|
||||
cfg := &config.Config{
|
||||
Accounts: []config.AccountConfig{
|
||||
{Name: "compte1", Home: filepath.Join(t.TempDir(), "claude-compte1")},
|
||||
{Name: "compte1", Home: previousHome},
|
||||
{Name: "compte2", Home: targetHome},
|
||||
},
|
||||
Pool: config.PoolConfig{
|
||||
|
|
@ -340,13 +350,108 @@ func TestFlipEnsureSymlinksFailureDoesNotAbortSwap(t *testing.T) {
|
|||
}
|
||||
|
||||
a := New(tc, s, cfg, make(chan quota.SwitchRequest), nil)
|
||||
a.homeDir = t.TempDir()
|
||||
homeDir := t.TempDir()
|
||||
a.homeDir = homeDir
|
||||
a.sharedSymlinks = tmpShared(t.TempDir())
|
||||
|
||||
a.executeSwitch(quota.SwitchRequest{From: "compte1"})
|
||||
err := a.executeSwitchE(quota.SwitchRequest{From: "compte1"})
|
||||
if err == nil {
|
||||
t.Fatalf("executeSwitchE: expected cancellation error, got nil")
|
||||
}
|
||||
// The public symmetric swap-cancelled error must mention ensure and
|
||||
// wrap the underlying symlinks package message. ErrPartialSwap must
|
||||
// NOT be set (rollback succeeded → recoverable condition).
|
||||
if errors.Is(err, ErrPartialSwap) {
|
||||
t.Errorf("did not expect ErrPartialSwap; rollback succeeded; got %v", err)
|
||||
}
|
||||
if a.IsPartialSwap() {
|
||||
t.Errorf("IsPartialSwap should be false when rollback succeeds")
|
||||
}
|
||||
|
||||
// The swap must have completed despite the divergent-link error.
|
||||
if got := s.ActiveAccount(); got != "compte2" {
|
||||
t.Errorf("swap should complete even when ensure fails; active=%q want compte2", got)
|
||||
// Active account must remain the previous one — SetActiveAccount must
|
||||
// NOT have been called.
|
||||
if got := s.ActiveAccount(); got != "compte1" {
|
||||
t.Errorf("active account should stay compte1 after rollback; got %q", got)
|
||||
}
|
||||
|
||||
// ~/.claude must now point at the previous home (rollback target).
|
||||
link, rlErr := os.Readlink(filepath.Join(homeDir, ".claude"))
|
||||
if rlErr != nil {
|
||||
t.Fatalf("readlink ~/.claude: %v", rlErr)
|
||||
}
|
||||
if link != previousHome {
|
||||
t.Errorf("~/.claude should point at previous home %q after rollback; got %q", previousHome, link)
|
||||
}
|
||||
}
|
||||
|
||||
// TestFlipEnsureAndRollbackFailure verifies that when BOTH EnsureForAccount
|
||||
// AND the rollback flip fail, the switcher sets the sticky partial-swap
|
||||
// flag and returns ErrPartialSwap. The daemon is then in a documented
|
||||
// degraded state where any further swap is refused until the operator
|
||||
// restarts it.
|
||||
func TestFlipEnsureAndRollbackFailure(t *testing.T) {
|
||||
tc := newMockTmux()
|
||||
|
||||
s := state.New("")
|
||||
s.SetActiveAccount("compte1")
|
||||
|
||||
previousHome := filepath.Join(t.TempDir(), "claude-compte1")
|
||||
targetHome := filepath.Join(t.TempDir(), "claude-compte2")
|
||||
if err := os.MkdirAll(previousHome, 0700); err != nil {
|
||||
t.Fatalf("mkdir previous home: %v", err)
|
||||
}
|
||||
if err := os.MkdirAll(targetHome, 0700); err != nil {
|
||||
t.Fatalf("mkdir target home: %v", err)
|
||||
}
|
||||
// Plant the divergent link that will cause EnsureForAccount to fail.
|
||||
bogus := filepath.Join(t.TempDir(), "somewhere-else")
|
||||
if err := os.MkdirAll(bogus, 0700); err != nil {
|
||||
t.Fatalf("mkdir bogus: %v", err)
|
||||
}
|
||||
if err := os.Symlink(bogus, filepath.Join(targetHome, "session-env")); err != nil {
|
||||
t.Fatalf("plant divergent link: %v", err)
|
||||
}
|
||||
|
||||
cfg := &config.Config{
|
||||
Accounts: []config.AccountConfig{
|
||||
{Name: "compte1", Home: previousHome},
|
||||
{Name: "compte2", Home: targetHome},
|
||||
},
|
||||
Pool: config.PoolConfig{
|
||||
Autonomous: config.AutonomousConfig{Prefix: "ccl-auto-", Min: 0, Max: 0},
|
||||
},
|
||||
}
|
||||
|
||||
a := New(tc, s, cfg, make(chan quota.SwitchRequest), nil)
|
||||
|
||||
// Force the rollback flip to fail: point homeDir at a file that cannot
|
||||
// host a .claude symlink. We use a regular file; the flipSymlink
|
||||
// implementation does os.Remove() then os.Symlink() under homeDir,
|
||||
// which fails when homeDir is itself a file (ENOTDIR).
|
||||
badHomeFile := filepath.Join(t.TempDir(), "not-a-dir")
|
||||
if err := os.WriteFile(badHomeFile, []byte("block"), 0600); err != nil {
|
||||
t.Fatalf("write bad home: %v", err)
|
||||
}
|
||||
a.homeDir = badHomeFile
|
||||
a.sharedSymlinks = tmpShared(t.TempDir())
|
||||
|
||||
err := a.executeSwitchE(quota.SwitchRequest{From: "compte1"})
|
||||
if err == nil {
|
||||
t.Fatalf("expected ErrPartialSwap, got nil")
|
||||
}
|
||||
if !errors.Is(err, ErrPartialSwap) {
|
||||
t.Errorf("expected ErrPartialSwap, got %v", err)
|
||||
}
|
||||
if !a.IsPartialSwap() {
|
||||
t.Errorf("IsPartialSwap should be true when both ensure AND rollback fail")
|
||||
}
|
||||
// SetActiveAccount must still not have been called.
|
||||
if got := s.ActiveAccount(); got != "compte1" {
|
||||
t.Errorf("active account must stay compte1 in partial-swap; got %q", got)
|
||||
}
|
||||
|
||||
// A subsequent swap attempt must be refused while the flag is set.
|
||||
if err2 := a.executeSwitchE(quota.SwitchRequest{From: "compte1"}); err2 == nil {
|
||||
t.Errorf("expected subsequent swap to be refused in degraded state")
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue