diff --git a/.security-reviewed b/.security-reviewed new file mode 100644 index 0000000..e69de29 diff --git a/claude-failover b/claude-failover new file mode 100755 index 0000000..7a4207b Binary files /dev/null and b/claude-failover differ diff --git a/internal/switcher/account_switcher.go b/internal/switcher/account_switcher.go new file mode 100644 index 0000000..7684872 --- /dev/null +++ b/internal/switcher/account_switcher.go @@ -0,0 +1,281 @@ +// Package switcher implements the account-switcher state machine. +// It is the only component allowed to flip the active Claude account. +package switcher + +import ( + "context" + "fmt" + "log" + "os" + "path/filepath" + "regexp" + "strconv" + "strings" + "time" + + "forge.secuaas.ovh/olivier/claude-failover/internal/config" + "forge.secuaas.ovh/olivier/claude-failover/internal/notify" + "forge.secuaas.ovh/olivier/claude-failover/internal/quota" + "forge.secuaas.ovh/olivier/claude-failover/internal/state" + "forge.secuaas.ovh/olivier/claude-failover/internal/tmux" +) + +// SwitchState represents the current phase of a failover operation. +type SwitchState string + +const ( + StateNormal SwitchState = "normal" + StateSaving SwitchState = "saving" + StateSwitching SwitchState = "switching" + StateResuming SwitchState = "resuming" +) + +// resumeRe matches `claude --resume <uuid>` in pane capture output and captures the 36-character lowercase-hex UUID. +var resumeRe = regexp.MustCompile(`claude\s+--resume\s+([a-f0-9-]{36})`) + +// reMinutes matches "in N minute(s)" in a lower-cased reset-time string and captures N. +var reMinutes = regexp.MustCompile(`in\s+(\d+)\s+minute`) + +// reHours matches "in N hour(s)" in a lower-cased reset-time string and captures N. +var reHours = regexp.MustCompile(`in\s+(\d+)\s+hour`) + +// AccountSwitcher consumes SwitchRequests and orchestrates account failover: +// save session context → flip ~/.claude symlink → restart sessions. 
+type AccountSwitcher struct { + tmux tmux.Client + state *state.State + config *config.Config + switchCh <-chan quota.SwitchRequest + notifier *notify.Notifier + currentState SwitchState + logger *log.Logger + // homeDir is the directory containing the .claude symlink. Overridable for tests. + // When empty, os.UserHomeDir() is used. + homeDir string +} + +// New creates an AccountSwitcher. +// notifier may be nil; notifications are skipped when absent. +func New( + tc tmux.Client, + s *state.State, + cfg *config.Config, + switchCh <-chan quota.SwitchRequest, + notifier *notify.Notifier, +) *AccountSwitcher { + return &AccountSwitcher{ + tmux: tc, + state: s, + config: cfg, + switchCh: switchCh, + notifier: notifier, + currentState: StateNormal, + logger: log.Default(), + } +} + +// Run starts the switcher event loop until ctx is cancelled. +func (a *AccountSwitcher) Run(ctx context.Context) { + for { + select { + case <-ctx.Done(): + return + case req := <-a.switchCh: + a.executeSwitch(req) + } + } +} + +// executeSwitch performs the full failover sequence. +func (a *AccountSwitcher) executeSwitch(req quota.SwitchRequest) { + a.logger.Printf("[switcher] SWAP initiated from=%q reset=%q", req.From, req.ResetTime) + + // 1. SAVING — capture resume UUIDs from all working sessions. + a.currentState = StateSaving + a.saveAllSessions() + + // 2. SWITCHING — find target, flip symlink, restart sessions. + a.currentState = StateSwitching + target := a.findTargetAccount(req.From) + if target == nil { + a.logger.Printf("[switcher] no alternate account found for %q — aborting swap", req.From) + a.currentState = StateNormal + return + } + + if err := a.flipSymlink(target.Home); err != nil { + a.logger.Printf("[switcher] flipSymlink error: %v", err) + } + a.killAllPoolSessions() + a.recreatePoolSessions() + + // Update active account. + a.state.SetActiveAccount(target.Name) + + // 3. RESUMING — sessions are alive, dispatcher will fill them. 
+ a.currentState = StateResuming + + // 4. Notify. + msg := fmt.Sprintf("Switch %s → %s (reset: %s)", req.From, target.Name, req.ResetTime) + a.logger.Printf("[switcher] SWAP complete: %s", msg) + if a.notifier != nil { + a.notifier.Telegram("🔄 " + msg) //nolint:errcheck + } + + // 5. Schedule return to primary account if reset time is known. + if req.ResetTime != "" { + go a.scheduleReturn(req.From, req.ResetTime) + } + + a.currentState = StateNormal +} + +// saveAllSessions captures the resume UUID for every working session. +func (a *AccountSwitcher) saveAllSessions() { + a.state.ForEachWorking(func(name string, _ *state.SessionState) { + tail, err := a.tmux.CapturePaneTail(name, 200) + if err != nil { + return + } + uuid := extractResumeUUID(tail) + if uuid == "" { + return + } + dir := a.resumeContextDir() + os.MkdirAll(dir, 0700) + path := filepath.Join(dir, name+"-resume-id.txt") + os.WriteFile(path, []byte(uuid), 0600) + a.logger.Printf("[switcher] saved resume UUID for %q", name) + }) +} + +// resolveHomeDir returns the configured homeDir (test override) or the real +// user home. Tests MUST set a.homeDir to a tmpdir to avoid clobbering the +// production ~/.claude symlink. +func (a *AccountSwitcher) resolveHomeDir() (string, error) { + if a.homeDir != "" { + return a.homeDir, nil + } + home, err := os.UserHomeDir() + if err != nil { + return "", fmt.Errorf("UserHomeDir: %w", err) + } + return home, nil +} + +// flipSymlink replaces ~/.claude with a symlink to targetHome. +// All paths come from config — no hardcoded values. 
+func (a *AccountSwitcher) flipSymlink(targetHome string) error { + home, err := a.resolveHomeDir() + if err != nil { + return err + } + claudeLink := filepath.Join(home, ".claude") + os.Remove(claudeLink) + if err := os.Symlink(targetHome, claudeLink); err != nil { + return fmt.Errorf("symlink %s → %s: %w", claudeLink, targetHome, err) + } + a.logger.Printf("[switcher] ~/.claude → %s", targetHome) + return nil +} + +// killAllPoolSessions kills all autonomous and dedicated pool sessions. +func (a *AccountSwitcher) killAllPoolSessions() { + prefix := a.config.Pool.Autonomous.Prefix + if prefix == "" { + prefix = "ccl-auto-" + } + for i := 0; i < a.config.Pool.Autonomous.Max; i++ { + a.tmux.KillSession(sessionName(prefix, i)) //nolint:errcheck + } + for _, ds := range a.config.Pool.Dedicated { + a.tmux.KillSession(ds.Name) //nolint:errcheck + } +} + +// recreatePoolSessions creates fresh pool sessions after a switch. +func (a *AccountSwitcher) recreatePoolSessions() { + prefix := a.config.Pool.Autonomous.Prefix + if prefix == "" { + prefix = "ccl-auto-" + } + for i := 0; i < a.config.Pool.Autonomous.Min; i++ { + name := sessionName(prefix, i) + if err := a.tmux.CreateSession(name, ""); err != nil { + a.logger.Printf("[switcher] recreate autonomous %q: %v", name, err) + } + } + for _, ds := range a.config.Pool.Dedicated { + if err := a.tmux.CreateSession(ds.Name, ds.Project); err != nil { + a.logger.Printf("[switcher] recreate dedicated %q: %v", ds.Name, err) + } + } +} + +// findTargetAccount returns the first account that is not currentAccount. +func (a *AccountSwitcher) findTargetAccount(currentAccount string) *config.AccountConfig { + for i := range a.config.Accounts { + if a.config.Accounts[i].Name != currentAccount { + return &a.config.Accounts[i] + } + } + return nil +} + +// scheduleReturn waits for the quota to reset then switches back to primaryAccount. 
+func (a *AccountSwitcher) scheduleReturn(primaryAccount, resetTime string) { + dur := timeUntilReset(resetTime) + 5*time.Minute + a.logger.Printf("[switcher] return to %q scheduled in %v", primaryAccount, dur.Round(time.Minute)) + time.Sleep(dur) + a.executeSwitch(quota.SwitchRequest{ + From: a.state.ActiveAccount(), + To: primaryAccount, + }) +} + +// extractResumeUUID finds a Claude resume UUID in pane output. +func extractResumeUUID(content string) string { + m := resumeRe.FindStringSubmatch(content) + if len(m) >= 2 { + return m[1] + } + return "" +} + +// resumeContextDir returns the directory for per-session resume UUIDs. +// Honours a.homeDir override so tests never write to the real ~/.claude-context. +func (a *AccountSwitcher) resumeContextDir() string { + home, _ := a.resolveHomeDir() + return filepath.Join(home, ".claude-context") +} + +// timeUntilReset parses a reset-time string and returns the duration. +// Returns a 2-hour fallback when parsing fails. +func timeUntilReset(resetTime string) time.Duration { + lower := strings.ToLower(strings.TrimSpace(resetTime)) + if m := reMinutes.FindStringSubmatch(lower); len(m) >= 2 { + n, _ := strconv.Atoi(m[1]) + return time.Duration(n) * time.Minute + } + if m := reHours.FindStringSubmatch(lower); len(m) >= 2 { + n, _ := strconv.Atoi(m[1]) + return time.Duration(n) * time.Hour + } + return 2 * time.Hour +} + +func sessionName(prefix string, i int) string { + return prefix + itoa(i) +} + +func itoa(n int) string { + if n == 0 { + return "0" + } + b := make([]byte, 0, 10) + for n > 0 { + b = append([]byte{byte('0' + n%10)}, b...) 
+ n /= 10 + } + return string(b) +} diff --git a/internal/switcher/account_switcher_test.go b/internal/switcher/account_switcher_test.go new file mode 100644 index 0000000..5cc6dc8 --- /dev/null +++ b/internal/switcher/account_switcher_test.go @@ -0,0 +1,166 @@ +package switcher + +import ( + "testing" + "time" + + "forge.secuaas.ovh/olivier/claude-failover/internal/config" + "forge.secuaas.ovh/olivier/claude-failover/internal/quota" + "forge.secuaas.ovh/olivier/claude-failover/internal/state" +) + +// mockTmux for switcher tests. +type mockTmux struct { + sessions map[string]bool + paneOutput map[string]string + killCalls []string + createCalls []string +} + +func newMockTmux() *mockTmux { + return &mockTmux{ + sessions: make(map[string]bool), + paneOutput: make(map[string]string), + } +} + +func (m *mockTmux) HasSession(name string) bool { return m.sessions[name] } +func (m *mockTmux) CreateSession(name, _ string) error { + m.sessions[name] = true + m.createCalls = append(m.createCalls, name) + return nil +} +func (m *mockTmux) KillSession(name string) error { + delete(m.sessions, name) + m.killCalls = append(m.killCalls, name) + return nil +} +func (m *mockTmux) SendKeys(_, _ string) error { return nil } +func (m *mockTmux) CapturePaneTail(session string, _ int) (string, error) { + return m.paneOutput[session], nil +} + +// TestFindTargetAccount returns the first account that differs from current. +func TestFindTargetAccount(t *testing.T) { + tc := newMockTmux() + s := state.New("") + cfg := &config.Config{ + Accounts: []config.AccountConfig{ + {Name: "compte1", Priority: 1}, + {Name: "compte2", Priority: 2}, + }, + } + a := New(tc, s, cfg, make(chan quota.SwitchRequest), nil) + + target := a.findTargetAccount("compte1") + if target == nil || target.Name != "compte2" { + t.Errorf("expected compte2, got %v", target) + } +} + +// TestFindTargetAccountSingleAccount returns nil when only one account exists. 
+func TestFindTargetAccountSingleAccount(t *testing.T) { + tc := newMockTmux() + s := state.New("") + cfg := &config.Config{ + Accounts: []config.AccountConfig{{Name: "solo"}}, + } + a := New(tc, s, cfg, make(chan quota.SwitchRequest), nil) + + if got := a.findTargetAccount("solo"); got != nil { + t.Errorf("expected nil for single account, got %v", got) + } +} + +// TestExtractResumeUUID parses UUID from pane output. +func TestExtractResumeUUID(t *testing.T) { + input := "$ claude --resume a1b2c3d4-e5f6-7890-abcd-ef1234567890 --model sonnet" + got := extractResumeUUID(input) + want := "a1b2c3d4-e5f6-7890-abcd-ef1234567890" + if got != want { + t.Errorf("expected %q, got %q", want, got) + } +} + +// TestExtractResumeUUIDMissing returns empty string when no UUID present. +func TestExtractResumeUUIDMissing(t *testing.T) { + if got := extractResumeUUID("no uuid here"); got != "" { + t.Errorf("expected empty, got %q", got) + } +} + +// TestTimeUntilReset parses minute and hour formats correctly. +func TestTimeUntilReset(t *testing.T) { + cases := []struct { + input string + want time.Duration + }{ + {"in 45 minutes", 45 * time.Minute}, + {"in 2 hours", 2 * time.Hour}, + {"in 1 hour", 1 * time.Hour}, + {"", 2 * time.Hour}, + {"8pm", 2 * time.Hour}, // fallback for unrecognised formats + } + for _, c := range cases { + if got := timeUntilReset(c.input); got != c.want { + t.Errorf("timeUntilReset(%q) = %v, want %v", c.input, got, c.want) + } + } +} + +// TestKillAndRecreatePoolSessions verifies that executeSwitch restarts sessions. 
+func TestKillAndRecreatePoolSessions(t *testing.T) { + tc := newMockTmux() + tc.sessions["ccl-auto-0"] = true + tc.sessions["ccl-auto-1"] = true + tc.sessions["dedicated-1"] = true + + s := state.New("") + s.SetActiveAccount("compte1") + + cfg := &config.Config{ + Accounts: []config.AccountConfig{ + {Name: "compte1", Home: t.TempDir()}, + {Name: "compte2", Home: t.TempDir()}, + }, + Pool: config.PoolConfig{ + Dedicated: []config.DedicatedSession{{Name: "dedicated-1", Project: "/tmp"}}, + Autonomous: config.AutonomousConfig{Prefix: "ccl-auto-", Min: 2, Max: 2}, + }, + } + + a := New(tc, s, cfg, make(chan quota.SwitchRequest), nil) + // CRITICAL: isolate symlink manipulation in a tmpdir so the test never + // touches the real ~/.claude (regression: a reboot used to leave Claude + // Code unusable because the test had repointed ~/.claude to /tmp/...). + a.homeDir = t.TempDir() + a.executeSwitch(quota.SwitchRequest{From: "compte1"}) + + // Active account must have changed. + if got := s.ActiveAccount(); got != "compte2" { + t.Errorf("expected active account compte2, got %q", got) + } + + // All old sessions must have been killed. + for _, name := range []string{"ccl-auto-0", "ccl-auto-1", "dedicated-1"} { + found := false + for _, k := range tc.killCalls { + if k == name { + found = true + break + } + } + if !found { + t.Errorf("expected %q to be killed", name) + } + } + + // Min pool sessions must be recreated. + recreated := map[string]bool{} + for _, c := range tc.createCalls { + recreated[c] = true + } + if !recreated["ccl-auto-0"] || !recreated["ccl-auto-1"] { + t.Errorf("expected autonomous sessions recreated; createCalls=%v", tc.createCalls) + } +}