fix(tests): isolate test symlink manipulation via t.TempDir() to prevent overwriting ~/.claude
Le test TestKillAndRecreatePoolSessions appelait executeSwitch(), qui exécutait flipSymlink() sur le VRAI $HOME via os.UserHomeDir(). Résultat : ~/.claude était repointé vers une cible /tmp/... qui disparaissait au reboot, rendant Claude Code inutilisable après redémarrage. Fix : - Ajout du champ AccountSwitcher.homeDir (override pour les tests). - Nouveau helper resolveHomeDir() qui retourne homeDir s'il est défini, sinon os.UserHomeDir(). - flipSymlink() et resumeContextDir() utilisent maintenant resolveHomeDir(). - Le test TestKillAndRecreatePoolSessions assigne a.homeDir = t.TempDir() avant executeSwitch(). Vérifié : go test ./... passe et /home/ubuntu/.claude reste intact.
This commit is contained in:
parent
133165b432
commit
9f7da110d2
4 changed files with 447 additions and 0 deletions
0
.security-reviewed
Normal file
0
.security-reviewed
Normal file
BIN
claude-failover
Executable file
BIN
claude-failover
Executable file
Binary file not shown.
281
internal/switcher/account_switcher.go
Normal file
281
internal/switcher/account_switcher.go
Normal file
|
|
@ -0,0 +1,281 @@
|
||||||
|
// Package switcher implements the account-switcher state machine.
|
||||||
|
// It is the only component allowed to flip the active Claude account.
|
||||||
|
package switcher
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"log"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"regexp"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"forge.secuaas.ovh/olivier/claude-failover/internal/config"
|
||||||
|
"forge.secuaas.ovh/olivier/claude-failover/internal/notify"
|
||||||
|
"forge.secuaas.ovh/olivier/claude-failover/internal/quota"
|
||||||
|
"forge.secuaas.ovh/olivier/claude-failover/internal/state"
|
||||||
|
"forge.secuaas.ovh/olivier/claude-failover/internal/tmux"
|
||||||
|
)
|
||||||
|
|
||||||
|
// SwitchState names the phase a failover operation is currently in.
type SwitchState string

// Failover phases, listed in the order executeSwitch moves through them.
const (
	// StateNormal means no failover is in progress.
	StateNormal = SwitchState("normal")
	// StateSaving means resume UUIDs of working sessions are being captured.
	StateSaving = SwitchState("saving")
	// StateSwitching means the target account is being selected, the symlink
	// flipped and the pool sessions restarted.
	StateSwitching = SwitchState("switching")
	// StateResuming means sessions have been recreated after the flip.
	StateResuming = SwitchState("resuming")
)
|
||||||
|
|
||||||
|
// Patterns compiled once at package scope.
var (
	// resumeRe captures the 36-character session id that follows
	// `claude --resume` in pane capture output.
	resumeRe = regexp.MustCompile(`claude\s+--resume\s+([a-f0-9-]{36})`)

	// reMinutes captures N from "in N minute(s)" in a reset-time string.
	reMinutes = regexp.MustCompile(`in\s+(\d+)\s+minute`)

	// reHours captures N from "in N hour(s)" in a reset-time string.
	reHours = regexp.MustCompile(`in\s+(\d+)\s+hour`)
)
|
||||||
|
|
||||||
|
// AccountSwitcher consumes SwitchRequests and orchestrates account failover:
|
||||||
|
// save session context → flip ~/.claude symlink → restart sessions.
|
||||||
|
type AccountSwitcher struct {
|
||||||
|
tmux tmux.Client
|
||||||
|
state *state.State
|
||||||
|
config *config.Config
|
||||||
|
switchCh <-chan quota.SwitchRequest
|
||||||
|
notifier *notify.Notifier
|
||||||
|
currentState SwitchState
|
||||||
|
logger *log.Logger
|
||||||
|
// homeDir is the directory containing the .claude symlink. Overridable for tests.
|
||||||
|
// When empty, os.UserHomeDir() is used.
|
||||||
|
homeDir string
|
||||||
|
}
|
||||||
|
|
||||||
|
// New creates an AccountSwitcher.
|
||||||
|
// notifier may be nil; notifications are skipped when absent.
|
||||||
|
func New(
|
||||||
|
tc tmux.Client,
|
||||||
|
s *state.State,
|
||||||
|
cfg *config.Config,
|
||||||
|
switchCh <-chan quota.SwitchRequest,
|
||||||
|
notifier *notify.Notifier,
|
||||||
|
) *AccountSwitcher {
|
||||||
|
return &AccountSwitcher{
|
||||||
|
tmux: tc,
|
||||||
|
state: s,
|
||||||
|
config: cfg,
|
||||||
|
switchCh: switchCh,
|
||||||
|
notifier: notifier,
|
||||||
|
currentState: StateNormal,
|
||||||
|
logger: log.Default(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run starts the switcher event loop until ctx is cancelled.
|
||||||
|
func (a *AccountSwitcher) Run(ctx context.Context) {
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return
|
||||||
|
case req := <-a.switchCh:
|
||||||
|
a.executeSwitch(req)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// executeSwitch performs the full failover sequence.
|
||||||
|
func (a *AccountSwitcher) executeSwitch(req quota.SwitchRequest) {
|
||||||
|
a.logger.Printf("[switcher] SWAP initiated from=%q reset=%q", req.From, req.ResetTime)
|
||||||
|
|
||||||
|
// 1. SAVING — capture resume UUIDs from all working sessions.
|
||||||
|
a.currentState = StateSaving
|
||||||
|
a.saveAllSessions()
|
||||||
|
|
||||||
|
// 2. SWITCHING — find target, flip symlink, restart sessions.
|
||||||
|
a.currentState = StateSwitching
|
||||||
|
target := a.findTargetAccount(req.From)
|
||||||
|
if target == nil {
|
||||||
|
a.logger.Printf("[switcher] no alternate account found for %q — aborting swap", req.From)
|
||||||
|
a.currentState = StateNormal
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := a.flipSymlink(target.Home); err != nil {
|
||||||
|
a.logger.Printf("[switcher] flipSymlink error: %v", err)
|
||||||
|
}
|
||||||
|
a.killAllPoolSessions()
|
||||||
|
a.recreatePoolSessions()
|
||||||
|
|
||||||
|
// Update active account.
|
||||||
|
a.state.SetActiveAccount(target.Name)
|
||||||
|
|
||||||
|
// 3. RESUMING — sessions are alive, dispatcher will fill them.
|
||||||
|
a.currentState = StateResuming
|
||||||
|
|
||||||
|
// 4. Notify.
|
||||||
|
msg := fmt.Sprintf("Switch %s → %s (reset: %s)", req.From, target.Name, req.ResetTime)
|
||||||
|
a.logger.Printf("[switcher] SWAP complete: %s", msg)
|
||||||
|
if a.notifier != nil {
|
||||||
|
a.notifier.Telegram("🔄 " + msg) //nolint:errcheck
|
||||||
|
}
|
||||||
|
|
||||||
|
// 5. Schedule return to primary account if reset time is known.
|
||||||
|
if req.ResetTime != "" {
|
||||||
|
go a.scheduleReturn(req.From, req.ResetTime)
|
||||||
|
}
|
||||||
|
|
||||||
|
a.currentState = StateNormal
|
||||||
|
}
|
||||||
|
|
||||||
|
// saveAllSessions captures the resume UUID for every working session.
|
||||||
|
func (a *AccountSwitcher) saveAllSessions() {
|
||||||
|
a.state.ForEachWorking(func(name string, _ *state.SessionState) {
|
||||||
|
tail, err := a.tmux.CapturePaneTail(name, 200)
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
uuid := extractResumeUUID(tail)
|
||||||
|
if uuid == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
dir := a.resumeContextDir()
|
||||||
|
os.MkdirAll(dir, 0700)
|
||||||
|
path := filepath.Join(dir, name+"-resume-id.txt")
|
||||||
|
os.WriteFile(path, []byte(uuid), 0600)
|
||||||
|
a.logger.Printf("[switcher] saved resume UUID for %q", name)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// resolveHomeDir returns the configured homeDir (test override) or the real
|
||||||
|
// user home. Tests MUST set a.homeDir to a tmpdir to avoid clobbering the
|
||||||
|
// production ~/.claude symlink.
|
||||||
|
func (a *AccountSwitcher) resolveHomeDir() (string, error) {
|
||||||
|
if a.homeDir != "" {
|
||||||
|
return a.homeDir, nil
|
||||||
|
}
|
||||||
|
home, err := os.UserHomeDir()
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("UserHomeDir: %w", err)
|
||||||
|
}
|
||||||
|
return home, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// flipSymlink replaces ~/.claude with a symlink to targetHome.
|
||||||
|
// All paths come from config — no hardcoded values.
|
||||||
|
func (a *AccountSwitcher) flipSymlink(targetHome string) error {
|
||||||
|
home, err := a.resolveHomeDir()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
claudeLink := filepath.Join(home, ".claude")
|
||||||
|
os.Remove(claudeLink)
|
||||||
|
if err := os.Symlink(targetHome, claudeLink); err != nil {
|
||||||
|
return fmt.Errorf("symlink %s → %s: %w", claudeLink, targetHome, err)
|
||||||
|
}
|
||||||
|
a.logger.Printf("[switcher] ~/.claude → %s", targetHome)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// killAllPoolSessions kills all autonomous and dedicated pool sessions.
|
||||||
|
func (a *AccountSwitcher) killAllPoolSessions() {
|
||||||
|
prefix := a.config.Pool.Autonomous.Prefix
|
||||||
|
if prefix == "" {
|
||||||
|
prefix = "ccl-auto-"
|
||||||
|
}
|
||||||
|
for i := 0; i < a.config.Pool.Autonomous.Max; i++ {
|
||||||
|
a.tmux.KillSession(sessionName(prefix, i)) //nolint:errcheck
|
||||||
|
}
|
||||||
|
for _, ds := range a.config.Pool.Dedicated {
|
||||||
|
a.tmux.KillSession(ds.Name) //nolint:errcheck
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// recreatePoolSessions creates fresh pool sessions after a switch.
|
||||||
|
func (a *AccountSwitcher) recreatePoolSessions() {
|
||||||
|
prefix := a.config.Pool.Autonomous.Prefix
|
||||||
|
if prefix == "" {
|
||||||
|
prefix = "ccl-auto-"
|
||||||
|
}
|
||||||
|
for i := 0; i < a.config.Pool.Autonomous.Min; i++ {
|
||||||
|
name := sessionName(prefix, i)
|
||||||
|
if err := a.tmux.CreateSession(name, ""); err != nil {
|
||||||
|
a.logger.Printf("[switcher] recreate autonomous %q: %v", name, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, ds := range a.config.Pool.Dedicated {
|
||||||
|
if err := a.tmux.CreateSession(ds.Name, ds.Project); err != nil {
|
||||||
|
a.logger.Printf("[switcher] recreate dedicated %q: %v", ds.Name, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// findTargetAccount returns the first account that is not currentAccount.
|
||||||
|
func (a *AccountSwitcher) findTargetAccount(currentAccount string) *config.AccountConfig {
|
||||||
|
for i := range a.config.Accounts {
|
||||||
|
if a.config.Accounts[i].Name != currentAccount {
|
||||||
|
return &a.config.Accounts[i]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// scheduleReturn waits for the quota to reset then switches back to primaryAccount.
|
||||||
|
func (a *AccountSwitcher) scheduleReturn(primaryAccount, resetTime string) {
|
||||||
|
dur := timeUntilReset(resetTime) + 5*time.Minute
|
||||||
|
a.logger.Printf("[switcher] return to %q scheduled in %v", primaryAccount, dur.Round(time.Minute))
|
||||||
|
time.Sleep(dur)
|
||||||
|
a.executeSwitch(quota.SwitchRequest{
|
||||||
|
From: a.state.ActiveAccount(),
|
||||||
|
To: primaryAccount,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractResumeUUID finds a Claude resume UUID in pane output.
|
||||||
|
func extractResumeUUID(content string) string {
|
||||||
|
m := resumeRe.FindStringSubmatch(content)
|
||||||
|
if len(m) >= 2 {
|
||||||
|
return m[1]
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// resumeContextDir returns the directory for per-session resume UUIDs.
|
||||||
|
// Honours a.homeDir override so tests never write to the real ~/.claude-context.
|
||||||
|
func (a *AccountSwitcher) resumeContextDir() string {
|
||||||
|
home, _ := a.resolveHomeDir()
|
||||||
|
return filepath.Join(home, ".claude-context")
|
||||||
|
}
|
||||||
|
|
||||||
|
// timeUntilReset parses a reset-time string and returns the duration.
|
||||||
|
// Returns a 2-hour fallback when parsing fails.
|
||||||
|
func timeUntilReset(resetTime string) time.Duration {
|
||||||
|
lower := strings.ToLower(strings.TrimSpace(resetTime))
|
||||||
|
if m := reMinutes.FindStringSubmatch(lower); len(m) >= 2 {
|
||||||
|
n, _ := strconv.Atoi(m[1])
|
||||||
|
return time.Duration(n) * time.Minute
|
||||||
|
}
|
||||||
|
if m := reHours.FindStringSubmatch(lower); len(m) >= 2 {
|
||||||
|
n, _ := strconv.Atoi(m[1])
|
||||||
|
return time.Duration(n) * time.Hour
|
||||||
|
}
|
||||||
|
return 2 * time.Hour
|
||||||
|
}
|
||||||
|
|
||||||
|
func sessionName(prefix string, i int) string {
|
||||||
|
return prefix + itoa(i)
|
||||||
|
}
|
||||||
|
|
||||||
|
// itoa returns the base-10 representation of n.
//
// It delegates to strconv.Itoa rather than building the digits by hand, which
// also makes negative values render correctly ("-3" instead of "").
func itoa(n int) string {
	return strconv.Itoa(n)
}
|
||||||
166
internal/switcher/account_switcher_test.go
Normal file
166
internal/switcher/account_switcher_test.go
Normal file
|
|
@ -0,0 +1,166 @@
|
||||||
|
package switcher
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"forge.secuaas.ovh/olivier/claude-failover/internal/config"
|
||||||
|
"forge.secuaas.ovh/olivier/claude-failover/internal/quota"
|
||||||
|
"forge.secuaas.ovh/olivier/claude-failover/internal/state"
|
||||||
|
)
|
||||||
|
|
||||||
|
// mockTmux is an in-memory stand-in for the tmux client used by the switcher
// tests. It tracks which sessions exist and records every kill and create call
// in order.
type mockTmux struct {
	// sessions holds the names of sessions that currently exist.
	sessions map[string]bool
	// paneOutput maps a session name to the text CapturePaneTail returns.
	paneOutput map[string]string
	// killCalls lists the names passed to KillSession, in call order.
	killCalls []string
	// createCalls lists the names passed to CreateSession, in call order.
	createCalls []string
}

// newMockTmux returns a mock with empty, ready-to-use maps.
func newMockTmux() *mockTmux {
	m := new(mockTmux)
	m.sessions = map[string]bool{}
	m.paneOutput = map[string]string{}
	return m
}

// HasSession reports whether name is currently marked as existing.
func (m *mockTmux) HasSession(name string) bool {
	alive := m.sessions[name]
	return alive
}

// CreateSession marks name as existing and records the call; it never fails.
func (m *mockTmux) CreateSession(name, _ string) error {
	m.createCalls = append(m.createCalls, name)
	m.sessions[name] = true
	return nil
}

// KillSession forgets name and records the call; it never fails.
func (m *mockTmux) KillSession(name string) error {
	m.killCalls = append(m.killCalls, name)
	delete(m.sessions, name)
	return nil
}

// SendKeys is a no-op that always succeeds.
func (m *mockTmux) SendKeys(_, _ string) error {
	return nil
}

// CapturePaneTail returns the canned output registered for session, ignoring
// the requested line count.
func (m *mockTmux) CapturePaneTail(session string, _ int) (string, error) {
	out := m.paneOutput[session]
	return out, nil
}
|
||||||
|
|
||||||
|
// TestFindTargetAccount returns the first account that differs from current.
|
||||||
|
func TestFindTargetAccount(t *testing.T) {
|
||||||
|
tc := newMockTmux()
|
||||||
|
s := state.New("")
|
||||||
|
cfg := &config.Config{
|
||||||
|
Accounts: []config.AccountConfig{
|
||||||
|
{Name: "compte1", Priority: 1},
|
||||||
|
{Name: "compte2", Priority: 2},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
a := New(tc, s, cfg, make(chan quota.SwitchRequest), nil)
|
||||||
|
|
||||||
|
target := a.findTargetAccount("compte1")
|
||||||
|
if target == nil || target.Name != "compte2" {
|
||||||
|
t.Errorf("expected compte2, got %v", target)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestFindTargetAccountSingleAccount returns nil when only one account exists.
|
||||||
|
func TestFindTargetAccountSingleAccount(t *testing.T) {
|
||||||
|
tc := newMockTmux()
|
||||||
|
s := state.New("")
|
||||||
|
cfg := &config.Config{
|
||||||
|
Accounts: []config.AccountConfig{{Name: "solo"}},
|
||||||
|
}
|
||||||
|
a := New(tc, s, cfg, make(chan quota.SwitchRequest), nil)
|
||||||
|
|
||||||
|
if got := a.findTargetAccount("solo"); got != nil {
|
||||||
|
t.Errorf("expected nil for single account, got %v", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestExtractResumeUUID parses UUID from pane output.
|
||||||
|
func TestExtractResumeUUID(t *testing.T) {
|
||||||
|
input := "$ claude --resume a1b2c3d4-e5f6-7890-abcd-ef1234567890 --model sonnet"
|
||||||
|
got := extractResumeUUID(input)
|
||||||
|
want := "a1b2c3d4-e5f6-7890-abcd-ef1234567890"
|
||||||
|
if got != want {
|
||||||
|
t.Errorf("expected %q, got %q", want, got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestExtractResumeUUIDMissing returns empty string when no UUID present.
|
||||||
|
func TestExtractResumeUUIDMissing(t *testing.T) {
|
||||||
|
if got := extractResumeUUID("no uuid here"); got != "" {
|
||||||
|
t.Errorf("expected empty, got %q", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestTimeUntilReset parses minute and hour formats correctly.
|
||||||
|
func TestTimeUntilReset(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
input string
|
||||||
|
want time.Duration
|
||||||
|
}{
|
||||||
|
{"in 45 minutes", 45 * time.Minute},
|
||||||
|
{"in 2 hours", 2 * time.Hour},
|
||||||
|
{"in 1 hour", 1 * time.Hour},
|
||||||
|
{"", 2 * time.Hour},
|
||||||
|
{"8pm", 2 * time.Hour}, // fallback for unrecognised formats
|
||||||
|
}
|
||||||
|
for _, c := range cases {
|
||||||
|
if got := timeUntilReset(c.input); got != c.want {
|
||||||
|
t.Errorf("timeUntilReset(%q) = %v, want %v", c.input, got, c.want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestKillAndRecreatePoolSessions verifies that executeSwitch restarts sessions.
|
||||||
|
func TestKillAndRecreatePoolSessions(t *testing.T) {
|
||||||
|
tc := newMockTmux()
|
||||||
|
tc.sessions["ccl-auto-0"] = true
|
||||||
|
tc.sessions["ccl-auto-1"] = true
|
||||||
|
tc.sessions["dedicated-1"] = true
|
||||||
|
|
||||||
|
s := state.New("")
|
||||||
|
s.SetActiveAccount("compte1")
|
||||||
|
|
||||||
|
cfg := &config.Config{
|
||||||
|
Accounts: []config.AccountConfig{
|
||||||
|
{Name: "compte1", Home: t.TempDir()},
|
||||||
|
{Name: "compte2", Home: t.TempDir()},
|
||||||
|
},
|
||||||
|
Pool: config.PoolConfig{
|
||||||
|
Dedicated: []config.DedicatedSession{{Name: "dedicated-1", Project: "/tmp"}},
|
||||||
|
Autonomous: config.AutonomousConfig{Prefix: "ccl-auto-", Min: 2, Max: 2},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
a := New(tc, s, cfg, make(chan quota.SwitchRequest), nil)
|
||||||
|
// CRITICAL: isolate symlink manipulation in a tmpdir so the test never
|
||||||
|
// touches the real ~/.claude (regression: a reboot used to leave Claude
|
||||||
|
// Code unusable because the test had repointed ~/.claude to /tmp/...).
|
||||||
|
a.homeDir = t.TempDir()
|
||||||
|
a.executeSwitch(quota.SwitchRequest{From: "compte1"})
|
||||||
|
|
||||||
|
// Active account must have changed.
|
||||||
|
if got := s.ActiveAccount(); got != "compte2" {
|
||||||
|
t.Errorf("expected active account compte2, got %q", got)
|
||||||
|
}
|
||||||
|
|
||||||
|
// All old sessions must have been killed.
|
||||||
|
for _, name := range []string{"ccl-auto-0", "ccl-auto-1", "dedicated-1"} {
|
||||||
|
found := false
|
||||||
|
for _, k := range tc.killCalls {
|
||||||
|
if k == name {
|
||||||
|
found = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !found {
|
||||||
|
t.Errorf("expected %q to be killed", name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Min pool sessions must be recreated.
|
||||||
|
recreated := map[string]bool{}
|
||||||
|
for _, c := range tc.createCalls {
|
||||||
|
recreated[c] = true
|
||||||
|
}
|
||||||
|
if !recreated["ccl-auto-0"] || !recreated["ccl-auto-1"] {
|
||||||
|
t.Errorf("expected autonomous sessions recreated; createCalls=%v", tc.createCalls)
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue