feat(switcher): auto-resume dedicated sessions after a swap

When a legitimate quota hit triggered a swap, killAllPoolSessions tore
down the dedicated interactive sessions (ccl-1-conformvault, ccl-2-scanyze)
along with the pool, then recreatePoolSessions re-opened them at a bare
bash prompt. The operator had to manually re-run
  CLAUDE_CONFIG_DIR=<target> claude --dangerously-skip-permissions --resume <uuid>
after every swap, losing whatever conversation was mid-flight.

saveAllSessions only iterates sessions tracked as "working" in state;
user-driven dedicated sessions are rarely in that state, so their resume
UUIDs were never saved.

- saveDedicatedUUIDs: capture resume UUID for every configured dedicated
  session regardless of tracked state, before kill.
- relaunchDedicatedSessions(targetHome): after recreate, send a resume
  command on each dedicated session pointing CLAUDE_CONFIG_DIR at the
  target account's home. Missing UUID → leave at shell, no blind launch.
- isValidResumeUUID hardens against a corrupted resume-id.txt.

New TestDedicatedRelaunchAfterSwap verifies end-to-end: pane capture →
UUID persisted → resume command sent with the correct CLAUDE_CONFIG_DIR.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Ubuntu 2026-04-15 20:24:38 +00:00
parent 5cad53ac7a
commit 8fdb1937fc
4 changed files with 169 additions and 14 deletions

View file

@ -90,9 +90,13 @@ func (a *AccountSwitcher) Run(ctx context.Context) {
func (a *AccountSwitcher) executeSwitch(req quota.SwitchRequest) {
a.logger.Printf("[switcher] SWAP initiated from=%q reset=%q", req.From, req.ResetTime)
// 1. SAVING — capture resume UUIDs from all working sessions.
// 1. SAVING — capture resume UUIDs from all working sessions plus
// every dedicated session unconditionally (dedicated sessions are
// user-driven and may not be tracked as "working" in state, but their
// UUIDs are the most valuable to preserve across a swap).
a.currentState = StateSaving
a.saveAllSessions()
a.saveDedicatedUUIDs()
// 2. SWITCHING — find target, flip symlink, restart sessions.
a.currentState = StateSwitching
@ -108,6 +112,7 @@ func (a *AccountSwitcher) executeSwitch(req quota.SwitchRequest) {
}
a.killAllPoolSessions()
a.recreatePoolSessions()
a.relaunchDedicatedSessions(target.Home)
// Update active account and record the swap timestamp so the quota
// monitor can enforce a cooldown before requesting another one.
@ -132,6 +137,75 @@ func (a *AccountSwitcher) executeSwitch(req quota.SwitchRequest) {
a.currentState = StateNormal
}
// saveDedicatedUUIDs captures the resume UUID for every configured dedicated
// session, regardless of its tracked state. Dedicated sessions are typically
// user-driven and not in state="working", but their UUIDs are the most
// valuable to preserve across a swap so the user's work is not lost.
//
// For each entry in a.config.Pool.Dedicated that currently has a tmux session,
// the pane tail is captured and passed to extractResumeUUID. A non-empty
// result is written to <resumeContextDir>/<session-name>-resume-id.txt
// (directory mode 0700, file mode 0600). The function is best-effort: any
// failure for one session is logged and the loop moves on to the next, so a
// single bad session never prevents the others from being saved.
func (a *AccountSwitcher) saveDedicatedUUIDs() {
	for _, ds := range a.config.Pool.Dedicated {
		// A dedicated session that is not running has nothing to capture.
		if !a.tmux.HasSession(ds.Name) {
			continue
		}
		tail, err := a.tmux.CapturePaneTail(ds.Name, 200)
		if err != nil {
			// Log instead of skipping silently: without this, a failed capture
			// is indistinguishable from "no UUID on screen" when the session
			// is later left at the shell.
			a.logger.Printf("[switcher] capture pane %q: %v", ds.Name, err)
			continue
		}
		uuid := extractResumeUUID(tail)
		if uuid == "" {
			// Nothing resumable was found in the captured text.
			continue
		}
		dir := a.resumeContextDir()
		if err := os.MkdirAll(dir, 0700); err != nil {
			a.logger.Printf("[switcher] mkdir %s: %v", dir, err)
			continue
		}
		path := filepath.Join(dir, ds.Name+"-resume-id.txt")
		if err := os.WriteFile(path, []byte(uuid), 0600); err != nil {
			a.logger.Printf("[switcher] write %s: %v", path, err)
			continue
		}
		a.logger.Printf("[switcher] saved dedicated resume UUID for %q: %s", ds.Name, uuid)
	}
}
// relaunchDedicatedSessions sends a `claude --resume <uuid>` command to each
// dedicated session after recreation, using the target account's home via
// CLAUDE_CONFIG_DIR so the session follows the active account. If no UUID was
// captured for a session, it is left at the bash prompt for manual restart.
//
// targetHome is the directory exported as CLAUDE_CONFIG_DIR in the command
// line sent to each session. The UUID for a session is read from
// <resumeContextDir>/<session-name>-resume-id.txt; a missing file, an invalid
// UUID, or a SendKeys failure is logged and that session is skipped, without
// affecting the remaining sessions.
func (a *AccountSwitcher) relaunchDedicatedSessions(targetHome string) {
	// The command is typed into an interactive shell, so the directory must be
	// a single shell word. Quote once up front; plain paths are left untouched.
	configDir := shellQuoteConfigDir(targetHome)
	for _, ds := range a.config.Pool.Dedicated {
		path := filepath.Join(a.resumeContextDir(), ds.Name+"-resume-id.txt")
		data, err := os.ReadFile(path)
		if err != nil {
			a.logger.Printf("[switcher] no saved resume UUID for %q (%v) — leaving at shell", ds.Name, err)
			continue
		}
		uuid := strings.TrimSpace(string(data))
		if !isValidResumeUUID(uuid) {
			a.logger.Printf("[switcher] invalid UUID for %q: %q", ds.Name, uuid)
			continue
		}
		// uuid has been validated above; configDir is shell-quoted whenever it
		// contains anything outside a conservative safe character set, so a
		// home path with spaces or metacharacters cannot split or alter the
		// command.
		cmd := fmt.Sprintf("CLAUDE_CONFIG_DIR=%s claude --dangerously-skip-permissions --resume %s",
			configDir, uuid)
		if err := a.tmux.SendKeys(ds.Name, cmd); err != nil {
			a.logger.Printf("[switcher] relaunch %q: %v", ds.Name, err)
			continue
		}
		a.logger.Printf("[switcher] relaunched %q on %s (resume=%s)", ds.Name, targetHome, uuid)
	}
}

// shellQuoteConfigDir returns s in a form usable as the value of a POSIX
// shell variable assignment. Values made up solely of characters that the
// shell does not treat specially in that position are returned unchanged, so
// the generated command is identical to the unquoted form for ordinary paths.
// Anything else is wrapped in single quotes, with embedded single quotes
// rewritten as '\''.
func shellQuoteConfigDir(s string) string {
	// '~' is deliberately kept in the safe set so that a value relying on
	// tilde expansion continues to behave as it did when unquoted.
	const safe = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789/._-+:@%,=~"
	unsafeAt := strings.IndexFunc(s, func(r rune) bool {
		return !strings.ContainsRune(safe, r)
	})
	if unsafeAt < 0 {
		return s
	}
	return "'" + strings.ReplaceAll(s, "'", `'\''`) + "'"
}
// isValidResumeUUID defends against corrupted resume-id files by requiring
// the canonical 36-char lowercase hex+dash UUID format:
//
//	xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
//
// Every byte is checked positionally: dashes must sit at offsets 8, 13, 18
// and 23, and every other byte must be one of 0-9 or a-f. The check is
// self-contained on purpose: the accepted value is interpolated into a shell
// command, so the guarantee must not depend on how a pattern written for
// scanning pane output happens to be anchored.
//
// It returns true only for strings in exactly that format; surrounding
// whitespace, uppercase hex digits, and any other characters are rejected.
func isValidResumeUUID(s string) bool {
	if len(s) != 36 {
		return false
	}
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch i {
		case 8, 13, 18, 23:
			// Group separators.
			if c != '-' {
				return false
			}
		default:
			isHex := (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f')
			if !isHex {
				return false
			}
		}
	}
	return true
}
// saveAllSessions captures the resume UUID for every working session.
func (a *AccountSwitcher) saveAllSessions() {
a.state.ForEachWorking(func(name string, _ *state.SessionState) {