claude-failover/internal/watcher/session_watcher.go
Ubuntu 4cbdcf143a fix(dispatcher+watcher): never auto-dispatch into dedicated sessions
Observed: tasks from filesecure/.agent-queue/inbox and SecuScan/
.agent-queue/inbox were being routed into ccl-1-conformvault and
ccl-2-scanyze whenever those sessions happened to be idle. Those are
the operator's manual interactive Claude sessions, not dispatch
targets — the auto-dispatch was (a) hijacking a Claude instance the
operator was using and (b) triggering /exit via the watcher's
completion path when the side-task finished, kicking the operator out
mid-conversation.

findFreeSession was iterating Pool.Dedicated before the autonomous
pool, so any idle dedicated session was the first candidate.

- Dispatcher.findFreeSession: remove the Dedicated loop entirely.
  Auto-dispatch is now pool-only (ccl-auto-11..20).
- Watcher.completeSession: defense-in-depth — even if a dedicated
  session ever ends up in "working" state, it is no longer /exit'd;
  just marked idle. Pool /exit behaviour unchanged (context recycle).
- Tests: new TestFindFreeSessionSkipsDedicated proves the routing;
  3 existing tests rewritten to use the autonomous pool instead of
  relying on Dedicated as a fake pool.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-16 13:30:26 +00:00

160 lines
4.6 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Package watcher detects when a Claude Code session has finished its current
// task and signals the dispatcher to assign a new one.
package watcher
import (
"context"
"log"
"os"
"path/filepath"
"regexp"
"strings"
"time"
"forge.secuaas.ovh/olivier/claude-failover/internal/config"
"forge.secuaas.ovh/olivier/claude-failover/internal/state"
"forge.secuaas.ovh/olivier/claude-failover/internal/tmux"
)
// spinnerRe matches Claude Code's "Xs ·" or "Xs ⠋" progress indicator: one or
// more digits followed by "s", at least one whitespace character, and then
// either a middle dot or one of the ten braille spinner frames. It is compiled
// once at package scope and used by hasSpinner.
var spinnerRe = regexp.MustCompile(`\d+s\s+[·⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏]`)
// SessionWatcher monitors working tmux sessions and emits a session's name on
// DoneChan when that session is considered finished: a done-signal file
// exists for it, its pane shows the Claude Code idle prompt with no active
// spinner, or it has exceeded the idle timeout since it was assigned.
type SessionWatcher struct {
	// tmux is used to capture pane output and to send keys to a session.
	tmux tmux.Client
	// state supplies the set of working sessions and is updated when a
	// session is marked idle.
	state *state.State
	// config is consulted for the list of dedicated sessions; isDedicated
	// tolerates it being nil.
	config *config.Config
	// done carries the names of completed sessions to the consumer of
	// DoneChan.
	done chan string
	// interval is the period between two polls in Run.
	interval time.Duration
	// idleTimeout is the maximum time allowed since a session's AssignedAt
	// before the session is force-completed.
	idleTimeout time.Duration
	// signalDir is the directory in which "agent-done-<session>" signal
	// files are looked up.
	signalDir string
	// logger receives the watcher's diagnostic output.
	logger *log.Logger
}
// New creates a SessionWatcher bound to the given tmux client, session state
// and configuration.
//
// Values missing from cfg.Watcher are replaced by defaults:
//   - Interval:      30 seconds when zero or negative
//   - IdleTimeout:   60 minutes when zero or negative
//   - DoneSignalDir: "/tmp" when empty
//
// Non-positive durations are replaced rather than passed through: Run hands
// the interval to time.NewTicker, which panics for durations <= 0, and a
// negative idle timeout would make every working session time out on its
// first poll.
//
// The returned watcher logs through log.Default() and buffers up to 32
// completion signals on its done channel. cfg must not be nil.
func New(tc tmux.Client, s *state.State, cfg *config.Config) *SessionWatcher {
	const (
		defaultInterval    = 30 * time.Second
		defaultIdleTimeout = 60 * time.Minute
		defaultSignalDir   = "/tmp"
		doneBufferSize     = 32
	)

	interval := cfg.Watcher.Interval.Duration
	if interval <= 0 {
		interval = defaultInterval
	}

	idleTimeout := cfg.Watcher.IdleTimeout.Duration
	if idleTimeout <= 0 {
		idleTimeout = defaultIdleTimeout
	}

	signalDir := cfg.Watcher.DoneSignalDir
	if signalDir == "" {
		signalDir = defaultSignalDir
	}

	return &SessionWatcher{
		tmux:        tc,
		state:       s,
		config:      cfg,
		done:        make(chan string, doneBufferSize),
		interval:    interval,
		idleTimeout: idleTimeout,
		signalDir:   signalDir,
		logger:      log.Default(),
	}
}
// DoneChan exposes the watcher's completion channel as receive-only. Each
// value received is the name of a session that the watcher has completed.
func (w *SessionWatcher) DoneChan() <-chan string {
	var completed <-chan string = w.done
	return completed
}
// Run drives the watcher: it polls the working sessions once per interval
// and blocks until ctx is cancelled, at which point the ticker is stopped and
// Run returns.
func (w *SessionWatcher) Run(ctx context.Context) {
	tick := time.NewTicker(w.interval)
	defer tick.Stop()

	cancelled := ctx.Done()
	for {
		select {
		case <-tick.C:
			w.poll()
		case <-cancelled:
			return
		}
	}
}
// poll runs checkSession once for every session that state currently reports
// as working.
//
// NOTE(review): checkSession can call state.SetIdle (via completeSession) and
// sleep for 500ms from inside this callback — confirm that ForEachWorking
// does not hold a non-reentrant lock while invoking it.
func (w *SessionWatcher) poll() {
	w.state.ForEachWorking(w.checkSession)
}
// checkSession decides whether one working session should be completed.
// The checks run in order and the first one that applies wins:
//
//  1. a done-signal file "agent-done-<name>" exists in signalDir;
//  2. the last 5 pane lines show the Claude prompt and no spinner;
//  3. more than idleTimeout has elapsed since sess.AssignedAt.
//
// If the pane cannot be captured the session is left untouched for this
// poll, which also skips the timeout check.
func (w *SessionWatcher) checkSession(name string, sess *state.SessionState) {
	sigFile := filepath.Join(w.signalDir, "agent-done-"+name)

	// Done-signal file written by hooks or external scripts.
	_, statErr := os.Stat(sigFile)
	if statErr == nil {
		w.completeSession(name, sigFile)
		return
	}

	// Look at the tail of the pane.
	paneTail, captureErr := w.tmux.CapturePaneTail(name, 5)
	if captureErr != nil {
		// Session may have vanished; lifecycle.Manager handles recreation.
		return
	}

	// Prompt visible and nothing spinning → Claude has finished.
	finished := hasClaudePrompt(paneTail) && !hasSpinner(paneTail)
	if finished {
		w.completeSession(name, sigFile)
		return
	}

	// Idle-timeout guard: only applies once an assignment time is recorded.
	assigned := sess.AssignedAt
	if assigned == nil || time.Since(*assigned) <= w.idleTimeout {
		return
	}
	w.logger.Printf("[watcher] TIMEOUT session=%q elapsed=%v idleTimeout=%v",
		name, time.Since(*assigned).Round(time.Second), w.idleTimeout)
	w.completeSession(name, sigFile)
}
// completeSession marks the session idle, removes its done-signal file and
// notifies the dispatcher through the done channel.
//
// For pool sessions, /exit is sent first to recycle the Claude process so the
// next dispatch starts with a clean context. For dedicated sessions, /exit is
// skipped — those host the operator's interactive work and must not be
// terminated when a side-dispatched task happens to finish.
//
// name is the tmux session name; sigFile is the path of that session's
// done-signal file, which may or may not exist. Completion is best effort: a
// failure to send /exit or to remove the signal file is logged and does not
// stop the session from being marked idle. If the done channel is full the
// notification is dropped and logged rather than blocking the poll loop.
func (w *SessionWatcher) completeSession(name, sigFile string) {
	if w.isDedicated(name) {
		w.logger.Printf("[watcher] DONE session=%q (dedicated — leaving Claude alive)", name)
	} else {
		w.logger.Printf("[watcher] DONE session=%q → /exit", name)
		if err := w.tmux.SendKeys(name, "/exit"); err != nil {
			// Surface the failure instead of discarding it: the session is
			// still marked idle below, but its Claude process was probably
			// not recycled.
			w.logger.Printf("[watcher] session=%q: sending /exit failed: %v", name, err)
		}
		// Presumably gives the /exit keystrokes time to be processed before
		// the session is reported as idle.
		time.Sleep(500 * time.Millisecond)
	}

	w.state.SetIdle(name)

	// The signal file is absent whenever completion was detected through the
	// prompt or the timeout, so "not exist" is expected. Any other failure
	// matters: a stale signal file would complete the session's next task on
	// the very next poll.
	if err := os.Remove(sigFile); err != nil && !os.IsNotExist(err) {
		w.logger.Printf("[watcher] session=%q: removing signal file %q failed: %v", name, sigFile, err)
	}

	select {
	case w.done <- name:
	default:
		w.logger.Printf("[watcher] done channel full, dropping signal for %q", name)
	}
}
// isDedicated tells whether name is the Name of one of the sessions listed
// under Pool.Dedicated in the watcher's configuration. A watcher without a
// configuration has no dedicated sessions.
func (w *SessionWatcher) isDedicated(name string) bool {
	if w.config != nil {
		for _, dedicated := range w.config.Pool.Dedicated {
			if dedicated.Name == name {
				return true
			}
		}
	}
	return false
}
// hasClaudePrompt returns true if the Claude Code interactive prompt glyph is
// visible anywhere in output (typically the tail of a tmux pane).
//
// The glyph must be a non-empty literal: strings.Contains reports true for an
// empty substring on every input, which would make every pane look idle.
//
// NOTE(review): the glyph is restored here as "❯" (U+276F), the character
// that appears to have been lost from this literal — confirm it matches the
// prompt rendered by the Claude Code version in use.
func hasClaudePrompt(output string) bool {
	const promptGlyph = "❯"
	return strings.Contains(output, promptGlyph)
}
// hasSpinner reports whether output contains Claude Code's progress
// indicator, as recognised by spinnerRe.
func hasSpinner(output string) bool {
	spinning := spinnerRe.MatchString(output)
	return spinning
}