Observed: tasks from filesecure/.agent-queue/inbox and SecuScan/.agent-queue/inbox were being routed into ccl-1-conformvault and ccl-2-scanyze whenever those sessions happened to be idle. Those are the operator's manual interactive Claude sessions, not dispatch targets — the auto-dispatch was (a) hijacking a Claude instance the operator was using and (b) triggering /exit via the watcher's completion path when the side-task finished, kicking the operator out mid-conversation.

findFreeSession was iterating Pool.Dedicated before the autonomous pool, so any idle dedicated session was the first candidate.

- Dispatcher.findFreeSession: remove the Dedicated loop entirely. Auto-dispatch is now pool-only (ccl-auto-11..20).
- Watcher.completeSession: defense-in-depth — even if a dedicated session ever ends up in "working" state, it is no longer /exit'd; it is just marked idle. Pool /exit behaviour is unchanged (context recycle).
- Tests: new TestFindFreeSessionSkipsDedicated proves the routing; 3 existing tests rewritten to use the autonomous pool instead of relying on Dedicated as a fake pool.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
160 lines
4.6 KiB
Go
160 lines
4.6 KiB
Go
// Package watcher detects when a Claude Code session has finished its current
|
||
// task and signals the dispatcher to assign a new one.
|
||
package watcher
|
||
|
||
import (
|
||
"context"
|
||
"log"
|
||
"os"
|
||
"path/filepath"
|
||
"regexp"
|
||
"strings"
|
||
"time"
|
||
|
||
"forge.secuaas.ovh/olivier/claude-failover/internal/config"
|
||
"forge.secuaas.ovh/olivier/claude-failover/internal/state"
|
||
"forge.secuaas.ovh/olivier/claude-failover/internal/tmux"
|
||
)
|
||
|
||
// spinnerRe recognises Claude Code's progress indicator: one or more digits,
// a literal "s", whitespace, and then either "·" or a braille spinner frame
// (for example "12s ·" or "3s ⠋").
var spinnerRe = regexp.MustCompile(`\d+s\s+[·⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏]`)
|
||
|
||
// SessionWatcher monitors active tmux sessions and emits on DoneChan when
|
||
// a Claude Code session returns to the idle prompt (❯) or exceeds its timeout.
|
||
type SessionWatcher struct {
|
||
tmux tmux.Client
|
||
state *state.State
|
||
config *config.Config
|
||
done chan string
|
||
interval time.Duration
|
||
idleTimeout time.Duration
|
||
signalDir string
|
||
logger *log.Logger
|
||
}
|
||
|
||
// New creates a SessionWatcher with defaults from cfg.
|
||
func New(tc tmux.Client, s *state.State, cfg *config.Config) *SessionWatcher {
|
||
interval := cfg.Watcher.Interval.Duration
|
||
if interval == 0 {
|
||
interval = 30 * time.Second
|
||
}
|
||
idleTimeout := cfg.Watcher.IdleTimeout.Duration
|
||
if idleTimeout == 0 {
|
||
idleTimeout = 60 * time.Minute
|
||
}
|
||
signalDir := cfg.Watcher.DoneSignalDir
|
||
if signalDir == "" {
|
||
signalDir = "/tmp"
|
||
}
|
||
return &SessionWatcher{
|
||
tmux: tc,
|
||
state: s,
|
||
config: cfg,
|
||
done: make(chan string, 32),
|
||
interval: interval,
|
||
idleTimeout: idleTimeout,
|
||
signalDir: signalDir,
|
||
logger: log.Default(),
|
||
}
|
||
}
|
||
|
||
// DoneChan returns the channel on which completed session names are sent.
|
||
func (w *SessionWatcher) DoneChan() <-chan string {
|
||
return w.done
|
||
}
|
||
|
||
// Run starts the watcher loop until ctx is cancelled.
|
||
func (w *SessionWatcher) Run(ctx context.Context) {
|
||
ticker := time.NewTicker(w.interval)
|
||
defer ticker.Stop()
|
||
for {
|
||
select {
|
||
case <-ctx.Done():
|
||
return
|
||
case <-ticker.C:
|
||
w.poll()
|
||
}
|
||
}
|
||
}
|
||
|
||
// poll inspects all currently-working sessions once.
|
||
func (w *SessionWatcher) poll() {
|
||
w.state.ForEachWorking(func(name string, sess *state.SessionState) {
|
||
w.checkSession(name, sess)
|
||
})
|
||
}
|
||
|
||
// checkSession evaluates a single working session for completion or timeout.
|
||
func (w *SessionWatcher) checkSession(name string, sess *state.SessionState) {
|
||
// 1. Check the done-signal file written by hooks or external scripts.
|
||
sigFile := filepath.Join(w.signalDir, "agent-done-"+name)
|
||
if _, err := os.Stat(sigFile); err == nil {
|
||
w.completeSession(name, sigFile)
|
||
return
|
||
}
|
||
|
||
// 2. Capture the last 5 pane lines.
|
||
tail, err := w.tmux.CapturePaneTail(name, 5)
|
||
if err != nil {
|
||
// Session may have vanished; lifecycle.Manager handles recreation.
|
||
return
|
||
}
|
||
|
||
// 3. Idle prompt ❯ without an active spinner → Claude has finished.
|
||
if hasClaudePrompt(tail) && !hasSpinner(tail) {
|
||
w.completeSession(name, sigFile)
|
||
return
|
||
}
|
||
|
||
// 4. Idle-timeout guard.
|
||
if sess.AssignedAt != nil && time.Since(*sess.AssignedAt) > w.idleTimeout {
|
||
w.logger.Printf("[watcher] TIMEOUT session=%q elapsed=%v idleTimeout=%v",
|
||
name, time.Since(*sess.AssignedAt).Round(time.Second), w.idleTimeout)
|
||
w.completeSession(name, sigFile)
|
||
}
|
||
}
|
||
|
||
// completeSession marks the session idle and notifies the dispatcher. For
|
||
// pool sessions, /exit is sent to recycle the Claude process so the next
|
||
// dispatch starts with a clean context. For dedicated sessions, /exit is
|
||
// skipped — those host the operator's interactive work and must not be
|
||
// terminated when a side-dispatched task happens to finish.
|
||
func (w *SessionWatcher) completeSession(name, sigFile string) {
|
||
if w.isDedicated(name) {
|
||
w.logger.Printf("[watcher] DONE session=%q (dedicated — leaving Claude alive)", name)
|
||
} else {
|
||
w.logger.Printf("[watcher] DONE session=%q → /exit", name)
|
||
_ = w.tmux.SendKeys(name, "/exit")
|
||
time.Sleep(500 * time.Millisecond)
|
||
}
|
||
w.state.SetIdle(name)
|
||
os.Remove(sigFile)
|
||
select {
|
||
case w.done <- name:
|
||
default:
|
||
w.logger.Printf("[watcher] done channel full, dropping signal for %q", name)
|
||
}
|
||
}
|
||
|
||
// isDedicated reports whether name matches a configured dedicated session.
|
||
func (w *SessionWatcher) isDedicated(name string) bool {
|
||
if w.config == nil {
|
||
return false
|
||
}
|
||
for _, ds := range w.config.Pool.Dedicated {
|
||
if ds.Name == name {
|
||
return true
|
||
}
|
||
}
|
||
return false
|
||
}
|
||
|
||
// hasClaudePrompt reports whether the Claude Code prompt glyph (❯) occurs
// anywhere in output.
func hasClaudePrompt(output string) bool {
	const promptGlyph = '❯'
	return strings.ContainsRune(output, promptGlyph)
}
|
||
|
||
// hasSpinner returns true if Claude Code's progress spinner is active.
|
||
func hasSpinner(output string) bool {
|
||
return spinnerRe.MatchString(output)
|
||
}
|