// Package lifecycle provides the SessionLifecycleManager, which continuously // monitors tmux sessions and recreates any that have died unexpectedly. package lifecycle import ( "context" "log" "time" "forge.secuaas.ovh/olivier/claude-failover/internal/config" "forge.secuaas.ovh/olivier/claude-failover/internal/state" "forge.secuaas.ovh/olivier/claude-failover/internal/tmux" ) // Manager reconciles the desired pool state (from config) against the actual // tmux sessions, recreating any that have disappeared. type Manager struct { tmux tmux.Client state *state.State config *config.Config logger *log.Logger interval time.Duration } // New creates a Manager with a default reconciliation interval of 15 seconds. func New(tc tmux.Client, s *state.State, cfg *config.Config) *Manager { return &Manager{ tmux: tc, state: s, config: cfg, logger: log.Default(), interval: 15 * time.Second, } } // Run starts the reconciliation loop, ticking every m.interval until ctx is cancelled. func (m *Manager) Run(ctx context.Context) { ticker := time.NewTicker(m.interval) defer ticker.Stop() for { select { case <-ctx.Done(): return case <-ticker.C: m.reconcile() } } } // EnsureAllSessions creates all configured sessions that are not yet present in tmux. // It is intended to be called once at daemon startup before Run is launched. func (m *Manager) EnsureAllSessions() { for _, ds := range m.config.Pool.Dedicated { if !m.tmux.HasSession(ds.Name) { if err := m.tmux.CreateSession(ds.Name, ds.Project); err != nil { m.logger.Printf("[lifecycle] EnsureAllSessions: failed to create session %q: %v", ds.Name, err) } else { m.logger.Printf("[lifecycle] EnsureAllSessions: created session %q (workdir=%s)", ds.Name, ds.Project) m.state.SetIdle(ds.Name) } } } // Ensure autonomous pool sessions (prefix + index). prefix := m.config.Pool.Autonomous.Prefix if prefix == "" { prefix = "ccl-auto-" } for i := 0; i < m.config.Pool.Autonomous.Min; i++ { name := sessionName(prefix, i) if !m.tmux.HasSession(name) { if err := m.tmux.CreateSession(name, ""); err != nil { m.logger.Printf("[lifecycle] EnsureAllSessions: failed to create autonomous session %q: %v", name, err) } else { m.logger.Printf("[lifecycle] EnsureAllSessions: created autonomous session %q", name) m.state.SetIdle(name) } } } } // reconcile checks every configured session and repairs missing ones. func (m *Manager) reconcile() { // Reconcile dedicated sessions. for _, ds := range m.config.Pool.Dedicated { m.reconcileSession(ds.Name, ds.Project) } // Reconcile the autonomous pool (min sessions). prefix := m.config.Pool.Autonomous.Prefix if prefix == "" { prefix = "ccl-auto-" } for i := 0; i < m.config.Pool.Autonomous.Min; i++ { name := sessionName(prefix, i) m.reconcileSession(name, "") } } // reconcileSession handles a single named session. func (m *Manager) reconcileSession(name, workdir string) { has := m.tmux.HasSession(name) st := m.state.GetSession(name) if has { // Session exists — if it's supposed to be working, verify it still looks active. if st != nil && st.State == "working" { tail, err := m.tmux.CapturePaneTail(name, 5) if err != nil { m.logger.Printf("[lifecycle] reconcile: cannot capture pane for %q: %v", name, err) } // A session that has exited to the shell prompt after a Claude process crash // will show a shell prompt. We just log a warning here; deeper heuristics can // be added in future phases. _ = tail } return } // Session is missing. if st == nil || st.State == "idle" || st.State == "" { m.logger.Printf("[lifecycle] RECREATED: session %q was absent (state=idle) — creating", name) if err := m.tmux.CreateSession(name, workdir); err != nil { m.logger.Printf("[lifecycle] reconcile: failed to recreate %q: %v", name, err) return } m.state.SetIdle(name) } else if st.State == "working" { m.logger.Printf("[lifecycle] RECOVERED: session %q crashed while working (task=%v) — marking failed and recreating", name, deref(st.Task)) m.state.SetFailed(name) if err := m.tmux.CreateSession(name, workdir); err != nil { m.logger.Printf("[lifecycle] reconcile: failed to recreate %q after recovery: %v", name, err) return } m.state.SetIdle(name) } } // sessionName builds a session name from a prefix and a zero-based index. func sessionName(prefix string, i int) string { return prefix + itoa(i) } // itoa converts an integer to its decimal string representation without importing strconv. func itoa(n int) string { if n == 0 { return "0" } b := make([]byte, 0, 10) for n > 0 { b = append([]byte{byte('0' + n%10)}, b...) n /= 10 } return string(b) } // deref safely dereferences a *string, returning "" if nil. func deref(s *string) string { if s == nil { return "" } return *s }