feat: SessionLifecycleManager — auto-detect and repair dead tmux sessions

- Add internal/lifecycle/manager.go with Manager struct, Run() ticker loop
  (15s interval), EnsureAllSessions() for boot-time session creation, and
  reconcile() that recreates idle sessions and recovers working ones via
  SetFailed + CreateSession
- Add state.SetFailed() to record crash timestamp on SessionState
- Add internal/lifecycle/manager_test.go with mock tmux client and 3 tests:
  TestReconcileCreatesDeadSession, TestReconcileRecoversCrashedSession,
  TestEnsureAllSessions — all pass
- Wire lifecycle.Manager into cmd/claude-failover/main.go after state init

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Ubuntu 2026-04-14 18:02:25 +00:00
parent 2d43580c18
commit 978b60ccf7
10 changed files with 810 additions and 32 deletions

41
internal/api/server.go Normal file
View file

@@ -0,0 +1,41 @@
// Package api exposes the HTTP control-plane used by the MCP gateway
// and the orchestrator dashboard.
package api
import (
	"fmt"
	"net/http"
	"time"

	"forge.secuaas.ovh/olivier/claude-failover/internal/state"
)
// version is the version string reported in the /health response body.
const version = "0.1.0"
// Server is a minimal HTTP server exposing /health and /status.
type Server struct {
// addr is the address Start listens on.
addr string
// state supplies the JSON document served on /status.
state *state.State
}
// New builds a Server that will listen on addr and serve the contents of s.
// It only constructs the value; no listener is opened until Start is called.
func New(addr string, s *state.State) *Server {
	srv := new(Server)
	srv.addr = addr
	srv.state = s
	return srv
}
// Start registers the /health and /status routes on a fresh ServeMux and
// serves them on s.addr. It blocks until the server stops and returns the
// error reported by the underlying http.Server.
//
// An explicit http.Server is used instead of http.ListenAndServe because the
// latter runs with no timeouts at all, letting a slow or stalled client hold
// a connection open indefinitely. Both endpoints return small bodies, so the
// limits below are generous.
func (s *Server) Start() error {
	mux := http.NewServeMux()
	mux.HandleFunc("/health", s.handleHealth)
	mux.HandleFunc("/status", s.handleStatus)

	srv := &http.Server{
		Addr:              s.addr,
		Handler:           mux,
		ReadHeaderTimeout: 5 * time.Second,
		ReadTimeout:       10 * time.Second,
		WriteTimeout:      10 * time.Second,
		IdleTimeout:       60 * time.Second,
	}
	return srv.ListenAndServe()
}
// handleHealth replies with a small JSON document carrying a fixed "ok"
// status and the package version, labelled as application/json.
func (s *Server) handleHealth(w http.ResponseWriter, r *http.Request) {
	const bodyFormat = `{"status":"ok","version":%q}`

	hdr := w.Header()
	hdr.Set("Content-Type", "application/json")
	fmt.Fprintf(w, bodyFormat, version)
}
// handleStatus writes the bytes returned by the state's JSON method as the
// response body, labelled as application/json.
func (s *Server) handleStatus(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Content-Type", "application/json")

	payload := s.state.JSON()
	// The byte count and error from Write are deliberately not inspected.
	_, _ = w.Write(payload)
}