// Package main is the entrypoint for the claude-failover daemon. package main import ( "context" "flag" "log" "os" "os/signal" "syscall" "time" "forge.secuaas.ovh/olivier/claude-failover/internal/api" "forge.secuaas.ovh/olivier/claude-failover/internal/config" "forge.secuaas.ovh/olivier/claude-failover/internal/dispatcher" "forge.secuaas.ovh/olivier/claude-failover/internal/janitor" "forge.secuaas.ovh/olivier/claude-failover/internal/lifecycle" "forge.secuaas.ovh/olivier/claude-failover/internal/notify" "forge.secuaas.ovh/olivier/claude-failover/internal/quota" "forge.secuaas.ovh/olivier/claude-failover/internal/state" "forge.secuaas.ovh/olivier/claude-failover/internal/switcher" "forge.secuaas.ovh/olivier/claude-failover/internal/tmux" "forge.secuaas.ovh/olivier/claude-failover/internal/watcher" ) const version = "0.1.0" func main() { var cfgPath string flag.StringVar(&cfgPath, "config", "config.yaml", "path to YAML config file") flag.Parse() log.SetFlags(log.LstdFlags | log.Lmicroseconds | log.LUTC) log.Printf("claude-failover v%s starting (config=%s)", version, cfgPath) cfg, err := config.Load(cfgPath) if err != nil { log.Fatalf("config load failed: %v", err) } log.Printf("config loaded: %d account(s), pool min=%d max=%d", len(cfg.Accounts), cfg.Pool.Autonomous.Min, cfg.Pool.Autonomous.Max) // Initialise state — reload from disk if a snapshot exists. stateFile := cfg.Checkpoint.Dir + "/state.json" s, err := state.LoadFromFile(stateFile) if err != nil { log.Fatalf("state init failed: %v", err) } log.Printf("state loaded (%d sessions tracked)", len(s.Sessions)) // Initialise tmux client and lifecycle manager. tmuxClient := tmux.NewExecClient() lm := lifecycle.New(tmuxClient, s, cfg) // Validate (and self-heal) the shared-state symlinks BEFORE spawning // any sessions. A divergent link would silently fork transcripts // between accounts and make failover destructive, so we fail fast here // rather than after work is in flight. if err := lm.ValidateSharedSymlinks(); err != nil { log.Fatalf("shared symlinks validation failed: %v", err) } lm.EnsureAllSessions() // Block until SIGINT or SIGTERM. ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM) defer cancel() go lm.Run(ctx) // Notifier — reads credentials from environment variables. notifier := notify.New(cfg) // Session Watcher — detects when sessions finish their tasks. sw := watcher.New(tmuxClient, s, cfg) go sw.Run(ctx) // Quota Monitor — polls panes for quota exhaustion signals. qm := quota.New(tmuxClient, s, cfg) go qm.Run(ctx) // Account Switcher — orchestrates account failover on quota exhaustion. as := switcher.New(tmuxClient, s, cfg, qm.SwitchChan(), notifier) go as.Run(ctx) // Dispatcher — assigns inbox tasks to idle sessions. disp := dispatcher.New(tmuxClient, s, cfg, sw.DoneChan()) go disp.Run(ctx) // Janitor — periodic cleanup of orphaned files and stale status.json. jan := janitor.New(s, cfg.Dispatcher.ProjectsDir) go jan.Run(ctx) // State flush loop — persists state to disk every 10 seconds. go func() { ticker := time.NewTicker(10 * time.Second) defer ticker.Stop() for { select { case <-ctx.Done(): return case <-ticker.C: s.Flush() //nolint:errcheck } } }() // Start HTTP API server. listenAddr := cfg.MCPHTTP.Listen if listenAddr == "" { listenAddr = "127.0.0.1:9090" } srv := api.New(listenAddr, s) go func() { if err := srv.Start(); err != nil { log.Printf("API server error: %v", err) os.Exit(1) } }() log.Printf("claude-failover v%s — all goroutines running", version) <-ctx.Done() log.Printf("shutdown signal received — flushing state and exiting") if err := s.Flush(); err != nil { log.Printf("state flush warning: %v", err) } }