claude-failover/internal/quota/monitor_test.go
Ubuntu 7c5f8384fa fix(quota): add cooldown + 2-poll confirmation to prevent swap ping-pong
Anthropic HTTP 500 errors surface in the TUI with payloads containing
"rate limit" text, which the monitor was matching against quotaPatterns
and treating as a real 429 quota hit. With no cooldown and no
confirmation, a burst of 500s produced sub-minute ping-pong swaps that
tore down user sessions.

Two-layer fix:
- quota.reactivate_cooldown (already in config, 5m) now gates the
  monitor too — not just the dispatcher. A completed swap suppresses
  further detection for the cooldown window.
- A hit with no parseable reset time is only marked as suspected on
  the first poll; a second consecutive poll showing the hit is
  required before emitting SwapRequested. Legitimate 429s with
  "resets in ..." still swap instantly on the first detection.

Adds state.RecordSwap / LastSwapInfo for the cooldown, and a
forensic log line on every detection: trigger_session, matched
pattern, 120-char pane snippet.

Tests cover: instant swap with reset, 2-poll confirmation without
reset, and suspected-state reset on recovery.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-15 19:18:27 +00:00

203 lines
5.6 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package quota
import (
"testing"
"forge.secuaas.ovh/olivier/claude-failover/internal/config"
"forge.secuaas.ovh/olivier/claude-failover/internal/state"
)
// mockTmux is an in-memory tmux stand-in for the quota tests. It tracks which
// session names exist and serves canned pane text per session.
type mockTmux struct {
	// sessions marks the session names that HasSession reports as present.
	sessions map[string]bool
	// paneOutput holds the text CapturePaneTail returns, keyed by session name.
	paneOutput map[string]string
}

// newMockTmux returns a mockTmux whose two maps are allocated and empty.
func newMockTmux() *mockTmux {
	mock := new(mockTmux)
	mock.sessions = map[string]bool{}
	mock.paneOutput = map[string]string{}
	return mock
}

// HasSession reports whether name is currently registered in sessions.
func (m *mockTmux) HasSession(name string) bool {
	return m.sessions[name]
}

// CreateSession registers name as an existing session. The second argument
// is ignored and the returned error is always nil.
func (m *mockTmux) CreateSession(name, _ string) error {
	m.sessions[name] = true
	return nil
}

// KillSession does nothing and always succeeds; the session stays registered.
func (m *mockTmux) KillSession(_ string) error {
	return nil
}

// SendKeys does nothing and always succeeds.
func (m *mockTmux) SendKeys(_, _ string) error {
	return nil
}

// CapturePaneTail returns the canned pane text for session, or "" when none
// has been set. The line-count argument is ignored and the error is always nil.
func (m *mockTmux) CapturePaneTail(session string, _ int) (string, error) {
	text := m.paneOutput[session]
	return text, nil
}
// TestIsQuotaExhausted feeds isQuotaExhausted pane text that should and should
// not be recognised as a quota hit and compares the verdict for each sample.
func TestIsQuotaExhausted(t *testing.T) {
	type sample struct {
		input string
		want  bool
	}
	samples := []sample{
		{input: "You've hit your limit for Claude Pro.", want: true},
		{input: "rate limit exceeded", want: true},
		{input: "quota exceeded for this period", want: true},
		{input: "Usage limit reached", want: true},
		{input: "Too many requests", want: true},
		{input: "Some normal output ", want: false},
		{input: " ", want: false},
		{input: "still running 5s · ", want: false},
	}
	for i := range samples {
		sm := samples[i]
		got := isQuotaExhausted(sm.input)
		if got == sm.want {
			continue
		}
		t.Errorf("isQuotaExhausted(%q) = %v, want %v", sm.input, got, sm.want)
	}
}
// TestExtractResetTime checks the reset-time text extractResetTime pulls out of
// several message phrasings, including one with no reset information at all.
func TestExtractResetTime(t *testing.T) {
	type sample struct {
		input string
		want  string
	}
	samples := []sample{
		{input: "Usage resets 8pm", want: "8pm"},
		{input: "Your quota resets at 11:30pm", want: "11:30pm"},
		{input: "resets in 45 minutes", want: "in 45 minutes"},
		{input: "resets in 2 hours", want: "in 2 hours"},
		{input: "no reset info here", want: ""},
	}
	for i := range samples {
		sm := samples[i]
		got := extractResetTime(sm.input)
		if got == sm.want {
			continue
		}
		t.Errorf("extractResetTime(%q) = %q, want %q", sm.input, got, sm.want)
	}
}
// TestPollTriggersSwitchOnTwoBlockedPoolWithReset verifies a legitimate 429
// (reset time present) triggers a swap immediately on the first poll.
func TestPollTriggersSwitchOnTwoBlockedPoolWithReset(t *testing.T) {
tc := newMockTmux()
tc.sessions["ccl-auto-0"] = true
tc.sessions["ccl-auto-1"] = true
tc.paneOutput["ccl-auto-0"] = "You've hit your limit for Claude Pro. resets in 45 minutes"
tc.paneOutput["ccl-auto-1"] = "rate limit exceeded — resets at 8pm"
s := state.New("")
s.SetActiveAccount("compte1")
cfg := &config.Config{
Pool: config.PoolConfig{
Autonomous: config.AutonomousConfig{Prefix: "ccl-auto-", Max: 2},
},
}
m := New(tc, s, cfg)
m.poll()
select {
case req := <-m.switchCh:
if req.From != "compte1" {
t.Errorf("expected From=compte1, got %q", req.From)
}
if req.ResetTime == "" {
t.Errorf("expected non-empty ResetTime")
}
default:
t.Fatal("expected SwitchRequest on channel")
}
}
// TestPollRequiresConfirmationWhenNoResetTime verifies that a hit without a
// parseable reset time does not trigger a swap on a single poll. A second
// consecutive hit is required. This guards against transient Anthropic 500
// errors whose payload happens to contain "rate limit".
func TestPollRequiresConfirmationWhenNoResetTime(t *testing.T) {
tc := newMockTmux()
tc.sessions["my-session"] = true
tc.paneOutput["my-session"] = "quota exceeded" // no reset time
s := state.New("")
s.SetActiveAccount("compte1")
cfg := &config.Config{
Pool: config.PoolConfig{
Dedicated: []config.DedicatedSession{{Name: "my-session"}},
Autonomous: config.AutonomousConfig{Max: 0},
},
}
m := New(tc, s, cfg)
// First poll — suspected only, no swap yet.
m.poll()
select {
case req := <-m.switchCh:
t.Fatalf("unexpected SwitchRequest on first poll: %+v", req)
default:
}
// Second poll — confirmed, swap emitted.
m.poll()
select {
case req := <-m.switchCh:
if req.From != "compte1" {
t.Errorf("expected From=compte1, got %q", req.From)
}
if req.ResetTime != "" {
t.Errorf("expected empty ResetTime, got %q", req.ResetTime)
}
default:
t.Fatal("expected SwitchRequest on confirmation poll")
}
}
// TestPollSuspectedHitClearedOnRecovery checks that a clean poll between two
// reset-less hits wipes the suspected state: the later hit must count as a
// fresh suspicion and must not emit a switch request.
func TestPollSuspectedHitClearedOnRecovery(t *testing.T) {
	const session = "my-session"

	fake := newMockTmux()
	fake.sessions[session] = true
	fake.paneOutput[session] = "quota exceeded"

	st := state.New("")
	st.SetActiveAccount("compte1")

	var cfg config.Config
	cfg.Pool.Dedicated = []config.DedicatedSession{{Name: session}}
	cfg.Pool.Autonomous = config.AutonomousConfig{Max: 0}

	mon := New(fake, st, &cfg)

	// One poll per pane state, in order:
	//   hit   -> suspected
	//   clean -> suspicion cleared
	//   hit   -> suspected again, still unconfirmed
	steps := []string{"quota exceeded", "all good ", "quota exceeded"}
	for _, pane := range steps {
		fake.paneOutput[session] = pane
		mon.poll()
	}

	select {
	case req := <-mon.switchCh:
		t.Fatalf("unexpected SwitchRequest after recovery: %+v", req)
	default:
	}
}
// TestPollNoTriggerWhenBelowThreshold verifies that no swap is requested while
// only one of the two pool sessions is blocked.
//
// The scenarios are chosen so that the pool threshold is the only thing that
// can hold the swap back. A hit carrying a reset time would otherwise swap on
// the first poll, and a hit without one would otherwise swap on the second
// consecutive poll. Polling a reset-less hit just once can never emit a
// request because of the two-poll confirmation rule, so such a check would
// pass even if the threshold logic were broken.
func TestPollNoTriggerWhenBelowThreshold(t *testing.T) {
	scenarios := []struct {
		name string
		pane string // output of the single blocked pool session
	}{
		{name: "hit with reset time", pane: "rate limit exceeded. resets in 45 minutes"},
		{name: "hit without reset time", pane: "rate limit exceeded"},
	}

	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			tc := newMockTmux()
			tc.sessions["ccl-auto-0"] = true
			tc.sessions["ccl-auto-1"] = true
			tc.paneOutput["ccl-auto-0"] = sc.pane
			tc.paneOutput["ccl-auto-1"] = " " // fine

			// Same account setup as the positive poll tests, so the number of
			// blocked pool sessions is the only difference from them.
			s := state.New("")
			s.SetActiveAccount("compte1")

			cfg := &config.Config{
				Pool: config.PoolConfig{
					Autonomous: config.AutonomousConfig{Prefix: "ccl-auto-", Max: 2},
				},
			}
			m := New(tc, s, cfg)

			// Two polls cover both the instant path (reset time present) and
			// the confirmation path (no reset time); neither may emit.
			for attempt := 1; attempt <= 2; attempt++ {
				m.poll()
				select {
				case req := <-m.switchCh:
					t.Fatalf("unexpected SwitchRequest on poll %d: %+v", attempt, req)
				default:
					// Correct: only 1 blocked pool session, threshold is 2.
				}
			}
		})
	}
}