Anthropic HTTP 500 errors surface in the TUI with payloads containing "rate limit" text, which the monitor was matching against quotaPatterns and treating as a real 429 quota hit. With no cooldown and no confirmation, a burst of 500s produced sub-minute ping-pong swaps that tore down user sessions.

Two-layer fix:
- quota.reactivate_cooldown (already in config, 5m) now gates the monitor too — not just the dispatcher. A completed swap suppresses further detection for the cooldown window.
- A hit with no parseable reset time is treated as suspected only on the first poll; a second consecutive poll is required before emitting SwapRequested. Legitimate 429s with "resets in ..." still swap instantly on the first detection.

Adds state.RecordSwap / LastSwapInfo for the cooldown, and a forensic log line on every detection: trigger_session, matched pattern, 120-char pane snippet.

Tests cover: instant swap with reset, 2-poll confirmation without reset, and suspected-state reset on recovery.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
203 lines
5.6 KiB
Go
203 lines
5.6 KiB
Go
package quota
|
||
|
||
import (
|
||
"testing"
|
||
|
||
"forge.secuaas.ovh/olivier/claude-failover/internal/config"
|
||
"forge.secuaas.ovh/olivier/claude-failover/internal/state"
|
||
)
|
||
|
||
// mockTmux is an in-memory tmux stand-in for the quota tests. It records
// which sessions exist and serves canned pane output per session name.
type mockTmux struct {
	// sessions holds the names of sessions that are reported as existing.
	sessions map[string]bool
	// paneOutput maps a session name to the text CapturePaneTail returns.
	paneOutput map[string]string
}

// newMockTmux returns a mockTmux with both maps initialised and empty, so
// tests can assign into them directly.
func newMockTmux() *mockTmux {
	return &mockTmux{
		sessions:   make(map[string]bool),
		paneOutput: make(map[string]string),
	}
}

// HasSession reports whether name was registered in the sessions map.
func (m *mockTmux) HasSession(name string) bool { return m.sessions[name] }

// CreateSession marks name as existing. The second argument is ignored and
// the call never fails.
func (m *mockTmux) CreateSession(name, _ string) error {
	m.sessions[name] = true
	return nil
}

// KillSession is a no-op that always succeeds; the session stays registered.
func (m *mockTmux) KillSession(_ string) error { return nil }

// SendKeys is a no-op that always succeeds.
func (m *mockTmux) SendKeys(_, _ string) error { return nil }

// CapturePaneTail returns the canned output stored for session. The line
// count argument is ignored; an unknown session yields an empty string.
func (m *mockTmux) CapturePaneTail(session string, _ int) (string, error) {
	return m.paneOutput[session], nil
}
|
||
|
||
// TestIsQuotaExhausted verifies pattern matching on pane output.
|
||
func TestIsQuotaExhausted(t *testing.T) {
|
||
cases := []struct {
|
||
input string
|
||
want bool
|
||
}{
|
||
{"You've hit your limit for Claude Pro.", true},
|
||
{"rate limit exceeded", true},
|
||
{"quota exceeded for this period", true},
|
||
{"Usage limit reached", true},
|
||
{"Too many requests", true},
|
||
{"Some normal output ❯", false},
|
||
{"❯ ", false},
|
||
{"still running 5s · ", false},
|
||
}
|
||
for _, c := range cases {
|
||
if got := isQuotaExhausted(c.input); got != c.want {
|
||
t.Errorf("isQuotaExhausted(%q) = %v, want %v", c.input, got, c.want)
|
||
}
|
||
}
|
||
}
|
||
|
||
// TestExtractResetTime parses various reset time formats.
|
||
func TestExtractResetTime(t *testing.T) {
|
||
cases := []struct {
|
||
input string
|
||
want string
|
||
}{
|
||
{"Usage resets 8pm", "8pm"},
|
||
{"Your quota resets at 11:30pm", "11:30pm"},
|
||
{"resets in 45 minutes", "in 45 minutes"},
|
||
{"resets in 2 hours", "in 2 hours"},
|
||
{"no reset info here", ""},
|
||
}
|
||
for _, c := range cases {
|
||
if got := extractResetTime(c.input); got != c.want {
|
||
t.Errorf("extractResetTime(%q) = %q, want %q", c.input, got, c.want)
|
||
}
|
||
}
|
||
}
|
||
|
||
// TestPollTriggersSwitchOnTwoBlockedPoolWithReset verifies a legitimate 429
|
||
// (reset time present) triggers a swap immediately on the first poll.
|
||
func TestPollTriggersSwitchOnTwoBlockedPoolWithReset(t *testing.T) {
|
||
tc := newMockTmux()
|
||
tc.sessions["ccl-auto-0"] = true
|
||
tc.sessions["ccl-auto-1"] = true
|
||
tc.paneOutput["ccl-auto-0"] = "You've hit your limit for Claude Pro. resets in 45 minutes"
|
||
tc.paneOutput["ccl-auto-1"] = "rate limit exceeded — resets at 8pm"
|
||
|
||
s := state.New("")
|
||
s.SetActiveAccount("compte1")
|
||
|
||
cfg := &config.Config{
|
||
Pool: config.PoolConfig{
|
||
Autonomous: config.AutonomousConfig{Prefix: "ccl-auto-", Max: 2},
|
||
},
|
||
}
|
||
m := New(tc, s, cfg)
|
||
m.poll()
|
||
|
||
select {
|
||
case req := <-m.switchCh:
|
||
if req.From != "compte1" {
|
||
t.Errorf("expected From=compte1, got %q", req.From)
|
||
}
|
||
if req.ResetTime == "" {
|
||
t.Errorf("expected non-empty ResetTime")
|
||
}
|
||
default:
|
||
t.Fatal("expected SwitchRequest on channel")
|
||
}
|
||
}
|
||
|
||
// TestPollRequiresConfirmationWhenNoResetTime verifies that a hit without a
|
||
// parseable reset time does not trigger a swap on a single poll. A second
|
||
// consecutive hit is required. This guards against transient Anthropic 500
|
||
// errors whose payload happens to contain "rate limit".
|
||
func TestPollRequiresConfirmationWhenNoResetTime(t *testing.T) {
|
||
tc := newMockTmux()
|
||
tc.sessions["my-session"] = true
|
||
tc.paneOutput["my-session"] = "quota exceeded" // no reset time
|
||
|
||
s := state.New("")
|
||
s.SetActiveAccount("compte1")
|
||
|
||
cfg := &config.Config{
|
||
Pool: config.PoolConfig{
|
||
Dedicated: []config.DedicatedSession{{Name: "my-session"}},
|
||
Autonomous: config.AutonomousConfig{Max: 0},
|
||
},
|
||
}
|
||
m := New(tc, s, cfg)
|
||
|
||
// First poll — suspected only, no swap yet.
|
||
m.poll()
|
||
select {
|
||
case req := <-m.switchCh:
|
||
t.Fatalf("unexpected SwitchRequest on first poll: %+v", req)
|
||
default:
|
||
}
|
||
|
||
// Second poll — confirmed, swap emitted.
|
||
m.poll()
|
||
select {
|
||
case req := <-m.switchCh:
|
||
if req.From != "compte1" {
|
||
t.Errorf("expected From=compte1, got %q", req.From)
|
||
}
|
||
if req.ResetTime != "" {
|
||
t.Errorf("expected empty ResetTime, got %q", req.ResetTime)
|
||
}
|
||
default:
|
||
t.Fatal("expected SwitchRequest on confirmation poll")
|
||
}
|
||
}
|
||
|
||
// TestPollSuspectedHitClearedOnRecovery verifies a transient hit followed by
|
||
// a clean poll does NOT trigger a swap on a subsequent hit — the suspected
|
||
// state must be reset when detection clears.
|
||
func TestPollSuspectedHitClearedOnRecovery(t *testing.T) {
|
||
tc := newMockTmux()
|
||
tc.sessions["my-session"] = true
|
||
tc.paneOutput["my-session"] = "quota exceeded"
|
||
|
||
s := state.New("")
|
||
s.SetActiveAccount("compte1")
|
||
|
||
cfg := &config.Config{
|
||
Pool: config.PoolConfig{
|
||
Dedicated: []config.DedicatedSession{{Name: "my-session"}},
|
||
Autonomous: config.AutonomousConfig{Max: 0},
|
||
},
|
||
}
|
||
m := New(tc, s, cfg)
|
||
|
||
m.poll() // suspected
|
||
tc.paneOutput["my-session"] = "all good ❯ "
|
||
m.poll() // cleared
|
||
tc.paneOutput["my-session"] = "quota exceeded"
|
||
m.poll() // re-suspected, NOT confirmed yet
|
||
|
||
select {
|
||
case req := <-m.switchCh:
|
||
t.Fatalf("unexpected SwitchRequest after recovery: %+v", req)
|
||
default:
|
||
}
|
||
}
|
||
|
||
// TestPollNoTriggerWhenBelowThreshold verifies no swap for a single blocked pool session.
|
||
func TestPollNoTriggerWhenBelowThreshold(t *testing.T) {
|
||
tc := newMockTmux()
|
||
tc.sessions["ccl-auto-0"] = true
|
||
tc.sessions["ccl-auto-1"] = true
|
||
tc.paneOutput["ccl-auto-0"] = "rate limit exceeded"
|
||
tc.paneOutput["ccl-auto-1"] = "❯ " // fine
|
||
|
||
s := state.New("")
|
||
cfg := &config.Config{
|
||
Pool: config.PoolConfig{
|
||
Autonomous: config.AutonomousConfig{Prefix: "ccl-auto-", Max: 2},
|
||
},
|
||
}
|
||
m := New(tc, s, cfg)
|
||
m.poll()
|
||
|
||
select {
|
||
case req := <-m.switchCh:
|
||
t.Errorf("unexpected SwitchRequest: %+v", req)
|
||
default:
|
||
// Correct: only 1 blocked pool session, threshold is 2.
|
||
}
|
||
}
|