fix(quota): add cooldown + 2-poll confirmation to prevent swap ping-pong
Anthropic HTTP 500 errors surface in the TUI with payloads containing "rate limit" text, which the monitor was matching against quotaPatterns and treating as a real 429 quota hit. With no cooldown and no confirmation, a burst of 500s produced sub-minute ping-pong swaps that tore down user sessions.

Two-layer fix:

- quota.reactivate_cooldown (already in config, 5m) now gates the monitor too — not just the dispatcher. A completed swap suppresses further detection for the cooldown window.
- A hit with no parseable reset time is treated as suspected only on the first poll; a second consecutive poll is required before emitting SwapRequested. Legitimate 429s with "resets in ..." still swap instantly on the first detection.

Adds state.RecordSwap / LastSwapInfo for the cooldown, and a forensic log line on every detection: trigger_session, matched pattern, 120-char pane snippet.

Tests cover: instant swap with reset, 2-poll confirmation without reset, and suspected-state reset on recovery.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
75b5110748
commit
7c5f8384fa
5 changed files with 246 additions and 25 deletions
|
|
@ -69,13 +69,14 @@ func TestExtractResetTime(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
// TestPollTriggersSwitchOnTwoBlockedPool verifies swap trigger for >=2 blocked pool sessions.
|
||||
func TestPollTriggersSwitchOnTwoBlockedPool(t *testing.T) {
|
||||
// TestPollTriggersSwitchOnTwoBlockedPoolWithReset verifies a legitimate 429
|
||||
// (reset time present) triggers a swap immediately on the first poll.
|
||||
func TestPollTriggersSwitchOnTwoBlockedPoolWithReset(t *testing.T) {
|
||||
tc := newMockTmux()
|
||||
tc.sessions["ccl-auto-0"] = true
|
||||
tc.sessions["ccl-auto-1"] = true
|
||||
tc.paneOutput["ccl-auto-0"] = "You've hit your limit for Claude Pro."
|
||||
tc.paneOutput["ccl-auto-1"] = "rate limit exceeded"
|
||||
tc.paneOutput["ccl-auto-0"] = "You've hit your limit for Claude Pro. resets in 45 minutes"
|
||||
tc.paneOutput["ccl-auto-1"] = "rate limit exceeded — resets at 8pm"
|
||||
|
||||
s := state.New("")
|
||||
s.SetActiveAccount("compte1")
|
||||
|
|
@ -93,13 +94,61 @@ func TestPollTriggersSwitchOnTwoBlockedPool(t *testing.T) {
|
|||
if req.From != "compte1" {
|
||||
t.Errorf("expected From=compte1, got %q", req.From)
|
||||
}
|
||||
if req.ResetTime == "" {
|
||||
t.Errorf("expected non-empty ResetTime")
|
||||
}
|
||||
default:
|
||||
t.Fatal("expected SwitchRequest on channel")
|
||||
}
|
||||
}
|
||||
|
||||
// TestPollTriggersSwitchOnOneBlockedInteractive verifies swap trigger for >=1 dedicated session.
|
||||
func TestPollTriggersSwitchOnOneBlockedInteractive(t *testing.T) {
|
||||
// TestPollRequiresConfirmationWhenNoResetTime verifies that a hit without a
|
||||
// parseable reset time does not trigger a swap on a single poll. A second
|
||||
// consecutive hit is required. This guards against transient Anthropic 500
|
||||
// errors whose payload happens to contain "rate limit".
|
||||
func TestPollRequiresConfirmationWhenNoResetTime(t *testing.T) {
|
||||
tc := newMockTmux()
|
||||
tc.sessions["my-session"] = true
|
||||
tc.paneOutput["my-session"] = "quota exceeded" // no reset time
|
||||
|
||||
s := state.New("")
|
||||
s.SetActiveAccount("compte1")
|
||||
|
||||
cfg := &config.Config{
|
||||
Pool: config.PoolConfig{
|
||||
Dedicated: []config.DedicatedSession{{Name: "my-session"}},
|
||||
Autonomous: config.AutonomousConfig{Max: 0},
|
||||
},
|
||||
}
|
||||
m := New(tc, s, cfg)
|
||||
|
||||
// First poll — suspected only, no swap yet.
|
||||
m.poll()
|
||||
select {
|
||||
case req := <-m.switchCh:
|
||||
t.Fatalf("unexpected SwitchRequest on first poll: %+v", req)
|
||||
default:
|
||||
}
|
||||
|
||||
// Second poll — confirmed, swap emitted.
|
||||
m.poll()
|
||||
select {
|
||||
case req := <-m.switchCh:
|
||||
if req.From != "compte1" {
|
||||
t.Errorf("expected From=compte1, got %q", req.From)
|
||||
}
|
||||
if req.ResetTime != "" {
|
||||
t.Errorf("expected empty ResetTime, got %q", req.ResetTime)
|
||||
}
|
||||
default:
|
||||
t.Fatal("expected SwitchRequest on confirmation poll")
|
||||
}
|
||||
}
|
||||
|
||||
// TestPollSuspectedHitClearedOnRecovery verifies a transient hit followed by
|
||||
// a clean poll does NOT trigger a swap on a subsequent hit — the suspected
|
||||
// state must be reset when detection clears.
|
||||
func TestPollSuspectedHitClearedOnRecovery(t *testing.T) {
|
||||
tc := newMockTmux()
|
||||
tc.sessions["my-session"] = true
|
||||
tc.paneOutput["my-session"] = "quota exceeded"
|
||||
|
|
@ -114,15 +163,17 @@ func TestPollTriggersSwitchOnOneBlockedInteractive(t *testing.T) {
|
|||
},
|
||||
}
|
||||
m := New(tc, s, cfg)
|
||||
m.poll()
|
||||
|
||||
m.poll() // suspected
|
||||
tc.paneOutput["my-session"] = "all good ❯ "
|
||||
m.poll() // cleared
|
||||
tc.paneOutput["my-session"] = "quota exceeded"
|
||||
m.poll() // re-suspected, NOT confirmed yet
|
||||
|
||||
select {
|
||||
case req := <-m.switchCh:
|
||||
if req.From != "compte1" {
|
||||
t.Errorf("expected From=compte1, got %q", req.From)
|
||||
}
|
||||
t.Fatalf("unexpected SwitchRequest after recovery: %+v", req)
|
||||
default:
|
||||
t.Fatal("expected SwitchRequest on channel")
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue