feat(phase2-E): multi-provider routing via secutools delegation
Adds optional delegation of agent-queue tasks to the SecuAAS secutools AI platform (GPU / Gemini / Claude API) instead of dispatching to a local Claude Code tmux session. Per-task opt-in via the YAML frontmatter fields preferred_ai, allow_delegation and complexity_hint — their absence keeps the Phase 1 behaviour exactly (zero breaking change).

Go side:
- internal/secutools: HTTP client with exponential-backoff retries (SubmitJob/GetJob/WaitForResult), DecideProvider map adapter for CLI use, table tests.
- internal/router: struct-typed Decide() with strict precedence (needs_claude_code > preferred_ai=claude-code > allow_delegation=false > preferred_ai > fail-safe local on unknown).
- internal/delegation: Manager submits jobs, writes .md.delegated markers for on-restart recovery, runs a periodic reaper that moves completed jobs into done/ with provider/cost footer and failed jobs into failed/.
- internal/dispatcher: WithDelegation() opt-in, routeTask hook before findFreeSession, skips .md.delegated in assignNextTask.
- internal/api: /api/delegated/status (active jobs + counters), /watchdog/status extended with delegation counters.
- cmd/ccl-delegate: small CLI exposing submit/get/result/decide so the bash dispatcher can call the same contract without duplicating logic.
- cmd/claude-failover: delegation wired opt-in via SECUTOOLS_API_KEY.

Tests:
- 29+ new unit tests across router, secutools, delegation, dispatcher, api packages. go test -race -count=1 clean.
- tests/phase2-E-integration.sh: bash end-to-end against a Python stdlib mock HTTP server, exercising the dev-management scripts.

Forward-compat with watchdog (Phase 1 B1 already ignores state=delegated_to_secutools) so delegated tasks aren't flagged stale.
This commit is contained in:
parent
47ab86eef9
commit
3e20085204
18 changed files with 2819 additions and 22 deletions
190
internal/secutools/client_test.go
Normal file
190
internal/secutools/client_test.go
Normal file
|
|
@ -0,0 +1,190 @@
|
|||
package secutools
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// TestSubmitJob_HappyPath verifies the request body and headers match the
|
||||
// secutools contract and the response is decoded.
|
||||
func TestSubmitJob_HappyPath(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path != "/api/v1/jobs" {
|
||||
t.Errorf("unexpected path %q", r.URL.Path)
|
||||
}
|
||||
if r.Method != http.MethodPost {
|
||||
t.Errorf("unexpected method %q", r.Method)
|
||||
}
|
||||
if r.Header.Get("X-API-Key") != "key123" {
|
||||
t.Errorf("missing/incorrect X-API-Key: %q", r.Header.Get("X-API-Key"))
|
||||
}
|
||||
var got JobRequest
|
||||
if err := json.NewDecoder(r.Body).Decode(&got); err != nil {
|
||||
t.Fatalf("decode: %v", err)
|
||||
}
|
||||
if got.Type != TypeAnalyze || got.PreferredAI != "gpu" {
|
||||
t.Errorf("payload mismatch: %+v", got)
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = w.Write([]byte(`{"job_id":"abc","status":"pending"}`))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
c := NewHTTPClient(srv.URL, "key123", srv.Client())
|
||||
resp, err := c.SubmitJob(context.Background(), &JobRequest{
|
||||
Type: TypeAnalyze,
|
||||
Priority: PriorityHigh,
|
||||
Prompt: "hi",
|
||||
PreferredAI: "gpu",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("SubmitJob: %v", err)
|
||||
}
|
||||
if resp.JobID != "abc" || resp.Status != "pending" {
|
||||
t.Errorf("unexpected response: %+v", resp)
|
||||
}
|
||||
}
|
||||
|
||||
// TestSubmitJob_HTTPError surfaces non-2xx responses as errors.
|
||||
func TestSubmitJob_HTTPError(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
_, _ = w.Write([]byte("boom"))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
c := NewHTTPClient(srv.URL, "k", srv.Client())
|
||||
if _, err := c.SubmitJob(context.Background(), &JobRequest{Type: TypeAnalyze, Prompt: "p"}); err == nil {
|
||||
t.Fatal("expected error on HTTP 500, got nil")
|
||||
}
|
||||
}
|
||||
|
||||
// TestWaitForResult_PollsUntilCompleted verifies the polling loop transitions
|
||||
// pending → running → completed and fetches the result.
|
||||
func TestWaitForResult_PollsUntilCompleted(t *testing.T) {
|
||||
var calls atomic.Int64
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
switch r.URL.Path {
|
||||
case "/api/v1/jobs/job1":
|
||||
n := calls.Add(1)
|
||||
status := "pending"
|
||||
if n >= 2 {
|
||||
status = "completed"
|
||||
}
|
||||
_, _ = w.Write([]byte(`{"job_id":"job1","status":"` + status + `","provider":"gpu"}`))
|
||||
case "/api/v1/jobs/job1/result":
|
||||
_, _ = w.Write([]byte(`{"job_id":"job1","response":"done","provider":"gpu","cost_cad":0.005}`))
|
||||
default:
|
||||
t.Errorf("unexpected path %q", r.URL.Path)
|
||||
}
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
c := NewHTTPClient(srv.URL, "k", srv.Client())
|
||||
// Override poll cadence indirectly: short timeout proves we don't spin
|
||||
// 2s per poll; the test runs in well under 10s real time.
|
||||
res, err := c.WaitForResult(context.Background(), "job1", 30*time.Second)
|
||||
if err != nil {
|
||||
t.Fatalf("WaitForResult: %v", err)
|
||||
}
|
||||
if res.Response != "done" || res.Provider != "gpu" {
|
||||
t.Errorf("unexpected result: %+v", res)
|
||||
}
|
||||
}
|
||||
|
||||
// TestWaitForResult_FailedJob returns ErrJobFailed when secutools reports
|
||||
// terminal failure.
|
||||
func TestWaitForResult_FailedJob(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = w.Write([]byte(`{"job_id":"jobX","status":"failed","error":"oom"}`))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
c := NewHTTPClient(srv.URL, "k", srv.Client())
|
||||
_, err := c.WaitForResult(context.Background(), "jobX", 5*time.Second)
|
||||
if !errors.Is(err, ErrJobFailed) {
|
||||
t.Errorf("expected ErrJobFailed, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestSubmitJob_RetriesOn5xx verifies the client retries transient 500s
|
||||
// and succeeds on a later attempt. Uses a tight retry delay so the test
|
||||
// runs in milliseconds.
|
||||
func TestSubmitJob_RetriesOn5xx(t *testing.T) {
|
||||
var calls atomic.Int64
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
n := calls.Add(1)
|
||||
if n < 3 {
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
_, _ = w.Write([]byte("transient"))
|
||||
return
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = w.Write([]byte(`{"job_id":"ok","status":"pending"}`))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
c := NewHTTPClient(srv.URL, "k", srv.Client())
|
||||
c.SetRetryPolicy(3, 1*time.Millisecond)
|
||||
|
||||
resp, err := c.SubmitJob(context.Background(), &JobRequest{Type: TypeAnalyze, Prompt: "p"})
|
||||
if err != nil {
|
||||
t.Fatalf("expected success after retries, got %v (calls=%d)", err, calls.Load())
|
||||
}
|
||||
if resp.JobID != "ok" {
|
||||
t.Errorf("unexpected response: %+v", resp)
|
||||
}
|
||||
if calls.Load() != 3 {
|
||||
t.Errorf("expected 3 attempts, got %d", calls.Load())
|
||||
}
|
||||
}
|
||||
|
||||
// TestSubmitJob_DoesNotRetry4xx ensures client errors short-circuit
|
||||
// without burning retries (e.g. wrong API key).
|
||||
func TestSubmitJob_DoesNotRetry4xx(t *testing.T) {
|
||||
var calls atomic.Int64
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
calls.Add(1)
|
||||
w.WriteHeader(http.StatusUnauthorized)
|
||||
_, _ = w.Write([]byte("bad key"))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
c := NewHTTPClient(srv.URL, "k", srv.Client())
|
||||
c.SetRetryPolicy(3, 1*time.Millisecond)
|
||||
|
||||
_, err := c.SubmitJob(context.Background(), &JobRequest{Type: TypeAnalyze, Prompt: "p"})
|
||||
if err == nil {
|
||||
t.Fatal("expected error on 401")
|
||||
}
|
||||
if calls.Load() != 1 {
|
||||
t.Errorf("4xx must not retry, got %d calls", calls.Load())
|
||||
}
|
||||
}
|
||||
|
||||
// TestWaitForResult_ContextCancel exits cleanly when the parent context is
|
||||
// cancelled mid-poll.
|
||||
func TestWaitForResult_ContextCancel(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = w.Write([]byte(`{"job_id":"j","status":"pending"}`))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
cancel() // cancel immediately
|
||||
|
||||
c := NewHTTPClient(srv.URL, "k", srv.Client())
|
||||
_, err := c.WaitForResult(ctx, "j", 10*time.Second)
|
||||
if err == nil {
|
||||
t.Fatal("expected error from cancelled context")
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue