claude-failover/tests/phase2-E-integration.sh

215 lines
9.2 KiB
Bash
Raw Normal View History

feat(phase2-E): multi-provider routing via secutools delegation Adds optional delegation of agent-queue tasks to the SecuAAS secutools AI platform (GPU / Gemini / Claude API) instead of dispatching to a local Claude Code tmux session. Per-task opt-in via YAML frontmatter fields preferred_ai, allow_delegation, complexity_hint — absence keeps the Phase 1 behaviour exactly (zero breaking change). Go side: - internal/secutools: HTTP client with exponential-backoff retries (SubmitJob/GetJob/WaitForResult), DecideProvider map adapter for CLI use, table tests. - internal/router: struct-typed Decide() with strict precedence (needs_claude_code > preferred_ai=claude-code > allow_delegation=false > preferred_ai > fail-safe local on unknown). - internal/delegation: Manager submits jobs, writes .md.delegated markers for on-restart recovery, runs a periodic reaper that moves completed jobs into done/ with provider/cost footer and failed jobs into failed/. - internal/dispatcher: WithDelegation() opt-in, routeTask hook before findFreeSession, skips .md.delegated in assignNextTask. - internal/api: /api/delegated/status (active jobs + counters), /watchdog/status extended with delegation counters. - cmd/ccl-delegate: small CLI exposing submit/get/result/decide so the bash dispatcher can call the same contract without duplicating logic. - cmd/claude-failover: delegation wired opt-in via SECUTOOLS_API_KEY. Tests: - 29+ new unit tests across router, secutools, delegation, dispatcher, api packages. go test -race -count=1 clean. - tests/phase2-E-integration.sh: bash end-to-end against a Python stdlib mock HTTP server, exercising the dev-management scripts. Forward-compat with watchdog (Phase 1 B1 already ignores state=delegated_to_secutools) so delegated tasks aren't flagged stale.
2026-04-17 02:17:19 +00:00
#!/bin/bash
# phase2-E-integration.sh — Phase 2 Chantier E integration test
#
# Exercises the bash side of multi-provider delegation end-to-end,
# without hitting the real secutools API. Flow:
#
#   1. Start a local mock HTTP server that implements /api/v1/jobs{,/:id,/:id/result}
#   2. Build ccl-delegate and sanity-check `ccl-delegate decide` against a test task
#   3. Call delegate-to-secutools.sh on the task
#   4. Assert: status.json = delegated_to_secutools, .delegated marker
#      with job_id, original .md still in inbox
#   5. Drive the mock into 'completed' state
#   6. Call poll-delegated-jobs.sh
#   7. Assert: done/<task>.md exists with result body + provider/cost
#      footer, original .md + .delegated marker cleaned up
#   8. Assert: a legacy task (no delegation frontmatter) makes
#      delegate-to-secutools.sh exit with status 2
#
# Exit 0 on success, non-zero on assertion failure.
# Strict mode: abort on errors, unset variables and failed pipeline stages.
set -o errexit -o nounset -o pipefail

# Repository root is the parent of the directory containing this script.
REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"

# Location of the dev-management orchestrator scripts under test;
# DEVMGMT_ORCH_DIR overrides the default.
SCRIPTS_ROOT="${DEVMGMT_ORCH_DIR:-/home/ubuntu/projects/dev-management/agent-orchestrator}"

# Port for the mock server; keeps a caller-provided MOCK_PORT if non-empty.
: "${MOCK_PORT:=18742}"

# Everything the test writes lives under one throwaway directory.
TMP_ROOT=$(mktemp -d)
BIN_DIR="${TMP_ROOT}/bin"
PROJECTS_BASE="${TMP_ROOT}/projects"
BUDGETS_FILE="${TMP_ROOT}/delegation-budgets.jsonl"
MOCK_STATE="${TMP_ROOT}/mock-state"
MOCK_LOG="${TMP_ROOT}/mock.log"
MOCK_PID_FILE="${TMP_ROOT}/mock.pid"

# Pre-create the mock state dir, the bin dir and the test project's inbox.
mkdir -p "${MOCK_STATE}"
mkdir -p "${BIN_DIR}"
mkdir -p "${PROJECTS_BASE}/test-proj/.agent-queue/inbox"
# cleanup — stop the mock server (if its PID was recorded) and remove the
# temporary tree.
# Globals: MOCK_PID_FILE (read), TMP_ROOT (read; directory is deleted)
cleanup() {
  local mock_pid
  if [[ -f "$MOCK_PID_FILE" ]]; then
    # Best effort: the server may already be gone, so failures are ignored.
    mock_pid=$(cat "$MOCK_PID_FILE") || true
    kill "$mock_pid" 2>/dev/null || true
  fi
  rm -rf "$TMP_ROOT"
}
# Run cleanup on every exit path from here on.
trap cleanup EXIT

echo "==> building ccl-delegate"
# Environment for the Go toolchain invocation (applies to this command only).
go_build_env=(
  GOMODCACHE=/home/ubuntu/go/pkg/mod
  GOCACHE=/home/ubuntu/.cache/go-build
  HOME=/home/ubuntu
  GOSUMDB=off
  GOTOOLCHAIN=local
)
env "${go_build_env[@]}" \
  /usr/local/go/bin/go build -C "$REPO_ROOT" -o "$BIN_DIR/ccl-delegate" ./cmd/ccl-delegate

# Expose the freshly built binary to child scripts, both via PATH and via
# the explicit CCL_DELEGATE_BIN variable.
PATH="$BIN_DIR:$PATH"
CCL_DELEGATE_BIN="$BIN_DIR/ccl-delegate"
export PATH CCL_DELEGATE_BIN
# ── Mock secutools server (Python) ────────────────────────────────────
# Writes a stdlib-only HTTP server to $TMP_ROOT/mock-server.py. Job state is
# kept as one JSON file per job under $MOCK_STATE so the test can overwrite
# it to drive a job into another status.
cat <<'PY' > "$TMP_ROOT/mock-server.py"
import http.server, json, os, sys, threading

STATE_DIR = os.environ["MOCK_STATE"]
os.makedirs(STATE_DIR, exist_ok=True)
JOBS = {}


def _respond(handler, code, body):
    """Send `body` as a JSON response with HTTP status `code`."""
    payload = json.dumps(body).encode()
    handler.send_response(code)
    handler.send_header("Content-Type", "application/json")
    handler.send_header("Content-Length", str(len(payload)))
    handler.end_headers()
    handler.wfile.write(payload)


class H(http.server.BaseHTTPRequestHandler):
    def log_message(self, fmt, *a):
        # Silence per-request logging.
        pass

    def do_POST(self):
        """POST /api/v1/jobs creates a pending job; anything else is 404."""
        length = int(self.headers.get("Content-Length", "0"))
        body = json.loads(self.rfile.read(length).decode() or "{}")
        if self.path != "/api/v1/jobs":
            return _respond(self, 404, {"error": "not found"})
        jid = f"job-{len(JOBS)+1}"
        JOBS[jid] = {"status": "pending", "request": body}
        # Persist the job status so the test can drive completion.
        state_path = os.path.join(STATE_DIR, f"{jid}.json")
        with open(state_path, "w") as f:
            json.dump({"status": "pending"}, f)
        return _respond(self, 200, {"job_id": jid, "status": "pending"})

    def do_GET(self):
        """GET /api/v1/jobs/:id (status) or /api/v1/jobs/:id/result."""
        parts = self.path.strip("/").split("/")
        is_job_route = len(parts) >= 4 and parts[:3] == ["api", "v1", "jobs"]
        if is_job_route:
            jid = parts[3]
            # Status comes from the state file (the test can overwrite it).
            state_path = os.path.join(STATE_DIR, f"{jid}.json")
            if not os.path.exists(state_path):
                return _respond(self, 404, {"error": "unknown job"})
            with open(state_path) as f:
                state = json.load(f)
            if len(parts) == 4:
                # Job status.
                return _respond(self, 200, {
                    "job_id": jid,
                    "status": state.get("status", "pending"),
                    "provider": state.get("provider", ""),
                })
            if len(parts) == 5 and parts[4] == "result":
                if state.get("status") != "completed":
                    return _respond(self, 409, {"error": "not completed"})
                return _respond(self, 200, {
                    "job_id": jid,
                    "response": state.get("response", "(no response)"),
                    "provider": state.get("provider", "gpu"),
                    "model": state.get("model", "qwen-coder"),
                    "cost_cad": state.get("cost_cad", 0.0123),
                    "tokens": state.get("tokens", 1200),
                })
        _respond(self, 404, {"error": "not found"})


port = int(sys.argv[1])
srv = http.server.ThreadingHTTPServer(("127.0.0.1", port), H)
print(f"MOCK: listening on {port}", flush=True)
srv.serve_forever()
PY
echo "==> starting mock secutools on 127.0.0.1:$MOCK_PORT"
MOCK_STATE="$MOCK_STATE" python3 "$TMP_ROOT/mock-server.py" "$MOCK_PORT" > "$MOCK_LOG" 2>&1 &
mock_pid=$!
echo "$mock_pid" > "$MOCK_PID_FILE"

# Wait up to ~5s for the mock to accept connections.
# The probe deliberately omits curl's -f: /api/v1/jobs/nope is answered with
# 404 by the mock, and -f would turn that into a failure, so the probe could
# never succeed. Any HTTP reply at all means the server is listening.
mock_ready=0
for _ in 1 2 3 4 5 6 7 8 9 10; do
  # Stop early if the server process died (e.g. the port is already in use).
  if ! kill -0 "$mock_pid" 2>/dev/null; then
    break
  fi
  if curl -s -o /dev/null "http://127.0.0.1:$MOCK_PORT/api/v1/jobs/nope" 2>/dev/null; then
    mock_ready=1
    break
  fi
  sleep 0.5
done

# Fail fast with the server log instead of running the test against nothing.
if [[ "$mock_ready" -ne 1 ]]; then
  echo "FAIL: mock secutools did not come up on 127.0.0.1:$MOCK_PORT; log follows:"
  cat "$MOCK_LOG" || true
  exit 1
fi
# ── Fixture task ──────────────────────────────────────────────────────
# A task whose frontmatter opts in to delegation and prefers the gpu provider.
TASK_PATH="$PROJECTS_BASE/test-proj/.agent-queue/inbox/TASK-phase2E.md"
cat <<'EOF' > "$TASK_PATH"
---
title: Phase 2 E integration
priority: default
preferred_ai: gpu
allow_delegation: true
complexity_hint: low
---
Summarize this paragraph in one sentence.
EOF

# ── Prepare environment for scripts ───────────────────────────────────
# Exported so the scripts invoked below inherit them: a dummy API key, the
# mock server URL, the budgets file inside the temp tree and a daily budget
# that is generous for the test.
CCL_SECUTOOLS_API_KEY="test-key"
CCL_SECUTOOLS_MOCK_URL="http://127.0.0.1:$MOCK_PORT"
CCL_DELEGATION_BUDGETS_FILE="$BUDGETS_FILE"
CCL_DELEGATION_BUDGET_CAD_DAILY="10.00"
export CCL_SECUTOOLS_API_KEY CCL_SECUTOOLS_MOCK_URL
export CCL_DELEGATION_BUDGETS_FILE CCL_DELEGATION_BUDGET_CAD_DAILY
# ── Sanity: `ccl-delegate decide` ─────────────────────────────────────
# The fixture's frontmatter must make the CLI print exactly "gpu".
echo "==> ccl-delegate decide"
decision=$("$BIN_DIR/ccl-delegate" decide --frontmatter="$TASK_PATH")
if [[ "$decision" != "gpu" ]]; then
  echo "FAIL: decide expected 'gpu', got '$decision'"
  exit 1
fi
echo " decide=$decision OK"
# ── Step 1: delegate ──────────────────────────────────────────────────
echo "==> delegate-to-secutools.sh"
bash "$SCRIPTS_ROOT/delegate-to-secutools.sh" "test-proj" "$TASK_PATH"

# The script must leave a <task>.delegated marker carrying the job id.
marker="$TASK_PATH.delegated"
if [[ ! -f "$marker" ]]; then
  echo "FAIL: marker missing: $marker"
  exit 1
fi
# Fourth double-quote-delimited field of the first line mentioning "job_id".
job_id=$(awk -F'"' '/"job_id"/ { print $4; exit }' "$marker")
if [[ -z "$job_id" ]]; then
  echo "FAIL: no job_id in marker"
  exit 1
fi
echo " marker OK (job_id=$job_id)"

# status.json must exist and report the delegated state.
status_json="$PROJECTS_BASE/test-proj/.agent-queue/status.json"
if [[ ! -f "$status_json" ]]; then
  echo "FAIL: status.json missing"
  exit 1
fi
state=$(awk -F'"' '/"state"/ { print $4; exit }' "$status_json")
if [[ "$state" != "delegated_to_secutools" ]]; then
  echo "FAIL: state=$state"
  exit 1
fi
echo " status.json OK (state=delegated_to_secutools)"

# Original .md must still be in inbox (reaper hasn't processed it yet)
if [[ ! -f "$TASK_PATH" ]]; then
  echo "FAIL: original .md removed prematurely"
  exit 1
fi
# ── Step 2: simulate completion in mock and reap ─────────────────────
echo "==> driving mock into 'completed' state for $job_id"
# Overwrite the job's state file; the mock serves status/result from it.
printf '%s\n' \
  '{"status":"completed","provider":"gpu","model":"qwen-coder","response":"In one sentence: mocked summary.","cost_cad":0.0034,"tokens":321}' \
  > "$MOCK_STATE/${job_id}.json"

echo "==> poll-delegated-jobs.sh"
bash "$SCRIPTS_ROOT/poll-delegated-jobs.sh" --projects-base "$PROJECTS_BASE"

done_file="$PROJECTS_BASE/test-proj/.agent-queue/done/TASK-phase2E.md"
if [[ ! -f "$done_file" ]]; then
  echo "FAIL: done/ file missing"
  exit 1
fi

# assert_done_contains PATTERN WHAT — fail (dumping the done file) unless
# PATTERN is found in $done_file; WHAT completes the failure message.
assert_done_contains() {
  local pattern=$1 what=$2
  if ! grep -q "$pattern" "$done_file"; then
    echo "FAIL: $what:"
    cat "$done_file"
    exit 1
  fi
}
assert_done_contains "mocked summary" "response not in done body"
assert_done_contains "provider: gpu" "provider footer missing"
assert_done_contains "cost_cad: 0.0034" "cost_cad footer missing"
echo " done/ OK (has response + provider + cost_cad)"

# Marker + original .md cleaned up
if [[ -f "$marker" ]]; then
  echo "FAIL: marker not cleaned"
  exit 1
fi
if [[ -f "$TASK_PATH" ]]; then
  echo "FAIL: original .md not cleaned"
  exit 1
fi
echo " inbox cleanup OK"
# Budget tracker recorded the spend
#
# check_budget_tracker FILE — soft check of the budgets file.
# Arguments: $1 - path to the budgets file
# Outputs:   WARN lines on stdout for a missing file, missing spend (0.0034)
#            or missing project (test-proj); " budget tracker OK" only when
#            both entries are present.
# Returns:   always 0 — findings here are warnings, not test failures.
check_budget_tracker() {
  local budgets_file=$1
  local complete=1

  if [[ ! -f "$budgets_file" ]]; then
    echo "WARN: no budgets file (maybe unwritable)"
    return 0
  fi

  # -F: match the amount literally ('.' would otherwise be a regex wildcard).
  if ! grep -qF -- "0.0034" "$budgets_file"; then
    echo "WARN: spend not found in $budgets_file"
    complete=0
  fi
  if ! grep -qF -- "test-proj" "$budgets_file"; then
    echo "WARN: project not found in $budgets_file"
    complete=0
  fi

  # Do not claim success right after warning that something was missing.
  if (( complete )); then
    echo " budget tracker OK"
  fi
  return 0
}
check_budget_tracker "$BUDGETS_FILE"
# ── Step 3: assert legacy task (no allow_delegation) exits 2 ─────────
echo "==> negative test: legacy task → exit 2"
# A task without any delegation frontmatter fields.
LEGACY="$PROJECTS_BASE/test-proj/.agent-queue/inbox/TASK-legacy.md"
cat <<'EOF' > "$LEGACY"
---
title: Legacy
priority: default
---
Do classic work.
EOF

# Capture the exit status without letting errexit abort the test.
rc=0
bash "$SCRIPTS_ROOT/delegate-to-secutools.sh" "test-proj" "$LEGACY" >/dev/null 2>&1 || rc=$?
if [[ $rc -ne 2 ]]; then
  echo "FAIL: legacy task expected rc=2, got $rc"
  exit 1
fi
echo " legacy task rc=2 OK"

echo ""
echo "=== Phase 2 Chantier E integration: ALL ASSERTIONS PASSED ==="