#!/bin/bash
# phase2-E-integration.sh — Phase 2 Chantier E integration test
#
# Exercises the bash side of multi-provider delegation end-to-end,
# without hitting the real secutools API. Flow:
#
#   1. Start a local mock HTTP server that implements
#      /api/v1/jobs{,/:id,/:id/result}
#   2. Build ccl-delegate and run `ccl-delegate decide` against a test task
#   3. Call delegate-to-secutools.sh on the task
#   4. Assert: status.json = delegated_to_secutools, .delegated marker
#      with job_id, original .md still in inbox
#   5. Drive the mock into 'completed' state
#   6. Call poll-delegated-jobs.sh
#   7. Assert: done/TASK-phase2E.md exists with result body +
#      provider/cost footer, original .md + .delegated marker cleaned up
#   8. Negative test: a task without allow_delegation makes
#      delegate-to-secutools.sh exit with status 2
#
# Environment:
#   DEVMGMT_ORCH_DIR  directory holding delegate-to-secutools.sh and
#                     poll-delegated-jobs.sh (default: the hard-coded
#                     agent-orchestrator path below)
#   MOCK_PORT         TCP port for the mock server (default 18742)
#
# Exit 0 on success, non-zero on assertion failure.

set -euo pipefail

REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
SCRIPTS_ROOT="${DEVMGMT_ORCH_DIR:-/home/ubuntu/projects/dev-management/agent-orchestrator}"

TMP_ROOT=$(mktemp -d)
MOCK_PORT=${MOCK_PORT:-18742}
MOCK_STATE="$TMP_ROOT/mock-state"
MOCK_LOG="$TMP_ROOT/mock.log"
MOCK_PID_FILE="$TMP_ROOT/mock.pid"
BIN_DIR="$TMP_ROOT/bin"
PROJECTS_BASE="$TMP_ROOT/projects"
BUDGETS_FILE="$TMP_ROOT/delegation-budgets.jsonl"

mkdir -p "$MOCK_STATE" "$BIN_DIR" "$PROJECTS_BASE/test-proj/.agent-queue/inbox"

# Stop the mock server (if it was started) and remove the scratch tree.
# Registered on EXIT so it runs on success and on every failure path.
cleanup() {
  if [[ -f "$MOCK_PID_FILE" ]]; then
    # Best effort: the server may already be gone.
    kill "$(cat "$MOCK_PID_FILE")" 2>/dev/null || true
  fi
  # ':?' aborts instead of expanding to an empty path.
  rm -rf -- "${TMP_ROOT:?}"
}
trap cleanup EXIT

echo "==> building ccl-delegate"
GOMODCACHE=/home/ubuntu/go/pkg/mod GOCACHE=/home/ubuntu/.cache/go-build HOME=/home/ubuntu GOSUMDB=off GOTOOLCHAIN=local \
  /usr/local/go/bin/go build -C "$REPO_ROOT" -o "$BIN_DIR/ccl-delegate" ./cmd/ccl-delegate

export PATH="$BIN_DIR:$PATH"
export CCL_DELEGATE_BIN="$BIN_DIR/ccl-delegate"

# ── Mock secutools server (Python) ────────────────────────────────────
# Job state lives in one JSON file per job under $MOCK_STATE; GET handlers
# re-read that file on every request so this script can overwrite it to
# simulate job completion.
cat > "$TMP_ROOT/mock-server.py" <<'PY'
import http.server, json, os, sys

STATE_DIR = os.environ["MOCK_STATE"]
os.makedirs(STATE_DIR, exist_ok=True)

JOBS = {}

def _respond(handler, code, body):
    data = json.dumps(body).encode()
    handler.send_response(code)
    handler.send_header("Content-Type", "application/json")
    handler.send_header("Content-Length", str(len(data)))
    handler.end_headers()
    handler.wfile.write(data)

class H(http.server.BaseHTTPRequestHandler):
    def log_message(self, fmt, *a):
        pass

    def do_POST(self):
        length = int(self.headers.get("Content-Length", "0"))
        body = json.loads(self.rfile.read(length).decode() or "{}")
        if self.path == "/api/v1/jobs":
            jid = f"job-{len(JOBS)+1}"
            JOBS[jid] = {"status": "pending", "request": body}
            # Persist a state file so the test can drive completion.
            with open(os.path.join(STATE_DIR, f"{jid}.json"), "w") as f:
                json.dump({"status":"pending"}, f)
            return _respond(self, 200, {"job_id": jid, "status": "pending"})
        _respond(self, 404, {"error": "not found"})

    def do_GET(self):
        parts = self.path.strip("/").split("/")
        # /api/v1/jobs/:id  or  /api/v1/jobs/:id/result
        if len(parts) >= 4 and parts[:3] == ["api","v1","jobs"]:
            jid = parts[3]
            # read status from state file (test can overwrite)
            fp = os.path.join(STATE_DIR, f"{jid}.json")
            if not os.path.exists(fp):
                return _respond(self, 404, {"error":"unknown job"})
            with open(fp) as f:
                state = json.load(f)
            if len(parts) == 4:
                # job status
                return _respond(self, 200, {
                    "job_id": jid,
                    "status": state.get("status","pending"),
                    "provider": state.get("provider","")})
            if len(parts) == 5 and parts[4] == "result":
                if state.get("status") != "completed":
                    return _respond(self, 409, {"error":"not completed"})
                return _respond(self, 200, {
                    "job_id": jid,
                    "response": state.get("response","(no response)"),
                    "provider": state.get("provider","gpu"),
                    "model": state.get("model","qwen-coder"),
                    "cost_cad": state.get("cost_cad", 0.0123),
                    "tokens": state.get("tokens", 1200),
                })
        _respond(self, 404, {"error":"not found"})

port = int(sys.argv[1])
srv = http.server.ThreadingHTTPServer(("127.0.0.1", port), H)
print(f"MOCK: listening on {port}", flush=True)
srv.serve_forever()
PY

echo "==> starting mock secutools on 127.0.0.1:$MOCK_PORT"
MOCK_STATE="$MOCK_STATE" python3 "$TMP_ROOT/mock-server.py" "$MOCK_PORT" > "$MOCK_LOG" 2>&1 &
echo $! > "$MOCK_PID_FILE"

# Wait up to 3s for the port to be ready.
# The probe path is an unknown job, so the mock answers 404. curl must NOT
# be given -f here: with -f a 404 counts as failure and the loop could never
# succeed. Any HTTP response at all proves the listener is up.
mock_ready=0
for _ in 1 2 3 4 5 6; do
  if curl -s -o /dev/null "http://127.0.0.1:$MOCK_PORT/api/v1/jobs/nope" 2>/dev/null; then
    mock_ready=1
    break
  fi
  sleep 0.5
done
if (( mock_ready == 0 )); then
  echo "FAIL: mock server not reachable on 127.0.0.1:$MOCK_PORT"
  cat "$MOCK_LOG" || true
  exit 1
fi

# ── Fixture task ──────────────────────────────────────────────────────
TASK_PATH="$PROJECTS_BASE/test-proj/.agent-queue/inbox/TASK-phase2E.md"
cat > "$TASK_PATH" <<'EOF'
---
title: Phase 2 E integration
priority: default
preferred_ai: gpu
allow_delegation: true
complexity_hint: low
---

Summarize this paragraph in one sentence.
EOF

# ── Prepare environment for scripts ───────────────────────────────────
export CCL_SECUTOOLS_API_KEY="test-key"
export CCL_SECUTOOLS_MOCK_URL="http://127.0.0.1:$MOCK_PORT"
export CCL_DELEGATION_BUDGETS_FILE="$BUDGETS_FILE"
export CCL_DELEGATION_BUDGET_CAD_DAILY="10.00"  # generous for test

# ── Sanity: `ccl-delegate decide` ─────────────────────────────────────
echo "==> ccl-delegate decide"
dec=$("$BIN_DIR/ccl-delegate" decide --frontmatter="$TASK_PATH")
if [[ "$dec" != "gpu" ]]; then
  echo "FAIL: decide expected 'gpu', got '$dec'"
  exit 1
fi
echo " decide=$dec OK"

# ── Step 1: delegate ──────────────────────────────────────────────────
echo "==> delegate-to-secutools.sh"
bash "$SCRIPTS_ROOT/delegate-to-secutools.sh" "test-proj" "$TASK_PATH"

marker="$TASK_PATH.delegated"
if [[ ! -f "$marker" ]]; then
  echo "FAIL: marker missing: $marker"
  exit 1
fi
# Splitting on double quotes makes field 4 the value on the first line
# that contains "job_id".
job_id=$(awk -F'"' '/"job_id"/ { print $4; exit }' "$marker")
if [[ -z "$job_id" ]]; then
  echo "FAIL: no job_id in marker"
  exit 1
fi
echo " marker OK (job_id=$job_id)"

# status.json
status_json="$PROJECTS_BASE/test-proj/.agent-queue/status.json"
if [[ ! -f "$status_json" ]]; then
  echo "FAIL: status.json missing"
  exit 1
fi
state=$(awk -F'"' '/"state"/ { print $4; exit }' "$status_json")
if [[ "$state" != "delegated_to_secutools" ]]; then
  echo "FAIL: state=$state"
  exit 1
fi
echo " status.json OK (state=delegated_to_secutools)"

# Original .md must still be in inbox (reaper hasn't processed it yet)
if [[ ! -f "$TASK_PATH" ]]; then
  echo "FAIL: original .md removed prematurely"
  exit 1
fi

# ── Step 2: simulate completion in mock and reap ─────────────────────
echo "==> driving mock into 'completed' state for $job_id"
# Overwrite the job's state file; the mock re-reads it on every GET.
# The values are the ones the done/ assertions below look for.
# NOTE(review): this payload was reconstructed from those assertions —
# confirm it matches what poll-delegated-jobs.sh expects.
cat > "$MOCK_STATE/${job_id}.json" <<'EOF'
{
  "status": "completed",
  "response": "mocked summary",
  "provider": "gpu",
  "cost_cad": 0.0034
}
EOF

echo "==> poll-delegated-jobs.sh"
bash "$SCRIPTS_ROOT/poll-delegated-jobs.sh" --projects-base "$PROJECTS_BASE"

done_file="$PROJECTS_BASE/test-proj/.agent-queue/done/TASK-phase2E.md"
if [[ ! -f "$done_file" ]]; then
  echo "FAIL: done/ file missing"
  exit 1
fi
if ! grep -q "mocked summary" "$done_file"; then
  echo "FAIL: response not in done body:"
  cat "$done_file"
  exit 1
fi
if ! grep -q "provider: gpu" "$done_file"; then
  echo "FAIL: provider footer missing:"
  cat "$done_file"
  exit 1
fi
if ! grep -q "cost_cad: 0.0034" "$done_file"; then
  echo "FAIL: cost_cad footer missing:"
  cat "$done_file"
  exit 1
fi
echo " done/ OK (has response + provider + cost_cad)"

# Marker + original .md cleaned up
if [[ -f "$marker" ]]; then
  echo "FAIL: marker not cleaned"
  exit 1
fi
if [[ -f "$TASK_PATH" ]]; then
  echo "FAIL: original .md not cleaned"
  exit 1
fi
echo " inbox cleanup OK"

# Budget tracker recorded the spend. Mismatches are warnings, not
# failures, but "OK" is only reported when both entries were found.
if [[ -f "$BUDGETS_FILE" ]]; then
  budget_ok=1
  if ! grep -q "0.0034" "$BUDGETS_FILE"; then
    echo "WARN: spend not found in $BUDGETS_FILE"
    budget_ok=0
  fi
  if ! grep -q "test-proj" "$BUDGETS_FILE"; then
    echo "WARN: project not found in $BUDGETS_FILE"
    budget_ok=0
  fi
  if (( budget_ok == 1 )); then
    echo " budget tracker OK"
  fi
else
  echo "WARN: no budgets file (maybe unwritable)"
fi

# ── Step 3: assert legacy task (no allow_delegation) exits 2 ─────────
echo "==> negative test: legacy task → exit 2"
LEGACY="$PROJECTS_BASE/test-proj/.agent-queue/inbox/TASK-legacy.md"
cat > "$LEGACY" <<'EOF'
---
title: Legacy
priority: default
---

Do classic work.
EOF

# Capture the exit status without tripping 'set -e'.
rc=0
bash "$SCRIPTS_ROOT/delegate-to-secutools.sh" "test-proj" "$LEGACY" >/dev/null 2>&1 || rc=$?
if [[ $rc -ne 2 ]]; then
  echo "FAIL: legacy task expected rc=2, got $rc"
  exit 1
fi
echo " legacy task rc=2 OK"

echo ""
echo "=== Phase 2 Chantier E integration: ALL ASSERTIONS PASSED ==="