feat(dispatcher): enforce depends_on with .blocked marker (Phase 2/G2)
Before claiming a session for a task, the dispatcher now: 1. Parses the task's frontmatter 2. If `depends_on: [project:task_id]` is non-empty, checks each entry against `<projectsDir>/<project>/.agent-queue/done/<task_id>.md` 3. If any dep is unresolved -> skip the task and write `<task>.md.blocked` next to it. The watchdog (G1) will resolve this marker on its next tick. The `.blocked` marker is idempotent: re-running the dispatcher does not refresh its mtime, so the watchdog can compute the blocked-since timestamp from the FIRST detection (timeout precision). Path-traversal hardening: project / task_id segments must match `[A-Za-z0-9._-]+` and cannot be `.` or `..`. A malicious frontmatter like `depends_on: ../../tmp:foo` is rejected before any filesystem lookup. assignNextTask (the doneChan path) applies the same gate so that a session freed mid-cycle cannot bypass enforcement. Tests (-race clean): - DependsOnUnresolved -> .blocked marker, no dispatch - DependsOnResolved -> normal dispatch, no marker - PartialResolution -> stay blocked - RejectPathTraversal -> blocked, not dispatched - BlockedMarker idempotent (mtime stable across passes) - NoDependsOn regression guard
This commit is contained in:
parent
47ab86eef9
commit
5cfb58c202
3 changed files with 439 additions and 9 deletions
|
|
@ -22,10 +22,16 @@ import (
|
|||
|
||||
// TaskFrontmatter is the YAML header parsed from task .md files.
|
||||
type TaskFrontmatter struct {
|
||||
Title string `yaml:"title"`
|
||||
Priority string `yaml:"priority"` // critical, high, default, low
|
||||
Tags []string `yaml:"tags"`
|
||||
NeedsClaude bool `yaml:"needs_claude_code"`
|
||||
Title string `yaml:"title"`
|
||||
Priority string `yaml:"priority"` // critical, high, default, low
|
||||
Tags []string `yaml:"tags"`
|
||||
NeedsClaude bool `yaml:"needs_claude_code"`
|
||||
// Phase 2/G2: cross-project dependencies. Each entry is
|
||||
// "project:task_id" (e.g. "filesecure:FIX-0123"). The dispatcher
|
||||
// refuses to launch a task whose deps aren't all in the target
|
||||
// project's done/, and drops a `<task>.md.blocked` marker so the
|
||||
// watchdog can resolve it later (Phase 2/G1).
|
||||
DependsOn []string `yaml:"depends_on"`
|
||||
}
|
||||
|
||||
// Dispatcher watches project inbox directories and assigns tasks to idle sessions.
|
||||
|
|
@ -121,15 +127,26 @@ func (d *Dispatcher) dispatchProject(inboxDir string) {
|
|||
projectDir := filepath.Dir(filepath.Dir(inboxDir)) // inboxDir/.agent-queue/inbox → project
|
||||
for _, e := range entries {
|
||||
name := e.Name()
|
||||
if !strings.HasSuffix(name, ".md") || strings.Contains(name, ".dispatched") {
|
||||
// Skip non-md files, .dispatched markers, .blocked markers, and any
|
||||
// other sidecar (.stuck, .tmp, etc).
|
||||
if !strings.HasSuffix(name, ".md") {
|
||||
continue
|
||||
}
|
||||
taskPath := filepath.Join(inboxDir, name)
|
||||
|
||||
// Phase 2/G2: enforce depends_on before claiming a session. The
|
||||
// session pool is a precious resource — we don't want to burn an
|
||||
// idle slot on a task that can't proceed.
|
||||
if d.taskBlocked(taskPath) {
|
||||
d.touchBlockedMarker(taskPath)
|
||||
continue
|
||||
}
|
||||
|
||||
session := d.findFreeSession()
|
||||
if session == "" {
|
||||
d.logger.Printf("[dispatcher] no free session for task in %s", inboxDir)
|
||||
return
|
||||
}
|
||||
taskPath := filepath.Join(inboxDir, name)
|
||||
if err := d.launchAgent(session, projectDir, taskPath); err != nil {
|
||||
d.logger.Printf("[dispatcher] launchAgent error: %v", err)
|
||||
continue
|
||||
|
|
@ -140,6 +157,106 @@ func (d *Dispatcher) dispatchProject(inboxDir string) {
|
|||
}
|
||||
}
|
||||
|
||||
// taskBlocked returns true when taskPath declares `depends_on` entries that
|
||||
// are not yet present in the target project's `.agent-queue/done/`. The
|
||||
// frontmatter is parsed best-effort; on parse failure we treat the task as
|
||||
// non-blocked (current behaviour preserved — bad frontmatter is the
|
||||
// agent's problem, not the dispatcher's).
|
||||
func (d *Dispatcher) taskBlocked(taskPath string) bool {
|
||||
content, err := os.ReadFile(taskPath)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
fm, _ := parseFrontmatter(content)
|
||||
if len(fm.DependsOn) == 0 {
|
||||
return false
|
||||
}
|
||||
for _, dep := range fm.DependsOn {
|
||||
if !d.dependencyResolved(dep) {
|
||||
d.logger.Printf("[dispatcher] task %s blocked by unresolved dep %q",
|
||||
filepath.Base(taskPath), dep)
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// dependencyResolved checks whether `project:task_id` is in
|
||||
// `<projectsDir>/<project>/.agent-queue/done/<task_id>.md`. Accepts an
|
||||
// exact match or a `<task_id>*.md` prefix match (some queues append a
|
||||
// timestamp). Path segments are validated (no `..`, no `/` inside the
|
||||
// segment) — see isSafeSegment.
|
||||
func (d *Dispatcher) dependencyResolved(dep string) bool {
|
||||
parts := strings.SplitN(dep, ":", 2)
|
||||
if len(parts) != 2 {
|
||||
d.logger.Printf("[dispatcher] malformed depends_on %q (expected 'project:task_id')", dep)
|
||||
return false
|
||||
}
|
||||
project, taskID := parts[0], parts[1]
|
||||
if !isSafeSegment(project) || !isSafeSegment(taskID) {
|
||||
d.logger.Printf("[dispatcher] unsafe depends_on segment %q — refusing to look up", dep)
|
||||
return false
|
||||
}
|
||||
doneDir := filepath.Join(d.projectsDir, project, ".agent-queue", "done")
|
||||
exact := filepath.Join(doneDir, taskID+".md")
|
||||
if _, err := os.Stat(exact); err == nil {
|
||||
return true
|
||||
}
|
||||
entries, err := os.ReadDir(doneDir)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
for _, e := range entries {
|
||||
if e.IsDir() {
|
||||
continue
|
||||
}
|
||||
name := e.Name()
|
||||
if strings.HasPrefix(name, taskID) && strings.HasSuffix(name, ".md") &&
|
||||
!strings.HasSuffix(name, ".dispatched") {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// touchBlockedMarker creates `<taskPath>.blocked` (or refreshes its mtime)
|
||||
// so the watchdog (Phase 2/G1) sees the task as blocked and tracks its
|
||||
// timeout. We use mtime as the "blocked since" timestamp; we do NOT
|
||||
// refresh it on subsequent ticks — the operator wants the timeout to be
|
||||
// counted from the FIRST detection, not the last.
|
||||
func (d *Dispatcher) touchBlockedMarker(taskPath string) {
|
||||
marker := taskPath + ".blocked"
|
||||
if _, err := os.Stat(marker); err == nil {
|
||||
return // marker already exists, leave mtime alone
|
||||
}
|
||||
if err := os.WriteFile(marker, []byte(""), 0o644); err != nil {
|
||||
d.logger.Printf("[dispatcher] write .blocked marker for %s: %v",
|
||||
filepath.Base(taskPath), err)
|
||||
return
|
||||
}
|
||||
d.logger.Printf("[dispatcher] task %s marked .blocked (waiting on depends_on)",
|
||||
filepath.Base(taskPath))
|
||||
}
|
||||
|
||||
// isSafeSegment guards the project / task_id pair against path traversal.
|
||||
// Same rule as the watchdog (`[A-Za-z0-9._-]+`, no `.` / `..`).
|
||||
func isSafeSegment(s string) bool {
|
||||
if s == "" || s == "." || s == ".." {
|
||||
return false
|
||||
}
|
||||
for _, r := range s {
|
||||
switch {
|
||||
case r >= 'a' && r <= 'z',
|
||||
r >= 'A' && r <= 'Z',
|
||||
r >= '0' && r <= '9',
|
||||
r == '_', r == '-', r == '.':
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// findFreeSession returns the name of an idle, live, cooldown-free session
|
||||
// from the autonomous pool. Dedicated sessions are intentionally NOT
|
||||
// considered: those host the operator's manual interactive work. Routing a
|
||||
|
|
@ -184,10 +301,17 @@ func (d *Dispatcher) assignNextTask(session string) {
|
|||
continue
|
||||
}
|
||||
for _, e := range entries {
|
||||
if !strings.HasSuffix(e.Name(), ".md") || strings.Contains(e.Name(), ".dispatched") {
|
||||
name := e.Name()
|
||||
if !strings.HasSuffix(name, ".md") {
|
||||
continue
|
||||
}
|
||||
taskPath := filepath.Join(inbox, name)
|
||||
// Phase 2/G2: respect depends_on here too, otherwise a
|
||||
// session freed mid-cycle would still bypass the gate.
|
||||
if d.taskBlocked(taskPath) {
|
||||
d.touchBlockedMarker(taskPath)
|
||||
continue
|
||||
}
|
||||
taskPath := filepath.Join(inbox, e.Name())
|
||||
if err := d.launchAgent(session, ds.Project, taskPath); err == nil {
|
||||
os.Rename(taskPath, taskPath+".dispatched")
|
||||
return
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import (
|
|||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"forge.secuaas.ovh/olivier/claude-failover/internal/config"
|
||||
"forge.secuaas.ovh/olivier/claude-failover/internal/state"
|
||||
|
|
@ -240,3 +241,281 @@ func TestDispatchProjectNoFreeSession(t *testing.T) {
|
|||
t.Error("task file should remain when no session is free")
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Phase 2 / G2 — depends_on enforcement tests ────────────────────────────
|
||||
|
||||
// makeProjectInbox sets up a `<project>/.agent-queue/inbox/` directory under
|
||||
// `root` and returns the inbox path.
|
||||
func makeProjectInbox(t *testing.T, root, project string) string {
|
||||
t.Helper()
|
||||
inbox := filepath.Join(root, project, ".agent-queue", "inbox")
|
||||
if err := os.MkdirAll(inbox, 0o755); err != nil {
|
||||
t.Fatalf("mkdir inbox: %v", err)
|
||||
}
|
||||
if err := os.MkdirAll(filepath.Join(root, project, ".agent-queue", "done"), 0o755); err != nil {
|
||||
t.Fatalf("mkdir done: %v", err)
|
||||
}
|
||||
return inbox
|
||||
}
|
||||
|
||||
// putDoneTaskFailover seeds <root>/<project>/.agent-queue/done/<id>.md.
|
||||
func putDoneTaskFailover(t *testing.T, root, project, id string) {
|
||||
t.Helper()
|
||||
doneDir := filepath.Join(root, project, ".agent-queue", "done")
|
||||
_ = os.MkdirAll(doneDir, 0o755)
|
||||
if err := os.WriteFile(filepath.Join(doneDir, id+".md"), []byte("done\n"), 0o644); err != nil {
|
||||
t.Fatalf("write done: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestDispatcher_DependsOnUnresolved_DropsBlockedMarker verifies that a task
|
||||
// whose `depends_on` is not satisfied is NOT dispatched and that a
|
||||
// `.md.blocked` marker is created next to it.
|
||||
func TestDispatcher_DependsOnUnresolved_DropsBlockedMarker(t *testing.T) {
|
||||
root := t.TempDir()
|
||||
inboxA := makeProjectInbox(t, root, "projA")
|
||||
makeProjectInbox(t, root, "projB") // empty done/
|
||||
|
||||
taskContent := "---\nid: TASK-A1\npriority: default\ndepends_on:\n - projB:DEP-1\n---\n\nbody\n"
|
||||
taskPath := filepath.Join(inboxA, "TASK-A1.md")
|
||||
if err := os.WriteFile(taskPath, []byte(taskContent), 0o644); err != nil {
|
||||
t.Fatalf("write task: %v", err)
|
||||
}
|
||||
|
||||
tc := newMockTmux()
|
||||
tc.sessions["pool-0"] = true
|
||||
tc.paneOutput["pool-0"] = "❯ "
|
||||
s := state.New("")
|
||||
s.SetIdle("pool-0")
|
||||
|
||||
d := &Dispatcher{
|
||||
tmux: tc,
|
||||
state: s,
|
||||
projectsDir: root,
|
||||
config: &config.Config{
|
||||
Pool: config.PoolConfig{
|
||||
Autonomous: config.AutonomousConfig{Prefix: "pool-", Max: 1},
|
||||
},
|
||||
},
|
||||
logger: log.Default(),
|
||||
}
|
||||
|
||||
d.dispatchProject(inboxA)
|
||||
|
||||
if _, err := os.Stat(taskPath + ".blocked"); err != nil {
|
||||
t.Errorf("expected .blocked marker: %v", err)
|
||||
}
|
||||
if _, err := os.Stat(taskPath + ".dispatched"); !os.IsNotExist(err) {
|
||||
t.Errorf("task should NOT be dispatched while blocked")
|
||||
}
|
||||
if _, err := os.Stat(taskPath); err != nil {
|
||||
t.Errorf("task .md should remain in inbox: %v", err)
|
||||
}
|
||||
if st := s.GetSession("pool-0"); st == nil || st.State != "idle" {
|
||||
t.Errorf("session should stay idle when only task is blocked, got %v", st)
|
||||
}
|
||||
}
|
||||
|
||||
// TestDispatcher_DependsOnResolved_Dispatches verifies normal flow when the
|
||||
// upstream task is in done/.
|
||||
func TestDispatcher_DependsOnResolved_Dispatches(t *testing.T) {
|
||||
root := t.TempDir()
|
||||
inboxA := makeProjectInbox(t, root, "projA")
|
||||
makeProjectInbox(t, root, "projB")
|
||||
putDoneTaskFailover(t, root, "projB", "DEP-1")
|
||||
|
||||
taskContent := "---\nid: TASK-A2\npriority: default\ndepends_on:\n - projB:DEP-1\n---\n\nbody\n"
|
||||
taskPath := filepath.Join(inboxA, "TASK-A2.md")
|
||||
_ = os.WriteFile(taskPath, []byte(taskContent), 0o644)
|
||||
|
||||
tc := newMockTmux()
|
||||
tc.sessions["pool-0"] = true
|
||||
tc.paneOutput["pool-0"] = "❯ "
|
||||
s := state.New("")
|
||||
s.SetIdle("pool-0")
|
||||
|
||||
d := &Dispatcher{
|
||||
tmux: tc,
|
||||
state: s,
|
||||
projectsDir: root,
|
||||
config: &config.Config{
|
||||
Pool: config.PoolConfig{
|
||||
Autonomous: config.AutonomousConfig{Prefix: "pool-", Max: 1},
|
||||
},
|
||||
},
|
||||
logger: log.Default(),
|
||||
}
|
||||
|
||||
d.dispatchProject(inboxA)
|
||||
|
||||
if _, err := os.Stat(taskPath + ".dispatched"); err != nil {
|
||||
t.Errorf("expected .dispatched: %v", err)
|
||||
}
|
||||
if _, err := os.Stat(taskPath + ".blocked"); !os.IsNotExist(err) {
|
||||
t.Errorf(".blocked marker should NOT be present when dep is resolved")
|
||||
}
|
||||
if st := s.GetSession("pool-0"); st == nil || st.State != "working" {
|
||||
t.Errorf("expected working session, got %v", st)
|
||||
}
|
||||
}
|
||||
|
||||
// TestDispatcher_DependsOn_PartialResolution treats partial resolution as
|
||||
// blocked.
|
||||
func TestDispatcher_DependsOn_PartialResolution(t *testing.T) {
|
||||
root := t.TempDir()
|
||||
inboxA := makeProjectInbox(t, root, "projA")
|
||||
makeProjectInbox(t, root, "projB")
|
||||
makeProjectInbox(t, root, "projC")
|
||||
putDoneTaskFailover(t, root, "projB", "DEP-X")
|
||||
|
||||
content := "---\nid: TASK-A3\npriority: default\ndepends_on:\n - projB:DEP-X\n - projC:DEP-Y\n---\n\nbody\n"
|
||||
taskPath := filepath.Join(inboxA, "TASK-A3.md")
|
||||
_ = os.WriteFile(taskPath, []byte(content), 0o644)
|
||||
|
||||
tc := newMockTmux()
|
||||
tc.sessions["pool-0"] = true
|
||||
tc.paneOutput["pool-0"] = "❯ "
|
||||
s := state.New("")
|
||||
s.SetIdle("pool-0")
|
||||
|
||||
d := &Dispatcher{
|
||||
tmux: tc,
|
||||
state: s,
|
||||
projectsDir: root,
|
||||
config: &config.Config{
|
||||
Pool: config.PoolConfig{
|
||||
Autonomous: config.AutonomousConfig{Prefix: "pool-", Max: 1},
|
||||
},
|
||||
},
|
||||
logger: log.Default(),
|
||||
}
|
||||
|
||||
d.dispatchProject(inboxA)
|
||||
|
||||
if _, err := os.Stat(taskPath + ".blocked"); err != nil {
|
||||
t.Errorf(".blocked marker expected for partial resolution: %v", err)
|
||||
}
|
||||
if _, err := os.Stat(taskPath + ".dispatched"); !os.IsNotExist(err) {
|
||||
t.Errorf("must not dispatch with one unresolved dep")
|
||||
}
|
||||
}
|
||||
|
||||
// TestDispatcher_DependsOn_RejectPathTraversal blocks malicious depends_on
|
||||
// segments containing `..` or `/`.
|
||||
func TestDispatcher_DependsOn_RejectPathTraversal(t *testing.T) {
|
||||
root := t.TempDir()
|
||||
inboxA := makeProjectInbox(t, root, "projA")
|
||||
|
||||
content := "---\nid: TASK-evil\npriority: default\ndepends_on:\n - ../../tmp:foo\n---\n\nbody\n"
|
||||
taskPath := filepath.Join(inboxA, "TASK-evil.md")
|
||||
_ = os.WriteFile(taskPath, []byte(content), 0o644)
|
||||
|
||||
tc := newMockTmux()
|
||||
tc.sessions["pool-0"] = true
|
||||
s := state.New("")
|
||||
s.SetIdle("pool-0")
|
||||
|
||||
d := &Dispatcher{
|
||||
tmux: tc,
|
||||
state: s,
|
||||
projectsDir: root,
|
||||
config: &config.Config{
|
||||
Pool: config.PoolConfig{
|
||||
Autonomous: config.AutonomousConfig{Prefix: "pool-", Max: 1},
|
||||
},
|
||||
},
|
||||
logger: log.Default(),
|
||||
}
|
||||
|
||||
d.dispatchProject(inboxA)
|
||||
if _, err := os.Stat(taskPath + ".blocked"); err != nil {
|
||||
t.Errorf("malicious dep must result in .blocked, not dispatch: %v", err)
|
||||
}
|
||||
if _, err := os.Stat(taskPath + ".dispatched"); !os.IsNotExist(err) {
|
||||
t.Errorf("malicious dep must NOT be dispatched")
|
||||
}
|
||||
}
|
||||
|
||||
// TestDispatcher_BlockedMarker_Idempotent verifies running the dispatch
|
||||
// twice on the same blocked task does not refresh the marker mtime (so the
|
||||
// watchdog timeout counts from FIRST detection).
|
||||
func TestDispatcher_BlockedMarker_Idempotent(t *testing.T) {
|
||||
root := t.TempDir()
|
||||
inboxA := makeProjectInbox(t, root, "projA")
|
||||
makeProjectInbox(t, root, "projB")
|
||||
|
||||
content := "---\nid: TASK-idem\npriority: default\ndepends_on:\n - projB:NEVER\n---\n\nbody\n"
|
||||
taskPath := filepath.Join(inboxA, "TASK-idem.md")
|
||||
_ = os.WriteFile(taskPath, []byte(content), 0o644)
|
||||
|
||||
tc := newMockTmux()
|
||||
s := state.New("")
|
||||
d := &Dispatcher{
|
||||
tmux: tc,
|
||||
state: s,
|
||||
projectsDir: root,
|
||||
config: &config.Config{
|
||||
Pool: config.PoolConfig{
|
||||
Autonomous: config.AutonomousConfig{Prefix: "pool-", Max: 0},
|
||||
},
|
||||
},
|
||||
logger: log.Default(),
|
||||
}
|
||||
|
||||
d.dispatchProject(inboxA)
|
||||
marker := taskPath + ".blocked"
|
||||
st1, err := os.Stat(marker)
|
||||
if err != nil {
|
||||
t.Fatalf("expected marker after first run: %v", err)
|
||||
}
|
||||
mtime1 := st1.ModTime()
|
||||
|
||||
// Sleep enough to detect mtime drift on most filesystems (1s
|
||||
// granularity). The marker mtime must be stable so the watchdog
|
||||
// timeout counts from FIRST detection.
|
||||
time.Sleep(1100 * time.Millisecond)
|
||||
|
||||
d.dispatchProject(inboxA)
|
||||
st2, err := os.Stat(marker)
|
||||
if err != nil {
|
||||
t.Fatalf("expected marker after second run: %v", err)
|
||||
}
|
||||
if !st2.ModTime().Equal(mtime1) {
|
||||
t.Errorf("blocked marker mtime should be stable across dispatch passes (was %v, now %v)",
|
||||
mtime1, st2.ModTime())
|
||||
}
|
||||
}
|
||||
|
||||
// TestDispatcher_NoDependsOn_DispatchesNormally is a regression guard:
|
||||
// tasks without depends_on must keep working exactly as before G2.
|
||||
func TestDispatcher_NoDependsOn_DispatchesNormally(t *testing.T) {
|
||||
root := t.TempDir()
|
||||
inboxA := makeProjectInbox(t, root, "projA")
|
||||
|
||||
taskContent := "---\nid: TASK-plain\npriority: default\n---\nbody\n"
|
||||
taskPath := filepath.Join(inboxA, "TASK-plain.md")
|
||||
_ = os.WriteFile(taskPath, []byte(taskContent), 0o644)
|
||||
|
||||
tc := newMockTmux()
|
||||
tc.sessions["pool-0"] = true
|
||||
tc.paneOutput["pool-0"] = "❯ "
|
||||
s := state.New("")
|
||||
s.SetIdle("pool-0")
|
||||
|
||||
d := &Dispatcher{
|
||||
tmux: tc,
|
||||
state: s,
|
||||
projectsDir: root,
|
||||
config: &config.Config{
|
||||
Pool: config.PoolConfig{
|
||||
Autonomous: config.AutonomousConfig{Prefix: "pool-", Max: 1},
|
||||
},
|
||||
},
|
||||
logger: log.Default(),
|
||||
}
|
||||
|
||||
d.dispatchProject(inboxA)
|
||||
if _, err := os.Stat(taskPath + ".dispatched"); err != nil {
|
||||
t.Errorf("plain task should be dispatched: %v", err)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue