feat(dispatcher): enforce depends_on with .blocked marker (Phase 2/G2)
Before claiming a session for a task, the dispatcher now:

1. Parses the task's frontmatter.
2. If `depends_on: [project:task_id]` is non-empty, checks each entry against `<projectsDir>/<project>/.agent-queue/done/<task_id>.md`.
3. If any dep is unresolved -> skips the task and writes `<task>.md.blocked` next to it. The watchdog (G1) will resolve this marker on its next tick.

The `.blocked` marker is idempotent: re-running the dispatcher does not refresh its mtime, so the watchdog can compute the blocked-since timestamp from the FIRST detection (timeout precision).

Path-traversal hardening: project / task_id segments must match `[A-Za-z0-9._-]+` and cannot be `.` or `..`. A malicious frontmatter like `depends_on: ../../tmp:foo` is rejected before any filesystem lookup.

assignNextTask (the doneChan path) applies the same gate so that a session freed mid-cycle cannot bypass enforcement.

Tests (-race clean):
- DependsOnUnresolved -> .blocked marker, no dispatch
- DependsOnResolved -> normal dispatch, no marker
- PartialResolution -> stay blocked
- RejectPathTraversal -> blocked, not dispatched
- BlockedMarker idempotent (mtime stable across passes)
- NoDependsOn regression guard
This commit is contained in:
parent
47ab86eef9
commit
5cfb58c202
3 changed files with 439 additions and 9 deletions
|
|
@ -22,10 +22,16 @@ import (
|
|||
|
||||
// TaskFrontmatter is the YAML header parsed from task .md files.
type TaskFrontmatter struct {
	Title       string   `yaml:"title"`
	Priority    string   `yaml:"priority"` // critical, high, default, low
	Tags        []string `yaml:"tags"`
	NeedsClaude bool     `yaml:"needs_claude_code"`
	// Phase 2/G2: cross-project dependencies. Each entry is
	// "project:task_id" (e.g. "filesecure:FIX-0123"). The dispatcher
	// refuses to launch a task whose deps aren't all in the target
	// project's done/, and drops a `<task>.md.blocked` marker so the
	// watchdog can resolve it later (Phase 2/G1).
	DependsOn []string `yaml:"depends_on"`
}
|
||||
|
||||
// Dispatcher watches project inbox directories and assigns tasks to idle sessions.
|
||||
|
|
@ -121,15 +127,26 @@ func (d *Dispatcher) dispatchProject(inboxDir string) {
|
|||
projectDir := filepath.Dir(filepath.Dir(inboxDir)) // inboxDir/.agent-queue/inbox → project
|
||||
for _, e := range entries {
|
||||
name := e.Name()
|
||||
if !strings.HasSuffix(name, ".md") || strings.Contains(name, ".dispatched") {
|
||||
// Skip non-md files, .dispatched markers, .blocked markers, and any
|
||||
// other sidecar (.stuck, .tmp, etc).
|
||||
if !strings.HasSuffix(name, ".md") {
|
||||
continue
|
||||
}
|
||||
taskPath := filepath.Join(inboxDir, name)
|
||||
|
||||
// Phase 2/G2: enforce depends_on before claiming a session. The
|
||||
// session pool is a precious resource — we don't want to burn an
|
||||
// idle slot on a task that can't proceed.
|
||||
if d.taskBlocked(taskPath) {
|
||||
d.touchBlockedMarker(taskPath)
|
||||
continue
|
||||
}
|
||||
|
||||
session := d.findFreeSession()
|
||||
if session == "" {
|
||||
d.logger.Printf("[dispatcher] no free session for task in %s", inboxDir)
|
||||
return
|
||||
}
|
||||
taskPath := filepath.Join(inboxDir, name)
|
||||
if err := d.launchAgent(session, projectDir, taskPath); err != nil {
|
||||
d.logger.Printf("[dispatcher] launchAgent error: %v", err)
|
||||
continue
|
||||
|
|
@ -140,6 +157,106 @@ func (d *Dispatcher) dispatchProject(inboxDir string) {
|
|||
}
|
||||
}
|
||||
|
||||
// taskBlocked returns true when taskPath declares `depends_on` entries that
|
||||
// are not yet present in the target project's `.agent-queue/done/`. The
|
||||
// frontmatter is parsed best-effort; on parse failure we treat the task as
|
||||
// non-blocked (current behaviour preserved — bad frontmatter is the
|
||||
// agent's problem, not the dispatcher's).
|
||||
func (d *Dispatcher) taskBlocked(taskPath string) bool {
|
||||
content, err := os.ReadFile(taskPath)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
fm, _ := parseFrontmatter(content)
|
||||
if len(fm.DependsOn) == 0 {
|
||||
return false
|
||||
}
|
||||
for _, dep := range fm.DependsOn {
|
||||
if !d.dependencyResolved(dep) {
|
||||
d.logger.Printf("[dispatcher] task %s blocked by unresolved dep %q",
|
||||
filepath.Base(taskPath), dep)
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// dependencyResolved checks whether `project:task_id` is in
|
||||
// `<projectsDir>/<project>/.agent-queue/done/<task_id>.md`. Accepts an
|
||||
// exact match or a `<task_id>*.md` prefix match (some queues append a
|
||||
// timestamp). Path segments are validated (no `..`, no `/` inside the
|
||||
// segment) — see isSafeSegment.
|
||||
func (d *Dispatcher) dependencyResolved(dep string) bool {
|
||||
parts := strings.SplitN(dep, ":", 2)
|
||||
if len(parts) != 2 {
|
||||
d.logger.Printf("[dispatcher] malformed depends_on %q (expected 'project:task_id')", dep)
|
||||
return false
|
||||
}
|
||||
project, taskID := parts[0], parts[1]
|
||||
if !isSafeSegment(project) || !isSafeSegment(taskID) {
|
||||
d.logger.Printf("[dispatcher] unsafe depends_on segment %q — refusing to look up", dep)
|
||||
return false
|
||||
}
|
||||
doneDir := filepath.Join(d.projectsDir, project, ".agent-queue", "done")
|
||||
exact := filepath.Join(doneDir, taskID+".md")
|
||||
if _, err := os.Stat(exact); err == nil {
|
||||
return true
|
||||
}
|
||||
entries, err := os.ReadDir(doneDir)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
for _, e := range entries {
|
||||
if e.IsDir() {
|
||||
continue
|
||||
}
|
||||
name := e.Name()
|
||||
if strings.HasPrefix(name, taskID) && strings.HasSuffix(name, ".md") &&
|
||||
!strings.HasSuffix(name, ".dispatched") {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// touchBlockedMarker creates `<taskPath>.blocked` (or refreshes its mtime)
|
||||
// so the watchdog (Phase 2/G1) sees the task as blocked and tracks its
|
||||
// timeout. We use mtime as the "blocked since" timestamp; we do NOT
|
||||
// refresh it on subsequent ticks — the operator wants the timeout to be
|
||||
// counted from the FIRST detection, not the last.
|
||||
func (d *Dispatcher) touchBlockedMarker(taskPath string) {
|
||||
marker := taskPath + ".blocked"
|
||||
if _, err := os.Stat(marker); err == nil {
|
||||
return // marker already exists, leave mtime alone
|
||||
}
|
||||
if err := os.WriteFile(marker, []byte(""), 0o644); err != nil {
|
||||
d.logger.Printf("[dispatcher] write .blocked marker for %s: %v",
|
||||
filepath.Base(taskPath), err)
|
||||
return
|
||||
}
|
||||
d.logger.Printf("[dispatcher] task %s marked .blocked (waiting on depends_on)",
|
||||
filepath.Base(taskPath))
|
||||
}
|
||||
|
||||
// isSafeSegment reports whether s may be used as a single path component
// (the project or the task_id half of a depends_on entry) without risking
// path traversal. A segment is accepted only when it is non-empty, is not
// "." or "..", and consists solely of bytes from `[A-Za-z0-9._-]`. The
// original note states this is the same rule the watchdog applies.
//
// Every accepted character is ASCII, so scanning bytes is sufficient: any
// byte of a multi-byte or invalid UTF-8 sequence falls outside the
// allowed set and rejects the segment.
func isSafeSegment(s string) bool {
	switch s {
	case "", ".", "..":
		return false
	}

	for i := 0; i < len(s); i++ {
		c := s[i]
		lower := 'a' <= c && c <= 'z'
		upper := 'A' <= c && c <= 'Z'
		digit := '0' <= c && c <= '9'
		punct := c == '_' || c == '-' || c == '.'
		if !(lower || upper || digit || punct) {
			return false
		}
	}
	return true
}
|
||||
|
||||
// findFreeSession returns the name of an idle, live, cooldown-free session
|
||||
// from the autonomous pool. Dedicated sessions are intentionally NOT
|
||||
// considered: those host the operator's manual interactive work. Routing a
|
||||
|
|
@ -184,10 +301,17 @@ func (d *Dispatcher) assignNextTask(session string) {
|
|||
continue
|
||||
}
|
||||
for _, e := range entries {
|
||||
if !strings.HasSuffix(e.Name(), ".md") || strings.Contains(e.Name(), ".dispatched") {
|
||||
name := e.Name()
|
||||
if !strings.HasSuffix(name, ".md") {
|
||||
continue
|
||||
}
|
||||
taskPath := filepath.Join(inbox, name)
|
||||
// Phase 2/G2: respect depends_on here too, otherwise a
|
||||
// session freed mid-cycle would still bypass the gate.
|
||||
if d.taskBlocked(taskPath) {
|
||||
d.touchBlockedMarker(taskPath)
|
||||
continue
|
||||
}
|
||||
taskPath := filepath.Join(inbox, e.Name())
|
||||
if err := d.launchAgent(session, ds.Project, taskPath); err == nil {
|
||||
os.Rename(taskPath, taskPath+".dispatched")
|
||||
return
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue