Compare commits

...
Sign in to create a new pull request.

1 commit

Author SHA1 Message Date
Ubuntu
5cfb58c202 feat(dispatcher): enforce depends_on with .blocked marker (Phase 2/G2)
Before claiming a session for a task, the dispatcher now:
1. Parses the task's frontmatter
2. If `depends_on: [project:task_id]` is non-empty, checks each entry
   against `<projectsDir>/<project>/.agent-queue/done/<task_id>.md`
3. If any dep is unresolved -> skip the task and write
   `<task>.md.blocked` next to it. The watchdog (G1) will resolve
   this marker on its next tick.

The `.blocked` marker is idempotent: re-running the dispatcher does not
refresh its mtime, so the watchdog can compute the blocked-since
timestamp from the FIRST detection (timeout precision).

Path-traversal hardening: project / task_id segments must match
`[A-Za-z0-9._-]+` and cannot be `.` or `..`. A malicious frontmatter
like `depends_on: ../../tmp:foo` is rejected before any filesystem
lookup.

assignNextTask (the doneChan path) applies the same gate so that a
session freed mid-cycle cannot bypass enforcement.

Tests (-race clean):
- DependsOnUnresolved -> .blocked marker, no dispatch
- DependsOnResolved -> normal dispatch, no marker
- PartialResolution -> stay blocked
- RejectPathTraversal -> blocked, not dispatched
- BlockedMarker idempotent (mtime stable across passes)
- NoDependsOn regression guard
2026-04-16 20:30:17 +00:00
3 changed files with 439 additions and 9 deletions

View file

@ -1,4 +1,31 @@
# Version actuelle : 0.3.8 # Version actuelle : 0.4.0
## [0.4.0] - 2026-04-16
**Type:** Minor — Phase 2 / Chantier G2 : dispatcher enforce depends_on
### Ajouté
- `internal/dispatcher/dispatcher.go` : champ `DependsOn []string` ajouté à
`TaskFrontmatter` et nouvelles méthodes `taskBlocked`, `dependencyResolved`,
`touchBlockedMarker`, `isSafeSegment`.
- Avant de claim une session pour une tâche, le dispatcher parse le
frontmatter. Si `depends_on: [project:task_id]` est non vide ET non résolu
(sibling task pas encore dans `<projectsDir>/<project>/.agent-queue/done/`),
la tâche est sautée et un fichier `<task>.md.blocked` est posé à côté.
- Le marker `.blocked` est idempotent : sa mtime n'est PAS rafraîchie aux
passes suivantes pour que le watchdog (G1) puisse compter le timeout
depuis la première détection.
- Sécurité : segments `project` / `task_id` validés (`[A-Za-z0-9._-]+`,
refus de `..`, `/`, NUL) avant d'être concaténés au filesystem path.
- `assignNextTask` (chemin du session done-channel) applique la même règle.
### Tests
- `TestDispatcher_DependsOnUnresolved_DropsBlockedMarker`
- `TestDispatcher_DependsOnResolved_Dispatches`
- `TestDispatcher_DependsOn_PartialResolution`
- `TestDispatcher_DependsOn_RejectPathTraversal`
- `TestDispatcher_BlockedMarker_Idempotent`
- `TestDispatcher_NoDependsOn_DispatchesNormally` (regression guard)
- `-race` clean.
## [0.3.8] - 2026-04-16 ## [0.3.8] - 2026-04-16
**Type:** Patch — Bug #1 (A3 flip+ensure inconsistency) + Bug #10 (requiredShared contract test) **Type:** Patch — Bug #1 (A3 flip+ensure inconsistency) + Bug #10 (requiredShared contract test)

View file

@ -26,6 +26,12 @@ type TaskFrontmatter struct {
Priority string `yaml:"priority"` // critical, high, default, low Priority string `yaml:"priority"` // critical, high, default, low
Tags []string `yaml:"tags"` Tags []string `yaml:"tags"`
NeedsClaude bool `yaml:"needs_claude_code"` NeedsClaude bool `yaml:"needs_claude_code"`
// Phase 2/G2: cross-project dependencies. Each entry is
// "project:task_id" (e.g. "filesecure:FIX-0123"). The dispatcher
// refuses to launch a task whose deps aren't all in the target
// project's done/, and drops a `<task>.md.blocked` marker so the
// watchdog can resolve it later (Phase 2/G1).
DependsOn []string `yaml:"depends_on"`
} }
// Dispatcher watches project inbox directories and assigns tasks to idle sessions. // Dispatcher watches project inbox directories and assigns tasks to idle sessions.
@ -121,15 +127,26 @@ func (d *Dispatcher) dispatchProject(inboxDir string) {
projectDir := filepath.Dir(filepath.Dir(inboxDir)) // inboxDir/.agent-queue/inbox → project projectDir := filepath.Dir(filepath.Dir(inboxDir)) // inboxDir/.agent-queue/inbox → project
for _, e := range entries { for _, e := range entries {
name := e.Name() name := e.Name()
if !strings.HasSuffix(name, ".md") || strings.Contains(name, ".dispatched") { // Skip non-md files, .dispatched markers, .blocked markers, and any
// other sidecar (.stuck, .tmp, etc).
if !strings.HasSuffix(name, ".md") {
continue continue
} }
taskPath := filepath.Join(inboxDir, name)
// Phase 2/G2: enforce depends_on before claiming a session. The
// session pool is a precious resource — we don't want to burn an
// idle slot on a task that can't proceed.
if d.taskBlocked(taskPath) {
d.touchBlockedMarker(taskPath)
continue
}
session := d.findFreeSession() session := d.findFreeSession()
if session == "" { if session == "" {
d.logger.Printf("[dispatcher] no free session for task in %s", inboxDir) d.logger.Printf("[dispatcher] no free session for task in %s", inboxDir)
return return
} }
taskPath := filepath.Join(inboxDir, name)
if err := d.launchAgent(session, projectDir, taskPath); err != nil { if err := d.launchAgent(session, projectDir, taskPath); err != nil {
d.logger.Printf("[dispatcher] launchAgent error: %v", err) d.logger.Printf("[dispatcher] launchAgent error: %v", err)
continue continue
@ -140,6 +157,106 @@ func (d *Dispatcher) dispatchProject(inboxDir string) {
} }
} }
// taskBlocked returns true when taskPath declares `depends_on` entries that
// are not yet present in the target project's `.agent-queue/done/`. The
// frontmatter is parsed best-effort; on parse failure we treat the task as
// non-blocked (current behaviour preserved — bad frontmatter is the
// agent's problem, not the dispatcher's).
func (d *Dispatcher) taskBlocked(taskPath string) bool {
content, err := os.ReadFile(taskPath)
if err != nil {
return false
}
fm, _ := parseFrontmatter(content)
if len(fm.DependsOn) == 0 {
return false
}
for _, dep := range fm.DependsOn {
if !d.dependencyResolved(dep) {
d.logger.Printf("[dispatcher] task %s blocked by unresolved dep %q",
filepath.Base(taskPath), dep)
return true
}
}
return false
}
// dependencyResolved checks whether `project:task_id` is in
// `<projectsDir>/<project>/.agent-queue/done/<task_id>.md`. Accepts an
// exact match or a `<task_id>*.md` prefix match (some queues append a
// timestamp). Path segments are validated (no `..`, no `/` inside the
// segment) — see isSafeSegment.
func (d *Dispatcher) dependencyResolved(dep string) bool {
parts := strings.SplitN(dep, ":", 2)
if len(parts) != 2 {
d.logger.Printf("[dispatcher] malformed depends_on %q (expected 'project:task_id')", dep)
return false
}
project, taskID := parts[0], parts[1]
if !isSafeSegment(project) || !isSafeSegment(taskID) {
d.logger.Printf("[dispatcher] unsafe depends_on segment %q — refusing to look up", dep)
return false
}
doneDir := filepath.Join(d.projectsDir, project, ".agent-queue", "done")
exact := filepath.Join(doneDir, taskID+".md")
if _, err := os.Stat(exact); err == nil {
return true
}
entries, err := os.ReadDir(doneDir)
if err != nil {
return false
}
for _, e := range entries {
if e.IsDir() {
continue
}
name := e.Name()
if strings.HasPrefix(name, taskID) && strings.HasSuffix(name, ".md") &&
!strings.HasSuffix(name, ".dispatched") {
return true
}
}
return false
}
// touchBlockedMarker creates `<taskPath>.blocked` (or refreshes its mtime)
// so the watchdog (Phase 2/G1) sees the task as blocked and tracks its
// timeout. We use mtime as the "blocked since" timestamp; we do NOT
// refresh it on subsequent ticks — the operator wants the timeout to be
// counted from the FIRST detection, not the last.
func (d *Dispatcher) touchBlockedMarker(taskPath string) {
marker := taskPath + ".blocked"
if _, err := os.Stat(marker); err == nil {
return // marker already exists, leave mtime alone
}
if err := os.WriteFile(marker, []byte(""), 0o644); err != nil {
d.logger.Printf("[dispatcher] write .blocked marker for %s: %v",
filepath.Base(taskPath), err)
return
}
d.logger.Printf("[dispatcher] task %s marked .blocked (waiting on depends_on)",
filepath.Base(taskPath))
}
// isSafeSegment guards the project / task_id pair against path traversal.
// Same rule as the watchdog (`[A-Za-z0-9._-]+`, no `.` / `..`).
func isSafeSegment(s string) bool {
if s == "" || s == "." || s == ".." {
return false
}
for _, r := range s {
switch {
case r >= 'a' && r <= 'z',
r >= 'A' && r <= 'Z',
r >= '0' && r <= '9',
r == '_', r == '-', r == '.':
default:
return false
}
}
return true
}
// findFreeSession returns the name of an idle, live, cooldown-free session // findFreeSession returns the name of an idle, live, cooldown-free session
// from the autonomous pool. Dedicated sessions are intentionally NOT // from the autonomous pool. Dedicated sessions are intentionally NOT
// considered: those host the operator's manual interactive work. Routing a // considered: those host the operator's manual interactive work. Routing a
@ -184,10 +301,17 @@ func (d *Dispatcher) assignNextTask(session string) {
continue continue
} }
for _, e := range entries { for _, e := range entries {
if !strings.HasSuffix(e.Name(), ".md") || strings.Contains(e.Name(), ".dispatched") { name := e.Name()
if !strings.HasSuffix(name, ".md") {
continue
}
taskPath := filepath.Join(inbox, name)
// Phase 2/G2: respect depends_on here too, otherwise a
// session freed mid-cycle would still bypass the gate.
if d.taskBlocked(taskPath) {
d.touchBlockedMarker(taskPath)
continue continue
} }
taskPath := filepath.Join(inbox, e.Name())
if err := d.launchAgent(session, ds.Project, taskPath); err == nil { if err := d.launchAgent(session, ds.Project, taskPath); err == nil {
os.Rename(taskPath, taskPath+".dispatched") os.Rename(taskPath, taskPath+".dispatched")
return return

View file

@ -5,6 +5,7 @@ import (
"os" "os"
"path/filepath" "path/filepath"
"testing" "testing"
"time"
"forge.secuaas.ovh/olivier/claude-failover/internal/config" "forge.secuaas.ovh/olivier/claude-failover/internal/config"
"forge.secuaas.ovh/olivier/claude-failover/internal/state" "forge.secuaas.ovh/olivier/claude-failover/internal/state"
@ -240,3 +241,281 @@ func TestDispatchProjectNoFreeSession(t *testing.T) {
t.Error("task file should remain when no session is free") t.Error("task file should remain when no session is free")
} }
} }
// ─── Phase 2 / G2 — depends_on enforcement tests ────────────────────────────
// makeProjectInbox sets up a `<project>/.agent-queue/inbox/` directory under
// `root` and returns the inbox path.
func makeProjectInbox(t *testing.T, root, project string) string {
t.Helper()
inbox := filepath.Join(root, project, ".agent-queue", "inbox")
if err := os.MkdirAll(inbox, 0o755); err != nil {
t.Fatalf("mkdir inbox: %v", err)
}
if err := os.MkdirAll(filepath.Join(root, project, ".agent-queue", "done"), 0o755); err != nil {
t.Fatalf("mkdir done: %v", err)
}
return inbox
}
// putDoneTaskFailover seeds <root>/<project>/.agent-queue/done/<id>.md.
func putDoneTaskFailover(t *testing.T, root, project, id string) {
t.Helper()
doneDir := filepath.Join(root, project, ".agent-queue", "done")
_ = os.MkdirAll(doneDir, 0o755)
if err := os.WriteFile(filepath.Join(doneDir, id+".md"), []byte("done\n"), 0o644); err != nil {
t.Fatalf("write done: %v", err)
}
}
// TestDispatcher_DependsOnUnresolved_DropsBlockedMarker verifies that a task
// whose `depends_on` is not satisfied is NOT dispatched and that a
// `.md.blocked` marker is created next to it.
func TestDispatcher_DependsOnUnresolved_DropsBlockedMarker(t *testing.T) {
root := t.TempDir()
inboxA := makeProjectInbox(t, root, "projA")
makeProjectInbox(t, root, "projB") // empty done/
taskContent := "---\nid: TASK-A1\npriority: default\ndepends_on:\n - projB:DEP-1\n---\n\nbody\n"
taskPath := filepath.Join(inboxA, "TASK-A1.md")
if err := os.WriteFile(taskPath, []byte(taskContent), 0o644); err != nil {
t.Fatalf("write task: %v", err)
}
tc := newMockTmux()
tc.sessions["pool-0"] = true
tc.paneOutput["pool-0"] = " "
s := state.New("")
s.SetIdle("pool-0")
d := &Dispatcher{
tmux: tc,
state: s,
projectsDir: root,
config: &config.Config{
Pool: config.PoolConfig{
Autonomous: config.AutonomousConfig{Prefix: "pool-", Max: 1},
},
},
logger: log.Default(),
}
d.dispatchProject(inboxA)
if _, err := os.Stat(taskPath + ".blocked"); err != nil {
t.Errorf("expected .blocked marker: %v", err)
}
if _, err := os.Stat(taskPath + ".dispatched"); !os.IsNotExist(err) {
t.Errorf("task should NOT be dispatched while blocked")
}
if _, err := os.Stat(taskPath); err != nil {
t.Errorf("task .md should remain in inbox: %v", err)
}
if st := s.GetSession("pool-0"); st == nil || st.State != "idle" {
t.Errorf("session should stay idle when only task is blocked, got %v", st)
}
}
// TestDispatcher_DependsOnResolved_Dispatches verifies normal flow when the
// upstream task is in done/.
func TestDispatcher_DependsOnResolved_Dispatches(t *testing.T) {
root := t.TempDir()
inboxA := makeProjectInbox(t, root, "projA")
makeProjectInbox(t, root, "projB")
putDoneTaskFailover(t, root, "projB", "DEP-1")
taskContent := "---\nid: TASK-A2\npriority: default\ndepends_on:\n - projB:DEP-1\n---\n\nbody\n"
taskPath := filepath.Join(inboxA, "TASK-A2.md")
_ = os.WriteFile(taskPath, []byte(taskContent), 0o644)
tc := newMockTmux()
tc.sessions["pool-0"] = true
tc.paneOutput["pool-0"] = " "
s := state.New("")
s.SetIdle("pool-0")
d := &Dispatcher{
tmux: tc,
state: s,
projectsDir: root,
config: &config.Config{
Pool: config.PoolConfig{
Autonomous: config.AutonomousConfig{Prefix: "pool-", Max: 1},
},
},
logger: log.Default(),
}
d.dispatchProject(inboxA)
if _, err := os.Stat(taskPath + ".dispatched"); err != nil {
t.Errorf("expected .dispatched: %v", err)
}
if _, err := os.Stat(taskPath + ".blocked"); !os.IsNotExist(err) {
t.Errorf(".blocked marker should NOT be present when dep is resolved")
}
if st := s.GetSession("pool-0"); st == nil || st.State != "working" {
t.Errorf("expected working session, got %v", st)
}
}
// TestDispatcher_DependsOn_PartialResolution treats partial resolution as
// blocked.
func TestDispatcher_DependsOn_PartialResolution(t *testing.T) {
root := t.TempDir()
inboxA := makeProjectInbox(t, root, "projA")
makeProjectInbox(t, root, "projB")
makeProjectInbox(t, root, "projC")
putDoneTaskFailover(t, root, "projB", "DEP-X")
content := "---\nid: TASK-A3\npriority: default\ndepends_on:\n - projB:DEP-X\n - projC:DEP-Y\n---\n\nbody\n"
taskPath := filepath.Join(inboxA, "TASK-A3.md")
_ = os.WriteFile(taskPath, []byte(content), 0o644)
tc := newMockTmux()
tc.sessions["pool-0"] = true
tc.paneOutput["pool-0"] = " "
s := state.New("")
s.SetIdle("pool-0")
d := &Dispatcher{
tmux: tc,
state: s,
projectsDir: root,
config: &config.Config{
Pool: config.PoolConfig{
Autonomous: config.AutonomousConfig{Prefix: "pool-", Max: 1},
},
},
logger: log.Default(),
}
d.dispatchProject(inboxA)
if _, err := os.Stat(taskPath + ".blocked"); err != nil {
t.Errorf(".blocked marker expected for partial resolution: %v", err)
}
if _, err := os.Stat(taskPath + ".dispatched"); !os.IsNotExist(err) {
t.Errorf("must not dispatch with one unresolved dep")
}
}
// TestDispatcher_DependsOn_RejectPathTraversal blocks malicious depends_on
// segments containing `..` or `/`.
func TestDispatcher_DependsOn_RejectPathTraversal(t *testing.T) {
root := t.TempDir()
inboxA := makeProjectInbox(t, root, "projA")
content := "---\nid: TASK-evil\npriority: default\ndepends_on:\n - ../../tmp:foo\n---\n\nbody\n"
taskPath := filepath.Join(inboxA, "TASK-evil.md")
_ = os.WriteFile(taskPath, []byte(content), 0o644)
tc := newMockTmux()
tc.sessions["pool-0"] = true
s := state.New("")
s.SetIdle("pool-0")
d := &Dispatcher{
tmux: tc,
state: s,
projectsDir: root,
config: &config.Config{
Pool: config.PoolConfig{
Autonomous: config.AutonomousConfig{Prefix: "pool-", Max: 1},
},
},
logger: log.Default(),
}
d.dispatchProject(inboxA)
if _, err := os.Stat(taskPath + ".blocked"); err != nil {
t.Errorf("malicious dep must result in .blocked, not dispatch: %v", err)
}
if _, err := os.Stat(taskPath + ".dispatched"); !os.IsNotExist(err) {
t.Errorf("malicious dep must NOT be dispatched")
}
}
// TestDispatcher_BlockedMarker_Idempotent verifies running the dispatch
// twice on the same blocked task does not refresh the marker mtime (so the
// watchdog timeout counts from FIRST detection).
func TestDispatcher_BlockedMarker_Idempotent(t *testing.T) {
root := t.TempDir()
inboxA := makeProjectInbox(t, root, "projA")
makeProjectInbox(t, root, "projB")
content := "---\nid: TASK-idem\npriority: default\ndepends_on:\n - projB:NEVER\n---\n\nbody\n"
taskPath := filepath.Join(inboxA, "TASK-idem.md")
_ = os.WriteFile(taskPath, []byte(content), 0o644)
tc := newMockTmux()
s := state.New("")
d := &Dispatcher{
tmux: tc,
state: s,
projectsDir: root,
config: &config.Config{
Pool: config.PoolConfig{
Autonomous: config.AutonomousConfig{Prefix: "pool-", Max: 0},
},
},
logger: log.Default(),
}
d.dispatchProject(inboxA)
marker := taskPath + ".blocked"
st1, err := os.Stat(marker)
if err != nil {
t.Fatalf("expected marker after first run: %v", err)
}
mtime1 := st1.ModTime()
// Sleep enough to detect mtime drift on most filesystems (1s
// granularity). The marker mtime must be stable so the watchdog
// timeout counts from FIRST detection.
time.Sleep(1100 * time.Millisecond)
d.dispatchProject(inboxA)
st2, err := os.Stat(marker)
if err != nil {
t.Fatalf("expected marker after second run: %v", err)
}
if !st2.ModTime().Equal(mtime1) {
t.Errorf("blocked marker mtime should be stable across dispatch passes (was %v, now %v)",
mtime1, st2.ModTime())
}
}
// TestDispatcher_NoDependsOn_DispatchesNormally is a regression guard:
// tasks without depends_on must keep working exactly as before G2.
func TestDispatcher_NoDependsOn_DispatchesNormally(t *testing.T) {
root := t.TempDir()
inboxA := makeProjectInbox(t, root, "projA")
taskContent := "---\nid: TASK-plain\npriority: default\n---\nbody\n"
taskPath := filepath.Join(inboxA, "TASK-plain.md")
_ = os.WriteFile(taskPath, []byte(taskContent), 0o644)
tc := newMockTmux()
tc.sessions["pool-0"] = true
tc.paneOutput["pool-0"] = " "
s := state.New("")
s.SetIdle("pool-0")
d := &Dispatcher{
tmux: tc,
state: s,
projectsDir: root,
config: &config.Config{
Pool: config.PoolConfig{
Autonomous: config.AutonomousConfig{Prefix: "pool-", Max: 1},
},
},
logger: log.Default(),
}
d.dispatchProject(inboxA)
if _, err := os.Stat(taskPath + ".dispatched"); err != nil {
t.Errorf("plain task should be dispatched: %v", err)
}
}