feat(KIN-012): auto followup generation and pending_actions auto-resolution
Auto mode now calls generate_followups() after the task_auto_approved hook. Permission-blocked followup items are auto-resolved: rerun first, falling back to manual_task on failure. A recursion guard skips followup-sourced tasks. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
01b269e2b8
commit
3cb516193b
4 changed files with 256 additions and 25 deletions
122
agents/runner.py
122
agents/runner.py
|
|
@ -11,6 +11,8 @@ import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from core import models
|
from core import models
|
||||||
from core.context_builder import build_context, format_prompt
|
from core.context_builder import build_context, format_prompt
|
||||||
from core.hooks import run_hooks
|
from core.hooks import run_hooks
|
||||||
|
|
@ -358,6 +360,21 @@ def run_audit(
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Permission error detection
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _is_permission_error(result: dict) -> bool:
    """Return True if agent result indicates a permission/write failure.

    Scans the agent output plus the error message against the shared
    PERMISSION_PATTERNS regex list owned by core.followup (imported
    lazily to avoid a circular import at module load time).
    """
    from core.followup import PERMISSION_PATTERNS

    raw = result.get("raw_output") or result.get("output") or ""
    # Output may be structured (dict/list); normalize to text for matching.
    if not isinstance(raw, str):
        raw = json.dumps(raw, ensure_ascii=False)
    haystack = raw + " " + (result.get("error_message") or "")
    return any(re.search(pattern, haystack) for pattern in PERMISSION_PATTERNS)
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Pipeline executor
|
# Pipeline executor
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
@ -390,6 +407,9 @@ def run_pipeline(
|
||||||
if task.get("brief") and isinstance(task["brief"], dict):
|
if task.get("brief") and isinstance(task["brief"], dict):
|
||||||
route_type = task["brief"].get("route_type", "custom") or "custom"
|
route_type = task["brief"].get("route_type", "custom") or "custom"
|
||||||
|
|
||||||
|
# Determine execution mode (auto vs review)
|
||||||
|
mode = models.get_effective_mode(conn, project_id, task_id)
|
||||||
|
|
||||||
# Create pipeline in DB
|
# Create pipeline in DB
|
||||||
pipeline = None
|
pipeline = None
|
||||||
if not dry_run:
|
if not dry_run:
|
||||||
|
|
@ -418,9 +438,9 @@ def run_pipeline(
|
||||||
allow_write=allow_write,
|
allow_write=allow_write,
|
||||||
noninteractive=noninteractive,
|
noninteractive=noninteractive,
|
||||||
)
|
)
|
||||||
results.append(result)
|
|
||||||
|
|
||||||
if dry_run:
|
if dry_run:
|
||||||
|
results.append(result)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Accumulate stats
|
# Accumulate stats
|
||||||
|
|
@ -429,26 +449,55 @@ def run_pipeline(
|
||||||
total_duration += result.get("duration_seconds") or 0
|
total_duration += result.get("duration_seconds") or 0
|
||||||
|
|
||||||
if not result["success"]:
|
if not result["success"]:
|
||||||
# Pipeline failed — stop and mark as failed
|
# Auto mode: retry once with allow_write on permission error
|
||||||
if pipeline:
|
if mode == "auto" and not allow_write and _is_permission_error(result):
|
||||||
models.update_pipeline(
|
task_modules = models.get_modules(conn, project_id)
|
||||||
conn, pipeline["id"],
|
try:
|
||||||
status="failed",
|
run_hooks(conn, project_id, task_id,
|
||||||
total_cost_usd=total_cost,
|
event="task_permission_retry",
|
||||||
total_tokens=total_tokens,
|
task_modules=task_modules)
|
||||||
total_duration_seconds=total_duration,
|
except Exception:
|
||||||
|
pass
|
||||||
|
retry = run_agent(
|
||||||
|
conn, role, task_id, project_id,
|
||||||
|
model=model,
|
||||||
|
previous_output=previous_output,
|
||||||
|
brief_override=brief,
|
||||||
|
dry_run=False,
|
||||||
|
allow_write=True,
|
||||||
|
noninteractive=noninteractive,
|
||||||
)
|
)
|
||||||
models.update_task(conn, task_id, status="blocked")
|
allow_write = True # subsequent steps also with allow_write
|
||||||
return {
|
total_cost += retry.get("cost_usd") or 0
|
||||||
"success": False,
|
total_tokens += retry.get("tokens_used") or 0
|
||||||
"error": f"Step {i+1}/{len(steps)} ({role}) failed",
|
total_duration += retry.get("duration_seconds") or 0
|
||||||
"steps_completed": i,
|
if retry["success"]:
|
||||||
"results": results,
|
result = retry
|
||||||
"total_cost_usd": total_cost,
|
|
||||||
"total_tokens": total_tokens,
|
if not result["success"]:
|
||||||
"total_duration_seconds": total_duration,
|
# Still failed — block regardless of mode
|
||||||
"pipeline_id": pipeline["id"] if pipeline else None,
|
results.append(result)
|
||||||
}
|
if pipeline:
|
||||||
|
models.update_pipeline(
|
||||||
|
conn, pipeline["id"],
|
||||||
|
status="failed",
|
||||||
|
total_cost_usd=total_cost,
|
||||||
|
total_tokens=total_tokens,
|
||||||
|
total_duration_seconds=total_duration,
|
||||||
|
)
|
||||||
|
models.update_task(conn, task_id, status="blocked")
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"error": f"Step {i+1}/{len(steps)} ({role}) failed",
|
||||||
|
"steps_completed": i,
|
||||||
|
"results": results,
|
||||||
|
"total_cost_usd": total_cost,
|
||||||
|
"total_tokens": total_tokens,
|
||||||
|
"total_duration_seconds": total_duration,
|
||||||
|
"pipeline_id": pipeline["id"] if pipeline else None,
|
||||||
|
}
|
||||||
|
|
||||||
|
results.append(result)
|
||||||
|
|
||||||
# Chain output to next step
|
# Chain output to next step
|
||||||
previous_output = result.get("raw_output") or result.get("output")
|
previous_output = result.get("raw_output") or result.get("output")
|
||||||
|
|
@ -464,10 +513,38 @@ def run_pipeline(
|
||||||
total_tokens=total_tokens,
|
total_tokens=total_tokens,
|
||||||
total_duration_seconds=total_duration,
|
total_duration_seconds=total_duration,
|
||||||
)
|
)
|
||||||
models.update_task(conn, task_id, status="review")
|
|
||||||
|
task_modules = models.get_modules(conn, project_id)
|
||||||
|
|
||||||
|
if mode == "auto":
|
||||||
|
# Auto mode: skip review, approve immediately
|
||||||
|
models.update_task(conn, task_id, status="done")
|
||||||
|
try:
|
||||||
|
run_hooks(conn, project_id, task_id,
|
||||||
|
event="task_auto_approved", task_modules=task_modules)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Auto followup: generate tasks, auto-resolve permission issues.
|
||||||
|
# Guard: skip for followup-sourced tasks to prevent infinite recursion.
|
||||||
|
task_brief = task.get("brief") or {}
|
||||||
|
is_followup_task = (
|
||||||
|
isinstance(task_brief, dict)
|
||||||
|
and str(task_brief.get("source", "")).startswith("followup:")
|
||||||
|
)
|
||||||
|
if not is_followup_task:
|
||||||
|
try:
|
||||||
|
from core.followup import generate_followups, auto_resolve_pending_actions
|
||||||
|
fu_result = generate_followups(conn, task_id)
|
||||||
|
if fu_result.get("pending_actions"):
|
||||||
|
auto_resolve_pending_actions(conn, task_id, fu_result["pending_actions"])
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
# Review mode: wait for manual approval
|
||||||
|
models.update_task(conn, task_id, status="review")
|
||||||
|
|
||||||
# Run post-pipeline hooks (failures don't affect pipeline status)
|
# Run post-pipeline hooks (failures don't affect pipeline status)
|
||||||
task_modules = models.get_modules(conn, project_id)
|
|
||||||
try:
|
try:
|
||||||
run_hooks(conn, project_id, task_id,
|
run_hooks(conn, project_id, task_id,
|
||||||
event="pipeline_completed", task_modules=task_modules)
|
event="pipeline_completed", task_modules=task_modules)
|
||||||
|
|
@ -483,4 +560,5 @@ def run_pipeline(
|
||||||
"total_duration_seconds": total_duration,
|
"total_duration_seconds": total_duration,
|
||||||
"pipeline_id": pipeline["id"] if pipeline else None,
|
"pipeline_id": pipeline["id"] if pipeline else None,
|
||||||
"dry_run": dry_run,
|
"dry_run": dry_run,
|
||||||
|
"mode": mode,
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ import sqlite3
|
||||||
from core import models
|
from core import models
|
||||||
from core.context_builder import format_prompt, PROMPTS_DIR
|
from core.context_builder import format_prompt, PROMPTS_DIR
|
||||||
|
|
||||||
_PERMISSION_PATTERNS = [
|
PERMISSION_PATTERNS = [
|
||||||
r"(?i)permission\s+denied",
|
r"(?i)permission\s+denied",
|
||||||
r"(?i)ручное\s+применение",
|
r"(?i)ручное\s+применение",
|
||||||
r"(?i)не\s+получил[иа]?\s+разрешени[ея]",
|
r"(?i)не\s+получил[иа]?\s+разрешени[ея]",
|
||||||
|
|
@ -27,7 +27,7 @@ _PERMISSION_PATTERNS = [
|
||||||
def _is_permission_blocked(item: dict) -> bool:
    """Check if a follow-up item describes a permission/write failure."""
    # Match against title and brief together, case-folded to lower.
    haystack = " ".join((item.get("title", ""), item.get("brief", ""))).lower()
    return any(re.search(pattern, haystack) for pattern in PERMISSION_PATTERNS)
|
||||||
|
|
||||||
|
|
||||||
def _collect_pipeline_output(conn: sqlite3.Connection, task_id: str) -> str:
|
def _collect_pipeline_output(conn: sqlite3.Connection, task_id: str) -> str:
|
||||||
|
|
@ -230,3 +230,30 @@ def resolve_pending_action(
|
||||||
return {"rerun_result": result}
|
return {"rerun_result": result}
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def auto_resolve_pending_actions(
    conn: sqlite3.Connection,
    task_id: str,
    pending_actions: list,
) -> list:
    """Auto-resolve pending permission actions in auto mode.

    Strategy: try 'rerun' first; if rerun fails → escalate to 'manual_task'.
    Returns list of resolution results.
    """
    resolutions = []
    for action in pending_actions:
        rerun = resolve_pending_action(conn, task_id, action, "rerun")
        # A rerun counts as successful only when the nested rerun_result
        # payload is present and reports success.
        rerun_ok = (
            isinstance(rerun, dict)
            and isinstance(rerun.get("rerun_result"), dict)
            and rerun["rerun_result"].get("success")
        )
        if rerun_ok:
            resolutions.append({"resolved": "rerun", "result": rerun})
            continue
        # Rerun failed → create manual task for human review
        manual = resolve_pending_action(conn, task_id, action, "manual_task")
        resolutions.append({"resolved": "manual_task", "result": manual})
    return resolutions
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ from unittest.mock import patch, MagicMock
|
||||||
from core.db import init_db
|
from core.db import init_db
|
||||||
from core import models
|
from core import models
|
||||||
from core.followup import (
|
from core.followup import (
|
||||||
generate_followups, resolve_pending_action,
|
generate_followups, resolve_pending_action, auto_resolve_pending_actions,
|
||||||
_collect_pipeline_output, _next_task_id, _is_permission_blocked,
|
_collect_pipeline_output, _next_task_id, _is_permission_blocked,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -222,3 +222,48 @@ class TestResolvePendingAction:
|
||||||
def test_nonexistent_task(self, conn):
|
def test_nonexistent_task(self, conn):
|
||||||
action = {"type": "permission_fix", "original_item": {}}
|
action = {"type": "permission_fix", "original_item": {}}
|
||||||
assert resolve_pending_action(conn, "NOPE", action, "skip") is None
|
assert resolve_pending_action(conn, "NOPE", action, "skip") is None
|
||||||
|
|
||||||
|
|
||||||
|
class TestAutoResolvePendingActions:
    # Tests for auto_resolve_pending_actions(): rerun-first strategy with
    # escalation to a manual task when the rerun does not succeed.

    @patch("agents.runner._run_claude")
    def test_rerun_success_resolves_as_rerun(self, mock_claude, conn):
        """A successful rerun must be resolved as 'rerun'."""
        mock_claude.return_value = {
            "output": json.dumps({"result": "fixed"}),
            "returncode": 0,
        }
        action = {
            "type": "permission_fix",
            "description": "Fix X",
            "original_item": {"title": "Fix X", "type": "frontend_dev", "brief": "Apply fix"},
            "options": ["rerun", "manual_task", "skip"],
        }
        results = auto_resolve_pending_actions(conn, "VDOL-001", [action])

        assert len(results) == 1
        assert results[0]["resolved"] == "rerun"

    @patch("agents.runner._run_claude")
    def test_rerun_failure_escalates_to_manual_task(self, mock_claude, conn):
        """A failed rerun must create a manual_task for escalation."""
        mock_claude.return_value = {"output": "", "returncode": 1}
        action = {
            "type": "permission_fix",
            "description": "Fix X",
            "original_item": {"title": "Fix X", "type": "frontend_dev", "brief": "Apply fix"},
            "options": ["rerun", "manual_task", "skip"],
        }
        results = auto_resolve_pending_actions(conn, "VDOL-001", [action])

        assert len(results) == 1
        assert results[0]["resolved"] == "manual_task"
        # The manual task must actually be created in the DB
        tasks = models.list_tasks(conn, project_id="vdol")
        assert len(tasks) == 2  # VDOL-001 + the new manual task

    @patch("agents.runner._run_claude")
    def test_empty_pending_actions(self, mock_claude, conn):
        """An empty action list yields an empty result and no agent calls."""
        results = auto_resolve_pending_actions(conn, "VDOL-001", [])
        assert results == []
        mock_claude.assert_not_called()
|
||||||
|
|
|
||||||
|
|
@ -289,6 +289,87 @@ class TestRunPipeline:
|
||||||
assert result["success"] is True
|
assert result["success"] is True
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Auto mode
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestAutoMode:
    # End-to-end pipeline tests for auto vs review execution mode:
    # followup generation, the recursion guard, and pending-action resolution.
    # NOTE: @patch decorators are applied bottom-up, so mock arguments arrive
    # in reverse decorator order — do not reorder them.

    @patch("core.followup.generate_followups")
    @patch("agents.runner.run_hooks")
    @patch("agents.runner.subprocess.run")
    def test_auto_mode_generates_followups(self, mock_run, mock_hooks, mock_followup, conn):
        """Auto mode must call generate_followups after task_auto_approved."""
        mock_run.return_value = _mock_claude_success({"result": "done"})
        mock_hooks.return_value = []
        mock_followup.return_value = {"created": [], "pending_actions": []}

        models.update_project(conn, "vdol", execution_mode="auto")
        steps = [{"role": "debugger", "brief": "find"}]
        result = run_pipeline(conn, "VDOL-001", steps)

        assert result["success"] is True
        mock_followup.assert_called_once_with(conn, "VDOL-001")
        task = models.get_task(conn, "VDOL-001")
        assert task["status"] == "done"

    @patch("core.followup.generate_followups")
    @patch("agents.runner.run_hooks")
    @patch("agents.runner.subprocess.run")
    def test_review_mode_skips_followups(self, mock_run, mock_hooks, mock_followup, conn):
        """Review mode must NOT call generate_followups automatically."""
        mock_run.return_value = _mock_claude_success({"result": "done"})
        mock_hooks.return_value = []
        mock_followup.return_value = {"created": [], "pending_actions": []}

        # The project stays in the default "review" mode
        steps = [{"role": "debugger", "brief": "find"}]
        result = run_pipeline(conn, "VDOL-001", steps)

        assert result["success"] is True
        mock_followup.assert_not_called()
        task = models.get_task(conn, "VDOL-001")
        assert task["status"] == "review"

    @patch("core.followup.generate_followups")
    @patch("agents.runner.run_hooks")
    @patch("agents.runner.subprocess.run")
    def test_auto_mode_skips_followups_for_followup_tasks(self, mock_run, mock_hooks, mock_followup, conn):
        """Auto mode must NOT generate followups for followup-sourced tasks (recursion prevention)."""
        mock_run.return_value = _mock_claude_success({"result": "done"})
        mock_hooks.return_value = []
        mock_followup.return_value = {"created": [], "pending_actions": []}

        models.update_project(conn, "vdol", execution_mode="auto")
        models.update_task(conn, "VDOL-001", brief={"source": "followup:VDOL-000"})

        steps = [{"role": "debugger", "brief": "find"}]
        result = run_pipeline(conn, "VDOL-001", steps)

        assert result["success"] is True
        mock_followup.assert_not_called()

    @patch("core.followup.auto_resolve_pending_actions")
    @patch("core.followup.generate_followups")
    @patch("agents.runner.run_hooks")
    @patch("agents.runner.subprocess.run")
    def test_auto_mode_resolves_pending_actions(self, mock_run, mock_hooks, mock_followup, mock_resolve, conn):
        """Auto mode must auto-resolve pending_actions produced by followup generation."""
        mock_run.return_value = _mock_claude_success({"result": "done"})
        mock_hooks.return_value = []

        pending = [{"type": "permission_fix", "description": "Fix X",
                    "original_item": {}, "options": ["rerun"]}]
        mock_followup.return_value = {"created": [], "pending_actions": pending}
        mock_resolve.return_value = [{"resolved": "rerun", "result": {}}]

        models.update_project(conn, "vdol", execution_mode="auto")
        steps = [{"role": "debugger", "brief": "find"}]
        result = run_pipeline(conn, "VDOL-001", steps)

        assert result["success"] is True
        mock_resolve.assert_called_once_with(conn, "VDOL-001", pending)
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# JSON parsing
|
# JSON parsing
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue