feat(KIN-012): auto followup generation and pending_actions auto-resolution
Auto mode now calls generate_followups() after task_auto_approved hook. Permission-blocked followup items are auto-resolved: rerun first, fallback to manual_task on failure. Recursion guard skips followup-sourced tasks. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
01b269e2b8
commit
3cb516193b
4 changed files with 256 additions and 25 deletions
122
agents/runner.py
122
agents/runner.py
|
|
@ -11,6 +11,8 @@ import time
|
|||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import re
|
||||
|
||||
from core import models
|
||||
from core.context_builder import build_context, format_prompt
|
||||
from core.hooks import run_hooks
|
||||
|
|
@ -358,6 +360,21 @@ def run_audit(
|
|||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Permission error detection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _is_permission_error(result: dict) -> bool:
    """Return True if an agent result indicates a permission/write failure.

    Scans the agent's textual output combined with its error message against
    the shared ``PERMISSION_PATTERNS`` regex list from ``core.followup``.

    Args:
        result: Agent result dict. Reads the ``raw_output``/``output`` and
            ``error_message`` keys; any of them may be absent or ``None``.

    Returns:
        True when any permission pattern matches the combined text.
    """
    # Imported lazily — presumably to avoid a runner<->followup import cycle;
    # confirm before hoisting to module level.
    from core.followup import PERMISSION_PATTERNS

    output = result.get("raw_output") or result.get("output") or ""
    if not isinstance(output, str):
        # Structured output (dict/list) — serialize so the regexes can scan it.
        output = json.dumps(output, ensure_ascii=False)
    # error_message is normally a string, but coerce defensively so a
    # non-string value cannot raise TypeError during concatenation.
    error = result.get("error_message") or ""
    if not isinstance(error, str):
        error = json.dumps(error, ensure_ascii=False)
    text = f"{output} {error}"
    return any(re.search(p, text) for p in PERMISSION_PATTERNS)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pipeline executor
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -390,6 +407,9 @@ def run_pipeline(
|
|||
if task.get("brief") and isinstance(task["brief"], dict):
|
||||
route_type = task["brief"].get("route_type", "custom") or "custom"
|
||||
|
||||
# Determine execution mode (auto vs review)
|
||||
mode = models.get_effective_mode(conn, project_id, task_id)
|
||||
|
||||
# Create pipeline in DB
|
||||
pipeline = None
|
||||
if not dry_run:
|
||||
|
|
@ -418,9 +438,9 @@ def run_pipeline(
|
|||
allow_write=allow_write,
|
||||
noninteractive=noninteractive,
|
||||
)
|
||||
results.append(result)
|
||||
|
||||
if dry_run:
|
||||
results.append(result)
|
||||
continue
|
||||
|
||||
# Accumulate stats
|
||||
|
|
@ -429,26 +449,55 @@ def run_pipeline(
|
|||
total_duration += result.get("duration_seconds") or 0
|
||||
|
||||
if not result["success"]:
|
||||
# Pipeline failed — stop and mark as failed
|
||||
if pipeline:
|
||||
models.update_pipeline(
|
||||
conn, pipeline["id"],
|
||||
status="failed",
|
||||
total_cost_usd=total_cost,
|
||||
total_tokens=total_tokens,
|
||||
total_duration_seconds=total_duration,
|
||||
# Auto mode: retry once with allow_write on permission error
|
||||
if mode == "auto" and not allow_write and _is_permission_error(result):
|
||||
task_modules = models.get_modules(conn, project_id)
|
||||
try:
|
||||
run_hooks(conn, project_id, task_id,
|
||||
event="task_permission_retry",
|
||||
task_modules=task_modules)
|
||||
except Exception:
|
||||
pass
|
||||
retry = run_agent(
|
||||
conn, role, task_id, project_id,
|
||||
model=model,
|
||||
previous_output=previous_output,
|
||||
brief_override=brief,
|
||||
dry_run=False,
|
||||
allow_write=True,
|
||||
noninteractive=noninteractive,
|
||||
)
|
||||
models.update_task(conn, task_id, status="blocked")
|
||||
return {
|
||||
"success": False,
|
||||
"error": f"Step {i+1}/{len(steps)} ({role}) failed",
|
||||
"steps_completed": i,
|
||||
"results": results,
|
||||
"total_cost_usd": total_cost,
|
||||
"total_tokens": total_tokens,
|
||||
"total_duration_seconds": total_duration,
|
||||
"pipeline_id": pipeline["id"] if pipeline else None,
|
||||
}
|
||||
allow_write = True # subsequent steps also with allow_write
|
||||
total_cost += retry.get("cost_usd") or 0
|
||||
total_tokens += retry.get("tokens_used") or 0
|
||||
total_duration += retry.get("duration_seconds") or 0
|
||||
if retry["success"]:
|
||||
result = retry
|
||||
|
||||
if not result["success"]:
|
||||
# Still failed — block regardless of mode
|
||||
results.append(result)
|
||||
if pipeline:
|
||||
models.update_pipeline(
|
||||
conn, pipeline["id"],
|
||||
status="failed",
|
||||
total_cost_usd=total_cost,
|
||||
total_tokens=total_tokens,
|
||||
total_duration_seconds=total_duration,
|
||||
)
|
||||
models.update_task(conn, task_id, status="blocked")
|
||||
return {
|
||||
"success": False,
|
||||
"error": f"Step {i+1}/{len(steps)} ({role}) failed",
|
||||
"steps_completed": i,
|
||||
"results": results,
|
||||
"total_cost_usd": total_cost,
|
||||
"total_tokens": total_tokens,
|
||||
"total_duration_seconds": total_duration,
|
||||
"pipeline_id": pipeline["id"] if pipeline else None,
|
||||
}
|
||||
|
||||
results.append(result)
|
||||
|
||||
# Chain output to next step
|
||||
previous_output = result.get("raw_output") or result.get("output")
|
||||
|
|
@ -464,10 +513,38 @@ def run_pipeline(
|
|||
total_tokens=total_tokens,
|
||||
total_duration_seconds=total_duration,
|
||||
)
|
||||
models.update_task(conn, task_id, status="review")
|
||||
|
||||
task_modules = models.get_modules(conn, project_id)
|
||||
|
||||
if mode == "auto":
|
||||
# Auto mode: skip review, approve immediately
|
||||
models.update_task(conn, task_id, status="done")
|
||||
try:
|
||||
run_hooks(conn, project_id, task_id,
|
||||
event="task_auto_approved", task_modules=task_modules)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Auto followup: generate tasks, auto-resolve permission issues.
|
||||
# Guard: skip for followup-sourced tasks to prevent infinite recursion.
|
||||
task_brief = task.get("brief") or {}
|
||||
is_followup_task = (
|
||||
isinstance(task_brief, dict)
|
||||
and str(task_brief.get("source", "")).startswith("followup:")
|
||||
)
|
||||
if not is_followup_task:
|
||||
try:
|
||||
from core.followup import generate_followups, auto_resolve_pending_actions
|
||||
fu_result = generate_followups(conn, task_id)
|
||||
if fu_result.get("pending_actions"):
|
||||
auto_resolve_pending_actions(conn, task_id, fu_result["pending_actions"])
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
# Review mode: wait for manual approval
|
||||
models.update_task(conn, task_id, status="review")
|
||||
|
||||
# Run post-pipeline hooks (failures don't affect pipeline status)
|
||||
task_modules = models.get_modules(conn, project_id)
|
||||
try:
|
||||
run_hooks(conn, project_id, task_id,
|
||||
event="pipeline_completed", task_modules=task_modules)
|
||||
|
|
@ -483,4 +560,5 @@ def run_pipeline(
|
|||
"total_duration_seconds": total_duration,
|
||||
"pipeline_id": pipeline["id"] if pipeline else None,
|
||||
"dry_run": dry_run,
|
||||
"mode": mode,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ import sqlite3
|
|||
from core import models
|
||||
from core.context_builder import format_prompt, PROMPTS_DIR
|
||||
|
||||
_PERMISSION_PATTERNS = [
|
||||
PERMISSION_PATTERNS = [
|
||||
r"(?i)permission\s+denied",
|
||||
r"(?i)ручное\s+применение",
|
||||
r"(?i)не\s+получил[иа]?\s+разрешени[ея]",
|
||||
|
|
@ -27,7 +27,7 @@ _PERMISSION_PATTERNS = [
|
|||
def _is_permission_blocked(item: dict) -> bool:
    """Check if a follow-up item describes a permission/write failure."""
    # Combine title and brief into one lowercase haystack for pattern matching.
    haystack = " ".join((item.get("title", ""), item.get("brief", ""))).lower()
    return any(re.search(pattern, haystack) for pattern in PERMISSION_PATTERNS)
|
||||
|
||||
|
||||
def _collect_pipeline_output(conn: sqlite3.Connection, task_id: str) -> str:
|
||||
|
|
@ -230,3 +230,30 @@ def resolve_pending_action(
|
|||
return {"rerun_result": result}
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def auto_resolve_pending_actions(
    conn: sqlite3.Connection,
    task_id: str,
    pending_actions: list,
) -> list:
    """Auto-resolve pending permission actions in auto mode.

    Each action is first retried via the 'rerun' strategy; when the rerun
    does not succeed, the action is escalated to a 'manual_task' so a human
    can review it.

    Returns a list of ``{"resolved": <strategy>, "result": <resolution>}``
    dicts, one per input action.
    """
    resolutions = []
    for action in pending_actions:
        attempt = resolve_pending_action(conn, task_id, action, "rerun")
        rerun = attempt.get("rerun_result") if isinstance(attempt, dict) else None
        if isinstance(rerun, dict) and rerun.get("success"):
            resolutions.append({"resolved": "rerun", "result": attempt})
            continue
        # Rerun failed — create a manual task for human review.
        escalated = resolve_pending_action(conn, task_id, action, "manual_task")
        resolutions.append({"resolved": "manual_task", "result": escalated})
    return resolutions
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ from unittest.mock import patch, MagicMock
|
|||
from core.db import init_db
|
||||
from core import models
|
||||
from core.followup import (
|
||||
generate_followups, resolve_pending_action,
|
||||
generate_followups, resolve_pending_action, auto_resolve_pending_actions,
|
||||
_collect_pipeline_output, _next_task_id, _is_permission_blocked,
|
||||
)
|
||||
|
||||
|
|
@ -222,3 +222,48 @@ class TestResolvePendingAction:
|
|||
def test_nonexistent_task(self, conn):
|
||||
action = {"type": "permission_fix", "original_item": {}}
|
||||
assert resolve_pending_action(conn, "NOPE", action, "skip") is None
|
||||
|
||||
|
||||
class TestAutoResolvePendingActions:
    """Tests for auto_resolve_pending_actions (auto-mode permission recovery)."""

    @patch("agents.runner._run_claude")
    def test_rerun_success_resolves_as_rerun(self, mock_claude, conn):
        """A successful rerun must be resolved with the 'rerun' strategy."""
        mock_claude.return_value = {
            "output": json.dumps({"result": "fixed"}),
            "returncode": 0,
        }
        action = {
            "type": "permission_fix",
            "description": "Fix X",
            "original_item": {"title": "Fix X", "type": "frontend_dev", "brief": "Apply fix"},
            "options": ["rerun", "manual_task", "skip"],
        }
        results = auto_resolve_pending_actions(conn, "VDOL-001", [action])

        assert len(results) == 1
        assert results[0]["resolved"] == "rerun"

    @patch("agents.runner._run_claude")
    def test_rerun_failure_escalates_to_manual_task(self, mock_claude, conn):
        """A failed rerun must create a manual_task escalation."""
        mock_claude.return_value = {"output": "", "returncode": 1}
        action = {
            "type": "permission_fix",
            "description": "Fix X",
            "original_item": {"title": "Fix X", "type": "frontend_dev", "brief": "Apply fix"},
            "options": ["rerun", "manual_task", "skip"],
        }
        results = auto_resolve_pending_actions(conn, "VDOL-001", [action])

        assert len(results) == 1
        assert results[0]["resolved"] == "manual_task"
        # The escalation must have created a manual task in the DB.
        tasks = models.list_tasks(conn, project_id="vdol")
        assert len(tasks) == 2  # VDOL-001 + the new manual task

    @patch("agents.runner._run_claude")
    def test_empty_pending_actions(self, mock_claude, conn):
        """An empty action list yields an empty result and no agent calls."""
        results = auto_resolve_pending_actions(conn, "VDOL-001", [])
        assert results == []
        mock_claude.assert_not_called()
|
||||
|
|
|
|||
|
|
@ -289,6 +289,87 @@ class TestRunPipeline:
|
|||
assert result["success"] is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Auto mode
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestAutoMode:
    """Tests for auto execution mode: followup generation and pending-action auto-resolution."""

    @patch("core.followup.generate_followups")
    @patch("agents.runner.run_hooks")
    @patch("agents.runner.subprocess.run")
    def test_auto_mode_generates_followups(self, mock_run, mock_hooks, mock_followup, conn):
        """Auto mode must call generate_followups after task_auto_approved."""
        mock_run.return_value = _mock_claude_success({"result": "done"})
        mock_hooks.return_value = []
        mock_followup.return_value = {"created": [], "pending_actions": []}

        models.update_project(conn, "vdol", execution_mode="auto")
        steps = [{"role": "debugger", "brief": "find"}]
        result = run_pipeline(conn, "VDOL-001", steps)

        assert result["success"] is True
        mock_followup.assert_called_once_with(conn, "VDOL-001")
        task = models.get_task(conn, "VDOL-001")
        assert task["status"] == "done"

    @patch("core.followup.generate_followups")
    @patch("agents.runner.run_hooks")
    @patch("agents.runner.subprocess.run")
    def test_review_mode_skips_followups(self, mock_run, mock_hooks, mock_followup, conn):
        """Review mode must NOT call generate_followups automatically."""
        mock_run.return_value = _mock_claude_success({"result": "done"})
        mock_hooks.return_value = []
        mock_followup.return_value = {"created": [], "pending_actions": []}

        # The project stays in the default "review" mode.
        steps = [{"role": "debugger", "brief": "find"}]
        result = run_pipeline(conn, "VDOL-001", steps)

        assert result["success"] is True
        mock_followup.assert_not_called()
        task = models.get_task(conn, "VDOL-001")
        assert task["status"] == "review"

    @patch("core.followup.generate_followups")
    @patch("agents.runner.run_hooks")
    @patch("agents.runner.subprocess.run")
    def test_auto_mode_skips_followups_for_followup_tasks(self, mock_run, mock_hooks, mock_followup, conn):
        """Auto mode must not generate followups for followup-sourced tasks (recursion guard)."""
        mock_run.return_value = _mock_claude_success({"result": "done"})
        mock_hooks.return_value = []
        mock_followup.return_value = {"created": [], "pending_actions": []}

        models.update_project(conn, "vdol", execution_mode="auto")
        models.update_task(conn, "VDOL-001", brief={"source": "followup:VDOL-000"})

        steps = [{"role": "debugger", "brief": "find"}]
        result = run_pipeline(conn, "VDOL-001", steps)

        assert result["success"] is True
        mock_followup.assert_not_called()

    @patch("core.followup.auto_resolve_pending_actions")
    @patch("core.followup.generate_followups")
    @patch("agents.runner.run_hooks")
    @patch("agents.runner.subprocess.run")
    def test_auto_mode_resolves_pending_actions(self, mock_run, mock_hooks, mock_followup, mock_resolve, conn):
        """Auto mode must auto-resolve pending_actions returned by followup generation."""
        mock_run.return_value = _mock_claude_success({"result": "done"})
        mock_hooks.return_value = []

        pending = [{"type": "permission_fix", "description": "Fix X",
                    "original_item": {}, "options": ["rerun"]}]
        mock_followup.return_value = {"created": [], "pending_actions": pending}
        mock_resolve.return_value = [{"resolved": "rerun", "result": {}}]

        models.update_project(conn, "vdol", execution_mode="auto")
        steps = [{"role": "debugger", "brief": "find"}]
        result = run_pipeline(conn, "VDOL-001", steps)

        assert result["success"] is True
        mock_resolve.assert_called_once_with(conn, "VDOL-001", pending)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# JSON parsing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue