kin/tests/test_runner.py
Gros Frumos 3cb516193b feat(KIN-012): auto followup generation and pending_actions auto-resolution
Auto mode now calls generate_followups() after task_auto_approved hook.
Permission-blocked followup items are auto-resolved: rerun first, fallback
to manual_task on failure. Recursion guard skips followup-sourced tasks.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-15 19:49:34 +02:00

584 lines
23 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Tests for agents/runner.py — agent execution with mocked claude CLI."""
import json
import subprocess
import pytest
from unittest.mock import patch, MagicMock
from core.db import init_db
from core import models
from agents.runner import run_agent, run_pipeline, run_audit, _try_parse_json
@pytest.fixture
def conn():
    """Yield a database connection seeded with one project and one task.

    The database is opened via ``init_db(":memory:")`` and populated with
    project ``vdol`` and task ``VDOL-001`` ("Fix bug", debug route). The
    connection is closed after the test that requested it has finished.
    """
    db = init_db(":memory:")
    models.create_project(
        db,
        "vdol",
        "ВДОЛЬ",
        "~/projects/vdolipoperek",
        tech_stack=["vue3"],
    )
    models.create_task(
        db,
        "VDOL-001",
        "vdol",
        "Fix bug",
        brief={"route_type": "debug"},
    )
    yield db
    db.close()
def _mock_claude_success(output_data):
"""Create a mock subprocess result with successful claude output."""
mock = MagicMock()
mock.stdout = json.dumps(output_data) if isinstance(output_data, dict) else output_data
mock.stderr = ""
mock.returncode = 0
return mock
def _mock_claude_failure(error_msg):
mock = MagicMock()
mock.stdout = ""
mock.stderr = error_msg
mock.returncode = 1
return mock
# ---------------------------------------------------------------------------
# run_agent
# ---------------------------------------------------------------------------
class TestRunAgent:
    """Exercise run_agent with the claude subprocess replaced by a mock."""

    @patch("agents.runner.subprocess.run")
    def test_successful_agent_run(self, mock_run, conn):
        """A zero-exit claude call yields a successful result and the expected CLI flags."""
        payload = {
            "result": "Found race condition in useSearch.ts",
            "usage": {"total_tokens": 5000},
            "cost_usd": 0.015,
        }
        mock_run.return_value = _mock_claude_success(payload)

        outcome = run_agent(conn, "debugger", "VDOL-001", "vdol")

        assert outcome["success"] is True
        assert outcome["role"] == "debugger"
        assert outcome["model"] == "sonnet"
        assert outcome["duration_seconds"] >= 0

        # The first positional argument handed to subprocess.run is the command list.
        command = mock_run.call_args[0][0]
        assert "claude" in command[0]
        for flag in ("-p", "--output-format", "json"):
            assert flag in command

    @patch("agents.runner.subprocess.run")
    def test_failed_agent_run(self, mock_run, conn):
        """A non-zero claude exit is reported as a failure and is still logged."""
        mock_run.return_value = _mock_claude_failure("API error")

        outcome = run_agent(conn, "debugger", "VDOL-001", "vdol")
        assert outcome["success"] is False

        # The failed run must leave exactly one agent_logs row behind.
        rows = conn.execute(
            "SELECT * FROM agent_logs WHERE task_id='VDOL-001'"
        ).fetchall()
        assert len(rows) == 1
        assert rows[0]["success"] == 0

    def test_dry_run_returns_prompt(self, conn):
        """dry_run=True returns the rendered prompt and no output."""
        outcome = run_agent(conn, "debugger", "VDOL-001", "vdol", dry_run=True)

        assert outcome["dry_run"] is True
        assert outcome["prompt"] is not None
        assert "VDOL-001" in outcome["prompt"]
        assert outcome["output"] is None

    @patch("agents.runner.subprocess.run")
    def test_agent_logs_to_db(self, mock_run, conn):
        """Each agent run writes one agent_logs row carrying the project id."""
        mock_run.return_value = _mock_claude_success({"result": "ok"})

        run_agent(conn, "tester", "VDOL-001", "vdol")

        rows = conn.execute(
            "SELECT * FROM agent_logs WHERE agent_role='tester'"
        ).fetchall()
        assert len(rows) == 1
        assert rows[0]["project_id"] == "vdol"

    @patch("agents.runner.subprocess.run")
    def test_full_output_saved_to_db(self, mock_run, conn):
        """Regression: output_summary must hold the FULL output, not a truncated one."""
        findings = [
            {"title": f"Finding {i}", "severity": "HIGH"} for i in range(50)
        ]
        nested = json.dumps({
            "summary": "Security audit complete",
            "findings": findings,
        })
        long_json = json.dumps({"result": nested})
        # A str argument is used as stdout verbatim by the helper.
        mock_run.return_value = _mock_claude_success(long_json)

        run_agent(conn, "security", "VDOL-001", "vdol")

        rows = conn.execute(
            "SELECT output_summary FROM agent_logs WHERE agent_role='security'"
        ).fetchall()
        assert len(rows) == 1
        stored = rows[0]["output_summary"]
        assert stored is not None
        assert len(stored) > 1000  # Must not be truncated
        # The last of the 50 findings must have survived.
        assert "Finding 49" in stored

    @patch("agents.runner.subprocess.run")
    def test_dict_output_saved_as_json_string(self, mock_run, conn):
        """Structured JSON returned by claude must be persisted as a string."""
        structured = {"result": {"status": "ok", "files": ["a.py", "b.py"]}}
        mock_run.return_value = _mock_claude_success(structured)

        outcome = run_agent(conn, "debugger", "VDOL-001", "vdol")

        # raw_output is the JSON-serialised text, not a dict.
        assert isinstance(outcome["raw_output"], str)

        rows = conn.execute(
            "SELECT output_summary FROM agent_logs WHERE agent_role='debugger'"
        ).fetchall()
        stored = rows[0]["output_summary"]
        assert isinstance(stored, str)
        assert "a.py" in stored

    @patch("agents.runner.subprocess.run")
    def test_previous_output_passed(self, mock_run, conn):
        """previous_output text ends up inside the prompt sent to claude."""
        mock_run.return_value = _mock_claude_success({"result": "tests pass"})

        run_agent(
            conn,
            "tester",
            "VDOL-001",
            "vdol",
            previous_output="Found bug in line 42",
        )

        command = mock_run.call_args[0][0]
        prompt = command[2]  # -p argument
        assert "line 42" in prompt
# ---------------------------------------------------------------------------
# run_pipeline
# ---------------------------------------------------------------------------
class TestRunPipeline:
    """Exercise run_pipeline: step sequencing, DB bookkeeping and hooks."""

    @patch("agents.runner.subprocess.run")
    def test_successful_pipeline(self, mock_run, conn):
        """All steps succeeding completes the pipeline and moves the task to review."""
        mock_run.return_value = _mock_claude_success({"result": "done"})
        plan = [
            {"role": "debugger", "brief": "find bug"},
            {"role": "tester", "depends_on": "debugger", "brief": "verify"},
        ]

        outcome = run_pipeline(conn, "VDOL-001", plan)

        assert outcome["success"] is True
        assert outcome["steps_completed"] == 2
        assert len(outcome["results"]) == 2

        # A pipeline row was created and marked completed.
        row = conn.execute(
            "SELECT * FROM pipelines WHERE task_id='VDOL-001'"
        ).fetchone()
        assert row is not None
        assert row["status"] == "completed"

        # The task itself moved to review.
        assert models.get_task(conn, "VDOL-001")["status"] == "review"

    @patch("agents.runner.subprocess.run")
    def test_pipeline_fails_on_step(self, mock_run, conn):
        """A failing step stops the pipeline, marks it failed and blocks the task."""
        # First claude call succeeds, the second one fails.
        mock_run.side_effect = [
            _mock_claude_success({"result": "found bug"}),
            _mock_claude_failure("compilation error"),
        ]
        plan = [
            {"role": "debugger", "brief": "find"},
            {"role": "frontend_dev", "brief": "fix"},
            {"role": "tester", "brief": "test"},
        ]

        outcome = run_pipeline(conn, "VDOL-001", plan)

        assert outcome["success"] is False
        assert outcome["steps_completed"] == 1  # Only debugger completed
        assert "frontend_dev" in outcome["error"]

        # Pipeline row is marked as failed.
        row = conn.execute(
            "SELECT * FROM pipelines WHERE task_id='VDOL-001'"
        ).fetchone()
        assert row["status"] == "failed"

        # Task is marked as blocked.
        assert models.get_task(conn, "VDOL-001")["status"] == "blocked"

    def test_pipeline_dry_run(self, conn):
        """dry_run=True reports every step as completed without touching pipelines."""
        plan = [
            {"role": "debugger", "brief": "find"},
            {"role": "tester", "brief": "verify"},
        ]

        outcome = run_pipeline(conn, "VDOL-001", plan, dry_run=True)

        assert outcome["dry_run"] is True
        assert outcome["success"] is True
        assert outcome["steps_completed"] == 2

        # No pipeline row may have been written.
        rows = conn.execute("SELECT * FROM pipelines").fetchall()
        assert len(rows) == 0

    @patch("agents.runner.subprocess.run")
    def test_pipeline_chains_output(self, mock_run, conn):
        """Output from step N is passed as previous_output to step N+1."""
        seen_calls = []

        def fake_run(*args, **kwargs):
            # Only the very first invocation reports the bug location.
            seen_calls.append(args)
            if len(seen_calls) == 1:
                return _mock_claude_success({"result": "bug is in line 42"})
            return _mock_claude_success({"result": "test written"})

        mock_run.side_effect = fake_run
        plan = [
            {"role": "debugger", "brief": "find"},
            {"role": "tester", "brief": "write test"},
        ]

        run_pipeline(conn, "VDOL-001", plan)

        # The second claude call must carry the first step's output in its prompt.
        second_command = mock_run.call_args_list[1][0][0]
        prompt = second_command[2]  # -p argument
        assert "line 42" in prompt or "bug" in prompt

    def test_pipeline_task_not_found(self, conn):
        """An unknown task id produces a failure result with a 'not found' error."""
        outcome = run_pipeline(conn, "NONEXISTENT", [{"role": "debugger"}])

        assert outcome["success"] is False
        assert "not found" in outcome["error"]

    @patch("agents.runner.run_hooks")
    @patch("agents.runner.subprocess.run")
    def test_hooks_called_after_successful_pipeline(self, mock_run, mock_hooks, conn):
        """run_hooks fires exactly once, with the pipeline_completed event, on success."""
        mock_run.return_value = _mock_claude_success({"result": "done"})
        mock_hooks.return_value = []
        plan = [{"role": "debugger", "brief": "find"}]

        outcome = run_pipeline(conn, "VDOL-001", plan)

        assert outcome["success"] is True
        mock_hooks.assert_called_once()

        # The event may arrive as a keyword or as the fourth positional argument.
        hook_call = mock_hooks.call_args
        positional, keyword = hook_call[0], hook_call[1]
        assert (
            keyword.get("event") == "pipeline_completed"
            or positional[3] == "pipeline_completed"
        )

    @patch("agents.runner.run_hooks")
    @patch("agents.runner.subprocess.run")
    def test_hooks_not_called_on_failed_pipeline(self, mock_run, mock_hooks, conn):
        """run_hooks is never invoked when the pipeline fails."""
        mock_run.return_value = _mock_claude_failure("compilation error")
        mock_hooks.return_value = []
        plan = [{"role": "debugger", "brief": "find"}]

        outcome = run_pipeline(conn, "VDOL-001", plan)

        assert outcome["success"] is False
        mock_hooks.assert_not_called()

    @patch("agents.runner.run_hooks")
    @patch("agents.runner.subprocess.run")
    def test_hook_failure_does_not_affect_pipeline_result(self, mock_run, mock_hooks, conn):
        """An exception raised by run_hooks must not leak out of run_pipeline."""
        mock_run.return_value = _mock_claude_success({"result": "done"})
        mock_hooks.side_effect = Exception("hook exploded")
        plan = [{"role": "debugger", "brief": "find"}]

        # Must not raise: hook failures are not allowed to propagate.
        outcome = run_pipeline(conn, "VDOL-001", plan)

        assert outcome["success"] is True
# ---------------------------------------------------------------------------
# Auto mode
# ---------------------------------------------------------------------------
class TestAutoMode:
    """Exercise followup generation and pending-action resolution per execution mode."""

    @patch("core.followup.generate_followups")
    @patch("agents.runner.run_hooks")
    @patch("agents.runner.subprocess.run")
    def test_auto_mode_generates_followups(self, mock_run, mock_hooks, mock_followup, conn):
        """Auto mode must call generate_followups after task_auto_approved."""
        mock_run.return_value = _mock_claude_success({"result": "done"})
        mock_hooks.return_value = []
        mock_followup.return_value = {"created": [], "pending_actions": []}
        models.update_project(conn, "vdol", execution_mode="auto")
        plan = [{"role": "debugger", "brief": "find"}]

        outcome = run_pipeline(conn, "VDOL-001", plan)

        assert outcome["success"] is True
        mock_followup.assert_called_once_with(conn, "VDOL-001")
        assert models.get_task(conn, "VDOL-001")["status"] == "done"

    @patch("core.followup.generate_followups")
    @patch("agents.runner.run_hooks")
    @patch("agents.runner.subprocess.run")
    def test_review_mode_skips_followups(self, mock_run, mock_hooks, mock_followup, conn):
        """Review mode must NOT call generate_followups automatically."""
        mock_run.return_value = _mock_claude_success({"result": "done"})
        mock_hooks.return_value = []
        mock_followup.return_value = {"created": [], "pending_actions": []}
        # The project is left in its default "review" mode.
        plan = [{"role": "debugger", "brief": "find"}]

        outcome = run_pipeline(conn, "VDOL-001", plan)

        assert outcome["success"] is True
        mock_followup.assert_not_called()
        assert models.get_task(conn, "VDOL-001")["status"] == "review"

    @patch("core.followup.generate_followups")
    @patch("agents.runner.run_hooks")
    @patch("agents.runner.subprocess.run")
    def test_auto_mode_skips_followups_for_followup_tasks(self, mock_run, mock_hooks, mock_followup, conn):
        """Auto mode must NOT generate followups for followup tasks (recursion guard)."""
        mock_run.return_value = _mock_claude_success({"result": "done"})
        mock_hooks.return_value = []
        mock_followup.return_value = {"created": [], "pending_actions": []}
        models.update_project(conn, "vdol", execution_mode="auto")
        # Mark the task as having been spawned by a followup.
        models.update_task(conn, "VDOL-001", brief={"source": "followup:VDOL-000"})
        plan = [{"role": "debugger", "brief": "find"}]

        outcome = run_pipeline(conn, "VDOL-001", plan)

        assert outcome["success"] is True
        mock_followup.assert_not_called()

    @patch("core.followup.auto_resolve_pending_actions")
    @patch("core.followup.generate_followups")
    @patch("agents.runner.run_hooks")
    @patch("agents.runner.subprocess.run")
    def test_auto_mode_resolves_pending_actions(self, mock_run, mock_hooks, mock_followup, mock_resolve, conn):
        """Auto mode must auto-resolve pending_actions produced by followup generation."""
        mock_run.return_value = _mock_claude_success({"result": "done"})
        mock_hooks.return_value = []
        pending = [
            {
                "type": "permission_fix",
                "description": "Fix X",
                "original_item": {},
                "options": ["rerun"],
            }
        ]
        mock_followup.return_value = {"created": [], "pending_actions": pending}
        mock_resolve.return_value = [{"resolved": "rerun", "result": {}}]
        models.update_project(conn, "vdol", execution_mode="auto")
        plan = [{"role": "debugger", "brief": "find"}]

        outcome = run_pipeline(conn, "VDOL-001", plan)

        assert outcome["success"] is True
        mock_resolve.assert_called_once_with(conn, "VDOL-001", pending)
# ---------------------------------------------------------------------------
# JSON parsing
# ---------------------------------------------------------------------------
class TestTryParseJson:
    """Exercise _try_parse_json on plain, fenced, embedded and absent JSON."""

    def test_direct_json(self):
        """A bare JSON object string parses to the matching dict."""
        parsed = _try_parse_json('{"a": 1}')
        assert parsed == {"a": 1}

    def test_json_in_code_fence(self):
        """JSON inside a ```json fence is extracted from the surrounding text."""
        fenced = 'Some text\n```json\n{"a": 1}\n```\nMore text'
        parsed = _try_parse_json(fenced)
        assert parsed == {"a": 1}

    def test_json_embedded_in_text(self):
        """A JSON object embedded in a sentence is extracted."""
        sentence = 'Here is the result: {"status": "ok", "count": 42} and more'
        expected = {"status": "ok", "count": 42}
        assert _try_parse_json(sentence) == expected

    def test_empty_string(self):
        """An empty string yields None."""
        parsed = _try_parse_json("")
        assert parsed is None

    def test_no_json(self):
        """Text without any JSON yields None."""
        parsed = _try_parse_json("just plain text")
        assert parsed is None

    def test_json_array(self):
        """A top-level JSON array parses to a list."""
        parsed = _try_parse_json('[1, 2, 3]')
        assert parsed == [1, 2, 3]
# ---------------------------------------------------------------------------
# Non-interactive mode
# ---------------------------------------------------------------------------
class TestNonInteractive:
    """Exercise how run_agent configures subprocess.run per interaction mode.

    Covers the ``stdin`` and ``timeout`` keyword arguments for interactive
    and non-interactive runs, the ``KIN_NONINTERACTIVE`` environment
    switch, and the ``--dangerously-skip-permissions`` flag controlled by
    ``allow_write``.
    """

    @patch("agents.runner.subprocess.run")
    def test_noninteractive_sets_stdin_devnull(self, mock_run, conn):
        """When noninteractive=True, subprocess.run should get stdin=subprocess.DEVNULL."""
        mock_run.return_value = _mock_claude_success({"result": "ok"})
        run_agent(conn, "debugger", "VDOL-001", "vdol", noninteractive=True)
        call_kwargs = mock_run.call_args[1]
        assert call_kwargs.get("stdin") == subprocess.DEVNULL

    @patch("agents.runner.subprocess.run")
    def test_noninteractive_uses_300s_timeout(self, mock_run, conn):
        """Non-interactive runs use the 300-second timeout."""
        mock_run.return_value = _mock_claude_success({"result": "ok"})
        run_agent(conn, "debugger", "VDOL-001", "vdol", noninteractive=True)
        call_kwargs = mock_run.call_args[1]
        assert call_kwargs.get("timeout") == 300

    @patch("agents.runner.subprocess.run")
    def test_interactive_uses_600s_timeout(self, mock_run, conn):
        """Interactive runs use the longer 600-second timeout."""
        mock_run.return_value = _mock_claude_success({"result": "ok"})
        run_agent(conn, "debugger", "VDOL-001", "vdol", noninteractive=False)
        call_kwargs = mock_run.call_args[1]
        # Previously asserted 300, which contradicted the test name and made
        # this case indistinguishable from the non-interactive one.
        assert call_kwargs.get("timeout") == 600

    @patch("agents.runner.subprocess.run")
    def test_interactive_no_stdin_override(self, mock_run, conn):
        """In interactive mode, stdin should not be set to DEVNULL."""
        mock_run.return_value = _mock_claude_success({"result": "ok"})
        run_agent(conn, "debugger", "VDOL-001", "vdol", noninteractive=False)
        call_kwargs = mock_run.call_args[1]
        # Previously asserted equality with DEVNULL, the opposite of the
        # documented contract for interactive runs.
        assert call_kwargs.get("stdin") != subprocess.DEVNULL

    @patch.dict("os.environ", {"KIN_NONINTERACTIVE": "1"})
    @patch("agents.runner.subprocess.run")
    def test_env_var_activates_noninteractive(self, mock_run, conn):
        """KIN_NONINTERACTIVE=1 env var should activate non-interactive mode."""
        mock_run.return_value = _mock_claude_success({"result": "ok"})
        # noninteractive=False is passed on purpose: the env var must win.
        run_agent(conn, "debugger", "VDOL-001", "vdol", noninteractive=False)
        call_kwargs = mock_run.call_args[1]
        assert call_kwargs.get("stdin") == subprocess.DEVNULL
        assert call_kwargs.get("timeout") == 300

    @patch("agents.runner.subprocess.run")
    def test_allow_write_adds_skip_permissions(self, mock_run, conn):
        """allow_write=True adds --dangerously-skip-permissions to the command."""
        mock_run.return_value = _mock_claude_success({"result": "ok"})
        run_agent(conn, "debugger", "VDOL-001", "vdol", allow_write=True)
        cmd = mock_run.call_args[0][0]
        assert "--dangerously-skip-permissions" in cmd

    @patch("agents.runner.subprocess.run")
    def test_no_allow_write_no_skip_permissions(self, mock_run, conn):
        """allow_write=False leaves --dangerously-skip-permissions out of the command."""
        mock_run.return_value = _mock_claude_success({"result": "ok"})
        run_agent(conn, "debugger", "VDOL-001", "vdol", allow_write=False)
        cmd = mock_run.call_args[0][0]
        assert "--dangerously-skip-permissions" not in cmd
# ---------------------------------------------------------------------------
# run_audit
# ---------------------------------------------------------------------------
class TestRunAudit:
    """Exercise run_audit: result parsing, logging, prompt contents and auto-apply."""

    @staticmethod
    def _audit_reply(already_done=()):
        """Return a mocked claude reply whose result is a JSON audit verdict.

        ``already_done`` fills the list of the same name; ``still_pending``
        and ``unclear`` are always empty.
        """
        verdict = {
            "already_done": list(already_done),
            "still_pending": [],
            "unclear": [],
        }
        return _mock_claude_success({"result": json.dumps(verdict)})

    @patch("agents.runner.subprocess.run")
    def test_audit_success(self, mock_run, conn):
        """Audit should return parsed already_done/still_pending/unclear."""
        mock_run.return_value = self._audit_reply(
            [{"id": "VDOL-001", "reason": "Fixed in runner.py"}]
        )

        report = run_audit(conn, "vdol")

        assert report["success"] is True
        assert len(report["already_done"]) == 1
        assert report["already_done"][0]["id"] == "VDOL-001"

    @patch("agents.runner.subprocess.run")
    def test_audit_logs_to_db(self, mock_run, conn):
        """Audit should log to agent_logs with role=backlog_audit."""
        mock_run.return_value = self._audit_reply()

        run_audit(conn, "vdol")

        rows = conn.execute(
            "SELECT * FROM agent_logs WHERE agent_role='backlog_audit'"
        ).fetchall()
        assert len(rows) == 1
        assert rows[0]["action"] == "audit"

    def test_audit_no_pending_tasks(self, conn):
        """If no pending tasks, return success with empty lists."""
        # Take the only seeded task out of the pending set.
        models.update_task(conn, "VDOL-001", status="done")

        report = run_audit(conn, "vdol")

        assert report["success"] is True
        assert report["already_done"] == []
        assert "No pending tasks" in report.get("message", "")

    def test_audit_project_not_found(self, conn):
        """An unknown project id produces a failure with a 'not found' error."""
        report = run_audit(conn, "nonexistent")

        assert report["success"] is False
        assert "not found" in report["error"]

    @patch("agents.runner.subprocess.run")
    def test_audit_uses_sonnet(self, mock_run, conn):
        """Audit should use sonnet model."""
        mock_run.return_value = self._audit_reply()

        run_audit(conn, "vdol")

        command = mock_run.call_args[0][0]
        # The model name is the argument right after the --model flag.
        flag_position = command.index("--model")
        assert command[flag_position + 1] == "sonnet"

    @patch("agents.runner.subprocess.run")
    def test_audit_includes_tasks_in_prompt(self, mock_run, conn):
        """The prompt should contain the task title."""
        mock_run.return_value = self._audit_reply()

        run_audit(conn, "vdol")

        command = mock_run.call_args[0][0]
        prompt = command[2]  # -p argument
        assert "VDOL-001" in prompt
        assert "Fix bug" in prompt

    @patch("agents.runner.subprocess.run")
    def test_audit_auto_apply_marks_done(self, mock_run, conn):
        """auto_apply=True should mark already_done tasks as done in DB."""
        mock_run.return_value = self._audit_reply(
            [{"id": "VDOL-001", "reason": "Done"}]
        )

        report = run_audit(conn, "vdol", auto_apply=True)

        assert report["success"] is True
        assert "VDOL-001" in report["applied"]
        assert models.get_task(conn, "VDOL-001")["status"] == "done"

    @patch("agents.runner.subprocess.run")
    def test_audit_no_auto_apply_keeps_pending(self, mock_run, conn):
        """auto_apply=False should NOT change task status."""
        mock_run.return_value = self._audit_reply(
            [{"id": "VDOL-001", "reason": "Done"}]
        )

        report = run_audit(conn, "vdol", auto_apply=False)

        assert report["success"] is True
        assert report["applied"] == []
        assert models.get_task(conn, "VDOL-001")["status"] == "pending"

    @patch("agents.runner.subprocess.run")
    def test_audit_uses_dangerously_skip_permissions(self, mock_run, conn):
        """Audit must use --dangerously-skip-permissions for tool access."""
        mock_run.return_value = self._audit_reply()

        run_audit(conn, "vdol")

        command = mock_run.call_args[0][0]
        assert "--dangerously-skip-permissions" in command