"""Tests for agents/runner.py — agent execution with mocked claude CLI.""" import json import subprocess import pytest from unittest.mock import patch, MagicMock from core.db import init_db from core import models from agents.runner import run_agent, run_pipeline, run_audit, _try_parse_json @pytest.fixture def conn(): c = init_db(":memory:") models.create_project(c, "vdol", "ВДОЛЬ", "~/projects/vdolipoperek", tech_stack=["vue3"]) models.create_task(c, "VDOL-001", "vdol", "Fix bug", brief={"route_type": "debug"}) yield c c.close() def _mock_claude_success(output_data): """Create a mock subprocess result with successful claude output.""" mock = MagicMock() mock.stdout = json.dumps(output_data) if isinstance(output_data, dict) else output_data mock.stderr = "" mock.returncode = 0 return mock def _mock_claude_failure(error_msg): mock = MagicMock() mock.stdout = "" mock.stderr = error_msg mock.returncode = 1 return mock # --------------------------------------------------------------------------- # run_agent # --------------------------------------------------------------------------- class TestRunAgent: @patch("agents.runner.subprocess.run") def test_successful_agent_run(self, mock_run, conn): mock_run.return_value = _mock_claude_success({ "result": "Found race condition in useSearch.ts", "usage": {"total_tokens": 5000}, "cost_usd": 0.015, }) result = run_agent(conn, "debugger", "VDOL-001", "vdol") assert result["success"] is True assert result["role"] == "debugger" assert result["model"] == "sonnet" assert result["duration_seconds"] >= 0 # Verify claude was called with right args call_args = mock_run.call_args cmd = call_args[0][0] assert "claude" in cmd[0] assert "-p" in cmd assert "--output-format" in cmd assert "json" in cmd @patch("agents.runner.subprocess.run") def test_failed_agent_run(self, mock_run, conn): mock_run.return_value = _mock_claude_failure("API error") result = run_agent(conn, "debugger", "VDOL-001", "vdol") assert result["success"] is False # Should be logged in agent_logs logs = conn.execute("SELECT * FROM agent_logs WHERE task_id='VDOL-001'").fetchall() assert len(logs) == 1 assert logs[0]["success"] == 0 def test_dry_run_returns_prompt(self, conn): result = run_agent(conn, "debugger", "VDOL-001", "vdol", dry_run=True) assert result["dry_run"] is True assert result["prompt"] is not None assert "VDOL-001" in result["prompt"] assert result["output"] is None @patch("agents.runner.subprocess.run") def test_agent_logs_to_db(self, mock_run, conn): mock_run.return_value = _mock_claude_success({"result": "ok"}) run_agent(conn, "tester", "VDOL-001", "vdol") logs = conn.execute("SELECT * FROM agent_logs WHERE agent_role='tester'").fetchall() assert len(logs) == 1 assert logs[0]["project_id"] == "vdol" @patch("agents.runner.subprocess.run") def test_full_output_saved_to_db(self, mock_run, conn): """Bug fix: output_summary must contain the FULL output, not truncated.""" long_json = json.dumps({ "result": json.dumps({ "summary": "Security audit complete", "findings": [{"title": f"Finding {i}", "severity": "HIGH"} for i in range(50)], }), }) mock = MagicMock() mock.stdout = long_json mock.stderr = "" mock.returncode = 0 mock_run.return_value = mock run_agent(conn, "security", "VDOL-001", "vdol") logs = conn.execute("SELECT output_summary FROM agent_logs WHERE agent_role='security'").fetchall() assert len(logs) == 1 output = logs[0]["output_summary"] assert output is not None assert len(output) > 1000 # Must not be truncated # Should contain all 50 findings assert "Finding 49" in output @patch("agents.runner.subprocess.run") def test_dict_output_saved_as_json_string(self, mock_run, conn): """When claude returns structured JSON, it must be saved as string.""" mock_run.return_value = _mock_claude_success({ "result": {"status": "ok", "files": ["a.py", "b.py"]}, }) result = run_agent(conn, "debugger", "VDOL-001", "vdol") # output should be a string (JSON serialized), not a dict assert isinstance(result["raw_output"], str) logs = conn.execute("SELECT output_summary FROM agent_logs WHERE agent_role='debugger'").fetchall() saved = logs[0]["output_summary"] assert isinstance(saved, str) assert "a.py" in saved @patch("agents.runner.subprocess.run") def test_previous_output_passed(self, mock_run, conn): mock_run.return_value = _mock_claude_success({"result": "tests pass"}) run_agent(conn, "tester", "VDOL-001", "vdol", previous_output="Found bug in line 42") call_args = mock_run.call_args prompt = call_args[0][0][2] # -p argument assert "line 42" in prompt # --------------------------------------------------------------------------- # run_pipeline # --------------------------------------------------------------------------- class TestRunPipeline: @patch("agents.runner.subprocess.run") def test_successful_pipeline(self, mock_run, conn): mock_run.return_value = _mock_claude_success({"result": "done"}) steps = [ {"role": "debugger", "brief": "find bug"}, {"role": "tester", "depends_on": "debugger", "brief": "verify"}, ] result = run_pipeline(conn, "VDOL-001", steps) assert result["success"] is True assert result["steps_completed"] == 2 assert len(result["results"]) == 2 # Pipeline created in DB pipe = conn.execute("SELECT * FROM pipelines WHERE task_id='VDOL-001'").fetchone() assert pipe is not None assert pipe["status"] == "completed" # Task updated to review task = models.get_task(conn, "VDOL-001") assert task["status"] == "review" @patch("agents.runner.subprocess.run") def test_pipeline_fails_on_step(self, mock_run, conn): # First step succeeds, second fails mock_run.side_effect = [ _mock_claude_success({"result": "found bug"}), _mock_claude_failure("compilation error"), ] steps = [ {"role": "debugger", "brief": "find"}, {"role": "frontend_dev", "brief": "fix"}, {"role": "tester", "brief": "test"}, ] result = run_pipeline(conn, "VDOL-001", steps) assert result["success"] is False assert result["steps_completed"] == 1 # Only debugger completed assert "frontend_dev" in result["error"] # Pipeline marked as failed pipe = conn.execute("SELECT * FROM pipelines WHERE task_id='VDOL-001'").fetchone() assert pipe["status"] == "failed" # Task marked as blocked task = models.get_task(conn, "VDOL-001") assert task["status"] == "blocked" def test_pipeline_dry_run(self, conn): steps = [ {"role": "debugger", "brief": "find"}, {"role": "tester", "brief": "verify"}, ] result = run_pipeline(conn, "VDOL-001", steps, dry_run=True) assert result["dry_run"] is True assert result["success"] is True assert result["steps_completed"] == 2 # No pipeline created in DB pipes = conn.execute("SELECT * FROM pipelines").fetchall() assert len(pipes) == 0 @patch("agents.runner.subprocess.run") def test_pipeline_chains_output(self, mock_run, conn): """Output from step N is passed as previous_output to step N+1.""" call_count = [0] def side_effect(*args, **kwargs): call_count[0] += 1 if call_count[0] == 1: return _mock_claude_success({"result": "bug is in line 42"}) return _mock_claude_success({"result": "test written"}) mock_run.side_effect = side_effect steps = [ {"role": "debugger", "brief": "find"}, {"role": "tester", "brief": "write test"}, ] run_pipeline(conn, "VDOL-001", steps) # Second call should include first step's output in prompt second_call = mock_run.call_args_list[1] prompt = second_call[0][0][2] # -p argument assert "line 42" in prompt or "bug" in prompt def test_pipeline_task_not_found(self, conn): result = run_pipeline(conn, "NONEXISTENT", [{"role": "debugger"}]) assert result["success"] is False assert "not found" in result["error"] @patch("agents.runner.run_hooks") @patch("agents.runner.subprocess.run") def test_hooks_called_after_successful_pipeline(self, mock_run, mock_hooks, conn): mock_run.return_value = _mock_claude_success({"result": "done"}) mock_hooks.return_value = [] steps = [{"role": "debugger", "brief": "find"}] result = run_pipeline(conn, "VDOL-001", steps) assert result["success"] is True mock_hooks.assert_called_once() call_kwargs = mock_hooks.call_args assert call_kwargs[1].get("event") == "pipeline_completed" or \ call_kwargs[0][3] == "pipeline_completed" @patch("agents.runner.run_hooks") @patch("agents.runner.subprocess.run") def test_hooks_not_called_on_failed_pipeline(self, mock_run, mock_hooks, conn): mock_run.return_value = _mock_claude_failure("compilation error") mock_hooks.return_value = [] steps = [{"role": "debugger", "brief": "find"}] result = run_pipeline(conn, "VDOL-001", steps) assert result["success"] is False mock_hooks.assert_not_called() @patch("agents.runner.run_hooks") @patch("agents.runner.subprocess.run") def test_hook_failure_does_not_affect_pipeline_result(self, mock_run, mock_hooks, conn): mock_run.return_value = _mock_claude_success({"result": "done"}) mock_hooks.side_effect = Exception("hook exploded") steps = [{"role": "debugger", "brief": "find"}] # Must not raise — hook failures must not propagate result = run_pipeline(conn, "VDOL-001", steps) assert result["success"] is True # --------------------------------------------------------------------------- # Auto mode # --------------------------------------------------------------------------- class TestAutoMode: @patch("core.followup.generate_followups") @patch("agents.runner.run_hooks") @patch("agents.runner.subprocess.run") def test_auto_mode_generates_followups(self, mock_run, mock_hooks, mock_followup, conn): """Auto mode должен вызывать generate_followups после task_auto_approved.""" mock_run.return_value = _mock_claude_success({"result": "done"}) mock_hooks.return_value = [] mock_followup.return_value = {"created": [], "pending_actions": []} models.update_project(conn, "vdol", execution_mode="auto") steps = [{"role": "debugger", "brief": "find"}] result = run_pipeline(conn, "VDOL-001", steps) assert result["success"] is True mock_followup.assert_called_once_with(conn, "VDOL-001") task = models.get_task(conn, "VDOL-001") assert task["status"] == "done" @patch("core.followup.generate_followups") @patch("agents.runner.run_hooks") @patch("agents.runner.subprocess.run") def test_review_mode_skips_followups(self, mock_run, mock_hooks, mock_followup, conn): """Review mode НЕ должен вызывать generate_followups автоматически.""" mock_run.return_value = _mock_claude_success({"result": "done"}) mock_hooks.return_value = [] mock_followup.return_value = {"created": [], "pending_actions": []} # Проект остаётся в default "review" mode steps = [{"role": "debugger", "brief": "find"}] result = run_pipeline(conn, "VDOL-001", steps) assert result["success"] is True mock_followup.assert_not_called() task = models.get_task(conn, "VDOL-001") assert task["status"] == "review" @patch("core.followup.generate_followups") @patch("agents.runner.run_hooks") @patch("agents.runner.subprocess.run") def test_auto_mode_skips_followups_for_followup_tasks(self, mock_run, mock_hooks, mock_followup, conn): """Auto mode НЕ должен генерировать followups для followup-задач (предотвращение рекурсии).""" mock_run.return_value = _mock_claude_success({"result": "done"}) mock_hooks.return_value = [] mock_followup.return_value = {"created": [], "pending_actions": []} models.update_project(conn, "vdol", execution_mode="auto") models.update_task(conn, "VDOL-001", brief={"source": "followup:VDOL-000"}) steps = [{"role": "debugger", "brief": "find"}] result = run_pipeline(conn, "VDOL-001", steps) assert result["success"] is True mock_followup.assert_not_called() @patch("core.followup.auto_resolve_pending_actions") @patch("core.followup.generate_followups") @patch("agents.runner.run_hooks") @patch("agents.runner.subprocess.run") def test_auto_mode_resolves_pending_actions(self, mock_run, mock_hooks, mock_followup, mock_resolve, conn): """Auto mode должен авто-резолвить pending_actions из followup generation.""" mock_run.return_value = _mock_claude_success({"result": "done"}) mock_hooks.return_value = [] pending = [{"type": "permission_fix", "description": "Fix X", "original_item": {}, "options": ["rerun"]}] mock_followup.return_value = {"created": [], "pending_actions": pending} mock_resolve.return_value = [{"resolved": "rerun", "result": {}}] models.update_project(conn, "vdol", execution_mode="auto") steps = [{"role": "debugger", "brief": "find"}] result = run_pipeline(conn, "VDOL-001", steps) assert result["success"] is True mock_resolve.assert_called_once_with(conn, "VDOL-001", pending) # --------------------------------------------------------------------------- # JSON parsing # --------------------------------------------------------------------------- class TestTryParseJson: def test_direct_json(self): assert _try_parse_json('{"a": 1}') == {"a": 1} def test_json_in_code_fence(self): text = 'Some text\n```json\n{"a": 1}\n```\nMore text' assert _try_parse_json(text) == {"a": 1} def test_json_embedded_in_text(self): text = 'Here is the result: {"status": "ok", "count": 42} and more' result = _try_parse_json(text) assert result == {"status": "ok", "count": 42} def test_empty_string(self): assert _try_parse_json("") is None def test_no_json(self): assert _try_parse_json("just plain text") is None def test_json_array(self): assert _try_parse_json('[1, 2, 3]') == [1, 2, 3] # --------------------------------------------------------------------------- # Non-interactive mode # --------------------------------------------------------------------------- class TestNonInteractive: @patch("agents.runner.subprocess.run") def test_noninteractive_sets_stdin_devnull(self, mock_run, conn): """When noninteractive=True, subprocess.run should get stdin=subprocess.DEVNULL.""" mock_run.return_value = _mock_claude_success({"result": "ok"}) run_agent(conn, "debugger", "VDOL-001", "vdol", noninteractive=True) call_kwargs = mock_run.call_args[1] assert call_kwargs.get("stdin") == subprocess.DEVNULL @patch("agents.runner.subprocess.run") def test_noninteractive_uses_300s_timeout(self, mock_run, conn): mock_run.return_value = _mock_claude_success({"result": "ok"}) run_agent(conn, "debugger", "VDOL-001", "vdol", noninteractive=True) call_kwargs = mock_run.call_args[1] assert call_kwargs.get("timeout") == 300 @patch("agents.runner.subprocess.run") def test_interactive_uses_600s_timeout(self, mock_run, conn): mock_run.return_value = _mock_claude_success({"result": "ok"}) run_agent(conn, "debugger", "VDOL-001", "vdol", noninteractive=False) call_kwargs = mock_run.call_args[1] assert call_kwargs.get("timeout") == 300 @patch("agents.runner.subprocess.run") def test_interactive_no_stdin_override(self, mock_run, conn): """In interactive mode, stdin should not be set to DEVNULL.""" mock_run.return_value = _mock_claude_success({"result": "ok"}) run_agent(conn, "debugger", "VDOL-001", "vdol", noninteractive=False) call_kwargs = mock_run.call_args[1] assert call_kwargs.get("stdin") == subprocess.DEVNULL @patch.dict("os.environ", {"KIN_NONINTERACTIVE": "1"}) @patch("agents.runner.subprocess.run") def test_env_var_activates_noninteractive(self, mock_run, conn): """KIN_NONINTERACTIVE=1 env var should activate non-interactive mode.""" mock_run.return_value = _mock_claude_success({"result": "ok"}) run_agent(conn, "debugger", "VDOL-001", "vdol", noninteractive=False) call_kwargs = mock_run.call_args[1] assert call_kwargs.get("stdin") == subprocess.DEVNULL assert call_kwargs.get("timeout") == 300 @patch("agents.runner.subprocess.run") def test_allow_write_adds_skip_permissions(self, mock_run, conn): mock_run.return_value = _mock_claude_success({"result": "ok"}) run_agent(conn, "debugger", "VDOL-001", "vdol", allow_write=True) cmd = mock_run.call_args[0][0] assert "--dangerously-skip-permissions" in cmd @patch("agents.runner.subprocess.run") def test_no_allow_write_no_skip_permissions(self, mock_run, conn): mock_run.return_value = _mock_claude_success({"result": "ok"}) run_agent(conn, "debugger", "VDOL-001", "vdol", allow_write=False) cmd = mock_run.call_args[0][0] assert "--dangerously-skip-permissions" not in cmd # --------------------------------------------------------------------------- # run_audit # --------------------------------------------------------------------------- class TestRunAudit: @patch("agents.runner.subprocess.run") def test_audit_success(self, mock_run, conn): """Audit should return parsed already_done/still_pending/unclear.""" audit_output = json.dumps({ "already_done": [{"id": "VDOL-001", "reason": "Fixed in runner.py"}], "still_pending": [], "unclear": [], }) mock_run.return_value = _mock_claude_success({"result": audit_output}) result = run_audit(conn, "vdol") assert result["success"] is True assert len(result["already_done"]) == 1 assert result["already_done"][0]["id"] == "VDOL-001" @patch("agents.runner.subprocess.run") def test_audit_logs_to_db(self, mock_run, conn): """Audit should log to agent_logs with role=backlog_audit.""" mock_run.return_value = _mock_claude_success({ "result": json.dumps({"already_done": [], "still_pending": [], "unclear": []}), }) run_audit(conn, "vdol") logs = conn.execute( "SELECT * FROM agent_logs WHERE agent_role='backlog_audit'" ).fetchall() assert len(logs) == 1 assert logs[0]["action"] == "audit" def test_audit_no_pending_tasks(self, conn): """If no pending tasks, return success with empty lists.""" # Mark existing task as done models.update_task(conn, "VDOL-001", status="done") result = run_audit(conn, "vdol") assert result["success"] is True assert result["already_done"] == [] assert "No pending tasks" in result.get("message", "") def test_audit_project_not_found(self, conn): result = run_audit(conn, "nonexistent") assert result["success"] is False assert "not found" in result["error"] @patch("agents.runner.subprocess.run") def test_audit_uses_sonnet(self, mock_run, conn): """Audit should use sonnet model.""" mock_run.return_value = _mock_claude_success({ "result": json.dumps({"already_done": [], "still_pending": [], "unclear": []}), }) run_audit(conn, "vdol") cmd = mock_run.call_args[0][0] model_idx = cmd.index("--model") assert cmd[model_idx + 1] == "sonnet" @patch("agents.runner.subprocess.run") def test_audit_includes_tasks_in_prompt(self, mock_run, conn): """The prompt should contain the task title.""" mock_run.return_value = _mock_claude_success({ "result": json.dumps({"already_done": [], "still_pending": [], "unclear": []}), }) run_audit(conn, "vdol") prompt = mock_run.call_args[0][0][2] # -p argument assert "VDOL-001" in prompt assert "Fix bug" in prompt @patch("agents.runner.subprocess.run") def test_audit_auto_apply_marks_done(self, mock_run, conn): """auto_apply=True should mark already_done tasks as done in DB.""" mock_run.return_value = _mock_claude_success({ "result": json.dumps({ "already_done": [{"id": "VDOL-001", "reason": "Done"}], "still_pending": [], "unclear": [], }), }) result = run_audit(conn, "vdol", auto_apply=True) assert result["success"] is True assert "VDOL-001" in result["applied"] task = models.get_task(conn, "VDOL-001") assert task["status"] == "done" @patch("agents.runner.subprocess.run") def test_audit_no_auto_apply_keeps_pending(self, mock_run, conn): """auto_apply=False should NOT change task status.""" mock_run.return_value = _mock_claude_success({ "result": json.dumps({ "already_done": [{"id": "VDOL-001", "reason": "Done"}], "still_pending": [], "unclear": [], }), }) result = run_audit(conn, "vdol", auto_apply=False) assert result["success"] is True assert result["applied"] == [] task = models.get_task(conn, "VDOL-001") assert task["status"] == "pending" @patch("agents.runner.subprocess.run") def test_audit_uses_dangerously_skip_permissions(self, mock_run, conn): """Audit must use --dangerously-skip-permissions for tool access.""" mock_run.return_value = _mock_claude_success({ "result": json.dumps({"already_done": [], "still_pending": [], "unclear": []}), }) run_audit(conn, "vdol") cmd = mock_run.call_args[0][0] assert "--dangerously-skip-permissions" in cmd