# kin/tests/test_runner.py

"""Tests for agents/runner.py — agent execution with mocked claude CLI."""

import json
import pytest
from unittest.mock import patch, MagicMock
from core.db import init_db
from core import models
from agents.runner import run_agent, run_pipeline, _try_parse_json


@pytest.fixture
def conn():
    """Yield a database handle seeded with one project and one task.

    The handle comes from ``init_db(":memory:")`` and is populated with the
    "vdol" project and the "VDOL-001" task before being handed to the test.
    It is closed after the requesting test has finished.
    """
    db = init_db(":memory:")
    models.create_project(
        db,
        "vdol",
        "ВДОЛЬ",
        "~/projects/vdolipoperek",
        tech_stack=["vue3"],
    )
    models.create_task(
        db,
        "VDOL-001",
        "vdol",
        "Fix bug",
        brief={"route_type": "debug"},
    )
    yield db
    db.close()


def _mock_claude_success(output_data):
    """Create a mock subprocess result with successful claude output."""
    mock = MagicMock()
    mock.stdout = json.dumps(output_data) if isinstance(output_data, dict) else output_data
    mock.stderr = ""
    mock.returncode = 0
    return mock


def _mock_claude_failure(error_msg):
    mock = MagicMock()
    mock.stdout = ""
    mock.stderr = error_msg
    mock.returncode = 1
    return mock


# ---------------------------------------------------------------------------
# run_agent
# ---------------------------------------------------------------------------

class TestRunAgent:
    """Exercise run_agent() with the claude subprocess call mocked out."""

    @patch("agents.runner.subprocess.run")
    def test_successful_agent_run(self, mock_run, conn):
        """A zero-exit claude call produces a successful result for the role."""
        mock_run.return_value = _mock_claude_success({
            "result": "Found race condition in useSearch.ts",
            "usage": {"total_tokens": 5000},
            "cost_usd": 0.015,
        })

        result = run_agent(conn, "debugger", "VDOL-001", "vdol")

        assert result["success"] is True
        assert result["role"] == "debugger"
        assert result["model"] == "sonnet"
        assert result["duration_seconds"] >= 0

        # Verify claude was called with right args
        cmd = mock_run.call_args[0][0]
        assert "claude" in cmd[0]
        assert "-p" in cmd
        assert "--output-format" in cmd
        # Check the flag/value pairing, not merely that both tokens appear
        # somewhere in the command line.
        assert cmd[cmd.index("--output-format") + 1] == "json"

    @patch("agents.runner.subprocess.run")
    def test_failed_agent_run(self, mock_run, conn):
        """A non-zero exit is reported as a failure and still logged."""
        mock_run.return_value = _mock_claude_failure("API error")

        result = run_agent(conn, "debugger", "VDOL-001", "vdol")

        assert result["success"] is False

        # Should be logged in agent_logs
        logs = conn.execute("SELECT * FROM agent_logs WHERE task_id='VDOL-001'").fetchall()
        assert len(logs) == 1
        assert logs[0]["success"] == 0

    def test_dry_run_returns_prompt(self, conn):
        """With dry_run=True the prompt is returned and no output is set."""
        result = run_agent(conn, "debugger", "VDOL-001", "vdol", dry_run=True)

        assert result["dry_run"] is True
        assert result["prompt"] is not None
        assert "VDOL-001" in result["prompt"]
        assert result["output"] is None

    @patch("agents.runner.subprocess.run")
    def test_agent_logs_to_db(self, mock_run, conn):
        """A run writes one agent_logs row carrying the project id."""
        mock_run.return_value = _mock_claude_success({"result": "ok"})

        run_agent(conn, "tester", "VDOL-001", "vdol")

        logs = conn.execute("SELECT * FROM agent_logs WHERE agent_role='tester'").fetchall()
        assert len(logs) == 1
        assert logs[0]["project_id"] == "vdol"

    @patch("agents.runner.subprocess.run")
    def test_previous_output_passed(self, mock_run, conn):
        """Text given as previous_output shows up in the prompt sent to claude."""
        mock_run.return_value = _mock_claude_success({"result": "tests pass"})

        run_agent(conn, "tester", "VDOL-001", "vdol",
                  previous_output="Found bug in line 42")

        cmd = mock_run.call_args[0][0]
        # Read the prompt as the value following -p instead of relying on a
        # fixed position in the argument list.
        prompt = cmd[cmd.index("-p") + 1]
        assert "line 42" in prompt


# ---------------------------------------------------------------------------
# run_pipeline
# ---------------------------------------------------------------------------

class TestRunPipeline:
    """Exercise run_pipeline() with the claude subprocess call mocked out."""

    @patch("agents.runner.subprocess.run")
    def test_successful_pipeline(self, mock_run, conn):
        """All steps succeed: pipeline is completed and the task is in review."""
        mock_run.return_value = _mock_claude_success({"result": "done"})

        steps = [
            {"role": "debugger", "brief": "find bug"},
            {"role": "tester", "depends_on": "debugger", "brief": "verify"},
        ]
        result = run_pipeline(conn, "VDOL-001", steps)

        assert result["success"] is True
        assert result["steps_completed"] == 2
        assert len(result["results"]) == 2

        # Pipeline created in DB
        pipe = conn.execute("SELECT * FROM pipelines WHERE task_id='VDOL-001'").fetchone()
        assert pipe is not None
        assert pipe["status"] == "completed"

        # Task updated to review
        task = models.get_task(conn, "VDOL-001")
        assert task["status"] == "review"

    @patch("agents.runner.subprocess.run")
    def test_pipeline_fails_on_step(self, mock_run, conn):
        """A failing step stops the pipeline and blocks the task."""
        # First step succeeds, second fails
        mock_run.side_effect = [
            _mock_claude_success({"result": "found bug"}),
            _mock_claude_failure("compilation error"),
        ]

        steps = [
            {"role": "debugger", "brief": "find"},
            {"role": "frontend_dev", "brief": "fix"},
            {"role": "tester", "brief": "test"},
        ]
        result = run_pipeline(conn, "VDOL-001", steps)

        assert result["success"] is False
        assert result["steps_completed"] == 1  # Only debugger completed
        assert "frontend_dev" in result["error"]

        # Pipeline marked as failed
        pipe = conn.execute("SELECT * FROM pipelines WHERE task_id='VDOL-001'").fetchone()
        assert pipe["status"] == "failed"

        # Task marked as blocked
        task = models.get_task(conn, "VDOL-001")
        assert task["status"] == "blocked"

    def test_pipeline_dry_run(self, conn):
        """A dry run reports every step as completed without a pipelines row."""
        steps = [
            {"role": "debugger", "brief": "find"},
            {"role": "tester", "brief": "verify"},
        ]
        result = run_pipeline(conn, "VDOL-001", steps, dry_run=True)

        assert result["dry_run"] is True
        assert result["success"] is True
        assert result["steps_completed"] == 2

        # No pipeline created in DB
        pipes = conn.execute("SELECT * FROM pipelines").fetchall()
        assert len(pipes) == 0

    @patch("agents.runner.subprocess.run")
    def test_pipeline_chains_output(self, mock_run, conn):
        """Output from step N is passed as previous_output to step N+1."""
        call_count = [0]

        def side_effect(*args, **kwargs):
            # First invocation answers as the debugger; later ones as the tester.
            call_count[0] += 1
            if call_count[0] == 1:
                return _mock_claude_success({"result": "bug is in line 42"})
            return _mock_claude_success({"result": "test written"})

        mock_run.side_effect = side_effect

        steps = [
            {"role": "debugger", "brief": "find"},
            {"role": "tester", "brief": "write test"},
        ]
        run_pipeline(conn, "VDOL-001", steps)

        # Second call should include first step's output in prompt
        second_cmd = mock_run.call_args_list[1][0][0]
        # Read the prompt as the value following -p instead of relying on a
        # fixed position in the argument list.
        prompt = second_cmd[second_cmd.index("-p") + 1]
        # Match only text that is unique to the first step's output. A bare
        # "bug" may already be present in the prompt (the fixture task is
        # titled "Fix bug"), so it would not demonstrate chaining.
        assert "line 42" in prompt

    def test_pipeline_task_not_found(self, conn):
        """An unknown task id fails with a 'not found' error message."""
        result = run_pipeline(conn, "NONEXISTENT", [{"role": "debugger"}])
        assert result["success"] is False
        assert "not found" in result["error"]


# ---------------------------------------------------------------------------
# JSON parsing
# ---------------------------------------------------------------------------

class TestTryParseJson:
    """Checks for _try_parse_json() over the input shapes listed below."""

    def test_direct_json(self):
        """A bare JSON object string is returned as a dict."""
        parsed = _try_parse_json('{"a": 1}')
        assert parsed == {"a": 1}

    def test_json_in_code_fence(self):
        """JSON wrapped in a ```json fence is extracted from surrounding text."""
        fenced = 'Some text\n```json\n{"a": 1}\n```\nMore text'
        parsed = _try_parse_json(fenced)
        assert parsed == {"a": 1}

    def test_json_embedded_in_text(self):
        """A JSON object inside a sentence is extracted."""
        expected = {"status": "ok", "count": 42}
        sentence = 'Here is the result: {"status": "ok", "count": 42} and more'
        assert _try_parse_json(sentence) == expected

    def test_empty_string(self):
        """An empty string gives None."""
        outcome = _try_parse_json("")
        assert outcome is None

    def test_no_json(self):
        """Text without any JSON gives None."""
        outcome = _try_parse_json("just plain text")
        assert outcome is None

    def test_json_array(self):
        """A top-level JSON array is returned as a list."""
        parsed = _try_parse_json('[1, 2, 3]')
        assert parsed == [1, 2, 3]
Add context builder, agent runner, and pipeline executor.
core/context_builder.py: build_context() — assembles role-specific context from DB. PM gets everything; debugger gets gotchas/workarounds; reviewer gets conventions only; tester gets minimal context; security gets security-category decisions. format_prompt() — injects context into role templates.
agents/runner.py: run_agent() — launches claude CLI as subprocess with role prompt. run_pipeline() — executes multi-step pipelines sequentially, chains output between steps, logs to agent_logs, creates/updates pipeline records, handles failures gracefully.
agents/specialists.yaml — 8 roles with tools, permissions, context rules.
agents/prompts/pm.md — PM prompt for task decomposition.
agents/prompts/security.md — security audit prompt (OWASP, auth, secrets).
CLI: kin run <task_id> [--dry-run]: PM decomposes → shows pipeline → executes with confirmation.
31 new tests (15 context_builder, 11 runner, 5 JSON parsing). 92 total, all passing.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-15 14:03:32 +02:00			`"""Tests for agents/runner.py — agent execution with mocked claude CLI."""`

			`import json`
			`import pytest`
			`from unittest.mock import patch, MagicMock`
			`from core.db import init_db`
			`from core import models`
			`from agents.runner import run_agent, run_pipeline, _try_parse_json`


			`@pytest.fixture`
			`def conn():`
			`c = init_db(":memory:")`
			`models.create_project(c, "vdol", "ВДОЛЬ", "~/projects/vdolipoperek",`
			`tech_stack=["vue3"])`
			`models.create_task(c, "VDOL-001", "vdol", "Fix bug",`
			`brief={"route_type": "debug"})`
			`yield c`
			`c.close()`


			`def _mock_claude_success(output_data):`
			`"""Create a mock subprocess result with successful claude output."""`
			`mock = MagicMock()`
			`mock.stdout = json.dumps(output_data) if isinstance(output_data, dict) else output_data`
			`mock.stderr = ""`
			`mock.returncode = 0`
			`return mock`


			`def _mock_claude_failure(error_msg):`
			`mock = MagicMock()`
			`mock.stdout = ""`
			`mock.stderr = error_msg`
			`mock.returncode = 1`
			`return mock`


			`# ---------------------------------------------------------------------------`
			`# run_agent`
			`# ---------------------------------------------------------------------------`

			`class TestRunAgent:`
			`@patch("agents.runner.subprocess.run")`
			`def test_successful_agent_run(self, mock_run, conn):`
			`mock_run.return_value = _mock_claude_success({`
			`"result": "Found race condition in useSearch.ts",`
			`"usage": {"total_tokens": 5000},`
			`"cost_usd": 0.015,`
			`})`

			`result = run_agent(conn, "debugger", "VDOL-001", "vdol")`

			`assert result["success"] is True`
			`assert result["role"] == "debugger"`
			`assert result["model"] == "sonnet"`
			`assert result["duration_seconds"] >= 0`

			`# Verify claude was called with right args`
			`call_args = mock_run.call_args`
			`cmd = call_args[0][0]`
			`assert "claude" in cmd[0]`
			`assert "-p" in cmd`
			`assert "--output-format" in cmd`
			`assert "json" in cmd`

			`@patch("agents.runner.subprocess.run")`
			`def test_failed_agent_run(self, mock_run, conn):`
			`mock_run.return_value = _mock_claude_failure("API error")`

			`result = run_agent(conn, "debugger", "VDOL-001", "vdol")`

			`assert result["success"] is False`

			`# Should be logged in agent_logs`
			`logs = conn.execute("SELECT * FROM agent_logs WHERE task_id='VDOL-001'").fetchall()`
			`assert len(logs) == 1`
			`assert logs[0]["success"] == 0`

			`def test_dry_run_returns_prompt(self, conn):`
			`result = run_agent(conn, "debugger", "VDOL-001", "vdol", dry_run=True)`

			`assert result["dry_run"] is True`
			`assert result["prompt"] is not None`
			`assert "VDOL-001" in result["prompt"]`
			`assert result["output"] is None`

			`@patch("agents.runner.subprocess.run")`
			`def test_agent_logs_to_db(self, mock_run, conn):`
			`mock_run.return_value = _mock_claude_success({"result": "ok"})`

			`run_agent(conn, "tester", "VDOL-001", "vdol")`

			`logs = conn.execute("SELECT * FROM agent_logs WHERE agent_role='tester'").fetchall()`
			`assert len(logs) == 1`
			`assert logs[0]["project_id"] == "vdol"`

			`@patch("agents.runner.subprocess.run")`
			`def test_previous_output_passed(self, mock_run, conn):`
			`mock_run.return_value = _mock_claude_success({"result": "tests pass"})`

			`run_agent(conn, "tester", "VDOL-001", "vdol",`
			`previous_output="Found bug in line 42")`

			`call_args = mock_run.call_args`
			`prompt = call_args[0][0][2] # -p argument`
			`assert "line 42" in prompt`


			`# ---------------------------------------------------------------------------`
			`# run_pipeline`
			`# ---------------------------------------------------------------------------`

			`class TestRunPipeline:`
			`@patch("agents.runner.subprocess.run")`
			`def test_successful_pipeline(self, mock_run, conn):`
			`mock_run.return_value = _mock_claude_success({"result": "done"})`

			`steps = [`
			`{"role": "debugger", "brief": "find bug"},`
			`{"role": "tester", "depends_on": "debugger", "brief": "verify"},`
			`]`
			`result = run_pipeline(conn, "VDOL-001", steps)`

			`assert result["success"] is True`
			`assert result["steps_completed"] == 2`
			`assert len(result["results"]) == 2`

			`# Pipeline created in DB`
			`pipe = conn.execute("SELECT * FROM pipelines WHERE task_id='VDOL-001'").fetchone()`
			`assert pipe is not None`
			`assert pipe["status"] == "completed"`

			`# Task updated to review`
			`task = models.get_task(conn, "VDOL-001")`
			`assert task["status"] == "review"`

			`@patch("agents.runner.subprocess.run")`
			`def test_pipeline_fails_on_step(self, mock_run, conn):`
			`# First step succeeds, second fails`
			`mock_run.side_effect = [`
			`_mock_claude_success({"result": "found bug"}),`
			`_mock_claude_failure("compilation error"),`
			`]`

			`steps = [`
			`{"role": "debugger", "brief": "find"},`
			`{"role": "frontend_dev", "brief": "fix"},`
			`{"role": "tester", "brief": "test"},`
			`]`
			`result = run_pipeline(conn, "VDOL-001", steps)`

			`assert result["success"] is False`
			`assert result["steps_completed"] == 1 # Only debugger completed`
			`assert "frontend_dev" in result["error"]`

			`# Pipeline marked as failed`
			`pipe = conn.execute("SELECT * FROM pipelines WHERE task_id='VDOL-001'").fetchone()`
			`assert pipe["status"] == "failed"`

			`# Task marked as blocked`
			`task = models.get_task(conn, "VDOL-001")`
			`assert task["status"] == "blocked"`

			`def test_pipeline_dry_run(self, conn):`
			`steps = [`
			`{"role": "debugger", "brief": "find"},`
			`{"role": "tester", "brief": "verify"},`
			`]`
			`result = run_pipeline(conn, "VDOL-001", steps, dry_run=True)`

			`assert result["dry_run"] is True`
			`assert result["success"] is True`
			`assert result["steps_completed"] == 2`

			`# No pipeline created in DB`
			`pipes = conn.execute("SELECT * FROM pipelines").fetchall()`
			`assert len(pipes) == 0`

			`@patch("agents.runner.subprocess.run")`
			`def test_pipeline_chains_output(self, mock_run, conn):`
			`"""Output from step N is passed as previous_output to step N+1."""`
			`call_count = [0]`

			`def side_effect(*args, **kwargs):`
			`call_count[0] += 1`
			`if call_count[0] == 1:`
			`return _mock_claude_success({"result": "bug is in line 42"})`
			`return _mock_claude_success({"result": "test written"})`

			`mock_run.side_effect = side_effect`

			`steps = [`
			`{"role": "debugger", "brief": "find"},`
			`{"role": "tester", "brief": "write test"},`
			`]`
			`run_pipeline(conn, "VDOL-001", steps)`

			`# Second call should include first step's output in prompt`
			`second_call = mock_run.call_args_list[1]`
			`prompt = second_call[0][0][2] # -p argument`
			`assert "line 42" in prompt or "bug" in prompt`

			`def test_pipeline_task_not_found(self, conn):`
			`result = run_pipeline(conn, "NONEXISTENT", [{"role": "debugger"}])`
			`assert result["success"] is False`
			`assert "not found" in result["error"]`


			`# ---------------------------------------------------------------------------`
			`# JSON parsing`
			`# ---------------------------------------------------------------------------`

			`class TestTryParseJson:`
			`def test_direct_json(self):`
			`assert _try_parse_json('{"a": 1}') == {"a": 1}`

			`def test_json_in_code_fence(self):`
			text = 'Some text\n```json\n{"a": 1}\n```\nMore text'
			`assert _try_parse_json(text) == {"a": 1}`

			`def test_json_embedded_in_text(self):`
			`text = 'Here is the result: {"status": "ok", "count": 42} and more'`
			`result = _try_parse_json(text)`
			`assert result == {"status": "ok", "count": 42}`

			`def test_empty_string(self):`
			`assert _try_parse_json("") is None`

			`def test_no_json(self):`
			`assert _try_parse_json("just plain text") is None`

			`def test_json_array(self):`
			`assert _try_parse_json('[1, 2, 3]') == [1, 2, 3]`