Add context builder, agent runner, and pipeline executor

core/context_builder.py: build_context() — assembles role-specific context from DB. PM gets everything; debugger gets gotchas/workarounds; reviewer gets conventions only; tester gets minimal context; security gets security-category decisions. format_prompt() — injects context into role templates. agents/runner.py: run_agent() — launches claude CLI as subprocess with role prompt. run_pipeline() — executes multi-step pipelines sequentially, chains output between steps, logs to agent_logs, creates/updates pipeline records, handles failures gracefully. agents/specialists.yaml — 8 roles with tools, permissions, context rules. agents/prompts/pm.md — PM prompt for task decomposition. agents/prompts/security.md — security audit prompt (OWASP, auth, secrets). CLI: kin run <task_id> [--dry-run] — PM decomposes → shows pipeline → executes with confirmation. 31 new tests (15 context_builder, 10 runner, 6 JSON parsing). 92 total, all passing. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-15 14:03:32 +02:00 · 2026-03-15 14:03:32 +02:00 · fabae74c19
commit fabae74c19
parent 86e5b8febf
8 changed files with 1207 additions and 0 deletions
--- a/tests/test_runner.py
+++ b/tests/test_runner.py
@ -0,0 +1,234 @@
+"""Tests for agents/runner.py — agent execution with mocked claude CLI."""
+
+import json
+import pytest
+from unittest.mock import patch, MagicMock
+from core.db import init_db
+from core import models
+from agents.runner import run_agent, run_pipeline, _try_parse_json
+
+
+@pytest.fixture
+def conn():
+    c = init_db(":memory:")
+    models.create_project(c, "vdol", "ВДОЛЬ", "~/projects/vdolipoperek",
+                          tech_stack=["vue3"])
+    models.create_task(c, "VDOL-001", "vdol", "Fix bug",
+                       brief={"route_type": "debug"})
+    yield c
+    c.close()
+
+
+def _mock_claude_success(output_data):
+    """Create a mock subprocess result with successful claude output."""
+    mock = MagicMock()
+    mock.stdout = json.dumps(output_data) if isinstance(output_data, dict) else output_data
+    mock.stderr = ""
+    mock.returncode = 0
+    return mock
+
+
+def _mock_claude_failure(error_msg):
+    mock = MagicMock()
+    mock.stdout = ""
+    mock.stderr = error_msg
+    mock.returncode = 1
+    return mock
+
+
+# ---------------------------------------------------------------------------
+# run_agent
+# ---------------------------------------------------------------------------
+
+class TestRunAgent:
+    @patch("agents.runner.subprocess.run")
+    def test_successful_agent_run(self, mock_run, conn):
+        mock_run.return_value = _mock_claude_success({
+            "result": "Found race condition in useSearch.ts",
+            "usage": {"total_tokens": 5000},
+            "cost_usd": 0.015,
+        })
+
+        result = run_agent(conn, "debugger", "VDOL-001", "vdol")
+
+        assert result["success"] is True
+        assert result["role"] == "debugger"
+        assert result["model"] == "sonnet"
+        assert result["duration_seconds"] >= 0
+
+        # Verify claude was called with right args
+        call_args = mock_run.call_args
+        cmd = call_args[0][0]
+        assert "claude" in cmd[0]
+        assert "-p" in cmd
+        assert "--output-format" in cmd
+        assert "json" in cmd
+
+    @patch("agents.runner.subprocess.run")
+    def test_failed_agent_run(self, mock_run, conn):
+        mock_run.return_value = _mock_claude_failure("API error")
+
+        result = run_agent(conn, "debugger", "VDOL-001", "vdol")
+
+        assert result["success"] is False
+
+        # Should be logged in agent_logs
+        logs = conn.execute("SELECT * FROM agent_logs WHERE task_id='VDOL-001'").fetchall()
+        assert len(logs) == 1
+        assert logs[0]["success"] == 0
+
+    def test_dry_run_returns_prompt(self, conn):
+        result = run_agent(conn, "debugger", "VDOL-001", "vdol", dry_run=True)
+
+        assert result["dry_run"] is True
+        assert result["prompt"] is not None
+        assert "VDOL-001" in result["prompt"]
+        assert result["output"] is None
+
+    @patch("agents.runner.subprocess.run")
+    def test_agent_logs_to_db(self, mock_run, conn):
+        mock_run.return_value = _mock_claude_success({"result": "ok"})
+
+        run_agent(conn, "tester", "VDOL-001", "vdol")
+
+        logs = conn.execute("SELECT * FROM agent_logs WHERE agent_role='tester'").fetchall()
+        assert len(logs) == 1
+        assert logs[0]["project_id"] == "vdol"
+
+    @patch("agents.runner.subprocess.run")
+    def test_previous_output_passed(self, mock_run, conn):
+        mock_run.return_value = _mock_claude_success({"result": "tests pass"})
+
+        run_agent(conn, "tester", "VDOL-001", "vdol",
+                  previous_output="Found bug in line 42")
+
+        call_args = mock_run.call_args
+        prompt = call_args[0][0][2]  # -p argument
+        assert "line 42" in prompt
+
+
+# ---------------------------------------------------------------------------
+# run_pipeline
+# ---------------------------------------------------------------------------
+
+class TestRunPipeline:
+    @patch("agents.runner.subprocess.run")
+    def test_successful_pipeline(self, mock_run, conn):
+        mock_run.return_value = _mock_claude_success({"result": "done"})
+
+        steps = [
+            {"role": "debugger", "brief": "find bug"},
+            {"role": "tester", "depends_on": "debugger", "brief": "verify"},
+        ]
+        result = run_pipeline(conn, "VDOL-001", steps)
+
+        assert result["success"] is True
+        assert result["steps_completed"] == 2
+        assert len(result["results"]) == 2
+
+        # Pipeline created in DB
+        pipe = conn.execute("SELECT * FROM pipelines WHERE task_id='VDOL-001'").fetchone()
+        assert pipe is not None
+        assert pipe["status"] == "completed"
+
+        # Task updated to review
+        task = models.get_task(conn, "VDOL-001")
+        assert task["status"] == "review"
+
+    @patch("agents.runner.subprocess.run")
+    def test_pipeline_fails_on_step(self, mock_run, conn):
+        # First step succeeds, second fails
+        mock_run.side_effect = [
+            _mock_claude_success({"result": "found bug"}),
+            _mock_claude_failure("compilation error"),
+        ]
+
+        steps = [
+            {"role": "debugger", "brief": "find"},
+            {"role": "frontend_dev", "brief": "fix"},
+            {"role": "tester", "brief": "test"},
+        ]
+        result = run_pipeline(conn, "VDOL-001", steps)
+
+        assert result["success"] is False
+        assert result["steps_completed"] == 1  # Only debugger completed
+        assert "frontend_dev" in result["error"]
+
+        # Pipeline marked as failed
+        pipe = conn.execute("SELECT * FROM pipelines WHERE task_id='VDOL-001'").fetchone()
+        assert pipe["status"] == "failed"
+
+        # Task marked as blocked
+        task = models.get_task(conn, "VDOL-001")
+        assert task["status"] == "blocked"
+
+    def test_pipeline_dry_run(self, conn):
+        steps = [
+            {"role": "debugger", "brief": "find"},
+            {"role": "tester", "brief": "verify"},
+        ]
+        result = run_pipeline(conn, "VDOL-001", steps, dry_run=True)
+
+        assert result["dry_run"] is True
+        assert result["success"] is True
+        assert result["steps_completed"] == 2
+
+        # No pipeline created in DB
+        pipes = conn.execute("SELECT * FROM pipelines").fetchall()
+        assert len(pipes) == 0
+
+    @patch("agents.runner.subprocess.run")
+    def test_pipeline_chains_output(self, mock_run, conn):
+        """Output from step N is passed as previous_output to step N+1."""
+        call_count = [0]
+
+        def side_effect(*args, **kwargs):
+            call_count[0] += 1
+            if call_count[0] == 1:
+                return _mock_claude_success({"result": "bug is in line 42"})
+            return _mock_claude_success({"result": "test written"})
+
+        mock_run.side_effect = side_effect
+
+        steps = [
+            {"role": "debugger", "brief": "find"},
+            {"role": "tester", "brief": "write test"},
+        ]
+        run_pipeline(conn, "VDOL-001", steps)
+
+        # Second call should include first step's output in prompt
+        second_call = mock_run.call_args_list[1]
+        prompt = second_call[0][0][2]  # -p argument
+        assert "line 42" in prompt or "bug" in prompt
+
+    def test_pipeline_task_not_found(self, conn):
+        result = run_pipeline(conn, "NONEXISTENT", [{"role": "debugger"}])
+        assert result["success"] is False
+        assert "not found" in result["error"]
+
+
+# ---------------------------------------------------------------------------
+# JSON parsing
+# ---------------------------------------------------------------------------
+
+class TestTryParseJson:
+    def test_direct_json(self):
+        assert _try_parse_json('{"a": 1}') == {"a": 1}
+
+    def test_json_in_code_fence(self):
+        text = 'Some text\n```json\n{"a": 1}\n```\nMore text'
+        assert _try_parse_json(text) == {"a": 1}
+
+    def test_json_embedded_in_text(self):
+        text = 'Here is the result: {"status": "ok", "count": 42} and more'
+        result = _try_parse_json(text)
+        assert result == {"status": "ok", "count": 42}
+
+    def test_empty_string(self):
+        assert _try_parse_json("") is None
+
+    def test_no_json(self):
+        assert _try_parse_json("just plain text") is None
+
+    def test_json_array(self):
+        assert _try_parse_json('[1, 2, 3]') == [1, 2, 3]