Add context builder, agent runner, and pipeline executor
core/context_builder.py: build_context() — assembles role-specific context from DB. PM gets everything; debugger gets gotchas/workarounds; reviewer gets conventions only; tester gets minimal context; security gets security-category decisions. format_prompt() — injects context into role templates. agents/runner.py: run_agent() — launches claude CLI as subprocess with role prompt. run_pipeline() — executes multi-step pipelines sequentially, chains output between steps, logs to agent_logs, creates/updates pipeline records, handles failures gracefully. agents/specialists.yaml — 8 roles with tools, permissions, context rules. agents/prompts/pm.md — PM prompt for task decomposition. agents/prompts/security.md — security audit prompt (OWASP, auth, secrets). CLI: kin run <task_id> [--dry-run] PM decomposes → shows pipeline → executes with confirmation. 31 new tests (15 context_builder, 11 runner, 5 JSON parsing). 92 total, all passing. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
86e5b8febf
commit
fabae74c19
8 changed files with 1207 additions and 0 deletions
133
tests/test_context_builder.py
Normal file
133
tests/test_context_builder.py
Normal file
|
|
@@ -0,0 +1,133 @@
|
|||
"""Tests for core/context_builder.py — context assembly per role."""
|
||||
|
||||
import pytest
|
||||
from core.db import init_db
|
||||
from core import models
|
||||
from core.context_builder import build_context, format_prompt
|
||||
|
||||
|
||||
@pytest.fixture
def conn():
    """In-memory DB seeded with a project, two modules, four decisions, two tasks."""
    db = init_db(":memory:")
    models.create_project(
        db, "vdol", "ВДОЛЬ и ПОПЕРЕК", "~/projects/vdolipoperek",
        tech_stack=["vue3", "typescript", "nodejs"],
    )
    # One frontend and one backend module.
    models.add_module(db, "vdol", "search", "frontend", "src/search/")
    models.add_module(db, "vdol", "api", "backend", "src/api/")
    # One decision per type so role-based filtering is observable in tests.
    models.add_decision(db, "vdol", "gotcha", "Safari bug",
                        "position:fixed breaks", category="ui", tags=["ios"])
    models.add_decision(db, "vdol", "workaround", "API rate limit",
                        "10 req/s max", category="api")
    models.add_decision(db, "vdol", "convention", "Use WAL mode",
                        "Always use WAL for SQLite", category="architecture")
    models.add_decision(db, "vdol", "decision", "Auth required",
                        "All endpoints need auth", category="security")
    # VDOL-001 is the task under test; VDOL-002 provides an active sibling task.
    models.create_task(db, "VDOL-001", "vdol", "Fix search filters",
                       brief={"module": "search", "route_type": "debug"})
    models.create_task(db, "VDOL-002", "vdol", "Add payments",
                       status="in_progress")
    yield db
    db.close()
|
||||
|
||||
|
||||
class TestBuildContext:
    """build_context() hands each role a filtered slice of project state."""

    def test_pm_gets_everything(self, conn):
        context = build_context(conn, "VDOL-001", "pm", "vdol")
        assert context["task"]["id"] == "VDOL-001"
        assert context["project"]["id"] == "vdol"
        assert len(context["modules"]) == 2
        assert len(context["decisions"]) == 4  # every decision type included
        assert len(context["active_tasks"]) == 1  # just VDOL-002 (in_progress)
        assert "pm" in context["available_specialists"]

    def test_architect_gets_all_decisions_and_modules(self, conn):
        context = build_context(conn, "VDOL-001", "architect", "vdol")
        assert len(context["modules"]) == 2
        assert len(context["decisions"]) == 4

    def test_debugger_gets_only_gotcha_workaround(self, conn):
        context = build_context(conn, "VDOL-001", "debugger", "vdol")
        seen = {entry["type"] for entry in context["decisions"]}
        assert seen.issubset({"gotcha", "workaround"})
        assert "convention" not in seen
        assert "decision" not in seen
        assert context["module_hint"] == "search"

    def test_frontend_dev_gets_gotcha_workaround_convention(self, conn):
        context = build_context(conn, "VDOL-001", "frontend_dev", "vdol")
        seen = {entry["type"] for entry in context["decisions"]}
        for wanted in ("gotcha", "workaround", "convention"):
            assert wanted in seen
        assert "decision" not in seen  # plain decisions excluded

    def test_backend_dev_same_as_frontend(self, conn):
        context = build_context(conn, "VDOL-001", "backend_dev", "vdol")
        seen = {entry["type"] for entry in context["decisions"]}
        assert seen == {"gotcha", "workaround", "convention"}

    def test_reviewer_gets_only_conventions(self, conn):
        context = build_context(conn, "VDOL-001", "reviewer", "vdol")
        seen = {entry["type"] for entry in context["decisions"]}
        assert seen == {"convention"}

    def test_tester_gets_minimal_context(self, conn):
        context = build_context(conn, "VDOL-001", "tester", "vdol")
        assert context["task"] is not None
        assert context["project"] is not None
        assert "decisions" not in context
        assert "modules" not in context

    def test_security_gets_security_decisions(self, conn):
        context = build_context(conn, "VDOL-001", "security", "vdol")
        seen = {entry.get("category") for entry in context["decisions"]}
        assert seen == {"security"}

    def test_unknown_role_gets_fallback(self, conn):
        context = build_context(conn, "VDOL-001", "unknown_role", "vdol")
        assert "decisions" in context
        assert len(context["decisions"]) > 0
|
||||
|
||||
|
||||
class TestFormatPrompt:
    """format_prompt() renders role templates with the assembled context."""

    def test_format_with_template(self, conn):
        context = build_context(conn, "VDOL-001", "debugger", "vdol")
        rendered = format_prompt(context, "debugger", "You are a debugger. Find bugs.")
        for fragment in ("You are a debugger", "VDOL-001",
                         "Fix search filters", "vdol", "vue3"):
            assert fragment in rendered

    def test_format_includes_decisions(self, conn):
        context = build_context(conn, "VDOL-001", "debugger", "vdol")
        rendered = format_prompt(context, "debugger", "Debug this.")
        assert "Safari bug" in rendered
        assert "API rate limit" in rendered
        # Conventions are filtered out for the debugger role.
        assert "WAL mode" not in rendered

    def test_format_pm_includes_specialists(self, conn):
        context = build_context(conn, "VDOL-001", "pm", "vdol")
        rendered = format_prompt(context, "pm", "You are PM.")
        for fragment in ("Available specialists", "debugger",
                         "Active tasks", "VDOL-002"):
            assert fragment in rendered

    def test_format_with_previous_output(self, conn):
        context = build_context(conn, "VDOL-001", "tester", "vdol")
        context["previous_output"] = "Found race condition in useSearch.ts"
        rendered = format_prompt(context, "tester", "Write tests.")
        assert "Previous step output" in rendered
        assert "race condition" in rendered

    def test_format_loads_prompt_file(self, conn):
        context = build_context(conn, "VDOL-001", "pm", "vdol")
        # No template argument: should load from agents/prompts/pm.md.
        rendered = format_prompt(context, "pm")
        assert "decompose" in rendered.lower() or "pipeline" in rendered.lower()

    def test_format_missing_prompt_file(self, conn):
        context = build_context(conn, "VDOL-001", "analyst", "vdol")
        # No analyst.md exists; a usable fallback prompt is still expected.
        rendered = format_prompt(context, "analyst")
        assert "analyst" in rendered.lower()
|
||||
234
tests/test_runner.py
Normal file
234
tests/test_runner.py
Normal file
|
|
@@ -0,0 +1,234 @@
|
|||
"""Tests for agents/runner.py — agent execution with mocked claude CLI."""
|
||||
|
||||
import json
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
from core.db import init_db
|
||||
from core import models
|
||||
from agents.runner import run_agent, run_pipeline, _try_parse_json
|
||||
|
||||
|
||||
@pytest.fixture
def conn():
    """Minimal in-memory DB: one project ("vdol") and one debug-routed task."""
    db = init_db(":memory:")
    models.create_project(
        db, "vdol", "ВДОЛЬ", "~/projects/vdolipoperek", tech_stack=["vue3"],
    )
    models.create_task(
        db, "VDOL-001", "vdol", "Fix bug", brief={"route_type": "debug"},
    )
    yield db
    db.close()
|
||||
|
||||
|
||||
def _mock_claude_success(output_data):
    """Fake a successful `claude` subprocess result.

    Dict payloads are JSON-encoded onto stdout; anything else is used as
    stdout verbatim. stderr is empty and the exit code is 0.
    """
    result = MagicMock()
    if isinstance(output_data, dict):
        result.stdout = json.dumps(output_data)
    else:
        result.stdout = output_data
    result.stderr = ""
    result.returncode = 0
    return result
|
||||
|
||||
|
||||
def _mock_claude_failure(error_msg):
    """Fake a failed `claude` subprocess result carrying *error_msg* on stderr."""
    result = MagicMock()
    result.stdout = ""
    result.stderr = error_msg
    result.returncode = 1
    return result
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# run_agent
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestRunAgent:
    """run_agent() behaviour with the claude CLI mocked out."""

    @patch("agents.runner.subprocess.run")
    def test_successful_agent_run(self, mock_run, conn):
        mock_run.return_value = _mock_claude_success({
            "result": "Found race condition in useSearch.ts",
            "usage": {"total_tokens": 5000},
            "cost_usd": 0.015,
        })

        result = run_agent(conn, "debugger", "VDOL-001", "vdol")

        assert result["success"] is True
        assert result["role"] == "debugger"
        assert result["model"] == "sonnet"
        assert result["duration_seconds"] >= 0

        # Verify claude was called with the expected CLI flags.
        cmd = mock_run.call_args[0][0]
        assert "claude" in cmd[0]
        assert "-p" in cmd
        assert "--output-format" in cmd
        assert "json" in cmd

    @patch("agents.runner.subprocess.run")
    def test_failed_agent_run(self, mock_run, conn):
        mock_run.return_value = _mock_claude_failure("API error")

        result = run_agent(conn, "debugger", "VDOL-001", "vdol")

        assert result["success"] is False

        # The failure is still recorded in agent_logs.
        logs = conn.execute("SELECT * FROM agent_logs WHERE task_id='VDOL-001'").fetchall()
        assert len(logs) == 1
        assert logs[0]["success"] == 0

    def test_dry_run_returns_prompt(self, conn):
        result = run_agent(conn, "debugger", "VDOL-001", "vdol", dry_run=True)

        assert result["dry_run"] is True
        assert result["prompt"] is not None
        assert "VDOL-001" in result["prompt"]
        assert result["output"] is None

    @patch("agents.runner.subprocess.run")
    def test_agent_logs_to_db(self, mock_run, conn):
        mock_run.return_value = _mock_claude_success({"result": "ok"})

        run_agent(conn, "tester", "VDOL-001", "vdol")

        logs = conn.execute("SELECT * FROM agent_logs WHERE agent_role='tester'").fetchall()
        assert len(logs) == 1
        assert logs[0]["project_id"] == "vdol"

    @patch("agents.runner.subprocess.run")
    def test_previous_output_passed(self, mock_run, conn):
        mock_run.return_value = _mock_claude_success({"result": "tests pass"})

        run_agent(conn, "tester", "VDOL-001", "vdol",
                  previous_output="Found bug in line 42")

        cmd = mock_run.call_args[0][0]
        # Locate the prompt by its "-p" flag instead of hard-coding argv
        # position 2, so the test survives reordering of CLI arguments.
        prompt = cmd[cmd.index("-p") + 1]
        assert "line 42" in prompt
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# run_pipeline
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestRunPipeline:
    """run_pipeline() sequencing, persistence, and failure handling."""

    @patch("agents.runner.subprocess.run")
    def test_successful_pipeline(self, mock_run, conn):
        mock_run.return_value = _mock_claude_success({"result": "done"})

        steps = [
            {"role": "debugger", "brief": "find bug"},
            {"role": "tester", "depends_on": "debugger", "brief": "verify"},
        ]
        result = run_pipeline(conn, "VDOL-001", steps)

        assert result["success"] is True
        assert result["steps_completed"] == 2
        assert len(result["results"]) == 2

        # Pipeline record persisted and marked completed.
        pipe = conn.execute("SELECT * FROM pipelines WHERE task_id='VDOL-001'").fetchone()
        assert pipe is not None
        assert pipe["status"] == "completed"

        # Task moves to review after a successful run.
        task = models.get_task(conn, "VDOL-001")
        assert task["status"] == "review"

    @patch("agents.runner.subprocess.run")
    def test_pipeline_fails_on_step(self, mock_run, conn):
        # First step succeeds, second fails.
        mock_run.side_effect = [
            _mock_claude_success({"result": "found bug"}),
            _mock_claude_failure("compilation error"),
        ]

        steps = [
            {"role": "debugger", "brief": "find"},
            {"role": "frontend_dev", "brief": "fix"},
            {"role": "tester", "brief": "test"},
        ]
        result = run_pipeline(conn, "VDOL-001", steps)

        assert result["success"] is False
        assert result["steps_completed"] == 1  # only debugger completed
        assert "frontend_dev" in result["error"]

        # Pipeline marked as failed.
        pipe = conn.execute("SELECT * FROM pipelines WHERE task_id='VDOL-001'").fetchone()
        assert pipe["status"] == "failed"

        # Task marked as blocked.
        task = models.get_task(conn, "VDOL-001")
        assert task["status"] == "blocked"

    def test_pipeline_dry_run(self, conn):
        steps = [
            {"role": "debugger", "brief": "find"},
            {"role": "tester", "brief": "verify"},
        ]
        result = run_pipeline(conn, "VDOL-001", steps, dry_run=True)

        assert result["dry_run"] is True
        assert result["success"] is True
        assert result["steps_completed"] == 2

        # Dry runs must not persist anything.
        pipes = conn.execute("SELECT * FROM pipelines").fetchall()
        assert len(pipes) == 0

    @patch("agents.runner.subprocess.run")
    def test_pipeline_chains_output(self, mock_run, conn):
        """Output from step N is passed as previous_output to step N+1."""
        call_count = [0]

        def side_effect(*args, **kwargs):
            call_count[0] += 1
            if call_count[0] == 1:
                return _mock_claude_success({"result": "bug is in line 42"})
            return _mock_claude_success({"result": "test written"})

        mock_run.side_effect = side_effect

        steps = [
            {"role": "debugger", "brief": "find"},
            {"role": "tester", "brief": "write test"},
        ]
        run_pipeline(conn, "VDOL-001", steps)

        # The second invocation's prompt should carry the first step's output.
        cmd = mock_run.call_args_list[1][0][0]
        # Locate the prompt by its "-p" flag instead of hard-coding argv
        # position 2, so the test survives reordering of CLI arguments.
        prompt = cmd[cmd.index("-p") + 1]
        assert "line 42" in prompt or "bug" in prompt

    def test_pipeline_task_not_found(self, conn):
        result = run_pipeline(conn, "NONEXISTENT", [{"role": "debugger"}])
        assert result["success"] is False
        assert "not found" in result["error"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# JSON parsing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestTryParseJson:
    """_try_parse_json() extracts JSON from raw, fenced, or embedded text."""

    def test_direct_json(self):
        assert _try_parse_json('{"a": 1}') == {"a": 1}

    def test_json_in_code_fence(self):
        fenced = 'Some text\n```json\n{"a": 1}\n```\nMore text'
        assert _try_parse_json(fenced) == {"a": 1}

    def test_json_embedded_in_text(self):
        noisy = 'Here is the result: {"status": "ok", "count": 42} and more'
        assert _try_parse_json(noisy) == {"status": "ok", "count": 42}

    def test_empty_string(self):
        assert _try_parse_json("") is None

    def test_no_json(self):
        assert _try_parse_json("just plain text") is None

    def test_json_array(self):
        assert _try_parse_json("[1, 2, 3]") == [1, 2, 3]
|
||||
Loading…
Add table
Add a link
Reference in a new issue