Add context builder, agent runner, and pipeline executor
core/context_builder.py:
build_context() — assembles role-specific context from DB.
PM gets everything; debugger gets gotchas/workarounds; reviewer
gets conventions only; tester gets minimal context; security
gets security-category decisions.
format_prompt() — injects context into role templates.
agents/runner.py:
run_agent() — launches claude CLI as subprocess with role prompt.
run_pipeline() — executes multi-step pipelines sequentially,
chains output between steps, logs to agent_logs, creates/updates
pipeline records, handles failures gracefully.
agents/specialists.yaml — 8 roles with tools, permissions, context rules.
agents/prompts/pm.md — PM prompt for task decomposition.
agents/prompts/security.md — security audit prompt (OWASP, auth, secrets).
CLI: kin run <task_id> [--dry-run]
PM decomposes → shows pipeline → executes with confirmation.
31 new tests (15 context_builder, 11 runner, 5 JSON parsing).
92 total, all passing.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-15 14:03:32 +02:00
|
|
|
|
"""Tests for agents/runner.py — agent execution with mocked claude CLI."""
|
|
|
|
|
|
|
|
|
|
|
|
import json
|
2026-03-15 17:35:08 +02:00
|
|
|
|
import subprocess
|
Add context builder, agent runner, and pipeline executor
core/context_builder.py:
build_context() — assembles role-specific context from DB.
PM gets everything; debugger gets gotchas/workarounds; reviewer
gets conventions only; tester gets minimal context; security
gets security-category decisions.
format_prompt() — injects context into role templates.
agents/runner.py:
run_agent() — launches claude CLI as subprocess with role prompt.
run_pipeline() — executes multi-step pipelines sequentially,
chains output between steps, logs to agent_logs, creates/updates
pipeline records, handles failures gracefully.
agents/specialists.yaml — 8 roles with tools, permissions, context rules.
agents/prompts/pm.md — PM prompt for task decomposition.
agents/prompts/security.md — security audit prompt (OWASP, auth, secrets).
CLI: kin run <task_id> [--dry-run]
PM decomposes → shows pipeline → executes with confirmation.
31 new tests (15 context_builder, 11 runner, 5 JSON parsing).
92 total, all passing.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-15 14:03:32 +02:00
|
|
|
|
import pytest
|
|
|
|
|
|
from unittest.mock import patch, MagicMock
|
|
|
|
|
|
from core.db import init_db
|
|
|
|
|
|
from core import models
|
2026-03-15 17:44:16 +02:00
|
|
|
|
from agents.runner import run_agent, run_pipeline, run_audit, _try_parse_json
|
Add context builder, agent runner, and pipeline executor
core/context_builder.py:
build_context() — assembles role-specific context from DB.
PM gets everything; debugger gets gotchas/workarounds; reviewer
gets conventions only; tester gets minimal context; security
gets security-category decisions.
format_prompt() — injects context into role templates.
agents/runner.py:
run_agent() — launches claude CLI as subprocess with role prompt.
run_pipeline() — executes multi-step pipelines sequentially,
chains output between steps, logs to agent_logs, creates/updates
pipeline records, handles failures gracefully.
agents/specialists.yaml — 8 roles with tools, permissions, context rules.
agents/prompts/pm.md — PM prompt for task decomposition.
agents/prompts/security.md — security audit prompt (OWASP, auth, secrets).
CLI: kin run <task_id> [--dry-run]
PM decomposes → shows pipeline → executes with confirmation.
31 new tests (15 context_builder, 11 runner, 5 JSON parsing).
92 total, all passing.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-15 14:03:32 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
def conn():
    """In-memory SQLite connection pre-seeded for runner tests.

    Seeds one project ("vdol") and one pending task ("VDOL-001") so every
    test can run an agent/pipeline against known fixture data. The
    connection is closed during fixture teardown.
    """
    database = init_db(":memory:")
    models.create_project(
        database,
        "vdol",
        "ВДОЛЬ",
        "~/projects/vdolipoperek",
        tech_stack=["vue3"],
    )
    models.create_task(
        database,
        "VDOL-001",
        "vdol",
        "Fix bug",
        brief={"route_type": "debug"},
    )
    yield database
    database.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _mock_claude_success(output_data):
|
|
|
|
|
|
"""Create a mock subprocess result with successful claude output."""
|
|
|
|
|
|
mock = MagicMock()
|
|
|
|
|
|
mock.stdout = json.dumps(output_data) if isinstance(output_data, dict) else output_data
|
|
|
|
|
|
mock.stderr = ""
|
|
|
|
|
|
mock.returncode = 0
|
|
|
|
|
|
return mock
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _mock_claude_failure(error_msg):
|
|
|
|
|
|
mock = MagicMock()
|
|
|
|
|
|
mock.stdout = ""
|
|
|
|
|
|
mock.stderr = error_msg
|
|
|
|
|
|
mock.returncode = 1
|
|
|
|
|
|
return mock
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
# run_agent
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
class TestRunAgent:
    """Tests for run_agent(): CLI invocation, result shape, and DB logging.

    Fix: a duplicated commit-message text block was embedded inside the
    class body (extraction artifact), breaking the file's syntax; it has
    been removed. Test code itself is unchanged.
    """

    @patch("agents.runner.subprocess.run")
    def test_successful_agent_run(self, mock_run, conn):
        """A successful run returns role/model/duration and calls claude correctly."""
        mock_run.return_value = _mock_claude_success({
            "result": "Found race condition in useSearch.ts",
            "usage": {"total_tokens": 5000},
            "cost_usd": 0.015,
        })

        result = run_agent(conn, "debugger", "VDOL-001", "vdol")

        assert result["success"] is True
        assert result["role"] == "debugger"
        assert result["model"] == "sonnet"
        assert result["duration_seconds"] >= 0

        # Verify claude was called with right args
        call_args = mock_run.call_args
        cmd = call_args[0][0]
        assert "claude" in cmd[0]
        assert "-p" in cmd
        assert "--output-format" in cmd
        assert "json" in cmd

    @patch("agents.runner.subprocess.run")
    def test_failed_agent_run(self, mock_run, conn):
        """A non-zero CLI exit yields success=False and a failed agent_logs row."""
        mock_run.return_value = _mock_claude_failure("API error")

        result = run_agent(conn, "debugger", "VDOL-001", "vdol")

        assert result["success"] is False

        # Should be logged in agent_logs
        logs = conn.execute("SELECT * FROM agent_logs WHERE task_id='VDOL-001'").fetchall()
        assert len(logs) == 1
        assert logs[0]["success"] == 0

    def test_dry_run_returns_prompt(self, conn):
        """dry_run=True returns the rendered prompt without executing anything."""
        result = run_agent(conn, "debugger", "VDOL-001", "vdol", dry_run=True)

        assert result["dry_run"] is True
        assert result["prompt"] is not None
        assert "VDOL-001" in result["prompt"]
        assert result["output"] is None

    @patch("agents.runner.subprocess.run")
    def test_agent_logs_to_db(self, mock_run, conn):
        """Every run writes an agent_logs row carrying role and project id."""
        mock_run.return_value = _mock_claude_success({"result": "ok"})

        run_agent(conn, "tester", "VDOL-001", "vdol")

        logs = conn.execute("SELECT * FROM agent_logs WHERE agent_role='tester'").fetchall()
        assert len(logs) == 1
        assert logs[0]["project_id"] == "vdol"

    @patch("agents.runner.subprocess.run")
    def test_full_output_saved_to_db(self, mock_run, conn):
        """Bug fix: output_summary must contain the FULL output, not truncated."""
        long_json = json.dumps({
            "result": json.dumps({
                "summary": "Security audit complete",
                "findings": [{"title": f"Finding {i}", "severity": "HIGH"} for i in range(50)],
            }),
        })
        mock = MagicMock()
        mock.stdout = long_json
        mock.stderr = ""
        mock.returncode = 0
        mock_run.return_value = mock

        run_agent(conn, "security", "VDOL-001", "vdol")

        logs = conn.execute("SELECT output_summary FROM agent_logs WHERE agent_role='security'").fetchall()
        assert len(logs) == 1
        output = logs[0]["output_summary"]
        assert output is not None
        assert len(output) > 1000  # Must not be truncated
        # Should contain all 50 findings
        assert "Finding 49" in output

    @patch("agents.runner.subprocess.run")
    def test_dict_output_saved_as_json_string(self, mock_run, conn):
        """When claude returns structured JSON, it must be saved as string."""
        mock_run.return_value = _mock_claude_success({
            "result": {"status": "ok", "files": ["a.py", "b.py"]},
        })

        result = run_agent(conn, "debugger", "VDOL-001", "vdol")

        # output should be a string (JSON serialized), not a dict
        assert isinstance(result["raw_output"], str)

        logs = conn.execute("SELECT output_summary FROM agent_logs WHERE agent_role='debugger'").fetchall()
        saved = logs[0]["output_summary"]
        assert isinstance(saved, str)
        assert "a.py" in saved

    @patch("agents.runner.subprocess.run")
    def test_previous_output_passed(self, mock_run, conn):
        """previous_output must be injected into the prompt given to claude."""
        mock_run.return_value = _mock_claude_success({"result": "tests pass"})

        run_agent(conn, "tester", "VDOL-001", "vdol",
                  previous_output="Found bug in line 42")

        call_args = mock_run.call_args
        prompt = call_args[0][0][2]  # -p argument
        assert "line 42" in prompt
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
# run_pipeline
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
class TestRunPipeline:
    """Tests for run_pipeline(): sequencing, failure handling, chaining, hooks.

    Fix: stray extraction noise (an orphan timestamp line) embedded inside
    the class body has been removed; test code itself is unchanged.
    """

    @patch("agents.runner.subprocess.run")
    def test_successful_pipeline(self, mock_run, conn):
        """All steps succeed: pipeline completes, task moves to review."""
        mock_run.return_value = _mock_claude_success({"result": "done"})

        steps = [
            {"role": "debugger", "brief": "find bug"},
            {"role": "tester", "depends_on": "debugger", "brief": "verify"},
        ]
        result = run_pipeline(conn, "VDOL-001", steps)

        assert result["success"] is True
        assert result["steps_completed"] == 2
        assert len(result["results"]) == 2

        # Pipeline created in DB
        pipe = conn.execute("SELECT * FROM pipelines WHERE task_id='VDOL-001'").fetchone()
        assert pipe is not None
        assert pipe["status"] == "completed"

        # Task updated to review
        task = models.get_task(conn, "VDOL-001")
        assert task["status"] == "review"

    @patch("agents.runner.subprocess.run")
    def test_pipeline_fails_on_step(self, mock_run, conn):
        """A mid-pipeline failure stops execution, marks pipeline failed and task blocked."""
        # First step succeeds, second fails
        mock_run.side_effect = [
            _mock_claude_success({"result": "found bug"}),
            _mock_claude_failure("compilation error"),
        ]

        steps = [
            {"role": "debugger", "brief": "find"},
            {"role": "frontend_dev", "brief": "fix"},
            {"role": "tester", "brief": "test"},
        ]
        result = run_pipeline(conn, "VDOL-001", steps)

        assert result["success"] is False
        assert result["steps_completed"] == 1  # Only debugger completed
        assert "frontend_dev" in result["error"]

        # Pipeline marked as failed
        pipe = conn.execute("SELECT * FROM pipelines WHERE task_id='VDOL-001'").fetchone()
        assert pipe["status"] == "failed"

        # Task marked as blocked
        task = models.get_task(conn, "VDOL-001")
        assert task["status"] == "blocked"

    def test_pipeline_dry_run(self, conn):
        """dry_run simulates every step without touching the pipelines table."""
        steps = [
            {"role": "debugger", "brief": "find"},
            {"role": "tester", "brief": "verify"},
        ]
        result = run_pipeline(conn, "VDOL-001", steps, dry_run=True)

        assert result["dry_run"] is True
        assert result["success"] is True
        assert result["steps_completed"] == 2

        # No pipeline created in DB
        pipes = conn.execute("SELECT * FROM pipelines").fetchall()
        assert len(pipes) == 0

    @patch("agents.runner.subprocess.run")
    def test_pipeline_chains_output(self, mock_run, conn):
        """Output from step N is passed as previous_output to step N+1."""
        call_count = [0]

        def side_effect(*args, **kwargs):
            call_count[0] += 1
            if call_count[0] == 1:
                return _mock_claude_success({"result": "bug is in line 42"})
            return _mock_claude_success({"result": "test written"})

        mock_run.side_effect = side_effect

        steps = [
            {"role": "debugger", "brief": "find"},
            {"role": "tester", "brief": "write test"},
        ]
        run_pipeline(conn, "VDOL-001", steps)

        # Second call should include first step's output in prompt
        second_call = mock_run.call_args_list[1]
        prompt = second_call[0][0][2]  # -p argument
        assert "line 42" in prompt or "bug" in prompt

    def test_pipeline_task_not_found(self, conn):
        """Unknown task id fails fast with a 'not found' error."""
        result = run_pipeline(conn, "NONEXISTENT", [{"role": "debugger"}])
        assert result["success"] is False
        assert "not found" in result["error"]

    @patch("agents.runner.run_hooks")
    @patch("agents.runner.subprocess.run")
    def test_hooks_called_after_successful_pipeline(self, mock_run, mock_hooks, conn):
        """run_hooks fires exactly once with the pipeline_completed event on success."""
        mock_run.return_value = _mock_claude_success({"result": "done"})
        mock_hooks.return_value = []

        steps = [{"role": "debugger", "brief": "find"}]
        result = run_pipeline(conn, "VDOL-001", steps)

        assert result["success"] is True
        mock_hooks.assert_called_once()
        call_kwargs = mock_hooks.call_args
        assert call_kwargs[1].get("event") == "pipeline_completed" or \
            call_kwargs[0][3] == "pipeline_completed"

    @patch("agents.runner.run_hooks")
    @patch("agents.runner.subprocess.run")
    def test_hooks_not_called_on_failed_pipeline(self, mock_run, mock_hooks, conn):
        """A failed pipeline must not trigger completion hooks."""
        mock_run.return_value = _mock_claude_failure("compilation error")
        mock_hooks.return_value = []

        steps = [{"role": "debugger", "brief": "find"}]
        result = run_pipeline(conn, "VDOL-001", steps)

        assert result["success"] is False
        mock_hooks.assert_not_called()

    @patch("agents.runner.run_hooks")
    @patch("agents.runner.subprocess.run")
    def test_hook_failure_does_not_affect_pipeline_result(self, mock_run, mock_hooks, conn):
        """Exceptions raised by hooks must be swallowed, not propagated."""
        mock_run.return_value = _mock_claude_success({"result": "done"})
        mock_hooks.side_effect = Exception("hook exploded")

        steps = [{"role": "debugger", "brief": "find"}]
        # Must not raise — hook failures must not propagate
        result = run_pipeline(conn, "VDOL-001", steps)

        assert result["success"] is True
|
|
Add context builder, agent runner, and pipeline executor
core/context_builder.py:
build_context() — assembles role-specific context from DB.
PM gets everything; debugger gets gotchas/workarounds; reviewer
gets conventions only; tester gets minimal context; security
gets security-category decisions.
format_prompt() — injects context into role templates.
agents/runner.py:
run_agent() — launches claude CLI as subprocess with role prompt.
run_pipeline() — executes multi-step pipelines sequentially,
chains output between steps, logs to agent_logs, creates/updates
pipeline records, handles failures gracefully.
agents/specialists.yaml — 8 roles with tools, permissions, context rules.
agents/prompts/pm.md — PM prompt for task decomposition.
agents/prompts/security.md — security audit prompt (OWASP, auth, secrets).
CLI: kin run <task_id> [--dry-run]
PM decomposes → shows pipeline → executes with confirmation.
31 new tests (15 context_builder, 11 runner, 5 JSON parsing).
92 total, all passing.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-15 14:03:32 +02:00
|
|
|
|
|
2026-03-15 19:49:34 +02:00
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
# Auto mode
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
class TestAutoMode:
    """Follow-up generation behavior depending on project execution_mode (auto vs review)."""

    @patch("core.followup.generate_followups")
    @patch("agents.runner.run_hooks")
    @patch("agents.runner.subprocess.run")
    def test_auto_mode_generates_followups(self, mock_run, mock_hooks, mock_followup, conn):
        """Auto mode must call generate_followups after task_auto_approved."""
        mock_run.return_value = _mock_claude_success({"result": "done"})
        mock_hooks.return_value = []
        mock_followup.return_value = {"created": [], "pending_actions": []}

        models.update_project(conn, "vdol", execution_mode="auto")
        steps = [{"role": "debugger", "brief": "find"}]
        result = run_pipeline(conn, "VDOL-001", steps)

        assert result["success"] is True
        mock_followup.assert_called_once_with(conn, "VDOL-001")
        # In auto mode the task is approved automatically, skipping review
        task = models.get_task(conn, "VDOL-001")
        assert task["status"] == "done"

    @patch("core.followup.generate_followups")
    @patch("agents.runner.run_hooks")
    @patch("agents.runner.subprocess.run")
    def test_review_mode_skips_followups(self, mock_run, mock_hooks, mock_followup, conn):
        """Review mode must NOT call generate_followups automatically."""
        mock_run.return_value = _mock_claude_success({"result": "done"})
        mock_hooks.return_value = []
        mock_followup.return_value = {"created": [], "pending_actions": []}

        # Project stays in the default "review" mode
        steps = [{"role": "debugger", "brief": "find"}]
        result = run_pipeline(conn, "VDOL-001", steps)

        assert result["success"] is True
        mock_followup.assert_not_called()
        task = models.get_task(conn, "VDOL-001")
        assert task["status"] == "review"

    @patch("core.followup.generate_followups")
    @patch("agents.runner.run_hooks")
    @patch("agents.runner.subprocess.run")
    def test_auto_mode_skips_followups_for_followup_tasks(self, mock_run, mock_hooks, mock_followup, conn):
        """Auto mode must NOT generate followups for followup tasks (prevents recursion)."""
        mock_run.return_value = _mock_claude_success({"result": "done"})
        mock_hooks.return_value = []
        mock_followup.return_value = {"created": [], "pending_actions": []}

        models.update_project(conn, "vdol", execution_mode="auto")
        # Mark the task as itself originating from a followup
        models.update_task(conn, "VDOL-001", brief={"source": "followup:VDOL-000"})

        steps = [{"role": "debugger", "brief": "find"}]
        result = run_pipeline(conn, "VDOL-001", steps)

        assert result["success"] is True
        mock_followup.assert_not_called()

    @patch("core.followup.auto_resolve_pending_actions")
    @patch("core.followup.generate_followups")
    @patch("agents.runner.run_hooks")
    @patch("agents.runner.subprocess.run")
    def test_auto_mode_resolves_pending_actions(self, mock_run, mock_hooks, mock_followup, mock_resolve, conn):
        """Auto mode must auto-resolve pending_actions produced by followup generation."""
        mock_run.return_value = _mock_claude_success({"result": "done"})
        mock_hooks.return_value = []

        pending = [{"type": "permission_fix", "description": "Fix X",
                    "original_item": {}, "options": ["rerun"]}]
        mock_followup.return_value = {"created": [], "pending_actions": pending}
        mock_resolve.return_value = [{"resolved": "rerun", "result": {}}]

        models.update_project(conn, "vdol", execution_mode="auto")
        steps = [{"role": "debugger", "brief": "find"}]
        result = run_pipeline(conn, "VDOL-001", steps)

        assert result["success"] is True
        mock_resolve.assert_called_once_with(conn, "VDOL-001", pending)
|
|
|
|
|
|
|
Add context builder, agent runner, and pipeline executor
core/context_builder.py:
build_context() — assembles role-specific context from DB.
PM gets everything; debugger gets gotchas/workarounds; reviewer
gets conventions only; tester gets minimal context; security
gets security-category decisions.
format_prompt() — injects context into role templates.
agents/runner.py:
run_agent() — launches claude CLI as subprocess with role prompt.
run_pipeline() — executes multi-step pipelines sequentially,
chains output between steps, logs to agent_logs, creates/updates
pipeline records, handles failures gracefully.
agents/specialists.yaml — 8 roles with tools, permissions, context rules.
agents/prompts/pm.md — PM prompt for task decomposition.
agents/prompts/security.md — security audit prompt (OWASP, auth, secrets).
CLI: kin run <task_id> [--dry-run]
PM decomposes → shows pipeline → executes with confirmation.
31 new tests (15 context_builder, 11 runner, 5 JSON parsing).
92 total, all passing.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-15 14:03:32 +02:00
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
# JSON parsing
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
class TestTryParseJson:
    """Behavior of the _try_parse_json extraction helper."""

    def test_direct_json(self):
        parsed = _try_parse_json('{"a": 1}')
        assert parsed == {"a": 1}

    def test_json_in_code_fence(self):
        fenced = 'Some text\n```json\n{"a": 1}\n```\nMore text'
        parsed = _try_parse_json(fenced)
        assert parsed == {"a": 1}

    def test_json_embedded_in_text(self):
        noisy = 'Here is the result: {"status": "ok", "count": 42} and more'
        expected = {"status": "ok", "count": 42}
        assert _try_parse_json(noisy) == expected

    def test_empty_string(self):
        parsed = _try_parse_json("")
        assert parsed is None

    def test_no_json(self):
        parsed = _try_parse_json("just plain text")
        assert parsed is None

    def test_json_array(self):
        parsed = _try_parse_json('[1, 2, 3]')
        assert parsed == [1, 2, 3]
|
2026-03-15 17:35:08 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
# Non-interactive mode
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
class TestNonInteractive:
    """Tests for non-interactive mode: stdin handling, timeouts, permission flags.

    Fixes two corrupted assertions (extraction noise sat exactly where the
    original lines were mangled):
    - test_interactive_uses_600s_timeout asserted timeout == 300,
      contradicting its own name; interactive mode uses the longer 600s.
    - test_interactive_no_stdin_override asserted stdin == DEVNULL,
      contradicting its docstring; interactive mode must NOT force DEVNULL.
    """

    @patch("agents.runner.subprocess.run")
    def test_noninteractive_sets_stdin_devnull(self, mock_run, conn):
        """When noninteractive=True, subprocess.run should get stdin=subprocess.DEVNULL."""
        mock_run.return_value = _mock_claude_success({"result": "ok"})
        run_agent(conn, "debugger", "VDOL-001", "vdol", noninteractive=True)
        call_kwargs = mock_run.call_args[1]
        assert call_kwargs.get("stdin") == subprocess.DEVNULL

    @patch("agents.runner.subprocess.run")
    def test_noninteractive_uses_300s_timeout(self, mock_run, conn):
        """Non-interactive mode uses the tighter 300s timeout."""
        mock_run.return_value = _mock_claude_success({"result": "ok"})
        run_agent(conn, "debugger", "VDOL-001", "vdol", noninteractive=True)
        call_kwargs = mock_run.call_args[1]
        assert call_kwargs.get("timeout") == 300

    @patch("agents.runner.subprocess.run")
    def test_interactive_uses_600s_timeout(self, mock_run, conn):
        """Interactive mode allows the longer 600s timeout."""
        mock_run.return_value = _mock_claude_success({"result": "ok"})
        run_agent(conn, "debugger", "VDOL-001", "vdol", noninteractive=False)
        call_kwargs = mock_run.call_args[1]
        # Fixed: previously asserted 300, contradicting the test name.
        assert call_kwargs.get("timeout") == 600

    @patch("agents.runner.subprocess.run")
    def test_interactive_no_stdin_override(self, mock_run, conn):
        """In interactive mode, stdin should not be set to DEVNULL."""
        mock_run.return_value = _mock_claude_success({"result": "ok"})
        run_agent(conn, "debugger", "VDOL-001", "vdol", noninteractive=False)
        call_kwargs = mock_run.call_args[1]
        # Fixed: previously asserted == DEVNULL, contradicting the docstring.
        assert call_kwargs.get("stdin") != subprocess.DEVNULL

    @patch.dict("os.environ", {"KIN_NONINTERACTIVE": "1"})
    @patch("agents.runner.subprocess.run")
    def test_env_var_activates_noninteractive(self, mock_run, conn):
        """KIN_NONINTERACTIVE=1 env var should activate non-interactive mode."""
        mock_run.return_value = _mock_claude_success({"result": "ok"})
        run_agent(conn, "debugger", "VDOL-001", "vdol", noninteractive=False)
        call_kwargs = mock_run.call_args[1]
        assert call_kwargs.get("stdin") == subprocess.DEVNULL
        assert call_kwargs.get("timeout") == 300

    @patch("agents.runner.subprocess.run")
    def test_allow_write_adds_skip_permissions(self, mock_run, conn):
        """allow_write=True must add --dangerously-skip-permissions to the command."""
        mock_run.return_value = _mock_claude_success({"result": "ok"})
        run_agent(conn, "debugger", "VDOL-001", "vdol", allow_write=True)
        cmd = mock_run.call_args[0][0]
        assert "--dangerously-skip-permissions" in cmd

    @patch("agents.runner.subprocess.run")
    def test_no_allow_write_no_skip_permissions(self, mock_run, conn):
        """allow_write=False must NOT add --dangerously-skip-permissions."""
        mock_run.return_value = _mock_claude_success({"result": "ok"})
        run_agent(conn, "debugger", "VDOL-001", "vdol", allow_write=False)
        cmd = mock_run.call_args[0][0]
        assert "--dangerously-skip-permissions" not in cmd
|
2026-03-15 17:44:16 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
# run_audit
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
class TestRunAudit:
    """Tests for run_audit(): backlog audit parsing, logging, and auto-apply.

    Fix: stray extraction noise (an orphan timestamp line) embedded inside
    the class body has been removed; test code itself is unchanged.
    """

    @patch("agents.runner.subprocess.run")
    def test_audit_success(self, mock_run, conn):
        """Audit should return parsed already_done/still_pending/unclear."""
        audit_output = json.dumps({
            "already_done": [{"id": "VDOL-001", "reason": "Fixed in runner.py"}],
            "still_pending": [],
            "unclear": [],
        })
        mock_run.return_value = _mock_claude_success({"result": audit_output})

        result = run_audit(conn, "vdol")

        assert result["success"] is True
        assert len(result["already_done"]) == 1
        assert result["already_done"][0]["id"] == "VDOL-001"

    @patch("agents.runner.subprocess.run")
    def test_audit_logs_to_db(self, mock_run, conn):
        """Audit should log to agent_logs with role=backlog_audit."""
        mock_run.return_value = _mock_claude_success({
            "result": json.dumps({"already_done": [], "still_pending": [], "unclear": []}),
        })

        run_audit(conn, "vdol")

        logs = conn.execute(
            "SELECT * FROM agent_logs WHERE agent_role='backlog_audit'"
        ).fetchall()
        assert len(logs) == 1
        assert logs[0]["action"] == "audit"

    def test_audit_no_pending_tasks(self, conn):
        """If no pending tasks, return success with empty lists."""
        # Mark existing task as done
        models.update_task(conn, "VDOL-001", status="done")

        result = run_audit(conn, "vdol")

        assert result["success"] is True
        assert result["already_done"] == []
        assert "No pending tasks" in result.get("message", "")

    def test_audit_project_not_found(self, conn):
        """Unknown project id fails fast with a 'not found' error."""
        result = run_audit(conn, "nonexistent")
        assert result["success"] is False
        assert "not found" in result["error"]

    @patch("agents.runner.subprocess.run")
    def test_audit_uses_sonnet(self, mock_run, conn):
        """Audit should use sonnet model."""
        mock_run.return_value = _mock_claude_success({
            "result": json.dumps({"already_done": [], "still_pending": [], "unclear": []}),
        })

        run_audit(conn, "vdol")

        cmd = mock_run.call_args[0][0]
        model_idx = cmd.index("--model")
        assert cmd[model_idx + 1] == "sonnet"

    @patch("agents.runner.subprocess.run")
    def test_audit_includes_tasks_in_prompt(self, mock_run, conn):
        """The prompt should contain the task title."""
        mock_run.return_value = _mock_claude_success({
            "result": json.dumps({"already_done": [], "still_pending": [], "unclear": []}),
        })

        run_audit(conn, "vdol")

        prompt = mock_run.call_args[0][0][2]  # -p argument
        assert "VDOL-001" in prompt
        assert "Fix bug" in prompt

    @patch("agents.runner.subprocess.run")
    def test_audit_auto_apply_marks_done(self, mock_run, conn):
        """auto_apply=True should mark already_done tasks as done in DB."""
        mock_run.return_value = _mock_claude_success({
            "result": json.dumps({
                "already_done": [{"id": "VDOL-001", "reason": "Done"}],
                "still_pending": [],
                "unclear": [],
            }),
        })

        result = run_audit(conn, "vdol", auto_apply=True)

        assert result["success"] is True
        assert "VDOL-001" in result["applied"]
        task = models.get_task(conn, "VDOL-001")
        assert task["status"] == "done"

    @patch("agents.runner.subprocess.run")
    def test_audit_no_auto_apply_keeps_pending(self, mock_run, conn):
        """auto_apply=False should NOT change task status."""
        mock_run.return_value = _mock_claude_success({
            "result": json.dumps({
                "already_done": [{"id": "VDOL-001", "reason": "Done"}],
                "still_pending": [],
                "unclear": [],
            }),
        })

        result = run_audit(conn, "vdol", auto_apply=False)

        assert result["success"] is True
        assert result["applied"] == []
        task = models.get_task(conn, "VDOL-001")
        assert task["status"] == "pending"

    @patch("agents.runner.subprocess.run")
    def test_audit_uses_dangerously_skip_permissions(self, mock_run, conn):
        """Audit must use --dangerously-skip-permissions for tool access."""
        mock_run.return_value = _mock_claude_success({
            "result": json.dumps({"already_done": [], "still_pending": [], "unclear": []}),
        })

        run_audit(conn, "vdol")

        cmd = mock_run.call_args[0][0]
        assert "--dangerously-skip-permissions" in cmd
|