kin/tests/test_runner.py
johnfrum1234 c129cf9d95 Fix output truncation bug, add language support for agent responses
Bug 1 — Output truncation:
  _run_claude() was replacing raw stdout with parsed sub-field which
  could be a dict (not string). run_agent() then saved dict.__repr__
  to DB instead of full JSON. Fixed: _run_claude() always returns
  string output; run_agent() ensures string before DB write.
  Added tests: full_output_saved_to_db, dict_output_saved_as_json_string.

Bug 2 — Language support:
  Added projects.language column (TEXT DEFAULT 'ru').
  Auto-migration for existing DBs (ALTER TABLE ADD COLUMN).
  context_builder passes language in project context.
  format_prompt() appends "## Language\nALWAYS respond in {language}"
  at the end of every prompt.
  CLI: kin project add --language ru (default: ru).
  Tests: language in prompt for ru/en, project creation, context.

112 tests, all passing. ~/.kin/kin.db migrated (vdol: language=ru).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-15 14:39:33 +02:00

276 lines
9.7 KiB
Python

"""Tests for agents/runner.py — agent execution with mocked claude CLI."""
import json
import pytest
from unittest.mock import patch, MagicMock
from core.db import init_db
from core import models
from agents.runner import run_agent, run_pipeline, _try_parse_json
@pytest.fixture
def conn():
c = init_db(":memory:")
models.create_project(c, "vdol", "ВДОЛЬ", "~/projects/vdolipoperek",
tech_stack=["vue3"])
models.create_task(c, "VDOL-001", "vdol", "Fix bug",
brief={"route_type": "debug"})
yield c
c.close()
def _mock_claude_success(output_data):
"""Create a mock subprocess result with successful claude output."""
mock = MagicMock()
mock.stdout = json.dumps(output_data) if isinstance(output_data, dict) else output_data
mock.stderr = ""
mock.returncode = 0
return mock
def _mock_claude_failure(error_msg):
mock = MagicMock()
mock.stdout = ""
mock.stderr = error_msg
mock.returncode = 1
return mock
# ---------------------------------------------------------------------------
# run_agent
# ---------------------------------------------------------------------------
class TestRunAgent:
@patch("agents.runner.subprocess.run")
def test_successful_agent_run(self, mock_run, conn):
mock_run.return_value = _mock_claude_success({
"result": "Found race condition in useSearch.ts",
"usage": {"total_tokens": 5000},
"cost_usd": 0.015,
})
result = run_agent(conn, "debugger", "VDOL-001", "vdol")
assert result["success"] is True
assert result["role"] == "debugger"
assert result["model"] == "sonnet"
assert result["duration_seconds"] >= 0
# Verify claude was called with right args
call_args = mock_run.call_args
cmd = call_args[0][0]
assert "claude" in cmd[0]
assert "-p" in cmd
assert "--output-format" in cmd
assert "json" in cmd
@patch("agents.runner.subprocess.run")
def test_failed_agent_run(self, mock_run, conn):
mock_run.return_value = _mock_claude_failure("API error")
result = run_agent(conn, "debugger", "VDOL-001", "vdol")
assert result["success"] is False
# Should be logged in agent_logs
logs = conn.execute("SELECT * FROM agent_logs WHERE task_id='VDOL-001'").fetchall()
assert len(logs) == 1
assert logs[0]["success"] == 0
def test_dry_run_returns_prompt(self, conn):
result = run_agent(conn, "debugger", "VDOL-001", "vdol", dry_run=True)
assert result["dry_run"] is True
assert result["prompt"] is not None
assert "VDOL-001" in result["prompt"]
assert result["output"] is None
@patch("agents.runner.subprocess.run")
def test_agent_logs_to_db(self, mock_run, conn):
mock_run.return_value = _mock_claude_success({"result": "ok"})
run_agent(conn, "tester", "VDOL-001", "vdol")
logs = conn.execute("SELECT * FROM agent_logs WHERE agent_role='tester'").fetchall()
assert len(logs) == 1
assert logs[0]["project_id"] == "vdol"
@patch("agents.runner.subprocess.run")
def test_full_output_saved_to_db(self, mock_run, conn):
"""Bug fix: output_summary must contain the FULL output, not truncated."""
long_json = json.dumps({
"result": json.dumps({
"summary": "Security audit complete",
"findings": [{"title": f"Finding {i}", "severity": "HIGH"} for i in range(50)],
}),
})
mock = MagicMock()
mock.stdout = long_json
mock.stderr = ""
mock.returncode = 0
mock_run.return_value = mock
run_agent(conn, "security", "VDOL-001", "vdol")
logs = conn.execute("SELECT output_summary FROM agent_logs WHERE agent_role='security'").fetchall()
assert len(logs) == 1
output = logs[0]["output_summary"]
assert output is not None
assert len(output) > 1000 # Must not be truncated
# Should contain all 50 findings
assert "Finding 49" in output
@patch("agents.runner.subprocess.run")
def test_dict_output_saved_as_json_string(self, mock_run, conn):
"""When claude returns structured JSON, it must be saved as string."""
mock_run.return_value = _mock_claude_success({
"result": {"status": "ok", "files": ["a.py", "b.py"]},
})
result = run_agent(conn, "debugger", "VDOL-001", "vdol")
# output should be a string (JSON serialized), not a dict
assert isinstance(result["raw_output"], str)
logs = conn.execute("SELECT output_summary FROM agent_logs WHERE agent_role='debugger'").fetchall()
saved = logs[0]["output_summary"]
assert isinstance(saved, str)
assert "a.py" in saved
@patch("agents.runner.subprocess.run")
def test_previous_output_passed(self, mock_run, conn):
mock_run.return_value = _mock_claude_success({"result": "tests pass"})
run_agent(conn, "tester", "VDOL-001", "vdol",
previous_output="Found bug in line 42")
call_args = mock_run.call_args
prompt = call_args[0][0][2] # -p argument
assert "line 42" in prompt
# ---------------------------------------------------------------------------
# run_pipeline
# ---------------------------------------------------------------------------
class TestRunPipeline:
@patch("agents.runner.subprocess.run")
def test_successful_pipeline(self, mock_run, conn):
mock_run.return_value = _mock_claude_success({"result": "done"})
steps = [
{"role": "debugger", "brief": "find bug"},
{"role": "tester", "depends_on": "debugger", "brief": "verify"},
]
result = run_pipeline(conn, "VDOL-001", steps)
assert result["success"] is True
assert result["steps_completed"] == 2
assert len(result["results"]) == 2
# Pipeline created in DB
pipe = conn.execute("SELECT * FROM pipelines WHERE task_id='VDOL-001'").fetchone()
assert pipe is not None
assert pipe["status"] == "completed"
# Task updated to review
task = models.get_task(conn, "VDOL-001")
assert task["status"] == "review"
@patch("agents.runner.subprocess.run")
def test_pipeline_fails_on_step(self, mock_run, conn):
# First step succeeds, second fails
mock_run.side_effect = [
_mock_claude_success({"result": "found bug"}),
_mock_claude_failure("compilation error"),
]
steps = [
{"role": "debugger", "brief": "find"},
{"role": "frontend_dev", "brief": "fix"},
{"role": "tester", "brief": "test"},
]
result = run_pipeline(conn, "VDOL-001", steps)
assert result["success"] is False
assert result["steps_completed"] == 1 # Only debugger completed
assert "frontend_dev" in result["error"]
# Pipeline marked as failed
pipe = conn.execute("SELECT * FROM pipelines WHERE task_id='VDOL-001'").fetchone()
assert pipe["status"] == "failed"
# Task marked as blocked
task = models.get_task(conn, "VDOL-001")
assert task["status"] == "blocked"
def test_pipeline_dry_run(self, conn):
steps = [
{"role": "debugger", "brief": "find"},
{"role": "tester", "brief": "verify"},
]
result = run_pipeline(conn, "VDOL-001", steps, dry_run=True)
assert result["dry_run"] is True
assert result["success"] is True
assert result["steps_completed"] == 2
# No pipeline created in DB
pipes = conn.execute("SELECT * FROM pipelines").fetchall()
assert len(pipes) == 0
@patch("agents.runner.subprocess.run")
def test_pipeline_chains_output(self, mock_run, conn):
"""Output from step N is passed as previous_output to step N+1."""
call_count = [0]
def side_effect(*args, **kwargs):
call_count[0] += 1
if call_count[0] == 1:
return _mock_claude_success({"result": "bug is in line 42"})
return _mock_claude_success({"result": "test written"})
mock_run.side_effect = side_effect
steps = [
{"role": "debugger", "brief": "find"},
{"role": "tester", "brief": "write test"},
]
run_pipeline(conn, "VDOL-001", steps)
# Second call should include first step's output in prompt
second_call = mock_run.call_args_list[1]
prompt = second_call[0][0][2] # -p argument
assert "line 42" in prompt or "bug" in prompt
def test_pipeline_task_not_found(self, conn):
result = run_pipeline(conn, "NONEXISTENT", [{"role": "debugger"}])
assert result["success"] is False
assert "not found" in result["error"]
# ---------------------------------------------------------------------------
# JSON parsing
# ---------------------------------------------------------------------------
class TestTryParseJson:
def test_direct_json(self):
assert _try_parse_json('{"a": 1}') == {"a": 1}
def test_json_in_code_fence(self):
text = 'Some text\n```json\n{"a": 1}\n```\nMore text'
assert _try_parse_json(text) == {"a": 1}
def test_json_embedded_in_text(self):
text = 'Here is the result: {"status": "ok", "count": 42} and more'
result = _try_parse_json(text)
assert result == {"status": "ok", "count": 42}
def test_empty_string(self):
assert _try_parse_json("") is None
def test_no_json(self):
assert _try_parse_json("just plain text") is None
def test_json_array(self):
assert _try_parse_json('[1, 2, 3]') == [1, 2, 3]