kin: auto-commit after pipeline
This commit is contained in:
parent
c30a4c0fc4
commit
33fc38b01f
4 changed files with 459 additions and 67 deletions
117
tests/test_kin_100_regression.py
Normal file
117
tests/test_kin_100_regression.py
Normal file
|
|
@ -0,0 +1,117 @@
|
|||
"""Regression tests for KIN-100 — human-readable agent output format.
|
||||
|
||||
Verifies that reviewer.md and tester.md prompts contain the two-block format
|
||||
instructions (## Verdict + ## Details), ensuring agents produce output that
|
||||
is both human-readable and machine-parseable.
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
# Prompt files are looked up in "<parent of this file's directory>/agents/prompts".
PROMPTS_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), "agents", "prompts")
|
||||
|
||||
|
||||
def _read_prompt(name: str) -> str:
    """Return the full text of the prompt file *name* located in PROMPTS_DIR.

    The file is opened in text mode and decoded as UTF-8. Any error raised by
    ``open`` (for example when the file does not exist) propagates unchanged.
    """
    prompt_path = os.path.join(PROMPTS_DIR, name)
    with open(prompt_path, encoding="utf-8") as prompt_file:
        text = prompt_file.read()
    return text
|
||||
|
||||
|
||||
class TestReviewerPromptFormat:
    """Checks that reviewer.md describes the two-block (Verdict / Details) output format."""

    PROMPT_FILE = "reviewer.md"

    def _prompt_text(self) -> str:
        # Read the file anew on every call, exactly as each test did on its own.
        return _read_prompt(self.PROMPT_FILE)

    def test_reviewer_prompt_contains_verdict_section(self):
        """The prompt must contain the literal text '## Verdict'."""
        prompt_text = self._prompt_text()
        assert "## Verdict" in prompt_text, "reviewer.md must contain '## Verdict' section"

    def test_reviewer_prompt_contains_details_section(self):
        """The prompt must contain the literal text '## Details'."""
        prompt_text = self._prompt_text()
        assert "## Details" in prompt_text, "reviewer.md must contain '## Details' section"

    def test_reviewer_prompt_verdict_instructs_plain_russian(self):
        """The prompt must mention Russian (the language expected for the Verdict)."""
        prompt_text = self._prompt_text()
        # The word is searched for anywhere in the file, in either capitalisation.
        mentions_russian = any(word in prompt_text for word in ("Russian", "russian"))
        assert mentions_russian, (
            "reviewer.md must mention Russian language for the Verdict section"
        )

    def test_reviewer_prompt_details_uses_json_fence(self):
        """The prompt must contain an opening ```json code fence."""
        prompt_text = self._prompt_text()
        assert "```json" in prompt_text, "reviewer.md Details section must use ```json fence"

    def test_reviewer_prompt_verdict_forbids_json(self):
        """The prompt must state that JSON/code is not allowed (expected for the Verdict)."""
        prompt_text = self._prompt_text()
        # "No JSON"/"no JSON" are matched case-sensitively; "no code" is matched
        # case-insensitively. The whole file is searched, not just one section.
        forbids_json = any(marker in prompt_text for marker in ("No JSON", "no JSON"))
        forbids_code = "no code" in prompt_text.lower()
        assert forbids_json or forbids_code, (
            "reviewer.md Verdict section must explicitly say no JSON/code snippets"
        )

    def test_reviewer_prompt_has_example_verdict(self):
        """The prompt must include Russian text, taken as evidence of an example verdict."""
        prompt_text = self._prompt_text()
        # Either marker word from the Russian-language example is sufficient.
        has_capitalised_marker = "Реализация" in prompt_text
        has_lowercase_marker = "проверен" in prompt_text.lower()
        assert has_capitalised_marker or has_lowercase_marker, (
            "reviewer.md must contain a Russian-language example verdict"
        )
|
||||
|
||||
|
||||
class TestTesterPromptFormat:
    """Checks that tester.md describes the two-block (Verdict / Details) output format."""

    PROMPT_FILE = "tester.md"

    def _prompt_text(self) -> str:
        # Read the file anew on every call, exactly as each test did on its own.
        return _read_prompt(self.PROMPT_FILE)

    def test_tester_prompt_contains_verdict_section(self):
        """The prompt must contain the literal text '## Verdict'."""
        prompt_text = self._prompt_text()
        assert "## Verdict" in prompt_text, "tester.md must contain '## Verdict' section"

    def test_tester_prompt_contains_details_section(self):
        """The prompt must contain the literal text '## Details'."""
        prompt_text = self._prompt_text()
        assert "## Details" in prompt_text, "tester.md must contain '## Details' section"

    def test_tester_prompt_verdict_instructs_plain_russian(self):
        """The prompt must mention Russian (the language expected for the Verdict)."""
        prompt_text = self._prompt_text()
        mentions_russian = any(word in prompt_text for word in ("Russian", "russian"))
        assert mentions_russian, (
            "tester.md must mention Russian language for the Verdict section"
        )

    def test_tester_prompt_details_uses_json_fence(self):
        """The prompt must contain an opening ```json code fence."""
        prompt_text = self._prompt_text()
        assert "```json" in prompt_text, "tester.md Details section must use ```json fence"

    def test_tester_prompt_verdict_forbids_json(self):
        """The prompt must state that JSON/code is not allowed (expected for the Verdict)."""
        prompt_text = self._prompt_text()
        # "No JSON"/"no JSON" are matched case-sensitively; "no code" is matched
        # case-insensitively. The whole file is searched, not just one section.
        forbids_json = any(marker in prompt_text for marker in ("No JSON", "no JSON"))
        forbids_code = "no code" in prompt_text.lower()
        assert forbids_json or forbids_code, (
            "tester.md Verdict section must explicitly say no JSON/code snippets"
        )

    def test_tester_prompt_has_example_verdict(self):
        """The prompt must include Russian text, taken as evidence of an example verdict."""
        prompt_text = self._prompt_text()
        # Either marker word from the Russian-language example is sufficient.
        has_capitalised_marker = "Написано" in prompt_text
        has_lowercase_marker = "тест" in prompt_text.lower()
        assert has_capitalised_marker or has_lowercase_marker, (
            "tester.md must contain a Russian-language example verdict"
        )
|
||||
|
||||
|
||||
class TestBothPromptsStructure:
    """Cross-checks section order and documented JSON fields in reviewer.md and tester.md."""

    @staticmethod
    def _section_offsets(prompt_name):
        """Return ``(verdict_pos, details_pos)`` for the prompt file *prompt_name*.

        Each value is the index of the first occurrence of the marker as
        reported by ``str.find``, i.e. ``-1`` when the marker is absent.
        """
        content = _read_prompt(prompt_name)
        return content.find("## Verdict"), content.find("## Details")

    def test_reviewer_verdict_comes_before_details(self):
        """## Verdict section must appear before ## Details in reviewer.md."""
        verdict_pos, details_pos = self._section_offsets("reviewer.md")
        # Check presence first: comparing a -1 "not found" offset would be meaningless.
        assert verdict_pos != -1, "## Verdict must exist"
        assert details_pos != -1, "## Details must exist"
        assert verdict_pos < details_pos, "## Verdict must come before ## Details"

    def test_tester_verdict_comes_before_details(self):
        """## Verdict section must appear before ## Details in tester.md."""
        verdict_pos, details_pos = self._section_offsets("tester.md")
        # The presence checks now carry messages, matching the reviewer.md test,
        # so a failure states which marker is missing.
        assert verdict_pos != -1, "## Verdict must exist in tester.md"
        assert details_pos != -1, "## Details must exist in tester.md"
        assert verdict_pos < details_pos, "## Verdict must come before ## Details in tester.md"

    def test_reviewer_details_status_field_documented(self):
        """Details JSON must document a 'verdict' field."""
        # NOTE(review): the test name says "status", but the field checked for
        # reviewer.md is "verdict"; the name is kept so test IDs stay stable.
        content = _read_prompt("reviewer.md")
        assert '"verdict"' in content, "reviewer.md Details must document 'verdict' field"

    def test_tester_details_status_field_documented(self):
        """Details JSON must document a 'status' field."""
        content = _read_prompt("tester.md")
        assert '"status"' in content, "tester.md Details must document 'status' field"
|
||||
|
|
@ -2602,3 +2602,165 @@ class TestCheckClaudeAuth:
|
|||
def test_ok_when_timeout(self, mock_run):
|
||||
"""При TimeoutExpired не бросает исключение (не блокируем на timeout)."""
|
||||
check_claude_auth() # должна вернуть None без исключений
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# KIN-OBS-030: PM-шаг инструментирован в pipeline_log
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestPMStepPipelineLog:
    """Verifies that the PM step is recorded in pipeline_log after run_pipeline."""

    # Queries selecting the pipeline_log rows that carry the PM-step message.
    _PM_ROWS_SQL = "SELECT * FROM pipeline_log WHERE message='PM step: task decomposed'"
    _PM_EXTRA_SQL = "SELECT extra_json FROM pipeline_log WHERE message='PM step: task decomposed'"

    @staticmethod
    def _prime_mocks(mock_run, mock_learn):
        """Set the return values of the patched subprocess.run and learning-extraction mocks."""
        mock_run.return_value = _mock_claude_success({"result": "done"})
        mock_learn.return_value = {"added": 0, "skipped": 0}

    @staticmethod
    def _single_debugger_step():
        """Return a freshly built one-step plan so no list object is shared between tests."""
        return [{"role": "debugger", "brief": "find bug"}]

    @staticmethod
    def _pm_result(duration_seconds, tokens_used, cost_usd):
        """Build a successful PM result dict with the given timing and usage figures."""
        return {
            "success": True,
            "duration_seconds": duration_seconds,
            "tokens_used": tokens_used,
            "cost_usd": cost_usd,
        }

    @patch("agents.runner._run_autocommit")
    @patch("agents.runner._run_learning_extraction")
    @patch("agents.runner.subprocess.run")
    def test_pm_log_entry_written_when_pm_result_provided(
        self, mock_run, mock_learn, mock_autocommit, conn
    ):
        """When pm_result is passed to run_pipeline, one PM-step row appears in pipeline_log."""
        self._prime_mocks(mock_run, mock_learn)

        run_pipeline(
            conn, "VDOL-001", self._single_debugger_step(),
            pm_result=self._pm_result(5, 1000, 0.01),
            pm_started_at="2026-03-17T10:00:00",
            pm_ended_at="2026-03-17T10:00:05",
        )

        pm_rows = conn.execute(self._PM_ROWS_SQL).fetchall()
        assert len(pm_rows) == 1

    @patch("agents.runner._run_autocommit")
    @patch("agents.runner._run_learning_extraction")
    @patch("agents.runner.subprocess.run")
    def test_pm_log_entry_has_correct_pipeline_id(
        self, mock_run, mock_learn, mock_autocommit, conn
    ):
        """pipeline_id of the PM row in pipeline_log matches the actual pipeline."""
        self._prime_mocks(mock_run, mock_learn)

        run_pipeline(
            conn, "VDOL-001", self._single_debugger_step(),
            pm_result=self._pm_result(3, 800, 0.008),
            pm_started_at="2026-03-17T10:00:00",
            pm_ended_at="2026-03-17T10:00:03",
        )

        pipeline_row = conn.execute(
            "SELECT * FROM pipelines WHERE task_id='VDOL-001'"
        ).fetchone()
        assert pipeline_row is not None

        pm_row = conn.execute(self._PM_ROWS_SQL).fetchone()
        assert pm_row is not None
        assert pm_row["pipeline_id"] == pipeline_row["id"]

    @patch("agents.runner._run_autocommit")
    @patch("agents.runner._run_learning_extraction")
    @patch("agents.runner.subprocess.run")
    def test_pm_log_entry_has_step_pm_in_extra(
        self, mock_run, mock_learn, mock_autocommit, conn
    ):
        """extra_json of the PM row contains role='pm' and the expected timing data."""
        self._prime_mocks(mock_run, mock_learn)

        run_pipeline(
            conn, "VDOL-001", self._single_debugger_step(),
            pm_result=self._pm_result(7, 1500, 0.02),
            pm_started_at="2026-03-17T10:00:00",
            pm_ended_at="2026-03-17T10:00:07",
        )

        pm_row = conn.execute(self._PM_EXTRA_SQL).fetchone()
        assert pm_row is not None
        payload = json.loads(pm_row["extra_json"])
        assert payload["role"] == "pm"
        assert payload["duration_seconds"] == 7
        assert payload["pm_started_at"] == "2026-03-17T10:00:00"
        assert payload["pm_ended_at"] == "2026-03-17T10:00:07"

    @patch("agents.runner._run_autocommit")
    @patch("agents.runner._run_learning_extraction")
    @patch("agents.runner.subprocess.run")
    def test_pm_log_not_written_when_pm_result_is_none(
        self, mock_run, mock_learn, mock_autocommit, conn
    ):
        """When pm_result is not passed, pipeline_log has no PM-step row."""
        self._prime_mocks(mock_run, mock_learn)

        # No pm_* keyword arguments are supplied here.
        run_pipeline(conn, "VDOL-001", self._single_debugger_step())

        pm_rows = conn.execute(self._PM_ROWS_SQL).fetchall()
        assert len(pm_rows) == 0

    @patch("agents.runner._run_autocommit")
    @patch("agents.runner._run_learning_extraction")
    @patch("agents.runner.subprocess.run")
    def test_pm_log_not_written_for_sub_pipeline(
        self, mock_run, mock_learn, mock_autocommit, conn
    ):
        """The PM log row is NOT written for a sub-pipeline (parent_pipeline_id is set)."""
        self._prime_mocks(mock_run, mock_learn)

        # Create the parent pipeline first so its id can be passed to run_pipeline.
        parent = models.create_pipeline(conn, "VDOL-001", "vdol", "linear", [])

        run_pipeline(
            conn, "VDOL-001", self._single_debugger_step(),
            pm_result=self._pm_result(4, 900, 0.009),
            pm_started_at="2026-03-17T10:00:00",
            pm_ended_at="2026-03-17T10:00:04",
            parent_pipeline_id=parent["id"],
        )

        pm_rows = conn.execute(self._PM_ROWS_SQL).fetchall()
        assert len(pm_rows) == 0

    @patch("agents.runner._run_autocommit")
    @patch("agents.runner._run_learning_extraction")
    @patch("agents.runner.subprocess.run")
    def test_pm_log_no_orphan_records(
        self, mock_run, mock_learn, mock_autocommit, conn
    ):
        """FK integrity: every pipeline_log row references an existing pipeline."""
        self._prime_mocks(mock_run, mock_learn)

        run_pipeline(
            conn, "VDOL-001", self._single_debugger_step(),
            pm_result=self._pm_result(2, 500, 0.005),
            pm_started_at="2026-03-17T10:00:00",
            pm_ended_at="2026-03-17T10:00:02",
        )

        # Check the foreign key through a LEFT JOIN: a log row whose pipeline
        # is missing would come back with p.id NULL.
        orphan_rows = conn.execute(
            """SELECT pl.id FROM pipeline_log pl
               LEFT JOIN pipelines p ON pl.pipeline_id = p.id
               WHERE p.id IS NULL"""
        ).fetchall()
        assert len(orphan_rows) == 0
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue