kin: auto-commit after pipeline

2026-03-17 18:31:00 +02:00 · 2026-03-17 18:31:00 +02:00 · 33fc38b01f
commit 33fc38b01f
parent c30a4c0fc4
4 changed files with 459 additions and 67 deletions
--- a/tests/test_kin_100_regression.py
+++ b/tests/test_kin_100_regression.py
@@ -0,0 +1,117 @@
+"""Regression tests for KIN-100 — human-readable agent output format.
+
+Verifies that reviewer.md and tester.md prompts contain the two-block format
+instructions (## Verdict + ## Details), ensuring agents produce output that
+is both human-readable and machine-parseable.
+"""
+
+import os
+
+PROMPTS_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), "agents", "prompts")
+
+
+def _read_prompt(name: str) -> str:
+    """Return the UTF-8 decoded text of the file *name* located under PROMPTS_DIR."""
+    with open(os.path.join(PROMPTS_DIR, name), encoding="utf-8") as prompt_file:
+        return prompt_file.read()
+
+
+class TestReviewerPromptFormat:
+    def test_reviewer_prompt_contains_verdict_section(self):
+        prompt = _read_prompt("reviewer.md")
+        assert "## Verdict" in prompt, "reviewer.md must contain '## Verdict' section"
+
+    def test_reviewer_prompt_contains_details_section(self):
+        prompt = _read_prompt("reviewer.md")
+        assert "## Details" in prompt, "reviewer.md must contain '## Details' section"
+
+    def test_reviewer_prompt_verdict_instructs_plain_russian(self):
+        """The prompt has to ask for a plain-Russian Verdict (read by the project director)."""
+        prompt = _read_prompt("reviewer.md")
+        assert any(word in prompt for word in ("Russian", "russian")), (
+            "reviewer.md must mention Russian language for the Verdict section"
+        )
+
+    def test_reviewer_prompt_details_uses_json_fence(self):
+        """The prompt has to show the Details payload inside a ```json code fence."""
+        prompt = _read_prompt("reviewer.md")
+        assert "```json" in prompt, "reviewer.md Details section must use ```json fence"
+
+    def test_reviewer_prompt_verdict_forbids_json(self):
+        """The prompt has to state that the Verdict block carries no JSON/code."""
+        prompt = _read_prompt("reviewer.md")
+        # Accepts exact-case "No JSON" / "no JSON", or "no code" in any letter case
+        assert any(p in prompt for p in ("No JSON", "no JSON")) or "no code" in prompt.lower(), (
+            "reviewer.md Verdict section must explicitly say no JSON/code snippets"
+        )
+
+    def test_reviewer_prompt_has_example_verdict(self):
+        """The prompt has to include a sample verdict written in plain language."""
+        prompt = _read_prompt("reviewer.md")
+        # Russian words expected in the sample text that follows ## Verdict
+        assert any(("Реализация" in prompt, "проверен" in prompt.lower())), (
+            "reviewer.md must contain a Russian-language example verdict"
+        )
+
+
+class TestTesterPromptFormat:
+    def test_tester_prompt_contains_verdict_section(self):
+        prompt = _read_prompt("tester.md")
+        assert "## Verdict" in prompt, "tester.md must contain '## Verdict' section"
+
+    def test_tester_prompt_contains_details_section(self):
+        prompt = _read_prompt("tester.md")
+        assert "## Details" in prompt, "tester.md must contain '## Details' section"
+
+    def test_tester_prompt_verdict_instructs_plain_russian(self):
+        prompt = _read_prompt("tester.md")
+        assert any(word in prompt for word in ("Russian", "russian")), (
+            "tester.md must mention Russian language for the Verdict section"
+        )
+
+    def test_tester_prompt_details_uses_json_fence(self):
+        prompt = _read_prompt("tester.md")
+        assert "```json" in prompt, "tester.md Details section must use ```json fence"
+
+    def test_tester_prompt_verdict_forbids_json(self):
+        prompt = _read_prompt("tester.md")
+        assert any(p in prompt for p in ("No JSON", "no JSON")) or "no code" in prompt.lower(), (
+            "tester.md Verdict section must explicitly say no JSON/code snippets"
+        )
+
+    def test_tester_prompt_has_example_verdict(self):
+        prompt = _read_prompt("tester.md")
+        # Russian words expected somewhere in the prompt's sample text
+        assert any(("Написано" in prompt, "тест" in prompt.lower())), (
+            "tester.md must contain a Russian-language example verdict"
+        )
+
+
+class TestBothPromptsStructure:
+    def test_reviewer_verdict_comes_before_details(self):
+        """## Verdict section must appear before ## Details in reviewer.md."""
+        content = _read_prompt("reviewer.md")
+        verdict_pos = content.find("## Verdict")
+        details_pos = content.find("## Details")
+        assert verdict_pos != -1, "## Verdict must exist"
+        assert details_pos != -1, "## Details must exist"
+        assert verdict_pos < details_pos, "## Verdict must come before ## Details"
+
+    def test_tester_verdict_comes_before_details(self):
+        """## Verdict section must appear before ## Details in tester.md."""
+        content = _read_prompt("tester.md")
+        verdict_pos = content.find("## Verdict")
+        details_pos = content.find("## Details")
+        assert verdict_pos != -1, "## Verdict must exist in tester.md"
+        assert details_pos != -1, "## Details must exist in tester.md"
+        assert verdict_pos < details_pos, "## Verdict must come before ## Details in tester.md"
+
+    def test_reviewer_details_verdict_field_documented(self):
+        """Details JSON must document a 'verdict' field (reviewer.md uses 'verdict', not 'status')."""
+        content = _read_prompt("reviewer.md")
+        assert '"verdict"' in content, "reviewer.md Details must document 'verdict' field"
+
+    def test_tester_details_status_field_documented(self):
+        """Details JSON must document a 'status' field."""
+        content = _read_prompt("tester.md")
+        assert '"status"' in content, "tester.md Details must document 'status' field"
--- a/tests/test_runner.py
+++ b/tests/test_runner.py
@@ -2602,3 +2602,165 @@ class TestCheckClaudeAuth:
    def test_ok_when_timeout(self, mock_run):
        """При TimeoutExpired не бросает исключение (не блокируем на timeout)."""
        check_claude_auth()  # должна вернуть None без исключений
+
+
+# ---------------------------------------------------------------------------
+# KIN-OBS-030: the PM step is instrumented in pipeline_log
+# ---------------------------------------------------------------------------
+
+class TestPMStepPipelineLog:
+    """Verify that the PM step is recorded in pipeline_log after run_pipeline."""
+
+    @patch("agents.runner._run_autocommit")
+    @patch("agents.runner._run_learning_extraction")
+    @patch("agents.runner.subprocess.run")
+    def test_pm_log_entry_written_when_pm_result_provided(
+        self, mock_run, mock_learn, mock_autocommit, conn
+    ):
+        """When pm_result is passed to run_pipeline, a PM-step entry appears in pipeline_log."""
+        mock_run.return_value = _mock_claude_success({"result": "done"})
+        mock_learn.return_value = {"added": 0, "skipped": 0}
+
+        pm_result = {"success": True, "duration_seconds": 5, "tokens_used": 1000, "cost_usd": 0.01}
+        steps = [{"role": "debugger", "brief": "find bug"}]
+        run_pipeline(
+            conn, "VDOL-001", steps,
+            pm_result=pm_result,
+            pm_started_at="2026-03-17T10:00:00",
+            pm_ended_at="2026-03-17T10:00:05",
+        )
+
+        logs = conn.execute(
+            "SELECT * FROM pipeline_log WHERE message='PM step: task decomposed'"
+        ).fetchall()
+        assert len(logs) == 1
+
+    @patch("agents.runner._run_autocommit")
+    @patch("agents.runner._run_learning_extraction")
+    @patch("agents.runner.subprocess.run")
+    def test_pm_log_entry_has_correct_pipeline_id(
+        self, mock_run, mock_learn, mock_autocommit, conn
+    ):
+        """The PM entry's pipeline_id in pipeline_log matches the actual pipeline."""
+        mock_run.return_value = _mock_claude_success({"result": "done"})
+        mock_learn.return_value = {"added": 0, "skipped": 0}
+
+        pm_result = {"success": True, "duration_seconds": 3, "tokens_used": 800, "cost_usd": 0.008}
+        steps = [{"role": "debugger", "brief": "find bug"}]
+        run_pipeline(
+            conn, "VDOL-001", steps,
+            pm_result=pm_result,
+            pm_started_at="2026-03-17T10:00:00",
+            pm_ended_at="2026-03-17T10:00:03",
+        )
+
+        pipeline = conn.execute("SELECT * FROM pipelines WHERE task_id='VDOL-001'").fetchone()
+        assert pipeline is not None
+
+        pm_log = conn.execute(
+            "SELECT * FROM pipeline_log WHERE message='PM step: task decomposed'"
+        ).fetchone()
+        assert pm_log is not None
+        assert pm_log["pipeline_id"] == pipeline["id"]
+
+    @patch("agents.runner._run_autocommit")
+    @patch("agents.runner._run_learning_extraction")
+    @patch("agents.runner.subprocess.run")
+    def test_pm_log_entry_has_step_pm_in_extra(
+        self, mock_run, mock_learn, mock_autocommit, conn
+    ):
+        """The PM entry's extra_json contains role='pm' and the correct timing data."""
+        mock_run.return_value = _mock_claude_success({"result": "done"})
+        mock_learn.return_value = {"added": 0, "skipped": 0}
+
+        pm_result = {"success": True, "duration_seconds": 7, "tokens_used": 1500, "cost_usd": 0.02}
+        steps = [{"role": "debugger", "brief": "find bug"}]
+        run_pipeline(
+            conn, "VDOL-001", steps,
+            pm_result=pm_result,
+            pm_started_at="2026-03-17T10:00:00",
+            pm_ended_at="2026-03-17T10:00:07",
+        )
+
+        row = conn.execute(
+            "SELECT extra_json FROM pipeline_log WHERE message='PM step: task decomposed'"
+        ).fetchone()
+        assert row is not None
+        extra = json.loads(row["extra_json"])
+        assert extra["role"] == "pm"
+        assert extra["duration_seconds"] == 7
+        assert extra["pm_started_at"] == "2026-03-17T10:00:00"
+        assert extra["pm_ended_at"] == "2026-03-17T10:00:07"
+
+    @patch("agents.runner._run_autocommit")
+    @patch("agents.runner._run_learning_extraction")
+    @patch("agents.runner.subprocess.run")
+    def test_pm_log_not_written_when_pm_result_is_none(
+        self, mock_run, mock_learn, mock_autocommit, conn
+    ):
+        """When pm_result is not passed (None), no PM-step entry is written to pipeline_log."""
+        mock_run.return_value = _mock_claude_success({"result": "done"})
+        mock_learn.return_value = {"added": 0, "skipped": 0}
+
+        steps = [{"role": "debugger", "brief": "find bug"}]
+        run_pipeline(conn, "VDOL-001", steps)  # pm_result is omitted, so it defaults to None
+
+        pm_logs = conn.execute(
+            "SELECT * FROM pipeline_log WHERE message='PM step: task decomposed'"
+        ).fetchall()
+        assert len(pm_logs) == 0
+
+    @patch("agents.runner._run_autocommit")
+    @patch("agents.runner._run_learning_extraction")
+    @patch("agents.runner.subprocess.run")
+    def test_pm_log_not_written_for_sub_pipeline(
+        self, mock_run, mock_learn, mock_autocommit, conn
+    ):
+        """The PM log is NOT written for a sub-pipeline (parent_pipeline_id is set)."""
+        mock_run.return_value = _mock_claude_success({"result": "done"})
+        mock_learn.return_value = {"added": 0, "skipped": 0}
+
+        # Create the parent pipeline first
+        parent_pipeline = models.create_pipeline(conn, "VDOL-001", "vdol", "linear", [])
+
+        pm_result = {"success": True, "duration_seconds": 4, "tokens_used": 900, "cost_usd": 0.009}
+        steps = [{"role": "debugger", "brief": "find bug"}]
+        run_pipeline(
+            conn, "VDOL-001", steps,
+            pm_result=pm_result,
+            pm_started_at="2026-03-17T10:00:00",
+            pm_ended_at="2026-03-17T10:00:04",
+            parent_pipeline_id=parent_pipeline["id"],
+        )
+
+        pm_logs = conn.execute(
+            "SELECT * FROM pipeline_log WHERE message='PM step: task decomposed'"
+        ).fetchall()
+        assert len(pm_logs) == 0
+
+    @patch("agents.runner._run_autocommit")
+    @patch("agents.runner._run_learning_extraction")
+    @patch("agents.runner.subprocess.run")
+    def test_pm_log_no_orphan_records(
+        self, mock_run, mock_learn, mock_autocommit, conn
+    ):
+        """FK integrity: every pipeline_log row references an existing pipeline."""
+        mock_run.return_value = _mock_claude_success({"result": "done"})
+        mock_learn.return_value = {"added": 0, "skipped": 0}
+
+        pm_result = {"success": True, "duration_seconds": 2, "tokens_used": 500, "cost_usd": 0.005}
+        steps = [{"role": "debugger", "brief": "find bug"}]
+        run_pipeline(
+            conn, "VDOL-001", steps,
+            pm_result=pm_result,
+            pm_started_at="2026-03-17T10:00:00",
+            pm_ended_at="2026-03-17T10:00:02",
+        )
+
+        # Check the FK via a LEFT JOIN — there must be no orphan rows
+        orphans = conn.execute(
+            """SELECT pl.id FROM pipeline_log pl
+               LEFT JOIN pipelines p ON pl.pipeline_id = p.id
+               WHERE p.id IS NULL"""
+        ).fetchall()
+        assert len(orphans) == 0