From cc4133f5ce080ddff966a0b6a07245f04d4e3be4 Mon Sep 17 00:00:00 2001
From: Gros Frumos
Date: Tue, 17 Mar 2026 14:52:04 +0200
Subject: [PATCH] kin: auto-commit after pipeline

---
 tests/test_qa_gaps.py | 564 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 564 insertions(+)
 create mode 100644 tests/test_qa_gaps.py

diff --git a/tests/test_qa_gaps.py b/tests/test_qa_gaps.py
new file mode 100644
index 0000000..586e1c6
--- /dev/null
+++ b/tests/test_qa_gaps.py
@@ -0,0 +1,564 @@
"""QA gap tests: issues identified during tester-brief review 2026-03-17.

Covers:
1. (medium) Issue 1 — blocked_reason from dept head sub-pipeline failure NOT
   passed to update_task().
   When _execute_department_head_step runs a sub-pipeline and a worker fails,
   the specific worker error is LOST at two levels:
     (a) _execute_department_head_step does not forward sub_result['error']
         in its return dict
     (b) run_pipeline at line 1529 uses generic 'Department X sub-pipeline failed'
         instead of dept_result.get('error') or dept_result.get('blocked_reason')

   Note: when the dept head itself returns status=blocked (not the workers),
   the reason IS propagated correctly via the _parse_agent_blocked path
   (line 1435-1479). The bug only fires when sub-pipeline workers fail.

2. (low) Issue 2 — dept_infra and dept_research routes MISSING.
   specialists.yaml has 7 departments but only 6 dept_* routes.
   infra_head and research_head have no dedicated dept_* route.
   test_department_routes_exist uses >= 6 so it passes; this test documents
   the gap explicitly.

3. (low) Issue 3 — pm.md line 134: 'Valid values for status' appears after a
   JSON example that does NOT contain a status field. The PM is told to include
   status in output but the main example contradicts this.

4. (low) Issue 4 — context_builder.py line 335: duplicate task = context.get('task').
   Already assigned on line 256. No functional bug, but redundant code.
+""" + +import json +import pytest +from unittest.mock import patch, MagicMock + +from core.db import init_db +from core import models +from agents.runner import run_pipeline, _execute_department_head_step + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +@pytest.fixture +def conn(): + c = init_db(":memory:") + models.create_project(c, "proj", "TestProject", "~/projects/test", + tech_stack=["python", "vue3"]) + models.create_task(c, "PROJ-001", "proj", "Full-stack feature", + brief={"route_type": "dept_feature"}) + yield c + c.close() + + +def _mock_claude_success(output_data): + mock = MagicMock() + mock.stdout = json.dumps(output_data) if isinstance(output_data, dict) else output_data + mock.stderr = "" + mock.returncode = 0 + return mock + + +def _mock_claude_failure(error_msg="error"): + mock = MagicMock() + mock.stdout = "" + mock.stderr = error_msg + mock.returncode = 1 + return mock + + +def _dept_head_output(sub_pipeline, artifacts=None, handoff_notes="", status="done"): + return { + "status": status, + "sub_pipeline": sub_pipeline, + "artifacts": artifacts or {"files_changed": ["api.py"], "notes": "done"}, + "handoff_notes": handoff_notes, + } + + +# --------------------------------------------------------------------------- +# Issue 1: blocked_reason from dept sub-pipeline worker failure NOT passed +# to update_task() in the parent run_pipeline +# (medium severity) +# --------------------------------------------------------------------------- + +class TestBlockedReasonPropagation: + """Issue 1 — agents/runner.py ~line 1529. + + When a department head's sub-pipeline worker fails, the specific error + from the worker is NOT propagated to the parent run_pipeline's update_task call. + + The bug path: + run_pipeline (parent) + → run_agent(backend_head) succeeds + → _execute_department_head_step(...) 
+ → run_pipeline (child sub-pipeline) + → run_agent(backend_dev) FAILS with specific error + → sub_result has error = "Step 1/1 (backend_dev) failed: " + → BUT _execute_department_head_step does NOT include sub_result['error'] + in its return dict (returns only success=False, output=summary) + → dept_result has no 'blocked_reason' or 'error' key + → Parent run_pipeline at line 1529 writes generic: + "Department backend_head sub-pipeline failed" + (ignores dept_result entirely) + → update_task(blocked_reason="Department backend_head sub-pipeline failed") + (specific worker error is LOST) + + These tests verify the bug exists and serve as regression tests when fixed. + """ + + @patch("agents.runner._run_autocommit") + @patch("agents.runner.subprocess.run") + def test_worker_failure_specific_error_lost_in_task_blocked_reason( + self, mock_run, mock_autocommit, conn + ): + """When sub-pipeline worker fails with specific error, that specific error + is NOT preserved in task.blocked_reason. Only a generic message is stored. + + This test DOCUMENTS the bug. The task.blocked_reason gets: + 'Department backend_head sub-pipeline failed' + instead of something containing the actual worker error. 
+ """ + specific_worker_error = "Cannot connect to database: connection refused at port 5432" + call_count = [0] + + def side_effect(*args, **kwargs): + call_count[0] += 1 + if call_count[0] == 1: + # backend_head plans work successfully + return _mock_claude_success(_dept_head_output( + sub_pipeline=[{"role": "backend_dev", "brief": "Implement"}], + )) + # backend_dev (worker) fails with specific error + return _mock_claude_failure(specific_worker_error) + + mock_run.side_effect = side_effect + + steps = [{"role": "backend_head", "model": "opus", "brief": "Do backend"}] + result = run_pipeline(conn, "PROJ-001", steps) + + assert result["success"] is False + + task = models.get_task(conn, "PROJ-001") + assert task["status"] == "blocked" + + # BUG DOCUMENTED: The specific worker error is NOT in task.blocked_reason. + # Current behavior: task.blocked_reason == 'Department backend_head sub-pipeline failed' + # Desired behavior: specific_worker_error should appear in task.blocked_reason + assert specific_worker_error not in (task["blocked_reason"] or ""), ( + f"Unexpected: specific worker error was propagated to task.blocked_reason. " + f"This means Issue 1 has been fixed — update this test!" + ) + + # Verify: generic message IS what gets stored (documents current behavior) + assert "backend_head" in (task["blocked_reason"] or ""), ( + "Expected task.blocked_reason to contain 'backend_head' (the generic message)" + ) + + @patch("agents.runner._run_autocommit") + @patch("agents.runner.subprocess.run") + def test_execute_department_head_step_does_not_include_sub_error_in_return( + self, mock_run, mock_autocommit, conn + ): + """_execute_department_head_step returns success=False when worker fails, + but the returned dict does NOT contain the specific error from the sub-pipeline. + + This verifies the first level of the bug: the function silently drops the error. 
+ """ + mock_run.return_value = _mock_claude_failure("disk full: cannot write to /tmp") + + output = _dept_head_output( + sub_pipeline=[{"role": "backend_dev", "brief": "Implement"}], + ) + pipeline = models.create_pipeline(conn, "PROJ-001", "proj", "dept_feature", + [{"role": "backend_head"}]) + + dept_result = _execute_department_head_step( + conn, "PROJ-001", "proj", + parent_pipeline_id=pipeline["id"], + step={"role": "backend_head", "brief": "Do backend"}, + dept_head_result={"raw_output": json.dumps(output)}, + ) + + assert dept_result["success"] is False + # BUG: 'error' key is missing — sub_result['error'] is not forwarded + assert "error" not in dept_result, ( + f"Unexpected: 'error' key is now in dept_result={dept_result}. " + "Issue 1 (first level) may be fixed — update this test!" + ) + # BUG: 'blocked_reason' key is also missing + assert "blocked_reason" not in dept_result, ( + f"Unexpected: 'blocked_reason' key is now in dept_result={dept_result}. " + "Issue 1 (first level) may be fixed — update this test!" + ) + + @patch("agents.runner._run_autocommit") + @patch("agents.runner.subprocess.run") + def test_run_pipeline_uses_generic_error_not_dept_result_details( + self, mock_run, mock_autocommit, conn + ): + """run_pipeline uses 'Department X sub-pipeline failed' at line 1529, + ignoring any error/blocked_reason in dept_result. + + This verifies the second level of the bug. 
+ """ + call_count = [0] + + def side_effect(*args, **kwargs): + call_count[0] += 1 + if call_count[0] == 1: + return _mock_claude_success(_dept_head_output( + sub_pipeline=[{"role": "backend_dev", "brief": "Implement"}], + )) + return _mock_claude_failure("import error: module 'mymodule' not found") + + mock_run.side_effect = side_effect + + steps = [{"role": "backend_head", "model": "opus", "brief": "Do backend"}] + result = run_pipeline(conn, "PROJ-001", steps) + + assert result["success"] is False + + # BUG VERIFIED: result['error'] is the generic message, not the worker error + expected_generic = "Department backend_head sub-pipeline failed" + assert result["error"] == expected_generic, ( + f"Expected generic error '{expected_generic}', " + f"got '{result.get('error')}'. " + "If this fails, Issue 1 has been fixed — update this test!" + ) + + +# --------------------------------------------------------------------------- +# Issue 2: infra_head and research_head missing dept_* routes +# (low severity — documentation gap) +# --------------------------------------------------------------------------- + +class TestMissingDeptRoutes: + """Issue 2 — agents/specialists.yaml routes section. + + The specialists.yaml defines 7 departments: + backend, frontend, qa, security, infra, research, marketing + + But there are only 6 dept_* routes: + dept_feature, dept_fullstack, dept_security_audit, dept_backend, + dept_frontend, dept_marketing + + infra_head (infra department) and research_head (research department) + have NO dedicated standalone dept_* route. This means the PM cannot + route a task exclusively to the infra or research department via route template. + """ + + def test_infra_head_has_no_dedicated_dept_route(self): + """Confirms infra_head is NOT reachable via any standalone dept_infra route. + + This test documents the gap. It will FAIL once a dept_infra route is added. 
+ """ + import yaml + with open("agents/specialists.yaml") as f: + data = yaml.safe_load(f) + + routes = data["routes"] + dept_routes = {k: v for k, v in routes.items() if k.startswith("dept_")} + + # Check no route exclusively using infra_head as its only step + infra_only_routes = [ + name for name, route in dept_routes.items() + if route["steps"] == ["infra_head"] + ] + assert len(infra_only_routes) == 0, ( + f"Found unexpected dept_infra route(s): {infra_only_routes}. " + "Update this test if route was intentionally added." + ) + + def test_research_head_has_no_dedicated_dept_route(self): + """Confirms research_head is NOT reachable via any standalone dept_research route. + + This test documents the gap. It will FAIL once a dept_research route is added. + """ + import yaml + with open("agents/specialists.yaml") as f: + data = yaml.safe_load(f) + + routes = data["routes"] + dept_routes = {k: v for k, v in routes.items() if k.startswith("dept_")} + + research_only_routes = [ + name for name, route in dept_routes.items() + if route["steps"] == ["research_head"] + ] + assert len(research_only_routes) == 0, ( + f"Found unexpected dept_research route(s): {research_only_routes}. " + "Update this test if route was intentionally added." + ) + + def test_infra_and_research_heads_missing_from_all_routes(self): + """Both infra_head and research_head appear in NO route in specialists.yaml. + + This is the core gap: the PM cannot use these department heads. + When fixed (routes added), this test will fail and should be updated/removed. + """ + import yaml + with open("agents/specialists.yaml") as f: + data = yaml.safe_load(f) + + routes = data["routes"] + all_route_roles = set() + for route in routes.values(): + all_route_roles.update(route.get("steps", [])) + + # Verify the gap exists + assert "infra_head" not in all_route_roles, ( + "infra_head now appears in a route. Issue 2 partially fixed. " + "Update this test." 
+ ) + assert "research_head" not in all_route_roles, ( + "research_head now appears in a route. Issue 2 partially fixed. " + "Update this test." + ) + + def test_6_dept_routes_currently_exist(self): + """Documents that exactly 6 dept_* routes exist (not 7 or 8). + + The existing test test_department_routes_exist uses >= 6, which passes. + This test pins the CURRENT state. When dept_infra and dept_research are added, + this count will become 8 and this test should be updated. + """ + import yaml + with open("agents/specialists.yaml") as f: + data = yaml.safe_load(f) + + routes = data["routes"] + dept_routes = {k: v for k, v in routes.items() if k.startswith("dept_")} + + assert len(dept_routes) == 6, ( + f"Expected 6 dept_* routes (documenting current state), " + f"found {len(dept_routes)}: {list(dept_routes.keys())}. " + "If this changed, update the count and remove gap tests above." + ) + + +# --------------------------------------------------------------------------- +# Issue 3: pm.md status field inconsistency +# (low severity — documentation/prompt clarity) +# --------------------------------------------------------------------------- + +class TestPmPromptStatusFieldConsistency: + """Issue 3 — agents/prompts/pm.md line 134. + + Line 134 says: + 'Valid values for status: "done", "blocked".' + + This line appears AFTER the main JSON output example (lines 109-131) which + does NOT contain a 'status' field at all. The PM receives contradictory + instructions: the primary example shows no 'status', but line 134 implies + status should be present. + + The correct example DOES contain a status field in the blocked-only JSON + at line 143, but the primary output example on lines 109-131 omits it. + """ + + def test_pm_prompt_main_example_does_not_contain_status_field(self): + """The main JSON example in pm.md does NOT include a 'status' field. + + This is an inconsistency: line 134 says 'Valid values for status' + but the example itself omits status. 
Documents the structural ambiguity. + """ + with open("agents/prompts/pm.md") as f: + content = f.read() + + # Find the main output example block + # It starts after '## Output format' and ends before '## Blocked Protocol' + output_section_start = content.find("## Output format") + blocked_section_start = content.find("## Blocked Protocol") + assert output_section_start != -1, "Section '## Output format' not found in pm.md" + assert blocked_section_start != -1, "Section '## Blocked Protocol' not found in pm.md" + + output_section = content[output_section_start:blocked_section_start] + + # The main example JSON block (the first ```json...``` block in this section) + import re + json_blocks = re.findall(r"```json\s*(.*?)```", output_section, re.DOTALL) + assert len(json_blocks) >= 1, "No JSON example found in ## Output format section" + + main_example_json_text = json_blocks[0].strip() + + # The main example must NOT have a status field (confirming the inconsistency) + # If this test suddenly fails, status was added to the example — the bug is fixed + try: + main_example = json.loads(main_example_json_text) + has_status = "status" in main_example + except json.JSONDecodeError: + # Example may not be parseable (it has comments/placeholders) + has_status = '"status"' in main_example_json_text + + assert not has_status, ( + "The main JSON example NOW contains a 'status' field. " + "Issue 3 has been fixed — update or remove this test." + ) + + def test_pm_prompt_mentions_status_values_after_example(self): + """Line after main example mentions 'Valid values for status: done, blocked'. + + Documents that this note appears AFTER the main example, causing ambiguity. + """ + with open("agents/prompts/pm.md") as f: + content = f.read() + + assert 'Valid values for `status`' in content, ( + "Expected 'Valid values for `status`' in pm.md — did the prompt change?" 
+ ) + + # Confirm it references done and blocked + assert '"done"' in content + assert '"blocked"' in content + + def test_pm_blocked_protocol_example_uses_reason_not_blocked_reason(self): + """The Blocked Protocol JSON example uses 'reason' (not 'blocked_reason'). + + But _parse_agent_blocked in runner.py accepts both 'reason' and 'blocked_reason'. + The pm.md blocked example and the inline status note use different field names, + adding to the overall inconsistency. + """ + with open("agents/prompts/pm.md") as f: + content = f.read() + + blocked_section_start = content.find("## Blocked Protocol") + assert blocked_section_start != -1 + + blocked_section = content[blocked_section_start:] + + import re + json_blocks = re.findall(r"```json\s*(.*?)```", blocked_section, re.DOTALL) + assert len(json_blocks) >= 1, "No JSON example in ## Blocked Protocol" + + blocked_example_text = json_blocks[0].strip() + # The blocked protocol example uses "reason", not "blocked_reason" + assert '"reason"' in blocked_example_text, ( + "Expected 'reason' field in blocked protocol JSON example" + ) + # And should NOT use "blocked_reason" (different from the inline note) + assert '"blocked_reason"' not in blocked_example_text, ( + "'blocked_reason' appeared in blocked protocol example — field name is now consistent. " + "Update this test." + ) + + +# --------------------------------------------------------------------------- +# Issue 4: context_builder.py duplicate task assignment +# (low severity — redundant code) +# --------------------------------------------------------------------------- + +class TestContextBuilderDuplicateAssignment: + """Issue 4 — core/context_builder.py line 335. 
+ + task = context.get('task') is assigned twice inside format_prompt(): + - Line 256: task = context.get('task') (first assignment, used for task info) + - Line 335: task = context.get('task') (duplicate, used for revise_comment) + + No functional bug — both assignments produce the same result since the context + dict is not mutated between them. The duplicate is dead code and misleading. + """ + + def test_duplicate_task_assignment_does_not_affect_revise_comment_output(self): + """format_prompt includes revise_comment correctly despite the duplicate + task = context.get('task') on line 335. + + This test verifies the duplicate causes no observable functional regression. + """ + from core.context_builder import format_prompt + + task_data = { + "id": "PROJ-001", + "title": "Test task", + "status": "in_progress", + "priority": "normal", + "revise_comment": "Please add error handling to the API endpoint", + } + context = { + "task": task_data, + "project": { + "id": "proj", + "name": "TestProject", + "path": "~/projects/test", + "language": "ru", + }, + } + + # Use a simple inline template to avoid file I/O + result = format_prompt(context, "backend_dev", + prompt_template="You are backend_dev.") + + # revise_comment should appear in the formatted output + assert "Director's revision request:" in result + assert "Please add error handling to the API endpoint" in result + + def test_duplicate_task_assignment_does_not_affect_task_info_section(self): + """format_prompt includes task info correctly (first use of 'task' on line 256). + + Verifies the first assignment is not broken by the duplicate on line 335. 
+ """ + from core.context_builder import format_prompt + + task_data = { + "id": "PROJ-001", + "title": "My test task", + "status": "in_progress", + "priority": "high", + "brief": {"route_type": "debug"}, + } + context = { + "task": task_data, + "project": { + "id": "proj", + "name": "TestProject", + "path": "~/projects/test", + "language": "ru", + }, + } + + result = format_prompt(context, "debugger", + prompt_template="You are debugger.") + + # Task section should be populated (uses first assignment at line 256) + assert "PROJ-001" in result + assert "My test task" in result + + def test_source_code_has_duplicate_task_assignment(self): + """Documents that the duplicate assignment still exists in the source. + + This test will FAIL once the duplicate is removed (cleanup completed). + """ + import ast + import pathlib + + source = pathlib.Path("core/context_builder.py").read_text() + tree = ast.parse(source) + + # Count assignments of the form: task = context.get('task') + task_assign_count = 0 + for node in ast.walk(tree): + if isinstance(node, ast.Assign): + # Check target is 'task' + targets_are_task = any( + isinstance(t, ast.Name) and t.id == "task" + for t in node.targets + ) + if not targets_are_task: + continue + # Check value is context.get('task') or context.get("task") + val = node.value + if ( + isinstance(val, ast.Call) + and isinstance(val.func, ast.Attribute) + and val.func.attr == "get" + and isinstance(val.func.value, ast.Name) + and val.func.value.id == "context" + and len(val.args) == 1 + and isinstance(val.args[0], ast.Constant) + and val.args[0].value == "task" + ): + task_assign_count += 1 + + assert task_assign_count == 2, ( + f"Expected exactly 2 duplicate 'task = context.get(\"task\")' assignments " + f"(documenting Issue 4), found {task_assign_count}. " + "If count is 1, the duplicate was removed — this test can be deleted." + )