kin/tests/test_qa_gaps.py

"""QA gap tests: issues identified during tester-brief review 2026-03-17.

Covers:
1. (medium) Issue 1 — blocked_reason from dept head sub-pipeline failure NOT passed to update_task().
   When _execute_department_head_step runs a sub-pipeline and a worker fails, the specific
   worker error is LOST at two levels:
   (a) _execute_department_head_step does not forward sub_result['error'] in its return dict
   (b) run_pipeline at line 1529 uses generic 'Department X sub-pipeline failed'
       instead of dept_result.get('error') or dept_result.get('blocked_reason')

   Note: when the dept head itself returns status=blocked (not the workers), the reason
   IS propagated correctly via the _parse_agent_blocked path (line 1435-1479).
   The bug only fires when sub-pipeline workers fail.

2. (low) Issue 2 — dept_infra and dept_research routes MISSING.
   specialists.yaml has 7 departments but only 6 dept_* routes.
   infra_head and research_head have no dedicated dept_* route.
   test_department_routes_exist uses >= 6 so it passes; this test documents the gap explicitly.

3. (low) Issue 3 — pm.md line 134: 'Valid values for status' appears after JSON example
   that does NOT contain a status field. The PM is told to include status in output but
   the main example contradicts this.

4. (low) Issue 4 — context_builder.py line 335: duplicate task = context.get('task').
   Already assigned on line 256. No functional bug, but redundant code.
"""

import json
import pytest
from unittest.mock import patch, MagicMock

from core.db import init_db
from core import models
from agents.runner import run_pipeline, _execute_department_head_step


# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------

@pytest.fixture
def conn():
    c = init_db(":memory:")
    models.create_project(c, "proj", "TestProject", "~/projects/test",
                          tech_stack=["python", "vue3"])
    models.create_task(c, "PROJ-001", "proj", "Full-stack feature",
                       brief={"route_type": "dept_feature"})
    yield c
    c.close()


def _mock_claude_success(output_data):
    mock = MagicMock()
    mock.stdout = json.dumps(output_data) if isinstance(output_data, dict) else output_data
    mock.stderr = ""
    mock.returncode = 0
    return mock


def _mock_claude_failure(error_msg="error"):
    mock = MagicMock()
    mock.stdout = ""
    mock.stderr = error_msg
    mock.returncode = 1
    return mock


def _dept_head_output(sub_pipeline, artifacts=None, handoff_notes="", status="done"):
    return {
        "status": status,
        "sub_pipeline": sub_pipeline,
        "artifacts": artifacts or {"files_changed": ["api.py"], "notes": "done"},
        "handoff_notes": handoff_notes,
    }


# ---------------------------------------------------------------------------
# Issue 1: blocked_reason from dept sub-pipeline worker failure NOT passed
# to update_task() in the parent run_pipeline
# (medium severity)
# ---------------------------------------------------------------------------

class TestBlockedReasonPropagation:
    """Issue 1 — agents/runner.py ~line 1529.

    When a department head's sub-pipeline worker fails, the specific error
    from the worker is NOT propagated to the parent run_pipeline's update_task call.

    The bug path:
      run_pipeline (parent)
        → run_agent(backend_head) succeeds
        → _execute_department_head_step(...)
            → run_pipeline (child sub-pipeline)
                → run_agent(backend_dev) FAILS with specific error
            → sub_result has error = "Step 1/1 (backend_dev) failed: <specific_error>"
            → BUT _execute_department_head_step does NOT include sub_result['error']
              in its return dict (returns only success=False, output=summary)
        → dept_result has no 'blocked_reason' or 'error' key
        → Parent run_pipeline at line 1529 writes generic:
          "Department backend_head sub-pipeline failed"
          (ignores dept_result entirely)
        → update_task(blocked_reason="Department backend_head sub-pipeline failed")
          (specific worker error is LOST)

    These tests verify the bug exists and serve as regression tests when fixed.
    """

    @patch("agents.runner._run_autocommit")
    @patch("agents.runner.subprocess.run")
    def test_worker_failure_specific_error_lost_in_task_blocked_reason(
        self, mock_run, mock_autocommit, conn
    ):
        """When sub-pipeline worker fails with specific error, that specific error
        is NOT preserved in task.blocked_reason. Only a generic message is stored.

        This test DOCUMENTS the bug. The task.blocked_reason gets:
          'Department backend_head sub-pipeline failed'
        instead of something containing the actual worker error.
        """
        specific_worker_error = "Cannot connect to database: connection refused at port 5432"
        call_count = [0]

        def side_effect(*args, **kwargs):
            call_count[0] += 1
            if call_count[0] == 1:
                # backend_head plans work successfully
                return _mock_claude_success(_dept_head_output(
                    sub_pipeline=[{"role": "backend_dev", "brief": "Implement"}],
                ))
            # backend_dev (worker) fails with specific error
            return _mock_claude_failure(specific_worker_error)

        mock_run.side_effect = side_effect

        steps = [{"role": "backend_head", "model": "opus", "brief": "Do backend"}]
        result = run_pipeline(conn, "PROJ-001", steps)

        assert result["success"] is False

        task = models.get_task(conn, "PROJ-001")
        assert task["status"] == "blocked"

        # FIXED (KIN-ARCH-014): specific worker error IS now in task.blocked_reason
        assert specific_worker_error in (task["blocked_reason"] or ""), (
            f"Expected specific worker error in task.blocked_reason, "
            f"got: {task['blocked_reason']!r}"
        )

    @patch("agents.runner._run_autocommit")
    @patch("agents.runner.subprocess.run")
    def test_execute_department_head_step_does_not_include_sub_error_in_return(
        self, mock_run, mock_autocommit, conn
    ):
        """_execute_department_head_step returns success=False when worker fails,
        but the returned dict does NOT contain the specific error from the sub-pipeline.

        This verifies the first level of the bug: the function silently drops the error.
        """
        mock_run.return_value = _mock_claude_failure("disk full: cannot write to /tmp")

        output = _dept_head_output(
            sub_pipeline=[{"role": "backend_dev", "brief": "Implement"}],
        )
        pipeline = models.create_pipeline(conn, "PROJ-001", "proj", "dept_feature",
                                          [{"role": "backend_head"}])

        dept_result = _execute_department_head_step(
            conn, "PROJ-001", "proj",
            parent_pipeline_id=pipeline["id"],
            step={"role": "backend_head", "brief": "Do backend"},
            dept_head_result={"raw_output": json.dumps(output)},
        )

        assert dept_result["success"] is False
        # FIXED (KIN-ARCH-014): 'error' and 'blocked_reason' are now forwarded from sub_result
        assert "error" in dept_result, (
            f"Expected 'error' key in dept_result={dept_result}"
        )
        assert "blocked_reason" in dept_result, (
            f"Expected 'blocked_reason' key in dept_result={dept_result}"
        )

    @patch("agents.runner._run_autocommit")
    @patch("agents.runner.subprocess.run")
    def test_run_pipeline_uses_generic_error_not_dept_result_details(
        self, mock_run, mock_autocommit, conn
    ):
        """run_pipeline uses 'Department X sub-pipeline failed' at line 1529,
        ignoring any error/blocked_reason in dept_result.

        This verifies the second level of the bug.
        """
        call_count = [0]

        def side_effect(*args, **kwargs):
            call_count[0] += 1
            if call_count[0] == 1:
                return _mock_claude_success(_dept_head_output(
                    sub_pipeline=[{"role": "backend_dev", "brief": "Implement"}],
                ))
            return _mock_claude_failure("import error: module 'mymodule' not found")

        mock_run.side_effect = side_effect

        steps = [{"role": "backend_head", "model": "opus", "brief": "Do backend"}]
        result = run_pipeline(conn, "PROJ-001", steps)

        assert result["success"] is False

        # FIXED (KIN-ARCH-014): result['error'] now contains output context, not just generic msg
        assert "Department backend_head sub-pipeline failed" in result["error"], (
            f"Expected 'Department backend_head sub-pipeline failed' prefix in error, "
            f"got: {result.get('error')!r}"
        )


# ---------------------------------------------------------------------------
# Issue 2: dept_infra and dept_research routes — FIXED (KIN-ARCH-021)
# ---------------------------------------------------------------------------

class TestMissingDeptRoutes:
    """Issue 2 — agents/specialists.yaml routes section. FIXED in KIN-ARCH-021.

    The specialists.yaml defines 7 departments:
      backend, frontend, qa, security, infra, research, marketing

    After the fix there are 8 dept_* routes:
      dept_feature, dept_fullstack, dept_security_audit, dept_backend,
      dept_frontend, dept_marketing, dept_infra, dept_research

    These tests are regression tests verifying the routes remain present.
    """

    def test_infra_head_has_dedicated_dept_route(self):
        """dept_infra route exists in specialists.yaml with steps: [infra_head]."""
        import yaml
        with open("agents/specialists.yaml") as f:
            data = yaml.safe_load(f)

        routes = data["routes"]
        dept_routes = {k: v for k, v in routes.items() if k.startswith("dept_")}

        infra_only_routes = [
            name for name, route in dept_routes.items()
            if route["steps"] == ["infra_head"]
        ]
        assert len(infra_only_routes) == 1, (
            f"Expected exactly one dept_infra route with steps=[infra_head], "
            f"found: {infra_only_routes}"
        )
        assert "dept_infra" in infra_only_routes

    def test_research_head_has_dedicated_dept_route(self):
        """dept_research route exists in specialists.yaml with steps: [research_head]."""
        import yaml
        with open("agents/specialists.yaml") as f:
            data = yaml.safe_load(f)

        routes = data["routes"]
        dept_routes = {k: v for k, v in routes.items() if k.startswith("dept_")}

        research_only_routes = [
            name for name, route in dept_routes.items()
            if route["steps"] == ["research_head"]
        ]
        assert len(research_only_routes) == 1, (
            f"Expected exactly one dept_research route with steps=[research_head], "
            f"found: {research_only_routes}"
        )
        assert "dept_research" in research_only_routes

    def test_infra_and_research_heads_present_in_routes(self):
        """Both infra_head and research_head are reachable via routes in specialists.yaml."""
        import yaml
        with open("agents/specialists.yaml") as f:
            data = yaml.safe_load(f)

        routes = data["routes"]
        all_route_roles = set()
        for route in routes.values():
            all_route_roles.update(route.get("steps", []))

        assert "infra_head" in all_route_roles, (
            "infra_head is missing from all routes — regression of KIN-ARCH-021"
        )
        assert "research_head" in all_route_roles, (
            "research_head is missing from all routes — regression of KIN-ARCH-021"
        )

    def test_8_dept_routes_exist(self):
        """Exactly 8 dept_* routes exist after adding dept_infra and dept_research."""
        import yaml
        with open("agents/specialists.yaml") as f:
            data = yaml.safe_load(f)

        routes = data["routes"]
        dept_routes = {k: v for k, v in routes.items() if k.startswith("dept_")}

        assert len(dept_routes) == 8, (
            f"Expected 8 dept_* routes, found {len(dept_routes)}: "
            f"{list(dept_routes.keys())}"
        )


# ---------------------------------------------------------------------------
# Issue 3: pm.md status field inconsistency
# (low severity — documentation/prompt clarity)
# ---------------------------------------------------------------------------

class TestPmPromptStatusFieldConsistency:
    """Issue 3 — agents/prompts/pm.md line 134.

    Line 134 says:
      'Valid values for status: "done", "blocked".'

    This line appears AFTER the main JSON output example (lines 109-131) which
    does NOT contain a 'status' field at all. The PM receives contradictory
    instructions: the primary example shows no 'status', but line 134 implies
    status should be present.

    The correct example DOES contain a status field in the blocked-only JSON
    at line 143, but the primary output example on lines 109-131 omits it.
    """

    def test_pm_prompt_main_example_does_not_contain_status_field(self):
        """The main JSON example in pm.md does NOT include a 'status' field.

        This is an inconsistency: line 134 says 'Valid values for status'
        but the example itself omits status. Documents the structural ambiguity.
        """
        with open("agents/prompts/pm.md") as f:
            content = f.read()

        # Find the main output example block
        # It starts after '## Output format' and ends before '## Blocked Protocol'
        output_section_start = content.find("## Output format")
        blocked_section_start = content.find("## Blocked Protocol")
        assert output_section_start != -1, "Section '## Output format' not found in pm.md"
        assert blocked_section_start != -1, "Section '## Blocked Protocol' not found in pm.md"

        output_section = content[output_section_start:blocked_section_start]

        # The main example JSON block (the first ```json...``` block in this section)
        import re
        json_blocks = re.findall(r"```json\s*(.*?)```", output_section, re.DOTALL)
        assert len(json_blocks) >= 1, "No JSON example found in ## Output format section"

        main_example_json_text = json_blocks[0].strip()

        # The main example must NOT have a status field (confirming the inconsistency)
        # If this test suddenly fails, status was added to the example — the bug is fixed
        try:
            main_example = json.loads(main_example_json_text)
            has_status = "status" in main_example
        except json.JSONDecodeError:
            # Example may not be parseable (it has comments/placeholders)
            has_status = '"status"' in main_example_json_text

        assert not has_status, (
            "The main JSON example NOW contains a 'status' field. "
            "Issue 3 has been fixed — update or remove this test."
        )

    def test_pm_prompt_does_not_mention_status_values_after_example(self):
        """Orphaned 'Valid values for status' line was removed from pm.md.

        Issue 3 fixed: the ambiguous note that appeared after the main JSON example
        and referenced a 'status' field absent from that example has been deleted.
        """
        with open("agents/prompts/pm.md") as f:
            content = f.read()

        assert 'Valid values for `status`' not in content, (
            "Orphaned 'Valid values for `status`' line was re-introduced in pm.md"
        )

    def test_pm_blocked_protocol_example_uses_reason_not_blocked_reason(self):
        """The Blocked Protocol JSON example uses 'reason' (not 'blocked_reason').

        But _parse_agent_blocked in runner.py accepts both 'reason' and 'blocked_reason'.
        The pm.md blocked example and the inline status note use different field names,
        adding to the overall inconsistency.
        """
        with open("agents/prompts/pm.md") as f:
            content = f.read()

        blocked_section_start = content.find("## Blocked Protocol")
        assert blocked_section_start != -1

        blocked_section = content[blocked_section_start:]

        import re
        json_blocks = re.findall(r"```json\s*(.*?)```", blocked_section, re.DOTALL)
        assert len(json_blocks) >= 1, "No JSON example in ## Blocked Protocol"

        blocked_example_text = json_blocks[0].strip()
        # The blocked protocol example uses "reason", not "blocked_reason"
        assert '"reason"' in blocked_example_text, (
            "Expected 'reason' field in blocked protocol JSON example"
        )
        # And should NOT use "blocked_reason" (different from the inline note)
        assert '"blocked_reason"' not in blocked_example_text, (
            "'blocked_reason' appeared in blocked protocol example — field name is now consistent. "
            "Update this test."
        )


# ---------------------------------------------------------------------------
# Issue 4: context_builder.py duplicate task assignment
# (low severity — redundant code)
# ---------------------------------------------------------------------------

class TestContextBuilderDuplicateAssignment:
    """Issue 4 — core/context_builder.py line 335.

    task = context.get('task') is assigned twice inside format_prompt():
      - Line 256: task = context.get('task')   (first assignment, used for task info)
      - Line 335: task = context.get('task')   (duplicate, used for revise_comment)

    No functional bug — both assignments produce the same result since the context
    dict is not mutated between them. The duplicate is dead code and misleading.
    """

    def test_duplicate_task_assignment_does_not_affect_revise_comment_output(self):
        """format_prompt includes revise_comment correctly despite the duplicate
        task = context.get('task') on line 335.

        This test verifies the duplicate causes no observable functional regression.
        """
        from core.context_builder import format_prompt

        task_data = {
            "id": "PROJ-001",
            "title": "Test task",
            "status": "in_progress",
            "priority": "normal",
            "revise_comment": "Please add error handling to the API endpoint",
        }
        context = {
            "task": task_data,
            "project": {
                "id": "proj",
                "name": "TestProject",
                "path": "~/projects/test",
                "language": "ru",
            },
        }

        # Use a simple inline template to avoid file I/O
        result = format_prompt(context, "backend_dev",
                               prompt_template="You are backend_dev.")

        # revise_comment should appear in the formatted output
        assert "Director's revision request:" in result
        assert "Please add error handling to the API endpoint" in result

    def test_duplicate_task_assignment_does_not_affect_task_info_section(self):
        """format_prompt includes task info correctly (first use of 'task' on line 256).

        Verifies the first assignment is not broken by the duplicate on line 335.
        """
        from core.context_builder import format_prompt

        task_data = {
            "id": "PROJ-001",
            "title": "My test task",
            "status": "in_progress",
            "priority": "high",
            "brief": {"route_type": "debug"},
        }
        context = {
            "task": task_data,
            "project": {
                "id": "proj",
                "name": "TestProject",
                "path": "~/projects/test",
                "language": "ru",
            },
        }

        result = format_prompt(context, "debugger",
                               prompt_template="You are debugger.")

        # Task section should be populated (uses first assignment at line 256)
        assert "PROJ-001" in result
        assert "My test task" in result

    def test_source_code_has_single_task_assignment(self):
        """Duplicate assignment was removed from context_builder.py.

        Issue 4 fixed: task = context.get('task') now appears exactly once
        inside format_prompt().
        """
        import ast
        import pathlib

        source = pathlib.Path("core/context_builder.py").read_text()
        tree = ast.parse(source)

        # Count assignments of the form: task = context.get('task')
        task_assign_count = 0
        for node in ast.walk(tree):
            if isinstance(node, ast.Assign):
                targets_are_task = any(
                    isinstance(t, ast.Name) and t.id == "task"
                    for t in node.targets
                )
                if not targets_are_task:
                    continue
                val = node.value
                if (
                    isinstance(val, ast.Call)
                    and isinstance(val.func, ast.Attribute)
                    and val.func.attr == "get"
                    and isinstance(val.func.value, ast.Name)
                    and val.func.value.id == "context"
                    and len(val.args) == 1
                    and isinstance(val.args[0], ast.Constant)
                    and val.args[0].value == "task"
                ):
                    task_assign_count += 1

        assert task_assign_count == 1, (
            f"Expected exactly 1 'task = context.get(\"task\")' assignment, "
            f"found {task_assign_count}. Duplicate may have been re-introduced."
        )