kin/tests/test_kin_133_gate_cannot_close.py
2026-03-19 15:50:52 +02:00

309 lines
13 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Tests for KIN-133: Gate cannot_close — when the final gate agent (reviewer/tester)
rejects, the task moves to Blocked instead of Done.
Covers:
1. reviewer verdict='changes_requested' → task blocked in auto_complete mode
2. reviewer verdict='revise' → task blocked in auto_complete mode
3. tester status='failed' → task blocked in auto_complete mode
4. reviewer verdict='approved' → task done in auto_complete mode (happy path)
5. tester status='passed' → task done in auto_complete mode (happy path)
6. reviewer rejection in review mode → gate check skipped, task goes to review
7. _parse_gate_cannot_close unit tests (fail-open for unknown formats)
8. _find_gate_result unit tests (last successful, reverse order)
"""
import json
import pytest
from unittest.mock import patch, MagicMock
from core.db import init_db
from core import models
from agents.runner import (
run_pipeline,
_parse_gate_cannot_close,
_find_gate_result,
)
# ---------------------------------------------------------------------------
# Fixtures & helpers
# ---------------------------------------------------------------------------
@pytest.fixture
def conn():
    """Yield an in-memory database seeded with one auto_complete task.

    Setup creates project ``p1`` and task ``P1-001`` (brief
    ``{"route_type": "feature"}``), then switches the task's
    ``execution_mode`` to ``auto_complete``.

    Yields:
        The connection object returned by ``init_db(":memory:")``.
    """
    c = init_db(":memory:")
    try:
        models.create_project(c, "p1", "P1", "/tmp/p1", tech_stack=["python"])
        models.create_task(c, "P1-001", "p1", "Implement feature",
                           brief={"route_type": "feature"})
        models.update_task(c, "P1-001", execution_mode="auto_complete")
        yield c
    finally:
        # Close even when seeding raises before the yield, so a broken
        # setup does not leak the connection.
        c.close()
def _mock_subprocess(agent_output: dict) -> MagicMock:
    """Create a stand-in for the object returned by ``subprocess.run``.

    The agent payload is serialized to JSON and nested, as a string, under
    the ``"result"`` key of an outer JSON document. Per the original note,
    this mirrors how ``claude --output-format json`` structures its response.

    Args:
        agent_output: The agent's payload.

    Returns:
        A ``MagicMock`` whose ``stdout`` holds the doubly encoded payload,
        with an empty ``stderr`` and a ``returncode`` of 0.
    """
    inner_json = json.dumps(agent_output, ensure_ascii=False)
    envelope = json.dumps({"result": inner_json})
    return MagicMock(stdout=envelope, stderr="", returncode=0)
def _mock_reviewer(verdict: str, reason: str = "Review found issues") -> MagicMock:
    """Return a mocked subprocess result carrying a reviewer payload.

    Args:
        verdict: Value stored under the payload's ``"verdict"`` key.
        reason: Value stored under the payload's ``"reason"`` key.

    Returns:
        The mock built by ``_mock_subprocess`` for a payload that also
        contains an empty ``"findings"`` list.
    """
    payload = {"verdict": verdict, "reason": reason, "findings": []}
    return _mock_subprocess(payload)
def _mock_tester(status: str, reason: str = "Tests failed") -> MagicMock:
    """Return a mocked subprocess result carrying a tester payload.

    Args:
        status: Value stored under the payload's ``"status"`` key.
        reason: Value stored under the payload's ``"reason"`` key.

    Returns:
        The mock built by ``_mock_subprocess`` for a payload whose counters
        are fixed at ``tests_passed=0`` and ``tests_failed=1``.
    """
    payload = {
        "status": status,
        "reason": reason,
        "tests_passed": 0,
        "tests_failed": 1,
    }
    return _mock_subprocess(payload)
# ---------------------------------------------------------------------------
# Integration tests: run_pipeline gate checks
# ---------------------------------------------------------------------------
class TestGateCannotCloseIntegration:
    """KIN-133: Final gate agent rejection blocks task instead of closing it.

    Every test patches ``agents.runner.subprocess.run`` with a canned agent
    reply, runs ``run_pipeline`` on task ``P1-001`` and then inspects both the
    returned dict and the stored task.
    """

    @staticmethod
    def _arm(mock_run, mock_hooks, mock_followup, agent_reply):
        """Give the three patched collaborators their canned return values."""
        mock_run.return_value = agent_reply
        mock_hooks.return_value = []
        mock_followup.return_value = {"created": [], "pending_actions": []}

    @patch("core.followup.generate_followups")
    @patch("agents.runner.run_hooks")
    @patch("agents.runner.subprocess.run")
    def test_reviewer_changes_requested_blocks_task_in_auto_complete(
        self, mock_run, mock_hooks, mock_followup, conn
    ):
        """reviewer verdict='changes_requested' → task blocked, not done."""
        self._arm(mock_run, mock_hooks, mock_followup,
                  _mock_reviewer("changes_requested"))
        pipeline = [
            {"role": "backend_dev", "brief": "implement"},
            {"role": "reviewer", "brief": "review"},
        ]

        outcome = run_pipeline(conn, "P1-001", pipeline)

        assert outcome["success"] is False
        assert outcome.get("blocked_by") == "reviewer"
        stored = models.get_task(conn, "P1-001")
        assert stored["status"] == "blocked", (
            "reviewer verdict='changes_requested' должен блокировать задачу в auto_complete"
        )
        assert stored.get("blocked_agent_role") == "reviewer"

    @patch("core.followup.generate_followups")
    @patch("agents.runner.run_hooks")
    @patch("agents.runner.subprocess.run")
    def test_reviewer_revise_blocks_task_in_auto_complete(
        self, mock_run, mock_hooks, mock_followup, conn
    ):
        """reviewer verdict='revise' → task blocked, not done."""
        self._arm(mock_run, mock_hooks, mock_followup,
                  _mock_reviewer("revise", reason="Needs rework"))
        pipeline = [{"role": "reviewer", "brief": "review"}]

        outcome = run_pipeline(conn, "P1-001", pipeline)

        assert outcome["success"] is False
        stored = models.get_task(conn, "P1-001")
        assert stored["status"] == "blocked"
        # blocked_reason may be missing/None, hence the ``or ""`` guard.
        assert "Needs rework" in (stored.get("blocked_reason") or "")

    @patch("core.followup.generate_followups")
    @patch("agents.runner.run_hooks")
    @patch("agents.runner.subprocess.run")
    def test_tester_failed_blocks_task_in_auto_complete(
        self, mock_run, mock_hooks, mock_followup, conn
    ):
        """tester status='failed' → task blocked, not done."""
        self._arm(mock_run, mock_hooks, mock_followup,
                  _mock_tester("failed", reason="3 assertions failed"))
        pipeline = [{"role": "tester", "brief": "run tests"}]

        outcome = run_pipeline(conn, "P1-001", pipeline)

        assert outcome["success"] is False
        assert outcome.get("blocked_by") == "tester"
        stored = models.get_task(conn, "P1-001")
        assert stored["status"] == "blocked", (
            "tester status='failed' должен блокировать задачу в auto_complete"
        )
        assert stored.get("blocked_agent_role") == "tester"

    @patch("core.followup.generate_followups")
    @patch("agents.runner.run_hooks")
    @patch("agents.runner.subprocess.run")
    def test_reviewer_approved_closes_task_in_auto_complete(
        self, mock_run, mock_hooks, mock_followup, conn
    ):
        """reviewer verdict='approved' → task done (happy path)."""
        self._arm(mock_run, mock_hooks, mock_followup,
                  _mock_reviewer("approved", reason=""))
        pipeline = [{"role": "reviewer", "brief": "review"}]

        outcome = run_pipeline(conn, "P1-001", pipeline)

        assert outcome["success"] is True
        stored = models.get_task(conn, "P1-001")
        assert stored["status"] == "done", (
            "reviewer verdict='approved' должен закрывать задачу в auto_complete"
        )

    @patch("core.followup.generate_followups")
    @patch("agents.runner.run_hooks")
    @patch("agents.runner.subprocess.run")
    def test_tester_passed_closes_task_in_auto_complete(
        self, mock_run, mock_hooks, mock_followup, conn
    ):
        """tester status='passed' → task done (happy path)."""
        self._arm(mock_run, mock_hooks, mock_followup,
                  _mock_tester("passed", reason=""))
        pipeline = [{"role": "tester", "brief": "run tests"}]

        outcome = run_pipeline(conn, "P1-001", pipeline)

        assert outcome["success"] is True
        stored = models.get_task(conn, "P1-001")
        assert stored["status"] == "done", (
            "tester status='passed' должен закрывать задачу в auto_complete"
        )

    @patch("core.followup.generate_followups")
    @patch("agents.runner.run_hooks")
    @patch("agents.runner.subprocess.run")
    def test_gate_check_only_in_auto_complete_not_review_mode(
        self, mock_run, mock_hooks, mock_followup, conn
    ):
        """In review mode the gate check is not applied — the task goes to manual approval."""
        self._arm(mock_run, mock_hooks, mock_followup,
                  _mock_reviewer("changes_requested"))
        # Override the fixture's auto_complete mode for this test only.
        models.update_task(conn, "P1-001", execution_mode="review")
        pipeline = [{"role": "reviewer", "brief": "review"}]

        outcome = run_pipeline(conn, "P1-001", pipeline)

        # In review mode pipeline succeeds (waits for manual approve)
        assert outcome["success"] is True
        stored = models.get_task(conn, "P1-001")
        assert stored["status"] == "review", (
            "В review-режиме задача должна уйти на ручной approve, не blocked"
        )
# ---------------------------------------------------------------------------
# Unit tests: _parse_gate_cannot_close
# ---------------------------------------------------------------------------
class TestParseGateCannotClose:
    """Unit tests for _parse_gate_cannot_close fail-open logic."""

    def _make_result(self, output):
        """Wrap ``output`` in a successful agent-result dict with role 'reviewer'."""
        return {"success": True, "output": output, "role": "reviewer"}

    # Reviewer tests

    def test_reviewer_approved_returns_none(self):
        """An approved verdict yields None."""
        agent_result = self._make_result({"verdict": "approved", "reason": ""})
        assert _parse_gate_cannot_close(agent_result, "reviewer") is None

    def test_reviewer_changes_requested_returns_reason(self):
        """A 'changes_requested' verdict yields a result carrying the reason."""
        agent_result = self._make_result(
            {"verdict": "changes_requested", "reason": "Missing tests"}
        )
        blocked = _parse_gate_cannot_close(agent_result, "reviewer")
        assert blocked is not None
        assert "Missing tests" in blocked["reason"]

    def test_reviewer_revise_returns_reason(self):
        """A 'revise' verdict yields a result carrying the reason."""
        agent_result = self._make_result(
            {"verdict": "revise", "reason": "Needs rework"}
        )
        blocked = _parse_gate_cannot_close(agent_result, "reviewer")
        assert blocked is not None
        assert "Needs rework" in blocked["reason"]

    def test_reviewer_no_verdict_fails_open(self):
        """If the verdict is missing — fail-open, do not block."""
        agent_result = self._make_result({"summary": "looks ok"})
        assert _parse_gate_cannot_close(agent_result, "reviewer") is None

    def test_reviewer_non_dict_output_fails_open(self):
        """If the output is not a dict — fail-open."""
        agent_result = self._make_result("approved")
        assert _parse_gate_cannot_close(agent_result, "reviewer") is None

    # Tester tests

    def test_tester_passed_returns_none(self):
        """A passed status yields None."""
        agent_result = self._make_result({"status": "passed", "tests_passed": 5})
        assert _parse_gate_cannot_close(agent_result, "tester") is None

    def test_tester_failed_returns_reason(self):
        """A failed status yields a result carrying the reason."""
        agent_result = self._make_result(
            {"status": "failed", "reason": "3 assertions failed"}
        )
        blocked = _parse_gate_cannot_close(agent_result, "tester")
        assert blocked is not None
        assert "3 assertions failed" in blocked["reason"]

    def test_tester_no_status_fails_open(self):
        """If the status is missing — fail-open."""
        agent_result = self._make_result({"tests_passed": 3})
        assert _parse_gate_cannot_close(agent_result, "tester") is None

    def test_tester_non_dict_output_fails_open(self):
        """If the output is not a dict — fail-open."""
        agent_result = self._make_result("passed")
        assert _parse_gate_cannot_close(agent_result, "tester") is None

    def test_unknown_role_fails_open(self):
        """Unknown role — fail-open, do not block."""
        agent_result = self._make_result({"verdict": "rejected"})
        assert _parse_gate_cannot_close(agent_result, "smoke_tester") is None

    def test_reviewer_reason_fallback_to_verdict(self):
        """Without a reason in the payload, a non-empty reason is still returned.

        The original note names the fallback text as
        'Reviewer verdict: <verdict>'; this test only checks non-emptiness.
        """
        agent_result = self._make_result({"verdict": "changes_requested"})
        blocked = _parse_gate_cannot_close(agent_result, "reviewer")
        assert blocked is not None
        assert blocked["reason"]  # not empty
# ---------------------------------------------------------------------------
# Unit tests: _find_gate_result
# ---------------------------------------------------------------------------
class TestFindGateResult:
    """Unit tests for _find_gate_result reverse-iteration logic."""

    def test_returns_last_successful_result_for_role(self):
        """With two successful reviewer entries, the later one is returned."""
        history = [
            {"role": "reviewer", "success": True, "output": {"verdict": "changes_requested"}},
            {"role": "backend_dev", "success": True, "output": {"status": "done"}},
            {"role": "reviewer", "success": True, "output": {"verdict": "approved"}},
        ]
        found = _find_gate_result(history, "reviewer")
        assert found is not None
        assert found["output"]["verdict"] == "approved"  # the last one

    def test_skips_failed_results(self):
        """An entry with success=False is not returned, even for a matching role."""
        history = [
            {"role": "reviewer", "success": False, "output": {"verdict": "approved"}},
        ]
        found = _find_gate_result(history, "reviewer")
        assert found is None

    def test_returns_none_when_role_absent(self):
        """None is returned when no entry has the requested role."""
        history = [
            {"role": "tester", "success": True, "output": {"status": "passed"}},
        ]
        found = _find_gate_result(history, "reviewer")
        assert found is None

    def test_returns_none_for_empty_results(self):
        """None is returned for an empty result list."""
        found = _find_gate_result([], "reviewer")
        assert found is None

    def test_auto_fix_loop_returns_last_successful_tester(self):
        """After an auto_fix loop: returns the last successful tester attempt."""
        history = [
            {"role": "tester", "success": True, "output": {"status": "failed"}, "_auto_fix_attempt": 1},
            {"role": "backend_dev", "success": True, "output": {}, "_auto_fix_attempt": 1},
            {"role": "tester", "success": True, "output": {"status": "passed"}, "_auto_retest_attempt": 1},
        ]
        found = _find_gate_result(history, "tester")
        assert found is not None
        assert found["output"]["status"] == "passed"