310 lines
13 KiB
Python
310 lines
13 KiB
Python
|
|
"""
|
|||
|
|
Tests for KIN-133: Gate cannot_close — when final gate agent (reviewer/tester)
|
|||
|
|
rejects, task moves to Blocked instead of Done.
|
|||
|
|
|
|||
|
|
Covers:
|
|||
|
|
1. reviewer verdict='changes_requested' → task blocked in auto_complete mode
|
|||
|
|
2. reviewer verdict='revise' → task blocked in auto_complete mode
|
|||
|
|
3. tester status='failed' → task blocked in auto_complete mode
|
|||
|
|
4. reviewer verdict='approved' → task done in auto_complete mode (happy path)
|
|||
|
|
5. tester status='passed' → task done in auto_complete mode (happy path)
|
|||
|
|
6. _parse_gate_cannot_close unit tests (fail-open for unknown formats)
|
|||
|
|
7. _find_gate_result unit tests (last successful, reverse order)
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
import json
|
|||
|
|
import pytest
|
|||
|
|
from unittest.mock import patch, MagicMock
|
|||
|
|
|
|||
|
|
from core.db import init_db
|
|||
|
|
from core import models
|
|||
|
|
from agents.runner import (
|
|||
|
|
run_pipeline,
|
|||
|
|
_parse_gate_cannot_close,
|
|||
|
|
_find_gate_result,
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
# Fixtures & helpers
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
|
|||
|
|
@pytest.fixture
|
|||
|
|
def conn():
|
|||
|
|
c = init_db(":memory:")
|
|||
|
|
models.create_project(c, "p1", "P1", "/tmp/p1", tech_stack=["python"])
|
|||
|
|
models.create_task(c, "P1-001", "p1", "Implement feature",
|
|||
|
|
brief={"route_type": "feature"})
|
|||
|
|
models.update_task(c, "P1-001", execution_mode="auto_complete")
|
|||
|
|
yield c
|
|||
|
|
c.close()
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _mock_subprocess(agent_output: dict) -> MagicMock:
|
|||
|
|
"""Build subprocess.run mock that returns agent_output as parsed JSON.
|
|||
|
|
|
|||
|
|
The subprocess stdout wraps agent output in {"result": "<json string>"}
|
|||
|
|
which is how claude --output-format json structures its response.
|
|||
|
|
"""
|
|||
|
|
m = MagicMock()
|
|||
|
|
m.stdout = json.dumps({"result": json.dumps(agent_output, ensure_ascii=False)})
|
|||
|
|
m.stderr = ""
|
|||
|
|
m.returncode = 0
|
|||
|
|
return m
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _mock_reviewer(verdict: str, reason: str = "Review found issues") -> MagicMock:
|
|||
|
|
return _mock_subprocess({"verdict": verdict, "reason": reason, "findings": []})
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _mock_tester(status: str, reason: str = "Tests failed") -> MagicMock:
|
|||
|
|
return _mock_subprocess({"status": status, "reason": reason, "tests_passed": 0, "tests_failed": 1})
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
# Integration tests: run_pipeline gate checks
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
|
|||
|
|
class TestGateCannotCloseIntegration:
|
|||
|
|
"""KIN-133: Final gate agent rejection blocks task instead of closing it."""
|
|||
|
|
|
|||
|
|
@patch("core.followup.generate_followups")
|
|||
|
|
@patch("agents.runner.run_hooks")
|
|||
|
|
@patch("agents.runner.subprocess.run")
|
|||
|
|
def test_reviewer_changes_requested_blocks_task_in_auto_complete(
|
|||
|
|
self, mock_run, mock_hooks, mock_followup, conn
|
|||
|
|
):
|
|||
|
|
"""reviewer verdict='changes_requested' → task blocked, not done."""
|
|||
|
|
mock_run.return_value = _mock_reviewer("changes_requested")
|
|||
|
|
mock_hooks.return_value = []
|
|||
|
|
mock_followup.return_value = {"created": [], "pending_actions": []}
|
|||
|
|
|
|||
|
|
steps = [{"role": "backend_dev", "brief": "implement"},
|
|||
|
|
{"role": "reviewer", "brief": "review"}]
|
|||
|
|
result = run_pipeline(conn, "P1-001", steps)
|
|||
|
|
|
|||
|
|
assert result["success"] is False
|
|||
|
|
assert result.get("blocked_by") == "reviewer"
|
|||
|
|
task = models.get_task(conn, "P1-001")
|
|||
|
|
assert task["status"] == "blocked", (
|
|||
|
|
"reviewer verdict='changes_requested' должен блокировать задачу в auto_complete"
|
|||
|
|
)
|
|||
|
|
assert task.get("blocked_agent_role") == "reviewer"
|
|||
|
|
|
|||
|
|
@patch("core.followup.generate_followups")
|
|||
|
|
@patch("agents.runner.run_hooks")
|
|||
|
|
@patch("agents.runner.subprocess.run")
|
|||
|
|
def test_reviewer_revise_blocks_task_in_auto_complete(
|
|||
|
|
self, mock_run, mock_hooks, mock_followup, conn
|
|||
|
|
):
|
|||
|
|
"""reviewer verdict='revise' → task blocked, not done."""
|
|||
|
|
mock_run.return_value = _mock_reviewer("revise", reason="Needs rework")
|
|||
|
|
mock_hooks.return_value = []
|
|||
|
|
mock_followup.return_value = {"created": [], "pending_actions": []}
|
|||
|
|
|
|||
|
|
steps = [{"role": "reviewer", "brief": "review"}]
|
|||
|
|
result = run_pipeline(conn, "P1-001", steps)
|
|||
|
|
|
|||
|
|
assert result["success"] is False
|
|||
|
|
task = models.get_task(conn, "P1-001")
|
|||
|
|
assert task["status"] == "blocked"
|
|||
|
|
assert "Needs rework" in (task.get("blocked_reason") or "")
|
|||
|
|
|
|||
|
|
@patch("core.followup.generate_followups")
|
|||
|
|
@patch("agents.runner.run_hooks")
|
|||
|
|
@patch("agents.runner.subprocess.run")
|
|||
|
|
def test_tester_failed_blocks_task_in_auto_complete(
|
|||
|
|
self, mock_run, mock_hooks, mock_followup, conn
|
|||
|
|
):
|
|||
|
|
"""tester status='failed' → task blocked, not done."""
|
|||
|
|
mock_run.return_value = _mock_tester("failed", reason="3 assertions failed")
|
|||
|
|
mock_hooks.return_value = []
|
|||
|
|
mock_followup.return_value = {"created": [], "pending_actions": []}
|
|||
|
|
|
|||
|
|
steps = [{"role": "tester", "brief": "run tests"}]
|
|||
|
|
result = run_pipeline(conn, "P1-001", steps)
|
|||
|
|
|
|||
|
|
assert result["success"] is False
|
|||
|
|
assert result.get("blocked_by") == "tester"
|
|||
|
|
task = models.get_task(conn, "P1-001")
|
|||
|
|
assert task["status"] == "blocked", (
|
|||
|
|
"tester status='failed' должен блокировать задачу в auto_complete"
|
|||
|
|
)
|
|||
|
|
assert task.get("blocked_agent_role") == "tester"
|
|||
|
|
|
|||
|
|
@patch("core.followup.generate_followups")
|
|||
|
|
@patch("agents.runner.run_hooks")
|
|||
|
|
@patch("agents.runner.subprocess.run")
|
|||
|
|
def test_reviewer_approved_closes_task_in_auto_complete(
|
|||
|
|
self, mock_run, mock_hooks, mock_followup, conn
|
|||
|
|
):
|
|||
|
|
"""reviewer verdict='approved' → task done (happy path)."""
|
|||
|
|
mock_run.return_value = _mock_reviewer("approved", reason="")
|
|||
|
|
mock_hooks.return_value = []
|
|||
|
|
mock_followup.return_value = {"created": [], "pending_actions": []}
|
|||
|
|
|
|||
|
|
steps = [{"role": "reviewer", "brief": "review"}]
|
|||
|
|
result = run_pipeline(conn, "P1-001", steps)
|
|||
|
|
|
|||
|
|
assert result["success"] is True
|
|||
|
|
task = models.get_task(conn, "P1-001")
|
|||
|
|
assert task["status"] == "done", (
|
|||
|
|
"reviewer verdict='approved' должен закрывать задачу в auto_complete"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
@patch("core.followup.generate_followups")
|
|||
|
|
@patch("agents.runner.run_hooks")
|
|||
|
|
@patch("agents.runner.subprocess.run")
|
|||
|
|
def test_tester_passed_closes_task_in_auto_complete(
|
|||
|
|
self, mock_run, mock_hooks, mock_followup, conn
|
|||
|
|
):
|
|||
|
|
"""tester status='passed' → task done (happy path)."""
|
|||
|
|
mock_run.return_value = _mock_tester("passed", reason="")
|
|||
|
|
mock_hooks.return_value = []
|
|||
|
|
mock_followup.return_value = {"created": [], "pending_actions": []}
|
|||
|
|
|
|||
|
|
steps = [{"role": "tester", "brief": "run tests"}]
|
|||
|
|
result = run_pipeline(conn, "P1-001", steps)
|
|||
|
|
|
|||
|
|
assert result["success"] is True
|
|||
|
|
task = models.get_task(conn, "P1-001")
|
|||
|
|
assert task["status"] == "done", (
|
|||
|
|
"tester status='passed' должен закрывать задачу в auto_complete"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
@patch("core.followup.generate_followups")
|
|||
|
|
@patch("agents.runner.run_hooks")
|
|||
|
|
@patch("agents.runner.subprocess.run")
|
|||
|
|
def test_gate_check_only_in_auto_complete_not_review_mode(
|
|||
|
|
self, mock_run, mock_hooks, mock_followup, conn
|
|||
|
|
):
|
|||
|
|
"""В review-режиме gate check не применяется — задача уходит на ручной approve."""
|
|||
|
|
mock_run.return_value = _mock_reviewer("changes_requested")
|
|||
|
|
mock_hooks.return_value = []
|
|||
|
|
mock_followup.return_value = {"created": [], "pending_actions": []}
|
|||
|
|
|
|||
|
|
models.update_task(conn, "P1-001", execution_mode="review")
|
|||
|
|
steps = [{"role": "reviewer", "brief": "review"}]
|
|||
|
|
result = run_pipeline(conn, "P1-001", steps)
|
|||
|
|
|
|||
|
|
# In review mode pipeline succeeds (waits for manual approve)
|
|||
|
|
assert result["success"] is True
|
|||
|
|
task = models.get_task(conn, "P1-001")
|
|||
|
|
assert task["status"] == "review", (
|
|||
|
|
"В review-режиме задача должна уйти на ручной approve, не blocked"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
# Unit tests: _parse_gate_cannot_close
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
|
|||
|
|
class TestParseGateCannotClose:
|
|||
|
|
"""Unit tests for _parse_gate_cannot_close fail-open logic."""
|
|||
|
|
|
|||
|
|
def _make_result(self, output):
|
|||
|
|
return {"success": True, "output": output, "role": "reviewer"}
|
|||
|
|
|
|||
|
|
# Reviewer tests
|
|||
|
|
def test_reviewer_approved_returns_none(self):
|
|||
|
|
r = self._make_result({"verdict": "approved", "reason": ""})
|
|||
|
|
assert _parse_gate_cannot_close(r, "reviewer") is None
|
|||
|
|
|
|||
|
|
def test_reviewer_changes_requested_returns_reason(self):
|
|||
|
|
r = self._make_result({"verdict": "changes_requested", "reason": "Missing tests"})
|
|||
|
|
result = _parse_gate_cannot_close(r, "reviewer")
|
|||
|
|
assert result is not None
|
|||
|
|
assert "Missing tests" in result["reason"]
|
|||
|
|
|
|||
|
|
def test_reviewer_revise_returns_reason(self):
|
|||
|
|
r = self._make_result({"verdict": "revise", "reason": "Needs rework"})
|
|||
|
|
result = _parse_gate_cannot_close(r, "reviewer")
|
|||
|
|
assert result is not None
|
|||
|
|
assert "Needs rework" in result["reason"]
|
|||
|
|
|
|||
|
|
def test_reviewer_no_verdict_fails_open(self):
|
|||
|
|
"""Если verdict отсутствует — fail-open, не блокировать."""
|
|||
|
|
r = self._make_result({"summary": "looks ok"})
|
|||
|
|
assert _parse_gate_cannot_close(r, "reviewer") is None
|
|||
|
|
|
|||
|
|
def test_reviewer_non_dict_output_fails_open(self):
|
|||
|
|
"""Если output не dict — fail-open."""
|
|||
|
|
r = self._make_result("approved")
|
|||
|
|
assert _parse_gate_cannot_close(r, "reviewer") is None
|
|||
|
|
|
|||
|
|
# Tester tests
|
|||
|
|
def test_tester_passed_returns_none(self):
|
|||
|
|
r = self._make_result({"status": "passed", "tests_passed": 5})
|
|||
|
|
assert _parse_gate_cannot_close(r, "tester") is None
|
|||
|
|
|
|||
|
|
def test_tester_failed_returns_reason(self):
|
|||
|
|
r = self._make_result({"status": "failed", "reason": "3 assertions failed"})
|
|||
|
|
result = _parse_gate_cannot_close(r, "tester")
|
|||
|
|
assert result is not None
|
|||
|
|
assert "3 assertions failed" in result["reason"]
|
|||
|
|
|
|||
|
|
def test_tester_no_status_fails_open(self):
|
|||
|
|
"""Если status отсутствует — fail-open."""
|
|||
|
|
r = self._make_result({"tests_passed": 3})
|
|||
|
|
assert _parse_gate_cannot_close(r, "tester") is None
|
|||
|
|
|
|||
|
|
def test_tester_non_dict_output_fails_open(self):
|
|||
|
|
r = self._make_result("passed")
|
|||
|
|
assert _parse_gate_cannot_close(r, "tester") is None
|
|||
|
|
|
|||
|
|
def test_unknown_role_fails_open(self):
|
|||
|
|
"""Неизвестная роль — fail-open, не блокировать."""
|
|||
|
|
r = self._make_result({"verdict": "rejected"})
|
|||
|
|
assert _parse_gate_cannot_close(r, "smoke_tester") is None
|
|||
|
|
|
|||
|
|
def test_reviewer_reason_fallback_to_verdict(self):
|
|||
|
|
"""Если reason пустой — fallback to 'Reviewer verdict: <verdict>'."""
|
|||
|
|
r = self._make_result({"verdict": "changes_requested"})
|
|||
|
|
result = _parse_gate_cannot_close(r, "reviewer")
|
|||
|
|
assert result is not None
|
|||
|
|
assert result["reason"] # not empty
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
# Unit tests: _find_gate_result
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
|
|||
|
|
class TestFindGateResult:
|
|||
|
|
"""Unit tests for _find_gate_result reverse-iteration logic."""
|
|||
|
|
|
|||
|
|
def test_returns_last_successful_result_for_role(self):
|
|||
|
|
results = [
|
|||
|
|
{"role": "reviewer", "success": True, "output": {"verdict": "changes_requested"}},
|
|||
|
|
{"role": "backend_dev", "success": True, "output": {"status": "done"}},
|
|||
|
|
{"role": "reviewer", "success": True, "output": {"verdict": "approved"}},
|
|||
|
|
]
|
|||
|
|
r = _find_gate_result(results, "reviewer")
|
|||
|
|
assert r is not None
|
|||
|
|
assert r["output"]["verdict"] == "approved" # последний
|
|||
|
|
|
|||
|
|
def test_skips_failed_results(self):
|
|||
|
|
results = [
|
|||
|
|
{"role": "reviewer", "success": False, "output": {"verdict": "approved"}},
|
|||
|
|
]
|
|||
|
|
assert _find_gate_result(results, "reviewer") is None
|
|||
|
|
|
|||
|
|
def test_returns_none_when_role_absent(self):
|
|||
|
|
results = [
|
|||
|
|
{"role": "tester", "success": True, "output": {"status": "passed"}},
|
|||
|
|
]
|
|||
|
|
assert _find_gate_result(results, "reviewer") is None
|
|||
|
|
|
|||
|
|
def test_returns_none_for_empty_results(self):
|
|||
|
|
assert _find_gate_result([], "reviewer") is None
|
|||
|
|
|
|||
|
|
def test_auto_fix_loop_returns_last_successful_tester(self):
|
|||
|
|
"""После auto_fix loop: возвращает последнюю успешную попытку tester."""
|
|||
|
|
results = [
|
|||
|
|
{"role": "tester", "success": True, "output": {"status": "failed"}, "_auto_fix_attempt": 1},
|
|||
|
|
{"role": "backend_dev", "success": True, "output": {}, "_auto_fix_attempt": 1},
|
|||
|
|
{"role": "tester", "success": True, "output": {"status": "passed"}, "_auto_retest_attempt": 1},
|
|||
|
|
]
|
|||
|
|
r = _find_gate_result(results, "tester")
|
|||
|
|
assert r is not None
|
|||
|
|
assert r["output"]["status"] == "passed"
|