diff --git a/agents/runner.py b/agents/runner.py index 9ada7f8..61bebe6 100644 --- a/agents/runner.py +++ b/agents/runner.py @@ -539,62 +539,6 @@ def _parse_agent_blocked(result: dict) -> dict | None: } -# --------------------------------------------------------------------------- -# Gate cannot-close detection (KIN-133) -# --------------------------------------------------------------------------- - -def _find_gate_result(results: list[dict], role: str) -> dict | None: - """Return the last successful result for the given gate role. - - Iterates results in reverse order to handle auto_fix retry loops where - tester may appear multiple times; returns the latest successful attempt. - """ - for r in reversed(results): - if r.get("role") == role and r.get("success"): - return r - return None - - -def _parse_gate_cannot_close(result: dict, role: str) -> dict | None: - """Detect gate rejection from a final gate agent (reviewer or tester). - - Returns dict with {reason} if the gate agent signals the task must NOT - be closed. Returns None if the gate approves or if output format is - unrecognised (fail-open: never block on unknown formats). - - Reviewer: verdict must be 'approved'; anything else ('changes_requested', - 'revise', 'blocked') is a rejection. Note: 'blocked' is handled earlier - by _parse_agent_blocked(), but if it somehow reaches here we treat it as - cannot_close too. - - Tester: status must be 'passed'; 'failed' or 'blocked' are rejections. - """ - from datetime import datetime - output = result.get("output") - if not isinstance(output, dict): - return None # fail-open: non-dict output → don't block - - if role == "reviewer": - verdict = output.get("verdict") - if verdict is None: - return None # fail-open: no verdict field - if verdict == "approved": - return None # approved → close - reason = output.get("reason") or output.get("summary") or f"Reviewer verdict: {verdict}" - return {"reason": reason} - - if role == "tester": - status = output.get("status") - if status is None: - return None # fail-open: no status field - if status == "passed": - return None # passed → close - reason = output.get("reason") or output.get("blocked_reason") or f"Tester status: {status}" - return {"reason": reason} - - return None # unknown gate role → fail-open - - # --------------------------------------------------------------------------- # Destructive operation detection (KIN-116) # --------------------------------------------------------------------------- @@ -2383,66 +2327,7 @@ def run_pipeline( if current_status in ("done", "cancelled"): pass # User finished manually — don't overwrite elif mode == "auto_complete" and auto_eligible: - # KIN-133: gate check — if final gate agent rejects, block instead of closing - _gate_result = _find_gate_result(results, effective_last_role) - _cannot_close = ( - _parse_gate_cannot_close(_gate_result, effective_last_role) - if _gate_result else None - ) - if _cannot_close is not None: - _block_reason = _cannot_close["reason"] - models.update_task( - conn, task_id, - status="blocked", - blocked_reason=_block_reason, - blocked_agent_role=effective_last_role, - blocked_pipeline_step=str(len(steps)), - ) - if pipeline: - models.update_pipeline( - conn, pipeline["id"], - status="failed", - total_cost_usd=total_cost, - total_tokens=total_tokens, - total_duration_seconds=total_duration, - ) - try: - models.write_log( - conn, pipeline["id"] if pipeline else None, - f"Gate cannot_close: {effective_last_role} refused to approve — {_block_reason}", - level="WARN", - extra={"role": effective_last_role, "reason": _block_reason}, - ) - except Exception: - pass - try: - from core.telegram import send_telegram_escalation - _project = models.get_project(conn, project_id) - _project_name = _project["name"] if _project else project_id - _sent = send_telegram_escalation( - task_id=task_id, - project_name=_project_name, - agent_role=effective_last_role, - reason=_block_reason, - pipeline_step=str(len(steps)), - ) - if _sent: - models.mark_telegram_sent(conn, task_id) - except Exception: - pass - return { - "success": False, - "error": f"Gate {effective_last_role} refused to close task: {_block_reason}", - "blocked_by": effective_last_role, - "blocked_reason": _block_reason, - "steps_completed": len(steps), - "results": results, - "total_cost_usd": total_cost, - "total_tokens": total_tokens, - "total_duration_seconds": total_duration, - "pipeline_id": pipeline["id"] if pipeline else None, - } - # Auto-complete mode: gate approved — close task immediately + # Auto-complete mode: last step is tester/reviewer — skip review, approve immediately models.update_task(conn, task_id, status="done") # KIN-084: log task status try: diff --git a/tests/test_kin_133_gate_cannot_close.py b/tests/test_kin_133_gate_cannot_close.py deleted file mode 100644 index 170de6c..0000000 --- a/tests/test_kin_133_gate_cannot_close.py +++ /dev/null @@ -1,309 +0,0 @@ -""" -Tests for KIN-133: Gate cannot_close — when final gate agent (reviewer/tester) -rejects, task moves to Blocked instead of Done. - -Covers: -1. reviewer verdict='changes_requested' → task blocked in auto_complete mode -2. reviewer verdict='revise' → task blocked in auto_complete mode -3. tester status='failed' → task blocked in auto_complete mode -4. reviewer verdict='approved' → task done in auto_complete mode (happy path) -5. tester status='passed' → task done in auto_complete mode (happy path) -6. _parse_gate_cannot_close unit tests (fail-open for unknown formats) -7. _find_gate_result unit tests (last successful, reverse order) -""" - -import json -import pytest -from unittest.mock import patch, MagicMock - -from core.db import init_db -from core import models -from agents.runner import ( - run_pipeline, - _parse_gate_cannot_close, - _find_gate_result, -) - - -# --------------------------------------------------------------------------- -# Fixtures & helpers -# --------------------------------------------------------------------------- - -@pytest.fixture -def conn(): - c = init_db(":memory:") - models.create_project(c, "p1", "P1", "/tmp/p1", tech_stack=["python"]) - models.create_task(c, "P1-001", "p1", "Implement feature", - brief={"route_type": "feature"}) - models.update_task(c, "P1-001", execution_mode="auto_complete") - yield c - c.close() - - -def _mock_subprocess(agent_output: dict) -> MagicMock: - """Build subprocess.run mock that returns agent_output as parsed JSON. - - The subprocess stdout wraps agent output in {"result": ""} - which is how claude --output-format json structures its response. - """ - m = MagicMock() - m.stdout = json.dumps({"result": json.dumps(agent_output, ensure_ascii=False)}) - m.stderr = "" - m.returncode = 0 - return m - - -def _mock_reviewer(verdict: str, reason: str = "Review found issues") -> MagicMock: - return _mock_subprocess({"verdict": verdict, "reason": reason, "findings": []}) - - -def _mock_tester(status: str, reason: str = "Tests failed") -> MagicMock: - return _mock_subprocess({"status": status, "reason": reason, "tests_passed": 0, "tests_failed": 1}) - - -# --------------------------------------------------------------------------- -# Integration tests: run_pipeline gate checks -# --------------------------------------------------------------------------- - -class TestGateCannotCloseIntegration: - """KIN-133: Final gate agent rejection blocks task instead of closing it.""" - - @patch("core.followup.generate_followups") - @patch("agents.runner.run_hooks") - @patch("agents.runner.subprocess.run") - def test_reviewer_changes_requested_blocks_task_in_auto_complete( - self, mock_run, mock_hooks, mock_followup, conn - ): - """reviewer verdict='changes_requested' → task blocked, not done.""" - mock_run.return_value = _mock_reviewer("changes_requested") - mock_hooks.return_value = [] - mock_followup.return_value = {"created": [], "pending_actions": []} - - steps = [{"role": "backend_dev", "brief": "implement"}, - {"role": "reviewer", "brief": "review"}] - result = run_pipeline(conn, "P1-001", steps) - - assert result["success"] is False - assert result.get("blocked_by") == "reviewer" - task = models.get_task(conn, "P1-001") - assert task["status"] == "blocked", ( - "reviewer verdict='changes_requested' должен блокировать задачу в auto_complete" - ) - assert task.get("blocked_agent_role") == "reviewer" - - @patch("core.followup.generate_followups") - @patch("agents.runner.run_hooks") - @patch("agents.runner.subprocess.run") - def test_reviewer_revise_blocks_task_in_auto_complete( - self, mock_run, mock_hooks, mock_followup, conn - ): - """reviewer verdict='revise' → task blocked, not done.""" - mock_run.return_value = _mock_reviewer("revise", reason="Needs rework") - mock_hooks.return_value = [] - mock_followup.return_value = {"created": [], "pending_actions": []} - - steps = [{"role": "reviewer", "brief": "review"}] - result = run_pipeline(conn, "P1-001", steps) - - assert result["success"] is False - task = models.get_task(conn, "P1-001") - assert task["status"] == "blocked" - assert "Needs rework" in (task.get("blocked_reason") or "") - - @patch("core.followup.generate_followups") - @patch("agents.runner.run_hooks") - @patch("agents.runner.subprocess.run") - def test_tester_failed_blocks_task_in_auto_complete( - self, mock_run, mock_hooks, mock_followup, conn - ): - """tester status='failed' → task blocked, not done.""" - mock_run.return_value = _mock_tester("failed", reason="3 assertions failed") - mock_hooks.return_value = [] - mock_followup.return_value = {"created": [], "pending_actions": []} - - steps = [{"role": "tester", "brief": "run tests"}] - result = run_pipeline(conn, "P1-001", steps) - - assert result["success"] is False - assert result.get("blocked_by") == "tester" - task = models.get_task(conn, "P1-001") - assert task["status"] == "blocked", ( - "tester status='failed' должен блокировать задачу в auto_complete" - ) - assert task.get("blocked_agent_role") == "tester" - - @patch("core.followup.generate_followups") - @patch("agents.runner.run_hooks") - @patch("agents.runner.subprocess.run") - def test_reviewer_approved_closes_task_in_auto_complete( - self, mock_run, mock_hooks, mock_followup, conn - ): - """reviewer verdict='approved' → task done (happy path).""" - mock_run.return_value = _mock_reviewer("approved", reason="") - mock_hooks.return_value = [] - mock_followup.return_value = {"created": [], "pending_actions": []} - - steps = [{"role": "reviewer", "brief": "review"}] - result = run_pipeline(conn, "P1-001", steps) - - assert result["success"] is True - task = models.get_task(conn, "P1-001") - assert task["status"] == "done", ( - "reviewer verdict='approved' должен закрывать задачу в auto_complete" - ) - - @patch("core.followup.generate_followups") - @patch("agents.runner.run_hooks") - @patch("agents.runner.subprocess.run") - def test_tester_passed_closes_task_in_auto_complete( - self, mock_run, mock_hooks, mock_followup, conn - ): - """tester status='passed' → task done (happy path).""" - mock_run.return_value = _mock_tester("passed", reason="") - mock_hooks.return_value = [] - mock_followup.return_value = {"created": [], "pending_actions": []} - - steps = [{"role": "tester", "brief": "run tests"}] - result = run_pipeline(conn, "P1-001", steps) - - assert result["success"] is True - task = models.get_task(conn, "P1-001") - assert task["status"] == "done", ( - "tester status='passed' должен закрывать задачу в auto_complete" - ) - - @patch("core.followup.generate_followups") - @patch("agents.runner.run_hooks") - @patch("agents.runner.subprocess.run") - def test_gate_check_only_in_auto_complete_not_review_mode( - self, mock_run, mock_hooks, mock_followup, conn - ): - """В review-режиме gate check не применяется — задача уходит на ручной approve.""" - mock_run.return_value = _mock_reviewer("changes_requested") - mock_hooks.return_value = [] - mock_followup.return_value = {"created": [], "pending_actions": []} - - models.update_task(conn, "P1-001", execution_mode="review") - steps = [{"role": "reviewer", "brief": "review"}] - result = run_pipeline(conn, "P1-001", steps) - - # In review mode pipeline succeeds (waits for manual approve) - assert result["success"] is True - task = models.get_task(conn, "P1-001") - assert task["status"] == "review", ( - "В review-режиме задача должна уйти на ручной approve, не blocked" - ) - - -# --------------------------------------------------------------------------- -# Unit tests: _parse_gate_cannot_close -# --------------------------------------------------------------------------- - -class TestParseGateCannotClose: - """Unit tests for _parse_gate_cannot_close fail-open logic.""" - - def _make_result(self, output): - return {"success": True, "output": output, "role": "reviewer"} - - # Reviewer tests - def test_reviewer_approved_returns_none(self): - r = self._make_result({"verdict": "approved", "reason": ""}) - assert _parse_gate_cannot_close(r, "reviewer") is None - - def test_reviewer_changes_requested_returns_reason(self): - r = self._make_result({"verdict": "changes_requested", "reason": "Missing tests"}) - result = _parse_gate_cannot_close(r, "reviewer") - assert result is not None - assert "Missing tests" in result["reason"] - - def test_reviewer_revise_returns_reason(self): - r = self._make_result({"verdict": "revise", "reason": "Needs rework"}) - result = _parse_gate_cannot_close(r, "reviewer") - assert result is not None - assert "Needs rework" in result["reason"] - - def test_reviewer_no_verdict_fails_open(self): - """Если verdict отсутствует — fail-open, не блокировать.""" - r = self._make_result({"summary": "looks ok"}) - assert _parse_gate_cannot_close(r, "reviewer") is None - - def test_reviewer_non_dict_output_fails_open(self): - """Если output не dict — fail-open.""" - r = self._make_result("approved") - assert _parse_gate_cannot_close(r, "reviewer") is None - - # Tester tests - def test_tester_passed_returns_none(self): - r = self._make_result({"status": "passed", "tests_passed": 5}) - assert _parse_gate_cannot_close(r, "tester") is None - - def test_tester_failed_returns_reason(self): - r = self._make_result({"status": "failed", "reason": "3 assertions failed"}) - result = _parse_gate_cannot_close(r, "tester") - assert result is not None - assert "3 assertions failed" in result["reason"] - - def test_tester_no_status_fails_open(self): - """Если status отсутствует — fail-open.""" - r = self._make_result({"tests_passed": 3}) - assert _parse_gate_cannot_close(r, "tester") is None - - def test_tester_non_dict_output_fails_open(self): - r = self._make_result("passed") - assert _parse_gate_cannot_close(r, "tester") is None - - def test_unknown_role_fails_open(self): - """Неизвестная роль — fail-open, не блокировать.""" - r = self._make_result({"verdict": "rejected"}) - assert _parse_gate_cannot_close(r, "smoke_tester") is None - - def test_reviewer_reason_fallback_to_verdict(self): - """Если reason пустой — fallback to 'Reviewer verdict: '.""" - r = self._make_result({"verdict": "changes_requested"}) - result = _parse_gate_cannot_close(r, "reviewer") - assert result is not None - assert result["reason"] # not empty - - -# --------------------------------------------------------------------------- -# Unit tests: _find_gate_result -# --------------------------------------------------------------------------- - -class TestFindGateResult: - """Unit tests for _find_gate_result reverse-iteration logic.""" - - def test_returns_last_successful_result_for_role(self): - results = [ - {"role": "reviewer", "success": True, "output": {"verdict": "changes_requested"}}, - {"role": "backend_dev", "success": True, "output": {"status": "done"}}, - {"role": "reviewer", "success": True, "output": {"verdict": "approved"}}, - ] - r = _find_gate_result(results, "reviewer") - assert r is not None - assert r["output"]["verdict"] == "approved" # последний - - def test_skips_failed_results(self): - results = [ - {"role": "reviewer", "success": False, "output": {"verdict": "approved"}}, - ] - assert _find_gate_result(results, "reviewer") is None - - def test_returns_none_when_role_absent(self): - results = [ - {"role": "tester", "success": True, "output": {"status": "passed"}}, - ] - assert _find_gate_result(results, "reviewer") is None - - def test_returns_none_for_empty_results(self): - assert _find_gate_result([], "reviewer") is None - - def test_auto_fix_loop_returns_last_successful_tester(self): - """После auto_fix loop: возвращает последнюю успешную попытку tester.""" - results = [ - {"role": "tester", "success": True, "output": {"status": "failed"}, "_auto_fix_attempt": 1}, - {"role": "backend_dev", "success": True, "output": {}, "_auto_fix_attempt": 1}, - {"role": "tester", "success": True, "output": {"status": "passed"}, "_auto_retest_attempt": 1}, - ] - r = _find_gate_result(results, "tester") - assert r is not None - assert r["output"]["status"] == "passed"