From 05915fc7cd9f051edaac43a5bd0bfa5530c7de5e Mon Sep 17 00:00:00 2001 From: Gros Frumos Date: Thu, 19 Mar 2026 15:50:52 +0200 Subject: [PATCH] kin: KIN-133-backend_dev --- agents/runner.py | 117 ++++++++- tests/test_kin_133_gate_cannot_close.py | 309 ++++++++++++++++++++++++ 2 files changed, 425 insertions(+), 1 deletion(-) create mode 100644 tests/test_kin_133_gate_cannot_close.py diff --git a/agents/runner.py b/agents/runner.py index 61bebe6..9ada7f8 100644 --- a/agents/runner.py +++ b/agents/runner.py @@ -539,6 +539,62 @@ def _parse_agent_blocked(result: dict) -> dict | None: } +# --------------------------------------------------------------------------- +# Gate cannot-close detection (KIN-133) +# --------------------------------------------------------------------------- + +def _find_gate_result(results: list[dict], role: str) -> dict | None: + """Return the last successful result for the given gate role. + + Iterates results in reverse order to handle auto_fix retry loops where + tester may appear multiple times; returns the latest successful attempt. + """ + for r in reversed(results): + if r.get("role") == role and r.get("success"): + return r + return None + + +def _parse_gate_cannot_close(result: dict, role: str) -> dict | None: + """Detect gate rejection from a final gate agent (reviewer or tester). + + Returns dict with {reason} if the gate agent signals the task must NOT + be closed. Returns None if the gate approves or if output format is + unrecognised (fail-open: never block on unknown formats). + + Reviewer: verdict must be 'approved'; anything else ('changes_requested', + 'revise', 'blocked') is a rejection. Note: 'blocked' is handled earlier + by _parse_agent_blocked(), but if it somehow reaches here we treat it as + cannot_close too. + + Tester: status must be 'passed'; 'failed' or 'blocked' are rejections. + """ + from datetime import datetime + output = result.get("output") + if not isinstance(output, dict): + return None # fail-open: non-dict output → don't block + + if role == "reviewer": + verdict = output.get("verdict") + if verdict is None: + return None # fail-open: no verdict field + if verdict == "approved": + return None # approved → close + reason = output.get("reason") or output.get("summary") or f"Reviewer verdict: {verdict}" + return {"reason": reason} + + if role == "tester": + status = output.get("status") + if status is None: + return None # fail-open: no status field + if status == "passed": + return None # passed → close + reason = output.get("reason") or output.get("blocked_reason") or f"Tester status: {status}" + return {"reason": reason} + + return None # unknown gate role → fail-open + + # --------------------------------------------------------------------------- # Destructive operation detection (KIN-116) # --------------------------------------------------------------------------- @@ -2327,7 +2383,66 @@ def run_pipeline( if current_status in ("done", "cancelled"): pass # User finished manually — don't overwrite elif mode == "auto_complete" and auto_eligible: - # Auto-complete mode: last step is tester/reviewer — skip review, approve immediately + # KIN-133: gate check — if final gate agent rejects, block instead of closing + _gate_result = _find_gate_result(results, effective_last_role) + _cannot_close = ( + _parse_gate_cannot_close(_gate_result, effective_last_role) + if _gate_result else None + ) + if _cannot_close is not None: + _block_reason = _cannot_close["reason"] + models.update_task( + conn, task_id, + status="blocked", + blocked_reason=_block_reason, + blocked_agent_role=effective_last_role, + blocked_pipeline_step=str(len(steps)), + ) + if pipeline: + models.update_pipeline( + conn, pipeline["id"], + status="failed", + total_cost_usd=total_cost, + total_tokens=total_tokens, + total_duration_seconds=total_duration, + ) + try: + models.write_log( + conn, pipeline["id"] if pipeline else None, + f"Gate cannot_close: {effective_last_role} refused to approve — {_block_reason}", + level="WARN", + extra={"role": effective_last_role, "reason": _block_reason}, + ) + except Exception: + pass + try: + from core.telegram import send_telegram_escalation + _project = models.get_project(conn, project_id) + _project_name = _project["name"] if _project else project_id + _sent = send_telegram_escalation( + task_id=task_id, + project_name=_project_name, + agent_role=effective_last_role, + reason=_block_reason, + pipeline_step=str(len(steps)), + ) + if _sent: + models.mark_telegram_sent(conn, task_id) + except Exception: + pass + return { + "success": False, + "error": f"Gate {effective_last_role} refused to close task: {_block_reason}", + "blocked_by": effective_last_role, + "blocked_reason": _block_reason, + "steps_completed": len(steps), + "results": results, + "total_cost_usd": total_cost, + "total_tokens": total_tokens, + "total_duration_seconds": total_duration, + "pipeline_id": pipeline["id"] if pipeline else None, + } + # Auto-complete mode: gate approved — close task immediately models.update_task(conn, task_id, status="done") # KIN-084: log task status try: diff --git a/tests/test_kin_133_gate_cannot_close.py b/tests/test_kin_133_gate_cannot_close.py new file mode 100644 index 0000000..170de6c --- /dev/null +++ b/tests/test_kin_133_gate_cannot_close.py @@ -0,0 +1,309 @@ +""" +Tests for KIN-133: Gate cannot_close — when final gate agent (reviewer/tester) +rejects, task moves to Blocked instead of Done. + +Covers: +1. reviewer verdict='changes_requested' → task blocked in auto_complete mode +2. reviewer verdict='revise' → task blocked in auto_complete mode +3. tester status='failed' → task blocked in auto_complete mode +4. reviewer verdict='approved' → task done in auto_complete mode (happy path) +5. tester status='passed' → task done in auto_complete mode (happy path) +6. _parse_gate_cannot_close unit tests (fail-open for unknown formats) +7. _find_gate_result unit tests (last successful, reverse order) +""" + +import json +import pytest +from unittest.mock import patch, MagicMock + +from core.db import init_db +from core import models +from agents.runner import ( + run_pipeline, + _parse_gate_cannot_close, + _find_gate_result, +) + + +# --------------------------------------------------------------------------- +# Fixtures & helpers +# --------------------------------------------------------------------------- + +@pytest.fixture +def conn(): + c = init_db(":memory:") + models.create_project(c, "p1", "P1", "/tmp/p1", tech_stack=["python"]) + models.create_task(c, "P1-001", "p1", "Implement feature", + brief={"route_type": "feature"}) + models.update_task(c, "P1-001", execution_mode="auto_complete") + yield c + c.close() + + +def _mock_subprocess(agent_output: dict) -> MagicMock: + """Build subprocess.run mock that returns agent_output as parsed JSON. + + The subprocess stdout wraps agent output in {"result": ""} + which is how claude --output-format json structures its response. + """ + m = MagicMock() + m.stdout = json.dumps({"result": json.dumps(agent_output, ensure_ascii=False)}) + m.stderr = "" + m.returncode = 0 + return m + + +def _mock_reviewer(verdict: str, reason: str = "Review found issues") -> MagicMock: + return _mock_subprocess({"verdict": verdict, "reason": reason, "findings": []}) + + +def _mock_tester(status: str, reason: str = "Tests failed") -> MagicMock: + return _mock_subprocess({"status": status, "reason": reason, "tests_passed": 0, "tests_failed": 1}) + + +# --------------------------------------------------------------------------- +# Integration tests: run_pipeline gate checks +# --------------------------------------------------------------------------- + +class TestGateCannotCloseIntegration: + """KIN-133: Final gate agent rejection blocks task instead of closing it.""" + + @patch("core.followup.generate_followups") + @patch("agents.runner.run_hooks") + @patch("agents.runner.subprocess.run") + def test_reviewer_changes_requested_blocks_task_in_auto_complete( + self, mock_run, mock_hooks, mock_followup, conn + ): + """reviewer verdict='changes_requested' → task blocked, not done.""" + mock_run.return_value = _mock_reviewer("changes_requested") + mock_hooks.return_value = [] + mock_followup.return_value = {"created": [], "pending_actions": []} + + steps = [{"role": "backend_dev", "brief": "implement"}, + {"role": "reviewer", "brief": "review"}] + result = run_pipeline(conn, "P1-001", steps) + + assert result["success"] is False + assert result.get("blocked_by") == "reviewer" + task = models.get_task(conn, "P1-001") + assert task["status"] == "blocked", ( + "reviewer verdict='changes_requested' должен блокировать задачу в auto_complete" + ) + assert task.get("blocked_agent_role") == "reviewer" + + @patch("core.followup.generate_followups") + @patch("agents.runner.run_hooks") + @patch("agents.runner.subprocess.run") + def test_reviewer_revise_blocks_task_in_auto_complete( + self, mock_run, mock_hooks, mock_followup, conn + ): + """reviewer verdict='revise' → task blocked, not done.""" + mock_run.return_value = _mock_reviewer("revise", reason="Needs rework") + mock_hooks.return_value = [] + mock_followup.return_value = {"created": [], "pending_actions": []} + + steps = [{"role": "reviewer", "brief": "review"}] + result = run_pipeline(conn, "P1-001", steps) + + assert result["success"] is False + task = models.get_task(conn, "P1-001") + assert task["status"] == "blocked" + assert "Needs rework" in (task.get("blocked_reason") or "") + + @patch("core.followup.generate_followups") + @patch("agents.runner.run_hooks") + @patch("agents.runner.subprocess.run") + def test_tester_failed_blocks_task_in_auto_complete( + self, mock_run, mock_hooks, mock_followup, conn + ): + """tester status='failed' → task blocked, not done.""" + mock_run.return_value = _mock_tester("failed", reason="3 assertions failed") + mock_hooks.return_value = [] + mock_followup.return_value = {"created": [], "pending_actions": []} + + steps = [{"role": "tester", "brief": "run tests"}] + result = run_pipeline(conn, "P1-001", steps) + + assert result["success"] is False + assert result.get("blocked_by") == "tester" + task = models.get_task(conn, "P1-001") + assert task["status"] == "blocked", ( + "tester status='failed' должен блокировать задачу в auto_complete" + ) + assert task.get("blocked_agent_role") == "tester" + + @patch("core.followup.generate_followups") + @patch("agents.runner.run_hooks") + @patch("agents.runner.subprocess.run") + def test_reviewer_approved_closes_task_in_auto_complete( + self, mock_run, mock_hooks, mock_followup, conn + ): + """reviewer verdict='approved' → task done (happy path).""" + mock_run.return_value = _mock_reviewer("approved", reason="") + mock_hooks.return_value = [] + mock_followup.return_value = {"created": [], "pending_actions": []} + + steps = [{"role": "reviewer", "brief": "review"}] + result = run_pipeline(conn, "P1-001", steps) + + assert result["success"] is True + task = models.get_task(conn, "P1-001") + assert task["status"] == "done", ( + "reviewer verdict='approved' должен закрывать задачу в auto_complete" + ) + + @patch("core.followup.generate_followups") + @patch("agents.runner.run_hooks") + @patch("agents.runner.subprocess.run") + def test_tester_passed_closes_task_in_auto_complete( + self, mock_run, mock_hooks, mock_followup, conn + ): + """tester status='passed' → task done (happy path).""" + mock_run.return_value = _mock_tester("passed", reason="") + mock_hooks.return_value = [] + mock_followup.return_value = {"created": [], "pending_actions": []} + + steps = [{"role": "tester", "brief": "run tests"}] + result = run_pipeline(conn, "P1-001", steps) + + assert result["success"] is True + task = models.get_task(conn, "P1-001") + assert task["status"] == "done", ( + "tester status='passed' должен закрывать задачу в auto_complete" + ) + + @patch("core.followup.generate_followups") + @patch("agents.runner.run_hooks") + @patch("agents.runner.subprocess.run") + def test_gate_check_only_in_auto_complete_not_review_mode( + self, mock_run, mock_hooks, mock_followup, conn + ): + """В review-режиме gate check не применяется — задача уходит на ручной approve.""" + mock_run.return_value = _mock_reviewer("changes_requested") + mock_hooks.return_value = [] + mock_followup.return_value = {"created": [], "pending_actions": []} + + models.update_task(conn, "P1-001", execution_mode="review") + steps = [{"role": "reviewer", "brief": "review"}] + result = run_pipeline(conn, "P1-001", steps) + + # In review mode pipeline succeeds (waits for manual approve) + assert result["success"] is True + task = models.get_task(conn, "P1-001") + assert task["status"] == "review", ( + "В review-режиме задача должна уйти на ручной approve, не blocked" + ) + + +# --------------------------------------------------------------------------- +# Unit tests: _parse_gate_cannot_close +# --------------------------------------------------------------------------- + +class TestParseGateCannotClose: + """Unit tests for _parse_gate_cannot_close fail-open logic.""" + + def _make_result(self, output): + return {"success": True, "output": output, "role": "reviewer"} + + # Reviewer tests + def test_reviewer_approved_returns_none(self): + r = self._make_result({"verdict": "approved", "reason": ""}) + assert _parse_gate_cannot_close(r, "reviewer") is None + + def test_reviewer_changes_requested_returns_reason(self): + r = self._make_result({"verdict": "changes_requested", "reason": "Missing tests"}) + result = _parse_gate_cannot_close(r, "reviewer") + assert result is not None + assert "Missing tests" in result["reason"] + + def test_reviewer_revise_returns_reason(self): + r = self._make_result({"verdict": "revise", "reason": "Needs rework"}) + result = _parse_gate_cannot_close(r, "reviewer") + assert result is not None + assert "Needs rework" in result["reason"] + + def test_reviewer_no_verdict_fails_open(self): + """Если verdict отсутствует — fail-open, не блокировать.""" + r = self._make_result({"summary": "looks ok"}) + assert _parse_gate_cannot_close(r, "reviewer") is None + + def test_reviewer_non_dict_output_fails_open(self): + """Если output не dict — fail-open.""" + r = self._make_result("approved") + assert _parse_gate_cannot_close(r, "reviewer") is None + + # Tester tests + def test_tester_passed_returns_none(self): + r = self._make_result({"status": "passed", "tests_passed": 5}) + assert _parse_gate_cannot_close(r, "tester") is None + + def test_tester_failed_returns_reason(self): + r = self._make_result({"status": "failed", "reason": "3 assertions failed"}) + result = _parse_gate_cannot_close(r, "tester") + assert result is not None + assert "3 assertions failed" in result["reason"] + + def test_tester_no_status_fails_open(self): + """Если status отсутствует — fail-open.""" + r = self._make_result({"tests_passed": 3}) + assert _parse_gate_cannot_close(r, "tester") is None + + def test_tester_non_dict_output_fails_open(self): + r = self._make_result("passed") + assert _parse_gate_cannot_close(r, "tester") is None + + def test_unknown_role_fails_open(self): + """Неизвестная роль — fail-open, не блокировать.""" + r = self._make_result({"verdict": "rejected"}) + assert _parse_gate_cannot_close(r, "smoke_tester") is None + + def test_reviewer_reason_fallback_to_verdict(self): + """Если reason пустой — fallback to 'Reviewer verdict: '.""" + r = self._make_result({"verdict": "changes_requested"}) + result = _parse_gate_cannot_close(r, "reviewer") + assert result is not None + assert result["reason"] # not empty + + +# --------------------------------------------------------------------------- +# Unit tests: _find_gate_result +# --------------------------------------------------------------------------- + +class TestFindGateResult: + """Unit tests for _find_gate_result reverse-iteration logic.""" + + def test_returns_last_successful_result_for_role(self): + results = [ + {"role": "reviewer", "success": True, "output": {"verdict": "changes_requested"}}, + {"role": "backend_dev", "success": True, "output": {"status": "done"}}, + {"role": "reviewer", "success": True, "output": {"verdict": "approved"}}, + ] + r = _find_gate_result(results, "reviewer") + assert r is not None + assert r["output"]["verdict"] == "approved" # последний + + def test_skips_failed_results(self): + results = [ + {"role": "reviewer", "success": False, "output": {"verdict": "approved"}}, + ] + assert _find_gate_result(results, "reviewer") is None + + def test_returns_none_when_role_absent(self): + results = [ + {"role": "tester", "success": True, "output": {"status": "passed"}}, + ] + assert _find_gate_result(results, "reviewer") is None + + def test_returns_none_for_empty_results(self): + assert _find_gate_result([], "reviewer") is None + + def test_auto_fix_loop_returns_last_successful_tester(self): + """После auto_fix loop: возвращает последнюю успешную попытку tester.""" + results = [ + {"role": "tester", "success": True, "output": {"status": "failed"}, "_auto_fix_attempt": 1}, + {"role": "backend_dev", "success": True, "output": {}, "_auto_fix_attempt": 1}, + {"role": "tester", "success": True, "output": {"status": "passed"}, "_auto_retest_attempt": 1}, + ] + r = _find_gate_result(results, "tester") + assert r is not None + assert r["output"]["status"] == "passed"