diff --git a/agents/prompts/reviewer.md b/agents/prompts/reviewer.md index 95b79c4..6e866ce 100644 --- a/agents/prompts/reviewer.md +++ b/agents/prompts/reviewer.md @@ -72,7 +72,8 @@ Example: "security_issues": [], "conventions_violations": [], "test_coverage": "adequate", - "summary": "Implementation looks correct and follows project patterns. One minor style issue noted." + "summary": "Implementation looks correct and follows project patterns. One minor style issue noted.", + "exit_condition": null } ``` @@ -100,6 +101,22 @@ Example: } ``` +**`exit_condition`** (optional, KIN-136 auto-return): + +Set this field ONLY when the task cannot be auto-retried and requires a human decision: + +- `"login_required"` — the reviewer or the code requires a login/auth that is not available in automation context +- `"missing_data"` — critical data, credentials, or access needed to continue is missing and cannot be inferred +- `"strategic_decision"` — the fix requires a fundamental architectural or business decision with no obvious correct answer (e.g. conflicting stakeholder requirements, irreversible platform choice) + +Leave as `null` in ALL other cases — including ordinary bugs, quality issues, missing tests, style violations, or any fixable problem. When `null`, the system will automatically retry the task with a return analyst. + +**When NOT to set exit_condition (set null):** +- Code has bugs or logic errors → `null` (auto-retry will fix) +- Tests are missing or failing → `null` (auto-retry will add tests) +- Implementation doesn't match requirements → `null` (auto-retry will revise) +- Security issue found → `null` with `"changes_requested"` verdict (auto-retry will patch) + **`security_issues` and `conventions_violations`** elements: ```json diff --git a/agents/runner.py b/agents/runner.py index 15f239f..70ceb5e 100644 --- a/agents/runner.py +++ b/agents/runner.py @@ -37,6 +37,13 @@ _MODEL_TIMEOUTS = { "haiku": 1200, # 20 min } +# KIN-136: auto-return — max times a task can be auto-returned before escalating to human. +# Override via KIN_AUTO_RETURN_MAX env var. +_AUTO_RETURN_MAX: int = int(os.environ.get("KIN_AUTO_RETURN_MAX") or 3) + +# KIN-136: valid exit_condition values that force human escalation instead of auto-return. +_EXIT_CONDITIONS = frozenset({"login_required", "missing_data", "strategic_decision"}) + def _build_claude_env() -> dict: """Return an env dict with an extended PATH that includes common CLI tool locations. @@ -595,6 +602,117 @@ def _parse_gate_cannot_close(result: dict, role: str) -> dict | None: return None # unknown gate role → fail-open +# --------------------------------------------------------------------------- +# Auto-return helpers (KIN-136) +# --------------------------------------------------------------------------- + +def _parse_exit_condition(gate_result: dict, role: str) -> str | None: + """Extract exit_condition from gate agent output. + + Returns one of _EXIT_CONDITIONS values, or None if absent/invalid/unsupported. + Fail-open: invalid or unknown values are treated as None → triggers auto-return. + Only reviewer output supports exit_condition; tester always returns None. + """ + if role != "reviewer": + return None + output = gate_result.get("output") + if not isinstance(output, dict): + return None + raw = output.get("exit_condition") + if raw in _EXIT_CONDITIONS: + return raw + if raw is not None: + _logger.warning( + "KIN-136: unknown exit_condition %r from reviewer — treating as None (auto-return)", + raw, + ) + return None + + +def _trigger_auto_return( + conn: "sqlite3.Connection", + task_id: str, + project_id: str, + pipeline: dict | None, + original_steps: list[dict], + gate_role: str, + gate_reason: str, + allow_write: bool, + noninteractive: bool, + gate_output_json: str | None = None, +) -> dict: + """Attempt auto-return: re-run the pipeline with return_analyst prepended. + + Steps: + (a) Check return_count against _AUTO_RETURN_MAX threshold — escalate if exceeded. + (b) Record the task return. + (c) Mark current pipeline failed; set task status to 'revising'. + (d) Spawn new pipeline: [return_analyst] + original_steps. + + Returns: + {"should_escalate": True, "reason": "auto_return_threshold_exceeded"} if threshold hit. + {"should_escalate": False, "auto_return_result": {...}} otherwise. + """ + # (a) Check threshold — fetch current return_count before recording new one + current_task = models.get_task(conn, task_id) + current_return_count = (current_task or {}).get("return_count") or 0 + if current_return_count >= _AUTO_RETURN_MAX: + _logger.warning( + "KIN-136: auto-return threshold reached for task %s " + "(return_count=%d >= max=%d) — escalating to human", + task_id, current_return_count, _AUTO_RETURN_MAX, + ) + return {"should_escalate": True, "reason": "auto_return_threshold_exceeded"} + + pipeline_id = pipeline["id"] if pipeline else None + + # (b) Record return + try: + models.record_task_return( + conn, + task_id=task_id, + reason_category="recurring_quality_fail", + reason_text=f"Gate {gate_role}: {gate_reason[:200]}", + returned_by=gate_role, + pipeline_id=pipeline_id, + ) + except Exception: + pass # Never block auto-return on tracking errors + + # (c) Mark current pipeline failed, set task to revising + if pipeline: + try: + models.update_pipeline(conn, pipeline_id, status="failed") + except Exception: + pass + models.update_task(conn, task_id, status="revising") + + try: + models.write_log( + conn, pipeline_id, + f"KIN-136: auto-return triggered by {gate_role} " + f"(return_count now {current_return_count + 1}) — spawning return_analyst pipeline", + level="INFO", + extra={"gate_role": gate_role, "return_count": current_return_count + 1}, + ) + except Exception: + pass + + # (d) Build new steps and spawn new pipeline + new_steps = [{"role": "return_analyst", "model": "opus"}] + list(original_steps) + auto_return_result = run_pipeline( + conn, + task_id, + new_steps, + allow_write=allow_write, + noninteractive=noninteractive, + initial_previous_output=gate_output_json, + parent_pipeline_id=pipeline_id, + ) + + return {"should_escalate": False, "auto_return_result": auto_return_result} + + # --------------------------------------------------------------------------- # Destructive operation detection (KIN-116) # --------------------------------------------------------------------------- @@ -2509,6 +2627,37 @@ def run_pipeline( ) if _cannot_close is not None: _block_reason = _cannot_close["reason"] + _pipeline_type = (pipeline or {}).get("pipeline_type", "standard") + + # KIN-136: auto-return — attempt re-run instead of blocking when: + # - reviewer did not set an exit_condition requiring human intervention + # - not inside an escalation pipeline (guard against recursive loops) + # - not a dry_run + _exit_cond = _parse_exit_condition(_gate_result or {}, effective_last_role) + if _exit_cond is None and _pipeline_type != "escalation" and not dry_run: + _gate_out = (_gate_result or {}).get("output") + _gate_out_json = ( + json.dumps(_gate_out, ensure_ascii=False) + if isinstance(_gate_out, dict) else str(_gate_out or "") + ) + _ar = _trigger_auto_return( + conn, task_id, project_id, pipeline, + original_steps=steps, + gate_role=effective_last_role, + gate_reason=_block_reason, + allow_write=allow_write, + noninteractive=noninteractive, + gate_output_json=_gate_out_json, + ) + if not _ar["should_escalate"]: + _ar_result = _ar["auto_return_result"] + _ar_result["auto_returned"] = True + return _ar_result + # Threshold exceeded — fall through to human escalation + _block_reason = f"{_block_reason} [auto_return_limit_reached]" + + # Human escalation path: exit_condition set, escalation pipeline, + # dry_run, or auto-return threshold exceeded models.update_task( conn, task_id, status="blocked", @@ -2525,7 +2674,6 @@ def run_pipeline( total_duration_seconds=total_duration, ) # KIN-135: record gate return — skip for escalation pipelines to avoid loops - _pipeline_type = (pipeline or {}).get("pipeline_type", "standard") if _pipeline_type != "escalation": try: models.record_task_return( diff --git a/tests/test_kin_135_escalation.py b/tests/test_kin_135_escalation.py index b77a7bf..207da12 100644 --- a/tests/test_kin_135_escalation.py +++ b/tests/test_kin_135_escalation.py @@ -19,7 +19,7 @@ from core.db import init_db from core import models from core.models import RETURN_CATEGORIES from core.context_builder import build_context -from agents.runner import _save_return_analyst_output, run_pipeline +from agents.runner import _save_return_analyst_output, run_pipeline, _AUTO_RETURN_MAX # --------------------------------------------------------------------------- @@ -403,7 +403,13 @@ class TestGateCannotCloseRecordsReturn: def test_gate_rejection_increments_return_count_in_standard_pipeline( self, mock_run, mock_hooks, mock_followup, conn_autocomplete ): - """Gate cannot_close in standard pipeline → task.return_count increases by 1.""" + """Gate cannot_close in standard pipeline → return_count increments on each auto-return. + + KIN-136: in auto_complete mode, gate rejection triggers auto-return up to _AUTO_RETURN_MAX + times before escalating. Each auto-return increments return_count once; the final + escalation via the human-escalation path also records one more return. + Total: _AUTO_RETURN_MAX + 1 returns (3 auto-returns + 1 final escalation = 4). + """ conn = conn_autocomplete mock_run.return_value = _mock_subprocess( {"verdict": "changes_requested", "reason": "Missing tests"} @@ -415,8 +421,10 @@ class TestGateCannotCloseRecordsReturn: run_pipeline(conn, "P1-001", steps) task = models.get_task(conn, "P1-001") - assert task["return_count"] == 1, ( - "Gate rejection in standard pipeline should increment return_count" + # KIN-136: _AUTO_RETURN_MAX auto-returns + 1 final escalation recording (git log: 2026-03-21) + assert task["return_count"] == _AUTO_RETURN_MAX + 1, ( + "Gate rejection with persistent failure should increment return_count " + f"_AUTO_RETURN_MAX+1 times ({_AUTO_RETURN_MAX + 1})" ) @patch("core.followup.generate_followups") @@ -447,7 +455,11 @@ class TestGateCannotCloseRecordsReturn: def test_gate_rejection_records_return_with_recurring_quality_fail_category( self, mock_run, mock_hooks, mock_followup, conn_autocomplete ): - """Gate rejection uses 'recurring_quality_fail' as reason_category.""" + """Gate rejection uses 'recurring_quality_fail' as reason_category. + + KIN-136: all auto-return records use the same category; final escalation also uses it. + Total returns = _AUTO_RETURN_MAX + 1 (git log: 2026-03-21). + """ conn = conn_autocomplete mock_run.return_value = _mock_subprocess( {"verdict": "changes_requested", "reason": "Need unit tests"} @@ -459,8 +471,10 @@ class TestGateCannotCloseRecordsReturn: run_pipeline(conn, "P1-001", steps) returns = models.get_task_returns(conn, "P1-001") - assert len(returns) == 1 - assert returns[0]["reason_category"] == "recurring_quality_fail" + assert len(returns) == _AUTO_RETURN_MAX + 1 + assert all(r["reason_category"] == "recurring_quality_fail" for r in returns), ( + "Все записи возврата должны иметь категорию recurring_quality_fail" + ) @patch("core.followup.generate_followups") @patch("agents.runner.run_hooks") diff --git a/tests/test_kin_136_auto_return.py b/tests/test_kin_136_auto_return.py new file mode 100644 index 0000000..b59ca3f --- /dev/null +++ b/tests/test_kin_136_auto_return.py @@ -0,0 +1,529 @@ +""" +Tests for KIN-136: Auto-return mechanism. + +When a task in auto_complete mode is rejected by a gate agent (reviewer/tester) +WITHOUT an exit_condition, it is automatically re-run with return_analyst prepended +instead of being escalated to a human. + +Covers: +1. _parse_exit_condition unit tests — valid/invalid/null values, role guard +2. _trigger_auto_return threshold guard — return_count >= max → should_escalate=True +3. Integration: reviewer changes_requested + no exit_condition → auto_returned=True +4. Integration: reviewer changes_requested + exit_condition='login_required' → task blocked +5. Integration: escalation pipeline gate rejection → no auto-return (guard #1081) +6. Integration: reviewer approved (no exit_condition set) → task done (happy path unaffected) +""" + +import json +import pytest +from unittest.mock import patch, MagicMock + +from core.db import init_db +from core import models +from agents.runner import ( + run_pipeline, + _parse_exit_condition, + _trigger_auto_return, + _AUTO_RETURN_MAX, +) + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +@pytest.fixture +def conn(): + """Fresh in-memory DB with project and auto_complete task.""" + c = init_db(":memory:") + models.create_project(c, "p1", "P1", "/tmp/p1", tech_stack=["python"]) + models.create_task(c, "P1-001", "p1", "Implement feature", + brief={"route_type": "feature"}) + models.update_task(c, "P1-001", execution_mode="auto_complete") + yield c + c.close() + + +def _mock_subprocess(agent_output: dict) -> MagicMock: + """Build subprocess.run mock returning agent_output in claude JSON format.""" + m = MagicMock() + m.stdout = json.dumps({"result": json.dumps(agent_output, ensure_ascii=False)}) + m.stderr = "" + m.returncode = 0 + return m + + +def _mock_reviewer(verdict: str, reason: str = "Issues found", + exit_condition=None) -> MagicMock: + out = {"verdict": verdict, "reason": reason, "findings": [], "summary": reason} + if exit_condition is not None: + out["exit_condition"] = exit_condition + return _mock_subprocess(out) + + +def _mock_return_analyst(escalate: bool = False) -> MagicMock: + return _mock_subprocess({ + "status": "done", + "escalate_to_dept_head": escalate, + "root_cause_analysis": "quality issue identified", + "refined_brief": "fix the quality issue", + }) + + +# --------------------------------------------------------------------------- +# Unit tests: _parse_exit_condition +# --------------------------------------------------------------------------- + +class TestParseExitCondition: + """KIN-136: exit_condition parsing — fail-open on unknown values.""" + + def test_valid_login_required(self): + gate_result = {"output": {"verdict": "changes_requested", "exit_condition": "login_required"}} + assert _parse_exit_condition(gate_result, "reviewer") == "login_required" + + def test_valid_missing_data(self): + gate_result = {"output": {"verdict": "changes_requested", "exit_condition": "missing_data"}} + assert _parse_exit_condition(gate_result, "reviewer") == "missing_data" + + def test_valid_strategic_decision(self): + gate_result = {"output": {"verdict": "changes_requested", "exit_condition": "strategic_decision"}} + assert _parse_exit_condition(gate_result, "reviewer") == "strategic_decision" + + def test_null_exit_condition_returns_none(self): + """null in JSON → None → auto-return.""" + gate_result = {"output": {"verdict": "changes_requested", "exit_condition": None}} + assert _parse_exit_condition(gate_result, "reviewer") is None + + def test_missing_exit_condition_field_returns_none(self): + """Field absent → auto-return (fail-open).""" + gate_result = {"output": {"verdict": "changes_requested"}} + assert _parse_exit_condition(gate_result, "reviewer") is None + + def test_invalid_exit_condition_returns_none(self): + """Unknown value → fail-open → None → auto-return (decision #1083 analogy).""" + gate_result = {"output": {"verdict": "changes_requested", "exit_condition": "unknown_value"}} + assert _parse_exit_condition(gate_result, "reviewer") is None + + def test_tester_role_always_none(self): + """Tester does not support exit_condition — always returns None.""" + gate_result = {"output": {"status": "failed", "exit_condition": "login_required"}} + assert _parse_exit_condition(gate_result, "tester") is None + + def test_non_dict_output_returns_none(self): + """Non-dict output → fail-open → None.""" + gate_result = {"output": "some raw string"} + assert _parse_exit_condition(gate_result, "reviewer") is None + + def test_empty_gate_result_returns_none(self): + assert _parse_exit_condition({}, "reviewer") is None + + +# --------------------------------------------------------------------------- +# Unit tests: _trigger_auto_return threshold guard +# --------------------------------------------------------------------------- + +class TestTriggerAutoReturnThreshold: + """KIN-136: threshold guard — return_count >= max → should_escalate=True.""" + + def test_threshold_exceeded_returns_should_escalate(self, conn): + """If return_count is already at max, should_escalate=True without spawning pipeline.""" + # Set return_count to max + for _ in range(_AUTO_RETURN_MAX): + models.record_task_return( + conn, task_id="P1-001", + reason_category="recurring_quality_fail", + reason_text="quality issue", + returned_by="reviewer", + ) + task = models.get_task(conn, "P1-001") + assert task["return_count"] == _AUTO_RETURN_MAX + + result = _trigger_auto_return( + conn, "P1-001", "p1", pipeline=None, + original_steps=[{"role": "reviewer", "model": "opus"}], + gate_role="reviewer", + gate_reason="quality issue", + allow_write=False, + noninteractive=True, + ) + + assert result["should_escalate"] is True + assert result["reason"] == "auto_return_threshold_exceeded" + + def test_below_threshold_returns_should_escalate_false(self, conn): + """return_count < max → should_escalate=False (auto-return proceeds).""" + # Set return_count to max - 1 (one below threshold) + for _ in range(_AUTO_RETURN_MAX - 1): + models.record_task_return( + conn, task_id="P1-001", + reason_category="recurring_quality_fail", + reason_text="quality issue", + returned_by="reviewer", + ) + + with patch("agents.runner.run_pipeline") as mock_rp: + mock_rp.return_value = {"success": True, "steps_completed": 2} + result = _trigger_auto_return( + conn, "P1-001", "p1", pipeline=None, + original_steps=[{"role": "reviewer", "model": "opus"}], + gate_role="reviewer", + gate_reason="quality issue", + allow_write=False, + noninteractive=True, + ) + + assert result["should_escalate"] is False + assert "auto_return_result" in result + + def test_threshold_zero_return_count_is_below_max(self, conn): + """Fresh task with return_count=0 is always below threshold.""" + task = models.get_task(conn, "P1-001") + assert (task.get("return_count") or 0) == 0 + + with patch("agents.runner.run_pipeline") as mock_rp: + mock_rp.return_value = {"success": True, "steps_completed": 1} + result = _trigger_auto_return( + conn, "P1-001", "p1", pipeline=None, + original_steps=[], + gate_role="reviewer", + gate_reason="issue", + allow_write=False, + noninteractive=True, + ) + + assert result["should_escalate"] is False + + +# --------------------------------------------------------------------------- +# Integration tests: auto-return end-to-end +# --------------------------------------------------------------------------- + +class TestAutoReturnIntegration: + """KIN-136: end-to-end integration tests for auto-return flow.""" + + @patch("core.followup.generate_followups") + @patch("agents.runner.run_hooks") + @patch("agents.runner.subprocess.run") + def test_reviewer_rejection_without_exit_condition_triggers_auto_return( + self, mock_run, mock_hooks, mock_followup, conn + ): + """reviewer changes_requested + no exit_condition → auto_returned=True in result.""" + mock_hooks.return_value = [] + mock_followup.return_value = {"created": [], "pending_actions": []} + + # Call sequence: + # 1. reviewer returns "changes_requested" (no exit_condition) — original pipeline + # 2. return_analyst runs — in auto-return sub-pipeline + # 3. reviewer returns "approved" — in auto-return sub-pipeline + mock_run.side_effect = [ + _mock_reviewer("changes_requested"), # original reviewer + _mock_return_analyst(escalate=False), # return_analyst in sub-pipeline + _mock_reviewer("approved"), # reviewer in sub-pipeline + ] + + steps = [{"role": "reviewer", "brief": "review the code"}] + result = run_pipeline(conn, "P1-001", steps) + + assert result.get("auto_returned") is True, ( + "result должен содержать auto_returned=True при авто-возврате" + ) + # Task should be done after the sub-pipeline reviewer approved + task = models.get_task(conn, "P1-001") + assert task["status"] == "done", ( + "После авто-возврата и одобрения reviewer — задача должна быть done" + ) + + @patch("core.followup.generate_followups") + @patch("agents.runner.run_hooks") + @patch("agents.runner.subprocess.run") + def test_reviewer_rejection_with_exit_condition_blocks_task( + self, mock_run, mock_hooks, mock_followup, conn + ): + """reviewer changes_requested + exit_condition='login_required' → task blocked.""" + mock_hooks.return_value = [] + mock_followup.return_value = {"created": [], "pending_actions": []} + + mock_run.return_value = _mock_reviewer( + "changes_requested", + reason="Need login access to verify", + exit_condition="login_required", + ) + + steps = [{"role": "reviewer", "brief": "review"}] + result = run_pipeline(conn, "P1-001", steps) + + assert result["success"] is False + assert result.get("auto_returned") is not True, ( + "Задача с exit_condition не должна авто-возвращаться" + ) + task = models.get_task(conn, "P1-001") + assert task["status"] == "blocked", ( + "exit_condition='login_required' → задача должна остаться blocked" + ) + + @patch("core.followup.generate_followups") + @patch("agents.runner.run_hooks") + @patch("agents.runner.subprocess.run") + def test_exit_condition_missing_data_blocks_task( + self, mock_run, mock_hooks, mock_followup, conn + ): + """exit_condition='missing_data' → task stays blocked, no auto-return.""" + mock_hooks.return_value = [] + mock_followup.return_value = {"created": [], "pending_actions": []} + + mock_run.return_value = _mock_reviewer( + "changes_requested", + reason="Missing API credentials", + exit_condition="missing_data", + ) + + steps = [{"role": "reviewer", "brief": "review"}] + result = run_pipeline(conn, "P1-001", steps) + + assert result["success"] is False + task = models.get_task(conn, "P1-001") + assert task["status"] == "blocked" + + @patch("core.followup.generate_followups") + @patch("agents.runner.run_hooks") + @patch("agents.runner.subprocess.run") + def test_auto_return_increments_return_count( + self, mock_run, mock_hooks, mock_followup, conn + ): + """auto-return must increment return_count exactly once.""" + mock_hooks.return_value = [] + mock_followup.return_value = {"created": [], "pending_actions": []} + + mock_run.side_effect = [ + _mock_reviewer("changes_requested"), # original reviewer + _mock_return_analyst(escalate=False), # return_analyst + _mock_reviewer("approved"), # reviewer in sub-pipeline + ] + + steps = [{"role": "reviewer", "brief": "review"}] + run_pipeline(conn, "P1-001", steps) + + task = models.get_task(conn, "P1-001") + assert task.get("return_count") == 1, ( + "Авто-возврат должен инкрементировать return_count ровно на 1" + ) + + @patch("core.followup.generate_followups") + @patch("agents.runner.run_hooks") + @patch("agents.runner.subprocess.run") + def test_auto_return_threshold_exceeded_blocks_task( + self, mock_run, mock_hooks, mock_followup, conn + ): + """When return_count >= _AUTO_RETURN_MAX → task blocked, no more auto-returns.""" + mock_hooks.return_value = [] + mock_followup.return_value = {"created": [], "pending_actions": []} + + # Pre-fill return_count to max + for _ in range(_AUTO_RETURN_MAX): + models.record_task_return( + conn, task_id="P1-001", + reason_category="recurring_quality_fail", + reason_text="quality issue", + returned_by="reviewer", + ) + + mock_run.return_value = _mock_reviewer("changes_requested") + + steps = [{"role": "reviewer", "brief": "review"}] + result = run_pipeline(conn, "P1-001", steps) + + assert result["success"] is False + assert result.get("auto_returned") is not True + task = models.get_task(conn, "P1-001") + assert task["status"] == "blocked", ( + "После достижения порога авто-возвратов задача должна быть blocked" + ) + + @patch("core.followup.generate_followups") + @patch("agents.runner.run_hooks") + @patch("agents.runner.subprocess.run") + def test_escalation_pipeline_gate_rejection_no_auto_return( + self, mock_run, mock_hooks, mock_followup, conn + ): + """Gate rejection in escalation pipeline → task blocked, no auto-return (guard #1081). + + Simulates a pipeline with pipeline_type='escalation' by patching create_pipeline + to return a pipeline dict with that type. This is the scenario where _save_return_analyst_output + would run a pipeline it created with escalation type. + """ + mock_hooks.return_value = [] + mock_followup.return_value = {"created": [], "pending_actions": []} + mock_run.return_value = _mock_reviewer("changes_requested") + + # Patch create_pipeline so the pipeline created by run_pipeline has type='escalation' + original_create = models.create_pipeline + + def create_escalation_pipeline(conn, task_id, project_id, route_type, steps, + parent_pipeline_id=None, department=None): + pipeline = original_create(conn, task_id, project_id, route_type, steps, + parent_pipeline_id=parent_pipeline_id, + department=department) + # Simulate escalation pipeline type + conn.execute( + "UPDATE pipelines SET pipeline_type = 'escalation' WHERE id = ?", + (pipeline["id"],), + ) + conn.commit() + pipeline["pipeline_type"] = "escalation" + return pipeline + + with patch("agents.runner.models.create_pipeline", side_effect=create_escalation_pipeline): + steps = [{"role": "reviewer", "brief": "review"}] + result = run_pipeline(conn, "P1-001", steps) + + # Guard: escalation pipeline → task must be blocked, NOT auto-returned + assert result.get("auto_returned") is not True, ( + "Задача внутри escalation-пайплайна не должна авто-возвращаться" + ) + task = models.get_task(conn, "P1-001") + assert task["status"] == "blocked" + + @patch("core.followup.generate_followups") + @patch("agents.runner.run_hooks") + @patch("agents.runner.subprocess.run") + def test_reviewer_approved_task_done_unaffected( + self, mock_run, mock_hooks, mock_followup, conn + ): + """Happy path: reviewer approved → task done, auto-return not triggered.""" + mock_hooks.return_value = [] + mock_followup.return_value = {"created": [], "pending_actions": []} + + mock_run.return_value = _mock_reviewer("approved", reason="") + + steps = [{"role": "reviewer", "brief": "review"}] + result = run_pipeline(conn, "P1-001", steps) + + assert result["success"] is True + assert result.get("auto_returned") is not True + task = models.get_task(conn, "P1-001") + assert task["status"] == "done" + + @patch("core.followup.generate_followups") + @patch("agents.runner.run_hooks") + @patch("agents.runner.subprocess.run") + def test_exit_condition_strategic_decision_blocks_task( + self, mock_run, mock_hooks, mock_followup, conn + ): + """exit_condition='strategic_decision' → task stays blocked, no auto-return.""" + mock_hooks.return_value = [] + mock_followup.return_value = {"created": [], "pending_actions": []} + + mock_run.return_value = _mock_reviewer( + "changes_requested", + reason="Needs architectural decision from management", + exit_condition="strategic_decision", + ) + + steps = [{"role": "reviewer", "brief": "review"}] + result = run_pipeline(conn, "P1-001", steps) + + assert result["success"] is False + assert result.get("auto_returned") is not True, ( + "strategic_decision должен блокировать auto-return" + ) + task = models.get_task(conn, "P1-001") + assert task["status"] == "blocked", ( + "exit_condition='strategic_decision' → задача должна остаться blocked" + ) + + @patch("core.followup.generate_followups") + @patch("agents.runner.run_hooks") + @patch("agents.runner.subprocess.run") + def test_review_mode_reviewer_rejection_no_auto_return( + self, mock_run, mock_hooks, mock_followup + ): + """Scenario 2: execution_mode='review' + reviewer rejection → no auto-return. + + In review mode, gate check is skipped — task moves to 'review' status + waiting for human approval, never triggering auto-return. + """ + c = init_db(":memory:") + models.create_project(c, "p1", "P1", "/tmp/p1", tech_stack=["python"]) + models.create_task(c, "P1-001", "p1", "Implement feature", + brief={"route_type": "feature"}) + # Explicitly review mode (not auto_complete) + models.update_task(c, "P1-001", execution_mode="review") + + mock_hooks.return_value = [] + mock_followup.return_value = {"created": [], "pending_actions": []} + mock_run.return_value = _mock_reviewer("changes_requested", + reason="Needs work") + + steps = [{"role": "reviewer", "brief": "review"}] + result = run_pipeline(c, "P1-001", steps) + c.close() + + assert result.get("auto_returned") is not True, ( + "В режиме review авто-возврат не должен срабатывать" + ) + # In review mode the pipeline succeeds (all steps run), task waits for human + assert result["success"] is True + + +# --------------------------------------------------------------------------- +# Unit tests: two sequential auto-returns (decision #1082) +# --------------------------------------------------------------------------- + +class TestTwoSequentialAutoReturns: + """KIN-136: return_count increments correctly across two consecutive returns (decision #1082).""" + + def test_two_sequential_auto_returns_increment_count_to_two(self, conn): + """Two _trigger_auto_return calls → return_count = 2 (not reset, not stuck at 1).""" + with patch("agents.runner.run_pipeline") as mock_rp: + mock_rp.return_value = {"success": True, "steps_completed": 2} + + # First auto-return + result1 = _trigger_auto_return( + conn, "P1-001", "p1", pipeline=None, + original_steps=[{"role": "reviewer"}], + gate_role="reviewer", + gate_reason="first rejection", + allow_write=False, + noninteractive=True, + ) + assert result1["should_escalate"] is False + task_after_first = models.get_task(conn, "P1-001") + assert (task_after_first.get("return_count") or 0) == 1, ( + "После первого авто-возврата return_count должен быть 1" + ) + + # Second auto-return (without resetting count) + result2 = _trigger_auto_return( + conn, "P1-001", "p1", pipeline=None, + original_steps=[{"role": "reviewer"}], + gate_role="reviewer", + gate_reason="second rejection", + allow_write=False, + noninteractive=True, + ) + assert result2["should_escalate"] is False + task_after_second = models.get_task(conn, "P1-001") + assert (task_after_second.get("return_count") or 0) == 2, ( + "После второго авто-возврата return_count должен быть 2 (не сброшен в 1)" + ) + + def test_return_count_not_reset_between_sequential_auto_returns(self, conn): + """Ensures return_count accumulates across calls — cumulative, not per-run (decision #1082).""" + with patch("agents.runner.run_pipeline") as mock_rp: + mock_rp.return_value = {"success": True, "steps_completed": 2} + + for call_number in range(1, _AUTO_RETURN_MAX): + _trigger_auto_return( + conn, "P1-001", "p1", pipeline=None, + original_steps=[{"role": "reviewer"}], + gate_role="reviewer", + gate_reason=f"rejection #{call_number}", + allow_write=False, + noninteractive=True, + ) + + task = models.get_task(conn, "P1-001") + assert (task.get("return_count") or 0) == _AUTO_RETURN_MAX - 1, ( + f"После {_AUTO_RETURN_MAX - 1} авто-возвратов return_count должен быть " + f"{_AUTO_RETURN_MAX - 1} — не сброшен и не потерян" + )