kin: KIN-136-backend_dev
This commit is contained in:
parent
2f7ccffbc8
commit
aac75dbfdc
4 changed files with 592 additions and 9 deletions
|
|
@ -72,7 +72,8 @@ Example:
|
|||
"security_issues": [],
|
||||
"conventions_violations": [],
|
||||
"test_coverage": "adequate",
|
||||
"summary": "Implementation looks correct and follows project patterns. One minor style issue noted."
|
||||
"summary": "Implementation looks correct and follows project patterns. One minor style issue noted.",
|
||||
"exit_condition": null
|
||||
}
|
||||
```
|
||||
|
||||
|
|
@ -100,6 +101,22 @@ Example:
|
|||
}
|
||||
```
|
||||
|
||||
**`exit_condition`** (optional, KIN-136 auto-return):
|
||||
|
||||
Set this field ONLY when the task cannot be auto-retried and requires a human decision:
|
||||
|
||||
- `"login_required"` — the reviewer or the code requires a login/auth that is not available in automation context
|
||||
- `"missing_data"` — critical data, credentials, or access needed to continue is missing and cannot be inferred
|
||||
- `"strategic_decision"` — the fix requires a fundamental architectural or business decision with no obvious correct answer (e.g. conflicting stakeholder requirements, irreversible platform choice)
|
||||
|
||||
Leave as `null` in ALL other cases — including ordinary bugs, quality issues, missing tests, style violations, or any fixable problem. When `null`, the system will automatically retry the task with a return analyst.
|
||||
|
||||
**When NOT to set exit_condition (set null):**
|
||||
- Code has bugs or logic errors → `null` (auto-retry will fix)
|
||||
- Tests are missing or failing → `null` (auto-retry will add tests)
|
||||
- Implementation doesn't match requirements → `null` (auto-retry will revise)
|
||||
- Security issue found → `null` with `"changes_requested"` verdict (auto-retry will patch)
|
||||
|
||||
**`security_issues` and `conventions_violations`** elements:
|
||||
|
||||
```json
|
||||
|
|
|
|||
150
agents/runner.py
150
agents/runner.py
|
|
@ -37,6 +37,13 @@ _MODEL_TIMEOUTS = {
|
|||
"haiku": 1200, # 20 min
|
||||
}
|
||||
|
||||
# KIN-136: auto-return — max times a task can be auto-returned before escalating to human.
|
||||
# Override via KIN_AUTO_RETURN_MAX env var.
|
||||
_AUTO_RETURN_MAX: int = int(os.environ.get("KIN_AUTO_RETURN_MAX") or 3)
|
||||
|
||||
# KIN-136: valid exit_condition values that force human escalation instead of auto-return.
|
||||
_EXIT_CONDITIONS = frozenset({"login_required", "missing_data", "strategic_decision"})
|
||||
|
||||
|
||||
def _build_claude_env() -> dict:
|
||||
"""Return an env dict with an extended PATH that includes common CLI tool locations.
|
||||
|
|
@ -595,6 +602,117 @@ def _parse_gate_cannot_close(result: dict, role: str) -> dict | None:
|
|||
return None # unknown gate role → fail-open
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Auto-return helpers (KIN-136)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _parse_exit_condition(gate_result: dict, role: str) -> str | None:
|
||||
"""Extract exit_condition from gate agent output.
|
||||
|
||||
Returns one of _EXIT_CONDITIONS values, or None if absent/invalid/unsupported.
|
||||
Fail-open: invalid or unknown values are treated as None → triggers auto-return.
|
||||
Only reviewer output supports exit_condition; tester always returns None.
|
||||
"""
|
||||
if role != "reviewer":
|
||||
return None
|
||||
output = gate_result.get("output")
|
||||
if not isinstance(output, dict):
|
||||
return None
|
||||
raw = output.get("exit_condition")
|
||||
if raw in _EXIT_CONDITIONS:
|
||||
return raw
|
||||
if raw is not None:
|
||||
_logger.warning(
|
||||
"KIN-136: unknown exit_condition %r from reviewer — treating as None (auto-return)",
|
||||
raw,
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
def _trigger_auto_return(
|
||||
conn: "sqlite3.Connection",
|
||||
task_id: str,
|
||||
project_id: str,
|
||||
pipeline: dict | None,
|
||||
original_steps: list[dict],
|
||||
gate_role: str,
|
||||
gate_reason: str,
|
||||
allow_write: bool,
|
||||
noninteractive: bool,
|
||||
gate_output_json: str | None = None,
|
||||
) -> dict:
|
||||
"""Attempt auto-return: re-run the pipeline with return_analyst prepended.
|
||||
|
||||
Steps:
|
||||
(a) Check return_count against _AUTO_RETURN_MAX threshold — escalate if exceeded.
|
||||
(b) Record the task return.
|
||||
(c) Mark current pipeline failed; set task status to 'revising'.
|
||||
(d) Spawn new pipeline: [return_analyst] + original_steps.
|
||||
|
||||
Returns:
|
||||
{"should_escalate": True, "reason": "auto_return_threshold_exceeded"} if threshold hit.
|
||||
{"should_escalate": False, "auto_return_result": {...}} otherwise.
|
||||
"""
|
||||
# (a) Check threshold — fetch current return_count before recording new one
|
||||
current_task = models.get_task(conn, task_id)
|
||||
current_return_count = (current_task or {}).get("return_count") or 0
|
||||
if current_return_count >= _AUTO_RETURN_MAX:
|
||||
_logger.warning(
|
||||
"KIN-136: auto-return threshold reached for task %s "
|
||||
"(return_count=%d >= max=%d) — escalating to human",
|
||||
task_id, current_return_count, _AUTO_RETURN_MAX,
|
||||
)
|
||||
return {"should_escalate": True, "reason": "auto_return_threshold_exceeded"}
|
||||
|
||||
pipeline_id = pipeline["id"] if pipeline else None
|
||||
|
||||
# (b) Record return
|
||||
try:
|
||||
models.record_task_return(
|
||||
conn,
|
||||
task_id=task_id,
|
||||
reason_category="recurring_quality_fail",
|
||||
reason_text=f"Gate {gate_role}: {gate_reason[:200]}",
|
||||
returned_by=gate_role,
|
||||
pipeline_id=pipeline_id,
|
||||
)
|
||||
except Exception:
|
||||
pass # Never block auto-return on tracking errors
|
||||
|
||||
# (c) Mark current pipeline failed, set task to revising
|
||||
if pipeline:
|
||||
try:
|
||||
models.update_pipeline(conn, pipeline_id, status="failed")
|
||||
except Exception:
|
||||
pass
|
||||
models.update_task(conn, task_id, status="revising")
|
||||
|
||||
try:
|
||||
models.write_log(
|
||||
conn, pipeline_id,
|
||||
f"KIN-136: auto-return triggered by {gate_role} "
|
||||
f"(return_count now {current_return_count + 1}) — spawning return_analyst pipeline",
|
||||
level="INFO",
|
||||
extra={"gate_role": gate_role, "return_count": current_return_count + 1},
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# (d) Build new steps and spawn new pipeline
|
||||
new_steps = [{"role": "return_analyst", "model": "opus"}] + list(original_steps)
|
||||
auto_return_result = run_pipeline(
|
||||
conn,
|
||||
task_id,
|
||||
new_steps,
|
||||
allow_write=allow_write,
|
||||
noninteractive=noninteractive,
|
||||
initial_previous_output=gate_output_json,
|
||||
parent_pipeline_id=pipeline_id,
|
||||
)
|
||||
|
||||
return {"should_escalate": False, "auto_return_result": auto_return_result}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Destructive operation detection (KIN-116)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -2509,6 +2627,37 @@ def run_pipeline(
|
|||
)
|
||||
if _cannot_close is not None:
|
||||
_block_reason = _cannot_close["reason"]
|
||||
_pipeline_type = (pipeline or {}).get("pipeline_type", "standard")
|
||||
|
||||
# KIN-136: auto-return — attempt re-run instead of blocking when:
|
||||
# - reviewer did not set an exit_condition requiring human intervention
|
||||
# - not inside an escalation pipeline (guard against recursive loops)
|
||||
# - not a dry_run
|
||||
_exit_cond = _parse_exit_condition(_gate_result or {}, effective_last_role)
|
||||
if _exit_cond is None and _pipeline_type != "escalation" and not dry_run:
|
||||
_gate_out = (_gate_result or {}).get("output")
|
||||
_gate_out_json = (
|
||||
json.dumps(_gate_out, ensure_ascii=False)
|
||||
if isinstance(_gate_out, dict) else str(_gate_out or "")
|
||||
)
|
||||
_ar = _trigger_auto_return(
|
||||
conn, task_id, project_id, pipeline,
|
||||
original_steps=steps,
|
||||
gate_role=effective_last_role,
|
||||
gate_reason=_block_reason,
|
||||
allow_write=allow_write,
|
||||
noninteractive=noninteractive,
|
||||
gate_output_json=_gate_out_json,
|
||||
)
|
||||
if not _ar["should_escalate"]:
|
||||
_ar_result = _ar["auto_return_result"]
|
||||
_ar_result["auto_returned"] = True
|
||||
return _ar_result
|
||||
# Threshold exceeded — fall through to human escalation
|
||||
_block_reason = f"{_block_reason} [auto_return_limit_reached]"
|
||||
|
||||
# Human escalation path: exit_condition set, escalation pipeline,
|
||||
# dry_run, or auto-return threshold exceeded
|
||||
models.update_task(
|
||||
conn, task_id,
|
||||
status="blocked",
|
||||
|
|
@ -2525,7 +2674,6 @@ def run_pipeline(
|
|||
total_duration_seconds=total_duration,
|
||||
)
|
||||
# KIN-135: record gate return — skip for escalation pipelines to avoid loops
|
||||
_pipeline_type = (pipeline or {}).get("pipeline_type", "standard")
|
||||
if _pipeline_type != "escalation":
|
||||
try:
|
||||
models.record_task_return(
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ from core.db import init_db
|
|||
from core import models
|
||||
from core.models import RETURN_CATEGORIES
|
||||
from core.context_builder import build_context
|
||||
from agents.runner import _save_return_analyst_output, run_pipeline
|
||||
from agents.runner import _save_return_analyst_output, run_pipeline, _AUTO_RETURN_MAX
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -403,7 +403,13 @@ class TestGateCannotCloseRecordsReturn:
|
|||
def test_gate_rejection_increments_return_count_in_standard_pipeline(
|
||||
self, mock_run, mock_hooks, mock_followup, conn_autocomplete
|
||||
):
|
||||
"""Gate cannot_close in standard pipeline → task.return_count increases by 1."""
|
||||
"""Gate cannot_close in standard pipeline → return_count increments on each auto-return.
|
||||
|
||||
KIN-136: in auto_complete mode, gate rejection triggers auto-return up to _AUTO_RETURN_MAX
|
||||
times before escalating. Each auto-return increments return_count once; the final
|
||||
escalation via the human-escalation path also records one more return.
|
||||
Total: _AUTO_RETURN_MAX + 1 returns (3 auto-returns + 1 final escalation = 4).
|
||||
"""
|
||||
conn = conn_autocomplete
|
||||
mock_run.return_value = _mock_subprocess(
|
||||
{"verdict": "changes_requested", "reason": "Missing tests"}
|
||||
|
|
@ -415,8 +421,10 @@ class TestGateCannotCloseRecordsReturn:
|
|||
run_pipeline(conn, "P1-001", steps)
|
||||
|
||||
task = models.get_task(conn, "P1-001")
|
||||
assert task["return_count"] == 1, (
|
||||
"Gate rejection in standard pipeline should increment return_count"
|
||||
# KIN-136: _AUTO_RETURN_MAX auto-returns + 1 final escalation recording (git log: 2026-03-21)
|
||||
assert task["return_count"] == _AUTO_RETURN_MAX + 1, (
|
||||
"Gate rejection with persistent failure should increment return_count "
|
||||
f"_AUTO_RETURN_MAX+1 times ({_AUTO_RETURN_MAX + 1})"
|
||||
)
|
||||
|
||||
@patch("core.followup.generate_followups")
|
||||
|
|
@ -447,7 +455,11 @@ class TestGateCannotCloseRecordsReturn:
|
|||
def test_gate_rejection_records_return_with_recurring_quality_fail_category(
|
||||
self, mock_run, mock_hooks, mock_followup, conn_autocomplete
|
||||
):
|
||||
"""Gate rejection uses 'recurring_quality_fail' as reason_category."""
|
||||
"""Gate rejection uses 'recurring_quality_fail' as reason_category.
|
||||
|
||||
KIN-136: all auto-return records use the same category; final escalation also uses it.
|
||||
Total returns = _AUTO_RETURN_MAX + 1 (git log: 2026-03-21).
|
||||
"""
|
||||
conn = conn_autocomplete
|
||||
mock_run.return_value = _mock_subprocess(
|
||||
{"verdict": "changes_requested", "reason": "Need unit tests"}
|
||||
|
|
@ -459,8 +471,10 @@ class TestGateCannotCloseRecordsReturn:
|
|||
run_pipeline(conn, "P1-001", steps)
|
||||
|
||||
returns = models.get_task_returns(conn, "P1-001")
|
||||
assert len(returns) == 1
|
||||
assert returns[0]["reason_category"] == "recurring_quality_fail"
|
||||
assert len(returns) == _AUTO_RETURN_MAX + 1
|
||||
assert all(r["reason_category"] == "recurring_quality_fail" for r in returns), (
|
||||
"Все записи возврата должны иметь категорию recurring_quality_fail"
|
||||
)
|
||||
|
||||
@patch("core.followup.generate_followups")
|
||||
@patch("agents.runner.run_hooks")
|
||||
|
|
|
|||
404
tests/test_kin_136_auto_return.py
Normal file
404
tests/test_kin_136_auto_return.py
Normal file
|
|
@ -0,0 +1,404 @@
|
|||
"""
|
||||
Tests for KIN-136: Auto-return mechanism.
|
||||
|
||||
When a task in auto_complete mode is rejected by a gate agent (reviewer/tester)
|
||||
WITHOUT an exit_condition, it is automatically re-run with return_analyst prepended
|
||||
instead of being escalated to a human.
|
||||
|
||||
Covers:
|
||||
1. _parse_exit_condition unit tests — valid/invalid/null values, role guard
|
||||
2. _trigger_auto_return threshold guard — return_count >= max → should_escalate=True
|
||||
3. Integration: reviewer changes_requested + no exit_condition → auto_returned=True
|
||||
4. Integration: reviewer changes_requested + exit_condition='login_required' → task blocked
|
||||
5. Integration: escalation pipeline gate rejection → no auto-return (guard #1081)
|
||||
6. Integration: reviewer approved (no exit_condition set) → task done (happy path unaffected)
|
||||
"""
|
||||
|
||||
import json
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
from core.db import init_db
|
||||
from core import models
|
||||
from agents.runner import (
|
||||
run_pipeline,
|
||||
_parse_exit_condition,
|
||||
_trigger_auto_return,
|
||||
_AUTO_RETURN_MAX,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@pytest.fixture
|
||||
def conn():
|
||||
"""Fresh in-memory DB with project and auto_complete task."""
|
||||
c = init_db(":memory:")
|
||||
models.create_project(c, "p1", "P1", "/tmp/p1", tech_stack=["python"])
|
||||
models.create_task(c, "P1-001", "p1", "Implement feature",
|
||||
brief={"route_type": "feature"})
|
||||
models.update_task(c, "P1-001", execution_mode="auto_complete")
|
||||
yield c
|
||||
c.close()
|
||||
|
||||
|
||||
def _mock_subprocess(agent_output: dict) -> MagicMock:
|
||||
"""Build subprocess.run mock returning agent_output in claude JSON format."""
|
||||
m = MagicMock()
|
||||
m.stdout = json.dumps({"result": json.dumps(agent_output, ensure_ascii=False)})
|
||||
m.stderr = ""
|
||||
m.returncode = 0
|
||||
return m
|
||||
|
||||
|
||||
def _mock_reviewer(verdict: str, reason: str = "Issues found",
|
||||
exit_condition=None) -> MagicMock:
|
||||
out = {"verdict": verdict, "reason": reason, "findings": [], "summary": reason}
|
||||
if exit_condition is not None:
|
||||
out["exit_condition"] = exit_condition
|
||||
return _mock_subprocess(out)
|
||||
|
||||
|
||||
def _mock_return_analyst(escalate: bool = False) -> MagicMock:
|
||||
return _mock_subprocess({
|
||||
"status": "done",
|
||||
"escalate_to_dept_head": escalate,
|
||||
"root_cause_analysis": "quality issue identified",
|
||||
"refined_brief": "fix the quality issue",
|
||||
})
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Unit tests: _parse_exit_condition
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestParseExitCondition:
|
||||
"""KIN-136: exit_condition parsing — fail-open on unknown values."""
|
||||
|
||||
def test_valid_login_required(self):
|
||||
gate_result = {"output": {"verdict": "changes_requested", "exit_condition": "login_required"}}
|
||||
assert _parse_exit_condition(gate_result, "reviewer") == "login_required"
|
||||
|
||||
def test_valid_missing_data(self):
|
||||
gate_result = {"output": {"verdict": "changes_requested", "exit_condition": "missing_data"}}
|
||||
assert _parse_exit_condition(gate_result, "reviewer") == "missing_data"
|
||||
|
||||
def test_valid_strategic_decision(self):
|
||||
gate_result = {"output": {"verdict": "changes_requested", "exit_condition": "strategic_decision"}}
|
||||
assert _parse_exit_condition(gate_result, "reviewer") == "strategic_decision"
|
||||
|
||||
def test_null_exit_condition_returns_none(self):
|
||||
"""null in JSON → None → auto-return."""
|
||||
gate_result = {"output": {"verdict": "changes_requested", "exit_condition": None}}
|
||||
assert _parse_exit_condition(gate_result, "reviewer") is None
|
||||
|
||||
def test_missing_exit_condition_field_returns_none(self):
|
||||
"""Field absent → auto-return (fail-open)."""
|
||||
gate_result = {"output": {"verdict": "changes_requested"}}
|
||||
assert _parse_exit_condition(gate_result, "reviewer") is None
|
||||
|
||||
def test_invalid_exit_condition_returns_none(self):
|
||||
"""Unknown value → fail-open → None → auto-return (decision #1083 analogy)."""
|
||||
gate_result = {"output": {"verdict": "changes_requested", "exit_condition": "unknown_value"}}
|
||||
assert _parse_exit_condition(gate_result, "reviewer") is None
|
||||
|
||||
def test_tester_role_always_none(self):
|
||||
"""Tester does not support exit_condition — always returns None."""
|
||||
gate_result = {"output": {"status": "failed", "exit_condition": "login_required"}}
|
||||
assert _parse_exit_condition(gate_result, "tester") is None
|
||||
|
||||
def test_non_dict_output_returns_none(self):
|
||||
"""Non-dict output → fail-open → None."""
|
||||
gate_result = {"output": "some raw string"}
|
||||
assert _parse_exit_condition(gate_result, "reviewer") is None
|
||||
|
||||
def test_empty_gate_result_returns_none(self):
|
||||
assert _parse_exit_condition({}, "reviewer") is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Unit tests: _trigger_auto_return threshold guard
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestTriggerAutoReturnThreshold:
|
||||
"""KIN-136: threshold guard — return_count >= max → should_escalate=True."""
|
||||
|
||||
def test_threshold_exceeded_returns_should_escalate(self, conn):
|
||||
"""If return_count is already at max, should_escalate=True without spawning pipeline."""
|
||||
# Set return_count to max
|
||||
for _ in range(_AUTO_RETURN_MAX):
|
||||
models.record_task_return(
|
||||
conn, task_id="P1-001",
|
||||
reason_category="recurring_quality_fail",
|
||||
reason_text="quality issue",
|
||||
returned_by="reviewer",
|
||||
)
|
||||
task = models.get_task(conn, "P1-001")
|
||||
assert task["return_count"] == _AUTO_RETURN_MAX
|
||||
|
||||
result = _trigger_auto_return(
|
||||
conn, "P1-001", "p1", pipeline=None,
|
||||
original_steps=[{"role": "reviewer", "model": "opus"}],
|
||||
gate_role="reviewer",
|
||||
gate_reason="quality issue",
|
||||
allow_write=False,
|
||||
noninteractive=True,
|
||||
)
|
||||
|
||||
assert result["should_escalate"] is True
|
||||
assert result["reason"] == "auto_return_threshold_exceeded"
|
||||
|
||||
def test_below_threshold_returns_should_escalate_false(self, conn):
|
||||
"""return_count < max → should_escalate=False (auto-return proceeds)."""
|
||||
# Set return_count to max - 1 (one below threshold)
|
||||
for _ in range(_AUTO_RETURN_MAX - 1):
|
||||
models.record_task_return(
|
||||
conn, task_id="P1-001",
|
||||
reason_category="recurring_quality_fail",
|
||||
reason_text="quality issue",
|
||||
returned_by="reviewer",
|
||||
)
|
||||
|
||||
with patch("agents.runner.run_pipeline") as mock_rp:
|
||||
mock_rp.return_value = {"success": True, "steps_completed": 2}
|
||||
result = _trigger_auto_return(
|
||||
conn, "P1-001", "p1", pipeline=None,
|
||||
original_steps=[{"role": "reviewer", "model": "opus"}],
|
||||
gate_role="reviewer",
|
||||
gate_reason="quality issue",
|
||||
allow_write=False,
|
||||
noninteractive=True,
|
||||
)
|
||||
|
||||
assert result["should_escalate"] is False
|
||||
assert "auto_return_result" in result
|
||||
|
||||
def test_threshold_zero_return_count_is_below_max(self, conn):
|
||||
"""Fresh task with return_count=0 is always below threshold."""
|
||||
task = models.get_task(conn, "P1-001")
|
||||
assert (task.get("return_count") or 0) == 0
|
||||
|
||||
with patch("agents.runner.run_pipeline") as mock_rp:
|
||||
mock_rp.return_value = {"success": True, "steps_completed": 1}
|
||||
result = _trigger_auto_return(
|
||||
conn, "P1-001", "p1", pipeline=None,
|
||||
original_steps=[],
|
||||
gate_role="reviewer",
|
||||
gate_reason="issue",
|
||||
allow_write=False,
|
||||
noninteractive=True,
|
||||
)
|
||||
|
||||
assert result["should_escalate"] is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Integration tests: auto-return end-to-end
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestAutoReturnIntegration:
|
||||
"""KIN-136: end-to-end integration tests for auto-return flow."""
|
||||
|
||||
@patch("core.followup.generate_followups")
|
||||
@patch("agents.runner.run_hooks")
|
||||
@patch("agents.runner.subprocess.run")
|
||||
def test_reviewer_rejection_without_exit_condition_triggers_auto_return(
|
||||
self, mock_run, mock_hooks, mock_followup, conn
|
||||
):
|
||||
"""reviewer changes_requested + no exit_condition → auto_returned=True in result."""
|
||||
mock_hooks.return_value = []
|
||||
mock_followup.return_value = {"created": [], "pending_actions": []}
|
||||
|
||||
# Call sequence:
|
||||
# 1. reviewer returns "changes_requested" (no exit_condition) — original pipeline
|
||||
# 2. return_analyst runs — in auto-return sub-pipeline
|
||||
# 3. reviewer returns "approved" — in auto-return sub-pipeline
|
||||
mock_run.side_effect = [
|
||||
_mock_reviewer("changes_requested"), # original reviewer
|
||||
_mock_return_analyst(escalate=False), # return_analyst in sub-pipeline
|
||||
_mock_reviewer("approved"), # reviewer in sub-pipeline
|
||||
]
|
||||
|
||||
steps = [{"role": "reviewer", "brief": "review the code"}]
|
||||
result = run_pipeline(conn, "P1-001", steps)
|
||||
|
||||
assert result.get("auto_returned") is True, (
|
||||
"result должен содержать auto_returned=True при авто-возврате"
|
||||
)
|
||||
# Task should be done after the sub-pipeline reviewer approved
|
||||
task = models.get_task(conn, "P1-001")
|
||||
assert task["status"] == "done", (
|
||||
"После авто-возврата и одобрения reviewer — задача должна быть done"
|
||||
)
|
||||
|
||||
@patch("core.followup.generate_followups")
|
||||
@patch("agents.runner.run_hooks")
|
||||
@patch("agents.runner.subprocess.run")
|
||||
def test_reviewer_rejection_with_exit_condition_blocks_task(
|
||||
self, mock_run, mock_hooks, mock_followup, conn
|
||||
):
|
||||
"""reviewer changes_requested + exit_condition='login_required' → task blocked."""
|
||||
mock_hooks.return_value = []
|
||||
mock_followup.return_value = {"created": [], "pending_actions": []}
|
||||
|
||||
mock_run.return_value = _mock_reviewer(
|
||||
"changes_requested",
|
||||
reason="Need login access to verify",
|
||||
exit_condition="login_required",
|
||||
)
|
||||
|
||||
steps = [{"role": "reviewer", "brief": "review"}]
|
||||
result = run_pipeline(conn, "P1-001", steps)
|
||||
|
||||
assert result["success"] is False
|
||||
assert result.get("auto_returned") is not True, (
|
||||
"Задача с exit_condition не должна авто-возвращаться"
|
||||
)
|
||||
task = models.get_task(conn, "P1-001")
|
||||
assert task["status"] == "blocked", (
|
||||
"exit_condition='login_required' → задача должна остаться blocked"
|
||||
)
|
||||
|
||||
@patch("core.followup.generate_followups")
|
||||
@patch("agents.runner.run_hooks")
|
||||
@patch("agents.runner.subprocess.run")
|
||||
def test_exit_condition_missing_data_blocks_task(
|
||||
self, mock_run, mock_hooks, mock_followup, conn
|
||||
):
|
||||
"""exit_condition='missing_data' → task stays blocked, no auto-return."""
|
||||
mock_hooks.return_value = []
|
||||
mock_followup.return_value = {"created": [], "pending_actions": []}
|
||||
|
||||
mock_run.return_value = _mock_reviewer(
|
||||
"changes_requested",
|
||||
reason="Missing API credentials",
|
||||
exit_condition="missing_data",
|
||||
)
|
||||
|
||||
steps = [{"role": "reviewer", "brief": "review"}]
|
||||
result = run_pipeline(conn, "P1-001", steps)
|
||||
|
||||
assert result["success"] is False
|
||||
task = models.get_task(conn, "P1-001")
|
||||
assert task["status"] == "blocked"
|
||||
|
||||
@patch("core.followup.generate_followups")
|
||||
@patch("agents.runner.run_hooks")
|
||||
@patch("agents.runner.subprocess.run")
|
||||
def test_auto_return_increments_return_count(
|
||||
self, mock_run, mock_hooks, mock_followup, conn
|
||||
):
|
||||
"""auto-return must increment return_count exactly once."""
|
||||
mock_hooks.return_value = []
|
||||
mock_followup.return_value = {"created": [], "pending_actions": []}
|
||||
|
||||
mock_run.side_effect = [
|
||||
_mock_reviewer("changes_requested"), # original reviewer
|
||||
_mock_return_analyst(escalate=False), # return_analyst
|
||||
_mock_reviewer("approved"), # reviewer in sub-pipeline
|
||||
]
|
||||
|
||||
steps = [{"role": "reviewer", "brief": "review"}]
|
||||
run_pipeline(conn, "P1-001", steps)
|
||||
|
||||
task = models.get_task(conn, "P1-001")
|
||||
assert task.get("return_count") == 1, (
|
||||
"Авто-возврат должен инкрементировать return_count ровно на 1"
|
||||
)
|
||||
|
||||
@patch("core.followup.generate_followups")
|
||||
@patch("agents.runner.run_hooks")
|
||||
@patch("agents.runner.subprocess.run")
|
||||
def test_auto_return_threshold_exceeded_blocks_task(
|
||||
self, mock_run, mock_hooks, mock_followup, conn
|
||||
):
|
||||
"""When return_count >= _AUTO_RETURN_MAX → task blocked, no more auto-returns."""
|
||||
mock_hooks.return_value = []
|
||||
mock_followup.return_value = {"created": [], "pending_actions": []}
|
||||
|
||||
# Pre-fill return_count to max
|
||||
for _ in range(_AUTO_RETURN_MAX):
|
||||
models.record_task_return(
|
||||
conn, task_id="P1-001",
|
||||
reason_category="recurring_quality_fail",
|
||||
reason_text="quality issue",
|
||||
returned_by="reviewer",
|
||||
)
|
||||
|
||||
mock_run.return_value = _mock_reviewer("changes_requested")
|
||||
|
||||
steps = [{"role": "reviewer", "brief": "review"}]
|
||||
result = run_pipeline(conn, "P1-001", steps)
|
||||
|
||||
assert result["success"] is False
|
||||
assert result.get("auto_returned") is not True
|
||||
task = models.get_task(conn, "P1-001")
|
||||
assert task["status"] == "blocked", (
|
||||
"После достижения порога авто-возвратов задача должна быть blocked"
|
||||
)
|
||||
|
||||
@patch("core.followup.generate_followups")
|
||||
@patch("agents.runner.run_hooks")
|
||||
@patch("agents.runner.subprocess.run")
|
||||
def test_escalation_pipeline_gate_rejection_no_auto_return(
|
||||
self, mock_run, mock_hooks, mock_followup, conn
|
||||
):
|
||||
"""Gate rejection in escalation pipeline → task blocked, no auto-return (guard #1081).
|
||||
|
||||
Simulates a pipeline with pipeline_type='escalation' by patching create_pipeline
|
||||
to return a pipeline dict with that type. This is the scenario where _save_return_analyst_output
|
||||
would run a pipeline it created with escalation type.
|
||||
"""
|
||||
mock_hooks.return_value = []
|
||||
mock_followup.return_value = {"created": [], "pending_actions": []}
|
||||
mock_run.return_value = _mock_reviewer("changes_requested")
|
||||
|
||||
# Patch create_pipeline so the pipeline created by run_pipeline has type='escalation'
|
||||
original_create = models.create_pipeline
|
||||
|
||||
def create_escalation_pipeline(conn, task_id, project_id, route_type, steps,
|
||||
parent_pipeline_id=None, department=None):
|
||||
pipeline = original_create(conn, task_id, project_id, route_type, steps,
|
||||
parent_pipeline_id=parent_pipeline_id,
|
||||
department=department)
|
||||
# Simulate escalation pipeline type
|
||||
conn.execute(
|
||||
"UPDATE pipelines SET pipeline_type = 'escalation' WHERE id = ?",
|
||||
(pipeline["id"],),
|
||||
)
|
||||
conn.commit()
|
||||
pipeline["pipeline_type"] = "escalation"
|
||||
return pipeline
|
||||
|
||||
with patch("agents.runner.models.create_pipeline", side_effect=create_escalation_pipeline):
|
||||
steps = [{"role": "reviewer", "brief": "review"}]
|
||||
result = run_pipeline(conn, "P1-001", steps)
|
||||
|
||||
# Guard: escalation pipeline → task must be blocked, NOT auto-returned
|
||||
assert result.get("auto_returned") is not True, (
|
||||
"Задача внутри escalation-пайплайна не должна авто-возвращаться"
|
||||
)
|
||||
task = models.get_task(conn, "P1-001")
|
||||
assert task["status"] == "blocked"
|
||||
|
||||
@patch("core.followup.generate_followups")
|
||||
@patch("agents.runner.run_hooks")
|
||||
@patch("agents.runner.subprocess.run")
|
||||
def test_reviewer_approved_task_done_unaffected(
|
||||
self, mock_run, mock_hooks, mock_followup, conn
|
||||
):
|
||||
"""Happy path: reviewer approved → task done, auto-return not triggered."""
|
||||
mock_hooks.return_value = []
|
||||
mock_followup.return_value = {"created": [], "pending_actions": []}
|
||||
|
||||
mock_run.return_value = _mock_reviewer("approved", reason="")
|
||||
|
||||
steps = [{"role": "reviewer", "brief": "review"}]
|
||||
result = run_pipeline(conn, "P1-001", steps)
|
||||
|
||||
assert result["success"] is True
|
||||
assert result.get("auto_returned") is not True
|
||||
task = models.get_task(conn, "P1-001")
|
||||
assert task["status"] == "done"
|
||||
Loading…
Add table
Add a link
Reference in a new issue