kin: KIN-136-backend_dev

2026-03-21 08:18:11 +02:00 · 2026-03-21 08:18:11 +02:00 · aac75dbfdc
commit aac75dbfdc
parent 2f7ccffbc8
4 changed files with 592 additions and 9 deletions
--- a/agents/prompts/reviewer.md
+++ b/agents/prompts/reviewer.md
@ -72,7 +72,8 @@ Example:
  "security_issues": [],
  "conventions_violations": [],
  "test_coverage": "adequate",
-  "summary": "Implementation looks correct and follows project patterns. One minor style issue noted."
+  "summary": "Implementation looks correct and follows project patterns. One minor style issue noted.",
+  "exit_condition": null
 }
 ```

@ -100,6 +101,22 @@ Example:
    }
    ```

+**`exit_condition`** (optional, KIN-136 auto-return):
+
+Set this field ONLY when the task cannot be auto-retried and requires a human decision:
+
+- `"login_required"` — the reviewer or the code requires a login/auth that is not available in automation context
+- `"missing_data"` — critical data, credentials, or access needed to continue is missing and cannot be inferred
+- `"strategic_decision"` — the fix requires a fundamental architectural or business decision with no obvious correct answer (e.g. conflicting stakeholder requirements, irreversible platform choice)
+
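+Leave as `null` in ALL other cases — including ordinary bugs, quality issues, missing tests, style violations, or any fixable problem. When `exit_condition` is `null` and the review blocks the task, the system automatically retries the task with a return analyst prepended to the pipeline; after a configurable number of auto-returns it escalates to a human instead.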
+
+**When NOT to set exit_condition (set null):**
+- Code has bugs or logic errors → `null` (auto-retry will fix)
+- Tests are missing or failing → `null` (auto-retry will add tests)
+- Implementation doesn't match requirements → `null` (auto-retry will revise)
+- Security issue found → `null` with `"changes_requested"` verdict (auto-retry will patch)
+
 **`security_issues` and `conventions_violations`** elements:

 ```json
--- a/agents/runner.py
+++ b/agents/runner.py
@ -37,6 +37,13 @@ _MODEL_TIMEOUTS = {
    "haiku": 1200,   # 20 min
 }

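+# KIN-136: auto-return — max times a task can be auto-returned before escalating to human.
+# Override via the KIN_AUTO_RETURN_MAX env var; unset or empty falls back to 3.
+# A non-integer value raises ValueError when this module is imported.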
+_AUTO_RETURN_MAX: int = int(os.environ.get("KIN_AUTO_RETURN_MAX") or 3)
+
+# KIN-136: valid exit_condition values that force human escalation instead of auto-return.
+_EXIT_CONDITIONS = frozenset({"login_required", "missing_data", "strategic_decision"})
+

 def _build_claude_env() -> dict:
    """Return an env dict with an extended PATH that includes common CLI tool locations.
@ -595,6 +602,117 @@ def _parse_gate_cannot_close(result: dict, role: str) -> dict | None:
    return None  # unknown gate role → fail-open


+# ---------------------------------------------------------------------------
+# Auto-return helpers (KIN-136)
+# ---------------------------------------------------------------------------
+
+def _parse_exit_condition(gate_result: dict, role: str) -> str | None:
+    """Extract the reviewer's exit_condition from gate agent output.
+
+    Args:
+        gate_result: Gate step result; only its "output" entry is read, and it
+            is considered only when it is a dict.
+        role: Role of the gate agent that produced gate_result.
+
+    Returns:
+        One of the _EXIT_CONDITIONS values, or None if absent/invalid/unsupported.
+
+    Fail-open: invalid or unknown values are treated as None → triggers auto-return.
+    Only reviewer output supports exit_condition; any other role always yields None.
+    """
+    if role != "reviewer":
+        return None
+    output = gate_result.get("output")
+    if not isinstance(output, dict):
+        return None
+    raw = output.get("exit_condition")
+    if raw is None:
+        return None
+    # Type-check before the membership test: agent-produced JSON may carry a
+    # list or dict here, and testing an unhashable value against a frozenset
+    # raises TypeError instead of failing open as documented.
+    if isinstance(raw, str) and raw in _EXIT_CONDITIONS:
+        return raw
+    _logger.warning(
+        "KIN-136: unknown exit_condition %r from reviewer — treating as None (auto-return)",
+        raw,
+    )
+    return None
+
+
+def _trigger_auto_return(
+    conn: "sqlite3.Connection",
+    task_id: str,
+    project_id: str,
+    pipeline: dict | None,
+    original_steps: list[dict],
+    gate_role: str,
+    gate_reason: str,
+    allow_write: bool,
+    noninteractive: bool,
+    gate_output_json: str | None = None,
+) -> dict:
+    """Attempt auto-return: re-run the pipeline with return_analyst prepended.
+
+    Steps:
+    (a) Check return_count against _AUTO_RETURN_MAX threshold — escalate if reached.
+    (b) Record the task return (best-effort).
+    (c) Mark current pipeline failed (best-effort); set task status to 'revising'.
+    (d) Spawn new pipeline: [return_analyst] + original_steps.
+
+    Args:
+        conn: Database connection handed to the models helpers and run_pipeline.
+        task_id: Task being returned.
+        project_id: Accepted for call-site symmetry; not used by this function.
+        pipeline: Current pipeline row, or None; only its "id" is read.
+        original_steps: Steps of the pipeline that was blocked; copied, not mutated.
+        gate_role: Role of the gate agent that blocked the task.
+        gate_reason: Block reason; its first 200 characters go into the return record.
+        allow_write: Forwarded unchanged to run_pipeline.
+        noninteractive: Forwarded unchanged to run_pipeline.
+        gate_output_json: Forwarded to run_pipeline as initial_previous_output.
+
+    Returns:
+        {"should_escalate": True, "reason": "auto_return_threshold_exceeded"} if threshold hit.
+        {"should_escalate": False, "auto_return_result": {...}} otherwise.
+    """
+    # (a) Check threshold — fetch current return_count before recording new one
+    current_task = models.get_task(conn, task_id)
+    current_return_count = (current_task or {}).get("return_count") or 0
+    if current_return_count >= _AUTO_RETURN_MAX:
+        _logger.warning(
+            "KIN-136: auto-return threshold reached for task %s "
+            "(return_count=%d >= max=%d) — escalating to human",
+            task_id, current_return_count, _AUTO_RETURN_MAX,
+        )
+        return {"should_escalate": True, "reason": "auto_return_threshold_exceeded"}
+
+    pipeline_id = pipeline["id"] if pipeline else None
+
+    # (b) Record return. Tracking errors never block the auto-return, but they
+    # are logged so a broken tracker is visible.
+    # NOTE(review): the threshold in (a) reads return_count; presumably
+    # record_task_return is what advances it — confirm. If so, a persistent
+    # failure here means the limit is never reached.
+    try:
+        models.record_task_return(
+            conn,
+            task_id=task_id,
+            reason_category="recurring_quality_fail",
+            reason_text=f"Gate {gate_role}: {gate_reason[:200]}",
+            returned_by=gate_role,
+            pipeline_id=pipeline_id,
+        )
+    except Exception:
+        _logger.warning(
+            "KIN-136: failed to record auto-return for task %s — continuing",
+            task_id, exc_info=True,
+        )
+
+    # (c) Mark current pipeline failed, set task to revising
+    if pipeline:
+        try:
+            models.update_pipeline(conn, pipeline_id, status="failed")
+        except Exception:
+            _logger.warning(
+                "KIN-136: failed to mark pipeline %s as failed for task %s — continuing",
+                pipeline_id, task_id, exc_info=True,
+            )
+    models.update_task(conn, task_id, status="revising")
+
+    try:
+        models.write_log(
+            conn, pipeline_id,
+            f"KIN-136: auto-return triggered by {gate_role} "
+            f"(return_count now {current_return_count + 1}) — spawning return_analyst pipeline",
+            level="INFO",
+            extra={"gate_role": gate_role, "return_count": current_return_count + 1},
+        )
+    except Exception:
+        _logger.warning(
+            "KIN-136: failed to write auto-return log entry for task %s — continuing",
+            task_id, exc_info=True,
+        )
+
+    # (d) Build new steps and spawn new pipeline
+    new_steps = [{"role": "return_analyst", "model": "opus"}] + list(original_steps)
+    auto_return_result = run_pipeline(
+        conn,
+        task_id,
+        new_steps,
+        allow_write=allow_write,
+        noninteractive=noninteractive,
+        initial_previous_output=gate_output_json,
+        parent_pipeline_id=pipeline_id,
+    )
+
+    return {"should_escalate": False, "auto_return_result": auto_return_result}
+
+
 # ---------------------------------------------------------------------------
 # Destructive operation detection (KIN-116)
 # ---------------------------------------------------------------------------
@ -2509,6 +2627,37 @@ def run_pipeline(
            )
            if _cannot_close is not None:
                _block_reason = _cannot_close["reason"]
+                _pipeline_type = (pipeline or {}).get("pipeline_type", "standard")
+
+                # KIN-136: auto-return — attempt re-run instead of blocking when:
+                # - reviewer did not set an exit_condition requiring human intervention
+                # - not inside an escalation pipeline (guard against recursive loops)
+                # - not a dry_run
+                _exit_cond = _parse_exit_condition(_gate_result or {}, effective_last_role)
+                if _exit_cond is None and _pipeline_type != "escalation" and not dry_run:
+                    _gate_out = (_gate_result or {}).get("output")
+                    _gate_out_json = (
+                        json.dumps(_gate_out, ensure_ascii=False)
+                        if isinstance(_gate_out, dict) else str(_gate_out or "")
+                    )
+                    _ar = _trigger_auto_return(
+                        conn, task_id, project_id, pipeline,
+                        original_steps=steps,
+                        gate_role=effective_last_role,
+                        gate_reason=_block_reason,
+                        allow_write=allow_write,
+                        noninteractive=noninteractive,
+                        gate_output_json=_gate_out_json,
+                    )
+                    if not _ar["should_escalate"]:
+                        _ar_result = _ar["auto_return_result"]
+                        _ar_result["auto_returned"] = True
+                        return _ar_result
+                    # Threshold exceeded — fall through to human escalation
+                    _block_reason = f"{_block_reason} [auto_return_limit_reached]"
+
+                # Human escalation path: exit_condition set, escalation pipeline,
+                # dry_run, or auto-return threshold exceeded
                models.update_task(
                    conn, task_id,
                    status="blocked",
@ -2525,7 +2674,6 @@ def run_pipeline(
                        total_duration_seconds=total_duration,
                    )
                # KIN-135: record gate return — skip for escalation pipelines to avoid loops
-                _pipeline_type = (pipeline or {}).get("pipeline_type", "standard")
                if _pipeline_type != "escalation":
                    try:
                        models.record_task_return(