kin: auto-commit after pipeline
This commit is contained in:
parent
04cbbc563b
commit
b6f40a6ace
9 changed files with 1690 additions and 16 deletions
276
agents/runner.py
276
agents/runner.py
|
|
@ -27,6 +27,14 @@ _EXTRA_PATH_DIRS = [
|
|||
"/usr/local/sbin",
|
||||
]
|
||||
|
||||
# Default timeouts per model (seconds). Override globally with KIN_AGENT_TIMEOUT
|
||||
# or per role via timeout_seconds in specialists.yaml.
|
||||
_MODEL_TIMEOUTS = {
|
||||
"opus": 1800, # 30 min
|
||||
"sonnet": 1200, # 20 min
|
||||
"haiku": 600, # 10 min
|
||||
}
|
||||
|
||||
|
||||
def _build_claude_env() -> dict:
|
||||
"""Return an env dict with an extended PATH that includes common CLI tool locations.
|
||||
|
|
@ -182,10 +190,22 @@ def run_agent(
|
|||
if project_path.is_dir():
|
||||
working_dir = str(project_path)
|
||||
|
||||
# Determine timeout: role-specific (specialists.yaml) > model-based > default
|
||||
role_timeout = None
|
||||
try:
|
||||
from core.context_builder import _load_specialists
|
||||
specs = _load_specialists().get("specialists", {})
|
||||
role_spec = specs.get(role, {})
|
||||
if role_spec.get("timeout_seconds"):
|
||||
role_timeout = int(role_spec["timeout_seconds"])
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Run claude subprocess
|
||||
start = time.monotonic()
|
||||
result = _run_claude(prompt, model=model, working_dir=working_dir,
|
||||
allow_write=allow_write, noninteractive=noninteractive)
|
||||
allow_write=allow_write, noninteractive=noninteractive,
|
||||
timeout=role_timeout)
|
||||
duration = int(time.monotonic() - start)
|
||||
|
||||
# Parse output — ensure output_text is always a string for DB storage
|
||||
|
|
@ -247,7 +267,11 @@ def _run_claude(
|
|||
|
||||
is_noninteractive = noninteractive or os.environ.get("KIN_NONINTERACTIVE") == "1"
|
||||
if timeout is None:
|
||||
timeout = int(os.environ.get("KIN_AGENT_TIMEOUT") or 600)
|
||||
env_timeout = os.environ.get("KIN_AGENT_TIMEOUT")
|
||||
if env_timeout:
|
||||
timeout = int(env_timeout)
|
||||
else:
|
||||
timeout = _MODEL_TIMEOUTS.get(model, _MODEL_TIMEOUTS["sonnet"])
|
||||
env = _build_claude_env()
|
||||
|
||||
try:
|
||||
|
|
@ -961,6 +985,187 @@ def _run_learning_extraction(
|
|||
return {"added": added, "skipped": skipped}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Department head detection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Cache of roles with execution_type=department_head from specialists.yaml
|
||||
_DEPT_HEAD_ROLES: set[str] | None = None
|
||||
|
||||
|
||||
def _is_department_head(role: str) -> bool:
|
||||
"""Check if a role is a department head.
|
||||
|
||||
Uses execution_type from specialists.yaml as primary check,
|
||||
falls back to role.endswith('_head') convention.
|
||||
"""
|
||||
global _DEPT_HEAD_ROLES
|
||||
if _DEPT_HEAD_ROLES is None:
|
||||
try:
|
||||
from core.context_builder import _load_specialists
|
||||
specs = _load_specialists()
|
||||
all_specs = specs.get("specialists", {})
|
||||
_DEPT_HEAD_ROLES = {
|
||||
name for name, spec in all_specs.items()
|
||||
if spec.get("execution_type") == "department_head"
|
||||
}
|
||||
except Exception:
|
||||
_DEPT_HEAD_ROLES = set()
|
||||
return role in _DEPT_HEAD_ROLES or role.endswith("_head")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Department head sub-pipeline execution
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _execute_department_head_step(
    conn: sqlite3.Connection,
    task_id: str,
    project_id: str,
    parent_pipeline_id: int | None,
    step: dict,
    dept_head_result: dict,
    allow_write: bool = False,
    noninteractive: bool = False,
    next_department: str | None = None,
) -> dict:
    """Execute sub-pipeline planned by a department head.

    Parses the dept head's JSON output, validates the sub_pipeline,
    creates a child pipeline in DB, runs it, and saves a handoff record.

    Args:
        conn: open SQLite connection used for all model/DB writes.
        task_id: id of the task the pipeline belongs to.
        project_id: id of the project the child pipeline is created under.
        parent_pipeline_id: id of the enclosing pipeline, or None.
        step: pipeline step dict; step["role"] must be the dept head role.
        dept_head_result: result dict from running the dept head agent;
            its "raw_output"/"output" is expected to be JSON text.
        allow_write: forwarded to the sub-pipeline workers.
        noninteractive: accepted for signature compatibility; the sub-pipeline
            always runs noninteractive (the plan was already reviewed).
        next_department: department the handoff record should point to, if any.

    Returns:
        dict with success, output, cost_usd, tokens_used, duration_seconds
        (plus blocked/blocked_reason on a blocked plan, and last_sub_role
        after a sub-pipeline actually ran).
    """

    def _fail(message: str, **extra) -> dict:
        # Uniform failure shape: the sub-pipeline never ran, so costs are zero.
        return {
            "success": False,
            "output": message,
            "cost_usd": 0, "tokens_used": 0, "duration_seconds": 0,
            **extra,
        }

    raw = dept_head_result.get("raw_output") or dept_head_result.get("output") or ""
    if isinstance(raw, (dict, list)):
        # Already-parsed output: serialize so _try_parse_json sees text.
        raw = json.dumps(raw, ensure_ascii=False)

    parsed = _try_parse_json(raw)
    if not isinstance(parsed, dict):
        return _fail("Department head returned non-JSON output")

    # Blocked status from dept head — surface the full plan as output so the
    # caller can log it, plus a dedicated blocked_reason field.
    if parsed.get("status") == "blocked":
        reason = parsed.get("blocked_reason", "Department head reported blocked")
        return _fail(
            json.dumps(parsed, ensure_ascii=False),
            blocked=True,
            blocked_reason=reason,
        )

    sub_pipeline = parsed.get("sub_pipeline", [])
    if not isinstance(sub_pipeline, list) or not sub_pipeline:
        return _fail("Department head returned empty or invalid sub_pipeline")

    # Recursion guard: no department head roles allowed in sub_pipeline.
    for sub_step in sub_pipeline:
        if isinstance(sub_step, dict) and _is_department_head(str(sub_step.get("role", ""))):
            return _fail(
                f"Recursion blocked: sub_pipeline contains _head role '{sub_step['role']}'"
            )

    role = step["role"]
    # removesuffix (not replace) so only a trailing "_head" is stripped —
    # replace() would also mangle any "_head" occurring mid-name.
    dept_name = role.removesuffix("_head")

    # Create child pipeline in DB.
    child_pipeline = models.create_pipeline(
        conn, task_id, project_id,
        route_type="dept_sub",
        steps=sub_pipeline,
        parent_pipeline_id=parent_pipeline_id,
        department=dept_name,
    )

    # Build initial context for workers: dept head's plan + artifacts.
    dept_plan_context = json.dumps({
        "department_head_plan": {
            "department": dept_name,
            "artifacts": parsed.get("artifacts", {}),
            "handoff_notes": parsed.get("handoff_notes", ""),
        },
    }, ensure_ascii=False)

    # Run the sub-pipeline (noninteractive=True — Opus already reviewed the plan).
    sub_result = run_pipeline(
        conn, task_id, sub_pipeline,
        dry_run=False,
        allow_write=allow_write,
        noninteractive=True,
        initial_previous_output=dept_plan_context,
    )

    # Extract decisions from sub-pipeline results for handoff.
    decisions_made: list = []
    for sr in sub_result.get("results", []):
        output = sr.get("output") or sr.get("raw_output") or ""
        if isinstance(output, str):
            try:
                output = json.loads(output)
            except ValueError:  # json.JSONDecodeError is a ValueError subclass
                pass
        if isinstance(output, dict):
            # Reviewer/tester may include decisions or findings.
            for key in ("decisions", "findings", "recommendations"):
                val = output.get(key)
                if isinstance(val, list):
                    decisions_made.extend(val)
                elif isinstance(val, str) and val:
                    decisions_made.append(val)

    # Determine last worker role for auto_complete tracking. sub_pipeline is
    # non-empty here, but its entries are not guaranteed to be dicts — guard
    # so a malformed last entry cannot raise AttributeError.
    last_entry = sub_pipeline[-1]
    last_sub_role = last_entry.get("role", "") if isinstance(last_entry, dict) else ""

    # Save handoff for inter-department context.
    handoff_status = "done" if sub_result.get("success") else "partial"
    try:
        models.create_handoff(
            conn,
            pipeline_id=parent_pipeline_id or child_pipeline["id"],
            task_id=task_id,
            from_department=dept_name,
            to_department=next_department,
            artifacts=parsed.get("artifacts", {}),
            decisions_made=decisions_made,
            blockers=[],
            status=handoff_status,
        )
    except Exception:
        pass  # Handoff save errors must never block pipeline

    # Build summary output for the next pipeline step (handoff summary, not
    # the raw dept head JSON).
    summary = {
        "from_department": dept_name,
        "handoff_notes": parsed.get("handoff_notes", ""),
        "artifacts": parsed.get("artifacts", {}),
        "sub_pipeline_summary": {
            "steps_completed": sub_result.get("steps_completed", 0),
            "success": sub_result.get("success", False),
        },
    }

    return {
        "success": sub_result.get("success", False),
        "output": json.dumps(summary, ensure_ascii=False),
        "cost_usd": sub_result.get("total_cost_usd", 0),
        "tokens_used": sub_result.get("total_tokens", 0),
        "duration_seconds": sub_result.get("total_duration_seconds", 0),
        "last_sub_role": last_sub_role,
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pipeline executor
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -972,6 +1177,7 @@ def run_pipeline(
|
|||
dry_run: bool = False,
|
||||
allow_write: bool = False,
|
||||
noninteractive: bool = False,
|
||||
initial_previous_output: str | None = None,
|
||||
) -> dict:
|
||||
"""Execute a multi-step pipeline of agents.
|
||||
|
||||
|
|
@ -980,6 +1186,9 @@ def run_pipeline(
|
|||
{"role": "tester", "depends_on": "debugger", "brief": "..."},
|
||||
]
|
||||
|
||||
initial_previous_output: context injected as previous_output for the first step
|
||||
(used by dept head sub-pipelines to pass artifacts/plan to workers).
|
||||
|
||||
Returns {success, steps_completed, total_cost, total_tokens, total_duration, results}
|
||||
"""
|
||||
# Auth check — skip for dry_run (dry_run never calls claude CLI)
|
||||
|
|
@ -1020,7 +1229,8 @@ def run_pipeline(
|
|||
total_cost = 0.0
|
||||
total_tokens = 0
|
||||
total_duration = 0
|
||||
previous_output = None
|
||||
previous_output = initial_previous_output
|
||||
_last_sub_role = None # Track last worker role from dept sub-pipelines (for auto_complete)
|
||||
|
||||
for i, step in enumerate(steps):
|
||||
role = step["role"]
|
||||
|
|
@ -1283,6 +1493,62 @@ def run_pipeline(
|
|||
except Exception:
|
||||
pass # Never block pipeline on decomposer save errors
|
||||
|
||||
# Department head: execute sub-pipeline planned by the dept head
|
||||
if _is_department_head(role) and result["success"] and not dry_run:
|
||||
# Determine next department for handoff routing
|
||||
_next_dept = None
|
||||
if i + 1 < len(steps):
|
||||
_next_role = steps[i + 1].get("role", "")
|
||||
if _is_department_head(_next_role):
|
||||
_next_dept = _next_role.replace("_head", "")
|
||||
dept_result = _execute_department_head_step(
|
||||
conn, task_id, project_id,
|
||||
parent_pipeline_id=pipeline["id"] if pipeline else None,
|
||||
step=step,
|
||||
dept_head_result=result,
|
||||
allow_write=allow_write,
|
||||
noninteractive=noninteractive,
|
||||
next_department=_next_dept,
|
||||
)
|
||||
# Accumulate sub-pipeline costs
|
||||
total_cost += dept_result.get("cost_usd") or 0
|
||||
total_tokens += dept_result.get("tokens_used") or 0
|
||||
total_duration += dept_result.get("duration_seconds") or 0
|
||||
|
||||
if not dept_result.get("success"):
|
||||
# Sub-pipeline failed — handle as blocked
|
||||
results.append({"role": role, "_dept_sub": True, **dept_result})
|
||||
if pipeline:
|
||||
models.update_pipeline(
|
||||
conn, pipeline["id"],
|
||||
status="failed",
|
||||
total_cost_usd=total_cost,
|
||||
total_tokens=total_tokens,
|
||||
total_duration_seconds=total_duration,
|
||||
)
|
||||
error_msg = f"Department {role} sub-pipeline failed"
|
||||
models.update_task(conn, task_id, status="blocked", blocked_reason=error_msg)
|
||||
return {
|
||||
"success": False,
|
||||
"error": error_msg,
|
||||
"steps_completed": i,
|
||||
"results": results,
|
||||
"total_cost_usd": total_cost,
|
||||
"total_tokens": total_tokens,
|
||||
"total_duration_seconds": total_duration,
|
||||
"pipeline_id": pipeline["id"] if pipeline else None,
|
||||
}
|
||||
|
||||
# Track last worker role from sub-pipeline for auto_complete eligibility
|
||||
if dept_result.get("last_sub_role"):
|
||||
_last_sub_role = dept_result["last_sub_role"]
|
||||
|
||||
# Override previous_output with dept handoff summary (not raw dept head JSON)
|
||||
previous_output = dept_result.get("output")
|
||||
if isinstance(previous_output, (dict, list)):
|
||||
previous_output = json.dumps(previous_output, ensure_ascii=False)
|
||||
continue
|
||||
|
||||
# Project-level auto-test: run `make test` after backend_dev/frontend_dev steps.
|
||||
# Enabled per project via auto_test_enabled flag (opt-in).
|
||||
# On failure, loop fixer up to KIN_AUTO_TEST_MAX_ATTEMPTS times, then block.
|
||||
|
|
@ -1433,7 +1699,9 @@ def run_pipeline(
|
|||
changed_files = _get_changed_files(str(p_path))
|
||||
|
||||
last_role = steps[-1].get("role", "") if steps else ""
|
||||
auto_eligible = last_role in {"tester", "reviewer"}
|
||||
# For dept pipelines: if last step is a _head, check the last worker in its sub-pipeline
|
||||
effective_last_role = _last_sub_role if (_is_department_head(last_role) and _last_sub_role) else last_role
|
||||
auto_eligible = effective_last_role in {"tester", "reviewer"}
|
||||
|
||||
# Guard: re-fetch current status — user may have manually changed it while pipeline ran
|
||||
current_task = models.get_task(conn, task_id)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue