# kin/agents/runner.py

"""
Kin agent runner — launches Claude Code as subprocess with role-specific context.
Each agent = separate process with isolated context.
"""

import json
import sqlite3
import subprocess
import time
from pathlib import Path
from typing import Any

from core import models
from core.context_builder import build_context, format_prompt


def run_agent(
    conn: sqlite3.Connection,
    role: str,
    task_id: str,
    project_id: str,
    model: str = "sonnet",
    previous_output: str | None = None,
    brief_override: str | None = None,
    dry_run: bool = False,
) -> dict:
    """Run a single Claude Code agent as a subprocess.

    Steps:
        1. Build context via ``build_context`` and merge in ``previous_output``
           and ``brief_override`` when given.
        2. Format the prompt for ``role`` via ``format_prompt``.
        3. Unless ``dry_run``, execute the claude CLI through ``_run_claude``.
        4. Log the run with ``models.log_agent_run``.

    Args:
        conn: Open SQLite connection passed through to the model helpers.
        role: Agent role name; also decides whether the project directory is
            used as the subprocess working directory.
        task_id: Task the agent works on.
        project_id: Project the task belongs to.
        model: Model name forwarded to the CLI.
        previous_output: Output of the preceding pipeline step, if any.
        brief_override: Replaces the task brief in the context when the
            context contains a task.
        dry_run: When true, return the formatted prompt without running or
            logging anything.

    Returns:
        For a dry run: ``{success, output, prompt, role, model, dry_run}``.
        Otherwise: ``{success, output, raw_output, role, model,
        duration_seconds, tokens_used, cost_usd}`` where ``output`` is the
        parsed JSON when the text contains (truthy) JSON, else the raw text.
    """
    # Build context
    ctx = build_context(conn, task_id, role, project_id)
    if previous_output:
        ctx["previous_output"] = previous_output
    if brief_override and ctx.get("task"):
        ctx["task"]["brief"] = brief_override

    prompt = format_prompt(ctx, role)

    if dry_run:
        return {
            "success": True,
            "output": None,
            "prompt": prompt,
            "role": role,
            "model": model,
            "dry_run": True,
        }

    # Determine working directory: only these roles run inside the project
    # directory, and only when it actually exists on disk.
    project = models.get_project(conn, project_id)
    working_dir = None
    if project and role in ("debugger", "frontend_dev", "backend_dev", "tester", "security"):
        raw_path = project["path"]
        # Path(None) raises TypeError; a project without a path simply gets
        # no working directory.
        if raw_path:
            project_path = Path(raw_path).expanduser()
            if project_path.is_dir():
                working_dir = str(project_path)

    # Run claude subprocess
    start = time.monotonic()
    result = _run_claude(prompt, model=model, working_dir=working_dir)
    duration = int(time.monotonic() - start)

    # Parse output. _run_claude may hand back a non-string payload (e.g. a
    # "content" list or a null "result"); normalise it to text so parsing and
    # logging always receive a str.
    output_text = result.get("output", "")
    if output_text is None:
        output_text = ""
    elif not isinstance(output_text, str):
        output_text = json.dumps(output_text, ensure_ascii=False)
    success = result["returncode"] == 0
    parsed_output = _try_parse_json(output_text)

    # Log to DB
    models.log_agent_run(
        conn,
        project_id=project_id,
        task_id=task_id,
        agent_role=role,
        action="execute",
        input_summary=f"task={task_id}, model={model}",
        output_summary=output_text or None,
        tokens_used=result.get("tokens_used"),
        model=model,
        cost_usd=result.get("cost_usd"),
        success=success,
        error_message=result.get("error") if not success else None,
        duration_seconds=duration,
    )

    return {
        "success": success,
        "output": parsed_output if parsed_output else output_text,
        "raw_output": output_text,
        "role": role,
        "model": model,
        "duration_seconds": duration,
        "tokens_used": result.get("tokens_used"),
        "cost_usd": result.get("cost_usd"),
    }


def _run_claude(
    prompt: str,
    model: str = "sonnet",
    working_dir: str | None = None,
) -> dict:
    """Execute claude CLI as subprocess. Returns dict with output, returncode, etc."""
    cmd = [
        "claude",
        "-p", prompt,
        "--output-format", "json",
        "--model", model,
    ]

    try:
        proc = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=600,  # 10 min max
            cwd=working_dir,
        )
    except FileNotFoundError:
        return {
            "output": "",
            "error": "claude CLI not found in PATH",
            "returncode": 127,
        }
    except subprocess.TimeoutExpired:
        return {
            "output": "",
            "error": "Agent timed out after 600s",
            "returncode": 124,
        }

    # Try to extract structured data from JSON output
    output = proc.stdout or ""
    result: dict[str, Any] = {
        "output": output,
        "error": proc.stderr if proc.returncode != 0 else None,
        "returncode": proc.returncode,
    }

    # Parse JSON output from claude --output-format json
    parsed = _try_parse_json(output)
    if isinstance(parsed, dict):
        result["tokens_used"] = parsed.get("usage", {}).get("total_tokens")
        result["cost_usd"] = parsed.get("cost_usd")
        # The actual content is usually in result or content
        if "result" in parsed:
            result["output"] = parsed["result"]
        elif "content" in parsed:
            result["output"] = parsed["content"]

    return result


def _try_parse_json(text: str) -> Any:
    """Try to parse JSON from text. Returns parsed obj or None."""
    text = text.strip()
    if not text:
        return None

    # Direct parse
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        pass

    # Try to find JSON block in markdown code fences
    import re
    m = re.search(r"```(?:json)?\s*\n(.*?)\n```", text, re.DOTALL)
    if m:
        try:
            return json.loads(m.group(1))
        except json.JSONDecodeError:
            pass

    # Try to find first { ... } or [ ... ]
    for start_char, end_char in [("{", "}"), ("[", "]")]:
        start = text.find(start_char)
        if start >= 0:
            # Find matching close
            depth = 0
            for i in range(start, len(text)):
                if text[i] == start_char:
                    depth += 1
                elif text[i] == end_char:
                    depth -= 1
                    if depth == 0:
                        try:
                            return json.loads(text[start:i + 1])
                        except json.JSONDecodeError:
                            break
    return None


# ---------------------------------------------------------------------------
# Pipeline executor
# ---------------------------------------------------------------------------

def run_pipeline(
    conn: sqlite3.Connection,
    task_id: str,
    steps: list[dict],
    dry_run: bool = False,
) -> dict:
    """Run a sequence of agent steps for one task, feeding each output forward.

    Example ``steps``::

        [
            {"role": "debugger", "model": "opus", "brief": "..."},
            {"role": "tester", "depends_on": "debugger", "brief": "..."},
        ]

    Only ``role`` (required), ``model`` and ``brief`` are read from each
    step here; steps always run in list order.

    Unless ``dry_run`` is set, a pipeline record is created, the task is
    moved to ``in_progress``, and on completion the task is moved to
    ``review`` (or ``blocked`` when a step fails, at which point execution
    stops).

    Returns a dict with ``success``, ``steps_completed``, ``results``,
    ``total_cost_usd``, ``total_tokens``, ``total_duration_seconds`` and
    ``pipeline_id``; plus ``error`` on failure or ``dry_run`` on success.
    An unknown task returns just ``{success, error}``.
    """
    task = models.get_task(conn, task_id)
    if not task:
        return {"success": False, "error": f"Task '{task_id}' not found"}

    project_id = task["project_id"]

    # The route type comes from a dict-shaped task brief, else "custom".
    task_brief = task.get("brief")
    route_type = "custom"
    if task_brief and isinstance(task_brief, dict):
        route_type = task_brief.get("route_type", "custom") or "custom"

    # Persist the pipeline only for real runs.
    pipeline = None
    if not dry_run:
        pipeline = models.create_pipeline(
            conn, task_id, project_id, route_type, steps,
        )
        models.update_task(conn, task_id, status="in_progress")

    step_results = []
    # Running totals, keyed exactly as they are stored and returned.
    totals = {
        "total_cost_usd": 0.0,
        "total_tokens": 0,
        "total_duration_seconds": 0,
    }
    carried_output = None

    for index, step in enumerate(steps):
        role = step["role"]

        outcome = run_agent(
            conn, role, task_id, project_id,
            model=step.get("model", "sonnet"),
            previous_output=carried_output,
            brief_override=step.get("brief"),
            dry_run=dry_run,
        )
        step_results.append(outcome)

        if dry_run:
            continue

        # Missing or None stats count as zero.
        totals["total_cost_usd"] += outcome.get("cost_usd") or 0
        totals["total_tokens"] += outcome.get("tokens_used") or 0
        totals["total_duration_seconds"] += outcome.get("duration_seconds") or 0

        if outcome["success"]:
            # Hand this step's output to the next one, always as text.
            carried_output = outcome.get("raw_output") or outcome.get("output")
            if isinstance(carried_output, (dict, list)):
                carried_output = json.dumps(carried_output, ensure_ascii=False)
            continue

        # A step failed: record the failure, block the task and stop.
        if pipeline:
            models.update_pipeline(
                conn, pipeline["id"],
                status="failed",
                **totals,
            )
        models.update_task(conn, task_id, status="blocked")
        return {
            "success": False,
            "error": f"Step {index+1}/{len(steps)} ({role}) failed",
            "steps_completed": index,
            "results": step_results,
            **totals,
            "pipeline_id": pipeline["id"] if pipeline else None,
        }

    # Every step ran (or was skipped by dry_run).
    if pipeline and not dry_run:
        models.update_pipeline(
            conn, pipeline["id"],
            status="completed",
            **totals,
        )
        models.update_task(conn, task_id, status="review")

    return {
        "success": True,
        "steps_completed": len(steps),
        "results": step_results,
        **totals,
        "pipeline_id": pipeline["id"] if pipeline else None,
        "dry_run": dry_run,
    }
Add context builder, agent runner, and pipeline executor core/context_builder.py: build_context() — assembles role-specific context from DB. PM gets everything; debugger gets gotchas/workarounds; reviewer gets conventions only; tester gets minimal context; security gets security-category decisions. format_prompt() — injects context into role templates. agents/runner.py: run_agent() — launches claude CLI as subprocess with role prompt. run_pipeline() — executes multi-step pipelines sequentially, chains output between steps, logs to agent_logs, creates/updates pipeline records, handles failures gracefully. agents/specialists.yaml — 8 roles with tools, permissions, context rules. agents/prompts/pm.md — PM prompt for task decomposition. agents/prompts/security.md — security audit prompt (OWASP, auth, secrets). CLI: kin run <task_id> [--dry-run] PM decomposes → shows pipeline → executes with confirmation. 31 new tests (15 context_builder, 11 runner, 5 JSON parsing). 92 total, all passing. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> 2026-03-15 14:03:32 +02:00			`"""`
			`Kin agent runner — launches Claude Code as subprocess with role-specific context.`
			`Each agent = separate process with isolated context.`
			`"""`

			`import json`
			`import sqlite3`
			`import subprocess`
			`import time`
			`from pathlib import Path`
			`from typing import Any`

			`from core import models`
			`from core.context_builder import build_context, format_prompt`


			`def run_agent(`
			`conn: sqlite3.Connection,`
			`role: str,`
			`task_id: str,`
			`project_id: str,`
			`model: str = "sonnet",`
			`previous_output: str | None = None,`
			`brief_override: str | None = None,`
			`dry_run: bool = False,`
			`) -> dict:`
			`"""Run a single Claude Code agent as a subprocess.`

			`1. Build context from DB`
			`2. Format prompt with role template`
			`3. Run: claude -p "{prompt}" --output-format json`
			`4. Log result to agent_logs`
			`5. Return {success, output, tokens_used, duration_seconds, cost_usd}`
			`"""`
			`# Build context`
			`ctx = build_context(conn, task_id, role, project_id)`
			`if previous_output:`
			`ctx["previous_output"] = previous_output`
			`if brief_override:`
			`if ctx.get("task"):`
			`ctx["task"]["brief"] = brief_override`

			`prompt = format_prompt(ctx, role)`

			`if dry_run:`
			`return {`
			`"success": True,`
			`"output": None,`
			`"prompt": prompt,`
			`"role": role,`
			`"model": model,`
			`"dry_run": True,`
			`}`

			`# Determine working directory`
			`project = models.get_project(conn, project_id)`
			`working_dir = None`
			`if project and role in ("debugger", "frontend_dev", "backend_dev", "tester", "security"):`
			`project_path = Path(project["path"]).expanduser()`
			`if project_path.is_dir():`
			`working_dir = str(project_path)`

			`# Run claude subprocess`
			`start = time.monotonic()`
			`result = _run_claude(prompt, model=model, working_dir=working_dir)`
			`duration = int(time.monotonic() - start)`

			`# Parse output`
			`output_text = result.get("output", "")`
			`success = result["returncode"] == 0`
			`parsed_output = _try_parse_json(output_text)`

			`# Log to DB`
			`models.log_agent_run(`
			`conn,`
			`project_id=project_id,`
			`task_id=task_id,`
			`agent_role=role,`
			`action="execute",`
			`input_summary=f"task={task_id}, model={model}",`
Add task detail view, pipeline visualization, approve/reject workflow API (web/api.py) — 5 new endpoints: GET /api/tasks/{id}/pipeline — agent_logs as pipeline steps GET /api/tasks/{id}/full — task + steps + related decisions POST /api/tasks/{id}/approve — mark done, optionally add decision POST /api/tasks/{id}/reject — return to pending with reason POST /api/tasks/{id}/run — launch pipeline in background (202) Frontend: TaskDetail (/task/:id) — full task page with: - Pipeline graph: role cards with icons, arrows, status colors - Click step → expand output (pre-formatted, JSON detected) - Action bar: Approve (with optional decision), Reject, Run Pipeline - Polling for live pipeline updates Dashboard: review_tasks badge ("awaiting review" in yellow) ProjectView: task rows are now clickable links to /task/:id Runner: output_summary no longer truncated (full output for GUI). Models: get_project_summary includes review_tasks count. 13 new API tests, 105 total, all passing. Frontend builds clean. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> 2026-03-15 14:32:29 +02:00			`output_summary=output_text or None,`
Add context builder, agent runner, and pipeline executor core/context_builder.py: build_context() — assembles role-specific context from DB. PM gets everything; debugger gets gotchas/workarounds; reviewer gets conventions only; tester gets minimal context; security gets security-category decisions. format_prompt() — injects context into role templates. agents/runner.py: run_agent() — launches claude CLI as subprocess with role prompt. run_pipeline() — executes multi-step pipelines sequentially, chains output between steps, logs to agent_logs, creates/updates pipeline records, handles failures gracefully. agents/specialists.yaml — 8 roles with tools, permissions, context rules. agents/prompts/pm.md — PM prompt for task decomposition. agents/prompts/security.md — security audit prompt (OWASP, auth, secrets). CLI: kin run <task_id> [--dry-run] PM decomposes → shows pipeline → executes with confirmation. 31 new tests (15 context_builder, 11 runner, 5 JSON parsing). 92 total, all passing. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> 2026-03-15 14:03:32 +02:00			`tokens_used=result.get("tokens_used"),`
			`model=model,`
			`cost_usd=result.get("cost_usd"),`
			`success=success,`
			`error_message=result.get("error") if not success else None,`
			`duration_seconds=duration,`
			`)`

			`return {`
			`"success": success,`
			`"output": parsed_output if parsed_output else output_text,`
			`"raw_output": output_text,`
			`"role": role,`
			`"model": model,`
			`"duration_seconds": duration,`
			`"tokens_used": result.get("tokens_used"),`
			`"cost_usd": result.get("cost_usd"),`
			`}`


			`def _run_claude(`
			`prompt: str,`
			`model: str = "sonnet",`
			`working_dir: str | None = None,`
			`) -> dict:`
			`"""Execute claude CLI as subprocess. Returns dict with output, returncode, etc."""`
			`cmd = [`
			`"claude",`
			`"-p", prompt,`
			`"--output-format", "json",`
			`"--model", model,`
			`]`

			`try:`
			`proc = subprocess.run(`
			`cmd,`
			`capture_output=True,`
			`text=True,`
			`timeout=600, # 10 min max`
			`cwd=working_dir,`
			`)`
			`except FileNotFoundError:`
			`return {`
			`"output": "",`
			`"error": "claude CLI not found in PATH",`
			`"returncode": 127,`
			`}`
			`except subprocess.TimeoutExpired:`
			`return {`
			`"output": "",`
			`"error": "Agent timed out after 600s",`
			`"returncode": 124,`
			`}`

			`# Try to extract structured data from JSON output`
			`output = proc.stdout or ""`
			`result: dict[str, Any] = {`
			`"output": output,`
			`"error": proc.stderr if proc.returncode != 0 else None,`
			`"returncode": proc.returncode,`
			`}`

			`# Parse JSON output from claude --output-format json`
			`parsed = _try_parse_json(output)`
			`if isinstance(parsed, dict):`
			`result["tokens_used"] = parsed.get("usage", {}).get("total_tokens")`
			`result["cost_usd"] = parsed.get("cost_usd")`
			`# The actual content is usually in result or content`
			`if "result" in parsed:`
			`result["output"] = parsed["result"]`
			`elif "content" in parsed:`
			`result["output"] = parsed["content"]`

			`return result`


			`def _try_parse_json(text: str) -> Any:`
			`"""Try to parse JSON from text. Returns parsed obj or None."""`
			`text = text.strip()`
			`if not text:`
			`return None`

			`# Direct parse`
			`try:`
			`return json.loads(text)`
			`except json.JSONDecodeError:`
			`pass`

			`# Try to find JSON block in markdown code fences`
			`import re`
			m = re.search(r"```(?:json)?\s*\n(.*?)\n```", text, re.DOTALL)
			`if m:`
			`try:`
			`return json.loads(m.group(1))`
			`except json.JSONDecodeError:`
			`pass`

			`# Try to find first { ... } or [ ... ]`
			`for start_char, end_char in [("{", "}"), ("[", "]")]:`
			`start = text.find(start_char)`
			`if start >= 0:`
			`# Find matching close`
			`depth = 0`
			`for i in range(start, len(text)):`
			`if text[i] == start_char:`
			`depth += 1`
			`elif text[i] == end_char:`
			`depth -= 1`
			`if depth == 0:`
			`try:`
			`return json.loads(text[start:i + 1])`
			`except json.JSONDecodeError:`
			`break`
			`return None`


			`# ---------------------------------------------------------------------------`
			`# Pipeline executor`
			`# ---------------------------------------------------------------------------`

			`def run_pipeline(`
			`conn: sqlite3.Connection,`
			`task_id: str,`
			`steps: list[dict],`
			`dry_run: bool = False,`
			`) -> dict:`
			`"""Execute a multi-step pipeline of agents.`

			`steps = [`
			`{"role": "debugger", "model": "opus", "brief": "..."},`
			`{"role": "tester", "depends_on": "debugger", "brief": "..."},`
			`]`

			`Returns {success, steps_completed, total_cost, total_tokens, total_duration, results}`
			`"""`
			`task = models.get_task(conn, task_id)`
			`if not task:`
			`return {"success": False, "error": f"Task '{task_id}' not found"}`

			`project_id = task["project_id"]`

			`# Determine route type from steps or task brief`
			`route_type = "custom"`
			`if task.get("brief") and isinstance(task["brief"], dict):`
			`route_type = task["brief"].get("route_type", "custom") or "custom"`

			`# Create pipeline in DB`
			`pipeline = None`
			`if not dry_run:`
			`pipeline = models.create_pipeline(`
			`conn, task_id, project_id, route_type, steps,`
			`)`
			`models.update_task(conn, task_id, status="in_progress")`

			`results = []`
			`total_cost = 0.0`
			`total_tokens = 0`
			`total_duration = 0`
			`previous_output = None`

			`for i, step in enumerate(steps):`
			`role = step["role"]`
			`model = step.get("model", "sonnet")`
			`brief = step.get("brief")`

			`result = run_agent(`
			`conn, role, task_id, project_id,`
			`model=model,`
			`previous_output=previous_output,`
			`brief_override=brief,`
			`dry_run=dry_run,`
			`)`
			`results.append(result)`

			`if dry_run:`
			`continue`

			`# Accumulate stats`
			`total_cost += result.get("cost_usd") or 0`
			`total_tokens += result.get("tokens_used") or 0`
			`total_duration += result.get("duration_seconds") or 0`

			`if not result["success"]:`
			`# Pipeline failed — stop and mark as failed`
			`if pipeline:`
			`models.update_pipeline(`
			`conn, pipeline["id"],`
			`status="failed",`
			`total_cost_usd=total_cost,`
			`total_tokens=total_tokens,`
			`total_duration_seconds=total_duration,`
			`)`
			`models.update_task(conn, task_id, status="blocked")`
			`return {`
			`"success": False,`
			`"error": f"Step {i+1}/{len(steps)} ({role}) failed",`
			`"steps_completed": i,`
			`"results": results,`
			`"total_cost_usd": total_cost,`
			`"total_tokens": total_tokens,`
			`"total_duration_seconds": total_duration,`
			`"pipeline_id": pipeline["id"] if pipeline else None,`
			`}`

			`# Chain output to next step`
			`previous_output = result.get("raw_output") or result.get("output")`
			`if isinstance(previous_output, (dict, list)):`
			`previous_output = json.dumps(previous_output, ensure_ascii=False)`

			`# Pipeline completed`
			`if pipeline and not dry_run:`
			`models.update_pipeline(`
			`conn, pipeline["id"],`
			`status="completed",`
			`total_cost_usd=total_cost,`
			`total_tokens=total_tokens,`
			`total_duration_seconds=total_duration,`
			`)`
			`models.update_task(conn, task_id, status="review")`

			`return {`
			`"success": True,`
			`"steps_completed": len(steps),`
			`"results": results,`
			`"total_cost_usd": total_cost,`
			`"total_tokens": total_tokens,`
			`"total_duration_seconds": total_duration,`
			`"pipeline_id": pipeline["id"] if pipeline else None,`
			`"dry_run": dry_run,`
			`}`