Add context builder, agent runner, and pipeline executor
core/context_builder.py: build_context() — assembles role-specific context from DB. PM gets everything; debugger gets gotchas/workarounds; reviewer gets conventions only; tester gets minimal context; security gets security-category decisions. format_prompt() — injects context into role templates. agents/runner.py: run_agent() — launches claude CLI as subprocess with role prompt. run_pipeline() — executes multi-step pipelines sequentially, chains output between steps, logs to agent_logs, creates/updates pipeline records, handles failures gracefully. agents/specialists.yaml — 8 roles with tools, permissions, context rules. agents/prompts/pm.md — PM prompt for task decomposition. agents/prompts/security.md — security audit prompt (OWASP, auth, secrets). CLI: kin run <task_id> [--dry-run] PM decomposes → shows pipeline → executes with confirmation. 31 new tests (15 context_builder, 11 runner, 5 JSON parsing). 92 total, all passing. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
86e5b8febf
commit
fabae74c19
8 changed files with 1207 additions and 0 deletions
311
agents/runner.py
Normal file
311
agents/runner.py
Normal file
|
|
@ -0,0 +1,311 @@
|
|||
"""
|
||||
Kin agent runner — launches Claude Code as subprocess with role-specific context.
|
||||
Each agent = separate process with isolated context.
|
||||
"""
|
||||
|
||||
import json
|
||||
import sqlite3
|
||||
import subprocess
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from core import models
|
||||
from core.context_builder import build_context, format_prompt
|
||||
|
||||
|
||||
def run_agent(
    conn: sqlite3.Connection,
    role: str,
    task_id: str,
    project_id: str,
    model: str = "sonnet",
    previous_output: str | None = None,
    brief_override: str | None = None,
    dry_run: bool = False,
) -> dict:
    """Run a single Claude Code agent as a subprocess.

    1. Build context from DB
    2. Format prompt with role template
    3. Run: claude -p "{prompt}" --output-format json
    4. Log result to agent_logs
    5. Return {success, output, tokens_used, duration_seconds, cost_usd}

    Args:
        conn: Open SQLite connection used for context building and logging.
        role: Agent role name (e.g. "debugger", "tester").
        task_id: ID of the task the agent works on.
        project_id: ID of the owning project.
        model: Claude model alias passed to the CLI.
        previous_output: Output of the preceding pipeline step; injected
            into the context so steps can chain.
        brief_override: Replacement brief text for the task, if any.
        dry_run: When True, build and return the prompt without spawning
            a subprocess or logging anything.

    Returns:
        Result dict. On dry runs only prompt metadata is populated; on
        real runs includes success flag, parsed/raw output, and usage stats.
    """
    # Build role-specific context, then layer on per-call overrides.
    ctx = build_context(conn, task_id, role, project_id)
    if previous_output:
        ctx["previous_output"] = previous_output
    if brief_override:
        if ctx.get("task"):
            ctx["task"]["brief"] = brief_override

    prompt = format_prompt(ctx, role)

    if dry_run:
        return {
            "success": True,
            "output": None,
            "prompt": prompt,
            "role": role,
            "model": model,
            "dry_run": True,
        }

    # Code-touching roles run inside the project checkout so the CLI can
    # read and write project files; other roles keep the default cwd.
    project = models.get_project(conn, project_id)
    working_dir = None
    if project and role in ("debugger", "frontend_dev", "backend_dev", "tester", "security"):
        project_path = Path(project["path"]).expanduser()
        if project_path.is_dir():
            working_dir = str(project_path)

    # Run claude subprocess and time it (wall clock, whole seconds).
    start = time.monotonic()
    result = _run_claude(prompt, model=model, working_dir=working_dir)
    duration = int(time.monotonic() - start)

    # Parse output
    output_text = result.get("output", "")
    success = result["returncode"] == 0
    parsed_output = _try_parse_json(output_text)

    # Log to DB (output truncated to 500 chars for the summary column).
    models.log_agent_run(
        conn,
        project_id=project_id,
        task_id=task_id,
        agent_role=role,
        action="execute",
        input_summary=f"task={task_id}, model={model}",
        output_summary=output_text[:500] if output_text else None,
        tokens_used=result.get("tokens_used"),
        model=model,
        cost_usd=result.get("cost_usd"),
        success=success,
        error_message=result.get("error") if not success else None,
        duration_seconds=duration,
    )

    return {
        "success": success,
        # FIX: compare against None instead of truthiness so falsy but
        # valid JSON results ({}, [], 0, false, "") are not silently
        # replaced by the raw text.
        "output": output_text if parsed_output is None else parsed_output,
        "raw_output": output_text,
        "role": role,
        "model": model,
        "duration_seconds": duration,
        "tokens_used": result.get("tokens_used"),
        "cost_usd": result.get("cost_usd"),
    }
|
||||
|
||||
|
||||
def _run_claude(
    prompt: str,
    model: str = "sonnet",
    working_dir: str | None = None,
) -> dict:
    """Execute the claude CLI as a subprocess.

    Args:
        prompt: Full prompt text passed via ``-p``.
        model: Model alias passed via ``--model``.
        working_dir: Directory to run in, or None for the current cwd.

    Returns:
        Dict with at least ``output``, ``error`` and ``returncode``;
        ``tokens_used`` / ``cost_usd`` are added when the CLI's JSON
        envelope exposes them. Spawn failures are reported via shell-style
        return codes (127 = not found, 124 = timeout) instead of raising.
    """
    cmd = [
        "claude",
        "-p", prompt,
        "--output-format", "json",
        "--model", model,
    ]

    try:
        proc = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=600,  # 10 min max
            cwd=working_dir,
        )
    except FileNotFoundError:
        return {
            "output": "",
            "error": "claude CLI not found in PATH",
            "returncode": 127,
        }
    except subprocess.TimeoutExpired:
        return {
            "output": "",
            "error": "Agent timed out after 600s",
            "returncode": 124,
        }

    # Try to extract structured data from JSON output
    output = proc.stdout or ""
    result: dict[str, Any] = {
        "output": output,
        "error": proc.stderr if proc.returncode != 0 else None,
        "returncode": proc.returncode,
    }

    # Parse the JSON envelope produced by `claude --output-format json`.
    parsed = _try_parse_json(output)
    if isinstance(parsed, dict):
        # FIX: .get("usage", {}) only defaults when the key is *absent*;
        # an explicit "usage": null raised AttributeError on .get().
        usage = parsed.get("usage")
        if isinstance(usage, dict):
            result["tokens_used"] = usage.get("total_tokens")
        result["cost_usd"] = parsed.get("cost_usd")
        # The actual content is usually in result or content
        if "result" in parsed:
            result["output"] = parsed["result"]
        elif "content" in parsed:
            result["output"] = parsed["content"]

    return result
|
||||
|
||||
|
||||
def _try_parse_json(text: str) -> Any:
|
||||
"""Try to parse JSON from text. Returns parsed obj or None."""
|
||||
text = text.strip()
|
||||
if not text:
|
||||
return None
|
||||
|
||||
# Direct parse
|
||||
try:
|
||||
return json.loads(text)
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
# Try to find JSON block in markdown code fences
|
||||
import re
|
||||
m = re.search(r"```(?:json)?\s*\n(.*?)\n```", text, re.DOTALL)
|
||||
if m:
|
||||
try:
|
||||
return json.loads(m.group(1))
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
# Try to find first { ... } or [ ... ]
|
||||
for start_char, end_char in [("{", "}"), ("[", "]")]:
|
||||
start = text.find(start_char)
|
||||
if start >= 0:
|
||||
# Find matching close
|
||||
depth = 0
|
||||
for i in range(start, len(text)):
|
||||
if text[i] == start_char:
|
||||
depth += 1
|
||||
elif text[i] == end_char:
|
||||
depth -= 1
|
||||
if depth == 0:
|
||||
try:
|
||||
return json.loads(text[start:i + 1])
|
||||
except json.JSONDecodeError:
|
||||
break
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pipeline executor
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def run_pipeline(
    conn: sqlite3.Connection,
    task_id: str,
    steps: list[dict],
    dry_run: bool = False,
) -> dict:
    """Execute a multi-step pipeline of agents sequentially.

    steps = [
        {"role": "debugger", "model": "opus", "brief": "..."},
        {"role": "tester", "depends_on": "debugger", "brief": "..."},
    ]

    Each step's output is chained into the next step's context. A failing
    step aborts the pipeline, marks it failed, and blocks the task.

    Returns {success, steps_completed, total_cost_usd, total_tokens,
    total_duration_seconds, results, pipeline_id}.
    """
    task = models.get_task(conn, task_id)
    if not task:
        return {"success": False, "error": f"Task '{task_id}' not found"}

    project_id = task["project_id"]

    # Route type comes from the task brief when one is attached and is a
    # dict (presumably models.get_task deserializes it — TODO confirm);
    # otherwise fall back to "custom".
    route_type = "custom"
    brief_data = task.get("brief")
    if isinstance(brief_data, dict):
        route_type = brief_data.get("route_type", "custom") or "custom"

    # Real runs get a pipeline record and move the task to in_progress;
    # dry runs touch nothing in the DB.
    pipeline = None
    if not dry_run:
        pipeline = models.create_pipeline(
            conn, task_id, project_id, route_type, steps,
        )
        models.update_task(conn, task_id, status="in_progress")

    results: list[dict] = []
    total_cost = 0.0
    total_tokens = 0
    total_duration = 0
    previous_output = None

    for index, step in enumerate(steps):
        role = step["role"]
        step_model = step.get("model", "sonnet")

        outcome = run_agent(
            conn, role, task_id, project_id,
            model=step_model,
            previous_output=previous_output,
            brief_override=step.get("brief"),
            dry_run=dry_run,
        )
        results.append(outcome)

        # Dry runs produce no stats and no output worth chaining.
        if dry_run:
            continue

        # Accumulate usage, treating missing/None values as zero.
        total_cost += outcome.get("cost_usd") or 0
        total_tokens += outcome.get("tokens_used") or 0
        total_duration += outcome.get("duration_seconds") or 0

        if not outcome["success"]:
            # A failed step aborts the pipeline: record partial stats,
            # mark the pipeline failed and block the task.
            if pipeline:
                models.update_pipeline(
                    conn, pipeline["id"],
                    status="failed",
                    total_cost_usd=total_cost,
                    total_tokens=total_tokens,
                    total_duration_seconds=total_duration,
                )
            models.update_task(conn, task_id, status="blocked")
            return {
                "success": False,
                "error": f"Step {index + 1}/{len(steps)} ({role}) failed",
                "steps_completed": index,
                "results": results,
                "total_cost_usd": total_cost,
                "total_tokens": total_tokens,
                "total_duration_seconds": total_duration,
                "pipeline_id": pipeline["id"] if pipeline else None,
            }

        # Chain this step's output into the next step's context,
        # serializing structured output to a JSON string.
        previous_output = outcome.get("raw_output") or outcome.get("output")
        if isinstance(previous_output, (dict, list)):
            previous_output = json.dumps(previous_output, ensure_ascii=False)

    # All steps succeeded: close out the pipeline and move the task to
    # review (skipped entirely on dry runs, where no pipeline exists).
    if pipeline and not dry_run:
        models.update_pipeline(
            conn, pipeline["id"],
            status="completed",
            total_cost_usd=total_cost,
            total_tokens=total_tokens,
            total_duration_seconds=total_duration,
        )
        models.update_task(conn, task_id, status="review")

    return {
        "success": True,
        "steps_completed": len(steps),
        "results": results,
        "total_cost_usd": total_cost,
        "total_tokens": total_tokens,
        "total_duration_seconds": total_duration,
        "pipeline_id": pipeline["id"] if pipeline else None,
        "dry_run": dry_run,
    }
|
||||
Loading…
Add table
Add a link
Reference in a new issue