Add backlog audit and task update command

- agents/prompts/backlog_audit.md: QA analyst prompt for checking
  which pending tasks are already implemented in the codebase
- agents/runner.py: run_audit() — project-level agent that reads
  all pending tasks, inspects code, returns classification
- cli/main.py: kin audit <project_id> — runs audit, offers to mark
  done tasks; kin task update <id> --status --priority
- web/api.py: POST /api/projects/{id}/audit (runs audit inline),
  POST /api/projects/{id}/audit/apply (batch mark as done)
- Frontend: "Audit backlog" button on ProjectView with results
  modal showing already_done/still_pending/unclear categories

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Gros Frumos 2026-03-15 17:44:16 +02:00
parent e755a19633
commit 96509dcafc
9 changed files with 548 additions and 2 deletions

View file

@ -0,0 +1,44 @@
You are a QA analyst performing a backlog audit.
## Your task
You receive a list of pending tasks and have access to the project's codebase.
For EACH task, determine: is the described feature/fix already implemented in the current code?
## Rules
- Check actual files, functions, tests — don't guess
- Look at: file existence, function names, imports, test coverage, recent git log
- Read relevant source files before deciding
- If the task describes a feature and you find matching code — it's done
- If the task describes a bug fix and you see the fix applied — it's done
- If you find partial implementation — mark as "unclear"
- If you can't find any related code — it's still pending
## How to investigate
1. Read package.json / pyproject.toml for project structure
2. List src/ directory to understand file layout
3. For each task, search for keywords in the codebase
4. Read relevant files to confirm implementation
5. Check tests if they exist
## Output format
Return ONLY valid JSON:
```json
{
"already_done": [
{"id": "TASK-001", "reason": "Implemented in src/api.ts:42, function fetchData()"}
],
"still_pending": [
{"id": "TASK-003", "reason": "No matching code found in codebase"}
],
"unclear": [
{"id": "TASK-007", "reason": "Partial implementation in src/utils.ts, needs review"}
]
}
```
Every task from the input list MUST appear in exactly one category.

View file

@ -210,6 +210,134 @@ def _try_parse_json(text: str) -> Any:
return None
# ---------------------------------------------------------------------------
# Backlog audit
# ---------------------------------------------------------------------------
# Directory holding the agent prompt templates (e.g. backlog_audit.md).
PROMPTS_DIR = Path(__file__).parent / "prompts"
# ISO language code -> English language name, used to build the
# "ALWAYS respond in <language>" instruction in agent prompts.
_LANG_NAMES = {"ru": "Russian", "en": "English", "es": "Spanish",
"de": "German", "fr": "French"}
def run_audit(
    conn: sqlite3.Connection,
    project_id: str,
    noninteractive: bool = False,
) -> dict:
    """Audit pending tasks against the actual codebase.

    Builds a prompt from the backlog_audit template plus the project's
    pending tasks, runs the agent (sonnet) inside the project directory,
    logs the run to agent_logs, and parses the agent's JSON verdict.

    Args:
        conn: Open SQLite connection.
        project_id: ID of the project to audit.
        noninteractive: Forwarded to the agent runner; suppresses
            permission prompts when True (e.g. when called from the API).

    Returns:
        On success: {success, already_done, still_pending, unclear,
        duration_seconds, tokens_used, cost_usd}.
        On failure: {success: False, error, ...} with raw_output when
        the agent produced any.
    """
    project = models.get_project(conn, project_id)
    if not project:
        return {"success": False, "error": f"Project '{project_id}' not found"}

    pending = models.list_tasks(conn, project_id=project_id, status="pending")
    if not pending:
        # Nothing to audit — succeed with empty categories.
        return {
            "success": True,
            "already_done": [],
            "still_pending": [],
            "unclear": [],
            "message": "No pending tasks to audit",
        }

    # Build prompt from the markdown template (with an inline fallback
    # so a missing prompts file degrades gracefully).
    prompt_path = PROMPTS_DIR / "backlog_audit.md"
    template = prompt_path.read_text() if prompt_path.exists() else (
        "You are a QA analyst. Check if pending tasks are already done in the code."
    )
    task_list = [
        {"id": t["id"], "title": t["title"], "brief": t.get("brief")}
        for t in pending
    ]
    sections = [
        template,
        "",
        # BUG FIX: id and name were concatenated with no separator
        # ("vdolVDOL Project"); separate them for a readable header.
        f"## Project: {project['id']} — {project['name']}",
    ]
    if project.get("tech_stack"):
        sections.append(f"Tech stack: {', '.join(project['tech_stack'])}")
    sections.append(f"Path: {project['path']}")
    sections.append("")
    sections.append(f"## Pending tasks ({len(task_list)}):")
    sections.append(json.dumps(task_list, ensure_ascii=False, indent=2))
    sections.append("")
    language = project.get("language", "ru")
    lang_name = _LANG_NAMES.get(language, language)
    sections.append("## Language")
    sections.append(f"ALWAYS respond in {lang_name}.")
    sections.append("")
    prompt = "\n".join(sections)

    # Run inside the project directory so the agent can inspect the code.
    working_dir = None
    project_path = Path(project["path"]).expanduser()
    if project_path.is_dir():
        working_dir = str(project_path)

    start = time.monotonic()
    result = _run_claude(prompt, model="sonnet", working_dir=working_dir,
                         noninteractive=noninteractive)
    duration = int(time.monotonic() - start)

    raw_output = result.get("output", "")
    if not isinstance(raw_output, str):
        raw_output = json.dumps(raw_output, ensure_ascii=False)
    success = result["returncode"] == 0

    # Always log the run — even failures — for cost/debug visibility.
    models.log_agent_run(
        conn,
        project_id=project_id,
        task_id=None,
        agent_role="backlog_audit",
        action="audit",
        input_summary=f"project={project_id}, pending_tasks={len(pending)}",
        output_summary=raw_output or None,
        tokens_used=result.get("tokens_used"),
        model="sonnet",
        cost_usd=result.get("cost_usd"),
        success=success,
        error_message=result.get("error") if not success else None,
        duration_seconds=duration,
    )
    if not success:
        return {
            "success": False,
            "error": result.get("error", "Agent failed"),
            "raw_output": raw_output,
            "duration_seconds": duration,
        }

    # Parse the structured verdict; anything non-dict is a failure.
    parsed = _try_parse_json(raw_output)
    if not isinstance(parsed, dict):
        return {
            "success": False,
            "error": "Agent returned non-JSON output",
            "raw_output": raw_output,
            "duration_seconds": duration,
        }
    return {
        "success": True,
        "already_done": parsed.get("already_done", []),
        "still_pending": parsed.get("still_pending", []),
        "unclear": parsed.get("unclear", []),
        "duration_seconds": duration,
        "tokens_used": result.get("tokens_used"),
        "cost_usd": result.get("cost_usd"),
    }
# ---------------------------------------------------------------------------
# Pipeline executor
# ---------------------------------------------------------------------------

View file

@ -220,6 +220,32 @@ def task_show(ctx, id):
click.echo(f" Updated: {t['updated_at']}")
@task.command("update")
@click.argument("task_id")
@click.option("--status", type=click.Choice(
    ["pending", "in_progress", "review", "done", "blocked", "decomposed"]),
    default=None, help="New status")
# BUG FIX: help text promised 1-10 but any int was accepted;
# IntRange enforces the documented contract with a clear click error.
@click.option("--priority", type=click.IntRange(1, 10), default=None,
              help="New priority (1-10)")
@click.pass_context
def task_update(ctx, task_id, status, priority):
    """Update a task's status or priority.

    Requires at least one of --status/--priority; exits non-zero when
    the task does not exist or no fields were given.
    """
    conn = ctx.obj["conn"]
    t = models.get_task(conn, task_id)
    if not t:
        click.echo(f"Task '{task_id}' not found.", err=True)
        raise SystemExit(1)
    # Collect only the fields the user actually passed.
    fields = {}
    if status is not None:
        fields["status"] = status
    if priority is not None:
        fields["priority"] = priority
    if not fields:
        click.echo("Nothing to update. Use --status or --priority.", err=True)
        raise SystemExit(1)
    updated = models.update_task(conn, task_id, **fields)
    click.echo(f"Updated {updated['id']}: status={updated['status']}, priority={updated['priority']}")
# ===========================================================================
# decision
# ===========================================================================
@ -564,6 +590,65 @@ def run_task(ctx, task_id, dry_run, allow_write):
click.echo(f"Duration: {result['total_duration_seconds']}s")
# ===========================================================================
# audit
# ===========================================================================
@cli.command("audit")
@click.argument("project_id")
@click.pass_context
def audit_backlog(ctx, project_id):
    """Audit pending tasks — check which are already implemented in the code."""
    from agents.runner import run_audit

    conn = ctx.obj["conn"]
    project = models.get_project(conn, project_id)
    if project is None:
        click.echo(f"Project '{project_id}' not found.", err=True)
        raise SystemExit(1)

    pending = models.list_tasks(conn, project_id=project_id, status="pending")
    if not pending:
        click.echo("No pending tasks to audit.")
        return

    click.echo(f"Auditing {len(pending)} pending tasks for {project_id}...")
    result = run_audit(conn, project_id)
    if not result["success"]:
        click.echo(f"Audit failed: {result.get('error', 'unknown')}", err=True)
        raise SystemExit(1)

    done = result.get("already_done", [])
    # Print each non-empty category under its own heading.
    for label, items in (
        ("Already done", done),
        ("Still pending", result.get("still_pending", [])),
        ("Unclear", result.get("unclear", [])),
    ):
        if not items:
            continue
        click.echo(f"\n{label} ({len(items)}):")
        for item in items:
            click.echo(f" {item['id']}: {item.get('reason', '')}")

    if result.get("cost_usd"):
        click.echo(f"\nCost: ${result['cost_usd']:.4f}")
    if result.get("duration_seconds"):
        click.echo(f"Duration: {result['duration_seconds']}s")

    # Offer to batch-apply the "already done" findings.
    if done and click.confirm(f"\nMark {len(done)} tasks as done?"):
        for item in done:
            models.update_task(conn, item["id"], status="done")
        click.echo(f"Marked {len(done)} tasks as done.")
# ===========================================================================
# bootstrap
# ===========================================================================

View file

@ -201,3 +201,35 @@ def test_project_summary_includes_review(client):
r = client.get("/api/projects")
projects = r.json()
assert projects[0]["review_tasks"] == 1
def test_audit_not_found(client):
    """Auditing an unknown project returns 404."""
    response = client.post("/api/projects/NOPE/audit")
    assert response.status_code == 404
def test_audit_apply(client):
    """POST /audit/apply should mark tasks as done."""
    response = client.post("/api/projects/p1/audit/apply",
                           json={"task_ids": ["P1-001"]})
    assert response.status_code == 200
    payload = response.json()
    assert payload["count"] == 1
    assert "P1-001" in payload["updated"]
    # The status change must be persisted, not just echoed back.
    response = client.get("/api/tasks/P1-001")
    assert response.json()["status"] == "done"
def test_audit_apply_not_found(client):
    """Applying audit results for an unknown project returns 404."""
    response = client.post("/api/projects/NOPE/audit/apply",
                           json={"task_ids": ["P1-001"]})
    assert response.status_code == 404
def test_audit_apply_wrong_project(client):
    """Tasks not belonging to the project should be skipped."""
    response = client.post("/api/projects/p1/audit/apply",
                           json={"task_ids": ["WRONG-001"]})
    assert response.status_code == 200
    assert response.json()["count"] == 0

View file

@ -205,3 +205,38 @@ def test_cost_with_data(runner):
assert r.exit_code == 0
assert "p1" in r.output
assert "$0.1000" in r.output
# ===========================================================================
# task update
# ===========================================================================
def test_task_update_status(runner):
    """`task update --status` changes and persists the status."""
    invoke(runner, ["project", "add", "p1", "P1", "/p1"])
    invoke(runner, ["task", "add", "p1", "Fix bug"])
    result = invoke(runner, ["task", "update", "P1-001", "--status", "done"])
    assert result.exit_code == 0
    assert "done" in result.output
    # `task show` must reflect the persisted change.
    result = invoke(runner, ["task", "show", "P1-001"])
    assert "done" in result.output
def test_task_update_priority(runner):
    """`task update --priority` changes the priority."""
    invoke(runner, ["project", "add", "p1", "P1", "/p1"])
    invoke(runner, ["task", "add", "p1", "Fix bug"])
    result = invoke(runner, ["task", "update", "P1-001", "--priority", "1"])
    assert result.exit_code == 0
    assert "priority=1" in result.output
def test_task_update_not_found(runner):
    """Updating a nonexistent task exits non-zero."""
    result = invoke(runner, ["task", "update", "NOPE", "--status", "done"])
    assert result.exit_code != 0
def test_task_update_no_fields(runner):
    """Calling update with neither --status nor --priority is an error."""
    invoke(runner, ["project", "add", "p1", "P1", "/p1"])
    invoke(runner, ["task", "add", "p1", "Fix bug"])
    result = invoke(runner, ["task", "update", "P1-001"])
    assert result.exit_code != 0

View file

@ -6,7 +6,7 @@ import pytest
from unittest.mock import patch, MagicMock
from core.db import init_db
from core import models
from agents.runner import run_agent, run_pipeline, _try_parse_json
from agents.runner import run_agent, run_pipeline, run_audit, _try_parse_json
@pytest.fixture
@ -335,3 +335,82 @@ class TestNonInteractive:
run_agent(conn, "debugger", "VDOL-001", "vdol", allow_write=False)
cmd = mock_run.call_args[0][0]
assert "--dangerously-skip-permissions" not in cmd
# ---------------------------------------------------------------------------
# run_audit
# ---------------------------------------------------------------------------
class TestRunAudit:
    """Unit tests for run_audit with the claude subprocess mocked out."""

    @staticmethod
    def _empty_verdict() -> str:
        # JSON verdict with all three categories empty.
        return json.dumps(
            {"already_done": [], "still_pending": [], "unclear": []}
        )

    @patch("agents.runner.subprocess.run")
    def test_audit_success(self, mock_run, conn):
        """Audit should return parsed already_done/still_pending/unclear."""
        verdict = json.dumps({
            "already_done": [{"id": "VDOL-001", "reason": "Fixed in runner.py"}],
            "still_pending": [],
            "unclear": [],
        })
        mock_run.return_value = _mock_claude_success({"result": verdict})
        outcome = run_audit(conn, "vdol")
        assert outcome["success"] is True
        assert len(outcome["already_done"]) == 1
        assert outcome["already_done"][0]["id"] == "VDOL-001"

    @patch("agents.runner.subprocess.run")
    def test_audit_logs_to_db(self, mock_run, conn):
        """Audit should log to agent_logs with role=backlog_audit."""
        mock_run.return_value = _mock_claude_success(
            {"result": self._empty_verdict()}
        )
        run_audit(conn, "vdol")
        rows = conn.execute(
            "SELECT * FROM agent_logs WHERE agent_role='backlog_audit'"
        ).fetchall()
        assert len(rows) == 1
        assert rows[0]["action"] == "audit"

    def test_audit_no_pending_tasks(self, conn):
        """If no pending tasks, return success with empty lists."""
        # Flip the fixture's only task to done so the backlog is empty.
        models.update_task(conn, "VDOL-001", status="done")
        outcome = run_audit(conn, "vdol")
        assert outcome["success"] is True
        assert outcome["already_done"] == []
        assert "No pending tasks" in outcome.get("message", "")

    def test_audit_project_not_found(self, conn):
        """Unknown project id yields success=False with an error."""
        outcome = run_audit(conn, "nonexistent")
        assert outcome["success"] is False
        assert "not found" in outcome["error"]

    @patch("agents.runner.subprocess.run")
    def test_audit_uses_sonnet(self, mock_run, conn):
        """Audit should use sonnet model."""
        mock_run.return_value = _mock_claude_success(
            {"result": self._empty_verdict()}
        )
        run_audit(conn, "vdol")
        cmd = mock_run.call_args[0][0]
        assert cmd[cmd.index("--model") + 1] == "sonnet"

    @patch("agents.runner.subprocess.run")
    def test_audit_includes_tasks_in_prompt(self, mock_run, conn):
        """The prompt should contain the task id and title."""
        mock_run.return_value = _mock_claude_success(
            {"result": self._empty_verdict()}
        )
        run_audit(conn, "vdol")
        sent_prompt = mock_run.call_args[0][0][2]  # -p argument
        assert "VDOL-001" in sent_prompt
        assert "Fix bug" in sent_prompt

View file

@ -385,6 +385,47 @@ def list_tickets(project: str | None = None, status: str | None = None):
return tickets
# ---------------------------------------------------------------------------
# Audit
# ---------------------------------------------------------------------------
@app.post("/api/projects/{project_id}/audit")
def audit_project(project_id: str):
    """Run backlog audit — check which pending tasks are already done.

    Runs the audit inline (blocking) and returns the runner's result
    dict. Raises 404 when the project does not exist.
    """
    from agents.runner import run_audit

    conn = get_conn()
    # BUG FIX: conn leaked when run_audit raised; finally guarantees close.
    try:
        p = models.get_project(conn, project_id)
        if not p:
            raise HTTPException(404, f"Project '{project_id}' not found")
        # noninteractive: the agent must never prompt when run via HTTP.
        return run_audit(conn, project_id, noninteractive=True)
    finally:
        conn.close()
class AuditApply(BaseModel):
    """Request body for /audit/apply."""
    # IDs of tasks the user confirmed as already implemented.
    task_ids: list[str]
@app.post("/api/projects/{project_id}/audit/apply")
def audit_apply(project_id: str, body: AuditApply):
    """Mark tasks as done after audit confirmation.

    Task IDs that do not exist or belong to a different project are
    silently skipped; the response lists only the tasks updated.
    """
    conn = get_conn()
    # BUG FIX: conn leaked on exceptions; finally guarantees close.
    try:
        p = models.get_project(conn, project_id)
        if not p:
            raise HTTPException(404, f"Project '{project_id}' not found")
        updated = []
        for tid in body.task_ids:
            t = models.get_task(conn, tid)
            # Guard against cross-project IDs smuggled into the payload.
            if t and t["project_id"] == project_id:
                models.update_task(conn, tid, status="done")
                updated.append(tid)
        return {"updated": updated, "count": len(updated)}
    finally:
        conn.close()
# ---------------------------------------------------------------------------
# Bootstrap
# ---------------------------------------------------------------------------

View file

@ -108,6 +108,21 @@ export interface CostEntry {
total_duration_seconds: number
}
/** One task classified by the backlog audit. */
export interface AuditItem {
  id: string
  /** Agent's justification for the classification. */
  reason: string
}

/** Result payload of POST /api/projects/{id}/audit. */
export interface AuditResult {
  success: boolean
  already_done: AuditItem[]
  still_pending: AuditItem[]
  unclear: AuditItem[]
  duration_seconds?: number
  cost_usd?: number
  /** Set when success is false. */
  error?: string
}
export const api = {
projects: () => get<Project[]>('/projects'),
project: (id: string) => get<ProjectDetail>(`/projects/${id}`),
@ -129,4 +144,8 @@ export const api = {
post<{ status: string }>(`/tasks/${id}/run`, { allow_write: allowWrite }),
bootstrap: (data: { path: string; id: string; name: string }) =>
post<{ project: Project }>('/bootstrap', data),
auditProject: (projectId: string) =>
post<AuditResult>(`/projects/${projectId}/audit`, {}),
auditApply: (projectId: string, taskIds: string[]) =>
post<{ updated: string[]; count: number }>(`/projects/${projectId}/audit/apply`, { task_ids: taskIds }),
}

View file

@ -1,6 +1,6 @@
<script setup lang="ts">
import { ref, onMounted, computed } from 'vue'
import { api, type ProjectDetail } from '../api'
import { api, type ProjectDetail, type AuditResult } from '../api'
import Badge from '../components/Badge.vue'
import Modal from '../components/Modal.vue'
@ -28,6 +28,42 @@ function toggleMode() {
localStorage.setItem(`kin-mode-${props.id}`, autoMode.value ? 'auto' : 'review')
}
// Audit
const auditLoading = ref(false)
const auditResult = ref<AuditResult | null>(null)
const showAuditModal = ref(false)
const auditApplying = ref(false)
// Kick off a backlog audit and open the results modal on success.
async function runAudit() {
  auditLoading.value = true
  auditResult.value = null
  try {
    auditResult.value = await api.auditProject(props.id)
    showAuditModal.value = true
  } catch (e: any) {
    error.value = e.message
  } finally {
    auditLoading.value = false
  }
}
// Mark every "already done" task as done on the server, then refresh.
async function applyAudit() {
  const done = auditResult.value?.already_done
  if (!done?.length) return
  auditApplying.value = true
  try {
    await api.auditApply(props.id, done.map(t => t.id))
    showAuditModal.value = false
    auditResult.value = null
    await load()
  } catch (e: any) {
    error.value = e.message
  } finally {
    auditApplying.value = false
  }
}
// Add task modal
const showAddTask = ref(false)
const taskForm = ref({ title: '', priority: 5, route_type: '' })
@ -216,6 +252,12 @@ async function addDecision() {
:title="autoMode ? 'Auto mode: agents can write files' : 'Review mode: agents read-only'">
{{ autoMode ? '&#x1F513; Auto' : '&#x1F512; Review' }}
</button>
<button @click="runAudit" :disabled="auditLoading"
class="px-2 py-1 text-xs bg-purple-900/30 text-purple-400 border border-purple-800 rounded hover:bg-purple-900/50 disabled:opacity-50"
title="Check which pending tasks are already done">
<span v-if="auditLoading" class="inline-block w-3 h-3 border-2 border-purple-400 border-t-transparent rounded-full animate-spin mr-1"></span>
{{ auditLoading ? 'Auditing...' : 'Audit backlog' }}
</button>
<button @click="showAddTask = true"
class="px-3 py-1 text-xs bg-gray-800 text-gray-300 border border-gray-700 rounded hover:bg-gray-700">
+ Task
@ -350,5 +392,46 @@ async function addDecision() {
</button>
</form>
</Modal>
<!-- Audit Modal -->
<Modal v-if="showAuditModal && auditResult" title="Backlog Audit Results" @close="showAuditModal = false">
<div v-if="!auditResult.success" class="text-red-400 text-sm">
Audit failed: {{ auditResult.error }}
</div>
<div v-else class="space-y-4">
<div v-if="auditResult.already_done?.length">
<h3 class="text-sm font-semibold text-green-400 mb-2">Already done ({{ auditResult.already_done.length }})</h3>
<div v-for="item in auditResult.already_done" :key="item.id"
class="px-3 py-2 border border-green-900/50 rounded text-xs mb-1">
<span class="text-green-400 font-medium">{{ item.id }}</span>
<span class="text-gray-400 ml-2">{{ item.reason }}</span>
</div>
</div>
<div v-if="auditResult.still_pending?.length">
<h3 class="text-sm font-semibold text-gray-400 mb-2">Still pending ({{ auditResult.still_pending.length }})</h3>
<div v-for="item in auditResult.still_pending" :key="item.id"
class="px-3 py-2 border border-gray-800 rounded text-xs mb-1">
<span class="text-gray-300 font-medium">{{ item.id }}</span>
<span class="text-gray-500 ml-2">{{ item.reason }}</span>
</div>
</div>
<div v-if="auditResult.unclear?.length">
<h3 class="text-sm font-semibold text-yellow-400 mb-2">Unclear ({{ auditResult.unclear.length }})</h3>
<div v-for="item in auditResult.unclear" :key="item.id"
class="px-3 py-2 border border-yellow-900/50 rounded text-xs mb-1">
<span class="text-yellow-400 font-medium">{{ item.id }}</span>
<span class="text-gray-400 ml-2">{{ item.reason }}</span>
</div>
</div>
<div v-if="auditResult.cost_usd || auditResult.duration_seconds" class="text-xs text-gray-600">
<span v-if="auditResult.duration_seconds">{{ auditResult.duration_seconds }}s</span>
<span v-if="auditResult.cost_usd" class="ml-2">${{ auditResult.cost_usd?.toFixed(4) }}</span>
</div>
<button v-if="auditResult.already_done?.length" @click="applyAudit" :disabled="auditApplying"
class="w-full py-2 bg-green-900/50 text-green-400 border border-green-800 rounded text-sm hover:bg-green-900 disabled:opacity-50">
{{ auditApplying ? 'Applying...' : `Mark ${auditResult.already_done.length} tasks as done` }}
</button>
</div>
</Modal>
</div>
</template>