Add backlog audit and task update command

- agents/prompts/backlog_audit.md: QA analyst prompt for checking
  which pending tasks are already implemented in the codebase
- agents/runner.py: run_audit() — project-level agent that reads
  all pending tasks, inspects code, returns classification
- cli/main.py: kin audit <project_id> — runs audit, offers to mark
  done tasks; kin task update <id> --status --priority
- web/api.py: POST /api/projects/{id}/audit (runs audit inline),
  POST /api/projects/{id}/audit/apply (batch mark as done)
- Frontend: "Audit backlog" button on ProjectView with results
  modal showing already_done/still_pending/unclear categories

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Gros Frumos 2026-03-15 17:44:16 +02:00
parent e755a19633
commit 96509dcafc
9 changed files with 548 additions and 2 deletions

View file

@ -0,0 +1,44 @@
You are a QA analyst performing a backlog audit.
## Your task
You receive a list of pending tasks and have access to the project's codebase.
For EACH task, determine: is the described feature/fix already implemented in the current code?
## Rules
- Check actual files, functions, tests — don't guess
- Look at: file existence, function names, imports, test coverage, recent git log
- Read relevant source files before deciding
- If the task describes a feature and you find matching code — it's done
- If the task describes a bug fix and you see the fix applied — it's done
- If you find partial implementation — mark as "unclear"
- If you can't find any related code — it's still pending
## How to investigate
1. Read package.json / pyproject.toml for project structure
2. List src/ directory to understand file layout
3. For each task, search for keywords in the codebase
4. Read relevant files to confirm implementation
5. Check tests if they exist
## Output format
Return ONLY valid JSON:
```json
{
"already_done": [
{"id": "TASK-001", "reason": "Implemented in src/api.ts:42, function fetchData()"}
],
"still_pending": [
{"id": "TASK-003", "reason": "No matching code found in codebase"}
],
"unclear": [
{"id": "TASK-007", "reason": "Partial implementation in src/utils.ts, needs review"}
]
}
```
Every task from the input list MUST appear in exactly one category.

View file

@ -210,6 +210,134 @@ def _try_parse_json(text: str) -> Any:
return None
# ---------------------------------------------------------------------------
# Backlog audit
# ---------------------------------------------------------------------------
# Directory holding the agent prompt templates (e.g. backlog_audit.md).
PROMPTS_DIR = Path(__file__).parent / "prompts"
# ISO language code -> English language name, used to build the
# "ALWAYS respond in <language>" instruction in agent prompts.
_LANG_NAMES = {"ru": "Russian", "en": "English", "es": "Spanish",
"de": "German", "fr": "French"}
def run_audit(
    conn: sqlite3.Connection,
    project_id: str,
    noninteractive: bool = False,
) -> dict:
    """Audit pending tasks against the actual codebase.

    Builds a prompt from the backlog_audit template plus the project's
    pending tasks, runs the agent (sonnet) inside the project directory,
    logs the run to agent_logs, and parses the agent's JSON verdict.

    Args:
        conn: Open SQLite connection.
        project_id: ID of the project to audit.
        noninteractive: Forwarded to the agent runner; suppresses
            permission prompts when True (e.g. when called from the API).

    Returns:
        On success: {success, already_done, still_pending, unclear,
        duration_seconds, tokens_used, cost_usd}.
        On failure: {success: False, error, ...} with raw_output when
        the agent produced any.
    """
    project = models.get_project(conn, project_id)
    if not project:
        return {"success": False, "error": f"Project '{project_id}' not found"}

    pending = models.list_tasks(conn, project_id=project_id, status="pending")
    if not pending:
        # Nothing to audit — succeed with empty categories.
        return {
            "success": True,
            "already_done": [],
            "still_pending": [],
            "unclear": [],
            "message": "No pending tasks to audit",
        }

    # Build prompt from the markdown template (with an inline fallback
    # so a missing prompts file degrades gracefully).
    prompt_path = PROMPTS_DIR / "backlog_audit.md"
    template = prompt_path.read_text() if prompt_path.exists() else (
        "You are a QA analyst. Check if pending tasks are already done in the code."
    )
    task_list = [
        {"id": t["id"], "title": t["title"], "brief": t.get("brief")}
        for t in pending
    ]
    sections = [
        template,
        "",
        # BUG FIX: id and name were concatenated with no separator
        # ("vdolVDOL Project"); separate them for a readable header.
        f"## Project: {project['id']} — {project['name']}",
    ]
    if project.get("tech_stack"):
        sections.append(f"Tech stack: {', '.join(project['tech_stack'])}")
    sections.append(f"Path: {project['path']}")
    sections.append("")
    sections.append(f"## Pending tasks ({len(task_list)}):")
    sections.append(json.dumps(task_list, ensure_ascii=False, indent=2))
    sections.append("")
    language = project.get("language", "ru")
    lang_name = _LANG_NAMES.get(language, language)
    sections.append("## Language")
    sections.append(f"ALWAYS respond in {lang_name}.")
    sections.append("")
    prompt = "\n".join(sections)

    # Run inside the project directory so the agent can inspect the code.
    working_dir = None
    project_path = Path(project["path"]).expanduser()
    if project_path.is_dir():
        working_dir = str(project_path)

    start = time.monotonic()
    result = _run_claude(prompt, model="sonnet", working_dir=working_dir,
                         noninteractive=noninteractive)
    duration = int(time.monotonic() - start)

    raw_output = result.get("output", "")
    if not isinstance(raw_output, str):
        raw_output = json.dumps(raw_output, ensure_ascii=False)
    success = result["returncode"] == 0

    # Always log the run — even failures — for cost/debug visibility.
    models.log_agent_run(
        conn,
        project_id=project_id,
        task_id=None,
        agent_role="backlog_audit",
        action="audit",
        input_summary=f"project={project_id}, pending_tasks={len(pending)}",
        output_summary=raw_output or None,
        tokens_used=result.get("tokens_used"),
        model="sonnet",
        cost_usd=result.get("cost_usd"),
        success=success,
        error_message=result.get("error") if not success else None,
        duration_seconds=duration,
    )
    if not success:
        return {
            "success": False,
            "error": result.get("error", "Agent failed"),
            "raw_output": raw_output,
            "duration_seconds": duration,
        }

    # Parse the structured verdict; anything non-dict is a failure.
    parsed = _try_parse_json(raw_output)
    if not isinstance(parsed, dict):
        return {
            "success": False,
            "error": "Agent returned non-JSON output",
            "raw_output": raw_output,
            "duration_seconds": duration,
        }
    return {
        "success": True,
        "already_done": parsed.get("already_done", []),
        "still_pending": parsed.get("still_pending", []),
        "unclear": parsed.get("unclear", []),
        "duration_seconds": duration,
        "tokens_used": result.get("tokens_used"),
        "cost_usd": result.get("cost_usd"),
    }
# ---------------------------------------------------------------------------
# Pipeline executor
# ---------------------------------------------------------------------------

View file

@ -220,6 +220,32 @@ def task_show(ctx, id):
click.echo(f" Updated: {t['updated_at']}")
@task.command("update")
@click.argument("task_id")
@click.option("--status", type=click.Choice(
    ["pending", "in_progress", "review", "done", "blocked", "decomposed"]),
    default=None, help="New status")
# BUG FIX: help text promised 1-10 but any int was accepted;
# IntRange enforces the documented contract with a clear click error.
@click.option("--priority", type=click.IntRange(1, 10), default=None,
              help="New priority (1-10)")
@click.pass_context
def task_update(ctx, task_id, status, priority):
    """Update a task's status or priority.

    Requires at least one of --status/--priority; exits non-zero when
    the task does not exist or no fields were given.
    """
    conn = ctx.obj["conn"]
    t = models.get_task(conn, task_id)
    if not t:
        click.echo(f"Task '{task_id}' not found.", err=True)
        raise SystemExit(1)
    # Collect only the fields the user actually passed.
    fields = {}
    if status is not None:
        fields["status"] = status
    if priority is not None:
        fields["priority"] = priority
    if not fields:
        click.echo("Nothing to update. Use --status or --priority.", err=True)
        raise SystemExit(1)
    updated = models.update_task(conn, task_id, **fields)
    click.echo(f"Updated {updated['id']}: status={updated['status']}, priority={updated['priority']}")
# ===========================================================================
# decision
# ===========================================================================
@ -564,6 +590,65 @@ def run_task(ctx, task_id, dry_run, allow_write):
click.echo(f"Duration: {result['total_duration_seconds']}s")
# ===========================================================================
# audit
# ===========================================================================
@cli.command("audit")
@click.argument("project_id")
@click.pass_context
def audit_backlog(ctx, project_id):
    """Audit pending tasks — check which are already implemented in the code."""
    from agents.runner import run_audit

    conn = ctx.obj["conn"]
    project = models.get_project(conn, project_id)
    if project is None:
        click.echo(f"Project '{project_id}' not found.", err=True)
        raise SystemExit(1)

    pending = models.list_tasks(conn, project_id=project_id, status="pending")
    if not pending:
        click.echo("No pending tasks to audit.")
        return

    click.echo(f"Auditing {len(pending)} pending tasks for {project_id}...")
    result = run_audit(conn, project_id)
    if not result["success"]:
        click.echo(f"Audit failed: {result.get('error', 'unknown')}", err=True)
        raise SystemExit(1)

    done = result.get("already_done", [])
    # Print each non-empty category under its own heading.
    for label, items in (
        ("Already done", done),
        ("Still pending", result.get("still_pending", [])),
        ("Unclear", result.get("unclear", [])),
    ):
        if not items:
            continue
        click.echo(f"\n{label} ({len(items)}):")
        for item in items:
            click.echo(f" {item['id']}: {item.get('reason', '')}")

    if result.get("cost_usd"):
        click.echo(f"\nCost: ${result['cost_usd']:.4f}")
    if result.get("duration_seconds"):
        click.echo(f"Duration: {result['duration_seconds']}s")

    # Offer to batch-apply the "already done" findings.
    if done and click.confirm(f"\nMark {len(done)} tasks as done?"):
        for item in done:
            models.update_task(conn, item["id"], status="done")
        click.echo(f"Marked {len(done)} tasks as done.")
# ===========================================================================
# bootstrap
# ===========================================================================

View file

@ -201,3 +201,35 @@ def test_project_summary_includes_review(client):
r = client.get("/api/projects")
projects = r.json()
assert projects[0]["review_tasks"] == 1
def test_audit_not_found(client):
    """Auditing an unknown project returns 404."""
    response = client.post("/api/projects/NOPE/audit")
    assert response.status_code == 404
def test_audit_apply(client):
    """POST /audit/apply should mark tasks as done."""
    response = client.post("/api/projects/p1/audit/apply",
                           json={"task_ids": ["P1-001"]})
    assert response.status_code == 200
    payload = response.json()
    assert payload["count"] == 1
    assert "P1-001" in payload["updated"]
    # The status change must be persisted, not just echoed back.
    response = client.get("/api/tasks/P1-001")
    assert response.json()["status"] == "done"
def test_audit_apply_not_found(client):
    """Applying audit results for an unknown project returns 404."""
    response = client.post("/api/projects/NOPE/audit/apply",
                           json={"task_ids": ["P1-001"]})
    assert response.status_code == 404
def test_audit_apply_wrong_project(client):
    """Tasks not belonging to the project should be skipped."""
    response = client.post("/api/projects/p1/audit/apply",
                           json={"task_ids": ["WRONG-001"]})
    assert response.status_code == 200
    assert response.json()["count"] == 0

View file

@ -205,3 +205,38 @@ def test_cost_with_data(runner):
assert r.exit_code == 0
assert "p1" in r.output
assert "$0.1000" in r.output
# ===========================================================================
# task update
# ===========================================================================
def test_task_update_status(runner):
    """`task update --status` changes and persists the status."""
    invoke(runner, ["project", "add", "p1", "P1", "/p1"])
    invoke(runner, ["task", "add", "p1", "Fix bug"])
    result = invoke(runner, ["task", "update", "P1-001", "--status", "done"])
    assert result.exit_code == 0
    assert "done" in result.output
    # `task show` must reflect the persisted change.
    result = invoke(runner, ["task", "show", "P1-001"])
    assert "done" in result.output
def test_task_update_priority(runner):
    """`task update --priority` changes the priority."""
    invoke(runner, ["project", "add", "p1", "P1", "/p1"])
    invoke(runner, ["task", "add", "p1", "Fix bug"])
    result = invoke(runner, ["task", "update", "P1-001", "--priority", "1"])
    assert result.exit_code == 0
    assert "priority=1" in result.output
def test_task_update_not_found(runner):
    """Updating a nonexistent task exits non-zero."""
    result = invoke(runner, ["task", "update", "NOPE", "--status", "done"])
    assert result.exit_code != 0
def test_task_update_no_fields(runner):
    """Calling update with neither --status nor --priority is an error."""
    invoke(runner, ["project", "add", "p1", "P1", "/p1"])
    invoke(runner, ["task", "add", "p1", "Fix bug"])
    result = invoke(runner, ["task", "update", "P1-001"])
    assert result.exit_code != 0

View file

@ -6,7 +6,7 @@ import pytest
from unittest.mock import patch, MagicMock
from core.db import init_db
from core import models
from agents.runner import run_agent, run_pipeline, _try_parse_json
from agents.runner import run_agent, run_pipeline, run_audit, _try_parse_json
@pytest.fixture
@ -335,3 +335,82 @@ class TestNonInteractive:
run_agent(conn, "debugger", "VDOL-001", "vdol", allow_write=False)
cmd = mock_run.call_args[0][0]
assert "--dangerously-skip-permissions" not in cmd
# ---------------------------------------------------------------------------
# run_audit
# ---------------------------------------------------------------------------
class TestRunAudit:
    """Unit tests for run_audit with the claude subprocess mocked out."""

    @staticmethod
    def _empty_verdict() -> str:
        # JSON verdict with all three categories empty.
        return json.dumps(
            {"already_done": [], "still_pending": [], "unclear": []}
        )

    @patch("agents.runner.subprocess.run")
    def test_audit_success(self, mock_run, conn):
        """Audit should return parsed already_done/still_pending/unclear."""
        verdict = json.dumps({
            "already_done": [{"id": "VDOL-001", "reason": "Fixed in runner.py"}],
            "still_pending": [],
            "unclear": [],
        })
        mock_run.return_value = _mock_claude_success({"result": verdict})
        outcome = run_audit(conn, "vdol")
        assert outcome["success"] is True
        assert len(outcome["already_done"]) == 1
        assert outcome["already_done"][0]["id"] == "VDOL-001"

    @patch("agents.runner.subprocess.run")
    def test_audit_logs_to_db(self, mock_run, conn):
        """Audit should log to agent_logs with role=backlog_audit."""
        mock_run.return_value = _mock_claude_success(
            {"result": self._empty_verdict()}
        )
        run_audit(conn, "vdol")
        rows = conn.execute(
            "SELECT * FROM agent_logs WHERE agent_role='backlog_audit'"
        ).fetchall()
        assert len(rows) == 1
        assert rows[0]["action"] == "audit"

    def test_audit_no_pending_tasks(self, conn):
        """If no pending tasks, return success with empty lists."""
        # Flip the fixture's only task to done so the backlog is empty.
        models.update_task(conn, "VDOL-001", status="done")
        outcome = run_audit(conn, "vdol")
        assert outcome["success"] is True
        assert outcome["already_done"] == []
        assert "No pending tasks" in outcome.get("message", "")

    def test_audit_project_not_found(self, conn):
        """Unknown project id yields success=False with an error."""
        outcome = run_audit(conn, "nonexistent")
        assert outcome["success"] is False
        assert "not found" in outcome["error"]

    @patch("agents.runner.subprocess.run")
    def test_audit_uses_sonnet(self, mock_run, conn):
        """Audit should use sonnet model."""
        mock_run.return_value = _mock_claude_success(
            {"result": self._empty_verdict()}
        )
        run_audit(conn, "vdol")
        cmd = mock_run.call_args[0][0]
        assert cmd[cmd.index("--model") + 1] == "sonnet"

    @patch("agents.runner.subprocess.run")
    def test_audit_includes_tasks_in_prompt(self, mock_run, conn):
        """The prompt should contain the task id and title."""
        mock_run.return_value = _mock_claude_success(
            {"result": self._empty_verdict()}
        )
        run_audit(conn, "vdol")
        sent_prompt = mock_run.call_args[0][0][2]  # -p argument
        assert "VDOL-001" in sent_prompt
        assert "Fix bug" in sent_prompt

View file

@ -385,6 +385,47 @@ def list_tickets(project: str | None = None, status: str | None = None):
return tickets
# ---------------------------------------------------------------------------
# Audit
# ---------------------------------------------------------------------------
@app.post("/api/projects/{project_id}/audit")
def audit_project(project_id: str):
    """Run backlog audit — check which pending tasks are already done.

    Runs the audit inline (blocking) and returns the runner's result
    dict. Raises 404 when the project does not exist.
    """
    from agents.runner import run_audit

    conn = get_conn()
    # BUG FIX: conn leaked when run_audit raised; finally guarantees close.
    try:
        p = models.get_project(conn, project_id)
        if not p:
            raise HTTPException(404, f"Project '{project_id}' not found")
        # noninteractive: the agent must never prompt when run via HTTP.
        return run_audit(conn, project_id, noninteractive=True)
    finally:
        conn.close()
class AuditApply(BaseModel):
    """Request body for /audit/apply."""
    # IDs of tasks the user confirmed as already implemented.
    task_ids: list[str]
@app.post("/api/projects/{project_id}/audit/apply")
def audit_apply(project_id: str, body: AuditApply):
    """Mark tasks as done after audit confirmation.

    Task IDs that do not exist or belong to a different project are
    silently skipped; the response lists only the tasks updated.
    """
    conn = get_conn()
    # BUG FIX: conn leaked on exceptions; finally guarantees close.
    try:
        p = models.get_project(conn, project_id)
        if not p:
            raise HTTPException(404, f"Project '{project_id}' not found")
        updated = []
        for tid in body.task_ids:
            t = models.get_task(conn, tid)
            # Guard against cross-project IDs smuggled into the payload.
            if t and t["project_id"] == project_id:
                models.update_task(conn, tid, status="done")
                updated.append(tid)
        return {"updated": updated, "count": len(updated)}
    finally:
        conn.close()
# ---------------------------------------------------------------------------
# Bootstrap
# ---------------------------------------------------------------------------

View file

@ -108,6 +108,21 @@ export interface CostEntry {
total_duration_seconds: number
}
/** One task classified by the backlog audit. */
export interface AuditItem {
  id: string
  /** Agent's justification for the classification. */
  reason: string
}

/** Result payload of POST /api/projects/{id}/audit. */
export interface AuditResult {
  success: boolean
  already_done: AuditItem[]
  still_pending: AuditItem[]
  unclear: AuditItem[]
  duration_seconds?: number
  cost_usd?: number
  /** Set when success is false. */
  error?: string
}
export const api = {
projects: () => get<Project[]>('/projects'),
project: (id: string) => get<ProjectDetail>(`/projects/${id}`),
@ -129,4 +144,8 @@ export const api = {
post<{ status: string }>(`/tasks/${id}/run`, { allow_write: allowWrite }),
bootstrap: (data: { path: string; id: string; name: string }) =>
post<{ project: Project }>('/bootstrap', data),
auditProject: (projectId: string) =>
post<AuditResult>(`/projects/${projectId}/audit`, {}),
auditApply: (projectId: string, taskIds: string[]) =>
post<{ updated: string[]; count: number }>(`/projects/${projectId}/audit/apply`, { task_ids: taskIds }),
}

View file

@ -1,6 +1,6 @@
<script setup lang="ts">
import { ref, onMounted, computed } from 'vue'
import { api, type ProjectDetail } from '../api'
import { api, type ProjectDetail, type AuditResult } from '../api'
import Badge from '../components/Badge.vue'
import Modal from '../components/Modal.vue'
@ -28,6 +28,42 @@ function toggleMode() {
localStorage.setItem(`kin-mode-${props.id}`, autoMode.value ? 'auto' : 'review')
}
// Audit
const auditLoading = ref(false)
const auditResult = ref<AuditResult | null>(null)
const showAuditModal = ref(false)
const auditApplying = ref(false)
// Kick off a backlog audit and open the results modal on success.
async function runAudit() {
  auditLoading.value = true
  auditResult.value = null
  try {
    auditResult.value = await api.auditProject(props.id)
    showAuditModal.value = true
  } catch (e: any) {
    error.value = e.message
  } finally {
    auditLoading.value = false
  }
}
// Mark every "already done" task as done on the server, then refresh.
async function applyAudit() {
  const done = auditResult.value?.already_done
  if (!done?.length) return
  auditApplying.value = true
  try {
    await api.auditApply(props.id, done.map(t => t.id))
    showAuditModal.value = false
    auditResult.value = null
    await load()
  } catch (e: any) {
    error.value = e.message
  } finally {
    auditApplying.value = false
  }
}
// Add task modal
const showAddTask = ref(false)
const taskForm = ref({ title: '', priority: 5, route_type: '' })
@ -216,6 +252,12 @@ async function addDecision() {
:title="autoMode ? 'Auto mode: agents can write files' : 'Review mode: agents read-only'">
{{ autoMode ? '&#x1F513; Auto' : '&#x1F512; Review' }}
</button>
<button @click="runAudit" :disabled="auditLoading"
class="px-2 py-1 text-xs bg-purple-900/30 text-purple-400 border border-purple-800 rounded hover:bg-purple-900/50 disabled:opacity-50"
title="Check which pending tasks are already done">
<span v-if="auditLoading" class="inline-block w-3 h-3 border-2 border-purple-400 border-t-transparent rounded-full animate-spin mr-1"></span>
{{ auditLoading ? 'Auditing...' : 'Audit backlog' }}
</button>
<button @click="showAddTask = true"
class="px-3 py-1 text-xs bg-gray-800 text-gray-300 border border-gray-700 rounded hover:bg-gray-700">
+ Task
@ -350,5 +392,46 @@ async function addDecision() {
</button>
</form>
</Modal>
<!-- Audit Modal -->
<Modal v-if="showAuditModal && auditResult" title="Backlog Audit Results" @close="showAuditModal = false">
<div v-if="!auditResult.success" class="text-red-400 text-sm">
Audit failed: {{ auditResult.error }}
</div>
<div v-else class="space-y-4">
<div v-if="auditResult.already_done?.length">
<h3 class="text-sm font-semibold text-green-400 mb-2">Already done ({{ auditResult.already_done.length }})</h3>
<div v-for="item in auditResult.already_done" :key="item.id"
class="px-3 py-2 border border-green-900/50 rounded text-xs mb-1">
<span class="text-green-400 font-medium">{{ item.id }}</span>
<span class="text-gray-400 ml-2">{{ item.reason }}</span>
</div>
</div>
<div v-if="auditResult.still_pending?.length">
<h3 class="text-sm font-semibold text-gray-400 mb-2">Still pending ({{ auditResult.still_pending.length }})</h3>
<div v-for="item in auditResult.still_pending" :key="item.id"
class="px-3 py-2 border border-gray-800 rounded text-xs mb-1">
<span class="text-gray-300 font-medium">{{ item.id }}</span>
<span class="text-gray-500 ml-2">{{ item.reason }}</span>
</div>
</div>
<div v-if="auditResult.unclear?.length">
<h3 class="text-sm font-semibold text-yellow-400 mb-2">Unclear ({{ auditResult.unclear.length }})</h3>
<div v-for="item in auditResult.unclear" :key="item.id"
class="px-3 py-2 border border-yellow-900/50 rounded text-xs mb-1">
<span class="text-yellow-400 font-medium">{{ item.id }}</span>
<span class="text-gray-400 ml-2">{{ item.reason }}</span>
</div>
</div>
<div v-if="auditResult.cost_usd || auditResult.duration_seconds" class="text-xs text-gray-600">
<span v-if="auditResult.duration_seconds">{{ auditResult.duration_seconds }}s</span>
<span v-if="auditResult.cost_usd" class="ml-2">${{ auditResult.cost_usd?.toFixed(4) }}</span>
</div>
<button v-if="auditResult.already_done?.length" @click="applyAudit" :disabled="auditApplying"
class="w-full py-2 bg-green-900/50 text-green-400 border border-green-800 rounded text-sm hover:bg-green-900 disabled:opacity-50">
{{ auditApplying ? 'Applying...' : `Mark ${auditResult.already_done.length} tasks as done` }}
</button>
</div>
</Modal>
</div>
</template>