diff --git a/agents/prompts/backlog_audit.md b/agents/prompts/backlog_audit.md new file mode 100644 index 0000000..cb6f277 --- /dev/null +++ b/agents/prompts/backlog_audit.md @@ -0,0 +1,44 @@ +You are a QA analyst performing a backlog audit. + +## Your task + +You receive a list of pending tasks and have access to the project's codebase. +For EACH task, determine: is the described feature/fix already implemented in the current code? + +## Rules + +- Check actual files, functions, tests — don't guess +- Look at: file existence, function names, imports, test coverage, recent git log +- Read relevant source files before deciding +- If the task describes a feature and you find matching code — it's done +- If the task describes a bug fix and you see the fix applied — it's done +- If you find partial implementation — mark as "unclear" +- If you can't find any related code — it's still pending + +## How to investigate + +1. Read package.json / pyproject.toml for project structure +2. List src/ directory to understand file layout +3. For each task, search for keywords in the codebase +4. Read relevant files to confirm implementation +5. Check tests if they exist + +## Output format + +Return ONLY valid JSON: + +```json +{ + "already_done": [ + {"id": "TASK-001", "reason": "Implemented in src/api.ts:42, function fetchData()"} + ], + "still_pending": [ + {"id": "TASK-003", "reason": "No matching code found in codebase"} + ], + "unclear": [ + {"id": "TASK-007", "reason": "Partial implementation in src/utils.ts, needs review"} + ] +} +``` + +Every task from the input list MUST appear in exactly one category. 
# ---------------------------------------------------------------------------
# Backlog audit
# ---------------------------------------------------------------------------

PROMPTS_DIR = Path(__file__).parent / "prompts"

_LANG_NAMES = {"ru": "Russian", "en": "English", "es": "Spanish",
               "de": "German", "fr": "French"}

# Prompt used when prompts/backlog_audit.md is not present on disk.
_AUDIT_FALLBACK_PROMPT = (
    "You are a QA analyst. Check if pending tasks are already done in the code."
)


def _clean_audit_items(raw, known_ids):
    """Keep only well-formed audit entries that refer to an audited task.

    The agent's JSON is untrusted: a category may be missing, may not be a
    list, or may contain entries without an ``id`` or with ids that were never
    part of the prompt. Callers index ``item["id"]`` and may mark those tasks
    as done, so anything that is not a dict carrying a known string id is
    dropped here.

    Args:
        raw: Value found under one category key of the parsed agent output.
        known_ids: Set of task ids (as strings) that were sent to the agent.

    Returns:
        A new list of dicts; each has a known ``id`` and a string ``reason``.
    """
    if not isinstance(raw, list):
        return []
    cleaned = []
    for entry in raw:
        if not isinstance(entry, dict):
            continue
        task_id = entry.get("id")
        if not isinstance(task_id, str) or task_id not in known_ids:
            continue
        reason = entry.get("reason")
        cleaned.append({**entry, "reason": "" if reason is None else str(reason)})
    return cleaned


def run_audit(
    conn: sqlite3.Connection,
    project_id: str,
    noninteractive: bool = False,
) -> dict:
    """Audit pending tasks against the actual codebase.

    Builds a prompt from ``prompts/backlog_audit.md`` plus the project's
    pending tasks, runs it through ``_run_claude`` with the "sonnet" model,
    records the run with ``models.log_agent_run`` and parses the agent's JSON
    answer.

    Args:
        conn: Open database connection handed to the ``models`` helpers.
        project_id: Id of the project whose pending tasks are audited.
        noninteractive: Forwarded unchanged to ``_run_claude``.

    Returns:
        A dict that always contains ``success``. On success it carries the
        lists ``already_done``, ``still_pending`` and ``unclear`` (entries are
        ``{"id", "reason"}`` dicts restricted to the audited tasks) plus
        ``duration_seconds``, ``tokens_used`` and ``cost_usd``. When there is
        nothing to audit the three lists are empty and ``message`` explains
        why. On failure it carries ``error`` and, once the agent has run,
        ``raw_output`` and ``duration_seconds``.
    """
    project = models.get_project(conn, project_id)
    if not project:
        return {"success": False, "error": f"Project '{project_id}' not found"}

    pending = models.list_tasks(conn, project_id=project_id, status="pending")
    if not pending:
        return {
            "success": True,
            "already_done": [],
            "still_pending": [],
            "unclear": [],
            "message": "No pending tasks to audit",
        }

    # Build prompt. The template contains non-ASCII punctuation, so decode it
    # explicitly instead of relying on the platform's default encoding.
    prompt_path = PROMPTS_DIR / "backlog_audit.md"
    if prompt_path.exists():
        template = prompt_path.read_text(encoding="utf-8")
    else:
        template = _AUDIT_FALLBACK_PROMPT

    task_list = [
        {"id": t["id"], "title": t["title"], "brief": t.get("brief")}
        for t in pending
    ]

    sections = [
        template,
        "",
        f"## Project: {project['id']} — {project['name']}",
    ]
    if project.get("tech_stack"):
        sections.append(f"Tech stack: {', '.join(project['tech_stack'])}")
    sections.append(f"Path: {project['path']}")
    sections.append("")
    sections.append(f"## Pending tasks ({len(task_list)}):")
    sections.append(json.dumps(task_list, ensure_ascii=False, indent=2))
    sections.append("")

    # `or` (not a .get default) so a stored NULL language also falls back to
    # "ru" instead of producing "ALWAYS respond in None."
    language = project.get("language") or "ru"
    lang_name = _LANG_NAMES.get(language, language)
    sections.append("## Language")
    sections.append(f"ALWAYS respond in {lang_name}.")
    sections.append("")

    prompt = "\n".join(sections)

    # Run the agent inside the project checkout when it exists locally;
    # otherwise leave the working directory unset.
    working_dir = None
    project_path = Path(project["path"]).expanduser()
    if project_path.is_dir():
        working_dir = str(project_path)

    # Run agent
    start = time.monotonic()
    result = _run_claude(prompt, model="sonnet", working_dir=working_dir,
                         noninteractive=noninteractive)
    duration = int(time.monotonic() - start)

    # The runner may hand back already-decoded JSON; keep a string form for
    # logging and for the parse step below.
    raw_output = result.get("output", "")
    if not isinstance(raw_output, str):
        raw_output = json.dumps(raw_output, ensure_ascii=False)
    success = result["returncode"] == 0

    # Log to agent_logs
    models.log_agent_run(
        conn,
        project_id=project_id,
        task_id=None,
        agent_role="backlog_audit",
        action="audit",
        input_summary=f"project={project_id}, pending_tasks={len(pending)}",
        output_summary=raw_output or None,
        tokens_used=result.get("tokens_used"),
        model="sonnet",
        cost_usd=result.get("cost_usd"),
        success=success,
        error_message=result.get("error") if not success else None,
        duration_seconds=duration,
    )

    if not success:
        return {
            "success": False,
            "error": result.get("error", "Agent failed"),
            "raw_output": raw_output,
            "duration_seconds": duration,
        }

    # Parse structured output
    parsed = _try_parse_json(raw_output)
    if not isinstance(parsed, dict):
        return {
            "success": False,
            "error": "Agent returned non-JSON output",
            "raw_output": raw_output,
            "duration_seconds": duration,
        }

    known_ids = {str(t["id"]) for t in pending}
    return {
        "success": True,
        "already_done": _clean_audit_items(parsed.get("already_done"), known_ids),
        "still_pending": _clean_audit_items(parsed.get("still_pending"), known_ids),
        "unclear": _clean_audit_items(parsed.get("unclear"), known_ids),
        "duration_seconds": duration,
        "tokens_used": result.get("tokens_used"),
        "cost_usd": result.get("cost_usd"),
    }
# `--priority` is validated with IntRange so the CLI enforces the 1-10 range
# its own help text promises; out-of-range values are rejected by click before
# the command body runs.
@task.command("update")
@click.argument("task_id")
@click.option("--status", type=click.Choice(
    ["pending", "in_progress", "review", "done", "blocked", "decomposed"]),
    default=None, help="New status")
@click.option("--priority", type=click.IntRange(1, 10), default=None,
              help="New priority (1-10)")
@click.pass_context
def task_update(ctx, task_id, status, priority):
    """Update a task's status or priority."""
    # NOTE: the docstring above is the CLI help text, so it is kept short.
    # Exits with status 1 when the task does not exist or when neither option
    # was supplied; otherwise applies the given fields via models.update_task
    # and prints the resulting status and priority.
    conn = ctx.obj["conn"]
    if not models.get_task(conn, task_id):
        click.echo(f"Task '{task_id}' not found.", err=True)
        raise SystemExit(1)

    # Only forward the options the user actually passed.
    fields = {
        name: value
        for name, value in (("status", status), ("priority", priority))
        if value is not None
    }
    if not fields:
        click.echo("Nothing to update. Use --status or --priority.", err=True)
        raise SystemExit(1)

    updated = models.update_task(conn, task_id, **fields)
    click.echo(f"Updated {updated['id']}: status={updated['status']}, priority={updated['priority']}")
# ===========================================================================
# audit
# ===========================================================================

@cli.command("audit")
@click.argument("project_id")
@click.pass_context
def audit_backlog(ctx, project_id):
    """Audit pending tasks — check which are already implemented in the code."""
    # NOTE: the docstring above is the CLI help text, so it is kept short.
    # Flow: validate the project, run the audit agent, print the three
    # categories with cost/duration, then optionally mark the "already done"
    # tasks as done after an interactive confirmation.
    from agents.runner import run_audit

    conn = ctx.obj["conn"]
    if not models.get_project(conn, project_id):
        click.echo(f"Project '{project_id}' not found.", err=True)
        raise SystemExit(1)

    pending = models.list_tasks(conn, project_id=project_id, status="pending")
    if not pending:
        click.echo("No pending tasks to audit.")
        return

    click.echo(f"Auditing {len(pending)} pending tasks for {project_id}...")
    result = run_audit(conn, project_id)

    if not result["success"]:
        click.echo(f"Audit failed: {result.get('error', 'unknown')}", err=True)
        raise SystemExit(1)

    def _entries(key):
        # The lists originate from agent output, so tolerate a missing or
        # non-list category and skip entries that cannot be printed by id.
        raw = result.get(key)
        if not isinstance(raw, list):
            return []
        return [e for e in raw if isinstance(e, dict) and isinstance(e.get("id"), str)]

    # Only tasks that were actually audited may be offered for closing; an id
    # the agent invented (or one from another project) must never be updated.
    pending_ids = {t["id"] for t in pending}
    done = [e for e in _entries("already_done") if e["id"] in pending_ids]
    still = _entries("still_pending")
    unclear = _entries("unclear")

    report = (
        ("Already done", done),
        ("Still pending", still),
        ("Unclear", unclear),
    )
    for heading, items in report:
        if not items:
            continue
        click.echo(f"\n{heading} ({len(items)}):")
        for item in items:
            click.echo(f" {item['id']}: {item.get('reason', '')}")

    if result.get("cost_usd"):
        click.echo(f"\nCost: ${result['cost_usd']:.4f}")
    if result.get("duration_seconds"):
        click.echo(f"Duration: {result['duration_seconds']}s")

    if done and click.confirm(f"\nMark {len(done)} tasks as done?"):
        for item in done:
            models.update_task(conn, item["id"], status="done")
        click.echo(f"Marked {len(done)} tasks as done.")
def test_audit_not_found(client):
    """Auditing an unknown project answers 404."""
    response = client.post("/api/projects/NOPE/audit")
    assert response.status_code == 404


def test_audit_apply(client):
    """POST /audit/apply should mark tasks as done."""
    applied = client.post(
        "/api/projects/p1/audit/apply",
        json={"task_ids": ["P1-001"]},
    )
    assert applied.status_code == 200
    payload = applied.json()
    assert payload["count"] == 1
    assert "P1-001" in payload["updated"]

    # The task itself must now report the new status.
    task = client.get("/api/tasks/P1-001").json()
    assert task["status"] == "done"


def test_audit_apply_not_found(client):
    """Applying an audit to an unknown project answers 404."""
    response = client.post(
        "/api/projects/NOPE/audit/apply",
        json={"task_ids": ["P1-001"]},
    )
    assert response.status_code == 404


def test_audit_apply_wrong_project(client):
    """Tasks not belonging to the project should be skipped."""
    response = client.post(
        "/api/projects/p1/audit/apply",
        json={"task_ids": ["WRONG-001"]},
    )
    assert response.status_code == 200
    assert response.json()["count"] == 0
# ===========================================================================
# task update
# ===========================================================================

def _seed_project_with_task(runner):
    """Create project p1 with a single task (P1-001) through the CLI."""
    invoke(runner, ["project", "add", "p1", "P1", "/p1"])
    invoke(runner, ["task", "add", "p1", "Fix bug"])


def test_task_update_status(runner):
    """`task update --status` reports and persists the new status."""
    _seed_project_with_task(runner)

    update = invoke(runner, ["task", "update", "P1-001", "--status", "done"])
    assert update.exit_code == 0
    assert "done" in update.output

    shown = invoke(runner, ["task", "show", "P1-001"])
    assert "done" in shown.output


def test_task_update_priority(runner):
    """`task update --priority` echoes the new priority."""
    _seed_project_with_task(runner)

    update = invoke(runner, ["task", "update", "P1-001", "--priority", "1"])
    assert update.exit_code == 0
    assert "priority=1" in update.output


def test_task_update_not_found(runner):
    """Updating an unknown task exits with a non-zero status."""
    outcome = invoke(runner, ["task", "update", "NOPE", "--status", "done"])
    assert outcome.exit_code != 0


def test_task_update_no_fields(runner):
    """Calling `task update` without any option is rejected."""
    _seed_project_with_task(runner)

    outcome = invoke(runner, ["task", "update", "P1-001"])
    assert outcome.exit_code != 0
# ---------------------------------------------------------------------------
# run_audit
# ---------------------------------------------------------------------------

class TestRunAudit:
    """Behaviour of run_audit with the agent subprocess patched out."""

    @staticmethod
    def _agent_reply(already_done=()):
        """Build the mocked subprocess result carrying an audit JSON answer."""
        answer = {
            "already_done": list(already_done),
            "still_pending": [],
            "unclear": [],
        }
        return _mock_claude_success({"result": json.dumps(answer)})

    @patch("agents.runner.subprocess.run")
    def test_audit_success(self, mock_run, conn):
        """Audit should return parsed already_done/still_pending/unclear."""
        mock_run.return_value = self._agent_reply(
            already_done=[{"id": "VDOL-001", "reason": "Fixed in runner.py"}],
        )

        outcome = run_audit(conn, "vdol")

        assert outcome["success"] is True
        finished = outcome["already_done"]
        assert len(finished) == 1
        assert finished[0]["id"] == "VDOL-001"

    @patch("agents.runner.subprocess.run")
    def test_audit_logs_to_db(self, mock_run, conn):
        """Audit should log to agent_logs with role=backlog_audit."""
        mock_run.return_value = self._agent_reply()

        run_audit(conn, "vdol")

        rows = conn.execute(
            "SELECT * FROM agent_logs WHERE agent_role='backlog_audit'"
        ).fetchall()
        assert len(rows) == 1
        assert rows[0]["action"] == "audit"

    def test_audit_no_pending_tasks(self, conn):
        """If no pending tasks, return success with empty lists."""
        # Close the only existing task so nothing is left to audit.
        models.update_task(conn, "VDOL-001", status="done")

        outcome = run_audit(conn, "vdol")

        assert outcome["success"] is True
        assert outcome["already_done"] == []
        assert "No pending tasks" in outcome.get("message", "")

    def test_audit_project_not_found(self, conn):
        """An unknown project id produces a failure result, not an exception."""
        outcome = run_audit(conn, "nonexistent")
        assert outcome["success"] is False
        assert "not found" in outcome["error"]

    @patch("agents.runner.subprocess.run")
    def test_audit_uses_sonnet(self, mock_run, conn):
        """Audit should use sonnet model."""
        mock_run.return_value = self._agent_reply()

        run_audit(conn, "vdol")

        argv = mock_run.call_args[0][0]
        assert argv[argv.index("--model") + 1] == "sonnet"

    @patch("agents.runner.subprocess.run")
    def test_audit_includes_tasks_in_prompt(self, mock_run, conn):
        """The prompt should contain the task title."""
        mock_run.return_value = self._agent_reply()

        run_audit(conn, "vdol")

        argv = mock_run.call_args[0][0]
        prompt = argv[2]  # -p argument
        assert "VDOL-001" in prompt
        assert "Fix bug" in prompt
# ---------------------------------------------------------------------------
# Audit
# ---------------------------------------------------------------------------

@app.post("/api/projects/{project_id}/audit")
def audit_project(project_id: str):
    """Run backlog audit — check which pending tasks are already done."""
    # Responds 404 when the project is unknown; otherwise returns the dict
    # produced by run_audit unchanged.
    from agents.runner import run_audit

    conn = get_conn()
    # try/finally so the connection is released even if the audit raises;
    # previously an exception inside run_audit leaked it.
    try:
        if not models.get_project(conn, project_id):
            raise HTTPException(404, f"Project '{project_id}' not found")
        return run_audit(conn, project_id, noninteractive=True)
    finally:
        conn.close()


class AuditApply(BaseModel):
    # Ids of the tasks the client confirmed as already implemented.
    task_ids: list[str]


@app.post("/api/projects/{project_id}/audit/apply")
def audit_apply(project_id: str, body: AuditApply):
    """Mark tasks as done after audit confirmation."""
    # Responds 404 when the project is unknown. Ids that do not exist or that
    # belong to another project are skipped silently; the response lists the
    # ids that were actually updated and how many there were.
    conn = get_conn()
    try:
        if not models.get_project(conn, project_id):
            raise HTTPException(404, f"Project '{project_id}' not found")

        updated = []
        # dict.fromkeys de-duplicates while keeping request order, so a
        # repeated id is updated and counted once.
        for tid in dict.fromkeys(body.task_ids):
            t = models.get_task(conn, tid)
            if t and t["project_id"] == project_id:
                models.update_task(conn, tid, status="done")
                updated.append(tid)
        return {"updated": updated, "count": len(updated)}
    finally:
        conn.close()
--------------------------------------------------------------------------- # Bootstrap # --------------------------------------------------------------------------- diff --git a/web/frontend/src/api.ts b/web/frontend/src/api.ts index 3ed2d66..3a4200c 100644 --- a/web/frontend/src/api.ts +++ b/web/frontend/src/api.ts @@ -108,6 +108,21 @@ export interface CostEntry { total_duration_seconds: number } +export interface AuditItem { + id: string + reason: string +} + +export interface AuditResult { + success: boolean + already_done: AuditItem[] + still_pending: AuditItem[] + unclear: AuditItem[] + duration_seconds?: number + cost_usd?: number + error?: string +} + export const api = { projects: () => get('/projects'), project: (id: string) => get(`/projects/${id}`), @@ -129,4 +144,8 @@ export const api = { post<{ status: string }>(`/tasks/${id}/run`, { allow_write: allowWrite }), bootstrap: (data: { path: string; id: string; name: string }) => post<{ project: Project }>('/bootstrap', data), + auditProject: (projectId: string) => + post(`/projects/${projectId}/audit`, {}), + auditApply: (projectId: string, taskIds: string[]) => + post<{ updated: string[]; count: number }>(`/projects/${projectId}/audit/apply`, { task_ids: taskIds }), } diff --git a/web/frontend/src/views/ProjectView.vue b/web/frontend/src/views/ProjectView.vue index 6fb8c05..13a9ceb 100644 --- a/web/frontend/src/views/ProjectView.vue +++ b/web/frontend/src/views/ProjectView.vue @@ -1,6 +1,6 @@