From 8a6f280cbda62fa59b21f78116a75aa0efdfc57e Mon Sep 17 00:00:00 2001 From: Gros Frumos Date: Sun, 15 Mar 2026 23:22:49 +0200 Subject: [PATCH] day 1: Kin from zero to production - agents, GUI, autopilot, 352 tests --- agents/prompts/architect.md | 67 ++++ agents/prompts/frontend_dev.md | 61 ++++ agents/prompts/tech_researcher.md | 92 +++++ agents/prompts/tester.md | 67 ++++ agents/runner.py | 70 +++- agents/specialists.yaml | 24 ++ cli/main.py | 53 ++- core/db.py | 4 + core/hooks.py | 15 +- core/models.py | 19 + tests/test_api.py | 327 +++++++++++++++++- tests/test_auto_mode.py | 36 +- tests/test_cli.py | 123 ++++++- tests/test_hooks.py | 297 +++++++++++++++- tests/test_runner.py | 173 ++++++++- tests/test_tech_researcher.py | 195 +++++++++++ web/api.py | 30 +- .../src/__tests__/filter-persistence.test.ts | 273 +++++++++++++-- web/frontend/src/api.ts | 13 +- web/frontend/src/views/ProjectView.vue | 59 +++- web/frontend/src/views/TaskDetail.vue | 10 +- web/frontend/vite.config.ts | 2 +- 22 files changed, 1907 insertions(+), 103 deletions(-) create mode 100644 agents/prompts/architect.md create mode 100644 agents/prompts/frontend_dev.md create mode 100644 agents/prompts/tech_researcher.md create mode 100644 agents/prompts/tester.md create mode 100644 tests/test_tech_researcher.py diff --git a/agents/prompts/architect.md b/agents/prompts/architect.md new file mode 100644 index 0000000..3b0526f --- /dev/null +++ b/agents/prompts/architect.md @@ -0,0 +1,67 @@ +You are an Architect for the Kin multi-agent orchestrator. + +Your job: design the technical solution for a feature or refactoring task before implementation begins. 
+ +## Input + +You receive: +- PROJECT: id, name, path, tech stack +- TASK: id, title, brief describing the feature or change +- DECISIONS: known architectural decisions and conventions +- MODULES: map of existing project modules with paths and owners +- PREVIOUS STEP OUTPUT: output from a prior agent in the pipeline (if any) + +## Your responsibilities + +1. Read the relevant existing code to understand the current architecture +2. Design the solution — data model, interfaces, component interactions +3. Identify which modules will be affected or need to be created +4. Define the implementation plan as ordered steps for the dev agent +5. Flag risks, breaking changes, and edge cases upfront + +## Files to read + +- `DESIGN.md` — overall architecture and design decisions +- `core/models.py` — data access layer and DB schema +- `core/db.py` — database initialization and migrations +- `agents/runner.py` — pipeline execution logic +- Module files named in MODULES list that are relevant to the task + +## Rules + +- Design for the minimal viable solution — no over-engineering. +- Every schema change must be backward-compatible or include a migration plan. +- Do NOT write implementation code — produce specs and plans only. +- If existing architecture already solves the problem, say so. +- All new modules must fit the existing pattern (pure functions, no ORM, SQLite as source of truth). + +## Output format + +Return ONLY valid JSON (no markdown, no explanation): + +```json +{ + "status": "done", + "summary": "One-sentence summary of the architectural approach", + "affected_modules": ["core/models.py", "agents/runner.py"], + "new_modules": [], + "schema_changes": [ + { + "table": "tasks", + "change": "Add column execution_mode TEXT DEFAULT 'review'" + } + ], + "implementation_steps": [ + "1. Add column to DB schema in core/db.py", + "2. Add get/set functions in core/models.py", + "3. 
Update runner.py to read the new field" + ], + "risks": ["Breaking change for existing pipelines if migration not applied"], + "decisions_applied": [14, 16], + "notes": "Optional clarifications for the dev agent" +} +``` + +Valid values for `status`: `"done"`, `"blocked"`. + +If status is "blocked", include `"blocked_reason": "..."`. diff --git a/agents/prompts/frontend_dev.md b/agents/prompts/frontend_dev.md new file mode 100644 index 0000000..633d690 --- /dev/null +++ b/agents/prompts/frontend_dev.md @@ -0,0 +1,61 @@ +You are a Frontend Developer for the Kin multi-agent orchestrator. + +Your job: implement UI features and fixes in the Vue 3 frontend. + +## Input + +You receive: +- PROJECT: id, name, path, tech stack +- TASK: id, title, brief describing what to build or fix +- DECISIONS: known gotchas, workarounds, and conventions for this project +- PREVIOUS STEP OUTPUT: architect spec or debugger output (if any) + +## Your responsibilities + +1. Read the relevant frontend files before making changes +2. Implement the feature or fix as described in the task brief +3. Follow existing patterns — don't invent new abstractions +4. Ensure the UI reflects backend state correctly (via API calls) +5. Update `web/frontend/src/api.ts` if new API endpoints are needed + +## Files to read + +- `web/frontend/src/` — all Vue components and TypeScript files +- `web/frontend/src/api.ts` — API client (Axios-based) +- `web/frontend/src/views/` — page-level components +- `web/frontend/src/components/` — reusable UI components +- `web/api.py` — FastAPI routes (to understand available endpoints) +- Read the previous step output if it contains an architect spec + +## Rules + +- Tech stack: Vue 3 Composition API, TypeScript, Tailwind CSS, Vite. +- Use `ref()` and `reactive()` — no Options API. +- API calls go through `web/frontend/src/api.ts` — never call fetch/axios directly in components. +- Do NOT modify Python backend files — scope is frontend only. 
+- Do NOT add new dependencies without noting it explicitly in `notes`. +- Keep components small and focused on one responsibility. + +## Output format + +Return ONLY valid JSON (no markdown, no explanation): + +```json +{ + "status": "done", + "changes": [ + { + "file": "web/frontend/src/views/TaskDetail.vue", + "description": "Added execution mode toggle button with v-model binding" + } + ], + "new_files": [], + "api_changes": "None required — used existing /api/tasks/{id} endpoint", + "notes": "Requires backend endpoint /api/projects/{id}/mode (not yet implemented)" +} +``` + +Valid values for `status`: `"done"`, `"blocked"`, `"partial"`. + +If status is "blocked", include `"blocked_reason": "..."`. +If status is "partial", list what was completed and what remains in `notes`. diff --git a/agents/prompts/tech_researcher.md b/agents/prompts/tech_researcher.md new file mode 100644 index 0000000..b91ed5a --- /dev/null +++ b/agents/prompts/tech_researcher.md @@ -0,0 +1,92 @@ +You are a Tech Researcher for the Kin multi-agent orchestrator. + +Your job: study an external API (documentation, endpoints, constraints, quirks), compare it with the current codebase, and produce a structured review. + +## Input + +You receive: +- PROJECT: id, name, path, tech stack +- TARGET_API: name of the API and URL to its documentation (or path to a local spec file) +- CODEBASE_SCOPE: list of files or directories to scan for existing API usage +- DECISIONS: known gotchas and workarounds for the project + +## Your responsibilities + +1. Fetch and read the API documentation via WebFetch (or read local spec file if URL is unavailable) +2. Map all available endpoints, their methods, parameters, and response schemas +3. Identify rate limits, authentication method, versioning, and known limitations +4. Search the codebase (CODEBASE_SCOPE) for existing API calls, clients, and config +5. Compare: what does the code assume vs. what the API actually provides +6. 
Produce a structured report with findings and discrepancies + +## Files to read + +- Files listed in CODEBASE_SCOPE — search for API base URLs, client instantiation, endpoint calls +- Any local spec files (OpenAPI, Swagger, Postman) if provided instead of a URL +- Environment/config files for base URL and auth token references (read-only, do NOT log secret values) + +## Rules + +- Use WebFetch for external documentation. If WebFetch is unavailable, work with local files only and set status to "partial" with a note. +- Bash is allowed ONLY for read-only operations: `curl -s -X GET` to verify endpoint availability. Never use Bash for write operations or side-effecting commands. +- Do NOT log or include actual secret values found in config files — reference them by variable name only. +- If CODEBASE_SCOPE is large, limit scanning to files that contain the API name or base URL string. +- codebase_diff must describe concrete discrepancies — e.g. "code calls /v1/users but docs show endpoint is /v2/users". +- If no discrepancies are found, set codebase_diff to an empty array. +- Do NOT write implementation code — produce research and analysis only. 
+ +## Output format + +Return ONLY valid JSON (no markdown, no explanation): + +```json +{ + "status": "done", + "api_overview": "One-paragraph summary of what the API does and its general design", + "endpoints": [ + { + "method": "GET", + "path": "/v1/resource", + "description": "Returns a list of resources", + "params": ["limit", "offset"], + "response_schema": "{ items: Resource[], total: number }" + } + ], + "rate_limits": { + "requests_per_minute": 60, + "requests_per_day": null, + "notes": "Per-token limits apply" + }, + "auth_method": "Bearer token in Authorization header", + "data_schemas": [ + { + "name": "Resource", + "fields": "{ id: string, name: string, created_at: ISO8601 }" + } + ], + "limitations": [ + "Pagination max page size is 100", + "Webhooks not supported — polling required" + ], + "gotchas": [ + "created_at is returned in UTC but without timezone suffix", + "Deleted resources return 200 with { deleted: true } instead of 404" + ], + "codebase_diff": [ + { + "file": "services/api_client.py", + "line_hint": "BASE_URL", + "issue": "Code uses /v1/resource but API has migrated to /v2/resource", + "suggestion": "Update BASE_URL and path prefix to /v2" + } + ], + "notes": "Optional context or follow-up recommendations for the architect or dev agent" +} +``` + +Valid values for `status`: `"done"`, `"partial"`, `"blocked"`. + +- `"partial"` — research completed with limited data (e.g. WebFetch unavailable, docs incomplete). +- `"blocked"` — unable to proceed; include `"blocked_reason": "..."`. + +If status is "partial", include `"partial_reason": "..."` explaining what was skipped. diff --git a/agents/prompts/tester.md b/agents/prompts/tester.md new file mode 100644 index 0000000..3b958f7 --- /dev/null +++ b/agents/prompts/tester.md @@ -0,0 +1,67 @@ +You are a Tester for the Kin multi-agent orchestrator. + +Your job: write or update tests that verify the implementation is correct and regressions are prevented. 
+ +## Input + +You receive: +- PROJECT: id, name, path, tech stack +- TASK: id, title, brief describing what was implemented +- PREVIOUS STEP OUTPUT: dev agent output describing what was changed (required) + +## Your responsibilities + +1. Read the previous step output to understand what was implemented +2. Read the existing tests to follow the same patterns and avoid duplication +3. Write tests that cover the new behavior and key edge cases +4. Ensure all existing tests still pass (don't break existing coverage) +5. Run the tests and report the result + +## Files to read + +- `tests/` — all existing test files for patterns and conventions +- `tests/test_models.py` — DB model tests (follow this pattern for core/ tests) +- `tests/test_api.py` — API endpoint tests (follow for web/api.py tests) +- `tests/test_runner.py` — pipeline/agent runner tests +- Source files changed in the previous step + +## Running tests + +Execute: `python -m pytest tests/ -v` from the project root. +For a specific test file: `python -m pytest tests/test_models.py -v` + +## Rules + +- Use `pytest`. No unittest, no custom test runners. +- Tests must be isolated — use in-memory SQLite (`":memory:"`), not the real `kin.db`. +- Mock `subprocess.run` when testing agent runner (never call actual Claude CLI in tests). +- One test per behavior — don't combine multiple assertions in one test without clear reason. +- Test names must describe the scenario: `test_update_task_sets_updated_at`, not `test_task`. +- Do NOT test implementation internals — test observable behavior and return values. 
+ +## Output format + +Return ONLY valid JSON (no markdown, no explanation): + +```json +{ + "status": "passed", + "tests_written": [ + { + "file": "tests/test_models.py", + "test_name": "test_get_effective_mode_task_overrides_project", + "description": "Verifies task-level mode takes precedence over project mode" + } + ], + "tests_run": 42, + "tests_passed": 42, + "tests_failed": 0, + "failures": [], + "notes": "Added 3 new tests for execution_mode logic" +} +``` + +Valid values for `status`: `"passed"`, `"failed"`, `"blocked"`. + +If status is "failed", populate `"failures"` with `[{"test": "...", "error": "..."}]`. +If status is "blocked", include `"blocked_reason": "..."`. diff --git a/agents/runner.py b/agents/runner.py index 8fb9f05..33dffbe 100644 --- a/agents/runner.py +++ b/agents/runner.py @@ -99,6 +99,7 @@ def run_agent( return { "success": success, + "error": result.get("error") if not success else None, "output": parsed_output if parsed_output else output_text, "raw_output": output_text, "role": role, @@ -155,7 +156,8 @@ def _run_claude( raw_stdout = proc.stdout or "" result: dict[str, Any] = { "output": raw_stdout, - "error": proc.stderr if proc.returncode != 0 else None, + "error": proc.stderr or None, # preserve stderr always for diagnostics + "empty_output": not raw_stdout.strip(), "returncode": proc.returncode, } @@ -370,7 +372,7 @@ def _is_permission_error(result: dict) -> bool: output = (result.get("raw_output") or result.get("output") or "") if not isinstance(output, str): output = json.dumps(output, ensure_ascii=False) - error = result.get("error_message") or "" + error = result.get("error") or "" text = output + " " + error return any(re.search(p, text) for p in PERMISSION_PATTERNS) @@ -429,15 +431,48 @@ def run_pipeline( model = step.get("model", "sonnet") brief = step.get("brief") - result = run_agent( - conn, role, task_id, project_id, - model=model, - previous_output=previous_output, - brief_override=brief, - dry_run=dry_run, - 
allow_write=allow_write, - noninteractive=noninteractive, - ) + try: + result = run_agent( + conn, role, task_id, project_id, + model=model, + previous_output=previous_output, + brief_override=brief, + dry_run=dry_run, + allow_write=allow_write, + noninteractive=noninteractive, + ) + except Exception as exc: + exc_msg = f"Step {i+1}/{len(steps)} ({role}) raised exception: {exc}" + if pipeline: + models.update_pipeline( + conn, pipeline["id"], + status="failed", + total_cost_usd=total_cost, + total_tokens=total_tokens, + total_duration_seconds=total_duration, + ) + models.log_agent_run( + conn, + project_id=project_id, + task_id=task_id, + agent_role=role, + action="execute", + input_summary=f"task={task_id}, model={model}", + output_summary=None, + success=False, + error_message=exc_msg, + ) + models.update_task(conn, task_id, status="blocked", blocked_reason=exc_msg) + return { + "success": False, + "error": exc_msg, + "steps_completed": i, + "results": results, + "total_cost_usd": total_cost, + "total_tokens": total_tokens, + "total_duration_seconds": total_duration, + "pipeline_id": pipeline["id"] if pipeline else None, + } if dry_run: results.append(result) @@ -485,10 +520,14 @@ def run_pipeline( total_tokens=total_tokens, total_duration_seconds=total_duration, ) - models.update_task(conn, task_id, status="blocked") + agent_error = result.get("error") or "" + error_msg = f"Step {i+1}/{len(steps)} ({role}) failed" + if agent_error: + error_msg += f": {agent_error}" + models.update_task(conn, task_id, status="blocked", blocked_reason=error_msg) return { "success": False, - "error": f"Step {i+1}/{len(steps)} ({role}) failed", + "error": error_msg, "steps_completed": i, "results": results, "total_cost_usd": total_cost, @@ -524,6 +563,11 @@ def run_pipeline( event="task_auto_approved", task_modules=task_modules) except Exception: pass + try: + run_hooks(conn, project_id, task_id, + event="task_done", task_modules=task_modules) + except Exception: + pass # Auto 
followup: generate tasks, auto-resolve permission issues. # Guard: skip for followup-sourced tasks to prevent infinite recursion. diff --git a/agents/specialists.yaml b/agents/specialists.yaml index 4e9342c..0a7963a 100644 --- a/agents/specialists.yaml +++ b/agents/specialists.yaml @@ -81,6 +81,26 @@ specialists: context_rules: decisions_category: security + tech_researcher: + name: "Tech Researcher" + model: sonnet + tools: [Read, Grep, Glob, WebFetch, Bash] + description: "Studies external APIs (docs, endpoints, limits, quirks), compares with codebase, produces structured review" + permissions: read_only + context_rules: + decisions: [gotcha, workaround] + output_schema: + status: "done | partial | blocked" + api_overview: string + endpoints: "array of { method, path, description, params, response_schema }" + rate_limits: "{ requests_per_minute, requests_per_day, notes }" + auth_method: string + data_schemas: "array of { name, fields }" + limitations: "array of strings" + gotchas: "array of strings" + codebase_diff: "array of { file, line_hint, issue, suggestion }" + notes: string + # Route templates — PM uses these to build pipelines routes: debug: @@ -102,3 +122,7 @@ routes: security_audit: steps: [security, architect] description: "Audit → remediation plan" + + api_research: + steps: [tech_researcher, architect] + description: "Study external API → integration plan" diff --git a/cli/main.py b/cli/main.py index f11f82d..bc4ba61 100644 --- a/cli/main.py +++ b/cli/main.py @@ -141,6 +141,7 @@ def project_show(ctx, id): click.echo(f" Path: {p['path']}") click.echo(f" Status: {p['status']}") click.echo(f" Priority: {p['priority']}") + click.echo(f" Mode: {p.get('execution_mode') or 'review'}") if p.get("tech_stack"): click.echo(f" Tech stack: {', '.join(p['tech_stack'])}") if p.get("forgejo_repo"): @@ -148,6 +149,21 @@ def project_show(ctx, id): click.echo(f" Created: {p['created_at']}") +@project.command("set-mode") +@click.option("--project", "project_id", 
required=True, help="Project ID") +@click.argument("mode", type=click.Choice(["auto", "review"])) +@click.pass_context +def project_set_mode(ctx, project_id, mode): + """Set execution mode for a project (auto|review).""" + conn = ctx.obj["conn"] + p = models.get_project(conn, project_id) + if not p: + click.echo(f"Project '{project_id}' not found.", err=True) + raise SystemExit(1) + models.update_project(conn, project_id, execution_mode=mode) + click.echo(f"Project '{project_id}' execution_mode set to '{mode}'.") + + # =========================================================================== # task # =========================================================================== @@ -204,11 +220,15 @@ def task_show(ctx, id): if not t: click.echo(f"Task '{id}' not found.", err=True) raise SystemExit(1) + effective_mode = models.get_effective_mode(conn, t["project_id"], t["id"]) + task_mode = t.get("execution_mode") + mode_label = f"{effective_mode} (overridden)" if task_mode else f"{effective_mode} (inherited)" click.echo(f"Task: {t['id']}") click.echo(f" Project: {t['project_id']}") click.echo(f" Title: {t['title']}") click.echo(f" Status: {t['status']}") click.echo(f" Priority: {t['priority']}") + click.echo(f" Mode: {mode_label}") if t.get("assigned_role"): click.echo(f" Role: {t['assigned_role']}") if t.get("parent_task_id"): @@ -223,13 +243,14 @@ def task_show(ctx, id): @task.command("update") @click.argument("task_id") -@click.option("--status", type=click.Choice( - ["pending", "in_progress", "review", "done", "blocked", "decomposed", "cancelled"]), +@click.option("--status", type=click.Choice(models.VALID_TASK_STATUSES), default=None, help="New status") @click.option("--priority", type=int, default=None, help="New priority (1-10)") +@click.option("--mode", "mode", type=click.Choice(["auto", "review"]), + default=None, help="Override execution mode for this task") @click.pass_context -def task_update(ctx, task_id, status, priority): - """Update a task's status or 
priority.""" + """Update a task's status, priority, or execution mode.""" conn = ctx.obj["conn"] t = models.get_task(conn, task_id) if not t: @@ -240,11 +261,13 @@ def task_update(ctx, task_id, status, priority): fields["status"] = status if priority is not None: fields["priority"] = priority + if mode is not None: + fields["execution_mode"] = mode if not fields: - click.echo("Nothing to update. Use --status or --priority.", err=True) + click.echo("Nothing to update. Use --status, --priority, or --mode.", err=True) raise SystemExit(1) updated = models.update_task(conn, task_id, **fields) - click.echo(f"Updated {updated['id']}: status={updated['status']}, priority={updated['priority']}") + click.echo(f"Updated {updated['id']}: status={updated['status']}, priority={updated['priority']}, mode={updated.get('execution_mode') or '(inherited)'}") # =========================================================================== @@ -816,7 +839,8 @@ def hook_logs(ctx, project_id, limit): def hook_setup(ctx, project_id, scripts_dir): """Register standard hooks for a project. - Currently registers: rebuild-frontend (fires on web/frontend/* changes). + Registers: rebuild-frontend (fires on every pipeline_completed), + auto-commit (fires on task_done — git add -A && git commit). Idempotent — skips hooks that already exist. 
""" conn = ctx.obj["conn"] @@ -838,7 +862,6 @@ def hook_setup(ctx, project_id, scripts_dir): name="rebuild-frontend", event="pipeline_completed", command=rebuild_cmd, - trigger_module_path="web/frontend/*", working_dir=p.get("path"), timeout_seconds=300, ) @@ -846,6 +869,20 @@ def hook_setup(ctx, project_id, scripts_dir): else: click.echo("Hook 'rebuild-frontend' already exists, skipping.") + if "auto-commit" not in existing_names: + project_path = str(Path(p.get("path", ".")).expanduser()) + hooks_module.create_hook( + conn, project_id, + name="auto-commit", + event="task_done", + command='git add -A && git commit -m "kin: {task_id} {title}"', + working_dir=project_path, + timeout_seconds=30, + ) + created.append("auto-commit") + else: + click.echo("Hook 'auto-commit' already exists, skipping.") + if created: click.echo(f"Registered hooks: {', '.join(created)}") diff --git a/core/db.py b/core/db.py index c8b63de..b91d29c 100644 --- a/core/db.py +++ b/core/db.py @@ -41,6 +41,7 @@ CREATE TABLE IF NOT EXISTS tasks ( security_result JSON, forgejo_issue_id INTEGER, execution_mode TEXT, + blocked_reason TEXT, created_at DATETIME DEFAULT CURRENT_TIMESTAMP, updated_at DATETIME DEFAULT CURRENT_TIMESTAMP ); @@ -211,6 +212,9 @@ def _migrate(conn: sqlite3.Connection): if "execution_mode" not in task_cols: conn.execute("ALTER TABLE tasks ADD COLUMN execution_mode TEXT") conn.commit() + if "blocked_reason" not in task_cols: + conn.execute("ALTER TABLE tasks ADD COLUMN blocked_reason TEXT") + conn.commit() def init_db(db_path: Path = DB_PATH) -> sqlite3.Connection: diff --git a/core/hooks.py b/core/hooks.py index 1b9775b..c68df47 100644 --- a/core/hooks.py +++ b/core/hooks.py @@ -146,6 +146,17 @@ def _get_hook(conn: sqlite3.Connection, hook_id: int) -> dict: return dict(row) if row else {} +def _substitute_vars(command: str, task_id: str | None, conn: sqlite3.Connection) -> str: + """Substitute {task_id} and {title} in a hook command; the title is sanitized for use inside a double-quoted shell word.""" + if task_id is None or "{task_id}" not in 
command and "{title}" not in command: + return command + row = conn.execute("SELECT title FROM tasks WHERE id = ?", (task_id,)).fetchone() + title = row["title"] if row else "" + # Sanitize title for shell safety: quotes/newlines as before, plus drop $, ` and \ which still expand or escape inside double quotes (hooks run with shell=True) + safe_title = title.translate(str.maketrans({'"': "'", "\n": " ", "\r": "", "$": "", "`": "", "\\": ""})) + return command.replace("{task_id}", task_id).replace("{title}", safe_title) + + def _execute_hook( conn: sqlite3.Connection, hook: dict, @@ -159,9 +170,11 @@ def _execute_hook( exit_code = -1 success = False + command = _substitute_vars(hook["command"], task_id, conn) + try: proc = subprocess.run( - hook["command"], + command, shell=True, cwd=hook.get("working_dir") or None, capture_output=True, diff --git a/core/models.py b/core/models.py index b3b4ae8..0a4825b 100644 --- a/core/models.py +++ b/core/models.py @@ -9,6 +9,12 @@ from datetime import datetime from typing import Any +VALID_TASK_STATUSES = [ + "pending", "in_progress", "review", "done", + "blocked", "decomposed", "cancelled", +] + + def _row_to_dict(row: sqlite3.Row | None) -> dict | None: """Convert sqlite3.Row to dict with JSON fields decoded.""" if row is None: @@ -249,6 +255,19 @@ def get_decisions( return _rows_to_list(conn.execute(query, params).fetchall()) +def get_decision(conn: sqlite3.Connection, decision_id: int) -> dict | None: + """Get a single decision by id.""" + row = conn.execute("SELECT * FROM decisions WHERE id = ?", (decision_id,)).fetchone() + return _row_to_dict(row) if row else None + + +def delete_decision(conn: sqlite3.Connection, decision_id: int) -> bool: + """Delete a decision by id. 
Returns True if deleted, False if not found.""" + cur = conn.execute("DELETE FROM decisions WHERE id = ?", (decision_id,)) + conn.commit() + return cur.rowcount > 0 + + # --------------------------------------------------------------------------- # Modules # --------------------------------------------------------------------------- diff --git a/tests/test_api.py b/tests/test_api.py index d8939d1..3109486 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -105,6 +105,18 @@ def test_approve_not_found(client): assert r.status_code == 404 +def test_approve_fires_task_done_hooks(client): + """Ручной апрув задачи должен вызывать хуки с event='task_done'.""" + from unittest.mock import patch + with patch("core.hooks.run_hooks") as mock_hooks: + mock_hooks.return_value = [] + r = client.post("/api/tasks/P1-001/approve", json={}) + assert r.status_code == 200 + events_fired = [call[1].get("event") or call[0][3] + for call in mock_hooks.call_args_list] + assert "task_done" in events_fired + + def test_reject_task(client): from core.db import init_db from core import models @@ -173,14 +185,15 @@ def test_run_not_found(client): assert r.status_code == 404 -def test_run_with_allow_write(client): - """POST /run with allow_write=true should be accepted.""" - r = client.post("/api/tasks/P1-001/run", json={"allow_write": True}) +def test_run_kin_038_without_allow_write(client): + """Регрессионный тест KIN-038: allow_write удалён из схемы, + эндпоинт принимает запросы с пустым телом без этого параметра.""" + r = client.post("/api/tasks/P1-001/run", json={}) assert r.status_code == 202 def test_run_with_empty_body(client): - """POST /run with empty JSON body should default allow_write=false.""" + """POST /run with empty JSON body should be accepted.""" r = client.post("/api/tasks/P1-001/run", json={}) assert r.status_code == 202 @@ -256,14 +269,61 @@ def test_patch_task_status_persisted(client): assert r.json()["status"] == "blocked" -@pytest.mark.parametrize("status", 
["pending", "in_progress", "review", "done", "blocked", "cancelled"]) +@pytest.mark.parametrize("status", ["pending", "in_progress", "review", "done", "blocked", "decomposed", "cancelled"]) def test_patch_task_all_valid_statuses(client, status): - """Все 6 допустимых статусов должны приниматься.""" + """Все 7 допустимых статусов должны приниматься (включая decomposed).""" r = client.patch("/api/tasks/P1-001", json={"status": status}) assert r.status_code == 200 assert r.json()["status"] == status +def test_patch_task_status_decomposed(client): + """Регрессионный тест KIN-033: API принимает статус 'decomposed'.""" + r = client.patch("/api/tasks/P1-001", json={"status": "decomposed"}) + assert r.status_code == 200 + assert r.json()["status"] == "decomposed" + + +def test_patch_task_status_decomposed_persisted(client): + """После установки 'decomposed' повторный GET возвращает этот статус.""" + client.patch("/api/tasks/P1-001", json={"status": "decomposed"}) + r = client.get("/api/tasks/P1-001") + assert r.status_code == 200 + assert r.json()["status"] == "decomposed" + + +# --------------------------------------------------------------------------- +# KIN-033 — единый источник истины для статусов +# --------------------------------------------------------------------------- + +def test_api_valid_statuses_match_models(): + """API использует models.VALID_TASK_STATUSES как единственный источник истины.""" + from core import models + import web.api as api_module + assert api_module.VALID_STATUSES == set(models.VALID_TASK_STATUSES) + + +def test_cli_valid_statuses_match_models(): + """CLI использует models.VALID_TASK_STATUSES как единственный источник истины.""" + from core import models + from cli.main import task_update + status_param = next(p for p in task_update.params if p.name == "status") + cli_choices = set(status_param.type.choices) + assert cli_choices == set(models.VALID_TASK_STATUSES) + + +def test_cli_and_api_statuses_are_identical(): + """Список статусов в 
CLI и API идентичен.""" + from core import models + import web.api as api_module + from cli.main import task_update + status_param = next(p for p in task_update.params if p.name == "status") + cli_choices = set(status_param.type.choices) + assert cli_choices == api_module.VALID_STATUSES + assert "decomposed" in cli_choices + assert "decomposed" in api_module.VALID_STATUSES + + def test_patch_task_invalid_status(client): """Недопустимый статус → 400.""" r = client.patch("/api/tasks/P1-001", json={"status": "flying"}) @@ -274,3 +334,258 @@ def test_patch_task_not_found(client): """Несуществующая задача → 404.""" r = client.patch("/api/tasks/NOPE-999", json={"status": "done"}) assert r.status_code == 404 + + +def test_patch_task_empty_body_returns_400(client): + """PATCH с пустым телом (нет status и нет execution_mode) → 400.""" + r = client.patch("/api/tasks/P1-001", json={}) + assert r.status_code == 400 + + +# --------------------------------------------------------------------------- +# KIN-022 — blocked_reason: регрессионные тесты +# --------------------------------------------------------------------------- + +def test_blocked_reason_saved_and_returned(client): + """При переходе в blocked с blocked_reason поле сохраняется и отдаётся в GET.""" + from core.db import init_db + from core import models + conn = init_db(api_module.DB_PATH) + models.update_task(conn, "P1-001", status="blocked", + blocked_reason="Step 1/2 (debugger) failed") + conn.close() + + r = client.get("/api/tasks/P1-001") + assert r.status_code == 200 + data = r.json() + assert data["status"] == "blocked" + assert data["blocked_reason"] == "Step 1/2 (debugger) failed" + + +def test_blocked_reason_present_in_full(client): + """blocked_reason также присутствует в /full эндпоинте.""" + from core.db import init_db + from core import models + conn = init_db(api_module.DB_PATH) + models.update_task(conn, "P1-001", status="blocked", + blocked_reason="tester agent crashed") + conn.close() + + r = 
client.get("/api/tasks/P1-001/full") + assert r.status_code == 200 + data = r.json() + assert data["status"] == "blocked" + assert data["blocked_reason"] == "tester agent crashed" + + +def test_blocked_reason_none_by_default(client): + """Новая задача не имеет blocked_reason.""" + r = client.get("/api/tasks/P1-001") + assert r.status_code == 200 + data = r.json() + assert data["blocked_reason"] is None + + +def test_blocked_without_reason_allowed(client): + """Переход в blocked без причины допустим (reason=None).""" + from core.db import init_db + from core import models + conn = init_db(api_module.DB_PATH) + models.update_task(conn, "P1-001", status="blocked") + conn.close() + + r = client.get("/api/tasks/P1-001") + assert r.status_code == 200 + data = r.json() + assert data["status"] == "blocked" + assert data["blocked_reason"] is None + + +def test_blocked_reason_cleared_on_retry(client): + """При повторном запуске (статус pending) blocked_reason сбрасывается.""" + from core.db import init_db + from core import models + conn = init_db(api_module.DB_PATH) + models.update_task(conn, "P1-001", status="blocked", + blocked_reason="failed once") + models.update_task(conn, "P1-001", status="pending", blocked_reason=None) + conn.close() + + r = client.get("/api/tasks/P1-001") + assert r.status_code == 200 + data = r.json() + assert data["status"] == "pending" + assert data["blocked_reason"] is None + + +# --------------------------------------------------------------------------- +# KIN-029 — DELETE /api/projects/{project_id}/decisions/{decision_id} +# --------------------------------------------------------------------------- + +def test_delete_decision_ok(client): + """Создаём decision через POST, удаляем DELETE → 200 с телом {"deleted": id}.""" + r = client.post("/api/decisions", json={ + "project_id": "p1", + "type": "decision", + "title": "Use SQLite", + "description": "Chosen for simplicity", + }) + assert r.status_code == 200 + decision_id = r.json()["id"] + + r 
= client.delete(f"/api/projects/p1/decisions/{decision_id}") + assert r.status_code == 200 + assert r.json() == {"deleted": decision_id} + + r = client.get("/api/decisions?project=p1") + assert r.status_code == 200 + ids = [d["id"] for d in r.json()] + assert decision_id not in ids + + +def test_delete_decision_not_found(client): + """DELETE несуществующего decision → 404.""" + r = client.delete("/api/projects/p1/decisions/99999") + assert r.status_code == 404 + + +def test_delete_decision_wrong_project(client): + """DELETE decision с чужим project_id → 404 (не раскрываем существование).""" + r = client.post("/api/decisions", json={ + "project_id": "p1", + "type": "decision", + "title": "Cross-project check", + "description": "Should not be deletable from p2", + }) + assert r.status_code == 200 + decision_id = r.json()["id"] + + r = client.delete(f"/api/projects/p2/decisions/{decision_id}") + assert r.status_code == 404 + + # Decision должен остаться нетронутым + r = client.get("/api/decisions?project=p1") + ids = [d["id"] for d in r.json()] + assert decision_id in ids + + +# --------------------------------------------------------------------------- +# KIN-035 — регрессионный тест: смена статуса на cancelled +# --------------------------------------------------------------------------- + +def test_patch_task_status_cancelled(client): + """Регрессионный тест KIN-035: PATCH /api/tasks/{id} с status='cancelled' → 200.""" + r = client.patch("/api/tasks/P1-001", json={"status": "cancelled"}) + assert r.status_code == 200 + assert r.json()["status"] == "cancelled" + + +def test_patch_task_status_cancelled_persisted(client): + """После установки 'cancelled' повторный GET возвращает этот статус.""" + client.patch("/api/tasks/P1-001", json={"status": "cancelled"}) + r = client.get("/api/tasks/P1-001") + assert r.status_code == 200 + assert r.json()["status"] == "cancelled" + + +def test_cancelled_in_valid_statuses(): + """'cancelled' присутствует в VALID_TASK_STATUSES 
модели и в VALID_STATUSES API.""" + from core import models + import web.api as api_module + assert "cancelled" in models.VALID_TASK_STATUSES + assert "cancelled" in api_module.VALID_STATUSES + + +# --------------------------------------------------------------------------- +# KIN-036 — регрессионный тест: --allow-write всегда в команде через web API +# --------------------------------------------------------------------------- + +def test_run_always_includes_allow_write_when_body_false(client): + """Регрессионный тест KIN-036: --allow-write присутствует в команде, + даже если allow_write=False в теле запроса. + + Баг: условие `if body and body.allow_write` не добавляло флаг при + allow_write=False, что приводило к блокировке агента на 300 с.""" + from unittest.mock import patch, MagicMock + with patch("web.api.subprocess.Popen") as mock_popen: + mock_proc = MagicMock() + mock_proc.pid = 12345 + mock_popen.return_value = mock_proc + + r = client.post("/api/tasks/P1-001/run", json={"allow_write": False}) + assert r.status_code == 202 + + cmd = mock_popen.call_args[0][0] + assert "--allow-write" in cmd, ( + "--allow-write обязан присутствовать всегда: без него агент зависает " + "при попытке записи, потому что stdin=DEVNULL и нет интерактивного подтверждения" + ) + + +def test_run_always_includes_allow_write_without_body(client): + """Регрессионный тест KIN-036: --allow-write присутствует даже без тела запроса.""" + from unittest.mock import patch, MagicMock + with patch("web.api.subprocess.Popen") as mock_popen: + mock_proc = MagicMock() + mock_proc.pid = 12345 + mock_popen.return_value = mock_proc + + r = client.post("/api/tasks/P1-001/run") + assert r.status_code == 202 + + cmd = mock_popen.call_args[0][0] + assert "--allow-write" in cmd + + +def test_run_sets_kin_noninteractive_env(client): + """Регрессионный тест KIN-036: KIN_NONINTERACTIVE=1 всегда устанавливается + при запуске через web API, что вместе с --allow-write предотвращает зависание.""" + from 
unittest.mock import patch, MagicMock + with patch("web.api.subprocess.Popen") as mock_popen: + mock_proc = MagicMock() + mock_proc.pid = 99 + mock_popen.return_value = mock_proc + + r = client.post("/api/tasks/P1-001/run") + assert r.status_code == 202 + + call_kwargs = mock_popen.call_args[1] + env = call_kwargs.get("env", {}) + assert env.get("KIN_NONINTERACTIVE") == "1" + + +def test_run_sets_stdin_devnull(client): + """Регрессионный тест KIN-036: stdin=DEVNULL всегда устанавливается, + что является причиной, по которой --allow-write обязателен.""" + import subprocess as _subprocess + from unittest.mock import patch, MagicMock + with patch("web.api.subprocess.Popen") as mock_popen: + mock_proc = MagicMock() + mock_proc.pid = 42 + mock_popen.return_value = mock_proc + + r = client.post("/api/tasks/P1-001/run") + assert r.status_code == 202 + + call_kwargs = mock_popen.call_args[1] + assert call_kwargs.get("stdin") == _subprocess.DEVNULL + + +# --------------------------------------------------------------------------- +# KIN-040 — регрессионные тесты: удаление TaskRun / allow_write из схемы +# --------------------------------------------------------------------------- + +def test_run_kin_040_no_taskrun_class(): + """Регрессионный тест KIN-040: класс TaskRun удалён из web/api.py. + allow_write больше не является частью схемы эндпоинта /run.""" + import web.api as api_module + assert not hasattr(api_module, "TaskRun"), ( + "Класс TaskRun должен быть удалён из web/api.py (KIN-040)" + ) + + +def test_run_kin_040_allow_write_true_ignored(client): + """Регрессионный тест KIN-040: allow_write=True в теле игнорируется (не 422). 
+ Эндпоинт не имеет body-параметра, поэтому FastAPI не валидирует тело.""" + r = client.post("/api/tasks/P1-001/run", json={"allow_write": True}) + assert r.status_code == 202 diff --git a/tests/test_auto_mode.py b/tests/test_auto_mode.py index 5c0c23c..e71c1e7 100644 --- a/tests/test_auto_mode.py +++ b/tests/test_auto_mode.py @@ -257,24 +257,6 @@ class TestAutoRerunOnPermissionDenied: task = models.get_task(conn, "VDOL-001") assert task["status"] == "blocked" - @patch("core.followup.generate_followups") - @patch("agents.runner.run_hooks") - @patch("agents.runner.subprocess.run") - def test_review_mode_does_not_retry_on_permission_error(self, mock_run, mock_hooks, mock_followup, conn): - """Регрессия: review-режим НЕ делает авто-retry при permission error.""" - mock_run.return_value = _mock_permission_denied() - mock_hooks.return_value = [] - mock_followup.return_value = {"created": [], "pending_actions": []} - - # Проект остаётся в default review mode - steps = [{"role": "debugger", "brief": "fix"}] - result = run_pipeline(conn, "VDOL-001", steps) - - assert result["success"] is False - assert mock_run.call_count == 1, "Review-mode не должен retry" - task = models.get_task(conn, "VDOL-001") - assert task["status"] == "blocked" - @patch("core.followup.generate_followups") @patch("agents.runner.run_hooks") @patch("agents.runner.subprocess.run") @@ -318,6 +300,24 @@ class TestAutoRerunOnPermissionDenied: assert result["success"] is False assert mock_run.call_count == 1, "Retry не нужен для обычных ошибок" + @patch("core.followup.generate_followups") + @patch("agents.runner.run_hooks") + @patch("agents.runner.subprocess.run") + def test_review_mode_does_not_retry_on_permission_error(self, mock_run, mock_hooks, mock_followup, conn): + """В review-режиме при permission denied runner НЕ делает retry.""" + mock_run.return_value = _mock_permission_denied() + mock_hooks.return_value = [] + mock_followup.return_value = {"created": [], "pending_actions": []} + + # Проект в 
default review mode + steps = [{"role": "debugger", "brief": "fix file"}] + result = run_pipeline(conn, "VDOL-001", steps) + + assert result["success"] is False + assert mock_run.call_count == 1, "В review-режиме retry НЕ должен происходить" + task = models.get_task(conn, "VDOL-001") + assert task["status"] == "blocked" + # --------------------------------------------------------------------------- # test_auto_followup diff --git a/tests/test_cli.py b/tests/test_cli.py index f056f6d..a273cd1 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -333,7 +333,8 @@ def test_hook_setup_registers_rebuild_frontend(runner, tmp_path): r = invoke(runner, ["hook", "list", "--project", "p1"]) assert r.exit_code == 0 assert "rebuild-frontend" in r.output - assert "web/frontend/*" in r.output + # KIN-050: trigger_module_path должен быть NULL — хук срабатывает безусловно + assert "web/frontend/*" not in r.output def test_hook_setup_idempotent(runner, tmp_path): @@ -352,3 +353,123 @@ def test_hook_setup_project_not_found(runner): r = invoke(runner, ["hook", "setup", "--project", "nope"]) assert r.exit_code == 1 assert "not found" in r.output + + +# =========================================================================== +# KIN-018 — project set-mode / task update --mode / show with mode labels +# =========================================================================== + +def test_project_set_mode_auto(runner): + """project set-mode auto — обновляет режим, выводит подтверждение.""" + invoke(runner, ["project", "add", "p1", "P1", "/p1"]) + r = invoke(runner, ["project", "set-mode", "--project", "p1", "auto"]) + assert r.exit_code == 0 + assert "auto" in r.output + + +def test_project_set_mode_review(runner): + """project set-mode review — обновляет режим обратно в review.""" + invoke(runner, ["project", "add", "p1", "P1", "/p1"]) + invoke(runner, ["project", "set-mode", "--project", "p1", "auto"]) + r = invoke(runner, ["project", "set-mode", "--project", "p1", "review"]) + 
assert r.exit_code == 0 + assert "review" in r.output + + +def test_project_set_mode_persisted(runner): + """После project set-mode режим сохраняется в БД и виден в project show.""" + invoke(runner, ["project", "add", "p1", "P1", "/p1"]) + invoke(runner, ["project", "set-mode", "--project", "p1", "auto"]) + + r = invoke(runner, ["project", "show", "p1"]) + assert r.exit_code == 0 + assert "auto" in r.output + + +def test_project_set_mode_not_found(runner): + """project set-mode для несуществующего проекта → exit code 1.""" + r = invoke(runner, ["project", "set-mode", "--project", "nope", "auto"]) + assert r.exit_code == 1 + assert "not found" in r.output + + +def test_project_set_mode_invalid(runner): + """project set-mode с недопустимым значением → ошибка click.""" + invoke(runner, ["project", "add", "p1", "P1", "/p1"]) + r = invoke(runner, ["project", "set-mode", "--project", "p1", "turbo"]) + assert r.exit_code != 0 + + +def test_project_show_displays_mode(runner): + """project show отображает строку Mode: ...""" + invoke(runner, ["project", "add", "p1", "P1", "/p1"]) + r = invoke(runner, ["project", "show", "p1"]) + assert r.exit_code == 0 + assert "Mode:" in r.output + + +def test_task_update_mode_auto(runner): + """task update --mode auto задаёт execution_mode на задачу.""" + invoke(runner, ["project", "add", "p1", "P1", "/p1"]) + invoke(runner, ["task", "add", "p1", "Fix bug"]) + r = invoke(runner, ["task", "update", "P1-001", "--mode", "auto"]) + assert r.exit_code == 0 + assert "auto" in r.output + + +def test_task_update_mode_review(runner): + """task update --mode review задаёт execution_mode=review на задачу.""" + invoke(runner, ["project", "add", "p1", "P1", "/p1"]) + invoke(runner, ["task", "add", "p1", "Fix bug"]) + r = invoke(runner, ["task", "update", "P1-001", "--mode", "review"]) + assert r.exit_code == 0 + assert "review" in r.output + + +def test_task_update_mode_persisted(runner): + """После task update --mode режим сохраняется и виден в task 
show как (overridden).""" + invoke(runner, ["project", "add", "p1", "P1", "/p1"]) + invoke(runner, ["task", "add", "p1", "Fix bug"]) + invoke(runner, ["task", "update", "P1-001", "--mode", "auto"]) + + r = invoke(runner, ["task", "show", "P1-001"]) + assert r.exit_code == 0 + assert "overridden" in r.output + + +def test_task_update_mode_invalid(runner): + """task update --mode с недопустимым значением → ошибка click.""" + invoke(runner, ["project", "add", "p1", "P1", "/p1"]) + invoke(runner, ["task", "add", "p1", "Fix bug"]) + r = invoke(runner, ["task", "update", "P1-001", "--mode", "turbo"]) + assert r.exit_code != 0 + + +def test_task_show_mode_inherited(runner): + """task show без явного execution_mode показывает (inherited).""" + invoke(runner, ["project", "add", "p1", "P1", "/p1"]) + invoke(runner, ["task", "add", "p1", "Fix bug"]) + r = invoke(runner, ["task", "show", "P1-001"]) + assert r.exit_code == 0 + assert "inherited" in r.output + + +def test_task_show_mode_overridden(runner): + """task show с task-level execution_mode показывает (overridden).""" + invoke(runner, ["project", "add", "p1", "P1", "/p1"]) + invoke(runner, ["task", "add", "p1", "Fix bug"]) + invoke(runner, ["task", "update", "P1-001", "--mode", "review"]) + r = invoke(runner, ["task", "show", "P1-001"]) + assert r.exit_code == 0 + assert "overridden" in r.output + + +def test_task_show_mode_label_reflects_project_mode(runner): + """Если у проекта auto, у задачи нет mode — task show показывает 'auto (inherited)'.""" + invoke(runner, ["project", "add", "p1", "P1", "/p1"]) + invoke(runner, ["project", "set-mode", "--project", "p1", "auto"]) + invoke(runner, ["task", "add", "p1", "Fix bug"]) + r = invoke(runner, ["task", "show", "P1-001"]) + assert r.exit_code == 0 + assert "auto" in r.output + assert "inherited" in r.output diff --git a/tests/test_hooks.py b/tests/test_hooks.py index 2778ee0..4a9d554 100644 --- a/tests/test_hooks.py +++ b/tests/test_hooks.py @@ -8,7 +8,7 @@ from core.db 
import init_db from core import models from core.hooks import ( create_hook, get_hooks, update_hook, delete_hook, - run_hooks, get_hook_logs, HookResult, + run_hooks, get_hook_logs, HookResult, _substitute_vars, ) @@ -273,3 +273,298 @@ class TestGetHookLogs: event="pipeline_completed", task_modules=modules) logs = get_hook_logs(conn, project_id="vdol", limit=3) assert len(logs) == 3 + + +# --------------------------------------------------------------------------- +# Variable substitution in hook commands +# --------------------------------------------------------------------------- + +class TestSubstituteVars: + def test_substitutes_task_id_and_title(self, conn): + result = _substitute_vars( + 'git commit -m "kin: {task_id} {title}"', + "VDOL-001", + conn, + ) + assert result == 'git commit -m "kin: VDOL-001 Fix bug"' + + def test_no_substitution_when_task_id_is_none(self, conn): + cmd = 'git commit -m "kin: {task_id} {title}"' + result = _substitute_vars(cmd, None, conn) + assert result == cmd + + def test_sanitizes_double_quotes_in_title(self, conn): + conn.execute('UPDATE tasks SET title = ? WHERE id = ?', + ('Fix "bug" here', "VDOL-001")) + conn.commit() + result = _substitute_vars( + 'git commit -m "kin: {task_id} {title}"', + "VDOL-001", + conn, + ) + assert '"' not in result.split('"kin:')[1].split('"')[0] + assert "Fix 'bug' here" in result + + def test_sanitizes_newlines_in_title(self, conn): + conn.execute('UPDATE tasks SET title = ? 
WHERE id = ?', + ("Fix\nbug\r\nhere", "VDOL-001")) + conn.commit() + result = _substitute_vars("{title}", "VDOL-001", conn) + assert "\n" not in result + assert "\r" not in result + + def test_unknown_task_id_uses_empty_title(self, conn): + result = _substitute_vars("{task_id} {title}", "NONEXISTENT", conn) + assert result == "NONEXISTENT " + + def test_no_placeholders_returns_command_unchanged(self, conn): + cmd = "npm run build" + result = _substitute_vars(cmd, "VDOL-001", conn) + assert result == cmd + + @patch("core.hooks.subprocess.run") + def test_autocommit_hook_command_substituted(self, mock_run, conn): + """auto-commit hook должен получать реальные task_id и title в команде.""" + mock_run.return_value = MagicMock(returncode=0, stdout="ok", stderr="") + create_hook(conn, "vdol", "auto-commit", "task_done", + 'git add -A && git commit -m "kin: {task_id} {title}"', + working_dir="/tmp") + run_hooks(conn, "vdol", "VDOL-001", event="task_done", task_modules=[]) + call_kwargs = mock_run.call_args[1] + # shell=True: command is the first positional arg + command = mock_run.call_args[0][0] + assert "VDOL-001" in command + assert "Fix bug" in command + + +# --------------------------------------------------------------------------- +# KIN-050: rebuild-frontend hook — unconditional firing after pipeline +# --------------------------------------------------------------------------- + +class TestRebuildFrontendHookSetup: + """Regression tests for KIN-050. + + Баг: rebuild-frontend не срабатывал, если pipeline не трогал web/frontend/*. + Фикс: убран trigger_module_path из hook_setup — хук должен срабатывать всегда. + """ + + def test_rebuild_frontend_created_without_trigger_module_path(self, conn): + """rebuild-frontend hook должен быть создан без trigger_module_path (KIN-050). + + Воспроизводит логику hook_setup: создаём хук без фильтра и убеждаемся, + что он сохраняется в БД с trigger_module_path=NULL. 
+ """ + hook = create_hook( + conn, "vdol", + name="rebuild-frontend", + event="pipeline_completed", + command="scripts/rebuild-frontend.sh", + trigger_module_path=None, # фикс KIN-050: без фильтра + working_dir="/tmp", + timeout_seconds=300, + ) + + assert hook["trigger_module_path"] is None, ( + "trigger_module_path должен быть NULL — хук не должен фильтровать по модулям" + ) + + # Перечитываем из БД — убеждаемся, что NULL сохранился + hooks = get_hooks(conn, "vdol", enabled_only=False) + rebuild = next((h for h in hooks if h["name"] == "rebuild-frontend"), None) + assert rebuild is not None + assert rebuild["trigger_module_path"] is None + + @patch("core.hooks.subprocess.run") + def test_rebuild_frontend_fires_when_only_backend_modules_changed(self, mock_run, conn): + """Хук без trigger_module_path должен срабатывать при изменении backend-файлов. + + Регрессия KIN-050: раньше хук молчал, если не было web/frontend/* файлов. + """ + mock_run.return_value = MagicMock(returncode=0, stdout="built!", stderr="") + create_hook( + conn, "vdol", "rebuild-frontend", "pipeline_completed", + "npm run build", + trigger_module_path=None, # фикс: нет фильтра + working_dir="/tmp", + ) + + backend_modules = [ + {"path": "core/models.py", "name": "models"}, + {"path": "web/api.py", "name": "api"}, + ] + results = run_hooks(conn, "vdol", "VDOL-001", + event="pipeline_completed", task_modules=backend_modules) + + assert len(results) == 1, "Хук должен сработать несмотря на отсутствие frontend-файлов" + assert results[0].name == "rebuild-frontend" + assert results[0].success is True + mock_run.assert_called_once() + + @patch("core.hooks.subprocess.run") + def test_rebuild_frontend_fires_exactly_once_per_pipeline(self, mock_run, conn): + """Хук rebuild-frontend должен срабатывать ровно один раз за pipeline_completed.""" + mock_run.return_value = MagicMock(returncode=0, stdout="ok", stderr="") + create_hook( + conn, "vdol", "rebuild-frontend", "pipeline_completed", + "npm run build", + 
trigger_module_path=None, + working_dir="/tmp", + ) + + any_modules = [ + {"path": "core/hooks.py", "name": "hooks"}, + {"path": "web/frontend/App.vue", "name": "App"}, + {"path": "web/api.py", "name": "api"}, + ] + results = run_hooks(conn, "vdol", "VDOL-001", + event="pipeline_completed", task_modules=any_modules) + + assert len(results) == 1, "Хук должен выполниться ровно один раз" + mock_run.assert_called_once() + + @patch("core.hooks.subprocess.run") + def test_rebuild_frontend_fires_with_empty_module_list(self, mock_run, conn): + """Хук без trigger_module_path должен срабатывать даже с пустым списком модулей.""" + mock_run.return_value = MagicMock(returncode=0, stdout="ok", stderr="") + create_hook( + conn, "vdol", "rebuild-frontend", "pipeline_completed", + "npm run build", + trigger_module_path=None, + working_dir="/tmp", + ) + + results = run_hooks(conn, "vdol", "VDOL-001", + event="pipeline_completed", task_modules=[]) + + assert len(results) == 1 + assert results[0].name == "rebuild-frontend" + mock_run.assert_called_once() + + @patch("core.hooks.subprocess.run") + def test_rebuild_frontend_with_module_path_skips_non_frontend(self, mock_run, conn): + """Контрольный тест: хук С trigger_module_path НЕ срабатывает на backend-файлы. + + Подтверждает, что фикс (удаление trigger_module_path) был необходим. 
+ """ + mock_run.return_value = MagicMock(returncode=0, stdout="ok", stderr="") + create_hook( + conn, "vdol", "rebuild-frontend-filtered", "pipeline_completed", + "npm run build", + trigger_module_path="web/frontend/*", # старое (сломанное) поведение + working_dir="/tmp", + ) + + backend_modules = [{"path": "core/models.py", "name": "models"}] + results = run_hooks(conn, "vdol", "VDOL-001", + event="pipeline_completed", task_modules=backend_modules) + + assert len(results) == 0, ( + "Хук с trigger_module_path НЕ должен срабатывать на backend-файлы — " + "именно это было первопричиной бага KIN-050" + ) + + +# --------------------------------------------------------------------------- +# KIN-052: rebuild-frontend hook — команда cd+&& и персистентность в БД +# --------------------------------------------------------------------------- + +class TestKIN052RebuildFrontendCommand: + """Регрессионные тесты для KIN-052. + + Хук rebuild-frontend использует команду вида: + cd /path/to/frontend && npm run build + — то есть цепочку shell-команд без working_dir. + Тесты проверяют, что такая форма работает корректно и хук переживает + пересоздание соединения с БД (симуляция рестарта). + """ + + @patch("core.hooks.subprocess.run") + def test_cd_chained_command_passes_as_string_to_shell(self, mock_run, conn): + """Команда с && должна передаваться в subprocess как строка (не список) с shell=True. + + Если передать список ['cd', '/path', '&&', 'npm', 'run', 'build'] с shell=True, + shell проигнорирует аргументы после первого. Строковая форма обязательна. 
+ """ + mock_run.return_value = MagicMock(returncode=0, stdout="built!", stderr="") + cmd = "cd /Users/grosfrumos/projects/kin/web/frontend && npm run build" + create_hook(conn, "vdol", "rebuild-frontend", "pipeline_completed", cmd, + trigger_module_path=None, working_dir=None) + + run_hooks(conn, "vdol", "VDOL-001", event="pipeline_completed", task_modules=[]) + + call_args = mock_run.call_args + passed_cmd = call_args[0][0] + assert isinstance(passed_cmd, str), ( + "Команда с && должна передаваться как строка, иначе shell не раскроет &&" + ) + assert "&&" in passed_cmd + assert call_args[1].get("shell") is True + + @patch("core.hooks.subprocess.run") + def test_cd_command_without_working_dir_uses_cwd_none(self, mock_run, conn): + """Хук с cd-командой и working_dir=None должен вызывать subprocess с cwd=None. + + Директория смены задаётся через cd в самой команде, а не через cwd. + """ + mock_run.return_value = MagicMock(returncode=0, stdout="ok", stderr="") + cmd = "cd /Users/grosfrumos/projects/kin/web/frontend && npm run build" + create_hook(conn, "vdol", "rebuild-frontend", "pipeline_completed", cmd, + trigger_module_path=None, working_dir=None) + + run_hooks(conn, "vdol", "VDOL-001", event="pipeline_completed", task_modules=[]) + + cwd = mock_run.call_args[1].get("cwd") + assert cwd is None, ( + f"cwd должен быть None когда working_dir не задан, получили: {cwd!r}" + ) + + @patch("core.hooks.subprocess.run") + def test_cd_command_exits_zero_returns_success(self, mock_run, conn): + """Хук с cd+npm run build при returncode=0 должен вернуть success=True.""" + mock_run.return_value = MagicMock(returncode=0, stdout="✓ build complete", stderr="") + cmd = "cd /Users/grosfrumos/projects/kin/web/frontend && npm run build" + create_hook(conn, "vdol", "rebuild-frontend", "pipeline_completed", cmd, + trigger_module_path=None) + + results = run_hooks(conn, "vdol", "VDOL-001", + event="pipeline_completed", task_modules=[]) + + assert len(results) == 1 + assert 
results[0].success is True + assert results[0].name == "rebuild-frontend" + + @patch("core.hooks.subprocess.run") + def test_hook_persists_after_db_reconnect(self, mock_run): + """Хук должен сохраняться в файловой БД и быть доступен после пересоздания соединения. + + Симулирует рестарт: создаём хук, закрываем соединение, открываем новое — хук на месте. + """ + import tempfile + import os + from core.db import init_db + + with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f: + db_path = f.name + try: + # Первое соединение — создаём проект и хук + conn1 = init_db(db_path) + from core import models as _models + _models.create_project(conn1, "kin", "Kin", "/projects/kin", tech_stack=["vue3"]) + cmd = "cd /Users/grosfrumos/projects/kin/web/frontend && npm run build" + hook = create_hook(conn1, "kin", "rebuild-frontend", "pipeline_completed", cmd, + trigger_module_path=None) + hook_id = hook["id"] + conn1.close() + + # Второе соединение — «рестарт», хук должен быть на месте + conn2 = init_db(db_path) + hooks = get_hooks(conn2, "kin", event="pipeline_completed", enabled_only=True) + conn2.close() + + assert len(hooks) == 1, "После пересоздания соединения хук должен оставаться в БД" + assert hooks[0]["id"] == hook_id + assert hooks[0]["name"] == "rebuild-frontend" + assert hooks[0]["command"] == cmd + assert hooks[0]["trigger_module_path"] is None + finally: + os.unlink(db_path) diff --git a/tests/test_runner.py b/tests/test_runner.py index 5f85b28..bd7ac9b 100644 --- a/tests/test_runner.py +++ b/tests/test_runner.py @@ -348,6 +348,24 @@ class TestAutoMode: assert result["success"] is True mock_followup.assert_not_called() + @patch("core.followup.generate_followups") + @patch("agents.runner.run_hooks") + @patch("agents.runner.subprocess.run") + def test_auto_mode_fires_task_done_event(self, mock_run, mock_hooks, mock_followup, conn): + """Auto mode должен вызывать run_hooks с event='task_done' после task_auto_approved.""" + mock_run.return_value = 
_mock_claude_success({"result": "done"}) + mock_hooks.return_value = [] + mock_followup.return_value = {"created": [], "pending_actions": []} + + models.update_project(conn, "vdol", execution_mode="auto") + steps = [{"role": "debugger", "brief": "find"}] + result = run_pipeline(conn, "VDOL-001", steps) + + assert result["success"] is True + events_fired = [call[1].get("event") or call[0][3] + for call in mock_hooks.call_args_list] + assert "task_done" in events_fired + @patch("core.followup.auto_resolve_pending_actions") @patch("core.followup.generate_followups") @patch("agents.runner.run_hooks") @@ -370,6 +388,50 @@ class TestAutoMode: mock_resolve.assert_called_once_with(conn, "VDOL-001", pending) +# --------------------------------------------------------------------------- +# Retry on permission error +# --------------------------------------------------------------------------- + +class TestRetryOnPermissionError: + @patch("core.followup.generate_followups") + @patch("agents.runner.run_hooks") + @patch("agents.runner.subprocess.run") + def test_retry_on_permission_error_auto_mode(self, mock_run, mock_hooks, mock_followup, conn): + """Auto mode: retry при permission error должен срабатывать.""" + permission_fail = _mock_claude_failure("permission denied: cannot write file") + retry_success = _mock_claude_success({"result": "fixed"}) + + mock_run.side_effect = [permission_fail, retry_success] + mock_hooks.return_value = [] + mock_followup.return_value = {"created": [], "pending_actions": []} + + models.update_project(conn, "vdol", execution_mode="auto") + steps = [{"role": "debugger", "brief": "find"}] + result = run_pipeline(conn, "VDOL-001", steps) + + assert result["success"] is True + assert mock_run.call_count == 2 + # Second call must include --dangerously-skip-permissions + second_cmd = mock_run.call_args_list[1][0][0] + assert "--dangerously-skip-permissions" in second_cmd + + @patch("agents.runner.run_hooks") + @patch("agents.runner.subprocess.run") + 
def test_review_mode_does_not_retry_on_permission_error(self, mock_run, mock_hooks, conn): + """Review mode: retry при permission error НЕ должен срабатывать.""" + permission_fail = _mock_claude_failure("permission denied: cannot write file") + + mock_run.return_value = permission_fail + mock_hooks.return_value = [] + + # Проект остаётся в default "review" mode + steps = [{"role": "debugger", "brief": "find"}] + result = run_pipeline(conn, "VDOL-001", steps) + + assert result["success"] is False + assert mock_run.call_count == 1 + + # --------------------------------------------------------------------------- # JSON parsing # --------------------------------------------------------------------------- @@ -417,20 +479,22 @@ class TestNonInteractive: call_kwargs = mock_run.call_args[1] assert call_kwargs.get("timeout") == 300 + @patch.dict("os.environ", {"KIN_NONINTERACTIVE": ""}) @patch("agents.runner.subprocess.run") def test_interactive_uses_600s_timeout(self, mock_run, conn): mock_run.return_value = _mock_claude_success({"result": "ok"}) run_agent(conn, "debugger", "VDOL-001", "vdol", noninteractive=False) call_kwargs = mock_run.call_args[1] - assert call_kwargs.get("timeout") == 300 + assert call_kwargs.get("timeout") == 600 + @patch.dict("os.environ", {"KIN_NONINTERACTIVE": ""}) @patch("agents.runner.subprocess.run") def test_interactive_no_stdin_override(self, mock_run, conn): """In interactive mode, stdin should not be set to DEVNULL.""" mock_run.return_value = _mock_claude_success({"result": "ok"}) run_agent(conn, "debugger", "VDOL-001", "vdol", noninteractive=False) call_kwargs = mock_run.call_args[1] - assert call_kwargs.get("stdin") == subprocess.DEVNULL + assert call_kwargs.get("stdin") is None @patch.dict("os.environ", {"KIN_NONINTERACTIVE": "1"}) @patch("agents.runner.subprocess.run") @@ -582,3 +646,108 @@ class TestRunAudit: cmd = mock_run.call_args[0][0] assert "--dangerously-skip-permissions" in cmd + + +# 
--------------------------------------------------------------------------- +# KIN-019: Silent FAILED diagnostics (regression tests) +# --------------------------------------------------------------------------- + +class TestSilentFailedDiagnostics: + """Regression: агент падает без вывода — runner должен сохранять диагностику в БД.""" + + @patch("agents.runner.subprocess.run") + def test_agent_empty_stdout_saves_stderr_as_error_message_in_db(self, mock_run, conn): + """Когда stdout пустой и returncode != 0, stderr должен сохраняться как error_message в agent_logs.""" + mock = MagicMock() + mock.stdout = "" + mock.stderr = "API rate limit exceeded (429)" + mock.returncode = 1 + mock_run.return_value = mock + + run_agent(conn, "debugger", "VDOL-001", "vdol") + + log = conn.execute( + "SELECT error_message FROM agent_logs WHERE task_id='VDOL-001'" + ).fetchone() + assert log is not None + assert log["error_message"] is not None + assert "rate limit" in log["error_message"] + + @patch("agents.runner.subprocess.run") + def test_agent_empty_stdout_returns_error_key_with_stderr(self, mock_run, conn): + """run_agent должен вернуть ключ 'error' с содержимым stderr при пустом stdout и ненулевом returncode.""" + mock = MagicMock() + mock.stdout = "" + mock.stderr = "Permission denied: cannot write to /etc/hosts" + mock.returncode = 1 + mock_run.return_value = mock + + result = run_agent(conn, "debugger", "VDOL-001", "vdol") + + assert result["success"] is False + assert "error" in result + assert result["error"] is not None + assert "Permission denied" in result["error"] + + @patch("agents.runner.subprocess.run") + def test_pipeline_error_message_includes_agent_stderr(self, mock_run, conn): + """Сообщение об ошибке pipeline должно включать stderr агента, а не только generic 'step failed'.""" + mock = MagicMock() + mock.stdout = "" + mock.stderr = "Internal server error: unexpected EOF" + mock.returncode = 1 + mock_run.return_value = mock + + steps = [{"role": "tester", 
"brief": "run tests"}] + result = run_pipeline(conn, "VDOL-001", steps) + + assert result["success"] is False + assert "Internal server error" in result["error"] or "unexpected EOF" in result["error"] + + @patch("agents.runner.build_context") + def test_pipeline_exception_in_run_agent_marks_task_blocked(self, mock_ctx, conn): + """Исключение внутри run_agent (например, из build_context) должно ставить задачу в blocked.""" + mock_ctx.side_effect = RuntimeError("DB connection lost") + + steps = [{"role": "debugger", "brief": "find"}] + result = run_pipeline(conn, "VDOL-001", steps) + + assert result["success"] is False + + task = models.get_task(conn, "VDOL-001") + assert task["status"] == "blocked" + + @patch("agents.runner.build_context") + def test_pipeline_exception_logs_to_agent_logs(self, mock_ctx, conn): + """Исключение в run_agent должно быть залогировано в agent_logs с success=False.""" + mock_ctx.side_effect = ValueError("bad context data") + + steps = [{"role": "tester", "brief": "test"}] + run_pipeline(conn, "VDOL-001", steps) + + logs = conn.execute( + "SELECT * FROM agent_logs WHERE task_id='VDOL-001' AND success=0" + ).fetchall() + assert len(logs) >= 1 + + @patch("agents.runner.build_context") + def test_pipeline_exception_marks_pipeline_failed_in_db(self, mock_ctx, conn): + """При исключении запись pipeline должна существовать в БД и иметь статус failed.""" + mock_ctx.side_effect = RuntimeError("network timeout") + + steps = [{"role": "debugger", "brief": "find"}] + run_pipeline(conn, "VDOL-001", steps) + + pipe = conn.execute("SELECT * FROM pipelines WHERE task_id='VDOL-001'").fetchone() + assert pipe is not None + assert pipe["status"] == "failed" + + @patch("agents.runner.subprocess.run") + def test_agent_success_has_no_error_key_populated(self, mock_run, conn): + """При успешном запуске агента ключ 'error' в результате должен быть None (нет ложных срабатываний).""" + mock_run.return_value = _mock_claude_success({"result": "all good"}) + + result 
= run_agent(conn, "debugger", "VDOL-001", "vdol") + + assert result["success"] is True + assert result.get("error") is None diff --git a/tests/test_tech_researcher.py b/tests/test_tech_researcher.py new file mode 100644 index 0000000..7dd5f8d --- /dev/null +++ b/tests/test_tech_researcher.py @@ -0,0 +1,195 @@ +"""Tests for KIN-037: tech_researcher specialist — YAML validation and prompt structure.""" + +from pathlib import Path + +import yaml +import pytest + +SPECIALISTS_YAML = Path(__file__).parent.parent / "agents" / "specialists.yaml" +PROMPTS_DIR = Path(__file__).parent.parent / "agents" / "prompts" +TECH_RESEARCHER_PROMPT = PROMPTS_DIR / "tech_researcher.md" + +REQUIRED_SPECIALIST_FIELDS = {"name", "model", "tools", "description", "permissions"} +REQUIRED_OUTPUT_SCHEMA_FIELDS = { + "status", "api_overview", "endpoints", "rate_limits", "auth_method", + "data_schemas", "limitations", "gotchas", "codebase_diff", "notes", +} + + +@pytest.fixture(scope="module") +def spec(): + """Load and parse specialists.yaml once for all tests.""" + return yaml.safe_load(SPECIALISTS_YAML.read_text()) + + +@pytest.fixture(scope="module") +def tech_researcher(spec): + return spec["specialists"]["tech_researcher"] + + +@pytest.fixture(scope="module") +def prompt_text(): + return TECH_RESEARCHER_PROMPT.read_text() + + +# --------------------------------------------------------------------------- +# YAML validity +# --------------------------------------------------------------------------- + +class TestSpecialistsYaml: + def test_yaml_parses_without_error(self): + content = SPECIALISTS_YAML.read_text() + parsed = yaml.safe_load(content) + assert parsed is not None + + def test_yaml_has_specialists_key(self, spec): + assert "specialists" in spec + + def test_yaml_has_routes_key(self, spec): + assert "routes" in spec + + +# --------------------------------------------------------------------------- +# tech_researcher entry structure +# 
--------------------------------------------------------------------------- + +class TestTechResearcherEntry: + def test_tech_researcher_exists_in_specialists(self, spec): + assert "tech_researcher" in spec["specialists"] + + def test_tech_researcher_has_required_fields(self, tech_researcher): + missing = REQUIRED_SPECIALIST_FIELDS - set(tech_researcher.keys()) + assert not missing, f"Missing fields: {missing}" + + def test_tech_researcher_name_is_string(self, tech_researcher): + assert isinstance(tech_researcher["name"], str) + assert tech_researcher["name"].strip() + + def test_tech_researcher_model_is_sonnet(self, tech_researcher): + assert tech_researcher["model"] == "sonnet" + + def test_tech_researcher_tools_is_list(self, tech_researcher): + assert isinstance(tech_researcher["tools"], list) + assert len(tech_researcher["tools"]) > 0 + + def test_tech_researcher_tools_include_webfetch(self, tech_researcher): + assert "WebFetch" in tech_researcher["tools"] + + def test_tech_researcher_tools_include_read_grep_glob(self, tech_researcher): + for tool in ("Read", "Grep", "Glob"): + assert tool in tech_researcher["tools"], f"Missing tool: {tool}" + + def test_tech_researcher_permissions_is_read_only(self, tech_researcher): + assert tech_researcher["permissions"] == "read_only" + + def test_tech_researcher_description_is_non_empty_string(self, tech_researcher): + assert isinstance(tech_researcher["description"], str) + assert len(tech_researcher["description"]) > 10 + + def test_tech_researcher_has_output_schema(self, tech_researcher): + assert "output_schema" in tech_researcher + + def test_tech_researcher_output_schema_has_required_fields(self, tech_researcher): + schema = tech_researcher["output_schema"] + missing = REQUIRED_OUTPUT_SCHEMA_FIELDS - set(schema.keys()) + assert not missing, f"Missing output_schema fields: {missing}" + + def test_tech_researcher_context_rules_decisions_is_list(self, tech_researcher): + decisions = tech_researcher.get("context_rules", 
{}).get("decisions") + assert isinstance(decisions, list) + + def test_tech_researcher_context_rules_includes_gotcha(self, tech_researcher): + decisions = tech_researcher.get("context_rules", {}).get("decisions", []) + assert "gotcha" in decisions + + +# --------------------------------------------------------------------------- +# api_research route +# --------------------------------------------------------------------------- + +class TestApiResearchRoute: + def test_api_research_route_exists(self, spec): + assert "api_research" in spec["routes"] + + def test_api_research_route_has_steps(self, spec): + route = spec["routes"]["api_research"] + assert "steps" in route + assert isinstance(route["steps"], list) + assert len(route["steps"]) >= 1 + + def test_api_research_route_starts_with_tech_researcher(self, spec): + steps = spec["routes"]["api_research"]["steps"] + assert steps[0] == "tech_researcher" + + def test_api_research_route_includes_architect(self, spec): + steps = spec["routes"]["api_research"]["steps"] + assert "architect" in steps + + def test_api_research_route_has_description(self, spec): + route = spec["routes"]["api_research"] + assert "description" in route + assert isinstance(route["description"], str) + + +# --------------------------------------------------------------------------- +# Prompt file existence +# --------------------------------------------------------------------------- + +class TestTechResearcherPromptFile: + def test_prompt_file_exists(self): + assert TECH_RESEARCHER_PROMPT.exists(), ( + f"Prompt file not found: {TECH_RESEARCHER_PROMPT}" + ) + + def test_prompt_file_is_not_empty(self, prompt_text): + assert len(prompt_text.strip()) > 100 + + +# --------------------------------------------------------------------------- +# Prompt content — structured review instructions +# --------------------------------------------------------------------------- + +class TestTechResearcherPromptContent: + def 
test_prompt_contains_json_output_instruction(self, prompt_text): + assert "JSON" in prompt_text or "json" in prompt_text + + def test_prompt_defines_status_field(self, prompt_text): + assert '"status"' in prompt_text + + def test_prompt_defines_done_partial_blocked_statuses(self, prompt_text): + assert "done" in prompt_text + assert "partial" in prompt_text + assert "blocked" in prompt_text + + def test_prompt_defines_api_overview_field(self, prompt_text): + assert "api_overview" in prompt_text + + def test_prompt_defines_endpoints_field(self, prompt_text): + assert "endpoints" in prompt_text + + def test_prompt_defines_rate_limits_field(self, prompt_text): + assert "rate_limits" in prompt_text + + def test_prompt_defines_codebase_diff_field(self, prompt_text): + assert "codebase_diff" in prompt_text + + def test_prompt_defines_gotchas_field(self, prompt_text): + assert "gotchas" in prompt_text + + def test_prompt_contains_webfetch_instruction(self, prompt_text): + assert "WebFetch" in prompt_text + + def test_prompt_mentions_no_secrets_logging(self, prompt_text): + """Prompt must instruct agent not to log secret values.""" + lower = prompt_text.lower() + assert "secret" in lower or "credential" in lower or "token" in lower + + def test_prompt_specifies_readonly_bash(self, prompt_text): + """Bash must be restricted to read-only operations per rules.""" + assert "read-only" in prompt_text or "read only" in prompt_text or "GET" in prompt_text + + def test_prompt_defines_partial_reason_for_partial_status(self, prompt_text): + assert "partial_reason" in prompt_text + + def test_prompt_defines_blocked_reason_for_blocked_status(self, prompt_text): + assert "blocked_reason" in prompt_text diff --git a/web/api.py b/web/api.py index df9fc85..367063c 100644 --- a/web/api.py +++ b/web/api.py @@ -161,7 +161,7 @@ class TaskPatch(BaseModel): execution_mode: str | None = None -VALID_STATUSES = {"pending", "in_progress", "review", "done", "blocked", "cancelled"} +VALID_STATUSES = 
set(models.VALID_TASK_STATUSES) VALID_EXECUTION_MODES = {"auto", "review"} @@ -248,6 +248,13 @@ def approve_task(task_id: str, body: TaskApprove | None = None): conn.close() raise HTTPException(404, f"Task '{task_id}' not found") models.update_task(conn, task_id, status="done") + try: + from core.hooks import run_hooks as _run_hooks + task_modules = models.get_modules(conn, t["project_id"]) + _run_hooks(conn, t["project_id"], task_id, + event="task_done", task_modules=task_modules) + except Exception: + pass decision = None if body and body.decision_title: decision = models.add_decision( @@ -328,12 +335,8 @@ def is_task_running(task_id: str): return {"running": False} -class TaskRun(BaseModel): - allow_write: bool = False - - @app.post("/api/tasks/{task_id}/run") -def run_task(task_id: str, body: TaskRun | None = None): +def run_task(task_id: str): """Launch pipeline for a task in background. Returns 202.""" conn = get_conn() t = models.get_task(conn, task_id) @@ -347,8 +350,7 @@ def run_task(task_id: str, body: TaskRun | None = None): kin_root = Path(__file__).parent.parent cmd = [sys.executable, "-m", "cli.main", "--db", str(DB_PATH), "run", task_id] - if body and body.allow_write: - cmd.append("--allow-write") + cmd.append("--allow-write") # always required: subprocess runs non-interactively (stdin=DEVNULL) import os env = os.environ.copy() @@ -413,6 +415,18 @@ def create_decision(body: DecisionCreate): return d +@app.delete("/api/projects/{project_id}/decisions/{decision_id}") +def delete_decision(project_id: str, decision_id: int): + conn = get_conn() + decision = models.get_decision(conn, decision_id) + if not decision or decision["project_id"] != project_id: + conn.close() + raise HTTPException(404, f"Decision #{decision_id} not found") + models.delete_decision(conn, decision_id) + conn.close() + return {"deleted": decision_id} + + # --------------------------------------------------------------------------- # Cost # 
--------------------------------------------------------------------------- diff --git a/web/frontend/src/__tests__/filter-persistence.test.ts b/web/frontend/src/__tests__/filter-persistence.test.ts index 9c81ef3..1c0249f 100644 --- a/web/frontend/src/__tests__/filter-persistence.test.ts +++ b/web/frontend/src/__tests__/filter-persistence.test.ts @@ -1,14 +1,15 @@ /** - * KIN-011: Тесты сохранения фильтра статусов при навигации + * KIN-011/KIN-014: Тесты фильтра статусов при навигации * * Проверяет: - * 1. Выбор фильтра обновляет URL (?status=...) - * 2. Прямая ссылка с query param инициализирует фильтр + * 1. Клик по кнопке статуса обновляет URL (?status=...) + * 2. Прямая ссылка с query param активирует нужную кнопку * 3. Фильтр показывает только задачи с нужным статусом - * 4. Сброс фильтра удаляет param из URL - * 5. goBack() вызывает router.back() при наличии истории - * 6. goBack() делает push на /project/:id без истории - * 7. После router.back() URL проекта восстанавливается с фильтром + * 4. Сброс фильтра (✕) удаляет param из URL + * 5. Без фильтра отображаются все задачи + * 6. goBack() вызывает router.back() при наличии истории + * 7. goBack() делает push на /project/:id без истории + * 8. После router.back() URL проекта восстанавливается с фильтром */ import { describe, it, expect, vi, beforeEach } from 'vitest' @@ -117,8 +118,8 @@ beforeEach(() => { // ProjectView: фильтр ↔ URL // ───────────────────────────────────────────────────────────── -describe('KIN-011: ProjectView — фильтр и URL', () => { - it('1. При выборе фильтра URL обновляется query param ?status', async () => { +describe('KIN-011/KIN-014: ProjectView — фильтр и URL', () => { + it('1. 
Клик по кнопке статуса обновляет URL (?status=...)', async () => { const router = makeRouter() await router.push('/project/KIN') @@ -131,16 +132,16 @@ describe('KIN-011: ProjectView — фильтр и URL', () => { // Изначально status нет в URL expect(router.currentRoute.value.query.status).toBeUndefined() - // Меняем фильтр через select (первый select — фильтр статусов) - const select = wrapper.find('select') - await select.setValue('in_progress') + // Кликаем по кнопке in_progress + const btn = wrapper.find('[data-status="in_progress"]') + await btn.trigger('click') await flushPromises() // URL должен содержать ?status=in_progress expect(router.currentRoute.value.query.status).toBe('in_progress') }) - it('2. Прямая ссылка ?status=in_progress инициализирует фильтр в select', async () => { + it('2. Прямая ссылка ?status=in_progress активирует нужную кнопку', async () => { const router = makeRouter() await router.push('/project/KIN?status=in_progress') @@ -150,9 +151,13 @@ describe('KIN-011: ProjectView — фильтр и URL', () => { }) await flushPromises() - // select должен показывать in_progress - const select = wrapper.find('select') - expect((select.element as HTMLSelectElement).value).toBe('in_progress') + // Кнопка in_progress должна быть активна (иметь класс text-blue-300) + const btn = wrapper.find('[data-status="in_progress"]') + expect(btn.classes()).toContain('text-blue-300') + + // Другие кнопки не активны + const pendingBtn = wrapper.find('[data-status="pending"]') + expect(pendingBtn.classes()).not.toContain('text-blue-300') }) it('3. Прямая ссылка ?status=in_progress показывает только задачи с этим статусом', async () => { @@ -171,7 +176,7 @@ describe('KIN-011: ProjectView — фильтр и URL', () => { expect(links[0].text()).toContain('KIN-002') }) - it('4. Сброс фильтра (пустое значение) удаляет status из URL', async () => { + it('4. 
Сброс фильтра (кнопка ✕) удаляет status из URL', async () => { const router = makeRouter() await router.push('/project/KIN?status=done') @@ -181,9 +186,9 @@ describe('KIN-011: ProjectView — фильтр и URL', () => { }) await flushPromises() - // Сброс фильтра - const select = wrapper.find('select') - await select.setValue('') + // Кликаем кнопку сброса + const clearBtn = wrapper.find('[data-action="clear-status"]') + await clearBtn.trigger('click') await flushPromises() // status должен исчезнуть из URL @@ -203,6 +208,89 @@ describe('KIN-011: ProjectView — фильтр и URL', () => { const links = wrapper.findAll('a[href^="/task/"]') expect(links).toHaveLength(3) }) + + it('KIN-014: Выбор нескольких статусов — URL содержит оба через запятую', async () => { + const router = makeRouter() + await router.push('/project/KIN') + + const wrapper = mount(ProjectView, { + props: { id: 'KIN' }, + global: { plugins: [router] }, + }) + await flushPromises() + + await wrapper.find('[data-status="pending"]').trigger('click') + await wrapper.find('[data-status="in_progress"]').trigger('click') + await flushPromises() + + const status = router.currentRoute.value.query.status as string + expect(status.split(',').sort()).toEqual(['in_progress', 'pending']) + }) + + it('KIN-014: Фильтр сохраняется в localStorage', async () => { + const router = makeRouter() + await router.push('/project/KIN') + + const wrapper = mount(ProjectView, { + props: { id: 'KIN' }, + global: { plugins: [router] }, + }) + await flushPromises() + + await wrapper.find('[data-status="pending"]').trigger('click') + await flushPromises() + + const stored = JSON.parse(localStorageMock.getItem('kin-task-statuses-KIN') ?? 
'[]') + expect(stored).toContain('pending') + }) +}) + +// ───────────────────────────────────────────────────────────── +// KIN-046: кнопки фильтра и сигнатура runTask +// ───────────────────────────────────────────────────────────── + +describe('KIN-046: ProjectView — фильтр статусов и runTask', () => { + it('Все 7 кнопок фильтра статусов отображаются в DOM', async () => { + const router = makeRouter() + await router.push('/project/KIN') + + const wrapper = mount(ProjectView, { + props: { id: 'KIN' }, + global: { plugins: [router] }, + }) + await flushPromises() + + const ALL_TASK_STATUSES = ['pending', 'in_progress', 'review', 'blocked', 'decomposed', 'done', 'cancelled'] + for (const s of ALL_TASK_STATUSES) { + expect(wrapper.find(`[data-status="${s}"]`).exists(), `кнопка "${s}" должна быть в DOM`).toBe(true) + } + }) + + it('api.runTask вызывается только с taskId — без второго аргумента', async () => { + vi.mocked(api.runTask).mockResolvedValue({ status: 'ok' } as any) + vi.spyOn(window, 'confirm').mockReturnValue(true) + + const router = makeRouter() + await router.push('/project/KIN') + + const wrapper = mount(ProjectView, { + props: { id: 'KIN' }, + global: { plugins: [router] }, + }) + await flushPromises() + + // KIN-001 имеет статус pending — кнопка "Run pipeline" должна быть видна + const runBtn = wrapper.find('button[title="Run pipeline"]') + expect(runBtn.exists()).toBe(true) + await runBtn.trigger('click') + await flushPromises() + + expect(api.runTask).toHaveBeenCalledTimes(1) + // Проверяем: вызван только с taskId, второй аргумент (autoMode) отсутствует + const callArgs = vi.mocked(api.runTask).mock.calls[0] + expect(callArgs).toHaveLength(1) + expect(callArgs[0]).toBe('KIN-001') + }) }) // ───────────────────────────────────────────────────────────── @@ -210,7 +298,7 @@ describe('KIN-011: ProjectView — фильтр и URL', () => { // ───────────────────────────────────────────────────────────── describe('KIN-011: TaskDetail — возврат с сохранением URL', 
() => { - it('6. goBack() вызывает router.back() когда window.history.length > 1', async () => { + it('6 (KIN-011). goBack() вызывает router.back() когда window.history.length > 1', async () => { const router = makeRouter() await router.push('/project/KIN?status=in_progress') await router.push('/task/KIN-002') @@ -278,3 +366,146 @@ describe('KIN-011: TaskDetail — возврат с сохранением URL', expect(router.currentRoute.value.query.status).toBe('in_progress') }) }) + +// ───────────────────────────────────────────────────────────── +// KIN-047: TaskDetail — кнопки Approve/Reject в статусе review +// ───────────────────────────────────────────────────────────── + +describe('KIN-047: TaskDetail — Approve/Reject в статусе review', () => { + function makeTaskWith(status: string, executionMode: 'auto' | 'review' | null = null) { + return { + id: 'KIN-047', + project_id: 'KIN', + title: 'Review Task', + status, + priority: 3, + assigned_role: null, + parent_task_id: null, + brief: null, + spec: null, + execution_mode: executionMode, + created_at: '2024-01-01', + updated_at: '2024-01-01', + pipeline_steps: [], + related_decisions: [], + } + } + + it('Approve и Reject видны при статусе review и ручном режиме', async () => { + vi.mocked(api.taskFull).mockResolvedValue(makeTaskWith('review', 'review') as any) + const router = makeRouter() + await router.push('/task/KIN-047') + + const wrapper = mount(TaskDetail, { + props: { id: 'KIN-047' }, + global: { plugins: [router] }, + }) + await flushPromises() + + const buttons = wrapper.findAll('button') + const approveExists = buttons.some(b => b.text().includes('Approve')) + const rejectExists = buttons.some(b => b.text().includes('Reject')) + expect(approveExists, 'Approve должна быть видна в review + ручной режим').toBe(true) + expect(rejectExists, 'Reject должна быть видна в review + ручной режим').toBe(true) + }) + + it('Approve и Reject скрыты при autoMode в статусе review', async () => { + 
vi.mocked(api.taskFull).mockResolvedValue(makeTaskWith('review', 'auto') as any) + const router = makeRouter() + await router.push('/task/KIN-047') + + const wrapper = mount(TaskDetail, { + props: { id: 'KIN-047' }, + global: { plugins: [router] }, + }) + await flushPromises() + + const buttons = wrapper.findAll('button') + const approveExists = buttons.some(b => b.text().includes('Approve')) + const rejectExists = buttons.some(b => b.text().includes('Reject')) + expect(approveExists, 'Approve должна быть скрыта в autoMode').toBe(false) + expect(rejectExists, 'Reject должна быть скрыта в autoMode').toBe(false) + }) + + it('Тоггл Auto/Review виден в статусе review при autoMode (позволяет выйти из автопилота)', async () => { + vi.mocked(api.taskFull).mockResolvedValue(makeTaskWith('review', 'auto') as any) + const router = makeRouter() + await router.push('/task/KIN-047') + + const wrapper = mount(TaskDetail, { + props: { id: 'KIN-047' }, + global: { plugins: [router] }, + }) + await flushPromises() + + const buttons = wrapper.findAll('button') + const toggleExists = buttons.some(b => b.text().includes('Auto') || b.text().includes('Review')) + expect(toggleExists, 'Тоггл Auto/Review должен быть виден в статусе review').toBe(true) + }) + + it('После клика тоггла в review+autoMode появляются Approve и Reject', async () => { + const task = makeTaskWith('review', 'auto') + vi.mocked(api.taskFull).mockResolvedValue(task as any) + vi.mocked(api.patchTask).mockResolvedValue({ execution_mode: 'review' } as any) + + const router = makeRouter() + await router.push('/task/KIN-047') + + const wrapper = mount(TaskDetail, { + props: { id: 'KIN-047' }, + global: { plugins: [router] }, + }) + await flushPromises() + + // Находим тоггл-кнопку (текст "Auto" когда autoMode=true) + const toggleBtn = wrapper.findAll('button').find(b => b.text().includes('Auto')) + expect(toggleBtn?.exists(), 'Тоггл должен быть виден').toBe(true) + await toggleBtn!.trigger('click') + await flushPromises() 
+ + // После переключения autoMode=false → Approve и Reject должны появиться + const buttons = wrapper.findAll('button') + const approveExists = buttons.some(b => b.text().includes('Approve')) + const rejectExists = buttons.some(b => b.text().includes('Reject')) + expect(approveExists, 'Approve должна появиться после отключения autoMode').toBe(true) + expect(rejectExists, 'Reject должна появиться после отключения autoMode').toBe(true) + }) + + it('KIN-051: Approve и Reject видны при статусе review и execution_mode=null (фикс баги)', async () => { + // Воспроизводит баг: задача в review без явного execution_mode зависала + // без кнопок, потому что localStorage мог содержать 'auto' + localStorageMock.setItem('kin-mode-KIN', 'auto') // имитируем "плохой" localStorage + vi.mocked(api.taskFull).mockResolvedValue(makeTaskWith('review', null) as any) + const router = makeRouter() + await router.push('/task/KIN-047') + + const wrapper = mount(TaskDetail, { + props: { id: 'KIN-047' }, + global: { plugins: [router] }, + }) + await flushPromises() + + const buttons = wrapper.findAll('button') + const approveExists = buttons.some(b => b.text().includes('Approve')) + const rejectExists = buttons.some(b => b.text().includes('Reject')) + expect(approveExists, 'Approve должна быть видна: review+null mode игнорирует localStorage').toBe(true) + expect(rejectExists, 'Reject должна быть видна: review+null mode игнорирует localStorage').toBe(true) + }) + + it('Approve скрыта для статусов pending и done', async () => { + for (const status of ['pending', 'done']) { + vi.mocked(api.taskFull).mockResolvedValue(makeTaskWith(status, 'review') as any) + const router = makeRouter() + await router.push('/task/KIN-047') + + const wrapper = mount(TaskDetail, { + props: { id: 'KIN-047' }, + global: { plugins: [router] }, + }) + await flushPromises() + + const approveExists = wrapper.findAll('button').some(b => b.text().includes('Approve')) + expect(approveExists, `Approve не должна быть видна для 
статуса "${status}"`).toBe(false) + } + }) +}) diff --git a/web/frontend/src/api.ts b/web/frontend/src/api.ts index dffb7f8..d4b8274 100644 --- a/web/frontend/src/api.ts +++ b/web/frontend/src/api.ts @@ -26,6 +26,12 @@ async function post(path: string, body: unknown): Promise { return res.json() } +async function del(path: string): Promise { + const res = await fetch(`${BASE}${path}`, { method: 'DELETE' }) + if (!res.ok) throw new Error(`${res.status} ${res.statusText}`) + return res.json() +} + export interface Project { id: string name: string @@ -59,6 +65,7 @@ export interface Task { brief: Record | null spec: Record | null execution_mode: string | null + blocked_reason: string | null created_at: string updated_at: string } @@ -152,8 +159,8 @@ export const api = { post<{ choice: string; result: unknown }>(`/tasks/${id}/resolve`, { action, choice }), rejectTask: (id: string, reason: string) => post<{ status: string }>(`/tasks/${id}/reject`, { reason }), - runTask: (id: string, allowWrite = false) => - post<{ status: string }>(`/tasks/${id}/run`, { allow_write: allowWrite }), + runTask: (id: string) => + post<{ status: string }>(`/tasks/${id}/run`, {}), bootstrap: (data: { path: string; id: string; name: string }) => post<{ project: Project }>('/bootstrap', data), auditProject: (projectId: string) => @@ -164,4 +171,6 @@ export const api = { patch(`/tasks/${id}`, data), patchProject: (id: string, data: { execution_mode: string }) => patch(`/projects/${id}`, data), + deleteDecision: (projectId: string, decisionId: number) => + del<{ deleted: number }>(`/projects/${projectId}/decisions/${decisionId}`), } diff --git a/web/frontend/src/views/ProjectView.vue b/web/frontend/src/views/ProjectView.vue index 3c11b2e..744fdf2 100644 --- a/web/frontend/src/views/ProjectView.vue +++ b/web/frontend/src/views/ProjectView.vue @@ -15,7 +15,28 @@ const error = ref('') const activeTab = ref<'tasks' | 'decisions' | 'modules'>('tasks') // Filters -const taskStatusFilter = 
ref((route.query.status as string) || '') +const ALL_TASK_STATUSES = ['pending', 'in_progress', 'review', 'blocked', 'decomposed', 'done', 'cancelled'] + +function initStatusFilter(): string[] { + const q = route.query.status as string + if (q) return q.split(',').filter((s: string) => s) + const stored = localStorage.getItem(`kin-task-statuses-${props.id}`) + if (stored) { try { return JSON.parse(stored) } catch {} } + return [] +} + +const selectedStatuses = ref(initStatusFilter()) + +function toggleStatus(s: string) { + const idx = selectedStatuses.value.indexOf(s) + if (idx >= 0) selectedStatuses.value.splice(idx, 1) + else selectedStatuses.value.push(s) +} + +function clearStatusFilter() { + selectedStatuses.value = [] +} + const decisionTypeFilter = ref('') const decisionSearch = ref('') @@ -98,16 +119,17 @@ async function load() { } } -watch(taskStatusFilter, (val) => { - router.replace({ query: { ...route.query, status: val || undefined } }) -}) +watch(selectedStatuses, (val) => { + localStorage.setItem(`kin-task-statuses-${props.id}`, JSON.stringify(val)) + router.replace({ query: { ...route.query, status: val.length ? 
val.join(',') : undefined } }) +}, { deep: true }) onMounted(() => { load(); loadMode() }) const filteredTasks = computed(() => { if (!project.value) return [] let tasks = project.value.tasks - if (taskStatusFilter.value) tasks = tasks.filter(t => t.status === taskStatusFilter.value) + if (selectedStatuses.value.length > 0) tasks = tasks.filter(t => selectedStatuses.value.includes(t.status)) return tasks }) @@ -145,12 +167,6 @@ function modTypeColor(t: string) { return m[t] || 'gray' } -const taskStatuses = computed(() => { - if (!project.value) return [] - const s = new Set(project.value.tasks.map(t => t.status)) - return Array.from(s).sort() -}) - const decTypes = computed(() => { if (!project.value) return [] const s = new Set(project.value.decisions.map(d => d.type)) @@ -179,7 +195,7 @@ async function runTask(taskId: string, event: Event) { event.stopPropagation() if (!confirm(`Run pipeline for ${taskId}?`)) return try { - await api.runTask(taskId, autoMode.value) + await api.runTask(taskId) await load() } catch (e: any) { error.value = e.message @@ -253,12 +269,17 @@ async function addDecision() {
-
- +
+ +
{{ t.id }} diff --git a/web/frontend/src/views/TaskDetail.vue b/web/frontend/src/views/TaskDetail.vue index e5827ca..e9f4993 100644 --- a/web/frontend/src/views/TaskDetail.vue +++ b/web/frontend/src/views/TaskDetail.vue @@ -35,6 +35,9 @@ function loadMode(t: typeof task.value) { if (!t) return if (t.execution_mode) { autoMode.value = t.execution_mode === 'auto' + } else if (t.status === 'review') { + // Task is in review — always show Approve/Reject regardless of localStorage + autoMode.value = false } else { autoMode.value = localStorage.getItem(`kin-mode-${t.project_id}`) === 'auto' } @@ -188,7 +191,7 @@ async function reject() { async function runPipeline() { try { - await api.runTask(props.id, autoMode.value) + await api.runTask(props.id) startPolling() await load() } catch (e: any) { @@ -264,6 +267,9 @@ async function changeStatus(newStatus: string) {
Brief: {{ JSON.stringify(task.brief) }}
+
+ Blocked: {{ task.blocked_reason }} +
Assigned: {{ task.assigned_role }}
@@ -346,7 +352,7 @@ async function changeStatus(newStatus: string) { class="px-4 py-2 text-sm bg-red-900/50 text-red-400 border border-red-800 rounded hover:bg-red-900"> ✗ Reject -