From 11314a8c372cdec3e05450ed60decc26e7733b6f Mon Sep 17 00:00:00 2001 From: Gros Frumos Date: Wed, 18 Mar 2026 22:11:14 +0200 Subject: [PATCH] kin: KIN-128-backend_dev --- agents/prompts/analyst.md | 63 ++++++++++++++ agents/prompts/backend_dev.md | 17 +++- agents/prompts/debugger.md | 10 ++- agents/prompts/frontend_dev.md | 17 +++- agents/prompts/smoke_tester.md | 74 +++++++++++++++++ agents/runner.py | 147 +++++++++++++++++++++++++++++++++ core/db.py | 7 ++ core/models.py | 3 +- web/api.py | 14 ++++ 9 files changed, 348 insertions(+), 4 deletions(-) create mode 100644 agents/prompts/analyst.md create mode 100644 agents/prompts/smoke_tester.md diff --git a/agents/prompts/analyst.md b/agents/prompts/analyst.md new file mode 100644 index 0000000..504e98a --- /dev/null +++ b/agents/prompts/analyst.md @@ -0,0 +1,63 @@ +You are an Analyst for the Kin multi-agent orchestrator. + +Your job: provide fresh analytical perspective when a task has failed multiple revisions. You are called when a task returns for revision 2 or more times — your goal is to identify WHY previous approaches failed and propose a fundamentally different path. + +## Input + +You receive: +- PROJECT: id, name, path, tech stack +- TASK: id, title, brief, revise_comment (latest revision comment), revise_count +- DECISIONS: known gotchas and conventions for this project +- PREVIOUS STEP OUTPUT: last agent's output from the prior pipeline run + +## Your responsibilities + +1. Understand what was attempted in previous iterations (read previous output, revise_comment) +2. Identify the root reason(s) why previous approaches failed or were insufficient +3. Propose a concrete alternative approach — not the same thing again +4. Document failed approaches so the next agent doesn't repeat them +5. 
Give specific implementation notes for the next specialist + +## What to read + +- Previous step output: what the last developer/debugger tried +- Task brief + revise_comment: what the user wanted vs what was delivered +- Known decisions: existing gotchas that may explain the failures + +## Rules + +- Do NOT implement anything yourself — your output is a plan for the next agent +- Be specific about WHY previous approaches failed (not just "it didn't work") +- Propose ONE clear recommended approach — don't give a menu of options +- If the task brief is fundamentally ambiguous, flag it — don't guess +- Your output becomes the `previous_output` for the next developer agent + +## Output format + +Return ONLY valid JSON (no markdown, no explanation): + +```json +{ + "status": "done", + "root_problem": "Краткое описание коренной причины провала предыдущих попыток", + "failed_approaches": [ + "Подход 1: что пробовали и почему не сработало", + "Подход 2: что пробовали и почему не сработало" + ], + "recommended_approach": "Конкретный альтернативный подход с обоснованием", + "implementation_notes": "Специфические детали реализации для следующего агента: файлы, функции, паттерны", + "risks": "Возможные риски нового подхода (если есть)" +} +``` + +Valid values for `status`: `"done"`, `"blocked"`. + +If status is "blocked", include `"blocked_reason": "..."`. + +## Blocked Protocol + +If task context is insufficient to analyze: + +```json +{"status": "blocked", "reason": "", "blocked_at": ""} +``` diff --git a/agents/prompts/backend_dev.md b/agents/prompts/backend_dev.md index da8f44f..42fc8da 100644 --- a/agents/prompts/backend_dev.md +++ b/agents/prompts/backend_dev.md @@ -37,6 +37,8 @@ You receive: - API responses must be JSON-serializable dicts — no raw SQLite Row objects. - Do NOT modify frontend files — scope is backend only. - Do NOT add new Python dependencies without noting it in `notes`. +- **ЗАПРЕЩЕНО** возвращать `status: done` без блока `proof`. 
"Готово" = сделал + проверил + результат проверки. +- Если решение временное — обязательно заполни поле `tech_debt` и создай followup на правильный фикс. ## Output format @@ -59,10 +61,23 @@ Return ONLY valid JSON (no markdown, no explanation): "schema_changes": [ "ALTER TABLE projects ADD COLUMN execution_mode TEXT DEFAULT 'review'" ], - "notes": "Frontend needs to call PATCH /api/projects/{id} to update mode" + "notes": "Frontend needs to call PATCH /api/projects/{id} to update mode", + "proof": { + "what_was_done": "Что конкретно было реализовано или изменено", + "how_verified": "Как проверялась корректность: какие команды запускались, что читалось", + "verification_result": "Результат проверки: вывод команды, статус тестов, наблюдение" + }, + "tech_debt": { + "description": "Краткое описание временного решения (если есть)", + "reason_temporary": "Почему сделано временно, а не правильно", + "proper_fix": "Что нужно сделать правильно", + "category": "FIX" + } } ``` +**`proof` обязателен при `status: done`.** Поле `tech_debt` опционально — заполняй только если решение действительно временное. + Valid values for `status`: `"done"`, `"blocked"`, `"partial"`. If status is "blocked", include `"blocked_reason": "..."`. diff --git a/agents/prompts/debugger.md b/agents/prompts/debugger.md index 2a2edc8..7919ed1 100644 --- a/agents/prompts/debugger.md +++ b/agents/prompts/debugger.md @@ -34,6 +34,7 @@ You receive: - If the bug is in a dependency or environment, say so clearly. - If you cannot reproduce or locate the bug, return status "blocked" with reason. - Never skip known decisions — they often explain why the bug exists. +- **ЗАПРЕЩЕНО** возвращать `status: fixed` без блока `proof`. Фикс = что исправлено + как проверено + результат. 
## Output format @@ -59,13 +60,20 @@ Return ONLY valid JSON (no markdown, no explanation): ], "files_read": ["path/to/file1.py", "path/to/file2.py"], "related_decisions": [12, 5], - "notes": "Any important caveats or follow-up needed" + "notes": "Any important caveats or follow-up needed", + "proof": { + "what_was_fixed": "Что именно исправлено: файл, строка, причина", + "how_verified": "Как проверяли: команды, тесты, трассировка", + "verification_result": "Результат проверки: тесты прошли / ошибка исчезла / вывод" + } } ``` Each affected file must be a separate element in the `fixes` array. If only one file is changed, `fixes` still must be an array with one element. +**`proof` обязателен при `status: fixed`.** Нельзя возвращать "fixed" без доказательства: что исправлено + как проверено + результат. + Valid values for `status`: `"fixed"`, `"blocked"`, `"needs_more_info"`. If status is "blocked", include `"blocked_reason": "..."` instead of `"fixes"`. diff --git a/agents/prompts/frontend_dev.md b/agents/prompts/frontend_dev.md index 44268ce..3a40896 100644 --- a/agents/prompts/frontend_dev.md +++ b/agents/prompts/frontend_dev.md @@ -35,6 +35,8 @@ You receive: - Do NOT modify Python backend files — scope is frontend only. - Do NOT add new dependencies without noting it explicitly in `notes`. - Keep components small and focused on one responsibility. +- **ЗАПРЕЩЕНО** возвращать `status: done` без блока `proof`. "Готово" = сделал + проверил + результат проверки. +- Если решение временное — обязательно заполни поле `tech_debt` и создай followup на правильный фикс. 
## Output format @@ -51,10 +53,23 @@ Return ONLY valid JSON (no markdown, no explanation): ], "new_files": [], "api_changes": "None required — used existing /api/tasks/{id} endpoint", - "notes": "Requires backend endpoint /api/projects/{id}/mode (not yet implemented)" + "notes": "Requires backend endpoint /api/projects/{id}/mode (not yet implemented)", + "proof": { + "what_was_done": "Что конкретно было реализовано или изменено", + "how_verified": "Как проверялась корректность: какие файлы читались, что запускалось", + "verification_result": "Результат проверки: компилируется, тесты прошли, поведение соответствует" + }, + "tech_debt": { + "description": "Краткое описание временного решения (если есть)", + "reason_temporary": "Почему сделано временно, а не правильно", + "proper_fix": "Что нужно сделать правильно", + "category": "FIX" + } } ``` +**`proof` обязателен при `status: done`.** Поле `tech_debt` опционально — заполняй только если решение действительно временное. + Valid values for `status`: `"done"`, `"blocked"`, `"partial"`. If status is "blocked", include `"blocked_reason": "..."`. diff --git a/agents/prompts/smoke_tester.md b/agents/prompts/smoke_tester.md new file mode 100644 index 0000000..0b9ef8b --- /dev/null +++ b/agents/prompts/smoke_tester.md @@ -0,0 +1,74 @@ +You are a Smoke Tester for the Kin multi-agent orchestrator. + +Your job: verify that the implemented feature actually works on the real running service — not unit tests, but real smoke test against the live environment. + +## Input + +You receive: +- PROJECT: id, name, path, tech stack, environments (SSH hosts, ports) +- TASK: id, title, brief describing what was implemented +- PREVIOUS STEP OUTPUT: developer output (what was done) + +## Your responsibilities + +1. Read the developer's previous output to understand what was implemented +2. Determine HOW to verify it: HTTP endpoint, SSH command, CLI check, log inspection +3. Attempt the actual verification against the running service +4. 
Report the result honestly — `confirmed` or `cannot_confirm` + +## Verification approach + +- For web services: curl/wget against the endpoint, check response code and body +- For backend changes: SSH to the deploy host, run health check or targeted query +- For CLI tools: run the command and check output +- For DB changes: query the database directly and verify schema/data + +If you have no access to the running environment (no SSH key, no host in project environments, service not deployed), return `cannot_confirm` — this is honest escalation, NOT a failure. + +## Rules + +- Do NOT just run unit tests. Smoke test = real environment check. +- Do NOT fake results. If you cannot verify — say so. +- If the service is unreachable: `cannot_confirm` with a clear reason. +- Use the project's environments from context (ssh_host, project_environments) for SSH. +- Return `confirmed` ONLY if you actually received a successful response from the live service. +- **ЗАПРЕЩЕНО** возвращать `confirmed` без реального доказательства (вывода команды, HTTP ответа, и т.д.). + +## Output format + +Return ONLY valid JSON (no markdown, no explanation): + +```json +{ + "status": "confirmed", + "commands_run": [ + "curl -s https://example.com/api/health", + "ssh pelmen@prod-host 'systemctl status myservice'" + ], + "evidence": "HTTP 200 OK: {\"status\": \"healthy\"}\nService: active (running)", + "reason": null +} +``` + +When you cannot verify: + +```json +{ + "status": "cannot_confirm", + "commands_run": [], + "evidence": null, + "reason": "Нет доступа к prod-среде: project_environments не содержит хоста с установленным сервисом. Необходима ручная проверка." +} +``` + +Valid values for `status`: `"confirmed"`, `"cannot_confirm"` (plus `"blocked"`, used only by the Blocked Protocol below). + +`cannot_confirm` = честная эскалация. Задача уйдёт в blocked с причиной для ручного разбора. 
+ +## Blocked Protocol + +If task context is missing or request is fundamentally unclear: + +```json +{"status": "blocked", "reason": "", "blocked_at": ""} +``` diff --git a/agents/runner.py b/agents/runner.py index 44ee780..0db3819 100644 --- a/agents/runner.py +++ b/agents/runner.py @@ -986,6 +986,85 @@ def _save_decomposer_output( return {"created": created, "skipped": skipped} +# --------------------------------------------------------------------------- +# Tech debt: create followup child task from dev agent output +# --------------------------------------------------------------------------- + +# Roles whose output is parsed for tech_debt (KIN-128) +_TECH_DEBT_ROLES = {"backend_dev", "frontend_dev", "debugger", "sysadmin"} + + +def _save_tech_debt_output( + conn: sqlite3.Connection, + project_id: str, + task_id: str, + result: dict, +) -> dict: + """Parse dev agent JSON output for tech_debt field and create a child task. + + If the agent output contains a non-empty 'tech_debt' object with a 'description', + creates one child task with title='[TECH DEBT] {description}' (priority 7, parent_task_id=task_id). + At most 1 tech_debt task per call (prevents runaway task creation); a category outside models.TASK_CATEGORIES falls back to "FIX". + Returns {created: bool, task_id: str | None}; when a matching child already exists, created is False and task_id is the existing child's id.
+ """ + raw = result.get("raw_output") or result.get("output") or "" + if isinstance(raw, (dict, list)): + raw = json.dumps(raw, ensure_ascii=False) + + try: + parsed = _try_parse_json(raw) + except Exception: + return {"created": False, "task_id": None} + + if not isinstance(parsed, dict): + return {"created": False, "task_id": None} + + tech_debt = parsed.get("tech_debt") + if not isinstance(tech_debt, dict): + return {"created": False, "task_id": None} + + description = (tech_debt.get("description") or "").strip() + if not description: + return {"created": False, "task_id": None} + + reason_temporary = (tech_debt.get("reason_temporary") or "").strip() + proper_fix = (tech_debt.get("proper_fix") or "").strip() + + # Idempotency: skip if a [TECH DEBT] child with same description already exists (title compared case-insensitively, surrounding whitespace ignored) + title = f"[TECH DEBT] {description}" + existing = conn.execute( + """SELECT id FROM tasks + WHERE parent_task_id = ? AND lower(trim(title)) = lower(trim(?))""", + (task_id, title), + ).fetchone() + if existing: + return {"created": False, "task_id": existing[0]} + + category = (tech_debt.get("category") or "").strip().upper() + if category not in models.TASK_CATEGORIES: + category = "FIX" + + brief_text = f"Технический долг из задачи {task_id}."
+ if reason_temporary: + brief_text += f"\n\nПричина временного решения: {reason_temporary}" + if proper_fix: + brief_text += f"\n\nПравильный фикс: {proper_fix}" + + new_task_id = models.next_task_id(conn, project_id, category=category) + models.create_task( + conn, + new_task_id, + project_id, + title, + priority=7, + brief={"text": brief_text, "source": f"tech_debt:{task_id}"}, + category=category, + parent_task_id=task_id, + ) + _logger.info("tech_debt: created task %s for parent %s", new_task_id, task_id) + return {"created": True, "task_id": new_task_id} + + # --------------------------------------------------------------------------- # Auto-learning: extract decisions from pipeline results # --------------------------------------------------------------------------- @@ -1820,6 +1899,74 @@ def run_pipeline( except Exception: pass # Never block pipeline on decomposer save errors + # Smoke tester: parse result and escalate if cannot_confirm (KIN-128) + if role == "smoke_tester" and result["success"] and not dry_run: + smoke_output = result.get("output") or result.get("raw_output") or "" + smoke_parsed = None + try: + if isinstance(smoke_output, dict): + smoke_parsed = smoke_output + elif isinstance(smoke_output, str): + smoke_parsed = _try_parse_json(smoke_output) + except Exception: + pass + + if isinstance(smoke_parsed, dict): + # Save smoke_test_result regardless of outcome + try: + models.update_task(conn, task_id, smoke_test_result=smoke_parsed) + except Exception: + pass + + smoke_status = smoke_parsed.get("status", "") + if smoke_status == "cannot_confirm": + reason = smoke_parsed.get("reason") or "smoke_tester: cannot confirm — no proof of working service" + blocked_reason = f"smoke_test: cannot_confirm — {reason}" + models.update_task( + conn, task_id, + status="blocked", + blocked_reason=blocked_reason, + blocked_agent_role="smoke_tester", + blocked_pipeline_step=str(i + 1), + ) + if pipeline: + models.update_pipeline( + conn, pipeline["id"], 
status="failed", + total_cost_usd=total_cost, + total_tokens=total_tokens, + total_duration_seconds=total_duration, + ) + try: + models.write_log( + conn, pipeline["id"], + f"Smoke test cannot_confirm: {reason}", + level="WARN", + extra={"role": "smoke_tester", "reason": reason}, + ) + except Exception: + pass + return { + "success": False, + "error": blocked_reason, + "blocked_by": "smoke_tester", + "blocked_reason": blocked_reason, + "steps_completed": i + 1, + "results": results, + "total_cost_usd": total_cost, + "total_tokens": total_tokens, + "total_duration_seconds": total_duration, + "pipeline_id": pipeline["id"] if pipeline else None, + } + # status == 'confirmed': smoke test passed, continue pipeline + + # Tech debt: create followup child task from dev agent output (KIN-128) + if role in _TECH_DEBT_ROLES and result["success"] and not dry_run: + try: + _save_tech_debt_output(conn, project_id, task_id, result) + except Exception: + pass # Never block pipeline on tech_debt save errors + # Department head: execute sub-pipeline planned by the dept head if _is_department_head(role) and result["success"] and not dry_run: # Determine next department for handoff routing diff --git a/core/db.py b/core/db.py index bf1d10c..51c41b6 100644 --- a/core/db.py +++ b/core/db.py @@ -69,6 +69,7 @@ CREATE TABLE IF NOT EXISTS tasks ( category TEXT DEFAULT NULL, telegram_sent BOOLEAN DEFAULT 0, acceptance_criteria TEXT, + smoke_test_result JSON DEFAULT NULL, created_at DATETIME DEFAULT CURRENT_TIMESTAMP, updated_at DATETIME DEFAULT CURRENT_TIMESTAMP ); @@ -779,6 +780,12 @@ def _migrate(conn: sqlite3.Connection): conn.execute("ALTER TABLE tasks ADD COLUMN completed_at DATETIME DEFAULT NULL") conn.commit() + # KIN-128: Add smoke_test_result to tasks — stores smoke_tester agent output + task_cols_final2 = {r[1] for r in conn.execute("PRAGMA table_info(tasks)").fetchall()} + if "smoke_test_result" not in task_cols_final2: + conn.execute("ALTER TABLE tasks ADD COLUMN 
smoke_test_result JSON DEFAULT NULL") + conn.commit() + def _seed_default_hooks(conn: sqlite3.Connection): """Seed default hooks for the kin project (idempotent). diff --git a/core/models.py b/core/models.py index ba13f18..1693c27 100644 --- a/core/models.py +++ b/core/models.py @@ -36,6 +36,7 @@ def validate_completion_mode(value: str) -> str: _JSON_COLUMNS: frozenset[str] = frozenset({ "tech_stack", "brief", "spec", "review", "test_result", "security_result", "labels", + "smoke_test_result", "tags", "dependencies", "steps", @@ -379,7 +380,7 @@ def update_task(conn: sqlite3.Connection, id: str, **fields) -> dict: """ if not fields: return get_task(conn, id) - json_cols = ("brief", "spec", "review", "test_result", "security_result", "labels") + json_cols = ("brief", "spec", "review", "test_result", "security_result", "labels", "smoke_test_result") for key in json_cols: if key in fields: fields[key] = _json_encode(fields[key]) diff --git a/web/api.py b/web/api.py index ff6b510..9e2aa9b 100644 --- a/web/api.py +++ b/web/api.py @@ -1051,6 +1051,20 @@ def revise_task(task_id: str, body: TaskRevise): raw = row["steps"] steps = _json.loads(raw) if isinstance(raw, str) else raw + # KIN-128: On 2nd+ revision, inject analyst as first step for fresh perspective. + # Guard: skip if analyst is already the first step (idempotent), or if steps is None. + if revise_count >= 2 and steps and (not steps or steps[0].get("role") != "analyst"): + analyst_step = { + "role": "analyst", + "model": "sonnet", + "brief": ( + f"Задача вернулась на ревизию №{revise_count}. " + "Проведи свежий анализ причин провала предыдущих попыток " + "и предложи другой подход." + ), + } + steps = [analyst_step] + list(steps) + conn.close() # Launch pipeline in background subprocess