diff --git a/agents/prompts/repo_researcher.md b/agents/prompts/repo_researcher.md new file mode 100644 index 0000000..1dcc987 --- /dev/null +++ b/agents/prompts/repo_researcher.md @@ -0,0 +1,107 @@ +You are a Repo Researcher for the Kin multi-agent orchestrator. + +Your job: analyse a repository or codebase — map its structure, tech stack, architecture, strengths, weaknesses, and integration points — and produce a structured research report. + +## Input + +You receive: +- PROJECT: id, name, path, tech stack +- TARGET_REPO: URL or local path to the repository to analyse +- CODEBASE_SCOPE: list of files or directories to focus on (optional; analyse whole repo if absent) +- DECISIONS: known gotchas and workarounds for the project + +## Working Mode + +1. Fetch or read the repository overview: README, package manifests (package.json, pyproject.toml, go.mod, etc.), top-level directory listing +2. Map key components: identify major modules, services, and directories; record each component's path and role +3. Determine the tech stack: languages, frameworks, databases, build tools, infrastructure +4. Identify architectural patterns: monolith vs microservices, sync vs async, data flow, entry points +5. Assess strengths and weaknesses: code quality signals, test coverage indicators, documentation state, known gotchas from DECISIONS +6. Identify integration points: public APIs, event buses, shared databases, external service dependencies +7. 
If CODEBASE_SCOPE is set, compare scope files against TARGET_REPO findings and note discrepancies + +## Focus On + +- README and manifest files first — they reveal intent and dependencies fastest +- Directory structure — top-level layout reveals architectural decisions +- Entry points — main files, server bootstraps, CLI roots +- Dependency versions — outdated or conflicting deps are common gotchas +- Test directory presence — indicator of code quality discipline +- Read-only analysis — never write or modify any files during research +- WebFetch availability — if repo is remote and WebFetch is unavailable, set status to "partial" + +## Quality Checks + +- `key_components` contains concrete entries — each with name, path, and role; not generic descriptions +- `tech_stack` lists actual package names and versions where detectable +- `gotchas` are specific and surprising — not general programming advice +- `integration_points` cite actual file paths or config entries, not vague "the app uses Redis" +- `status` is `"partial"` when repo access was incomplete or CODEBASE_SCOPE was only partially covered +- No secret values logged — reference by variable name only + +## Return Format + +Return ONLY valid JSON (no markdown, no explanation): + +```json +{ + "status": "done", + "repo_overview": "One-paragraph summary of what the repository is, its purpose, and general maturity", + "tech_stack": { + "languages": ["Python 3.11", "TypeScript 5"], + "frameworks": ["FastAPI 0.110", "Vue 3"], + "databases": ["SQLite"], + "infrastructure": ["Docker", "nginx"], + "build_tools": ["Vite", "pip"] + }, + "architecture_summary": "Description of the overall architectural style, data flow, and major design decisions", + "key_components": [ + { + "name": "core/models.py", + "path": "core/models.py", + "role": "All DB access — pure functions over SQLite", + "dependencies": ["core/db.py"] + } + ], + "strengths": [ + "Pure-function data layer with no ORM — easy to test", + "Consistent JSON 
output schema across all agents" + ], + "weaknesses": [ + "No migration tooling — schema changes require manual ALTER TABLE", + "Test coverage limited to core/ — agents/runner.py under-tested" + ], + "integration_points": [ + "web/api.py exposes REST endpoints consumed by Vue frontend", + "agents/runner.py spawns claude CLI subprocesses — external dependency" + ], + "gotchas": [ + "SQLite WAL mode not enabled — concurrent writes from watcher may conflict", + "specialists.yaml loaded fresh on every pipeline run — no caching" + ], + "notes": "Optional context or follow-up recommendations for the Architect or dev agent" +} +``` + +Valid values for `status`: `"done"`, `"partial"`, `"blocked"`. + +- `"partial"` — analysis completed with limited data; include `"partial_reason": "..."`. +- `"blocked"` — unable to proceed; include `"blocked_reason": "..."`. + +## Constraints + +- Do NOT log or include actual secret values — reference by variable name only +- Do NOT write implementation code — produce research and analysis only +- Do NOT use Bash for write operations — read-only commands only +- Do NOT set `key_components` to generic descriptions — cite specific path and concrete role +- Do NOT use WebFetch for private or authenticated repositories — set status to "partial" if remote access fails + +## Blocked Protocol + +If you cannot perform the task (no file access, ambiguous requirements, task outside your scope), return this JSON **instead of** the normal output: + +```json +{"status": "blocked", "reason": "<clear explanation of why you cannot proceed>", "blocked_at": "<ISO-8601 datetime>"} +``` + +Use current datetime for `blocked_at`. Do NOT guess or partially complete — return blocked immediately. If some analysis was possible but the data was limited, this protocol does not apply: use status `"partial"` as described in Return Format instead. 
diff --git a/agents/specialists.yaml b/agents/specialists.yaml index 0e1a9cd..f0d15b3 100644 --- a/agents/specialists.yaml +++ b/agents/specialists.yaml @@ -95,7 +95,7 @@ specialists: name: "Tech Researcher" model: sonnet tools: [Read, Grep, Glob, WebFetch, Bash] - description: "Studies external APIs (docs, endpoints, limits, quirks), compares with codebase, produces structured review" + description: "Studies external APIs (docs, endpoints, limits, quirks), compares with codebase, produces structured review. Use for external API research only. For repository/codebase analysis use repo_researcher." permissions: read_only context_rules: decisions: [gotcha, workaround] @@ -111,6 +111,26 @@ specialists: codebase_diff: "array of { file, line_hint, issue, suggestion }" notes: string + repo_researcher: + name: "Repo Researcher" + model: sonnet + tools: [Read, Grep, Glob, WebFetch, Bash] + description: "Analyses repositories and codebases: maps structure, tech stack, architecture, strengths, weaknesses, and integration points. Use for repository/codebase analysis only. For external API research use tech_researcher." 
+ permissions: read_only + context_rules: + decisions: [gotcha, workaround] + output_schema: + status: "done | partial | blocked" + repo_overview: string + tech_stack: "{ languages, frameworks, databases, infrastructure, build_tools }" + architecture_summary: string + key_components: "array of { name, path, role, dependencies }" + strengths: "array of strings" + weaknesses: "array of strings" + integration_points: "array of strings" + gotchas: "array of strings" + notes: string + constitution: name: "Constitution Agent" model: sonnet @@ -273,7 +293,7 @@ specialists: execution_type: department_head department: research tools: [Read, Grep, Glob] - description: "Plans research work, coordinates tech_researcher/architect/prompt_engineer within research department" + description: "Plans research work, coordinates tech_researcher/repo_researcher/architect/prompt_engineer within research department. tech_researcher — for external API research; repo_researcher — for repository/codebase analysis." permissions: read_only context_rules: decisions: all @@ -323,8 +343,8 @@ departments: research: head: research_head - workers: [tech_researcher, architect, prompt_engineer] - description: "Technical research, architecture planning, and prompt engineering" + workers: [tech_researcher, repo_researcher, architect, prompt_engineer] + description: "Technical research (API and codebase), architecture planning, and prompt engineering. tech_researcher — external APIs; repo_researcher — repositories/codebases." 
marketing: head: marketing_head diff --git a/tests/test_kin_docs_002_regression.py b/tests/test_kin_docs_002_regression.py index df9675a..2023e0c 100644 --- a/tests/test_kin_docs_002_regression.py +++ b/tests/test_kin_docs_002_regression.py @@ -115,11 +115,11 @@ class TestAllPromptsContainStandardStructure: class TestPromptCount: """Проверяет, что число промптов не изменилось неожиданно.""" - def test_prompt_count_is_27(self): - """В agents/prompts/ ровно 27 файлов .md.""" + def test_prompt_count_is_28(self): + """В agents/prompts/ ровно 28 файлов .md.""" count = len(_prompt_files()) - assert count == 27, ( # 27 промптов — актуально на 2026-03-19, +prompt_engineer (KIN-DOCS-005, см. git log agents/prompts/) - f"Ожидалось 27 промптов, найдено {count}. " + assert count == 28, ( # 28 промптов — актуально на 2026-03-19, +repo_researcher (KIN-DOCS-006, см. git log agents/prompts/) + f"Ожидалось 28 промптов, найдено {count}. " "Если добавлен новый промпт — обнови этот тест." ) diff --git a/tests/test_kin_docs_006_regression.py b/tests/test_kin_docs_006_regression.py new file mode 100644 index 0000000..0df62a5 --- /dev/null +++ b/tests/test_kin_docs_006_regression.py @@ -0,0 +1,297 @@ +"""Regression tests for KIN-DOCS-006 — repo_researcher: no API fields in output schema. + +Acceptance criteria: +1. repo_researcher зарегистрирован в specialists.yaml с output_schema без API-полей +2. output_schema repo_researcher НЕ содержит полей: endpoints, rate_limits, auth_method +3. output_schema repo_researcher содержит поля основного отчёта: repo_overview, tech_stack, + architecture_summary, key_components, strengths, weaknesses, integration_points, gotchas, notes +4. agents/prompts/repo_researcher.md существует и содержит все 5 стандартных секций +5. Промпт repo_researcher НЕ содержит API-полей в Return Format +6. repo_researcher доступен в departments.research.workers +7. 
Регрессия: tech_researcher по-прежнему содержит свои API-поля (не сломан) +""" + +from pathlib import Path + +import pytest +import yaml + + +SPECIALISTS_YAML = Path(__file__).parent.parent / "agents" / "specialists.yaml" +PROMPTS_DIR = Path(__file__).parent.parent / "agents" / "prompts" +REPO_RESEARCHER_PROMPT = PROMPTS_DIR / "repo_researcher.md" + +REQUIRED_SECTIONS = [ + "## Working Mode", + "## Focus On", + "## Quality Checks", + "## Return Format", + "## Constraints", +] + +# Поля, которые НЕ должны присутствовать в repo_researcher (API-специфичные поля) +API_FIELDS_FORBIDDEN = {"endpoints", "rate_limits", "auth_method"} + +# Поля, которые ОБЯЗАНЫ присутствовать в repo_researcher output_schema +REPO_RESEARCHER_REQUIRED_SCHEMA_FIELDS = { + "status", + "repo_overview", + "tech_stack", + "architecture_summary", + "key_components", + "strengths", + "weaknesses", + "integration_points", + "gotchas", + "notes", +} + +# API-поля, которые обязаны остаться в tech_researcher (регрессия) +TECH_RESEARCHER_API_FIELDS = {"endpoints", "rate_limits", "auth_method"} + + +def _load_yaml(): + return yaml.safe_load(SPECIALISTS_YAML.read_text(encoding="utf-8")) + + +# =========================================================================== +# 1. 
repo_researcher — YAML schema: отсутствие API-полей +# =========================================================================== + +class TestRepoResearcherOutputSchemaNoApiFields: + """output_schema repo_researcher НЕ должна содержать API-специфичные поля.""" + + @pytest.mark.parametrize("forbidden_field", sorted(API_FIELDS_FORBIDDEN)) + def test_output_schema_does_not_contain_api_field(self, forbidden_field): + """output_schema repo_researcher не содержит API-поле.""" + data = _load_yaml() + schema = data["specialists"]["repo_researcher"]["output_schema"] + assert forbidden_field not in schema, ( + f"output_schema repo_researcher не должна содержать поле {forbidden_field!r} — " + "это API-специфичное поле, принадлежащее tech_researcher" + ) + + +# =========================================================================== +# 2. repo_researcher — YAML schema: наличие полей отчёта +# =========================================================================== + +class TestRepoResearcherOutputSchemaRequiredFields: + """output_schema repo_researcher ДОЛЖНА содержать все поля основного отчёта.""" + + @pytest.mark.parametrize("required_field", sorted(REPO_RESEARCHER_REQUIRED_SCHEMA_FIELDS)) + def test_output_schema_contains_required_field(self, required_field): + """output_schema repo_researcher содержит обязательное поле отчёта.""" + data = _load_yaml() + schema = data["specialists"]["repo_researcher"]["output_schema"] + assert required_field in schema, ( + f"output_schema repo_researcher обязана содержать поле {required_field!r}" + ) + + +# =========================================================================== +# 3. 
repo_researcher — регистрация и базовая структура в specialists.yaml +# =========================================================================== + +class TestRepoResearcherSpecialistsEntry: + """repo_researcher зарегистрирован в specialists.yaml с корректной базовой структурой.""" + + def test_repo_researcher_exists_in_specialists(self): + """repo_researcher присутствует в секции specialists.""" + data = _load_yaml() + assert "repo_researcher" in data.get("specialists", {}), ( + "repo_researcher отсутствует в specialists.yaml" + ) + + def test_repo_researcher_model_is_sonnet(self): + """repo_researcher использует модель sonnet.""" + data = _load_yaml() + role = data["specialists"]["repo_researcher"] + assert role.get("model") == "sonnet", ( + f"Ожидался model=sonnet, получили: {role.get('model')}" + ) + + def test_repo_researcher_permissions_is_read_only(self): + """repo_researcher имеет permissions=read_only.""" + data = _load_yaml() + role = data["specialists"]["repo_researcher"] + assert role.get("permissions") == "read_only", ( + f"Ожидался permissions=read_only, получили: {role.get('permissions')}" + ) + + def test_repo_researcher_tools_include_read_grep_glob(self): + """repo_researcher имеет инструменты Read, Grep, Glob.""" + data = _load_yaml() + tools = data["specialists"]["repo_researcher"].get("tools", []) + for tool in ("Read", "Grep", "Glob"): + assert tool in tools, f"repo_researcher должен иметь инструмент {tool!r}" + + def test_repo_researcher_has_output_schema(self): + """repo_researcher имеет поле output_schema.""" + data = _load_yaml() + role = data["specialists"]["repo_researcher"] + assert "output_schema" in role, "repo_researcher должен иметь output_schema" + + +# =========================================================================== +# 4. 
repo_researcher промпт — существование и структура +# =========================================================================== + +class TestRepoResearcherPromptStructure: + """agents/prompts/repo_researcher.md существует и содержит все 5 стандартных секций.""" + + def test_prompt_file_exists(self): + """Файл agents/prompts/repo_researcher.md существует.""" + assert REPO_RESEARCHER_PROMPT.exists(), ( + f"Промпт repo_researcher не найден: {REPO_RESEARCHER_PROMPT}" + ) + + def test_prompt_file_is_not_empty(self): + """Файл repo_researcher.md не пустой.""" + content = REPO_RESEARCHER_PROMPT.read_text(encoding="utf-8") + assert len(content.strip()) > 100 + + @pytest.mark.parametrize("section", REQUIRED_SECTIONS) + def test_prompt_has_required_section(self, section): + """Промпт repo_researcher.md содержит каждую из 5 стандартных секций.""" + content = REPO_RESEARCHER_PROMPT.read_text(encoding="utf-8") + assert section in content, ( + f"repo_researcher.md не содержит обязательную секцию {section!r}" + ) + + def test_prompt_sections_in_correct_order(self): + """5 обязательных секций расположены в правильном порядке.""" + content = REPO_RESEARCHER_PROMPT.read_text(encoding="utf-8") + positions = [content.find(sec) for sec in REQUIRED_SECTIONS] + assert all(p != -1 for p in positions), "Не все 5 секций найдены в repo_researcher.md" + assert positions == sorted(positions), ( + f"Секции в repo_researcher.md расположены не по порядку. 
" + f"Позиции: {dict(zip(REQUIRED_SECTIONS, positions))}" + ) + + def test_prompt_has_input_section(self): + """Промпт repo_researcher.md содержит секцию ## Input.""" + content = REPO_RESEARCHER_PROMPT.read_text(encoding="utf-8") + assert "## Input" in content, "repo_researcher.md не содержит секцию '## Input'" + + def test_prompt_contains_blocked_protocol(self): + """Промпт repo_researcher.md содержит Blocked Protocol.""" + content = REPO_RESEARCHER_PROMPT.read_text(encoding="utf-8") + assert "blocked_reason" in content, ( + "repo_researcher.md не содержит 'blocked_reason' — Blocked Protocol обязателен" + ) + + +# =========================================================================== +# 5. repo_researcher промпт — отсутствие API-полей +# =========================================================================== + +class TestRepoResearcherPromptNoApiFields: + """Промпт repo_researcher.md не упоминает API-специфичные поля в Return Format.""" + + def test_prompt_does_not_define_endpoints_field(self): + """Промпт repo_researcher.md не содержит поля 'endpoints' в выходной схеме.""" + content = REPO_RESEARCHER_PROMPT.read_text(encoding="utf-8") + # endpoints может встречаться только в integration_points контексте, + # но не как самостоятельное JSON-поле вида "endpoints": + assert '"endpoints"' not in content, ( + "repo_researcher.md не должен определять JSON-поле 'endpoints' — " + "это API-специфичное поле tech_researcher" + ) + + def test_prompt_does_not_define_rate_limits_field(self): + """Промпт repo_researcher.md не содержит поля 'rate_limits' в выходной схеме.""" + content = REPO_RESEARCHER_PROMPT.read_text(encoding="utf-8") + assert '"rate_limits"' not in content, ( + "repo_researcher.md не должен определять JSON-поле 'rate_limits' — " + "это API-специфичное поле tech_researcher" + ) + + def test_prompt_does_not_define_auth_method_field(self): + """Промпт repo_researcher.md не содержит поля 'auth_method' в выходной схеме.""" + content = 
REPO_RESEARCHER_PROMPT.read_text(encoding="utf-8") + assert '"auth_method"' not in content, ( + "repo_researcher.md не должен определять JSON-поле 'auth_method' — " + "это API-специфичное поле tech_researcher" + ) + + def test_prompt_defines_repo_overview_field(self): + """Промпт repo_researcher.md определяет поле 'repo_overview'.""" + content = REPO_RESEARCHER_PROMPT.read_text(encoding="utf-8") + assert "repo_overview" in content, ( + "repo_researcher.md должен определять поле 'repo_overview'" + ) + + def test_prompt_defines_architecture_summary_field(self): + """Промпт repo_researcher.md определяет поле 'architecture_summary'.""" + content = REPO_RESEARCHER_PROMPT.read_text(encoding="utf-8") + assert "architecture_summary" in content, ( + "repo_researcher.md должен определять поле 'architecture_summary'" + ) + + +# =========================================================================== +# 6. repo_researcher в departments.research +# =========================================================================== + +class TestRepoResearcherInResearchDepartment: + """repo_researcher доступен в departments.research.workers.""" + + def test_repo_researcher_in_research_workers(self): + """repo_researcher присутствует в departments.research.workers.""" + data = _load_yaml() + workers = data["departments"]["research"].get("workers", []) + assert "repo_researcher" in workers, ( + f"repo_researcher должен быть в departments.research.workers. 
" + f"Текущие workers: {workers}" + ) + + def test_research_head_describes_repo_researcher(self): + """research_head description упоминает repo_researcher.""" + data = _load_yaml() + description = data["specialists"]["research_head"].get("description", "") + assert "repo_researcher" in description, ( + "research_head description должен упоминать repo_researcher" + ) + + def test_tech_researcher_still_in_research_workers(self): + """Регрессия: tech_researcher по-прежнему в departments.research.workers.""" + data = _load_yaml() + workers = data["departments"]["research"].get("workers", []) + assert "tech_researcher" in workers, ( + "Регрессия: tech_researcher пропал из departments.research.workers" + ) + + +# =========================================================================== +# 7. Регрессия: tech_researcher не сломан (API-поля на месте) +# =========================================================================== + +class TestTechResearcherApiFieldsRegression: + """Регрессия: tech_researcher по-прежнему содержит API-специфичные поля.""" + + @pytest.mark.parametrize("api_field", sorted(TECH_RESEARCHER_API_FIELDS)) + def test_tech_researcher_output_schema_still_has_api_field(self, api_field): + """tech_researcher output_schema по-прежнему содержит API-поле (регрессия).""" + data = _load_yaml() + schema = data["specialists"]["tech_researcher"]["output_schema"] + assert api_field in schema, ( + f"Регрессия: tech_researcher output_schema потеряла поле {api_field!r}" + ) + + def test_tech_researcher_description_mentions_external_api(self): + """tech_researcher description явно указывает назначение: внешние API.""" + data = _load_yaml() + description = data["specialists"]["tech_researcher"].get("description", "").lower() + assert "external" in description or "api" in description, ( + "tech_researcher description должен упоминать внешние API" + ) + + def test_tech_researcher_description_mentions_repo_researcher(self): + """tech_researcher description упоминает 
repo_researcher для кодовых баз.""" + data = _load_yaml() + description = data["specialists"]["tech_researcher"].get("description", "") + assert "repo_researcher" in description, ( + "tech_researcher description должен упоминать repo_researcher " + "как альтернативу для анализа репозиториев" + )