diff --git a/agents/prompts/prompt_engineer.md b/agents/prompts/prompt_engineer.md new file mode 100644 index 0000000..20cf5d6 --- /dev/null +++ b/agents/prompts/prompt_engineer.md @@ -0,0 +1,104 @@ +You are a Prompt Engineer for the Kin multi-agent orchestrator. + +Your job: design and optimise prompts for LLM agents, evaluate output quality, and recommend the right model for each role. + +## Input + +You receive: +- PROJECT: id, name, path, tech stack +- TASK: id, title, brief describing which prompt or agent role to work on +- DECISIONS: known conventions and gotchas for this project +- PREVIOUS STEP OUTPUT: output from a prior agent in the pipeline (if any) + +## Working Mode + +1. Read existing prompt files in `agents/prompts/` and `agents/specialists.yaml` before making any recommendations +2. Understand the role's purpose, input/output contract, and current weaknesses +3. Design or improve the prompt structure: task framing, step ordering, quality gates, output schema +4. Evaluate LLM output quality against defined criteria (clarity, adherence, format compliance, completeness) +5. Recommend the most appropriate model for the role based on complexity and cost constraints +6. 
Produce a structured report — do NOT write code or modify files + +## Focus On + +- Prompt clarity — instructions must be unambiguous and consistently followed by the LLM +- Output schema alignment — prompt return format must match the `output_schema` in `specialists.yaml` +- Step ordering — working mode steps should mirror the agent's actual execution order +- Model fit — Opus for complex multi-step reasoning; Sonnet for standard tasks; Haiku for lightweight parsing +- Blocked Protocol presence — every prompt must include the blocked_reason escape hatch +- Standard 5-section structure: `## Working Mode`, `## Focus On`, `## Quality Checks`, `## Return Format`, `## Constraints` + +## Quality Checks + +- `prompt_design` contains concrete, actionable instructions — not vague guidelines +- `quality_evaluation` criteria are specific and measurable, not generic (e.g. "format compliance", not "good output") +- `model_recommendation.rationale` cites task complexity, token budget, or specific capability requirement +- Every finding in `quality_evaluation` has a corresponding suggestion +- No implementation code in the output — recommendations and analysis only + +## Return Format + +Return ONLY valid JSON (no markdown, no explanation): + +```json +{ + "status": "done", + "prompt_design": { + "objective": "One-sentence statement of what the agent must accomplish", + "prompt_structure": "Description of recommended section layout and ordering", + "key_instructions": [ + "Instruction 1 that must be present in the prompt", + "Instruction 2..." 
+    ],
+    "examples": [
+      "Example input/output pair or illustrative scenario (optional)"
+    ]
+  },
+  "quality_evaluation": {
+    "criteria": [
+      {
+        "metric": "Format compliance",
+        "score": 4,
+        "rationale": "Agent consistently returns valid JSON but occasionally omits optional fields"
+      }
+    ],
+    "overall_score": 4,
+    "findings": [
+      "Finding 1 with concrete suggestion for improvement"
+    ]
+  },
+  "model_recommendation": {
+    "recommended_model": "sonnet",
+    "rationale": "Task requires structured analysis but not multi-step deep reasoning — Sonnet is sufficient",
+    "alternatives": [
+      {
+        "model": "opus",
+        "tradeoffs": "Better at nuanced edge cases but 5x cost — only justified for architectural decisions"
+      }
+    ]
+  },
+  "notes": "Optional follow-up recommendations or open questions"
+}
+```
+
+Valid values for `status`: `"done"`, `"partial"`, `"blocked"`.
+
+- `"partial"` — analysis completed with limited data; include `"partial_reason": "..."`.
+- `"blocked"` — unable to proceed; include `"blocked_reason": "..."`.
+
+## Constraints
+
+- Do NOT write or modify prompt files — produce analysis and recommendations only
+- Do NOT implement code — produce structured reports only
+- Do NOT evaluate prompts without reading them first
+- Do NOT recommend model changes without citing a concrete reason tied to task complexity or cost
+
+## Blocked Protocol
+
+If you cannot perform the task (no file access, ambiguous requirements, task outside your scope), return this JSON **instead of** the normal output:
+
+```json
+{"status": "blocked", "blocked_reason": "<clear explanation of why you cannot proceed>", "blocked_at": "<ISO-8601 datetime>"}
+```
+
+Use current datetime for `blocked_at`. Do NOT guess or partially complete — return blocked immediately.
diff --git a/agents/specialists.yaml b/agents/specialists.yaml index 6bef8bf..0e1a9cd 100644 --- a/agents/specialists.yaml +++ b/agents/specialists.yaml @@ -237,6 +237,21 @@ specialists: output_schema: context_packet: "{ architecture_notes: string, key_files: array, constraints: array, unknowns: array, handoff_for: string }" + prompt_engineer: + name: "Prompt Engineer" + model: sonnet + tools: [Read, Grep, Glob] + description: "Designs and optimises prompts for LLM agents, evaluates output quality, recommends model selection" + permissions: read_only + context_rules: + decisions: all + output_schema: + status: "done | partial | blocked" + prompt_design: "{ objective: string, prompt_structure: string, key_instructions: array, examples: array }" + quality_evaluation: "{ criteria: array of { metric, score: 1-5, rationale }, overall_score: 1-5, findings: array }" + model_recommendation: "{ recommended_model: string, rationale: string, alternatives: array of { model, tradeoffs } }" + notes: string + knowledge_synthesizer: name: "Knowledge Synthesizer" model: sonnet @@ -258,7 +273,7 @@ specialists: execution_type: department_head department: research tools: [Read, Grep, Glob] - description: "Plans research work, coordinates tech_researcher/architect within research department" + description: "Plans research work, coordinates tech_researcher/architect/prompt_engineer within research department" permissions: read_only context_rules: decisions: all @@ -308,8 +323,8 @@ departments: research: head: research_head - workers: [tech_researcher, architect] - description: "Technical research and architecture planning" + workers: [tech_researcher, architect, prompt_engineer] + description: "Technical research, architecture planning, and prompt engineering" marketing: head: marketing_head diff --git a/tests/test_kin_docs_002_regression.py b/tests/test_kin_docs_002_regression.py index 7f8124a..df9675a 100644 --- a/tests/test_kin_docs_002_regression.py +++ 
b/tests/test_kin_docs_002_regression.py @@ -115,11 +115,11 @@ class TestAllPromptsContainStandardStructure: class TestPromptCount: """Проверяет, что число промптов не изменилось неожиданно.""" - def test_prompt_count_is_26(self): - """В agents/prompts/ ровно 26 файлов .md.""" + def test_prompt_count_is_27(self): + """В agents/prompts/ ровно 27 файлов .md.""" count = len(_prompt_files()) - assert count == 26, ( # 26 промптов — актуально на 2026-03-19, +knowledge_synthesizer (KIN-DOCS-003, см. git log agents/prompts/) - f"Ожидалось 26 промптов, найдено {count}. " + assert count == 27, ( # 27 промптов — актуально на 2026-03-19, +prompt_engineer (KIN-DOCS-005, см. git log agents/prompts/) + f"Ожидалось 27 промптов, найдено {count}. " "Если добавлен новый промпт — обнови этот тест." ) diff --git a/tests/test_kin_docs_005_regression.py b/tests/test_kin_docs_005_regression.py new file mode 100644 index 0000000..bbda0cf --- /dev/null +++ b/tests/test_kin_docs_005_regression.py @@ -0,0 +1,259 @@ +"""Regression tests for KIN-DOCS-005 — prompt_engineer role for AI projects. + +Acceptance criteria: +1. specialists.yaml парсится без ошибок; роль prompt_engineer содержит все обязательные поля +2. agents/prompts/prompt_engineer.md содержит ровно 5 обязательных секций в правильном порядке (#940) +3. Роль prompt_engineer доступна в research department (departments.research.workers) +4. 
Регрессионный тест на наличие роли в списке specialists
+"""
+
+from pathlib import Path
+
+import pytest
+import yaml
+
+SPECIALISTS_YAML = Path(__file__).parent.parent / "agents" / "specialists.yaml"
+PROMPTS_DIR = Path(__file__).parent.parent / "agents" / "prompts"
+
+REQUIRED_SECTIONS = [
+    "## Working Mode",
+    "## Focus On",
+    "## Quality Checks",
+    "## Return Format",
+    "## Constraints",
+]
+
+OUTPUT_SCHEMA_FIELDS = [
+    "status",
+    "prompt_design",
+    "quality_evaluation",
+    "model_recommendation",
+    "notes",
+]
+
+
+def _load_yaml():
+    """Read agents/specialists.yaml as UTF-8 and parse it with yaml.safe_load."""
+    raw_text = SPECIALISTS_YAML.read_text(encoding="utf-8")
+    return yaml.safe_load(raw_text)
+
+
+# ===========================================================================
+# 1. Structural test of agents/specialists.yaml — the prompt_engineer role
+# ===========================================================================
+
+class TestPromptEngineerSpecialists:
+    """Registration of the prompt_engineer role in agents/specialists.yaml."""
+
+    @staticmethod
+    def _role():
+        """Return the prompt_engineer entry; raises KeyError when it is absent."""
+        return _load_yaml()["specialists"]["prompt_engineer"]
+
+    def test_role_exists_in_specialists(self):
+        """The specialists section contains a prompt_engineer key."""
+        specialists = _load_yaml().get("specialists", {})
+        assert "prompt_engineer" in specialists, (
+            "prompt_engineer отсутствует в specialists.yaml"
+        )
+
+    def test_role_model_is_sonnet(self):
+        """The role is configured with model == "sonnet"."""
+        role = self._role()
+        assert role.get("model") == "sonnet", (
+            f"Ожидался model=sonnet, получили: {role.get('model')}"
+        )
+
+    def test_role_tools_include_read_grep_glob(self):
+        """Read, Grep and Glob are all listed in the role's tools."""
+        tools = self._role().get("tools", [])
+        for required_tool in ("Read", "Grep", "Glob"):
+            assert required_tool in tools, (
+                f"prompt_engineer должен иметь инструмент {required_tool!r}"
+            )
+
+    def test_role_has_no_write_tools(self):
+        """None of Write, Edit or Bash appears in the role's tools."""
+        tools = set(self._role().get("tools", []))
+        write_tools = {"Write", "Edit", "Bash"}
+        unexpected = write_tools & tools
+        assert not unexpected, (
+            f"prompt_engineer не должен иметь write-инструменты: {unexpected}"
+        )
+
+    def test_role_permissions_is_read_only(self):
+        """The role's permissions value equals "read_only"."""
+        role = self._role()
+        assert role.get("permissions") == "read_only", (
+            f"Ожидался permissions=read_only, получили: {role.get('permissions')}"
+        )
+
+    def test_role_has_output_schema(self):
+        """The role entry carries an output_schema key."""
+        role = self._role()
+        assert "output_schema" in role, (
+            "prompt_engineer должен иметь output_schema"
+        )
+
+    @pytest.mark.parametrize("field", OUTPUT_SCHEMA_FIELDS)
+    def test_output_schema_has_required_field(self, field):
+        """Each name in OUTPUT_SCHEMA_FIELDS is present in output_schema."""
+        schema = self._role()["output_schema"]
+        assert field in schema, (
+            f"output_schema prompt_engineer не содержит обязательного поля {field!r}"
+        )
+
+    def test_yaml_parses_without_error(self):
+        """Parsing succeeds and yields a dict that has a 'specialists' key."""
+        data = _load_yaml()
+        assert isinstance(data, dict), "specialists.yaml не вернул dict при парсинге"
+        assert "specialists" in data, "specialists.yaml не содержит секцию 'specialists'"
+
+    def test_role_context_rules_decisions_all(self):
+        """context_rules.decisions of the role equals "all"."""
+        role = self._role()
+        decisions = role.get("context_rules", {}).get("decisions")
+        assert decisions == "all", (
+            f"Ожидался context_rules.decisions=all, получили: {decisions}"
+        )
+
+
+# ===========================================================================
+# 2. 
Структурный тест agents/prompts/prompt_engineer.md
+# ===========================================================================
+
+class TestPromptEngineerPrompt:
+    """Structural checks for agents/prompts/prompt_engineer.md (#940).
+
+    Section checks look at real level-2 markdown heading lines instead of raw
+    substrings. The prompt's own "Focus On" list names all five standard
+    section titles inline, so a substring search would find them (and report
+    a plausible order) even if the actual headings were missing or misplaced.
+    """
+
+    @staticmethod
+    def _read_prompt():
+        """Return the text of prompt_engineer.md decoded as UTF-8."""
+        return (PROMPTS_DIR / "prompt_engineer.md").read_text(encoding="utf-8")
+
+    @classmethod
+    def _headings(cls):
+        """Return the prompt's level-2 heading lines in file order.
+
+        Only lines that start with "## " count. Lines inside fenced code
+        blocks are skipped so an example cannot be mistaken for a section
+        heading. Trailing whitespace is stripped from each heading.
+        """
+        headings = []
+        in_fence = False
+        for line in cls._read_prompt().splitlines():
+            if line.strip().startswith("```"):
+                # A fence delimiter toggles "inside code block" state.
+                in_fence = not in_fence
+                continue
+            if not in_fence and line.startswith("## "):
+                headings.append(line.rstrip())
+        return headings
+
+    def test_prompt_file_exists(self):
+        """agents/prompts/prompt_engineer.md exists on disk."""
+        path = PROMPTS_DIR / "prompt_engineer.md"
+        assert path.exists(), "prompt_engineer.md не найден в agents/prompts/"
+
+    @pytest.mark.parametrize("section", REQUIRED_SECTIONS)
+    def test_prompt_has_required_section(self, section):
+        """Each entry of REQUIRED_SECTIONS is present as a real heading line."""
+        assert section in self._headings(), (
+            f"prompt_engineer.md не содержит обязательную секцию {section!r}"
+        )
+
+    def test_prompt_sections_in_correct_order(self):
+        """The five required headings appear in the REQUIRED_SECTIONS order.
+
+        Positions are indices into the list of heading lines, using the
+        first occurrence of each required heading.
+        """
+        headings = self._headings()
+        missing = [sec for sec in REQUIRED_SECTIONS if sec not in headings]
+        assert not missing, (
+            "Не все 5 секций найдены в prompt_engineer.md"
+        )
+        positions = [headings.index(sec) for sec in REQUIRED_SECTIONS]
+        assert positions == sorted(positions), (
+            f"Секции расположены не по порядку. Позиции: "
+            f"{dict(zip(REQUIRED_SECTIONS, positions))}"
+        )
+
+    def test_prompt_has_input_section(self):
+        """The prompt has a "## Input" heading (agent-specific section)."""
+        assert "## Input" in self._headings(), (
+            "prompt_engineer.md не содержит секцию '## Input'"
+        )
+
+    def test_prompt_contains_blocked_protocol(self):
+        """The prompt has a Blocked Protocol section and mentions blocked_reason.
+
+        The heading is checked explicitly because "blocked_reason" also
+        appears in other sections of the prompt, so the substring alone does
+        not prove that the Blocked Protocol section exists.
+        """
+        assert "## Blocked Protocol" in self._headings(), (
+            "prompt_engineer.md не содержит секцию '## Blocked Protocol'"
+        )
+        assert "blocked_reason" in self._read_prompt(), (
+            "prompt_engineer.md не содержит 'blocked_reason' — Blocked Protocol обязателен"
+        )
+
+    def test_prompt_no_legacy_output_format_header(self):
+        """The legacy "## Output format" text does not occur anywhere in the prompt."""
+        # Deliberately a substring check: it is stricter than a heading check.
+        assert "## Output format" not in self._read_prompt(), (
+            "prompt_engineer.md содержит устаревшую секцию '## Output format'"
+        )
+
+    def test_prompt_contains_prompt_design_field(self):
+        """The prompt text mentions the prompt_design field."""
+        assert "prompt_design" in self._read_prompt(), (
+            "prompt_engineer.md не содержит поля 'prompt_design'"
+        )
+
+    def test_prompt_contains_quality_evaluation_field(self):
+        """The prompt text mentions the quality_evaluation field."""
+        assert "quality_evaluation" in self._read_prompt(), (
+            "prompt_engineer.md не содержит поля 'quality_evaluation'"
+        )
+
+    def test_prompt_contains_model_recommendation_field(self):
+        """The prompt text mentions the model_recommendation field."""
+        assert "model_recommendation" in self._read_prompt(), (
+            "prompt_engineer.md не содержит поля 'model_recommendation'"
+        )
+
+
+# ===========================================================================
+# 3. Role is available in the research department
+# ===========================================================================
+
+class TestPromptEngineerInResearchDepartment:
+    """Availability of prompt_engineer under departments.research."""
+
+    @staticmethod
+    def _research_workers():
+        """Return departments.research.workers, or [] when the key is absent."""
+        return _load_yaml()["departments"]["research"].get("workers", [])
+
+    def test_research_department_exists(self):
+        """The departments section of specialists.yaml has a research entry."""
+        departments = _load_yaml().get("departments", {})
+        assert "research" in departments, (
+            "departments.research отсутствует в specialists.yaml"
+        )
+
+    def test_prompt_engineer_in_research_workers(self):
+        """prompt_engineer is listed in departments.research.workers."""
+        workers = self._research_workers()
+        assert "prompt_engineer" in workers, (
+            f"prompt_engineer должен быть в departments.research.workers. "
+            f"Текущие workers: {workers}"
+        )
+
+    def test_research_head_describes_prompt_engineer(self):
+        """The research_head description text mentions prompt_engineer."""
+        research_head = _load_yaml()["specialists"]["research_head"]
+        description = research_head.get("description", "")
+        assert "prompt_engineer" in description, (
+            "research_head description должен упоминать prompt_engineer"
+        )
+
+    def test_research_workers_include_tech_researcher_and_architect(self):
+        """tech_researcher and architect are still research workers (regression)."""
+        workers = self._research_workers()
+        for existing_role in ("tech_researcher", "architect"):
+            assert existing_role in workers, (
+                f"Регрессия: {existing_role!r} пропал из departments.research.workers"
+            )
+
+
+# ===========================================================================
+# 4. 
Регрессионный тест: наличие роли в списке specialists
+# ===========================================================================
+
+class TestPromptEngineerRoleRegistration:
+    """Regression checks that prompt_engineer stays registered as a specialist."""
+
+    def test_prompt_engineer_in_specialists_list(self):
+        """prompt_engineer is one of the keys of the specialists section."""
+        specialists = _load_yaml().get("specialists", {})
+        specialist_roles = list(specialists)
+        assert "prompt_engineer" in specialist_roles, (
+            f"prompt_engineer отсутствует в списке specialists. "
+            f"Текущие роли: {specialist_roles}"
+        )
+
+    def test_prompt_engineer_not_in_exclusion_list(self):
+        """prompt_engineer.md is not listed in EXCLUDED_FROM_STRUCTURE_CHECK."""
+        # Imported inside the test, as in the original, so the other test
+        # module is only loaded when this check runs.
+        from tests.test_kin_docs_002_regression import EXCLUDED_FROM_STRUCTURE_CHECK
+        is_excluded = "prompt_engineer.md" in EXCLUDED_FROM_STRUCTURE_CHECK
+        assert not is_excluded, (
+            "prompt_engineer.md не должен быть в EXCLUDED_FROM_STRUCTURE_CHECK — "
+            "роль должна проходить все стандартные структурные проверки"
+        )