kin/tests/test_kin_docs_001_regression.py
2026-03-19 13:52:36 +02:00

401 lines
19 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Regression tests for KIN-DOCS-001 — Constitutional Validator gate.
Covers:
1. specialists.yaml: constitutional_validator роль с корректным frontmatter
(model=sonnet, gate=true, output_schema с 4 вердиктами)
2. Маршрут 'feature': constitutional_validator после architect, перед frontend_dev
3. Маршрут 'spec_driven': constitutional_validator после architect, перед task_decomposer
4. agents/prompts/constitutional_validator.md существует и содержит все 4 вердикта
5. runner.py: changes_required → pipeline blocked; escalated → pipeline blocked;
approved → pipeline continues
"""
import json
from pathlib import Path
from unittest.mock import patch, MagicMock
import pytest
import yaml
from core.db import init_db
from core import models
from agents.runner import run_pipeline
SPECIALISTS_YAML = Path(__file__).parent.parent / "agents" / "specialists.yaml"
PROMPTS_DIR = Path(__file__).parent.parent / "agents" / "prompts"
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _load_yaml():
    """Read the file at SPECIALISTS_YAML as UTF-8 and return its parsed YAML content."""
    raw_text = SPECIALISTS_YAML.read_text(encoding="utf-8")
    return yaml.safe_load(raw_text)
def _mock_success(output_data):
m = MagicMock()
m.stdout = json.dumps(output_data) if isinstance(output_data, dict) else output_data
m.stderr = ""
m.returncode = 0
return m
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture
def conn():
    """Yield a database connection seeded with one project and one task.

    The connection is created with ``init_db(":memory:")`` and populated with
    project ``p1`` and task ``P1-001`` (brief ``route_type`` = ``feature``).

    The connection is closed in a ``finally`` clause so it is released both
    after the test finishes and when seeding itself raises before the yield.
    """
    c = init_db(":memory:")
    try:
        models.create_project(c, "p1", "TestProject", "/p1", tech_stack=["python"])
        models.create_task(c, "P1-001", "p1", "Feature task", brief={"route_type": "feature"})
        yield c
    finally:
        # Runs on normal teardown and on setup failure alike.
        c.close()
# ===========================================================================
# 1. specialists.yaml — frontmatter конфигурация
# ===========================================================================
class TestConstitutionalValidatorSpecialists:
    """Checks that the constitutional_validator role is present and well-formed in specialists.yaml."""

    @staticmethod
    def _role():
        """Return the constitutional_validator entry of the specialists mapping."""
        return _load_yaml()["specialists"]["constitutional_validator"]

    def test_role_exists_in_specialists(self):
        """specialists.yaml contains the constitutional_validator role."""
        specialists = _load_yaml().get("specialists", {})
        assert "constitutional_validator" in specialists, (
            "constitutional_validator отсутствует в specialists.yaml"
        )

    def test_role_model_is_sonnet(self):
        """constitutional_validator uses the sonnet model."""
        entry = self._role()
        assert entry.get("model") == "sonnet", (
            f"Ожидался model=sonnet, получили: {entry.get('model')}"
        )

    def test_role_has_gate_true(self):
        """constitutional_validator is flagged with gate=true."""
        entry = self._role()
        assert entry.get("gate") is True, (
            "constitutional_validator должен иметь gate: true"
        )

    def test_role_has_output_schema(self):
        """constitutional_validator declares an output_schema field."""
        entry = self._role()
        assert "output_schema" in entry, "output_schema должен быть в constitutional_validator"

    def test_output_schema_contains_verdict_field(self):
        """output_schema contains a verdict field."""
        output_schema = self._role()["output_schema"]
        assert "verdict" in output_schema, "output_schema должен содержать поле verdict"

    def test_output_schema_verdict_has_all_four_verdicts(self):
        """output_schema.verdict mentions all four verdicts."""
        # The verdict spec is stringified so the check does not depend on its YAML shape.
        verdict_spec = str(self._role()["output_schema"]["verdict"])
        expected_verdicts = ("approved", "changes_required", "escalated", "blocked")
        for verdict in expected_verdicts:
            assert verdict in verdict_spec, (
                f"output_schema.verdict должен содержать '{verdict}'"
            )

    def test_role_tools_are_read_only(self):
        """constitutional_validator lists none of the write tools (Write, Edit, Bash)."""
        declared_tools = self._role().get("tools", [])
        write_tools = {"Write", "Edit", "Bash"}
        unexpected = write_tools & set(declared_tools)
        assert not unexpected, (
            f"constitutional_validator не должен иметь write-инструменты: {unexpected}"
        )
# ===========================================================================
# 2. Маршрут 'feature'
# ===========================================================================
class TestFeatureRouteConstitutionalValidator:
    """Checks where constitutional_validator sits in the 'feature' route."""

    @staticmethod
    def _steps():
        """Return the step list of the 'feature' route from specialists.yaml."""
        return _load_yaml()["routes"]["feature"]["steps"]

    def test_feature_route_exists(self):
        """The 'feature' route is defined under routes."""
        routes = _load_yaml().get("routes", {})
        assert "feature" in routes, "Маршрут 'feature' не найден"

    def test_feature_route_contains_constitutional_validator(self):
        """The 'feature' route includes a constitutional_validator step."""
        steps = self._steps()
        assert "constitutional_validator" in steps, (
            f"constitutional_validator отсутствует в feature route. Шаги: {steps}"
        )

    def test_feature_route_cv_comes_after_architect(self):
        """In the 'feature' route constitutional_validator comes after architect."""
        steps = self._steps()
        assert "architect" in steps, "architect отсутствует в feature route"
        architect_pos = steps.index("architect")
        validator_pos = steps.index("constitutional_validator")
        assert validator_pos > architect_pos, (
            f"constitutional_validator (pos={validator_pos}) должен идти ПОСЛЕ architect (pos={architect_pos})"
        )

    def test_feature_route_cv_comes_before_dev_step(self):
        """In the 'feature' route constitutional_validator comes before the first dev step."""
        steps = self._steps()
        validator_pos = steps.index("constitutional_validator")
        # Dev steps are frontend_dev and backend_dev; only those present in the route count.
        dev_positions = [
            steps.index(dev_role)
            for dev_role in ("frontend_dev", "backend_dev")
            if dev_role in steps
        ]
        assert dev_positions, "В feature route должен быть хотя бы один dev-шаг"
        first_dev_pos = min(dev_positions)
        assert validator_pos < first_dev_pos, (
            f"constitutional_validator (pos={validator_pos}) должен идти ПЕРЕД dev-шагом (pos={first_dev_pos})"
        )
# ===========================================================================
# 3. Маршрут 'spec_driven'
# ===========================================================================
class TestSpecDrivenRouteConstitutionalValidator:
    """Checks where constitutional_validator sits in the 'spec_driven' route."""

    @staticmethod
    def _steps():
        """Return the step list of the 'spec_driven' route from specialists.yaml."""
        return _load_yaml()["routes"]["spec_driven"]["steps"]

    def test_spec_driven_route_exists(self):
        """The 'spec_driven' route is defined under routes."""
        routes = _load_yaml().get("routes", {})
        assert "spec_driven" in routes, "Маршрут 'spec_driven' не найден"

    def test_spec_driven_contains_constitutional_validator(self):
        """The 'spec_driven' route includes a constitutional_validator step."""
        steps = self._steps()
        assert "constitutional_validator" in steps, (
            f"constitutional_validator отсутствует в spec_driven route. Шаги: {steps}"
        )

    def test_spec_driven_cv_comes_after_architect(self):
        """In the 'spec_driven' route constitutional_validator comes after architect."""
        steps = self._steps()
        assert "architect" in steps, "architect отсутствует в spec_driven route"
        architect_pos = steps.index("architect")
        validator_pos = steps.index("constitutional_validator")
        assert validator_pos > architect_pos, (
            f"constitutional_validator (pos={validator_pos}) должен идти ПОСЛЕ architect (pos={architect_pos})"
        )

    def test_spec_driven_cv_comes_before_task_decomposer(self):
        """In the 'spec_driven' route constitutional_validator comes before task_decomposer."""
        steps = self._steps()
        assert "task_decomposer" in steps, "task_decomposer отсутствует в spec_driven route"
        validator_pos = steps.index("constitutional_validator")
        decomposer_pos = steps.index("task_decomposer")
        assert validator_pos < decomposer_pos, (
            f"constitutional_validator (pos={validator_pos}) должен идти ПЕРЕД task_decomposer (pos={decomposer_pos})"
        )
# ===========================================================================
# 4. Промпт агента
# ===========================================================================
class TestConstitutionalValidatorPrompt:
    """Checks the agents/prompts/constitutional_validator.md prompt file."""

    @staticmethod
    def _prompt_path():
        """Return the path of constitutional_validator.md inside PROMPTS_DIR."""
        return PROMPTS_DIR / "constitutional_validator.md"

    @classmethod
    def _prompt_text(cls):
        """Return the prompt file content decoded as UTF-8."""
        return cls._prompt_path().read_text(encoding="utf-8")

    def test_prompt_file_exists(self):
        """The file agents/prompts/constitutional_validator.md exists."""
        prompt_file = self._prompt_path()
        assert prompt_file.exists(), "constitutional_validator.md не найден в agents/prompts/"

    def test_prompt_contains_approved_verdict(self):
        """The prompt mentions the 'approved' verdict."""
        text = self._prompt_text()
        assert "approved" in text, "Промпт не содержит вердикт 'approved'"

    def test_prompt_contains_changes_required_verdict(self):
        """The prompt mentions the 'changes_required' verdict."""
        text = self._prompt_text()
        assert "changes_required" in text, "Промпт не содержит вердикт 'changes_required'"

    def test_prompt_contains_escalated_verdict(self):
        """The prompt mentions the 'escalated' verdict."""
        text = self._prompt_text()
        assert "escalated" in text, "Промпт не содержит вердикт 'escalated'"

    def test_prompt_contains_blocked_verdict(self):
        """The prompt mentions the 'blocked' verdict."""
        text = self._prompt_text()
        assert "blocked" in text, "Промпт не содержит вердикт 'blocked'"

    def test_prompt_has_two_output_sections(self):
        """The prompt contains both output sections: ## Verdict and ## Details."""
        text = self._prompt_text()
        assert "## Verdict" in text, "Промпт не содержит раздел '## Verdict'"
        assert "## Details" in text, "Промпт не содержит раздел '## Details'"
# ===========================================================================
# 5. Runner gate-handler
# ===========================================================================
class TestConstitutionalValidatorGate:
    """Tests for how run_pipeline handles constitutional_validator verdicts."""

    @staticmethod
    def _run_validator(mock_run, conn, cv_output, following_steps=()):
        """Run the pipeline for task P1-001 with the validator step first.

        The patched subprocess.run is set to return ``cv_output`` as a
        successful process result; ``following_steps`` are appended after the
        constitutional_validator step.
        """
        mock_run.return_value = _mock_success(cv_output)
        steps = [{"role": "constitutional_validator", "model": "sonnet"}, *following_steps]
        return run_pipeline(conn, "P1-001", steps)

    @staticmethod
    def _assert_blocked(conn, result, verdict):
        """Assert that the pipeline result and the stored task are both blocked by the validator."""
        assert result["success"] is False
        assert result.get("blocked_by") == "constitutional_validator"
        assert verdict in (result.get("blocked_reason") or "")
        task = models.get_task(conn, "P1-001")
        assert task["status"] == "blocked"
        assert task.get("blocked_agent_role") == "constitutional_validator"

    @patch("agents.runner.subprocess.run")
    def test_changes_required_blocks_pipeline(self, mock_run, conn):
        """verdict=changes_required -> the pipeline stops and the task status becomes blocked."""
        violation = {
            "principle": "Simplicity over cleverness",
            "severity": "high",
            "description": "Предложено использование Redis для 50 записей",
            "suggestion": "Использовать SQLite",
        }
        cv_output = {
            "verdict": "changes_required",
            "target_role": "architect",
            "violations": [violation],
            "summary": "Нарушение принципа минимальной сложности.",
        }
        result = self._run_validator(mock_run, conn, cv_output)
        self._assert_blocked(conn, result, "changes_required")

    @patch("agents.runner.subprocess.run")
    def test_escalated_blocks_pipeline(self, mock_run, conn):
        """verdict=escalated -> the pipeline stops and the task status becomes blocked."""
        violation = {
            "principle": "No external paid APIs",
            "severity": "critical",
            "description": "Предложено Twilio без фолбека",
            "suggestion": "Добавить бесплатный альтернативный канал",
        }
        cv_output = {
            "verdict": "escalated",
            "escalation_reason": "Принцип 'no paid APIs' конфликтует с целью 'real-time SMS'",
            "violations": [violation],
            "summary": "Конфликт принципов требует решения директора.",
        }
        result = self._run_validator(mock_run, conn, cv_output)
        self._assert_blocked(conn, result, "escalated")

    @patch("agents.runner.subprocess.run")
    def test_escalated_includes_escalation_reason_in_blocked_reason(self, mock_run, conn):
        """verdict=escalated -> blocked_reason carries the verdict's escalation_reason."""
        escalation_reason = "Директор должен решить: платный API или нет"
        cv_output = {
            "verdict": "escalated",
            "escalation_reason": escalation_reason,
            "violations": [],
            "summary": "Эскалация к директору.",
        }
        result = self._run_validator(mock_run, conn, cv_output)
        reported_reason = result.get("blocked_reason") or ""
        assert escalation_reason in reported_reason, (
            "blocked_reason должен содержать escalation_reason из вердикта"
        )

    @patch("agents.runner.subprocess.run")
    def test_approved_continues_pipeline(self, mock_run, conn):
        """verdict=approved -> the pipeline is not blocked by the validator and the task is NOT blocked."""
        cv_output = {
            "verdict": "approved",
            "violations": [],
            "summary": "План соответствует принципам. Можно приступать к реализации.",
        }
        result = self._run_validator(mock_run, conn, cv_output)
        assert result.get("blocked_by") != "constitutional_validator"
        task = models.get_task(conn, "P1-001")
        assert task["status"] != "blocked"

    @patch("agents.runner.subprocess.run")
    def test_changes_required_violations_summary_in_blocked_reason(self, mock_run, conn):
        """verdict=changes_required -> blocked_reason describes the violation."""
        violation = {
            "principle": "Minimal footprint",
            "severity": "critical",
            "description": "Добавляется новый сервис без необходимости",
            "suggestion": "Обойтись встроенными средствами",
        }
        cv_output = {
            "verdict": "changes_required",
            "target_role": "architect",
            "violations": [violation],
            "summary": "Критическое нарушение.",
        }
        result = self._run_validator(mock_run, conn, cv_output)
        blocked_reason = result.get("blocked_reason") or ""
        # Either the principle name or the start of the description is accepted.
        assert "Minimal footprint" in blocked_reason or "Добавляется новый сервис" in blocked_reason, (
            f"blocked_reason должен содержать описание нарушения. Получили: {blocked_reason!r}"
        )

    @patch("agents.runner.subprocess.run")
    def test_changes_required_two_steps_does_not_execute_second_step(self, mock_run, conn):
        """constitutional_validator + frontend_dev pipeline: on changes_required the second step does not run."""
        cv_output = {
            "verdict": "changes_required",
            "target_role": "architect",
            "violations": [{"principle": "X", "severity": "high", "description": "test", "suggestion": "fix"}],
            "summary": "Нужна доработка.",
        }
        result = self._run_validator(
            mock_run,
            conn,
            cv_output,
            following_steps=[{"role": "frontend_dev", "model": "sonnet"}],
        )
        assert result["success"] is False
        completed = result.get("steps_completed")
        assert completed == 1, (
            f"Должен быть выполнен только 1 шаг, получили: {completed}"
        )
        # subprocess.run must be called exactly once, for constitutional_validator only.
        assert mock_run.call_count == 1, (
            f"Ожидался 1 вызов subprocess.run, получили: {mock_run.call_count}"
        )