"""
Regression tests for KIN-097:

Tasks should start based on the review/auto toggle state, not independently.

Root causes fixed:
(1) load() now calls loadMode() after reload — toggle syncs with DB
(2) runTask() now patches execution_mode before running — task always gets
    the current toggle state, not a stale value from DB

Backend regression:
- task.execution_mode=auto_complete → pipeline auto-approves (status=done)
- task.execution_mode=review → pipeline does NOT auto-approve (status=review),
  even if project.execution_mode=auto_complete
- get_effective_mode uses task-level execution_mode with higher priority than project
"""

|
import json
import pytest
from unittest.mock import patch, MagicMock

from core.db import init_db
from core import models
from agents.runner import run_pipeline

|
# ---------------------------------------------------------------------------
# Fixtures & helpers
# ---------------------------------------------------------------------------

|
@pytest.fixture
def conn():
    """In-memory DB seeded with one project ("p1") and one task ("P1-001").

    Yields an open connection; closes it after the test finishes.
    """
    connection = init_db(":memory:")
    models.create_project(connection, "p1", "P1", "/tmp/p1", tech_stack=["python"])
    models.create_task(
        connection,
        "P1-001",
        "p1",
        "Fix bug",
        brief={"route_type": "debug"},
    )
    yield connection
    connection.close()
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _mock_success(output="done"):
|
|||
|
|
m = MagicMock()
|
|||
|
|
m.stdout = json.dumps({"result": output})
|
|||
|
|
m.stderr = ""
|
|||
|
|
m.returncode = 0
|
|||
|
|
return m
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ---------------------------------------------------------------------------
# get_effective_mode: task-level priority regression
# ---------------------------------------------------------------------------
|
|||
|
|
|
|||
|
|
class TestGetEffectiveMode:
    """Regression: task.execution_mode has higher priority than project.execution_mode."""

    def test_task_review_overrides_project_auto_complete(self, conn):
        """KIN-097: task=review + project=auto_complete → effective mode is 'review'."""
        models.update_project(conn, "p1", execution_mode="auto_complete")
        models.update_task(conn, "P1-001", execution_mode="review")

        effective = models.get_effective_mode(conn, "p1", "P1-001")

        assert effective == "review", (
            "task-level review должен override project-level auto_complete"
        )

    def test_task_auto_complete_overrides_project_review(self, conn):
        """KIN-097: task=auto_complete + project=review → effective mode is 'auto_complete'."""
        models.update_project(conn, "p1", execution_mode="review")
        models.update_task(conn, "P1-001", execution_mode="auto_complete")

        effective = models.get_effective_mode(conn, "p1", "P1-001")

        assert effective == "auto_complete", (
            "task-level auto_complete должен override project-level review"
        )

    def test_task_none_falls_back_to_project_auto_complete(self, conn):
        """If task.execution_mode is None, project.execution_mode=auto_complete applies."""
        models.update_project(conn, "p1", execution_mode="auto_complete")
        # Task is deliberately left without an execution_mode of its own.
        effective = models.get_effective_mode(conn, "p1", "P1-001")
        assert effective == "auto_complete"

    def test_task_none_project_none_defaults_to_review(self, conn):
        """If both are None → fallback is 'review' (the safe mode)."""
        # Project created without execution_mode (defaults to NULL).
        effective = models.get_effective_mode(conn, "p1", "P1-001")
        assert effective == "review"
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ---------------------------------------------------------------------------
# run_pipeline: autopilot only triggers in auto_complete
# ---------------------------------------------------------------------------
|
|||
|
|
|
|||
|
|
class TestRunPipelineCompletionMode:
    """KIN-097 acceptance criteria: pipeline outcome depends on execution_mode.

    subprocess.run, run_hooks and generate_followups are mocked so the
    pipeline "succeeds" deterministically; only the mode-driven final task
    status is under test.
    """

    @patch("core.followup.generate_followups")
    @patch("agents.runner.run_hooks")
    @patch("agents.runner.subprocess.run")
    def test_task_review_mode_does_not_auto_approve_when_project_is_auto(
        self, mock_run, mock_hooks, mock_followup, conn
    ):
        """KIN-097 regression: project=auto_complete but task=review → status=review (not done)."""
        mock_run.return_value = _mock_success()
        mock_hooks.return_value = []
        mock_followup.return_value = {"created": [], "pending_actions": []}

        models.update_project(conn, "p1", execution_mode="auto_complete")
        # Frontend patches the task with the current toggle state before run.
        models.update_task(conn, "P1-001", execution_mode="review")

        steps = [{"role": "debugger", "brief": "find bug"},
                 {"role": "tester", "brief": "verify"}]
        result = run_pipeline(conn, "P1-001", steps)

        assert result["success"] is True
        task = models.get_task(conn, "P1-001")
        assert task["status"] == "review", (
            "При execution_mode=review задача должна ждать ручного approve, "
            "а НЕ auto-approve несмотря на project.execution_mode=auto_complete"
        )

    @patch("core.followup.generate_followups")
    @patch("agents.runner.run_hooks")
    @patch("agents.runner.subprocess.run")
    def test_task_auto_complete_auto_approves_when_project_is_review(
        self, mock_run, mock_hooks, mock_followup, conn
    ):
        """KIN-097: project=review but task=auto_complete → status=done (autopilot active)."""
        mock_run.return_value = _mock_success()
        mock_hooks.return_value = []
        mock_followup.return_value = {"created": [], "pending_actions": []}

        # FIX: the project must actually be put into review mode — previously
        # this call was missing, so project.execution_mode stayed NULL and the
        # test never exercised task-over-project priority.
        models.update_project(conn, "p1", execution_mode="review")
        # Frontend patches the task with the current toggle state before run.
        models.update_task(conn, "P1-001", execution_mode="auto_complete")

        steps = [{"role": "debugger", "brief": "find bug"},
                 {"role": "tester", "brief": "verify"}]
        result = run_pipeline(conn, "P1-001", steps)

        assert result["success"] is True
        task = models.get_task(conn, "P1-001")
        assert task["status"] == "done", (
            "task.execution_mode=auto_complete должен auto-approve (status=done) "
            "даже если project.execution_mode=review"
        )

    @patch("core.followup.generate_followups")
    @patch("agents.runner.run_hooks")
    @patch("agents.runner.subprocess.run")
    def test_task_auto_complete_mode_returned_in_result(
        self, mock_run, mock_hooks, mock_followup, conn
    ):
        """run_pipeline includes mode=auto_complete in its result payload."""
        mock_run.return_value = _mock_success()
        mock_hooks.return_value = []
        mock_followup.return_value = {"created": [], "pending_actions": []}

        models.update_task(conn, "P1-001", execution_mode="auto_complete")
        steps = [{"role": "debugger", "brief": "find"},
                 {"role": "tester", "brief": "test"}]
        result = run_pipeline(conn, "P1-001", steps)

        assert result.get("mode") == "auto_complete"

    @patch("core.followup.generate_followups")
    @patch("agents.runner.run_hooks")
    @patch("agents.runner.subprocess.run")
    def test_task_review_mode_returned_in_result(
        self, mock_run, mock_hooks, mock_followup, conn
    ):
        """run_pipeline includes mode=review in its result for a review task."""
        mock_run.return_value = _mock_success()
        mock_hooks.return_value = []
        mock_followup.return_value = {"created": [], "pending_actions": []}

        models.update_task(conn, "P1-001", execution_mode="review")
        steps = [{"role": "debugger", "brief": "find"}]
        result = run_pipeline(conn, "P1-001", steps)

        assert result.get("mode") == "review"
|