kin/tests/test_KIN-117_regression.py

429 lines
18 KiB
Python
Raw Normal View History

2026-03-17 22:41:47 +02:00
"""Regression tests for KIN-117 — dev-agent silent success detection.
Root cause: dev-agents периодически заявляют об успешной реализации, но файлы
не записываются на диск. Три причины:
(1) --dangerously-skip-permissions не добавляется когда stdin=DEVNULL (noninteractive)
(2) worktree создаётся, но merge не происходит (uncommitted files)
(3) permission-retry вызывает run_agent без working_dir_override=worktree_path
Fix D добавляет guard: после успешного шага dev-агента проверяет git diff
если изменений нет, помечает шаг suspicious=True.
Acceptance criteria:
(1) Fix A: noninteractive=True --dangerously-skip-permissions в subprocess cmd
(2) Fix D: dev-агент success + пустой git diff step.suspicious=True
(3) Fix D: dev-агент success + непустой git diff step.suspicious НЕ выставлен (нет false positive)
(4) Fix C: permission retry в auto_complete режиме subprocess cwd=worktree_path
(5) Fix B: merge_worktree запускает git add -A + git commit перед git merge --no-ff
"""
import json
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from core.db import init_db
from core import models
# ─── Helpers ────────────────────────────────────────────────────────────────
def _ok(stdout=""):
m = MagicMock()
m.returncode = 0
m.stdout = stdout
m.stderr = ""
return m
def _fail(stdout="", stderr="error"):
m = MagicMock()
m.returncode = 1
m.stdout = stdout
m.stderr = stderr
return m
def _claude_success():
return _ok(json.dumps({"result": "all done, implementation complete"}))
def _permission_denied():
"""Subprocess result that triggers _is_permission_error."""
m = MagicMock()
m.returncode = 1
m.stdout = ""
m.stderr = "permission denied to write file"
return m
# ─── Fixtures ────────────────────────────────────────────────────────────────
@pytest.fixture
def conn_basic(tmp_path):
"""Project with path=tmp_path (real dir), review mode, no worktrees."""
c = init_db(":memory:")
models.create_project(c, "prj1", "Test Project", str(tmp_path),
tech_stack=["python"])
models.create_task(c, "TSK-001", "prj1", "Fix something",
brief={"route_type": "debug"})
yield c, str(tmp_path)
c.close()
@pytest.fixture
def conn_auto_wt(tmp_path):
"""Project with auto_complete mode and worktrees enabled, real path."""
c = init_db(":memory:")
models.create_project(c, "prj2", "Auto WT Project", str(tmp_path),
tech_stack=["python"])
c.execute(
"UPDATE projects SET execution_mode='auto_complete', worktrees_enabled=1 WHERE id='prj2'"
)
c.commit()
models.create_task(c, "TSK-002", "prj2", "Fix with worktree",
brief={"route_type": "debug"})
yield c, str(tmp_path)
c.close()
# ─── Fix A: noninteractive mode добавляет --dangerously-skip-permissions ─────
class TestFixA_NoninteractiveSkipPermissions:
"""Fix A: is_noninteractive вычисляется ДО проверки allow_write."""
@patch("agents.runner.subprocess.run")
def test_noninteractive_adds_dangerously_skip_permissions(self, mock_run):
"""noninteractive=True, allow_write=False → --dangerously-skip-permissions в cmd."""
from agents.runner import _run_claude
mock_run.return_value = _claude_success()
_run_claude("test prompt", noninteractive=True, allow_write=False)
cmd = mock_run.call_args[0][0]
assert "--dangerously-skip-permissions" in cmd, (
"--dangerously-skip-permissions must be added when noninteractive=True, "
f"got cmd: {cmd}"
)
@patch("agents.runner.subprocess.run")
def test_allow_write_adds_dangerously_skip_permissions(self, mock_run):
"""allow_write=True → --dangerously-skip-permissions в cmd (существующее поведение)."""
from agents.runner import _run_claude
mock_run.return_value = _claude_success()
_run_claude("test prompt", allow_write=True, noninteractive=False)
cmd = mock_run.call_args[0][0]
assert "--dangerously-skip-permissions" in cmd
@patch("agents.runner.subprocess.run")
def test_interactive_no_allow_write_no_skip_permissions(self, mock_run):
"""allow_write=False + noninteractive=False → --dangerously-skip-permissions НЕ в cmd."""
from agents.runner import _run_claude
mock_run.return_value = _claude_success()
_run_claude("test prompt", allow_write=False, noninteractive=False)
cmd = mock_run.call_args[0][0]
assert "--dangerously-skip-permissions" not in cmd, (
"--dangerously-skip-permissions must NOT be added in interactive mode without allow_write"
)
# ─── Fix D: suspicious guard (non-worktree path) ─────────────────────────────
class TestFixD_SuspiciousGuardNoWorktree:
"""Fix D: guard проверяет git diff после успешного шага dev-агента."""
@patch("agents.runner.check_claude_auth")
@patch("agents.runner.subprocess.run")
def test_success_empty_git_diff_marks_step_suspicious(self, mock_run, mock_auth,
conn_basic):
"""dev-агент success + пустой git diff → result['suspicious']=True."""
from agents.runner import run_pipeline
conn, proj_path = conn_basic
def side_effect(cmd, **kwargs):
cmd_str = " ".join(str(c) for c in cmd)
if "diff" in cmd_str:
return _ok("") # empty diff → no file changes
return _claude_success() # claude call
mock_run.side_effect = side_effect
mock_auth.return_value = None
result = run_pipeline(conn, "TSK-001", [{"role": "debugger", "brief": "fix it"}])
assert result["success"] is True
step_result = result["results"][0]
assert step_result.get("suspicious") is True, (
"Step must be marked suspicious=True when agent claims success "
f"but git diff is empty. Got result: {step_result}"
)
@patch("agents.runner.check_claude_auth")
@patch("agents.runner.subprocess.run")
def test_success_nonempty_git_diff_not_suspicious(self, mock_run, mock_auth,
conn_basic):
"""dev-агент success + непустой git diff → suspicious НЕ выставляется."""
from agents.runner import run_pipeline
conn, proj_path = conn_basic
def side_effect(cmd, **kwargs):
cmd_str = " ".join(str(c) for c in cmd)
if "diff" in cmd_str:
return _ok("src/app.py\nsrc/utils.py\n") # files changed
return _claude_success()
mock_run.side_effect = side_effect
mock_auth.return_value = None
result = run_pipeline(conn, "TSK-001", [{"role": "debugger", "brief": "fix it"}])
assert result["success"] is True
step_result = result["results"][0]
assert not step_result.get("suspicious"), (
"Step must NOT be suspicious when git diff shows changed files. "
f"Got result: {step_result}"
)
@patch("agents.runner.check_claude_auth")
@patch("agents.runner.subprocess.run")
def test_suspicious_step_does_not_block_pipeline(self, mock_run, mock_auth,
conn_basic):
"""suspicious шаг не останавливает pipeline — только выставляет флаг."""
from agents.runner import run_pipeline
conn, proj_path = conn_basic
def side_effect(cmd, **kwargs):
cmd_str = " ".join(str(c) for c in cmd)
if "diff" in cmd_str:
return _ok("") # empty diff → suspicious, but pipeline continues
return _claude_success()
mock_run.side_effect = side_effect
mock_auth.return_value = None
result = run_pipeline(conn, "TSK-001", [{"role": "debugger", "brief": "fix"}])
# Pipeline must succeed even when step is suspicious
assert result["success"] is True
assert result["steps_completed"] == 1
# ─── Fix D: suspicious guard (worktree path) ─────────────────────────────────
class TestFixD_SuspiciousGuardWithWorktree:
"""Fix D: guard проверяет merged_files после worktree merge."""
@patch("agents.runner.check_claude_auth")
@patch("core.worktree.cleanup_worktree")
@patch("core.worktree.merge_worktree",
return_value={"success": True, "conflicts": [], "merged_files": []})
@patch("core.worktree.create_worktree", return_value="/tmp/WT-suspicious-debugger")
@patch("core.worktree.ensure_gitignore")
@patch("agents.runner.subprocess.run")
def test_worktree_empty_merged_files_marks_suspicious(
self, mock_run, mock_gitignore, mock_create, mock_merge, mock_cleanup, mock_auth,
conn_auto_wt
):
"""worktree merge возвращает merged_files=[] → шаг помечается suspicious=True."""
from agents.runner import run_pipeline
conn, proj_path = conn_auto_wt
mock_run.return_value = _claude_success()
mock_auth.return_value = None
result = run_pipeline(conn, "TSK-002", [{"role": "debugger", "brief": "fix it"}])
assert result["success"] is True
step_result = result["results"][0]
assert step_result.get("suspicious") is True, (
"Step must be marked suspicious when worktree merge returns merged_files=[]. "
f"Got result: {step_result}"
)
@patch("agents.runner.check_claude_auth")
@patch("core.worktree.cleanup_worktree")
@patch("core.worktree.merge_worktree",
return_value={"success": True, "conflicts": [], "merged_files": ["src/app.py"]})
@patch("core.worktree.create_worktree", return_value="/tmp/WT-ok-debugger")
@patch("core.worktree.ensure_gitignore")
@patch("agents.runner.subprocess.run")
def test_worktree_nonempty_merged_files_not_suspicious(
self, mock_run, mock_gitignore, mock_create, mock_merge, mock_cleanup, mock_auth,
conn_auto_wt
):
"""worktree merge возвращает merged_files=['src/app.py'] → шаг НЕ suspicious."""
from agents.runner import run_pipeline
conn, proj_path = conn_auto_wt
mock_run.return_value = _claude_success()
mock_auth.return_value = None
result = run_pipeline(conn, "TSK-002", [{"role": "debugger", "brief": "fix it"}])
assert result["success"] is True
step_result = result["results"][0]
assert not step_result.get("suspicious"), (
"Step must NOT be suspicious when worktree has merged files. "
f"Got result: {step_result}"
)
# ─── Fix C: permission retry использует worktree_path ─────────────────────────
class TestFixC_PermissionRetryWorktreePath:
"""Fix C: retry после permission error передаёт working_dir_override=worktree_path."""
FAKE_WT = "/tmp/fake-worktree-kin117"
@patch("agents.runner.check_claude_auth")
@patch("core.worktree.cleanup_worktree")
@patch("core.worktree.merge_worktree",
return_value={"success": True, "conflicts": [], "merged_files": ["fix.py"]})
@patch("core.worktree.create_worktree", return_value=FAKE_WT)
@patch("core.worktree.ensure_gitignore")
@patch("agents.runner.subprocess.run")
def test_permission_retry_uses_worktree_cwd(
self, mock_run, mock_gitignore, mock_create, mock_merge, mock_cleanup, mock_auth,
conn_auto_wt
):
"""При permission error в auto_complete режиме retry запускается в cwd=worktree_path."""
from agents.runner import run_pipeline
conn, proj_path = conn_auto_wt
claude_calls = []
def side_effect(cmd, **kwargs):
if "--output-format" in cmd:
cwd = kwargs.get("cwd")
claude_calls.append({"cmd": list(cmd), "cwd": cwd})
# First claude call → permission error triggers retry
if len(claude_calls) == 1:
return _permission_denied()
return _claude_success()
# Non-claude subprocess calls (git diff, etc.)
return _ok("")
mock_run.side_effect = side_effect
mock_auth.return_value = None
result = run_pipeline(conn, "TSK-002",
[{"role": "debugger", "brief": "implement feature"}])
assert len(claude_calls) >= 2, (
f"Expected at least 2 claude calls (first + retry), got {len(claude_calls)}"
)
retry_call = claude_calls[1]
assert retry_call["cwd"] == self.FAKE_WT, (
f"Retry must use worktree_path={self.FAKE_WT!r} as cwd, "
f"got {retry_call['cwd']!r}. "
"Fix: add working_dir_override=worktree_path to permission retry run_agent call."
)
# ─── Fix B: merge_worktree auto-commits перед merge ──────────────────────────
class TestFixB_MergeWorktreePreCommit:
"""Fix B: merge_worktree выполняет git add -A + git commit перед git merge --no-ff."""
@patch("subprocess.run")
def test_merge_worktree_runs_git_add_before_merge(self, mock_run, tmp_path):
"""git add -A вызывается ДО git merge --no-ff."""
from core.worktree import merge_worktree
worktree_path = tmp_path / ".kin_worktrees" / "TASK-001-debugger"
worktree_path.mkdir(parents=True)
commands_seen = []
def run_side_effect(cmd, **kwargs):
commands_seen.append(list(cmd))
return _ok()
mock_run.side_effect = run_side_effect
merge_worktree(str(worktree_path), str(tmp_path))
flat = [" ".join(c) for c in commands_seen]
add_idx = next(
(i for i, c in enumerate(flat) if "add" in c and "-A" in c), None
)
merge_idx = next(
(i for i, c in enumerate(flat) if "merge" in c and "--no-ff" in c), None
)
assert add_idx is not None, (
"git add -A must be called before merge. "
f"Commands seen: {flat}"
)
assert merge_idx is not None, "git merge --no-ff must be called"
assert add_idx < merge_idx, (
f"git add -A (idx={add_idx}) must be BEFORE git merge --no-ff (idx={merge_idx})"
)
@patch("subprocess.run")
def test_merge_worktree_runs_git_commit_before_merge(self, mock_run, tmp_path):
"""git commit вызывается ДО git merge --no-ff."""
from core.worktree import merge_worktree
worktree_path = tmp_path / ".kin_worktrees" / "TASK-001-debugger"
worktree_path.mkdir(parents=True)
commands_seen = []
def run_side_effect(cmd, **kwargs):
commands_seen.append(list(cmd))
return _ok()
mock_run.side_effect = run_side_effect
merge_worktree(str(worktree_path), str(tmp_path))
flat = [" ".join(c) for c in commands_seen]
commit_idx = next(
(i for i, c in enumerate(flat) if "commit" in c and "-m" in c), None
)
merge_idx = next(
(i for i, c in enumerate(flat) if "merge" in c and "--no-ff" in c), None
)
assert commit_idx is not None, (
"git commit -m must be called before merge. "
f"Commands seen: {flat}"
)
assert merge_idx is not None, "git merge --no-ff must be called"
assert commit_idx < merge_idx, (
f"git commit (idx={commit_idx}) must be BEFORE git merge --no-ff (idx={merge_idx})"
)
@patch("subprocess.run")
def test_merge_worktree_commit_failure_does_not_block_merge(self, mock_run, tmp_path):
"""Если git commit возвращает non-zero (нечего коммитить) — merge всё равно запускается."""
from core.worktree import merge_worktree
worktree_path = tmp_path / ".kin_worktrees" / "TASK-001-debugger"
worktree_path.mkdir(parents=True)
commands_seen = []
def run_side_effect(cmd, **kwargs):
commands_seen.append(list(cmd))
cmd_str = " ".join(cmd)
if "commit" in cmd_str:
return _fail(stderr="nothing to commit") # nothing to commit
return _ok()
mock_run.side_effect = run_side_effect
result = merge_worktree(str(worktree_path), str(tmp_path))
flat = [" ".join(c) for c in commands_seen]
merge_calls = [c for c in flat if "merge" in c and "--no-ff" in c]
assert len(merge_calls) > 0, (
"git merge --no-ff must still be called even when pre-commit fails. "
f"Commands seen: {flat}"
)
assert result["success"] is True