429 lines
18 KiB
Python
429 lines
18 KiB
Python
|
|
"""Regression tests for KIN-117 — dev-agent silent success detection.
|
|||
|
|
|
|||
|
|
Root cause: dev-agents периодически заявляют об успешной реализации, но файлы
|
|||
|
|
не записываются на диск. Три причины:
|
|||
|
|
(1) --dangerously-skip-permissions не добавляется когда stdin=DEVNULL (noninteractive)
|
|||
|
|
(2) worktree создаётся, но merge не происходит (uncommitted files)
|
|||
|
|
(3) permission-retry вызывает run_agent без working_dir_override=worktree_path
|
|||
|
|
|
|||
|
|
Fix D добавляет guard: после успешного шага dev-агента проверяет git diff —
|
|||
|
|
если изменений нет, помечает шаг suspicious=True.
|
|||
|
|
|
|||
|
|
Acceptance criteria:
|
|||
|
|
(1) Fix A: noninteractive=True → --dangerously-skip-permissions в subprocess cmd
|
|||
|
|
(2) Fix D: dev-агент success + пустой git diff → step.suspicious=True
|
|||
|
|
(3) Fix D: dev-агент success + непустой git diff → step.suspicious НЕ выставлен (нет false positive)
|
|||
|
|
(4) Fix C: permission retry в auto_complete режиме → subprocess cwd=worktree_path
|
|||
|
|
(5) Fix B: merge_worktree запускает git add -A + git commit перед git merge --no-ff
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
import json
|
|||
|
|
from pathlib import Path
|
|||
|
|
from unittest.mock import MagicMock, patch
|
|||
|
|
|
|||
|
|
import pytest
|
|||
|
|
|
|||
|
|
from core.db import init_db
|
|||
|
|
from core import models
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ─── Helpers ────────────────────────────────────────────────────────────────
|
|||
|
|
|
|||
|
|
def _ok(stdout=""):
|
|||
|
|
m = MagicMock()
|
|||
|
|
m.returncode = 0
|
|||
|
|
m.stdout = stdout
|
|||
|
|
m.stderr = ""
|
|||
|
|
return m
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _fail(stdout="", stderr="error"):
|
|||
|
|
m = MagicMock()
|
|||
|
|
m.returncode = 1
|
|||
|
|
m.stdout = stdout
|
|||
|
|
m.stderr = stderr
|
|||
|
|
return m
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _claude_success():
|
|||
|
|
return _ok(json.dumps({"result": "all done, implementation complete"}))
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _permission_denied():
|
|||
|
|
"""Subprocess result that triggers _is_permission_error."""
|
|||
|
|
m = MagicMock()
|
|||
|
|
m.returncode = 1
|
|||
|
|
m.stdout = ""
|
|||
|
|
m.stderr = "permission denied to write file"
|
|||
|
|
return m
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ─── Fixtures ────────────────────────────────────────────────────────────────
|
|||
|
|
|
|||
|
|
@pytest.fixture
|
|||
|
|
def conn_basic(tmp_path):
|
|||
|
|
"""Project with path=tmp_path (real dir), review mode, no worktrees."""
|
|||
|
|
c = init_db(":memory:")
|
|||
|
|
models.create_project(c, "prj1", "Test Project", str(tmp_path),
|
|||
|
|
tech_stack=["python"])
|
|||
|
|
models.create_task(c, "TSK-001", "prj1", "Fix something",
|
|||
|
|
brief={"route_type": "debug"})
|
|||
|
|
yield c, str(tmp_path)
|
|||
|
|
c.close()
|
|||
|
|
|
|||
|
|
|
|||
|
|
@pytest.fixture
|
|||
|
|
def conn_auto_wt(tmp_path):
|
|||
|
|
"""Project with auto_complete mode and worktrees enabled, real path."""
|
|||
|
|
c = init_db(":memory:")
|
|||
|
|
models.create_project(c, "prj2", "Auto WT Project", str(tmp_path),
|
|||
|
|
tech_stack=["python"])
|
|||
|
|
c.execute(
|
|||
|
|
"UPDATE projects SET execution_mode='auto_complete', worktrees_enabled=1 WHERE id='prj2'"
|
|||
|
|
)
|
|||
|
|
c.commit()
|
|||
|
|
models.create_task(c, "TSK-002", "prj2", "Fix with worktree",
|
|||
|
|
brief={"route_type": "debug"})
|
|||
|
|
yield c, str(tmp_path)
|
|||
|
|
c.close()
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ─── Fix A: noninteractive mode добавляет --dangerously-skip-permissions ─────
|
|||
|
|
|
|||
|
|
class TestFixA_NoninteractiveSkipPermissions:
|
|||
|
|
"""Fix A: is_noninteractive вычисляется ДО проверки allow_write."""
|
|||
|
|
|
|||
|
|
@patch("agents.runner.subprocess.run")
|
|||
|
|
def test_noninteractive_adds_dangerously_skip_permissions(self, mock_run):
|
|||
|
|
"""noninteractive=True, allow_write=False → --dangerously-skip-permissions в cmd."""
|
|||
|
|
from agents.runner import _run_claude
|
|||
|
|
mock_run.return_value = _claude_success()
|
|||
|
|
|
|||
|
|
_run_claude("test prompt", noninteractive=True, allow_write=False)
|
|||
|
|
|
|||
|
|
cmd = mock_run.call_args[0][0]
|
|||
|
|
assert "--dangerously-skip-permissions" in cmd, (
|
|||
|
|
"--dangerously-skip-permissions must be added when noninteractive=True, "
|
|||
|
|
f"got cmd: {cmd}"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
@patch("agents.runner.subprocess.run")
|
|||
|
|
def test_allow_write_adds_dangerously_skip_permissions(self, mock_run):
|
|||
|
|
"""allow_write=True → --dangerously-skip-permissions в cmd (существующее поведение)."""
|
|||
|
|
from agents.runner import _run_claude
|
|||
|
|
mock_run.return_value = _claude_success()
|
|||
|
|
|
|||
|
|
_run_claude("test prompt", allow_write=True, noninteractive=False)
|
|||
|
|
|
|||
|
|
cmd = mock_run.call_args[0][0]
|
|||
|
|
assert "--dangerously-skip-permissions" in cmd
|
|||
|
|
|
|||
|
|
@patch("agents.runner.subprocess.run")
|
|||
|
|
def test_interactive_no_allow_write_no_skip_permissions(self, mock_run):
|
|||
|
|
"""allow_write=False + noninteractive=False → --dangerously-skip-permissions НЕ в cmd."""
|
|||
|
|
from agents.runner import _run_claude
|
|||
|
|
mock_run.return_value = _claude_success()
|
|||
|
|
|
|||
|
|
_run_claude("test prompt", allow_write=False, noninteractive=False)
|
|||
|
|
|
|||
|
|
cmd = mock_run.call_args[0][0]
|
|||
|
|
assert "--dangerously-skip-permissions" not in cmd, (
|
|||
|
|
"--dangerously-skip-permissions must NOT be added in interactive mode without allow_write"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ─── Fix D: suspicious guard (non-worktree path) ─────────────────────────────
|
|||
|
|
|
|||
|
|
class TestFixD_SuspiciousGuardNoWorktree:
|
|||
|
|
"""Fix D: guard проверяет git diff после успешного шага dev-агента."""
|
|||
|
|
|
|||
|
|
@patch("agents.runner.check_claude_auth")
|
|||
|
|
@patch("agents.runner.subprocess.run")
|
|||
|
|
def test_success_empty_git_diff_marks_step_suspicious(self, mock_run, mock_auth,
|
|||
|
|
conn_basic):
|
|||
|
|
"""dev-агент success + пустой git diff → result['suspicious']=True."""
|
|||
|
|
from agents.runner import run_pipeline
|
|||
|
|
conn, proj_path = conn_basic
|
|||
|
|
|
|||
|
|
def side_effect(cmd, **kwargs):
|
|||
|
|
cmd_str = " ".join(str(c) for c in cmd)
|
|||
|
|
if "diff" in cmd_str:
|
|||
|
|
return _ok("") # empty diff → no file changes
|
|||
|
|
return _claude_success() # claude call
|
|||
|
|
|
|||
|
|
mock_run.side_effect = side_effect
|
|||
|
|
mock_auth.return_value = None
|
|||
|
|
|
|||
|
|
result = run_pipeline(conn, "TSK-001", [{"role": "debugger", "brief": "fix it"}])
|
|||
|
|
|
|||
|
|
assert result["success"] is True
|
|||
|
|
step_result = result["results"][0]
|
|||
|
|
assert step_result.get("suspicious") is True, (
|
|||
|
|
"Step must be marked suspicious=True when agent claims success "
|
|||
|
|
f"but git diff is empty. Got result: {step_result}"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
@patch("agents.runner.check_claude_auth")
|
|||
|
|
@patch("agents.runner.subprocess.run")
|
|||
|
|
def test_success_nonempty_git_diff_not_suspicious(self, mock_run, mock_auth,
|
|||
|
|
conn_basic):
|
|||
|
|
"""dev-агент success + непустой git diff → suspicious НЕ выставляется."""
|
|||
|
|
from agents.runner import run_pipeline
|
|||
|
|
conn, proj_path = conn_basic
|
|||
|
|
|
|||
|
|
def side_effect(cmd, **kwargs):
|
|||
|
|
cmd_str = " ".join(str(c) for c in cmd)
|
|||
|
|
if "diff" in cmd_str:
|
|||
|
|
return _ok("src/app.py\nsrc/utils.py\n") # files changed
|
|||
|
|
return _claude_success()
|
|||
|
|
|
|||
|
|
mock_run.side_effect = side_effect
|
|||
|
|
mock_auth.return_value = None
|
|||
|
|
|
|||
|
|
result = run_pipeline(conn, "TSK-001", [{"role": "debugger", "brief": "fix it"}])
|
|||
|
|
|
|||
|
|
assert result["success"] is True
|
|||
|
|
step_result = result["results"][0]
|
|||
|
|
assert not step_result.get("suspicious"), (
|
|||
|
|
"Step must NOT be suspicious when git diff shows changed files. "
|
|||
|
|
f"Got result: {step_result}"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
@patch("agents.runner.check_claude_auth")
|
|||
|
|
@patch("agents.runner.subprocess.run")
|
|||
|
|
def test_suspicious_step_does_not_block_pipeline(self, mock_run, mock_auth,
|
|||
|
|
conn_basic):
|
|||
|
|
"""suspicious шаг не останавливает pipeline — только выставляет флаг."""
|
|||
|
|
from agents.runner import run_pipeline
|
|||
|
|
conn, proj_path = conn_basic
|
|||
|
|
|
|||
|
|
def side_effect(cmd, **kwargs):
|
|||
|
|
cmd_str = " ".join(str(c) for c in cmd)
|
|||
|
|
if "diff" in cmd_str:
|
|||
|
|
return _ok("") # empty diff → suspicious, but pipeline continues
|
|||
|
|
return _claude_success()
|
|||
|
|
|
|||
|
|
mock_run.side_effect = side_effect
|
|||
|
|
mock_auth.return_value = None
|
|||
|
|
|
|||
|
|
result = run_pipeline(conn, "TSK-001", [{"role": "debugger", "brief": "fix"}])
|
|||
|
|
|
|||
|
|
# Pipeline must succeed even when step is suspicious
|
|||
|
|
assert result["success"] is True
|
|||
|
|
assert result["steps_completed"] == 1
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ─── Fix D: suspicious guard (worktree path) ─────────────────────────────────
|
|||
|
|
|
|||
|
|
class TestFixD_SuspiciousGuardWithWorktree:
|
|||
|
|
"""Fix D: guard проверяет merged_files после worktree merge."""
|
|||
|
|
|
|||
|
|
@patch("agents.runner.check_claude_auth")
|
|||
|
|
@patch("core.worktree.cleanup_worktree")
|
|||
|
|
@patch("core.worktree.merge_worktree",
|
|||
|
|
return_value={"success": True, "conflicts": [], "merged_files": []})
|
|||
|
|
@patch("core.worktree.create_worktree", return_value="/tmp/WT-suspicious-debugger")
|
|||
|
|
@patch("core.worktree.ensure_gitignore")
|
|||
|
|
@patch("agents.runner.subprocess.run")
|
|||
|
|
def test_worktree_empty_merged_files_marks_suspicious(
|
|||
|
|
self, mock_run, mock_gitignore, mock_create, mock_merge, mock_cleanup, mock_auth,
|
|||
|
|
conn_auto_wt
|
|||
|
|
):
|
|||
|
|
"""worktree merge возвращает merged_files=[] → шаг помечается suspicious=True."""
|
|||
|
|
from agents.runner import run_pipeline
|
|||
|
|
conn, proj_path = conn_auto_wt
|
|||
|
|
mock_run.return_value = _claude_success()
|
|||
|
|
mock_auth.return_value = None
|
|||
|
|
|
|||
|
|
result = run_pipeline(conn, "TSK-002", [{"role": "debugger", "brief": "fix it"}])
|
|||
|
|
|
|||
|
|
assert result["success"] is True
|
|||
|
|
step_result = result["results"][0]
|
|||
|
|
assert step_result.get("suspicious") is True, (
|
|||
|
|
"Step must be marked suspicious when worktree merge returns merged_files=[]. "
|
|||
|
|
f"Got result: {step_result}"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
@patch("agents.runner.check_claude_auth")
|
|||
|
|
@patch("core.worktree.cleanup_worktree")
|
|||
|
|
@patch("core.worktree.merge_worktree",
|
|||
|
|
return_value={"success": True, "conflicts": [], "merged_files": ["src/app.py"]})
|
|||
|
|
@patch("core.worktree.create_worktree", return_value="/tmp/WT-ok-debugger")
|
|||
|
|
@patch("core.worktree.ensure_gitignore")
|
|||
|
|
@patch("agents.runner.subprocess.run")
|
|||
|
|
def test_worktree_nonempty_merged_files_not_suspicious(
|
|||
|
|
self, mock_run, mock_gitignore, mock_create, mock_merge, mock_cleanup, mock_auth,
|
|||
|
|
conn_auto_wt
|
|||
|
|
):
|
|||
|
|
"""worktree merge возвращает merged_files=['src/app.py'] → шаг НЕ suspicious."""
|
|||
|
|
from agents.runner import run_pipeline
|
|||
|
|
conn, proj_path = conn_auto_wt
|
|||
|
|
mock_run.return_value = _claude_success()
|
|||
|
|
mock_auth.return_value = None
|
|||
|
|
|
|||
|
|
result = run_pipeline(conn, "TSK-002", [{"role": "debugger", "brief": "fix it"}])
|
|||
|
|
|
|||
|
|
assert result["success"] is True
|
|||
|
|
step_result = result["results"][0]
|
|||
|
|
assert not step_result.get("suspicious"), (
|
|||
|
|
"Step must NOT be suspicious when worktree has merged files. "
|
|||
|
|
f"Got result: {step_result}"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ─── Fix C: permission retry использует worktree_path ─────────────────────────
|
|||
|
|
|
|||
|
|
class TestFixC_PermissionRetryWorktreePath:
|
|||
|
|
"""Fix C: retry после permission error передаёт working_dir_override=worktree_path."""
|
|||
|
|
|
|||
|
|
FAKE_WT = "/tmp/fake-worktree-kin117"
|
|||
|
|
|
|||
|
|
@patch("agents.runner.check_claude_auth")
|
|||
|
|
@patch("core.worktree.cleanup_worktree")
|
|||
|
|
@patch("core.worktree.merge_worktree",
|
|||
|
|
return_value={"success": True, "conflicts": [], "merged_files": ["fix.py"]})
|
|||
|
|
@patch("core.worktree.create_worktree", return_value=FAKE_WT)
|
|||
|
|
@patch("core.worktree.ensure_gitignore")
|
|||
|
|
@patch("agents.runner.subprocess.run")
|
|||
|
|
def test_permission_retry_uses_worktree_cwd(
|
|||
|
|
self, mock_run, mock_gitignore, mock_create, mock_merge, mock_cleanup, mock_auth,
|
|||
|
|
conn_auto_wt
|
|||
|
|
):
|
|||
|
|
"""При permission error в auto_complete режиме retry запускается в cwd=worktree_path."""
|
|||
|
|
from agents.runner import run_pipeline
|
|||
|
|
conn, proj_path = conn_auto_wt
|
|||
|
|
|
|||
|
|
claude_calls = []
|
|||
|
|
|
|||
|
|
def side_effect(cmd, **kwargs):
|
|||
|
|
if "--output-format" in cmd:
|
|||
|
|
cwd = kwargs.get("cwd")
|
|||
|
|
claude_calls.append({"cmd": list(cmd), "cwd": cwd})
|
|||
|
|
# First claude call → permission error triggers retry
|
|||
|
|
if len(claude_calls) == 1:
|
|||
|
|
return _permission_denied()
|
|||
|
|
return _claude_success()
|
|||
|
|
# Non-claude subprocess calls (git diff, etc.)
|
|||
|
|
return _ok("")
|
|||
|
|
|
|||
|
|
mock_run.side_effect = side_effect
|
|||
|
|
mock_auth.return_value = None
|
|||
|
|
|
|||
|
|
result = run_pipeline(conn, "TSK-002",
|
|||
|
|
[{"role": "debugger", "brief": "implement feature"}])
|
|||
|
|
|
|||
|
|
assert len(claude_calls) >= 2, (
|
|||
|
|
f"Expected at least 2 claude calls (first + retry), got {len(claude_calls)}"
|
|||
|
|
)
|
|||
|
|
retry_call = claude_calls[1]
|
|||
|
|
assert retry_call["cwd"] == self.FAKE_WT, (
|
|||
|
|
f"Retry must use worktree_path={self.FAKE_WT!r} as cwd, "
|
|||
|
|
f"got {retry_call['cwd']!r}. "
|
|||
|
|
"Fix: add working_dir_override=worktree_path to permission retry run_agent call."
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ─── Fix B: merge_worktree auto-commits перед merge ──────────────────────────
|
|||
|
|
|
|||
|
|
class TestFixB_MergeWorktreePreCommit:
|
|||
|
|
"""Fix B: merge_worktree выполняет git add -A + git commit перед git merge --no-ff."""
|
|||
|
|
|
|||
|
|
@patch("subprocess.run")
|
|||
|
|
def test_merge_worktree_runs_git_add_before_merge(self, mock_run, tmp_path):
|
|||
|
|
"""git add -A вызывается ДО git merge --no-ff."""
|
|||
|
|
from core.worktree import merge_worktree
|
|||
|
|
|
|||
|
|
worktree_path = tmp_path / ".kin_worktrees" / "TASK-001-debugger"
|
|||
|
|
worktree_path.mkdir(parents=True)
|
|||
|
|
|
|||
|
|
commands_seen = []
|
|||
|
|
|
|||
|
|
def run_side_effect(cmd, **kwargs):
|
|||
|
|
commands_seen.append(list(cmd))
|
|||
|
|
return _ok()
|
|||
|
|
|
|||
|
|
mock_run.side_effect = run_side_effect
|
|||
|
|
|
|||
|
|
merge_worktree(str(worktree_path), str(tmp_path))
|
|||
|
|
|
|||
|
|
flat = [" ".join(c) for c in commands_seen]
|
|||
|
|
add_idx = next(
|
|||
|
|
(i for i, c in enumerate(flat) if "add" in c and "-A" in c), None
|
|||
|
|
)
|
|||
|
|
merge_idx = next(
|
|||
|
|
(i for i, c in enumerate(flat) if "merge" in c and "--no-ff" in c), None
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
assert add_idx is not None, (
|
|||
|
|
"git add -A must be called before merge. "
|
|||
|
|
f"Commands seen: {flat}"
|
|||
|
|
)
|
|||
|
|
assert merge_idx is not None, "git merge --no-ff must be called"
|
|||
|
|
assert add_idx < merge_idx, (
|
|||
|
|
f"git add -A (idx={add_idx}) must be BEFORE git merge --no-ff (idx={merge_idx})"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
@patch("subprocess.run")
|
|||
|
|
def test_merge_worktree_runs_git_commit_before_merge(self, mock_run, tmp_path):
|
|||
|
|
"""git commit вызывается ДО git merge --no-ff."""
|
|||
|
|
from core.worktree import merge_worktree
|
|||
|
|
|
|||
|
|
worktree_path = tmp_path / ".kin_worktrees" / "TASK-001-debugger"
|
|||
|
|
worktree_path.mkdir(parents=True)
|
|||
|
|
|
|||
|
|
commands_seen = []
|
|||
|
|
|
|||
|
|
def run_side_effect(cmd, **kwargs):
|
|||
|
|
commands_seen.append(list(cmd))
|
|||
|
|
return _ok()
|
|||
|
|
|
|||
|
|
mock_run.side_effect = run_side_effect
|
|||
|
|
|
|||
|
|
merge_worktree(str(worktree_path), str(tmp_path))
|
|||
|
|
|
|||
|
|
flat = [" ".join(c) for c in commands_seen]
|
|||
|
|
commit_idx = next(
|
|||
|
|
(i for i, c in enumerate(flat) if "commit" in c and "-m" in c), None
|
|||
|
|
)
|
|||
|
|
merge_idx = next(
|
|||
|
|
(i for i, c in enumerate(flat) if "merge" in c and "--no-ff" in c), None
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
assert commit_idx is not None, (
|
|||
|
|
"git commit -m must be called before merge. "
|
|||
|
|
f"Commands seen: {flat}"
|
|||
|
|
)
|
|||
|
|
assert merge_idx is not None, "git merge --no-ff must be called"
|
|||
|
|
assert commit_idx < merge_idx, (
|
|||
|
|
f"git commit (idx={commit_idx}) must be BEFORE git merge --no-ff (idx={merge_idx})"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
@patch("subprocess.run")
|
|||
|
|
def test_merge_worktree_commit_failure_does_not_block_merge(self, mock_run, tmp_path):
|
|||
|
|
"""Если git commit возвращает non-zero (нечего коммитить) — merge всё равно запускается."""
|
|||
|
|
from core.worktree import merge_worktree
|
|||
|
|
|
|||
|
|
worktree_path = tmp_path / ".kin_worktrees" / "TASK-001-debugger"
|
|||
|
|
worktree_path.mkdir(parents=True)
|
|||
|
|
|
|||
|
|
commands_seen = []
|
|||
|
|
|
|||
|
|
def run_side_effect(cmd, **kwargs):
|
|||
|
|
commands_seen.append(list(cmd))
|
|||
|
|
cmd_str = " ".join(cmd)
|
|||
|
|
if "commit" in cmd_str:
|
|||
|
|
return _fail(stderr="nothing to commit") # nothing to commit
|
|||
|
|
return _ok()
|
|||
|
|
|
|||
|
|
mock_run.side_effect = run_side_effect
|
|||
|
|
|
|||
|
|
result = merge_worktree(str(worktree_path), str(tmp_path))
|
|||
|
|
|
|||
|
|
flat = [" ".join(c) for c in commands_seen]
|
|||
|
|
merge_calls = [c for c in flat if "merge" in c and "--no-ff" in c]
|
|||
|
|
assert len(merge_calls) > 0, (
|
|||
|
|
"git merge --no-ff must still be called even when pre-commit fails. "
|
|||
|
|
f"Commands seen: {flat}"
|
|||
|
|
)
|
|||
|
|
assert result["success"] is True
|