kin/tests/test_KIN-117_regression.py
2026-03-17 22:41:47 +02:00

428 lines
18 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Regression tests for KIN-117 — dev-agent silent success detection.
Root cause: dev-agents периодически заявляют об успешной реализации, но файлы
не записываются на диск. Три причины:
(1) --dangerously-skip-permissions не добавляется когда stdin=DEVNULL (noninteractive)
(2) worktree создаётся, но merge не происходит (uncommitted files)
(3) permission-retry вызывает run_agent без working_dir_override=worktree_path
Fix D добавляет guard: после успешного шага dev-агента проверяет git diff —
если изменений нет, помечает шаг suspicious=True.
Acceptance criteria:
(1) Fix A: noninteractive=True → --dangerously-skip-permissions в subprocess cmd
(2) Fix D: dev-агент success + пустой git diff → step.suspicious=True
(3) Fix D: dev-агент success + непустой git diff → step.suspicious НЕ выставлен (нет false positive)
(4) Fix C: permission retry в auto_complete режиме → subprocess cwd=worktree_path
(5) Fix B: merge_worktree запускает git add -A + git commit перед git merge --no-ff
"""
import json
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from core.db import init_db
from core import models
# ─── Helpers ────────────────────────────────────────────────────────────────
def _ok(stdout=""):
m = MagicMock()
m.returncode = 0
m.stdout = stdout
m.stderr = ""
return m
def _fail(stdout="", stderr="error"):
m = MagicMock()
m.returncode = 1
m.stdout = stdout
m.stderr = stderr
return m
def _claude_success():
return _ok(json.dumps({"result": "all done, implementation complete"}))
def _permission_denied():
"""Subprocess result that triggers _is_permission_error."""
m = MagicMock()
m.returncode = 1
m.stdout = ""
m.stderr = "permission denied to write file"
return m
# ─── Fixtures ────────────────────────────────────────────────────────────────
@pytest.fixture
def conn_basic(tmp_path):
"""Project with path=tmp_path (real dir), review mode, no worktrees."""
c = init_db(":memory:")
models.create_project(c, "prj1", "Test Project", str(tmp_path),
tech_stack=["python"])
models.create_task(c, "TSK-001", "prj1", "Fix something",
brief={"route_type": "debug"})
yield c, str(tmp_path)
c.close()
@pytest.fixture
def conn_auto_wt(tmp_path):
"""Project with auto_complete mode and worktrees enabled, real path."""
c = init_db(":memory:")
models.create_project(c, "prj2", "Auto WT Project", str(tmp_path),
tech_stack=["python"])
c.execute(
"UPDATE projects SET execution_mode='auto_complete', worktrees_enabled=1 WHERE id='prj2'"
)
c.commit()
models.create_task(c, "TSK-002", "prj2", "Fix with worktree",
brief={"route_type": "debug"})
yield c, str(tmp_path)
c.close()
# ─── Fix A: noninteractive mode добавляет --dangerously-skip-permissions ─────
class TestFixA_NoninteractiveSkipPermissions:
"""Fix A: is_noninteractive вычисляется ДО проверки allow_write."""
@patch("agents.runner.subprocess.run")
def test_noninteractive_adds_dangerously_skip_permissions(self, mock_run):
"""noninteractive=True, allow_write=False → --dangerously-skip-permissions в cmd."""
from agents.runner import _run_claude
mock_run.return_value = _claude_success()
_run_claude("test prompt", noninteractive=True, allow_write=False)
cmd = mock_run.call_args[0][0]
assert "--dangerously-skip-permissions" in cmd, (
"--dangerously-skip-permissions must be added when noninteractive=True, "
f"got cmd: {cmd}"
)
@patch("agents.runner.subprocess.run")
def test_allow_write_adds_dangerously_skip_permissions(self, mock_run):
"""allow_write=True → --dangerously-skip-permissions в cmd (существующее поведение)."""
from agents.runner import _run_claude
mock_run.return_value = _claude_success()
_run_claude("test prompt", allow_write=True, noninteractive=False)
cmd = mock_run.call_args[0][0]
assert "--dangerously-skip-permissions" in cmd
@patch("agents.runner.subprocess.run")
def test_interactive_no_allow_write_no_skip_permissions(self, mock_run):
"""allow_write=False + noninteractive=False → --dangerously-skip-permissions НЕ в cmd."""
from agents.runner import _run_claude
mock_run.return_value = _claude_success()
_run_claude("test prompt", allow_write=False, noninteractive=False)
cmd = mock_run.call_args[0][0]
assert "--dangerously-skip-permissions" not in cmd, (
"--dangerously-skip-permissions must NOT be added in interactive mode without allow_write"
)
# ─── Fix D: suspicious guard (non-worktree path) ─────────────────────────────
class TestFixD_SuspiciousGuardNoWorktree:
"""Fix D: guard проверяет git diff после успешного шага dev-агента."""
@patch("agents.runner.check_claude_auth")
@patch("agents.runner.subprocess.run")
def test_success_empty_git_diff_marks_step_suspicious(self, mock_run, mock_auth,
conn_basic):
"""dev-агент success + пустой git diff → result['suspicious']=True."""
from agents.runner import run_pipeline
conn, proj_path = conn_basic
def side_effect(cmd, **kwargs):
cmd_str = " ".join(str(c) for c in cmd)
if "diff" in cmd_str:
return _ok("") # empty diff → no file changes
return _claude_success() # claude call
mock_run.side_effect = side_effect
mock_auth.return_value = None
result = run_pipeline(conn, "TSK-001", [{"role": "debugger", "brief": "fix it"}])
assert result["success"] is True
step_result = result["results"][0]
assert step_result.get("suspicious") is True, (
"Step must be marked suspicious=True when agent claims success "
f"but git diff is empty. Got result: {step_result}"
)
@patch("agents.runner.check_claude_auth")
@patch("agents.runner.subprocess.run")
def test_success_nonempty_git_diff_not_suspicious(self, mock_run, mock_auth,
conn_basic):
"""dev-агент success + непустой git diff → suspicious НЕ выставляется."""
from agents.runner import run_pipeline
conn, proj_path = conn_basic
def side_effect(cmd, **kwargs):
cmd_str = " ".join(str(c) for c in cmd)
if "diff" in cmd_str:
return _ok("src/app.py\nsrc/utils.py\n") # files changed
return _claude_success()
mock_run.side_effect = side_effect
mock_auth.return_value = None
result = run_pipeline(conn, "TSK-001", [{"role": "debugger", "brief": "fix it"}])
assert result["success"] is True
step_result = result["results"][0]
assert not step_result.get("suspicious"), (
"Step must NOT be suspicious when git diff shows changed files. "
f"Got result: {step_result}"
)
@patch("agents.runner.check_claude_auth")
@patch("agents.runner.subprocess.run")
def test_suspicious_step_does_not_block_pipeline(self, mock_run, mock_auth,
conn_basic):
"""suspicious шаг не останавливает pipeline — только выставляет флаг."""
from agents.runner import run_pipeline
conn, proj_path = conn_basic
def side_effect(cmd, **kwargs):
cmd_str = " ".join(str(c) for c in cmd)
if "diff" in cmd_str:
return _ok("") # empty diff → suspicious, but pipeline continues
return _claude_success()
mock_run.side_effect = side_effect
mock_auth.return_value = None
result = run_pipeline(conn, "TSK-001", [{"role": "debugger", "brief": "fix"}])
# Pipeline must succeed even when step is suspicious
assert result["success"] is True
assert result["steps_completed"] == 1
# ─── Fix D: suspicious guard (worktree path) ─────────────────────────────────
class TestFixD_SuspiciousGuardWithWorktree:
"""Fix D: guard проверяет merged_files после worktree merge."""
@patch("agents.runner.check_claude_auth")
@patch("core.worktree.cleanup_worktree")
@patch("core.worktree.merge_worktree",
return_value={"success": True, "conflicts": [], "merged_files": []})
@patch("core.worktree.create_worktree", return_value="/tmp/WT-suspicious-debugger")
@patch("core.worktree.ensure_gitignore")
@patch("agents.runner.subprocess.run")
def test_worktree_empty_merged_files_marks_suspicious(
self, mock_run, mock_gitignore, mock_create, mock_merge, mock_cleanup, mock_auth,
conn_auto_wt
):
"""worktree merge возвращает merged_files=[] → шаг помечается suspicious=True."""
from agents.runner import run_pipeline
conn, proj_path = conn_auto_wt
mock_run.return_value = _claude_success()
mock_auth.return_value = None
result = run_pipeline(conn, "TSK-002", [{"role": "debugger", "brief": "fix it"}])
assert result["success"] is True
step_result = result["results"][0]
assert step_result.get("suspicious") is True, (
"Step must be marked suspicious when worktree merge returns merged_files=[]. "
f"Got result: {step_result}"
)
@patch("agents.runner.check_claude_auth")
@patch("core.worktree.cleanup_worktree")
@patch("core.worktree.merge_worktree",
return_value={"success": True, "conflicts": [], "merged_files": ["src/app.py"]})
@patch("core.worktree.create_worktree", return_value="/tmp/WT-ok-debugger")
@patch("core.worktree.ensure_gitignore")
@patch("agents.runner.subprocess.run")
def test_worktree_nonempty_merged_files_not_suspicious(
self, mock_run, mock_gitignore, mock_create, mock_merge, mock_cleanup, mock_auth,
conn_auto_wt
):
"""worktree merge возвращает merged_files=['src/app.py'] → шаг НЕ suspicious."""
from agents.runner import run_pipeline
conn, proj_path = conn_auto_wt
mock_run.return_value = _claude_success()
mock_auth.return_value = None
result = run_pipeline(conn, "TSK-002", [{"role": "debugger", "brief": "fix it"}])
assert result["success"] is True
step_result = result["results"][0]
assert not step_result.get("suspicious"), (
"Step must NOT be suspicious when worktree has merged files. "
f"Got result: {step_result}"
)
# ─── Fix C: permission retry использует worktree_path ─────────────────────────
class TestFixC_PermissionRetryWorktreePath:
"""Fix C: retry после permission error передаёт working_dir_override=worktree_path."""
FAKE_WT = "/tmp/fake-worktree-kin117"
@patch("agents.runner.check_claude_auth")
@patch("core.worktree.cleanup_worktree")
@patch("core.worktree.merge_worktree",
return_value={"success": True, "conflicts": [], "merged_files": ["fix.py"]})
@patch("core.worktree.create_worktree", return_value=FAKE_WT)
@patch("core.worktree.ensure_gitignore")
@patch("agents.runner.subprocess.run")
def test_permission_retry_uses_worktree_cwd(
self, mock_run, mock_gitignore, mock_create, mock_merge, mock_cleanup, mock_auth,
conn_auto_wt
):
"""При permission error в auto_complete режиме retry запускается в cwd=worktree_path."""
from agents.runner import run_pipeline
conn, proj_path = conn_auto_wt
claude_calls = []
def side_effect(cmd, **kwargs):
if "--output-format" in cmd:
cwd = kwargs.get("cwd")
claude_calls.append({"cmd": list(cmd), "cwd": cwd})
# First claude call → permission error triggers retry
if len(claude_calls) == 1:
return _permission_denied()
return _claude_success()
# Non-claude subprocess calls (git diff, etc.)
return _ok("")
mock_run.side_effect = side_effect
mock_auth.return_value = None
result = run_pipeline(conn, "TSK-002",
[{"role": "debugger", "brief": "implement feature"}])
assert len(claude_calls) >= 2, (
f"Expected at least 2 claude calls (first + retry), got {len(claude_calls)}"
)
retry_call = claude_calls[1]
assert retry_call["cwd"] == self.FAKE_WT, (
f"Retry must use worktree_path={self.FAKE_WT!r} as cwd, "
f"got {retry_call['cwd']!r}. "
"Fix: add working_dir_override=worktree_path to permission retry run_agent call."
)
# ─── Fix B: merge_worktree auto-commits перед merge ──────────────────────────
class TestFixB_MergeWorktreePreCommit:
"""Fix B: merge_worktree выполняет git add -A + git commit перед git merge --no-ff."""
@patch("subprocess.run")
def test_merge_worktree_runs_git_add_before_merge(self, mock_run, tmp_path):
"""git add -A вызывается ДО git merge --no-ff."""
from core.worktree import merge_worktree
worktree_path = tmp_path / ".kin_worktrees" / "TASK-001-debugger"
worktree_path.mkdir(parents=True)
commands_seen = []
def run_side_effect(cmd, **kwargs):
commands_seen.append(list(cmd))
return _ok()
mock_run.side_effect = run_side_effect
merge_worktree(str(worktree_path), str(tmp_path))
flat = [" ".join(c) for c in commands_seen]
add_idx = next(
(i for i, c in enumerate(flat) if "add" in c and "-A" in c), None
)
merge_idx = next(
(i for i, c in enumerate(flat) if "merge" in c and "--no-ff" in c), None
)
assert add_idx is not None, (
"git add -A must be called before merge. "
f"Commands seen: {flat}"
)
assert merge_idx is not None, "git merge --no-ff must be called"
assert add_idx < merge_idx, (
f"git add -A (idx={add_idx}) must be BEFORE git merge --no-ff (idx={merge_idx})"
)
@patch("subprocess.run")
def test_merge_worktree_runs_git_commit_before_merge(self, mock_run, tmp_path):
"""git commit вызывается ДО git merge --no-ff."""
from core.worktree import merge_worktree
worktree_path = tmp_path / ".kin_worktrees" / "TASK-001-debugger"
worktree_path.mkdir(parents=True)
commands_seen = []
def run_side_effect(cmd, **kwargs):
commands_seen.append(list(cmd))
return _ok()
mock_run.side_effect = run_side_effect
merge_worktree(str(worktree_path), str(tmp_path))
flat = [" ".join(c) for c in commands_seen]
commit_idx = next(
(i for i, c in enumerate(flat) if "commit" in c and "-m" in c), None
)
merge_idx = next(
(i for i, c in enumerate(flat) if "merge" in c and "--no-ff" in c), None
)
assert commit_idx is not None, (
"git commit -m must be called before merge. "
f"Commands seen: {flat}"
)
assert merge_idx is not None, "git merge --no-ff must be called"
assert commit_idx < merge_idx, (
f"git commit (idx={commit_idx}) must be BEFORE git merge --no-ff (idx={merge_idx})"
)
@patch("subprocess.run")
def test_merge_worktree_commit_failure_does_not_block_merge(self, mock_run, tmp_path):
"""Если git commit возвращает non-zero (нечего коммитить) — merge всё равно запускается."""
from core.worktree import merge_worktree
worktree_path = tmp_path / ".kin_worktrees" / "TASK-001-debugger"
worktree_path.mkdir(parents=True)
commands_seen = []
def run_side_effect(cmd, **kwargs):
commands_seen.append(list(cmd))
cmd_str = " ".join(cmd)
if "commit" in cmd_str:
return _fail(stderr="nothing to commit") # nothing to commit
return _ok()
mock_run.side_effect = run_side_effect
result = merge_worktree(str(worktree_path), str(tmp_path))
flat = [" ".join(c) for c in commands_seen]
merge_calls = [c for c in flat if "merge" in c and "--no-ff" in c]
assert len(merge_calls) > 0, (
"git merge --no-ff must still be called even when pre-commit fails. "
f"Commands seen: {flat}"
)
assert result["success"] is True