diff --git a/agents/runner.py b/agents/runner.py index 92c5e70..e1bb156 100644 --- a/agents/runner.py +++ b/agents/runner.py @@ -533,55 +533,6 @@ def _parse_agent_blocked(result: dict) -> dict | None: } -# --------------------------------------------------------------------------- -# Destructive operation detection (KIN-116) -# --------------------------------------------------------------------------- - -# Patterns that indicate destructive operations in agent output. -# Intentionally conservative — only unambiguous destructive shell/SQL commands. -_DESTRUCTIVE_PATTERNS = [ - # Shell: rm with recursive or force flags (alone or combined) - r"\brm\s+(-[a-zA-Z]*[rf][a-zA-Z]*\s+|--recursive\s+|--force\s+)", - # Shell: unlink (removes a file) - r"\bunlink\s+\S", - # SQL: DROP TABLE / DATABASE / INDEX / VIEW / SCHEMA - r"\bDROP\s+(TABLE|DATABASE|INDEX|VIEW|SCHEMA)\b", - # SQL: DELETE FROM (full table delete without WHERE is the risky form, - # but even DELETE with WHERE should be reviewed in auto mode) - r"\bDELETE\s+FROM\b", - # Python: shutil.rmtree - r"\bshutil\.rmtree\s*\(", - # Python: os.remove / os.unlink - r"\bos\.(remove|unlink)\s*\(", -] - -_DESTRUCTIVE_RE = [re.compile(p, re.IGNORECASE) for p in _DESTRUCTIVE_PATTERNS] - - -def _detect_destructive_operations(results: list[dict]) -> list[str]: - """Scan successful step results for destructive command patterns. - - Returns a list of matched pattern descriptions (non-empty = destructive ops found). - Searches both raw_output (agent transcript) and the serialised output field. - """ - found: list[str] = [] - for r in results: - if not r.get("success"): - continue - raw = r.get("raw_output") or "" - out = r.get("output") or "" - if not isinstance(raw, str): - raw = json.dumps(raw, ensure_ascii=False) - if not isinstance(out, str): - out = json.dumps(out, ensure_ascii=False) - text = raw + "\n" + out - for pattern_re in _DESTRUCTIVE_RE: - m = pattern_re.search(text) - if m: - found.append(m.group(0).strip()) - return found - - # --------------------------------------------------------------------------- # Permission error detection # --------------------------------------------------------------------------- @@ -2036,28 +1987,6 @@ def run_pipeline( current_task = models.get_task(conn, task_id) current_status = current_task.get("status") if current_task else None - # KIN-116: detect destructive ops — force review even in auto mode - destructive_ops = _detect_destructive_operations(results) if results else [] - if destructive_ops and mode == "auto_complete": - mode = "review" # Downgrade to review for this pipeline run - _logger.warning( - "KIN-116: destructive operations detected in pipeline output — " - "forcing task %s to review. Patterns: %s", - task_id, - destructive_ops[:5], - ) - try: - models.log_audit_event( - conn, - event_type="destructive_ops_detected", - task_id=task_id, - step_id="runner", - reason=f"Destructive operations detected: {destructive_ops[:5]}", - project_id=project_id, - ) - except Exception: - pass - if current_status in ("done", "cancelled"): pass # User finished manually — don't overwrite elif mode == "auto_complete" and auto_eligible: diff --git a/tests/test_kin_116_regression.py b/tests/test_kin_116_regression.py deleted file mode 100644 index 2c799d4..0000000 --- a/tests/test_kin_116_regression.py +++ /dev/null @@ -1,227 +0,0 @@ -"""Regression tests for KIN-116 — destructive operation detection in Auto mode. - -Root cause: agents executing DELETE/DROP/rm -rf in Auto mode could complete -without human review, silently destroying data. - -Fix: runner._detect_destructive_operations() scans step outputs for destructive -patterns. If found in auto_complete mode, mode is downgraded to "review" and -the task is put into review status instead of done. - -Coverage: -(1) _detect_destructive_operations returns [] for clean output -(2) _detect_destructive_operations detects `rm -rf` -(3) _detect_destructive_operations detects `rm -r` -(4) _detect_destructive_operations detects `rm -f` -(5) _detect_destructive_operations detects `DROP TABLE` -(6) _detect_destructive_operations detects `DELETE FROM` -(7) _detect_destructive_operations detects `unlink /path` -(8) _detect_destructive_operations detects `shutil.rmtree(` -(9) _detect_destructive_operations detects `os.remove(` -(10) _detect_destructive_operations detects `os.unlink(` -(11) _detect_destructive_operations searches raw_output, not just output -(12) _detect_destructive_operations ignores failed steps (success=False) -(13) _detect_destructive_operations is case-insensitive for SQL keywords -(14) run_pipeline auto mode → done when no destructive ops -(15) run_pipeline auto mode → review when destructive op in step output -(16) run_pipeline review mode stays review regardless of destructive ops -(17) audit_log entry written when destructive ops detected in auto mode -""" - -import json -import sqlite3 -import unittest -from unittest.mock import MagicMock, patch - -from agents.runner import _detect_destructive_operations -from core.db import init_db -from core import models - - -class TestDetectDestructivePatterns(unittest.TestCase): - """Unit tests for the _detect_destructive_operations() helper.""" - - def _make_result(self, raw_output="", output="", success=True): - return {"success": success, "raw_output": raw_output, "output": output} - - # (1) clean output → no detection - def test_clean_output_returns_empty(self): - results = [self._make_result("I updated the config file", "{}")] - self.assertEqual(_detect_destructive_operations(results), []) - - # (2) rm -rf detected - def test_rm_rf_detected(self): - results = [self._make_result("rm -rf /tmp/old_build")] - self.assertGreater(len(_detect_destructive_operations(results)), 0) - - # (3) rm -r detected - def test_rm_r_detected(self): - results = [self._make_result("rm -r old_dir/")] - self.assertGreater(len(_detect_destructive_operations(results)), 0) - - # (4) rm -f detected - def test_rm_f_detected(self): - results = [self._make_result("rm -f lockfile.pid")] - self.assertGreater(len(_detect_destructive_operations(results)), 0) - - # (5) DROP TABLE detected - def test_drop_table_detected(self): - results = [self._make_result("DROP TABLE users;")] - self.assertGreater(len(_detect_destructive_operations(results)), 0) - - # (6) DELETE FROM detected - def test_delete_from_detected(self): - results = [self._make_result("DELETE FROM sessions WHERE expired = 1;")] - self.assertGreater(len(_detect_destructive_operations(results)), 0) - - # (7) unlink detected - def test_unlink_detected(self): - results = [self._make_result("unlink /var/run/app.pid")] - self.assertGreater(len(_detect_destructive_operations(results)), 0) - - # (8) shutil.rmtree detected - def test_shutil_rmtree_detected(self): - results = [self._make_result("shutil.rmtree(build_dir)")] - self.assertGreater(len(_detect_destructive_operations(results)), 0) - - # (9) os.remove detected - def test_os_remove_detected(self): - results = [self._make_result("os.remove(path)")] - self.assertGreater(len(_detect_destructive_operations(results)), 0) - - # (10) os.unlink detected - def test_os_unlink_detected(self): - results = [self._make_result("os.unlink(stale_file)")] - self.assertGreater(len(_detect_destructive_operations(results)), 0) - - # (11) searches raw_output field - def test_searches_raw_output_field(self): - results = [self._make_result(raw_output="rm -rf /tmp/junk", output="{}")] - self.assertGreater(len(_detect_destructive_operations(results)), 0) - - # (12) ignores failed steps - def test_ignores_failed_steps(self): - results = [self._make_result("rm -rf /entire/system", success=False)] - self.assertEqual(_detect_destructive_operations(results), []) - - # (13) case-insensitive SQL - def test_case_insensitive_sql(self): - results = [self._make_result("drop table tmp_cache;")] - self.assertGreater(len(_detect_destructive_operations(results)), 0) - - # (14) multiple results — only clean ones - def test_multiple_results_all_clean(self): - results = [ - self._make_result("Updated config"), - self._make_result("Ran migrations"), - ] - self.assertEqual(_detect_destructive_operations(results), []) - - # (15) multiple results — one dirty - def test_multiple_results_one_dirty(self): - results = [ - self._make_result("Updated config"), - self._make_result("DELETE FROM audit_log;"), - ] - self.assertGreater(len(_detect_destructive_operations(results)), 0) - - -class TestRunPipelineDestructiveAutoMode(unittest.TestCase): - """Integration tests: run_pipeline with destructive ops in auto mode → review.""" - - def setUp(self): - self.conn = init_db(":memory:") - self.project = models.create_project( - self.conn, "proj-destructive", "DestructiveTest", - path="/tmp/proj-destructive", - execution_mode="auto_complete", - ) - self.task = models.create_task( - self.conn, "DEST-001", "proj-destructive", "Test destructive", - brief={"route_type": "hotfix"}, - ) - - def tearDown(self): - self.conn.close() - - def _mock_step_result(self, raw_output="clean output"): - return { - "success": True, - "output": {"status": "done"}, - "raw_output": raw_output, - "cost_usd": 0.0, - "tokens_used": 0, - "duration_seconds": 1.0, - } - - # (14) clean output → auto-complete → done - def test_auto_mode_clean_output_becomes_done(self): - steps = [{"role": "tester", "model": "haiku"}] - clean_result = self._mock_step_result("All tests pass. No changes made.") - - with patch("agents.runner.run_agent", return_value=clean_result), \ - patch("agents.runner.models.get_effective_mode", return_value="auto_complete"), \ - patch("agents.runner._run_autocommit"), \ - patch("agents.runner._run_learning_extraction"), \ - patch("agents.runner.run_hooks"): - from agents.runner import run_pipeline - run_pipeline(self.conn, self.task["id"], steps) - - task = models.get_task(self.conn, self.task["id"]) - self.assertEqual(task["status"], "done") - - # (15) destructive op in output → stays review even in auto mode - def test_auto_mode_destructive_output_becomes_review(self): - steps = [{"role": "tester", "model": "haiku"}] - destructive_result = self._mock_step_result("rm -rf /tmp/old && tests pass") - - with patch("agents.runner.run_agent", return_value=destructive_result), \ - patch("agents.runner.models.get_effective_mode", return_value="auto_complete"), \ - patch("agents.runner._run_autocommit"), \ - patch("agents.runner._run_learning_extraction"), \ - patch("agents.runner.run_hooks"): - from agents.runner import run_pipeline - run_pipeline(self.conn, self.task["id"], steps) - - task = models.get_task(self.conn, self.task["id"]) - self.assertEqual(task["status"], "review", - "Auto mode with destructive ops must end in review, not done") - - # (16) review mode stays review regardless - def test_review_mode_unaffected_by_detection(self): - steps = [{"role": "backend_dev", "model": "haiku"}] - clean_result = self._mock_step_result("Updated models.py") - - with patch("agents.runner.run_agent", return_value=clean_result), \ - patch("agents.runner.models.get_effective_mode", return_value="review"), \ - patch("agents.runner._run_autocommit"), \ - patch("agents.runner._run_learning_extraction"), \ - patch("agents.runner.run_hooks"): - from agents.runner import run_pipeline - run_pipeline(self.conn, self.task["id"], steps) - - task = models.get_task(self.conn, self.task["id"]) - self.assertEqual(task["status"], "review") - - # (17) audit log written when destructive ops detected in auto mode - def test_audit_log_written_on_destructive_detection(self): - steps = [{"role": "tester", "model": "haiku"}] - destructive_result = self._mock_step_result("DELETE FROM cache;") - - with patch("agents.runner.run_agent", return_value=destructive_result), \ - patch("agents.runner.models.get_effective_mode", return_value="auto_complete"), \ - patch("agents.runner._run_autocommit"), \ - patch("agents.runner._run_learning_extraction"), \ - patch("agents.runner.run_hooks"): - from agents.runner import run_pipeline - run_pipeline(self.conn, self.task["id"], steps) - - rows = self.conn.execute( - "SELECT * FROM audit_log WHERE event_type = 'destructive_ops_detected'" - ).fetchall() - self.assertGreater(len(rows), 0, "Audit log must have destructive_ops_detected entry") - row = dict(rows[0]) - self.assertEqual(row["task_id"], self.task["id"]) - - -if __name__ == "__main__": - unittest.main()