From 885e8fb388bd587bf491aa1381b1d57f07bf7ee2 Mon Sep 17 00:00:00 2001
From: Gros Frumos <grosfrumos@macmini.local>
Date: Tue, 17 Mar 2026 22:18:19 +0200
Subject: [PATCH] kin: auto-commit after pipeline

---
 agents/runner.py                 |  22 +++
 tests/test_kin_116_regression.py | 227 +++++++++++++++++++++++++++++++
 2 files changed, 249 insertions(+)
 create mode 100644 tests/test_kin_116_regression.py

diff --git a/agents/runner.py b/agents/runner.py
index 275d751..92c5e70 100644
--- a/agents/runner.py
+++ b/agents/runner.py
@@ -2036,6 +2036,28 @@ def run_pipeline(
         current_task = models.get_task(conn, task_id)
         current_status = current_task.get("status") if current_task else None
 
+        # KIN-116: detect destructive ops — force review even in auto mode
+        destructive_ops = _detect_destructive_operations(results) if results else []
+        if destructive_ops and mode == "auto_complete":
+            mode = "review"  # Downgrade to review for this pipeline run
+            _logger.warning(
+                "KIN-116: destructive operations detected in pipeline output — "
+                "forcing task %s to review. Patterns: %s",
+                task_id,
+                destructive_ops[:5],
+            )
+            try:
+                models.log_audit_event(
+                    conn,
+                    event_type="destructive_ops_detected",
+                    task_id=task_id,
+                    step_id="runner",
+                    reason=f"Destructive operations detected: {destructive_ops[:5]}",
+                    project_id=project_id,
+                )
+            except Exception:
+                pass
+
         if current_status in ("done", "cancelled"):
             pass  # User finished manually — don't overwrite
         elif mode == "auto_complete" and auto_eligible:
diff --git a/tests/test_kin_116_regression.py b/tests/test_kin_116_regression.py
new file mode 100644
index 0000000..2c799d4
--- /dev/null
+++ b/tests/test_kin_116_regression.py
@@ -0,0 +1,227 @@
+"""Regression tests for KIN-116 — destructive operation detection in Auto mode.
+
+Root cause: agents executing DELETE/DROP/rm -rf in Auto mode could complete
+without human review, silently destroying data.
+
+Fix: runner._detect_destructive_operations() scans step outputs for destructive
+patterns. If found in auto_complete mode, mode is downgraded to "review" and
+the task is put into review status instead of done.
+
+Coverage:
+(1)  _detect_destructive_operations returns [] for clean output
+(2)  _detect_destructive_operations detects `rm -rf`
+(3)  _detect_destructive_operations detects `rm -r`
+(4)  _detect_destructive_operations detects `rm -f`
+(5)  _detect_destructive_operations detects `DROP TABLE`
+(6)  _detect_destructive_operations detects `DELETE FROM`
+(7)  _detect_destructive_operations detects `unlink /path`
+(8)  _detect_destructive_operations detects `shutil.rmtree(`
+(9)  _detect_destructive_operations detects `os.remove(`
+(10) _detect_destructive_operations detects `os.unlink(`
+(11) _detect_destructive_operations searches raw_output, not just output
+(12) _detect_destructive_operations ignores failed steps (success=False)
+(13) _detect_destructive_operations is case-insensitive for SQL keywords
+(14) run_pipeline auto mode → done when no destructive ops
+(15) run_pipeline auto mode → review when destructive op in step output
+(16) run_pipeline review mode stays review regardless of destructive ops
+(17) audit_log entry written when destructive ops detected in auto mode
+"""
+
+import json
+import sqlite3
+import unittest
+from unittest.mock import MagicMock, patch
+
+from agents.runner import _detect_destructive_operations
+from core.db import init_db
+from core import models
+
+
+class TestDetectDestructivePatterns(unittest.TestCase):
+    """Checks _detect_destructive_operations() against hand-built step results."""
+
+    def _make_result(self, raw_output="", output="", success=True):
+        return dict(success=success, raw_output=raw_output, output=output)  # step-result stub
+
+    def test_clean_output_returns_empty(self):
+        """(1) Harmless text must yield an empty list."""
+        steps = [self._make_result("I updated the config file", "{}")]
+        self.assertEqual(_detect_destructive_operations(steps), [])
+
+    def test_rm_rf_detected(self):
+        """(2) Expect a detection for `rm -rf`."""
+        steps = [self._make_result("rm -rf /tmp/old_build")]
+        self.assertGreater(len(_detect_destructive_operations(steps)), 0)
+
+    def test_rm_r_detected(self):
+        """(3) Expect a detection for `rm -r`."""
+        steps = [self._make_result("rm -r old_dir/")]
+        self.assertGreater(len(_detect_destructive_operations(steps)), 0)
+
+    def test_rm_f_detected(self):
+        """(4) Expect a detection for `rm -f`."""
+        steps = [self._make_result("rm -f lockfile.pid")]
+        self.assertGreater(len(_detect_destructive_operations(steps)), 0)
+
+    def test_drop_table_detected(self):
+        """(5) Expect a detection for `DROP TABLE`."""
+        steps = [self._make_result("DROP TABLE users;")]
+        self.assertGreater(len(_detect_destructive_operations(steps)), 0)
+
+    def test_delete_from_detected(self):
+        """(6) Expect a detection for `DELETE FROM`."""
+        steps = [self._make_result("DELETE FROM sessions WHERE expired = 1;")]
+        self.assertGreater(len(_detect_destructive_operations(steps)), 0)
+
+    def test_unlink_detected(self):
+        """(7) Expect a detection for a shell `unlink <path>`."""
+        steps = [self._make_result("unlink /var/run/app.pid")]
+        self.assertGreater(len(_detect_destructive_operations(steps)), 0)
+
+    def test_shutil_rmtree_detected(self):
+        """(8) Expect a detection for `shutil.rmtree(`."""
+        steps = [self._make_result("shutil.rmtree(build_dir)")]
+        self.assertGreater(len(_detect_destructive_operations(steps)), 0)
+
+    def test_os_remove_detected(self):
+        """(9) Expect a detection for `os.remove(`."""
+        steps = [self._make_result("os.remove(path)")]
+        self.assertGreater(len(_detect_destructive_operations(steps)), 0)
+
+    def test_os_unlink_detected(self):
+        """(10) Expect a detection for `os.unlink(`."""
+        steps = [self._make_result("os.unlink(stale_file)")]
+        self.assertGreater(len(_detect_destructive_operations(steps)), 0)
+
+    def test_searches_raw_output_field(self):
+        """(11) A pattern present only in raw_output must be detected."""
+        steps = [self._make_result(raw_output="rm -rf /tmp/junk", output="{}")]
+        self.assertGreater(len(_detect_destructive_operations(steps)), 0)
+
+    def test_ignores_failed_steps(self):
+        """(12) Output of a failed step (success=False) must be ignored."""
+        steps = [self._make_result("rm -rf /entire/system", success=False)]
+        self.assertEqual(_detect_destructive_operations(steps), [])
+
+    def test_case_insensitive_sql(self):
+        """(13) Lower-case SQL keywords must still be detected."""
+        steps = [self._make_result("drop table tmp_cache;")]
+        self.assertGreater(len(_detect_destructive_operations(steps)), 0)
+
+    def test_multiple_results_all_clean(self):
+        """Several steps, all harmless: nothing may be reported."""
+        steps = [
+            self._make_result(text)
+            for text in ("Updated config", "Ran migrations")
+        ]
+        self.assertEqual(_detect_destructive_operations(steps), [])
+
+    def test_multiple_results_one_dirty(self):
+        """One destructive step among clean ones must be reported."""
+        steps = [
+            self._make_result(text)
+            for text in ("Updated config", "DELETE FROM audit_log;")
+        ]
+        self.assertGreater(len(_detect_destructive_operations(steps)), 0)
+
+
+class TestRunPipelineDestructiveAutoMode(unittest.TestCase):
+    """Integration tests: run_pipeline with destructive ops in auto mode → review."""
+
+    def setUp(self):
+        """Open a ":memory:" DB and seed one auto_complete project with one task."""
+        self.conn = init_db(":memory:")
+        self.project = models.create_project(
+            self.conn, "proj-destructive", "DestructiveTest",
+            path="/tmp/proj-destructive", execution_mode="auto_complete",
+        )
+        self.task = models.create_task(
+            self.conn, "DEST-001", "proj-destructive", "Test destructive",
+            brief={"route_type": "hotfix"},
+        )
+
+    def tearDown(self):
+        self.conn.close()
+
+    def _mock_step_result(self, raw_output="clean output"):
+        """Return a successful step-result dict whose raw_output is the given text."""
+        return {
+            "success": True,
+            "output": {"status": "done"},
+            "raw_output": raw_output,
+            "cost_usd": 0.0, "tokens_used": 0,
+            "duration_seconds": 1.0,
+        }
+
+    def test_auto_mode_clean_output_becomes_done(self):
+        """(14) auto_complete mode with clean output: the task must end as "done"."""
+        steps = [{"role": "tester", "model": "haiku"}]
+        clean_result = self._mock_step_result("All tests pass. No changes made.")
+
+        with patch("agents.runner.run_agent", return_value=clean_result), \
+             patch("agents.runner.models.get_effective_mode", return_value="auto_complete"), \
+             patch("agents.runner._run_autocommit"), \
+             patch("agents.runner._run_learning_extraction"), \
+             patch("agents.runner.run_hooks"):
+            from agents.runner import run_pipeline
+            run_pipeline(self.conn, self.task["id"], steps)
+
+        task = models.get_task(self.conn, self.task["id"])
+        self.assertEqual(task["status"], "done")
+
+    def test_auto_mode_destructive_output_becomes_review(self):
+        """(15) auto_complete mode with a destructive op: the task must end in review."""
+        steps = [{"role": "tester", "model": "haiku"}]
+        destructive_result = self._mock_step_result("rm -rf /tmp/old && tests pass")
+
+        with patch("agents.runner.run_agent", return_value=destructive_result), \
+             patch("agents.runner.models.get_effective_mode", return_value="auto_complete"), \
+             patch("agents.runner._run_autocommit"), \
+             patch("agents.runner._run_learning_extraction"), \
+             patch("agents.runner.run_hooks"):
+            from agents.runner import run_pipeline
+            run_pipeline(self.conn, self.task["id"], steps)
+
+        task = models.get_task(self.conn, self.task["id"])
+        self.assertEqual(task["status"], "review",
+                         "Auto mode with destructive ops must end in review, not done")
+
+    def test_review_mode_unaffected_by_detection(self):
+        """(16) review mode must stay review even when the output has a destructive op."""
+        steps = [{"role": "backend_dev", "model": "haiku"}]
+        destructive_result = self._mock_step_result("rm -rf build/ && updated models.py")
+
+        with patch("agents.runner.run_agent", return_value=destructive_result), \
+             patch("agents.runner.models.get_effective_mode", return_value="review"), \
+             patch("agents.runner._run_autocommit"), \
+             patch("agents.runner._run_learning_extraction"), \
+             patch("agents.runner.run_hooks"):
+            from agents.runner import run_pipeline
+            run_pipeline(self.conn, self.task["id"], steps)
+
+        task = models.get_task(self.conn, self.task["id"])
+        self.assertEqual(task["status"], "review")
+
+    def test_audit_log_written_on_destructive_detection(self):
+        """(17) A destructive op in auto mode must leave a destructive_ops_detected audit row."""
+        steps = [{"role": "tester", "model": "haiku"}]
+        destructive_result = self._mock_step_result("DELETE FROM cache;")
+
+        with patch("agents.runner.run_agent", return_value=destructive_result), \
+             patch("agents.runner.models.get_effective_mode", return_value="auto_complete"), \
+             patch("agents.runner._run_autocommit"), \
+             patch("agents.runner._run_learning_extraction"), \
+             patch("agents.runner.run_hooks"):
+            from agents.runner import run_pipeline
+            run_pipeline(self.conn, self.task["id"], steps)
+
+        rows = self.conn.execute(
+            "SELECT * FROM audit_log WHERE event_type = 'destructive_ops_detected'"
+        ).fetchall()
+        self.assertGreater(len(rows), 0, "Audit log must have destructive_ops_detected entry")
+        row = dict(rows[0])
+        self.assertEqual(row["task_id"], self.task["id"])
+
+
+if __name__ == "__main__":
+    unittest.main()  # run this module's tests when the file is executed directly