kin: auto-commit after pipeline

2026-03-17 19:30:15 +02:00 · 2026-03-17 19:30:15 +02:00 · 0e522e54a9
commit 0e522e54a9
parent e7c65c22e5
7 changed files with 363 additions and 65 deletions
--- a/agents/runner.py
+++ b/agents/runner.py
@ -774,6 +774,50 @@ def _is_test_failure(result: dict) -> bool:
 _AUTO_TEST_ROLES = {"backend_dev", "frontend_dev"}


+def _detect_test_command(project_path: str) -> str | None:
+    """Auto-detect test command by inspecting project files.
+
+    Candidates (in priority order):
+    1. make test   — Makefile exists and has a 'test' target
+    2. npm test    — package.json exists and has scripts.test
+    3. pytest      — pyproject.toml or setup.py exists
+    4. npx tsc --noEmit — tsconfig.json exists
+
+    Returns the first matching command, or None if no framework is detected.
+    """
+    path = Path(project_path)
+
+    # 1. make test — (?!:?=) skips `test := x` / `test ::= x` variable assignments
+    makefile = path / "Makefile"
+    if makefile.is_file():
+        try:
+            content = makefile.read_text(errors="ignore")
+            if re.search(r"^test\s*:(?!:?=)", content, re.MULTILINE):
+                return "make test"
+        except OSError:
+            pass
+
+    # 2. npm test — unreadable, non-UTF-8, malformed or oddly shaped files are skipped
+    pkg_json = path / "package.json"
+    if pkg_json.is_file():
+        try:
+            pkg = json.loads(pkg_json.read_text(encoding="utf-8"))
+            if pkg.get("scripts", {}).get("test"):
+                return "npm test"
+        except (ValueError, AttributeError, OSError):  # AttributeError: not a JSON object
+            pass
+
+    # 3. pytest
+    if (path / "pyproject.toml").is_file() or (path / "setup.py").is_file():
+        return "pytest"
+
+    # 4. npx tsc --noEmit
+    if (path / "tsconfig.json").is_file():
+        return "npx tsc --noEmit"
+
+    return None
+
+
 def _run_project_tests(project_path: str, test_command: str = 'make test', timeout: int = 120) -> dict:
    """Run test_command in project_path. Returns {success, output, returncode}.

@ -1748,8 +1792,9 @@ def run_pipeline(
                previous_output = json.dumps(previous_output, ensure_ascii=False)
            continue

-        # Project-level auto-test: run `make test` after backend_dev/frontend_dev steps.
+        # Project-level auto-test: run tests after backend_dev/frontend_dev steps.
        # Enabled per project via auto_test_enabled flag (opt-in).
+        # test_command priority: project.test_command (explicit) → auto-detect → skip.
        # On failure, loop fixer up to KIN_AUTO_TEST_MAX_ATTEMPTS times, then block.
        if (
            not dry_run
@ -1759,57 +1804,73 @@ def run_pipeline(
            and project_for_wt.get("auto_test_enabled")
            and project_for_wt.get("path")
        ):
-            max_auto_test_attempts = int(os.environ.get("KIN_AUTO_TEST_MAX_ATTEMPTS") or 3)
            p_path_str = str(Path(project_for_wt["path"]).expanduser())
-            p_test_cmd = project_for_wt.get("test_command") or "make test"
-            test_run = _run_project_tests(p_path_str, p_test_cmd)
-            results.append({"role": "_auto_test", "success": test_run["success"],
-                             "output": test_run["output"], "_project_test": True})
-            auto_test_attempt = 0
-            while not test_run["success"] and auto_test_attempt < max_auto_test_attempts:
-                auto_test_attempt += 1
-                fix_context = (
-                    f"Automated project test run ({p_test_cmd}) failed after your changes.\n"
-                    f"Test output:\n{test_run['output'][:4000]}\n"
-                    f"Fix the failing tests. Do NOT modify test files."
-                )
-                fix_result = run_agent(
-                    conn, role, task_id, project_id,
-                    model=model,
-                    previous_output=fix_context,
-                    dry_run=False,
-                    allow_write=allow_write,
-                    noninteractive=noninteractive,
-                )
-                total_cost += fix_result.get("cost_usd") or 0
-                total_tokens += fix_result.get("tokens_used") or 0
-                total_duration += fix_result.get("duration_seconds") or 0
-                results.append({**fix_result, "_auto_test_fix_attempt": auto_test_attempt})
+            p_test_cmd_override = project_for_wt.get("test_command")
+            if p_test_cmd_override:
+                p_test_cmd = p_test_cmd_override
+            else:
+                p_test_cmd = _detect_test_command(p_path_str)
+
+            if p_test_cmd is None:
+                # No test framework detected — skip without blocking pipeline
+                _logger.info("auto-test: no test framework detected in %s, skipping", p_path_str)
+                results.append({
+                    "role": "_auto_test",
+                    "success": True,
+                    "output": "no test framework detected",
+                    "_project_test": True,
+                    "_skipped": True,
+                })
+            else:
+                max_auto_test_attempts = int(os.environ.get("KIN_AUTO_TEST_MAX_ATTEMPTS") or 3)
                test_run = _run_project_tests(p_path_str, p_test_cmd)
                results.append({"role": "_auto_test", "success": test_run["success"],
-                                 "output": test_run["output"], "_project_test": True,
-                                 "_attempt": auto_test_attempt})
-            if not test_run["success"]:
-                block_reason = (
-                    f"Auto-test ({p_test_cmd}) failed after {auto_test_attempt} fix attempt(s). "
-                    f"Last output: {test_run['output'][:500]}"
-                )
-                models.update_task(conn, task_id, status="blocked", blocked_reason=block_reason)
-                if pipeline:
-                    models.update_pipeline(conn, pipeline["id"], status="failed",
-                                           total_cost_usd=total_cost,
-                                           total_tokens=total_tokens,
-                                           total_duration_seconds=total_duration)
-                return {
-                    "success": False,
-                    "error": block_reason,
-                    "steps_completed": i,
-                    "results": results,
-                    "total_cost_usd": total_cost,
-                    "total_tokens": total_tokens,
-                    "total_duration_seconds": total_duration,
-                    "pipeline_id": pipeline["id"] if pipeline else None,
-                }
+                                 "output": test_run["output"], "_project_test": True})
+                auto_test_attempt = 0
+                while not test_run["success"] and auto_test_attempt < max_auto_test_attempts:
+                    auto_test_attempt += 1
+                    fix_context = (
+                        f"Automated project test run ({p_test_cmd}) failed after your changes.\n"
+                        f"Test output:\n{test_run['output'][:4000]}\n"
+                        f"Fix the failing tests. Do NOT modify test files."
+                    )
+                    fix_result = run_agent(
+                        conn, role, task_id, project_id,
+                        model=model,
+                        previous_output=fix_context,
+                        dry_run=False,
+                        allow_write=allow_write,
+                        noninteractive=noninteractive,
+                    )
+                    total_cost += fix_result.get("cost_usd") or 0
+                    total_tokens += fix_result.get("tokens_used") or 0
+                    total_duration += fix_result.get("duration_seconds") or 0
+                    results.append({**fix_result, "_auto_test_fix_attempt": auto_test_attempt})
+                    test_run = _run_project_tests(p_path_str, p_test_cmd)
+                    results.append({"role": "_auto_test", "success": test_run["success"],
+                                     "output": test_run["output"], "_project_test": True,
+                                     "_attempt": auto_test_attempt})
+                if not test_run["success"]:
+                    block_reason = (
+                        f"Auto-test ({p_test_cmd}) failed after {auto_test_attempt} fix attempt(s). "
+                        f"Last output: {test_run['output'][:500]}"
+                    )
+                    models.update_task(conn, task_id, status="blocked", blocked_reason=block_reason)
+                    if pipeline:
+                        models.update_pipeline(conn, pipeline["id"], status="failed",
+                                               total_cost_usd=total_cost,
+                                               total_tokens=total_tokens,
+                                               total_duration_seconds=total_duration)
+                    return {
+                        "success": False,
+                        "error": block_reason,
+                        "steps_completed": i,
+                        "results": results,
+                        "total_cost_usd": total_cost,
+                        "total_tokens": total_tokens,
+                        "total_duration_seconds": total_duration,
+                        "pipeline_id": pipeline["id"] if pipeline else None,
+                    }

        # Auto-test loop: if tester step has auto_fix=true and tests failed,
        # call fix_role agent and re-run tester up to max_attempts times.