kin: auto-commit after pipeline

This commit is contained in:
Gros Frumos 2026-03-17 19:30:15 +02:00
parent e7c65c22e5
commit 0e522e54a9
7 changed files with 363 additions and 65 deletions

View file

@ -774,6 +774,50 @@ def _is_test_failure(result: dict) -> bool:
# Agent roles associated with the project-level auto-test run.
# NOTE(review): presumably run_pipeline checks a step's role against this set
# before running project tests — the consuming condition is not visible here; confirm.
_AUTO_TEST_ROLES = {"backend_dev", "frontend_dev"}
def _detect_test_command(project_path: str) -> str | None:
"""Auto-detect test command by inspecting project files.
Candidates (in priority order):
1. make test Makefile exists and has a 'test' target
2. npm test package.json exists and has scripts.test
3. pytest pyproject.toml or setup.py exists
4. npx tsc --noEmit tsconfig.json exists
Returns the first matching command, or None if no framework is detected.
"""
path = Path(project_path)
# 1. make test
makefile = path / "Makefile"
if makefile.is_file():
try:
content = makefile.read_text(errors="ignore")
if re.search(r"^test\s*:", content, re.MULTILINE):
return "make test"
except OSError:
pass
# 2. npm test
pkg_json = path / "package.json"
if pkg_json.is_file():
try:
pkg = json.loads(pkg_json.read_text())
if pkg.get("scripts", {}).get("test"):
return "npm test"
except (json.JSONDecodeError, OSError):
pass
# 3. pytest
if (path / "pyproject.toml").is_file() or (path / "setup.py").is_file():
return "pytest"
# 4. npx tsc --noEmit
if (path / "tsconfig.json").is_file():
return "npx tsc --noEmit"
return None
def _run_project_tests(project_path: str, test_command: str = 'make test', timeout: int = 120) -> dict:
"""Run test_command in project_path. Returns {success, output, returncode}.
@ -1748,8 +1792,9 @@ def run_pipeline(
previous_output = json.dumps(previous_output, ensure_ascii=False)
continue
# Project-level auto-test: run `make test` after backend_dev/frontend_dev steps.
# Project-level auto-test: run tests after backend_dev/frontend_dev steps.
# Enabled per project via auto_test_enabled flag (opt-in).
# test_command priority: project.test_command (explicit) → auto-detect → skip.
# On failure, loop fixer up to KIN_AUTO_TEST_MAX_ATTEMPTS times, then block.
if (
not dry_run
@ -1759,57 +1804,73 @@ def run_pipeline(
and project_for_wt.get("auto_test_enabled")
and project_for_wt.get("path")
):
max_auto_test_attempts = int(os.environ.get("KIN_AUTO_TEST_MAX_ATTEMPTS") or 3)
p_path_str = str(Path(project_for_wt["path"]).expanduser())
p_test_cmd = project_for_wt.get("test_command") or "make test"
test_run = _run_project_tests(p_path_str, p_test_cmd)
results.append({"role": "_auto_test", "success": test_run["success"],
"output": test_run["output"], "_project_test": True})
auto_test_attempt = 0
while not test_run["success"] and auto_test_attempt < max_auto_test_attempts:
auto_test_attempt += 1
fix_context = (
f"Automated project test run ({p_test_cmd}) failed after your changes.\n"
f"Test output:\n{test_run['output'][:4000]}\n"
f"Fix the failing tests. Do NOT modify test files."
)
fix_result = run_agent(
conn, role, task_id, project_id,
model=model,
previous_output=fix_context,
dry_run=False,
allow_write=allow_write,
noninteractive=noninteractive,
)
total_cost += fix_result.get("cost_usd") or 0
total_tokens += fix_result.get("tokens_used") or 0
total_duration += fix_result.get("duration_seconds") or 0
results.append({**fix_result, "_auto_test_fix_attempt": auto_test_attempt})
p_test_cmd_override = project_for_wt.get("test_command")
if p_test_cmd_override:
p_test_cmd = p_test_cmd_override
else:
p_test_cmd = _detect_test_command(p_path_str)
if p_test_cmd is None:
# No test framework detected — skip without blocking pipeline
_logger.info("auto-test: no test framework detected in %s, skipping", p_path_str)
results.append({
"role": "_auto_test",
"success": True,
"output": "no test framework detected",
"_project_test": True,
"_skipped": True,
})
else:
max_auto_test_attempts = int(os.environ.get("KIN_AUTO_TEST_MAX_ATTEMPTS") or 3)
test_run = _run_project_tests(p_path_str, p_test_cmd)
results.append({"role": "_auto_test", "success": test_run["success"],
"output": test_run["output"], "_project_test": True,
"_attempt": auto_test_attempt})
if not test_run["success"]:
block_reason = (
f"Auto-test ({p_test_cmd}) failed after {auto_test_attempt} fix attempt(s). "
f"Last output: {test_run['output'][:500]}"
)
models.update_task(conn, task_id, status="blocked", blocked_reason=block_reason)
if pipeline:
models.update_pipeline(conn, pipeline["id"], status="failed",
total_cost_usd=total_cost,
total_tokens=total_tokens,
total_duration_seconds=total_duration)
return {
"success": False,
"error": block_reason,
"steps_completed": i,
"results": results,
"total_cost_usd": total_cost,
"total_tokens": total_tokens,
"total_duration_seconds": total_duration,
"pipeline_id": pipeline["id"] if pipeline else None,
}
"output": test_run["output"], "_project_test": True})
auto_test_attempt = 0
while not test_run["success"] and auto_test_attempt < max_auto_test_attempts:
auto_test_attempt += 1
fix_context = (
f"Automated project test run ({p_test_cmd}) failed after your changes.\n"
f"Test output:\n{test_run['output'][:4000]}\n"
f"Fix the failing tests. Do NOT modify test files."
)
fix_result = run_agent(
conn, role, task_id, project_id,
model=model,
previous_output=fix_context,
dry_run=False,
allow_write=allow_write,
noninteractive=noninteractive,
)
total_cost += fix_result.get("cost_usd") or 0
total_tokens += fix_result.get("tokens_used") or 0
total_duration += fix_result.get("duration_seconds") or 0
results.append({**fix_result, "_auto_test_fix_attempt": auto_test_attempt})
test_run = _run_project_tests(p_path_str, p_test_cmd)
results.append({"role": "_auto_test", "success": test_run["success"],
"output": test_run["output"], "_project_test": True,
"_attempt": auto_test_attempt})
if not test_run["success"]:
block_reason = (
f"Auto-test ({p_test_cmd}) failed after {auto_test_attempt} fix attempt(s). "
f"Last output: {test_run['output'][:500]}"
)
models.update_task(conn, task_id, status="blocked", blocked_reason=block_reason)
if pipeline:
models.update_pipeline(conn, pipeline["id"], status="failed",
total_cost_usd=total_cost,
total_tokens=total_tokens,
total_duration_seconds=total_duration)
return {
"success": False,
"error": block_reason,
"steps_completed": i,
"results": results,
"total_cost_usd": total_cost,
"total_tokens": total_tokens,
"total_duration_seconds": total_duration,
"pipeline_id": pipeline["id"] if pipeline else None,
}
# Auto-test loop: if tester step has auto_fix=true and tests failed,
# call fix_role agent and re-run tester up to max_attempts times.