kin: auto-commit after pipeline
This commit is contained in:
parent
a9d3086139
commit
7ee520e18e
5 changed files with 597 additions and 26 deletions
|
|
@ -6,30 +6,30 @@ If the PID is dead (ProcessLookupError / ESRCH), mark pipeline as failed
|
||||||
and task as blocked with a descriptive reason.
|
and task as blocked with a descriptive reason.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import errno
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import sqlite3
|
|
||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from core import models
|
from core import models
|
||||||
from core.db import init_db
|
from core.db import get_connection, init_db
|
||||||
|
|
||||||
_logger = logging.getLogger("kin.watchdog")
|
_logger = logging.getLogger("kin.watchdog")
|
||||||
|
|
||||||
|
_watchdog_started = False
|
||||||
|
|
||||||
|
|
||||||
def _check_dead_pipelines(db_path: Path) -> None:
|
def _check_dead_pipelines(db_path: Path) -> None:
|
||||||
"""Single watchdog pass: open a fresh connection, scan running pipelines."""
|
"""Single watchdog pass: open a fresh connection, scan running pipelines."""
|
||||||
|
conn = get_connection(db_path)
|
||||||
try:
|
try:
|
||||||
conn = sqlite3.connect(str(db_path), check_same_thread=False)
|
|
||||||
conn.row_factory = sqlite3.Row
|
|
||||||
try:
|
try:
|
||||||
running = models.get_running_pipelines_with_pid(conn)
|
running = models.get_running_pipelines_with_pid(conn)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
# Table may not exist yet on very first startup
|
# Table may not exist yet on very first startup
|
||||||
_logger.debug("Watchdog: could not query pipelines (%s)", exc)
|
_logger.debug("Watchdog: could not query pipelines (%s)", exc)
|
||||||
conn.close()
|
|
||||||
return
|
return
|
||||||
|
|
||||||
for row in running:
|
for row in running:
|
||||||
|
|
@ -38,23 +38,23 @@ def _check_dead_pipelines(db_path: Path) -> None:
|
||||||
task_id = row["task_id"]
|
task_id = row["task_id"]
|
||||||
try:
|
try:
|
||||||
os.kill(pid, 0) # signal 0 = existence check
|
os.kill(pid, 0) # signal 0 = existence check
|
||||||
except ProcessLookupError:
|
except OSError as e:
|
||||||
reason = f"Process died unexpectedly (PID {pid})"
|
if e.errno == errno.ESRCH:
|
||||||
_logger.warning(
|
reason = f"Process died unexpectedly (PID {pid})"
|
||||||
"Watchdog: pipeline %s PID %s is dead — marking blocked (%s)",
|
_logger.warning(
|
||||||
pipeline_id, pid, task_id,
|
"Watchdog: pipeline %s PID %s is dead — marking blocked (%s)",
|
||||||
)
|
pipeline_id, pid, task_id,
|
||||||
try:
|
)
|
||||||
models.update_pipeline(conn, pipeline_id, status="failed")
|
try:
|
||||||
models.update_task(conn, task_id, status="blocked", blocked_reason=reason)
|
models.update_pipeline(conn, pipeline_id, status="failed")
|
||||||
except Exception as upd_exc:
|
models.update_task(conn, task_id, status="blocked", blocked_reason=reason)
|
||||||
_logger.error("Watchdog: failed to update pipeline/task: %s", upd_exc)
|
except Exception as upd_exc:
|
||||||
except PermissionError:
|
_logger.error("Watchdog: failed to update pipeline/task: %s", upd_exc)
|
||||||
# Process exists but we can't signal it (e.g. different user) — skip
|
# else: PermissionError (EACCES) — process exists but we can't signal it, skip
|
||||||
pass
|
|
||||||
conn.close()
|
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
_logger.error("Watchdog pass failed: %s", exc)
|
_logger.error("Watchdog pass failed: %s", exc)
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
def _watchdog_loop(db_path: Path, interval: int) -> None:
|
def _watchdog_loop(db_path: Path, interval: int) -> None:
|
||||||
|
|
@ -67,6 +67,10 @@ def _watchdog_loop(db_path: Path, interval: int) -> None:
|
||||||
|
|
||||||
def start_watchdog(db_path: Path, interval: int = 30) -> None:
|
def start_watchdog(db_path: Path, interval: int = 30) -> None:
|
||||||
"""Start the background watchdog thread (daemon=True, so it dies with the process)."""
|
"""Start the background watchdog thread (daemon=True, so it dies with the process)."""
|
||||||
|
global _watchdog_started
|
||||||
|
if _watchdog_started:
|
||||||
|
return
|
||||||
|
_watchdog_started = True
|
||||||
t = threading.Thread(
|
t = threading.Thread(
|
||||||
target=_watchdog_loop,
|
target=_watchdog_loop,
|
||||||
args=(db_path, interval),
|
args=(db_path, interval),
|
||||||
|
|
|
||||||
365
tests/test_kin_arch_017_regression.py
Normal file
365
tests/test_kin_arch_017_regression.py
Normal file
|
|
@ -0,0 +1,365 @@
|
||||||
|
"""
|
||||||
|
Regression tests for KIN-ARCH-017:
|
||||||
|
Более строгая проверка отсутствия двойного создания pipeline в _execute_department_head_step.
|
||||||
|
|
||||||
|
Решение #354: тест 'WHERE route_type=X AND count=1' НЕ обнаруживает дубликаты
|
||||||
|
с ДРУГИМ route_type. Если orphaned pipeline создаётся с route_type='dept_feature'
|
||||||
|
(а не 'dept_sub'), старый тест пропускает дубликат и ложно-зелёный.
|
||||||
|
|
||||||
|
Исправление (KIN-ARCH-017): проверять ОБЩИЙ count без фильтра по route_type,
|
||||||
|
сравнивая количество pipeline ДО и ПОСЛЕ вызова _execute_department_head_step.
|
||||||
|
Ровно один новый pipeline должен создаваться при любом route_type дубликата.
|
||||||
|
|
||||||
|
Convention #304: имена тестов описывают сломанное поведение.
|
||||||
|
Convention #305: отдельный класс на каждый уровень цепочки.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import pytest
|
||||||
|
from unittest.mock import patch, MagicMock
|
||||||
|
|
||||||
|
from core.db import init_db
|
||||||
|
from core import models
|
||||||
|
from agents.runner import _execute_department_head_step
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Fixtures
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def conn():
|
||||||
|
c = init_db(":memory:")
|
||||||
|
models.create_project(c, "proj", "TestProject", "~/projects/test",
|
||||||
|
tech_stack=["python", "vue3"])
|
||||||
|
models.create_task(c, "PROJ-001", "proj", "Full-stack feature",
|
||||||
|
brief={"route_type": "dept_feature"})
|
||||||
|
yield c
|
||||||
|
c.close()
|
||||||
|
|
||||||
|
|
||||||
|
def _mock_claude_success(output_data=None):
|
||||||
|
mock = MagicMock()
|
||||||
|
mock.stdout = json.dumps(output_data or {"result": "ok"})
|
||||||
|
mock.stderr = ""
|
||||||
|
mock.returncode = 0
|
||||||
|
return mock
|
||||||
|
|
||||||
|
|
||||||
|
def _dept_head_result_with_workers(workers=None):
|
||||||
|
"""Валидный вывод dept head с sub_pipeline."""
|
||||||
|
return {
|
||||||
|
"raw_output": json.dumps({
|
||||||
|
"status": "done",
|
||||||
|
"sub_pipeline": workers or [
|
||||||
|
{"role": "backend_dev", "brief": "implement endpoint"},
|
||||||
|
],
|
||||||
|
"artifacts": {"files_changed": ["api.py"]},
|
||||||
|
"handoff_notes": "Backend API done",
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _count_all_pipelines(conn) -> int:
|
||||||
|
"""Возвращает общее количество pipelines в БД без фильтра по route_type."""
|
||||||
|
row = conn.execute("SELECT COUNT(*) FROM pipelines").fetchone()
|
||||||
|
return row[0]
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Уровень 1: Ровно ОДИН pipeline создаётся — проверка ОБЩЕГО count (решение #354)
|
||||||
|
#
|
||||||
|
# Слабость старого теста KIN-ARCH-012: WHERE route_type='dept_sub' AND count=1
|
||||||
|
# не ловит orphaned pipeline с другим route_type (например, 'dept_feature').
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestTotalPipelineCountAfterDeptHeadStep:
|
||||||
|
"""Уровень 1: Общий count всех pipeline до/после — ровно +1 запись в БД."""
|
||||||
|
|
||||||
|
@patch("agents.runner.check_claude_auth")
|
||||||
|
@patch("agents.runner.subprocess.run")
|
||||||
|
def test_exactly_one_new_pipeline_created_total_without_route_type_filter(
|
||||||
|
self, mock_run, mock_auth, conn
|
||||||
|
):
|
||||||
|
"""Решение #354: проверяем ОБЩИЙ count без фильтра по route_type.
|
||||||
|
|
||||||
|
Сломанное поведение (до KIN-ARCH-012/017): явный create_pipeline() в
|
||||||
|
_execute_department_head_step создавал orphaned pipeline (с любым route_type),
|
||||||
|
затем run_pipeline() создавал второй. Итого +2 pipeline за один вызов.
|
||||||
|
|
||||||
|
Тест 'WHERE route_type=dept_sub AND count=1' пропускал дубликат, если orphaned
|
||||||
|
pipeline имел другой route_type (например, dept_feature).
|
||||||
|
|
||||||
|
Fixed: сравниваем total_count_after - total_count_before == 1, без фильтра.
|
||||||
|
"""
|
||||||
|
mock_auth.return_value = None
|
||||||
|
mock_run.return_value = _mock_claude_success()
|
||||||
|
|
||||||
|
parent_pipeline = models.create_pipeline(
|
||||||
|
conn, "PROJ-001", "proj", "dept_feature", [{"role": "backend_head"}]
|
||||||
|
)
|
||||||
|
|
||||||
|
count_before = _count_all_pipelines(conn)
|
||||||
|
assert count_before == 1, "До вызова должен быть только parent pipeline"
|
||||||
|
|
||||||
|
_execute_department_head_step(
|
||||||
|
conn, "PROJ-001", "proj",
|
||||||
|
parent_pipeline_id=parent_pipeline["id"],
|
||||||
|
step={"role": "backend_head", "brief": "plan backend"},
|
||||||
|
dept_head_result=_dept_head_result_with_workers(),
|
||||||
|
)
|
||||||
|
|
||||||
|
count_after = _count_all_pipelines(conn)
|
||||||
|
new_pipelines_count = count_after - count_before
|
||||||
|
|
||||||
|
assert new_pipelines_count == 1, (
|
||||||
|
f"Ожидалось создание ровно 1 нового pipeline, создано: {new_pipelines_count}. "
|
||||||
|
"Признак двойного создания (KIN-ARCH-012/017): если новых pipeline > 1, "
|
||||||
|
"значит _execute_department_head_step снова вызывает явный create_pipeline() "
|
||||||
|
"до run_pipeline(). Этот тест ловит дубликаты с ЛЮБЫМ route_type — "
|
||||||
|
"в отличие от 'WHERE route_type=dept_sub' из решения #354."
|
||||||
|
)
|
||||||
|
|
||||||
|
@patch("agents.runner.check_claude_auth")
|
||||||
|
@patch("agents.runner.subprocess.run")
|
||||||
|
def test_no_orphaned_pipeline_with_different_route_type_created(
|
||||||
|
self, mock_run, mock_auth, conn
|
||||||
|
):
|
||||||
|
"""Нет orphaned pipeline с route_type != 'dept_sub' после вызова.
|
||||||
|
|
||||||
|
Решение #354: если старый баг возвращается, orphaned pipeline может
|
||||||
|
создаваться с dept_feature или custom route_type. Явно проверяем:
|
||||||
|
все новые pipeline (кроме parent) должны иметь route_type='dept_sub'.
|
||||||
|
"""
|
||||||
|
mock_auth.return_value = None
|
||||||
|
mock_run.return_value = _mock_claude_success()
|
||||||
|
|
||||||
|
parent_pipeline = models.create_pipeline(
|
||||||
|
conn, "PROJ-001", "proj", "dept_feature", [{"role": "backend_head"}]
|
||||||
|
)
|
||||||
|
|
||||||
|
_execute_department_head_step(
|
||||||
|
conn, "PROJ-001", "proj",
|
||||||
|
parent_pipeline_id=parent_pipeline["id"],
|
||||||
|
step={"role": "backend_head", "brief": "plan backend"},
|
||||||
|
dept_head_result=_dept_head_result_with_workers(),
|
||||||
|
)
|
||||||
|
|
||||||
|
# Все pipeline кроме parent должны иметь route_type='dept_sub'
|
||||||
|
child_pipelines = conn.execute(
|
||||||
|
"SELECT id, route_type FROM pipelines WHERE id != ?",
|
||||||
|
(parent_pipeline["id"],)
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
for row in child_pipelines:
|
||||||
|
row = dict(row)
|
||||||
|
assert row["route_type"] == "dept_sub", (
|
||||||
|
f"Найден pipeline id={row['id']} с route_type='{row['route_type']}', "
|
||||||
|
"ожидался 'dept_sub'. "
|
||||||
|
"Это orphaned pipeline — признак двойного создания (KIN-ARCH-012/017). "
|
||||||
|
"Решение #354: старый тест не замечал дубликаты с другим route_type."
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Уровень 2: route_type созданного pipeline = 'dept_sub'
|
||||||
|
# Convention #305: отдельный класс
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestChildPipelineRouteType:
|
||||||
|
"""Уровень 2: Созданный child pipeline имеет route_type='dept_sub'."""
|
||||||
|
|
||||||
|
@patch("agents.runner.check_claude_auth")
|
||||||
|
@patch("agents.runner.subprocess.run")
|
||||||
|
def test_child_pipeline_route_type_is_dept_sub(
|
||||||
|
self, mock_run, mock_auth, conn
|
||||||
|
):
|
||||||
|
"""Единственный child pipeline должен иметь route_type='dept_sub'.
|
||||||
|
|
||||||
|
runner.py:1267: effective_route_type = 'dept_sub' if parent_pipeline_id else route_type
|
||||||
|
Проверяем что это условие работает и route_type корректен.
|
||||||
|
"""
|
||||||
|
mock_auth.return_value = None
|
||||||
|
mock_run.return_value = _mock_claude_success()
|
||||||
|
|
||||||
|
parent_pipeline = models.create_pipeline(
|
||||||
|
conn, "PROJ-001", "proj", "dept_feature", [{"role": "backend_head"}]
|
||||||
|
)
|
||||||
|
|
||||||
|
_execute_department_head_step(
|
||||||
|
conn, "PROJ-001", "proj",
|
||||||
|
parent_pipeline_id=parent_pipeline["id"],
|
||||||
|
step={"role": "backend_head", "brief": "plan backend"},
|
||||||
|
dept_head_result=_dept_head_result_with_workers(),
|
||||||
|
)
|
||||||
|
|
||||||
|
child = conn.execute(
|
||||||
|
"SELECT * FROM pipelines WHERE id != ?",
|
||||||
|
(parent_pipeline["id"],)
|
||||||
|
).fetchone()
|
||||||
|
assert child is not None, "Child pipeline не найден в БД"
|
||||||
|
child = dict(child)
|
||||||
|
|
||||||
|
assert child["route_type"] == "dept_sub", (
|
||||||
|
f"route_type={child['route_type']!r}, ожидался 'dept_sub'. "
|
||||||
|
"runner.py:1267: effective_route_type = 'dept_sub' if parent_pipeline_id else route_type"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Уровень 3: parent_pipeline_id и department корректны
|
||||||
|
# Convention #305: отдельный класс
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestChildPipelineParentAndDepartment:
|
||||||
|
"""Уровень 3: Единственный child pipeline имеет корректные parent_pipeline_id и department."""
|
||||||
|
|
||||||
|
@patch("agents.runner.check_claude_auth")
|
||||||
|
@patch("agents.runner.subprocess.run")
|
||||||
|
def test_child_pipeline_parent_id_and_department_are_not_none(
|
||||||
|
self, mock_run, mock_auth, conn
|
||||||
|
):
|
||||||
|
"""Оба поля parent_pipeline_id и department не None у child pipeline.
|
||||||
|
|
||||||
|
runner.py:1223-1224: run_pipeline() принимает parent_pipeline_id и department.
|
||||||
|
runner.py:1270-1271: models.create_pipeline() вызывается с этими полями.
|
||||||
|
"""
|
||||||
|
mock_auth.return_value = None
|
||||||
|
mock_run.return_value = _mock_claude_success()
|
||||||
|
|
||||||
|
parent_pipeline = models.create_pipeline(
|
||||||
|
conn, "PROJ-001", "proj", "dept_feature", [{"role": "backend_head"}]
|
||||||
|
)
|
||||||
|
|
||||||
|
_execute_department_head_step(
|
||||||
|
conn, "PROJ-001", "proj",
|
||||||
|
parent_pipeline_id=parent_pipeline["id"],
|
||||||
|
step={"role": "backend_head", "brief": "plan backend"},
|
||||||
|
dept_head_result=_dept_head_result_with_workers(),
|
||||||
|
)
|
||||||
|
|
||||||
|
child = conn.execute(
|
||||||
|
"SELECT * FROM pipelines WHERE id != ?",
|
||||||
|
(parent_pipeline["id"],)
|
||||||
|
).fetchone()
|
||||||
|
assert child is not None, "Child pipeline не найден"
|
||||||
|
child = dict(child)
|
||||||
|
|
||||||
|
assert child["parent_pipeline_id"] is not None, \
|
||||||
|
"parent_pipeline_id не должен быть None"
|
||||||
|
assert child["parent_pipeline_id"] == parent_pipeline["id"], (
|
||||||
|
f"parent_pipeline_id={child['parent_pipeline_id']!r}, "
|
||||||
|
f"ожидался {parent_pipeline['id']!r}"
|
||||||
|
)
|
||||||
|
assert child["department"] is not None, \
|
||||||
|
"department не должен быть None"
|
||||||
|
assert child["department"] == "backend", (
|
||||||
|
f"department={child['department']!r}, ожидался 'backend' "
|
||||||
|
"(от role='backend_head' → dept_name = role.replace('_head', ''))"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Уровень 4: handoff.pipeline_id указывает на child (не orphaned) pipeline
|
||||||
|
# Convention #305: отдельный класс
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestHandoffPipelineIdPointsToChildNotOrphaned:
|
||||||
|
"""Уровень 4: handoff.pipeline_id = id единственного корректного child pipeline."""
|
||||||
|
|
||||||
|
@patch("agents.runner.check_claude_auth")
|
||||||
|
@patch("agents.runner.subprocess.run")
|
||||||
|
def test_handoff_pipeline_id_matches_only_child_pipeline_in_db(
|
||||||
|
self, mock_run, mock_auth, conn
|
||||||
|
):
|
||||||
|
"""handoff.pipeline_id должен совпадать с child pipeline в БД.
|
||||||
|
|
||||||
|
Усиленная проверка по сравнению с KIN-ARCH-012: явно сверяем что
|
||||||
|
handoff.pipeline_id не равен parent и не orphaned (не в 'running') записи.
|
||||||
|
|
||||||
|
runner.py:1142: pipeline_id = sub_result.get('pipeline_id') or parent_pipeline_id
|
||||||
|
"""
|
||||||
|
mock_auth.return_value = None
|
||||||
|
mock_run.return_value = _mock_claude_success()
|
||||||
|
|
||||||
|
parent_pipeline = models.create_pipeline(
|
||||||
|
conn, "PROJ-001", "proj", "dept_feature", [{"role": "backend_head"}]
|
||||||
|
)
|
||||||
|
|
||||||
|
_execute_department_head_step(
|
||||||
|
conn, "PROJ-001", "proj",
|
||||||
|
parent_pipeline_id=parent_pipeline["id"],
|
||||||
|
step={"role": "backend_head", "brief": "plan backend"},
|
||||||
|
dept_head_result=_dept_head_result_with_workers(),
|
||||||
|
next_department="frontend",
|
||||||
|
)
|
||||||
|
|
||||||
|
child = conn.execute(
|
||||||
|
"SELECT * FROM pipelines WHERE id != ?",
|
||||||
|
(parent_pipeline["id"],)
|
||||||
|
).fetchone()
|
||||||
|
assert child is not None, "Child pipeline не найден"
|
||||||
|
child = dict(child)
|
||||||
|
|
||||||
|
handoffs = models.get_handoffs_for_task(conn, "PROJ-001")
|
||||||
|
assert len(handoffs) == 1, f"Ожидался 1 handoff, найдено: {len(handoffs)}"
|
||||||
|
|
||||||
|
handoff_pipeline_id = handoffs[0]["pipeline_id"]
|
||||||
|
|
||||||
|
# handoff должен ссылаться на child, а не parent
|
||||||
|
assert handoff_pipeline_id != parent_pipeline["id"], (
|
||||||
|
"handoff.pipeline_id совпадает с parent_pipeline_id — "
|
||||||
|
"handoff должен описывать sub-pipeline, не родительский"
|
||||||
|
)
|
||||||
|
# handoff должен ссылаться на реальный child в БД
|
||||||
|
assert handoff_pipeline_id == child["id"], (
|
||||||
|
f"handoff.pipeline_id={handoff_pipeline_id!r} не совпадает "
|
||||||
|
f"с child pipeline id={child['id']!r}. "
|
||||||
|
"До фикса KIN-ARCH-012/017 handoff мог ссылаться на orphaned pipeline."
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Уровень 5: Нет running dept_sub pipeline после завершения (DB-cleanup)
|
||||||
|
# Convention #305: отдельный класс
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestNoRunningDeptSubPipelinesAfterCompletion:
|
||||||
|
"""Уровень 5: Нет pipeline с route_type='dept_sub' AND status='running' после вызова."""
|
||||||
|
|
||||||
|
@patch("agents.runner.check_claude_auth")
|
||||||
|
@patch("agents.runner.subprocess.run")
|
||||||
|
def test_no_dept_sub_pipeline_remains_in_running_status(
|
||||||
|
self, mock_run, mock_auth, conn
|
||||||
|
):
|
||||||
|
"""После завершения _execute_department_head_step нет 'running' dept_sub pipeline.
|
||||||
|
|
||||||
|
db.py:666-668: _migrate() очищает orphaned running dept_sub pipeline.
|
||||||
|
Тест проверяет финальное состояние — ни один dept_sub не должен висеть
|
||||||
|
в 'running' после вызова (иначе watchdog будет ложно блокировать задачи).
|
||||||
|
"""
|
||||||
|
mock_auth.return_value = None
|
||||||
|
mock_run.return_value = _mock_claude_success()
|
||||||
|
|
||||||
|
parent_pipeline = models.create_pipeline(
|
||||||
|
conn, "PROJ-001", "proj", "dept_feature", [{"role": "backend_head"}]
|
||||||
|
)
|
||||||
|
|
||||||
|
_execute_department_head_step(
|
||||||
|
conn, "PROJ-001", "proj",
|
||||||
|
parent_pipeline_id=parent_pipeline["id"],
|
||||||
|
step={"role": "backend_head", "brief": "plan backend"},
|
||||||
|
dept_head_result=_dept_head_result_with_workers(),
|
||||||
|
)
|
||||||
|
|
||||||
|
running_dept_sub = conn.execute(
|
||||||
|
"SELECT id, status, route_type FROM pipelines "
|
||||||
|
"WHERE route_type='dept_sub' AND status='running'"
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
assert len(running_dept_sub) == 0, (
|
||||||
|
f"Найдено {len(running_dept_sub)} pipeline с route_type='dept_sub' AND status='running' "
|
||||||
|
"после завершения вызова. Это признак orphaned pipeline из старого бага "
|
||||||
|
"двойного создания (KIN-ARCH-012/017). "
|
||||||
|
"db.py:666-668: _migrate() должна очищать такие записи при инициализации."
|
||||||
|
)
|
||||||
|
|
@ -1,13 +1,15 @@
|
||||||
"""Tests for pipeline watchdog (KIN-099)."""
|
"""Tests for pipeline watchdog (KIN-099, KIN-OBS-015)."""
|
||||||
|
|
||||||
|
import errno
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
import threading
|
||||||
import pytest
|
import pytest
|
||||||
from unittest.mock import patch, MagicMock
|
from unittest.mock import patch, MagicMock
|
||||||
|
|
||||||
from core.db import init_db
|
from core.db import init_db, get_connection
|
||||||
from core import models
|
from core import models
|
||||||
from core.watchdog import _check_dead_pipelines
|
from core.watchdog import _check_dead_pipelines, start_watchdog
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
|
|
@ -231,3 +233,127 @@ def test_run_pipeline_aborts_when_parent_dies(conn):
|
||||||
"SELECT status FROM pipelines WHERE task_id='VDOL-001' ORDER BY id DESC LIMIT 1"
|
"SELECT status FROM pipelines WHERE task_id='VDOL-001' ORDER BY id DESC LIMIT 1"
|
||||||
).fetchone()
|
).fetchone()
|
||||||
assert row["status"] == "failed"
|
assert row["status"] == "failed"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# KIN-OBS-015: start_watchdog() double-start guard
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_start_watchdog_sets_started_flag(tmp_path):
|
||||||
|
"""start_watchdog() устанавливает _watchdog_started = True."""
|
||||||
|
import core.watchdog as wd
|
||||||
|
wd._watchdog_started = False
|
||||||
|
|
||||||
|
db_path = tmp_path / "kin_flag.db"
|
||||||
|
init_db(db_path=str(db_path))
|
||||||
|
|
||||||
|
with patch("core.watchdog.threading.Thread") as mock_thread_cls:
|
||||||
|
mock_thread = MagicMock()
|
||||||
|
mock_thread_cls.return_value = mock_thread
|
||||||
|
|
||||||
|
start_watchdog(db_path, interval=9999)
|
||||||
|
|
||||||
|
assert wd._watchdog_started is True
|
||||||
|
|
||||||
|
wd._watchdog_started = False # restore
|
||||||
|
|
||||||
|
|
||||||
|
def test_start_watchdog_double_call_no_second_thread(tmp_path):
|
||||||
|
"""Повторный вызов start_watchdog() не создаёт второй поток."""
|
||||||
|
import core.watchdog as wd
|
||||||
|
wd._watchdog_started = False
|
||||||
|
|
||||||
|
db_path = tmp_path / "kin_double.db"
|
||||||
|
init_db(db_path=str(db_path))
|
||||||
|
|
||||||
|
with patch("core.watchdog.threading.Thread") as mock_thread_cls:
|
||||||
|
mock_thread = MagicMock()
|
||||||
|
mock_thread_cls.return_value = mock_thread
|
||||||
|
|
||||||
|
start_watchdog(db_path, interval=9999) # first call
|
||||||
|
start_watchdog(db_path, interval=9999) # second call — guard должен заблокировать
|
||||||
|
|
||||||
|
# Thread создан и запущен ровно один раз
|
||||||
|
assert mock_thread_cls.call_count == 1
|
||||||
|
assert mock_thread.start.call_count == 1
|
||||||
|
|
||||||
|
wd._watchdog_started = False # restore
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# KIN-OBS-015: _check_dead_pipelines использует get_connection()
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_check_dead_pipelines_calls_get_connection(tmp_path):
|
||||||
|
"""_check_dead_pipelines вызывает get_connection() — WAL и foreign_keys PRAGMA применяются."""
|
||||||
|
db_path = tmp_path / "kin_gc.db"
|
||||||
|
real_conn = init_db(db_path=str(db_path))
|
||||||
|
real_conn.close()
|
||||||
|
|
||||||
|
with patch("core.watchdog.get_connection", wraps=get_connection) as mock_gc:
|
||||||
|
_check_dead_pipelines(db_path)
|
||||||
|
mock_gc.assert_called_once_with(db_path)
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_connection_applies_wal_and_foreign_keys(tmp_path):
|
||||||
|
"""get_connection() применяет PRAGMA journal_mode=WAL и foreign_keys=ON."""
|
||||||
|
db_path = tmp_path / "kin_pragma.db"
|
||||||
|
conn = get_connection(db_path)
|
||||||
|
|
||||||
|
jm = conn.execute("PRAGMA journal_mode").fetchone()[0]
|
||||||
|
fk = conn.execute("PRAGMA foreign_keys").fetchone()[0]
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
assert jm == "wal"
|
||||||
|
assert fk == 1
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# KIN-OBS-015: OSError+errno.ESRCH и другие errno — mock-based
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_check_dead_pipelines_esrch_via_mock(tmp_path):
|
||||||
|
"""OSError с errno=ESRCH перехватывается как мёртвый процесс и блокирует задачу."""
|
||||||
|
fake_pid = 54321
|
||||||
|
db_path, task_id, pipeline_id = _db_with_running_pipeline(tmp_path, fake_pid)
|
||||||
|
|
||||||
|
def _fake_kill(pid, sig):
|
||||||
|
if pid == fake_pid and sig == 0:
|
||||||
|
raise OSError(errno.ESRCH, "No such process")
|
||||||
|
|
||||||
|
with patch("core.watchdog.os.kill", side_effect=_fake_kill):
|
||||||
|
_check_dead_pipelines(db_path)
|
||||||
|
|
||||||
|
conn = init_db(db_path=str(db_path))
|
||||||
|
task = models.get_task(conn, task_id)
|
||||||
|
pipeline_row = conn.execute(
|
||||||
|
"SELECT status FROM pipelines WHERE id=?", (pipeline_id,)
|
||||||
|
).fetchone()
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
assert task["status"] == "blocked"
|
||||||
|
assert str(fake_pid) in (task.get("blocked_reason") or "")
|
||||||
|
assert pipeline_row["status"] == "failed"
|
||||||
|
|
||||||
|
|
||||||
|
def test_check_dead_pipelines_permission_error_ignored(tmp_path):
|
||||||
|
"""OSError с errno=EACCES (процесс жив, нет прав) — статус задачи не меняется."""
|
||||||
|
fake_pid = 54322
|
||||||
|
db_path, task_id, pipeline_id = _db_with_running_pipeline(tmp_path, fake_pid)
|
||||||
|
|
||||||
|
def _fake_kill(pid, sig):
|
||||||
|
if pid == fake_pid and sig == 0:
|
||||||
|
raise OSError(errno.EACCES, "Operation not permitted")
|
||||||
|
|
||||||
|
with patch("core.watchdog.os.kill", side_effect=_fake_kill):
|
||||||
|
_check_dead_pipelines(db_path)
|
||||||
|
|
||||||
|
conn = init_db(db_path=str(db_path))
|
||||||
|
task = models.get_task(conn, task_id)
|
||||||
|
pipeline_row = conn.execute(
|
||||||
|
"SELECT status FROM pipelines WHERE id=?", (pipeline_id,)
|
||||||
|
).fetchone()
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
assert task["status"] != "blocked"
|
||||||
|
assert pipeline_row["status"] == "running"
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ import { ref, computed, onMounted, onUnmounted } from 'vue'
|
||||||
import { api, type EscalationNotification } from '../api'
|
import { api, type EscalationNotification } from '../api'
|
||||||
|
|
||||||
const STORAGE_KEY = 'kin_dismissed_escalations'
|
const STORAGE_KEY = 'kin_dismissed_escalations'
|
||||||
|
const WATCHDOG_TOAST_KEY = 'kin_dismissed_watchdog_toasts'
|
||||||
|
|
||||||
const notifications = ref<EscalationNotification[]>([])
|
const notifications = ref<EscalationNotification[]>([])
|
||||||
const showPanel = ref(false)
|
const showPanel = ref(false)
|
||||||
|
|
@ -27,9 +28,62 @@ const visible = computed(() =>
|
||||||
notifications.value.filter(n => !dismissed.value.has(n.task_id))
|
notifications.value.filter(n => !dismissed.value.has(n.task_id))
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// Watchdog toasts
|
||||||
|
interface WatchdogToast {
|
||||||
|
task_id: string
|
||||||
|
reason: string
|
||||||
|
timerId: ReturnType<typeof setTimeout> | null
|
||||||
|
}
|
||||||
|
|
||||||
|
const watchdogToasts = ref<WatchdogToast[]>([])
|
||||||
|
|
||||||
|
function loadDismissedWatchdog(): Set<string> {
|
||||||
|
try {
|
||||||
|
const raw = localStorage.getItem(WATCHDOG_TOAST_KEY)
|
||||||
|
return new Set(raw ? JSON.parse(raw) : [])
|
||||||
|
} catch {
|
||||||
|
return new Set()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function saveDismissedWatchdog(ids: Set<string>) {
|
||||||
|
localStorage.setItem(WATCHDOG_TOAST_KEY, JSON.stringify([...ids]))
|
||||||
|
}
|
||||||
|
|
||||||
|
const dismissedWatchdog = ref<Set<string>>(loadDismissedWatchdog())
|
||||||
|
|
||||||
|
function isWatchdogReason(reason: string): boolean {
|
||||||
|
return reason.includes('Process died') || reason.includes('Parent process died')
|
||||||
|
}
|
||||||
|
|
||||||
|
function dismissWatchdogToast(taskId: string) {
|
||||||
|
const idx = watchdogToasts.value.findIndex(t => t.task_id === taskId)
|
||||||
|
if (idx >= 0) {
|
||||||
|
const toast = watchdogToasts.value[idx]
|
||||||
|
if (toast.timerId) clearTimeout(toast.timerId)
|
||||||
|
watchdogToasts.value.splice(idx, 1)
|
||||||
|
}
|
||||||
|
const newSet = new Set([...dismissedWatchdog.value, taskId])
|
||||||
|
dismissedWatchdog.value = newSet
|
||||||
|
saveDismissedWatchdog(newSet)
|
||||||
|
}
|
||||||
|
|
||||||
|
function checkWatchdogToasts(fresh: EscalationNotification[]) {
|
||||||
|
const activeIds = new Set(watchdogToasts.value.map(t => t.task_id))
|
||||||
|
for (const n of fresh) {
|
||||||
|
if (isWatchdogReason(n.reason) && !dismissedWatchdog.value.has(n.task_id) && !activeIds.has(n.task_id)) {
|
||||||
|
const toast: WatchdogToast = { task_id: n.task_id, reason: n.reason, timerId: null }
|
||||||
|
toast.timerId = setTimeout(() => dismissWatchdogToast(n.task_id), 8000)
|
||||||
|
watchdogToasts.value.push(toast)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
async function load() {
|
async function load() {
|
||||||
try {
|
try {
|
||||||
notifications.value = await api.notifications()
|
const fresh = await api.notifications()
|
||||||
|
checkWatchdogToasts(fresh)
|
||||||
|
notifications.value = fresh
|
||||||
} catch {
|
} catch {
|
||||||
// silent — не ломаем layout при недоступном endpoint
|
// silent — не ломаем layout при недоступном endpoint
|
||||||
}
|
}
|
||||||
|
|
@ -67,6 +121,24 @@ onUnmounted(() => {
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<template>
|
<template>
|
||||||
|
<!-- Watchdog toast notifications -->
|
||||||
|
<div class="fixed top-4 right-4 z-50 flex flex-col gap-2 pointer-events-none">
|
||||||
|
<div
|
||||||
|
v-for="toast in watchdogToasts"
|
||||||
|
:key="toast.task_id"
|
||||||
|
class="pointer-events-auto flex items-start gap-2 px-3 py-2.5 border border-red-700 bg-red-950/40 text-red-400 rounded-lg shadow-xl max-w-sm"
|
||||||
|
>
|
||||||
|
<span class="shrink-0 text-sm">⚠</span>
|
||||||
|
<div class="flex-1 min-w-0">
|
||||||
|
<p class="text-xs leading-snug">Watchdog: задача <span class="font-mono font-semibold">{{ toast.task_id }}</span> заблокирована — {{ toast.reason }}</p>
|
||||||
|
</div>
|
||||||
|
<button
|
||||||
|
@click="dismissWatchdogToast(toast.task_id)"
|
||||||
|
class="shrink-0 text-red-400/60 hover:text-red-300 text-lg leading-none mt-[-2px] bg-transparent border-none cursor-pointer"
|
||||||
|
>×</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<div class="relative">
|
<div class="relative">
|
||||||
<!-- Badge-кнопка — видна только при наличии активных эскалаций -->
|
<!-- Badge-кнопка — видна только при наличии активных эскалаций -->
|
||||||
<button
|
<button
|
||||||
|
|
|
||||||
|
|
@ -849,7 +849,8 @@ async function addDecision() {
|
||||||
<div v-else class="space-y-1">
|
<div v-else class="space-y-1">
|
||||||
<router-link v-for="t in filteredTasks" :key="t.id"
|
<router-link v-for="t in filteredTasks" :key="t.id"
|
||||||
:to="{ path: `/task/${t.id}`, query: selectedStatuses.length ? { back_status: selectedStatuses.join(',') } : undefined }"
|
:to="{ path: `/task/${t.id}`, query: selectedStatuses.length ? { back_status: selectedStatuses.join(',') } : undefined }"
|
||||||
class="flex items-center justify-between px-3 py-2 border border-gray-800 rounded text-sm hover:border-gray-600 no-underline block transition-colors">
|
class="flex flex-col gap-0.5 px-3 py-2 border border-gray-800 rounded text-sm hover:border-gray-600 no-underline block transition-colors">
|
||||||
|
<div class="flex items-center justify-between gap-2">
|
||||||
<div class="flex items-center gap-2 min-w-0">
|
<div class="flex items-center gap-2 min-w-0">
|
||||||
<span class="text-gray-500 shrink-0 w-24">{{ t.id }}</span>
|
<span class="text-gray-500 shrink-0 w-24">{{ t.id }}</span>
|
||||||
<Badge :text="t.status" :color="taskStatusColor(t.status)" />
|
<Badge :text="t.status" :color="taskStatusColor(t.status)" />
|
||||||
|
|
@ -899,6 +900,8 @@ async function addDecision() {
|
||||||
↩ Revise
|
↩ Revise
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
|
</div>
|
||||||
|
<div v-if="t.status === 'blocked' && t.blocked_reason" class="text-xs text-red-400 truncate">{{ t.blocked_reason }}</div>
|
||||||
</router-link>
|
</router-link>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
@ -1130,7 +1133,8 @@ async function addDecision() {
|
||||||
class="block px-2.5 py-2 bg-gray-900 border border-gray-800 rounded text-xs hover:border-gray-600 no-underline cursor-grab active:cursor-grabbing transition-colors select-none"
|
class="block px-2.5 py-2 bg-gray-900 border border-gray-800 rounded text-xs hover:border-gray-600 no-underline cursor-grab active:cursor-grabbing transition-colors select-none"
|
||||||
:class="draggingTaskId === t.id ? 'opacity-40' : ''">
|
:class="draggingTaskId === t.id ? 'opacity-40' : ''">
|
||||||
<div class="text-gray-500 mb-1 text-[10px]">{{ t.id }}</div>
|
<div class="text-gray-500 mb-1 text-[10px]">{{ t.id }}</div>
|
||||||
<div class="text-gray-300 leading-snug mb-1.5">{{ t.title }}</div>
|
<div class="text-gray-300 leading-snug mb-1">{{ t.title }}</div>
|
||||||
|
<div v-if="t.status === 'blocked' && t.blocked_reason" class="text-xs text-red-400 truncate mb-1">{{ t.blocked_reason }}</div>
|
||||||
<div class="flex items-center gap-1 flex-wrap">
|
<div class="flex items-center gap-1 flex-wrap">
|
||||||
<Badge v-if="t.category" :text="t.category" :color="CATEGORY_COLORS[t.category] || 'gray'" />
|
<Badge v-if="t.category" :text="t.category" :color="CATEGORY_COLORS[t.category] || 'gray'" />
|
||||||
<span class="text-gray-600 text-[10px] ml-auto">p{{ t.priority }}</span>
|
<span class="text-gray-600 text-[10px] ml-auto">p{{ t.priority }}</span>
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue