kin: auto-commit after pipeline

2026-03-17 16:14:35 +02:00 · 2026-03-17 16:14:35 +02:00 · 7ee520e18e
commit 7ee520e18e
parent a9d3086139
5 changed files with 597 additions and 26 deletions
--- a/core/watchdog.py
+++ b/core/watchdog.py
@ -6,30 +6,30 @@ If the PID is dead (ProcessLookupError / ESRCH), mark pipeline as failed
 and task as blocked with a descriptive reason.
 """

+import errno
 import logging
 import os
-import sqlite3
 import threading
 import time
 from pathlib import Path

 from core import models
-from core.db import init_db
+from core.db import get_connection, init_db

 _logger = logging.getLogger("kin.watchdog")

+_watchdog_started = False
+

 def _check_dead_pipelines(db_path: Path) -> None:
    """Single watchdog pass: open a fresh connection, scan running pipelines."""
+    conn = get_connection(db_path)
    try:
-        conn = sqlite3.connect(str(db_path), check_same_thread=False)
-        conn.row_factory = sqlite3.Row
        try:
            running = models.get_running_pipelines_with_pid(conn)
        except Exception as exc:
            # Table may not exist yet on very first startup
            _logger.debug("Watchdog: could not query pipelines (%s)", exc)
-            conn.close()
            return

        for row in running:
@ -38,7 +38,8 @@ def _check_dead_pipelines(db_path: Path) -> None:
            task_id = row["task_id"]
            try:
                os.kill(pid, 0)  # signal 0 = existence check
-            except ProcessLookupError:
+            except OSError as e:
+                if e.errno == errno.ESRCH:
                    reason = f"Process died unexpectedly (PID {pid})"
                    _logger.warning(
                        "Watchdog: pipeline %s PID %s is dead — marking blocked (%s)",
@ -49,12 +50,11 @@ def _check_dead_pipelines(db_path: Path) -> None:
                        models.update_task(conn, task_id, status="blocked", blocked_reason=reason)
                    except Exception as upd_exc:
                        _logger.error("Watchdog: failed to update pipeline/task: %s", upd_exc)
-            except PermissionError:
-                # Process exists but we can't signal it (e.g. different user) — skip
-                pass
-        conn.close()
+                # else: PermissionError (EACCES) — process exists but we can't signal it, skip
    except Exception as exc:
        _logger.error("Watchdog pass failed: %s", exc)
+    finally:
+        conn.close()


 def _watchdog_loop(db_path: Path, interval: int) -> None:
@ -67,6 +67,10 @@ def _watchdog_loop(db_path: Path, interval: int) -> None:

 def start_watchdog(db_path: Path, interval: int = 30) -> None:
    """Start the background watchdog thread (daemon=True, so it dies with the process)."""
+    global _watchdog_started
+    if _watchdog_started:
+        return
+    _watchdog_started = True
    t = threading.Thread(
        target=_watchdog_loop,
        args=(db_path, interval),
--- a/tests/test_kin_arch_017_regression.py
+++ b/tests/test_kin_arch_017_regression.py
@ -0,0 +1,365 @@
+"""
+Regression tests for KIN-ARCH-017:
+Более строгая проверка отсутствия двойного создания pipeline в _execute_department_head_step.
+
+Решение #354: тест 'WHERE route_type=X AND count=1' НЕ обнаруживает дубликаты
+с ДРУГИМ route_type. Если orphaned pipeline создаётся с route_type='dept_feature'
+(а не 'dept_sub'), старый тест пропускает дубликат и ложно-зелёный.
+
+Исправление (KIN-ARCH-017): проверять ОБЩИЙ count без фильтра по route_type,
+сравнивая количество pipeline ДО и ПОСЛЕ вызова _execute_department_head_step.
+Ровно один новый pipeline должен создаваться при любом route_type дубликата.
+
+Convention #304: имена тестов описывают сломанное поведение.
+Convention #305: отдельный класс на каждый уровень цепочки.
+"""
+
+import json
+import pytest
+from unittest.mock import patch, MagicMock
+
+from core.db import init_db
+from core import models
+from agents.runner import _execute_department_head_step
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+@pytest.fixture
+def conn():
+    c = init_db(":memory:")
+    models.create_project(c, "proj", "TestProject", "~/projects/test",
+                          tech_stack=["python", "vue3"])
+    models.create_task(c, "PROJ-001", "proj", "Full-stack feature",
+                       brief={"route_type": "dept_feature"})
+    yield c
+    c.close()
+
+
+def _mock_claude_success(output_data=None):
+    mock = MagicMock()
+    mock.stdout = json.dumps(output_data or {"result": "ok"})
+    mock.stderr = ""
+    mock.returncode = 0
+    return mock
+
+
+def _dept_head_result_with_workers(workers=None):
+    """Валидный вывод dept head с sub_pipeline."""
+    return {
+        "raw_output": json.dumps({
+            "status": "done",
+            "sub_pipeline": workers or [
+                {"role": "backend_dev", "brief": "implement endpoint"},
+            ],
+            "artifacts": {"files_changed": ["api.py"]},
+            "handoff_notes": "Backend API done",
+        })
+    }
+
+
+def _count_all_pipelines(conn) -> int:
+    """Возвращает общее количество pipelines в БД без фильтра по route_type."""
+    row = conn.execute("SELECT COUNT(*) FROM pipelines").fetchone()
+    return row[0]
+
+
+# ---------------------------------------------------------------------------
+# Уровень 1: Ровно ОДИН pipeline создаётся — проверка ОБЩЕГО count (решение #354)
+#
+# Слабость старого теста KIN-ARCH-012: WHERE route_type='dept_sub' AND count=1
+# не ловит orphaned pipeline с другим route_type (например, 'dept_feature').
+# ---------------------------------------------------------------------------
+
+class TestTotalPipelineCountAfterDeptHeadStep:
+    """Уровень 1: Общий count всех pipeline до/после — ровно +1 запись в БД."""
+
+    @patch("agents.runner.check_claude_auth")
+    @patch("agents.runner.subprocess.run")
+    def test_exactly_one_new_pipeline_created_total_without_route_type_filter(
+        self, mock_run, mock_auth, conn
+    ):
+        """Решение #354: проверяем ОБЩИЙ count без фильтра по route_type.
+
+        Сломанное поведение (до KIN-ARCH-012/017): явный create_pipeline() в
+        _execute_department_head_step создавал orphaned pipeline (с любым route_type),
+        затем run_pipeline() создавал второй. Итого +2 pipeline за один вызов.
+
+        Тест 'WHERE route_type=dept_sub AND count=1' пропускал дубликат, если orphaned
+        pipeline имел другой route_type (например, dept_feature).
+
+        Fixed: сравниваем total_count_after - total_count_before == 1, без фильтра.
+        """
+        mock_auth.return_value = None
+        mock_run.return_value = _mock_claude_success()
+
+        parent_pipeline = models.create_pipeline(
+            conn, "PROJ-001", "proj", "dept_feature", [{"role": "backend_head"}]
+        )
+
+        count_before = _count_all_pipelines(conn)
+        assert count_before == 1, "До вызова должен быть только parent pipeline"
+
+        _execute_department_head_step(
+            conn, "PROJ-001", "proj",
+            parent_pipeline_id=parent_pipeline["id"],
+            step={"role": "backend_head", "brief": "plan backend"},
+            dept_head_result=_dept_head_result_with_workers(),
+        )
+
+        count_after = _count_all_pipelines(conn)
+        new_pipelines_count = count_after - count_before
+
+        assert new_pipelines_count == 1, (
+            f"Ожидалось создание ровно 1 нового pipeline, создано: {new_pipelines_count}. "
+            "Признак двойного создания (KIN-ARCH-012/017): если новых pipeline > 1, "
+            "значит _execute_department_head_step снова вызывает явный create_pipeline() "
+            "до run_pipeline(). Этот тест ловит дубликаты с ЛЮБЫМ route_type — "
+            "в отличие от 'WHERE route_type=dept_sub' из решения #354."
+        )
+
+    @patch("agents.runner.check_claude_auth")
+    @patch("agents.runner.subprocess.run")
+    def test_no_orphaned_pipeline_with_different_route_type_created(
+        self, mock_run, mock_auth, conn
+    ):
+        """Нет orphaned pipeline с route_type != 'dept_sub' после вызова.
+
+        Решение #354: если старый баг возвращается, orphaned pipeline может
+        создаваться с dept_feature или custom route_type. Явно проверяем:
+        все новые pipeline (кроме parent) должны иметь route_type='dept_sub'.
+        """
+        mock_auth.return_value = None
+        mock_run.return_value = _mock_claude_success()
+
+        parent_pipeline = models.create_pipeline(
+            conn, "PROJ-001", "proj", "dept_feature", [{"role": "backend_head"}]
+        )
+
+        _execute_department_head_step(
+            conn, "PROJ-001", "proj",
+            parent_pipeline_id=parent_pipeline["id"],
+            step={"role": "backend_head", "brief": "plan backend"},
+            dept_head_result=_dept_head_result_with_workers(),
+        )
+
+        # Все pipeline кроме parent должны иметь route_type='dept_sub'
+        child_pipelines = conn.execute(
+            "SELECT id, route_type FROM pipelines WHERE id != ?",
+            (parent_pipeline["id"],)
+        ).fetchall()
+
+        for row in child_pipelines:
+            row = dict(row)
+            assert row["route_type"] == "dept_sub", (
+                f"Найден pipeline id={row['id']} с route_type='{row['route_type']}', "
+                "ожидался 'dept_sub'. "
+                "Это orphaned pipeline — признак двойного создания (KIN-ARCH-012/017). "
+                "Решение #354: старый тест не замечал дубликаты с другим route_type."
+            )
+
+
+# ---------------------------------------------------------------------------
+# Уровень 2: route_type созданного pipeline = 'dept_sub'
+# Convention #305: отдельный класс
+# ---------------------------------------------------------------------------
+
+class TestChildPipelineRouteType:
+    """Уровень 2: Созданный child pipeline имеет route_type='dept_sub'."""
+
+    @patch("agents.runner.check_claude_auth")
+    @patch("agents.runner.subprocess.run")
+    def test_child_pipeline_route_type_is_dept_sub(
+        self, mock_run, mock_auth, conn
+    ):
+        """Единственный child pipeline должен иметь route_type='dept_sub'.
+
+        runner.py:1267: effective_route_type = 'dept_sub' if parent_pipeline_id else route_type
+        Проверяем что это условие работает и route_type корректен.
+        """
+        mock_auth.return_value = None
+        mock_run.return_value = _mock_claude_success()
+
+        parent_pipeline = models.create_pipeline(
+            conn, "PROJ-001", "proj", "dept_feature", [{"role": "backend_head"}]
+        )
+
+        _execute_department_head_step(
+            conn, "PROJ-001", "proj",
+            parent_pipeline_id=parent_pipeline["id"],
+            step={"role": "backend_head", "brief": "plan backend"},
+            dept_head_result=_dept_head_result_with_workers(),
+        )
+
+        child = conn.execute(
+            "SELECT * FROM pipelines WHERE id != ?",
+            (parent_pipeline["id"],)
+        ).fetchone()
+        assert child is not None, "Child pipeline не найден в БД"
+        child = dict(child)
+
+        assert child["route_type"] == "dept_sub", (
+            f"route_type={child['route_type']!r}, ожидался 'dept_sub'. "
+            "runner.py:1267: effective_route_type = 'dept_sub' if parent_pipeline_id else route_type"
+        )
+
+
+# ---------------------------------------------------------------------------
+# Уровень 3: parent_pipeline_id и department корректны
+# Convention #305: отдельный класс
+# ---------------------------------------------------------------------------
+
+class TestChildPipelineParentAndDepartment:
+    """Уровень 3: Единственный child pipeline имеет корректные parent_pipeline_id и department."""
+
+    @patch("agents.runner.check_claude_auth")
+    @patch("agents.runner.subprocess.run")
+    def test_child_pipeline_parent_id_and_department_are_not_none(
+        self, mock_run, mock_auth, conn
+    ):
+        """Оба поля parent_pipeline_id и department не None у child pipeline.
+
+        runner.py:1223-1224: run_pipeline() принимает parent_pipeline_id и department.
+        runner.py:1270-1271: models.create_pipeline() вызывается с этими полями.
+        """
+        mock_auth.return_value = None
+        mock_run.return_value = _mock_claude_success()
+
+        parent_pipeline = models.create_pipeline(
+            conn, "PROJ-001", "proj", "dept_feature", [{"role": "backend_head"}]
+        )
+
+        _execute_department_head_step(
+            conn, "PROJ-001", "proj",
+            parent_pipeline_id=parent_pipeline["id"],
+            step={"role": "backend_head", "brief": "plan backend"},
+            dept_head_result=_dept_head_result_with_workers(),
+        )
+
+        child = conn.execute(
+            "SELECT * FROM pipelines WHERE id != ?",
+            (parent_pipeline["id"],)
+        ).fetchone()
+        assert child is not None, "Child pipeline не найден"
+        child = dict(child)
+
+        assert child["parent_pipeline_id"] is not None, \
+            "parent_pipeline_id не должен быть None"
+        assert child["parent_pipeline_id"] == parent_pipeline["id"], (
+            f"parent_pipeline_id={child['parent_pipeline_id']!r}, "
+            f"ожидался {parent_pipeline['id']!r}"
+        )
+        assert child["department"] is not None, \
+            "department не должен быть None"
+        assert child["department"] == "backend", (
+            f"department={child['department']!r}, ожидался 'backend' "
+            "(от role='backend_head' → dept_name = role.replace('_head', ''))"
+        )
+
+
+# ---------------------------------------------------------------------------
+# Уровень 4: handoff.pipeline_id указывает на child (не orphaned) pipeline
+# Convention #305: отдельный класс
+# ---------------------------------------------------------------------------
+
+class TestHandoffPipelineIdPointsToChildNotOrphaned:
+    """Уровень 4: handoff.pipeline_id = id единственного корректного child pipeline."""
+
+    @patch("agents.runner.check_claude_auth")
+    @patch("agents.runner.subprocess.run")
+    def test_handoff_pipeline_id_matches_only_child_pipeline_in_db(
+        self, mock_run, mock_auth, conn
+    ):
+        """handoff.pipeline_id должен совпадать с child pipeline в БД.
+
+        Усиленная проверка по сравнению с KIN-ARCH-012: явно сверяем что
+        handoff.pipeline_id не равен parent и не orphaned (не в 'running') записи.
+
+        runner.py:1142: pipeline_id = sub_result.get('pipeline_id') or parent_pipeline_id
+        """
+        mock_auth.return_value = None
+        mock_run.return_value = _mock_claude_success()
+
+        parent_pipeline = models.create_pipeline(
+            conn, "PROJ-001", "proj", "dept_feature", [{"role": "backend_head"}]
+        )
+
+        _execute_department_head_step(
+            conn, "PROJ-001", "proj",
+            parent_pipeline_id=parent_pipeline["id"],
+            step={"role": "backend_head", "brief": "plan backend"},
+            dept_head_result=_dept_head_result_with_workers(),
+            next_department="frontend",
+        )
+
+        child = conn.execute(
+            "SELECT * FROM pipelines WHERE id != ?",
+            (parent_pipeline["id"],)
+        ).fetchone()
+        assert child is not None, "Child pipeline не найден"
+        child = dict(child)
+
+        handoffs = models.get_handoffs_for_task(conn, "PROJ-001")
+        assert len(handoffs) == 1, f"Ожидался 1 handoff, найдено: {len(handoffs)}"
+
+        handoff_pipeline_id = handoffs[0]["pipeline_id"]
+
+        # handoff должен ссылаться на child, а не parent
+        assert handoff_pipeline_id != parent_pipeline["id"], (
+            "handoff.pipeline_id совпадает с parent_pipeline_id — "
+            "handoff должен описывать sub-pipeline, не родительский"
+        )
+        # handoff должен ссылаться на реальный child в БД
+        assert handoff_pipeline_id == child["id"], (
+            f"handoff.pipeline_id={handoff_pipeline_id!r} не совпадает "
+            f"с child pipeline id={child['id']!r}. "
+            "До фикса KIN-ARCH-012/017 handoff мог ссылаться на orphaned pipeline."
+        )
+
+
+# ---------------------------------------------------------------------------
+# Уровень 5: Нет running dept_sub pipeline после завершения (DB-cleanup)
+# Convention #305: отдельный класс
+# ---------------------------------------------------------------------------
+
+class TestNoRunningDeptSubPipelinesAfterCompletion:
+    """Уровень 5: Нет pipeline с route_type='dept_sub' AND status='running' после вызова."""
+
+    @patch("agents.runner.check_claude_auth")
+    @patch("agents.runner.subprocess.run")
+    def test_no_dept_sub_pipeline_remains_in_running_status(
+        self, mock_run, mock_auth, conn
+    ):
+        """После завершения _execute_department_head_step нет 'running' dept_sub pipeline.
+
+        db.py:666-668: _migrate() очищает orphaned running dept_sub pipeline.
+        Тест проверяет финальное состояние — ни один dept_sub не должен висеть
+        в 'running' после вызова (иначе watchdog будет ложно блокировать задачи).
+        """
+        mock_auth.return_value = None
+        mock_run.return_value = _mock_claude_success()
+
+        parent_pipeline = models.create_pipeline(
+            conn, "PROJ-001", "proj", "dept_feature", [{"role": "backend_head"}]
+        )
+
+        _execute_department_head_step(
+            conn, "PROJ-001", "proj",
+            parent_pipeline_id=parent_pipeline["id"],
+            step={"role": "backend_head", "brief": "plan backend"},
+            dept_head_result=_dept_head_result_with_workers(),
+        )
+
+        running_dept_sub = conn.execute(
+            "SELECT id, status, route_type FROM pipelines "
+            "WHERE route_type='dept_sub' AND status='running'"
+        ).fetchall()
+
+        assert len(running_dept_sub) == 0, (
+            f"Найдено {len(running_dept_sub)} pipeline с route_type='dept_sub' AND status='running' "
+            "после завершения вызова. Это признак orphaned pipeline из старого бага "
+            "двойного создания (KIN-ARCH-012/017). "
+            "db.py:666-668: _migrate() должна очищать такие записи при инициализации."
+        )
--- a/tests/test_watchdog.py
+++ b/tests/test_watchdog.py
@ -1,13 +1,15 @@
-"""Tests for pipeline watchdog (KIN-099)."""
+"""Tests for pipeline watchdog (KIN-099, KIN-OBS-015)."""

+import errno
 import json
 import os
+import threading
 import pytest
 from unittest.mock import patch, MagicMock

-from core.db import init_db
+from core.db import init_db, get_connection
 from core import models
-from core.watchdog import _check_dead_pipelines
+from core.watchdog import _check_dead_pipelines, start_watchdog


@pytest.fixture
@ -231,3 +233,127 @@ def test_run_pipeline_aborts_when_parent_dies(conn):
        "SELECT status FROM pipelines WHERE task_id='VDOL-001' ORDER BY id DESC LIMIT 1"
    ).fetchone()
    assert row["status"] == "failed"
+
+
+# ---------------------------------------------------------------------------
+# KIN-OBS-015: start_watchdog() double-start guard
+# ---------------------------------------------------------------------------
+
+def test_start_watchdog_sets_started_flag(tmp_path):
+    """start_watchdog() устанавливает _watchdog_started = True."""
+    import core.watchdog as wd
+    wd._watchdog_started = False
+
+    db_path = tmp_path / "kin_flag.db"
+    init_db(db_path=str(db_path))
+
+    with patch("core.watchdog.threading.Thread") as mock_thread_cls:
+        mock_thread = MagicMock()
+        mock_thread_cls.return_value = mock_thread
+
+        start_watchdog(db_path, interval=9999)
+
+        assert wd._watchdog_started is True
+
+    wd._watchdog_started = False  # restore
+
+
+def test_start_watchdog_double_call_no_second_thread(tmp_path):
+    """Повторный вызов start_watchdog() не создаёт второй поток."""
+    import core.watchdog as wd
+    wd._watchdog_started = False
+
+    db_path = tmp_path / "kin_double.db"
+    init_db(db_path=str(db_path))
+
+    with patch("core.watchdog.threading.Thread") as mock_thread_cls:
+        mock_thread = MagicMock()
+        mock_thread_cls.return_value = mock_thread
+
+        start_watchdog(db_path, interval=9999)  # first call
+        start_watchdog(db_path, interval=9999)  # second call — guard должен заблокировать
+
+        # Thread создан и запущен ровно один раз
+        assert mock_thread_cls.call_count == 1
+        assert mock_thread.start.call_count == 1
+
+    wd._watchdog_started = False  # restore
+
+
+# ---------------------------------------------------------------------------
+# KIN-OBS-015: _check_dead_pipelines использует get_connection()
+# ---------------------------------------------------------------------------
+
+def test_check_dead_pipelines_calls_get_connection(tmp_path):
+    """_check_dead_pipelines вызывает get_connection() — WAL и foreign_keys PRAGMA применяются."""
+    db_path = tmp_path / "kin_gc.db"
+    real_conn = init_db(db_path=str(db_path))
+    real_conn.close()
+
+    with patch("core.watchdog.get_connection", wraps=get_connection) as mock_gc:
+        _check_dead_pipelines(db_path)
+        mock_gc.assert_called_once_with(db_path)
+
+
+def test_get_connection_applies_wal_and_foreign_keys(tmp_path):
+    """get_connection() применяет PRAGMA journal_mode=WAL и foreign_keys=ON."""
+    db_path = tmp_path / "kin_pragma.db"
+    conn = get_connection(db_path)
+
+    jm = conn.execute("PRAGMA journal_mode").fetchone()[0]
+    fk = conn.execute("PRAGMA foreign_keys").fetchone()[0]
+    conn.close()
+
+    assert jm == "wal"
+    assert fk == 1
+
+
+# ---------------------------------------------------------------------------
+# KIN-OBS-015: OSError+errno.ESRCH и другие errno — mock-based
+# ---------------------------------------------------------------------------
+
+def test_check_dead_pipelines_esrch_via_mock(tmp_path):
+    """OSError с errno=ESRCH перехватывается как мёртвый процесс и блокирует задачу."""
+    fake_pid = 54321
+    db_path, task_id, pipeline_id = _db_with_running_pipeline(tmp_path, fake_pid)
+
+    def _fake_kill(pid, sig):
+        if pid == fake_pid and sig == 0:
+            raise OSError(errno.ESRCH, "No such process")
+
+    with patch("core.watchdog.os.kill", side_effect=_fake_kill):
+        _check_dead_pipelines(db_path)
+
+    conn = init_db(db_path=str(db_path))
+    task = models.get_task(conn, task_id)
+    pipeline_row = conn.execute(
+        "SELECT status FROM pipelines WHERE id=?", (pipeline_id,)
+    ).fetchone()
+    conn.close()
+
+    assert task["status"] == "blocked"
+    assert str(fake_pid) in (task.get("blocked_reason") or "")
+    assert pipeline_row["status"] == "failed"
+
+
+def test_check_dead_pipelines_permission_error_ignored(tmp_path):
+    """OSError с errno=EACCES (процесс жив, нет прав) — статус задачи не меняется."""
+    fake_pid = 54322
+    db_path, task_id, pipeline_id = _db_with_running_pipeline(tmp_path, fake_pid)
+
+    def _fake_kill(pid, sig):
+        if pid == fake_pid and sig == 0:
+            raise OSError(errno.EACCES, "Operation not permitted")
+
+    with patch("core.watchdog.os.kill", side_effect=_fake_kill):
+        _check_dead_pipelines(db_path)
+
+    conn = init_db(db_path=str(db_path))
+    task = models.get_task(conn, task_id)
+    pipeline_row = conn.execute(
+        "SELECT status FROM pipelines WHERE id=?", (pipeline_id,)
+    ).fetchone()
+    conn.close()
+
+    assert task["status"] != "blocked"
+    assert pipeline_row["status"] == "running"
--- a/web/frontend/src/components/EscalationBanner.vue
+++ b/web/frontend/src/components/EscalationBanner.vue
@ -3,6 +3,7 @@ import { ref, computed, onMounted, onUnmounted } from 'vue'
 import { api, type EscalationNotification } from '../api'

 const STORAGE_KEY = 'kin_dismissed_escalations'
+const WATCHDOG_TOAST_KEY = 'kin_dismissed_watchdog_toasts'

 const notifications = ref<EscalationNotification[]>([])
 const showPanel = ref(false)
@ -27,9 +28,62 @@ const visible = computed(() =>
  notifications.value.filter(n => !dismissed.value.has(n.task_id))
 )

+// Watchdog toasts
+interface WatchdogToast {
+  task_id: string
+  reason: string
+  timerId: ReturnType<typeof setTimeout> | null
+}
+
+const watchdogToasts = ref<WatchdogToast[]>([])
+
+function loadDismissedWatchdog(): Set<string> {
+  try {
+    const raw = localStorage.getItem(WATCHDOG_TOAST_KEY)
+    return new Set(raw ? JSON.parse(raw) : [])
+  } catch {
+    return new Set()
+  }
+}
+
+function saveDismissedWatchdog(ids: Set<string>) {
+  localStorage.setItem(WATCHDOG_TOAST_KEY, JSON.stringify([...ids]))
+}
+
+const dismissedWatchdog = ref<Set<string>>(loadDismissedWatchdog())
+
+function isWatchdogReason(reason: string): boolean {
+  return reason.includes('Process died') || reason.includes('Parent process died')
+}
+
+function dismissWatchdogToast(taskId: string) {
+  const idx = watchdogToasts.value.findIndex(t => t.task_id === taskId)
+  if (idx >= 0) {
+    const toast = watchdogToasts.value[idx]
+    if (toast.timerId) clearTimeout(toast.timerId)
+    watchdogToasts.value.splice(idx, 1)
+  }
+  const newSet = new Set([...dismissedWatchdog.value, taskId])
+  dismissedWatchdog.value = newSet
+  saveDismissedWatchdog(newSet)
+}
+
+function checkWatchdogToasts(fresh: EscalationNotification[]) {
+  const activeIds = new Set(watchdogToasts.value.map(t => t.task_id))
+  for (const n of fresh) {
+    if (isWatchdogReason(n.reason) && !dismissedWatchdog.value.has(n.task_id) && !activeIds.has(n.task_id)) {
+      const toast: WatchdogToast = { task_id: n.task_id, reason: n.reason, timerId: null }
+      toast.timerId = setTimeout(() => dismissWatchdogToast(n.task_id), 8000)
+      watchdogToasts.value.push(toast)
+    }
+  }
+}
+
 async function load() {
  try {
-    notifications.value = await api.notifications()
+    const fresh = await api.notifications()
+    checkWatchdogToasts(fresh)
+    notifications.value = fresh
  } catch {
    // silent — не ломаем layout при недоступном endpoint
  }
@ -67,6 +121,24 @@ onUnmounted(() => {
 </script>

 <template>
+  <!-- Watchdog toast notifications -->
+  <div class="fixed top-4 right-4 z-50 flex flex-col gap-2 pointer-events-none">
+    <div
+      v-for="toast in watchdogToasts"
+      :key="toast.task_id"
+      class="pointer-events-auto flex items-start gap-2 px-3 py-2.5 border border-red-700 bg-red-950/40 text-red-400 rounded-lg shadow-xl max-w-sm"
+    >
+      <span class="shrink-0 text-sm">&#9888;</span>
+      <div class="flex-1 min-w-0">
+        <p class="text-xs leading-snug">Watchdog: задача <span class="font-mono font-semibold">{{ toast.task_id }}</span> заблокирована — {{ toast.reason }}</p>
+      </div>
+      <button
+        @click="dismissWatchdogToast(toast.task_id)"
+        class="shrink-0 text-red-400/60 hover:text-red-300 text-lg leading-none mt-[-2px] bg-transparent border-none cursor-pointer"
+      >&times;</button>
+    </div>
+  </div>
+
  <div class="relative">
    <!-- Badge-кнопка — видна только при наличии активных эскалаций -->
    <button
--- a/web/frontend/src/views/ProjectView.vue
+++ b/web/frontend/src/views/ProjectView.vue
@ -849,7 +849,8 @@ async function addDecision() {
      <div v-else class="space-y-1">
        <router-link v-for="t in filteredTasks" :key="t.id"
          :to="{ path: `/task/${t.id}`, query: selectedStatuses.length ? { back_status: selectedStatuses.join(',') } : undefined }"
-          class="flex items-center justify-between px-3 py-2 border border-gray-800 rounded text-sm hover:border-gray-600 no-underline block transition-colors">
+          class="flex flex-col gap-0.5 px-3 py-2 border border-gray-800 rounded text-sm hover:border-gray-600 no-underline block transition-colors">
+          <div class="flex items-center justify-between gap-2">
          <div class="flex items-center gap-2 min-w-0">
            <span class="text-gray-500 shrink-0 w-24">{{ t.id }}</span>
            <Badge :text="t.status" :color="taskStatusColor(t.status)" />
@ -899,6 +900,8 @@ async function addDecision() {
              ↩ Revise
            </button>
          </div>
+          </div>
+          <div v-if="t.status === 'blocked' && t.blocked_reason" class="text-xs text-red-400 truncate">{{ t.blocked_reason }}</div>
        </router-link>
      </div>
    </div>
@ -1130,7 +1133,8 @@ async function addDecision() {
              class="block px-2.5 py-2 bg-gray-900 border border-gray-800 rounded text-xs hover:border-gray-600 no-underline cursor-grab active:cursor-grabbing transition-colors select-none"
              :class="draggingTaskId === t.id ? 'opacity-40' : ''">
              <div class="text-gray-500 mb-1 text-[10px]">{{ t.id }}</div>
-              <div class="text-gray-300 leading-snug mb-1.5">{{ t.title }}</div>
+              <div class="text-gray-300 leading-snug mb-1">{{ t.title }}</div>
+              <div v-if="t.status === 'blocked' && t.blocked_reason" class="text-xs text-red-400 truncate mb-1">{{ t.blocked_reason }}</div>
              <div class="flex items-center gap-1 flex-wrap">
                <Badge v-if="t.category" :text="t.category" :color="CATEGORY_COLORS[t.category] || 'gray'" />
                <span class="text-gray-600 text-[10px] ml-auto">p{{ t.priority }}</span>