diff --git a/agents/bootstrap.py b/agents/bootstrap.py
index ca8d1d8..ecd79d7 100644
--- a/agents/bootstrap.py
+++ b/agents/bootstrap.py
@@ -42,54 +42,91 @@ _FILE_MARKERS = {
}
+_SKIP_DIRS = {"node_modules", ".git", "dist", ".next", ".nuxt", "__pycache__", ".venv", "venv"}
+
+
def detect_tech_stack(project_path: Path) -> list[str]:
- """Detect tech stack from project files."""
+ """Detect tech stack from project files.
+
+    Searches recursively up to depth 3, skipping node_modules, .git, dist, build/venv dirs.
+ Falls back to CLAUDE.md heuristics if no files found.
+ """
stack: set[str] = set()
- # Config file markers
- for fname, tech in _FILE_MARKERS.items():
- # Check root and one level deep
- if (project_path / fname).exists():
- stack.add(tech)
- for sub in ("frontend", "backend", "server", "client", "app"):
- if (project_path / sub / fname).exists():
- stack.add(tech)
+ # Recursive search for config files and package.json (depth ≤ 3)
+ for fpath in _walk_files(project_path, max_depth=3):
+ fname = fpath.name
+ if fname in _FILE_MARKERS:
+ stack.add(_FILE_MARKERS[fname])
+ if fname == "package.json":
+ stack.update(_parse_package_json(fpath))
+ if fname == "requirements.txt":
+ stack.update(_parse_requirements_txt(fpath))
+ if fname == "go.mod":
+ stack.add("go")
+ try:
+ text = fpath.read_text(errors="replace")
+ if "gin-gonic" in text:
+ stack.add("gin")
+ if "fiber" in text:
+ stack.add("fiber")
+ except OSError:
+ pass
- # package.json (root + subdirs)
- for pj_path in _find_package_jsons(project_path):
- stack.update(_parse_package_json(pj_path))
-
- # requirements.txt
- for req_path in project_path.glob("**/requirements.txt"):
- if _is_inside_node_modules(req_path, project_path):
- continue
- stack.update(_parse_requirements_txt(req_path))
-
- # go.mod
- go_mod = project_path / "go.mod"
- if go_mod.exists():
- stack.add("go")
- text = go_mod.read_text(errors="replace")
- if "gin-gonic" in text:
- stack.add("gin")
- if "fiber" in text:
- stack.add("fiber")
+ # Fallback: extract tech hints from CLAUDE.md if no config files found
+ if not stack:
+ stack.update(_detect_stack_from_claude_md(project_path))
return sorted(stack)
-def _find_package_jsons(root: Path) -> list[Path]:
- """Find package.json files (root + immediate subdirs, skip node_modules)."""
- results = []
- pj = root / "package.json"
- if pj.exists():
- results.append(pj)
- for sub in root.iterdir():
- if sub.is_dir() and sub.name != "node_modules" and not sub.name.startswith("."):
- pj = sub / "package.json"
- if pj.exists():
- results.append(pj)
- return results
+# CLAUDE.md text → tech labels (for fallback when project files are on a remote server)
+_CLAUDE_MD_TECH_HINTS = {
+ r"(?i)vue[\s.]?3": "vue3", r"(?i)vue[\s.]?2": "vue2",
+ r"(?i)\bnuxt\b": "nuxt3", r"(?i)\breact\b": "react",
+ r"(?i)\btypescript\b": "typescript", r"(?i)\bvite\b": "vite",
+ r"(?i)\btailwind": "tailwind",
+ r"(?i)node\.?js": "nodejs", r"(?i)\bexpress\b": "express",
+ r"(?i)postgresql|postgres": "postgresql",
+ r"(?i)\bsqlite\b": "sqlite", r"(?i)\bmysql\b": "mysql",
+ r"(?i)\bdocker\b": "docker",
+ r"(?i)\bpython\b": "python", r"(?i)\bfastapi\b": "fastapi",
+ r"(?i)\bdjango\b": "django", r"(?i)\bflask\b": "flask",
+ r"(?i)\bgo\b.*(?:gin|fiber|module)": "go",
+ r"(?i)\bnginx\b": "nginx",
+ r"(?i)\bpinia\b": "pinia", r"(?i)\bvuex\b": "vuex",
+}
+
+
+def _detect_stack_from_claude_md(project_path: Path) -> list[str]:
+ """Fallback: infer tech stack from CLAUDE.md text when no config files exist."""
+ claude_md = project_path / "CLAUDE.md"
+ if not claude_md.exists():
+ return []
+ try:
+ text = claude_md.read_text(errors="replace")[:5000] # First 5KB is enough
+ except OSError:
+ return []
+ stack = []
+ for pattern, tech in _CLAUDE_MD_TECH_HINTS.items():
+ if re.search(pattern, text):
+ stack.append(tech)
+ return stack
+
+
+def _walk_files(root: Path, max_depth: int = 3, _depth: int = 0):
+    """Yield files up to max_depth, skipping dirs in _SKIP_DIRS and hidden dirs."""
+ if _depth > max_depth:
+ return
+ try:
+ entries = sorted(root.iterdir())
+ except (OSError, PermissionError):
+ return
+ for entry in entries:
+ if entry.is_file():
+ yield entry
+ elif entry.is_dir() and entry.name not in _SKIP_DIRS and not entry.name.startswith("."):
+ yield from _walk_files(entry, max_depth, _depth + 1)
def _parse_package_json(path: Path) -> list[str]:
@@ -140,26 +177,40 @@ _BACKEND_MARKERS = {"express", "fastify", "koa", "router", "controller", "middle
def detect_modules(project_path: Path) -> list[dict]:
- """Scan src/ (or app/, lib/, frontend/, backend/) for modules."""
+ """Scan for modules: checks root subdirs, */src/ patterns, standard names.
+
+ Strategy:
+ 1. Find all "source root" dirs (src/, app/, lib/ at root or inside top-level dirs)
+ 2. Each first-level subdir of a source root = a module candidate
+ 3. Top-level dirs with their own src/ are treated as component roots
+ (e.g. frontend/, backend-pg/) — scan THEIR src/ for modules
+ """
modules = []
- scan_dirs = []
+ scan_dirs: list[tuple[Path, str | None]] = [] # (dir, prefix_hint)
- # Prioritized source dirs
- for name in ("src", "app", "lib", "frontend", "backend", "server", "client"):
+ # Direct source dirs in root
+ for name in ("src", "app", "lib"):
d = project_path / name
if d.is_dir():
- scan_dirs.append(d)
+ scan_dirs.append((d, None))
- # Also check frontend/src, backend/src patterns
- for name in ("frontend/src", "backend/src", "backend-pg/src"):
- d = project_path / name
- if d.is_dir():
- scan_dirs.append(d)
+ # Top-level component dirs (frontend/, backend/, backend-pg/, server/, client/)
+ # These get scanned for src/ inside, or directly if they contain source files
+ for child in sorted(project_path.iterdir()):
+ if not child.is_dir() or child.name in _SKIP_DIRS or child.name.startswith("."):
+ continue
+ child_src = child / "src"
+ if child_src.is_dir():
+ # e.g. frontend/src/, backend-pg/src/ — scan their subdirs
+ scan_dirs.append((child_src, child.name))
+ elif child.name in ("frontend", "backend", "server", "client", "web", "api"):
+ # No src/ but it's a known component dir — scan it directly
+ scan_dirs.append((child, child.name))
seen = set()
- for scan_dir in scan_dirs:
+ for scan_dir, prefix in scan_dirs:
for child in sorted(scan_dir.iterdir()):
- if not child.is_dir() or child.name.startswith(".") or child.name == "node_modules":
+ if not child.is_dir() or child.name in _SKIP_DIRS or child.name.startswith("."):
continue
mod = _analyze_module(child, project_path)
key = (mod["name"], mod["path"])
@@ -230,7 +281,7 @@ def _guess_module_type(dir_path: Path, exts: set[str], files: list[Path]) -> str
_DECISION_PATTERNS = [
(r"(?i)\b(GOTCHA|ВАЖНО|WARNING|ВНИМАНИЕ)[:\s]+(.*?)(?=\n[#\-]|\n\n|\Z)", "gotcha"),
(r"(?i)\b(WORKAROUND|ОБХОДНОЙ|ХАК)[:\s]+(.*?)(?=\n[#\-]|\n\n|\Z)", "workaround"),
- (r"(?i)\b(FIXME|TODO|БАГИ?)[:\s]+(.*?)(?=\n[#\-]|\n\n|\Z)", "gotcha"),
+ (r"(?i)\b(FIXME|БАГИ?)[:\s]+(.*?)(?=\n[#\-]|\n\n|\Z)", "gotcha"),
(r"(?i)\b(РЕШЕНИЕ|DECISION)[:\s]+(.*?)(?=\n[#\-]|\n\n|\Z)", "decision"),
(r"(?i)\b(CONVENTION|СОГЛАШЕНИЕ|ПРАВИЛО)[:\s]+(.*?)(?=\n[#\-]|\n\n|\Z)", "convention"),
]
@@ -238,13 +289,83 @@ _DECISION_PATTERNS = [
# Section headers that likely contain decisions
_DECISION_SECTIONS = [
r"(?i)known\s+issues?", r"(?i)workaround", r"(?i)gotcha",
- r"(?i)решени[яе]", r"(?i)грабл[ия]", r"(?i)важно",
+ r"(?i)решени[яе]", r"(?i)грабл[ия]",
r"(?i)conventions?", r"(?i)правила", r"(?i)нюансы",
]
+# Section headers about UNRELATED services — skip these entirely
+_UNRELATED_SECTION_PATTERNS = [
+ r"(?i)jitsi", r"(?i)nextcloud", r"(?i)prosody",
+ r"(?i)coturn", r"(?i)turn\b", r"(?i)asterisk",
+ r"(?i)ghost\s+блог", r"(?i)onlyoffice",
+ r"(?i)git\s+sync", r"(?i)\.env\s+добав",
+ r"(?i)goip\s+watcher", r"(?i)tbank\s+monitor", # monitoring services
+ r"(?i)фикс\s+удален", # commit-level fixes (not decisions)
+]
-def extract_decisions_from_claude_md(project_path: Path) -> list[dict]:
- """Parse CLAUDE.md for decisions, gotchas, workarounds."""
+# Noise patterns — individual items that look like noise, not decisions
+_NOISE_PATTERNS = [
+ r"^[0-9a-f]{6,40}$", # commit hashes
+ r"^\s*(docker|ssh|scp|git|curl|sudo)\s", # shell commands
+ r"^`[^`]+`$", # inline code-only items
+ r"(?i)(prosody|jitsi|jicofo|jvb|coturn|nextcloud|onlyoffice|ghost)", # unrelated services
+ r"(?i)\.jitsi-meet-cfg", # jitsi config paths
+ r"(?i)(meet\.jitsi|sitemeet\.org)", # jitsi domains
+ r"(?i)(cloud\.vault\.red|office\.vault)", # nextcloud domains
+ r"(?i)JWT_APP_(ID|SECRET)", # jwt config lines
+ r"(?i)XMPP_", # prosody config
+ r"\(коммит\s+`?[0-9a-f]+`?\)", # "(коммит `a33c2b9`)" references
+ r"(?i)known_uids|idle_loop|reconnect", # goip-watcher internals
+]
+
+
+def _is_noise(text: str) -> bool:
+ """Check if a decision candidate is noise."""
+ # Clean markdown bold for matching
+ clean = re.sub(r"\*\*([^*]*)\*\*", r"\1", text).strip()
+ return any(re.search(p, clean) for p in _NOISE_PATTERNS)
+
+
+def _split_into_sections(text: str) -> list[tuple[str, str]]:
+ """Split markdown into (header, body) pairs by ## headers.
+
+ Returns list of (header_text, body_text) tuples.
+ Anything before the first ## is returned with header="".
+ """
+ parts = re.split(r"(?m)^(##\s+.+)$", text)
+ sections = []
+ current_header = ""
+ current_body = parts[0] if parts else ""
+
+ for i in range(1, len(parts), 2):
+ if current_header or current_body.strip():
+ sections.append((current_header, current_body))
+ current_header = parts[i].strip()
+ current_body = parts[i + 1] if i + 1 < len(parts) else ""
+
+ if current_header or current_body.strip():
+ sections.append((current_header, current_body))
+
+ return sections
+
+
+def _is_unrelated_section(header: str) -> bool:
+ """Check if a section header is about an unrelated service."""
+ return any(re.search(p, header) for p in _UNRELATED_SECTION_PATTERNS)
+
+
+def extract_decisions_from_claude_md(
+ project_path: Path,
+ project_id: str | None = None,
+ project_name: str | None = None,
+) -> list[dict]:
+ """Parse CLAUDE.md for decisions, gotchas, workarounds.
+
+ Filters out:
+ - Sections about unrelated services (Jitsi, Nextcloud, Prosody, etc.)
+ - Noise: commit hashes, docker/ssh commands, paths to external services
+    (project_id/project_name are accepted for future multi-project filtering; currently unused.)
+ """
claude_md = project_path / "CLAUDE.md"
if not claude_md.exists():
return []
@@ -254,20 +375,30 @@ def extract_decisions_from_claude_md(project_path: Path) -> list[dict]:
except OSError:
return []
+ # Split into sections and filter out unrelated ones
+ sections = _split_into_sections(text)
+ relevant_text = []
+ for header, body in sections:
+ if _is_unrelated_section(header):
+ continue
+ relevant_text.append(header + "\n" + body)
+
+ filtered_text = "\n".join(relevant_text)
+
decisions = []
seen_titles = set()
- # Pattern-based extraction
+ # Pattern-based extraction from relevant sections only
for pattern, dec_type in _DECISION_PATTERNS:
- for m in re.finditer(pattern, text, re.DOTALL):
- label = m.group(1).strip()
+ for m in re.finditer(pattern, filtered_text, re.DOTALL):
body = m.group(2).strip()
if not body or len(body) < 10:
continue
- # First line as title, rest as description
lines = body.split("\n")
title = lines[0].strip().rstrip(".")[:100]
desc = body
+ if _is_noise(title) or _is_noise(desc):
+ continue
if title not in seen_titles:
seen_titles.add(title)
decisions.append({
@@ -277,26 +408,36 @@ def extract_decisions_from_claude_md(project_path: Path) -> list[dict]:
"category": _guess_category(title + " " + desc),
})
- # Section-based extraction: find headers matching decision sections
- sections = re.split(r"(?m)^(#{1,4}\s+.*?)$", text)
- for i, section in enumerate(sections):
+ # Section-based extraction: find ### or #### headers matching decision patterns
+ sub_sections = re.split(r"(?m)^(#{1,4}\s+.*?)$", filtered_text)
+ for i, section in enumerate(sub_sections):
if any(re.search(pat, section) for pat in _DECISION_SECTIONS):
- # The content is in the next section
- if i + 1 < len(sections):
- content = sections[i + 1].strip()
- # Extract bullet points
+ if i + 1 < len(sub_sections):
+ content = sub_sections[i + 1].strip()
for line in content.split("\n"):
line = line.strip()
- if line.startswith(("- ", "* ", "• ")):
+ # Numbered items (1. **text**) or bullet items
+ item = None
+ if re.match(r"^\d+\.\s+", line):
+ item = re.sub(r"^\d+\.\s+", "", line).strip()
+ elif line.startswith(("- ", "* ", "• ")):
item = line.lstrip("-*• ").strip()
- if item and len(item) > 10 and item[:80] not in seen_titles:
- seen_titles.add(item[:80])
- decisions.append({
- "type": "decision",
- "title": item[:100],
- "description": item,
- "category": _guess_category(item),
- })
+
+ if not item or len(item) < 10:
+ continue
+ # Clean bold markers for title
+ clean = re.sub(r"\*\*([^*]+)\*\*", r"\1", item)
+ if _is_noise(clean):
+ continue
+ title = clean[:100]
+ if title not in seen_titles:
+ seen_titles.add(title)
+ decisions.append({
+ "type": "gotcha",
+ "title": title,
+ "description": item,
+ "category": _guess_category(item),
+ })
return decisions
@@ -414,28 +555,34 @@ def _extract_obsidian_decisions(text: str, source: str, decisions: list[dict]):
for pattern, dec_type in _DECISION_PATTERNS:
for m in re.finditer(pattern, text, re.DOTALL):
body = m.group(2).strip()
- if body and len(body) > 10:
- title = body.split("\n")[0].strip()[:100]
- decisions.append({
- "type": dec_type,
- "title": title,
- "description": body,
- "category": _guess_category(body),
- "source": source,
- })
-
- # Also look for ВАЖНО/GOTCHA/FIXME inline markers not caught above
- for m in re.finditer(r"(?i)\*\*(ВАЖНО|GOTCHA|FIXME)\*\*[:\s]*(.*?)(?=\n|$)", text):
- body = m.group(2).strip()
- if body and len(body) > 10:
+ if not body or len(body) < 10:
+ continue
+ title = body.split("\n")[0].strip()[:100]
+ if _is_noise(title) or _is_noise(body):
+ continue
decisions.append({
- "type": "gotcha",
- "title": body[:100],
+ "type": dec_type,
+ "title": title,
"description": body,
"category": _guess_category(body),
"source": source,
})
+ # Also look for ВАЖНО/GOTCHA/FIXME inline markers not caught above
+ for m in re.finditer(r"(?i)\*\*(ВАЖНО|GOTCHA|FIXME)\*\*[:\s]*(.*?)(?=\n|$)", text):
+ body = m.group(2).strip()
+ if not body or len(body) < 10:
+ continue
+ if _is_noise(body):
+ continue
+ decisions.append({
+ "type": "gotcha",
+ "title": body[:100],
+ "description": body,
+ "category": _guess_category(body),
+ "source": source,
+ })
+
# ---------------------------------------------------------------------------
# Formatting for CLI preview
diff --git a/cli/main.py b/cli/main.py
index 9fde1d3..288fe6b 100644
--- a/cli/main.py
+++ b/cli/main.py
@@ -435,7 +435,7 @@ def bootstrap(ctx, path, project_id, name, vault_path, yes):
click.echo(f"Scanning {project_path} ...")
tech_stack = detect_tech_stack(project_path)
modules = detect_modules(project_path)
- decisions = extract_decisions_from_claude_md(project_path)
+ decisions = extract_decisions_from_claude_md(project_path, project_id, name)
# Obsidian
obsidian = None
diff --git a/tests/test_bootstrap.py b/tests/test_bootstrap.py
index a11c85d..20dc5ea 100644
--- a/tests/test_bootstrap.py
+++ b/tests/test_bootstrap.py
@@ -67,6 +67,27 @@ def test_detect_monorepo(tmp_path):
assert "fastapi" in stack
+def test_detect_deep_monorepo(tmp_path):
+ """Test that files nested 2-3 levels deep are found (like vdolipoperek)."""
+ fe = tmp_path / "frontend" / "src"
+ fe.mkdir(parents=True)
+ (tmp_path / "frontend" / "package.json").write_text(json.dumps({
+ "dependencies": {"vue": "^3.4"},
+ "devDependencies": {"vite": "^5.0", "tailwindcss": "^3.4"},
+ }))
+ (tmp_path / "frontend" / "vite.config.js").write_text("export default {}")
+ (tmp_path / "frontend" / "tailwind.config.js").write_text("module.exports = {}")
+
+ be = tmp_path / "backend-pg" / "src"
+ be.mkdir(parents=True)
+ (be / "index.js").write_text("const express = require('express');")
+
+ stack = detect_tech_stack(tmp_path)
+ assert "vue3" in stack
+ assert "vite" in stack
+ assert "tailwind" in stack
+
+
def test_detect_empty_dir(tmp_path):
assert detect_tech_stack(tmp_path) == []
@@ -104,6 +125,36 @@ def test_detect_modules_backend_pg(tmp_path):
assert any(m["name"] == "services" for m in modules)
+def test_detect_modules_monorepo(tmp_path):
+ """Full monorepo: frontend/src/ + backend-pg/src/."""
+ # Frontend
+ fe_views = tmp_path / "frontend" / "src" / "views"
+ fe_views.mkdir(parents=True)
+ (fe_views / "Hotel.vue").write_text("")
+ fe_comp = tmp_path / "frontend" / "src" / "components"
+ fe_comp.mkdir(parents=True)
+ (fe_comp / "Search.vue").write_text("")
+
+ # Backend
+ be_svc = tmp_path / "backend-pg" / "src" / "services"
+ be_svc.mkdir(parents=True)
+ (be_svc / "db.js").write_text("const express = require('express');")
+ be_routes = tmp_path / "backend-pg" / "src" / "routes"
+ be_routes.mkdir(parents=True)
+ (be_routes / "api.js").write_text("const router = require('express').Router();")
+
+ modules = detect_modules(tmp_path)
+ names = {m["name"] for m in modules}
+ assert "views" in names
+ assert "components" in names
+ assert "services" in names
+ assert "routes" in names
+ # Check types
+ types = {m["name"]: m["type"] for m in modules}
+ assert types["views"] == "frontend"
+ assert types["components"] == "frontend"
+
+
# ---------------------------------------------------------------------------
# Decisions from CLAUDE.md
# ---------------------------------------------------------------------------
@@ -124,7 +175,7 @@ FIXME: race condition in useSearch composable
- CSS grid fallback для IE11 (но мы его не поддерживаем)
""")
- decisions = extract_decisions_from_claude_md(tmp_path)
+ decisions = extract_decisions_from_claude_md(tmp_path, "myproj", "My Project")
assert len(decisions) >= 4
types = {d["type"] for d in decisions}
@@ -136,6 +187,58 @@ def test_extract_decisions_no_claude_md(tmp_path):
assert extract_decisions_from_claude_md(tmp_path) == []
+def test_extract_decisions_filters_unrelated_sections(tmp_path):
+ """Sections about Jitsi, Nextcloud, Prosody should be skipped."""
+ (tmp_path / "CLAUDE.md").write_text("""# vdolipoperek
+
+## Known Issues
+1. **Hotel ID mismatch** — Sletat GetTours vs GetHotels разные ID
+2. **db.js export** — module.exports = pool (НЕ { pool })
+
+## Jitsi + Nextcloud интеграция (2026-03-04)
+
+ВАЖНО: JWT_APP_SECRET must be synced between Prosody and Nextcloud
+GOTCHA: focus.meet.jitsi must be pinned in custom-config.js
+
+## Prosody config
+
+ВАЖНО: conf.d files принадлежат root → писать через docker exec
+
+## Git Sync (2026-03-03)
+
+ВАЖНО: Все среды синхронизированы на коммите 4ee5603
+""")
+
+ decisions = extract_decisions_from_claude_md(tmp_path, "vdol", "vdolipoperek")
+
+ titles = [d["title"] for d in decisions]
+ # Should have the real known issues
+ assert any("Hotel ID mismatch" in t for t in titles)
+ assert any("db.js export" in t for t in titles)
+ # Should NOT have Jitsi/Prosody/Nextcloud noise
+ assert not any("JWT_APP_SECRET" in t for t in titles)
+ assert not any("focus.meet.jitsi" in t for t in titles)
+ assert not any("conf.d files" in t for t in titles)
+
+
+def test_extract_decisions_filters_noise(tmp_path):
+ """Commit hashes and shell commands should not be decisions."""
+ (tmp_path / "CLAUDE.md").write_text("""# Project
+
+## Known Issues
+1. **Real bug** — actual architectural issue that matters
+- docker exec -it prosody bash
+- ssh dev "cd /opt/project && git pull"
+""")
+
+ decisions = extract_decisions_from_claude_md(tmp_path)
+ titles = [d["title"] for d in decisions]
+ assert any("Real bug" in t for t in titles)
+ # Shell commands should be filtered
+ assert not any("docker exec" in t for t in titles)
+ assert not any("ssh dev" in t for t in titles)
+
+
# ---------------------------------------------------------------------------
# Obsidian vault
# ---------------------------------------------------------------------------