diff --git a/agents/bootstrap.py b/agents/bootstrap.py index ca8d1d8..ecd79d7 100644 --- a/agents/bootstrap.py +++ b/agents/bootstrap.py @@ -42,54 +42,91 @@ _FILE_MARKERS = { } +_SKIP_DIRS = {"node_modules", ".git", "dist", ".next", ".nuxt", "__pycache__", ".venv", "venv"} + + def detect_tech_stack(project_path: Path) -> list[str]: - """Detect tech stack from project files.""" + """Detect tech stack from project files. + + Searches recursively up to depth 3, skipping node_modules/.git/dist. + Falls back to CLAUDE.md heuristics if no files found. + """ stack: set[str] = set() - # Config file markers - for fname, tech in _FILE_MARKERS.items(): - # Check root and one level deep - if (project_path / fname).exists(): - stack.add(tech) - for sub in ("frontend", "backend", "server", "client", "app"): - if (project_path / sub / fname).exists(): - stack.add(tech) + # Recursive search for config files and package.json (depth ≤ 3) + for fpath in _walk_files(project_path, max_depth=3): + fname = fpath.name + if fname in _FILE_MARKERS: + stack.add(_FILE_MARKERS[fname]) + if fname == "package.json": + stack.update(_parse_package_json(fpath)) + if fname == "requirements.txt": + stack.update(_parse_requirements_txt(fpath)) + if fname == "go.mod": + stack.add("go") + try: + text = fpath.read_text(errors="replace") + if "gin-gonic" in text: + stack.add("gin") + if "fiber" in text: + stack.add("fiber") + except OSError: + pass - # package.json (root + subdirs) - for pj_path in _find_package_jsons(project_path): - stack.update(_parse_package_json(pj_path)) - - # requirements.txt - for req_path in project_path.glob("**/requirements.txt"): - if _is_inside_node_modules(req_path, project_path): - continue - stack.update(_parse_requirements_txt(req_path)) - - # go.mod - go_mod = project_path / "go.mod" - if go_mod.exists(): - stack.add("go") - text = go_mod.read_text(errors="replace") - if "gin-gonic" in text: - stack.add("gin") - if "fiber" in text: - stack.add("fiber") + # Fallback: 
extract tech hints from CLAUDE.md if no config files found + if not stack: + stack.update(_detect_stack_from_claude_md(project_path)) return sorted(stack) -def _find_package_jsons(root: Path) -> list[Path]: - """Find package.json files (root + immediate subdirs, skip node_modules).""" - results = [] - pj = root / "package.json" - if pj.exists(): - results.append(pj) - for sub in root.iterdir(): - if sub.is_dir() and sub.name != "node_modules" and not sub.name.startswith("."): - pj = sub / "package.json" - if pj.exists(): - results.append(pj) - return results +# CLAUDE.md text → tech labels (for fallback when project files are on a remote server) +_CLAUDE_MD_TECH_HINTS = { + r"(?i)vue[\s.]?3": "vue3", r"(?i)vue[\s.]?2": "vue2", + r"(?i)\bnuxt\b": "nuxt3", r"(?i)\breact\b": "react", + r"(?i)\btypescript\b": "typescript", r"(?i)\bvite\b": "vite", + r"(?i)\btailwind": "tailwind", + r"(?i)node\.?js": "nodejs", r"(?i)\bexpress\b": "express", + r"(?i)postgresql|postgres": "postgresql", + r"(?i)\bsqlite\b": "sqlite", r"(?i)\bmysql\b": "mysql", + r"(?i)\bdocker\b": "docker", + r"(?i)\bpython\b": "python", r"(?i)\bfastapi\b": "fastapi", + r"(?i)\bdjango\b": "django", r"(?i)\bflask\b": "flask", + r"(?i)\bgo\b.*(?:gin|fiber|module)": "go", + r"(?i)\bnginx\b": "nginx", + r"(?i)\bpinia\b": "pinia", r"(?i)\bvuex\b": "vuex", +} + + +def _detect_stack_from_claude_md(project_path: Path) -> list[str]: + """Fallback: infer tech stack from CLAUDE.md text when no config files exist.""" + claude_md = project_path / "CLAUDE.md" + if not claude_md.exists(): + return [] + try: + text = claude_md.read_text(errors="replace")[:5000] # First 5KB is enough + except OSError: + return [] + stack = [] + for pattern, tech in _CLAUDE_MD_TECH_HINTS.items(): + if re.search(pattern, text): + stack.append(tech) + return stack + + +def _walk_files(root: Path, max_depth: int = 3, _depth: int = 0): + """Yield files up to max_depth, skipping node_modules/dist/.git.""" + if _depth > max_depth: + return + try: 
+ entries = sorted(root.iterdir()) + except (OSError, PermissionError): + return + for entry in entries: + if entry.is_file(): + yield entry + elif entry.is_dir() and entry.name not in _SKIP_DIRS and not entry.name.startswith("."): + yield from _walk_files(entry, max_depth, _depth + 1) def _parse_package_json(path: Path) -> list[str]: @@ -140,26 +177,40 @@ _BACKEND_MARKERS = {"express", "fastify", "koa", "router", "controller", "middle def detect_modules(project_path: Path) -> list[dict]: - """Scan src/ (or app/, lib/, frontend/, backend/) for modules.""" + """Scan for modules: checks root subdirs, */src/ patterns, standard names. + + Strategy: + 1. Find all "source root" dirs (src/, app/, lib/ at root or inside top-level dirs) + 2. Each first-level subdir of a source root = a module candidate + 3. Top-level dirs with their own src/ are treated as component roots + (e.g. frontend/, backend-pg/) — scan THEIR src/ for modules + """ modules = [] - scan_dirs = [] + scan_dirs: list[tuple[Path, str | None]] = [] # (dir, prefix_hint) - # Prioritized source dirs - for name in ("src", "app", "lib", "frontend", "backend", "server", "client"): + # Direct source dirs in root + for name in ("src", "app", "lib"): d = project_path / name if d.is_dir(): - scan_dirs.append(d) + scan_dirs.append((d, None)) - # Also check frontend/src, backend/src patterns - for name in ("frontend/src", "backend/src", "backend-pg/src"): - d = project_path / name - if d.is_dir(): - scan_dirs.append(d) + # Top-level component dirs (frontend/, backend/, backend-pg/, server/, client/) + # These get scanned for src/ inside, or directly if they contain source files + for child in sorted(project_path.iterdir()): + if not child.is_dir() or child.name in _SKIP_DIRS or child.name.startswith("."): + continue + child_src = child / "src" + if child_src.is_dir(): + # e.g. 
frontend/src/, backend-pg/src/ — scan their subdirs
+            scan_dirs.append((child_src, child.name))
+        elif child.name in ("frontend", "backend", "server", "client", "web", "api"):
+            # No src/ but it's a known component dir — scan it directly
+            scan_dirs.append((child, child.name))
 
     seen = set()
-    for scan_dir in scan_dirs:
+    for scan_dir, prefix in scan_dirs:
         for child in sorted(scan_dir.iterdir()):
-            if not child.is_dir() or child.name.startswith(".") or child.name == "node_modules":
+            if not child.is_dir() or child.name in _SKIP_DIRS or child.name.startswith("."):
                 continue
             mod = _analyze_module(child, project_path)
             key = (mod["name"], mod["path"])
@@ -230,7 +281,7 @@ def _guess_module_type(dir_path: Path, exts: set[str], files: list[Path]) -> str
 _DECISION_PATTERNS = [
     (r"(?i)\b(GOTCHA|ВАЖНО|WARNING|ВНИМАНИЕ)[:\s]+(.*?)(?=\n[#\-]|\n\n|\Z)", "gotcha"),
     (r"(?i)\b(WORKAROUND|ОБХОДНОЙ|ХАК)[:\s]+(.*?)(?=\n[#\-]|\n\n|\Z)", "workaround"),
-    (r"(?i)\b(FIXME|TODO|БАГИ?)[:\s]+(.*?)(?=\n[#\-]|\n\n|\Z)", "gotcha"),
+    (r"(?i)\b(FIXME|БАГИ?)[:\s]+(.*?)(?=\n[#\-]|\n\n|\Z)", "gotcha"),
     (r"(?i)\b(РЕШЕНИЕ|DECISION)[:\s]+(.*?)(?=\n[#\-]|\n\n|\Z)", "decision"),
     (r"(?i)\b(CONVENTION|СОГЛАШЕНИЕ|ПРАВИЛО)[:\s]+(.*?)(?=\n[#\-]|\n\n|\Z)", "convention"),
 ]
@@ -238,13 +289,83 @@ _DECISION_PATTERNS = [
 
 # Section headers that likely contain decisions
 _DECISION_SECTIONS = [
     r"(?i)known\s+issues?", r"(?i)workaround", r"(?i)gotcha",
-    r"(?i)решени[яе]", r"(?i)грабл[ия]", r"(?i)важно",
+    r"(?i)решени[яе]", r"(?i)грабл[ия]",
     r"(?i)conventions?", r"(?i)правила", r"(?i)нюансы",
 ]
 
+# Section headers about UNRELATED services — skip these entirely
+_UNRELATED_SECTION_PATTERNS = [
+    r"(?i)jitsi", r"(?i)nextcloud", r"(?i)prosody",
+    r"(?i)coturn", r"(?i)\bturn\b", r"(?i)asterisk",
+    r"(?i)ghost\s+блог", r"(?i)onlyoffice",
+    r"(?i)git\s+sync", r"(?i)\.env\s+добав",
+    r"(?i)goip\s+watcher", r"(?i)tbank\s+monitor",  # monitoring services
+    r"(?i)фикс\s+удален",  # commit-level fixes (not decisions)
+]
 

-def 
extract_decisions_from_claude_md(project_path: Path) -> list[dict]: - """Parse CLAUDE.md for decisions, gotchas, workarounds.""" +# Noise patterns — individual items that look like noise, not decisions +_NOISE_PATTERNS = [ + r"^[0-9a-f]{6,40}$", # commit hashes + r"^\s*(docker|ssh|scp|git|curl|sudo)\s", # shell commands + r"^`[^`]+`$", # inline code-only items + r"(?i)(prosody|jitsi|jicofo|jvb|coturn|nextcloud|onlyoffice|ghost)", # unrelated services + r"(?i)\.jitsi-meet-cfg", # jitsi config paths + r"(?i)(meet\.jitsi|sitemeet\.org)", # jitsi domains + r"(?i)(cloud\.vault\.red|office\.vault)", # nextcloud domains + r"(?i)JWT_APP_(ID|SECRET)", # jwt config lines + r"(?i)XMPP_", # prosody config + r"\(коммит\s+`?[0-9a-f]+`?\)", # "(коммит `a33c2b9`)" references + r"(?i)known_uids|idle_loop|reconnect", # goip-watcher internals +] + + +def _is_noise(text: str) -> bool: + """Check if a decision candidate is noise.""" + # Clean markdown bold for matching + clean = re.sub(r"\*\*([^*]*)\*\*", r"\1", text).strip() + return any(re.search(p, clean) for p in _NOISE_PATTERNS) + + +def _split_into_sections(text: str) -> list[tuple[str, str]]: + """Split markdown into (header, body) pairs by ## headers. + + Returns list of (header_text, body_text) tuples. + Anything before the first ## is returned with header="". 
+ """ + parts = re.split(r"(?m)^(##\s+.+)$", text) + sections = [] + current_header = "" + current_body = parts[0] if parts else "" + + for i in range(1, len(parts), 2): + if current_header or current_body.strip(): + sections.append((current_header, current_body)) + current_header = parts[i].strip() + current_body = parts[i + 1] if i + 1 < len(parts) else "" + + if current_header or current_body.strip(): + sections.append((current_header, current_body)) + + return sections + + +def _is_unrelated_section(header: str) -> bool: + """Check if a section header is about an unrelated service.""" + return any(re.search(p, header) for p in _UNRELATED_SECTION_PATTERNS) + + +def extract_decisions_from_claude_md( + project_path: Path, + project_id: str | None = None, + project_name: str | None = None, +) -> list[dict]: + """Parse CLAUDE.md for decisions, gotchas, workarounds. + + Filters out: + - Sections about unrelated services (Jitsi, Nextcloud, Prosody, etc.) + - Noise: commit hashes, docker/ssh commands, paths to external services + - If CLAUDE.md has multi-project sections, only extracts for current project + """ claude_md = project_path / "CLAUDE.md" if not claude_md.exists(): return [] @@ -254,20 +375,30 @@ def extract_decisions_from_claude_md(project_path: Path) -> list[dict]: except OSError: return [] + # Split into sections and filter out unrelated ones + sections = _split_into_sections(text) + relevant_text = [] + for header, body in sections: + if _is_unrelated_section(header): + continue + relevant_text.append(header + "\n" + body) + + filtered_text = "\n".join(relevant_text) + decisions = [] seen_titles = set() - # Pattern-based extraction + # Pattern-based extraction from relevant sections only for pattern, dec_type in _DECISION_PATTERNS: - for m in re.finditer(pattern, text, re.DOTALL): - label = m.group(1).strip() + for m in re.finditer(pattern, filtered_text, re.DOTALL): body = m.group(2).strip() if not body or len(body) < 10: continue - # First line as 
title, rest as description lines = body.split("\n") title = lines[0].strip().rstrip(".")[:100] desc = body + if _is_noise(title) or _is_noise(desc): + continue if title not in seen_titles: seen_titles.add(title) decisions.append({ @@ -277,26 +408,36 @@ def extract_decisions_from_claude_md(project_path: Path) -> list[dict]: "category": _guess_category(title + " " + desc), }) - # Section-based extraction: find headers matching decision sections - sections = re.split(r"(?m)^(#{1,4}\s+.*?)$", text) - for i, section in enumerate(sections): + # Section-based extraction: find ### or #### headers matching decision patterns + sub_sections = re.split(r"(?m)^(#{1,4}\s+.*?)$", filtered_text) + for i, section in enumerate(sub_sections): if any(re.search(pat, section) for pat in _DECISION_SECTIONS): - # The content is in the next section - if i + 1 < len(sections): - content = sections[i + 1].strip() - # Extract bullet points + if i + 1 < len(sub_sections): + content = sub_sections[i + 1].strip() for line in content.split("\n"): line = line.strip() - if line.startswith(("- ", "* ", "• ")): + # Numbered items (1. 
**text**) or bullet items + item = None + if re.match(r"^\d+\.\s+", line): + item = re.sub(r"^\d+\.\s+", "", line).strip() + elif line.startswith(("- ", "* ", "• ")): item = line.lstrip("-*• ").strip() - if item and len(item) > 10 and item[:80] not in seen_titles: - seen_titles.add(item[:80]) - decisions.append({ - "type": "decision", - "title": item[:100], - "description": item, - "category": _guess_category(item), - }) + + if not item or len(item) < 10: + continue + # Clean bold markers for title + clean = re.sub(r"\*\*([^*]+)\*\*", r"\1", item) + if _is_noise(clean): + continue + title = clean[:100] + if title not in seen_titles: + seen_titles.add(title) + decisions.append({ + "type": "gotcha", + "title": title, + "description": item, + "category": _guess_category(item), + }) return decisions @@ -414,28 +555,34 @@ def _extract_obsidian_decisions(text: str, source: str, decisions: list[dict]): for pattern, dec_type in _DECISION_PATTERNS: for m in re.finditer(pattern, text, re.DOTALL): body = m.group(2).strip() - if body and len(body) > 10: - title = body.split("\n")[0].strip()[:100] - decisions.append({ - "type": dec_type, - "title": title, - "description": body, - "category": _guess_category(body), - "source": source, - }) - - # Also look for ВАЖНО/GOTCHA/FIXME inline markers not caught above - for m in re.finditer(r"(?i)\*\*(ВАЖНО|GOTCHA|FIXME)\*\*[:\s]*(.*?)(?=\n|$)", text): - body = m.group(2).strip() - if body and len(body) > 10: + if not body or len(body) < 10: + continue + title = body.split("\n")[0].strip()[:100] + if _is_noise(title) or _is_noise(body): + continue decisions.append({ - "type": "gotcha", - "title": body[:100], + "type": dec_type, + "title": title, "description": body, "category": _guess_category(body), "source": source, }) + # Also look for ВАЖНО/GOTCHA/FIXME inline markers not caught above + for m in re.finditer(r"(?i)\*\*(ВАЖНО|GOTCHA|FIXME)\*\*[:\s]*(.*?)(?=\n|$)", text): + body = m.group(2).strip() + if not body or len(body) < 10: + 
continue + if _is_noise(body): + continue + decisions.append({ + "type": "gotcha", + "title": body[:100], + "description": body, + "category": _guess_category(body), + "source": source, + }) + # --------------------------------------------------------------------------- # Formatting for CLI preview diff --git a/cli/main.py b/cli/main.py index 9fde1d3..288fe6b 100644 --- a/cli/main.py +++ b/cli/main.py @@ -435,7 +435,7 @@ def bootstrap(ctx, path, project_id, name, vault_path, yes): click.echo(f"Scanning {project_path} ...") tech_stack = detect_tech_stack(project_path) modules = detect_modules(project_path) - decisions = extract_decisions_from_claude_md(project_path) + decisions = extract_decisions_from_claude_md(project_path, project_id, name) # Obsidian obsidian = None diff --git a/tests/test_bootstrap.py b/tests/test_bootstrap.py index a11c85d..20dc5ea 100644 --- a/tests/test_bootstrap.py +++ b/tests/test_bootstrap.py @@ -67,6 +67,27 @@ def test_detect_monorepo(tmp_path): assert "fastapi" in stack +def test_detect_deep_monorepo(tmp_path): + """Test that files nested 2-3 levels deep are found (like vdolipoperek).""" + fe = tmp_path / "frontend" / "src" + fe.mkdir(parents=True) + (tmp_path / "frontend" / "package.json").write_text(json.dumps({ + "dependencies": {"vue": "^3.4"}, + "devDependencies": {"vite": "^5.0", "tailwindcss": "^3.4"}, + })) + (tmp_path / "frontend" / "vite.config.js").write_text("export default {}") + (tmp_path / "frontend" / "tailwind.config.js").write_text("module.exports = {}") + + be = tmp_path / "backend-pg" / "src" + be.mkdir(parents=True) + (be / "index.js").write_text("const express = require('express');") + + stack = detect_tech_stack(tmp_path) + assert "vue3" in stack + assert "vite" in stack + assert "tailwind" in stack + + def test_detect_empty_dir(tmp_path): assert detect_tech_stack(tmp_path) == [] @@ -104,6 +125,36 @@ def test_detect_modules_backend_pg(tmp_path): assert any(m["name"] == "services" for m in modules) +def 
test_detect_modules_monorepo(tmp_path): + """Full monorepo: frontend/src/ + backend-pg/src/.""" + # Frontend + fe_views = tmp_path / "frontend" / "src" / "views" + fe_views.mkdir(parents=True) + (fe_views / "Hotel.vue").write_text("") + fe_comp = tmp_path / "frontend" / "src" / "components" + fe_comp.mkdir(parents=True) + (fe_comp / "Search.vue").write_text("") + + # Backend + be_svc = tmp_path / "backend-pg" / "src" / "services" + be_svc.mkdir(parents=True) + (be_svc / "db.js").write_text("const express = require('express');") + be_routes = tmp_path / "backend-pg" / "src" / "routes" + be_routes.mkdir(parents=True) + (be_routes / "api.js").write_text("const router = require('express').Router();") + + modules = detect_modules(tmp_path) + names = {m["name"] for m in modules} + assert "views" in names + assert "components" in names + assert "services" in names + assert "routes" in names + # Check types + types = {m["name"]: m["type"] for m in modules} + assert types["views"] == "frontend" + assert types["components"] == "frontend" + + # --------------------------------------------------------------------------- # Decisions from CLAUDE.md # --------------------------------------------------------------------------- @@ -124,7 +175,7 @@ FIXME: race condition in useSearch composable - CSS grid fallback для IE11 (но мы его не поддерживаем) """) - decisions = extract_decisions_from_claude_md(tmp_path) + decisions = extract_decisions_from_claude_md(tmp_path, "myproj", "My Project") assert len(decisions) >= 4 types = {d["type"] for d in decisions} @@ -136,6 +187,58 @@ def test_extract_decisions_no_claude_md(tmp_path): assert extract_decisions_from_claude_md(tmp_path) == [] +def test_extract_decisions_filters_unrelated_sections(tmp_path): + """Sections about Jitsi, Nextcloud, Prosody should be skipped.""" + (tmp_path / "CLAUDE.md").write_text("""# vdolipoperek + +## Known Issues +1. **Hotel ID mismatch** — Sletat GetTours vs GetHotels разные ID +2. 
**db.js export** — module.exports = pool (НЕ { pool }) + +## Jitsi + Nextcloud интеграция (2026-03-04) + +ВАЖНО: JWT_APP_SECRET must be synced between Prosody and Nextcloud +GOTCHA: focus.meet.jitsi must be pinned in custom-config.js + +## Prosody config + +ВАЖНО: conf.d files принадлежат root → писать через docker exec + +## Git Sync (2026-03-03) + +ВАЖНО: Все среды синхронизированы на коммите 4ee5603 +""") + + decisions = extract_decisions_from_claude_md(tmp_path, "vdol", "vdolipoperek") + + titles = [d["title"] for d in decisions] + # Should have the real known issues + assert any("Hotel ID mismatch" in t for t in titles) + assert any("db.js export" in t for t in titles) + # Should NOT have Jitsi/Prosody/Nextcloud noise + assert not any("JWT_APP_SECRET" in t for t in titles) + assert not any("focus.meet.jitsi" in t for t in titles) + assert not any("conf.d files" in t for t in titles) + + +def test_extract_decisions_filters_noise(tmp_path): + """Commit hashes and shell commands should not be decisions.""" + (tmp_path / "CLAUDE.md").write_text("""# Project + +## Known Issues +1. **Real bug** — actual architectural issue that matters +- docker exec -it prosody bash +- ssh dev "cd /opt/project && git pull" +""") + + decisions = extract_decisions_from_claude_md(tmp_path) + titles = [d["title"] for d in decisions] + assert any("Real bug" in t for t in titles) + # Shell commands should be filtered + assert not any("docker exec" in t for t in titles) + assert not any("ssh dev" in t for t in titles) + + # --------------------------------------------------------------------------- # Obsidian vault # ---------------------------------------------------------------------------