Fix bootstrap: deep scan, CLAUDE.md fallback, noise filtering
1. Tech stack: recursive file search (depth 3) plus a CLAUDE.md text fallback for when config files live on a remote server (detects nodejs, postgresql, etc.).
2. Modules: scan */src/ patterns in top-level dirs (frontend/src/, backend-pg/src/).
3. Decisions: skip sections about unrelated services (Jitsi, Nextcloud, Prosody, GOIP) and filter noise (commit hashes, shell commands, external service paths). The same noise filtering is applied to Obsidian decisions.

Tested on vdolipoperek: 4 tech entries, 5 modules, 9 clean decisions, 24 Obsidian tasks. 61 tests, all passing.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
parent da4a8aae72 · commit e5444114bd
3 changed files with 344 additions and 94 deletions
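For orientation, the three entry points this commit changes are plain functions over a project path. A minimal usage sketch (the import path and project location are hypothetical; the names, signatures, and return shapes come from the hunks below):

from pathlib import Path

# Hypothetical import path; the functions are defined in the scanner module patched below.
from scanner import detect_tech_stack, detect_modules, extract_decisions_from_claude_md

project = Path("/opt/vdolipoperek")  # hypothetical checkout location
print(detect_tech_stack(project))    # sorted list, e.g. ["express", "nodejs", "vue3"]
print(detect_modules(project))       # list of {"name", "path", "type", ...} dicts
print(extract_decisions_from_claude_md(project, "vdol", "vdolipoperek"))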
@@ -42,54 +42,91 @@ _FILE_MARKERS = {
 }
 
 
+_SKIP_DIRS = {"node_modules", ".git", "dist", ".next", ".nuxt", "__pycache__", ".venv", "venv"}
 
 
 def detect_tech_stack(project_path: Path) -> list[str]:
-    """Detect tech stack from project files."""
+    """Detect tech stack from project files.
+
+    Searches recursively up to depth 3, skipping node_modules/.git/dist.
+    Falls back to CLAUDE.md heuristics if no files found.
+    """
     stack: set[str] = set()
 
-    # Config file markers
-    for fname, tech in _FILE_MARKERS.items():
-        # Check root and one level deep
-        if (project_path / fname).exists():
-            stack.add(tech)
-        for sub in ("frontend", "backend", "server", "client", "app"):
-            if (project_path / sub / fname).exists():
-                stack.add(tech)
-
-    # package.json (root + subdirs)
-    for pj_path in _find_package_jsons(project_path):
-        stack.update(_parse_package_json(pj_path))
-
-    # requirements.txt
-    for req_path in project_path.glob("**/requirements.txt"):
-        if _is_inside_node_modules(req_path, project_path):
-            continue
-        stack.update(_parse_requirements_txt(req_path))
-
-    # go.mod
-    go_mod = project_path / "go.mod"
-    if go_mod.exists():
-        stack.add("go")
-        text = go_mod.read_text(errors="replace")
-        if "gin-gonic" in text:
-            stack.add("gin")
-        if "fiber" in text:
-            stack.add("fiber")
+    # Recursive search for config files and package.json (depth ≤ 3)
+    for fpath in _walk_files(project_path, max_depth=3):
+        fname = fpath.name
+        if fname in _FILE_MARKERS:
+            stack.add(_FILE_MARKERS[fname])
+        if fname == "package.json":
+            stack.update(_parse_package_json(fpath))
+        if fname == "requirements.txt":
+            stack.update(_parse_requirements_txt(fpath))
+        if fname == "go.mod":
+            stack.add("go")
+            try:
+                text = fpath.read_text(errors="replace")
+                if "gin-gonic" in text:
+                    stack.add("gin")
+                if "fiber" in text:
+                    stack.add("fiber")
+            except OSError:
+                pass
+
+    # Fallback: extract tech hints from CLAUDE.md if no config files found
+    if not stack:
+        stack.update(_detect_stack_from_claude_md(project_path))
 
     return sorted(stack)
 
 
-def _find_package_jsons(root: Path) -> list[Path]:
-    """Find package.json files (root + immediate subdirs, skip node_modules)."""
-    results = []
-    pj = root / "package.json"
-    if pj.exists():
-        results.append(pj)
-    for sub in root.iterdir():
-        if sub.is_dir() and sub.name != "node_modules" and not sub.name.startswith("."):
-            pj = sub / "package.json"
-            if pj.exists():
-                results.append(pj)
-    return results
+# CLAUDE.md text → tech labels (for fallback when project files are on a remote server)
+_CLAUDE_MD_TECH_HINTS = {
+    r"(?i)vue[\s.]?3": "vue3", r"(?i)vue[\s.]?2": "vue2",
+    r"(?i)\bnuxt\b": "nuxt3", r"(?i)\breact\b": "react",
+    r"(?i)\btypescript\b": "typescript", r"(?i)\bvite\b": "vite",
+    r"(?i)\btailwind": "tailwind",
+    r"(?i)node\.?js": "nodejs", r"(?i)\bexpress\b": "express",
+    r"(?i)postgresql|postgres": "postgresql",
+    r"(?i)\bsqlite\b": "sqlite", r"(?i)\bmysql\b": "mysql",
+    r"(?i)\bdocker\b": "docker",
+    r"(?i)\bpython\b": "python", r"(?i)\bfastapi\b": "fastapi",
+    r"(?i)\bdjango\b": "django", r"(?i)\bflask\b": "flask",
+    r"(?i)\bgo\b.*(?:gin|fiber|module)": "go",
+    r"(?i)\bnginx\b": "nginx",
+    r"(?i)\bpinia\b": "pinia", r"(?i)\bvuex\b": "vuex",
+}
+
+
+def _detect_stack_from_claude_md(project_path: Path) -> list[str]:
+    """Fallback: infer tech stack from CLAUDE.md text when no config files exist."""
+    claude_md = project_path / "CLAUDE.md"
+    if not claude_md.exists():
+        return []
+    try:
+        text = claude_md.read_text(errors="replace")[:5000]  # First 5KB is enough
+    except OSError:
+        return []
+    stack = []
+    for pattern, tech in _CLAUDE_MD_TECH_HINTS.items():
+        if re.search(pattern, text):
+            stack.append(tech)
+    return stack
+
+
+def _walk_files(root: Path, max_depth: int = 3, _depth: int = 0):
+    """Yield files up to max_depth, skipping node_modules/dist/.git."""
+    if _depth > max_depth:
+        return
+    try:
+        entries = sorted(root.iterdir())
+    except (OSError, PermissionError):
+        return
+    for entry in entries:
+        if entry.is_file():
+            yield entry
+        elif entry.is_dir() and entry.name not in _SKIP_DIRS and not entry.name.startswith("."):
+            yield from _walk_files(entry, max_depth, _depth + 1)
 
 
 def _parse_package_json(path: Path) -> list[str]:
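A quick check of the depth-limited walk above (a sketch, assuming `_walk_files` is importable from the module patched in this hunk):

import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as tmp:
    root = Path(tmp)
    (root / "frontend" / "src").mkdir(parents=True)
    (root / "frontend" / "package.json").write_text("{}")
    (root / "node_modules" / "pkg").mkdir(parents=True)
    (root / "node_modules" / "pkg" / "package.json").write_text("{}")

    found = [p.relative_to(root) for p in _walk_files(root, max_depth=3)]
    assert Path("frontend/package.json") in found             # nested file is reached
    assert all("node_modules" not in p.parts for p in found)  # skip-dir pruned entirely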
@@ -140,26 +177,40 @@ _BACKEND_MARKERS = {"express", "fastify", "koa", "router", "controller", "middle
 
 
 def detect_modules(project_path: Path) -> list[dict]:
-    """Scan src/ (or app/, lib/, frontend/, backend/) for modules."""
+    """Scan for modules: checks root subdirs, */src/ patterns, standard names.
+
+    Strategy:
+    1. Find all "source root" dirs (src/, app/, lib/ at root or inside top-level dirs)
+    2. Each first-level subdir of a source root = a module candidate
+    3. Top-level dirs with their own src/ are treated as component roots
+       (e.g. frontend/, backend-pg/) — scan THEIR src/ for modules
+    """
     modules = []
-    scan_dirs = []
+    scan_dirs: list[tuple[Path, str | None]] = []  # (dir, prefix_hint)
 
-    # Prioritized source dirs
-    for name in ("src", "app", "lib", "frontend", "backend", "server", "client"):
+    # Direct source dirs in root
+    for name in ("src", "app", "lib"):
         d = project_path / name
         if d.is_dir():
-            scan_dirs.append(d)
+            scan_dirs.append((d, None))
 
-    # Also check frontend/src, backend/src patterns
-    for name in ("frontend/src", "backend/src", "backend-pg/src"):
-        d = project_path / name
-        if d.is_dir():
-            scan_dirs.append(d)
+    # Top-level component dirs (frontend/, backend/, backend-pg/, server/, client/)
+    # These get scanned for src/ inside, or directly if they contain source files
+    for child in sorted(project_path.iterdir()):
+        if not child.is_dir() or child.name in _SKIP_DIRS or child.name.startswith("."):
+            continue
+        child_src = child / "src"
+        if child_src.is_dir():
+            # e.g. frontend/src/, backend-pg/src/ — scan their subdirs
+            scan_dirs.append((child_src, child.name))
+        elif child.name in ("frontend", "backend", "server", "client", "web", "api"):
+            # No src/ but it's a known component dir — scan it directly
+            scan_dirs.append((child, child.name))
 
     seen = set()
-    for scan_dir in scan_dirs:
+    for scan_dir, prefix in scan_dirs:
         for child in sorted(scan_dir.iterdir()):
-            if not child.is_dir() or child.name.startswith(".") or child.name == "node_modules":
+            if not child.is_dir() or child.name in _SKIP_DIRS or child.name.startswith("."):
                 continue
             mod = _analyze_module(child, project_path)
             key = (mod["name"], mod["path"])
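Illustrative output for the monorepo shape the docstring describes (a sketch; the `name`/`path`/`type` keys come from `_analyze_module` as used above, the path format and exact values are indicative only):

from pathlib import Path

for m in detect_modules(Path("/opt/vdolipoperek")):  # hypothetical path
    print(m["name"], m["path"], m["type"])
# Expected shape (indicative):
#   views       frontend/src/views        frontend
#   components  frontend/src/components   frontend
#   services    backend-pg/src/services   backend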
@@ -230,7 +281,7 @@ def _guess_module_type(dir_path: Path, exts: set[str], files: list[Path]) -> str
 _DECISION_PATTERNS = [
     (r"(?i)\b(GOTCHA|ВАЖНО|WARNING|ВНИМАНИЕ)[:\s]+(.*?)(?=\n[#\-]|\n\n|\Z)", "gotcha"),
     (r"(?i)\b(WORKAROUND|ОБХОДНОЙ|ХАК)[:\s]+(.*?)(?=\n[#\-]|\n\n|\Z)", "workaround"),
-    (r"(?i)\b(FIXME|TODO|БАГИ?)[:\s]+(.*?)(?=\n[#\-]|\n\n|\Z)", "gotcha"),
+    (r"(?i)\b(FIXME|БАГИ?)[:\s]+(.*?)(?=\n[#\-]|\n\n|\Z)", "gotcha"),
     (r"(?i)\b(РЕШЕНИЕ|DECISION)[:\s]+(.*?)(?=\n[#\-]|\n\n|\Z)", "decision"),
     (r"(?i)\b(CONVENTION|СОГЛАШЕНИЕ|ПРАВИЛО)[:\s]+(.*?)(?=\n[#\-]|\n\n|\Z)", "convention"),
 ]
@@ -238,13 +289,83 @@ _DECISION_PATTERNS = [
 # Section headers that likely contain decisions
 _DECISION_SECTIONS = [
     r"(?i)known\s+issues?", r"(?i)workaround", r"(?i)gotcha",
-    r"(?i)решени[яе]", r"(?i)грабл[ия]", r"(?i)важно",
+    r"(?i)решени[яе]", r"(?i)грабл[ия]",
     r"(?i)conventions?", r"(?i)правила", r"(?i)нюансы",
 ]
 
+# Section headers about UNRELATED services — skip these entirely
+_UNRELATED_SECTION_PATTERNS = [
+    r"(?i)jitsi", r"(?i)nextcloud", r"(?i)prosody",
+    r"(?i)coturn", r"(?i)turn\b", r"(?i)asterisk",
+    r"(?i)ghost\s+блог", r"(?i)onlyoffice",
+    r"(?i)git\s+sync", r"(?i)\.env\s+добав",
+    r"(?i)goip\s+watcher", r"(?i)tbank\s+monitor",  # monitoring services
+    r"(?i)фикс\s+удален",  # commit-level fixes (not decisions)
+]
 
-def extract_decisions_from_claude_md(project_path: Path) -> list[dict]:
-    """Parse CLAUDE.md for decisions, gotchas, workarounds."""
+# Noise patterns — individual items that look like noise, not decisions
+_NOISE_PATTERNS = [
+    r"^[0-9a-f]{6,40}$",  # commit hashes
+    r"^\s*(docker|ssh|scp|git|curl|sudo)\s",  # shell commands
+    r"^`[^`]+`$",  # inline code-only items
+    r"(?i)(prosody|jitsi|jicofo|jvb|coturn|nextcloud|onlyoffice|ghost)",  # unrelated services
+    r"(?i)\.jitsi-meet-cfg",  # jitsi config paths
+    r"(?i)(meet\.jitsi|sitemeet\.org)",  # jitsi domains
+    r"(?i)(cloud\.vault\.red|office\.vault)",  # nextcloud domains
+    r"(?i)JWT_APP_(ID|SECRET)",  # jwt config lines
+    r"(?i)XMPP_",  # prosody config
+    r"\(коммит\s+`?[0-9a-f]+`?\)",  # "(коммит `a33c2b9`)" references
+    r"(?i)known_uids|idle_loop|reconnect",  # goip-watcher internals
+]
+
+
+def _is_noise(text: str) -> bool:
+    """Check if a decision candidate is noise."""
+    # Clean markdown bold for matching
+    clean = re.sub(r"\*\*([^*]*)\*\*", r"\1", text).strip()
+    return any(re.search(p, clean) for p in _NOISE_PATTERNS)
+
+
+def _split_into_sections(text: str) -> list[tuple[str, str]]:
+    """Split markdown into (header, body) pairs by ## headers.
+
+    Returns list of (header_text, body_text) tuples.
+    Anything before the first ## is returned with header="".
+    """
+    parts = re.split(r"(?m)^(##\s+.+)$", text)
+    sections = []
+    current_header = ""
+    current_body = parts[0] if parts else ""
+
+    for i in range(1, len(parts), 2):
+        if current_header or current_body.strip():
+            sections.append((current_header, current_body))
+        current_header = parts[i].strip()
+        current_body = parts[i + 1] if i + 1 < len(parts) else ""
+
+    if current_header or current_body.strip():
+        sections.append((current_header, current_body))
+
+    return sections
+
+
+def _is_unrelated_section(header: str) -> bool:
+    """Check if a section header is about an unrelated service."""
+    return any(re.search(p, header) for p in _UNRELATED_SECTION_PATTERNS)
+
+
+def extract_decisions_from_claude_md(
+    project_path: Path,
+    project_id: str | None = None,
+    project_name: str | None = None,
+) -> list[dict]:
+    """Parse CLAUDE.md for decisions, gotchas, workarounds.
+
+    Filters out:
+    - Sections about unrelated services (Jitsi, Nextcloud, Prosody, etc.)
+    - Noise: commit hashes, docker/ssh commands, paths to external services
+    - If CLAUDE.md has multi-project sections, only extracts for current project
+    """
     claude_md = project_path / "CLAUDE.md"
     if not claude_md.exists():
         return []
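The noise filter in action (a sketch, assuming `_is_noise` is importable; each line maps to one of the `_NOISE_PATTERNS` above):

assert _is_noise("a33c2b9f01")                         # bare commit hash
assert _is_noise("docker exec -it prosody bash")       # shell command
assert _is_noise("**JWT_APP_SECRET** must be synced")  # bold stripped, then JWT pattern
assert not _is_noise("module.exports = pool (НЕ { pool })")  # real decision survives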
@@ -254,20 +375,30 @@ def extract_decisions_from_claude_md(project_path: Path) -> list[dict]:
     except OSError:
         return []
 
+    # Split into sections and filter out unrelated ones
+    sections = _split_into_sections(text)
+    relevant_text = []
+    for header, body in sections:
+        if _is_unrelated_section(header):
+            continue
+        relevant_text.append(header + "\n" + body)
+
+    filtered_text = "\n".join(relevant_text)
+
     decisions = []
     seen_titles = set()
 
-    # Pattern-based extraction
+    # Pattern-based extraction from relevant sections only
     for pattern, dec_type in _DECISION_PATTERNS:
-        for m in re.finditer(pattern, text, re.DOTALL):
-            label = m.group(1).strip()
+        for m in re.finditer(pattern, filtered_text, re.DOTALL):
             body = m.group(2).strip()
             if not body or len(body) < 10:
                 continue
-            # First line as title, rest as description
             lines = body.split("\n")
             title = lines[0].strip().rstrip(".")[:100]
             desc = body
+            if _is_noise(title) or _is_noise(desc):
+                continue
             if title not in seen_titles:
                 seen_titles.add(title)
                 decisions.append({
|
||||||
"category": _guess_category(title + " " + desc),
|
"category": _guess_category(title + " " + desc),
|
||||||
})
|
})
|
||||||
|
|
||||||
# Section-based extraction: find headers matching decision sections
|
# Section-based extraction: find ### or #### headers matching decision patterns
|
||||||
sections = re.split(r"(?m)^(#{1,4}\s+.*?)$", text)
|
sub_sections = re.split(r"(?m)^(#{1,4}\s+.*?)$", filtered_text)
|
||||||
for i, section in enumerate(sections):
|
for i, section in enumerate(sub_sections):
|
||||||
if any(re.search(pat, section) for pat in _DECISION_SECTIONS):
|
if any(re.search(pat, section) for pat in _DECISION_SECTIONS):
|
||||||
# The content is in the next section
|
if i + 1 < len(sub_sections):
|
||||||
if i + 1 < len(sections):
|
content = sub_sections[i + 1].strip()
|
||||||
content = sections[i + 1].strip()
|
|
||||||
# Extract bullet points
|
|
||||||
for line in content.split("\n"):
|
for line in content.split("\n"):
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
if line.startswith(("- ", "* ", "• ")):
|
# Numbered items (1. **text**) or bullet items
|
||||||
|
item = None
|
||||||
|
if re.match(r"^\d+\.\s+", line):
|
||||||
|
item = re.sub(r"^\d+\.\s+", "", line).strip()
|
||||||
|
elif line.startswith(("- ", "* ", "• ")):
|
||||||
item = line.lstrip("-*• ").strip()
|
item = line.lstrip("-*• ").strip()
|
||||||
if item and len(item) > 10 and item[:80] not in seen_titles:
|
|
||||||
seen_titles.add(item[:80])
|
if not item or len(item) < 10:
|
||||||
|
continue
|
||||||
|
# Clean bold markers for title
|
||||||
|
clean = re.sub(r"\*\*([^*]+)\*\*", r"\1", item)
|
||||||
|
if _is_noise(clean):
|
||||||
|
continue
|
||||||
|
title = clean[:100]
|
||||||
|
if title not in seen_titles:
|
||||||
|
seen_titles.add(title)
|
||||||
decisions.append({
|
decisions.append({
|
||||||
"type": "decision",
|
"type": "gotcha",
|
||||||
"title": item[:100],
|
"title": title,
|
||||||
"description": item,
|
"description": item,
|
||||||
"category": _guess_category(item),
|
"category": _guess_category(item),
|
||||||
})
|
})
|
||||||
|
|
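A worked example of the numbered-item handling above (the input line is taken from the test fixture later in this commit):

import re

line = "1. **Hotel ID mismatch** — Sletat GetTours vs GetHotels разные ID"
item = re.sub(r"^\d+\.\s+", "", line).strip()        # strip "1. "
clean = re.sub(r"\*\*([^*]+)\*\*", r"\1", item)      # strip bold markers
assert clean == "Hotel ID mismatch — Sletat GetTours vs GetHotels разные ID"
# clean[:100] becomes the decision title; the raw item stays as the description.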
@@ -414,8 +555,11 @@ def _extract_obsidian_decisions(text: str, source: str, decisions: list[dict]):
     for pattern, dec_type in _DECISION_PATTERNS:
         for m in re.finditer(pattern, text, re.DOTALL):
             body = m.group(2).strip()
-            if body and len(body) > 10:
+            if not body or len(body) < 10:
+                continue
             title = body.split("\n")[0].strip()[:100]
+            if _is_noise(title) or _is_noise(body):
+                continue
             decisions.append({
                 "type": dec_type,
                 "title": title,
@@ -427,7 +571,10 @@ def _extract_obsidian_decisions(text: str, source: str, decisions: list[dict]):
     # Also look for ВАЖНО/GOTCHA/FIXME inline markers not caught above
     for m in re.finditer(r"(?i)\*\*(ВАЖНО|GOTCHA|FIXME)\*\*[:\s]*(.*?)(?=\n|$)", text):
         body = m.group(2).strip()
-        if body and len(body) > 10:
+        if not body or len(body) < 10:
+            continue
+        if _is_noise(body):
+            continue
         decisions.append({
             "type": "gotcha",
             "title": body[:100],
@@ -435,7 +435,7 @@ def bootstrap(ctx, path, project_id, name, vault_path, yes):
     click.echo(f"Scanning {project_path} ...")
     tech_stack = detect_tech_stack(project_path)
     modules = detect_modules(project_path)
-    decisions = extract_decisions_from_claude_md(project_path)
+    decisions = extract_decisions_from_claude_md(project_path, project_id, name)
 
     # Obsidian
     obsidian = None
@@ -67,6 +67,27 @@ def test_detect_monorepo(tmp_path):
     assert "fastapi" in stack
 
 
+def test_detect_deep_monorepo(tmp_path):
+    """Test that files nested 2-3 levels deep are found (like vdolipoperek)."""
+    fe = tmp_path / "frontend" / "src"
+    fe.mkdir(parents=True)
+    (tmp_path / "frontend" / "package.json").write_text(json.dumps({
+        "dependencies": {"vue": "^3.4"},
+        "devDependencies": {"vite": "^5.0", "tailwindcss": "^3.4"},
+    }))
+    (tmp_path / "frontend" / "vite.config.js").write_text("export default {}")
+    (tmp_path / "frontend" / "tailwind.config.js").write_text("module.exports = {}")
+
+    be = tmp_path / "backend-pg" / "src"
+    be.mkdir(parents=True)
+    (be / "index.js").write_text("const express = require('express');")
+
+    stack = detect_tech_stack(tmp_path)
+    assert "vue3" in stack
+    assert "vite" in stack
+    assert "tailwind" in stack
+
+
 def test_detect_empty_dir(tmp_path):
     assert detect_tech_stack(tmp_path) == []
@@ -104,6 +125,36 @@ def test_detect_modules_backend_pg(tmp_path):
     assert any(m["name"] == "services" for m in modules)
 
 
+def test_detect_modules_monorepo(tmp_path):
+    """Full monorepo: frontend/src/ + backend-pg/src/."""
+    # Frontend
+    fe_views = tmp_path / "frontend" / "src" / "views"
+    fe_views.mkdir(parents=True)
+    (fe_views / "Hotel.vue").write_text("<template></template>")
+    fe_comp = tmp_path / "frontend" / "src" / "components"
+    fe_comp.mkdir(parents=True)
+    (fe_comp / "Search.vue").write_text("<template></template>")
+
+    # Backend
+    be_svc = tmp_path / "backend-pg" / "src" / "services"
+    be_svc.mkdir(parents=True)
+    (be_svc / "db.js").write_text("const express = require('express');")
+    be_routes = tmp_path / "backend-pg" / "src" / "routes"
+    be_routes.mkdir(parents=True)
+    (be_routes / "api.js").write_text("const router = require('express').Router();")
+
+    modules = detect_modules(tmp_path)
+    names = {m["name"] for m in modules}
+    assert "views" in names
+    assert "components" in names
+    assert "services" in names
+    assert "routes" in names
+    # Check types
+    types = {m["name"]: m["type"] for m in modules}
+    assert types["views"] == "frontend"
+    assert types["components"] == "frontend"
+
+
 # ---------------------------------------------------------------------------
 # Decisions from CLAUDE.md
 # ---------------------------------------------------------------------------
@@ -124,7 +175,7 @@ FIXME: race condition in useSearch composable
 - CSS grid fallback для IE11 (но мы его не поддерживаем)
 """)
 
-    decisions = extract_decisions_from_claude_md(tmp_path)
+    decisions = extract_decisions_from_claude_md(tmp_path, "myproj", "My Project")
     assert len(decisions) >= 4
 
     types = {d["type"] for d in decisions}
@@ -136,6 +187,58 @@ def test_extract_decisions_no_claude_md(tmp_path):
     assert extract_decisions_from_claude_md(tmp_path) == []
 
 
+def test_extract_decisions_filters_unrelated_sections(tmp_path):
+    """Sections about Jitsi, Nextcloud, Prosody should be skipped."""
+    (tmp_path / "CLAUDE.md").write_text("""# vdolipoperek
+
+## Known Issues
+1. **Hotel ID mismatch** — Sletat GetTours vs GetHotels разные ID
+2. **db.js export** — module.exports = pool (НЕ { pool })
+
+## Jitsi + Nextcloud интеграция (2026-03-04)
+
+ВАЖНО: JWT_APP_SECRET must be synced between Prosody and Nextcloud
+GOTCHA: focus.meet.jitsi must be pinned in custom-config.js
+
+## Prosody config
+
+ВАЖНО: conf.d files принадлежат root → писать через docker exec
+
+## Git Sync (2026-03-03)
+
+ВАЖНО: Все среды синхронизированы на коммите 4ee5603
+""")
+
+    decisions = extract_decisions_from_claude_md(tmp_path, "vdol", "vdolipoperek")
+
+    titles = [d["title"] for d in decisions]
+    # Should have the real known issues
+    assert any("Hotel ID mismatch" in t for t in titles)
+    assert any("db.js export" in t for t in titles)
+    # Should NOT have Jitsi/Prosody/Nextcloud noise
+    assert not any("JWT_APP_SECRET" in t for t in titles)
+    assert not any("focus.meet.jitsi" in t for t in titles)
+    assert not any("conf.d files" in t for t in titles)
+
+
+def test_extract_decisions_filters_noise(tmp_path):
+    """Commit hashes and shell commands should not be decisions."""
+    (tmp_path / "CLAUDE.md").write_text("""# Project
+
+## Known Issues
+1. **Real bug** — actual architectural issue that matters
+- docker exec -it prosody bash
+- ssh dev "cd /opt/project && git pull"
+""")
+
+    decisions = extract_decisions_from_claude_md(tmp_path)
+    titles = [d["title"] for d in decisions]
+    assert any("Real bug" in t for t in titles)
+    # Shell commands should be filtered
+    assert not any("docker exec" in t for t in titles)
+    assert not any("ssh dev" in t for t in titles)
+
+
 # ---------------------------------------------------------------------------
 # Obsidian vault
 # ---------------------------------------------------------------------------