cgcardona / muse public
muse_inspect.py python
439 lines 15.1 KB
12901c5a Initial extraction from tellurstori/maestro cgcardona <gabriel@tellurstori.com> 4d ago
1 """Muse Inspect — serialize the full Muse commit graph to structured output.
2
3 This module is the read-only introspection engine for ``muse inspect``. It
4 traverses the commit graph reachable from one or more branch heads and returns
5 a typed :class:`MuseInspectResult` that CLI formatters render as JSON, DOT, or
6 Mermaid.
7
8 Why this exists
9 ---------------
10 Machine-readable graph export is a prerequisite for tooling (IDEs, CI, AI
11 agents) that needs to reason about the shape of a project's musical history
12 without parsing human-readable ``muse log`` output. The three format options
13 target different consumers:
14
15 - **json** — primary format for agents and programmatic clients.
16 - **dot** — Graphviz DOT graph for visualization pipelines.
17 - **mermaid** — Mermaid.js for inline documentation and GitHub markdown.
18
19 Result types
20 ------------
21 :class:`MuseInspectCommit` — one node in the graph.
22 :class:`MuseInspectResult` — the full serialized graph.
23
24 Both are frozen dataclasses; callers treat them as immutable value objects.
25 """
26 from __future__ import annotations
27
28 import json
29 import logging
30 import pathlib
31 from dataclasses import dataclass
32 from enum import Enum
33 from typing import Optional
34
35 from sqlalchemy.ext.asyncio import AsyncSession
36 from sqlalchemy.future import select
37
38 from maestro.muse_cli.models import MuseCliCommit, MuseCliTag
39
40 logger = logging.getLogger(__name__)
41
42
43 # ---------------------------------------------------------------------------
44 # Public enums
45 # ---------------------------------------------------------------------------
46
47
class InspectFormat(str, Enum):
    """Output formats that ``muse inspect`` can render.

    The enum mixes in ``str``, so every member compares equal to its plain
    string value and can be looked up from that string.
    """

    # Structured JSON document.
    json = "json"
    # Graphviz DOT source.
    dot = "dot"
    # Mermaid.js graph definition.
    mermaid = "mermaid"
54
55
56 # ---------------------------------------------------------------------------
57 # Result types (registered in docs/reference/type_contracts.md)
58 # ---------------------------------------------------------------------------
59
60
@dataclass(frozen=True)
class MuseInspectCommit:
    """A single commit node of the inspected graph.

    The fields mirror the :class:`~maestro.muse_cli.models.MuseCliCommit` ORM
    row, flattened into plain values for agent consumption:

    - ``commit_id`` / ``short_id``: the full hash and its 8-character prefix.
    - ``branch``: the branch the commit was recorded on.
    - ``parent_commit_id`` / ``parent2_commit_id``: DAG parent links; the
      second parent is reserved for merge commits.
    - ``message``: human-readable commit message.
    - ``author``: committer identity string.
    - ``committed_at``: ISO-8601 timestamp string.
    - ``snapshot_id``: content-addressed snapshot hash.
    - ``metadata``: extensible annotation dict (tempo_bpm, etc.).
    - ``tags``: music-semantic tag strings attached to the commit.
    """

    commit_id: str
    short_id: str
    branch: str
    parent_commit_id: Optional[str]
    parent2_commit_id: Optional[str]
    message: str
    author: str
    committed_at: str
    snapshot_id: str
    metadata: dict[str, object]
    tags: list[str]

    def to_dict(self) -> dict[str, object]:
        """Return this node as a plain dict ready for JSON serialization.

        Keys appear in field-declaration order. ``metadata`` and ``tags`` are
        returned by reference, not copied.
        """
        exported_fields = (
            "commit_id",
            "short_id",
            "branch",
            "parent_commit_id",
            "parent2_commit_id",
            "message",
            "author",
            "committed_at",
            "snapshot_id",
            "metadata",
            "tags",
        )
        return {name: getattr(self, name) for name in exported_fields}
107
108
@dataclass(frozen=True)
class MuseInspectResult:
    """The complete serialized commit graph of a Muse repository.

    Produced by :func:`build_inspect_result` and consumed by the JSON, DOT and
    Mermaid renderers.

    - ``repo_id``: UUID identifying the local repository.
    - ``current_branch``: the branch HEAD currently points to.
    - ``branches``: mapping of branch names to their HEAD commit ID.
    - ``commits``: all graph nodes reachable from the traversed heads,
      newest-first.
    """

    repo_id: str
    current_branch: str
    branches: dict[str, str]
    commits: list[MuseInspectCommit]

    def to_dict(self) -> dict[str, object]:
        """Return the whole graph as a JSON-serializable dict.

        Each commit is converted through its own ``to_dict``; ``branches`` is
        returned by reference.
        """
        commit_dicts = [node.to_dict() for node in self.commits]
        return dict(
            repo_id=self.repo_id,
            current_branch=self.current_branch,
            branches=self.branches,
            commits=commit_dicts,
        )
136
137
138 # ---------------------------------------------------------------------------
139 # Graph builder
140 # ---------------------------------------------------------------------------
141
142
def _read_branches(muse_dir: pathlib.Path) -> dict[str, str]:
    """Scan ``.muse/refs/heads/`` and return a ``{branch: commit_id}`` dict.

    The directory is walked recursively, so a ref stored at
    ``refs/heads/feature/x`` is reported as branch ``"feature/x"``.
    Directories themselves are skipped rather than read, which previously
    raised ``IsADirectoryError``. Branches whose ref file is empty are
    excluded (no commits yet). Entries are inserted in sorted path order so
    the result is deterministic across filesystems.

    Args:
        muse_dir: Path to the repository's ``.muse`` directory.

    Returns:
        Mapping of branch name to the commit ID stored in its ref file;
        empty when ``refs/heads/`` does not exist.
    """
    heads_dir = muse_dir / "refs" / "heads"
    branches: dict[str, str] = {}
    if not heads_dir.is_dir():
        return branches
    for ref_file in sorted(heads_dir.rglob("*")):
        # Intermediate directories of slash-named branches are not refs.
        if not ref_file.is_file():
            continue
        commit_id = ref_file.read_text().strip()
        if commit_id:
            branches[ref_file.relative_to(heads_dir).as_posix()] = commit_id
    return branches
157
158
async def _load_commit_tags(
    session: AsyncSession, commit_ids: list[str]
) -> dict[str, list[str]]:
    """Fetch the tag strings of many commits with one query.

    Args:
        session: Async SQLAlchemy session used to run the query.
        commit_ids: Commit IDs whose tags should be loaded.

    Returns:
        ``{commit_id: [tag, ...]}``. Every requested ID is present, mapped to
        an empty list when it has no tags. An empty *commit_ids* yields an
        empty dict without touching the database.
    """
    tags_by_commit: dict[str, list[str]] = {}
    if not commit_ids:
        return tags_by_commit

    query = select(MuseCliTag).where(MuseCliTag.commit_id.in_(commit_ids))
    tag_rows = (await session.execute(query)).scalars().all()

    # Seed one fresh list per requested commit so untagged commits still appear.
    for cid in commit_ids:
        tags_by_commit[cid] = []
    for tag_row in tag_rows:
        tags_by_commit.setdefault(tag_row.commit_id, []).append(tag_row.tag)
    return tags_by_commit
176
177
async def _walk_from(
    session: AsyncSession,
    start_commit_id: str,
    depth: Optional[int],
    visited: set[str],
) -> list[MuseCliCommit]:
    """Follow first-parent links from *start_commit_id*, newest-first.

    Only ``parent_commit_id`` is followed; ``parent2_commit_id`` is never
    traversed. The walk ends when there is no further parent, when *depth*
    rows have been collected, when a commit is already in *visited* (so
    history shared between heads is not re-traversed), or when a commit ID
    cannot be loaded; the last case is logged as a warning.

    Args:
        session: Async SQLAlchemy session.
        start_commit_id: ID of the first commit to visit.
        depth: Maximum number of commits to return (``None`` = unlimited).
        visited: Mutable set of already-visited commit IDs. Updated in place
            so sibling traversals share state.

    Returns:
        Ordered list of :class:`MuseCliCommit` rows, newest-first.
    """
    collected: list[MuseCliCommit] = []
    cursor: Optional[str] = start_commit_id
    while True:
        if not cursor or cursor in visited:
            break
        if depth is not None and len(collected) >= depth:
            break
        commit_row = await session.get(MuseCliCommit, cursor)
        if commit_row is None:
            logger.warning("⚠️ muse inspect: commit %s not found — chain broken", cursor[:8])
            break
        visited.add(cursor)
        collected.append(commit_row)
        cursor = commit_row.parent_commit_id
    return collected
213
214
async def build_inspect_result(
    session: AsyncSession,
    root: pathlib.Path,
    *,
    ref: Optional[str] = None,
    depth: Optional[int] = None,
    include_branches: bool = False,
) -> MuseInspectResult:
    """Build the full :class:`MuseInspectResult` for a Muse repository.

    This is the primary entry point for ``muse inspect``. It reads the
    repository state from the ``.muse/`` directory, resolves the starting
    commit IDs, walks the graph, and returns a fully typed result.

    *ref* is resolved in this order: branch name, exact commit ID, then
    commit-ID prefix restricted to this repository. If several commits share
    the prefix, the first row returned by the database is used.

    Args:
        session: Async SQLAlchemy session (read-only operations only).
        root: Repository root path (contains ``.muse/``).
        ref: Optional starting commit reference. Accepts a full or
            abbreviated commit ID, a branch name, or ``None``/``"HEAD"``
            (default: HEAD of the current branch).
        depth: Maximum commits per branch traversal (``None`` = unlimited).
        include_branches: When ``True``, every branch head is traversed in
            addition to the starting commit and the reachable commits are
            merged into the output. When ``False``, only the current branch
            (or *ref*) is traversed.

    Returns:
        :class:`MuseInspectResult` with all collected nodes and branch
        pointers. Commits already collected by an earlier traversal are not
        repeated.

    Raises:
        ValueError: When *ref* cannot be resolved to a commit.
        FileNotFoundError: When ``.muse/repo.json`` or ``.muse/HEAD`` is
            missing.
    """
    muse_dir = root / ".muse"
    repo_data: dict[str, str] = json.loads((muse_dir / "repo.json").read_text())
    repo_id = repo_data["repo_id"]

    head_ref = (muse_dir / "HEAD").read_text().strip()  # e.g. "refs/heads/main"
    # Keep everything after "refs/heads/" so slash-named branches such as
    # "feature/x" are not truncated to "x"; fall back to the last path
    # component when HEAD does not contain that marker.
    _, marker, branch_path = head_ref.partition("refs/heads/")
    current_branch = branch_path if marker else head_ref.rsplit("/", 1)[-1]

    all_branches = _read_branches(muse_dir)

    # Determine the set of starting commit IDs to traverse.
    start_ids: list[str] = []

    if ref is not None and ref.upper() != "HEAD":
        # Resolve *ref*: branch name first, then exact ID, then prefix.
        if ref in all_branches:
            start_ids.append(all_branches[ref])
        else:
            exact = await session.get(MuseCliCommit, ref)
            if exact is not None:
                start_ids.append(exact.commit_id)
            else:
                # autoescape keeps "%" and "_" in *ref* from acting as LIKE
                # wildcards in the prefix match.
                result = await session.execute(
                    select(MuseCliCommit).where(
                        MuseCliCommit.repo_id == repo_id,
                        MuseCliCommit.commit_id.startswith(ref, autoescape=True),
                    )
                )
                first = result.scalars().first()
                if first is None:
                    raise ValueError(f"Cannot resolve ref {ref!r} to a commit.")
                start_ids.append(first.commit_id)
    else:
        # Default: HEAD of current branch (absent when it has no commits yet).
        head_commit_id = all_branches.get(current_branch, "")
        if head_commit_id:
            start_ids.append(head_commit_id)

    if include_branches:
        for branch_commit_id in all_branches.values():
            if branch_commit_id not in start_ids:
                start_ids.append(branch_commit_id)

    # Walk the graph; *visited* is shared so common history is emitted once.
    visited: set[str] = set()
    all_rows: list[MuseCliCommit] = []
    for start_id in start_ids:
        rows = await _walk_from(session, start_id, depth, visited)
        all_rows.extend(rows)

    # Bulk-load tags.
    row_ids = [r.commit_id for r in all_rows]
    tags_by_commit = await _load_commit_tags(session, row_ids)

    # Build typed result nodes.
    commits = [
        MuseInspectCommit(
            commit_id=row.commit_id,
            short_id=row.commit_id[:8],
            branch=row.branch,
            parent_commit_id=row.parent_commit_id,
            parent2_commit_id=row.parent2_commit_id,
            message=row.message,
            author=row.author,
            committed_at=row.committed_at.isoformat(),
            snapshot_id=row.snapshot_id,
            metadata=dict(row.commit_metadata) if row.commit_metadata else {},
            tags=tags_by_commit.get(row.commit_id, []),
        )
        for row in all_rows
    ]

    logger.info(
        "✅ muse inspect: %d commit(s), %d branch(es) (repo=%s)",
        len(commits),
        len(all_branches),
        repo_id[:8],
    )
    return MuseInspectResult(
        repo_id=repo_id,
        current_branch=current_branch,
        branches=all_branches,
        commits=commits,
    )
334
335
336 # ---------------------------------------------------------------------------
337 # Format renderers
338 # ---------------------------------------------------------------------------
339
340
def render_json(result: MuseInspectResult, indent: int = 2) -> str:
    """Render *result* as a JSON document.

    The top-level object is whatever ``result.to_dict()`` returns:
    ``repo_id``, ``current_branch``, ``branches`` and a ``commits`` array.

    Args:
        result: The inspect result to serialize.
        indent: JSON indentation level (default 2).

    Returns:
        Formatted JSON string.
    """
    payload = result.to_dict()
    # default=str: values the encoder cannot handle natively are stringified.
    return json.dumps(payload, default=str, indent=indent)
355
356
def _dot_escape(text: str) -> str:
    """Escape *text* for safe use inside a double-quoted DOT string."""
    return (
        text.replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\r\n", "\\n")
        .replace("\n", "\\n")
        .replace("\r", "\\n")
    )


def render_dot(result: MuseInspectResult) -> str:
    """Serialize *result* to a Graphviz DOT directed graph.

    Each commit becomes a node labelled with its short ID and the first 40
    characters of its message. Parent edges point from child to parent; the
    second-parent edge is dashed. Each branch with a head commit appears as a
    bold rectangular node pointing at that commit.

    Commit messages and branch names are escaped before being placed inside
    quoted DOT strings, so quotes, backslashes and line breaks in them cannot
    break the generated source.

    Args:
        result: The inspect result to serialize.

    Returns:
        DOT source string, suitable for piping to ``dot -Tsvg``.
    """
    lines: list[str] = ["digraph muse_graph {", ' rankdir="LR";', ' node [shape=ellipse];', ""]

    for commit in result.commits:
        # Truncate first, then escape, so an escape sequence is never cut in half.
        summary = commit.message[:40]
        if len(commit.message) > 40:
            summary += "…"
        label = f"{commit.short_id}\\n{_dot_escape(summary)}"
        lines.append(f' "{commit.commit_id}" [label="{label}"];')

    lines.append("")

    for commit in result.commits:
        if commit.parent_commit_id:
            lines.append(f' "{commit.commit_id}" -> "{commit.parent_commit_id}";')
        if commit.parent2_commit_id:
            lines.append(
                f' "{commit.commit_id}" -> "{commit.parent2_commit_id}" [style=dashed];'
            )

    lines.append("")
    lines.append(" // Branch pointers")
    lines.append(' node [shape=rectangle style=bold];')
    for branch, head_id in result.branches.items():
        if not head_id:
            continue
        safe_branch = branch.replace("/", "_").replace("-", "_")
        node_id = _dot_escape(f"branch_{safe_branch}")
        branch_label = _dot_escape(branch)
        lines.append(f' "{node_id}" [label="{branch_label}"];')
        lines.append(f' "{node_id}" -> "{head_id}";')

    lines.append("}")
    return "\n".join(lines)
400
401
def render_mermaid(result: MuseInspectResult) -> str:
    """Render *result* as a Mermaid.js ``graph LR`` definition.

    Commit nodes are identified by the first 8 characters of their ID and
    labelled ``<short_id>: <message>``, with the message cut to 35 characters
    and double quotes replaced by single quotes. Solid arrows link a commit
    to its first parent, dotted arrows to its second parent. Each branch with
    a head commit becomes its own node pointing at that commit.

    Args:
        result: The inspect result to serialize.

    Returns:
        Mermaid source string, suitable for embedding in GitHub markdown
        inside a ``mermaid`` fenced code block.
    """
    node_lines: list[str] = []
    edge_lines: list[str] = []
    for commit in result.commits:
        node_id = commit.commit_id[:8]
        text = commit.message
        summary = (text[:35] + "…") if len(text) > 35 else text[:35]
        quoted = summary.replace('"', "'")
        node_lines.append(f' {node_id}["{commit.short_id}: {quoted}"]')

        first_parent = commit.parent_commit_id
        if first_parent:
            edge_lines.append(f" {node_id} --> {first_parent[:8]}")
        second_parent = commit.parent2_commit_id
        if second_parent:
            edge_lines.append(f" {node_id} -.-> {second_parent[:8]}")

    branch_lines: list[str] = []
    for branch, head_id in result.branches.items():
        if not head_id:
            continue
        # "/" and "-" are replaced so the branch name can serve as a node ID.
        branch_node = branch.replace("/", "_").replace("-", "_")
        branch_lines.append(f' {branch_node}["{branch}"]')
        branch_lines.append(f" {branch_node} --> {head_id[:8]}")

    return "\n".join(["graph LR", *node_lines, *edge_lines, *branch_lines])