muse_inspect.py
python
| 1 | """Muse Inspect — serialize the full Muse commit graph to structured output. |
| 2 | |
| 3 | This module is the read-only introspection engine for ``muse inspect``. It |
| 4 | traverses the commit graph reachable from one or more branch heads and returns |
| 5 | a typed :class:`MuseInspectResult` that CLI formatters render as JSON, DOT, or |
| 6 | Mermaid. |
| 7 | |
| 8 | Why this exists |
| 9 | --------------- |
| 10 | Machine-readable graph export is a prerequisite for tooling (IDEs, CI, AI |
| 11 | agents) that needs to reason about the shape of a project's musical history |
| 12 | without parsing human-readable ``muse log`` output. The three format options |
| 13 | target different consumers: |
| 14 | |
| 15 | - **json** — primary format for agents and programmatic clients. |
| 16 | - **dot** — Graphviz DOT graph for visualization pipelines. |
| 17 | - **mermaid** — Mermaid.js for inline documentation and GitHub markdown. |
| 18 | |
| 19 | Result types |
| 20 | ------------ |
| 21 | :class:`MuseInspectCommit` — one node in the graph. |
| 22 | :class:`MuseInspectResult` — the full serialized graph. |
| 23 | |
| 24 | Both are frozen dataclasses; callers treat them as immutable value objects. |
| 25 | """ |
| 26 | from __future__ import annotations |
| 27 | |
| 28 | import json |
| 29 | import logging |
| 30 | import pathlib |
| 31 | from dataclasses import dataclass |
| 32 | from enum import Enum |
| 33 | from typing import Optional |
| 34 | |
| 35 | from sqlalchemy.ext.asyncio import AsyncSession |
| 36 | from sqlalchemy.future import select |
| 37 | |
| 38 | from maestro.muse_cli.models import MuseCliCommit, MuseCliTag |
| 39 | |
| 40 | logger = logging.getLogger(__name__) |
| 41 | |
| 42 | |
| 43 | # --------------------------------------------------------------------------- |
| 44 | # Public enums |
| 45 | # --------------------------------------------------------------------------- |
| 46 | |
| 47 | |
class InspectFormat(str, Enum):
    """Output serializations that ``muse inspect`` can emit.

    The enum mixes in :class:`str`, so every member compares equal to its
    plain string value (for example ``InspectFormat.json == "json"``).
    """

    # Structured output for agents and programmatic clients.
    json = "json"
    # Graphviz DOT source.
    dot = "dot"
    # Mermaid.js graph definition.
    mermaid = "mermaid"
| 54 | |
| 55 | |
| 56 | # --------------------------------------------------------------------------- |
| 57 | # Result types (registered in docs/reference/type_contracts.md) |
| 58 | # --------------------------------------------------------------------------- |
| 59 | |
| 60 | |
@dataclass(frozen=True)
class MuseInspectCommit:
    """One commit node in the inspected graph.

    Fields mirror the :class:`~maestro.muse_cli.models.MuseCliCommit` ORM row
    but are typed for agent consumption:

    - ``commit_id`` / ``short_id``: full and 8-char abbreviated hash.
    - ``branch``: the branch this commit was recorded on.
    - ``parent_commit_id`` / ``parent2_commit_id``: DAG parent links (the
      second parent is reserved for merge commits).
    - ``message``: human-readable commit message.
    - ``author``: committer identity string.
    - ``committed_at``: ISO-8601 UTC timestamp string.
    - ``snapshot_id``: content-addressed snapshot hash.
    - ``metadata``: extensible annotation dict (tempo_bpm, etc.).
    - ``tags``: list of music-semantic tag strings attached to this commit.

    The dataclass is frozen, which only prevents attribute rebinding; the
    ``metadata`` dict and ``tags`` list themselves remain mutable containers.
    """

    commit_id: str
    short_id: str
    branch: str
    parent_commit_id: Optional[str]
    parent2_commit_id: Optional[str]
    message: str
    author: str
    committed_at: str
    snapshot_id: str
    metadata: dict[str, object]
    tags: list[str]

    def to_dict(self) -> dict[str, object]:
        """Return a JSON-serializable dict for this commit node.

        ``metadata`` and ``tags`` are shallow-copied so that a caller who
        mutates the returned payload cannot alter this frozen instance.

        Returns:
            A new dict with one key per dataclass field, in field order.
        """
        return {
            "commit_id": self.commit_id,
            "short_id": self.short_id,
            "branch": self.branch,
            "parent_commit_id": self.parent_commit_id,
            "parent2_commit_id": self.parent2_commit_id,
            "message": self.message,
            "author": self.author,
            "committed_at": self.committed_at,
            "snapshot_id": self.snapshot_id,
            # Copies, not aliases: keep the value object effectively immutable.
            "metadata": dict(self.metadata),
            "tags": list(self.tags),
        }
| 107 | |
| 108 | |
@dataclass(frozen=True)
class MuseInspectResult:
    """Full serialized commit graph for a Muse repository.

    Returned by :func:`build_inspect_result` and rendered by the three
    format functions.

    - ``repo_id``: UUID identifying the local repository.
    - ``current_branch``: the branch HEAD currently points to.
    - ``branches``: mapping of branch names to their HEAD commit ID.
    - ``commits``: all graph nodes reachable from the traversed heads,
      newest-first within each traversed head.
    """

    repo_id: str
    current_branch: str
    branches: dict[str, str]
    commits: list[MuseInspectCommit]

    def to_dict(self) -> dict[str, object]:
        """Return a JSON-serializable dict of the full graph.

        ``branches`` is copied and every commit is serialized through its own
        ``to_dict()``, so mutating the returned payload does not alter the
        ``branches`` mapping held by this frozen instance.

        Returns:
            A new dict with the keys ``repo_id``, ``current_branch``,
            ``branches`` and ``commits``.
        """
        return {
            "repo_id": self.repo_id,
            "current_branch": self.current_branch,
            # Copy, not alias: keep the value object effectively immutable.
            "branches": dict(self.branches),
            "commits": [commit.to_dict() for commit in self.commits],
        }
| 136 | |
| 137 | |
| 138 | # --------------------------------------------------------------------------- |
| 139 | # Graph builder |
| 140 | # --------------------------------------------------------------------------- |
| 141 | |
| 142 | |
def _read_branches(muse_dir: pathlib.Path) -> dict[str, str]:
    """Scan ``.muse/refs/heads/`` and return a ``{branch: commit_id}`` dict.

    The directory is walked recursively so that a namespaced branch stored as
    ``refs/heads/feature/x`` is reported under the name ``feature/x``.
    Directories are skipped rather than read, and entries are visited in
    sorted path order so the resulting mapping is deterministic.

    Branches with empty or missing ref files are excluded (no commits yet).

    Args:
        muse_dir: Path to the repository's ``.muse`` directory.

    Returns:
        Mapping of branch name to the stripped contents of its ref file.
        Empty when ``refs/heads`` does not exist.
    """
    heads_dir = muse_dir / "refs" / "heads"
    branches: dict[str, str] = {}
    if not heads_dir.is_dir():
        return branches
    for ref_file in sorted(heads_dir.rglob("*")):
        # Intermediate directories of namespaced branches are not refs.
        if not ref_file.is_file():
            continue
        commit_id = ref_file.read_text().strip()
        if commit_id:
            # as_posix() keeps "/" as the separator on every platform.
            branches[ref_file.relative_to(heads_dir).as_posix()] = commit_id
    return branches
| 157 | |
| 158 | |
async def _load_commit_tags(
    session: AsyncSession, commit_ids: list[str]
) -> dict[str, list[str]]:
    """Fetch the tag strings of all given commits with one query.

    Args:
        session: Async SQLAlchemy session used to run the lookup.
        commit_ids: Commit IDs whose tags are wanted.

    Returns:
        A ``{commit_id: [tag, ...]}`` mapping. Every requested commit ID is
        present as a key; commits without tags map to an empty list. An
        empty *commit_ids* yields an empty mapping without touching the
        session.
    """
    tags_by_commit: dict[str, list[str]] = {}
    if commit_ids:
        query = select(MuseCliTag).where(MuseCliTag.commit_id.in_(commit_ids))
        result = await session.execute(query)
        # Seed every requested commit so untagged ones still appear.
        for commit_id in commit_ids:
            tags_by_commit[commit_id] = []
        for tag_row in result.scalars().all():
            tags_by_commit.setdefault(tag_row.commit_id, []).append(tag_row.tag)
    return tags_by_commit
| 176 | |
| 177 | |
async def _walk_from(
    session: AsyncSession,
    start_commit_id: str,
    depth: Optional[int],
    visited: set[str],
) -> list[MuseCliCommit]:
    """Follow first-parent links from *start_commit_id*, newest-first.

    The walk ends when any of the following holds: there is no further
    parent, *depth* rows have been collected, the next commit is already in
    *visited* (shared history reached from another head), or the next commit
    cannot be loaded from the database (a warning is logged).

    Args:
        session: Async SQLAlchemy session.
        start_commit_id: ID of the first commit to visit.
        depth: Maximum number of commits to return (``None`` = unlimited).
        visited: Mutable set of already-visited commit IDs. Updated
            in-place so sibling traversals share state.

    Returns:
        Ordered list of :class:`MuseCliCommit` rows, newest-first.
    """
    collected: list[MuseCliCommit] = []
    cursor: Optional[str] = start_commit_id
    while True:
        # Chain exhausted, or this part of history was already walked.
        if not cursor or cursor in visited:
            break
        if depth is not None and len(collected) >= depth:
            break
        commit_row = await session.get(MuseCliCommit, cursor)
        if commit_row is None:
            logger.warning("⚠️ muse inspect: commit %s not found — chain broken", cursor[:8])
            break
        visited.add(cursor)
        collected.append(commit_row)
        cursor = commit_row.parent_commit_id
    return collected
| 213 | |
| 214 | |
def _inspect_branch_from_head(head_ref: str) -> str:
    """Return the branch name encoded in the contents of ``.muse/HEAD``.

    Everything after the first ``refs/heads/`` marker is the branch name, so
    a namespaced branch such as ``refs/heads/feature/x`` yields ``feature/x``
    rather than just ``x``. When the marker is absent, the last ``/``
    separated component is returned.
    """
    _, marker, branch = head_ref.partition("refs/heads/")
    if marker:
        return branch
    return head_ref.rsplit("/", 1)[-1]


async def _inspect_resolve_ref(
    session: AsyncSession,
    repo_id: str,
    ref: str,
    branches: dict[str, str],
) -> str:
    """Resolve *ref* to a commit ID: branch name, then exact ID, then prefix.

    Raises:
        ValueError: When no branch or commit matches *ref*.
    """
    if ref in branches:
        return branches[ref]

    exact = await session.get(MuseCliCommit, ref)
    if exact is not None:
        return exact.commit_id

    # autoescape=True makes "%" and "_" in user input match literally instead
    # of acting as LIKE wildcards.
    # NOTE(review): an ambiguous prefix still resolves to whichever row the
    # database returns first — confirm whether that should be an error.
    result = await session.execute(
        select(MuseCliCommit).where(
            MuseCliCommit.repo_id == repo_id,
            MuseCliCommit.commit_id.startswith(ref, autoescape=True),
        )
    )
    first = result.scalars().first()
    if first is None:
        raise ValueError(f"Cannot resolve ref {ref!r} to a commit.")
    return first.commit_id


async def build_inspect_result(
    session: AsyncSession,
    root: pathlib.Path,
    *,
    ref: Optional[str] = None,
    depth: Optional[int] = None,
    include_branches: bool = False,
) -> MuseInspectResult:
    """Build the full :class:`MuseInspectResult` for a Muse repository.

    This is the primary entry point for ``muse inspect``. It reads the
    repository state from the ``.muse/`` directory, resolves starting commit
    IDs, walks the graph, and returns a fully typed result.

    Args:
        session: Async SQLAlchemy session (read-only operations only).
        root: Repository root path (contains ``.muse/``).
        ref: Optional starting commit reference. Accepts a full
            or abbreviated SHA, a branch name, or ``None``/
            ``"HEAD"`` (default: HEAD of current branch).
        depth: Maximum commits per branch traversal (``None`` =
            unlimited).
        include_branches: When ``True``, traverse all branches and merge
            their reachable commits into the output. When
            ``False``, only the current branch (or *ref*) is
            traversed.

    Returns:
        :class:`MuseInspectResult` with all graph nodes and branch pointers.
        Commits are ordered newest-first within each traversed head; heads
        are concatenated in traversal order.

    Raises:
        ValueError: When *ref* cannot be resolved to a commit.
        FileNotFoundError: When ``.muse/``, ``repo.json`` or ``HEAD`` are
            missing.
    """
    muse_dir = root / ".muse"
    repo_data: dict[str, str] = json.loads((muse_dir / "repo.json").read_text())
    repo_id = repo_data["repo_id"]

    head_ref = (muse_dir / "HEAD").read_text().strip()  # "refs/heads/main"
    current_branch = _inspect_branch_from_head(head_ref)  # "main"

    all_branches = _read_branches(muse_dir)

    # Determine the set of starting commit IDs to traverse.
    start_ids: list[str] = []
    if ref is not None and ref.upper() != "HEAD":
        start_ids.append(
            await _inspect_resolve_ref(session, repo_id, ref, all_branches)
        )
    else:
        # Default: HEAD of current branch (absent when it has no commits yet).
        head_commit_id = all_branches.get(current_branch, "")
        if head_commit_id:
            start_ids.append(head_commit_id)

    if include_branches:
        for branch_commit_id in all_branches.values():
            if branch_commit_id not in start_ids:
                start_ids.append(branch_commit_id)

    # Walk the graph; `visited` is shared so common history is emitted once.
    visited: set[str] = set()
    all_rows: list[MuseCliCommit] = []
    for start_id in start_ids:
        all_rows.extend(await _walk_from(session, start_id, depth, visited))

    # Bulk-load tags.
    tags_by_commit = await _load_commit_tags(
        session, [row.commit_id for row in all_rows]
    )

    # Build typed result nodes.
    commits = [
        MuseInspectCommit(
            commit_id=row.commit_id,
            short_id=row.commit_id[:8],
            branch=row.branch,
            parent_commit_id=row.parent_commit_id,
            parent2_commit_id=row.parent2_commit_id,
            message=row.message,
            author=row.author,
            committed_at=row.committed_at.isoformat(),
            snapshot_id=row.snapshot_id,
            metadata=dict(row.commit_metadata) if row.commit_metadata else {},
            tags=tags_by_commit.get(row.commit_id, []),
        )
        for row in all_rows
    ]

    logger.info(
        "✅ muse inspect: %d commit(s), %d branch(es) (repo=%s)",
        len(commits),
        len(all_branches),
        repo_id[:8],
    )
    return MuseInspectResult(
        repo_id=repo_id,
        current_branch=current_branch,
        branches=all_branches,
        commits=commits,
    )
| 334 | |
| 335 | |
| 336 | # --------------------------------------------------------------------------- |
| 337 | # Format renderers |
| 338 | # --------------------------------------------------------------------------- |
| 339 | |
| 340 | |
def render_json(result: MuseInspectResult, indent: int = 2) -> str:
    """Render *result* as a JSON document.

    The document is whatever ``result.to_dict()`` returns: ``repo_id``,
    ``current_branch``, ``branches``, and the ``commits`` array.

    Args:
        result: The inspect result to serialize.
        indent: JSON indentation level (default 2).

    Returns:
        Formatted JSON string.
    """
    payload = result.to_dict()
    # default=str: any value json cannot encode natively is emitted via str().
    return json.dumps(payload, default=str, indent=indent)
| 355 | |
| 356 | |
def render_dot(result: MuseInspectResult) -> str:
    """Serialize *result* to a Graphviz DOT directed graph.

    Each commit becomes a labelled node. Parent edges point from child
    to parent (matching git's convention); the second-parent edge is drawn
    dashed. Branch refs appear as bold rectangular nodes pointing to their
    HEAD commit; branches with an empty head ID are omitted.

    Commit messages are truncated to 40 characters and then escaped, so
    quotes, backslashes, and line breaks in a message cannot terminate or
    corrupt the quoted DOT label.

    Args:
        result: The inspect result to serialize.

    Returns:
        DOT source string, suitable for piping to ``dot -Tsvg``.
    """

    def _dot_escape(text: str) -> str:
        # Backslash first, so the escapes added afterwards are not doubled.
        return (
            text.replace("\\", "\\\\")
            .replace('"', '\\"')
            .replace("\n", "\\n")
        )

    lines: list[str] = ["digraph muse_graph {", ' rankdir="LR";', ' node [shape=ellipse];', ""]

    for commit in result.commits:
        message = commit.message or ""
        # Truncate before escaping so an escape sequence is never cut in half.
        label = f"{commit.short_id}\\n{_dot_escape(message[:40])}"
        if len(message) > 40:
            label += "…"
        lines.append(f' "{commit.commit_id}" [label="{label}"];')

    lines.append("")

    for commit in result.commits:
        if commit.parent_commit_id:
            lines.append(f' "{commit.commit_id}" -> "{commit.parent_commit_id}";')
        if commit.parent2_commit_id:
            lines.append(
                f' "{commit.commit_id}" -> "{commit.parent2_commit_id}" [style=dashed];'
            )

    lines.append("")
    lines.append(" // Branch pointers")
    lines.append(' node [shape=rectangle style=bold];')
    for branch, head_id in result.branches.items():
        if not head_id:
            continue
        safe_branch = branch.replace("/", "_").replace("-", "_")
        lines.append(f' "branch_{safe_branch}" [label="{_dot_escape(branch)}"];')
        lines.append(f' "branch_{safe_branch}" -> "{head_id}";')

    lines.append("}")
    return "\n".join(lines)
| 400 | |
| 401 | |
def render_mermaid(result: MuseInspectResult) -> str:
    """Serialize *result* to a Mermaid.js graph definition.

    Produces a left-to-right ``graph LR`` block. Commit nodes are labelled
    with their short ID and a message truncated to 35 characters. Branch
    refs appear as rectangular nodes pointing to their HEAD commit; branches
    with an empty head ID are omitted.

    Because every Mermaid statement occupies one line and labels are
    double-quoted, double quotes in a message are replaced by single quotes
    and line breaks by spaces.

    Args:
        result: The inspect result to serialize.

    Returns:
        Mermaid source string, suitable for embedding in GitHub markdown
        inside a ``mermaid`` fenced code block.
    """
    lines: list[str] = ["graph LR"]

    for commit in result.commits:
        message = commit.message or ""
        msg = message[:35]
        if len(message) > 35:
            msg += "…"
        # A raw CR/LF would split the node definition across lines.
        safe_msg = msg.replace('"', "'").replace("\r", " ").replace("\n", " ")
        lines.append(f' {commit.commit_id[:8]}["{commit.short_id}: {safe_msg}"]')

    for commit in result.commits:
        if commit.parent_commit_id:
            lines.append(f" {commit.commit_id[:8]} --> {commit.parent_commit_id[:8]}")
        if commit.parent2_commit_id:
            # Dotted arrow marks the second parent.
            lines.append(
                f" {commit.commit_id[:8]} -.-> {commit.parent2_commit_id[:8]}"
            )

    for branch, head_id in result.branches.items():
        if head_id:
            safe_branch = branch.replace("/", "_").replace("-", "_")
            lines.append(f' {safe_branch}["{branch}"]')
            lines.append(f" {safe_branch} --> {head_id[:8]}")

    return "\n".join(lines)