cgcardona / muse public
commit.py python
565 lines 22.2 KB
12901c5a Initial extraction from tellurstori/maestro cgcardona <gabriel@tellurstori.com> 4d ago
1 """muse commit — filesystem snapshot commit with deterministic object IDs.
2
3 Algorithm
4 ---------
5 1. Resolve repo root via ``require_repo()``.
6 2. Read ``repo_id`` from ``.muse/repo.json`` and current branch from
7 ``.muse/HEAD``.
8 3. Walk ``muse-work/`` — hash each file with ``sha256(file_bytes)`` to
9 produce an ``object_id``.
10 4. Build snapshot manifest: ``{rel_path → object_id}``.
11 5. Compute ``snapshot_id = sha256(sorted(path:object_id pairs))``.
12 6. If the current branch HEAD already points to a commit with the same
13 ``snapshot_id``, print "Nothing to commit, working tree clean" and
14 exit 0 (unless ``--allow-empty`` is set).
15 7. Compute ``commit_id = sha256(sorted(parent_ids) | snapshot_id | message | timestamp)``.
16 8. Persist to Postgres: upsert ``object`` rows → upsert ``snapshot`` row → insert ``commit`` row.
17 9. Update ``.muse/refs/heads/<branch>`` to the new ``commit_id``.
18
19 Music-domain flags
20 ------------------
21 ``--section TEXT``
22 Tag the commit as belonging to a musical section (e.g. ``verse``,
23 ``chorus``, ``bridge``). Stored in ``commit_metadata["section"]``.
24
25 ``--track TEXT``
26 Tag the commit as affecting a specific instrument track (e.g. ``drums``,
27 ``bass``, ``keys``). Stored in ``commit_metadata["track"]``.
28
29 ``--emotion TEXT``
30 Attach an emotion vector label to the commit (e.g. ``joyful``,
31 ``melancholic``, ``tense``). Stored in ``commit_metadata["emotion"]``.
32 Foundation for future ``muse log --emotion melancholic`` queries.
33
34 ``--co-author TEXT``
35 Add a ``Co-authored-by: Name <email>`` trailer to the commit message
36 (for collaborative sessions).
37
38 ``--allow-empty``
39 Allow committing even when the working tree has not changed since HEAD.
40 Useful for milestone markers and metadata-only annotations.
41
42 ``--amend``
43 Fold working-tree changes into the most recent commit, equivalent to
44 running ``muse amend``. Music-domain flags apply to the amended commit.
45
46 ``--no-verify``
47 Bypass pre-commit hooks. Accepted for forward-compatibility; currently
48 a no-op because the hook system has not been implemented yet.
49
50 ``--from-batch <path>``
51 -----------------------
52 When this flag is provided, the commit pipeline reads ``muse-batch.json``
53 and restricts the snapshot to only the files listed in the manifest's
54 ``files`` array. The ``commit_message_suggestion`` from the batch is used
55 as the commit message, making this a fast path for::
56
57 muse commit --from-batch muse-batch.json
58
59 without needing to specify ``-m``.
60 """
61 from __future__ import annotations
62
63 import asyncio
64 import datetime
65 import json
66 import logging
67 import pathlib
68 from typing import Optional
69
70 import typer
71 from sqlalchemy.ext.asyncio import AsyncSession
72 from sqlalchemy.orm.attributes import flag_modified
73
74 from maestro.muse_cli._repo import require_repo
75 from maestro.muse_cli.db import (
76 get_head_snapshot_id,
77 insert_commit,
78 open_session,
79 upsert_object,
80 upsert_snapshot,
81 )
82 from maestro.muse_cli.errors import ExitCode
83 from maestro.muse_cli.merge_engine import read_merge_state
84 from maestro.muse_cli.models import MuseCliCommit
85 from maestro.muse_cli.object_store import write_object_from_path
86 from maestro.muse_cli.snapshot import (
87 build_snapshot_manifest,
88 compute_commit_id,
89 compute_snapshot_id,
90 hash_file,
91 )
92
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Typer application object; the ``commit`` callback in this module is
# registered on it via ``@app.callback``.
app = typer.Typer()
96
97
98 # ---------------------------------------------------------------------------
99 # Batch manifest helpers
100 # ---------------------------------------------------------------------------
101
102
def load_muse_batch(batch_path: pathlib.Path) -> dict[str, object]:
    """Read and validate a muse-batch.json file.

    The file is decoded as UTF-8 regardless of the platform's locale
    encoding, so non-ASCII commit message suggestions and file names load
    the same way on every machine.

    Returns the parsed top-level JSON object as a dict.

    Raises ``typer.Exit`` with ``USER_ERROR`` if the file is missing, cannot
    be read or decoded, is not valid JSON, or does not contain a JSON object
    at the top level, so the Typer callback surfaces a clean message instead
    of a traceback.
    """
    if not batch_path.exists():
        typer.echo(f"❌ muse-batch.json not found: {batch_path}")
        raise typer.Exit(code=ExitCode.USER_ERROR)
    try:
        data = json.loads(batch_path.read_text(encoding="utf-8"))
    except json.JSONDecodeError as exc:
        typer.echo(f"❌ Invalid JSON in {batch_path}: {exc}")
        raise typer.Exit(code=ExitCode.USER_ERROR) from exc
    except (OSError, UnicodeDecodeError) as exc:
        # e.g. the path is a directory, is unreadable, or is not UTF-8 text.
        typer.echo(f"❌ Could not read {batch_path}: {exc}")
        raise typer.Exit(code=ExitCode.USER_ERROR) from exc
    if not isinstance(data, dict):
        # Callers use ``.get`` on the result; a list or scalar at the top
        # level would otherwise fail later with an AttributeError.
        typer.echo(
            f"❌ Invalid muse-batch.json in {batch_path}: "
            "expected a JSON object at the top level"
        )
        raise typer.Exit(code=ExitCode.USER_ERROR)
    return data
119
120
def build_snapshot_manifest_from_batch(
    batch_data: dict[str, object],
    repo_root: pathlib.Path,
) -> dict[str, str]:
    """Build a snapshot manifest restricted to files listed in a muse-batch.

    ``batch_data["files"]`` is expected to be a list of objects whose
    ``path`` value is relative to the repo root
    (e.g. ``"muse-work/tracks/drums/jazz_4b_abc.mid"``). The returned
    manifest uses paths relative to ``muse-work/`` so it is compatible with
    ``build_snapshot_manifest``.

    Entries are silently skipped when they:

    - are not objects, or have a missing / empty / non-string ``path``;
    - point outside ``muse-work/``, including paths that contain ``..``
      components (``relative_to`` is purely lexical, so
      ``muse-work/../x`` would otherwise be accepted);
    - do not exist on disk as a regular file (the batch may reference files
      from a different machine or a partial run).

    Returns a ``{rel_path: object_id}`` dict where *rel_path* is a POSIX
    path relative to ``muse-work/`` and *object_id* comes from
    ``hash_file``.
    """
    workdir = repo_root / "muse-work"
    raw_files = batch_data.get("files", [])
    entries = raw_files if isinstance(raw_files, list) else []
    manifest: dict[str, str] = {}

    for entry in entries:
        if not isinstance(entry, dict):
            continue
        raw_path = entry.get("path")
        if not isinstance(raw_path, str) or not raw_path:
            continue

        # Paths in the batch are relative to repo root, e.g. muse-work/tracks/…
        abs_path = repo_root / raw_path
        try:
            rel = abs_path.relative_to(workdir)
        except ValueError:
            # File is outside muse-work/ — skip
            continue
        if ".." in rel.parts:
            # Lexically "inside" muse-work/ but may climb back out of it.
            continue
        if not abs_path.is_file():
            continue

        # Key in the manifest is relative to muse-work/
        manifest[rel.as_posix()] = hash_file(abs_path)

    return manifest
159
160
161 # ---------------------------------------------------------------------------
162 # Music metadata helpers
163 # ---------------------------------------------------------------------------
164
165
def _append_co_author(message: str, co_author: str) -> str:
    """Append a Co-authored-by trailer to *message*.

    Follows the Git convention: a blank line separates the message body from
    the trailer block, and trailers inside the block sit on consecutive
    lines. When *message* already ends with a ``Co-authored-by:`` line (for
    example when amending a commit that has one), the new trailer is added
    directly below it rather than after another blank line, so repeated
    calls build a single contiguous trailer block.

    Returns the trailer alone when *message* is empty.
    """
    trailer = f"Co-authored-by: {co_author}"
    if not message:
        return trailer

    body = message.rstrip()
    last_line = body.rsplit("\n", 1)[-1]
    # Trailer keys are matched case-insensitively, as Git does.
    if last_line.lower().startswith("co-authored-by:"):
        return f"{body}\n{trailer}"
    return f"{message}\n\n{trailer}"
174
175
async def _apply_commit_music_metadata(
    *,
    session: AsyncSession,
    commit_id: str,
    section: str | None,
    track: str | None,
    emotion: str | None,
) -> None:
    """Merge music-domain flags into commit_metadata for *commit_id*.

    Preserves existing metadata keys (e.g. ``tempo_bpm`` written by
    ``muse tempo --set``) — only the supplied non-None keys are overwritten.

    A flag counts as "supplied" when it is not ``None``; this is the same
    rule ``_commit_async`` uses when it builds ``commit_metadata``, so an
    explicitly passed empty string is stored on both code paths.

    Returns without touching the session when all three flags are ``None``.
    Logs a warning and returns when no commit row with *commit_id* is found.
    """
    supplied: dict[str, object] = {
        key: value
        for key, value in (("section", section), ("track", track), ("emotion", emotion))
        if value is not None
    }
    if not supplied:
        return

    commit = await session.get(MuseCliCommit, commit_id)
    if commit is None:
        logger.warning("⚠️ Commit %s not found for metadata update", commit_id[:8])
        return

    # Copy before merging so the stored mapping is replaced, not mutated.
    metadata: dict[str, object] = dict(commit.commit_metadata or {})
    metadata.update(supplied)

    commit.commit_metadata = metadata
    # Explicitly mark the column as changed for the ORM.
    flag_modified(commit, "commit_metadata")
    session.add(commit)
    logger.debug(
        "✅ Applied music metadata to %s: section=%r track=%r emotion=%r",
        commit_id[:8],
        section,
        track,
        emotion,
    )
216
217
218 # ---------------------------------------------------------------------------
219 # Testable async core
220 # ---------------------------------------------------------------------------
221
222
async def _commit_async(
    *,
    message: str,
    root: pathlib.Path,
    session: AsyncSession,
    batch_path: pathlib.Path | None = None,
    section: str | None = None,
    track: str | None = None,
    emotion: str | None = None,
    co_author: str | None = None,
    allow_empty: bool = False,
) -> str:
    """Create one commit from the working tree and return its ``commit_id``.

    Every filesystem and database side-effect of ``muse commit`` happens
    inside this coroutine, so tests can drive it with their own *session*
    and a temporary *root*.

    Steps, in order:

    1. Abort if ``read_merge_state`` reports conflicted paths.
    2. Read ``repo_id`` from ``.muse/repo.json`` and the branch ref from
       ``.muse/HEAD``; the current ref content (if any) becomes the parent.
    3. Build the manifest — from the batch file when *batch_path* is given
       (its ``commit_message_suggestion``, when non-empty, replaces
       *message*), otherwise from the whole ``muse-work/`` directory.
    4. Unless *allow_empty* is set, stop with exit code ``SUCCESS`` when the
       branch HEAD already has the same snapshot.
    5. Append the *co_author* trailer, compute the commit ID, persist
       objects, snapshot and commit, then rewrite the branch ref file.

    *section*, *track* and *emotion* are stored in ``commit_metadata`` when
    they are not ``None``.

    Raises ``typer.Exit`` (``USER_ERROR`` for conflicts, a missing or empty
    work tree, or a batch with no files on disk; ``SUCCESS`` for "nothing to
    commit") so the Typer callback prints a message instead of a traceback.
    """
    dot_muse = root / ".muse"
    work_tree = root / "muse-work"

    # ── Refuse to commit in the middle of a conflicted merge ─────────────
    pending_merge = read_merge_state(root)
    if pending_merge is not None and pending_merge.conflict_paths:
        typer.echo(
            "❌ You have unresolved merge conflicts.\n"
            " Fix conflicts in the listed files, then run 'muse commit'."
        )
        for conflicted in sorted(pending_merge.conflict_paths):
            typer.echo(f"\tboth modified: {conflicted}")
        raise typer.Exit(code=ExitCode.USER_ERROR)

    # ── Which repo are we in? ────────────────────────────────────────────
    repo_json_text = (dot_muse / "repo.json").read_text()
    repo_info: dict[str, str] = json.loads(repo_json_text)
    repo_id = repo_info["repo_id"]

    # ── Which branch, and what does it currently point at? ───────────────
    head_ref = (dot_muse / "HEAD").read_text().strip()  # e.g. "refs/heads/main"
    branch = head_ref.rsplit("/", 1)[-1]  # last path component, e.g. "main"
    ref_path = dot_muse / pathlib.Path(head_ref)

    current_tip = ref_path.read_text().strip() if ref_path.exists() else ""
    parent_commit_id: str | None = current_tip or None
    parent_ids = [] if parent_commit_id is None else [parent_commit_id]

    # ── Collect the files that make up this snapshot ─────────────────────
    if batch_path is None:
        # Whole-tree path: everything under muse-work/ is snapshotted.
        if not work_tree.exists():
            typer.echo(
                "⚠️ No muse-work/ directory found. Generate some artifacts first.\n"
                " Tip: run the Maestro stress test to populate muse-work/."
            )
            raise typer.Exit(code=ExitCode.USER_ERROR)

        manifest = build_snapshot_manifest(work_tree)
        if not manifest:
            typer.echo("⚠️ muse-work/ is empty — nothing to commit.")
            raise typer.Exit(code=ExitCode.USER_ERROR)
    else:
        # Batch path: only files named in muse-batch.json are snapshotted.
        batch = load_muse_batch(batch_path)
        suggested = str(batch.get("commit_message_suggestion", "")).strip()
        if suggested:
            message = suggested
        manifest = build_snapshot_manifest_from_batch(batch, root)
        if not manifest:
            typer.echo(
                "⚠️ No files from muse-batch.json found on disk — nothing to commit.\n"
                f" Batch: {batch_path}"
            )
            raise typer.Exit(code=ExitCode.USER_ERROR)

    snapshot_id = compute_snapshot_id(manifest)

    # ── Skip identical snapshots unless --allow-empty was given ──────────
    # ``and`` short-circuits, so the DB lookup only runs when the guard is on.
    if not allow_empty and await get_head_snapshot_id(session, repo_id, branch) == snapshot_id:
        typer.echo("Nothing to commit, working tree clean")
        raise typer.Exit(code=ExitCode.SUCCESS)

    # ── Finalise the message ─────────────────────────────────────────────
    if co_author:
        message = _append_co_author(message, co_author)

    # ── Music metadata: keep only the flags that were actually passed ────
    tags: dict[str, object] = {}
    if section is not None:
        tags["section"] = section
    if track is not None:
        tags["track"] = track
    if emotion is not None:
        tags["emotion"] = emotion
    commit_metadata: dict[str, object] | None = tags or None

    # ── Commit ID derived from parents, snapshot, message and timestamp ──
    committed_at = datetime.datetime.now(datetime.timezone.utc)
    commit_id = compute_commit_id(
        parent_ids=parent_ids,
        snapshot_id=snapshot_id,
        message=message,
        committed_at_iso=committed_at.isoformat(),
    )

    # ── Objects: one DB row plus one entry in the local object store ─────
    for rel_path, object_id in manifest.items():
        source_file = work_tree / rel_path
        await upsert_object(
            session,
            object_id=object_id,
            size_bytes=source_file.stat().st_size,
        )
        # The store is given the path rather than the bytes; the original
        # author notes this avoids loading large blobs into memory and lets
        # ``muse read-tree`` / ``muse reset --hard`` rebuild muse-work/.
        write_object_from_path(root, object_id, source_file)

    # ── Snapshot row, flushed so it exists before the commit row is added ─
    await upsert_snapshot(session, manifest=manifest, snapshot_id=snapshot_id)
    await session.flush()

    # ── Commit row ───────────────────────────────────────────────────────
    await insert_commit(
        session,
        MuseCliCommit(
            commit_id=commit_id,
            repo_id=repo_id,
            branch=branch,
            parent_commit_id=parent_commit_id,
            snapshot_id=snapshot_id,
            message=message,
            author="",
            committed_at=committed_at,
            commit_metadata=commit_metadata,
        ),
    )

    # ── Move the branch ref to the new commit ────────────────────────────
    ref_path.parent.mkdir(parents=True, exist_ok=True)
    ref_path.write_text(commit_id)

    short_id = commit_id[:8]
    typer.echo(f"✅ [{branch} {short_id}] {message}")
    logger.info("✅ muse commit %s on %r: %s", short_id, branch, message)
    return commit_id
380
381
382 # ---------------------------------------------------------------------------
383 # Typer command
384 # ---------------------------------------------------------------------------
385
386
@app.callback(invoke_without_command=True)
def commit(
    ctx: typer.Context,
    message: Optional[str] = typer.Option(
        None, "-m", "--message", help="Commit message."
    ),
    from_batch: Optional[str] = typer.Option(
        None,
        "--from-batch",
        help=(
            "Path to muse-batch.json produced by the stress test. "
            "Uses commit_message_suggestion from the batch and snapshots only "
            "the files listed in files[]. Overrides -m when present."
        ),
    ),
    amend: bool = typer.Option(
        False,
        "--amend",
        help=(
            "Fold working-tree changes into the most recent commit. "
            "Equivalent to running 'muse amend'. Music-domain flags "
            "(--section, --track, --emotion, --co-author) apply to the "
            "amended commit."
        ),
    ),
    no_verify: bool = typer.Option(
        False,
        "--no-verify",
        help=(
            "Bypass pre-commit hooks. Currently a no-op — accepted for "
            "forward-compatibility with the planned hook system."
        ),
    ),
    section: Optional[str] = typer.Option(
        None,
        "--section",
        help=(
            "Tag this commit as belonging to a musical section "
            "(e.g. verse, chorus, bridge). Stored in commit_metadata and "
            "queryable via 'muse log --section <value>'."
        ),
    ),
    track: Optional[str] = typer.Option(
        None,
        "--track",
        help=(
            "Tag this commit as affecting a specific instrument track "
            "(e.g. drums, bass, keys). Stored in commit_metadata and "
            "queryable via 'muse log --track <value>'."
        ),
    ),
    emotion: Optional[str] = typer.Option(
        None,
        "--emotion",
        help=(
            "Attach an emotion vector label to this commit "
            "(e.g. joyful, melancholic, tense). Foundation for future "
            "'muse log --emotion melancholic' queries."
        ),
    ),
    co_author: Optional[str] = typer.Option(
        None,
        "--co-author",
        help=(
            "Add a Co-authored-by trailer to the commit message. "
            "Use 'Name <email>' format for Git-compatible attribution."
        ),
    ),
    allow_empty: bool = typer.Option(
        False,
        "--allow-empty",
        help=(
            "Allow committing even when the working tree has not changed "
            "since HEAD. Useful for milestone markers or metadata-only "
            "annotations (e.g. 'muse commit --allow-empty --emotion joyful')."
        ),
    ),
) -> None:
    """Record the current muse-work/ state as a new version in history."""
    # NOTE: the docstring above is kept to one line on purpose; Typer shows
    # it as the command's help text.
    if no_verify:
        logger.debug("⚠️ --no-verify supplied; hook system not yet implemented — proceeding")

    root = require_repo()

    if amend:
        # --amend hands the commit itself to the amend pipeline, then layers
        # the music-domain metadata on top of the resulting commit.
        from maestro.muse_cli.commands.amend import _amend_async

        async def _head_commit_message(session: AsyncSession) -> str | None:
            """Return the message of the commit the current branch ref names.

            Yields ``None`` when the ref file is absent or empty, or when no
            commit row is found for the ID it contains.
            """
            muse_dir = root / ".muse"
            head_ref = (muse_dir / "HEAD").read_text().strip()
            ref_path = muse_dir / pathlib.Path(head_ref)
            if not ref_path.exists():
                return None
            head_commit_id = ref_path.read_text().strip()
            if not head_commit_id:
                return None
            head_commit = await session.get(MuseCliCommit, head_commit_id)
            return head_commit.message if head_commit else None

        async def _run_amend() -> None:
            async with open_session() as session:
                # Start from -m (possibly None). Only when a co-author has to
                # be appended do we need a concrete message up front; without
                # -m that means borrowing the HEAD commit's message.
                final_message = message
                if co_author:
                    if final_message is None:
                        final_message = await _head_commit_message(session)
                    final_message = _append_co_author(final_message or "", co_author)

                # A concrete message is passed through verbatim
                # (no_edit=False); with none, _amend_async applies its own
                # no-edit behaviour.
                amended_id = await _amend_async(
                    message=final_message,
                    no_edit=final_message is None,
                    reset_author=False,
                    root=root,
                    session=session,
                )

                await _apply_commit_music_metadata(
                    session=session,
                    commit_id=amended_id,
                    section=section,
                    track=track,
                    emotion=emotion,
                )

        try:
            asyncio.run(_run_amend())
        except typer.Exit:
            # Deliberate exits keep their own exit code.
            raise
        except Exception as exc:
            typer.echo(f"❌ muse commit --amend failed: {exc}")
            logger.error("❌ muse commit --amend error: %s", exc, exc_info=True)
            raise typer.Exit(code=ExitCode.INTERNAL_ERROR)
        return

    # ── Regular commit ────────────────────────────────────────────────────
    # A message source is mandatory unless --allow-empty was requested.
    if message is None and from_batch is None and not allow_empty:
        typer.echo("❌ Provide either -m MESSAGE or --from-batch PATH.")
        raise typer.Exit(code=ExitCode.USER_ERROR)

    batch_path = None if from_batch is None else pathlib.Path(from_batch)

    # With --from-batch and no -m the message starts out empty;
    # _commit_async substitutes the batch's commit_message_suggestion.
    effective_message = message if message else ""

    async def _run() -> None:
        async with open_session() as session:
            await _commit_async(
                message=effective_message,
                root=root,
                session=session,
                batch_path=batch_path,
                section=section,
                track=track,
                emotion=emotion,
                co_author=co_author,
                allow_empty=allow_empty,
            )

    try:
        asyncio.run(_run())
    except typer.Exit:
        # Deliberate exits keep their own exit code.
        raise
    except Exception as exc:
        typer.echo(f"❌ muse commit failed: {exc}")
        logger.error("❌ muse commit error: %s", exc, exc_info=True)
        raise typer.Exit(code=ExitCode.INTERNAL_ERROR)