cgcardona / muse public
plugin.py python
652 lines 23.9 KB
c5b7bd6b feat(phase-2): domain schema declaration + diff algorithm library (#15) Gabriel Cardona <cgcardona@gmail.com> 2d ago
1 """Music domain plugin — reference implementation of :class:`MuseDomainPlugin`.
2
3 This plugin implements the five Muse domain interfaces for MIDI state:
4 notes, velocities, controller events (CC), pitch bends, and aftertouch.
5
6 It is the domain that proved the abstraction. Every other domain — scientific
7 simulation, genomics, 3D spatial design — is a new plugin that implements
8 the same five interfaces.
9
10 Live State
11 ----------
12 For the music domain, ``LiveState`` is either:
13
14 1. A ``muse-work/`` directory path (``pathlib.Path``) — the CLI path where
15 MIDI files live on disk and are managed by ``muse commit / checkout``.
16 2. A dict snapshot previously captured by :meth:`snapshot` — used when
17 constructing merges and diffs in memory.
18
19 Both forms are supported. The plugin detects which form it received by
20 checking for ``pathlib.Path`` vs ``dict``.
21
22 Snapshot Format
23 ---------------
24 A music snapshot is a JSON-serialisable dict:
25
26 .. code-block:: json
27
28 {
29 "files": {
30 "tracks/drums.mid": "<sha256>",
31 "tracks/bass.mid": "<sha256>"
32 },
33 "domain": "music"
34 }
35
36 The ``files`` key maps POSIX paths (relative to ``muse-work/``) to their
37 SHA-256 content digests.
38
39 Delta Format (Phase 1)
40 ----------------------
41 ``diff()`` returns a ``StructuredDelta`` with typed ``DomainOp`` entries:
42
43 - ``InsertOp`` — a file was added (``content_id`` = its SHA-256 hash).
44 - ``DeleteOp`` — a file was removed.
45 - ``ReplaceOp`` — a non-MIDI file's content changed.
46 - ``PatchOp`` — a ``.mid`` file changed; ``child_ops`` contains note-level
47 ``InsertOp`` / ``DeleteOp`` entries from the Myers LCS diff.
48
49 When ``repo_root`` is available, MIDI files are loaded from the object store
50 and diffed at note level. Without it, modified ``.mid`` files fall back to
51 ``ReplaceOp``.
52 """
53 from __future__ import annotations
54
55 import hashlib
56 import json
57 import logging
58 import pathlib
59
60 from muse.core.schema import (
61 DimensionSpec,
62 DomainSchema,
63 SequenceSchema,
64 SetSchema,
65 TensorSchema,
66 TreeSchema,
67 )
68 from muse.domain import (
69 DeleteOp,
70 DomainOp,
71 DriftReport,
72 InsertOp,
73 LiveState,
74 MergeResult,
75 MuseDomainPlugin,
76 PatchOp,
77 ReplaceOp,
78 SnapshotManifest,
79 StateDelta,
80 StateSnapshot,
81 StructuredDelta,
82 )
83
# Module-scoped logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Domain identifier stamped into every snapshot, delta, and schema this
# plugin produces.
_DOMAIN_TAG = "music"
87
88
class MusicPlugin:
    """Music domain plugin for the Muse VCS.

    Implements :class:`~muse.domain.MuseDomainPlugin` for MIDI state stored
    as files in ``muse-work/``. Use this plugin when running ``muse`` against
    a directory of MIDI, audio, or other music production files.

    This is the reference implementation. It demonstrates the five-interface
    contract that every other domain plugin must satisfy.
    """

    # ------------------------------------------------------------------
    # 1. snapshot — capture live state as a content-addressed dict
    # ------------------------------------------------------------------

    def snapshot(self, live_state: LiveState) -> StateSnapshot:
        """Capture the current ``muse-work/`` directory as a snapshot dict.

        Args:
            live_state: Either a ``pathlib.Path`` pointing to ``muse-work/``
                or an existing snapshot dict (returned as-is).

        Returns:
            A JSON-serialisable ``{"files": {path: sha256}, "domain": "music"}``
            dict. The ``files`` mapping is the canonical snapshot manifest used
            by the core VCS engine for commit / checkout / diff.

        Ignore rules
        ------------
        When *live_state* is a ``pathlib.Path``, the plugin reads
        ``.museignore`` from the repository root (the parent of ``muse-work/``)
        and excludes any matching paths from the snapshot. Files whose own
        name starts with ``"."`` are always excluded regardless of
        ``.museignore``; the check is on the file name only, so a regular file
        inside a dot-directory is still subject to the ignore patterns alone.
        """
        if not isinstance(live_state, pathlib.Path):
            # Already a snapshot dict: hand it back untouched.
            return live_state

        from muse.core.ignore import is_ignored, load_patterns

        workdir = live_state
        repo_root = workdir.parent
        patterns = load_patterns(repo_root)

        files: dict[str, str] = {}
        # Sorted traversal keeps the manifest's insertion order deterministic.
        for file_path in sorted(workdir.rglob("*")):
            if not file_path.is_file() or file_path.name.startswith("."):
                continue
            rel = file_path.relative_to(workdir).as_posix()
            if is_ignored(rel, patterns):
                continue
            files[rel] = _hash_file(file_path)
        return SnapshotManifest(files=files, domain=_DOMAIN_TAG)

    # ------------------------------------------------------------------
    # 2. diff — compute the structured delta between two snapshots
    # ------------------------------------------------------------------

    def diff(
        self,
        base: StateSnapshot,
        target: StateSnapshot,
        *,
        repo_root: pathlib.Path | None = None,
    ) -> StateDelta:
        """Compute a ``StructuredDelta`` between two music snapshots.

        File additions and removals produce ``InsertOp`` and ``DeleteOp``
        entries respectively. For modified files:

        - ``.mid`` files: when ``repo_root`` is provided, load the MIDI bytes
          from the object store and produce a ``PatchOp`` with note-level
          ``child_ops`` from the Myers LCS diff. Falls back to ``ReplaceOp``
          when the object store is unavailable or parsing fails.
        - All other files: ``ReplaceOp`` with file-level content IDs.

        Ops are emitted in a fixed order: all inserts, then all deletes, then
        all modifications, each group sorted by path.

        Args:
            base: The ancestor snapshot.
            target: The later snapshot.
            repo_root: Repository root directory. When provided, MIDI files are
                loaded from ``.muse/objects/`` for note-level diffing.

        Returns:
            A ``StructuredDelta`` whose ``ops`` list transforms *base* into
            *target* and whose ``summary`` is human-readable.
        """
        base_files = base["files"]
        target_files = target["files"]

        base_paths = set(base_files)
        target_paths = set(target_files)

        ops: list[DomainOp] = []

        # Added files → InsertOp
        for path in sorted(target_paths - base_paths):
            ops.append(
                InsertOp(
                    op="insert",
                    address=path,
                    position=None,
                    content_id=target_files[path],
                    content_summary=f"new file: {path}",
                )
            )

        # Removed files → DeleteOp
        for path in sorted(base_paths - target_paths):
            ops.append(
                DeleteOp(
                    op="delete",
                    address=path,
                    position=None,
                    content_id=base_files[path],
                    content_summary=f"deleted: {path}",
                )
            )

        # Modified files → PatchOp (deep MIDI diff) or ReplaceOp
        for path in sorted(
            p for p in base_paths & target_paths if base_files[p] != target_files[p]
        ):
            ops.append(
                _diff_modified_file(
                    path=path,
                    old_hash=base_files[path],
                    new_hash=target_files[path],
                    repo_root=repo_root,
                )
            )

        summary = _summarise_ops(ops)
        return StructuredDelta(domain=_DOMAIN_TAG, ops=ops, summary=summary)

    # ------------------------------------------------------------------
    # 3. merge — three-way reconciliation
    # ------------------------------------------------------------------

    @staticmethod
    def _adopt_side(
        merged: dict[str, str],
        side_files: dict[str, str],
        path: str,
    ) -> None:
        """Make *merged* agree with *side_files* for *path*.

        Copies the side's content hash when the side has the file, otherwise
        removes the entry from *merged* (the side deleted it or never had it).
        """
        if path in side_files:
            merged[path] = side_files[path]
        else:
            merged.pop(path, None)

    def merge(
        self,
        base: StateSnapshot,
        left: StateSnapshot,
        right: StateSnapshot,
        *,
        repo_root: pathlib.Path | None = None,
    ) -> MergeResult:
        """Three-way merge two divergent music state lines against a common base.

        A file is auto-merged when only one side changed it, or when both
        sides arrived at the same outcome (both deleted it, or both ended up
        with the same content hash). When both sides changed the same file in
        different ways, the merge proceeds as follows:

        1. **File-level strategy** — if ``.museattributes`` contains an
           ``ours`` or ``theirs`` rule matching the path (dimension ``"*"``),
           the rule is applied and the file is removed from the conflict list.

        2. **Dimension-level merge** — for ``.mid`` files that survive the
           file-level check, the MIDI event stream is split into orthogonal
           dimension slices (notes/melodic, harmonic, dynamic, structural).
           Each dimension is merged independently. Dimension-specific
           ``ours``/``theirs`` rules in ``.museattributes`` are honoured.
           Only dimensions where *both* sides changed AND no resolvable rule
           exists cause a true file-level conflict.

        3. **Manual override** — ``manual`` strategy in ``.museattributes``
           forces a path into the conflict list even when the engine would
           normally auto-resolve it.

        Args:
            base: The common-ancestor snapshot.
            left: The "ours" snapshot.
            right: The "theirs" snapshot.
            repo_root: Repository root directory. When ``None``, no attribute
                rules are loaded and no dimension-level MIDI merge is tried.

        Returns:
            A ``MergeResult`` carrying the merged manifest, the sorted list of
            conflicting paths, the strategy applied per path, and the
            per-dimension reports for dimension-merged MIDI files. A path left
            in conflict keeps its *base* entry in the merged manifest.
        """
        from muse.core.attributes import load_attributes, resolve_strategy
        from muse.core.object_store import read_object, write_object
        from muse.plugins.music.midi_merge import merge_midi_dimensions

        base_files = base["files"]
        left_files = left["files"]
        right_files = right["files"]

        attrs = load_attributes(repo_root) if repo_root is not None else []

        left_changed: set[str] = _changed_paths(base_files, left_files)
        right_changed: set[str] = _changed_paths(base_files, right_files)
        both_changed: set[str] = left_changed & right_changed

        merged: dict[str, str] = dict(base_files)

        # Apply clean single-side changes first.
        for path in left_changed - both_changed:
            self._adopt_side(merged, left_files, path)
        for path in right_changed - both_changed:
            self._adopt_side(merged, right_files, path)

        # Convergent changes: both sides reached the same outcome for the path
        # (both deleted it, or both hold the identical content hash). There is
        # nothing to reconcile, so these are not conflicts.
        consensus: set[str] = {
            p
            for p in both_changed
            if (p in left_files) == (p in right_files)
            and left_files.get(p) == right_files.get(p)
        }
        for path in consensus:
            self._adopt_side(merged, left_files, path)

        real_conflicts: set[str] = both_changed - consensus

        applied_strategies: dict[str, str] = {}
        dimension_reports: dict[str, dict[str, str]] = {}
        final_conflicts: list[str] = []

        for path in sorted(real_conflicts):
            file_strategy = resolve_strategy(attrs, path, "*")

            if file_strategy == "ours":
                self._adopt_side(merged, left_files, path)
                applied_strategies[path] = "ours"
                continue

            if file_strategy == "theirs":
                self._adopt_side(merged, right_files, path)
                applied_strategies[path] = "theirs"
                continue

            # Dimension-level merge needs all three versions of a MIDI file
            # and access to the object store.
            if (
                repo_root is not None
                and path.lower().endswith(".mid")
                and path in left_files
                and path in right_files
                and path in base_files
            ):
                base_obj = read_object(repo_root, base_files[path])
                left_obj = read_object(repo_root, left_files[path])
                right_obj = read_object(repo_root, right_files[path])

                if (
                    base_obj is not None
                    and left_obj is not None
                    and right_obj is not None
                ):
                    try:
                        dim_result = merge_midi_dimensions(
                            base_obj,
                            left_obj,
                            right_obj,
                            attrs,
                            path,
                        )
                    except ValueError:
                        # Treated the same as "could not merge": fall through
                        # to a file-level conflict.
                        dim_result = None

                    if dim_result is not None:
                        merged_bytes, dim_report = dim_result
                        new_hash = hashlib.sha256(merged_bytes).hexdigest()
                        write_object(repo_root, new_hash, merged_bytes)
                        merged[path] = new_hash
                        applied_strategies[path] = "dimension-merge"
                        dimension_reports[path] = dim_report
                        continue

            final_conflicts.append(path)

        # Manual override: paths the engine resolved on its own are pushed
        # back into the conflict list when a ``manual`` rule matches them.
        for path in sorted((left_changed | right_changed) - real_conflicts):
            if path not in left_files and path not in right_files:
                # Deleted on both sides — nothing left to review manually.
                continue
            if resolve_strategy(attrs, path, "*") == "manual":
                final_conflicts.append(path)
                applied_strategies[path] = "manual"
                # Undo the automatic resolution: restore the base entry.
                self._adopt_side(merged, base_files, path)

        return MergeResult(
            merged=SnapshotManifest(files=merged, domain=_DOMAIN_TAG),
            conflicts=sorted(final_conflicts),
            applied_strategies=applied_strategies,
            dimension_reports=dimension_reports,
        )

    # ------------------------------------------------------------------
    # 4. drift — compare committed state vs live state
    # ------------------------------------------------------------------

    def drift(
        self,
        committed: StateSnapshot,
        live: LiveState,
    ) -> DriftReport:
        """Detect uncommitted changes in ``muse-work/`` relative to *committed*.

        The comparison is file-level: :meth:`diff` is called without a
        ``repo_root``, so modified files are reported as ``ReplaceOp``.

        Args:
            committed: The last committed snapshot.
            live: Either a ``pathlib.Path`` (``muse-work/``) or a snapshot
                dict representing current live state.

        Returns:
            A :class:`~muse.domain.DriftReport` describing whether and how the
            live state differs from the committed snapshot.
        """
        live_snapshot = self.snapshot(live)
        delta = self.diff(committed, live_snapshot)

        inserts = sum(1 for op in delta["ops"] if op["op"] == "insert")
        deletes = sum(1 for op in delta["ops"] if op["op"] == "delete")
        modified = sum(1 for op in delta["ops"] if op["op"] in ("replace", "patch"))
        has_drift = bool(inserts or deletes or modified)

        parts: list[str] = []
        if inserts:
            parts.append(f"{inserts} added")
        if deletes:
            parts.append(f"{deletes} removed")
        if modified:
            parts.append(f"{modified} modified")

        summary = ", ".join(parts) if parts else "working tree clean"
        return DriftReport(has_drift=has_drift, summary=summary, delta=delta)

    # ------------------------------------------------------------------
    # 5. apply — execute a delta against live state (checkout)
    # ------------------------------------------------------------------

    def apply(self, delta: StateDelta, live_state: LiveState) -> LiveState:
        """Apply a structured delta to produce a new live state.

        When ``live_state`` is a ``pathlib.Path`` the physical files have
        already been updated by the caller (``muse checkout`` restores objects
        from the store before calling this). Rescanning the directory is the
        cheapest correct way to reflect the new state.

        When ``live_state`` is a snapshot dict, file-level ``InsertOp``,
        ``DeleteOp`` and ``ReplaceOp`` entries are applied to a copy of its
        ``files`` mapping using the content IDs carried by the ops. Every
        other op kind (``PatchOp``, ``MoveOp``) is skipped, since
        reconstructing patched file content requires both the original bytes
        and the object store; a patched file therefore keeps its previous
        content ID. Callers that need those ops reflected should pass the
        workdir ``pathlib.Path`` instead.

        Args:
            delta: A ``StructuredDelta`` produced by :meth:`diff`.
            live_state: The workdir path (preferred) or a snapshot dict.

        Returns:
            The updated live state as a ``SnapshotManifest``.
        """
        if isinstance(live_state, pathlib.Path):
            return self.snapshot(live_state)

        # Work on a copy so the caller's snapshot is never mutated.
        current_files = dict(live_state["files"])

        for op in delta["ops"]:
            if op["op"] == "delete":
                current_files.pop(op["address"], None)
            elif op["op"] == "replace":
                current_files[op["address"]] = op["new_content_id"]
            elif op["op"] == "insert":
                current_files[op["address"]] = op["content_id"]
            # PatchOp and MoveOp: skip in-memory — caller must use workdir path.

        return SnapshotManifest(files=current_files, domain=_DOMAIN_TAG)

    # ------------------------------------------------------------------
    # 6. schema — declare structural schema for the algorithm library
    # ------------------------------------------------------------------

    def schema(self) -> DomainSchema:
        """Return the full structural schema for the music domain.

        Declares four semantic dimensions — melodic, harmonic, dynamic, and
        structural — that the core diff algorithm library (Phase 2) and merge
        engine (Phase 3) use to drive per-dimension operations.

        Top level is a ``SetSchema``: the music workspace is an unordered
        collection of audio/MIDI files, each identified by its SHA-256 content
        hash.

        Dimensions:

        - **melodic** — the sequence of note events over time. LCS-diffed so
          that insertions and deletions of individual notes are surfaced.
        - **harmonic** — the sequence of chord events and key-signature changes.
          LCS-diffed independently of the melodic dimension.
        - **dynamic** — velocity and expression curves as a 1-D float tensor.
          Epsilon of 1.0 ignores sub-1-velocity noise; sparse mode emits one
          ``ReplaceOp`` per changed event.
        - **structural** — track layout, time signatures, and tempo map as a
          labeled ordered tree. Structural changes are non-independent: they
          block merging all other dimensions until resolved, because a tempo
          change shifts the meaning of every subsequent note position.
        """
        return DomainSchema(
            domain=_DOMAIN_TAG,
            description="MIDI and audio file versioning with note-level diff",
            top_level=SetSchema(
                kind="set",
                element_type="audio_file",
                identity="by_content",
            ),
            dimensions=[
                DimensionSpec(
                    name="melodic",
                    description="Note pitches and durations over time",
                    schema=SequenceSchema(
                        kind="sequence",
                        element_type="note_event",
                        identity="by_position",
                        diff_algorithm="lcs",
                        alphabet=None,
                    ),
                    independent_merge=True,
                ),
                DimensionSpec(
                    name="harmonic",
                    description="Chord progressions and key signatures",
                    schema=SequenceSchema(
                        kind="sequence",
                        element_type="chord_event",
                        identity="by_position",
                        diff_algorithm="lcs",
                        alphabet=None,
                    ),
                    independent_merge=True,
                ),
                DimensionSpec(
                    name="dynamic",
                    description="Velocity and expression curves",
                    schema=TensorSchema(
                        kind="tensor",
                        dtype="float32",
                        rank=1,
                        epsilon=1.0,
                        diff_mode="sparse",
                    ),
                    independent_merge=True,
                ),
                DimensionSpec(
                    name="structural",
                    description="Track layout, time signatures, tempo map",
                    schema=TreeSchema(
                        kind="tree",
                        node_type="track_node",
                        diff_algorithm="zhang_shasha",
                    ),
                    independent_merge=False,
                ),
            ],
            merge_mode="three_way",
            schema_version=1,
        )
537
538
539 # ---------------------------------------------------------------------------
540 # Module-level helpers
541 # ---------------------------------------------------------------------------
542
543
def _diff_modified_file(
    *,
    path: str,
    old_hash: str,
    new_hash: str,
    repo_root: pathlib.Path | None,
) -> DomainOp:
    """Produce the best available op for a modified file.

    Tries a deep MIDI diff when possible; falls back to ``ReplaceOp``.

    Args:
        path: Snapshot-relative path of the modified file.
        old_hash: Content ID of the file in the base snapshot.
        new_hash: Content ID of the file in the target snapshot.
        repo_root: Repository root used to read both versions from the
            object store, or ``None`` to skip the deep diff.

    Returns:
        A ``PatchOp`` carrying the note-level child ops when *path* ends in
        ``.mid`` (case-insensitive), *repo_root* is given, both objects can be
        read, and the MIDI diff succeeds; otherwise a file-level ``ReplaceOp``.
    """
    fallback = ReplaceOp(
        op="replace",
        address=path,
        position=None,
        old_content_id=old_hash,
        new_content_id=new_hash,
        old_summary=f"{path} (previous)",
        new_summary=f"{path} (updated)",
    )

    if repo_root is None or not path.lower().endswith(".mid"):
        return fallback

    from muse.core.object_store import read_object
    from muse.plugins.music.midi_diff import diff_midi_notes

    base_bytes = read_object(repo_root, old_hash)
    target_bytes = read_object(repo_root, new_hash)
    if base_bytes is None or target_bytes is None:
        return fallback

    try:
        child_delta = diff_midi_notes(base_bytes, target_bytes, file_path=path)
        return PatchOp(
            op="patch",
            address=path,
            child_ops=child_delta["ops"],
            child_domain=child_delta["domain"],
            child_summary=child_delta["summary"],
        )
    except Exception as exc:
        # Deliberate best-effort: any failure in the deep diff degrades to a
        # file-level replace instead of aborting the whole diff.
        logger.debug("⚠️ MIDI deep diff failed for %r: %s", path, exc)
        return fallback
586
587
588 def _summarise_ops(ops: list[DomainOp]) -> str:
589 """Build a human-readable summary string from a list of domain ops."""
590 inserts = 0
591 deletes = 0
592 replaces = 0
593 patches = 0
594
595 for op in ops:
596 kind = op["op"]
597 if kind == "insert":
598 inserts += 1
599 elif kind == "delete":
600 deletes += 1
601 elif kind == "replace":
602 replaces += 1
603 elif kind == "patch":
604 patches += 1
605
606 parts: list[str] = []
607 if inserts:
608 parts.append(f"{inserts} file{'s' if inserts != 1 else ''} added")
609 if deletes:
610 parts.append(f"{deletes} file{'s' if deletes != 1 else ''} removed")
611 if replaces:
612 parts.append(f"{replaces} file{'s' if replaces != 1 else ''} modified")
613 if patches:
614 parts.append(f"{patches} file{'s' if patches != 1 else ''} patched")
615
616 return ", ".join(parts) if parts else "no changes"
617
618
619 def _hash_file(path: pathlib.Path) -> str:
620 """Return the SHA-256 hex digest of a file's raw bytes."""
621 h = hashlib.sha256()
622 with path.open("rb") as fh:
623 for chunk in iter(lambda: fh.read(65536), b""):
624 h.update(chunk)
625 return h.hexdigest()
626
627
628 def _changed_paths(
629 base: dict[str, str], other: dict[str, str]
630 ) -> set[str]:
631 """Return paths that differ between *base* and *other*."""
632 base_p = set(base)
633 other_p = set(other)
634 added = other_p - base_p
635 deleted = base_p - other_p
636 common = base_p & other_p
637 modified = {p for p in common if base[p] != other[p]}
638 return added | deleted | modified
639
640
def content_hash(snapshot: StateSnapshot) -> str:
    """Derive a stable SHA-256 hex digest that content-addresses *snapshot*.

    The snapshot is serialised as compact JSON with sorted keys, so two
    snapshots with equal contents hash identically regardless of key order.
    """
    hasher = hashlib.sha256()
    hasher.update(
        json.dumps(snapshot, sort_keys=True, separators=(",", ":")).encode()
    )
    return hasher.hexdigest()
645
646
#: Module-level singleton — import and use directly.
plugin = MusicPlugin()

# Import-time conformance check against the plugin protocol. Written as an
# explicit raise rather than an ``assert`` statement so the check is not
# stripped when Python runs with ``-O``; the exception type is unchanged.
if not isinstance(plugin, MuseDomainPlugin):
    raise AssertionError(
        "MusicPlugin does not satisfy the MuseDomainPlugin protocol"
    )