cgcardona / muse public
plugin.py python
1140 lines 44.3 KB
9ee9c39c refactor: rename music→midi domain, strip all 5-dim backward compat Gabriel Cardona <gabriel@tellurstori.com> 1d ago
1 """MIDI domain plugin — reference implementation of :class:`MuseDomainPlugin`.
2
3 This plugin implements the six Muse domain interfaces for MIDI state:
4 notes, velocities, controller events (CC), pitch bends, and aftertouch.
5
6 It is the domain that proved the abstraction. Every other domain — scientific
7 simulation, genomics, 3D spatial design — is a new plugin that implements
8 the same six interfaces.
9
10 Live State
11 ----------
12 For the MIDI domain, ``LiveState`` is either:
13
14 1. A ``muse-work/`` directory path (``pathlib.Path``) — the CLI path where
15 MIDI files live on disk and are managed by ``muse commit / checkout``.
16 2. A dict snapshot previously captured by :meth:`snapshot` — used when
17 constructing merges and diffs in memory.
18
19 Both forms are supported. The plugin detects which form it received by
20 checking for ``pathlib.Path`` vs ``dict``.
21
22 Snapshot Format
23 ---------------
24 A music snapshot is a JSON-serialisable dict:
25
26 .. code-block:: json
27
28 {
29 "files": {
30 "tracks/drums.mid": "<sha256>",
31 "tracks/bass.mid": "<sha256>"
32 },
33 "domain": "midi"
34 }
35
36 The ``files`` key maps POSIX paths (relative to ``muse-work/``) to their
37 SHA-256 content digests.
38
39 Delta Format
40 ----------------------
41 ``diff()`` returns a ``StructuredDelta`` with typed ``DomainOp`` entries:
42
43 - ``InsertOp`` — a file was added (``content_id`` = its SHA-256 hash).
44 - ``DeleteOp`` — a file was removed.
45 - ``ReplaceOp`` — a non-MIDI file's content changed.
46 - ``PatchOp`` — a ``.mid`` file changed; ``child_ops`` contains note-level
47 ``InsertOp`` / ``DeleteOp`` entries from the Myers LCS diff.
48
49 When ``repo_root`` is available, MIDI files are loaded from the object store
50 and diffed at note level. Without it, modified ``.mid`` files fall back to
51 ``ReplaceOp``.
52 """
53 from __future__ import annotations
54
55 import hashlib
56 import json
57 import logging
58 import pathlib
59
60 from muse.core.schema import (
61 DimensionSpec,
62 DomainSchema,
63 SequenceSchema,
64 SetSchema,
65 TensorSchema,
66 TreeSchema,
67 )
68 from muse.domain import (
69 DeleteOp,
70 DomainOp,
71 DriftReport,
72 InsertOp,
73 LiveState,
74 MergeResult,
75 MuseDomainPlugin,
76 PatchOp,
77 ReplaceOp,
78 SnapshotManifest,
79 StateDelta,
80 StateSnapshot,
81 StructuredDelta,
82 StructuredMergePlugin,
83 )
84 from muse.plugins.midi.midi_diff import NoteKey
85
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Domain identifier passed as ``domain=`` to every manifest, delta, schema and
# attribute lookup built in this module.
_DOMAIN_TAG = "midi"
89
90
class MidiPlugin:
    """MIDI domain plugin for the Muse VCS.

    Implements :class:`~muse.domain.MuseDomainPlugin` (six core interfaces)
    and :class:`~muse.domain.StructuredMergePlugin` (operation-level
    merge) for MIDI state stored as files in ``muse-work/``.

    This is the reference implementation. Every other domain plugin implements
    the same six core interfaces; the :class:`~muse.domain.StructuredMergePlugin`
    extension is optional but strongly recommended for domains that produce
    note-level (sub-file) diffs.
    """

    # ------------------------------------------------------------------
    # 1. snapshot — capture live state as a content-addressed dict
    # ------------------------------------------------------------------

    def snapshot(self, live_state: LiveState) -> StateSnapshot:
        """Capture the current ``muse-work/`` directory as a snapshot dict.

        Args:
            live_state: Either a ``pathlib.Path`` pointing to ``muse-work/``
                or an existing snapshot dict (returned as-is, same object).

        Returns:
            A ``{"files": {path: sha256}, "domain": "midi"}`` manifest. The
            ``files`` mapping is keyed by POSIX paths relative to the work
            directory.

        Ignore rules
        ------------
        When *live_state* is a ``pathlib.Path``, ignore patterns are loaded
        from the parent directory of the work directory and every matching
        relative path is left out. Files whose own name starts with ``"."``
        are always left out; only the file name is checked, not the names of
        its parent directories.
        """
        if not isinstance(live_state, pathlib.Path):
            # Already a snapshot dict — hand it back untouched.
            return live_state

        from muse.core.ignore import is_ignored, load_patterns

        workdir = live_state
        patterns = load_patterns(workdir.parent)
        files: dict[str, str] = {}
        # Sorted walk keeps manifest insertion order deterministic.
        for file_path in sorted(workdir.rglob("*")):
            if not file_path.is_file() or file_path.name.startswith("."):
                continue
            rel = file_path.relative_to(workdir).as_posix()
            if is_ignored(rel, patterns):
                continue
            files[rel] = _hash_file(file_path)
        return SnapshotManifest(files=files, domain=_DOMAIN_TAG)

    # ------------------------------------------------------------------
    # 2. diff — compute the structured delta between two snapshots
    # ------------------------------------------------------------------

    def diff(
        self,
        base: StateSnapshot,
        target: StateSnapshot,
        *,
        repo_root: pathlib.Path | None = None,
    ) -> StateDelta:
        """Compute a ``StructuredDelta`` between two MIDI snapshots.

        File additions and removals produce ``InsertOp`` and ``DeleteOp``
        entries respectively. Each path present in both snapshots with a
        different content hash is delegated to ``_diff_modified_file``, which
        receives *repo_root* and decides between a note-level ``PatchOp`` and a
        file-level ``ReplaceOp``.

        Ops are emitted in a fixed order: all inserts (sorted by path), then
        all deletes (sorted), then all modifications (sorted).

        Args:
            base: The ancestor snapshot.
            target: The later snapshot.
            repo_root: Repository root directory, forwarded to the
                modified-file differ so it can reach the object store.

        Returns:
            A ``StructuredDelta`` whose ``ops`` list transforms *base* into
            *target* and whose ``summary`` comes from ``_summarise_ops``.
        """
        base_files = base["files"]
        target_files = target["files"]

        base_paths = set(base_files)
        target_paths = set(target_files)

        ops: list[DomainOp] = []

        # Added files → InsertOp
        for path in sorted(target_paths - base_paths):
            ops.append(
                InsertOp(
                    op="insert",
                    address=path,
                    position=None,
                    content_id=target_files[path],
                    content_summary=f"new file: {path}",
                )
            )

        # Removed files → DeleteOp
        for path in sorted(base_paths - target_paths):
            ops.append(
                DeleteOp(
                    op="delete",
                    address=path,
                    position=None,
                    content_id=base_files[path],
                    content_summary=f"deleted: {path}",
                )
            )

        # Modified files → PatchOp / ReplaceOp (chosen by the helper)
        for path in sorted(
            p for p in base_paths & target_paths if base_files[p] != target_files[p]
        ):
            ops.append(
                _diff_modified_file(
                    path=path,
                    old_hash=base_files[path],
                    new_hash=target_files[path],
                    repo_root=repo_root,
                )
            )

        return StructuredDelta(domain=_DOMAIN_TAG, ops=ops, summary=_summarise_ops(ops))

    # ------------------------------------------------------------------
    # 3. merge — three-way reconciliation
    # ------------------------------------------------------------------

    def merge(
        self,
        base: StateSnapshot,
        left: StateSnapshot,
        right: StateSnapshot,
        *,
        repo_root: pathlib.Path | None = None,
    ) -> MergeResult:
        """Three-way merge two divergent MIDI state lines against a common base.

        A path changed on only one side takes that side's version. A path
        changed on both sides is resolved in this order:

        1. **Consensus** — both sides ended with the same content hash, or
           both deleted the file. The shared outcome is taken; no conflict.

        2. **File-level strategy** — if ``.museattributes`` contains an
           ``ours`` or ``theirs`` rule matching the path (dimension ``"*"``),
           that side's version (or deletion) is applied. A ``manual`` rule
           sends the path straight to the conflict list without attempting
           any automatic merge.

        3. **Dimension-level merge** — for ``.mid`` files that exist in base,
           left and right (and only when *repo_root* is given), the MIDI event
           stream is split into orthogonal dimension slices
           (notes/melodic/rhythmic, harmonic, dynamic, structural) and each
           dimension is merged independently by ``merge_midi_dimensions``.
           Dimension-specific ``ours``/``theirs`` rules are honoured there.
           When it returns ``None`` or raises ``ValueError`` the path becomes
           a conflict.

        4. **Manual override of auto-resolved paths** — a ``manual`` rule on a
           path that was resolved automatically (one-sided change or
           convergent edit) reverts that path to its base entry (or removes
           it when base has none) and lists it as a conflict. Consensus
           deletions are exempt.

        Args:
            base: Snapshot at the common ancestor commit.
            left: Snapshot for the *ours* (current) branch. The distinction
                between ``left`` and ``right`` only affects the ``applied_strategies``
                key in the result; the merge is symmetric for clean paths.
            right: Snapshot for the *theirs* (incoming) branch.
            repo_root: Path to the repository root so ``.museattributes`` and the
                object store can be located. ``None`` disables attribute
                loading and MIDI reconstruction (all conflicts become hard).

        Returns:
            A :class:`~muse.domain.MergeResult` built with ``merged`` (the
            merged manifest), ``conflicts`` (sorted unresolved paths),
            ``applied_strategies`` (path → strategy label) and
            ``dimension_reports`` (per-file reports from the dimension merge).
            Conflicting paths are not stripped from the manifest: they keep
            their base entry when base has one and are absent otherwise.
        """
        from muse.core.attributes import load_attributes, resolve_strategy
        from muse.core.object_store import read_object, write_object
        from muse.plugins.midi.midi_merge import merge_midi_dimensions

        base_files = base["files"]
        left_files = left["files"]
        right_files = right["files"]

        attrs = load_attributes(repo_root, domain=_DOMAIN_TAG) if repo_root is not None else []

        left_changed: set[str] = _changed_paths(base_files, left_files)
        right_changed: set[str] = _changed_paths(base_files, right_files)
        all_conflict_paths: set[str] = left_changed & right_changed

        merged: dict[str, str] = dict(base_files)

        # Apply clean single-side changes first.
        for side_changed, side_files in (
            (left_changed, left_files),
            (right_changed, right_files),
        ):
            for path in side_changed - all_conflict_paths:
                if path in side_files:
                    merged[path] = side_files[path]
                else:
                    merged.pop(path, None)

        # Consensus: both sides reached the same outcome for the path — either
        # the same content hash or a deletion on both sides. Not a conflict.
        consensus: set[str] = {
            p for p in all_conflict_paths
            if left_files.get(p) == right_files.get(p)
        }
        consensus_deleted: set[str] = {p for p in consensus if p not in left_files}
        for path in consensus:
            if path in left_files:
                merged[path] = left_files[path]
            else:
                merged.pop(path, None)

        real_conflicts: set[str] = all_conflict_paths - consensus

        applied_strategies: dict[str, str] = {}
        dimension_reports: dict[str, dict[str, str]] = {}
        final_conflicts: list[str] = []

        for path in sorted(real_conflicts):
            file_strategy = resolve_strategy(attrs, path, "*")

            if file_strategy in ("ours", "theirs"):
                chosen_files = left_files if file_strategy == "ours" else right_files
                if path in chosen_files:
                    merged[path] = chosen_files[path]
                else:
                    merged.pop(path, None)
                applied_strategies[path] = file_strategy
                continue

            if file_strategy == "manual":
                # Explicit request for human resolution: never auto-merge.
                # ``merged`` still holds the base entry (if any) for this path.
                final_conflicts.append(path)
                applied_strategies[path] = "manual"
                continue

            if (
                repo_root is not None
                and path.lower().endswith(".mid")
                and path in left_files
                and path in right_files
                and path in base_files
            ):
                base_obj = read_object(repo_root, base_files[path])
                left_obj = read_object(repo_root, left_files[path])
                right_obj = read_object(repo_root, right_files[path])

                if base_obj is not None and left_obj is not None and right_obj is not None:
                    try:
                        dim_result = merge_midi_dimensions(
                            base_obj, left_obj, right_obj,
                            attrs,
                            path,
                        )
                    except ValueError:
                        dim_result = None

                    if dim_result is not None:
                        merged_bytes, dim_report = dim_result
                        new_hash = hashlib.sha256(merged_bytes).hexdigest()
                        write_object(repo_root, new_hash, merged_bytes)
                        merged[path] = new_hash
                        applied_strategies[path] = "dimension-merge"
                        dimension_reports[path] = dim_report
                        continue

            final_conflicts.append(path)

        # Manual override for paths the engine resolved on its own.
        for path in sorted((left_changed | right_changed) - real_conflicts):
            if path in consensus_deleted:
                continue
            if resolve_strategy(attrs, path, "*") == "manual":
                final_conflicts.append(path)
                applied_strategies[path] = "manual"
                # Undo the automatic resolution: fall back to the base entry.
                if path in base_files:
                    merged[path] = base_files[path]
                else:
                    merged.pop(path, None)

        return MergeResult(
            merged=SnapshotManifest(files=merged, domain=_DOMAIN_TAG),
            conflicts=sorted(final_conflicts),
            applied_strategies=applied_strategies,
            dimension_reports=dimension_reports,
        )

    # ------------------------------------------------------------------
    # 4. drift — compare committed state vs live state
    # ------------------------------------------------------------------

    def drift(
        self,
        committed: StateSnapshot,
        live: LiveState,
    ) -> DriftReport:
        """Detect uncommitted changes in ``muse-work/`` relative to *committed*.

        The live state is snapshotted with :meth:`snapshot` and compared with
        :meth:`diff` (called without ``repo_root``). Ops are then counted by
        kind; ``replace`` and ``patch`` both count as "modified".

        Args:
            committed: The last committed snapshot.
            live: Either a ``pathlib.Path`` (``muse-work/``) or a snapshot
                dict representing current live state.

        Returns:
            A :class:`~muse.domain.DriftReport` carrying ``has_drift``, a
            summary such as ``"2 added, 1 modified"`` (or
            ``"working tree clean"``), and the full delta.
        """
        live_snapshot = self.snapshot(live)
        delta = self.diff(committed, live_snapshot)

        inserts = sum(1 for op in delta["ops"] if op["op"] == "insert")
        deletes = sum(1 for op in delta["ops"] if op["op"] == "delete")
        modified = sum(1 for op in delta["ops"] if op["op"] in ("replace", "patch"))
        has_drift = bool(inserts or deletes or modified)

        parts: list[str] = []
        if inserts:
            parts.append(f"{inserts} added")
        if deletes:
            parts.append(f"{deletes} removed")
        if modified:
            parts.append(f"{modified} modified")

        summary = ", ".join(parts) if parts else "working tree clean"
        return DriftReport(has_drift=has_drift, summary=summary, delta=delta)

    # ------------------------------------------------------------------
    # 5. apply — execute a delta against live state (checkout)
    # ------------------------------------------------------------------

    def apply(self, delta: StateDelta, live_state: LiveState) -> LiveState:
        """Apply a structured delta to produce a new live state.

        When ``live_state`` is a ``pathlib.Path`` the physical files have
        already been updated by the caller (``muse checkout`` restores objects
        from the store before calling this). The delta itself is not consulted
        in that case; the directory is simply rescanned via :meth:`snapshot`.

        When ``live_state`` is a snapshot dict, file-level ops are applied to
        a copy of its ``files`` mapping:

        - ``delete`` removes the address (missing addresses are ignored);
        - ``replace`` sets the address to ``new_content_id``;
        - ``insert`` sets the address to ``content_id``.

        Every other op kind (e.g. ``patch``) is skipped, because the hash of
        a patched file cannot be derived from the op alone. Callers that need
        those applied should pass the workdir ``pathlib.Path`` instead.

        Args:
            delta: A ``StructuredDelta`` produced by :meth:`diff`.
            live_state: The workdir path (preferred) or a snapshot dict.

        Returns:
            The updated live state as a ``SnapshotManifest``. The input
            snapshot dict is not mutated.
        """
        if isinstance(live_state, pathlib.Path):
            return self.snapshot(live_state)

        current_files = dict(live_state["files"])

        for op in delta["ops"]:
            if op["op"] == "delete":
                current_files.pop(op["address"], None)
            elif op["op"] == "replace":
                current_files[op["address"]] = op["new_content_id"]
            elif op["op"] == "insert":
                current_files[op["address"]] = op["content_id"]
            # PatchOp and MoveOp: skip in-memory — caller must use workdir path.

        return SnapshotManifest(files=current_files, domain=_DOMAIN_TAG)

    # ------------------------------------------------------------------
    # 6. schema — declare structural schema for the algorithm library
    # ------------------------------------------------------------------

    def schema(self) -> DomainSchema:
        """Return the full structural schema for the MIDI domain.

        Declares 21 semantic dimensions — one per independent MIDI event class
        — that the core diff algorithm library and OT merge engine use to drive
        per-dimension operations. This is a significant expansion from the
        original 5 dimensions; the finer granularity means two agents can edit
        completely different aspects of the same MIDI file (e.g. sustain pedal
        and channel volume) without ever creating a merge conflict.

        Top level is a ``SetSchema``: the music workspace is an unordered
        collection of audio/MIDI files, each identified by its SHA-256 content
        hash.

        Independent dimensions (conflicts do not block merging others):
        - **notes** (melodic/rhythmic) — note_on / note_off events
        - **pitch_bend** — pitchwheel controller
        - **channel_pressure** — monophonic aftertouch
        - **poly_pressure** — per-note polyphonic aftertouch
        - **cc_modulation** — CC 1 modulation wheel
        - **cc_volume** — CC 7 channel volume
        - **cc_pan** — CC 10 stereo pan
        - **cc_expression** — CC 11 expression controller
        - **cc_sustain** — CC 64 damper / sustain pedal
        - **cc_portamento** — CC 65 portamento on/off
        - **cc_sostenuto** — CC 66 sostenuto pedal
        - **cc_soft_pedal** — CC 67 soft pedal (una corda)
        - **cc_reverb** — CC 91 reverb send level
        - **cc_chorus** — CC 93 chorus send level
        - **cc_other** — all other numbered CC controllers
        - **program_change** — instrument / patch selection
        - **key_signatures** — key signature meta events
        - **markers** — section markers, cue points, text annotations

        Non-independent dimensions (conflicts block all others):
        - **tempo_map** — set_tempo meta events; tempo changes shift the
          musical meaning of every subsequent tick position, so a bilateral
          tempo conflict requires human resolution before other dimensions
          can be finalised.
        - **time_signatures** — time_signature meta events; bar structure
          changes have the same semantic blocking effect as tempo changes.
        - **track_structure** — track name, instrument name, sysex, and
          unknown meta events affecting routing and session layout.
        """
        seq_schema = SequenceSchema(
            kind="sequence",
            element_type="note_event",
            identity="by_position",
            diff_algorithm="lcs",
            alphabet=None,
        )
        cc_schema = TensorSchema(
            kind="tensor",
            dtype="float32",
            rank=1,
            epsilon=0.5,
            diff_mode="sparse",
        )
        tree_schema = TreeSchema(
            kind="tree",
            node_type="track_node",
            diff_algorithm="zhang_shasha",
        )
        meta_schema = SequenceSchema(
            kind="sequence",
            element_type="meta_event",
            identity="by_position",
            diff_algorithm="lcs",
            alphabet=None,
        )

        # (name, description, element schema, independent_merge) — the order
        # of this table is the order of ``DomainSchema.dimensions``.
        dimension_table = [
            # --- Expressive note content ---
            ("notes", "Note pitches, durations, and timing (melodic + rhythmic)", seq_schema, True),
            ("pitch_bend", "Pitchwheel controller — expressive pitch deviation", cc_schema, True),
            ("channel_pressure", "Monophonic aftertouch — channel-wide pressure", cc_schema, True),
            ("poly_pressure", "Polyphonic aftertouch — per-note pressure", cc_schema, True),
            # --- Named CC controllers ---
            ("cc_modulation", "CC 1 — modulation wheel", cc_schema, True),
            ("cc_volume", "CC 7 — channel volume", cc_schema, True),
            ("cc_pan", "CC 10 — stereo pan position", cc_schema, True),
            ("cc_expression", "CC 11 — expression controller", cc_schema, True),
            ("cc_sustain", "CC 64 — damper / sustain pedal", cc_schema, True),
            ("cc_portamento", "CC 65 — portamento on/off", cc_schema, True),
            ("cc_sostenuto", "CC 66 — sostenuto pedal", cc_schema, True),
            ("cc_soft_pedal", "CC 67 — soft pedal (una corda)", cc_schema, True),
            ("cc_reverb", "CC 91 — reverb send level", cc_schema, True),
            ("cc_chorus", "CC 93 — chorus send level", cc_schema, True),
            ("cc_other", "All other numbered CC controllers", cc_schema, True),
            # --- Patch / program selection ---
            ("program_change", "Instrument / patch selection events", meta_schema, True),
            # --- Non-independent timeline metadata ---
            (
                "tempo_map",
                "Tempo (BPM) changes — non-independent: a conflict "
                "blocks merging all other dimensions",
                meta_schema,
                False,
            ),
            (
                "time_signatures",
                "Time signature changes — non-independent: affects "
                "bar structure for all other dimensions",
                meta_schema,
                False,
            ),
            # --- Tonal and annotation metadata ---
            ("key_signatures", "Key signature events", meta_schema, True),
            ("markers", "Section markers, cue points, text, lyrics, copyright", meta_schema, True),
            # --- Track structure (non-independent) ---
            (
                "track_structure",
                "Track name, instrument name, sysex, unknown meta — "
                "non-independent: routing changes affect all tracks",
                tree_schema,
                False,
            ),
        ]

        return DomainSchema(
            domain=_DOMAIN_TAG,
            description=(
                "MIDI and audio file versioning with note-level diff and "
                "21-dimension independent merge"
            ),
            top_level=SetSchema(
                kind="set",
                element_type="audio_file",
                identity="by_content",
            ),
            dimensions=[
                DimensionSpec(
                    name=dim_name,
                    description=dim_description,
                    schema=dim_schema,
                    independent_merge=dim_independent,
                )
                for dim_name, dim_description, dim_schema, dim_independent in dimension_table
            ],
            merge_mode="three_way",
            schema_version=1,
        )

    # ------------------------------------------------------------------
    # 7. merge_ops — operation-level OT merge (StructuredMergePlugin)
    # ------------------------------------------------------------------

    def merge_ops(
        self,
        base: StateSnapshot,
        ours_snap: StateSnapshot,
        theirs_snap: StateSnapshot,
        ours_ops: list[DomainOp],
        theirs_ops: list[DomainOp],
        *,
        repo_root: pathlib.Path | None = None,
    ) -> MergeResult:
        """Operation-level three-way merge using the OT engine.

        Extends the file-level ``merge()`` method with sub-file granularity: two
        changes to non-overlapping notes in the same MIDI file no longer produce
        a conflict.

        Algorithm
        ---------
        1. Run :func:`~muse.core.op_transform.merge_op_lists` on the flat op
           lists to classify each (ours, theirs) pair as commuting or
           conflicting.
        2. Build the merged manifest from *base* by applying all clean merged
           ops. ``InsertOp`` and ``ReplaceOp`` entries supply a ``content_id``
           / ``new_content_id`` directly. For ``PatchOp`` entries (sub-file
           note changes), the final file hash is looked up from *ours_snap* or
           *theirs_snap*. When both sides produced a ``PatchOp`` for the same
           MIDI file and the note-level ops commute, an attempt is made to
           reconstruct the merged MIDI bytes — once per file, however many
           patch ops the merged list carries for it; on failure the file
           falls back to a conflict.
        3. For conflicting pairs, consult ``.museattributes``. Strategies
           ``"ours"`` and ``"theirs"`` are applied automatically; everything
           else enters ``MergeResult.conflicts``.

        Args:
            base: Common ancestor snapshot.
            ours_snap: Final snapshot of our branch.
            theirs_snap: Final snapshot of their branch.
            ours_ops: Operations from our branch delta (base → ours).
            theirs_ops: Operations from their branch delta (base → theirs).
            repo_root: Repository root for object store and attributes.

        Returns:
            A :class:`~muse.domain.MergeResult` with the reconciled snapshot,
            the sorted, de-duplicated list of unresolved addresses, the
            strategies applied, and the OT engine's merged ops as ``op_log``.
        """
        from muse.core.attributes import load_attributes, resolve_strategy
        from muse.core.op_transform import merge_op_lists

        attrs = load_attributes(repo_root, domain=_DOMAIN_TAG) if repo_root is not None else []

        # OT classification: find commuting and conflicting op pairs.
        ot_result = merge_op_lists([], ours_ops, theirs_ops)

        # Build the merged manifest starting from base.
        merged_files: dict[str, str] = dict(base["files"])
        applied_strategies: dict[str, str] = {}
        final_conflicts: list[str] = []
        op_log: list[DomainOp] = list(ot_result.merged_ops)

        # Index PatchOps by address so same-file note merges can be detected.
        ours_patches: dict[str, PatchOp] = {
            op["address"]: op for op in ours_ops if op["op"] == "patch"
        }
        theirs_patches: dict[str, PatchOp] = {
            op["address"]: op for op in theirs_ops if op["op"] == "patch"
        }

        # Addresses involved in a conflict are resolved in the second pass.
        conflicting_addresses: set[str] = {
            our_op["address"] for our_op, _ in ot_result.conflict_ops
        }

        # Addresses whose two-sided patch has already been reconstructed, so
        # the second PatchOp for the same file does not repeat the work.
        reconstructed: set[str] = set()

        # --- Apply clean merged ops ---
        for op in ot_result.merged_ops:
            addr = op["address"]
            if addr in conflicting_addresses:
                continue  # handled in conflict resolution below

            if op["op"] == "insert":
                merged_files[addr] = op["content_id"]

            elif op["op"] == "delete":
                merged_files.pop(addr, None)

            elif op["op"] == "replace":
                merged_files[addr] = op["new_content_id"]

            elif op["op"] == "patch":
                # PatchOp: determine which side(s) patched this file.
                has_ours = addr in ours_patches
                has_theirs = addr in theirs_patches

                if has_ours and not has_theirs:
                    # Only our side changed this file — take our version.
                    if addr in ours_snap["files"]:
                        merged_files[addr] = ours_snap["files"][addr]
                    else:
                        merged_files.pop(addr, None)

                elif has_theirs and not has_ours:
                    # Only their side changed this file — take their version.
                    if addr in theirs_snap["files"]:
                        merged_files[addr] = theirs_snap["files"][addr]
                    else:
                        merged_files.pop(addr, None)

                else:
                    # Both sides patched the same file with commuting note ops.
                    if addr in reconstructed:
                        continue
                    reconstructed.add(addr)

                    # Attempt note-level MIDI reconstruction.
                    merged_content_id = _merge_patch_ops(
                        addr=addr,
                        ours_patch=ours_patches[addr],
                        theirs_patch=theirs_patches[addr],
                        base_files=dict(base["files"]),
                        ours_snap_files=dict(ours_snap["files"]),
                        theirs_snap_files=dict(theirs_snap["files"]),
                        repo_root=repo_root,
                    )
                    if merged_content_id is not None:
                        merged_files[addr] = merged_content_id
                    else:
                        # Reconstruction failed — treat as manual conflict.
                        final_conflicts.append(addr)

        # --- Resolve conflicts ---
        for our_op, _their_op in ot_result.conflict_ops:
            addr = our_op["address"]
            strategy = resolve_strategy(attrs, addr, "*")

            if strategy == "ours":
                if addr in ours_snap["files"]:
                    merged_files[addr] = ours_snap["files"][addr]
                else:
                    merged_files.pop(addr, None)
                applied_strategies[addr] = "ours"

            elif strategy == "theirs":
                if addr in theirs_snap["files"]:
                    merged_files[addr] = theirs_snap["files"][addr]
                else:
                    merged_files.pop(addr, None)
                applied_strategies[addr] = "theirs"

            else:
                # Strategy "manual" or "auto" without a clear resolution.
                final_conflicts.append(addr)

        return MergeResult(
            merged=SnapshotManifest(files=merged_files, domain=_DOMAIN_TAG),
            conflicts=sorted(set(final_conflicts)),
            applied_strategies=applied_strategies,
            op_log=op_log,
        )
860
861
862 # ---------------------------------------------------------------------------
863 # Module-level helpers
864 # ---------------------------------------------------------------------------
865
866
def _merge_patch_ops(
    *,
    addr: str,
    ours_patch: PatchOp,
    theirs_patch: PatchOp,
    base_files: dict[str, str],
    ours_snap_files: dict[str, str],
    theirs_snap_files: dict[str, str],
    repo_root: pathlib.Path | None,
) -> str | None:
    """Attempt a note-level MIDI merge for two ``PatchOp`` entries on one file.

    Runs OT on the ``child_ops`` of each PatchOp. If the note-level ops all
    commute, reconstructs the merged MIDI by:

    1. Loading base, ours, and theirs MIDI bytes from the object store.
    2. Extracting note sequences from all three versions.
    3. Building ``content_id → NoteKey`` look-ups for the ours and theirs
       sequences (so that InsertOp content IDs can be resolved to real notes).
    4. Applying the merged note ops (deletions then insertions) to the base
       note sequence.
    5. Calling :func:`~muse.plugins.midi.midi_diff.reconstruct_midi` and
       writing the resulting bytes to the object store.

    Returns ``None`` (the caller then reports a file-level conflict) when:

    - *repo_root* is ``None`` or *addr* is not in *base_files*.
    - Note-level OT found conflicts.
    - The base blob, or either branch's blob, is not available from the
      object store. Without a branch's notes its insertions cannot be
      resolved, so continuing would write a merge with notes missing.
    - Note extraction, reconstruction or the object write raised any exception.

    An insertion whose content ID cannot be found in the ours, theirs or base
    note tables is logged at debug level and skipped (best effort).

    Args:
        addr: Workspace-relative MIDI file path.
        ours_patch: Our PatchOp for this file.
        theirs_patch: Their PatchOp for this file.
        base_files: Content-ID map for the common ancestor snapshot.
        ours_snap_files: Content-ID map for our branch's final snapshot.
        theirs_snap_files: Content-ID map for their branch's final snapshot.
        repo_root: Repository root for object store access.

    Returns:
        Content-ID (SHA-256 hex) of the merged MIDI, already written to the
        object store, or ``None`` on failure.
    """
    if repo_root is None or addr not in base_files:
        return None

    from muse.core.object_store import read_object, write_object
    from muse.core.op_transform import merge_op_lists
    from muse.plugins.midi.midi_diff import NoteKey, extract_notes, reconstruct_midi

    # Run OT on note-level ops to classify conflicts.
    note_result = merge_op_lists([], ours_patch["child_ops"], theirs_patch["child_ops"])
    if not note_result.is_clean:
        logger.debug(
            "⚠️ Note-level conflict in %r: %d pair(s) — falling back to file conflict",
            addr,
            len(note_result.conflict_ops),
        )
        return None

    try:
        base_bytes = read_object(repo_root, base_files[addr])
        if base_bytes is None:
            return None

        ours_hash = ours_snap_files.get(addr)
        theirs_hash = theirs_snap_files.get(addr)
        ours_bytes = read_object(repo_root, ours_hash) if ours_hash else None
        theirs_bytes = read_object(repo_root, theirs_hash) if theirs_hash else None
        if ours_bytes is None or theirs_bytes is None:
            # A missing branch blob would leave that side's insertions
            # unresolvable and silently drop them from the merged file.
            logger.debug(
                "⚠️ Branch object missing for %r — falling back to file conflict",
                addr,
            )
            return None

        base_notes, ticks_per_beat = extract_notes(base_bytes)
        ours_notes, _ = extract_notes(ours_bytes)
        theirs_notes, _ = extract_notes(theirs_bytes)

        # content_id → NoteKey look-ups, consulted in this order for inserts.
        # Base comes last: it should not hold an inserted note, but is checked
        # as a fallback.
        ours_by_id: dict[str, NoteKey] = {_note_content_id(n): n for n in ours_notes}
        theirs_by_id: dict[str, NoteKey] = {_note_content_id(n): n for n in theirs_notes}
        base_note_by_id: dict[str, NoteKey] = {_note_content_id(n): n for n in base_notes}
        lookups = (ours_by_id, theirs_by_id, base_note_by_id)

        # Apply deletions to the base note list.
        delete_ids: set[str] = {
            op["content_id"] for op in note_result.merged_ops if op["op"] == "delete"
        }
        surviving: list[NoteKey] = [
            n for n in base_notes if _note_content_id(n) not in delete_ids
        ]

        # Collect insertions, resolving each content_id to a concrete note.
        inserted: list[NoteKey] = []
        for op in note_result.merged_ops:
            if op["op"] != "insert":
                continue
            cid = op["content_id"]
            note = next((table[cid] for table in lookups if cid in table), None)
            if note is None:
                logger.debug(
                    "⚠️ Cannot resolve note content_id %s for %r — skipping",
                    cid[:12],
                    addr,
                )
                continue
            inserted.append(note)

        merged_notes = surviving + inserted
        merged_bytes = reconstruct_midi(merged_notes, ticks_per_beat=ticks_per_beat)

        merged_hash = hashlib.sha256(merged_bytes).hexdigest()
        write_object(repo_root, merged_hash, merged_bytes)

        logger.info(
            "✅ Note-level MIDI merge for %r: %d ops clean, %d notes in result",
            addr,
            len(note_result.merged_ops),
            len(merged_notes),
        )
        return merged_hash

    except Exception as exc:  # noqa: BLE001 intentional broad catch
        logger.debug("⚠️ MIDI note-level reconstruction failed for %r: %s", addr, exc)
        return None
997
998
def _note_content_id(note: NoteKey) -> str:
    """Compute the SHA-256 content ID of a single note.

    The note's ``pitch``, ``velocity``, ``start_tick``, ``duration_ticks``
    and ``channel`` fields are joined with ``:`` (in that order) and the
    UTF-8 encoding of that string is hashed.

    The scheme is intended to match the one used by
    :mod:`muse.plugins.midi.midi_diff`, so that IDs computed here line up
    with the ``content_id`` values carried by note-level ``InsertOp`` /
    ``DeleteOp`` entries.  The algorithm is re-stated here rather than
    imported, so the two must be kept in sync by hand.

    Args:
        note: Mapping that provides the five note fields listed above.

    Returns:
        Hex-encoded SHA-256 digest of the serialised note.
    """
    # Field order is part of the ID: reordering would change every hash.
    layout = "{pitch}:{velocity}:{start_tick}:{duration_ticks}:{channel}"
    serialised = layout.format_map(note)
    return hashlib.sha256(serialised.encode()).hexdigest()
1011
1012
def _diff_modified_file(
    *,
    path: str,
    old_hash: str,
    new_hash: str,
    repo_root: pathlib.Path | None,
) -> DomainOp:
    """Produce the richest available operation for a modified file.

    For ``.mid`` files (suffix matched case-insensitively) where both
    content revisions are readable from the object store, performs a
    note-level MIDI diff and returns a ``PatchOp`` carrying the child
    operations produced by ``diff_midi_notes``.  Falls back to a
    ``ReplaceOp`` (opaque before/after hash pair) when:

    - the file is not a ``.mid`` file,
    - ``repo_root`` is ``None``,
    - either content revision cannot be retrieved from the store, or
    - the note-level diff raises any exception (logged at DEBUG level).

    Args:
        path: Workspace-relative POSIX path of the modified file.
        old_hash: SHA-256 of the base content in the object store.
        new_hash: SHA-256 of the current content in the object store.
        repo_root: Repository root for object store access.  ``None`` forces
            immediate fallback to ``ReplaceOp``.

    Returns:
        A ``PatchOp`` with note-level child ops when deep diff succeeds,
        otherwise a ``ReplaceOp`` with the opaque before/after content hashes.
    """
    if path.lower().endswith(".mid") and repo_root is not None:
        # Imported lazily so the dependency is only paid on the deep-diff path.
        from muse.core.object_store import read_object
        from muse.plugins.midi.midi_diff import diff_midi_notes

        base_bytes = read_object(repo_root, old_hash)
        target_bytes = read_object(repo_root, new_hash)

        if base_bytes is not None and target_bytes is not None:
            try:
                child_delta = diff_midi_notes(
                    base_bytes, target_bytes, file_path=path
                )
                return PatchOp(
                    op="patch",
                    address=path,
                    child_ops=child_delta["ops"],
                    child_domain=child_delta["domain"],
                    child_summary=child_delta["summary"],
                )
            # ``ValueError`` is already an ``Exception``; listing both was
            # redundant.  The deep diff is best-effort: any failure degrades
            # to the opaque ReplaceOp below instead of aborting the diff.
            except Exception as exc:  # noqa: BLE001 intentional broad catch
                logger.debug("⚠️ MIDI deep diff failed for %r: %s", path, exc)

    return ReplaceOp(
        op="replace",
        address=path,
        position=None,
        old_content_id=old_hash,
        new_content_id=new_hash,
        old_summary=f"{path} (previous)",
        new_summary=f"{path} (updated)",
    )
1071
1072
def _summarise_ops(ops: list[DomainOp]) -> str:
    """Render a one-line, human-readable tally of file-level domain ops.

    Each op is classified by its ``"op"`` field.  ``insert``, ``delete``,
    ``replace`` and ``patch`` are counted; any other kind is ignored.

    Args:
        ops: Domain operations to summarise.

    Returns:
        Comma-separated phrases such as ``"2 files added, 1 file patched"``
        (always in added / removed / modified / patched order), or
        ``"no changes"`` when nothing was counted.
    """
    # Insertion order of this table fixes the order of phrases in the output.
    wording = {
        "insert": "added",
        "delete": "removed",
        "replace": "modified",
        "patch": "patched",
    }
    tally = dict.fromkeys(wording, 0)

    for entry in ops:
        kind = entry["op"]
        if kind in tally:
            tally[kind] += 1

    phrases = [
        f"{count} file{'' if count == 1 else 's'} {wording[kind]}"
        for kind, count in tally.items()
        if count
    ]
    if not phrases:
        return "no changes"
    return ", ".join(phrases)
1102
1103
def _hash_file(path: pathlib.Path) -> str:
    """Stream the file at *path* through SHA-256 and return the hex digest.

    The file is opened in binary mode and consumed in 64 KiB reads, so the
    whole content is never held in memory at once.

    Args:
        path: File to hash.

    Returns:
        Hex-encoded SHA-256 digest of the file's raw bytes.
    """
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        # An empty read signals end-of-file and ends the loop.
        while piece := stream.read(65536):
            digest.update(piece)
    return digest.hexdigest()
1111
1112
def _changed_paths(
    base: dict[str, str], other: dict[str, str]
) -> set[str]:
    """Collect every path whose state differs between *base* and *other*.

    A path is reported when it exists in only one of the two maps (added or
    deleted) or exists in both with different values (modified).

    Args:
        base: Path → content-ID map of the reference state.
        other: Path → content-ID map of the state being compared.

    Returns:
        Set of differing paths; empty when the maps are equal.
    """
    # Present on exactly one side: added or deleted.
    one_sided = base.keys() ^ other.keys()
    # Present on both sides but with a different content ID: modified.
    rewritten = {
        name for name in base.keys() & other.keys() if base[name] != other[name]
    }
    return one_sided | rewritten
1124
1125
def content_hash(snapshot: StateSnapshot) -> str:
    """Derive a stable SHA-256 content address for *snapshot*.

    The snapshot is serialised to compact JSON with sorted keys, so two
    snapshots with equal content hash identically regardless of key
    insertion order.

    Args:
        snapshot: JSON-serialisable snapshot to fingerprint.

    Returns:
        Hex-encoded SHA-256 digest of the canonical JSON text.
    """
    # Sorted keys and no optional whitespace give a single canonical form.
    serialised = json.dumps(snapshot, separators=(",", ":"), sort_keys=True)
    hasher = hashlib.sha256()
    hasher.update(serialised.encode())
    return hasher.hexdigest()
1130
1131
#: Module-level singleton — import and use directly.
plugin = MidiPlugin()

# Import-time conformance checks: fail loudly as soon as this module is
# imported if the singleton no longer matches the interfaces it claims to
# implement.  Being ``assert`` statements, they are skipped when Python runs
# with ``-O``.
# NOTE(review): ``isinstance`` against these protocol classes presumably
# relies on them being runtime-checkable — confirm where they are declared.
assert isinstance(plugin, MuseDomainPlugin), (
    "MidiPlugin does not satisfy the MuseDomainPlugin protocol"
)
assert isinstance(plugin, StructuredMergePlugin), (
    "MidiPlugin does not satisfy the StructuredMergePlugin protocol"
)