cgcardona / muse public
export_engine.py python
703 lines 23.0 KB
d87ef453 Introduce Muse v2 architecture: domain-agnostic VCS with plugin interface Gabriel Cardona <gabriel@tellurstori.com> 3d ago
1 """Muse CLI export engine — format-specific export logic.
2
3 Converts a Muse snapshot manifest into external file formats:
4
5 - ``midi`` — copy raw MIDI files from the snapshot (native format).
6 - ``json`` — structured JSON note representation for AI/tooling.
7 - ``musicxml`` — MusicXML for notation software (MuseScore, Sibelius, etc.).
8 - ``abc`` — ABC notation text for folk/traditional music.
9 - ``wav`` — render audio via Storpheus (requires Storpheus reachable).
10
11 All format handlers accept the same inputs (manifest, root, options) and
12 return a MuseExportResult describing what was written. The WAV handler
13 raises StorpheusUnavailableError when the service cannot be reached so the
14 CLI can surface a human-readable error.
15
16 Design note: export is a read-only Muse operation — no commit is created,
17 no DB writes occur. The same commit + format always produces identical
18 output (deterministic).
19 """
20 from __future__ import annotations
21
22 import json
23 import logging
24 import pathlib
25 import shutil
26 from dataclasses import dataclass, field
27 from enum import Enum
28 from typing import Optional
29
30 import httpx
31
32 logger = logging.getLogger(__name__)
33
34 # ---------------------------------------------------------------------------
35 # Public types
36 # ---------------------------------------------------------------------------
37
38
class ExportFormat(str, Enum):
    """Identifiers of the output formats the export engine can produce.

    The enum mixes in ``str``, so every member compares equal to its
    lowercase value (``ExportFormat.MIDI == "midi"``) and can be built from
    that value (``ExportFormat("midi")``).
    """

    # Raw MIDI copy.
    MIDI = "midi"
    # JSON index of the snapshot.
    JSON = "json"
    # MusicXML transcription.
    MUSICXML = "musicxml"
    # ABC notation text.
    ABC = "abc"
    # Audio render via Storpheus.
    WAV = "wav"
47
48
@dataclass(frozen=True)
class MuseExportOptions:
    """Immutable bundle of settings for one export run.

    Attributes:
        format: Which export format to produce.
        commit_id: Full ID of the commit being exported; copied into the
            result and, for JSON, into the output payload.
        output_path: Where to write — a file path or a directory path,
            depending on the handler and the number of files exported.
        track: Optional case-insensitive substring that a manifest path must
            contain to be exported.
        section: Optional case-insensitive substring that a manifest path
            must contain to be exported.
        split_tracks: When True, handlers that support it write one output
            file per source file into ``output_path`` as a directory.
    """

    # Required settings.
    format: ExportFormat
    commit_id: str
    output_path: pathlib.Path

    # Optional manifest filters and flags.
    track: Optional[str] = None
    section: Optional[str] = None
    split_tracks: bool = False
68
69
@dataclass
class MuseExportResult:
    """Mutable summary that a format handler fills in while exporting.

    Attributes:
        paths_written: Every file path the handler wrote, in write order.
        format: Format that was exported (defaults to MIDI).
        commit_id: ID of the commit the export was taken from.
        skipped_count: How many manifest entries the handler passed over
            (for example non-MIDI files or files missing from the workdir).
    """

    # A fresh list per instance; a shared mutable default would leak paths
    # between results.
    paths_written: list[pathlib.Path] = field(default_factory=list)
    format: ExportFormat = ExportFormat.MIDI
    commit_id: str = ""
    skipped_count: int = 0
85
86
class StorpheusUnavailableError(Exception):
    """Signal that the Storpheus service could not be used for a WAV export.

    Raised by the WAV handler when the health probe fails or answers with a
    non-200 status. Intended to be caught by the caller and turned into a
    readable message instead of surfacing as an unhandled traceback.
    """
93
94
95 # ---------------------------------------------------------------------------
96 # Manifest filtering
97 # ---------------------------------------------------------------------------
98
99 #: File extensions treated as MIDI files.
100 _MIDI_SUFFIXES: frozenset[str] = frozenset({".mid", ".midi"})
101
102
def filter_manifest(
    manifest: dict[str, str],
    *,
    track: Optional[str],
    section: Optional[str],
) -> dict[str, str]:
    """Build a new manifest holding only the entries that pass every filter.

    Each filter that is not ``None`` is lower-cased and must occur as a
    substring of the lower-cased relative path. An entry is kept only when
    all supplied filters match. With no filters the result is a shallow copy
    of the whole manifest.

    Args:
        manifest: Mapping of ``{rel_path: object_id}``.
        track: Substring that selects a track (e.g. ``"piano"``), or ``None``.
        section: Substring that selects a section (e.g. ``"chorus"``), or
            ``None``.

    Returns:
        A new ``{rel_path: object_id}`` dict, in the manifest's iteration
        order; the input mapping is never returned directly.
    """
    # Only the filters actually supplied take part in matching.
    needles = [term.lower() for term in (track, section) if term is not None]

    return {
        rel_path: object_id
        for rel_path, object_id in manifest.items()
        if all(needle in rel_path.lower() for needle in needles)
    }
136
137
138 # ---------------------------------------------------------------------------
139 # Format handlers
140 # ---------------------------------------------------------------------------
141
142
def export_midi(
    manifest: dict[str, str],
    root: pathlib.Path,
    opts: MuseExportOptions,
) -> MuseExportResult:
    """Copy the snapshot's MIDI files from the workdir to ``opts.output_path``.

    Manifest entries whose suffix is not in ``_MIDI_SUFFIXES``, or whose file
    is missing under ``<root>/muse-work``, are counted in ``skipped_count``
    and not exported.

    Single-file mode: when exactly one MIDI file remains and
    ``opts.split_tracks`` is False, the file is copied to ``opts.output_path``
    itself.

    Directory mode: otherwise ``opts.output_path`` is created as a directory
    and each source is copied to ``<name>.mid`` inside it. ``<name>`` is the
    source stem when that stem is unique among the exported files. When
    several sources share a stem (``verse/piano.mid`` and
    ``chorus/piano.mid``), the relative path is flattened with underscores
    (``verse_piano``, ``chorus_piano``) so that no export overwrites another.

    Args:
        manifest: Filtered snapshot manifest ``{rel_path: object_id}``.
        root: Muse repository root.
        opts: Export options including output path and flags.

    Returns:
        MuseExportResult listing written paths and the skipped-entry count.
        The result is empty (no paths) when no MIDI file qualified.
    """
    result = MuseExportResult(format=opts.format, commit_id=opts.commit_id)
    workdir = root / "muse-work"

    midi_entries: list[tuple[str, pathlib.Path]] = []
    for rel_path in sorted(manifest):
        src = workdir / rel_path
        suffix = pathlib.PurePosixPath(rel_path).suffix.lower()
        if suffix not in _MIDI_SUFFIXES:
            result.skipped_count += 1
            logger.debug("export midi: skipping non-MIDI file %s", rel_path)
            continue
        if not src.exists():
            result.skipped_count += 1
            logger.warning("export midi: source file missing: %s", src)
            continue
        midi_entries.append((rel_path, src))

    if not midi_entries:
        return result

    if len(midi_entries) == 1 and not opts.split_tracks:
        opts.output_path.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(midi_entries[0][1], opts.output_path)
        result.paths_written.append(opts.output_path)
        logger.info("export midi: wrote %s", opts.output_path)
        return result

    opts.output_path.mkdir(parents=True, exist_ok=True)

    # Count stems first so only genuinely ambiguous names get rewritten;
    # unambiguous files keep the plain "<stem>.mid" name.
    stem_counts: dict[str, int] = {}
    for rel_path, _ in midi_entries:
        stem = pathlib.PurePosixPath(rel_path).stem
        stem_counts[stem] = stem_counts.get(stem, 0) + 1

    taken: set[str] = set()
    for rel_path, src in midi_entries:
        posix = pathlib.PurePosixPath(rel_path)
        base = posix.stem
        if stem_counts[base] > 1:
            # Flatten "dir/sub/name.mid" to "dir_sub_name" to disambiguate.
            base = posix.with_suffix("").as_posix().strip("/").replace("/", "_")
        # Last-resort guard: "a.mid" and "a.midi" flatten to the same name.
        name, serial = base, 1
        while name in taken:
            serial += 1
            name = f"{base}-{serial}"
        taken.add(name)

        dst = opts.output_path / f"{name}.mid"
        shutil.copy2(src, dst)
        result.paths_written.append(dst)
        logger.info("export midi: wrote %s", dst)

    return result
200
201
def export_json(
    manifest: dict[str, str],
    root: pathlib.Path,
    opts: MuseExportOptions,
) -> MuseExportResult:
    """Write a JSON index of the snapshot to ``opts.output_path``.

    The output JSON has the shape::

        {
          "commit_id": "<full commit hash>",
          "exported_at": "<ISO-8601 UTC timestamp>",
          "files": [
            {
              "path": "<rel_path>",
              "object_id": "<object id from the manifest>",
              "size_bytes": <int or null>,
              "exists_in_workdir": <bool>
            },
            ...
          ]
        }

    ``files`` is sorted by path. ``size_bytes`` is ``null`` and
    ``exists_in_workdir`` is ``false`` for entries not present under
    ``<root>/muse-work``. ``exported_at`` is the wall-clock time of the
    export, so two runs on the same commit differ in that one field.

    Args:
        manifest: Filtered snapshot manifest ``{rel_path: object_id}``.
        root: Muse repository root.
        opts: Export options including output path.

    Returns:
        MuseExportResult whose ``paths_written`` holds the single JSON file.
    """
    import datetime

    result = MuseExportResult(format=opts.format, commit_id=opts.commit_id)
    workdir = root / "muse-work"

    files_list: list[dict[str, object]] = []
    for rel_path, object_id in sorted(manifest.items()):
        # One stat() call answers both "does it exist" and "how big is it".
        # A separate exists() check could race with the file being removed.
        try:
            size_bytes: Optional[int] = (workdir / rel_path).stat().st_size
        except (FileNotFoundError, NotADirectoryError):
            size_bytes = None
        files_list.append(
            {
                "path": rel_path,
                "object_id": object_id,
                "size_bytes": size_bytes,
                "exists_in_workdir": size_bytes is not None,
            }
        )

    payload: dict[str, object] = {
        "commit_id": opts.commit_id,
        "exported_at": datetime.datetime.now(datetime.timezone.utc).isoformat(),
        "files": files_list,
    }

    opts.output_path.parent.mkdir(parents=True, exist_ok=True)
    # Explicit encoding, matching the other text handlers in this module.
    opts.output_path.write_text(json.dumps(payload, indent=2), encoding="utf-8")
    result.paths_written.append(opts.output_path)
    logger.info("export json: wrote %s", opts.output_path)
    return result
263
264
def export_musicxml(
    manifest: dict[str, str],
    root: pathlib.Path,
    opts: MuseExportOptions,
) -> MuseExportResult:
    """Transcribe the snapshot's MIDI files to MusicXML documents.

    Each qualifying MIDI file is converted by ``_midi_to_musicxml`` and
    written as UTF-8 text. Manifest entries whose suffix is not in
    ``_MIDI_SUFFIXES``, or whose file is missing under ``<root>/muse-work``,
    are counted in ``skipped_count``.

    Single-file mode: when exactly one MIDI file remains and
    ``opts.split_tracks`` is False, the document is written to
    ``opts.output_path`` itself.

    Directory mode: otherwise ``opts.output_path`` is created as a directory
    and one ``<name>.xml`` is written per source. ``<name>`` is the source
    stem when unique; sources sharing a stem get their relative path
    flattened with underscores so that no document overwrites another.

    Args:
        manifest: Filtered snapshot manifest ``{rel_path: object_id}``.
        root: Muse repository root.
        opts: Export options including output path.

    Returns:
        MuseExportResult listing written paths and the skipped-entry count.
    """
    result = MuseExportResult(format=opts.format, commit_id=opts.commit_id)
    workdir = root / "muse-work"

    midi_entries: list[tuple[str, pathlib.Path]] = []
    for rel_path in sorted(manifest):
        suffix = pathlib.PurePosixPath(rel_path).suffix.lower()
        if suffix not in _MIDI_SUFFIXES:
            result.skipped_count += 1
            continue
        src = workdir / rel_path
        if not src.exists():
            result.skipped_count += 1
            continue
        midi_entries.append((rel_path, src))

    if not midi_entries:
        return result

    if len(midi_entries) == 1 and not opts.split_tracks:
        xml = _midi_to_musicxml(midi_entries[0][1])
        opts.output_path.parent.mkdir(parents=True, exist_ok=True)
        opts.output_path.write_text(xml, encoding="utf-8")
        result.paths_written.append(opts.output_path)
        logger.info("export musicxml: wrote %s", opts.output_path)
        return result

    opts.output_path.mkdir(parents=True, exist_ok=True)

    # Count stems first so only genuinely ambiguous names get rewritten;
    # unambiguous files keep the plain "<stem>.xml" name.
    stem_counts: dict[str, int] = {}
    for rel_path, _ in midi_entries:
        stem = pathlib.PurePosixPath(rel_path).stem
        stem_counts[stem] = stem_counts.get(stem, 0) + 1

    taken: set[str] = set()
    for rel_path, src in midi_entries:
        posix = pathlib.PurePosixPath(rel_path)
        base = posix.stem
        if stem_counts[base] > 1:
            # Flatten "dir/sub/name.mid" to "dir_sub_name" to disambiguate.
            base = posix.with_suffix("").as_posix().strip("/").replace("/", "_")
        # Last-resort guard: "a.mid" and "a.midi" flatten to the same name.
        name, serial = base, 1
        while name in taken:
            serial += 1
            name = f"{base}-{serial}"
        taken.add(name)

        dst = opts.output_path / f"{name}.xml"
        dst.write_text(_midi_to_musicxml(src), encoding="utf-8")
        result.paths_written.append(dst)
        logger.info("export musicxml: wrote %s", dst)

    return result
323
324
def export_abc(
    manifest: dict[str, str],
    root: pathlib.Path,
    opts: MuseExportOptions,
) -> MuseExportResult:
    """Transcribe the snapshot's MIDI files to ABC notation text files.

    Each qualifying MIDI file is converted by ``_midi_to_abc`` and written as
    UTF-8 text. Manifest entries whose suffix is not in ``_MIDI_SUFFIXES``,
    or whose file is missing under ``<root>/muse-work``, are counted in
    ``skipped_count``.

    Single-file mode: when exactly one MIDI file remains and
    ``opts.split_tracks`` is False, the text is written to
    ``opts.output_path`` itself.

    Directory mode: otherwise ``opts.output_path`` is created as a directory
    and one ``<name>.abc`` is written per source. ``<name>`` is the source
    stem when unique; sources sharing a stem get their relative path
    flattened with underscores so that no file overwrites another.

    Args:
        manifest: Filtered snapshot manifest ``{rel_path: object_id}``.
        root: Muse repository root.
        opts: Export options including output path.

    Returns:
        MuseExportResult listing written paths and the skipped-entry count.
    """
    result = MuseExportResult(format=opts.format, commit_id=opts.commit_id)
    workdir = root / "muse-work"

    midi_entries: list[tuple[str, pathlib.Path]] = []
    for rel_path in sorted(manifest):
        suffix = pathlib.PurePosixPath(rel_path).suffix.lower()
        if suffix not in _MIDI_SUFFIXES:
            result.skipped_count += 1
            continue
        src = workdir / rel_path
        if not src.exists():
            result.skipped_count += 1
            continue
        midi_entries.append((rel_path, src))

    if not midi_entries:
        return result

    if len(midi_entries) == 1 and not opts.split_tracks:
        abc = _midi_to_abc(midi_entries[0][1])
        opts.output_path.parent.mkdir(parents=True, exist_ok=True)
        opts.output_path.write_text(abc, encoding="utf-8")
        result.paths_written.append(opts.output_path)
        logger.info("export abc: wrote %s", opts.output_path)
        return result

    opts.output_path.mkdir(parents=True, exist_ok=True)

    # Count stems first so only genuinely ambiguous names get rewritten;
    # unambiguous files keep the plain "<stem>.abc" name.
    stem_counts: dict[str, int] = {}
    for rel_path, _ in midi_entries:
        stem = pathlib.PurePosixPath(rel_path).stem
        stem_counts[stem] = stem_counts.get(stem, 0) + 1

    taken: set[str] = set()
    for rel_path, src in midi_entries:
        posix = pathlib.PurePosixPath(rel_path)
        base = posix.stem
        if stem_counts[base] > 1:
            # Flatten "dir/sub/name.mid" to "dir_sub_name" to disambiguate.
            base = posix.with_suffix("").as_posix().strip("/").replace("/", "_")
        # Last-resort guard: "a.mid" and "a.midi" flatten to the same name.
        name, serial = base, 1
        while name in taken:
            serial += 1
            name = f"{base}-{serial}"
        taken.add(name)

        dst = opts.output_path / f"{name}.abc"
        dst.write_text(_midi_to_abc(src), encoding="utf-8")
        result.paths_written.append(dst)
        logger.info("export abc: wrote %s", dst)

    return result
378
379
def export_wav(
    manifest: dict[str, str],
    root: pathlib.Path,
    opts: MuseExportOptions,
    storpheus_url: str,
) -> MuseExportResult:
    """Probe Storpheus for a WAV export; rendering itself is not implemented.

    Issues a synchronous ``GET <storpheus_url>/health`` with 3-second
    connect/read/write/pool timeouts. Any exception during the probe, or any
    status other than 200, raises StorpheusUnavailableError.

    When the probe succeeds the function currently writes nothing: it logs a
    warning, marks every manifest entry as skipped, and returns a result with
    no paths. ``root`` is accepted for signature parity with the other
    handlers and is not used.

    Args:
        manifest: Filtered snapshot manifest ``{rel_path: object_id}``.
        root: Muse repository root (unused).
        opts: Export options; ``format`` and ``commit_id`` are copied into
            the result.
        storpheus_url: Base URL of the Storpheus service; a trailing slash is
            tolerated.

    Returns:
        MuseExportResult with empty ``paths_written`` and ``skipped_count``
        equal to ``len(manifest)``.

    Raises:
        StorpheusUnavailableError: When the health probe fails or returns a
            non-200 status.
    """
    result = MuseExportResult(format=opts.format, commit_id=opts.commit_id)

    try:
        # Built once so the URL reported on failure is the URL that was
        # actually probed, even when the base URL ends with "/".
        health_url = f"{storpheus_url.rstrip('/')}/health"
        probe_timeout = httpx.Timeout(connect=3.0, read=3.0, write=3.0, pool=3.0)
        with httpx.Client(timeout=probe_timeout) as client:
            resp = client.get(health_url)
    except Exception as exc:
        # Deliberately broad: every probe failure is reported to the caller
        # as "Storpheus unavailable", with the original error chained.
        raise StorpheusUnavailableError(
            f"Storpheus is not reachable at {storpheus_url}: {exc}\n"
            "Start Storpheus (docker compose up storpheus) and retry."
        ) from exc

    if resp.status_code != 200:
        raise StorpheusUnavailableError(
            f"Storpheus health check returned non-200 at {health_url}.\n"
            "Check Storpheus logs: docker compose logs storpheus"
        )

    logger.info("Storpheus reachable at %s — WAV export ready", storpheus_url)
    result.skipped_count = len(manifest)
    logger.warning(
        "WAV render delegation to Storpheus is not yet fully implemented; "
        "returning empty result. Full WAV rendering is tracked as a follow-up."
    )
    return result
430
431
432 # ---------------------------------------------------------------------------
433 # Dispatch
434 # ---------------------------------------------------------------------------
435
436
def export_snapshot(
    manifest: dict[str, str],
    root: pathlib.Path,
    opts: MuseExportOptions,
    storpheus_url: str = "http://localhost:10002",
) -> MuseExportResult:
    """Filter the manifest and hand it to the handler for ``opts.format``.

    The manifest is first narrowed with ``filter_manifest`` using
    ``opts.track`` and ``opts.section``; the matching format handler then
    receives the filtered manifest.

    Args:
        manifest: Unfiltered snapshot manifest ``{rel_path: object_id}``.
        root: Muse repository root.
        opts: Fully-populated export options.
        storpheus_url: Base URL of Storpheus; forwarded to the WAV handler
            only.

    Returns:
        The MuseExportResult produced by the selected handler.

    Raises:
        StorpheusUnavailableError: Propagated from the WAV handler.
        ValueError: If ``opts.format`` matches no known format.
    """
    filtered = filter_manifest(manifest, track=opts.track, section=opts.section)

    # Handlers that share the (manifest, root, opts) signature. Matching uses
    # ``==`` rather than a dict lookup so a plain string such as "midi" still
    # selects a handler.
    three_arg_handlers = (
        (ExportFormat.MIDI, export_midi),
        (ExportFormat.JSON, export_json),
        (ExportFormat.MUSICXML, export_musicxml),
        (ExportFormat.ABC, export_abc),
    )
    for fmt, handler in three_arg_handlers:
        if opts.format == fmt:
            return handler(filtered, root, opts)

    # WAV is the only handler that also needs the service URL.
    if opts.format == ExportFormat.WAV:
        return export_wav(filtered, root, opts, storpheus_url=storpheus_url)

    raise ValueError(f"Unsupported export format: {opts.format!r}")
475
476
477 # ---------------------------------------------------------------------------
478 # MIDI note helpers
479 # ---------------------------------------------------------------------------
480
481 #: MIDI note names (sharps) indexed 0-11.
482 _NOTE_NAMES: list[str] = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]
483
484 #: ABC note names for MIDI pitch classes 0-11.
485 _ABC_NOTE_NAMES: list[str] = ["C", "^C", "D", "^D", "E", "F", "^F", "G", "^G", "A", "^A", "B"]
486
487
def _midi_note_to_step_octave(note: int) -> tuple[str, int]:
    """Split a MIDI note number into a note name and an octave number.

    The name comes from ``_NOTE_NAMES`` and therefore carries a trailing
    ``#`` for sharpened pitch classes. The octave follows the convention in
    which MIDI 60 is ``("C", 4)``.

    Args:
        note: MIDI note number.

    Returns:
        ``(name, octave)``, e.g. ``("C", 4)`` for 60 or ``("C#", 4)`` for 61.
    """
    octave_index, pitch_class = divmod(note, 12)
    # MIDI octave blocks start one below the printed octave number.
    return _NOTE_NAMES[pitch_class], octave_index - 1
496
497
def _midi_note_to_abc(note: int) -> str:
    """Render a MIDI note number as an ABC pitch token.

    Octave 4 (MIDI 60-71) uses the upper-case names from
    ``_ABC_NOTE_NAMES``. Each octave below appends one comma. Octave 5 uses
    the lower-case letter, and each octave above that appends one apostrophe.
    A sharp is written as a leading ``^``.

    Args:
        note: MIDI note number.

    Returns:
        The ABC pitch string, e.g. ``"C"`` for 60, ``"c"`` for 72,
        ``"^c'"`` for 85, ``"C,"`` for 48.
    """
    octave_index, pitch_class = divmod(note, 12)
    octave = octave_index - 1
    name = _ABC_NOTE_NAMES[pitch_class]

    if octave < 4:
        return name + "," * (4 - octave)
    if octave == 4:
        return name

    # Octave 5 and above: lower-case letter, keeping the sharp marker in front.
    if "^" in name:
        lowered = "^" + name[1].lower()
    else:
        lowered = name.lower()
    return lowered + "'" * (octave - 5)
522
523
def _parse_midi_notes(
    path: pathlib.Path,
) -> dict[int, list[tuple[int, int, int]]]:
    """Collect the completed notes of a MIDI file, grouped by channel.

    Every track is walked with a running absolute tick that restarts at 0 for
    each track. A ``note_on`` with positive velocity records a start tick for
    its ``(channel, note)`` pair, replacing any start already pending for
    that pair. A ``note_off``, or a ``note_on`` with velocity 0, closes the
    pending start and emits a note. Note-offs with nothing pending are
    ignored, and notes still open at the end are dropped. The pending table
    is shared across tracks.

    Args:
        path: Path to the MIDI file.

    Returns:
        ``{channel: [(start_tick, end_tick, pitch), ...]}`` in the order the
        notes were closed.
    """
    import mido

    midi_file = mido.MidiFile(str(path))
    notes_by_channel: dict[int, list[tuple[int, int, int]]] = {}
    pending: dict[tuple[int, int], int] = {}

    for track in midi_file.tracks:
        tick = 0
        for msg in track:
            # Message times are deltas; accumulate for every message type.
            tick += msg.time
            if msg.type not in ("note_on", "note_off"):
                continue

            key = (msg.channel, msg.note)
            if msg.type == "note_on" and msg.velocity > 0:
                pending[key] = tick
                continue

            # Remaining cases: a real note_off, or a note_on whose velocity
            # is exactly 0 (the "running status" way to end a note).
            if msg.type == "note_off" or msg.velocity == 0:
                started_at = pending.pop(key, None)
                if started_at is None:
                    continue
                notes_by_channel.setdefault(msg.channel, []).append(
                    (started_at, tick, msg.note)
                )

    return notes_by_channel
559
560
def _midi_to_musicxml(path: pathlib.Path) -> str:
    """Convert a MIDI file to a minimal MusicXML ``score-partwise`` string.

    One ``<part>`` is emitted per MIDI channel that has notes, numbered
    ``P1``, ``P2``, ... in ascending channel order, each holding a single
    ``<measure>``. ``<divisions>`` is the file's ticks-per-beat (480 when the
    header value is falsy), and each note's ``<duration>`` is its raw length
    in ticks, with a minimum of 1.

    Pitches are spelled with sharps. A sharpened pitch is written as the
    natural letter in ``<step>`` plus ``<alter>1</alter>``, because
    ``<step>`` only admits the letters A-G.

    Known limitations of this best-effort transcription: every note is
    labelled ``<type>quarter</type>`` regardless of its duration, notes are
    listed in start order with no rests, chords, or bar lines, and the MIDI
    file is read twice (once for the header, once for the notes).

    Args:
        path: Path to the source MIDI file.

    Returns:
        The MusicXML document as a string.
    """
    import mido

    mid = mido.MidiFile(str(path))
    divisions: int = mid.ticks_per_beat or 480

    channel_notes = _parse_midi_notes(path)

    parts: list[str] = []
    part_list_items: list[str] = []
    for part_number, channel in enumerate(sorted(channel_notes), 1):
        part_id = f"P{part_number}"
        part_list_items.append(
            f'    <score-part id="{part_id}">'
            f"<part-name>Channel {channel}</part-name>"
            f"</score-part>"
        )

        notes_xml: list[str] = []
        for start_tick, end_tick, pitch in sorted(channel_notes[channel]):
            # Guard against zero or negative lengths from malformed input.
            duration_ticks = max(1, end_tick - start_tick)
            name, octave = _midi_note_to_step_octave(pitch)
            # "C#" -> step "C" + alter 1; naturals get no <alter> element.
            step = name.rstrip("#")
            alter_xml = "<alter>1</alter>" if name.endswith("#") else ""
            notes_xml.append(
                f"      <note>"
                f"<pitch><step>{step}</step>{alter_xml}"
                f"<octave>{octave}</octave></pitch>"
                f"<duration>{duration_ticks}</duration>"
                f"<type>quarter</type>"
                f"</note>"
            )

        notes_block = "\n".join(notes_xml) if notes_xml else "      <!-- no notes -->"
        parts.append(
            f'  <part id="{part_id}">\n'
            f'    <measure number="1">\n'
            f"      <attributes>"
            f"<divisions>{divisions}</divisions>"
            f"</attributes>\n"
            f"{notes_block}\n"
            f"    </measure>\n"
            f"  </part>"
        )

    part_list_xml = "\n".join(part_list_items)
    parts_xml = "\n".join(parts)

    return (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        "<!DOCTYPE score-partwise PUBLIC\n"
        '  "-//Recordare//DTD MusicXML 4.0 Partwise//EN"\n'
        '  "http://www.musicxml.org/dtds/partwise.dtd">\n'
        '<score-partwise version="4.0">\n'
        f"  <part-list>\n{part_list_xml}\n  </part-list>\n"
        f"{parts_xml}\n"
        "</score-partwise>\n"
    )
629
630
def _midi_to_abc(path: pathlib.Path) -> str:
    """Convert a MIDI file to a simplified ABC notation document.

    The header is fixed (``X:1``, title taken from the file stem, ``M:4/4``,
    ``L:1/8``, ``K:C``). Each MIDI channel with notes becomes one voice,
    numbered from 1 in ascending channel order, whose line lists the note
    pitches in start-tick order separated by spaces. No note lengths are
    written; a voice with no notes is written as a single rest ``z``.

    Args:
        path: Path to the source MIDI file.

    Returns:
        The ABC document as a string ending with a newline.
    """
    notes_by_channel = _parse_midi_notes(path)

    document: list[str] = ["X:1", f"T:{path.stem}", "M:4/4", "L:1/8", "K:C"]

    for voice_number, channel in enumerate(sorted(notes_by_channel), start=1):
        # Stable sort on start tick only, so ties keep their parse order.
        by_onset = sorted(notes_by_channel[channel], key=lambda entry: entry[0])
        tokens = [_midi_note_to_abc(entry[2]) for entry in by_onset]
        if tokens:
            body = " ".join(tokens)
        else:
            body = "z"
        document.extend((f"V:{voice_number}", body))

    return "\n".join(document) + "\n"
662
663
664 # ---------------------------------------------------------------------------
665 # Commit resolution helpers
666 # ---------------------------------------------------------------------------
667
668
def resolve_commit_id(
    root: pathlib.Path,
    commit_prefix: Optional[str],
) -> str:
    """Return the commit identifier to export, defaulting to the HEAD commit.

    A non-``None`` *commit_prefix* is returned unchanged — no lookup or
    validation happens here. For ``None``, the text of ``<root>/.muse/HEAD``
    is read and treated as a path relative to ``.muse``; the stripped content
    of that ref file is the commit ID.

    Only the filesystem is consulted.

    Args:
        root: Muse repository root.
        commit_prefix: Commit ID or prefix supplied by the user, or ``None``
            to use HEAD.

    Returns:
        *commit_prefix* as given, or the non-empty commit ID HEAD points to.

    Raises:
        ValueError: If the ref file named by HEAD does not exist or is empty.
    """
    if commit_prefix is not None:
        return commit_prefix

    dot_muse = root / ".muse"
    head_target = (dot_muse / "HEAD").read_text().strip()
    branch_ref = dot_muse / pathlib.Path(head_target)

    # A missing ref file and an empty one both mean no commit exists yet.
    commit_id = branch_ref.read_text().strip() if branch_ref.exists() else ""
    if not commit_id:
        raise ValueError("No commits yet — nothing to export.")
    return commit_id