maestro/muse_cli/export_engine.py · cgcardona/muse

1

"""Muse CLI export engine — format-specific export logic.

2

3

Converts a Muse snapshot manifest into external file formats:

4

5

- ``midi`` — copy raw MIDI files from the snapshot (native format).

6

- ``json`` — structured JSON note representation for AI/tooling.

7

- ``musicxml`` — MusicXML for notation software (MuseScore, Sibelius, etc.).

8

- ``abc`` — ABC notation text for folk/traditional music.

9

- ``wav`` — render audio via Storpheus (requires Storpheus reachable).

10

11

All format handlers accept the same inputs (manifest, root, options) and

12

return a MuseExportResult describing what was written. The WAV handler

13

raises StorpheusUnavailableError when the service cannot be reached so the

14

CLI can surface a human-readable error.

15

16

Design note: export is a read-only Muse operation — no commit is created,

17

no DB writes occur. The same commit + format always produces identical
output (deterministic), except for the ``exported_at`` timestamp that the
JSON export embeds in its payload.

19

"""

20

from __future__ import annotations

import json

import logging

import pathlib

import shutil

from dataclasses import dataclass, field

27

from enum import Enum

28

from typing import Optional

import httpx

logger = logging.getLogger(__name__)

33

34

# ---------------------------------------------------------------------------

35

# Public types

36

# ---------------------------------------------------------------------------

37

38

39

class ExportFormat(str, Enum):
    """Identifiers of the export formats this engine can produce.

    Each member is also a ``str`` (via the mixin), so a member compares
    equal to its lowercase string value.
    """

    # Raw MIDI files copied from the snapshot.
    MIDI = "midi"
    # Structured JSON index of the snapshot.
    JSON = "json"
    # MusicXML for notation software.
    MUSICXML = "musicxml"
    # ABC notation text.
    ABC = "abc"
    # Audio rendered through the Storpheus service.
    WAV = "wav"

@dataclass(frozen=True)
class MuseExportOptions:
    """Immutable bundle of settings for one export run.

    Attributes:
        format: Target export format.
        commit_id: Full ID of the commit being exported; copied into the
            result (and into the JSON payload for JSON exports).
        output_path: Destination file, or destination directory when more
            than one file is written.
        track: Optional case-insensitive substring that a manifest path
            must contain to be exported.
        section: Optional case-insensitive substring that a manifest path
            must contain to be exported.
        split_tracks: When True, treat ``output_path`` as a directory and
            write one output file per source MIDI file.  The MIDI,
            MusicXML and ABC handlers all read this flag.
    """

    # Required settings.
    format: ExportFormat
    commit_id: str
    output_path: pathlib.Path

    # Optional manifest filters and output-layout switch.
    track: Optional[str] = None
    section: Optional[str] = None
    split_tracks: bool = False

@dataclass
class MuseExportResult:
    """Outcome of one export run.

    Handlers create an instance up front and mutate it while they work.

    Attributes:
        paths_written: Every file written during the export, in write
            order.  Paths are derived from the requested output path and
            are not resolved to absolute form.
        format: The format that was exported.
        commit_id: ID of the source commit.
        skipped_count: Number of manifest entries that produced no output
            (for example non-MIDI files or files missing on disk).
    """

    # default_factory gives each result its own list instead of a shared one.
    paths_written: list[pathlib.Path] = field(default_factory=list)
    format: ExportFormat = ExportFormat.MIDI
    commit_id: str = ""
    skipped_count: int = 0

85

86

87

class StorpheusUnavailableError(Exception):
    """Signal that WAV export cannot proceed because Storpheus is unreachable.

    Raised when the Storpheus health probe fails or answers with a
    non-200 status.  Callers are expected to catch it and show the
    message to the user instead of letting it surface as a traceback.
    """

# ---------------------------------------------------------------------------

96

# Manifest filtering

97

# ---------------------------------------------------------------------------

98

99

#: File extensions treated as MIDI files.
_MIDI_SUFFIXES: frozenset[str] = frozenset({".mid", ".midi"})


def filter_manifest(
    manifest: dict[str, str],
    *,
    track: Optional[str],
    section: Optional[str],
) -> dict[str, str]:
    """Build a new manifest holding only the entries that pass the filters.

    A filter passes when its text occurs, ignoring case, anywhere in the
    entry's relative path.  Every filter that is not ``None`` must pass;
    with no filters at all the result is a plain copy of *manifest*.

    Args:
        manifest: ``{rel_path: object_id}`` mapping to filter.
        track: Track name substring (e.g. ``"piano"``), or ``None``.
        section: Section name substring (e.g. ``"chorus"``), or ``None``.

    Returns:
        A new ``{rel_path: object_id}`` dict in the original entry order.
    """
    # Lower-case the active filters once; a None filter simply drops out.
    needles = [term.lower() for term in (track, section) if term is not None]
    return {
        rel_path: object_id
        for rel_path, object_id in manifest.items()
        if all(needle in rel_path.lower() for needle in needles)
    }

# ---------------------------------------------------------------------------

139

# Format handlers

140

# ---------------------------------------------------------------------------

def export_midi(
    manifest: dict[str, str],
    root: pathlib.Path,
    opts: MuseExportOptions,
) -> MuseExportResult:
    """Copy the snapshot's MIDI files to ``opts.output_path``.

    Source files are read from ``<root>/muse-work/<rel_path>``.  Entries
    whose extension is not a MIDI extension, or whose file is missing on
    disk, are counted in ``skipped_count`` and otherwise ignored.

    Output layout:

    * Exactly one MIDI file and ``split_tracks`` not set: the file is
      copied directly to ``opts.output_path``.
    * Otherwise ``opts.output_path`` is treated as a directory and one
      ``<stem>.mid`` file is written per source file.  If two source
      files share a stem (e.g. ``verse/piano.mid`` and
      ``chorus/piano.mid``), the later one in sorted path order is named
      after its full relative path (``verse_piano.mid``) so that no file
      silently overwrites another; a numeric suffix is the last resort.

    Args:
        manifest: Filtered snapshot manifest (``{rel_path: object_id}``).
        root: Muse repository root.
        opts: Export options including output path and flags.

    Returns:
        MuseExportResult listing written paths and the skipped count.
    """
    result = MuseExportResult(format=opts.format, commit_id=opts.commit_id)
    workdir = root / "muse-work"

    # Sorted iteration keeps write order and collision naming deterministic.
    midi_entries: list[tuple[str, pathlib.Path]] = []
    for rel_path in sorted(manifest):
        src = workdir / rel_path
        suffix = pathlib.PurePosixPath(rel_path).suffix.lower()
        if suffix not in _MIDI_SUFFIXES:
            result.skipped_count += 1
            logger.debug("export midi: skipping non-MIDI file %s", rel_path)
            continue
        if not src.exists():
            result.skipped_count += 1
            logger.warning("export midi: source file missing: %s", src)
            continue
        midi_entries.append((rel_path, src))

    if not midi_entries:
        return result

    if len(midi_entries) == 1 and not opts.split_tracks:
        opts.output_path.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(midi_entries[0][1], opts.output_path)
        result.paths_written.append(opts.output_path)
        logger.info("export midi: wrote %s", opts.output_path)
        return result

    opts.output_path.mkdir(parents=True, exist_ok=True)
    used_names: set[str] = set()
    for rel_path, src in midi_entries:
        pure = pathlib.PurePosixPath(rel_path)
        name = pure.stem
        if name in used_names:
            # Same stem already written from another directory: qualify the
            # name with the parent directories instead of overwriting.
            name = "_".join(
                part for part in pure.with_suffix("").parts if part != pure.anchor
            )
        base, serial = name, 1
        while name in used_names:
            # Still clashing (e.g. "a.mid" vs "a.midi"): fall back to a counter.
            serial += 1
            name = f"{base}_{serial}"
        used_names.add(name)

        dst = opts.output_path / f"{name}.mid"
        shutil.copy2(src, dst)
        result.paths_written.append(dst)
        logger.info("export midi: wrote %s", dst)

    return result

def export_json(
    manifest: dict[str, str],
    root: pathlib.Path,
    opts: MuseExportOptions,
) -> MuseExportResult:
    """Export the snapshot as a structured JSON index.

    The output JSON has the shape::

        {
          "commit_id": "<full commit hash>",
          "exported_at": "<ISO-8601 UTC timestamp>",
          "files": [
            {
              "path": "<rel_path>",
              "object_id": "<object id from the manifest>",
              "size_bytes": <int, or null when the file is not on disk>,
              "exists_in_workdir": <bool>
            },
            ...
          ]
        }

    ``files`` is ordered by relative path.  ``exported_at`` is the time of
    the export, so two exports of the same commit differ in that field.

    Args:
        manifest: Filtered snapshot manifest (``{rel_path: object_id}``).
        root: Muse repository root; files are looked up under
            ``<root>/muse-work``.
        opts: Export options including output path.

    Returns:
        MuseExportResult listing the single JSON file written.
    """
    # Imported here because no other part of the module needs it.
    import datetime

    result = MuseExportResult(format=opts.format, commit_id=opts.commit_id)
    workdir = root / "muse-work"

    files_list: list[dict[str, object]] = []
    for rel_path, object_id in sorted(manifest.items()):
        # One stat() call answers both "does it exist" and "how big is it",
        # and cannot be tripped up by the file vanishing between checks.
        size_bytes: Optional[int]
        try:
            size_bytes = (workdir / rel_path).stat().st_size
        except (OSError, ValueError):
            size_bytes = None
        files_list.append(
            {
                "path": rel_path,
                "object_id": object_id,
                "size_bytes": size_bytes,
                "exists_in_workdir": size_bytes is not None,
            }
        )

    payload: dict[str, object] = {
        "commit_id": opts.commit_id,
        "exported_at": datetime.datetime.now(datetime.timezone.utc).isoformat(),
        "files": files_list,
    }

    opts.output_path.parent.mkdir(parents=True, exist_ok=True)
    # Explicit encoding, matching the other text-writing handlers.
    opts.output_path.write_text(json.dumps(payload, indent=2), encoding="utf-8")
    result.paths_written.append(opts.output_path)
    logger.info("export json: wrote %s", opts.output_path)

    return result

def export_musicxml(
    manifest: dict[str, str],
    root: pathlib.Path,
    opts: MuseExportOptions,
) -> MuseExportResult:
    """Export the snapshot's MIDI files as MusicXML documents.

    Each MIDI file found under ``<root>/muse-work`` is converted with
    ``_midi_to_musicxml``, which emits one ``<part>`` per MIDI channel.
    The conversion is lossy because MIDI carries no notation semantics.
    Entries that are not MIDI files, or that are missing on disk, are
    counted in ``skipped_count``.

    Output layout:

    * Exactly one MIDI file and ``split_tracks`` not set: the document
      is written directly to ``opts.output_path``.
    * Otherwise ``opts.output_path`` is treated as a directory and one
      ``<stem>.xml`` file is written per source file.  If two source
      files share a stem, the later one in sorted path order is named
      after its full relative path (``verse_piano.xml``) so that no
      document silently overwrites another; a numeric suffix is the
      last resort.

    Args:
        manifest: Filtered snapshot manifest (``{rel_path: object_id}``).
        root: Muse repository root.
        opts: Export options including output path.

    Returns:
        MuseExportResult listing written paths and the skipped count.
    """
    result = MuseExportResult(format=opts.format, commit_id=opts.commit_id)
    workdir = root / "muse-work"

    # Sorted iteration keeps write order and collision naming deterministic.
    midi_entries: list[tuple[str, pathlib.Path]] = []
    for rel_path in sorted(manifest):
        suffix = pathlib.PurePosixPath(rel_path).suffix.lower()
        if suffix not in _MIDI_SUFFIXES:
            result.skipped_count += 1
            continue
        src = workdir / rel_path
        if not src.exists():
            result.skipped_count += 1
            continue
        midi_entries.append((rel_path, src))

    if not midi_entries:
        return result

    if len(midi_entries) == 1 and not opts.split_tracks:
        xml = _midi_to_musicxml(midi_entries[0][1])
        opts.output_path.parent.mkdir(parents=True, exist_ok=True)
        opts.output_path.write_text(xml, encoding="utf-8")
        result.paths_written.append(opts.output_path)
        logger.info("export musicxml: wrote %s", opts.output_path)
        return result

    opts.output_path.mkdir(parents=True, exist_ok=True)
    used_names: set[str] = set()
    for rel_path, src in midi_entries:
        pure = pathlib.PurePosixPath(rel_path)
        name = pure.stem
        if name in used_names:
            # Same stem already written from another directory: qualify the
            # name with the parent directories instead of overwriting.
            name = "_".join(
                part for part in pure.with_suffix("").parts if part != pure.anchor
            )
        base, serial = name, 1
        while name in used_names:
            # Still clashing (e.g. "a.mid" vs "a.midi"): fall back to a counter.
            serial += 1
            name = f"{base}_{serial}"
        used_names.add(name)

        dst = opts.output_path / f"{name}.xml"
        xml = _midi_to_musicxml(src)
        dst.write_text(xml, encoding="utf-8")
        result.paths_written.append(dst)
        logger.info("export musicxml: wrote %s", dst)

    return result

def export_abc(
    manifest: dict[str, str],
    root: pathlib.Path,
    opts: MuseExportOptions,
) -> MuseExportResult:
    """Export the snapshot's MIDI files as ABC notation.

    Each MIDI file found under ``<root>/muse-work`` is converted with
    ``_midi_to_abc``, which produces a simplified tune: one voice per
    MIDI channel with pitches mapped to ABC note names.  Entries that
    are not MIDI files, or that are missing on disk, are counted in
    ``skipped_count``.

    Output layout:

    * Exactly one MIDI file and ``split_tracks`` not set: the tune is
      written directly to ``opts.output_path``.
    * Otherwise ``opts.output_path`` is treated as a directory and one
      ``<stem>.abc`` file is written per source file.  If two source
      files share a stem, the later one in sorted path order is named
      after its full relative path (``verse_piano.abc``) so that no tune
      silently overwrites another; a numeric suffix is the last resort.

    Args:
        manifest: Filtered snapshot manifest (``{rel_path: object_id}``).
        root: Muse repository root.
        opts: Export options including output path.

    Returns:
        MuseExportResult listing written paths and the skipped count.
    """
    result = MuseExportResult(format=opts.format, commit_id=opts.commit_id)
    workdir = root / "muse-work"

    # Sorted iteration keeps write order and collision naming deterministic.
    midi_entries: list[tuple[str, pathlib.Path]] = []
    for rel_path in sorted(manifest):
        suffix = pathlib.PurePosixPath(rel_path).suffix.lower()
        if suffix not in _MIDI_SUFFIXES:
            result.skipped_count += 1
            continue
        src = workdir / rel_path
        if not src.exists():
            result.skipped_count += 1
            continue
        midi_entries.append((rel_path, src))

    if not midi_entries:
        return result

    if len(midi_entries) == 1 and not opts.split_tracks:
        abc = _midi_to_abc(midi_entries[0][1])
        opts.output_path.parent.mkdir(parents=True, exist_ok=True)
        opts.output_path.write_text(abc, encoding="utf-8")
        result.paths_written.append(opts.output_path)
        logger.info("export abc: wrote %s", opts.output_path)
        return result

    opts.output_path.mkdir(parents=True, exist_ok=True)
    used_names: set[str] = set()
    for rel_path, src in midi_entries:
        pure = pathlib.PurePosixPath(rel_path)
        name = pure.stem
        if name in used_names:
            # Same stem already written from another directory: qualify the
            # name with the parent directories instead of overwriting.
            name = "_".join(
                part for part in pure.with_suffix("").parts if part != pure.anchor
            )
        base, serial = name, 1
        while name in used_names:
            # Still clashing (e.g. "a.mid" vs "a.midi"): fall back to a counter.
            serial += 1
            name = f"{base}_{serial}"
        used_names.add(name)

        dst = opts.output_path / f"{name}.abc"
        abc = _midi_to_abc(src)
        dst.write_text(abc, encoding="utf-8")
        result.paths_written.append(dst)
        logger.info("export abc: wrote %s", dst)

    return result

def export_wav(
    manifest: dict[str, str],
    root: pathlib.Path,
    opts: MuseExportOptions,
    storpheus_url: str,
) -> MuseExportResult:
    """Probe Storpheus for WAV export and report the current stub result.

    Issues a synchronous ``GET <storpheus_url>/health`` with a 3-second
    timeout on every phase.  Any failure to complete the request, or any
    status other than 200, raises StorpheusUnavailableError.

    Rendering itself is not implemented yet: after a successful probe
    every manifest entry is counted as skipped, a warning is logged and
    no file is written.  ``root`` is accepted for signature parity with
    the other handlers and is currently unused.

    Args:
        manifest: Filtered snapshot manifest (``{rel_path: object_id}``).
        root: Muse repository root (unused for now).
        opts: Export options including output path.
        storpheus_url: Base URL of the Storpheus service; a trailing
            slash is tolerated.

    Returns:
        MuseExportResult with no written paths and ``skipped_count``
        equal to the number of manifest entries.

    Raises:
        StorpheusUnavailableError: When Storpheus is unreachable or its
            health endpoint does not answer 200.
    """
    result = MuseExportResult(format=opts.format, commit_id=opts.commit_id)

    try:
        # Normalise once so the probed URL and the URL reported in the
        # non-200 error are the same string (no "//health").
        health_url = f"{storpheus_url.rstrip('/')}/health"
        probe_timeout = httpx.Timeout(connect=3.0, read=3.0, write=3.0, pool=3.0)
        with httpx.Client(timeout=probe_timeout) as client:
            resp = client.get(health_url)
    except Exception as exc:
        # Deliberately broad: every probe failure is reported to the
        # caller as "Storpheus unavailable", with the cause chained.
        raise StorpheusUnavailableError(
            f"Storpheus is not reachable at {storpheus_url}: {exc}\n"
            "Start Storpheus (docker compose up storpheus) and retry."
        ) from exc

    if resp.status_code != 200:
        raise StorpheusUnavailableError(
            f"Storpheus health check returned non-200 at {health_url}.\n"
            "Check Storpheus logs: docker compose logs storpheus"
        )

    logger.info("Storpheus reachable at %s — WAV export ready", storpheus_url)
    # Nothing is rendered yet, so every entry counts as skipped.
    result.skipped_count = len(manifest)
    logger.warning(
        "WAV render delegation to Storpheus is not yet fully implemented; "
        "returning empty result. Full WAV rendering is tracked as a follow-up."
    )

    return result

# ---------------------------------------------------------------------------

433

# Dispatch

434

# ---------------------------------------------------------------------------

def export_snapshot(
    manifest: dict[str, str],
    root: pathlib.Path,
    opts: MuseExportOptions,
    storpheus_url: str = "http://localhost:10002",
) -> MuseExportResult:
    """Filter the manifest, then run the handler for ``opts.format``.

    Args:
        manifest: Unfiltered snapshot manifest (``{rel_path: object_id}``).
        root: Muse repository root.
        opts: Fully-populated export options; ``track`` and ``section``
            drive the filtering, ``format`` selects the handler.
        storpheus_url: Base URL of Storpheus, forwarded to the WAV
            handler only.

    Returns:
        The MuseExportResult produced by the selected handler.

    Raises:
        StorpheusUnavailableError: For WAV format when Storpheus cannot
            be reached.
        ValueError: If ``opts.format`` matches no known format.
    """
    filtered = filter_manifest(manifest, track=opts.track, section=opts.section)
    requested = opts.format

    # Handlers sharing the (manifest, root, opts) signature, tried in order
    # and matched with ``==``.
    uniform_handlers = (
        (ExportFormat.MIDI, export_midi),
        (ExportFormat.JSON, export_json),
        (ExportFormat.MUSICXML, export_musicxml),
        (ExportFormat.ABC, export_abc),
    )
    for fmt, handler in uniform_handlers:
        if requested == fmt:
            return handler(filtered, root, opts)

    # WAV is the only handler that also needs the service URL.
    if requested == ExportFormat.WAV:
        return export_wav(filtered, root, opts, storpheus_url=storpheus_url)

    raise ValueError(f"Unsupported export format: {opts.format!r}")

475

476

477

# ---------------------------------------------------------------------------

478

# MIDI note helpers

479

# ---------------------------------------------------------------------------

480

481

#: MIDI note names (sharps) indexed 0-11.
_NOTE_NAMES: list[str] = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]

#: ABC note names for MIDI pitch classes 0-11.
_ABC_NOTE_NAMES: list[str] = ["C", "^C", "D", "^D", "E", "F", "^F", "G", "^G", "A", "^A", "B"]


def _midi_note_to_step_octave(note: int) -> tuple[str, int]:
    """Split a MIDI note number into a note name and an octave number.

    The name comes from ``_NOTE_NAMES`` and spells black keys as sharps
    (``"C#"``).  Octaves are numbered so that MIDI 60 is ``("C", 4)``
    and MIDI 0 is ``("C", -1)``.

    Args:
        note: MIDI note number (0-127).

    Returns:
        ``(name, octave)`` tuple.
    """
    octave_block, pitch_class = divmod(note, 12)
    return _NOTE_NAMES[pitch_class], octave_block - 1

def _midi_note_to_abc(note: int) -> str:
    """Render a MIDI note number as an ABC pitch token.

    The octave starting at MIDI 60 uses upper-case letters (``C``), the
    octave starting at MIDI 72 uses lower-case letters (``c``).  Each
    octave above that appends one apostrophe, each octave below MIDI 60
    appends one comma.  Sharps keep their ``^`` prefix in every octave.

    Args:
        note: MIDI note number.

    Returns:
        ABC pitch string such as ``"^F,"``, ``"C"``, ``"c"`` or ``"^g'"``.
    """
    octave_block, pitch_class = divmod(note, 12)
    octave = octave_block - 1
    name = _ABC_NOTE_NAMES[pitch_class]

    if octave <= 4:
        # Upper-case range: zero commas at octave 4, one more per octave below.
        return name + "," * (4 - octave)

    # Lower-case range: "^" is unaffected by lower(); zero apostrophes at
    # octave 5, one more per octave above.
    return name.lower() + "'" * (octave - 5)

def _parse_midi_notes(
    path: pathlib.Path,
) -> dict[int, list[tuple[int, int, int]]]:
    """Read a MIDI file and collect its completed notes per channel.

    Walks every track, accumulating delta times into an absolute tick
    that restarts at 0 for each track.  A ``note_on`` with positive
    velocity opens a note.  A ``note_off``, or a ``note_on`` with
    velocity 0, closes the open note with the same channel and pitch.
    Notes that are never closed are not reported, and a close without a
    matching open is ignored.

    Args:
        path: Path to the MIDI file.

    Returns:
        ``{channel: [(start_tick, end_tick, pitch), ...]}`` with notes
        listed in the order they were closed.
    """
    # Imported inside the function, so mido is only required when a MIDI
    # file is actually parsed.
    import mido

    midi_file = mido.MidiFile(str(path))
    notes_by_channel: dict[int, list[tuple[int, int, int]]] = {}
    # (channel, pitch) -> tick at which the currently open note started.
    open_notes: dict[tuple[int, int], int] = {}

    for track in midi_file.tracks:
        tick = 0
        for msg in track:
            tick += msg.time
            is_note_on = msg.type == "note_on"
            if is_note_on and msg.velocity > 0:
                open_notes[(msg.channel, msg.note)] = tick
                continue
            if msg.type == "note_off" or (is_note_on and msg.velocity == 0):
                started_at = open_notes.pop((msg.channel, msg.note), None)
                if started_at is None:
                    continue
                notes_by_channel.setdefault(msg.channel, []).append(
                    (started_at, tick, msg.note)
                )

    return notes_by_channel

def _midi_to_musicxml(path: pathlib.Path) -> str:
    """Convert a MIDI file to a minimal MusicXML (score-partwise) string.

    Emits one ``<part>`` per MIDI channel that contains notes, each with
    a single measure.  ``<divisions>`` is the file's ticks-per-beat
    (480 when the file reports none) and every ``<duration>`` is the raw
    tick length of the note, with a minimum of 1.  Sharps are written as
    a natural ``<step>`` plus ``<alter>1</alter>``, as MusicXML requires.
    Every note is labelled ``<type>quarter</type>`` regardless of its
    duration.

    This is a best-effort transcription: MIDI carries no notation
    semantics, so the output suits import review, not engraving.

    Args:
        path: Path to the source MIDI file.

    Returns:
        MusicXML document as a string.
    """
    # Imported inside the function, so mido is only required when a
    # conversion is actually requested.
    import mido

    # The file is opened here only for its timing resolution; the notes
    # themselves come from _parse_midi_notes, which reads the file again.
    mid = mido.MidiFile(str(path))
    divisions: int = mid.ticks_per_beat or 480

    channel_notes = _parse_midi_notes(path)

    parts: list[str] = []
    part_list_items: list[str] = []
    for ch_idx, channel in enumerate(sorted(channel_notes), 1):
        part_id = f"P{ch_idx}"
        part_list_items.append(
            f' <score-part id="{part_id}">'
            f"<part-name>Channel {channel}</part-name>"
            f"</score-part>"
        )

        notes_xml: list[str] = []
        for start_tick, end_tick, pitch in sorted(channel_notes[channel]):
            duration_ticks = max(1, end_tick - start_tick)
            name, octave = _midi_note_to_step_octave(pitch)
            # <step> may only hold a letter A-G; the sharp in names such as
            # "C#" has to be expressed as a separate <alter> element.
            step = name[0]
            alter_xml = "<alter>1</alter>" if name.endswith("#") else ""
            notes_xml.append(
                f" <note>"
                f"<pitch><step>{step}</step>{alter_xml}<octave>{octave}</octave></pitch>"
                f"<duration>{duration_ticks}</duration>"
                f"<type>quarter</type>"
                f"</note>"
            )

        notes_block = "\n".join(notes_xml) if notes_xml else " "
        parts.append(
            f' <part id="{part_id}">\n'
            f' <measure number="1">\n'
            f" <attributes>"
            f"<divisions>{divisions}</divisions>"
            f"</attributes>\n"
            f"{notes_block}\n"
            f" </measure>\n"
            f" </part>"
        )

    part_list_xml = "\n".join(part_list_items)
    parts_xml = "\n".join(parts)

    return (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<!DOCTYPE score-partwise PUBLIC\n'
        ' "-//Recordare//DTD MusicXML 4.0 Partwise//EN"\n'
        ' "http://www.musicxml.org/dtds/partwise.dtd">\n'
        '<score-partwise version="4.0">\n'
        f" <part-list>\n{part_list_xml}\n </part-list>\n"
        f"{parts_xml}\n"
        "</score-partwise>\n"
    )

def _midi_to_abc(path: pathlib.Path) -> str:
    """Convert a MIDI file to a simplified ABC notation tune.

    The tune starts with a fixed header (``X:1``, a title taken from the
    file stem, ``M:4/4``, ``L:1/8``, ``K:C``).  Each MIDI channel that
    contains notes then becomes one numbered voice: a ``V:<n>`` line
    followed by the channel's pitches in start-time order, separated by
    spaces.  Only pitches are written; no per-note lengths are emitted.

    Args:
        path: Path to the source MIDI file.

    Returns:
        ABC notation document as a string ending with a newline.
    """
    notes_by_channel = _parse_midi_notes(path)

    header = ["X:1", f"T:{path.stem}", "M:4/4", "L:1/8", "K:C"]

    voices: list[str] = []
    for voice_number, channel in enumerate(sorted(notes_by_channel), start=1):
        by_start = sorted(notes_by_channel[channel], key=lambda note: note[0])
        pitches = " ".join(_midi_note_to_abc(note[2]) for note in by_start)
        # A voice without any notes is written as a single rest.
        voices += [f"V:{voice_number}", pitches or "z"]

    return "\n".join(header + voices) + "\n"

662

663

664

# ---------------------------------------------------------------------------

665

# Commit resolution helpers

666

# ---------------------------------------------------------------------------

667

668

669

def resolve_commit_id(
    root: pathlib.Path,
    commit_prefix: Optional[str],
) -> str:
    """Return the commit identifier to export, defaulting to HEAD.

    A supplied *commit_prefix* is returned unchanged; it may still be a
    short prefix and is not checked here.  Otherwise ``<root>/.muse/HEAD``
    is read, its stripped content is taken as a path relative to
    ``.muse``, and the stripped content of that ref file is returned.
    Only the filesystem is consulted.

    Args:
        root: Muse repository root.
        commit_prefix: Commit ID or prefix chosen by the user, or
            ``None`` to use HEAD.

    Returns:
        A non-empty commit ID (or the caller-supplied prefix).

    Raises:
        ValueError: If the ref named by HEAD does not exist or is empty.
    """
    if commit_prefix is not None:
        return commit_prefix

    muse_dir = root / ".muse"
    head_ref = (muse_dir / "HEAD").read_text().strip()
    ref_path = muse_dir / pathlib.Path(head_ref)

    # A missing ref file and an empty one both mean "no commit yet".
    head_commit_id = ref_path.read_text().strip() if ref_path.exists() else ""
    if not head_commit_id:
        raise ValueError("No commits yet — nothing to export.")
    return head_commit_id