maestro/services/muse_arrange.py · cgcardona/muse

1

"""Muse Arrange — arrangement map analysis for committed snapshots.

Builds an *arrangement matrix* from the file manifest of a Muse commit:
rows = instruments, columns = sections.  Each cell records whether the
instrument is active in that section and, in density mode, how many bytes
of MIDI data it contributed (a byte-count proxy for note density).

**Path convention:**
Files in ``muse-work/`` that carry section metadata must follow::

    <section>/<instrument>/<filename>

Examples::

    intro/drums/beat.mid → section=intro, instrument=drums
    chorus/strings/pad.mid → section=chorus, instrument=strings
    verse/bass/line_v1.mid → section=verse, instrument=bass

Files with fewer than two directory components (that is, fewer than three
path components including the filename) are uncategorised and excluded
from the arrangement matrix.

**Outputs:**
- Text (``--format text``) — Unicode block-char matrix, human-readable
- JSON (``--format json``) — structured dict, AI-agent-consumable
- CSV (``--format csv``) — spreadsheet-ready rows

**Compare mode (``--compare commit-a commit-b``):**
Produces an :class:`ArrangementDiff` showing which ``(section, instrument)``
cells were added, removed, or unchanged between the two commits.

Why this matters for AI orchestration:
An AI agent can call ``muse arrange --format json HEAD`` before generating
a new string part to see exactly which sections already have strings,
preventing doubling mistakes and enabling coherent orchestration decisions.

"""

36

from __future__ import annotations

import csv

import io

import json

import logging

from dataclasses import dataclass, field

43

from typing import Literal

44

45

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

46

47

# ---------------------------------------------------------------------------

48

# Path parsing

49

# ---------------------------------------------------------------------------

50

51

# Canonical left-to-right ordering of section names.  Sections that are not
# listed here are appended alphabetically by ``_order_sections``.
_SECTION_ORDER: list[str] = [
    "intro",
    "verse",
    "prechorus",
    "pre-chorus",
    "prechoruse",
    "chorus",
    "bridge",
    "outro",
    "breakdown",
    "drop",
    "hook",
]

# Alternative spellings mapped onto the canonical section name.
_SECTION_ALIASES: dict[str, str] = {
    "pre-chorus": "prechorus",
    "prechoruse": "prechorus",
    "pre_chorus": "prechorus",
}

def _normalise_section(raw: str) -> str:
    """Return the canonical spelling of the section name *raw*.

    The name is lower-cased and stripped of surrounding whitespace, then
    looked up in ``_SECTION_ALIASES``.  Names without an alias are returned
    in their cleaned form.
    """
    cleaned = raw.lower().strip()
    if cleaned in _SECTION_ALIASES:
        return _SECTION_ALIASES[cleaned]
    return cleaned

67

68

69

def extract_section_instrument(rel_path: str) -> tuple[str, str] | None:
    """Split *rel_path* (relative to ``muse-work/``) into ``(section, instrument)``.

    The path is expected to follow the canonical convention::

        <section>/<instrument>/<filename>

    Backslashes are treated as path separators.  Only the first two
    components are used; any deeper nesting is ignored.  The section name is
    normalised via :func:`_normalise_section`; the instrument name is
    lower-cased and stripped of surrounding whitespace.

    Returns ``None`` when the path has fewer than three components (two
    directories plus a filename), or when the section or instrument
    component is empty after cleaning.

    Examples::

        "intro/drums/beat.mid"   → ("intro", "drums")
        "chorus/strings/pad.mid" → ("chorus", "strings")
        "bass/riff.mid"          → None  # only one directory component
        "solo.mid"               → None  # flat file
    """
    components = rel_path.replace("\\", "/").split("/")

    # A section directory, an instrument directory and a filename are the
    # minimum; with only two components the second one is the filename,
    # not an instrument.
    if len(components) < 3:
        return None

    raw_section, raw_instrument = components[0], components[1]
    section = _normalise_section(raw_section)
    instrument = raw_instrument.lower().strip()

    if section and instrument:
        return section, instrument
    return None

101

102

103

# ---------------------------------------------------------------------------

104

# Core data types

105

# ---------------------------------------------------------------------------

106

107

108

@dataclass(frozen=True)
class ArrangementCell:
    """One ``(section, instrument)`` cell of the arrangement matrix.

    Attributes:
        section: Section name (matrix column).
        instrument: Instrument name (matrix row).
        active: ``True`` when at least one file exists for this pair.
        file_count: Number of files counted for this pair (useful when
            multiple takes exist).
        total_bytes: Sum of the object sizes of those files — used as a
            note-density proxy in ``--density`` mode.
    """

    section: str
    instrument: str
    active: bool
    file_count: int = 0
    total_bytes: int = 0

    @property
    def density_score(self) -> float:
        """Return ``total_bytes`` as a float; no normalisation is applied."""
        return float(self.total_bytes)

@dataclass
class ArrangementMatrix:
    """Arrangement matrix for a single commit.

    Attributes:
        commit_id: Commit SHA the matrix was built from.
        sections: Ordered list of section names (columns).
        instruments: Ordered list of instrument names (rows).
        cells: Mapping ``(section, instrument) → ArrangementCell``.
    """

    commit_id: str
    sections: list[str]
    instruments: list[str]
    cells: dict[tuple[str, str], ArrangementCell] = field(default_factory=dict)

    def get_cell(self, section: str, instrument: str) -> ArrangementCell:
        """Look up *(section, instrument)*; pairs not stored yield an inactive cell."""
        placeholder = ArrangementCell(
            section=section,
            instrument=instrument,
            active=False,
        )
        return self.cells.get((section, instrument), placeholder)

@dataclass(frozen=True)
class ArrangementDiffCell:
    """How one ``(section, instrument)`` cell changed between two commits.

    Attributes:
        section: Section name of the cell.
        instrument: Instrument name of the cell.
        status: One of

            - ``"added"``     — active in commit-b, absent in commit-a
            - ``"removed"``   — active in commit-a, absent in commit-b
            - ``"unchanged"`` — same active/inactive state in both commits
        cell_a: The cell as seen in commit-a.
        cell_b: The cell as seen in commit-b.
    """

    section: str
    instrument: str
    status: Literal["added", "removed", "unchanged"]
    cell_a: ArrangementCell
    cell_b: ArrangementCell

@dataclass
class ArrangementDiff:
    """Cell-level comparison of two arrangement matrices (commit-a → commit-b).

    Attributes:
        commit_id_a: Commit SHA for the baseline (left side).
        commit_id_b: Commit SHA for the target (right side).
        sections: Union of section names across both matrices.
        instruments: Union of instrument names across both matrices.
        cells: Mapping ``(section, instrument) → ArrangementDiffCell``.
    """

    commit_id_a: str
    commit_id_b: str
    sections: list[str]
    instruments: list[str]
    cells: dict[tuple[str, str], ArrangementDiffCell] = field(default_factory=dict)

188

189

190

# ---------------------------------------------------------------------------

191

# Matrix builder

192

# ---------------------------------------------------------------------------

193

194

195

def build_arrangement_matrix(
    commit_id: str,
    manifest: dict[str, str],
    object_sizes: dict[str, int] | None = None,
) -> ArrangementMatrix:
    """Build an :class:`ArrangementMatrix` from a snapshot *manifest*.

    Parameters
    ----------
    commit_id:
        The commit SHA the manifest was taken from (stored on the matrix).
    manifest:
        A ``{rel_path: object_id}`` mapping; paths are relative to
        ``muse-work/``.  Paths that :func:`extract_section_instrument`
        cannot map to a ``(section, instrument)`` pair are skipped.
    object_sizes:
        Optional ``{object_id: size_bytes}`` map.  When provided, each cell
        accumulates the byte sizes of its files.  Missing entries count as 0.

    Returns
    -------
    ArrangementMatrix
        Sections follow the canonical ordering of ``_SECTION_ORDER`` with
        unknown sections appended alphabetically; instruments are sorted
        alphabetically.
    """
    size_lookup = object_sizes if object_sizes else {}

    # (section, instrument) -> [file count, byte total].  Insertion order
    # follows the first appearance of each pair in the manifest.
    tallies: dict[tuple[str, str], list[int]] = {}
    for rel_path, object_id in manifest.items():
        pair = extract_section_instrument(rel_path)
        if pair is None:
            continue
        tally = tallies.setdefault(pair, [0, 0])
        tally[0] += 1
        tally[1] += size_lookup.get(object_id, 0)

    cells: dict[tuple[str, str], ArrangementCell] = {
        (section, instrument): ArrangementCell(
            section=section,
            instrument=instrument,
            active=True,
            file_count=file_count,
            total_bytes=byte_total,
        )
        for (section, instrument), (file_count, byte_total) in tallies.items()
    }

    return ArrangementMatrix(
        commit_id=commit_id,
        sections=_order_sections({section for section, _ in tallies}),
        instruments=sorted({instrument for _, instrument in tallies}),
        cells=cells,
    )

def _order_sections(sections: set[str]) -> list[str]:
    """Return *sections* as a list in display order.

    Names found in ``_SECTION_ORDER`` come first, in that list's order; the
    remaining names follow in alphabetical order.
    """
    canonical = [name for name in _SECTION_ORDER if name in sections]
    extras = sorted(sections.difference(_SECTION_ORDER))
    return canonical + extras

270

271

272

# ---------------------------------------------------------------------------

273

# Renderers — text

274

# ---------------------------------------------------------------------------

275

276

# Four-character glyphs drawn in text tables for active / inactive cells.
_ACTIVE_CHAR = "\u2588" * 4
_INACTIVE_CHAR = "\u2591" * 4

278

279

280

def render_matrix_text(
    matrix: ArrangementMatrix,
    *,
    density: bool = False,
    section_filter: str | None = None,
    track_filter: str | None = None,
) -> str:
    """Render *matrix* as a human-readable text table.

    Each row is an instrument; each column is a section.  Active cells
    show ``████``; inactive cells show ``░░░░``.  In ``density`` mode an
    active cell shows its total byte size (with thousands separators) and an
    inactive cell shows ``-``.

    Columns are sized to fit both the section names and the rendered cell
    text, so large byte totals do not push a row out of alignment with the
    header.

    Parameters
    ----------
    matrix:
        The arrangement matrix to render.
    density:
        When ``True``, show byte totals per cell instead of block chars.
    section_filter:
        If set, include only the named section (case-insensitive).
    track_filter:
        If set, include only the named instrument/track (case-insensitive).

    Returns
    -------
    str
        The table, or a short "no data" notice when the filters leave no
        sections or no instruments.
    """
    sections = _apply_section_filter(matrix.sections, section_filter)
    instruments = _apply_track_filter(matrix.instruments, track_filter)

    title = f"Arrangement Map — commit {matrix.commit_id[:8]}"
    if not sections or not instruments:
        return f"{title}\n\n(no data for the given filters)"

    # Resolve every cell's text up front: str.center() never truncates, so
    # the column width has to be derived from the widest text that will
    # actually be printed, not from the section names alone.
    grid: list[list[str]] = []
    for instrument in instruments:
        row_texts: list[str] = []
        for section in sections:
            cell = matrix.get_cell(section, instrument)
            if density:
                row_texts.append(f"{cell.total_bytes:,}" if cell.active else "-")
            else:
                row_texts.append(_ACTIVE_CHAR if cell.active else _INACTIVE_CHAR)
        grid.append(row_texts)

    instr_width = max(len(name) for name in instruments) + 2
    widest_label = max(len(name) for name in sections)
    widest_cell = max(len(text) for row_texts in grid for text in row_texts)
    col_width = max(widest_label, widest_cell, 4) + 2

    lines: list[str] = [title, ""]

    # Header row: blank gutter over the instrument column, then section names.
    lines.append(
        " " * instr_width
        + "".join(section.capitalize().center(col_width) for section in sections)
    )

    # One data row per instrument.
    for instrument, row_texts in zip(instruments, grid):
        lines.append(
            instrument.ljust(instr_width)
            + "".join(text.center(col_width) for text in row_texts)
        )

    return "\n".join(lines)

334

335

336

def render_matrix_json(
    matrix: ArrangementMatrix,
    *,
    density: bool = False,
    section_filter: str | None = None,
    track_filter: str | None = None,
) -> str:
    """Serialise *matrix* as an indented JSON document.

    The payload holds ``commit_id``, the filtered ``sections`` and
    ``instruments`` lists, and an ``arrangement`` object keyed by instrument
    and then by section.  Each leaf is the cell's ``active`` flag, or in
    ``density`` mode an object with ``active``, ``file_count`` and
    ``total_bytes``.
    """
    sections = _apply_section_filter(matrix.sections, section_filter)
    instruments = _apply_track_filter(matrix.instruments, track_filter)

    def describe(cell: ArrangementCell) -> object:
        # Leaf value for one cell, depending on the requested detail level.
        if not density:
            return cell.active
        return {
            "active": cell.active,
            "file_count": cell.file_count,
            "total_bytes": cell.total_bytes,
        }

    arrangement: dict[str, dict[str, object]] = {
        instrument: {
            section: describe(matrix.get_cell(section, instrument))
            for section in sections
        }
        for instrument in instruments
    }

    return json.dumps(
        {
            "commit_id": matrix.commit_id,
            "sections": sections,
            "instruments": instruments,
            "arrangement": arrangement,
        },
        indent=2,
    )

368

369

370

def render_matrix_csv(
    matrix: ArrangementMatrix,
    *,
    density: bool = False,
    section_filter: str | None = None,
    track_filter: str | None = None,
) -> str:
    """Serialise *matrix* as CSV text, one row per instrument.

    The header row is ``instrument`` followed by the section names.  Each
    data cell is ``1``/``0`` for active/inactive, or in ``density`` mode the
    cell's ``total_bytes`` (``0`` when inactive).
    """
    sections = _apply_section_filter(matrix.sections, section_filter)
    instruments = _apply_track_filter(matrix.instruments, track_filter)

    out = io.StringIO()
    writer = csv.writer(out)
    writer.writerow(["instrument", *sections])

    for instrument in instruments:
        values: list[object] = []
        for section in sections:
            cell = matrix.get_cell(section, instrument)
            if not cell.active:
                values.append(0)
            elif density:
                values.append(cell.total_bytes)
            else:
                values.append(1)
        writer.writerow([instrument, *values])

    return out.getvalue()

398

399

400

# ---------------------------------------------------------------------------

401

# Diff builder + renderer

402

# ---------------------------------------------------------------------------

403

404

405

def build_arrangement_diff(
    matrix_a: ArrangementMatrix,
    matrix_b: ArrangementMatrix,
) -> ArrangementDiff:
    """Compare two arrangement matrices cell by cell.

    The diff covers the union of both matrices' sections (in canonical
    section order) and the union of their instruments (alphabetical).  Every
    pair in that grid gets an :class:`ArrangementDiffCell` whose status is
    ``"added"``, ``"removed"`` or ``"unchanged"`` depending on the cell's
    ``active`` flag in *matrix_a* versus *matrix_b*.
    """
    sections = _order_sections(set(matrix_a.sections).union(matrix_b.sections))
    instruments = sorted(set(matrix_a.instruments).union(matrix_b.instruments))

    diff_cells: dict[tuple[str, str], ArrangementDiffCell] = {}
    for section in sections:
        for instrument in instruments:
            before = matrix_a.get_cell(section, instrument)
            after = matrix_b.get_cell(section, instrument)

            was_active = bool(before.active)
            is_active = bool(after.active)
            status: Literal["added", "removed", "unchanged"]
            if was_active == is_active:
                status = "unchanged"
            elif is_active:
                status = "added"
            else:
                status = "removed"

            diff_cells[(section, instrument)] = ArrangementDiffCell(
                section=section,
                instrument=instrument,
                status=status,
                cell_a=before,
                cell_b=after,
            )

    return ArrangementDiff(
        commit_id_a=matrix_a.commit_id,
        commit_id_b=matrix_b.commit_id,
        sections=sections,
        instruments=instruments,
        cells=diff_cells,
    )

# One-character marker for each diff status.
_DIFF_SYMBOLS: dict[str, str] = dict(
    added="+",
    removed="-",
    unchanged=" ",
)

def render_diff_text(diff: ArrangementDiff) -> str:
    """Render *diff* as a human-readable text table.

    Every instrument row is emitted, changed or not.  Within a row:

    - an added cell is drawn as ``+████``
    - a removed cell is drawn as ``-████``
    - an unchanged cell is drawn as ``████`` or ``░░░░`` according to its
      state in commit-b
    - a pair missing from ``diff.cells`` is drawn as ``░░░░``

    When no cell is added or removed, a trailing note says so.

    Parameters
    ----------
    diff:
        The arrangement diff to render.

    Returns
    -------
    str
        Title line, blank line, header row, one row per instrument, and
        optionally the "no changes" note, joined with newlines.

    Raises
    ------
    KeyError
        If a cell carries a status that is not a key of ``_DIFF_SYMBOLS``.
    """
    lines: list[str] = [
        f"Arrangement Diff — {diff.commit_id_a[:8]} → {diff.commit_id_b[:8]}",
        "",
    ]

    # Column widths; the defaults cover an empty instrument or section list.
    instr_width = max((len(i) for i in diff.instruments), default=8) + 2
    col_width = max(max((len(s) for s in diff.sections), default=4), 4) + 2

    lines.append(
        " " * instr_width
        + "".join(section.capitalize().center(col_width) for section in diff.sections)
    )

    any_changes = False
    for instrument in diff.instruments:
        rendered = [instrument.ljust(instr_width)]
        for section in diff.sections:
            cell_diff = diff.cells.get((section, instrument))
            if cell_diff is None:
                text = _INACTIVE_CHAR
            elif cell_diff.status == "unchanged":
                text = _ACTIVE_CHAR if cell_diff.cell_b.active else _INACTIVE_CHAR
            else:
                # Added/removed: prefix the active glyph with the status marker.
                text = f"{_DIFF_SYMBOLS[cell_diff.status]}{_ACTIVE_CHAR}"
                any_changes = True
            rendered.append(text.center(col_width))
        lines.append("".join(rendered))

    if not any_changes:
        lines.append("")
        lines.append("(no arrangement changes between these commits)")

    return "\n".join(lines)

514

515

516

def render_diff_json(diff: ArrangementDiff) -> str:
    """Serialise *diff* as an indented JSON document.

    The payload carries both commit ids, the section and instrument lists,
    and a ``changes`` array with one ``{section, instrument, status}`` entry
    for every cell whose status is not ``"unchanged"``.
    """
    changed: list[dict[str, object]] = [
        {
            "section": section,
            "instrument": instrument,
            "status": entry.status,
        }
        for (section, instrument), entry in diff.cells.items()
        if entry.status != "unchanged"
    ]

    return json.dumps(
        {
            "commit_id_a": diff.commit_id_a,
            "commit_id_b": diff.commit_id_b,
            "sections": diff.sections,
            "instruments": diff.instruments,
            "changes": changed,
        },
        indent=2,
    )

537

538

539

# ---------------------------------------------------------------------------

540

# Filter helpers

541

# ---------------------------------------------------------------------------

542

543

544

def _apply_section_filter(sections: list[str], section_filter: str | None) -> list[str]:
    """Restrict *sections* to the one named by *section_filter*.

    With no filter (``None``) the input list itself is returned.  Otherwise
    the filter is passed through ``_normalise_section`` and only entries
    equal to the result are kept.
    """
    if section_filter is None:
        return sections

    wanted = _normalise_section(section_filter)
    kept: list[str] = []
    for name in sections:
        if name == wanted:
            kept.append(name)
    return kept

550

551

552

def _apply_track_filter(instruments: list[str], track_filter: str | None) -> list[str]:
    """Restrict *instruments* to the one named by *track_filter*.

    With no filter (``None``) the input list itself is returned.  Otherwise
    the filter is lower-cased and stripped of surrounding whitespace, and
    only entries equal to it are kept.
    """
    if track_filter is None:
        return instruments

    wanted = track_filter.lower().strip()
    kept: list[str] = []
    for name in instruments:
        if name == wanted:
            kept.append(name)
    return kept