cgcardona / muse public
muse_arrange.py python
557 lines 18.5 KB
12901c5a Initial extraction from tellurstori/maestro cgcardona <gabriel@tellurstori.com> 4d ago
1 """Muse Arrange — arrangement map analysis for committed snapshots.
2
3 Builds an *arrangement matrix* from the file manifest of a Muse commit:
4 rows = instruments, columns = sections. Each cell records whether the
5 instrument is active in that section and, in density mode, how many bytes
6 of MIDI data it contributed (a byte-count proxy for note density).
7
8 **Path convention:**
9 Files in ``muse-work/`` that carry section metadata must follow::
10
11 <section>/<instrument>/<filename>
12
13 Examples::
14
15 intro/drums/beat.mid → section=intro, instrument=drums
16 chorus/strings/pad.mid → section=chorus, instrument=strings
17 verse/bass/line_v1.mid → section=verse, instrument=bass
18
Files with fewer than two directory components ahead of the filename
(for example ``bass/riff.mid`` or ``solo.mid``) are uncategorised and
excluded from the arrangement matrix.
21
22 **Outputs:**
23 - Text (``--format text``) — Unicode block-char matrix, human-readable
24 - JSON (``--format json``) — structured dict, AI-agent-consumable
25 - CSV (``--format csv``) — spreadsheet-ready rows
26
27 **Compare mode (``--compare commit-a commit-b``):**
28 Produces an :class:`ArrangementDiff` showing which ``(section, instrument)``
29 cells were added, removed, or unchanged between the two commits.
30
31 Why this matters for AI orchestration:
32 An AI agent can call ``muse arrange --format json HEAD`` before generating
33 a new string part to see exactly which sections already have strings,
34 preventing doubling mistakes and enabling coherent orchestration decisions.
35 """
36 from __future__ import annotations
37
38 import csv
39 import io
40 import json
41 import logging
42 from dataclasses import dataclass, field
43 from typing import Literal
44
45 logger = logging.getLogger(__name__)
46
47 # ---------------------------------------------------------------------------
48 # Path parsing
49 # ---------------------------------------------------------------------------
50
# Column ordering for section names that have a conventional position in a
# song.  Sections not listed here are placed after these, alphabetically
# (see ``_order_sections``).  The list also carries the alias spellings
# "pre-chorus" and "prechoruse".
_SECTION_ORDER: list[str] = [
    "intro",
    "verse",
    "prechorus",
    "pre-chorus",
    "prechoruse",
    "chorus",
    "bridge",
    "outro",
    "breakdown",
    "drop",
    "hook",
]

# Alternative spellings mapped onto the canonical section name.
_SECTION_ALIASES: dict[str, str] = {
    "pre-chorus": "prechorus",
    "prechoruse": "prechorus",
    "pre_chorus": "prechorus",
}
61
62
def _normalise_section(raw: str) -> str:
    """Return the canonical spelling of a section name.

    The name is lower-cased and stripped of surrounding whitespace.  If the
    cleaned name is a key of ``_SECTION_ALIASES`` the mapped value is
    returned; otherwise the cleaned name itself is returned.
    """
    cleaned = raw.lower().strip()
    if cleaned in _SECTION_ALIASES:
        return _SECTION_ALIASES[cleaned]
    return cleaned
67
68
def extract_section_instrument(rel_path: str) -> tuple[str, str] | None:
    """Split *rel_path* (relative to ``muse-work/``) into ``(section, instrument)``.

    The expected layout is::

        <section>/<instrument>/<filename>

    Backslashes are treated as path separators.  Only the first two
    components are inspected, so deeper nesting below the instrument
    directory is accepted and ignored.  The section is passed through
    :func:`_normalise_section`; the instrument is lower-cased and stripped.

    Returns ``None`` when the path has fewer than three components (two
    directories plus a filename), or when either the section or the
    instrument is empty after cleaning.

    Examples::

        "intro/drums/beat.mid"   → ("intro", "drums")
        "chorus/strings/pad.mid" → ("chorus", "strings")
        "bass/riff.mid"          → None   # only one directory component
        "solo.mid"               → None   # flat file
    """
    components = rel_path.replace("\\", "/").split("/")

    # A usable path needs a section directory, an instrument directory and
    # a filename.  "section/file" and bare filenames cannot be placed.
    if len(components) < 3:
        return None

    section_raw, instrument_raw = components[0], components[1]
    section = _normalise_section(section_raw)
    instrument = instrument_raw.lower().strip()

    if section and instrument:
        return section, instrument
    return None
101
102
103 # ---------------------------------------------------------------------------
104 # Core data types
105 # ---------------------------------------------------------------------------
106
107
@dataclass(frozen=True)
class ArrangementCell:
    """One ``(section, instrument)`` entry of the arrangement matrix.

    Attributes:
        section: Section (column) this cell belongs to.
        instrument: Instrument (row) this cell belongs to.
        active: ``True`` when at least one file exists for the pair.
        file_count: Number of files mapped to the pair (several takes of
            the same part each count once).  Defaults to 0.
        total_bytes: Sum of the object sizes of those files; used as a
            note-density proxy in ``--density`` mode.  Defaults to 0.
    """

    section: str
    instrument: str
    active: bool
    file_count: int = 0
    total_bytes: int = 0

    @property
    def density_score(self) -> float:
        """Return ``total_bytes`` as a float (no scaling is applied)."""
        score = float(self.total_bytes)
        return score
128
129
@dataclass
class ArrangementMatrix:
    """Arrangement matrix for a single commit.

    Attributes:
        commit_id: Identifier of the commit the matrix was built from.
        sections: Ordered section names (the matrix columns).
        instruments: Ordered instrument names (the matrix rows).
        cells: Mapping ``(section, instrument) → ArrangementCell``.  Pairs
            with no entry are treated as inactive by :meth:`get_cell`.
    """

    commit_id: str
    sections: list[str]
    instruments: list[str]
    cells: dict[tuple[str, str], ArrangementCell] = field(default_factory=dict)

    def get_cell(self, section: str, instrument: str) -> ArrangementCell:
        """Look up *(section, instrument)*; missing pairs yield an inactive cell."""
        try:
            return self.cells[(section, instrument)]
        except KeyError:
            # No file was recorded for this pair: report it as inactive
            # with zero counts instead of raising.
            return ArrangementCell(
                section=section, instrument=instrument, active=False
            )
152
153
@dataclass(frozen=True)
class ArrangementDiffCell:
    """How one ``(section, instrument)`` cell changed between two commits.

    Attributes:
        section: Section (column) of the compared cell.
        instrument: Instrument (row) of the compared cell.
        status: One of

            - ``"added"``     — inactive in commit-a, active in commit-b
            - ``"removed"``   — active in commit-a, inactive in commit-b
            - ``"unchanged"`` — same active/inactive state on both sides
        cell_a: The cell as seen in commit-a (baseline).
        cell_b: The cell as seen in commit-b (target).
    """

    section: str
    instrument: str
    status: Literal["added", "removed", "unchanged"]
    cell_a: ArrangementCell
    cell_b: ArrangementCell
169
170
@dataclass
class ArrangementDiff:
    """Result of comparing two arrangement matrices (commit-a → commit-b).

    Attributes:
        commit_id_a: Commit identifier of the baseline (left side).
        commit_id_b: Commit identifier of the target (right side).
        sections: Section names present in either matrix.
        instruments: Instrument names present in either matrix.
        cells: Mapping ``(section, instrument) → ArrangementDiffCell``.
    """

    commit_id_a: str
    commit_id_b: str
    sections: list[str]
    instruments: list[str]
    cells: dict[tuple[str, str], ArrangementDiffCell] = field(default_factory=dict)
188
189
190 # ---------------------------------------------------------------------------
191 # Matrix builder
192 # ---------------------------------------------------------------------------
193
194
def build_arrangement_matrix(
    commit_id: str,
    manifest: dict[str, str],
    object_sizes: dict[str, int] | None = None,
) -> ArrangementMatrix:
    """Turn a snapshot *manifest* into an :class:`ArrangementMatrix`.

    Parameters
    ----------
    commit_id:
        Commit identifier the manifest belongs to; stored on the matrix
        unchanged.
    manifest:
        ``{rel_path: object_id}`` mapping, as returned by
        :func:`maestro.muse_cli.db.get_commit_snapshot_manifest`.  Paths
        are relative to ``muse-work/``.  Paths that
        :func:`extract_section_instrument` cannot place are skipped.
    object_sizes:
        Optional ``{object_id: size_bytes}`` mapping.  When given, each
        cell's ``total_bytes`` is the sum of the sizes of its files;
        object ids missing from the mapping count as 0 bytes.

    Returns
    -------
    ArrangementMatrix
        Sections follow ``_SECTION_ORDER`` with unknown sections appended
        alphabetically; instruments are sorted alphabetically.  Every
        stored cell is active.
    """
    size_lookup = object_sizes if object_sizes else {}

    # (section, instrument) -> [file_count, byte_total].  A single dict
    # keeps both tallies together and remembers first-seen order.
    tallies: dict[tuple[str, str], list[int]] = {}
    for rel_path, object_id in manifest.items():
        key = extract_section_instrument(rel_path)
        if key is None:
            continue
        tally = tallies.setdefault(key, [0, 0])
        tally[0] += 1
        tally[1] += size_lookup.get(object_id, 0)

    cells: dict[tuple[str, str], ArrangementCell] = {
        (section, instrument): ArrangementCell(
            section=section,
            instrument=instrument,
            active=True,
            file_count=file_count,
            total_bytes=byte_total,
        )
        for (section, instrument), (file_count, byte_total) in tallies.items()
    }

    seen_sections = {section for section, _ in tallies}
    seen_instruments = {instrument for _, instrument in tallies}

    return ArrangementMatrix(
        commit_id=commit_id,
        sections=_order_sections(seen_sections),
        instruments=sorted(seen_instruments),
        cells=cells,
    )
263
264
def _order_sections(sections: set[str]) -> list[str]:
    """Return *sections* as a list: canonical ones first, the rest sorted.

    Names that appear in ``_SECTION_ORDER`` keep that list's relative
    order; every other name follows in alphabetical order.
    """
    ordered: list[str] = []
    for name in _SECTION_ORDER:
        if name in sections:
            ordered.append(name)

    leftovers = sections - set(_SECTION_ORDER)
    ordered.extend(sorted(leftovers))
    return ordered
270
271
272 # ---------------------------------------------------------------------------
273 # Renderers — text
274 # ---------------------------------------------------------------------------
275
# Four-character glyphs drawn in text output for an active cell (full
# blocks) and an inactive cell (light shade).
_ACTIVE_CHAR = "████"
_INACTIVE_CHAR = "░░░░"
278
279
def render_matrix_text(
    matrix: ArrangementMatrix,
    *,
    density: bool = False,
    section_filter: str | None = None,
    track_filter: str | None = None,
) -> str:
    """Render *matrix* as a human-readable text table.

    Each row is an instrument; each column is a section.  Active cells
    show ``████``; inactive cells show ``░░░░``.  In ``density`` mode an
    active cell shows its byte total with thousands separators and an
    inactive cell shows ``-``.

    Parameters
    ----------
    matrix:
        The arrangement matrix to render.
    density:
        When ``True``, show byte totals per cell instead of block chars.
    section_filter:
        If set, include only the named section (case-insensitive).
    track_filter:
        If set, include only the named instrument/track (case-insensitive).

    Returns
    -------
    str
        A title line, a blank line, a header row and one row per
        instrument, joined with newlines.  If the filters leave no
        section or no instrument, the title followed by a
        "no data" note is returned instead.
    """
    sections = _apply_section_filter(matrix.sections, section_filter)
    instruments = _apply_track_filter(matrix.instruments, track_filter)

    title = f"Arrangement Map — commit {matrix.commit_id[:8]}"
    if not sections or not instruments:
        return f"{title}\n\n(no data for the given filters)"

    def _cell_text(section: str, instrument: str) -> str:
        """Return the text shown for one cell in the current mode."""
        cell = matrix.get_cell(section, instrument)
        if density:
            return f"{cell.total_bytes:,}" if cell.active else "-"
        return _ACTIVE_CHAR if cell.active else _INACTIVE_CHAR

    # Render every cell up front so the column width can take the cell
    # contents into account, not just the section names.
    grid = [
        [_cell_text(section, instrument) for section in sections]
        for instrument in instruments
    ]

    instr_width = max(len(name) for name in instruments) + 2

    # str.center() never truncates, so a byte total wider than the column
    # would shift every later column.  Size the column to the widest of:
    # section names, rendered cells, and the 4-char block glyph.
    widest_section = max(len(name) for name in sections)
    widest_cell = max(len(text) for row in grid for text in row)
    col_width = max(widest_section, widest_cell, 4) + 2

    header = " " * instr_width + "".join(
        section.capitalize().center(col_width) for section in sections
    )
    lines: list[str] = [title, "", header]

    for instrument, row in zip(instruments, grid):
        lines.append(
            instrument.ljust(instr_width)
            + "".join(text.center(col_width) for text in row)
        )

    return "\n".join(lines)
334
335
def render_matrix_json(
    matrix: ArrangementMatrix,
    *,
    density: bool = False,
    section_filter: str | None = None,
    track_filter: str | None = None,
) -> str:
    """Serialise *matrix* as an indented JSON string.

    The payload has the keys ``commit_id``, ``sections``, ``instruments``
    and ``arrangement``.  ``arrangement`` maps instrument → section → value,
    where the value is the cell's ``active`` flag, or in ``density`` mode a
    dict with ``active``, ``file_count`` and ``total_bytes``.  The section
    and track filters restrict the listed sections and instruments.
    """
    sections = _apply_section_filter(matrix.sections, section_filter)
    instruments = _apply_track_filter(matrix.instruments, track_filter)

    def _describe(cell: ArrangementCell) -> object:
        """Return the JSON value for one cell in the current mode."""
        if not density:
            return cell.active
        return {
            "active": cell.active,
            "file_count": cell.file_count,
            "total_bytes": cell.total_bytes,
        }

    arrangement: dict[str, dict[str, object]] = {
        instrument: {
            section: _describe(matrix.get_cell(section, instrument))
            for section in sections
        }
        for instrument in instruments
    }

    return json.dumps(
        {
            "commit_id": matrix.commit_id,
            "sections": sections,
            "instruments": instruments,
            "arrangement": arrangement,
        },
        indent=2,
    )
368
369
def render_matrix_csv(
    matrix: ArrangementMatrix,
    *,
    density: bool = False,
    section_filter: str | None = None,
    track_filter: str | None = None,
) -> str:
    """Serialise *matrix* as CSV text, one row per instrument.

    The header row is ``instrument`` followed by the section names.  Each
    data row holds the instrument name and, per section, ``1``/``0`` for
    active/inactive — or in ``density`` mode the cell's byte total
    (``0`` for inactive cells).  The section and track filters restrict
    the columns and rows written.
    """
    sections = _apply_section_filter(matrix.sections, section_filter)
    instruments = _apply_track_filter(matrix.instruments, track_filter)

    out = io.StringIO()
    sheet = csv.writer(out)
    sheet.writerow(["instrument", *sections])

    for instrument in instruments:
        values: list[object] = []
        for section in sections:
            cell = matrix.get_cell(section, instrument)
            if not cell.active:
                values.append(0)
            elif density:
                values.append(cell.total_bytes)
            else:
                values.append(1)
        sheet.writerow([instrument, *values])

    return out.getvalue()
398
399
400 # ---------------------------------------------------------------------------
401 # Diff builder + renderer
402 # ---------------------------------------------------------------------------
403
404
def build_arrangement_diff(
    matrix_a: ArrangementMatrix,
    matrix_b: ArrangementMatrix,
) -> ArrangementDiff:
    """Compare two arrangement matrices cell by cell.

    The diff covers the union of both matrices' sections and instruments.
    Sections are ordered by :func:`_order_sections` and instruments
    alphabetically.  Every pair in that grid gets an
    :class:`ArrangementDiffCell`: ``"added"`` when only *matrix_b* has the
    cell active, ``"removed"`` when only *matrix_a* does, and
    ``"unchanged"`` otherwise.
    """
    sections = _order_sections(set(matrix_a.sections) | set(matrix_b.sections))
    instruments = sorted(set(matrix_a.instruments) | set(matrix_b.instruments))

    diff_cells: dict[tuple[str, str], ArrangementDiffCell] = {}
    for section in sections:
        for instrument in instruments:
            before = matrix_a.get_cell(section, instrument)
            after = matrix_b.get_cell(section, instrument)

            status: Literal["added", "removed", "unchanged"] = "unchanged"
            if after.active and not before.active:
                status = "added"
            elif before.active and not after.active:
                status = "removed"

            diff_cells[(section, instrument)] = ArrangementDiffCell(
                section=section,
                instrument=instrument,
                status=status,
                cell_a=before,
                cell_b=after,
            )

    return ArrangementDiff(
        commit_id_a=matrix_a.commit_id,
        commit_id_b=matrix_b.commit_id,
        sections=sections,
        instruments=instruments,
        cells=diff_cells,
    )
451
452
# One-character marker for each diff status.
_DIFF_SYMBOLS: dict[str, str] = {
    "added": "+",
    "removed": "-",
    "unchanged": " ",
}
458
459
def render_diff_text(diff: ArrangementDiff) -> str:
    """Render *diff* as a human-readable text table.

    Every instrument row is shown.  A cell that became active in commit-b
    is drawn as ``+████``, a cell that was removed as ``-████``, and an
    unchanged cell as ``████`` (active) or ``░░░░`` (inactive).  A pair
    with no entry in ``diff.cells`` is drawn as inactive.  When no cell
    changed, a blank line and a "no arrangement changes" note are appended.

    Raises ``KeyError`` if a cell carries a status that is not a key of
    ``_DIFF_SYMBOLS``.
    """
    a_short = diff.commit_id_a[:8]
    b_short = diff.commit_id_b[:8]
    lines: list[str] = [f"Arrangement Diff — {a_short} → {b_short}", ""]

    # Column widths; the defaults only apply when the diff is empty.
    instr_width = max((len(i) for i in diff.instruments), default=8) + 2
    col_width = max(max((len(s) for s in diff.sections), default=4), 4) + 2

    lines.append(
        " " * instr_width
        + "".join(s.capitalize().center(col_width) for s in diff.sections)
    )

    any_changes = False
    for instrument in diff.instruments:
        row = instrument.ljust(instr_width)
        for section in diff.sections:
            cell_diff = diff.cells.get((section, instrument))
            if cell_diff is None:
                cell_char = _INACTIVE_CHAR
            elif cell_diff.status == "unchanged":
                cell_char = (
                    _ACTIVE_CHAR if cell_diff.cell_b.active else _INACTIVE_CHAR
                )
            else:
                # Added/removed cells get their marker from the shared
                # symbol table, not from a second hard-coded copy.
                cell_char = f"{_DIFF_SYMBOLS[cell_diff.status]}{_ACTIVE_CHAR}"
                any_changes = True
            row += cell_char.center(col_width)
        lines.append(row)

    if not any_changes:
        lines.append("")
        lines.append("(no arrangement changes between these commits)")

    return "\n".join(lines)
514
515
516 def render_diff_json(diff: ArrangementDiff) -> str:
517 """Serialise *diff* as a JSON string."""
518 changes: list[dict[str, object]] = []
519 for key, cell_diff in diff.cells.items():
520 if cell_diff.status != "unchanged":
521 changes.append(
522 {
523 "section": key[0],
524 "instrument": key[1],
525 "status": cell_diff.status,
526 }
527 )
528
529 payload: dict[str, object] = {
530 "commit_id_a": diff.commit_id_a,
531 "commit_id_b": diff.commit_id_b,
532 "sections": diff.sections,
533 "instruments": diff.instruments,
534 "changes": changes,
535 }
536 return json.dumps(payload, indent=2)
537
538
539 # ---------------------------------------------------------------------------
540 # Filter helpers
541 # ---------------------------------------------------------------------------
542
543
def _apply_section_filter(sections: list[str], section_filter: str | None) -> list[str]:
    """Restrict *sections* to the one named by *section_filter*.

    ``None`` disables filtering and returns *sections* itself.  Otherwise
    the filter is passed through :func:`_normalise_section` and a new list
    holding only the entries equal to the normalised name is returned.
    """
    if section_filter is None:
        return sections

    wanted = _normalise_section(section_filter)
    kept: list[str] = []
    for name in sections:
        if name == wanted:
            kept.append(name)
    return kept
550
551
552 def _apply_track_filter(instruments: list[str], track_filter: str | None) -> list[str]:
553 """Return the filtered instrument list. ``None`` means no filter."""
554 if track_filter is None:
555 return instruments
556 lower = track_filter.lower().strip()
557 return [i for i in instruments if i == lower]