cgcardona / muse public
muse_divergence.py python
390 lines 12.7 KB
12901c5a Initial extraction from tellurstori/maestro cgcardona <gabriel@tellurstori.com> 4d ago
1 """Muse Divergence Engine — musical divergence between two CLI branches.
2
3 Computes a per-dimension divergence score by comparing the file-level changes
4 each branch introduced since their common ancestor (merge base).
5
6 Dimensions analysed
7 -------------------
8 - ``melodic`` — lead/melody/solo/vocal files
9 - ``harmonic`` — harmony/chord/key/scale files
10 - ``rhythmic`` — beat/drum/rhythm/groove/percussion files
11 - ``structural`` — structure/form/section/bridge/chorus/verse/intro/outro files
12 - ``dynamic`` — mix/master/volume/level/dynamics files
13
14 A path is assigned to zero or more dimensions by keyword (substring) matching
15 on the lowercased path. Paths that do not match any dimension keyword are
16 unclassified: they are excluded from every dimension score and therefore
17 from ``overall_score``, which is the mean of the per-dimension scores.
18
19 Score formula (per dimension)
20 ------------------------------
21 Given the sets of paths changed on branch A (``a_dim``) and branch B
22 (``b_dim``) since the merge base for a specific dimension:
23
24 score = |symmetric_difference(a_dim, b_dim)| / |union(a_dim, b_dim)|
25
26 Score 0.0 = both branches changed exactly the same files in this dimension.
27 Score 1.0 = no overlap — completely diverged.
28
29 Boundary rules
30 --------------
31 - Must NOT import StateStore, executor, MCP tools, or handlers.
32 - Must NOT import ``muse_merge_base`` (variation-level LCA) — use
33 ``merge_engine.find_merge_base`` (commit-level LCA) for CLI branches.
34 - May import ``muse_cli.{db, merge_engine, models}``.
35 """
36
37 from __future__ import annotations
38
39 import logging
40 from collections import deque
41 from dataclasses import dataclass
42 from enum import Enum
43
44 from sqlalchemy.ext.asyncio import AsyncSession
45 from sqlalchemy.future import select
46
47 from maestro.muse_cli.db import get_commit_snapshot_manifest
48 from maestro.muse_cli.merge_engine import find_merge_base
49 from maestro.muse_cli.models import MuseCliCommit
50
51 logger = logging.getLogger(__name__)
52
53 # ---------------------------------------------------------------------------
54 # Constants
55 # ---------------------------------------------------------------------------
56
#: Lowercase keyword patterns used to classify file paths into musical dimensions.
#: A path belongs to a dimension when any of that dimension's keywords occurs
#: as a substring of the lowercased path.
_DIMENSION_PATTERNS: dict[str, tuple[str, ...]] = {
    "melodic": ("melody", "lead", "solo", "vocal"),
    "harmonic": ("harm", "chord", "key", "scale"),
    "rhythmic": ("beat", "drum", "rhythm", "groove", "perc"),
    "structural": (
        "struct",
        "form",
        "section",
        "bridge",
        "chorus",
        "verse",
        "intro",
        "outro",
    ),
    "dynamic": ("mix", "master", "volume", "level", "dyn"),
}

#: Every known dimension name, in the insertion order of the pattern table
#: above (melodic, harmonic, rhythmic, structural, dynamic).
ALL_DIMENSIONS: tuple[str, ...] = tuple(_DIMENSION_PATTERNS)
73
74
75 # ---------------------------------------------------------------------------
76 # Result types
77 # ---------------------------------------------------------------------------
78
79
class DivergenceLevel(str, Enum):
    """Coarse, human-facing bucket for a divergence score.

    Members are also plain strings (``str`` mixin), so each compares equal
    to its lowercase value.

    Score ranges used by :func:`score_to_level`
    -------------------------------------------
    - ``NONE`` — score < 0.15
    - ``LOW``  — 0.15 ≤ score < 0.40
    - ``MED``  — 0.40 ≤ score < 0.70
    - ``HIGH`` — score ≥ 0.70
    """

    NONE = "none"
    LOW = "low"
    MED = "med"
    HIGH = "high"
95
96
@dataclass(frozen=True)
class DimensionDivergence:
    """Immutable divergence report for one musical dimension.

    Attributes:
        dimension: Name of the dimension being reported (e.g. ``"melodic"``).
        level: Qualitative bucket for the score.
        score: Normalised divergence score in [0.0, 1.0].
        description: One-sentence, human-readable summary of the divergence.
        branch_a_summary: Text stating how many files of this dimension
            changed on branch A.
        branch_b_summary: Text stating how many files of this dimension
            changed on branch B.
    """

    dimension: str
    level: DivergenceLevel
    score: float
    description: str
    branch_a_summary: str
    branch_b_summary: str
116
117
@dataclass(frozen=True)
class MuseDivergenceResult:
    """Immutable top-level divergence report for a pair of CLI branches.

    Attributes:
        branch_a: First branch name, as supplied by the caller.
        branch_b: Second branch name, as supplied by the caller.
        common_ancestor: Commit ID used as the merge base, or ``None`` when
            the two histories are disjoint.
        dimensions: One :class:`DimensionDivergence` per analysed dimension.
        overall_score: Mean of the per-dimension scores, in [0.0, 1.0].
    """

    branch_a: str
    branch_b: str
    common_ancestor: str | None
    dimensions: tuple[DimensionDivergence, ...]
    overall_score: float
135
136
137 # ---------------------------------------------------------------------------
138 # Pure helpers
139 # ---------------------------------------------------------------------------
140
141
def classify_path(path: str) -> set[str]:
    """Return every musical dimension whose keywords occur in *path*.

    The whole path (directories included) is lowercased and each dimension's
    keywords are tested as plain substrings, so one path can land in several
    dimensions (e.g. ``"drum_mix.mid"`` → ``rhythmic`` and ``dynamic``) and
    short keywords also match inside longer words (``"key"`` matches
    ``"keyboard"``).

    Args:
        path: POSIX-style relative file path from a snapshot manifest.

    Returns:
        Set of matching dimension names; empty when the path is unclassified.
    """
    needle_space = path.lower()
    matched: set[str] = set()
    for dim_name, keywords in _DIMENSION_PATTERNS.items():
        for keyword in keywords:
            if keyword in needle_space:
                matched.add(dim_name)
                break  # one hit is enough for this dimension
    return matched
160
161
def score_to_level(score: float) -> DivergenceLevel:
    """Bucket a numeric divergence score into a :class:`DivergenceLevel`.

    Args:
        score: Normalised score, expected in [0.0, 1.0].

    Returns:
        ``NONE`` below 0.15, ``LOW`` below 0.40, ``MED`` below 0.70,
        otherwise ``HIGH``.
    """
    # Exclusive upper bounds, checked in ascending order; first match wins.
    upper_bounds = (
        (0.15, DivergenceLevel.NONE),
        (0.40, DivergenceLevel.LOW),
        (0.70, DivergenceLevel.MED),
    )
    for bound, bucket in upper_bounds:
        if score < bound:
            return bucket
    return DivergenceLevel.HIGH
178
179
def compute_dimension_divergence(
    dimension: str,
    branch_a_changed: set[str],
    branch_b_changed: set[str],
) -> DimensionDivergence:
    """Compute divergence for a single musical dimension.

    Both change sets are first narrowed to the paths that
    :func:`classify_path` assigns to *dimension*. The score is then
    ``|symmetric_diff| / |union|`` over those narrowed sets:

    - 0.0 → both branches changed exactly the same files (or neither branch
      changed any file in this dimension).
    - 1.0 → no overlap — completely diverged.

    Only path membership is compared: a file changed on both branches counts
    as overlap even if the two branches changed it differently.

    Args:
        dimension: Dimension name (one of :data:`ALL_DIMENSIONS`).
        branch_a_changed: Paths changed on branch A since the merge base.
        branch_b_changed: Paths changed on branch B since the merge base.

    Returns:
        A :class:`DimensionDivergence` with the score rounded to 4 decimal
        places, its level, and human-readable summaries.
    """
    a_dim = {p for p in branch_a_changed if dimension in classify_path(p)}
    b_dim = {p for p in branch_b_changed if dimension in classify_path(p)}

    union = a_dim | b_dim
    score = len(a_dim ^ b_dim) / len(union) if union else 0.0

    # The level is derived from the unrounded score, and the description is
    # derived from the level, so the thresholds live only in score_to_level.
    level = score_to_level(score)

    if not union:
        desc = f"No {dimension} changes on either branch."
    else:
        desc = {
            DivergenceLevel.NONE: f"Both branches made similar {dimension} changes.",
            DivergenceLevel.LOW: f"Minor {dimension} divergence — mostly aligned.",
            DivergenceLevel.MED: f"Moderate {dimension} divergence — different directions.",
            DivergenceLevel.HIGH: (
                f"High {dimension} divergence — branches took different creative paths."
            ),
        }[level]

    return DimensionDivergence(
        dimension=dimension,
        level=level,
        score=round(score, 4),
        description=desc,
        branch_a_summary=f"{len(a_dim)} {dimension} file(s) changed",
        branch_b_summary=f"{len(b_dim)} {dimension} file(s) changed",
    )
233
234
235 # ---------------------------------------------------------------------------
236 # Async DB helpers
237 # ---------------------------------------------------------------------------
238
239
async def get_branch_head_commit_id(
    session: AsyncSession,
    repo_id: str,
    branch: str,
) -> str | None:
    """Look up the newest commit recorded for *branch* in *repo_id*.

    "Newest" means the row with the greatest ``committed_at`` among the
    commits whose ``repo_id`` and ``branch`` columns match.

    Args:
        session: Open async DB session.
        repo_id: Repository identifier (from ``.muse/repo.json``).
        branch: Branch name.

    Returns:
        The commit ID, or ``None`` when no commit matches.
    """
    newest_first = (
        select(MuseCliCommit.commit_id)
        .where(
            MuseCliCommit.repo_id == repo_id,
            MuseCliCommit.branch == branch,
        )
        .order_by(MuseCliCommit.committed_at.desc())
        .limit(1)
    )
    rows = await session.execute(newest_first)
    return rows.scalar_one_or_none()
265
266
async def collect_changed_paths_since(
    session: AsyncSession,
    tip_commit_id: str,
    base_commit_id: str | None,
) -> set[str]:
    """Return the paths that differ between the base and tip snapshots.

    A path counts as changed when it is:

    - added — present in the tip manifest only,
    - deleted — present in the base manifest only, or
    - modified — present in both with a different manifest value
      (``object_id``).

    A falsy *base_commit_id* (``None`` for disjoint histories) is treated as
    an empty base, so every path in the tip snapshot is returned. A manifest
    lookup that yields nothing is likewise treated as an empty manifest.

    Args:
        session: Open async DB session.
        tip_commit_id: Branch HEAD commit ID.
        base_commit_id: Merge-base commit ID, or ``None``.

    Returns:
        Set of POSIX paths that changed between base and tip.
    """
    tip_files = await get_commit_snapshot_manifest(session, tip_commit_id) or {}
    if base_commit_id:
        base_files: dict[str, str] = (
            await get_commit_snapshot_manifest(session, base_commit_id) or {}
        )
    else:
        base_files = {}

    tip_keys = set(tip_files)
    base_keys = set(base_files)

    # Paths on exactly one side are the additions and deletions.
    delta: set[str] = tip_keys ^ base_keys
    # Paths on both sides count only when their object IDs differ.
    delta.update(
        shared
        for shared in base_keys & tip_keys
        if base_files[shared] != tip_files[shared]
    )
    return delta
306
307
308 # ---------------------------------------------------------------------------
309 # Public API
310 # ---------------------------------------------------------------------------
311
312
async def compute_divergence(
    session: AsyncSession,
    *,
    repo_id: str,
    branch_a: str,
    branch_b: str,
    since: str | None = None,
    dimensions: list[str] | None = None,
) -> MuseDivergenceResult:
    """Build the full divergence report for two CLI branches.

    Steps: resolve each branch's head commit, determine the base commit
    (*since* if given, otherwise the merge base of the two heads), collect
    the paths each head changed relative to that base, then score every
    requested dimension and average the scores.

    Args:
        session: Open async DB session.
        repo_id: Repository ID (from ``.muse/repo.json``).
        branch_a: First branch name.
        branch_b: Second branch name.
        since: Commit ID to use as the common ancestor instead of
            auto-detecting it. It is used as given; this function does not
            check that it is an ancestor of either branch.
        dimensions: Dimensions to analyse. ``None`` or an empty list selects
            everything in :data:`ALL_DIMENSIONS`.

    Returns:
        A :class:`MuseDivergenceResult` carrying the per-dimension results,
        their mean (rounded to 4 decimal places) and the base commit used.

    Raises:
        ValueError: If *branch_a* or *branch_b* has no commits.
    """
    selected: list[str] = list(dimensions) if dimensions else list(ALL_DIMENSIONS)

    # ── Resolve branch head commits (A first, then B) ────────────────────
    heads: list[str] = []
    for branch_name in (branch_a, branch_b):
        head = await get_branch_head_commit_id(session, repo_id, branch_name)
        if head is None:
            raise ValueError(
                f"Branch '{branch_name}' has no commits in repo '{repo_id}'."
            )
        heads.append(head)
    head_a, head_b = heads

    # ── Caller-supplied ancestor wins; otherwise auto-detect ─────────────
    ancestor: str | None
    if since is not None:
        ancestor = since
    else:
        ancestor = await find_merge_base(session, head_a, head_b)

    short_ancestor = ancestor[:8] if ancestor else "none"
    logger.info(
        "✅ muse divergence: %r vs %r, base=%s",
        branch_a,
        branch_b,
        short_ancestor,
    )

    # ── Collect changed paths since the base, one branch at a time ───────
    paths_a = await collect_changed_paths_since(session, head_a, ancestor)
    paths_b = await collect_changed_paths_since(session, head_b, ancestor)

    # ── Per-dimension divergence ─────────────────────────────────────────
    per_dimension = tuple(
        compute_dimension_divergence(name, paths_a, paths_b) for name in selected
    )

    if per_dimension:
        score_total = sum(entry.score for entry in per_dimension)
        mean_score = round(score_total / len(per_dimension), 4)
    else:
        mean_score = 0.0

    return MuseDivergenceResult(
        branch_a=branch_a,
        branch_b=branch_b,
        common_ancestor=ancestor,
        dimensions=per_dimension,
        overall_score=mean_score,
    )