muse_divergence.py
python
| 1 | """Muse Divergence Engine — musical divergence between two CLI branches. |
| 2 | |
| 3 | Computes a per-dimension divergence score by comparing the file-level changes |
| 4 | each branch introduced since their common ancestor (merge base). |
| 5 | |
| 6 | Dimensions analysed |
| 7 | ------------------- |
| 8 | - ``melodic`` — lead/melody/solo/vocal files |
| 9 | - ``harmonic`` — harmony/chord/key/scale files |
| 10 | - ``rhythmic`` — beat/drum/rhythm/groove/percussion files |
| 11 | - ``structural`` — form/section/arrangement/bridge/chorus/verse files |
| 12 | - ``dynamic`` — mix/master/volume/level files |
| 13 | |
| 14 | A path is assigned to zero or more dimensions by keyword matching on the |
| 15 | lowercased path. Paths that match no dimension keyword are unclassified: |
| 16 | they are excluded from every dimension score and therefore never affect |
| 17 | the ``overall_score`` (the mean of the per-dimension scores). |
| 18 | |
| 19 | Score formula (per dimension) |
| 20 | ------------------------------ |
| 21 | Given the sets of paths changed on branch A (``a_dim``) and branch B |
| 22 | (``b_dim``) since the merge base for a specific dimension: |
| 23 | |
| 24 | score = |symmetric_difference(a_dim, b_dim)| / |union(a_dim, b_dim)| |
| 25 | |
| 26 | Score 0.0 = both branches changed exactly the same files in this dimension. |
| 27 | Score 1.0 = no overlap — completely diverged. |
| 28 | |
| 29 | Boundary rules |
| 30 | -------------- |
| 31 | - Must NOT import StateStore, executor, MCP tools, or handlers. |
| 32 | - Must NOT import ``muse_merge_base`` (variation-level LCA) — use |
| 33 | ``merge_engine.find_merge_base`` (commit-level LCA) for CLI branches. |
| 34 | - May import ``muse_cli.{db, merge_engine, models}``. |
| 35 | """ |
| 36 | |
| 37 | from __future__ import annotations |
| 38 | |
| 39 | import logging |
| 40 | from collections import deque |
| 41 | from dataclasses import dataclass |
| 42 | from enum import Enum |
| 43 | |
| 44 | from sqlalchemy.ext.asyncio import AsyncSession |
| 45 | from sqlalchemy.future import select |
| 46 | |
| 47 | from maestro.muse_cli.db import get_commit_snapshot_manifest |
| 48 | from maestro.muse_cli.merge_engine import find_merge_base |
| 49 | from maestro.muse_cli.models import MuseCliCommit |
| 50 | |
| 51 | logger = logging.getLogger(__name__) |
| 52 | |
| 53 | # --------------------------------------------------------------------------- |
| 54 | # Constants |
| 55 | # --------------------------------------------------------------------------- |
| 56 | |
#: Every musical dimension the engine can score, in reporting order.
ALL_DIMENSIONS: tuple[str, ...] = (
    "melodic",
    "harmonic",
    "rhythmic",
    "structural",
    "dynamic",
)

#: Lowercase keyword stems used to classify file paths into musical dimensions.
#: A path belongs to a dimension when any of that dimension's stems occurs as a
#: substring of the lowercased path. Stems are deliberately short (``"harm"``,
#: ``"perc"``, ``"arrang"``) so that inflected names such as ``harmony``,
#: ``percussion`` or ``arrangement`` all match.
_DIMENSION_PATTERNS: dict[str, tuple[str, ...]] = {
    "melodic": ("melody", "lead", "solo", "vocal"),
    "harmonic": ("harm", "chord", "key", "scale"),
    "rhythmic": ("beat", "drum", "rhythm", "groove", "perc"),
    # "arrang" covers the arrangement files that the module docstring lists
    # under the structural dimension.
    "structural": (
        "struct",
        "form",
        "section",
        "arrang",
        "bridge",
        "chorus",
        "verse",
        "intro",
        "outro",
    ),
    "dynamic": ("mix", "master", "volume", "level", "dyn"),
}
| 73 | |
| 74 | |
| 75 | # --------------------------------------------------------------------------- |
| 76 | # Result types |
| 77 | # --------------------------------------------------------------------------- |
| 78 | |
| 79 | |
class DivergenceLevel(str, Enum):
    """Coarse, human-facing bucket for a divergence score.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase value (``DivergenceLevel.LOW == "low"``).

    Buckets, as applied by :func:`score_to_level`:

    - ``NONE`` — score < 0.15
    - ``LOW``  — 0.15 ≤ score < 0.40
    - ``MED``  — 0.40 ≤ score < 0.70
    - ``HIGH`` — score ≥ 0.70
    """

    NONE = "none"
    LOW = "low"
    MED = "med"
    HIGH = "high"
| 95 | |
| 96 | |
@dataclass(frozen=True)
class DimensionDivergence:
    """Immutable divergence report for one musical dimension.

    Attributes:
        dimension: Name of the dimension being reported (e.g. ``"melodic"``).
        level: Qualitative bucket for ``score``.
        score: Divergence score for this dimension, in [0.0, 1.0].
        description: One-sentence, human-readable summary of the divergence.
        branch_a_summary: Text stating how many files of this dimension
            changed on branch A.
        branch_b_summary: Text stating how many files of this dimension
            changed on branch B.
    """

    dimension: str
    level: DivergenceLevel
    score: float
    description: str
    branch_a_summary: str
    branch_b_summary: str
| 116 | |
| 117 | |
@dataclass(frozen=True)
class MuseDivergenceResult:
    """Immutable top-level divergence report for a pair of CLI branches.

    Attributes:
        branch_a: Name of the first branch compared.
        branch_b: Name of the second branch compared.
        common_ancestor: Commit ID used as the comparison base, or ``None``
            when no base was found or supplied.
        dimensions: One :class:`DimensionDivergence` per analysed dimension.
        overall_score: Arithmetic mean of the per-dimension scores, in
            [0.0, 1.0].
    """

    branch_a: str
    branch_b: str
    common_ancestor: str | None
    dimensions: tuple[DimensionDivergence, ...]
    overall_score: float
| 135 | |
| 136 | |
| 137 | # --------------------------------------------------------------------------- |
| 138 | # Pure helpers |
| 139 | # --------------------------------------------------------------------------- |
| 140 | |
| 141 | |
def classify_path(path: str) -> set[str]:
    """Map a file path to the musical dimensions its name suggests.

    The whole path is lowercased and tested, by substring search, against the
    keyword stems of every dimension in ``_DIMENSION_PATTERNS``. One path can
    therefore land in several dimensions at once (e.g. ``"drum_mix.mid"`` →
    ``{"rhythmic", "dynamic"}``).

    Args:
        path: POSIX-style relative file path from a snapshot manifest.

    Returns:
        The names of all matching dimensions; an empty set when the path is
        unclassified.
    """
    haystack = path.lower()
    matched: set[str] = set()
    for dimension, stems in _DIMENSION_PATTERNS.items():
        for stem in stems:
            if stem in haystack:
                matched.add(dimension)
                break  # one hit is enough for this dimension
    return matched
| 160 | |
| 161 | |
def score_to_level(score: float) -> DivergenceLevel:
    """Translate a numeric divergence score into its qualitative bucket.

    Args:
        score: Normalised score, expected in [0.0, 1.0].

    Returns:
        ``NONE`` below 0.15, ``LOW`` below 0.40, ``MED`` below 0.70 and
        ``HIGH`` for everything else.
    """
    # Exclusive upper bounds, checked in ascending order.
    upper_bounds = (
        (0.15, DivergenceLevel.NONE),
        (0.40, DivergenceLevel.LOW),
        (0.70, DivergenceLevel.MED),
    )
    for bound, level in upper_bounds:
        if score < bound:
            return level
    return DivergenceLevel.HIGH
| 178 | |
| 179 | |
def compute_dimension_divergence(
    dimension: str,
    branch_a_changed: set[str],
    branch_b_changed: set[str],
) -> DimensionDivergence:
    """Compute divergence for a single musical dimension.

    Both change sets are first narrowed to the paths that
    :func:`classify_path` assigns to *dimension*. The score is then
    ``|symmetric_difference| / |union|`` of the two narrowed sets, rounded to
    four decimal places:

    - 0.0 → both branches changed exactly the same files (or neither branch
      touched this dimension at all).
    - 1.0 → no overlap — completely diverged.

    The level and the description are derived from the *rounded* score, so
    the three reported fields always agree with the documented thresholds
    (a raw score of 0.14996 is reported as 0.15 / ``LOW``, never 0.15 /
    ``NONE``).

    Args:
        dimension: Dimension name (one of :data:`ALL_DIMENSIONS`).
        branch_a_changed: Paths changed on branch A since the merge base.
        branch_b_changed: Paths changed on branch B since the merge base.

    Returns:
        A :class:`DimensionDivergence` with score, level, and human summary.
    """
    a_dim = {p for p in branch_a_changed if dimension in classify_path(p)}
    b_dim = {p for p in branch_b_changed if dimension in classify_path(p)}
    total = len(a_dim | b_dim)

    # Round before labelling so the reported score and its level cannot
    # straddle a threshold.
    score = round(len(a_dim ^ b_dim) / total, 4) if total else 0.0
    level = score_to_level(score)

    if not total:
        desc = f"No {dimension} changes on either branch."
    else:
        # Keyed by level so the thresholds live only in score_to_level().
        descriptions = {
            DivergenceLevel.NONE: f"Both branches made similar {dimension} changes.",
            DivergenceLevel.LOW: f"Minor {dimension} divergence — mostly aligned.",
            DivergenceLevel.MED: f"Moderate {dimension} divergence — different directions.",
            DivergenceLevel.HIGH: (
                f"High {dimension} divergence — branches took different creative paths."
            ),
        }
        desc = descriptions[level]

    return DimensionDivergence(
        dimension=dimension,
        level=level,
        score=score,
        description=desc,
        branch_a_summary=f"{len(a_dim)} {dimension} file(s) changed",
        branch_b_summary=f"{len(b_dim)} {dimension} file(s) changed",
    )
| 233 | |
| 234 | |
| 235 | # --------------------------------------------------------------------------- |
| 236 | # Async DB helpers |
| 237 | # --------------------------------------------------------------------------- |
| 238 | |
| 239 | |
async def get_branch_head_commit_id(
    session: AsyncSession,
    repo_id: str,
    branch: str,
) -> str | None:
    """Look up the newest commit recorded for *branch* in *repo_id*.

    "Newest" means the row with the greatest ``committed_at`` among the
    ``MuseCliCommit`` rows matching both the repository and the branch.

    Args:
        session: Open async DB session.
        repo_id: Repository identifier.
        branch: Branch name.

    Returns:
        The commit ID of that row, or ``None`` when no commit matches.
    """
    newest_commit_stmt = (
        select(MuseCliCommit.commit_id)
        .where(
            MuseCliCommit.repo_id == repo_id,
            MuseCliCommit.branch == branch,
        )
        .order_by(MuseCliCommit.committed_at.desc())
        .limit(1)
    )
    rows = await session.execute(newest_commit_stmt)
    return rows.scalar_one_or_none()
| 265 | |
| 266 | |
async def collect_changed_paths_since(
    session: AsyncSession,
    tip_commit_id: str,
    base_commit_id: str | None,
) -> set[str]:
    """Return every path whose manifest entry differs between base and tip.

    The snapshot manifests of both commits are loaded and compared. A path is
    reported when it is:

    - added (present at the tip only),
    - deleted (present at the base only), or
    - modified (present in both, but mapped to different manifest values).

    A falsy *base_commit_id* (e.g. ``None`` for disjoint histories) is treated
    as an empty base, so every path in the tip snapshot is reported. A missing
    or empty manifest on either side is likewise treated as empty.

    Args:
        session: Open async DB session.
        tip_commit_id: Branch HEAD commit ID.
        base_commit_id: Merge-base commit ID, or ``None``.

    Returns:
        Set of POSIX paths that changed between base and tip.
    """
    tip_snapshot = await get_commit_snapshot_manifest(session, tip_commit_id) or {}
    if base_commit_id:
        base_snapshot: dict[str, str] = (
            await get_commit_snapshot_manifest(session, base_commit_id) or {}
        )
    else:
        base_snapshot = {}

    # Unique stand-in for "path absent": it never equals a real manifest
    # value, so added and deleted paths fall out of the same comparison that
    # detects modifications.
    absent = object()
    return {
        path
        for path in set(base_snapshot) | set(tip_snapshot)
        if base_snapshot.get(path, absent) != tip_snapshot.get(path, absent)
    }
| 306 | |
| 307 | |
| 308 | # --------------------------------------------------------------------------- |
| 309 | # Public API |
| 310 | # --------------------------------------------------------------------------- |
| 311 | |
| 312 | |
async def compute_divergence(
    session: AsyncSession,
    *,
    repo_id: str,
    branch_a: str,
    branch_b: str,
    since: str | None = None,
    dimensions: list[str] | None = None,
) -> MuseDivergenceResult:
    """Produce the musical divergence report for two CLI branches.

    Steps: resolve each branch's head commit, determine the comparison base
    (*since* if given, otherwise the merge base of the two heads), collect
    the paths each branch changed relative to that base, and score every
    requested dimension. The overall score is the mean of those
    per-dimension scores, rounded to four decimal places.

    Args:
        session: Open async DB session.
        repo_id: Repository ID.
        branch_a: First branch name.
        branch_b: Second branch name.
        since: Commit ID to use as the common ancestor instead of
            auto-detecting it (``None`` → auto-detect).
        dimensions: Dimensions to analyse. ``None`` or an empty list selects
            everything in :data:`ALL_DIMENSIONS`. A name that no path can be
            classified under simply scores 0.0.

    Returns:
        A :class:`MuseDivergenceResult` with per-dimension scores and the
        common ancestor that was used.

    Raises:
        ValueError: If *branch_a* or *branch_b* has no commits.
    """
    selected: list[str] = list(dimensions) if dimensions else list(ALL_DIMENSIONS)

    # Resolve both heads, A first, failing fast on the first empty branch.
    heads: list[str] = []
    for branch_name in (branch_a, branch_b):
        head = await get_branch_head_commit_id(session, repo_id, branch_name)
        if head is None:
            raise ValueError(
                f"Branch '{branch_name}' has no commits in repo '{repo_id}'."
            )
        heads.append(head)
    a_head, b_head = heads

    # An explicit `since` wins; otherwise fall back to the merge base.
    base_commit_id: str | None = (
        since if since is not None else await find_merge_base(session, a_head, b_head)
    )

    short_base = base_commit_id[:8] if base_commit_id else "none"
    logger.info(
        "✅ muse divergence: %r vs %r, base=%s",
        branch_a,
        branch_b,
        short_base,
    )

    # Awaited one after the other: both lookups share the same session.
    a_changed = await collect_changed_paths_since(session, a_head, base_commit_id)
    b_changed = await collect_changed_paths_since(session, b_head, base_commit_id)

    per_dimension = tuple(
        [compute_dimension_divergence(name, a_changed, b_changed) for name in selected]
    )

    if per_dimension:
        mean_score = sum(item.score for item in per_dimension) / len(per_dimension)
        overall = round(mean_score, 4)
    else:
        overall = 0.0

    return MuseDivergenceResult(
        branch_a=branch_a,
        branch_b=branch_b,
        common_ancestor=base_commit_id,
        dimensions=per_dimension,
        overall_score=overall,
    )