muse_rerere.py
python
| 1 | """Muse Rerere — Reuse Recorded Resolutions for musical merge conflicts. |
| 2 | |
| 3 | In parallel multi-branch Muse workflows identical merge conflicts appear |
| 4 | repeatedly (e.g. the same MIDI region modified in the same way on two |
| 5 | independent branches). rerere records conflict shapes and their resolutions |
| 6 | so they can be applied automatically on subsequent merges. |
| 7 | |
| 8 | Cache layout:: |
| 9 | |
| 10 | .muse/rr-cache/<hash>/ |
| 11 | conflict — serialised conflict fingerprint (JSON) |
| 12 | postimage — serialised resolution (JSON, written only after resolve) |
| 13 | |
| 14 | The conflict fingerprint is a normalised, transposition-independent hash |
| 15 | of the conflict shape. Two conflicts with the same structural shape but |
| 16 | different absolute pitches are treated as the same conflict so that a |
| 17 | resolution recorded in one key can be applied in another. |
| 18 | |
| 19 | Boundary rules: |
| 20 | - Must NOT import StateStore, executor, MCP tools, routes, or handlers. |
| 21 | - May import muse_merge types. |
| 22 | - All file I/O uses pathlib.Path — never open() with bare strings. |
| 23 | """ |
| 24 | from __future__ import annotations |
| 25 | |
| 26 | import hashlib |
| 27 | import json |
| 28 | import logging |
| 29 | import re |
| 30 | from pathlib import Path |
| 31 | |
| 32 | from typing_extensions import TypedDict |
| 33 | |
| 34 | from maestro.contracts.json_types import JSONObject |
| 35 | |
| 36 | logger = logging.getLogger(__name__) |
| 37 | |
| 38 | |
class ConflictDict(TypedDict):
    """Minimal structural descriptor of a single merge conflict for rerere fingerprinting."""

    # Identifier of the region the conflict occurred in.
    region_id: str
    # Conflict category (free-form string; compared verbatim when fingerprinting).
    type: str
    # Human-readable description; may embed absolute pitches as "pitch=<int>",
    # which the fingerprinting logic rewrites as relative offsets.
    description: str
| 45 | |
| 46 | # --------------------------------------------------------------------------- |
| 47 | # Constants |
| 48 | # --------------------------------------------------------------------------- |
| 49 | |
# Cache location, relative to the repository root (see module docstring layout).
_RR_CACHE_DIR = ".muse/rr-cache"
| 51 | |
| 52 | |
| 53 | # --------------------------------------------------------------------------- |
| 54 | # Internal helpers |
| 55 | # --------------------------------------------------------------------------- |
| 56 | |
| 57 | |
| 58 | def _rr_cache_root(repo_root: Path) -> Path: |
| 59 | """Return (and create if needed) the rr-cache directory.""" |
| 60 | cache = repo_root / _RR_CACHE_DIR |
| 61 | cache.mkdir(parents=True, exist_ok=True) |
| 62 | return cache |
| 63 | |
| 64 | |
| 65 | _PITCH_RE = re.compile(r"pitch=(\d+)") |
| 66 | |
| 67 | |
def _conflict_fingerprint(conflicts: list[ConflictDict]) -> str:
    """Return a SHA-256 fingerprint for *conflicts* that is order- and transposition-independent.

    The fingerprint is derived in three steps:
      1. every ``pitch=<n>`` token in each description is rewritten as an
         offset from the lowest pitch found anywhere in the conflict set,
         making the result invariant under transposition;
      2. the normalised conflicts are sorted by (region_id, type,
         description) so input ordering does not matter;
      3. the compact JSON serialisation of the result is hashed.
    """
    pitches = [
        int(match.group(1))
        for conflict in conflicts
        for match in _PITCH_RE.finditer(conflict.get("description", ""))
    ]
    base = min(pitches) if pitches else 0

    def _shift(match: re.Match[str]) -> str:
        # Rewrite an absolute pitch as its offset from the lowest pitch seen.
        return f"pitch={int(match.group(1)) - base}"

    normalised = [
        ConflictDict(
            region_id=conflict.get("region_id", ""),
            type=conflict.get("type", ""),
            description=_PITCH_RE.sub(_shift, conflict.get("description", "")),
        )
        for conflict in conflicts
    ]
    normalised.sort(key=lambda c: (c["region_id"], c["type"], c["description"]))

    payload = json.dumps(normalised, sort_keys=True, separators=(",", ":"))
    return hashlib.sha256(payload.encode()).hexdigest()
| 104 | |
| 105 | |
def _hash_dir(repo_root: Path, conflict_hash: str) -> Path:
    """Return the per-fingerprint cache sub-directory for *conflict_hash*.

    Note: resolving the cache root also creates it if missing.
    """
    return _rr_cache_root(repo_root) / conflict_hash
| 109 | |
| 110 | |
| 111 | # --------------------------------------------------------------------------- |
| 112 | # Public API |
| 113 | # --------------------------------------------------------------------------- |
| 114 | |
| 115 | |
def record_conflict(repo_root: Path, conflicts: list[ConflictDict]) -> str:
    """Store the shape of *conflicts* in the rr-cache and return its fingerprint.

    Re-recording an already-cached shape is a no-op, so the call is idempotent.

    Args:
        repo_root: Repository root (directory containing ``.muse/``).
        conflicts: Conflict dicts (keys: region_id, type, description),
            typically derived from :class:`MergeConflict` instances.

    Returns:
        The SHA-256 fingerprint hash identifying this conflict shape.
    """
    fingerprint = _conflict_fingerprint(conflicts)
    slot = _hash_dir(repo_root, fingerprint)
    slot.mkdir(parents=True, exist_ok=True)

    target = slot / "conflict"
    if target.exists():
        logger.debug("muse rerere: conflict %s already cached", fingerprint[:12])
    else:
        target.write_text(
            json.dumps(conflicts, indent=2),
            encoding="utf-8",
        )
        logger.info("✅ muse rerere: recorded conflict %s", fingerprint[:12])

    return fingerprint
| 144 | |
| 145 | |
def record_resolution(
    repo_root: Path,
    conflict_hash: str,
    resolution: JSONObject,
) -> None:
    """Write the ``postimage`` resolution file for a previously recorded conflict.

    Args:
        repo_root: Repository root.
        conflict_hash: Hash returned by :func:`record_conflict`.
        resolution: JSON-serialisable resolution data (e.g. merged snapshot
            or per-file resolution strategies).

    Raises:
        FileNotFoundError: If no cache entry exists for *conflict_hash*
            (i.e. :func:`record_conflict` was never called for it).
    """
    slot = _hash_dir(repo_root, conflict_hash)
    if not slot.is_dir():
        raise FileNotFoundError(
            f"rerere: conflict hash {conflict_hash!r} not found in rr-cache"
        )

    (slot / "postimage").write_text(
        json.dumps(resolution, indent=2),
        encoding="utf-8",
    )
    logger.info("✅ muse rerere: recorded resolution for %s", conflict_hash[:12])
| 174 | |
| 175 | |
def apply_rerere(
    repo_root: Path,
    conflicts: list[ConflictDict],
) -> tuple[int, JSONObject | None]:
    """Look up a cached resolution matching the shape of *conflicts*.

    Args:
        repo_root: Repository root.
        conflicts: Current merge conflicts (same format as :func:`record_conflict`).

    Returns:
        ``(applied, resolution)`` — *applied* is the number of conflicts
        resolved (either 0 or ``len(conflicts)``, the hit is all-or-nothing)
        and *resolution* is the cached resolution dict, or ``None`` on a miss.
    """
    if not conflicts:
        return 0, None

    fingerprint = _conflict_fingerprint(conflicts)
    cached = _hash_dir(repo_root, fingerprint) / "postimage"
    if not cached.exists():
        logger.debug("muse rerere: no cached resolution for %s", fingerprint[:12])
        return 0, None

    resolution: JSONObject = json.loads(cached.read_text(encoding="utf-8"))
    resolved_count = len(conflicts)
    logger.info(
        "✅ muse rerere: resolved %d conflict(s) using rerere (hash %s)",
        resolved_count,
        fingerprint[:12],
    )
    return resolved_count, resolution
| 208 | |
| 209 | |
def list_rerere(repo_root: Path) -> list[str]:
    """Return every conflict fingerprint hash currently present in the rr-cache.

    An entry qualifies when its directory contains a ``conflict`` file;
    whether a ``postimage`` (resolution) exists does not matter, so both
    resolved and not-yet-resolved entries are listed.

    Args:
        repo_root: Repository root.

    Returns:
        Sorted list of SHA-256 hex-digest strings.
    """
    cache = _rr_cache_root(repo_root)
    return sorted(
        entry.name
        for entry in cache.iterdir()
        if entry.is_dir() and (entry / "conflict").exists()
    )
| 230 | |
| 231 | |
def forget_rerere(repo_root: Path, conflict_hash: str) -> bool:
    """Delete one cached conflict/resolution entry from the rr-cache.

    Args:
        repo_root: Repository root.
        conflict_hash: Hash to remove.

    Returns:
        ``True`` when the entry existed and was removed; ``False`` when it
        was absent (idempotent — callers need not treat this as an error).
    """
    slot = _hash_dir(repo_root, conflict_hash)
    if not slot.is_dir():
        logger.warning("⚠️ muse rerere forget: hash %r not found", conflict_hash[:12])
        return False

    # Entries only ever contain flat files ("conflict" / "postimage"),
    # so unlink + rmdir suffices.
    for member in list(slot.iterdir()):
        member.unlink()
    slot.rmdir()
    logger.info("✅ muse rerere: forgot %s", conflict_hash[:12])
    return True
| 253 | |
| 254 | |
def clear_rerere(repo_root: Path) -> int:
    """Delete every entry in the rr-cache.

    Args:
        repo_root: Repository root.

    Returns:
        Number of entries removed.
    """
    cache = _rr_cache_root(repo_root)
    removed = 0
    for entry in list(cache.iterdir()):
        if not entry.is_dir():
            # Stray non-directory files are left untouched.
            continue
        for member in entry.iterdir():
            member.unlink()
        entry.rmdir()
        removed += 1
    logger.info("✅ muse rerere: cleared %d cache entr%s", removed, "y" if removed == 1 else "ies")
    return removed