cgcardona / muse public
muse_rerere.py python
273 lines 8.8 KB
12901c5a Initial extraction from tellurstori/maestro cgcardona <gabriel@tellurstori.com> 4d ago
1 """Muse Rerere — Reuse Recorded Resolutions for musical merge conflicts.
2
3 In parallel multi-branch Muse workflows identical merge conflicts appear
4 repeatedly (e.g. the same MIDI region modified in the same way on two
5 independent branches). rerere records conflict shapes and their resolutions
6 so they can be applied automatically on subsequent merges.
7
8 Cache layout::
9
10 .muse/rr-cache/<hash>/
11 conflict — serialised conflict list as passed to record_conflict (JSON)
12 postimage — serialised resolution (JSON, written only after resolve)
13
14 The conflict fingerprint is a normalised, transposition-independent hash
15 of the conflict shape. Two conflicts with the same structural shape but
16 different absolute pitches are treated as the same conflict so that a
17 resolution recorded in one key can be applied in another.
18
19 Boundary rules:
20 - Must NOT import StateStore, executor, MCP tools, routes, or handlers.
21 - May import muse_merge types.
22 - All file I/O uses pathlib.Path — never open() with bare strings.
23 """
24 from __future__ import annotations
25
26 import hashlib
27 import json
28 import logging
29 import re
30 from pathlib import Path
31
32 from typing_extensions import TypedDict
33
34 from maestro.contracts.json_types import JSONObject
35
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
37
38
class ConflictDict(TypedDict):
    """Minimal structural descriptor of a single merge conflict for rerere fingerprinting.

    All three fields are fed into ``_conflict_fingerprint``; they are also
    the sort key used to make the fingerprint independent of conflict order.
    """

    # Identifier of the conflicting region (presumably a MIDI region, per the
    # module docstring — confirm against the muse_merge types).
    region_id: str
    # Conflict category label; hashed verbatim.
    type: str
    # Free-form text. Any ``pitch=<int>`` tokens are rewritten to offsets
    # relative to the lowest pitch in the conflict set before hashing.
    description: str
45
46 # ---------------------------------------------------------------------------
47 # Constants
48 # ---------------------------------------------------------------------------
49
# Location of the rerere cache, relative to the repository root
# (joined onto ``repo_root`` by ``_rr_cache_root``).
_RR_CACHE_DIR = ".muse/rr-cache"
51
52
53 # ---------------------------------------------------------------------------
54 # Internal helpers
55 # ---------------------------------------------------------------------------
56
57
def _rr_cache_root(repo_root: Path) -> Path:
    """Locate the rr-cache directory under *repo_root*, creating it on demand.

    Args:
        repo_root: Repository root (directory containing ``.muse/``).

    Returns:
        Path of the rr-cache directory, which exists once this call returns.
    """
    root = repo_root.joinpath(_RR_CACHE_DIR)
    # Missing parents (``.muse/``) are created too; an existing directory is fine.
    root.mkdir(parents=True, exist_ok=True)
    return root
63
64
# Matches absolute pitch tokens such as ``pitch=60`` inside conflict descriptions.
_PITCH_RE = re.compile(r"pitch=(\d+)")


def _conflict_fingerprint(conflicts: list[ConflictDict]) -> str:
    """Hash *conflicts* into an order- and transposition-independent fingerprint.

    How the fingerprint is built:
        1. Find the lowest ``pitch=<n>`` value mentioned in any description
           (0 when no description mentions a pitch).
        2. Rewrite every ``pitch=<n>`` token as its offset from that lowest
           pitch, so the same shape in a different key hashes identically.
        3. Sort the rewritten conflicts by (region_id, type, description) so
           that input order does not matter.
        4. Serialise to compact, key-sorted JSON and take the SHA-256.

    Args:
        conflicts: Conflict descriptors; missing keys are treated as ``""``.

    Returns:
        Hex-encoded SHA-256 digest of the normalised conflict set.
    """
    base_pitch = min(
        (
            int(found.group(1))
            for conflict in conflicts
            for found in _PITCH_RE.finditer(conflict.get("description", ""))
        ),
        default=0,
    )

    def _as_offset(found: re.Match[str]) -> str:
        # Absolute pitch -> distance above the lowest pitch in the whole set.
        return f"pitch={int(found.group(1)) - base_pitch}"

    entries = [
        {
            "region_id": conflict.get("region_id", ""),
            "type": conflict.get("type", ""),
            "description": _PITCH_RE.sub(_as_offset, conflict.get("description", "")),
        }
        for conflict in conflicts
    ]
    entries.sort(
        key=lambda entry: (entry["region_id"], entry["type"], entry["description"])
    )

    payload = json.dumps(entries, sort_keys=True, separators=(",", ":"))
    return hashlib.sha256(payload.encode()).hexdigest()
104
105
def _hash_dir(repo_root: Path, conflict_hash: str) -> Path:
    """Build the path of the cache entry directory for *conflict_hash*.

    The rr-cache root is created as a side effect (via ``_rr_cache_root``);
    the entry directory itself is not.
    """
    cache_root = _rr_cache_root(repo_root)
    return cache_root.joinpath(conflict_hash)
109
110
111 # ---------------------------------------------------------------------------
112 # Public API
113 # ---------------------------------------------------------------------------
114
115
def record_conflict(repo_root: Path, conflicts: list[ConflictDict]) -> str:
    """Store a conflict shape in the rr-cache and return its fingerprint.

    The entry directory is always ensured; the ``conflict`` file is written
    only the first time a given shape is seen, so repeated calls with the
    same shape leave the existing file untouched.

    Args:
        repo_root: Repository root (directory containing ``.muse/``).
        conflicts: Conflict dicts (keys: region_id, type, description),
            typically derived from :class:`MergeConflict` instances.

    Returns:
        The SHA-256 fingerprint hash identifying this conflict shape.
    """
    fingerprint = _conflict_fingerprint(conflicts)
    entry_dir = _hash_dir(repo_root, fingerprint)
    entry_dir.mkdir(parents=True, exist_ok=True)

    target = entry_dir / "conflict"
    if target.exists():
        # Already recorded: keep the first-seen payload as is.
        logger.debug("muse rerere: conflict %s already cached", fingerprint[:12])
        return fingerprint

    serialised = json.dumps(conflicts, indent=2)
    target.write_text(serialised, encoding="utf-8")
    logger.info("✅ muse rerere: recorded conflict %s", fingerprint[:12])
    return fingerprint
144
145
def record_resolution(
    repo_root: Path,
    conflict_hash: str,
    resolution: JSONObject,
) -> None:
    """Persist a resolution for an existing conflict fingerprint.

    The resolution is written as JSON to the entry's ``postimage`` file,
    replacing any resolution previously stored for the same hash.

    Args:
        repo_root: Repository root.
        conflict_hash: Hash returned by :func:`record_conflict`.
        resolution: Arbitrary resolution data (e.g. merged snapshot or
            per-file resolution strategies). Must be JSON-serialisable.

    Raises:
        FileNotFoundError: If *conflict_hash* is not in the cache (i.e.
            :func:`record_conflict` was never called for it), or if it is
            not a plain entry name (empty, ``..``, or containing a path
            separator).
    """
    message = f"rerere: conflict hash {conflict_hash!r} not found in rr-cache"

    # A cache key is always a single directory name. Without this check an
    # empty string resolves to the cache root and ".." to its parent — both
    # are existing directories, so the postimage would be written outside
    # any cache entry.
    if (
        not conflict_hash
        or conflict_hash == ".."
        or Path(conflict_hash).name != conflict_hash
    ):
        raise FileNotFoundError(message)

    slot = _hash_dir(repo_root, conflict_hash)
    if not slot.is_dir():
        raise FileNotFoundError(message)

    postimage = slot / "postimage"
    postimage.write_text(
        json.dumps(resolution, indent=2),
        encoding="utf-8",
    )
    logger.info("✅ muse rerere: recorded resolution for %s", conflict_hash[:12])
174
175
def apply_rerere(
    repo_root: Path,
    conflicts: list[ConflictDict],
) -> tuple[int, JSONObject | None]:
    """Attempt to auto-apply a cached resolution for *conflicts*.

    The lookup is best-effort: a missing, undecodable, or malformed
    ``postimage`` is reported as a cache miss rather than raised, so a
    damaged cache entry cannot abort the surrounding merge.

    Args:
        repo_root: Repository root.
        conflicts: Current merge conflicts (same format as :func:`record_conflict`).

    Returns:
        A tuple ``(applied, resolution)`` where *applied* is the number of
        conflicts resolved (0 or len(conflicts)) and *resolution* is the
        cached resolution dict (or ``None`` when no usable cache hit exists).
    """
    if not conflicts:
        return 0, None

    h = _conflict_fingerprint(conflicts)
    postimage = _hash_dir(repo_root, h) / "postimage"

    # Read first and handle absence afterwards: an exists() check followed by
    # a read can race with a concurrent forget/clear of the same entry.
    try:
        resolution: JSONObject = json.loads(postimage.read_text(encoding="utf-8"))
    except FileNotFoundError:
        logger.debug("muse rerere: no cached resolution for %s", h[:12])
        return 0, None
    except (UnicodeDecodeError, json.JSONDecodeError):
        # E.g. a postimage left truncated by an interrupted write.
        logger.warning(
            "⚠️ muse rerere: ignoring unreadable cached resolution for %s",
            h[:12],
        )
        return 0, None

    applied = len(conflicts)
    logger.info(
        "✅ muse rerere: resolved %d conflict(s) using rerere (hash %s)",
        applied,
        h[:12],
    )
    return applied, resolution
208
209
def list_rerere(repo_root: Path) -> list[str]:
    """List the conflict fingerprint hashes present in the rr-cache.

    An entry counts when it is a directory containing a ``conflict`` file.
    Whether a ``postimage`` (resolution) exists is not considered, so both
    resolved and still-unresolved conflicts are listed.

    Args:
        repo_root: Repository root.

    Returns:
        Entry names (SHA-256 hex digests) in sorted order.
    """
    candidates = sorted(_rr_cache_root(repo_root).iterdir())
    return [
        candidate.name
        for candidate in candidates
        if candidate.is_dir() and (candidate / "conflict").exists()
    ]
230
231
def forget_rerere(repo_root: Path, conflict_hash: str) -> bool:
    """Remove a single cached conflict/resolution from the rr-cache.

    Args:
        repo_root: Repository root.
        conflict_hash: Hash to remove.

    Returns:
        ``True`` if the entry existed and was removed, ``False`` if it
        was not found (idempotent — callers need not handle this as an error).
        A *conflict_hash* that is not a plain entry name (empty, ``..``, or
        containing a path separator) is treated as not found.
    """
    # A cache key is always a single directory name. Without this check an
    # empty string resolves to the cache root and ".." to its parent, and the
    # deletion loop below would then run on files outside the requested entry.
    is_entry_name = (
        bool(conflict_hash)
        and conflict_hash != ".."
        and Path(conflict_hash).name == conflict_hash
    )
    slot = _hash_dir(repo_root, conflict_hash) if is_entry_name else None
    if slot is None or not slot.is_dir():
        logger.warning("⚠️ muse rerere forget: hash %r not found", conflict_hash[:12])
        return False

    # Entries hold only plain files (conflict / postimage), so unlinking each
    # child leaves the directory empty for rmdir().
    for child in slot.iterdir():
        child.unlink()
    slot.rmdir()
    logger.info("✅ muse rerere: forgot %s", conflict_hash[:12])
    return True
253
254
def clear_rerere(repo_root: Path) -> int:
    """Delete every entry directory in the rr-cache.

    Only sub-directories of the cache root are treated as entries; each has
    its files unlinked and is then removed. The cache root itself is kept.

    Args:
        repo_root: Repository root.

    Returns:
        Number of entries removed.
    """
    # Snapshot the entries up front so deletion never happens mid-iteration.
    entry_dirs = [
        path for path in _rr_cache_root(repo_root).iterdir() if path.is_dir()
    ]
    for entry_dir in entry_dirs:
        for cached_file in entry_dir.iterdir():
            cached_file.unlink()
        entry_dir.rmdir()

    removed = len(entry_dirs)
    suffix = "y" if removed == 1 else "ies"
    logger.info("✅ muse rerere: cleared %d cache entr%s", removed, suffix)
    return removed