maestro/services/muse_bisect.py · cgcardona/muse

muse_bisect.py python

361 lines 12.2 KB

12901c5a Initial extraction from tellurstori/maestro cgcardona <gabriel@tellurstori.com> 4d ago

1	"""Muse bisect service — binary search over the commit graph for regression hunting.
2
3	This service implements the state machine and commit graph traversal logic for
4	``muse bisect``. It is the music-domain analogue of ``git bisect``: given a
5	known-good commit and a known-bad commit, it finds the first commit that
6	introduced a regression by binary searching the ancestry path.
7
8	Typical music QA workflow
9	-------------------------
10	1. ``muse bisect start``
11	2. ``muse bisect good <last_known_good_sha>``
12	3. ``muse bisect bad <current_broken_sha>``
13	4. Muse checks out the midpoint commit into muse-work/ for inspection.
14	5. Producer listens, runs tests, then ``muse bisect good`` or ``muse bisect bad``.
15	6. Repeat until the culprit commit is identified.
16	7. ``muse bisect reset`` restores the pre-bisect state.
17
18	``muse bisect run <cmd>`` automates steps 4-6: it runs the command after each
19	checkout and uses the exit code (0 = good, 1 = bad) to advance automatically.
20
21	State file schema (BISECT_STATE.json)
22	--------------------------------------
23	.. code-block:: json
24
25	{
26	"good": "abc123...",
27	"bad": "def456...",
28	"current": "789abc...",
29	"tested": {"commit_id": "good"},
30	"pre_bisect_ref": "refs/heads/main",
31	"pre_bisect_commit": "abc000..."
32	}
33
34	``tested`` is a map from commit_id to verdict (``"good"`` or ``"bad"``).
35	``pre_bisect_ref`` and ``pre_bisect_commit`` record where HEAD was before the
36	session started so ``muse bisect reset`` can cleanly restore the workspace.
37	"""
38	from __future__ import annotations
39
40	import json
41	import logging
42	import math
43	import pathlib
44	from collections import deque
45	from dataclasses import dataclass, field
46	from typing import TYPE_CHECKING
47
48	if TYPE_CHECKING:
49	from sqlalchemy.ext.asyncio import AsyncSession
50
51	from maestro.muse_cli.models import MuseCliCommit
52
53	logger = logging.getLogger(__name__)
54
55	_BISECT_STATE_FILENAME = "BISECT_STATE.json"
56
57
58	# ---------------------------------------------------------------------------
59	# BisectState dataclass
60	# ---------------------------------------------------------------------------
61
62
63	@dataclass
64	class BisectState:
65	"""Mutable snapshot of an in-progress bisect session.
66
67	Attributes:
68	good: Commit ID of the last known-good revision.
69	bad: Commit ID of the first known-bad revision.
70	current: Commit ID currently checked out for testing.
71	tested: Map from commit_id to verdict (``"good"`` or ``"bad"``).
72	pre_bisect_ref: Symbolic ref HEAD pointed at before bisect started
73	(e.g. ``refs/heads/main``).
74	pre_bisect_commit: Commit ID HEAD resolved to before bisect started.
75	"""
76
77	good: str \| None = None
78	bad: str \| None = None
79	current: str \| None = None
80	tested: dict[str, str] = field(default_factory=dict)
81	pre_bisect_ref: str = ""
82	pre_bisect_commit: str = ""
83
84
85	# ---------------------------------------------------------------------------
86	# BisectStepResult — result type for a single bisect step
87	# ---------------------------------------------------------------------------
88
89
90	@dataclass(frozen=True)
91	class BisectStepResult:
92	"""Outcome of advancing the bisect session by one step.
93
94	Returned by :func:`advance_bisect` after marking a commit as good or bad.
95
96	Attributes:
97	culprit: Commit ID of the first bad commit if identified, else ``None``.
98	next_commit: Commit ID to check out and test next, or ``None`` when done.
99	remaining: Estimated number of commits still to test (0 when done).
100	message: Human-readable summary of this step for CLI display.
101	"""
102
103	culprit: str \| None
104	next_commit: str \| None
105	remaining: int
106	message: str
107
108
109	# ---------------------------------------------------------------------------
110	# Filesystem helpers
111	# ---------------------------------------------------------------------------
112
113
114	def read_bisect_state(root: pathlib.Path) -> BisectState \| None:
115	"""Return :class:`BisectState` if a bisect is in progress, else ``None``.
116
117	Reads ``.muse/BISECT_STATE.json``. Returns ``None`` when no file exists
118	(no active session) or when the file cannot be parsed (treated as absent).
119
120	Args:
121	root: Repository root (directory containing ``.muse/``).
122	"""
123	state_path = root / ".muse" / _BISECT_STATE_FILENAME
124	if not state_path.exists():
125	return None
126
127	try:
128	raw: dict[str, object] = json.loads(state_path.read_text())
129	except (json.JSONDecodeError, OSError) as exc:
130	logger.warning("⚠️ Failed to read %s: %s", _BISECT_STATE_FILENAME, exc)
131	return None
132
133	def _str_or_none(key: str) -> str \| None:
134	val = raw.get(key)
135	return str(val) if val is not None else None
136
137	raw_tested = raw.get("tested", {})
138	tested: dict[str, str] = (
139	{str(k): str(v) for k, v in raw_tested.items()}
140	if isinstance(raw_tested, dict)
141	else {}
142	)
143
144	return BisectState(
145	good=_str_or_none("good"),
146	bad=_str_or_none("bad"),
147	current=_str_or_none("current"),
148	tested=tested,
149	pre_bisect_ref=str(raw.get("pre_bisect_ref", "")),
150	pre_bisect_commit=str(raw.get("pre_bisect_commit", "")),
151	)
152
153
154	def write_bisect_state(root: pathlib.Path, state: BisectState) -> None:
155	"""Persist state to ``.muse/BISECT_STATE.json``.
156
157	Args:
158	root: Repository root (directory containing ``.muse/``).
159	state: Current bisect session state to persist.
160	"""
161	state_path = root / ".muse" / _BISECT_STATE_FILENAME
162	data: dict[str, object] = {
163	"good": state.good,
164	"bad": state.bad,
165	"current": state.current,
166	"tested": state.tested,
167	"pre_bisect_ref": state.pre_bisect_ref,
168	"pre_bisect_commit": state.pre_bisect_commit,
169	}
170	state_path.write_text(json.dumps(data, indent=2))
171	logger.debug("✅ Wrote BISECT_STATE.json (good=%s bad=%s)", state.good, state.bad)
172
173
174	def clear_bisect_state(root: pathlib.Path) -> None:
175	"""Remove ``.muse/BISECT_STATE.json`` after reset or culprit identified."""
176	state_path = root / ".muse" / _BISECT_STATE_FILENAME
177	if state_path.exists():
178	state_path.unlink()
179	logger.debug("✅ Cleared BISECT_STATE.json")
180
181
182	# ---------------------------------------------------------------------------
183	# Commit graph helpers (require a DB session)
184	# ---------------------------------------------------------------------------
185
186
187	async def get_commits_between(
188	session: AsyncSession,
189	good_commit_id: str,
190	bad_commit_id: str,
191	) -> list[MuseCliCommit]:
192	"""Return commits reachable from bad but not reachable from good.
193
194	Performs two BFS traversals of the Muse commit DAG:
195	1. All ancestors of good_commit_id (inclusive) → ``good_ancestors``.
196	2. All ancestors of bad_commit_id (inclusive) → filtered by exclusion.
197
198	Returns only commits that are ancestors of bad but not of good, sorted
199	by ``committed_at`` ascending (oldest first). These are the commits the
200	bisect session needs to search through.
201
202	An empty list means good and bad are the same commit or there is no
203	commit between them — the culprit is bad itself.
204
205	Args:
206	session: Open async DB session.
207	good_commit_id: Commit ID of the known-good revision.
208	bad_commit_id: Commit ID of the known-bad revision.
209
210	Returns:
211	Ordered list of :class:`MuseCliCommit` rows to bisect, oldest first.
212	"""
213	from maestro.muse_cli.models import MuseCliCommit
214
215	async def _ancestors(start: str) -> set[str]:
216	"""BFS collecting all ancestor commit IDs (inclusive of start)."""
217	visited: set[str] = set()
218	queue: deque[str] = deque([start])
219	while queue:
220	cid = queue.popleft()
221	if cid in visited:
222	continue
223	visited.add(cid)
224	row: MuseCliCommit \| None = await session.get(MuseCliCommit, cid)
225	if row is None:
226	continue
227	if row.parent_commit_id:
228	queue.append(row.parent_commit_id)
229	if row.parent2_commit_id:
230	queue.append(row.parent2_commit_id)
231	return visited
232
233	good_set = await _ancestors(good_commit_id)
234	bad_ancestors = await _ancestors(bad_commit_id)
235
236	# Commits between good and bad: reachable from bad, not from good,
237	# and excluding bad itself (bad is known-bad, not a candidate to test).
238	candidate_ids = bad_ancestors - good_set - {bad_commit_id}
239
240	if not candidate_ids:
241	return []
242
243	# Load and sort by committed_at ascending.
244	rows: list[MuseCliCommit] = []
245	for cid in candidate_ids:
246	row = await session.get(MuseCliCommit, cid)
247	if row is not None:
248	rows.append(row)
249
250	rows.sort(key=lambda r: r.committed_at)
251	return rows
252
253
254	def pick_midpoint(commits: list[MuseCliCommit]) -> MuseCliCommit \| None:
255	"""Return the midpoint commit for binary search.
256
257	Selects ``commits[(len(commits) - 1) // 2]`` — the lower-middle element
258	for even-length lists, middle for odd-length. Returns ``None`` on empty.
259
260	Args:
261	commits: Ordered list of candidate commits (oldest first).
262	"""
263	if not commits:
264	return None
265	mid_idx = (len(commits) - 1) // 2
266	return commits[mid_idx]
267
268
269	async def advance_bisect(
270	session: AsyncSession,
271	root: pathlib.Path,
272	commit_id: str,
273	verdict: str,
274	) -> BisectStepResult:
275	"""Record a verdict for commit_id and advance the bisect session.
276
277	Updates the ``good`` or ``bad`` bound based on the verdict, computes the
278	remaining candidate range, and selects the next midpoint to test.
279
280	When the candidate range collapses to zero the culprit is identified: it is
281	the ``bad`` bound (the earliest commit we marked bad, or the first bad commit
282	reachable from bad that is not reachable from good).
283
284	Args:
285	session: Open async DB session.
286	root: Repository root.
287	commit_id: Commit being marked.
288	verdict: Either ``"good"`` or ``"bad"``.
289
290	Returns:
291	:class:`BisectStepResult` describing the outcome.
292
293	Raises:
294	ValueError: If verdict is not ``"good"`` or ``"bad"``.
295	RuntimeError: If no bisect is in progress.
296	"""
297	if verdict not in ("good", "bad"):
298	raise ValueError(f"verdict must be 'good' or 'bad', got {verdict!r}")
299
300	state = read_bisect_state(root)
301	if state is None:
302	raise RuntimeError("No bisect session in progress. Run 'muse bisect start' first.")
303
304	# Record the verdict.
305	state.tested[commit_id] = verdict
306
307	if verdict == "good":
308	state.good = commit_id
309	else:
310	state.bad = commit_id
311
312	# If either bound is still unset we cannot advance yet.
313	if state.good is None or state.bad is None:
314	write_bisect_state(root, state)
315	missing = "bad" if state.bad is None else "good"
316	return BisectStepResult(
317	culprit=None,
318	next_commit=None,
319	remaining=0,
320	message=(
321	f"✅ Marked {commit_id[:8]} as {verdict}. "
322	f"Now mark a {missing} commit to begin bisecting."
323	),
324	)
325
326	# Recompute remaining candidates.
327	candidates = await get_commits_between(session, state.good, state.bad)
328
329	if not candidates:
330	# No commits left to test — bad is the culprit.
331	culprit = state.bad
332	state.current = None
333	write_bisect_state(root, state)
334	return BisectStepResult(
335	culprit=culprit,
336	next_commit=None,
337	remaining=0,
338	message=(
339	f"🎯 Bisect complete! First bad commit: {culprit[:8]}\n"
340	"Run 'muse bisect reset' to restore your workspace."
341	),
342	)
343
344	next_commit = pick_midpoint(candidates)
345	assert next_commit is not None # candidates is non-empty
346	state.current = next_commit.commit_id
347	write_bisect_state(root, state)
348
349	remaining = len(candidates)
350	steps = math.ceil(math.log2(remaining + 1)) if remaining > 0 else 0
351
352	return BisectStepResult(
353	culprit=None,
354	next_commit=next_commit.commit_id,
355	remaining=remaining,
356	message=(
357	f"✅ Marked {commit_id[:8]} as {verdict}. "
358	f"Checking out {next_commit.commit_id[:8]} "
359	f"(~{steps} step(s) remaining, {remaining} commit(s) in range)"
360	),
361	)

Content Address

Object ID (SHA-256)

a827a48ce1b45f24ff27460ec342681e4e0bb9118a8262197a13ead124ac02e8

This file is immutable and content-addressed. The same SHA always refers to the same bytes, across every clone and every time.

File Info

Path maestro/services/muse_bisect.py

Lines 361

Size 12.2 KB

Language python

Ref 12901c5a

Snapshot 219767745448…

Last Modified

12901c5a

Initial extraction from tellurstori/maestro

cgcardona <gabriel@tellurstori.com> 4d ago

View commit →

Links

Browse tree at 12901c5a All commits View raw