cgcardona / muse public
test_muse_object_store.py python
511 lines 18.8 KB
12901c5a Initial extraction from tellurstori/maestro cgcardona <gabriel@tellurstori.com> 4d ago
1 """Tests for the canonical Muse object store — ``maestro.muse_cli.object_store``.
2
3 This file is the authoritative test suite for the shared blob store. Every
4 Muse command that reads or writes objects (``muse commit``, ``muse read-tree``,
5 ``muse reset --hard``) must route through this module. Tests here verify:
6
7 Unit tests (pure filesystem, no DB):
- test_object_path_uses_sharded_layout — path is <sha2>/<sha62>
- test_object_path_shard_dir_is_first_two_chars — shard dir name is first 2 chars
- test_object_path_filename_is_remaining_62_chars — filename is the remaining 62 chars
- test_write_object_creates_shard_dir — shard dir created on first write
- test_write_object_stores_content — bytes are persisted correctly
- test_write_object_returns_true_on_new_write — first write returns True
- test_write_object_idempotent_returns_false — second write returns False, file unchanged
- test_write_object_from_path_stores_content — path-based write stores bytes correctly
- test_write_object_from_path_returns_true_on_new_write — path-based first write returns True
- test_write_object_from_path_idempotent — path-based write is idempotent
- test_read_object_returns_bytes — returns stored content
- test_read_object_returns_none_when_missing — returns None for absent object
- test_has_object_true_after_write — True after write_object
- test_has_object_false_before_write — False when absent
- test_restore_object_copies_to_dest — file appears at dest
- test_restore_object_creates_parent_dirs — dest parent dirs are created
- test_restore_object_returns_false_when_missing — returns False when object absent

Regression tests (cross-command round-trips):
- test_same_layout_commit_then_read_tree — objects written by commit are found by read-tree
- test_same_layout_commit_then_reset_hard — objects written by commit are found by reset --hard
- test_commit_write_then_read_tree_write_produce_same_path — both write APIs share one layout
26 """
27 from __future__ import annotations
28
29 import datetime
30 import json
31 import pathlib
32 import uuid
33 from collections.abc import AsyncGenerator
34
35 import pytest
36 from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
37
38 from maestro.db.database import Base
39 from maestro.muse_cli import models as cli_models # noqa: F401 — register tables
40 from maestro.muse_cli.models import MuseCliCommit, MuseCliObject, MuseCliSnapshot
41 from maestro.muse_cli.object_store import (
42 has_object,
43 object_path,
44 read_object,
45 restore_object,
46 write_object,
47 write_object_from_path,
48 )
49
50
51 # ---------------------------------------------------------------------------
52 # Fixtures
53 # ---------------------------------------------------------------------------
54
55
@pytest.fixture
async def async_session() -> AsyncGenerator[AsyncSession, None]:
    """Yield an in-memory SQLite ``AsyncSession`` with all Muse CLI tables created.

    The engine is disposed once the test finishes so no connections
    outlive a single test case.
    """
    engine = create_async_engine("sqlite+aiosqlite:///:memory:")
    # Build the full schema up front; importing cli_models above registered
    # the tables on Base.metadata.
    async with engine.begin() as connection:
        await connection.run_sync(Base.metadata.create_all)
    session_factory = async_sessionmaker(
        engine, class_=AsyncSession, expire_on_commit=False
    )
    async with session_factory() as session:
        yield session
    await engine.dispose()
66
67
@pytest.fixture
def repo_id() -> str:
    """Fresh random repository UUID (string form) for each test."""
    return f"{uuid.uuid4()}"
71
72
@pytest.fixture
def repo_root(tmp_path: pathlib.Path, repo_id: str) -> pathlib.Path:
    """Minimal Muse repository structure with repo.json and HEAD."""
    muse_dir = tmp_path / ".muse"
    heads_dir = muse_dir / "refs" / "heads"
    # parents=True creates .muse/ and refs/ in the same call.
    heads_dir.mkdir(parents=True)
    (muse_dir / "HEAD").write_text("refs/heads/main")
    (heads_dir / "main").write_text("")  # branch exists but points nowhere yet
    (muse_dir / "repo.json").write_text(json.dumps({"repo_id": repo_id}))
    return tmp_path
83
84
85 def _sha(seed: str, length: int = 64) -> str:
86 """Build a deterministic fake SHA of exactly *length* hex chars."""
87 return (seed * (length // len(seed) + 1))[:length]
88
89
90 # ---------------------------------------------------------------------------
91 # Unit tests — object_path layout
92 # ---------------------------------------------------------------------------
93
94
class TestObjectPath:
    """Layout contract: object_path maps an id to .muse/objects/<first-2>/<last-62>."""

    def test_object_path_uses_sharded_layout(self, tmp_path: pathlib.Path) -> None:
        """object_path returns .muse/objects/<sha2>/<sha62> — the sharded layout."""
        object_id = "ab" + "cd" * 31  # 64 hex chars
        assert object_path(tmp_path, object_id) == (
            tmp_path / ".muse" / "objects" / "ab" / ("cd" * 31)
        )

    def test_object_path_shard_dir_is_first_two_chars(
        self, tmp_path: pathlib.Path
    ) -> None:
        """The shard directory name is exactly the first two hex characters."""
        path = object_path(tmp_path, "ff" + "00" * 31)
        assert path.parent.name == "ff"

    def test_object_path_filename_is_remaining_62_chars(
        self, tmp_path: pathlib.Path
    ) -> None:
        """The filename under the shard dir is the remaining 62 characters."""
        path = object_path(tmp_path, "1a" + "bc" * 31)
        assert path.name == "bc" * 31
        assert len(path.name) == 62
121
122
123 # ---------------------------------------------------------------------------
124 # Unit tests — write_object (bytes)
125 # ---------------------------------------------------------------------------
126
127
class TestWriteObject:
    """Behaviour of the bytes-based write_object API."""

    def test_write_object_creates_shard_dir(self, tmp_path: pathlib.Path) -> None:
        """write_object creates the shard subdirectory on first write."""
        (tmp_path / ".muse").mkdir()
        write_object(tmp_path, "ab" + "11" * 31, b"MIDI data")
        assert (tmp_path / ".muse" / "objects" / "ab").is_dir()

    def test_write_object_stores_content(self, tmp_path: pathlib.Path) -> None:
        """write_object persists the exact bytes at the sharded path."""
        (tmp_path / ".muse").mkdir()
        oid = "cc" + "dd" * 31
        payload = b"track: bass, tempo: 120bpm"
        write_object(tmp_path, oid, payload)
        assert object_path(tmp_path, oid).read_bytes() == payload

    def test_write_object_returns_true_on_new_write(
        self, tmp_path: pathlib.Path
    ) -> None:
        """write_object returns True when the object is newly stored."""
        (tmp_path / ".muse").mkdir()
        assert write_object(tmp_path, "ee" + "ff" * 31, b"new blob") is True

    def test_write_object_idempotent_returns_false(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Second write for the same object_id returns False without changing the file."""
        (tmp_path / ".muse").mkdir()
        oid = "11" + "22" * 31
        write_object(tmp_path, oid, b"original content")
        stored = object_path(tmp_path, oid)
        first_mtime = stored.stat().st_mtime

        assert write_object(tmp_path, oid, b"different content") is False
        # Neither the timestamp nor the payload may change on a repeat write.
        assert stored.stat().st_mtime == first_mtime
        assert stored.read_bytes() == b"original content"
171
172
173 # ---------------------------------------------------------------------------
174 # Unit tests — write_object_from_path (path-based write)
175 # ---------------------------------------------------------------------------
176
177
class TestWriteObjectFromPath:
    """Behaviour of the file-path-based write API."""

    def test_write_object_from_path_stores_content(
        self, tmp_path: pathlib.Path
    ) -> None:
        """write_object_from_path copies the source file into the sharded store."""
        (tmp_path / ".muse").mkdir()
        oid = "aa" + "bb" * 31
        source = tmp_path / "drums.mid"
        source.write_bytes(b"MIDI drums data")

        write_object_from_path(tmp_path, oid, source)

        assert object_path(tmp_path, oid).read_bytes() == b"MIDI drums data"

    def test_write_object_from_path_returns_true_on_new_write(
        self, tmp_path: pathlib.Path
    ) -> None:
        """write_object_from_path returns True when the object is newly stored."""
        (tmp_path / ".muse").mkdir()
        source = tmp_path / "keys.mid"
        source.write_bytes(b"piano riff")

        assert write_object_from_path(tmp_path, "33" + "44" * 31, source) is True

    def test_write_object_from_path_idempotent(self, tmp_path: pathlib.Path) -> None:
        """Second call with the same object_id returns False, file unchanged."""
        (tmp_path / ".muse").mkdir()
        oid = "55" + "66" * 31
        source = tmp_path / "lead.mid"
        source.write_bytes(b"lead melody")

        write_object_from_path(tmp_path, oid, source)
        stored = object_path(tmp_path, oid)
        first_mtime = stored.stat().st_mtime

        assert write_object_from_path(tmp_path, oid, source) is False
        assert stored.stat().st_mtime == first_mtime
220
221
222 # ---------------------------------------------------------------------------
223 # Unit tests — read_object
224 # ---------------------------------------------------------------------------
225
226
class TestReadObject:
    """Behaviour of read_object (store → bytes)."""

    def test_read_object_returns_bytes(self, tmp_path: pathlib.Path) -> None:
        """read_object returns the exact bytes that were written."""
        (tmp_path / ".muse").mkdir()
        oid = "77" + "88" * 31
        payload = b"chorus riff, key of C"
        write_object(tmp_path, oid, payload)
        assert read_object(tmp_path, oid) == payload

    def test_read_object_returns_none_when_missing(
        self, tmp_path: pathlib.Path
    ) -> None:
        """read_object returns None for an object not in the store."""
        (tmp_path / ".muse").mkdir()
        assert read_object(tmp_path, "99" + "aa" * 31) is None
247
248
249 # ---------------------------------------------------------------------------
250 # Unit tests — has_object
251 # ---------------------------------------------------------------------------
252
253
class TestHasObject:
    """Behaviour of the has_object existence probe."""

    def test_has_object_false_before_write(self, tmp_path: pathlib.Path) -> None:
        """has_object returns False before any write."""
        (tmp_path / ".muse").mkdir()
        assert has_object(tmp_path, "bb" + "cc" * 31) is False

    def test_has_object_true_after_write(self, tmp_path: pathlib.Path) -> None:
        """has_object returns True after write_object."""
        (tmp_path / ".muse").mkdir()
        oid = "dd" + "ee" * 31
        write_object(tmp_path, oid, b"pad chord")
        assert has_object(tmp_path, oid) is True
268
269
270 # ---------------------------------------------------------------------------
271 # Unit tests — restore_object
272 # ---------------------------------------------------------------------------
273
274
class TestRestoreObject:
    """Behaviour of restore_object (store → working tree)."""

    def test_restore_object_copies_to_dest(self, tmp_path: pathlib.Path) -> None:
        """restore_object writes the stored blob to the given destination path."""
        (tmp_path / ".muse").mkdir()
        oid = "12" + "34" * 31
        payload = b"bridge melody, Bm"
        write_object(tmp_path, oid, payload)

        target = tmp_path / "muse-work" / "bridge.mid"
        target.parent.mkdir(parents=True, exist_ok=True)

        assert restore_object(tmp_path, oid, target) is True
        assert target.read_bytes() == payload

    def test_restore_object_creates_parent_dirs(self, tmp_path: pathlib.Path) -> None:
        """restore_object creates missing parent directories for the dest path."""
        (tmp_path / ".muse").mkdir()
        oid = "56" + "78" * 31
        write_object(tmp_path, oid, b"nested track")

        target = tmp_path / "muse-work" / "tracks" / "strings" / "viola.mid"
        # Parent dirs do NOT exist yet — restore_object must create them.
        assert not target.parent.exists()

        assert restore_object(tmp_path, oid, target) is True
        assert target.read_bytes() == b"nested track"

    def test_restore_object_returns_false_when_missing(
        self, tmp_path: pathlib.Path
    ) -> None:
        """restore_object returns False cleanly when the object is absent."""
        (tmp_path / ".muse").mkdir()
        target = tmp_path / "muse-work" / "ghost.mid"
        target.parent.mkdir(parents=True, exist_ok=True)

        assert restore_object(tmp_path, "90" + "ab" * 31, target) is False
        assert not target.exists()
317
318
319 # ---------------------------------------------------------------------------
320 # Cross-command round-trip tests
321 #
322 # These are the regression tests the issue specifically calls for. They wire
323 # together the real _commit_async / _read_tree_async / perform_reset cores
324 # against the shared object store to prove that objects written by one command
325 # are found by every other command.
326 # ---------------------------------------------------------------------------
327
328
async def _add_commit_row(
    session: AsyncSession,
    *,
    repo_id: str,
    manifest: dict[str, str],
    branch: str = "main",
    message: str = "test commit",
    parent_commit_id: str | None = None,
    committed_at: datetime.datetime | None = None,
) -> MuseCliCommit:
    """Insert a MuseCliCommit + MuseCliSnapshot row and return the commit.

    Object rows referenced by *manifest* are inserted on demand so the same
    object_id may appear in several commits without violating uniqueness.
    """
    # Random but well-formed 64-char ids for the snapshot and commit rows.
    snapshot_id = _sha(uuid.uuid4().hex)
    commit_id = _sha(uuid.uuid4().hex)

    # Register each manifest object once, skipping ids already present.
    for object_id in manifest.values():
        if await session.get(MuseCliObject, object_id) is None:
            session.add(MuseCliObject(object_id=object_id, size_bytes=10))

    session.add(MuseCliSnapshot(snapshot_id=snapshot_id, manifest=manifest))
    await session.flush()

    if committed_at is None:
        committed_at = datetime.datetime.now(datetime.timezone.utc)
    commit = MuseCliCommit(
        commit_id=commit_id,
        repo_id=repo_id,
        branch=branch,
        parent_commit_id=parent_commit_id,
        snapshot_id=snapshot_id,
        message=message,
        author="",
        committed_at=committed_at,
    )
    session.add(commit)
    await session.flush()
    return commit
365
366
class TestCrossCommandRoundTrips:
    """Regression: objects from ``muse commit`` must be findable by all other commands.

    Each test stores blobs through the shared object-store write APIs (the
    path ``muse commit`` uses) and then exercises another command's core
    (read-tree, reset --hard) to prove both sides agree on the sharded layout.
    """

    @pytest.mark.anyio
    async def test_same_layout_commit_then_read_tree(
        self,
        async_session: AsyncSession,
        repo_root: pathlib.Path,
        repo_id: str,
    ) -> None:
        """Objects written via write_object_from_path (commit) are readable by read_tree.

        This is the primary regression: flat-layout objects
        written by muse commit could not be found by muse read-tree which
        used the same module. Both now use the sharded layout.
        """
        # Imported locally so the pure-filesystem unit tests above don't pull
        # in the command layer — presumably it has heavier dependencies
        # (TODO confirm).
        from maestro.muse_cli.commands.read_tree import _read_tree_async

        # Seed muse-work/ with a file.
        workdir = repo_root / "muse-work"
        workdir.mkdir()
        track_file = workdir / "track.mid"
        track_content = b"verse hook, 4/4, 120bpm"
        track_file.write_bytes(track_content)

        # Compute hash and store via the commit path.
        from maestro.muse_cli.snapshot import hash_file

        object_id = hash_file(track_file)
        write_object_from_path(repo_root, object_id, track_file)

        # Insert the snapshot + commit row.
        manifest = {"track.mid": object_id}
        commit = await _add_commit_row(
            async_session,
            repo_id=repo_id,
            manifest=manifest,
        )

        # Simulate a clean working tree before read-tree.
        track_file.unlink()
        assert not track_file.exists()

        # read-tree should restore the file from the object store.
        result = await _read_tree_async(
            snapshot_id=commit.snapshot_id,
            root=repo_root,
            session=async_session,
        )

        # Round-trip proof: same path reported, file back on disk, bytes intact.
        assert "track.mid" in result.files_written
        assert track_file.exists()
        assert track_file.read_bytes() == track_content

    @pytest.mark.anyio
    async def test_same_layout_commit_then_reset_hard(
        self,
        async_session: AsyncSession,
        repo_root: pathlib.Path,
        repo_id: str,
    ) -> None:
        """Objects written via write_object_from_path (commit) are readable by reset --hard.

        This is the primary regression: muse reset --hard used a
        sharded layout but muse commit used a flat layout, so reset could never
        find the objects commit had stored. Both now use the same sharded layout.
        """
        from maestro.services.muse_reset import ResetMode, perform_reset

        # v1 content — the snapshot we'll reset back to.
        object_id_v1 = "11" * 32
        content_v1 = b"intro riff, Em"
        write_object(repo_root, object_id_v1, content_v1)

        # Fixed early timestamp so c1 sorts unambiguously before c2.
        t0 = datetime.datetime(2024, 1, 1, tzinfo=datetime.timezone.utc)
        c1 = await _add_commit_row(
            async_session,
            repo_id=repo_id,
            manifest={"lead.mid": object_id_v1},
            committed_at=t0,
            message="v1",
        )

        # v2 content — the current HEAD we'll reset away from.
        object_id_v2 = "22" * 32
        content_v2 = b"chorus, C major"
        write_object(repo_root, object_id_v2, content_v2)

        c2 = await _add_commit_row(
            async_session,
            repo_id=repo_id,
            manifest={"lead.mid": object_id_v2},
            parent_commit_id=c1.commit_id,
            message="v2",
        )

        # Set branch HEAD to c2 and populate muse-work/ with v2 content.
        ref_path = repo_root / ".muse" / "refs" / "heads" / "main"
        ref_path.write_text(c2.commit_id)

        workdir = repo_root / "muse-work"
        workdir.mkdir(parents=True, exist_ok=True)
        (workdir / "lead.mid").write_bytes(content_v2)

        # Hard reset to c1 — must find v1 object written above.
        result = await perform_reset(
            root=repo_root,
            session=async_session,
            ref=c1.commit_id,
            mode=ResetMode.HARD,
        )

        assert result.files_restored == 1
        assert result.target_commit_id == c1.commit_id
        assert (workdir / "lead.mid").read_bytes() == content_v1

    @pytest.mark.anyio
    async def test_commit_write_then_read_tree_write_produce_same_path(
        self,
        repo_root: pathlib.Path,
    ) -> None:
        """write_object and write_object_from_path both produce the same sharded path.

        Ensures neither write variant creates a layout inconsistency.
        """
        (repo_root / ".muse").mkdir(exist_ok=True)
        object_id = "ab" + "cd" * 31
        content = b"same object, two write paths"

        # Write via bytes API (as _commit_async used to).
        write_object(repo_root, object_id, content)
        p_bytes = object_path(repo_root, object_id)

        # Clear store.
        # rmdir is safe here: this fresh repo holds no other object in the
        # "ab" shard, so the directory is empty after the unlink.
        p_bytes.unlink()
        p_bytes.parent.rmdir()

        # Write via path API (as _commit_async now does).
        src = repo_root / "tmp_source.mid"
        src.write_bytes(content)
        write_object_from_path(repo_root, object_id, src)
        p_path = object_path(repo_root, object_id)

        assert p_bytes == p_path  # identical paths
        assert p_path.read_bytes() == content