test_muse_object_store.py
python
| 1 | """Tests for the canonical Muse object store — ``maestro.muse_cli.object_store``. |
| 2 | |
| 3 | This file is the authoritative test suite for the shared blob store. Every |
| 4 | Muse command that reads or writes objects (``muse commit``, ``muse read-tree``, |
| 5 | ``muse reset --hard``) must route through this module. Tests here verify: |
| 6 | |
| 7 | Unit tests (pure filesystem, no DB): |
| 8 | - test_object_path_uses_sharded_layout — path is <sha2>/<sha62> |
| 9 | - test_object_path_shard_dir_is_first_two — shard dir name is first 2 chars |
| 10 | - test_write_object_creates_shard_dir — shard dir created on first write |
| 11 | - test_write_object_stores_content — bytes are persisted correctly |
| 12 | - test_write_object_idempotent_returns_false — second write returns False, file unchanged |
| 13 | - test_write_object_from_path_stores_content — path-based write stores bytes correctly |
| 14 | - test_write_object_from_path_idempotent — path-based write is idempotent |
| 15 | - test_read_object_returns_bytes — returns stored content |
| 16 | - test_read_object_returns_none_when_missing — returns None for absent object |
| 17 | - test_has_object_true_after_write — True after write_object |
| 18 | - test_has_object_false_before_write — False when absent |
| 19 | - test_restore_object_copies_to_dest — file appears at dest |
| 20 | - test_restore_object_creates_parent_dirs — dest parent dirs are created |
| 21 | - test_restore_object_returns_false_missing — returns False when object absent |
| 22 | |
| 23 | Regression tests (cross-command round-trips): |
| 24 | - test_same_layout_commit_then_read_tree — objects written by commit are found by read-tree |
| 25 | - test_same_layout_commit_then_reset_hard — objects written by commit are found by reset --hard |
| 26 | """ |
| 27 | from __future__ import annotations |
| 28 | |
| 29 | import datetime |
| 30 | import json |
| 31 | import pathlib |
| 32 | import uuid |
| 33 | from collections.abc import AsyncGenerator |
| 34 | |
| 35 | import pytest |
| 36 | from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine |
| 37 | |
| 38 | from maestro.db.database import Base |
| 39 | from maestro.muse_cli import models as cli_models # noqa: F401 — register tables |
| 40 | from maestro.muse_cli.models import MuseCliCommit, MuseCliObject, MuseCliSnapshot |
| 41 | from maestro.muse_cli.object_store import ( |
| 42 | has_object, |
| 43 | object_path, |
| 44 | read_object, |
| 45 | restore_object, |
| 46 | write_object, |
| 47 | write_object_from_path, |
| 48 | ) |
| 49 | |
| 50 | |
| 51 | # --------------------------------------------------------------------------- |
| 52 | # Fixtures |
| 53 | # --------------------------------------------------------------------------- |
| 54 | |
| 55 | |
@pytest.fixture
async def async_session() -> AsyncGenerator[AsyncSession, None]:
    """Yield an in-memory SQLite AsyncSession with every Muse CLI table created."""
    engine = create_async_engine("sqlite+aiosqlite:///:memory:")
    async with engine.begin() as conn:
        await conn.run_sync(Base.metadata.create_all)
    # expire_on_commit=False keeps ORM objects usable after commit in tests.
    factory = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
    async with factory() as session:
        yield session
    await engine.dispose()
| 66 | |
| 67 | |
@pytest.fixture
def repo_id() -> str:
    """Return a fresh random repository id for each test."""
    return f"{uuid.uuid4()}"
| 71 | |
| 72 | |
@pytest.fixture
def repo_root(tmp_path: pathlib.Path, repo_id: str) -> pathlib.Path:
    """Minimal Muse repository structure with repo.json and HEAD."""
    muse_dir = tmp_path / ".muse"
    heads_dir = muse_dir / "refs" / "heads"
    heads_dir.mkdir(parents=True)  # also creates .muse itself
    (muse_dir / "HEAD").write_text("refs/heads/main")
    (heads_dir / "main").write_text("")
    (muse_dir / "repo.json").write_text(json.dumps({"repo_id": repo_id}))
    return tmp_path
| 83 | |
| 84 | |
| 85 | def _sha(seed: str, length: int = 64) -> str: |
| 86 | """Build a deterministic fake SHA of exactly *length* hex chars.""" |
| 87 | return (seed * (length // len(seed) + 1))[:length] |
| 88 | |
| 89 | |
| 90 | # --------------------------------------------------------------------------- |
| 91 | # Unit tests — object_path layout |
| 92 | # --------------------------------------------------------------------------- |
| 93 | |
| 94 | |
class TestObjectPath:
    """Layout checks: object_path must shard as .muse/objects/<sha2>/<sha62>."""

    def test_object_path_uses_sharded_layout(self, tmp_path: pathlib.Path) -> None:
        """object_path returns .muse/objects/<sha2>/<sha62> — the sharded layout."""
        blob_id = "ab" + "cd" * 31  # 64 hex chars
        expected = tmp_path / ".muse" / "objects" / "ab" / ("cd" * 31)
        assert object_path(tmp_path, blob_id) == expected

    def test_object_path_shard_dir_is_first_two_chars(
        self, tmp_path: pathlib.Path
    ) -> None:
        """The shard directory name is exactly the first two hex characters."""
        path = object_path(tmp_path, "ff" + "00" * 31)
        assert path.parent.name == "ff"

    def test_object_path_filename_is_remaining_62_chars(
        self, tmp_path: pathlib.Path
    ) -> None:
        """The filename under the shard dir is the remaining 62 characters."""
        path = object_path(tmp_path, "1a" + "bc" * 31)
        assert path.name == "bc" * 31
        assert len(path.name) == 62
| 121 | |
| 122 | |
| 123 | # --------------------------------------------------------------------------- |
| 124 | # Unit tests — write_object (bytes) |
| 125 | # --------------------------------------------------------------------------- |
| 126 | |
| 127 | |
class TestWriteObject:
    """Behaviour of the bytes-based write_object API."""

    def test_write_object_creates_shard_dir(self, tmp_path: pathlib.Path) -> None:
        """write_object creates the shard subdirectory on first write."""
        (tmp_path / ".muse").mkdir()
        write_object(tmp_path, "ab" + "11" * 31, b"MIDI data")
        assert (tmp_path / ".muse" / "objects" / "ab").is_dir()

    def test_write_object_stores_content(self, tmp_path: pathlib.Path) -> None:
        """write_object persists the exact bytes at the sharded path."""
        (tmp_path / ".muse").mkdir()
        blob_id = "cc" + "dd" * 31
        payload = b"track: bass, tempo: 120bpm"
        write_object(tmp_path, blob_id, payload)
        assert object_path(tmp_path, blob_id).read_bytes() == payload

    def test_write_object_returns_true_on_new_write(
        self, tmp_path: pathlib.Path
    ) -> None:
        """write_object returns True when the object is newly stored."""
        (tmp_path / ".muse").mkdir()
        assert write_object(tmp_path, "ee" + "ff" * 31, b"new blob") is True

    def test_write_object_idempotent_returns_false(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Second write for the same object_id returns False without changing the file."""
        (tmp_path / ".muse").mkdir()
        blob_id = "11" + "22" * 31
        write_object(tmp_path, blob_id, b"original content")
        stored = object_path(tmp_path, blob_id)
        before = stored.stat().st_mtime

        outcome = write_object(tmp_path, blob_id, b"different content")

        assert outcome is False
        assert stored.stat().st_mtime == before  # file not touched
        assert stored.read_bytes() == b"original content"  # first payload wins
| 171 | |
| 172 | |
| 173 | # --------------------------------------------------------------------------- |
| 174 | # Unit tests — write_object_from_path (path-based write) |
| 175 | # --------------------------------------------------------------------------- |
| 176 | |
| 177 | |
class TestWriteObjectFromPath:
    """Behaviour of the path-based write_object_from_path API."""

    def test_write_object_from_path_stores_content(
        self, tmp_path: pathlib.Path
    ) -> None:
        """write_object_from_path copies the source file into the sharded store."""
        (tmp_path / ".muse").mkdir()
        blob_id = "aa" + "bb" * 31
        source = tmp_path / "drums.mid"
        source.write_bytes(b"MIDI drums data")

        write_object_from_path(tmp_path, blob_id, source)

        assert object_path(tmp_path, blob_id).read_bytes() == b"MIDI drums data"

    def test_write_object_from_path_returns_true_on_new_write(
        self, tmp_path: pathlib.Path
    ) -> None:
        """write_object_from_path returns True when the object is newly stored."""
        (tmp_path / ".muse").mkdir()
        source = tmp_path / "keys.mid"
        source.write_bytes(b"piano riff")
        assert write_object_from_path(tmp_path, "33" + "44" * 31, source) is True

    def test_write_object_from_path_idempotent(self, tmp_path: pathlib.Path) -> None:
        """Second call with the same object_id returns False, file unchanged."""
        (tmp_path / ".muse").mkdir()
        blob_id = "55" + "66" * 31
        source = tmp_path / "lead.mid"
        source.write_bytes(b"lead melody")

        write_object_from_path(tmp_path, blob_id, source)
        stored = object_path(tmp_path, blob_id)
        before = stored.stat().st_mtime

        assert write_object_from_path(tmp_path, blob_id, source) is False
        assert stored.stat().st_mtime == before  # untouched on the repeat
| 220 | |
| 221 | |
| 222 | # --------------------------------------------------------------------------- |
| 223 | # Unit tests — read_object |
| 224 | # --------------------------------------------------------------------------- |
| 225 | |
| 226 | |
class TestReadObject:
    """Behaviour of read_object."""

    def test_read_object_returns_bytes(self, tmp_path: pathlib.Path) -> None:
        """read_object returns the exact bytes that were written."""
        (tmp_path / ".muse").mkdir()
        blob_id = "77" + "88" * 31
        payload = b"chorus riff, key of C"
        write_object(tmp_path, blob_id, payload)
        assert read_object(tmp_path, blob_id) == payload

    def test_read_object_returns_none_when_missing(
        self, tmp_path: pathlib.Path
    ) -> None:
        """read_object returns None for an object not in the store."""
        (tmp_path / ".muse").mkdir()
        assert read_object(tmp_path, "99" + "aa" * 31) is None
| 247 | |
| 248 | |
| 249 | # --------------------------------------------------------------------------- |
| 250 | # Unit tests — has_object |
| 251 | # --------------------------------------------------------------------------- |
| 252 | |
| 253 | |
class TestHasObject:
    """Behaviour of has_object."""

    def test_has_object_false_before_write(self, tmp_path: pathlib.Path) -> None:
        """has_object returns False before any write."""
        (tmp_path / ".muse").mkdir()
        assert has_object(tmp_path, "bb" + "cc" * 31) is False

    def test_has_object_true_after_write(self, tmp_path: pathlib.Path) -> None:
        """has_object returns True after write_object."""
        (tmp_path / ".muse").mkdir()
        blob_id = "dd" + "ee" * 31
        write_object(tmp_path, blob_id, b"pad chord")
        assert has_object(tmp_path, blob_id) is True
| 268 | |
| 269 | |
| 270 | # --------------------------------------------------------------------------- |
| 271 | # Unit tests — restore_object |
| 272 | # --------------------------------------------------------------------------- |
| 273 | |
| 274 | |
class TestRestoreObject:
    """Behaviour of restore_object."""

    def test_restore_object_copies_to_dest(self, tmp_path: pathlib.Path) -> None:
        """restore_object writes the stored blob to the given destination path."""
        (tmp_path / ".muse").mkdir()
        blob_id = "12" + "34" * 31
        payload = b"bridge melody, Bm"
        write_object(tmp_path, blob_id, payload)

        target = tmp_path / "muse-work" / "bridge.mid"
        target.parent.mkdir(parents=True, exist_ok=True)

        assert restore_object(tmp_path, blob_id, target) is True
        assert target.read_bytes() == payload

    def test_restore_object_creates_parent_dirs(self, tmp_path: pathlib.Path) -> None:
        """restore_object creates missing parent directories for the dest path."""
        (tmp_path / ".muse").mkdir()
        blob_id = "56" + "78" * 31
        write_object(tmp_path, blob_id, b"nested track")

        target = tmp_path / "muse-work" / "tracks" / "strings" / "viola.mid"
        # Parent dirs do NOT exist yet — restore_object must create them.
        assert not target.parent.exists()

        assert restore_object(tmp_path, blob_id, target) is True
        assert target.read_bytes() == b"nested track"

    def test_restore_object_returns_false_when_missing(
        self, tmp_path: pathlib.Path
    ) -> None:
        """restore_object returns False cleanly when the object is absent."""
        (tmp_path / ".muse").mkdir()
        target = tmp_path / "muse-work" / "ghost.mid"
        target.parent.mkdir(parents=True, exist_ok=True)

        assert restore_object(tmp_path, "90" + "ab" * 31, target) is False
        assert not target.exists()
| 317 | |
| 318 | |
| 319 | # --------------------------------------------------------------------------- |
| 320 | # Cross-command round-trip tests |
| 321 | # |
| 322 | # These are the regression tests the issue specifically calls for. They wire |
| 323 | # together the real _commit_async / _read_tree_async / perform_reset cores |
| 324 | # against the shared object store to prove that objects written by one command |
| 325 | # are found by every other command. |
| 326 | # --------------------------------------------------------------------------- |
| 327 | |
| 328 | |
async def _add_commit_row(
    session: AsyncSession,
    *,
    repo_id: str,
    manifest: dict[str, str],
    branch: str = "main",
    message: str = "test commit",
    parent_commit_id: str | None = None,
    committed_at: datetime.datetime | None = None,
) -> MuseCliCommit:
    """Insert a MuseCliCommit + MuseCliSnapshot row and return the commit.

    Object rows referenced by *manifest* are inserted only when absent, so the
    helper can be called repeatedly with overlapping manifests.
    """
    # uuid4().hex is the 32-char lowercase hex form; _sha tiles it to 64 chars.
    snapshot_id = _sha(uuid.uuid4().hex)
    commit_id = _sha(uuid.uuid4().hex)

    for object_id in manifest.values():
        if await session.get(MuseCliObject, object_id) is None:
            session.add(MuseCliObject(object_id=object_id, size_bytes=10))

    session.add(MuseCliSnapshot(snapshot_id=snapshot_id, manifest=manifest))
    await session.flush()

    commit = MuseCliCommit(
        commit_id=commit_id,
        repo_id=repo_id,
        branch=branch,
        parent_commit_id=parent_commit_id,
        snapshot_id=snapshot_id,
        message=message,
        author="",
        committed_at=committed_at or datetime.datetime.now(datetime.timezone.utc),
    )
    session.add(commit)
    await session.flush()
    return commit
| 365 | |
| 366 | |
class TestCrossCommandRoundTrips:
    """Regression: objects from ``muse commit`` must be findable by all other commands."""

    @pytest.mark.anyio
    async def test_same_layout_commit_then_read_tree(
        self,
        async_session: AsyncSession,
        repo_root: pathlib.Path,
        repo_id: str,
    ) -> None:
        """Objects written via write_object_from_path (commit) are readable by read_tree.

        This is the primary regression: flat-layout objects
        written by muse commit could not be found by muse read-tree which
        used the same module. Both now use the sharded layout.
        """
        from maestro.muse_cli.commands.read_tree import _read_tree_async
        from maestro.muse_cli.snapshot import hash_file

        # Seed muse-work/ with a single track file.
        workdir = repo_root / "muse-work"
        workdir.mkdir()
        track = workdir / "track.mid"
        payload = b"verse hook, 4/4, 120bpm"
        track.write_bytes(payload)

        # Hash and store the file exactly as the commit path does.
        blob_id = hash_file(track)
        write_object_from_path(repo_root, blob_id, track)

        # Record the snapshot + commit rows pointing at that blob.
        commit = await _add_commit_row(
            async_session,
            repo_id=repo_id,
            manifest={"track.mid": blob_id},
        )

        # Wipe the working copy so read-tree must hit the object store.
        track.unlink()
        assert not track.exists()

        result = await _read_tree_async(
            snapshot_id=commit.snapshot_id,
            root=repo_root,
            session=async_session,
        )

        assert "track.mid" in result.files_written
        assert track.exists()
        assert track.read_bytes() == payload

    @pytest.mark.anyio
    async def test_same_layout_commit_then_reset_hard(
        self,
        async_session: AsyncSession,
        repo_root: pathlib.Path,
        repo_id: str,
    ) -> None:
        """Objects written via write_object_from_path (commit) are readable by reset --hard.

        This is the primary regression: muse reset --hard used a
        sharded layout but muse commit used a flat layout, so reset could never
        find the objects commit had stored. Both now use the same sharded layout.
        """
        from maestro.services.muse_reset import ResetMode, perform_reset

        # v1 content — the snapshot we'll reset back to.
        blob_v1, bytes_v1 = "11" * 32, b"intro riff, Em"
        write_object(repo_root, blob_v1, bytes_v1)
        c1 = await _add_commit_row(
            async_session,
            repo_id=repo_id,
            manifest={"lead.mid": blob_v1},
            committed_at=datetime.datetime(2024, 1, 1, tzinfo=datetime.timezone.utc),
            message="v1",
        )

        # v2 content — the current HEAD we'll reset away from.
        blob_v2, bytes_v2 = "22" * 32, b"chorus, C major"
        write_object(repo_root, blob_v2, bytes_v2)
        c2 = await _add_commit_row(
            async_session,
            repo_id=repo_id,
            manifest={"lead.mid": blob_v2},
            parent_commit_id=c1.commit_id,
            message="v2",
        )

        # Point the branch at c2 and materialise its content in muse-work/.
        (repo_root / ".muse" / "refs" / "heads" / "main").write_text(c2.commit_id)
        workdir = repo_root / "muse-work"
        workdir.mkdir(parents=True, exist_ok=True)
        (workdir / "lead.mid").write_bytes(bytes_v2)

        # Hard reset to c1 — must locate the v1 blob written above.
        result = await perform_reset(
            root=repo_root,
            session=async_session,
            ref=c1.commit_id,
            mode=ResetMode.HARD,
        )

        assert result.files_restored == 1
        assert result.target_commit_id == c1.commit_id
        assert (workdir / "lead.mid").read_bytes() == bytes_v1

    @pytest.mark.anyio
    async def test_commit_write_then_read_tree_write_produce_same_path(
        self,
        repo_root: pathlib.Path,
    ) -> None:
        """write_object and write_object_from_path both produce the same sharded path.

        Ensures neither write variant creates a layout inconsistency.
        """
        (repo_root / ".muse").mkdir(exist_ok=True)
        blob_id = "ab" + "cd" * 31
        payload = b"same object, two write paths"

        # Bytes-based write (the historical _commit_async path).
        write_object(repo_root, blob_id, payload)
        via_bytes = object_path(repo_root, blob_id)

        # Empty the store again, shard dir included.
        via_bytes.unlink()
        via_bytes.parent.rmdir()

        # Path-based write (what _commit_async does today).
        source = repo_root / "tmp_source.mid"
        source.write_bytes(payload)
        write_object_from_path(repo_root, blob_id, source)
        via_path = object_path(repo_root, blob_id)

        assert via_bytes == via_path  # identical paths
        assert via_path.read_bytes() == payload