cgcardona / muse public
object_store.py python
202 lines 7.0 KB
12901c5a Initial extraction from tellurstori/maestro cgcardona <gabriel@tellurstori.com> 4d ago
1 """Canonical content-addressed object store for the Muse VCS.
2
3 All Muse commands that read or write blobs — ``muse commit``, ``muse read-tree``,
4 ``muse reset`` — go through this module exclusively. No command may implement
5 its own path logic or copy its own blobs.
6
7 Layout
8 ------
9 Objects are stored under ``<repo_root>/.muse/objects/`` using a two-character
10 sharded directory layout that mirrors Git's loose-object format::
11
12 .muse/objects/<sha2>/<sha62>
13
14 where ``<sha2>`` is the first two hex characters of the SHA-256 digest and
15 ``<sha62>`` is the remaining 62 characters. For example, the object with
16 digest ``ab1234...`` is stored at ``.muse/objects/ab/1234...``.
17
18 Why sharding?
19 -------------
20 Music repositories accumulate objects at a far higher rate than code
21 repositories: every generated take, every variation, every rendered clip is a
22 new blob. A single recording session can produce tens of thousands of objects.
23 Without sharding, a flat directory exceeds filesystem limits (ext4, APFS, HFS+
24 all degrade or hard-limit above ~32,000 entries per directory). Two hex
25 characters yield 256 subdirectories — the same trade-off Git settled on after
26 years of production use.
27
28 This module is the single source of truth for all local object I/O.
29 The store is append-only: writing the same object twice is always a no-op.
30 """
from __future__ import annotations

import logging
import os
import pathlib
import shutil
36
# Name of the object-store directory inside ``.muse/``; kept in one place so
# every path computed by this module agrees on it.
_OBJECTS_DIR = "objects"

# Module-level logger named after this module, used for debug traces of
# store hits, misses, and writes.
logger = logging.getLogger(__name__)
40
41
def objects_dir(repo_root: pathlib.Path) -> pathlib.Path:
    """Locate the root directory of the local object store.

    The result is ``<repo_root>/.muse/objects``.  This function only builds
    the path; it never touches the filesystem, so the directory may not
    exist yet.

    Args:
        repo_root: Root of the Muse repository (the directory containing
            ``.muse/``).

    Returns:
        Path of the objects directory.  It is absolute only when
        *repo_root* itself is absolute.
    """
    return repo_root.joinpath(".muse", _OBJECTS_DIR)
56
57
def object_path(repo_root: pathlib.Path, object_id: str) -> pathlib.Path:
    """Compute where a single object lives on disk.

    The first two characters of *object_id* name the shard directory and
    the remaining characters name the file inside it::

        .muse/objects/<first 2 chars>/<remaining chars>

    The path is built purely by slicing *object_id*; the ID is not
    validated and the filesystem is not consulted.

    Args:
        repo_root: Root of the Muse repository.
        object_id: SHA-256 hex digest of the object's content (64 chars).

    Returns:
        Path to the object file (may not yet exist).
    """
    shard, remainder = object_id[:2], object_id[2:]
    return objects_dir(repo_root).joinpath(shard, remainder)
76
77
def has_object(repo_root: pathlib.Path, object_id: str) -> bool:
    """Return ``True`` if *object_id* is present in the local store.

    Cheaper than :func:`read_object` when the caller only needs to check
    existence (e.g. to pre-flight a hard reset before touching the working
    tree).

    Only a regular file counts as a stored object.  An empty or
    two-character ID resolves to the store root or a shard directory; those
    are directories, not objects, so they are reported as absent.

    Args:
        repo_root: Root of the Muse repository.
        object_id: SHA-256 hex digest to check.

    Returns:
        ``True`` when the object's file exists, ``False`` otherwise.
    """
    # is_file() rather than exists(): a directory at the computed path must
    # not be mistaken for a readable object.
    return object_path(repo_root, object_id).is_file()
90
91
def write_object(repo_root: pathlib.Path, object_id: str, content: bytes) -> bool:
    """Write *content* to the local object store under *object_id*.

    If the object already exists (same ID = same content, content-addressed)
    the write is skipped and ``False`` is returned.  Returns ``True`` when a
    new object was written.

    The shard directory is created on first write.  The bytes are first
    written to a hidden staging file in the same shard directory and then
    moved into place with ``os.replace``, so an interrupted write cannot
    leave a truncated file at the final path.  A partial file there would be
    treated as "already stored" by every later call and never repaired.

    Args:
        repo_root: Root of the Muse repository.
        object_id: SHA-256 hex digest that identifies this object (64 chars).
        content: Raw bytes to persist.

    Returns:
        ``True`` if the object was newly written, ``False`` if it already
        existed (idempotent).

    Raises:
        OSError: If the shard directory or the object file cannot be
            written.  The staging file is removed before re-raising.
    """
    dest = object_path(repo_root, object_id)
    if dest.exists():
        logger.debug("⚠️ Object %s already in store — skipped", object_id[:8])
        return False
    dest.parent.mkdir(parents=True, exist_ok=True)

    # Stage next to the destination so the final rename stays on one
    # filesystem.  PID plus random suffix keeps concurrent writers apart.
    staging = dest.with_name(
        f".{dest.name}.{os.getpid()}.{os.urandom(4).hex()}.tmp"
    )
    try:
        staging.write_bytes(content)
        # If another writer won the race, this replaces identical bytes.
        os.replace(staging, dest)
    except BaseException:
        # Do not leave half-written staging files behind, even on Ctrl-C.
        staging.unlink(missing_ok=True)
        raise

    logger.debug("✅ Stored object %s (%d bytes)", object_id[:8], len(content))
    return True
119
120
def write_object_from_path(
    repo_root: pathlib.Path,
    object_id: str,
    src: pathlib.Path,
) -> bool:
    """Copy *src* into the object store without loading it into memory.

    Preferred over :func:`write_object` for large blobs (dense MIDI renders,
    audio previews) because ``shutil.copy2`` streams the file instead of
    materialising it as a ``bytes`` object.

    Idempotent: if the object already exists the copy is skipped.

    The file is first copied to a hidden staging file in the shard directory
    and then moved into place with ``os.replace``.  A copy interrupted
    half-way therefore cannot leave a truncated object at the final path,
    where it would be treated as "already stored" and never repaired.

    Args:
        repo_root: Root of the Muse repository.
        object_id: SHA-256 hex digest of *src*'s content (64 chars).
        src: Path of the source file to store.

    Returns:
        ``True`` if the object was newly written, ``False`` if it already
        existed (idempotent).

    Raises:
        OSError: If *src* cannot be read or the store cannot be written.
            The staging file is removed before re-raising.
    """
    dest = object_path(repo_root, object_id)
    if dest.exists():
        logger.debug("⚠️ Object %s already in store — skipped", object_id[:8])
        return False
    dest.parent.mkdir(parents=True, exist_ok=True)

    # Stage next to the destination so the final rename stays on one
    # filesystem.  PID plus random suffix keeps concurrent writers apart.
    staging = dest.with_name(
        f".{dest.name}.{os.getpid()}.{os.urandom(4).hex()}.tmp"
    )
    try:
        shutil.copy2(src, staging)
        # If another writer won the race, this replaces identical bytes.
        os.replace(staging, dest)
    except BaseException:
        # Do not leave half-copied staging files behind, even on Ctrl-C.
        staging.unlink(missing_ok=True)
        raise

    logger.debug("✅ Stored object %s (%s)", object_id[:8], src.name)
    return True
151
152
def read_object(repo_root: pathlib.Path, object_id: str) -> bytes | None:
    """Read and return the raw bytes for *object_id* from the local store.

    Returns ``None`` when the object is not present in the store so callers
    can produce a user-facing error rather than handling
    ``FileNotFoundError`` themselves.

    The file is read directly and a missing file is handled afterwards.
    Checking for existence first would let ``FileNotFoundError`` escape if
    the object disappeared between the check and the read.

    Args:
        repo_root: Root of the Muse repository.
        object_id: SHA-256 hex digest of the desired object.

    Returns:
        Raw bytes, or ``None`` when the object is absent from the store.

    Raises:
        OSError: For failures other than "not found", such as a permission
            error or the path being a directory.
    """
    dest = object_path(repo_root, object_id)
    try:
        return dest.read_bytes()
    except (FileNotFoundError, NotADirectoryError):
        # NotADirectoryError: a path component is a regular file.  That is
        # still "no such object" from the caller's point of view.
        logger.debug("⚠️ Object %s not found in local store", object_id[:8])
        return None
171
172
def restore_object(
    repo_root: pathlib.Path,
    object_id: str,
    dest: pathlib.Path,
) -> bool:
    """Materialise a stored object at *dest* via a file-to-file copy.

    Uses ``shutil.copy2`` so the blob is never held in memory as ``bytes``,
    which makes this the better choice over :func:`read_object` followed by
    ``dest.write_bytes()`` for large objects.

    Missing parent directories of *dest* are created, but only after the
    object has been found in the store.

    Args:
        repo_root: Root of the Muse repository.
        object_id: SHA-256 hex digest of the desired object (64 chars).
        dest: Path to write the restored file.

    Returns:
        ``True`` on success, ``False`` if the object is not in the store.
    """
    stored = object_path(repo_root, object_id)
    if stored.exists():
        dest.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(stored, dest)
        logger.debug("✅ Restored object %s → %s", object_id[:8], dest)
        return True

    logger.debug(
        "⚠️ Object %s not found in local store — cannot restore", object_id[:8]
    )
    return False