cgcardona / muse public
hash_object.py python
249 lines 7.9 KB
12901c5a Initial extraction from tellurstori/maestro cgcardona <gabriel@tellurstori.com> 4d ago
1 """muse hash-object — compute and optionally store a Muse content-addressed object.
2
3 Mirrors ``git hash-object`` plumbing semantics: given a file path (or stdin),
4 compute the SHA-256 hash of its raw bytes and print it. With ``-w`` the object
5 is written into both the local on-disk store (``.muse/objects/``) and the
6 Postgres ``muse_cli_objects`` table so it can be referenced by future
7 ``muse commit-tree`` or ``muse cat-object`` calls.
8
9 Usage examples
10 --------------
11
12 Print the hash without storing::
13
14 muse hash-object muse-work/drums/kick.mid
15
16 Hash and store in the object store::
17
18 muse hash-object -w muse-work/drums/kick.mid
19
20 Hash content from stdin::
21
22 echo "data" | muse hash-object --stdin
23
24 The hash produced here is identical to what ``muse commit`` would compute for
25 the same file (sha256 of raw bytes, lowercase hex, 64 characters).
26
27 Agent use case
28 --------------
29 AI agents use ``muse hash-object`` to pre-check whether a file is already
30 stored before uploading it, or to derive the object ID that will be assigned
31 when the file is committed — useful for building optimistic pipelines.
32 """
33 from __future__ import annotations
34
35 import asyncio
36 import hashlib
37 import logging
38 import pathlib
39 import sys
40
41 import typer
42 from sqlalchemy.ext.asyncio import AsyncSession
43
44 from maestro.muse_cli._repo import require_repo
45 from maestro.muse_cli.db import open_session
46 from maestro.muse_cli.errors import ExitCode
47 from maestro.muse_cli.models import MuseCliObject
48 from maestro.muse_cli.object_store import write_object
49
50 logger = logging.getLogger(__name__)
51
52 app = typer.Typer()
53
54 # ---------------------------------------------------------------------------
55 # Result type
56 # ---------------------------------------------------------------------------
57
58
class HashObjectResult:
    """Structured result from ``muse hash-object``.

    Records the computed SHA-256 digest and whether the object was written
    to the store.

    Args:
        object_id: The 64-character lowercase hex SHA-256 digest.
        stored: ``True`` when the object was written to the store
            (``-w`` flag), ``False`` for a compute-only run.
        already_existed: ``True`` when ``-w`` was given but the object was
            already present in the store (idempotent).
    """

    def __init__(
        self,
        *,
        object_id: str,
        stored: bool,
        already_existed: bool = False,
    ) -> None:
        # All fields are keyword-only so call sites stay self-describing.
        self.object_id = object_id
        self.stored = stored
        self.already_existed = already_existed

    def __repr__(self) -> str:
        """Return a constructor-style representation for logs and debugging."""
        return (
            f"{type(self).__name__}("
            f"object_id={self.object_id!r}, "
            f"stored={self.stored!r}, "
            f"already_existed={self.already_existed!r})"
        )
83
84
85 # ---------------------------------------------------------------------------
86 # Pure hash helper
87 # ---------------------------------------------------------------------------
88
89
def hash_bytes(content: bytes) -> str:
    """Compute the SHA-256 digest of *content* as a hex string.

    Args:
        content: Raw bytes to hash.

    Returns:
        The digest rendered as 64 lowercase hexadecimal characters.
    """
    hasher = hashlib.sha256()
    hasher.update(content)
    return hasher.hexdigest()
103
104
105 # ---------------------------------------------------------------------------
106 # Async core — fully injectable for tests
107 # ---------------------------------------------------------------------------
108
109
async def _hash_object_async(
    *,
    session: AsyncSession,
    content: bytes,
    write: bool,
    repo_root: pathlib.Path | None = None,
) -> HashObjectResult:
    """Hash *content* and, when requested, persist it as a Muse object.

    The object ID is always computed with :func:`hash_bytes`. When *write*
    is ``False`` nothing else happens. When *write* is ``True``:

    1. ``MuseCliObject`` is looked up by object ID via ``session.get``. If no
       row is found, a new row carrying the ID and byte count is added and
       the session is flushed. The session is not committed here; that is
       left to the caller.
    2. ``write_object`` is called with the repo root, the object ID and the
       raw bytes, regardless of whether the DB row was already present.

    Args:
        session: Open async DB session (used only when *write* is ``True``).
        content: Raw bytes to hash (and optionally store).
        write: When ``True``, persist the object to the store.
        repo_root: Root passed to ``write_object`` for the on-disk store.
            When ``None`` it is obtained from
            :func:`~maestro.muse_cli._repo.require_repo`.

    Returns:
        :class:`HashObjectResult` with the computed ID and storage status.
    """
    digest = hash_bytes(content)

    # Compute-only mode: the session is never touched.
    if not write:
        return HashObjectResult(object_id=digest, stored=False)

    # Look the row up first so the result can report whether it was new.
    found = await session.get(MuseCliObject, digest)
    present = found is not None

    if present:
        logger.debug("⚠️ Object %s already in DB — skipped", digest[:8])
    else:
        session.add(MuseCliObject(object_id=digest, size_bytes=len(content)))
        await session.flush()
        logger.info("✅ Stored object %s (%d bytes)", digest[:8], len(content))

    # The on-disk write is attempted unconditionally, after the DB step.
    if repo_root is None:
        store_root = require_repo()
    else:
        store_root = repo_root
    write_object(store_root, digest, content)

    return HashObjectResult(
        object_id=digest,
        stored=True,
        already_existed=present,
    )
163
164
165 # ---------------------------------------------------------------------------
166 # Typer command
167 # ---------------------------------------------------------------------------
168
169
@app.callback(invoke_without_command=True)
def hash_object(
    ctx: typer.Context,
    file: str = typer.Argument(
        "",
        help="Path to the file to hash. Omit when using --stdin.",
        metavar="<file>",
    ),
    write: bool = typer.Option(
        False,
        "-w",
        "--write",
        help=(
            "Write the object into the content-addressed store "
            "(.muse/objects/) and the muse_cli_objects table."
        ),
    ),
    stdin: bool = typer.Option(
        False,
        "--stdin",
        help="Read content from stdin instead of a file.",
    ),
) -> None:
    """Compute the SHA-256 object ID for a file (or stdin content).

    Prints the 64-character hex hash to stdout. With ``-w``, the object is
    also written to the local store and the Postgres ``muse_cli_objects``
    table so it can be referenced by other plumbing commands.

    The hash is identical to the one ``muse commit`` would assign to the same
    file, ensuring cross-command content-addressability.
    """
    # NOTE: the docstring above is left verbatim because Typer surfaces it as
    # the command's --help text; maintainer notes live in comments instead.
    #
    # Exit codes: ExitCode.USER_ERROR for bad arguments or an unreadable
    # input file, ExitCode.INTERNAL_ERROR for any failure while hashing or
    # storing. ``ctx`` is accepted for the Typer callback signature and is
    # not used in the body.

    # Exactly one input source must be chosen.
    if stdin and file:
        typer.echo("❌ Provide a file path OR --stdin, not both.")
        raise typer.Exit(code=ExitCode.USER_ERROR)

    if not stdin and not file:
        typer.echo("❌ Provide a file path or --stdin.")
        raise typer.Exit(code=ExitCode.USER_ERROR)

    # Resolve the repo once, before reading input, and reuse it for the
    # write path so the core does not have to look it up a second time.
    repo_root = require_repo()

    # Read content — either from the file or from stdin.
    if stdin:
        content = sys.stdin.buffer.read()
    else:
        src = pathlib.Path(file)
        if not src.exists():
            typer.echo(f"❌ File not found: {file}")
            raise typer.Exit(code=ExitCode.USER_ERROR)
        if not src.is_file():
            typer.echo(f"❌ Not a regular file: {file}")
            raise typer.Exit(code=ExitCode.USER_ERROR)
        try:
            content = src.read_bytes()
        except OSError as exc:
            # Covers permission errors and the file vanishing between the
            # checks above and the read; report it instead of a traceback.
            typer.echo(f"❌ Cannot read file: {file} ({exc})")
            raise typer.Exit(code=ExitCode.USER_ERROR) from exc

    async def _run() -> None:
        if write:
            async with open_session() as session:
                result = await _hash_object_async(
                    session=session,
                    content=content,
                    write=True,
                    repo_root=repo_root,
                )
                # The core only flushes; the commit happens here.
                await session.commit()
        else:
            # Compute-only: no DB access needed.
            result = HashObjectResult(
                object_id=hash_bytes(content),
                stored=False,
            )
        typer.echo(result.object_id)

    try:
        asyncio.run(_run())
    except typer.Exit:
        # Let deliberate exits through rather than reporting them below
        # as internal errors.
        raise
    except Exception as exc:
        typer.echo(f"❌ muse hash-object failed: {exc}")
        logger.error("❌ muse hash-object error: %s", exc, exc_info=True)
        raise typer.Exit(code=ExitCode.INTERNAL_ERROR) from exc