cgcardona / muse public
test_code_manifest.py python
201 lines 8.2 KB
766ee24d feat: code domain leverages core invariants, query engine, manifests, p… Gabriel Cardona <gabriel@tellurstori.com> 1d ago
1 """Tests for the hierarchical code manifest in muse/plugins/code/manifest.py."""
2 from __future__ import annotations
3
4 import hashlib
5 import pathlib
6 import tempfile
7
8 import pytest
9
10 from muse.plugins.code.manifest import (
11 CodeManifest,
12 ManifestFileDiff,
13 build_code_manifest,
14 diff_manifests,
15 read_code_manifest,
16 write_code_manifest,
17 )
18
19
20 # ---------------------------------------------------------------------------
21 # Helpers
22 # ---------------------------------------------------------------------------
23
24
def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Lay out a bare ``.muse/objects`` skeleton beneath *tmp_path*.

    Each directory is created with a plain ``mkdir()``, so the call raises
    if ``.muse`` is already present or if *tmp_path* itself does not exist.

    Returns:
        *tmp_path* itself, to be used as the repository root.
    """
    dot_muse = tmp_path / ".muse"
    # Parent first, then child: neither mkdir() creates missing parents.
    for directory in (dot_muse, dot_muse / "objects"):
        directory.mkdir()
    return tmp_path
30
31
def _write_object(root: pathlib.Path, content: bytes) -> str:
    """Store *content* in the object store under *root*.

    The blob lands at ``.muse/objects/<first two hex chars>/<rest of digest>``,
    where the digest is the SHA-256 of *content*. The two-character shard
    directory (and any missing parents) is created on demand.

    Returns:
        The full SHA-256 hex digest of *content*.
    """
    digest = hashlib.sha256(content).hexdigest()
    prefix, remainder = digest[:2], digest[2:]
    shard_dir = root / ".muse" / "objects" / prefix
    shard_dir.mkdir(parents=True, exist_ok=True)
    (shard_dir / remainder).write_bytes(content)
    return digest
38
39
40 # ---------------------------------------------------------------------------
41 # build_code_manifest
42 # ---------------------------------------------------------------------------
43
44
class TestBuildCodeManifest:
    """Checks on ``build_code_manifest`` run against throwaway repositories."""

    def test_empty_snapshot(self) -> None:
        """An empty snapshot produces zero files, no packages and zero symbols."""
        with tempfile.TemporaryDirectory() as scratch:
            repo = _make_repo(pathlib.Path(scratch))
            result = build_code_manifest("snap1", {}, repo)
            assert result["snapshot_id"] == "snap1"
            assert result["total_files"] == 0
            assert result["packages"] == []
            assert result["total_symbols"] == 0

    def test_single_python_file(self) -> None:
        """One Python file yields one package holding one Python module."""
        with tempfile.TemporaryDirectory() as scratch:
            repo = _make_repo(pathlib.Path(scratch))
            object_id = _write_object(repo, b"def foo():\n return 1\n")
            snapshot = {"src/utils.py": object_id}
            result = build_code_manifest("snap1", snapshot, repo)
            assert result["total_files"] == 1
            assert result["semantic_files"] >= 1
            assert len(result["packages"]) == 1
            package = result["packages"][0]
            assert package["package"] == "src"
            assert len(package["modules"]) == 1
            module = package["modules"][0]
            assert module["module_path"] == "src/utils.py"
            assert module["language"] == "Python"

    def test_groups_by_package(self) -> None:
        """Files under ``src/`` and ``tests/`` show up as separate packages."""
        with tempfile.TemporaryDirectory() as scratch:
            repo = _make_repo(pathlib.Path(scratch))
            snapshot = {
                "src/a.py": _write_object(repo, b"x = 1\n"),
                "src/b.py": _write_object(repo, b"y = 2\n"),
                "tests/c.py": _write_object(repo, b"z = 3\n"),
            }
            result = build_code_manifest("snap1", snapshot, repo)
            assert result["total_files"] == 3
            package_names = {entry["package"] for entry in result["packages"]}
            assert "src" in package_names
            assert "tests" in package_names

    def test_manifest_hash_stable(self) -> None:
        """Building twice from identical inputs gives the same ``manifest_hash``."""
        with tempfile.TemporaryDirectory() as scratch:
            repo = _make_repo(pathlib.Path(scratch))
            object_id = _write_object(repo, b"x = 1\n")
            first = build_code_manifest("snap1", {"a.py": object_id}, repo)
            second = build_code_manifest("snap1", {"a.py": object_id}, repo)
            assert first["manifest_hash"] == second["manifest_hash"]

    def test_non_semantic_file_has_empty_ast_hash(self) -> None:
        """A ``README.md`` entry gets an empty ``ast_hash`` and zero symbols."""
        with tempfile.TemporaryDirectory() as scratch:
            repo = _make_repo(pathlib.Path(scratch))
            object_id = _write_object(repo, b"some binary or text content")
            result = build_code_manifest("snap1", {"README.md": object_id}, repo)
            module = result["packages"][0]["modules"][0]
            assert module["ast_hash"] == ""
            assert module["symbol_count"] == 0
105
106
107 # ---------------------------------------------------------------------------
108 # diff_manifests
109 # ---------------------------------------------------------------------------
110
111
class TestDiffManifests:
    """Checks on ``diff_manifests`` using manifests built from in-memory sources."""

    def _build_simple(self, root: pathlib.Path, files: dict[str, bytes]) -> CodeManifest:
        """Write every entry of *files* into the object store and build a manifest.

        The manifest is always built with the snapshot id ``"snap"``.
        """
        snapshot: dict[str, str] = {
            path: _write_object(root, content) for path, content in files.items()
        }
        return build_code_manifest("snap", snapshot, root)

    def test_identical_manifests_no_diff(self) -> None:
        """Diffing a manifest against itself reports nothing."""
        with tempfile.TemporaryDirectory() as scratch:
            repo = _make_repo(pathlib.Path(scratch))
            manifest = self._build_simple(repo, {"a.py": b"x = 1\n"})
            changes = diff_manifests(manifest, manifest)
            assert changes == []

    def test_added_file_detected(self) -> None:
        """A file present only in the target is reported with change ``added``."""
        with tempfile.TemporaryDirectory() as scratch:
            repo = _make_repo(pathlib.Path(scratch))
            before = self._build_simple(repo, {"a.py": b"x = 1\n"})
            after = self._build_simple(repo, {"a.py": b"x = 1\n", "b.py": b"y = 2\n"})
            changes = diff_manifests(before, after)
            additions = [entry for entry in changes if entry["change"] == "added"]
            assert any(entry["path"] == "b.py" for entry in additions)

    def test_removed_file_detected(self) -> None:
        """A file present only in the base is reported with change ``removed``."""
        with tempfile.TemporaryDirectory() as scratch:
            repo = _make_repo(pathlib.Path(scratch))
            before = self._build_simple(repo, {"a.py": b"x = 1\n", "b.py": b"y = 2\n"})
            after = self._build_simple(repo, {"a.py": b"x = 1\n"})
            changes = diff_manifests(before, after)
            removals = [entry for entry in changes if entry["change"] == "removed"]
            assert any(entry["path"] == "b.py" for entry in removals)

    def test_semantic_change_detected(self) -> None:
        """Changing a function's return value yields one diff flagged as semantic."""
        with tempfile.TemporaryDirectory() as scratch:
            repo = _make_repo(pathlib.Path(scratch))
            before = self._build_simple(repo, {"a.py": b"def foo():\n return 1\n"})
            after = self._build_simple(repo, {"a.py": b"def foo():\n return 2\n"})
            changes = diff_manifests(before, after)
            assert len(changes) == 1
            assert changes[0]["semantic_change"] is True

    def test_whitespace_only_change_non_semantic(self) -> None:
        """Appending blank lines must, at most, produce a diff record for ``a.py``."""
        # The raw bytes differ, so content_hash differs; ast_hash is expected to
        # stay the same, but that depends on the parser.
        with tempfile.TemporaryDirectory() as scratch:
            repo = _make_repo(pathlib.Path(scratch))
            before = self._build_simple(repo, {"a.py": b"def foo():\n return 1\n"})
            after = self._build_simple(repo, {"a.py": b"def foo():\n return 1\n\n\n"})
            changes = diff_manifests(before, after)
            # Deliberately loose: only check the path, and only when a record
            # was actually emitted.
            assert not changes or changes[0]["path"] == "a.py"
164
165
166 # ---------------------------------------------------------------------------
167 # Persistence
168 # ---------------------------------------------------------------------------
169
170
class TestManifestPersistence:
    """Checks on ``write_code_manifest`` / ``read_code_manifest`` storage."""

    def test_write_and_read_roundtrip(self) -> None:
        """A written manifest can be read back by its ``manifest_hash``."""
        with tempfile.TemporaryDirectory() as scratch:
            repo = _make_repo(pathlib.Path(scratch))
            object_id = _write_object(repo, b"def my_fn():\n pass\n")
            written = build_code_manifest("snap1", {"src/a.py": object_id}, repo)

            write_code_manifest(repo, written)
            restored = read_code_manifest(repo, written["manifest_hash"])

            assert restored is not None
            assert restored["snapshot_id"] == "snap1"
            assert restored["manifest_hash"] == written["manifest_hash"]
            assert len(restored["packages"]) == len(written["packages"])

    def test_read_nonexistent_returns_none(self) -> None:
        """Reading an unknown hash returns ``None``."""
        with tempfile.TemporaryDirectory() as scratch:
            repo = _make_repo(pathlib.Path(scratch))
            missing = read_code_manifest(repo, "nonexistent_hash")
            assert missing is None

    def test_write_idempotent(self) -> None:
        """Writing the same manifest twice succeeds and it remains readable."""
        with tempfile.TemporaryDirectory() as scratch:
            repo = _make_repo(pathlib.Path(scratch))
            object_id = _write_object(repo, b"x = 1\n")
            manifest = build_code_manifest("snap1", {"a.py": object_id}, repo)
            # The repeated write must not raise.
            for _ in range(2):
                write_code_manifest(repo, manifest)
            restored = read_code_manifest(repo, manifest["manifest_hash"])
            assert restored is not None