cgcardona / muse public
test_indices.py python
273 lines 10.5 KB
e6786943 feat: upgrade to Python 3.14, drop from __future__ import annotations Gabriel Cardona <cgcardona@gmail.com> 1d ago
1 """Tests for muse/core/indices.py — optional local index layer.
2
3 Coverage
4 --------
5 SymbolHistoryEntry
6 - to_dict / from_dict round-trip.
7 - All six fields preserved.
8
9 symbol_history index
10 - save_symbol_history writes a valid JSON file.
11 - load_symbol_history reads it back correctly.
12 - load returns empty dict when file absent.
13 - load returns empty dict on corrupt JSON.
14 - Sorting: entries dict is sorted by address.
15 - Multiple addresses, multiple events per address.
16
17 hash_occurrence index
18 - save_hash_occurrence writes a valid JSON file.
19 - load_hash_occurrence reads it back correctly.
20 - load returns empty dict when file absent.
21 - load returns empty dict on corrupt JSON.
22 - Addresses within each hash entry are sorted.
23
24 index_info
25 - Reports "absent" for missing indexes.
26 - Reports "present" + correct entry count for existing indexes.
27 - Reports "corrupt" for malformed JSON.
28 - Reports both indexes.
29
30 Schema compliance
31 - schema_version == 1.
32 - updated_at is present and is a non-empty string.
33 - index field matches the index name.
34 """
35
36 import json
37 import pathlib
38
39 import pytest
40
41 from muse.core.indices import (
42 HashOccurrenceIndex,
43 SymbolHistoryEntry,
44 SymbolHistoryIndex,
45 index_info,
46 load_hash_occurrence,
47 load_symbol_history,
48 save_hash_occurrence,
49 save_symbol_history,
50 )
51
52
53 # ---------------------------------------------------------------------------
54 # SymbolHistoryEntry
55 # ---------------------------------------------------------------------------
56
57
class TestSymbolHistoryEntry:
    """Serialisation round-trip checks for ``SymbolHistoryEntry``."""

    def test_to_dict_from_dict_round_trip(self) -> None:
        """Each of the six fields survives ``to_dict`` then ``from_dict``."""
        expected = {
            "commit_id": "abc123",
            "committed_at": "2026-01-01T00:00:00+00:00",
            "op": "insert",
            "content_id": "content_abc",
            "body_hash": "body_hash_xyz",
            "signature_id": "sig_id_pqr",
        }
        original = SymbolHistoryEntry(**expected)
        restored = SymbolHistoryEntry.from_dict(original.to_dict())
        # Compare field by field so a failure names the offending attribute.
        for field_name, value in expected.items():
            assert getattr(restored, field_name) == value

    def test_all_ops_preserved(self) -> None:
        """The ``op`` value is unchanged by a round-trip for every op kind."""
        ops = ("insert", "delete", "replace", "patch")
        for expected_op in ops:
            source = SymbolHistoryEntry("c", "t", expected_op, "cid", "bh", "sig")
            payload = source.to_dict()
            assert SymbolHistoryEntry.from_dict(payload).op == expected_op
81
82
83 # ---------------------------------------------------------------------------
84 # symbol_history index — save / load
85 # ---------------------------------------------------------------------------
86
87
class TestSymbolHistoryIndex:
    """Save/load behaviour of the ``symbol_history`` index file."""

    @staticmethod
    def _index_path(root: pathlib.Path) -> pathlib.Path:
        """Return the location of the symbol_history index under *root*."""
        return root / ".muse" / "indices" / "symbol_history.json"

    def _make_entry(self, op: str = "insert") -> SymbolHistoryEntry:
        """Build an entry with fixed contents; only *op* varies."""
        fields = {
            "commit_id": "commit1",
            "committed_at": "2026-01-01T00:00:00+00:00",
            "op": op,
            "content_id": "cid1",
            "body_hash": "bh1",
            "signature_id": "sig1",
        }
        return SymbolHistoryEntry(**fields)

    def test_save_creates_file(self, tmp_path: pathlib.Path) -> None:
        """Saving an index materialises the JSON file on disk."""
        save_symbol_history(tmp_path, {"src/a.py::f": [self._make_entry()]})
        assert self._index_path(tmp_path).exists()

    def test_round_trip(self, tmp_path: pathlib.Path) -> None:
        """A saved entry is returned by a subsequent load."""
        address = "src/billing.py::compute_total"
        replaced = self._make_entry("replace")
        save_symbol_history(tmp_path, {address: [replaced]})

        restored = load_symbol_history(tmp_path)

        assert address in restored
        history = restored[address]
        assert len(history) == 1
        assert history[0].op == "replace"
        assert history[0].commit_id == "commit1"

    def test_multiple_addresses(self, tmp_path: pathlib.Path) -> None:
        """Several addresses, each with its own number of events, are kept."""
        alpha_events = [self._make_entry("insert")]
        beta_events = [self._make_entry("insert"), self._make_entry("replace")]
        save_symbol_history(
            tmp_path,
            {"src/a.py::alpha": alpha_events, "src/b.py::beta": beta_events},
        )

        restored = load_symbol_history(tmp_path)

        assert len(restored["src/a.py::alpha"]) == 1
        assert len(restored["src/b.py::beta"]) == 2

    def test_load_absent_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """Loading without an index file yields an empty dict."""
        assert load_symbol_history(tmp_path) == {}

    def test_load_corrupt_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """Loading a file that is not valid JSON yields an empty dict."""
        target = self._index_path(tmp_path)
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text("{not valid json")
        assert load_symbol_history(tmp_path) == {}

    def test_schema_compliance(self, tmp_path: pathlib.Path) -> None:
        """The written document carries the expected envelope fields."""
        save_symbol_history(tmp_path, {"x.py::f": [self._make_entry()]})
        document = json.loads(self._index_path(tmp_path).read_text())
        assert document["schema_version"] == 1
        assert document["index"] == "symbol_history"
        assert document["updated_at"]  # non-empty string
        assert "x.py::f" in document["entries"]

    def test_empty_index_saved(self, tmp_path: pathlib.Path) -> None:
        """An empty index round-trips to an empty dict."""
        save_symbol_history(tmp_path, {})
        assert load_symbol_history(tmp_path) == {}

    def test_entries_sorted_by_address(self, tmp_path: pathlib.Path) -> None:
        """Addresses appear in sorted order in the file, whatever the input order."""
        # Deliberately unsorted insertion order: z, a, m.
        unsorted_index: SymbolHistoryIndex = {
            address: [self._make_entry()]
            for address in ("z.py::z", "a.py::a", "m.py::m")
        }
        save_symbol_history(tmp_path, unsorted_index)
        document = json.loads(self._index_path(tmp_path).read_text())
        addresses = list(document["entries"])
        assert addresses == sorted(addresses)
165
166
167 # ---------------------------------------------------------------------------
168 # hash_occurrence index — save / load
169 # ---------------------------------------------------------------------------
170
171
class TestHashOccurrenceIndex:
    """Save/load behaviour of the ``hash_occurrence`` index file."""

    @staticmethod
    def _index_path(root: pathlib.Path) -> pathlib.Path:
        """Return the location of the hash_occurrence index under *root*."""
        return root / ".muse" / "indices" / "hash_occurrence.json"

    def _read_raw(self, root: pathlib.Path):
        """Parse the index file under *root* as plain JSON."""
        return json.loads(self._index_path(root).read_text())

    def test_save_creates_file(self, tmp_path: pathlib.Path) -> None:
        """Saving an index materialises the JSON file on disk."""
        save_hash_occurrence(tmp_path, {"deadbeef": ["src/a.py::f", "src/b.py::g"]})
        assert self._index_path(tmp_path).exists()

    def test_round_trip(self, tmp_path: pathlib.Path) -> None:
        """Saved hashes and their addresses are returned by a later load."""
        occurrences: HashOccurrenceIndex = {
            "abc123": ["src/a.py::f", "src/b.py::g"],
            "def456": ["src/c.py::h"],
        }
        save_hash_occurrence(tmp_path, occurrences)

        restored = load_hash_occurrence(tmp_path)

        assert "abc123" in restored
        # Order within a hash is checked separately; compare as a set here.
        assert set(restored["abc123"]) == {"src/a.py::f", "src/b.py::g"}
        assert restored["def456"] == ["src/c.py::h"]

    def test_addresses_sorted_within_hash(self, tmp_path: pathlib.Path) -> None:
        """Addresses under one hash are written in sorted order."""
        save_hash_occurrence(tmp_path, {"hash1": ["z.py::z", "a.py::a", "m.py::m"]})
        written = self._read_raw(tmp_path)["entries"]["hash1"]
        assert written == sorted(written)

    def test_hashes_sorted(self, tmp_path: pathlib.Path) -> None:
        """Hash keys are written in sorted order."""
        save_hash_occurrence(tmp_path, {"zzz": ["a.py::f"], "aaa": ["b.py::g"]})
        hashes = list(self._read_raw(tmp_path)["entries"])
        assert hashes == sorted(hashes)

    def test_load_absent_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """Loading without an index file yields an empty dict."""
        restored = load_hash_occurrence(tmp_path)
        assert restored == {}

    def test_load_corrupt_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """Loading a file that is not valid JSON yields an empty dict."""
        target = self._index_path(tmp_path)
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text("not json at all")
        restored = load_hash_occurrence(tmp_path)
        assert restored == {}

    def test_schema_compliance(self, tmp_path: pathlib.Path) -> None:
        """The written document carries the expected envelope fields."""
        save_hash_occurrence(tmp_path, {"h": ["a.py::f"]})
        document = self._read_raw(tmp_path)
        assert document["schema_version"] == 1
        assert document["index"] == "hash_occurrence"
        assert document["updated_at"]

    def test_empty_index(self, tmp_path: pathlib.Path) -> None:
        """An empty index round-trips to an empty dict."""
        save_hash_occurrence(tmp_path, {})
        restored = load_hash_occurrence(tmp_path)
        assert restored == {}
230
231
232 # ---------------------------------------------------------------------------
233 # index_info
234 # ---------------------------------------------------------------------------
235
236
class TestIndexInfo:
    """Status reporting by ``index_info`` for both index files."""

    def test_both_absent(self, tmp_path: pathlib.Path) -> None:
        """With nothing on disk, both indexes are listed as absent."""
        report = index_info(tmp_path)
        assert len(report) == 2
        reported_names = {item["name"] for item in report}
        assert reported_names == {"symbol_history", "hash_occurrence"}
        statuses = [item["status"] for item in report]
        assert statuses == ["absent"] * len(report)

    def test_symbol_history_present(self, tmp_path: pathlib.Path) -> None:
        """A saved symbol_history index is present with its address count."""
        shared_entry = SymbolHistoryEntry("c", "t", "insert", "cid", "bh", "sig")
        save_symbol_history(
            tmp_path,
            {"a.py::f": [shared_entry], "b.py::g": [shared_entry]},
        )
        report = index_info(tmp_path)
        symbol_history = next(
            item for item in report if item["name"] == "symbol_history"
        )
        assert symbol_history["status"] == "present"
        # The count is compared against a string, not an int.
        assert symbol_history["entries"] == "2"

    def test_hash_occurrence_present(self, tmp_path: pathlib.Path) -> None:
        """A saved hash_occurrence index is present with its hash count."""
        save_hash_occurrence(tmp_path, {"h1": ["a.py::f"], "h2": ["b.py::g"]})
        report = index_info(tmp_path)
        hash_occurrence = next(
            item for item in report if item["name"] == "hash_occurrence"
        )
        assert hash_occurrence["status"] == "present"
        assert hash_occurrence["entries"] == "2"

    def test_corrupt_index_reported(self, tmp_path: pathlib.Path) -> None:
        """A symbol_history file with malformed JSON is flagged as corrupt."""
        broken_file = tmp_path / ".muse" / "indices" / "symbol_history.json"
        broken_file.parent.mkdir(parents=True, exist_ok=True)
        broken_file.write_text("{bad")
        report = index_info(tmp_path)
        symbol_history = next(
            item for item in report if item["name"] == "symbol_history"
        )
        assert symbol_history["status"] == "corrupt"

    def test_updated_at_present_when_index_exists(self, tmp_path: pathlib.Path) -> None:
        """An existing index reports a truthy ``updated_at`` value."""
        save_hash_occurrence(tmp_path, {"h": ["f.py::x"]})
        report = index_info(tmp_path)
        hash_occurrence = next(
            item for item in report if item["name"] == "hash_occurrence"
        )
        assert hash_occurrence["updated_at"]  # non-empty string