cgcardona / muse public
test_stress_query_engine.py python
354 lines 12.9 KB
119290fc Add mission-critical stress test suite (9 new files, 1716 tests total) (#76) Gabriel Cardona <cgcardona@gmail.com> 1d ago
1 """Stress tests for the generic query engine and code query DSL.
2
3 Covers:
4 - walk_history on linear chains of 100+ commits.
5 - CommitEvaluator with correct 3-arg signature.
6 - format_matches output format.
7 - Code query DSL: all field types, all operators, AND/OR composition.
8 - Code query DSL: unknown field raises ValueError.
9 - Query against large history (200 commits).
10 - Branch-scoped queries.
11 """
12 from __future__ import annotations
13
14 import datetime
15 import pathlib
16
17 import pytest
18
19 from muse.core.query_engine import CommitEvaluator, QueryMatch, format_matches, walk_history
20 from muse.core.store import CommitRecord, write_commit
21 from muse.domain import SemVerBump
22 from muse.plugins.code._code_query import build_evaluator
23
24
25 # ---------------------------------------------------------------------------
26 # Helpers
27 # ---------------------------------------------------------------------------
28
29
def _now() -> datetime.datetime:
    """Return the current moment as a timezone-aware UTC ``datetime``."""
    utc = datetime.timezone.utc
    return datetime.datetime.now(tz=utc)
32
33
def _write(
    root: pathlib.Path,
    cid: str,
    branch: str = "main",
    parent: str | None = None,
    author: str = "alice",
    agent_id: str = "",
    model_id: str = "",
    sem_ver_bump: SemVerBump = "none",
    message: str = "",
) -> CommitRecord:
    """Persist a synthetic commit and point the branch head ref at it.

    Args:
        root: Repository root that holds the ``.muse`` directory.
        cid: Commit id; also used to derive the snapshot id (``snap-<cid>``)
            and the default message.
        branch: Branch recorded on the commit and whose head ref is updated.
            May contain ``/`` (for example ``feature/x``).
        parent: Id of the parent commit, or ``None`` for a root commit.
        author: Author recorded on the commit.
        agent_id: Agent id recorded on the commit.
        model_id: Model id recorded on the commit.
        sem_ver_bump: Semantic-version bump recorded on the commit.
        message: Commit message; when empty, ``"commit <cid>"`` is used.

    Returns:
        The ``CommitRecord`` that was written.
    """
    record = CommitRecord(
        commit_id=cid,
        repo_id="repo",
        branch=branch,
        snapshot_id=f"snap-{cid}",
        message=message or f"commit {cid}",
        committed_at=_now(),
        parent_commit_id=parent,
        author=author,
        agent_id=agent_id,
        model_id=model_id,
        sem_ver_bump=sem_ver_bump,
    )
    write_commit(root, record)

    head_ref = root / ".muse" / "refs" / "heads" / branch
    # A branch name containing "/" nests the ref file below refs/heads; make
    # sure the intermediate directories exist so write_text does not fail
    # with FileNotFoundError.
    head_ref.parent.mkdir(parents=True, exist_ok=True)
    head_ref.write_text(cid)
    return record
62
63
def _make_match(commit: CommitRecord) -> QueryMatch:
    """Build the ``QueryMatch`` the test evaluators report for ``commit``.

    The commit's id, author, branch and ISO-formatted commit time are copied
    over; ``detail`` is the fixed text ``"matched commit <commit_id>"``.
    """
    matched_id = commit.commit_id
    timestamp = commit.committed_at.isoformat()
    detail_text = f"matched commit {matched_id}"
    return QueryMatch(
        commit_id=matched_id,
        author=commit.author,
        committed_at=timestamp,
        branch=commit.branch,
        detail=detail_text,
    )
72
73
@pytest.fixture
def repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Provide a temporary repository root with an empty ``.muse`` layout.

    Creates ``.muse/commits`` and ``.muse/refs/heads`` beneath pytest's
    ``tmp_path`` and returns ``tmp_path`` itself.
    """
    for sub_path in (("commits",), ("refs", "heads")):
        tmp_path.joinpath(".muse", *sub_path).mkdir(parents=True)
    return tmp_path
80
81
82 # ===========================================================================
83 # walk_history — basic
84 # ===========================================================================
85
86
class TestWalkHistoryBasic:
    """``walk_history`` on a missing branch, a single commit and short chains."""

    @staticmethod
    def _match_every(
        commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
    ) -> list[QueryMatch]:
        """Evaluator that reports one match for every commit it is given."""
        return [_make_match(commit)]

    @staticmethod
    def _chain_ids(count: int) -> list[str]:
        """Return the commit ids ``c000``, ``c001``, ... for a chain of ``count``."""
        return [f"c{n:03d}" for n in range(count)]

    def test_empty_history_no_matches(self, repo: pathlib.Path) -> None:
        # No commit or ref is written, so the branch does not exist.
        found = walk_history(repo, "nonexistent-branch", self._match_every)
        assert found == []

    def test_single_commit_matches(self, repo: pathlib.Path) -> None:
        _write(repo, "only", branch="main")
        found = walk_history(repo, "main", self._match_every)
        assert len(found) == 1
        assert found[0]["commit_id"] == "only"

    def test_single_commit_no_match(self, repo: pathlib.Path) -> None:
        _write(repo, "only", branch="main")

        def match_nothing(
            commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
        ) -> list[QueryMatch]:
            return []

        found = walk_history(repo, "main", match_nothing)
        assert found == []

    def test_linear_chain_all_match(self, repo: pathlib.Path) -> None:
        ids = self._chain_ids(10)
        # Pair each commit with its predecessor; the first one has no parent.
        for parent_id, cid in zip([None, *ids], ids):
            _write(repo, cid, parent=parent_id)
        found = walk_history(repo, "main", self._match_every)
        assert len(found) == 10

    def test_linear_chain_filtered(self, repo: pathlib.Path) -> None:
        ids = self._chain_ids(10)
        for position, cid in enumerate(ids):
            parent_id = ids[position - 1] if position else None
            # Even positions are authored by alice, odd positions by bob.
            who = "bob" if position % 2 else "alice"
            _write(repo, cid, parent=parent_id, author=who)

        def alice_only(
            commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
        ) -> list[QueryMatch]:
            return [_make_match(commit)] if commit.author == "alice" else []

        found = walk_history(repo, "main", alice_only)
        assert len(found) == 5

    def test_max_commits_limits_walk(self, repo: pathlib.Path) -> None:
        ids = self._chain_ids(50)
        for parent_id, cid in zip([None, *ids], ids):
            _write(repo, cid, parent=parent_id)
        found = walk_history(repo, "main", self._match_every, max_commits=10)
        assert len(found) == 10

    def test_matches_include_commit_id_and_branch(self, repo: pathlib.Path) -> None:
        _write(repo, "abc123", branch="main", author="alice")
        first = walk_history(repo, "main", self._match_every)[0]
        assert first["commit_id"] == "abc123"
        assert first["branch"] == "main"
        assert first["author"] == "alice"
155
156
157 # ===========================================================================
158 # walk_history — large history
159 # ===========================================================================
160
161
class TestWalkHistoryLarge:
    """``walk_history`` over long linear chains with selective evaluators."""

    def test_200_commit_chain_full_scan(self, repo: pathlib.Path) -> None:
        ids = [f"large-{n:04d}" for n in range(200)]
        for position, (parent_id, cid) in enumerate(zip([None, *ids], ids)):
            # Every third commit (positions 0, 3, 6, ...) is tagged as "bot".
            _write(repo, cid, parent=parent_id, agent_id="" if position % 3 else "bot")

        def bot_only(
            commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
        ) -> list[QueryMatch]:
            return [_make_match(commit)] if commit.agent_id == "bot" else []

        found = walk_history(repo, "main", bot_only)
        # Positions 0, 3, ..., 198 out of 200 commits -> 67 bot commits.
        assert len(found) == 67

    def test_query_by_agent_across_100_commits(self, repo: pathlib.Path) -> None:
        ids = [f"agent-test-{n:04d}" for n in range(100)]
        for position, (parent_id, cid) in enumerate(zip([None, *ids], ids)):
            # Agents cycle through agent-0 .. agent-4.
            _write(repo, cid, parent=parent_id, agent_id=f"agent-{position % 5}")

        def agent_0_only(
            commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
        ) -> list[QueryMatch]:
            return [_make_match(commit)] if commit.agent_id == "agent-0" else []

        found = walk_history(repo, "main", agent_0_only)
        assert len(found) == 20  # 100 commits / 5 agents
194
195
196 # ===========================================================================
197 # format_matches
198 # ===========================================================================
199
200
class TestFormatMatches:
    """``format_matches`` output for empty, single and multiple match lists."""

    def test_empty_matches_produces_output(self) -> None:
        rendered = format_matches([])
        assert isinstance(rendered, str)

    def test_single_match_includes_commit_id(self) -> None:
        timestamp = _now().isoformat()
        only = QueryMatch(
            commit_id="abc12345",
            branch="main",
            author="alice",
            committed_at=timestamp,
            detail="test match",
        )
        assert "abc12345" in format_matches([only])

    def test_multiple_matches_all_present(self) -> None:
        ids = [f"id{n:04d}" for n in range(5)]
        matches = []
        for cid in ids:
            matches.append(
                QueryMatch(
                    commit_id=cid,
                    branch="main",
                    author="alice",
                    committed_at=_now().isoformat(),
                    detail="matched",
                )
            )
        rendered = format_matches(matches)
        # Every commit id must appear somewhere in the rendered text.
        for cid in ids:
            assert cid in rendered
231
232
233 # ===========================================================================
234 # Code query DSL — build_evaluator
235 # ===========================================================================
236
237
class TestCodeQueryDSL:
    """Run evaluators produced by ``build_evaluator`` through ``walk_history``.

    Each test writes a short linear chain on ``main`` and checks which commits
    the query selects. Assertions pin the matching commit ids (not just the
    count, and never only an ``all(...)`` over the result) so that an
    evaluator returning nothing, or the wrong commits, cannot pass.
    """

    # --- author field ---

    def test_author_equals(self, repo: pathlib.Path) -> None:
        _write(repo, "a1", author="alice")
        _write(repo, "a2", author="bob", parent="a1")
        evaluator = build_evaluator("author == 'alice'")
        result = walk_history(repo, "main", evaluator)
        assert any(m["commit_id"] == "a1" for m in result)
        assert not any(m["commit_id"] == "a2" for m in result)

    def test_author_not_equals(self, repo: pathlib.Path) -> None:
        _write(repo, "b1", author="alice")
        _write(repo, "b2", author="bob", parent="b1")
        evaluator = build_evaluator("author != 'alice'")
        result = walk_history(repo, "main", evaluator)
        # all() is vacuously true on an empty result, so pin the expected
        # commit first: only bob's commit may be selected.
        assert [m["commit_id"] for m in result] == ["b2"]
        assert all(m["author"] != "alice" for m in result)

    def test_author_contains(self, repo: pathlib.Path) -> None:
        _write(repo, "c1", author="alice-smith")
        _write(repo, "c2", author="bob-jones", parent="c1")
        evaluator = build_evaluator("author contains 'alice'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert "alice" in result[0]["author"]

    def test_author_startswith(self, repo: pathlib.Path) -> None:
        _write(repo, "d1", author="agent-claude")
        _write(repo, "d2", author="human-alice", parent="d1")
        evaluator = build_evaluator("author startswith 'agent'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert result[0]["author"].startswith("agent")

    # --- agent_id field ---

    def test_agent_id_equals(self, repo: pathlib.Path) -> None:
        _write(repo, "e1", agent_id="claude-v4")
        _write(repo, "e2", agent_id="gpt-4o", parent="e1")
        evaluator = build_evaluator("agent_id == 'claude-v4'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert result[0]["commit_id"] == "e1"

    # --- sem_ver_bump field ---

    def test_sem_ver_bump_major(self, repo: pathlib.Path) -> None:
        _write(repo, "f1", sem_ver_bump="major")
        _write(repo, "f2", sem_ver_bump="minor", parent="f1")
        _write(repo, "f3", sem_ver_bump="patch", parent="f2")
        evaluator = build_evaluator("sem_ver_bump == 'major'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        # The count alone would also pass if the wrong commit were selected.
        assert result[0]["commit_id"] == "f1"

    # --- model_id field ---

    def test_model_id_contains(self, repo: pathlib.Path) -> None:
        _write(repo, "g1", model_id="claude-3-5-sonnet-20241022")
        _write(repo, "g2", model_id="gpt-4o-2024-08-06", parent="g1")
        evaluator = build_evaluator("model_id contains 'claude'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert result[0]["commit_id"] == "g1"

    # --- AND composition ---

    def test_and_composition(self, repo: pathlib.Path) -> None:
        _write(repo, "h1", author="alice", agent_id="bot-1")
        _write(repo, "h2", author="alice", agent_id="bot-2", parent="h1")
        _write(repo, "h3", author="bob", agent_id="bot-1", parent="h2")
        evaluator = build_evaluator("author == 'alice' and agent_id == 'bot-1'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert result[0]["commit_id"] == "h1"

    # --- OR composition ---

    def test_or_composition(self, repo: pathlib.Path) -> None:
        _write(repo, "i1", author="alice")
        _write(repo, "i2", author="bob", parent="i1")
        _write(repo, "i3", author="charlie", parent="i2")
        evaluator = build_evaluator("author == 'alice' or author == 'bob'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 2
        assert {m["commit_id"] for m in result} == {"i1", "i2"}

    # --- complex nested AND OR ---

    def test_complex_and_or(self, repo: pathlib.Path) -> None:
        # NOTE(review): despite the section title, this query only uses OR;
        # the authors assigned below do not influence the outcome. A mixed
        # AND/OR query would need the DSL's precedence rules confirmed first.
        _write(repo, "j1", author="alice", sem_ver_bump="major")
        _write(repo, "j2", author="bob", sem_ver_bump="minor", parent="j1")
        _write(repo, "j3", author="alice", sem_ver_bump="patch", parent="j2")
        evaluator = build_evaluator(
            "sem_ver_bump == 'major' or sem_ver_bump == 'minor'"
        )
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 2
        assert {m["commit_id"] for m in result} == {"j1", "j2"}

    # --- error cases ---

    def test_unknown_field_raises_value_error(self) -> None:
        with pytest.raises(ValueError):
            build_evaluator("unknown_field == 'something'")

    def test_unknown_operator_raises_value_error(self) -> None:
        with pytest.raises(ValueError):
            build_evaluator("author REGEX 'alice'")

    def test_empty_query_raises(self) -> None:
        with pytest.raises((ValueError, IndexError)):
            build_evaluator("")

    # --- branch field ---

    def test_branch_field_matches_correctly(self, repo: pathlib.Path) -> None:
        _write(repo, "k1", branch="main", author="alice")
        evaluator = build_evaluator("branch == 'main'")
        result = walk_history(repo, "main", evaluator)
        # Require the commit to be selected; all() alone passes on an empty
        # result and would hide a branch predicate that never matches.
        assert [m["commit_id"] for m in result] == ["k1"]
        assert all(m["branch"] == "main" for m in result)