test_stress_query_engine.py
python
| 1 | """Stress tests for the generic query engine and code query DSL. |
| 2 | |
| 3 | Covers: |
| 4 | - walk_history on linear chains of 100+ commits. |
| 5 | - CommitEvaluator with correct 3-arg signature. |
| 6 | - format_matches output format. |
| 7 | - Code query DSL: all field types, all operators, AND/OR composition. |
| 8 | - Code query DSL: unknown field raises ValueError. |
| 9 | - Query against large history (200 commits). |
| 10 | - Branch-scoped queries. |
| 11 | """ |
| 12 | |
| 13 | import datetime |
| 14 | import pathlib |
| 15 | |
| 16 | import pytest |
| 17 | |
| 18 | from muse.core.query_engine import CommitEvaluator, QueryMatch, format_matches, walk_history |
| 19 | from muse.core.store import CommitRecord, write_commit |
| 20 | from muse.domain import SemVerBump |
| 21 | from muse.plugins.code._code_query import build_evaluator |
| 22 | |
| 23 | |
| 24 | # --------------------------------------------------------------------------- |
| 25 | # Helpers |
| 26 | # --------------------------------------------------------------------------- |
| 27 | |
| 28 | |
def _now() -> datetime.datetime:
    """Return the current time as a timezone-aware UTC ``datetime``."""
    utc = datetime.timezone.utc
    return datetime.datetime.now(tz=utc)
| 31 | |
| 32 | |
def _write(
    root: pathlib.Path,
    cid: str,
    branch: str = "main",
    parent: str | None = None,
    author: str = "alice",
    agent_id: str = "",
    model_id: str = "",
    sem_ver_bump: SemVerBump = "none",
    message: str = "",
) -> CommitRecord:
    """Store a synthetic commit and point the branch head ref at it.

    Args:
        root: Repository root; the head ref is written below ``root/.muse``.
        cid: Commit id. Also used to derive the snapshot id (``snap-<cid>``)
            and the fallback message (``commit <cid>``).
        branch: Branch recorded on the commit and used as the ref file name.
        parent: Parent commit id, or ``None`` for a root commit.
        author: Author recorded on the commit.
        agent_id: Agent id recorded on the commit.
        model_id: Model id recorded on the commit.
        sem_ver_bump: Semantic-version bump recorded on the commit.
        message: Commit message; an empty string selects the fallback message.

    Returns:
        The ``CommitRecord`` that was passed to ``write_commit``.
    """
    record = CommitRecord(
        commit_id=cid,
        repo_id="repo",
        branch=branch,
        snapshot_id=f"snap-{cid}",
        message=message or f"commit {cid}",
        committed_at=_now(),
        parent_commit_id=parent,
        author=author,
        agent_id=agent_id,
        model_id=model_id,
        sem_ver_bump=sem_ver_bump,
    )
    write_commit(root, record)

    head_ref = root / ".muse" / "refs" / "heads" / branch
    # A branch such as "feature/x" maps to a nested ref path; create the
    # intermediate directories so the write below cannot fail on them.
    head_ref.parent.mkdir(parents=True, exist_ok=True)
    head_ref.write_text(cid)
    return record
| 61 | |
| 62 | |
def _make_match(commit: CommitRecord) -> QueryMatch:
    """Build a ``QueryMatch`` that mirrors the identifying fields of *commit*.

    The commit timestamp is stored in ISO-8601 form and the detail text is
    ``matched commit <commit_id>``.
    """
    commit_id = commit.commit_id
    timestamp = commit.committed_at.isoformat()
    return QueryMatch(
        commit_id=commit_id,
        author=commit.author,
        committed_at=timestamp,
        branch=commit.branch,
        detail=f"matched commit {commit_id}",
    )
| 71 | |
| 72 | |
@pytest.fixture
def repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Return ``tmp_path`` after creating ``.muse/commits`` and ``.muse/refs/heads``."""
    store_dir = tmp_path / ".muse"
    for parts in (("commits",), ("refs", "heads")):
        store_dir.joinpath(*parts).mkdir(parents=True)
    return tmp_path
| 79 | |
| 80 | |
| 81 | # =========================================================================== |
| 82 | # walk_history — basic |
| 83 | # =========================================================================== |
| 84 | |
| 85 | |
class TestWalkHistoryBasic:
    """Small-history checks for ``walk_history`` using hand-written evaluators."""

    @staticmethod
    def _accept_all(
        commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
    ) -> list[QueryMatch]:
        """Evaluator that reports exactly one match for every commit it is given."""
        return [_make_match(commit)]

    @staticmethod
    def _seed_chain(root: pathlib.Path, length: int) -> None:
        """Write ``length`` commits ``c000``, ``c001``, ... each parented on the previous one."""
        parent: str | None = None
        for index in range(length):
            commit_id = f"c{index:03d}"
            _write(root, commit_id, parent=parent)
            parent = commit_id

    def test_empty_history_no_matches(self, repo: pathlib.Path) -> None:
        """A branch with no ref yields no matches even with a match-all evaluator."""
        matches = walk_history(repo, "nonexistent-branch", self._accept_all)
        assert matches == []

    def test_single_commit_matches(self, repo: pathlib.Path) -> None:
        """One commit plus a match-all evaluator gives exactly that commit."""
        _write(repo, "only", branch="main")
        matches = walk_history(repo, "main", self._accept_all)
        assert len(matches) == 1
        assert matches[0]["commit_id"] == "only"

    def test_single_commit_no_match(self, repo: pathlib.Path) -> None:
        """An evaluator that never matches produces an empty result."""
        _write(repo, "only", branch="main")

        def reject_all(
            commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
        ) -> list[QueryMatch]:
            return []

        assert walk_history(repo, "main", reject_all) == []

    def test_linear_chain_all_match(self, repo: pathlib.Path) -> None:
        """Every commit of a 10-commit chain is reported."""
        self._seed_chain(repo, 10)
        matches = walk_history(repo, "main", self._accept_all)
        assert len(matches) == 10

    def test_linear_chain_filtered(self, repo: pathlib.Path) -> None:
        """Only alice's commits (even indices) are reported from an alternating chain."""
        parent: str | None = None
        for index in range(10):
            commit_id = f"c{index:03d}"
            _write(
                repo,
                commit_id,
                parent=parent,
                author="bob" if index % 2 else "alice",
            )
            parent = commit_id

        def alice_only(
            commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
        ) -> list[QueryMatch]:
            return [_make_match(commit)] if commit.author == "alice" else []

        matches = walk_history(repo, "main", alice_only)
        assert len(matches) == 5

    def test_max_commits_limits_walk(self, repo: pathlib.Path) -> None:
        """``max_commits=10`` caps the result at 10 matches on a 50-commit chain."""
        self._seed_chain(repo, 50)
        matches = walk_history(repo, "main", self._accept_all, max_commits=10)
        assert len(matches) == 10

    def test_matches_include_commit_id_and_branch(self, repo: pathlib.Path) -> None:
        """Returned matches carry the commit id, branch and author of the commit."""
        _write(repo, "abc123", branch="main", author="alice")
        first = walk_history(repo, "main", self._accept_all)[0]
        assert first["commit_id"] == "abc123"
        assert first["branch"] == "main"
        assert first["author"] == "alice"
| 154 | |
| 155 | |
| 156 | # =========================================================================== |
| 157 | # walk_history — large history |
| 158 | # =========================================================================== |
| 159 | |
| 160 | |
class TestWalkHistoryLarge:
    """``walk_history`` over linear chains of 100 and 200 commits."""

    def test_200_commit_chain_full_scan(self, repo: pathlib.Path) -> None:
        """Every third commit of a 200-commit chain is tagged ``bot`` and must be found."""
        parent: str | None = None
        for index in range(200):
            commit_id = f"large-{index:04d}"
            _write(
                repo,
                commit_id,
                parent=parent,
                agent_id="" if index % 3 else "bot",
            )
            parent = commit_id

        def bot_only(
            commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
        ) -> list[QueryMatch]:
            return [_make_match(commit)] if commit.agent_id == "bot" else []

        matches = walk_history(repo, "main", bot_only)
        # Indices 0, 3, 6, ..., 198 are bot commits: 67 of the 200.
        assert len(matches) == 67

    def test_query_by_agent_across_100_commits(self, repo: pathlib.Path) -> None:
        """Five agents rotate over 100 commits; ``agent-0`` owns one in five."""
        parent: str | None = None
        for index in range(100):
            commit_id = f"agent-test-{index:04d}"
            _write(repo, commit_id, parent=parent, agent_id=f"agent-{index % 5}")
            parent = commit_id

        def agent_0_only(
            commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
        ) -> list[QueryMatch]:
            return [_make_match(commit)] if commit.agent_id == "agent-0" else []

        matches = walk_history(repo, "main", agent_0_only)
        assert len(matches) == 20  # 100 / 5 = 20
| 193 | |
| 194 | |
| 195 | # =========================================================================== |
| 196 | # format_matches |
| 197 | # =========================================================================== |
| 198 | |
| 199 | |
class TestFormatMatches:
    """Output checks for ``format_matches``."""

    @staticmethod
    def _match(commit_id: str, detail: str) -> QueryMatch:
        """Build a ``QueryMatch`` on ``main`` by ``alice`` stamped with the current time."""
        return QueryMatch(
            commit_id=commit_id,
            branch="main",
            author="alice",
            committed_at=_now().isoformat(),
            detail=detail,
        )

    def test_empty_matches_produces_output(self) -> None:
        """An empty match list still formats to a string."""
        rendered = format_matches([])
        assert isinstance(rendered, str)

    def test_single_match_includes_commit_id(self) -> None:
        """The commit id of a single match appears in the rendered text."""
        rendered = format_matches([self._match("abc12345", "test match")])
        assert "abc12345" in rendered

    def test_multiple_matches_all_present(self) -> None:
        """Each of five commit ids appears in the rendered text."""
        commit_ids = [f"id{n:04d}" for n in range(5)]
        rendered = format_matches([self._match(cid, "matched") for cid in commit_ids])
        for cid in commit_ids:
            assert cid in rendered
| 230 | |
| 231 | |
| 232 | # =========================================================================== |
| 233 | # Code query DSL — build_evaluator |
| 234 | # =========================================================================== |
| 235 | |
| 236 | |
class TestCodeQueryDSL:
    """Checks for evaluators produced by ``build_evaluator`` from DSL query strings.

    Each test writes a short commit chain on ``main``, builds an evaluator
    from a query string, and walks the branch with ``walk_history``.
    """

    @staticmethod
    def _run(root: pathlib.Path, query: str) -> list[QueryMatch]:
        """Build an evaluator for *query* and walk the ``main`` branch of *root*."""
        return walk_history(root, "main", build_evaluator(query))

    # --- author field ---

    def test_author_equals(self, repo: pathlib.Path) -> None:
        """``==`` on author selects alice's commit and excludes bob's."""
        _write(repo, "a1", author="alice")
        _write(repo, "a2", author="bob", parent="a1")
        result = self._run(repo, "author == 'alice'")
        assert any(m["commit_id"] == "a1" for m in result)
        assert not any(m["commit_id"] == "a2" for m in result)

    def test_author_not_equals(self, repo: pathlib.Path) -> None:
        """``!=`` on author selects bob's commit and excludes alice's."""
        _write(repo, "b1", author="alice")
        _write(repo, "b2", author="bob", parent="b1")
        result = self._run(repo, "author != 'alice'")
        # all() over an empty result is vacuously true, so pin the expected
        # commit as well; otherwise an evaluator matching nothing would pass.
        assert {m["commit_id"] for m in result} == {"b2"}
        assert all(m["author"] != "alice" for m in result)

    def test_author_contains(self, repo: pathlib.Path) -> None:
        """``contains`` matches a substring of the author."""
        _write(repo, "c1", author="alice-smith")
        _write(repo, "c2", author="bob-jones", parent="c1")
        result = self._run(repo, "author contains 'alice'")
        assert len(result) == 1
        assert "alice" in result[0]["author"]

    def test_author_startswith(self, repo: pathlib.Path) -> None:
        """``startswith`` matches a prefix of the author."""
        _write(repo, "d1", author="agent-claude")
        _write(repo, "d2", author="human-alice", parent="d1")
        result = self._run(repo, "author startswith 'agent'")
        assert len(result) == 1
        assert result[0]["author"].startswith("agent")

    # --- agent_id field ---

    def test_agent_id_equals(self, repo: pathlib.Path) -> None:
        """``==`` on agent_id selects only the commit with that agent."""
        _write(repo, "e1", agent_id="claude-v4")
        _write(repo, "e2", agent_id="gpt-4o", parent="e1")
        result = self._run(repo, "agent_id == 'claude-v4'")
        assert len(result) == 1
        assert result[0]["commit_id"] == "e1"

    # --- sem_ver_bump field ---

    def test_sem_ver_bump_major(self, repo: pathlib.Path) -> None:
        """Exactly one of the major/minor/patch commits matches ``major``."""
        _write(repo, "f1", sem_ver_bump="major")
        _write(repo, "f2", sem_ver_bump="minor", parent="f1")
        _write(repo, "f3", sem_ver_bump="patch", parent="f2")
        result = self._run(repo, "sem_ver_bump == 'major'")
        assert len(result) == 1

    # --- model_id field ---

    def test_model_id_contains(self, repo: pathlib.Path) -> None:
        """``contains`` on model_id selects the single claude commit."""
        _write(repo, "g1", model_id="claude-3-5-sonnet-20241022")
        _write(repo, "g2", model_id="gpt-4o-2024-08-06", parent="g1")
        result = self._run(repo, "model_id contains 'claude'")
        assert len(result) == 1

    # --- AND composition ---

    def test_and_composition(self, repo: pathlib.Path) -> None:
        """``and`` requires both conditions; only h1 satisfies both."""
        _write(repo, "h1", author="alice", agent_id="bot-1")
        _write(repo, "h2", author="alice", agent_id="bot-2", parent="h1")
        _write(repo, "h3", author="bob", agent_id="bot-1", parent="h2")
        result = self._run(repo, "author == 'alice' and agent_id == 'bot-1'")
        assert len(result) == 1
        assert result[0]["commit_id"] == "h1"

    # --- OR composition ---

    def test_or_composition(self, repo: pathlib.Path) -> None:
        """``or`` accepts either author; charlie's commit is excluded."""
        _write(repo, "i1", author="alice")
        _write(repo, "i2", author="bob", parent="i1")
        _write(repo, "i3", author="charlie", parent="i2")
        result = self._run(repo, "author == 'alice' or author == 'bob'")
        assert len(result) == 2

    # --- complex nested AND OR ---

    def test_complex_and_or(self, repo: pathlib.Path) -> None:
        """Major and minor commits match; the patch commit does not."""
        _write(repo, "j1", author="alice", sem_ver_bump="major")
        _write(repo, "j2", author="bob", sem_ver_bump="minor", parent="j1")
        _write(repo, "j3", author="alice", sem_ver_bump="patch", parent="j2")
        # NOTE(review): despite the section title, this query only uses `or`.
        # The DSL's and/or precedence and grouping rules are not visible from
        # this file, so the expression is left unchanged rather than guessed.
        result = self._run(
            repo, "sem_ver_bump == 'major' or sem_ver_bump == 'minor'"
        )
        assert len(result) == 2

    # --- error cases ---

    def test_unknown_field_raises_value_error(self) -> None:
        """A field name the DSL does not know is rejected with ``ValueError``."""
        with pytest.raises(ValueError):
            build_evaluator("unknown_field == 'something'")

    def test_unknown_operator_raises_value_error(self) -> None:
        """An operator the DSL does not know is rejected with ``ValueError``."""
        with pytest.raises(ValueError):
            build_evaluator("author REGEX 'alice'")

    def test_empty_query_raises(self) -> None:
        """An empty query string raises ``ValueError`` or ``IndexError``."""
        with pytest.raises((ValueError, IndexError)):
            build_evaluator("")

    # --- branch field ---

    def test_branch_field_matches_correctly(self, repo: pathlib.Path) -> None:
        """``branch == 'main'`` selects the commit written on ``main``."""
        _write(repo, "k1", branch="main", author="alice")
        result = self._run(repo, "branch == 'main'")
        # Pin the expected commit: all() alone would pass on an empty result.
        assert {m["commit_id"] for m in result} == {"k1"}
        assert all(m["branch"] == "main" for m in result)