cgcardona / muse public
test_predicate.py python
458 lines 16.3 KB
e6786943 feat: upgrade to Python 3.14, drop from __future__ import annotations Gabriel Cardona <cgcardona@gmail.com> 1d ago
1 """Tests for the predicate DSL parser (muse/plugins/code/_predicate.py).
2
3 Coverage
4 --------
5 Tokenisation
6 - Valid atoms, operators, keywords, parentheses, whitespace skipping.
7 - Unexpected character raises PredicateError.
8
9 Atom parsing
10 - All seven operators: = ~= ^= $= != >= <=
11 - All ten predicate keys: kind, language, name, qualified_name, file,
12 hash, body_hash, signature_id, lineno_gt, lineno_lt.
13 - Double-quoted values.
14 - Unknown key raises PredicateError.
15 - Non-integer value for lineno_gt / lineno_lt raises PredicateError.
16
17 Compound expressions
18 - Implicit AND (adjacent atoms).
19 - Explicit OR.
20 - Explicit NOT.
21 - Parenthesised sub-expressions.
22 - Mixed OR / NOT / AND / parentheses.
23 - Trailing garbage token raises PredicateError.
24
25 parse_query
26 - Empty string → match-all predicate.
27 - Empty list → match-all predicate.
28 - List of atoms → implicit AND.
29 - Single string → parsed normally.
30
31 Predicate evaluation
32 - Each key field reads the correct SymbolRecord / file_path field.
33 - lineno_gt / lineno_lt boundary conditions (strict inequality).
34 - hash / body_hash / signature_id prefix matching.
35 - Case-insensitive string matching for =, ~=, ^=, $=, !=.
36 """
37
38 import pytest
39
40 from muse.plugins.code._predicate import PredicateError, parse_query
41 from muse.plugins.code.ast_parser import SymbolRecord
42
43
44 # ---------------------------------------------------------------------------
45 # Helpers
46 # ---------------------------------------------------------------------------
47
48
def _rec(
    *,
    kind: str = "function",
    name: str = "my_func",
    qualified_name: str = "my_func",
    lineno: int = 10,
    end_lineno: int = 20,
    content_id: str = "abcdef1234567890" * 2,
    body_hash: str = "deadbeef1234" * 4,
    signature_id: str = "cafebabe5678" * 4,
    metadata_id: str = "",
    canonical_key: str = "",
) -> SymbolRecord:
    """Build a ``SymbolRecord`` fixture for the tests in this module.

    Every field is keyword-only and has a default, so a test only needs to
    spell out the fields its predicate actually inspects.
    """
    record = SymbolRecord(
        # Identity of the symbol.
        kind=kind,
        name=name,
        qualified_name=qualified_name,
        canonical_key=canonical_key,
        # Location within the file.
        lineno=lineno,
        end_lineno=end_lineno,
        # Hash-like identifiers.
        content_id=content_id,
        body_hash=body_hash,
        signature_id=signature_id,
        metadata_id=metadata_id,
    )
    return record
74
75
def _match(
    query: str | list[str],
    file_path: str = "src/billing.py",
    kind: str = "function",
    name: str = "my_func",
    qualified_name: str = "my_func",
    lineno: int = 10,
) -> bool:
    """Parse *query* and evaluate it against one synthetic symbol.

    The symbol is built with ``_rec`` from the given ``kind``, ``name``,
    ``qualified_name`` and ``lineno``; the resulting predicate is called with
    *file_path* and that symbol, and its result is returned.
    """
    symbol = _rec(
        kind=kind,
        name=name,
        qualified_name=qualified_name,
        lineno=lineno,
    )
    return parse_query(query)(file_path, symbol)
87
88
89 # ---------------------------------------------------------------------------
90 # Empty / match-all
91 # ---------------------------------------------------------------------------
92
93
class TestMatchAll:
    """A query without any atoms yields a predicate that accepts a symbol."""

    def test_empty_string_matches_everything(self) -> None:
        accept_all = parse_query("")
        symbol = _rec()
        assert accept_all("src/foo.py", symbol)

    def test_empty_list_matches_everything(self) -> None:
        accept_all = parse_query([])
        symbol = _rec()
        assert accept_all("src/foo.py", symbol)

    def test_whitespace_only_matches_everything(self) -> None:
        accept_all = parse_query(" ")
        symbol = _rec()
        assert accept_all("src/foo.py", symbol)
106
107
108 # ---------------------------------------------------------------------------
109 # Single atom — kind key
110 # ---------------------------------------------------------------------------
111
112
class TestKindPredicate:
    """String operators applied to the ``kind`` key."""

    def test_exact_match(self) -> None:
        hit = _match("kind=function", kind="function")
        assert hit

    def test_exact_match_no_hit(self) -> None:
        hit = _match("kind=class", kind="function")
        assert not hit

    def test_case_insensitive(self) -> None:
        # Query value differs from the stored kind only by letter case.
        hit = _match("kind=Function", kind="function")
        assert hit

    def test_not_equal(self) -> None:
        differs = _match("kind!=class", kind="function")
        assert differs
        same = _match("kind!=function", kind="function")
        assert not same

    def test_contains(self) -> None:
        present = _match("kind~=unc", kind="function")
        assert present
        absent = _match("kind~=xyz", kind="function")
        assert not absent

    def test_starts_with(self) -> None:
        good_prefix = _match("kind^=func", kind="function")
        assert good_prefix
        bad_prefix = _match("kind^=class", kind="function")
        assert not bad_prefix

    def test_ends_with(self) -> None:
        good_suffix = _match("kind$=tion", kind="function")
        assert good_suffix
        bad_suffix = _match("kind$=ass", kind="function")
        assert not bad_suffix
138
139
140 # ---------------------------------------------------------------------------
141 # name key
142 # ---------------------------------------------------------------------------
143
144
class TestNamePredicate:
    """String operators applied to the ``name`` key."""

    def test_exact(self) -> None:
        query = "name=compute_total"
        assert _match(query, name="compute_total")
        assert not _match(query, name="compute_invoice")

    def test_contains(self) -> None:
        symbol_name = "compute_total"
        assert _match("name~=total", name=symbol_name)
        assert not _match("name~=invoice", name=symbol_name)

    def test_starts_with(self) -> None:
        hit = _match("name^=compute", name="compute_total")
        assert hit

    def test_ends_with(self) -> None:
        hit = _match("name$=total", name="compute_total")
        assert hit
159
160
161 # ---------------------------------------------------------------------------
162 # qualified_name key
163 # ---------------------------------------------------------------------------
164
165
class TestQualifiedNamePredicate:
    """The ``qualified_name`` key, exercised with a dotted symbol name."""

    def test_dotted_name(self) -> None:
        dotted = "Invoice.compute"
        assert _match("qualified_name=Invoice.compute", qualified_name=dotted)
        assert not _match("qualified_name=Invoice.pay", qualified_name=dotted)

    def test_contains(self) -> None:
        hit = _match("qualified_name~=Invoice", qualified_name="Invoice.compute")
        assert hit
173
174
175 # ---------------------------------------------------------------------------
176 # file key
177 # ---------------------------------------------------------------------------
178
179
class TestFilePredicate:
    """The ``file`` key is evaluated against the path handed to the predicate."""

    def test_exact(self) -> None:
        path = "src/billing.py"
        assert _match("file=src/billing.py", file_path=path)
        assert not _match("file=src/utils.py", file_path=path)

    def test_contains(self) -> None:
        hit = _match("file~=billing", file_path="src/billing.py")
        assert hit

    def test_starts_with(self) -> None:
        hit = _match("file^=src/", file_path="src/billing.py")
        assert hit

    def test_ends_with(self) -> None:
        hit = _match("file$=.py", file_path="src/billing.py")
        assert hit
193
194
195 # ---------------------------------------------------------------------------
196 # hash / body_hash / signature_id keys (prefix matching)
197 # ---------------------------------------------------------------------------
198
199
class TestHashPredicates:
    """``hash``, ``body_hash`` and ``signature_id`` queried with a prefix.

    Each stored identifier is a short prefix padded with ``"0"`` to 64
    characters.
    """

    def test_content_id_prefix(self) -> None:
        symbol = _rec(content_id="abcdef".ljust(64, "0"))
        by_hash = parse_query("hash=abcde")
        assert by_hash("f.py", symbol)

    def test_content_id_prefix_no_match(self) -> None:
        symbol = _rec(content_id="abcdef".ljust(64, "0"))
        by_hash = parse_query("hash=xyz")
        assert not by_hash("f.py", symbol)

    def test_body_hash_prefix(self) -> None:
        symbol = _rec(body_hash="deadbeef".ljust(64, "0"))
        by_body = parse_query("body_hash=deadbe")
        assert by_body("f.py", symbol)

    def test_signature_id_prefix(self) -> None:
        symbol = _rec(signature_id="cafebabe".ljust(64, "0"))
        by_signature = parse_query("signature_id=cafeba")
        assert by_signature("f.py", symbol)

    def test_hash_prefix_case_sensitive_match(self) -> None:
        # Same-case prefix: the stored hash begins with the query value.
        lower_symbol = _rec(content_id="abcdef".ljust(64, "0"))
        by_hash = parse_query("hash=abcdef")
        assert by_hash("f.py", lower_symbol)

        # The stored hash is upper-case while the query prefix is lower-case;
        # this test pins that the two do not match (no case normalization).
        upper_symbol = _rec(content_id="ABCDEF".ljust(64, "0"))
        by_lower_hash = parse_query("hash=abcdef")
        assert not by_lower_hash("f.py", upper_symbol)
232
233
234 # ---------------------------------------------------------------------------
235 # lineno_gt / lineno_lt
236 # ---------------------------------------------------------------------------
237
238
class TestLinenoPredicates:
    """``lineno_gt`` / ``lineno_lt``: strict bounds and integer validation."""

    def test_lineno_gt_pass(self) -> None:
        hit = _match("lineno_gt=5", lineno=10)
        assert hit

    def test_lineno_gt_boundary(self) -> None:
        # The bound is strict: a symbol on line 10 is not "greater than 10".
        on_boundary = _match("lineno_gt=10", lineno=10)
        assert not on_boundary
        just_above = _match("lineno_gt=9", lineno=10)
        assert just_above

    def test_lineno_lt_pass(self) -> None:
        hit = _match("lineno_lt=20", lineno=10)
        assert hit

    def test_lineno_lt_boundary(self) -> None:
        # Strict as well: a symbol on line 10 is not "less than 10".
        on_boundary = _match("lineno_lt=10", lineno=10)
        assert not on_boundary
        just_below = _match("lineno_lt=11", lineno=10)
        assert just_below

    def test_lineno_gt_bad_value(self) -> None:
        bad_query = "lineno_gt=abc"
        with pytest.raises(PredicateError, match="integer"):
            parse_query(bad_query)

    def test_lineno_lt_bad_value(self) -> None:
        bad_query = "lineno_lt=abc"
        with pytest.raises(PredicateError, match="integer"):
            parse_query(bad_query)
262
263
264 # ---------------------------------------------------------------------------
265 # language key
266 # ---------------------------------------------------------------------------
267
268
class TestLanguagePredicate:
    """The ``language`` key, checked against paths with different extensions."""

    def test_python_by_extension(self) -> None:
        is_python = parse_query("language=Python")
        assert is_python("src/billing.py", _rec())
        assert not is_python("src/billing.go", _rec())

    def test_go_by_extension(self) -> None:
        is_go = parse_query("language=Go")
        assert is_go("cmd/main.go", _rec())
        assert not is_go("cmd/main.py", _rec())

    def test_typescript(self) -> None:
        is_typescript = parse_query("language=TypeScript")
        symbol = _rec()
        assert is_typescript("src/index.ts", symbol)

    def test_rust(self) -> None:
        is_rust = parse_query("language=Rust")
        symbol = _rec()
        assert is_rust("src/main.rs", symbol)
287
288
289 # ---------------------------------------------------------------------------
290 # Compound: AND (implicit)
291 # ---------------------------------------------------------------------------
292
293
class TestImplicitAnd:
    """Adjacent atoms (and the explicit ``AND`` keyword) must all hold."""

    def test_two_atoms_both_match(self) -> None:
        hit = _match(
            "kind=function name=compute_total",
            kind="function",
            name="compute_total",
        )
        assert hit

    def test_two_atoms_first_no_match(self) -> None:
        hit = _match(
            "kind=class name=compute_total",
            kind="function",
            name="compute_total",
        )
        assert not hit

    def test_two_atoms_second_no_match(self) -> None:
        hit = _match(
            "kind=function name=invoice",
            kind="function",
            name="compute_total",
        )
        assert not hit

    def test_three_atoms(self) -> None:
        query = "kind=function name~=compute file~=billing"
        hit = _match(
            query,
            kind="function",
            name="compute_total",
            file_path="src/billing.py",
        )
        assert hit

    def test_explicit_and_keyword(self) -> None:
        hit = _match(
            "kind=function AND name=compute_total",
            kind="function",
            name="compute_total",
        )
        assert hit
314
315
316 # ---------------------------------------------------------------------------
317 # Compound: OR
318 # ---------------------------------------------------------------------------
319
320
class TestOr:
    """``OR`` accepts a symbol when at least one alternative matches."""

    def test_or_first_matches(self) -> None:
        hit = _match("kind=function OR kind=class", kind="function")
        assert hit

    def test_or_second_matches(self) -> None:
        hit = _match("kind=function OR kind=class", kind="class")
        assert hit

    def test_or_neither_matches(self) -> None:
        hit = _match("kind=function OR kind=class", kind="method")
        assert not hit

    def test_or_with_three_alternatives(self) -> None:
        any_of_three = parse_query("kind=function OR kind=class OR kind=method")
        for accepted_kind in ("function", "class", "method"):
            assert any_of_three("f.py", _rec(kind=accepted_kind))
        assert not any_of_three("f.py", _rec(kind="variable"))

    def test_or_in_list_mode(self) -> None:
        # A single list element that itself contains an OR expression.
        either = parse_query(["kind=function OR kind=class"])
        symbol = _rec(kind="class")
        assert either("f.py", symbol)
342
343
344 # ---------------------------------------------------------------------------
345 # Compound: NOT
346 # ---------------------------------------------------------------------------
347
348
class TestNot:
    """``NOT`` negates the atom or parenthesised group that follows it."""

    def test_not_inverts_match(self) -> None:
        query = "NOT kind=function"
        assert not _match(query, kind="function")
        assert _match(query, kind="class")

    def test_not_with_and(self) -> None:
        not_import_and_billing = parse_query("NOT kind=import name~=billing")

        # Not an import and the name contains "billing": accepted.
        accepted = _rec(kind="function", name="billing_util")
        assert not_import_and_billing("f.py", accepted)

        # An import: rejected by the NOT half.
        is_import = _rec(kind="import", name="billing_util")
        assert not not_import_and_billing("f.py", is_import)

        # Name lacks "billing": rejected by the AND half.
        wrong_name = _rec(kind="function", name="compute")
        assert not not_import_and_billing("f.py", wrong_name)

    def test_not_with_parenthesised_group(self) -> None:
        # NOT in front of a parenthesised single atom.
        not_import = parse_query("NOT (kind=import)")
        assert not_import("f.py", _rec(kind="function"))
        assert not not_import("f.py", _rec(kind="import"))
368
369
370 # ---------------------------------------------------------------------------
371 # Parentheses / grouping
372 # ---------------------------------------------------------------------------
373
374
class TestParentheses:
    """Grouping with parentheses, including nested and negated groups."""

    def test_parenthesised_or(self) -> None:
        private_callable = parse_query("(kind=function OR kind=method) name^=_")

        # Functions and methods whose name starts with "_" are accepted.
        accepted = (("function", "_private"), ("method", "_helper"))
        for kind, name in accepted:
            assert private_callable("f.py", _rec(kind=kind, name=name))

        # A class fails the kind group; a public function fails the name atom.
        rejected = (("class", "_Base"), ("function", "public_func"))
        for kind, name in rejected:
            assert not private_callable("f.py", _rec(kind=kind, name=name))

    def test_nested_parens(self) -> None:
        in_billing = parse_query("((kind=function OR kind=class) AND file~=billing)")
        for kind in ("function", "class"):
            assert in_billing("src/billing.py", _rec(kind=kind))
        assert not in_billing("src/utils.py", _rec(kind="function"))

    def test_not_parenthesised_group(self) -> None:
        neither = parse_query("NOT (kind=function OR kind=class)")
        assert neither("f.py", _rec(kind="method"))
        assert not neither("f.py", _rec(kind="function"))
397
398
399 # ---------------------------------------------------------------------------
400 # parse_query list mode
401 # ---------------------------------------------------------------------------
402
403
class TestParseQueryListMode:
    """``parse_query`` called with a list of query fragments."""

    def test_single_atom_list(self) -> None:
        only_functions = parse_query(["kind=function"])
        assert only_functions("f.py", _rec(kind="function"))
        assert not only_functions("f.py", _rec(kind="class"))

    def test_multi_atom_list_implicit_and(self) -> None:
        # Both list elements must hold for the symbol to be accepted.
        fragments = ["kind=function", "name~=compute"]
        compute_functions = parse_query(fragments)
        assert compute_functions("f.py", _rec(kind="function", name="compute_total"))
        assert not compute_functions("f.py", _rec(kind="class", name="compute_total"))

    def test_atom_with_or_in_list(self) -> None:
        callable_kinds = parse_query(["kind=function OR kind=method"])
        symbol = _rec(kind="method")
        assert callable_kinds("f.py", symbol)
418
419
420 # ---------------------------------------------------------------------------
421 # Error cases
422 # ---------------------------------------------------------------------------
423
424
class TestErrors:
    """Malformed queries raise ``PredicateError``; quoted values parse cleanly."""

    def test_unknown_key(self) -> None:
        bad_query = "colour=red"
        with pytest.raises(PredicateError, match="Unknown predicate key"):
            parse_query(bad_query)

    def test_missing_operator(self) -> None:
        # Key and value with no operator between them.
        bad_query = "kind function"
        with pytest.raises(PredicateError):
            parse_query(bad_query)

    def test_unclosed_paren(self) -> None:
        bad_query = "(kind=function"
        with pytest.raises(PredicateError):
            parse_query(bad_query)

    def test_unexpected_close_paren(self) -> None:
        bad_query = "kind=function)"
        with pytest.raises(PredicateError):
            parse_query(bad_query)

    def test_trailing_garbage(self) -> None:
        # A valid atom followed by a stray token.
        bad_query = "kind=function )"
        with pytest.raises(PredicateError):
            parse_query(bad_query)

    def test_empty_not(self) -> None:
        bad_query = "NOT"
        with pytest.raises(PredicateError):
            parse_query(bad_query)

    def test_double_quoted_value(self) -> None:
        # A double-quoted value containing a space matches the unquoted name.
        quoted = parse_query('name="compute total"')
        symbol = _rec(name="compute total")
        assert quoted("f.py", symbol)

    def test_or_without_rhs(self) -> None:
        bad_query = "kind=function OR"
        with pytest.raises(PredicateError):
            parse_query(bad_query)