cgcardona / muse public
test_predicate.py python
459 lines 16.3 KB
dfa7b7aa Add comprehensive docs and supercharged tests for Code Domain V2 (#70) Gabriel Cardona <cgcardona@gmail.com> 1d ago
1 """Tests for the predicate DSL parser (muse/plugins/code/_predicate.py).
2
3 Coverage
4 --------
5 Tokenisation
6 - Valid atoms, operators, keywords, parentheses, whitespace skipping.
7 - Unexpected character raises PredicateError.
8
9 Atom parsing
10 - All seven operators: = ~= ^= $= != >= <=
11 - All ten predicate keys: kind, language, name, qualified_name, file,
12 hash, body_hash, signature_id, lineno_gt, lineno_lt.
13 - Double-quoted values.
14 - Unknown key raises PredicateError.
15 - Non-integer value for lineno_gt / lineno_lt raises PredicateError.
16
17 Compound expressions
18 - Implicit AND (adjacent atoms).
19 - Explicit OR.
20 - Explicit NOT.
21 - Parenthesised sub-expressions.
22 - Mixed OR / NOT / AND / parentheses.
23 - Trailing garbage token raises PredicateError.
24
25 parse_query
26 - Empty string → match-all predicate.
27 - Empty list → match-all predicate.
28 - List of atoms → implicit AND.
29 - Single string → parsed normally.
30
31 Predicate evaluation
32 - Each key field reads the correct SymbolRecord / file_path field.
33 - lineno_gt / lineno_lt boundary conditions (strict inequality).
34 - hash / body_hash / signature_id prefix matching.
35 - Case-insensitive string matching for =, ~=, ^=, $=, !=.
36 """
37 from __future__ import annotations
38
39 import pytest
40
41 from muse.plugins.code._predicate import PredicateError, parse_query
42 from muse.plugins.code.ast_parser import SymbolRecord
43
44
45 # ---------------------------------------------------------------------------
46 # Helpers
47 # ---------------------------------------------------------------------------
48
49
def _rec(
    *,
    kind: str = "function",
    name: str = "my_func",
    qualified_name: str = "my_func",
    lineno: int = 10,
    end_lineno: int = 20,
    content_id: str = "abcdef1234567890" * 2,
    body_hash: str = "deadbeef1234" * 4,
    signature_id: str = "cafebabe5678" * 4,
    metadata_id: str = "",
    canonical_key: str = "",
) -> SymbolRecord:
    """Build a ``SymbolRecord`` populated with test defaults.

    Every field is keyword-only, so a test overrides just the fields it
    cares about and leaves the rest at the defaults declared above.

    Returns:
        A ``SymbolRecord`` carrying exactly the supplied field values.
    """
    record = SymbolRecord(
        kind=kind,
        name=name,
        qualified_name=qualified_name,
        lineno=lineno,
        end_lineno=end_lineno,
        content_id=content_id,
        body_hash=body_hash,
        signature_id=signature_id,
        metadata_id=metadata_id,
        canonical_key=canonical_key,
    )
    return record
75
76
def _match(
    query: str | list[str],
    file_path: str = "src/billing.py",
    kind: str = "function",
    name: str = "my_func",
    qualified_name: str = "my_func",
    lineno: int = 10,
) -> bool:
    """Parse *query* and evaluate it against a single synthetic symbol.

    The symbol is built with ``_rec`` from ``kind``, ``name``,
    ``qualified_name`` and ``lineno``; all other record fields keep the
    ``_rec`` defaults.

    Args:
        query: Predicate source, either one string or a list of strings.
        file_path: Path handed to the predicate alongside the record.
        kind: ``kind`` field of the synthetic record.
        name: ``name`` field of the synthetic record.
        qualified_name: ``qualified_name`` field of the synthetic record.
        lineno: ``lineno`` field of the synthetic record.

    Returns:
        Whatever the parsed predicate returns for ``(file_path, record)``.
    """
    symbol = _rec(kind=kind, name=name, qualified_name=qualified_name, lineno=lineno)
    predicate = parse_query(query)
    return predicate(file_path, symbol)
88
89
90 # ---------------------------------------------------------------------------
91 # Empty / match-all
92 # ---------------------------------------------------------------------------
93
94
class TestMatchAll:
    """Queries that contain no atoms should accept the default record."""

    def test_empty_string_matches_everything(self) -> None:
        """An empty string parses to a predicate that accepts the record."""
        match_all = parse_query("")
        record = _rec()
        assert match_all("src/foo.py", record)

    def test_empty_list_matches_everything(self) -> None:
        """An empty list parses to a predicate that accepts the record."""
        match_all = parse_query([])
        record = _rec()
        assert match_all("src/foo.py", record)

    def test_whitespace_only_matches_everything(self) -> None:
        """A blank (whitespace-only) string behaves like the empty query."""
        match_all = parse_query(" ")
        record = _rec()
        assert match_all("src/foo.py", record)
107
108
109 # ---------------------------------------------------------------------------
110 # Single atom — kind key
111 # ---------------------------------------------------------------------------
112
113
class TestKindPredicate:
    """Exercise each string operator against the ``kind`` key."""

    def test_exact_match(self) -> None:
        """``=`` accepts a record whose kind equals the value."""
        hit = _match("kind=function", kind="function")
        assert hit

    def test_exact_match_no_hit(self) -> None:
        """``=`` rejects a record whose kind differs from the value."""
        hit = _match("kind=class", kind="function")
        assert not hit

    def test_case_insensitive(self) -> None:
        """``=`` ignores letter case in the query value."""
        hit = _match("kind=Function", kind="function")
        assert hit

    def test_not_equal(self) -> None:
        """``!=`` accepts a differing kind and rejects an equal one."""
        differs = _match("kind!=class", kind="function")
        assert differs
        same = _match("kind!=function", kind="function")
        assert not same

    def test_contains(self) -> None:
        """``~=`` accepts a substring and rejects an absent fragment."""
        present = _match("kind~=unc", kind="function")
        assert present
        absent = _match("kind~=xyz", kind="function")
        assert not absent

    def test_starts_with(self) -> None:
        """``^=`` accepts a matching prefix and rejects a non-prefix."""
        prefixed = _match("kind^=func", kind="function")
        assert prefixed
        other = _match("kind^=class", kind="function")
        assert not other

    def test_ends_with(self) -> None:
        """``$=`` accepts a matching suffix and rejects a non-suffix."""
        suffixed = _match("kind$=tion", kind="function")
        assert suffixed
        other = _match("kind$=ass", kind="function")
        assert not other
139
140
141 # ---------------------------------------------------------------------------
142 # name key
143 # ---------------------------------------------------------------------------
144
145
class TestNamePredicate:
    """Exercise the string operators against the ``name`` key.

    Every operator has a positive and a negative assertion. The negative
    cases for ``^=`` and ``$=`` use a fragment that *is* contained in the
    name but is not anchored at the tested end, so an implementation that
    degraded either operator to a plain substring check would fail here.
    """

    def test_exact(self) -> None:
        """``=`` accepts the identical name and rejects a different one."""
        assert _match("name=compute_total", name="compute_total")
        assert not _match("name=compute_total", name="compute_invoice")

    def test_contains(self) -> None:
        """``~=`` accepts a substring and rejects an absent fragment."""
        assert _match("name~=total", name="compute_total")
        assert not _match("name~=invoice", name="compute_total")

    def test_starts_with(self) -> None:
        """``^=`` accepts a real prefix and rejects a mere substring."""
        assert _match("name^=compute", name="compute_total")
        # "total" occurs in the name, but not at the start.
        assert not _match("name^=total", name="compute_total")

    def test_ends_with(self) -> None:
        """``$=`` accepts a real suffix and rejects a mere substring."""
        assert _match("name$=total", name="compute_total")
        # "compute" occurs in the name, but not at the end.
        assert not _match("name$=compute", name="compute_total")
160
161
162 # ---------------------------------------------------------------------------
163 # qualified_name key
164 # ---------------------------------------------------------------------------
165
166
class TestQualifiedNamePredicate:
    """Exercise ``=`` and ``~=`` against the ``qualified_name`` key."""

    def test_dotted_name(self) -> None:
        """``=`` handles dotted values and rejects a different member."""
        assert _match("qualified_name=Invoice.compute", qualified_name="Invoice.compute")
        assert not _match("qualified_name=Invoice.pay", qualified_name="Invoice.compute")

    def test_contains(self) -> None:
        """``~=`` accepts a contained fragment and rejects an absent one."""
        assert _match("qualified_name~=Invoice", qualified_name="Invoice.compute")
        # Negative case: the fragment appears nowhere in the qualified name.
        assert not _match("qualified_name~=Payment", qualified_name="Invoice.compute")
174
175
176 # ---------------------------------------------------------------------------
177 # file key
178 # ---------------------------------------------------------------------------
179
180
class TestFilePredicate:
    """Exercise the string operators against the ``file`` key.

    The ``file`` key is evaluated against the path passed to the predicate.
    Each operator has a positive and a negative assertion; the anchored
    operators are checked with a fragment that is contained in the path
    but not at the tested end.
    """

    def test_exact(self) -> None:
        """``=`` accepts the identical path and rejects a different one."""
        assert _match("file=src/billing.py", file_path="src/billing.py")
        assert not _match("file=src/utils.py", file_path="src/billing.py")

    def test_contains(self) -> None:
        """``~=`` accepts a contained fragment and rejects an absent one."""
        assert _match("file~=billing", file_path="src/billing.py")
        assert not _match("file~=utils", file_path="src/billing.py")

    def test_starts_with(self) -> None:
        """``^=`` accepts a directory prefix and rejects a mere substring."""
        assert _match("file^=src/", file_path="src/billing.py")
        # "billing" is in the path, but the path starts with "src/".
        assert not _match("file^=billing", file_path="src/billing.py")

    def test_ends_with(self) -> None:
        """``$=`` accepts the extension and rejects a mere substring."""
        assert _match("file$=.py", file_path="src/billing.py")
        # "billing" is in the path, but the path ends with ".py".
        assert not _match("file$=billing", file_path="src/billing.py")
194
195
196 # ---------------------------------------------------------------------------
197 # hash / body_hash / signature_id keys (prefix matching)
198 # ---------------------------------------------------------------------------
199
200
class TestHashPredicates:
    """Check ``hash``, ``body_hash`` and ``signature_id`` prefix queries."""

    def test_content_id_prefix(self) -> None:
        """``hash=`` accepts a leading fragment of ``content_id``."""
        record = _rec(content_id="abcdef" + "0" * 58)
        by_hash = parse_query("hash=abcde")
        assert by_hash("f.py", record)

    def test_content_id_prefix_no_match(self) -> None:
        """``hash=`` rejects a value that is not a prefix of ``content_id``."""
        record = _rec(content_id="abcdef" + "0" * 58)
        by_hash = parse_query("hash=xyz")
        assert not by_hash("f.py", record)

    def test_body_hash_prefix(self) -> None:
        """``body_hash=`` accepts a leading fragment of ``body_hash``."""
        record = _rec(body_hash="deadbeef" + "0" * 56)
        by_body = parse_query("body_hash=deadbe")
        assert by_body("f.py", record)

    def test_signature_id_prefix(self) -> None:
        """``signature_id=`` accepts a leading fragment of ``signature_id``."""
        record = _rec(signature_id="cafebabe" + "0" * 56)
        by_signature = parse_query("signature_id=cafeba")
        assert by_signature("f.py", record)

    def test_hash_prefix_case_sensitive_match(self) -> None:
        """A lower-case prefix matches a lower-case hash but not an upper-case one."""
        # Same-case stored hash and query prefix: expected to match.
        lower_record = _rec(content_id="abcdef" + "0" * 58)
        lower_query = parse_query("hash=abcdef")
        assert lower_query("f.py", lower_record)

        # The stored hash starts with "ABCDEF" while the query is "abcdef".
        # Per the original test's note, hash= compares with startswith and
        # no case normalization, by design, so this must not match.
        upper_record = _rec(content_id="ABCDEF" + "0" * 58)
        lower_query_again = parse_query("hash=abcdef")
        assert not lower_query_again("f.py", upper_record)
233
234
235 # ---------------------------------------------------------------------------
236 # lineno_gt / lineno_lt
237 # ---------------------------------------------------------------------------
238
239
class TestLinenoPredicates:
    """Check ``lineno_gt`` / ``lineno_lt`` comparisons and value validation."""

    def test_lineno_gt_pass(self) -> None:
        """A line number above the threshold satisfies ``lineno_gt``."""
        above = _match("lineno_gt=5", lineno=10)
        assert above

    def test_lineno_gt_boundary(self) -> None:
        """``lineno_gt`` is strict: equal does not match, one less does."""
        # lineno_gt=10 means lineno > 10, so lineno=10 must be rejected.
        at_threshold = _match("lineno_gt=10", lineno=10)
        assert not at_threshold
        just_above = _match("lineno_gt=9", lineno=10)
        assert just_above

    def test_lineno_lt_pass(self) -> None:
        """A line number below the threshold satisfies ``lineno_lt``."""
        below = _match("lineno_lt=20", lineno=10)
        assert below

    def test_lineno_lt_boundary(self) -> None:
        """``lineno_lt`` is strict: equal does not match, one more does."""
        at_threshold = _match("lineno_lt=10", lineno=10)
        assert not at_threshold
        just_below = _match("lineno_lt=11", lineno=10)
        assert just_below

    def test_lineno_gt_bad_value(self) -> None:
        """A non-numeric ``lineno_gt`` value is rejected at parse time."""
        bad_query = "lineno_gt=abc"
        with pytest.raises(PredicateError, match="integer"):
            parse_query(bad_query)

    def test_lineno_lt_bad_value(self) -> None:
        """A non-numeric ``lineno_lt`` value is rejected at parse time."""
        bad_query = "lineno_lt=abc"
        with pytest.raises(PredicateError, match="integer"):
            parse_query(bad_query)
263
264
265 # ---------------------------------------------------------------------------
266 # language key
267 # ---------------------------------------------------------------------------
268
269
class TestLanguagePredicate:
    """Check that the ``language`` key is resolved from the file path.

    Every language has a positive assertion and a negative one against a
    file with a different extension, so a predicate that accepted every
    path would not pass.
    """

    def test_python_by_extension(self) -> None:
        """``.py`` is Python; ``.go`` is not."""
        pred = parse_query("language=Python")
        assert pred("src/billing.py", _rec())
        assert not pred("src/billing.go", _rec())

    def test_go_by_extension(self) -> None:
        """``.go`` is Go; ``.py`` is not."""
        pred = parse_query("language=Go")
        assert pred("cmd/main.go", _rec())
        assert not pred("cmd/main.py", _rec())

    def test_typescript(self) -> None:
        """``.ts`` is TypeScript; ``.py`` is not."""
        pred = parse_query("language=TypeScript")
        assert pred("src/index.ts", _rec())
        # Negative case, mirroring the Python and Go tests above.
        assert not pred("src/index.py", _rec())

    def test_rust(self) -> None:
        """``.rs`` is Rust; ``.go`` is not."""
        pred = parse_query("language=Rust")
        assert pred("src/main.rs", _rec())
        # Negative case, mirroring the Python and Go tests above.
        assert not pred("src/main.go", _rec())
288
289
290 # ---------------------------------------------------------------------------
291 # Compound: AND (implicit)
292 # ---------------------------------------------------------------------------
293
294
class TestImplicitAnd:
    """Adjacent atoms (and the explicit ``AND`` keyword) must all hold."""

    def test_two_atoms_both_match(self) -> None:
        """Two satisfied atoms produce a match."""
        query = "kind=function name=compute_total"
        assert _match(query, kind="function", name="compute_total")

    def test_two_atoms_first_no_match(self) -> None:
        """A failing first atom rejects the record."""
        query = "kind=class name=compute_total"
        assert not _match(query, kind="function", name="compute_total")

    def test_two_atoms_second_no_match(self) -> None:
        """A failing second atom rejects the record."""
        query = "kind=function name=invoice"
        assert not _match(query, kind="function", name="compute_total")

    def test_three_atoms(self) -> None:
        """Three satisfied atoms across kind, name and file produce a match."""
        query = "kind=function name~=compute file~=billing"
        outcome = _match(
            query,
            kind="function",
            name="compute_total",
            file_path="src/billing.py",
        )
        assert outcome

    def test_explicit_and_keyword(self) -> None:
        """The ``AND`` keyword between two satisfied atoms produces a match."""
        query = "kind=function AND name=compute_total"
        assert _match(query, kind="function", name="compute_total")
315
316
317 # ---------------------------------------------------------------------------
318 # Compound: OR
319 # ---------------------------------------------------------------------------
320
321
class TestOr:
    """``OR`` accepts a record when at least one alternative holds."""

    def test_or_first_matches(self) -> None:
        """The left alternative alone is enough."""
        query = "kind=function OR kind=class"
        assert _match(query, kind="function")

    def test_or_second_matches(self) -> None:
        """The right alternative alone is enough."""
        query = "kind=function OR kind=class"
        assert _match(query, kind="class")

    def test_or_neither_matches(self) -> None:
        """A record satisfying neither alternative is rejected."""
        query = "kind=function OR kind=class"
        assert not _match(query, kind="method")

    def test_or_with_three_alternatives(self) -> None:
        """A chain of three alternatives accepts each and rejects others."""
        any_of_three = parse_query("kind=function OR kind=class OR kind=method")
        for accepted_kind in ("function", "class", "method"):
            assert any_of_three("f.py", _rec(kind=accepted_kind))
        assert not any_of_three("f.py", _rec(kind="variable"))

    def test_or_in_list_mode(self) -> None:
        """An ``OR`` expression inside a one-element list still parses."""
        # List mode joins with spaces, so OR in middle still works.
        either = parse_query(["kind=function OR kind=class"])
        assert either("f.py", _rec(kind="class"))
343
344
345 # ---------------------------------------------------------------------------
346 # Compound: NOT
347 # ---------------------------------------------------------------------------
348
349
class TestNot:
    """``NOT`` inverts the atom or group that follows it."""

    def test_not_inverts_match(self) -> None:
        """A matching atom is rejected and a non-matching one accepted."""
        query = "NOT kind=function"
        assert not _match(query, kind="function")
        assert _match(query, kind="class")

    def test_not_with_and(self) -> None:
        """``NOT`` applies to the first atom only; the second is ANDed."""
        not_import_and_billing = parse_query("NOT kind=import name~=billing")

        # kind=function, name=billing_util -> matches
        # (not an import AND the name contains "billing").
        candidate = _rec(kind="function", name="billing_util")
        assert not_import_and_billing("f.py", candidate)

        # kind=import -> the NOT part fails.
        an_import = _rec(kind="import", name="billing_util")
        assert not not_import_and_billing("f.py", an_import)

        # Name lacks "billing" -> the AND part fails.
        unrelated = _rec(kind="function", name="compute")
        assert not not_import_and_billing("f.py", unrelated)

    def test_not_with_parenthesised_group(self) -> None:
        """``NOT`` may be applied to a parenthesised single atom."""
        not_import = parse_query("NOT (kind=import)")
        assert not_import("f.py", _rec(kind="function"))
        assert not not_import("f.py", _rec(kind="import"))
369
370
371 # ---------------------------------------------------------------------------
372 # Parentheses / grouping
373 # ---------------------------------------------------------------------------
374
375
class TestParentheses:
    """Parentheses group sub-expressions before AND / NOT are applied."""

    def test_parenthesised_or(self) -> None:
        """A grouped ``OR`` is ANDed with the atom that follows it."""
        private_callable = parse_query("(kind=function OR kind=method) name^=_")

        # Function or method whose name starts with "_" -> matches.
        accepted = (
            _rec(kind="function", name="_private"),
            _rec(kind="method", name="_helper"),
        )
        for record in accepted:
            assert private_callable("f.py", record)

        # Class starting with "_" -> rejected (kind check fails).
        assert not private_callable("f.py", _rec(kind="class", name="_Base"))
        # Function not starting with "_" -> rejected (name check fails).
        assert not private_callable("f.py", _rec(kind="function", name="public_func"))

    def test_nested_parens(self) -> None:
        """Nested groups combine an ``OR`` with an explicit ``AND``."""
        billing_symbol = parse_query("((kind=function OR kind=class) AND file~=billing)")
        for accepted_kind in ("function", "class"):
            assert billing_symbol("src/billing.py", _rec(kind=accepted_kind))
        assert not billing_symbol("src/utils.py", _rec(kind="function"))

    def test_not_parenthesised_group(self) -> None:
        """``NOT`` in front of a grouped ``OR`` negates the whole group."""
        neither = parse_query("NOT (kind=function OR kind=class)")
        assert neither("f.py", _rec(kind="method"))
        assert not neither("f.py", _rec(kind="function"))
398
399
400 # ---------------------------------------------------------------------------
401 # parse_query list mode
402 # ---------------------------------------------------------------------------
403
404
class TestParseQueryListMode:
    """``parse_query`` given a list of strings instead of one string."""

    def test_single_atom_list(self) -> None:
        """A one-element list behaves like the atom it contains."""
        only_functions = parse_query(["kind=function"])
        assert only_functions("f.py", _rec(kind="function"))
        assert not only_functions("f.py", _rec(kind="class"))

    def test_multi_atom_list_implicit_and(self) -> None:
        """Separate list elements are combined so that all must hold."""
        atoms = ["kind=function", "name~=compute"]
        compute_functions = parse_query(atoms)
        assert compute_functions("f.py", _rec(kind="function", name="compute_total"))
        assert not compute_functions("f.py", _rec(kind="class", name="compute_total"))

    def test_atom_with_or_in_list(self) -> None:
        """A list element may itself contain an ``OR`` expression."""
        callable_kinds = parse_query(["kind=function OR kind=method"])
        assert callable_kinds("f.py", _rec(kind="method"))
419
420
421 # ---------------------------------------------------------------------------
422 # Error cases
423 # ---------------------------------------------------------------------------
424
425
class TestErrors:
    """Malformed queries must raise ``PredicateError``.

    ``test_double_quoted_value`` is the one non-error case in this class:
    it checks that a double-quoted value parses and matches.
    """

    def test_unknown_key(self) -> None:
        """A key outside the supported set is reported by name."""
        bad_query = "colour=red"
        with pytest.raises(PredicateError, match="Unknown predicate key"):
            parse_query(bad_query)

    def test_missing_operator(self) -> None:
        """Two bare words without an operator are rejected."""
        bad_query = "kind function"  # no operator
        with pytest.raises(PredicateError):
            parse_query(bad_query)

    def test_unclosed_paren(self) -> None:
        """An opening parenthesis without its partner is rejected."""
        bad_query = "(kind=function"
        with pytest.raises(PredicateError):
            parse_query(bad_query)

    def test_unexpected_close_paren(self) -> None:
        """A closing parenthesis glued to a valid atom is rejected."""
        bad_query = "kind=function)"
        with pytest.raises(PredicateError):
            parse_query(bad_query)

    def test_trailing_garbage(self) -> None:
        """A stray token after a complete expression is rejected."""
        # "kind=function" is valid on its own; the extra token after it is not.
        bad_query = "kind=function )"
        with pytest.raises(PredicateError):
            parse_query(bad_query)

    def test_empty_not(self) -> None:
        """``NOT`` with nothing to negate is rejected."""
        bad_query = "NOT"
        with pytest.raises(PredicateError):
            parse_query(bad_query)

    def test_double_quoted_value(self) -> None:
        """A double-quoted value containing a space matches that exact name."""
        # The surrounding quotes are stripped before comparison.
        quoted = parse_query('name="compute total"')
        assert quoted("f.py", _rec(name="compute total"))

    def test_or_without_rhs(self) -> None:
        """``OR`` with no right-hand operand is rejected."""
        bad_query = "kind=function OR"
        with pytest.raises(PredicateError):
            parse_query(bad_query)