"""Tests for the predicate DSL parser (muse/plugins/code/_predicate.py). Coverage -------- Tokenisation - Valid atoms, operators, keywords, parentheses, whitespace skipping. - Unexpected character raises PredicateError. Atom parsing - All seven operators: = ~= ^= $= != >= <= - All ten predicate keys: kind, language, name, qualified_name, file, hash, body_hash, signature_id, lineno_gt, lineno_lt. - Double-quoted values. - Unknown key raises PredicateError. - Non-integer value for lineno_gt / lineno_lt raises PredicateError. Compound expressions - Implicit AND (adjacent atoms). - Explicit OR. - Explicit NOT. - Parenthesised sub-expressions. - Mixed OR / NOT / AND / parentheses. - Trailing garbage token raises PredicateError. parse_query - Empty string → match-all predicate. - Empty list → match-all predicate. - List of atoms → implicit AND. - Single string → parsed normally. Predicate evaluation - Each key field reads the correct SymbolRecord / file_path field. - lineno_gt / lineno_lt boundary conditions (strict inequality). - hash / body_hash / signature_id prefix matching. - Case-insensitive string matching for =, ~=, ^=, $=, !=. """ from __future__ import annotations import pytest from muse.plugins.code._predicate import PredicateError, parse_query from muse.plugins.code.ast_parser import SymbolRecord # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _rec( *, kind: str = "function", name: str = "my_func", qualified_name: str = "my_func", lineno: int = 10, end_lineno: int = 20, content_id: str = "abcdef1234567890" * 2, body_hash: str = "deadbeef1234" * 4, signature_id: str = "cafebabe5678" * 4, metadata_id: str = "", canonical_key: str = "", ) -> SymbolRecord: return SymbolRecord( kind=kind, name=name, qualified_name=qualified_name, lineno=lineno, end_lineno=end_lineno, content_id=content_id, body_hash=body_hash, signature_id=signature_id, metadata_id=metadata_id, canonical_key=canonical_key, ) def _match( query: str | list[str], file_path: str = "src/billing.py", kind: str = "function", name: str = "my_func", qualified_name: str = "my_func", lineno: int = 10, ) -> bool: rec = _rec(kind=kind, name=name, qualified_name=qualified_name, lineno=lineno) pred = parse_query(query) return pred(file_path, rec) # --------------------------------------------------------------------------- # Empty / match-all # --------------------------------------------------------------------------- class TestMatchAll: def test_empty_string_matches_everything(self) -> None: pred = parse_query("") assert pred("src/foo.py", _rec()) def test_empty_list_matches_everything(self) -> None: pred = parse_query([]) assert pred("src/foo.py", _rec()) def test_whitespace_only_matches_everything(self) -> None: pred = parse_query(" ") assert pred("src/foo.py", _rec()) # --------------------------------------------------------------------------- # Single atom — kind key # --------------------------------------------------------------------------- class TestKindPredicate: def test_exact_match(self) -> None: assert _match("kind=function", kind="function") def test_exact_match_no_hit(self) -> None: assert not _match("kind=class", kind="function") def test_case_insensitive(self) -> None: assert _match("kind=Function", kind="function") def test_not_equal(self) -> None: assert _match("kind!=class", kind="function") assert not _match("kind!=function", kind="function") def test_contains(self) -> None: assert _match("kind~=unc", kind="function") assert not _match("kind~=xyz", kind="function") def test_starts_with(self) -> None: assert _match("kind^=func", kind="function") assert not _match("kind^=class", kind="function") def test_ends_with(self) -> None: assert _match("kind$=tion", kind="function") assert not _match("kind$=ass", kind="function") # --------------------------------------------------------------------------- # name key # --------------------------------------------------------------------------- class TestNamePredicate: def test_exact(self) -> None: assert _match("name=compute_total", name="compute_total") assert not _match("name=compute_total", name="compute_invoice") def test_contains(self) -> None: assert _match("name~=total", name="compute_total") assert not _match("name~=invoice", name="compute_total") def test_starts_with(self) -> None: assert _match("name^=compute", name="compute_total") def test_ends_with(self) -> None: assert _match("name$=total", name="compute_total") # --------------------------------------------------------------------------- # qualified_name key # --------------------------------------------------------------------------- class TestQualifiedNamePredicate: def test_dotted_name(self) -> None: assert _match("qualified_name=Invoice.compute", qualified_name="Invoice.compute") assert not _match("qualified_name=Invoice.pay", qualified_name="Invoice.compute") def test_contains(self) -> None: assert _match("qualified_name~=Invoice", qualified_name="Invoice.compute") # --------------------------------------------------------------------------- # file key # --------------------------------------------------------------------------- class TestFilePredicate: def test_exact(self) -> None: assert _match("file=src/billing.py", file_path="src/billing.py") assert not _match("file=src/utils.py", file_path="src/billing.py") def test_contains(self) -> None: assert _match("file~=billing", file_path="src/billing.py") def test_starts_with(self) -> None: assert _match("file^=src/", file_path="src/billing.py") def test_ends_with(self) -> None: assert _match("file$=.py", file_path="src/billing.py") # --------------------------------------------------------------------------- # hash / body_hash / signature_id keys (prefix matching) # --------------------------------------------------------------------------- class TestHashPredicates: def test_content_id_prefix(self) -> None: rec = _rec(content_id="abcdef" + "0" * 58) pred = parse_query("hash=abcde") assert pred("f.py", rec) def test_content_id_prefix_no_match(self) -> None: rec = _rec(content_id="abcdef" + "0" * 58) pred = parse_query("hash=xyz") assert not pred("f.py", rec) def test_body_hash_prefix(self) -> None: rec = _rec(body_hash="deadbeef" + "0" * 56) pred = parse_query("body_hash=deadbe") assert pred("f.py", rec) def test_signature_id_prefix(self) -> None: rec = _rec(signature_id="cafebabe" + "0" * 56) pred = parse_query("signature_id=cafeba") assert pred("f.py", rec) def test_hash_prefix_case_sensitive_match(self) -> None: # Hash matching uses prefix-startswith; stored value case must match query case. rec = _rec(content_id="abcdef" + "0" * 58) pred = parse_query("hash=abcdef") assert pred("f.py", rec) # Upper-case stored hash won't match lower-case query prefix # (hash= uses startswith without normalization — this is by design). rec_upper = _rec(content_id="ABCDEF" + "0" * 58) pred_lower = parse_query("hash=abcdef") # The stored hash starts with "ABCDEF", query is "abcdef" → no match. assert not pred_lower("f.py", rec_upper) # --------------------------------------------------------------------------- # lineno_gt / lineno_lt # --------------------------------------------------------------------------- class TestLinenoPredicates: def test_lineno_gt_pass(self) -> None: assert _match("lineno_gt=5", lineno=10) def test_lineno_gt_boundary(self) -> None: # lineno_gt=10 means lineno > 10, so lineno=10 should NOT match assert not _match("lineno_gt=10", lineno=10) assert _match("lineno_gt=9", lineno=10) def test_lineno_lt_pass(self) -> None: assert _match("lineno_lt=20", lineno=10) def test_lineno_lt_boundary(self) -> None: assert not _match("lineno_lt=10", lineno=10) assert _match("lineno_lt=11", lineno=10) def test_lineno_gt_bad_value(self) -> None: with pytest.raises(PredicateError, match="integer"): parse_query("lineno_gt=abc") def test_lineno_lt_bad_value(self) -> None: with pytest.raises(PredicateError, match="integer"): parse_query("lineno_lt=abc") # --------------------------------------------------------------------------- # language key # --------------------------------------------------------------------------- class TestLanguagePredicate: def test_python_by_extension(self) -> None: pred = parse_query("language=Python") assert pred("src/billing.py", _rec()) assert not pred("src/billing.go", _rec()) def test_go_by_extension(self) -> None: pred = parse_query("language=Go") assert pred("cmd/main.go", _rec()) assert not pred("cmd/main.py", _rec()) def test_typescript(self) -> None: pred = parse_query("language=TypeScript") assert pred("src/index.ts", _rec()) def test_rust(self) -> None: pred = parse_query("language=Rust") assert pred("src/main.rs", _rec()) # --------------------------------------------------------------------------- # Compound: AND (implicit) # --------------------------------------------------------------------------- class TestImplicitAnd: def test_two_atoms_both_match(self) -> None: assert _match("kind=function name=compute_total", kind="function", name="compute_total") def test_two_atoms_first_no_match(self) -> None: assert not _match("kind=class name=compute_total", kind="function", name="compute_total") def test_two_atoms_second_no_match(self) -> None: assert not _match("kind=function name=invoice", kind="function", name="compute_total") def test_three_atoms(self) -> None: assert _match( "kind=function name~=compute file~=billing", kind="function", name="compute_total", file_path="src/billing.py", ) def test_explicit_and_keyword(self) -> None: assert _match("kind=function AND name=compute_total", kind="function", name="compute_total") # --------------------------------------------------------------------------- # Compound: OR # --------------------------------------------------------------------------- class TestOr: def test_or_first_matches(self) -> None: assert _match("kind=function OR kind=class", kind="function") def test_or_second_matches(self) -> None: assert _match("kind=function OR kind=class", kind="class") def test_or_neither_matches(self) -> None: assert not _match("kind=function OR kind=class", kind="method") def test_or_with_three_alternatives(self) -> None: pred = parse_query("kind=function OR kind=class OR kind=method") assert pred("f.py", _rec(kind="function")) assert pred("f.py", _rec(kind="class")) assert pred("f.py", _rec(kind="method")) assert not pred("f.py", _rec(kind="variable")) def test_or_in_list_mode(self) -> None: # List mode joins with spaces, so OR in middle still works. pred = parse_query(["kind=function OR kind=class"]) assert pred("f.py", _rec(kind="class")) # --------------------------------------------------------------------------- # Compound: NOT # --------------------------------------------------------------------------- class TestNot: def test_not_inverts_match(self) -> None: assert not _match("NOT kind=function", kind="function") assert _match("NOT kind=function", kind="class") def test_not_with_and(self) -> None: pred = parse_query("NOT kind=import name~=billing") # kind=function, name=billing_util → matches (not import AND name contains billing) assert pred("f.py", _rec(kind="function", name="billing_util")) # kind=import → fails NOT assert not pred("f.py", _rec(kind="import", name="billing_util")) # name doesn't contain billing → fails AND assert not pred("f.py", _rec(kind="function", name="compute")) def test_not_with_parenthesised_group(self) -> None: # NOT applied to a grouped predicate. pred = parse_query("NOT (kind=import)") assert pred("f.py", _rec(kind="function")) assert not pred("f.py", _rec(kind="import")) # --------------------------------------------------------------------------- # Parentheses / grouping # --------------------------------------------------------------------------- class TestParentheses: def test_parenthesised_or(self) -> None: pred = parse_query("(kind=function OR kind=method) name^=_") # function starting with _ → matches assert pred("f.py", _rec(kind="function", name="_private")) # method starting with _ → matches assert pred("f.py", _rec(kind="method", name="_helper")) # class starting with _ → does NOT match (kind check fails) assert not pred("f.py", _rec(kind="class", name="_Base")) # function NOT starting with _ → does NOT match (name check fails) assert not pred("f.py", _rec(kind="function", name="public_func")) def test_nested_parens(self) -> None: pred = parse_query("((kind=function OR kind=class) AND file~=billing)") assert pred("src/billing.py", _rec(kind="function")) assert pred("src/billing.py", _rec(kind="class")) assert not pred("src/utils.py", _rec(kind="function")) def test_not_parenthesised_group(self) -> None: pred = parse_query("NOT (kind=function OR kind=class)") assert pred("f.py", _rec(kind="method")) assert not pred("f.py", _rec(kind="function")) # --------------------------------------------------------------------------- # parse_query list mode # --------------------------------------------------------------------------- class TestParseQueryListMode: def test_single_atom_list(self) -> None: pred = parse_query(["kind=function"]) assert pred("f.py", _rec(kind="function")) assert not pred("f.py", _rec(kind="class")) def test_multi_atom_list_implicit_and(self) -> None: pred = parse_query(["kind=function", "name~=compute"]) assert pred("f.py", _rec(kind="function", name="compute_total")) assert not pred("f.py", _rec(kind="class", name="compute_total")) def test_atom_with_or_in_list(self) -> None: pred = parse_query(["kind=function OR kind=method"]) assert pred("f.py", _rec(kind="method")) # --------------------------------------------------------------------------- # Error cases # --------------------------------------------------------------------------- class TestErrors: def test_unknown_key(self) -> None: with pytest.raises(PredicateError, match="Unknown predicate key"): parse_query("colour=red") def test_missing_operator(self) -> None: with pytest.raises(PredicateError): parse_query("kind function") # no operator def test_unclosed_paren(self) -> None: with pytest.raises(PredicateError): parse_query("(kind=function") def test_unexpected_close_paren(self) -> None: with pytest.raises(PredicateError): parse_query("kind=function)") def test_trailing_garbage(self) -> None: # "kind=function" is valid, but then extra garbage with pytest.raises(PredicateError): parse_query("kind=function )") def test_empty_not(self) -> None: with pytest.raises(PredicateError): parse_query("NOT") def test_double_quoted_value(self) -> None: # Double-quoted values are stripped correctly. pred = parse_query('name="compute total"') assert pred("f.py", _rec(name="compute total")) def test_or_without_rhs(self) -> None: with pytest.raises(PredicateError): parse_query("kind=function OR")