grep.py
python
| 1 | """muse grep — semantic symbol search across the symbol graph. |
| 2 | |
| 3 | Unlike ``git grep`` which searches raw text lines, ``muse grep`` searches |
| 4 | the *typed symbol graph* — only returning actual symbol declarations with |
| 5 | their kind, file, line number, and stable content hash. |
| 6 | |
| 7 | No false positives from comments, string literals, or call sites. Every |
| 8 | result is a real symbol that exists in the repository. |
| 9 | |
| 10 | Usage:: |
| 11 | |
| 12 | muse grep "validate" # all symbols whose name contains "validate" |
| 13 | muse grep "^handle" --regex # names matching regex "^handle" |
| 14 | muse grep "Invoice" --kind class # only class symbols |
| 15 | muse grep "compute" --language Go # only Go symbols |
| 16 | muse grep "total" --commit HEAD~5 # search a historical snapshot |
| 17 | |
| 18 | Output (with ``--hashes``):: |
| 19 | |
| 20 | src/billing.py::validate_amount fn line 8 a3f2c91b.. |
| 21 | src/auth.py::validate_token fn line 14 cb4afa07.. |
| 22 | src/auth.py::Validator class line 22 1d2e3f4a.. |
| 23 | src/auth.py::Validator.validate method line 28 4a5b6c7d.. |
| 24 | |
| 25 | 4 match(es) across 2 file(s) |
| 26 | """ |
| 27 | |
| 28 | from __future__ import annotations |
| 29 | |
| 30 | import json |
| 31 | import logging |
| 32 | import pathlib |
| 33 | import re |
| 34 | |
| 35 | import typer |
| 36 | |
| 37 | from muse.core.errors import ExitCode |
| 38 | from muse.core.repo import require_repo |
| 39 | from muse.core.store import get_commit_snapshot_manifest, resolve_commit_ref |
| 40 | from muse.plugins.code._query import language_of, symbols_for_snapshot |
| 41 | from muse.plugins.code.ast_parser import SymbolRecord |
| 42 | |
# Typer application object; the ``grep`` callback below is registered on it.
app = typer.Typer()

# Module-level logger named after this module.
logger = logging.getLogger(__name__)
| 46 | |
# Short display labels for symbol kinds in the plain-text output.
# A trailing "~" marks the async variant of a kind.
_KIND_ICON: dict[str, str] = {
    # Free functions.
    "function": "fn",
    "async_function": "fn~",
    # Classes and their methods.
    "class": "class",
    "method": "method",
    "async_method": "method~",
    # Other declarations.
    "variable": "var",
    "import": "import",
}
| 56 | |
| 57 | |
def _read_repo_id(root: pathlib.Path) -> str:
    """Return the ``repo_id`` recorded in ``<root>/.muse/repo.json``.

    The file is decoded explicitly as UTF-8 (the JSON interchange encoding)
    so the result does not depend on the platform's locale encoding.

    Args:
        root: Repository root directory that contains the ``.muse`` folder.

    Returns:
        The value stored under the ``"repo_id"`` key, coerced to ``str``.

    Raises:
        OSError: If ``repo.json`` cannot be read.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If the document has no ``"repo_id"`` key.
    """
    repo_json = root / ".muse" / "repo.json"
    metadata = json.loads(repo_json.read_text(encoding="utf-8"))
    return str(metadata["repo_id"])
| 60 | |
| 61 | |
def _read_branch(root: pathlib.Path) -> str:
    """Return the branch name stored in ``<root>/.muse/HEAD``.

    The file content is whitespace-stripped, a leading ``refs/heads/``
    prefix is dropped when present, and the remainder is stripped again.
    """
    head_file = root / ".muse" / "HEAD"
    ref_text = head_file.read_text().strip()
    branch_name = ref_text.removeprefix("refs/heads/")
    return branch_name.strip()
| 65 | |
| 66 | |
@app.callback(invoke_without_command=True)
def grep(
    ctx: typer.Context,
    pattern: str = typer.Argument(..., metavar="PATTERN", help="Name pattern to search for."),
    use_regex: bool = typer.Option(
        False, "--regex", "-e",
        help="Treat PATTERN as a regular expression (default: substring match).",
    ),
    kind_filter: str | None = typer.Option(
        None, "--kind", "-k", metavar="KIND",
        help="Restrict to symbols of this kind (function, class, method, …).",
    ),
    language_filter: str | None = typer.Option(
        None, "--language", "-l", metavar="LANG",
        help="Restrict to symbols from files of this language (Python, Go, …).",
    ),
    ref: str | None = typer.Option(
        None, "--commit", "-c", metavar="REF",
        help="Search a historical commit instead of HEAD.",
    ),
    show_hashes: bool = typer.Option(
        False, "--hashes", help="Include content hashes in output.",
    ),
    as_json: bool = typer.Option(
        False, "--json", help="Emit results as JSON.",
    ),
) -> None:
    """Search the symbol graph by name — not file text.

    ``muse grep`` searches the typed, content-addressed symbol graph.
    Every result is a real symbol declaration — no false positives from
    comments, string literals, or call sites.

    The ``--regex`` flag enables full Python regex syntax. Without it,
    PATTERN is matched as a case-insensitive substring of the symbol name.
    Regex matching is case-insensitive as well and unanchored: a symbol
    matches when the pattern is found anywhere in its name.

    The ``--hashes`` flag adds the 8-character content-ID prefix to each
    result, enabling downstream filtering by identity (e.g. find clones
    with ``muse query hash=<prefix>``).

    With ``--json`` the matches are printed as a JSON array (``[]`` when
    nothing matches) and no summary line is emitted.

    Exits with ``ExitCode.USER_ERROR`` when the commit reference cannot be
    resolved or when PATTERN is not a valid regular expression.
    """
    root = require_repo()
    repo_id = _read_repo_id(root)
    branch = _read_branch(root)

    commit = resolve_commit_ref(root, repo_id, branch, ref)
    if commit is None:
        typer.echo(f"❌ Commit '{ref or 'HEAD'}' not found.", err=True)
        raise typer.Exit(code=ExitCode.USER_ERROR)

    # A missing manifest is treated as an empty snapshot.
    manifest = get_commit_snapshot_manifest(root, commit.commit_id) or {}

    try:
        # Substring mode is an escaped regex, so both modes share one matcher.
        regex = re.compile(pattern if use_regex else re.escape(pattern), re.IGNORECASE)
    except re.error as exc:
        typer.echo(f"❌ Invalid regex pattern: {exc}", err=True)
        raise typer.Exit(code=ExitCode.USER_ERROR) from exc

    symbol_map = symbols_for_snapshot(
        root, manifest,
        kind_filter=kind_filter,
        language_filter=language_filter,
    )

    # Filter by name pattern: files in path order, symbols in line order.
    matches: list[tuple[str, str, SymbolRecord]] = []
    for file_path, tree in sorted(symbol_map.items()):
        for addr, rec in sorted(tree.items(), key=lambda kv: kv[1]["lineno"]):
            if regex.search(rec["name"]):
                matches.append((file_path, addr, rec))

    if as_json:
        out: list[dict[str, str | int]] = []
        for _fp, addr, rec in matches:
            # The file path is the part of the address before the first "::".
            source_file = addr.split("::")[0]
            out.append({
                "address": addr,
                "kind": rec["kind"],
                "name": rec["name"],
                "qualified_name": rec["qualified_name"],
                "file": source_file,
                "lineno": rec["lineno"],
                "language": language_of(source_file),
                "content_id": rec["content_id"],
            })
        typer.echo(json.dumps(out, indent=2))
        return

    if not matches:
        typer.echo(f" (no symbols matching '{pattern}')")
        return

    for _fp, addr, rec in matches:
        # Kinds without a short label are printed verbatim.
        icon = _KIND_ICON.get(rec["kind"], rec["kind"])
        line = rec["lineno"]
        hash_part = f" {rec['content_id'][:8]}.." if show_hashes else ""
        typer.echo(f" {addr:<60} {icon:<10} line {line:>4}{hash_part}")

    files_seen = {file_path for file_path, _addr, _rec in matches}
    typer.echo(f"\n{len(matches)} match(es) across {len(files_seen)} file(s)")