cgcardona / muse public
api_surface.py python
282 lines 9.7 KB
bda49bdb feat: redesign .museignore as TOML with domain-scoped sections (#100) Gabriel Cardona <cgcardona@gmail.com> 1d ago
1 """muse api-surface — public API surface tracking.
2
3 Shows which symbols in a snapshot are part of the public API, and how the
4 public API changed between two commits.
5
6 A symbol is **public** when all of the following hold:
7
8 * ``kind`` is one of: ``function``, ``async_function``, ``class``,
9 ``method``, ``async_method``
* the last dotted component of ``name`` does not start with ``_`` (Python
  convention for private/internal)
* ``kind`` is not ``import`` (already implied by the list of kinds above)
12
Git cannot answer "what changed in the public API between v1.0 and v1.1?"
without an external diffing tool. Muse answers it directly from committed
snapshots — no checkout and no working tree required.
16
17 Usage::
18
19 muse api-surface
20 muse api-surface --commit HEAD~5
21 muse api-surface --diff main
22 muse api-surface --language Python
23 muse api-surface --json
24
25 With ``--diff REF``, shows a three-section report::
26
    Public API surface — commit a1b2c3d4 vs e5f6a7b8
28 ──────────────────────────────────────────────────────────────
29
30 Added (3):
31 + src/billing.py::compute_tax function
32 + src/auth.py::refresh_token function
33 + src/models.py::User.to_json method
34
35 Removed (1):
36 - src/billing.py::compute_total function
37
38 Changed (2):
39 ~ src/billing.py::Invoice.pay method (signature_change)
40 ~ src/auth.py::validate_token function (impl_only)
41
42 Flags:
43
44 ``--commit, -c REF``
45 Show or compare from this commit (default: HEAD).
46
47 ``--diff REF``
48 Compare the commit from ``--commit`` against this ref.
49
``--language, -l LANG``
51 Filter to symbols in files of this language.
52
53 ``--json``
54 Emit results as JSON with a ``schema_version`` wrapper.
55 """
56
57 from __future__ import annotations
58
59 import json
60 import logging
61 import pathlib
62
63 import typer
64
65 from muse.core.errors import ExitCode
66 from muse.core.repo import require_repo
67 from muse.core.store import get_commit_snapshot_manifest, resolve_commit_ref
68 from muse.plugins.code._query import language_of, symbols_for_snapshot
69 from muse.plugins.code.ast_parser import SymbolRecord
70
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Typer application on which the ``api_surface`` callback is registered.
app = typer.Typer()
74
75 _PUBLIC_KINDS: frozenset[str] = frozenset({
76 "function", "async_function", "class", "method", "async_method",
77 })
78
79
80 def _read_repo_id(root: pathlib.Path) -> str:
81 return str(json.loads((root / ".muse" / "repo.json").read_text())["repo_id"])
82
83
84 def _read_branch(root: pathlib.Path) -> str:
85 head_ref = (root / ".muse" / "HEAD").read_text().strip()
86 return head_ref.removeprefix("refs/heads/").strip()
87
88
def _is_public(name: str, kind: str) -> bool:
    """Return ``True`` when a symbol called *name* of *kind* counts as public.

    The kind must be one of ``_PUBLIC_KINDS``, and the last dotted component
    of *name* must not begin with an underscore.
    """
    if kind not in _PUBLIC_KINDS:
        return False
    # Only the final component matters: ``Invoice._pay`` is private while
    # ``_Invoice.pay`` is not, exactly as in a plain split on ".".
    bare_name = name.rsplit(".", 1)[-1]
    return not bare_name.startswith("_")
91
92
def _public_symbols(
    root: pathlib.Path,
    manifest: dict[str, str],
    language_filter: str | None,
) -> dict[str, SymbolRecord]:
    """Collect the public symbols of *manifest* into one address → SymbolRecord dict.

    ``symbols_for_snapshot`` yields a mapping of per-file symbol trees; the
    trees are flattened here and only records accepted by ``_is_public`` are
    kept. *language_filter* is forwarded to ``symbols_for_snapshot`` unchanged.
    """
    per_file = symbols_for_snapshot(root, manifest, language_filter=language_filter)
    surface: dict[str, SymbolRecord] = {}
    # The file keys are not needed: each record is stored under its address.
    for file_symbols in per_file.values():
        surface.update(
            (address, record)
            for address, record in file_symbols.items()
            if _is_public(record["name"], record["kind"])
        )
    return surface
106
107
108 def _classify_change(old: SymbolRecord, new: SymbolRecord) -> str:
109 """Return a human-readable classification of what changed."""
110 if old["content_id"] == new["content_id"]:
111 return "unchanged"
112 if old["signature_id"] != new["signature_id"]:
113 if old["body_hash"] != new["body_hash"]:
114 return "signature+impl"
115 return "signature_change"
116 return "impl_only"
117
118
119 class _ApiEntry:
120 def __init__(self, address: str, rec: SymbolRecord, language: str) -> None:
121 self.address = address
122 self.rec = rec
123 self.language = language
124
125 def to_dict(self) -> dict[str, str]:
126 return {
127 "address": self.address,
128 "kind": self.rec["kind"],
129 "name": self.rec["name"],
130 "qualified_name": self.rec["qualified_name"],
131 "language": self.language,
132 "content_id": self.rec["content_id"][:8],
133 "signature_id": self.rec["signature_id"][:8],
134 "body_hash": self.rec["body_hash"][:8],
135 }
136
137
def _entry_for(address: str, rec: SymbolRecord) -> _ApiEntry:
    """Wrap *rec* in an ``_ApiEntry``, deriving its language from *address*."""
    # The part of the address before the first "::" is handed to language_of.
    return _ApiEntry(address, rec, language_of(address.split("::")[0]))


def _load_manifest(root: pathlib.Path, commit_id: str) -> dict[str, str]:
    """Return the snapshot manifest of *commit_id*, or ``{}`` when there is none."""
    manifest = get_commit_snapshot_manifest(root, commit_id)
    if manifest is None:
        # An absent manifest produces an empty surface, which in diff mode
        # makes every symbol look added or removed. Keep that best-effort
        # behaviour, but leave a trace so the odd report can be explained.
        logger.warning(
            "No snapshot manifest for commit %s; treating it as empty.",
            commit_id[:8],
        )
    return manifest or {}


def _emit_surface_listing(
    commit_id: str,
    language: str | None,
    surface: dict[str, SymbolRecord],
    as_json: bool,
) -> None:
    """Print every symbol of *surface*, as JSON or as an aligned text listing."""
    entries = [_entry_for(addr, surface[addr]) for addr in sorted(surface)]
    short_id = commit_id[:8]

    if as_json:
        typer.echo(json.dumps(
            {
                "schema_version": 1,
                "commit": short_id,
                "language_filter": language,
                "total": len(entries),
                "symbols": [entry.to_dict() for entry in entries],
            },
            indent=2,
        ))
        return

    typer.echo(f"\nPublic API surface — commit {short_id}")
    if language:
        typer.echo(f" (language: {language})")
    typer.echo("─" * 62)
    if not entries:
        typer.echo(" (no public symbols found)")
        return
    # Pad addresses to the longest one so the kind column lines up.
    width = max(len(entry.address) for entry in entries)
    for entry in entries:
        typer.echo(f" {entry.address:<{width}} {entry.rec['kind']}")
    typer.echo(f"\n {len(entries)} public symbol(s)")


def _diff_surfaces(
    base: dict[str, SymbolRecord],
    current: dict[str, SymbolRecord],
) -> tuple[
    dict[str, SymbolRecord],
    dict[str, SymbolRecord],
    dict[str, tuple[SymbolRecord, SymbolRecord, str]],
]:
    """Split the difference between two surfaces into added, removed and changed.

    ``added`` holds addresses only in *current*, ``removed`` those only in
    *base*, and ``changed`` maps addresses present in both to
    ``(old, new, classification)`` whenever the classification is not
    ``"unchanged"``.
    """
    added = {addr: rec for addr, rec in current.items() if addr not in base}
    removed = {addr: rec for addr, rec in base.items() if addr not in current}
    changed: dict[str, tuple[SymbolRecord, SymbolRecord, str]] = {}
    for addr, new in current.items():
        if addr not in base:
            continue
        old = base[addr]
        change = _classify_change(old, new)
        if change != "unchanged":
            changed[addr] = (old, new, change)
    return added, removed, changed


def _emit_surface_diff(
    commit_id: str,
    base_commit_id: str,
    language: str | None,
    added: dict[str, SymbolRecord],
    removed: dict[str, SymbolRecord],
    changed: dict[str, tuple[SymbolRecord, SymbolRecord, str]],
    as_json: bool,
) -> None:
    """Print the Added / Removed / Changed report, as JSON or as text."""
    if as_json:
        typer.echo(json.dumps(
            {
                "schema_version": 1,
                "commit": commit_id[:8],
                "base_commit": base_commit_id[:8],
                "language_filter": language,
                "added": [
                    _entry_for(addr, added[addr]).to_dict()
                    for addr in sorted(added)
                ],
                "removed": [
                    _entry_for(addr, removed[addr]).to_dict()
                    for addr in sorted(removed)
                ],
                "changed": [
                    {**_entry_for(addr, new).to_dict(), "change": change}
                    for addr, (_, new, change) in sorted(changed.items())
                ],
            },
            indent=2,
        ))
        return

    typer.echo(
        f"\nPublic API surface — commit {commit_id[:8]} vs {base_commit_id[:8]}"
    )
    if language:
        typer.echo(f" (language: {language})")
    typer.echo("─" * 62)

    # One shared column width across all three sections; 40 when there is
    # nothing to print at all.
    width = max((len(addr) for addr in (*added, *removed, *changed)), default=40)

    for title, sign, section in (("Added", "+", added), ("Removed", "-", removed)):
        if section:
            typer.echo(f"\n{title} ({len(section)}):")
            for addr in sorted(section):
                typer.echo(f" {sign} {addr:<{width}} {section[addr]['kind']}")

    if changed:
        typer.echo(f"\nChanged ({len(changed)}):")
        for addr, (_, new, change) in sorted(changed.items()):
            typer.echo(f" ~ {addr:<{width}} {new['kind']} ({change})")

    total = len(added) + len(removed) + len(changed)
    if total == 0:
        typer.echo("\n ✅ No public API changes detected.")
    else:
        typer.echo(f"\n {total} public API change(s)")


@app.callback(invoke_without_command=True)
def api_surface(
    ctx: typer.Context,
    ref: str | None = typer.Option(
        None, "--commit", "-c", metavar="REF",
        help="Show surface at this commit (default: HEAD).",
    ),
    diff_ref: str | None = typer.Option(
        None, "--diff", metavar="REF",
        help="Compare HEAD (or --commit) against this ref.",
    ),
    language: str | None = typer.Option(
        None, "--language", "-l", metavar="LANG",
        help="Filter to this language (Python, Go, Rust, …).",
    ),
    as_json: bool = typer.Option(False, "--json", help="Emit results as JSON."),
) -> None:
    """Show the public API surface and how it changed between two commits.

    A symbol is public when its kind is function/class/method (not import) and
    its bare name does not start with ``_``.

    With ``--diff REF``, shows three sections: Added, Removed, Changed.
    Without ``--diff``, lists all public symbols at the given commit.

    This command runs against committed snapshots only — no working-tree
    parsing, no test execution.
    """
    # NOTE: the docstring above doubles as the command's ``--help`` text, so
    # implementation notes live in comments instead. ``ctx`` is accepted but
    # not used by this callback.
    root = require_repo()
    repo_id = _read_repo_id(root)
    branch = _read_branch(root)

    commit = resolve_commit_ref(root, repo_id, branch, ref)
    if commit is None:
        typer.echo(f"❌ Commit '{ref or 'HEAD'}' not found.", err=True)
        raise typer.Exit(code=ExitCode.USER_ERROR)

    current_surface = _public_symbols(
        root, _load_manifest(root, commit.commit_id), language
    )

    if diff_ref is None:
        # Listing mode: no comparison requested.
        _emit_surface_listing(commit.commit_id, language, current_surface, as_json)
        return

    # Diff mode: "added" means present at --commit but absent at --diff.
    base_commit = resolve_commit_ref(root, repo_id, branch, diff_ref)
    if base_commit is None:
        typer.echo(f"❌ Diff ref '{diff_ref}' not found.", err=True)
        raise typer.Exit(code=ExitCode.USER_ERROR)

    base_surface = _public_symbols(
        root, _load_manifest(root, base_commit.commit_id), language
    )
    added, removed, changed = _diff_surfaces(base_surface, current_surface)
    _emit_surface_diff(
        commit.commit_id,
        base_commit.commit_id,
        language,
        added,
        removed,
        changed,
        as_json,
    )