#!/usr/bin/env python3 """Typing audit — find and count all banned type patterns in the codebase. Checks every rule from .cursorrules and AGENTS.md: - Any as a type (param, return, collection value) - object as a type (same severity as Any) - cast() usage (all usage banned — fix the callee) - # type: ignore (blanket and specific) - Bare collections (list, dict, set, tuple without type parameters) - Optional[X] (use X | None instead) - Legacy typing imports (List, Dict, Set, Tuple — use lowercase builtins) Outputs JSON (machine-readable) + a human summary to stdout. Usage: python tools/typing_audit.py # muse/ + tests/ python tools/typing_audit.py --json artifacts/typing_audit.json python tools/typing_audit.py --dirs muse/ tests/ python tools/typing_audit.py --dirs muse/ --max-any 0 """ from __future__ import annotations import argparse import ast import json import re import sys from collections import defaultdict from pathlib import Path from typing import Any # ── Pattern matchers ────────────────────────────────────────────────────────── # Grouped by category. Every key contributes to the total violation count. _PATTERNS: dict[str, re.Pattern[str]] = { # ── Any-as-type ────────────────────────────────────────────────────── "dict_str_any": re.compile( r"\bdict\[str,\s*Any\]|\bDict\[str,\s*Any\]", re.IGNORECASE ), "list_any": re.compile(r"\blist\[Any\]|\bList\[Any\]", re.IGNORECASE), "return_any": re.compile(r"->\s*Any\b"), "param_any": re.compile(r":\s*Any\b"), "mapping_any": re.compile( r"\bMapping\[str,\s*Any\]", re.IGNORECASE ), "optional_any": re.compile(r"\bOptional\[Any\]", re.IGNORECASE), "sequence_any": re.compile( r"\bSequence\[Any\]|\bIterable\[Any\]", re.IGNORECASE ), "tuple_any": re.compile(r"\btuple\[.*Any.*\]|\bTuple\[.*Any.*\]"), # ── object-as-type (same severity as Any) ──────────────────────────── "param_object": re.compile(r":\s*object\b"), "return_object": re.compile(r"->\s*object\b"), "collection_object": re.compile( r"\b(?:dict|list|set|tuple|Sequence|Mapping)\[[^]]*\bobject\b" ), # ── cast() — all usage banned ──────────────────────────────────────── "cast_usage": re.compile(r"\bcast\("), # ── type: ignore — suppresses real errors ──────────────────────────── "type_ignore": re.compile(r"#\s*type:\s*ignore"), # ── Bare collections (no type parameters) ──────────────────────────── # Negative lookaheads: exclude parameterized [, constructor calls (, # and prose patterns (": list of items" in docstrings). "bare_list": re.compile(r"(?::\s*|->\s*)list\b(?!\[|\(|\s+[a-z])"), "bare_dict": re.compile(r"(?::\s*|->\s*)dict\b(?!\[|\(|\s+[a-z])"), "bare_set": re.compile(r"(?::\s*|->\s*)set\b(?!\[|\(|\s+[a-z])"), "bare_tuple": re.compile(r"(?::\s*|->\s*)tuple\b(?!\[|\(|\s+[a-z])"), # ── Optional[X] — use X | None instead ─────────────────────────────── # Excludes Optional[Any] which is already caught by optional_any. "optional_usage": re.compile(r"\bOptional\[(?!Any\b)"), # ── Legacy typing imports (use lowercase builtins) ─────────────────── "legacy_List": re.compile(r"\bList\["), "legacy_Dict": re.compile(r"\bDict\["), "legacy_Set": re.compile(r"\bSet\["), "legacy_Tuple": re.compile(r"\bTuple\["), } def _count_pattern_in_line(line: str, pattern: re.Pattern[str]) -> int: return len(pattern.findall(line)) def _imports_any(source: str) -> bool: """Check if file imports Any from typing.""" return bool(re.search(r"from\s+typing\s+import\s+.*\bAny\b", source)) def _classify_type_ignores(line: str) -> str: """Return the ignore variant (blanket vs specific).""" m = re.search(r"#\s*type:\s*ignore\[([^\]]+)\]", line) if m: return f"type_ignore[{m.group(1)}]" return "type_ignore[blanket]" # ── AST-based detection ────────────────────────────────────────────────────── def _find_untyped_defs(source: str, filepath: str) -> list[dict[str, Any]]: """Find function defs missing return type or param annotations.""" results: list[dict[str, Any]] = [] try: tree = ast.parse(source) except SyntaxError: return results for node in ast.walk(tree): if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): if node.returns is None: results.append({ "file": filepath, "line": node.lineno, "name": node.name, "issue": "missing_return_type", }) for arg in node.args.args + node.args.kwonlyargs: if arg.annotation is None and arg.arg != "self" and arg.arg != "cls": results.append({ "file": filepath, "line": node.lineno, "name": f"{node.name}.{arg.arg}", "issue": "missing_param_type", }) return results # ── File scanner ────────────────────────────────────────────────────────────── def scan_file(filepath: Path) -> dict[str, Any]: """Scan a single Python file for typing violations.""" try: source = filepath.read_text(encoding="utf-8") except (OSError, UnicodeDecodeError): return {} lines = source.splitlines() result: dict[str, Any] = { "file": str(filepath), "imports_any": _imports_any(source), "patterns": defaultdict(int), "pattern_lines": defaultdict(list), "type_ignore_variants": defaultdict(int), "untyped_defs": [], } for lineno, line in enumerate(lines, 1): stripped = line.strip() if not stripped or stripped.startswith("#"): continue for name, pattern in _PATTERNS.items(): count = _count_pattern_in_line(line, pattern) if count > 0: result["patterns"][name] += count result["pattern_lines"][name].append(lineno) if name == "type_ignore": variant = _classify_type_ignores(line) result["type_ignore_variants"][variant] += 1 result["untyped_defs"] = _find_untyped_defs(source, str(filepath)) return result def scan_directory(directory: Path) -> list[dict[str, Any]]: """Scan all Python files in a directory tree.""" results: list[dict[str, Any]] = [] for py_file in sorted(directory.rglob("*.py")): if "venv" in py_file.parts or "__pycache__" in py_file.parts: continue if ".git" in py_file.parts: continue file_result = scan_file(py_file) if file_result: results.append(file_result) return results # ── Report generation ───────────────────────────────────────────────────────── # Display order: group patterns into logical categories for the report. _CATEGORY_ORDER: list[tuple[str, list[str]]] = [ ("Any-as-type", [ "dict_str_any", "list_any", "return_any", "param_any", "mapping_any", "optional_any", "sequence_any", "tuple_any", ]), ("object-as-type", [ "param_object", "return_object", "collection_object", ]), ("cast() usage", ["cast_usage"]), ("type: ignore", ["type_ignore"]), ("Bare collections", [ "bare_list", "bare_dict", "bare_set", "bare_tuple", ]), ("Optional (use X | None)", ["optional_usage"]), ("Legacy typing imports", [ "legacy_List", "legacy_Dict", "legacy_Set", "legacy_Tuple", ]), ] def generate_report(results: list[dict[str, Any]]) -> dict[str, Any]: """Generate aggregate report from scan results.""" totals: dict[str, int] = defaultdict(int) files_with_any_import = 0 per_file: dict[str, dict[str, int]] = {} top_offenders: list[dict[str, Any]] = [] all_type_ignore_variants: dict[str, int] = defaultdict(int) all_untyped_defs: list[dict[str, Any]] = [] for r in results: filepath = r["file"] if r.get("imports_any"): files_with_any_import += 1 file_total = 0 file_patterns: dict[str, int] = {} for pattern, count in r.get("patterns", {}).items(): totals[pattern] += count file_patterns[pattern] = count file_total += count if file_total > 0: per_file[filepath] = file_patterns top_offenders.append({"file": filepath, "total": file_total, "patterns": file_patterns}) for variant, count in r.get("type_ignore_variants", {}).items(): all_type_ignore_variants[variant] += count all_untyped_defs.extend(r.get("untyped_defs", [])) top_offenders.sort(key=lambda x: x["total"], reverse=True) return { "summary": { "total_files_scanned": len(results), "files_importing_any": files_with_any_import, "total_any_patterns": sum(totals.values()), "untyped_defs": len(all_untyped_defs), }, "pattern_totals": dict(totals), "type_ignore_variants": dict(all_type_ignore_variants), "top_offenders": top_offenders[:30], "per_file": per_file, "untyped_defs": all_untyped_defs[:50], } def print_human_summary(report: dict[str, Any]) -> None: """Print a human-readable summary.""" s = report["summary"] totals = report["pattern_totals"] print("\n" + "=" * 70) print(" TYPING AUDIT — Violation Report") print("=" * 70) print(f" Files scanned: {s['total_files_scanned']}") print(f" Files importing Any: {s['files_importing_any']}") print(f" Total violations: {s['total_any_patterns']}") print(f" Untyped defs: {s['untyped_defs']}") print() for category, pattern_names in _CATEGORY_ORDER: category_total = sum(totals.get(p, 0) for p in pattern_names) if category_total == 0: continue print(f" {category}:") for p in pattern_names: count = totals.get(p, 0) if count > 0: print(f" {p:30s} {count:5d}") print() if not any(totals.get(p, 0) for _, pats in _CATEGORY_ORDER for p in pats): print(" Pattern breakdown: (none)") print() if report["type_ignore_variants"]: print(" # type: ignore variants:") for variant, count in sorted(report["type_ignore_variants"].items(), key=lambda x: -x[1]): print(f" {variant:40s} {count:5d}") print() print(" Top 15 offenders:") for entry in report["top_offenders"][:15]: print(f" {entry['total']:4d} {entry['file']}") print("=" * 70 + "\n") # ── CLI ─────────────────────────────────────────────────────────────────────── def main() -> None: parser = argparse.ArgumentParser( description="Audit typing violations: Any, object, cast, bare collections, " "Optional, legacy imports, type: ignore, untyped defs", ) parser.add_argument( "--dirs", nargs="+", default=["muse/", "tests/"], help="Directories to scan", ) parser.add_argument("--json", type=str, help="Write JSON report to file") parser.add_argument( "--max-any", type=int, default=None, help="Fail (exit 1) if total violations exceed this threshold", ) args = parser.parse_args() all_results: list[dict[str, Any]] = [] for d in args.dirs: p = Path(d) if p.exists(): all_results.extend(scan_directory(p)) else: print(f"WARNING: {d} does not exist, skipping", file=sys.stderr) report = generate_report(all_results) print_human_summary(report) if args.json: Path(args.json).parent.mkdir(parents=True, exist_ok=True) Path(args.json).write_text( json.dumps(report, indent=2, default=str), encoding="utf-8", ) print(f" JSON report written to {args.json}") if args.max_any is not None: total = report["summary"]["total_any_patterns"] if total > args.max_any: print( f"\n❌ RATCHET FAILED: {total} violations exceed " f"threshold of {args.max_any}", file=sys.stderr, ) sys.exit(1) else: print( f"\n✅ RATCHET OK: {total} violations within " f"threshold of {args.max_any}", ) if __name__ == "__main__": main()