muse_find.py
python
| 1 | """Muse Find — search commit history by musical properties. |
| 2 | |
| 3 | This is the musical equivalent of ``git log --grep``, extended with |
| 4 | domain-specific filters for harmony, rhythm, melody, structure, dynamics, |
| 5 | and emotion. All filters combine with AND logic: a commit must satisfy |
| 6 | every non-None criterion to appear in results. |
| 7 | |
| 8 | Query DSL |
| 9 | --------- |
| 10 | Each property filter accepts a free-text query string matched |
| 11 | case-insensitively against the commit message. Two syntaxes: |
| 12 | |
| 13 | **Equality match** (default):: |
| 14 | |
| 15 | --harmony "key=Eb" → substring match for "key=Eb" in message |
| 16 | |
| 17 | **Numeric range** (``key=low-high``):: |
| 18 | |
| 19 | --rhythm "tempo=120-130" → extract tempo=<N> from message, |
| 20 | check 120 <= N <= 130 |
| 21 | |
| 22 | Range syntax is triggered when the value portion of ``key=value`` |
| 23 | contains exactly one hyphen separating two non-negative numbers. |
| 24 | """ |
| 25 | from __future__ import annotations |
| 26 | |
| 27 | import logging |
| 28 | import re |
| 29 | from dataclasses import dataclass |
| 30 | from datetime import datetime |
| 31 | |
| 32 | from sqlalchemy import and_ |
| 33 | from sqlalchemy.ext.asyncio import AsyncSession |
| 34 | from sqlalchemy.future import select |
| 35 | |
| 36 | from maestro.muse_cli.models import MuseCliCommit |
| 37 | |
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Number of matches returned when the caller does not choose a ``limit``.
_DEFAULT_LIMIT = 20


@dataclass(frozen=True)
class MuseFindQuery:
    """Immutable bundle of search criteria for one ``muse find`` run.

    Every criterion defaults to ``None``, which means "do not filter on
    this".  Criteria that are set are combined with AND, so a commit must
    satisfy all of them to be returned.
    """

    # Text criteria.  Each one is matched against the commit message, either
    # as a case-insensitive substring or, when written as ``key=low-high``,
    # as an inclusive numeric range on ``key=<number>``.
    harmony: str | None = None
    rhythm: str | None = None
    melody: str | None = None
    structure: str | None = None
    dynamic: str | None = None
    emotion: str | None = None
    section: str | None = None
    track: str | None = None

    # Inclusive bounds on the commit timestamp.
    since: datetime | None = None
    until: datetime | None = None

    # Maximum number of matches to return.
    limit: int = _DEFAULT_LIMIT
| 62 | |
| 63 | |
@dataclass(frozen=True)
class MuseFindCommitResult:
    """Immutable record describing one commit that satisfied the search.

    ``search_commits`` fills each field from the same-named attribute of the
    matching ``MuseCliCommit`` row.
    """

    commit_id: str
    branch: str
    message: str
    author: str
    committed_at: datetime
    # Optional; presumably ``None`` for a commit without a parent -- confirm
    # against the ``MuseCliCommit`` model.
    parent_commit_id: str | None
    snapshot_id: str
| 75 | |
| 76 | |
@dataclass(frozen=True)
class MuseFindResults:
    """Immutable outcome of one :func:`search_commits` call."""

    # Matching commits, newest first, at most ``query.limit`` entries.
    matches: tuple[MuseFindCommitResult, ...]
    # Number of candidate rows the database returned, i.e. after the SQL-level
    # filters but before Python-level range filtering and the limit.
    total_scanned: int
    # The criteria that produced this result, kept for reference.
    query: MuseFindQuery
| 88 | |
| 89 | |
# Value part of a range filter: two non-negative numbers (integer or decimal)
# joined by a single hyphen, with nothing before or after.
_RANGE_RE = re.compile(r"^(\d+(?:\.\d+)?)-(\d+(?:\.\d+)?)$")
# Splits ``key=value`` at the first ``=``; both sides must be non-empty.
_KEY_VALUE_RE = re.compile(r"^([^=]+)=(.+)$")


def _parse_property_filter(query_str: str) -> tuple[str, float, float] | None:
    """Parse the ``key=low-high`` numeric range syntax.

    Args:
        query_str: Raw filter text as supplied by the caller.

    Returns:
        ``(key, low, high)`` with ``low <= high`` when *query_str* is a range
        expression, or ``None`` when it should be treated as plain text.
        Surrounding whitespace around the key and the value is ignored.

    Examples::

        "tempo=120-130" -> ("tempo", 120.0, 130.0)
        "tempo=130-120" -> ("tempo", 120.0, 130.0)
        "key=Eb"        -> None
    """
    key_value = _KEY_VALUE_RE.match(query_str)
    if key_value is None:
        return None

    bounds = _RANGE_RE.match(key_value.group(2).strip())
    if bounds is None:
        return None

    first = float(bounds.group(1))
    second = float(bounds.group(2))
    # Accept the bounds in either order.  Without this, a reversed range such
    # as "130-120" yields low > high, which no value can satisfy, so the
    # filter would silently match nothing.
    low, high = (first, second) if first <= second else (second, first)
    return (key_value.group(1).strip(), low, high)
| 113 | |
| 114 | |
def _extract_numeric_value(message: str, key: str) -> float | None:
    """Extract the numeric value for *key* from a commit message.

    Looks for ``key=<number>`` (case-insensitive, optional whitespace around
    ``=``) where the key starts at a word boundary and the number ends at one.
    The first such occurrence wins.

    A number directly followed by a letter, digit or underscore is rejected
    as a whole.  It is never truncated to a shorter prefix, so
    ``"tempo=120.5bpm"`` yields ``None`` rather than ``120.0``.

    Args:
        message: Commit message to search.
        key: Property name to look for; matched literally.

    Returns:
        The value as a float, or ``None`` when no well-formed occurrence
        exists.

    Examples::

        "tempo=125 bpm" -> key="tempo" -> 125.0
        "swing=0.72"    -> key="swing" -> 0.72
        "swing=0.72x"   -> key="swing" -> None
    """
    pattern = re.compile(
        r"\b"
        + re.escape(key)
        # The lookahead captures the longest number and ``\1`` then consumes
        # exactly that text.  Lookaheads are not re-entered on backtracking,
        # so the trailing ``\b`` cannot be satisfied by dropping the fraction.
        + r"\s*=\s*(?=(\d+(?:\.\d+)?))\1\b",
        re.IGNORECASE,
    )
    found = pattern.search(message)
    if found is None:
        return None
    return float(found.group(1))
| 133 | |
| 134 | |
def _matches_property(message: str, query_str: str) -> bool:
    """Decide whether *message* satisfies the filter *query_str*.

    A filter that parses as a ``key=low-high`` range matches when the message
    carries a numeric value for ``key`` lying inside the inclusive range.
    Any other filter is a case-insensitive substring test.
    """
    range_spec = _parse_property_filter(query_str)
    if range_spec is None:
        # Plain-text filter: substring match, ignoring case.
        return query_str.lower() in message.lower()

    key, low, high = range_spec
    found = _extract_numeric_value(message, key)
    # A message without a value for the key can never satisfy a range.
    return found is not None and low <= found <= high
| 148 | |
| 149 | |
# Escape character used in the LIKE patterns built from user-supplied text.
_LIKE_ESCAPE = "/"


def _escape_like(term: str) -> str:
    """Return *term* with LIKE metacharacters escaped so it matches literally."""
    # The escape character itself must be doubled first, otherwise the
    # escapes added for "%" and "_" would be escaped again.
    return (
        term.replace(_LIKE_ESCAPE, _LIKE_ESCAPE * 2)
        .replace("%", _LIKE_ESCAPE + "%")
        .replace("_", _LIKE_ESCAPE + "_")
    )


async def search_commits(
    session: AsyncSession,
    repo_id: str,
    query: MuseFindQuery,
) -> MuseFindResults:
    """Search commit history for commits matching all criteria in *query*.

    Strategy:
    1. Build a SQL query applying the date range and the plain-text filters
       at the DB layer.  Plain-text terms are matched as literal,
       case-insensitive substrings: ``%`` and ``_`` in a term are escaped so
       they do not act as SQL wildcards.
    2. Load every candidate row, ordered newest-first.
    3. Apply Python-level filtering for numeric range expressions.
    4. Collect up to ``query.limit`` results.

    The only statement issued on *session* is a SELECT.

    Args:
        session: Async SQLAlchemy session.
        repo_id: Repository to scope the search to.
        query: Search criteria.

    Returns:
        :class:`MuseFindResults` holding the matches (newest-first, at most
        ``query.limit``), the number of candidate rows returned by the
        database, and the original *query*.
    """
    stmt = select(MuseCliCommit).where(MuseCliCommit.repo_id == repo_id)

    date_conditions = []
    if query.since is not None:
        date_conditions.append(MuseCliCommit.committed_at >= query.since)
    if query.until is not None:
        date_conditions.append(MuseCliCommit.committed_at <= query.until)
    if date_conditions:
        stmt = stmt.where(and_(*date_conditions))

    # Split the supplied terms once: plain text is pushed down to SQL, range
    # expressions need numeric extraction and are evaluated in Python below.
    plain_terms: list[str] = []
    range_filters: list[str] = []
    for term in (
        query.harmony,
        query.rhythm,
        query.melody,
        query.structure,
        query.dynamic,
        query.emotion,
        query.section,
        query.track,
    ):
        if term is None:
            continue
        if _parse_property_filter(term) is None:
            plain_terms.append(term)
        else:
            range_filters.append(term)

    for term in plain_terms:
        # Escape LIKE metacharacters so a term such as "chord_prog" or "50%"
        # is a literal substring, not a wildcard pattern.
        stmt = stmt.where(
            MuseCliCommit.message.ilike(
                f"%{_escape_like(term)}%", escape=_LIKE_ESCAPE
            )
        )

    stmt = stmt.order_by(MuseCliCommit.committed_at.desc())

    result = await session.execute(stmt)
    rows: list[MuseCliCommit] = list(result.scalars().all())
    total_scanned = len(rows)

    matches: list[MuseFindCommitResult] = []
    for row in rows:
        # Rows are newest-first, so stopping here keeps the newest matches.
        if len(matches) >= query.limit:
            break
        if all(_matches_property(row.message, f) for f in range_filters):
            matches.append(
                MuseFindCommitResult(
                    commit_id=row.commit_id,
                    branch=row.branch,
                    message=row.message,
                    author=row.author,
                    committed_at=row.committed_at,
                    parent_commit_id=row.parent_commit_id,
                    snapshot_id=row.snapshot_id,
                )
            )

    logger.info(
        "✅ muse find: %d match(es) from %d scanned (repo=%s)",
        len(matches),
        total_scanned,
        repo_id[:8],
    )
    return MuseFindResults(
        matches=tuple(matches),
        total_scanned=total_scanned,
        query=query,
    )