cgcardona / muse public
muse_find.py python
239 lines 6.9 KB
12901c5a Initial extraction from tellurstori/maestro cgcardona <gabriel@tellurstori.com> 4d ago
1 """Muse Find — search commit history by musical properties.
2
3 This is the musical equivalent of ``git log --grep``, extended with
4 domain-specific filters for harmony, rhythm, melody, structure, dynamics,
5 and emotion. All filters combine with AND logic: a commit must satisfy
6 every non-None criterion to appear in results.
7
8 Query DSL
9 ---------
10 Each property filter accepts a free-text query string matched
11 case-insensitively against the commit message. Two syntaxes:
12
13 **Equality match** (default)::
14
15 --harmony "key=Eb" → substring match for "key=Eb" in message
16
17 **Numeric range** (``key=low-high``)::
18
19 --rhythm "tempo=120-130" → extract tempo=<N> from message,
20 check 120 <= N <= 130
21
22 Range syntax is triggered when the value portion of ``key=value``
23 contains exactly one hyphen separating two non-negative numbers.
24 """
25 from __future__ import annotations
26
27 import logging
28 import re
29 from dataclasses import dataclass
30 from datetime import datetime
31
32 from sqlalchemy import and_
33 from sqlalchemy.ext.asyncio import AsyncSession
34 from sqlalchemy.future import select
35
36 from maestro.muse_cli.models import MuseCliCommit
37
# Module-level logger; ``search_commits`` reports its match counts through it.
logger = logging.getLogger(__name__)

# Result cap used when a query does not set its own ``limit``.
_DEFAULT_LIMIT = 20


@dataclass(frozen=True)
class MuseFindQuery:
    """Immutable bundle of search criteria for one ``muse find`` run.

    Each criterion defaults to ``None``, meaning "not applied".  The
    criteria that are set are combined with AND.  ``limit`` bounds how many
    matches are returned and defaults to 20.
    """

    # Property filters: plain text or ``key=low-high`` range expressions,
    # each checked against the commit message.
    harmony: str | None = None
    rhythm: str | None = None
    melody: str | None = None
    structure: str | None = None
    dynamic: str | None = None
    emotion: str | None = None
    section: str | None = None
    track: str | None = None

    # Bounds on the commit timestamp (both ends inclusive).
    since: datetime | None = None
    until: datetime | None = None

    # Maximum number of matches to return.
    limit: int = _DEFAULT_LIMIT
62
63
@dataclass(frozen=True)
class MuseFindCommitResult:
    """Immutable record describing one commit that passed every filter.

    ``search_commits`` fills each field from the attribute of the same
    name on the matching commit row.
    """

    # Identity and location of the commit.
    commit_id: str
    branch: str

    # Commit message; this is the text the property filters are matched against.
    message: str
    author: str
    committed_at: datetime

    # ``None`` is allowed for the parent id (presumably a root commit —
    # confirm against the commit model).
    parent_commit_id: str | None
    snapshot_id: str
75
76
@dataclass(frozen=True)
class MuseFindResults:
    """Immutable result envelope produced by :func:`search_commits`.

    ``matches`` holds the matching commits, newest first, and never more
    than ``query.limit`` of them.  ``total_scanned`` is how many rows the
    database returned before the limit was applied.  ``query`` is the
    criteria object the search was run with.
    """

    matches: tuple[MuseFindCommitResult, ...]
    total_scanned: int
    query: MuseFindQuery
88
89
90 _RANGE_RE = re.compile(r"^(\d+(?:\.\d+)?)-(\d+(?:\.\d+)?)$")
91 _KEY_VALUE_RE = re.compile(r"^([^=]+)=(.+)$")
92
93
94 def _parse_property_filter(query_str: str) -> tuple[str, float, float] | None:
95 """Parse ``key=low-high`` range syntax.
96
97 Returns ``(key, low, high)`` when matched, or ``None`` for plain text.
98
99 Examples::
100
101 "tempo=120-130" -> ("tempo", 120.0, 130.0)
102 "key=Eb" -> None
103 """
104 m = _KEY_VALUE_RE.match(query_str)
105 if m is None:
106 return None
107 key = m.group(1).strip()
108 value = m.group(2).strip()
109 rm = _RANGE_RE.match(value)
110 if rm is None:
111 return None
112 return (key, float(rm.group(1)), float(rm.group(2)))
113
114
115 def _extract_numeric_value(message: str, key: str) -> float | None:
116 """Extract the numeric value for *key* from a commit message.
117
118 Matches patterns like ``key=<number>`` and returns the first as float.
119
120 Examples::
121
122 "tempo=125 bpm" -> key="tempo" -> 125.0
123 "swing=0.72" -> key="swing" -> 0.72
124 """
125 pattern = re.compile(
126 r"\b" + re.escape(key) + r"\s*=\s*(\d+(?:\.\d+)?)\b",
127 re.IGNORECASE,
128 )
129 m = pattern.search(message)
130 if m is None:
131 return None
132 return float(m.group(1))
133
134
def _matches_property(message: str, query_str: str) -> bool:
    """Decide whether *message* satisfies the filter *query_str*.

    A ``key=low-high`` filter matches when the message carries a numeric
    value for ``key`` that lies within the inclusive range.  Any other
    filter is a case-insensitive substring test.

    Args:
        message: Commit message to test.
        query_str: Filter expression, plain text or range syntax.

    Returns:
        ``True`` when the message satisfies the filter.
    """
    range_spec = _parse_property_filter(query_str)
    if range_spec is None:
        # Plain text: case-insensitive containment.
        return query_str.lower() in message.lower()

    key, low, high = range_spec
    found = _extract_numeric_value(message, key)
    return found is not None and low <= found <= high
148
149
async def search_commits(
    session: AsyncSession,
    repo_id: str,
    query: MuseFindQuery,
) -> MuseFindResults:
    """Search commit history for commits matching all criteria in *query*.

    Strategy:
    1. Build a SQL query applying date range and plain text filters at DB layer.
    2. Load candidate rows ordered newest-first.
    3. Apply Python-level range filtering for numeric range expressions.
    4. Collect up to ``query.limit`` results.

    Plain-text terms are matched as literal, case-insensitive substrings:
    the LIKE metacharacters ``%`` and ``_`` (and the escape character
    itself) are escaped before the term is embedded in the pattern, so a
    term such as ``key_sig`` or ``100%`` cannot act as a wildcard.

    This function only issues a SELECT; it does not modify the session's
    data.

    Args:
        session: Async SQLAlchemy session.
        repo_id: Repository to scope the search to.
        query: Search criteria.

    Returns:
        :class:`MuseFindResults` with matching commits and diagnostics.
        ``total_scanned`` counts the rows returned by the database, before
        range filtering and the limit are applied.
    """
    stmt = select(MuseCliCommit).where(MuseCliCommit.repo_id == repo_id)

    date_conditions = []
    if query.since is not None:
        date_conditions.append(MuseCliCommit.committed_at >= query.since)
    if query.until is not None:
        date_conditions.append(MuseCliCommit.committed_at <= query.until)
    if date_conditions:
        stmt = stmt.where(and_(*date_conditions))

    all_terms: list[str | None] = [
        query.harmony,
        query.rhythm,
        query.melody,
        query.structure,
        query.dynamic,
        query.emotion,
        query.section,
        query.track,
    ]

    # Classify every term exactly once: range expressions are kept (already
    # parsed) for Python-level filtering, everything else is pushed to SQL.
    range_filters: list[tuple[str, float, float]] = []
    for term in all_terms:
        if term is None:
            continue
        parsed = _parse_property_filter(term)
        if parsed is not None:
            range_filters.append(parsed)
            continue
        # Escape LIKE metacharacters so the term is a literal substring.
        # The backslash must be escaped first, or it would double the
        # escapes added for % and _.
        escaped = (
            term.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
        )
        stmt = stmt.where(
            MuseCliCommit.message.ilike(f"%{escaped}%", escape="\\")
        )

    stmt = stmt.order_by(MuseCliCommit.committed_at.desc())

    result = await session.execute(stmt)
    rows: list[MuseCliCommit] = list(result.scalars().all())
    total_scanned = len(rows)

    def _in_all_ranges(message: str) -> bool:
        """Return True when *message* satisfies every numeric range filter."""
        for key, low, high in range_filters:
            value = _extract_numeric_value(message, key)
            if value is None or not (low <= value <= high):
                return False
        return True

    matches: list[MuseFindCommitResult] = []
    for row in rows:
        # Checked before appending so a non-positive limit yields no matches.
        if len(matches) >= query.limit:
            break
        if _in_all_ranges(row.message):
            matches.append(
                MuseFindCommitResult(
                    commit_id=row.commit_id,
                    branch=row.branch,
                    message=row.message,
                    author=row.author,
                    committed_at=row.committed_at,
                    parent_commit_id=row.parent_commit_id,
                    snapshot_id=row.snapshot_id,
                )
            )

    logger.info(
        "✅ muse find: %d match(es) from %d scanned (repo=%s)",
        len(matches),
        total_scanned,
        repo_id[:8],
    )
    return MuseFindResults(
        matches=tuple(matches),
        total_scanned=total_scanned,
        query=query,
    )