cgcardona / muse public
ask.py python
238 lines 8.5 KB
12901c5a Initial extraction from tellurstori/maestro cgcardona <gabriel@tellurstori.com> 4d ago
1 """muse ask — natural language query over Muse musical history.
2
3 Searches commit messages for keywords extracted from the user's question
4 and returns matching commits in a structured answer. This is a stub
5 implementation: keyword matching over commit messages. Full LLM-powered
6 answer generation is a planned enhancement.
7
8 Usage::
9
10 muse ask "what tempo changes did I make last week?"
11 muse ask "boom bap sessions" --branch feature/hip-hop --cite
12 muse ask "piano intro" --since 2026-01-01 --until 2026-02-01 --json
13
14 ``--cite`` appends the full commit ID to each matching commit entry.
15 ``--json`` emits a machine-readable JSON response instead of plain text.
16 """
17 from __future__ import annotations
18
19 import asyncio
20 import json
21 import logging
22 import pathlib
23 import re
24 from datetime import date, datetime, timezone
25 from typing import Annotated, Optional
26
27 import typer
28 from sqlalchemy import select
29 from sqlalchemy.ext.asyncio import AsyncSession
30
31 from maestro.muse_cli._repo import require_repo
32 from maestro.muse_cli.db import open_session
33 from maestro.muse_cli.errors import ExitCode
34 from maestro.muse_cli.models import MuseCliCommit
35
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Typer application object; the ``ask`` callback below is registered on it.
app = typer.Typer()

# Upper bound passed to ``.limit()`` when loading commits for a query.
_MAX_COMMITS = 10_000
41
42
43 # ---------------------------------------------------------------------------
44 # Data types
45 # ---------------------------------------------------------------------------
46
47
class AnswerResult:
    """Outcome of a ``muse ask`` query, renderable as plain text or JSON.

    Carries the original question, the number of commits that were
    scanned, the commits that matched, and whether commit IDs should be
    shown in full (``cite=True``) or shortened to their first 8 characters.
    """

    # Trailing notice shared by both output formats.
    _NOTE = "Full LLM-powered answer generation is a planned enhancement."

    def __init__(
        self,
        question: str,
        total_searched: int,
        matches: list[MuseCliCommit],
        cite: bool,
    ) -> None:
        self.question = question
        self.total_searched = total_searched
        self.matches = matches
        self.cite = cite

    def _shown_id(self, commit: MuseCliCommit) -> str:
        """Return the commit ID in full when citing, else its first 8 characters."""
        full_id = commit.commit_id
        return full_id if self.cite else full_id[:8]

    def to_plain(self) -> str:
        """Render the answer as newline-joined, human-readable text."""
        header = [
            f"Based on Muse history ({self.total_searched} commits searched):",
            f"Commits matching your query: {len(self.matches)} found",
        ]

        if not self.matches:
            body = [" (no matching commits)"]
        else:
            # A blank line separates the summary from the per-commit entries.
            body = [""]
            for commit in self.matches:
                stamp = commit.committed_at.strftime("%Y-%m-%d %H:%M")
                body.append(f" [{self._shown_id(commit)}] {stamp} {commit.message}")

        footer = ["", f"Note: {self._NOTE}"]
        return "\n".join(header + body + footer)

    def to_json(self) -> str:
        """Render the answer as a JSON document indented by two spaces."""
        entries: list[dict[str, object]] = []
        for commit in self.matches:
            entries.append(
                {
                    "commit_id": self._shown_id(commit),
                    "branch": commit.branch,
                    "message": commit.message,
                    "committed_at": commit.committed_at.isoformat(),
                }
            )

        document: dict[str, object] = {
            "question": self.question,
            "total_searched": self.total_searched,
            "matches": entries,
            "note": self._NOTE,
        }
        return json.dumps(document, indent=2)
102
103
104 # ---------------------------------------------------------------------------
105 # Testable async core
106 # ---------------------------------------------------------------------------
107
108
def _keywords(question: str) -> list[str]:
    """Return the meaningful lowercase search terms found in *question*.

    The question is lowercased and split on runs of non-word characters.
    Stop-words and tokens shorter than two characters are dropped; the
    remaining tokens keep their original order, duplicates included.
    """
    ignored = frozenset(
        """
        a an the
        is are was were be been being
        have has had do does did
        will would could should may might shall can need dare ought
        i my me we our you your he she it they their them
        what when where who which how why
        in on at to of for and or but not
        with from by about into through
        make made last any all that this
        """.split()
    )

    meaningful: list[str] = []
    for word in re.split(r"[\s\W]+", question.lower()):
        # Splitting can yield empty strings at the edges; the length test
        # discards those along with single-character tokens.
        if len(word) <= 1 or word in ignored:
            continue
        meaningful.append(word)
    return meaningful
127
128
async def _ask_async(
    *,
    question: str,
    root: pathlib.Path,
    session: AsyncSession,
    branch: str | None,
    since: date | None,
    until: date | None,
    cite: bool,
) -> AnswerResult:
    """Core ask logic — fully injectable for tests.

    Loads up to ``_MAX_COMMITS`` commits for the repository (newest first),
    applies the optional branch and date filters in the database query, and
    then keeps the commits whose message contains at least one keyword
    extracted from *question* (case-insensitive substring match).

    Parameters:
        question: The user's free-text question; keywords come from
            ``_keywords(question)``.
        root: Repository root; ``root / ".muse"`` must contain ``repo.json``
            (with a ``repo_id`` key) and, when *branch* is ``None``, ``HEAD``.
        session: Open async database session used to run the query.
        branch: Branch to restrict the search to. ``None`` falls back to the
            branch named in ``.muse/HEAD``; if that resolves to an empty
            string, no branch filter is applied.
        since: Inclusive lower bound (start of that day, UTC), or ``None``.
        until: Inclusive upper bound (end of that day, UTC), or ``None``.
        cite: Passed through to the result; controls full vs. short IDs.

    Returns:
        An :class:`AnswerResult` holding the matches and the number of
        commits that were searched. A question with no usable keywords
        matches every loaded commit.

    Raises:
        Errors from reading or parsing ``.muse/repo.json`` / ``.muse/HEAD``
        (missing file, invalid JSON, missing ``repo_id``) and database
        errors propagate unchanged to the caller.
    """
    muse_dir = root / ".muse"
    repo_data: dict[str, str] = json.loads((muse_dir / "repo.json").read_text())
    repo_id = repo_data["repo_id"]

    # Determine effective branch filter: explicit flag → HEAD branch → all.
    effective_branch: str | None = branch
    if effective_branch is None:
        head_ref_text = (muse_dir / "HEAD").read_text().strip()
        # Branch names may themselves contain "/" (e.g. "feature/hip-hop"),
        # so taking only the last path segment would truncate them. Keep
        # everything after "refs/heads/" when that marker is present.
        # NOTE(review): assumes HEAD stores a "refs/heads/<branch>" path —
        # confirm against the code that writes HEAD. Anything else falls
        # back to the previous last-segment behaviour.
        _, marker, branch_tail = head_ref_text.partition("refs/heads/")
        if marker:
            effective_branch = branch_tail
        else:
            effective_branch = head_ref_text.rsplit("/", 1)[-1]

    stmt = (
        select(MuseCliCommit)
        .where(MuseCliCommit.repo_id == repo_id)
        .order_by(MuseCliCommit.committed_at.desc())
        .limit(_MAX_COMMITS)
    )
    if effective_branch:
        stmt = stmt.where(MuseCliCommit.branch == effective_branch)
    if since is not None:
        since_dt = datetime(since.year, since.month, since.day, tzinfo=timezone.utc)
        stmt = stmt.where(MuseCliCommit.committed_at >= since_dt)
    if until is not None:
        # Inclusive: extend to the very last microsecond of the day so that
        # commits stamped e.g. 23:59:59.500 are not silently excluded.
        until_dt = datetime(
            until.year, until.month, until.day, 23, 59, 59, 999_999,
            tzinfo=timezone.utc,
        )
        stmt = stmt.where(MuseCliCommit.committed_at <= until_dt)

    result = await session.execute(stmt)
    all_commits: list[MuseCliCommit] = list(result.scalars().all())

    keywords = _keywords(question)
    if keywords:
        matches: list[MuseCliCommit] = []
        for commit in all_commits:
            # Lowercase each message once rather than once per keyword.
            haystack = commit.message.lower()
            if any(kw in haystack for kw in keywords):
                matches.append(commit)
    else:
        # Empty query → return all commits (the question had no useful tokens).
        matches = list(all_commits)

    return AnswerResult(
        question=question,
        total_searched=len(all_commits),
        matches=matches,
        cite=cite,
    )
192
193
194 # ---------------------------------------------------------------------------
195 # Typer command
196 # ---------------------------------------------------------------------------
197
198
@app.callback(invoke_without_command=True)
def ask(
    ctx: typer.Context,
    question: Annotated[
        str,
        typer.Argument(help="Natural language question about your musical history."),
    ],
    branch: Annotated[
        Optional[str],
        typer.Option("--branch", help="Restrict search to this branch name."),
    ] = None,
    since: Annotated[
        Optional[datetime],
        typer.Option(
            "--since",
            formats=["%Y-%m-%d"],
            help="Only include commits on or after this date (YYYY-MM-DD).",
        ),
    ] = None,
    until: Annotated[
        Optional[datetime],
        typer.Option(
            "--until",
            formats=["%Y-%m-%d"],
            help="Only include commits on or before this date (YYYY-MM-DD).",
        ),
    ] = None,
    output_json: Annotated[
        bool,
        typer.Option("--json", help="Emit machine-readable JSON."),
    ] = False,
    cite: Annotated[
        bool,
        typer.Option("--cite", help="Show full commit IDs in the answer."),
    ] = False,
) -> None:
    """Query your Muse musical history in natural language."""
    root = require_repo()

    # --since/--until arrive as datetimes; only their calendar date is
    # forwarded to the query layer.
    since_date: date | None = None
    if since is not None:
        since_date = since.date()
    until_date: date | None = None
    if until is not None:
        until_date = until.date()

    async def _query_and_print() -> None:
        # Run the query inside a session and print the rendered answer.
        async with open_session() as session:
            answer = await _ask_async(
                question=question,
                root=root,
                session=session,
                branch=branch,
                since=since_date,
                until=until_date,
                cite=cite,
            )
            rendered = answer.to_json() if output_json else answer.to_plain()
            typer.echo(rendered)

    try:
        asyncio.run(_query_and_print())
    except typer.Exit:
        # Deliberate exits pass through untouched.
        raise
    except Exception as exc:
        # Any other failure is reported to the user, logged with its
        # traceback, and converted into an internal-error exit code.
        typer.echo(f"❌ muse ask failed: {exc}")
        logger.exception("❌ muse ask error: %s", exc)
        raise typer.Exit(code=ExitCode.INTERNAL_ERROR)