test_maestro_muse_integration.py
python
| 1 | """Integration tests for the Maestro stress test → muse-work/ output contract. |
| 2 | |
| 3 | These tests exercise the artifact-saving and manifest-emission functions from |
| 4 | ``scripts/e2e/stress_test.py`` in isolation — no live Storpheus or Maestro |
| 5 | service is required. |
| 6 | |
| 7 | All async tests use ``@pytest.mark.anyio``. |
| 8 | """ |
| 9 | from __future__ import annotations |
| 10 | |
| 11 | import base64 |
| 12 | import json |
| 13 | import pathlib |
| 14 | import sys |
| 15 | from dataclasses import asdict |
| 16 | from typing import Any |
| 17 | from unittest.mock import AsyncMock, patch |
| 18 | |
| 19 | import pytest |
| 20 | |
| 21 | # stress_test.py lives in scripts/e2e/ which is not a package; import it via |
| 22 | # sys.path manipulation so it's available to the test suite without modifying |
| 23 | # production code. |
| 24 | _SCRIPTS_E2E = pathlib.Path(__file__).parents[2] / "scripts" / "e2e" |
| 25 | if str(_SCRIPTS_E2E) not in sys.path: |
| 26 | sys.path.insert(0, str(_SCRIPTS_E2E)) |
| 27 | |
| 28 | from stress_test import ( # noqa: E402 |
| 29 | ArtifactSet, |
| 30 | MuseBatchFile, |
| 31 | RequestResult, |
| 32 | emit_muse_batch_json, |
| 33 | save_artifacts_to_muse_work, |
| 34 | ) |
| 35 | |
| 36 | |
| 37 | # --------------------------------------------------------------------------- |
| 38 | # Helpers |
| 39 | # --------------------------------------------------------------------------- |
| 40 | |
| 41 | |
def _make_result(
    genre: str = "jazz",
    bars: int = 4,
    instruments: list[str] | None = None,
    success: bool = True,
    cache_hit: bool = False,
    composition_id: str = "comp-0001",
) -> RequestResult:
    """Build a ``RequestResult`` populated with fixed test defaults.

    Only the fields that vary between tests are exposed as parameters; tempo,
    quality preset, intent profile and key are pinned to constant values.

    Args:
        genre: Genre recorded on the result.
        bars: Bar count recorded on the result.
        instruments: Instrument names. ``None`` selects the default
            ``["drums", "bass"]``; an explicitly passed list (including an
            empty one) is forwarded unchanged.
        success: Whether the result is marked as successful.
        cache_hit: Whether the result is marked as a cache hit.
        composition_id: Composition identifier stored on the result.

    Returns:
        A new ``RequestResult`` carrying the values above.
    """
    # Compare against None instead of relying on truthiness, so a caller that
    # deliberately asks for an empty instrument list actually gets one.
    if instruments is None:
        instruments = ["drums", "bass"]
    return RequestResult(
        genre=genre,
        tempo=110,
        instruments=instruments,
        bars=bars,
        quality_preset="fast",
        intent_profile="neutral",
        key=None,
        success=success,
        composition_id=composition_id,
        cache_hit=cache_hit,
    )
| 62 | |
| 63 | |
def _make_artifact(
    composition_id: str = "comp-0001",
    genre: str = "jazz",
    bars: int = 4,
    with_mid: bool = True,
    with_mp3: bool = True,
    with_webp: bool = True,
) -> ArtifactSet:
    """Build an ``ArtifactSet`` whose payloads are small base64 placeholders.

    Each ``with_*`` flag controls whether the matching ``*_b64`` attribute is
    assigned; an attribute whose flag is false is left exactly as the
    ``ArtifactSet`` constructor initialised it.
    """

    def _encoded(raw: bytes) -> str:
        # Payloads are stored as base64 text rather than raw bytes.
        return base64.b64encode(raw).decode()

    artifact_set = ArtifactSet(composition_id=composition_id, genre=genre, bars=bars)
    if with_mid:
        artifact_set.mid_b64 = _encoded(b"MIDI-DATA")
    if with_mp3:
        artifact_set.mp3_b64 = _encoded(b"MP3-DATA")
    if with_webp:
        artifact_set.webp_b64 = _encoded(b"WEBP-DATA")
    return artifact_set
| 80 | |
| 81 | |
| 82 | # --------------------------------------------------------------------------- |
| 83 | # test_stress_test_writes_muse_work_layout |
| 84 | # --------------------------------------------------------------------------- |
| 85 | |
| 86 | |
def test_stress_test_writes_muse_work_layout(tmp_path: pathlib.Path) -> None:
    """save_artifacts_to_muse_work places each artifact kind in its own subdirectory."""
    muse_root = tmp_path / "muse-work"
    request = _make_result(
        genre="jazz", bars=4, instruments=["drums", "bass"], composition_id="comp-abc"
    )
    artifact_map = {
        "comp-abc": _make_artifact(composition_id="comp-abc", genre="jazz", bars=4)
    }

    written = save_artifacts_to_muse_work(muse_root, [request], artifact_map)

    # The MIDI file is expected under tracks/, on a path that names the instruments.
    midi_hits = list(muse_root.joinpath("tracks").rglob("*.mid"))
    assert len(midi_hits) == 1, "Exactly one MIDI file should be written"
    midi_path = midi_hits[0]
    assert "drums_bass" in str(midi_path)
    assert midi_path.name == "jazz_4b_comp-abc.mid"

    # Rendered audio, preview image and metadata each live in a dedicated directory.
    remaining_kinds = (
        ("renders", "*.mp3", "jazz_4b_comp-abc.mp3"),
        ("previews", "*.webp", "jazz_4b_comp-abc.webp"),
        ("meta", "*.json", "jazz_4b_comp-abc.json"),
    )
    for subdir, pattern, expected_name in remaining_kinds:
        hits = list(muse_root.joinpath(subdir).rglob(pattern))
        assert len(hits) == 1
        assert hits[0].name == expected_name

    # Every artifact kind must be represented among the returned batch entries.
    assert {entry.role for entry in written} == {"midi", "mp3", "webp", "meta"}
| 122 | |
| 123 | |
| 124 | # --------------------------------------------------------------------------- |
| 125 | # test_muse_batch_json_schema |
| 126 | # --------------------------------------------------------------------------- |
| 127 | |
| 128 | |
def test_muse_batch_json_schema(tmp_path: pathlib.Path) -> None:
    """muse-batch.json is valid JSON matching the required schema.

    Saves one complete artifact set (MIDI, MP3, WebP), emits the manifest and
    checks its top-level keys, the per-file fields, the form of each path and
    the provenance keys.
    """
    output_dir = tmp_path / "muse-work"
    result = _make_result(composition_id="comp-0001", genre="house", bars=8)
    artifact = _make_artifact(composition_id="comp-0001", genre="house", bars=8)
    artifacts = {"comp-0001": artifact}

    batch_files = save_artifacts_to_muse_work(output_dir, [result], artifacts)

    provenance: dict[str, Any] = {
        "prompt": "stress_test.py --quick --genre house",
        "model": "storpheus",
        "seed": "stress-20260227_172919",
        "storpheus_version": "1.0.0",
    }
    batch_path = emit_muse_batch_json(
        batch_root=tmp_path,
        run_id="stress-20260227_172919",
        generated_at="2026-02-27T17:29:19Z",
        batch_files=batch_files,
        results=[result],
        provenance=provenance,
    )

    assert batch_path.exists(), "muse-batch.json must be written"
    data = json.loads(batch_path.read_text())

    # Required top-level keys
    for key in ("run_id", "generated_at", "commit_message_suggestion", "files", "provenance"):
        assert key in data, f"missing top-level key {key!r}"

    assert data["run_id"] == "stress-20260227_172919"
    assert data["generated_at"] == "2026-02-27T17:29:19Z"
    assert isinstance(data["commit_message_suggestion"], str)
    assert len(data["commit_message_suggestion"]) > 0

    files = data["files"]
    # Artifacts were saved above, so the manifest must list them. Without this
    # guard the per-entry checks below would pass vacuously on an empty list.
    assert files, "files[] must not be empty when artifacts were saved"

    for entry in files:
        # Each file entry must have required fields
        for field in ("path", "role", "genre", "bars"):
            assert field in entry, f"file entry missing {field!r}: {entry!r}"
        assert entry["role"] in ("midi", "mp3", "webp", "meta")

        # Paths must be relative (no leading /)
        assert not entry["path"].startswith("/"), "paths must be relative to repo root"
        assert entry["path"].startswith("muse-work/")

    # Provenance fields
    prov = data["provenance"]
    for key in ("prompt", "model", "seed", "storpheus_version"):
        assert key in prov, f"missing provenance key {key!r}"
| 187 | |
| 188 | |
| 189 | # --------------------------------------------------------------------------- |
| 190 | # test_muse_batch_includes_only_successes |
| 191 | # --------------------------------------------------------------------------- |
| 192 | |
| 193 | |
def test_muse_batch_includes_only_successes(tmp_path: pathlib.Path) -> None:
    """A failed generation contributes no entries to files[] in muse-batch.json."""
    ok = _make_result(genre="jazz", composition_id="comp-ok", success=True)
    failed = _make_result(genre="house", composition_id="comp-fail", success=False)
    failed.error = "GPU timeout"

    # Only the successful composition has an artifact set; nothing is
    # registered for comp-fail.
    saved = save_artifacts_to_muse_work(
        tmp_path / "muse-work",
        [ok, failed],
        {"comp-ok": _make_artifact(composition_id="comp-ok")},
    )

    manifest_path = emit_muse_batch_json(
        batch_root=tmp_path,
        run_id="stress-test",
        generated_at="2026-02-27T00:00:00Z",
        batch_files=saved,
        results=[ok, failed],
        provenance={},
    )

    listed = json.loads(manifest_path.read_text())["files"]

    # The jazz success is listed; the house failure is not.
    listed_genres = {item["genre"] for item in listed}
    assert "jazz" in listed_genres
    assert "house" not in listed_genres, "Failed result must be omitted from batch"

    # No listed path may mention the failed composition id.
    assert all("comp-fail" not in item["path"] for item in listed)
| 234 | |
| 235 | |
| 236 | # --------------------------------------------------------------------------- |
| 237 | # test_muse_batch_cache_hits_have_cached_flag |
| 238 | # --------------------------------------------------------------------------- |
| 239 | |
| 240 | |
def test_muse_batch_cache_hits_have_cached_flag(tmp_path: pathlib.Path) -> None:
    """Manifest entries carry a ``cached`` flag matching the result's cache_hit."""
    from_cache = _make_result(
        genre="boom_bap", composition_id="comp-cached", success=True, cache_hit=True
    )
    generated = _make_result(
        genre="techno", composition_id="comp-fresh", success=True, cache_hit=False
    )

    # MIDI-only artifact sets keep the fixture small.
    artifact_map = {
        "comp-cached": _make_artifact(
            composition_id="comp-cached", genre="boom_bap", with_mp3=False, with_webp=False
        ),
        "comp-fresh": _make_artifact(
            composition_id="comp-fresh", genre="techno", with_mp3=False, with_webp=False
        ),
    }

    saved = save_artifacts_to_muse_work(
        tmp_path / "muse-work", [from_cache, generated], artifact_map
    )
    manifest_path = emit_muse_batch_json(
        batch_root=tmp_path,
        run_id="stress-cache-test",
        generated_at="2026-02-27T00:00:00Z",
        batch_files=saved,
        results=[from_cache, generated],
        provenance={},
    )

    listed = json.loads(manifest_path.read_text())["files"]

    def _entries_for(comp_id: str) -> list[Any]:
        # Entries are matched to a composition through the id embedded in the path.
        return [item for item in listed if comp_id in item["path"]]

    cached_entries = _entries_for("comp-cached")
    fresh_entries = _entries_for("comp-fresh")

    assert cached_entries, "Cache hit must appear in batch"
    assert all(item["cached"] is True for item in cached_entries)

    assert fresh_entries, "Fresh result must appear in batch"
    assert all(item["cached"] is False for item in fresh_entries)
| 283 | |
| 284 | |
| 285 | # --------------------------------------------------------------------------- |
| 286 | # test_muse_batch_commit_message_suggestion |
| 287 | # --------------------------------------------------------------------------- |
| 288 | |
| 289 | |
@pytest.mark.parametrize(
    "genres, expected_prefix",
    [
        (["jazz"], "feat: jazz stress test"),
        (["jazz", "house"], "feat: 2-genre stress test"),
        (["jazz", "house", "techno"], "feat: 3-genre stress test"),
    ],
)
def test_muse_batch_commit_message_suggestion(
    tmp_path: pathlib.Path,
    genres: list[str],
    expected_prefix: str,
) -> None:
    """The suggested commit message opens with the prefix expected for the genre list."""
    # One successful, MIDI-only composition per genre: comp-0000, comp-0001, ...
    numbered = [(f"comp-{index:04d}", name) for index, name in enumerate(genres)]
    results = [
        _make_result(genre=name, composition_id=comp_id, success=True)
        for comp_id, name in numbered
    ]
    artifacts: dict[str, ArtifactSet] = {
        comp_id: _make_artifact(
            composition_id=comp_id, genre=name, with_mp3=False, with_webp=False
        )
        for comp_id, name in numbered
    }

    saved = save_artifacts_to_muse_work(tmp_path / "muse-work", results, artifacts)
    manifest_path = emit_muse_batch_json(
        batch_root=tmp_path,
        run_id="stress-msg-test",
        generated_at="2026-02-27T00:00:00Z",
        batch_files=saved,
        results=results,
        provenance={},
    )

    suggestion = json.loads(manifest_path.read_text())["commit_message_suggestion"]
    assert suggestion.startswith(expected_prefix), (
        f"Expected suggestion to start with {expected_prefix!r}, got {suggestion!r}"
    )
| 331 | |
| 332 | |
| 333 | # --------------------------------------------------------------------------- |
| 334 | # test_muse_batch_no_artifacts_uses_genres_from_results |
| 335 | # --------------------------------------------------------------------------- |
| 336 | |
| 337 | |
def test_muse_batch_no_artifacts_uses_genres_from_results(
    tmp_path: pathlib.Path,
) -> None:
    """With no artifacts saved, the commit suggestion still names the successful genre."""
    lone_success = _make_result(
        genre="ambient", composition_id="comp-no-art", success=True
    )

    # An empty artifact mapping means there is nothing to write for this result.
    saved = save_artifacts_to_muse_work(tmp_path / "muse-work", [lone_success], {})

    manifest_path = emit_muse_batch_json(
        batch_root=tmp_path,
        run_id="stress-no-art",
        generated_at="2026-02-27T00:00:00Z",
        batch_files=saved,
        results=[lone_success],
        provenance={},
    )

    manifest = json.loads(manifest_path.read_text())
    # The genre can only have come from the results list passed to the emitter.
    assert "ambient" in manifest["commit_message_suggestion"]