"""muse validate — musical integrity checks for the working tree.

This module provides the core validation logic that ``muse validate`` invokes.
It is intentionally kept separate from the CLI layer so the checks can be
called from tests and future automation pipelines without spawning a subprocess.

Named result types registered in ``docs/reference/type_contracts.md``:
- ``ValidationSeverity``
- ``ValidationIssue``
- ``ValidationCheckResult``
- ``MuseValidateResult``

Exit-code contract (mirrors git-fsck conventions):
- 0 — all checks passed (no errors, no warnings)
- 1 — one or more ERROR issues found
- 2 — one or more WARN issues found and ``--strict`` was requested
"""
from __future__ import annotations

import dataclasses
import enum
import json
import logging
import pathlib
import re
import struct

logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Types
# ---------------------------------------------------------------------------

#: Closed vocabulary of emotion tags accepted by the ``emotion_tags`` check.
#: All entries are lowercase.
ALLOWED_EMOTION_TAGS: frozenset[str] = frozenset(
    {
        "happy",
        "sad",
        "energetic",
        "calm",
        "tense",
        "relaxed",
        "dark",
        "bright",
        "melancholic",
        "triumphant",
        "mysterious",
        "playful",
        "romantic",
        "aggressive",
        "peaceful",
    }
)

#: Regex for well-formed section directory names: e.g. "verse", "chorus-01", "bridge_02".
#: Anchored with ``\Z`` rather than ``$``: ``$`` also matches just before a
#: trailing newline, which would let a name such as "verse\n" pass the check.
_SECTION_NAME_RE = re.compile(r"^[a-z][a-z0-9_-]*\Z")


class ValidationSeverity(str, enum.Enum):
    """How serious a single validation finding is.

    The ``str`` mixin makes every member compare equal to its lowercase
    string value, and that value is what ends up in serialised output.
    """

    # A hard failure.
    ERROR = "error"
    # A softer finding.
    WARN = "warn"
    # Purely informational.
    INFO = "info"


@dataclasses.dataclass
class ValidationIssue:
    """One finding reported by a validation check.

    ERROR-severity findings should be treated by agents as blocking
    ``muse commit``. WARN-severity findings are informational unless
    ``--strict`` mode is active.

    Attributes:
        severity: How serious the finding is.
        check: Name of the check that produced the finding.
        path: Location the finding refers to, as a string.
        message: Human-readable description of the problem.
    """

    severity: ValidationSeverity
    check: str
    path: str
    message: str

    def to_dict(self) -> dict[str, str]:
        """Return a plain string mapping; ``severity`` is emitted as its value."""
        return dict(
            severity=self.severity.value,
            check=self.check,
            path=self.path,
            message=self.message,
        )


@dataclasses.dataclass
class ValidationCheckResult:
    """Result of running one named check category.

    By convention ``passed`` is True only when this check produced no
    issues at all; the dataclass itself does not enforce that.

    Attributes:
        name: Identifier of the check category.
        passed: Whether the check finished without findings.
        issues: Findings produced by the check.
    """

    name: str
    passed: bool
    issues: list[ValidationIssue]

    def to_dict(self) -> dict[str, object]:
        """Return a plain mapping with every issue serialised via its ``to_dict``."""
        serialised_issues = [issue.to_dict() for issue in self.issues]
        return dict(name=self.name, passed=self.passed, issues=serialised_issues)


@dataclasses.dataclass
class MuseValidateResult:
    """Combined outcome of every validation check run against the working tree.

    Attributes:
        clean: True iff every check passed (no issues of any severity).
        has_errors: True iff at least one ERROR-severity issue was found.
        has_warnings: True iff at least one WARN-severity issue was found.
        checks: Per-check results.
        fixes_applied: Human-readable descriptions of fixes that were applied.
    """

    clean: bool
    has_errors: bool
    has_warnings: bool
    checks: list[ValidationCheckResult]
    fixes_applied: list[str]

    def to_dict(self) -> dict[str, object]:
        """Return a plain mapping with each check serialised via its ``to_dict``.

        ``fixes_applied`` is passed through as the same list object, not a copy.
        """
        serialised_checks = [check.to_dict() for check in self.checks]
        return dict(
            clean=self.clean,
            has_errors=self.has_errors,
            has_warnings=self.has_warnings,
            checks=serialised_checks,
            fixes_applied=self.fixes_applied,
        )


# ---------------------------------------------------------------------------
# MIDI integrity check
# ---------------------------------------------------------------------------

def _is_valid_midi(path: pathlib.Path) -> bool:
    """Report whether *path* starts with a plausible Standard MIDI File header.

    Only the first eight bytes are examined: the 4-byte ``MThd`` magic and
    the big-endian 32-bit header chunk length, which must equal 6. This is
    a cheap structural sanity check, not a full parse; it exists to reject
    obviously corrupt or truncated files early.

    Args:
        path: File to inspect.

    Returns:
        True when both the magic and the chunk length are as expected.
        False otherwise, including when the file cannot be opened or read.
    """
    try:
        with path.open("rb") as stream:
            if stream.read(4) != b"MThd":
                return False
            raw_length = stream.read(4)
    except OSError:
        # Unreadable files are reported as invalid rather than raising.
        return False

    if len(raw_length) < 4:
        # File ends before the chunk-length field is complete.
        return False
    (header_length,) = struct.unpack(">I", raw_length)
    return header_length == 6


def check_midi_integrity(
    workdir: pathlib.Path,
    track_filter: str | None = None,
) -> ValidationCheckResult:
    """Check the header of every ``*.mid`` / ``*.midi`` file under *workdir*.

    Files are discovered recursively. Each file that fails the header check
    yields one ERROR issue and one warning log line. The point is to catch
    corruption (partial writes, failed exports, bit-rot) before the file is
    committed to Muse VCS history.

    Args:
        workdir: The ``muse-work/`` directory to scan. A missing directory
                 counts as a pass with no issues.
        track_filter: If given (and non-empty), only files whose
                      workdir-relative POSIX path contains this string,
                      compared case-insensitively, are validated.

    Returns:
        ValidationCheckResult with check name ``"midi_integrity"``.
    """
    findings: list[ValidationIssue] = []

    if workdir.exists():
        needle = track_filter.lower() if track_filter else None
        # "*.mid" matches come first, then "*.midi"; each group is sorted.
        candidates = [*sorted(workdir.rglob("*.mid")), *sorted(workdir.rglob("*.midi"))]
        for candidate in candidates:
            if not candidate.is_file():
                continue
            rel = candidate.relative_to(workdir).as_posix()
            if needle is not None and needle not in rel.lower():
                continue
            if _is_valid_midi(candidate):
                continue
            findings.append(
                ValidationIssue(
                    severity=ValidationSeverity.ERROR,
                    check="midi_integrity",
                    path=rel,
                    message="Invalid or corrupted MIDI file: missing or malformed MThd header.",
                )
            )
            logger.warning("❌ MIDI integrity failure: %s", rel)

    return ValidationCheckResult(
        name="midi_integrity",
        passed=not findings,
        issues=findings,
    )


# ---------------------------------------------------------------------------
# Manifest consistency check
# ---------------------------------------------------------------------------

def check_manifest_consistency(
    root: pathlib.Path,
    track_filter: str | None = None,
) -> ValidationCheckResult:
    """Compare the cached snapshot manifest against the actual working tree.

    Only the *set of paths* is compared; the values stored in the manifest
    are not inspected, so a content change to a file present on both sides
    is not reported by this check.

    Findings:
    - Path in the manifest but missing from the working tree ("orphaned"):
      ERROR.
    - Path in the working tree but absent from the manifest
      ("unregistered"): WARN.
    - Manifest cache unreadable, not valid JSON, or not a JSON object: ERROR.

    The check is skipped (reported as passed with no issues) when there is
    nothing to compare against: ``.muse/HEAD`` is missing, the ref file it
    names is missing or empty, or ``.muse/snapshot_manifest.json`` is absent.

    Args:
        root: Repository root (contains ``.muse/`` and ``muse-work/``).
        track_filter: If given (and non-empty), only paths containing this
                      string, compared case-insensitively, are reported.

    Returns:
        ValidationCheckResult with check name ``"manifest_consistency"``.
    """
    check_name = "manifest_consistency"
    issues: list[ValidationIssue] = []
    muse_dir = root / ".muse"
    workdir = root / "muse-work"

    needle = track_filter.lower() if track_filter else None

    def in_scope(path: str) -> bool:
        """Return True when *path* passes the optional track filter."""
        return needle is None or needle in path.lower()

    # Resolve HEAD to the ref file it names.
    head_path = muse_dir / "HEAD"
    if not head_path.exists():
        return ValidationCheckResult(name=check_name, passed=True, issues=[])

    head_ref = head_path.read_text().strip()
    ref_file = muse_dir / pathlib.Path(head_ref)
    # is_file() rather than exists(): an empty HEAD makes ref_file resolve to
    # .muse/ itself, and calling read_text() on a directory raises.
    if not ref_file.is_file() or not ref_file.read_text().strip():
        # No commits yet: nothing to compare against.
        return ValidationCheckResult(name=check_name, passed=True, issues=[])

    # The manifest is read from an on-disk JSON cache next to HEAD. Without
    # that cache the comparison cannot be made here, so the check is skipped.
    snapshot_cache = muse_dir / "snapshot_manifest.json"
    if not snapshot_cache.exists():
        return ValidationCheckResult(name=check_name, passed=True, issues=[])

    try:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        loaded: object = json.loads(snapshot_cache.read_text(encoding="utf-8"))
    except (ValueError, OSError) as exc:
        issues.append(
            ValidationIssue(
                severity=ValidationSeverity.ERROR,
                check=check_name,
                path=".muse/snapshot_manifest.json",
                message=f"Cannot read cached snapshot manifest: {exc}",
            )
        )
        return ValidationCheckResult(name=check_name, passed=False, issues=issues)

    if not isinstance(loaded, dict):
        # Valid JSON of the wrong shape (list, string, ...) would otherwise
        # fail further down with an AttributeError on .keys().
        issues.append(
            ValidationIssue(
                severity=ValidationSeverity.ERROR,
                check=check_name,
                path=".muse/snapshot_manifest.json",
                message=(
                    "Cached snapshot manifest is malformed: expected a JSON "
                    f"object keyed by path, got {type(loaded).__name__}."
                ),
            )
        )
        return ValidationCheckResult(name=check_name, passed=False, issues=issues)

    committed_manifest: dict[str, str] = loaded

    if not workdir.exists():
        # Every committed file is orphaned.
        for path in sorted(committed_manifest):
            if not in_scope(path):
                continue
            issues.append(
                ValidationIssue(
                    severity=ValidationSeverity.ERROR,
                    check=check_name,
                    path=path,
                    message="File is in committed manifest but muse-work/ does not exist.",
                )
            )
        return ValidationCheckResult(
            name=check_name,
            passed=not issues,
            issues=issues,
        )

    # Build the current working-tree manifest. Only its keys are used here.
    # NOTE(review): presumably keyed by workdir-relative path, like the
    # cached manifest — confirm against walk_workdir.
    from maestro.muse_cli.snapshot import walk_workdir

    current_manifest = walk_workdir(workdir)

    committed_paths = set(committed_manifest)
    current_paths = set(current_manifest.keys())

    for path in sorted(committed_paths - current_paths):
        if not in_scope(path):
            continue
        issues.append(
            ValidationIssue(
                severity=ValidationSeverity.ERROR,
                check=check_name,
                path=path,
                message="File in committed manifest is missing from working tree (orphaned).",
            )
        )

    for path in sorted(current_paths - committed_paths):
        if not in_scope(path):
            continue
        issues.append(
            ValidationIssue(
                severity=ValidationSeverity.WARN,
                check=check_name,
                path=path,
                message="File in working tree is not recorded in committed manifest (unregistered).",
            )
        )

    return ValidationCheckResult(
        name=check_name,
        passed=not issues,
        issues=issues,
    )


# ---------------------------------------------------------------------------
# Duplicate tracks check
# ---------------------------------------------------------------------------

def check_no_duplicate_tracks(
    workdir: pathlib.Path,
    track_filter: str | None = None,
) -> ValidationCheckResult:
    """Detect MIDI files in the working tree that map to the same instrument role.

    The role of a file is its lower-cased filename stem with one trailing
    numeric suffix removed, along with a single ``_`` or ``-`` directly
    before the digits. For example ``bass.mid``, ``bass_02.mid`` and
    ``Bass-3.midi`` all map to the role ``bass``. ``bass_alt.mid`` maps to
    ``bass_alt`` and is a different role. A stem made only of such a suffix
    (e.g. ``01.mid``) maps to the empty role name.

    Only the filename is considered, so files in different subdirectories
    with the same role are reported as duplicates of each other. Each role
    with more than one file yields one WARN issue whose ``path`` is the
    comma-separated list of offending files.

    Agents use this to prevent ambiguous track assignments that would cause
    Storpheus to generate for the wrong instrument during composition.

    Args:
        workdir: The ``muse-work/`` directory to scan. A missing directory
                 counts as a pass with no issues.
        track_filter: If given (and non-empty), only files whose
                      workdir-relative POSIX path contains this string,
                      compared case-insensitively, are considered.

    Returns:
        ValidationCheckResult with check name ``"no_duplicate_tracks"``.
    """
    if not workdir.exists():
        return ValidationCheckResult(name="no_duplicate_tracks", passed=True, issues=[])

    needle = track_filter.lower() if track_filter else None
    files_by_role: dict[str, list[str]] = {}

    # "*.mid" matches come first, then "*.midi"; each group is sorted.
    candidates = [*sorted(workdir.rglob("*.mid")), *sorted(workdir.rglob("*.midi"))]
    for candidate in candidates:
        if not candidate.is_file():
            continue
        rel = candidate.relative_to(workdir).as_posix()
        if needle is not None and needle not in rel.lower():
            continue
        # Role = lower-cased stem minus an optional "_"/"-" and trailing digits.
        role_name = re.sub(r"[_\-]?\d+$", "", candidate.stem.lower())
        files_by_role.setdefault(role_name, []).append(rel)

    findings: list[ValidationIssue] = []
    for role_name in sorted(files_by_role):
        members = files_by_role[role_name]
        if len(members) <= 1:
            continue
        findings.append(
            ValidationIssue(
                severity=ValidationSeverity.WARN,
                check="no_duplicate_tracks",
                path=", ".join(members),
                message=f"Duplicate instrument role '{role_name}' defined by {len(members)} files.",
            )
        )
        logger.warning("⚠️ Duplicate track role: %s → %s", role_name, members)

    return ValidationCheckResult(
        name="no_duplicate_tracks",
        passed=not findings,
        issues=findings,
    )


# ---------------------------------------------------------------------------
# Section naming convention check
# ---------------------------------------------------------------------------

def check_section_naming(
    workdir: pathlib.Path,
    section_filter: str | None = None,
) -> ValidationCheckResult:
    """Check that section directories directly under *workdir* are well named.

    Only the immediate subdirectories of *workdir* are inspected; nested
    directories and plain files are ignored. A name is accepted when it
    matches ``[a-z][a-z0-9_-]*`` (starts with a lowercase letter, then
    lowercase letters, digits, hyphens or underscores). Every other name
    yields one WARN issue. The constraint keeps references by AI agents
    consistent and avoids shell quoting issues.

    Args:
        workdir: The ``muse-work/`` directory to scan. A missing directory
                 counts as a pass with no issues.
        section_filter: If given (and non-empty), only directories whose
                        name contains this string, compared
                        case-insensitively, are evaluated.

    Returns:
        ValidationCheckResult with check name ``"section_naming"``.
    """
    findings: list[ValidationIssue] = []

    if workdir.exists():
        wanted = section_filter.lower() if section_filter else None
        for child in sorted(workdir.iterdir()):
            if not child.is_dir():
                continue
            dirname = child.name
            if wanted is not None and wanted not in dirname.lower():
                continue
            if _SECTION_NAME_RE.match(dirname):
                continue
            findings.append(
                ValidationIssue(
                    severity=ValidationSeverity.WARN,
                    check="section_naming",
                    path=dirname,
                    message=(
                        f"Section directory '{dirname}' does not follow naming convention "
                        f"[a-z][a-z0-9_-]* (lowercase, no spaces or uppercase letters)."
                    ),
                )
            )
            logger.warning("⚠️ Section naming violation: %s", dirname)

    return ValidationCheckResult(
        name="section_naming",
        passed=not findings,
        issues=findings,
    )


# ---------------------------------------------------------------------------
# Emotion tags check
# ---------------------------------------------------------------------------

def check_emotion_tags(
    root: pathlib.Path,
    track_filter: str | None = None,
) -> ValidationCheckResult:
    """Verify that emotion tags in the tag cache come from the allowed vocabulary.

    Reads ``.muse/tags.json`` if present. The file is expected to hold a
    JSON list of objects, each carrying a string ``"tag"`` entry. A tag is
    compared case-insensitively against :data:`ALLOWED_EMOTION_TAGS`, and
    each tag outside it yields one WARN issue, so agents know they may be
    working with an emotional label Maestro's mood model has not been
    trained on.

    Lenient handling:
    - Missing cache file: passes with no issues.
    - Top-level JSON value that is not a list: passes with no issues.
    - List entries that are not objects, or whose ``"tag"`` is not a
      string: skipped.
    - Cache that cannot be read, decoded as UTF-8, or parsed as JSON: one
      WARN issue and ``passed=False``.

    Args:
        root: Repository root.
        track_filter: Unused for this check (included for API symmetry).

    Returns:
        ValidationCheckResult with check name ``"emotion_tags"``.
    """
    issues: list[ValidationIssue] = []
    tag_cache = root / ".muse" / "tags.json"

    if not tag_cache.exists():
        return ValidationCheckResult(name="emotion_tags", passed=True, issues=[])

    try:
        # JSON is read as UTF-8 explicitly rather than with the locale
        # encoding. ValueError covers both json.JSONDecodeError and
        # UnicodeDecodeError, so a file with invalid bytes is reported as an
        # issue instead of crashing the validator.
        tags_data: object = json.loads(tag_cache.read_text(encoding="utf-8"))
    except (ValueError, OSError) as exc:
        issues.append(
            ValidationIssue(
                severity=ValidationSeverity.WARN,
                check="emotion_tags",
                path=".muse/tags.json",
                message=f"Cannot read tag cache: {exc}",
            )
        )
        return ValidationCheckResult(name="emotion_tags", passed=False, issues=issues)

    if not isinstance(tags_data, list):
        # Unexpected top-level shape: nothing this check knows how to inspect.
        return ValidationCheckResult(name="emotion_tags", passed=True, issues=[])

    # Built once; the same list is repeated in every message.
    allowed_listing = ", ".join(sorted(ALLOWED_EMOTION_TAGS))

    for entry in tags_data:
        if not isinstance(entry, dict):
            continue
        tag_name: object = entry.get("tag")
        if not isinstance(tag_name, str):
            continue
        if tag_name.lower() in ALLOWED_EMOTION_TAGS:
            continue
        issues.append(
            ValidationIssue(
                severity=ValidationSeverity.WARN,
                check="emotion_tags",
                path=".muse/tags.json",
                message=(
                    f"Emotion tag '{tag_name}' is not in the allowed vocabulary. "
                    f"Allowed: {allowed_listing}"
                ),
            )
        )
        logger.warning("⚠️ Unknown emotion tag: %s", tag_name)

    return ValidationCheckResult(
        name="emotion_tags",
        passed=not issues,
        issues=issues,
    )


# ---------------------------------------------------------------------------
# Auto-fix: quantise slightly off-grid notes (stub — full impl requires mido)
# ---------------------------------------------------------------------------

def apply_fixes(
    workdir: pathlib.Path,
    issues: list[ValidationIssue],
) -> list[str]:
    """Apply automatic corrections for fixable issues (currently a stub).

    No automatic fix is implemented yet, so the returned list is always
    empty. Categories that are deliberately not auto-fixed:
    - Malformed MIDI files: rewriting them risks data loss.
    - Section naming: renaming would break references in other files.
    - Duplicate tracks: it is ambiguous which file to keep.

    When *issues* contains ``manifest_consistency`` findings, an info-level
    log line records that they were seen and that no fix logic exists yet.
    The function is meant to stay conservative: only fixes that cannot lose
    data and whose correct outcome is unambiguous belong here.

    Args:
        workdir: The ``muse-work/`` working tree directory (not used yet).
        issues: The full list of issues found during validation.

    Returns:
        List of human-readable strings describing each fix applied.
    """
    fixes: list[str] = []

    # Future: quantise off-grid MIDI notes using mido when mido is available.
    auto_fixable = {"manifest_consistency"}
    candidates = [issue for issue in issues if issue.check in auto_fixable]
    if not candidates:
        return fixes

    logger.info(
        "⚠️ --fix: %d fixable issue(s) found but no auto-fix logic is "
        "implemented yet for check categories: %s",
        len(candidates),
        {issue.check for issue in candidates},
    )
    return fixes


# ---------------------------------------------------------------------------
# Orchestrator
# ---------------------------------------------------------------------------

def run_validate(
    root: pathlib.Path,
    *,
    strict: bool = False,
    track_filter: str | None = None,
    section_filter: str | None = None,
    auto_fix: bool = False,
) -> MuseValidateResult:
    """Run all integrity checks against the working tree at *root*.

    This is the single entry point for the validate subsystem. The checks
    run in a fixed order (MIDI integrity, manifest consistency, duplicate
    tracks, section naming, emotion tags) and their results are aggregated
    into a single :class:`MuseValidateResult`.

    Args:
        root: Repository root (contains ``.muse/`` and ``muse-work/``).
        strict: Treat WARN-severity issues as fatal (exit 2 in CLI). This
                function does not consult the flag itself; the result
                reports errors and warnings separately and leaves the
                exit-code decision to the caller.
        track_filter: Restrict checks to files/paths containing this string.
        section_filter: Restrict section-naming check to dirs matching this.
        auto_fix: Attempt to auto-correct fixable issues. Fixes run after
                  the checks, and the checks are not re-run afterwards, so
                  the reported issues reflect the state before fixing.

    Returns:
        MuseValidateResult with all check outcomes and any fixes applied.
    """
    workdir = root / "muse-work"

    check_results: list[ValidationCheckResult] = [
        check_midi_integrity(workdir, track_filter=track_filter),
        check_manifest_consistency(root, track_filter=track_filter),
        check_no_duplicate_tracks(workdir, track_filter=track_filter),
        check_section_naming(workdir, section_filter=section_filter),
        check_emotion_tags(root, track_filter=track_filter),
    ]

    all_issues: list[ValidationIssue] = [
        issue for result in check_results for issue in result.issues
    ]

    fixes_applied: list[str] = []
    if auto_fix and all_issues:
        fixes_applied = apply_fixes(workdir, all_issues)

    has_errors = any(i.severity == ValidationSeverity.ERROR for i in all_issues)
    has_warnings = any(i.severity == ValidationSeverity.WARN for i in all_issues)
    # "clean" is documented as "every check passed (no issues of any
    # severity)". Deriving it from the per-check flags, rather than from
    # errors/warnings only, keeps an INFO-only finding from being reported
    # as clean.
    clean = all(result.passed for result in check_results)

    logger.info(
        "✅ muse validate: %d check(s), errors=%s, warnings=%s",
        len(check_results),
        has_errors,
        has_warnings,
    )

    return MuseValidateResult(
        clean=clean,
        has_errors=has_errors,
        has_warnings=has_warnings,
        checks=check_results,
        fixes_applied=fixes_applied,
    )