"""Stress tests for the generic query engine and code query DSL.

Covers:
- walk_history on linear chains of 100+ commits.
- CommitEvaluator with correct 3-arg signature.
- format_matches output format.
- Code query DSL: all field types, all operators, AND/OR composition.
- Code query DSL: unknown field or operator raises ValueError; empty query is rejected.
- Query against large history (200 commits).
- Branch-scoped queries.
"""

import datetime
import pathlib

import pytest

from muse.core.query_engine import CommitEvaluator, QueryMatch, format_matches, walk_history
from muse.core.store import CommitRecord, write_commit
from muse.domain import SemVerBump
from muse.plugins.code._code_query import build_evaluator


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _now() -> datetime.datetime:
    """Return the current moment as a timezone-aware UTC datetime."""
    utc = datetime.timezone.utc
    return datetime.datetime.now(tz=utc)


def _write(
    root: pathlib.Path,
    cid: str,
    branch: str = "main",
    parent: str | None = None,
    author: str = "alice",
    agent_id: str = "",
    model_id: str = "",
    sem_ver_bump: SemVerBump = "none",
    message: str = "",
) -> CommitRecord:
    """Persist a commit record and point *branch*'s head ref at it.

    Builds a ``CommitRecord`` in repository ``"repo"`` with snapshot id
    ``snap-<cid>`` and a ``committed_at`` taken from ``_now()``, stores it
    with ``write_commit``, then overwrites ``.muse/refs/heads/<branch>``
    with *cid*.

    Args:
        root: Repository root (the directory that contains ``.muse``).
        cid: Commit id to write.
        branch: Branch recorded on the commit and whose head ref is updated.
        parent: Parent commit id, or ``None`` for a root commit.
        author: Author recorded on the commit.
        agent_id: Agent id recorded on the commit.
        model_id: Model id recorded on the commit.
        sem_ver_bump: Semantic-version bump recorded on the commit.
        message: Commit message; an empty string falls back to
            ``"commit <cid>"``.

    Returns:
        The ``CommitRecord`` that was written.
    """
    record = CommitRecord(
        commit_id=cid,
        repo_id="repo",
        branch=branch,
        snapshot_id=f"snap-{cid}",
        message=message or f"commit {cid}",
        committed_at=_now(),
        parent_commit_id=parent,
        author=author,
        agent_id=agent_id,
        model_id=model_id,
        sem_ver_bump=sem_ver_bump,
    )
    write_commit(root, record)

    head_ref = root / ".muse" / "refs" / "heads" / branch
    # A namespaced branch such as "feature/x" places the ref file in a
    # subdirectory that may not exist yet; create it so write_text cannot
    # fail with FileNotFoundError.
    head_ref.parent.mkdir(parents=True, exist_ok=True)
    head_ref.write_text(cid)
    return record


def _make_match(commit: CommitRecord) -> QueryMatch:
    """Build a ``QueryMatch`` describing *commit*.

    The match copies the commit's id, author and branch, stores the commit
    timestamp as an ISO-8601 string, and uses ``"matched commit <id>"`` as
    the detail text.
    """
    cid = commit.commit_id
    timestamp = commit.committed_at.isoformat()
    return QueryMatch(
        commit_id=cid,
        author=commit.author,
        committed_at=timestamp,
        branch=commit.branch,
        detail=f"matched commit {cid}",
    )


@pytest.fixture
def repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Create the ``.muse`` directory skeleton under *tmp_path* and return it.

    Two directories are created: ``.muse/commits`` and ``.muse/refs/heads``.
    """
    dot_muse = tmp_path / ".muse"
    for parts in (("commits",), ("refs", "heads")):
        dot_muse.joinpath(*parts).mkdir(parents=True)
    return tmp_path


# ===========================================================================
# walk_history — basic
# ===========================================================================


class TestWalkHistoryBasic:
    """``walk_history`` on empty, single-commit and short linear histories."""

    def test_empty_history_no_matches(self, repo: pathlib.Path) -> None:
        """A branch that was never written yields an empty result."""

        def match_all(
            commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
        ) -> list[QueryMatch]:
            return [_make_match(commit)]

        matches = walk_history(repo, "nonexistent-branch", match_all)
        assert matches == []

    def test_single_commit_matches(self, repo: pathlib.Path) -> None:
        """A match-everything evaluator reports the only commit on the branch."""
        _write(repo, "only", branch="main")

        def match_all(
            commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
        ) -> list[QueryMatch]:
            return [_make_match(commit)]

        matches = walk_history(repo, "main", match_all)
        assert len(matches) == 1
        first = matches[0]
        assert first["commit_id"] == "only"

    def test_single_commit_no_match(self, repo: pathlib.Path) -> None:
        """An evaluator that never matches yields an empty result."""
        _write(repo, "only", branch="main")

        def match_none(
            commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
        ) -> list[QueryMatch]:
            return []

        matches = walk_history(repo, "main", match_none)
        assert matches == []

    def test_linear_chain_all_match(self, repo: pathlib.Path) -> None:
        """Every commit of a 10-commit chain is reported by a match-all evaluator."""
        parent_id: str | None = None
        for commit_id in (f"c{n:03d}" for n in range(10)):
            _write(repo, commit_id, parent=parent_id)
            parent_id = commit_id

        def match_all(
            commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
        ) -> list[QueryMatch]:
            return [_make_match(commit)]

        matches = walk_history(repo, "main", match_all)
        assert len(matches) == 10

    def test_linear_chain_filtered(self, repo: pathlib.Path) -> None:
        """Only alice's commits (even indices, 5 of 10) are reported."""
        parent_id: str | None = None
        for n in range(10):
            commit_id = f"c{n:03d}"
            who = "bob" if n % 2 else "alice"
            _write(repo, commit_id, parent=parent_id, author=who)
            parent_id = commit_id

        def alice_only(
            commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
        ) -> list[QueryMatch]:
            return [_make_match(commit)] if commit.author == "alice" else []

        matches = walk_history(repo, "main", alice_only)
        assert len(matches) == 5

    def test_max_commits_limits_walk(self, repo: pathlib.Path) -> None:
        """``max_commits=10`` caps the result at 10 matches on a 50-commit chain."""
        parent_id: str | None = None
        for commit_id in (f"c{n:03d}" for n in range(50)):
            _write(repo, commit_id, parent=parent_id)
            parent_id = commit_id

        def match_all(
            commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
        ) -> list[QueryMatch]:
            return [_make_match(commit)]

        matches = walk_history(repo, "main", match_all, max_commits=10)
        assert len(matches) == 10

    def test_matches_include_commit_id_and_branch(self, repo: pathlib.Path) -> None:
        """The returned match carries the commit id, branch and author."""
        _write(repo, "abc123", branch="main", author="alice")

        def match_all(
            commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
        ) -> list[QueryMatch]:
            return [_make_match(commit)]

        first = walk_history(repo, "main", match_all)[0]
        assert first["commit_id"] == "abc123"
        assert first["branch"] == "main"
        assert first["author"] == "alice"


# ===========================================================================
# walk_history — large history
# ===========================================================================


class TestWalkHistoryLarge:
    """``walk_history`` over linear chains of 100 and 200 commits."""

    def test_200_commit_chain_full_scan(self, repo: pathlib.Path) -> None:
        """Every third commit of a 200-commit chain is tagged ``bot`` and found."""
        parent_id: str | None = None
        for n in range(200):
            commit_id = f"large-{n:04d}"
            tag = "" if n % 3 else "bot"
            _write(repo, commit_id, parent=parent_id, agent_id=tag)
            parent_id = commit_id

        def bot_only(
            commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
        ) -> list[QueryMatch]:
            return [_make_match(commit)] if commit.agent_id == "bot" else []

        matches = walk_history(repo, "main", bot_only)
        # Bot commits sit at indices 0, 3, 6, ..., 198, which is 67 commits.
        assert len(matches) == 67

    def test_query_by_agent_across_100_commits(self, repo: pathlib.Path) -> None:
        """With five agents assigned round-robin, ``agent-0`` owns 20 of 100 commits."""
        parent_id: str | None = None
        for n in range(100):
            commit_id = f"agent-test-{n:04d}"
            _write(repo, commit_id, parent=parent_id, agent_id=f"agent-{n % 5}")
            parent_id = commit_id

        def agent_0_only(
            commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
        ) -> list[QueryMatch]:
            return [_make_match(commit)] if commit.agent_id == "agent-0" else []

        matches = walk_history(repo, "main", agent_0_only)
        assert len(matches) == 20  # 100 / 5 = 20


# ===========================================================================
# format_matches
# ===========================================================================


class TestFormatMatches:
    """Output checks for ``format_matches``."""

    def test_empty_matches_produces_output(self) -> None:
        """An empty match list still formats to a string."""
        rendered = format_matches([])
        assert isinstance(rendered, str)

    def test_single_match_includes_commit_id(self) -> None:
        """The first eight characters of the commit id appear in the output."""
        timestamp = _now().isoformat()
        only = QueryMatch(
            commit_id="a" * 64,
            branch="main",
            author="alice",
            committed_at=timestamp,
            detail="test match",
        )
        rendered = format_matches([only])
        assert "aaaaaaaa" in rendered

    def test_multiple_matches_all_present(self) -> None:
        """Each of five commit ids appears in the formatted output."""
        commit_ids = [f"id{n:04d}" for n in range(5)]
        batch = []
        for commit_id in commit_ids:
            batch.append(
                QueryMatch(
                    commit_id=commit_id,
                    branch="main",
                    author="alice",
                    committed_at=_now().isoformat(),
                    detail="matched",
                )
            )
        rendered = format_matches(batch)
        for commit_id in commit_ids:
            assert commit_id in rendered


# ===========================================================================
# Code query DSL — build_evaluator
# ===========================================================================


class TestCodeQueryDSL:
    """Evaluators built by ``build_evaluator``, run through ``walk_history``.

    Each test writes a short linear chain on ``main`` and checks which
    commits the query selects. Assertions pin the matching commit ids so a
    query that silently matches nothing cannot pass vacuously.
    """

    # --- author field ---

    def test_author_equals(self, repo: pathlib.Path) -> None:
        """``author == 'alice'`` selects alice's commit and not bob's."""
        _write(repo, "a1", author="alice")
        _write(repo, "a2", author="bob", parent="a1")
        evaluator = build_evaluator("author == 'alice'")
        result = walk_history(repo, "main", evaluator)
        assert any(m["commit_id"] == "a1" for m in result)
        assert not any(m["commit_id"] == "a2" for m in result)

    def test_author_not_equals(self, repo: pathlib.Path) -> None:
        """``author != 'alice'`` selects exactly bob's commit."""
        _write(repo, "b1", author="alice")
        _write(repo, "b2", author="bob", parent="b1")
        evaluator = build_evaluator("author != 'alice'")
        result = walk_history(repo, "main", evaluator)
        # Pin the expected commit: a bare all(...) passes on an empty result.
        assert [m["commit_id"] for m in result] == ["b2"]
        assert all(m["author"] != "alice" for m in result)

    def test_author_contains(self, repo: pathlib.Path) -> None:
        """``contains`` matches a substring of the author."""
        _write(repo, "c1", author="alice-smith")
        _write(repo, "c2", author="bob-jones", parent="c1")
        evaluator = build_evaluator("author contains 'alice'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert "alice" in result[0]["author"]

    def test_author_startswith(self, repo: pathlib.Path) -> None:
        """``startswith`` matches a prefix of the author."""
        _write(repo, "d1", author="agent-claude")
        _write(repo, "d2", author="human-alice", parent="d1")
        evaluator = build_evaluator("author startswith 'agent'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert result[0]["author"].startswith("agent")

    # --- agent_id field ---

    def test_agent_id_equals(self, repo: pathlib.Path) -> None:
        """``agent_id == ...`` selects the commit with that agent id."""
        _write(repo, "e1", agent_id="claude-v4")
        _write(repo, "e2", agent_id="gpt-4o", parent="e1")
        evaluator = build_evaluator("agent_id == 'claude-v4'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert result[0]["commit_id"] == "e1"

    # --- sem_ver_bump field ---

    def test_sem_ver_bump_major(self, repo: pathlib.Path) -> None:
        """``sem_ver_bump == 'major'`` selects only the major-bump commit."""
        _write(repo, "f1", sem_ver_bump="major")
        _write(repo, "f2", sem_ver_bump="minor", parent="f1")
        _write(repo, "f3", sem_ver_bump="patch", parent="f2")
        evaluator = build_evaluator("sem_ver_bump == 'major'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert result[0]["commit_id"] == "f1"

    # --- model_id field ---

    def test_model_id_contains(self, repo: pathlib.Path) -> None:
        """``model_id contains 'claude'`` selects only the claude-model commit."""
        _write(repo, "g1", model_id="claude-3-5-sonnet-20241022")
        _write(repo, "g2", model_id="gpt-4o-2024-08-06", parent="g1")
        evaluator = build_evaluator("model_id contains 'claude'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert result[0]["commit_id"] == "g1"

    # --- AND composition ---

    def test_and_composition(self, repo: pathlib.Path) -> None:
        """``and`` requires both predicates to hold on the same commit."""
        _write(repo, "h1", author="alice", agent_id="bot-1")
        _write(repo, "h2", author="alice", agent_id="bot-2", parent="h1")
        _write(repo, "h3", author="bob", agent_id="bot-1", parent="h2")
        evaluator = build_evaluator("author == 'alice' and agent_id == 'bot-1'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert result[0]["commit_id"] == "h1"

    # --- OR composition ---

    def test_or_composition(self, repo: pathlib.Path) -> None:
        """``or`` selects commits satisfying either predicate."""
        _write(repo, "i1", author="alice")
        _write(repo, "i2", author="bob", parent="i1")
        _write(repo, "i3", author="charlie", parent="i2")
        evaluator = build_evaluator("author == 'alice' or author == 'bob'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 2
        assert {m["commit_id"] for m in result} == {"i1", "i2"}

    # --- complex nested AND OR ---

    def test_complex_and_or(self, repo: pathlib.Path) -> None:
        """An OR over ``sem_ver_bump`` selects the major and minor commits."""
        # NOTE(review): despite the name, this query only exercises OR. A
        # mixed and/or query would need the DSL's precedence rules, which
        # are not visible from this file - confirm before extending.
        _write(repo, "j1", author="alice", sem_ver_bump="major")
        _write(repo, "j2", author="bob", sem_ver_bump="minor", parent="j1")
        _write(repo, "j3", author="alice", sem_ver_bump="patch", parent="j2")
        evaluator = build_evaluator(
            "sem_ver_bump == 'major' or sem_ver_bump == 'minor'"
        )
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 2
        assert {m["commit_id"] for m in result} == {"j1", "j2"}

    # --- error cases ---

    def test_unknown_field_raises_value_error(self) -> None:
        """A field name the DSL does not know is rejected with ``ValueError``."""
        with pytest.raises(ValueError):
            build_evaluator("unknown_field == 'something'")

    def test_unknown_operator_raises_value_error(self) -> None:
        """An operator the DSL does not know is rejected with ``ValueError``."""
        with pytest.raises(ValueError):
            build_evaluator("author REGEX 'alice'")

    def test_empty_query_raises(self) -> None:
        """An empty query string raises ``ValueError`` or ``IndexError``."""
        with pytest.raises((ValueError, IndexError)):
            build_evaluator("")

    # --- branch field ---

    def test_branch_field_matches_correctly(self, repo: pathlib.Path) -> None:
        """``branch == 'main'`` selects the commit written on ``main``."""
        _write(repo, "k1", branch="main", author="alice")
        evaluator = build_evaluator("branch == 'main'")
        result = walk_history(repo, "main", evaluator)
        # Pin the expected commit: a bare all(...) passes on an empty result.
        assert [m["commit_id"] for m in result] == ["k1"]
        assert all(m["branch"] == "main" for m in result)