cgcardona / muse public
schema.py python
224 lines 8.3 KB
45fd2148 fix: config and versioning audit — TOML attributes, v0.1.1, no Phase N labels Gabriel Cardona <cgcardona@gmail.com> 2d ago
1 """Domain schema declaration types.
2
3 A plugin implements :meth:`~muse.domain.MuseDomainPlugin.schema` returning a
4 :class:`DomainSchema` to declare the structural shape of its data. The core
5 engine uses this declaration to:
6
7 1. Select the correct diff algorithm for each dimension via
8 :func:`~muse.core.diff_algorithms.diff_by_schema`.
9 2. Provide informed conflict messages (citing dimension names) during OT merge.
10 3. Route to CRDT convergent join when ``merge_mode`` is ``"crdt"``.
11
12 Every schema type is a ``TypedDict`` — JSON-serialisable, zero-``Any``, and
13 verifiable by mypy in strict mode.
14
15 CRDT dimension spec
16 -------------------
17 :class:`CRDTDimensionSpec` declares which CRDT primitive a dimension uses when
18 ``DomainSchema.merge_mode`` is ``"crdt"``. Plugins that mix three-way and
19 CRDT semantics per-dimension use :class:`CRDTDimensionSpec` for their CRDT
20 dimensions and :class:`DimensionSpec` for their three-way dimensions; both are
21 listed in :class:`DomainSchema`.
22
23 Design note on ``MapSchema.value_schema``
24 -----------------------------------------
25 ``MapSchema.value_schema`` carries the type ``ElementSchema``, which is
26 defined *after* ``MapSchema`` in this file. With ``from __future__ import
27 annotations`` all annotations are evaluated lazily, so this forward reference
28 is resolved correctly by both the Python runtime and mypy.
29 """
30 from __future__ import annotations
31
32 from typing import Literal, TypedDict
33
34
35 # ---------------------------------------------------------------------------
36 # Element schema types — one per structural primitive
37 # ---------------------------------------------------------------------------
38
39
class SequenceSchema(TypedDict):
    """Schema for an ordered, homogeneous sequence that is diffed with LCS.

    Choose this for domain data that is essentially a list, such as the note
    events of a MIDI track, the nucleotides of a DNA strand, or the frames of
    an animation.

    The LCS variant is picked through ``diff_algorithm``:

    - ``"lcs"`` — the classic O(nm) LCS; yields minimal insertions and
      deletions.
    - ``"myers"`` — the O(nd) Myers algorithm (the one Git uses); same
      semantics as ``"lcs"`` but faster when the edit distance is low.
    - ``"patience"`` — a patience-sort variant whose diffs are easier for
      humans to read when the sequence has many repeated elements.
    """

    # Fixed tag identifying this schema as a sequence.
    kind: Literal["sequence"]
    # Free-form string naming the element type (format not specified here).
    element_type: str
    # How two elements are matched up: by ID, by position, or by content.
    identity: Literal["by_id", "by_position", "by_content"]
    # LCS variant to run; the options are described in the class docstring.
    diff_algorithm: Literal["lcs", "myers", "patience"]
    # Optional list of strings, or ``None``.
    # NOTE(review): presumably the permitted element symbols — confirm.
    alphabet: list[str] | None
59
60
class TreeSchema(TypedDict):
    """Schema for a hierarchical, labeled, ordered tree (tree-edit diff).

    Choose this for domain data built from parent-child relationships, such
    as scene graphs, XML / AST nodes, or the track hierarchy of a DAW.

    The tree edit algorithm is picked through ``diff_algorithm``:

    - ``"zhang_shasha"`` — the Zhang-Shasha 1989 O(n²m) minimum edit
      distance algorithm.
    - ``"gumtree"`` — the GumTree heuristic, which is the better choice for
      large ASTs.
    """

    # Fixed tag identifying this schema as a tree.
    kind: Literal["tree"]
    # Free-form string naming the node type (format not specified here).
    node_type: str
    # Tree edit algorithm to run; options are listed in the class docstring.
    diff_algorithm: Literal["zhang_shasha", "gumtree"]
75
76
class TensorSchema(TypedDict):
    """Schema for an N-dimensional numerical array (sparse numerical diff).

    Choose this for simulation state, velocity curves, weight matrices, or
    voxel grids. Floating-point drift smaller than ``epsilon`` is *not*
    treated as a change.

    The granularity of the diff output is picked through ``diff_mode``:

    - ``"sparse"`` — emit one ``ReplaceOp`` for every changed element.
    - ``"block"`` — group adjacent changes into contiguous range ops.
    - ``"full"`` — emit a single ``ReplaceOp`` covering the entire array if
      anything changed.
    """

    # Fixed tag identifying this schema as a tensor.
    kind: Literal["tensor"]
    # Element data type; restricted to the listed float and int widths.
    dtype: Literal["float32", "float64", "int8", "int16", "int32", "int64"]
    # NOTE(review): presumably the number of array dimensions — confirm.
    rank: int
    # Drift below this threshold is not considered a change.
    epsilon: float
    # Output granularity; the options are described in the class docstring.
    diff_mode: Literal["sparse", "block", "full"]
94
95
class SetSchema(TypedDict):
    """Schema for an unordered collection of unique elements (set algebra).

    Choose this when element order carries no meaning: a set of files, a set
    of annotations, or the set of material IDs in a 3D scene.

    What makes two elements "the same" is decided by ``identity``:

    - ``"by_content"`` — SHA-256 of the content (structural equality).
    - ``"by_id"`` — a stable element ID (e.g. a UUID).
    """

    # Fixed tag identifying this schema as a set.
    kind: Literal["set"]
    # Free-form string naming the element type (format not specified here).
    element_type: str
    # Equality rule for elements; see the class docstring.
    identity: Literal["by_content", "by_id"]
110
111
class MapSchema(TypedDict):
    """Schema for a key-value map whose keys may be known or dynamic.

    Choose this for dictionaries in which the structure of both keys and
    values matters, for example chromosome name → nucleotide sequence, or
    annotation key → quality scores.

    Because ``value_schema`` is itself an ``ElementSchema``, declarations can
    nest recursively (a map of sequences, a map of trees, and so on).
    """

    # Fixed tag identifying this schema as a map.
    kind: Literal["map"]
    # Free-form string naming the key type (format not specified here).
    key_type: str
    # ``ElementSchema`` is a forward reference here; it is resolved lazily,
    # which depends on ``from __future__ import annotations`` being active.
    value_schema: ElementSchema
    # Entries are identified by their key; this is the only allowed value.
    identity: Literal["by_key"]
126
127
#: Any one of the element schema types declared above.
#: ``DimensionSpec.schema`` and ``DomainSchema.top_level`` are of this type.
ElementSchema = (
    SequenceSchema
    | TreeSchema
    | TensorSchema
    | MapSchema
    | SetSchema
)
131
132
133 # ---------------------------------------------------------------------------
134 # Dimension spec — a named semantic sub-dimension
135 # ---------------------------------------------------------------------------
136
137
class DimensionSpec(TypedDict):
    """Declaration of one named semantic sub-dimension of a domain's state.

    Domains have several dimensions. Music has melodic, harmonic, dynamic,
    and structural dimensions; genomics has coding regions, regulatory
    elements, and metadata; 3D spatial design has geometry, materials,
    lighting, and animation.

    Every dimension may pick its own element schema and diff algorithm. The
    OT merge engine merges independent dimensions in parallel without
    blocking on each other.

    ``independent_merge`` — if ``True``, a conflict in this dimension does
    not hold up the merge of other dimensions. If ``False`` (e.g. structural
    changes in a DAW session), every other dimension has to wait until this
    one is resolved.
    """

    # Name of the dimension.
    name: str
    # Free-text description of the dimension.
    description: str
    # Element schema describing the shape of this dimension's data.
    schema: ElementSchema
    # Whether a conflict here leaves other dimensions free to merge.
    independent_merge: bool
159
160
161 # ---------------------------------------------------------------------------
162 # CRDT per-dimension schema
163 # ---------------------------------------------------------------------------
164
#: Names of the CRDT primitives that a dimension may select.
CRDTPrimitive = Literal[
    "lww_register",
    "or_set",
    "rga",
    "aw_map",
    "g_counter",
]
167
168
class CRDTDimensionSpec(TypedDict):
    """Declaration of one dimension merged with CRDT convergent semantics.

    A plugin provides a ``CRDTDimensionSpec`` for every dimension that the
    core engine should merge through :meth:`~muse.domain.CRDTPlugin.join`
    instead of the three-way merge path.

    The primitive is picked through ``crdt_type``:

    - ``"lww_register"`` — a scalar; last write wins (timestamps).
    - ``"or_set"`` — an unordered set; adds win over concurrent removes.
    - ``"rga"`` — an ordered sequence (collaborative text / note editing).
    - ``"aw_map"`` — a key-value map; adds win.
    - ``"g_counter"`` — a monotonically increasing integer counter.

    ``independent_merge`` mirrors :class:`DimensionSpec`: if ``True``,
    conflicts in other dimensions do not block this one.
    """

    # Name of the dimension.
    name: str
    # Free-text description of the dimension.
    description: str
    # CRDT primitive used for this dimension; see the class docstring.
    crdt_type: CRDTPrimitive
    # Whether this dimension merges regardless of conflicts elsewhere.
    independent_merge: bool
192
193
194 # ---------------------------------------------------------------------------
195 # Top-level domain schema
196 # ---------------------------------------------------------------------------
197
198
class DomainSchema(TypedDict):
    """Full structural declaration of a domain plugin.

    This is what :meth:`~muse.domain.MuseDomainPlugin.schema` returns. The
    core engine reads it once, at plugin registration time.

    ``top_level`` describes the primary collection structure (e.g. a set of
    files for music, or a map of chromosome sequences for genomics).

    ``dimensions`` lists the semantic sub-dimensions. The OT merge engine
    uses these to determine which changes can be merged independently.

    ``merge_mode`` selects the merge strategy:

    - ``"three_way"`` — standard three-way merge.
    - ``"crdt"`` — convergent CRDT join.

    ``schema_version`` records the schema format so that future migrations
    are possible. It is always ``1``.
    """

    # Name of the domain this schema belongs to.
    domain: str
    # Free-text description of the domain.
    description: str
    # Semantic sub-dimensions of the domain's state.
    dimensions: list[DimensionSpec]
    # Element schema of the primary collection structure.
    top_level: ElementSchema
    # Merge strategy: three-way merge or CRDT join.
    merge_mode: Literal["three_way", "crdt"]
    # Schema format version; the only permitted value is ``1``.
    schema_version: Literal[1]