xml_safe.py
python
| 1 | """Typed safe XML parsing adapter. |
| 2 | |
| 3 | Wraps ``defusedxml`` behind a typed interface so the rest of Muse can use |
| 4 | ``SafeET.parse()`` with full type information and no suppression comments. |
| 5 | |
| 6 | ``defusedxml`` does not ship type stubs, so importing it directly would |
| 7 | require a suppression comment (banned by the project's zero-ignore rule). |
| 8 | This module contains the single, justified crossing of the typed/untyped |
| 9 | boundary and presents a fully-typed surface to callers. |
| 10 | |
| 11 | Only ``parse()`` is exposed — the sole function we use from defusedxml. |
| 12 | The stdlib ``Element``, ``ElementTree`` and ``ParseError`` are re-exported as |
| 13 | ``SafeET`` attributes; use the fully typed stdlib module for anything else (e.g. ``iterparse``). |
| 14 | """ |
| 15 | |
| 16 | from __future__ import annotations |
| 17 | |
| 18 | import xml.etree.ElementTree as _StdET |
| 19 | from pathlib import Path |
| 20 | from xml.etree.ElementTree import Element, ElementTree, ParseError |
| 21 | |
| 22 | |
class _ForbiddenXMLError(ParseError, ValueError):
    """``ParseError`` raised when defusedxml refuses a forbidden XML construct.

    It also derives from ``ValueError`` because defusedxml's own exceptions do,
    so code that already caught ``ValueError`` keeps working.
    """


def _defuse_parse(source: str | Path) -> ElementTree:
    """Parse an XML file through defusedxml to block entity expansion attacks.

    Args:
        source: Location of the XML file; it is converted with ``str()``
            before being handed to ``defusedxml.ElementTree.parse``.

    Returns:
        The tree produced by ``defusedxml.ElementTree.parse``.

    Raises:
        ParseError: When defusedxml rejects the document (``DTDForbidden``,
            ``EntitiesForbidden``, etc.). Those exceptions derive from
            ``defusedxml.DefusedXmlException`` (a ``ValueError``), *not* from
            ``ParseError``, so they are translated here to let callers catch
            ``ParseError`` generically. The original exception is preserved
            as ``__cause__``. Errors raised by the underlying parser for
            malformed XML propagate unchanged.
    """
    import defusedxml.ElementTree as _dxml  # noqa: PLC0415 (local import intentional)
    from defusedxml import DefusedXmlException  # noqa: PLC0415 (local import intentional)

    try:
        # Annotated local: defusedxml is untyped, so pin the type at the boundary.
        tree: ElementTree = _dxml.parse(str(source))
    except DefusedXmlException as exc:
        # Surface security rejections under the documented exception type.
        raise _ForbiddenXMLError(str(exc)) from exc
    return tree
| 33 | |
| 34 | |
| 35 | class SafeET: |
| 36 | """Namespace class — use ``SafeET.parse()`` as a drop-in for ``ET.parse()``.""" |
| 37 | |
| 38 | @staticmethod |
| 39 | def parse(source: str | Path) -> ElementTree: |
| 40 | """Return an :class:`xml.etree.ElementTree.ElementTree` parsed safely.""" |
| 41 | return _defuse_parse(source) |
| 42 | |
| 43 | # Re-export stdlib types so callers do not need to import xml.etree.ElementTree |
| 44 | # separately. |
| 45 | ParseError = ParseError |
| 46 | Element = Element |
| 47 | ElementTree = ElementTree |
| 48 | |
| 49 | |
# Public surface of this module: only the ``SafeET`` namespace class is exported.
__all__ = ["SafeET"]