Initial commit: Linux Transcriber app with multi-language and auto-detection support

2026-06-04 17:02:35 +02:00
commit 3439c0d90d
4038 changed files with 586721 additions and 0 deletions
--- a/venv/lib/python3.12/site-packages/idna/init.py
+++ b/venv/lib/python3.12/site-packages/idna/init.py
@@ -0,0 +1,45 @@
+from .core import (
+    IDNABidiError,
+    IDNAError,
+    InvalidCodepoint,
+    InvalidCodepointContext,
+    alabel,
+    check_bidi,
+    check_hyphen_ok,
+    check_initial_combiner,
+    check_label,
+    check_nfc,
+    decode,
+    encode,
+    ulabel,
+    uts46_remap,
+    valid_contextj,
+    valid_contexto,
+    valid_label_length,
+    valid_string_length,
+)
+from .intranges import intranges_contain
+from .package_data import __version__
+
+__all__ = [
+    "__version__",
+    "IDNABidiError",
+    "IDNAError",
+    "InvalidCodepoint",
+    "InvalidCodepointContext",
+    "alabel",
+    "check_bidi",
+    "check_hyphen_ok",
+    "check_initial_combiner",
+    "check_label",
+    "check_nfc",
+    "decode",
+    "encode",
+    "intranges_contain",
+    "ulabel",
+    "uts46_remap",
+    "valid_contextj",
+    "valid_contexto",
+    "valid_label_length",
+    "valid_string_length",
+]
--- a/venv/lib/python3.12/site-packages/idna/main.py
+++ b/venv/lib/python3.12/site-packages/idna/main.py
@@ -0,0 +1,6 @@
+import sys
+
+from .cli import main
+
+if __name__ == "__main__":
+    sys.exit(main())
--- a/venv/lib/python3.12/site-packages/idna/pycache/init.cpython-312.pyc
+++ b/venv/lib/python3.12/site-packages/idna/pycache/init.cpython-312.pyc
--- a/venv/lib/python3.12/site-packages/idna/pycache/main.cpython-312.pyc
+++ b/venv/lib/python3.12/site-packages/idna/pycache/main.cpython-312.pyc
--- a/venv/lib/python3.12/site-packages/idna/pycache/cli.cpython-312.pyc
+++ b/venv/lib/python3.12/site-packages/idna/pycache/cli.cpython-312.pyc
--- a/venv/lib/python3.12/site-packages/idna/pycache/codec.cpython-312.pyc
+++ b/venv/lib/python3.12/site-packages/idna/pycache/codec.cpython-312.pyc
--- a/venv/lib/python3.12/site-packages/idna/pycache/compat.cpython-312.pyc
+++ b/venv/lib/python3.12/site-packages/idna/pycache/compat.cpython-312.pyc
--- a/venv/lib/python3.12/site-packages/idna/pycache/core.cpython-312.pyc
+++ b/venv/lib/python3.12/site-packages/idna/pycache/core.cpython-312.pyc
--- a/venv/lib/python3.12/site-packages/idna/pycache/idnadata.cpython-312.pyc
+++ b/venv/lib/python3.12/site-packages/idna/pycache/idnadata.cpython-312.pyc
--- a/venv/lib/python3.12/site-packages/idna/pycache/intranges.cpython-312.pyc
+++ b/venv/lib/python3.12/site-packages/idna/pycache/intranges.cpython-312.pyc
--- a/venv/lib/python3.12/site-packages/idna/pycache/package_data.cpython-312.pyc
+++ b/venv/lib/python3.12/site-packages/idna/pycache/package_data.cpython-312.pyc
--- a/venv/lib/python3.12/site-packages/idna/pycache/uts46data.cpython-312.pyc
+++ b/venv/lib/python3.12/site-packages/idna/pycache/uts46data.cpython-312.pyc
--- a/venv/lib/python3.12/site-packages/idna/cli.py
+++ b/venv/lib/python3.12/site-packages/idna/cli.py
@@ -0,0 +1,128 @@
+"""Command-line interface for the :mod:`idna` package.
+
+Invoked via ``python -m idna``. See :func:`main` for the entry point.
+"""
+
+import argparse
+import sys
+from collections.abc import Iterable
+from itertools import chain
+from typing import IO, Optional
+
+from . import IDNAError, decode, encode
+from .core import _alabel_prefix, _unicode_dots_re
+from .package_data import __version__
+
+
+def _looks_like_alabel(s: str) -> bool:
+    """Return True if any label in ``s`` carries the ``xn--`` ACE prefix."""
+    prefix = _alabel_prefix.decode("ascii")
+    return any(label.lower().startswith(prefix) for label in _unicode_dots_re.split(s))
+
+
+def _build_parser() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(
+        prog="python -m idna",
+        description=(
+            "Convert a domain name between its Unicode (U-label) and "
+            "ASCII-compatible (A-label) forms. With no mode flag, the "
+            "direction is chosen from the first input — if it contains "
+            "an xn-- label the stream is decoded, otherwise it is "
+            "encoded — and the same mode is applied to every remaining "
+            "input. UTS #46 mapping is applied by default; pass "
+            "--strict to disable it. When no domains are given on the "
+            "command line and stdin is piped, one domain per line is "
+            "read from stdin."
+        ),
+    )
+    mode = parser.add_mutually_exclusive_group()
+    mode.add_argument(
+        "-e",
+        "--encode",
+        dest="mode",
+        action="store_const",
+        const="encode",
+        help="Encode the input to its ASCII A-label form.",
+    )
+    mode.add_argument(
+        "-d",
+        "--decode",
+        dest="mode",
+        action="store_const",
+        const="decode",
+        help="Decode the input from its ASCII A-label form.",
+    )
+    parser.add_argument(
+        "--strict",
+        action="store_true",
+        help="Disable the default UTS #46 mapping and apply IDNA 2008 rules verbatim.",
+    )
+    parser.add_argument(
+        "--version",
+        action="version",
+        version=f"idna {__version__}",
+    )
+    parser.add_argument(
+        "domain",
+        nargs="*",
+        help="One or more domain names to convert. Omit to read from stdin.",
+    )
+    return parser
+
+
+def _iter_stdin(stream: IO[str]) -> Iterable[str]:
+    """Yield non-empty stripped lines from ``stream``, ignoring blanks."""
+    for line in stream:
+        stripped = line.strip()
+        if stripped:
+            yield stripped
+
+
+def _convert_one(domain: str, mode: str, uts46: bool) -> bool:
+    """Convert ``domain`` and write the result; return ``False`` on failure."""
+    try:
+        if mode == "decode":
+            print(decode(domain, uts46=uts46))
+        else:
+            print(encode(domain, uts46=uts46).decode("ascii"))
+    except IDNAError as err:
+        print(f"idna: {mode} failed for {domain!r}: {err}", file=sys.stderr)
+        return False
+    return True
+
+
+def main(argv: Optional[list[str]] = None) -> int:
+    """Entry point for ``python -m idna``.
+
+    When more than one domain is supplied (via positional arguments or
+    piped stdin) and no mode flag is given, the first input determines
+    the direction and that mode is applied uniformly to the rest.
+
+    :param argv: Argument list excluding the program name. Defaults to
+        :data:`sys.argv` when ``None``.
+    :returns: ``0`` on success, ``1`` if any conversion fails.
+    """
+    parser = _build_parser()
+    args = parser.parse_args(argv)
+    uts46 = not args.strict
+
+    if args.domain:
+        domains: Iterable[str] = args.domain
+    elif not sys.stdin.isatty():
+        domains = _iter_stdin(sys.stdin)
+    else:
+        parser.error("a domain argument is required when stdin is a terminal")
+
+    iterator = iter(domains)
+    first = next(iterator, None)
+    if first is None:
+        return 0
+
+    mode = args.mode or ("decode" if _looks_like_alabel(first) else "encode")
+
+    results = [_convert_one(domain, mode, uts46) for domain in chain([first], iterator)]
+    return 0 if all(results) else 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
--- a/venv/lib/python3.12/site-packages/idna/codec.py
+++ b/venv/lib/python3.12/site-packages/idna/codec.py
@@ -0,0 +1,159 @@
+import codecs
+from typing import Any, Optional
+
+from .core import IDNAError, _unicode_dots_re, alabel, decode, encode, ulabel
+
+
+class Codec(codecs.Codec):
+    """Stateless IDNA 2008 codec.
+
+    Implements the :class:`codecs.Codec` protocol so that the whole-domain
+    encoder (:func:`idna.encode`) and decoder (:func:`idna.decode`) are
+    accessible through the standard codec machinery as ``"idna2008"``.
+
+    Only the ``"strict"`` error handler is supported; any other handler
+    raises :exc:`~idna.IDNAError`.
+    """
+
+    def encode(self, data: str, errors: str = "strict") -> tuple[bytes, int]:  # ty: ignore[invalid-method-override]
+        if errors != "strict":
+            raise IDNAError(f'Unsupported error handling "{errors}"')
+
+        if not data:
+            return b"", 0
+
+        return encode(data), len(data)
+
+    def decode(self, data: bytes, errors: str = "strict") -> tuple[str, int]:  # ty: ignore[invalid-method-override]
+        if errors != "strict":
+            raise IDNAError(f'Unsupported error handling "{errors}"')
+
+        if not data:
+            return "", 0
+
+        return decode(data), len(data)
+
+
+class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
+    """Incremental IDNA 2008 encoder.
+
+    Buffers a partial trailing label across calls until either the next
+    label separator is seen or ``final=True``, so that streamed input is
+    encoded one whole label at a time. Any of the four Unicode label
+    separators (``U+002E``, ``U+3002``, ``U+FF0E``, ``U+FF61``) ends a
+    label; the result always uses ``U+002E`` as the separator.
+
+    Only the ``"strict"`` error handler is supported.
+    """
+
+    def _buffer_encode(self, data: str, errors: str, final: bool) -> tuple[bytes, int]:  # ty: ignore[invalid-method-override]
+        if errors != "strict":
+            raise IDNAError(f'Unsupported error handling "{errors}"')
+
+        if not data:
+            return b"", 0
+
+        labels = _unicode_dots_re.split(data)
+        trailing_dot = b""
+        if labels:
+            if not labels[-1]:
+                trailing_dot = b"."
+                del labels[-1]
+            elif not final:
+                # Keep potentially unfinished label until the next call
+                del labels[-1]
+                if labels:
+                    trailing_dot = b"."
+
+        result = []
+        size = 0
+        for label in labels:
+            result.append(alabel(label))
+            if size:
+                size += 1
+            size += len(label)
+
+        # Join with U+002E
+        result_bytes = b".".join(result) + trailing_dot
+        size += len(trailing_dot)
+        return result_bytes, size
+
+
+class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
+    """Incremental IDNA 2008 decoder.
+
+    Buffers a partial trailing label across calls until either the next
+    label separator is seen or ``final=True``, so that streamed input is
+    decoded one whole label at a time.
+
+    Only the ``"strict"`` error handler is supported.
+    """
+
+    def _buffer_decode(self, data: Any, errors: str, final: bool) -> tuple[str, int]:  # ty: ignore[invalid-method-override]
+        if errors != "strict":
+            raise IDNAError(f'Unsupported error handling "{errors}"')
+
+        if not data:
+            return ("", 0)
+
+        if not isinstance(data, str):
+            data = str(data, "ascii")
+
+        labels = _unicode_dots_re.split(data)
+        trailing_dot = ""
+        if labels:
+            if not labels[-1]:
+                trailing_dot = "."
+                del labels[-1]
+            elif not final:
+                # Keep potentially unfinished label until the next call
+                del labels[-1]
+                if labels:
+                    trailing_dot = "."
+
+        result = []
+        size = 0
+        for label in labels:
+            result.append(ulabel(label))
+            if size:
+                size += 1
+            size += len(label)
+
+        result_str = ".".join(result) + trailing_dot
+        size += len(trailing_dot)
+        return (result_str, size)
+
+
+class StreamWriter(Codec, codecs.StreamWriter):
+    pass
+
+
+class StreamReader(Codec, codecs.StreamReader):
+    pass
+
+
+def search_function(name: str) -> Optional[codecs.CodecInfo]:
+    """Codec search function registered with :mod:`codecs`.
+
+    Returns a :class:`codecs.CodecInfo` for the ``"idna2008"`` codec name
+    so that ``str.encode("idna2008")`` and ``bytes.decode("idna2008")``
+    invoke the IDNA 2008 codec defined in this module.
+
+    :param name: The codec name being looked up.
+    :returns: A :class:`codecs.CodecInfo` instance if ``name`` is
+        ``"idna2008"``, otherwise ``None``.
+    """
+    if name != "idna2008":
+        return None
+    return codecs.CodecInfo(
+        name=name,
+        encode=Codec().encode,
+        decode=Codec().decode,  # type: ignore
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamwriter=StreamWriter,
+        streamreader=StreamReader,
+    )
+
+
+codecs.register(search_function)
--- a/venv/lib/python3.12/site-packages/idna/compat.py
+++ b/venv/lib/python3.12/site-packages/idna/compat.py
@@ -0,0 +1,41 @@
+from typing import Any, Union
+
+from .core import decode, encode
+
+
+def ToASCII(label: str) -> bytes:
+    """Compatibility shim for :rfc:`3490` ``ToASCII``.
+
+    Delegates to :func:`idna.encode` (IDNA 2008). Provided to ease porting
+    of code written against the legacy :mod:`encodings.idna` API; new code
+    should call :func:`idna.encode` directly.
+
+    :param label: The label or domain to encode.
+    :returns: The encoded form as ASCII :class:`bytes`.
+    """
+    return encode(label)
+
+
+def ToUnicode(label: Union[bytes, bytearray]) -> str:
+    """Compatibility shim for :rfc:`3490` ``ToUnicode``.
+
+    Delegates to :func:`idna.decode` (IDNA 2008). Provided to ease porting
+    of code written against the legacy :mod:`encodings.idna` API; new code
+    should call :func:`idna.decode` directly.
+
+    :param label: The label or domain to decode.
+    :returns: The decoded Unicode form.
+    """
+    return decode(label)
+
+
+def nameprep(s: Any) -> None:
+    """Stub for :rfc:`3491` Nameprep, which is not used by IDNA 2008.
+
+    IDNA 2008 (:rfc:`5891`) replaces Nameprep with the per-codepoint
+    validity classes from :rfc:`5892`; this function exists only to
+    return a clear error if legacy code attempts to call it.
+
+    :raises NotImplementedError: Always.
+    """
+    raise NotImplementedError("IDNA 2008 does not utilise nameprep protocol")
--- a/venv/lib/python3.12/site-packages/idna/core.py
+++ b/venv/lib/python3.12/site-packages/idna/core.py
@@ -0,0 +1,648 @@
+import bisect
+import re
+import unicodedata
+import warnings
+from typing import Optional, Union
+
+from . import idnadata
+from .intranges import intranges_contain
+
+_virama_combining_class = 9
+_alabel_prefix = b"xn--"
+_max_input_length = 1024
+_unicode_dots_re = re.compile("[\u002e\u3002\uff0e\uff61]")
+
+
+# Bidi category sets from RFC 5893, hoisted out of the per-codepoint loop
+_bidi_rtl_first = frozenset({"R", "AL"})
+_bidi_rtl_categories = frozenset({"R", "AL", "AN"})
+_bidi_rtl_allowed = frozenset({"R", "AL", "AN", "EN", "ES", "CS", "ET", "ON", "BN", "NSM"})
+_bidi_rtl_valid_ending = frozenset({"R", "AL", "EN", "AN"})
+_bidi_rtl_numeric = frozenset({"AN", "EN"})
+_bidi_ltr_allowed = frozenset({"L", "EN", "ES", "CS", "ET", "ON", "BN", "NSM"})
+_bidi_ltr_valid_ending = frozenset({"L", "EN"})
+_bidi_joiner_l_or_d = frozenset({"L", "D"})
+_bidi_joiner_r_or_d = frozenset({"R", "D"})
+
+
+def _joining_type(cp: int) -> Optional[str]:
+    for jt, ranges in idnadata.joining_types.items():
+        if intranges_contain(cp, ranges):
+            return jt
+    return None
+
+
+class IDNAError(UnicodeError):
+    """Base exception for all IDNA-encoding related problems"""
+
+
+class IDNABidiError(IDNAError):
+    """Exception when bidirectional requirements are not satisfied"""
+
+
+class InvalidCodepoint(IDNAError):
+    """Exception when a disallowed or unallocated codepoint is used"""
+
+
+class InvalidCodepointContext(IDNAError):
+    """Exception when the codepoint is not valid in the context it is used"""
+
+
+def _combining_class(cp: int) -> int:
+    v = unicodedata.combining(chr(cp))
+    if v == 0 and not unicodedata.name(chr(cp)):
+        raise ValueError("Unknown character in unicodedata")
+    return v
+
+
+def _is_script(cp: str, script: str) -> bool:
+    return intranges_contain(ord(cp), idnadata.scripts[script])
+
+
+def _punycode(s: str) -> bytes:
+    return s.encode("punycode")
+
+
+def _unot(s: int) -> str:
+    return f"U+{s:04X}"
+
+
+def valid_label_length(label: Union[bytes, str]) -> bool:
+    """Check that a label does not exceed the maximum permitted length.
+
+    Per :rfc:`1035` (and :rfc:`5891` §4.2.4) a DNS label must not exceed
+    63 octets. The argument may be either a :class:`str` (a U-label, where
+    length is measured in characters) or :class:`bytes` (an A-label, where
+    length is measured in octets).
+
+    :param label: The label to check.
+    :returns: ``True`` if the label is within the length limit, otherwise
+        ``False``.
+    """
+    return len(label) <= 63
+
+
+def valid_string_length(domain: Union[bytes, str], trailing_dot: bool) -> bool:
+    """Check that a full domain name does not exceed the maximum length.
+
+    Per :rfc:`1035`, a domain name is limited to 253 octets when no trailing
+    dot is present, or 254 octets when one is included.
+
+    :param domain: The full (possibly multi-label) domain name.
+    :param trailing_dot: ``True`` if ``domain`` includes a trailing ``.``.
+    :returns: ``True`` if the domain is within the length limit, otherwise
+        ``False``.
+    """
+    return len(domain) <= (254 if trailing_dot else 253)
+
+
+def check_bidi(label: str, check_ltr: bool = False) -> bool:
+    """Validate the Bidi Rule from :rfc:`5893` for a single label.
+
+    The Bidi Rule constrains how bidirectional characters (Hebrew, Arabic,
+    etc.) may appear within a label. By default the check is only applied
+    when the label contains at least one right-to-left character (Unicode
+    bidirectional categories ``R``, ``AL``, or ``AN``); set ``check_ltr``
+    to ``True`` to apply it to LTR-only labels as well.
+
+    :param label: The label to validate, as a Unicode string.
+    :param check_ltr: If ``True``, apply the rules even when the label
+        contains no RTL characters.
+    :returns: ``True`` if the label satisfies the Bidi Rule.
+    :raises IDNABidiError: If any of Bidi Rule conditions 1-6 are violated,
+        or if the directional category of a codepoint cannot be determined.
+    """
+    if len(label) > _max_input_length:
+        raise IDNAError("Label too long")
+    # Bidi rules should only be applied if string contains RTL characters
+    bidi_label = False
+    for idx, cp in enumerate(label, 1):
+        direction = unicodedata.bidirectional(cp)
+        if direction == "":
+            # String likely comes from a newer version of Unicode
+            raise IDNABidiError(f"Unknown directionality in label {label!r} at position {idx}")
+        if direction in _bidi_rtl_categories:
+            bidi_label = True
+    if not bidi_label and not check_ltr:
+        return True
+
+    # Bidi rule 1
+    direction = unicodedata.bidirectional(label[0])
+    if direction in _bidi_rtl_first:
+        rtl = True
+    elif direction == "L":
+        rtl = False
+    else:
+        raise IDNABidiError(f"First codepoint in label {label!r} must be directionality L, R or AL")
+
+    valid_ending = False
+    number_type: Optional[str] = None
+    for idx, cp in enumerate(label, 1):
+        direction = unicodedata.bidirectional(cp)
+
+        if rtl:
+            # Bidi rule 2
+            if direction not in _bidi_rtl_allowed:
+                raise IDNABidiError(f"Invalid direction for codepoint at position {idx} in a right-to-left label")
+            # Bidi rule 3
+            if direction in _bidi_rtl_valid_ending:
+                valid_ending = True
+            elif direction != "NSM":
+                valid_ending = False
+            # Bidi rule 4
+            if direction in _bidi_rtl_numeric:
+                if not number_type:
+                    number_type = direction
+                elif number_type != direction:
+                    raise IDNABidiError("Can not mix numeral types in a right-to-left label")
+        else:
+            # Bidi rule 5
+            if direction not in _bidi_ltr_allowed:
+                raise IDNABidiError(f"Invalid direction for codepoint at position {idx} in a left-to-right label")
+            # Bidi rule 6
+            if direction in _bidi_ltr_valid_ending:
+                valid_ending = True
+            elif direction != "NSM":
+                valid_ending = False
+
+    if not valid_ending:
+        raise IDNABidiError("Label ends with illegal codepoint directionality")
+
+    return True
+
+
+def check_initial_combiner(label: str) -> bool:
+    """Reject labels that begin with a combining mark.
+
+    Per :rfc:`5891` §4.2.3.2 a label must not start with a character of
+    Unicode general category ``M`` (Mark).
+
+    :param label: The label to check.
+    :returns: ``True`` if the first character is not a combining mark.
+    :raises IDNAError: If the label begins with a combining character.
+    """
+    if unicodedata.category(label[0])[0] == "M":
+        raise IDNAError("Label begins with an illegal combining character")
+    return True
+
+
+def check_hyphen_ok(label: str) -> bool:
+    """Validate the hyphen restrictions for a label.
+
+    Per :rfc:`5891` §4.2.3.1 a label must not start or end with a hyphen
+    (``U+002D``), and must not have hyphens in both the third and fourth
+    positions (the prefix reserved for A-labels).
+
+    :param label: The label to check.
+    :returns: ``True`` if the hyphen restrictions are satisfied.
+    :raises IDNAError: If any of the hyphen restrictions are violated.
+    """
+    if label[2:4] == "--":
+        raise IDNAError("Label has disallowed hyphens in 3rd and 4th position")
+    if label[0] == "-" or label[-1] == "-":
+        raise IDNAError("Label must not start or end with a hyphen")
+    return True
+
+
+def check_nfc(label: str) -> None:
+    """Require that a label is in Unicode Normalization Form C.
+
+    :param label: The label to check.
+    :raises IDNAError: If ``label`` differs from its NFC normalisation.
+    """
+    if len(label) > _max_input_length:
+        raise IDNAError("Label too long")
+    if unicodedata.normalize("NFC", label) != label:
+        raise IDNAError("Label must be in Normalization Form C")
+
+
+def valid_contextj(label: str, pos: int) -> bool:
+    """Validate the CONTEXTJ rules from :rfc:`5892` Appendix A.
+
+    These rules govern the contextual use of the joiner codepoints
+    ``U+200C`` (ZERO WIDTH NON-JOINER, Appendix A.1) and ``U+200D``
+    (ZERO WIDTH JOINER, Appendix A.2) within a label.
+
+    :param label: The label containing the codepoint.
+    :param pos: Index of the joiner codepoint within ``label``.
+    :returns: ``True`` if the codepoint at ``pos`` satisfies its CONTEXTJ
+        rule, ``False`` otherwise (including when the codepoint at
+        ``pos`` is not a recognised joiner).
+    :raises ValueError: If an adjacent codepoint has no Unicode name when
+        determining its combining class.
+    :raises IDNAError: If ``label`` exceeds the defensive input length limit.
+    """
+    if len(label) > _max_input_length:
+        raise IDNAError("Label too long")
+    cp_value = ord(label[pos])
+
+    if cp_value == 0x200C:
+        if pos > 0 and _combining_class(ord(label[pos - 1])) == _virama_combining_class:
+            return True
+
+        ok = False
+        for i in range(pos - 1, -1, -1):
+            joining_type = _joining_type(ord(label[i]))
+            if joining_type == "T":
+                continue
+            if joining_type in _bidi_joiner_l_or_d:
+                ok = True
+                break
+            break
+
+        if not ok:
+            return False
+
+        ok = False
+        for i in range(pos + 1, len(label)):
+            joining_type = _joining_type(ord(label[i]))
+            if joining_type == "T":
+                continue
+            if joining_type in _bidi_joiner_r_or_d:
+                ok = True
+                break
+            break
+        return ok
+
+    if cp_value == 0x200D:
+        return pos > 0 and _combining_class(ord(label[pos - 1])) == _virama_combining_class
+
+    return False
+
+
+def valid_contexto(label: str, pos: int, exception: bool = False) -> bool:
+    """Validate the CONTEXTO rules from :rfc:`5892` Appendix A.
+
+    Covers the contextual rules for codepoints such as MIDDLE DOT
+    (``U+00B7``), Greek lower numeral sign, Hebrew punctuation, Katakana
+    middle dot, and the Arabic-Indic / Extended Arabic-Indic digit ranges.
+
+    :param label: The label containing the codepoint.
+    :param pos: Index of the codepoint within ``label``.
+    :param exception: Reserved for forward compatibility; currently unused.
+    :returns: ``True`` if the codepoint at ``pos`` satisfies its CONTEXTO
+        rule, ``False`` otherwise (including when the codepoint is not a
+        recognised CONTEXTO codepoint).
+    :raises IDNAError: If ``label`` exceeds the defensive input length limit.
+    """
+    if len(label) > _max_input_length:
+        raise IDNAError("Label too long")
+    cp_value = ord(label[pos])
+
+    if cp_value == 0x00B7:
+        return 0 < pos < len(label) - 1 and ord(label[pos - 1]) == 0x006C and ord(label[pos + 1]) == 0x006C
+
+    if cp_value == 0x0375:
+        if pos < len(label) - 1 and len(label) > 1:
+            return _is_script(label[pos + 1], "Greek")
+        return False
+
+    if cp_value in {0x05F3, 0x05F4}:
+        if pos > 0:
+            return _is_script(label[pos - 1], "Hebrew")
+        return False
+
+    if cp_value == 0x30FB:
+        for cp in label:
+            if cp == "\u30fb":
+                continue
+            if _is_script(cp, "Hiragana") or _is_script(cp, "Katakana") or _is_script(cp, "Han"):
+                return True
+        return False
+
+    if 0x660 <= cp_value <= 0x669:
+        return not any(0x6F0 <= ord(cp) <= 0x06F9 for cp in label)
+
+    if 0x6F0 <= cp_value <= 0x6F9:
+        return not any(0x660 <= ord(cp) <= 0x0669 for cp in label)
+
+    return False
+
+
+def check_label(label: Union[str, bytes, bytearray]) -> None:
+    """Run the full set of IDNA 2008 validity checks on a single label.
+
+    Applies, in order: NFC normalisation (:func:`check_nfc`), hyphen
+    restrictions (:func:`check_hyphen_ok`), the no-leading-combiner rule
+    (:func:`check_initial_combiner`), per-codepoint validity (PVALID,
+    CONTEXTJ, CONTEXTO classes from :rfc:`5892`), and the Bidi Rule
+    (:func:`check_bidi`).
+
+    :param label: The label to validate. ``bytes`` or ``bytearray`` input
+        is decoded as UTF-8 first.
+    :raises IDNAError: If the label is empty or fails a structural rule.
+    :raises InvalidCodepoint: If the label contains a DISALLOWED or
+        UNASSIGNED codepoint.
+    :raises InvalidCodepointContext: If a CONTEXTJ or CONTEXTO codepoint
+        is not valid in its context.
+    :raises IDNABidiError: If the Bidi Rule is violated.
+    """
+    if len(label) > _max_input_length:
+        raise IDNAError("Label too long")
+    if isinstance(label, (bytes, bytearray)):
+        label = label.decode("utf-8")
+    if len(label) == 0:
+        raise IDNAError("Empty Label")
+
+    # Reject on domain length rather than label length so support some UTS 46
+    # use cases, still reducing processing of label contextual rules
+    if not valid_string_length(label, trailing_dot=True):
+        raise IDNAError("Label too long")
+
+    check_nfc(label)
+    check_hyphen_ok(label)
+    check_initial_combiner(label)
+
+    for pos, cp in enumerate(label):
+        cp_value = ord(cp)
+        if intranges_contain(cp_value, idnadata.codepoint_classes["PVALID"]):
+            continue
+        if intranges_contain(cp_value, idnadata.codepoint_classes["CONTEXTJ"]):
+            try:
+                if not valid_contextj(label, pos):
+                    raise InvalidCodepointContext(f"Joiner {_unot(cp_value)} not allowed at position {pos + 1} in {label!r}")
+            except ValueError as err:
+                raise IDNAError(
+                    f"Unknown codepoint adjacent to joiner {_unot(cp_value)} at position {pos + 1} in {label!r}"
+                ) from err
+        elif intranges_contain(cp_value, idnadata.codepoint_classes["CONTEXTO"]):
+            if not valid_contexto(label, pos):
+                raise InvalidCodepointContext(f"Codepoint {_unot(cp_value)} not allowed at position {pos + 1} in {label!r}")
+        else:
+            raise InvalidCodepoint(f"Codepoint {_unot(cp_value)} at position {pos + 1} of {label!r} not allowed")
+
+    check_bidi(label)
+
+
+def alabel(label: str) -> bytes:
+    """Convert a single U-label into its A-label form.
+
+    The result is the ASCII-Compatible Encoding (ACE) form per :rfc:`5891`
+    §4: the label is validated, Punycode-encoded, and prefixed with
+    ``xn--``. Pure ASCII labels that are already valid IDNA labels are
+    returned unchanged (as :class:`bytes`).
+
+    :param label: The label to convert, as a Unicode string.
+    :returns: The A-label as ASCII-encoded :class:`bytes`.
+    :raises IDNAError: If the label is invalid or the resulting A-label
+        exceeds 63 octets.
+    """
+    if len(label) > _max_input_length:
+        raise IDNAError("Label too long")
+    try:
+        label_bytes = label.encode("ascii")
+    except UnicodeEncodeError:
+        pass
+    else:
+        ulabel(label_bytes)
+        if not valid_label_length(label_bytes):
+            raise IDNAError("Label too long")
+        return label_bytes
+
+    check_label(label)
+    label_bytes = _alabel_prefix + _punycode(label)
+
+    if not valid_label_length(label_bytes):
+        raise IDNAError("Label too long")
+
+    return label_bytes
+
+
+def ulabel(label: Union[str, bytes, bytearray]) -> str:
+    """Convert a single A-label into its U-label form.
+
+    Performs the inverse of :func:`alabel`: an ``xn--``-prefixed label is
+    Punycode-decoded and validated. Labels that are already Unicode (or
+    plain ASCII without the ACE prefix) are validated and returned as a
+    Unicode string.
+
+    :param label: The label to convert. ``bytes`` or ``bytearray`` input
+        is treated as ASCII.
+    :returns: The U-label as a Unicode string.
+    :raises IDNAError: If the label is malformed or fails validation.
+    """
+    if len(label) > _max_input_length:
+        raise IDNAError("Label too long")
+    if not isinstance(label, (bytes, bytearray)):
+        try:
+            label_bytes = label.encode("ascii")
+        except UnicodeEncodeError:
+            check_label(label)
+            return label
+    else:
+        label_bytes = bytes(label)
+
+    label_bytes = label_bytes.lower()
+    if label_bytes.startswith(_alabel_prefix):
+        label_bytes = label_bytes[len(_alabel_prefix) :]
+        if not label_bytes:
+            raise IDNAError("Malformed A-label, no Punycode eligible content found")
+        if label_bytes.endswith(b"-"):
+            raise IDNAError("A-label must not end with a hyphen")
+    else:
+        check_label(label_bytes)
+        return label_bytes.decode("ascii")
+
+    try:
+        label = label_bytes.decode("punycode")
+    except UnicodeError as err:
+        raise IDNAError("Invalid A-label") from err
+    check_label(label)
+    return label
+
+
+def uts46_remap(domain: str, std3_rules: bool = True, transitional: bool = False) -> str:
+    """Apply the UTS #46 character mapping to a domain string.
+
+    Implements the mapping table from `UTS #46 §4
+    <https://www.unicode.org/reports/tr46/>`_: each character is kept,
+    replaced, or rejected based on its status (``V``, ``M``, ``D``, ``3``,
+    ``I``). The result is returned in Normalisation Form C.
+
+    :param domain: The full domain name to remap.
+    :param std3_rules: If ``True``, apply the stricter STD3 ASCII rules
+        (status ``3`` codepoints raise instead of being kept or mapped).
+    :param transitional: If ``True``, use transitional processing (status
+        ``D`` codepoints are mapped instead of kept). Transitional
+        processing has been removed from UTS #46 and this option is
+        retained only for backwards compatibility.
+    :returns: The remapped domain, in Normalisation Form C.
+    :raises InvalidCodepoint: If the domain contains a disallowed
+        codepoint under the chosen rules.
+    :raises IDNAError: If ``domain`` exceeds the defensive input length limit.
+    """
+    if len(domain) > _max_input_length:
+        raise IDNAError("Domain too long")
+    from .uts46data import uts46_replacements, uts46_starts, uts46_statuses
+
+    output = ""
+
+    for pos, char in enumerate(domain):
+        code_point = ord(char)
+        i = code_point if code_point < 256 else bisect.bisect_right(uts46_starts, code_point) - 1
+        status = chr(uts46_statuses[i])
+        replacement: Optional[str] = uts46_replacements[i]
+
+        # UTS #46 §4: V is always valid, D is deviation (kept unless transitional),
+        # 3 is disallowed-STD3 (kept unmapped if std3_rules is off and no mapping).
+        keep_as_is = (
+            status == "V" or (status == "D" and not transitional) or (status == "3" and not std3_rules and replacement is None)
+        )
+        # M is mapped, 3-with-replacement and transitional D fall through to the
+        # same replacement output path.
+        use_replacement = replacement is not None and (
+            status == "M" or (status == "3" and not std3_rules) or (status == "D" and transitional)
+        )
+
+        if keep_as_is:
+            output += char
+        elif use_replacement:
+            assert replacement is not None  # narrowed by use_replacement
+            output += replacement
+        elif status == "I":
+            continue
+        else:
+            raise InvalidCodepoint(f"Codepoint {_unot(code_point)} not allowed at position {pos + 1} in {domain!r}")
+
+    return unicodedata.normalize("NFC", output)
+
+
+def encode(
+    s: Union[str, bytes, bytearray],
+    strict: bool = False,
+    uts46: bool = False,
+    std3_rules: bool = False,
+    transitional: bool = False,
+) -> bytes:
+    """Encode a Unicode domain name into its ASCII (A-label) form.
+
+    Splits the input on label separators (only ``U+002E`` if ``strict`` is
+    set; otherwise also IDEOGRAPHIC FULL STOP ``U+3002``, FULLWIDTH FULL
+    STOP ``U+FF0E``, and HALFWIDTH IDEOGRAPHIC FULL STOP ``U+FF61``),
+    encodes each label with :func:`alabel`, and rejoins them with ``.``.
+    Optionally pre-processes the input through :func:`uts46_remap`.
+
+    :param s: The domain name to encode.
+    :param strict: If ``True``, only ``U+002E`` is recognised as a label
+        separator.
+    :param uts46: If ``True``, apply UTS #46 mapping before encoding.
+    :param std3_rules: Forwarded to :func:`uts46_remap` when ``uts46`` is
+        ``True``.
+    :param transitional: Forwarded to :func:`uts46_remap` when ``uts46``
+        is ``True``. Deprecated: emits a :class:`DeprecationWarning` and
+        will be removed in a future version.
+    :returns: The encoded domain as ASCII :class:`bytes`.
+    :raises IDNAError: If the domain is empty, contains an invalid label,
+        or exceeds the maximum domain length.
+    """
+    if transitional:
+        warnings.warn(
+            "Transitional processing has been removed from UTS #46. "
+            "The transitional argument will be removed in a future version.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+    if not isinstance(s, str):
+        try:
+            s = str(s, "ascii")
+        except (UnicodeDecodeError, TypeError) as err:
+            raise IDNAError("should pass a unicode string to the function rather than a byte string.") from err
+    if len(s) > _max_input_length:
+        raise IDNAError("Domain too long")
+    if uts46:
+        s = uts46_remap(s, std3_rules, transitional)
+
+    # Reject inputs that exceed the maximum DNS domain length up-front
+    # to avoid expensive computation on long inputs.
+    if not valid_string_length(s, trailing_dot=True):
+        raise IDNAError("Domain too long")
+
+    trailing_dot = False
+    result = []
+    labels = s.split(".") if strict else _unicode_dots_re.split(s)
+    if not labels or labels == [""]:
+        raise IDNAError("Empty domain")
+    if labels[-1] == "":
+        del labels[-1]
+        trailing_dot = True
+    for label in labels:
+        s = alabel(label)
+        if s:
+            result.append(s)
+        else:
+            raise IDNAError("Empty label")
+    if trailing_dot:
+        result.append(b"")
+    s = b".".join(result)
+    if not valid_string_length(s, trailing_dot):
+        raise IDNAError("Domain too long")
+    return s
+
+
+def decode(
+    s: Union[str, bytes, bytearray],
+    strict: bool = False,
+    uts46: bool = False,
+    std3_rules: bool = False,
+    display: bool = False,
+) -> str:
+    """Decode an A-label-encoded domain name back to Unicode.
+
+    Splits the input on label separators (see :func:`encode` for the
+    rules), decodes each label with :func:`ulabel`, and rejoins them
+    with ``.``. Optionally pre-processes the input through
+    :func:`uts46_remap`.
+
+    :param s: The domain name to decode.
+    :param strict: If ``True``, only ``U+002E`` is recognised as a label
+        separator.
+    :param uts46: If ``True``, apply UTS #46 mapping before decoding.
+    :param std3_rules: Forwarded to :func:`uts46_remap` when ``uts46`` is
+        ``True``.
+    :param display: If ``True``, any ``xn--`` label that fails IDNA
+        validation is passed through unchanged (lowercased) rather than
+        aborting the whole call. Intended for "decode for display"
+        consumers (e.g. URL libraries, HTTP clients) that want to show
+        the user the label as it appears on the wire when it cannot be
+        rendered as Unicode. Matches the per-label recovery prescribed
+        by UTS #46 §4 and the WHATWG URL "domain to Unicode" algorithm.
+    :returns: The decoded domain as a Unicode string.
+    :raises IDNAError: If the input is not valid ASCII, contains an
+        invalid label, or is empty.
+    """
+    if not isinstance(s, str):
+        try:
+            s = str(s, "ascii")
+        except (UnicodeDecodeError, TypeError) as err:
+            raise IDNAError("Invalid ASCII in A-label") from err
+    if len(s) > _max_input_length:
+        raise IDNAError("Domain too long")
+    if uts46:
+        s = uts46_remap(s, std3_rules, False)
+    # Reject inputs that exceed the maximum DNS domain length up-front
+    # to avoid expensive computation on long inputs.
+    if not valid_string_length(s, trailing_dot=True):
+        raise IDNAError("Domain too long")
+    trailing_dot = False
+    result = []
+    labels = s.split(".") if strict else _unicode_dots_re.split(s)
+    if not labels or labels == [""]:
+        raise IDNAError("Empty domain")
+    if not labels[-1]:
+        del labels[-1]
+        trailing_dot = True
+    for label in labels:
+        try:
+            u = ulabel(label)
+        except IDNAError:
+            if display and label[:4].lower() == "xn--":
+                u = label.lower()
+            else:
+                raise
+        if u:
+            result.append(u)
+        else:
+            raise IDNAError("Empty label")
+    if trailing_dot:
+        result.append("")
+    return ".".join(result)
--- a/venv/lib/python3.12/site-packages/idna/idnadata.py
+++ b/venv/lib/python3.12/site-packages/idna/idnadata.py
--- a/venv/lib/python3.12/site-packages/idna/intranges.py
+++ b/venv/lib/python3.12/site-packages/idna/intranges.py
@@ -0,0 +1,55 @@
+"""
+Given a list of integers, made up of (hopefully) a small number of long runs
+of consecutive integers, compute a representation of the form
+((start1, end1), (start2, end2) ...). Then answer the question "was x present
+in the original list?" in time O(log(# runs)).
+"""
+
+import bisect
+
+
+def intranges_from_list(list_: list[int]) -> tuple[int, ...]:
+    """Represent a list of integers as a sequence of ranges:
+    ((start_0, end_0), (start_1, end_1), ...), such that the original
+    integers are exactly those x such that start_i <= x < end_i for some i.
+
+    Ranges are encoded as single integers (start << 32 | end), not as tuples.
+    """
+
+    sorted_list = sorted(list_)
+    ranges = []
+    last_write = -1
+    for i in range(len(sorted_list)):
+        if i + 1 < len(sorted_list) and sorted_list[i] == sorted_list[i + 1] - 1:
+            continue
+        current_range = sorted_list[last_write + 1 : i + 1]
+        ranges.append(_encode_range(current_range[0], current_range[-1] + 1))
+        last_write = i
+
+    return tuple(ranges)
+
+
+def _encode_range(start: int, end: int) -> int:
+    return (start << 32) | end
+
+
+def _decode_range(r: int) -> tuple[int, int]:
+    return (r >> 32), (r & ((1 << 32) - 1))
+
+
+def intranges_contain(int_: int, ranges: tuple[int, ...]) -> bool:
+    """Determine if `int_` falls into one of the ranges in `ranges`."""
+    tuple_ = _encode_range(int_, 0)
+    pos = bisect.bisect_left(ranges, tuple_)
+    # we could be immediately ahead of a tuple (start, end)
+    # with start < int_ <= end
+    if pos > 0:
+        left, right = _decode_range(ranges[pos - 1])
+        if left <= int_ < right:
+            return True
+    # or we could be immediately behind a tuple (int_, end)
+    if pos < len(ranges):
+        left, _ = _decode_range(ranges[pos])
+        if left == int_:
+            return True
+    return False
--- a/venv/lib/python3.12/site-packages/idna/package_data.py
+++ b/venv/lib/python3.12/site-packages/idna/package_data.py
@@ -0,0 +1 @@
+__version__ = "3.18"
--- a/venv/lib/python3.12/site-packages/idna/py.typed
+++ b/venv/lib/python3.12/site-packages/idna/py.typed
--- a/venv/lib/python3.12/site-packages/idna/uts46data.py
+++ b/venv/lib/python3.12/site-packages/idna/uts46data.py