Source code for multiformats.multibase

"""
    Implementation of the `multibase spec <https://github.com/multiformats/multibase>`_.

    Suggested usage:

    >>> from multiformats import multibase
"""

from __future__ import annotations

from abc import ABC, abstractmethod
import binascii
import importlib.resources as importlib_resources
from itertools import product
import json
import math
import re
from typing import Any, Callable, cast, Dict, Iterable, Iterator, List, Mapping, Optional, Tuple, Type, Union
import sys

from typing_extensions import Literal, Final
from typing_validation import validate

from bases import (base2, base16, base8, base10, base36, base58btc, base58flickr, base58ripple,
                   base32, base32hex, base32z, base64, base64url, base45,)
from multiformats_config.multibase import load_multibase_table

from multiformats.multibase import raw
from multiformats.varint import BytesLike
from .raw import RawEncoder, RawDecoder
from .err import MultibaseKeyError, MultibaseValueError

# Static type for multibase status strings. The same six strings are repeated
# in ``MultibaseStatusValues`` below, so the two definitions must be kept in sync.
MultibaseStatus = Literal[
    "draft", "final", "reserved", "experimental",
    "candidate", "default" # FIXME: deprecated legacy values
]
"""
    Literal type of possible values for the :attr:`Multibase.status` property.
"""

# Runtime counterpart of ``MultibaseStatus``: a tuple of the allowed status
# strings, usable for membership checks (``status in MultibaseStatusValues``).
MultibaseStatusValues: Final[Tuple[MultibaseStatus, ...]] = (
    "draft", "final", "reserved", "experimental", "candidate", "default"
)
"""
    Collection of possible values for the :attr:`Multibase.status` property.
"""

class Multibase:
    """
        Container class for a multibase encoding.

        Example usage:

        >>> Multibase(name="base16", code="f",
        ...           status="default", description="hexadecimal")

        :param name: the multibase name
        :type name: :obj:`str`
        :param code: the multibase code, as single-char string or ``0x...`` hex-string of a non-empty bytestring
        :type code: :obj:`str`
        :param status: the multibase status, one of the strings in ``MultibaseStatusValues``
        :type status: :obj:`str`, *optional*
        :param description: the multibase description
        :type description: :obj:`str`, *optional*

        :raises MultibaseValueError: if the name, code or status is invalid
    """

    _name: str
    _code: str
    _status: MultibaseStatus
    _description: str

    __slots__ = ("__weakref__", "_name", "_code", "_status", "_description")

    def __new__(cls, name: str, code: str,
                status: str = "draft",
                description: str = "") -> "Multibase":
        for arg in (name, code, status, description):
            validate(arg, str)
        name = Multibase._validate_name(name)
        code = Multibase.validate_code(code)
        status = Multibase._validate_status(status)
        instance = super().__new__(cls)
        instance._name = name
        instance._code = code
        instance._status = status
        instance._description = description
        return instance

    def __getnewargs__(self) -> Tuple[str, str, MultibaseStatus, str]:
        # Arguments handed back to ``__new__`` when an instance is copied or unpickled.
        return (self.name, self.code, self.status, self.description)

    @staticmethod
    def _validate_name(name: Optional[str]) -> str:
        """
            Checks that the name is a lowercase identifier of at least two characters.

            :raises MultibaseValueError: if the name is :obj:`None` or malformed
        """
        validate(name, Optional[str])
        # Explicit check rather than ``assert`` (which is stripped under ``-O``);
        # ``fullmatch`` rather than ``match`` + ``$`` so a trailing newline is rejected.
        if name is None or not re.fullmatch(r"[a-z][a-z0-9_-]+", name):  # ensures len(name) > 1
            raise MultibaseValueError(f"Invalid multibase encoding name {repr(name)}")
        return name

    @staticmethod
    def validate_code(code: str) -> str:
        r"""
            Validates a multibase code and transforms it to single-character format (if in hex format).

            Example usage:

            >>> Multibase.validate_code("0x00")
            '\x00'
            >>> Multibase.validate_code("hi")
            MultibaseValueError: Multibase codes must be single-character strings
            or the hex digits '0x...' of a non-empty bytestring.

            :param code: the multibase code, as single character or ``0x...`` hex-string of a non-empty bytestring
            :type code: :obj:`str`

            :raises MultibaseValueError: if the code is neither a single character nor a valid ``0x...`` hex-string,
                                         if the hex-string denotes a printable ASCII character,
                                         or if it does not denote a valid Unicode code point
        """
        validate(code, str)
        # Only genuine hex digits are accepted: letters beyond 'f' would otherwise reach
        # ``int(..., base=16)`` and surface as an unrelated bare ValueError.
        if re.fullmatch(r"0x([0-9a-fA-F][0-9a-fA-F])+", code):
            ord_code = int(code, base=16)
            if ord_code in range(0x20, 0x7F):
                raise MultibaseValueError("Multibase codes in hex format cannot be printable ASCII characters.")
            try:
                return chr(ord_code)
            except (ValueError, OverflowError) as err:
                # The hex-string is well-formed but too large to be a single character.
                raise MultibaseValueError(
                    "Multibase codes in hex format must denote a valid Unicode code point."
                ) from err
        if len(code) != 1:
            raise MultibaseValueError("Multibase codes must be single-character strings or the hex digits '0x...' of a non-empty bytestring.")
        return code

    @staticmethod
    def _validate_status(status: str) -> MultibaseStatus:
        """
            Checks that the status is one of the values in ``MultibaseStatusValues``.

            :raises MultibaseValueError: if the status is not an allowed value
        """
        if status not in MultibaseStatusValues:
            raise MultibaseValueError(f"Invalid multibase encoding status {repr(status)}.")
        return cast(MultibaseStatus, status)

    @property
    def code(self) -> str:
        """
            Multibase code, always stored as a single character: codes given in
            ``0x...`` hex format are converted by :meth:`Multibase.validate_code`.
            See :attr:`Multibase.code_printable` for the printable form.
        """
        return self._code

    @property
    def code_printable(self) -> str:
        r"""
            Printable version of :attr:`Multibase.code`:

            - if the code is a printable ASCII character (``0x20`` to ``0x7E``), returns the code itself
            - otherwise, returns ``0x`` followed by the base16 encoding of the code point's bytes

            Example usage:

            >>> identity = multibase.get(code="\x00")
            >>> identity.code
            '\x00'
            >>> identity.code_printable
            '0x00'
        """
        code = self.code
        ord_code = ord(code)
        if ord_code not in range(0x20, 0x7F):
            # At least one byte, so that code point 0 is rendered as '0x00'.
            ord_code_num_bytes = max(1, math.ceil(ord_code.bit_length()/8))
            ord_code_bytes = ord_code.to_bytes(ord_code_num_bytes, byteorder="big")
            return "0x"+base16.encode(ord_code_bytes)
        return code

    @property
    def status(self) -> MultibaseStatus:
        """ Multibase status. """
        return self._status

    @property
    def description(self) -> str:
        """ Multibase description. """
        return self._description

    @property
    def name(self) -> str:
        """
            Multibase name. Must satisfy the following:

            .. code-block:: python

                re.fullmatch(r"[a-z][a-z0-9_-]+", name)

            In the `multibase table <https://github.com/multiformats/multibase/raw/master/multibase.csv>`_,
            this is listed under `encoding`.
        """
        return self._name

    @property
    def raw_encoder(self) -> RawEncoder:
        """
            Returns the raw encoder for this encoding:
            given bytes, it produces the encoded string without the multibase prefix.

            :raises NotImplementedError: if no raw encoding is available under this multibase's name
        """
        enc = raw.get(self.name)
        if enc is None:
            raise NotImplementedError(f"Multibase/encoding for {repr(self.name)} is not yet implemented.")
        return enc.encode

    @property
    def raw_decoder(self) -> RawDecoder:
        """
            Returns the raw decoder for this encoding:
            given a string without the multibase prefix, it produces the decoded data.

            :raises NotImplementedError: if no raw encoding is available under this multibase's name
        """
        enc = raw.get(self.name)
        if enc is None:
            raise NotImplementedError(f"Multibase/decoding for {repr(self.name)} is not yet implemented.")
        return enc.decode

    def encode(self, b: BytesLike) -> str:
        """
            Encodes bytes into a multibase string: it first uses :meth:`Multibase.raw_encoder`,
            and then prepends the multibase prefix given by :attr:`Multibase.code` and returns
            the resulting multibase string.

            Example usage:

            >>> base32 = multibase.get("base32")
            >>> base32.encode(b"Hello World!")
            'bjbswy3dpeblw64tmmqqq'

            :param b: the bytes to be encoded
            :type b: :class:`~multiformats.varint.BytesLike`
        """
        return self.code+self.raw_encoder(b)

    def decode(self, s: str) -> bytes:
        """
            Decodes a multibase string into bytes: it first checks that the multibase
            prefix matches the value specified by :attr:`Multibase.code`, then uses
            :meth:`Multibase.raw_decoder` on the string without prefix and returns the bytes.

            Example usage:

            >>> base32 = multibase.get("base32")
            >>> base32.decode("bjbswy3dpeblw64tmmqqq")
            b'Hello World!'

            :param s: the string to be decoded
            :type s: :obj:`str`

            :raises ValueError: if the code from the string is different from the one of this multibase
            :raises ValueError: see :func:`from_str`
            :raises KeyError: see :func:`from_str`
        """
        encoding = from_str(s)
        if encoding != self:
            raise MultibaseValueError(f"Expected {repr(self.name)} encoding, "
                                      f"found {repr(encoding.name)} encoding instead.")
        # Strip exactly the prefix that was matched, rather than assuming its length.
        return self.raw_decoder(s[len(self.code):])

    def to_json(self) -> Mapping[str, str]:
        """
            Returns a JSON dictionary representation of this :class:`Multibase` object.
            The code is given in its printable form (see :attr:`Multibase.code_printable`).

            Example usage:

            >>> base32 = multibase.get("base32")
            >>> base32.to_json()
            {'name': 'base32', 'code': 'b',
             'status': 'default',
             'description': 'rfc4648 case-insensitive - no padding'}
        """
        return {
            "name": self.name,
            "code": self.code_printable,
            "status": self.status,
            "description": self.description
        }

    def __str__(self) -> str:
        # Registered multibases are shown by the expression that retrieves them.
        if exists(self.name) and get(self.name) == self:
            return f"multibase.get({repr(self.name)})"
        return repr(self)

    def __repr__(self) -> str:
        return f"Multibase({', '.join(f'{k}={repr(v)}' for k, v in self.to_json().items())})"

    @property
    def _as_tuple(self) -> Tuple[Type["Multibase"], str, str, MultibaseStatus]:
        # Identity used by __eq__ and __hash__; the description is deliberately not part of it.
        return (Multibase, self.name, self.code, self.status)

    def __hash__(self) -> int:
        return hash(self._as_tuple)

    def __eq__(self, other: Any) -> bool:
        if self is other:
            return True
        if not isinstance(other, Multibase):
            return NotImplemented
        return self._as_tuple == other._as_tuple
def get(name: Optional[str] = None, *, code: Optional[str] = None) -> Multibase:
    """
        Gets the multibase encoding with given name or multibase code.
        The code can be given either as a single character or in the printable
        ``0x...`` hex format accepted by :func:`exists` and produced by
        :attr:`Multibase.code_printable`.

        Example usage:

        >>> multibase.get("base8")
        Multibase(name='base8', code='7',
                  status='draft', description='octal')
        >>> multibase.get(name="base8")
        Multibase(name='base8', code='7',
                  status='draft', description='octal')
        >>> multibase.get(code="t")
        Multibase(name='base32hexpad', code='t', status='candidate',
                  description='rfc4648 case-insensitive - with padding')

        :param name: the name of this multibase
        :type name: :obj:`str` or :obj:`None`, *optional*
        :param code: the code of this multibase (keyword-only)
        :type code: :obj:`str` or :obj:`None`, *optional*

        :raises KeyError: if no such multibase exists
        :raises ValueError: unless exactly one of ``name`` and ``code`` is specified
    """
    validate(name, Optional[str])
    validate(code, Optional[str])
    if (name is None) == (code is None):
        raise MultibaseValueError("Must specify exactly one between encoding name and code.")
    if code is not None:
        lookup = code
        if lookup not in _code_table:
            # Not a registered single-character code: it may be the ``0x...`` hex form.
            # A code that cannot be normalised is reported below as "not found",
            # exactly as before, rather than as a validation error.
            try:
                lookup = Multibase.validate_code(code)
            except (MultibaseValueError, ValueError, OverflowError):
                lookup = code
        if lookup not in _code_table:
            raise MultibaseKeyError(f"No multibase encoding with code {repr(code)}.")
        return _code_table[lookup]
    if name not in _name_table:
        raise MultibaseKeyError(f"No multibase encoding named {repr(name)}.")
    return _name_table[name]
def exists(name: Optional[str] = None, *, code: Optional[str] = None) -> bool:
    """
        Tells whether a multibase encoding is registered under the given name
        or under the given code (exactly one of the two must be supplied).

        Example usage:

        >>> multibase.exists("base8")
        True
        >>> multibase.exists(code="t")
        True

        :param name: the name of this multibase
        :type name: :obj:`str` or :obj:`None`, *optional*
        :param code: the code of this multibase (keyword-only)
        :type code: :obj:`str` or :obj:`None`, *optional*

        :raises ValueError: if the code is not a valid multibase code (see :meth:`Multibase.validate_code`)
        :raises ValueError: unless exactly one of ``name`` and ``code`` is specified
    """
    validate(name, Optional[str])
    validate(code, Optional[str])
    if (name is None) == (code is None):
        raise MultibaseValueError("Must specify exactly one between encoding name and code.")
    if code is None:
        # Lookup by name.
        return name in _name_table
    # Lookup by code, after normalising it to its single-character form.
    return Multibase.validate_code(code) in _code_table
def register(base: Multibase, *, overwrite: bool = False) -> None:
    """
        Registers a given multibase encoding.

        Example usage:

        >>> base45 = Multibase(name="base45", code=":",
        ...                    status="draft", description="base45 encoding")
        >>> multibase.register(base45)
        >>> multibase.get("base45")
        Multibase(name='base45', code=':', status='draft',
                  description='base45 encoding')

        :param base: the multibase to register
        :type base: :class:`Multibase`
        :param overwrite: whether to overwrite a multibase with existing code (optional, default :obj:`False`)
        :type overwrite: :obj:`bool`, *optional*

        :raises ValueError: if ``overwrite`` is :obj:`False` and a multibase with the same name or code already exists
        :raises ValueError: if ``overwrite`` is :obj:`True` and a multibase with the same name but different code already exists
    """
    validate(base, Multibase)
    validate(overwrite, bool)
    if not overwrite and base.code in _code_table:
        raise MultibaseValueError(f"Multibase encoding with code {repr(base.code)} already exists: {_code_table[base.code]}")
    if base.name in _name_table and _name_table[base.name].code != base.code:
        raise MultibaseValueError(f"Multibase encoding with name {repr(base.name)} already exists: {_name_table[base.name]}")
    # When overwriting a code previously registered under a different name, drop the
    # old name as well: otherwise the name table keeps an entry whose code now belongs
    # to another multibase, and unregistering that old name would delete the new entry.
    replaced = _code_table.get(base.code)
    if replaced is not None and replaced.name != base.name:
        stale = _name_table.get(replaced.name)
        if stale is not None and stale.code == base.code:
            del _name_table[replaced.name]
    _code_table[base.code] = base
    _name_table[base.name] = base
def validate_multibase(multibase: Multibase) -> None:
    """
        Checks that the given :class:`Multibase` instance is the one currently
        registered under its name. Returns silently when it is.

        :param multibase: the instance to be validated
        :type multibase: :class:`Multibase`

        :raises KeyError: if no multibase with the given name is registered
        :raises ValueError: if a multibase with the given name is registered, but is different from the one given
    """
    validate(multibase, Multibase)
    # ``get`` raises the KeyError when the name is unknown.
    if get(multibase.name) != multibase:
        raise MultibaseValueError(f"Multibase named {multibase.name} exists, but is not the one given.")
def unregister(name: Optional[str] = None, *, code: Optional[str] = None) -> None:
    """
        Removes the multibase encoding with given name or code from both
        the code table and the name table.

        Example usage:

        >>> base45 = Multibase(name="base45", code=":",
        ...                    status="draft", description="base45 encoding")
        >>> multibase.register(base45)
        >>> multibase.get("base45")
        Multibase(name='base45', code=':', status='draft',
                  description='base45 encoding')
        >>> multibase.unregister(code=":")
        >>> multibase.exists("base45")
        False

        :param name: the multibase name
        :type name: :obj:`str` or :obj:`None`, *optional*
        :param code: the multibase code
        :type code: :obj:`str` or :obj:`None`, *optional*

        :raises KeyError: if no such multibase exists
    """
    # ``get`` performs the argument checks and raises if nothing matches.
    target = get(name=name, code=code)
    _code_table.pop(target.code)
    _name_table.pop(target.name)
def table() -> Iterator[Multibase]:
    """
        Yields every registered multibase, ordered by ascending code.

        Example usage:

        >>> [e.code for e in multibase.table()]
        ['\\x00', '0', '7', '9', 'B', 'C', 'F', 'K', 'M', 'T', 'U', 'V',
         'Z','b', 'c', 'f', 'h', 'k', 'm', 'p', 't', 'u', 'v', 'z']
    """
    # Iterating a dict yields its keys, so the codes can be sorted directly.
    for key in sorted(_code_table):
        yield _code_table[key]
def from_str(s: str) -> Multibase:
    """
        Looks up the multibase encoding of a multibase string from the code
        that the string starts with.

        Example usage:

        >>> multibase.from_str("mSGVsbG8gd29ybGQh")
        Multibase(encoding='base64', code='m',
                  status='default', description='rfc4648 no padding')

        :param s: the multibase encoded string
        :type s: :obj:`str`

        :raises ValueError: if the empty string is passed
        :raises KeyError: if no multibase exists with that code
    """
    validate(s, str)
    if not s:
        raise MultibaseValueError("Empty string is not valid for encoded data.")
    first_char = s[0]
    if first_char in _code_table:
        # Common case: the first character is itself a registered code.
        return _code_table[first_char]
    # Otherwise scan the registered codes for one that prefixes the string.
    matching_code = next((known for known in _code_table if s.startswith(known)), None)
    if matching_code is None:
        raise MultibaseKeyError("No known multibase code is a prefix of the given string.")
    return get(code=matching_code)
def encode(data: BytesLike, base: Union[str, "Multibase"]) -> str:
    """
        Encodes bytes as a multibase string with the requested encoding.
        An encoding passed by name is resolved through :func:`get`; the encoding
        itself is carried out by :meth:`multiformats.multibase.Multibase.encode`.

        Example usage:

        >>> multibase.encode(b"Hello world!", "base64")
        'mSGVsbG8gd29ybGQh'

        :param data: the data to encode using the multibase
        :type data: :obj:`~multiformats.varint.BytesLike`
        :param base: the multibase to use
        :type base: :obj:`str` or :class:`Multibase`
    """
    validate(base, Union[str, "Multibase"])
    multibase = get(base) if isinstance(base, str) else base
    return multibase.encode(data)
def decode(s: str) -> bytes:
    """
        Decodes a multibase string into bytes. The encoding is determined from
        the string by :func:`from_str`, and the decoding is delegated to the
        :meth:`Multibase.decode` method of that encoding.

        Example usage:

        >>> multibase.decode("mSGVsbG8gd29ybGQh")
        b'Hello world!'

        :param s: the string to be decoded
        :type s: :obj:`str`
    """
    return from_str(s).decode(s)
def decode_raw(s: str) -> Tuple[Multibase, bytes]:
    """
        Like :func:`decode`, but also reports which multibase was used:
        returns a ``(base, bytestr)`` pair of the multibase and decoded bytestring.

        Example usage:

        >>> base, bytestr = multibase.decode_raw("mSGVsbG8gd29ybGQh")
        >>> base
        Multibase(name='base64', code='m',
                  status='default', description='rfc4648 no padding')
        >>> bytestr
        b'Hello world!'

        :param s: the string to be decoded
        :type s: :obj:`str`
    """
    detected = from_str(s)
    payload = detected.decode(s)
    return detected, payload
# Global lookup tables used by the functions of this module: ``_code_table`` is
# indexed by multibase code and ``_name_table`` by multibase name. Both are
# obtained from ``multiformats_config.multibase.load_multibase_table``.
_code_table, _name_table = load_multibase_table()

# NOTE(review): the commented-out code below looks like an earlier, in-module way of
# building the same two tables from a bundled ``multibase-table.json``; it appears
# to have been superseded by ``load_multibase_table`` above -- confirm before removing.
#
# def build_multibase_tables(bases: Iterable[Multibase]) -> Tuple[Dict[str, Multibase], Dict[str, Multibase]]:
#     """
#         Creates code->encoding and name->encoding mappings from a finite iterable of encodings, returning the mappings.
#
#         Example usage:
#
#         >>> code_table, name_table = build_multicodec_tables(bases)
#
#         :param bases: the multibases to add to the table
#         ::
#
#         :raises ValueError: if the same encoding code or name is encountered multiple times
#     """
#     # validate(multicodecs, Iterable[Multicodec]) # TODO: not yet properly supported by typing-validation
#     code_table: Dict[str, Multibase] = {}
#     name_table: Dict[str, Multibase] = {}
#     for e in bases:
#         if e.code in code_table:
#             raise MultibaseValueError(f"Multicodec name {e.name} appears multiple times in table.")
#         code_table[e.code] = e
#         if e.name in name_table:
#             raise MultibaseValueError(f"Multicodec name {e.name} appears multiple times in table.")
#         name_table[e.name] = e
#     return code_table, name_table
#
# Create the global code->multibase and name->multibase mappings.
# _code_table: Dict[str, Multibase] = {}
# _name_table: Dict[str, Multibase] = {}
# with importlib_resources.open_text("multiformats.multibase", "multibase-table.json", encoding="utf8") as _table_f:
#     _table_json = json.load(_table_f)
#     _code_table, _name_table = build_multibase_tables(Multibase(**row) for row in _table_json)