# Source code for bpw_pde.common

"""Common functions used across the index, series, and dataframe modules."""

import hashlib
import string
import unicodedata

# Translation table for ``str.translate``: every ASCII punctuation character
# listed in ``string.punctuation`` is mapped to a single space.
PUNCTUATION_TO_SPACE_TRANSLATOR = dict.fromkeys(map(ord, string.punctuation), ord(' '))

# Normalization form used by ``unicode_normalize`` when the caller gives none.
DEFAULT_UNICODE_NORMALIZATION_FORM = 'NFKC'


def clean_name(name: str) -> str:
    """
    Converts ``name`` into a lowercase, underscore-separated identifier.

    The name is lowercased, every character in ``PUNCTUATION_TO_SPACE_TRANSLATOR`` is replaced
    by a space, the result is split on whitespace, and the tokens are rejoined with underscores.
    Because the underscore is itself an ASCII punctuation character, existing underscores act as
    token separators; runs of punctuation and/or whitespace collapse into a single underscore and
    leading or trailing separators are dropped (e.g. ``'  Hello, World! '`` -> ``'hello_world'``).

    :param name: The name to be cleaned.
    :return: The cleaned name; an empty string if ``name`` holds no characters other than
        punctuation and whitespace.
    """
    without_punctuation = name.lower().translate(PUNCTUATION_TO_SPACE_TRANSLATOR)
    # ``split()`` with no argument splits on any whitespace run and discards empty tokens.
    return '_'.join(without_punctuation.split())


def md5(text: str) -> str:
    """
    Computes the MD5 hash of ``text``.

    The text is encoded as UTF-8 before hashing, and the digest is returned as a string of
    32 lowercase hexadecimal characters.

    :param text: The text to be hashed using the MD5 hashing algorithm.
    :return: The hexadecimal MD5 digest of the UTF-8 encoded text.
    """
    encoded = text.encode('utf-8')
    return hashlib.md5(encoded).hexdigest()


def unicode_normalize(text: str, form: str = DEFAULT_UNICODE_NORMALIZATION_FORM) -> str:
    """
    Applies Unicode normalization on ``text`` using the form ``form``.

    This is a thin wrapper around :func:`unicodedata.normalize`.

    :param text: The text to be normalized.
    :param form: The Unicode normalization form to use for normalization; one of ``'NFC'``,
        ``'NFKC'``, ``'NFD'`` or ``'NFKD'``. Defaults to ``DEFAULT_UNICODE_NORMALIZATION_FORM``.
    :return: The Unicode normalized text.
    :raises ValueError: If ``form`` is not a valid normalization form.
    """
    return unicodedata.normalize(form, text)