Source code for bpw_pde.common

"""Common functions used across the index, series, and dataframe modules."""

import hashlib
import string
import unicodedata

PUNCTUATION_TO_SPACE_TRANSLATOR = str.maketrans(string.punctuation, ' ' * len(string.punctuation))
DEFAULT_UNICODE_NORMALIZATION_FORM = 'NFKC'


[docs]def clean_name(name: str) -> str: """ Applies lowercasing, whitespace tokenization, and rejoining tokens with underscore to ``name``. :param name: The name to be cleaned. :return: The cleaned name. """ return '_'.join(name.lower().translate(PUNCTUATION_TO_SPACE_TRANSLATOR).split())
[docs]def md5(text: str) -> str: """ Computes the MD5 hash of ``text``. :param text: The text to be hashed using the MD5 hashing algorithm. :return: The MD5 hash of the text. """ return hashlib.md5(text.encode('utf-8')).hexdigest()
[docs]def unicode_normalize(text: str, form: str = DEFAULT_UNICODE_NORMALIZATION_FORM) -> str: """ Applies Unicode normalization on ``text`` using the form ``form``. :param text: The text to be normalized. :param form: The Unicode normalization form to use for normalization. :return: The Unicode normalized text. """ return unicodedata.normalize(form, text)