Code source de mlstatpy.nlp.normalize

# -*- coding: utf-8 -*-
"""
Text normalization


:githublink:`%|py|6`
"""
import unicodedata


def remove_diacritics(input_str):
    """
    Remove diacritics (accents, cedillas, diaereses, ...) from a string.

    The string is decomposed with the *NFKD* normalization form, which
    splits every accented character into its base character followed by
    combining marks, and the combining marks are then dropped.
    Characters which have no decomposition (``ß``, ``ø``, CJK
    ideographs, ...) are kept as they are instead of being silently
    deleted. Because the compatibility form is used, compatibility
    characters such as ligatures are replaced by their plain
    equivalents.

    :param input_str: string to clean
    :return: cleaned string

    Example::

        enguérand --> enguerand

    .. versionadded:: 1.0
    """
    nfkd_form = unicodedata.normalize('NFKD', input_str)
    # Filter on the combining class rather than encoding to ASCII with
    # errors='ignore': the latter also throws away every character which
    # has no ASCII decomposition, not only the accents.
    return "".join(
        char for char in nfkd_form if not unicodedata.combining(char))