# Source code of mlstatpy.nlp.normalize
# -*- coding: utf-8 -*-
"""
Text normalization
:githublink:`%|py|6`
"""
import unicodedata
def remove_diacritics(input_str):
    """
    Remove diacritics from a string and return its ASCII-only form.

    The string is decomposed with Unicode *NFKD* normalization, which
    splits an accented character into its base character followed by
    combining marks (and expands compatibility characters such as
    ligatures). Every character that is not ASCII after this
    decomposition is then discarded.

    Note that this removes more than diacritics: any character without
    an ASCII decomposition (for instance ``ß``, ``ø`` or letters from
    non-Latin scripts) is dropped from the result.

    :param input_str: string to clean
    :return: cleaned string, containing ASCII characters only

    Example::

        enguérand --> enguerand

    .. versionadded:: 1.0

    :githublink:`%|py|21`
    """
    decomposed = unicodedata.normalize('NFKD', input_str)
    # 'ignore' silently drops everything that cannot be encoded as ASCII:
    # the combining marks produced by NFKD and any other non-ASCII character.
    ascii_bytes = decomposed.encode('ascii', 'ignore')
    # The bytes are pure ASCII by construction, so decoding cannot fail.
    return ascii_bytes.decode('ascii')