Coverage for src/mlstatpy/nlp/normalize.py: 100%

6 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2023-02-27 05:59 +0100

1# -*- coding: utf-8 -*- 

2""" 

3@file 

4@brief Text normalization 

5""" 

6import unicodedata 

7 

8 

def remove_diacritics(input_str: str) -> str:
    """
    Removes diacritics from a string and keeps only its ASCII characters.

    The string is decomposed with the Unicode *NFKD* normalization, which
    splits an accented character into its base character followed by
    combining marks, then every non-ASCII code point is discarded.
    As a consequence, a character which has no ASCII decomposition
    (``ß``, ``ø``, non-Latin scripts, ...) is removed entirely,
    it is not replaced by an approximation.

    @param      input_str       string to clean
    @return                     cleaned string, it only contains ASCII characters

    Example::

        enguérand --> enguerand

    .. versionadded:: 1.0
    """
    decomposed = unicodedata.normalize('NFKD', input_str)
    # 'ignore' silently drops whatever cannot be encoded: the combining marks
    # produced by the decomposition and any other non-ASCII character.
    ascii_bytes = decomposed.encode('ascii', 'ignore')
    # The bytes are pure ASCII by construction, so decode with the same codec.
    return ascii_bytes.decode('ascii')