Coverage for src/ensae_teaching_cs/homeblog/postclassification.py: 100%
13 statements
« prev ^ index » next coverage.py v7.1.0, created at 2023-04-28 06:23 +0200
« prev ^ index » next coverage.py v7.1.0, created at 2023-04-28 06:23 +0200
1# -*- coding: utf-8 -*-
2"""
3@file
4@brief Helpers for blog classification
5"""
# Maps every class label (prefixed with "~") to the keywords which trigger it.
# Keywords are compared after being lower-cased by classify_post, entries
# holding upper case letters are kept untouched from the original table.
privateKeyClassification = {
    "~recreative": [
        "\xE9conomie farfelue",
        "xaveir",
        "xavier",
        "xavier dupr\xE9",
        "litt\xE9rature",
        "green website",
        "restaurant",
        "alimentation",
        "cuisine",
        "emploi",
        "discussion",
        "wifi",
        "smart cities",
        "t\xE9l\xE9vision",
        "jeu",
        "jeux",
        "cin\xE9ma",
        "d\xE9couverte",
        "cheminement",
        "\xE9conomie",
        "d\xE9mocratie",
        "d\xE9mographie",
        "m\xE9decine",
        "th\xE9\xE2tre",
        "\xE9cole",
        "papa",
        "recreative",
        "video",
        "photo",
        "joke",
        "tennis",
    ],
    "~technical": [
        "python",
        "programming",
        "c",
        "p-value",
        "edit distance",
        "latex",
        "vba",
        "javascript",
        "big data",
        "math\xE9matique",
        "programmation",
        "programmer",
        "internet",
        "algorithm",
        "algorithme",
        "extreme values",
        "C#",
        "c#",
        "c sharp",
        "csharp",
        "machine learning",
        "os",
        "r",
        "git",
        "doon\xE9es",
    ],
    "~ENSAE": [
        "ensae alumni",
        "data scientist",
        "ensae",
        "ENSAE",
        "enseignement",
    ],
}

# Class labels, in the order they are declared above.
privateKeyClassificationMandatory = list(privateKeyClassification)
def classify_post(keywords, content):
    """
    Builds the classification of a post from its keywords.

    Keywords equal to a class label (a key of *privateKeyClassification*)
    are removed, the other ones are converted to lower case. Every
    remaining keyword listed under a class label adds that label to the
    result; a label is added once per matching keyword, so it may be
    repeated.

    @param      keywords    keywords attached to the post (strings)
    @param      content     content of the post, not used by this function
    @return                 list made of the matched class labels followed
                            by the lower-cased keywords
    """
    class_labels = tuple(privateKeyClassification)

    # Keywords in lower case, class labels already present are dropped.
    lowered = []
    for word in keywords:
        if word in class_labels:
            continue
        lowered.append(word.lower())

    # Every class is checked for every keyword (no early exit),
    # a keyword may then belong to several classes.
    matched = [
        label
        for word in lowered
        for label, members in privateKeyClassification.items()
        if word in members
    ]

    return matched + lowered