Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2@file 

3@brief Documentation helper. 

4""" 

5from logging import getLogger 

6from textwrap import indent, dedent 

7import numpy 

8from jinja2 import Template 

9from pandas import DataFrame, notnull 

10from sklearn.linear_model import LinearRegression 

11from pyquickhelper.loghelper import noLOG 

12from pyquickhelper.pandashelper.tblformat import df2rst 

13from sklearn import __all__ as sklearn__all__ 

14from ...tools.asv_options_helper import get_opset_number_from_onnx 

15from ...tools.model_info import analyze_model 

16from ..validate.validate import enumerate_validated_operator_opsets, sklearn_operators 

17from ...onnx_tools.optim.sklearn_helper import inspect_sklearn_model 

18from ...onnx_tools.optim.onnx_helper import onnx_statistics 

19from ..onnx_inference import OnnxInference 

20from ..validate.validate_summary import _clean_values_optim 

21from .doc_helper import visual_rst_template 

22 

23 

def _make_opset(row):
    """
    Builds the opset label (``'o<number>'``) associated to a row.

    Only the entries whose key starts with ``'opset'`` are considered:

    * an integer value (python or numpy) is used as the opset,
    * a float value (python or numpy) is truncated to an integer,
      ``nan`` counts as 0,
    * any other value must be iterable (a string, a sequence), it selects
      the number found in the key (``'opset12'`` gives 12) if it is
      not empty and if its string representation contains ``'OK'``.

    @param      row     dictionary or any object implementing ``to_dict``
    @return             ``'o%d'`` formatted with the highest opset found,
                        or with the value returned by
                        *get_opset_number_from_onnx* if no opset was found
    """
    if hasattr(row, 'to_dict'):
        row = row.to_dict()  # pragma: no cover

    opsets = []
    for k, v in row.items():
        if not k.startswith('opset'):
            continue
        if isinstance(v, (int, numpy.integer)):
            # numpy integers (numpy.int64, ...) are not instances of int,
            # they used to fall into the last branch and fail
            # because they cannot be iterated.
            opsets.append(int(v))
        elif isinstance(v, (float, numpy.floating)):  # pragma: no cover
            # Same reason for numpy.float32 which does not inherit from float.
            if numpy.isnan(v):
                opsets.append(0)
            else:
                opsets.append(int(v))
        else:  # pragma: no cover
            # NOTE(review): the condition tests str(v), not str(_),
            # it does not depend on the element, the list is either
            # empty or a copy of v. Kept unchanged, to be confirmed.
            vv = [_ for _ in v if 'OK' in str(v)]
            if len(vv) > 0:
                opsets.append(int(k.replace("opset", "")))

    if len(opsets) == 0:
        return "o%d" % get_opset_number_from_onnx()  # pragma: no cover
    return "o%d" % max(opsets)

45 

46 

def enumerate_visual_onnx_representation_into_rst(sub, fLOG=noLOG):
    """
    Yields the content of pages such as
    :ref:`l-skl2onnx-linear_model`, one string for every row
    returned by *enumerate_validated_operator_opsets* which holds
    a converted model (key ``'ONNX'``) and whose title was not
    already produced.

    @param      sub     value given to *sklearn_operators* to get
                        the list of models
    @param      fLOG    logging function given to
                        *enumerate_validated_operator_opsets*
    @return             iterator on strings
    """
    # Logger 'skl2onnx' is disabled, it is not enabled again afterwards.
    getLogger('skl2onnx').disabled = True

    raw_template = visual_rst_template()
    renderer = Template(raw_template)
    seen_titles = set()
    model_names = sorted(_['name'] for _ in sklearn_operators(sub))

    validated = enumerate_validated_operator_opsets(
        verbose=0, debug=None, fLOG=fLOG,
        opset_min=get_opset_number_from_onnx(),
        opset_max=get_opset_number_from_onnx(),
        store_models=True, models=model_names)

    for row in validated:
        if 'ONNX' not in row:
            continue

        name = row['name']
        scenario = row['scenario']
        problem = row['problem']
        model = row['MODEL']
        method = row['method_name']
        optim = row.get('optim', '')
        opset = _make_opset(row)

        # Statistics from three sources, every key receives
        # a prefix which tells where it comes from.
        prefixed = (
            ('skl_', inspect_sklearn_model(model)),
            ('onx_', onnx_statistics(row['ONNX'])),
            ('fit_', analyze_model(model)),
        )
        stats = {prefix + k: v
                 for prefix, values in prefixed
                 for k, v in values.items()}

        # One line per statistic in the final table.
        transposed = DataFrame([stats]).T
        table = df2rst(transposed.reset_index(drop=False))

        clean_optim = _clean_values_optim(optim)
        title = " - ".join([name, problem, scenario, clean_optim])
        if title in seen_titles:
            continue  # pragma: no cover
        seen_titles.add(title)

        link = "-".join([name, problem, scenario, clean_optim, opset])
        for char in (" ", "{", "}", "'"):
            link = link.replace(char, "")

        if optim:
            optim_param = (
                "Model was converted with additional parameter: ``{}``."
                "".format(optim))
        else:
            optim_param = ""

        oinf = OnnxInference(row['ONNX'], skip_run=True)
        dot = oinf.to_dot(recursive=True)
        try:
            res = renderer.render(
                dot=dot, model=repr(model), method=method,
                kind=problem, title=title, indent=indent, len=len,
                link=link, table=table, optim_param=optim_param)
        except KeyError as e:  # pragma: no cover
            # The page reports the issue and shows the template
            # instead of the expected content.
            report = [
                '', str(e), '',
                "title='{}'".format(title),
                "method='{}'".format(method),
                "problem='{}'".format(problem),
                model.__class__.__name__, "", "---------",
                raw_template]
            res = (".. index:: docissue:\n\n::\n\n" +
                   indent("\n".join(report), " "))
        yield res

116 

117 

def compose_page_onnxrt_ops(level="^"):
    """
    Builds the content of page :ref:`l-onnx-runtime-operators`:
    a fixed introduction followed by one section for every
    class listed in ``_op_list``, sorted by name.

    @param      level       character repeated to underline the title
                            of every section
    @return                 content of the page as a string
    """
    from ..ops_cpu._op_list import _op_list

    # NOTE(review): ``:local:`` is assumed to be indented relatively to
    # ``.. contents::``, to be confirmed against the rendered page.
    header = dedent("""
    .. _l-onnx-runtime-operators:

    Python Runtime for ONNX operators
    =================================

    The main function instantiates a runtime class which
    computes the outputs of a specific node.

    .. autosignature:: mlprodict.onnxrt.ops.load_op

    Other sections documents available operators.
    This project was mostly started to show a way to
    implement a custom runtime, do some benchmarks,
    test, exepriment...

    .. contents::
        :local:

    Python
    ++++++

    """)

    # Tuples (name, class) are sorted, the name comes first.
    operators = sorted((op.__name__, op) for op in _op_list)

    lines = [header]
    for name, op in operators:
        module_name = op.__module__.split('.')[-1]
        lines.extend([
            "",
            ".. _lpyort-{}:".format(name),
            "",
            name,
            level * len(name),
            "",
            ".. autosignature:: mlprodict.onnxrt.ops_cpu.{}.{}".format(
                module_name, name),
            '',
        ])
    return "\n".join(lines)

167 

168 

def split_columns_subsets(df):
    """
    Groups the columns of a dataframe into several subsets,
    the documentation uses them to display many small tables
    instead of a wide one and to reduce the scrolling.

    @param      df      dataframe, only ``df.columns`` is used
    @return             tuple ``(common, subsets)``, *common* is the list
                        of columns among ``name``, ``problem``, ``scenario``,
                        ``optim`` found in the dataframe, *subsets* is a list
                        of non empty lists of columns
    """
    def has_opset(c):
        return 'opset' in c

    # Every subset keeps the columns about opsets.
    selectors = (
        lambda c: has_opset(c) or c == 'onx_nnodes',
        lambda c: 'ERROR' in c or has_opset(c),
        lambda c: c.startswith('skl_') or c.startswith('onx_') or has_opset(c),
        lambda c: 'N=' in c or has_opset(c),
    )

    common = []
    for c in ('name', 'problem', 'scenario', 'optim'):
        if c in df.columns:
            common.append(c)

    subsets = []
    for keep in selectors:
        selected = [c for c in df.columns if keep(c)]
        if selected:
            subsets.append(selected)
    return common, subsets

186 

187 

def build_key_split(key, index):
    """
    Used for documentation. Maps a key to the name of a group.

    @param      key     any object, its string representation is used;
                        if it contains backquotes, only the text
                        following the first one and preceding ``<``
                        is kept
    @param      index   not used by the function
    @return             name of the group, the first rule which
                        applies wins, without any rule, the function
                        returns the key without suffixes ``CV``,
                        ``Regressor``, ``Classifier``
    """
    text = str(key)
    pieces = text.split('`')
    if len(pieces) > 1:
        new_key = pieces[1].split('<')[0].strip()
    else:
        new_key = text

    def contains_any(*words):
        return any(word in new_key for word in words)

    # Rules are checked in this exact order.
    if contains_any('SVC', 'SVR', 'SVM'):
        return 'SVM'  # pragma: no cover
    for word, group in (('Neighbors', 'Neighbors'),
                        ('Scaler', 'Scaler'),
                        ('Normalizer', 'Scaler')):
        if word in new_key:
            return group  # pragma: no cover
    if new_key.endswith(("NB", "RBM")):
        return "...NB"  # pragma: no cover
    if "KMeans" in new_key:
        return "KMeans"  # pragma: no cover
    if contains_any('XGB', 'LGBM', 'Tree', 'Forest'):
        return 'Trees'  # pragma: no cover
    if contains_any('ARDRegression', 'ElasticNet', 'HuberRegressor',
                    'Lars', 'Lasso', 'LinearRegression',
                    'LogisticRegression', 'Ridge', 'SGD', 'TheilSen'):
        return 'Linear'  # pragma: no cover

    prefixes = ("Lasso", "Select", "Label", 'Tfidf', 'Feature',
                'Bernoulli', 'MultiTask', 'OneVs', 'PLS',
                'Sparse', 'Spectral', 'MiniBatch', 'Bayesian')
    found = next((p for p in prefixes if new_key.startswith(p)), None)
    if found is not None:
        return found + '...'

    # Suffixes are removed one after the other, more than one
    # can be removed from the same key.
    for suffix in ('CV', 'Regressor', 'Classifier'):  # pragma: no cover
        if new_key.endswith(suffix):
            new_key = new_key[:len(new_key) - len(suffix)]
    return new_key  # pragma: no cover

230 

231 

def filter_rows(df):
    """
    Used for documentation. Keeps the rows of a dataframe
    with a value in column ``ERROR-msg`` if it exists,
    otherwise in column ``RT/SKL-N=1`` if it exists.

    @param      df      dataframe
    @return             filtered dataframe, *df* itself if it has
                        none of the two columns
    """
    def is_filled(x):
        # A value is missing if it is null, empty or equal to 'nan'.
        return notnull(x) and x not in (None, '', 'nan')

    candidates = ('ERROR-msg', 'RT/SKL-N=1')
    column = next((c for c in candidates if c in df.columns), None)
    if column is None:
        return df
    return df[df[column].apply(is_filled)]