1""" 

2@file 

3@brief Summarizes results produces by function in *validate.py*. 

4""" 

5import decimal 

6import json 

7import numpy 

8import pandas 

9from sklearn import __all__ as sklearn__all__, __version__ as sklearn_version 

10from ... import __version__ as ort_version 

11 

12 

13def _clean_values_optim(val): 

14 if not isinstance(val, str): 

15 return val 

16 if '/' in val: 

17 spl = val.split('/') 

18 return "/".join(_clean_values_optim(v) for v in spl) 

19 if "'>=" in val: 

20 val = val.split("'>=") 

21 if len(val) == 2: 

22 val = val[-1] 

23 rep = { 

24 "{'optim': 'cdist'}": "cdist" 

25 } 

26 for k, v in rep.items(): 

27 val = val.replace(k, v) 

28 return val 
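
# A minimal illustration of _clean_values_optim (hypothetical input strings,
# not taken from a real benchmark run):
#   _clean_values_optim("{'optim': 'cdist'}")        -> 'cdist'
#   _clean_values_optim("cdist/{'optim': 'cdist'}")  -> 'cdist/cdist'
#   _clean_values_optim(3.14)                        -> 3.14 (non-strings pass through)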


def _summary_report_indices(df, add_cols=None, add_index=None):
    if 'opset' not in df.columns:
        raise RuntimeError(  # pragma: no cover
            "Unable to create summary (opset missing)\n{}\n--\n{}".format(
                df.columns, df.head()))

    col_values = ["available"]
    for col in ['problem', 'scenario', 'opset', 'optim']:
        if col not in df.columns:
            df[col] = '' if col != 'opset' else numpy.nan
    indices = ["name", "problem", "scenario", 'optim', 'method_name',
               'output_index', 'conv_options', 'inst']
    indices = [i for i in indices if i in df.columns]
    df["optim"] = df["optim"].fillna('')
    for c in ['n_features', 'runtime']:
        if c in df.columns:
            indices.append(c)
            if c == 'runtime':
                df[c].fillna('-', inplace=True)
    for c in df.columns:
        if c.startswith('opset') or c in {'available'}:
            df[c].fillna('?', inplace=True)

    # Adds information about the models in the index
    indices2 = []
    for c in df.columns:
        if (isinstance(c, str) and len(c) >= 5 and (
                c.startswith("onx_") or c.startswith("skl_"))):
            if c in {'onx_domain', 'onx_doc_string', 'onx_ir_version',
                     'onx_model_version'}:
                continue
            if df[c].dtype in (numpy.float32, numpy.float64, float,
                               int, numpy.int32, numpy.int64):
                defval = -1
            else:
                defval = ''
            df[c].fillna(defval, inplace=True)
            if c.startswith('skl_'):
                indices.append(c)
            else:
                indices2.append(c)

    columns = ['opset']
    indices = indices + indices2
    if add_index is not None:
        for i in add_index:  # pragma: no cover
            if i not in indices:
                indices.append(i)
    return columns, indices, col_values
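
# The triple returned above is consumed by summary_report (as the values,
# index and columns of pandas.pivot_table) and by merge_benchmark (to build
# the grouping key when a baseline is requested).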


class _MyEncoder(json.JSONEncoder):
    def default(self, o):  # pylint: disable=E0202
        if hasattr(o, 'get_params'):
            obj = dict(clsname=o.__class__.__name__)
            obj.update(o.get_params())
            return json.dumps(obj, sort_keys=True)
        return json.dumps(o, sort_keys=True)  # pragma: no cover


def _jsonify(x):

    def _l(k):
        if isinstance(k, type):
            return k.__name__
        return k

    if isinstance(x, dict):
        x = {str(_l(k)): v for k, v in x.items()}
        try:
            return json.dumps(x, sort_keys=True, cls=_MyEncoder)
        except TypeError:  # pragma: no cover
            # Cannot sort.
            return json.dumps(x, cls=_MyEncoder)
    try:
        if numpy.isnan(x):
            x = ''
    except (ValueError, TypeError):
        pass
    try:
        return json.dumps(x, cls=_MyEncoder)
    except TypeError:  # pragma: no cover
        # Cannot sort.
        return json.dumps(x, cls=_MyEncoder)
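
# Illustrative behaviour of _jsonify (hypothetical inputs):
#   _jsonify({'k': 1})   -> '{"k": 1}'
#   _jsonify(numpy.nan)  -> '""' (NaN is normalised to an empty string)
# A value exposing get_params(), e.g. a scikit-learn estimator, is encoded by
# _MyEncoder as a JSON string that records its class name under 'clsname'.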


def summary_report(df, add_cols=None, add_index=None):
    """
    Finalizes the results computed by function
    @see fn enumerate_validated_operator_opsets.

    @param      df          dataframe
    @param      add_cols    additional columns to take into account
                            as values
    @param      add_index   additional columns to take into account
                            as index
    @return                 pivoted dataframe

    The outcome can be seen at page about :ref:`l-onnx-pyrun`.
    """
    df = df.copy()
    if 'inst' in df.columns:
        df['inst'] = df['inst'].apply(_jsonify)
    if 'conv_options' in df.columns:
        df['conv_options'] = df['conv_options'].apply(_jsonify)
    num_types = (int, float, decimal.Decimal, numpy.number)

    def aggfunc(values):
        if len(values) != 1:
            if all(map(lambda x: isinstance(x, num_types),
                       values)):
                mi, ma = min(values), max(values)
                if numpy.isnan(mi) and numpy.isnan(ma):
                    return ""
                if mi == ma:
                    return mi
                return '[{},{}]'.format(mi, ma)
            values = [str(_).replace("\n", " ").replace('\r', '').strip(" ")
                      for _ in values]
            values = [_ for _ in values if _]
            vals = set(values)
            if len(vals) != 1:
                return " // ".join(map(str, values))
        val = values.iloc[0] if not isinstance(values, list) else values[0]
        if isinstance(val, float) and numpy.isnan(val):
            return ""
        return str(val)

    columns, indices, col_values = _summary_report_indices(
        df, add_cols=add_cols, add_index=add_index)
    try:
        piv = pandas.pivot_table(df, values=col_values,
                                 index=indices, columns=columns,
                                 aggfunc=aggfunc).reset_index(drop=False)
    except (KeyError, TypeError) as e:  # pragma: no cover
        raise RuntimeError(
            "Issue with keys={}, values={}\namong {}.".format(
                indices, col_values, df.columns)) from e

    cols = list(piv.columns)
    opsets = [c[1] for c in cols if isinstance(c[1], (int, float))]

    versions = ["opset%d" % i for i in opsets]
    last = piv.columns[-1]
    if isinstance(last, tuple) and last == ('available', '?'):
        versions.append('FAIL')
    nbvalid = len(indices + versions)
    if len(piv.columns) != nbvalid:
        raise RuntimeError(  # pragma: no cover
            "Mismatch between {} != {}\n{}\n{}\n---\n{}\n{}\n{}".format(
                len(piv.columns), len(indices + versions),
                piv.columns, indices + versions,
                df.columns, indices, col_values))
    piv.columns = indices + versions
    piv = piv[indices + list(reversed(versions))].copy()
    for c in versions:
        piv[c].fillna('-', inplace=True)

    if "available-ERROR" in df.columns:

        from skl2onnx.common.exceptions import MissingShapeCalculator

        def replace_msg(text):
            if isinstance(text, MissingShapeCalculator):
                return "NO CONVERTER"  # pragma: no cover
            if str(text).startswith("Unable to find a shape calculator for type '"):
                return "NO CONVERTER"
            if str(text).startswith("Unable to find problem for model '"):
                return "NO PROBLEM"  # pragma: no cover
            if "not implemented for float64" in str(text):
                return "NO RUNTIME 64"  # pragma: no cover
            return str(text)

        piv2 = pandas.pivot_table(
            df, values="available-ERROR", index=indices,
            columns='opset', aggfunc=aggfunc).reset_index(drop=False)

        col = piv2.iloc[:, piv2.shape[1] - 1]
        piv["ERROR-msg"] = col.apply(replace_msg)

    if any('time-ratio-' in c for c in df.columns):
        cols = [c for c in df.columns if c.startswith('time-ratio')]
        cols.sort()

        df_sub = df[indices + cols]
        piv2 = df_sub.groupby(indices).mean()
        piv = piv.merge(piv2, on=indices, how='left')

        def rep(c):
            if 'N=1' in c and 'N=10' not in c:
                return c.replace("time-ratio-", "RT/SKL-")
            else:
                return c.replace("time-ratio-", "")
        cols = [rep(c) for c in piv.columns]
        piv.columns = cols

        # min, max
        mins = [c for c in piv.columns if c.endswith('-min')]
        maxs = [c for c in piv.columns if c.endswith('-max')]
        combined = []
        for mi, ma in zip(mins, maxs):
            combined.append(mi)
            combined.append(ma)
        first = [c for c in piv.columns if c not in combined]
        piv = piv[first + combined]

    def clean_values(value):
        if not isinstance(value, str):
            return value  # pragma: no cover
        if "ERROR->=1000000" in value:
            value = "big-diff"
        elif "ERROR" in value:
            value = value.replace("ERROR-_", "")
            value = value.replace("_exc", "")
            value = "ERR: " + value
        elif "OK-" in value:
            value = value.replace("OK-", "OK ")
        elif "e<" in value:
            value = value.replace("-", " ")
        return value

    for c in piv.columns:
        if "opset" in c:
            piv[c] = piv[c].apply(clean_values)
        if 'optim' in c:
            piv[c] = piv[c].apply(_clean_values_optim)

    # adding versions
    def keep_values(x):
        if isinstance(x, float) and numpy.isnan(x):
            return False  # pragma: no cover
        return True

    col_versions = [c for c in df.columns if c.startswith("v_")]
    if len(col_versions) > 0:
        for c in col_versions:
            vals = set(filter(keep_values, df[c]))
            if len(vals) != 1:
                raise RuntimeError(  # pragma: no cover
                    "Column '{}' has multiple values {}.".format(c, vals))
            piv[c] = list(vals)[0]

    return piv
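
# Typical use, as a sketch only (enumerate_validated_operator_opsets and its
# arguments belong to the surrounding validation package; the names below are
# only meant to show the expected flow, not a tested recipe):
#
#     rows = list(enumerate_validated_operator_opsets(verbose=0))
#     df = pandas.DataFrame(rows)
#     piv = summary_report(df)
#     piv.to_excel("summary.xlsx", index=False)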


def merge_benchmark(dfs, column='runtime', baseline=None, suffix='-base'):
    """
    Merges several benchmarks run with command line
    :ref:`validate_runtime <l-cmd-validate_runtime>`.

    @param      dfs         dictionary *{'prefix': dataframe}*
    @param      column      every value from this column is prefixed
                            by the given key in *dfs*
    @param      baseline    add baseline
    @param      suffix      suffix to add when comparing to the baseline
    @return                 merged dataframe
    """
    def add_prefix(prefix, v):
        if isinstance(v, str):
            return prefix + v
        return v  # pragma: no cover

    conc = []
    for k, df in dfs.items():
        if column not in df.columns:
            raise ValueError(
                "Unable to find column '{}' in {} (key='{}')".format(
                    column, df.columns, k))
        df = df.copy()
        df[column] = df[column].apply(lambda x: add_prefix(k, x))
        if 'inst' in df.columns:
            df['inst'] = df['inst'].fillna('')
        else:
            df['inst'] = ''
        conc.append(df)
    merged = pandas.concat(conc).reset_index(drop=True)
    if baseline is not None:
        def get_key(index):
            k = []
            for v in index:
                try:
                    if numpy.isnan(v):
                        continue  # pragma: no cover
                except (ValueError, TypeError):
                    pass
                k.append(v)
            return tuple(k)

        columns, indices, _ = _summary_report_indices(merged)
        indices = list(_ for _ in (indices + columns) if _ != 'runtime')
        try:
            bdata = merged[merged.runtime == baseline].drop(
                'runtime', axis=1).set_index(indices, verify_integrity=True)
        except ValueError as e:
            bdata2 = merged[indices + ['runtime']].copy()
            bdata2['count'] = 1
            n_rows = bdata2['count'].sum()
            gr = bdata2.groupby(indices + ['runtime'], as_index=False).sum(
                ).sort_values('count', ascending=False)
            n_rows2 = gr['count'].sum()
            one = gr.head()[:1]
            rows = merged.merge(one, on=indices + ['runtime'])[:2]
            for c in ['init-types', 'bench-skl', 'bench-batch', 'init_types', 'cl']:
                if c in rows.columns:
                    rows = rows.drop(c, axis=1)
            srows = rows.T.to_string(min_rows=100)
            raise ValueError(
                "(n_rows={}, n_rows2={}) Unable to group by {}.\n{}\n-------\n{}".format(
                    n_rows, n_rows2, indices, gr.T, srows)) from e
        if bdata.shape[0] == 0:
            raise RuntimeError(  # pragma: no cover
                "No result for baseline '{}'.".format(baseline))
        ratios = [c for c in merged.columns if c.startswith('time-ratio-')]
        indexed = {}
        for index in bdata.index:
            row = bdata.loc[index, :]
            key = get_key(index)
            indexed[key] = row[ratios]

        for i in range(merged.shape[0]):
            key = get_key(tuple(merged.loc[i, indices]))
            if key not in indexed:
                continue  # pragma: no cover
            value = indexed[key]
            for r in ratios:
                if r.endswith('-min') or r.endswith('-max'):
                    continue
                value2 = merged.loc[i, r]
                new_r = value2 / value[r]
                new_col = r + suffix
                if new_col not in merged.columns:
                    merged[new_col] = numpy.nan
                merged.loc[i, new_col] = new_r

    return merged
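
# Sketch of merging two benchmark runs (file names, prefixes and the baseline
# value are hypothetical; each CSV is assumed to come from the validate_runtime
# command mentioned in the docstring):
#
#     dfs = {
#         'v1-': pandas.read_csv("bench_v1.csv"),
#         'v2-': pandas.read_csv("bench_v2.csv"),
#     }
#     merged = merge_benchmark(dfs, column='runtime', baseline='v1-python')
#     piv = summary_report(merged)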