Source code for mlprodict.testing.model_verification

"""
Complex but recurring testing functions.


:githublink:`%|py|5`
"""
import random
from contextlib import redirect_stdout, redirect_stderr
from io import StringIO
import pandas
import numpy
from numpy.testing import assert_allclose
from ..grammar_sklearn import sklearn2graph
from ..grammar_sklearn.cc import compile_c_function


def iris_data():
    """
    Returns ``(X, y)`` for iris data.

    :githublink:`%|py|16`
    """
    from sklearn.datasets import load_iris
    iris = load_iris()
    X = iris.data[:, :2]
    state = numpy.random.RandomState(seed=34)  # pylint: disable=E1101
    rnd = state.randn(*X.shape) / 3
    X += rnd
    y = iris.target
    return X, y
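
def _example_iris_data():
    # Minimal usage sketch for ``iris_data``; the name ``_example_iris_data``
    # is illustrative and not part of the module API. The dataset keeps only
    # the first two iris features and adds a perturbation drawn with a fixed
    # seed, so the shapes below are deterministic.
    X, y = iris_data()
    assert X.shape == (150, 2)
    assert y.shape == (150, )
    return X, y
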
def check_is_almost_equal(xv, exp, precision=1e-5, message=None):
    """
    Checks that two floats or two arrays are almost equal.

    :param xv: float or vector
    :param exp: expected value
    :param precision: precision
    :param message: additional message
    :githublink:`%|py|35`
    """
    if isinstance(exp, float) or len(exp.ravel()) == 1:
        if not (isinstance(xv, float) or len(xv.ravel()) == 1):
            raise TypeError(  # pragma: no cover
                "Type mismatch between {0} and {1} (expected).".format(
                    type(xv), type(exp)))
        diff = abs(xv - exp)
        if diff > precision:
            raise ValueError(  # pragma: no cover
                "Predictions are different: expected={0}, computed={1}.".format(
                    exp, xv))
    else:
        if not isinstance(xv, numpy.ndarray):
            raise TypeError(
                "Type mismatch between {0} and {1} (expected).".format(
                    type(xv), type(exp)))
        xv = xv.ravel()
        exp = exp.ravel()
        try:
            assert_allclose(xv, exp, atol=precision)
        except AssertionError as e:
            if message is None:
                raise
            raise AssertionError(message) from e  # pragma: no cover
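
def _example_check_is_almost_equal():
    # Hedged usage sketch for ``check_is_almost_equal``; the name
    # ``_example_check_is_almost_equal`` is illustrative only. The helper
    # accepts scalars as well as arrays and raises on mismatch instead of
    # returning False.
    check_is_almost_equal(1.0, 1.0 + 1e-7)
    check_is_almost_equal(
        numpy.array([1., 2.]), numpy.array([1., 2. + 1e-7]),
        precision=1e-5, message="vectors should match")
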
def check_model_representation(model, X, y=None, convs=None,
                               output_names=None, only_float=True,
                               verbose=False, suffix="", fLOG=None):
    """
    Checks that a trained model can be exported in a specific list
    of formats and produces the same outputs when the representation
    can be used to predict.

    :param model: model (a class or an instance of a model but not trained)
    :param X: features
    :param y: targets
    :param convs: list of formats to check, all possible by default
        ``['json', 'c']``
    :param output_names: list of output columns (can be None,
        a default value is then inferred based on the scikit-learn output)
    :param only_float: only floats are implemented, must be True
    :param verbose: print some information
    :param suffix: added to disambiguate the compiled module name
    :param fLOG: logging function
    :return: function to call to run the prediction
    :githublink:`%|py|79`
    """
    if not only_float:
        raise NotImplementedError(  # pragma: no cover
            "Only floats are allowed.")
    if isinstance(X, list):
        X = pandas.DataFrame(X)
        if len(X.shape) != 2:
            raise ValueError(  # pragma: no cover
                "X cannot be converted into a proper DataFrame. "
                "It has shape {0}.".format(X.shape))
        if only_float:
            X = X.values
    if isinstance(y, list):
        y = numpy.array(y)
    if convs is None:
        convs = ['json', 'c']

    # sklearn
    if not hasattr(model.__class__, "fit"):
        # It is a class object and not an instance. We use the default values.
        model = model()
    model.fit(X, y)
    h = random.randint(0, X.shape[0] - 1)
    if isinstance(X, pandas.DataFrame):
        oneX = X.iloc[h, :].astype(numpy.float32)
    else:
        oneX = X[h, :].ravel().astype(numpy.float32)

    # model or transform
    moneX = numpy.resize(oneX, (1, len(oneX)))
    if hasattr(model, "predict"):
        ske = model.predict(moneX)
    else:
        ske = model.transform(moneX)

    if verbose and fLOG:
        fLOG("---------------------")
        fLOG(type(oneX), oneX.dtype)
        fLOG(model)
        for k, v in sorted(model.__dict__.items()):
            if k[-1] == '_':
                fLOG("  {0}={1}".format(k, v))
        fLOG("---------------------")

    # grammar
    gr = sklearn2graph(model, output_names=output_names)
    lot = gr.execute(Features=oneX)
    if verbose and fLOG:
        fLOG(gr.graph_execution())

    # verification
    check_is_almost_equal(lot, ske)

    # default for output_names
    if output_names is None:
        if len(ske.shape) == 1:
            output_names = ["Prediction"]
        elif len(ske.shape) == 2:
            output_names = ["p%d" % i for i in range(ske.shape[1])]
        else:
            raise ValueError(  # pragma: no cover
                "Cannot guess default values for output_names.")

    for lang in convs:
        if lang in ('c', ):
            code_c = gr.export(lang=lang)['code']
            if code_c is None:
                raise ValueError(  # pragma: no cover
                    "Exported C code cannot be None.")
            compile_fct = compile_c_function
            fout = StringIO()
            ferr = StringIO()
            with redirect_stdout(fout):
                with redirect_stderr(ferr):
                    try:
                        fct = compile_fct(
                            code_c, len(output_names), suffix=suffix,
                            fLOG=lambda s: fout.write(s + "\n"))
                    except Exception as e:  # pragma: no cover
                        raise RuntimeError(
                            "Unable to compile the code\n-OUT-\n{0}\n-ERR-"
                            "\n{1}\n-CODE-\n{2}".format(
                                fout.getvalue(), ferr.getvalue(),
                                code_c)) from e

            if verbose and fLOG:
                fLOG("-----------------")
                fLOG(output_names)
                fLOG("-----------------")
                fLOG(code_c)
                fLOG("-----------------")
                fLOG("h=", h, "oneX=", oneX)
                fLOG("-----------------")

            lotc = fct(oneX)
            check_is_almost_equal(
                lotc, ske, message="Issue with lang='{0}'".format(lang))
            lotc_exp = lotc.copy()
            lotc2 = fct(oneX, lotc)
            if not numpy.array_equal(lotc_exp, lotc2):
                raise ValueError(  # pragma: no cover
                    "Second call returns different results.\n{0}\n{1}".format(
                        lotc_exp, lotc2))
        else:
            ser = gr.export(lang="json",
                            hook={'array': lambda v: v.tolist()})
            if ser is None:
                raise ValueError(  # pragma: no cover
                    "No output for lang='{0}'.".format(lang))
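
def _example_check_model_representation():
    # Hedged end-to-end sketch; ``_example_check_model_representation`` and
    # the choice of estimator are assumptions of this example. Whether a
    # given estimator is supported depends on ``sklearn2graph``; a plain
    # LogisticRegression is used here. The 'c' conversion needs a working
    # C compiler, so this sketch restricts ``convs`` to 'json'.
    from sklearn.linear_model import LogisticRegression
    X, y = iris_data()
    check_model_representation(
        LogisticRegression(), X, y, convs=['json'],
        output_names=['Y', 'Yp'])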