Converts a logistic regression into C

The logistic regression is trained in Python and executed in C.

Train a logistic regression

from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
iris = load_iris()
X = iris.data[:, :2]
y = iris.target
y[y == 2] = 1
lr = LogisticRegression().fit(X, y)

Export into C

# grammar is the expected scoring model.
from mlprodict.grammar_sklearn import sklearn2graph
gr = sklearn2graph(lr, output_names=['Prediction', 'Score'])
We can even check what the function should produce as a score. Types are strict.

import numpy
X = numpy.array([[numpy.float32(1), numpy.float32(2)]])
e2 = gr.execute(Features=X[0, :])
[  0.       -11.264062]

We compare with scikit-learn.

lr.decision_function(X[0:1, :])

Conversion into C:

res = gr.export(lang='c', hook={'array': lambda v: v.tolist(), 'float32': lambda v: float(v)})
int LogisticRegression (float* pred, float* Features)
    // 2290909222952-LogisticRegression - children
    // 2290909222728-concat - children
    // 2290909222672-sign - children
    // 2290909222616-+ - children
    // 2290909222560-adot - children
    float pred0c0c00c0[2] = {(float)3.3882975578308105, (float)-3.164527654647827};
    float* pred0c0c00c1 = Features;
    // 2290909222560-adot - itself
    float pred0c0c00;
    adot_float_float(&pred0c0c00, pred0c0c00c0, pred0c0c00c1, 2);
    // 2290909222560-adot - done
    float pred0c0c01 = (float)-8.323304176330566;
    // 2290909222616-+ - itself
    float pred0c0c0 = pred0c0c00 + pred0c0c01;
    // 2290909222616-+ - done
    // 2290909222672-sign - itself
    float pred0c0;
    sign_float(&pred0c0, pred0c0c0);
    // 2290909222672-sign - done
    // 2290909222728-concat - itself
    float pred0[2];
    concat_float_float(pred0, pred0c0, pred0c0c0);
    // 2290909222728-concat - done
    memcpy(pred, pred0, 2*sizeof(float));
    // 2290909222952-LogisticRegression - itself
    return 0;
    // 2290909222952-LogisticRegression - done

We execute the code with module cffi.

from mlprodict.grammar_sklearn.cc import compile_c_function
fct = compile_c_function(res["code"], 2)
e2 = fct(X[0, :])
array([  0.      , -11.264062], dtype=float32)

Time comparison

%timeit lr.decision_function(X[0:1, :])
64.9 µs ± 5.84 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
%timeit fct(X[0, :])
6.17 µs ± 380 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)

There is a significant speedup on this example. It could be made even faster by removing some of the remaining Python overhead and by optimizing the code produced by cffi. We can also avoid creating the array that holds the output by reusing an existing one.

out = fct(X[0, :])
%timeit fct(X[0, :], out)
6.33 µs ± 430 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)