Source code for mlinsights.search_rank.search_engine_predictions
"""
Implements a way to get close examples based
on the output of a machine learned model.
:githublink:`%|py|6`
"""
from ..mlmodel import model_featurizer
from ..helpers.parameters import format_function_call
from .search_engine_vectors import SearchEngineVectors
class SearchEnginePredictions(SearchEngineVectors):
    """
    Nearest-neighbour search engine working on *f(X)* instead of *X*.

    Extends :class:`SearchEngineVectors
    <mlinsights.search_rank.search_engine_vectors.SearchEngineVectors>`:
    every vector is first mapped through *f* and neighbors are looked for
    among the mapped vectors. *f* is either a plain function converting a
    vector into another one, or a machine learned model, in which case it
    is wrapped by :func:`model_featurizer
    <mlinsights.mlmodel.ml_featurizer.model_featurizer>`.
    """

    def __init__(self, fct, fct_params=None, **knn):
        """
        :param fct: function *f* applied before looking for neighbors,
            it can also be a machine learned model
        :param fct_params: keyword parameters sent to function
            :func:`model_featurizer
            <mlinsights.mlmodel.ml_featurizer.model_featurizer>`,
            only used when *fct* is wrapped (None means no parameter)
        :param knn: keyword parameters forwarded to the parent constructor,
            see scikit-learn's ``NearestNeighbors``
        """
        super().__init__(**knn)
        # The raw arguments are kept untouched so that __repr__ can show them.
        self._fct_params = fct_params
        self._fct_init = fct

        # A callable exposing ``predict`` or ``forward`` is considered
        # a model and not a plain function: it goes through the featurizer.
        if callable(fct) and not (hasattr(fct, 'predict') or
                                  hasattr(fct, 'forward')):
            self.fct = fct
            return

        featurizer_kwargs = {} if fct_params is None else fct_params
        self.fct = model_featurizer(fct, **featurizer_kwargs)

    def __repr__(self):
        """
        Returns a string formatted like a call to the constructor.
        """
        # ``pknn`` is not set in this class, presumably by the parent
        # constructor; it is copied to avoid modifying it.
        call_args = self.pknn.copy() if self.pknn else {}
        call_args['fct'] = self._fct_init
        call_args['fct_params'] = self._fct_params
        return format_function_call(self.__class__.__name__, call_args)

    def fit(self, data=None, features=None, metadata=None):
        """
        Stores the vectors, their metadata and trains the neighbors search.
        Every vector comes with a list of metadata.

        :param data: a :epkg:`dataframe` or None if the features and
            the metadata are specified with an array and a dictionary
        :param features: features columns or an array
        :param metadata: data
        :return: the result of ``_fit_knn()``
        :raises TypeError: if *data* is not iterable and the prepared
            features are a list
        """
        if self._is_iterable(data):
            # *f* is given to the preparation step as ``transform``,
            # nothing else to convert afterwards.
            self._prepare_fit(data=data, features=features,
                              metadata=metadata, transform=self.fct)
            return self._fit_knn()

        self._prepare_fit(data=data, features=features, metadata=metadata)
        if isinstance(self.features_, list):
            raise TypeError(  # pragma: no cover
                "features_ cannot be a list when training the model.")
        # The whole feature matrix is converted in one call, second
        # argument is True here and False in kneighbors
        # (presumably many observations vs. one, to be confirmed).
        self.features_ = self.fct(self.features_, True)
        return self._fit_knn()

    def kneighbors(self, X, n_neighbors=None):
        """
        Searches for neighbors close to *f(X)*.

        :param X: features
        :param n_neighbors: number of neighbors, forwarded to the
            parent method
        :return: score, ind, meta

        *score* is an array representing the lengths to points,
        *ind* contains the indices of the nearest points
        in the population matrix, *meta* is the metadata.
        """
        projected = self.fct(X, False)
        if len(projected.shape) == 1:
            # A single vector becomes a matrix with one row.
            projected = projected.reshape((1, len(projected)))
        return super().kneighbors(projected, n_neighbors=n_neighbors)