# Source code for mlprodict.onnx_conv.onnx_ops.onnx_tokenizer

"""
Custom operator Tokenizer.


:githublink:`%|py|5`
"""
from skl2onnx.algebra.onnx_operator import OnnxOperator


class OnnxTokenizer_1(OnnxOperator):
    """
    Custom operator *Tokenizer*, version 1, declared under the
    ``mlprodict`` domain. The operator is not part of the ONNX
    specifications but is defined in onnxruntime.
    """

    # Identity of the operator.
    domain = 'mlprodict'
    operator_name = 'Tokenizer'
    since_version = 1
    past_version = {}
    is_deprecated = False

    # Exactly one input (``text``) and one output (``tokens``).
    expected_inputs = ['text']
    expected_outputs = ['tokens']
    input_range = [1, 1]
    output_range = [1, 1]

    def __init__(self, text, mark=0, mincharnum=1, pad_value='#',
                 separators=None, tokenexp='[a-zA-Z0-9_]+',
                 stopwords=None, op_version=None, **kwargs):
        """
        Builds the node; every option below is forwarded to
        ``OnnxOperator.__init__`` as a keyword argument.

        :param text: array or OnnxOperatorMixin
        :param mark: see :epkg:`Tokenizer`
        :param mincharnum: forwarded like the other options,
            presumably documented in :epkg:`Tokenizer` as well
        :param pad_value: see :epkg:`Tokenizer`
        :param separators: see :epkg:`Tokenizer`,
            None is replaced by an empty list
        :param tokenexp: see :epkg:`Tokenizer`
        :param stopwords: list of stopwords, addition to
            :epkg:`Tokenizer`, None is replaced by an empty list
        :param op_version: opset version
        :param kwargs: additional parameter
        """
        # A fresh list is created on every call when the caller gives
        # None, so no list object is shared between instances.
        attributes = dict(
            mark=mark,
            mincharnum=mincharnum,
            pad_value=pad_value,
            separators=[] if separators is None else separators,
            tokenexp=tokenexp,
            stopwords=[] if stopwords is None else stopwords,
        )
        OnnxOperator.__init__(
            self, text, **attributes, op_version=op_version, **kwargs)
# Unversioned name bound to the version 1 class defined above.
OnnxTokenizer = OnnxTokenizer_1