# Source code for coremltools.converters.sklearn._converter

# Copyright (c) 2017, Apple Inc. All rights reserved.
#
# Use of this source code is governed by a BSD-3-clause license that can be
# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause

from coremltools import __version__ as ct_version
from coremltools.models import _METADATA_SOURCE, _METADATA_VERSION

"""
Defines the primary function for converting scikit-learn models.
"""


def convert(sk_obj, input_features=None, output_feature_names=None):
    """
    Convert scikit-learn pipeline, classifier, or regressor to Core ML format.

    Parameters
    ----------
    sk_obj: model | [model] of scikit-learn format.
        Scikit learn model(s) to convert to a Core ML format.

        The input model may be a single scikit learn model, a scikit learn
        pipeline model, or a list of scikit learn models.

        Currently supported scikit learn models are:

        - Linear and Logistic Regression
        - LinearSVC and LinearSVR
        - Ridge Regression
        - SVC and SVR
        - NuSVC and NuSVR
        - Gradient Boosting Classifier and Regressor
        - Decision Tree Classifier and Regressor
        - Random Forest Classifier and Regressor
        - Normalizer
        - Imputer
        - Standard Scaler
        - DictVectorizer
        - One Hot Encoder
        - KNeighborsClassifier

        The input model, or the last model in a pipeline or list of models,
        determines whether this is exposed as a Transformer, Regressor,
        or Classifier.

        Note that there may not be a one-to-one correspondence between scikit
        learn models and the Core ML models chosen to represent them. For
        example, many scikit learn models are embedded in a pipeline to handle
        processing of input features.

    input_features: str | dict | list
        Optional name(s) that can be given to the inputs of the scikit-learn
        model. Defaults to ``"input"``.

        Input features can be specified in a number of forms.

        - Single string: In this case, the input is assumed to be a single
          array, with the number of dimensions set using ``num_dimensions``.

        - List of strings: In this case, the overall input dimensions to the
          scikit-learn model are assumed to be the length of the list. If
          neighboring names are identical, they are assumed to be an input
          array of that length. For example:

          ``["a", "b", "c"]``

          resolves to:

          ``[("a", Double), ("b", Double), ("c", Double)]``.

          In addition:

          ``["a", "a", "b"]``

          resolves to:

          ``[("a", Array(2)), ("b", Double)]``.

        - Dictionary: Where the keys are the names and the values are the
          indices or ranges of feature indices.

          In this case, the Dictionary is presented as a mapping from keys to
          indices or ranges of contiguous indices. For example:

          ``{"a" : 0, "b" : [2,3], "c" : 1}``

          resolves to:

          ``[("a", Double), ("c", Double), ("b", Array(2))]``.

          Note that the ordering is determined by the indices.

        - List of tuples of the form ``(name, datatype)``, in which ``name``
          is the name of the exposed feature, and ``datatype`` is an instance
          of ``String``, ``Double``, ``Int64``, ``Array``, or ``Dictionary``.

    output_feature_names: string or list of strings
        Optional name(s) that can be given to the outputs of the scikit-learn
        model.

        The ``output_feature_names`` is interpreted according to the model
        type:

        - If the scikit-learn model is a transformer, it is the name of the
          array feature output by the final sequence of the transformer
          (defaults to ``"output"``).

        - If it is a classifier, it should be a 2-tuple of names giving the
          top class prediction and the array of scores for each class
          (defaults to ``"classLabel"`` and ``"classScores"``).

        - If it is a regressor, it should give the name of the prediction
          value (defaults to ``"prediction"``).

    Returns
    -------
    model:MLModel
        Returns an MLModel instance representing a Core ML model.

    Examples
    --------
    .. sourcecode:: python

        >>> from sklearn.linear_model import LinearRegression
        >>> import pandas as pd

        # Load data
        >>> data = pd.read_csv('houses.csv')

        # Train a model
        >>> model = LinearRegression()
        >>> model.fit(data[["bedroom", "bath", "size"]], data["price"])

        # Convert and save the scikit-learn model
        >>> import coremltools
        >>> coreml_model = coremltools.converters.sklearn.convert(model,
        ...     ["bedroom", "bath", "size"], "price")
        >>> coreml_model.save('HousePricer.mlmodel')
    """
    # NOTE(review): the docstring mentions ``num_dimensions`` for the
    # single-string input form, but this function takes no such parameter.
    # Confirm where that value comes from before rewording the user docs.

    # This function is just a thin wrapper around the internal converter so
    # that sklearn isn't actually imported unless this function is called
    from ...models import MLModel

    # NOTE: Providing user-defined class labels will be enabled when
    # several issues with the ordering of the classes are worked out.  For now,
    # to use custom class labels, directly import the internal function below.
    from ._converter_internal import _convert_sklearn_model

    # class_labels is pinned to None here; see the NOTE above.
    spec = _convert_sklearn_model(
        sk_obj, input_features, output_feature_names, class_labels=None
    )

    model = MLModel(spec)

    # Imported here, not at module level, in keeping with the deferred-import
    # comment at the top of this body.
    from sklearn import __version__ as sklearn_version

    # Record which coremltools and scikit-learn versions produced this model.
    model.user_defined_metadata[_METADATA_VERSION] = ct_version
    model.user_defined_metadata[_METADATA_SOURCE] = "scikit-learn=={0}".format(
        sklearn_version
    )
    return model