Questions or feedback?

Source code for opendp.extras.sklearn.linear_model

"""
This module requires extra installs: ``pip install 'opendp[scikit-learn]'``

For convenience, all the members of this module are also available from :py:mod:`opendp.prelude`.
We suggest importing under the conventional name ``dp``:

.. code:: pycon

    >>> import opendp.prelude as dp

The members of this module will then be accessible at ``dp.sklearn.linear_model``.

If you're interested in the underlying algorithm, we've also
`implemented Theil-Sen Regression as a demonstration of OpenDP plugins <../user-guide/plugins/theil-sen-regression.html>`_.
"""

from typing import Iterable
from opendp.extras.sklearn.linear_model._make_private_theil_sen import (
    make_private_theil_sen as _make_private_theil_sen,
)  # noqa: F401
from opendp._lib import import_optional_dependency
from opendp.mod import Measure

__all__ = ["LinearRegression"]


[docs] class LinearRegression: """ DP Linear Regression The interface is parallel to that offered by sklearn's `LinearRegression <https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html>`_. The ``fit`` method returns an sklearn ``LinearRegression`` object. :param x_bounds: Bounds for training data; For the moment, only lists containing a single tuple are supported :param y_bounds: Bounds for target data :param scale: The scale of the noise to be added :param runs: Controls how many times randomized pairwise predictions are computed. Increasing this value can improve the robustness and accuracy of the results; However, it can also increase computational cost and amount of noise needed later in the algorithm. :param candidates_count: How many evenly spaced candidates to generate :param fraction_bounds: predict y values at these cut percentiles of x_bounds. """ def __init__( self, output_measure: Measure, x_bounds: Iterable[tuple[float, float]], y_bounds: tuple[float, float], scale: float, runs: int = 1, candidates_count: int = 100, fraction_bounds: tuple[float, float] = (0.25, 0.75), ): x_bounds = list(x_bounds) # Shouldn't be so large that this is a problem if len(x_bounds) != 1: msg = f"For now, the x_bounds array must consist of a single tuple, not {x_bounds}" raise Exception(msg) self.measurement = _make_private_theil_sen( output_measure=output_measure, x_bounds=x_bounds[0], y_bounds=y_bounds, scale=scale, runs=runs, candidates_count=candidates_count, fraction_bounds=fraction_bounds, )
[docs] def fit( self, X, y, ): """ Fit DP linear model. :param X: Training data. Array-like of shape (n_samples, 1) :param y: Target values. Array-like of shape (n_samples,) :return: A fitted sklearn ``LinearRegression`` :example: >>> import opendp.prelude as dp >>> try: ... import sklearn ... except ModuleNotFoundError: ... import pytest ... pytest.skip('Requires extra install') >>> dp.enable_features("floating-point") >>> lin_reg = dp.sklearn.linear_model.LinearRegression( ... dp.max_divergence(), ... x_bounds=[(0, 10)], ... y_bounds=(0, 10), ... scale=1, ... ).fit( ... X=[[1], [2], [3], [4], [5]], ... y=[1, 2, 3, 4, 5], ... ) >>> lin_reg.predict([[10]]) array([...]) """ np = import_optional_dependency("numpy") from sklearn.linear_model import LinearRegression as LR X = np.array(X) slope, intercept = self.measurement(np.stack([X[:, 0], y], axis=1)) fit_regression = LR() fit_regression.coef_ = np.array([slope]) fit_regression.intercept_ = intercept return fit_regression
[docs] def predict(X): """ The ``fit()`` method returns a new sklearn object, so this method is never actually used. The sklearn documentation of the method with the same name is copied here only for reference. > Predict using the linear model. > > ### Parameters > *X : array-like or sparse matrix, shape (n_samples, n_features)* > > Samples. > > ### Returns > *C : array, shape (n_samples,)* > > Returns predicted values. .. end-markdown :raises NotImplementedError: This method is included only for documention. """ raise NotImplementedError("Included only for documentation") # pragma: no cover