# Source code for opendp.mod

import ctypes
from typing import Union, Tuple, Callable, Optional

from opendp._lib import AnyMeasurement, AnyTransformation


class Measurement(ctypes.POINTER(AnyMeasurement)):
    """A differentially private unit of computation.

    A measurement bundles a function with a privacy relation.
    The function produces a differentially-private release.
    The privacy relation maps from an input metric to an output measure.

    :example:

    >>> from opendp.mod import Measurement, enable_features
    >>> enable_features("contrib")
    ...
    >>> # create an instance of Measurement using a constructor from the meas module
    >>> from opendp.measurements import make_base_discrete_laplace
    >>> base_dl: Measurement = make_base_discrete_laplace(scale=2.)
    ...
    >>> # invoke the measurement (invoke and __call__ are equivalent)
    >>> base_dl.invoke(100)  # -> 101 # doctest: +SKIP
    >>> base_dl(100)  # -> 99 # doctest: +SKIP
    ...
    >>> # check the measurement's relation at
    >>> #     (1, 0.5): (AbsoluteDistance<u32>, MaxDivergence)
    >>> assert base_dl.check(1, 0.5)
    ...
    >>> # chain with a transformation from the trans module
    >>> from opendp.transformations import make_count
    >>> chained = (
    ...     make_count(TIA=int) >>
    ...     base_dl
    ... )
    ...
    >>> # the resulting measurement has the same features
    >>> chained([1, 2, 3])  # -> 4 # doctest: +SKIP
    >>> # check the chained measurement's relation at
    >>> #     (1, 0.5): (SymmetricDistance, MaxDivergence)
    >>> assert chained.check(1, 0.5)
    """

    _type_ = AnyMeasurement

    def __call__(self, arg):
        """Invoke the measurement on `arg`; performs the same call as :meth:`invoke`."""
        from opendp.core import measurement_invoke

        release = measurement_invoke(self, arg)
        return release

    def invoke(self, arg):
        """Create a differentially-private release with `arg`.

        If `self` is (d_in, d_out)-close, then each invocation of this function is a d_out-DP release.

        :param arg: Input to the measurement.
        :return: differentially-private release
        :raises OpenDPException: packaged error from the core OpenDP library
        """
        from opendp.core import measurement_invoke

        release = measurement_invoke(self, arg)
        return release

    def map(self, d_in):
        """Map an input distance `d_in` to an output distance."""
        from opendp.core import measurement_map

        d_out = measurement_map(self, d_in)
        return d_out

    def check(self, d_in, d_out, *, debug=False) -> bool:
        """Check if the measurement is (`d_in`, `d_out`)-close.

        If true, implies that if the distance between inputs is at most `d_in`,
        then the privacy usage is at most `d_out`.
        See also :func:`~Transformation.check`, a similar check for transformations.

        :param d_in: Distance in terms of the input metric.
        :param d_out: Distance in terms of the output measure.
        :param debug: Enable to raise Exceptions to help identify why the privacy relation failed.
        :return: If True, a release is differentially private at `d_in`, `d_out`.
        :rtype: bool
        """
        from opendp.core import measurement_check

        try:
            return measurement_check(self, d_in, d_out)
        except OpenDPException as err:
            # Only a "RelationDebug" failure outside of debug mode is reported as a
            # plain False; every other case propagates the exception unchanged.
            if debug or err.variant != "RelationDebug":
                raise
            return False

    def __rshift__(self, other: "Transformation"):
        """Chain `other` onto this measurement via `make_chain_tm`.

        :raises ValueError: if `other` is not a Transformation
        """
        if not isinstance(other, Transformation):
            raise ValueError(f"rshift expected a postprocessing transformation, got {other}")

        from opendp.combinators import make_chain_tm

        return make_chain_tm(other, self)

    @property
    def input_distance_type(self):
        """Retrieve the distance type of the input metric.

        This may be any integral type for dataset metrics, or any numeric type for sensitivity metrics.

        :return: distance type
        """
        from opendp.typing import RuntimeType
        from opendp.core import measurement_input_distance_type

        type_descriptor = measurement_input_distance_type(self)
        return RuntimeType.parse(type_descriptor)

    @property
    def output_distance_type(self):
        """Retrieve the distance type of the output measure.

        This is the type that the budget is expressed in.

        :return: distance type
        """
        from opendp.core import measurement_output_distance_type
        from opendp.typing import RuntimeType

        type_descriptor = measurement_output_distance_type(self)
        return RuntimeType.parse(type_descriptor)

    @property
    def input_carrier_type(self):
        """Retrieve the carrier type of the input domain.

        Any member of the input domain is a member of the carrier type.

        :return: carrier type
        """
        from opendp.typing import RuntimeType
        from opendp.core import measurement_input_carrier_type

        type_descriptor = measurement_input_carrier_type(self)
        return RuntimeType.parse(type_descriptor)

    def __del__(self):
        """Free the underlying measurement, tolerating interpreter shutdown."""
        try:
            from opendp.core import _measurement_free

            _measurement_free(self)
        except (ImportError, TypeError):
            # ImportError: sys.meta_path is None, Python is likely shutting down
            pass
class Transformation(ctypes.POINTER(AnyTransformation)):
    """A non-differentially private unit of computation.

    A transformation bundles a function with a stability relation.
    The function maps from an input domain to an output domain.
    The stability relation maps from an input metric to an output metric.

    :example:

    >>> from opendp.mod import Transformation, enable_features
    >>> enable_features("contrib")
    ...
    >>> # create an instance of Transformation using a constructor from the trans module
    >>> from opendp.transformations import make_count
    >>> count: Transformation = make_count(TIA=int)
    ...
    >>> # invoke the transformation (invoke and __call__ are equivalent)
    >>> count.invoke([1, 2, 3])  # -> 3 # doctest: +SKIP
    >>> count([1, 2, 3])  # -> 3 # doctest: +SKIP
    ...
    >>> # check the transformation's relation at
    >>> #     (1, 1): (SymmetricDistance, AbsoluteDistance<u32>)
    >>> assert count.check(1, 1)
    ...
    >>> # chain with more transformations from the trans module
    >>> from opendp.transformations import make_split_lines, make_cast, make_impute_constant
    >>> chained = (
    ...     make_split_lines() >>
    ...     make_cast(TIA=str, TOA=int) >>
    ...     make_impute_constant(constant=0) >>
    ...     count
    ... )
    ...
    >>> # the resulting transformation has the same features
    >>> chained("1\\n2\\n3")  # -> 3 # doctest: +SKIP
    >>> assert chained.check(1, 1)  # both chained transformations were 1-stable
    """

    _type_ = AnyTransformation

    def invoke(self, arg):
        """Execute a non-differentially-private query with `arg`.

        :param arg: Input to the transformation.
        :return: non-differentially-private answer
        :raises OpenDPException: packaged error from the core OpenDP library
        """
        from opendp.core import transformation_invoke

        answer = transformation_invoke(self, arg)
        return answer

    def __call__(self, arg):
        """Invoke the transformation on `arg`; performs the same call as :meth:`invoke`."""
        from opendp.core import transformation_invoke

        answer = transformation_invoke(self, arg)
        return answer

    def map(self, d_in):
        """Map an input distance `d_in` to an output distance."""
        from opendp.core import transformation_map

        d_out = transformation_map(self, d_in)
        return d_out

    def check(self, d_in, d_out, *, debug=False):
        """Check if the transformation is (`d_in`, `d_out`)-close.

        If true, implies that if the distance between inputs is at most `d_in`,
        then the distance between outputs is at most `d_out`.
        See also :func:`~Measurement.check`, a similar check for measurements.

        :param d_in: Distance in terms of the input metric.
        :param d_out: Distance in terms of the output metric.
        :param debug: Enable to raise Exceptions to help identify why the stability relation failed.
        :return: True if the relation passes. False if the relation failed.
        :rtype: bool
        :raises OpenDPException: packaged error from the core OpenDP library
        """
        from opendp.core import transformation_check

        try:
            return transformation_check(self, d_in, d_out)
        except OpenDPException as err:
            # Only a "RelationDebug" failure outside of debug mode is reported as a
            # plain False; every other case propagates the exception unchanged.
            if debug or err.variant != "RelationDebug":
                raise
            return False

    def __rshift__(self, other: Union["Measurement", "Transformation"]):
        """Chain `other` after this transformation.

        A Measurement is chained with `make_chain_mt`, a Transformation with `make_chain_tt`.

        :raises ValueError: if `other` is neither a Measurement nor a Transformation
        """
        # Select the combinator first, then perform the single chaining call.
        if isinstance(other, Measurement):
            from opendp.combinators import make_chain_mt as make_chain
        elif isinstance(other, Transformation):
            from opendp.combinators import make_chain_tt as make_chain
        else:
            raise ValueError(f"rshift expected a measurement or transformation, got {other}")

        return make_chain(other, self)

    @property
    def input_distance_type(self):
        """Retrieve the distance type of the input metric.

        This may be any integral type for dataset metrics, or any numeric type for sensitivity metrics.

        :return: distance type
        """
        from opendp.typing import RuntimeType
        from opendp.core import transformation_input_distance_type

        type_descriptor = transformation_input_distance_type(self)
        return RuntimeType.parse(type_descriptor)

    @property
    def output_distance_type(self):
        """Retrieve the distance type of the output metric.

        This may be any integral type for dataset metrics, or any numeric type for sensitivity metrics.

        :return: distance type
        """
        from opendp.typing import RuntimeType
        from opendp.core import transformation_output_distance_type

        type_descriptor = transformation_output_distance_type(self)
        return RuntimeType.parse(type_descriptor)

    @property
    def input_carrier_type(self):
        """Retrieve the carrier type of the input domain.

        Any member of the input domain is a member of the carrier type.

        :return: carrier type
        """
        from opendp.typing import RuntimeType
        from opendp.core import transformation_input_carrier_type

        type_descriptor = transformation_input_carrier_type(self)
        return RuntimeType.parse(type_descriptor)

    def __del__(self):
        """Free the underlying transformation, tolerating interpreter shutdown."""
        try:
            from opendp.core import _transformation_free

            _transformation_free(self)
        except (ImportError, TypeError):
            # ImportError: sys.meta_path is None, Python is likely shutting down
            pass
class SMDCurve:
    """Thin wrapper that stores a `curve` object and evaluates it at a given `delta`."""

    def __init__(self, curve):
        # Kept as-is; it is only ever handed to `smd_curve_epsilon`.
        self.curve = curve

    def epsilon(self, delta):
        """Return the result of `smd_curve_epsilon` for the stored curve at `delta`.

        :param delta: value at which to evaluate the curve
        """
        from opendp._data import smd_curve_epsilon

        stored_curve = self.curve
        return smd_curve_epsilon(stored_curve, delta)
class UnknownTypeException(Exception):
    """Exception subclass that adds no behavior of its own.

    NOTE(review): presumably raised when a type cannot be resolved; the raise
    sites are not visible here, so confirm against callers.
    """
class OpenDPException(Exception):
    """General exception for errors originating from the underlying OpenDP library.

    The variant attribute corresponds to `one of the following variants <https://github.com/opendp/opendp/blob/53ec58d01762ca5ceee08590d7e7b725bbdafcf6/rust/opendp/src/error.rs#L46-L87>`_ and can be matched on.
    Error variants may change in library updates.

    See `Rust ErrorVariant <https://docs.rs/opendp/latest/opendp/error/enum.ErrorVariant.html>`_ for values variant may take on.
    """

    def __init__(self, variant: str, message: Optional[str] = None, raw_traceback: Optional[str] = None):
        """Store the error description.

        :param variant: name of the error variant; can be matched on
        :param message: optional human-readable message
        :param raw_traceback: optional raw stack trace text, split into frames on demand
        """
        self.variant = variant
        self.message = message
        self.raw_traceback = raw_traceback

    def raw_frames(self):
        """Split the raw traceback into frames at each ``<number>: `` marker.

        :return: list of frame strings; empty when no traceback was recorded
        """
        # `raw_traceback` defaults to None, and re.split rejects None with a TypeError.
        if self.raw_traceback is None:
            return []

        import re

        return re.split(r"\s*[0-9]+: ", self.raw_traceback)

    def frames(self):
        """Return the frames that start with ``opendp``, with each line stripped and re-joined.

        :return: list of formatted frame strings; empty when no traceback was recorded
        """
        def format_frame(frame):
            return "\n ".join(line.strip() for line in frame.split("\n"))

        return [format_frame(f) for f in self.raw_frames() if f.startswith("opendp")]

    def __str__(self) -> str:
        response = ''
        if self.raw_traceback:
            # join and split by newlines because frames may be multi-line
            lines = "\n".join(self.frames()[::-1]).split('\n')
            response += "Continued Rust stack trace:\n" + '\n'.join(' ' + line for line in lines)

        response += '\n ' + self.variant

        if self.message:
            response += f'("{self.message}")'

        return response
# Names of the features currently enabled for this process.
# Mutated by enable_features/disable_features and read by assert_features.
GLOBAL_FEATURES = set()
def enable_features(*features: str) -> None:
    """Add every name in `features` to the module-level `GLOBAL_FEATURES` set.

    :param features: feature names to enable
    """
    requested = set(features)
    GLOBAL_FEATURES.update(requested)
def disable_features(*features: str) -> None:
    """Remove every name in `features` from the module-level `GLOBAL_FEATURES` set.

    Names that are not currently enabled are ignored.

    :param features: feature names to disable
    """
    unwanted = set(features)
    GLOBAL_FEATURES.difference_update(unwanted)
def assert_features(*features: str) -> None:
    """Verify that every name in `features` is present in `GLOBAL_FEATURES`.

    :param features: feature names that must be enabled
    :raises AssertionError: for the first feature that is not enabled
    """
    for feature in features:
        # Raise explicitly instead of using `assert`: assert statements are removed
        # under `python -O`, which would silently disable this gate.
        if feature not in GLOBAL_FEATURES:
            raise AssertionError(
                f"Attempted to use function that requires {feature}, "
                f"but {feature} is not enabled. "
                f"See https://github.com/opendp/opendp/discussions/304, "
                f"then call enable_features(\"{feature}\")"
            )
def binary_search_chain(
        make_chain: Callable[[Union[float, int]], Union[Transformation, Measurement]],
        d_in, d_out,
        bounds: Optional[Union[Tuple[float, float], Tuple[int, int]]] = None,
        T=None) -> Union[Transformation, Measurement]:
    """Useful to find the Transformation or Measurement parameterized with the ideal constructor argument.

    Optimizes a parameterized chain `make_chain` within float or integer `bounds`,
    subject to the chained relation being (`d_in`, `d_out`)-close.

    See `binary_search_param` to retrieve the discovered parameter instead of the complete computation chain.

    :param make_chain: a unary function that maps from a number to a Transformation or Measurement
    :param d_in: desired input distance of the computation chain
    :param d_out: desired output distance of the computation chain
    :param bounds: a 2-tuple of the lower and upper bounds to the input of `make_chain`
    :param T: type of argument to `make_chain`, one of {float, int}
    :return: a chain parameterized at the nearest passing value to the decision point of the relation
    :rtype: Union[Transformation, Measurement]
    :raises TypeError: if the type is not inferrable (pass T) or the type is invalid
    :raises ValueError: if the predicate function is constant, bounds cannot be inferred, or decision boundary is not within `bounds`.

    :examples:

    Find a base_laplace measurement with the smallest noise scale that is still (d_in, d_out)-close.

    >>> from opendp.mod import binary_search_chain, enable_features
    >>> from opendp.transformations import make_clamp, make_bounded_resize, make_sized_bounded_mean
    >>> from opendp.measurements import make_base_laplace
    >>> enable_features("floating-point", "contrib")
    ...
    >>> # The majority of the chain only needs to be defined once.
    >>> pre = (
    ...     make_clamp(bounds=(0., 1.)) >>
    ...     make_bounded_resize(size=10, bounds=(0., 1.), constant=0.) >>
    ...     make_sized_bounded_mean(size=10, bounds=(0., 1.))
    ... )
    ...
    >>> # Find a value in `bounds` that produces a (`d_in`, `d_out`)-chain nearest the decision boundary.
    >>> # The lambda function returns the complete computation chain when given a single numeric parameter.
    >>> chain = binary_search_chain(lambda s: pre >> make_base_laplace(scale=s), d_in=1, d_out=1.)
    ...
    >>> # The resulting computation chain is always (`d_in`, `d_out`)-close, but we can still double-check:
    >>> assert chain.check(1, 1.)

    Build a (2 neighboring, 1. epsilon)-close sized bounded sum with discrete_laplace(100.) noise.
    It should have the widest possible admissible clamping bounds (-b, b).

    >>> from opendp.transformations import make_sized_bounded_sum
    >>> from opendp.measurements import make_base_discrete_laplace
    ...
    >>> def make_sum(b):
    ...     return make_sized_bounded_sum(10_000, (-b, b)) >> make_base_discrete_laplace(100.)
    ...
    >>> # `meas` is a Measurement with the widest possible clamping bounds.
    >>> meas = binary_search_chain(make_sum, d_in=2, d_out=1., bounds=(0, 10_000))
    ...
    >>> # If you want the discovered clamping bound, use `binary_search_param` instead.
    """
    # Solve for the parameter first, then rebuild the chain at that parameter.
    param = binary_search_param(make_chain, d_in, d_out, bounds, T)
    return make_chain(param)
def binary_search_param(
        make_chain: Callable[[Union[float, int]], Union[Transformation, Measurement]],
        d_in, d_out,
        bounds: Optional[Union[Tuple[float, float], Tuple[int, int]]] = None,
        T=None) -> Union[float, int]:
    """Useful to solve for the ideal constructor argument.

    Optimizes a parameterized chain `make_chain` within float or integer `bounds`,
    subject to the chained relation being (`d_in`, `d_out`)-close.

    :param make_chain: a unary function that maps from a number to a Transformation or Measurement
    :param d_in: desired input distance of the computation chain
    :param d_out: desired output distance of the computation chain
    :param bounds: a 2-tuple of the lower and upper bounds to the input of `make_chain`
    :param T: type of argument to `make_chain`, one of {float, int}
    :return: the nearest passing value to the decision point of the relation
    :raises TypeError: if the type is not inferrable (pass T) or the type is invalid
    :raises ValueError: if the predicate function is constant, bounds cannot be inferred, or decision boundary is not within `bounds`.

    :example:

    >>> from opendp.mod import binary_search_param, enable_features
    >>> from opendp.transformations import make_sized_bounded_mean
    >>> from opendp.measurements import make_base_laplace
    >>> enable_features("floating-point", "contrib")
    ...
    >>> # Find a value in `bounds` that produces a (`d_in`, `d_out`)-chain nearest the decision boundary.
    >>> # The first argument is any function that returns your complete computation chain
    >>> #     when passed a single numeric parameter.
    >>> scale = binary_search_param(make_base_laplace, d_in=0.1, d_out=1.)
    >>> assert scale == 0.1
    >>> # Constructing the same chain with the discovered parameter will always be (0.1, 1.)-close.
    >>> assert make_base_laplace(scale).check(0.1, 1.)

    A policy research organization wants to know the smallest sample size necessary to release an "accurate" epsilon=1 DP mean income.
    Determine the smallest dataset size such that, with 95% confidence,
    the DP release differs from the clipped dataset's mean by no more than 1000.
    Assume that neighboring datasets have a symmetric distance at most 2.
    Also assume a clipping bound of 500,000.

    >>> # we first work out the necessary noise scale to satisfy the above constraints.
    >>> from opendp.accuracy import accuracy_to_laplacian_scale
    >>> necessary_scale = accuracy_to_laplacian_scale(accuracy=1000., alpha=.05)
    ...
    >>> # we then write a function that makes a computation chain with a given data size
    >>> def make_mean(data_size):
    ...     return (
    ...         make_sized_bounded_mean(data_size, (0., 500_000.)) >>
    ...         make_base_laplace(necessary_scale)
    ...     )
    ...
    >>> # solve for the smallest dataset size that admits a (2 neighboring, 1. epsilon)-close measurement
    >>> binary_search_param(
    ...     make_mean,
    ...     d_in=2, d_out=1.,
    ...     bounds=(1, 1000000))
    1498
    """
    def is_close(param):
        # A candidate passes when the chain built from it is (d_in, d_out)-close.
        return make_chain(param).check(d_in, d_out)

    return binary_search(is_close, bounds, T)