Source code for mozanalysis.frequentist_stats.bootstrap

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import numpy as np
import pandas as pd

import mozanalysis.bayesian_stats as mabs
from mozanalysis.utils import filter_outliers

def compare_branches(
    df,
    col_label,
    ref_branch_label="control",
    stat_fn=np.mean,
    num_samples=10000,
    threshold_quantile=None,
    individual_summary_quantiles=mabs.DEFAULT_QUANTILES,
    comparative_summary_quantiles=mabs.DEFAULT_QUANTILES,
):
    """Jointly sample bootstrapped statistics then compare them.

    Performs a percentile bootstrap, which, according to Efron, is not
    significantly more distasteful than a basic bootstrap, regardless of
    what you may read on Stack Overflow.

    Args:
        df: a pandas DataFrame of queried experiment data in the
            standard format (see ``mozanalysis.experiment``).
        col_label (str or list): Label for the df column containing the
            metric to be analyzed. If a list, labels for the multiple
            metrics to be analyzed.
        ref_branch_label (str, optional): String in ``df['branch']`` that
            identifies the branch with respect to which we want to
            calculate uplifts - usually the control branch.
        stat_fn (func, optional): A function that either:

            - Aggregates each resampled population to a scalar (e.g.
              the default, ``np.mean``), or
            - Aggregates each resampled population to a dict of
              scalars.

            In both cases, this function must accept a one-dimensional
            ndarray or pandas Series as its input.
        num_samples (int, optional): The number of bootstrap iterations
            to perform.
        threshold_quantile (float, optional): An optional threshold
            quantile, above which to discard outliers. E.g. ``0.9999``.
        individual_summary_quantiles (list, optional): Quantiles to
            determine the confidence bands on individual branch
            statistics. Change these when making Bonferroni corrections.
        comparative_summary_quantiles (list, optional): Quantiles to
            determine the confidence bands on comparative branch
            statistics (i.e. the change relative to the reference
            branch, probably the control). Change these when making
            Bonferroni corrections.

    Returns a dictionary:
        If ``stat_fn`` returns a scalar (this is the default), then
        this function returns a dictionary that has the following keys
        and values:

            'individual': dictionary mapping each branch name to a pandas
                Series that holds the expected value for the bootstrapped
                ``stat_fn``, and confidence intervals.
            'comparative': dictionary mapping each branch name to a pandas
                Series of summary statistics for the possible uplifts of the
                bootstrapped ``stat_fn`` relative to the reference branch.

        Otherwise, when ``stat_fn`` returns a dict, then this function
        returns a similar dictionary, except the Series are replaced with
        DataFrames. Each row in each DataFrame corresponds to one output
        of ``stat_fn``, and is the Series that would be returned if
        ``stat_fn`` computed only this statistic.

    Raises:
        ValueError: If ``ref_branch_label`` is not one of the values in
            ``df.branch``.
    """
    branch_list = df.branch.unique()

    if ref_branch_label not in branch_list:
        raise ValueError(
            f"Branch label '{ref_branch_label}' not in branch list '{branch_list}'"
        )

    # Each branch is bootstrapped independently, each with its own
    # randomly chosen seed range.
    samples = {
        # TODO: do we need to control seed_start? If so then we must be careful here
        b: get_bootstrap_samples(
            df[col_label][df.branch == b],
            stat_fn,
            num_samples,
            threshold_quantile=threshold_quantile,
        )
        for b in branch_list
    }

    return mabs.compare_samples(
        samples,
        ref_branch_label,
        individual_summary_quantiles,
        comparative_summary_quantiles,
    )
def bootstrap_one_branch(
    data,
    stat_fn=np.mean,
    num_samples=10000,
    seed_start=None,
    threshold_quantile=None,
    summary_quantiles=mabs.DEFAULT_QUANTILES,
):
    """Bootstrap a statistic for a single branch and summarize the result.

    The data are resampled ``num_samples`` times, ``stat_fn`` is computed
    on every resample, and summary statistics describing the distribution
    of those ``stat_fn`` outputs are returned.

    Args:
        data: The data as a 1D numpy array, pandas series, or pandas
            dataframe.
        stat_fn: Either a function that aggregates each resampled
            population to a scalar (e.g. the default value ``np.mean``
            lets you bootstrap means), or a function that aggregates
            each resampled population to a dict of scalars. In both
            cases, this function must accept a one-dimensional ndarray
            as its input.
        num_samples: The number of bootstrap iterations to perform.
        seed_start: An int with which to seed numpy's RNG. It must
            be unique within this set of calculations.
        threshold_quantile (float, optional): An optional threshold
            quantile, above which to discard outliers. E.g. ``0.9999``.
        summary_quantiles (list, optional): Quantiles to determine the
            confidence bands on the branch statistics. Change these
            when making Bonferroni corrections.

    Returns:
        Whatever ``mabs.summarize_one_branch_samples`` produces for the
        bootstrapped samples and ``summary_quantiles``.
    """
    bootstrapped = get_bootstrap_samples(
        data,
        stat_fn=stat_fn,
        num_samples=num_samples,
        seed_start=seed_start,
        threshold_quantile=threshold_quantile,
    )
    return mabs.summarize_one_branch_samples(bootstrapped, summary_quantiles)
def get_bootstrap_samples(
    data,
    stat_fn=np.mean,
    num_samples=10000,
    seed_start=None,
    threshold_quantile=None,
):
    """Return ``stat_fn`` evaluated on bootstrap resamples of ``data``.

    The resamples are drawn one after another in this process; each one
    uses its own seed so that results are reproducible for a given
    ``seed_start``.

    Args:
        data: The data as a 1D numpy array, pandas series, or pandas
            dataframe. Other array-likes (e.g. a list of numbers) are
            converted with ``np.asarray(..., dtype=float)``.
        stat_fn: Either a function that aggregates each resampled
            population to a scalar (e.g. the default value ``np.mean``
            lets you bootstrap means), or a function that aggregates
            each resampled population to a dict of scalars. In both
            cases, this function must accept a one-dimensional ndarray
            as its input.
        num_samples: The number of samples to return.
        seed_start: A seed for the random number generator; this
            function will use seeds in the range::

                [seed_start, seed_start + num_samples)

            and these particular seeds must not be used elsewhere in
            this calculation. Every seed in the range must be a valid
            ``np.random.RandomState`` seed. By default, a random
            ``seed_start`` is chosen such that the whole range is valid.
        threshold_quantile (float, optional): An optional threshold
            quantile, above which to discard outliers. E.g. ``0.9999``.

    Returns:
        ``stat_fn`` evaluated over ``num_samples`` samples.

            * By default, a pandas Series of sampled means
            * if ``stat_fn`` returns a scalar, a pandas Series
            * if ``stat_fn`` returns a dict, a pandas DataFrame
              with columns set to the dict keys (note: a dict with a
              single key also collapses to a Series, because the result
              is reduced whenever it has exactly one column).

    Raises:
        ValueError: If ``data`` contains null (NaN) values.
    """
    if not isinstance(data, np.ndarray):
        if hasattr(data, "to_numpy"):
            # pandas objects: nulls become NaN so they are caught below.
            data = np.array(data.to_numpy(dtype="float", na_value=np.nan))
        else:
            data = np.asarray(data, dtype="float")

    if np.isnan(data).any():
        raise ValueError("'data' contains null values")

    if threshold_quantile:
        data = filter_outliers(data, threshold_quantile)

    if seed_start is None:
        # RandomState only accepts seeds in [0, 2**32 - 1]. Leave room for
        # the num_samples consecutive seeds used below, and ask for a 64-bit
        # draw so the upper bound is representable on every platform.
        max_seed = int(np.iinfo(np.uint32).max)
        seed_start = int(
            np.random.randint(max(max_seed - num_samples, 1), dtype=np.int64)
        )

    # Deterministic "randomness" requires careful state handling :(
    # Need to ensure every call has a unique, deterministic seed.
    seed_range = range(seed_start, seed_start + num_samples)

    summary_stat_samples = [
        _resample_and_agg_once(data, stat_fn, unique_seed)
        for unique_seed in seed_range
    ]

    summary_df = pd.DataFrame(summary_stat_samples)
    if len(summary_df.columns) == 1:
        # Return a Series if stat_fn returns a scalar
        return summary_df.iloc[:, 0]

    # Else return a DataFrame if stat_fn returns a dict
    return summary_df
def _resample_and_agg_once(data, stat_fn, unique_seed=None):
    """Draw one bootstrap resample of ``data`` and aggregate it.

    A fresh ``np.random.RandomState`` seeded with ``unique_seed`` picks
    ``len(data)`` indices with replacement; ``stat_fn`` is applied to the
    entries of ``data`` at those indices and its result is returned.
    """
    rng = np.random.RandomState(unique_seed)
    size = len(data)
    # TODO: can't we just use rng.choice? Wouldn't that be faster?
    # There's not thaaat much difference in RAM requirements?
    picked = rng.randint(0, size, size)
    return stat_fn(data[picked])
def compare_branches_quantiles(
    df,
    col_label,
    ref_branch_label="control",
    quantiles_of_interest=None,
    num_samples=10000,
    threshold_quantile=None,
    individual_summary_quantiles=mabs.DEFAULT_QUANTILES,
    comparative_summary_quantiles=mabs.DEFAULT_QUANTILES,
):
    """Compare branches on quantiles of a metric.

    Performs inferences on the metric quantiles inspired by Spotify's
    "Resampling-free bootstrap inference for quantiles" approach.

    Parameters are similar to ``compare_branches`` except for:

    Args:
        quantiles_of_interest (List[float], optional): a list of
            quantiles upon which inferences are desired. Ex: 0.2 is the
            20th percentile, 0.5 is the median, etc. Defaults to the
            deciles ``0.1, 0.2, ..., 0.9``.

    Raises:
        ValueError: If ``ref_branch_label`` is not one of the values in
            ``df.branch``.
    """
    if quantiles_of_interest is None:
        quantiles_of_interest = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

    branch_list = df.branch.unique()

    if ref_branch_label not in branch_list:
        raise ValueError(
            f"Branch label '{ref_branch_label}' not in branch list '{branch_list}'"
        )

    # One DataFrame of sampled quantile values per branch.
    samples = {
        b: get_quantile_bootstrap_samples(
            df[col_label][df.branch == b],
            quantiles_of_interest,
            num_samples,
            threshold_quantile=threshold_quantile,
        )
        for b in branch_list
    }

    return mabs.compare_samples(
        samples,
        ref_branch_label,
        individual_summary_quantiles,
        comparative_summary_quantiles,
    )
def get_quantile_bootstrap_samples(
    data, quantiles_of_interest, num_samples=10000, threshold_quantile=None
):
    """Sample plausible values for quantiles of ``data`` without resampling.

    For each requested quantile ``q``, ``num_samples`` indices are drawn
    from ``Binomial(len(data) - 1, q)`` and used to look up values in the
    sorted data.

    Args:
        data: The data as a numpy array, pandas series, or pandas
            dataframe. Other array-likes (e.g. a list of numbers) are
            converted with ``np.asarray(..., dtype=float)``.
            NOTE(review): the data are sorted with ``np.sort`` (last
            axis) and indexed along the first axis, so this presumably
            expects 1D input - confirm for multi-column callers.
        quantiles_of_interest: Iterable of quantiles in ``[0, 1]``.
        num_samples: The number of samples to draw per quantile.
        threshold_quantile (float, optional): An optional threshold
            quantile, above which to discard outliers. E.g. ``0.9999``.

    Returns:
        A pandas DataFrame with one column per quantile and
        ``num_samples`` rows. Columns are labelled with the quantile
        formatted via ``format(q, "g")`` (e.g. ``"0.5"``, ``"0.25"``), so
        distinct quantiles such as 0.2 and 0.25 get distinct columns.

    Raises:
        ValueError: If ``data`` contains null (NaN) values or is empty.
    """
    if not isinstance(data, np.ndarray):
        if hasattr(data, "to_numpy"):
            # pandas objects: nulls become NaN so they are caught below.
            data = np.array(data.to_numpy(dtype="float", na_value=np.nan))
        else:
            data = np.asarray(data, dtype="float")

    if np.isnan(data).any():
        raise ValueError("'data' contains null values")

    if threshold_quantile:
        data = filter_outliers(data, threshold_quantile)

    data = np.sort(data)
    sample_size = data.shape[0]
    if sample_size == 0:
        # Fail with a clear message instead of numpy's complaint about a
        # negative binomial ``n``.
        raise ValueError("'data' is empty")

    # Binomial(n - 1, q) yields valid 0-based indices into the sorted data.
    # Labels use the general format so multi-digit quantiles stay distinct
    # (a one-significant-digit label would merge e.g. 0.2 and 0.25).
    samples = {
        f"{quantile:g}": data[
            np.random.binomial(sample_size - 1, quantile, num_samples)
        ]
        for quantile in quantiles_of_interest
    }

    return pd.DataFrame.from_dict(samples)