Source code for mozanalysis.bayesian_stats

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import numpy as np
import pandas as pd

DEFAULT_QUANTILES = (0.005, 0.025, 0.5, 0.975, 0.995)



[docs]
def compare_samples(
    samples,
    ref_branch_label,
    individual_summary_quantiles=DEFAULT_QUANTILES,
    comparative_summary_quantiles=DEFAULT_QUANTILES,
):
    """Return descriptive statistics for branch stats and uplifts.

    Given per-branch samples for some quantity, return summary
    statistics (percentiles and the mean) for the quantity for each
    individual branch. Also return comparative summary statistics for
    the uplift of this quantity for each branch with respect to the
    reference branch.

    Args:
    ------
        samples (dict of pandas.Series or pandas.DataFrame): Each key
            is the label for a branch. Each value is the corresponding
            sample set.
        ref_branch_label (str): Label for the reference branch
            (typically the control).
        individual_summary_quantiles (list of float): Quantiles that
            define the summary stats for the individual branches'
            samples.
        comparative_summary_quantiles (list of float): Quantiles that
            define the summary stats for the comparative stats.

    Returns:
    ---------
        A dictionary. When the values of ``samples`` are Series, then this function
        returns a dictionary with the following keys and
        values:

            'individual': dictionary mapping each branch name to a
                pandas Series that holds the per-branch sample means and
                quantiles.
            'comparative': dictionary mapping each branch name to a
                pandas Series of summary statistics for the possible
                uplifts of the sampled quantity relative to the
                reference branch.

        Otherwise, when the values of ``samples`` are DataFrames, then
        this function returns a similar dictionary, except the Series
        are replaced with DataFrames. The index for each DataFrame is
        the columns of a value of ``samples``.

    """
    branch_list = list(samples.keys())

    return {
        "individual": {
            b: summarize_one_branch_samples(
                samples[b], quantiles=individual_summary_quantiles
            )
            for b in branch_list
        },
        "comparative": {
            b: summarize_joint_samples(
                samples[b],
                samples[ref_branch_label],
                quantiles=comparative_summary_quantiles,
            )
            for b in set(branch_list) - {ref_branch_label}
        },
    }




[docs]
def summarize_one_branch_samples(samples, quantiles=DEFAULT_QUANTILES):
    """Return descriptive statistics for sampled population-level stats.

    Given samples from one or more distributions, calculate some
    quantiles and the mean.

    The intended primary use-case is for calculating credible intervals
    for stats when bootstrapping, or credible intervals around Bayesian
    model parameters; in both cases ``samples`` are from a posterior
    concerning one branch of an experiment.

    Args:
    -----
        samples (pandas.Series or pandas.DataFrame): Samples over which
            to compute the mean and quantiles.
        quantiles (list, optional): The quantiles to compute - a good
            reason to override the defaults would be when Bonferroni
            corrections are required.

    Returns:
    --------
        If ``samples`` is a Series, then returns a pandas Series;
        the index contains the stringified ``quantiles`` plus
        ``'mean'``.

        If ``samples`` is a DataFrame, then returns a pandas DataFrame;
        the columns contain the stringified ``quantiles`` plus
        ``'mean'``. The index matches the columns of ``samples``.

    """
    if isinstance(samples, pd.DataFrame) or not np.isscalar(samples[0]):
        return _summarize_one_branch_samples_batch(samples, quantiles)
    else:
        return _summarize_one_branch_samples_single(samples, quantiles)




[docs]
def summarize_joint_samples(focus, reference, quantiles=DEFAULT_QUANTILES):
    """Return descriptive statistics for uplifts.

    The intended use case of this function is to compare a 'focus'
    experiment branch to a 'reference' experiment branch (e.g. the
    control). Samples from each branch are combined pairwise; these
    pairs are considered to be samples from the joint probability
    distribution (JPD). We compute various quantities from the JPD:

    * We compute summary statistics for the distribution over relative
      uplifts ``focus / reference - 1``
    * We compute summary statistics for the distribution over absolute
      uplifts ``focus - reference``
    * We compute a summary statistic for the distribution over the L1
      norm of absolute uplifts ``abs(focus - reference)``
    * We compute the fraction of probability mass in the region
      ``focus > reference``, which in a Bayesian context may be
      interpreted as the probability that the ground truth model
      parameter is larger for the focus branch than the reference
      branch.

    ``focus`` and ``reference`` are samples from distributions; each is the
    same format that would be supplied to `summarize_one_branch_samples`
    when analyzing the branches independently.

    Can be used to analyse a single metric (supply Series as arguments)
    or in batch mode (supply DataFrames as arguments).

    Args:
    -----
        focus (pandas.Series or pandas.DataFrame): Bootstrapped samples
            or samples of a model parameter for a branch of an
            experiment. If a DataFrame, each column represents a
            different quantity.
        reference (pandas.Series or pandas.DataFrame): The same
            quantity, calculated for a different branch (typically the
            control).
        quantiles (list, optional): The quantiles to compute - a good
            reason to override the defaults would be when Bonferroni
            corrections are required.

    Returns:
    --------
        A pandas Series or DataFrame containing a MultiIndex with the
        following labels on the higher level and stringified floats
        on the inner level

        * rel_uplift: Expectation value and quantiles over the relative
          uplift.
        * abs_uplift: Expectation value and quantiles over the absolute
          uplift.
        * max_abs_diff: Quantile 0.95 on the L1 norm of differences/
          absolute uplifts. In a Bayesian context, there is a 95%
          probability that the absolute difference is less than this in
          either direction.
        * prob_win: In a Bayesian context, the probability that the ground
          truth model parameter is larger for the focus than the reference
          branch.

        If returning a DataFrame, this MultiIndex is for the columns, and
        the index matches the columns of ``focus``.

    """
    if isinstance(focus, pd.DataFrame) or not np.isscalar(focus[0]):
        return _summarize_joint_samples_batch(focus, reference, quantiles)
    else:
        return _summarize_joint_samples_single(focus, reference, quantiles)



def _summarize_one_branch_samples_single(samples, quantiles=DEFAULT_QUANTILES):
    if not isinstance(samples, pd.Series | np.ndarray | list):
        # Hey pd.Series.agg - don't apply me elementwise!
        # Raising this error allows ``_summarize_one_branch_samples_batch``
        # to work also for non-batch ``samples`` (i.e. doing double duty)
        raise TypeError("Can't summarize a scalar")

    q_index = [str(v) for v in quantiles]

    res = pd.Series(index=q_index + ["mean"], dtype="float")

    res[q_index] = np.quantile(samples, quantiles)
    res["mean"] = np.mean(samples)
    return res


def _summarize_one_branch_samples_batch(samples, quantiles=DEFAULT_QUANTILES):
    return samples.agg(summarize_one_branch_samples, quantiles=quantiles).T


def _summarize_joint_samples_single(focus, reference, quantiles=DEFAULT_QUANTILES):
    str_quantiles = [str(q) for q in quantiles]

    index = pd.MultiIndex.from_tuples(
        [("rel_uplift", q) for q in str_quantiles + ["exp"]]
        + [("abs_uplift", q) for q in str_quantiles + ["exp"]]
        + [("max_abs_diff", "0.95"), ("prob_win",)]
    )

    res = pd.Series(index=index, dtype="float")

    rel_uplift_samples = focus / reference - 1
    res.loc[[("rel_uplift", q) for q in str_quantiles]] = np.quantile(
        rel_uplift_samples, quantiles
    )

    res.loc[("rel_uplift", "exp")] = np.mean(rel_uplift_samples)

    abs_uplift_samples = focus - reference
    res.loc[[("abs_uplift", q) for q in str_quantiles]] = np.quantile(
        abs_uplift_samples, quantiles
    )

    res.loc[("abs_uplift", "exp")] = np.mean(abs_uplift_samples)

    res.loc[("max_abs_diff", "0.95")] = np.quantile(np.abs(abs_uplift_samples), 0.95)

    res.loc["prob_win"] = np.mean(focus > reference)

    return res


def _summarize_joint_samples_batch(focus, reference, quantiles=DEFAULT_QUANTILES):
    if set(focus.columns) != set(reference.columns):
        raise ValueError()

    return pd.DataFrame(
        {
            k: summarize_joint_samples(focus[k], reference[k], quantiles)
            for k in focus.columns
        },
        columns=focus.columns,
    ).T