Source code for presc.copies.evaluations

import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score

from presc.dataset import Dataset


def empirical_fidelity_error(y_pred_original, y_pred_copy):
    """Computes the empirical fidelity error of a classifier copy.

    Quantifies the resemblance of the copy to the original classifier. This
    value is zero when the copy makes exactly the same predictions as the
    original classifier (including misclassifications).

    Parameters
    ----------
    y_pred_original : list or 1d array-like
        Predicted labels, as returned by the original classifier.
    y_pred_copy : list or 1d array-like
        Predicted labels, as returned by the classifier copy.

    Returns
    -------
    float
        The numerical value of the empirical fidelity error.
    """
    error = 1 - accuracy_score(y_pred_original, y_pred_copy)
    return error
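
# Usage sketch (illustrative, not part of the original module): the label
# vectors below are hypothetical predictions from an original classifier and
# its copy. One of the five predictions differs, so the error is 0.2.
#
#     from presc.copies.evaluations import empirical_fidelity_error
#
#     y_pred_original = [0, 1, 1, 0, 1]
#     y_pred_copy = [0, 1, 0, 0, 1]
#     empirical_fidelity_error(y_pred_original, y_pred_copy)  # -> 0.2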
def replacement_capability(y_true, y_pred_original, y_pred_copy):
    """Computes the replacement capability of a classifier copy.

    Quantifies the ability of the copy model to substitute for the original
    model, i.e. to maintain the same accuracy in its predictions. This value
    is one when the accuracy of the copy model is the same as that of the
    original model (even if the individual predictions differ), it approaches
    zero when the accuracy of the copy is much smaller than the original, and
    it can even take values larger than one if the copy model performs better
    than the original.

    Parameters
    ----------
    y_true : list or 1d array-like
        True labels from the data.
    y_pred_original : list or 1d array-like
        Predicted labels, as returned by the original classifier.
    y_pred_copy : list or 1d array-like
        Predicted labels, as returned by the classifier copy.

    Returns
    -------
    float
        The numerical value of the replacement capability.
    """
    accuracy_original = accuracy_score(y_true, y_pred_original)
    accuracy_copy = accuracy_score(y_true, y_pred_copy)
    rcapability = accuracy_copy / accuracy_original
    return rcapability
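
# Usage sketch (illustrative, not part of the original module): with the
# hypothetical labels below, the original model scores 0.8 accuracy and the
# copy 0.6, so the replacement capability is 0.6 / 0.8 = 0.75.
#
#     from presc.copies.evaluations import replacement_capability
#
#     y_true = [0, 1, 1, 0, 1]
#     y_pred_original = [0, 1, 1, 0, 0]   # 4/5 correct -> accuracy 0.8
#     y_pred_copy = [0, 1, 0, 0, 0]       # 3/5 correct -> accuracy 0.6
#     replacement_capability(y_true, y_pred_original, y_pred_copy)  # -> 0.75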
def summary_metrics(
    original_model=None,
    copy_model=None,
    test_data=None,
    synthetic_data=None,
    show_results=True,
):
    """Computes several metrics to evaluate the classifier copy.

    Summary of metrics that evaluate the quality of a classifier copy, not
    only to assess its performance as a classifier but also to quantify its
    resemblance to the original classifier: the accuracy of the original and
    the copy models (using the original test data), and the empirical
    fidelity error and replacement capability of the copy (using the original
    test data and/or the generated synthetic data).

    Parameters
    ----------
    original_model : sklearn-type classifier
        Original ML classifier to be copied.
    copy_model : presc.copies.copying.ClassifierCopy
        ML classifier copy from the original ML classifier.
    test_data : presc.dataset.Dataset
        Subset of the original data reserved for testing.
    synthetic_data : presc.dataset.Dataset
        Synthetic data generated using the original model.
    show_results : bool
        If `True` the metrics are also printed.

    Returns
    -------
    dict
        The values of all metrics.
    """
    results = {
        "Original Model Accuracy (test)": None,
        "Copy Model Accuracy (test)": None,
        "Empirical Fidelity Error (synthetic)": None,
        "Empirical Fidelity Error (test)": None,
        "Replacement Capability (synthetic)": None,
        "Replacement Capability (test)": None,
    }

    if test_data is not None:
        if original_model is not None:
            y_pred_original_test = original_model.predict(test_data.features)
            original_accuracy = accuracy_score(test_data.labels, y_pred_original_test)
            results["Original Model Accuracy (test)"] = original_accuracy
        if copy_model is not None:
            y_pred_copy_test = copy_model.copy.predict(test_data.features)
            copy_accuracy = accuracy_score(test_data.labels, y_pred_copy_test)
            results["Copy Model Accuracy (test)"] = copy_accuracy
            efe_test = copy_model.compute_fidelity_error(test_data=test_data.features)
            results["Empirical Fidelity Error (test)"] = efe_test
        if (original_model is not None) and (copy_model is not None):
            rc_test = replacement_capability(
                test_data.labels, y_pred_original_test, y_pred_copy_test
            )
            results["Replacement Capability (test)"] = rc_test

    if synthetic_data is not None:
        if original_model is not None:
            y_pred_original_synth = original_model.predict(synthetic_data.features)
        if copy_model is not None:
            y_pred_copy_synth = copy_model.copy.predict(synthetic_data.features)
            efe_synthetic = copy_model.compute_fidelity_error(
                test_data=synthetic_data.features
            )
            results["Empirical Fidelity Error (synthetic)"] = efe_synthetic
        if (original_model is not None) and (copy_model is not None):
            rc_synthetic = replacement_capability(
                synthetic_data.labels, y_pred_original_synth, y_pred_copy_synth
            )
            results["Replacement Capability (synthetic)"] = rc_synthetic

    if show_results:
        for name, value in results.items():
            if value is not None:
                print(f"{name:<37} {value:.4f}")

    return results
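
# Usage sketch (illustrative, not part of the original module). It assumes an
# already trained `original_model`, an already fitted `copy_model` of type
# presc.copies.copying.ClassifierCopy, and pandas DataFrames `test_df` and
# `synthetic_df` with a "class" label column; all of these names are
# hypothetical placeholders.
#
#     from presc.dataset import Dataset
#     from presc.copies.evaluations import summary_metrics
#
#     test_data = Dataset(test_df, label_col="class")
#     synthetic_data = Dataset(synthetic_df, label_col="class")
#     metrics = summary_metrics(
#         original_model=original_model,
#         copy_model=copy_model,
#         test_data=test_data,
#         synthetic_data=synthetic_data,
#         show_results=True,
#     )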
def multivariable_density_comparison(
    datasets=[None],
    feature1=None,
    feature2=None,
    label_col="class",
    titles=None,
    other_kwargs={
        "alpha": 0.3,
        "common_norm": False,
        "fill": True,
        "n_levels": 4,
        "legend": False,
    },
):
    """Visualization to compare class density projections in detail.

    Allows comparing the different topologies of a number of ML classifiers
    in a multivariable feature space by choosing a feature pair and
    "squashing" the rest of the features into a projected density
    distribution for each class.

    It is important that the classifier datasets are obtained through a
    homogeneous sampling throughout the feature space, to avoid introducing
    spurious shapes in the projected density distributions.
    `uniform_sampling` is a good option for that. `normal_sampling` and any
    other non-uniform samplers should be avoided, because the intrinsic class
    distributions become convolved with their gaussian shape, which obscures
    them. Note that `grid_sampling` is also not recommended, because it
    samples very specific interval points and thus yields density peaks.

    Parameters
    ----------
    datasets : list of pandas DataFrames
        List of the datasets with the sampled and labeled points for each
        classifier included in the comparison.
    feature1 : str
        Name of the feature to display on the x-axis.
    feature2 : str
        Name of the feature to display on the y-axis.
    label_col : str
        Name of the label column.
    titles : list of str
        List of names to identify each classifier and label their subplot.
    other_kwargs : dict
        Any other seaborn.kdeplot parameters needed to adjust the
        visualization. The defaults are {"alpha": 0.3, "common_norm": False,
        "fill": True, "n_levels": 4, "legend": False}; any parameter
        specified in `other_kwargs` overrides the corresponding default.

    Returns
    -------
    matplotlib.figure.Figure
        Figure with the detailed classifier comparison.
    matplotlib.axes.Axes or array of Axes
        Contains most of the figure elements of the classifier comparison
        and sets the coordinate system.
    """
    kdeplot_kwargs = {
        "alpha": 0.3,
        "common_norm": False,
        "fill": True,
        "n_levels": 4,
        "legend": False,
    }
    for key, value in other_kwargs.items():
        kdeplot_kwargs[key] = value

    num_comparisons = len(datasets)

    # Collect the union of class names present in all datasets.
    class_names = set()
    for index_models in range(num_comparisons):
        class_names = class_names | set(datasets[index_models][label_col].unique())
    class_names = sorted(list(class_names))
    max_num_classes = len(class_names)

    fig, axs = plt.subplots(
        max_num_classes,
        num_comparisons,
        figsize=(3.5 * num_comparisons, 3.5 * max_num_classes),
        sharex=True,
        sharey=True,
    )
    for index_models in range(num_comparisons):
        for index_classes, class_name in enumerate(class_names):
            # One KDE panel per (class, classifier) pair.
            axs[index_classes, index_models] = sns.kdeplot(
                x=datasets[index_models][
                    datasets[index_models][label_col] == class_name
                ][feature1],
                y=datasets[index_models][
                    datasets[index_models][label_col] == class_name
                ][feature2],
                hue=datasets[index_models][label_col],
                ax=axs[index_classes, index_models],
                **kdeplot_kwargs,
            )
            axs[index_classes, 0].set_ylabel(
                label_col + " = " + str(class_name) + "\n\n" + feature2
            )
        if titles is not None:
            axs[0, index_models].set_title(titles[index_models])

    plt.show(block=False)

    return fig, axs
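
# Usage sketch (illustrative, not part of the original module): compares the
# class density projections of two labeled samples, `df_original` and
# `df_copy`, which are hypothetical pandas DataFrames with feature columns
# "x0" and "x1" and a "class" label column.
#
#     from presc.copies.evaluations import multivariable_density_comparison
#
#     fig, axs = multivariable_density_comparison(
#         datasets=[df_original, df_copy],
#         feature1="x0",
#         feature2="x1",
#         label_col="class",
#         titles=["Original", "Copy"],
#     )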
def keep_top_classes(dataset, min_num_samples=2, classes_to_keep=None):
    """Removes rows belonging to minority classes from a PRESC Dataset.

    Only classes that have at least the specified minimum number of samples
    are kept. If a list of classes of interest is given, this requirement is
    overridden and those classes are kept regardless of their size.

    Parameters
    ----------
    dataset : presc.dataset.Dataset
        PRESC dataset from which we want to remove the minority classes.
    min_num_samples : int
        Minimum number of samples that a class must have in order to be kept.
    classes_to_keep : list
        Names of the classes to keep. If a list of classes is specified here,
        the parameter `min_num_samples` is ignored and the specified classes
        are kept regardless of their number of samples.

    Returns
    -------
    presc.dataset.Dataset
        PRESC Dataset without the samples from the minority classes.
    """
    label_col = dataset.labels.name

    if classes_to_keep is None:
        classes_to_keep = (
            dataset.df[label_col]
            .value_counts()[dataset.df[label_col].value_counts() >= min_num_samples]
            .index.to_list()
        )

    new_dataframe = dataset.df[dataset.df[label_col].isin(classes_to_keep)].copy()

    # Pandas categorical columns report the dtype name "category"; restrict
    # the categories to the classes that were kept.
    if new_dataframe[label_col].dtype.name == "category":
        print(str(label_col), "is categorical")
        new_dataframe[label_col] = new_dataframe.loc[:, label_col].cat.set_categories(
            classes_to_keep
        )

    return Dataset(new_dataframe, label_col=label_col)
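
# Usage sketch (illustrative, not part of the original module): `my_dataset`
# is a hypothetical presc.dataset.Dataset. The first call keeps only classes
# with at least 10 samples; the second keeps exactly the listed classes.
#
#     from presc.copies.evaluations import keep_top_classes
#
#     filtered = keep_top_classes(my_dataset, min_num_samples=10)
#     selected = keep_top_classes(my_dataset, classes_to_keep=["cat", "dog"])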