Source code for autoqild.utilities.statistical_tests

"""Implementation of paired t-test and wilcoxon_signed_rank_test used to detect
leakage using blind classifiers."""

import logging

import numpy as np
from scipy.stats import t, wilcoxon

__all__ = ["wilcoxon_signed_rank_test", "paired_ttest"]


[docs] def wilcoxon_signed_rank_test(accuracies, accuracies2, alternative="two-sided", verbose=False): """Performs the Wilcoxon signed-rank test on two sets of accuracies. Parameters ---------- accuracies : ndarray First set of accuracy values. accuracies2 : ndarray Second set of accuracy values. alternative : str, optional Defines the alternative hypothesis (default is "two-sided"). verbose : bool, optional If True, outputs additional logging information (default is False). Returns ------- p_value : float The p-value from the Wilcoxon signed-rank test. """ logger = logging.getLogger("Wilcoxon-Signed_Rank") try: _, p_value = wilcoxon(accuracies, accuracies2, correction=True, alternative=alternative) except Exception as e: if verbose: logger.info("Accuracies are exactly same {}".format(str(e))) p_value = 1.0 return p_value
[docs] def paired_ttest(x1, x2, n_training_folds, n_test_folds, correction=True, alternative="two-sided", verbose=False,): """Performs a paired t-test on two sets of values with and without correction. Parameters ---------- x1 : ndarray First set of values. x2 : ndarray Second set of values. n_training_folds : int Number of training folds. n_test_folds : int Number of test folds. correction : bool, optional If True, applies a correction to the variance (default is True). alternative : str, optional Defines the alternative hypothesis (default is "two-sided"). verbose : bool, optional If True, outputs additional logging information (default is False). Returns ------- p_value : float The p-value from the paired t-test. """ logger = logging.getLogger("Paired T-Test") n = len(x1) df = n - 1 diff = [(x1[i] - x2[i]) for i in range(n)] # Compute the mean of differences d_bar = np.mean(diff) # compute the variance of differences sigma2 = np.var(diff, ddof=1) if sigma2 == 0.0: sigma2 = 1e-30 if verbose: logger.info("Correcting the sigma") if correction: if verbose: logger.info("With the correction option") if verbose: logger.info("D_bar {} Variance {} Sigma {}".format(d_bar, sigma2, np.sqrt(sigma2))) # compute the modified variance if correction: sigma2 = sigma2 * (1 / n + n_test_folds / n_training_folds) else: sigma2 = sigma2 / n # compute the t_static with np.errstate(divide="ignore", invalid="ignore"): t_static = np.divide(d_bar, np.sqrt(sigma2)) # Compute p-value and plot the results if alternative == "less": p_value = t.cdf(t_static, df) elif alternative == "greater": p_value = t.sf(t_static, df) elif alternative == "two-sided": p_value = 2 * t.sf(np.abs(t_static), df) if verbose: logger.info(f"Final Variance {sigma2} Sigma {np.sqrt(sigma2)} t_static {t_static} p {p_value}") logger.info(f"np.isnan(p) {np.isnan(p_value)}, np.isinf {np.isinf(p_value)}, d_bar == 0 {d_bar == 0}, sigma2_mod == 0 {sigma2 == 0}, np.isinf(t_static) {np.isinf(t_static)}, " f"np.isnan(t_static) {np.isnan(t_static)}") if ( np.isnan(p_value) or np.isinf(p_value) or d_bar == 0 or sigma2 == 0 or np.isinf(t_static) or np.isnan(t_static) ): p_value = 1.0 return p_value