# Source code for autoqild.detectors.autogluon_leakage_detector

"""A leakage detection class leveraging AutoGluon for hyperparameter
optimization and model evaluation."""

import logging
import os.path

from .ild_base_class import InformationLeakageDetector
from ..automl import AutoGluonClassifier
from ..bayes_search.bayes_search_utils import get_scores
from ..utilities import *

__all__ = ["AutoGluonLeakageDetector"]



class AutoGluonLeakageDetector(InformationLeakageDetector):
    """Information-leakage detector whose candidate models come from AutoGluon.

    This class extends the `InformationLeakageDetector` base class. An
    `AutoGluonClassifier` is fitted once to obtain a ranking of models; the
    top-ranked models are then re-fitted on every cross-validation split and
    their predictions are handed to the base class for scoring and for the
    statistical leakage tests.

    Parameters
    ----------
    padding_name : str
        The name of the padding method used in experiments to potentially obscure or prevent leakage.

    learner_params : dict
        Parameters related to the AutoGluon classifier used in the leakage detection process.
        The keys ``output_folder``, ``eval_metric``, ``delete_tmp_folder_after_terminate`` and
        ``remove_boosting_models`` are set by the constructor.

    fit_params : dict
        Parameters passed to the `fit` method of the AutoGluon models during training.

    hash_value : str
        A unique hash value used to identify and manage result files for a specific experiment.

    cv_iterations : int
        The number of cross-validation iterations to perform during model evaluation.

    n_hypothesis : int
        The number of hypotheses or models to be tested for leakage.

    base_directory : str
        The base directory where result files, logs, and backups are stored.

    validation_loss : str
        The evaluation metric used to assess model performance during hyperparameter optimization.
        It is forwarded to the learner as ``eval_metric``.

    random_state : int or None, optional
        Controls the randomness for reproducibility, ensuring consistent results across different runs.

    **kwargs : dict, optional
        Additional keyword arguments passed to the `InformationLeakageDetector` base class.

    Attributes
    ----------
    base_detector : type
        The `AutoGluonClassifier` class (not an instance) used to build the learner.

    learner : AutoGluonClassifier or None
        The fitted AutoGluon classifier; ``None`` until `hyperparameter_optimization` has run.

    logger : logging.Logger
        Logger instance used for recording the steps and processes of the leakage detection.
    """

    def __init__(
        self,
        padding_name,
        learner_params,
        fit_params,
        hash_value,
        cv_iterations,
        n_hypothesis,
        base_directory,
        validation_loss,
        random_state=None,
        **kwargs,
    ):
        super().__init__(
            padding_name=padding_name,
            learner_params=learner_params,
            fit_params=fit_params,
            hash_value=hash_value,
            cv_iterations=cv_iterations,
            n_hypothesis=n_hypothesis,
            base_directory=base_directory,
            random_state=random_state,
            **kwargs,
        )
        self.base_detector = AutoGluonClassifier
        self.learner = None

        # Every (experiment hash, padding) pair gets its own AutoGluon working
        # directory below <base_directory>/<OPTIMIZER_FOLDER>/.
        output_folder = os.path.join(
            base_directory,
            OPTIMIZER_FOLDER,
            hash_value,
            f"{self.padding_code}_autogluon",
        )
        create_directory_safely(output_folder)

        # NOTE(review): this writes into ``self.learner_params``; if the base
        # class stores the caller's dict by reference, the caller's dict is
        # modified as well -- confirm that this is intended.
        self.learner_params["output_folder"] = output_folder
        self.learner_params["eval_metric"] = validation_loss
        self.learner_params["delete_tmp_folder_after_terminate"] = False
        self.learner_params["remove_boosting_models"] = True
        self.logger = logging.getLogger(AutoGluonLeakageDetector.__name__)

    def hyperparameter_optimization(self, X, y):
        """Fit the AutoGluon learner and collect its top-ranked models.

        A new learner is built from ``self.learner_params`` and fitted on the
        training subset returned by ``__get_training_dataset__``. The models
        ranked 1 to ``3 * n_hypothesis`` are then appended to
        ``self.estimators``. More candidates than ``n_hypothesis`` are
        collected because `fit` skips candidates that raise an error and stops
        once ``n_hypothesis`` of them have been evaluated successfully.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input feature matrix used for training during hyperparameter optimization.

        y : array-like of shape (n_samples,)
            The target values (class labels) corresponding to each row in X.

        Returns
        -------
        int
            The number of rows of the training dataset the learner was fitted on.
        """
        X_train, y_train = self.__get_training_dataset__(X, y)
        self.learner = self.base_detector(**self.learner_params)
        self.learner.fit(X_train, y_train)
        for i in range(self.n_hypothesis * 3):
            self.logger.info(f"Getting model at {i}")
            # Ranks are 1-based, the loop index is 0-based.
            model = self.learner.get_k_rank_model(i + 1)
            self.estimators.append(model)
        return X_train.shape[0]

    def fit(self, X, y, **kwargs):
        """Re-fit the candidate models on every cross-validation split and score them.

        Does nothing except logging when the detector is already fitted.
        Otherwise `hyperparameter_optimization` is run first, then the
        candidates in ``self.estimators`` are processed in rank order: each one
        is fitted on the training part of every split of ``self.cv_iterator``
        and its test predictions are passed to `evaluate_scores`. A candidate
        that raises an exception is logged and skipped. Processing stops as
        soon as ``self.n_hypothesis`` candidates completed all splits, after
        which the results are stored via ``__store_results__``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input feature matrix used for model training.

        y : array-like of shape (n_samples,)
            The target values (class labels) corresponding to each row in X.

        **kwargs : dict, optional
            Accepted for interface compatibility; not used by this method.
        """
        if self._is_fitted_:
            self.logger.info(f"Model already fitted for the padding {self.padding_code}")
            return

        train_size = self.hyperparameter_optimization(X, y)
        n_hypothesis = 0  # number of candidates that completed every split
        for i, model in enumerate(self.estimators):
            if n_hypothesis == self.n_hypothesis:
                break
            try:
                self.logger.info(
                    f"************** Model {i + 1}: {model.__class__.__name__} **************"
                )
                for k, (train_index, test_index) in enumerate(self.cv_iterator.split(X, y)):
                    self.logger.info(
                        f"************************** Split {k + 1} ***************************"
                    )
                    # Cap the training fold at the size used for the initial
                    # AutoGluon fit.
                    train_index = train_index[:train_size]
                    X_train, X_test = X[train_index], X[test_index]
                    y_train, y_test = y[train_index], y[test_index]
                    if i == 0:
                        # Baselines are computed only while the first candidate
                        # is processed.
                        # NOTE(review): if the first candidate fails part-way
                        # through the splits, the baselines for the remaining
                        # splits are never computed -- confirm this is acceptable.
                        self.__calculate_random_classifier_accuracy__(
                            X_train, y_train, X_test, y_test
                        )
                        self.__calculate_majority_voting_accuracy__(
                            X_train, y_train, X_test, y_test
                        )
                    train_data = self.learner.convert_to_dataframe(X_train, y_train)
                    test_data = self.learner.convert_to_dataframe(X_test, None)
                    X_t = train_data.drop(columns=["class"])
                    y_t = train_data["class"]
                    # Presumably resets the model's repeat counter so that it can
                    # be fitted again from scratch on this split -- TODO confirm.
                    model._n_repeats_finished = 0
                    model.fit(X=X_t, y=y_t, n_repeat_start=0)
                    p_pred, y_pred = get_scores(test_data, model)
                    self.evaluate_scores(
                        X_test,
                        X_train,
                        y_test,
                        y_train,
                        y_pred,
                        p_pred,
                        model,
                        n_hypothesis,
                    )
                # Counted only when every split succeeded.
                n_hypothesis += 1
                self.logger.info(f"Hypothesis Done {n_hypothesis} out of {self.n_hypothesis}")
            except Exception as error:
                # Best effort: a failing candidate is skipped and the next
                # ranked model is tried under the same hypothesis index.
                # NOTE(review): scores already recorded for the failed
                # candidate's earlier splits are not rolled back here.
                log_exception_error(self.logger, error)
                self.logger.error("Problem with fitting the model")
        self.__store_results__()

    def evaluate_scores(self, X_test, X_train, y_test, y_train, y_pred, p_pred, model, n_model):
        """Record the performance of one model on one cross-validation split.

        All arguments are forwarded unchanged to
        `InformationLeakageDetector.evaluate_scores`, which performs the actual
        metric computation and storage.

        Parameters
        ----------
        X_test : array-like of shape (n_samples, n_features)
            The input feature matrix for the test set.

        X_train : array-like of shape (n_samples, n_features)
            The input feature matrix for the training set.

        y_test : array-like of shape (n_samples,)
            The true target labels for the test set.

        y_train : array-like of shape (n_samples,)
            The true target labels for the training set.

        y_pred : array-like of shape (n_samples,)
            The predicted labels for the test set.

        p_pred : array-like of shape (n_samples, n_classes)
            The predicted class probabilities for the test set.

        model : object
            The trained model that is being evaluated.

        n_model : int
            The index of the model within the list of models being evaluated.
        """
        super().evaluate_scores(
            X_test=X_test,
            X_train=X_train,
            y_test=y_test,
            y_train=y_train,
            y_pred=y_pred,
            p_pred=p_pred,
            model=model,
            n_model=n_model,
        )

    def detect(self, detection_method=LOG_LOSS_MI_ESTIMATION):
        """Run the leakage detection with the specified statistical method.

        The call is forwarded unchanged to `InformationLeakageDetector.detect`;
        this override only fixes the default of ``detection_method`` to
        ``LOG_LOSS_MI_ESTIMATION``.

        Parameters
        ----------
        detection_method : str
            The method to use for detecting information leakage. Options include:

            - `paired-t-test`: Uses paired t-test to compare the accuracy of models against the
              majority voting baseline.
            - `paired-t-test-random`: Uses paired t-test to compare the accuracy of models against a
              random classifier.
            - `fishers-exact-mean`: Applies Fisher's Exact Test on the confusion matrix and computes
              the mean p-value.
            - `fishers-exact-median`: Applies Fisher's Exact Test on the confusion matrix and
              computes the median p-value.
            - `mid_point_mi`: Detects leakage using the midpoint mutual information estimation.
            - `log_loss_mi`: Detects leakage using log loss mutual information estimation.
            - `log_loss_mi_isotonic_regression`: Uses log loss mutual information estimation with
              isotonic regression calibration.
            - `log_loss_mi_platt_scaling`: Uses log loss mutual information estimation with Platt
              scaling calibration.
            - `log_loss_mi_beta_calibration`: Uses log loss mutual information estimation with beta
              calibration.
            - `log_loss_mi_temperature_scaling`: Uses log loss mutual information estimation with
              temperature scaling.
            - `log_loss_mi_histogram_binning`: Uses log loss mutual information estimation with
              histogram binning.
            - `p_c_softmax_mi`: Uses PC-Softmax mutual information estimation for detection.

        Returns
        -------
        detection_decision : bool
            Indicates whether any models showed significant leakage.
        hypothesis_rejected : int
            The number of models flagged for leakage.

        Notes
        -----
        The base-class implementation is documented as applying a Holm-Bonferroni correction to
        control the family-wise error rate for multiple models.
        """
        return super().detect(detection_method=detection_method)