# Source code for tn4ml.eval

import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc, precision_recall_curve, average_precision_score
from typing import Collection



[docs]
def plot_loss(history: dict, validation: bool = True, figsize: tuple =(5, 5), save_path: str = None, legend_args: dict = None):
    """
    Plot the training loss, and optionally the validation loss, per epoch.

    Parameters
    ----------
    history: dict
        History object from the model training. The key ``'loss'`` is always
        read; the key ``'val_loss'`` is read when ``validation`` is True.
    validation: bool
        Whether to plot the validation loss.
    figsize: tuple
        Size of the figure.
    save_path: str
        Path to save the plot, without extension: ``'.pdf'`` is appended.
        If empty or None, the plot is shown instead of saved.
    legend_args: dict
        Keyword arguments forwarded to ``matplotlib.pyplot.legend``.
        Defaults to no extra arguments.

    Returns
    -------
    None
        Displays or saves the plot.
    """
    plt.figure(figsize=figsize)
    plt.plot(range(len(history['loss'])), history['loss'], label='train')
    if validation:
        plt.plot(range(len(history['val_loss'])), history['val_loss'], label='validation')
    # The options must be unpacked as keywords: a single positional argument
    # is taken by matplotlib as the list of labels, not as legend options.
    plt.legend(**(legend_args or {}))
    plt.xlabel('Epochs')
    plt.ylabel('Loss')

    if save_path:
        plt.savefig(save_path + '.pdf', format='pdf', dpi=300)
    else:
        plt.show()
    plt.close()



[docs]
def plot_accuracy(history: dict, figsize: tuple =(5, 5), save_path: str = None, legend_args: dict = None):
    """
    Plot the validation accuracy of the model per epoch.

    Parameters
    ----------
    history: dict
        History object from the model training. Only the key ``'val_acc'``
        is read.
    figsize: tuple
        Size of the figure.
    save_path: str
        Path to save the plot, without extension: ``'.pdf'`` is appended.
        If empty or None, the plot is shown instead of saved.
    legend_args: dict
        Keyword arguments forwarded to ``matplotlib.pyplot.legend``.
        Defaults to no extra arguments.

    Returns
    -------
    None
        Displays or saves the plot.
    """
    plt.figure(figsize=figsize)
    plt.plot(range(len(history['val_acc'])), history['val_acc'], label='validation')
    # The options must be unpacked as keywords: a single positional argument
    # is taken by matplotlib as the list of labels, not as legend options.
    plt.legend(**(legend_args or {}))
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')

    if save_path:
        plt.savefig(save_path + '.pdf', format='pdf', dpi=300)
    else:
        plt.show()
    plt.close()



[docs]
def get_roc_curve_data(y_true: np.ndarray, y_scores: np.ndarray, anomaly_det: bool = False):
    """
    Compute the points of a ROC curve.

    Two input conventions are supported, selected by ``anomaly_det``.

    Parameters
    ----------
    y_true: :class:`numpy.ndarray`
        Ground-truth labels when ``anomaly_det`` is False; scores of the
        normal samples when ``anomaly_det`` is True.
    y_scores: :class:`numpy.ndarray`
        Predicted scores when ``anomaly_det`` is False; scores of the
        anomalous samples when ``anomaly_det`` is True.
    anomaly_det: bool
        If True, both inputs are treated as score arrays: the entries of
        ``y_scores`` are labelled 1, the entries of ``y_true`` are labelled 0,
        and the concatenated scores are used as predictions.

    Returns
    -------
    fpr: :class:`numpy.ndarray`
        False positive rate values.
    tpr: :class:`numpy.ndarray`
        True positive rate values.
    """
    labels, scores = y_true, y_scores
    if anomaly_det:
        # Anomalous samples come first (label 1), normal samples second (label 0).
        n_anomalous = y_scores.shape[0]
        n_normal = y_true.shape[0]
        labels = np.concatenate((np.ones(n_anomalous), np.zeros(n_normal)))
        scores = np.concatenate((y_scores, y_true))

    false_pos_rate, true_pos_rate, _thresholds = roc_curve(labels, scores, drop_intermediate=False)
    return false_pos_rate, true_pos_rate



[docs]
def get_precision_recall_curve_data(y_true: np.ndarray, y_scores: np.ndarray, anomaly_det: bool = False):
    """
    Compute the points of a precision-recall curve.

    Two input conventions are supported, selected by ``anomaly_det``.

    Parameters
    ----------
    y_true: :class:`numpy.ndarray`
        Ground-truth labels when ``anomaly_det`` is False; scores of the
        normal samples when ``anomaly_det`` is True.
    y_scores: :class:`numpy.ndarray`
        Predicted scores when ``anomaly_det`` is False; scores of the
        anomalous samples when ``anomaly_det`` is True.
    anomaly_det: bool
        If True, both inputs are treated as score arrays: the entries of
        ``y_scores`` are labelled 1, the entries of ``y_true`` are labelled 0,
        and the concatenated scores are used as predictions.

    Returns
    -------
    precision: :class:`numpy.ndarray`
        Precision values.
    recall: :class:`numpy.ndarray`
        Recall values.
    """
    if anomaly_det:
        # Anomalous samples come first (label 1), normal samples second (label 0).
        true_val = np.concatenate((np.ones(y_scores.shape[0]), np.zeros(y_true.shape[0])))
        pred_val = np.concatenate((y_scores, y_true))
    else:
        true_val = y_true
        pred_val = y_scores
    # `drop_intermediate` is deliberately not passed: False is the library
    # default, and the keyword is rejected by scikit-learn releases that
    # predate its introduction.
    precision, recall, _ = precision_recall_curve(true_val, pred_val)
    return precision, recall




[docs]
def get_FPR_for_fixed_TPR(tpr_window, fpr, tpr, tolerance):
    """
    Calculate the FPR for a fixed TPR value.

    The result is the mean FPR over all curve points whose TPR lies within
    ``tpr_window * (1 - tolerance)`` and ``tpr_window * (1 + tolerance)``.
    If no point falls inside that window, the point(s) whose TPR is closest
    to ``tpr_window`` are used instead, so a coarse curve does not yield NaN.

    Parameters
    ----------

    tpr_window: float
        Fixed TPR value.
    fpr: :class:`numpy.ndarray`
        False positive rate values.
    tpr: :class:`numpy.ndarray`
        True positive rate values, aligned element-wise with ``fpr``.
    tolerance: float
        Relative tolerance around the fixed TPR value.

    Returns
    -------
    fpr: float
        FPR value for the fixed TPR value. NaN if the curve is empty.
    """
    fpr = np.asarray(fpr)
    tpr = np.asarray(tpr)
    if tpr.size == 0:
        return np.nan

    lower = tpr_window - tpr_window * tolerance
    upper = tpr_window + tpr_window * tolerance
    position = np.where((tpr >= lower) & (tpr <= upper))[0]
    if position.size == 0:
        # Nothing inside the tolerance window: fall back to the nearest point(s).
        distance = np.abs(tpr - tpr_window)
        position = np.where(distance == distance.min())[0]
    return np.mean(fpr[position])



[docs]
def get_TPR_for_fixed_FPR(fpr_window, fpr, tpr, tolerance):
    """
    Calculate the TPR for a fixed FPR value.

    The result is the mean TPR over all curve points whose FPR lies within
    ``fpr_window * (1 - tolerance)`` and ``fpr_window * (1 + tolerance)``.
    If no point falls inside that window, the point(s) whose FPR is closest
    to ``fpr_window`` are used instead, so a coarse curve does not yield NaN.

    Parameters
    ----------

    fpr_window: float
        Fixed FPR value.
    fpr: :class:`numpy.ndarray`
        False positive rate values.
    tpr: :class:`numpy.ndarray`
        True positive rate values, aligned element-wise with ``fpr``.
    tolerance: float
        Relative tolerance around the fixed FPR value.

    Returns
    -------
    tpr: float
        TPR value for the fixed FPR value. NaN if the curve is empty.
    """
    fpr = np.asarray(fpr)
    tpr = np.asarray(tpr)
    if fpr.size == 0:
        return np.nan

    lower = fpr_window - fpr_window * tolerance
    upper = fpr_window + fpr_window * tolerance
    position = np.where((fpr >= lower) & (fpr <= upper))[0]
    if position.size == 0:
        # Nothing inside the tolerance window: fall back to the nearest point(s).
        distance = np.abs(fpr - fpr_window)
        position = np.where(distance == distance.min())[0]
    return np.mean(tpr[position])



[docs]
def get_mean_and_error(data):
    """
    Reduce the input along its first axis to a mean and a spread.

    Parameters
    ----------
    data: :class:`numpy.ndarray`
        Values to summarise; the reduction is taken over axis 0.

    Returns
    -------
    mean: :class:`numpy.ndarray`
        Mean of the input data along axis 0.
    std: :class:`numpy.ndarray`
        Standard deviation of the input data along axis 0
        (NumPy's default, i.e. population, standard deviation).
    """
    return np.mean(data, axis=0), np.std(data, axis=0)



[docs]
def plot_ROC_curve_from_metrics(y_true: np.ndarray, y_scores: np.ndarray, title: str = "ROC Curve", save_path: str = None):
    """
    Calculates TPR and FPR from labels and scores and plots the ROC curve.

    The curve points are computed with :func:`get_roc_curve_data` and the
    drawing is delegated to :func:`plot_ROC_curve_from_data`, so both entry
    points produce the same figure.

    Parameters
    ----------
    y_true: :class:`numpy.ndarray`
        List or array of true binary labels (0 or 1).
    y_scores: :class:`numpy.ndarray`
        List or array of predicted scores or probabilities.
    title: str (Optional)
        Title for the plot. Defaults to "ROC Curve".
    save_path: str (Optional)
        Path to save the plot, without extension: ``'.pdf'`` is appended.
        If empty or None, the plot is shown instead of saved.

    Returns
    -------
    None
        Displays or saves the plot.
    """
    fpr, tpr = get_roc_curve_data(y_true, y_scores)
    plot_ROC_curve_from_data(fpr, tpr, title=title, save_path=save_path)



[docs]
def plot_ROC_curve_from_data(fpr: np.ndarray, tpr: np.ndarray, title: str = "ROC Curve", save_path: str = None):
    """
    Draw a ROC curve from precomputed FPR and TPR values.

    The area under the curve is computed from the given points and shown in
    the legend, next to a dashed diagonal marking a random classifier.

    Parameters
    ----------
    fpr: :class:`numpy.ndarray`
        False positive rate values.
    tpr: :class:`numpy.ndarray`
        True positive rate values.
    title: str (Optional)
        Title for the plot. Defaults to "ROC Curve".
    save_path: str (Optional)
        Path to save the plot, without extension: ``'.pdf'`` is appended,
        e.g. ``./ROC_curve`` is written to ``./ROC_curve.pdf``.
        If empty or None, the plot is shown instead of saved.

    Returns
    -------
    None
        Displays or saves the plot.
    """
    area = auc(fpr, tpr)
    curve_label = f"ROC Curve (AUC = {area:.2f})"

    plt.figure(figsize=(8, 6))
    plt.plot(fpr, tpr, color="blue", lw=2, label=curve_label)
    # Diagonal reference line of a classifier with no discriminative power.
    plt.plot([0, 1], [0, 1], color="red", linestyle="--", label="Random Guess")

    plt.title(title)
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.grid(alpha=0.3)
    plt.legend(loc="lower right")
    plt.tight_layout()

    if not save_path:
        plt.show()
    else:
        plt.savefig(save_path + '.pdf', format='pdf', dpi=300)
    plt.close()



[docs]
def plot_PR_curve(y_true: np.ndarray, y_scores: np.ndarray, title: str = "Precision-Recall Curve", save_path: str = None):
    """
    Calculates precision and recall from labels and scores and plots the Precision-Recall curve.

    When ``y_true`` contains only 0 and 1, the average precision is shown in
    the legend and a horizontal "Random Guess" baseline is drawn at the
    fraction of positive samples. Otherwise only the curve is drawn.

    Parameters
    ----------
    y_true: :class:`numpy.ndarray`
        List or array of true binary labels (0 or 1).
    y_scores: :class:`numpy.ndarray`
        List or array of predicted scores or probabilities.
    title: str (Optional)
        Title for the plot. Defaults to "Precision-Recall Curve".
    save_path: str (Optional)
        Path to save the plot, without extension: ``'.pdf'`` is appended.
        If empty or None, the plot is shown instead of saved.

    Returns
    -------
    None
        Displays or saves the plot.
    """
    # Calculate precision and recall along the curve
    precision, recall = get_precision_recall_curve_data(y_true, y_scores)

    # Convert once so that plain lists are also recognised as binary labels.
    labels = np.asarray(y_true)
    is_binary = bool(labels.size) and bool(np.all((labels == 0) | (labels == 1)))

    if is_binary:
        # The summary value is the average precision, so it is labelled AP.
        avg_precision = average_precision_score(y_true, y_scores)
        label = f"PR Curve (AP = {avg_precision:.2f})"
    else:
        label = 'PR Curve'

    # Plot the Precision-Recall curve
    plt.figure(figsize=(8, 6))
    plt.plot(recall, precision, color="blue", lw=2, label=label)
    if is_binary:
        # In PR space a random classifier has constant precision equal to the
        # positive-class prevalence, i.e. a horizontal line, not a diagonal.
        prevalence = float(np.mean(labels))
        plt.plot([0, 1], [prevalence, prevalence], color="red", linestyle="--", label="Random Guess")
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.title(title)
    plt.legend(loc="lower right")
    plt.grid(alpha=0.3)
    plt.tight_layout()

    if save_path:
        plt.savefig(save_path + '.pdf', format='pdf', dpi=300)
    else:
        plt.show()
    plt.close()



[docs]
def compare_AUC(save_dir: str = '.',
                bond_dims: Collection[int] = None,
                spacings: Collection[int] = None,
                initializers: Collection[str] = None,
                embedding: str = 'trigonometric',
                nruns: int = 0,
                fig_size: tuple = (6, 5),
                labels: dict = None,
                anomaly_det: bool = False):
    """
    Example of code to compare the AUC values for different values of hyperparameters, when spacing parameter is fixed.
    - code for generating plots from the paper "tn4ml: Tensor Network Training and Customization for Machine Learning"

    One figure is produced per spacing value. Each figure shows, for every
    bond dimension, the mean AUC over the runs (with the standard deviation
    as error bar) against the initializer.

    This works with the results saved in the directory structure as follows::

        root_dir/initializer_string/bond_<bond_dim>/spacing_<spacing>/<embedding_string>/run_<nrun>

    Example::

        root_dir/randn_1e-1/bond_10/spacing_2/trigonometric/run_1

    Each run directory must contain ``normal_score.npy`` and ``anomaly_score.npy``.

    Parameters
    ----------
    save_dir: str
        Root directory of the results. Plots are written to
        ``<save_dir>/results/plots/AUC/spacing_<spacing>.pdf``; if ``save_dir``
        is empty the plots are shown instead.
    bond_dims: list[int]
        List of bond dimensions.
    spacings: list[int]
        List of spacing values. If model is :class:`tn4ml.models.smpo.SpacedMatrixProductOperator`, then spacing is required.
    initializers: list[str]
        List of initializers.
    embedding: str
        Name of the embedding, used as a directory name.
    nruns: int
        Number of runs for each model. With ``nruns == 1`` the scores are
        read directly from the embedding directory (no ``run_<n>``
        subdirectory); otherwise from ``run_1`` ... ``run_<nruns>``.
        Should be at least 1: with 0 no scores are loaded and the plotted
        values are NaN.
    fig_size: tuple
        Size of the figure.
    labels: dict
        Dictionary mapping ``str(bond_dim)`` to a ``(label, marker, color)``
        tuple. If None, ``'bond = <bond_dim>'`` with marker ``'o'`` and the
        default color cycle is used.
        Example::

            LABELS = {'5': (r'bond = 5', 'o', '#016c59'),
                    '10': (r'bond = 10','X', '#7a5195'),
                    '30': (r'bond = 30', 'v', '#67a9cf'),
                    '50': (r'bond = 50', 'd', '#ffa600')}
    anomaly_det: bool
        Forwarded to :func:`get_roc_curve_data`.

    Returns
    -------
    None
        Displays or saves the plot.
    """

    for spacing in spacings:
        plt.figure(figsize=fig_size)
        auc_per_bond_data = {}
        auc_per_bond_err = {}
        for bond_dim in bond_dims:
            auc_per_init_data = []
            auc_per_init_err = []
            for init in initializers:
                base_dir = f'{save_dir}/{init}/bond_{bond_dim}/spacing_{spacing}/{embedding}'
                auc_data = []
                for j in range(1, nruns + 1):
                    # A single run is stored without a run_<n> subdirectory.
                    dir_name = base_dir if nruns == 1 else f'{base_dir}/run_{j}'
                    fpr, tpr = get_roc_curve_data(np.load(dir_name + '/normal_score.npy'),
                                                  np.load(dir_name + '/anomaly_score.npy'),
                                                  anomaly_det=anomaly_det)
                    auc_data.append(auc(fpr, tpr))
                mean, err = get_mean_and_error(np.array(auc_data))
                auc_per_init_data.append(mean)
                auc_per_init_err.append(err)
            auc_per_bond_data[bond_dim] = auc_per_init_data
            auc_per_bond_err[bond_dim] = auc_per_init_err

        x_positions = list(range(len(initializers)))
        for bond_dim in bond_dims:
            if labels is None:
                # No styling supplied: generic label, default color cycle.
                style_kwargs = {'label': f'bond = {bond_dim}', 'marker': 'o'}
            else:
                style = labels[str(bond_dim)]
                style_kwargs = {'label': style[0], 'marker': style[1], 'color': style[2]}
            plt.errorbar(x_positions, auc_per_bond_data[bond_dim], yerr=auc_per_bond_err[bond_dim],
                         linestyle='None', capsize=3, **style_kwargs)

        plt.title(f'S = {spacing}')
        plt.ylabel('AUC')
        plt.yticks(fontsize=12)
        plt.xticks(range(len(initializers)), initializers, fontsize=14)
        plt.grid(True)
        plt.tight_layout()
        plt.legend(fancybox=True, frameon=True, prop={"size":10}, loc='best')

        if save_dir:
            plot_dir = f'{save_dir}/results/plots/AUC'
            # exist_ok avoids the check-then-create race.
            os.makedirs(plot_dir, exist_ok=True)
            plt.savefig(f'{plot_dir}/spacing_{spacing}.pdf')
        else:
            plt.show()
        plt.close()



[docs]
def compare_TPR_per_FPR(save_dir: str = '.',
                FPR_fixed: float = 0.1,
                bond_dims: Collection[int] = None,
                spacings: Collection[int] = None,
                initializers: Collection[str] = None,
                embedding: str = 'trigonometric',
                nruns: int = 0,
                fig_size: tuple = (6, 5),
                labels: dict = None,
                anomaly_det: bool = False,
                tolerance: float = 0.01):
    """
    Example of code to compare the TPR values for fixed FPR for different values of hyperparameters, when spacing parameter is fixed.

    One figure is produced per spacing value. Each figure shows, for every
    bond dimension, the mean TPR at ``FPR_fixed`` over the runs (with the
    standard deviation as error bar) against the initializer.

    This works with the results saved in the directory structure as follows::

        root_dir/initializer_string/bond_<bond_dim>/spacing_<spacing>/<embedding_string>/run_<nrun>

    Example::

        root_dir/randn_1e-1/bond_10/spacing_2/trigonometric/run_1

    Each run directory must contain ``normal_score.npy`` and ``anomaly_score.npy``.

    Parameters
    ----------
    save_dir : str
        Root directory of the results. Plots are written to
        ``<save_dir>/results/plots/TPR/spacing_<spacing>_FPR_<FPR_fixed>.pdf``;
        if ``save_dir`` is empty the plots are shown instead.
    FPR_fixed : float
        Fixed FPR value.
    bond_dims : list[int]
        List of bond dimensions.
    spacings : list[int]
        List of spacing values. If model is :class:`tn4ml.models.smpo.SpacedMatrixProductOperator`, then spacing is required.
    initializers : list[str]
        List of initializers.
    embedding : str
        Embedding method used; also a directory name.
    nruns : int
        Number of runs for each model. With ``nruns == 1`` the scores are
        read directly from the embedding directory (no ``run_<n>``
        subdirectory); otherwise from ``run_1`` ... ``run_<nruns>``.
        Should be at least 1: with 0 no scores are loaded and the plotted
        values are NaN.
    fig_size : tuple
        Size of the figure.
    labels : dict
        Dictionary mapping ``str(bond_dim)`` to a ``(label, marker, color)``
        tuple. If None, ``'bond = <bond_dim>'`` with marker ``'o'`` and the
        default color cycle is used.

        Example::

            LABELS = {'5': (r'bond = 5', 'o', '#016c59'),
                    '10': (r'bond = 10', 'X', '#7a5195'),
                    '30': (r'bond = 30', 'v', '#67a9cf'),
                    '50': (r'bond = 50', 'd', '#ffa600')}
    anomaly_det : bool
        Forwarded to :func:`get_roc_curve_data`.
    tolerance : float
        Relative tolerance around ``FPR_fixed``, forwarded to
        :func:`get_TPR_for_fixed_FPR`. Defaults to 0.01.

    Returns
    -------
    None
        Displays or saves the plot.
    """

    for spacing in spacings:
        plt.figure(figsize=fig_size)
        tpr_per_bond_data = {}
        tpr_per_bond_err = {}
        for bond_dim in bond_dims:
            tpr_per_init_data = []
            tpr_per_init_err = []
            for init in initializers:
                base_dir = f'{save_dir}/{init}/bond_{bond_dim}/spacing_{spacing}/{embedding}'
                tpr_data = []
                for j in range(1, nruns + 1):
                    # A single run is stored without a run_<n> subdirectory.
                    dir_name = base_dir if nruns == 1 else f'{base_dir}/run_{j}'
                    fpr, tpr = get_roc_curve_data(np.load(dir_name + '/normal_score.npy'),
                                                  np.load(dir_name + '/anomaly_score.npy'),
                                                  anomaly_det=anomaly_det)
                    tpr_data.append(get_TPR_for_fixed_FPR(FPR_fixed, np.array(fpr), np.array(tpr), tolerance=tolerance))
                mean, err = get_mean_and_error(np.array(tpr_data))
                tpr_per_init_data.append(mean)
                tpr_per_init_err.append(err)
            tpr_per_bond_data[bond_dim] = tpr_per_init_data
            tpr_per_bond_err[bond_dim] = tpr_per_init_err

        x_positions = list(range(len(initializers)))
        for bond_dim in bond_dims:
            if labels is None:
                # No styling supplied: generic label, default color cycle.
                style_kwargs = {'label': f'bond = {bond_dim}', 'marker': 'o'}
            else:
                style = labels[str(bond_dim)]
                style_kwargs = {'label': style[0], 'marker': style[1], 'color': style[2]}
            plt.errorbar(x_positions, tpr_per_bond_data[bond_dim], yerr=tpr_per_bond_err[bond_dim],
                         linestyle='None', capsize=3, **style_kwargs)

        plt.title(f'S = {spacing}, FPR = {FPR_fixed}')
        plt.ylabel('TPR')
        plt.yticks(fontsize=12)
        plt.xticks(range(len(initializers)), initializers, fontsize=14)
        plt.grid(True)
        plt.tight_layout()
        plt.legend(fancybox=True, frameon=True, prop={"size":10}, loc='best')

        if save_dir:
            plot_dir = f'{save_dir}/results/plots/TPR'
            # exist_ok avoids the check-then-create race.
            os.makedirs(plot_dir, exist_ok=True)
            plt.savefig(f'{plot_dir}/spacing_{spacing}_FPR_{FPR_fixed}.pdf')
        else:
            plt.show()
        plt.close()



[docs]
def compare_FPR_per_TPR(save_dir: str = '.',
                TPR_fixed: float = 0.95,
                bond_dims: Collection[int] = None,
                spacings: Collection[int] = None,
                initializers: Collection[str] = None,
                embedding: str = 'trigonometric',
                nruns: int = 0,
                fig_size: tuple = (6, 5),
                labels: dict = None,
                anomaly_det: bool = False,
                tolerance: float = 0.01):
    """
    Example of code to compare the FPR values for fixed TPR for different values of hyperparameters, when spacing parameter is fixed.
    - code for generating plots from the paper "tn4ml: Tensor Network Training and Customization for Machine Learning"

    One figure is produced per spacing value. Each figure shows, for every
    bond dimension, the mean FPR at ``TPR_fixed`` over the runs (with the
    standard deviation as error bar) against the initializer.

    This works with the results saved in the directory structure as follows::

        root_dir/initializer_string/bond_<bond_dim>/spacing_<spacing>/<embedding_string>/run_<nrun>

    Example::

        root_dir/randn_1e-1/bond_10/spacing_2/trigonometric/run_1

    Each run directory must contain ``normal_score.npy`` and ``anomaly_score.npy``.

    Parameters
    ----------
    save_dir: str
        Root directory of the results. Plots are written to
        ``<save_dir>/results/plots/FPR/spacing_<spacing>_TPR_<TPR_fixed>.pdf``;
        if ``save_dir`` is empty the plots are shown instead.
    TPR_fixed: float
        Fixed TPR value.
    bond_dims: list[int]
        List of bond dimensions.
    spacings: list[int]
        List of spacing values. If model is :class:`tn4ml.models.smpo.SpacedMatrixProductOperator`, then spacing is required.
    initializers: list[str]
        List of initializers.
    embedding: str
        Name of the embedding, used as a directory name.
    nruns: int
        Number of runs for each model. With ``nruns == 1`` the scores are
        read directly from the embedding directory (no ``run_<n>``
        subdirectory); otherwise from ``run_1`` ... ``run_<nruns>``.
        Should be at least 1: with 0 no scores are loaded and the plotted
        values are NaN.
    fig_size: tuple
        Size of the figure.
    labels: dict
        Dictionary mapping ``str(bond_dim)`` to a ``(label, marker, color)``
        tuple. If None, ``'bond = <bond_dim>'`` with marker ``'o'`` and the
        default color cycle is used.
        Example::

            LABELS = {'5': (r'bond = 5', 'o', '#016c59'),
                    '10': (r'bond = 10','X', '#7a5195'),
                    '30': (r'bond = 30', 'v', '#67a9cf'),
                    '50': (r'bond = 50', 'd', '#ffa600')}
    anomaly_det: bool
        Forwarded to :func:`get_roc_curve_data`.
    tolerance: float
        Relative tolerance around ``TPR_fixed``, forwarded to
        :func:`get_FPR_for_fixed_TPR`. Defaults to 0.01.

    Returns
    -------
    None
        Displays or saves the plot.
    """

    for spacing in spacings:
        plt.figure(figsize=fig_size)
        fpr_per_bond_data = {}
        fpr_per_bond_err = {}
        for bond_dim in bond_dims:
            fpr_per_init_data = []
            fpr_per_init_err = []
            for init in initializers:
                base_dir = f'{save_dir}/{init}/bond_{bond_dim}/spacing_{spacing}/{embedding}'
                fpr_data = []
                for j in range(1, nruns + 1):
                    # A single run is stored without a run_<n> subdirectory.
                    dir_name = base_dir if nruns == 1 else f'{base_dir}/run_{j}'
                    fpr, tpr = get_roc_curve_data(np.load(dir_name + '/normal_score.npy'),
                                                  np.load(dir_name + '/anomaly_score.npy'),
                                                  anomaly_det=anomaly_det)
                    fpr_data.append(get_FPR_for_fixed_TPR(TPR_fixed, np.array(fpr), np.array(tpr), tolerance=tolerance))
                mean, err = get_mean_and_error(np.array(fpr_data))
                fpr_per_init_data.append(mean)
                fpr_per_init_err.append(err)
            fpr_per_bond_data[bond_dim] = fpr_per_init_data
            fpr_per_bond_err[bond_dim] = fpr_per_init_err

        x_positions = list(range(len(initializers)))
        for bond_dim in bond_dims:
            if labels is None:
                # No styling supplied: generic label, default color cycle.
                style_kwargs = {'label': f'bond = {bond_dim}', 'marker': 'o'}
            else:
                style = labels[str(bond_dim)]
                style_kwargs = {'label': style[0], 'marker': style[1], 'color': style[2]}
            plt.errorbar(x_positions, fpr_per_bond_data[bond_dim], yerr=fpr_per_bond_err[bond_dim],
                         linestyle='None', capsize=3, **style_kwargs)

        # The fixed quantity here is the TPR; the plotted quantity is the FPR.
        plt.title(f'S = {spacing}, TPR = {TPR_fixed}')
        plt.ylabel('FPR')
        plt.yticks(fontsize=12)
        plt.xticks(range(len(initializers)), initializers, fontsize=14)
        plt.grid(True)
        plt.tight_layout()
        plt.legend(fancybox=True, frameon=True, prop={"size":10}, loc='best')

        if save_dir:
            plot_dir = f'{save_dir}/results/plots/FPR'
            # exist_ok avoids the check-then-create race.
            os.makedirs(plot_dir, exist_ok=True)
            plt.savefig(f'{plot_dir}/spacing_{spacing}_TPR_{TPR_fixed}.pdf')
        else:
            plt.show()
        plt.close()