Source code for tn4ml.eval

import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc, precision_recall_curve, average_precision_score
from typing import Collection


def plot_loss(history: dict, validation: bool = True, figsize: tuple = (5, 5),
              save_path: str = None, legend_args: dict = None):
    """
    Plot the training loss (and optionally the validation loss) per epoch.

    Parameters
    ----------
    history: dict
        History object from the model training. Must contain the key
        ``'loss'`` and, when `validation` is True, the key ``'val_loss'``.
    validation: bool
        Whether to plot the validation loss.
    figsize: tuple
        Size of the figure.
    save_path: str
        Path to save the plot, without extension (``'.pdf'`` is appended).
        If None or empty, the plot is shown instead of saved.
    legend_args: dict
        Keyword arguments forwarded to :func:`matplotlib.pyplot.legend`
        (e.g. ``{'loc': 'best'}``). Defaults to no extra arguments.

    Returns
    -------
    Displays or saves the plot.
    """
    # Avoid a shared mutable default; None means "no extra legend options".
    legend_kwargs = {} if legend_args is None else legend_args

    plt.figure(figsize=figsize)
    plt.plot(range(len(history['loss'])), history['loss'], label='train')
    if validation:
        plt.plot(range(len(history['val_loss'])), history['val_loss'], label='validation')

    # The options must be unpacked as keywords: a dict passed positionally is
    # interpreted by matplotlib as the sequence of labels, which leaves the
    # legend empty for an empty dict.
    plt.legend(**legend_kwargs)
    plt.xlabel('Epochs')
    plt.ylabel('Loss')

    if save_path:
        plt.savefig(save_path + '.pdf', format='pdf', dpi=300)
    else:
        plt.show()
    plt.close()
def plot_accuracy(history: dict, figsize: tuple = (5, 5), save_path: str = None,
                  legend_args: dict = None):
    """
    Plot the validation accuracy of the model per epoch.

    Parameters
    ----------
    history: dict
        History object from the model training. Must contain the key
        ``'val_acc'``; only this series is plotted.
    figsize: tuple
        Size of the figure.
    save_path: str
        Path to save the plot, without extension (``'.pdf'`` is appended).
        If None or empty, the plot is shown instead of saved.
    legend_args: dict
        Keyword arguments forwarded to :func:`matplotlib.pyplot.legend`
        (e.g. ``{'loc': 'best'}``). Defaults to no extra arguments.

    Returns
    -------
    Displays or saves the plot.
    """
    # Avoid a shared mutable default; None means "no extra legend options".
    legend_kwargs = {} if legend_args is None else legend_args

    plt.figure(figsize=figsize)
    plt.plot(range(len(history['val_acc'])), history['val_acc'], label='validation')

    # Unpack as keywords: a dict passed positionally would be treated by
    # matplotlib as the sequence of labels and hide the 'validation' entry.
    plt.legend(**legend_kwargs)
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')

    if save_path:
        plt.savefig(save_path + '.pdf', format='pdf', dpi=300)
    else:
        plt.show()
    plt.close()
def get_roc_curve_data(y_true: np.ndarray, y_scores: np.ndarray, anomaly_det: bool = False):
    """
    Compute the ROC curve (FPR and TPR values) for a set of scores.

    Parameters
    ----------
    y_true: :class:`numpy.ndarray`
        True labels, or - when `anomaly_det` is True - the scores of the
        normal samples.
    y_scores: :class:`numpy.ndarray`
        Predicted scores, or - when `anomaly_det` is True - the scores of the
        anomalous samples.
    anomaly_det: bool
        If True, both inputs are treated as score arrays: the entries of
        `y_scores` are labelled 1 and the entries of `y_true` are labelled 0,
        and the two arrays are concatenated (anomalies first) before the
        curve is computed. If False, the inputs are passed on unchanged as
        labels and scores.

    Returns
    -------
    fpr: :class:`numpy.ndarray`
        False positive rate values.
    tpr: :class:`numpy.ndarray`
        True positive rate values.
    """
    targets, scores = y_true, y_scores
    if anomaly_det:
        # Build binary targets from the two score populations.
        anomaly_targets = np.ones(y_scores.shape[0])
        normal_targets = np.zeros(y_true.shape[0])
        targets = np.concatenate((anomaly_targets, normal_targets))
        scores = np.concatenate((y_scores, y_true))

    # Thresholds (third return value) are not needed by callers.
    false_pos_rate, true_pos_rate, _ = roc_curve(targets, scores, drop_intermediate=False)
    return false_pos_rate, true_pos_rate
def get_precision_recall_curve_data(y_true: np.ndarray, y_scores: np.ndarray, anomaly_det: bool = False):
    """
    Compute the precision-recall curve for a set of scores.

    Parameters
    ----------
    y_true: :class:`numpy.ndarray`
        True labels, or - when `anomaly_det` is True - the scores of the
        normal samples.
    y_scores: :class:`numpy.ndarray`
        Predicted scores, or - when `anomaly_det` is True - the scores of the
        anomalous samples.
    anomaly_det: bool
        If True, both inputs are treated as score arrays: the entries of
        `y_scores` are labelled 1 and the entries of `y_true` are labelled 0,
        and the two arrays are concatenated (anomalies first) before the
        curve is computed. If False, the inputs are passed on unchanged as
        labels and scores.

    Returns
    -------
    precision: :class:`numpy.ndarray`
        Precision values.
    recall: :class:`numpy.ndarray`
        Recall values.
    """
    targets, scores = y_true, y_scores
    if anomaly_det:
        # Build binary targets from the two score populations.
        anomaly_targets = np.ones(y_scores.shape[0])
        normal_targets = np.zeros(y_true.shape[0])
        targets = np.concatenate((anomaly_targets, normal_targets))
        scores = np.concatenate((y_scores, y_true))

    # Thresholds (third return value) are not needed by callers.
    precision_values, recall_values, _ = precision_recall_curve(targets, scores, drop_intermediate=False)
    return precision_values, recall_values
def get_FPR_for_fixed_TPR(tpr_window, fpr, tpr, tolerance):
    """
    Calculate the FPR for a fixed TPR value.

    The result is the mean of all FPR values whose TPR lies within the
    relative window ``tpr_window * (1 +/- tolerance)``. If no point of the
    curve falls inside that window (coarse curves, small tolerances), the FPR
    is linearly interpolated at ``tpr_window`` instead of returning NaN.

    Parameters
    ----------
    tpr_window: float
        Fixed TPR value.
    fpr: :class:`numpy.ndarray`
        False positive rate values.
    tpr: :class:`numpy.ndarray`
        True positive rate values (same length as `fpr`).
    tolerance: float
        Relative tolerance for the fixed TPR value.

    Returns
    -------
    fpr: float
        FPR value for the fixed TPR value. NaN if the curve is empty. When
        interpolating, targets outside the range of `tpr` take the FPR of the
        nearest end point.
    """
    fpr = np.asarray(fpr, dtype=float)
    tpr = np.asarray(tpr, dtype=float)

    margin = tpr_window * tolerance
    in_window = (tpr >= tpr_window - margin) & (tpr <= tpr_window + margin)
    if in_window.any():
        return np.mean(fpr[in_window])

    if tpr.size == 0:
        # Nothing to average or interpolate.
        return np.nan

    # np.interp requires increasing sample points; sort defensively so that
    # curves not produced by roc_curve are handled as well.
    order = np.argsort(tpr, kind='stable')
    return np.interp(tpr_window, tpr[order], fpr[order])
def get_TPR_for_fixed_FPR(fpr_window, fpr, tpr, tolerance):
    """
    Calculate the TPR for a fixed FPR value.

    The result is the mean of all TPR values whose FPR lies within the
    relative window ``fpr_window * (1 +/- tolerance)``. If no point of the
    curve falls inside that window (coarse curves, small tolerances), the TPR
    is linearly interpolated at ``fpr_window`` instead of returning NaN.

    Parameters
    ----------
    fpr_window: float
        Fixed FPR value.
    fpr: :class:`numpy.ndarray`
        False positive rate values.
    tpr: :class:`numpy.ndarray`
        True positive rate values (same length as `fpr`).
    tolerance: float
        Relative tolerance for the fixed FPR value.

    Returns
    -------
    tpr: float
        TPR value for the fixed FPR value. NaN if the curve is empty. When
        interpolating, targets outside the range of `fpr` take the TPR of the
        nearest end point.
    """
    fpr = np.asarray(fpr, dtype=float)
    tpr = np.asarray(tpr, dtype=float)

    margin = fpr_window * tolerance
    in_window = (fpr >= fpr_window - margin) & (fpr <= fpr_window + margin)
    if in_window.any():
        return np.mean(tpr[in_window])

    if fpr.size == 0:
        # Nothing to average or interpolate.
        return np.nan

    # np.interp requires increasing sample points; sort defensively so that
    # curves not produced by roc_curve are handled as well.
    order = np.argsort(fpr, kind='stable')
    return np.interp(fpr_window, fpr[order], tpr[order])
def get_mean_and_error(data):
    """
    Return the mean and the standard deviation of `data` along its first axis.

    Parameters
    ----------
    data: :class:`numpy.ndarray`
        Input data; statistics are taken over ``axis=0``.

    Returns
    -------
    mean: :class:`numpy.ndarray`
        Mean of the input data.
    std: :class:`numpy.ndarray`
        Standard deviation of the input data (NumPy's default, i.e. the
        population standard deviation).
    """
    return np.mean(data, axis=0), np.std(data, axis=0)
def plot_ROC_curve_from_metrics(y_true: np.ndarray, y_scores: np.ndarray, title: str = "ROC Curve", save_path: str = None):
    """
    Compute the ROC curve from labels and scores and plot it.

    Parameters
    ----------
    y_true: :class:`numpy.ndarray`
        List or array of true binary labels (0 or 1).
    y_scores: :class:`numpy.ndarray`
        List or array of predicted scores or probabilities.
    title: str
        (Optional) Title for the plot. Defaults to "ROC Curve".
    save_path: str
        (Optional) Path to save the plot, without extension (``'.pdf'`` is
        appended). If None or empty, the plot is shown instead.

    Returns
    ------
    Displays or saves the plot.
    """
    # Derive the curve, then reuse the shared plotting routine, which draws
    # the curve with its AUC, the diagonal reference line, and saves or shows
    # the figure.
    fpr, tpr = get_roc_curve_data(y_true, y_scores)
    plot_ROC_curve_from_data(fpr, tpr, title=title, save_path=save_path)
def plot_ROC_curve_from_data(fpr: np.ndarray, tpr: np.ndarray, title: str = "ROC Curve", save_path: str = None):
    """
    Plot a ROC curve from precomputed FPR and TPR values.

    Parameters
    ----------
    fpr: :class:`numpy.ndarray`
        False positive rate values.
    tpr: :class:`numpy.ndarray`
        True positive rate values.
    title: str
        (Optional) Title for the plot. Defaults to "ROC Curve".
    save_path: str
        (Optional) Path to save the plot, without extension (``'.pdf'`` is
        appended, e.g. ``'./ROC_curve'`` is written to ``'./ROC_curve.pdf'``).
        If None or empty, the plot is shown instead.

    Returns
    ------
    Displays or saves the plot.
    """
    # Area under the curve, reported in the legend entry.
    area = auc(fpr, tpr)
    curve_label = f"ROC Curve (AUC = {area:.2f})"

    plt.figure(figsize=(8, 6))
    plt.plot(fpr, tpr, color="blue", lw=2, label=curve_label)
    # Diagonal: expected curve of a random classifier.
    plt.plot([0, 1], [0, 1], color="red", linestyle="--", label="Random Guess")

    plt.title(title)
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.grid(alpha=0.3)
    plt.legend(loc="lower right")
    plt.tight_layout()

    if not save_path:
        plt.show()
    else:
        plt.savefig(save_path + '.pdf', format='pdf', dpi=300)
    plt.close()
def plot_PR_curve(y_true: np.ndarray, y_scores: np.ndarray, title: str = "Precision-Recall Curve", save_path: str = None):
    """
    Compute precision and recall from labels and scores and plot the
    Precision-Recall curve.

    Parameters
    ----------
    y_true: :class:`numpy.ndarray`
        List or array of true binary labels (0 or 1).
    y_scores: :class:`numpy.ndarray`
        List or array of predicted scores or probabilities.
    title: str
        (Optional) Title for the plot. Defaults to "Precision-Recall Curve".
    save_path: str
        (Optional) Path to save the plot, without extension (``'.pdf'`` is
        appended). If None or empty, the plot is shown instead.

    Returns
    ------
    Displays or saves the plot.

    Notes
    -----
    When `y_true` contains only 0/1 labels, the average precision is shown in
    the legend and a horizontal "Random Guess" line is drawn at the fraction
    of positive labels. For any other label values both are omitted.
    """
    precision, recall = get_precision_recall_curve_data(y_true, y_scores)

    # Accept plain lists as well as arrays for the binary-label check.
    targets = np.asarray(y_true)
    is_binary = bool(np.all((targets == 0) | (targets == 1)))

    if is_binary:
        auc_pr = average_precision_score(y_true, y_scores)
        label = f"PR Curve (AUC = {auc_pr:.2f})"
    else:
        label = 'PR Curve'

    plt.figure(figsize=(8, 6))
    plt.plot(recall, precision, color="blue", lw=2, label=label)
    if is_binary and targets.size:
        # A random classifier has constant precision equal to the positive
        # prevalence, so the PR baseline is horizontal (not the ROC diagonal).
        prevalence = float(np.mean(targets))
        plt.plot([0, 1], [prevalence, prevalence], color="red", linestyle="--", label="Random Guess")
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    # The x axis carries recall and the y axis precision (see plt.plot above).
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.title(title)
    plt.legend(loc="lower right")
    plt.grid(alpha=0.3)
    plt.tight_layout()

    if save_path:
        plt.savefig(save_path + '.pdf', format='pdf', dpi=300)
    else:
        plt.show()
    plt.close()
def compare_AUC(save_dir: str = '.',
                bond_dims: Collection[int] = None,
                spacings: Collection[int] = None,
                initializers: Collection[str] = None,
                embedding: str = 'trigonometric',
                nruns: int = 0,
                fig_size: tuple = (6, 5),
                labels: dict = None,
                anomaly_det: bool = False):
    """
    Example of code to compare the AUC values for different values of
    hyperparameters, when spacing parameter is fixed (one figure per spacing).
    - code for generating plots from the paper "tn4ml: Tensor Network Training and Customization for Machine Learning"

    This works with the results saved in the directory structure as follows::

        root_dir/initializer_string/bond_<bond_dim>/spacing_<spacing>/<embedding_string>/run_<nrun>

    Example::

        root_dir/randn_1e-1/bond_10/spacing_2/trigonometric/run_1

    Each result directory must contain ``normal_score.npy`` and
    ``anomaly_score.npy``.

    Parameters
    ----------
    save_dir: str
        Root directory where the results are saved. If non-empty, the figures
        are written to ``<save_dir>/results/plots/AUC/spacing_<spacing>.pdf``;
        otherwise they are shown.
    bond_dims: list[int]
        List of bond dimensions.
    spacings: list[int]
        List of spacing values. If model is :class:`tn4ml.models.smpo.SpacedMatrixProductOperator`, then spacing is required.
    initializers: list[str]
        List of initializers.
    embedding: str
        Name of the embedding (directory component).
    nruns: int
        Number of runs for each model. Runs are read from ``run_1`` ...
        ``run_<nruns>``; if ``nruns == 1`` the scores are read directly from
        the embedding directory (no ``run_`` sub-directory). With the default
        ``nruns = 0`` no scores are loaded and the plotted values are NaN.
    fig_size: tuple
        Size of the figure.
    labels: dict
        Dictionary mapping ``str(bond_dim)`` to ``(legend label, marker, color)``.
        If None, ``'bond = <bond_dim>'`` with marker ``'o'`` and the default
        color cycle is used.

        Example::

            LABELS = {'5': (r'bond = 5', 'o', '#016c59'),
                      '10': (r'bond = 10','X', '#7a5195'),
                      '30': (r'bond = 30', 'v', '#67a9cf'),
                      '50': (r'bond = 50', 'd', '#ffa600')}
    anomaly_det: bool
        Forwarded to :func:`get_roc_curve_data`.

    Returns
    -------
    Displays or saves the plot.
    """
    for spacing in spacings:
        plt.figure(figsize=fig_size)

        for bond_dim in bond_dims:
            auc_mean_per_init = []
            auc_err_per_init = []
            for init in initializers:
                base_dir = f'{save_dir}/{init}/bond_{bond_dim}/spacing_{spacing}/{embedding}'
                auc_runs = []
                for run in range(1, nruns + 1):
                    # A single run is stored without the run_<n> sub-directory.
                    run_dir = base_dir if nruns == 1 else f'{base_dir}/run_{run}'
                    fpr, tpr = get_roc_curve_data(np.load(run_dir + '/normal_score.npy'),
                                                  np.load(run_dir + '/anomaly_score.npy'),
                                                  anomaly_det=anomaly_det)
                    auc_runs.append(auc(fpr, tpr))
                mean, err = get_mean_and_error(np.array(auc_runs))
                auc_mean_per_init.append(mean)
                auc_err_per_init.append(err)

            # Fall back to a generic style when no labels dict is supplied.
            if labels is None:
                legend_label, marker, color = f'bond = {bond_dim}', 'o', None
            else:
                style = labels[str(bond_dim)]
                legend_label, marker, color = style[0], style[1], style[2]

            plt.errorbar(list(range(len(initializers))), auc_mean_per_init, yerr=auc_err_per_init,
                         label=legend_label, linestyle='None', marker=marker, capsize=3, color=color)

        plt.title(f'S = {spacing}')
        plt.ylabel('AUC')
        plt.yticks(fontsize=12)
        plt.xticks(range(len(initializers)), initializers, fontsize=14)
        plt.grid(True)
        plt.tight_layout()
        plt.legend(fancybox=True, frameon=True, prop={"size": 10}, loc='best')

        if save_dir:
            plot_dir = f'{save_dir}/results/plots/AUC'
            # exist_ok avoids the check-then-create race of exists()+makedirs().
            os.makedirs(plot_dir, exist_ok=True)
            plt.savefig(f'{plot_dir}/spacing_{spacing}.pdf')
        else:
            plt.show()
        plt.close()
def compare_TPR_per_FPR(save_dir: str = '.',
                        FPR_fixed: float = 0.1,
                        bond_dims: Collection[int] = None,
                        spacings: Collection[int] = None,
                        initializers: Collection[str] = None,
                        embedding: str = 'trigonometric',
                        nruns: int = 0,
                        fig_size: tuple = (6, 5),
                        labels: dict = None,
                        anomaly_det: bool = False):
    """
    Example of code to compare the TPR values for fixed FPR for different
    values of hyperparameters, when spacing parameter is fixed (one figure per
    spacing).

    This works with the results saved in the directory structure as follows::

        root_dir/initializer_string/bond_<bond_dim>/spacing_<spacing>/<embedding_string>/run_<nrun>

    Example::

        root_dir/randn_1e-1/bond_10/spacing_2/trigonometric/run_1

    Each result directory must contain ``normal_score.npy`` and
    ``anomaly_score.npy``.

    Parameters
    ----------
    save_dir : str
        Root directory where the results are saved. If non-empty, the figures
        are written to
        ``<save_dir>/results/plots/TPR/spacing_<spacing>_FPR_<FPR_fixed>.pdf``;
        otherwise they are shown.
    FPR_fixed : float
        Fixed FPR value. The TPR is evaluated with a relative tolerance of
        0.01 around this value.
    bond_dims : list[int]
        List of bond dimensions.
    spacings : list[int]
        List of spacing values. If model is :class:`tn4ml.models.smpo.SpacedMatrixProductOperator`, then spacing is required.
    initializers : list[str]
        List of initializers.
    embedding : str
        Embedding method used (directory component).
    nruns : int
        Number of runs for each model. Runs are read from ``run_1`` ...
        ``run_<nruns>``; if ``nruns == 1`` the scores are read directly from
        the embedding directory (no ``run_`` sub-directory). With the default
        ``nruns = 0`` no scores are loaded and the plotted values are NaN.
    fig_size : tuple
        Size of the figure.
    labels : dict
        Dictionary mapping ``str(bond_dim)`` to ``(legend label, marker, color)``.
        If None, ``'bond = <bond_dim>'`` with marker ``'o'`` and the default
        color cycle is used.

        Example::

            LABELS = {'5': (r'bond = 5', 'o', '#016c59'),
                      '10': (r'bond = 10', 'X', '#7a5195'),
                      '30': (r'bond = 30', 'v', '#67a9cf'),
                      '50': (r'bond = 50', 'd', '#ffa600')}
    anomaly_det : bool
        Forwarded to :func:`get_roc_curve_data`.

    Returns
    -------
    None
        Displays or saves the plot.
    """
    for spacing in spacings:
        plt.figure(figsize=fig_size)

        for bond_dim in bond_dims:
            tpr_mean_per_init = []
            tpr_err_per_init = []
            for init in initializers:
                base_dir = f'{save_dir}/{init}/bond_{bond_dim}/spacing_{spacing}/{embedding}'
                tpr_runs = []
                for run in range(1, nruns + 1):
                    # A single run is stored without the run_<n> sub-directory.
                    run_dir = base_dir if nruns == 1 else f'{base_dir}/run_{run}'
                    fpr, tpr = get_roc_curve_data(np.load(run_dir + '/normal_score.npy'),
                                                  np.load(run_dir + '/anomaly_score.npy'),
                                                  anomaly_det=anomaly_det)
                    tpr_runs.append(get_TPR_for_fixed_FPR(FPR_fixed, np.array(fpr), np.array(tpr), tolerance=0.01))
                mean, err = get_mean_and_error(np.array(tpr_runs))
                tpr_mean_per_init.append(mean)
                tpr_err_per_init.append(err)

            # Fall back to a generic style when no labels dict is supplied.
            if labels is None:
                legend_label, marker, color = f'bond = {bond_dim}', 'o', None
            else:
                style = labels[str(bond_dim)]
                legend_label, marker, color = style[0], style[1], style[2]

            plt.errorbar(list(range(len(initializers))), tpr_mean_per_init, yerr=tpr_err_per_init,
                         label=legend_label, linestyle='None', marker=marker, capsize=3, color=color)

        plt.title(f'S = {spacing}, FPR = {FPR_fixed}')
        plt.ylabel('TPR')
        plt.yticks(fontsize=12)
        plt.xticks(range(len(initializers)), initializers, fontsize=14)
        plt.grid(True)
        plt.tight_layout()
        plt.legend(fancybox=True, frameon=True, prop={"size": 10}, loc='best')

        if save_dir:
            plot_dir = f'{save_dir}/results/plots/TPR'
            # exist_ok avoids the check-then-create race of exists()+makedirs().
            os.makedirs(plot_dir, exist_ok=True)
            plt.savefig(f'{plot_dir}/spacing_{spacing}_FPR_{FPR_fixed}.pdf')
        else:
            plt.show()
        plt.close()
def compare_FPR_per_TPR(save_dir: str = '.',
                        TPR_fixed: float = 0.95,
                        bond_dims: Collection[int] = None,
                        spacings: Collection[int] = None,
                        initializers: Collection[str] = None,
                        embedding: str = 'trigonometric',
                        nruns: int = 0,
                        fig_size: tuple = (6, 5),
                        labels: dict = None,
                        anomaly_det: bool = False):
    """
    Example of code to compare the FPR values for fixed TPR for different
    values of hyperparameters, when spacing parameter is fixed (one figure per
    spacing).
    - code for generating plots from the paper "tn4ml: Tensor Network Training and Customization for Machine Learning"

    This works with the results saved in the directory structure as follows::

        root_dir/initializer_string/bond_<bond_dim>/spacing_<spacing>/<embedding_string>/run_<nrun>

    Example::

        root_dir/randn_1e-1/bond_10/spacing_2/trigonometric/run_1

    Each result directory must contain ``normal_score.npy`` and
    ``anomaly_score.npy``.

    Parameters
    ----------
    save_dir: str
        Root directory where the results are saved. If non-empty, the figures
        are written to
        ``<save_dir>/results/plots/FPR/spacing_<spacing>_TPR_<TPR_fixed>.pdf``;
        otherwise they are shown.
    TPR_fixed: float
        Fixed TPR value. The FPR is evaluated with a relative tolerance of
        0.01 around this value.
    bond_dims: list[int]
        List of bond dimensions.
    spacings: list[int]
        List of spacing values. If model is :class:`tn4ml.models.smpo.SpacedMatrixProductOperator`, then spacing is required.
    initializers: list[str]
        List of initializers.
    embedding: str
        Name of the embedding (directory component).
    nruns: int
        Number of runs for each model. Runs are read from ``run_1`` ...
        ``run_<nruns>``; if ``nruns == 1`` the scores are read directly from
        the embedding directory (no ``run_`` sub-directory). With the default
        ``nruns = 0`` no scores are loaded and the plotted values are NaN.
    fig_size: tuple
        Size of the figure.
    labels: dict
        Dictionary mapping ``str(bond_dim)`` to ``(legend label, marker, color)``.
        If None, ``'bond = <bond_dim>'`` with marker ``'o'`` and the default
        color cycle is used.

        Example::

            LABELS = {'5': (r'bond = 5', 'o', '#016c59'),
                      '10': (r'bond = 10','X', '#7a5195'),
                      '30': (r'bond = 30', 'v', '#67a9cf'),
                      '50': (r'bond = 50', 'd', '#ffa600')}
    anomaly_det: bool
        Forwarded to :func:`get_roc_curve_data`.

    Returns
    -------
    Displays or saves the plot.
    """
    for spacing in spacings:
        plt.figure(figsize=fig_size)

        for bond_dim in bond_dims:
            fpr_mean_per_init = []
            fpr_err_per_init = []
            for init in initializers:
                base_dir = f'{save_dir}/{init}/bond_{bond_dim}/spacing_{spacing}/{embedding}'
                fpr_runs = []
                for run in range(1, nruns + 1):
                    # A single run is stored without the run_<n> sub-directory.
                    run_dir = base_dir if nruns == 1 else f'{base_dir}/run_{run}'
                    fpr, tpr = get_roc_curve_data(np.load(run_dir + '/normal_score.npy'),
                                                  np.load(run_dir + '/anomaly_score.npy'),
                                                  anomaly_det=anomaly_det)
                    fpr_runs.append(get_FPR_for_fixed_TPR(TPR_fixed, np.array(fpr), np.array(tpr), tolerance=0.01))
                mean, err = get_mean_and_error(np.array(fpr_runs))
                fpr_mean_per_init.append(mean)
                fpr_err_per_init.append(err)

            # Fall back to a generic style when no labels dict is supplied.
            if labels is None:
                legend_label, marker, color = f'bond = {bond_dim}', 'o', None
            else:
                style = labels[str(bond_dim)]
                legend_label, marker, color = style[0], style[1], style[2]

            plt.errorbar(list(range(len(initializers))), fpr_mean_per_init, yerr=fpr_err_per_init,
                         label=legend_label, linestyle='None', marker=marker, capsize=3, color=color)

        # The fixed quantity here is the TPR; the plotted quantity is the FPR.
        plt.title(f'S = {spacing}, TPR = {TPR_fixed}')
        plt.ylabel('FPR')
        plt.yticks(fontsize=12)
        plt.xticks(range(len(initializers)), initializers, fontsize=14)
        plt.grid(True)
        plt.tight_layout()
        plt.legend(fancybox=True, frameon=True, prop={"size": 10}, loc='best')

        if save_dir:
            plot_dir = f'{save_dir}/results/plots/FPR'
            # exist_ok avoids the check-then-create race of exists()+makedirs().
            os.makedirs(plot_dir, exist_ok=True)
            plt.savefig(f'{plot_dir}/spacing_{spacing}_TPR_{TPR_fixed}.pdf')
        else:
            plt.show()
        plt.close()