Source code for tn4ml.models.model

import logging
import os
from collections.abc import Callable, Collection, Sequence
from pathlib import Path
from time import time
from typing import Any

import funcy
import jax
import jax.numpy as jnp
import numpy as np
import optax
import quimb as qu
import quimb.tensor as qtn
from tqdm import tqdm

from ..embeddings import *
from ..strategy import *
from ..util import EarlyStopping, TrainingType, gradient_clip

logger = logging.getLogger(__name__)


def _as_class_labels(values: jnp.ndarray) -> jnp.ndarray:
    """Convert class scores, one-hot labels, or class labels to label indices.

    If values are class scores, the predicted class is selected with argmax.
    If values are one-hot labels, the class label is extracted with argmax.
    If values are already class labels, they are returned as is.

    Parameters
    ----------
    values : jnp.ndarray
        Array of class scores, one-hot labels, or class labels.

    Returns
    -------
    jnp.ndarray
        Array of class label indices.
    """
    values = jnp.asarray(values)
    if values.ndim == 0:
        return values.reshape((1,)).astype(jnp.int32)
    if values.ndim == 1:
        return values.astype(jnp.int32)
    if values.shape[-1] == 1:
        return jnp.squeeze(values, axis=-1).astype(jnp.int32)
    return jnp.argmax(values, axis=-1)


def _enable_cpu_multithreading() -> None:
    """Enable XLA multi-threading for CPU backend.

    JAX on CPU defaults to single-threaded XLA kernels.
    Setting XLA_FLAGS before the first jax call unlocks all cores.
    Call this at the top of your training script when running CPU-only.

    Example
    -------
    >>> from tn4ml.models.model import _enable_cpu_multithreading
    >>> _enable_cpu_multithreading()
    """
    current = os.environ.get("XLA_FLAGS", "")
    flags = []
    if "--xla_cpu_multi_thread_eigen_intra_op_parallelism=1" not in current:
        flags.append("--xla_cpu_multi_thread_eigen_intra_op_parallelism=1")
    if flags:
        os.environ["XLA_FLAGS"] = current + " " + " ".join(flags)



[docs]
class Model(qtn.TensorNetwork):
    """:class:`tn4ml.models.Model` class models training model of class :class:`quimb.tensor.tensor_core.TensorNetwork`.

    Attributes
    ----------
    loss : `Callable`, or `None`
        Loss function. See :mod:`tn4ml.metrics` for examples.
    strategy : :class:`tn4ml.strategy.Strategy`
        Strategy for computing gradients.
    optimizer : str
        Type of optimizer matching names of optimizers from optax.
    learning_rate : float
        Learning rate for optimizer.
    train_type : int
        Type of training: 0 = 'unsupervised' or 1 ='supervised', 2 = 'target TN'.
    gradient_transforms : sequence
        Sequence of gradient transformations.
    device : (str, int)
        Device for computation, e.g. ('cpu', 0) or ('gpu', 0). [0] = device name, [1] = device index.
    opt_state : Any
        State of optimizer.
    """


[docs]
    def __init__(self) -> None:
        """Set the default training configuration of :class:`tn4ml.models.Model`.

        Defaults: no loss, the ``"global"`` strategy, the ``optax.adam``
        factory with learning rate ``1e-2``, unsupervised training, no extra
        gradient transformations, no optimizer state, and device ``("cpu", 0)``.
        """
        # What to optimise and how the gradients are organised.
        self.train_type: int = TrainingType.UNSUPERVISED
        self.strategy: Any = "global"
        self.loss: Callable = None

        # Optimizer factory and its settings; state is created later.
        self.learning_rate: float = 1e-2
        self.optimizer: optax.GradientTransformation = optax.adam
        self.gradient_transforms: Sequence = None
        self.opt_state: Any = None

        # (platform name, device index)
        self.device: tuple = ("cpu", 0)



[docs]
    def save(self, model_name: str, dir_name: str = "~", tn: bool = False):
        """Save :class:`tn4ml.models.Model` to pickle file.

        The tensor data is first copied to host memory as NumPy arrays, a fresh
        instance of the concrete class is built from it, and that instance is
        written to ``<dir_name>/<model_name>.pkl`` with ``quimb.save_to_disk``.
        The directory is created if it does not exist.

        Parameters
        ----------
        model_name : str
            Name of Model; used as the file stem.
        dir_name: str
            Directory for saving Model. A leading ``~`` is expanded to the
            user's home directory.
        tn : bool
            If True, the new instance is built from a list of
            :class:`quimb.tensor.Tensor` objects (keeping each tensor's indices
            and tagging it with its site tag); otherwise it is built from the
            raw arrays.
        """
        import importlib

        # Make sure the module defining the concrete subclass is importable,
        # since the pickle refers to the class by module and name.
        importlib.import_module(type(self).__module__)

        host_arrays = tuple(np.array(jax.device_get(x)) for x in self.arrays)
        if tn:
            tensors = [
                qtn.Tensor(
                    array,
                    inds=tensor.inds,
                    tags=self._site_tag_id.format(i),
                )
                for i, (array, tensor) in enumerate(
                    zip(host_arrays, self.tensors, strict=True)
                )
            ]
            model = type(self)(tensors)  # type: ignore[call-arg]
        else:
            model = type(self)(host_arrays)  # type: ignore[call-arg]

        # Without expanduser() the default "~" would create a literal
        # directory named "~" inside the current working directory.
        target_dir = Path(dir_name).expanduser()
        target_dir.mkdir(parents=True, exist_ok=True)

        qu.save_to_disk(model, str(target_dir / f"{model_name}.pkl"))



[docs]
    def nparams(self) -> int:
        """Return number of parameters of the model.

        The count is the sum, over all tensors, of the product of each
        tensor's data shape. A 0-d tensor counts as one parameter.

        Returns
        -------
        int
            Total number of scalar entries, as a built-in ``int``.
        """
        # np.prod(()) is the float 1.0, so force an integer dtype and convert
        # to a Python int to honour the annotated return type.
        return sum(
            int(np.prod(tensor.data.shape, dtype=np.int64)) for tensor in self.tensors
        )



[docs]
    def configure(self, **kwargs):
        """Configure model for training with specific parameters.

        Parameters
        ----------
        kwargs : dict
            Configuration parameters. Supported keys:

            - strategy: str or Strategy object
                Training strategy. A :class:`Strategy` instance is used as is.
                Accepted strings:

                * ``'global'``, ``'sgd'``, ``'gd'``, ``'gradient_descent'`` --
                  stored as ``'global'``;
                * ``'sweeps'``, ``'local'``, ``'dmrg'``, ``'dmrg-like'`` --
                  ``Sweeps()`` with its defaults;
                * ``'sweeps-one-way'`` -- ``Sweeps(two_way=False, grouping=2)``;
                * ``'sweeps-per-site'`` -- ``Sweeps(two_way=True, grouping=1)``;
                * ``'sweeps-one-way-per-site'`` --
                  ``Sweeps(two_way=False, grouping=1)``.
            - optimizer: callable or optax optimizer
                Either a factory called as ``optimizer(learning_rate=...)`` or
                an already built ``optax.GradientTransformation``, which is
                kept unchanged. Ignored when ``gradient_transforms`` is given.
            - loss: callable
                Loss function for training.
            - train_type: int
                Type of training (from :class:`tn4ml.util.TrainingType`)
            - learning_rate: float
                Learning rate for optimizer.
            - gradient_transforms: sequence
                Sequence of gradient transformations for optax; when non-empty
                they are chained and replace ``optimizer``.
            - device: (str, int)
                Device for computation, e.g. ('cpu', 0) or ('gpu', 0). [0] = device name, [1] = device index.

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If the strategy string is not recognised.
        AttributeError
            If an unknown key is passed, ``train_type`` is invalid, or
            ``device`` is not a 2-tuple naming ``'cpu'`` or ``'gpu'``.
        RuntimeError
            If no device of the requested platform is available.

        Examples
        --------
        >>> model.configure(strategy='global', optimizer=optax.adam, learning_rate=0.01, loss=tn4ml.metrics.LogQuadNorm, train_type=TrainingType.UNSUPERVISED)
        """
        for key, value in kwargs.items():
            if key == "strategy":
                if isinstance(value, Strategy):
                    self.strategy = value
                elif value in [
                    "sweeps",
                    "local",
                    "dmrg",
                    "dmrg-like",
                    "sweeps-one-way",
                    "sweeps-one-way-per-site",
                    "sweeps-per-site",
                ]:
                    if value == "sweeps-one-way-per-site":
                        self.strategy = Sweeps(
                            two_way=False, grouping=1
                        )  # one-way, grouping = 1
                    elif value == "sweeps-per-site":
                        self.strategy = Sweeps(
                            two_way=True, grouping=1
                        )  # two-way, grouping = 1
                    elif value == "sweeps-one-way":
                        self.strategy = Sweeps(
                            two_way=False, grouping=2
                        )  # one-way, grouping = 2
                    else:
                        self.strategy = Sweeps()  # default is two-way, grouping = 2
                elif value in ["global", "sgd", "gd", "gradient_descent"]:
                    self.strategy = "global"
                else:
                    raise ValueError(f'Strategy "{value}" not found')
            elif key in [
                "optimizer",
                "loss",
                "train_type",
                "learning_rate",
                "gradient_transforms",
                "device",
            ]:
                setattr(self, key, value)
            else:
                raise AttributeError(f"Attribute {key} not found")

        if self.train_type not in [
            TrainingType.UNSUPERVISED,
            TrainingType.SUPERVISED,
            TrainingType.TARGET_TN,
        ]:
            raise AttributeError(
                f"Specify type of training: {TrainingType.UNSUPERVISED.name}, {TrainingType.SUPERVISED.name}, or {TrainingType.TARGET_TN.name}!"
            )

        if not hasattr(self, "optimizer") or not hasattr(self, "gradient_transforms"):
            raise AttributeError(
                "Provide 'optimizer' or sequence of 'gradient_transforms'! "
            )

        if self.gradient_transforms:
            self.optimizer = optax.chain(*self.gradient_transforms)
        elif "optimizer" in kwargs and isinstance(
            self.optimizer, optax.GradientTransformation
        ):
            # The caller handed over a ready-made optimizer in this call; keep
            # it instead of silently replacing it with Adam. (A stale instance
            # left over from an earlier configure() call still falls through
            # to the Adam fallback below, as before.)
            pass
        elif callable(self.optimizer):
            self.optimizer = self.optimizer(learning_rate=self.learning_rate)
        else:
            self.optimizer = optax.adam(learning_rate=self.learning_rate)

        # Check the type first: len() on a non-sized value would raise a
        # TypeError instead of the documented AttributeError.
        if not isinstance(self.device, tuple) or len(self.device) != 2:
            raise AttributeError("Device must be a tuple of (str, int)!")

        if self.device[0] not in ["cpu", "gpu"]:
            raise AttributeError("Device must be 'cpu' or 'gpu'!")

        # Query the requested platform directly. jax.devices() without an
        # argument only lists the default backend, so on a GPU host it would
        # not report the CPU devices that are nevertheless usable.
        try:
            available = jax.devices(self.device[0])
        except RuntimeError:
            available = []
        if not available:
            raise RuntimeError(
                f"Device '{self.device[0]}' was requested but no such device is available. "
                f"Available devices: {jax.devices()}"
            )
        if self.device[0] == "cpu":
            # NOTE(review): the device query above has already made jax calls,
            # while the helper's docstring asks for the flag to be set before
            # the first jax call -- confirm that it still takes effect here.
            _enable_cpu_multithreading()

        logger.info(
            "backend=%s | requested=%s:%s | all devices=%s",
            jax.default_backend(),
            self.device[0],
            self.device[1],
            jax.devices(),
        )



[docs]
    def predict(
        self,
        sample: np.ndarray,
        embedding: Embedding | None = None,
        return_tn: bool = False,
        normalize: bool = False,
    ) -> np.ndarray | qtn.TensorNetwork:
        """Evaluate the model on a single sample.

        The sample is embedded into a tensor network and combined with the
        model: through ``self.apply`` when the instance provides a callable
        ``apply``, otherwise with the ``&`` operator. Unless ``return_tn`` is
        set, the combined network is fully contracted and squeezed.

        Parameters
        ----------
        sample : :class:`numpy.ndarray`
            Input data; must hold at least ``self.L`` elements.
        embedding : :class:`tn4ml.embeddings.Embedding`
            Data embedding function. Defaults to ``TrigonometricEmbedding()``.
        return_tn : bool
            If True, returns the uncontracted tensor network, otherwise
            returns data. Useful when you want to vmap over predict function.
        normalize : bool
            If True, the contracted output is divided by its norm.

        Returns
        -------
        :class:`quimb.tensor.tensor_core.TensorNetwork` or array
            The combined network when ``return_tn`` is True, else the
            contracted (and optionally normalized) output.

        Raises
        ------
        ValueError
            If the sample has fewer than ``self.L`` elements.
        """
        if embedding is None:
            embedding = TrigonometricEmbedding()

        n_features = len(sample.flatten())
        if n_features < self.L:
            raise ValueError(f"Input data must have at least {self.L} elements!")

        embedded = embed(sample, embedding)

        apply_fn = getattr(self, "apply", None)
        network = apply_fn(embedded) if callable(apply_fn) else self & embedded
        if return_tn:
            return network

        contracted = network.contract(all, optimize="auto-hq")
        squeezed = contracted.squeeze()
        # A Tensor result carries its array in ``.data``; anything else is
        # used as returned by ``squeeze``.
        y_pred = squeezed.data if isinstance(contracted, qtn.Tensor) else squeezed

        return y_pred / jnp.linalg.norm(y_pred) if normalize else y_pred



[docs]
    def forward(
        self,
        data: jnp.ndarray,
        embedding: Embedding | None = None,
        batch_size: int = 64,
        normalize: bool = False,
        dtype: Any = jnp.float_,
        seed: int = 42,
        alternate_flip: bool = False,
    ) -> jnp.ndarray:
        """Forward pass of the model.

        The data is processed batch by batch (without shuffling) through a
        jitted, vmapped :meth:`predict`; the per-batch outputs are concatenated
        along the first axis. Size-1 axes other than the batch axis are
        squeezed away, so a batch that holds a single sample keeps its batch
        axis and still concatenates with the other batches.

        Parameters
        ----------
        data : :class:`jax.numpy.ndarray`
            Input data.
        embedding: :class:`tn4ml.embeddings.Embedding`
            Data embedding function. Defaults to ``TrigonometricEmbedding()``.
        batch_size: int
            Batch size for data processing.
        normalize: bool
            If True, the model output is normalized.
        dtype: Any
            Data type of input data.
        seed: int
            Random seed forwarded to the batch iterator.
        alternate_flip: bool
            Forwarded to the batch iterator.

        Returns
        -------
        :class:`jax.numpy.ndarray`
            Output of the model.
        """
        if embedding is None:
            embedding = TrigonometricEmbedding()

        # Accept a bare platform name the same way ``accuracy`` does, without
        # modifying the stored attribute.
        device = self.device if isinstance(self.device, tuple) else (self.device, 0)
        _target_device = jax.devices(device[0])[device[1]]

        with jax.default_device(_target_device):
            _predict_batch = jax.jit(
                jax.vmap(self.predict, in_axes=(0, None, None, None)),
                static_argnums=(1, 2, 3),
            )

        outputs = []
        for batch_data in _batch_iterator(
            data,
            batch_size=batch_size,
            shuffle=False,
            dtype=dtype,
            seed=seed,
            alternate_flip=alternate_flip,
        ):
            x = jax.device_put(jnp.array(batch_data, dtype=dtype), _target_device)
            batch_out = _predict_batch(x, embedding, False, normalize)

            # Squeeze only the non-batch axes: a plain jnp.squeeze would also
            # drop the batch axis of a one-sample batch.
            singleton_axes = tuple(
                axis for axis in range(1, batch_out.ndim) if batch_out.shape[axis] == 1
            )
            if singleton_axes:
                batch_out = jnp.squeeze(batch_out, axis=singleton_axes)
            outputs.append(batch_out)

        return jnp.concatenate(outputs, axis=0)



[docs]
    def accuracy(
        self,
        data: jnp.ndarray | np.ndarray,
        y_true: jnp.ndarray | np.ndarray | None = None,
        embedding: Embedding | None = None,
        batch_size: int = 64,
        shuffle: bool = False,
        normalize: bool = False,
        accuracy_fn: Callable[[jnp.ndarray], jnp.ndarray] | None = None,
        dtype: Any = jnp.float_,
        seed: int = 42,
        alternate_flip: bool = False,
    ) -> float:
        """Return the fraction of samples whose predicted label matches the target.

        Predictions are computed batch by batch with a jitted, vmapped
        :meth:`predict`. Predictions and targets are both reduced to class
        indices before they are compared. If ``self.device`` is not a tuple it
        is replaced in place by ``(self.device, 0)``.

        Parameters
        ----------
        data: :class:`numpy.ndarray`
            Input data.
        y_true: :class:`numpy.ndarray`
            Target class vector. Required.
        embedding: :class:`tn4ml.embeddings.Embedding`
            Data embedding function. Defaults to ``TrigonometricEmbedding()``.
        batch_size: int
            Batch size for data processing.
        shuffle: bool
            Forwarded to the batch iterator.
        normalize: bool
            If True, the model output is normalized in predict function.
        accuracy_fn: Callable
            Function applied to raw model outputs before class labels are extracted.
            If it returns class scores, the predicted class is selected with argmax;
            if it returns class labels, those labels are compared directly.
        dtype: Any
            Data type of input data.
        seed: int
            Random seed forwarded to the batch iterator.
        alternate_flip: bool
            Forwarded to the batch iterator.

        Returns
        -------
        float
            Number of correct predictions divided by the number of samples seen.

        Raises
        ------
        ValueError
            If ``y_true`` is None.
        """
        if embedding is None:
            embedding = TrigonometricEmbedding()
        if y_true is None:
            raise ValueError("For unsupervised learning you must provide target data!")

        # ensure device is tuple
        if not isinstance(self.device, tuple):
            self.device = (self.device, 0)
        platform, index = self.device[0], self.device[1]
        target = jax.devices(platform)[index]

        with jax.default_device(target):
            batched_predict = jax.jit(
                jax.vmap(self.predict, in_axes=(0, None, None, None)),
                static_argnums=(1, 2, 3),
            )

        batches = _batch_iterator(
            data,
            y_true,
            batch_size=batch_size,
            shuffle=shuffle,
            dtype=dtype,
            seed=seed,
            alternate_flip=alternate_flip,
        )

        n_seen = 0
        n_correct = jnp.array(0)
        for features, labels in batches:
            features = jax.device_put(jnp.array(features, dtype=dtype), target)
            labels = jax.device_put(jnp.array(labels), target)

            scores = batched_predict(features, embedding, False, normalize)
            if accuracy_fn is not None:
                scores = accuracy_fn(scores)

            matches = _as_class_labels(scores) == _as_class_labels(labels)
            n_correct = n_correct + jnp.sum(matches)
            n_seen += features.shape[0]

        return float(jax.block_until_ready(n_correct)) / n_seen



[docs]
    def update_tensors(self, params):
        """Update tensors of the model with new parameters.

        With a :class:`Sweeps` strategy only the first tensor selected by
        ``self.sitetags`` is overwritten, using ``params[0]``. Otherwise the
        model tensors and ``params`` are paired in order and each tensor's data
        is replaced; surplus entries on either side are ignored.

        Parameters
        ----------
        params : sequence of :class:`jax.numpy.ndarray`
            New parameters of the model.

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If the strategy is :class:`Sweeps` and ``self.sitetags`` is unset
            or None.
        """
        if isinstance(self.strategy, Sweeps):
            # ``sitetags`` only exists once training has set it; treat a
            # missing attribute like None so the intended error is raised.
            sitetags = getattr(self, "sitetags", None)
            if sitetags is None:
                raise ValueError(
                    "For Sweeping strategy you must provide names of tensors for differentiation."
                )
            tensor = self.select_tensors(sitetags)[0]
            tensor.modify(data=params[0])
        else:
            for tensor, array in zip(self.tensors, params, strict=False):
                tensor.modify(data=array)



[docs]
    def compute_entropy(self, data, embedding):
        """Compute entropy of the model output for one sample.

        The sample is converted to a NumPy array, embedded, and passed through
        ``self.apply``. The ``entropy`` method of the result is then called at
        the index ``len(tensors) // 2``.

        Parameters
        ----------
        data : :class:`jax.numpy.ndarray`
            Input data.
        embedding : :class:`tn4ml.embeddings.Embedding`
            Data embedding function.

        Returns
        -------
        float
            Entropy of the model.
        """
        sample_tn = embed(np.asarray(data), embedding)
        state = self.apply(sample_tn)
        middle = len(state.tensors) // 2
        return state.entropy(middle)



[docs]
    def compute_entropy_batch(self, data, embedding):
        """Compute entropy of the model for a batch of data.

        Only the first sample of the batch is used: ``data`` is converted to a
        jax array and its first entry is passed to :meth:`compute_entropy`.

        Parameters
        ----------
        data : :class:`jax.numpy.ndarray`
            Input data; indexed along its first axis.
        embedding : :class:`tn4ml.embeddings.Embedding`
            Data embedding function.

        Returns
        -------
        float
            Entropy of the model for the first sample.
        """
        first_sample = jnp.array(data)[0]
        return self.compute_entropy(first_sample, embedding)



[docs]
    def create_train_step(self, params, loss_func):
        """Create the training-step function and the initial optimizer state.

        Builds a jitted function that returns the loss value and its gradients
        with respect to every entry of ``params``, and a ``train_step`` closure
        that performs one optimizer update and writes the new parameters back
        into the model. The optimizer state is initialized from ``params``
        stored as a dict keyed by position.

        Parameters
        ----------
        params : sequence of :class:`jax.numpy.ndarray`
            Parameters of the model.
        loss_func : function
            Loss function, called as ``loss_func(data, targets, *params)``.

        Returns
        -------
        train_step : function
            Function to perform one training step.
        opt_state : tuple
            State of optimizer at the initialization.
        """
        # optax works on pytrees; use a dict keyed by the position of each array
        init_params = {i: jnp.array(data) for i, data in enumerate(params)}
        opt_state = self.optimizer.init(init_params)

        def value_and_grad(params, data=None, targets=None):
            """Calculate loss value and gradient with respect to each entry of ``params``."""

            # Spread the parameters into positional arguments so that each one
            # can be addressed through ``argnums``.
            def loss_scalar_fn(data, targets, *params):
                return loss_func(data, targets, *params)

            # Positions 0 and 1 are data and targets; differentiate the rest.
            loss, grads = jax.value_and_grad(
                loss_scalar_fn, argnums=range(2, 2 + len(params))
            )(data, targets, *params)
            return loss, grads

        with jax.default_device(jax.devices(self.device[0])[self.device[1]]):
            jit_value_and_grad = jax.jit(value_and_grad)

        def train_step(params, opt_state, data=None, grad_clip_threshold=None):
            """Perform one training step.

            Parameters
            ----------
            params : sequence of :class:`jax.numpy.ndarray`
                Parameters of the model.
            opt_state : tuple
                State of optimizer.
            data : sequence of :class:`jax.numpy.ndarray`
                Input data — must already be on the correct device. A 2-tuple
                is unpacked into ``(data, targets)``; anything else is passed
                on as data with ``targets`` set to None.
            grad_clip_threshold : float, or default `None`
                If truthy, gradients are passed through ``gradient_clip`` with
                this threshold.

            Returns
            -------
            params : tuple of :class:`jax.numpy.ndarray`
                Updated parameters.
            opt_state : tuple
                Updated state of optimizer.
            loss : :class:`jax.numpy.ndarray`
                Loss value computed before the update.
            """
            if data is not None and isinstance(data, tuple) and len(data) == 2:
                data, targets = data
            else:
                targets = None

            loss, grads = jit_value_and_grad(params, data, targets)

            if grad_clip_threshold:
                grads = gradient_clip(grads, grad_clip_threshold)

            # convert to pytree structure
            grads = {i: jnp.array(data) for i, data in enumerate(grads)}
            params = {i: jnp.array(data) for i, data in enumerate(params)}

            try:
                updates, opt_state = self.optimizer.update(grads, opt_state)
            except TypeError as exc:
                # Only the shape-mismatch case is recoverable; anything else
                # is re-raised unchanged.
                if "incompatible shapes for broadcasting" not in str(exc):
                    raise
                # Sweep canonicalization can change the contracted tensor axis
                # order for the same sweep step. In that case Adam's moment
                # buffers no longer match the current gradient shape.
                # Start from a fresh optimizer state and retry the update.
                opt_state = self.optimizer.init(params)
                updates, opt_state = self.optimizer.update(grads, opt_state)
            params = optax.apply_updates(params, updates)

            # convert back to arrays
            params = tuple(jnp.array(v) for v in params.values())

            # update TN inplace
            self.update_tensors(params)

            return params, opt_state, loss

        return train_step, opt_state



[docs]
    def train(
        self,
        inputs: Collection | None = None,
        val_inputs: Any | None = None,
        targets: Any | None = None,
        val_targets: Any | None = None,
        tn_target: qtn.TensorNetwork | None = None,
        batch_size: int | None = None,
        epochs: int | None = 1,
        embedding: Embedding | None = None,
        normalize: bool | None = False,
        canonize: tuple | None = (False, None),
        time_limit: int | None = None,
        earlystop: EarlyStopping | None = None,
        gradient_clip_threshold: float | None = None,
        val_batch_size: int | None = None,
        eval_metric: Callable | None = None,
        display_val_acc: bool | None = False,
        accuracy_fn: Callable[[jnp.ndarray], jnp.ndarray] | None = None,
        dtype: Any = jnp.float_,
        shuffle: bool | None = False,
        seed: int | None = 42,
        alternate_flip: bool = False,
    ):
        """Perform the training procedure of :class:`tn4ml.models.Model`.

        Parameters
        ----------
        inputs : sequence of :class:`numpy.ndarray`
            Data used for training procedure.
        val_inputs : sequence of :class:`numpy.ndarray`
            Data used for validation.
        targets: sequence of :class:`numpy.ndarray`
            Targets for training procedure (if training is supervised).
        val_targets: sequence of :class:`numpy.ndarray`
            Targets for validation (if training is supervised).
        tn_target: :class:`quimb.tensor.tensor_core.TensorNetwork` or any specialized TN class from `quimb.tensor` module
            Target tensor network for training.
        batch_size : int, or default `None`
            Number of samples per gradient update.
        epochs : int
            Number of epochs for training.
        embedding : :class:`tn4ml.embeddings.Embedding`
            Data embedding function.
        normalize : bool
            If True, the model is normalized after each iteration.
        canonize: tuple([bool, int])
            tuple indicating is model canonized after each iteration. Example: (True, 0) - model is canonized in canonization center = 0.
        time_limit: int
            Time limit on model's training in seconds.
        earlystop : :class:`tn4ml.util.EarlyStopping`
            Early stopping training when monitored metric stopped improving.
        gradient_clip_threshold : float
            Threshold for gradient clipping.
        val_batch_size : int
            Number of samples per validation batch.
        display_val_acc : bool
            If True, displays validation accuracy.
        accuracy_fn : Callable
            Function applied to raw model outputs before validation accuracy labels
            are extracted. Passed to :meth:`accuracy`.
        alternate_flip : bool
            If True, flips every other batch along axis=1.

        Returns
        -------
        history: dict
            Records training loss and metric values.
        """
        if embedding is None:
            embedding = TrigonometricEmbedding()

        num_batches = max(1, len(inputs) // batch_size)

        if targets is not None and targets.ndim == 1:
            targets = np.expand_dims(targets, axis=-1)

        if val_inputs is not None and eval_metric is None:
            eval_metric = self.loss

        self.batch_size = batch_size

        if inputs is not None:
            n_batches = max(1, len(inputs) // self.batch_size)

        if not hasattr(self, "history"):
            self.history: dict = {}
            self.history["loss"] = []
            self.history["epoch_time"] = []
            self.history["unfinished"] = False
            if val_inputs is not None:
                if val_batch_size is None:
                    raise ValueError("Validation batch size must be provided!")
                self.history["val_loss"] = []
                if display_val_acc:
                    self.history["val_acc"] = []

        if earlystop:
            return_value: Any = 0
            earlystop.on_begin_train(self.history, self)

        self.sitetags = None  # for sweeping strategy

        def loss_fn(data=None, targets=None, *params):
            """Batches embedding + loss computation internally, with model params fixed externally."""
            tn = self.copy()

            if hasattr(self, "sitetags") and self.sitetags is not None:
                tn.select_tensors(self.sitetags)[0].modify(data=params[0])
            else:
                for tensor, array in zip(tn.tensors, params, strict=False):
                    tensor.modify(data=array)

            # Define batched version of embed + loss logic
            def single_loss(x, y=None):
                tn_i = embed(x, embedding)  # create TN from data

                if self.train_type == TrainingType.UNSUPERVISED:
                    return self.loss(tn, tn_i)
                if self.train_type == TrainingType.SUPERVISED:
                    return self.loss(tn, tn_i, y)
                assert self.train_type == TrainingType.TARGET_TN, (
                    "Train type must be TARGET_TN!"
                )
                return self.loss(tn, tn_target)

            if self.train_type == TrainingType.UNSUPERVISED:
                return jnp.mean(jax.vmap(single_loss, in_axes=(0,))(data))
            if self.train_type == TrainingType.SUPERVISED:
                return jnp.mean(jax.vmap(single_loss, in_axes=(0, 0))(data, targets))
            # TARGET_TN
            return jnp.mean(jax.vmap(single_loss, in_axes=(0,))(data))

        if isinstance(self.strategy, Sweeps):
            # initialize optimizers
            with jax.default_device(jax.devices(self.device[0])[self.device[1]]):
                self.loss_func = jax.jit(loss_fn)
            self.opt_states = {}

            for sites in self.strategy.iterate_sites(self):
                self.strategy.prehook(self, sites)

                self.sitetags = [self.site_tag(site) for site in sites]

                params_i = self.select_tensors(self.sitetags)[0].data
                params_i = jnp.expand_dims(params_i, axis=0)  # add batch dimension

                self.step, opt_state = self.create_train_step(
                    params=params_i, loss_func=self.loss_func
                )

                self.opt_states[sites] = opt_state

                self.strategy.posthook(self, sites)
        else:
            if self.strategy != "global":
                raise ValueError(
                    "Only Global Gradient Descent and DMRG Sweeping strategy is supported for now!"
                )

            # initialize optimizer
            params = self.arrays
            with jax.default_device(jax.devices(self.device[0])[self.device[1]]):
                self.loss_func = jax.jit(loss_fn)
            self.step, self.opt_state = self.create_train_step(
                params=params, loss_func=self.loss_func
            )

        finish = False
        start_train = time()
        with tqdm(total=epochs, desc="epoch") as outerbar:
            for epoch in range(epochs):
                time_epoch = time()

                if self.train_type == TrainingType.TARGET_TN:
                    params = self.arrays
                    _, self.opt_state, loss_epoch = self.step(
                        params,
                        self.opt_state,
                        None,
                        grad_clip_threshold=gradient_clip_threshold,
                    )

                    self.history["loss"].append(loss_epoch)
                    self.history["epoch_time"].append(time() - time_epoch)
                else:
                    loss_batch: Any = 0
                    _target_device = jax.devices(self.device[0])[self.device[1]]
                    for batch_data in _batch_iterator(
                        inputs,
                        targets,
                        batch_size,
                        dtype=dtype,
                        shuffle=shuffle,
                        seed=seed,
                        alternate_flip=alternate_flip,
                    ):
                        if isinstance(batch_data, tuple) and len(batch_data) == 2:
                            _x, _y = batch_data
                            batch_data = (
                                jax.device_put(
                                    jnp.array(_x, dtype=dtype), _target_device
                                ),
                                jax.device_put(jnp.array(_y), _target_device),
                            )
                        else:
                            batch_data = jax.device_put(
                                jnp.array(batch_data, dtype=dtype), _target_device
                            )
                        if isinstance(self.strategy, Sweeps):
                            loss_curr: Any = 0
                            site_count = 0
                            for sites in self.strategy.iterate_sites(self):
                                site_count += 1
                                self.strategy.prehook(self, sites)

                                site_tag = self.site_tag(min(sites))
                                tensor = self.select_tensors(site_tag)[0]

                                if self.strategy.grouping == 2:
                                    # Transpose tensor if needed to match expected ordering
                                    expected_inds = self.strategy.inds_order[sites]
                                    if (
                                        sorted(tensor.inds) == sorted(expected_inds)
                                        and tensor.inds != expected_inds
                                    ):
                                        tensor.transpose(*expected_inds, inplace=True)

                                    self.sitetags = [
                                        self.site_tag(site) for site in sorted(sites)
                                    ]

                                # Get params with batch dimension
                                params_i = jnp.expand_dims(tensor.data, axis=0)

                                # Optimizer step
                                _, self.opt_states[sites], loss_group = self.step(
                                    params_i,
                                    self.opt_states[sites],
                                    batch_data,
                                    grad_clip_threshold=gradient_clip_threshold,
                                )

                                self.strategy.posthook(self, sites)
                                loss_curr += loss_group

                            loss_curr /= site_count
                        else:
                            # Global strategy
                            params = self.arrays
                            _, self.opt_state, loss_curr = self.step(
                                params,
                                self.opt_state,
                                batch_data,
                                grad_clip_threshold=gradient_clip_threshold,
                            )

                        loss_batch += loss_curr

                        if normalize:
                            self.normalize()

                        if canonize[0] and not isinstance(self.strategy, Sweeps):
                            self.canonicalize(canonize[1], inplace=True)

                    loss_epoch = loss_batch / n_batches

                    loss_epoch = float(jax.block_until_ready(loss_epoch))

                    self.history["loss"].append(loss_epoch)

                    self.history["epoch_time"].append(time() - time_epoch)

                    if finish:
                        break

                    # if for some reason you have a limited amount of time to train the model
                    if time_limit is not None and (
                        time() - start_train + np.mean(self.history["epoch_time"])
                        >= time_limit
                    ):
                        self.history["unfinished"] = True
                        return self.history

                    # evaluate validation loss
                    if val_inputs is not None:
                        loss_val_epoch = self.evaluate(
                            val_inputs,
                            val_targets,
                            batch_size=val_batch_size,
                            embedding=embedding,
                            evaluate_type=self.train_type,
                            metric=eval_metric,
                            dtype=dtype,
                            shuffle=shuffle,
                            seed=seed,
                            alternate_flip=alternate_flip,
                        )

                        self.history["val_loss"].append(loss_val_epoch)
                        if display_val_acc:
                            accuracy_val_epoch = self.accuracy(
                                val_inputs,
                                val_targets,
                                batch_size=val_batch_size,
                                embedding=embedding,
                                shuffle=shuffle,
                                accuracy_fn=accuracy_fn,
                                dtype=dtype,
                                seed=seed,
                                alternate_flip=alternate_flip,
                            )
                            self.history["val_acc"].append(accuracy_val_epoch)

                        if earlystop and earlystop.monitor == "val_loss":
                            current = loss_val_epoch
                            return_value = earlystop.on_end_epoch(current, epoch, self)
                    else:
                        if earlystop:
                            if earlystop.monitor == "loss":
                                current = loss_epoch
                            else:
                                current = (
                                    sum(self.history[earlystop.monitor][-num_batches:])
                                    / num_batches
                                )
                            return_value = earlystop.on_end_epoch(current, epoch, self)

                if epoch == 0:
                    outerbar.bar_format = "{l_bar}{bar} {n_fmt}/{total_fmt} {postfix}"

                if val_inputs is not None:
                    if display_val_acc:
                        outerbar.set_postfix(
                            {
                                "loss": f"{loss_epoch:.4f}",
                                "val_loss": f"{self.history['val_loss'][-1]:.4f}",
                                "val_acc": f"{self.history['val_acc'][-1]:.4f}",
                            }
                        )
                    else:
                        outerbar.set_postfix(
                            {
                                "loss": loss_epoch,
                                "val_loss": f"{self.history['val_loss'][-1]:.4f}",
                            }
                        )
                else:
                    outerbar.set_postfix({"loss": f"{loss_epoch:.4f}"})

                outerbar.update()

                if earlystop and return_value == 1:
                    best_model = earlystop.memory["best_model"]
                    return best_model.history

        return self.history



[docs]
    def evaluate(
        self,
        inputs: Collection | None = None,
        targets: Any | None = None,
        tn_target: qtn.TensorNetwork | None = None,
        batch_size: int | None = None,
        embedding: Embedding | None = None,
        evaluate_type: int = TrainingType.UNSUPERVISED,
        return_list: bool = False,
        metric: Callable | None = None,
        dtype: Any = jnp.float_,
        shuffle: bool | None = False,
        seed: int | None = 42,
        alternate_flip: bool | None = False,
    ) -> float | np.ndarray:
        """Evaluate the model on the data.

        Parameters
        ----------
        inputs : sequence of :class:`numpy.ndarray`
            Data used for evaluation. May be omitted only when evaluating
            against ``tn_target``.
        targets: sequence of :class:`numpy.ndarray`
            Targets for evaluation (if evaluation is supervised).
        tn_target: :class:`quimb.tensor.tensor_core.TensorNetwork` or any specialized TN class from `quimb`
            Target tensor network for evaluation.
        batch_size : int, or default `None`
            Number of samples per evaluation. If `None` and the model already
            has a ``batch_size`` attribute, that value is used.
        embedding : :class:`tn4ml.embeddings.Embedding`
            Data embedding function. Defaults to ``TrigonometricEmbedding()``.
        evaluate_type : int
            Type of evaluation: ``TrainingType.UNSUPERVISED``,
            ``TrainingType.SUPERVISED`` or ``TrainingType.TARGET_TN``.
        return_list : bool
            If True, returns the individual loss values of all batches
            concatenated into one array instead of their mean.
        metric : function
            Metric function for evaluation. Defaults to ``self.loss``.
        dtype : Any
            Data type of input data.
        shuffle : bool
            If True, data is shuffled.
        seed : int
            Random seed for data shuffling.
        alternate_flip : bool
            Forwarded to the batch iterator (flips every other batch).

        Returns
        -------
        float or numpy.ndarray
            Loss averaged over batches, or the array of collected loss values
            when ``return_list`` is True and ``inputs`` is given.

        Raises
        ------
        ValueError
            If ``evaluate_type`` is not a known type, or if ``inputs`` is
            given but no batch was produced.
        AssertionError
            If ``inputs`` is omitted while ``evaluate_type`` is not
            ``TARGET_TN`` or ``tn_target`` is missing.
        """
        if embedding is None:
            embedding = TrigonometricEmbedding()

        if evaluate_type not in [
            TrainingType.UNSUPERVISED,
            TrainingType.SUPERVISED,
            TrainingType.TARGET_TN,
        ]:
            raise ValueError(
                f"Specify type of evaluation: {TrainingType.UNSUPERVISED.name}, {TrainingType.SUPERVISED.name}, or {TrainingType.TARGET_TN.name}!"
            )

        if hasattr(self, "batch_size") and batch_size is None:
            batch_size = self.batch_size

        if not hasattr(self, "batch_size"):
            self.batch_size = batch_size

        if return_list:
            loss: list = []

        loss_metric = metric or self.loss

        def loss_fn(data=None, targets=None, *params):
            """Load ``params`` into a copy of the model and compute the loss for one batch."""
            tn = self.copy()

            if hasattr(self, "sitetags") and self.sitetags is not None:
                tn.select_tensors(self.sitetags)[0].modify(data=params[0])
            else:
                for tensor, array in zip(tn.tensors, params, strict=False):
                    tensor.modify(data=array)

            # Without input data there is nothing to map over: jax.vmap would
            # reject an all-None argument list, so compare against the target
            # tensor network directly.
            if data is None and evaluate_type == TrainingType.TARGET_TN:
                return loss_metric(tn, tn_target)

            def single_loss(x, y=None):
                # The target-TN loss does not use the sample, so skip embedding it.
                if evaluate_type == TrainingType.TARGET_TN:
                    return loss_metric(tn, tn_target)

                tn_i = embed(x, embedding)  # create TN from data
                if evaluate_type == TrainingType.UNSUPERVISED:
                    return loss_metric(tn, tn_i)
                # evaluate_type was validated above, so this is SUPERVISED
                return loss_metric(tn, tn_i, y)

            if evaluate_type == TrainingType.SUPERVISED:
                return jax.vmap(single_loss, in_axes=(0, 0))(data, targets)
            # UNSUPERVISED and TARGET_TN both map over the inputs only
            return jax.vmap(single_loss, in_axes=(0,))(data)

        if inputs is not None:
            # Resolve the target device once instead of once per batch.
            target_device = jax.devices(self.device[0])[self.device[1]]

            loss_value: Any = 0
            num_batches = 0
            for batch_data in _batch_iterator(
                inputs,
                targets,
                batch_size,
                dtype=dtype,
                shuffle=shuffle,
                seed=seed,
                alternate_flip=alternate_flip,
            ):
                if isinstance(batch_data, tuple) and len(batch_data) == 2:
                    x, y = batch_data
                    x = jax.device_put(jnp.array(x, dtype=dtype), device=target_device)
                    y = jax.device_put(jnp.array(y), device=target_device)
                else:
                    x = jax.device_put(
                        jnp.array(batch_data, dtype=dtype), device=target_device
                    )
                    y = None

                if isinstance(self.strategy, Sweeps):
                    # Average the loss over all site groups visited by the sweep
                    loss_curr = np.zeros((x.shape[0],))
                    site_count = 0
                    for sites in self.strategy.iterate_sites(self):
                        site_count += 1
                        self.strategy.prehook(self, sites)

                        self.sitetags = [self.site_tag(site) for site in sites]

                        params_i = self.select_tensors(self.sitetags)[0].data
                        params_i = jnp.expand_dims(params_i, axis=0)

                        loss_group = loss_fn(x, y, *params_i)

                        self.strategy.posthook(self, sites)

                        loss_curr += loss_group
                    loss_curr /= site_count
                else:
                    params = self.arrays
                    loss_curr = loss_fn(x, y, *params)

                loss_value += np.mean(loss_curr)
                num_batches += 1

                if return_list:
                    loss.extend(loss_curr)

            if return_list:
                return np.array(loss)

            if num_batches == 0:
                raise ValueError("No evaluation batches were produced.")

            loss_value = loss_value / num_batches
        else:
            assert evaluate_type == TrainingType.TARGET_TN, (
                "If inputs are not provided, evaluation type must be TARGET_TN!"
            )
            assert tn_target is not None, (
                "If inputs are not provided, target tensor network must be provided!"
            )

            params = self.arrays
            loss_value = loss_fn(None, None, *params)
        return float(loss_value)



[docs]
    def convert_to_pytree(self):
        """Split the tensor network into its arrays and a data-free skeleton.

        Thin wrapper around :func:`quimb.tensor.pack`.

        Returns
        -------
        tuple
            ``(params, skeleton)`` as produced by :func:`quimb.tensor.pack`:
            the pytree (dict) of arrays and a copy of the network with all
            references to the original data removed.
        """
        arrays, template = qtn.pack(self)
        return (arrays, template)





[docs]
def load_model(model_name, dir_name=None):
    """Load a stored Model from its ``.pkl`` file.

    NOTE(review): the file is described as a pickle; only load files from
    trusted sources.

    Parameters
    ----------
    model_name : str
        Name of the model (file name without the ``.pkl`` extension).
    dir_name : str
        Directory where model is stored. If `None`, the file name is used
        as given, without a directory prefix.

    Returns
    -------
    :class:`tn4ml.models.Model` or subclass
    """
    file_name = f"{model_name}.pkl"
    location = file_name if dir_name is None else f"{dir_name}/{file_name}"
    return qu.load_from_disk(location)



def _check_chunks(chunked: Any) -> Any:
    """Pass the given chunks through unchanged.

    No chunk is dropped or modified, so a trailing partial batch is kept.

    Parameters
    ----------
    chunked : sequence
        Sequence of chunks.

    Returns
    -------
    sequence
        The very same object that was passed in.
    """
    return chunked


def _batch_iterator(
    x: Any,
    y: Any | None = None,
    batch_size: int = 2,
    dtype: Any = jnp.float_,
    shuffle: bool = True,
    seed: int = 0,
    alternate_flip: bool = False,
):
    """Iterate over batches of data with optional alternating batch flipping.

    Parameters
    ----------
    x : sequence
        Input data.
    batch_size : int
        Size of batch.
    y : sequence, or default `None`
        Target data.
    dtype : Any
        Data type of input data.
    shuffle : bool
        If True, shuffles the data.
    seed : int
        Seed for shuffling.
    alternate_flip : bool
        If True, flips every other batch along axis=1.

    Yields
    ------
    tuple
        Batch of input and target data (if target data is provided)
    """
    key = jax.random.PRNGKey(seed)

    # Convert to JAX array
    x = jax.numpy.asarray(x, dtype=dtype)

    if shuffle:
        perm = jax.random.permutation(key, len(x))
        x = x[perm]  # Shuffle x
        if y is not None:
            y = jax.numpy.asarray(y)  # Keep dtype as is
            y = y[perm]

    # Chunk the data
    x_chunks = _check_chunks(list(funcy.chunks(batch_size, x)))

    if y is not None:
        y_chunks = _check_chunks(list(funcy.chunks(batch_size, y)))

        # Track batch number for alternating flips
        for batch_idx, (x_chunk, y_chunk) in enumerate(
            zip(x_chunks, y_chunks, strict=False)
        ):
            # Flip every other batch if alternate_flip is enabled
            if (
                alternate_flip and batch_idx % 2 == 1
            ):  # For odd-indexed batches (0-indexed)
                x_chunk = jax.numpy.asarray(x_chunk, dtype=dtype)
                y_chunk = jax.numpy.asarray(y_chunk)
                # Flip each sample in the batch along axis=1
                x_chunk = jax.numpy.flip(x_chunk, axis=1)

            yield x_chunk, y_chunk
    else:
        for batch_idx, x_chunk in enumerate(x_chunks):
            # Flip every other batch if alternate_flip is enabled
            if (
                alternate_flip and batch_idx % 2 == 1
            ):  # For odd-indexed batches (0-indexed)
                x_chunk = jax.numpy.asarray(x_chunk, dtype=dtype)

                # Flip each sample in the batch along axis=1
                x_chunk = jax.numpy.flip(x_chunk, axis=1)

            yield x_chunk