WayScience · wli51 · Jun 10, 2026 · d33bs · Jun 10, 2026 · d33bs
diff --git a/src/virtual_stain_flow/evaluation/evaluation_utils.py b/src/virtual_stain_flow/evaluation/evaluation_utils.py
@@ -1,4 +1,4 @@
-from typing import List, Optional, Tuple, Union
+from typing import List, Optional, Tuple, Union, Any
 
 import numpy as np
 import pandas as pd
@@ -10,13 +10,25 @@
 from virtual_stain_flow.datasets.base_wrapper_dataset import BaseWrapperDataset
 
 
+def _to_numpy_image(value: Any) -> np.ndarray:  # convert one tensor or array-like value to a numpy array
+    if isinstance(value, torch.Tensor):
+        return value.detach().cpu().numpy()  # detach and move to CPU first so .numpy() also works for grad-tracking or non-CPU tensors
+    return np.asarray(value)  # anything that is not a torch.Tensor is coerced by numpy
+
+
+def _normalize_to_list(sample: Any) -> List[np.ndarray]:  # always return a list of numpy arrays, whatever the sample layout
+    if isinstance(sample, (list, tuple)):
+        return [_to_numpy_image(item) for item in sample]  # list/tuple sample: convert each element individually
+    return [_to_numpy_image(sample)]  # single value: wrap it in a one-element list
+
+
 def extract_samples_from_dataset(
     dataset: Union[BaseImageDataset, CropImageDataset, BaseWrapperDataset],
     indices: List[int],
 ) -> Tuple[
-    List[np.ndarray],
-    List[np.ndarray],
-    Optional[List[np.ndarray]],
+    List[Union[np.ndarray, List[np.ndarray]]],
+    List[Union[np.ndarray, List[np.ndarray]]],
+    Optional[List[Union[np.ndarray, List[np.ndarray]]]],
     Optional[List[Tuple[int, int]]],
 ]:
     """
@@ -26,13 +38,15 @@ def extract_samples_from_dataset(
     (x, y) coordinates of each crop for visualization with bounding boxes.
 
     :param dataset: A BaseImageDataset or CropImageDataset instance.
-    :param indices: List of dataset indices to extract.
-    :return: Tuple of (inputs, targets, raw_images, patch_coords).
-        - inputs: List of numpy arrays, each with shape (C, H, W) or (H, W).
-        - targets: List of numpy arrays, each with shape (C, H, W) or (H, W).
-        - raw_images: List of numpy arrays for CropImageDataset (original uncropped images),
-          or None for BaseImageDataset.
-        - patch_coords: List of (x, y) tuples for CropImageDataset, or None for BaseImageDataset.
+    :param indices: List of dataset indices to extract.
+    :return: Tuple of (inputs, targets, raw_images, patch_coords).
+        - inputs: List of numpy arrays, each with shape (C, H, W) or (H, W).
+          For a multi-input sample, the entry is a list of arrays instead of a single array.
+        - targets: List of numpy arrays, each with shape (C, H, W) or (H, W).
+          For a multi-target sample, the entry is a list of arrays instead of a single array.
+        - raw_images: List of numpy arrays for CropImageDataset (original uncropped images),
+          or None for BaseImageDataset.
+        - patch_coords: List of (x, y) tuples for CropImageDataset, or None for BaseImageDataset.
     """
     is_wrapper_dataset = False
     if isinstance(dataset, BaseWrapperDataset):
@@ -55,25 +69,21 @@ def extract_samples_from_dataset(
             f"max index requested: {max(indices)}"
         )
 
-    inputs: List[np.ndarray] = []
-    targets: List[np.ndarray] = []
-    raw_images: Optional[List[np.ndarray]] = [] if is_crop_dataset else None
+    inputs: List[Union[np.ndarray, List[np.ndarray]]] = []
+    targets: List[Union[np.ndarray, List[np.ndarray]]] = []
+    raw_images: Optional[List[Union[np.ndarray, List[np.ndarray]]]] = [] if is_crop_dataset else None
     patch_coords: Optional[List[Tuple[int, int]]] = [] if is_crop_dataset else None
 
     for idx in indices:
         # Access dataset item to trigger lazy loading and state update
         input_tensor, target_tensor = dataset[idx]
 
         # Convert to numpy - handle both Tensor and ndarray inputs
-        if isinstance(input_tensor, torch.Tensor):
-            inputs.append(input_tensor.numpy())
-        else:
-            inputs.append(np.asarray(input_tensor))
-
-        if isinstance(target_tensor, torch.Tensor):
-            targets.append(target_tensor.numpy())
-        else:
-            targets.append(np.asarray(target_tensor))
+        input_list = _normalize_to_list(input_tensor)
+        target_list = _normalize_to_list(target_tensor)
+
+        inputs.append(input_list[0] if len(input_list) == 1 else input_list)
+        targets.append(target_list[0] if len(target_list) == 1 else target_list)
 
         if is_crop_dataset:
             # Access the original uncropped image and crop coordinates

diff --git a/src/virtual_stain_flow/evaluation/predict_utils.py b/src/virtual_stain_flow/evaluation/predict_utils.py
@@ -1,18 +1,26 @@
-from typing import Optional, List, Tuple, Callable
+from typing import Optional, List, Tuple, Callable, Union, Any
 
 import torch
 import numpy as np
 from torch.utils.data import DataLoader, Dataset, Subset
 from albumentations import ImageOnlyTransform, Compose
 
+def _move_to_device(value: Any, device: Union[str, torch.device]) -> Any:  # recursively move every tensor inside `value` to `device`
+    if isinstance(value, torch.Tensor):
+        return value.to(device)
+    if isinstance(value, (list, tuple)):
+        return type(value)(_move_to_device(item, device) for item in value)  # rebuild the same container type; NOTE(review): a namedtuple would fail here (its constructor takes positional fields) - confirm none reach this path
+    return value  # non-tensor, non-list/tuple values are returned unchanged
+
+
 def predict_image(
     dataset: Dataset,
     model: torch.nn.Module,
     batch_size: int = 1,
-    device: str = "cpu",
+    device: Union[str, torch.device] = "cpu",
     num_workers: int = 0,
-    indices: Optional[List[int]] = None
-) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+    indices: Optional[List[int]] = None,
+) -> Tuple[torch.Tensor, torch.Tensor, Union[torch.Tensor, List[torch.Tensor]]]:
     """
     Runs a model on a dataset, performing a forward pass on all (or a subset of) input images 
     in evaluation mode and returning a stacked tensor of predictions.
@@ -27,6 +35,7 @@ def predict_image(
     :param indices: Optional list of dataset indices to subset the dataset before inference.
 
     :return: Tuple of stacked target, prediction, and input tensors.
+        For multi-input datasets, the third element is a list with one stacked tensor per input position.
     """
     # Subset the dataset if indices are provided
     if indices is not None:
@@ -38,25 +47,41 @@ def predict_image(
     model.to(device)
     model.eval()
 
-    predictions, targets, inputs = [], [], []
+    predictions, targets = [], []
+    inputs: Union[List[torch.Tensor], List[List[torch.Tensor]]] = []
 
     with torch.no_grad():
         for input, target in dataloader:  # Unpacking (input_tensor, target_tensor)
-            input = input.to(device)  # Move input data to the specified device
+            input = _move_to_device(input, device)
 
             # Forward pass
-            prediction = model(input)
-
+            if isinstance(input, (list, tuple)):
+                prediction = model(*input)
+            else:
+                prediction = model(input)
+
             # output both target and prediction tensors for metric
             targets.append(target.cpu())
             predictions.append(prediction.cpu())  # Move to CPU for stacking
-            inputs.append(input.cpu())
+
+            if isinstance(input, (list, tuple)):
+                if not inputs:
+                    inputs = [[] for _ in range(len(input))]
+                for idx, item in enumerate(input):
+                    inputs[idx].append(item.cpu())
+            else:
+                inputs.append(input.cpu())
+
+    if inputs and isinstance(inputs[0], list):
+        inputs_stacked = [torch.cat(batch_list, dim=0) for batch_list in inputs]  # type: ignore[arg-type]
+    else:
+        inputs_stacked = torch.cat(inputs, dim=0)  # type: ignore[arg-type]
 
     return (
-        torch.cat(targets, dim=0), 
-        torch.cat(predictions, dim=0), 
-        torch.cat(inputs, dim=0)
-    ) 
+        torch.cat(targets, dim=0),
+        torch.cat(predictions, dim=0),
+        inputs_stacked,
+    )
 
 def process_tensor_image(
     img_tensor: torch.Tensor,