Source code for roiextractors.extractors.tiffimagingextractors.scanimagetiff_utils

"""Utility functions for ScanImage TIFF Extractors."""

import json

import numpy as np

from ...extraction_tools import PathType, get_package


def _get_scanimage_reader() -> type:
    """Return the ``ScanImageTiffReader`` class from the ScanImageTiffReader package.

    The package is looked up through ``get_package``, which is handed the pip command
    to use as installation instructions.

    Returns
    -------
    type
        The ``ScanImageTiffReader`` attribute of the resolved package.
    """
    reader_package = get_package(
        package_name="ScanImageTiffReader",
        installation_instructions="pip install scanimage-tiff-reader",
    )
    return reader_package.ScanImageTiffReader


def extract_extra_metadata(
    file_path: PathType,
) -> dict:  # TODO: Refactor neuroconv to reference this implementation to avoid duplication
    """Extract metadata from a ScanImage TIFF file.

    The description of the first frame and the file-level metadata string are both
    scanned for ``key = value`` lines. If the file-level metadata string contains a
    blank line, the text that follows it is additionally decoded as JSON and merged
    into the result.

    Parameters
    ----------
    file_path : PathType
        Path to the TIFF file.

    Returns
    -------
    extra_metadata: dict
        Dictionary of metadata extracted from the TIFF file. Keys and values from the
        ``key = value`` lines are whitespace-stripped strings; entries coming from the
        JSON section keep their decoded types.

    Raises
    ------
    json.JSONDecodeError
        If the section following the first blank line of the metadata is not valid JSON.

    Notes
    -----
    Known to work on SI versions v3.8.0, v2019bR0, v2022.0.0, and v2023.0.0

    When the same key appears more than once, the value read last wins. Sources are
    read in this order: first frame description, file-level metadata, JSON section.
    """
    ScanImageTiffReader = _get_scanimage_reader()
    # The reader is used as a context manager so that the file handle is released as
    # soon as the two strings have been read, even if reading raises.
    with ScanImageTiffReader(str(file_path)) as reader:
        first_frame_description = reader.description(iframe=0)
        file_metadata = reader.metadata()

    extra_metadata = {}
    for metadata_string in (first_frame_description, file_metadata):
        # Lines may be terminated by "\n" or "\r"; normalize to a single separator.
        for line in metadata_string.replace("\n", "\r").split("\r"):
            # Split on the first "=" only so that values containing "=" stay intact.
            key, separator, value = line.partition("=")
            if separator:
                extra_metadata[key.strip()] = value.strip()

    if "\n\n" in file_metadata:
        additional_metadata_string = file_metadata.split("\n\n")[1]
        additional_metadata = json.loads(additional_metadata_string)
        extra_metadata.update(additional_metadata)

    return extra_metadata
def parse_matlab_vector(matlab_vector: str) -> list:
    """Parse a MATLAB vector string into a list of integer values.

    Parameters
    ----------
    matlab_vector : str
        MATLAB vector string.

    Returns
    -------
    vector: list of int
        List of integer values.

    Raises
    ------
    ValueError
        If the MATLAB vector string cannot be parsed. This includes an empty vector
        (e.g. "[]") and any element that is not an integer literal.

    Notes
    -----
    MATLAB vector string is of the form "[1 2 3 ... N]" or "[1,2,3,...,N]" or "[1;2;3;...;N]".
    There may or may not be whitespace between the values. Ex. "[1, 2, 3]" or "[1,2,3]".
    A lone scalar, with or without brackets, is returned as a one-element list
    (e.g. "[10]" -> [10]).
    """
    contents = matlab_vector.strip("[]")
    # The separator is chosen by priority: ";" first, then ",", then a plain space.
    if ";" in contents:
        tokens = contents.split(";")
    elif "," in contents:
        tokens = contents.split(",")
    elif " " in contents:
        tokens = contents.split(" ")
    elif contents:
        # No separator at all: a single scalar, which may have more than one digit.
        tokens = [contents]
    else:
        raise ValueError(f"Could not parse vector from {matlab_vector}.")

    try:
        # Empty tokens come from repeated separators (e.g. double spaces) and are skipped.
        return [int(token.strip()) for token in tokens if token != ""]
    except ValueError as error:
        # Re-raise with the same message as the other failure mode for a uniform contract.
        raise ValueError(f"Could not parse vector from {matlab_vector}.") from error
def read_scanimage_metadata(file_path: PathType) -> dict:
    """
    Read and parse metadata from a ScanImage TIFF file.

    This function extracts both the non-varying frame metadata and ROI group metadata
    (if available) from a ScanImage TIFF file and processes them to extract key
    imaging parameters. The function returns a python dictionary with fields already
    parsed to python objects (as opposed to strings).

    Parameters
    ----------
    file_path : PathType
        Path to the ScanImage TIFF file.

    Returns
    -------
    metadata_dict : dict
        Dictionary containing three nested dictionaries:
        - scan_image_non_varying_frame_metadata: Raw non-varying frame metadata
        - scan_image_roi_group_metadata: Raw ROI group metadata (if present)
        - roiextractors_parsed_metadata: Parsed metadata with standardized keys:
            - sampling_frequency: Frame or volume scan rate in Hz
            - num_channels: Number of available imaging channels
            - num_planes: Number of imaging planes (slices)
            - frames_per_slice: Number of frames per Z slice
            - channel_names: List of available channel names
            - roi_metadata: ROI definitions (if present)

    Raises
    ------
    ValueError
        If neither "SI.hChannels.channelSave" nor "SI.hChannels.channelsActive" is
        present in the non-varying frame metadata.

    Notes
    -----
    The ScanImage TIFF format includes:

    1. TIFF Header Section: Defines byte order and offsets
    2. ScanImage Static Metadata Section: Contains metadata applicable to all frames
        - Non-Varying Frame Data: System configuration for all frames
        - ROI Group Data: Defined regions of interest (if available)
    3. Frame Sections: One per image frame, containing:
        - IFD Header: Image File Directory with tags and values
        - Frame-specific data: Timestamps and other frame metadata
        - Image data: The actual image pixels

    The function uses the tifffile module's read_scanimage_metadata function to
    extract the raw metadata, then processes it to standardize key imaging parameters.
    """
    # Imported under an alias so that it does not shadow this function's own name.
    from tifffile import read_scanimage_metadata as tifffile_read_scanimage_metadata

    with open(file_path, "rb") as fh:
        all_metadata = tifffile_read_scanimage_metadata(fh)

    # Indexed rather than unpacked: only the first two entries are used here.
    non_varying_frame_metadata = all_metadata[0]
    roi_group_metadata = all_metadata[1]

    if non_varying_frame_metadata["SI.hStackManager.enable"]:
        # TODO: discuss on issue whether flyback frames
        # (numFramesPerVolumeWithFlyback - numFramesPerVolume) should be counted as planes.
        # The current behavior (slices only) is kept for now.
        num_planes = non_varying_frame_metadata["SI.hStackManager.numSlices"]
        frames_per_slice = non_varying_frame_metadata["SI.hStackManager.framesPerSlice"]
    else:
        num_planes = 1
        frames_per_slice = 1

    # Single-plane data is sampled per frame, multi-plane data per volume.
    if num_planes == 1:
        sampling_frequency = non_varying_frame_metadata["SI.hRoiManager.scanFrameRate"]
    else:
        sampling_frequency = non_varying_frame_metadata["SI.hRoiManager.scanVolumeRate"]

    # `channelSave` indicates whether the channel is saved. Note that a channel might not be saved even if it is active.
    # We check `channelSave` first but keep the `channelsActive` check for backward compatibility.
    channel_availability_keys = ["SI.hChannels.channelSave", "SI.hChannels.channelsActive"]
    channel_availability = next(
        (key for key in channel_availability_keys if key in non_varying_frame_metadata),
        None,
    )
    if channel_availability is None:
        raise ValueError(f"Could not find any of {channel_availability_keys} in metadata.")

    available_channels = non_varying_frame_metadata[channel_availability]
    # A single available channel may not be wrapped in a list; normalize it.
    if not isinstance(available_channels, list):
        available_channels = [available_channels]
    channel_indices = np.array(available_channels) - 1  # Account for MATLAB indexing

    channel_names = non_varying_frame_metadata["SI.hChannels.channelName"]
    # Same normalization as for the channel numbers: a lone name given as a plain
    # string must not be indexed character by character.
    if isinstance(channel_names, str):
        channel_names = [channel_names]
    channel_names_available = [channel_names[i] for i in channel_indices]
    num_channels = len(channel_names_available)

    roi_metadata = roi_group_metadata["RoiGroups"] if roi_group_metadata else None

    metadata_parsed = dict(
        sampling_frequency=sampling_frequency,
        num_channels=num_channels,
        num_planes=num_planes,
        frames_per_slice=frames_per_slice,
        channel_names=channel_names_available,
        roi_metadata=roi_metadata,
    )

    metadata_dict = dict(
        scan_image_non_varying_frame_metadata=non_varying_frame_metadata,
        scan_image_roi_group_metadata=roi_group_metadata,
        roiextractors_parsed_metadata=metadata_parsed,
    )
    return metadata_dict
def parse_metadata(metadata: dict) -> dict:
    """Convert raw string metadata into the standard keys used by ImagingExtractors.

    Currently supports
    - sampling_frequency
    - num_planes
    - frames_per_slice
    - channel_names
    - num_channels

    Parameters
    ----------
    metadata : dict
        Dictionary of metadata extracted from the TIFF file.

    Returns
    -------
    metadata_parsed: dict
        Dictionary of parsed metadata.

    Raises
    ------
    ValueError
        If neither "SI.hChannels.channelSave" nor "SI.hChannels.channelsActive" is
        present in ``metadata``.

    Notes
    -----
    Known to work on SI versions v2019bR0, v2022.0.0, and v2023.0.0. Fails on v3.8.0.
    SI.hChannels.channelsActive = string of MATLAB-style vector with channel integers (see parse_matlab_vector).
    SI.hChannels.channelName = "{'channel_name_1' 'channel_name_2' ... 'channel_name_M'}"
        where M is the number of channels (active or not).
    """
    frame_rate = float(metadata["SI.hRoiManager.scanFrameRate"])

    # Defaults describe a single-plane acquisition; overridden when the stack manager is on.
    plane_count, slice_frame_count = 1, 1
    if metadata["SI.hStackManager.enable"] == "true":
        plane_count = int(metadata["SI.hStackManager.numSlices"])
        slice_frame_count = int(metadata["SI.hStackManager.framesPerSlice"])

    # `channelSave` indicates whether the channel is saved. Note that a channel might not be saved even if it is active.
    # `channelSave` is therefore tried first; `channelsActive` is kept for backward compatibility.
    candidate_keys = ["SI.hChannels.channelSave", "SI.hChannels.channelsActive"]
    selected_key = next((key for key in candidate_keys if key in metadata), None)
    if selected_key is None:
        raise ValueError(f"Could not find any of {candidate_keys} in metadata.")

    one_based_channels = parse_matlab_vector(metadata[selected_key])
    zero_based_channels = np.array(one_based_channels) - 1  # Account for MATLAB indexing

    # The names sit between single quotes, i.e. at every odd position after splitting on "'".
    quoted_pieces = metadata["SI.hChannels.channelName"].split("'")
    every_channel_name = np.array(quoted_pieces[1::2])
    selected_channel_names = every_channel_name[zero_based_channels].tolist()

    roi_metadata = metadata["RoiGroups"] if "RoiGroups" in metadata else None

    return {
        "sampling_frequency": frame_rate,
        "num_channels": len(selected_channel_names),
        "num_planes": plane_count,
        "frames_per_slice": slice_frame_count,
        "channel_names": selected_channel_names,
        "roi_metadata": roi_metadata,
    }
def parse_metadata_v3_8(metadata: dict) -> dict:
    """Convert old-style (v3.8) metadata into the standard keys used by ImagingExtractors.

    Requires old version of metadata (v3.8).
    Currently supports
    - sampling frequency
    - num_channels
    - num_planes

    Parameters
    ----------
    metadata : dict
        Dictionary of metadata extracted from the TIFF file.

    Returns
    -------
    metadata_parsed: dict
        Dictionary of parsed metadata.
    """
    return {
        "sampling_frequency": float(metadata["state.acq.frameRate"]),
        "num_channels": int(metadata["state.acq.numberOfChannelsSave"]),
        "num_planes": int(metadata["state.acq.numberOfZSlices"]),
    }
def extract_timestamps_from_file(file_path: PathType) -> np.ndarray:
    """Extract the frame timestamps from a ScanImage TIFF file.

    Each frame's description is scanned line by line and the value of the first line
    mentioning ``frameTimestamps_sec`` is stored for that frame.

    Parameters
    ----------
    file_path : PathType
        Path to the TIFF file.

    Returns
    -------
    timestamps : numpy.ndarray
        Array of frame timestamps in seconds. A frame whose description has no
        ``frameTimestamps_sec`` line keeps the initial value of 0.0.

    Raises
    ------
    AssertionError
        If the frame timestamps are not found in the TIFF file.

    Notes
    -----
    Known to work on SI versions v2019bR0, v2022.0.0, and v2023.0.0. Fails on v3.8.0.
    """
    ScanImageTiffReader = _get_scanimage_reader()
    # The reader is used as a context manager so the file handle is always released.
    with ScanImageTiffReader(str(file_path)) as reader:
        # Raised explicitly (instead of using an `assert` statement) so that the check
        # still runs when Python is started with -O; the exception type is unchanged.
        if "frameTimestamps_sec" not in reader.description(iframe=0):
            raise AssertionError("frameTimestamps_sec not found in TIFF file")

        num_frames = reader.shape()[0]
        timestamps = np.zeros(num_frames)
        for iframe in range(num_frames):
            for line in reader.description(iframe=iframe).split("\n"):
                if "frameTimestamps_sec" in line:
                    timestamps[iframe] = float(line.split("=")[1].strip())
                    break

    return timestamps