# Source code for roiextractors.extractors.tiffimagingextractors.scanimagetiff_utils
"""Utility functions for ScanImage TIFF Extractors."""
import json
import numpy as np
from ...extraction_tools import PathType, get_package
def _get_scanimage_reader() -> type:
    """Return the ``ScanImageTiffReader`` class from the scanimage-tiff-reader package.

    The package is resolved through ``get_package``, which is also handed the
    pip command to suggest for installing it.
    """
    scanimage_module = get_package(
        package_name="ScanImageTiffReader",
        installation_instructions="pip install scanimage-tiff-reader",
    )
    reader_class = scanimage_module.ScanImageTiffReader
    return reader_class
[docs]
def extract_extra_metadata(
    file_path: PathType,
) -> dict:  # TODO: Refactor neuroconv to reference this implementation to avoid duplication
    """Extract metadata from a ScanImage TIFF file.

    The description of the first frame and the file-level metadata are both
    scanned for ``key = value`` lines. If the file-level metadata contains a
    blank line, the text after the first blank line is decoded as JSON and
    merged into the result.

    Parameters
    ----------
    file_path : PathType
        Path to the TIFF file.

    Returns
    -------
    extra_metadata: dict
        Dictionary of metadata extracted from the TIFF file. Values parsed from
        ``key = value`` lines are kept as stripped strings. When the same key
        occurs more than once, the last occurrence wins.

    Notes
    -----
    Known to work on SI versions v3.8.0, v2019bR0, v2022.0.0, and v2023.0.0
    """
    ScanImageTiffReader = _get_scanimage_reader()
    # Read both metadata strings once and release the file handle right away.
    with ScanImageTiffReader(str(file_path)) as io:
        first_frame_description = io.description(iframe=0)
        file_metadata = io.metadata()

    extra_metadata = {}
    for metadata_string in (first_frame_description, file_metadata):
        # Lines may be separated by "\n" or "\r", so normalize before splitting.
        for line in metadata_string.replace("\n", "\r").split("\r"):
            # Split on the first "=" only so values containing "=" stay intact.
            key, separator, value = line.partition("=")
            if separator:
                extra_metadata[key.strip()] = value.strip()

    if "\n\n" in file_metadata:
        additional_metadata_string = file_metadata.split("\n\n")[1]
        additional_metadata = json.loads(additional_metadata_string)
        # update() tolerates keys that are already present, unlike dict(**a, **b),
        # which raises TypeError on duplicates.
        extra_metadata.update(additional_metadata)
    return extra_metadata
[docs]
def parse_matlab_vector(matlab_vector: str) -> list:
    """Parse a MATLAB vector string into a list of integer values.

    Parameters
    ----------
    matlab_vector : str
        MATLAB vector string.

    Returns
    -------
    vector: list of int
        List of integer values. An empty vector (e.g. "[]") yields an empty list.

    Raises
    ------
    ValueError
        If the MATLAB vector string cannot be parsed, i.e. one of its elements
        is not an integer literal.

    Notes
    -----
    MATLAB vector string is of the form "[1 2 3 ... N]" or "[1,2,3,...,N]" or "[1;2;3;...;N]".
    There may or may not be whitespace between the values. Ex. "[1, 2, 3]" or "[1,2,3]".
    A bare scalar such as "1" or a single bracketed value such as "[12]" is also accepted.
    """
    contents = matlab_vector.strip().strip("[]")
    # Turn every supported separator into whitespace so a single split() handles
    # all of them, including mixed separators and multi-digit single elements.
    tokens = contents.replace(";", " ").replace(",", " ").split()
    try:
        return [int(token) for token in tokens]
    except ValueError as error:
        raise ValueError(f"Could not parse vector from {matlab_vector}.") from error
[docs]
def read_scanimage_metadata(file_path: PathType) -> dict:
    """
    Read and parse metadata from a ScanImage TIFF file.

    This function extracts both the non-varying frame metadata and ROI group metadata
    (if available) from a ScanImage TIFF file and processes them to extract key imaging parameters.
    The function returns a python dictionary with fields already parsed to python objects
    (rather than left as strings).

    Parameters
    ----------
    file_path : PathType
        Path to the ScanImage TIFF file.

    Returns
    -------
    metadata_dict : dict
        Dictionary containing three nested dictionaries:

        - scan_image_non_varying_frame_metadata: Raw non-varying frame metadata
        - scan_image_roi_group_metadata: Raw ROI group metadata (if present)
        - roiextractors_parsed_metadata: Parsed metadata with standardized keys:
            - sampling_frequency: Frame or volume scan rate in Hz
            - num_channels: Number of available imaging channels
            - num_planes: Number of imaging planes (slices)
            - frames_per_slice: Number of frames per Z slice
            - channel_names: List of available channel names
            - roi_metadata: ROI definitions (if present)

    Raises
    ------
    ValueError
        If neither ``SI.hChannels.channelSave`` nor ``SI.hChannels.channelsActive``
        is present in the non-varying frame metadata.

    Notes
    -----
    The ScanImage TIFF format includes:

    1. TIFF Header Section: Defines byte order and offsets
    2. ScanImage Static Metadata Section: Contains metadata applicable to all frames
        - Non-Varying Frame Data: System configuration for all frames
        - ROI Group Data: Defined regions of interest (if available)
    3. Frame Sections: One per image frame, containing:
        - IFD Header: Image File Directory with tags and values
        - Frame-specific data: Timestamps and other frame metadata
        - Image data: The actual image pixels

    The function uses the tifffile module's read_scanimage_metadata function to extract
    the raw metadata, then processes it to standardize key imaging parameters.
    """
    # Imported lazily, and aliased so it does not shadow this function's own name.
    from tifffile import read_scanimage_metadata as _read_raw_scanimage_metadata

    with open(file_path, "rb") as fh:
        all_metadata = _read_raw_scanimage_metadata(fh)

    non_varying_frame_metadata = all_metadata[0]
    roi_group_metadata = all_metadata[1]

    if non_varying_frame_metadata["SI.hStackManager.enable"]:
        # TODO: discuss on issue whether flyback frames
        # (numFramesPerVolumeWithFlyback - numFramesPerVolume) should be counted as planes.
        # They are not counted for now, so those keys are deliberately not read here;
        # files lacking them must not fail on a value that is never used.
        num_planes = non_varying_frame_metadata["SI.hStackManager.numSlices"]
        frames_per_slice = non_varying_frame_metadata["SI.hStackManager.framesPerSlice"]
    else:
        num_planes = 1
        frames_per_slice = 1

    # Planar data is sampled at the frame rate, volumetric data at the volume rate.
    if num_planes == 1:
        sampling_frequency = non_varying_frame_metadata["SI.hRoiManager.scanFrameRate"]
    else:
        sampling_frequency = non_varying_frame_metadata["SI.hRoiManager.scanVolumeRate"]

    # `channelSave` indicates whether the channel is saved. Note that a channel might not be saved even if it is active.
    # We check `channelSave` first but keep the `channelsActive` check for backward compatibility.
    channel_availability_keys = ["SI.hChannels.channelSave", "SI.hChannels.channelsActive"]
    channel_availability = next(
        (key for key in channel_availability_keys if key in non_varying_frame_metadata),
        None,
    )
    if channel_availability is None:
        raise ValueError(f"Could not find any of {channel_availability_keys} in metadata.")

    available_channels = non_varying_frame_metadata[channel_availability]
    # A single channel may be stored as a bare value instead of a list.
    available_channels = [available_channels] if not isinstance(available_channels, list) else available_channels
    channel_indices = np.array(available_channels) - 1  # Account for MATLAB indexing
    channel_names = non_varying_frame_metadata["SI.hChannels.channelName"]
    channel_names_available = [channel_names[i] for i in channel_indices]
    num_channels = len(channel_names_available)

    if roi_group_metadata:
        roi_metadata = roi_group_metadata["RoiGroups"]
    else:
        roi_metadata = None

    metadata_parsed = dict(
        sampling_frequency=sampling_frequency,
        num_channels=num_channels,
        num_planes=num_planes,
        frames_per_slice=frames_per_slice,
        channel_names=channel_names_available,
        roi_metadata=roi_metadata,
    )
    metadata_dict = dict(
        scan_image_non_varying_frame_metadata=non_varying_frame_metadata,
        scan_image_roi_group_metadata=roi_group_metadata,
        roiextractors_parsed_metadata=metadata_parsed,
    )
    return metadata_dict
[docs]
def parse_metadata(metadata: dict) -> dict:
    """Translate raw ScanImage metadata strings into the standard keys used by ImagingExtractors.

    The following keys are produced:

    - sampling_frequency
    - num_planes
    - frames_per_slice
    - channel_names
    - num_channels
    - roi_metadata

    Parameters
    ----------
    metadata : dict
        Dictionary of metadata extracted from the TIFF file.

    Returns
    -------
    metadata_parsed: dict
        Dictionary of parsed metadata.

    Raises
    ------
    ValueError
        If neither ``SI.hChannels.channelSave`` nor ``SI.hChannels.channelsActive``
        is present in ``metadata``.

    Notes
    -----
    Known to work on SI versions v2019bR0, v2022.0.0, and v2023.0.0. Fails on v3.8.0.
    SI.hChannels.channelsActive = string of MATLAB-style vector with channel integers (see parse_matlab_vector).
    SI.hChannels.channelName = "{'channel_name_1' 'channel_name_2' ... 'channel_name_M'}"
    where M is the number of channels (active or not).
    """
    frame_rate = float(metadata["SI.hRoiManager.scanFrameRate"])

    # Slice information is only read when the stack manager is enabled.
    if metadata["SI.hStackManager.enable"] != "true":
        plane_count, per_slice_count = 1, 1
    else:
        plane_count = int(metadata["SI.hStackManager.numSlices"])
        per_slice_count = int(metadata["SI.hStackManager.framesPerSlice"])

    # `channelSave` tells which channels were written to disk; an active channel is
    # not necessarily saved. `channelsActive` is the fallback for backward compatibility.
    channel_availability_keys = ["SI.hChannels.channelSave", "SI.hChannels.channelsActive"]
    chosen_key = next((name for name in channel_availability_keys if name in metadata), None)
    if chosen_key is None:
        raise ValueError(f"Could not find any of {channel_availability_keys} in metadata.")

    one_based_channels = parse_matlab_vector(metadata[chosen_key])
    zero_based_channels = np.array(one_based_channels) - 1  # MATLAB indices start at 1

    # The quoted names sit at the odd positions once the string is split on "'".
    quoted_pieces = metadata["SI.hChannels.channelName"].split("'")
    all_channel_names = np.array(quoted_pieces[1::2])
    selected_names = all_channel_names[zero_based_channels].tolist()

    roi_groups = metadata["RoiGroups"] if "RoiGroups" in metadata else None

    return {
        "sampling_frequency": frame_rate,
        "num_channels": len(selected_names),
        "num_planes": plane_count,
        "frames_per_slice": per_slice_count,
        "channel_names": selected_names,
        "roi_metadata": roi_groups,
    }
[docs]
def parse_metadata_v3_8(metadata: dict) -> dict:
    """Translate old-style (v3.8) ScanImage metadata into the standard keys used by ImagingExtractors.

    The following keys are produced:

    - sampling_frequency
    - num_channels
    - num_planes

    Parameters
    ----------
    metadata : dict
        Dictionary of metadata extracted from the TIFF file.

    Returns
    -------
    metadata_parsed: dict
        Dictionary of parsed metadata.
    """
    frame_rate = float(metadata["state.acq.frameRate"])
    saved_channel_count = int(metadata["state.acq.numberOfChannelsSave"])
    z_slice_count = int(metadata["state.acq.numberOfZSlices"])
    return {
        "sampling_frequency": frame_rate,
        "num_channels": saved_channel_count,
        "num_planes": z_slice_count,
    }
[docs]
def extract_timestamps_from_file(file_path: PathType) -> np.ndarray:
    """Extract the frame timestamps from a ScanImage TIFF file.

    Each frame's description is searched for a line containing
    ``frameTimestamps_sec``; the text after the ``=`` on the first such line is
    taken as that frame's timestamp.

    Parameters
    ----------
    file_path : PathType
        Path to the TIFF file.

    Returns
    -------
    timestamps : numpy.ndarray
        Array of frame timestamps in seconds. A frame whose description has no
        ``frameTimestamps_sec`` line keeps the initial value of 0.

    Raises
    ------
    AssertionError
        If the frame timestamps are not found in the TIFF file.

    Notes
    -----
    Known to work on SI versions v2019bR0, v2022.0.0, and v2023.0.0. Fails on v3.8.0.
    """
    ScanImageTiffReader = _get_scanimage_reader()
    # The context manager closes the reader even if parsing fails part-way.
    with ScanImageTiffReader(str(file_path)) as io:
        # Raised explicitly rather than via `assert` so the check still runs under `python -O`.
        if "frameTimestamps_sec" not in io.description(iframe=0):
            raise AssertionError("frameTimestamps_sec not found in TIFF file")

        num_frames = io.shape()[0]
        timestamps = np.zeros(num_frames)
        for iframe in range(num_frames):
            for line in io.description(iframe=iframe).split("\n"):
                if "frameTimestamps_sec" in line:
                    timestamps[iframe] = float(line.split("=")[1].strip())
                    break  # only the first matching line of a frame is used
    return timestamps