Source code for bioimageloader.collections._s_bsst265

import os.path
from functools import cached_property
from pathlib import Path
from typing import Dict, Optional

import albumentations
import cv2
import numpy as np
import tifffile
from skimage.util import img_as_float32

from ..base import MaskDataset


class S_BSST265(MaskDataset):
    """An annotated fluorescence image dataset for training nuclear
    segmentation methods [1]_

    Immuno Fluorescence (IF) images, designed for ML

    Parameters
    ----------
    root_dir : str
        Path to root directory. The dataset itself is looked up in the
        ``S-BSST265`` sub-directory of this path.
    output : {'both', 'image', 'mask'}, default: 'both'
        Change outputs. 'both' returns {'image': image, 'mask': mask}.
    transforms : albumentations.Compose, optional
        An instance of Compose (albumentations pkg) that defines augmentation
        in sequence.
    num_samples : int, optional
        Useful when ``transforms`` is set. Define the total length of the
        dataset. If it is set, it overwrites ``__len__``.

    Notes
    -----
    - All images are grayscale BUT some are stored with 3 channels
    - rawimages: Raw nuclear images in TIFF format
    - groundtruth: Annotated masks in TIFF format
    - groundtruth_svgs: SVG-Files for each annotated masks and corresponding
      raw image in JPEG format
    - singlecell_groundtruth: Groundtruth for randomly selected nuclei of the
      testset (25 nuclei per testset class, a subset of all nuclei of the
      testset classes; human experts can compete with this low number of
      nuclei per subset by calculating Dice coefficients between their
      annotations and the groundtruth annotations)
    - visualized_groundtruth: Visualization of groundtruth masks in PNG format
    - visualized_singlecell_groundtruth: Visualization of groundtruth for
      randomly selected nuclei in PNG format
    - Find more info in README.txt inside the root directory

    References
    ----------
    .. [1] F. Kromp et al., “An annotated fluorescence image dataset for
       training nuclear segmentation methods,” Scientific Data, vol. 7, no. 1,
       Art. no. 1, Aug. 2020, doi: 10.1038/s41597-020-00608-w.

    See Also
    --------
    MaskDataset : Super class
    Dataset : Base class
    DatasetInterface : Interface
    """
    # Dataset's acronym
    acronym = 'S_BSST265'

    def __init__(
        self,
        # Interface requirement
        root_dir: str,
        *,
        output: str = 'both',
        transforms: Optional[albumentations.Compose] = None,
        num_samples: Optional[int] = None,
        **kwargs
    ):
        # Interface and super-class arguments.
        # NOTE(review): ``**kwargs`` is accepted but not used here, and the
        # public ``root_dir`` attribute read by ``file_list``/``anno_dict`` is
        # presumably a property provided by the base class -- confirm.
        self._root_dir = os.path.join(root_dir, 'S-BSST265')
        self._output = output
        self._transforms = transforms
        self._num_samples = num_samples

    def get_image(self, p: Path) -> np.ndarray:
        """Read a TIFF image as float32 with three channels.

        Parameters
        ----------
        p : Path
            Path to a raw image file.

        Returns
        -------
        np.ndarray
            The image converted with ``img_as_float32``. Images that are not
            already 3-channel are expanded with ``cv2.COLOR_GRAY2RGB``.
        """
        tif = tifffile.imread(p)
        tif = img_as_float32(tif)
        # A 2-D array is single-channel whatever its width; testing only
        # ``shape[-1]`` would mistake a 3-pixel-wide grayscale image for RGB.
        if tif.ndim < 3 or tif.shape[-1] != 3:
            tif = cv2.cvtColor(tif, cv2.COLOR_GRAY2RGB)
        return tif

    def get_mask(self, p: Path) -> np.ndarray:
        """Read a TIFF annotation mask and return it cast to ``int16``.

        Parameters
        ----------
        p : Path
            Path to a ground-truth mask file.
        """
        tif = tifffile.imread(p)
        return tif.astype(np.int16)

    @cached_property
    def file_list(self) -> list:
        """Sorted paths of ``rawimages/*.tif`` under the root directory.

        Ordering uses ``_sort_key``; the result is cached after first access.
        """
        root_dir = self.root_dir
        parent = 'rawimages'
        file_list = sorted(
            root_dir.glob(f'{parent}/*.tif'), key=self._sort_key
        )
        return file_list

    @cached_property
    def anno_dict(self) -> Dict[int, Path]:
        """Map a running index to each ``groundtruth/*.tif`` path.

        Masks are sorted with the same key as ``file_list``; index ``i`` is
        presumably meant to pair with ``file_list[i]`` -- this relies on both
        folders sorting to the same order (TODO confirm against the caller).
        """
        root_dir = self.root_dir
        parent = 'groundtruth'
        anno_list = sorted(
            root_dir.glob(f'{parent}/*.tif'), key=self._sort_key
        )
        return dict(enumerate(anno_list))

    @staticmethod
    def _sort_key(p: Path, zfill: int = 2) -> str:
        """Build a sort key from a file stem with zero-padded tokens.

        The stem is split on ``'_'``, every token is left-padded with zeros to
        at least ``zfill`` characters and the tokens are re-joined with
        ``'_'``, so that e.g. ``'a_2'`` sorts before ``'a_10'``.
        """
        split = p.stem.split('_')
        return '_'.join([s.zfill(zfill) for s in split])