# Source code for bioimageloader.collections._s_bsst265

import os.path
from functools import cached_property
from pathlib import Path
from typing import Dict, Optional

import albumentations
import cv2
import numpy as np
import tifffile
from skimage.util import img_as_float32

from ..base import MaskDataset


class S_BSST265(MaskDataset):
    """An annotated fluorescence image dataset for training nuclear segmentation
    methods [1]_

    Immuno Fluorescence (IF) images, designed for ML

    Parameters
    ----------
    root_dir : str
        Path to root directory
    output : {'both', 'image', 'mask'}, default: 'both'
        Change outputs. 'both' returns {'image': image, 'mask': mask}.
    transforms : albumentations.Compose, optional
        An instance of Compose (albumentations pkg) that defines
        augmentation in sequence.
    num_samples : int, optional
        Useful when ``transforms`` is set. Define the total length of the
        dataset. If it is set, it overwrites ``__len__``.

    Notes
    -----
    - All images are grayscale, but some of them are stored with 3 channels
    - rawimages: Raw nuclear images in TIFF format
    - groundtruth: Annotated masks in TIFF format
    - groundtruth_svgs: SVG-Files for each annotated masks and corresponding raw
      image in JPEG format
    - singlecell_groundtruth: Groundtruth for randomly selected nuclei of the
      testset (25 nuclei per testset class, a subset of all nuclei of the
      testset classes; human experts can compete with this low number of nuclei
      per subset by calculating Dice coefficients between their annotations and
      the groundtruth annotations)
    - visualized_groundtruth: Visualization of groundtruth masks in PNG format
    - visualized_singlecell_groundtruth: Visualization of groundtruth for
      randomly selected nuclei in PNG format
    - Find more info in README.txt inside the root directory

    References
    ----------
    .. [1] F. Kromp et al., “An annotated fluorescence image dataset for
       training nuclear segmentation methods,” Scientific Data, vol. 7, no. 1,
       Art. no. 1, Aug. 2020, doi: 10.1038/s41597-020-00608-w.

    See Also
    --------
    MaskDataset : Super class
    Dataset : Base class
    DatasetInterface : Interface

    """
    # Dataset's acronym
    acronym = 'S_BSST265'

    def __init__(
        self,
        # Interface requirement
        root_dir: str,
        *,
        output: str = 'both',
        transforms: Optional[albumentations.Compose] = None,
        num_samples: Optional[int] = None,
        **kwargs
    ):
        """Store the dataset location and the interface options.

        The dataset is looked up in the ``S-BSST265`` sub-directory of
        ``root_dir``. Extra keyword arguments are accepted but not used by
        this constructor.
        """
        # Interface and super-class arguments
        self._root_dir = os.path.join(root_dir, 'S-BSST265')
        self._output = output
        self._transforms = transforms
        self._num_samples = num_samples

    def get_image(self, p: Path) -> np.ndarray:
        """Read a raw TIFF image and return it as a 3-channel float32 array.

        Parameters
        ----------
        p : Path
            Path to a TIFF file under ``rawimages``.

        Returns
        -------
        np.ndarray
            Image converted with ``skimage.util.img_as_float32``. Images that
            are not already 3-channel are expanded with
            ``cv2.COLOR_GRAY2RGB``.
        """
        img = img_as_float32(tifffile.imread(p))
        # A 2-D array is single-channel whatever its width is; checking only
        # ``shape[-1] != 3`` would mistake an (H, 3) grayscale image for RGB
        # and return it without the channel axis.
        if img.ndim == 2 or img.shape[-1] != 3:
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
        return img

    def get_mask(self, p: Path) -> np.ndarray:
        """Read an annotation TIFF and return it cast to ``int16``.

        Parameters
        ----------
        p : Path
            Path to a TIFF file under ``groundtruth``.
        """
        return tifffile.imread(p).astype(np.int16)

    @cached_property
    def file_list(self) -> list:
        """Sorted paths of ``rawimages/*.tif``, computed once and cached.

        Ordering uses ``_sort_key`` so that numeric filename parts sort
        consistently with ``anno_dict``.
        """
        # NOTE(review): ``self.root_dir`` is not defined in this class; it is
        # presumably a Path-returning property of the base class.
        return sorted(
            self.root_dir.glob('rawimages/*.tif'), key=self._sort_key
        )

    @cached_property
    def anno_dict(self) -> Dict[int, Path]:
        """Mapping from position to annotation path, computed once and cached.

        Keys are the indices of ``groundtruth/*.tif`` after sorting with
        ``_sort_key``, i.e. the same ordering rule used by ``file_list``.
        """
        masks = sorted(
            self.root_dir.glob('groundtruth/*.tif'), key=self._sort_key
        )
        return dict(enumerate(masks))

    @staticmethod
    def _sort_key(p, zfill=2):
        """Build a sort key from a path's stem with zero-padded parts.

        The stem is split on ``'_'`` and every part is left-padded with zeros
        to at least ``zfill`` characters, so that e.g. ``'a_2'`` sorts before
        ``'a_10'`` under plain string comparison.

        Parameters
        ----------
        p : Path
            Path whose ``stem`` is used.
        zfill : int, default: 2
            Minimum width of each underscore-separated part.

        Returns
        -------
        str
            The padded parts re-joined with ``'_'``.
        """
        return '_'.join(part.zfill(zfill) for part in p.stem.split('_'))