Source code for bioimageloader.collections._bbbc004

from functools import cached_property
from pathlib import Path
from typing import Dict, List, Optional, Sequence, Union

import albumentations
import cv2
import numpy as np
import tifffile
from skimage.util import img_as_float32

from ..base import MaskDataset


class BBBC004(MaskDataset):
    """Synthetic cells

    Biological application

    One of the principal challenges in counting or segmenting nuclei is dealing
    with clustered nuclei. To help assess algorithms' performance in this
    regard, this synthetic image set consists of five subsets with increasing
    degree of clustering.

    Images

    Five subsets of 20 images each are provided. Each image contains 300
    objects, but the objects overlap and cluster with different probabilities
    in the five subsets. The images were generated with the SIMCEP simulating
    platform for fluorescent cell population images (Lehmussola et al., IEEE T.
    Med. Imaging, 2007 and Lehmussola et al., P. IEEE, 2008).

    Parameters
    ----------
    root_dir : str
        Path to root directory
    output : {'both', 'image', 'mask'}, default: 'both'
        Change outputs. 'both' returns {'image': image, 'mask': mask}.
    transforms : albumentations.Compose, optional
        An instance of Compose (albumentations pkg) that defines augmentation
        in sequence.
    num_samples : int, optional
        Useful when ``transforms`` is set. Define the total length of the
        dataset. If it is set, it overwrites ``__len__``.
    **kwargs
        Accepted for signature compatibility; not used by this constructor.

    References
    ----------
    .. [1] https://bbbc.broadinstitute.org/BBBC004

    See Also
    --------
    MaskDataset : Super class
    DatasetInterface : Interface

    """
    # Set acronym
    acronym = 'BBBC004'

    def __init__(
        self,
        root_dir: str,
        *,  # only keyword param
        output: str = 'both',
        transforms: Optional[albumentations.Compose] = None,
        num_samples: Optional[int] = None,
        # specific to this dataset
        **kwargs
    ):
        # Only stores the arguments on private attributes; no file access
        # happens here.
        self._root_dir = root_dir
        self._output = output
        self._transforms = transforms
        self._num_samples = num_samples
        # specific to this one here

    def get_image(self, p: Path) -> np.ndarray:
        """Read the TIFF image at ``p`` and return it as float32 RGB.

        The file is passed through ``cv2.COLOR_GRAY2RGB`` and then
        ``img_as_float32``.
        """
        img = tifffile.imread(p)
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
        return img_as_float32(img)

    def get_mask(self, p: Path) -> np.ndarray:
        """Read the TIFF annotation at ``p`` and return one of its channels.

        The file is indexed as a 3-D array and only index 2 of the last axis
        is kept.
        """
        # NOTE(review): presumably the foreground is stored in this channel of
        # an RGB-like TIFF -- confirm against the dataset files.
        mask = tifffile.imread(p)
        return mask[:, :, 2]

    @staticmethod
    def _subset_of(p: Path) -> str:
        """Return the parent folder name without its last ``_suffix`` part."""
        # '<prefix>_images' and '<prefix>_foreground' both map to '<prefix>',
        # so images and annotations get the same tie-break value.
        return p.parent.name.rsplit('_', 1)[0]

    @cached_property
    def file_list(self) -> List[Path]:
        """Sorted list of image paths matching ``*_images/*.tif``.

        Files are ordered by the integer that precedes ``GRAY`` in the file
        stem. Files with the same number in different folders are then ordered
        by folder prefix, so the result does not depend on the order in which
        ``glob`` yields entries.
        """
        # Important to decorate with `cached_property` in general
        root_dir = self.root_dir
        parent = '*_images'
        return sorted(
            root_dir.glob(f'{parent}/*.tif'),
            key=lambda x: (int(x.stem.split("GRAY")[0]), self._subset_of(x)),
        )

    @cached_property
    def anno_dict(self) -> Dict[int, Path]:
        """Mapping from positional index to annotation path.

        Paths matching ``*_foreground/*.tif`` are ordered by the integer value
        of the file stem, with the folder prefix as tie-break (the same rule
        as ``file_list``), and then enumerated from 0.
        """
        # Important to decorate with `cached_property` in general
        root_dir = self.root_dir
        parent = '*_foreground'
        anno_list = sorted(
            root_dir.glob(f'{parent}/*.tif'),
            key=lambda x: (int(x.stem), self._subset_of(x)),
        )
        return dict(enumerate(anno_list))