from functools import cached_property
from pathlib import Path
from typing import Dict, List, Optional, Sequence, Union
import albumentations
import cv2
import numpy as np
import tifffile
from skimage.util import img_as_float32
from ..base import MaskDataset
class BBBC004(MaskDataset):
    """Synthetic cells

    Biological application

    One of the principal challenges in counting or segmenting nuclei is
    dealing with clustered nuclei. To help assess algorithms' performance in
    this regard, this synthetic image set consists of five subsets with
    increasing degree of clustering.

    Images

    Five subsets of 20 images each are provided. Each image contains 300
    objects, but the objects overlap and cluster with different probabilities
    in the five subsets. The images were generated with the SIMCEP simulating
    platform for fluorescent cell population images (Lehmussola et al., IEEE
    T. Med. Imaging, 2007 and Lehmussola et al., P. IEEE, 2008).

    Parameters
    ----------
    root_dir : str
        Path to root directory
    output : {'both', 'image', 'mask'}, default: 'both'
        Change outputs. 'both' returns {'image': image, 'mask': mask}.
    transforms : albumentations.Compose, optional
        An instance of Compose (albumentations pkg) that defines augmentation
        in sequence.
    num_samples : int, optional
        Useful when ``transforms`` is set. Define the total length of the
        dataset. If it is set, it overwrites ``__len__``.

    Notes
    -----
    Images are discovered under ``<root_dir>/*_images/*.tif`` and masks under
    ``<root_dir>/*_foreground/*.tif``. Both listings are ordered by the number
    in the file name first and by the name of the containing directory second,
    so the ordering does not depend on the order in which the file system
    happens to enumerate directories.

    References
    ----------
    .. [1] https://bbbc.broadinstitute.org/BBBC004

    See Also
    --------
    MaskDataset : Super class
    DatasetInterface : Interface
    """

    # Set acronym
    acronym = 'BBBC004'

    def __init__(
        self,
        root_dir: str,
        *,  # only keyword param
        output: str = 'both',
        transforms: Optional[albumentations.Compose] = None,
        num_samples: Optional[int] = None,
        # specific to this dataset
        **kwargs
    ):
        # The arguments are only stored here on private attributes; the
        # public accessors (e.g. ``self.root_dir`` used below) are not defined
        # in this class -- presumably they come from the base class.
        self._root_dir = root_dir
        self._output = output
        self._transforms = transforms
        self._num_samples = num_samples
        # specific to this one here

    def get_image(self, p: Path) -> np.ndarray:
        """Read the TIFF image at `p` and return it as a float32 RGB array.

        Parameters
        ----------
        p : Path
            Path to an image file.

        Returns
        -------
        np.ndarray
            The image after gray-to-RGB conversion and conversion to float32.
        """
        img = tifffile.imread(p)
        # The file is treated as single-channel; replicate it into 3 channels.
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
        return img_as_float32(img)

    def get_mask(self, p: Path) -> np.ndarray:
        """Read the TIFF mask at `p` and return one of its channels.

        Parameters
        ----------
        p : Path
            Path to a mask file.

        Returns
        -------
        np.ndarray
            The slice at index 2 along the last axis of the loaded array.
        """
        mask = tifffile.imread(p)
        # NOTE(review): assumes the file loads as an (H, W, C) array with at
        # least 3 channels and that channel 2 carries the foreground -- confirm
        # against the dataset.
        mask = mask[:, :, 2]
        return mask

    @cached_property
    def file_list(self) -> List[Path]:
        """Sorted list of image paths matching ``*_images/*.tif``.

        Cached so that the directory tree is globbed and sorted only once per
        instance.
        """
        root_dir = self.root_dir
        parent = '*_images'
        # Primary key: the integer before "GRAY" in the file stem.
        # Secondary key: the directory name, so files sharing a number across
        # different ``*_images`` directories get a stable, reproducible order
        # instead of whatever order ``glob`` yields.
        return sorted(
            root_dir.glob(f'{parent}/*.tif'),
            key=lambda x: (int(x.stem.split("GRAY")[0]), x.parent.name),
        )

    @cached_property
    def anno_dict(self) -> Dict[int, Path]:
        """Mapping from positional index to mask path (``*_foreground/*.tif``).

        Cached so that the directory tree is globbed and sorted only once per
        instance. Keys are consecutive integers starting at 0, assigned in
        sorted order.
        """
        root_dir = self.root_dir
        parent = '*_foreground'
        # Same two-level ordering as ``file_list`` (number, then directory
        # name) so that index ``i`` here lines up with ``file_list[i]`` as long
        # as the image and foreground directories share their naming prefix.
        anno_list = sorted(
            root_dir.glob(f'{parent}/*.tif'),
            key=lambda x: (int(x.stem), x.parent.name),
        )
        return dict(enumerate(anno_list))