Source code for bioimageloader.collections._stardist

"""StarDist data[1]_ is a subset of DSB2018

.. [1] https://github.com/stardist/stardist/releases/download/0.1.0/dsb2018.zip
"""

import os.path
from functools import cached_property
from pathlib import Path
from typing import Dict, List, Optional
from skimage.util import img_as_float32

import albumentations
import cv2
import numpy as np
import tifffile

from ..base import MaskDataset


[docs]class StarDist(MaskDataset): """Dataset for StarDist [1]_, [2]_ Cell Detection with Star-convex Polygons StarDist data is a subset of Data Science Bowl 2018 [3]_ Parameters ---------- root_dir : str Path to root directory output : {'both', 'image', 'mask'}, default: 'both' Change outputs. 'both' returns {'image': image, 'mask': mask}. transforms : albumentations.Compose, optional An instance of Compose (albumentations pkg) that defines augmentation in sequence. num_samples : int, optional Useful when ``transforms`` is set. Define the total length of the dataset. If it is set, it overwrites ``__len__``. training : bool, default: True Load training set if True, else load testing one Notes ----- - StarDist data is a subset of Data Science Bowl 2018 [3]_. Choose only one, do not mix them. - ``root_dir`` is not 'dsb2018' even though the archive name is 'dsb2018', because it conflicts with the original DSB2018. Make a new directory. - All images have grayscale References ---------- .. [1] U. Schmidt, M. Weigert, C. Broaddus, and G. Myers, “Cell Detection with Star-convex Polygons,” arXiv:1806.03535 [cs], vol. 11071, pp. 265–273, 2018, doi: 10.1007/978-3-030-00934-2_30. .. [2] https://github.com/stardist/stardist/releases/download/0.1.0/dsb2018.zip .. [3] https://www.kaggle.com/c/data-science-bowl-2018/ See Also -------- DSB2018 MaskDataset : Super class Dataset : Base class DatasetInterface : Interface """ # Set acronym acronym = 'StarDist' def __init__( self, root_dir: str, *, output: str = 'both', transforms: Optional[albumentations.Compose] = None, num_samples: Optional[int] = None, # specific to this dataset training: bool = True, **kwargs ): self._root_dir = os.path.join(root_dir, 'dsb2018') self._output = output self._transforms = transforms self._num_samples = num_samples # specific to this one here self.training = training
[docs] def get_image(self, p: Path) -> np.ndarray: img = tifffile.imread(p) img = img_as_float32(img) return cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
[docs] def get_mask(self, p: Path) -> np.ndarray: mask = tifffile.imread(p) # originally uint16, which pytorch doesn't like return mask.astype(np.int16)
@cached_property def file_list(self) -> List[Path]: # Call MaskDataset.root_dir parent = 'train' if self.training else 'test' return sorted((self.root_dir / parent / 'images').glob('*.tif')) @cached_property def anno_dict(self) -> Dict[int, Path]: return dict((i, p.parent.with_name('masks') / p.name) for i, p in enumerate(self.file_list))