Source code for bioimageloader.collections._cellpose

from functools import cached_property
from pathlib import Path
from typing import Dict, List, Optional, Sequence, Union

import albumentations
import cv2
import numpy as np
from skimage.util import img_as_float32

from bioimageloader.base import MaskDataset
from bioimageloader.utils import imread_asarray


class Cellpose(MaskDataset):
    """Dataset for Cellpose [1]_, [2]_

    Cellpose: a generalist algorithm for cellular segmentation

    Parameters
    ----------
    root_dir : str
        Path to root directory
    output : {'both', 'image', 'mask'}, default: 'both'
        Change outputs. 'both' returns {'image': image, 'mask': mask}.
    transforms : albumentations.Compose, optional
        An instance of Compose (albumentations pkg) that defines augmentation
        in sequence.
    num_samples : int, optional
        Useful when ``transforms`` is set. Define the total length of the
        dataset. If it is set, it overwrites ``__len__``.
    grayscale : bool, default: False
        Convert images to grayscale
    grayscale_mode : {'cv2', 'equal', Sequence[float]}, default: 'equal'
        How to convert to grayscale. If set to 'cv2', it follows the opencv
        implementation. Else if set to 'equal', it sums up values along the
        channel axis, then divides by the number of expected channels.
    training : bool, default: True
        Load the training set if True, else load the test set.
    gray_is_not_green : bool, default: True
        Proper grayscale. The green channel value will be broadcast to all
        channels.
    specialized_data : bool, default: False
        Load the "specialized data" mentioned in the paper [1]_.

    Notes
    -----
    - The download link is hard to find [3]_
    - It is a complete dataset by itself, meaning that it is not intended to
      be mixed or concatenated with others. It consists of images from
      various sources, not only bioimages but also images of fruits, rocks,
      etc.
    - All images have 3 channels, but technically they are not RGB. Every
      image has values in the second channel, and if there is more signal, it
      goes to the first one. No image has values in the last channel. As a
      result, when visualized in RGB, they all look green and red. In
      particular, for this reason, grayscale images carry their signal in the
      second channel and look green. The ``gray_is_not_green`` argument
      addresses that.
    - Built-in grayscale conversion methods are not correct for this dataset.
      The conversion should be channel-agnostic.
    - Currently, ``gray_is_not_green=False`` together with ``grayscale=True``
      will reduce the values of single-channel images to one third.

    References
    ----------
    .. [1] C. Stringer, M. Michaelos, and M. Pachitariu, “Cellpose: a
       generalist algorithm for cellular segmentation,” bioRxiv, p.
       2020.02.02.931238, Feb. 2020, doi: 10.1101/2020.02.02.931238.
    .. [2] https://github.com/mouseLand/cellpose
    .. [3] https://www.cellpose.org/dataset

    See Also
    --------
    MaskDataset : Super class
    DatasetInterface : Interface

    """
    # Set acronym
    acronym = 'Cellpose'

    names_rg_training = [
        '000_img.png', '001_img.png', '002_img.png', '003_img.png', '004_img.png', '005_img.png',
        '006_img.png', '007_img.png', '008_img.png', '009_img.png', '010_img.png', '011_img.png',
        '012_img.png', '013_img.png', '014_img.png', '015_img.png', '016_img.png', '017_img.png',
        '018_img.png', '019_img.png', '020_img.png', '021_img.png', '022_img.png', '023_img.png',
        '024_img.png', '025_img.png', '026_img.png', '027_img.png', '028_img.png', '029_img.png',
        '030_img.png', '031_img.png', '032_img.png', '033_img.png', '034_img.png', '035_img.png',
        '036_img.png', '037_img.png', '038_img.png', '039_img.png', '040_img.png', '041_img.png',
        '042_img.png', '043_img.png', '044_img.png', '045_img.png', '046_img.png', '047_img.png',
        '048_img.png', '049_img.png', '050_img.png', '051_img.png', '052_img.png', '053_img.png',
        '054_img.png', '055_img.png', '056_img.png', '057_img.png', '058_img.png', '059_img.png',
        '060_img.png', '061_img.png', '062_img.png', '063_img.png', '064_img.png', '065_img.png',
        '066_img.png', '067_img.png', '068_img.png', '069_img.png', '070_img.png', '071_img.png',
        '072_img.png', '073_img.png', '074_img.png', '075_img.png', '076_img.png', '077_img.png',
        '078_img.png', '079_img.png', '080_img.png', '081_img.png', '082_img.png', '083_img.png',
        '084_img.png', '085_img.png', '086_img.png', '087_img.png', '088_img.png', '089_img.png',
        '090_img.png', '091_img.png', '092_img.png', '093_img.png', '094_img.png', '095_img.png',
        '096_img.png', '097_img.png', '098_img.png', '099_img.png', '100_img.png', '101_img.png',
        '102_img.png', '103_img.png', '104_img.png', '105_img.png', '106_img.png', '107_img.png',
        '108_img.png', '109_img.png', '110_img.png', '111_img.png', '112_img.png', '113_img.png',
        '114_img.png', '115_img.png', '116_img.png', '117_img.png', '118_img.png', '119_img.png',
        '120_img.png', '121_img.png', '122_img.png', '144_img.png', '145_img.png', '146_img.png',
        '147_img.png', '148_img.png', '149_img.png', '150_img.png', '151_img.png', '152_img.png',
        '153_img.png', '154_img.png', '156_img.png', '157_img.png', '161_img.png', '162_img.png',
        '167_img.png', '169_img.png', '177_img.png', '178_img.png', '180_img.png', '181_img.png',
        '182_img.png', '183_img.png', '185_img.png', '186_img.png', '187_img.png', '191_img.png',
        '192_img.png', '193_img.png', '195_img.png', '197_img.png', '198_img.png', '199_img.png',
        '200_img.png', '201_img.png', '203_img.png', '205_img.png', '206_img.png', '207_img.png',
        '209_img.png', '210_img.png', '213_img.png', '215_img.png', '218_img.png', '222_img.png',
        '223_img.png', '225_img.png', '226_img.png', '227_img.png', '228_img.png', '229_img.png',
        '230_img.png', '231_img.png', '232_img.png', '233_img.png', '234_img.png', '235_img.png',
        '236_img.png', '237_img.png', '238_img.png', '239_img.png', '240_img.png', '243_img.png',
        '244_img.png', '246_img.png', '250_img.png', '261_img.png', '264_img.png', '269_img.png',
        '270_img.png', '271_img.png', '272_img.png', '273_img.png', '274_img.png', '280_img.png',
        '283_img.png', '284_img.png', '285_img.png', '286_img.png', '287_img.png', '332_img.png',
        '337_img.png', '340_img.png', '439_img.png',
    ]
    names_rg_test = [
        '000_img.png', '001_img.png', '002_img.png', '003_img.png', '004_img.png', '005_img.png',
        '006_img.png', '007_img.png', '008_img.png', '009_img.png', '010_img.png', '011_img.png',
        '017_img.png', '019_img.png', '021_img.png', '022_img.png',
    ]
    idx_sp_train = list(range(89))
    idx_sp_test = list(range(11))
    # num_channels = 2

    def __init__(
        self,
        root_dir: str,
        *,  # only keyword param
        output: str = 'both',
        transforms: Optional[albumentations.Compose] = None,
        num_samples: Optional[int] = None,
        grayscale: bool = False,  # optional
        grayscale_mode: Union[str, Sequence[float]] = 'equal',
        # specific to this dataset
        training: bool = True,
        gray_is_not_green: bool = True,
        specialized_data: bool = False,
        **kwargs
    ):
        self._root_dir = root_dir
        self._output = output
        self._transforms = transforms
        self._num_samples = num_samples
        self._grayscale = grayscale
        self._grayscale_mode = grayscale_mode
        # specific to this one here
        self.training = training
        self.gray_is_not_green = gray_is_not_green
        self.specialized_data = specialized_data

        self.names_rg = (self.names_rg_training if training
                         else self.names_rg_test)
        if specialized_data:
            self.specialized_idx = (self.idx_sp_train if training
                                    else self.idx_sp_test)
    def get_image(self, p: Path) -> np.ndarray:
        img = imread_asarray(p)
        img = img_as_float32(img)
        if p.name in self.names_rg:
            return img
        if self.gray_is_not_green:
            return cv2.cvtColor(img[..., 1], cv2.COLOR_GRAY2RGB)
        return img
    def get_mask(self, p: Path) -> np.ndarray:
        mask = imread_asarray(p)
        return mask
    @cached_property
    def file_list(self) -> List[Path]:
        # Important to decorate with `cached_property` in general
        parent = 'train' if self.training else 'test'
        _file_list = sorted((self.root_dir / parent).glob('*_img.png'))
        if self.specialized_data:
            return [_file_list[i] for i in self.specialized_idx]
        return _file_list

    @cached_property
    def anno_dict(self) -> Dict[int, Path]:
        # Important to decorate with `cached_property` in general
        parent = 'train' if self.training else 'test'
        _anno_dict = dict((i, v) for i, v in enumerate(
            sorted((self.root_dir / parent).glob('*_masks.png'))))
        if self.specialized_data:
            return dict((i, _anno_dict[i]) for i in self.specialized_idx)
        return _anno_dict
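
A minimal usage sketch, not part of the module above: it assumes the dataset has been downloaded and unpacked into a hypothetical ``./data/cellpose`` directory containing the ``train``/``test`` folders, and that ``Cellpose`` is importable from ``bioimageloader.collections``; the albumentations pipeline and the ``num_samples`` value are illustrative only.

import albumentations as A
from bioimageloader.collections import Cellpose

# Illustrative augmentation pipeline; any albumentations.Compose works here
transforms = A.Compose([
    A.RandomCrop(width=256, height=256),
    A.HorizontalFlip(p=0.5),
])

dset = Cellpose(
    './data/cellpose',        # hypothetical root directory with train/ and test/
    training=True,            # use the training split
    gray_is_not_green=True,   # broadcast the green channel of grayscale images
    transforms=transforms,
    num_samples=512,          # overwrites __len__ when transforms are set
)

# With the default output='both', indexing yields {'image': ..., 'mask': ...}
sample = dset[0]
image, mask = sample['image'], sample['mask']
print(image.shape, mask.shape)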