# Source code for bioimageloader.collections._bbbc009
from functools import cached_property
from pathlib import Path
from typing import Dict, List, Optional
import albumentations
import cv2
import numpy as np
import tifffile
from skimage.util import img_as_float32
from ..base import MaskDataset
class BBBC009(MaskDataset):
    """Human red blood cells

    This image set consists of five differential interference contrast (DIC)
    images of red blood cells.

    Parameters
    ----------
    root_dir : str
        Path to root directory
    output : {'both', 'image', 'mask'}, default: 'both'
        Change outputs. 'both' returns {'image': image, 'mask': mask}.
    transforms : albumentations.Compose, optional
        An instance of Compose (albumentations pkg) that defines augmentation in
        sequence.
    num_samples : int, optional
        Useful when ``transforms`` is set. Define the total length of the
        dataset. If it is set, it overwrites ``__len__``.
    **kwargs
        Accepted for call-site compatibility; not used by this class.

    References
    ----------
    .. [1] https://bbbc.broadinstitute.org/BBBC009

    See Also
    --------
    MaskDataset : Super class
    DatasetInterface : Interface
    """

    # Set acronym
    acronym = 'BBBC009'

    def __init__(
        self,
        root_dir: str,
        *,  # only keyword param
        output: str = 'both',
        transforms: Optional[albumentations.Compose] = None,
        num_samples: Optional[int] = None,
        # specific to this dataset
        **kwargs
    ):
        # Only the private attributes are stored here; ``self.root_dir`` used
        # below is not defined in this class.
        # NOTE(review): presumably the base class exposes ``root_dir`` as a
        # ``Path`` built from ``_root_dir`` -- confirm against ``MaskDataset``.
        self._root_dir = root_dir
        self._output = output
        self._transforms = transforms
        self._num_samples = num_samples
        # This dataset has no additional dataset-specific options.

    def get_image(self, p: Path) -> np.ndarray:
        """Read the TIFF image at ``p`` and return it as a float32 RGB array.

        Parameters
        ----------
        p : Path
            Path to a TIFF image file.

        Returns
        -------
        np.ndarray
            The image after a gray-to-RGB conversion, passed through
            ``skimage.util.img_as_float32``.
        """
        img = tifffile.imread(p)
        # Gray-to-RGB conversion, so the returned image has three channels.
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
        return img_as_float32(img)

    def get_mask(self, p: Path) -> np.ndarray:
        """Read the TIFF annotation at ``p`` and return it as a uint8 array.

        Parameters
        ----------
        p : Path
            Path to a TIFF annotation file.

        Returns
        -------
        np.ndarray
            The annotation cast to ``np.uint8``.
        """
        mask = tifffile.imread(p)
        # dtype=bool originally and bool is not well handled by albumentations
        return mask.astype(np.uint8)

    @cached_property
    def file_list(self) -> List[Path]:
        """Sorted paths of ``*.tif`` files under ``human_rbc_dic_images``."""
        # Important to decorate with `cached_property` in general
        # (the directory is globbed only once per instance).
        return sorted(self.root_dir.glob('human_rbc_dic_images/*.tif'))

    @cached_property
    def anno_dict(self) -> Dict[int, Path]:
        """Map positional index to ``*.tif`` paths in ``human_rbc_dic_outlines``.

        Keys are the positions of the files in sorted order.
        NOTE(review): presumably these positions line up with ``file_list``
        because both listings are sorted -- confirm file naming matches.
        """
        # Important to decorate with `cached_property` in general
        # (the directory is globbed only once per instance).
        anno_list = sorted(self.root_dir.glob('human_rbc_dic_outlines/*.tif'))
        return dict(enumerate(anno_list))