# Source code for bioimageloader.collections._ucsb

import re
from functools import cached_property, partial
from pathlib import Path
from typing import Dict, List, Optional, Sequence, Union

import albumentations
import numpy as np
import tifffile
from skimage.util import img_as_float32

from ..base import MaskDataset


class UCSB(MaskDataset):
    """A biosegmentation benchmark for evaluation of bioimage analysis methods

    Parameters
    ----------
    root_dir : str
        Path to root directory
    output : {'both', 'image', 'mask'}, default: 'both'
        Change outputs. 'both' returns {'image': image, 'mask': mask}.
    transforms : albumentations.Compose, optional
        An instance of Compose (albumentations pkg) that defines augmentation
        in sequence.
    num_samples : int, optional
        Useful when ``transforms`` is set. Define the total length of the
        dataset. If it is set, it overwrites ``__len__``.
    grayscale : bool, default: False
        Convert images to grayscale
    grayscale_mode : {'cv2', 'equal', Sequence[float]}, default: 'cv2'
        How to convert to grayscale. If set to 'cv2', it follows opencv
        implementation. Else if set to 'equal', it sums up values along
        channel axis, then divides it by the number of expected channels.
    category : {'benign', 'malignant'}, default: ('malignant',)
        Select which category of output you want. Given as a sequence; files
        are filtered by name only when exactly one category is given,
        otherwise every file is returned.

    Raises
    ------
    ValueError
        If ``category`` is empty or contains anything other than 'benign' or
        'malignant'.

    See Also
    --------
    MaskDataset : Super class
    Dataset : Base class
    DatasetInterface : Interface

    Notes
    -----
    - 32 'benign', 26 'malignant' images (58 images in total)
    - 58x768x896 -> ~600 patches. Thus, `num_samples=900` (x1.5) is a
      suggested value (the default in this class is ``None``).
    - Images are not fully annotated

    References
    ----------
    .. [1] E. Drelie Gelasca, B. Obara, D. Fedorov, K. Kvilekval, and B.
       Manjunath, “A biosegmentation benchmark for evaluation of bioimage
       analysis methods,” BMC Bioinformatics, vol. 10, p. 368, Nov. 2009, doi:
       10.1186/1471-2105-10-368.

    """
    # Dataset's acronym
    acronym = 'UCSB'

    def __init__(
        self,
        root_dir: str,
        *,
        output: str = 'both',
        transforms: Optional[albumentations.Compose] = None,
        num_samples: Optional[int] = None,
        grayscale: bool = False,
        grayscale_mode: Union[str, Sequence[float]] = 'cv2',
        # specific to this dataset
        category: Sequence[str] = ('malignant',),
        **kwargs
    ):
        # Validate first so an invalid request never leaves a half-configured
        # instance behind. Every entry must be a known category: checking with
        # `any` would let a typo slip through next to a valid name, and since
        # filtering only happens for a single category, that would silently
        # return all files.
        valid_categories = ('benign', 'malignant')
        if not category or not all(cat in valid_categories for cat in category):
            raise ValueError("Set `category` in ('benign', 'malignant') in sequence")

        self._root_dir = root_dir
        self._output = output
        self._transforms = transforms
        self._num_samples = num_samples
        self._grayscale = grayscale
        self._grayscale_mode = grayscale_mode
        # specific to this dataset
        self.category = category

    def get_image(self, p: Path) -> np.ndarray:
        """Read the TIFF image at ``p`` and return it converted to float32."""
        tif = tifffile.imread(p)
        return img_as_float32(tif)

    def get_mask(self, p: Path) -> np.ndarray:
        """Read the TIFF mask at ``p`` and return it as loaded (no conversion)."""
        tif = tifffile.imread(p)
        return tif

    @staticmethod
    def _filter_category(p: Path, category: str):
        """Return a match object if ``category`` occurs in the file stem, else None."""
        return re.search(category, p.stem)

    @cached_property
    def file_list(self) -> List[Path]:
        """Sorted image paths from ``Breast Cancer Cells/*.tif`` under the root.

        When exactly one category is selected, only files whose stem contains
        that category name are kept.
        """
        # NOTE(review): `root_dir` is not defined in this class; presumably the
        # base class exposes it as a Path built from `_root_dir` -- confirm.
        root_dir = self.root_dir
        parent = 'Breast Cancer Cells'
        file_list = root_dir.glob(f'{parent}/*.tif')
        if len(cat := self.category) == 1:
            file_list = filter(
                partial(self._filter_category, category=cat[0]),
                file_list,
            )
        return sorted(file_list)

    @cached_property
    def anno_dict(self) -> Dict[int, Path]:
        """Map a running index to each sorted mask path.

        Masks are read from ``Breast Cancer Cells GroundTruth/*.TIF`` (note the
        upper-case extension in the pattern) and filtered by category the same
        way as ``file_list``.
        """
        root_dir = self.root_dir
        parent = 'Breast Cancer Cells GroundTruth'
        anno_list = sorted(root_dir.glob(f'{parent}/*.TIF'))
        if len(cat := self.category) == 1:
            # `filter` preserves the sorted order established above.
            anno_list = filter(
                partial(self._filter_category, category=cat[0]),
                anno_list,
            )
        return dict(enumerate(anno_list))