Source code for mml.core.data_loading.task_attributes

# LICENSE HEADER MANAGED BY add-license-header
#
# SPDX-FileCopyrightText: Copyright 2024 German Cancer Research Center (DKFZ) and contributors.
# SPDX-License-Identifier: MIT
#

from dataclasses import dataclass
from typing import List, Union

from mml.core.scripts.utils import StrEnum

# these are the kind of values assigned to modalities in the task description samples
ModalityEntry = Union[int, List[int], List[float], str]

# these are the acceptable keywords for kornia augmentations
EMPTY_MASK_TOKEN = "EMPTY_MASK_TOKEN"


[docs] class Modality(StrEnum): """ The modalities represent the possible keys of a loaded sample from a dataset. E.g. {'image': 'some/path/file.png', 'class': 3}. Note that while TaskDescription stores the Modality as enum in its samples, the loaded batch will contain the str representations! """ # supported types IMAGE = "image" # (str) default RGB image MASK = "mask" # (str) grayscale mask, used e.g. in instance or semantic segmentation CLASS = "class" # (int) multi or binary whole image classification label CLASSES = "classes" # (List[int]) multi-label classification labels SOFT_CLASSES = "soft_classes" # (List[float]) allows soft labels [0, 1] for a multi-class / multi-label setup VALUE = "value" # (float) regression target value # future possibilities, not supported yet BBOX = "bbox" KEYPOINTS = "keypoints" VIDEO_CLIP = "video_clip" THREE_D_IMAGE = "three_d_image" # meta information, not necessarily given TASK = "task" # (str) the name of the task the sample is loaded from SAMPLE_ID = "sample_id" # (str) the id of the sample
[docs] class TaskType(StrEnum): """ Defines the type of task. Different task types usually require completely different architectures and/or training procedures. This is aligned with the torchvision.models split at https://pytorch.org/docs/stable/torchvision/models.html """ CLASSIFICATION = "classification" REGRESSION = "regression" SEMANTIC_SEGMENTATION = "semantic_segmentation" DETECTION = "detection" VIDEO_CLASS = "video_classification" NO_TASK = "no_task" # for completely unlabeled datasets UNKNOWN = "unknown" DEFAULT = "unknown" MULTILABEL_CLASSIFICATION = "multilabel_classification"
[docs] def requires(self) -> List[List[Modality]]: """ Returns the necessary modalitie(s) for this kind of task. First list level is OR and second level is AND. So if returns [[A, B], [C, D]] either [A and B] or [C and D] are required. """ assignment = { TaskType.CLASSIFICATION: [[Modality.CLASS, Modality.IMAGE]], TaskType.SEMANTIC_SEGMENTATION: [[Modality.MASK, Modality.IMAGE]], TaskType.MULTILABEL_CLASSIFICATION: [ [Modality.CLASSES, Modality.IMAGE], [Modality.SOFT_CLASSES, Modality.IMAGE], ], TaskType.NO_TASK: [[Modality.IMAGE]], TaskType.REGRESSION: [[Modality.IMAGE, Modality.VALUE]], } if self in assignment: return assignment[self] else: return [[]]
[docs] class Keyword(StrEnum): """ Keyword labels of a task. Refers e.g. to the shown entities within the images. """ # domains MEDICAL = "medical" ANIMALS = "animals" BUILDINGS = "buildings" ARTIFICIAL = "artificial" NATURAL_OBJECTS = "natural_objects" HANDWRITINGS = "handwritings" SCENES = "scenes" FACES = "faces" DRIVING = "driving" DERMATOSCOPY = "dermatoscopy" CATARACT_SURGERY = "cataract_surgery" LARYNGOSCOPY = "laryngoscopy" LAPAROSCOPY = "laparoscopy" GASTROSCOPY_COLONOSCOPY = "gastroscopy_colonoscopy" ENDOSCOPY = "endoscopy" NEPHRECTOMY = "Nephrectomy" FUNDUS_PHOTOGRAPHY = "fundus_photography" ULTRASOUND = "ultrasound" MRI_SCAN = "mri_scan" X_RAY = "x_ray" CT_SCAN = "ct_scan" CLE = "confocal laser endomicroscopy" CAPSULE_ENDOSCOPY = "capsule endoscopy" COLPOSCOPY = "colposcopy" # task type ENDOSCOPIC_INSTRUMENTS = "endoscopic instruments" INSTRUMENT_COUNT = "counting endoscopic instruments" ANATOMICAL_STRUCTURES = "anatomical structures" TISSUE_PATHOLOGY = "tissue_pathology" IMAGE_ARTEFACTS = "image_artefacts" CHARS_DIGITS = "chars_or_digits" # body locations CHEST = "chest" BRAIN = "brain" EYE = "eye" BREAST = "breast" BONE = "bone" GYNECOLOGY = "Gynecology"
[docs] class License(StrEnum): """ License for distribution of a task (data). """ # there is no such thing as no license -> strict limitations apply here! UNKNOWN = "unknown" # Use when license is specific to data set CUSTOM = "License defined in TaskCreator description" # https://creativecommons.org/licenses/by-nc/4.0/ CC_BY_NC_4_0 = "Creative Commons Attribution-NonCommercial 4.0 International" # https://creativecommons.org/licenses/by/4.0/ CC_BY_4_0 = "Creative Commons Attribution 4.0 International" # https://creativecommons.org/licenses/by-nc-sa/4.0/ CC_BY_NC_SA_4_0 = "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International" # https://opendatacommons.org/licenses/dbcl/1-0/ DATABASE_CONTENTS_LICENSE_1_0 = "Open Data Commons DbCL v1.0" # https://creativecommons.org/publicdomain/zero/1.0/ CC_0_1_0 = "CC0 1.0 Universal (CC0 1.0) Public Domain Dedication" # https://choosealicense.com/licenses/mit/ MIT = "Massachusetts Institute of Technology"
[docs] @dataclass class Sizes: """ Small dataclass storing information about the dimensionality of a set of images. """ min_height: int = 0 max_height: int = 0 min_width: int = 0 max_width: int = 0
[docs] def to_list(self) -> List[int]: return [self.min_height, self.max_height, self.min_width, self.max_width]
[docs] @dataclass class RGBInfo: """ Small dataclass storing information about image channels (mostly mean and std). """ r: float = 0.0 g: float = 0.0 b: float = 0.0
[docs] def get_rgb(self) -> List[float]: return [self.r, self.g, self.b]
[docs] def to_list(self) -> List[float]: return self.get_rgb()
IMAGENET_MEAN = RGBInfo(0.485, 0.456, 0.406) IMAGENET_STD = RGBInfo(0.229, 0.224, 0.225)
[docs] class DataSplit(StrEnum): """ Represents parts of a dataset that are loaded together. May be selected joint by a fold number to determine the exact samples that are available for iteration over a :class:`~mml.core.data_loading.task_dataset.TaskDataset`. """ TRAIN = "TRAIN" FULL_TRAIN = "FULL_TRAIN" VAL = "VAL" TEST = "TEST" UNLABELLED = "UNLABELLED"