# Source code for mml.core.data_loading.task_attributes

# LICENSE HEADER MANAGED BY add-license-header
#
# SPDX-FileCopyrightText: Copyright 2024 German Cancer Research Center (DKFZ) and contributors.
# SPDX-License-Identifier: MIT
#

from dataclasses import dataclass
from typing import List, Union

from mml.core.scripts.utils import StrEnum

# Type alias for the kind of values assigned to modalities in the task description samples:
# a single int, a list of ints, a list of floats, or a str.
ModalityEntry = Union[int, List[int], List[float], str]

# NOTE(review): the comment previously attached here ("acceptable keywords for kornia augmentations") does not
# describe this constant. Presumably this is a sentinel string marking an empty / missing mask - confirm against
# the places where it is used.
EMPTY_MASK_TOKEN = "EMPTY_MASK_TOKEN"



[docs]
class Modality(StrEnum):
    """
    The modalities represent the possible keys of a loaded sample from a dataset. E.g. {'image': 'some/path/file.png',
    'class': 3}. Note that while TaskDescription stores the Modality as enum in its samples, the loaded batch will
    contain the str representations!
    """

    # supported types
    IMAGE = "image"  # (str) default RGB image
    MASK = "mask"  # (str) grayscale mask, used e.g. in instance or semantic segmentation
    CLASS = "class"  # (int) multi or binary whole image classification label
    CLASSES = "classes"  # (List[int]) multi-label classification labels
    SOFT_CLASSES = "soft_classes"  # (List[float]) allows soft labels [0, 1] for a multi-class / multi-label setup
    VALUE = "value"  # (float) regression target value
    # future possibilities, not supported yet
    BBOX = "bbox"
    KEYPOINTS = "keypoints"
    VIDEO_CLIP = "video_clip"
    THREE_D_IMAGE = "three_d_image"
    # meta information, not necessarily given
    TASK = "task"  # (str) the name of the task the sample is loaded from
    SAMPLE_ID = "sample_id"  # (str) the id of the sample




[docs]
class TaskType(StrEnum):
    """
    Defines the type of task. Different task types usually require completely different architectures and/or training
    procedures. This is aligned with the torchvision.models split at
    https://pytorch.org/docs/stable/torchvision/models.html
    """

    CLASSIFICATION = "classification"
    REGRESSION = "regression"
    SEMANTIC_SEGMENTATION = "semantic_segmentation"
    DETECTION = "detection"
    VIDEO_CLASS = "video_classification"
    NO_TASK = "no_task"  # for completely unlabeled datasets
    UNKNOWN = "unknown"
    # shares its value with UNKNOWN; under standard Enum semantics this makes DEFAULT an alias of UNKNOWN
    DEFAULT = "unknown"
    MULTILABEL_CLASSIFICATION = "multilabel_classification"

    def requires(self) -> List[List[Modality]]:
        """
        Returns the necessary modality/modalities for this kind of task. First list level is OR and second level is
        AND. So if returns [[A, B], [C, D]] either [A and B] or [C and D] are required.

        Task types without an entry in the table below (e.g. DETECTION, VIDEO_CLASS, UNKNOWN) yield ``[[]]``, i.e. a
        single alternative that requires no modality at all.

        :return: list of alternative modality combinations
        """
        # the table is rebuilt on every call, so callers always receive fresh list objects
        assignment = {
            TaskType.CLASSIFICATION: [[Modality.CLASS, Modality.IMAGE]],
            TaskType.SEMANTIC_SEGMENTATION: [[Modality.MASK, Modality.IMAGE]],
            TaskType.MULTILABEL_CLASSIFICATION: [
                [Modality.CLASSES, Modality.IMAGE],
                [Modality.SOFT_CLASSES, Modality.IMAGE],
            ],
            TaskType.NO_TASK: [[Modality.IMAGE]],
            TaskType.REGRESSION: [[Modality.IMAGE, Modality.VALUE]],
        }
        return assignment.get(self, [[]])





[docs]
class Keyword(StrEnum):
    """
    Keyword labels of a task. Refers e.g. to the shown entities within the images.
    """

    # domains
    MEDICAL = "medical"
    ANIMALS = "animals"
    BUILDINGS = "buildings"
    ARTIFICIAL = "artificial"
    NATURAL_OBJECTS = "natural_objects"
    HANDWRITINGS = "handwritings"
    SCENES = "scenes"
    FACES = "faces"
    DRIVING = "driving"
    DERMATOSCOPY = "dermatoscopy"
    CATARACT_SURGERY = "cataract_surgery"
    LARYNGOSCOPY = "laryngoscopy"
    LAPAROSCOPY = "laparoscopy"
    GASTROSCOPY_COLONOSCOPY = "gastroscopy_colonoscopy"
    ENDOSCOPY = "endoscopy"
    NEPHRECTOMY = "Nephrectomy"
    FUNDUS_PHOTOGRAPHY = "fundus_photography"
    ULTRASOUND = "ultrasound"
    MRI_SCAN = "mri_scan"
    X_RAY = "x_ray"
    CT_SCAN = "ct_scan"
    CLE = "confocal laser endomicroscopy"
    CAPSULE_ENDOSCOPY = "capsule endoscopy"
    COLPOSCOPY = "colposcopy"
    # task type
    ENDOSCOPIC_INSTRUMENTS = "endoscopic instruments"
    INSTRUMENT_COUNT = "counting endoscopic instruments"
    ANATOMICAL_STRUCTURES = "anatomical structures"
    TISSUE_PATHOLOGY = "tissue_pathology"
    IMAGE_ARTEFACTS = "image_artefacts"
    CHARS_DIGITS = "chars_or_digits"
    # body locations
    CHEST = "chest"
    BRAIN = "brain"
    EYE = "eye"
    BREAST = "breast"
    BONE = "bone"
    GYNECOLOGY = "Gynecology"




[docs]
class License(StrEnum):
    """
    License for distribution of a task (data).
    """

    # there is no such thing as no license -> strict limitations apply here!
    UNKNOWN = "unknown"
    # Use when license is specific to data set
    CUSTOM = "License defined in TaskCreator description"
    # https://creativecommons.org/licenses/by-nc/4.0/
    CC_BY_NC_4_0 = "Creative Commons Attribution-NonCommercial 4.0 International"
    # https://creativecommons.org/licenses/by/4.0/
    CC_BY_4_0 = "Creative Commons Attribution 4.0 International"
    # https://creativecommons.org/licenses/by-nc-sa/4.0/
    CC_BY_NC_SA_4_0 = "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International"
    # https://opendatacommons.org/licenses/dbcl/1-0/
    DATABASE_CONTENTS_LICENSE_1_0 = "Open Data Commons DbCL v1.0"
    # https://creativecommons.org/publicdomain/zero/1.0/
    CC_0_1_0 = "CC0 1.0 Universal (CC0 1.0) Public Domain Dedication"
    # https://choosealicense.com/licenses/mit/
    MIT = "Massachusetts Institute of Technology"




[docs]
@dataclass
class Sizes:
    """
    Small dataclass storing information about the dimensionality of a set of images.

    Holds the minimum and maximum height and width; every field defaults to 0.
    """

    min_height: int = 0
    max_height: int = 0
    min_width: int = 0
    max_width: int = 0

    def to_list(self) -> List[int]:
        """
        Returns the stored values as a flat list.

        :return: ``[min_height, max_height, min_width, max_width]`` (in exactly this order)
        """
        return [self.min_height, self.max_height, self.min_width, self.max_width]





[docs]
@dataclass
class RGBInfo:
    """
    Small dataclass storing information about image channels (mostly mean and std).

    One float per channel (red, green, blue); every field defaults to 0.0.
    """

    r: float = 0.0
    g: float = 0.0
    b: float = 0.0

    def get_rgb(self) -> List[float]:
        """
        Returns the channel values as a list.

        :return: ``[r, g, b]`` (in exactly this order)
        """
        return [self.r, self.g, self.b]

    def to_list(self) -> List[float]:
        """
        Returns the same list as :meth:`get_rgb`.

        :return: ``[r, g, b]`` (in exactly this order)
        """
        return self.get_rgb()




# Per-channel (r, g, b) mean and standard deviation, named after ImageNet.
# NOTE(review): presumably meant for images scaled to [0, 1] - confirm against the normalization code.
IMAGENET_MEAN = RGBInfo(r=0.485, g=0.456, b=0.406)
IMAGENET_STD = RGBInfo(r=0.229, g=0.224, b=0.225)



[docs]
class DataSplit(StrEnum):
    """
    Represents parts of a dataset that are loaded together. May be selected joint by a fold number to determine the
    exact samples that are available for iteration over a :class:`~mml.core.data_loading.task_dataset.TaskDataset`.
    """

    TRAIN = "TRAIN"
    FULL_TRAIN = "FULL_TRAIN"
    VAL = "VAL"
    TEST = "TEST"
    UNLABELLED = "UNLABELLED"