Source code for pyradise.fileio.extraction
import os
from abc import ABC, abstractmethod
from typing import Any, Dict, Optional, Tuple, Union
from pydicom.tag import Tag
from pyradise.data import Annotator, Modality, Organ
from pyradise.utils import is_dicom_file, load_dataset_tag
# Public API of this module: the names exported on a star-import.
__all__ = [
"Extractor",
"ModalityExtractor",
"SimpleModalityExtractor",
"OrganExtractor",
"SimpleOrganExtractor",
"AnnotatorExtractor",
"SimpleAnnotatorExtractor",
]
class Extractor(ABC):
    """Abstract base class shared by all extractors.

    An extractor derives identification information about a file (e.g. the imaging modality of a certain NIFTI
    file) from the file path, the file content or any other source of data. Extractors can be combined with a
    :class:`~pyradise.fileio.crawling.Crawler` to obtain the :class:`~pyradise.data.modality.Modality`,
    :class:`~pyradise.data.organ.Organ` or :class:`~pyradise.data.annotator.Annotator` instances needed for
    :class:`~pyradise.data.subject.Subject` construction.

    Concrete extractors are typically implemented by the user for the task at hand, which keeps the mechanism
    flexible. The implementations and examples provided in this module can serve as a quick starting point.
    """

    @abstractmethod
    def extract(self, path: str) -> Any:
        """Extract information about the file at the specified path.

        Args:
            path (str): The path to the file for which information needs to be extracted.

        Returns:
            Any: The extracted information.
        """
        # Abstract hook without behavior of its own: calling it via ``super()`` yields None.
        return None
class ModalityExtractor(Extractor):
    """A prototype class to extract the :class:`~pyradise.data.modality.Modality` from DICOM files and discrete image
    file paths.

    The class must be implemented by the user and is intended to be used with the
    :class:`~pyradise.fileio.crawling.Crawler` types for DICOM and discrete image files. Both abstract methods
    (i.e. :meth:`extract_from_dicom` and :meth:`extract_from_path`) need to be implemented. When working exclusively
    on DICOM or exclusively on discrete image files, the unused method may consist of just a ``return None``.

    Important:
        If the file path does not specify an intensity image the extractor must return :data:`None`.

    Warnings:
        If :data:`return_default` is set to :data:`True` the :class:`ModalityExtractor` will return an enumerated
        default :class:`~pyradise.data.modality.Modality` for each file for which no modality could be extracted.
        As a consequence no error will be raised during loading. This functionality is intended to be used
        exclusively for experimenting and debugging purposes such that the user can load data without
        implementing a complete extractor. It's not recommended to use this feature for production purposes.
        Subsequent errors may arise.

    Notes:
        If using the :class:`ModalityExtractor` in combination with a :class:`~pyradise.fileio.crawling.Crawler` all
        paths to the discrete image files are provided sequentially to extract the
        :class:`~pyradise.data.modality.Modality`. In case of working with DICOM data the
        :class:`~pyradise.fileio.crawling.Crawler` will provide just one arbitrary file path to the
        :class:`ModalityExtractor`.

    Example:
        Example of a :class:`ModalityExtractor` implementation to identify detailed modalities:

        >>> import os
        >>> from typing import (Any, Dict, Optional)
        >>>
        >>> from pyradise.fileio import (ModalityExtractor, Tag)
        >>> from pyradise.data import Modality
        >>>
        >>>
        >>> class ExampleModalityExtractor(ModalityExtractor):
        >>>
        >>>     @staticmethod
        >>>     def _get_mr_modality(ds_dict: Dict[str, Any]) -> Optional[Modality]:
        >>>         # check for different variants of attributes to get the sequence
        >>>         # identification
        >>>         scanning_sq = ds_dict.get('Scanning Sequence', {}).get('value', [])
        >>>         scanning_sq = [scanning_sq] if isinstance(scanning_sq, str) else scanning_sq
        >>>         contrast = ds_dict.get('Contrast/Bolus Agent', {}).get('value', '')
        >>>
        >>>         if all(val in scanning_sq for val in ('SE', 'IR')):
        >>>             return Modality('FLAIR')
        >>>         elif all(val in scanning_sq for val in ('GR', 'IR')) and len(contrast) > 0:
        >>>             return Modality('T1c')
        >>>         elif all(val in scanning_sq for val in ('GR', 'IR')) and len(contrast) == 0:
        >>>             return Modality('T1w')
        >>>         elif all(val == 'SE' for val in scanning_sq):
        >>>             return Modality('T2w')
        >>>         else:
        >>>             return None
        >>>
        >>>     def extract_from_dicom(self, path: str) -> Optional[Modality]:
        >>>         # extract the necessary attributes from the file
        >>>         tags = (Tag(0x0008, 0x0060),  # Modality
        >>>                 Tag(0x0018, 0x0010),  # ContrastBolusAgent
        >>>                 Tag(0x0018, 0x0020))  # ScanningSequence
        >>>         dataset_dict = self._load_dicom_attributes(tags, path)
        >>>
        >>>         # identify the modality
        >>>         extracted_modality = dataset_dict.get('Modality', {}).get('value', None)
        >>>         if extracted_modality == 'CT':
        >>>             return Modality('CT')
        >>>         elif extracted_modality == 'MR':
        >>>             return self._get_mr_modality(dataset_dict)
        >>>         else:
        >>>             return None
        >>>
        >>>     def extract_from_path(self, path: str) -> Optional[Modality]:
        >>>         # extract the necessary attributes from the file name
        >>>         file_name = os.path.basename(path)
        >>>         if 'T1c' in file_name:
        >>>             return Modality('T1c')
        >>>         elif 'T1w' in file_name:
        >>>             return Modality('T1w')
        >>>         elif 'T2w' in file_name:
        >>>             return Modality('T2w')
        >>>         elif 'FLAIR' in file_name:
        >>>             return Modality('FLAIR')
        >>>         elif 'CT' in file_name:
        >>>             return Modality('CT')
        >>>         else:
        >>>             return None

    Args:
        return_default (bool): Indicates if an enumerated default :class:`~pyradise.data.modality.Modality` should be
         returned if the extraction was not successful. Use this option exclusively for experimentation and debugging
         because it can cause severe damage (default: False).
    """

    # Start value of the running index appended to the default modality name. The first increment performed
    # through an instance stores the new value on that instance and leaves this class-level value untouched.
    modality_default_idx = 0
    # Prefix shared by all enumerated default modality names.
    default_modality_name = "UnknownModality"

    def __init__(self, return_default: bool = False) -> None:
        super().__init__()
        self.return_default = return_default

    @staticmethod
    def _load_dicom_attributes(tags: Union[Tuple[Tuple[int, int], ...], Tuple[Tag, ...]], path: str) -> Dict[str, Any]:
        """Load the DICOM attributes for the specified tags.

        Args:
            tags (Union[Tuple[Tuple[int, int], ...], Tuple[Tag, ...]]): The DICOM tags to extract the attributes for.
            path (str): The path to the DICOM file to extract the attributes from.

        Returns:
            Dict[str, Any]: The loaded DICOM attributes keyed by the attribute name. Each entry is a dictionary
            holding the ``name``, ``value`` and ``vr`` of the attribute. Tags missing in the file are omitted.
        """
        # normalize every entry to a Tag such that both accepted input variants are handled alike
        normalized_tags = [Tag(entry) for entry in tags]
        dataset = load_dataset_tag(path, normalized_tags)

        # look up the requested elements lazily and keep only those present in the dataset
        elements = (dataset.get(tag, None) for tag in normalized_tags)
        return {
            element.name: {"name": element.name, "value": element.value, "vr": element.VR}
            for element in elements
            if element is not None
        }

    def _get_next_default_modality_name(self) -> str:
        """Get the next enumerated modality name for unrecognized modalities.

        Returns:
            str: The next enumerated modality name.
        """
        index = self.modality_default_idx
        name = self.default_modality_name + str(index)
        # advance the counter such that the next call yields a different name
        self.modality_default_idx = index + 1
        return name

    def is_enumerated_default_modality(self, modality: Optional[Union[Modality, str]]) -> bool:
        """Check if the specified modality is an enumerated default modality.

        Args:
            modality (Optional[Union[Modality, str]]): The modality to check.

        Returns:
            bool: True if the modality name contains the default modality name, False otherwise (including the
            case that :data:`None` is provided).
        """
        if modality is None:
            return False

        candidate = modality.name if isinstance(modality, Modality) else modality
        return self.default_modality_name in candidate

    @abstractmethod
    def extract_from_dicom(self, path: str) -> Optional[Modality]:
        """Extract the :class:`~pyradise.data.modality.Modality` from the DICOM file at the specified path.
        If the modality can not be detected :data:`None` must be returned.

        Notes:
            For your implementation you can load the DICOM file or specific DICOM attributes using the
            :meth:`load_dataset` or :meth:`load_dataset_tag` functions from the :mod:`pyradise.utils` module.

            For a detailed description of the DICOM attributes we refer to the `DICOM Standard
            <https://www.dicomstandard.org/>`_ and the `DICOM Standard Browser <https://dicom.innolitics.com/>`_.

        Args:
            path (str): The path to the DICOM file to extract the :class:`~pyradise.data.modality.Modality` from.

        Returns:
            Optional[Modality]: The extracted :class:`~pyradise.data.modality.Modality` or :data:`None`.
        """
        raise NotImplementedError()

    @abstractmethod
    def extract_from_path(self, path: str) -> Optional[Modality]:
        """Extract the :class:`~pyradise.data.modality.Modality` from the file path to a discrete image file or
        from another data source. If the modality can not be detected :data:`None` must be returned.

        Args:
            path (str): The path to the file to extract the :class:`~pyradise.data.modality.Modality` for.

        Returns:
            Optional[Modality]: The extracted :class:`~pyradise.data.modality.Modality` or :data:`None`.
        """
        raise NotImplementedError()

    def extract(self, path: str) -> Optional[Modality]:
        """Extract the :class:`~pyradise.data.modality.Modality` for either a DICOM or a discrete medical image file.

        Args:
            path (str): The path to the file to extract the :class:`~pyradise.data.modality.Modality` for.

        Returns:
            Optional[Modality]: The extracted :class:`~pyradise.data.modality.Modality` or :data:`None`. If
            ``return_default`` is set, an enumerated default modality is returned instead of :data:`None`.
        """
        # dispatch to the extraction routine matching the file type
        extract_fn = self.extract_from_dicom if is_dicom_file(path) else self.extract_from_path
        modality = extract_fn(path)

        # substitute an enumerated default only if requested and nothing was extracted
        if modality is None and self.return_default:
            return Modality(self._get_next_default_modality_name())
        return modality
class SimpleModalityExtractor(ModalityExtractor):
    """A simple :class:`ModalityExtractor` implementation that uses the 'Modality' attribute in the provided DICOM
    image or searches for a provided set of modality names (``modalities``) in the file name in case of a
    discrete image file to generate a :class:`~pyradise.data.modality.Modality` with the same name. If no match is
    found :data:`None` is returned.

    Args:
        modalities (Tuple[str, ...]): The possible modality names for the intensity files which will also
         be used to name the :class:`~pyradise.data.modality.Modality`.
        return_default (bool): Indicates if an enumerated default :class:`~pyradise.data.modality.Modality` should be
         returned if the extraction was not successful. Use this option exclusively for experimentation and debugging
         because it can cause severe damage (default: False).
    """

    def __init__(
        self,
        modalities: Tuple[str, ...],
        return_default: bool = False,
    ) -> None:
        super().__init__(return_default)
        self.modalities = modalities

    def extract_from_path(self, path: str) -> Optional[Modality]:
        """Extract the :class:`~pyradise.data.modality.Modality` from the file name using the provided
        ``modalities``. The names are tested in the provided order and the first name contained in the file name
        is used. If there is no match :data:`None` is returned.

        Args:
            path (str): The path to the file to extract the :class:`~pyradise.data.modality.Modality` for.

        Returns:
            Optional[Modality]: The extracted :class:`~pyradise.data.modality.Modality` or :data:`None`.
        """
        base_name = os.path.basename(path)

        # lazily scan the candidates such that the search stops at the first hit
        matching_names = (name for name in self.modalities if name in base_name)
        first_match = next(matching_names, None)
        return None if first_match is None else Modality(first_match)

    def extract_from_dicom(self, path: str) -> Optional[Modality]:
        """Extract the DICOM attribute 'Modality' from the provided DICOM file. If no or an invalid 'Modality'
        attribute is found, :data:`None` is returned.

        Notes:
            This method exclusively extracts the following top-level modalities: CT, MR, PT, and US.
            For all other values of the DICOM 'Modality' attribute :data:`None` is returned.

        Args:
            path (str): The path to the DICOM file to extract the :class:`~pyradise.data.modality.Modality` from.

        Returns:
            Optional[Modality]: The extracted :class:`~pyradise.data.modality.Modality` or :data:`None`.
        """
        # load exclusively the Modality attribute (0008,0060)
        modality_tag = Tag(0x0008, 0x0060)
        attributes = self._load_dicom_attributes((modality_tag,), path)

        # reject missing values and everything besides the supported top-level modalities
        value = attributes.get("Modality", {}).get("value", None)
        if value not in ("CT", "MR", "PT", "US"):
            return None
        return Modality(value)
class OrganExtractor(Extractor):
    """A prototype class to extract an :class:`~pyradise.data.organ.Organ` from a discrete image file path. This class
    must be implemented by the user and is intended to be used with a :class:`~pyradise.fileio.crawling.Crawler` for
    discrete image formats.

    Important:
        If the file path does not specify a segmentation image the extractor must return :data:`None`.

    Example:
        Example of an :class:`OrganExtractor` implementation which takes search strings and associated organ names to
        extract an :class:`Organ` from a file path:

        >>> import os
        >>> from typing import (Optional, Tuple)
        >>>
        >>> from pyradise.fileio import OrganExtractor
        >>> from pyradise.data import Organ
        >>>
        >>>
        >>> class ExampleOrganExtractor(OrganExtractor):
        >>>
        >>>     def __init__(self,
        >>>                  search_strings: Tuple[str, ...],
        >>>                  names: Tuple[str, ...]
        >>>                  ) -> None:
        >>>         super().__init__()
        >>>
        >>>         assert len(search_strings) == len(names), \\
        >>>             f'Number of search strings ({len(search_strings)}) must match the ' \\
        >>>             f'number of organ names ({len(names)})!'
        >>>
        >>>         self.search_strings = search_strings
        >>>         self.names = names
        >>>
        >>>     def extract(self, path: str) -> Optional[Organ]:
        >>>         file_name = os.path.basename(path)
        >>>
        >>>         for search_string, name in zip(self.search_strings, self.names):
        >>>             if search_string in file_name:
        >>>                 return Organ(name)
        >>>
        >>>         return None
    """

    def extract(self, path: str) -> Optional[Organ]:
        """Extract the :class:`~pyradise.data.organ.Organ` from the file path.

        This prototype provides no extraction logic and always raises; subclasses need to override it.

        Args:
            path (str): The path to the file to extract the :class:`~pyradise.data.organ.Organ` for.

        Returns:
            Optional[Organ]: The extracted :class:`~pyradise.data.organ.Organ` or :data:`None`.

        Raises:
            NotImplementedError: Always, because the method is not overridden.
        """
        message = "The extract method needs to be adopted for the intended use case!"
        raise NotImplementedError(message)
class SimpleOrganExtractor(OrganExtractor):
    """A simple :class:`OrganExtractor` implementation that searches for a provided set of organ names
    (``organs``) in the file name and generates an :class:`~pyradise.data.organ.Organ` with the same name. If no
    match is found :data:`None` is returned.

    Notes:
        The organ names are tested in the provided order and the first name contained in the file name is used.
        If one organ name is a substring of another one (e.g. ``'Brain'`` and ``'Brainstem'``), list the more
        specific name first.

    Args:
        organs (Tuple[str, ...]): The possible organ names which will also be used to name the output
         :class:`~pyradise.data.organ.Organ`.
    """

    def __init__(self, organs: Tuple[str, ...]) -> None:
        super().__init__()
        self.organs = organs

    def extract(self, path: str) -> Optional[Organ]:
        """Extract the :class:`~pyradise.data.organ.Organ` from the file name using the provided ``organs``. If
        no :class:`~pyradise.data.organ.Organ` can be extracted or the file does not contain a segmentation image
        :data:`None` is returned.

        Args:
            path (str): The path to the file to extract the :class:`~pyradise.data.organ.Organ` for.

        Returns:
            Optional[Organ]: The extracted :class:`~pyradise.data.organ.Organ` or :data:`None`.
        """
        file_name = os.path.basename(path)

        # Search in the order given by the user (first match wins), consistent with the other simple
        # extractors in this module. Iterating in reverse silently inverted the priority of the names.
        for organ in self.organs:
            if organ in file_name:
                return Organ(organ)
        return None
class AnnotatorExtractor(Extractor):
    """A prototype class to extract an :class:`~pyradise.data.annotator.Annotator` from a discrete image file path.
    This class must be implemented by the user and is intended to be used with a
    :class:`~pyradise.fileio.crawling.Crawler` for discrete image formats.

    Important:
        If the file path does not specify a segmentation image the extractor must return :data:`None`.

    Example:
        Example of an :class:`AnnotatorExtractor` implementation which takes search strings and associated annotator
        names to extract a :class:`~pyradise.data.annotator.Annotator` from a file path:

        >>> import os
        >>> from typing import (Optional, Tuple)
        >>>
        >>> from pyradise.fileio import AnnotatorExtractor
        >>> from pyradise.data import Annotator
        >>>
        >>>
        >>> class ExampleAnnotatorExtractor(AnnotatorExtractor):
        >>>
        >>>     def __init__(self,
        >>>                  search_strings: Tuple[str, ...],
        >>>                  names: Tuple[str, ...]
        >>>                  ) -> None:
        >>>         super().__init__()
        >>>
        >>>         assert len(search_strings) == len(names), \\
        >>>             f'Number of search strings ({len(search_strings)}) must match the ' \\
        >>>             f'number of annotator names ({len(names)})!'
        >>>
        >>>         self.search_strings = search_strings
        >>>         self.names = names
        >>>
        >>>     def extract(self, path: str) -> Optional[Annotator]:
        >>>         file_name = os.path.basename(path)
        >>>
        >>>         for search_string, name in zip(self.search_strings, self.names):
        >>>             if search_string in file_name:
        >>>                 return Annotator(name)
        >>>
        >>>         return None
    """

    def extract(self, path: str) -> Optional[Annotator]:
        """Extract the :class:`~pyradise.data.annotator.Annotator` from the file path.

        This prototype provides no extraction logic and always raises; subclasses need to override it.

        Args:
            path (str): The path to the file to extract the :class:`~pyradise.data.annotator.Annotator` for.

        Returns:
            Optional[Annotator]: The extracted :class:`~pyradise.data.annotator.Annotator` or :data:`None`.

        Raises:
            NotImplementedError: Always, because the method is not overridden.
        """
        message = "The extract method needs to be adopted for the intended use case!"
        raise NotImplementedError(message)
class SimpleAnnotatorExtractor(AnnotatorExtractor):
    """A simple :class:`AnnotatorExtractor` implementation that searches for a provided set of annotator names
    (``annotators``) in the file name and generates a :class:`~pyradise.data.annotator.Annotator` with the same name.
    If no match is found :data:`None` is returned.

    Args:
        annotators (Tuple[str, ...]): The possible annotator names which will also be used to name the output
         :class:`Annotator`.
    """

    def __init__(self, annotators: Tuple[str, ...]) -> None:
        super().__init__()
        self.annotators = annotators

    def extract(self, path: str) -> Optional[Annotator]:
        """Extract the :class:`~pyradise.data.annotator.Annotator` from the file name using the provided
        ``annotators``. The names are tested in the provided order and the first name contained in the file name
        is used. If no :class:`~pyradise.data.annotator.Annotator` can be extracted or the file does not contain a
        segmentation image :data:`None` is returned.

        Args:
            path (str): The path to the file to extract the :class:`~pyradise.data.annotator.Annotator` for.

        Returns:
            Optional[Annotator]: The extracted :class:`~pyradise.data.annotator.Annotator` or :data:`None`.
        """
        base_name = os.path.basename(path)

        # lazily scan the candidates such that the search stops at the first hit
        matching_names = (name for name in self.annotators if name in base_name)
        first_match = next(matching_names, None)
        return None if first_match is None else Annotator(first_match)