import logging
import os
from copy import copy
from dataclasses import dataclass, fields, is_dataclass
from datetime import datetime, timezone
from typing import Callable, List, Literal, Tuple, Type, Union
from pathlib import Path
import numpy as np
import pydicom
from bson import ObjectId
from pydicom.dataset import Dataset
from cobra_db import __version__
from cobra_db.dataset_mod import DatasetMod
from cobra_db.enums import Modality, PregnancyStatus
from cobra_db.utils import (
intersect_dicts,
parse_AS_as_int,
parse_DA_as_datetime,
parse_DA_TM_as_datetime,
)
class BaseObj:
    """Defines the interface to load the object from dict and how to export it to dict."""

    @staticmethod
    def expand(value):
        """Return ``value.to_dict()`` if ``value`` is a ``BaseObj``, else ``value`` as is."""
        if isinstance(value, BaseObj):
            return value.to_dict()
        return value

    def _to_dict(self):
        """Return the raw attribute mapping of the instance.

        Only dataclass instances are supported by this default implementation.

        :raises NotImplementedError: if the instance is not a dataclass.
        """
        if is_dataclass(self):
            return self.__dict__
        raise NotImplementedError

    def to_dict(self):
        """Expand the nested objects and remove all keys with None value."""
        return {
            key: self.expand(value)
            for key, value in self._to_dict().items()
            if value is not None
        }

    @classmethod
    def from_dict(cls, d: dict):
        """Build an instance from a dict. Must be overridden by subclasses.

        :param d: mapping with the data of the object.
        :raises NotImplementedError: always, in this base implementation.
        """
        # ``cls`` is already the class; ``type(cls)`` would only name the metaclass.
        raise NotImplementedError(
            f"This method should be implemented in the child {cls.__name__}"
        )
class Embedded(BaseObj):
    """Base class for an embedded doc (one that does not get its own _id)."""

    @classmethod
    def from_dict(cls, d: dict):
        """Instantiate the class using the entries of ``d`` as keyword arguments."""
        instance = cls(**d)
        return instance
@dataclass
class Entity(BaseObj):
    """Base class for all entities that are stored in the db with an _id, i.e. that
    require a Dao.

    Usually parent of a ``@dataclass`` class.
    """

    _id: ObjectId
    _metadata: Metadata

    def __repr__(self):
        """Render the class name on one line and the attribute dict on the next."""
        attributes = dict(self.__dict__)
        title = f"<{self.__class__.__name__}>"
        return f"{title}\n{attributes}\n"

    def __post_init__(self):
        """Automatically add db_metadata if missing, and wrap a dict into Metadata."""
        if self._metadata is None:
            self._metadata = Metadata.create()
        current = self._metadata
        if isinstance(current, dict):
            self._metadata = Metadata(**current)

    @classmethod
    def from_dict(cls, obj_dict: dict):
        """Create an instance from ``obj_dict``, converting nested ``BaseObj`` fields.

        Fields annotated with a plain class that derives from ``BaseObj`` and whose
        entry in ``obj_dict`` is not None are rebuilt through that class'
        ``from_dict``. Any other annotation is left untouched.
        """
        instance = cls(**obj_dict)
        for spec in fields(instance):
            declared = spec.type
            if type(declared) is not type:
                # TODO: not handled yet!!!
                continue
            raw = obj_dict.get(spec.name)
            if raw is None or not issubclass(declared, BaseObj):
                continue
            setattr(instance, spec.name, declared.from_dict(raw))
        return instance
class Source:
    """Base class for anything that points to more information."""
@dataclass
class DicomEntity(Entity):
    """Base class for all entities that contain dicom_tags."""

    # Mapping keyword -> tag dict; ``get_tag`` reads the "Value" entry of each tag.
    dicom_tags: dict

    @staticmethod
    def optional(ds: Dataset, tag_name: str, parser: Callable = None, default=None):
        """Get tag value from dataset, if missing return default value.

        :param ds: dataset to read from.
        :param tag_name: keyword of the tag.
        :param parser: optional callable applied to the value when it is present.
        :param default: returned when the tag is missing, None or an empty string.
        """
        value = ds.get(tag_name, None)
        if value is None or value == "":
            return default
        if parser is not None:
            value = parser(value)
        return value

    @staticmethod
    def choose_first(ds, *tag_names, parser=None, default=None):
        """Choose the first tag in ``tag_names`` that does not return None.

        :param ds: dataset to read from.
        :param tag_names: keywords to try, in order.
        :param parser: optional callable applied to each present value.
        :param default: returned when none of the tags yields a value.
        """
        for tag_name in tag_names:
            # None is used as the probe default so a missing tag can be detected.
            tag = DicomEntity.optional(ds, tag_name, parser, None)
            if tag is not None:
                return tag
        return default

    @classmethod
    def from_dataset(cls, ds: Dataset):
        """Abstract method. How to create an instance from a pydicom Dataset."""
        # ``type(cls)`` would only name the metaclass, so use the class name.
        raise NotImplementedError(f"Should be implemented by child {cls.__name__}")

    def get_tag(self, keyword: str, default=None):
        """Return the value stored in ``dicom_tags`` for ``keyword``.

        A single-element list is unwrapped to its only element; any other value is
        returned unchanged.

        :param keyword: key to look up in ``dicom_tags``.
        :param default: returned when the keyword is absent or has no "Value".
        """
        tag = self.dicom_tags.get(keyword)
        if tag is None or "Value" not in tag:
            return default
        value = tag["Value"]
        if isinstance(value, list) and len(value) == 1:
            return value[0]  # most of the cases are this
        return value

    def get_acquisition_datetime(self):
        """Combine AcquisitionDate and AcquisitionTime into a datetime.

        Missing parts are replaced with zero-filled strings before parsing.
        """
        time = self.get_tag("AcquisitionTime")
        time = "000000" if time is None else time
        date = self.get_tag("AcquisitionDate")
        date = "00000000" if date is None else date
        return parse_DA_TM_as_datetime(date, time)
@dataclass
class EntitySource(Embedded, Source):
    """Pointer to another Entity instance in the database.

    Should only be used when there's ambiguity about the referenced
    class/collection.
    """

    name: str
    description: str
    _entity_type: Union[Type[Entity], str]
    _id: ObjectId

    def __post_init__(self):
        """Store the entity type by name when something other than a str was given."""
        entity_type = self._entity_type
        if type(entity_type) is not str:
            self._entity_type = entity_type.__name__

    def to_dict(self):
        """Return the attribute dict of the instance, None values included."""
        return vars(self)
@dataclass
class FileSource(Embedded, Source):
    """Pointer to a file in the filesystem.

    rel_path is relative to the place where drive_name is mounted.
    This is because drive_name can be mounted in different paths according
    to the machine and we don't have another way of referencing a drive.
    filename is automatically stored for easy indexing and querying without having to
    manipulate the rel_path to extract the filename.
    """

    drive_name: str
    rel_path: str
    filename: str = None  # automatically set when rel_path is set in __post_init__

    def get_local_filepath(self, mount_paths: dict) -> str:
        """Returns the filepath according to mount_paths.

        :param mount_paths: mapping drive_name -> path where that drive is mounted.
        :raises KeyError: if ``drive_name`` is not a key of ``mount_paths``.
        """
        return os.path.join(mount_paths[self.drive_name], self.rel_path)

    def __post_init__(self):
        """Derive filename from rel_path when it was not given explicitly."""
        if self.filename is None:
            self.filename = os.path.basename(self.rel_path)

    @classmethod
    def from_mount_paths(cls, filepath: str, mount_paths: dict):
        """Create an instance from the filepath and the mount_paths.

        The first mount path (in iteration order) that contains ``filepath`` wins.

        :param filepath: path of the file on this machine.
        :param mount_paths: mapping drive_name -> path where that drive is mounted.
        :raises ValueError: if ``filepath`` is not inside any of the mount paths.
        """
        path = Path(filepath)
        for drive_name, mount_path in mount_paths.items():
            try:
                rel_path = path.relative_to(Path(mount_path)).as_posix()
            except ValueError:
                # Not under this mount: keep looking instead of aborting the search.
                continue
            return cls(drive_name=drive_name, rel_path=rel_path)
        raise ValueError(f"{filepath} is not in any of the mount paths: {mount_paths}")
@dataclass
class Patient(Entity):
    """Represents a unique person that has gone through at least one study.

    https://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_C.2.2.html
    https://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_C.2.3.html
    """

    anon_id: str
    hidden: bool
    birth_date: datetime = None

    def __repr__(self):
        # Defined explicitly so that @dataclass does not generate its own __repr__.
        return super().__repr__()

    @classmethod
    def from_dataset(cls, ds: Dataset):
        """Create an instance of the entity from a DICOM dataset.

        ``PatientID`` is read as a required attribute; ``PatientBirthDate`` is
        optional. ``_id`` and ``_metadata`` are left as None and ``hidden`` as False.
        """
        return cls(
            _id=None,
            _metadata=None,
            anon_id=ds.PatientID,
            birth_date=DicomEntity.optional(
                ds, "PatientBirthDate", parse_DA_as_datetime, None
            ),
            hidden=False,
        )

    def __eq__(self, other: "Patient") -> bool:
        """Two patients are equal when their ``anon_id`` match."""
        if not isinstance(other, Patient):
            # Let Python fall back to its default handling instead of raising
            # AttributeError on objects without ``anon_id``.
            return NotImplemented
        return self.anon_id == other.anon_id
@dataclass
class RadiologicalStudy(DicomEntity):
    """Aggregation of multiple dicom instances by the combination of patient and date."""

    study_uid: List[str]  # (0020,000D) UI StudyInstanceUID
    date: datetime  # (0008,0020) DA StudyDate + (0008,0030) TM StudyTime
    series_count: int  # (0020,0011) IS SeriesNumber, Overridden after grouping because
    # the tags are not reliable.
    patient_id: ObjectId = None
    # SOPClassUID, the meaning can be found in pydicom._uid_dict.UID_dictionary
    # Tags that we want to keep even if the images in the same patient+date disagree.
    sop_class: List[str] = None
    modality: List[Modality] = None  # (0008,0060) CS Modality
    description: str = None  # (0008,1030) LO StudyDescription
    accession_number: List[
        str
    ] = None  # Used to link the study with other administrative registers.
    patient_age: int = None  # in years (0010,1010) AS PatientAge
    patient_weight: float = None  # (0010,1030) PatientWeight
    patient_height: float = None  # (0010,1020) PatientSize
    patient_pregnancy_status: PregnancyStatus = None  # (0010,21C0)
    manufacturer: List[str] = None
    manufacturer_model_name: List[str] = None
    detector_id: List[str] = None
    detector_type: List[str] = None
    device_serial_number: List[str] = None
    software_versions: List[str] = None
    date_of_last_detector_calibration: List[str] = None
    breast_implant_present: List[str] = None

    def __repr__(self):
        # Defined explicitly so that @dataclass does not generate its own __repr__.
        return super().__repr__()

    @classmethod
    def from_dataset(cls, ds: Dataset):
        """Create an instance of the entity from a DICOM dataset.

        Missing optional tags become None. StudyDate/StudyTime fall back to
        zero-filled strings before parsing. A Modality value rejected by the
        ``Modality`` enum is logged and stored as None.
        """
        try:
            modality = [cls.optional(ds, "Modality", Modality, None)]
        except ValueError as e:
            logging.error(f"{e}")
            modality = None
        return cls(
            _id=None,
            _metadata=None,
            study_uid=cls.optional(ds, "StudyInstanceUID", default=None),
            modality=modality,
            sop_class=[cls.optional(ds, "SOPClassUID", str, None)],
            date=parse_DA_TM_as_datetime(
                cls.optional(ds, "StudyDate", str, "00000000"),
                cls.optional(ds, "StudyTime", str, "000000"),
            ),
            description=cls.optional(ds, "StudyDescription"),
            accession_number=cls.optional(ds, "AccessionNumber", str, None),
            patient_age=cls.optional(ds, "PatientAge", parse_AS_as_int, None),
            patient_weight=cls.optional(ds, "PatientWeight", float, None),
            patient_height=cls.optional(ds, "PatientSize", float, None),
            patient_pregnancy_status=(
                cls.optional(ds, "PatientPregnancyStatus", PregnancyStatus, None)
            ),
            dicom_tags=DatasetMod.tags_to_keywords(ds.to_json_dict()),
            # The ones below all get overridden while grouping
            series_count=cls.optional(ds, "SeriesNumber", int, None),
            manufacturer=cls.optional(ds, "Manufacturer", str, None),
            manufacturer_model_name=cls.optional(
                ds, "ManufacturerModelName", str, None
            ),
            detector_id=cls.optional(ds, "DetectorID", str, None),
            detector_type=cls.optional(ds, "DetectorType", str, None),
            device_serial_number=cls.optional(ds, "DeviceSerialNumber", str, None),
            software_versions=cls.optional(ds, "SoftwareVersions", None, None),
            date_of_last_detector_calibration=cls.optional(
                ds, "DateOfLastDetectorCalibration", parse_DA_as_datetime, None
            ),
            breast_implant_present=cls.optional(ds, "BreastImplantPresent", str, None),
        )

    @classmethod
    def from_dict(cls, d: dict):
        """Convert a mongodb dict into a RadiologicalStudy object"""
        return cls(**d)

    def __eq__(self, other: "RadiologicalStudy") -> bool:
        """Two studies are equal when their ``study_uid`` match."""
        if not isinstance(other, RadiologicalStudy):
            return NotImplemented
        # The class has no ``anon_study_id`` field, so comparing it always raised
        # AttributeError. NOTE(review): study_uid is assumed to be the intended
        # identity, mirroring series_uid / anon_id on sibling entities - confirm.
        return self.study_uid == other.study_uid
@dataclass
class RadiologicalSeries(DicomEntity):
    """Series-level entity built from the tags of a DICOM dataset."""

    study_id: ObjectId
    date: datetime = None  # (0008,0021) DA SeriesDate + (0008,0031) TM SeriesTime
    series_uid: str = None  # (0020,000E) UI SeriesInstanceUID
    description: str = None  # 0008103E
    # Overridden by the count in the ImageMetadata collection
    image_count: int = None  # (0020,0013) IS InstanceNumber.
    image_shape: Tuple[int, int] = None  # If the dicom_tags of all images agree.
    protocol_name: str = None  # (0018,1030) LO ProtocolName,
    # other possible tags 00180018, 0008103E, 00181081, 00181250, 00180024, 00181030

    def __repr__(self):
        # Defined explicitly so that @dataclass does not generate its own __repr__.
        return super().__repr__()

    @classmethod
    def from_dataset(cls, ds: Dataset):
        """Create an instance of the entity from a DICOM dataset.

        Missing optional tags become None. SeriesDate/SeriesTime fall back to
        zero-filled strings before parsing. ``image_shape`` is (Rows, Columns).
        ``study_id``, ``_id`` and ``_metadata`` are left as None.
        """
        return cls(
            _id=None,
            _metadata=None,
            series_uid=cls.optional(ds, "SeriesInstanceUID", str, None),
            date=parse_DA_TM_as_datetime(
                cls.optional(ds, "SeriesDate", str, "00000000"),
                cls.optional(ds, "SeriesTime", str, "000000"),
            ),
            study_id=None,
            image_count=cls.optional(ds, "InstanceNumber", int, None),
            protocol_name=cls.optional(ds, "ProtocolName", str, None),
            description=cls.optional(ds, "SeriesDescription", str, None),
            image_shape=(
                cls.optional(ds, "Rows", int, None),
                cls.optional(ds, "Columns", int, None),
            ),
            dicom_tags=DatasetMod.tags_to_keywords(ds.to_json_dict()),
        )

    def __eq__(self, other: "RadiologicalSeries") -> bool:
        """Two series are equal when their ``series_uid`` match."""
        if not isinstance(other, RadiologicalSeries):
            # Let Python fall back to its default handling instead of raising
            # AttributeError on objects without ``series_uid``.
            return NotImplemented
        return self.series_uid == other.series_uid
@dataclass
class Annotation(Entity):
    """Abstract class for any annotation that is stored in cobra_db.

    Should be subclassed.
    """

    annotation_type: List[
        Literal["text", "semantic_segmentation", "binary", "categorical"]
    ]
    annotator_email: str
    # Annotation date.
    date: datetime
    # List of all possible labels, example [mass, non-mass, ...].
    possible_labels: List[str]
    # Name of the extension of the file: jpeg, png, npz, etc.
    file_format: str
    aggregation_level: Literal["patient", "study", "series", "image"]
    # For example the SOPInstanceUID of the image, StudyInstanceID of the study, etc.
    referenced_entity_uid: str
    patient_anon_id: str
    study_date: datetime
    # File/folder of the annotation.
    file_source: FileSource
    project_name: str
@dataclass
class Annotator(Entity):
    """Entity holding the email, specialization and name of an annotator."""

    email: str
    specialization: str
    first_name: str
    last_name: str