-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfetch_dicom.py
More file actions
146 lines (123 loc) · 4.52 KB
/
fetch_dicom.py
File metadata and controls
146 lines (123 loc) · 4.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import json
import logging
from pathlib import Path
from typing import Any
import pydicom
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Folder scanned for DICOM files when the module is run as a script.
DATA_DIR = Path("dicom_files")

# The whitelist is kept as one tuple per DICOM information level so each
# group can be read (and edited) on its own; the groups are merged into a
# single frozenset below for O(1) membership tests.

# Patient Level
_PATIENT_TAGS: tuple[int, ...] = (
    0x00100010,  # Patient's Name
    0x00100020,  # Patient ID
    0x00100040,  # Patient's Sex
    0x00101010,  # Patient's Age
    0x00102160,  # Ethnic Group
    0x00104000,  # Patient Comments
    0x001021B0,  # Additional Patient History
)

# Study Level
_STUDY_TAGS: tuple[int, ...] = (
    0x0020000D,  # Study Instance UID
    0x00080020,  # Study Date
    0x00080030,  # Study Time
    0x00080050,  # Accession Number
    0x00081030,  # Study Description
    0x00081040,  # Institutional Department Name
    0x00080080,  # Institution Name
    0x00080060,  # Modality
    0x00080070,  # Manufacturer
    0x00081090,  # Manufacturer's Model Name
)

# Series Level
_SERIES_TAGS: tuple[int, ...] = (
    0x0020000E,  # Series Instance UID
    0x00080021,  # Series Date
    0x00080031,  # Series Time
    0x00200011,  # Series Number
    0x0008103E,  # Series Description
)

# Image (Instance) Level
_INSTANCE_TAGS: tuple[int, ...] = (
    0x00080018,  # SOP Instance UID
    0x00200013,  # Instance Number
    0x00080022,  # Acquisition Date
    0x00080032,  # Acquisition Time
    0x00200032,  # Image Position (Patient)
    0x00200037,  # Image Orientation (Patient)
    0x00280010,  # Rows
    0x00280011,  # Columns
)

# Acquisition Parameters
_ACQUISITION_TAGS: tuple[int, ...] = (
    0x00180010,  # Contrast/Bolus Agent
    0x00180015,  # Body Part Examined
    0x00180022,  # Scan Options
    0x00180050,  # Slice Thickness
    0x00180060,  # KVP
    0x00180090,  # Data Collection Diameter
    0x00181020,  # Software Versions
    0x00181030,  # Protocol Name
    0x00181040,  # Contrast/Bolus Route
    0x00181100,  # Reconstruction Diameter
    0x00181110,  # Distance Source to Detector
    0x00181111,  # Distance Source to Patient
    0x00181120,  # Gantry/Detector Tilt
    0x00181130,  # Table Height
    0x00181140,  # Rotation Direction
    0x00181150,  # Exposure Time
    0x00181151,  # X-Ray Tube Current
    0x00181152,  # Exposure
    0x00181160,  # Filter Type
    0x00181170,  # Generator Power
    0x00181190,  # Focal Spot(s)
    0x00181210,  # Convolution Kernel
    0x00185100,  # Patient Position
    0x00189305,  # Revolution Time
    0x00189306,  # Single Collimation Width
    0x00189307,  # Total Collimation Width
    0x00189309,  # Table Speed
    0x00189310,  # Table Feed per Rotation
    0x00189311,  # Spiral Pitch Factor
    0x00321030,  # Reason for Study
    0x00324000,  # Study Comments
)

# Tags (as 32-bit ``0xGGGGEEEE`` integers) that are kept in the exported
# metadata; every element whose tag is not listed here is dropped.
INCLUDE_TAGS: frozenset[int] = frozenset(
    _PATIENT_TAGS
    + _STUDY_TAGS
    + _SERIES_TAGS
    + _INSTANCE_TAGS
    + _ACQUISITION_TAGS
)
def _to_json_value(val: Any) -> Any:
    """Coerce a raw element value into something ``json.dump`` can serialize.

    * ``bytes`` are decoded as UTF-8, silently dropping undecodable bytes.
    * ``list`` and pydicom ``MultiValue`` containers become plain lists whose
      items are coerced recursively.
    * ``None``, ``str``, ``int``, ``float`` (including subclasses) and
      ``dict`` values are passed through unchanged.
    * Anything else is converted with ``str()``.
    """
    if isinstance(val, bytes):
        # errors="ignore" never raises on bad bytes, so no try/except is needed.
        return val.decode(errors="ignore")
    # pydicom's MultiValue is a MutableSequence, not a ``list`` subclass, so an
    # ``isinstance(val, list)`` test alone lets multi-valued elements fall
    # through to ``str()`` and end up as one "[a, b, c]" string in the JSON.
    if isinstance(val, (list, pydicom.multival.MultiValue)):
        return [_to_json_value(item) for item in val]
    if val is None or isinstance(val, (str, int, float, dict)):
        return val
    return str(val)


def element_to_dict(elem: pydicom.DataElement) -> dict[str, Any] | None:
    """Convert a single DataElement to a JSON-friendly dict.

    Args:
        elem: The element to convert.

    Returns:
        ``None`` when the element's tag is not in ``INCLUDE_TAGS``; otherwise
        a dict with the keys ``"Tag"`` (formatted ``(GGGG,EEEE)``), ``"VR"``,
        ``"Name"`` and ``"Value"``. For elements with VR ``"SQ"`` the value is
        a list with one ``dataset_to_list`` result per sequence item, so
        nested elements go through the same whitelist. Multi-valued elements
        yield a list of values; all other values are coerced by
        ``_to_json_value``.
    """
    if elem.tag not in INCLUDE_TAGS:
        return None

    if elem.VR == "SQ":
        # Expand sequences recursively, one list of element dicts per item.
        value = [dataset_to_list(item) for item in elem.value]
    else:
        value = _to_json_value(elem.value)

    return {
        "Tag": f"({elem.tag.group:04X},{elem.tag.element:04X})",
        "VR": elem.VR,
        "Name": elem.name,
        "Value": value,
    }
def dataset_to_list(ds: pydicom.Dataset) -> list[dict[str, Any]]:
    """Return the converted elements of ``ds`` in iteration order.

    Each element is passed to ``element_to_dict``; elements for which that
    function returns ``None`` are left out of the result.
    """
    converted: list[dict[str, Any]] = []
    for element in ds:
        entry = element_to_dict(element)
        if entry is None:
            continue
        converted.append(entry)
    return converted
def process_dicom_file(filepath: Path) -> dict[str, Any]:
    """Read one file with pydicom and return its filtered metadata.

    The file is read with ``stop_before_pixels=True`` and ``force=True``.

    Args:
        filepath: Path of the file to read.

    Returns:
        On success, a dict with ``"FilePath"``, ``"FileMeta"`` and
        ``"Dataset"``; the latter two are ``dataset_to_list`` results for the
        file meta information and the main dataset. If reading or converting
        raises, the failure is logged as a warning and a dict with
        ``"FilePath"`` and ``"Error"`` (the exception text) is returned
        instead, so one bad file does not abort a batch.
    """
    path_text = str(filepath)
    try:
        dataset = pydicom.dcmread(filepath, stop_before_pixels=True, force=True)
        # Conversion stays inside the try: iterating a damaged dataset can
        # raise just like the initial read.
        file_meta_entries = dataset_to_list(dataset.file_meta)
        dataset_entries = dataset_to_list(dataset)
    except Exception as exc:
        logger.warning("Failed to read %s: %s", filepath, exc)
        return {"FilePath": path_text, "Error": str(exc)}

    return {
        "FilePath": path_text,
        "FileMeta": file_meta_entries,
        "Dataset": dataset_entries,
    }
def process_dicom_folder(folder_path: Path, output_json: Path) -> None:
    """Collect metadata from every ``*.dcm`` file under a folder into one JSON file.

    Args:
        folder_path: Root folder; it is searched recursively for ``*.dcm``.
        output_json: Destination file. It is overwritten with a JSON array
            holding one ``process_dicom_file`` result per file found.

    A missing or non-directory ``folder_path`` is logged as a warning and
    results in an empty JSON array being written.
    """
    if not folder_path.is_dir():
        logger.warning(
            "Input folder %s does not exist or is not a directory", folder_path
        )

    all_dicom_data = []
    # Sort the matches: rglob yields paths in filesystem order, which would
    # make the order of the output differ between runs and machines.
    for filepath in sorted(folder_path.rglob("*.dcm")):
        logger.info("Processing: %s", filepath)
        all_dicom_data.append(process_dicom_file(filepath))

    # Serialize before opening the file so a serialization error cannot leave
    # a truncated output file (or clobber a previous good one).
    payload = json.dumps(all_dicom_data, indent=4, ensure_ascii=False)
    with open(output_json, "w", encoding="utf-8") as f:
        f.write(payload)
    logger.info("Metadata saved to %s", output_json)
if __name__ == "__main__":
    # Script entry point: enable INFO logging, then export the metadata of
    # everything under DATA_DIR to a JSON file in the working directory.
    logging.basicConfig(level=logging.INFO)
    output_path = Path("dicom_metadata.json")
    process_dicom_folder(DATA_DIR, output_path)