Allow dicom files to work with process_image (#171)

ieee8023 · web-flow · commit 02d92fa03392 · 2025-06-09T01:00:16.000Z
* allow dicom files to work with process_image
diff --git a/scripts/process_image.py b/scripts/process_image.py
@@ -1,8 +1,9 @@
 #!/usr/bin/env python
 # coding: utf-8
 
-import os,sys
-sys.path.insert(0,"..")
+import os, sys
+
+sys.path.insert(0, "..")
 from glob import glob
 import matplotlib.pyplot as plt
 import numpy as np
@@ -17,33 +18,22 @@
 import torchxrayvision as xrv
 
 parser = argparse.ArgumentParser()
-parser.add_argument('-f', type=str, default="", help='')
-parser.add_argument('img_path', type=str)
-parser.add_argument('-weights', type=str,default="densenet121-res224-all")
-parser.add_argument('-feats', default=False, help='', action='store_true')
-parser.add_argument('-cuda', default=False, help='', action='store_true')
-parser.add_argument('-resize', default=False, help='', action='store_true')
+parser.add_argument("-f", type=str, default="", help="")
+parser.add_argument("img_path", type=str)
+parser.add_argument("-weights", type=str, default="densenet121-res224-all")
+parser.add_argument("-feats", default=False, help="", action="store_true")
+parser.add_argument("-cuda", default=False, help="", action="store_true")
+parser.add_argument("-resize", default=False, help="", action="store_true")
 
 cfg = parser.parse_args()
 
-
-img = skimage.io.imread(cfg.img_path)
-img = xrv.datasets.normalize(img, 255)  
-
-# Check that images are 2D arrays
-if len(img.shape) > 2:
-    img = img[:, :, 0]
-if len(img.shape) < 2:
-    print("error, dimension lower than 2 for image")
-
-# Add color channel
-img = img[None, :, :]
-
+img = xrv.utils.load_image(cfg.img_path)
 
 # the models will resize the input to the correct size so this is optional.
 if cfg.resize:
-    transform = torchvision.transforms.Compose([xrv.datasets.XRayCenterCrop(),
-                                                xrv.datasets.XRayResizer(224)])
+    transform = torchvision.transforms.Compose(
+        [xrv.datasets.XRayCenterCrop(), xrv.datasets.XRayResizer(224)]
+    )
 else:
     transform = torchvision.transforms.Compose([xrv.datasets.XRayCenterCrop()])
 
@@ -58,19 +48,19 @@
     if cfg.cuda:
         img = img.cuda()
         model = model.cuda()
-        
+
     if cfg.feats:
         feats = model.features(img)
         feats = F.relu(feats, inplace=True)
         feats = F.adaptive_avg_pool2d(feats, (1, 1))
         output["feats"] = list(feats.cpu().detach().numpy().reshape(-1))
 
     preds = model(img).cpu()
-    output["preds"] = dict(zip(xrv.datasets.default_pathologies,preds[0].detach().numpy()))
-    
+    output["preds"] = dict(
+        zip(xrv.datasets.default_pathologies, preds[0].detach().numpy())
+    )
+
 if cfg.feats:
     print(output)
 else:
     pprint.pprint(output)
-    
-   
diff --git a/torchxrayvision/utils.py b/torchxrayvision/utils.py
@@ -8,16 +8,18 @@
 from os import PathLike
 from numpy import ndarray
 import warnings
-from tqdm.autonotebook import tqdm
+from tqdm.auto import tqdm
 
 
 def get_cache_dir():
     return os.path.expanduser(os.path.join("~", ".torchxrayvision", "models_data/"))
 
+
 def in_notebook():
     try:
         from IPython import get_ipython
-        if 'IPKernelApp' not in get_ipython().config:  # pragma: no cover
+
+        if "IPKernelApp" not in get_ipython().config:  # pragma: no cover
             return False
     except ImportError:
         return False
@@ -28,31 +30,37 @@ def in_notebook():
 
 # from here https://sumit-ghosh.com/articles/python-download-progress-bar/
 def download(url: str, filename: str):
-    with open(filename, 'wb') as f:
+    with open(filename, "wb") as f:
         response = requests.get(url, stream=True)
-        total = response.headers.get('content-length')
+        total = response.headers.get("content-length")
 
         if total is None:
             f.write(response.content)
         else:
             downloaded = 0
             total = int(total)
-            for data in response.iter_content(chunk_size=max(int(total / 1000), 1024 * 1024)):
+            for data in response.iter_content(
+                chunk_size=max(int(total / 1000), 1024 * 1024)
+            ):
                 downloaded += len(data)
                 f.write(data)
                 done = int(50 * downloaded / total)
-                sys.stdout.write('\r[{}{}]'.format('█' * done, '.' * (50 - done)))
+                sys.stdout.write("\r[{}{}]".format("█" * done, "." * (50 - done)))
                 sys.stdout.flush()
-    sys.stdout.write('\n')
+    sys.stdout.write("\n")
 
 
 def normalize(img, maxval, reshape=False):
     """Scales images to be roughly [-1024 1024]."""
 
     if img.max() > maxval:
-        raise Exception("max image value ({}) higher than expected bound ({}).".format(img.max(), maxval))
+        raise ValueError(
+            "max image value ({}) higher than expected bound ({}).".format(
+                img.max(), maxval
+            )
+        )
 
-    img = (2 * (img.astype(np.float32) / maxval) - 1.) * 1024
+    img = (2 * (img.astype(np.float32) / maxval) - 1.0) * 1024
 
     if reshape:
         # Check that images are 2D arrays
@@ -70,6 +78,13 @@ def normalize(img, maxval, reshape=False):
 def load_image(fname: str):
     """Load an image from a file and normalize it between -1024 and 1024. Assumes 8-bits per pixel."""
 
+    with open(fname, "rb") as f:
+        # Read the first 132 bytes (128 preamble + 4 for "DICM")
+        header = f.read(132)
+        # Check if the file is long enough and has "DICM" at position 128
+        if len(header) >= 132 and header[128:132] == b"DICM":
+            return read_xray_dcm(fname)[None, ...]
+
     img = skimage.io.imread(fname)
     img = normalize(img, 255)
 
@@ -85,8 +100,10 @@ def load_image(fname: str):
     return img
 
 
-def read_xray_dcm(path: PathLike, voi_lut: bool = False, fix_monochrome: bool = True) -> ndarray:
-    """read a dicom-like file and convert to numpy array 
+def read_xray_dcm(
+    path: PathLike, voi_lut: bool = False, fix_monochrome: bool = True
+) -> ndarray:
+    """read a dicom-like file and convert to numpy array
 
     Args:
         path (PathLike): path to the dicom file
@@ -99,35 +116,43 @@ def read_xray_dcm(path: PathLike, voi_lut: bool = False, fix_monochrome: bool =
     try:
         import pydicom
     except ImportError:
-        raise Exception("Missing Package Pydicom. Try installing it by running `pip install pydicom`.")
+        raise ImportError(
+            "Missing Package Pydicom. Try installing it by running `pip install pydicom`."
+        )
 
     # get the pixel array
     ds = pydicom.dcmread(path, force=True)
 
     # we have not tested RGB, YBR_FULL, or YBR_FULL_422 yet.
-    if ds.PhotometricInterpretation not in ['MONOCHROME1', 'MONOCHROME2']:
-        raise NotImplementedError(f'PhotometricInterpretation `{ds.PhotometricInterpretation}` is not yet supported.')
+    if ds.PhotometricInterpretation not in ["MONOCHROME1", "MONOCHROME2"]:
+        raise NotImplementedError(
+            f"PhotometricInterpretation `{ds.PhotometricInterpretation}` is not yet supported."
+        )
     # get the max possible pixel value from DCM header
-    max_possible_pixel_val = (2**ds.BitsStored - 1)
+    max_possible_pixel_val = 2**ds.BitsStored - 1
 
     data = ds.pixel_array
-    
+
     # LUT for human friendly view
     if voi_lut:
         data = pydicom.pixel_data_handlers.util.apply_voi_lut(data, ds, index=0)
 
     # `MONOCHROME1` have an inverted view; Bones are black; background is white
     # https://web.archive.org/web/20150920230923/http://www.mccauslandcenter.sc.edu/mricro/dicom/index.html
     if fix_monochrome and ds.PhotometricInterpretation == "MONOCHROME1":
-        warnings.warn(f"Coverting MONOCHROME1 to MONOCHROME2 interpretation for file: {path}. Can be avoided by setting `fix_monochrome=False`")
+        warnings.warn(
+            f"Converting MONOCHROME1 to MONOCHROME2 interpretation for file: {path}. Can be avoided by setting `fix_monochrome=False`"
+        )
         data = max_possible_pixel_val - data
 
     # normalize data to [-1024, 1024]
     data = normalize(data, max_possible_pixel_val)
     return data
 
 
-def infer(model: torch.nn.Module, dataset: torch.utils.data.Dataset, threads=4, device='cpu'):
+def infer(
+    model: torch.nn.Module, dataset: torch.utils.data.Dataset, threads=4, device="cpu"
+):
 
     dl = torch.utils.data.DataLoader(
         dataset,
@@ -148,37 +173,50 @@ def infer(model: torch.nn.Module, dataset: torch.utils.data.Dataset, threads=4,
 
 warning_log = {}
 
+
 def fix_resolution(x, resolution: int, model):
     """Check resolution of input and resize to match requested."""
 
     if len(x.shape) == 3:
         # Extend to be 4D
-        x = x[None,...]
+        x = x[None, ...]
 
     if x.shape[2] != x.shape[3]:
-        raise Exception(f"Height and width of the image must be the same. Input: {x.shape[2]} != {x.shape[3]}. Perform a center crop first.")
-    
-    if (x.shape[2] != resolution) | (x.shape[3] != resolution):
+        raise Exception(
+            f"Height and width of the image must be the same. Input: {x.shape[2]} != {x.shape[3]}. Perform a center crop first."
+        )
+
+    if (x.shape[2] != resolution) or (x.shape[3] != resolution):
         if not hash(model) in warning_log:
-            print("Warning: Input size ({}x{}) is not the native resolution ({}x{}) for this model. A resize will be performed but this could impact performance.".format(x.shape[2], x.shape[3], resolution, resolution))
+            print(
+                "Warning: Input size ({}x{}) is not the native resolution ({}x{}) for this model. A resize will be performed but this could impact performance.".format(
+                    x.shape[2], x.shape[3], resolution, resolution
+                )
+            )
             warning_log[hash(model)] = True
-        return torch.nn.functional.interpolate(x, size=(resolution, resolution), mode='bilinear', antialias=True)
+        return torch.nn.functional.interpolate(
+            x, size=(resolution, resolution), mode="bilinear", antialias=True
+        )
     return x
 
 
 def warn_normalization(x):
-    """Check normalization of input and warn if possibly wrong. When 
-    processing an image that may likely not have the correct 
-    normalization we can issue a warning. But running min and max on 
+    """Check normalization of input and warn if possibly wrong. When
+    processing an image that may likely not have the correct
+    normalization we can issue a warning. But running min and max on
     every image/batch is costly so we only do it on the first image/batch.
     """
 
     # Only run this check on the first image so we don't hurt performance.
     if not "norm_check" in warning_log:
         x_min = x.min()
         x_max = x.max()
-        if torch.logical_or(-255 < x_min, x_max < 255) or torch.logical_or(x_min < -1025, 1025 < x_max):
-            print(f'Warning: Input image does not appear to be normalized correctly. The input image has the range [{x_min:.2f},{x_max:.2f}] which doesn\'t seem to be in the [-1024,1024] range. This warning may be wrong though. Only the first image is tested and we are only using a heuristic in an attempt to save a user from using the wrong normalization.')
+        if torch.logical_or(-255 < x_min, x_max < 255) or torch.logical_or(
+            x_min < -1025, 1025 < x_max
+        ):
+            print(
+                f"Warning: Input image does not appear to be normalized correctly. The input image has the range [{x_min:.2f},{x_max:.2f}] which doesn't seem to be in the [-1024,1024] range. This warning may be wrong though. Only the first image is tested and we are only using a heuristic in an attempt to save a user from using the wrong normalization."
+            )
             warning_log["norm_correct"] = False
         else:
             warning_log["norm_correct"] = True