Skip to content

Commit b7ebd21

Browse files
authored
Merge pull request #5 from Australian-Imaging-Service/github-download
added retrieve from github function
2 parents 7d914c0 + 83f0dd9 commit b7ebd21

File tree

5 files changed

+149
-4
lines changed

5 files changed

+149
-4
lines changed

medimages4tests/mri/neuro/bold.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
from pathlib import Path
2+
from medimages4tests import base_cache_dir
3+
from medimages4tests.utils import retrieve_from_openneuro, OpenneuroSpec
4+
5+
6+
# Directory, beneath the package-wide cache root, where BOLD samples are cached
cache_dir = base_cache_dir / "mri" / "neuro" / "bold"


# Samples that can be requested from this module, keyed by sample name. Each
# spec records the OpenNeuro dataset, the dataset version tag and the path of
# the image within the dataset (given without a file extension).
SAMPLES = {
    "ds002014-01": OpenneuroSpec(
        dataset="ds002014",
        tag="1.0.1",
        path="sub-01/func/sub-01_task-languageproduction_run-01_bold",
    )
}
16+
17+
18+
def get_image(out_dir: Path = None, sample: str = "ds002014-01"):
    """Fetch one of the BOLD samples listed in ``SAMPLES`` from OpenNeuro.

    Parameters
    ----------
    out_dir : Path, optional
        cache path handed to ``retrieve_from_openneuro``; when omitted,
        ``cache_dir / sample`` is used
    sample : str, optional
        key into ``SAMPLES`` selecting the image to fetch

    Returns
    -------
    whatever ``retrieve_from_openneuro`` returns for the selected sample
    """
    destination = cache_dir / sample if out_dir is None else out_dir
    spec = SAMPLES[sample]
    return retrieve_from_openneuro(spec, destination)

medimages4tests/mri/neuro/dwi.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
from pathlib import Path
2+
from medimages4tests import base_cache_dir
3+
from medimages4tests.utils import retrieve_from_openneuro, OpenneuroSpec
4+
5+
6+
# Directory, beneath the package-wide cache root, where DWI samples are cached.
# It must be specific to this modality: sample keys are reused between
# modalities (e.g. "ds004024-CON031"), so sharing another modality's directory
# would make the cached files of one be mistaken for those of the other.
cache_dir = base_cache_dir / "mri" / "neuro" / "dwi"


# Samples that can be requested from this module, keyed by sample name. Each
# spec records the OpenNeuro dataset, the dataset version tag and the path of
# the image within the dataset (given without a file extension).
SAMPLES = {
    "ds004024-CON031": OpenneuroSpec(
        dataset="ds004024",
        tag="1.0.1",
        path="sub-CON031/ses-mri/dwi/sub-CON031_ses-mri_dwi",
    )
}
16+
17+
18+
def get_image(out_dir: Path = None, sample: str = "ds004024-CON031"):
    """Fetch one of the DWI samples listed in ``SAMPLES`` from OpenNeuro.

    In addition to the image and its JSON side-car, the gradient files
    (".bvec" and ".bval") are requested.

    Parameters
    ----------
    out_dir : Path, optional
        cache path handed to ``retrieve_from_openneuro``; when omitted,
        ``cache_dir / sample`` is used
    sample : str, optional
        key into ``SAMPLES`` selecting the image to fetch

    Returns
    -------
    whatever ``retrieve_from_openneuro`` returns for the selected sample
    """
    destination = cache_dir / sample if out_dir is None else out_dir
    spec = SAMPLES[sample]
    dwi_suffixes = (".nii.gz", ".json", ".bvec", ".bval")
    return retrieve_from_openneuro(spec, destination, suffixes=dwi_suffixes)

medimages4tests/mri/neuro/t1w.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from pathlib import Path
12
from medimages4tests import base_cache_dir
23
from medimages4tests.utils import retrieve_from_openneuro, OpenneuroSpec
34

@@ -10,9 +11,26 @@
1011
dataset="ds004130",
1112
tag="1.0.0",
1213
path="sub-ON01016/anat/sub-ON01016_acq-fspgr_run-01_T1w",
13-
)
14+
),
15+
"ds002014-01": OpenneuroSpec(
16+
dataset="ds002014",
17+
tag="1.0.1",
18+
path="sub-01/anat/sub-01_T1w",
19+
),
20+
"ds001743-01": OpenneuroSpec(
21+
dataset="ds001743",
22+
tag="1.0.1",
23+
path="sub-01/anat/sub-01_T1w",
24+
),
25+
"ds004024-CON031": OpenneuroSpec(
26+
dataset="ds004024",
27+
tag="1.0.1",
28+
path="sub-CON031/ses-mri/dwi/sub-CON031_ses-mri_T1w",
29+
),
1430
}
1531

1632

17-
def get_image(sample="ds004130-ON01016"):
18-
return retrieve_from_openneuro(SAMPLES[sample], cache_dir / sample)
33+
def get_image(out_dir: Path = None, sample: str = "ds004130-ON01016"):
    """Fetch one of the T1-weighted samples listed in ``SAMPLES`` from OpenNeuro.

    Parameters
    ----------
    out_dir : Path, optional
        cache path handed to ``retrieve_from_openneuro``; when omitted,
        ``cache_dir / sample`` is used
    sample : str, optional
        key into ``SAMPLES`` selecting the image to fetch

    Returns
    -------
    whatever ``retrieve_from_openneuro`` returns for the selected sample
    """
    destination = cache_dir / sample if out_dir is None else out_dir
    spec = SAMPLES[sample]
    return retrieve_from_openneuro(spec, destination)

medimages4tests/utils.py

Lines changed: 69 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,17 @@
11
from tempfile import mkdtemp
2+
import requests
3+
import tarfile
24
import shutil
5+
import os
6+
import typing as ty
37
from pathlib import Path
48
import openneuro
59
import attrs
10+
from . import base_cache_dir
611

712

813
@attrs.define
914
class OpenneuroSpec:
10-
1115
dataset: str
1216
tag: str
1317
path: Path = attrs.field(converter=Path)
@@ -16,6 +20,7 @@ class OpenneuroSpec:
1620
def retrieve_from_openneuro(
1721
sample, cache_path, suffixes=(".nii.gz", ".json"), force_download=False
1822
):
23+
"""Retrieves an image from the OpenNeuro repository"""
1924
if not cache_path.parent.exists():
2025
cache_path.parent.mkdir(parents=True)
2126
out_path = cache_path.with_suffix(suffixes[0])
@@ -32,3 +37,66 @@ def retrieve_from_openneuro(
3237
(tmpdir / sample.path).with_suffix(ext), cache_path.with_suffix(ext)
3338
)
3439
return out_path
40+
41+
42+
def retrieve_from_github(
    org: str,
    repo: str,
    path: str,
    tag: str = "main",
    compressed: bool = True,
    cache_dir: ty.Union[Path, str, None] = None,
) -> Path:
    """Retrieves a sample file from a path within a GitHub repository

    The file is downloaded once and cached; subsequent calls with the same
    arguments return the cached copy without touching the network.

    Parameters
    ----------
    org: str
        the Github organisation
    repo : str
        the name of the git repository within the Github organisation
    path : str
        the path to the file relative to the repository
    tag : str, optional
        the git tag (version) to use, "main" by default
    compressed : bool, optional
        whether the file within the git repo has been archived with tar/gzip and
        needs to be uncompressed before use, True by default. The archive is
        expected at ``<path>.tar.gz`` and must contain exactly one top-level
        file or directory
    cache_dir : Path | str, optional
        the directory in which to download and cache the requested file, by default
        the "github" sub-directory of the package's base cache directory

    Returns
    -------
    Path
        the location of the cached file (or extracted directory), i.e.
        ``<cache_dir>/<org>/<repo>/<tag>/<path>``

    Raises
    ------
    ValueError
        if the download does not return HTTP 200, or if the downloaded archive
        does not contain exactly one top-level file/sub-directory
    """
    if cache_dir is None:
        cache_dir = base_cache_dir / "github"
    else:
        cache_dir = Path(cache_dir).expanduser()
    # The organisation is part of the cache key so that same-named repositories
    # belonging to different organisations cannot shadow each other
    cache_path = (cache_dir / org / repo / tag).joinpath(*path.split("/"))
    if cache_path.exists():
        return cache_path
    cache_path.parent.mkdir(parents=True, exist_ok=True)
    url = f"https://raw.githubusercontent.com/{org}/{repo}/{tag}/{path}"
    if compressed:
        url += ".tar.gz"
    response = requests.get(url)
    # status_code is an int, comparing it against the string "200" never matches
    if response.status_code != 200:
        raise ValueError(f"Did not find a file to download at '{url}'")
    if not compressed:
        cache_path.write_bytes(response.content)
        return cache_path
    tmp_dir = Path(mkdtemp())
    try:
        download_path = tmp_dir / url.split("/")[-1]
        download_path.write_bytes(response.content)
        extract_dir = tmp_dir / "extracted"
        extract_dir.mkdir()
        # NOTE(review): the archive comes from the network; extractall() trusts
        # member paths, so only point this function at repositories you trust
        with tarfile.open(download_path) as tfile:
            tfile.extractall(path=extract_dir)
        dir_contents = list(extract_dir.iterdir())
        if len(dir_contents) != 1:
            raise ValueError(
                f"Contents of tar file at {url} should be exactly one file/sub-dir, "
                f"found {len(dir_contents)} ({dir_contents})"
            )
        # shutil.move (unlike os.rename) also works when the temporary directory
        # and the cache live on different filesystems
        shutil.move(str(dir_contents[0]), str(cache_path))
    finally:
        # Don't leave the downloaded archive and scratch space behind
        shutil.rmtree(tmp_dir, ignore_errors=True)
    return cache_path

tests/test_github.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import pytest
2+
import nibabel as nb
3+
import numpy as np
4+
from medimages4tests.utils import retrieve_from_github
5+
6+
7+
@pytest.mark.xfail
def test_github_retrieve():
    """Download a sample via ``retrieve_from_github`` and check the image dimensions."""
    nifti_fpath = retrieve_from_github(
        org="nipype", repo="pydra-fsl-testdata", path="melodic_ica"
    )
    nifti = nb.load(nifti_fpath)

    # First entry of "dim" is the number of dimensions, followed by their sizes
    assert np.array_equal(nifti.header["dim"][:4], [3, 204, 256, 256])

0 commit comments

Comments
 (0)