1
1
from tempfile import mkdtemp
2
+ import requests
3
+ import tarfile
2
4
import shutil
5
+ import os
6
+ import typing as ty
3
7
from pathlib import Path
4
8
import openneuro
5
9
import attrs
10
+ from . import base_cache_dir
6
11
7
12
8
13
@attrs.define
class OpenneuroSpec:
    """Specification of a sample to fetch, made up of a dataset, a tag and a path.

    NOTE(review): presumably consumed by ``retrieve_from_openneuro`` (which
    reads ``sample.path``) -- confirm against callers.
    """

    # presumably the OpenNeuro dataset identifier -- TODO confirm
    dataset: str
    # presumably the dataset version/tag to download -- TODO confirm
    tag: str
    # coerced to a ``pathlib.Path`` by the attrs converter on assignment
    path: Path = attrs.field(converter=Path)
@@ -16,6 +20,7 @@ class OpenneuroSpec:
16
20
def retrieve_from_openneuro (
17
21
sample , cache_path , suffixes = (".nii.gz" , ".json" ), force_download = False
18
22
):
23
+ """Retrieves an image from the OpenNeuro repository"""
19
24
if not cache_path .parent .exists ():
20
25
cache_path .parent .mkdir (parents = True )
21
26
out_path = cache_path .with_suffix (suffixes [0 ])
@@ -32,3 +37,66 @@ def retrieve_from_openneuro(
32
37
(tmpdir / sample .path ).with_suffix (ext ), cache_path .with_suffix (ext )
33
38
)
34
39
return out_path
40
+
41
+
42
def retrieve_from_github(
    org: str,
    repo: str,
    path: str,
    tag: str = "main",
    compressed: bool = True,
    cache_dir: ty.Union[Path, str, None] = None,
) -> Path:
    """Retrieves a sample file from a path within a GitHub repository

    The file is downloaded once and cached; subsequent calls with the same
    arguments return the cached path without touching the network.

    Parameters
    ----------
    org: str
        the Github organisation
    repo : str
        the name of the git repository within the Github organisation
    path : str
        the path to the file relative to the repository
    tag : str, optional
        the git tag (version) to use, "main" by default
    compressed : bool, optional
        whether the file within the git repo has been archived with tar/gzip and
        needs to be uncompressed before use, True by default
    cache_dir : Path | str, optional
        the directory in which to download and cache the requested file, by default uses
        "~/.medimages/cache/github"

    Returns
    -------
    Path
        the path to the cached file (or extracted directory)

    Raises
    ------
    ValueError
        if the server does not respond with HTTP 200 for the requested URL, or
        if the downloaded archive does not contain exactly one top-level
        file/sub-directory
    """
    if cache_dir is None:
        cache_dir = base_cache_dir / "github"
    else:
        cache_dir = Path(cache_dir).expanduser()
    # NOTE(review): the cache key omits `org`, so same-named repos in different
    # organisations share a cache entry -- confirm this is intended.
    cache_path = (cache_dir / repo / tag).joinpath(*path.split("/"))
    if cache_path.exists():
        return cache_path
    # exist_ok avoids a race between the existence check and the mkdir
    cache_path.parent.mkdir(parents=True, exist_ok=True)
    # Raw-content URLs are of the form <host>/<org>/<repo>/<ref>/<path>
    url = f"https://raw.githubusercontent.com/{org}/{repo}/{tag}/{path}"
    if compressed:
        url += ".tar.gz"
    response = requests.get(url)
    # `status_code` is an int; comparing against the string "200" never matches
    if response.status_code != 200:
        raise ValueError(f"Did not find a file to download at '{url}'")
    if not compressed:
        cache_path.write_bytes(response.content)
        return cache_path
    tmp_dir = Path(mkdtemp())
    try:
        download_path = tmp_dir / url.split("/")[-1]
        download_path.write_bytes(response.content)
        extract_dir = tmp_dir / "extracted"
        extract_dir.mkdir()
        # NOTE: members are extracted without filtering, so this should only be
        # pointed at trusted repositories.
        with tarfile.open(download_path) as tfile:
            tfile.extractall(path=extract_dir)
        dir_contents = list(extract_dir.iterdir())
        if len(dir_contents) != 1:
            raise ValueError(
                f"Contents of tar file at {url} must contain exactly one "
                f"file/sub-dir, found {dir_contents}"
            )
        # shutil.move (unlike os.rename) also works when the temp dir and the
        # cache dir live on different filesystems
        shutil.move(str(dir_contents[0]), str(cache_path))
    finally:
        # Always discard the scratch directory, even if extraction failed
        shutil.rmtree(tmp_dir, ignore_errors=True)
    return cache_path
0 commit comments