Skip to content

Commit 9f2cc02

Browse files
authored
UC Merced: redistribute split files on Hugging Face (#2433)
1 parent 2dbb039 commit 9f2cc02

File tree

2 files changed

+13
-40
lines changed

2 files changed

+13
-40
lines changed

tests/datasets/test_ucmerced.py

Lines changed: 5 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -21,34 +21,11 @@ class TestUCMerced:
2121
def dataset(
2222
self, monkeypatch: MonkeyPatch, tmp_path: Path, request: SubRequest
2323
) -> UCMerced:
24-
md5 = 'a42ef8779469d196d8f2971ee135f030'
25-
monkeypatch.setattr(UCMerced, 'md5', md5)
26-
url = os.path.join('tests', 'data', 'ucmerced', 'UCMerced_LandUse.zip')
24+
url = os.path.join('tests', 'data', 'ucmerced') + os.sep
2725
monkeypatch.setattr(UCMerced, 'url', url)
28-
monkeypatch.setattr(
29-
UCMerced,
30-
'split_urls',
31-
{
32-
'train': os.path.join(
33-
'tests', 'data', 'ucmerced', 'uc_merced-train.txt'
34-
),
35-
'val': os.path.join('tests', 'data', 'ucmerced', 'uc_merced-val.txt'),
36-
'test': os.path.join('tests', 'data', 'ucmerced', 'uc_merced-test.txt'),
37-
},
38-
)
39-
monkeypatch.setattr(
40-
UCMerced,
41-
'split_md5s',
42-
{
43-
'train': 'a01fa9f13333bb176fc1bfe26ff4c711',
44-
'val': 'a01fa9f13333bb176fc1bfe26ff4c711',
45-
'test': 'a01fa9f13333bb176fc1bfe26ff4c711',
46-
},
47-
)
48-
root = tmp_path
4926
split = request.param
5027
transforms = nn.Identity()
51-
return UCMerced(root, split, transforms, download=True, checksum=True)
28+
return UCMerced(tmp_path, split, transforms, download=True)
5229

5330
def test_getitem(self, dataset: UCMerced) -> None:
5431
x = dataset[0]
@@ -65,14 +42,14 @@ def test_add(self, dataset: UCMerced) -> None:
6542
assert len(ds) == 8
6643

6744
def test_already_downloaded(self, dataset: UCMerced, tmp_path: Path) -> None:
68-
UCMerced(root=tmp_path, download=True)
45+
UCMerced(tmp_path)
6946

7047
def test_already_downloaded_not_extracted(
7148
self, dataset: UCMerced, tmp_path: Path
7249
) -> None:
7350
shutil.rmtree(dataset.root)
74-
shutil.copy(dataset.url, tmp_path)
75-
UCMerced(root=tmp_path, download=False)
51+
shutil.copy(dataset.url + dataset.filename, tmp_path)
52+
UCMerced(tmp_path)
7653

7754
def test_not_downloaded(self, tmp_path: Path) -> None:
7855
with pytest.raises(DatasetNotFoundError, match='Dataset not found'):

torchgeo/datasets/ucmerced.py

Lines changed: 8 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -66,17 +66,17 @@ class UCMerced(NonGeoClassificationDataset):
6666
* https://dl.acm.org/doi/10.1145/1869790.1869829
6767
"""
6868

69-
url = 'https://hf.co/datasets/torchgeo/ucmerced/resolve/d0af6e2eeea2322af86078068bd83337148a2149/UCMerced_LandUse.zip'
69+
url = 'https://hf.co/datasets/torchgeo/ucmerced/resolve/7c5ef3454d9b1cccfa7ccde0c01fc8f00a45909a/'
7070
filename = 'UCMerced_LandUse.zip'
7171
md5 = '5b7ec56793786b6dc8a908e8854ac0e4'
7272

7373
base_dir = os.path.join('UCMerced_LandUse', 'Images')
7474

7575
splits = ('train', 'val', 'test')
76-
split_urls: ClassVar[dict[str, str]] = {
77-
'train': 'https://storage.googleapis.com/remote_sensing_representations/uc_merced-train.txt',
78-
'val': 'https://storage.googleapis.com/remote_sensing_representations/uc_merced-val.txt',
79-
'test': 'https://storage.googleapis.com/remote_sensing_representations/uc_merced-test.txt',
76+
split_filenames: ClassVar[dict[str, str]] = {
77+
'train': 'uc_merced-train.txt',
78+
'val': 'uc_merced-val.txt',
79+
'test': 'uc_merced-test.txt',
8080
}
8181
split_md5s: ClassVar[dict[str, str]] = {
8282
'train': 'f2fb12eb2210cfb53f93f063a35ff374',
@@ -113,7 +113,7 @@ def __init__(
113113
self._verify()
114114

115115
valid_fns = set()
116-
with open(os.path.join(self.root, f'uc_merced-{split}.txt')) as f:
116+
with open(os.path.join(self.root, self.split_filenames[split])) as f:
117117
for fn in f:
118118
valid_fns.add(fn.strip())
119119

@@ -173,16 +173,12 @@ def _verify(self) -> None:
173173
def _download(self) -> None:
174174
"""Download the dataset."""
175175
download_url(
176-
self.url,
177-
self.root,
178-
filename=self.filename,
179-
md5=self.md5 if self.checksum else None,
176+
self.url + self.filename, self.root, md5=self.md5 if self.checksum else None
180177
)
181178
for split in self.splits:
182179
download_url(
183-
self.split_urls[split],
180+
self.url + self.split_filenames[split],
184181
self.root,
185-
filename=f'uc_merced-{split}.txt',
186182
md5=self.split_md5s[split] if self.checksum else None,
187183
)
188184

0 commit comments

Comments
 (0)