Skip to content

Commit 1d24721

Browse files
feat: no compression for tar archive
1 parent 1d6e59c commit 1d24721

File tree

3 files changed

+5
-5
lines changed

3 files changed

+5
-5
lines changed

audio_data_pytorch/datasets/audio_web_dataset.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -102,12 +102,12 @@ def str_to_tags(self, str: str) -> List[str]:
102102

103103
async def preprocess(self):
104104
urls, path = self.urls, self.root
105-
tarfile_name = os.path.join(path, f"{self.name}.tar.gz")
105+
tarfile_name = os.path.join(path, f"{self.name}.tar")
106106
waveform_id = 0
107107

108108
async with Downloader(urls, path=path) as files:
109109
async with Decompressor(files, path=path) as folders:
110-
with tarfile.open(tarfile_name, "w:gz") as archive:
110+
with tarfile.open(tarfile_name, "w") as archive:
111111
for folder in tqdm(folders):
112112
for wav in tqdm(glob.glob(folder + "/**/*.wav")):
113113
waveform, rate = torchaudio.load(wav)

audio_data_pytorch/datasets/clotho_dataset.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ def data_path(self) -> str:
4545

4646
@property
4747
def tar_file_name(self) -> str:
48-
return os.path.join(self.data_path, f"clotho_{self.split}.tar.gz")
48+
return os.path.join(self.data_path, f"clotho_{self.split}.tar")
4949

5050
async def preprocess(self):
5151
urls, path = self.urls, self.data_path
@@ -58,7 +58,7 @@ async def preprocess(self):
5858
captions = pd.read_csv(caption_csv_file)
5959
length = len(captions.index)
6060

61-
with tarfile.open(self.tar_file_name, "w:gz") as archive:
61+
with tarfile.open(self.tar_file_name, "w") as archive:
6262
for i, caption in tqdm(captions.iterrows(), total=length):
6363
wav_file_name = caption.file_name
6464
wav_path = os.path.join(folders[0], self.split, wav_file_name)

setup.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
setup(
44
name="audio-data-pytorch",
55
packages=find_packages(exclude=[]),
6-
version="0.0.15",
6+
version="0.0.16",
77
license="MIT",
88
description="Audio Data - PyTorch",
99
long_description_content_type="text/markdown",

0 commit comments

Comments (0)