Skip to content

Commit fd2bcdf

Browse files
authored
chore: cache setup downloads (#13653)
We add an option to cache downloaded artifacts during setup to allow for faster local builds. The feature is enabled by setting the ``DD_SETUP_CACHE_DOWNLOADS`` environment variable to `1`, `yes`, `on`, or `true` (case-insensitive). Artifacts are downloaded to `.download_cache` within the working directory. To force the setup script to re-download all or some artifacts, delete the corresponding content of this folder.

## Checklist
- [ ] PR author has checked that all the criteria below are met
- The PR description includes an overview of the change
- The PR description articulates the motivation for the change
- The change includes tests OR the PR description describes a testing strategy
- The PR description notes risks associated with the change, if any
- Newly-added code is easy to change
- The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
- The change includes or references documentation updates if necessary
- Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))

## Reviewer Checklist
- [ ] Reviewer has checked that all the criteria below are met
- Title is accurate
- All changes are related to the pull request's stated goal
- Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes
- Testing strategy adequately addresses listed risks
- Newly-added code is easy to change
- Release note makes sense to a user of the library
- If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment
- Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
1 parent ef9646e commit fd2bcdf

File tree

2 files changed

+49
-15
lines changed

2 files changed

+49
-15
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,3 +180,6 @@ tests/appsec/iast/fixtures/aspects/unpatched_callers.py
180180

181181
#MacOS files
182182
.DS_Store
183+
184+
# Setup download cache
185+
.download_cache/

setup.py

Lines changed: 46 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ def verify_checksum_from_file(sha256_filename, filename):
121121
expected_checksum, expected_filename = list(filter(None, open(sha256_filename, "r").read().strip().split(" ")))
122122
actual_checksum = hashlib.sha256(open(filename, "rb").read()).hexdigest()
123123
try:
124-
assert expected_filename.endswith(filename)
124+
assert expected_filename.endswith(Path(filename).name)
125125
assert expected_checksum == actual_checksum
126126
except AssertionError:
127127
print("Checksum verification error: Checksum and/or filename don't match:")
@@ -167,6 +167,9 @@ def is_64_bit_python():
167167

168168

169169
class LibraryDownload:
170+
CACHE_DIR = HERE / ".download_cache"
171+
USE_CACHE = os.getenv("DD_SETUP_CACHE_DOWNLOADS", "0").lower() in ("1", "yes", "on", "true")
172+
170173
name = None
171174
download_dir = None
172175
version = None
@@ -215,20 +218,34 @@ def download_artifacts(cls):
215218
archive_name,
216219
)
217220

218-
try:
219-
filename, http_response = urlretrieve(download_address, archive_name)
220-
except HTTPError as e:
221-
print("No archive found for dynamic library {}: {}".format(cls.name, archive_dir))
222-
raise e
223-
224-
# Verify checksum of downloaded file
225-
if cls.expected_checksums is None:
226-
sha256_address = download_address + ".sha256"
227-
sha256_filename, http_response = urlretrieve(sha256_address, archive_name + ".sha256")
228-
verify_checksum_from_file(sha256_filename, filename)
221+
download_dest = cls.CACHE_DIR / archive_name if cls.USE_CACHE else archive_name
222+
if cls.USE_CACHE and not cls.CACHE_DIR.exists():
223+
cls.CACHE_DIR.mkdir(parents=True)
224+
225+
if not (cls.USE_CACHE and download_dest.exists()):
226+
print(f"Downloading {archive_name} to {download_dest}")
227+
start_ns = time.time_ns()
228+
try:
229+
filename, _ = urlretrieve(download_address, str(download_dest))
230+
except HTTPError as e:
231+
print("No archive found for dynamic library {}: {}".format(cls.name, archive_dir))
232+
raise e
233+
234+
# Verify checksum of downloaded file
235+
if cls.expected_checksums is None:
236+
sha256_address = download_address + ".sha256"
237+
sha256_filename, _ = urlretrieve(sha256_address, str(download_dest) + ".sha256")
238+
verify_checksum_from_file(sha256_filename, str(download_dest))
239+
else:
240+
expected_checksum = cls.expected_checksums[CURRENT_OS][arch]
241+
verify_checksum_from_hash(expected_checksum, str(download_dest))
242+
243+
DebugMetadata.download_times[archive_name] = time.time_ns() - start_ns
244+
229245
else:
230-
expected_checksum = cls.expected_checksums[CURRENT_OS][arch]
231-
verify_checksum_from_hash(expected_checksum, filename)
246+
# If the file exists in the cache, we will use it
247+
filename = str(download_dest)
248+
print(f"Using cached {filename}")
232249

233250
# Open the tarfile first to get the files needed.
234251
# This could be solved with "r:gz" mode, that allows random access
@@ -248,7 +265,8 @@ def download_artifacts(cls):
248265
renamed_file = lib_dir / "lib{}{}".format(cls.name, suffix)
249266
original_file.rename(renamed_file)
250267

251-
Path(filename).unlink()
268+
if not cls.USE_CACHE:
269+
Path(filename).unlink()
252270

253271
@classmethod
254272
def run(cls):
@@ -443,6 +461,7 @@ class DebugMetadata:
443461
enabled = "_DD_DEBUG_EXT" in os.environ
444462
metadata_file = os.getenv("_DD_DEBUG_EXT_FILE", "debug_ext_metadata.txt")
445463
build_times = {}
464+
download_times = {}
446465

447466
@classmethod
448467
def dump_metadata(cls):
@@ -471,6 +490,18 @@ def dump_metadata(cls):
471490
ext_percent = (elapsed_ns / total_ns) * 100.0
472491
f.write(f"\t{ext.name}: {elapsed_s:0.2f}s ({ext_percent:0.2f}%)\n")
473492

493+
if cls.download_times:
494+
download_total_ns = sum(cls.download_times.values())
495+
download_total_s = download_total_ns / 1e9
496+
download_percent = (download_total_ns / total_ns) * 100.0
497+
498+
f.write("Artifact download times:\n")
499+
f.write(f"\tTotal: {download_total_s:0.2f}s ({download_percent:0.2f}%)\n")
500+
for n, elapsed_ns in sorted(cls.download_times.items(), key=lambda x: x[1], reverse=True):
501+
elapsed_s = elapsed_ns / 1e9
502+
ext_percent = (elapsed_ns / total_ns) * 100.0
503+
f.write(f"\t{n}: {elapsed_s:0.2f}s ({ext_percent:0.2f}%)\n")
504+
474505

475506
def debug_build_extension(fn):
476507
def wrapper(self, ext, *args, **kwargs):

0 commit comments

Comments
 (0)