Skip to content

Commit 74c9637

Browse files
authored
fix: check paths in an archive file before extracting (#366)
The paths in an archive file are now checked for path traversal patterns before extraction. Also, Bandit v1.7.5 produces false positives for the request timeout arguments; those warnings have been suppressed with `# nosec` annotations. Signed-off-by: behnazh-w <behnaz.hassanshahi@oracle.com>
1 parent d5bea3c commit 74c9637

File tree

2 files changed

+37
-13
lines changed

2 files changed

+37
-13
lines changed

src/macaron/slsa_analyzer/checks/provenance_l3_check.py

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -239,21 +239,35 @@ def _extract_archive(self, file_path: str, temp_path: str) -> bool:
239239
bool
240240
Returns True if successful.
241241
"""
242+
243+
def _validate_path_traversal(path: str) -> bool:
244+
"""Check for path traversal attacks."""
245+
if path.startswith("/") or ".." in path:
246+
logger.debug("Found suspicious path in the archive file: %s.", path)
247+
return False
248+
try:
249+
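# Resolve the path, following any symbolic links; an OSError rejects the entry. NOTE(review): realpath() returns a non-empty string for any input and resolves relative to the current working directory, not the extraction directory - confirm this is the intended symlink check.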
250+
if os.path.realpath(path):
251+
return True
252+
except OSError as error:
253+
logger.debug("Failed to extract artifact from archive file: %s", error)
254+
return False
255+
return False
256+
242257
try:
243258
if zipfile.is_zipfile(file_path):
244259
with zipfile.ZipFile(file_path, "r") as zip_file:
245-
zip_file.extractall(temp_path)
260+
members = (path for path in zip_file.namelist() if _validate_path_traversal(path))
261+
zip_file.extractall(temp_path, members=members) # nosec B202:tarfile_unsafe_members
246262
return True
247263
elif tarfile.is_tarfile(file_path):
248264
with tarfile.open(file_path, mode="r:gz") as tar_file:
249-
tar_file.extractall(temp_path)
265+
members_tarinfo = (
266+
tarinfo for tarinfo in tar_file.getmembers() if _validate_path_traversal(tarinfo.name)
267+
)
268+
tar_file.extractall(temp_path, members=members_tarinfo) # nosec B202:tarfile_unsafe_members
250269
return True
251-
except (
252-
tarfile.TarError,
253-
zipfile.BadZipFile,
254-
zipfile.LargeZipFile,
255-
OSError,
256-
) as error:
270+
except (tarfile.TarError, zipfile.BadZipFile, zipfile.LargeZipFile, OSError, ValueError) as error:
257271
logger.info(error)
258272

259273
return False

src/macaron/util.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,9 @@ def send_get_http(url: str, headers: dict) -> dict:
3636
The response's json data or an empty dict if there is an error.
3737
"""
3838
logger.debug("GET - %s", url)
39-
response = requests.get(url=url, headers=headers, timeout=defaults.getint("requests", "timeout", fallback=10))
39+
response = requests.get(
40+
url=url, headers=headers, timeout=defaults.getint("requests", "timeout", fallback=10)
41+
) # nosec B113:request_without_timeout
4042
while response.status_code != 200:
4143
logger.error(
4244
"Receiving error code %s from server. Message: %s.",
@@ -47,7 +49,9 @@ def send_get_http(url: str, headers: dict) -> dict:
4749
check_rate_limit(response)
4850
else:
4951
return {}
50-
response = requests.get(url=url, headers=headers, timeout=defaults.getint("requests", "timeout", fallback=10))
52+
response = requests.get(
53+
url=url, headers=headers, timeout=defaults.getint("requests", "timeout", fallback=10)
54+
) # nosec B113:request_without_timeout
5155

5256
return dict(response.json())
5357

@@ -70,7 +74,9 @@ def send_get_http_raw(url: str, headers: dict) -> Response | None:
7074
The response object or None if there is an error.
7175
"""
7276
logger.debug("GET - %s", url)
73-
response = requests.get(url=url, headers=headers, timeout=defaults.getint("requests", "timeout", fallback=10))
77+
response = requests.get(
78+
url=url, headers=headers, timeout=defaults.getint("requests", "timeout", fallback=10)
79+
) # nosec B113:request_without_timeout
7480
while response.status_code != 200:
7581
logger.error(
7682
"Receiving error code %s from server. Message: %s.",
@@ -81,7 +87,9 @@ def send_get_http_raw(url: str, headers: dict) -> Response | None:
8187
check_rate_limit(response)
8288
else:
8389
return None
84-
response = requests.get(url=url, headers=headers, timeout=defaults.getint("requests", "timeout", fallback=10))
90+
response = requests.get(
91+
url=url, headers=headers, timeout=defaults.getint("requests", "timeout", fallback=10)
92+
) # nosec B113:request_without_timeout
8593

8694
return response
8795

@@ -155,7 +163,9 @@ def download_github_build_log(url: str, headers: dict) -> str:
155163
The content of the downloaded build log or empty if error.
156164
"""
157165
logger.debug("Downloading content at link %s", url)
158-
response = requests.get(url=url, headers=headers, timeout=defaults.getint("requests", "timeout", fallback=10))
166+
response = requests.get(
167+
url=url, headers=headers, timeout=defaults.getint("requests", "timeout", fallback=10)
168+
) # nosec B113:request_without_timeout
159169

160170
return response.content.decode("utf-8")
161171

0 commit comments

Comments
 (0)