Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion metaflow/plugins/datatools/s3/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@
if TYPE_CHECKING:
import metaflow

# Precompiled pattern for normalizing S3 paths by collapsing runs of slashes.
# It matches two or more consecutive "/" where the first slash of the match is
# not directly preceded by ":", so the "//" of the "s3://" scheme can never be
# the start of a match.
# NOTE(review): the lookbehind is not anchored to the scheme. The slash right
# after ANY ":" is shielded (e.g. "a://b" inside a key is left unchanged), and
# when replaced with "/" an input like "s3:///x" becomes "s3://x" -- confirm
# both behaviors are intended.
_CONSECUTIVE_SLASHES_REGEX = re.compile(r"(?<!:)//+")


def _check_and_init_s3_deps():
try:
Expand Down Expand Up @@ -557,7 +561,9 @@ def __init__(
raise MetaflowS3URLException(
"s3root needs to be an S3 URL prefixed with s3://."
)
self._s3root = s3root.rstrip("/")
# Normalize the path by collapsing consecutive slashes
normalized = _CONSECUTIVE_SLASHES_REGEX.sub("/", s3root)
self._s3root = normalized.rstrip("/")
else:
# 3. use the client only with full URLs
self._s3root = None
Expand Down
5 changes: 4 additions & 1 deletion test/data/s3/test_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -982,10 +982,13 @@ def test_list_recursive_sibling_prefix_filtering(inject_failure_rate):
) as s3:
objects = s3.list_recursive()

# Use the normalized s3root from the S3 object for accurate path extraction
test_prefix_path = f"{s3_setup._s3root}/{test_prefix}/"

found_relative_paths = []
for obj in objects:
# Get path relative to our test prefix
relative_path = obj.url.replace(f"{base_s3root}/{test_prefix}/", "")
relative_path = obj.url.replace(test_prefix_path, "")
found_relative_paths.append(relative_path)

expected_under_log = ["log/test.txt"]
Expand Down
Loading