Skip to content

Commit 0c839c3

Browse files
authored
Fix range reads in respository-s3 (opensearch-project#9516)
The `readBlob(String, long, long)` method in the S3 repository has been broken since the upgrade to AWS SDK v2. The cause is that the SDK v2 returns the content range length details in a string formatting per the [RFC 9110][1] spec. For example: ``` bytes 0-100/200 ``` However, the code was attempting to parse it as: ``` bytes=0-100 ``` The fix here is to not parse this string at all and instead use `GetObjectResponse#contentLength`. Note that the incorrect format matches how a range is specified in a _request_ per the [byte ranges][2] section of the RFC and that is likely the source of the confusion. We lack any dedicated integration testing of this method so the bug was not caught by any tests. Additionally, the range read is only used by the searchable snapshot feature currently and we have no automated integration testing with the different repository implementations. One other complicating factor is that due to a fallback path that returns `Long.MAX_VALUE - 1` when the range is failed to be parsed, the bug will only manifest as a long overflow error when requesting past the first block and therefore wasn't caught with very simple manual testing. [1]: https://www.rfc-editor.org/rfc/rfc9110.html#name-content-range [2]: https://www.rfc-editor.org/rfc/rfc9110.html#name-byte-ranges Signed-off-by: Andrew Ross <andrross@amazon.com>
1 parent 89ccda9 commit 0c839c3

File tree

6 files changed

+37
-47
lines changed

6 files changed

+37
-47
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
161161
### Fixed
162162
- Fix flaky ResourceAwareTasksTests.testBasicTaskResourceTracking test ([#8993](https://github.com/opensearch-project/OpenSearch/pull/8993))
163163
- Fix memory leak when using Zstd Dictionary ([#9403](https://github.com/opensearch-project/OpenSearch/pull/9403))
164+
- Fix range reads in respository-s3 ([9512](https://github.com/opensearch-project/OpenSearch/issues/9512))
164165

165166
### Security
166167

plugins/repository-hdfs/src/test/java/org/opensearch/repositories/hdfs/HdfsBlobStoreRepositoryTests.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,4 +66,8 @@ protected Settings repositorySettings() {
6666
protected Collection<Class<? extends Plugin>> nodePlugins() {
6767
return Collections.singletonList(HdfsPlugin.class);
6868
}
69+
70+
@AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/9513")
71+
@Override
72+
public void testReadRange() {}
6973
}

plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RetryingInputStream.java

Lines changed: 1 addition & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@
4040
import org.apache.logging.log4j.LogManager;
4141
import org.apache.logging.log4j.Logger;
4242
import org.apache.logging.log4j.message.ParameterizedMessage;
43-
import org.opensearch.common.collect.Tuple;
4443
import org.opensearch.common.util.io.IOUtils;
4544
import org.opensearch.repositories.s3.utils.HttpRangeUtils;
4645

@@ -120,7 +119,7 @@ private void openStream() throws IOException {
120119
);
121120
this.currentStreamLastOffset = Math.addExact(
122121
Math.addExact(start, currentOffset),
123-
getStreamLength(getObjectResponseInputStream.response())
122+
getObjectResponseInputStream.response().contentLength()
124123
);
125124
this.currentStream = getObjectResponseInputStream;
126125
this.isStreamAborted.set(false);
@@ -134,29 +133,6 @@ private void openStream() throws IOException {
134133
}
135134
}
136135

137-
private long getStreamLength(final GetObjectResponse getObjectResponse) {
138-
try {
139-
// Returns the content range of the object if response contains the Content-Range header.
140-
if (getObjectResponse.contentRange() != null) {
141-
final Tuple<Long, Long> s3ResponseRange = HttpRangeUtils.fromHttpRangeHeader(getObjectResponse.contentRange());
142-
assert s3ResponseRange.v2() >= s3ResponseRange.v1() : s3ResponseRange.v2() + " vs " + s3ResponseRange.v1();
143-
assert s3ResponseRange.v1() == start + currentOffset : "Content-Range start value ["
144-
+ s3ResponseRange.v1()
145-
+ "] exceeds start ["
146-
+ start
147-
+ "] + current offset ["
148-
+ currentOffset
149-
+ ']';
150-
assert s3ResponseRange.v2() == end : "Content-Range end value [" + s3ResponseRange.v2() + "] exceeds end [" + end + ']';
151-
return s3ResponseRange.v2() - s3ResponseRange.v1() + 1L;
152-
}
153-
return getObjectResponse.contentLength();
154-
} catch (Exception e) {
155-
assert false : e;
156-
return Long.MAX_VALUE - 1L; // assume a large stream so that the underlying stream is aborted on closing, unless eof is reached
157-
}
158-
}
159-
160136
@Override
161137
public int read() throws IOException {
162138
ensureOpen();

plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/utils/HttpRangeUtils.java

Lines changed: 8 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -8,25 +8,14 @@
88

99
package org.opensearch.repositories.s3.utils;
1010

11-
import software.amazon.awssdk.core.exception.SdkException;
12-
13-
import org.opensearch.common.collect.Tuple;
14-
15-
import java.util.regex.Matcher;
16-
import java.util.regex.Pattern;
17-
18-
public class HttpRangeUtils {
19-
20-
private static final Pattern RANGE_PATTERN = Pattern.compile("^bytes=([0-9]+)-([0-9]+)$");
21-
22-
public static Tuple<Long, Long> fromHttpRangeHeader(String headerValue) {
23-
Matcher matcher = RANGE_PATTERN.matcher(headerValue);
24-
if (!matcher.find()) {
25-
throw SdkException.create("Regex match for Content-Range header {" + headerValue + "} failed", new RuntimeException());
26-
}
27-
return new Tuple<>(Long.parseLong(matcher.group(1)), Long.parseLong(matcher.group(2)));
28-
}
29-
11+
public final class HttpRangeUtils {
12+
13+
/**
14+
* Provides a byte range string per <a href="https://www.rfc-editor.org/rfc/rfc9110.html#name-byte-ranges">RFC 9110</a>
15+
* @param start start position (inclusive)
16+
* @param end end position (inclusive)
17+
* @return A 'bytes=start-end' string
18+
*/
3019
public static String toHttpRangeHeader(long start, long end) {
3120
return "bytes=" + start + "-" + end;
3221
}

plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3RetryingInputStreamTests.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@
3838
import software.amazon.awssdk.services.s3.model.GetObjectResponse;
3939

4040
import org.opensearch.common.io.Streams;
41-
import org.opensearch.repositories.s3.utils.HttpRangeUtils;
4241
import org.opensearch.test.OpenSearchTestCase;
4342

4443
import java.io.ByteArrayInputStream;
@@ -104,11 +103,11 @@ public void testRangeInputStreamIsAborted() throws IOException {
104103
}
105104

106105
private S3RetryingInputStream createInputStream(final byte[] data, final Long start, final Long length) throws IOException {
107-
long end = Math.addExact(start, length - 1);
106+
final long end = Math.addExact(start, length - 1);
108107
final S3Client client = mock(S3Client.class);
109108
when(client.getObject(any(GetObjectRequest.class))).thenReturn(
110109
new ResponseInputStream<>(
111-
GetObjectResponse.builder().contentLength(length).contentRange(HttpRangeUtils.toHttpRangeHeader(start, end)).build(),
110+
GetObjectResponse.builder().contentLength(length).build(),
112111
new ByteArrayInputStream(data, Math.toIntExact(start), Math.toIntExact(length))
113112
)
114113
);

test/framework/src/main/java/org/opensearch/repositories/blobstore/OpenSearchBlobStoreRepositoryIntegTestCase.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,27 @@ public void testWriteRead() throws IOException {
165165
}
166166
}
167167

168+
public void testReadRange() throws IOException {
169+
try (BlobStore store = newBlobStore()) {
170+
final BlobContainer container = store.blobContainer(new BlobPath());
171+
final byte[] data = randomBytes(4096);
172+
173+
// Pick a subrange starting somewhere between position 100 and 1000
174+
// and ending somewhere between 100 bytes past that position and
175+
// 100 bytes before the end
176+
final int startOffset = randomIntBetween(100, 1000);
177+
final int endOffset = randomIntBetween(startOffset + 100, data.length - 100);
178+
final byte[] subrangeData = Arrays.copyOfRange(data, startOffset, endOffset);
179+
180+
writeBlob(container, "foobar", new BytesArray(data), randomBoolean());
181+
try (InputStream stream = container.readBlob("foobar", startOffset, subrangeData.length)) {
182+
final byte[] actual = stream.readAllBytes();
183+
assertArrayEquals(subrangeData, actual);
184+
}
185+
container.delete();
186+
}
187+
}
188+
168189
public void testList() throws IOException {
169190
try (BlobStore store = newBlobStore()) {
170191
final BlobContainer container = store.blobContainer(new BlobPath());

0 commit comments

Comments
 (0)