Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 29 additions & 2 deletions src/main/java/com/amazon/ion/impl/IonCursorBinary.java
Original file line number Diff line number Diff line change
Expand Up @@ -517,7 +517,13 @@ private boolean ensureCapacity(long minimumNumberOfBytesRequired) {
}
long shortfall = minimumNumberOfBytesRequired - refillableState.capacity;
if (shortfall > 0) {
int newSize = (int) Math.min(Math.max(refillableState.capacity * 2, nextPowerOfTwo((int) (refillableState.capacity + shortfall))), maximumFreeSpace);
// Grow to the next power of two beyond the current capacity rather than allocating the full declared
// length up front. This prevents a small malicious payload from triggering a disproportionately large
// allocation based on an attacker-declared VarUInt length: the new size depends only on the current
// capacity, never on the requested length. If the value is legitimately large, refill() will continue
// to grow the buffer (by doubling) as actual data is consumed from the stream. Because that growth
// starts from this power-of-two capacity, doubling keeps the buffer a power of two.
int newSize = (int) Math.min(nextPowerOfTwo((int) (refillableState.capacity + 1)), maximumFreeSpace);
byte[] newBuffer = new byte[newSize];
moveBytesToStartOfBuffer(newBuffer, startOffset);
refillableState.capacity = newSize;
Expand Down Expand Up @@ -672,11 +678,32 @@ private void shiftIndicesLeft(int shiftAmount) {
*/
private long refill(long minimumNumberOfBytesRequired) {
int numberOfBytesFilled = -1;
long shortfall;
long shortfall = minimumNumberOfBytesRequired - availableAt(offset);
// Sometimes an InputStream implementation will return fewer than the number of bytes requested even
// if the stream is not at EOF. If this happens and there is still a shortfall, keep requesting bytes
// until either the shortfall is filled or EOF is reached.
do {
// If the buffer is full but more bytes are still required, grow it (by doubling) to make room before
// reading more. The buffer is only grown once it has filled with data actually read from the stream,
// so allocation stays proportional to the data present in the stream rather than to an
// attacker-declared length. Doubling here (rather than calling nextPowerOfTwo() again) keeps this hot
// path cheap; ensureCapacity() always grows first and rounds the capacity up to a power of two, so
// doubling from that base keeps the buffer a power of two. Because ensureCapacity() has also already
// moved buffered bytes to the start of the buffer (offset == 0), growing only requires copying the
// existing bytes into a larger array; no parser indices need to be shifted.
if (freeSpaceAt(limit) == 0 && (minimumNumberOfBytesRequired - availableAt(offset)) > 0) {
if (refillableState.capacity >= refillableState.maximumBufferSize) {
// Cannot grow further; the value exceeds the maximum buffer size.
refillableState.isSkippingCurrentValue = true;
break;
}
int newSize = (int) Math.min(Math.max(refillableState.capacity * 2L, refillableState.capacity + 1L), refillableState.maximumBufferSize);
byte[] newBuffer = new byte[newSize];
System.arraycopy(buffer, 0, newBuffer, 0, (int) limit);
refillableState.capacity = newSize;
buffer = newBuffer;
byteBuffer = ByteBuffer.wrap(buffer, (int) offset, (int) refillableState.capacity);
}
try {
numberOfBytesFilled = refillableState.inputStream.read(buffer, (int) limit, (int) freeSpaceAt(limit));
} catch (EOFException e) {
Expand Down
78 changes: 69 additions & 9 deletions src/main/java/com/amazon/ion/system/IonReaderBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ public abstract class IonReaderBuilder
private boolean isIncrementalReadingEnabled = false;
private IonBufferConfiguration bufferConfiguration = IonBufferConfiguration.DEFAULT;
private List<InputStreamInterceptor> streamInterceptors = null;
private boolean gzipDecompressionEnabled = true;

protected IonReaderBuilder()
{
Expand All @@ -112,6 +113,7 @@ protected IonReaderBuilder(IonReaderBuilder that)
this.isIncrementalReadingEnabled = that.isIncrementalReadingEnabled;
this.bufferConfiguration = that.bufferConfiguration;
this.streamInterceptors = that.streamInterceptors == null ? null : new ArrayList<>(that.streamInterceptors);
this.gzipDecompressionEnabled = that.gzipDecompressionEnabled;
}

/**
Expand Down Expand Up @@ -323,21 +325,24 @@ public IonBufferConfiguration getBufferConfiguration() {
/**
* Adds an {@link InputStreamInterceptor} to the end of the list that the builder will attempt
* to apply to a stream before creating {@link IonReader} instances over that stream.
* {@link GzipStreamInterceptor} is always consulted first, and need not be added. The first
* {@link GzipStreamInterceptor} is normally consulted first, and need not be added. The first
* interceptor in the list that matches the stream will be used; if any chaining of interceptors
* is required, it is up to the caller to provide a custom interceptor implementation to
* achieve this.
* <p>
* Users may also or instead register implementations as service providers on the classpath.
* See {@link ServiceLoader} for details about how to do this.
* <p>
* The list of stream interceptors available to the reader always begins with
* The list of stream interceptors available to the reader normally begins with
* {@link GzipStreamInterceptor} and is followed by:
* <ol>
* <li>any stream interceptors detected on the classpath using
* {@link ServiceLoader#load(Class)}, then</li>
* <li>any stream interceptor(s) added by calling this method.</li>
* </ol>
* If GZIP auto-decompression has been disabled via {@link #withGzipDecompressionEnabled(boolean)},
* {@link GzipStreamInterceptor} is omitted from the list and any {@link GzipStreamInterceptor}
* added by calling this method is filtered out.
*
* @param streamInterceptor the stream interceptor to add.
*
Expand Down Expand Up @@ -378,25 +383,78 @@ private static List<InputStreamInterceptor> detectStreamInterceptorsOnClasspath(
}

/**
* Gets the {@link InputStreamInterceptor} instances available to this builder. The returned list will always begin
* with the default stream interceptor, which detects GZIP. Any stream interceptor(s) detected on the classpath
* by {@link ServiceLoader#load(Class)} will immediately follow. Any stream interceptor(s) manually added using
* {@link #addInputStreamInterceptor(InputStreamInterceptor)} will occur at the end of the list.
* Gets the {@link InputStreamInterceptor} instances available to this builder. Unless GZIP
* auto-decompression has been disabled via {@link #withGzipDecompressionEnabled(boolean)}, the returned
* list will begin with the default stream interceptor, which detects GZIP. Any stream interceptor(s)
* detected on the classpath by {@link ServiceLoader#load(Class)} will immediately follow. Any stream
* interceptor(s) manually added using {@link #addInputStreamInterceptor(InputStreamInterceptor)} will occur
* at the end of the list. If GZIP auto-decompression has been disabled, {@link GzipStreamInterceptor} is
* omitted from the returned list.
* @return an unmodifiable view of the stream interceptors currently configured.
*/
public List<InputStreamInterceptor> getInputStreamInterceptors() {
List<InputStreamInterceptor> interceptors =
streamInterceptors == null ? DETECTED_STREAM_INTERCEPTORS : streamInterceptors;
if (!gzipDecompressionEnabled) {
// Filter out GzipStreamInterceptor so GZIP-compressed data is not auto-decompressed.
List<InputStreamInterceptor> filtered = new ArrayList<>();
for (InputStreamInterceptor interceptor : interceptors) {
if (!(interceptor instanceof GzipStreamInterceptor)) {
filtered.add(interceptor);
}
}
return Collections.unmodifiableList(filtered);
}
if (streamInterceptors == null) {
return DETECTED_STREAM_INTERCEPTORS;
}
return Collections.unmodifiableList(streamInterceptors);
}

/**
* Declares whether GZIP auto-decompression is enabled when building an {@link IonReader},
* returning a new mutable builder if the current one is immutable.
*
* When enabled (the default), GZIP-compressed Ion data is automatically detected and decompressed.
* When disabled, GZIP-compressed data is not auto-decompressed.
*
* @param enabled true to enable GZIP auto-decompression (default), false to disable.
* @return this builder instance, if mutable; otherwise a mutable copy of this builder.
*/
public IonReaderBuilder withGzipDecompressionEnabled(boolean enabled) {
IonReaderBuilder b = mutable();
b.gzipDecompressionEnabled = enabled;
return b;
}

/**
* Sets whether GZIP auto-decompression is enabled.
*
* @param enabled true to enable GZIP auto-decompression (default), false to disable.
*
* @see #withGzipDecompressionEnabled(boolean)
*
* @throws UnsupportedOperationException if this builder is immutable.
*/
public void setGzipDecompressionEnabled(boolean enabled) {
mutationCheck();
this.gzipDecompressionEnabled = enabled;
}

/**
* @return true if GZIP auto-decompression is enabled (the default).
*/
public boolean isGzipDecompressionEnabled() {
return gzipDecompressionEnabled;
}

/**
* Based on the builder's configuration properties, creates a new IonReader
* instance over the given block of Ion data, detecting whether it's text or
* binary data.
* <p>
* This method will auto-detect and uncompress GZIPped Ion data.
* This method will auto-detect and uncompress GZIPped Ion data, unless GZIP auto-decompression has been
* disabled via {@link #withGzipDecompressionEnabled(boolean)}.
*
* @param ionData the source of the Ion data, which may be either Ion binary
* data or UTF-8 Ion text. The reader retains a reference to the array, so
Expand All @@ -417,7 +475,8 @@ public IonReader build(byte[] ionData)
* instance over the given block of Ion data, detecting whether it's text or
* binary data.
* <p>
* This method will auto-detect and uncompress GZIPped Ion data.
* This method will auto-detect and uncompress GZIPped Ion data, unless GZIP auto-decompression has been
* disabled via {@link #withGzipDecompressionEnabled(boolean)}.
*
* @param ionData the source of the Ion data, which is used only within the
* range of bytes starting at {@code offset} for {@code len} bytes.
Expand All @@ -437,7 +496,8 @@ public IonReader build(byte[] ionData)
* instance over the given stream of Ion data, detecting whether it's text or
* binary data.
* <p>
* This method will auto-detect and uncompress GZIPped Ion data.
* This method will auto-detect and uncompress GZIPped Ion data, unless GZIP auto-decompression has been
* disabled via {@link #withGzipDecompressionEnabled(boolean)}.
* <p>
* Because this library performs its own buffering, it's recommended that
* users avoid adding additional buffering to the given stream.
Expand Down
129 changes: 129 additions & 0 deletions src/test/java/com/amazon/ion/impl/IonCursorBinaryTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,13 @@
import com.amazon.ion.IonException;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
import org.junit.jupiter.params.provider.ValueSource;

import java.io.ByteArrayInputStream;
import java.util.function.Consumer;
import java.util.stream.Stream;

import static com.amazon.ion.BitUtils.bytes;
import static com.amazon.ion.IonCursor.Event.NEEDS_DATA;
Expand All @@ -30,6 +33,7 @@
import static com.amazon.ion.impl.IonCursorTestUtilities.startContainer;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.lessThanOrEqualTo;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
Expand Down Expand Up @@ -429,6 +433,131 @@ public void expectValueLargerThanMaxArraySizeToFailCleanly() {
cursor.close();
}

/**
* Creates a cursor that reads the given data from an InputStream (i.e. in refillable/streaming mode) using a
* buffer configuration with an 8-byte initial buffer and an unbounded maximum buffer size.
*/
private static IonCursorBinary newStreamingCursor(int[] data) {
IonBufferConfiguration config = IonBufferConfiguration.Builder.standard()
.withInitialBufferSize(8)
.build();
return new IonCursorBinary(config, new ByteArrayInputStream(bytes(data)), null, 0, 0);
}

// A legitimate string value with 100 content bytes, far larger than the 8-byte initial buffer. After the IVM is
// consumed, buffered bytes are shifted to the start of the buffer, so the content begins immediately after the
// value's header bytes: index 2 for the top-level string (type ID + 1-byte VarUInt length).
private static final int[] LARGE_STRING = new int[]{
0xE0, 0x01, 0x00, 0xEA, // Ion 1.0 IVM
0x8E, // String with VarUInt length
0x80 | 100, // VarUInt length 100 with stop bit
};

// The same value wrapped in a single annotation. The content begins at index 6 (wrapper type ID, wrapper length,
// annotations length, annotation SID, wrapped type ID, wrapped length).
private static final int[] LARGE_ANNOTATED_STRING = new int[]{
0xE0, 0x01, 0x00, 0xEA, // Ion 1.0 IVM
0xEE, // Annotation wrapper with VarUInt length
0x80 | 104, // VarUInt wrapper length 104 with stop bit
0x81, // VarUInt annotations length 1 with stop bit
0x84, // Annotation SID 4
0x8E, // Wrapped string with VarUInt length
0x80 | 100, // VarUInt length 100 with stop bit
};

private static int[] withContent(int[] header, int contentLength) {
int[] content = new int[contentLength];
for (int i = 0; i < contentLength; i++) {
content[i] = 'a';
}
int[] data = new int[header.length + contentLength];
System.arraycopy(header, 0, data, 0, header.length);
System.arraycopy(content, 0, data, header.length, contentLength);
return data;
}

private static Stream<Arguments> largeValues() {
return Stream.of(
// data, expected buffer index at which the content begins after the IVM is shifted out.
Arguments.of("unwrapped", withContent(LARGE_STRING, 100), 2),
Arguments.of("annotation-wrapped", withContent(LARGE_ANNOTATED_STRING, 100), 6)
);
}

/**
* Verifies that a legitimate value larger than the initial buffer (and larger than a single doubling of it)
* is read correctly. This exercises the incremental buffer growth in refill(): the buffer must double
* multiple times as real data is consumed from the stream.
*/
@ParameterizedTest(name = "{0}")
@MethodSource("largeValues")
public void largeValueReadFromStreamGrowsBufferIncrementally(String name, int[] data, int contentStartIndex) {
IonCursorBinary cursor = newStreamingCursor(data);
assertSequence(
cursor,
fillScalar(contentStartIndex, contentStartIndex + 100),
endStream()
);
// The buffer grew purely by doubling from a power-of-two start: 8 -> 16 -> 32 -> 64 -> 128, just large
// enough to hold the value.
assertEquals(128, cursor.buffer.length);
cursor.close();
}

// A top-level string whose header declares 1,000,000 content bytes but is followed by only 3. The declared
// length is well below the maximum buffer size, so it is not rejected as oversized.
private static final int[] LENGTH_BOMB_STRING = new int[]{
0xE0, 0x01, 0x00, 0xEA, // Ion 1.0 IVM
0x8E, // String with VarUInt length
0x3D, 0x04, 0xC0, // VarUInt length 1,000,000 (well below the max buffer size)
'a', 'b', 'c' // Only a few actual content bytes follow.
};

// The same length bomb declared inside an annotation wrapper. The wrapper length and the wrapped value length
// are internally consistent so the structural length check passes and the allocation path is reached.
private static final int[] LENGTH_BOMB_ANNOTATED_STRING = new int[]{
0xE0, 0x01, 0x00, 0xEA, // Ion 1.0 IVM
0xEE, // Annotation wrapper with VarUInt length
0x3D, 0x04, 0xC6, // VarUInt wrapper length 1,000,006 (well below the max buffer size)
0x81, // VarUInt annotations length 1 with stop bit
0x84, // Annotation SID 4
0x8E, // Wrapped string with VarUInt length
0x3D, 0x04, 0xC0, // VarUInt wrapped length 1,000,000 (consistent with the wrapper length)
'a', 'b', 'c' // Only a few actual content bytes follow.
};

private static Stream<Arguments> lengthBombs() {
return Stream.of(
Arguments.of("unwrapped", LENGTH_BOMB_STRING),
Arguments.of("annotation-wrapped", LENGTH_BOMB_ANNOTATED_STRING)
);
}

/**
* Verifies that a value declaring a length far larger than the data actually present in the stream does not
* cause a disproportionate allocation. The declared length (1,000,000) is well below the maximum buffer size,
* so it is not rejected as oversized; instead the buffer grows incrementally as data is consumed. Because the
* stream is exhausted after only a few bytes, the cursor cleanly reports NEEDS_DATA while the buffer remains
* tiny. (Note: the non-continuable IonReader wrapper will then throw an exception for an incomplete value;
* the continuable IonReader wrapper will wait for more data, but will not allocate more buffer space.)
* Before the fix, the buffer would have been allocated to nextPowerOfTwo(1,000,000) up front.
*/
@ParameterizedTest(name = "{0}")
@MethodSource("lengthBombs")
public void lengthBombFromStreamDoesNotOverAllocateAndReportsNeedsData(String name, int[] data) {
IonCursorBinary cursor = newStreamingCursor(data);
// Positioning on the value succeeds; the declared length is not yet validated against available data.
assertEquals(START_SCALAR, cursor.nextValue());
// Filling cannot complete because the stream is exhausted long before the declared length is reached. The
// cursor reports NEEDS_DATA rather than allocating (or attempting to allocate) the full declared length.
assertEquals(NEEDS_DATA, cursor.fillValue());
// The 1,000,000-byte declared length must not have driven allocation. Only a few bytes of real data were
// present, so the 8-byte buffer should have grown by doubling just past the available data, not toward the
// declared length.
assertThat(cursor.buffer.length, lessThanOrEqualTo(32));
cursor.close();
}

@ParameterizedTest(name = "constructFromBytes={0}")
@ValueSource(booleans = {true, false})
public void expectUseAfterCloseToHaveNoEffect(boolean constructFromBytes) {
Expand Down
Loading
Loading