From 309d525e6b025f45c7501b0a549aaba9ebc8b548 Mon Sep 17 00:00:00 2001 From: Deirdre Wang Date: Thu, 18 Jun 2026 15:01:30 -0700 Subject: [PATCH 1/2] fix: Grow reader buffer incrementally and add GZIP opt-out --- .../com/amazon/ion/impl/IonCursorBinary.java | 26 ++- .../amazon/ion/system/IonReaderBuilder.java | 59 +++++++ .../amazon/ion/impl/IonCursorBinaryTest.java | 72 ++++++++ .../IonReaderBuilderGzipToggleTest.java | 158 ++++++++++++++++++ 4 files changed, 313 insertions(+), 2 deletions(-) create mode 100644 src/test/java/com/amazon/ion/system/IonReaderBuilderGzipToggleTest.java diff --git a/src/main/java/com/amazon/ion/impl/IonCursorBinary.java b/src/main/java/com/amazon/ion/impl/IonCursorBinary.java index b6ffde938..69818a498 100644 --- a/src/main/java/com/amazon/ion/impl/IonCursorBinary.java +++ b/src/main/java/com/amazon/ion/impl/IonCursorBinary.java @@ -517,7 +517,11 @@ private boolean ensureCapacity(long minimumNumberOfBytesRequired) { } long shortfall = minimumNumberOfBytesRequired - refillableState.capacity; if (shortfall > 0) { - int newSize = (int) Math.min(Math.max(refillableState.capacity * 2, nextPowerOfTwo((int) (refillableState.capacity + shortfall))), maximumFreeSpace); + // Grow by doubling rather than allocating the full declared length up front. This prevents a + // small malicious payload from triggering a disproportionately large allocation based on an + // attacker-declared VarUInt length. If the value is legitimately large, refill() will continue + // to grow the buffer as actual data is consumed from the stream. + int newSize = (int) Math.min(Math.max(refillableState.capacity * 2L, refillableState.capacity + 1L), maximumFreeSpace); byte[] newBuffer = new byte[newSize]; moveBytesToStartOfBuffer(newBuffer, startOffset); refillableState.capacity = newSize; @@ -672,11 +676,29 @@ private void shiftIndicesLeft(int shiftAmount) { */ private long refill(long minimumNumberOfBytesRequired) { int numberOfBytesFilled = -1; - long shortfall; + long shortfall = minimumNumberOfBytesRequired - availableAt(offset); // Sometimes an InputStream implementation will return fewer than the number of bytes requested even // if the stream is not at EOF. If this happens and there is still a shortfall, keep requesting bytes // until either the shortfall is filled or EOF is reached. do { + // If the buffer is full but more bytes are still required, grow it (by doubling) to make room. + // Growing here, after data has been read, ensures allocation is proportional to the data actually + // present in the stream rather than to an attacker-declared length. Because ensureCapacity() has + // already moved buffered bytes to the start of the buffer (offset == 0), growing only requires + // copying the existing bytes into a larger array; no parser indices need to be shifted. + if (freeSpaceAt(limit) == 0 && (minimumNumberOfBytesRequired - availableAt(offset)) > 0) { + if (refillableState.capacity >= refillableState.maximumBufferSize) { + // Cannot grow further; the value exceeds the maximum buffer size. + refillableState.isSkippingCurrentValue = true; + break; + } + int newSize = (int) Math.min(Math.max(refillableState.capacity * 2L, refillableState.capacity + 1L), refillableState.maximumBufferSize); + byte[] newBuffer = new byte[newSize]; + System.arraycopy(buffer, 0, newBuffer, 0, (int) limit); + refillableState.capacity = newSize; + buffer = newBuffer; + byteBuffer = ByteBuffer.wrap(buffer, (int) offset, (int) refillableState.capacity); + } try { numberOfBytesFilled = refillableState.inputStream.read(buffer, (int) limit, (int) freeSpaceAt(limit)); } catch (EOFException e) { diff --git a/src/main/java/com/amazon/ion/system/IonReaderBuilder.java b/src/main/java/com/amazon/ion/system/IonReaderBuilder.java index a3927705c..7ade6e615 100644 --- a/src/main/java/com/amazon/ion/system/IonReaderBuilder.java +++ b/src/main/java/com/amazon/ion/system/IonReaderBuilder.java @@ -101,6 +101,7 @@ public abstract class IonReaderBuilder private boolean isIncrementalReadingEnabled = false; private IonBufferConfiguration bufferConfiguration = IonBufferConfiguration.DEFAULT; private List streamInterceptors = null; + private boolean gzipDecompressionEnabled = true; protected IonReaderBuilder() { @@ -112,6 +113,7 @@ protected IonReaderBuilder(IonReaderBuilder that) this.isIncrementalReadingEnabled = that.isIncrementalReadingEnabled; this.bufferConfiguration = that.bufferConfiguration; this.streamInterceptors = that.streamInterceptors == null ? null : new ArrayList<>(that.streamInterceptors); + this.gzipDecompressionEnabled = that.gzipDecompressionEnabled; } /** @@ -385,12 +387,69 @@ private static List detectStreamInterceptorsOnClasspath( * @return an unmodifiable view of the stream interceptors currently configured. */ public List getInputStreamInterceptors() { + if (!gzipDecompressionEnabled) { + if (streamInterceptors == null) { + // Filter out GzipStreamInterceptor from the detected list + List filtered = new ArrayList<>(); + for (InputStreamInterceptor interceptor : DETECTED_STREAM_INTERCEPTORS) { + if (!(interceptor instanceof GzipStreamInterceptor)) { + filtered.add(interceptor); + } + } + return Collections.unmodifiableList(filtered); + } + // Filter out GzipStreamInterceptor from the custom list + List filtered = new ArrayList<>(); + for (InputStreamInterceptor interceptor : streamInterceptors) { + if (!(interceptor instanceof GzipStreamInterceptor)) { + filtered.add(interceptor); + } + } + return Collections.unmodifiableList(filtered); + } if (streamInterceptors == null) { return DETECTED_STREAM_INTERCEPTORS; } return Collections.unmodifiableList(streamInterceptors); } + /** + * Declares whether GZIP auto-decompression is enabled when building an {@link IonReader}, + * returning a new mutable builder if the current one is immutable. + * + * When enabled (the default), GZIP-compressed Ion data is automatically detected and decompressed. + * When disabled, GZIP-compressed data is not auto-decompressed. + * + * @param enabled true to enable GZIP auto-decompression (default), false to disable. + * @return this builder instance, if mutable; otherwise a mutable copy of this builder. + */ + public IonReaderBuilder withGzipDecompressionEnabled(boolean enabled) { + IonReaderBuilder b = mutable(); + b.gzipDecompressionEnabled = enabled; + return b; + } + + /** + * Sets whether GZIP auto-decompression is enabled. + * + * @param enabled true to enable GZIP auto-decompression (default), false to disable. + * + * @see #withGzipDecompressionEnabled(boolean) + * + * @throws UnsupportedOperationException if this builder is immutable. + */ + public void setGzipDecompressionEnabled(boolean enabled) { + mutationCheck(); + this.gzipDecompressionEnabled = enabled; + } + + /** + * @return true if GZIP auto-decompression is enabled (the default). + */ + public boolean isGzipDecompressionEnabled() { + return gzipDecompressionEnabled; + } + /** * Based on the builder's configuration properties, creates a new IonReader * instance over the given block of Ion data, detecting whether it's text or diff --git a/src/test/java/com/amazon/ion/impl/IonCursorBinaryTest.java b/src/test/java/com/amazon/ion/impl/IonCursorBinaryTest.java index 4708b49ba..da3948a58 100644 --- a/src/test/java/com/amazon/ion/impl/IonCursorBinaryTest.java +++ b/src/test/java/com/amazon/ion/impl/IonCursorBinaryTest.java @@ -429,6 +429,78 @@ public void expectValueLargerThanMaxArraySizeToFailCleanly() { cursor.close(); } + /** + * Builds binary Ion for a string value of the given length, with the bytes prefixed by the IVM. + * Uses the VarUInt-length string encoding (type 0x8E) so that lengths greater than 13 are supported. + */ + private static int[] stringValueWithLength(int contentLength) { + // Encode the content length as a VarUInt (single byte for lengths < 128). + if (contentLength >= 128) { + throw new IllegalArgumentException("This helper only supports content lengths < 128."); + } + int[] data = new int[4 + 1 + 1 + contentLength]; + data[0] = 0xE0; data[1] = 0x01; data[2] = 0x00; data[3] = 0xEA; // IVM + data[4] = 0x8E; // String with VarUInt length + data[5] = 0x80 | contentLength; // VarUInt length with stop bit + for (int i = 0; i < contentLength; i++) { + data[6 + i] = 'a'; + } + return data; + } + + /** + * Verifies that a legitimate value larger than the initial buffer (and larger than a single doubling of it) + * is read correctly. This exercises the incremental buffer growth in refill(): the buffer must double + * multiple times as real data is consumed from the stream. + */ + @Test + public void largeValueReadFromStreamGrowsBufferIncrementally() { + int contentLength = 100; // Far larger than the 8-byte initial buffer; requires several doublings. + int[] data = stringValueWithLength(contentLength); + ByteArrayInputStream in = new ByteArrayInputStream(bytes(data)); + IonBufferConfiguration config = IonBufferConfiguration.Builder.standard() + .withInitialBufferSize(8) + .build(); + IonCursorBinary cursor = new IonCursorBinary(config, in, null, 0, 0); + // After the IVM is consumed, buffered bytes are shifted to the start of the buffer, so the content + // begins at index 2 (after the type ID and VarUInt length bytes) and ends at 2 + contentLength. + assertSequence( + cursor, + fillScalar(2, 2 + contentLength), + endStream() + ); + cursor.close(); + } + + /** + * Verifies that a value declaring a length far larger than the data actually present in the stream does not + * cause a disproportionate allocation. The buffer grows only in proportion to the data actually consumed. + * When a fixed-size stream is exhausted before the declared length is satisfied, the cursor fails cleanly + * with an IonException rather than allocating (or attempting to allocate) the full declared length. + */ + @Test + public void lengthBombFromStreamDoesNotOverAllocate() { + int[] data = new int[]{ + 0xE0, 0x01, 0x00, 0xEA, // Ion 1.0 IVM + 0x8E, // String with VarUInt length + 0x07, 0x7f, 0x7f, 0x7f, 0xf9, // VarUInt length ~2 GB + 'a', 'b', 'c' // Only a few actual content bytes follow. + }; + ByteArrayInputStream in = new ByteArrayInputStream(bytes(data)); + IonBufferConfiguration config = IonBufferConfiguration.Builder.standard() + .withInitialBufferSize(8) + .build(); + IonCursorBinary cursor = new IonCursorBinary(config, in, null, 0, 0); + // Positioning on the value succeeds; the declared length is not yet validated against available data. + assertEquals(START_SCALAR, cursor.nextValue()); + // Filling the value cannot complete because the stream is exhausted long before the declared length is + // reached. The cursor must fail cleanly without over-allocating or hanging. + IonException ie = assertThrows(IonException.class, cursor::fillValue); + assertThat(ie.getMessage(), + containsString("An oversized value was found even though no maximum size was configured.")); + cursor.close(); + } + @ParameterizedTest(name = "constructFromBytes={0}") @ValueSource(booleans = {true, false}) public void expectUseAfterCloseToHaveNoEffect(boolean constructFromBytes) { diff --git a/src/test/java/com/amazon/ion/system/IonReaderBuilderGzipToggleTest.java b/src/test/java/com/amazon/ion/system/IonReaderBuilderGzipToggleTest.java new file mode 100644 index 000000000..f289a2597 --- /dev/null +++ b/src/test/java/com/amazon/ion/system/IonReaderBuilderGzipToggleTest.java @@ -0,0 +1,158 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.system; + +import com.amazon.ion.util.GzipStreamInterceptor; +import com.amazon.ion.util.InputStreamInterceptor; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.io.InputStream; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Tests for the GZIP decompression toggle API on {@link IonReaderBuilder}. + */ +class IonReaderBuilderGzipToggleTest { + + @Test + void defaultBuilderHasGzipEnabled() { + IonReaderBuilder builder = IonReaderBuilder.standard(); + assertTrue(builder.isGzipDecompressionEnabled()); + } + + @Test + void getInputStreamInterceptorsIncludesGzipWhenEnabled() { + IonReaderBuilder builder = IonReaderBuilder.standard(); + List interceptors = builder.getInputStreamInterceptors(); + boolean hasGzip = interceptors.stream().anyMatch(i -> i instanceof GzipStreamInterceptor); + assertTrue(hasGzip, "Expected GzipStreamInterceptor in interceptor list when GZIP is enabled"); + } + + @Test + void getInputStreamInterceptorsExcludesGzipWhenDisabled() { + IonReaderBuilder builder = IonReaderBuilder.standard() + .withGzipDecompressionEnabled(false); + List interceptors = builder.getInputStreamInterceptors(); + boolean hasGzip = interceptors.stream().anyMatch(i -> i instanceof GzipStreamInterceptor); + assertFalse(hasGzip, "Expected no GzipStreamInterceptor in interceptor list when GZIP is disabled"); + } + + @Test + void customInterceptorsPreservedWhenGzipDisabled() { + InputStreamInterceptor customInterceptor = new InputStreamInterceptor() { + @Override + public String formatName() { + return "custom"; + } + + @Override + public int numberOfBytesNeededToDetermineMatch() { + return 2; + } + + @Override + public boolean isMatch(byte[] candidate, int offset, int length) { + return false; + } + + @Override + public InputStream newInputStream(InputStream interceptedStream) throws IOException { + return interceptedStream; + } + }; + + IonReaderBuilder builder = IonReaderBuilder.standard() + .addInputStreamInterceptor(customInterceptor) + .withGzipDecompressionEnabled(false); + + List interceptors = builder.getInputStreamInterceptors(); + + // Custom interceptor should still be present + assertTrue(interceptors.contains(customInterceptor), + "Custom interceptor should be preserved when GZIP is disabled"); + + // GZIP interceptor should be excluded + boolean hasGzip = interceptors.stream().anyMatch(i -> i instanceof GzipStreamInterceptor); + assertFalse(hasGzip, "GzipStreamInterceptor should be excluded when GZIP is disabled"); + } + + @Test + void customInterceptorsPreservedWhenGzipEnabled() { + InputStreamInterceptor customInterceptor = new InputStreamInterceptor() { + @Override + public String formatName() { + return "custom"; + } + + @Override + public int numberOfBytesNeededToDetermineMatch() { + return 2; + } + + @Override + public boolean isMatch(byte[] candidate, int offset, int length) { + return false; + } + + @Override + public InputStream newInputStream(InputStream interceptedStream) throws IOException { + return interceptedStream; + } + }; + + IonReaderBuilder builder = IonReaderBuilder.standard() + .addInputStreamInterceptor(customInterceptor) + .withGzipDecompressionEnabled(true); + + List interceptors = builder.getInputStreamInterceptors(); + + // Custom interceptor should be present + assertTrue(interceptors.contains(customInterceptor), + "Custom interceptor should be preserved when GZIP is enabled"); + + // GZIP interceptor should also be present + boolean hasGzip = interceptors.stream().anyMatch(i -> i instanceof GzipStreamInterceptor); + assertTrue(hasGzip, "GzipStreamInterceptor should be present when GZIP is enabled"); + } + + @Test + void withGzipDecompressionEnabledOnImmutableBuilderReturnsMutableCopy() { + IonReaderBuilder immutableBuilder = IonReaderBuilder.standard().immutable(); + IonReaderBuilder result = immutableBuilder.withGzipDecompressionEnabled(false); + + // The result should be a different instance (mutable copy) + assertNotSame(immutableBuilder, result); + // The original should still have GZIP enabled + assertTrue(immutableBuilder.isGzipDecompressionEnabled()); + // The new builder should have GZIP disabled + assertFalse(result.isGzipDecompressionEnabled()); + } + + @Test + void copyConstructorPreservesGzipFlag() { + IonReaderBuilder original = IonReaderBuilder.standard() + .withGzipDecompressionEnabled(false); + IonReaderBuilder copy = original.copy(); + + assertFalse(copy.isGzipDecompressionEnabled(), + "Copy should preserve the gzipDecompressionEnabled flag"); + } + + @Test + void setGzipDecompressionEnabledOnMutableBuilder() { + IonReaderBuilder builder = IonReaderBuilder.standard(); + builder.setGzipDecompressionEnabled(false); + assertFalse(builder.isGzipDecompressionEnabled()); + } + + @Test + void setGzipDecompressionEnabledOnImmutableBuilderThrows() { + IonReaderBuilder immutableBuilder = IonReaderBuilder.standard().immutable(); + assertThrows(UnsupportedOperationException.class, () -> { + immutableBuilder.setGzipDecompressionEnabled(false); + }); + } +} From 1766f2c670e56df1373ad92178ec29212044017c Mon Sep 17 00:00:00 2001 From: Tyler Gregg Date: Tue, 23 Jun 2026 18:49:03 -0700 Subject: [PATCH 2/2] Adds to PR #1153. --- .../com/amazon/ion/impl/IonCursorBinary.java | 25 +-- .../amazon/ion/system/IonReaderBuilder.java | 43 ++--- .../amazon/ion/impl/IonCursorBinaryTest.java | 147 ++++++++++++------ 3 files changed, 139 insertions(+), 76 deletions(-) diff --git a/src/main/java/com/amazon/ion/impl/IonCursorBinary.java b/src/main/java/com/amazon/ion/impl/IonCursorBinary.java index 69818a498..8555c0342 100644 --- a/src/main/java/com/amazon/ion/impl/IonCursorBinary.java +++ b/src/main/java/com/amazon/ion/impl/IonCursorBinary.java @@ -517,11 +517,13 @@ private boolean ensureCapacity(long minimumNumberOfBytesRequired) { } long shortfall = minimumNumberOfBytesRequired - refillableState.capacity; if (shortfall > 0) { - // Grow by doubling rather than allocating the full declared length up front. This prevents a - // small malicious payload from triggering a disproportionately large allocation based on an - // attacker-declared VarUInt length. If the value is legitimately large, refill() will continue - // to grow the buffer as actual data is consumed from the stream. - int newSize = (int) Math.min(Math.max(refillableState.capacity * 2L, refillableState.capacity + 1L), maximumFreeSpace); + // Grow to the next power of two beyond the current capacity rather than allocating the full declared + // length up front. This prevents a small malicious payload from triggering a disproportionately large + // allocation based on an attacker-declared VarUInt length: the new size depends only on the current + // capacity, never on the requested length. If the value is legitimately large, refill() will continue + // to grow the buffer (by doubling) as actual data is consumed from the stream. Because that growth + // starts from this power-of-two capacity, doubling keeps the buffer a power of two. + int newSize = (int) Math.min(nextPowerOfTwo((int) (refillableState.capacity + 1)), maximumFreeSpace); byte[] newBuffer = new byte[newSize]; moveBytesToStartOfBuffer(newBuffer, startOffset); refillableState.capacity = newSize; @@ -681,11 +683,14 @@ private long refill(long minimumNumberOfBytesRequired) { // if the stream is not at EOF. If this happens and there is still a shortfall, keep requesting bytes // until either the shortfall is filled or EOF is reached. do { - // If the buffer is full but more bytes are still required, grow it (by doubling) to make room. - // Growing here, after data has been read, ensures allocation is proportional to the data actually - // present in the stream rather than to an attacker-declared length. Because ensureCapacity() has - // already moved buffered bytes to the start of the buffer (offset == 0), growing only requires - // copying the existing bytes into a larger array; no parser indices need to be shifted. + // If the buffer is full but more bytes are still required, grow it (by doubling) to make room before + // reading more. The buffer is only grown once it has filled with data actually read from the stream, + // so allocation stays proportional to the data present in the stream rather than to an + // attacker-declared length. Doubling here (rather than calling nextPowerOfTwo() again) keeps this hot + // path cheap; ensureCapacity() always grows first and rounds the capacity up to a power of two, so + // doubling from that base keeps the buffer a power of two. Because ensureCapacity() has also already + // moved buffered bytes to the start of the buffer (offset == 0), growing only requires copying the + // existing bytes into a larger array; no parser indices need to be shifted. if (freeSpaceAt(limit) == 0 && (minimumNumberOfBytesRequired - availableAt(offset)) > 0) { if (refillableState.capacity >= refillableState.maximumBufferSize) { // Cannot grow further; the value exceeds the maximum buffer size. diff --git a/src/main/java/com/amazon/ion/system/IonReaderBuilder.java b/src/main/java/com/amazon/ion/system/IonReaderBuilder.java index 7ade6e615..b6985a261 100644 --- a/src/main/java/com/amazon/ion/system/IonReaderBuilder.java +++ b/src/main/java/com/amazon/ion/system/IonReaderBuilder.java @@ -325,7 +325,7 @@ public IonBufferConfiguration getBufferConfiguration() { /** * Adds an {@link InputStreamInterceptor} to the end of the list that the builder will attempt * to apply to a stream before creating {@link IonReader} instances over that stream. - * {@link GzipStreamInterceptor} is always consulted first, and need not be added. The first + * {@link GzipStreamInterceptor} is normally consulted first, and need not be added. The first * interceptor in the list that matches the stream will be used; if any chaining of interceptors * is required, it is up to the caller to provide a custom interceptor implementation to * achieve this. @@ -333,13 +333,16 @@ public IonBufferConfiguration getBufferConfiguration() { * Users may also or instead register implementations as service providers on the classpath. * See {@link ServiceLoader} for details about how to do this. *

- * The list of stream interceptors available to the reader always begins with + * The list of stream interceptors available to the reader normally begins with * {@link GzipStreamInterceptor} and is followed by: *

    *
  1. any stream interceptors detected on the classpath using * {@link ServiceLoader#load(Class)}, then
  2. *
  3. any stream interceptor(s) added by calling this method.
  4. *
+ * If GZIP auto-decompression has been disabled via {@link #withGzipDecompressionEnabled(boolean)}, + * {@link GzipStreamInterceptor} is omitted from the list and any {@link GzipStreamInterceptor} + * added by calling this method is filtered out. * * @param streamInterceptor the stream interceptor to add. * @@ -380,27 +383,22 @@ private static List detectStreamInterceptorsOnClasspath( } /** - * Gets the {@link InputStreamInterceptor} instances available to this builder. The returned list will always begin - * with the default stream interceptor, which detects GZIP. Any stream interceptor(s) detected on the classpath - * by {@link ServiceLoader#load(Class)} will immediately follow. Any stream interceptor(s) manually added using - * {@link #addInputStreamInterceptor(InputStreamInterceptor)} will occur at the end of the list. + * Gets the {@link InputStreamInterceptor} instances available to this builder. Unless GZIP + * auto-decompression has been disabled via {@link #withGzipDecompressionEnabled(boolean)}, the returned + * list will begin with the default stream interceptor, which detects GZIP. Any stream interceptor(s) + * detected on the classpath by {@link ServiceLoader#load(Class)} will immediately follow. Any stream + * interceptor(s) manually added using {@link #addInputStreamInterceptor(InputStreamInterceptor)} will occur + * at the end of the list. If GZIP auto-decompression has been disabled, {@link GzipStreamInterceptor} is + * omitted from the returned list. * @return an unmodifiable view of the stream interceptors currently configured. */ public List getInputStreamInterceptors() { + List interceptors = + streamInterceptors == null ? DETECTED_STREAM_INTERCEPTORS : streamInterceptors; if (!gzipDecompressionEnabled) { - if (streamInterceptors == null) { - // Filter out GzipStreamInterceptor from the detected list - List filtered = new ArrayList<>(); - for (InputStreamInterceptor interceptor : DETECTED_STREAM_INTERCEPTORS) { - if (!(interceptor instanceof GzipStreamInterceptor)) { - filtered.add(interceptor); - } - } - return Collections.unmodifiableList(filtered); - } - // Filter out GzipStreamInterceptor from the custom list + // Filter out GzipStreamInterceptor so GZIP-compressed data is not auto-decompressed. List filtered = new ArrayList<>(); - for (InputStreamInterceptor interceptor : streamInterceptors) { + for (InputStreamInterceptor interceptor : interceptors) { if (!(interceptor instanceof GzipStreamInterceptor)) { filtered.add(interceptor); } @@ -455,7 +453,8 @@ public boolean isGzipDecompressionEnabled() { * instance over the given block of Ion data, detecting whether it's text or * binary data. *

- * This method will auto-detect and uncompress GZIPped Ion data. + * This method will auto-detect and uncompress GZIPped Ion data, unless GZIP auto-decompression has been + * disabled via {@link #withGzipDecompressionEnabled(boolean)}. * * @param ionData the source of the Ion data, which may be either Ion binary * data or UTF-8 Ion text. The reader retains a reference to the array, so @@ -476,7 +475,8 @@ public IonReader build(byte[] ionData) * instance over the given block of Ion data, detecting whether it's text or * binary data. *

- * This method will auto-detect and uncompress GZIPped Ion data. + * This method will auto-detect and uncompress GZIPped Ion data, unless GZIP auto-decompression has been + * disabled via {@link #withGzipDecompressionEnabled(boolean)}. * * @param ionData the source of the Ion data, which is used only within the * range of bytes starting at {@code offset} for {@code len} bytes. @@ -496,7 +496,8 @@ public IonReader build(byte[] ionData) * instance over the given stream of Ion data, detecting whether it's text or * binary data. *

- * This method will auto-detect and uncompress GZIPped Ion data. + * This method will auto-detect and uncompress GZIPped Ion data, unless GZIP auto-decompression has been + * disabled via {@link #withGzipDecompressionEnabled(boolean)}. *

* Because this library performs its own buffering, it's recommended that * users avoid adding additional buffering to the given stream. diff --git a/src/test/java/com/amazon/ion/impl/IonCursorBinaryTest.java b/src/test/java/com/amazon/ion/impl/IonCursorBinaryTest.java index da3948a58..946276ee4 100644 --- a/src/test/java/com/amazon/ion/impl/IonCursorBinaryTest.java +++ b/src/test/java/com/amazon/ion/impl/IonCursorBinaryTest.java @@ -7,10 +7,13 @@ import com.amazon.ion.IonException; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import org.junit.jupiter.params.provider.ValueSource; import java.io.ByteArrayInputStream; import java.util.function.Consumer; +import java.util.stream.Stream; import static com.amazon.ion.BitUtils.bytes; import static com.amazon.ion.IonCursor.Event.NEEDS_DATA; @@ -30,6 +33,7 @@ import static com.amazon.ion.impl.IonCursorTestUtilities.startContainer; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.lessThanOrEqualTo; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; @@ -430,74 +434,127 @@ public void expectValueLargerThanMaxArraySizeToFailCleanly() { } /** - * Builds binary Ion for a string value of the given length, with the bytes prefixed by the IVM. - * Uses the VarUInt-length string encoding (type 0x8E) so that lengths greater than 13 are supported. + * Creates a cursor that reads the given data from an InputStream (i.e. in refillable/streaming mode) using a + * buffer configuration with an 8-byte initial buffer and an unbounded maximum buffer size. */ - private static int[] stringValueWithLength(int contentLength) { - // Encode the content length as a VarUInt (single byte for lengths < 128). - if (contentLength >= 128) { - throw new IllegalArgumentException("This helper only supports content lengths < 128."); - } - int[] data = new int[4 + 1 + 1 + contentLength]; - data[0] = 0xE0; data[1] = 0x01; data[2] = 0x00; data[3] = 0xEA; // IVM - data[4] = 0x8E; // String with VarUInt length - data[5] = 0x80 | contentLength; // VarUInt length with stop bit + private static IonCursorBinary newStreamingCursor(int[] data) { + IonBufferConfiguration config = IonBufferConfiguration.Builder.standard() + .withInitialBufferSize(8) + .build(); + return new IonCursorBinary(config, new ByteArrayInputStream(bytes(data)), null, 0, 0); + } + + // A legitimate string value with 100 content bytes, far larger than the 8-byte initial buffer. After the IVM is + // consumed, buffered bytes are shifted to the start of the buffer, so the content begins immediately after the + // value's header bytes: index 2 for the top-level string (type ID + 1-byte VarUInt length). + private static final int[] LARGE_STRING = new int[]{ + 0xE0, 0x01, 0x00, 0xEA, // Ion 1.0 IVM + 0x8E, // String with VarUInt length + 0x80 | 100, // VarUInt length 100 with stop bit + }; + + // The same value wrapped in a single annotation. The content begins at index 6 (wrapper type ID, wrapper length, + // annotations length, annotation SID, wrapped type ID, wrapped length). + private static final int[] LARGE_ANNOTATED_STRING = new int[]{ + 0xE0, 0x01, 0x00, 0xEA, // Ion 1.0 IVM + 0xEE, // Annotation wrapper with VarUInt length + 0x80 | 104, // VarUInt wrapper length 104 with stop bit + 0x81, // VarUInt annotations length 1 with stop bit + 0x84, // Annotation SID 4 + 0x8E, // Wrapped string with VarUInt length + 0x80 | 100, // VarUInt length 100 with stop bit + }; + + private static int[] withContent(int[] header, int contentLength) { + int[] content = new int[contentLength]; for (int i = 0; i < contentLength; i++) { - data[6 + i] = 'a'; + content[i] = 'a'; } + int[] data = new int[header.length + contentLength]; + System.arraycopy(header, 0, data, 0, header.length); + System.arraycopy(content, 0, data, header.length, contentLength); return data; } + private static Stream largeValues() { + return Stream.of( + // data, expected buffer index at which the content begins after the IVM is shifted out. + Arguments.of("unwrapped", withContent(LARGE_STRING, 100), 2), + Arguments.of("annotation-wrapped", withContent(LARGE_ANNOTATED_STRING, 100), 6) + ); + } + /** * Verifies that a legitimate value larger than the initial buffer (and larger than a single doubling of it) * is read correctly. This exercises the incremental buffer growth in refill(): the buffer must double * multiple times as real data is consumed from the stream. */ - @Test - public void largeValueReadFromStreamGrowsBufferIncrementally() { - int contentLength = 100; // Far larger than the 8-byte initial buffer; requires several doublings. - int[] data = stringValueWithLength(contentLength); - ByteArrayInputStream in = new ByteArrayInputStream(bytes(data)); - IonBufferConfiguration config = IonBufferConfiguration.Builder.standard() - .withInitialBufferSize(8) - .build(); - IonCursorBinary cursor = new IonCursorBinary(config, in, null, 0, 0); - // After the IVM is consumed, buffered bytes are shifted to the start of the buffer, so the content - // begins at index 2 (after the type ID and VarUInt length bytes) and ends at 2 + contentLength. + @ParameterizedTest(name = "{0}") + @MethodSource("largeValues") + public void largeValueReadFromStreamGrowsBufferIncrementally(String name, int[] data, int contentStartIndex) { + IonCursorBinary cursor = newStreamingCursor(data); assertSequence( cursor, - fillScalar(2, 2 + contentLength), + fillScalar(contentStartIndex, contentStartIndex + 100), endStream() ); + // The buffer grew purely by doubling from a power-of-two start: 8 -> 16 -> 32 -> 64 -> 128, just large + // enough to hold the value. + assertEquals(128, cursor.buffer.length); cursor.close(); } + // A top-level string whose header declares 1,000,000 content bytes but is followed by only 3. The declared + // length is well below the maximum buffer size, so it is not rejected as oversized. + private static final int[] LENGTH_BOMB_STRING = new int[]{ + 0xE0, 0x01, 0x00, 0xEA, // Ion 1.0 IVM + 0x8E, // String with VarUInt length + 0x3D, 0x04, 0xC0, // VarUInt length 1,000,000 (well below the max buffer size) + 'a', 'b', 'c' // Only a few actual content bytes follow. + }; + + // The same length bomb declared inside an annotation wrapper. The wrapper length and the wrapped value length + // are internally consistent so the structural length check passes and the allocation path is reached. + private static final int[] LENGTH_BOMB_ANNOTATED_STRING = new int[]{ + 0xE0, 0x01, 0x00, 0xEA, // Ion 1.0 IVM + 0xEE, // Annotation wrapper with VarUInt length + 0x3D, 0x04, 0xC6, // VarUInt wrapper length 1,000,006 (well below the max buffer size) + 0x81, // VarUInt annotations length 1 with stop bit + 0x84, // Annotation SID 4 + 0x8E, // Wrapped string with VarUInt length + 0x3D, 0x04, 0xC0, // VarUInt wrapped length 1,000,000 (consistent with the wrapper length) + 'a', 'b', 'c' // Only a few actual content bytes follow. + }; + + private static Stream lengthBombs() { + return Stream.of( + Arguments.of("unwrapped", LENGTH_BOMB_STRING), + Arguments.of("annotation-wrapped", LENGTH_BOMB_ANNOTATED_STRING) + ); + } + /** * Verifies that a value declaring a length far larger than the data actually present in the stream does not - * cause a disproportionate allocation. The buffer grows only in proportion to the data actually consumed. - * When a fixed-size stream is exhausted before the declared length is satisfied, the cursor fails cleanly - * with an IonException rather than allocating (or attempting to allocate) the full declared length. + * cause a disproportionate allocation. The declared length (1,000,000) is well below the maximum buffer size, + * so it is not rejected as oversized; instead the buffer grows incrementally as data is consumed. Because the + * stream is exhausted after only a few bytes, the cursor cleanly reports NEEDS_DATA while the buffer remains + * tiny. (Note: the non-continuable IonReader wrapper will then throw an exception for an incomplete value; + * the continuable IonReader wrapper will wait for more data, but will not allocate more buffer space.) + * Before the fix, the buffer would have been allocated to nextPowerOfTwo(1,000,000) up front. */ - @Test - public void lengthBombFromStreamDoesNotOverAllocate() { - int[] data = new int[]{ - 0xE0, 0x01, 0x00, 0xEA, // Ion 1.0 IVM - 0x8E, // String with VarUInt length - 0x07, 0x7f, 0x7f, 0x7f, 0xf9, // VarUInt length ~2 GB - 'a', 'b', 'c' // Only a few actual content bytes follow. - }; - ByteArrayInputStream in = new ByteArrayInputStream(bytes(data)); - IonBufferConfiguration config = IonBufferConfiguration.Builder.standard() - .withInitialBufferSize(8) - .build(); - IonCursorBinary cursor = new IonCursorBinary(config, in, null, 0, 0); + @ParameterizedTest(name = "{0}") + @MethodSource("lengthBombs") + public void lengthBombFromStreamDoesNotOverAllocateAndReportsNeedsData(String name, int[] data) { + IonCursorBinary cursor = newStreamingCursor(data); // Positioning on the value succeeds; the declared length is not yet validated against available data. assertEquals(START_SCALAR, cursor.nextValue()); - // Filling the value cannot complete because the stream is exhausted long before the declared length is - // reached. The cursor must fail cleanly without over-allocating or hanging. - IonException ie = assertThrows(IonException.class, cursor::fillValue); - assertThat(ie.getMessage(), - containsString("An oversized value was found even though no maximum size was configured.")); + // Filling cannot complete because the stream is exhausted long before the declared length is reached. The + // cursor reports NEEDS_DATA rather than allocating (or attempting to allocate) the full declared length. + assertEquals(NEEDS_DATA, cursor.fillValue()); + // The 1,000,000-byte declared length must not have driven allocation. Only a few bytes of real data were + // present, so the 8-byte buffer should have grown by doubling just past the available data, not toward the + // declared length. + assertThat(cursor.buffer.length, lessThanOrEqualTo(32)); cursor.close(); }