diff --git a/src/main/java/com/amazon/ion/impl/IonCursorBinary.java b/src/main/java/com/amazon/ion/impl/IonCursorBinary.java index b6ffde938..8555c0342 100644 --- a/src/main/java/com/amazon/ion/impl/IonCursorBinary.java +++ b/src/main/java/com/amazon/ion/impl/IonCursorBinary.java @@ -517,7 +517,13 @@ private boolean ensureCapacity(long minimumNumberOfBytesRequired) { } long shortfall = minimumNumberOfBytesRequired - refillableState.capacity; if (shortfall > 0) { - int newSize = (int) Math.min(Math.max(refillableState.capacity * 2, nextPowerOfTwo((int) (refillableState.capacity + shortfall))), maximumFreeSpace); + // Grow to the next power of two beyond the current capacity rather than allocating the full declared + // length up front. This prevents a small malicious payload from triggering a disproportionately large + // allocation based on an attacker-declared VarUInt length: the new size depends only on the current + // capacity, never on the requested length. If the value is legitimately large, refill() will continue + // to grow the buffer (by doubling) as actual data is consumed from the stream. Because that growth + // starts from this power-of-two capacity, doubling keeps the buffer a power of two. + int newSize = (int) Math.min(nextPowerOfTwo((int) (refillableState.capacity + 1)), maximumFreeSpace); byte[] newBuffer = new byte[newSize]; moveBytesToStartOfBuffer(newBuffer, startOffset); refillableState.capacity = newSize; @@ -672,11 +678,32 @@ private void shiftIndicesLeft(int shiftAmount) { */ private long refill(long minimumNumberOfBytesRequired) { int numberOfBytesFilled = -1; - long shortfall; + long shortfall = minimumNumberOfBytesRequired - availableAt(offset); // Sometimes an InputStream implementation will return fewer than the number of bytes requested even // if the stream is not at EOF. If this happens and there is still a shortfall, keep requesting bytes // until either the shortfall is filled or EOF is reached. do { + // If the buffer is full but more bytes are still required, grow it (by doubling) to make room before + // reading more. The buffer is only grown once it has filled with data actually read from the stream, + // so allocation stays proportional to the data present in the stream rather than to an + // attacker-declared length. Doubling here (rather than calling nextPowerOfTwo() again) keeps this hot + // path cheap; ensureCapacity() always grows first and rounds the capacity up to a power of two, so + // doubling from that base keeps the buffer a power of two. Because ensureCapacity() has also already + // moved buffered bytes to the start of the buffer (offset == 0), growing only requires copying the + // existing bytes into a larger array; no parser indices need to be shifted. + if (freeSpaceAt(limit) == 0 && (minimumNumberOfBytesRequired - availableAt(offset)) > 0) { + if (refillableState.capacity >= refillableState.maximumBufferSize) { + // Cannot grow further; the value exceeds the maximum buffer size. + refillableState.isSkippingCurrentValue = true; + break; + } + int newSize = (int) Math.min(Math.max(refillableState.capacity * 2L, refillableState.capacity + 1L), refillableState.maximumBufferSize); + byte[] newBuffer = new byte[newSize]; + System.arraycopy(buffer, 0, newBuffer, 0, (int) limit); + refillableState.capacity = newSize; + buffer = newBuffer; + byteBuffer = ByteBuffer.wrap(buffer, (int) offset, (int) refillableState.capacity); + } try { numberOfBytesFilled = refillableState.inputStream.read(buffer, (int) limit, (int) freeSpaceAt(limit)); } catch (EOFException e) { diff --git a/src/main/java/com/amazon/ion/system/IonReaderBuilder.java b/src/main/java/com/amazon/ion/system/IonReaderBuilder.java index a3927705c..b6985a261 100644 --- a/src/main/java/com/amazon/ion/system/IonReaderBuilder.java +++ b/src/main/java/com/amazon/ion/system/IonReaderBuilder.java @@ -101,6 +101,7 @@ public abstract class IonReaderBuilder private boolean isIncrementalReadingEnabled = false; private IonBufferConfiguration bufferConfiguration = IonBufferConfiguration.DEFAULT; private List streamInterceptors = null; + private boolean gzipDecompressionEnabled = true; protected IonReaderBuilder() { @@ -112,6 +113,7 @@ protected IonReaderBuilder(IonReaderBuilder that) this.isIncrementalReadingEnabled = that.isIncrementalReadingEnabled; this.bufferConfiguration = that.bufferConfiguration; this.streamInterceptors = that.streamInterceptors == null ? null : new ArrayList<>(that.streamInterceptors); + this.gzipDecompressionEnabled = that.gzipDecompressionEnabled; } /** @@ -323,7 +325,7 @@ public IonBufferConfiguration getBufferConfiguration() { /** * Adds an {@link InputStreamInterceptor} to the end of the list that the builder will attempt * to apply to a stream before creating {@link IonReader} instances over that stream. - * {@link GzipStreamInterceptor} is always consulted first, and need not be added. The first + * {@link GzipStreamInterceptor} is normally consulted first, and need not be added. The first * interceptor in the list that matches the stream will be used; if any chaining of interceptors * is required, it is up to the caller to provide a custom interceptor implementation to * achieve this. @@ -331,13 +333,16 @@ public IonBufferConfiguration getBufferConfiguration() { * Users may also or instead register implementations as service providers on the classpath. * See {@link ServiceLoader} for details about how to do this. *

- * The list of stream interceptors available to the reader always begins with + * The list of stream interceptors available to the reader normally begins with * {@link GzipStreamInterceptor} and is followed by: *

    *
  1. any stream interceptors detected on the classpath using * {@link ServiceLoader#load(Class)}, then
  2. *
  3. any stream interceptor(s) added by calling this method.
  4. *
+ * If GZIP auto-decompression has been disabled via {@link #withGzipDecompressionEnabled(boolean)}, + * {@link GzipStreamInterceptor} is omitted from the list and any {@link GzipStreamInterceptor} + * added by calling this method is filtered out. * * @param streamInterceptor the stream interceptor to add. * @@ -378,25 +383,78 @@ private static List detectStreamInterceptorsOnClasspath( } /** - * Gets the {@link InputStreamInterceptor} instances available to this builder. The returned list will always begin - * with the default stream interceptor, which detects GZIP. Any stream interceptor(s) detected on the classpath - * by {@link ServiceLoader#load(Class)} will immediately follow. Any stream interceptor(s) manually added using - * {@link #addInputStreamInterceptor(InputStreamInterceptor)} will occur at the end of the list. + * Gets the {@link InputStreamInterceptor} instances available to this builder. Unless GZIP + * auto-decompression has been disabled via {@link #withGzipDecompressionEnabled(boolean)}, the returned + * list will begin with the default stream interceptor, which detects GZIP. Any stream interceptor(s) + * detected on the classpath by {@link ServiceLoader#load(Class)} will immediately follow. Any stream + * interceptor(s) manually added using {@link #addInputStreamInterceptor(InputStreamInterceptor)} will occur + * at the end of the list. If GZIP auto-decompression has been disabled, {@link GzipStreamInterceptor} is + * omitted from the returned list. * @return an unmodifiable view of the stream interceptors currently configured. */ public List getInputStreamInterceptors() { + List interceptors = + streamInterceptors == null ? DETECTED_STREAM_INTERCEPTORS : streamInterceptors; + if (!gzipDecompressionEnabled) { + // Filter out GzipStreamInterceptor so GZIP-compressed data is not auto-decompressed. + List filtered = new ArrayList<>(); + for (InputStreamInterceptor interceptor : interceptors) { + if (!(interceptor instanceof GzipStreamInterceptor)) { + filtered.add(interceptor); + } + } + return Collections.unmodifiableList(filtered); + } if (streamInterceptors == null) { return DETECTED_STREAM_INTERCEPTORS; } return Collections.unmodifiableList(streamInterceptors); } + /** + * Declares whether GZIP auto-decompression is enabled when building an {@link IonReader}, + * returning a new mutable builder if the current one is immutable. + * + * When enabled (the default), GZIP-compressed Ion data is automatically detected and decompressed. + * When disabled, GZIP-compressed data is not auto-decompressed. + * + * @param enabled true to enable GZIP auto-decompression (default), false to disable. + * @return this builder instance, if mutable; otherwise a mutable copy of this builder. + */ + public IonReaderBuilder withGzipDecompressionEnabled(boolean enabled) { + IonReaderBuilder b = mutable(); + b.gzipDecompressionEnabled = enabled; + return b; + } + + /** + * Sets whether GZIP auto-decompression is enabled. + * + * @param enabled true to enable GZIP auto-decompression (default), false to disable. + * + * @see #withGzipDecompressionEnabled(boolean) + * + * @throws UnsupportedOperationException if this builder is immutable. + */ + public void setGzipDecompressionEnabled(boolean enabled) { + mutationCheck(); + this.gzipDecompressionEnabled = enabled; + } + + /** + * @return true if GZIP auto-decompression is enabled (the default). + */ + public boolean isGzipDecompressionEnabled() { + return gzipDecompressionEnabled; + } + /** * Based on the builder's configuration properties, creates a new IonReader * instance over the given block of Ion data, detecting whether it's text or * binary data. *

- * This method will auto-detect and uncompress GZIPped Ion data. + * This method will auto-detect and uncompress GZIPped Ion data, unless GZIP auto-decompression has been + * disabled via {@link #withGzipDecompressionEnabled(boolean)}. * * @param ionData the source of the Ion data, which may be either Ion binary * data or UTF-8 Ion text. The reader retains a reference to the array, so @@ -417,7 +475,8 @@ public IonReader build(byte[] ionData) * instance over the given block of Ion data, detecting whether it's text or * binary data. *

- * This method will auto-detect and uncompress GZIPped Ion data. + * This method will auto-detect and uncompress GZIPped Ion data, unless GZIP auto-decompression has been + * disabled via {@link #withGzipDecompressionEnabled(boolean)}. * * @param ionData the source of the Ion data, which is used only within the * range of bytes starting at {@code offset} for {@code len} bytes. @@ -437,7 +496,8 @@ public IonReader build(byte[] ionData) * instance over the given stream of Ion data, detecting whether it's text or * binary data. *

- * This method will auto-detect and uncompress GZIPped Ion data. + * This method will auto-detect and uncompress GZIPped Ion data, unless GZIP auto-decompression has been + * disabled via {@link #withGzipDecompressionEnabled(boolean)}. *

* Because this library performs its own buffering, it's recommended that * users avoid adding additional buffering to the given stream. diff --git a/src/test/java/com/amazon/ion/impl/IonCursorBinaryTest.java b/src/test/java/com/amazon/ion/impl/IonCursorBinaryTest.java index 4708b49ba..946276ee4 100644 --- a/src/test/java/com/amazon/ion/impl/IonCursorBinaryTest.java +++ b/src/test/java/com/amazon/ion/impl/IonCursorBinaryTest.java @@ -7,10 +7,13 @@ import com.amazon.ion.IonException; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import org.junit.jupiter.params.provider.ValueSource; import java.io.ByteArrayInputStream; import java.util.function.Consumer; +import java.util.stream.Stream; import static com.amazon.ion.BitUtils.bytes; import static com.amazon.ion.IonCursor.Event.NEEDS_DATA; @@ -30,6 +33,7 @@ import static com.amazon.ion.impl.IonCursorTestUtilities.startContainer; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.lessThanOrEqualTo; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; @@ -429,6 +433,131 @@ public void expectValueLargerThanMaxArraySizeToFailCleanly() { cursor.close(); } + /** + * Creates a cursor that reads the given data from an InputStream (i.e. in refillable/streaming mode) using a + * buffer configuration with an 8-byte initial buffer and an unbounded maximum buffer size. + */ + private static IonCursorBinary newStreamingCursor(int[] data) { + IonBufferConfiguration config = IonBufferConfiguration.Builder.standard() + .withInitialBufferSize(8) + .build(); + return new IonCursorBinary(config, new ByteArrayInputStream(bytes(data)), null, 0, 0); + } + + // A legitimate string value with 100 content bytes, far larger than the 8-byte initial buffer. After the IVM is + // consumed, buffered bytes are shifted to the start of the buffer, so the content begins immediately after the + // value's header bytes: index 2 for the top-level string (type ID + 1-byte VarUInt length). + private static final int[] LARGE_STRING = new int[]{ + 0xE0, 0x01, 0x00, 0xEA, // Ion 1.0 IVM + 0x8E, // String with VarUInt length + 0x80 | 100, // VarUInt length 100 with stop bit + }; + + // The same value wrapped in a single annotation. The content begins at index 6 (wrapper type ID, wrapper length, + // annotations length, annotation SID, wrapped type ID, wrapped length). + private static final int[] LARGE_ANNOTATED_STRING = new int[]{ + 0xE0, 0x01, 0x00, 0xEA, // Ion 1.0 IVM + 0xEE, // Annotation wrapper with VarUInt length + 0x80 | 104, // VarUInt wrapper length 104 with stop bit + 0x81, // VarUInt annotations length 1 with stop bit + 0x84, // Annotation SID 4 + 0x8E, // Wrapped string with VarUInt length + 0x80 | 100, // VarUInt length 100 with stop bit + }; + + private static int[] withContent(int[] header, int contentLength) { + int[] content = new int[contentLength]; + for (int i = 0; i < contentLength; i++) { + content[i] = 'a'; + } + int[] data = new int[header.length + contentLength]; + System.arraycopy(header, 0, data, 0, header.length); + System.arraycopy(content, 0, data, header.length, contentLength); + return data; + } + + private static Stream largeValues() { + return Stream.of( + // data, expected buffer index at which the content begins after the IVM is shifted out. + Arguments.of("unwrapped", withContent(LARGE_STRING, 100), 2), + Arguments.of("annotation-wrapped", withContent(LARGE_ANNOTATED_STRING, 100), 6) + ); + } + + /** + * Verifies that a legitimate value larger than the initial buffer (and larger than a single doubling of it) + * is read correctly. This exercises the incremental buffer growth in refill(): the buffer must double + * multiple times as real data is consumed from the stream. + */ + @ParameterizedTest(name = "{0}") + @MethodSource("largeValues") + public void largeValueReadFromStreamGrowsBufferIncrementally(String name, int[] data, int contentStartIndex) { + IonCursorBinary cursor = newStreamingCursor(data); + assertSequence( + cursor, + fillScalar(contentStartIndex, contentStartIndex + 100), + endStream() + ); + // The buffer grew purely by doubling from a power-of-two start: 8 -> 16 -> 32 -> 64 -> 128, just large + // enough to hold the value. + assertEquals(128, cursor.buffer.length); + cursor.close(); + } + + // A top-level string whose header declares 1,000,000 content bytes but is followed by only 3. The declared + // length is well below the maximum buffer size, so it is not rejected as oversized. + private static final int[] LENGTH_BOMB_STRING = new int[]{ + 0xE0, 0x01, 0x00, 0xEA, // Ion 1.0 IVM + 0x8E, // String with VarUInt length + 0x3D, 0x04, 0xC0, // VarUInt length 1,000,000 (well below the max buffer size) + 'a', 'b', 'c' // Only a few actual content bytes follow. + }; + + // The same length bomb declared inside an annotation wrapper. The wrapper length and the wrapped value length + // are internally consistent so the structural length check passes and the allocation path is reached. + private static final int[] LENGTH_BOMB_ANNOTATED_STRING = new int[]{ + 0xE0, 0x01, 0x00, 0xEA, // Ion 1.0 IVM + 0xEE, // Annotation wrapper with VarUInt length + 0x3D, 0x04, 0xC6, // VarUInt wrapper length 1,000,006 (well below the max buffer size) + 0x81, // VarUInt annotations length 1 with stop bit + 0x84, // Annotation SID 4 + 0x8E, // Wrapped string with VarUInt length + 0x3D, 0x04, 0xC0, // VarUInt wrapped length 1,000,000 (consistent with the wrapper length) + 'a', 'b', 'c' // Only a few actual content bytes follow. + }; + + private static Stream lengthBombs() { + return Stream.of( + Arguments.of("unwrapped", LENGTH_BOMB_STRING), + Arguments.of("annotation-wrapped", LENGTH_BOMB_ANNOTATED_STRING) + ); + } + + /** + * Verifies that a value declaring a length far larger than the data actually present in the stream does not + * cause a disproportionate allocation. The declared length (1,000,000) is well below the maximum buffer size, + * so it is not rejected as oversized; instead the buffer grows incrementally as data is consumed. Because the + * stream is exhausted after only a few bytes, the cursor cleanly reports NEEDS_DATA while the buffer remains + * tiny. (Note: the non-continuable IonReader wrapper will then throw an exception for an incomplete value; + * the continuable IonReader wrapper will wait for more data, but will not allocate more buffer space.) + * Before the fix, the buffer would have been allocated to nextPowerOfTwo(1,000,000) up front. + */ + @ParameterizedTest(name = "{0}") + @MethodSource("lengthBombs") + public void lengthBombFromStreamDoesNotOverAllocateAndReportsNeedsData(String name, int[] data) { + IonCursorBinary cursor = newStreamingCursor(data); + // Positioning on the value succeeds; the declared length is not yet validated against available data. + assertEquals(START_SCALAR, cursor.nextValue()); + // Filling cannot complete because the stream is exhausted long before the declared length is reached. The + // cursor reports NEEDS_DATA rather than allocating (or attempting to allocate) the full declared length. + assertEquals(NEEDS_DATA, cursor.fillValue()); + // The 1,000,000-byte declared length must not have driven allocation. Only a few bytes of real data were + // present, so the 8-byte buffer should have grown by doubling just past the available data, not toward the + // declared length. + assertThat(cursor.buffer.length, lessThanOrEqualTo(32)); + cursor.close(); + } + @ParameterizedTest(name = "constructFromBytes={0}") @ValueSource(booleans = {true, false}) public void expectUseAfterCloseToHaveNoEffect(boolean constructFromBytes) { diff --git a/src/test/java/com/amazon/ion/system/IonReaderBuilderGzipToggleTest.java b/src/test/java/com/amazon/ion/system/IonReaderBuilderGzipToggleTest.java new file mode 100644 index 000000000..f289a2597 --- /dev/null +++ b/src/test/java/com/amazon/ion/system/IonReaderBuilderGzipToggleTest.java @@ -0,0 +1,158 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.system; + +import com.amazon.ion.util.GzipStreamInterceptor; +import com.amazon.ion.util.InputStreamInterceptor; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.io.InputStream; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Tests for the GZIP decompression toggle API on {@link IonReaderBuilder}. + */ +class IonReaderBuilderGzipToggleTest { + + @Test + void defaultBuilderHasGzipEnabled() { + IonReaderBuilder builder = IonReaderBuilder.standard(); + assertTrue(builder.isGzipDecompressionEnabled()); + } + + @Test + void getInputStreamInterceptorsIncludesGzipWhenEnabled() { + IonReaderBuilder builder = IonReaderBuilder.standard(); + List interceptors = builder.getInputStreamInterceptors(); + boolean hasGzip = interceptors.stream().anyMatch(i -> i instanceof GzipStreamInterceptor); + assertTrue(hasGzip, "Expected GzipStreamInterceptor in interceptor list when GZIP is enabled"); + } + + @Test + void getInputStreamInterceptorsExcludesGzipWhenDisabled() { + IonReaderBuilder builder = IonReaderBuilder.standard() + .withGzipDecompressionEnabled(false); + List interceptors = builder.getInputStreamInterceptors(); + boolean hasGzip = interceptors.stream().anyMatch(i -> i instanceof GzipStreamInterceptor); + assertFalse(hasGzip, "Expected no GzipStreamInterceptor in interceptor list when GZIP is disabled"); + } + + @Test + void customInterceptorsPreservedWhenGzipDisabled() { + InputStreamInterceptor customInterceptor = new InputStreamInterceptor() { + @Override + public String formatName() { + return "custom"; + } + + @Override + public int numberOfBytesNeededToDetermineMatch() { + return 2; + } + + @Override + public boolean isMatch(byte[] candidate, int offset, int length) { + return false; + } + + @Override + public InputStream newInputStream(InputStream interceptedStream) throws IOException { + return interceptedStream; + } + }; + + IonReaderBuilder builder = IonReaderBuilder.standard() + .addInputStreamInterceptor(customInterceptor) + .withGzipDecompressionEnabled(false); + + List interceptors = builder.getInputStreamInterceptors(); + + // Custom interceptor should still be present + assertTrue(interceptors.contains(customInterceptor), + "Custom interceptor should be preserved when GZIP is disabled"); + + // GZIP interceptor should be excluded + boolean hasGzip = interceptors.stream().anyMatch(i -> i instanceof GzipStreamInterceptor); + assertFalse(hasGzip, "GzipStreamInterceptor should be excluded when GZIP is disabled"); + } + + @Test + void customInterceptorsPreservedWhenGzipEnabled() { + InputStreamInterceptor customInterceptor = new InputStreamInterceptor() { + @Override + public String formatName() { + return "custom"; + } + + @Override + public int numberOfBytesNeededToDetermineMatch() { + return 2; + } + + @Override + public boolean isMatch(byte[] candidate, int offset, int length) { + return false; + } + + @Override + public InputStream newInputStream(InputStream interceptedStream) throws IOException { + return interceptedStream; + } + }; + + IonReaderBuilder builder = IonReaderBuilder.standard() + .addInputStreamInterceptor(customInterceptor) + .withGzipDecompressionEnabled(true); + + List interceptors = builder.getInputStreamInterceptors(); + + // Custom interceptor should be present + assertTrue(interceptors.contains(customInterceptor), + "Custom interceptor should be preserved when GZIP is enabled"); + + // GZIP interceptor should also be present + boolean hasGzip = interceptors.stream().anyMatch(i -> i instanceof GzipStreamInterceptor); + assertTrue(hasGzip, "GzipStreamInterceptor should be present when GZIP is enabled"); + } + + @Test + void withGzipDecompressionEnabledOnImmutableBuilderReturnsMutableCopy() { + IonReaderBuilder immutableBuilder = IonReaderBuilder.standard().immutable(); + IonReaderBuilder result = immutableBuilder.withGzipDecompressionEnabled(false); + + // The result should be a different instance (mutable copy) + assertNotSame(immutableBuilder, result); + // The original should still have GZIP enabled + assertTrue(immutableBuilder.isGzipDecompressionEnabled()); + // The new builder should have GZIP disabled + assertFalse(result.isGzipDecompressionEnabled()); + } + + @Test + void copyConstructorPreservesGzipFlag() { + IonReaderBuilder original = IonReaderBuilder.standard() + .withGzipDecompressionEnabled(false); + IonReaderBuilder copy = original.copy(); + + assertFalse(copy.isGzipDecompressionEnabled(), + "Copy should preserve the gzipDecompressionEnabled flag"); + } + + @Test + void setGzipDecompressionEnabledOnMutableBuilder() { + IonReaderBuilder builder = IonReaderBuilder.standard(); + builder.setGzipDecompressionEnabled(false); + assertFalse(builder.isGzipDecompressionEnabled()); + } + + @Test + void setGzipDecompressionEnabledOnImmutableBuilderThrows() { + IonReaderBuilder immutableBuilder = IonReaderBuilder.standard().immutable(); + assertThrows(UnsupportedOperationException.class, () -> { + immutableBuilder.setGzipDecompressionEnabled(false); + }); + } +}