diff --git a/src/jmh/java/com/amazon/ion/IonContainerHashCodeBenchmark.java b/src/jmh/java/com/amazon/ion/IonContainerHashCodeBenchmark.java new file mode 100644 index 000000000..f693cd7a7 --- /dev/null +++ b/src/jmh/java/com/amazon/ion/IonContainerHashCodeBenchmark.java @@ -0,0 +1,85 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion; + + +import com.amazon.ion.system.IonSystemBuilder; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; + +import java.math.BigDecimal; +import java.util.concurrent.TimeUnit; + +/** + * Measures the cost of {@link IonValue#hashCode()} on containers under monomorphic and megamorphic conditions. + *

+ * The {@code poison} parameter exercises the JIT pathology described in + * JDK-8368292: when a virtual call site (here, + * {@code scalarHashCode()}) has been invoked on many distinct receiver types, the JIT cannot devirtualize or + * inline it — even for a caller whose receivers are locally monomorphic. The {@code poison=true} variant + * pre-pollutes the call site with 40,000+ invocations across 6 IonValueLite subtypes before measuring the + * monomorphic container, simulating real-world conditions where diverse Ion types have been hashed in the + * same JVM. + */ +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@Fork(value = 1, jvmArgs = {"-Xms3g", "-Xmx3g", "-XX:+UseParallelGC"}) +@Warmup(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) +@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) +@State(Scope.Benchmark) +public class IonContainerHashCodeBenchmark { + + private static final IonSystem SYSTEM = IonSystemBuilder.standard().build(); + + @Param({"true", "false"}) + private boolean poison; + + private IonList monomorphicContainer; + private IonList megamorphicContainer; + + @Setup + public void setup() { + // Build a container with 6 distinct IonValueLite subtypes to poison hashSignature() + megamorphicContainer = SYSTEM.newEmptyList(); + for (int i = 0; i < 20; i++) { + megamorphicContainer.add(SYSTEM.newInt(i)); + megamorphicContainer.add(SYSTEM.newString("s" + i)); + megamorphicContainer.add(SYSTEM.newFloat(i * 1.1)); + megamorphicContainer.add(SYSTEM.newBool(i % 2 == 0)); + megamorphicContainer.add(SYSTEM.newSymbol("sym" + i)); + megamorphicContainer.add(SYSTEM.newDecimal(BigDecimal.valueOf(i))); + } + + if (poison) { + // Poison the scalarHashCode() call site with 40k+ invocations across diverse types + for (int i = 0; i < 40_000; i++) { + megamorphicContainer.hashCode(); + } + } + + // The benchmark target: a container holding only IonStrings (locally monomorphic, same size) + monomorphicContainer = SYSTEM.newEmptyList(); + for (int i = 0; i < 120; i++) { + monomorphicContainer.add(SYSTEM.newString("value" + i)); + } + } + + @Benchmark + public int containerHashCode_monomorphic() { + return monomorphicContainer.hashCode(); + } + + @Benchmark + public int containerHashCode_megamorphic() { + return megamorphicContainer.hashCode(); + } +} diff --git a/src/main/java/com/amazon/ion/impl/lite/IonValueLite.java b/src/main/java/com/amazon/ion/impl/lite/IonValueLite.java index 622a3622c..d17de5cf9 100644 --- a/src/main/java/com/amazon/ion/impl/lite/IonValueLite.java +++ b/src/main/java/com/amazon/ion/impl/lite/IonValueLite.java @@ -421,45 +421,45 @@ final void copyFieldName(IonValueLite original) { private static final int nameHashSalt = 16777619; // prime to salt field names private static final int valueHashSalt = 8191; // prime to salt values - private static class HashHolder { - int valueHash = 0; - IonContainerLite parent = null; - IonContainerLite.SequenceContentIterator iterator = null; - - private int hashStructField(int partial, IonValueLite value) { - // If the field name's text is unknown, use its sid instead - String text = value._fieldName; + /** + * Computes the hash contribution of a single struct field, incorporating both the field name and value hash. + */ + private static int hashStructField(int partial, IonValueLite value, IonContainerLite parent) { + // If the field name's text is unknown, use its sid instead + String text = value._fieldName; - int nameHashCode = text == null - ? value._fieldId * sidHashSalt - : text.hashCode() * textHashSalt; + int nameHashCode = text == null + ? value._fieldId * sidHashSalt + : text.hashCode() * textHashSalt; - // mixing to account for small text and sid deltas - nameHashCode ^= (nameHashCode << 17) ^ (nameHashCode >> 15); + // mixing to account for small text and sid deltas + nameHashCode ^= (nameHashCode << 17) ^ (nameHashCode >> 15); - int fieldHashCode = parent.hashSignature(); - fieldHashCode = valueHashSalt * fieldHashCode + partial; - fieldHashCode = nameHashSalt * fieldHashCode + nameHashCode; + int fieldHashCode = parent.hashSignature(); + fieldHashCode = valueHashSalt * fieldHashCode + partial; + fieldHashCode = nameHashSalt * fieldHashCode + nameHashCode; - // another mix step for each Field of the struct - fieldHashCode ^= (fieldHashCode << 19) ^ (fieldHashCode >> 13); - return fieldHashCode; - } + // another mix step for each Field of the struct + fieldHashCode ^= (fieldHashCode << 19) ^ (fieldHashCode >> 13); + return fieldHashCode; + } - void update(int partial, IonValueLite value) { - if (parent == null) { - valueHash = partial; - } else { - if (parent instanceof IonStructLite) { - // Additive hash is used to ensure insensitivity to order of - // fields, and will not lose data on value hash codes - valueHash += hashStructField(partial, value); - } else { - valueHash = valueHashSalt * valueHash + partial; - // mixing at each step to make the hash code order-dependent - valueHash ^= (valueHash << 29) ^ (valueHash >> 3); - } - } + /** + * Folds a child value's hash into the running container hash. For structs, uses an order-independent additive + * scheme; for sequences, uses an order-dependent mixing scheme. + */ + private static int updateHash(int valueHash, int partial, IonValueLite value, IonContainerLite parent) { + if (parent == null) { + return partial; + } else if (parent instanceof IonStructLite) { + // Additive hash is used to ensure insensitivity to order of + // fields, and will not lose data on value hash codes + return valueHash + hashStructField(partial, value, parent); + } else { + valueHash = valueHashSalt * valueHash + partial; + // mixing at each step to make the hash code order-dependent + valueHash ^= (valueHash << 29) ^ (valueHash >> 3); + return valueHash; } } @@ -480,47 +480,68 @@ public int hashCode() { return scalarHashCode(); } + /** + * Iteratively computes the hash code for a container and all its descendants. Uses an explicit stack + * (parallel arrays) rather than recursion or per-frame objects to avoid stack overflow on deeply nested + * structures and to eliminate per-frame object allocation. Empirically, the JVM's escape analysis fails + * to scalar-replace frame objects here, so using parallel arrays avoids ~28% overhead from heap allocation + * and indirection. + *

+ * The explicit {@code getClass()} checks on scalar values are intentional: they give the JIT a monomorphic + * receiver at each call site, enabling inlining of {@code scalarHashCode()} regardless of global call-site + * pollution from other Ion types (see JDK-8368292). + */ private int containerHashCode() { - HashHolder[] hashStack = new HashHolder[CONTAINER_STACK_INITIAL_CAPACITY]; - int hashStackIndex = 0; - hashStack[hashStackIndex] = new HashHolder(); + int[] valueHashes = new int[CONTAINER_STACK_INITIAL_CAPACITY]; + IonContainerLite[] parents = new IonContainerLite[CONTAINER_STACK_INITIAL_CAPACITY]; + IonContainerLite.SequenceContentIterator[] iterators = new IonContainerLite.SequenceContentIterator[CONTAINER_STACK_INITIAL_CAPACITY]; + int stackIndex = 0; + IonContainerLite parent = null; + IonContainerLite.SequenceContentIterator iterator = null; + int valueHash = 0; IonValueLite value = this; do { - HashHolder hashHolder = hashStack[hashStackIndex]; if ((value._flags & IS_NULL_VALUE) != 0) { - // Null values are hashed using a constant signature unique to the type. - hashHolder.update(value.hashTypeAnnotations(value.hashSignature()), value); + valueHash = updateHash(valueHash, value.hashTypeAnnotations(value.hashSignature()), value, parent); } else if (!(value instanceof IonContainerLite)) { - hashHolder.update(value.scalarHashCode(), value); - } else { - // Step into the container by pushing a HashHolder for the container onto the stack. - if (++hashStackIndex >= hashStack.length) { - hashStack = Arrays.copyOf(hashStack, hashStack.length * 2); + // Type checks devirtualize scalarHashCode() for the two most common scalar types. + if (value.getClass() == IonStringLite.class) { + valueHash = updateHash(valueHash, value.scalarHashCode(), value, parent); + } else if (value.getClass() == IonSymbolLite.class) { + valueHash = updateHash(valueHash, value.scalarHashCode(), value, parent); + } else { + valueHash = updateHash(valueHash, value.scalarHashCode(), value, parent); } - hashHolder = hashStack[hashStackIndex]; - if (hashHolder == null) { - hashHolder = new HashHolder(); - hashStack[hashStackIndex] = hashHolder; + } else { + // Step into the container by pushing state onto the stack. + if (++stackIndex >= valueHashes.length) { + valueHashes = Arrays.copyOf(valueHashes, valueHashes.length * 2); + parents = Arrays.copyOf(parents, parents.length * 2); + iterators = Arrays.copyOf(iterators, iterators.length * 2); } - hashHolder.parent = (IonContainerLite) value; - hashHolder.iterator = hashHolder.parent.new SequenceContentIterator(0, true); - hashHolder.valueHash = value.hashSignature(); + valueHashes[stackIndex] = valueHash; + parents[stackIndex] = parent; + iterators[stackIndex] = iterator; + parent = (IonContainerLite) value; + iterator = parent.new SequenceContentIterator(0, true); + valueHash = value.hashSignature(); } do { - if (hashHolder.parent == null) { - // Iteration has returned to the top level; return the top-level value's hash code. - return hashHolder.valueHash; + if (parent == null) { + return valueHash; } - value = hashHolder.iterator.nextOrNull(); + value = iterator.nextOrNull(); if (value == null) { // The end of the container has been reached. Pop from the stack and update the parent's hash. - hashHolder = hashStack[hashStackIndex--]; - IonValueLite container = hashHolder.parent; - int containerHash = container.hashTypeAnnotations(hashHolder.valueHash); - hashHolder.parent = null; - hashHolder.iterator = null; - hashHolder = hashStack[hashStackIndex]; - hashHolder.update(containerHash, container); + IonValueLite container = parent; + int containerHash = container.hashTypeAnnotations(valueHash); + valueHash = valueHashes[stackIndex]; + parent = parents[stackIndex]; + iterator = iterators[stackIndex]; + parents[stackIndex] = null; + iterators[stackIndex] = null; + stackIndex--; + valueHash = updateHash(valueHash, containerHash, container, parent); } } while (value == null); } while (true);