Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 85 additions & 0 deletions src/jmh/java/com/amazon/ion/IonContainerHashCodeBenchmark.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
package com.amazon.ion;


import com.amazon.ion.system.IonSystemBuilder;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;

import java.math.BigDecimal;
import java.util.concurrent.TimeUnit;

/**
* Measures the cost of {@link IonValue#hashCode()} on containers under monomorphic and megamorphic conditions.
* <p>
* The {@code poison} parameter exercises the JIT pathology described in
* <a href="https://bugs.openjdk.org/browse/JDK-8368292">JDK-8368292</a>: when a virtual call site (here,
* {@code scalarHashCode()}) has been invoked on many distinct receiver types, the JIT cannot devirtualize or
* inline it — even for a caller whose receivers are locally monomorphic. The {@code poison=true} variant
* pre-pollutes the call site with 40,000+ invocations across 6 IonValueLite subtypes before measuring the
* monomorphic container, simulating real-world conditions where diverse Ion types have been hashed in the
* same JVM.
*/
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Fork(value = 1, jvmArgs = {"-Xms3g", "-Xmx3g", "-XX:+UseParallelGC"})
@Warmup(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
@State(Scope.Benchmark)
public class IonContainerHashCodeBenchmark {

private static final IonSystem SYSTEM = IonSystemBuilder.standard().build();

@Param({"true", "false"})
private boolean poison;

private IonList monomorphicContainer;
private IonList megamorphicContainer;

@Setup
public void setup() {
// Build a container with 6 distinct IonValueLite subtypes to poison hashSignature()
megamorphicContainer = SYSTEM.newEmptyList();
for (int i = 0; i < 20; i++) {
megamorphicContainer.add(SYSTEM.newInt(i));
megamorphicContainer.add(SYSTEM.newString("s" + i));
megamorphicContainer.add(SYSTEM.newFloat(i * 1.1));
megamorphicContainer.add(SYSTEM.newBool(i % 2 == 0));
megamorphicContainer.add(SYSTEM.newSymbol("sym" + i));
megamorphicContainer.add(SYSTEM.newDecimal(BigDecimal.valueOf(i)));
}

if (poison) {
// Poison the scalarHashCode() call site with 40k+ invocations across diverse types
for (int i = 0; i < 40_000; i++) {
megamorphicContainer.hashCode();
}
}

// The benchmark target: a container holding only IonStrings (locally monomorphic, same size)
monomorphicContainer = SYSTEM.newEmptyList();
for (int i = 0; i < 120; i++) {
monomorphicContainer.add(SYSTEM.newString("value" + i));
}
}

@Benchmark
public int containerHashCode_monomorphic() {
return monomorphicContainer.hashCode();
}

@Benchmark
public int containerHashCode_megamorphic() {
return megamorphicContainer.hashCode();
}
}
147 changes: 84 additions & 63 deletions src/main/java/com/amazon/ion/impl/lite/IonValueLite.java
Original file line number Diff line number Diff line change
Expand Up @@ -421,45 +421,45 @@ final void copyFieldName(IonValueLite original) {
private static final int nameHashSalt = 16777619; // prime to salt field names
private static final int valueHashSalt = 8191; // prime to salt values

private static class HashHolder {
int valueHash = 0;
IonContainerLite parent = null;
IonContainerLite.SequenceContentIterator iterator = null;

private int hashStructField(int partial, IonValueLite value) {
// If the field name's text is unknown, use its sid instead
String text = value._fieldName;
/**
* Computes the hash contribution of a single struct field, incorporating both the field name and value hash.
*/
private static int hashStructField(int partial, IonValueLite value, IonContainerLite parent) {
// If the field name's text is unknown, use its sid instead
String text = value._fieldName;

int nameHashCode = text == null
? value._fieldId * sidHashSalt
: text.hashCode() * textHashSalt;
int nameHashCode = text == null
? value._fieldId * sidHashSalt
: text.hashCode() * textHashSalt;

// mixing to account for small text and sid deltas
nameHashCode ^= (nameHashCode << 17) ^ (nameHashCode >> 15);
// mixing to account for small text and sid deltas
nameHashCode ^= (nameHashCode << 17) ^ (nameHashCode >> 15);

int fieldHashCode = parent.hashSignature();
fieldHashCode = valueHashSalt * fieldHashCode + partial;
fieldHashCode = nameHashSalt * fieldHashCode + nameHashCode;
int fieldHashCode = parent.hashSignature();
fieldHashCode = valueHashSalt * fieldHashCode + partial;
fieldHashCode = nameHashSalt * fieldHashCode + nameHashCode;

// another mix step for each Field of the struct
fieldHashCode ^= (fieldHashCode << 19) ^ (fieldHashCode >> 13);
return fieldHashCode;
}
// another mix step for each Field of the struct
fieldHashCode ^= (fieldHashCode << 19) ^ (fieldHashCode >> 13);
return fieldHashCode;
}

void update(int partial, IonValueLite value) {
if (parent == null) {
valueHash = partial;
} else {
if (parent instanceof IonStructLite) {
// Additive hash is used to ensure insensitivity to order of
// fields, and will not lose data on value hash codes
valueHash += hashStructField(partial, value);
} else {
valueHash = valueHashSalt * valueHash + partial;
// mixing at each step to make the hash code order-dependent
valueHash ^= (valueHash << 29) ^ (valueHash >> 3);
}
}
/**
* Folds a child value's hash into the running container hash. For structs, uses an order-independent additive
* scheme; for sequences, uses an order-dependent mixing scheme.
*/
private static int updateHash(int valueHash, int partial, IonValueLite value, IonContainerLite parent) {
if (parent == null) {
return partial;
} else if (parent instanceof IonStructLite) {
// Additive hash is used to ensure insensitivity to order of
// fields, and will not lose data on value hash codes
return valueHash + hashStructField(partial, value, parent);
} else {
valueHash = valueHashSalt * valueHash + partial;
// mixing at each step to make the hash code order-dependent
valueHash ^= (valueHash << 29) ^ (valueHash >> 3);
return valueHash;
}
}

Expand All @@ -480,47 +480,68 @@ public int hashCode() {
return scalarHashCode();
}

/**
* Iteratively computes the hash code for a container and all its descendants. Uses an explicit stack
* (parallel arrays) rather than recursion or per-frame objects to avoid stack overflow on deeply nested
* structures and to eliminate per-frame object allocation. Empirically, the JVM's escape analysis fails
* to scalar-replace frame objects here, so using parallel arrays avoids ~28% overhead from heap allocation
* and indirection.
* <p>
* The explicit {@code getClass()} checks on scalar values are intentional: they give the JIT a monomorphic
* receiver at each call site, enabling inlining of {@code scalarHashCode()} regardless of global call-site
* pollution from other Ion types (see <a href="https://bugs.openjdk.org/browse/JDK-8368292">JDK-8368292</a>).
*/
private int containerHashCode() {
HashHolder[] hashStack = new HashHolder[CONTAINER_STACK_INITIAL_CAPACITY];
int hashStackIndex = 0;
hashStack[hashStackIndex] = new HashHolder();
int[] valueHashes = new int[CONTAINER_STACK_INITIAL_CAPACITY];
IonContainerLite[] parents = new IonContainerLite[CONTAINER_STACK_INITIAL_CAPACITY];
IonContainerLite.SequenceContentIterator[] iterators = new IonContainerLite.SequenceContentIterator[CONTAINER_STACK_INITIAL_CAPACITY];
int stackIndex = 0;
IonContainerLite parent = null;
IonContainerLite.SequenceContentIterator iterator = null;
int valueHash = 0;
IonValueLite value = this;
do {
HashHolder hashHolder = hashStack[hashStackIndex];
if ((value._flags & IS_NULL_VALUE) != 0) {
// Null values are hashed using a constant signature unique to the type.
hashHolder.update(value.hashTypeAnnotations(value.hashSignature()), value);
valueHash = updateHash(valueHash, value.hashTypeAnnotations(value.hashSignature()), value, parent);
} else if (!(value instanceof IonContainerLite)) {
hashHolder.update(value.scalarHashCode(), value);
} else {
// Step into the container by pushing a HashHolder for the container onto the stack.
if (++hashStackIndex >= hashStack.length) {
hashStack = Arrays.copyOf(hashStack, hashStack.length * 2);
// Type checks devirtualize scalarHashCode() for the two most common scalar types.
if (value.getClass() == IonStringLite.class) {
valueHash = updateHash(valueHash, value.scalarHashCode(), value, parent);
} else if (value.getClass() == IonSymbolLite.class) {
valueHash = updateHash(valueHash, value.scalarHashCode(), value, parent);
} else {
valueHash = updateHash(valueHash, value.scalarHashCode(), value, parent);
}
hashHolder = hashStack[hashStackIndex];
if (hashHolder == null) {
hashHolder = new HashHolder();
hashStack[hashStackIndex] = hashHolder;
} else {
// Step into the container by pushing state onto the stack.
if (++stackIndex >= valueHashes.length) {
valueHashes = Arrays.copyOf(valueHashes, valueHashes.length * 2);
parents = Arrays.copyOf(parents, parents.length * 2);
iterators = Arrays.copyOf(iterators, iterators.length * 2);
}
hashHolder.parent = (IonContainerLite) value;
hashHolder.iterator = hashHolder.parent.new SequenceContentIterator(0, true);
hashHolder.valueHash = value.hashSignature();
valueHashes[stackIndex] = valueHash;
parents[stackIndex] = parent;
iterators[stackIndex] = iterator;
parent = (IonContainerLite) value;
iterator = parent.new SequenceContentIterator(0, true);
valueHash = value.hashSignature();
}
do {
if (hashHolder.parent == null) {
// Iteration has returned to the top level; return the top-level value's hash code.
return hashHolder.valueHash;
if (parent == null) {
return valueHash;
}
value = hashHolder.iterator.nextOrNull();
value = iterator.nextOrNull();
if (value == null) {
// The end of the container has been reached. Pop from the stack and update the parent's hash.
hashHolder = hashStack[hashStackIndex--];
IonValueLite container = hashHolder.parent;
int containerHash = container.hashTypeAnnotations(hashHolder.valueHash);
hashHolder.parent = null;
hashHolder.iterator = null;
hashHolder = hashStack[hashStackIndex];
hashHolder.update(containerHash, container);
IonValueLite container = parent;
int containerHash = container.hashTypeAnnotations(valueHash);
valueHash = valueHashes[stackIndex];
parent = parents[stackIndex];
iterator = iterators[stackIndex];
parents[stackIndex] = null;
iterators[stackIndex] = null;
stackIndex--;
valueHash = updateHash(valueHash, containerHash, container, parent);
}
} while (value == null);
} while (true);
Expand Down
Loading