From 1364643d693adecc4c1b3ff7c4edbdb2adf70346 Mon Sep 17 00:00:00 2001 From: Ryan Stewart <47729789+RyanJamesStewart@users.noreply.github.com> Date: Thu, 14 May 2026 08:09:22 -0700 Subject: [PATCH 1/3] perf: bypass views.value(i) for inline strings in ArrowBytesViewMap For inline strings (len <= 12), insert_if_new_inner called values.value(i) in the new-value branch even though the same bytes are already packed in view_u128 (the ByteView inline format is [len:u32 LE][data:12 bytes zero-padded]). Add append_inline_view helper and branch on len <= 12 to push view_u128 directly, skipping the views-buffer round-trip. The stored view is byte-identical to make_view(value, 0, 0) for inline inputs; the existing test suite confirms correctness. Closes #20054 --- datafusion/physical-expr-common/Cargo.toml | 4 + .../benches/binary_view_map_insert.rs | 91 +++++++++++++++++++ .../src/binary_view_map.rs | 33 ++++++- 3 files changed, 124 insertions(+), 4 deletions(-) create mode 100644 datafusion/physical-expr-common/benches/binary_view_map_insert.rs diff --git a/datafusion/physical-expr-common/Cargo.toml b/datafusion/physical-expr-common/Cargo.toml index 0e4748b81d3ff..a08791d61bf26 100644 --- a/datafusion/physical-expr-common/Cargo.toml +++ b/datafusion/physical-expr-common/Cargo.toml @@ -58,3 +58,7 @@ rand = { workspace = true } [[bench]] harness = false name = "compare_nested" + +[[bench]] +harness = false +name = "binary_view_map_insert" diff --git a/datafusion/physical-expr-common/benches/binary_view_map_insert.rs b/datafusion/physical-expr-common/benches/binary_view_map_insert.rs new file mode 100644 index 0000000000000..c0d8f96912c6f --- /dev/null +++ b/datafusion/physical-expr-common/benches/binary_view_map_insert.rs @@ -0,0 +1,91 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Bench for ArrowBytesViewMap insert_if_new on inline (len<=12) and +//! out-of-line (len>12) string arrays. +//! +//! The inline case exercises the bypass added in this PR: for strings +//! where len <= 12, the new-value branch now extracts bytes from the +//! view u128 directly rather than calling values.value(i). + +use arrow::array::{ArrayRef, StringViewArray}; +use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main}; +use datafusion_physical_expr_common::binary_map::OutputType; +use datafusion_physical_expr_common::binary_view_map::ArrowBytesViewSet; +use rand::rngs::StdRng; +use rand::{Rng, SeedableRng}; +use std::hint::black_box; +use std::sync::Arc; + +/// Build an array of `n` distinct strings, each of `len` bytes. +/// All characters are ASCII lowercase letters. +fn make_string_view_array(n: usize, len: usize, seed: u64) -> ArrayRef { + let mut rng = StdRng::seed_from_u64(seed); + let alphabet: Vec = (b'a'..=b'z').collect(); + let strings: Vec> = (0..n) + .map(|_| { + let s: String = (0..len) + .map(|_| alphabet[rng.random_range(0..alphabet.len())] as char) + .collect(); + Some(s) + }) + .collect(); + Arc::new(StringViewArray::from(strings)) +} + +fn bench_insert(c: &mut Criterion) { + let mut group = c.benchmark_group("ArrowBytesViewSet_insert_if_new"); + + // Sizes: number of distinct strings inserted per benchmark iteration. + // Each iteration creates a fresh set and inserts all values (worst case: + // all values are new, so every row hits the new-value branch). + for &n in &[1_000usize, 10_000, 100_000] { + // Inline strings: len=8 (well within the 12-byte inline threshold). + let inline_array = make_string_view_array(n, 8, 42); + group.bench_with_input( + BenchmarkId::new("inline_len8", n), + &inline_array, + |b, arr| { + b.iter(|| { + let mut set = ArrowBytesViewSet::new(OutputType::Utf8View); + set.insert(arr); + black_box(set.len()); + }); + }, + ); + + // Out-of-line strings: len=64 (far above the 12-byte threshold). + // Used as a control: this path is unchanged and should show no delta. + let ool_array = make_string_view_array(n, 64, 42); + group.bench_with_input( + BenchmarkId::new("out_of_line_len64", n), + &ool_array, + |b, arr| { + b.iter(|| { + let mut set = ArrowBytesViewSet::new(OutputType::Utf8View); + set.insert(arr); + black_box(set.len()); + }); + }, + ); + } + + group.finish(); +} + +criterion_group!(benches, bench_insert); +criterion_main!(benches); diff --git a/datafusion/physical-expr-common/src/binary_view_map.rs b/datafusion/physical-expr-common/src/binary_view_map.rs index abc3e28f82627..437464a754b1a 100644 --- a/datafusion/physical-expr-common/src/binary_view_map.rs +++ b/datafusion/physical-expr-common/src/binary_view_map.rs @@ -339,11 +339,25 @@ where payload } else { // no existing value, make a new one - let value: &[u8] = values.value(i).as_ref(); - let payload = make_payload_fn(Some(value)); + let (new_view, payload) = if len <= 12 { + // Inline path: bytes are already packed in view_u128. + // The inline ByteView format is [len:u32 LE][data:12 bytes zero-padded], + // so extracting bytes from the u128 avoids a round-trip through + // values.value(i) (which reads the views buffer and returns the same slice). + let view_bytes = view_u128.to_le_bytes(); + let value = &view_bytes[4..4 + len as usize]; + let payload = make_payload_fn(Some(value)); + // For inline strings, the stored view is identical to the input view: + // make_view(value, 0, 0) produces the same u128 as view_u128. + let new_view = self.append_inline_view(view_u128); + (new_view, payload) + } else { + let value: &[u8] = values.value(i).as_ref(); + let payload = make_payload_fn(Some(value)); + let new_view = self.append_value(value); + (new_view, payload) + }; - // Create view pointing to our buffers - let new_view = self.append_value(value); let new_header = Entry { view: new_view, hash, @@ -389,6 +403,17 @@ where } } + /// Append an already-computed inline view (len <= 12) directly, bypassing + /// buffer allocation. The caller guarantees that `view` is a valid inline + /// ByteView (length field in the low 32 bits is <= 12). + /// + /// Returns the view that was stored (identical to the argument). + fn append_inline_view(&mut self, view: u128) -> u128 { + self.views.push(view); + self.nulls.append_non_null(); + view + } + /// Append a value to our buffers and return the view pointing to it fn append_value(&mut self, value: &[u8]) -> u128 { let len = value.len(); From 72bea573ab38ec07145c4b2a8cf6d69b63f376fa Mon Sep 17 00:00:00 2001 From: Ryan Stewart <47729789+RyanJamesStewart@users.noreply.github.com> Date: Thu, 14 May 2026 15:42:35 -0700 Subject: [PATCH 2/3] v2: mark append_inline_view unsafe, drop standalone bench file Addresses alamb's review on PR #22172: * Mark `append_inline_view` as `unsafe fn` (binary_view_map.rs:411). The function relies on the caller passing a valid inline ByteView (len <= 12 in the low 32 bits + 12 zero-padded value bytes); the compiler can't verify that invariant from the signature. Marking it unsafe makes the safety contract explicit and forces the single call site in `insert_if_new_inner` to annotate, where the `if len <= 12` branch establishes the invariant. # Safety paragraph on the function spells out what goes wrong if a non-inline view is passed (downstream `views` consumers interpret the high 96 bits as [buffer_index, offset], which is unsound). * Drop benches/binary_view_map_insert.rs and its [[bench]] entry in the crate's Cargo.toml. alamb's note: the bench is unlikely to be run regularly post-merge and its utility drops after the fix lands. Behavior unchanged. 8/8 binary_view_map tests pass; clippy + fmt clean. --- datafusion/physical-expr-common/Cargo.toml | 4 - .../benches/binary_view_map_insert.rs | 91 ------------------- .../src/binary_view_map.rs | 23 ++++- 3 files changed, 19 insertions(+), 99 deletions(-) delete mode 100644 datafusion/physical-expr-common/benches/binary_view_map_insert.rs diff --git a/datafusion/physical-expr-common/Cargo.toml b/datafusion/physical-expr-common/Cargo.toml index a08791d61bf26..0e4748b81d3ff 100644 --- a/datafusion/physical-expr-common/Cargo.toml +++ b/datafusion/physical-expr-common/Cargo.toml @@ -58,7 +58,3 @@ rand = { workspace = true } [[bench]] harness = false name = "compare_nested" - -[[bench]] -harness = false -name = "binary_view_map_insert" diff --git a/datafusion/physical-expr-common/benches/binary_view_map_insert.rs b/datafusion/physical-expr-common/benches/binary_view_map_insert.rs deleted file mode 100644 index c0d8f96912c6f..0000000000000 --- a/datafusion/physical-expr-common/benches/binary_view_map_insert.rs +++ /dev/null @@ -1,91 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Bench for ArrowBytesViewMap insert_if_new on inline (len<=12) and -//! out-of-line (len>12) string arrays. -//! -//! The inline case exercises the bypass added in this PR: for strings -//! where len <= 12, the new-value branch now extracts bytes from the -//! view u128 directly rather than calling values.value(i). - -use arrow::array::{ArrayRef, StringViewArray}; -use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main}; -use datafusion_physical_expr_common::binary_map::OutputType; -use datafusion_physical_expr_common::binary_view_map::ArrowBytesViewSet; -use rand::rngs::StdRng; -use rand::{Rng, SeedableRng}; -use std::hint::black_box; -use std::sync::Arc; - -/// Build an array of `n` distinct strings, each of `len` bytes. -/// All characters are ASCII lowercase letters. -fn make_string_view_array(n: usize, len: usize, seed: u64) -> ArrayRef { - let mut rng = StdRng::seed_from_u64(seed); - let alphabet: Vec = (b'a'..=b'z').collect(); - let strings: Vec> = (0..n) - .map(|_| { - let s: String = (0..len) - .map(|_| alphabet[rng.random_range(0..alphabet.len())] as char) - .collect(); - Some(s) - }) - .collect(); - Arc::new(StringViewArray::from(strings)) -} - -fn bench_insert(c: &mut Criterion) { - let mut group = c.benchmark_group("ArrowBytesViewSet_insert_if_new"); - - // Sizes: number of distinct strings inserted per benchmark iteration. - // Each iteration creates a fresh set and inserts all values (worst case: - // all values are new, so every row hits the new-value branch). - for &n in &[1_000usize, 10_000, 100_000] { - // Inline strings: len=8 (well within the 12-byte inline threshold). - let inline_array = make_string_view_array(n, 8, 42); - group.bench_with_input( - BenchmarkId::new("inline_len8", n), - &inline_array, - |b, arr| { - b.iter(|| { - let mut set = ArrowBytesViewSet::new(OutputType::Utf8View); - set.insert(arr); - black_box(set.len()); - }); - }, - ); - - // Out-of-line strings: len=64 (far above the 12-byte threshold). - // Used as a control: this path is unchanged and should show no delta. - let ool_array = make_string_view_array(n, 64, 42); - group.bench_with_input( - BenchmarkId::new("out_of_line_len64", n), - &ool_array, - |b, arr| { - b.iter(|| { - let mut set = ArrowBytesViewSet::new(OutputType::Utf8View); - set.insert(arr); - black_box(set.len()); - }); - }, - ); - } - - group.finish(); -} - -criterion_group!(benches, bench_insert); -criterion_main!(benches); diff --git a/datafusion/physical-expr-common/src/binary_view_map.rs b/datafusion/physical-expr-common/src/binary_view_map.rs index 437464a754b1a..df8e09cca9ca8 100644 --- a/datafusion/physical-expr-common/src/binary_view_map.rs +++ b/datafusion/physical-expr-common/src/binary_view_map.rs @@ -349,7 +349,13 @@ where let payload = make_payload_fn(Some(value)); // For inline strings, the stored view is identical to the input view: // make_view(value, 0, 0) produces the same u128 as view_u128. - let new_view = self.append_inline_view(view_u128); + // + // SAFETY: the enclosing `if len <= 12` branch establishes + // that view_u128 is a valid inline ByteView. Its low 32 + // bits encode `len` (<= 12) and the next 12 bytes are the + // value bytes the source array produced for this row, so + // every required invariant of `append_inline_view` holds. + let new_view = unsafe { self.append_inline_view(view_u128) }; (new_view, payload) } else { let value: &[u8] = values.value(i).as_ref(); @@ -404,11 +410,20 @@ where } /// Append an already-computed inline view (len <= 12) directly, bypassing - /// buffer allocation. The caller guarantees that `view` is a valid inline - /// ByteView (length field in the low 32 bits is <= 12). + /// buffer allocation. /// /// Returns the view that was stored (identical to the argument). - fn append_inline_view(&mut self, view: u128) -> u128 { + /// + /// # Safety + /// + /// `view` must be a valid inline `ByteView`: the length field in the low + /// 32 bits must be <= 12, and the remaining 12 bytes must hold the + /// value's bytes (zero-padded if shorter). Calling with a non-inline view + /// would store a value that downstream `views` consumers interpret as + /// `[buffer_index, offset]` into the `completed`/`in_progress` buffers, + /// which is unsound for any view that didn't originate from a real + /// allocation in those buffers. + unsafe fn append_inline_view(&mut self, view: u128) -> u128 { self.views.push(view); self.nulls.append_non_null(); view From 7c535f7560d2d0755ac310f78784be660007c524 Mon Sep 17 00:00:00 2001 From: Ryan Stewart <47729789+RyanJamesStewart@users.noreply.github.com> Date: Sat, 16 May 2026 06:25:49 -0700 Subject: [PATCH 3/3] v3: trim SAFETY comment per review --- datafusion/physical-expr-common/src/binary_view_map.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/datafusion/physical-expr-common/src/binary_view_map.rs b/datafusion/physical-expr-common/src/binary_view_map.rs index df8e09cca9ca8..9d4b556393a24 100644 --- a/datafusion/physical-expr-common/src/binary_view_map.rs +++ b/datafusion/physical-expr-common/src/binary_view_map.rs @@ -350,11 +350,8 @@ where // For inline strings, the stored view is identical to the input view: // make_view(value, 0, 0) produces the same u128 as view_u128. // - // SAFETY: the enclosing `if len <= 12` branch establishes - // that view_u128 is a valid inline ByteView. Its low 32 - // bits encode `len` (<= 12) and the next 12 bytes are the - // value bytes the source array produced for this row, so - // every required invariant of `append_inline_view` holds. + // SAFETY: view_u128 was a valid view, and the enclosing `len <= 12` + // ensures it is inline let new_view = unsafe { self.append_inline_view(view_u128) }; (new_view, payload) } else {