diff --git a/diskann-providers/src/model/graph/provider/async_/simple_neighbor_provider.rs b/diskann-providers/src/model/graph/provider/async_/simple_neighbor_provider.rs index 2584caa4f..7c59d89e6 100644 --- a/diskann-providers/src/model/graph/provider/async_/simple_neighbor_provider.rs +++ b/diskann-providers/src/model/graph/provider/async_/simple_neighbor_provider.rs @@ -288,6 +288,26 @@ impl storage::bin::GetAdjacencyList for SimpleNeighborProviderAsync { Ok(list) } + /// Optimized version that reuses a pre-allocated buffer to avoid per-call allocation. + /// + /// This directly copies the adjacency list into the provided buffer, avoiding the + /// intermediate `AdjacencyList` allocation that `get_adjacency_list` requires. + fn get_adjacency_list_into(&self, i: usize, buffer: &mut Vec<u32>) -> ANNResult<()> { + #[cfg(test)] + self.num_get_calls.increment(); + + // Lint: We don't have a good way of recovering from lock poisoning anyways. + let _guard = self.locks[i].read().unwrap(); + + // SAFETY: We are holding the read lock for `i`. + let list = unsafe { self.get_slice(i) }; + + // Reuse buffer: clear and copy data directly + buffer.clear(); + buffer.extend_from_slice(list); + Ok(()) + } + fn total(&self) -> usize { self.locks.len() } @@ -346,6 +366,32 @@ impl storage::bin::GetAdjacencyList for DiskAdaptor<'_> { Ok(list) } + /// Optimized version that reuses a pre-allocated buffer to avoid per-call allocation. + /// + /// This directly reads neighbors into the buffer and performs the start point remapping + /// in-place, avoiding the intermediate `AdjacencyList` allocation. + fn get_adjacency_list_into(&self, i: usize, buffer: &mut Vec<u32>) -> ANNResult<()> { + // Lint: We don't have a good way of recovering from lock poisoning anyways. + #[allow(clippy::unwrap_used)] + let _guard = self.provider.locks[i].read().unwrap(); + + // SAFETY: We are holding the read lock for `i`. 
+ let list = unsafe { self.provider.get_slice(i) }; + + // Reuse buffer: clear and copy data directly + buffer.clear(); + buffer.extend_from_slice(list); + + // Remap the in-memory start point to the actual start point + for id in buffer.iter_mut() { + if *id == self.inmem_start_point { + *id = self.actual_start_point; + } + } + + Ok(()) + } + fn total(&self) -> usize { // Don't include any start points at the end. self.provider.locks.len() - self.provider.num_start_points diff --git a/diskann-providers/src/storage/bin.rs b/diskann-providers/src/storage/bin.rs index bf1f89d14..5ec320d84 100644 --- a/diskann-providers/src/storage/bin.rs +++ b/diskann-providers/src/storage/bin.rs @@ -100,6 +100,23 @@ pub(crate) trait GetAdjacencyList { /// Retrieve the data stored at index `i`. fn get_adjacency_list(&self, i: usize) -> ANNResult<AdjacencyList<Self::Element>>; + /// Retrieve the data stored at index `i` into a pre-allocated buffer. + /// + /// This method allows callers to reuse a buffer across multiple calls, + /// avoiding per-call memory allocation overhead. The buffer is cleared + /// before being populated with the adjacency list data. + /// + /// Default implementation falls back to `get_adjacency_list` and copies. + fn get_adjacency_list_into(&self, i: usize, buffer: &mut Vec<Self::Element>) -> ANNResult<()> + where + Self::Element: Clone, + { + buffer.clear(); + let list = self.get_adjacency_list(i)?; + buffer.extend_from_slice(&list); + Ok(()) + } + /// Return the total number of elements contained in `self`. 
fn total(&self) -> usize; @@ -344,31 +361,31 @@ where let mut observed_max_degree: u32 = 0; out.write_all(&index_size.to_le_bytes())?; - out.write_all(&observed_max_degree.to_le_bytes())?; // Will be updated later with correct max_degree + out.write_all(&observed_max_degree.to_le_bytes())?; out.write_all(&start_point.to_le_bytes())?; - out.write_all(&graph.additional_points().to_le_bytes())?; + let total = graph.total(); + // Pre-allocate a reusable buffer for adjacency lists + let initial_capacity = graph.max_degree().map(|d| d as usize).unwrap_or(128); + let mut neighbor_buffer: Vec<u32> = Vec::with_capacity(initial_capacity); + for i in 0..total { - let binding = graph.get_adjacency_list(i)?; - let neighbors: &[u32] = &binding; - let num_neighbors: u32 = neighbors.len() as u32; + // Reuse buffer to avoid per-vertex allocation overhead + graph.get_adjacency_list_into(i, &mut neighbor_buffer)?; + let num_neighbors: u32 = neighbor_buffer.len() as u32; - // Write the number of neighbors as a `u32`. out.write_all(&num_neighbors.to_le_bytes())?; - // Write all the neighbors, applying transformation if provided. - neighbors - .iter() - .copied() - .try_for_each(|n| out.write_all(&n.to_le_bytes()))?; + // Bulk write using bytemuck for zero-copy conversion + let neighbor_bytes: &[u8] = bytemuck::must_cast_slice(&neighbor_buffer); + out.write_all(neighbor_bytes)?; observed_max_degree = observed_max_degree.max(num_neighbors); - index_size += (std::mem::size_of::<u32>() * (1 + neighbors.len())) as u64; + index_size += (std::mem::size_of::<u32>() * (1 + neighbor_buffer.len())) as u64; } - // Use configured max degree if provided, otherwise use observed let max_degree = graph.max_degree().unwrap_or(observed_max_degree); // Finish up by writing the observed index size and max degree.