Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
151 changes: 151 additions & 0 deletions be/src/exec/common/hash_table/hash_crc32_return32.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include "core/string_ref.h"
#include "core/types.h"
#include "core/uint128.h"

// CRC32 hash functions that return uint32_t instead of size_t.
// Uses type-appropriate _mm_crc32_u{8,16,32,64} intrinsics to avoid
// unnecessary widening of inputs smaller than 64 bits.

namespace doris {

// Initial CRC value (all 32 bits set) fed to the first crc32_compute() call of
// the fixed-width key hashers in this header.
// Declared `inline constexpr` rather than `static constexpr` so that every
// translation unit including this header shares one definition instead of
// getting its own internal-linkage copy.
inline constexpr uint32_t CRC32_HASH_SEED = 0xFFFFFFFF;

// Overload set wrapping the _mm_crc32_u{8,16,32,64} intrinsics.
// Overload resolution on the second argument picks the intrinsic whose operand
// width equals the width of `v`. Each overload folds `v` into the running
// checksum `crc` and returns the updated 32-bit checksum.
// NOTE(review): none of the includes visible in this header declares the
// _mm_crc32_* intrinsics; presumably they arrive transitively -- confirm.
inline uint32_t crc32_compute(uint32_t crc, uint8_t v) {
    const uint32_t updated = _mm_crc32_u8(crc, v);
    return updated;
}

inline uint32_t crc32_compute(uint32_t crc, uint16_t v) {
    const uint32_t updated = _mm_crc32_u16(crc, v);
    return updated;
}

inline uint32_t crc32_compute(uint32_t crc, uint32_t v) {
    const uint32_t updated = _mm_crc32_u32(crc, v);
    return updated;
}

inline uint32_t crc32_compute(uint32_t crc, uint64_t v) {
    // The 64-bit intrinsic's result is narrowed explicitly to the 32-bit
    // checksum type returned by the rest of the overload set.
    const auto wide_result = _mm_crc32_u64(crc, v);
    return static_cast<uint32_t>(wide_result);
}

// Primary template of the hash functor family. It is only declared here, never
// defined: a key type is usable only if an explicit specialization providing
// `uint32_t operator()(...) const` exists for it.
template <typename T>
struct HashCRC32Return32;

// --- Arithmetic types: use the narrowest intrinsic ---

// Hash functor for UInt8 keys: folds the key into CRC32_HASH_SEED with a
// single crc32_compute() call and returns the resulting 32-bit value.
template <>
struct HashCRC32Return32<UInt8> {
    uint32_t operator()(UInt8 key) const {
        const uint32_t digest = crc32_compute(CRC32_HASH_SEED, key);
        return digest;
    }
};

// Hash functor for UInt16 keys: folds the key into CRC32_HASH_SEED with a
// single crc32_compute() call and returns the resulting 32-bit value.
template <>
struct HashCRC32Return32<UInt16> {
    uint32_t operator()(UInt16 key) const {
        const uint32_t digest = crc32_compute(CRC32_HASH_SEED, key);
        return digest;
    }
};

// Hash functor for UInt32 keys: folds the key into CRC32_HASH_SEED with a
// single crc32_compute() call and returns the resulting 32-bit value.
template <>
struct HashCRC32Return32<UInt32> {
    uint32_t operator()(UInt32 key) const {
        const uint32_t digest = crc32_compute(CRC32_HASH_SEED, key);
        return digest;
    }
};

// Hash functor for UInt64 keys: folds the key into CRC32_HASH_SEED with a
// single crc32_compute() call and returns the resulting 32-bit value.
template <>
struct HashCRC32Return32<UInt64> {
    uint32_t operator()(UInt64 key) const {
        const uint32_t digest = crc32_compute(CRC32_HASH_SEED, key);
        return digest;
    }
};

// --- 128-bit types ---

// Hash functor for UInt128 keys. The checksum starts at CRC32_HASH_SEED and
// absorbs x.low() first, then x.high(); the order is part of the hash value.
template <>
struct HashCRC32Return32<UInt128> {
    uint32_t operator()(const UInt128& x) const {
        const uint32_t after_low = crc32_compute(CRC32_HASH_SEED, x.low());
        return crc32_compute(after_low, x.high());
    }
};

// --- 256-bit types ---

// Hash functor for UInt256 keys. Starting from CRC32_HASH_SEED, the checksum
// absorbs x.items[0] through x.items[3] in ascending index order, each one
// converted to uint64_t before being folded in.
template <>
struct HashCRC32Return32<UInt256> {
    uint32_t operator()(const UInt256& x) const {
        uint32_t digest = CRC32_HASH_SEED;
        // Exactly four elements are hashed, matching indices 0..3.
        for (uint32_t idx = 0; idx < 4; ++idx) {
            digest = crc32_compute(digest, static_cast<uint64_t>(x.items[idx]));
        }
        return digest;
    }
};

// --- Packed compound types (used by FixedKeyHashTableContext) ---

// Hash functor for UInt72 keys. Starting from CRC32_HASH_SEED, field `a` is
// folded in first and field `b` second; crc32_compute() overload resolution
// picks the variant matching each field's declared type.
template <>
struct HashCRC32Return32<UInt72> {
    uint32_t operator()(const UInt72& x) const {
        const uint32_t after_a = crc32_compute(CRC32_HASH_SEED, x.a);
        return crc32_compute(after_a, x.b);
    }
};

// Hash functor for UInt96 keys. Starting from CRC32_HASH_SEED, field `a` is
// folded in first and field `b` second; crc32_compute() overload resolution
// picks the variant matching each field's declared type.
template <>
struct HashCRC32Return32<UInt96> {
    uint32_t operator()(const UInt96& x) const {
        const uint32_t after_a = crc32_compute(CRC32_HASH_SEED, x.a);
        return crc32_compute(after_a, x.b);
    }
};

// Hash functor for UInt104 keys. Starting from CRC32_HASH_SEED, fields `a`,
// `b` and `c` are folded in, in that order; crc32_compute() overload
// resolution picks the variant matching each field's declared type.
template <>
struct HashCRC32Return32<UInt104> {
    uint32_t operator()(const UInt104& x) const {
        uint32_t digest = crc32_compute(CRC32_HASH_SEED, x.a);
        digest = crc32_compute(digest, x.b);
        return crc32_compute(digest, x.c);
    }
};

// Hash functor for UInt136 keys. Starting from CRC32_HASH_SEED, fields `a`,
// `b` and `c` are folded in, in that order; crc32_compute() overload
// resolution picks the variant matching each field's declared type.
template <>
struct HashCRC32Return32<UInt136> {
    uint32_t operator()(const UInt136& x) const {
        uint32_t digest = crc32_compute(CRC32_HASH_SEED, x.a);
        digest = crc32_compute(digest, x.b);
        return crc32_compute(digest, x.c);
    }
};

// --- StringRef: truncate existing crc32_hash() result ---

// Hash functor for StringRef keys. Delegates to crc32_hash() over the
// (data, size) pair and narrows its result to uint32_t with a static_cast.
template <>
struct HashCRC32Return32<StringRef> {
    uint32_t operator()(const StringRef& x) const {
        const auto full_hash = crc32_hash(x.data, x.size);
        return static_cast<uint32_t>(full_hash);
    }
};

} // namespace doris
2 changes: 1 addition & 1 deletion be/src/exec/common/hash_table/join_hash_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class JoinHashTable {
using key_type = Key;
using mapped_type = void*;
using value_type = void*;
size_t hash(const Key& x) const { return Hash()(x); }
uint32_t hash(const Key& x) const { return Hash()(x); }

size_t get_byte_size() const {
auto cal_vector_mem = [](const auto& vec) { return vec.capacity() * sizeof(vec[0]); };
Expand Down
12 changes: 7 additions & 5 deletions be/src/exec/common/join_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <algorithm>
#include <variant>

#include "exec/common/hash_table/hash_crc32_return32.h"
#include "exec/common/hash_table/hash_key_type.h"
#include "exec/common/hash_table/hash_map_context.h"
#include "exec/common/hash_table/join_hash_table.h"
Expand Down Expand Up @@ -71,18 +72,19 @@ template <int JoinOpType>
inline constexpr bool is_asof_outer_join_op_v = JoinOpType == TJoinOp::ASOF_LEFT_OUTER_JOIN;

template <class T>
using PrimaryTypeHashTableContext = MethodOneNumber<T, JoinHashMap<T, HashCRC32<T>, false>>;
using PrimaryTypeHashTableContext = MethodOneNumber<T, JoinHashMap<T, HashCRC32Return32<T>, false>>;

template <class T>
using DirectPrimaryTypeHashTableContext =
MethodOneNumberDirect<T, JoinHashMap<T, HashCRC32<T>, true>>;
MethodOneNumberDirect<T, JoinHashMap<T, HashCRC32Return32<T>, true>>;

template <class Key>
using FixedKeyHashTableContext = MethodKeysFixed<JoinHashMap<Key, HashCRC32<Key>, false>>;
using FixedKeyHashTableContext = MethodKeysFixed<JoinHashMap<Key, HashCRC32Return32<Key>, false>>;

using SerializedHashTableContext =
MethodSerialized<JoinHashMap<StringRef, DefaultHash<StringRef>, false>>;
using MethodOneString = MethodStringNoCache<JoinHashMap<StringRef, DefaultHash<StringRef>, false>>;
MethodSerialized<JoinHashMap<StringRef, HashCRC32Return32<StringRef>, false>>;
using MethodOneString =
MethodStringNoCache<JoinHashMap<StringRef, HashCRC32Return32<StringRef>, false>>;

using HashTableVariants = std::variant<
std::monostate, SerializedHashTableContext, PrimaryTypeHashTableContext<UInt8>,
Expand Down
Loading
Loading