Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions be/src/storage/segment/segment_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -329,11 +329,14 @@ Status SegmentWriter::init(const std::vector<uint32_t>& col_ids, bool has_key) {
_opts.compression_type = _tablet_schema->compression_type();
}

// Vertical compaction calls init() multiple times against the same writer; the footer accumulates entries
// across calls, so this init()'s slice of footer columns starts at the current size.
const int variant_stats_footer_offset = _footer.columns_size();
RETURN_IF_ERROR(_create_writers(_tablet_schema, col_ids));

// Initialize variant statistics calculator
_variant_stats_calculator =
std::make_unique<VariantStatsCaculator>(&_footer, _tablet_schema, col_ids);
_variant_stats_calculator = std::make_unique<VariantStatsCaculator>(
&_footer, _tablet_schema, col_ids, variant_stats_footer_offset);

// we don't need the short key index for unique key merge on write table.
if (_has_key) {
Expand Down
10 changes: 6 additions & 4 deletions be/src/storage/segment/variant_stats_calculator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,12 @@ namespace doris::segment_v2 {

VariantStatsCaculator::VariantStatsCaculator(SegmentFooterPB* footer,
TabletSchemaSPtr tablet_schema,
const std::vector<uint32_t>& column_ids)
const std::vector<uint32_t>& column_ids,
int footer_column_offset)
: _footer(footer), _tablet_schema(tablet_schema), _column_ids(column_ids) {
// Build the path to footer index mapping during initialization
for (int i = 0; i < _footer->columns_size(); ++i) {
// Only walk this init()'s slice of footer entries; earlier init() calls (vertical compaction's previous
// column groups) are not addressable via `column_ids` and would only inflate this scan.
for (int i = footer_column_offset; i < _footer->columns_size(); ++i) {
const auto& column = _footer->columns(i);
// path that need to record stats
if (column.has_column_path_info() &&
Expand Down Expand Up @@ -115,4 +117,4 @@ void VariantStatsCaculator::_calculate_sub_column_stats(const IColumn& column,
<< " (added " << current_non_null_count << " from current block)";
}

} // namespace doris::segment_v2
} // namespace doris::segment_v2
8 changes: 6 additions & 2 deletions be/src/storage/segment/variant_stats_calculator.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,12 @@ namespace doris::segment_v2 {

class VariantStatsCaculator {
public:
// `footer_column_offset` is the index of the first footer entry that belongs to this init()'s `column_ids`.
// Required because SegmentWriter::init() can be invoked multiple times (vertical compaction) against
// an ever-growing footer; without the offset every additional init() would re-scan the whole footer.
explicit VariantStatsCaculator(SegmentFooterPB* footer, TabletSchemaSPtr tablet_schema,
const std::vector<uint32_t>& column_ids);
const std::vector<uint32_t>& column_ids,
int footer_column_offset = 0);

// Calculate variant statistics for the given column and block
Status calculate_variant_stats(const Block* block, size_t row_pos, size_t num_rows);
Expand All @@ -54,4 +58,4 @@ class VariantStatsCaculator {
size_t row_pos, size_t num_rows);
};

} // namespace doris::segment_v2
} // namespace doris::segment_v2
35 changes: 35 additions & 0 deletions be/test/storage/segment/variant_stats_calculator_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -447,4 +447,39 @@ TEST_F(VariantStatsCalculatorTest, CalculateVariantStatsWithExtendedSchema) {
EXPECT_TRUE(status.ok());
}

// Verifies that a calculator constructed with a non-zero footer offset records
// its statistics on the footer entry at/after that offset and leaves the
// entries that precede the offset untouched.
//
// Scenario modelled (per the production comments): vertical compaction calls
// SegmentWriter::init() several times on one writer, so the footer already
// holds entries from earlier init() calls when a later calculator is built.
TEST_F(VariantStatsCalculatorTest, CalculateVariantStatsWithFooterOffset) {
    // Two footer entries stand in for leftovers of a previous init() call.
    add_footer_column_with_path(1, "stale_from_prev_init");
    add_footer_column_with_path(1, "another_stale_entry");

    // Everything appended from here on belongs to "this" init()'s slice.
    const int slice_begin = _footer->columns_size();
    add_footer_column_with_path(1, "sub_column");

    // Register the matching variant sub-column in the tablet schema.
    TabletColumn variant_sub_column =
            create_variant_column(2, "variant.sub_column", 1, "sub_column");
    _tablet_schema->append_column(variant_sub_column);

    std::vector<uint32_t> selected_column_ids = {0};
    VariantStatsCaculator calculator(_footer.get(), _tablet_schema, selected_column_ids,
                                     slice_begin);

    // Three rows, the middle one NULL => two non-null values are expected.
    auto nullable_string_type =
            std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>());
    auto sub_column_data = create_nullable_column({false, true, false}, {"a", "", "c"});
    Block block;
    block.insert({std::move(sub_column_data), nullable_string_type, "sub_column"});

    auto status = calculator.calculate_variant_stats(&block, 0, 3);
    EXPECT_TRUE(status.ok());

    // The pre-existing entries must not have received any stats.
    // NOTE(review): the stale entries use path strings different from
    // "sub_column"; whether they would really collide in the calculator's
    // index map without the offset is not visible from this test — confirm
    // that this case fails when the offset is ignored.
    for (const int stale_idx : {0, 1}) {
        EXPECT_EQ(_footer->columns(stale_idx).none_null_size(), 0);
    }

    // The entry inside this init()'s slice carries the two non-null rows.
    EXPECT_EQ(_footer->columns(slice_begin).none_null_size(), 2);
}

} // namespace doris::segment_v2
Loading