Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 113 additions & 0 deletions src/support/delta_debugging.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
/*
* Copyright 2026 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef wasm_support_delta_debugging_h
#define wasm_support_delta_debugging_h

#include <algorithm>
#include <cassert>
#include <vector>

namespace wasm {

// Use the delta debugging algorithm (introduced by Zeller in 1999; the
// minimizing variant implemented here is described in Zeller & Hildebrandt,
// "Simplifying and Isolating Failure-Inducing Input",
// https://dl.acm.org/doi/10.1109/32.988498) to shrink `items` down to a subset
// that is still sufficient to preserve some property. Returns that subset,
// preserving the input order of its items. `tryPartition` should have this
// signature:
//
//   bool tryPartition(size_t partitionIndex,
//                     size_t numPartitions,
//                     const std::vector<T>& partition)
//
// It should return true iff the property is preserved while keeping only the
// items in `partition`. `partition` is either chunk number `partitionIndex`
// (out of `numPartitions` chunks of the current items) or, once no single
// chunk has worked, the current items with that one chunk removed.
//
// Notes on the result:
//  * When the search ends with two or more items, every single item and every
//    "all but one item" subset has been rejected by `tryPartition`.
//  * The empty set is never passed to `tryPartition`, so inputs with fewer
//    than two items are returned unchanged without calling it, and a result
//    of size one is not checked against the empty set.
template<typename T, typename F>
std::vector<T> deltaDebugging(std::vector<T> items, F&& tryPartition) {
  using Offset = typename std::vector<T>::difference_type;

  size_t numPartitions = 2;
  while (numPartitions <= items.size()) {
    // Split the items into `numPartitions` contiguous chunks whose sizes
    // differ by at most one; the first `numLarger` chunks get the extra item.
    const size_t total = items.size();
    const size_t baseSize = total / numPartitions;
    const size_t numLarger = total % numPartitions;
    // The loop condition guarantees that no chunk is empty.
    assert(baseSize > 0);

    // Chunk `p` covers items [bounds[p], bounds[p + 1]). Only the bounds are
    // stored; candidate vectors are built on demand so that chunks we never
    // get to test are never copied.
    std::vector<size_t> bounds(numPartitions + 1, 0);
    for (size_t p = 0; p < numPartitions; ++p) {
      bounds[p + 1] = bounds[p] + baseSize + (p < numLarger ? 1 : 0);
    }
    auto at = [&items](size_t pos) {
      return items.begin() + static_cast<Offset>(pos);
    };

    bool reduced = false;

    // Try keeping only one chunk. Try each chunk in turn.
    for (size_t p = 0; p < numPartitions; ++p) {
      std::vector<T> chunk(at(bounds[p]), at(bounds[p + 1]));
      if (tryPartition(p, numPartitions, chunk)) {
        // Success: restart at the coarsest granularity on the smaller set.
        items = std::move(chunk);
        numPartitions = 2;
        reduced = true;
        break;
      }
    }

    // Otherwise, try keeping the complement of a chunk. Do not do this with
    // only two chunks because that would be no different from what we already
    // tried.
    if (!reduced && numPartitions > 2) {
      for (size_t p = 0; p < numPartitions; ++p) {
        std::vector<T> rest;
        rest.reserve(total - (bounds[p + 1] - bounds[p]));
        rest.insert(rest.end(), items.begin(), at(bounds[p]));
        rest.insert(rest.end(), at(bounds[p + 1]), items.end());
        if (tryPartition(p, numPartitions, rest)) {
          // One chunk is gone, so keep the same granularity over what is left.
          items = std::move(rest);
          numPartitions = std::max(numPartitions - 1, size_t(2));
          reduced = true;
          break;
        }
      }
    }

    if (reduced) {
      continue;
    }

    // Nothing worked at this granularity. Stop if we were already testing
    // single items; otherwise make the chunks finer grained.
    if (numPartitions >= items.size()) {
      break;
    }
    numPartitions = std::min(items.size(), 2 * numPartitions);
  }
  return items;
}

} // namespace wasm

#endif // wasm_support_delta_debugging_h
54 changes: 46 additions & 8 deletions src/tools/wasm-reduce/wasm-reduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,12 @@

#include "ir/branch-utils.h"
#include "ir/iteration.h"
#include "ir/literal-utils.h"
#include "ir/properties.h"
#include "ir/utils.h"
#include "pass.h"
#include "support/colors.h"
#include "support/command-line.h"
#include "support/delta_debugging.h"
#include "support/file.h"
#include "support/hash.h"
#include "support/path.h"
Expand Down Expand Up @@ -894,8 +894,45 @@ struct Reducer
}
}

// Reduces entire functions at a time. Returns whether we did a significant
// amount of reduction that justifies doing even more.
// Runs deltaDebugging() over the indices of the module's functions. For each
// candidate set of indices to keep, the names of all *other* functions are
// passed to tryToEmptyFunctions(); a successful attempt is recorded with
// noteReduction(). The subset returned by deltaDebugging() is discarded.
// NOTE(review): the comment directly above this function mentions a return
// value, but this function returns void - it looks like it describes
// reduceFunctions() below. Confirm and move it if so.
void reduceFunctionBodies() {
std::cerr << "| try to remove function bodies\n";
// Use function indices to speed up finding the complement of the kept
// partition.
std::vector<Index> funcs;
funcs.reserve(module->functions.size());
// funcs is 0, 1, ..., N-1, i.e. strictly increasing.
for (Index i = 0; i < module->functions.size(); ++i) {
funcs.push_back(i);
}
deltaDebugging(
std::move(funcs),
// Callback: `partition` holds the indices of the functions to keep. Returns
// whether tryToEmptyFunctions() succeeded on all the other functions.
[&](Index partitionIndex,
Index numPartitions,
const std::vector<Index>& partition) {
// Log a 1-based partition index.
std::cerr << "| try partition " << partitionIndex + 1 << " / "
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why add 1 here?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Printing 1-based indices is slightly more intuitive than 0-based indices.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree to disagree 😄

<< numPartitions << " (size " << partition.size() << ")\n";
// Collect the names of every function whose index is not in `partition`.
std::vector<Name> removed;
removed.reserve(module->functions.size() - partition.size());
// Single merge-style pass; this relies on `partition` being in increasing
// order, which holds because funcs is increasing and deltaDebugging()
// preserves input order.
Index i = 0;
for (Index j : partition) {
// Everything in the gap before kept index j is removed.
while (i < j) {
removed.push_back(module->functions[i++]->name);
}
// Skip over the kept function j itself.
++i;
}
// Everything after the last kept index is removed as well.
while (i < module->functions.size()) {
removed.push_back(module->functions[i++]->name);
}
// tryToEmptyFunctions() is defined outside this view; presumably it empties
// the named functions' bodies and reports whether that reduction was
// accepted - TODO confirm.
if (tryToEmptyFunctions(removed)) {
// TODO: Consider doing this just once after the delta debugging since
// we never need to restore from the working copy while removing
// function bodies.
noteReduction(removed.size());
return true;
}
return false;
});
}

bool reduceFunctions() {
// try to remove functions
std::vector<Name> functionNames;
Expand Down Expand Up @@ -936,11 +973,9 @@ struct Reducer
}
std::cerr << "| trying at i=" << i << " of size " << names.size()
<< "\n";
// Try to remove functions and/or empty them. Note that
// tryToRemoveFunctions() will reload the module if it fails, which means
// function names may change - for that reason, run it second.
justReduced = tryToEmptyFunctions(names) || tryToRemoveFunctions(names);
if (justReduced) {
// Note that tryToRemoveFunctions() will reload the module if it fails,
// which means function names may change.
if (tryToRemoveFunctions(names)) {
noteReduction(names.size());
// Subtract 1 since the loop increments us anyhow by one: we want to
// skip over the skipped functions, and not any more.
Expand All @@ -967,8 +1002,11 @@ struct Reducer
assert(curr == module.get());
curr = nullptr;

reduceFunctionBodies();

// Reduction of entire functions at a time is very effective, and we do it
// with exponential growth and backoff, so keep doing it while it works.
// TODO: Figure out how to use delta debugging for this as well.
while (reduceFunctions()) {
}

Expand Down
1 change: 1 addition & 0 deletions test/gtest/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ set(unittest_SOURCES
cast-check.cpp
cfg.cpp
dataflow.cpp
delta_debugging.cpp
dfa_minimization.cpp
disjoint_sets.cpp
leaves.cpp
Expand Down
82 changes: 82 additions & 0 deletions test/gtest/delta_debugging.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
#include "support/delta_debugging.h"
#include "gtest/gtest.h"
#include <algorithm>
#include <string>
#include <vector>

using namespace wasm;

// An empty input must come back empty, whatever the predicate says.
TEST(DeltaDebuggingTest, EmptyInput) {
  const std::vector<int> nothing;
  auto neverHolds = [](size_t, size_t, const std::vector<int>&) {
    return false;
  };
  auto reducedItems = deltaDebugging(nothing, neverHolds);
  EXPECT_TRUE(reducedItems.empty());
}

// When a single item carries the property, only that item should survive.
TEST(DeltaDebuggingTest, SingleItem) {
  const std::vector<int> input = {0, 1, 2, 3, 4, 5, 6, 7};
  // The property holds exactly when 3 is among the kept items.
  auto keeps3 = [](size_t, size_t, const std::vector<int>& kept) {
    auto found = std::find(kept.begin(), kept.end(), 3);
    return found != kept.end();
  };
  const std::vector<int> expected = {3};
  EXPECT_EQ(deltaDebugging(input, keeps3), expected);
}

// Two required items that sit next to each other in the input.
TEST(DeltaDebuggingTest, MultipleItemsAdjacent) {
  const std::vector<int> input = {0, 1, 2, 3, 4, 5, 6, 7};
  // The property holds only while both 2 and 3 are kept.
  auto keeps2And3 = [](size_t, size_t, const std::vector<int>& kept) {
    auto has = [&kept](int wanted) {
      return std::find(kept.begin(), kept.end(), wanted) != kept.end();
    };
    return has(2) && has(3);
  };
  const std::vector<int> expected = {2, 3};
  EXPECT_EQ(deltaDebugging(input, keeps2And3), expected);
}

// Two required items separated by items that are not needed.
TEST(DeltaDebuggingTest, MultipleItemsNonAdjacent) {
  const std::vector<int> input = {0, 1, 2, 3, 4, 5, 6, 7};
  // The property holds only while both 2 and 5 are kept.
  auto keeps2And5 = [](size_t, size_t, const std::vector<int>& kept) {
    auto has = [&kept](int wanted) {
      return std::find(kept.begin(), kept.end(), wanted) != kept.end();
    };
    return has(2) && has(5);
  };
  const std::vector<int> expected = {2, 5};
  EXPECT_EQ(deltaDebugging(input, keeps2And5), expected);
}

// The surviving items must appear in their input order, not in sorted order.
TEST(DeltaDebuggingTest, OrderMaintained) {
  const std::vector<int> input = {3, 1, 4, 2};
  // The property holds only while both 3 and 2 are kept.
  auto keeps3And2 = [](size_t, size_t, const std::vector<int>& kept) {
    auto has = [&kept](int wanted) {
      return std::find(kept.begin(), kept.end(), wanted) != kept.end();
    };
    return has(3) && has(2);
  };
  const std::vector<int> expected = {3, 2};
  EXPECT_EQ(deltaDebugging(input, keeps3And2), expected);
}

// The algorithm is generic over the item type; exercise it with strings.
TEST(DeltaDebuggingTest, DifferentTypes) {
  const std::vector<std::string> input = {"apple", "banana", "cherry", "date"};
  // The property holds only while both "banana" and "date" are kept.
  auto keepsBananaAndDate =
    [](size_t, size_t, const std::vector<std::string>& kept) {
      auto has = [&kept](const char* wanted) {
        return std::find(kept.begin(), kept.end(), wanted) != kept.end();
      };
      return has("banana") && has("date");
    };
  const std::vector<std::string> expected = {"banana", "date"};
  EXPECT_EQ(deltaDebugging(input, keepsBananaAndDate), expected);
}
Loading