A 32-bit checksum is not enough to prevent the ABA problem when multiple threads Get and Set the same key
ZiyanShi opened this issue · comments
Bug Report
KVDK version
System configuration
2 * Intel(R) Xeon(R) Platinum 8269C CPU @ 2.50GHz
CPU: 2 * Intel(R) Xeon(R) Platinum 8269C CPU @ 2.50GHz
DDR: 12 * DDR4 16384 MB 2666 MT/s
PMEM: 4 * 129408 MB 2666 MT/s
Reproduce steps
Modify kv_engine.hpp
// Original implementation as quoted from kv_engine.hpp.
// Returns get_cpu_tsc() minus ts_on_startup_ plus newest_version_on_startup_.
inline uint64_t get_timestamp() {
auto res = get_cpu_tsc() - ts_on_startup_ + newest_version_on_startup_;
return res;
}
to
// Reproduction-only replacement: the timestamp is pinned to the constant 0
// so that the issue is easier to trigger.
inline uint64_t get_timestamp() {
return 0;
}
Add test case to stress_test.cpp
// Stress test: two threads repeatedly Set the same key to one of two values
// while two other threads Get it, asserting that every value read equals one
// of the two values written.
TEST(HotspotTest, StringABATest)
{
// Remove any database directory left over from a previous run.
const std::string path_db{"/mnt/pmem0/kvdk_test_hotspot"};
std::string cmd = "rm -rf " + path_db + "\n";
int _sink = system(cmd.data());
kvdk::Configs configs;
configs.pmem_file_size = 1ULL << 30;
configs.max_write_threads = 4;
kvdk::Engine* engine = nullptr;
// NOTE(review): the Status returned by Open is not checked here.
kvdk::Engine::Open(path_db.data(), &engine, configs, stderr);
size_t n_skip = 636031;
size_t v_size = (1ULL << 12);
ProgressBar bar{std::cout, "", n_skip};
// Generate and discard n_skip random strings.
// Presumably this advances the generator so that the next GetRandomString
// call yields the specific value found by the birthday attack - TODO confirm.
for (size_t i = 0; i < n_skip; i++)
{
GetRandomString(v_size);
bar.Update(i + 1);
}
std::string key{"some_key"};
key.resize(40);
// The two values written to the key: an all-zero string, and a random
// string whose bytes from index 3456 onward are overwritten with zero.
std::string zero_filled(v_size, 0);
std::string value{GetRandomString(v_size)};
for (size_t i = 3456; i < value.size(); i++)
value[i] = 0;
engine->Set(key, zero_filled);
// Thread body: even tids are writers, odd tids are readers.
auto SetOrGet = [&](size_t tid)
{
size_t n_iteration = (1ULL << 20);
if (tid % 2 == 0)
{
// Writer: alternately store zero_filled and value under the same key.
ProgressBar bar2{std::cout, "", n_iteration, (tid == 0)};
for (size_t i = 0; i < n_iteration; i++)
{
EXPECT_EQ(engine->Set(key, zero_filled), kvdk::Status::Ok);
EXPECT_EQ(engine->Set(key, value), kvdk::Status::Ok);
// Only tid 0 updates the progress bar, once every 1024 iterations.
if ((i + 1) % (1ULL << 10) == 0 && (tid == 0))
bar2.Update(i + 1);
}
}
else
{
// Reader: every value read must be exactly one of the two written values;
// anything else is a mix of both and fails the test.
std::string value_got;
for (size_t i = 0; i < n_iteration; i++)
{
EXPECT_EQ(engine->Get(key, &value_got), kvdk::Status::Ok);
ASSERT_TRUE(value_got == zero_filled || value_got == value)
<< "Got invalid value: \n"
<< value_got << "\n"
<< "Expected: \n"
<< value << std::endl;
}
}
};
// Run the lambda on 4 threads (presumably tid = 0..3, i.e. two writers and
// two readers - verify against LaunchNThreads).
LaunchNThreads(4, SetOrGet);
// NOTE(review): engine is never deleted/closed in this test before the
// database directory is removed - confirm whether that is intended.
_sink = system(cmd.data());
}
Build as Release and run
./build/dbstress_test --gtest_filter="*ABA*" --gtest_break_on_failure --gtest_catch_exceptions=1 --gtest_repeat=-1
Expected behavior
The test case succeeds.
Current behavior
The test case fails.
Note
This test case was found by a birthday attack.
get_timestamp() is hard-coded to return 0 to make the issue easier to reproduce.