Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test: Add fault injection in cache fuzzer #11969

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion velox/common/caching/SsdCache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ SsdCache::SsdCache(const Config& config)
// Make sure the given path of Ssd files has the prefix for local file system.
// Local file system would be derived based on the prefix.
VELOX_CHECK(
filePrefix_.find('/') == 0,
filePrefix_.find('/') == 0 || filePrefix_.find("faulty:/") == 0,
"Ssd path '{}' does not start with '/' that points to local file system.",
filePrefix_);
VELOX_CHECK_NOT_NULL(executor_);
Expand Down
4 changes: 4 additions & 0 deletions velox/docs/develop/testing/cache-fuzzer.rst
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,8 @@ Here is a full list of supported command line arguments.

* ``--num_restarts``: Number of cache restarts in one iteration.

* ``--enable_file_faulty_injection``: Enable fault injection on read and write
operations for cache-related files. When enabled, each file read or write
operation fails with a probability of 5%.

If running from CLion IDE, add ``--logtostderr=1`` to see the full output.
45 changes: 40 additions & 5 deletions velox/exec/fuzzer/CacheFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "velox/common/caching/FileIds.h"
#include "velox/common/caching/SsdCache.h"
#include "velox/common/file/FileSystems.h"
#include "velox/common/file/tests/FaultyFileSystem.h"
#include "velox/common/memory/Memory.h"
#include "velox/common/memory/MmapAllocator.h"
#include "velox/dwio/common/CachedBufferedInput.h"
Expand Down Expand Up @@ -76,8 +77,15 @@ DEFINE_int64(

DEFINE_int32(num_restarts, 3, "Number of cache restarts in one iteration.");

// Command-line switch for fault injection on cache file I/O. Defaults to true.
// When set, initSourceDataFiles() creates the cache data directory with fault
// injection enabled and installs a hook that randomly fails read and write
// operations via VELOX_FAIL.
DEFINE_bool(
enable_file_faulty_injection,
true,
"Enable fault injection on read and write operations for cache-related files. When enabled, "
"the file read and write operations will fail 5 out of 100 times.");

using namespace facebook::velox::cache;
using namespace facebook::velox::dwio::common;
using namespace facebook::velox::tests::utils;

namespace facebook::velox::exec::test {
namespace {
Expand All @@ -90,6 +98,7 @@ class CacheFuzzer {

private:
static constexpr int32_t kRandomized = -1;
static constexpr int32_t kFileFaultInjectionPct = 5;

void seed(size_t seed) {
currentSeed_ = seed;
Expand Down Expand Up @@ -149,6 +158,7 @@ class CacheFuzzer {
std::vector<std::vector<std::pair<int32_t, int32_t>>> fileFragments_;
std::vector<std::unique_ptr<CachedBufferedInput>> inputs_;
std::shared_ptr<exec::test::TempDirectoryPath> sourceDataDir_;
std::shared_ptr<exec::test::TempDirectoryPath> cacheDataDir_;
std::unique_ptr<memory::MemoryManager> memoryManager_;
std::unique_ptr<folly::IOThreadPoolExecutor> executor_;
std::shared_ptr<AsyncDataCache> cache_;
Expand All @@ -175,10 +185,14 @@ bool isDone(size_t i, T startTime) {
// Constructs the fuzzer: applies 'initialSeed' through seed() and registers
// the file systems used by the run.
CacheFuzzer::CacheFuzzer(size_t initialSeed) {
seed(initialSeed);
filesystems::registerLocalFileSystem();
// The faulty file system is registered unconditionally; whether a fault
// injection hook is installed on it is decided in initSourceDataFiles() from
// FLAGS_enable_file_faulty_injection.
registerFaultyFileSystem();
}

void CacheFuzzer::initSourceDataFiles() {
// Skip errors on source data files.
sourceDataDir_ = exec::test::TempDirectoryPath::create();
cacheDataDir_ =
exec::test::TempDirectoryPath::create(FLAGS_enable_file_faulty_injection);
fs_ = filesystems::getFileSystem(sourceDataDir_->getPath(), nullptr);

// Create files with random sizes.
Expand Down Expand Up @@ -211,6 +225,23 @@ void CacheFuzzer::initSourceDataFiles() {
fileSizes_.emplace_back(fileSize);
}
}

if (FLAGS_enable_file_faulty_injection) {
faultyFileSystem()->setFileInjectionHook([&](FaultFileOperation* op) {
std::random_device rd;
boost::random::uniform_int_distribution<int> dist(1, 100);
if ((op->type == FaultFileOperation::Type::kWrite ||
op->type == FaultFileOperation::Type::kAppend) &&
dist(rd) <= kFileFaultInjectionPct) {
VELOX_FAIL("Inject hook write failure");
}
if ((op->type == FaultFileOperation::Type::kReadv ||
op->type == FaultFileOperation::Type::kRead) &&
dist(rd) <= kFileFaultInjectionPct) {
VELOX_FAIL("Inject hook read failure");
}
});
}
}

int64_t CacheFuzzer::getMemoryCacheBytes(bool restartCache) {
Expand Down Expand Up @@ -252,7 +283,6 @@ int32_t CacheFuzzer::getSsdCacheShards(bool restartCache) {
lastNumSsdCacheShards_ = FLAGS_num_ssd_cache_shards;
}
}

return lastNumSsdCacheShards_;
}

Expand Down Expand Up @@ -302,7 +332,7 @@ void CacheFuzzer::initializeCache(bool restartCache) {
enableChecksumReadVerification(restartCache);

SsdCache::Config config(
fmt::format("{}/cache", sourceDataDir_->getPath()),
fmt::format("{}/cache", cacheDataDir_->getPath()),
ssdCacheBytes,
numSsdCacheShards,
executor_.get(),
Expand Down Expand Up @@ -334,9 +364,10 @@ void CacheFuzzer::initializeCache(bool restartCache) {
{});

LOG(INFO) << fmt::format(
"Initialized cache with {} memory space, {} SSD cache",
"Initialized cache with {} memory space, {} SSD cache, {} file faulty injection",
succinctBytes(memoryCacheBytes),
ssdCacheBytes == 0 ? "with" : "without");
ssdCacheBytes == 0 ? "with" : "without",
FLAGS_enable_file_faulty_injection ? "with" : "without");
}

void CacheFuzzer::initializeInputs() {
Expand Down Expand Up @@ -415,19 +446,23 @@ void CacheFuzzer::resetCache() {

void CacheFuzzer::resetSourceDataFiles() {
const auto& sourceDataDirPath = sourceDataDir_->getPath();
const auto& cacheDataDirPath = cacheDataDir_->getPath();
if (fs_->exists(sourceDataDirPath)) {
fs_->rmdir(sourceDataDirPath);
}
if (fs_->exists(cacheDataDirPath)) {
fs_->rmdir(cacheDataDirPath);
}
fs_.reset();
sourceDataDir_.reset();
cacheDataDir_.reset();
fileNames_.clear();
fileIds_.clear();
fileSizes_.clear();
fileIds().testingReset();
}

void CacheFuzzer::read(uint32_t fileIdx, int32_t fragmentIdx) {
// TODO: Faulty injection.
const auto [offset, length] = fileFragments_[fileIdx][fragmentIdx];
auto stream = inputs_[fileIdx]->read(offset, length, LogType::TEST);
const void* buffer;
Expand Down
Loading