Skip to content

Commit

Permalink
test: Add fault injection in cache fuzzer
Browse files Browse the repository at this point in the history
Differential Revision: D67662693
  • Loading branch information
zacw7 authored and facebook-github-bot committed Dec 27, 2024
1 parent 9e418b1 commit e0d2783
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 11 deletions.
6 changes: 0 additions & 6 deletions velox/common/caching/SsdCache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,6 @@ SsdCache::SsdCache(const Config& config)
numShards_(config.numShards),
groupStats_(std::make_unique<FileGroupStats>()),
executor_(config.executor) {
// Make sure the given path of Ssd files has the prefix for local file system.
// Local file system would be derived based on the prefix.
VELOX_CHECK(
filePrefix_.find('/') == 0,
"Ssd path '{}' does not start with '/' that points to local file system.",
filePrefix_);
VELOX_CHECK_NOT_NULL(executor_);

VELOX_SSD_CACHE_LOG(INFO) << "SSD cache config: " << config.toString();
Expand Down
4 changes: 4 additions & 0 deletions velox/docs/develop/testing/cache-fuzzer.rst
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,8 @@ Here is a full list of supported command line arguments.

* ``--num_restarts``: Number of cache restarts in one iteration.

* ``--enable_file_faulty_injection``: Enable fault injection on read and write
operations for cache-related files. When enabled, each write operation fails
with a probability of 1 in 100, and each read operation fails with a
probability of 5 in 100.

If running from CLion IDE, add ``--logtostderr=1`` to see the full output.
43 changes: 38 additions & 5 deletions velox/exec/fuzzer/CacheFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,11 @@

#include <folly/executors/IOThreadPoolExecutor.h>
#include <gtest/gtest.h>
#include <regex>
#include "velox/common/caching/FileIds.h"
#include "velox/common/caching/SsdCache.h"
#include "velox/common/file/FileSystems.h"
#include "velox/common/file/tests/FaultyFileSystem.h"
#include "velox/common/memory/Memory.h"
#include "velox/common/memory/MmapAllocator.h"
#include "velox/dwio/common/CachedBufferedInput.h"
Expand Down Expand Up @@ -76,8 +78,15 @@ DEFINE_int64(

DEFINE_int32(num_restarts, 3, "Number of cache restarts in one iteration.");

// Command line switch for fault injection; on by default. The fuzzer reads it
// when creating the source data directory, when installing the file injection
// hook, and when logging the cache configuration.
DEFINE_bool(
enable_file_faulty_injection,
true,
"Enable fault injection on read and write operations for cache-related files. When enabled, "
"write operation will fail 1 out of 100 times, and read operation will fail 5 out of 100 times.");

using namespace facebook::velox::cache;
using namespace facebook::velox::dwio::common;
using namespace facebook::velox::tests::utils;

namespace facebook::velox::exec::test {
namespace {
Expand All @@ -90,6 +99,9 @@ class CacheFuzzer {

private:
static constexpr int32_t kRandomized = -1;
// Chance, out of 100, that the injection hook fails a write operation. The
// hook compares this against a uniform draw in [1, 100].
static constexpr int32_t kFileWriteErrorRate = 1;
// Chance, out of 100, that the injection hook fails a read operation
// (FaultFileOperation::Type::kReadv), using the same [1, 100] draw.
static constexpr int32_t kFileReadErrorRate = 5;
// Paths that fully match this pattern are treated as input data files and are
// never failed by the injection hook.
inline static const std::regex kDataFileNamePattern{".*/file_\\d+"};

void seed(size_t seed) {
currentSeed_ = seed;
Expand Down Expand Up @@ -175,10 +187,12 @@ bool isDone(size_t i, T startTime) {
// Seeds the fuzzer with 'initialSeed' and registers the file systems it uses.
// The faulty file system is registered unconditionally, independent of
// FLAGS_enable_file_faulty_injection.
CacheFuzzer::CacheFuzzer(size_t initialSeed) {
seed(initialSeed);
filesystems::registerLocalFileSystem();
registerFaultyFileSystem();
}

void CacheFuzzer::initSourceDataFiles() {
sourceDataDir_ = exec::test::TempDirectoryPath::create();
sourceDataDir_ =
exec::test::TempDirectoryPath::create(FLAGS_enable_file_faulty_injection);
fs_ = filesystems::getFileSystem(sourceDataDir_->getPath(), nullptr);

// Create files with random sizes.
Expand Down Expand Up @@ -211,6 +225,25 @@ void CacheFuzzer::initSourceDataFiles() {
fileSizes_.emplace_back(fileSize);
}
}

// When fault injection is enabled, install a hook that randomly fails write
// and read operations on every file except the input data files.
if (FLAGS_enable_file_faulty_injection) {
  // The hook captures nothing: it is handed to the faulty file system, which
  // keeps it until the injections are cleared, and it only reads static class
  // members. Capturing by reference would invite dangling references.
  faultyFileSystem()->setFileInjectionHook([](FaultFileOperation* op) {
    if (std::regex_match(op->path, kDataFileNamePattern)) {
      // Skip errors on input data files.
      return;
    }
    // One entropy source per thread rather than one per file operation;
    // constructing std::random_device on every call is needlessly expensive.
    static thread_local std::random_device rd;
    boost::random::uniform_int_distribution<int> dist(1, 100);
    // A draw in [1, 100] at or below the rate triggers the failure, so the
    // rates are percentages.
    if (op->type == FaultFileOperation::Type::kWrite &&
        dist(rd) <= kFileWriteErrorRate) {
      VELOX_FAIL("Inject hook write failure");
    }
    if (op->type == FaultFileOperation::Type::kReadv &&
        dist(rd) <= kFileReadErrorRate) {
      VELOX_FAIL("Inject hook read failure");
    }
  });
}
}

int64_t CacheFuzzer::getMemoryCacheBytes(bool restartCache) {
Expand Down Expand Up @@ -252,7 +285,6 @@ int32_t CacheFuzzer::getSsdCacheShards(bool restartCache) {
lastNumSsdCacheShards_ = FLAGS_num_ssd_cache_shards;
}
}

return lastNumSsdCacheShards_;
}

Expand Down Expand Up @@ -334,9 +366,10 @@ void CacheFuzzer::initializeCache(bool restartCache) {
{});

// Summarize the cache setup. A zero 'ssdCacheBytes' is reported as "without"
// SSD cache (presumably zero means no SSD cache is configured); the original
// ternary had the two labels swapped.
LOG(INFO) << fmt::format(
    "Initialized cache with {} memory space, {} SSD cache, {} file faulty injection",
    succinctBytes(memoryCacheBytes),
    ssdCacheBytes == 0 ? "without" : "with",
    FLAGS_enable_file_faulty_injection ? "with" : "without");
}

void CacheFuzzer::initializeInputs() {
Expand Down Expand Up @@ -418,6 +451,7 @@ void CacheFuzzer::resetSourceDataFiles() {
if (fs_->exists(sourceDataDirPath)) {
fs_->rmdir(sourceDataDirPath);
}
faultyFileSystem()->clearFileFaultInjections();
fs_.reset();
sourceDataDir_.reset();
fileNames_.clear();
Expand All @@ -427,7 +461,6 @@ void CacheFuzzer::resetSourceDataFiles() {
}

void CacheFuzzer::read(uint32_t fileIdx, int32_t fragmentIdx) {
// TODO: Faulty injection.
const auto [offset, length] = fileFragments_[fileIdx][fragmentIdx];
auto stream = inputs_[fileIdx]->read(offset, length, LogType::TEST);
const void* buffer;
Expand Down

0 comments on commit e0d2783

Please sign in to comment.