Merge pull request #5262 from sajjadrahnama/fault_injection_activation

Fault Injection Activation/Deactivation
This commit is contained in:
Jingyu Zhou 2021-07-26 11:57:49 -07:00 committed by GitHub
commit 87bdeada98
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 34 additions and 7 deletions

View File

@ -46,10 +46,11 @@
#include "fdbrpc/Replication.h"
#include "fdbrpc/ReplicationUtils.h"
#include "fdbrpc/AsyncFileWriteChecker.h"
#include "flow/FaultInjection.h"
#include "flow/actorcompiler.h" // This must be the last #include.
bool simulator_should_inject_fault(const char* context, const char* file, int line, int error_code) {
if (!g_network->isSimulated())
if (!g_network->isSimulated() || !faultInjectionActivated)
return false;
auto p = g_simulator.getCurrentProcess();

View File

@ -41,6 +41,7 @@
#include "flow/ProtocolVersion.h"
#include "flow/network.h"
#include "flow/TypeTraits.h"
#include "flow/FaultInjection.h"
#include "flow/actorcompiler.h" // This must be the last #include.
#undef max
@ -1647,7 +1648,7 @@ void SimulationConfig::setTss(const TestConfig& testConfig) {
tssCount =
std::max(0, std::min(tssCount, (db.usableRegions * (machine_count / datacenters) - replication_type) / 2));
if (!testConfig.config.present() && tssCount > 0) {
if (!testConfig.config.present() && tssCount > 0 && faultInjectionActivated) {
std::string confStr = format("tss_count:=%d tss_storage_engine:=%d", tssCount, db.storageServerStoreType);
set_config(confStr);
double tssRandom = deterministicRandom()->random01();

View File

@ -68,6 +68,7 @@
#include "flow/TLSConfig.actor.h"
#include "flow/Tracing.h"
#include "flow/UnitTest.h"
#include "flow/FaultInjection.h"
#if defined(__linux__) || defined(__FreeBSD__)
#include <execinfo.h>
@ -92,7 +93,7 @@ enum {
OPT_DCID, OPT_MACHINE_CLASS, OPT_BUGGIFY, OPT_VERSION, OPT_BUILD_FLAGS, OPT_CRASHONERROR, OPT_HELP, OPT_NETWORKIMPL, OPT_NOBUFSTDOUT, OPT_BUFSTDOUTERR,
OPT_TRACECLOCK, OPT_NUMTESTERS, OPT_DEVHELP, OPT_ROLLSIZE, OPT_MAXLOGS, OPT_MAXLOGSSIZE, OPT_KNOB, OPT_UNITTESTPARAM, OPT_TESTSERVERS, OPT_TEST_ON_SERVERS, OPT_METRICSCONNFILE,
OPT_METRICSPREFIX, OPT_LOGGROUP, OPT_LOCALITY, OPT_IO_TRUST_SECONDS, OPT_IO_TRUST_WARN_ONLY, OPT_FILESYSTEM, OPT_PROFILER_RSS_SIZE, OPT_KVFILE,
OPT_TRACE_FORMAT, OPT_WHITELIST_BINPATH, OPT_BLOB_CREDENTIAL_FILE, OPT_CONFIG_PATH, OPT_USE_TEST_CONFIG_DB,
OPT_TRACE_FORMAT, OPT_WHITELIST_BINPATH, OPT_BLOB_CREDENTIAL_FILE, OPT_CONFIG_PATH, OPT_USE_TEST_CONFIG_DB, OPT_FAULT_INJECTION,
};
CSimpleOpt::SOption g_rgOptions[] = {
@ -177,6 +178,8 @@ CSimpleOpt::SOption g_rgOptions[] = {
{ OPT_BLOB_CREDENTIAL_FILE, "--blob_credential_file", SO_REQ_SEP },
{ OPT_CONFIG_PATH, "--config_path", SO_REQ_SEP },
{ OPT_USE_TEST_CONFIG_DB, "--use_test_config_db", SO_NONE },
{ OPT_FAULT_INJECTION, "-fi", SO_REQ_SEP },
{ OPT_FAULT_INJECTION, "--fault_injection", SO_REQ_SEP },
#ifndef TLS_DISABLED
TLS_OPTION_FLAGS
@ -646,6 +649,7 @@ static void printUsage(const char* name, bool devhelp) {
"--kvfile FILE",
"Input file (SQLite database file) for use by the 'kvfilegeneratesums' and 'kvfileintegritycheck' roles.");
printOptionUsage("-b [on,off], --buggify [on,off]", " Sets Buggify system state, defaults to `off'.");
printOptionUsage("-fi [on,off], --fault_injection [on,off]", " Sets fault injection, defaults to `on'.");
printOptionUsage("--crash", "Crash on serious errors instead of continuing.");
printOptionUsage("-N NETWORKIMPL, --network NETWORKIMPL",
" Select network implementation, `net2' (default),"
@ -960,7 +964,7 @@ struct CLIOptions {
8LL << 30; // Nice to maintain the same default value for memLimit and SERVER_KNOBS->SERVER_MEM_LIMIT and
// SERVER_KNOBS->COMMIT_BATCHES_MEM_BYTES_HARD_LIMIT
uint64_t storageMemLimit = 1LL << 30;
bool buggifyEnabled = false, restarting = false;
bool buggifyEnabled = false, faultInjectionEnabled = true, restarting = false;
Optional<Standalone<StringRef>> zoneId;
Optional<Standalone<StringRef>> dcId;
ProcessClass processClass = ProcessClass(ProcessClass::UnsetClass, ProcessClass::CommandLineSource);
@ -1382,6 +1386,17 @@ private:
flushAndExit(FDB_EXIT_ERROR);
}
break;
case OPT_FAULT_INJECTION:
if (!strcmp(args.OptionArg(), "on"))
faultInjectionEnabled = true;
else if (!strcmp(args.OptionArg(), "off"))
faultInjectionEnabled = false;
else {
fprintf(stderr, "ERROR: Unknown fault injection state `%s'\n", args.OptionArg());
printHelpTeaser(argv[0]);
flushAndExit(FDB_EXIT_ERROR);
}
break;
case OPT_CRASHONERROR:
g_crashOnError = true;
break;
@ -1638,6 +1653,7 @@ int main(int argc, char* argv[]) {
setThreadLocalDeterministicRandomSeed(opts.randomSeed);
enableBuggify(opts.buggifyEnabled, BuggifyType::General);
enableFaultInjection(opts.faultInjectionEnabled);
IKnobCollection::setGlobalKnobCollection(IKnobCollection::Type::SERVER,
Randomize::True,
@ -1795,6 +1811,7 @@ int main(int argc, char* argv[]) {
.detail("CommandLine", opts.commandLine)
.setMaxFieldLength(0)
.detail("BuggifyEnabled", opts.buggifyEnabled)
.detail("FaultInjectionEnabled", opts.faultInjectionEnabled)
.detail("MemoryLimit", opts.memLimit)
.trackLatest("ProgramStart");

View File

@ -25,6 +25,7 @@
#include "fdbserver/workloads/workloads.actor.h"
#include "fdbrpc/simulator.h"
#include "fdbclient/ManagementAPI.actor.h"
#include "flow/FaultInjection.h"
#include "flow/actorcompiler.h" // This must be the last #include.
static std::set<int> const& normalAttritionErrors() {
@ -78,8 +79,8 @@ struct MachineAttritionWorkload : TestWorkload {
std::vector<LocalityData> machines;
MachineAttritionWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) {
enabled =
!clientId && g_network->isSimulated(); // only do this on the "first" client, and only when in simulation
// only do this on the "first" client, and only when in simulation and only when fault injection is enabled
enabled = !clientId && g_network->isSimulated() && faultInjectionActivated;
machinesToKill = getOption(options, LiteralStringRef("machinesToKill"), 2);
machinesToLeave = getOption(options, LiteralStringRef("machinesToLeave"), 1);
workersToKill = getOption(options, LiteralStringRef("workersToKill"), 2);

View File

@ -20,4 +20,9 @@
#include "flow/FaultInjection.h"
bool (*should_inject_fault)(const char* context, const char* file, int line, int error_code) = 0;
bool (*should_inject_fault)(const char* context, const char* file, int line, int error_code) = 0;
// Process-wide flag recording whether fault injection is active.
// It starts out enabled and is only changed through enableFaultInjection().
bool faultInjectionActivated = true;

// Sets the global fault injection flag.
//   enabled - true to activate fault injection, false to deactivate it.
void enableFaultInjection(bool enabled) {
	faultInjectionActivated = enabled;
}

View File

@ -32,6 +32,8 @@
#define SHOULD_INJECT_FAULT(context) (should_inject_fault && should_inject_fault(context, __FILE__, __LINE__, 0))
extern bool (*should_inject_fault)(const char* context, const char* file, int line, int error_code);
// Global flag recording whether fault injection is active; written by enableFaultInjection().
extern bool faultInjectionActivated;
// Turns fault injection on (true) or off (false); called from the fdbserver main function.
extern void enableFaultInjection(bool enabled);
#else
#define INJECT_FAULT(error_type, context)
#endif