Merge pull request #5262 from sajjadrahnama/fault_injection_activation
Fault Injection Activation/Deactivation
This commit is contained in:
commit
87bdeada98
|
@ -46,10 +46,11 @@
|
|||
#include "fdbrpc/Replication.h"
|
||||
#include "fdbrpc/ReplicationUtils.h"
|
||||
#include "fdbrpc/AsyncFileWriteChecker.h"
|
||||
#include "flow/FaultInjection.h"
|
||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
|
||||
bool simulator_should_inject_fault(const char* context, const char* file, int line, int error_code) {
|
||||
if (!g_network->isSimulated())
|
||||
if (!g_network->isSimulated() || !faultInjectionActivated)
|
||||
return false;
|
||||
|
||||
auto p = g_simulator.getCurrentProcess();
|
||||
|
|
|
@ -41,6 +41,7 @@
|
|||
#include "flow/ProtocolVersion.h"
|
||||
#include "flow/network.h"
|
||||
#include "flow/TypeTraits.h"
|
||||
#include "flow/FaultInjection.h"
|
||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
|
||||
#undef max
|
||||
|
@ -1647,7 +1648,7 @@ void SimulationConfig::setTss(const TestConfig& testConfig) {
|
|||
tssCount =
|
||||
std::max(0, std::min(tssCount, (db.usableRegions * (machine_count / datacenters) - replication_type) / 2));
|
||||
|
||||
if (!testConfig.config.present() && tssCount > 0) {
|
||||
if (!testConfig.config.present() && tssCount > 0 && faultInjectionActivated) {
|
||||
std::string confStr = format("tss_count:=%d tss_storage_engine:=%d", tssCount, db.storageServerStoreType);
|
||||
set_config(confStr);
|
||||
double tssRandom = deterministicRandom()->random01();
|
||||
|
|
|
@ -68,6 +68,7 @@
|
|||
#include "flow/TLSConfig.actor.h"
|
||||
#include "flow/Tracing.h"
|
||||
#include "flow/UnitTest.h"
|
||||
#include "flow/FaultInjection.h"
|
||||
|
||||
#if defined(__linux__) || defined(__FreeBSD__)
|
||||
#include <execinfo.h>
|
||||
|
@ -92,7 +93,7 @@ enum {
|
|||
OPT_DCID, OPT_MACHINE_CLASS, OPT_BUGGIFY, OPT_VERSION, OPT_BUILD_FLAGS, OPT_CRASHONERROR, OPT_HELP, OPT_NETWORKIMPL, OPT_NOBUFSTDOUT, OPT_BUFSTDOUTERR,
|
||||
OPT_TRACECLOCK, OPT_NUMTESTERS, OPT_DEVHELP, OPT_ROLLSIZE, OPT_MAXLOGS, OPT_MAXLOGSSIZE, OPT_KNOB, OPT_UNITTESTPARAM, OPT_TESTSERVERS, OPT_TEST_ON_SERVERS, OPT_METRICSCONNFILE,
|
||||
OPT_METRICSPREFIX, OPT_LOGGROUP, OPT_LOCALITY, OPT_IO_TRUST_SECONDS, OPT_IO_TRUST_WARN_ONLY, OPT_FILESYSTEM, OPT_PROFILER_RSS_SIZE, OPT_KVFILE,
|
||||
OPT_TRACE_FORMAT, OPT_WHITELIST_BINPATH, OPT_BLOB_CREDENTIAL_FILE, OPT_CONFIG_PATH, OPT_USE_TEST_CONFIG_DB,
|
||||
OPT_TRACE_FORMAT, OPT_WHITELIST_BINPATH, OPT_BLOB_CREDENTIAL_FILE, OPT_CONFIG_PATH, OPT_USE_TEST_CONFIG_DB, OPT_FAULT_INJECTION,
|
||||
};
|
||||
|
||||
CSimpleOpt::SOption g_rgOptions[] = {
|
||||
|
@ -177,6 +178,8 @@ CSimpleOpt::SOption g_rgOptions[] = {
|
|||
{ OPT_BLOB_CREDENTIAL_FILE, "--blob_credential_file", SO_REQ_SEP },
|
||||
{ OPT_CONFIG_PATH, "--config_path", SO_REQ_SEP },
|
||||
{ OPT_USE_TEST_CONFIG_DB, "--use_test_config_db", SO_NONE },
|
||||
{ OPT_FAULT_INJECTION, "-fi", SO_REQ_SEP },
|
||||
{ OPT_FAULT_INJECTION, "--fault_injection", SO_REQ_SEP },
|
||||
|
||||
#ifndef TLS_DISABLED
|
||||
TLS_OPTION_FLAGS
|
||||
|
@ -646,6 +649,7 @@ static void printUsage(const char* name, bool devhelp) {
|
|||
"--kvfile FILE",
|
||||
"Input file (SQLite database file) for use by the 'kvfilegeneratesums' and 'kvfileintegritycheck' roles.");
|
||||
printOptionUsage("-b [on,off], --buggify [on,off]", " Sets Buggify system state, defaults to `off'.");
|
||||
printOptionUsage("-fi [on,off], --fault_injection [on,off]", " Sets fault injection, defaults to `on'.");
|
||||
printOptionUsage("--crash", "Crash on serious errors instead of continuing.");
|
||||
printOptionUsage("-N NETWORKIMPL, --network NETWORKIMPL",
|
||||
" Select network implementation, `net2' (default),"
|
||||
|
@ -960,7 +964,7 @@ struct CLIOptions {
|
|||
8LL << 30; // Nice to maintain the same default value for memLimit and SERVER_KNOBS->SERVER_MEM_LIMIT and
|
||||
// SERVER_KNOBS->COMMIT_BATCHES_MEM_BYTES_HARD_LIMIT
|
||||
uint64_t storageMemLimit = 1LL << 30;
|
||||
bool buggifyEnabled = false, restarting = false;
|
||||
bool buggifyEnabled = false, faultInjectionEnabled = true, restarting = false;
|
||||
Optional<Standalone<StringRef>> zoneId;
|
||||
Optional<Standalone<StringRef>> dcId;
|
||||
ProcessClass processClass = ProcessClass(ProcessClass::UnsetClass, ProcessClass::CommandLineSource);
|
||||
|
@ -1382,6 +1386,17 @@ private:
|
|||
flushAndExit(FDB_EXIT_ERROR);
|
||||
}
|
||||
break;
|
||||
case OPT_FAULT_INJECTION:
|
||||
if (!strcmp(args.OptionArg(), "on"))
|
||||
faultInjectionEnabled = true;
|
||||
else if (!strcmp(args.OptionArg(), "off"))
|
||||
faultInjectionEnabled = false;
|
||||
else {
|
||||
fprintf(stderr, "ERROR: Unknown fault injection state `%s'\n", args.OptionArg());
|
||||
printHelpTeaser(argv[0]);
|
||||
flushAndExit(FDB_EXIT_ERROR);
|
||||
}
|
||||
break;
|
||||
case OPT_CRASHONERROR:
|
||||
g_crashOnError = true;
|
||||
break;
|
||||
|
@ -1638,6 +1653,7 @@ int main(int argc, char* argv[]) {
|
|||
setThreadLocalDeterministicRandomSeed(opts.randomSeed);
|
||||
|
||||
enableBuggify(opts.buggifyEnabled, BuggifyType::General);
|
||||
enableFaultInjection(opts.faultInjectionEnabled);
|
||||
|
||||
IKnobCollection::setGlobalKnobCollection(IKnobCollection::Type::SERVER,
|
||||
Randomize::True,
|
||||
|
@ -1795,6 +1811,7 @@ int main(int argc, char* argv[]) {
|
|||
.detail("CommandLine", opts.commandLine)
|
||||
.setMaxFieldLength(0)
|
||||
.detail("BuggifyEnabled", opts.buggifyEnabled)
|
||||
.detail("FaultInjectionEnabled", opts.faultInjectionEnabled)
|
||||
.detail("MemoryLimit", opts.memLimit)
|
||||
.trackLatest("ProgramStart");
|
||||
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include "fdbserver/workloads/workloads.actor.h"
|
||||
#include "fdbrpc/simulator.h"
|
||||
#include "fdbclient/ManagementAPI.actor.h"
|
||||
#include "flow/FaultInjection.h"
|
||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
|
||||
static std::set<int> const& normalAttritionErrors() {
|
||||
|
@ -78,8 +79,8 @@ struct MachineAttritionWorkload : TestWorkload {
|
|||
std::vector<LocalityData> machines;
|
||||
|
||||
MachineAttritionWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) {
|
||||
enabled =
|
||||
!clientId && g_network->isSimulated(); // only do this on the "first" client, and only when in simulation
|
||||
// only do this on the "first" client, and only when in simulation and only when fault injection is enabled
|
||||
enabled = !clientId && g_network->isSimulated() && faultInjectionActivated;
|
||||
machinesToKill = getOption(options, LiteralStringRef("machinesToKill"), 2);
|
||||
machinesToLeave = getOption(options, LiteralStringRef("machinesToLeave"), 1);
|
||||
workersToKill = getOption(options, LiteralStringRef("workersToKill"), 2);
|
||||
|
|
|
@ -20,4 +20,9 @@
|
|||
|
||||
#include "flow/FaultInjection.h"
|
||||
|
||||
bool (*should_inject_fault)(const char* context, const char* file, int line, int error_code) = 0;
|
||||
bool (*should_inject_fault)(const char* context, const char* file, int line, int error_code) = 0;
|
||||
bool faultInjectionActivated = true;
|
||||
|
||||
// Sets the global faultInjectionActivated flag to `enabled`.
void enableFaultInjection(bool enabled) {
|
||||
faultInjectionActivated = enabled;
|
||||
}
|
||||
|
|
|
@ -32,6 +32,8 @@
|
|||
#define SHOULD_INJECT_FAULT(context) (should_inject_fault && should_inject_fault(context, __FILE__, __LINE__, 0))
|
||||
|
||||
extern bool (*should_inject_fault)(const char* context, const char* file, int line, int error_code);
|
||||
extern bool faultInjectionActivated;
|
||||
extern void enableFaultInjection(bool enabled); // Turns fault injection on or off; called from fdbserver's main()
|
||||
#else
|
||||
#define INJECT_FAULT(error_type, context)
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue