Merge branch 'main' of https://github.com/apple/foundationdb into feature/main/listTenant

Xiaoxi Wang 2023-02-06 12:15:40 -08:00
commit 18a3de3594
17 changed files with 514 additions and 185 deletions


@@ -70,12 +70,15 @@ void ApiWorkload::start() {
schedule([this]() {
// 1. Clear data
clearData([this]() {
// 2. Workload setup
setup([this]() {
// 3. Populate initial data
populateData([this]() {
// 4. Generate random workload
runTests();
// 2. Create tenants if necessary.
createTenantsIfNecessary([this] {
// 3. Workload setup.
setup([this]() {
// 4. Populate initial data
populateData([this]() {
// 5. Generate random workload
runTests();
});
});
});
});
@@ -259,9 +262,17 @@ void ApiWorkload::createTenants(TTaskFct cont) {
[this, cont]() { schedule(cont); });
}
void ApiWorkload::createTenantsIfNecessary(TTaskFct cont) {
if (tenants.size() > 0) {
createTenants(cont);
} else {
schedule(cont);
}
}
void ApiWorkload::populateData(TTaskFct cont) {
if (tenants.size() > 0) {
createTenants([this, cont]() { populateTenantData(cont, std::make_optional(0)); });
populateTenantData(cont, std::make_optional(0));
} else {
populateTenantData(cont, {});
}


@@ -141,6 +141,7 @@ private:
void populateDataTx(TTaskFct cont, std::optional<int> tenantId);
void populateTenantData(TTaskFct cont, std::optional<int> tenantId);
void createTenants(TTaskFct cont);
void createTenantsIfNecessary(TTaskFct cont);
void clearTenantData(TTaskFct cont, std::optional<int> tenantId);


@@ -40,7 +40,6 @@ public:
}
private:
// FIXME: add tenant support for DB operations
// FIXME: use other new blob granule apis!
enum OpType {
OP_INSERT,
@@ -58,15 +57,8 @@ private:
void setup(TTaskFct cont) override { setupBlobGranules(cont); }
// FIXME: get rid of readSuccess* in this test now that setup is verify()-ing
// Allow reads at the start to get blob_granule_transaction_too_old if BG data isn't initialized yet
std::unordered_set<std::optional<int>> tenantsWithReadSuccess;
std::set<fdb::ByteString> validatedFiles;
inline void setReadSuccess(std::optional<int> tenantId) { tenantsWithReadSuccess.insert(tenantId); }
inline bool seenReadSuccess(std::optional<int> tenantId) { return tenantsWithReadSuccess.count(tenantId); }
void debugOp(std::string opName, fdb::KeyRange keyRange, std::optional<int> tenantId, std::string message) {
if (BG_API_DEBUG_VERBOSE) {
info(fmt::format("{0}: [{1} - {2}) {3}: {4}",
@@ -99,30 +91,15 @@ private:
granuleContext);
auto out = fdb::Result::KeyValueRefArray{};
fdb::Error err = res.getKeyValueArrayNothrow(out);
if (err.code() == error_code_blob_granule_transaction_too_old) {
bool previousSuccess = seenReadSuccess(tenantId);
if (previousSuccess) {
error("Read bg too old after read success!\n");
} else {
info("Read bg too old\n");
}
ASSERT(!previousSuccess);
*tooOld = true;
ctx->done();
} else if (err.code() != error_code_success) {
ASSERT(err.code() != error_code_blob_granule_transaction_too_old);
if (err.code() != error_code_success) {
ctx->onError(err);
} else {
auto resCopy = copyKeyValueArray(out);
auto& [resVector, out_more] = resCopy;
ASSERT(!out_more);
results.get()->assign(resVector.begin(), resVector.end());
bool previousSuccess = seenReadSuccess(tenantId);
if (!previousSuccess) {
info(fmt::format("Read {0}: first success\n", debugTenantStr(tenantId)));
setReadSuccess(tenantId);
} else {
debugOp("Read", keyRange, tenantId, "complete");
}
debugOp("Read", keyRange, tenantId, "complete");
ctx->done();
}
},
@@ -183,19 +160,13 @@ private:
},
[this, keyRange, tenantId, results, cont]() {
debugOp("GetGranules", keyRange, tenantId, fmt::format("complete with {0} granules", results->size()));
this->validateRanges(results, keyRange, seenReadSuccess(tenantId));
this->validateRanges(results, keyRange);
schedule(cont);
},
getTenant(tenantId));
}
void randomSummarizeOp(TTaskFct cont, std::optional<int> tenantId) {
if (!seenReadSuccess(tenantId)) {
// tester can't handle this throwing bg_txn_too_old, so just don't call it unless we have already seen a
// read success
schedule(cont);
return;
}
fdb::KeyRange keyRange = randomNonEmptyKeyRange();
auto results = std::make_shared<std::vector<fdb::GranuleSummary>>();
@@ -231,33 +202,29 @@ private:
ranges->push_back((*results)[i].keyRange);
}
this->validateRanges(ranges, keyRange, true);
this->validateRanges(ranges, keyRange);
schedule(cont);
},
getTenant(tenantId));
}
void validateRanges(std::shared_ptr<std::vector<fdb::KeyRange>> results,
fdb::KeyRange keyRange,
bool shouldBeRanges) {
if (shouldBeRanges) {
if (results->size() == 0) {
error(fmt::format("ValidateRanges: [{0} - {1}): No ranges returned!",
fdb::toCharsRef(keyRange.beginKey),
fdb::toCharsRef(keyRange.endKey)));
}
ASSERT(results->size() > 0);
if (results->front().beginKey > keyRange.beginKey || results->back().endKey < keyRange.endKey) {
error(fmt::format("ValidateRanges: [{0} - {1}): Incomplete range(s) returned [{2} - {3})!",
fdb::toCharsRef(keyRange.beginKey),
fdb::toCharsRef(keyRange.endKey),
fdb::toCharsRef(results->front().beginKey),
fdb::toCharsRef(results->back().endKey)));
}
ASSERT(results->front().beginKey <= keyRange.beginKey);
ASSERT(results->back().endKey >= keyRange.endKey);
void validateRanges(std::shared_ptr<std::vector<fdb::KeyRange>> results, fdb::KeyRange keyRange) {
if (results->size() == 0) {
error(fmt::format("ValidateRanges: [{0} - {1}): No ranges returned!",
fdb::toCharsRef(keyRange.beginKey),
fdb::toCharsRef(keyRange.endKey)));
}
ASSERT(results->size() > 0);
if (results->front().beginKey > keyRange.beginKey || results->back().endKey < keyRange.endKey) {
error(fmt::format("ValidateRanges: [{0} - {1}): Incomplete range(s) returned [{2} - {3})!",
fdb::toCharsRef(keyRange.beginKey),
fdb::toCharsRef(keyRange.endKey),
fdb::toCharsRef(results->front().beginKey),
fdb::toCharsRef(results->back().endKey)));
}
ASSERT(results->front().beginKey <= keyRange.beginKey);
ASSERT(results->back().endKey >= keyRange.endKey);
for (int i = 0; i < results->size(); i++) {
// no empty or inverted ranges
if ((*results)[i].beginKey >= (*results)[i].endKey) {
@@ -293,7 +260,6 @@ private:
execOperation(
[keyRange, results](auto ctx) {
// FIXME: add tenant!
fdb::Future f =
ctx->dbOps()->listBlobbifiedRanges(keyRange.beginKey, keyRange.endKey, 1000).eraseType();
ctx->continueAfter(f, [ctx, f, results]() {
@@ -303,7 +269,7 @@ private:
},
[this, keyRange, tenantId, results, cont]() {
debugOp("GetBlobRanges", keyRange, tenantId, fmt::format("complete with {0} ranges", results->size()));
this->validateRanges(results, keyRange, seenReadSuccess(tenantId));
this->validateRanges(results, keyRange);
schedule(cont);
},
getTenant(tenantId),
@@ -319,7 +285,6 @@ private:
auto verifyVersion = std::make_shared<int64_t>(-1);
execOperation(
[keyRange, verifyVersion](auto ctx) {
// FIXME: add tenant!!
fdb::Future f = ctx->dbOps()
->verifyBlobRange(keyRange.beginKey, keyRange.endKey, -2 /* latest version*/)
.eraseType();
@@ -330,13 +295,6 @@ private:
},
[this, keyRange, tenantId, verifyVersion, cont]() {
debugOp("Verify", keyRange, tenantId, fmt::format("Complete @ {0}", *verifyVersion));
bool previousSuccess = seenReadSuccess(tenantId);
if (*verifyVersion == -1) {
ASSERT(!previousSuccess);
} else if (!previousSuccess) {
info(fmt::format("Verify {0}: first success\n", debugTenantStr(tenantId)));
setReadSuccess(tenantId);
}
schedule(cont);
},
getTenant(tenantId),
@@ -475,11 +433,6 @@ private:
std::optional<int> tenantId,
int64_t readVersion) {
ASSERT(!results.empty());
ASSERT(results.front().keyRange.beginKey <= keyRange.beginKey);
ASSERT(keyRange.endKey <= results.back().keyRange.endKey);
for (int i = 0; i < results.size() - 1; i++) {
ASSERT(results[i].keyRange.endKey == results[i + 1].keyRange.beginKey);
}
if (tenantId) {
// FIXME: support tenants!!
@@ -487,6 +440,12 @@ private:
return;
}
ASSERT(results.front().keyRange.beginKey <= keyRange.beginKey);
ASSERT(keyRange.endKey <= results.back().keyRange.endKey);
for (int i = 0; i < results.size() - 1; i++) {
ASSERT(results[i].keyRange.endKey == results[i + 1].keyRange.beginKey);
}
TesterGranuleContext testerContext(ctx->getBGBasePath());
fdb::native::FDBReadBlobGranuleContext bgCtx = createGranuleContext(&testerContext);
for (int i = 0; i < results.size(); i++) {
@@ -495,9 +454,6 @@ private:
}
void randomReadDescription(TTaskFct cont, std::optional<int> tenantId) {
if (!seenReadSuccess(tenantId)) {
return;
}
fdb::KeyRange keyRange = randomNonEmptyKeyRange();
auto results = std::make_shared<std::vector<fdb::GranuleDescription>>();
auto readVersionOut = std::make_shared<int64_t>();


@@ -0,0 +1,24 @@
[[test]]
title = 'Blob Granule API Tenant Correctness Multi Threaded'
multiThreaded = true
buggify = true
minFdbThreads = 2
maxFdbThreads = 8
minClients = 1
maxClients = 8
minTenants = 1
maxTenants = 5
[[server]]
blob_granules_enabled = true
[[test.workload]]
name = 'ApiBlobGranuleCorrectness'
minKeyLength = 1
maxKeyLength = 64
minValueLength = 1
maxValueLength = 1000
maxKeysPerTransaction = 50
# TODO - increase initialSize and/or buggify down BG_SNAPSHOT_FILE_TARGET_BYTES to force multiple granules
initialSize = 100
numRandomOperations = 100


@@ -60,7 +60,7 @@ class StatFetcher:
class TestPicker:
def __init__(self, test_dir: Path):
if not test_dir.exists():
raise RuntimeError('{} is neither a directory nor a file'.format(test_dir))
raise RuntimeError("{} is neither a directory nor a file".format(test_dir))
self.include_files_regex = re.compile(config.include_test_files)
self.exclude_files_regex = re.compile(config.exclude_test_files)
self.include_tests_regex = re.compile(config.include_test_classes)
@@ -78,6 +78,7 @@ class TestPicker:
self.stat_fetcher = StatFetcher(self.tests)
else:
from test_harness.fdb import FDBStatFetcher
self.stat_fetcher = FDBStatFetcher(self.tests)
if config.stats is not None:
self.load_stats(config.stats)
@@ -106,50 +107,60 @@ class TestPicker:
break
assert test_name is not None and test_desc is not None
self.stat_fetcher.add_run_time(test_name, run_time, out)
out.attributes['TotalTestTime'] = str(test_desc.total_runtime)
out.attributes['TestRunCount'] = str(test_desc.num_runs)
out.attributes["TotalTestTime"] = str(test_desc.total_runtime)
out.attributes["TestRunCount"] = str(test_desc.num_runs)
def dump_stats(self) -> str:
res = array.array('I')
res = array.array("I")
for _, spec in self.tests.items():
res.append(spec.total_runtime)
return base64.standard_b64encode(res.tobytes()).decode('utf-8')
return base64.standard_b64encode(res.tobytes()).decode("utf-8")
def fetch_stats(self):
self.stat_fetcher.read_stats()
def load_stats(self, serialized: str):
times = array.array('I')
times = array.array("I")
times.frombytes(base64.standard_b64decode(serialized))
assert len(times) == len(self.tests.items())
for idx, (_, spec) in enumerate(self.tests.items()):
spec.total_runtime = times[idx]
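The stats blob that dump_stats emits and load_stats consumes is just the per-test runtimes packed as unsigned 32-bit integers and base64-encoded; values are matched back to tests purely by iteration order, which is why load_stats asserts that the counts agree. A minimal sketch of the round trip, with hypothetical runtimes:

import array
import base64

runtimes = [12, 340, 7]  # hypothetical per-test total runtimes, in seconds

# dump_stats: pack as unsigned 32-bit ints, then base64-encode
packed = array.array("I", runtimes)
blob = base64.standard_b64encode(packed.tobytes()).decode("utf-8")

# load_stats: decode and re-apply by position
restored = array.array("I")
restored.frombytes(base64.standard_b64decode(blob))
assert list(restored) == runtimes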
def parse_txt(self, path: Path):
if self.include_files_regex.search(str(path)) is None or self.exclude_files_regex.search(str(path)) is not None:
if (
self.include_files_regex.search(str(path)) is None
or self.exclude_files_regex.search(str(path)) is not None
):
return
with path.open('r') as f:
with path.open("r") as f:
test_name: str | None = None
test_class: str | None = None
priority: float | None = None
for line in f:
line = line.strip()
kv = line.split('=')
kv = line.split("=")
if len(kv) != 2:
continue
kv[0] = kv[0].strip()
kv[1] = kv[1].strip(' \r\n\t\'"')
if kv[0] == 'testTitle' and test_name is None:
kv[1] = kv[1].strip(" \r\n\t'\"")
if kv[0] == "testTitle" and test_name is None:
test_name = kv[1]
if kv[0] == 'testClass' and test_class is None:
if kv[0] == "testClass" and test_class is None:
test_class = kv[1]
if kv[0] == 'testPriority' and priority is None:
if kv[0] == "testPriority" and priority is None:
try:
priority = float(kv[1])
except ValueError:
raise RuntimeError("Can't parse {} -- testPriority in {} should be set to a float".format(kv[1],
path))
if test_name is not None and test_class is not None and priority is not None:
raise RuntimeError(
"Can't parse {} -- testPriority in {} should be set to a float".format(
kv[1], path
)
)
if (
test_name is not None
and test_class is not None
and priority is not None
):
break
if test_name is None:
return
@@ -157,8 +168,10 @@ class TestPicker:
test_class = test_name
if priority is None:
priority = 1.0
if self.include_tests_regex.search(test_class) is None \
or self.exclude_tests_regex.search(test_class) is not None:
if (
self.include_tests_regex.search(test_class) is None
or self.exclude_tests_regex.search(test_class) is not None
):
return
if test_class not in self.tests:
self.tests[test_class] = TestDescription(path, test_class, priority)
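parse_txt deliberately treats both .txt and .toml files as flat key=value lines, stripping whitespace and quotes from the value, and keeps only the first testTitle, testClass, and testPriority it sees. A sketch of the extraction on a hypothetical file header:

header = [
    "testTitle = 'CycleRestart-1'",
    "testClass = 'CycleRestart'",
    "testPriority = 2.0",
]
for line in header:
    kv = line.strip().split("=")
    if len(kv) != 2:
        continue
    key, value = kv[0].strip(), kv[1].strip(" \r\n\t'\"")
    print(key, "->", value)
# testTitle -> CycleRestart-1
# testClass -> CycleRestart
# testPriority -> 2.0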
@@ -173,12 +186,12 @@ class TestPicker:
# check whether we're looking at a restart test
if self.follow_test.match(test.name) is not None:
return
if test.suffix == '.txt' or test.suffix == '.toml':
if test.suffix == ".txt" or test.suffix == ".toml":
self.parse_txt(test)
@staticmethod
def list_restart_files(start_file: Path) -> List[Path]:
name = re.sub(r'-\d+.(txt|toml)', '', start_file.name)
name = re.sub(r"-\d+.(txt|toml)", "", start_file.name)
res: List[Path] = []
for test_file in start_file.parent.iterdir():
if test_file.name.startswith(name):
@@ -209,12 +222,12 @@ class TestPicker:
class OldBinaries:
def __init__(self):
self.first_file_expr = re.compile(r'.*-1\.(txt|toml)')
self.first_file_expr = re.compile(r".*-1\.(txt|toml)")
self.old_binaries_path: Path = config.old_binaries_path
self.binaries: OrderedDict[Version, Path] = collections.OrderedDict()
if not self.old_binaries_path.exists() or not self.old_binaries_path.is_dir():
return
exec_pattern = re.compile(r'fdbserver-\d+\.\d+\.\d+(\.exe)?')
exec_pattern = re.compile(r"fdbserver-\d+\.\d+\.\d+(\.exe)?")
for file in self.old_binaries_path.iterdir():
if not file.is_file() or not os.access(file, os.X_OK):
continue
@@ -222,9 +235,9 @@ class OldBinaries:
self._add_file(file)
def _add_file(self, file: Path):
version_str = file.name.split('-')[1]
if version_str.endswith('.exe'):
version_str = version_str[0:-len('.exe')]
version_str = file.name.split("-")[1]
if version_str.endswith(".exe"):
version_str = version_str[0 : -len(".exe")]
ver = Version.parse(version_str)
self.binaries[ver] = file
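_add_file derives the version directly from the binary's file name, so a directory of old binaries only needs to follow the fdbserver-<version> naming scheme (with an optional .exe suffix on Windows). A sketch with a hypothetical name:

name = "fdbserver-7.1.33.exe"  # hypothetical old binary
version_str = name.split("-")[1]        # '7.1.33.exe'
if version_str.endswith(".exe"):
    version_str = version_str[0 : -len(".exe")]
print(version_str)                      # '7.1.33'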
@@ -232,21 +245,21 @@
if len(self.binaries) == 0:
return config.binary
max_version = Version.max_version()
min_version = Version.parse('5.0.0')
min_version = Version.parse("5.0.0")
dirs = test_file.parent.parts
if 'restarting' not in dirs:
if "restarting" not in dirs:
return config.binary
version_expr = dirs[-1].split('_')
version_expr = dirs[-1].split("_")
first_file = self.first_file_expr.match(test_file.name) is not None
if first_file and version_expr[0] == 'to':
if first_file and version_expr[0] == "to":
# downgrade test -- first binary should be current one
return config.binary
if not first_file and version_expr[0] == 'from':
if not first_file and version_expr[0] == "from":
# upgrade test -- we only return an old version for the first test file
return config.binary
if version_expr[0] == 'from' or version_expr[0] == 'to':
if version_expr[0] == "from" or version_expr[0] == "to":
min_version = Version.parse(version_expr[1])
if len(version_expr) == 4 and version_expr[2] == 'until':
if len(version_expr) == 4 and version_expr[2] == "until":
max_version = Version.parse(version_expr[3])
candidates: List[Path] = []
for ver, binary in self.binaries.items():
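choose_binary keys off the name of the restarting-test directory: from_X admits binaries at version X and above, an optional until_Y caps the range, and to_X marks a downgrade test whose first part runs on the current binary. A sketch of how a hypothetical directory name decodes into a version range:

version_expr = "from_7.1.0_until_7.2.0".split("_")  # hypothetical directory name
# ['from', '7.1.0', 'until', '7.2.0']
min_version, max_version = "5.0.0", None  # None meaning unbounded
if version_expr[0] in ("from", "to"):
    min_version = version_expr[1]
if len(version_expr) == 4 and version_expr[2] == "until":
    max_version = version_expr[3]
print(min_version, max_version)  # 7.1.0 7.2.0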
@@ -259,13 +272,13 @@
def is_restarting_test(test_file: Path):
for p in test_file.parts:
if p == 'restarting':
if p == "restarting":
return True
return False
def is_no_sim(test_file: Path):
return test_file.parts[-2] == 'noSim'
return test_file.parts[-2] == "noSim"
class ResourceMonitor(threading.Thread):
@@ -291,9 +304,19 @@ class ResourceMonitor(threading.Thread):
class TestRun:
def __init__(self, binary: Path, test_file: Path, random_seed: int, uid: uuid.UUID,
restarting: bool = False, test_determinism: bool = False, buggify_enabled: bool = False,
stats: str | None = None, expected_unseed: int | None = None, will_restart: bool = False):
def __init__(
self,
binary: Path,
test_file: Path,
random_seed: int,
uid: uuid.UUID,
restarting: bool = False,
test_determinism: bool = False,
buggify_enabled: bool = False,
stats: str | None = None,
expected_unseed: int | None = None,
will_restart: bool = False,
):
self.binary = binary
self.test_file = test_file
self.random_seed = random_seed
@@ -313,23 +336,31 @@ class TestRun:
self.temp_path = config.run_dir / str(self.uid)
# state for the run
self.retryable_error: bool = False
self.summary: Summary = Summary(binary, uid=self.uid, stats=self.stats, expected_unseed=self.expected_unseed,
will_restart=will_restart, long_running=config.long_running)
self.summary: Summary = Summary(
binary,
uid=self.uid,
stats=self.stats,
expected_unseed=self.expected_unseed,
will_restart=will_restart,
long_running=config.long_running,
)
self.run_time: int = 0
self.success = self.run()
def log_test_plan(self, out: SummaryTree):
test_plan: SummaryTree = SummaryTree('TestPlan')
test_plan.attributes['TestUID'] = str(self.uid)
test_plan.attributes['RandomSeed'] = str(self.random_seed)
test_plan.attributes['TestFile'] = str(self.test_file)
test_plan.attributes['Buggify'] = '1' if self.buggify_enabled else '0'
test_plan.attributes['FaultInjectionEnabled'] = '1' if self.fault_injection_enabled else '0'
test_plan.attributes['DeterminismCheck'] = '1' if self.test_determinism else '0'
test_plan: SummaryTree = SummaryTree("TestPlan")
test_plan.attributes["TestUID"] = str(self.uid)
test_plan.attributes["RandomSeed"] = str(self.random_seed)
test_plan.attributes["TestFile"] = str(self.test_file)
test_plan.attributes["Buggify"] = "1" if self.buggify_enabled else "0"
test_plan.attributes["FaultInjectionEnabled"] = (
"1" if self.fault_injection_enabled else "0"
)
test_plan.attributes["DeterminismCheck"] = "1" if self.test_determinism else "0"
out.append(test_plan)
def delete_simdir(self):
shutil.rmtree(self.temp_path / Path('simfdb'))
shutil.rmtree(self.temp_path / Path("simfdb"))
def run(self):
command: List[str] = []
@@ -341,47 +372,68 @@ class TestRun:
# the test take longer. Also old binaries weren't built with
# USE_VALGRIND=ON, and we have seen false positives with valgrind in
# such binaries.
command.append('valgrind')
valgrind_file = self.temp_path / Path('valgrind-{}.xml'.format(self.random_seed))
dbg_path = os.getenv('FDB_VALGRIND_DBGPATH')
command.append("valgrind")
valgrind_file = self.temp_path / Path(
"valgrind-{}.xml".format(self.random_seed)
)
dbg_path = os.getenv("FDB_VALGRIND_DBGPATH")
if dbg_path is not None:
command.append('--extra-debuginfo-path={}'.format(dbg_path))
command += ['--xml=yes', '--xml-file={}'.format(valgrind_file.absolute()), '-q']
command += [str(self.binary.absolute()),
'-r', 'test' if is_no_sim(self.test_file) else 'simulation',
'-f', str(self.test_file),
'-s', str(self.random_seed)]
command.append("--extra-debuginfo-path={}".format(dbg_path))
command += [
"--xml=yes",
"--xml-file={}".format(valgrind_file.absolute()),
"-q",
]
command += [
str(self.binary.absolute()),
"-r",
"test" if is_no_sim(self.test_file) else "simulation",
"-f",
str(self.test_file),
"-s",
str(self.random_seed),
]
if self.trace_format is not None:
command += ['--trace_format', self.trace_format]
command += ["--trace_format", self.trace_format]
if self.use_tls_plugin:
command += ['--tls_plugin', str(config.tls_plugin_path)]
command += ["--tls_plugin", str(config.tls_plugin_path)]
env["FDB_TLS_PLUGIN"] = str(config.tls_plugin_path)
if config.disable_kaio:
command += ['--knob-disable-posix-kernel-aio=1']
if Version.of_binary(self.binary) >= '7.1.0':
command += ['-fi', 'on' if self.fault_injection_enabled else 'off']
command += ["--knob-disable-posix-kernel-aio=1"]
if Version.of_binary(self.binary) >= "7.1.0":
command += ["-fi", "on" if self.fault_injection_enabled else "off"]
if self.restarting:
command.append('--restarting')
command.append("--restarting")
if self.buggify_enabled:
command += ['-b', 'on']
command += ["-b", "on"]
if config.crash_on_error:
command.append('--crash')
command.append("--crash")
if config.long_running:
# disable simulation speedup
command += ['--knob-sim-speedup-after-seconds=36000']
command += ["--knob-sim-speedup-after-seconds=36000"]
# disable traceTooManyLines Error MAX_TRACE_LINES
command += ['--knob-max-trace-lines=1000000000']
command += ["--knob-max-trace-lines=1000000000"]
self.temp_path.mkdir(parents=True, exist_ok=True)
# self.log_test_plan(out)
resources = ResourceMonitor()
resources.start()
process = subprocess.Popen(command, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, cwd=self.temp_path,
text=True, env=env)
process = subprocess.Popen(
command,
stdout=subprocess.DEVNULL,
stderr=subprocess.PIPE,
cwd=self.temp_path,
text=True,
env=env,
)
did_kill = False
# No timeout for long running tests
timeout = 20 * config.kill_seconds if self.use_valgrind else (None if config.long_running else config.kill_seconds)
timeout = (
20 * config.kill_seconds
if self.use_valgrind
else (None if config.long_running else config.kill_seconds)
)
err_out: str
try:
_, err_out = process.communicate(timeout=timeout)
@@ -398,7 +450,7 @@ class TestRun:
self.summary.was_killed = did_kill
self.summary.valgrind_out_file = valgrind_file
self.summary.error_out = err_out
self.summary.summarize(self.temp_path, ' '.join(command))
self.summary.summarize(self.temp_path, " ".join(command))
return self.summary.ok()
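Assembled, the command run() launches looks roughly like the following (a hypothetical non-valgrind simulation run with buggify and fault injection on; binary path, test file, and seed vary):

command = [
    "/path/to/fdbserver",  # hypothetical binary path
    "-r", "simulation",
    "-f", "tests/fast/Cycle.toml",
    "-s", "1094093",
    "-fi", "on",
    "-b", "on",
]
print(" ".join(command))
# /path/to/fdbserver -r simulation -f tests/fast/Cycle.toml -s 1094093 -fi on -b on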
@@ -407,18 +459,18 @@ def decorate_summary(out: SummaryTree, test_file: Path, seed: int, buggify: bool
tests are then hard to reproduce (they can be reproduced through TestHarness but
require the user to run in the joshua docker container). To account for this we
will write the necessary information into the attributes if it is missing."""
if 'TestFile' not in out.attributes:
out.attributes['TestFile'] = str(test_file)
if 'RandomSeed' not in out.attributes:
out.attributes['RandomSeed'] = str(seed)
if 'BuggifyEnabled' not in out.attributes:
out.attributes['BuggifyEnabled'] = '1' if buggify else '0'
if "TestFile" not in out.attributes:
out.attributes["TestFile"] = str(test_file)
if "RandomSeed" not in out.attributes:
out.attributes["RandomSeed"] = str(seed)
if "BuggifyEnabled" not in out.attributes:
out.attributes["BuggifyEnabled"] = "1" if buggify else "0"
class TestRunner:
def __init__(self):
self.uid = uuid.uuid4()
self.test_path: Path = Path('tests')
self.test_path: Path = Path("tests")
self.cluster_file: str | None = None
self.fdb_app_dir: str | None = None
self.binary_chooser = OldBinaries()
@@ -426,32 +478,45 @@ class TestRunner:
def backup_sim_dir(self, seed: int):
temp_dir = config.run_dir / str(self.uid)
src_dir = temp_dir / 'simfdb'
src_dir = temp_dir / "simfdb"
assert src_dir.is_dir()
dest_dir = temp_dir / 'simfdb.{}'.format(seed)
dest_dir = temp_dir / "simfdb.{}".format(seed)
assert not dest_dir.exists()
shutil.copytree(src_dir, dest_dir)
def restore_sim_dir(self, seed: int):
temp_dir = config.run_dir / str(self.uid)
src_dir = temp_dir / 'simfdb.{}'.format(seed)
src_dir = temp_dir / "simfdb.{}".format(seed)
assert src_dir.exists()
dest_dir = temp_dir / 'simfdb'
dest_dir = temp_dir / "simfdb"
shutil.rmtree(dest_dir)
shutil.move(src_dir, dest_dir)
def run_tests(self, test_files: List[Path], seed: int, test_picker: TestPicker) -> bool:
def run_tests(
self, test_files: List[Path], seed: int, test_picker: TestPicker
) -> bool:
result: bool = True
for count, file in enumerate(test_files):
will_restart = count + 1 < len(test_files)
binary = self.binary_chooser.choose_binary(file)
unseed_check = not is_no_sim(file) and config.random.random() < config.unseed_check_ratio
unseed_check = (
not is_no_sim(file)
and config.random.random() < config.unseed_check_ratio
)
buggify_enabled: bool = config.random.random() < config.buggify_on_ratio
if unseed_check and count != 0:
# for restarting tests we will need to restore the sim2 after the first run
# for restarting tests we will need to restore the sim2 directory after the first run
self.backup_sim_dir(seed + count - 1)
run = TestRun(binary, file.absolute(), seed + count, self.uid, restarting=count != 0,
stats=test_picker.dump_stats(), will_restart=will_restart, buggify_enabled=buggify_enabled)
run = TestRun(
binary,
file.absolute(),
seed + count,
self.uid,
restarting=count != 0,
stats=test_picker.dump_stats(),
will_restart=will_restart,
buggify_enabled=buggify_enabled,
)
result = result and run.success
test_picker.add_time(test_files[0], run.run_time, run.summary.out)
decorate_summary(run.summary.out, file, seed + count, run.buggify_enabled)
@@ -463,11 +528,21 @@ class TestRunner:
if unseed_check and run.summary.unseed is not None:
if count != 0:
self.restore_sim_dir(seed + count - 1)
run2 = TestRun(binary, file.absolute(), seed + count, self.uid, restarting=count != 0,
stats=test_picker.dump_stats(), expected_unseed=run.summary.unseed,
will_restart=will_restart, buggify_enabled=buggify_enabled)
run2 = TestRun(
binary,
file.absolute(),
seed + count,
self.uid,
restarting=count != 0,
stats=test_picker.dump_stats(),
expected_unseed=run.summary.unseed,
will_restart=will_restart,
buggify_enabled=buggify_enabled,
)
test_picker.add_time(file, run2.run_time, run.summary.out)
decorate_summary(run2.summary.out, file, seed + count, run.buggify_enabled)
decorate_summary(
run2.summary.out, file, seed + count, run.buggify_enabled
)
run2.summary.out.dump(sys.stdout)
result = result and run2.success
if not result:
@@ -475,7 +550,11 @@ class TestRunner:
return result
def run(self) -> bool:
seed = config.random_seed if config.random_seed is not None else config.random.randint(0, 2 ** 32 - 1)
seed = (
config.random_seed
if config.random_seed is not None
else config.random.randint(0, 2**32 - 1)
)
test_files = self.test_picker.choose_test()
success = self.run_tests(test_files, seed, self.test_picker)
if config.clean_up:


@@ -70,7 +70,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( MAX_MESSAGE_SIZE, std::max<int>(LOG_SYSTEM_PUSHED_DATA_BLOCK_SIZE, 1e5 + 2e4 + 1) + 8 ); // VALUE_SIZE_LIMIT + SYSTEM_KEY_SIZE_LIMIT + 9 bytes (4 bytes for length, 4 bytes for sequence number, and 1 byte for mutation type)
init( TLOG_MESSAGE_BLOCK_BYTES, 10e6 );
init( TLOG_MESSAGE_BLOCK_OVERHEAD_FACTOR, double(TLOG_MESSAGE_BLOCK_BYTES) / (TLOG_MESSAGE_BLOCK_BYTES - MAX_MESSAGE_SIZE) ); //1.0121466709838096006362758832473
init( PEEK_TRACKER_EXPIRATION_TIME, 600 ); if( randomize && BUGGIFY ) PEEK_TRACKER_EXPIRATION_TIME = deterministicRandom()->coinflip() ? 0.1 : 120;
init( PEEK_TRACKER_EXPIRATION_TIME, 600 ); if( randomize && BUGGIFY ) PEEK_TRACKER_EXPIRATION_TIME = 120; // Cannot be buggified lower without changing the following assert in LogSystemPeekCursor.actor.cpp: ASSERT_WE_THINK(e.code() == error_code_operation_obsolete || SERVER_KNOBS->PEEK_TRACKER_EXPIRATION_TIME < 10);
init( PEEK_USING_STREAMING, false ); if( randomize && isSimulated && BUGGIFY ) PEEK_USING_STREAMING = true;
init( PARALLEL_GET_MORE_REQUESTS, 32 ); if( randomize && BUGGIFY ) PARALLEL_GET_MORE_REQUESTS = 2;
init( MULTI_CURSOR_PRE_FETCH_LIMIT, 10 );
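The overhead factor above can be checked by hand: assuming LOG_SYSTEM_PUSHED_DATA_BLOCK_SIZE does not exceed 1e5 + 2e4 + 1 (so the second operand of the max wins), MAX_MESSAGE_SIZE is 120009 bytes, and the factor is the block size divided by what remains after one maximal message. A quick worked check:

MAX_MESSAGE_SIZE = int(1e5 + 2e4 + 1) + 8              # 120009 bytes
TLOG_MESSAGE_BLOCK_BYTES = int(10e6)
factor = TLOG_MESSAGE_BLOCK_BYTES / (TLOG_MESSAGE_BLOCK_BYTES - MAX_MESSAGE_SIZE)
print(factor)  # 1.0121466709838096, matching the inline comment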


@@ -914,8 +914,7 @@ ACTOR Future<BlobFileIndex> writeDeltaFile(Reference<BlobWorkerData> bwData,
}
if (BUGGIFY && bwData->maybeInjectTargetedRestart()) {
wait(delay(0)); // should be cancelled
ASSERT(false);
wait(Never());
}
if (BUGGIFY_WITH_PROB(0.01)) {
@@ -1158,8 +1157,7 @@ ACTOR Future<BlobFileIndex> writeSnapshot(Reference<BlobWorkerData> bwData,
}
if (BUGGIFY && bwData->maybeInjectTargetedRestart()) {
wait(delay(0)); // should be cancelled
ASSERT(false);
wait(Never());
}
// FIXME: change when we implement multiplexing
@@ -1213,8 +1211,7 @@ ACTOR Future<BlobFileIndex> dumpInitialSnapshotFromFDB(Reference<BlobWorkerData>
DEBUG_KEY_RANGE("BlobWorkerFDBSnapshot", readVersion, metadata->keyRange, bwData->id);
if (BUGGIFY && bwData->maybeInjectTargetedRestart()) {
wait(delay(0)); // should be cancelled
ASSERT(false);
wait(Never());
}
// initial snapshot is committed in fdb, we can pop the change feed up to this version
@@ -1633,8 +1630,7 @@ ACTOR Future<Void> reevaluateInitialSplit(Reference<BlobWorkerData> bwData,
reply.proposedSplitKey = proposedSplitKey;
bwData->currentManagerStatusStream.get().send(reply);
if (BUGGIFY && bwData->maybeInjectTargetedRestart()) {
wait(delay(0)); // should be cancelled
ASSERT(false);
wait(Never());
}
// if a new manager appears, also tell it about this granule being splittable, or retry after a certain
// amount of time of not hearing back
@@ -4377,8 +4373,7 @@ ACTOR Future<GranuleStartState> openGranule(Reference<BlobWorkerData> bwData, As
}
if (BUGGIFY && bwData->maybeInjectTargetedRestart()) {
wait(delay(0)); // should be cancelled
ASSERT(false);
wait(Never());
}
return info;


@@ -203,6 +203,7 @@ struct ResolutionRequestBuilder {
ASSERT(transactionNumberInBatch >= 0 && transactionNumberInBatch < 32768);
bool isTXNStateTransaction = false;
DisabledTraceEvent("AddTransaction", self->dbgid).detail("TenantMode", (int)self->getTenantMode());
bool needParseTenantId = !trRequest.tenantInfo.hasTenant() && self->getTenantMode() == TenantMode::REQUIRED;
VectorRef<int64_t> tenantIds;
for (auto& m : trIn.mutations) {
@@ -3223,7 +3224,16 @@ ACTOR Future<Void> updateLocalDbInfo(Reference<AsyncVar<ServerDBInfo> const> in,
// only update the db info if this is the current CP, or before we received first one including current CP.
// Several db infos at the beginning just contain the provisional CP
if (isIncluded || !firstValidDbInfo) {
out->set(in->get());
DisabledTraceEvent("UpdateLocalDbInfo", myInterface.id())
.detail("Provisional", myInterface.provisional)
.detail("Included", isIncluded)
.detail("FirstValid", firstValidDbInfo)
.detail("ReceivedRC", in->get().recoveryCount)
.detail("RecoveryCount", recoveryCount)
.detail("TenantMode", (int)in->get().client.tenantMode);
if (in->get().recoveryCount >= out->get().recoveryCount) {
out->set(in->get());
}
}
wait(in->onChange());


@@ -286,8 +286,8 @@ if(WITH_PYTHON)
TEST_FILES restarting/from_6.3.13_until_7.2.0/DrUpgradeRestart-1.txt
restarting/from_6.3.13_until_7.2.0/DrUpgradeRestart-2.txt)
add_fdb_test(
TEST_FILES restarting/from_7.0.0_until_7.2.0/UpgradeAndBackupRestore-1.toml
restarting/from_7.0.0_until_7.2.0/UpgradeAndBackupRestore-2.toml)
TEST_FILES restarting/from_7.0.0_until_7.1.0/UpgradeAndBackupRestore-1.toml
restarting/from_7.0.0_until_7.1.0/UpgradeAndBackupRestore-2.toml)
add_fdb_test(
TEST_FILES restarting/from_7.1.0_until_7.2.0/ConfigureTestRestart-1.toml
restarting/from_7.1.0_until_7.2.0/ConfigureTestRestart-2.toml)
@@ -309,6 +309,9 @@ if(WITH_PYTHON)
add_fdb_test(
TEST_FILES restarting/from_7.1.0_until_7.2.0/ConfigureStorageMigrationTestRestart-1.toml
restarting/from_7.1.0_until_7.2.0/ConfigureStorageMigrationTestRestart-2.toml)
add_fdb_test(
TEST_FILES restarting/from_7.1.0_until_7.2.0/UpgradeAndBackupRestore-1.toml
restarting/from_7.1.0_until_7.2.0/UpgradeAndBackupRestore-2.toml)
add_fdb_test(
TEST_FILES restarting/from_7.1.0_until_7.2.0/VersionVectorDisableRestart-1.toml
restarting/from_7.1.0_until_7.2.0/VersionVectorDisableRestart-2.toml)
@@ -331,14 +334,17 @@ if(WITH_PYTHON)
TEST_FILES restarting/from_7.2.0/DrUpgradeRestart-1.toml
restarting/from_7.2.0/DrUpgradeRestart-2.toml)
add_fdb_test(
TEST_FILES restarting/from_7.2.4/UpgradeAndBackupRestore-1.toml
restarting/from_7.2.4/UpgradeAndBackupRestore-2.toml)
TEST_FILES restarting/from_7.2.4_until_7.3.0/UpgradeAndBackupRestore-1.toml
restarting/from_7.2.4_until_7.3.0/UpgradeAndBackupRestore-2.toml)
add_fdb_test(
TEST_FILES restarting/from_7.3.0/ConfigureTestRestart-1.toml
restarting/from_7.3.0/ConfigureTestRestart-2.toml)
add_fdb_test(
TEST_FILES restarting/from_7.3.0/ConfigureStorageMigrationTestRestart-1.toml
restarting/from_7.3.0/ConfigureStorageMigrationTestRestart-2.toml)
add_fdb_test(
TEST_FILES restarting/from_7.3.0/UpgradeAndBackupRestore-1.toml
restarting/from_7.3.0/UpgradeAndBackupRestore-2.toml)
add_fdb_test(
TEST_FILES restarting/from_7.3.0/VersionVectorDisableRestart-1.toml
restarting/from_7.3.0/VersionVectorDisableRestart-2.toml)


@@ -0,0 +1,57 @@
storageEngineExcludeTypes=3
[[test]]
testTitle = 'SubmitBackup'
simBackupAgents = 'BackupToFile'
clearAfterTest = false
runConsistencyCheck = false
[[test.workload]]
testName = 'SubmitBackup'
delayFor = 0
stopWhenDone = false
[[test]]
testTitle = 'FirstCycleTest'
clearAfterTest = false
runConsistencyCheck = false
[[test.workload]]
testName = 'Cycle'
nodeCount = 30000
transactionsPerSecond = 2500.0
testDuration = 30.0
expectedRate = 0
keyPrefix = 'BeforeRestart'
[[test.workload]]
testName = 'RandomClogging'
testDuration = 90.0
[[test.workload]]
testName = 'Rollback'
meanDelay = 90.0
testDuration = 90.0
[[test.workload]]
testName = 'Attrition'
machinesToKill = 10
machinesToLeave = 3
reboot = true
testDuration = 90.0
[[test.workload]]
testName = 'Attrition'
machinesToKill = 10
machinesToLeave = 3
reboot = true
testDuration = 90.0
[[test]]
testTitle = 'SaveDatabase'
clearAfterTest = false
[[test.workload]]
testName = 'SaveAndKill'
restartInfoLocation = 'simfdb/restartInfo.ini'
testDuration = 30.0


@@ -0,0 +1,61 @@
[[test]]
testTitle = 'SecondCycleTest'
simBackupAgents = 'BackupToFile'
clearAfterTest = false
runConsistencyCheck = false
[[test.workload]]
testName = 'Cycle'
nodeCount = 30000
transactionsPerSecond = 2500.0
testDuration = 30.0
expectedRate = 0
keyPrefix = 'AfterRestart'
[[test.workload]]
testName = 'RandomClogging'
testDuration = 90.0
[[test.workload]]
testName = 'Rollback'
meanDelay = 90.0
testDuration = 90.0
[[test.workload]]
testName = 'Attrition'
machinesToKill = 10
machinesToLeave = 3
reboot = true
testDuration = 90.0
[[test.workload]]
testName = 'Attrition'
machinesToKill = 10
machinesToLeave = 3
reboot = true
testDuration = 90.0
[[test]]
testTitle = 'RestoreBackup'
simBackupAgents = 'BackupToFile'
clearAfterTest = false
[[test.workload]]
testName = 'RestoreBackup'
tag = 'default'
[[test]]
testTitle = 'CheckCycles'
checkOnly = true
[[test.workload]]
testName = 'Cycle'
nodeCount = 30000
keyPrefix = 'AfterRestart'
expectedRate = 0
[[test.workload]]
testName = 'Cycle'
nodeCount = 30000
keyPrefix = 'BeforeRestart'
expectedRate = 0


@@ -0,0 +1,68 @@
[configuration]
storageEngineExcludeTypes = [3]
disableEncryption = true
[[test]]
testTitle = 'SubmitBackup'
simBackupAgents = 'BackupToFile'
clearAfterTest = false
runConsistencyCheck = false
disabledFailureInjectionWorkloads = 'Attrition'
[[test.workload]]
testName = 'SubmitBackup'
delayFor = 0
stopWhenDone = false
[[test.workload]]
testName = 'Attrition'
machinesToKill = 10
machinesToLeave = 3
reboot = true
testDuration = 30.0
[[test]]
testTitle = 'FirstCycleTest'
clearAfterTest = false
runConsistencyCheck = false
disabledFailureInjectionWorkloads = 'Attrition'
[[test.workload]]
testName = 'Cycle'
nodeCount = 30000
transactionsPerSecond = 2500.0
testDuration = 30.0
expectedRate = 0
keyPrefix = 'BeforeRestart'
[[test.workload]]
testName = 'RandomClogging'
testDuration = 90.0
[[test.workload]]
testName = 'Rollback'
meanDelay = 90.0
testDuration = 90.0
[[test.workload]]
testName = 'Attrition'
machinesToKill = 10
machinesToLeave = 3
reboot = true
testDuration = 90.0
[[test.workload]]
testName = 'Attrition'
machinesToKill = 10
machinesToLeave = 3
reboot = true
testDuration = 90.0
[[test]]
testTitle = 'SaveDatabase'
clearAfterTest = false
[[test.workload]]
testName = 'SaveAndKill'
restartInfoLocation = 'simfdb/restartInfo.ini'
testDuration = 30.0


@@ -0,0 +1,61 @@
[[test]]
testTitle = 'SecondCycleTest'
simBackupAgents = 'BackupToFile'
clearAfterTest = false
runConsistencyCheck = false
[[test.workload]]
testName = 'Cycle'
nodeCount = 30000
transactionsPerSecond = 2500.0
testDuration = 30.0
expectedRate = 0
keyPrefix = 'AfterRestart'
[[test.workload]]
testName = 'RandomClogging'
testDuration = 90.0
[[test.workload]]
testName = 'Rollback'
meanDelay = 90.0
testDuration = 90.0
[[test.workload]]
testName = 'Attrition'
machinesToKill = 10
machinesToLeave = 3
reboot = true
testDuration = 90.0
[[test.workload]]
testName = 'Attrition'
machinesToKill = 10
machinesToLeave = 3
reboot = true
testDuration = 90.0
[[test]]
testTitle = 'RestoreBackup'
simBackupAgents = 'BackupToFile'
clearAfterTest = false
[[test.workload]]
testName = 'RestoreBackup'
tag = 'default'
[[test]]
testTitle = 'CheckCycles'
checkOnly = true
[[test.workload]]
testName = 'Cycle'
nodeCount = 30000
keyPrefix = 'AfterRestart'
expectedRate = 0
[[test.workload]]
testName = 'Cycle'
nodeCount = 30000
keyPrefix = 'BeforeRestart'
expectedRate = 0