Merge commit 'eeee15f524ff769248495d70efa0501170fb5ea2' into correctness-fix

This commit is contained in:
Steve Atherton 2021-04-14 11:50:49 -07:00
commit de236894cb
4 changed files with 631 additions and 344 deletions

View File

@ -2477,7 +2477,6 @@ ACTOR Future<Version> watchValue(Future<Version> version,
cx->invalidateCache(key);
wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, info.taskID));
} else if (e.code() == error_code_watch_cancelled || e.code() == error_code_process_behind) {
TEST(e.code() == error_code_watch_cancelled); // Too many watches on the storage server, poll for changes instead
TEST(e.code() == error_code_watch_cancelled); // Too many watches on storage server, poll for changes
TEST(e.code() == error_code_process_behind); // The storage servers are all behind
wait(delay(CLIENT_KNOBS->WATCH_POLLING_TIME, info.taskID));

View File

@ -63,7 +63,7 @@ ProcessClass::Fitness ProcessClass::machineClassFitness(ClusterRole role) const
default:
return ProcessClass::NeverAssign;
}
case ProcessClass::CommitProxy:
case ProcessClass::CommitProxy: // Resolver, Master, CommitProxy, and GrvProxy need to be the same besides best fit
switch (_class) {
case ProcessClass::CommitProxyClass:
return ProcessClass::BestFit;
@ -71,10 +71,6 @@ ProcessClass::Fitness ProcessClass::machineClassFitness(ClusterRole role) const
return ProcessClass::GoodFit;
case ProcessClass::UnsetClass:
return ProcessClass::UnsetFit;
case ProcessClass::GrvProxyClass:
return ProcessClass::OkayFit;
case ProcessClass::ResolutionClass:
return ProcessClass::OkayFit;
case ProcessClass::TransactionClass:
return ProcessClass::OkayFit;
case ProcessClass::CoordinatorClass:
@ -84,7 +80,7 @@ ProcessClass::Fitness ProcessClass::machineClassFitness(ClusterRole role) const
default:
return ProcessClass::WorstFit;
}
case ProcessClass::GrvProxy:
case ProcessClass::GrvProxy: // Resolver, Master, CommitProxy, and GrvProxy need to be the same besides best fit
switch (_class) {
case ProcessClass::GrvProxyClass:
return ProcessClass::BestFit;
@ -92,10 +88,6 @@ ProcessClass::Fitness ProcessClass::machineClassFitness(ClusterRole role) const
return ProcessClass::GoodFit;
case ProcessClass::UnsetClass:
return ProcessClass::UnsetFit;
case ProcessClass::CommitProxyClass:
return ProcessClass::OkayFit;
case ProcessClass::ResolutionClass:
return ProcessClass::OkayFit;
case ProcessClass::TransactionClass:
return ProcessClass::OkayFit;
case ProcessClass::CoordinatorClass:
@ -105,7 +97,7 @@ ProcessClass::Fitness ProcessClass::machineClassFitness(ClusterRole role) const
default:
return ProcessClass::WorstFit;
}
case ProcessClass::Master:
case ProcessClass::Master: // Resolver, Master, CommitProxy, and GrvProxy need to be the same besides best fit
switch (_class) {
case ProcessClass::MasterClass:
return ProcessClass::BestFit;
@ -113,7 +105,7 @@ ProcessClass::Fitness ProcessClass::machineClassFitness(ClusterRole role) const
return ProcessClass::GoodFit;
case ProcessClass::UnsetClass:
return ProcessClass::UnsetFit;
case ProcessClass::ResolutionClass:
case ProcessClass::TransactionClass:
return ProcessClass::OkayFit;
case ProcessClass::CoordinatorClass:
case ProcessClass::TesterClass:
@ -122,7 +114,7 @@ ProcessClass::Fitness ProcessClass::machineClassFitness(ClusterRole role) const
default:
return ProcessClass::WorstFit;
}
case ProcessClass::Resolver:
case ProcessClass::Resolver: // Resolver, Master, CommitProxy, and GrvProxy need to be the same besides best fit
switch (_class) {
case ProcessClass::ResolutionClass:
return ProcessClass::BestFit;
@ -147,8 +139,6 @@ ProcessClass::Fitness ProcessClass::machineClassFitness(ClusterRole role) const
return ProcessClass::GoodFit;
case ProcessClass::UnsetClass:
return ProcessClass::UnsetFit;
case ProcessClass::ResolutionClass:
return ProcessClass::OkayFit;
case ProcessClass::TransactionClass:
return ProcessClass::OkayFit;
case ProcessClass::CoordinatorClass:
@ -167,8 +157,6 @@ ProcessClass::Fitness ProcessClass::machineClassFitness(ClusterRole role) const
return ProcessClass::GoodFit;
case ProcessClass::UnsetClass:
return ProcessClass::UnsetFit;
case ProcessClass::ResolutionClass:
return ProcessClass::OkayFit;
case ProcessClass::TransactionClass:
return ProcessClass::OkayFit;
case ProcessClass::CoordinatorClass:

File diff suppressed because it is too large Load Diff

View File

@ -264,6 +264,40 @@ def process_traces(basedir, testname, path, out, aggregationPolicy, symbolicateB
parser.writeObject({'CMakeSEED': str(cmake_seed)})
return res
class RestartTestPolicy:
def __init__(self, name, old_binary, new_binary):
# Default is to use the same binary for the restart test, unless constraints are satisfied.
self._first_binary = new_binary
self._second_binary = new_binary
if old_binary is None:
_logger.info("No old binary provided")
old_binary_version_raw = subprocess.check_output([old_binary, '--version']).decode('utf-8')
match = re.match('FoundationDB.*\(v([0-9]+\.[0-9]+\.[0-9]+)\)', old_binary_version_raw)
assert match, old_binary_version_raw
old_binary_version = tuple(map(int, match.group(1).split('.')))
match = re.match('.*/restarting/from_([0-9]+\.[0-9]+\.[0-9]+)/', name)
if match: # upgrading _from_
lower_bound = tuple(map(int, match.group(1).split('.')))
if old_binary_version >= lower_bound:
self._first_binary = old_binary
_logger.info("Using old binary as first binary: {} >= {}".format(old_binary_version, lower_bound))
else:
_logger.info("Using new binary as first binary: {} < {}".format(old_binary_version, lower_bound))
match = re.match('.*/restarting/to_([0-9]+\.[0-9]+\.[0-9]+)/', name)
if match: # downgrading _to_
lower_bound = tuple(map(int, match.group(1).split('.')))
if old_binary_version >= lower_bound:
self._second_binary = old_binary
_logger.info("Using old binary as second binary: {} >= {}".format(old_binary_version, lower_bound))
else:
_logger.info("Using new binary as second binary: {} < {}".format(old_binary_version, lower_bound))
def first_binary(self):
return self._first_binary
def second_binary(self):
return self._second_binary
def run_simulation_test(basedir, options):
fdbserver = os.path.join(basedir, 'bin', 'fdbserver')
pargs = [fdbserver,
@ -298,14 +332,19 @@ def run_simulation_test(basedir, options):
os.mkdir(wd)
return_codes = {} # {command: return_code}
first = True
restart_test_policy = None
if len(options.testfile) > 1:
restart_test_policy = RestartTestPolicy(options.testfile[0], options.old_binary, fdbserver)
for testfile in options.testfile:
tmp = list(pargs)
# old_binary is not under test, so don't run under valgrind
valgrind_args = []
if first and options.old_binary is not None and len(options.testfile) > 1:
_logger.info("Run old binary at {}".format(options.old_binary))
tmp[0] = options.old_binary
elif options.use_valgrind:
if restart_test_policy is not None:
if first:
tmp[0] = restart_test_policy.first_binary()
else:
tmp[0] = restart_test_policy.second_binary()
# old_binary is not under test, so don't run under valgrind
if options.use_valgrind and tmp[0] == fdbserver:
valgrind_args = ['valgrind', '--error-exitcode=99', '--']
if not first:
tmp.append('-R')