Merge remote-tracking branch 'origin/main' into authz-general-tls-and-integration-test
commit f7ef5edab4
@@ -198,16 +198,17 @@ function(stage_correctness_package)
      set(src_dir "${src_dir}/")
      string(SUBSTRING ${src_dir} ${dir_len} -1 dest_dir)
      string(SUBSTRING ${file} ${dir_len} -1 rel_out_file)
      set(out_file ${STAGE_OUT_DIR}/${rel_out_file})
      list(APPEND external_files ${out_file})
      add_custom_command(
        OUTPUT ${out_file}
        DEPENDS ${file}
        COMMAND ${CMAKE_COMMAND} -E copy ${file} ${out_file}
        COMMENT "Copying ${STAGE_CONTEXT} external file ${file}"
      )
    endforeach()
  endforeach()

  list(APPEND package_files ${STAGE_OUT_DIR}/bin/fdbserver
                            ${STAGE_OUT_DIR}/bin/coverage.fdbserver.xml
                            ${STAGE_OUT_DIR}/bin/coverage.fdbclient.xml

@@ -217,6 +218,7 @@ function(stage_correctness_package)
                            ${STAGE_OUT_DIR}/bin/TraceLogHelper.dll
                            ${STAGE_OUT_DIR}/CMakeCache.txt
  )

  add_custom_command(
    OUTPUT ${package_files}
    DEPENDS ${CMAKE_BINARY_DIR}/CMakeCache.txt

@@ -238,6 +240,20 @@ function(stage_correctness_package)
      ${STAGE_OUT_DIR}/bin
    COMMENT "Copying files for ${STAGE_CONTEXT} package"
  )

  set(test_harness_dir "${CMAKE_SOURCE_DIR}/contrib/TestHarness2")
  file(GLOB_RECURSE test_harness2_files RELATIVE "${test_harness_dir}" CONFIGURE_DEPENDS "${test_harness_dir}/*.py")
  foreach(file IN LISTS test_harness2_files)
    set(src_file "${test_harness_dir}/${file}")
    set(out_file "${STAGE_OUT_DIR}/${file}")
    get_filename_component(dir "${out_file}" DIRECTORY)
    file(MAKE_DIRECTORY "${dir}")
    add_custom_command(OUTPUT ${out_file}
      COMMAND ${CMAKE_COMMAND} -E copy "${src_file}" "${out_file}"
      DEPENDS "${src_file}")
    list(APPEND package_files "${out_file}")
  endforeach()

  list(APPEND package_files ${test_files} ${external_files})
  if(STAGE_OUT_FILES)
    set(${STAGE_OUT_FILES} ${package_files} PARENT_SCOPE)

@@ -449,7 +465,11 @@ function(add_fdbclient_test)
    set_tests_properties("${T_NAME}" PROPERTIES TIMEOUT ${T_TEST_TIMEOUT})
  else()
    # default timeout
    if(USE_SANITIZER)
      set_tests_properties("${T_NAME}" PROPERTIES TIMEOUT 1200)
    else()
      set_tests_properties("${T_NAME}" PROPERTIES TIMEOUT 300)
    endif()
  endif()
  set_tests_properties("${T_NAME}" PROPERTIES ENVIRONMENT "${SANITIZER_OPTIONS}")
endfunction()

@@ -4,4 +4,6 @@
export ASAN_OPTIONS="detect_leaks=0"

OLDBINDIR="${OLDBINDIR:-/app/deploy/global_data/oldBinaries}"
mono bin/TestHarness.exe joshua-run "${OLDBINDIR}" false
#mono bin/TestHarness.exe joshua-run "${OLDBINDIR}" false

python3 -m test_harness.app -s ${JOSHUA_SEED} --old-binaries-path ${OLDBINDIR}

@@ -1,4 +1,4 @@
#!/bin/bash -u
for file in `find . -name 'trace*.xml'` ; do
    mono ./bin/TestHarness.exe summarize "${file}" summary.xml "" JoshuaTimeout true
done

python3 -m test_harness.timeout

@@ -1,3 +1,3 @@
#!/bin/sh
OLDBINDIR="${OLDBINDIR:-/app/deploy/global_data/oldBinaries}"
mono bin/TestHarness.exe joshua-run "${OLDBINDIR}" true
python3 -m test_harness.app -s ${JOSHUA_SEED} --old-binaries-path ${OLDBINDIR} --use-valgrind

@@ -1,6 +1,2 @@
#!/bin/bash -u
for file in `find . -name 'trace*.xml'` ; do
    for valgrindFile in `find . -name 'valgrind*.xml'` ; do
        mono ./bin/TestHarness.exe summarize "${file}" summary.xml "${valgrindFile}" JoshuaTimeout true
    done
done
python3 -m test_harness.timeout --use-valgrind

@@ -0,0 +1,2 @@
/tmp/
/venv

@@ -0,0 +1,2 @@
# Currently this file is left intentionally empty. Its main job for now is to indicate that this directory
# should be used as a module.

@@ -0,0 +1,25 @@
import argparse
import sys
import traceback

from test_harness.config import config
from test_harness.run import TestRunner
from test_harness.summarize import SummaryTree

if __name__ == '__main__':
    try:
        parser = argparse.ArgumentParser('TestHarness', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
        config.build_arguments(parser)
        args = parser.parse_args()
        config.extract_args(args)
        test_runner = TestRunner()
        if not test_runner.run():
            exit(1)
    except Exception as e:
        _, _, exc_traceback = sys.exc_info()
        error = SummaryTree('TestHarnessError')
        error.attributes['Severity'] = '40'
        error.attributes['ErrorMessage'] = str(e)
        error.attributes['Trace'] = repr(traceback.format_tb(exc_traceback))
        error.dump(sys.stdout)
        exit(1)

@@ -0,0 +1,260 @@
from __future__ import annotations

import argparse
import collections
import copy
import os
import random
from enum import Enum
from pathlib import Path
from typing import List, Any, OrderedDict, Dict


class BuggifyOptionValue(Enum):
    ON = 1
    OFF = 2
    RANDOM = 3


class BuggifyOption:
    def __init__(self, val: str | None = None):
        self.value = BuggifyOptionValue.RANDOM
        if val is not None:
            v = val.lower()
            if v in ['on', '1', 'true']:
                self.value = BuggifyOptionValue.ON
            elif v in ['off', '0', 'false']:
                self.value = BuggifyOptionValue.OFF
            elif v in ['random', 'rnd', 'r']:
                pass
            else:
                assert False, 'Invalid value {} -- use true, false, or random'.format(v)


class ConfigValue:
    def __init__(self, name: str, **kwargs):
        self.name = name
        self.value = None
        self.kwargs = kwargs
        if 'default' in self.kwargs:
            self.value = self.kwargs['default']

    def get_arg_name(self) -> str:
        if 'long_name' in self.kwargs:
            return self.kwargs['long_name']
        else:
            return self.name

    def add_to_args(self, parser: argparse.ArgumentParser):
        kwargs = copy.copy(self.kwargs)
        long_name = self.name
        short_name = None
        if 'long_name' in kwargs:
            long_name = kwargs['long_name']
            del kwargs['long_name']
        if 'short_name' in kwargs:
            short_name = kwargs['short_name']
            del kwargs['short_name']
        if 'action' in kwargs and kwargs['action'] in ['store_true', 'store_false']:
            del kwargs['type']
        long_name = long_name.replace('_', '-')
        if short_name is None:
            # line below is useful for debugging
            # print('add_argument(\'--{}\', [{{{}}}])'.format(long_name, ', '.join(['\'{}\': \'{}\''.format(k, v)
            #                                                                       for k, v in kwargs.items()])))
            parser.add_argument('--{}'.format(long_name), **kwargs)
        else:
            # line below is useful for debugging
            # print('add_argument(\'-{}\', \'--{}\', [{{{}}}])'.format(short_name, long_name,
            #                                                          ', '.join(['\'{}\': \'{}\''.format(k, v)
            #                                                                     for k, v in kwargs.items()])))
            parser.add_argument('-{}'.format(short_name), '--{}'.format(long_name), **kwargs)

    def get_value(self, args: argparse.Namespace) -> tuple[str, Any]:
        return self.name, args.__getattribute__(self.get_arg_name())


class Config:
    """
    This is the central configuration class for test harness. The values in this class are exposed globally through
    a global variable test_harness.config.config. This class provides some "magic" to keep test harness flexible.
    Each parameter can further be configured using an `_args` member variable which is expected to be a dictionary.
    * The value of any variable can be set through the command line. For a variable named `variable_name` we will
      by default create a new command line option `--variable-name` (`_` is automatically changed to `-`). This
      default can be changed by setting the `'long_name'` property in the `_arg` dict.
    * In addition the user can also optionally set a short-name. This can be achieved by setting the `'short_name'`
      property in the `_arg` dictionary.
    * All additional properties in `_args` are passed to `argparse.add_argument`.
    * If the default of a variable is `None` the user should explicitly set the `'type'` property to an appropriate
      type.
    * In addition to command line flags, all configuration options can also be controlled through environment variables.
      By default, `variable-name` can be changed by setting the environment variable `TH_VARIABLE_NAME`. This default
      can be changed by setting the `'env_name'` property.
    * Test harness comes with multiple executables. Each of these should use the config facility. For this,
      `Config.build_arguments` should be called first with the `argparse` parser. Then `Config.extract_args` needs
      to be called with the result of `argparse.ArgumentParser.parse_args`. A sample example could look like this:
      ```
      parser = argparse.ArgumentParser('TestHarness', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
      config.build_arguments(parser)
      args = parser.parse_args()
      config.extract_args(args)
      ```
    * Changing the default value for all executables might not always be desirable. If it should be only changed for
      one executable Config.change_default should be used.
    """
    def __init__(self):
        self.random = random.Random()
        self.cluster_file: str | None = None
        self.cluster_file_args = {'short_name': 'C', 'type': str, 'help': 'Path to fdb cluster file', 'required': False,
                                  'env_name': 'JOSHUA_CLUSTER_FILE'}
        self.joshua_dir: str | None = None
        self.joshua_dir_args = {'type': str, 'help': 'Where to write FDB data to', 'required': False,
                                'env_name': 'JOSHUA_APP_DIR'}
        self.stats: str | None = None
        self.stats_args = {'type': str, 'help': 'A base64 encoded list of statistics (used to reproduce runs)',
                           'required': False}
        self.random_seed: int | None = None
        self.random_seed_args = {'type': int,
                                 'help': 'Force given seed given to fdbserver -- mostly useful for debugging',
                                 'required': False}
        self.kill_seconds: int = 30 * 60
        self.kill_seconds_args = {'help': 'Timeout for individual test'}
        self.buggify_on_ratio: float = 0.8
        self.buggify_on_ratio_args = {'help': 'Probability that buggify is turned on'}
        self.write_run_times = False
        self.write_run_times_args = {'help': 'Write back probabilities after each test run',
                                     'action': 'store_true'}
        self.unseed_check_ratio: float = 0.05
        self.unseed_check_ratio_args = {'help': 'Probability for doing determinism check'}
        self.test_dirs: List[str] = ['slow', 'fast', 'restarting', 'rare', 'noSim']
        self.test_dirs_args: dict = {'nargs': '*', 'help': 'test_directories to look for files in'}
        self.trace_format: str = 'json'
        self.trace_format_args = {'choices': ['json', 'xml'], 'help': 'What format fdb should produce'}
        self.crash_on_error: bool = True
        self.crash_on_error_args = {'long_name': 'no_crash', 'action': 'store_false',
                                    'help': 'Don\'t crash on first error'}
        self.max_warnings: int = 10
        self.max_warnings_args = {'short_name': 'W'}
        self.max_errors: int = 10
        self.max_errors_args = {'short_name': 'E'}
        self.old_binaries_path: Path = Path('/app/deploy/global_data/oldBinaries/')
        self.old_binaries_path_args = {'help': 'Path to the directory containing the old fdb binaries'}
        self.use_valgrind: bool = False
        self.use_valgrind_args = {'action': 'store_true'}
        self.buggify = BuggifyOption('random')
        self.buggify_args = {'short_name': 'b', 'choices': ['on', 'off', 'random']}
        self.pretty_print: bool = False
        self.pretty_print_args = {'short_name': 'P', 'action': 'store_true'}
        self.clean_up: bool = True
        self.clean_up_args = {'long_name': 'no_clean_up', 'action': 'store_false'}
        self.run_dir: Path = Path('tmp')
        self.joshua_seed: int = random.randint(0, 2 ** 32 - 1)
        self.joshua_seed_args = {'short_name': 's', 'help': 'A random seed', 'env_name': 'JOSHUA_SEED'}
        self.print_coverage = False
        self.print_coverage_args = {'action': 'store_true'}
        self.binary = Path('bin') / ('fdbserver.exe' if os.name == 'nt' else 'fdbserver')
        self.binary_args = {'help': 'Path to executable'}
        self.hit_per_runs_ratio: int = 20000
        self.hit_per_runs_ratio_args = {'help': 'Maximum test runs before each code probe hit at least once'}
        self.output_format: str = 'xml'
        self.output_format_args = {'short_name': 'O', 'choices': ['json', 'xml'],
                                   'help': 'What format TestHarness should produce'}
        self.include_test_files: str = r'.*'
        self.include_test_files_args = {'help': 'Only consider test files whose path match against the given regex'}
        self.exclude_test_files: str = r'.^'
        self.exclude_test_files_args = {'help': 'Don\'t consider test files whose path match against the given regex'}
        self.include_test_classes: str = r'.*'
        self.include_test_classes_args = {'help': 'Only consider tests whose names match against the given regex'}
        self.exclude_test_names: str = r'.^'
        self.exclude_test_names_args = {'help': 'Don\'t consider tests whose names match against the given regex'}
        self.details: bool = False
        self.details_args = {'help': 'Print detailed results', 'short_name': 'c', 'action': 'store_true'}
        self.success: bool = False
        self.success_args = {'help': 'Print successful results', 'action': 'store_true'}
        self.cov_include_files: str = r'.*'
        self.cov_include_files_args = {'help': 'Only consider coverage traces that originated in files matching regex'}
        self.cov_exclude_files: str = r'.^'
        self.cov_exclude_files_args = {'help': 'Ignore coverage traces that originated in files matching regex'}
        self.max_stderr_bytes: int = 1000
        self.write_stats: bool = True
        self.read_stats: bool = True
        self.reproduce_prefix: str | None = None
        self.reproduce_prefix_args = {'type': str, 'required': False,
                                      'help': 'When printing the results, prepend this string to the command'}
        self._env_names: Dict[str, str] = {}
        self._config_map = self._build_map()
        self._read_env()
        self.random.seed(self.joshua_seed, version=2)

    def change_default(self, attr: str, default_val):
        assert attr in self._config_map, 'Unknown config attribute {}'.format(attr)
        self.__setattr__(attr, default_val)
        self._config_map[attr].kwargs['default'] = default_val

    def _get_env_name(self, var_name: str) -> str:
        return self._env_names.get(var_name, 'TH_{}'.format(var_name.upper()))

    def dump(self):
        for attr in dir(self):
            obj = getattr(self, attr)
            if attr == 'random' or attr.startswith('_') or callable(obj) or attr.endswith('_args'):
                continue
            print('config.{}: {} = {}'.format(attr, type(obj), obj))

    def _build_map(self) -> OrderedDict[str, ConfigValue]:
        config_map: OrderedDict[str, ConfigValue] = collections.OrderedDict()
        for attr in dir(self):
            obj = getattr(self, attr)
            if attr == 'random' or attr.startswith('_') or callable(obj):
                continue
            if attr.endswith('_args'):
                name = attr[0:-len('_args')]
                assert name in config_map
                assert isinstance(obj, dict)
                for k, v in obj.items():
                    if k == 'env_name':
                        self._env_names[name] = v
                    else:
                        config_map[name].kwargs[k] = v
            else:
                # attribute_args has to be declared after the attribute
                assert attr not in config_map
                val_type = type(obj)
                kwargs = {'type': val_type, 'default': obj}
                config_map[attr] = ConfigValue(attr, **kwargs)
        return config_map

    def _read_env(self):
        for attr in dir(self):
            obj = getattr(self, attr)
            if attr == 'random' or attr.startswith('_') or attr.endswith('_args') or callable(obj):
                continue
            env_name = self._get_env_name(attr)
            attr_type = self._config_map[attr].kwargs['type']
            assert type(None) != attr_type
            e = os.getenv(env_name)
            if e is not None:
                self.__setattr__(attr, attr_type(e))

    def build_arguments(self, parser: argparse.ArgumentParser):
        for val in self._config_map.values():
            val.add_to_args(parser)

    def extract_args(self, args: argparse.Namespace):
        for val in self._config_map.values():
            k, v = val.get_value(args)
            if v is not None:
                config.__setattr__(k, v)
        self.random.seed(self.joshua_seed, version=2)


config = Config()


if __name__ == '__main__':
    # test the config setup
    parser = argparse.ArgumentParser('TestHarness Config Tester',
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    config.build_arguments(parser)
    args = parser.parse_args()
    config.extract_args(args)
    config.dump()

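As a quick illustration of the configuration facility described in the Config docstring above: another executable can adjust a default for itself and still pick up command-line flags and environment variables. A minimal sketch, assuming the test_harness package is importable; the 'example_tool' parser name is just an illustration, everything else comes from config.py above.

import argparse

from test_harness.config import config

# Change a default for this executable only, which is what Config.change_default is for.
config.change_default('output_format', 'json')

# Environment variables (e.g. TH_KILL_SECONDS for kill_seconds, or JOSHUA_SEED, the explicit
# env_name of joshua_seed) were already applied when the global config object was constructed;
# command-line flags parsed here override them.
parser = argparse.ArgumentParser('example_tool', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
config.build_arguments(parser)
config.extract_args(parser.parse_args())
print(config.output_format, config.kill_seconds)
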
@@ -0,0 +1,144 @@
from __future__ import annotations

from typing import OrderedDict, Tuple, List

import collections
import fdb
import fdb.tuple
import struct

from test_harness.run import StatFetcher, TestDescription
from test_harness.config import config
from test_harness.summarize import SummaryTree, Coverage

# Before increasing this, make sure that all Joshua clusters (at Apple and Snowflake) have been upgraded.
# This version needs to be changed if we either need newer features from FDB or the current API version is
# getting retired.
fdb.api_version(630)


def str_to_tuple(s: str | None):
    if s is None:
        return s
    return tuple(s.split(','))


fdb_db = None


def open_db(cluster_file: str | None):
    global fdb_db
    if fdb_db is None:
        fdb_db = fdb.open(cluster_file)
    return fdb_db


def chunkify(iterable, sz: int):
    res = []
    for item in iterable:
        res.append(item)
        if len(res) >= sz:
            yield res
            res = []
    if len(res) > 0:
        yield res


@fdb.transactional
def write_coverage_chunk(tr, path: Tuple[str, ...], metadata: Tuple[str, ...],
                         coverage: List[Tuple[Coverage, bool]], initialized: bool) -> bool:
    cov_dir = fdb.directory.create_or_open(tr, path)
    if not initialized:
        metadata_dir = fdb.directory.create_or_open(tr, metadata)
        v = tr[metadata_dir['initialized']]
        initialized = v.present()
    for cov, covered in coverage:
        if not initialized or covered:
            tr.add(cov_dir.pack((cov.file, cov.line, cov.comment)), struct.pack('<I', 1 if covered else 0))
    return initialized


@fdb.transactional
def set_initialized(tr, metadata: Tuple[str, ...]):
    metadata_dir = fdb.directory.create_or_open(tr, metadata)
    tr[metadata_dir['initialized']] = fdb.tuple.pack((True,))


def write_coverage(cluster_file: str | None, cov_path: Tuple[str, ...], metadata: Tuple[str, ...],
                   coverage: OrderedDict[Coverage, bool]):
    db = open_db(cluster_file)
    assert config.joshua_dir is not None
    initialized: bool = False
    for chunk in chunkify(coverage.items(), 100):
        initialized = write_coverage_chunk(db, cov_path, metadata, chunk, initialized)
    if not initialized:
        set_initialized(db, metadata)


@fdb.transactional
def _read_coverage(tr, cov_path: Tuple[str, ...]) -> OrderedDict[Coverage, int]:
    res = collections.OrderedDict()
    cov_dir = fdb.directory.create_or_open(tr, cov_path)
    for k, v in tr[cov_dir.range()]:
        file, line, comment = cov_dir.unpack(k)
        count = struct.unpack('<I', v)[0]
        res[Coverage(file, line, comment)] = count
    return res


def read_coverage(cluster_file: str | None, cov_path: Tuple[str, ...]) -> OrderedDict[Coverage, int]:
    db = open_db(cluster_file)
    return _read_coverage(db, cov_path)


class TestStatistics:
    def __init__(self, runtime: int, run_count: int):
        self.runtime: int = runtime
        self.run_count: int = run_count


class Statistics:
    def __init__(self, cluster_file: str | None, joshua_dir: Tuple[str, ...]):
        self.db = open_db(cluster_file)
        self.stats_dir = self.open_stats_dir(self.db, joshua_dir)
        self.stats: OrderedDict[str, TestStatistics] = self.read_stats_from_db(self.db)

    @fdb.transactional
    def open_stats_dir(self, tr, app_dir: Tuple[str]):
        stats_dir = app_dir + ('runtime_stats',)
        return fdb.directory.create_or_open(tr, stats_dir)

    @fdb.transactional
    def read_stats_from_db(self, tr) -> OrderedDict[str, TestStatistics]:
        result = collections.OrderedDict()
        for k, v in tr[self.stats_dir.range()]:
            test_name = self.stats_dir.unpack(k)[0]
            runtime, run_count = struct.unpack('<II', v)
            result[test_name] = TestStatistics(runtime, run_count)
        return result

    @fdb.transactional
    def _write_runtime(self, tr, test_name: str, time: int) -> None:
        key = self.stats_dir.pack((test_name,))
        tr.add(key, struct.pack('<II', time, 1))

    def write_runtime(self, test_name: str, time: int) -> None:
        assert self.db is not None
        self._write_runtime(self.db, test_name, time)


class FDBStatFetcher(StatFetcher):
    def __init__(self, tests: OrderedDict[str, TestDescription],
                 joshua_dir: Tuple[str] = str_to_tuple(config.joshua_dir)):
        super().__init__(tests)
        self.statistics = Statistics(config.cluster_file, joshua_dir)

    def read_stats(self):
        for k, v in self.statistics.stats.items():
            if k in self.tests.keys():
                self.tests[k].total_runtime = v.runtime
                self.tests[k].num_runs = v.run_count

    def add_run_time(self, test_name: str, runtime: int, out: SummaryTree):
        self.statistics.write_runtime(test_name, runtime)
        super().add_run_time(test_name, runtime, out)

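The chunkify helper above is what bounds the size of each coverage-writing transaction (write_coverage feeds it chunks of 100 entries). A small illustration of its behaviour, assuming the test_harness package from this commit and the fdb Python bindings are importable:

from test_harness.fdb import chunkify

# Chunks of at most three items; the final, shorter chunk is still yielded.
assert list(chunkify(range(7), 3)) == [[0, 1, 2], [3, 4, 5], [6]]
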
@@ -0,0 +1,161 @@
from __future__ import annotations

import collections
import io
import sys
import xml.sax
import xml.sax.handler
from pathlib import Path
from typing import List, OrderedDict, Set

from joshua import joshua_model

import test_harness.run
from test_harness.config import config
from test_harness.summarize import SummaryTree


class ToSummaryTree(xml.sax.handler.ContentHandler):
    def __init__(self):
        super().__init__()
        self.root: SummaryTree | None = None
        self.stack: List[SummaryTree] = []

    def result(self) -> SummaryTree:
        assert len(self.stack) == 0 and self.root is not None, 'Parse Error'
        return self.root

    def startElement(self, name, attrs):
        new_child = SummaryTree(name)
        for k, v in attrs.items():
            new_child.attributes[k] = v
        self.stack.append(new_child)

    def endElement(self, name):
        closed = self.stack.pop()
        assert closed.name == name
        if len(self.stack) == 0:
            self.root = closed
        else:
            self.stack[-1].children.append(closed)


def _print_summary(summary: SummaryTree, commands: Set[str]):
    cmd = []
    if config.reproduce_prefix is not None:
        cmd.append(config.reproduce_prefix)
    cmd.append('fdbserver')
    if 'TestFile' in summary.attributes:
        file_name = summary.attributes['TestFile']
        role = 'test' if test_harness.run.is_no_sim(Path(file_name)) else 'simulation'
        cmd += ['-r', role, '-f', file_name]
    else:
        cmd += ['-r', 'simulation', '-f', '<ERROR>']
    if 'RandomSeed' in summary.attributes:
        cmd += ['-s', summary.attributes['RandomSeed']]
    else:
        cmd += ['-s', '<Error>']
    if 'BuggifyEnabled' in summary.attributes:
        arg = 'on'
        if summary.attributes['BuggifyEnabled'].lower() in ['0', 'off', 'false']:
            arg = 'off'
        cmd += ['-b', arg]
    else:
        cmd += ['-b', '<ERROR>']
    cmd += ['--crash', '--trace_format', config.trace_format]
    key = ' '.join(cmd)
    count = 1
    while key in commands:
        key = '{} # {}'.format(' '.join(cmd), count)
        count += 1
    # we want the command as the first attribute
    attributes = {'Command': ' '.join(cmd)}
    for k, v in summary.attributes.items():
        if k == 'Errors':
            attributes['ErrorCount'] = v
        else:
            attributes[k] = v
    summary.attributes = attributes
    if config.details:
        key = str(len(commands))
        str_io = io.StringIO()
        summary.dump(str_io, prefix=(' ' if config.pretty_print else ''))
        if config.output_format == 'json':
            sys.stdout.write('{}"Test{}": {}'.format(' ' if config.pretty_print else '',
                                                     key, str_io.getvalue()))
        else:
            sys.stdout.write(str_io.getvalue())
        if config.pretty_print:
            sys.stdout.write('\n' if config.output_format == 'xml' else ',\n')
        return key
    error_count = 0
    warning_count = 0
    small_summary = SummaryTree('Test')
    small_summary.attributes = attributes
    errors = SummaryTree('Errors')
    warnings = SummaryTree('Warnings')
    buggifies: OrderedDict[str, List[int]] = collections.OrderedDict()
    for child in summary.children:
        if 'Severity' in child.attributes and child.attributes['Severity'] == '40' and error_count < config.max_errors:
            error_count += 1
            errors.append(child)
        if 'Severity' in child.attributes and child.attributes[
                'Severity'] == '30' and warning_count < config.max_warnings:
            warning_count += 1
            warnings.append(child)
        if child.name == 'BuggifySection':
            file = child.attributes['File']
            line = int(child.attributes['Line'])
            buggifies.setdefault(file, []).append(line)
    buggifies_elem = SummaryTree('Buggifies')
    for file, lines in buggifies.items():
        lines.sort()
        if config.output_format == 'json':
            buggifies_elem.attributes[file] = ' '.join(str(line) for line in lines)
        else:
            child = SummaryTree('Buggify')
            child.attributes['File'] = file
            child.attributes['Lines'] = ' '.join(str(line) for line in lines)
            small_summary.append(child)
    small_summary.children.append(buggifies_elem)
    if len(errors.children) > 0:
        small_summary.children.append(errors)
    if len(warnings.children) > 0:
        small_summary.children.append(warnings)
    output = io.StringIO()
    small_summary.dump(output, prefix=(' ' if config.pretty_print else ''))
    if config.output_format == 'json':
        sys.stdout.write('{}"{}": {}'.format(' ' if config.pretty_print else '', key, output.getvalue().strip()))
    else:
        sys.stdout.write('{}{}'.format(' ' if config.pretty_print else '', output.getvalue().strip()))
    sys.stdout.write('\n' if config.output_format == 'xml' else ',\n')


def print_errors(ensemble_id: str):
    joshua_model.open(config.cluster_file)
    properties = joshua_model.get_ensemble_properties(ensemble_id)
    compressed = properties["compressed"] if "compressed" in properties else False
    for rec in joshua_model.tail_results(ensemble_id, errors_only=(not config.success), compressed=compressed):
        if len(rec) == 5:
            version_stamp, result_code, host, seed, output = rec
        elif len(rec) == 4:
            version_stamp, result_code, host, output = rec
            seed = None
        elif len(rec) == 3:
            version_stamp, result_code, output = rec
            host = None
            seed = None
        elif len(rec) == 2:
            version_stamp, seed = rec
            output = str(joshua_model.fdb.tuple.unpack(seed)[0]) + "\n"
            result_code = None
            host = None
            seed = None
        else:
            raise Exception("Unknown result format")
        lines = output.splitlines()
        commands: Set[str] = set()
        for line in lines:
            summary = ToSummaryTree()
            xml.sax.parseString(line, summary)
            commands.add(_print_summary(summary.result(), commands))

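_print_summary above turns each test summary into a copy-pastable fdbserver reproduction command that is stored as the 'Command' attribute. A sketch of its shape, with a made-up test file and seed and with config.reproduce_prefix unset:

# Roughly what the assembled command looks like for a simulation test (values illustrative):
cmd = ['fdbserver', '-r', 'simulation', '-f', 'slow/SomeTest.toml',
       '-s', '1094093185', '-b', 'on', '--crash', '--trace_format', 'json']
print(' '.join(cmd))
# fdbserver -r simulation -f slow/SomeTest.toml -s 1094093185 -b on --crash --trace_format json
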
@@ -0,0 +1,144 @@
from __future__ import annotations

import argparse
import io
import json
import re
import sys
import test_harness.fdb

from typing import List, Tuple, OrderedDict
from test_harness.summarize import SummaryTree, Coverage
from test_harness.config import config
from xml.sax.saxutils import quoteattr


class GlobalStatistics:
    def __init__(self):
        self.total_probes_hit: int = 0
        self.total_cpu_time: int = 0
        self.total_test_runs: int = 0
        self.total_missed_probes: int = 0


class EnsembleResults:
    def __init__(self, cluster_file: str | None, ensemble_id: str):
        self.global_statistics = GlobalStatistics()
        self.fdb_path = ('joshua', 'ensembles', 'results', 'application', ensemble_id)
        self.coverage_path = self.fdb_path + ('coverage',)
        self.statistics = test_harness.fdb.Statistics(cluster_file, self.fdb_path)
        coverage_dict: OrderedDict[Coverage, int] = test_harness.fdb.read_coverage(cluster_file, self.coverage_path)
        self.coverage: List[Tuple[Coverage, int]] = []
        self.min_coverage_hit: int | None = None
        self.ratio = self.global_statistics.total_test_runs / config.hit_per_runs_ratio
        for cov, count in coverage_dict.items():
            if re.search(config.cov_include_files, cov.file) is None:
                continue
            if re.search(config.cov_exclude_files, cov.file) is not None:
                continue
            self.global_statistics.total_probes_hit += count
            self.coverage.append((cov, count))
            if count <= self.ratio:
                self.global_statistics.total_missed_probes += 1
            if self.min_coverage_hit is None or self.min_coverage_hit > count:
                self.min_coverage_hit = count
        self.coverage.sort(key=lambda x: (x[1], x[0].file, x[0].line))
        self.stats: List[Tuple[str, int, int]] = []
        for k, v in self.statistics.stats.items():
            self.global_statistics.total_test_runs += v.run_count
            self.global_statistics.total_cpu_time += v.runtime
            self.stats.append((k, v.runtime, v.run_count))
        self.stats.sort(key=lambda x: x[1], reverse=True)
        if self.min_coverage_hit is not None:
            self.coverage_ok = self.min_coverage_hit > self.ratio
        else:
            self.coverage_ok = False

    def dump(self, prefix: str):
        errors = 0
        out = SummaryTree('EnsembleResults')
        out.attributes['TotalRuntime'] = str(self.global_statistics.total_cpu_time)
        out.attributes['TotalTestRuns'] = str(self.global_statistics.total_test_runs)
        out.attributes['TotalProbesHit'] = str(self.global_statistics.total_probes_hit)
        out.attributes['MinProbeHit'] = str(self.min_coverage_hit)
        out.attributes['TotalProbes'] = str(len(self.coverage))
        out.attributes['MissedProbes'] = str(self.global_statistics.total_missed_probes)

        for cov, count in self.coverage:
            severity = 10 if count > self.ratio else 40
            if severity == 40:
                errors += 1
            if (severity == 40 and errors <= config.max_errors) or config.details:
                child = SummaryTree('CodeProbe')
                child.attributes['Severity'] = str(severity)
                child.attributes['File'] = cov.file
                child.attributes['Line'] = str(cov.line)
                child.attributes['Comment'] = '' if cov.comment is None else cov.comment
                child.attributes['HitCount'] = str(count)
                out.append(child)

        if config.details:
            for k, runtime, run_count in self.stats:
                child = SummaryTree('Test')
                child.attributes['Name'] = k
                child.attributes['Runtime'] = str(runtime)
                child.attributes['RunCount'] = str(run_count)
                out.append(child)
        if errors > 0:
            out.attributes['Errors'] = str(errors)
        str_io = io.StringIO()
        out.dump(str_io, prefix=prefix, new_line=config.pretty_print)
        if config.output_format == 'xml':
            sys.stdout.write(str_io.getvalue())
        else:
            sys.stdout.write('{}"EnsembleResults":{}{}'.format(' ' if config.pretty_print else '',
                                                               '\n' if config.pretty_print else ' ',
                                                               str_io.getvalue()))


def write_header(ensemble_id: str):
    if config.output_format == 'json':
        if config.pretty_print:
            print('{')
            print(' "{}": {},\n'.format('ID', json.dumps(ensemble_id.strip())))
        else:
            sys.stdout.write('{{{}: {},'.format('ID', json.dumps(ensemble_id.strip())))
    elif config.output_format == 'xml':
        sys.stdout.write('<Ensemble ID={}>'.format(quoteattr(ensemble_id.strip())))
        if config.pretty_print:
            sys.stdout.write('\n')
    else:
        assert False, 'unknown output format {}'.format(config.output_format)


def write_footer():
    if config.output_format == 'xml':
        sys.stdout.write('</Ensemble>\n')
    elif config.output_format == 'json':
        sys.stdout.write('}\n')
    else:
        assert False, 'unknown output format {}'.format(config.output_format)


if __name__ == '__main__':
    parser = argparse.ArgumentParser('TestHarness Results', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    config.change_default('pretty_print', True)
    config.change_default('max_warnings', 0)
    config.build_arguments(parser)
    parser.add_argument('ensemble_id', type=str, help='The ensemble to fetch the result for')
    args = parser.parse_args()
    config.extract_args(args)
    config.output_format = args.output_format
    write_header(args.ensemble_id)
    try:
        import test_harness.joshua
        test_harness.joshua.print_errors(args.ensemble_id)
    except ModuleNotFoundError:
        child = SummaryTree('JoshuaNotFound')
        child.attributes['Severity'] = '30'
        child.attributes['Message'] = 'Could not import Joshua -- set PYTHONPATH to joshua checkout dir'
        child.dump(sys.stdout, prefix=(' ' if config.pretty_print else ''), new_line=config.pretty_print)
    results = EnsembleResults(config.cluster_file, args.ensemble_id)
    results.dump(' ' if config.pretty_print else '')
    write_footer()
    exit(0 if results.coverage_ok else 1)

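To make the coverage check above concrete: coverage_ok compares the least-hit code probe against total_test_runs / hit_per_runs_ratio, and the process exits non-zero when that check fails. A worked example using the default ratio from config.py and otherwise illustrative numbers:

total_test_runs = 200000                       # taken from the ensemble's runtime statistics
hit_per_runs_ratio = 20000                     # default in config.py
ratio = total_test_runs / hit_per_runs_ratio   # 10.0
min_coverage_hit = 12                          # least-hit probe in this example
coverage_ok = min_coverage_hit > ratio         # True, so the exit code would be 0
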
@@ -0,0 +1,465 @@
from __future__ import annotations

import array
import base64
import collections
import math
import os
import resource
import shutil
import subprocess
import re
import sys
import threading
import time
import uuid

from functools import total_ordering
from pathlib import Path
from test_harness.version import Version
from test_harness.config import config
from typing import List, Pattern, OrderedDict

from test_harness.summarize import Summary, SummaryTree


@total_ordering
class TestDescription:
    def __init__(self, path: Path, name: str, priority: float):
        self.paths: List[Path] = [path]
        self.name = name
        self.priority: float = priority
        # we only measure in seconds. Otherwise, keeping determinism will be difficult
        self.total_runtime: int = 0
        self.num_runs: int = 0

    def __lt__(self, other):
        if isinstance(other, TestDescription):
            return self.name < other.name
        else:
            return self.name < str(other)

    def __eq__(self, other):
        if isinstance(other, TestDescription):
            return self.name == other.name
        else:
            return self.name == str(other)


class StatFetcher:
    def __init__(self, tests: OrderedDict[str, TestDescription]):
        self.tests = tests

    def read_stats(self):
        pass

    def add_run_time(self, test_name: str, runtime: int, out: SummaryTree):
        self.tests[test_name].total_runtime += runtime


class TestPicker:
    def __init__(self, test_dir: Path):
        if not test_dir.exists():
            raise RuntimeError('{} is neither a directory nor a file'.format(test_dir))
        self.include_files_regex = re.compile(config.include_test_files)
        self.exclude_files_regex = re.compile(config.exclude_test_files)
        self.include_tests_regex = re.compile(config.include_test_classes)
        self.exclude_tests_regex = re.compile(config.exclude_test_names)
        self.test_dir: Path = test_dir
        self.tests: OrderedDict[str, TestDescription] = collections.OrderedDict()
        self.restart_test: Pattern = re.compile(r".*-\d+\.(txt|toml)")
        self.follow_test: Pattern = re.compile(r".*-[2-9]\d*\.(txt|toml)")

        for subdir in self.test_dir.iterdir():
            if subdir.is_dir() and subdir.name in config.test_dirs:
                self.walk_test_dir(subdir)
        self.stat_fetcher: StatFetcher
        if config.stats is not None or config.joshua_dir is None:
            self.stat_fetcher = StatFetcher(self.tests)
        else:
            from test_harness.fdb import FDBStatFetcher
            self.stat_fetcher = FDBStatFetcher(self.tests)
        if config.stats is not None:
            self.load_stats(config.stats)
        else:
            self.fetch_stats()

    def add_time(self, test_file: Path, run_time: int, out: SummaryTree) -> None:
        # getting the test name is fairly inefficient. But since we only have 100s of tests, I won't bother
        test_name: str | None = None
        test_desc: TestDescription | None = None
        for name, test in self.tests.items():
            for p in test.paths:
                test_files: List[Path]
                if self.restart_test.match(p.name):
                    test_files = self.list_restart_files(p)
                else:
                    test_files = [p]
                for file in test_files:
                    if file.absolute() == test_file.absolute():
                        test_name = name
                        test_desc = test
                        break
                if test_name is not None:
                    break
            if test_name is not None:
                break
        assert test_name is not None and test_desc is not None
        self.stat_fetcher.add_run_time(test_name, run_time, out)
        out.attributes['TotalTestTime'] = str(test_desc.total_runtime)
        out.attributes['TestRunCount'] = str(test_desc.num_runs)

    def dump_stats(self) -> str:
        res = array.array('I')
        for _, spec in self.tests.items():
            res.append(spec.total_runtime)
        return base64.standard_b64encode(res.tobytes()).decode('utf-8')

    def fetch_stats(self):
        self.stat_fetcher.read_stats()

    def load_stats(self, serialized: str):
        times = array.array('I')
        times.frombytes(base64.standard_b64decode(serialized))
        assert len(times) == len(self.tests.items())
        for idx, (_, spec) in enumerate(self.tests.items()):
            spec.total_runtime = times[idx]

    def parse_txt(self, path: Path):
        if self.include_files_regex.search(str(path)) is None or self.exclude_files_regex.search(str(path)) is not None:
            return
        with path.open('r') as f:
            test_name: str | None = None
            test_class: str | None = None
            priority: float | None = None
            for line in f:
                line = line.strip()
                kv = line.split('=')
                if len(kv) != 2:
                    continue
                kv[0] = kv[0].strip()
                kv[1] = kv[1].strip(' \r\n\t\'"')
                if kv[0] == 'testTitle' and test_name is None:
                    test_name = kv[1]
                if kv[0] == 'testClass' and test_class is None:
                    test_class = kv[1]
                if kv[0] == 'testPriority' and priority is None:
                    try:
                        priority = float(kv[1])
                    except ValueError:
                        raise RuntimeError("Can't parse {} -- testPriority in {} should be set to a float".format(kv[1],
                                                                                                                  path))
                if test_name is not None and test_class is not None and priority is not None:
                    break
            if test_name is None:
                return
            if test_class is None:
                test_class = test_name
            if priority is None:
                priority = 1.0
            if self.include_tests_regex.search(test_class) is None \
                    or self.exclude_tests_regex.search(test_class) is not None:
                return
            if test_class not in self.tests:
                self.tests[test_class] = TestDescription(path, test_class, priority)
            else:
                self.tests[test_class].paths.append(path)

    def walk_test_dir(self, test: Path):
        if test.is_dir():
            for file in test.iterdir():
                self.walk_test_dir(file)
        else:
            # check whether we're looking at a restart test
            if self.follow_test.match(test.name) is not None:
                return
            if test.suffix == '.txt' or test.suffix == '.toml':
                self.parse_txt(test)

    @staticmethod
    def list_restart_files(start_file: Path) -> List[Path]:
        name = re.sub(r'-\d+.(txt|toml)', '', start_file.name)
        res: List[Path] = []
        for test_file in start_file.parent.iterdir():
            if test_file.name.startswith(name):
                res.append(test_file)
        assert len(res) > 1
        res.sort()
        return res

    def choose_test(self) -> List[Path]:
        min_runtime: float | None = None
        candidates: List[TestDescription] = []
        for _, v in self.tests.items():
            this_time = v.total_runtime * v.priority
            if min_runtime is None or this_time < min_runtime:
                min_runtime = this_time
                candidates = [v]
            elif this_time == min_runtime:
                candidates.append(v)
        candidates.sort()
        choice = config.random.randint(0, len(candidates) - 1)
        test = candidates[choice]
        result = test.paths[config.random.randint(0, len(test.paths) - 1)]
        if self.restart_test.match(result.name):
            return self.list_restart_files(result)
        else:
            return [result]


class OldBinaries:
    def __init__(self):
        self.first_file_expr = re.compile(r'.*-1\.(txt|toml)')
        self.old_binaries_path: Path = config.old_binaries_path
        self.binaries: OrderedDict[Version, Path] = collections.OrderedDict()
        if not self.old_binaries_path.exists() or not self.old_binaries_path.is_dir():
            return
        exec_pattern = re.compile(r'fdbserver-\d+\.\d+\.\d+(\.exe)?')
        for file in self.old_binaries_path.iterdir():
            if not file.is_file() or not os.access(file, os.X_OK):
                continue
            if exec_pattern.fullmatch(file.name) is not None:
                self._add_file(file)

    def _add_file(self, file: Path):
        version_str = file.name.split('-')[1]
        if version_str.endswith('.exe'):
            version_str = version_str[0:-len('.exe')]
        ver = Version.parse(version_str)
        self.binaries[ver] = file

    def choose_binary(self, test_file: Path) -> Path:
        if len(self.binaries) == 0:
            return config.binary
        max_version = Version.max_version()
        min_version = Version.parse('5.0.0')
        dirs = test_file.parent.parts
        if 'restarting' not in dirs:
            return config.binary
        version_expr = dirs[-1].split('_')
        first_file = self.first_file_expr.match(test_file.name) is not None
        if first_file and version_expr[0] == 'to':
            # downgrade test -- first binary should be current one
            return config.binary
        if not first_file and version_expr[0] == 'from':
            # upgrade test -- we only return an old version for the first test file
            return config.binary
        if version_expr[0] == 'from' or version_expr[0] == 'to':
            min_version = Version.parse(version_expr[1])
        if len(version_expr) == 4 and version_expr[2] == 'until':
            max_version = Version.parse(version_expr[3])
        candidates: List[Path] = []
        for ver, binary in self.binaries.items():
            if min_version <= ver <= max_version:
                candidates.append(binary)
        if len(candidates) == 0:
            return config.binary
        return config.random.choice(candidates)


def is_restarting_test(test_file: Path):
    for p in test_file.parts:
        if p == 'restarting':
            return True
    return False


def is_no_sim(test_file: Path):
    return test_file.parts[-2] == 'noSim'


class ResourceMonitor(threading.Thread):
    def __init__(self):
        super().__init__()
        self.start_time = time.time()
        self.end_time: float | None = None
        self._stop_monitor = False
        self.max_rss = 0

    def run(self) -> None:
        while not self._stop_monitor:
            time.sleep(1)
            resources = resource.getrusage(resource.RUSAGE_CHILDREN)
            self.max_rss = max(resources.ru_maxrss, self.max_rss)

    def stop(self):
        self.end_time = time.time()
        self._stop_monitor = True

    def time(self):
        return self.end_time - self.start_time


class TestRun:
    def __init__(self, binary: Path, test_file: Path, random_seed: int, uid: uuid.UUID,
                 restarting: bool = False, test_determinism: bool = False, buggify_enabled: bool = False,
                 stats: str | None = None, expected_unseed: int | None = None, will_restart: bool = False):
        self.binary = binary
        self.test_file = test_file
        self.random_seed = random_seed
        self.uid = uid
        self.restarting = restarting
        self.test_determinism = test_determinism
        self.stats: str | None = stats
        self.expected_unseed: int | None = expected_unseed
        self.use_valgrind: bool = config.use_valgrind
        self.old_binary_path: Path = config.old_binaries_path
        self.buggify_enabled: bool = buggify_enabled
        self.fault_injection_enabled: bool = True
        self.trace_format: str | None = config.trace_format
        if Version.of_binary(self.binary) < "6.1.0":
            self.trace_format = None
        self.temp_path = config.run_dir / str(self.uid)
        # state for the run
        self.retryable_error: bool = False
        self.summary: Summary = Summary(binary, uid=self.uid, stats=self.stats, expected_unseed=self.expected_unseed,
                                        will_restart=will_restart)
        self.run_time: int = 0
        self.success = self.run()

    def log_test_plan(self, out: SummaryTree):
        test_plan: SummaryTree = SummaryTree('TestPlan')
        test_plan.attributes['TestUID'] = str(self.uid)
        test_plan.attributes['RandomSeed'] = str(self.random_seed)
        test_plan.attributes['TestFile'] = str(self.test_file)
        test_plan.attributes['Buggify'] = '1' if self.buggify_enabled else '0'
        test_plan.attributes['FaultInjectionEnabled'] = '1' if self.fault_injection_enabled else '0'
        test_plan.attributes['DeterminismCheck'] = '1' if self.test_determinism else '0'
        out.append(test_plan)

    def delete_simdir(self):
        shutil.rmtree(self.temp_path / Path('simfdb'))

    def run(self):
        command: List[str] = []
        valgrind_file: Path | None = None
        if self.use_valgrind:
            command.append('valgrind')
            valgrind_file = self.temp_path / Path('valgrind-{}.xml'.format(self.random_seed))
            dbg_path = os.getenv('FDB_VALGRIND_DBGPATH')
            if dbg_path is not None:
                command.append('--extra-debuginfo-path={}'.format(dbg_path))
            command += ['--xml=yes', '--xml-file={}'.format(valgrind_file.absolute()), '-q']
        command += [str(self.binary.absolute()),
                    '-r', 'test' if is_no_sim(self.test_file) else 'simulation',
                    '-f', str(self.test_file),
                    '-s', str(self.random_seed)]
        if self.trace_format is not None:
            command += ['--trace_format', self.trace_format]
        if Version.of_binary(self.binary) >= '7.1.0':
            command += ['-fi', 'on' if self.fault_injection_enabled else 'off']
        if self.restarting:
            command.append('--restarting')
        if self.buggify_enabled:
            command += ['-b', 'on']
        if config.crash_on_error:
            command.append('--crash')

        self.temp_path.mkdir(parents=True, exist_ok=True)

        # self.log_test_plan(out)
        resources = ResourceMonitor()
        resources.start()
        process = subprocess.Popen(command, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, cwd=self.temp_path,
                                   text=True)
        did_kill = False
        timeout = 20 * config.kill_seconds if self.use_valgrind else config.kill_seconds
        err_out: str
        try:
            _, err_out = process.communicate(timeout=timeout)
        except subprocess.TimeoutExpired:
            process.kill()
            _, err_out = process.communicate()
            did_kill = True
        resources.stop()
        resources.join()
        # we're rounding times up, otherwise we will prefer running very short tests (<1s)
        self.run_time = math.ceil(resources.time())
        self.summary.runtime = resources.time()
        self.summary.max_rss = resources.max_rss
        self.summary.was_killed = did_kill
        self.summary.valgrind_out_file = valgrind_file
        self.summary.error_out = err_out
        self.summary.summarize(self.temp_path, ' '.join(command))
        return self.summary.ok()


def decorate_summary(out: SummaryTree, test_file: Path, seed: int, buggify: bool):
    """Sometimes a test can crash before ProgramStart is written to the traces. These
    tests are then hard to reproduce (they can be reproduced through TestHarness but
    require the user to run in the joshua docker container). To account for this we
    will write the necessary information into the attributes if it is missing."""
    if 'TestFile' not in out.attributes:
        out.attributes['TestFile'] = str(test_file)
    if 'RandomSeed' not in out.attributes:
        out.attributes['RandomSeed'] = str(seed)
    if 'BuggifyEnabled' not in out.attributes:
        out.attributes['BuggifyEnabled'] = '1' if buggify else '0'


class TestRunner:
    def __init__(self):
        self.uid = uuid.uuid4()
        self.test_path: Path = Path('tests')
        self.cluster_file: str | None = None
        self.fdb_app_dir: str | None = None
        self.binary_chooser = OldBinaries()
        self.test_picker = TestPicker(self.test_path)

    def backup_sim_dir(self, seed: int):
        temp_dir = config.run_dir / str(self.uid)
        src_dir = temp_dir / 'simfdb'
        assert src_dir.is_dir()
        dest_dir = temp_dir / 'simfdb.{}'.format(seed)
        assert not dest_dir.exists()
        shutil.copytree(src_dir, dest_dir)

    def restore_sim_dir(self, seed: int):
        temp_dir = config.run_dir / str(self.uid)
        src_dir = temp_dir / 'simfdb.{}'.format(seed)
        assert src_dir.exists()
        dest_dir = temp_dir / 'simfdb'
        shutil.rmtree(dest_dir)
        shutil.move(src_dir, dest_dir)

    def run_tests(self, test_files: List[Path], seed: int, test_picker: TestPicker) -> bool:
        result: bool = True
        for count, file in enumerate(test_files):
            will_restart = count + 1 < len(test_files)
            binary = self.binary_chooser.choose_binary(file)
            unseed_check = not is_no_sim(file) and config.random.random() < config.unseed_check_ratio
            buggify_enabled: bool = config.random.random() < config.buggify_on_ratio
            if unseed_check and count != 0:
                # for restarting tests we will need to restore the sim2 after the first run
                self.backup_sim_dir(seed + count - 1)
            run = TestRun(binary, file.absolute(), seed + count, self.uid, restarting=count != 0,
                          stats=test_picker.dump_stats(), will_restart=will_restart, buggify_enabled=buggify_enabled)
            result = result and run.success
            test_picker.add_time(test_files[0], run.run_time, run.summary.out)
            decorate_summary(run.summary.out, file, seed + count, run.buggify_enabled)
            if unseed_check and run.summary.unseed:
                run.summary.out.append(run.summary.list_simfdb())
            run.summary.out.dump(sys.stdout)
            if not result:
                return False
            if unseed_check and run.summary.unseed is not None:
                if count != 0:
                    self.restore_sim_dir(seed + count - 1)
                run2 = TestRun(binary, file.absolute(), seed + count, self.uid, restarting=count != 0,
                               stats=test_picker.dump_stats(), expected_unseed=run.summary.unseed,
                               will_restart=will_restart, buggify_enabled=buggify_enabled)
                test_picker.add_time(file, run2.run_time, run.summary.out)
                decorate_summary(run2.summary.out, file, seed + count, run.buggify_enabled)
                run2.summary.out.dump(sys.stdout)
                result = result and run2.success
                if not result:
                    return False
        return result

    def run(self) -> bool:
        seed = config.random_seed if config.random_seed is not None else config.random.randint(0, 2 ** 32 - 1)
        test_files = self.test_picker.choose_test()
        success = self.run_tests(test_files, seed, self.test_picker)
        if config.clean_up:
            shutil.rmtree(config.run_dir / str(self.uid))
        return success

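For restarting tests, OldBinaries.choose_binary above derives the allowed binary version range from the name of the test's parent directory. A sketch of the convention it parses, assuming the usual tests/restarting layout; the concrete paths and versions below are illustrative, not taken from the commit:

from pathlib import Path

# 'from_7.1.0'             -> part 1 runs an old binary with version >= 7.1.0, later parts run the current binary
# 'from_7.1.0_until_7.3.0' -> as above, but the old binary must also be <= 7.3.0
# 'to_7.1.0'               -> part 1 runs the current binary (downgrade test), later parts use an old binary >= 7.1.0
upgrade_part1 = Path('tests/restarting/from_7.1.0_until_7.3.0/ExampleRestart-1.toml')
upgrade_part2 = Path('tests/restarting/from_7.1.0_until_7.3.0/ExampleRestart-2.toml')
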
@ -0,0 +1,614 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import collections
|
||||
import inspect
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import traceback
|
||||
import uuid
|
||||
import xml.sax
|
||||
import xml.sax.handler
|
||||
import xml.sax.saxutils
|
||||
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, TextIO, Callable, Optional, OrderedDict, Any, Tuple, Iterator, Iterable
|
||||
|
||||
from test_harness.config import config
|
||||
from test_harness.valgrind import parse_valgrind_output
|
||||
|
||||
|
||||
class SummaryTree:
|
||||
def __init__(self, name: str):
|
||||
self.name = name
|
||||
self.children: List[SummaryTree] = []
|
||||
self.attributes: Dict[str, str] = {}
|
||||
|
||||
def append(self, element: SummaryTree):
|
||||
self.children.append(element)
|
||||
|
||||
def to_dict(self, add_name: bool = True) -> Dict[str, Any] | List[Any]:
|
||||
if len(self.children) > 0 and len(self.attributes) == 0:
|
||||
children = []
|
||||
for child in self.children:
|
||||
children.append(child.to_dict())
|
||||
if add_name:
|
||||
return {self.name: children}
|
||||
else:
|
||||
return children
|
||||
res: Dict[str, Any] = {}
|
||||
if add_name:
|
||||
res['Type'] = self.name
|
||||
for k, v in self.attributes.items():
|
||||
res[k] = v
|
||||
children = []
|
||||
child_keys: Dict[str, int] = {}
|
||||
for child in self.children:
|
||||
if child.name in child_keys:
|
||||
child_keys[child.name] += 1
|
||||
else:
|
||||
child_keys[child.name] = 1
|
||||
for child in self.children:
|
||||
if child_keys[child.name] == 1 and child.name not in self.attributes:
|
||||
res[child.name] = child.to_dict(add_name=False)
|
||||
else:
|
||||
children.append(child.to_dict())
|
||||
if len(children) > 0:
|
||||
res['children'] = children
|
||||
return res
|
||||
|
||||
def to_json(self, out: TextIO, prefix: str = ''):
|
||||
res = json.dumps(self.to_dict(), indent=(' ' if config.pretty_print else None))
|
||||
for line in res.splitlines(False):
|
||||
out.write('{}{}\n'.format(prefix, line))
|
||||
|
||||
def to_xml(self, out: TextIO, prefix: str = ''):
|
||||
# minidom doesn't support omitting the xml declaration which is a problem for joshua
|
||||
# However, our xml is very simple and therefore serializing manually is easy enough
|
||||
attrs = []
|
||||
print_width = 120
|
||||
try:
|
||||
print_width, _ = os.get_terminal_size()
|
||||
except OSError:
|
||||
pass
|
||||
for k, v in self.attributes.items():
|
||||
attrs.append('{}={}'.format(k, xml.sax.saxutils.quoteattr(v)))
|
||||
elem = '{}<{}{}'.format(prefix, self.name, ('' if len(attrs) == 0 else ' '))
|
||||
out.write(elem)
|
||||
if config.pretty_print:
|
||||
curr_line_len = len(elem)
|
||||
for i in range(len(attrs)):
|
||||
attr_len = len(attrs[i])
|
||||
if i == 0 or attr_len + curr_line_len + 1 <= print_width:
|
||||
if i != 0:
|
||||
out.write(' ')
|
||||
out.write(attrs[i])
|
||||
curr_line_len += attr_len
|
||||
else:
|
||||
out.write('\n')
|
||||
out.write(' ' * len(elem))
|
||||
out.write(attrs[i])
|
||||
curr_line_len = len(elem) + attr_len
|
||||
else:
|
||||
out.write(' '.join(attrs))
|
||||
if len(self.children) == 0:
|
||||
out.write('/>')
|
||||
else:
|
||||
out.write('>')
|
||||
for child in self.children:
|
||||
if config.pretty_print:
|
||||
out.write('\n')
|
||||
child.to_xml(out, prefix=(' {}'.format(prefix) if config.pretty_print else prefix))
|
||||
if len(self.children) > 0:
|
||||
out.write('{}{}</{}>'.format(('\n' if config.pretty_print else ''), prefix, self.name))
|
||||
|
||||
def dump(self, out: TextIO, prefix: str = '', new_line: bool = True):
|
||||
if config.output_format == 'json':
|
||||
self.to_json(out, prefix=prefix)
|
||||
else:
|
||||
self.to_xml(out, prefix=prefix)
|
||||
if new_line:
|
||||
out.write('\n')
|
||||
|
||||
|
||||
ParserCallback = Callable[[Dict[str, str]], Optional[str]]
|
||||
|
||||
|
||||
class ParseHandler:
|
||||
def __init__(self, out: SummaryTree):
|
||||
self.out = out
|
||||
self.events: OrderedDict[Optional[Tuple[str, Optional[str]]], List[ParserCallback]] = collections.OrderedDict()
|
||||
|
||||
def add_handler(self, attr: Tuple[str, Optional[str]], callback: ParserCallback) -> None:
|
||||
self.events.setdefault(attr, []).append(callback)
|
||||
|
||||
def _call(self, callback: ParserCallback, attrs: Dict[str, str]) -> str | None:
|
||||
try:
|
||||
return callback(attrs)
|
||||
except Exception as e:
|
||||
_, _, exc_traceback = sys.exc_info()
|
||||
child = SummaryTree('NonFatalParseError')
|
||||
child.attributes['Severity'] = '30'
|
||||
child.attributes['ErrorMessage'] = str(e)
|
||||
child.attributes['Trace'] = repr(traceback.format_tb(exc_traceback))
|
||||
self.out.append(child)
|
||||
return None
|
||||
|
||||
def handle(self, attrs: Dict[str, str]):
|
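# Dispatch in order: wildcard handlers, then (key, None) handlers, then (key, value) handlers;
# a callback may return a value to remap the attribute before later handlers see it.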
||||
if None in self.events:
|
||||
for callback in self.events[None]:
|
||||
self._call(callback, attrs)
|
||||
for k, v in attrs.items():
|
||||
if (k, None) in self.events:
|
||||
for callback in self.events[(k, None)]:
|
||||
remap = self._call(callback, attrs)
|
||||
if remap is not None:
|
||||
v = remap
|
||||
attrs[k] = v
|
||||
if (k, v) in self.events:
|
||||
for callback in self.events[(k, v)]:
|
||||
remap = self._call(callback, attrs)
|
||||
if remap is not None:
|
||||
v = remap
|
||||
attrs[k] = v
|
||||
|
||||
|
||||
class Parser:
|
||||
def parse(self, file: TextIO, handler: ParseHandler) -> None:
|
||||
pass
|
||||
|
||||
|
||||
class XmlParser(Parser, xml.sax.handler.ContentHandler):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.handler: ParseHandler | None = None
|
||||
|
||||
def parse(self, file: TextIO, handler: ParseHandler) -> None:
|
||||
self.handler = handler
xml.sax.parse(file, self)
|
||||
|
||||
def startElement(self, name, attrs) -> None:
|
||||
attributes: Dict[str, str] = {}
|
||||
for name in attrs.getNames():
|
||||
attributes[name] = attrs.getValue(name)
|
||||
assert self.handler is not None
|
||||
self.handler.handle(attributes)
|
||||
|
||||
|
||||
class JsonParser(Parser):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
|
||||
def parse(self, file: TextIO, handler: ParseHandler):
|
||||
for line in file:
|
||||
obj = json.loads(line)
|
||||
handler.handle(obj)
|
||||
|
||||
|
||||
class Coverage:
|
||||
def __init__(self, file: str, line: str | int, comment: str | None = None):
|
||||
self.file = file
|
||||
self.line = int(line)
|
||||
self.comment = comment
|
||||
|
||||
def to_tuple(self) -> Tuple[str, int, str | None]:
|
||||
return self.file, self.line, self.comment
|
||||
|
||||
def __eq__(self, other) -> bool:
|
||||
if isinstance(other, tuple) and len(other) == 3:
|
||||
return self.to_tuple() == other
|
||||
elif isinstance(other, Coverage):
|
||||
return self.to_tuple() == other.to_tuple()
|
||||
else:
|
||||
return False
|
||||
|
||||
def __lt__(self, other) -> bool:
|
||||
if isinstance(other, tuple) and len(other) == 3:
|
||||
return self.to_tuple() < other
|
||||
elif isinstance(other, Coverage):
|
||||
return self.to_tuple() < other.to_tuple()
|
||||
else:
|
||||
return False
|
||||
|
||||
def __le__(self, other) -> bool:
|
||||
if isinstance(other, tuple) and len(other) == 3:
|
||||
return self.to_tuple() <= other
|
||||
elif isinstance(other, Coverage):
|
||||
return self.to_tuple() <= other.to_tuple()
|
||||
else:
|
||||
return False
|
||||
|
||||
def __gt__(self, other: Coverage) -> bool:
|
||||
if isinstance(other, tuple) and len(other) == 3:
|
||||
return self.to_tuple() > other
|
||||
elif isinstance(other, Coverage):
|
||||
return self.to_tuple() > other.to_tuple()
|
||||
else:
|
||||
return False
|
||||
|
||||
def __ge__(self, other):
|
||||
if isinstance(other, tuple) and len(other) == 3:
|
||||
return self.to_tuple() >= other
|
||||
elif isinstance(other, Coverage):
|
||||
return self.to_tuple() >= other.to_tuple()
|
||||
else:
|
||||
return False
|
||||
|
||||
def __hash__(self):
|
||||
return hash((self.file, self.line, self.comment))
|
||||
|
||||
|
||||
class TraceFiles:
|
||||
def __init__(self, path: Path):
|
||||
self.path: Path = path
|
||||
self.timestamps: List[int] = []
|
||||
self.runs: OrderedDict[int, List[Path]] = collections.OrderedDict()
|
||||
trace_expr = re.compile(r'trace.*\.(json|xml)')
|
||||
for file in self.path.iterdir():
|
||||
if file.is_file() and trace_expr.match(file.name) is not None:
|
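# Group trace files by the timestamp embedded in the file name (dot-separated field 6),
# one group per simulation run.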
||||
ts = int(file.name.split('.')[6])
|
||||
if ts in self.runs:
|
||||
self.runs[ts].append(file)
|
||||
else:
|
||||
self.timestamps.append(ts)
|
||||
self.runs[ts] = [file]
|
||||
self.timestamps.sort(reverse=True)
|
||||
|
||||
def __getitem__(self, idx: int) -> List[Path]:
|
||||
res = self.runs[self.timestamps[idx]]
|
||||
res.sort()
|
||||
return res
|
||||
|
||||
def __len__(self) -> int:
|
||||
return len(self.runs)
|
||||
|
||||
def items(self) -> Iterator[List[Path]]:
|
||||
class TraceFilesIterator(Iterable[List[Path]]):
|
||||
def __init__(self, trace_files: TraceFiles):
|
||||
self.current = 0
|
||||
self.trace_files: TraceFiles = trace_files
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
||||
def __next__(self) -> List[Path]:
|
||||
if len(self.trace_files) <= self.current:
|
||||
raise StopIteration
|
||||
self.current += 1
|
||||
return self.trace_files[self.current - 1]
|
||||
return TraceFilesIterator(self)
|
||||
|
||||
|
||||
class Summary:
|
||||
def __init__(self, binary: Path, runtime: float = 0, max_rss: int | None = None,
|
||||
was_killed: bool = False, uid: uuid.UUID | None = None, expected_unseed: int | None = None,
|
||||
exit_code: int = 0, valgrind_out_file: Path | None = None, stats: str | None = None,
|
||||
error_out: str | None = None, will_restart: bool = False):
|
||||
self.binary = binary
|
||||
self.runtime: float = runtime
|
||||
self.max_rss: int | None = max_rss
|
||||
self.was_killed: bool = was_killed
|
||||
self.expected_unseed: int | None = expected_unseed
|
||||
self.exit_code: int = exit_code
|
||||
self.out: SummaryTree = SummaryTree('Test')
|
||||
self.test_begin_found: bool = False
|
||||
self.test_end_found: bool = False
|
||||
self.unseed: int | None = None
|
||||
self.valgrind_out_file: Path | None = valgrind_out_file
|
||||
self.severity_map: OrderedDict[tuple[str, int], int] = collections.OrderedDict()
|
||||
self.error: bool = False
|
||||
self.errors: int = 0
|
||||
self.warnings: int = 0
|
||||
self.coverage: OrderedDict[Coverage, bool] = collections.OrderedDict()
|
||||
self.test_count: int = 0
|
||||
self.tests_passed: int = 0
|
||||
self.error_out = error_out
|
||||
self.stderr_severity: str = '40'
|
||||
self.will_restart: bool = will_restart
|
||||
self.test_dir: Path | None = None
|
||||
|
||||
if uid is not None:
|
||||
self.out.attributes['TestUID'] = str(uid)
|
||||
if stats is not None:
|
||||
self.out.attributes['Statistics'] = stats
|
||||
self.out.attributes['JoshuaSeed'] = str(config.joshua_seed)
|
||||
self.out.attributes['WillRestart'] = '1' if self.will_restart else '0'
|
||||
|
||||
self.handler = ParseHandler(self.out)
|
||||
self.register_handlers()
|
||||
|
||||
def summarize_files(self, trace_files: List[Path]):
|
||||
assert len(trace_files) > 0
|
||||
for f in trace_files:
|
||||
self.parse_file(f)
|
||||
self.done()
|
||||
|
||||
def summarize(self, trace_dir: Path, command: str):
|
||||
self.test_dir = trace_dir
|
||||
trace_files = TraceFiles(trace_dir)
|
||||
if len(trace_files) == 0:
|
||||
self.error = True
|
||||
child = SummaryTree('NoTracesFound')
|
||||
child.attributes['Severity'] = '40'
|
||||
child.attributes['Path'] = str(trace_dir.absolute())
|
||||
child.attributes['Command'] = command
|
||||
self.out.append(child)
|
||||
return
|
||||
self.summarize_files(trace_files[0])
|
||||
if config.joshua_dir is not None:
|
||||
import test_harness.fdb
|
||||
test_harness.fdb.write_coverage(config.cluster_file,
|
||||
test_harness.fdb.str_to_tuple(config.joshua_dir) + ('coverage',),
|
||||
test_harness.fdb.str_to_tuple(config.joshua_dir) + ('coverage-metadata',),
|
||||
self.coverage)
|
||||
|
||||
def list_simfdb(self) -> SummaryTree:
|
||||
res = SummaryTree('SimFDB')
|
||||
res.attributes['TestDir'] = str(self.test_dir)
|
||||
if self.test_dir is None:
|
||||
return res
|
||||
simfdb = self.test_dir / Path('simfdb')
|
||||
if not simfdb.exists():
|
||||
res.attributes['NoSimDir'] = "simfdb doesn't exist"
|
||||
return res
|
||||
elif not simfdb.is_dir():
|
||||
res.attributes['NoSimDir'] = 'simfdb is not a directory'
|
||||
return res
|
||||
for file in simfdb.iterdir():
|
||||
child = SummaryTree('Directory' if file.is_dir() else 'File')
|
||||
child.attributes['Name'] = file.name
|
||||
res.append(child)
|
||||
return res
|
||||
|
||||
def ok(self):
|
||||
return not self.error
|
||||
|
||||
def done(self):
|
||||
if config.print_coverage:
|
||||
for k, v in self.coverage.items():
|
||||
child = SummaryTree('CodeCoverage')
|
||||
child.attributes['File'] = k.file
|
||||
child.attributes['Line'] = str(k.line)
|
||||
if not v:
|
||||
child.attributes['Covered'] = '0'
|
||||
if k.comment is not None and len(k.comment):
|
||||
child.attributes['Comment'] = k.comment
|
||||
self.out.append(child)
|
||||
if self.warnings > config.max_warnings:
|
||||
child = SummaryTree('WarningLimitExceeded')
|
||||
child.attributes['Severity'] = '30'
|
||||
child.attributes['WarningCount'] = str(self.warnings)
|
||||
self.out.append(child)
|
||||
if self.errors > config.max_errors:
|
||||
child = SummaryTree('ErrorLimitExceeded')
|
||||
child.attributes['Severity'] = '40'
|
||||
child.attributes['ErrorCount'] = str(self.errors)
|
||||
self.out.append(child)
|
||||
if self.was_killed:
|
||||
child = SummaryTree('ExternalTimeout')
|
||||
child.attributes['Severity'] = '40'
|
||||
self.out.append(child)
|
||||
self.error = True
|
||||
if self.max_rss is not None:
|
||||
self.out.attributes['PeakMemory'] = str(self.max_rss)
|
||||
if self.valgrind_out_file is not None:
|
||||
try:
|
||||
valgrind_errors = parse_valgrind_output(self.valgrind_out_file)
|
||||
for valgrind_error in valgrind_errors:
|
||||
if valgrind_error.kind.startswith('Leak'):
|
||||
continue
|
||||
self.error = True
|
||||
child = SummaryTree('ValgrindError')
|
||||
child.attributes['Severity'] = '40'
|
||||
child.attributes['What'] = valgrind_error.what.what
|
||||
child.attributes['Backtrace'] = valgrind_error.what.backtrace
|
||||
aux_count = 0
|
||||
for aux in valgrind_error.aux:
|
||||
child.attributes['WhatAux{}'.format(aux_count)] = aux.what
|
||||
child.attributes['BacktraceAux{}'.format(aux_count)] = aux.backtrace
|
||||
aux_count += 1
|
||||
self.out.append(child)
|
||||
except Exception as e:
|
||||
self.error = True
|
||||
child = SummaryTree('ValgrindParseError')
|
||||
child.attributes['Severity'] = '40'
|
||||
child.attributes['ErrorMessage'] = str(e)
|
||||
_, _, exc_traceback = sys.exc_info()
|
||||
child.attributes['Trace'] = repr(traceback.format_tb(exc_traceback))
|
||||
self.out.append(child)
|
||||
if not self.test_end_found:
|
||||
child = SummaryTree('TestUnexpectedlyNotFinished')
|
||||
child.attributes['Severity'] = '40'
|
||||
self.out.append(child)
|
||||
if self.error_out is not None and len(self.error_out) > 0:
|
||||
if self.stderr_severity == '40':
|
||||
self.error = True
|
||||
lines = self.error_out.split('\n')
|
||||
stderr_bytes = 0
|
||||
for line in lines:
|
||||
remaining_bytes = config.max_stderr_bytes - stderr_bytes
|
||||
if remaining_bytes > 0:
|
||||
out_err = line[0:remaining_bytes] + ('...' if len(line) > remaining_bytes else '')
|
||||
child = SummaryTree('StdErrOutput')
|
||||
child.attributes['Severity'] = self.stderr_severity
|
||||
child.attributes['Output'] = out_err
|
||||
self.out.append(child)
|
||||
stderr_bytes += len(line)
|
||||
if stderr_bytes > config.max_stderr_bytes:
|
||||
child = SummaryTree('StdErrOutputTruncated')
|
||||
child.attributes['Severity'] = self.stderr_severity
|
||||
child.attributes['BytesRemaining'] = str(stderr_bytes - config.max_stderr_bytes)
|
||||
self.out.append(child)
|
||||
|
||||
self.out.attributes['Ok'] = '1' if self.ok() else '0'
|
||||
if not self.ok():
|
||||
reason = 'Unknown'
|
||||
if self.error:
|
||||
reason = 'ProducedErrors'
|
||||
elif not self.test_end_found:
|
||||
reason = 'TestDidNotFinish'
|
||||
elif self.tests_passed == 0:
|
||||
reason = 'NoTestsPassed'
|
||||
elif self.test_count != self.tests_passed:
|
||||
reason = 'Expected {} tests to pass, but only {} did'.format(self.test_count, self.tests_passed)
|
||||
self.out.attributes['FailReason'] = reason
|
||||
|
||||
def parse_file(self, file: Path):
|
||||
parser: Parser
|
||||
if file.suffix == '.json':
|
||||
parser = JsonParser()
|
||||
elif file.suffix == '.xml':
|
||||
parser = XmlParser()
|
||||
else:
|
||||
child = SummaryTree('TestHarnessBug')
|
||||
child.attributes['File'] = __file__
|
||||
frame = inspect.currentframe()
|
||||
if frame is not None:
|
||||
child.attributes['Line'] = str(inspect.getframeinfo(frame).lineno)
|
||||
child.attributes['Details'] = 'Unexpected suffix {} for file {}'.format(file.suffix, file.name)
|
||||
self.error = True
|
||||
self.out.append(child)
|
||||
return
|
||||
with file.open('r') as f:
|
||||
try:
|
||||
parser.parse(f, self.handler)
|
||||
except Exception as e:
|
||||
child = SummaryTree('SummarizationError')
|
||||
child.attributes['Severity'] = '40'
|
||||
child.attributes['ErrorMessage'] = str(e)
|
||||
self.out.append(child)
|
||||
|
||||
def register_handlers(self):
|
||||
def remap_event_severity(attrs):
|
||||
if 'Type' not in attrs or 'Severity' not in attrs:
|
||||
return None
|
||||
k = (attrs['Type'], int(attrs['Severity']))
|
||||
if k in self.severity_map:
|
||||
return str(self.severity_map[k])
|
||||
|
||||
self.handler.add_handler(('Severity', None), remap_event_severity)
|
||||
|
||||
def program_start(attrs: Dict[str, str]):
|
||||
if self.test_begin_found:
|
||||
return
|
||||
self.test_begin_found = True
|
||||
self.out.attributes['RandomSeed'] = attrs['RandomSeed']
|
||||
self.out.attributes['SourceVersion'] = attrs['SourceVersion']
|
||||
self.out.attributes['Time'] = attrs['ActualTime']
|
||||
self.out.attributes['BuggifyEnabled'] = attrs['BuggifyEnabled']
|
||||
self.out.attributes['DeterminismCheck'] = '0' if self.expected_unseed is None else '1'
|
||||
if self.binary.name != 'fdbserver':
|
||||
self.out.attributes['OldBinary'] = self.binary.name
|
||||
if 'FaultInjectionEnabled' in attrs:
|
||||
self.out.attributes['FaultInjectionEnabled'] = attrs['FaultInjectionEnabled']
|
||||
|
||||
self.handler.add_handler(('Type', 'ProgramStart'), program_start)
|
||||
|
||||
def set_test_file(attrs: Dict[str, str]):
|
||||
test_file = Path(attrs['TestFile'])
|
||||
cwd = Path('.').absolute()
|
||||
try:
|
||||
test_file = test_file.relative_to(cwd)
|
||||
except ValueError:
|
||||
pass
|
||||
self.out.attributes['TestFile'] = str(test_file)
|
||||
|
||||
self.handler.add_handler(('Type', 'Simulation'), set_test_file)
|
||||
self.handler.add_handler(('Type', 'NonSimulationTest'), set_test_file)
|
||||
|
||||
def set_elapsed_time(attrs: Dict[str, str]):
|
||||
if self.test_end_found:
|
||||
return
|
||||
self.test_end_found = True
|
||||
self.unseed = int(attrs['RandomUnseed'])
|
||||
if self.expected_unseed is not None and self.unseed != self.expected_unseed:
|
||||
severity = 40 if ('UnseedMismatch', 40) not in self.severity_map \
|
||||
else self.severity_map[('UnseedMismatch', 40)]
|
||||
if severity >= 30:
|
||||
child = SummaryTree('UnseedMismatch')
|
||||
child.attributes['Unseed'] = str(self.unseed)
|
||||
child.attributes['ExpectedUnseed'] = str(self.expected_unseed)
|
||||
child.attributes['Severity'] = str(severity)
|
||||
if severity >= 40:
|
||||
self.error = True
|
||||
self.out.append(child)
|
||||
self.out.attributes['SimElapsedTime'] = attrs['SimTime']
|
||||
self.out.attributes['RealElapsedTime'] = attrs['RealTime']
|
||||
if self.unseed is not None:
|
||||
self.out.attributes['RandomUnseed'] = str(self.unseed)
|
||||
|
||||
self.handler.add_handler(('Type', 'ElapsedTime'), set_elapsed_time)
|
||||
|
||||
def parse_warning(attrs: Dict[str, str]):
|
||||
self.warnings += 1
|
||||
if self.warnings > config.max_warnings:
|
||||
return
|
||||
child = SummaryTree(attrs['Type'])
|
||||
for k, v in attrs.items():
|
||||
if k != 'Type':
|
||||
child.attributes[k] = v
|
||||
self.out.append(child)
|
||||
|
||||
self.handler.add_handler(('Severity', '30'), parse_warning)
|
||||
|
||||
def parse_error(attrs: Dict[str, str]):
|
||||
self.errors += 1
|
||||
self.error = True
|
||||
if self.errors > config.max_errors:
|
||||
return
|
||||
child = SummaryTree(attrs['Type'])
|
||||
for k, v in attrs.items():
|
||||
child.attributes[k] = v
|
||||
self.out.append(child)
|
||||
|
||||
self.handler.add_handler(('Severity', '40'), parse_error)
|
||||
|
||||
def coverage(attrs: Dict[str, str]):
|
||||
covered = True
|
||||
if 'Covered' in attrs:
|
||||
covered = int(attrs['Covered']) != 0
|
||||
comment = ''
|
||||
if 'Comment' in attrs:
|
||||
comment = attrs['Comment']
|
||||
c = Coverage(attrs['File'], attrs['Line'], comment)
|
||||
if covered or c not in self.coverage:
|
||||
self.coverage[c] = covered
|
||||
|
||||
self.handler.add_handler(('Type', 'CodeCoverage'), coverage)
|
||||
|
||||
def expected_test_pass(attrs: Dict[str, str]):
|
||||
self.test_count = int(attrs['Count'])
|
||||
|
||||
self.handler.add_handler(('Type', 'TestsExpectedToPass'), expected_test_pass)
|
||||
|
||||
def test_passed(attrs: Dict[str, str]):
|
||||
if attrs['Passed'] == '1':
|
||||
self.tests_passed += 1
|
||||
|
||||
self.handler.add_handler(('Type', 'TestResults'), test_passed)
|
||||
|
||||
def remap_event_severity(attrs: Dict[str, str]):
|
||||
self.severity_map[(attrs['TargetEvent'], int(attrs['OriginalSeverity']))] = int(attrs['NewSeverity'])
|
||||
|
||||
self.handler.add_handler(('Type', 'RemapEventSeverity'), remap_event_severity)
|
||||
|
||||
def buggify_section(attrs: Dict[str, str]):
|
||||
if attrs['Type'] == 'FaultInjected' or attrs.get('Activated', '0') == '1':
|
||||
child = SummaryTree(attrs['Type'])
|
||||
child.attributes['File'] = attrs['File']
|
||||
child.attributes['Line'] = attrs['Line']
|
||||
self.out.append(child)
|
||||
self.handler.add_handler(('Type', 'BuggifySection'), buggify_section)
|
||||
self.handler.add_handler(('Type', 'FaultInjected'), buggify_section)
|
||||
|
||||
def running_unit_test(attrs: Dict[str, str]):
|
||||
child = SummaryTree('RunningUnitTest')
|
||||
child.attributes['Name'] = attrs['Name']
|
||||
child.attributes['File'] = attrs['File']
|
||||
child.attributes['Line'] = attrs['Line']
self.out.append(child)
|
||||
self.handler.add_handler(('Type', 'RunningUnitTest'), running_unit_test)
|
||||
|
||||
def stderr_severity(attrs: Dict[str, str]):
|
||||
if 'NewSeverity' in attrs:
|
||||
self.stderr_severity = attrs['NewSeverity']
|
||||
self.handler.add_handler(('Type', 'StderrSeverity'), stderr_severity)
|
|
@ -0,0 +1,16 @@
|
|||
import sys
|
||||
|
||||
from test_harness.valgrind import parse_valgrind_output
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
errors = parse_valgrind_output(Path(sys.argv[1]))
|
||||
for valgrind_error in errors:
|
||||
print('ValgrindError: what={}, kind={}'.format(valgrind_error.what.what, valgrind_error.kind))
|
||||
print('Backtrace: {}'.format(valgrind_error.what.backtrace))
|
||||
counter = 0
|
||||
for aux in valgrind_error.aux:
|
||||
print('Aux {}:'.format(counter))
|
||||
print(' What: {}'.format(aux.what))
|
||||
print(' Backtrace: {}'.format(aux.backtrace))
|
|
@ -0,0 +1,60 @@
|
|||
import argparse
|
||||
import re
|
||||
import sys
|
||||
|
||||
from pathlib import Path
|
||||
from test_harness.config import config
|
||||
from test_harness.summarize import Summary, TraceFiles
|
||||
from typing import Pattern, List
|
||||
|
||||
|
||||
def files_matching(path: Path, pattern: Pattern, recurse: bool = True) -> List[Path]:
|
||||
res: List[Path] = []
|
||||
for file in path.iterdir():
|
||||
if file.is_file() and pattern.match(file.name) is not None:
|
||||
res.append(file)
|
||||
elif file.is_dir() and recurse:
|
||||
res += files_matching(file, pattern, recurse)
|
||||
return res
|
||||
|
||||
|
||||
def dirs_with_files_matching(path: Path, pattern: Pattern, recurse: bool = True) -> List[Path]:
|
||||
res: List[Path] = []
|
||||
sub_directories: List[Path] = []
|
||||
has_file = False
|
||||
for file in path.iterdir():
|
||||
if file.is_file() and pattern.match(file.name) is not None:
|
||||
has_file = True
|
||||
elif file.is_dir() and recurse:
|
||||
sub_directories.append(file)
|
||||
if has_file:
|
||||
res.append(path)
|
||||
if recurse:
|
||||
for file in sub_directories:
|
||||
res += dirs_with_files_matching(file, pattern, recurse=True)
|
||||
res.sort()
|
||||
return res
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser('TestHarness Timeout', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
||||
config.build_arguments(parser)
|
||||
args = parser.parse_args()
|
||||
config.extract_args(args)
|
||||
valgrind_files: List[Path] = []
|
||||
if config.use_valgrind:
|
||||
valgrind_files = files_matching(Path.cwd(), re.compile(r'valgrind.*\.xml'))
|
||||
|
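# Produce a summary for every run found under the working directory; was_killed=True
# marks each run as an external timeout in the output.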
||||
for directory in dirs_with_files_matching(Path.cwd(), re.compile(r'trace.*\.(json|xml)'), recurse=True):
|
||||
trace_files = TraceFiles(directory)
|
||||
for files in trace_files.items():
|
||||
if config.use_valgrind:
|
||||
for valgrind_file in valgrind_files:
|
||||
summary = Summary(Path('bin/fdbserver'), was_killed=True)
|
||||
summary.valgrind_out_file = valgrind_file
|
||||
summary.summarize_files(files)
|
||||
summary.out.dump(sys.stdout)
|
||||
else:
|
||||
summary = Summary(Path('bin/fdbserver'), was_killed=True)
|
||||
summary.summarize_files(files)
|
||||
summary.out.dump(sys.stdout)
|
|
@ -0,0 +1,141 @@
|
|||
import enum
|
||||
import xml
|
||||
import xml.sax.handler
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
|
||||
class ValgrindWhat:
|
||||
def __init__(self):
|
||||
self.what: str = ''
|
||||
self.backtrace: str = ''
|
||||
|
||||
|
||||
class ValgrindError:
|
||||
def __init__(self):
|
||||
self.what: ValgrindWhat = ValgrindWhat()
|
||||
self.kind: str = ''
|
||||
self.aux: List[ValgrindWhat] = []
|
||||
|
||||
|
||||
# noinspection PyArgumentList
|
||||
class ValgrindParseState(enum.Enum):
|
||||
ROOT = enum.auto()
|
||||
ERROR = enum.auto()
|
||||
ERROR_AUX = enum.auto()
|
||||
KIND = enum.auto()
|
||||
WHAT = enum.auto()
|
||||
TRACE = enum.auto()
|
||||
AUX_WHAT = enum.auto()
|
||||
STACK = enum.auto()
|
||||
STACK_AUX = enum.auto()
|
||||
STACK_IP = enum.auto()
|
||||
STACK_IP_AUX = enum.auto()
|
||||
|
||||
|
||||
class ValgrindHandler(xml.sax.handler.ContentHandler):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.stack: List[ValgrindError] = []
|
||||
self.result: List[ValgrindError] = []
|
||||
self.state_stack: List[ValgrindParseState] = []
|
||||
|
||||
def state(self) -> ValgrindParseState:
|
||||
if len(self.state_stack) == 0:
|
||||
return ValgrindParseState.ROOT
|
||||
return self.state_stack[-1]
|
||||
|
||||
@staticmethod
|
||||
def from_content(content):
|
||||
# pdb.set_trace()
|
||||
if isinstance(content, bytes):
|
||||
return content.decode()
|
||||
assert isinstance(content, str)
|
||||
return content
|
||||
|
||||
def characters(self, content):
|
||||
# pdb.set_trace()
|
||||
state = self.state()
|
||||
if len(self.state_stack) == 0:
|
||||
return
|
||||
else:
|
||||
assert len(self.stack) > 0
|
||||
if state is ValgrindParseState.KIND:
|
||||
self.stack[-1].kind += self.from_content(content)
|
||||
elif state is ValgrindParseState.WHAT:
|
||||
self.stack[-1].what.what += self.from_content(content)
|
||||
elif state is ValgrindParseState.AUX_WHAT:
|
||||
self.stack[-1].aux[-1].what += self.from_content(content)
|
||||
elif state is ValgrindParseState.STACK_IP:
|
||||
self.stack[-1].what.backtrace += self.from_content(content)
|
||||
elif state is ValgrindParseState.STACK_IP_AUX:
|
||||
self.stack[-1].aux[-1].backtrace += self.from_content(content)
|
||||
|
||||
def startElement(self, name, attrs):
|
||||
# pdb.set_trace()
|
||||
if name == 'error':
|
||||
self.stack.append(ValgrindError())
|
||||
self.state_stack.append(ValgrindParseState.ERROR)
|
||||
if len(self.stack) == 0:
|
||||
return
|
||||
if name == 'kind':
|
||||
self.state_stack.append(ValgrindParseState.KIND)
|
||||
elif name == 'what':
|
||||
self.state_stack.append(ValgrindParseState.WHAT)
|
||||
elif name == 'auxwhat':
|
||||
assert self.state() in [ValgrindParseState.ERROR, ValgrindParseState.ERROR_AUX]
|
||||
self.state_stack.pop()
|
||||
self.state_stack.append(ValgrindParseState.ERROR_AUX)
|
||||
self.state_stack.append(ValgrindParseState.AUX_WHAT)
|
||||
self.stack[-1].aux.append(ValgrindWhat())
|
||||
elif name == 'stack':
|
||||
state = self.state()
|
||||
assert state in [ValgrindParseState.ERROR, ValgrindParseState.ERROR_AUX]
|
||||
if state == ValgrindParseState.ERROR:
|
||||
self.state_stack.append(ValgrindParseState.STACK)
|
||||
else:
|
||||
self.state_stack.append(ValgrindParseState.STACK_AUX)
|
||||
elif name == 'ip':
|
||||
state = self.state()
|
||||
assert state in [ValgrindParseState.STACK, ValgrindParseState.STACK_AUX]
|
||||
if state == ValgrindParseState.STACK:
|
||||
self.state_stack.append(ValgrindParseState.STACK_IP)
|
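# Build the backtrace as an addr2line command line; the instruction pointer addresses
# are appended as character data arrives.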
||||
if len(self.stack[-1].what.backtrace) == 0:
|
||||
self.stack[-1].what.backtrace = 'addr2line -e fdbserver.debug -p -C -f -i '
|
||||
else:
|
||||
self.stack[-1].what.backtrace += ' '
|
||||
else:
|
||||
self.state_stack.append(ValgrindParseState.STACK_IP_AUX)
|
||||
if len(self.stack[-1].aux[-1].backtrace) == 0:
|
||||
self.stack[-1].aux[-1].backtrace = 'addr2line -e fdbserver.debug -p -C -f -i '
|
||||
else:
|
||||
self.stack[-1].aux[-1].backtrace += ' '
|
||||
|
||||
def endElement(self, name):
|
||||
# pdb.set_trace()
|
||||
if name == 'error':
|
||||
self.result.append(self.stack.pop())
|
||||
self.state_stack.pop()
|
||||
elif name == 'kind':
|
||||
assert self.state() == ValgrindParseState.KIND
|
||||
self.state_stack.pop()
|
||||
elif name == 'what':
|
||||
assert self.state() == ValgrindParseState.WHAT
|
||||
self.state_stack.pop()
|
||||
elif name == 'auxwhat':
|
||||
assert self.state() == ValgrindParseState.AUX_WHAT
|
||||
self.state_stack.pop()
|
||||
elif name == 'stack':
|
||||
assert self.state() in [ValgrindParseState.STACK, ValgrindParseState.STACK_AUX]
|
||||
self.state_stack.pop()
|
||||
elif name == 'ip':
|
||||
self.state_stack.pop()
|
||||
state = self.state()
|
||||
assert state in [ValgrindParseState.STACK, ValgrindParseState.STACK_AUX]
|
||||
|
||||
|
||||
def parse_valgrind_output(valgrind_out_file: Path) -> List[ValgrindError]:
|
||||
handler = ValgrindHandler()
|
||||
with valgrind_out_file.open('r') as f:
|
||||
xml.sax.parse(f, handler)
|
||||
return handler.result
|
|
@ -0,0 +1,66 @@
|
|||
from functools import total_ordering
|
||||
from pathlib import Path
|
||||
from typing import Tuple
|
||||
|
||||
|
||||
@total_ordering
|
||||
class Version:
|
||||
def __init__(self):
|
||||
self.major: int = 0
|
||||
self.minor: int = 0
|
||||
self.patch: int = 0
|
||||
|
||||
def version_tuple(self):
|
||||
return self.major, self.minor, self.patch
|
||||
|
||||
def _compare(self, other) -> int:
|
||||
lhs: Tuple[int, int, int] = self.version_tuple()
|
||||
rhs: Tuple[int, int, int]
|
||||
if isinstance(other, Version):
|
||||
rhs = other.version_tuple()
|
||||
else:
|
||||
rhs = Version.parse(str(other)).version_tuple()
|
||||
if lhs < rhs:
|
||||
return -1
|
||||
elif lhs > rhs:
|
||||
return 1
|
||||
else:
|
||||
return 0
|
||||
|
||||
def __eq__(self, other) -> bool:
|
||||
return self._compare(other) == 0
|
||||
|
||||
def __lt__(self, other) -> bool:
|
||||
return self._compare(other) < 0
|
||||
|
||||
def __hash__(self):
|
||||
return hash(self.version_tuple())
|
||||
|
||||
def __str__(self):
|
||||
return '{}.{}.{}'.format(self.major, self.minor, self.patch)
|
||||
|
||||
@staticmethod
|
||||
def of_binary(binary: Path):
|
||||
parts = binary.name.split('-')
|
||||
if len(parts) != 2:
|
||||
return Version.max_version()
|
||||
return Version.parse(parts[1])
|
||||
|
||||
@staticmethod
|
||||
def parse(version: str):
|
||||
version_tuple = version.split('.')
|
||||
self = Version()
|
||||
self.major = int(version_tuple[0])
|
||||
if len(version_tuple) > 1:
|
||||
self.minor = int(version_tuple[1])
|
||||
if len(version_tuple) > 2:
|
||||
self.patch = int(version_tuple[2])
|
||||
return self
|
||||
|
||||
@staticmethod
|
||||
def max_version():
|
||||
self = Version()
|
||||
self.major = 2**32 - 1
|
||||
self.minor = 2**32 - 1
|
||||
self.patch = 2**32 - 1
|
||||
return self
|
|
@ -0,0 +1,420 @@
|
|||
# Dynamic Knobs
|
||||
|
||||
This document is largely adapted from original design documents by Markus
|
||||
Pilman and Trevor Clinkenbeard.
|
||||
|
||||
## Background
|
||||
|
||||
FoundationDB parameters control the behavior of the database, including whether
|
||||
certain features are available and the value of internal constants. Parameters
|
||||
will be referred to as knobs for the remainder of this document. Currently,
|
||||
these knobs are configured through arguments passed to `fdbserver` processes,
|
||||
often controlled by `fdbmonitor`. This has a number of problems:
|
||||
|
||||
1. Updating knobs involves updating `foundationdb.conf` files on each host in a
|
||||
cluster. This has a lot of overhead and typically requires external tooling
|
||||
for large scale changes.
|
||||
2. All knob changes require a process restart.
|
||||
3. We can't easily track the history of knob changes.
|
||||
|
||||
## Overview
|
||||
|
||||
The dynamic knobs project creates a strictly serializable quorum-based
|
||||
configuration database stored on the coordinators. Each `fdbserver` process
|
||||
specifies a configuration path and applies knob overrides from the
|
||||
configuration database for its specified classes.
|
||||
|
||||
### Caveats
|
||||
|
||||
The configuration database explicitly does not support the following:
|
||||
|
||||
1. A high load. The update rate, while not specified, should be relatively low.
|
||||
2. A large amount of data. The database is meant to be relatively small (under
|
||||
one megabyte). Data is not sharded and every coordinator stores a complete
|
||||
copy.
|
||||
3. Concurrent writes. At most one write can succeed at a time, and clients must
|
||||
retry their failed writes.
|
||||
|
||||
## Design
|
||||
|
||||
### Configuration Path
|
||||
|
||||
Each `fdbserver` process can now include a `--config_path` argument specifying
|
||||
its configuration path. A configuration path is a hierarchical list of
|
||||
configuration classes specifying which knob overrides the `fdbserver` process
|
||||
should apply from the configuration database. For example:
|
||||
|
||||
```bash
|
||||
$ fdbserver --config_path classA/classB/classC ...
|
||||
```
|
||||
|
||||
Knob overrides follow descending priority:
|
||||
|
||||
1. Manually specified command line knobs.
|
||||
2. Individual configuration class overrides.
|
||||
* Subdirectories override parent directories. For example, if the
|
||||
configuration path is `az-1/storage/gp3`, the `gp3` configuration takes
|
||||
priority over the `storage` configuration, which takes priority over the
|
||||
`az-1` configuration.
|
||||
3. Global configuration knobs.
|
||||
4. Default knob values.
|
||||
|
||||
#### Example
|
||||
|
||||
For example, imagine an `fdbserver` process run as follows:
|
||||
|
||||
```bash
|
||||
$ fdbserver --datadir /mnt/fdb/storage/4500 --logdir /var/log/foundationdb --public_address auto:4500 --config_path az-1/storage/gp3 --knob_disable_asserts false
|
||||
```
|
||||
|
||||
And the configuration database contains:
|
||||
|
||||
| ConfigClass | KnobName | KnobValue |
|
||||
|-------------|---------------------|-----------|
|
||||
| az-2 | page_cache_4k | 8e9 |
|
||||
| storage | min_trace_severity | 20 |
|
||||
| az-1 | compaction_interval | 280 |
|
||||
| storage | compaction_interval | 350 |
|
||||
| az-1 | disable_asserts | true |
|
||||
| \<global\> | max_metric_size | 5000 |
|
||||
| gp3 | max_metric_size | 1000 |
|
||||
|
||||
The final configuration for the process will be:
|
||||
|
||||
| KnobName | KnobValue | Explanation |
|
||||
|---------------------|-------------|-------------|
|
||||
| page_cache_4k | \<default\> | The configuration database knob override for `az-2` is ignored, so the compiled default is used |
|
||||
| min_trace_severity | 20 | Because the `storage` configuration class is part of the process’s configuration path, the corresponding knob override is applied from the configuration database |
|
||||
| compaction_interval | 350 | The `storage` knob override takes precedence over the `az-1` knob override |
|
||||
| disable_asserts | false | This knob is manually overridden, so all other overrides are ignored |
|
||||
| max_metric_size | 1000 | Knob overrides for specific configuration classes take precedence over global knob overrides, so the global override is ignored |
|
||||
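As a reading aid, the resolution order can be sketched in a few lines of Python. This is purely illustrative; the function and the data layout are assumptions, not the actual knob machinery.

```python
# Illustrative sketch of knob override resolution; none of these names are real FDB APIs.
def resolve_knob(knob_name, config_path, overrides, manual_knobs, defaults):
    """overrides maps (config_class or None, knob_name) -> value; None means <global>."""
    if knob_name in manual_knobs:                         # 1. command line knobs win
        return manual_knobs[knob_name]
    for config_class in reversed(config_path):            # 2. class overrides, rightmost path element first
        if (config_class, knob_name) in overrides:
            return overrides[(config_class, knob_name)]
    if (None, knob_name) in overrides:                    # 3. global overrides
        return overrides[(None, knob_name)]
    return defaults.get(knob_name)                        # 4. compiled-in default


overrides = {
    ('storage', 'min_trace_severity'): '20',
    ('az-1', 'compaction_interval'): '280',
    ('storage', 'compaction_interval'): '350',
    ('az-1', 'disable_asserts'): 'true',
    (None, 'max_metric_size'): '5000',
    ('gp3', 'max_metric_size'): '1000',
}
path = ['az-1', 'storage', 'gp3']
print(resolve_knob('compaction_interval', path, overrides, {}, {}))                        # 350
print(resolve_knob('disable_asserts', path, overrides, {'disable_asserts': 'false'}, {}))  # false
print(resolve_knob('max_metric_size', path, overrides, {}, {}))                            # 1000
```

With the overrides from the table above, the sketch reproduces the same final values.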
|
||||
### Clients
|
||||
|
||||
Clients can write to the configuration database using transactions.
|
||||
Configuration database transactions are differentiated from regular
|
||||
transactions through specification of the `USE_CONFIG_DATABASE` database
|
||||
option.
|
||||
|
||||
In configuration transactions, the client uses the tuple layer to interact with
|
||||
the configuration database. Keys are tuples of size two, where the first item
|
||||
is the configuration class being written, and the second item is the knob name.
|
||||
The value should be specified as a string. It will be converted to the
|
||||
appropriate type based on the declared type of the knob being set.
|
||||
|
||||
Below is a sample Python script to write to the configuration database.
|
||||
|
||||
```python
|
||||
import fdb
|
||||
|
||||
fdb.api_version(720)
|
||||
|
||||
@fdb.transactional
|
||||
def set_knob(tr, knob_name, knob_value, config_class, description):
|
||||
tr['\xff\xff/description'] = description
|
||||
tr[fdb.tuple.pack((config_class, knob_name,))] = knob_value
|
||||
|
||||
# This function performs two knob changes transactionally.
|
||||
@fdb.transactional
|
||||
def set_multiple_knobs(tr):
|
||||
tr['\xff\xff/description'] = 'description'
|
||||
tr[fdb.tuple.pack((None, 'min_trace_severity',))] = '10'
|
||||
tr[fdb.tuple.pack(('az-1', 'min_trace_severity',))] = '20'
|
||||
|
||||
db = fdb.open()
|
||||
db.options.set_use_config_database()
|
||||
|
||||
set_knob(db, 'min_trace_severity', '10', None, 'description')
|
||||
set_knob(db, 'min_trace_severity', '20', 'az-1', 'description')
|
||||
```
|
||||
|
||||
### Disable the Configuration Database
|
||||
|
||||
The configuration database includes both client and server changes and is
|
||||
enabled by default. Thus, to disable the configuration database, changes must
|
||||
be made to both.
|
||||
|
||||
#### Server
|
||||
|
||||
The configuration database can be disabled by specifying the ``fdbserver``
|
||||
command line option ``--no-config-db``. Note that this option must be specified
|
||||
for *every* ``fdbserver`` process.
|
||||
|
||||
#### Client
|
||||
|
||||
The only client change from the configuration database is as part of the change
|
||||
coordinators command. The change coordinators command is not considered
|
||||
successful until the configuration database is readable on the new
|
||||
coordinators. This will cause the change coordinators command to hang if run
|
||||
against a database with dynamic knobs disabled. To disable the client side
|
||||
configuration database liveness check, specify the ``--no-config-db`` flag when
|
||||
changing coordinators. For example:
|
||||
|
||||
```
|
||||
fdbcli> coordinators auto --no-config-db
|
||||
```
|
||||
|
||||
## Status
|
||||
|
||||
The current state of the configuration database is output as part of `status
|
||||
json`. The configuration path for each process can be determined from the
|
||||
``command_line`` key associated with each process.
|
||||
|
||||
Sample from ``status json``:
|
||||
|
||||
```
|
||||
"configuration_database" : {
|
||||
"commits" : [
|
||||
{
|
||||
"description" : "set some knobs",
|
||||
"timestamp" : 1659570000,
|
||||
"version" : 1
|
||||
},
|
||||
{
|
||||
"description" : "make some other changes",
|
||||
"timestamp" : 1659570000,
|
||||
"version" : 2
|
||||
}
|
||||
],
|
||||
"last_compacted_version" : 0,
|
||||
"most_recent_version" : 2,
|
||||
"mutations" : [
|
||||
{
|
||||
"config_class" : "<global>",
|
||||
"knob_name" : "min_trace_severity",
|
||||
"knob_value" : "int:5",
|
||||
"type" : "set",
|
||||
"version" : 1
|
||||
},
|
||||
{
|
||||
"config_class" : "<global>",
|
||||
"knob_name" : "compaction_interval",
|
||||
"knob_value" : "double:30.000000",
|
||||
"type" : "set",
|
||||
"version" : 1
|
||||
},
|
||||
{
|
||||
"config_class" : "az-1",
|
||||
"knob_name" : "compaction_interval",
|
||||
"knob_value" : "double:60.000000",
|
||||
"type" : "set",
|
||||
"version" : 1
|
||||
},
|
||||
{
|
||||
"config_class" : "<global>",
|
||||
"knob_name" : "compaction_interval",
|
||||
"type" : "clear",
|
||||
"version" : 2
|
||||
},
|
||||
{
|
||||
"config_class" : "<global>",
|
||||
"knob_name" : "update_node_timeout",
|
||||
"knob_value" : "double:4.000000",
|
||||
"type" : "set",
|
||||
"version" : 2
|
||||
}
|
||||
],
|
||||
"snapshot" : {
|
||||
"<global>" : {
|
||||
"min_trace_severity" : "int:5",
|
||||
"update_node_timeout" : "double:4.000000"
|
||||
},
|
||||
"az-1" : {
|
||||
"compaction_interval" : "double:60.000000"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
After compaction, ``status json`` would show:
|
||||
|
||||
```
|
||||
"configuration_database" : {
|
||||
"commits" : [
|
||||
],
|
||||
"last_compacted_version" : 2,
|
||||
"most_recent_version" : 2,
|
||||
"mutations" : [
|
||||
],
|
||||
"snapshot" : {
|
||||
"<global>" : {
|
||||
"min_trace_severity" : "int:5",
|
||||
"update_node_timeout" : "double:4.000000"
|
||||
},
|
||||
"az-1" : {
|
||||
"compaction_interval" : "double:60.000000"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Detailed Implementation
|
||||
|
||||
The configuration database is implemented as a replicated state machine living
|
||||
on the coordinators. This allows configuration database transactions to
|
||||
continue to function in the event of a catastrophic loss of the transaction
|
||||
subsystem.
|
||||
|
||||
To commit a transaction, clients run the two-phase Paxos protocol. First, the
|
||||
client asks for a live version from a quorum of coordinators. When a
|
||||
coordinator receives a request for its live version, it increments its local
|
||||
live version by one and returns it to the client. Then, the client submits its
|
||||
writes at the live version it received in the previous step. A coordinator will
|
||||
accept the commit if it is still on the same live version. If a majority of
|
||||
coordinators accept the commit, it is considered committed.
|
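A minimal stand-in for this flow, using an in-memory coordinator stub (the class and method names below are assumptions, not the real ``ConfigTransactionInterface``):

```python
# Illustrative stand-in for the commit flow; not the real ConfigTransactionInterface.
class ConfigNodeStub:
    def __init__(self):
        self.live_version = 0
        self.data = {}

    def get_live_version(self):
        # A coordinator bumps its local live version and hands it to the client.
        self.live_version += 1
        return self.live_version

    def try_commit(self, version, mutations):
        # The commit is only accepted if the node is still on the same live version.
        if version != self.live_version:
            return False
        self.data.update(mutations)
        return True


def client_commit(nodes, mutations):
    quorum = len(nodes) // 2 + 1
    # Phase 1: obtain a live version from a quorum of coordinators.
    live = max(node.get_live_version() for node in nodes[:quorum])
    # Phase 2: submit the writes at that version; committed once a majority accepts.
    accepted = sum(1 for node in nodes if node.try_commit(live, mutations))
    return accepted >= quorum


coordinators = [ConfigNodeStub() for _ in range(3)]
print(client_commit(coordinators, {('az-1', 'compaction_interval'): 'double:60.0'}))  # True
```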
||||
|
||||
### Coordinator
|
||||
|
||||
Each coordinator runs a ``ConfigNode`` which serves as a replica storing one
|
||||
full copy of the configuration database. Coordinators never communicate with
|
||||
other coordinators while processing configuration database transactions.
|
||||
Instead, the client runs the transaction and determines when it has quorum
|
||||
agreement.
|
||||
|
||||
Coordinators serve the following ``ConfigTransactionInterface`` to allow
|
||||
clients to read from and write to the configuration database.
|
||||
|
||||
#### ``ConfigTransactionInterface``
|
||||
| Request | Request fields | Reply fields | Explanation |
|
||||
|------------------|----------------------------------------------------------------|-----------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------|
|
||||
| GetGeneration | (coordinatorsHash) | (generation) or (coordinators_changed error) | Get a new read version. This read version is used for all future requests in the transaction |
|
||||
| Get | (configuration class, knob name, coordinatorsHash, generation) | (knob value or empty) or (coordinators_changed error) or (transaction_too_old error) | Returns the current value stored at the specified configuration class and knob name, or empty if no value exists |
|
||||
| GetConfigClasses | (coordinatorsHash, generation) | (configuration classes) or (coordinators_changed error) or (transaction_too_old error) | Returns a list of all configuration classes stored in the configuration database |
|
||||
| GetKnobs | (configuration class, coordinatorsHash, generation) | (knob names) or (coordinators_changed error) or (transaction_too_old error) | Returns a list of all knob names stored for the provided configuration class |
|
||||
| Commit | (mutation list, coordinatorsHash, generation) | ack or (coordinators_changed error) or (commit_unknown_result error) or (not_committed error) | Commit mutations set by the transaction |
|
||||
|
||||
Coordinators also serve the following ``ConfigFollowerInterface`` to provide
|
||||
access to (and modification of) their current state. Most interaction through
|
||||
this interface is done by the cluster controller through its
|
||||
``IConfigConsumer`` implementation living on the ``ConfigBroadcaster``.
|
||||
|
||||
#### ``ConfigFollowerInterface``
|
||||
| Request | Request fields | Reply fields | Explanation |
|
||||
|-----------------------|----------------------------------------------------------------------|-----------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------|
|
||||
| GetChanges | (lastSeenVersion, mostRecentVersion) | (mutation list, version) or (version_already_compacted error) or (process_behind error) | Request changes since the last seen version, receive a new most recent version, as well as recent mutations |
|
||||
| GetSnapshotAndChanges | (mostRecentVersion) | (snapshot, snapshotVersion, changes) | Request the full configuration database, in the form of a base snapshot and changes to apply on top of the snapshot |
|
||||
| Compact | (version) | ack | Compact mutations up to the provided version |
|
||||
| Rollforward | (rollbackTo, lastKnownCommitted, target, changes, specialZeroQuorum) | ack or (version_already_compacted error) or (transaction_too_old error) | Rollback/rollforward mutations on a node to catch it up with the majority |
|
||||
| GetCommittedVersion | () | (registered, lastCompacted, lastLive, lastCommitted) | Request version information from a ``ConfigNode`` |
|
||||
| Lock | (coordinatorsHash) | ack | Lock a ``ConfigNode`` to prevent it from serving requests during a coordinator change |
|
||||
|
||||
### Cluster Controller
|
||||
|
||||
The cluster controller runs a singleton ``ConfigBroadcaster`` which is
|
||||
responsible for periodically polling the ``ConfigNode``s for updates, then
|
||||
broadcasting these updates to workers through the ``ConfigBroadcastInterface``.
|
||||
When workers join the cluster, they register themselves and their
|
||||
``ConfigBroadcastInterface`` with the broadcaster. The broadcaster then pushes
|
||||
new updates to registered workers.
|
||||
|
||||
The ``ConfigBroadcastInterface`` is also used by ``ConfigNode``s to register
|
||||
with the ``ConfigBroadcaster``. ``ConfigNode``s need to register with the
|
||||
broadcaster because the broadcaster decides when the ``ConfigNode`` may begin
|
||||
serving requests, based on global information about status of other
|
||||
``ConfigNode``s. For example, if a system with three ``ConfigNode``s suffers a
|
||||
fault where one ``ConfigNode`` loses data, the faulty ``ConfigNode`` should
|
||||
not be allowed to begin serving requests again until it has been rolled forward
|
||||
and is up to date with the latest state of the configuration database.
|
||||
|
||||
#### ``ConfigBroadcastInterface``
|
||||
|
||||
| Request | Request fields | Reply fields | Explanation |
|
||||
|------------|------------------------------------------------------------|-------------------------------|---------------------------------------------------------------------------------------------|
|
||||
| Snapshot | (snapshot, version, restartDelay) | ack | A snapshot of the configuration database sent by the broadcaster to workers |
|
||||
| Changes | (changes, mostRecentVersion, restartDelay) | ack | A list of changes up to and including mostRecentVersion, sent by the broadcaster to workers |
|
||||
| Registered | () | (registered, lastSeenVersion) | Sent by the broadcaster to new ``ConfigNode``s to determine their registration status |
|
||||
| Ready | (snapshot, snapshotVersion, liveVersion, coordinatorsHash) | ack | Sent by the broadcaster to new ``ConfigNode``s to allow them to start serving requests |
|
||||
|
||||
### Worker
|
||||
|
||||
Each worker runs a ``LocalConfiguration`` instance which receives and applies
|
||||
knob updates from the ``ConfigBroadcaster``. The local configuration maintains
|
||||
a durable ``KeyValueStoreMemory`` containing the following:
|
||||
|
||||
* The latest known configuration version
|
||||
* The most recently used configuration path
|
||||
* All knob overrides corresponding to the configuration path at the latest known version
|
||||
|
||||
Once a worker starts, it will:
|
||||
|
||||
* Apply manually set knobs
|
||||
* Read its local configuration file
|
||||
* If the stored configuration path does not match the configuration path
|
||||
specified on the command line, delete the local configuration file
|
||||
* Otherwise, apply knob updates from the local configuration file. Manually
|
||||
specified knobs will not be overridden
|
||||
* Register with the broadcaster to receive new updates for its configuration
|
||||
classes
|
||||
* Persist these updates when received and restart if necessary
|
||||
|
||||
### Knob Atomicity
|
||||
|
||||
All knobs are classified as either atomic or non-atomic. Atomic knobs require a
|
||||
process restart when changed, while non-atomic knobs do not.
|
||||
|
||||
### Compaction
|
||||
|
||||
``ConfigNode``s store individual mutations in order to be able to update other,
|
||||
out of date ``ConfigNode``s without needing to send a full snapshot. Each
|
||||
configuration database commit also contains additional metadata such as a
|
||||
timestamp and a text description of the changes being made. To keep the size of
|
||||
the configuration database manageable, a compaction process runs periodically
|
||||
(defaulting to every five minutes) which compacts individual mutations into a
|
||||
simplified snapshot of key-value pairs. Compaction is controlled by the
|
||||
``ConfigBroadcaster``, using information it periodically requests from
|
||||
``ConfigNode``s. Compaction will only compact up to the minimum known version
|
||||
across *all* ``ConfigNode``s. This means that if one ``ConfigNode`` is
|
||||
permanently partitioned from the ``ConfigBroadcaster`` or from clients, no
|
||||
compaction will ever take place.
|
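The compaction rule itself is simple to state. A hedged sketch, with a hypothetical helper name:

```python
# Illustrative sketch (hypothetical helper): the broadcaster may only compact up to
# the smallest committed version reported by *all* ConfigNodes.
def choose_compaction_version(reported_versions):
    return min(reported_versions) if reported_versions else 0

# A node stuck at version 7 caps compaction at 7, even though its peers are at 12 and 10;
# a node that never reports a newer version (e.g. permanently partitioned) blocks compaction entirely.
print(choose_compaction_version([12, 10, 7]))  # 7
```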
||||
|
||||
### Rollback / Rollforward
|
||||
|
||||
It is necessary to be able to roll ``ConfigNode``s backward and forward with
|
||||
respect to their committed versions due to the nature of quorum logic and
|
||||
unreliable networks.
|
||||
|
||||
Consider a case where a client commit gets persisted durably on one out of
|
||||
three ``ConfigNode``s (assume commit messages to the other two nodes are lost).
|
||||
Since the value is not committed on a majority of ``ConfigNode``s, it cannot be
|
||||
considered committed. But it is also incorrect to have the value persist on one
|
||||
out of three nodes as future commits are made. In this case, the most common
|
||||
result is that the ``ConfigNode`` will be rolled back when the next commit from
|
||||
a different client is made, and then rolled forward to contain the data from
|
||||
the commit. ``PaxosConfigConsumer`` contains logic to recognize ``ConfigNode``
|
||||
minorities and update them to match the quorum.
|
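The repair decision can be sketched as follows; the names are placeholders for state that ``PaxosConfigConsumer`` tracks, not its actual API:

```python
# Illustrative sketch (placeholder names, not PaxosConfigConsumer's API): repair a
# ConfigNode that diverged from the quorum by rolling it back, then forward.
def plan_repair(node_version, quorum_version, quorum_mutations_by_version):
    plan = {'rollback_to': None, 'rollforward': []}
    if node_version > quorum_version:
        # Writes past the quorum version never reached a majority, so undo them first.
        plan['rollback_to'] = quorum_version
        node_version = quorum_version
    # Then re-apply everything the quorum committed beyond the node's version.
    plan['rollforward'] = [(v, m) for v, m in sorted(quorum_mutations_by_version.items())
                           if v > node_version]
    return plan


print(plan_repair(5, 4, {5: "set x=1", 6: "set y=2"}))
# {'rollback_to': 4, 'rollforward': [(5, 'set x=1'), (6, 'set y=2')]}
```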
||||
|
||||
### Changing Coordinators
|
||||
|
||||
Since the configuration database lives on the coordinators and the
|
||||
[coordinators can be
|
||||
changed](https://apple.github.io/foundationdb/configuration.html#configuration-changing-coordination-servers),
|
||||
it is necessary to copy the configuration database from the old to the new
|
||||
coordinators during such an event. A coordinator change performs the following
|
||||
steps with regard to the configuration database:
|
||||
|
||||
1. Write ``\xff/coordinatorsKey`` with the new coordinators string. The key
|
||||
``\xff/previousCoordinators`` contains the current (old) set of
|
||||
coordinators.
|
||||
2. Lock the old ``ConfigNode``s so they can no longer serve client requests.
|
||||
3. Start a recovery, causing a new cluster controller (and therefore
|
||||
``ConfigBroadcaster``) to be selected.
|
||||
4. Read ``\xff/previousCoordinators`` on the ``ConfigBroadcaster`` and, if
|
||||
present, read an up-to-date snapshot of the configuration database on the
|
||||
old coordinators.
|
||||
5. Determine if each registering ``ConfigNode`` needs an up-to-date snapshot of
|
||||
the configuration database sent to it, based on its reported version and the
|
||||
snapshot version of the database received from the old coordinators.
|
||||
* Some new coordinators which were also coordinators in the previous
|
||||
configuration may not need a snapshot.
|
||||
6. Send ready requests to new ``ConfigNode``s, including an up-to-date snapshot
|
||||
if necessary. This allows the new coordinators to begin serving
|
||||
configuration database requests from clients.
|
||||
|
||||
## Testing
|
||||
|
||||
The ``ConfigDatabaseUnitTests`` class unit tests a number of different
|
||||
configuration database dimensions.
|
||||
|
||||
The ``ConfigIncrement`` workload tests contention between clients attempting to
|
||||
write to the configuration database, paired with machine failure and
|
||||
coordinator changes.
|
|
@ -0,0 +1,45 @@
|
|||
/*
|
||||
* BlobGranuleCommon.cpp
|
||||
*
|
||||
* This source file is part of the FoundationDB open source project
|
||||
*
|
||||
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "fdbclient/BlobGranuleCommon.h"
|
||||
|
||||
BlobGranuleSummaryRef summarizeGranuleChunk(Arena& ar, const BlobGranuleChunkRef& chunk) {
|
||||
BlobGranuleSummaryRef summary;
|
||||
ASSERT(chunk.snapshotFile.present());
|
||||
ASSERT(chunk.snapshotVersion != invalidVersion);
|
||||
ASSERT(chunk.includedVersion >= chunk.snapshotVersion);
|
||||
ASSERT(chunk.newDeltas.empty());
|
||||
|
||||
if (chunk.tenantPrefix.present()) {
|
||||
summary.keyRange = KeyRangeRef(ar, chunk.keyRange.removePrefix(chunk.tenantPrefix.get()));
|
||||
} else {
|
||||
summary.keyRange = KeyRangeRef(ar, chunk.keyRange);
|
||||
}
|
||||
|
||||
summary.snapshotVersion = chunk.snapshotVersion;
|
||||
summary.snapshotSize = chunk.snapshotFile.get().length;
|
||||
summary.deltaVersion = chunk.includedVersion;
|
||||
summary.deltaSize = 0;
|
||||
for (auto& it : chunk.deltaFiles) {
|
||||
summary.deltaSize += it.length;
|
||||
}
|
||||
|
||||
return summary;
|
||||
}
|
|
@ -133,3 +133,70 @@ ACTOR Future<Void> readBlobGranules(BlobGranuleFileRequest request,
|
|||
|
||||
return Void();
|
||||
}
|
||||
|
||||
// Return true if a given range is fully covered by blob chunks
|
||||
bool isRangeFullyCovered(KeyRange range, Standalone<VectorRef<BlobGranuleChunkRef>> blobChunks) {
|
||||
std::vector<KeyRangeRef> blobRanges;
|
||||
for (const BlobGranuleChunkRef& chunk : blobChunks) {
|
||||
blobRanges.push_back(chunk.keyRange);
|
||||
}
|
||||
|
||||
return range.isCovered(blobRanges);
|
||||
}
|
||||
|
||||
void testAddChunkRange(KeyRef begin, KeyRef end, Standalone<VectorRef<BlobGranuleChunkRef>>& chunks) {
|
||||
BlobGranuleChunkRef chunk;
|
||||
chunk.keyRange = KeyRangeRef(begin, end);
|
||||
chunks.push_back(chunks.arena(), chunk);
|
||||
}
|
||||
|
||||
TEST_CASE("/fdbserver/blobgranule/isRangeCoveredByBlob") {
|
||||
Standalone<VectorRef<BlobGranuleChunkRef>> chunks;
|
||||
// chunk1 key_a1 - key_a9
|
||||
testAddChunkRange("key_a1"_sr, "key_a9"_sr, chunks);
|
||||
// chunk2 key_b1 - key_b9
|
||||
testAddChunkRange("key_b1"_sr, "key_b9"_sr, chunks);
|
||||
|
||||
// check empty range. not covered
|
||||
{ ASSERT(isRangeFullyCovered(KeyRangeRef(), chunks) == false); }
|
||||
|
||||
// check empty chunks. not covered
|
||||
{
|
||||
Standalone<VectorRef<BlobGranuleChunkRef>> emptyChunks;
|
||||
ASSERT(isRangeFullyCovered(KeyRangeRef(), emptyChunks) == false);
|
||||
}
|
||||
|
||||
// check '' to \xff
|
||||
{ ASSERT(isRangeFullyCovered(KeyRangeRef(LiteralStringRef(""), LiteralStringRef("\xff")), chunks) == false); }
|
||||
|
||||
// check {key_a1, key_a9}
|
||||
{ ASSERT(isRangeFullyCovered(KeyRangeRef("key_a1"_sr, "key_a9"_sr), chunks)); }
|
||||
|
||||
// check {key_a1, key_a3}
|
||||
{ ASSERT(isRangeFullyCovered(KeyRangeRef("key_a1"_sr, "key_a3"_sr), chunks)); }
|
||||
|
||||
// check {key_a0, key_a3}
|
||||
{ ASSERT(isRangeFullyCovered(KeyRangeRef("key_a0"_sr, "key_a3"_sr), chunks) == false); }
|
||||
|
||||
// check {key_a5, key_b5}
|
||||
{
|
||||
auto range = KeyRangeRef("key_a5"_sr, "key_b5"_sr);
|
||||
ASSERT(isRangeFullyCovered(range, chunks) == false);
|
||||
ASSERT(range.begin == "key_a5"_sr);
|
||||
ASSERT(range.end == "key_b5"_sr);
|
||||
}
|
||||
|
||||
// check unsorted chunks
|
||||
{
|
||||
Standalone<VectorRef<BlobGranuleChunkRef>> unsortedChunks(chunks);
|
||||
testAddChunkRange("key_0"_sr, "key_a"_sr, unsortedChunks);
|
||||
ASSERT(isRangeFullyCovered(KeyRangeRef("key_00"_sr, "key_01"_sr), unsortedChunks));
|
||||
}
|
||||
// check continued chunks
|
||||
{
|
||||
Standalone<VectorRef<BlobGranuleChunkRef>> continuedChunks(chunks);
|
||||
testAddChunkRange("key_a9"_sr, "key_b1"_sr, continuedChunks);
|
||||
ASSERT(isRangeFullyCovered(KeyRangeRef("key_a1"_sr, "key_b9"_sr), continuedChunks) == false);
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
|
|
|
@@ -81,6 +81,7 @@ void ClientKnobs::initialize(Randomize randomize) {
    init( CHANGE_FEED_CACHE_SIZE, 100000 ); if( randomize && BUGGIFY ) CHANGE_FEED_CACHE_SIZE = 1;
    init( CHANGE_FEED_POP_TIMEOUT, 10.0 );
    init( CHANGE_FEED_STREAM_MIN_BYTES, 1e4 ); if( randomize && BUGGIFY ) CHANGE_FEED_STREAM_MIN_BYTES = 1;
    init( CHANGE_FEED_START_INTERVAL, 10.0 );

    init( MAX_BATCH_SIZE, 1000 ); if( randomize && BUGGIFY ) MAX_BATCH_SIZE = 1;
    init( GRV_BATCH_TIMEOUT, 0.005 ); if( randomize && BUGGIFY ) GRV_BATCH_TIMEOUT = 0.1;

@@ -276,7 +277,7 @@ void ClientKnobs::initialize(Randomize randomize) {

    // Blob granules
    init( BG_MAX_GRANULE_PARALLELISM, 10 );
    init( BG_TOO_MANY_GRANULES, 1000 );
    init( BG_TOO_MANY_GRANULES, 10000 );

    init( CHANGE_QUORUM_BAD_STATE_RETRY_TIMES, 3 );
    init( CHANGE_QUORUM_BAD_STATE_RETRY_DELAY, 2.0 );
@@ -7717,7 +7717,11 @@ ACTOR Future<Standalone<VectorRef<BlobGranuleChunkRef>>> readBlobGranulesActor(
    KeyRange range,
    Version begin,
    Optional<Version> read,
    Version* readVersionOut) { // read not present is "use transaction version"
    Version* readVersionOut,
    int chunkLimit,
    bool summarize) { // read not present is "use transaction version"

    ASSERT(chunkLimit > 0);

    state RangeResult blobGranuleMapping;
    state Key granuleStartKey;

@@ -7872,6 +7876,7 @@ ACTOR Future<Standalone<VectorRef<BlobGranuleChunkRef>>> readBlobGranulesActor(
            req.readVersion = rv;
            req.tenantInfo = self->getTenant().present() ? self->trState->getTenantInfo() : TenantInfo();
            req.canCollapseBegin = true; // TODO make this a parameter once we support it
            req.summarize = summarize;

            std::vector<Reference<ReferencedInterface<BlobWorkerInterface>>> v;
            v.push_back(

@@ -7942,6 +7947,12 @@ ACTOR Future<Standalone<VectorRef<BlobGranuleChunkRef>>> readBlobGranulesActor(
                    chunkEndKey = chunkEndKey.removePrefix(tenantPrefix.get());
                }
                keyRange = KeyRangeRef(std::min(chunkEndKey, keyRange.end), keyRange.end);
                if (summarize && results.size() == chunkLimit) {
                    break;
                }
            }
            if (summarize && results.size() == chunkLimit) {
                break;
            }
        }
        // if we detect that this blob worker fails, cancel the request, as otherwise load balance will

@@ -7990,7 +8001,32 @@ Future<Standalone<VectorRef<BlobGranuleChunkRef>>> Transaction::readBlobGranules
    Version begin,
    Optional<Version> readVersion,
    Version* readVersionOut) {
    return readBlobGranulesActor(this, range, begin, readVersion, readVersionOut);
    return readBlobGranulesActor(
        this, range, begin, readVersion, readVersionOut, std::numeric_limits<int>::max(), false);
}

ACTOR Future<Standalone<VectorRef<BlobGranuleSummaryRef>>> summarizeBlobGranulesActor(Transaction* self,
                                                                                      KeyRange range,
                                                                                      Version summaryVersion,
                                                                                      int rangeLimit) {
    state Version readVersionOut;
    Standalone<VectorRef<BlobGranuleChunkRef>> chunks =
        wait(readBlobGranulesActor(self, range, 0, summaryVersion, &readVersionOut, rangeLimit, true));
    ASSERT(chunks.size() <= rangeLimit);
    ASSERT(readVersionOut == summaryVersion);
    Standalone<VectorRef<BlobGranuleSummaryRef>> summaries;
    summaries.reserve(summaries.arena(), chunks.size());
    for (auto& it : chunks) {
        summaries.push_back(summaries.arena(), summarizeGranuleChunk(summaries.arena(), it));
    }

    return summaries;
}

Future<Standalone<VectorRef<BlobGranuleSummaryRef>>> Transaction::summarizeBlobGranules(const KeyRange& range,
                                                                                        Version summaryVersion,
                                                                                        int rangeLimit) {
    return summarizeBlobGranulesActor(this, range, summaryVersion, rangeLimit);
}

ACTOR Future<Version> setPerpetualStorageWiggle(Database cx, bool enable, LockAware lockAware) {

@@ -8016,11 +8052,18 @@ ACTOR Future<Version> setPerpetualStorageWiggle(Database cx, bool enable, LockAw

ACTOR Future<Version> checkBlobSubrange(Database db, KeyRange keyRange, Optional<Version> version) {
    state Transaction tr(db);
    state Version readVersionOut = invalidVersion;
    loop {
        try {
            wait(success(tr.readBlobGranules(keyRange, 0, version, &readVersionOut)));
            return readVersionOut;
            state Version summaryVersion;
            if (version.present()) {
                summaryVersion = version.get();
            } else {
                wait(store(summaryVersion, tr.getReadVersion()));
            }
            // same properties as a read for validating granule is readable, just much less memory and network bandwidth
            // used
            wait(success(tr.summarizeBlobGranules(keyRange, summaryVersion, std::numeric_limits<int>::max())));
            return summaryVersion;
        } catch (Error& e) {
            wait(tr.onError(e));
        }
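A note on usage: the new Transaction::summarizeBlobGranules call follows the usual transaction retry idiom, and because summaries carry only sizes and versions it is much cheaper than reading the granules. The following is a minimal sketch (not part of this commit; the helper name estimateGranuleBytes is hypothetical, and it assumes the usual fdbclient headers and the actor compiler) of how a caller might estimate the file bytes backing a range:

// Hypothetical helper: sum granule summaries at a single read version to estimate
// how many snapshot + delta bytes currently back a key range.
ACTOR Future<int64_t> estimateGranuleBytes(Database db, KeyRange range) {
    state Transaction tr(db);
    loop {
        try {
            state Version v = wait(tr.getReadVersion());
            Standalone<VectorRef<BlobGranuleSummaryRef>> summaries =
                wait(tr.summarizeBlobGranules(range, v, std::numeric_limits<int>::max()));
            int64_t total = 0;
            for (auto& s : summaries) {
                total += s.snapshotSize + s.deltaSize;
            }
            return total;
        } catch (Error& e) {
            wait(tr.onError(e));
        }
    }
}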
@@ -8764,6 +8807,7 @@ Reference<ChangeFeedStorageData> DatabaseContext::getStorageData(StorageServerIn
    newStorageUpdater->interfToken = token;
    newStorageUpdater->updater = storageFeedVersionUpdater(interf, newStorageUpdater.getPtr());
    newStorageUpdater->context = this;
    newStorageUpdater->created = now();
    changeFeedUpdaters[token] = newStorageUpdater.getPtr();
    return newStorageUpdater;
}

@@ -8773,12 +8817,12 @@ Reference<ChangeFeedStorageData> DatabaseContext::getStorageData(StorageServerIn
Version DatabaseContext::getMinimumChangeFeedVersion() {
    Version minVersion = std::numeric_limits<Version>::max();
    for (auto& it : changeFeedUpdaters) {
        if (it.second->version.get() > 0) {
            if (now() - it.second->created > CLIENT_KNOBS->CHANGE_FEED_START_INTERVAL) {
                minVersion = std::min(minVersion, it.second->version.get());
            }
        }
    for (auto& it : notAtLatestChangeFeeds) {
        if (it.second->getVersion() > 0) {
            if (now() - it.second->created > CLIENT_KNOBS->CHANGE_FEED_START_INTERVAL) {
                minVersion = std::min(minVersion, it.second->getVersion());
            }
        }

@@ -8800,7 +8844,7 @@ ChangeFeedStorageData::~ChangeFeedStorageData() {
}

ChangeFeedData::ChangeFeedData(DatabaseContext* context)
  : dbgid(deterministicRandom()->randomUniqueID()), context(context), notAtLatest(1) {
  : dbgid(deterministicRandom()->randomUniqueID()), context(context), notAtLatest(1), created(now()) {
    if (context) {
        context->notAtLatestChangeFeeds[dbgid] = this;
    }

@@ -9205,6 +9249,7 @@ ACTOR Future<Void> mergeChangeFeedStream(Reference<DatabaseContext> db,
    results->notAtLatest.set(interfs.size());
    if (results->context) {
        results->context->notAtLatestChangeFeeds[results->dbgid] = results.getPtr();
        results->created = now();
    }
    refresh.send(Void());

@@ -9411,6 +9456,7 @@ ACTOR Future<Void> singleChangeFeedStream(Reference<DatabaseContext> db,
    results->notAtLatest.set(1);
    if (results->context) {
        results->context->notAtLatestChangeFeeds[results->dbgid] = results.getPtr();
        results->created = now();
    }
    refresh.send(Void());

@@ -9535,6 +9581,7 @@ ACTOR Future<Void> getChangeFeedStreamActor(Reference<DatabaseContext> db,
    results->notAtLatest.set(1);
    if (results->context) {
        results->context->notAtLatestChangeFeeds[results->dbgid] = results.getPtr();
        results->created = now();
    }
}
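The common thread in these hunks is the new created timestamp: a change feed only constrains getMinimumChangeFeedVersion once it has been alive longer than CHANGE_FEED_START_INTERVAL, so a freshly created feed cannot pin the minimum at zero while it is still starting up. A standalone sketch of that filter (types and names here are hypothetical, not the FDB classes):

#include <algorithm>
#include <cstdint>
#include <limits>
#include <vector>

struct FeedState {
    double created;   // creation time, in seconds
    int64_t version;  // latest version reported by the feed
};

// Only feeds older than startInterval with a positive version count toward the minimum.
int64_t minimumFeedVersion(const std::vector<FeedState>& feeds, double now, double startInterval) {
    int64_t minVersion = std::numeric_limits<int64_t>::max();
    for (const auto& f : feeds) {
        if (f.version > 0 && now - f.created > startInterval) {
            minVersion = std::min(minVersion, f.version);
        }
    }
    return minVersion;
}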
@@ -735,16 +735,21 @@ ACTOR Future<S3BlobStoreEndpoint::ReusableConnection> connect_impl(Reference<S3B
        service = b->knobs.secure_connection ? "https" : "http";
    }
    bool isTLS = b->knobs.secure_connection == 1;
    state Reference<IConnection> conn;
    if (b->useProxy) {
        // TODO(renxuan): Support http proxy + TLS
        if (isTLS || b->service == "443") {
            fprintf(stderr, "ERROR: TLS is not supported yet when using HTTP proxy.\n");
            throw connection_failed();
        if (isTLS) {
            Reference<IConnection> _conn =
                wait(HTTP::proxyConnect(host, service, b->proxyHost.get(), b->proxyPort.get()));
            conn = _conn;
        } else {
            host = b->proxyHost.get();
            service = b->proxyPort.get();
            Reference<IConnection> _conn = wait(INetworkConnections::net()->connect(host, service, false));
            conn = _conn;
        }
        host = b->proxyHost.get();
        service = b->proxyPort.get();
    } else {
        wait(store(conn, INetworkConnections::net()->connect(host, service, isTLS)));
    }
    state Reference<IConnection> conn = wait(INetworkConnections::net()->connect(host, service, isTLS));
    wait(conn->connectHandshake());

    TraceEvent("S3BlobStoreEndpointNewConnection")

@@ -892,7 +897,7 @@ ACTOR Future<Reference<HTTP::Response>> doRequest_impl(Reference<S3BlobStoreEndp
        canonicalURI += boost::algorithm::join(queryParameters, "&");
    }

    if (bstore->useProxy) {
    if (bstore->useProxy && bstore->knobs.secure_connection == 0) {
        // Has to be in absolute-form.
        canonicalURI = "http://" + bstore->host + ":" + bstore->service + canonicalURI;
    }

@@ -675,7 +675,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
    init( STORAGE_SERVER_LIST_FETCH_TIMEOUT, 20.0 );
    init( BW_THROTTLING_ENABLED, true );

    bool buggifySmallBWLag = false; //randomize && BUGGIFY;
    bool buggifySmallBWLag = randomize && BUGGIFY;
    init( TARGET_BW_LAG, 50.0 ); if(buggifySmallBWLag) TARGET_BW_LAG = 10.0;
    init( TARGET_BW_LAG_BATCH, 20.0 ); if(buggifySmallBWLag) TARGET_BW_LAG_BATCH = 4.0;
    init( TARGET_BW_LAG_UPDATE, 9.0 ); if(buggifySmallBWLag) TARGET_BW_LAG_UPDATE = 1.0;
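The S3 change splits the proxy path in two: plain HTTP goes through the proxy with absolute-form request targets, while TLS first establishes a CONNECT tunnel via HTTP::proxyConnect and only then handshakes with the origin. The sketch below only illustrates the shape of the CONNECT preamble such a tunnel sends; it is not the exact byte sequence HTTP::proxyConnect produces, and the function name is made up for this example.

#include <string>

// Illustrative only: build a CONNECT request for tunnelling host:service through an HTTP proxy.
std::string buildConnectRequest(const std::string& host, const std::string& service) {
    std::string target = host + ":" + service;
    return "CONNECT " + target + " HTTP/1.1\r\n"
           "Host: " + target + "\r\n"
           "Proxy-Connection: Keep-Alive\r\n"
           "\r\n";
}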
@@ -233,6 +233,22 @@ struct BlobGranuleChunkRef {
    }
};

struct BlobGranuleSummaryRef {
    constexpr static FileIdentifier file_identifier = 9774587;
    KeyRangeRef keyRange;
    Version snapshotVersion;
    int64_t snapshotSize;
    Version deltaVersion;
    int64_t deltaSize;

    template <class Ar>
    void serialize(Ar& ar) {
        serializer(ar, keyRange, snapshotVersion, snapshotSize, deltaVersion, deltaSize);
    }
};

BlobGranuleSummaryRef summarizeGranuleChunk(Arena& ar, const BlobGranuleChunkRef& chunk);

enum BlobGranuleSplitState { Unknown = 0, Initialized = 1, Assigned = 2, Done = 3 };

// Boundary metadata for each range indexed by the beginning of the range.

@@ -51,5 +51,7 @@ ACTOR Future<Void> readBlobGranules(BlobGranuleFileRequest request,
                                    Reference<BlobConnectionProvider> bstore,
                                    PromiseStream<RangeResult> results);

bool isRangeFullyCovered(KeyRange range, Standalone<VectorRef<BlobGranuleChunkRef>> blobChunks);

#include "flow/unactorcompiler.h"
#endif
@@ -30,7 +30,7 @@ struct BlobWorkerStats {
    Counter deltaBytesWritten, snapshotBytesWritten;
    Counter bytesReadFromFDBForInitialSnapshot;
    Counter bytesReadFromS3ForCompaction;
    Counter rangeAssignmentRequests, readRequests;
    Counter rangeAssignmentRequests, readRequests, summaryReads;
    Counter wrongShardServer;
    Counter changeFeedInputBytes;
    Counter readReqTotalFilesReturned;

@@ -75,8 +75,8 @@ struct BlobWorkerStats {
      bytesReadFromFDBForInitialSnapshot("BytesReadFromFDBForInitialSnapshot", cc),
      bytesReadFromS3ForCompaction("BytesReadFromS3ForCompaction", cc),
      rangeAssignmentRequests("RangeAssignmentRequests", cc), readRequests("ReadRequests", cc),
      wrongShardServer("WrongShardServer", cc), changeFeedInputBytes("ChangeFeedInputBytes", cc),
      readReqTotalFilesReturned("ReadReqTotalFilesReturned", cc),
      summaryReads("SummaryReads", cc), wrongShardServer("WrongShardServer", cc),
      changeFeedInputBytes("ChangeFeedInputBytes", cc), readReqTotalFilesReturned("ReadReqTotalFilesReturned", cc),
      readReqDeltaBytesReturned("ReadReqDeltaBytesReturned", cc), commitVersionChecks("CommitVersionChecks", cc),
      granuleUpdateErrors("GranuleUpdateErrors", cc), granuleRequestTimeouts("GranuleRequestTimeouts", cc),
      readRequestsWithBegin("ReadRequestsWithBegin", cc), readRequestsCollapsed("ReadRequestsCollapsed", cc),

@@ -113,6 +113,7 @@ struct BlobGranuleFileRequest {
    Version readVersion;
    bool canCollapseBegin = true;
    TenantInfo tenantInfo;
    bool summarize = false;
    ReplyPromise<BlobGranuleFileReply> reply;

    BlobGranuleFileRequest() {}

@@ -121,7 +122,7 @@ struct BlobGranuleFileRequest {

    template <class Ar>
    void serialize(Ar& ar) {
        serializer(ar, keyRange, beginVersion, readVersion, canCollapseBegin, tenantInfo, reply, arena);
        serializer(ar, keyRange, beginVersion, readVersion, canCollapseBegin, tenantInfo, summarize, reply, arena);
    }
};

@@ -78,6 +78,7 @@ public:
    int64_t CHANGE_FEED_CACHE_SIZE;
    double CHANGE_FEED_POP_TIMEOUT;
    int64_t CHANGE_FEED_STREAM_MIN_BYTES;
    double CHANGE_FEED_START_INTERVAL;

    int MAX_BATCH_SIZE;
    double GRV_BATCH_TIMEOUT;

@@ -170,6 +170,7 @@ struct ChangeFeedStorageData : ReferenceCounted<ChangeFeedStorageData> {
    NotifiedVersion desired;
    UID interfToken;
    DatabaseContext* context;
    double created;

    ~ChangeFeedStorageData();
};

@@ -191,6 +192,7 @@ struct ChangeFeedData : ReferenceCounted<ChangeFeedData> {
    Version endVersion = invalidVersion;
    Version popVersion =
        invalidVersion; // like TLog pop version, set by SS and client can check it to see if they missed data
    double created = 0;

    explicit ChangeFeedData(DatabaseContext* context = nullptr);
    ~ChangeFeedData();
@@ -331,6 +331,22 @@ struct KeyRangeRef {
    bool empty() const { return begin == end; }
    bool singleKeyRange() const { return equalsKeyAfter(begin, end); }

    // Return true if it's fully covered by given range list. Note that ranges should be sorted
    bool isCovered(std::vector<KeyRangeRef>& ranges) {
        ASSERT(std::is_sorted(ranges.begin(), ranges.end(), KeyRangeRef::ArbitraryOrder()));
        KeyRangeRef clone(begin, end);
        for (auto r : ranges) {
            if (begin < r.begin)
                return false; // uncovered gap between clone.begin and r.begin
            if (end <= r.end)
                return true; // range is fully covered
            if (end > r.begin)
                // {clone.begin, r.end} is covered. need to check coverage for {r.end, clone.end}
                clone = KeyRangeRef(r.end, clone.end);
        }
        return false;
    }

    Standalone<KeyRangeRef> withPrefix(const StringRef& prefix) const {
        return KeyRangeRef(begin.withPrefix(prefix), end.withPrefix(prefix));
    }

@@ -421,6 +421,10 @@ public:
    Optional<Version> readVersion,
    Version* readVersionOut = nullptr);

    Future<Standalone<VectorRef<BlobGranuleSummaryRef>>> summarizeBlobGranules(const KeyRange& range,
                                                                               Version summaryVersion,
                                                                               int rangeLimit);

    // If checkWriteConflictRanges is true, existing write conflict ranges will be searched for this key
    void set(const KeyRef& key, const ValueRef& value, AddConflictRange = AddConflictRange::True);
    void atomicOp(const KeyRef& key,
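The coverage check described by the comments in isCovered is a single left-to-right sweep over the sorted candidate ranges, repeatedly shrinking the uncovered remainder. A standalone sketch of that sweep over plain string pairs (independent of the FDB types, so it can be compiled and run on its own):

#include <cassert>
#include <string>
#include <utility>
#include <vector>

using Range = std::pair<std::string, std::string>; // [begin, end)

// Walk sorted candidate ranges left to right, consuming the covered prefix of the
// target until nothing remains (covered) or a gap appears (not covered).
bool rangeIsCovered(Range target, const std::vector<Range>& sorted) {
    for (const auto& r : sorted) {
        if (target.first < r.first)
            return false; // gap before the next candidate range
        if (target.second <= r.second)
            return true; // remainder fully covered
        if (target.second > r.first)
            target.first = r.second; // keep checking the uncovered suffix
    }
    return false;
}

int main() {
    std::vector<Range> chunks = { { "a", "c" }, { "c", "f" } };
    assert(rangeIsCovered({ "a", "f" }, chunks));
    assert(!rangeIsCovered({ "a", "g" }, chunks));
    return 0;
}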
@ -411,7 +411,7 @@ ACTOR Future<Reference<HTTP::Response>> doRequest(Reference<IConnection> conn,
|
|||
}
|
||||
|
||||
state Reference<HTTP::Response> r(new HTTP::Response());
|
||||
state Future<Void> responseReading = r->read(conn, verb == "HEAD" || verb == "DELETE");
|
||||
state Future<Void> responseReading = r->read(conn, verb == "HEAD" || verb == "DELETE" || verb == "CONNECT");
|
||||
|
||||
send_start = timer();
|
||||
|
||||
|
@ -525,4 +525,135 @@ ACTOR Future<Reference<HTTP::Response>> doRequest(Reference<IConnection> conn,
|
|||
}
|
||||
}
|
||||
|
||||
ACTOR Future<Void> sendProxyConnectRequest(Reference<IConnection> conn,
|
||||
std::string remoteHost,
|
||||
std::string remoteService) {
|
||||
state Headers headers;
|
||||
headers["Host"] = remoteHost + ":" + remoteService;
|
||||
headers["Accept"] = "application/xml";
|
||||
headers["Proxy-Connection"] = "Keep-Alive";
|
||||
state int requestTimeout = 60;
|
||||
state int maxTries = FLOW_KNOBS->RESTCLIENT_CONNECT_TRIES;
|
||||
state int thisTry = 1;
|
||||
state double nextRetryDelay = 2.0;
|
||||
state Reference<IRateControl> sendReceiveRate = makeReference<Unlimited>();
|
||||
state int64_t bytes_sent = 0;
|
||||
|
||||
loop {
|
||||
state Optional<Error> err;
|
||||
state Reference<Response> r;
|
||||
|
||||
try {
|
||||
Reference<Response> _r = wait(timeoutError(doRequest(conn,
|
||||
"CONNECT",
|
||||
remoteHost + ":" + remoteService,
|
||||
headers,
|
||||
nullptr,
|
||||
0,
|
||||
sendReceiveRate,
|
||||
&bytes_sent,
|
||||
sendReceiveRate),
|
||||
requestTimeout));
|
||||
r = _r;
|
||||
} catch (Error& e) {
|
||||
if (e.code() == error_code_actor_cancelled)
|
||||
throw;
|
||||
err = e;
|
||||
}
|
||||
|
||||
// If err is not present then r is valid.
|
||||
// If r->code is in successCodes then record the successful request and return r.
|
||||
if (!err.present() && r->code == 200) {
|
||||
return Void();
|
||||
}
|
||||
|
||||
// All errors in err are potentially retryable as well as certain HTTP response codes...
|
||||
bool retryable = err.present() || r->code == 500 || r->code == 502 || r->code == 503 || r->code == 429;
|
||||
|
||||
// But only if our previous attempt was not the last allowable try.
|
||||
retryable = retryable && (thisTry < maxTries);
|
||||
|
||||
TraceEvent event(SevWarn, retryable ? "ProxyConnectCommandFailedRetryable" : "ProxyConnectCommandFailed");
|
||||
|
||||
// Attach err to trace event if present, otherwise extract some stuff from the response
|
||||
if (err.present()) {
|
||||
event.errorUnsuppressed(err.get());
|
||||
}
|
||||
event.suppressFor(60);
|
||||
if (!err.present()) {
|
||||
event.detail("ResponseCode", r->code);
|
||||
}
|
||||
|
||||
event.detail("ThisTry", thisTry);
|
||||
|
||||
// If r is not valid or not code 429 then increment the try count. 429's will not count against the attempt
|
||||
// limit.
|
||||
if (!r || r->code != 429)
|
||||
++thisTry;
|
||||
|
||||
// We will wait delay seconds before the next retry, start with nextRetryDelay.
|
||||
double delay = nextRetryDelay;
|
||||
// Double but limit the *next* nextRetryDelay.
|
||||
nextRetryDelay = std::min(nextRetryDelay * 2, 60.0);
|
||||
|
||||
if (retryable) {
|
||||
// If r is valid then obey the Retry-After response header if present.
|
||||
if (r) {
|
||||
auto iRetryAfter = r->headers.find("Retry-After");
|
||||
if (iRetryAfter != r->headers.end()) {
|
||||
event.detail("RetryAfterHeader", iRetryAfter->second);
|
||||
char* pEnd;
|
||||
double retryAfter = strtod(iRetryAfter->second.c_str(), &pEnd);
|
||||
if (*pEnd) // If there were other characters then don't trust the parsed value, use a probably safe
|
||||
// value of 5 minutes.
|
||||
retryAfter = 300;
|
||||
// Update delay
|
||||
delay = std::max(delay, retryAfter);
|
||||
}
|
||||
}
|
||||
|
||||
// Log the delay then wait.
|
||||
event.detail("RetryDelay", delay);
|
||||
wait(::delay(delay));
|
||||
} else {
|
||||
// We can't retry, so throw something.
|
||||
|
||||
// This error code means the authentication header was not accepted, likely the account or key is wrong.
|
||||
if (r && r->code == 406)
|
||||
throw http_not_accepted();
|
||||
|
||||
if (r && r->code == 401)
|
||||
throw http_auth_failed();
|
||||
|
||||
throw connection_failed();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ACTOR Future<Reference<IConnection>> proxyConnectImpl(std::string remoteHost,
|
||||
std::string remoteService,
|
||||
std::string proxyHost,
|
||||
std::string proxyService) {
|
||||
state NetworkAddress remoteEndpoint =
|
||||
wait(map(INetworkConnections::net()->resolveTCPEndpoint(remoteHost, remoteService),
|
||||
[=](std::vector<NetworkAddress> const& addresses) -> NetworkAddress {
|
||||
NetworkAddress addr = addresses[deterministicRandom()->randomInt(0, addresses.size())];
|
||||
addr.fromHostname = true;
|
||||
addr.flags = NetworkAddress::FLAG_TLS;
|
||||
return addr;
|
||||
}));
|
||||
state Reference<IConnection> connection = wait(INetworkConnections::net()->connect(proxyHost, proxyService));
|
||||
wait(sendProxyConnectRequest(connection, remoteHost, remoteService));
|
||||
boost::asio::ip::tcp::socket socket = std::move(connection->getSocket());
|
||||
Reference<IConnection> remoteConnection = wait(INetworkConnections::net()->connect(remoteEndpoint, &socket));
|
||||
return remoteConnection;
|
||||
}
|
||||
|
||||
Future<Reference<IConnection>> proxyConnect(const std::string& remoteHost,
|
||||
const std::string& remoteService,
|
||||
const std::string& proxyHost,
|
||||
const std::string& proxyService) {
|
||||
return proxyConnectImpl(remoteHost, remoteService, proxyHost, proxyService);
|
||||
}
|
||||
|
||||
} // namespace HTTP
|
||||
|
|
|
@ -68,6 +68,12 @@ Future<Reference<Response>> doRequest(Reference<IConnection> const& conn,
|
|||
Reference<IRateControl> const& recvRate,
|
||||
const std::string& requestHeader = std::string());
|
||||
|
||||
// Connect to proxy, send CONNECT command, and connect to the remote host.
|
||||
Future<Reference<IConnection>> proxyConnect(const std::string& remoteHost,
|
||||
const std::string& remoteService,
|
||||
const std::string& proxyHost,
|
||||
const std::string& proxyService);
|
||||
|
||||
constexpr int HTTP_STATUS_CODE_OK = 200;
|
||||
constexpr int HTTP_STATUS_CODE_CREATED = 201;
|
||||
constexpr int HTTP_STATUS_CODE_ACCEPTED = 202;
|
||||
|
|
|
@@ -488,6 +488,7 @@ Future<REPLY_TYPE(Request)> loadBalance(
            break;
        } else if (badServers == alternatives->countBest() && i == badServers) {
            TraceEvent("AllLocalAlternativesFailed")
                .suppressFor(1.0)
                .detail("Alternatives", alternatives->description())
                .detail("Total", alternatives->size())
                .detail("Best", alternatives->countBest());
@@ -152,7 +152,8 @@ void GranuleFiles::getFiles(Version beginVersion,
                            bool canCollapse,
                            BlobGranuleChunkRef& chunk,
                            Arena& replyArena,
                            int64_t& deltaBytesCounter) const {
                            int64_t& deltaBytesCounter,
                            bool summarize) const {
    BlobFileIndex dummyIndex; // for searching

    // if beginVersion == 0 or we can collapse, find the latest snapshot <= readVersion

@@ -202,12 +203,13 @@ void GranuleFiles::getFiles(Version beginVersion,
    Version lastIncluded = invalidVersion;
    if (snapshotF != snapshotFiles.end()) {
        chunk.snapshotVersion = snapshotF->version;
        chunk.snapshotFile = BlobFilePointerRef(replyArena,
                                                snapshotF->filename,
                                                snapshotF->offset,
                                                snapshotF->length,
                                                snapshotF->fullFileLength,
                                                snapshotF->cipherKeysMeta);
        chunk.snapshotFile =
            BlobFilePointerRef(replyArena,
                               summarize ? "" : snapshotF->filename,
                               snapshotF->offset,
                               snapshotF->length,
                               snapshotF->fullFileLength,
                               summarize ? Optional<BlobGranuleCipherKeysMeta>() : snapshotF->cipherKeysMeta);
        lastIncluded = chunk.snapshotVersion;
    } else {
        chunk.snapshotVersion = invalidVersion;

@@ -215,18 +217,19 @@ void GranuleFiles::getFiles(Version beginVersion,

    while (deltaF != deltaFiles.end() && deltaF->version < readVersion) {
        chunk.deltaFiles.emplace_back_deep(replyArena,
                                           deltaF->filename,
                                           summarize ? "" : deltaF->filename,
                                           deltaF->offset,
                                           deltaF->length,
                                           deltaF->fullFileLength,
                                           deltaF->cipherKeysMeta);
                                           summarize ? Optional<BlobGranuleCipherKeysMeta>() : deltaF->cipherKeysMeta);
        deltaBytesCounter += deltaF->length;
        ASSERT(lastIncluded < deltaF->version);
        lastIncluded = deltaF->version;
        deltaF++;
    }
    // include last delta file that passes readVersion, if it exists
    if (deltaF != deltaFiles.end() && lastIncluded < readVersion) {
    if (deltaF != deltaFiles.end() &&
        ((!summarize && lastIncluded < readVersion) || (summarize && deltaF->version == readVersion))) {
        chunk.deltaFiles.emplace_back_deep(replyArena,
                                           deltaF->filename,
                                           deltaF->offset,

@@ -236,6 +239,7 @@ void GranuleFiles::getFiles(Version beginVersion,
        deltaBytesCounter += deltaF->length;
        lastIncluded = deltaF->version;
    }
    chunk.includedVersion = lastIncluded;
}

static std::string makeTestFileName(Version v) {

@@ -259,7 +263,7 @@ static void checkFiles(const GranuleFiles& f,
    Arena a;
    BlobGranuleChunkRef chunk;
    int64_t deltaBytes = 0;
    f.getFiles(beginVersion, readVersion, canCollapse, chunk, a, deltaBytes);
    f.getFiles(beginVersion, readVersion, canCollapse, chunk, a, deltaBytes, false);
    fmt::print("results({0}, {1}, {2}):\nEXPECTED:\n  snapshot={3}\n  deltas ({4}):\n",
               beginVersion,
               readVersion,

@@ -403,6 +407,49 @@ TEST_CASE("/blobgranule/server/common/granulefiles") {
    return Void();
}

static void checkSummary(const GranuleFiles& f,
                         Version summaryVersion,
                         Version expectedSnapshotVersion,
                         int64_t expectedSnapshotSize,
                         Version expectedDeltaVersion,
                         Version expectedDeltaSize) {
    Arena fileArena, summaryArena;
    BlobGranuleChunkRef chunk;
    int64_t deltaBytes = 0;
    f.getFiles(0, summaryVersion, true, chunk, fileArena, deltaBytes, true);

    BlobGranuleSummaryRef summary = summarizeGranuleChunk(summaryArena, chunk);

    ASSERT(expectedSnapshotVersion == summary.snapshotVersion);
    ASSERT(expectedSnapshotSize == summary.snapshotSize);
    ASSERT(expectedDeltaVersion == summary.deltaVersion);
    ASSERT(expectedDeltaSize == summary.deltaSize);
    ASSERT(deltaBytes == expectedDeltaSize);
}

/*
 * This should technically be in client unit tests but we don't have a unit test there
 * Files:
 * S @ 100 (10 bytes)
 * D @ 150 (5 bytes)
 * D @ 200 (6 bytes)
 */
TEST_CASE("/blobgranule/server/common/granulesummary") {
    GranuleFiles files;
    files.snapshotFiles.push_back(makeTestFile(100, 10));
    files.deltaFiles.push_back(makeTestFile(150, 5));
    files.deltaFiles.push_back(makeTestFile(200, 6));

    checkSummary(files, 100, 100, 10, 100, 0);
    checkSummary(files, 149, 100, 10, 100, 0);
    checkSummary(files, 150, 100, 10, 150, 5);
    checkSummary(files, 199, 100, 10, 150, 5);
    checkSummary(files, 200, 100, 10, 200, 11);
    checkSummary(files, 700, 100, 10, 200, 11);

    return Void();
}

// FIXME: if credentials can expire, refresh periodically
ACTOR Future<Void> loadBlobMetadataForTenants(BGTenantMap* self, std::vector<TenantMapEntry> tenantMapEntries) {
    ASSERT(SERVER_KNOBS->BG_METADATA_SOURCE == "tenant");
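The expectations in the granulesummary test follow from the summarize-mode selection in getFiles: every delta file at or below the summary version is counted, and deltaVersion falls back to the snapshot version when no delta qualifies. A minimal standalone sketch that mirrors just that selection over (version, bytes) pairs and reproduces the test's numbers (the helper name and types are hypothetical):

#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

// Include every delta at or below summaryVersion; deltaVersion defaults to the snapshot version.
std::pair<int64_t, int64_t> summarizeDeltas(const std::vector<std::pair<int64_t, int64_t>>& deltas,
                                            int64_t snapshotVersion,
                                            int64_t summaryVersion) {
    int64_t deltaVersion = snapshotVersion;
    int64_t deltaSize = 0;
    for (const auto& [version, bytes] : deltas) {
        if (version <= summaryVersion) {
            deltaVersion = version;
            deltaSize += bytes;
        }
    }
    return { deltaVersion, deltaSize };
}

int main() {
    std::vector<std::pair<int64_t, int64_t>> deltas = { { 150, 5 }, { 200, 6 } };
    assert(summarizeDeltas(deltas, 100, 149) == std::make_pair(int64_t(100), int64_t(0)));
    assert(summarizeDeltas(deltas, 100, 199) == std::make_pair(int64_t(150), int64_t(5)));
    assert(summarizeDeltas(deltas, 100, 700) == std::make_pair(int64_t(200), int64_t(11)));
    return 0;
}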
@@ -212,3 +212,123 @@ ACTOR Future<Void> clearAndAwaitMerge(Database cx, KeyRange range) {
        }
    }
}

ACTOR Future<Standalone<VectorRef<BlobGranuleSummaryRef>>> getSummaries(Database cx,
                                                                        KeyRange range,
                                                                        Version summaryVersion,
                                                                        Optional<TenantName> tenantName) {
    state Transaction tr(cx, tenantName);
    loop {
        try {
            Standalone<VectorRef<BlobGranuleSummaryRef>> summaries =
                wait(tr.summarizeBlobGranules(range, summaryVersion, 1000000));

            // do some basic validation
            ASSERT(!summaries.empty());
            ASSERT(summaries.front().keyRange.begin == range.begin);
            ASSERT(summaries.back().keyRange.end == range.end);

            for (int i = 0; i < summaries.size() - 1; i++) {
                ASSERT(summaries[i].keyRange.end == summaries[i + 1].keyRange.begin);
            }

            return summaries;
        } catch (Error& e) {
            wait(tr.onError(e));
        }
    }
}

ACTOR Future<Void> validateGranuleSummaries(Database cx,
                                            KeyRange range,
                                            Optional<TenantName> tenantName,
                                            Promise<Void> testComplete) {
    state Arena lastSummaryArena;
    state KeyRangeMap<Optional<BlobGranuleSummaryRef>> lastSummary;
    state Version lastSummaryVersion = invalidVersion;
    state Transaction tr(cx, tenantName);
    state int successCount = 0;
    try {
        loop {
            // get grv and get latest summaries
            state Version nextSummaryVersion;
            tr.reset();
            loop {
                try {
                    wait(store(nextSummaryVersion, tr.getReadVersion()));
                    ASSERT(nextSummaryVersion >= lastSummaryVersion);
                    break;
                } catch (Error& e) {
                    wait(tr.onError(e));
                }
            }

            state Standalone<VectorRef<BlobGranuleSummaryRef>> nextSummary;
            try {
                wait(store(nextSummary, getSummaries(cx, range, nextSummaryVersion, tenantName)));
            } catch (Error& e) {
                if (e.code() == error_code_blob_granule_transaction_too_old) {
                    ASSERT(lastSummaryVersion == invalidVersion);

                    wait(delay(1.0));
                    continue;
                } else {
                    throw e;
                }
            }

            if (lastSummaryVersion != invalidVersion) {
                CODE_PROBE(true, "comparing multiple summaries");
                // diff with last summary ranges to ensure versions never decreased for any range
                for (auto& it : nextSummary) {
                    auto lastSummaries = lastSummary.intersectingRanges(it.keyRange);
                    for (auto& itLast : lastSummaries) {

                        if (!itLast.cvalue().present()) {
                            ASSERT(lastSummaryVersion == invalidVersion);
                            continue;
                        }
                        auto& last = itLast.cvalue().get();

                        ASSERT(it.snapshotVersion >= last.snapshotVersion);
                        // same invariant isn't always true for delta version because of force flushing around granule
                        // merges
                        if (it.keyRange == itLast.range()) {
                            ASSERT(it.deltaVersion >= last.deltaVersion);
                            if (it.snapshotVersion == last.snapshotVersion) {
                                ASSERT(it.snapshotSize == last.snapshotSize);
                            }
                            if (it.snapshotVersion == last.snapshotVersion && it.deltaVersion == last.deltaVersion) {
                                ASSERT(it.snapshotSize == last.snapshotSize);
                                ASSERT(it.deltaSize == last.deltaSize);
                            } else if (it.snapshotVersion == last.snapshotVersion) {
                                ASSERT(it.deltaSize > last.deltaSize);
                            }
                            break;
                        }
                    }
                }

                if (!testComplete.canBeSet()) {
                    return Void();
                }
            }

            successCount++;

            lastSummaryArena = nextSummary.arena();
            lastSummaryVersion = nextSummaryVersion;
            lastSummary.insert(range, {});
            for (auto& it : nextSummary) {
                lastSummary.insert(it.keyRange, it);
            }

            wait(delayJittered(deterministicRandom()->randomInt(1, 10)));
        }
    } catch (Error& e) {
        if (e.code() != error_code_operation_cancelled) {
            TraceEvent(SevError, "UnexpectedErrorValidateGranuleSummaries").error(e);
        }
        throw e;
    }
}
|
|||
|
||||
bool forcePurging = bmData->isForcePurging(assignment.keyRange);
|
||||
|
||||
if (forcePurging && assignment.previousFailure.present()) {
|
||||
--bmData->stats.blockedAssignments;
|
||||
}
|
||||
if (assignment.worker.present() && assignment.worker.get().isValid()) {
|
||||
if (BM_DEBUG) {
|
||||
fmt::print("BW {0} already chosen for seqno {1} in BM {2}\n",
|
||||
|
|
|
@ -3493,7 +3493,7 @@ ACTOR Future<Void> doBlobGranuleFileRequest(Reference<BlobWorkerData> bwData, Bl
|
|||
}
|
||||
state BlobGranuleChunkRef chunk;
|
||||
// TODO change with early reply
|
||||
chunk.includedVersion = req.readVersion;
|
||||
|
||||
chunk.keyRange =
|
||||
KeyRangeRef(StringRef(rep.arena, item.first.begin), StringRef(rep.arena, item.first.end));
|
||||
if (tenantPrefix.present()) {
|
||||
|
@ -3501,123 +3501,131 @@ ACTOR Future<Void> doBlobGranuleFileRequest(Reference<BlobWorkerData> bwData, Bl
|
|||
}
|
||||
|
||||
int64_t deltaBytes = 0;
|
||||
item.second.getFiles(
|
||||
granuleBeginVersion, req.readVersion, req.canCollapseBegin, chunk, rep.arena, deltaBytes);
|
||||
item.second.getFiles(granuleBeginVersion,
|
||||
req.readVersion,
|
||||
req.canCollapseBegin,
|
||||
chunk,
|
||||
rep.arena,
|
||||
deltaBytes,
|
||||
req.summarize);
|
||||
bwData->stats.readReqDeltaBytesReturned += deltaBytes;
|
||||
if (granuleBeginVersion > 0 && chunk.snapshotFile.present()) {
|
||||
CODE_PROBE(true, "collapsed begin version request for efficiency");
|
||||
didCollapse = true;
|
||||
}
|
||||
|
||||
// Invoke calls to populate 'EncryptionKeysCtx' for snapshot and/or deltaFiles asynchronously
|
||||
state Optional<Future<BlobGranuleCipherKeysCtx>> snapCipherKeysCtx;
|
||||
if (chunk.snapshotFile.present()) {
|
||||
const bool encrypted = chunk.snapshotFile.get().cipherKeysMetaRef.present();
|
||||
|
||||
if (BW_DEBUG) {
|
||||
TraceEvent("DoBlobGranuleFileRequestDelta_KeysCtxPrepare")
|
||||
.detail("FileName", chunk.snapshotFile.get().filename.toString())
|
||||
.detail("Encrypted", encrypted);
|
||||
}
|
||||
|
||||
if (encrypted) {
|
||||
ASSERT(bwData->isEncryptionEnabled);
|
||||
ASSERT(!chunk.snapshotFile.get().cipherKeysCtx.present());
|
||||
|
||||
snapCipherKeysCtx = getGranuleCipherKeysFromKeysMetaRef(
|
||||
bwData, chunk.snapshotFile.get().cipherKeysMetaRef.get(), &rep.arena);
|
||||
}
|
||||
}
|
||||
state std::unordered_map<int, Future<BlobGranuleCipherKeysCtx>> deltaCipherKeysCtxs;
|
||||
for (int deltaIdx = 0; deltaIdx < chunk.deltaFiles.size(); deltaIdx++) {
|
||||
const bool encrypted = chunk.deltaFiles[deltaIdx].cipherKeysMetaRef.present();
|
||||
|
||||
if (BW_DEBUG) {
|
||||
TraceEvent("DoBlobGranuleFileRequestDelta_KeysCtxPrepare")
|
||||
.detail("FileName", chunk.deltaFiles[deltaIdx].filename.toString())
|
||||
.detail("Encrypted", encrypted);
|
||||
}
|
||||
|
||||
if (encrypted) {
|
||||
ASSERT(bwData->isEncryptionEnabled);
|
||||
ASSERT(!chunk.deltaFiles[deltaIdx].cipherKeysCtx.present());
|
||||
|
||||
deltaCipherKeysCtxs.emplace(
|
||||
deltaIdx,
|
||||
getGranuleCipherKeysFromKeysMetaRef(
|
||||
bwData, chunk.deltaFiles[deltaIdx].cipherKeysMetaRef.get(), &rep.arena));
|
||||
}
|
||||
}
|
||||
|
||||
// new deltas (if version is larger than version of last delta file)
|
||||
// FIXME: do trivial key bounds here if key range is not fully contained in request key
|
||||
// range
|
||||
if (req.readVersion > metadata->durableDeltaVersion.get() && !metadata->currentDeltas.empty()) {
|
||||
if (metadata->durableDeltaVersion.get() != metadata->pendingDeltaVersion) {
|
||||
fmt::print(
|
||||
"real-time read [{0} - {1}) @ {2} doesn't have mutations!! durable={3}, pending={4}\n",
|
||||
metadata->keyRange.begin.printable(),
|
||||
metadata->keyRange.end.printable(),
|
||||
req.readVersion,
|
||||
metadata->durableDeltaVersion.get(),
|
||||
metadata->pendingDeltaVersion);
|
||||
}
|
||||
|
||||
// prune mutations based on begin version, if possible
|
||||
ASSERT(metadata->durableDeltaVersion.get() == metadata->pendingDeltaVersion);
|
||||
MutationsAndVersionRef* mutationIt = metadata->currentDeltas.begin();
|
||||
if (granuleBeginVersion > metadata->currentDeltas.back().version) {
|
||||
CODE_PROBE(true, "beginVersion pruning all in-memory mutations");
|
||||
mutationIt = metadata->currentDeltas.end();
|
||||
} else if (granuleBeginVersion > metadata->currentDeltas.front().version) {
|
||||
// binary search for beginVersion
|
||||
CODE_PROBE(true, "beginVersion pruning some in-memory mutations");
|
||||
mutationIt = std::lower_bound(metadata->currentDeltas.begin(),
|
||||
metadata->currentDeltas.end(),
|
||||
MutationsAndVersionRef(granuleBeginVersion, 0),
|
||||
MutationsAndVersionRef::OrderByVersion());
|
||||
}
|
||||
|
||||
// add mutations to response
|
||||
while (mutationIt != metadata->currentDeltas.end()) {
|
||||
if (mutationIt->version > req.readVersion) {
|
||||
CODE_PROBE(true, "readVersion pruning some in-memory mutations");
|
||||
break;
|
||||
}
|
||||
chunk.newDeltas.push_back_deep(rep.arena, *mutationIt);
|
||||
mutationIt++;
|
||||
}
|
||||
}
|
||||
|
||||
// Update EncryptionKeysCtx information for the chunk->snapshotFile
|
||||
if (chunk.snapshotFile.present() && snapCipherKeysCtx.present()) {
|
||||
ASSERT(chunk.snapshotFile.get().cipherKeysMetaRef.present());
|
||||
|
||||
BlobGranuleCipherKeysCtx keysCtx = wait(snapCipherKeysCtx.get());
|
||||
chunk.snapshotFile.get().cipherKeysCtx = std::move(keysCtx);
|
||||
// reclaim memory from non-serializable field
|
||||
chunk.snapshotFile.get().cipherKeysMetaRef.reset();
|
||||
|
||||
if (BW_DEBUG) {
|
||||
TraceEvent("DoBlobGranuleFileRequestSnap_KeysCtxDone")
|
||||
.detail("FileName", chunk.snapshotFile.get().filename.toString());
|
||||
}
|
||||
}
|
||||
|
||||
// Update EncryptionKeysCtx information for the chunk->deltaFiles
|
||||
if (!deltaCipherKeysCtxs.empty()) {
|
||||
ASSERT(!chunk.deltaFiles.empty());
|
||||
|
||||
state std::unordered_map<int, Future<BlobGranuleCipherKeysCtx>>::const_iterator itr;
|
||||
for (itr = deltaCipherKeysCtxs.begin(); itr != deltaCipherKeysCtxs.end(); itr++) {
|
||||
BlobGranuleCipherKeysCtx keysCtx = wait(itr->second);
|
||||
chunk.deltaFiles[itr->first].cipherKeysCtx = std::move(keysCtx);
|
||||
// reclaim memory from non-serializable field
|
||||
chunk.deltaFiles[itr->first].cipherKeysMetaRef.reset();
|
||||
if (!req.summarize) {
|
||||
chunk.includedVersion = req.readVersion;
|
||||
// Invoke calls to populate 'EncryptionKeysCtx' for snapshot and/or deltaFiles asynchronously
|
||||
state Optional<Future<BlobGranuleCipherKeysCtx>> snapCipherKeysCtx;
|
||||
if (chunk.snapshotFile.present()) {
|
||||
const bool encrypted = chunk.snapshotFile.get().cipherKeysMetaRef.present();
|
||||
|
||||
if (BW_DEBUG) {
|
||||
TraceEvent("DoBlobGranuleFileRequestDelta_KeysCtxDone")
|
||||
.detail("FileName", chunk.deltaFiles[itr->first].filename.toString());
|
||||
TraceEvent("DoBlobGranuleFileRequestDelta_KeysCtxPrepare")
|
||||
.detail("FileName", chunk.snapshotFile.get().filename.toString())
|
||||
.detail("Encrypted", encrypted);
|
||||
}
|
||||
|
||||
if (encrypted) {
|
||||
ASSERT(bwData->isEncryptionEnabled);
|
||||
ASSERT(!chunk.snapshotFile.get().cipherKeysCtx.present());
|
||||
|
||||
snapCipherKeysCtx = getGranuleCipherKeysFromKeysMetaRef(
|
||||
bwData, chunk.snapshotFile.get().cipherKeysMetaRef.get(), &rep.arena);
|
||||
}
|
||||
}
|
||||
state std::unordered_map<int, Future<BlobGranuleCipherKeysCtx>> deltaCipherKeysCtxs;
|
||||
for (int deltaIdx = 0; deltaIdx < chunk.deltaFiles.size(); deltaIdx++) {
|
||||
const bool encrypted = chunk.deltaFiles[deltaIdx].cipherKeysMetaRef.present();
|
||||
|
||||
if (BW_DEBUG) {
|
||||
TraceEvent("DoBlobGranuleFileRequestDelta_KeysCtxPrepare")
|
||||
.detail("FileName", chunk.deltaFiles[deltaIdx].filename.toString())
|
||||
.detail("Encrypted", encrypted);
|
||||
}
|
||||
|
||||
if (encrypted) {
|
||||
ASSERT(bwData->isEncryptionEnabled);
|
||||
ASSERT(!chunk.deltaFiles[deltaIdx].cipherKeysCtx.present());
|
||||
|
||||
deltaCipherKeysCtxs.emplace(
|
||||
deltaIdx,
|
||||
getGranuleCipherKeysFromKeysMetaRef(
|
||||
bwData, chunk.deltaFiles[deltaIdx].cipherKeysMetaRef.get(), &rep.arena));
|
||||
}
|
||||
}
|
||||
|
||||
// new deltas (if version is larger than version of last delta file)
|
||||
// FIXME: do trivial key bounds here if key range is not fully contained in request key
|
||||
// range
|
||||
if (req.readVersion > metadata->durableDeltaVersion.get() && !metadata->currentDeltas.empty()) {
|
||||
if (metadata->durableDeltaVersion.get() != metadata->pendingDeltaVersion) {
|
||||
fmt::print(
|
||||
"real-time read [{0} - {1}) @ {2} doesn't have mutations!! durable={3}, pending={4}\n",
|
||||
metadata->keyRange.begin.printable(),
|
||||
metadata->keyRange.end.printable(),
|
||||
req.readVersion,
|
||||
metadata->durableDeltaVersion.get(),
|
||||
metadata->pendingDeltaVersion);
|
||||
}
|
||||
|
||||
// prune mutations based on begin version, if possible
|
||||
ASSERT(metadata->durableDeltaVersion.get() == metadata->pendingDeltaVersion);
|
||||
MutationsAndVersionRef* mutationIt = metadata->currentDeltas.begin();
|
||||
if (granuleBeginVersion > metadata->currentDeltas.back().version) {
|
||||
CODE_PROBE(true, "beginVersion pruning all in-memory mutations");
|
||||
mutationIt = metadata->currentDeltas.end();
|
||||
} else if (granuleBeginVersion > metadata->currentDeltas.front().version) {
|
||||
// binary search for beginVersion
|
||||
CODE_PROBE(true, "beginVersion pruning some in-memory mutations");
|
||||
mutationIt = std::lower_bound(metadata->currentDeltas.begin(),
|
||||
metadata->currentDeltas.end(),
|
||||
MutationsAndVersionRef(granuleBeginVersion, 0),
|
||||
MutationsAndVersionRef::OrderByVersion());
|
||||
}
|
||||
|
||||
// add mutations to response
|
||||
while (mutationIt != metadata->currentDeltas.end()) {
|
||||
if (mutationIt->version > req.readVersion) {
|
||||
CODE_PROBE(true, "readVersion pruning some in-memory mutations");
|
||||
break;
|
||||
}
|
||||
chunk.newDeltas.push_back_deep(rep.arena, *mutationIt);
|
||||
mutationIt++;
|
||||
}
|
||||
}
|
||||
|
||||
// Update EncryptionKeysCtx information for the chunk->snapshotFile
|
||||
if (chunk.snapshotFile.present() && snapCipherKeysCtx.present()) {
|
||||
ASSERT(chunk.snapshotFile.get().cipherKeysMetaRef.present());
|
||||
|
||||
BlobGranuleCipherKeysCtx keysCtx = wait(snapCipherKeysCtx.get());
|
||||
chunk.snapshotFile.get().cipherKeysCtx = std::move(keysCtx);
|
||||
// reclaim memory from non-serializable field
|
||||
chunk.snapshotFile.get().cipherKeysMetaRef.reset();
|
||||
|
||||
if (BW_DEBUG) {
|
||||
TraceEvent("DoBlobGranuleFileRequestSnap_KeysCtxDone")
|
||||
.detail("FileName", chunk.snapshotFile.get().filename.toString());
|
||||
}
|
||||
}
|
||||
|
||||
// Update EncryptionKeysCtx information for the chunk->deltaFiles
|
||||
if (!deltaCipherKeysCtxs.empty()) {
|
||||
ASSERT(!chunk.deltaFiles.empty());
|
||||
|
||||
state std::unordered_map<int, Future<BlobGranuleCipherKeysCtx>>::const_iterator itr;
|
||||
for (itr = deltaCipherKeysCtxs.begin(); itr != deltaCipherKeysCtxs.end(); itr++) {
|
||||
BlobGranuleCipherKeysCtx keysCtx = wait(itr->second);
|
||||
chunk.deltaFiles[itr->first].cipherKeysCtx = std::move(keysCtx);
|
||||
// reclaim memory from non-serializable field
|
||||
chunk.deltaFiles[itr->first].cipherKeysMetaRef.reset();
|
||||
|
||||
if (BW_DEBUG) {
|
||||
TraceEvent("DoBlobGranuleFileRequestDelta_KeysCtxDone")
|
||||
.detail("FileName", chunk.deltaFiles[itr->first].filename.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -3660,6 +3668,11 @@ ACTOR Future<Void> doBlobGranuleFileRequest(Reference<BlobWorkerData> bwData, Bl
}

ACTOR Future<Void> handleBlobGranuleFileRequest(Reference<BlobWorkerData> bwData, BlobGranuleFileRequest req) {
    ++bwData->stats.readRequests;
    ++bwData->stats.activeReadRequests;
    if (req.summarize) {
        ++bwData->stats.summaryReads;
    }
    choose {
        when(wait(doBlobGranuleFileRequest(bwData, req))) {}
        when(wait(delay(SERVER_KNOBS->BLOB_WORKER_REQUEST_TIMEOUT))) {

@@ -4755,8 +4768,6 @@ ACTOR Future<Void> blobWorker(BlobWorkerInterface bwInterf,
    try {
        loop choose {
            when(BlobGranuleFileRequest req = waitNext(bwInterf.blobGranuleFileRequest.getFuture())) {
                ++self->stats.readRequests;
                ++self->stats.activeReadRequests;
                self->addActor.send(handleBlobGranuleFileRequest(self, req));
            }
            when(state GranuleStatusStreamRequest req = waitNext(bwInterf.granuleStatusStreamRequest.getFuture())) {
@@ -929,8 +929,8 @@ void Ratekeeper::updateRate(RatekeeperLimits* limits) {
        double targetRateRatio;
        if (blobWorkerLag > 3 * limits->bwLagTarget) {
            targetRateRatio = 0;
            // ASSERT(!g_network->isSimulated() || limits->bwLagTarget != SERVER_KNOBS->TARGET_BW_LAG ||
            //        now() < FLOW_KNOBS->SIM_SPEEDUP_AFTER_SECONDS + 50);
            ASSERT(!g_network->isSimulated() || limits->bwLagTarget != SERVER_KNOBS->TARGET_BW_LAG ||
                   now() < FLOW_KNOBS->SIM_SPEEDUP_AFTER_SECONDS + 50);
        } else if (blobWorkerLag > limits->bwLagTarget) {
            targetRateRatio = SERVER_KNOBS->BW_LAG_DECREASE_AMOUNT;
        } else {

@@ -987,8 +987,8 @@ void Ratekeeper::updateRate(RatekeeperLimits* limits) {
                    .detail("RecoveryDuration", getRecoveryDuration(lastBWVer));
            }
            limitReason = limitReason_t::blob_worker_missing;
            // ASSERT(!g_network->isSimulated() || limits->bwLagTarget != SERVER_KNOBS->TARGET_BW_LAG ||
            //        now() < FLOW_KNOBS->SIM_SPEEDUP_AFTER_SECONDS + 50);
            ASSERT(!g_network->isSimulated() || limits->bwLagTarget != SERVER_KNOBS->TARGET_BW_LAG ||
                   now() < FLOW_KNOBS->SIM_SPEEDUP_AFTER_SECONDS + 50);
        } else if (bwTPS < limits->tpsLimit) {
            if (printRateKeepLimitReasonDetails) {
                TraceEvent("RatekeeperLimitReasonDetails")

@@ -1016,8 +1016,8 @@ void Ratekeeper::updateRate(RatekeeperLimits* limits) {
                    ;
            }
            limitReason = limitReason_t::blob_worker_missing;
            // ASSERT(!g_network->isSimulated() || limits->bwLagTarget != SERVER_KNOBS->TARGET_BW_LAG ||
            //        now() < FLOW_KNOBS->SIM_SPEEDUP_AFTER_SECONDS + 50);
            ASSERT(!g_network->isSimulated() || limits->bwLagTarget != SERVER_KNOBS->TARGET_BW_LAG ||
                   now() < FLOW_KNOBS->SIM_SPEEDUP_AFTER_SECONDS + 50);
        }
    } else if (blobWorkerLag > 3 * limits->bwLagTarget) {
        limits->tpsLimit = 0.0;

@@ -1029,8 +1029,8 @@ void Ratekeeper::updateRate(RatekeeperLimits* limits) {
                .detail("HistorySize", blobWorkerVersionHistory.size());
        }
        limitReason = limitReason_t::blob_worker_missing;
        // ASSERT(!g_network->isSimulated() || limits->bwLagTarget != SERVER_KNOBS->TARGET_BW_LAG ||
        //        now() < FLOW_KNOBS->SIM_SPEEDUP_AFTER_SECONDS + 50);
        ASSERT(!g_network->isSimulated() || limits->bwLagTarget != SERVER_KNOBS->TARGET_BW_LAG ||
               now() < FLOW_KNOBS->SIM_SPEEDUP_AFTER_SECONDS + 50);
    }
} else {
    blobWorkerTime = now();
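These hunks re-enable the simulation assertions around the blob-worker-lag throttling tiers: lag beyond three times the target stops traffic outright, lag beyond the target scales it down, and smaller lag leaves the limit alone. A minimal sketch of that tiering only; the 0.9 factor is a placeholder for SERVER_KNOBS->BW_LAG_DECREASE_AMOUNT, and the real below-target branch (outside this hunk) computes its own ratio.

// Illustrative tiering of the blob-worker-lag rate ratio.
double targetRateRatioForLag(double blobWorkerLag, double bwLagTarget) {
    const double kDecreaseAmount = 0.9; // placeholder for BW_LAG_DECREASE_AMOUNT
    if (blobWorkerLag > 3 * bwLagTarget)
        return 0.0; // stop traffic entirely
    if (blobWorkerLag > bwLagTarget)
        return kDecreaseAmount; // scale traffic down
    return 1.0; // placeholder; the real else-branch is outside this hunk
}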
@ -84,7 +84,7 @@ bool destructed = false;
|
|||
class TestConfig {
|
||||
class ConfigBuilder {
|
||||
using value_type = toml::basic_value<toml::discard_comments>;
|
||||
using base_variant = std::variant<int, bool, std::string, std::vector<int>, ConfigDBType>;
|
||||
using base_variant = std::variant<int, float, double, bool, std::string, std::vector<int>, ConfigDBType>;
|
||||
using types =
|
||||
variant_map<variant_concat<base_variant, variant_map<base_variant, Optional>>, std::add_pointer_t>;
|
||||
std::unordered_map<std::string_view, types> confMap;
|
||||
|
@ -94,6 +94,10 @@ class TestConfig {
|
|||
visitor(const value_type& v) : value(v) {}
|
||||
void operator()(int* val) const { *val = value.as_integer(); }
|
||||
void operator()(Optional<int>* val) const { *val = value.as_integer(); }
|
||||
void operator()(float* val) const { *val = value.as_floating(); }
|
||||
void operator()(Optional<float>* val) const { *val = value.as_floating(); }
|
||||
void operator()(double* val) const { *val = value.as_floating(); }
|
||||
void operator()(Optional<double>* val) const { *val = value.as_floating(); }
|
||||
void operator()(bool* val) const { *val = value.as_boolean(); }
|
||||
void operator()(Optional<bool>* val) const { *val = value.as_boolean(); }
|
||||
void operator()(std::string* val) const { *val = value.as_string(); }
|
||||
|
@ -344,6 +348,8 @@ public:
|
|||
bool allowCreatingTenants = true;
|
||||
bool injectTargetedSSRestart = false;
|
||||
bool injectSSDelay = false;
|
||||
std::string testClass; // unused -- used in TestHarness
|
||||
float testPriority; // unused -- used in TestHarness
|
||||
|
||||
ConfigDBType getConfigDBType() const { return configDBType; }
|
||||
|
||||
|
@ -371,7 +377,9 @@ public:
|
|||
}
|
||||
std::string extraDatabaseModeStr;
|
||||
ConfigBuilder builder;
|
||||
builder.add("extraDatabaseMode", &extraDatabaseModeStr)
|
||||
builder.add("testClass", &testClass)
|
||||
.add("testPriority", &testPriority)
|
||||
.add("extraDatabaseMode", &extraDatabaseModeStr)
|
||||
.add("extraDatabaseCount", &extraDatabaseCount)
|
||||
.add("minimumReplication", &minimumReplication)
|
||||
.add("minimumRegions", &minimumRegions)
|
||||
|
|
|
@ -1115,7 +1115,7 @@ private:
|
|||
CLIOptions() = default;
|
||||
|
||||
void parseEnvInternal() {
|
||||
for (std::string knob : getEnvironmentKnobOptions()) {
|
||||
for (const std::string& knob : getEnvironmentKnobOptions()) {
|
||||
auto pos = knob.find_first_of("=");
|
||||
if (pos == std::string::npos) {
|
||||
fprintf(stderr,
|
||||
|
@ -1866,6 +1866,30 @@ private:
|
|||
localities.set(LocalityData::keyDcId, dcId);
|
||||
}
|
||||
};
|
||||
|
||||
// Returns true iff validation is successful
|
||||
bool validateSimulationDataFiles(std::string const& dataFolder, bool isRestarting) {
|
||||
std::vector<std::string> files = platform::listFiles(dataFolder);
|
||||
if (!isRestarting) {
|
||||
for (const auto& file : files) {
|
||||
if (file != "restartInfo.ini" && file != getTestEncryptionFileName()) {
|
||||
TraceEvent(SevError, "IncompatibleFileFound").detail("DataFolder", dataFolder).detail("FileName", file);
|
||||
fprintf(stderr,
|
||||
"ERROR: Data folder `%s' is non-empty; please use clean, fdb-only folder\n",
|
||||
dataFolder.c_str());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} else if (isRestarting && files.empty()) {
|
||||
TraceEvent(SevWarnAlways, "FileNotFound").detail("DataFolder", dataFolder);
|
||||
printf("ERROR: Data folder `%s' is empty, but restarting option selected. Run Phase 1 test first\n",
|
||||
dataFolder.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
|
@ -2065,7 +2089,7 @@ int main(int argc, char* argv[]) {
|
|||
}
|
||||
|
||||
std::string environmentKnobOptions;
|
||||
for (std::string knobOption : getEnvironmentKnobOptions()) {
|
||||
for (const std::string& knobOption : getEnvironmentKnobOptions()) {
|
||||
environmentKnobOptions += knobOption + " ";
|
||||
}
|
||||
if (environmentKnobOptions.length()) {
|
||||
|
@ -2131,17 +2155,8 @@ int main(int argc, char* argv[]) {
|
|||
flushAndExit(FDB_EXIT_ERROR);
|
||||
}
|
||||
}
|
||||
std::vector<std::string> files = platform::listFiles(dataFolder);
|
||||
if ((files.size() > 1 || (files.size() == 1 && files[0] != "restartInfo.ini")) && !opts.restarting) {
|
||||
TraceEvent(SevError, "IncompatibleFileFound").detail("DataFolder", dataFolder);
|
||||
fprintf(stderr,
|
||||
"ERROR: Data folder `%s' is non-empty; please use clean, fdb-only folder\n",
|
||||
dataFolder.c_str());
|
||||
flushAndExit(FDB_EXIT_ERROR);
|
||||
} else if (files.empty() && opts.restarting) {
|
||||
TraceEvent(SevWarnAlways, "FileNotFound").detail("DataFolder", dataFolder);
|
||||
printf("ERROR: Data folder `%s' is empty, but restarting option selected. Run Phase 1 test first\n",
|
||||
dataFolder.c_str());
|
||||
|
||||
if (!validateSimulationDataFiles(dataFolder, opts.restarting)) {
|
||||
flushAndExit(FDB_EXIT_ERROR);
|
||||
}
|
||||
|
||||
|
|
|
@@ -86,7 +86,8 @@ struct GranuleFiles {
                  bool canCollapse,
                  BlobGranuleChunkRef& chunk,
                  Arena& replyArena,
                  int64_t& deltaBytesCounter) const;
                  int64_t& deltaBytesCounter,
                  bool summarize) const;
};

// serialize change feed key as UID bytes, to use 16 bytes on disk

@@ -55,6 +55,11 @@ void printGranuleChunks(const Standalone<VectorRef<BlobGranuleChunkRef>>& chunks

ACTOR Future<Void> clearAndAwaitMerge(Database cx, KeyRange range);

ACTOR Future<Void> validateGranuleSummaries(Database cx,
                                            KeyRange range,
                                            Optional<TenantName> tenantName,
                                            Promise<Void> testComplete);

#include "flow/unactorcompiler.h"

#endif

@@ -296,6 +296,8 @@ Future<Void> testExpectedError(Future<Void> test,
                               Optional<Error> throwOnError = Optional<Error>(),
                               UID id = UID());

std::string getTestEncryptionFileName();

#include "flow/unactorcompiler.h"

#endif

@@ -5538,7 +5538,9 @@ ACTOR Future<Void> tryGetRangeFromBlob(PromiseStream<RangeResult> results,
        throw blob_granule_transaction_too_old(); // no data on blob
    }

    // todo check if blob storage covers all the expected key range
    if (!isRangeFullyCovered(keys, chunks)) {
        throw blob_granule_transaction_too_old();
    }

    for (const BlobGranuleChunkRef& chunk : chunks) {
        state KeyRangeRef chunkRange = chunk.keyRange;

@@ -1146,6 +1146,9 @@ ACTOR Future<bool> runTest(Database cx,
std::map<std::string, std::function<void(const std::string&)>> testSpecGlobalKeys = {
    // These are read by SimulatedCluster and used before testers exist. Thus, they must
    // be recognized and accepted, but there's no point in placing them into a testSpec.
    // testClass and testPriority are only used for TestHarness, we'll ignore those here
    { "testClass", [](std::string const&) {} },
    { "testPriority", [](std::string const&) {} },
    { "extraDatabaseMode",
      [](const std::string& value) { TraceEvent("TestParserTest").detail("ParsedExtraDatabaseMode", ""); } },
    { "extraDatabaseCount",
@ -73,7 +73,7 @@ struct BackupAndRestoreCorrectnessWorkload : TestWorkload {
|
|||
restorePrefixesToInclude = getOption(options, "restorePrefixesToInclude"_sr, std::vector<std::string>());
|
||||
shouldSkipRestoreRanges = deterministicRandom()->random01() < 0.3 ? true : false;
|
||||
if (getOption(options, "encrypted"_sr, deterministicRandom()->random01() < 0.1)) {
|
||||
encryptionKeyFileName = "simfdb/test_encryption_key_file";
|
||||
encryptionKeyFileName = "simfdb/" + getTestEncryptionFileName();
|
||||
}
|
||||
|
||||
TraceEvent("BARW_ClientId").detail("Id", wcx.clientId);
|
||||
|
@ -883,5 +883,9 @@ struct BackupAndRestoreCorrectnessWorkload : TestWorkload {
|
|||
|
||||
int BackupAndRestoreCorrectnessWorkload::backupAgentRequests = 0;
|
||||
|
||||
std::string getTestEncryptionFileName() {
|
||||
return "test_encryption_key_file";
|
||||
}
|
||||
|
||||
WorkloadFactory<BackupAndRestoreCorrectnessWorkload> BackupAndRestoreCorrectnessWorkloadFactory(
|
||||
"BackupAndRestoreCorrectness");
|
||||
|
|
|
@ -91,6 +91,9 @@ struct ThreadData : ReferenceCounted<ThreadData>, NonCopyable {
|
|||
Promise<Void> firstWriteSuccessful;
|
||||
Version minSuccessfulReadVersion = MAX_VERSION;
|
||||
|
||||
Future<Void> summaryClient;
|
||||
Promise<Void> triggerSummaryComplete;
|
||||
|
||||
// stats
|
||||
int64_t errors = 0;
|
||||
int64_t mismatches = 0;
|
||||
|
@ -886,6 +889,7 @@ struct BlobGranuleCorrectnessWorkload : TestWorkload {
|
|||
for (auto& it : directories) {
|
||||
// Wait for blob worker to initialize snapshot before starting test for that range
|
||||
Future<Void> start = waitFirstSnapshot(this, cx, it, true);
|
||||
it->summaryClient = validateGranuleSummaries(cx, normalKeys, it->tenantName, it->triggerSummaryComplete);
|
||||
clients.push_back(timeout(writeWorker(this, start, cx, it), testDuration, Void()));
|
||||
clients.push_back(timeout(readWorker(this, start, cx, it), testDuration, Void()));
|
||||
}
|
||||
|
@ -919,6 +923,9 @@ struct BlobGranuleCorrectnessWorkload : TestWorkload {
|
|||
BlobGranuleCorrectnessWorkload* self,
|
||||
Reference<ThreadData> threadData) {
|
||||
|
||||
if (threadData->triggerSummaryComplete.canBeSet()) {
|
||||
threadData->triggerSummaryComplete.send(Void());
|
||||
}
|
||||
state bool result = true;
|
||||
state int finalRowsValidated;
|
||||
if (threadData->writeVersions.empty()) {
|
||||
|
@ -985,6 +992,9 @@ struct BlobGranuleCorrectnessWorkload : TestWorkload {
|
|||
wait(clearAndAwaitMerge(cx, threadData->directoryRange));
|
||||
}
|
||||
|
||||
// validate that summary completes without error
|
||||
wait(threadData->summaryClient);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
@ -82,6 +82,9 @@ struct BlobGranuleVerifierWorkload : TestWorkload {

    std::vector<std::tuple<KeyRange, Version, UID, Future<GranuleFiles>>> purgedDataToCheck;

    Future<Void> summaryClient;
    Promise<Void> triggerSummaryComplete;

    BlobGranuleVerifierWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) {
        doSetup = !clientId; // only do this on the "first" client
        testDuration = getOption(options, LiteralStringRef("testDuration"), 120.0);

@ -292,7 +295,6 @@ struct BlobGranuleVerifierWorkload : TestWorkload {
        }

        ASSERT(!self->purgedDataToCheck.empty());

        return Void();
    }

@ -525,6 +527,11 @@ struct BlobGranuleVerifierWorkload : TestWorkload {
                reportErrors(verifyGranules(cx, this, false), "BlobGranuleVerifier"), testDuration, Void()));
            }
        }
        if (!enablePurging) {
            summaryClient = validateGranuleSummaries(cx, normalKeys, {}, triggerSummaryComplete);
        } else {
            summaryClient = Future<Void>(Void());
        }
        return delay(testDuration);
    }

@ -924,7 +931,7 @@ struct BlobGranuleVerifierWorkload : TestWorkload {
        state int64_t totalRows = 0;
        loop {
            state RangeResult output;
            state Version readVersion;
            state Version readVersion = invalidVersion;
            try {
                Version ver = wait(tr.getReadVersion());
                readVersion = ver;

@ -950,10 +957,21 @@ struct BlobGranuleVerifierWorkload : TestWorkload {

            if (!output.empty()) {
                state KeyRange rangeToCheck = KeyRangeRef(keyRange.begin, keyAfter(output.back().key));
                std::pair<RangeResult, Standalone<VectorRef<BlobGranuleChunkRef>>> blob =
                    wait(readFromBlob(cx, self->bstore, rangeToCheck, 0, readVersion));
                if (!compareFDBAndBlob(output, blob, rangeToCheck, readVersion, BGV_DEBUG)) {
                    return false;
                try {
                    std::pair<RangeResult, Standalone<VectorRef<BlobGranuleChunkRef>>> blob =
                        wait(readFromBlob(cx, self->bstore, rangeToCheck, 0, readVersion));
                    if (!compareFDBAndBlob(output, blob, rangeToCheck, readVersion, BGV_DEBUG)) {
                        return false;
                    }
                } catch (Error& e) {
                    if (BGV_DEBUG && e.code() == error_code_blob_granule_transaction_too_old) {
                        fmt::print("CheckAllData got BG_TTO for [{0} - {1}) @ {2}\n",
                                   rangeToCheck.begin.printable(),
                                   rangeToCheck.end.printable(),
                                   readVersion);
                    }
                    ASSERT(e.code() != error_code_blob_granule_transaction_too_old);
                    throw e;
                }
                totalRows += output.size();
                keyRange = KeyRangeRef(rangeToCheck.end, keyRange.end);

@ -971,6 +989,9 @@ struct BlobGranuleVerifierWorkload : TestWorkload {
    }

    ACTOR Future<bool> _check(Database cx, BlobGranuleVerifierWorkload* self) {
        if (self->triggerSummaryComplete.canBeSet()) {
            self->triggerSummaryComplete.send(Void());
        }
        state Transaction tr(cx);
        if (self->doForcePurge) {
            if (self->startedForcePurge) {

@ -1009,35 +1030,33 @@ struct BlobGranuleVerifierWorkload : TestWorkload {
    state bool availabilityPassed = true;

    state Standalone<VectorRef<KeyRangeRef>> allRanges;
    if (self->granuleRanges.get().empty()) {

    state Future<Void> rangeFetcher = self->findGranules(cx, self);
    loop {
        // wait until entire keyspace has granules
        if (!self->granuleRanges.get().empty()) {
            bool haveAll = true;
            if (self->granuleRanges.get().front().begin != normalKeys.begin ||
                self->granuleRanges.get().back().end != normalKeys.end) {
                haveAll = false;
            }
            for (int i = 0; haveAll && i < self->granuleRanges.get().size() - 1; i++) {
                if (self->granuleRanges.get()[i].end != self->granuleRanges.get()[i + 1].begin) {
                    haveAll = false;
                }
            }
            if (haveAll) {
                break;
            }
        }
        if (BGV_DEBUG) {
            fmt::print("Waiting to get granule ranges for check\n");
        }
        state Future<Void> rangeFetcher = self->findGranules(cx, self);
        loop {
            wait(self->granuleRanges.onChange());
            // wait until entire keyspace has granules
            if (!self->granuleRanges.get().empty()) {
                bool haveAll = true;
                if (self->granuleRanges.get().front().begin != normalKeys.begin ||
                    self->granuleRanges.get().back().end != normalKeys.end) {
                    haveAll = false;
                }
                for (int i = 0; haveAll && i < self->granuleRanges.get().size() - 1; i++) {
                    if (self->granuleRanges.get()[i].end != self->granuleRanges.get()[i + 1].begin) {
                        haveAll = false;
                    }
                }
                if (haveAll) {
                    break;
                }
            }
            rangeFetcher.cancel();
            if (BGV_DEBUG) {
                fmt::print("Got granule ranges for check\n");
            }
            wait(self->granuleRanges.onChange());
        }

        rangeFetcher.cancel();

    allRanges = self->granuleRanges.get();
    for (auto& range : allRanges) {
        state KeyRange r = range;

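To restate the wait condition driving the loop above: the check cannot proceed until the reported granule ranges tile the whole keyspace, i.e. the first range starts at the keyspace begin, the last range ends at the keyspace end, and adjacent ranges meet with no gap. A simplified stand-alone version of that predicate, using plain string pairs instead of the workload's KeyRange types (illustrative only):

#include <string>
#include <utility>
#include <vector>

// True when the sorted ranges cover [begin, end) contiguously, mirroring the
// "haveAll" logic in the loop above.
bool coversKeyspace(const std::vector<std::pair<std::string, std::string>>& ranges,
                    const std::string& begin,
                    const std::string& end) {
    if (ranges.empty() || ranges.front().first != begin || ranges.back().second != end) {
        return false;
    }
    for (size_t i = 0; i + 1 < ranges.size(); ++i) {
        if (ranges[i].second != ranges[i + 1].first) {
            return false; // gap between adjacent granule ranges
        }
    }
    return true;
}
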
@ -1165,6 +1184,17 @@ struct BlobGranuleVerifierWorkload : TestWorkload {
        ASSERT(dataCheckAfterMerge);
    }

    if (BGV_DEBUG) {
        fmt::print("BGV check waiting on summarizer to complete\n");
    }

    // validate that summary completes without error
    wait(self->summaryClient);

    if (BGV_DEBUG) {
        fmt::print("BGV check done\n");
    }

    return result;
}

@ -33,16 +33,20 @@

#include "flow/StreamCipher.h"
#include "flow/BlobCipher.h"
#include "flow/ScopeExit.h"
#include "flow/Trace.h"
#include "flow/Error.h"

#include "flow/Knobs.h"

#include <algorithm>
#include <iostream>
#include <fstream>
#include <sstream>
#include <cstring>
#include <algorithm>
#include <string>
#include <string_view>
#include <vector>
#include <boost/format.hpp>
#include <boost/filesystem.hpp>
#include <boost/filesystem/operations.hpp>

@ -65,6 +69,7 @@
#include <direct.h>
#include <pdh.h>
#include <pdhmsg.h>
#include <processenv.h>
#pragma comment(lib, "pdh.lib")

// for SHGetFolderPath

@ -148,6 +153,9 @@
#endif

#ifdef __APPLE__
/* Needed for cross-platform 'environ' */
#include <crt_externs.h>

#include <sys/uio.h>
#include <sys/syslimits.h>
#include <mach/mach.h>

@ -1935,14 +1943,35 @@ std::string epochsToGMTString(double epochs) {
}

std::vector<std::string> getEnvironmentKnobOptions() {
    char** e = environ;
    constexpr const size_t ENVKNOB_PREFIX_LEN = sizeof(ENVIRONMENT_KNOB_OPTION_PREFIX) - 1;
    std::vector<std::string> knobOptions;
    for (; *e; e++) {
        std::string envOption(*e);
#if defined(_WIN32)
    auto e = GetEnvironmentStrings();
    if (e == nullptr)
        return {};
    auto cleanup = ScopeExit([e]() { FreeEnvironmentStrings(e); });
    while (*e) {
        auto candidate = std::string_view(e);
        if (boost::starts_with(candidate, ENVIRONMENT_KNOB_OPTION_PREFIX))
            knobOptions.emplace_back(candidate.substr(ENVKNOB_PREFIX_LEN));
        e += (candidate.size() + 1);
    }
#else
    char** e = nullptr;
#ifdef __linux__
    e = environ;
#elif defined(__APPLE__)
    e = *_NSGetEnviron();
#else
#error Port me!
#endif
    for (; e && *e; e++) {
        std::string_view envOption(*e);
        if (boost::starts_with(envOption, ENVIRONMENT_KNOB_OPTION_PREFIX)) {
            knobOptions.push_back(envOption.substr(strlen(ENVIRONMENT_KNOB_OPTION_PREFIX)));
            knobOptions.emplace_back(envOption.substr(ENVKNOB_PREFIX_LEN));
        }
    }
#endif
    return knobOptions;
}

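A hedged sketch of how a caller might consume the reworked getEnvironmentKnobOptions(): every returned entry is an environment variable with the knob prefix already stripped, so it should look like "name=value". The sample entries and the splitting below are illustrative only; the real knob-parsing machinery lives elsewhere in flow.

#include <cstddef>
#include <string>
#include <vector>

int main() {
    // Made-up sample entries standing in for the function's return value.
    std::vector<std::string> knobOptions = { "min_trace_severity=10", "some_knob=value" };
    for (const std::string& opt : knobOptions) {
        const std::size_t eq = opt.find('=');
        if (eq == std::string::npos) {
            continue; // malformed entry; real code would likely report it
        }
        const std::string name = opt.substr(0, eq);
        const std::string value = opt.substr(eq + 1);
        // (name, value) would be handed to the knob collection here.
        (void)name;
        (void)value;
    }
    return 0;
}
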
@ -256,6 +256,7 @@ FORMAT_TRACEABLE(long int, "%ld");
FORMAT_TRACEABLE(unsigned long int, "%lu");
FORMAT_TRACEABLE(long long int, "%lld");
FORMAT_TRACEABLE(unsigned long long int, "%llu");
FORMAT_TRACEABLE(float, "%g");
FORMAT_TRACEABLE(double, "%g");
FORMAT_TRACEABLE(void*, "%p");
FORMAT_TRACEABLE(volatile long, "%ld");

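The added FORMAT_TRACEABLE(volatile long, "%ld") presumably exists so that a volatile counter can be passed straight into a trace detail; a minimal hedged example of the call site it enables (event and detail names are made up):

#include "flow/Trace.h"

void logExample() {
    // With the Traceable specialization above, no cast to plain long is needed.
    volatile long requestsInFlight = 42;
    TraceEvent("ExampleMetrics").detail("RequestsInFlight", requestsInFlight);
}
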
@ -1,3 +1,5 @@
testClass = "Backup"

[[test]]
testTitle = 'Cycle'
clearAfterTest = 'false'

@ -1,3 +1,5 @@
testClass = "Backup"

[[test]]
testTitle = 'BackupAndRestore'
clearAfterTest = false

@ -1,3 +1,5 @@
testClass = "Backup"

[[test]]
testTitle = 'BackupAndRestore'
clearAfterTest = false

@ -1,3 +1,5 @@
testClass = "Backup"

[[test]]
testTitle = 'Cycle'
clearAfterTest = 'false'

@ -1,3 +1,5 @@
testClass = "Backup"

[configuration]
extraDatabaseMode = 'LocalOrSingle'

@ -1,3 +1,5 @@
testClass = "Backup"

[configuration]
extraDatabaseMode = 'LocalOrSingle'

@ -1,4 +1,5 @@
[configuration]
testClass = "BlobGranule"
blobGranulesEnabled = true
allowDefaultTenant = false
# FIXME: re-enable rocks at some point

@ -1,4 +1,5 @@
[configuration]
testClass = "BlobGranule"
blobGranulesEnabled = true
allowDefaultTenant = false
injectTargetedSSRestart = true

@ -1,4 +1,5 @@
[configuration]
testClass = "BlobGranule"
blobGranulesEnabled = true
allowDefaultTenant = false
injectTargetedSSRestart = true

@ -1,4 +1,5 @@
[configuration]
testClass = "BlobGranule"
blobGranulesEnabled = true
allowDefaultTenant = false
injectTargetedSSRestart = true

@ -3,6 +3,7 @@ blobGranulesEnabled = true
allowDefaultTenant = false
# FIXME: re-enable rocks at some point
storageEngineExcludeTypes = [4, 5]
testClass = "BlobGranule"

[[test]]
testTitle = 'BlobGranuleVerifySmallClean'

@ -1,5 +1,6 @@
[configuration]
allowDefaultTenant = false
testClass = "ChangeFeeds"

# TODO add failure events, and then add a version that also supports randomMoveKeys

@ -1,4 +1,5 @@
[configuration]
testClass = "ChangeFeeds"
allowDefaultTenant = false

# TODO add failure events, and then add a version that also supports randomMoveKeys

@ -1,4 +1,5 @@
[configuration]
testClass = "ChangeFeeds"
allowDefaultTenant = false

[[test]]

@ -1,3 +1,6 @@
[configuration]
testClass = "Encryption"

[[knobs]]
enable_encryption = true

@ -1,3 +1,6 @@
[configuration]
testClass = "Encryption"

[[knobs]]
enable_encryption = false

@ -1,3 +1,4 @@
testClass = "Backup"
storageEngineExcludeTypes=3

[[test]]

@ -1,3 +1,5 @@
testClass = "Backup"

[[test]]
testTitle = 'SecondCycleTest'
simBackupAgents = 'BackupToFile'

@ -1,3 +1,4 @@
testClass=SnapshotTest
storageEngineExcludeTypes=[3, 4, 5]

;Take snap and do cycle test

@ -1,3 +1,4 @@
testClass=SnapshotTest
storageEngineExcludeTypes=[4, 5]
buggify=off

@ -1,3 +1,4 @@
testClass=SnapshotTest
storageEngineExcludeTypes=[3, 4, 5]

logAntiQuorum = 0

@ -1,3 +1,4 @@
testClass=SnapshotTest
storageEngineExcludeTypes=[4, 5]

testTitle=RestoreBackup

@ -1,3 +1,4 @@
testClass=SnapshotTest
storageEngineExcludeTypes=[3, 4, 5]

;write 1000 Keys ending with even numbers

@ -1,3 +1,4 @@
testClass=SnapshotTest
storageEngineExcludeTypes=[4, 5]

buggify=off

@ -1,3 +1,4 @@
testClass=SnapshotTest
storageEngineExcludeTypes=[3, 4, 5]

;write 1000 Keys ending with even numbers

@ -1,3 +1,4 @@
testClass=SnapshotTest
storageEngineExcludeTypes=[4, 5]

buggify=off

@ -1,3 +1,4 @@
testClass=SnapshotTest
storageEngineExcludeTypes=[3, 4, 5]

;write 1000 Keys ending with even number

@ -1,3 +1,4 @@
testClass=SnapshotTest
storageEngineExcludeTypes=[4, 5]

buggify=off