Merge pull request #5185 from sfc-gh-clin/extend-fdbclient-test-for-multiple-processes

Extend cmake fdbclient_test for multiple processes
This commit is contained in:
Chaoguang Lin 2021-07-15 14:34:04 -07:00 committed by GitHub
commit c8e18f99f0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 124 additions and 23 deletions

View File

@ -81,5 +81,15 @@ if (NOT WIN32 AND NOT OPEN_FOR_IDE)
COMMAND ${CMAKE_SOURCE_DIR}/bindings/python/tests/fdbcli_tests.py
${CMAKE_BINARY_DIR}/bin/fdbcli
@CLUSTER_FILE@
1
)
add_fdbclient_test(
NAME multi_process_fdbcli_tests
PROCESS_NUMBER 5
TEST_TIMEOUT 120 # The test can take close to 1 minute sometimes; set the timeout to 2 minutes to be safe
COMMAND ${CMAKE_SOURCE_DIR}/bindings/python/tests/fdbcli_tests.py
${CMAKE_BINARY_DIR}/bin/fdbcli
@CLUSTER_FILE@
5
)
endif()

View File

@ -332,22 +332,90 @@ def transaction(logger):
output7 = run_fdbcli_command('get', 'key')
assert output7 == "`key': not found"
def get_fdb_process_addresses():
    """Return the network address of every fdbserver process in the cluster.

    Parses the output of the fdbcli `kill` command: its first line is a
    header, and each remaining line names one process. The count is checked
    against the module-level `process_number` supplied on the command line.
    """
    kill_output = run_fdbcli_command('kill')
    # Drop the header line; what remains is one address per line.
    process_addresses = kill_output.split('\n')[1:]
    assert len(process_addresses) == process_number
    return process_addresses
@enable_logging()
def coordinators(logger):
    """Exercise the fdbcli `coordinators` command on a multi-process cluster.

    Verifies the initial single-coordinator state, promotes all processes to
    coordinators while renaming the cluster, then uses `coordinators auto`
    to shrink back to one coordinator.
    """
    # The freshly created cluster starts with exactly one coordinator.
    before = run_fdbcli_command('coordinators')
    before_lines = before.split('\n')
    assert len(before_lines) > 2
    cluster_description = before_lines[0].split(': ')[-1]
    logger.debug("Cluster description: {}".format(cluster_description))
    coordinator_address = before_lines[1].split(': ')[-1]
    # Cross-check the coordinator against the machine-readable status json.
    coordinator_list = get_value_from_status_json(True, 'client', 'coordinators', 'coordinators')
    assert len(coordinator_list) == 1
    assert coordinator_list[0]['address'] == coordinator_address
    # The connection string begins with '<description>:'.
    connection_string = get_value_from_status_json(True, 'cluster', 'connection_string')
    assert connection_string.startswith('{}:'.format(cluster_description))
    addresses = get_fdb_process_addresses()
    # Promote all 5 processes to coordinators and update the cluster description.
    new_cluster_description = 'a_simple_description'
    run_fdbcli_command('coordinators', *addresses, 'description={}'.format(new_cluster_description))
    # Confirm there are now 5 coordinators and the description changed.
    after = run_fdbcli_command('coordinators')
    after_lines = after.split('\n')
    assert after_lines[0].split(': ')[-1] == new_cluster_description
    assert after_lines[1] == 'Cluster coordinators ({}): {}'.format(5, ','.join(addresses))
    # `coordinators auto` should fall back to a single coordinator.
    run_fdbcli_command('coordinators', 'auto')
    assert len(get_value_from_status_json(True, 'client', 'coordinators', 'coordinators')) == 1
@enable_logging()
def exclude(logger):
    """Exercise fdbcli `exclude`/`include` on one randomly chosen process.

    Excludes a process (retrying while the command reports an error),
    verifies it shows up in the exclusion list, then re-includes it and
    verifies the list is empty again.
    """
    addresses = get_fdb_process_addresses()
    logger.debug("Cluster processes: {}".format(' '.join(addresses)))
    no_excluded_process_output = 'There are currently no servers or localities excluded from the database.'
    # Nothing should be excluded to begin with.
    assert no_excluded_process_output in run_fdbcli_command('exclude')
    excluded_address = random.choice(addresses)
    # The exclude can transiently fail; retry once per second until it succeeds.
    while True:
        logger.debug("Excluding process: {}".format(excluded_address))
        error_message = run_fdbcli_command_and_get_error('exclude', excluded_address)
        if not error_message:
            break
        logger.debug("Retry exclude after 1 second")
        time.sleep(1)
    # The excluded process must now appear in the exclusion report.
    exclusion_report = run_fdbcli_command('exclude')
    assert 'There are currently 1 servers or localities being excluded from the database' in exclusion_report
    assert excluded_address in exclusion_report
    # Re-include the process and confirm the exclusion list is empty again.
    run_fdbcli_command('include', excluded_address)
    assert no_excluded_process_output in run_fdbcli_command('exclude')
if __name__ == '__main__':
    # NOTE(review): the diff rendering interleaved the pre-change version
    # (3 arguments, unconditional test calls) with the post-change version;
    # only the post-change version is kept here, since the two argc asserts
    # cannot both hold and the test calls were duplicated.
    # fdbcli_tests.py <path_to_fdbcli_binary> <path_to_fdb_cluster_file> <process_number>
    assert len(sys.argv) == 4, "Please pass arguments: <path_to_fdbcli_binary> <path_to_fdb_cluster_file> <process_number>"
    # shell command template shared by all tests: fdbcli -C <cluster_file> --exec <cmd>
    command_template = [sys.argv[1], '-C', sys.argv[2], '--exec']
    # tests for fdbcli commands
    # assertions will fail if fdbcli does not work as expected
    process_number = int(sys.argv[3])
    if process_number == 1:
        # single-process cluster: run the per-process fdbcli tests
        advanceversion()
        cache_range()
        consistencycheck()
        datadistribution()
        kill()
        lockAndUnlock()
        maintenance()
        setclass()
        suspend()
        transaction()
    else:
        # multi-process cluster: run the tests that need several processes
        assert process_number > 1, "Process number should be greater than 1"
        coordinators()
        exclude()

View File

@ -398,7 +398,7 @@ endfunction()
# Creates a single cluster before running the specified command (usually a ctest test)
function(add_fdbclient_test)
set(options DISABLED ENABLED)
set(oneValueArgs NAME)
set(oneValueArgs NAME PROCESS_NUMBER TEST_TIMEOUT)
set(multiValueArgs COMMAND)
cmake_parse_arguments(T "${options}" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}")
if(OPEN_FOR_IDE)
@ -414,12 +414,26 @@ function(add_fdbclient_test)
message(FATAL_ERROR "COMMAND is a required argument for add_fdbclient_test")
endif()
message(STATUS "Adding Client test ${T_NAME}")
add_test(NAME "${T_NAME}"
if (T_PROCESS_NUMBER)
add_test(NAME "${T_NAME}"
COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/tmp_cluster.py
--build-dir ${CMAKE_BINARY_DIR}
--process-number ${T_PROCESS_NUMBER}
--
${T_COMMAND})
else()
add_test(NAME "${T_NAME}"
COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/tmp_cluster.py
--build-dir ${CMAKE_BINARY_DIR}
--
${T_COMMAND})
set_tests_properties("${T_NAME}" PROPERTIES TIMEOUT 60)
endif()
if (T_TEST_TIMEOUT)
set_tests_properties("${T_NAME}" PROPERTIES TIMEOUT ${T_TEST_TIMEOUT})
else()
# default timeout
set_tests_properties("${T_NAME}" PROPERTIES TIMEOUT 60)
endif()
set_tests_properties("${T_NAME}" PROPERTIES ENVIRONMENT UBSAN_OPTIONS=print_stacktrace=1:halt_on_error=1)
endfunction()

View File

@ -38,7 +38,7 @@ cluster_file = {etcdir}/fdb.cluster
command = {fdbserver_bin}
public_address = auto:$ID
listen_address = public
datadir = {datadir}
datadir = {datadir}/$ID
logdir = {logdir}
# logsize = 10MiB
# maxlogssize = 100MiB
@ -53,13 +53,13 @@ logdir = {logdir}
## An individual fdbserver process with id 4000
## Parameters set here override defaults from the [fdbserver] section
[fdbserver.{server_port}]
"""
"""
valid_letters_for_secret = string.ascii_letters + string.digits
def __init__(self, basedir: str, fdbserver_binary: str, fdbmonitor_binary: str,
fdbcli_binary: str, create_config=True, port=None, ip_address=None):
fdbcli_binary: str, process_number: int, create_config=True, port=None, ip_address=None):
self.basedir = Path(basedir)
self.fdbserver_binary = Path(fdbserver_binary)
self.fdbmonitor_binary = Path(fdbmonitor_binary)
@ -93,9 +93,16 @@ logdir = {logdir}
etcdir=self.etc,
fdbserver_bin=self.fdbserver_binary,
datadir=self.data,
logdir=self.log,
server_port=self.port
logdir=self.log
))
# By default, the cluster only has one process
# If a port number is given and process_number > 1, we will use subsequent numbers
# E.g., port = 4000, process_number = 5
# Then 4000,4001,4002,4003,4004 will be used as ports
# If port number is not given, we will randomly pick free ports
for index, _ in enumerate(range(process_number)):
f.write('[fdbserver.{server_port}]\n'.format(server_port=self.port))
self.port = get_free_port() if port is None else str(int(self.port) + 1)
def __enter__(self):
assert not self.running, "Can't start a server that is already running"

View File

@ -11,7 +11,7 @@ from random import choice
from pathlib import Path
class TempCluster:
def __init__(self, build_dir: str,port: str = None):
def __init__(self, build_dir: str, process_number: int = 1, port: str = None):
self.build_dir = Path(build_dir).resolve()
assert self.build_dir.exists(), "{} does not exist".format(build_dir)
assert self.build_dir.is_dir(), "{} is not a directory".format(build_dir)
@ -23,6 +23,7 @@ class TempCluster:
self.build_dir.joinpath('bin', 'fdbserver'),
self.build_dir.joinpath('bin', 'fdbmonitor'),
self.build_dir.joinpath('bin', 'fdbcli'),
process_number,
port = port)
self.log = self.cluster.log
self.etc = self.cluster.etc
@ -63,9 +64,10 @@ if __name__ == '__main__':
""")
parser.add_argument('--build-dir', '-b', metavar='BUILD_DIRECTORY', help='FDB build directory', required=True)
parser.add_argument('cmd', metavar="COMMAND", nargs="+", help="The command to run")
parser.add_argument('--process-number', '-p', help="Number of fdb processes running", type=int, default=1)
args = parser.parse_args()
errcode = 1
with TempCluster(args.build_dir) as cluster:
with TempCluster(args.build_dir, args.process_number) as cluster:
print("log-dir: {}".format(cluster.log))
print("etc-dir: {}".format(cluster.etc))
print("data-dir: {}".format(cluster.data))