Merge branch 'main' of github.com:apple/foundationdb into tenant-delete-id

commit b7cba23126

.flake8:
@@ -1,5 +1,5 @@
 [flake8]
-ignore = E203, E266, E501, W503, F403, F401, E711, C901, W605
-max-line-length = 79
+ignore = E203, E266, E501, W503, F403, F401, E711, C901, E721, W605
+max-line-length = 88
 max-complexity = 18
 select = B,C,E,F,W,T4,B9
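Context for the .flake8 change: E721 is pycodestyle's "do not compare types" warning, and 88 columns is the default line length of the black formatter, so this change lines the linter up with the black-style reformatting that makes up most of this commit. A minimal sketch of the comparison style E721 flags (function names here are illustrative only):

```python
# E721 fires on direct type comparison like this; the repo now ignores it.
def same_type(a, b):
    return type(a) == type(b)  # would be flagged as E721

# The alternative flake8 would otherwise steer you toward:
def same_type_isinstance(a, b):
    return isinstance(a, type(b)) and isinstance(b, type(a))
```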
@@ -110,6 +110,12 @@ set(FDB_PACKAGE_NAME "${FDB_MAJOR}.${FDB_MINOR}")
 configure_file(${CMAKE_SOURCE_DIR}/versions.target.cmake ${CMAKE_CURRENT_BINARY_DIR}/versions.target)
 file(WRITE ${CMAKE_BINARY_DIR}/version.txt ${FDB_VERSION})

+set(FDB_CURRENT_VERSION ${PROJECT_VERSION})
+set(FDB_FUTURE_VERSION "7.4.0")
+set(FDB_PREV_RELEASE_VERSION "7.1.25")
+set(FDB_PREV2_RELEASE_VERSION "7.0.0")
+set(FDB_PREV3_RELEASE_VERSION "6.3.25")
+
 ################################################################################
 # Flow
 ################################################################################
@@ -154,6 +154,8 @@ class ApiTest(Test):
         snapshot_reads = [x + '_SNAPSHOT' for x in reads]
         database_reads = [x + '_DATABASE' for x in reads]
         database_mutations = [x + '_DATABASE' for x in mutations]
+        tenant_reads = [x + '_TENANT' for x in reads]
+        tenant_mutations = [x + '_TENANT' for x in mutations]
         mutations += ['VERSIONSTAMP']
         versions = ['GET_READ_VERSION', 'SET_READ_VERSION', 'GET_COMMITTED_VERSION']
         snapshot_versions = ['GET_READ_VERSION_SNAPSHOT']

@@ -183,6 +185,8 @@ class ApiTest(Test):

         if not args.no_tenants:
             op_choices += tenants
+            op_choices += tenant_reads
+            op_choices += tenant_mutations

         idempotent_atomic_ops = ['BIT_AND', 'BIT_OR', 'MAX', 'MIN', 'BYTE_MIN', 'BYTE_MAX']
         atomic_ops = idempotent_atomic_ops + ['ADD', 'BIT_XOR', 'APPEND_IF_FITS']
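To make the tenant-operation change concrete, here is a hedged sketch of how these suffixed names feed the binding tester's random operation selection; the real ApiTest carries far more state, and the list contents below are abbreviated from the diff:

```python
# Sketch only: abbreviated operation lists based on the diff above.
reads = ['GET', 'GET_KEY', 'GET_RANGE']
mutations = ['SET', 'CLEAR', 'ATOMIC_OP']

tenant_reads = [x + '_TENANT' for x in reads]
tenant_mutations = [x + '_TENANT' for x in mutations]

class Args:             # stand-in for the tester's parsed arguments
    no_tenants = False  # corresponds to a --no-tenants style flag

args = Args()
op_choices = reads + mutations
if not args.no_tenants:
    op_choices += tenant_reads
    op_choices += tenant_mutations

# Tenant-suffixed operations are now part of the random selection pool.
print(op_choices)
```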
@@ -283,7 +283,8 @@ if(NOT WIN32)
   foreach(test_file ${API_TEST_FILES})
     get_filename_component(file_name "${test_file}" NAME_WE)
     set(test_name "fdb_c_api_test_${file_name}")
-    add_test(NAME "${test_name}"
+    add_scripted_fdb_test(NAME "${test_name}"
+      TIMEOUT 300
       COMMAND ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/run_c_api_tests.py
       --build-dir ${CMAKE_BINARY_DIR}
       --api-tester-bin $<TARGET_FILE:fdb_c_api_tester>

@@ -291,99 +292,87 @@ if(NOT WIN32)
       --test-file ${test_file}
+      --retain-client-lib-copies
       )
-    set_tests_properties("${test_name}" PROPERTIES TIMEOUT 300)
   endforeach()

-  add_test(NAME fdb_c_upgrade_to_future_version
+  add_scripted_fdb_test(NAME fdb_c_upgrade_to_future_version
     COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py
     --build-dir ${CMAKE_BINARY_DIR}
     --test-file ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/MixedApiWorkloadMultiThr.toml
-    --upgrade-path "7.3.0" "7.4.0" "7.3.0"
+    --upgrade-path "${FDB_CURRENT_VERSION}" "${FDB_FUTURE_VERSION}" "${FDB_CURRENT_VERSION}"
     --process-number 3
     )
-  set_tests_properties("fdb_c_upgrade_to_future_version" PROPERTIES ENVIRONMENT "${SANITIZER_OPTIONS}")

-  add_test(NAME fdb_c_upgrade_to_future_version_blob_granules
+  add_scripted_fdb_test(NAME fdb_c_upgrade_to_future_version_blob_granules
     COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py
     --build-dir ${CMAKE_BINARY_DIR}
     --test-file ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/ApiBlobGranulesCorrectness.toml
-    --upgrade-path "7.3.0" "7.4.0" "7.3.0"
+    --upgrade-path "${FDB_CURRENT_VERSION}" "${FDB_FUTURE_VERSION}" "${FDB_CURRENT_VERSION}"
     --blob-granules-enabled
     --process-number 3
     )

   if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT USE_SANITIZER)
-    add_test(NAME fdb_c_client_config_tests
+    add_scripted_fdb_test(NAME fdb_c_client_config_tests
      COMMAND $<TARGET_FILE:Python3::Interpreter> ${CMAKE_CURRENT_SOURCE_DIR}/test/fdb_c_client_config_tests.py
      --build-dir ${CMAKE_BINARY_DIR}
      --client-config-tester-bin $<TARGET_FILE:fdb_c_client_config_tester>
      )

-    add_test(NAME fdb_c_upgrade_single_threaded_630api
-      COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py
-      --build-dir ${CMAKE_BINARY_DIR}
-      --test-file ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/MixedApiWorkloadSingleThr.toml
-      --upgrade-path "6.3.23" "7.0.0" "7.1.9" "7.3.0"
-      --process-number 1
-      )
-
-    add_test(NAME fdb_c_upgrade_single_threaded_700api
-      COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py
-      --build-dir ${CMAKE_BINARY_DIR}
-      --test-file ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/MixedApiWorkloadSingleThr.toml
-      --upgrade-path "7.0.0" "7.1.9" "7.3.0"
-      --process-number 1
-      )
-
-    add_test(NAME fdb_c_upgrade_multi_threaded_630api
+    add_scripted_fdb_test(NAME fdb_c_upgrade_from_prev3_gradual
       COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py
       --build-dir ${CMAKE_BINARY_DIR}
       --test-file ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/MixedApiWorkloadMultiThr.toml
-      --upgrade-path "6.3.23" "7.0.0" "7.1.9" "7.3.0" "7.1.9"
+      --upgrade-path "${FDB_PREV3_RELEASE_VERSION}" "${FDB_PREV2_RELEASE_VERSION}" "${FDB_PREV_RELEASE_VERSION}" "${FDB_CURRENT_VERSION}" "${FDB_PREV_RELEASE_VERSION}"
       --process-number 3
       )

-    add_test(NAME fdb_c_upgrade_multi_threaded_700api
+    add_scripted_fdb_test(NAME fdb_c_upgrade_from_prev3_direct
       COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py
       --build-dir ${CMAKE_BINARY_DIR}
       --test-file ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/MixedApiWorkloadMultiThr.toml
-      --upgrade-path "7.0.0" "7.1.9" "7.3.0" "7.1.9"
+      --upgrade-path "${FDB_PREV3_RELEASE_VERSION}" "${FDB_CURRENT_VERSION}" "${FDB_PREV_RELEASE_VERSION}"
       --process-number 3
       )

-    add_test(NAME fdb_c_upgrade_multi_threaded_710api
+    add_scripted_fdb_test(NAME fdb_c_upgrade_from_prev2_gradual
       COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py
       --build-dir ${CMAKE_BINARY_DIR}
       --test-file ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/MixedApiWorkloadMultiThr.toml
-      --upgrade-path "7.1.9" "7.3.0" "7.1.9"
+      --upgrade-path "${FDB_PREV2_RELEASE_VERSION}" "${FDB_PREV_RELEASE_VERSION}" "${FDB_CURRENT_VERSION}" "${FDB_PREV_RELEASE_VERSION}"
       --process-number 3
       )

-    add_test(NAME fdb_c_cluster_wiggle
+    add_scripted_fdb_test(NAME fdb_c_upgrade_from_prev2_direct
       COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py
       --build-dir ${CMAKE_BINARY_DIR}
       --test-file ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/MixedApiWorkloadMultiThr.toml
-      --upgrade-path "7.3.0" "wiggle"
+      --upgrade-path "${FDB_PREV2_RELEASE_VERSION}" "${FDB_CURRENT_VERSION}" "${FDB_PREV_RELEASE_VERSION}"
       --process-number 3
       )

+    add_scripted_fdb_test(NAME fdb_c_upgrade_from_prev
+      COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py
+      --build-dir ${CMAKE_BINARY_DIR}
+      --test-file ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/MixedApiWorkloadMultiThr.toml
+      --upgrade-path "${FDB_PREV_RELEASE_VERSION}" "${FDB_CURRENT_VERSION}" "${FDB_PREV_RELEASE_VERSION}"
+      --process-number 3
+      )
+
+    add_scripted_fdb_test(NAME fdb_c_wiggle_only
+      COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py
+      --build-dir ${CMAKE_BINARY_DIR}
+      --test-file ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/MixedApiWorkloadMultiThr.toml
+      --upgrade-path "${FDB_CURRENT_VERSION}" "wiggle"
+      --disable-log-dump
+      --process-number 3
+      --redundancy double
+      )
+
-    add_test(NAME fdb_c_wiggle_and_upgrade_latest
+    add_scripted_fdb_test(NAME fdb_c_wiggle_and_upgrade
       COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py
       --build-dir ${CMAKE_BINARY_DIR}
       --test-file ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/MixedApiWorkloadMultiThr.toml
-      --upgrade-path "7.1.9" "wiggle" "7.3.0"
-      --disable-log-dump
-      --process-number 3
-      --redundancy double
-      )
-
-    add_test(NAME fdb_c_wiggle_and_upgrade_63
-      COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py
-      --build-dir ${CMAKE_BINARY_DIR}
-      --test-file ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/MixedApiWorkloadMultiThr.toml
-      --upgrade-path "6.3.24" "wiggle" "7.0.0"
+      --upgrade-path "${FDB_PREV_RELEASE_VERSION}" "wiggle" "${FDB_CURRENT_VERSION}"
       --disable-log-dump
       --process-number 3
       --redundancy double

@@ -470,7 +459,7 @@ elseif(NOT WIN32 AND NOT APPLE AND NOT USE_SANITIZER) # Linux Only, non-santizer
   target_link_libraries(fdb_c_shim_lib_tester PRIVATE fdb_c_shim SimpleOpt fdb_cpp Threads::Threads)
   target_include_directories(fdb_c_shim_lib_tester PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}/foundationdb/ ${CMAKE_SOURCE_DIR}/flow/include)

-  add_test(NAME fdb_c_shim_library_tests
+  add_scripted_fdb_test(NAME fdb_c_shim_library_tests
     COMMAND $<TARGET_FILE:Python3::Interpreter> ${CMAKE_CURRENT_SOURCE_DIR}/test/fdb_c_shim_tests.py
     --build-dir ${CMAKE_BINARY_DIR}
     --unit-tests-bin $<TARGET_FILE:fdb_c_shim_unit_tests>
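The net effect of this CMake section: every upgrade test is registered through add_scripted_fdb_test instead of add_test, and hard-coded version strings become the cache variables defined earlier. Assuming the variable values from the versions section and placeholder paths, the fdb_c_upgrade_from_prev test resolves to roughly this invocation (a sketch only; FDB_CURRENT_VERSION comes from PROJECT_VERSION, which is not shown in this diff):

```python
import subprocess

# Values taken from the CMake version variables above; CURRENT is an assumption.
FDB_PREV_RELEASE_VERSION = "7.1.25"
FDB_CURRENT_VERSION = "7.3.0"         # assumed PROJECT_VERSION of this tree
SOURCE_DIR = "/path/to/foundationdb"  # placeholder
BUILD_DIR = "/path/to/build"          # placeholder

# Roughly what CTest runs for fdb_c_upgrade_from_prev:
cmd = [
    f"{SOURCE_DIR}/tests/TestRunner/upgrade_test.py",
    "--build-dir", BUILD_DIR,
    "--test-file",
    f"{SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/MixedApiWorkloadMultiThr.toml",
    "--upgrade-path", FDB_PREV_RELEASE_VERSION, FDB_CURRENT_VERSION, FDB_PREV_RELEASE_VERSION,
    "--process-number", "3",
]
subprocess.run(cmd, check=True)
```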
@@ -1,43 +0,0 @@
-[[test]]
-title = 'Mixed Workload for Upgrade Tests with a Single FDB Thread'
-multiThreaded = false
-buggify = true
-databasePerTransaction = false
-minDatabases = 2
-maxDatabases = 8
-minClientThreads = 2
-maxClientThreads = 8
-minClients = 2
-maxClients = 8
-
-    [[test.workload]]
-    name = 'ApiCorrectness'
-    minKeyLength = 1
-    maxKeyLength = 64
-    minValueLength = 1
-    maxValueLength = 1000
-    maxKeysPerTransaction = 50
-    initialSize = 100
-    runUntilStop = true
-    readExistingKeysRatio = 0.9
-
-    [[test.workload]]
-    name = 'CancelTransaction'
-    minKeyLength = 1
-    maxKeyLength = 64
-    minValueLength = 1
-    maxValueLength = 1000
-    maxKeysPerTransaction = 50
-    initialSize = 100
-    runUntilStop = true
-    readExistingKeysRatio = 0.9
-
-    [[test.workload]]
-    name = 'AtomicOpsCorrectness'
-    initialSize = 0
-    runUntilStop = true
-
-    [[test.workload]]
-    name = 'WatchAndWait'
-    initialSize = 0
-    runUntilStop = true
@@ -7,16 +7,9 @@ import sys
 import os
 import glob
 import unittest

 sys.path[:0] = [os.path.join(os.path.dirname(__file__), "..", "..", "..", "tests", "TestRunner")]

 # fmt: off
-from binary_download import FdbBinaryDownloader, CURRENT_VERSION
+from fdb_version import CURRENT_VERSION, PREV_RELEASE_VERSION, PREV2_RELEASE_VERSION
+from binary_download import FdbBinaryDownloader
 from local_cluster import LocalCluster, random_secret_string
 # fmt: on

-PREV_RELEASE_VERSION = "7.1.5"
-PREV_PREV_RELEASE_VERSION = "7.0.0"
-
 args = None
 downloader = None

@@ -180,15 +173,15 @@ class ClientConfigTests(unittest.TestCase):
     def test_multiple_external_clients(self):
         # Multiple external clients, normal case
         test = ClientConfigTest(self)
-        test.create_external_lib_dir([CURRENT_VERSION, PREV_RELEASE_VERSION, PREV_PREV_RELEASE_VERSION])
+        test.create_external_lib_dir([CURRENT_VERSION, PREV_RELEASE_VERSION, PREV2_RELEASE_VERSION])
         test.disable_local_client = True
-        test.api_version = api_version_from_str(PREV_PREV_RELEASE_VERSION)
+        test.api_version = api_version_from_str(PREV2_RELEASE_VERSION)
         test.exec()

     def test_no_external_client_support_api_version(self):
         # Multiple external clients, API version supported by none of them
         test = ClientConfigTest(self)
-        test.create_external_lib_dir([PREV_PREV_RELEASE_VERSION, PREV_RELEASE_VERSION])
+        test.create_external_lib_dir([PREV2_RELEASE_VERSION, PREV_RELEASE_VERSION])
         test.disable_local_client = True
         test.api_version = api_version_from_str(CURRENT_VERSION)
         test.expected_error = 2204  # API function missing

@@ -197,7 +190,7 @@ class ClientConfigTests(unittest.TestCase):
     def test_no_external_client_support_api_version_ignore(self):
         # Multiple external clients; API version supported by none of them; Ignore failures
         test = ClientConfigTest(self)
-        test.create_external_lib_dir([PREV_PREV_RELEASE_VERSION, PREV_RELEASE_VERSION])
+        test.create_external_lib_dir([PREV2_RELEASE_VERSION, PREV_RELEASE_VERSION])
         test.disable_local_client = True
         test.api_version = api_version_from_str(CURRENT_VERSION)
         test.ignore_external_client_failures = True

@@ -207,7 +200,7 @@ class ClientConfigTests(unittest.TestCase):
     def test_one_external_client_wrong_api_version(self):
         # Multiple external clients, API version unsupported by one of them
         test = ClientConfigTest(self)
-        test.create_external_lib_dir([CURRENT_VERSION, PREV_RELEASE_VERSION, PREV_PREV_RELEASE_VERSION])
+        test.create_external_lib_dir([CURRENT_VERSION, PREV_RELEASE_VERSION, PREV2_RELEASE_VERSION])
         test.disable_local_client = True
         test.api_version = api_version_from_str(CURRENT_VERSION)
         test.expected_error = 2204  # API function missing

@@ -216,7 +209,7 @@ class ClientConfigTests(unittest.TestCase):
     def test_one_external_client_wrong_api_version_ignore(self):
         # Multiple external clients; API version unsupported by one of them; Ignore failures
         test = ClientConfigTest(self)
-        test.create_external_lib_dir([CURRENT_VERSION, PREV_RELEASE_VERSION, PREV_PREV_RELEASE_VERSION])
+        test.create_external_lib_dir([CURRENT_VERSION, PREV_RELEASE_VERSION, PREV2_RELEASE_VERSION])
         test.disable_local_client = True
         test.api_version = api_version_from_str(CURRENT_VERSION)
         test.ignore_external_client_failures = True

@@ -286,6 +279,6 @@ if __name__ == "__main__":

     downloader = FdbBinaryDownloader(args.build_dir)
     downloader.download_old_binaries(PREV_RELEASE_VERSION)
-    downloader.download_old_binaries(PREV_PREV_RELEASE_VERSION)
+    downloader.download_old_binaries(PREV2_RELEASE_VERSION)

     unittest.main(verbosity=2)
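The version constants removed here now come from a shared fdb_version module under tests/TestRunner. That module is not part of this diff; judging from the import lines and the CMake variables, it plausibly looks like the following (the values are assumptions reconstructed from the sections above):

```python
# Hypothetical tests/TestRunner/fdb_version.py -- reconstructed from the
# imports above; the module itself is not shown in this diff.
CURRENT_VERSION = "7.3.0"          # assumption: matches PROJECT_VERSION
FUTURE_VERSION = "7.4.0"
PREV_RELEASE_VERSION = "7.1.25"
PREV2_RELEASE_VERSION = "7.0.0"    # replaces the old PREV_PREV_* naming
PREV3_RELEASE_VERSION = "6.3.25"
```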
@@ -6,15 +6,10 @@ import shutil
 import subprocess
 import sys
 import os

 sys.path[:0] = [os.path.join(os.path.dirname(__file__), '..', '..', '..', 'tests', 'TestRunner')]

 # fmt: off
-from binary_download import FdbBinaryDownloader, CURRENT_VERSION
+from binary_download import FdbBinaryDownloader
 from local_cluster import LocalCluster, random_secret_string
 # fmt: on
+from fdb_version import CURRENT_VERSION, PREV_RELEASE_VERSION

-LAST_RELEASE_VERSION = "7.1.5"
 TESTER_STATS_INTERVAL_SEC = 5
 DEFAULT_TEST_FILE = "CApiCorrectnessMultiThr.toml"
 IMPLIBSO_ERROR_CODE = -6  # SIGABORT

@@ -54,13 +49,12 @@ class TestEnv(LocalCluster):
             self.downloader.binary_path(version, "fdbserver"),
             self.downloader.binary_path(version, "fdbmonitor"),
             self.downloader.binary_path(version, "fdbcli"),
-            1
+            1,
         )
         self.set_env_var("LD_LIBRARY_PATH", self.downloader.lib_dir(version))
         client_lib = self.downloader.lib_path(version)
         assert client_lib.exists(), "{} does not exist".format(client_lib)
-        self.client_lib_external = self.tmp_dir.joinpath(
-            "libfdb_c_external.so")
+        self.client_lib_external = self.tmp_dir.joinpath("libfdb_c_external.so")
         shutil.copyfile(client_lib, self.client_lib_external)

     def __enter__(self):

@@ -73,22 +67,16 @@ class TestEnv(LocalCluster):
         shutil.rmtree(self.tmp_dir)

     def exec_client_command(self, cmd_args, env_vars=None, expected_ret_code=0):
-        print("Executing test command: {}".format(
-            " ".join([str(c) for c in cmd_args])
-        ))
-        tester_proc = subprocess.Popen(
-            cmd_args, stdout=sys.stdout, stderr=sys.stderr, env=env_vars
-        )
+        print("Executing test command: {}".format(" ".join([str(c) for c in cmd_args])))
+        tester_proc = subprocess.Popen(cmd_args, stdout=sys.stdout, stderr=sys.stderr, env=env_vars)
         tester_retcode = tester_proc.wait()
         assert tester_retcode == expected_ret_code, "Tester completed return code {}, but {} was expected".format(
-            tester_retcode, expected_ret_code)
+            tester_retcode, expected_ret_code
+        )


 class FdbCShimTests:
-    def __init__(
-        self,
-        args
-    ):
+    def __init__(self, args):
         self.build_dir = Path(args.build_dir).resolve()
         assert self.build_dir.exists(), "{} does not exist".format(args.build_dir)
         assert self.build_dir.is_dir(), "{} is not a directory".format(args.build_dir)

@@ -97,15 +85,14 @@ class FdbCShimTests:
         self.api_tester_bin = Path(args.api_tester_bin).resolve()
         assert self.api_tester_bin.exists(), "{} does not exist".format(self.api_tests_bin)
         self.shim_lib_tester_bin = Path(args.shim_lib_tester_bin).resolve()
-        assert self.shim_lib_tester_bin.exists(
-        ), "{} does not exist".format(self.shim_lib_tester_bin)
+        assert self.shim_lib_tester_bin.exists(), "{} does not exist".format(self.shim_lib_tester_bin)
         self.api_test_dir = Path(args.api_test_dir).resolve()
         assert self.api_test_dir.exists(), "{} does not exist".format(self.api_test_dir)
         self.downloader = FdbBinaryDownloader(args.build_dir)
         # binary downloads are currently available only for x86_64
         self.platform = platform.machine()
-        if (self.platform == "x86_64"):
-            self.downloader.download_old_binaries(LAST_RELEASE_VERSION)
+        if self.platform == "x86_64":
+            self.downloader.download_old_binaries(PREV_RELEASE_VERSION)
             self.downloader.download_old_binaries("7.0.0")

     def build_c_api_tester_args(self, test_env, test_file):

@@ -127,34 +114,27 @@ class FdbCShimTests:
             "--tmp-dir",
             test_env.tmp_dir,
             "--stats-interval",
-            str(TESTER_STATS_INTERVAL_SEC * 1000)
+            str(TESTER_STATS_INTERVAL_SEC * 1000),
         ]

     def run_c_api_test(self, version, test_file):
-        print('-' * 80)
+        print("-" * 80)
         print("C API Test - version: {}, workload: {}".format(version, test_file))
-        print('-' * 80)
+        print("-" * 80)
         with TestEnv(self.build_dir, self.downloader, version) as test_env:
             cmd_args = self.build_c_api_tester_args(test_env, test_file)
             env_vars = os.environ.copy()
-            env_vars["FDB_LOCAL_CLIENT_LIBRARY_PATH"] = self.downloader.lib_path(
-                version)
+            env_vars["FDB_LOCAL_CLIENT_LIBRARY_PATH"] = self.downloader.lib_path(version)
             test_env.exec_client_command(cmd_args, env_vars)

     def run_c_unit_tests(self, version):
-        print('-' * 80)
+        print("-" * 80)
         print("C Unit Tests - version: {}".format(version))
-        print('-' * 80)
+        print("-" * 80)
         with TestEnv(self.build_dir, self.downloader, version) as test_env:
-            cmd_args = [
-                self.unit_tests_bin,
-                test_env.cluster_file,
-                "fdb",
-                test_env.client_lib_external
-            ]
+            cmd_args = [self.unit_tests_bin, test_env.cluster_file, "fdb", test_env.client_lib_external]
             env_vars = os.environ.copy()
-            env_vars["FDB_LOCAL_CLIENT_LIBRARY_PATH"] = self.downloader.lib_path(
-                version)
+            env_vars["FDB_LOCAL_CLIENT_LIBRARY_PATH"] = self.downloader.lib_path(version)
             test_env.exec_client_command(cmd_args, env_vars)

     def run_c_shim_lib_tester(

@@ -167,9 +147,9 @@ class FdbCShimTests:
         set_env_path=False,
         set_ld_lib_path=False,
         use_external_lib=True,
-        expected_ret_code=0
+        expected_ret_code=0,
     ):
-        print('-' * 80)
+        print("-" * 80)
         if api_version is None:
             api_version = api_version_from_str(version)
         test_flags = []

@@ -183,9 +163,8 @@ class FdbCShimTests:
             test_flags.append("use_external_lib")
         else:
             test_flags.append("use_local_lib")
-        print("C Shim Tests - version: {}, API version: {}, {}".format(version,
-              api_version, ", ".join(test_flags)))
-        print('-' * 80)
+        print("C Shim Tests - version: {}, API version: {}, {}".format(version, api_version, ", ".join(test_flags)))
+        print("-" * 80)
         cmd_args = [
             self.shim_lib_tester_bin,
             "--cluster-file",

@@ -196,20 +175,16 @@ class FdbCShimTests:
         if call_set_path:
             cmd_args = cmd_args + [
                 "--local-client-library",
-                ("dummy" if invalid_lib_path else self.downloader.lib_path(version))
+                ("dummy" if invalid_lib_path else self.downloader.lib_path(version)),
             ]
         if use_external_lib:
-            cmd_args = cmd_args + [
-                "--disable-local-client",
-                "--external-client-library",
-                test_env.client_lib_external
-            ]
+            cmd_args = cmd_args + ["--disable-local-client", "--external-client-library", test_env.client_lib_external]
         env_vars = os.environ.copy()
-        env_vars["LD_LIBRARY_PATH"] = (
-            self.downloader.lib_dir(version) if set_ld_lib_path else "")
+        env_vars["LD_LIBRARY_PATH"] = self.downloader.lib_dir(version) if set_ld_lib_path else ""
         if set_env_path:
             env_vars["FDB_LOCAL_CLIENT_LIBRARY_PATH"] = (
-                "dummy" if invalid_lib_path else self.downloader.lib_path(version))
+                "dummy" if invalid_lib_path else self.downloader.lib_path(version)
+            )
         test_env.exec_client_command(cmd_args, env_vars, expected_ret_code)

     def run_tests(self):

@@ -221,50 +196,60 @@

         with TestEnv(self.build_dir, self.downloader, CURRENT_VERSION) as test_env:
             # Test lookup of the client library over LD_LIBRARY_PATH
-            self.run_c_shim_lib_tester(
-                CURRENT_VERSION, test_env, set_ld_lib_path=True)
+            self.run_c_shim_lib_tester(CURRENT_VERSION, test_env, set_ld_lib_path=True)

             # Test setting the client library path over an API call
-            self.run_c_shim_lib_tester(
-                CURRENT_VERSION, test_env, call_set_path=True)
+            self.run_c_shim_lib_tester(CURRENT_VERSION, test_env, call_set_path=True)

             # Test setting the client library path over an environment variable
-            self.run_c_shim_lib_tester(
-                CURRENT_VERSION, test_env, set_env_path=True)
+            self.run_c_shim_lib_tester(CURRENT_VERSION, test_env, set_env_path=True)

             # Test using the loaded client library as the local client
-            self.run_c_shim_lib_tester(
-                CURRENT_VERSION, test_env, call_set_path=True, use_external_lib=False)
+            self.run_c_shim_lib_tester(CURRENT_VERSION, test_env, call_set_path=True, use_external_lib=False)

             # Test setting an invalid client library path over an API call
             self.run_c_shim_lib_tester(
-                CURRENT_VERSION, test_env, call_set_path=True, invalid_lib_path=True, expected_ret_code=IMPLIBSO_ERROR_CODE)
+                CURRENT_VERSION,
+                test_env,
+                call_set_path=True,
+                invalid_lib_path=True,
+                expected_ret_code=IMPLIBSO_ERROR_CODE,
+            )

             # Test setting an invalid client library path over an environment variable
             self.run_c_shim_lib_tester(
-                CURRENT_VERSION, test_env, set_env_path=True, invalid_lib_path=True, expected_ret_code=IMPLIBSO_ERROR_CODE)
+                CURRENT_VERSION,
+                test_env,
+                set_env_path=True,
+                invalid_lib_path=True,
+                expected_ret_code=IMPLIBSO_ERROR_CODE,
+            )

             # Test calling a function that exists in the loaded library, but not for the selected API version
-            self.run_c_shim_lib_tester(
-                CURRENT_VERSION, test_env, call_set_path=True, api_version=700)
+            self.run_c_shim_lib_tester(CURRENT_VERSION, test_env, call_set_path=True, api_version=700)

         # binary downloads are currently available only for x86_64
         if self.platform == "x86_64":
             # Test the API workload with the release version
-            self.run_c_api_test(LAST_RELEASE_VERSION, DEFAULT_TEST_FILE)
+            self.run_c_api_test(PREV_RELEASE_VERSION, DEFAULT_TEST_FILE)

-            with TestEnv(self.build_dir, self.downloader, LAST_RELEASE_VERSION) as test_env:
+            with TestEnv(self.build_dir, self.downloader, PREV_RELEASE_VERSION) as test_env:
                 # Test using the loaded client library as the local client
-                self.run_c_shim_lib_tester(
-                    LAST_RELEASE_VERSION, test_env, call_set_path=True, use_external_lib=False)
+                self.run_c_shim_lib_tester(PREV_RELEASE_VERSION, test_env, call_set_path=True, use_external_lib=False)

                 # Test the client library of the release version in combination with the dev API version
                 self.run_c_shim_lib_tester(
-                    LAST_RELEASE_VERSION, test_env, call_set_path=True, api_version=api_version_from_str(CURRENT_VERSION), expected_ret_code=1)
+                    PREV_RELEASE_VERSION,
+                    test_env,
+                    call_set_path=True,
+                    api_version=api_version_from_str(CURRENT_VERSION),
+                    expected_ret_code=1,
+                )

                 # Test calling a function that does not exist in the loaded library
                 self.run_c_shim_lib_tester(
-                    "7.0.0", test_env, call_set_path=True, api_version=700, expected_ret_code=IMPLIBSO_ERROR_CODE)
+                    "7.0.0", test_env, call_set_path=True, api_version=700, expected_ret_code=IMPLIBSO_ERROR_CODE
+                )


 if __name__ == "__main__":

@@ -285,25 +270,17 @@ if __name__ == "__main__":
         required=True,
     )
     parser.add_argument(
-        '--unit-tests-bin',
-        type=str,
-        help='Path to the fdb_c_shim_unit_tests executable.',
-        required=True)
+        "--unit-tests-bin", type=str, help="Path to the fdb_c_shim_unit_tests executable.", required=True
+    )
     parser.add_argument(
-        '--api-tester-bin',
-        type=str,
-        help='Path to the fdb_c_shim_api_tester executable.',
-        required=True)
+        "--api-tester-bin", type=str, help="Path to the fdb_c_shim_api_tester executable.", required=True
+    )
     parser.add_argument(
-        '--shim-lib-tester-bin',
-        type=str,
-        help='Path to the fdb_c_shim_lib_tester executable.',
-        required=True)
+        "--shim-lib-tester-bin", type=str, help="Path to the fdb_c_shim_lib_tester executable.", required=True
+    )
     parser.add_argument(
-        '--api-test-dir',
-        type=str,
-        help='Path to a directory with api test definitions.',
-        required=True)
+        "--api-test-dir", type=str, help="Path to a directory with api test definitions.", required=True
+    )
     args = parser.parse_args()
     test = FdbCShimTests(args)
     test.run_tests()
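The shim tests above repeatedly exercise one mechanism: pointing the fdb_c shim at a concrete client library through the FDB_LOCAL_CLIENT_LIBRARY_PATH environment variable and checking the child process's exit code. A condensed, self-contained sketch of that pattern (binary and file paths are placeholders):

```python
import os
import subprocess

# Placeholders: real values come from the build tree and the binary downloader.
SHIM_TESTER = "/path/to/fdb_c_shim_lib_tester"
CLIENT_LIB = "/path/to/libfdb_c.so"
CLUSTER_FILE = "/path/to/fdb.cluster"

env = os.environ.copy()
# The shim resolves the real client library from this variable.
env["FDB_LOCAL_CLIENT_LIBRARY_PATH"] = CLIENT_LIB

proc = subprocess.Popen([SHIM_TESTER, "--cluster-file", CLUSTER_FILE], env=env)
# With an invalid library path, the tests above instead expect an abort
# (IMPLIBSO_ERROR_CODE, i.e. -6 / SIGABRT).
assert proc.wait() == 0
```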
@@ -42,6 +42,8 @@ import (
 // usually created and committed automatically by the (Database).Transact
 // method.
 type Database struct {
+	// String reference to the cluster file.
+	clusterFile string
 	*database
 }

@@ -56,6 +58,16 @@ type DatabaseOptions struct {
 	d *database
 }

+// Close will close the Database and clean up all resources.
+// You have to ensure that you're not reusing this database.
+func (d *Database) Close() {
+	// Remove database object from the cached databases
+	delete(openDatabases, d.clusterFile)
+
+	// Destroy the database
+	d.destroy()
+}
+
 func (opt DatabaseOptions) setOpt(code int, param []byte) error {
 	return setOpt(func(p *C.uint8_t, pl C.int) C.fdb_error_t {
 		return C.fdb_database_set_option(opt.d.ptr, C.FDBDatabaseOption(code), p, pl)

@@ -63,6 +75,10 @@ func (opt DatabaseOptions) setOpt(code int, param []byte) error {
 }

 func (d *database) destroy() {
+	if d.ptr == nil {
+		return
+	}
+
 	C.fdb_database_destroy(d.ptr)
 }
@@ -39,6 +39,7 @@ import (
 // Would put this in futures.go but for the documented issue with
 // exports and functions in preamble
 // (https://code.google.com/p/go-wiki/wiki/cgo#Global_functions)
+//
 //export unlockMutex
 func unlockMutex(p unsafe.Pointer) {
 	m := (*sync.Mutex)(p)

@@ -337,7 +338,7 @@ func createDatabase(clusterFile string) (Database, error) {
 	db := &database{outdb}
 	runtime.SetFinalizer(db, (*database).destroy)

-	return Database{db}, nil
+	return Database{clusterFile, db}, nil
 }

 // Deprecated: Use OpenDatabase instead.
@@ -48,7 +48,10 @@ func ExampleOpenDefault() {
 		return
 	}

-	_ = db
+	// Close the database after usage
+	defer db.Close()
+
+	// Do work here

 	// Output:
 }

@@ -313,3 +316,30 @@ func ExamplePrintable() {
 	fmt.Println(fdb.Printable([]byte{0, 1, 2, 'a', 'b', 'c', '1', '2', '3', '!', '?', 255}))
 	// Output: \x00\x01\x02abc123!?\xff
 }
+
+func TestDatabaseCloseRemovesResources(t *testing.T) {
+	err := fdb.APIVersion(API_VERSION)
+	if err != nil {
+		t.Fatalf("Unable to set API version: %v\n", err)
+	}
+
+	// OpenDefault opens the database described by the platform-specific default
+	// cluster file
+	db, err := fdb.OpenDefault()
+	if err != nil {
+		t.Fatalf("Unable to set API version: %v\n", err)
+	}
+
+	// Close the database after usage
+	db.Close()
+
+	// Open the same database again, if the database is still in the cache we would return the same object, if not we create a new object with a new pointer
+	newDB, err := fdb.OpenDefault()
+	if err != nil {
+		t.Fatalf("Unable to set API version: %v\n", err)
+	}
+
+	if db == newDB {
+		t.Fatalf("Expected a different database object, got: %v and %v\n", db, newDB)
+	}
+}
@@ -25,14 +25,14 @@ https://apple.github.io/foundationdb/api-python.html"""


 def open(*args, **kwargs):
-    raise RuntimeError('You must call api_version() before using any fdb methods')
+    raise RuntimeError("You must call api_version() before using any fdb methods")


 init = open


 def transactional(*args, **kwargs):
-    raise RuntimeError('You must call api_version() before using fdb.transactional')
+    raise RuntimeError("You must call api_version() before using fdb.transactional")


 def _add_symbols(module, symbols):

@@ -41,29 +41,29 @@ def _add_symbols(module, symbols):


 def is_api_version_selected():
-    return '_version' in globals()
+    return "_version" in globals()


 def get_api_version():
     if is_api_version_selected():
-        return globals()['_version']
+        return globals()["_version"]
     else:
-        raise RuntimeError('API version is not set')
+        raise RuntimeError("API version is not set")


 def api_version(ver):
     header_version = 720

-    if '_version' in globals():
-        if globals()['_version'] != ver:
-            raise RuntimeError('FDB API already loaded at version %d' % _version)
+    if "_version" in globals():
+        if globals()["_version"] != ver:
+            raise RuntimeError("FDB API already loaded at version %d" % _version)
         return

     if ver < 13:
-        raise RuntimeError('FDB API versions before 13 are not supported')
+        raise RuntimeError("FDB API versions before 13 are not supported")

     if ver > header_version:
-        raise RuntimeError('Latest known FDB API version is %d' % header_version)
+        raise RuntimeError("Latest known FDB API version is %d" % header_version)

     import fdb.impl

@@ -71,31 +71,37 @@ def api_version(ver):
     if err == 2203:  # api_version_not_supported, but that's not helpful to the user
         max_supported_ver = fdb.impl._capi.fdb_get_max_api_version()
         if header_version > max_supported_ver:
-            raise RuntimeError("This version of the FoundationDB Python binding is not supported by the installed "
-                               "FoundationDB C library. The binding requires a library that supports API version "
-                               "%d, but the installed library supports a maximum version of %d." % (header_version, max_supported_ver))
+            raise RuntimeError(
+                "This version of the FoundationDB Python binding is not supported by the installed "
+                "FoundationDB C library. The binding requires a library that supports API version "
+                "%d, but the installed library supports a maximum version of %d."
+                % (header_version, max_supported_ver)
+            )

         else:
-            raise RuntimeError("API version %d is not supported by the installed FoundationDB C library." % ver)
+            raise RuntimeError(
+                "API version %d is not supported by the installed FoundationDB C library."
+                % ver
+            )

     elif err != 0:
-        raise RuntimeError('FoundationDB API error')
+        raise RuntimeError("FoundationDB API error")

     fdb.impl.init_c_api()

     list = (
-        'FDBError',
-        'predicates',
-        'Future',
-        'Database',
-        'Tenant',
-        'Transaction',
-        'KeyValue',
-        'KeySelector',
-        'open',
-        'transactional',
-        'options',
-        'StreamingMode',
+        "FDBError",
+        "predicates",
+        "Future",
+        "Database",
+        "Tenant",
+        "Transaction",
+        "KeyValue",
+        "KeySelector",
+        "open",
+        "transactional",
+        "options",
+        "StreamingMode",
     )

     _add_symbols(fdb.impl, list)

@@ -134,14 +140,20 @@ def api_version(ver):
         if not hasattr(self, "__iterating"):
             self.__iterating = iter(self)
         return next(self.__iterating)

     setattr(fdb.impl.FDBRange, "next", next)

-    globals()['_version'] = ver
+    globals()["_version"] = ver

     import fdb.directory_impl
-    directory_symbols = ('directory', 'DirectoryLayer',)
+
+    directory_symbols = (
+        "directory",
+        "DirectoryLayer",
+    )
     _add_symbols(fdb.directory_impl, directory_symbols)

     import fdb.subspace_impl
-    subspace_symbols = ('Subspace',)
+
+    subspace_symbols = ("Subspace",)
     _add_symbols(fdb.subspace_impl, subspace_symbols)
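For reference, the calling convention all of these RuntimeErrors guard: api_version() must be called exactly once, before open() or transactional() become usable, and with a version no greater than header_version (720 in this revision). A minimal client, assuming a reachable cluster and the default cluster file:

```python
import fdb

fdb.api_version(720)  # must precede fdb.open(); 720 is header_version above
db = fdb.open()       # uses the platform-default cluster file

@fdb.transactional
def set_and_get(tr):
    tr[b"hello"] = b"world"
    return tr[b"hello"]

print(set_and_get(db))
```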
@ -35,8 +35,7 @@ class AllocatorTransactionState:
|
|||
self.lock = threading.Lock()
|
||||
|
||||
|
||||
class HighContentionAllocator (object):
|
||||
|
||||
class HighContentionAllocator(object):
|
||||
def __init__(self, subspace):
|
||||
self.counters = subspace[0]
|
||||
self.recent = subspace[1]
|
||||
|
@ -45,9 +44,9 @@ class HighContentionAllocator (object):
|
|||
@_impl.transactional
|
||||
def allocate(self, tr):
|
||||
"""Returns a byte string that
|
||||
1) has never and will never be returned by another call to this
|
||||
method on the same subspace
|
||||
2) is nearly as short as possible given the above
|
||||
1) has never and will never be returned by another call to this
|
||||
method on the same subspace
|
||||
2) is nearly as short as possible given the above
|
||||
"""
|
||||
|
||||
# Get transaction-local state
|
||||
|
@ -59,16 +58,23 @@ class HighContentionAllocator (object):
|
|||
tr_state = tr.__fdb_directory_layer_hca_state__
|
||||
|
||||
while True:
|
||||
[start] = [self.counters.unpack(k)[0] for k, _ in tr.snapshot.get_range(
|
||||
self.counters.range().start, self.counters.range().stop, limit=1, reverse=True)] or [0]
|
||||
[start] = [
|
||||
self.counters.unpack(k)[0]
|
||||
for k, _ in tr.snapshot.get_range(
|
||||
self.counters.range().start,
|
||||
self.counters.range().stop,
|
||||
limit=1,
|
||||
reverse=True,
|
||||
)
|
||||
] or [0]
|
||||
|
||||
window_advanced = False
|
||||
while True:
|
||||
with tr_state.lock:
|
||||
if window_advanced:
|
||||
del tr[self.counters: self.counters[start]]
|
||||
del tr[self.counters : self.counters[start]]
|
||||
tr.options.set_next_write_no_write_conflict_range()
|
||||
del tr[self.recent: self.recent[start]]
|
||||
del tr[self.recent : self.recent[start]]
|
||||
|
||||
# Increment the allocation count for the current window
|
||||
tr.add(self.counters[start], struct.pack("<q", 1))
|
||||
|
@ -94,10 +100,15 @@ class HighContentionAllocator (object):
|
|||
candidate = random.randrange(start, start + window)
|
||||
|
||||
with tr_state.lock:
|
||||
latest_counter = tr.snapshot.get_range(self.counters.range().start, self.counters.range().stop, limit=1, reverse=True)
|
||||
latest_counter = tr.snapshot.get_range(
|
||||
self.counters.range().start,
|
||||
self.counters.range().stop,
|
||||
limit=1,
|
||||
reverse=True,
|
||||
)
|
||||
candidate_value = tr[self.recent[candidate]]
|
||||
tr.options.set_next_write_no_write_conflict_range()
|
||||
tr[self.recent[candidate]] = b''
|
||||
tr[self.recent[candidate]] = b""
|
||||
|
||||
latest_counter = [self.counters.unpack(k)[0] for k, _ in latest_counter]
|
||||
if len(latest_counter) > 0 and latest_counter[0] > start:
|
||||
|
@ -121,7 +132,7 @@ class HighContentionAllocator (object):
|
|||
|
||||
|
||||
class Directory(object):
|
||||
def __init__(self, directory_layer, path=(), layer=b''):
|
||||
def __init__(self, directory_layer, path=(), layer=b""):
|
||||
self._directory_layer = directory_layer
|
||||
self._path = path
|
||||
self._layer = layer
|
||||
|
@ -129,7 +140,9 @@ class Directory(object):
|
|||
@_impl.transactional
|
||||
def create_or_open(self, tr, path, layer=None):
|
||||
path = self._tuplify_path(path)
|
||||
return self._directory_layer.create_or_open(tr, self._partition_subpath(path), layer)
|
||||
return self._directory_layer.create_or_open(
|
||||
tr, self._partition_subpath(path), layer
|
||||
)
|
||||
|
||||
@_impl.transactional
|
||||
def open(self, tr, path, layer=None):
|
||||
|
@ -139,7 +152,9 @@ class Directory(object):
|
|||
@_impl.transactional
|
||||
def create(self, tr, path, layer=None, prefix=None):
|
||||
path = self._tuplify_path(path)
|
||||
return self._directory_layer.create(tr, self._partition_subpath(path), layer, prefix)
|
||||
return self._directory_layer.create(
|
||||
tr, self._partition_subpath(path), layer, prefix
|
||||
)
|
||||
|
||||
@_impl.transactional
|
||||
def list(self, tr, path=()):
|
||||
|
@ -150,7 +165,9 @@ class Directory(object):
|
|||
def move(self, tr, old_path, new_path):
|
||||
old_path = self._tuplify_path(old_path)
|
||||
new_path = self._tuplify_path(new_path)
|
||||
return self._directory_layer.move(tr, self._partition_subpath(old_path), self._partition_subpath(new_path))
|
||||
return self._directory_layer.move(
|
||||
tr, self._partition_subpath(old_path), self._partition_subpath(new_path)
|
||||
)
|
||||
|
||||
@_impl.transactional
|
||||
def move_to(self, tr, new_absolute_path):
|
||||
|
@ -161,25 +178,33 @@ class Directory(object):
|
|||
if partition_path != directory_layer._path:
|
||||
raise ValueError("Cannot move between partitions.")
|
||||
|
||||
return directory_layer.move(tr, self._path[partition_len:], new_absolute_path[partition_len:])
|
||||
return directory_layer.move(
|
||||
tr, self._path[partition_len:], new_absolute_path[partition_len:]
|
||||
)
|
||||
|
||||
@_impl.transactional
|
||||
def remove(self, tr, path=()):
|
||||
path = self._tuplify_path(path)
|
||||
directory_layer = self._get_layer_for_path(path)
|
||||
return directory_layer.remove(tr, self._partition_subpath(path, directory_layer))
|
||||
return directory_layer.remove(
|
||||
tr, self._partition_subpath(path, directory_layer)
|
||||
)
|
||||
|
||||
@_impl.transactional
|
||||
def remove_if_exists(self, tr, path=()):
|
||||
path = self._tuplify_path(path)
|
||||
directory_layer = self._get_layer_for_path(path)
|
||||
return directory_layer.remove_if_exists(tr, self._partition_subpath(path, directory_layer))
|
||||
return directory_layer.remove_if_exists(
|
||||
tr, self._partition_subpath(path, directory_layer)
|
||||
)
|
||||
|
||||
@_impl.transactional
|
||||
def exists(self, tr, path=()):
|
||||
path = self._tuplify_path(path)
|
||||
directory_layer = self._get_layer_for_path(path)
|
||||
return directory_layer.exists(tr, self._partition_subpath(path, directory_layer))
|
||||
return directory_layer.exists(
|
||||
tr, self._partition_subpath(path, directory_layer)
|
||||
)
|
||||
|
||||
def get_layer(self):
|
||||
return self._layer
|
||||
|
@ -194,7 +219,7 @@ class Directory(object):
|
|||
|
||||
def _partition_subpath(self, path, directory_layer=None):
|
||||
directory_layer = directory_layer or self._directory_layer
|
||||
return self._path[len(directory_layer._path):] + path
|
||||
return self._path[len(directory_layer._path) :] + path
|
||||
|
||||
# Called by all functions that could operate on this subspace directly (move_to, remove, remove_if_exists, exists)
|
||||
# Subclasses can choose to return a different directory layer to use for the operation if path is in fact ()
|
||||
|
@ -203,8 +228,12 @@ class Directory(object):
|
|||
|
||||
|
||||
class DirectoryLayer(Directory):
|
||||
|
||||
def __init__(self, node_subspace=Subspace(rawPrefix=b'\xfe'), content_subspace=Subspace(), allow_manual_prefixes=False):
|
||||
def __init__(
|
||||
self,
|
||||
node_subspace=Subspace(rawPrefix=b"\xfe"),
|
||||
content_subspace=Subspace(),
|
||||
allow_manual_prefixes=False,
|
||||
):
|
||||
Directory.__init__(self, self)
|
||||
|
||||
# If specified, new automatically allocated prefixes will all fall within content_subspace
|
||||
|
@ -215,11 +244,11 @@ class DirectoryLayer(Directory):
|
|||
|
||||
# The root node is the one whose contents are the node subspace
|
||||
self._root_node = self._node_subspace[self._node_subspace.key()]
|
||||
self._allocator = HighContentionAllocator(self._root_node[b'hca'])
|
||||
self._allocator = HighContentionAllocator(self._root_node[b"hca"])
|
||||
|
||||
@_impl.transactional
|
||||
def create_or_open(self, tr, path, layer=None):
|
||||
""" Opens the directory with the given path.
|
||||
"""Opens the directory with the given path.
|
||||
|
||||
If the directory does not exist, it is created (creating parent
|
||||
directories if necessary).
|
||||
|
@ -229,12 +258,16 @@ class DirectoryLayer(Directory):
|
|||
"""
|
||||
return self._create_or_open_internal(tr, path, layer)
|
||||
|
||||
def _create_or_open_internal(self, tr, path, layer=None, prefix=None, allow_create=True, allow_open=True):
|
||||
def _create_or_open_internal(
|
||||
self, tr, path, layer=None, prefix=None, allow_create=True, allow_open=True
|
||||
):
|
||||
self._check_version(tr, write_access=False)
|
||||
|
||||
if prefix is not None and not self._allow_manual_prefixes:
|
||||
if len(self._path) == 0:
|
||||
raise ValueError("Cannot specify a prefix unless manual prefixes are enabled.")
|
||||
raise ValueError(
|
||||
"Cannot specify a prefix unless manual prefixes are enabled."
|
||||
)
|
||||
else:
|
||||
raise ValueError("Cannot specify a prefix in a partition.")
|
||||
|
||||
|
@ -248,7 +281,9 @@ class DirectoryLayer(Directory):
|
|||
if existing_node.exists():
|
||||
if existing_node.is_in_partition():
|
||||
subpath = existing_node.get_partition_subpath()
|
||||
return existing_node.get_contents(self)._directory_layer._create_or_open_internal(
|
||||
return existing_node.get_contents(
|
||||
self
|
||||
)._directory_layer._create_or_open_internal(
|
||||
tr, subpath, layer, prefix, allow_create, allow_open
|
||||
)
|
||||
|
||||
|
@ -256,7 +291,9 @@ class DirectoryLayer(Directory):
|
|||
raise ValueError("The directory already exists.")
|
||||
|
||||
if layer and existing_node.layer() != layer:
|
||||
raise ValueError("The directory was created with an incompatible layer.")
|
||||
raise ValueError(
|
||||
"The directory was created with an incompatible layer."
|
||||
)
|
||||
|
||||
return existing_node.get_contents(self)
|
||||
|
||||
|
@ -269,16 +306,23 @@ class DirectoryLayer(Directory):
|
|||
prefix = self._content_subspace.key() + self._allocator.allocate(tr)
|
||||
|
||||
if len(list(tr.get_range_startswith(prefix, limit=1))) > 0:
|
||||
raise Exception("The database has keys stored at the prefix chosen by the automatic prefix allocator: %r." % prefix)
|
||||
raise Exception(
|
||||
"The database has keys stored at the prefix chosen by the automatic prefix allocator: %r."
|
||||
% prefix
|
||||
)
|
||||
|
||||
if not self._is_prefix_free(tr.snapshot, prefix):
|
||||
raise Exception("The directory layer has manually allocated prefixes that conflict with the automatic prefix allocator.")
|
||||
raise Exception(
|
||||
"The directory layer has manually allocated prefixes that conflict with the automatic prefix allocator."
|
||||
)
|
||||
|
||||
elif not self._is_prefix_free(tr, prefix):
|
||||
raise ValueError("The given prefix is already in use.")
|
||||
|
||||
if len(path) > 1:
|
||||
parent_node = self._node_with_prefix(self.create_or_open(tr, path[:-1]).key())
|
||||
parent_node = self._node_with_prefix(
|
||||
self.create_or_open(tr, path[:-1]).key()
|
||||
)
|
||||
else:
|
||||
parent_node = self._root_node
|
||||
if not parent_node:
|
||||
|
@ -288,15 +332,15 @@ class DirectoryLayer(Directory):
|
|||
node = self._node_with_prefix(prefix)
|
||||
tr[parent_node[self.SUBDIRS][path[-1]]] = prefix
|
||||
if not layer:
|
||||
layer = b''
|
||||
layer = b""
|
||||
|
||||
tr[node[b'layer']] = layer
|
||||
tr[node[b"layer"]] = layer
|
||||
|
||||
return self._contents_of_node(node, path, layer)
|
||||
|
||||
@_impl.transactional
|
||||
def open(self, tr, path, layer=None):
|
||||
""" Opens the directory with the given path.
|
||||
"""Opens the directory with the given path.
|
||||
|
||||
An error is raised if the directory does not exist, or if a layer is
|
||||
specified and a different layer was specified when the directory was
|
||||
|
@ -321,7 +365,7 @@ class DirectoryLayer(Directory):
|
|||
|
||||
@_impl.transactional
|
||||
def move_to(self, tr, new_absolute_path):
|
||||
raise Exception('The root directory cannot be moved.')
|
||||
raise Exception("The root directory cannot be moved.")
|
||||
|
||||
@_impl.transactional
|
||||
def move(self, tr, old_path, new_path):
|
||||
|
@ -339,8 +383,10 @@ class DirectoryLayer(Directory):
|
|||
old_path = _to_unicode_path(old_path)
|
||||
new_path = _to_unicode_path(new_path)
|
||||
|
||||
if old_path == new_path[:len(old_path)]:
|
||||
raise ValueError("The destination directory cannot be a subdirectory of the source directory.")
|
||||
if old_path == new_path[: len(old_path)]:
|
||||
raise ValueError(
|
||||
"The destination directory cannot be a subdirectory of the source directory."
|
||||
)
|
||||
|
||||
old_node = self._find(tr, old_path).prefetch_metadata(tr)
|
||||
new_node = self._find(tr, new_path).prefetch_metadata(tr)
|
||||
|
@ -349,18 +395,30 @@ class DirectoryLayer(Directory):
|
|||
raise ValueError("The source directory does not exist.")
|
||||
|
||||
if old_node.is_in_partition() or new_node.is_in_partition():
|
||||
if not old_node.is_in_partition() or not new_node.is_in_partition() or old_node.path != new_node.path:
|
||||
if (
|
||||
not old_node.is_in_partition()
|
||||
or not new_node.is_in_partition()
|
||||
or old_node.path != new_node.path
|
||||
):
|
||||
raise ValueError("Cannot move between partitions.")
|
||||
|
||||
return new_node.get_contents(self).move(tr, old_node.get_partition_subpath(), new_node.get_partition_subpath())
|
||||
return new_node.get_contents(self).move(
|
||||
tr, old_node.get_partition_subpath(), new_node.get_partition_subpath()
|
||||
)
|
||||
|
||||
if new_node.exists():
|
||||
raise ValueError("The destination directory already exists. Remove it first.")
|
||||
raise ValueError(
|
||||
"The destination directory already exists. Remove it first."
|
||||
)
|
||||
|
||||
parent_node = self._find(tr, new_path[:-1])
|
||||
if not parent_node.exists():
|
||||
raise ValueError("The parent of the destination directory does not exist. Create it first.")
|
||||
tr[parent_node.subspace[self.SUBDIRS][new_path[-1]]] = self._node_subspace.unpack(old_node.subspace.key())[0]
|
||||
raise ValueError(
|
||||
"The parent of the destination directory does not exist. Create it first."
|
||||
)
|
||||
tr[
|
||||
parent_node.subspace[self.SUBDIRS][new_path[-1]]
|
||||
] = self._node_subspace.unpack(old_node.subspace.key())[0]
|
||||
self._remove_from_parent(tr, old_path)
|
||||
return self._contents_of_node(old_node.subspace, new_path, old_node.layer())
|
||||
|
||||
|
@ -400,7 +458,9 @@ class DirectoryLayer(Directory):
|
|||
return False
|
||||
|
||||
if node.is_in_partition():
|
||||
return node.get_contents(self)._directory_layer._remove_internal(tr, node.get_partition_subpath(), fail_on_nonexistent)
|
||||
return node.get_contents(self)._directory_layer._remove_internal(
|
||||
tr, node.get_partition_subpath(), fail_on_nonexistent
|
||||
)
|
||||
|
||||
self._remove_recursive(tr, node.subspace)
|
||||
self._remove_from_parent(tr, path)
|
||||
|
@ -447,7 +507,7 @@ class DirectoryLayer(Directory):
|
|||
VERSION = (1, 0, 0)
|
||||
|
||||
def _check_version(self, tr, write_access=True):
|
||||
version = tr[self._root_node[b'version']]
|
||||
version = tr[self._root_node[b"version"]]
|
||||
|
||||
if not version.present():
|
||||
if write_access:
|
||||
|
@ -455,16 +515,22 @@ class DirectoryLayer(Directory):
|
|||
|
||||
return
|
||||
|
||||
version = struct.unpack('<III', bytes(version))
|
||||
version = struct.unpack("<III", bytes(version))
|
||||
|
||||
if version[0] > self.VERSION[0]:
|
||||
raise Exception("Cannot load directory with version %d.%d.%d using directory layer %d.%d.%d" % (version + self.VERSION))
|
||||
raise Exception(
|
||||
"Cannot load directory with version %d.%d.%d using directory layer %d.%d.%d"
|
||||
% (version + self.VERSION)
|
||||
)
|
||||
|
||||
if version[1] > self.VERSION[1] and write_access:
|
||||
raise Exception("Directory with version %d.%d.%d is read-only when opened using directory layer %d.%d.%d" % (version + self.VERSION))
|
||||
raise Exception(
|
||||
"Directory with version %d.%d.%d is read-only when opened using directory layer %d.%d.%d"
|
||||
% (version + self.VERSION)
|
||||
)
|
||||
|
||||
def _initialize_directory(self, tr):
|
||||
tr[self._root_node[b'version']] = struct.pack('<III', *self.VERSION)
|
||||
tr[self._root_node[b"version"]] = struct.pack("<III", *self.VERSION)
|
||||
|
||||
def _node_containing_key(self, tr, key):
|
||||
# Right now this is only used for _is_prefix_free(), but if we add
|
||||
|
@ -472,10 +538,12 @@ class DirectoryLayer(Directory):
|
|||
# path based on a key.
|
||||
if key.startswith(self._node_subspace.key()):
|
||||
return self._root_node
|
||||
for k, v in tr.get_range(self._node_subspace.range(()).start,
|
||||
self._node_subspace.pack((key,)) + b'\x00',
|
||||
reverse=True,
|
||||
limit=1):
|
||||
for k, v in tr.get_range(
|
||||
self._node_subspace.range(()).start,
|
||||
self._node_subspace.pack((key,)) + b"\x00",
|
||||
reverse=True,
|
||||
limit=1,
|
||||
):
|
||||
prev_prefix = self._node_subspace.unpack(k)[0]
|
||||
if key.startswith(prev_prefix):
|
||||
return self._node_with_prefix(prev_prefix)
|
||||
|
@ -489,7 +557,7 @@ class DirectoryLayer(Directory):
|
|||
def _contents_of_node(self, node, path, layer=None):
|
||||
prefix = self._node_subspace.unpack(node.key())[0]
|
||||
|
||||
if layer == b'partition':
|
||||
if layer == b"partition":
|
||||
return DirectoryPartition(self._path + path, prefix, self)
|
||||
else:
|
||||
return DirectorySubspace(self._path + path, prefix, self, layer)
|
||||
|
@ -497,8 +565,12 @@ class DirectoryLayer(Directory):
|
|||
def _find(self, tr, path):
|
||||
n = _Node(self._root_node, (), path)
|
||||
for i, name in enumerate(path):
|
||||
n = _Node(self._node_with_prefix(tr[n.subspace[self.SUBDIRS][name]]), path[:i + 1], path)
|
||||
if not n.exists() or n.layer(tr) == b'partition':
|
||||
n = _Node(
|
||||
self._node_with_prefix(tr[n.subspace[self.SUBDIRS][name]]),
|
||||
path[: i + 1],
|
||||
path,
|
||||
)
|
||||
if not n.exists() or n.layer(tr) == b"partition":
|
||||
return n
|
||||
return n
|
||||
|
||||
|
@ -521,8 +593,19 @@ class DirectoryLayer(Directory):
|
|||
# Returns true if the given prefix does not "intersect" any currently
|
||||
# allocated prefix (including the root node). This means that it neither
|
||||
# contains any other prefix nor is contained by any other prefix.
|
||||
return prefix and not self._node_containing_key(tr, prefix) \
|
||||
and not len(list(tr.get_range(self._node_subspace.pack((prefix,)), self._node_subspace.pack((_impl.strinc(prefix),)), limit=1)))
|
||||
return (
|
||||
prefix
|
||||
and not self._node_containing_key(tr, prefix)
|
||||
and not len(
|
||||
list(
|
||||
tr.get_range(
|
||||
self._node_subspace.pack((prefix,)),
|
||||
self._node_subspace.pack((_impl.strinc(prefix),)),
|
||||
limit=1,
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
def _is_prefix_empty(self, tr, prefix):
|
||||
return len(list(tr.get_range(prefix, _impl.strinc(prefix), limit=1))) == 0
|
||||
|
@ -541,11 +624,15 @@ def _to_unicode_path(path):
|
|||
if isinstance(name, bytes):
|
||||
path[i] = six.text_type(path[i])
|
||||
elif not isinstance(name, six.text_type):
|
||||
raise ValueError('Invalid path: must be a unicode string or a tuple of unicode strings')
|
||||
raise ValueError(
|
||||
"Invalid path: must be a unicode string or a tuple of unicode strings"
|
||||
)
|
||||
|
||||
return tuple(path)
|
||||
|
||||
raise ValueError('Invalid path: must be a unicode string or a tuple of unicode strings')
|
||||
raise ValueError(
|
||||
"Invalid path: must be a unicode string or a tuple of unicode strings"
|
||||
)
|
||||
|
||||
|
||||
directory = DirectoryLayer()
|
||||
|
@@ -561,43 +648,59 @@ class DirectorySubspace(Subspace, Directory):
         Directory.__init__(self, directory_layer, path, layer)

     def __repr__(self):
-        return 'DirectorySubspace(path=' + repr(self._path) + ', prefix=' + repr(self.rawPrefix) + ')'
+        return (
+            "DirectorySubspace(path="
+            + repr(self._path)
+            + ", prefix="
+            + repr(self.rawPrefix)
+            + ")"
+        )


 class DirectoryPartition(DirectorySubspace):
     def __init__(self, path, prefix, parent_directory_layer):
-        directory_layer = DirectoryLayer(Subspace(rawPrefix=prefix + b'\xfe'), Subspace(rawPrefix=prefix))
+        directory_layer = DirectoryLayer(
+            Subspace(rawPrefix=prefix + b"\xfe"), Subspace(rawPrefix=prefix)
+        )
         directory_layer._path = path
-        DirectorySubspace.__init__(self, path, prefix, directory_layer, b'partition')
+        DirectorySubspace.__init__(self, path, prefix, directory_layer, b"partition")

         self._parent_directory_layer = parent_directory_layer

     def __repr__(self):
-        return 'DirectoryPartition(path=' + repr(self._path) + ', prefix=' + repr(self.rawPrefix) + ')'
+        return (
+            "DirectoryPartition(path="
+            + repr(self._path)
+            + ", prefix="
+            + repr(self.rawPrefix)
+            + ")"
+        )

     def __getitem__(self, name):
-        raise Exception('Cannot open subspace in the root of a directory partition.')
+        raise Exception("Cannot open subspace in the root of a directory partition.")

     def key(self):
-        raise Exception('Cannot get key for the root of a directory partition.')
+        raise Exception("Cannot get key for the root of a directory partition.")

     def pack(self, t=tuple()):
-        raise Exception('Cannot pack keys using the root of a directory partition.')
+        raise Exception("Cannot pack keys using the root of a directory partition.")

     def unpack(self, key):
-        raise Exception('Cannot unpack keys using the root of a directory partition.')
+        raise Exception("Cannot unpack keys using the root of a directory partition.")

     def range(self, t=tuple()):
-        raise Exception('Cannot get range for the root of a directory partition.')
+        raise Exception("Cannot get range for the root of a directory partition.")

     def contains(self, key):
-        raise Exception('Cannot check whether a key belongs to the root of a directory partition.')
+        raise Exception(
+            "Cannot check whether a key belongs to the root of a directory partition."
+        )

     def as_foundationdb_key(self):
-        raise Exception('Cannot use the root of a directory partition as a key.')
+        raise Exception("Cannot use the root of a directory partition as a key.")

     def subspace(self, tuple):
-        raise Exception('Cannot open subspace in the root of a directory partition.')
+        raise Exception("Cannot open subspace in the root of a directory partition.")

     def _get_layer_for_path(self, path):
         if path == ():
@@ -606,8 +709,7 @@ class DirectoryPartition(DirectorySubspace):
         return self._directory_layer


-class _Node (object):
-
+class _Node(object):
     def __init__(self, subspace, path, target_path):
         self.subspace = subspace
         self.path = path
@@ -625,17 +727,23 @@ class _Node(object):

     def layer(self, tr=None):
         if tr:
-            self._layer = tr[self.subspace[b'layer']]
+            self._layer = tr[self.subspace[b"layer"]]
         elif self._layer is None:
-            raise Exception('Layer has not been read')
+            raise Exception("Layer has not been read")

         return self._layer

     def is_in_partition(self, tr=None, include_empty_subpath=False):
-        return self.exists() and self.layer(tr) == b'partition' and (include_empty_subpath or len(self.target_path) > len(self.path))
+        return (
+            self.exists()
+            and self.layer(tr) == b"partition"
+            and (include_empty_subpath or len(self.target_path) > len(self.path))
+        )

     def get_partition_subpath(self):
-        return self.target_path[len(self.path):]
+        return self.target_path[len(self.path) :]

     def get_contents(self, directory_layer, tr=None):
-        return directory_layer._contents_of_node(self.subspace, self.path, self.layer(tr))
+        return directory_layer._contents_of_node(
+            self.subspace, self.path, self.layer(tr)
+        )
[File diff suppressed because it is too large]
@@ -40,13 +40,15 @@ def _get_boundary_keys(db_or_tr, begin, end):
             lastbegin = begin
             tr.options.set_read_system_keys()
             tr.options.set_lock_aware()
-            kvs = tr.snapshot.get_range(b'\xff' + b'/keyServers/' + begin, b'\xff' + b'/keyServers/' + end)
+            kvs = tr.snapshot.get_range(
+                b"\xff" + b"/keyServers/" + begin, b"\xff" + b"/keyServers/" + end
+            )
             if first_time:
                 first_time = False
                 yield None  # trick to get the above get_range to be asynchronously dispatched before get_boundary_keys() returns.
             for kv in kvs:
                 yield kv.key[13:]
-                begin = kv.key[13:] + b'\x00'
+                begin = kv.key[13:] + b"\x00"
             begin = end
         except _impl.FDBError as e:
             # if we get a transaction_too_old and *something* has happened, then we are no longer transactional
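For context on the slice above: `kv.key[13:]` strips the system-key prefix, which is exactly 13 bytes. A minimal sketch of the arithmetic:

    # b"\xff" (1 byte) + b"/keyServers/" (12 bytes) is the prefix read above
    prefix = b"\xff" + b"/keyServers/"
    assert len(prefix) == 13

    key = prefix + b"user_key"
    assert key[13:] == b"user_key"  # the boundary key with the prefix removed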
@@ -71,4 +73,8 @@ def get_boundary_keys(db_or_tr, begin, end):

 @_impl.transactional
 def get_addresses_for_key(tr, key):
     keyBytes = _impl.keyToBytes(key)
-    return _impl.FutureStringArray(tr.capi.fdb_transaction_get_addresses_for_key(tr.tpointer, keyBytes, len(keyBytes)))
+    return _impl.FutureStringArray(
+        tr.capi.fdb_transaction_get_addresses_for_key(
+            tr.tpointer, keyBytes, len(keyBytes)
+        )
+    )
@@ -23,13 +23,12 @@
 import fdb.tuple


-class Subspace (object):
-
-    def __init__(self, prefixTuple=tuple(), rawPrefix=b''):
+class Subspace(object):
+    def __init__(self, prefixTuple=tuple(), rawPrefix=b""):
         self.rawPrefix = fdb.tuple.pack(prefixTuple, prefix=rawPrefix)

     def __repr__(self):
-        return 'Subspace(rawPrefix=' + repr(self.rawPrefix) + ')'
+        return "Subspace(rawPrefix=" + repr(self.rawPrefix) + ")"

     def __getitem__(self, name):
         return Subspace((name,), self.rawPrefix)

@@ -45,7 +44,7 @@ class Subspace(object):

     def unpack(self, key):
         if not self.contains(key):
-            raise ValueError('Cannot unpack key that is not in subspace.')
+            raise ValueError("Cannot unpack key that is not in subspace.")

         return fdb.tuple.unpack(key, prefix_len=len(self.rawPrefix))
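For context, a `Subspace` simply prepends its `rawPrefix` to every packed tuple, and keys outside that prefix fail `unpack`. A minimal usage sketch, assuming the `fdb` Python binding with an API version already selected:

    import fdb
    fdb.api_version(720)

    sub = fdb.Subspace(("users",))   # rawPrefix = fdb.tuple.pack(("users",))
    key = sub.pack((42,))            # rawPrefix + fdb.tuple.pack((42,))
    assert sub.contains(key)
    assert sub.unpack(key) == (42,)  # ValueError for keys not in the subspace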
@@ -25,9 +25,10 @@ https://apple.github.io/foundationdb/api-python.html"""

 from fdb import impl as _impl

-_tenant_map_prefix = b'\xff\xff/management/tenant/map/'
+_tenant_map_prefix = b"\xff\xff/management/tenant/map/"
+

 # If the existence_check_marker is an empty list, then check whether the tenant exists.
 # After the check, append an item to the existence_check_marker list so that subsequent
 # calls to this function will not perform the existence check.
 #

@@ -37,11 +38,12 @@ def _check_tenant_existence(tr, key, existence_check_marker, force_maybe_commite
         existing_tenant = tr[key].wait()
         existence_check_marker.append(None)
         if force_maybe_commited:
-            raise _impl.FDBError(1021) # maybe_committed
+            raise _impl.FDBError(1021)  # maybe_committed
         return existing_tenant != None

     return None


 # Attempt to create a tenant in the cluster. If existence_check_marker is an empty
 # list, then this function will check if the tenant already exists and fail if it does.
 # Once the existence check is completed, it will not be done again if this function

@@ -51,15 +53,23 @@ def _check_tenant_existence(tr, key, existence_check_marker, force_maybe_commite
 #
 # If the existence_check_marker is a non-empty list, then the existence check is skipped.
 @_impl.transactional
-def _create_tenant_impl(tr, tenant_name, existence_check_marker, force_existence_check_maybe_committed=False):
+def _create_tenant_impl(
+    tr, tenant_name, existence_check_marker, force_existence_check_maybe_committed=False
+):
     tr.options.set_special_key_space_enable_writes()
-    key = b'%s%s' % (_tenant_map_prefix, tenant_name)
+    key = b"%s%s" % (_tenant_map_prefix, tenant_name)

-    if _check_tenant_existence(tr, key, existence_check_marker, force_existence_check_maybe_committed) is True:
-        raise _impl.FDBError(2132) # tenant_already_exists
+    if (
+        _check_tenant_existence(
+            tr, key, existence_check_marker, force_existence_check_maybe_committed
+        )
+        is True
+    ):
+        raise _impl.FDBError(2132)  # tenant_already_exists

-    tr[key] = b''
+    tr[key] = b""


 # Attempt to delete a tenant from the cluster. If existence_check_marker is an empty
 # list, then this function will check if the tenant already exists and fail if it does
 # not. Once the existence check is completed, it will not be done again if this function

@@ -69,15 +79,23 @@ def _create_tenant_impl(tr, tenant_name, existence_check_marker, force_existence
 #
 # If the existence_check_marker is a non-empty list, then the existence check is skipped.
 @_impl.transactional
-def _delete_tenant_impl(tr, tenant_name, existence_check_marker, force_existence_check_maybe_committed=False):
+def _delete_tenant_impl(
+    tr, tenant_name, existence_check_marker, force_existence_check_maybe_committed=False
+):
     tr.options.set_special_key_space_enable_writes()
-    key = b'%s%s' % (_tenant_map_prefix, tenant_name)
+    key = b"%s%s" % (_tenant_map_prefix, tenant_name)

-    if _check_tenant_existence(tr, key, existence_check_marker, force_existence_check_maybe_committed) is False:
-        raise _impl.FDBError(2131) # tenant_not_found
+    if (
+        _check_tenant_existence(
+            tr, key, existence_check_marker, force_existence_check_maybe_committed
+        )
+        is False
+    ):
+        raise _impl.FDBError(2131)  # tenant_not_found

     del tr[key]


 class FDBTenantList(object):
     """Iterates over the results of list_tenants query. Returns
     KeyValue objects.

@@ -96,6 +114,7 @@ class FDBTenantList(object):
             tenant_name = _impl.remove_prefix(next_item.key, _tenant_map_prefix)
             yield _impl.KeyValue(tenant_name, next_item.value)


 # Lists the tenants created in the cluster, specified by the begin and end range.
 # Also limited in number of results by the limit parameter.
 # Returns an iterable object that yields KeyValue objects

@@ -104,29 +123,36 @@ class FDBTenantList(object):
 @_impl.transactional
 def _list_tenants_impl(tr, begin, end, limit):
     tr.options.set_raw_access()
-    begin_key = b'%s%s' % (_tenant_map_prefix, begin)
-    end_key = b'%s%s' % (_tenant_map_prefix, end)
+    begin_key = b"%s%s" % (_tenant_map_prefix, begin)
+    end_key = b"%s%s" % (_tenant_map_prefix, end)

     rangeresult = tr.get_range(begin_key, end_key, limit)

     return FDBTenantList(rangeresult)


 def create_tenant(db_or_tr, tenant_name):
     tenant_name = _impl.process_tenant_name(tenant_name)

     # Only perform the existence check when run using a database
     # Callers using a transaction are expected to check existence themselves if required
-    existence_check_marker = [] if not isinstance(db_or_tr, _impl.TransactionRead) else [None]
+    existence_check_marker = (
+        [] if not isinstance(db_or_tr, _impl.TransactionRead) else [None]
+    )
     _create_tenant_impl(db_or_tr, tenant_name, existence_check_marker)


 def delete_tenant(db_or_tr, tenant_name):
     tenant_name = _impl.process_tenant_name(tenant_name)

     # Only perform the existence check when run using a database
     # Callers using a transaction are expected to check existence themselves if required
-    existence_check_marker = [] if not isinstance(db_or_tr, _impl.TransactionRead) else [None]
+    existence_check_marker = (
+        [] if not isinstance(db_or_tr, _impl.TransactionRead) else [None]
+    )
     _delete_tenant_impl(db_or_tr, tenant_name, existence_check_marker)


 def list_tenants(db_or_tr, begin, end, limit):
     begin = _impl.process_tenant_name(begin)
     end = _impl.process_tenant_name(end)
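The `existence_check_marker` list is what makes the database-level helpers above retry-safe: the check runs only while the list is empty, so a retry after a `maybe_committed` (1021) error does not spuriously raise `tenant_already_exists` or `tenant_not_found`. A minimal sketch of the idiom, with hypothetical names standing in for the tenant-map read:

    def expensive_existence_check():
        pass  # hypothetical stand-in for reading the tenant map entry

    def do_once_then_skip(marker):
        # marker is a list shared across retries of the same logical operation
        if not marker:                     # empty: first attempt, run the check
            expensive_existence_check()
            marker.append(None)            # later retries skip the check

    marker = []
    for attempt in range(3):               # models the transactional retry loop
        do_once_then_skip(marker)          # the check runs exactly once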
@@ -39,8 +39,8 @@ BYTES_CODE = 0x01
 STRING_CODE = 0x02
 NESTED_CODE = 0x05
 INT_ZERO_CODE = 0x14
-POS_INT_END = 0x1d
-NEG_INT_START = 0x0b
+POS_INT_END = 0x1D
+NEG_INT_START = 0x0B
 FLOAT_CODE = 0x20
 DOUBLE_CODE = 0x21
 FALSE_CODE = 0x26

@@ -54,10 +54,10 @@ VERSIONSTAMP_CODE = 0x33
 def _find_terminator(v, pos):
     # Finds the start of the next terminator [\x00]![\xff] or the end of v
     while True:
-        pos = v.find(b'\x00', pos)
+        pos = v.find(b"\x00", pos)
         if pos < 0:
             return len(v)
-        if pos + 1 == len(v) or v[pos + 1:pos + 2] != b'\xff':
+        if pos + 1 == len(v) or v[pos + 1 : pos + 2] != b"\xff":
             return pos
         pos += 2

@@ -66,9 +66,9 @@ def _find_terminator(v, pos):
 # If decoding and sign bit is 0 (negative), flip all of the bits. Otherwise, just flip sign.
 def _float_adjust(v, encode):
     if encode and six.indexbytes(v, 0) & 0x80 != 0x00:
-        return b''.join(map(lambda x: six.int2byte(x ^ 0xff), six.iterbytes(v)))
+        return b"".join(map(lambda x: six.int2byte(x ^ 0xFF), six.iterbytes(v)))
     elif not encode and six.indexbytes(v, 0) & 0x80 != 0x80:
-        return b''.join(map(lambda x: six.int2byte(x ^ 0xff), six.iterbytes(v)))
+        return b"".join(map(lambda x: six.int2byte(x ^ 0xFF), six.iterbytes(v)))
     else:
         return six.int2byte(six.indexbytes(v, 0) ^ 0x80) + v[1:]
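The effect of `_float_adjust` is to make unsigned byte comparison agree with numeric order: non-negative floats get their sign bit set, negative floats have every bit flipped. A standalone sketch of the same transformation:

    import struct

    def float_adjust_encode(v: bytes) -> bytes:
        if v[0] & 0x80:                      # negative: flip all of the bits
            return bytes(b ^ 0xFF for b in v)
        return bytes([v[0] ^ 0x80]) + v[1:]  # non-negative: flip the sign bit only

    enc = lambda f: float_adjust_encode(struct.pack(">f", f))
    assert enc(-1.0) < enc(-0.5) < enc(0.0) < enc(0.5) < enc(1.0)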
@@ -84,7 +84,9 @@ class SingleFloat(object):
         elif isinstance(value, six.integer_types):
             self.value = ctypes.c_float(value).value
         else:
-            raise ValueError("Incompatible type for single-precision float: " + repr(value))
+            raise ValueError(
+                "Incompatible type for single-precision float: " + repr(value)
+            )

     # Comparisons
     def __eq__(self, other):

@@ -119,24 +121,42 @@ class Versionstamp(object):
     LENGTH = 12
     _TR_VERSION_LEN = 10
     _MAX_USER_VERSION = (1 << 16) - 1
-    _UNSET_TR_VERSION = 10 * six.int2byte(0xff)
-    _STRUCT_FORMAT_STRING = '>' + str(_TR_VERSION_LEN) + 'sH'
+    _UNSET_TR_VERSION = 10 * six.int2byte(0xFF)
+    _STRUCT_FORMAT_STRING = ">" + str(_TR_VERSION_LEN) + "sH"

     @classmethod
     def validate_tr_version(cls, tr_version):
         if tr_version is None:
             return
         if not isinstance(tr_version, bytes):
-            raise TypeError("Global version has illegal type " + str(type(tr_version)) + " (requires bytes)")
+            raise TypeError(
+                "Global version has illegal type "
+                + str(type(tr_version))
+                + " (requires bytes)"
+            )
         elif len(tr_version) != cls._TR_VERSION_LEN:
-            raise ValueError("Global version has incorrect length " + str(len(tr_version)) + " (requires " + str(cls._TR_VERSION_LEN) + ")")
+            raise ValueError(
+                "Global version has incorrect length "
+                + str(len(tr_version))
+                + " (requires "
+                + str(cls._TR_VERSION_LEN)
+                + ")"
+            )

     @classmethod
     def validate_user_version(cls, user_version):
         if not isinstance(user_version, six.integer_types):
-            raise TypeError("Local version has illegal type " + str(type(user_version)) + " (requires integer type)")
+            raise TypeError(
+                "Local version has illegal type "
+                + str(type(user_version))
+                + " (requires integer type)"
+            )
         elif user_version < 0 or user_version > cls._MAX_USER_VERSION:
-            raise ValueError("Local version has value " + str(user_version) + " which is out of range")
+            raise ValueError(
+                "Local version has value "
+                + str(user_version)
+                + " which is out of range"
+            )

     def __init__(self, tr_version=None, user_version=0):
         Versionstamp.validate_tr_version(tr_version)

@@ -153,30 +173,50 @@ class Versionstamp(object):
         if not isinstance(v, bytes):
             raise TypeError("Cannot parse versionstamp from non-byte string")
         elif len(v) - start < cls.LENGTH:
-            raise ValueError("Versionstamp byte string is too short (only " + str(len(v) - start) + " bytes to read from")
+            raise ValueError(
+                "Versionstamp byte string is too short (only "
+                + str(len(v) - start)
+                + " bytes to read from"
+            )
         else:
-            tr_version = v[start:start + cls._TR_VERSION_LEN]
+            tr_version = v[start : start + cls._TR_VERSION_LEN]
             if tr_version == cls._UNSET_TR_VERSION:
                 tr_version = None
-            user_version = six.indexbytes(v, start + cls._TR_VERSION_LEN) * (1 << 8) + six.indexbytes(v, start + cls._TR_VERSION_LEN + 1)
+            user_version = six.indexbytes(v, start + cls._TR_VERSION_LEN) * (
+                1 << 8
+            ) + six.indexbytes(v, start + cls._TR_VERSION_LEN + 1)
             return Versionstamp(tr_version, user_version)

     def is_complete(self):
         return self.tr_version is not None

     def __repr__(self):
-        return "fdb.tuple.Versionstamp(" + repr(self.tr_version) + ", " + repr(self.user_version) + ")"
+        return (
+            "fdb.tuple.Versionstamp("
+            + repr(self.tr_version)
+            + ", "
+            + repr(self.user_version)
+            + ")"
+        )

     def __str__(self):
-        return "Versionstamp(" + repr(self.tr_version) + ", " + str(self.user_version) + ")"
+        return (
+            "Versionstamp("
+            + repr(self.tr_version)
+            + ", "
+            + str(self.user_version)
+            + ")"
+        )

     def to_bytes(self):
         tr_version = self.tr_version
         if isinstance(tr_version, fdb.impl.Value):
             tr_version = tr_version.value
-        return struct.pack(self._STRUCT_FORMAT_STRING,
-                           tr_version if self.is_complete() else self._UNSET_TR_VERSION,
-                           self.user_version)
+        return struct.pack(
+            self._STRUCT_FORMAT_STRING,
+            tr_version if self.is_complete() else self._UNSET_TR_VERSION,
+            self.user_version,
+        )

     def completed(self, new_tr_version):
         if self.is_complete():

@@ -187,7 +227,10 @@ class Versionstamp(object):
     # Comparisons
     def __eq__(self, other):
         if isinstance(other, Versionstamp):
-            return self.tr_version == other.tr_version and self.user_version == other.user_version
+            return (
+                self.tr_version == other.tr_version
+                and self.user_version == other.user_version
+            )
         else:
             return False
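A `Versionstamp` is 12 bytes: a 10-byte transaction version followed by a 2-byte big-endian user version, which is what the two `six.indexbytes` reads in `from_bytes` reassemble. A small layout sketch using plain `struct`:

    import struct

    tr_version = bytes(range(10))                      # 10-byte transaction version
    packed = struct.pack(">10sH", tr_version, 0x0102)  # same ">10sH" format as above
    assert len(packed) == 12
    assert packed[10] * (1 << 8) + packed[11] == 0x0102  # from_bytes-style read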
@@ -224,18 +267,22 @@ def _decode(v, pos):
         return None, pos + 1
     elif code == BYTES_CODE:
         end = _find_terminator(v, pos + 1)
-        return v[pos + 1:end].replace(b"\x00\xFF", b"\x00"), end + 1
+        return v[pos + 1 : end].replace(b"\x00\xFF", b"\x00"), end + 1
     elif code == STRING_CODE:
         end = _find_terminator(v, pos + 1)
-        return v[pos + 1:end].replace(b"\x00\xFF", b"\x00").decode("utf-8"), end + 1
+        return v[pos + 1 : end].replace(b"\x00\xFF", b"\x00").decode("utf-8"), end + 1
     elif code >= INT_ZERO_CODE and code < POS_INT_END:
         n = code - 20
         end = pos + 1 + n
-        return struct.unpack(">Q", b'\x00' * (8 - n) + v[pos + 1:end])[0], end
+        return struct.unpack(">Q", b"\x00" * (8 - n) + v[pos + 1 : end])[0], end
     elif code > NEG_INT_START and code < INT_ZERO_CODE:
         n = 20 - code
         end = pos + 1 + n
-        return struct.unpack(">Q", b'\x00' * (8 - n) + v[pos + 1:end])[0] - _size_limits[n], end
+        return (
+            struct.unpack(">Q", b"\x00" * (8 - n) + v[pos + 1 : end])[0]
+            - _size_limits[n],
+            end,
+        )
     elif code == POS_INT_END:  # 0x1d; Positive 9-255 byte integer
         length = six.indexbytes(v, pos + 1)
         val = 0

@@ -244,25 +291,37 @@ def _decode(v, pos):
             val += six.indexbytes(v, pos + 2 + i)
         return val, pos + 2 + length
     elif code == NEG_INT_START:  # 0x0b; Negative 9-255 byte integer
-        length = six.indexbytes(v, pos + 1) ^ 0xff
+        length = six.indexbytes(v, pos + 1) ^ 0xFF
         val = 0
         for i in _range(length):
             val = val << 8
             val += six.indexbytes(v, pos + 2 + i)
         return val - (1 << (length * 8)) + 1, pos + 2 + length
     elif code == FLOAT_CODE:
-        return SingleFloat(struct.unpack(">f", _float_adjust(v[pos + 1:pos + 5], False))[0]), pos + 5
+        return (
+            SingleFloat(
+                struct.unpack(">f", _float_adjust(v[pos + 1 : pos + 5], False))[0]
+            ),
+            pos + 5,
+        )
     elif code == DOUBLE_CODE:
-        return struct.unpack(">d", _float_adjust(v[pos + 1:pos + 9], False))[0], pos + 9
+        return (
+            struct.unpack(">d", _float_adjust(v[pos + 1 : pos + 9], False))[0],
+            pos + 9,
+        )
     elif code == UUID_CODE:
-        return uuid.UUID(bytes=v[pos + 1:pos + 17]), pos + 17
+        return uuid.UUID(bytes=v[pos + 1 : pos + 17]), pos + 17
     elif code == FALSE_CODE:
         if fdb.is_api_version_selected() and fdb.get_api_version() < 500:
-            raise ValueError("Invalid API version " + str(fdb._version) + " for boolean types")
+            raise ValueError(
+                "Invalid API version " + str(fdb._version) + " for boolean types"
+            )
         return False, pos + 1
     elif code == TRUE_CODE:
         if fdb.is_api_version_selected() and fdb.get_api_version() < 500:
-            raise ValueError("Invalid API version " + str(fdb._version) + " for boolean types")
+            raise ValueError(
+                "Invalid API version " + str(fdb._version) + " for boolean types"
+            )
         return True, pos + 1
     elif code == VERSIONSTAMP_CODE:
         return Versionstamp.from_bytes(v, pos + 1), pos + 1 + Versionstamp.LENGTH

@@ -271,7 +330,7 @@ def _decode(v, pos):
         end_pos = pos + 1
         while end_pos < len(v):
             if six.indexbytes(v, end_pos) == 0x00:
-                if end_pos + 1 < len(v) and six.indexbytes(v, end_pos + 1) == 0xff:
+                if end_pos + 1 < len(v) and six.indexbytes(v, end_pos + 1) == 0xFF:
                     ret.append(None)
                     end_pos += 2
                 else:

@@ -299,11 +358,15 @@ def _reduce_children(child_values):


 if sys.version_info < (2, 7):
+
     def _bit_length(x):
-        s = bin(x)       # binary representation:  bin(-37) --> '-0b100101'
-        s = s.lstrip('-0b')  # remove leading zeros and minus sign
+        s = bin(x)  # binary representation:  bin(-37) --> '-0b100101'
+        s = s.lstrip("-0b")  # remove leading zeros and minus sign
         return len(s)

+
 else:
+
     def _bit_length(x):
         return x.bit_length()

@@ -314,23 +377,33 @@ def _encode(value, nested=False):
     # sorting need to work too!
     if value == None:  # ==, not is, because some fdb.impl.Value are equal to None
         if nested:
-            return b''.join([six.int2byte(NULL_CODE), six.int2byte(0xff)]), -1
+            return b"".join([six.int2byte(NULL_CODE), six.int2byte(0xFF)]), -1
         else:
-            return b''.join([six.int2byte(NULL_CODE)]), -1
+            return b"".join([six.int2byte(NULL_CODE)]), -1
     elif isinstance(value, bytes):  # also gets non-None fdb.impl.Value
-        return six.int2byte(BYTES_CODE) + value.replace(b'\x00', b'\x00\xFF') + b'\x00', -1
+        return (
+            six.int2byte(BYTES_CODE) + value.replace(b"\x00", b"\x00\xFF") + b"\x00",
+            -1,
+        )
     elif isinstance(value, six.text_type):
-        return six.int2byte(STRING_CODE) + value.encode('utf-8').replace(b'\x00', b'\x00\xFF') + b'\x00', -1
-    elif isinstance(value, six.integer_types) and (not isinstance(value, bool) or (hasattr(fdb, '_version') and fdb._version < 500)):
+        return (
+            six.int2byte(STRING_CODE)
+            + value.encode("utf-8").replace(b"\x00", b"\x00\xFF")
+            + b"\x00",
+            -1,
+        )
+    elif isinstance(value, six.integer_types) and (
+        not isinstance(value, bool) or (hasattr(fdb, "_version") and fdb._version < 500)
+    ):
         if value == 0:
-            return b''.join([six.int2byte(INT_ZERO_CODE)]), -1
+            return b"".join([six.int2byte(INT_ZERO_CODE)]), -1
         elif value > 0:
             if value >= _size_limits[-1]:
                 length = (_bit_length(value) + 7) // 8
                 data = [six.int2byte(POS_INT_END), six.int2byte(length)]
                 for i in _range(length - 1, -1, -1):
-                    data.append(six.int2byte((value >> (8 * i)) & 0xff))
-                return b''.join(data), -1
+                    data.append(six.int2byte((value >> (8 * i)) & 0xFF))
+                return b"".join(data), -1

             n = bisect_left(_size_limits, value)
             return six.int2byte(INT_ZERO_CODE + n) + struct.pack(">Q", value)[-n:], -1

@@ -338,34 +411,53 @@ def _encode(value, nested=False):
             if -value >= _size_limits[-1]:
                 length = (_bit_length(value) + 7) // 8
                 value += (1 << (length * 8)) - 1
-                data = [six.int2byte(NEG_INT_START), six.int2byte(length ^ 0xff)]
+                data = [six.int2byte(NEG_INT_START), six.int2byte(length ^ 0xFF)]
                 for i in _range(length - 1, -1, -1):
-                    data.append(six.int2byte((value >> (8 * i)) & 0xff))
-                return b''.join(data), -1
+                    data.append(six.int2byte((value >> (8 * i)) & 0xFF))
+                return b"".join(data), -1

             n = bisect_left(_size_limits, -value)
             maxv = _size_limits[n]
-            return six.int2byte(INT_ZERO_CODE - n) + struct.pack(">Q", maxv + value)[-n:], -1
+            return (
+                six.int2byte(INT_ZERO_CODE - n) + struct.pack(">Q", maxv + value)[-n:],
+                -1,
+            )
     elif isinstance(value, ctypes.c_float) or isinstance(value, SingleFloat):
-        return six.int2byte(FLOAT_CODE) + _float_adjust(struct.pack(">f", value.value), True), -1
+        return (
+            six.int2byte(FLOAT_CODE)
+            + _float_adjust(struct.pack(">f", value.value), True),
+            -1,
+        )
     elif isinstance(value, ctypes.c_double):
-        return six.int2byte(DOUBLE_CODE) + _float_adjust(struct.pack(">d", value.value), True), -1
+        return (
+            six.int2byte(DOUBLE_CODE)
+            + _float_adjust(struct.pack(">d", value.value), True),
+            -1,
+        )
     elif isinstance(value, float):
-        return six.int2byte(DOUBLE_CODE) + _float_adjust(struct.pack(">d", value), True), -1
+        return (
+            six.int2byte(DOUBLE_CODE) + _float_adjust(struct.pack(">d", value), True),
+            -1,
+        )
     elif isinstance(value, uuid.UUID):
         return six.int2byte(UUID_CODE) + value.bytes, -1
     elif isinstance(value, bool):
         if value:
-            return b''.join([six.int2byte(TRUE_CODE)]), -1
+            return b"".join([six.int2byte(TRUE_CODE)]), -1
         else:
-            return b''.join([six.int2byte(FALSE_CODE)]), -1
+            return b"".join([six.int2byte(FALSE_CODE)]), -1
     elif isinstance(value, Versionstamp):
         version_pos = -1 if value.is_complete() else 1
         return six.int2byte(VERSIONSTAMP_CODE) + value.to_bytes(), version_pos
     elif isinstance(value, tuple) or isinstance(value, list):
-        child_bytes, version_pos = _reduce_children(map(lambda x: _encode(x, True), value))
+        child_bytes, version_pos = _reduce_children(
+            map(lambda x: _encode(x, True), value)
+        )
         new_version_pos = -1 if version_pos < 0 else version_pos + 1
-        return b''.join([six.int2byte(NESTED_CODE)] + child_bytes + [six.int2byte(0x00)]), new_version_pos
+        return (
+            b"".join([six.int2byte(NESTED_CODE)] + child_bytes + [six.int2byte(0x00)]),
+            new_version_pos,
+        )
     else:
         raise ValueError("Unsupported data type: " + str(type(value)))
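For small integers the type code doubles as a length: `INT_ZERO_CODE` (0x14) plus n marks a positive n-byte value, minus n a negative one stored with an offset so raw byte order still sorts numerically. A worked sketch, assuming the `fdb.tuple` module with an API version selected:

    import fdb
    fdb.api_version(720)
    import fdb.tuple

    assert fdb.tuple.pack((0,)) == b"\x14"        # INT_ZERO_CODE alone
    assert fdb.tuple.pack((5,)) == b"\x15\x05"    # 0x14 + 1, one payload byte
    assert fdb.tuple.pack((-5,)) == b"\x13\xfa"   # 0x14 - 1, stored as 255 - 5
    assert fdb.tuple.unpack(b"\x13\xfa") == (-5,)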
@@ -387,13 +479,13 @@ def _pack_maybe_with_versionstamp(t, prefix=None):
         version_pos += len(prefix) if prefix is not None else 0
         bytes_list.extend(child_bytes)
         if fdb.is_api_version_selected() and fdb.get_api_version() < 520:
-            bytes_list.append(struct.pack('<H', version_pos))
+            bytes_list.append(struct.pack("<H", version_pos))
         else:
-            bytes_list.append(struct.pack('<L', version_pos))
+            bytes_list.append(struct.pack("<L", version_pos))
     else:
         bytes_list.extend(child_bytes)

-    return b''.join(bytes_list), version_pos
+    return b"".join(bytes_list), version_pos


 # packs the specified tuple into a key

@@ -408,7 +500,9 @@ def pack(t, prefix=None):
 def pack_with_versionstamp(t, prefix=None):
     res, version_pos = _pack_maybe_with_versionstamp(t, prefix)
     if version_pos < 0:
-        raise ValueError("No incomplete versionstamp included in tuple pack with versionstamp")
+        raise ValueError(
+            "No incomplete versionstamp included in tuple pack with versionstamp"
+        )
     return res
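`pack_with_versionstamp` appends the byte offset of the one incomplete versionstamp so the server can overwrite the placeholder at commit; it is meant to be paired with the versionstamped-key atomic operation. A usage sketch, assuming a decorated transaction and API version 720:

    import fdb
    fdb.api_version(720)
    import fdb.tuple

    @fdb.transactional
    def log_event(tr, payload):
        # the key ends with a 12-byte placeholder plus its recorded offset
        key = fdb.tuple.pack_with_versionstamp(("log", fdb.tuple.Versionstamp()))
        tr.set_versionstamped_key(key, payload)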
@@ -433,6 +527,7 @@ def has_incomplete_versionstamp(t):
             return has_incomplete_versionstamp(item)
         else:
             return False

     return any(map(_elem_has_incomplete, t))


@@ -450,9 +545,7 @@ def range(t):
         raise Exception("fdbtuple range() expects a tuple, got a " + str(type(t)))

     p = pack(t)
-    return slice(
-        p + b'\x00',
-        p + b'\xff')
+    return slice(p + b"\x00", p + b"\xff")


 def _code_for(value):
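The returned `slice` brackets exactly the keys of tuples that extend `t`, since every such key continues with a byte between `\x00` and `\xff` after the packed prefix. A usage sketch:

    import fdb
    fdb.api_version(720)
    import fdb.tuple

    r = fdb.tuple.range(("users",))
    assert r.start == fdb.tuple.pack(("users",)) + b"\x00"
    assert r.stop == fdb.tuple.pack(("users",)) + b"\xff"
    # typical use: tr.get_range(r.start, r.stop) over all tuples under ("users",)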
@@ -462,7 +555,9 @@ def _code_for(value):
         return BYTES_CODE
     elif isinstance(value, six.text_type):
         return STRING_CODE
-    elif (not hasattr(fdb, '_version') or fdb._version >= 500) and isinstance(value, bool):
+    elif (not hasattr(fdb, "_version") or fdb._version >= 500) and isinstance(
+        value, bool
+    ):
         return FALSE_CODE
     elif isinstance(value, six.integer_types):
         return INT_ZERO_CODE

@@ -514,8 +609,8 @@ def _compare_values(value1, value2):
     if code1 == NULL_CODE:
         return 0
     elif code1 == STRING_CODE:
-        encoded1 = value1.encode('utf-8')
-        encoded2 = value2.encode('utf-8')
+        encoded1 = value1.encode("utf-8")
+        encoded2 = value2.encode("utf-8")
         return -1 if encoded1 < encoded2 else 0 if encoded1 == encoded2 else 1
     elif code1 == FLOAT_CODE:
         f1 = value1 if isinstance(value1, SingleFloat) else SingleFloat(value1.value)
@@ -518,7 +518,7 @@ def test_timeouts(db):
     for i in range(2):
         tr.options.set_timeout(1500)
         tr.set_read_version(0x7ffffffffffffff0)
-        x = tr[b'foo']
+        _ = tr[b'foo']
         try:
             tr.commit().wait()
             tr.reset()

@@ -557,7 +557,7 @@ def test_db_timeouts(db):
     tr[b'foo'] = b'bar'
     tr.on_error(err).wait()  # should not throw
     time.sleep(1)
-    tr[b'foo']
+    _ = tr[b'foo']
     try:
         tr.commit().wait()  # should throw
         raise TestError("(2) Timeout didn't fire.")

@@ -574,7 +574,7 @@ def test_db_timeouts(db):
     time.sleep(0.75)
     tr[b'foo'] = b'bar'
     tr.on_error(err).wait()  # should not throw
-    tr[b'foo']
+    _ = tr[b'foo']
     time.sleep(0.75)
     try:
         tr.commit().wait()  # should throw

@@ -615,7 +615,7 @@ def test_db_timeouts(db):
     tr.reset()
     tr[b'foo'] = b'bar'
     time.sleep(0.2)
-    tr.on_error(err).wait() #should not throw
+    tr.on_error(err).wait()  # should not throw
     tr[b'foo'] = b'bar'
     time.sleep(0.8)
     try:
@@ -24,15 +24,18 @@ import sys
 if __name__ == '__main__':
     fdb.api_version(720)

+
 @fdb.transactional
 def setValue(tr, key, value):
     tr[key] = value

+
 @fdb.transactional
 def setValueWithLimit(tr, key, value, limit):
     tr.options.set_size_limit(limit)
     tr[key] = value

+
 def test_size_limit_option(db):
     value = b'a' * 1024

@@ -69,6 +72,7 @@ def test_size_limit_option(db):
     # Reset the size limit for future tests
     db.options.set_transaction_size_limit(10000000)

+
 @fdb.transactional
 def test_get_approximate_size(tr):
     tr[b'key1'] = b'value1'

@@ -90,6 +94,7 @@ def test_get_approximate_size(tr):
     s5 = tr.get_approximate_size().wait()
     assert(s4 < s5)

+
 # Expect a cluster file as input. This test will write to the FDB cluster, so
 # be aware of potential side effects.
 if __name__ == '__main__':
@@ -27,24 +27,26 @@ from fdb.tuple import pack
 if __name__ == '__main__':
     fdb.api_version(720)

+
 def cleanup_tenant(db, tenant_name):
     try:
         tenant = db.open_tenant(tenant_name)
         del tenant[:]
         fdb.tenant_management.delete_tenant(db, tenant_name)
     except fdb.FDBError as e:
-        if e.code == 2131: # tenant not found
+        if e.code == 2131:  # tenant not found
             pass
         else:
             raise

+
 def test_tenant_tuple_name(db):
-    tuplename=(b'test', b'level', b'hierarchy', 3, 1.24, 'str')
+    tuplename = (b'test', b'level', b'hierarchy', 3, 1.24, 'str')
     cleanup_tenant(db, tuplename)

     fdb.tenant_management.create_tenant(db, tuplename)

-    tenant=db.open_tenant(tuplename)
+    tenant = db.open_tenant(tuplename)
     tenant[b'foo'] = b'bar'

     assert tenant[b'foo'] == b'bar'

@@ -100,7 +102,7 @@ def test_tenant_operations(db):
         del tr1[:]
         tr1.commit().wait()
     except fdb.FDBError as e:
-        tr.on_error(e).wait()
+        tr1.on_error(e).wait()

     assert tenant1[b'tenant_test_key'] == None
     assert db[prefix1 + b'tenant_test_key'] == None

@@ -113,7 +115,7 @@ def test_tenant_operations(db):
         tenant1[b'tenant_test_key']
         assert False
     except fdb.FDBError as e:
-        assert e.code == 2131 # tenant not found
+        assert e.code == 2131  # tenant not found

     del tenant2[:]
     fdb.tenant_management.delete_tenant(db, b'tenant2')

@@ -126,6 +128,7 @@ def test_tenant_operations(db):

     assert db[b'tenant_test_key'] == None

+
 def test_tenant_operation_retries(db):
     cleanup_tenant(db, b'tenant1')
     cleanup_tenant(db, b'tenant2')

@@ -138,7 +141,7 @@ def test_tenant_operation_retries(db):
         fdb.tenant_management.create_tenant(db, b'tenant1')
         assert False
     except fdb.FDBError as e:
-        assert e.code == 2132 # tenant already exists
+        assert e.code == 2132  # tenant already exists

     # Using a transaction skips the existence check
     tr = db.create_transaction()

@@ -166,7 +169,7 @@ def test_tenant_operation_retries(db):
         fdb.tenant_management.delete_tenant(db, b'tenant1')
         assert False
     except fdb.FDBError as e:
-        assert e.code == 2131 # tenant not found
+        assert e.code == 2131  # tenant not found

     # Using a transaction skips the existence check
     tr = db.create_transaction()

@@ -186,11 +189,13 @@ def test_tenant_operation_retries(db):
     except fdb.FDBError as e:
         tr.on_error(e).wait()

+
 def test_tenants(db):
     test_tenant_tuple_name(db)
     test_tenant_operations(db)
     test_tenant_operation_retries(db)

+
 # Expect a cluster file as input. This test will write to the FDB cluster, so
 # be aware of potential side effects.
 if __name__ == '__main__':
@@ -26,7 +26,6 @@ import sys
 import os
 import struct
 import threading
 import time
 import random
-import time
 import traceback

@@ -136,7 +135,7 @@ def test_fdb_transactional_generator(db):
         def function_that_yields(tr):
             yield 0
         assert fdb.get_api_version() < 630, "Pre-6.3, a decorator may wrap a function that yields"
-    except ValueError as e:
+    except ValueError:
         assert fdb.get_api_version() >= 630, "Post-6.3, a decorator should throw if wrapped function yields"


@@ -144,12 +143,13 @@ def test_fdb_transactional_returns_generator(db):
     try:
         def function_that_yields(tr):
             yield 0

         @fdb.transactional
         def function_that_returns(tr):
             return function_that_yields(tr)
         function_that_returns()
         assert fdb.get_api_version() < 630, "Pre-6.3, returning a generator is allowed"
-    except ValueError as e:
+    except ValueError:
         assert fdb.get_api_version() >= 630, "Post-6.3, returning a generator should throw"


@@ -400,11 +400,11 @@ class Tester:
             inst.push(f)
         elif inst.op == six.u("GET_ESTIMATED_RANGE_SIZE"):
             begin, end = inst.pop(2)
-            estimatedSize = obj.get_estimated_range_size_bytes(begin, end).wait()
+            obj.get_estimated_range_size_bytes(begin, end).wait()
             inst.push(b"GOT_ESTIMATED_RANGE_SIZE")
         elif inst.op == six.u("GET_RANGE_SPLIT_POINTS"):
             begin, end, chunkSize = inst.pop(3)
-            estimatedSize = obj.get_range_split_points(begin, end, chunkSize).wait()
+            obj.get_range_split_points(begin, end, chunkSize).wait()
             inst.push(b"GOT_RANGE_SPLIT_POINTS")
         elif inst.op == six.u("GET_KEY"):
             key, or_equal, offset, prefix = inst.pop(4)

@@ -522,7 +522,7 @@ class Tester:
             self.last_version = inst.tr.get_committed_version()
             inst.push(b"GOT_COMMITTED_VERSION")
         elif inst.op == six.u("GET_APPROXIMATE_SIZE"):
-            approximate_size = inst.tr.get_approximate_size().wait()
+            inst.tr.get_approximate_size().wait()
             inst.push(b"GOT_APPROXIMATE_SIZE")
         elif inst.op == six.u("GET_VERSIONSTAMP"):
             inst.push(inst.tr.get_versionstamp())

@@ -613,9 +613,9 @@ class Tester:
                 result += [tenant.key]
                 try:
                     metadata = json.loads(tenant.value)
-                    id = metadata["id"]
-                    prefix = metadata["prefix"]
-                except (json.decoder.JSONDecodeError, KeyError) as e:
+                    _ = metadata["id"]
+                    _ = metadata["prefix"]
+                except (json.decoder.JSONDecodeError, KeyError):
                     assert False, "Invalid Tenant Metadata"
             inst.push(fdb.tuple.pack(tuple(result)))
         elif inst.op == six.u("UNIT_TESTS"):
@@ -173,7 +173,7 @@ def tupleTest(N=10000):
             print("Prefix not before prefixed:\n  Tuple: %s\n  Bytes: %s\n  Other: %s\n  Bytes: %s" % (t, repr(pack(t)), t2, repr(pack(t2))))
             return False

-    print ("Tuple check %d OK" % N)
+    print("Tuple check %d OK" % N)
     return True

 # test:
@@ -622,3 +622,39 @@ function(add_java_test)
     -Djava.library.path=${CMAKE_BINARY_DIR}/lib
     ${T_CLASS} "@CLUSTER_FILE@")
 endfunction()
+
+# Adds a FDB test implemented by a script that does the full setup, such as creating cluster
+# and running client binaries as necessary
+function(add_scripted_fdb_test)
+  set(options DISABLED ENABLED)
+  set(oneValueArgs NAME TEST_TIMEOUT)
+  set(multiValueArgs COMMAND)
+  cmake_parse_arguments(T "${options}" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}")
+  if(OPEN_FOR_IDE)
+    return()
+  endif()
+  if(NOT T_ENABLED AND T_DISABLED)
+    return()
+  endif()
+  if(NOT T_NAME)
+    message(FATAL_ERROR "NAME is a required argument for add_scripted_fdb_test")
+  endif()
+  if(NOT T_COMMAND)
+    message(FATAL_ERROR "COMMAND is a required argument for add_scripted_fdb_test")
+  endif()
+  message(STATUS "Adding Scripted FDB test ${T_NAME}")
+  add_test(NAME "${T_NAME}"
+    COMMAND ${T_COMMAND})
+  set_tests_properties("${T_NAME}" PROPERTIES ENVIRONMENT
+    "${SANITIZER_OPTIONS};PYTHONPATH=${CMAKE_SOURCE_DIR}/tests/TestRunner:${CMAKE_BINARY_DIR}/tests/TestRunner")
+  if (T_TEST_TIMEOUT)
+    set_tests_properties("${T_NAME}" PROPERTIES TIMEOUT ${T_TEST_TIMEOUT})
+  else()
+    # default timeout
+    if(USE_SANITIZER)
+      set_tests_properties("${T_NAME}" PROPERTIES TIMEOUT 1200)
+    else()
+      set_tests_properties("${T_NAME}" PROPERTIES TIMEOUT 300)
+    endif()
+  endif()
+endfunction()
@@ -1,6 +1,6 @@
 # FindRocksDB

-find_package(RocksDB 6.27.3)
+find_package(RocksDB 7.7.3)

 include(ExternalProject)

@@ -49,8 +49,8 @@ if(ROCKSDB_FOUND)
       ${BINARY_DIR}/librocksdb.a)
 else()
   ExternalProject_Add(rocksdb
-    URL https://github.com/facebook/rocksdb/archive/refs/tags/v6.27.3.tar.gz
-    URL_HASH SHA256=ee29901749b9132692b26f0a6c1d693f47d1a9ed8e3771e60556afe80282bf58
+    URL https://github.com/facebook/rocksdb/archive/refs/tags/v7.7.3.tar.gz
+    URL_HASH SHA256=b8ac9784a342b2e314c821f6d701148912215666ac5e9bdbccd93cf3767cb611
     CMAKE_ARGS ${RocksDB_CMAKE_ARGS}
     BUILD_BYPRODUCTS <BINARY_DIR>/librocksdb.a
     INSTALL_COMMAND ""
@@ -42,6 +42,7 @@ parser.add_argument('--no-graph', action='store_true', default=False, help='Disa

 args = parser.parse_args()

+
 def print_choices_list(context=None):
     if context == 'workload' or context is None:
         print('Workloads:')

@@ -70,6 +71,7 @@ def print_choices_list(context=None):
             name = name[0:-len('Limiter')]
             print('  %s' % name)

+
 if args.workload is None or args.ratekeeper is None:
     print('ERROR: A workload (-w/--workload) and ratekeeper model (-r/--ratekeeper) must be specified.\n')
     print_choices_list()

@@ -79,16 +81,18 @@ if args.list:
     print_choices_list()
     sys.exit(0)

+
 def validate_class_type(var, name, superclass):
     cls = getattr(var, name, None)
     return cls is not None and inspect.isclass(cls) and issubclass(cls, superclass)

-if not args.ratekeeper in ratekeeper_model.predefined_ratekeeper:
+
+if args.ratekeeper not in ratekeeper_model.predefined_ratekeeper:
     print('Invalid ratekeeper model `%s\'' % args.ratekeeper)
     print_choices_list('ratekeeper')
     sys.exit(1)

-if not args.workload in workload_model.predefined_workloads:
+if args.workload not in workload_model.predefined_workloads:
     print('Invalid workload model `%s\'' % args.workload)
     print_choices_list('workload')
     sys.exit(1)

@@ -120,11 +124,11 @@ for priority in workload.priorities():
     still_queued = sum([r.count for r in proxy.request_queue if r.priority == priority])

     if len(latencies) > 0:
-        print('\n%s: %d requests in %d seconds (rate=%f). %d still queued.' % (priority, total_started, proxy.time, float(total_started)/proxy.time, still_queued))
-        print('  Median latency: %f' % latencies[len(latencies)//2])
-        print('  90%% latency: %f' % latencies[int(0.9*len(latencies))])
-        print('  99%% latency: %f' % latencies[int(0.99*len(latencies))])
-        print('  99.9%% latency: %f' % latencies[int(0.999*len(latencies))])
+        print('\n%s: %d requests in %d seconds (rate=%f). %d still queued.' % (priority, total_started, proxy.time, float(total_started) / proxy.time, still_queued))
+        print('  Median latency: %f' % latencies[len(latencies) // 2])
+        print('  90%% latency: %f' % latencies[int(0.9 * len(latencies))])
+        print('  99%% latency: %f' % latencies[int(0.99 * len(latencies))])
+        print('  99.9%% latency: %f' % latencies[int(0.999 * len(latencies))])
         print('  Max latency: %f' % latencies[-1])

     print('')
@@ -20,6 +20,7 @@

 import matplotlib.pyplot as plt

+
 class Plotter:
     def __init__(self, results):
         self.results = results

@@ -28,13 +29,13 @@ class Plotter:
         out_data = {}
         counts = {}
         for t in data.keys():
-            out_data.setdefault(t//time_resolution*time_resolution, 0)
-            counts.setdefault(t//time_resolution*time_resolution, 0)
-            out_data[t//time_resolution*time_resolution] += data[t]
-            counts[t//time_resolution*time_resolution] += 1
+            out_data.setdefault(t // time_resolution * time_resolution, 0)
+            counts.setdefault(t // time_resolution * time_resolution, 0)
+            out_data[t // time_resolution * time_resolution] += data[t]
+            counts[t // time_resolution * time_resolution] += 1

         if use_avg:
-            out_data = { t: v/counts[t] for t,v in out_data.items() }
+            out_data = {t: v / counts[t] for t, v in out_data.items()}

         plt.plot(list(out_data.keys()), list(out_data.values()), label=label)

@@ -42,7 +43,7 @@ class Plotter:
         plt.plot(list(data.keys()), list(data.values()), label=label)

     def display(self, time_resolution=0.1):
-        plt.figure(figsize=(40,9))
+        plt.figure(figsize=(40, 9))
         plt.subplot(3, 3, 1)
         for priority in self.results.started.keys():
             Plotter.add_plot(self.results.started[priority], time_resolution, priority)

@@ -61,7 +62,7 @@ class Plotter:

         plt.subplot(3, 3, 3)
         for priority in self.results.unprocessed_queue_sizes.keys():
-            data = {k: max(v) for (k,v) in self.results.unprocessed_queue_sizes[priority].items()}
+            data = {k: max(v) for (k, v) in self.results.unprocessed_queue_sizes[priority].items()}
             Plotter.add_plot(data, time_resolution, priority)

         plt.xlabel('Time (s)')

@@ -71,9 +72,11 @@ class Plotter:
         num = 4
         for priority in self.results.latencies.keys():
             plt.subplot(3, 3, num)
-            median_latencies = {k: v[int(0.5*len(v))] if len(v) > 0 else 0 for (k,v) in self.results.latencies[priority].items()}
-            percentile90_latencies = {k: v[int(0.9*len(v))] if len(v) > 0 else 0 for (k,v) in self.results.latencies[priority].items()}
-            max_latencies = {k: max(v) if len(v) > 0 else 0 for (k,v) in self.results.latencies[priority].items()}
+            median_latencies = {k: v[int(0.5 * len(v))] if len(v) > 0 else 0 for (k, v) in
+                                self.results.latencies[priority].items()}
+            percentile90_latencies = {k: v[int(0.9 * len(v))] if len(v) > 0 else 0 for (k, v) in
+                                      self.results.latencies[priority].items()}
+            max_latencies = {k: max(v) if len(v) > 0 else 0 for (k, v) in self.results.latencies[priority].items()}

             Plotter.add_plot(median_latencies, time_resolution, 'median')
             Plotter.add_plot(percentile90_latencies, time_resolution, '90th percentile')

@@ -94,7 +97,8 @@ class Plotter:
             if len(self.results.limit[priority]) > 0:
                 Plotter.add_plot(self.results.limit[priority], time_resolution, 'Limit', use_avg=True)
             if len(self.results.limit_and_budget[priority]) > 0:
-                Plotter.add_plot(self.results.limit_and_budget[priority], time_resolution, 'Limit and budget', use_avg=True)
+                Plotter.add_plot(self.results.limit_and_budget[priority], time_resolution, 'Limit and budget',
+                                 use_avg=True)
             if len(self.results.budget[priority]) > 0:
                 Plotter.add_plot(self.results.budget[priority], time_resolution, 'Budget', use_avg=True)

@@ -104,4 +108,3 @@ class Plotter:
             num += 1

         plt.show()
-
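The expression `t // time_resolution * time_resolution` floors each timestamp to the start of its bucket, and the parallel `counts` dict turns bucket sums into averages when `use_avg` is set. A standalone sketch of the same bucketing:

    def bucket(t, time_resolution):
        # floor t to the start of its bucket (same arithmetic as add_plot)
        return t // time_resolution * time_resolution

    assert bucket(7.3, 0.5) == 7.0
    sums, counts = {}, {}
    for t, v in {7.3: 10, 7.4: 20}.items():   # two samples, one 0.5-second bucket
        b = bucket(t, 0.5)
        sums[b] = sums.get(b, 0) + v
        counts[b] = counts.get(b, 0) + 1
    assert {b: sums[b] / counts[b] for b in sums} == {7.0: 15.0}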
@@ -20,6 +20,7 @@

 import functools

+
 @functools.total_ordering
 class Priority:
     def __init__(self, priority_value, label):

@@ -35,6 +36,7 @@ class Priority:
     def __repr__(self):
         return repr(self.label)

+
 Priority.SYSTEM = Priority(0, "System")
 Priority.DEFAULT = Priority(1, "Default")
 Priority.BATCH = Priority(2, "Batch")
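`functools.total_ordering` derives the remaining comparison operators from `__eq__` plus one ordering method, so the three sentinels sort by numeric value, lower meaning more important. A sketch of the pattern; the method bodies are assumptions, since the diff elides them:

    import functools

    @functools.total_ordering
    class Priority:
        def __init__(self, priority_value, label):
            self.priority_value = priority_value
            self.label = label

        def __eq__(self, other):
            return self.priority_value == other.priority_value

        def __lt__(self, other):
            return self.priority_value < other.priority_value

    SYSTEM, DEFAULT, BATCH = (Priority(0, "System"), Priority(1, "Default"),
                              Priority(2, "Batch"))
    assert SYSTEM < DEFAULT <= BATCH   # <= is derived by total_ordering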
@@ -25,6 +25,7 @@ import heapq
 from priority import Priority
 from smoother import Smoother

+
 @functools.total_ordering
 class Task:
     def __init__(self, time, fxn):

@@ -34,6 +35,7 @@ class Task:
     def __lt__(self, other):
         return self.time < other.time

+
 class Limiter:
     class UpdateRateParams:
         def __init__(self, time):

@@ -79,6 +81,7 @@ class Limiter:
     def update_budget(self, params):
         pass

+
 class OriginalLimiter(Limiter):
     def __init__(self, priority, limit_rate_model, proxy_model):
         Limiter.__init__(self, priority, limit_rate_model, proxy_model)

@@ -100,6 +103,7 @@ class OriginalLimiter(Limiter):
     def update_budget(self, params):
         self.limit -= params.num_started

+
 class PositiveBudgetLimiter(OriginalLimiter):
     def __init__(self, priority, limit_rate_model, proxy_model):
         OriginalLimiter.__init__(self, priority, limit_rate_model, proxy_model)

@@ -108,6 +112,7 @@ class PositiveBudgetLimiter(OriginalLimiter):
         self.limit += params.elapsed * self.rate
         self.limit = min(self.limit, 2.0 * self.rate)

+
 class ClampedBudgetLimiter(PositiveBudgetLimiter):
     def __init__(self, priority, limit_rate_model, proxy_model):
         PositiveBudgetLimiter.__init__(self, priority, limit_rate_model, proxy_model)

@@ -117,6 +122,7 @@ class ClampedBudgetLimiter(PositiveBudgetLimiter):
         if self.limit > min_budget:
             self.limit = max(self.limit - params.num_started, min_budget)

+
 class TimeLimiter(PositiveBudgetLimiter):
     def __init__(self, priority, limit_rate_model, proxy_model):
         PositiveBudgetLimiter.__init__(self, priority, limit_rate_model, proxy_model)

@@ -126,15 +132,17 @@ class TimeLimiter(PositiveBudgetLimiter):
         return params.time >= self.locked_until and PositiveBudgetLimiter.can_start(self, params)

     def update_budget(self, params):
-        #print('Start update budget: time=%f, limit=%f, locked_until=%f, num_started=%d, priority=%s, min_priority=%s, last_batch=%d' % (params.time, self.limit, self.locked_until, params.num_started, self.priority, params.min_priority, params.last_batch))
+        # print('Start update budget: time=%f, limit=%f, locked_until=%f, num_started=%d, priority=%s, min_priority=%s, last_batch=%d' % (params.time, self.limit, self.locked_until, params.num_started, self.priority, params.min_priority, params.last_batch))

         if params.min_priority >= self.priority or params.num_started < self.limit:
             self.limit -= params.num_started
         else:
             self.limit = min(self.limit, max(self.limit - params.num_started, -params.last_batch))
-            self.locked_until = min(params.time + 2.0, max(params.time, self.locked_until) + (params.num_started - self.limit)/self.rate)
+            self.locked_until = min(params.time + 2.0,
+                                    max(params.time, self.locked_until) + (params.num_started - self.limit) / self.rate)

-        #print('End update budget: time=%f, limit=%f, locked_until=%f, num_started=%d, priority=%s, min_priority=%s' % (params.time, self.limit, self.locked_until, params.num_started, self.priority, params.min_priority))
+        # print('End update budget: time=%f, limit=%f, locked_until=%f, num_started=%d, priority=%s, min_priority=%s' % (params.time, self.limit, self.locked_until, params.num_started, self.priority, params.min_priority))

+
 class TimePositiveBudgetLimiter(PositiveBudgetLimiter):
     def __init__(self, priority, limit_rate_model, proxy_model):

@@ -149,17 +157,18 @@ class TimePositiveBudgetLimiter(PositiveBudgetLimiter):
         return params.num_started + params.count <= self.limit

     def update_budget(self, params):
-        #if params.num_started > 0:
-            #print('Start update budget: time=%f, limit=%f, locked_until=%f, num_started=%d, priority=%s, min_priority=%s, last_batch=%d' % (params.time, self.limit, self.locked_until, params.num_started, self.priority, params.min_priority, params.last_batch))
+        # if params.num_started > 0:
+        #     print('Start update budget: time=%f, limit=%f, locked_until=%f, num_started=%d, priority=%s, min_priority=%s, last_batch=%d' % (params.time, self.limit, self.locked_until, params.num_started, self.priority, params.min_priority, params.last_batch))

         if params.num_started > self.limit:
-            self.locked_until = min(params.time + 2.0, max(params.time, self.locked_until) + penalty/self.rate)
+            self.locked_until = min(params.time + 2.0, max(params.time, self.locked_until) + (params.num_started - self.limit) / self.rate)
             self.limit = 0
         else:
             self.limit -= params.num_started

-        #if params.num_started > 0:
-            #print('End update budget: time=%f, limit=%f, locked_until=%f, num_started=%d, priority=%s, min_priority=%s' % (params.time, self.limit, self.locked_until, params.num_started, self.priority, params.min_priority))
+        # if params.num_started > 0:
+        #     print('End update budget: time=%f, limit=%f, locked_until=%f, num_started=%d, priority=%s, min_priority=%s' % (params.time, self.limit, self.locked_until, params.num_started, self.priority, params.min_priority))

+
 class SmoothingLimiter(OriginalLimiter):
     def __init__(self, priority, limit_rate_model, proxy_model):

@@ -177,7 +186,8 @@ class SmoothingLimiter(OriginalLimiter):
         self.smooth_rate_limit.set_total(params.time, self.rate)

     def update_limit(self, params):
-        self.limit = 2.0 * (self.smooth_rate_limit.smooth_total(params.time) - self.smooth_released.smooth_rate(params.time))
+        self.limit = 2.0 * (
+            self.smooth_rate_limit.smooth_total(params.time) - self.smooth_released.smooth_rate(params.time))

     def can_start(self, params):
         return params.num_started + params.count <= self.limit

@@ -185,15 +195,17 @@ class SmoothingLimiter(OriginalLimiter):
     def update_budget(self, params):
         self.smooth_released.add_delta(params.time, params.num_started)

+
 class SmoothingBudgetLimiter(SmoothingLimiter):
     def __init__(self, priority, limit_rate_model, proxy_model):
         SmoothingLimiter.__init__(self, priority, limit_rate_model, proxy_model)
-        #self.smooth_filled = Smoother(2)
+        # self.smooth_filled = Smoother(2)
         self.budget = 0

     def update_limit(self, params):
-        release_rate = (self.smooth_rate_limit.smooth_total(params.time) - self.smooth_released.smooth_rate(params.time))
-        #self.smooth_filled.set_total(params.time, 1 if release_rate > 0 else 0)
+        release_rate = (
+            self.smooth_rate_limit.smooth_total(params.time) - self.smooth_released.smooth_rate(params.time))
+        # self.smooth_filled.set_total(params.time, 1 if release_rate > 0 else 0)
         self.limit = 2.0 * release_rate

         self.proxy_model.results.rate[self.priority][params.time] = self.smooth_rate_limit.smooth_total(params.time)

@@ -202,15 +214,15 @@ class SmoothingBudgetLimiter(SmoothingLimiter):
         self.proxy_model.results.limit_and_budget[self.priority][params.time] = self.limit + self.budget
         self.proxy_model.results.budget[self.priority][params.time] = self.budget

-        #self.budget = max(0, self.budget + params.elapsed * self.smooth_rate_limit.smooth_total(params.time))
+        # self.budget = max(0, self.budget + params.elapsed * self.smooth_rate_limit.smooth_total(params.time))

-        #if self.smooth_filled.smooth_total(params.time) >= 0.1:
-            #self.budget += params.elapsed * self.smooth_rate_limit.smooth_total(params.time)
+        # if self.smooth_filled.smooth_total(params.time) >= 0.1:
+        #     self.budget += params.elapsed * self.smooth_rate_limit.smooth_total(params.time)

-        #print('Update limit: time=%f, priority=%s, limit=%f, rate=%f, released=%f, budget=%f' % (params.time, self.priority, self.limit, self.smooth_rate_limit.smooth_total(params.time), self.smooth_released.smooth_rate(params.time), self.budget))
+        # print('Update limit: time=%f, priority=%s, limit=%f, rate=%f, released=%f, budget=%f' % (params.time, self.priority, self.limit, self.smooth_rate_limit.smooth_total(params.time), self.smooth_released.smooth_rate(params.time), self.budget))

     def can_start(self, params):
-        return params.num_started + params.count <= self.limit + self.budget #or params.num_started + params.count <= self.budget
+        return params.num_started + params.count <= self.limit + self.budget  # or params.num_started + params.count <= self.budget

     def update_budget(self, params):
         self.budget = max(0, self.budget + (self.limit - params.num_started_at_priority) / 2 * params.elapsed)

@@ -220,6 +232,7 @@ class SmoothingBudgetLimiter(SmoothingLimiter):

         self.smooth_released.add_delta(params.time, params.num_started_at_priority)

+
 class ProxyModel:
     class Results:
         def __init__(self, priorities, duration):

@@ -228,11 +241,11 @@ class ProxyModel:
             self.latencies = self.init_result(priorities, [], duration)
             self.unprocessed_queue_sizes = self.init_result(priorities, [], duration)

-            self.rate = {p:{} for p in priorities}
-            self.released = {p:{} for p in priorities}
-            self.limit = {p:{} for p in priorities}
-            self.limit_and_budget = {p:{} for p in priorities}
-            self.budget = {p:{} for p in priorities}
+            self.rate = {p: {} for p in priorities}
+            self.released = {p: {} for p in priorities}
+            self.limit = {p: {} for p in priorities}
+            self.limit_and_budget = {p: {} for p in priorities}
+            self.budget = {p: {} for p in priorities}

         def init_result(self, priorities, starting_value, duration):
             return {p: {s: copy.copy(starting_value) for s in range(0, duration)} for p in priorities}

@@ -241,9 +254,10 @@ class ProxyModel:
         self.time = 0
         self.log_time = 0
         self.duration = duration
-        self.priority_limiters = { priority: Limiter(priority, ratekeeper_model, self) for priority in workload_model.priorities() }
+        self.priority_limiters = {priority: Limiter(priority, ratekeeper_model, self) for priority in
+                                  workload_model.priorities()}
         self.workload_model = workload_model
-        self.request_scheduled = { p: False for p in self.workload_model.priorities()}
+        self.request_scheduled = {p: False for p in self.workload_model.priorities()}

         self.tasks = []
         self.request_queue = []

@@ -256,13 +270,14 @@ class ProxyModel:
         for priority in self.workload_model.priorities():
             next_request = self.workload_model.next_request(self.time, priority)
             assert next_request is not None
-            heapq.heappush(self.tasks, Task(next_request.time, lambda next_request=next_request: self.receive_request(next_request)))
+            heapq.heappush(self.tasks, Task(next_request.time,
+                                            lambda next_request=next_request: self.receive_request(next_request)))
             self.request_scheduled[priority] = True

-        while True:# or len(self.request_queue) > 0:
+        while True:  # or len(self.request_queue) > 0:
             if int(self.time) > self.log_time:
                 self.log_time = int(self.time)
-                #print(self.log_time)
+                # print(self.log_time)

             task = heapq.heappop(self.tasks)
             self.time = task.time

@@ -294,14 +309,15 @@ class ProxyModel:
             limiter.update_limit(Limiter.UpdateLimitParams(self.time, elapsed))

         current_started = 0
-        started = {p:0 for p in self.workload_model.priorities()}
+        started = {p: 0 for p in self.workload_model.priorities()}

         min_priority = Priority.SYSTEM
         last_batch = 0
         while len(self.request_queue) > 0:
             request = self.request_queue[0]

-            if not self.priority_limiters[request.priority].can_start(Limiter.CanStartParams(self.time, current_started, request.count)):
+            if not self.priority_limiters[request.priority].can_start(
+                    Limiter.CanStartParams(self.time, current_started, request.count)):
                 break

             min_priority = request.priority

@@ -310,7 +326,8 @@ class ProxyModel:
             if self.workload_model.request_completed(request) and not self.request_scheduled[request.priority]:
                 next_request = self.workload_model.next_request(self.time, request.priority)
                 assert next_request is not None
-                heapq.heappush(self.tasks, Task(next_request.time, lambda next_request=next_request: self.receive_request(next_request)))
+                heapq.heappush(self.tasks, Task(next_request.time,
+                                                lambda next_request=next_request: self.receive_request(next_request)))
                 self.request_scheduled[request.priority] = True

             current_started += request.count

@@ -318,21 +335,23 @@ class ProxyModel:

             heapq.heappop(self.request_queue)
             self.results.started[request.priority][int(self.time)] += request.count
-            self.results.latencies[request.priority][int(self.time)].append(self.time-request.time)
+            self.results.latencies[request.priority][int(self.time)].append(self.time - request.time)

             if len(self.request_queue) == 0:
                 min_priority = Priority.BATCH

         for priority, limiter in self.priority_limiters.items():
-            started_at_priority = sum([v for p,v in started.items() if p <= priority])
-            limiter.update_budget(Limiter.UpdateBudgetParams(self.time, current_started, started_at_priority, min_priority, last_batch, len(self.request_queue) == 0 or self.request_queue[0].priority > priority, elapsed))
+            started_at_priority = sum([v for p, v in started.items() if p <= priority])
+            limiter.update_budget(
+                Limiter.UpdateBudgetParams(self.time, current_started, started_at_priority, min_priority, last_batch,
                                            len(self.request_queue) == 0 or self.request_queue[0].priority > priority,
                                            elapsed))

         for priority in self.workload_model.priorities():
             self.results.unprocessed_queue_sizes[priority][int(self.time)].append(self.workload_model.workload_models[priority].outstanding)
|
||||
self.results.unprocessed_queue_sizes[priority][int(self.time)].append(
|
||||
self.workload_model.workload_models[priority].outstanding)
|
||||
|
||||
current_time = self.time
|
||||
|
||||
delay = 0.001
|
||||
heapq.heappush(self.tasks, Task(self.time + delay, lambda: self.process_requests(current_time)))
|
||||
|
||||
|
||||
|
|
|
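The admission logic above is plain arithmetic: a batch of `count` requests is admitted only while the number already started this tick stays within `limit + budget`, and unused capacity at a priority feeds back into that priority's budget at half rate. A standalone sketch of those two rules, with hypothetical numbers (not part of the commit):

# Standalone model of SmoothingBudgetLimiter.can_start / update_budget
# (hypothetical numbers; not part of the commit).
limit, budget = 100.0, 20.0

def can_start(num_started, count):
    # Admit while the running total stays within limit plus accumulated budget.
    return num_started + count <= limit + budget

print(can_start(95, 20))   # True: 115 <= 120
print(can_start(95, 30))   # False: 125 > 120

# Unused capacity feeds back into the budget at half rate, clamped at zero.
elapsed, num_started_at_priority = 1.0, 40.0
budget = max(0, budget + (limit - num_started_at_priority) / 2 * elapsed)
print(budget)              # 50.0
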
@@ -20,6 +20,7 @@

import numpy


class RateModel:
    def __init__(self):
        pass

@@ -27,6 +28,7 @@ class RateModel:
    def get_rate(self, time):
        pass


class FixedRateModel(RateModel):
    def __init__(self, rate):
        RateModel.__init__(self)

@@ -35,10 +37,12 @@ class FixedRateModel(RateModel):
    def get_rate(self, time):
        return self.rate


class UnlimitedRateModel(FixedRateModel):
    def __init__(self):
        self.rate = 1e9


class IntervalRateModel(RateModel):
    def __init__(self, intervals):
        self.intervals = sorted(intervals)

@@ -46,16 +50,17 @@ class IntervalRateModel(RateModel):
    def get_rate(self, time):
        if len(self.intervals) == 0 or time < self.intervals[0][0]:
            return 0

        target_interval = len(self.intervals)-1
        target_interval = len(self.intervals) - 1
        for i in range(1, len(self.intervals)):
            if time < self.intervals[i][0]:
                target_interval = i-1
                target_interval = i - 1
                break

        self.intervals = self.intervals[target_interval:]
        return self.intervals[0][1]


class SawtoothRateModel(RateModel):
    def __init__(self, low, high, frequency):
        self.low = low

@@ -63,11 +68,12 @@ class SawtoothRateModel(RateModel):
        self.frequency = frequency

    def get_rate(self, time):
        if int(2*time/self.frequency) % 2 == 0:
        if int(2 * time / self.frequency) % 2 == 0:
            return self.low
        else:
            return self.high


class DistributionRateModel(RateModel):
    def __init__(self, distribution, frequency):
        self.distribution = distribution

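All of the rate models above share one method, `get_rate(time)`, which the simulation samples as time advances. A usage sketch, assuming the classes as defined in this file and calls in increasing time order (IntervalRateModel discards intervals it has already passed):

# Usage sketch for the rate models above (assumes the class definitions from this file).
sawtooth = SawtoothRateModel(10, 200, 1)  # alternates every half period
print(sawtooth.get_rate(0.2))             # 10:  int(2 * 0.2 / 1) % 2 == 0
print(sawtooth.get_rate(0.7))             # 200: int(2 * 0.7 / 1) % 2 == 1

intervals = IntervalRateModel([(0, 50), (60, 150)])
print(intervals.get_rate(10))             # 50: still inside the first interval
print(intervals.get_rate(75))             # 150: the second interval has begun
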
@@ -22,6 +22,7 @@ import numpy
import rate_model
from priority import Priority


class RatekeeperModel:
    def __init__(self, limit_models):
        self.limit_models = limit_models

@@ -29,39 +30,40 @@ class RatekeeperModel:
    def get_limit(self, time, priority):
        return self.limit_models[priority].get_rate(time)


predefined_ratekeeper = {}

predefined_ratekeeper['default200_batch100'] = RatekeeperModel(
{
    Priority.SYSTEM: rate_model.UnlimitedRateModel(),
    Priority.DEFAULT: rate_model.FixedRateModel(200),
    Priority.BATCH: rate_model.FixedRateModel(100)
})
    {
        Priority.SYSTEM: rate_model.UnlimitedRateModel(),
        Priority.DEFAULT: rate_model.FixedRateModel(200),
        Priority.BATCH: rate_model.FixedRateModel(100)
    })

predefined_ratekeeper['default_sawtooth'] = RatekeeperModel(
{
    Priority.SYSTEM: rate_model.UnlimitedRateModel(),
    Priority.DEFAULT: rate_model.SawtoothRateModel(10, 200, 1),
    Priority.BATCH: rate_model.FixedRateModel(0)
})
    {
        Priority.SYSTEM: rate_model.UnlimitedRateModel(),
        Priority.DEFAULT: rate_model.SawtoothRateModel(10, 200, 1),
        Priority.BATCH: rate_model.FixedRateModel(0)
    })

predefined_ratekeeper['default_uniform_random'] = RatekeeperModel(
{
    Priority.SYSTEM: rate_model.UnlimitedRateModel(),
    Priority.DEFAULT: rate_model.DistributionRateModel(lambda: numpy.random.uniform(10, 200), 1),
    Priority.BATCH: rate_model.FixedRateModel(0)
})
    {
        Priority.SYSTEM: rate_model.UnlimitedRateModel(),
        Priority.DEFAULT: rate_model.DistributionRateModel(lambda: numpy.random.uniform(10, 200), 1),
        Priority.BATCH: rate_model.FixedRateModel(0)
    })

predefined_ratekeeper['default_trickle'] = RatekeeperModel(
{
    Priority.SYSTEM: rate_model.UnlimitedRateModel(),
    Priority.DEFAULT: rate_model.FixedRateModel(3),
    Priority.BATCH: rate_model.FixedRateModel(0)
})
    {
        Priority.SYSTEM: rate_model.UnlimitedRateModel(),
        Priority.DEFAULT: rate_model.FixedRateModel(3),
        Priority.BATCH: rate_model.FixedRateModel(0)
    })

predefined_ratekeeper['default1000'] = RatekeeperModel(
{
    Priority.SYSTEM: rate_model.UnlimitedRateModel(),
    Priority.DEFAULT: rate_model.FixedRateModel(1000),
    Priority.BATCH: rate_model.FixedRateModel(500)
})
    {
        Priority.SYSTEM: rate_model.UnlimitedRateModel(),
        Priority.DEFAULT: rate_model.FixedRateModel(1000),
        Priority.BATCH: rate_model.FixedRateModel(500)
    })

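A RatekeeperModel is just a priority-to-RateModel table, so the predefined entries above can be probed directly. A quick usage sketch, assuming the definitions in this file and `Priority` from priority.py:

rk = predefined_ratekeeper['default200_batch100']
print(rk.get_limit(0.0, Priority.DEFAULT))  # 200, from FixedRateModel(200)
print(rk.get_limit(0.0, Priority.BATCH))    # 100
print(rk.get_limit(0.0, Priority.SYSTEM))   # 1e9, i.e. effectively unlimited
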
@@ -20,6 +20,7 @@

import math


class Smoother:
    def __init__(self, folding_time):
        self.folding_time = folding_time

@@ -28,10 +29,10 @@ class Smoother:
    def reset(self, value):
        self.time = 0
        self.total = value
        self.estimate = value
        self.estimate = value

    def set_total(self, time, total):
        self.add_delta(time, total-self.total)
        self.add_delta(time, total - self.total)

    def add_delta(self, time, delta):
        self.update(time)

@@ -43,11 +44,10 @@ class Smoother:

    def smooth_rate(self, time):
        self.update(time)
        return (self.total-self.estimate) / self.folding_time
        return (self.total - self.estimate) / self.folding_time

    def update(self, time):
        elapsed = time - self.time
        if elapsed > 0:
            self.time = time
            self.estimate += (self.total-self.estimate) * (1-math.exp(-elapsed/self.folding_time))
            self.estimate += (self.total - self.estimate) * (1 - math.exp(-elapsed / self.folding_time))

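The `update` step above is a textbook exponential moving average: after an idle interval `elapsed`, the estimate closes a `1 - exp(-elapsed / folding_time)` fraction of its gap to `total`, so `smooth_rate` reports the recent rate of change. A quick sanity check, assuming the class as defined here and that `smooth_total` returns the smoothed estimate (as its use in the proxy model suggests):

s = Smoother(folding_time=2.0)
s.reset(0)
s.add_delta(0.0, 100)                 # total jumps to 100; the estimate lags at 0
print(s.smooth_rate(0.0))             # 50.0 = (100 - 0) / 2.0
print(round(s.smooth_total(2.0), 1))  # ~63.2: one folding time covers 1 - exp(-1) of the gap
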
@@ -25,6 +25,7 @@ import math
import rate_model
from priority import Priority


@functools.total_ordering
class Request:
    def __init__(self, time, count, priority):

@@ -35,6 +36,7 @@ class Request:
    def __lt__(self, other):
        return self.priority < other.priority


class PriorityWorkloadModel:
    def __init__(self, priority, rate_model, batch_model, generator, max_outstanding=1e9):
        self.priority = priority

@@ -59,6 +61,7 @@ class PriorityWorkloadModel:

        return was_full and self.outstanding < self.max_outstanding


class WorkloadModel:
    def __init__(self, workload_models):
        self.workload_models = workload_models

@@ -72,10 +75,17 @@ class WorkloadModel:
    def request_completed(self, request):
        return self.workload_models[request.priority].request_completed(request)


class Distribution:
    EXPONENTIAL = lambda x: numpy.random.exponential(x)
    UNIFORM = lambda x: numpy.random.uniform(0, 2.0*x)
    FIXED = lambda x: x
    def exponential(x):
        return numpy.random.exponential(x)

    def uniform(x):
        return numpy.random.uniform(0, 2.0 * x)

    def fixed(x):
        return x


class BatchGenerator:
    def __init__(self):

@@ -84,6 +94,7 @@ class BatchGenerator:
    def next_batch(self):
        pass


class DistributionBatchGenerator(BatchGenerator):
    def __init__(self, distribution, size):
        BatchGenerator.__init__(self)

@@ -93,6 +104,7 @@ class DistributionBatchGenerator(BatchGenerator):
    def next_batch(self):
        return math.ceil(self.distribution(self.size))


class RequestGenerator:
    def __init__(self):
        pass

@@ -100,6 +112,7 @@ class RequestGenerator:
    def next_request_interval(self, rate):
        pass


class DistributionRequestGenerator(RequestGenerator):
    def __init__(self, distribution):
        RequestGenerator.__init__(self)

@@ -109,93 +122,94 @@ class DistributionRequestGenerator(RequestGenerator):
        if rate == 0:
            return 1e9

        return self.distribution(1.0/rate)
        return self.distribution(1.0 / rate)


predefined_workloads = {}

predefined_workloads['slow_exponential'] = WorkloadModel(
{
    Priority.DEFAULT: PriorityWorkloadModel(Priority.DEFAULT,
        rate_model.FixedRateModel(100),
        DistributionBatchGenerator(Distribution.FIXED, 1),
        DistributionRequestGenerator(Distribution.EXPONENTIAL),
        max_outstanding=100
    )
})
    {
        Priority.DEFAULT: PriorityWorkloadModel(Priority.DEFAULT,
                                                rate_model.FixedRateModel(100),
                                                DistributionBatchGenerator(Distribution.fixed, 1),
                                                DistributionRequestGenerator(Distribution.exponential),
                                                max_outstanding=100
                                                )
    })

predefined_workloads['fixed_uniform'] = WorkloadModel(
{
    Priority.SYSTEM: PriorityWorkloadModel(Priority.SYSTEM,
        rate_model.FixedRateModel(0),
        DistributionBatchGenerator(Distribution.FIXED, 1),
        DistributionRequestGenerator(Distribution.UNIFORM),
        max_outstanding=10
    ),
    Priority.DEFAULT: PriorityWorkloadModel(Priority.DEFAULT,
        rate_model.FixedRateModel(95),
        DistributionBatchGenerator(Distribution.FIXED, 10),
        DistributionRequestGenerator(Distribution.UNIFORM),
        max_outstanding=200
    ),
    Priority.BATCH: PriorityWorkloadModel(Priority.BATCH,
        rate_model.FixedRateModel(1),
        DistributionBatchGenerator(Distribution.UNIFORM, 500),
        DistributionRequestGenerator(Distribution.UNIFORM),
        max_outstanding=200
    )
})
    {
        Priority.SYSTEM: PriorityWorkloadModel(Priority.SYSTEM,
                                               rate_model.FixedRateModel(0),
                                               DistributionBatchGenerator(Distribution.fixed, 1),
                                               DistributionRequestGenerator(Distribution.uniform),
                                               max_outstanding=10
                                               ),
        Priority.DEFAULT: PriorityWorkloadModel(Priority.DEFAULT,
                                                rate_model.FixedRateModel(95),
                                                DistributionBatchGenerator(Distribution.fixed, 10),
                                                DistributionRequestGenerator(Distribution.uniform),
                                                max_outstanding=200
                                                ),
        Priority.BATCH: PriorityWorkloadModel(Priority.BATCH,
                                              rate_model.FixedRateModel(1),
                                              DistributionBatchGenerator(Distribution.uniform, 500),
                                              DistributionRequestGenerator(Distribution.uniform),
                                              max_outstanding=200
                                              )
    })

predefined_workloads['batch_starvation'] = WorkloadModel(
{
    Priority.SYSTEM: PriorityWorkloadModel(Priority.SYSTEM,
        rate_model.FixedRateModel(1),
        DistributionBatchGenerator(Distribution.FIXED, 1),
        DistributionRequestGenerator(Distribution.UNIFORM),
        max_outstanding=10
    ),
    Priority.DEFAULT: PriorityWorkloadModel(Priority.DEFAULT,
        rate_model.IntervalRateModel([(0,50), (60,150), (120,90)]),
        DistributionBatchGenerator(Distribution.FIXED, 1),
        DistributionRequestGenerator(Distribution.UNIFORM),
        max_outstanding=200
    ),
    Priority.BATCH: PriorityWorkloadModel(Priority.BATCH,
        rate_model.FixedRateModel(100),
        DistributionBatchGenerator(Distribution.FIXED, 1),
        DistributionRequestGenerator(Distribution.UNIFORM),
        max_outstanding=200
    )
})
    {
        Priority.SYSTEM: PriorityWorkloadModel(Priority.SYSTEM,
                                               rate_model.FixedRateModel(1),
                                               DistributionBatchGenerator(Distribution.fixed, 1),
                                               DistributionRequestGenerator(Distribution.uniform),
                                               max_outstanding=10
                                               ),
        Priority.DEFAULT: PriorityWorkloadModel(Priority.DEFAULT,
                                                rate_model.IntervalRateModel([(0, 50), (60, 150), (120, 90)]),
                                                DistributionBatchGenerator(Distribution.fixed, 1),
                                                DistributionRequestGenerator(Distribution.uniform),
                                                max_outstanding=200
                                                ),
        Priority.BATCH: PriorityWorkloadModel(Priority.BATCH,
                                              rate_model.FixedRateModel(100),
                                              DistributionBatchGenerator(Distribution.fixed, 1),
                                              DistributionRequestGenerator(Distribution.uniform),
                                              max_outstanding=200
                                              )
    })

predefined_workloads['default_low_high_low'] = WorkloadModel(
{
    Priority.SYSTEM: PriorityWorkloadModel(Priority.SYSTEM,
        rate_model.FixedRateModel(0),
        DistributionBatchGenerator(Distribution.FIXED, 1),
        DistributionRequestGenerator(Distribution.UNIFORM),
        max_outstanding=10
    ),
    Priority.DEFAULT: PriorityWorkloadModel(Priority.DEFAULT,
        rate_model.IntervalRateModel([(0,100), (60,300), (120,100)]),
        DistributionBatchGenerator(Distribution.FIXED, 1),
        DistributionRequestGenerator(Distribution.UNIFORM),
        max_outstanding=200
    ),
    Priority.BATCH: PriorityWorkloadModel(Priority.BATCH,
        rate_model.FixedRateModel(0),
        DistributionBatchGenerator(Distribution.FIXED, 1),
        DistributionRequestGenerator(Distribution.UNIFORM),
        max_outstanding=200
    )
})
    {
        Priority.SYSTEM: PriorityWorkloadModel(Priority.SYSTEM,
                                               rate_model.FixedRateModel(0),
                                               DistributionBatchGenerator(Distribution.fixed, 1),
                                               DistributionRequestGenerator(Distribution.uniform),
                                               max_outstanding=10
                                               ),
        Priority.DEFAULT: PriorityWorkloadModel(Priority.DEFAULT,
                                                rate_model.IntervalRateModel([(0, 100), (60, 300), (120, 100)]),
                                                DistributionBatchGenerator(Distribution.fixed, 1),
                                                DistributionRequestGenerator(Distribution.uniform),
                                                max_outstanding=200
                                                ),
        Priority.BATCH: PriorityWorkloadModel(Priority.BATCH,
                                              rate_model.FixedRateModel(0),
                                              DistributionBatchGenerator(Distribution.fixed, 1),
                                              DistributionRequestGenerator(Distribution.uniform),
                                              max_outstanding=200
                                              )
    })

for rate in [83, 100, 180, 190, 200]:
    predefined_workloads['default%d' % rate] = WorkloadModel(
    {
        Priority.DEFAULT: PriorityWorkloadModel(Priority.DEFAULT,
            rate_model.FixedRateModel(rate),
            DistributionBatchGenerator(Distribution.FIXED, 1),
            DistributionRequestGenerator(Distribution.EXPONENTIAL),
            max_outstanding=1000
        )
    })
        {
            Priority.DEFAULT: PriorityWorkloadModel(Priority.DEFAULT,
                                                    rate_model.FixedRateModel(rate),
                                                    DistributionBatchGenerator(Distribution.fixed, 1),
                                                    DistributionRequestGenerator(Distribution.exponential),
                                                    max_outstanding=1000
                                                    )
        })

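The `Distribution` rewrite above replaces lambda-valued class attributes with plain functions (the usual fix for flake8's E731), and every `DistributionBatchGenerator`/`DistributionRequestGenerator` call site switches to the lowercase names; behavior is unchanged. A minimal check, assuming the classes in this file:

print(Distribution.fixed(5))           # 5
gen = DistributionBatchGenerator(Distribution.fixed, 3)
print(gen.next_batch())                # 3 == math.ceil(3)
req = DistributionRequestGenerator(Distribution.fixed)
print(req.next_request_interval(100))  # 0.01 == fixed(1.0 / 100)
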
@@ -0,0 +1,5 @@
# LeakSanitizer suppressions file for FDB
# https://github.com/google/sanitizers/wiki/AddressSanitizerLeakSanitizer

# Not all incoming connections are cleanly shut down in client API tests
leak:ConnectionReaderActorState

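LeakSanitizer treats each `leak:` line as a substring pattern matched against the frames of a leak's stack, and the file takes effect through the `LSAN_OPTIONS` environment variable. A sketch of wiring it into a Python test driver (the file path and invocation here are illustrative, not from the commit):

import os
import subprocess

env = dict(os.environ)
env["LSAN_OPTIONS"] = "suppressions=" + os.path.abspath("lsan.suppressions")
subprocess.run(["./fdb_c_api_tester", "--help"], env=env, check=True)
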
@@ -24,10 +24,12 @@ import sys
import platform
import os


def error(message):
    print(message)
    sys.exit(1)


def get_version_string(library_path):
    try:
        lib = ctypes.cdll.LoadLibrary(library_path)

@@ -58,6 +60,7 @@ def get_version_string(library_path):

    return version_str


if __name__ == '__main__':
    if platform.system() == 'Linux':
        default_lib = 'libfdb_c.so'

@@ -28,7 +28,6 @@ optional packages:
  sortedcontainers (for estimating key range read/write density)
"""

import argparse
from collections import defaultdict
from enum import Enum

@@ -55,7 +54,6 @@ supported_protocol_versions = frozenset([PROTOCOL_VERSION_5_2, PROTOCOL_VERSION_
                                         PROTOCOL_VERSION_6_2, PROTOCOL_VERSION_6_3, PROTOCOL_VERSION_7_0,
                                         PROTOCOL_VERSION_7_1, PROTOCOL_VERSION_7_2])

fdb.api_version(520)

BASIC_FORMAT = "%(asctime)s - %(levelname)-8s %(message)s"

@@ -188,6 +186,7 @@ class BaseInfo(object):
    """
    Corresponds to FdbClientLogEvents::Event
    """

    def __init__(self, bb, protocol_version):
        # we already read the EventType, so go straight to start_timestamp
        self.start_timestamp = bb.get_double()

@@ -197,6 +196,7 @@ class BaseInfo(object):
            if bb.get_bool():
                self.tenant = bb.get_bytes_with_length()


class GetVersionInfo(BaseInfo):
    def __init__(self, bb, protocol_version):
        super().__init__(bb, protocol_version)

@@ -206,6 +206,7 @@ class GetVersionInfo(BaseInfo):
        if protocol_version >= PROTOCOL_VERSION_6_3:
            self.read_version = bb.get_long()


class GetInfo(BaseInfo):
    def __init__(self, bb, protocol_version):
        super().__init__(bb, protocol_version)

@@ -244,11 +245,11 @@ class CommitInfo(BaseInfo):
            self.read_snapshot_version = bb.get_long()
        if protocol_version >= PROTOCOL_VERSION_6_3:
            self.report_conflicting_keys = bb.get_bool()

        if protocol_version >= PROTOCOL_VERSION_7_1:
            lock_aware = bb.get_bool()
            self.lock_aware = bb.get_bool()
            if bb.get_bool():
                spanId = bb.get_bytes(16)
                self.spanId = bb.get_bytes(16)


class ErrorGetInfo(BaseInfo):

@@ -285,9 +286,9 @@ class ErrorCommitInfo(BaseInfo):
            self.report_conflicting_keys = bb.get_bool()

        if protocol_version >= PROTOCOL_VERSION_7_1:
            lock_aware = bb.get_bool()
            self.lock_aware = bb.get_bool()
            if bb.get_bool():
                spanId = bb.get_bytes(16)
                self.spanId = bb.get_bytes(16)


class UnsupportedProtocolVersionError(Exception):

@@ -314,52 +315,57 @@ class ClientTransactionInfo:
            if event == 0:
                # we need to read it to consume the buffer even if we don't want to store it
                get_version = GetVersionInfo(bb, protocol_version)
                if (not type_filter or "get_version" in type_filter):
                if not type_filter or "get_version" in type_filter:
                    self.get_version = get_version
            elif event == 1:
                get = GetInfo(bb, protocol_version)
                if (not type_filter or "get" in type_filter):
                if not type_filter or "get" in type_filter:
                    # because of the crappy json serializtion using __dict__ we have to set the list here otherwise
                    # it doesn't print
                    if not self.gets: self.gets = []
                    if not self.gets:
                        self.gets = []
                    self.gets.append(get)
            elif event == 2:
                get_range = GetRangeInfo(bb, protocol_version)
                if (not type_filter or "get_range" in type_filter):
                    if not self.get_ranges: self.get_ranges = []
                if not type_filter or "get_range" in type_filter:
                    if not self.get_ranges:
                        self.get_ranges = []
                    self.get_ranges.append(get_range)
            elif event == 3:
                commit = CommitInfo(bb, protocol_version, full_output=full_output)
                if (not type_filter or "commit" in type_filter):
                if not type_filter or "commit" in type_filter:
                    self.commit = commit
            elif event == 4:
                error_get = ErrorGetInfo(bb, protocol_version)
                if (not type_filter or "error_gets" in type_filter):
                    if not self.error_gets: self.error_gets = []
                if not type_filter or "error_gets" in type_filter:
                    if not self.error_gets:
                        self.error_gets = []
                    self.error_gets.append(error_get)
            elif event == 5:
                error_get_range = ErrorGetRangeInfo(bb, protocol_version)
                if (not type_filter or "error_get_range" in type_filter):
                    if not self.error_get_ranges: self.error_get_ranges = []
                if not type_filter or "error_get_range" in type_filter:
                    if not self.error_get_ranges:
                        self.error_get_ranges = []
                    self.error_get_ranges.append(error_get_range)
            elif event == 6:
                error_commit = ErrorCommitInfo(bb, protocol_version, full_output=full_output)
                if (not type_filter or "error_commit" in type_filter):
                    if not self.error_commits: self.error_commits = []
                if not type_filter or "error_commit" in type_filter:
                    if not self.error_commits:
                        self.error_commits = []
                    self.error_commits.append(error_commit)
            else:
                raise Exception("Unknown event type %d" % event)

    def has_types(self):
        return self.get_version or self.gets or self.get_ranges or self.commit or self.error_gets \
               or self.error_get_ranges or self.error_commits
        return self.get_version or self.gets or self.get_ranges or self.commit \
               or self.error_gets or self.error_get_ranges or self.error_commits

    def to_json(self):
        return json.dumps(self, cls=ObjJsonEncoder, sort_keys=True)


class TransactionInfoLoader(object):
    max_num_chunks_to_store = 1000 # Each chunk would be 100 KB in size
    max_num_chunks_to_store = 1000  # Each chunk would be 100 KB in size

    def __init__(self, db, full_output=True, type_filter=None, min_timestamp=None, max_timestamp=None):
        self.db = db

@@ -433,7 +439,7 @@ class TransactionInfoLoader(object):
            reverse = False
        for k, v in tr.snapshot.get_range(start_key, end_key, limit=1, reverse=reverse):
            return fdb.tuple.unpack(v)[0]
        return 0 if start else 0x8000000000000000 # we didn't find any timekeeper data so find the max range
        return 0 if start else 0x8000000000000000  # we didn't find any timekeeper data so find the max range

    def fetch_transaction_info(self):
        if self.min_timestamp:

@@ -469,12 +475,12 @@ class TransactionInfoLoader(object):
                                                 streaming_mode=fdb.impl.StreamingMode.want_all)
                for k, v in transaction_info_range:
                    found += 1
                    #logger.debug(k)
                    # logger.debug(k)
                    start_key = fdb.KeySelector.first_greater_than(k)

                    _, tr_id, num_chunks, chunk_num = self.parse_key(k)

                    #logger.debug("num_chunks=%d, chunk_num=%d" % (num_chunks,chunk_num))
                    # logger.debug("num_chunks=%d, chunk_num=%d" % (num_chunks,chunk_num))

                    if num_chunks == 1:
                        assert chunk_num == 1

@@ -482,7 +488,7 @@ class TransactionInfoLoader(object):
                            info = build_client_transaction_info(v)
                            if info.has_types():
                                buffer.append(info)
                        except UnsupportedProtocolVersionError as e:
                        except UnsupportedProtocolVersionError:
                            invalid_transaction_infos += 1
                        except ValueError:
                            invalid_transaction_infos += 1

@@ -497,7 +503,8 @@ class TransactionInfoLoader(object):
                            self._check_and_adjust_chunk_cache_size()
                        else:
                            if tr_id not in self.tr_info_map:
                                logger.error("Got a middle chunk without getting beginning part. Discarding transaction id: %s\n" % tr_id)
                                logger.error(
                                    "Got a middle chunk without getting beginning part. Discarding transaction id: %s\n" % tr_id)
                                continue
                            c_list = self.tr_info_map[tr_id]
                            if c_list[-1].num_chunks != num_chunks or c_list[-1].chunk_num != chunk_num - 1:

@@ -513,7 +520,7 @@ class TransactionInfoLoader(object):
                                    info = build_client_transaction_info(b''.join([chunk.value for chunk in c_list]))
                                    if info.has_types():
                                        buffer.append(info)
                                except UnsupportedProtocolVersionError as e:
                                except UnsupportedProtocolVersionError:
                                    invalid_transaction_infos += 1
                                except ValueError:
                                    invalid_transaction_infos += 1

@@ -553,6 +560,7 @@ def has_dateparser():
        logger.warn("Can't find dateparser so disabling human date parsing")
        return False


class ReadCounter(object):
    def __init__(self):
        from sortedcontainers import SortedDict

@@ -560,7 +568,7 @@ class ReadCounter(object):
        self.reads[b''] = [0, 0]

        self.read_counts = {}
        self.hit_count=0
        self.hit_count = 0

    def process(self, transaction_info):
        for get in transaction_info.gets:

@@ -576,7 +584,7 @@ class ReadCounter(object):
        if end_key is not None:
            self.reads.setdefault(end_key, [0, 0])[1] += 1
        else:
            self.reads.setdefault(start_key+b'\x00', [0, 0])[1] += 1
            self.reads.setdefault(start_key + b'\x00', [0, 0])[1] += 1

    def get_total_reads(self):
        return sum([v for v in self.read_counts.values()])

@@ -673,8 +681,8 @@ class ShardFinder(object):
        self.shard_cache = {}

    def _get_boundary_keys(self, begin, end):
        start_pos = max(0, bisect_right(self.boundary_keys, begin)-1)
        end_pos = max(0, bisect_right(self.boundary_keys, end)-1)
        start_pos = max(0, bisect_right(self.boundary_keys, begin) - 1)
        end_pos = max(0, bisect_right(self.boundary_keys, end) - 1)

        return self.boundary_keys[start_pos:end_pos]

@@ -691,9 +699,9 @@ class ShardFinder(object):
        return len(self._get_boundary_keys(start_key, end_key)) + 1

    def get_addresses_for_key(self, key):
        shard = self.boundary_keys[max(0, bisect_right(self.boundary_keys, key)-1)]
        shard = self.boundary_keys[max(0, bisect_right(self.boundary_keys, key) - 1)]
        do_load = False
        if not shard in self.shard_cache:
        if shard not in self.shard_cache:
            do_load = True
        elif self.shard_cache[shard].is_ready():
            try:

@@ -708,7 +716,7 @@ class ShardFinder(object):
        for f in self.outstanding:
            try:
                f.wait()
            except fdb.FDBError as e:
            except fdb.FDBError:
                pass

        self.outstanding = []

@@ -726,10 +734,13 @@ class ShardFinder(object):
            if item[addr_idx] is not None:
                while True:
                    try:
                        ranges[index] = item[0:addr_idx] + ([a.decode('ascii') for a in item[addr_idx].wait()],) + item[addr_idx+1:]
                        ranges[index] = item[0:addr_idx] + ([a.decode('ascii') for a in item[addr_idx].wait()],) \
                                        + item[addr_idx + 1:]
                        break
                    except fdb.FDBError as e:
                        ranges[index] = item[0:addr_idx] + (self.get_addresses_for_key(item[key_idx]),) + item[addr_idx+1:]
                    except fdb.FDBError:
                        ranges[index] = item[0:addr_idx] + (self.get_addresses_for_key(item[key_idx]),) \
                                        + item[addr_idx + 1:]


class WriteCounter(object):
    mutation_types_to_consider = frozenset([MutationType.SET_VALUE, MutationType.ADD_VALUE])

@@ -795,10 +806,11 @@ class WriteCounter(object):
            filter_addresses = set(filter_addresses)
            results = [r for r in results if filter_addresses.issubset(set(r[3]))][0:num]
        else:
            results = [(key, end, count) for (count, key) in count_pairs[0:num]]
            results = [(key, None, count) for (count, key) in count_pairs[0:num]]

        return results


def connect(cluster_file=None):
    db = fdb.open(cluster_file=cluster_file)
    return db

@@ -831,22 +843,34 @@ def main():
    end_time_group = parser.add_mutually_exclusive_group()
    end_time_group.add_argument("--max-timestamp", type=int, help="Don't return events newer than this epoch time")
    end_time_group.add_argument("-e", "--end-time", type=str, help="Don't return events older than this parsed time")
    parser.add_argument("--num-buckets", type=int, help="The number of buckets to partition the key-space into for operation counts", default=100)
    parser.add_argument("--top-requests", type=int, help="If specified will output this many top keys for reads or writes", default=0)
    parser.add_argument("--exclude-ports", action="store_true", help="Print addresses without the port number. Only works in versions older than 6.3, and is required in versions older than 6.2.")
    parser.add_argument("--single-shard-ranges-only", action="store_true", help="Only print range boundaries that exist in a single shard")
    parser.add_argument("-a", "--filter-address", action="append", help="Only print range boundaries that include the given address. This option can used multiple times to include more than one address in the filter, in which case all addresses must match.")
    parser.add_argument("--num-buckets", type=int,
                        help="The number of buckets to partition the key-space into for operation counts", default=100)
    parser.add_argument("--top-requests", type=int,
                        help="If specified will output this many top keys for reads or writes", default=0)
    parser.add_argument("--exclude-ports", action="store_true",
                        help="Print addresses without the port number. Only works in versions older than 6.3, and is required in versions older than 6.2.")
    parser.add_argument("--single-shard-ranges-only", action="store_true",
                        help="Only print range boundaries that exist in a single shard")
    parser.add_argument("-a", "--filter-address", action="append",
                        help="Only print range boundaries that include the given address. This option can used multiple times to include more than one address in the filter, in which case all addresses must match.")

    args = parser.parse_args()

    type_filter = set()
    if args.filter_get_version: type_filter.add("get_version")
    if args.filter_get or args.filter_reads: type_filter.add("get")
    if args.filter_get_range or args.filter_reads: type_filter.add("get_range")
    if args.filter_commit: type_filter.add("commit")
    if args.filter_error_get: type_filter.add("error_get")
    if args.filter_error_get_range: type_filter.add("error_get_range")
    if args.filter_error_commit: type_filter.add("error_commit")
    if args.filter_get_version:
        type_filter.add("get_version")
    if args.filter_get or args.filter_reads:
        type_filter.add("get")
    if args.filter_get_range or args.filter_reads:
        type_filter.add("get_range")
    if args.filter_commit:
        type_filter.add("commit")
    if args.filter_error_get:
        type_filter.add("error_get")
    if args.filter_error_get_range:
        type_filter.add("error_get_range")
    if args.filter_error_commit:
        type_filter.add("error_commit")

    if (not type_filter or "commit" in type_filter):
        write_counter = WriteCounter() if args.num_buckets else None

@@ -912,7 +936,8 @@ def main():
            else:
                op_str = 'Key %r' % start

            print(" %d. %s\n %d sampled %s (%.2f%%, %.2f%% cumulative)" % (idx+1, op_str, count, context, 100*count/total, 100*running_count/total))
            print(" %d. %s\n %d sampled %s (%.2f%%, %.2f%% cumulative)" % (
                idx + 1, op_str, count, context, 100 * count / total, 100 * running_count / total))
            print(" shard addresses: %s\n" % ", ".join(addresses))

        else:

@@ -933,10 +958,10 @@ def main():

            if not omit:
                if omit_start is not None:
                    if omit_start == idx-1:
                    if omit_start == idx - 1:
                        print(" %d. Omitted\n" % (idx))
                    else:
                        print(" %d - %d. Omitted\n" % (omit_start+1, idx))
                        print(" %d - %d. Omitted\n" % (omit_start + 1, idx))
                    omit_start = None

                if total_count is None:

@@ -944,18 +969,19 @@ def main():
                else:
                    count_str = '%d sampled %s (%d intersecting)' % (start_count, context, total_count)
                if not shard_count:
                    print(" %d. [%s, %s]\n %d sampled %s\n" % (idx+1, start, end, count, context))
                    print(" %d. [%s, %s]\n %s\n" % (idx + 1, start, end, count_str))
                else:
                    addresses_string = "; addresses=%s" % ', '.join(addresses) if addresses else ''
                    print(" %d. [%s, %s]\n %s spanning %d shard(s)%s\n" % (idx+1, start, end, count_str, shard_count, addresses_string))
                    print(" %d. [%s, %s]\n %s spanning %d shard(s)%s\n" % (
                        idx + 1, start, end, count_str, shard_count, addresses_string))
            elif omit_start is None:
                omit_start = idx

    if omit_start is not None:
        if omit_start == len(range_boundaries)-1:
        if omit_start == len(range_boundaries) - 1:
            print(" %d. Omitted\n" % len(range_boundaries))
        else:
            print(" %d - %d. Omitted\n" % (omit_start+1, len(range_boundaries)))
            print(" %d - %d. Omitted\n" % (omit_start + 1, len(range_boundaries)))

    shard_finder = ShardFinder(db, args.exclude_ports)

@@ -963,7 +989,8 @@ def main():

    if write_counter:
        if args.top_requests:
            top_writes = write_counter.get_top_k_writes(args.top_requests, args.filter_address, shard_finder=shard_finder)
            top_writes = write_counter.get_top_k_writes(args.top_requests, args.filter_address,
                                                        shard_finder=shard_finder)

        range_boundaries = write_counter.get_range_boundaries(args.num_buckets, shard_finder=shard_finder)
        num_writes = write_counter.get_total_writes()

@@ -1014,5 +1041,6 @@ def main():
        print("Key-space boundaries with approximately equal read counts:\n")
        print_range_boundaries(range_boundaries, "reads")


if __name__ == "__main__":
    main()

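The loader above stitches multi-chunk transaction records back together: chunks are buffered per transaction id, middle chunks without a beginning are discarded, and the record is parsed only once the final chunk arrives. A simplified model of that bookkeeping (plain tuples, no FDB access):

def reassemble(records):
    # records are (tr_id, num_chunks, chunk_num, value) tuples in key order.
    partial = {}
    for tr_id, num_chunks, chunk_num, value in records:
        if num_chunks == 1:
            assert chunk_num == 1
            yield tr_id, value
        elif chunk_num == 1:
            partial[tr_id] = [value]      # first chunk opens a buffer
        elif tr_id not in partial:
            continue                      # middle chunk without a beginning: discard
        else:
            partial[tr_id].append(value)
            if chunk_num == num_chunks:   # last chunk: emit the joined record
                yield tr_id, b''.join(partial.pop(tr_id))

print(list(reassemble([(b'a', 1, 1, b'x'), (b'b', 2, 1, b'y'), (b'b', 2, 2, b'z')])))
# [(b'a', b'x'), (b'b', b'yz')]
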
@@ -105,8 +105,8 @@ class RangeCounterTest(unittest.TestCase):
            assert rc_count == v, "Counts for %s mismatch. Expected %d got %d" % (k, v, rc_count)

        for _ in range(0, 100):
            i = random.randint(0, len(letters)-1)
            j = random.randint(0, len(letters)-2)
            i = random.randint(0, len(letters) - 1)
            j = random.randint(0, len(letters) - 2)
            if i == j:
                j += 1
            start_index = min(i, j)

@@ -123,4 +123,4 @@ class RangeCounterTest(unittest.TestCase):


if __name__ == "__main__":
    unittest.main() # run all tests
    unittest.main()  # run all tests

@@ -321,7 +321,7 @@ and pass the test with ``-f``:
Running a Workload on an actual Cluster
=======================================

Running a workload on a cluster works basically the smae way. However, one must
Running a workload on a cluster works basically the same way. However, one must
actually setup a cluster first. This cluster must run between one and many server
processes with the class test. So above 2-step process becomes a bit more complex:

@@ -890,8 +890,18 @@
            }
        }
    },
    "tenants":{
        "num_tenants":0
    "metacluster" : {
        "cluster_type" : "management", // management, data, or standalone
        "metacluster_name" : "metacluster1",
        "metacluster_id" : 12345,
        "data_cluster_name" : "data_cluster1", // data cluster only
        "data_cluster_id" : 12346, // data cluster only
        "num_data_clusters": 10 // management cluster only
    },
    "tenants" : {
        "num_tenants" : 1, // on data cluster, local count; on management cluster, total metacluster count
        "num_tenant_groups" : 10,
        "tenant_group_capacity" : 20,
    }
    },
    "client":{

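The documented status output gains a `metacluster` section and extra `tenants` counters. A sketch of reading them with the Python bindings via the status special key (assumes a reachable cluster; the API version here is illustrative):

import json
import fdb

fdb.api_version(710)
db = fdb.open()
status = json.loads(db[b'\xff\xff/status/json'])
cluster = status.get("cluster", {})
print(cluster.get("metacluster", {}).get("cluster_type"))
print(cluster.get("tenants", {}).get("num_tenants"))
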
@@ -36,7 +36,8 @@ ACTOR Future<bool> blobRestoreCommandActor(Database localDb, std::vector<StringR
    state bool success = false;
    wait(store(success, localDb->blobRestore(normalKeys)));
    if (success) {
        fmt::print("Started blob restore for the full cluster. Please use 'status' command to check progress.\n");
        fmt::print(
            "Started blob restore for the full cluster. Please use 'status details' command to check progress.\n");
    } else {
        fmt::print("Fail to start a new blob restore while there is a pending one.\n");
    }

@@ -326,7 +326,7 @@ CommandFactory configureFactory(
    "count=<TSS_COUNT>|perpetual_storage_wiggle=<WIGGLE_SPEED>|perpetual_storage_wiggle_locality="
    "<<LOCALITY_KEY>:<LOCALITY_VALUE>|0>|storage_migration_type={disabled|gradual|aggressive}"
    "|tenant_mode={disabled|optional_experimental|required_experimental}|blob_granules_enabled={0|1}"
    "|encryption_at_rest_mode={disabled|aes_256_ctr}",
    "|encryption_at_rest_mode={disabled|domain_aware|cluster_aware}",
    "change the database configuration",
    "The `new' option, if present, initializes a new database with the given configuration rather than changing "
    "the configuration of an existing one. When used, both a redundancy mode and a storage engine must be "

@@ -360,7 +360,8 @@ CommandFactory configureFactory(
    "tenant_mode=<disabled|optional_experimental|required_experimental>: Sets the tenant mode for the cluster. If "
    "optional, then transactions can be run with or without specifying tenants. If required, all data must be "
    "accessed using tenants.\n\n"
    "encryption_at_rest_mode=<disabled|aes_256_ctr>: Sets the cluster encryption data at-rest support for the "
    "encryption_at_rest_mode=<disabled|domain_aware|cluster_aware>: Sets the cluster encryption data at-rest "
    "support for the "
    "database. The configuration can be updated ONLY at the time of database creation and once set can't be "
    "updated for the lifetime of the database.\n\n"

@@ -1125,6 +1125,15 @@ void printStatus(StatusObjectReader statusObj,
                outputString += "\n  Number of Workers - " + format("%d", numWorkers);
                auto numKeyRanges = statusObjBlobGranules["number_of_key_ranges"].get_int();
                outputString += "\n  Number of Key Ranges - " + format("%d", numKeyRanges);
                if (statusObjCluster.has("blob_restore")) {
                    StatusObjectReader statusObjBlobRestore = statusObjCluster["blob_restore"];
                    std::string restoreStatus = statusObjBlobRestore["blob_full_restore_phase"].get_str();
                    if (statusObjBlobRestore.has("blob_full_restore_progress")) {
                        auto progress = statusObjBlobRestore["blob_full_restore_progress"].get_int();
                        restoreStatus += " " + format("%d%%", progress);
                    }
                    outputString += "\n  Full Restore - " + restoreStatus;
                }
            }
        }

@@ -294,6 +294,7 @@ void ClientKnobs::initialize(Randomize randomize) {
    init( METACLUSTER_ASSIGNMENT_FIRST_CHOICE_DELAY, 1.0 ); if ( randomize && BUGGIFY ) METACLUSTER_ASSIGNMENT_FIRST_CHOICE_DELAY = deterministicRandom()->random01() * 60;
    init( METACLUSTER_ASSIGNMENT_AVAILABILITY_TIMEOUT, 10.0 ); if ( randomize && BUGGIFY ) METACLUSTER_ASSIGNMENT_AVAILABILITY_TIMEOUT = 1 + deterministicRandom()->random01() * 59;
    init( TENANT_ENTRY_CACHE_LIST_REFRESH_INTERVAL, 2 ); if( randomize && BUGGIFY ) TENANT_ENTRY_CACHE_LIST_REFRESH_INTERVAL = deterministicRandom()->randomInt(1, 10);
    init( CLIENT_ENABLE_USING_CLUSTER_ID_KEY, false );

    init( ENABLE_ENCRYPTION_CPU_TIME_LOGGING, false );
    // clang-format on

@@ -206,10 +206,12 @@ std::map<std::string, std::string> configForToken(std::string const& mode) {
        EncryptionAtRestMode mode;
        if (value == "disabled") {
            mode = EncryptionAtRestMode::DISABLED;
        } else if (value == "aes_256_ctr") {
            mode = EncryptionAtRestMode::AES_256_CTR;
        } else if (value == "domain_aware") {
            mode = EncryptionAtRestMode::DOMAIN_AWARE;
        } else if (value == "cluster_aware") {
            mode = EncryptionAtRestMode::CLUSTER_AWARE;
        } else {
            printf("Error: Only disabled|aes_256_ctr are valid for encryption_at_rest_mode.\n");
            printf("Error: Only disabled|domain_aware|cluster_aware are valid for encryption_at_rest_mode.\n");
            return out;
        }
        out[p + key] = format("%d", mode);

@@ -465,6 +467,168 @@ bool isCompleteConfiguration(std::map<std::string, std::string> const& options)
           options.count(p + "storage_engine") == 1;
}

/*
 - Validates encryption and tenant mode configurations
 - During cluster creation (configure new) we allow the following:
   - If encryption mode is disabled/cluster_aware then any tenant mode is allowed
   - If the encryption mode is domain_aware then the only allowed tenant mode is required
 - During cluster configuration changes the following is allowed:
   - Encryption mode cannot be changed (can only be set during creation)
   - If the encryption mode is disabled/cluster_aware then any tenant mode changes are allowed
   - If the encryption mode is domain_aware then tenant mode changes are not allowed (as the only supported mode is
 required)
*/
bool isEncryptionAtRestModeConfigValid(Optional<DatabaseConfiguration> oldConfiguration,
                                       std::map<std::string, std::string> newConfig,
                                       bool creating) {
    EncryptionAtRestMode encryptMode;
    TenantMode tenantMode;
    if (creating) {
        if (newConfig.count(encryptionAtRestModeConfKey.toString()) != 0) {
            encryptMode = EncryptionAtRestMode::fromValueRef(
                ValueRef(newConfig.find(encryptionAtRestModeConfKey.toString())->second));
            // check if the tenant mode is being set during configure new (otherwise assume tenants are disabled)
            if (newConfig.count(tenantModeConfKey.toString()) != 0) {
                tenantMode = TenantMode::fromValue(ValueRef(newConfig.find(tenantModeConfKey.toString())->second));
            }
        }
    } else {
        ASSERT(oldConfiguration.present());
        encryptMode = oldConfiguration.get().encryptionAtRestMode;
        if (newConfig.count(tenantModeConfKey.toString()) != 0) {
            tenantMode = TenantMode::fromValue(ValueRef(newConfig.find(tenantModeConfKey.toString())->second));
        } else {
            // Tenant mode and encryption mode didn't change
            return true;
        }
    }
    TraceEvent(SevDebug, "EncryptAndTenantModes")
        .detail("EncryptMode", encryptMode.toString())
        .detail("TenantMode", tenantMode.toString());

    if (encryptMode.mode == EncryptionAtRestMode::DOMAIN_AWARE && tenantMode != TenantMode::REQUIRED) {
        // For domain aware encryption only the required tenant mode is currently supported
        TraceEvent(SevWarnAlways, "InvalidEncryptAndTenantConfiguration")
            .detail("EncryptMode", encryptMode.toString())
            .detail("TenantMode", tenantMode.toString());
        return false;
    }

    return true;
}

bool isTenantModeModeConfigValid(DatabaseConfiguration oldConfiguration, DatabaseConfiguration newConfiguration) {
    TenantMode oldTenantMode = oldConfiguration.tenantMode;
    TenantMode newTenantMode = newConfiguration.tenantMode;
    TraceEvent(SevDebug, "TenantModes")
        .detail("OldTenantMode", oldTenantMode.toString())
        .detail("NewTenantMode", newTenantMode.toString());
    if (oldTenantMode != TenantMode::REQUIRED && newTenantMode == TenantMode::REQUIRED) {
        // TODO: Changing from optional/disabled to required tenant mode should be allowed if there is no non-tenant
        // data present
        TraceEvent(SevWarnAlways, "InvalidTenantConfiguration")
            .detail("OldTenantMode", oldTenantMode.toString())
            .detail("NewTenantMode", newTenantMode.toString());
        return false;
    }
    return true;
}

TEST_CASE("/ManagementAPI/ChangeConfig/TenantMode") {
    DatabaseConfiguration oldConfig;
    DatabaseConfiguration newConfig;
    std::vector<TenantMode> tenantModes = { TenantMode::DISABLED, TenantMode::OPTIONAL_TENANT, TenantMode::REQUIRED };
    // required tenant mode can change to any other tenant mode
    oldConfig.tenantMode = TenantMode::REQUIRED;
    newConfig.tenantMode = deterministicRandom()->randomChoice(tenantModes);
    ASSERT(isTenantModeModeConfigValid(oldConfig, newConfig));
    // optional/disabled tenant mode can switch to optional/disabled tenant mode
    oldConfig.tenantMode = deterministicRandom()->coinflip() ? TenantMode::DISABLED : TenantMode::OPTIONAL_TENANT;
    newConfig.tenantMode = deterministicRandom()->coinflip() ? TenantMode::DISABLED : TenantMode::OPTIONAL_TENANT;
    ASSERT(isTenantModeModeConfigValid(oldConfig, newConfig));
    // optional/disabled tenant mode CANNOT switch to required tenant mode
    oldConfig.tenantMode = deterministicRandom()->coinflip() ? TenantMode::DISABLED : TenantMode::OPTIONAL_TENANT;
    newConfig.tenantMode = TenantMode::REQUIRED;
    ASSERT(!isTenantModeModeConfigValid(oldConfig, newConfig));

    return Void();
}

// unit test for changing encryption/tenant mode config options
TEST_CASE("/ManagementAPI/ChangeConfig/TenantAndEncryptMode") {
    std::map<std::string, std::string> newConfig;
    std::string encryptModeKey = encryptionAtRestModeConfKey.toString();
    std::string tenantModeKey = tenantModeConfKey.toString();
    std::vector<TenantMode> tenantModes = { TenantMode::DISABLED, TenantMode::OPTIONAL_TENANT, TenantMode::REQUIRED };
    std::vector<EncryptionAtRestMode> encryptionModes = { EncryptionAtRestMode::DISABLED,
                                                          EncryptionAtRestMode::CLUSTER_AWARE,
                                                          EncryptionAtRestMode::DOMAIN_AWARE };
    // configure new test cases

    // encryption disabled checks
    newConfig[encryptModeKey] = std::to_string(EncryptionAtRestMode::DISABLED);
    newConfig[tenantModeKey] = std::to_string(deterministicRandom()->randomChoice(tenantModes));
    ASSERT(isEncryptionAtRestModeConfigValid(Optional<DatabaseConfiguration>(), newConfig, true));

    // cluster aware encryption checks
    newConfig[encryptModeKey] = std::to_string(EncryptionAtRestMode::CLUSTER_AWARE);
    newConfig[tenantModeKey] = std::to_string(deterministicRandom()->randomChoice(tenantModes));
    ASSERT(isEncryptionAtRestModeConfigValid(Optional<DatabaseConfiguration>(), newConfig, true));

    // domain aware encryption checks
    newConfig[encryptModeKey] = std::to_string(EncryptionAtRestMode::DOMAIN_AWARE);
    newConfig[tenantModeKey] =
        std::to_string(deterministicRandom()->coinflip() ? TenantMode::DISABLED : TenantMode::OPTIONAL_TENANT);
    ASSERT(!isEncryptionAtRestModeConfigValid(Optional<DatabaseConfiguration>(), newConfig, true));
    newConfig[tenantModeKey] = std::to_string(TenantMode::REQUIRED);
    ASSERT(isEncryptionAtRestModeConfigValid(Optional<DatabaseConfiguration>(), newConfig, true));

    // no encrypt mode present
    newConfig.erase(encryptModeKey);
    newConfig[tenantModeKey] = std::to_string(deterministicRandom()->randomChoice(tenantModes));
    ASSERT(isEncryptionAtRestModeConfigValid(Optional<DatabaseConfiguration>(), newConfig, true));

    // no tenant mode present
    newConfig.erase(tenantModeKey);
    newConfig[encryptModeKey] = std::to_string(EncryptionAtRestMode::DOMAIN_AWARE);
    ASSERT(!isEncryptionAtRestModeConfigValid(Optional<DatabaseConfiguration>(), newConfig, true));
    newConfig[encryptModeKey] = std::to_string(EncryptionAtRestMode::CLUSTER_AWARE);
    ASSERT(isEncryptionAtRestModeConfigValid(Optional<DatabaseConfiguration>(), newConfig, true));

    // change config test cases
    DatabaseConfiguration oldConfig;

    // encryption disabled checks
    oldConfig.encryptionAtRestMode = EncryptionAtRestMode::DISABLED;
    oldConfig.tenantMode = deterministicRandom()->randomChoice(tenantModes);
    newConfig[tenantModeKey] = std::to_string(deterministicRandom()->randomChoice(tenantModes));
    ASSERT(isEncryptionAtRestModeConfigValid(oldConfig, newConfig, false));

    // domain aware encryption checks
    oldConfig.encryptionAtRestMode = EncryptionAtRestMode::DOMAIN_AWARE;
    oldConfig.tenantMode = TenantMode::REQUIRED;
    newConfig[tenantModeKey] =
        std::to_string(deterministicRandom()->coinflip() ? TenantMode::DISABLED : TenantMode::OPTIONAL_TENANT);
    ASSERT(!isEncryptionAtRestModeConfigValid(oldConfig, newConfig, false));
    newConfig[tenantModeKey] = std::to_string(TenantMode::REQUIRED);
    ASSERT(isEncryptionAtRestModeConfigValid(oldConfig, newConfig, false));

    // cluster aware encryption checks
    oldConfig.encryptionAtRestMode = EncryptionAtRestMode::CLUSTER_AWARE;
    // required tenant mode can switch to any other tenant mode with cluster aware encryption
    oldConfig.tenantMode = deterministicRandom()->randomChoice(tenantModes);
    newConfig[tenantModeKey] = std::to_string(deterministicRandom()->randomChoice(tenantModes));
    ASSERT(isEncryptionAtRestModeConfigValid(oldConfig, newConfig, false));

    // no tenant mode present
    newConfig.erase(tenantModeKey);
    oldConfig.tenantMode = deterministicRandom()->randomChoice(tenantModes);
    oldConfig.encryptionAtRestMode = deterministicRandom()->randomChoice(encryptionModes);
    ASSERT(isEncryptionAtRestModeConfigValid(oldConfig, newConfig, false));

    return Void();
}

ACTOR Future<DatabaseConfiguration> getDatabaseConfiguration(Transaction* tr) {
    tr->setOption(FDBTransactionOptions::READ_LOCK_AWARE);
    tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);

@@ -962,6 +1126,14 @@ ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
    if (!disableConfigDB) {
        wait(verifyConfigurationDatabaseAlive(tr->getDatabase()));
    }
    if (BUGGIFY_WITH_PROB(0.1)) {
        // Introduce a random delay in simulation to allow processes to be
        // killed before previousCoordinatorKeys has been reset. This will
        // help test scenarios where the previous configuration database
        // state has been transferred to the new coordinators but the
        // broadcaster thinks it has not been transferred.
        wait(delay(deterministicRandom()->random01() * 10));
    }
    wait(resetPreviousCoordinatorsKey(tr->getDatabase()));
    return CoordinatorsResult::SAME_NETWORK_ADDRESSES;
}

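The comment block above boils down to one compatibility rule: domain-aware encryption requires the `required` tenant mode, while disabled and cluster-aware encryption accept any tenant mode. Restated as a minimal Python model (a sketch of the rule, not the C++ implementation):

def encrypt_tenant_config_valid(encrypt_mode, tenant_mode):
    # domain_aware encryption is only supported with required tenants.
    return encrypt_mode != "domain_aware" or tenant_mode == "required"

assert encrypt_tenant_config_valid("disabled", "optional")
assert encrypt_tenant_config_valid("cluster_aware", "disabled")
assert encrypt_tenant_config_valid("domain_aware", "required")
assert not encrypt_tenant_config_valid("domain_aware", "optional")
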
@@ -1548,17 +1548,19 @@ ThreadFuture<Void> MultiVersionTransaction::onError(Error const& e) {
        auto f = tr.transaction ? tr.transaction->onError(e) : makeTimeout<Void>();
        f = abortableFuture(f, tr.onChange);

        return flatMapThreadFuture<Void, Void>(f, [this, e](ErrorOr<Void> ready) {
            if (!ready.isError() || ready.getError().code() != error_code_cluster_version_changed) {
                if (ready.isError()) {
                    return ErrorOr<ThreadFuture<Void>>(ready.getError());
                }

        return flatMapThreadFuture<Void, Void>(f, [this](ErrorOr<Void> ready) {
            if (ready.isError() && ready.getError().code() == error_code_cluster_version_changed) {
                // In case of a cluster version change, upgrade (or downgrade) the transaction
                // and let it to be retried independently of the original error
                updateTransaction();
                return ErrorOr<ThreadFuture<Void>>(Void());
            }
            // In all other cases forward the result of the inner onError call
            if (ready.isError()) {
                return ErrorOr<ThreadFuture<Void>>(ready.getError());
            } else {
                return ErrorOr<ThreadFuture<Void>>(Void());
            }

            updateTransaction();
            return ErrorOr<ThreadFuture<Void>>(onError(e));
        });
    }
}

@@ -2968,7 +2970,7 @@ ACTOR Future<std::string> updateClusterSharedStateMapImpl(MultiVersionApi* self,
    // The cluster ID will be the connection record string (either a filename or the connection string itself)
    // in versions before we could read the cluster ID.
    state std::string clusterId = connectionRecord.toString();
    if (dbProtocolVersion.hasClusterIdSpecialKey()) {
    if (CLIENT_KNOBS->CLIENT_ENABLE_USING_CLUSTER_ID_KEY && dbProtocolVersion.hasClusterIdSpecialKey()) {
        state Reference<ITransaction> tr = db->createTransaction();
        loop {
            try {

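The rewritten lambda changes which outcome triggers a retry: `cluster_version_changed` now swaps in a transaction for the new cluster version and reports success so the caller retries, while every other result of the inner `onError` is forwarded unchanged. A Python restatement of that control flow (a sketch, not the C++ API):

CLUSTER_VERSION_CHANGED = "cluster_version_changed"

def on_error_result(inner_error, update_transaction):
    if inner_error == CLUSTER_VERSION_CHANGED:
        update_transaction()  # upgrade/downgrade, then retry independently
        return "retry"
    if inner_error is not None:
        return inner_error    # forward any other inner onError failure
    return "retry"            # inner onError succeeded: safe to retry

assert on_error_result(None, lambda: None) == "retry"
assert on_error_result("commit_unknown_result", lambda: None) == "commit_unknown_result"
assert on_error_result(CLUSTER_VERSION_CHANGED, lambda: None) == "retry"
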
@ -7110,11 +7110,11 @@ ACTOR Future<Void> readVersionBatcher(DatabaseContext* cx,
|
|||
state Reference<Histogram> batchIntervalDist =
|
||||
Histogram::getHistogram("GrvBatcher"_sr,
|
||||
"ClientGrvBatchInterval"_sr,
|
||||
Histogram::Unit::microseconds,
|
||||
Histogram::Unit::milliseconds,
|
||||
0,
|
||||
CLIENT_KNOBS->GRV_BATCH_TIMEOUT * 1000000 * 2);
|
||||
state Reference<Histogram> grvReplyLatencyDist =
|
||||
Histogram::getHistogram("GrvBatcher"_sr, "ClientGrvReplyLatency"_sr, Histogram::Unit::microseconds);
|
||||
Histogram::getHistogram("GrvBatcher"_sr, "ClientGrvReplyLatency"_sr, Histogram::Unit::milliseconds);
|
||||
state double lastRequestTime = now();
|
||||
|
||||
state TransactionTagMap<uint32_t> tags;
|
||||
|
@ -10732,12 +10732,13 @@ ACTOR Future<Key> purgeBlobGranulesActor(Reference<DatabaseContext> db,

// must be aligned to blob range(s)
state Future<Standalone<VectorRef<KeyRangeRef>>> blobbifiedBegin =
getBlobRanges(&tr, KeyRangeRef(purgeRange.begin, purgeRange.begin), 2);
getBlobRanges(&tr, KeyRangeRef(purgeRange.begin, keyAfter(purgeRange.begin)), 1);
state Future<Standalone<VectorRef<KeyRangeRef>>> blobbifiedEnd =
getBlobRanges(&tr, KeyRangeRef(purgeRange.end, purgeRange.end), 2);
getBlobRanges(&tr, KeyRangeRef(purgeRange.end, keyAfter(purgeRange.end)), 1);
wait(success(blobbifiedBegin) && success(blobbifiedEnd));
// If there are no blob ranges on the boundary that's okay as we allow purging of multiple full ranges.
if ((!blobbifiedBegin.get().empty() && blobbifiedBegin.get().front().begin < purgeRange.begin) ||
(!blobbifiedEnd.get().empty() && blobbifiedEnd.get().back().end > purgeRange.end)) {
(!blobbifiedEnd.get().empty() && blobbifiedEnd.get().front().begin < purgeRange.end)) {
TraceEvent("UnalignedPurge")
.detail("Range", range)
.detail("Version", purgeVersion)

@ -10941,8 +10942,7 @@ ACTOR Future<bool> blobRestoreActor(Reference<DatabaseContext> cx, KeyRange rang
return false; // stop if there is in-progress restore.
}
}
Standalone<BlobRestoreStatus> status;
status.progress = 0;
BlobRestoreStatus status(BlobRestorePhase::INIT);
Value newValue = blobRestoreCommandValueFor(status);
tr->set(key, newValue);
wait(tr->commit());

@ -218,8 +218,12 @@ class GetGenerationQuorum {
if (self->coordinatorsChangedFuture.isReady()) {
throw coordinators_changed();
}
wait(delayJittered(std::clamp(
0.005 * (1 << std::min(retries, 30)), 0.0, CLIENT_KNOBS->TIMEOUT_RETRY_UPPER_BOUND)));
if (deterministicRandom()->random01() < 0.95) {
// Add some random jitter to prevent clients from
// contending.
wait(delayJittered(std::clamp(
0.006 * (1 << std::min(retries, 30)), 0.0, CLIENT_KNOBS->TIMEOUT_RETRY_UPPER_BOUND)));
}
if (deterministicRandom()->random01() < 0.05) {
// Randomly inject a delay of at least the generation
// reply timeout, to try to prevent contention between

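The new code above splits the old unconditional backoff into a 95% jittered-exponential-backoff path and a 5% long-delay path to break contention between clients. A self-contained sketch of the exponential part, with the 0.006 base, the shift cap of 30, and the clamp taken from the hunk (the uniform jitter band is a stand-in for delayJittered's behavior):

#include <algorithm>
#include <cstdio>
#include <random>

double backoffDelay(int retries, double upperBound, std::mt19937& rng) {
    // 0.006 * 2^retries, with the shift capped so it stays well-defined,
    // then clamped to the configured upper bound.
    double base = 0.006 * (1 << std::min(retries, 30));
    double clamped = std::clamp(base, 0.0, upperBound);
    std::uniform_real_distribution<double> jitter(0.5, 1.0); // hypothetical jitter band
    return clamped * jitter(rng);
}

int main() {
    std::mt19937 rng(7);
    for (int r = 0; r < 8; ++r)
        std::printf("retry %d -> %.3fs\n", r, backoffDelay(r, 60.0, rng));
}
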
@ -855,7 +855,8 @@ const KeyRef JSONSchemas::statusSchema = R"statusSchema(
"encryption_at_rest_mode": {
"$enum":[
"disabled",
"aes_256_ctr"
"domain_aware",
"cluster_aware"
]}
},
"consistency_scan_info":{

@ -963,11 +964,18 @@ const KeyRef JSONSchemas::statusSchema = R"statusSchema(
}
}
},
"tenants":{
"num_tenants":0
},
"metacluster" : {
"cluster_type" : "standalone"
"cluster_type" : "management",
"metacluster_name":"metacluster1",
"metacluster_id":12345,
"data_cluster_name" : "data_cluster1",
"data_cluster_id" : 12346,
"num_data_clusters":10
},
"tenants":{
"num_tenants":0,
"num_tenant_groups":10,
"tenant_group_capacity":20
}
},
"client":{

@ -301,6 +301,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( TENANT_CACHE_LIST_REFRESH_INTERVAL, 2 ); if( randomize && BUGGIFY ) TENANT_CACHE_LIST_REFRESH_INTERVAL = deterministicRandom()->randomInt(1, 10);
init( TENANT_CACHE_STORAGE_USAGE_REFRESH_INTERVAL, 2 ); if( randomize && BUGGIFY ) TENANT_CACHE_STORAGE_USAGE_REFRESH_INTERVAL = deterministicRandom()->randomInt(1, 10);
init( TENANT_CACHE_STORAGE_QUOTA_REFRESH_INTERVAL, 10 ); if( randomize && BUGGIFY ) TENANT_CACHE_STORAGE_QUOTA_REFRESH_INTERVAL = deterministicRandom()->randomInt(1, 10);
init( TENANT_CACHE_STORAGE_USAGE_TRACE_INTERVAL, 300 );
init( CP_FETCH_TENANTS_OVER_STORAGE_QUOTA_INTERVAL, 5 ); if( randomize && BUGGIFY ) CP_FETCH_TENANTS_OVER_STORAGE_QUOTA_INTERVAL = deterministicRandom()->randomInt(1, 10);

// TeamRemover

@ -390,19 +391,22 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
// If true, do not process and store RocksDB logs
init( ROCKSDB_MUTE_LOGS, true );
// Use a smaller memtable in simulation to avoid OOMs.
int64_t memtableBytes = isSimulated ? 32 * 1024 : 512 * 1024 * 1024;
int64_t memtableBytes = isSimulated ? 1024 * 1024 : 512 * 1024 * 1024;
init( ROCKSDB_MEMTABLE_BYTES, memtableBytes );
init( ROCKSDB_LEVEL_STYLE_COMPACTION, true );
init( ROCKSDB_UNSAFE_AUTO_FSYNC, false );
init( ROCKSDB_PERIODIC_COMPACTION_SECONDS, 0 );
init( ROCKSDB_PREFIX_LEN, 0 );
// If rocksdb block cache size is 0, the default 8MB is used.
int64_t blockCacheSize = isSimulated ? 0 : 1024 * 1024 * 1024 /* 1GB */;
int64_t blockCacheSize = isSimulated ? 16 * 1024 * 1024 : 1024 * 1024 * 1024 /* 1GB */;
init( ROCKSDB_BLOCK_CACHE_SIZE, blockCacheSize );
init( ROCKSDB_METRICS_DELAY, 60.0 );
init( ROCKSDB_READ_VALUE_TIMEOUT, isSimulated ? 5.0 : 200.0 );
init( ROCKSDB_READ_VALUE_PREFIX_TIMEOUT, isSimulated ? 5.0 : 200.0 );
init( ROCKSDB_READ_RANGE_TIMEOUT, isSimulated ? 5.0 : 200.0 );
// ROCKSDB_READ_VALUE_TIMEOUT, ROCKSDB_READ_VALUE_PREFIX_TIMEOUT, ROCKSDB_READ_RANGE_TIMEOUT knobs:
// In simulation, the read operation timeouts are increased to 5 minutes, as some of the tests have
// very high load and a single read thread cannot process all the load within the timeouts.
init( ROCKSDB_READ_VALUE_TIMEOUT, 5.0 ); if (isSimulated) ROCKSDB_READ_VALUE_TIMEOUT = 5 * 60;
init( ROCKSDB_READ_VALUE_PREFIX_TIMEOUT, 5.0 ); if (isSimulated) ROCKSDB_READ_VALUE_PREFIX_TIMEOUT = 5 * 60;
init( ROCKSDB_READ_RANGE_TIMEOUT, 5.0 ); if (isSimulated) ROCKSDB_READ_RANGE_TIMEOUT = 5 * 60;
init( ROCKSDB_READ_QUEUE_WAIT, 1.0 );
init( ROCKSDB_READ_QUEUE_HARD_MAX, 1000 );
init( ROCKSDB_READ_QUEUE_SOFT_MAX, 500 );

@ -436,6 +440,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( ROCKSDB_SINGLEKEY_DELETES_BYTES_LIMIT, 200000 ); // 200KB
init( ROCKSDB_ENABLE_CLEAR_RANGE_EAGER_READS, true ); if( randomize && BUGGIFY ) ROCKSDB_ENABLE_CLEAR_RANGE_EAGER_READS = deterministicRandom()->coinflip();
// ROCKSDB_STATS_LEVEL=1 indicates rocksdb::StatsLevel::kExceptHistogramOrTimers
// Refer StatsLevel: https://github.com/facebook/rocksdb/blob/main/include/rocksdb/statistics.h#L594
init( ROCKSDB_STATS_LEVEL, 1 ); if( randomize && BUGGIFY ) ROCKSDB_STATS_LEVEL = deterministicRandom()->randomInt(0, 6);
// canCommit will delay ROCKSDB_CAN_COMMIT_DELAY_ON_OVERLOAD seconds, up to
// ROCKSDB_CAN_COMMIT_DELAY_TIMES_ON_OVERLOAD times, if rocksdb is overloaded.

@ -555,7 +560,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( BACKUP_TIMEOUT, 0.4 );
init( BACKUP_NOOP_POP_DELAY, 5.0 );
init( BACKUP_FILE_BLOCK_BYTES, 1024 * 1024 );
init( BACKUP_LOCK_BYTES, 3e9 ); if(randomize && BUGGIFY) BACKUP_LOCK_BYTES = deterministicRandom()->randomInt(1024, 4096) * 15 * 1024;
init( BACKUP_LOCK_BYTES, 3e9 ); if(randomize && BUGGIFY) BACKUP_LOCK_BYTES = deterministicRandom()->randomInt(1024, 4096) * 30 * 1024;
init( BACKUP_UPLOAD_DELAY, 10.0 ); if(randomize && BUGGIFY) BACKUP_UPLOAD_DELAY = deterministicRandom()->random01() * 60;

//Cluster Controller

@ -876,6 +876,7 @@ const KeyRef triggerDDTeamInfoPrintKey("\xff/triggerDDTeamInfoPrint"_sr);
const KeyRef consistencyScanInfoKey = "\xff/consistencyScanInfo"_sr;

const KeyRef encryptionAtRestModeConfKey("\xff/conf/encryption_at_rest_mode"_sr);
const KeyRef tenantModeConfKey("\xff/conf/tenant_mode"_sr);

const KeyRangeRef excludedServersKeys("\xff/conf/excluded/"_sr, "\xff/conf/excluded0"_sr);
const KeyRef excludedServersPrefix = excludedServersKeys.begin;

@ -355,21 +355,25 @@ Span& Span::operator=(Span&& o) {
g_tracer->trace(*this);
}
arena = std::move(o.arena);
context = o.context;
parentContext = o.parentContext;
begin = o.begin;
end = o.end;
location = o.location;
links = std::move(o.links);
events = std::move(o.events);
status = o.status;
kind = o.kind;
o.context = SpanContext();
o.parentContext = SpanContext();
o.kind = SpanKind::INTERNAL;
o.begin = 0.0;
o.end = 0.0;
o.status = SpanStatus::UNSET;
// All memory referenced in *Ref fields of Span is now (potentially)
// invalid, and o no longer has ownership of any memory referenced by *Ref
// fields of o. We must ensure that o no longer references any memory it no
// longer owns, and that *this no longer references any memory it no longer
// owns. Not every field references arena memory, but this std::exchange
// pattern provides a nice template for getting this right in a concise way
// should we add more fields to Span.

attributes = std::exchange(o.attributes, decltype(o.attributes)());
begin = std::exchange(o.begin, decltype(o.begin)());
context = std::exchange(o.context, decltype(o.context)());
end = std::exchange(o.end, decltype(o.end)());
events = std::exchange(o.events, decltype(o.events)());
kind = std::exchange(o.kind, decltype(o.kind)());
links = std::exchange(o.links, decltype(o.links)());
location = std::exchange(o.location, decltype(o.location)());
parentContext = std::exchange(o.parentContext, decltype(o.parentContext)());
status = std::exchange(o.status, decltype(o.status)());

return *this;
}

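The rewritten move assignment above is an instance of a general pattern: std::exchange reads the source field and resets it to a default in one expression, so after the assignment neither object references memory it no longer owns. A toy version of the pattern (Record is illustrative, not the real Span):

#include <string>
#include <utility>

struct Record {
    std::string name;
    double begin = 0.0;
    double end = 0.0;

    Record() = default;
    Record(Record&&) = default;
    Record& operator=(Record&& o) noexcept {
        // Take each field from o and leave o holding a default value.
        name = std::exchange(o.name, decltype(o.name)());
        begin = std::exchange(o.begin, decltype(o.begin)());
        end = std::exchange(o.end, decltype(o.end)());
        return *this;
    }
};
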
@ -314,13 +314,19 @@ struct BlobManifest {
};

// Defines blob restore status
enum BlobRestorePhase { INIT = 0, LOAD_MANIFEST = 1, MANIFEST_DONE = 2, MIGRATE = 3, APPLY_MLOGS = 4, DONE = 5 };
struct BlobRestoreStatus {
constexpr static FileIdentifier file_identifier = 378657;
BlobRestorePhase phase;
int progress;

BlobRestoreStatus() : phase(BlobRestorePhase::INIT){};
BlobRestoreStatus(BlobRestorePhase pha) : phase(pha), progress(0){};
BlobRestoreStatus(BlobRestorePhase pha, int prog) : phase(pha), progress(prog){};

template <class Ar>
void serialize(Ar& ar) {
serializer(ar, progress);
serializer(ar, phase, progress);
}
};

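Because BlobRestorePhase is an ordered progression from INIT to DONE, "restore still in progress" collapses to a single comparison, which is exactly how isFullRestoreMode is changed to use it later in this commit. A compilable sketch with the enum values copied from the hunk:

#include <cstdio>

enum BlobRestorePhase { INIT = 0, LOAD_MANIFEST = 1, MANIFEST_DONE = 2, MIGRATE = 3, APPLY_MLOGS = 4, DONE = 5 };

bool restoreInProgress(BlobRestorePhase phase) {
    return phase < BlobRestorePhase::DONE;
}

int main() {
    std::printf("%d %d\n", restoreInProgress(MIGRATE), restoreInProgress(DONE)); // prints: 1 0
}
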
@ -289,6 +289,7 @@ public:
double METACLUSTER_ASSIGNMENT_FIRST_CHOICE_DELAY;
double METACLUSTER_ASSIGNMENT_AVAILABILITY_TIMEOUT;
int TENANT_ENTRY_CACHE_LIST_REFRESH_INTERVAL; // How often the TenantEntryCache is refreshed
bool CLIENT_ENABLE_USING_CLUSTER_ID_KEY;

// Encryption-at-rest
bool ENABLE_ENCRYPTION_CPU_TIME_LOGGING;

@ -1464,7 +1464,7 @@ struct TenantMode {
struct EncryptionAtRestMode {
// These enumerated values are stored in the database configuration, so can NEVER be changed. Only add new ones
// just before END.
enum Mode { DISABLED = 0, AES_256_CTR = 1, END = 2 };
enum Mode { DISABLED = 0, DOMAIN_AWARE = 1, CLUSTER_AWARE = 2, END = 3 };

EncryptionAtRestMode() : mode(DISABLED) {}
EncryptionAtRestMode(Mode mode) : mode(mode) {

@ -1483,14 +1483,30 @@ struct EncryptionAtRestMode {
switch (mode) {
case DISABLED:
return "disabled";
case AES_256_CTR:
return "aes_256_ctr";
case DOMAIN_AWARE:
return "domain_aware";
case CLUSTER_AWARE:
return "cluster_aware";
default:
ASSERT(false);
}
return "";
}

static EncryptionAtRestMode fromString(std::string mode) {
if (mode == "disabled") {
return EncryptionAtRestMode::DISABLED;
} else if (mode == "cluster_aware") {
return EncryptionAtRestMode::CLUSTER_AWARE;
} else if (mode == "domain_aware") {
return EncryptionAtRestMode::DOMAIN_AWARE;
} else {
TraceEvent(SevError, "UnknownEncryptMode").detail("EncryptMode", mode);
ASSERT(false);
throw internal_error();
}
}

Value toValue() const { return ValueRef(format("%d", (int)mode)); }

bool isEquals(const EncryptionAtRestMode& e) const { return this->mode == e.mode; }

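The comment above says the stored values can never change because toValue persists the raw integer into the database configuration. One way to make that contract mechanical is to pin the integers with static_asserts; this is a hypothetical guard, not part of the diff:

enum Mode { DISABLED = 0, DOMAIN_AWARE = 1, CLUSTER_AWARE = 2, END = 3 };

// If anyone reorders or renumbers the persisted values, compilation fails
// instead of silently corrupting stored configurations.
static_assert(DISABLED == 0, "persisted value must stay 0");
static_assert(DOMAIN_AWARE == 1, "persisted value must stay 1");
static_assert(CLUSTER_AWARE == 2, "persisted value must stay 2");
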
@ -133,6 +133,11 @@ bool isCompleteConfiguration(std::map<std::string, std::string> const& options);

ConfigureAutoResult parseConfig(StatusObject const& status);

bool isEncryptionAtRestModeConfigValid(Optional<DatabaseConfiguration> oldConfiguration,
std::map<std::string, std::string> newConfig,
bool creating);
bool isTenantModeModeConfigValid(DatabaseConfiguration oldConfiguration, DatabaseConfiguration newConfiguration);

// Management API written in template code to support both IClientAPI and NativeAPI
namespace ManagementAPI {

@ -276,6 +281,9 @@ Future<ConfigurationResult> changeConfig(Reference<DB> db, std::map<std::string,
if (!isCompleteConfiguration(m)) {
return ConfigurationResult::INCOMPLETE_CONFIGURATION;
}
if (!isEncryptionAtRestModeConfigValid(Optional<DatabaseConfiguration>(), m, creating)) {
return ConfigurationResult::INVALID_CONFIGURATION;
}
} else if (m.count(encryptionAtRestModeConfKey.toString()) != 0) {
// Encryption data at-rest mode can be set only at the time of database creation
return ConfigurationResult::ENCRYPTION_AT_REST_MODE_ALREADY_SET;

@ -322,6 +330,12 @@ Future<ConfigurationResult> changeConfig(Reference<DB> db, std::map<std::string,
if (!newConfig.isValid()) {
return ConfigurationResult::INVALID_CONFIGURATION;
}
if (!isEncryptionAtRestModeConfigValid(oldConfig, m, creating)) {
return ConfigurationResult::INVALID_CONFIGURATION;
}
if (!isTenantModeModeConfigValid(oldConfig, newConfig)) {
return ConfigurationResult::INVALID_CONFIGURATION;
}

if (newConfig.tLogPolicy->attributeKeys().count("dcid") && newConfig.regions.size() > 0) {
return ConfigurationResult::REGION_REPLICATION_MISMATCH;

@ -244,6 +244,8 @@ public:
// in the TenantCache
int TENANT_CACHE_STORAGE_QUOTA_REFRESH_INTERVAL; // How often the storage quota allocated to each tenant is
// refreshed in the TenantCache
int TENANT_CACHE_STORAGE_USAGE_TRACE_INTERVAL; // The minimum interval between consecutive trace events logging the
// storage bytes used by a tenant group
int CP_FETCH_TENANTS_OVER_STORAGE_QUOTA_INTERVAL; // How often the commit proxies send requests to the data
// distributor to fetch the list of tenants over storage quota

@ -313,7 +315,7 @@ public:
// KeyValueStoreRocksDB
bool ROCKSDB_SET_READ_TIMEOUT;
bool ROCKSDB_LEVEL_COMPACTION_DYNAMIC_LEVEL_BYTES;
int ROCKSDB_SUGGEST_COMPACT_CLEAR_RANGE;
bool ROCKSDB_SUGGEST_COMPACT_CLEAR_RANGE;
int ROCKSDB_READ_RANGE_ROW_LIMIT;
int ROCKSDB_READER_THREAD_PRIORITY;
int ROCKSDB_WRITER_THREAD_PRIORITY;

@ -284,6 +284,9 @@ extern const KeyRef triggerDDTeamInfoPrintKey;
// Encryption data at-rest config key
extern const KeyRef encryptionAtRestModeConfKey;

// Tenant mode config key
extern const KeyRef tenantModeConfKey;

// The differences between excluded and failed can be found in "command-line-interface.rst"
// and in the help message of the fdbcli command "exclude".

@ -235,7 +235,6 @@ struct TenantNameUniqueSet {
return tenantNames.empty();
}
};

class TenantPrefixIndex : public VersionedMap<Key, TenantNameUniqueSet>, public ReferenceCounted<TenantPrefixIndex> {};
typedef VersionedMap<Key, TenantNameUniqueSet> TenantPrefixIndex;

#endif

@ -243,7 +243,7 @@ ACTOR Future<Void> read_http_response(Reference<HTTP::Response> r, Reference<ICo

auto i = r->headers.find("Content-Length");
if (i != r->headers.end())
r->contentLen = atoi(i->second.c_str());
r->contentLen = strtoll(i->second.c_str(), NULL, 10);
else
r->contentLen = -1; // Content length unknown

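The atoi-to-strtoll change matters because Content-Length is a 64-bit quantity: any response over INT_MAX bytes (about 2 GiB) would overflow atoi, which is undefined behavior, while strtoll parses the full range and reports overflow via errno. A small demonstration:

#include <cerrno>
#include <cstdio>
#include <cstdlib>

int main() {
    const char* contentLength = "3221225472"; // 3 GiB, larger than INT_MAX
    errno = 0;
    long long len = strtoll(contentLength, nullptr, 10);
    if (errno == ERANGE)
        std::puts("value out of range for long long");
    else
        std::printf("parsed Content-Length: %lld\n", len); // 3221225472
    // atoi(contentLength) would be undefined behavior here, since the
    // value does not fit in int.
}
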
@ -481,7 +481,7 @@ ACTOR Future<Reference<HTTP::Response>> doRequest(Reference<IConnection> conn,
}

if (FLOW_KNOBS->HTTP_VERBOSE_LEVEL > 0) {
printf("[%s] HTTP %scode=%d early=%d, time=%fs %s %s contentLen=%d [%d out, response content len %d]\n",
printf("[%s] HTTP %scode=%d early=%d, time=%fs %s %s contentLen=%d [%d out, response content len %lld]\n",
conn->getDebugID().toString().c_str(),
(err.present() ? format("*ERROR*=%s ", err.get().name()).c_str() : ""),
r->code,

@ -491,7 +491,7 @@ ACTOR Future<Reference<HTTP::Response>> doRequest(Reference<IConnection> conn,
resource.c_str(),
contentLen,
total_sent,
(int)r->contentLen);
r->contentLen);
}
if (FLOW_KNOBS->HTTP_VERBOSE_LEVEL > 2) {
printf("[%s] HTTP RESPONSE: %s %s\n%s\n",

@ -102,7 +102,7 @@ public:
// If not found, start the read.
if (i == f->m_blocks.end() || (i->second.isValid() && i->second.isError())) {
// printf("starting read of %s block %d\n", f->getFilename().c_str(), blockNum);
fblock = readBlock(f.getPtr(), f->m_block_size, f->m_block_size * blockNum);
fblock = readBlock(f.getPtr(), f->m_block_size, (int64_t)f->m_block_size * blockNum);
f->m_blocks[blockNum] = fblock;
} else
fblock = i->second;

@ -121,7 +121,7 @@ public:
// Calculate the block-relative read range. It's a given that the offset / length range touches this block
// so readStart will never be greater than blocksize (though it could be past the actual end of a short
// block).
int64_t blockStart = blockNum * f->m_block_size;
int64_t blockStart = (int64_t)blockNum * f->m_block_size;
int64_t readStart = std::max<int64_t>(0, offset - blockStart);
int64_t readEnd = std::min<int64_t>(f->m_block_size, offset + length - blockStart);
int rlen = readEnd - readStart;

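Both read-cache hunks fix the same 32-bit overflow: blockNum and m_block_size are ints, so their product is computed in int and wraps once the byte offset passes 2 GiB; casting one operand to int64_t first promotes the whole multiplication. Sketch:

#include <cstdint>
#include <cstdio>

int main() {
    int m_block_size = 1 << 20; // 1 MiB blocks
    int blockNum = 4096;        // byte offset 4 GiB
    // Without the cast, blockNum * m_block_size would be evaluated in 32-bit
    // int and overflow (undefined behavior) before being widened.
    int64_t blockStart = (int64_t)blockNum * m_block_size;
    std::printf("blockStart=%lld\n", (long long)blockStart); // 4294967296
}
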
@ -56,7 +56,7 @@ public:
FailDisk,
RebootAndDelete,
RebootProcessAndDelete,
RebootProcessAndSwitch,
RebootProcessAndSwitch, // Reboot and switch cluster file
Reboot,
RebootProcess,
None

@ -63,7 +63,8 @@ ISimulator::ISimulator()
: desiredCoordinators(1), physicalDatacenters(1), processesPerMachine(0), listenersPerProcess(1), usableRegions(1),
allowLogSetKills(true), tssMode(TSSMode::Disabled), configDBType(ConfigDBType::DISABLED), isStopped(false),
lastConnectionFailure(0), connectionFailuresDisableDuration(0), speedUpSimulation(false),
backupAgents(BackupAgentType::WaitForType), drAgents(BackupAgentType::WaitForType), allSwapsDisabled(false) {}
backupAgents(BackupAgentType::WaitForType), drAgents(BackupAgentType::WaitForType), allSwapsDisabled(false),
blobGranulesEnabled(false) {}
ISimulator::~ISimulator() = default;

bool simulator_should_inject_fault(const char* context, const char* file, int line, int error_code) {

@ -49,8 +49,8 @@ struct VersionedMessage {
Arena decryptArena; // Arena used for decrypt buffer.
size_t bytes; // arena's size when inserted, which can grow afterwards

VersionedMessage(LogMessageVersion v, StringRef m, const VectorRef<Tag>& t, const Arena& a)
: version(v), message(m), tags(t), arena(a), bytes(a.getSize()) {}
VersionedMessage(LogMessageVersion v, StringRef m, const VectorRef<Tag>& t, const Arena& a, size_t n)
: version(v), message(m), tags(t), arena(a), bytes(n) {}
Version getVersion() const { return version.version; }
uint32_t getSubVersion() const { return version.sub; }

@ -977,15 +977,17 @@ ACTOR Future<Void> pullAsyncData(BackupData* self) {
// Note we aggressively peek (uncommitted) messages, but only committed
// messages/mutations will be flushed to disk/blob in uploadData().
while (r->hasMessage()) {
state size_t takeBytes = 0;
if (!prev.sameArena(r->arena())) {
TraceEvent(SevDebugMemory, "BackupWorkerMemory", self->myId)
.detail("Take", r->arena().getSize())
.detail("Current", self->lock->activePermits());

wait(self->lock->take(TaskPriority::DefaultYield, r->arena().getSize()));
takeBytes = r->arena().getSize(); // more bytes can be allocated after the wait.
wait(self->lock->take(TaskPriority::DefaultYield, takeBytes));
prev = r->arena();
}
self->messages.emplace_back(r->version(), r->getMessage(), r->getTags(), r->arena());
self->messages.emplace_back(r->version(), r->getMessage(), r->getTags(), r->arena(), takeBytes);
r->nextMessage();
}

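The takeBytes change keeps the backup worker's memory accounting balanced: the arena can keep growing after the permits are taken, so the message must remember the exact amount that was taken rather than re-reading the arena's size at release time. A toy model of the invariant (FlowLockLike is a hypothetical stand-in for FDB's FlowLock):

#include <cassert>
#include <cstddef>

struct FlowLockLike {
    size_t permits = 0;
    void take(size_t n) { permits += n; }
    void release(size_t n) { assert(n <= permits); permits -= n; }
};

struct Message {
    size_t bytes; // size captured when permits were taken
};

int main() {
    FlowLockLike lock;
    size_t arenaSize = 4096;
    Message m{ arenaSize };   // record the taken amount with the message
    lock.take(m.bytes);
    arenaSize += 1024;        // arena grows after the take
    lock.release(m.bytes);    // still balanced: release exactly what was taken
    assert(lock.permits == 0);
}
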
@ -3547,10 +3547,16 @@ ACTOR Future<Void> recoverBlobManager(Reference<BlobManagerData> bmData) {
bool isFullRestore = wait(isFullRestoreMode(bmData->db, normalKeys));
bmData->isFullRestoreMode = isFullRestore;
if (bmData->isFullRestoreMode) {
BlobRestoreStatus initStatus(BlobRestorePhase::LOAD_MANIFEST);
wait(updateRestoreStatus(bmData->db, normalKeys, initStatus));

wait(loadManifest(bmData->db, bmData->bstore));

int64_t epoc = wait(lastBlobEpoc(bmData->db, bmData->bstore));
wait(updateEpoch(bmData, epoc + 1));

BlobRestoreStatus completedStatus(BlobRestorePhase::MANIFEST_DONE);
wait(updateRestoreStatus(bmData->db, normalKeys, completedStatus));
}

state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(bmData->db);

@ -545,7 +545,7 @@ ACTOR Future<bool> isFullRestoreMode(Database db, KeyRangeRef keys) {
KeyRange keyRange = decodeBlobRestoreCommandKeyFor(r.key);
if (keyRange.contains(keys)) {
Standalone<BlobRestoreStatus> status = decodeBlobRestoreStatus(r.value);
return status.progress < 100; // progress is less than 100
return status.phase < BlobRestorePhase::DONE;
}
}
if (!ranges.more) {

@ -563,3 +563,44 @@ ACTOR Future<bool> isFullRestoreMode(Database db, KeyRangeRef keys) {
}
}
}

// Update restore status
ACTOR Future<Void> updateRestoreStatus(Database db, KeyRangeRef range, BlobRestoreStatus status) {
state Transaction tr(db);
loop {
try {
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
Key key = blobRestoreCommandKeyFor(range);
Value value = blobRestoreCommandValueFor(status);
tr.set(key, value);
wait(tr.commit());
return Void();
} catch (Error& e) {
wait(tr.onError(e));
}
}
}

// Get restore status
ACTOR Future<Optional<BlobRestoreStatus>> getRestoreStatus(Database db, KeyRangeRef range) {
state Transaction tr(db);
loop {
try {
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
Key key = blobRestoreCommandKeyFor(range);
Optional<Value> value = wait(tr.get(key));
Optional<BlobRestoreStatus> result;
if (value.present()) {
Standalone<BlobRestoreStatus> status = decodeBlobRestoreStatus(value.get());
result = status;
}
return result;
} catch (Error& e) {
wait(tr.onError(e));
}
}
}

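updateRestoreStatus and getRestoreStatus both follow FDB's standard transaction retry loop: set options, do the work, commit, and on failure hand the error to onError, which either backs off and resets the transaction or rethrows. The generic shape, written against a hypothetical minimal transaction interface rather than the real ACTOR machinery:

template <class Transaction, class Body>
void runTransaction(Transaction& tr, Body body) {
    for (;;) {
        try {
            body(tr);     // set options, read and write keys
            tr.commit();  // may throw a retryable error such as a conflict
            return;
        } catch (typename Transaction::Error& e) {
            tr.onError(e); // rethrows non-retryable errors, else resets tr and backs off
        }
    }
}
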
@ -18,6 +18,7 @@
* limitations under the License.
*/

#include "fdbclient/BlobGranuleCommon.h"
#include "flow/ActorCollection.h"
#include "flow/FastRef.h"
#include "flow/IRandom.h"

@ -75,8 +76,8 @@ private:
// Check if blob manifest is loaded so that blob migration can start
ACTOR static Future<Void> checkIfReadyForMigration(Reference<BlobMigrator> self) {
loop {
bool isFullRestore = wait(isFullRestoreMode(self->db_, normalKeys));
if (isFullRestore) {
Optional<BlobRestoreStatus> status = wait(getRestoreStatus(self->db_, normalKeys));
if (canStartMigration(status)) {
BlobGranuleRestoreVersionVector granules = wait(listBlobGranules(self->db_, self->blobConn_));
if (!granules.empty()) {
self->blobGranules_ = granules;

@ -87,6 +88,9 @@ private:
.detail("Version", granule.version)
.detail("SizeInBytes", granule.sizeInBytes);
}

BlobRestoreStatus status(BlobRestorePhase::MIGRATE, 0);
wait(updateRestoreStatus(self->db_, normalKeys, status));
return Void();
}
}

@ -94,6 +98,15 @@ private:
}
}

// Check if we should start migration. Migration can start once the manifest is fully loaded
static bool canStartMigration(Optional<BlobRestoreStatus> status) {
if (status.present()) {
BlobRestoreStatus value = status.get();
return value.phase == BlobRestorePhase::MANIFEST_DONE; // manifest is loaded successfully
}
return false;
}

// Prepare for data migration for a given key range.
ACTOR static Future<Void> prepare(Reference<BlobMigrator> self, KeyRangeRef keys) {
// Register as a storage server, so that DataDistributor can start data movement after

@ -120,8 +133,8 @@ private:
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
try {
state Value value = keyServersValue(std::vector<UID>({ serverUID }), std::vector<UID>(), UID(), UID());
wait(krmSetRange(&tr, keyServersPrefix, keys, value));
wait(krmSetRange(&tr, serverKeysPrefixFor(serverUID), keys, serverKeysTrue));
wait(krmSetRangeCoalescing(&tr, keyServersPrefix, keys, allKeys, value));
wait(krmSetRangeCoalescing(&tr, serverKeysPrefixFor(serverUID), keys, allKeys, serverKeysTrue));
wait(tr.commit());
dprint("Assign {} to server {}\n", normalKeys.toString(), serverUID.toString());
return Void();

@ -152,7 +165,7 @@ private:
}
}
if (owning) {
wait(krmSetRange(&tr, serverKeysPrefixFor(id), keys, serverKeysFalse));
wait(krmSetRangeCoalescing(&tr, serverKeysPrefixFor(id), keys, allKeys, serverKeysFalse));
dprint("Unassign {} from storage server {}\n", keys.toString(), id.toString());
TraceEvent("UnassignKeys").detail("Keys", keys.toString()).detail("From", id.toString());
}

@ -169,8 +182,12 @@ private:
ACTOR static Future<Void> logProgress(Reference<BlobMigrator> self) {
loop {
bool done = wait(checkProgress(self));
if (done)
if (done) {
BlobRestoreStatus status(BlobRestorePhase::DONE);
wait(updateRestoreStatus(self->db_, normalKeys, status));

return Void();
}
wait(delay(SERVER_KNOBS->BLOB_MIGRATOR_CHECK_INTERVAL));
}
}

@ -205,7 +222,8 @@ private:
state bool done = incompleted == 0;
dprint("Migration progress :{}%. done {}\n", progress, done);
TraceEvent("BlobMigratorProgress").detail("Progress", progress).detail("Done", done);
wait(updateProgress(self, normalKeys, progress));
BlobRestoreStatus status(BlobRestorePhase::MIGRATE, progress);
wait(updateRestoreStatus(self->db_, normalKeys, status));
return done;
} catch (Error& e) {
wait(tr.onError(e));

@ -213,32 +231,6 @@ private:
}
}

// Update restore progress
ACTOR static Future<Void> updateProgress(Reference<BlobMigrator> self, KeyRangeRef range, int progress) {
state Transaction tr(self->db_);
loop {
try {
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
state Key key = blobRestoreCommandKeyFor(range);
Optional<Value> value = wait(tr.get(key));
if (value.present()) {
Standalone<BlobRestoreStatus> status = decodeBlobRestoreStatus(value.get());
if (progress > status.progress) {
status.progress = progress;
Value updatedValue = blobRestoreCommandValueFor(status);
tr.set(key, updatedValue);
wait(tr.commit());
}
}
return Void();
} catch (Error& e) {
wait(tr.onError(e));
}
}
}

// Advance version, so that future commits will have a larger version than the restored data
ACTOR static Future<Void> advanceVersion(Reference<BlobMigrator> self) {
state Transaction tr(self->db_);

@ -26,6 +26,7 @@
#include <tuple>
#include <vector>

#include "fdbclient/BlobGranuleCommon.h"
#include "fdbclient/FDBTypes.h"
#include "fdbclient/SystemData.h"
#include "fdbclient/DatabaseContext.h"

@ -2565,8 +2566,8 @@ ACTOR Future<Void> watchBlobRestoreCommand(ClusterControllerData* self) {
Optional<Value> blobRestoreCommand = wait(tr->get(blobRestoreCommandKey));
if (blobRestoreCommand.present()) {
Standalone<BlobRestoreStatus> status = decodeBlobRestoreStatus(blobRestoreCommand.get());
TraceEvent("WatchBlobRestoreCommand").detail("Progress", status.progress);
if (status.progress == 0) {
TraceEvent("WatchBlobRestoreCommand").detail("Progress", status.progress).detail("Phase", status.phase);
if (status.phase == BlobRestorePhase::INIT) {
self->db.blobRestoreEnabled.set(true);
if (self->db.blobGranulesEnabled.get()) {
const auto& blobManager = self->db.serverInfo->get().blobManager;

@ -435,7 +435,7 @@ namespace {
EncryptionAtRestMode getEncryptionAtRest() {
// TODO: Use db-config encryption config to determine cluster encryption status
if (SERVER_KNOBS->ENABLE_ENCRYPTION) {
return EncryptionAtRestMode(EncryptionAtRestMode::Mode::AES_256_CTR);
return EncryptionAtRestMode(EncryptionAtRestMode::Mode::DOMAIN_AWARE);
} else {
return EncryptionAtRestMode();
}

@ -2910,7 +2910,7 @@ ACTOR Future<Void> commitProxyServerCore(CommitProxyInterface proxy,
ASSERT(commitData.resolvers.size() != 0);
for (int i = 0; i < commitData.resolvers.size(); ++i) {
commitData.stats.resolverDist.push_back(Histogram::getHistogram(
"CommitProxy"_sr, "ToResolver_" + commitData.resolvers[i].id().toString(), Histogram::Unit::microseconds));
"CommitProxy"_sr, "ToResolver_" + commitData.resolvers[i].id().toString(), Histogram::Unit::milliseconds));
}

// Initialize keyResolvers map

@ -92,10 +92,10 @@ class ConfigBroadcasterImpl {

// Used to read a snapshot from the previous coordinators after a change
// coordinators command.
Version maxLastSeenVersion = ::invalidVersion;
Future<Optional<Value>> previousCoordinatorsFuture;
std::unique_ptr<IConfigConsumer> previousCoordinatorsConsumer;
Future<Void> previousCoordinatorsSnapshotFuture;
Version largestConfigNodeVersion{ ::invalidVersion };

UID id;
CounterCollection cc;

@ -106,6 +106,7 @@ class ConfigBroadcasterImpl {
Future<Void> logger;

int coordinators = 0;
std::unordered_set<NetworkAddress> registeredConfigNodes;
std::unordered_set<NetworkAddress> activeConfigNodes;
std::unordered_set<NetworkAddress> registrationResponses;
std::unordered_set<NetworkAddress> registrationResponsesUnregistered;

@ -268,7 +269,7 @@ class ConfigBroadcasterImpl {
// Ask the registering ConfigNode whether it has registered in the past.
state ConfigBroadcastRegisteredReply reply = wait(
brokenPromiseToNever(configBroadcastInterface.registered.getReply(ConfigBroadcastRegisteredRequest{})));
self->maxLastSeenVersion = std::max(self->maxLastSeenVersion, reply.lastSeenVersion);
self->largestConfigNodeVersion = std::max(self->largestConfigNodeVersion, reply.lastSeenVersion);
state bool registered = reply.registered;
TraceEvent("ConfigBroadcasterRegisterNodeReceivedRegistrationReply", self->id)
.detail("Address", address)

@ -302,6 +303,7 @@ class ConfigBroadcasterImpl {
int nodesTillQuorum = self->coordinators / 2 + 1 - (int)self->activeConfigNodes.size();

if (registered) {
self->registeredConfigNodes.insert(address);
self->activeConfigNodes.insert(address);
self->disallowUnregistered = true;
} else if ((self->activeConfigNodes.size() < self->coordinators / 2 + 1 && !self->disallowUnregistered) ||

@ -365,6 +367,52 @@ class ConfigBroadcasterImpl {

state bool sendSnapshot =
self->previousCoordinatorsConsumer && reply.lastSeenVersion <= self->mostRecentVersion;

// If a coordinator change is ongoing, a quorum of ConfigNodes are
// already registered and the largest version at least one of those
// ConfigNodes knows about is greater than the version of the latest
// snapshot the broadcaster has, don't send a snapshot to any
// ConfigNodes. This could end up overwriting committed data. Consider
// the following scenario, with three ConfigNodes:
//
// T=0:
//   A: v5
// T=1:
//   change coordinators, new coordinators are B, C, D
// T=2:
//   B: v5, C: v5, D: v5
// T=3:
//   B: v5, C: v10, D: v10
//   (some commits happen on only C and D)
//   (previousCoordinatorsKey has not been cleared yet)
// T=4:
//   D dies and loses its data
// T=5:
//   D starts
//   B: v5 (registered=yes), C: v10 (registered=yes), D: v0 (registered=no)
//   Broadcaster: has an old snapshot, only knows about v5
//   self->mostRecentVersion=5
// T=6:
//   B, C, D (re-)register with broadcaster
//
// At T=5, the broadcaster would send snapshots to B and D because the
// largest version they know about (5) is less than or equal to
// self->mostRecentVersion (5). But this would cause a majority of
// nodes to think v5 is the latest committed version, causing C to be
// rolled back, and losing commit data between versions 5 and 10.
//
// This is a special case where the coordinators are being changed.
// During a coordinator change, a majority of ConfigNodes being
// registered means the coordinator change already took place, and it
// is being retried due to some failure. In that case, we don't want to
// resend snapshots if a majority of the new ConfigNodes are
// registered, because they could have been accepting commits. Instead,
// let the rollback/rollforward algorithm update the out of date nodes.
if (self->previousCoordinatorsConsumer && self->largestConfigNodeVersion > self->mostRecentVersion &&
self->registeredConfigNodes.size() >= self->coordinators / 2 + 1) {
sendSnapshot = false;
}

// Unregistered nodes need to wait for either:
// 1. A quorum of registered nodes to register and send their
// snapshots, so the unregistered nodes can be rolled forward, or

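Condensed, the snapshot-send decision described above is a pure predicate over versions and registration counts; this restates the two conditions from the hunk in one function (the names mirror the broadcaster's fields, but the function itself is illustrative):

#include <cstddef>
#include <cstdint>

bool shouldSendSnapshot(bool coordinatorChangeOngoing,
                        int64_t nodeLastSeenVersion,
                        int64_t mostRecentVersion,
                        int64_t largestConfigNodeVersion,
                        size_t registeredConfigNodes,
                        size_t coordinators) {
    bool sendSnapshot = coordinatorChangeOngoing && nodeLastSeenVersion <= mostRecentVersion;
    // Suppress the snapshot when a quorum has already registered and at least
    // one ConfigNode is ahead of the broadcaster's snapshot: rolling that node
    // back would lose committed data, so let rollforward reconcile instead.
    if (coordinatorChangeOngoing && largestConfigNodeVersion > mostRecentVersion &&
        registeredConfigNodes >= coordinators / 2 + 1) {
        sendSnapshot = false;
    }
    return sendSnapshot;
}
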
@ -234,10 +234,13 @@ class ConfigNodeImpl {
req.reply.sendError(process_behind()); // Reuse the process_behind error
return Void();
}
if (BUGGIFY) {
wait(delay(deterministicRandom()->random01() * 2));
}
state Standalone<VectorRef<VersionedConfigMutationRef>> versionedMutations =
wait(getMutations(self, req.lastSeenVersion + 1, committedVersion));
wait(getMutations(self, req.lastSeenVersion + 1, req.mostRecentVersion));
state Standalone<VectorRef<VersionedConfigCommitAnnotationRef>> versionedAnnotations =
wait(getAnnotations(self, req.lastSeenVersion + 1, committedVersion));
wait(getAnnotations(self, req.lastSeenVersion + 1, req.mostRecentVersion));
TraceEvent(SevInfo, "ConfigNodeSendingChanges", self->id)
.detail("ReqLastSeenVersion", req.lastSeenVersion)
.detail("ReqMostRecentVersion", req.mostRecentVersion)

@ -245,7 +248,7 @@ class ConfigNodeImpl {
.detail("NumMutations", versionedMutations.size())
.detail("NumCommits", versionedAnnotations.size());
++self->successfulChangeRequests;
req.reply.send(ConfigFollowerGetChangesReply{ committedVersion, versionedMutations, versionedAnnotations });
req.reply.send(ConfigFollowerGetChangesReply{ versionedMutations, versionedAnnotations });
return Void();
}

@ -520,6 +523,18 @@ class ConfigNodeImpl {
ObjectReader::fromStringRef<KnobValue>(kv.value, IncludeVersion());
}
wait(store(reply.snapshotVersion, getLastCompactedVersion(self)));
if (req.mostRecentVersion < reply.snapshotVersion) {
// The version in the request can be less than the last compacted
// version in certain circumstances where the coordinators are
// being changed and the consumer reads the latest committed
// version from a majority of ConfigNodes before they have received
// up to date snapshots. This should be fine; it just means the
// consumer needs to fetch the latest version and retry its
// request.
CODE_PROBE(true, "ConfigNode ahead of consumer", probe::decoration::rare);
req.reply.sendError(version_already_compacted());
return Void();
}
wait(store(reply.changes, getMutations(self, reply.snapshotVersion + 1, req.mostRecentVersion)));
wait(store(reply.annotations, getAnnotations(self, reply.snapshotVersion + 1, req.mostRecentVersion)));
TraceEvent(SevInfo, "ConfigNodeGettingSnapshot", self->id)

@ -1548,14 +1548,20 @@ ACTOR Future<Void> dataDistributionRelocator(DDQueue* self,
if (enableShardMove && tciIndex == 1) {
ASSERT(physicalShardIDCandidate != UID().first() &&
physicalShardIDCandidate != anonymousShardId.first());
Optional<ShardsAffectedByTeamFailure::Team> remoteTeamWithPhysicalShard =
std::pair<Optional<ShardsAffectedByTeamFailure::Team>, bool> remoteTeamWithPhysicalShard =
self->physicalShardCollection->tryGetAvailableRemoteTeamWith(
physicalShardIDCandidate, metrics, debugID);
// TODO: when we know that `physicalShardIDCandidate` exists, the remote team must also exist.
if (remoteTeamWithPhysicalShard.present()) {
if (!remoteTeamWithPhysicalShard.second) {
// Physical shard with `physicalShardIDCandidate` is not available. Retry selecting a new
// dst physical shard.
self->retryFindDstReasonCount[DDQueue::RetryFindDstReason::NoAvailablePhysicalShard]++;
foundTeams = false;
break;
}
if (remoteTeamWithPhysicalShard.first.present()) {
// A remoteTeam exists in the mapping that has the physicalShardIDCandidate;
// use the remoteTeam with the physicalShard as the bestTeam
req = GetTeamRequest(remoteTeamWithPhysicalShard.get().servers);
req = GetTeamRequest(remoteTeamWithPhysicalShard.first.get().servers);
}
}

@ -1853,19 +1859,35 @@ ACTOR Future<Void> dataDistributionRelocator(DDQueue* self,
state Error error = success();
state Promise<Void> dataMovementComplete;
// Move keys from source to destination by changing the serverKeyList and keyServerList system keys
state Future<Void> doMoveKeys =
self->txnProcessor->moveKeys(MoveKeysParams{ rd.dataMoveId,
rd.keys,
destIds,
healthyIds,
self->lock,
dataMovementComplete,
&self->startMoveKeysParallelismLock,
&self->finishMoveKeysParallelismLock,
self->teamCollections.size() > 1,
relocateShardInterval.pairID,
ddEnabledState,
CancelConflictingDataMoves::False });
std::unique_ptr<MoveKeysParams> params;
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA) {
params = std::make_unique<MoveKeysParams>(rd.dataMoveId,
std::vector<KeyRange>{ rd.keys },
destIds,
healthyIds,
self->lock,
dataMovementComplete,
&self->startMoveKeysParallelismLock,
&self->finishMoveKeysParallelismLock,
self->teamCollections.size() > 1,
relocateShardInterval.pairID,
ddEnabledState,
CancelConflictingDataMoves::False);
} else {
params = std::make_unique<MoveKeysParams>(rd.dataMoveId,
rd.keys,
destIds,
healthyIds,
self->lock,
dataMovementComplete,
&self->startMoveKeysParallelismLock,
&self->finishMoveKeysParallelismLock,
self->teamCollections.size() > 1,
relocateShardInterval.pairID,
ddEnabledState,
CancelConflictingDataMoves::False);
}
state Future<Void> doMoveKeys = self->txnProcessor->moveKeys(*params);
state Future<Void> pollHealth =
signalledTransferComplete ? Never()
: delay(SERVER_KNOBS->HEALTH_POLL_TIME, TaskPriority::DataDistributionLaunch);

@ -1878,19 +1900,35 @@ ACTOR Future<Void> dataDistributionRelocator(DDQueue* self,
healthyIds.insert(healthyIds.end(), extraIds.begin(), extraIds.end());
extraIds.clear();
ASSERT(totalIds == destIds.size()); // Sanity check the destIDs before we move keys
doMoveKeys =
self->txnProcessor->moveKeys(MoveKeysParams{ rd.dataMoveId,
rd.keys,
destIds,
healthyIds,
self->lock,
Promise<Void>(),
&self->startMoveKeysParallelismLock,
&self->finishMoveKeysParallelismLock,
self->teamCollections.size() > 1,
relocateShardInterval.pairID,
ddEnabledState,
CancelConflictingDataMoves::False });
std::unique_ptr<MoveKeysParams> params;
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA) {
params = std::make_unique<MoveKeysParams>(rd.dataMoveId,
std::vector<KeyRange>{ rd.keys },
destIds,
healthyIds,
self->lock,
Promise<Void>(),
&self->startMoveKeysParallelismLock,
&self->finishMoveKeysParallelismLock,
self->teamCollections.size() > 1,
relocateShardInterval.pairID,
ddEnabledState,
CancelConflictingDataMoves::False);
} else {
params = std::make_unique<MoveKeysParams>(rd.dataMoveId,
rd.keys,
destIds,
healthyIds,
self->lock,
Promise<Void>(),
&self->startMoveKeysParallelismLock,
&self->finishMoveKeysParallelismLock,
self->teamCollections.size() > 1,
relocateShardInterval.pairID,
ddEnabledState,
CancelConflictingDataMoves::False);
}
doMoveKeys = self->txnProcessor->moveKeys(*params);
} else {
self->fetchKeysComplete.insert(rd);
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA) {

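The two relocator hunks above build the same MoveKeysParams twice, differing only in whether the range goes into ranges (when SHARD_ENCODE_LOCATION_METADATA is set) or keys. A sketch of factoring that branch into one helper; the types here are toy stand-ins, and no such helper exists in the diff:

#include <memory>
#include <string>
#include <vector>

struct KeyRange { std::string begin, end; };

struct MoveKeysParams {                 // toy subset of the real struct
    std::vector<KeyRange> ranges;       // used when location metadata is encoded
    KeyRange keys;                      // legacy single-range form
    bool useRanges = false;
};

std::unique_ptr<MoveKeysParams> makeMoveKeysParams(const KeyRange& range, bool shardEncodeLocationMetadata) {
    auto params = std::make_unique<MoveKeysParams>();
    if (shardEncodeLocationMetadata) {
        params->ranges = { range };
        params->useRanges = true;
    } else {
        params->keys = range;
    }
    return params;
}
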
@ -1756,7 +1756,7 @@ InOverSizePhysicalShard PhysicalShardCollection::isInOverSizePhysicalShard(KeyRa
}

// May return a problematic remote team
Optional<ShardsAffectedByTeamFailure::Team> PhysicalShardCollection::tryGetAvailableRemoteTeamWith(
std::pair<Optional<ShardsAffectedByTeamFailure::Team>, bool> PhysicalShardCollection::tryGetAvailableRemoteTeamWith(
uint64_t inputPhysicalShardID,
StorageMetrics const& moveInMetrics,
uint64_t debugID) {

@ -1764,10 +1764,10 @@ Optional<ShardsAffectedByTeamFailure::Team> PhysicalShardCollection::tryGetAvail
ASSERT(SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD);
ASSERT(inputPhysicalShardID != anonymousShardId.first() && inputPhysicalShardID != UID().first());
if (physicalShardInstances.count(inputPhysicalShardID) == 0) {
return Optional<ShardsAffectedByTeamFailure::Team>();
return { Optional<ShardsAffectedByTeamFailure::Team>(), true };
}
if (!checkPhysicalShardAvailable(inputPhysicalShardID, moveInMetrics)) {
return Optional<ShardsAffectedByTeamFailure::Team>();
return { Optional<ShardsAffectedByTeamFailure::Team>(), false };
}
for (auto team : physicalShardInstances[inputPhysicalShardID].teams) {
if (team.primary == false) {

@ -1777,10 +1777,12 @@ Optional<ShardsAffectedByTeamFailure::Team> PhysicalShardCollection::tryGetAvail
.detail("TeamSize", team.servers.size())
.detail("PhysicalShardsOfTeam", convertIDsToString(teamPhysicalShardIDs[team]))
.detail("DebugID", debugID);*/
return team;
return { team, true };
}
}
UNREACHABLE();
// In this case, the physical shard may not be populated in the remote region yet, e.g., we are making a
// configuration change to turn a single region cluster into HA mode.
return { Optional<ShardsAffectedByTeamFailure::Team>(), true };
}

// The update of PhysicalShardToTeams, Collection, keyRangePhysicalShardIDMap should be atomic

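The new pair return gives callers three distinguishable outcomes instead of one Optional: (team, true) means reuse the remote team already mapped to the physical shard; (empty, true) means no remote team exists yet (for example a single-region cluster being turned into HA mode), so fall back to normal selection; (empty, false) means the shard exists but cannot absorb the move, so pick a new physical shard. A standalone sketch of decoding it:

#include <cstdio>
#include <optional>
#include <utility>
#include <vector>

struct Team { std::vector<int> servers; };

// Hypothetical stand-in for tryGetAvailableRemoteTeamWith.
std::pair<std::optional<Team>, bool> tryGetRemoteTeam(bool known, bool hasCapacity) {
    if (!known) return { std::nullopt, true };        // not populated remotely yet
    if (!hasCapacity) return { std::nullopt, false }; // exists but overloaded
    return { Team{ { 1, 2, 3 } }, true };
}

int main() {
    auto [team, usable] = tryGetRemoteTeam(true, true);
    if (!usable)
        std::puts("retry with a new physical shard");
    else if (team)
        std::printf("reuse remote team of %zu servers\n", team->servers.size());
    else
        std::puts("fall back to normal remote team selection");
}
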
@ -723,6 +723,17 @@ struct DDMockTxnProcessorImpl {
return Void();
}

static Future<Void> rawCheckFetchingState(DDMockTxnProcessor* self, const MoveKeysParams& params) {
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA) {
ASSERT(params.ranges.present());
// TODO: make startMoveShards work with multiple ranges.
ASSERT(params.ranges.get().size() == 1);
return checkFetchingState(self, params.destinationTeam, params.ranges.get().at(0));
}
ASSERT(params.keys.present());
return checkFetchingState(self, params.destinationTeam, params.keys.get());
}

ACTOR static Future<Void> moveKeys(DDMockTxnProcessor* self, MoveKeysParams params) {
state std::map<UID, StorageServerInterface> tssMapping;
// Because SFBTF::Team requires the IDs to be ordered

@ -732,7 +743,7 @@ struct DDMockTxnProcessorImpl {
wait(self->rawStartMovement(params, tssMapping));
ASSERT(tssMapping.empty());

wait(checkFetchingState(self, params.destinationTeam, params.keys));
wait(rawCheckFetchingState(self, params));

wait(self->rawFinishMovement(params, tssMapping));
if (!params.dataMovementComplete.isSet())

@ -915,6 +926,16 @@ Future<std::vector<ProcessData>> DDMockTxnProcessor::getWorkers() const {
ACTOR Future<Void> rawStartMovement(std::shared_ptr<MockGlobalState> mgs,
MoveKeysParams params,
std::map<UID, StorageServerInterface> tssMapping) {
state KeyRange keys;
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA) {
ASSERT(params.ranges.present());
// TODO: make startMoveShards work with multiple ranges.
ASSERT(params.ranges.get().size() == 1);
keys = params.ranges.get().at(0);
} else {
ASSERT(params.keys.present());
keys = params.keys.get();
}
// There won’t be parallel rawStart or rawFinish in the mock world, because the following *mock* transaction code
// always finishes without a coroutine switch.
ASSERT(params.startMoveKeysParallelismLock->activePermits() == 0);

@ -925,15 +946,15 @@ ACTOR Future<Void> rawStartMovement(std::shared_ptr<MockGlobalState> mgs,
destTeams.emplace_back(params.destinationTeam, true);
// invariant: splitting and merging won't happen in the same moveKeys action. For example, if [a,c) and [c,
// e) exist, the keys to move won't be [b, d).
auto intersectRanges = mgs->shardMapping->intersectingRanges(params.keys);
auto intersectRanges = mgs->shardMapping->intersectingRanges(keys);
// 1. splitting or just moving a range. The new boundary needs to be defined in startMovement
if (intersectRanges.begin().range().contains(params.keys)) {
mgs->shardMapping->defineShard(params.keys);
if (intersectRanges.begin().range().contains(keys)) {
mgs->shardMapping->defineShard(keys);
}
// 2. merge ops will coalesce the boundary in finishMovement;
intersectRanges = mgs->shardMapping->intersectingRanges(params.keys);
ASSERT(params.keys.begin == intersectRanges.begin().begin());
ASSERT(params.keys.end == intersectRanges.end().begin());
intersectRanges = mgs->shardMapping->intersectingRanges(keys);
ASSERT(keys.begin == intersectRanges.begin().begin());
ASSERT(keys.end == intersectRanges.end().begin());

for (auto it = intersectRanges.begin(); it != intersectRanges.end(); ++it) {
auto teamPair = mgs->shardMapping->getTeamsFor(it->begin());

@ -945,8 +966,8 @@ ACTOR Future<Void> rawStartMovement(std::shared_ptr<MockGlobalState> mgs,
deterministicRandom()->randomInt64(SERVER_KNOBS->MIN_SHARD_BYTES, SERVER_KNOBS->MAX_SHARD_BYTES);
for (auto& id : params.destinationTeam) {
auto& server = mgs->allServers.at(id);
server.setShardStatus(params.keys, MockShardStatus::INFLIGHT, mgs->restrictSize);
server.signalFetchKeys(params.keys, randomRangeSize);
server.setShardStatus(keys, MockShardStatus::INFLIGHT, mgs->restrictSize);
server.signalFetchKeys(keys, randomRangeSize);
}
return Void();
}

@ -959,6 +980,17 @@ Future<Void> DDMockTxnProcessor::rawStartMovement(const MoveKeysParams& params,
ACTOR Future<Void> rawFinishMovement(std::shared_ptr<MockGlobalState> mgs,
MoveKeysParams params,
std::map<UID, StorageServerInterface> tssMapping) {
state KeyRange keys;
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA) {
ASSERT(params.ranges.present());
// TODO: make startMoveShards work with multiple ranges.
ASSERT(params.ranges.get().size() == 1);
keys = params.ranges.get().at(0);
} else {
ASSERT(params.keys.present());
keys = params.keys.get();
}

// There won’t be parallel rawStart or rawFinish in the mock world, because the following *mock* transaction code
// always finishes without a coroutine switch.
ASSERT(params.finishMoveKeysParallelismLock->activePermits() == 0);

@ -966,7 +998,7 @@ ACTOR Future<Void> rawFinishMovement(std::shared_ptr<MockGlobalState> mgs,
state FlowLock::Releaser releaser(*params.finishMoveKeysParallelismLock);

// get source and dest teams
auto [destTeams, srcTeams] = mgs->shardMapping->getTeamsForFirstShard(params.keys);
auto [destTeams, srcTeams] = mgs->shardMapping->getTeamsForFirstShard(keys);

ASSERT_EQ(destTeams.size(), 1); // Will the multi-region or dynamic replica make destTeam.size() > 1?
if (destTeams.front() != ShardsAffectedByTeamFailure::Team{ params.destinationTeam, true }) {

@ -978,7 +1010,7 @@ ACTOR Future<Void> rawFinishMovement(std::shared_ptr<MockGlobalState> mgs,
}

for (auto& id : params.destinationTeam) {
mgs->allServers.at(id).setShardStatus(params.keys, MockShardStatus::COMPLETED, mgs->restrictSize);
mgs->allServers.at(id).setShardStatus(keys, MockShardStatus::COMPLETED, mgs->restrictSize);
}

// remove destination servers from source servers

@ -986,11 +1018,11 @@ ACTOR Future<Void> rawFinishMovement(std::shared_ptr<MockGlobalState> mgs,
for (auto& id : srcTeams.front().servers) {
// the only caller, moveKeys, will always make sure the UIDs are sorted
if (!std::binary_search(params.destinationTeam.begin(), params.destinationTeam.end(), id)) {
mgs->allServers.at(id).removeShard(params.keys);
mgs->allServers.at(id).removeShard(keys);
}
}
mgs->shardMapping->finishMove(params.keys);
mgs->shardMapping->defineShard(params.keys); // coalesce for merge
mgs->shardMapping->finishMove(keys);
mgs->shardMapping->defineShard(keys); // coalesce for merge
return Void();
}

@ -134,9 +134,9 @@ struct GrvProxyStats {
recentRequests(0), lastBucketBegin(now()),
bucketInterval(FLOW_KNOBS->BASIC_LOAD_BALANCE_UPDATE_RATE / FLOW_KNOBS->BASIC_LOAD_BALANCE_BUCKETS),
grvConfirmEpochLiveDist(
Histogram::getHistogram("GrvProxy"_sr, "GrvConfirmEpochLive"_sr, Histogram::Unit::microseconds)),
Histogram::getHistogram("GrvProxy"_sr, "GrvConfirmEpochLive"_sr, Histogram::Unit::milliseconds)),
grvGetCommittedVersionRpcDist(
Histogram::getHistogram("GrvProxy"_sr, "GrvGetCommittedVersionRpc"_sr, Histogram::Unit::microseconds)) {
Histogram::getHistogram("GrvProxy"_sr, "GrvGetCommittedVersionRpc"_sr, Histogram::Unit::milliseconds)) {
// The rate at which the limit(budget) is allowed to grow.
specialCounter(cc, "SystemGRVQueueSize", [this]() { return this->systemGRVQueueSize; });
specialCounter(cc, "DefaultGRVQueueSize", [this]() { return this->defaultGRVQueueSize; });

@ -68,12 +68,9 @@

#ifdef SSD_ROCKSDB_EXPERIMENTAL

// Enforcing rocksdb version to be 6.27.3 or greater.
static_assert(ROCKSDB_MAJOR >= 6, "Unsupported rocksdb version. Update the rocksdb to 6.27.3 version");
static_assert(ROCKSDB_MAJOR == 6 ? ROCKSDB_MINOR >= 27 : true,
"Unsupported rocksdb version. Update the rocksdb to 6.27.3 version");
static_assert((ROCKSDB_MAJOR == 6 && ROCKSDB_MINOR == 27) ? ROCKSDB_PATCH >= 3 : true,
"Unsupported rocksdb version. Update the rocksdb to 6.27.3 version");
// Enforcing rocksdb version to be 7.7.3.
static_assert((ROCKSDB_MAJOR == 7 && ROCKSDB_MINOR == 7 && ROCKSDB_PATCH == 3),
"Unsupported rocksdb version. Update the rocksdb to 7.7.3 version");

namespace {
using rocksdb::BackgroundErrorReason;

@ -901,6 +898,7 @@ ACTOR Future<Void> rocksDBMetricLogger(UID id,
};

// To control the rocksdb::StatsLevel, use ROCKSDB_STATS_LEVEL knob.
// Refer StatsLevel: https://github.com/facebook/rocksdb/blob/main/include/rocksdb/statistics.h#L594
state std::vector<std::pair<const char*, uint32_t>> histogramStats = {
{ "CompactionTime", rocksdb::COMPACTION_TIME }, // enabled if rocksdb::StatsLevel > kExceptTimers(2)
{ "CompactionCPUTime", rocksdb::COMPACTION_CPU_TIME }, // enabled if rocksdb::StatsLevel > kExceptTimers(2)

@ -970,6 +968,7 @@ ACTOR Future<Void> rocksDBMetricLogger(UID id,
}

// None of the histogramStats are enabled unless the ROCKSDB_STATS_LEVEL > kExceptHistogramOrTimers(1)
// Refer StatsLevel: https://github.com/facebook/rocksdb/blob/main/include/rocksdb/statistics.h#L594
if (SERVER_KNOBS->ROCKSDB_STATS_LEVEL > rocksdb::kExceptHistogramOrTimers) {
for (auto& [name, histogram] : histogramStats) {
rocksdb::HistogramData histogram_data;

@ -1031,7 +1030,10 @@ void logRocksDBError(UID id,
Optional<Severity> sev = Optional<Severity>()) {
Severity level = sev.present() ? sev.get() : (status.IsTimedOut() ? SevWarn : SevError);
TraceEvent e(level, "RocksDBError", id);
e.detail("Error", status.ToString()).detail("Method", method).detail("RocksDBSeverity", status.severity());
e.setMaxFieldLength(10000)
.detail("Error", status.ToString())
.detail("Method", method)
.detail("RocksDBSeverity", status.severity());
if (status.IsIOError()) {
e.detail("SubCode", status.subcode());
}

@ -1253,15 +1255,18 @@ struct RocksDBKeyValueStore : IKeyValueStore {
std::make_pair(ROCKSDB_COMMIT_QUEUEWAIT_HISTOGRAM.toString(), commitBeginTime - a.startTime));
}
Standalone<VectorRef<KeyRangeRef>> deletes;
DeleteVisitor dv(deletes, deletes.arena());
rocksdb::Status s = a.batchToCommit->Iterate(&dv);
if (!s.ok()) {
    logRocksDBError(id, s, "CommitDeleteVisitor");
    a.done.sendError(statusToError(s));
    return;
if (SERVER_KNOBS->ROCKSDB_SUGGEST_COMPACT_CLEAR_RANGE) {
    DeleteVisitor dv(deletes, deletes.arena());
    rocksdb::Status s = a.batchToCommit->Iterate(&dv);
    if (!s.ok()) {
        logRocksDBError(id, s, "CommitDeleteVisitor");
        a.done.sendError(statusToError(s));
        return;
    }
    // If there are any range deletes, we should have added them to be deleted.
    ASSERT(!deletes.empty() || !a.batchToCommit->HasDeleteRange());
}
// If there are any range deletes, we should have added them to be deleted.
ASSERT(!deletes.empty() || !a.batchToCommit->HasDeleteRange());

rocksdb::WriteOptions options;
options.sync = !SERVER_KNOBS->ROCKSDB_UNSAFE_AUTO_FSYNC;
if (SERVER_KNOBS->ROCKSDB_DISABLE_WAL_EXPERIMENTAL) {
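The DeleteVisitor scan that this change gates behind ROCKSDB_SUGGEST_COMPACT_CLEAR_RANGE is built on WriteBatch iteration. A minimal sketch of the visitor idea, with illustrative names rather than FDB's:

#include <rocksdb/write_batch.h>
#include <rocksdb/slice.h>
#include <string>
#include <vector>

// WriteBatch::Iterate() replays every operation in a batch through a Handler,
// so a commit path can collect DeleteRange spans without re-parsing the batch.
// Other operations fall through to the Handler's default no-op behavior.
struct RangeDeleteCollector : rocksdb::WriteBatch::Handler {
    std::vector<std::pair<std::string, std::string>> ranges;
    rocksdb::Status DeleteRangeCF(uint32_t /*cf*/,
                                  const rocksdb::Slice& begin,
                                  const rocksdb::Slice& end) override {
        ranges.emplace_back(begin.ToString(), end.ToString());
        return rocksdb::Status::OK();
    }
};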
@ -1275,7 +1280,7 @@ struct RocksDBKeyValueStore : IKeyValueStore {
// Request for batchToCommit bytes. If this request cannot be satisfied, the call is blocked.
rateLimiter->Request(a.batchToCommit->GetDataSize() /* bytes */, rocksdb::Env::IO_HIGH);
}
s = db->Write(options, a.batchToCommit.get());
rocksdb::Status s = db->Write(options, a.batchToCommit.get());
readIterPool->update();
double currTime = timer_monotonic();
sharedState->dbWriteLatency.addMeasurement(currTime - writeBeginTime);
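A sketch of the throttling step above, using the public RocksDB rate limiter; the 50 MB/s budget is an arbitrary illustrative value, not what FDB configures:

#include <rocksdb/db.h>
#include <rocksdb/rate_limiter.h>
#include <memory>

// Request() blocks until `bytes` can be granted at the given priority, so
// large commit batches are paced against the configured byte budget.
std::shared_ptr<rocksdb::RateLimiter> limiter(
    rocksdb::NewGenericRateLimiter(50 << 20 /* bytes per second */));

rocksdb::Status throttledWrite(rocksdb::DB* db, rocksdb::WriteBatch* batch) {
    limiter->Request(batch->GetDataSize(), rocksdb::Env::IO_HIGH);
    rocksdb::WriteOptions options;
    return db->Write(options, batch);
}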
@ -1402,17 +1407,11 @@ struct RocksDBKeyValueStore : IKeyValueStore {
ThreadReturnPromiseStream<std::pair<std::string, double>>* metricPromiseStream)
: id(id), db(db), cf(cf), sharedState(sharedState), readIterPool(readIterPool),
  perfContextMetrics(perfContextMetrics), metricPromiseStream(metricPromiseStream), threadIndex(threadIndex) {
if (g_network->isSimulated()) {
    // In simulation, increasing the read operation timeouts to 5 minutes, as some of the tests have
    // very high load and single read thread cannot process all the load within the timeouts.
    readValueTimeout = 5 * 60;
    readValuePrefixTimeout = 5 * 60;
    readRangeTimeout = 5 * 60;
} else {
    readValueTimeout = SERVER_KNOBS->ROCKSDB_READ_VALUE_TIMEOUT;
    readValuePrefixTimeout = SERVER_KNOBS->ROCKSDB_READ_VALUE_PREFIX_TIMEOUT;
    readRangeTimeout = SERVER_KNOBS->ROCKSDB_READ_RANGE_TIMEOUT;
}

readValueTimeout = SERVER_KNOBS->ROCKSDB_READ_VALUE_TIMEOUT;
readValuePrefixTimeout = SERVER_KNOBS->ROCKSDB_READ_VALUE_PREFIX_TIMEOUT;
readRangeTimeout = SERVER_KNOBS->ROCKSDB_READ_RANGE_TIMEOUT;

if (SERVER_KNOBS->ROCKSDB_PERFCONTEXT_ENABLE) {
    // Enable perf context on the same thread with the db thread
    rocksdb::SetPerfLevel(rocksdb::PerfLevel::kEnableTimeExceptForMutex);
@ -1792,39 +1791,39 @@ struct RocksDBKeyValueStore : IKeyValueStore {
ACTOR Future<Void> updateHistogram(FutureStream<std::pair<std::string, double>> metricFutureStream) {
state Reference<Histogram> commitLatencyHistogram = Histogram::getHistogram(
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_COMMIT_LATENCY_HISTOGRAM, Histogram::Unit::microseconds);
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_COMMIT_LATENCY_HISTOGRAM, Histogram::Unit::milliseconds);
state Reference<Histogram> commitActionHistogram = Histogram::getHistogram(
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_COMMIT_ACTION_HISTOGRAM, Histogram::Unit::microseconds);
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_COMMIT_ACTION_HISTOGRAM, Histogram::Unit::milliseconds);
state Reference<Histogram> commitQueueWaitHistogram = Histogram::getHistogram(
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_COMMIT_QUEUEWAIT_HISTOGRAM, Histogram::Unit::microseconds);
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_COMMIT_QUEUEWAIT_HISTOGRAM, Histogram::Unit::milliseconds);
state Reference<Histogram> writeHistogram = Histogram::getHistogram(
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_WRITE_HISTOGRAM, Histogram::Unit::microseconds);
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_WRITE_HISTOGRAM, Histogram::Unit::milliseconds);
state Reference<Histogram> deleteCompactRangeHistogram = Histogram::getHistogram(
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_DELETE_COMPACTRANGE_HISTOGRAM, Histogram::Unit::microseconds);
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_DELETE_COMPACTRANGE_HISTOGRAM, Histogram::Unit::milliseconds);
state Reference<Histogram> readRangeLatencyHistogram = Histogram::getHistogram(
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READRANGE_LATENCY_HISTOGRAM, Histogram::Unit::microseconds);
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READRANGE_LATENCY_HISTOGRAM, Histogram::Unit::milliseconds);
state Reference<Histogram> readValueLatencyHistogram = Histogram::getHistogram(
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READVALUE_LATENCY_HISTOGRAM, Histogram::Unit::microseconds);
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READVALUE_LATENCY_HISTOGRAM, Histogram::Unit::milliseconds);
state Reference<Histogram> readPrefixLatencyHistogram = Histogram::getHistogram(
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READPREFIX_LATENCY_HISTOGRAM, Histogram::Unit::microseconds);
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READPREFIX_LATENCY_HISTOGRAM, Histogram::Unit::milliseconds);
state Reference<Histogram> readRangeActionHistogram = Histogram::getHistogram(
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READRANGE_ACTION_HISTOGRAM, Histogram::Unit::microseconds);
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READRANGE_ACTION_HISTOGRAM, Histogram::Unit::milliseconds);
state Reference<Histogram> readValueActionHistogram = Histogram::getHistogram(
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READVALUE_ACTION_HISTOGRAM, Histogram::Unit::microseconds);
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READVALUE_ACTION_HISTOGRAM, Histogram::Unit::milliseconds);
state Reference<Histogram> readPrefixActionHistogram = Histogram::getHistogram(
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READPREFIX_ACTION_HISTOGRAM, Histogram::Unit::microseconds);
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READPREFIX_ACTION_HISTOGRAM, Histogram::Unit::milliseconds);
state Reference<Histogram> readRangeQueueWaitHistogram = Histogram::getHistogram(
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READRANGE_QUEUEWAIT_HISTOGRAM, Histogram::Unit::microseconds);
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READRANGE_QUEUEWAIT_HISTOGRAM, Histogram::Unit::milliseconds);
state Reference<Histogram> readValueQueueWaitHistogram = Histogram::getHistogram(
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READVALUE_QUEUEWAIT_HISTOGRAM, Histogram::Unit::microseconds);
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READVALUE_QUEUEWAIT_HISTOGRAM, Histogram::Unit::milliseconds);
state Reference<Histogram> readPrefixQueueWaitHistogram = Histogram::getHistogram(
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READPREFIX_QUEUEWAIT_HISTOGRAM, Histogram::Unit::microseconds);
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READPREFIX_QUEUEWAIT_HISTOGRAM, Histogram::Unit::milliseconds);
state Reference<Histogram> readRangeNewIteratorHistogram = Histogram::getHistogram(
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READRANGE_NEWITERATOR_HISTOGRAM, Histogram::Unit::microseconds);
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READRANGE_NEWITERATOR_HISTOGRAM, Histogram::Unit::milliseconds);
state Reference<Histogram> readValueGetHistogram = Histogram::getHistogram(
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READVALUE_GET_HISTOGRAM, Histogram::Unit::microseconds);
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READVALUE_GET_HISTOGRAM, Histogram::Unit::milliseconds);
state Reference<Histogram> readPrefixGetHistogram = Histogram::getHistogram(
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READPREFIX_GET_HISTOGRAM, Histogram::Unit::microseconds);
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READPREFIX_GET_HISTOGRAM, Histogram::Unit::milliseconds);
loop {
    choose {
        when(std::pair<std::string, double> measure = waitNext(metricFutureStream)) {

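The microseconds-to-milliseconds sweep above is a unit-declaration fix. A short actor-style fragment, assuming flow's Histogram API as used throughout this file, where sampleSeconds() scales the sample by the histogram's declared unit (so the declaration must match the magnitude intended in the emitted metrics); doCommit() is a hypothetical stand-in:

state Reference<Histogram> commitLatency = Histogram::getHistogram(
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_COMMIT_LATENCY_HISTOGRAM, Histogram::Unit::milliseconds);
double commitBeginTime = timer_monotonic();
wait(doCommit()); // hypothetical commit action
// Recorded in seconds; bucketed per the declared unit (milliseconds here).
commitLatency->sampleSeconds(timer_monotonic() - commitBeginTime);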
@ -41,12 +41,9 @@
#ifdef SSD_ROCKSDB_EXPERIMENTAL

// Enforcing rocksdb version to be 6.27.3 or greater.
static_assert(ROCKSDB_MAJOR >= 6, "Unsupported rocksdb version. Update the rocksdb to 6.27.3 version");
static_assert(ROCKSDB_MAJOR == 6 ? ROCKSDB_MINOR >= 27 : true,
              "Unsupported rocksdb version. Update the rocksdb to 6.27.3 version");
static_assert((ROCKSDB_MAJOR == 6 && ROCKSDB_MINOR == 27) ? ROCKSDB_PATCH >= 3 : true,
              "Unsupported rocksdb version. Update the rocksdb to 6.27.3 version");
// Enforcing rocksdb version to be 7.7.3.
static_assert((ROCKSDB_MAJOR == 7 && ROCKSDB_MINOR == 7 && ROCKSDB_PATCH == 3),
              "Unsupported rocksdb version. Update the rocksdb to 7.7.3 version");

const std::string rocksDataFolderSuffix = "-data";
const std::string METADATA_SHARD_ID = "kvs-metadata";
@ -170,7 +167,10 @@ std::string getShardMappingKey(KeyRef key, StringRef prefix) {
void logRocksDBError(const rocksdb::Status& status, const std::string& method) {
auto level = status.IsTimedOut() ? SevWarn : SevError;
TraceEvent e(level, "ShardedRocksDBError");
e.detail("Error", status.ToString()).detail("Method", method).detail("ShardedRocksDBSeverity", status.severity());
e.setMaxFieldLength(10000)
    .detail("Error", status.ToString())
    .detail("Method", method)
    .detail("ShardedRocksDBSeverity", status.severity());
if (status.IsIOError()) {
    e.detail("SubCode", status.subcode());
}
@ -449,7 +449,8 @@ struct DataShard {
// PhysicalShard represent a collection of logical shards. A PhysicalShard could have one or more DataShards. A
// PhysicalShard is stored as a column family in rocksdb. Each PhysicalShard has its own iterator pool.
struct PhysicalShard {
PhysicalShard(rocksdb::DB* db, std::string id) : db(db), id(id), isInitialized(false) {}
PhysicalShard(rocksdb::DB* db, std::string id, const rocksdb::ColumnFamilyOptions& options)
    : db(db), id(id), cfOptions(options), isInitialized(false) {}
PhysicalShard(rocksdb::DB* db, std::string id, rocksdb::ColumnFamilyHandle* handle)
    : db(db), id(id), cf(handle), isInitialized(true) {
    ASSERT(cf);

@ -460,7 +461,7 @@ struct PhysicalShard {
if (cf) {
    return rocksdb::Status::OK();
}
auto status = db->CreateColumnFamily(getCFOptions(), id, &cf);
auto status = db->CreateColumnFamily(cfOptions, id, &cf);
if (!status.ok()) {
    logRocksDBError(status, "AddCF");
    return status;

@ -516,6 +517,7 @@ struct PhysicalShard {

rocksdb::DB* db;
std::string id;
rocksdb::ColumnFamilyOptions cfOptions;
rocksdb::ColumnFamilyHandle* cf = nullptr;
std::unordered_map<std::string, std::unique_ptr<DataShard>> dataShards;
std::shared_ptr<ReadIteratorPool> readIterPool;
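Each PhysicalShard now carries its own ColumnFamilyOptions instead of re-deriving them from a global helper. A small sketch of the per-shard creation call, using the public RocksDB API (function and parameter names here are illustrative):

#include <rocksdb/db.h>
#include <string>

// One physical shard maps to one column family named by its shard id.
rocksdb::Status addShardCF(rocksdb::DB* db,
                           const std::string& shardId,
                           const rocksdb::ColumnFamilyOptions& cfOptions,
                           rocksdb::ColumnFamilyHandle** cf) {
    return db->CreateColumnFamily(cfOptions, shardId, cf);
}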
@ -586,7 +588,8 @@ int readRangeInDb(PhysicalShard* shard, const KeyRangeRef range, int rowLimit, i
// Manages physical shards and maintains logical shard mapping.
class ShardManager {
public:
ShardManager(std::string path, UID logId) : path(path), logId(logId), dataShardMap(nullptr, specialKeys.end) {}
ShardManager(std::string path, UID logId, const rocksdb::Options& options)
    : path(path), logId(logId), dbOptions(options), dataShardMap(nullptr, specialKeys.end) {}

ACTOR static Future<Void> shardMetricsLogger(std::shared_ptr<ShardedRocksDBState> rState,
                                             Future<Void> openFuture,

@ -637,31 +640,31 @@ public:
return Void();
}

rocksdb::Status init(rocksdb::Options options) {
rocksdb::Status init() {
// Open instance.
TraceEvent(SevInfo, "ShardedRocksShardManagerInitBegin", this->logId).detail("DataPath", path);
std::vector<std::string> columnFamilies;
rocksdb::Status status = rocksdb::DB::ListColumnFamilies(options, path, &columnFamilies);
rocksdb::Status status = rocksdb::DB::ListColumnFamilies(dbOptions, path, &columnFamilies);

rocksdb::ColumnFamilyOptions cfOptions = getCFOptions();
std::vector<rocksdb::ColumnFamilyDescriptor> descriptors;
bool foundMetadata = false;
for (const auto& name : columnFamilies) {
    if (name == METADATA_SHARD_ID) {
        foundMetadata = true;
    }
    descriptors.push_back(rocksdb::ColumnFamilyDescriptor{ name, cfOptions });
    descriptors.push_back(rocksdb::ColumnFamilyDescriptor{ name, rocksdb::ColumnFamilyOptions(dbOptions) });
}

ASSERT(foundMetadata || descriptors.size() == 0);

// Add default column family if it's a newly opened database.
if (descriptors.size() == 0) {
    descriptors.push_back(rocksdb::ColumnFamilyDescriptor{ "default", cfOptions });
    descriptors.push_back(
        rocksdb::ColumnFamilyDescriptor{ "default", rocksdb::ColumnFamilyOptions(dbOptions) });
}

std::vector<rocksdb::ColumnFamilyHandle*> handles;
status = rocksdb::DB::Open(options, path, descriptors, &handles, &db);
status = rocksdb::DB::Open(dbOptions, path, descriptors, &handles, &db);
if (!status.ok()) {
    logRocksDBError(status, "Open");
    return status;
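A self-contained sketch of the open sequence init() now follows, using the stored rocksdb::Options (rocksdb::Options inherits from both DBOptions and ColumnFamilyOptions, which is what makes the ColumnFamilyOptions(dbOptions) conversions above work):

#include <rocksdb/db.h>
#include <string>
#include <vector>

// Enumerate existing column families, describe each with options derived from
// dbOptions, then open them all in one DB::Open call.
rocksdb::Status openAllShards(const rocksdb::Options& dbOptions,
                              const std::string& path,
                              std::vector<rocksdb::ColumnFamilyHandle*>* handles,
                              rocksdb::DB** db) {
    std::vector<std::string> names;
    rocksdb::Status s = rocksdb::DB::ListColumnFamilies(dbOptions, path, &names);
    if (!s.ok() && names.empty())
        names.push_back(rocksdb::kDefaultColumnFamilyName); // brand-new database
    std::vector<rocksdb::ColumnFamilyDescriptor> descriptors;
    for (const auto& name : names)
        descriptors.emplace_back(name, rocksdb::ColumnFamilyOptions(dbOptions));
    return rocksdb::DB::Open(dbOptions, path, descriptors, handles, db);
}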
@ -766,7 +769,8 @@ public:
physicalShards[defaultShard->id] = defaultShard;

// Create metadata shard.
auto metadataShard = std::make_shared<PhysicalShard>(db, METADATA_SHARD_ID);
auto metadataShard =
    std::make_shared<PhysicalShard>(db, METADATA_SHARD_ID, rocksdb::ColumnFamilyOptions(dbOptions));
metadataShard->init();
columnFamilyMap[metadataShard->cf->GetID()] = metadataShard->cf;
physicalShards[METADATA_SHARD_ID] = metadataShard;

@ -832,7 +836,8 @@ public:
}
}

auto [it, inserted] = physicalShards.emplace(id, std::make_shared<PhysicalShard>(db, id));
auto [it, inserted] = physicalShards.emplace(
    id, std::make_shared<PhysicalShard>(db, id, rocksdb::ColumnFamilyOptions(dbOptions)));
std::shared_ptr<PhysicalShard>& shard = it->second;

activePhysicalShardIds.emplace(id);

@ -1146,6 +1151,7 @@ public:
private:
const std::string path;
const UID logId;
rocksdb::Options dbOptions;
rocksdb::DB* db = nullptr;
std::unordered_map<std::string, std::shared_ptr<PhysicalShard>> physicalShards;
std::unordered_set<std::string> activePhysicalShardIds;
@ -1421,40 +1427,40 @@ RocksDBMetrics::RocksDBMetrics(UID debugID, std::shared_ptr<rocksdb::Statistics>
}
for (int i = 0; i < SERVER_KNOBS->ROCKSDB_READ_PARALLELISM; i++) {
    readRangeLatencyHistograms.push_back(Histogram::getHistogram(
        ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READRANGE_LATENCY_HISTOGRAM, Histogram::Unit::microseconds));
        ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READRANGE_LATENCY_HISTOGRAM, Histogram::Unit::milliseconds));
    readValueLatencyHistograms.push_back(Histogram::getHistogram(
        ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READVALUE_LATENCY_HISTOGRAM, Histogram::Unit::microseconds));
        ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READVALUE_LATENCY_HISTOGRAM, Histogram::Unit::milliseconds));
    readPrefixLatencyHistograms.push_back(Histogram::getHistogram(
        ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READPREFIX_LATENCY_HISTOGRAM, Histogram::Unit::microseconds));
        ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READPREFIX_LATENCY_HISTOGRAM, Histogram::Unit::milliseconds));
    readRangeActionHistograms.push_back(Histogram::getHistogram(
        ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READRANGE_ACTION_HISTOGRAM, Histogram::Unit::microseconds));
        ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READRANGE_ACTION_HISTOGRAM, Histogram::Unit::milliseconds));
    readValueActionHistograms.push_back(Histogram::getHistogram(
        ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READVALUE_ACTION_HISTOGRAM, Histogram::Unit::microseconds));
        ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READVALUE_ACTION_HISTOGRAM, Histogram::Unit::milliseconds));
    readPrefixActionHistograms.push_back(Histogram::getHistogram(
        ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READPREFIX_ACTION_HISTOGRAM, Histogram::Unit::microseconds));
        ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READPREFIX_ACTION_HISTOGRAM, Histogram::Unit::milliseconds));
    readRangeQueueWaitHistograms.push_back(Histogram::getHistogram(
        ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READRANGE_QUEUEWAIT_HISTOGRAM, Histogram::Unit::microseconds));
        ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READRANGE_QUEUEWAIT_HISTOGRAM, Histogram::Unit::milliseconds));
    readValueQueueWaitHistograms.push_back(Histogram::getHistogram(
        ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READVALUE_QUEUEWAIT_HISTOGRAM, Histogram::Unit::microseconds));
        ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READVALUE_QUEUEWAIT_HISTOGRAM, Histogram::Unit::milliseconds));
    readPrefixQueueWaitHistograms.push_back(Histogram::getHistogram(
        ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READPREFIX_QUEUEWAIT_HISTOGRAM, Histogram::Unit::microseconds));
        ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READPREFIX_QUEUEWAIT_HISTOGRAM, Histogram::Unit::milliseconds));
    readRangeNewIteratorHistograms.push_back(Histogram::getHistogram(
        ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READRANGE_NEWITERATOR_HISTOGRAM, Histogram::Unit::microseconds));
        ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READRANGE_NEWITERATOR_HISTOGRAM, Histogram::Unit::milliseconds));
    readValueGetHistograms.push_back(Histogram::getHistogram(
        ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READVALUE_GET_HISTOGRAM, Histogram::Unit::microseconds));
        ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READVALUE_GET_HISTOGRAM, Histogram::Unit::milliseconds));
    readPrefixGetHistograms.push_back(Histogram::getHistogram(
        ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READPREFIX_GET_HISTOGRAM, Histogram::Unit::microseconds));
        ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READPREFIX_GET_HISTOGRAM, Histogram::Unit::milliseconds));
}
commitLatencyHistogram = Histogram::getHistogram(
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_COMMIT_LATENCY_HISTOGRAM, Histogram::Unit::microseconds);
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_COMMIT_LATENCY_HISTOGRAM, Histogram::Unit::milliseconds);
commitActionHistogram = Histogram::getHistogram(
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_COMMIT_ACTION_HISTOGRAM, Histogram::Unit::microseconds);
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_COMMIT_ACTION_HISTOGRAM, Histogram::Unit::milliseconds);
commitQueueWaitHistogram = Histogram::getHistogram(
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_COMMIT_QUEUEWAIT_HISTOGRAM, Histogram::Unit::microseconds);
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_COMMIT_QUEUEWAIT_HISTOGRAM, Histogram::Unit::milliseconds);
writeHistogram =
    Histogram::getHistogram(ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_WRITE_HISTOGRAM, Histogram::Unit::microseconds);
    Histogram::getHistogram(ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_WRITE_HISTOGRAM, Histogram::Unit::milliseconds);
deleteCompactRangeHistogram = Histogram::getHistogram(
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_DELETE_COMPACTRANGE_HISTOGRAM, Histogram::Unit::microseconds);
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_DELETE_COMPACTRANGE_HISTOGRAM, Histogram::Unit::milliseconds);
}

void RocksDBMetrics::logStats(rocksdb::DB* db) {
@ -1689,7 +1695,7 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
Future<Void> readyToStart,
std::unordered_map<std::string, std::shared_ptr<PhysicalShard>>* physicalShards) {
state Reference<Histogram> histogram = Histogram::getHistogram(
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, "TimeSpentRefreshIterators"_sr, Histogram::Unit::microseconds);
    ROCKSDBSTORAGE_HISTOGRAM_GROUP, "TimeSpentRefreshIterators"_sr, Histogram::Unit::milliseconds);

if (SERVER_KNOBS->ROCKSDB_READ_RANGE_REUSE_ITERATORS) {
    try {

@ -1755,7 +1761,6 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {

struct OpenAction : TypedAction<Writer, OpenAction> {
ShardManager* shardManager;
rocksdb::Options dbOptions;
ThreadReturnPromise<Void> done;
Optional<Future<Void>>& metrics;
const FlowLock* readLock;

@ -1763,19 +1768,18 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
std::shared_ptr<RocksDBErrorListener> errorListener;

OpenAction(ShardManager* shardManager,
           rocksdb::Options dbOptions,
           Optional<Future<Void>>& metrics,
           const FlowLock* readLock,
           const FlowLock* fetchLock,
           std::shared_ptr<RocksDBErrorListener> errorListener)
  : shardManager(shardManager), dbOptions(dbOptions), metrics(metrics), readLock(readLock),
    fetchLock(fetchLock), errorListener(errorListener) {}
  : shardManager(shardManager), metrics(metrics), readLock(readLock), fetchLock(fetchLock),
    errorListener(errorListener) {}

double getTimeEstimate() const override { return SERVER_KNOBS->COMMIT_TIME_ESTIMATE; }
};

void action(OpenAction& a) {
auto status = a.shardManager->init(a.dbOptions);
auto status = a.shardManager->init();

if (!status.ok()) {
    logRocksDBError(status, "Open");
@ -1886,21 +1890,23 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
rocksdb::DB* db,
std::vector<std::pair<uint32_t, KeyRange>>* deletes,
bool sample) {
DeleteVisitor dv(deletes);
rocksdb::Status s = batch->Iterate(&dv);
if (!s.ok()) {
    logRocksDBError(s, "CommitDeleteVisitor");
    return s;
}
if (SERVER_KNOBS->ROCKSDB_SUGGEST_COMPACT_CLEAR_RANGE) {
    DeleteVisitor dv(deletes);
    rocksdb::Status s = batch->Iterate(&dv);
    if (!s.ok()) {
        logRocksDBError(s, "CommitDeleteVisitor");
        return s;
    }

    // If there are any range deletes, we should have added them to be deleted.
    ASSERT(!deletes->empty() || !batch->HasDeleteRange());
// If there are any range deletes, we should have added them to be deleted.
ASSERT(!deletes->empty() || !batch->HasDeleteRange());
}

rocksdb::WriteOptions options;
options.sync = !SERVER_KNOBS->ROCKSDB_UNSAFE_AUTO_FSYNC;

double writeBeginTime = sample ? timer_monotonic() : 0;
s = db->Write(options, batch);
rocksdb::Status s = db->Write(options, batch);
if (sample) {
    rocksDBMetrics->getWriteHistogram()->sampleSeconds(timer_monotonic() - writeBeginTime);
}
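Both engines share the durable-write call seen here. A tiny sketch of that step in isolation (the knob name is FDB's; the helper is illustrative):

#include <rocksdb/db.h>

// sync=true forces an fsync of the WAL before Write() returns; it is only
// disabled behind the explicitly "unsafe" auto-fsync knob.
rocksdb::Status durableWrite(rocksdb::DB* db, rocksdb::WriteBatch* batch, bool unsafeAutoFsync) {
    rocksdb::WriteOptions options;
    options.sync = !unsafeAutoFsync;
    return db->Write(options, batch);
}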
@ -2280,7 +2286,7 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
numReadWaiters(SERVER_KNOBS->ROCKSDB_READ_QUEUE_HARD_MAX - SERVER_KNOBS->ROCKSDB_READ_QUEUE_SOFT_MAX),
numFetchWaiters(SERVER_KNOBS->ROCKSDB_FETCH_QUEUE_HARD_MAX - SERVER_KNOBS->ROCKSDB_FETCH_QUEUE_SOFT_MAX),
errorListener(std::make_shared<RocksDBErrorListener>()), errorFuture(errorListener->getFuture()),
shardManager(path, id), dbOptions(getOptions()),
dbOptions(getOptions()), shardManager(path, id, dbOptions),
rocksDBMetrics(std::make_shared<RocksDBMetrics>(id, dbOptions.statistics)) {
// In simluation, run the reader/writer threads as Coro threads (i.e. in the network thread. The storage
// engine is still multi-threaded as background compaction threads are still present. Reads/writes to disk

@ -2347,7 +2353,7 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
// mapping data.
} else {
    auto a = std::make_unique<Writer::OpenAction>(
        &shardManager, dbOptions, metrics, &readSemaphore, &fetchSemaphore, errorListener);
        &shardManager, metrics, &readSemaphore, &fetchSemaphore, errorListener);
    openFuture = a->done.getFuture();
    this->metrics = ShardManager::shardMetricsLogger(this->rState, openFuture, &shardManager) &&
                    rocksDBAggregatedMetricsLogger(this->rState, openFuture, rocksDBMetrics, &shardManager);

@ -2581,8 +2587,8 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
std::vector<std::pair<KeyRange, std::string>> getDataMapping() { return shardManager.getDataMapping(); }

std::shared_ptr<ShardedRocksDBState> rState;
ShardManager shardManager;
rocksdb::Options dbOptions;
ShardManager shardManager;
std::shared_ptr<RocksDBMetrics> rocksDBMetrics;
std::string path;
UID id;

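The member-declaration swap above is load-bearing, not cosmetic: C++ initializes non-static data members in declaration order, regardless of the order written in the constructor's initializer list. Since shardManager(path, id, dbOptions) now consumes dbOptions, dbOptions must be declared first. A self-contained illustration with hypothetical types:

struct Options { int parallelism = 4; };
struct Manager {
    explicit Manager(const Options& o) : parallelism(o.parallelism) {}
    int parallelism;
};
struct Store {
    // Declaration order is what matters: options is constructed before
    // manager, so manager sees a fully initialized object. Declaring them
    // the other way around would be undefined behavior here.
    Options options;
    Manager manager;
    Store() : options(), manager(options) {}
};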
@ -138,7 +138,7 @@ struct LogRouterData {
: dbgid(dbgid), logSystem(new AsyncVar<Reference<ILogSystem>>()), version(req.startVersion - 1), minPopped(0),
  startVersion(req.startVersion), minKnownCommittedVersion(0), poppedVersion(0), routerTag(req.routerTag),
  allowPops(false), foundEpochEnd(false), generation(req.recoveryCount),
  peekLatencyDist(Histogram::getHistogram("LogRouter"_sr, "PeekTLogLatency"_sr, Histogram::Unit::microseconds)),
  peekLatencyDist(Histogram::getHistogram("LogRouter"_sr, "PeekTLogLatency"_sr, Histogram::Unit::milliseconds)),
  cc("LogRouter", dbgid.toString()), getMoreCount("GetMoreCount", cc),
  getMoreBlockedCount("GetMoreBlockedCount", cc) {
// setup just enough of a logSet to be able to call getPushLocations

@ -375,7 +375,7 @@ bool LogPushData::writeTransactionInfo(int location, uint32_t subseq) {
// parent->child.
SpanContextMessage contextMessage;
if (spanContext.isSampled()) {
    CODE_PROBE(true, "Converting OTELSpanContextMessage to traced SpanContextMessage", probe::decoration::rare);
    CODE_PROBE(true, "Converting OTELSpanContextMessage to traced SpanContextMessage");
    contextMessage = SpanContextMessage(UID(spanContext.traceID.first(), spanContext.traceID.second()));
} else {
    CODE_PROBE(true, "Converting OTELSpanContextMessage to untraced SpanContextMessage");

@ -1241,7 +1241,7 @@ ACTOR static Future<Void> finishMoveKeys(Database occ,
// Set dataMoves[dataMoveId] = DataMoveMetaData.
ACTOR static Future<Void> startMoveShards(Database occ,
                                          UID dataMoveId,
                                          KeyRange keys,
                                          std::vector<KeyRange> ranges,
                                          std::vector<UID> servers,
                                          MoveKeysLock lock,
                                          FlowLock* startMoveKeysLock,

@ -1257,8 +1257,11 @@ ACTOR static Future<Void> startMoveShards(Database occ,

TraceEvent(SevDebug, "StartMoveShardsBegin", relocationIntervalId)
    .detail("DataMoveID", dataMoveId)
    .detail("TargetRange", keys);
    .detail("TargetRange", describe(ranges));

// TODO: make startMoveShards work with multiple ranges.
ASSERT(ranges.size() == 1);
state KeyRangeRef keys = ranges[0];
try {
    state Key begin = keys.begin;
    state KeyRange currentKeys = keys;
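The signature migration pattern used here and in finishMoveShards below is: widen the API to a vector of ranges now, keep a single-range contract until the implementation catches up. A small illustration with a stand-in Range type:

#include <cassert>
#include <string>
#include <vector>

struct Range { std::string begin, end; }; // stand-in for FDB's KeyRange

void startMove(const std::vector<Range>& ranges) {
    // Forward-compatible signature with a single-element contract; callers
    // can be migrated once, and the assert documents the current limit.
    assert(ranges.size() == 1);
    const Range& keys = ranges[0];
    (void)keys; // ... proceed exactly as the single-range code did ...
}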
@ -1576,7 +1579,7 @@ ACTOR static Future<Void> checkDataMoveComplete(Database occ, UID dataMoveId, Ke
// Clear dataMoves[dataMoveId].
ACTOR static Future<Void> finishMoveShards(Database occ,
                                           UID dataMoveId,
                                           KeyRange targetKeys,
                                           std::vector<KeyRange> targetRanges,
                                           std::vector<UID> destinationTeam,
                                           MoveKeysLock lock,
                                           FlowLock* finishMoveKeysParallelismLock,

@ -1585,7 +1588,10 @@ ACTOR static Future<Void> finishMoveShards(Database occ,
std::map<UID, StorageServerInterface> tssMapping,
const DDEnabledState* ddEnabledState) {
ASSERT(SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA);
state KeyRange keys = targetKeys;

// TODO: make startMoveShards work with multiple ranges.
ASSERT(targetRanges.size() == 1);
state KeyRange keys = targetRanges[0];
state Future<Void> warningLogger = logWarningAfter("FinishMoveShardsTooLong", 600, destinationTeam);
state int retries = 0;
state DataMoveMetaData dataMove;

@ -1636,7 +1642,7 @@ ACTOR static Future<Void> finishMoveShards(Database occ,
} else {
    TraceEvent(SevWarn, "FinishMoveShardsDataMoveDeleted", relocationIntervalId)
        .detail("DataMoveID", dataMoveId);
    wait(checkDataMoveComplete(occ, dataMoveId, targetKeys, relocationIntervalId));
    wait(checkDataMoveComplete(occ, dataMoveId, keys, relocationIntervalId));
    return Void();
}

@ -2485,9 +2491,10 @@ Future<Void> rawStartMovement(Database occ,
const MoveKeysParams& params,
std::map<UID, StorageServerInterface>& tssMapping) {
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA) {
    ASSERT(params.ranges.present());
    return startMoveShards(std::move(occ),
                           params.dataMoveId,
                           params.keys,
                           params.ranges.get(),
                           params.destinationTeam,
                           params.lock,
                           params.startMoveKeysParallelismLock,

@ -2495,8 +2502,9 @@ Future<Void> rawStartMovement(Database occ,
                           params.ddEnabledState,
                           params.cancelConflictingDataMoves);
}
ASSERT(params.keys.present());
return startMoveKeys(std::move(occ),
                     params.keys,
                     params.keys.get(),
                     params.destinationTeam,
                     params.lock,
                     params.startMoveKeysParallelismLock,
@ -2505,13 +2513,37 @@
                     params.ddEnabledState);
}

Future<Void> rawCheckFetchingState(const Database& cx,
                                   const MoveKeysParams& params,
                                   const std::map<UID, StorageServerInterface>& tssMapping) {
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA) {
    ASSERT(params.ranges.present());
    // TODO: make startMoveShards work with multiple ranges.
    ASSERT(params.ranges.get().size() == 1);
    return checkFetchingState(cx,
                              params.healthyDestinations,
                              params.ranges.get().at(0),
                              params.dataMovementComplete,
                              params.relocationIntervalId,
                              tssMapping);
}
ASSERT(params.keys.present());
return checkFetchingState(cx,
                          params.healthyDestinations,
                          params.keys.get(),
                          params.dataMovementComplete,
                          params.relocationIntervalId,
                          tssMapping);
}

Future<Void> rawFinishMovement(Database occ,
                               const MoveKeysParams& params,
                               const std::map<UID, StorageServerInterface>& tssMapping) {
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA) {
    ASSERT(params.ranges.present());
    return finishMoveShards(std::move(occ),
                            params.dataMoveId,
                            params.keys,
                            params.ranges.get(),
                            params.destinationTeam,
                            params.lock,
                            params.finishMoveKeysParallelismLock,
@ -2520,8 +2552,9 @@ Future<Void> rawFinishMovement(Database occ,
                            tssMapping,
                            params.ddEnabledState);
}
ASSERT(params.keys.present());
return finishMoveKeys(std::move(occ),
                      params.keys,
                      params.keys.get(),
                      params.destinationTeam,
                      params.lock,
                      params.finishMoveKeysParallelismLock,
@ -2539,12 +2572,7 @@ ACTOR Future<Void> moveKeys(Database occ, MoveKeysParams params) {

wait(rawStartMovement(occ, params, tssMapping));

state Future<Void> completionSignaller = checkFetchingState(occ,
                                                            params.healthyDestinations,
                                                            params.keys,
                                                            params.dataMovementComplete,
                                                            params.relocationIntervalId,
                                                            tssMapping);
state Future<Void> completionSignaller = rawCheckFetchingState(occ, params, tssMapping);

wait(rawFinishMovement(occ, params, tssMapping));

@ -98,7 +98,6 @@ TraceEvent debugTagsAndMessageEnabled(const char* context, Version version, Stri
SpanContextMessage scm;
br >> scm;
} else if (OTELSpanContextMessage::startsOTELSpanContextMessage(mutationType)) {
    CODE_PROBE(true, "MutationTracking reading OTELSpanContextMessage", probe::decoration::rare);
    BinaryReader br(mutationData, AssumeVersion(rdr.protocolVersion()));
    OTELSpanContextMessage scm;
    br >> scm;

@ -1633,7 +1633,7 @@ ACTOR Future<Void> tLog(IKeyValueStore* persistentData,
TraceEvent("SharedTlog", tlogId).detail("Version", "4.6");

try {
    wait(ioTimeoutError(persistentData->init(), SERVER_KNOBS->TLOG_MAX_CREATE_DURATION));
    wait(ioTimeoutError(persistentData->init(), SERVER_KNOBS->TLOG_MAX_CREATE_DURATION, "TLogInit"));
    wait(restorePersistentState(&self, locality));

    self.sharedActors.send(cleanupPeekTrackers(&self));

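This commit threads a short context label ("TLogInit", "TLogCommit") through every ioTimeoutError / ioDegradedOrTimeoutError call site in the TLog versions below, so a timeout identifies which I/O step stalled. A flow-style sketch of the wrapper's shape under that assumption; this is illustrative, not the exact fdbserver implementation:

ACTOR template <class T>
static Future<T> ioTimeoutErrorLabeled(Future<T> what, double time, const char* context) {
	state Future<Void> end = delay(time);
	choose {
		when(T t = wait(what)) { return t; }
		when(wait(end)) {
			Error err = io_timeout();
			// The label turns an anonymous io_timeout into an actionable event.
			TraceEvent(SevError, "IoTimeoutError").error(err).detail("Context", context);
			throw err;
		}
	}
}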
@ -1484,7 +1484,7 @@ ACTOR Future<Void> doQueueCommit(TLogData* self,
self->largeDiskQueueCommitBytes.set(false);

wait(ioDegradedOrTimeoutError(
    c, SERVER_KNOBS->MAX_STORAGE_COMMIT_TIME, self->degraded, SERVER_KNOBS->TLOG_DEGRADED_DURATION));
    c, SERVER_KNOBS->MAX_STORAGE_COMMIT_TIME, self->degraded, SERVER_KNOBS->TLOG_DEGRADED_DURATION, "TLogCommit"));
if (g_network->isSimulated() && !g_simulator->speedUpSimulation && BUGGIFY_WITH_PROB(0.0001)) {
    wait(delay(6.0));
}

@ -1701,7 +1701,7 @@ ACTOR Future<Void> initPersistentState(TLogData* self, Reference<LogData> logDa
}

TraceEvent("TLogInitCommit", logData->logId).log();
wait(ioTimeoutError(self->persistentData->commit(), SERVER_KNOBS->TLOG_MAX_CREATE_DURATION));
wait(ioTimeoutError(self->persistentData->commit(), SERVER_KNOBS->TLOG_MAX_CREATE_DURATION, "TLogCommit"));
return Void();
}

@ -2801,13 +2801,13 @@ ACTOR Future<Void> tLog(IKeyValueStore* persistentData,
TraceEvent("SharedTlog", tlogId).detail("Version", "6.0");

try {
    wait(ioTimeoutError(persistentData->init(), SERVER_KNOBS->TLOG_MAX_CREATE_DURATION));
    wait(ioTimeoutError(persistentData->init(), SERVER_KNOBS->TLOG_MAX_CREATE_DURATION, "TLogInit"));

    if (restoreFromDisk) {
        wait(restorePersistentState(&self, locality, oldLog, recovered, tlogRequests));
    } else {
        wait(ioTimeoutError(checkEmptyQueue(&self) && checkRecovered(&self),
                            SERVER_KNOBS->TLOG_MAX_CREATE_DURATION));
        wait(ioTimeoutError(
            checkEmptyQueue(&self) && checkRecovered(&self), SERVER_KNOBS->TLOG_MAX_CREATE_DURATION, "TLogInit"));
    }

    // Disk errors need a chance to kill this actor.

@ -3291,7 +3291,7 @@ ACTOR Future<Void> tLog(IKeyValueStore* persistentData,
TraceEvent("SharedTlog", tlogId).detail("Version", "6.2");

try {
    wait(ioTimeoutError(persistentData->init(), SERVER_KNOBS->TLOG_MAX_CREATE_DURATION));
    wait(ioTimeoutError(persistentData->init(), SERVER_KNOBS->TLOG_MAX_CREATE_DURATION, "TLogInit"));

    if (restoreFromDisk) {
        wait(restorePersistentState(&self, locality, oldLog, recovered, tlogRequests));

@ -487,12 +487,12 @@ class PaxosConfigConsumerImpl {
.detail("LargestLiveVersion", self->getCommittedVersionQuorum.getLargestLive())
.detail("SmallestCommitted", smallestCommitted);
ASSERT_GE(committedVersion, self->lastSeenVersion);
self->lastSeenVersion = committedVersion;
self->lastSeenVersion = std::max(self->lastSeenVersion, committedVersion);
self->compactionVersion = std::max(self->compactionVersion, smallestCommitted);
broadcaster->applySnapshotAndChanges(std::move(reply.snapshot),
                                     reply.snapshotVersion,
                                     reply.changes,
                                     committedVersion,
                                     self->lastSeenVersion,
                                     reply.annotations,
                                     self->getCommittedVersionQuorum.getReadReplicas(),
                                     self->getCommittedVersionQuorum.getLargestLive(),

@ -534,6 +534,13 @@ class PaxosConfigConsumerImpl {
if (committedVersion > self->lastSeenVersion) {
    ASSERT(self->getCommittedVersionQuorum.getReadReplicas().size() >= self->cfis.size() / 2 + 1 ||
           self->getCommittedVersionQuorum.isSpecialZeroQuorum());
    if (BUGGIFY) {
        // Inject a random delay between getting the committed
        // version and reading any changes. The goal is to
        // allow attrition to occasionally kill ConfigNodes in
        // this in-between state.
        wait(delay(deterministicRandom()->random01() * 5));
    }
    state std::vector<ConfigFollowerInterface> readReplicas =
        self->getCommittedVersionQuorum.getReadReplicas();
    std::vector<Future<Void>> fs;

@ -567,7 +574,7 @@ class PaxosConfigConsumerImpl {
Version smallestCommitted = self->getCommittedVersionQuorum.getSmallestCommitted();
self->compactionVersion = std::max(self->compactionVersion, smallestCommitted);
broadcaster->applyChanges(reply.changes,
                          committedVersion,
                          self->lastSeenVersion,
                          reply.annotations,
                          self->getCommittedVersionQuorum.getReadReplicas());
} else if (committedVersion == self->lastSeenVersion) {

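On the ASSERT_GE removal: a quorum reply's committedVersion can presumably trail what this consumer has already observed (replies arrive out of order), so the consumer now clamps rather than asserts. The monotonic-clamp idiom in isolation:

#include <algorithm>
#include <cstdint>

// Keeps the tracked version monotonically non-decreasing even when a stale
// (smaller) committedVersion arrives, instead of failing an assertion.
void noteCommittedVersion(int64_t& lastSeenVersion, int64_t committedVersion) {
    lastSeenVersion = std::max(lastSeenVersion, committedVersion);
}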
@ -43,9 +43,9 @@
#include "flow/actorcompiler.h" // has to be last include

#ifdef SSD_ROCKSDB_EXPERIMENTAL
// Enforcing rocksdb version to be 6.22.1 or greater.
static_assert(ROCKSDB_MAJOR == 6 && ROCKSDB_MINOR >= 22 && ROCKSDB_PATCH >= 1,
              "Unsupported rocksdb version. Update the rocksdb to at least 6.22.1 version");
// Enforcing rocksdb version to be 7.7.3.
static_assert((ROCKSDB_MAJOR == 7 && ROCKSDB_MINOR == 7 && ROCKSDB_PATCH == 3),
              "Unsupported rocksdb version. Update the rocksdb to 7.7.3 version");

namespace {

@ -328,6 +328,13 @@ class TestConfig : public BasicTestConfig {
if (attrib == "disableEncryption") {
    disableEncryption = strcmp(value.c_str(), "true") == 0;
}
if (attrib == "encryptModes") {
    std::stringstream ss(value);
    std::string token;
    while (std::getline(ss, token, ',')) {
        encryptModes.push_back(token);
    }
}
if (attrib == "restartInfoLocation") {
    isFirstTestInRestart = true;
}

@ -397,6 +404,9 @@ public:
bool disableRemoteKVS = false;
// 7.2 cannot be downgraded to 7.1 or below after enabling encryption-at-rest.
bool disableEncryption = false;
// By default, encryption mode is set randomly (based on the tenant mode)
// If provided, set using EncryptionAtRestMode::fromString
std::vector<std::string> encryptModes;
// Storage Engine Types: Verify match with SimulationConfig::generateNormalConfig
// 0 = "ssd"
// 1 = "memory"

@ -474,6 +484,7 @@ public:
.add("disableHostname", &disableHostname)
.add("disableRemoteKVS", &disableRemoteKVS)
.add("disableEncryption", &disableEncryption)
.add("encryptModes", &encryptModes)
.add("simpleConfig", &simpleConfig)
.add("generateFearless", &generateFearless)
.add("datacenters", &datacenters)

@ -1274,6 +1285,7 @@ ACTOR Future<Void> restartSimulatedSystem(std::vector<Future<Void>>* systemActor
g_knobs.setKnob("remote_kv_store", KnobValueRef::create(bool{ false }));
TraceEvent(SevDebug, "DisableRemoteKVS");
}
// TODO: Remove this code when encryption knobs are removed
if (testConfig->disableEncryption) {
    g_knobs.setKnob("enable_encryption", KnobValueRef::create(bool{ false }));
    g_knobs.setKnob("enable_tlog_encryption", KnobValueRef::create(bool{ false }));

@ -2052,6 +2064,19 @@ void setupSimulatedSystem(std::vector<Future<Void>>* systemActors,

simconfig.db.tenantMode = tenantMode;
simconfig.db.encryptionAtRestMode = EncryptionAtRestMode::DISABLED;
if (!testConfig.encryptModes.empty()) {
    simconfig.db.encryptionAtRestMode =
        EncryptionAtRestMode::fromString(deterministicRandom()->randomChoice(testConfig.encryptModes));
} else if (!testConfig.disableEncryption && deterministicRandom()->coinflip()) {
    if (tenantMode == TenantMode::DISABLED || tenantMode == TenantMode::OPTIONAL_TENANT ||
        deterministicRandom()->coinflip()) {
        // optional and disabled tenant modes currently only support cluster aware encryption
        simconfig.db.encryptionAtRestMode = EncryptionAtRestMode::CLUSTER_AWARE;
    } else {
        simconfig.db.encryptionAtRestMode = EncryptionAtRestMode::DOMAIN_AWARE;
    }
}
TraceEvent("SimulatedClusterEncryptionMode").detail("Mode", simconfig.db.encryptionAtRestMode.toString());

g_simulator->blobGranulesEnabled = simconfig.db.blobGranulesEnabled;

@ -2065,6 +2090,7 @@ void setupSimulatedSystem(std::vector<Future<Void>>* systemActors,
g_knobs.setKnob("remote_kv_store", KnobValueRef::create(bool{ false }));
TraceEvent(SevDebug, "DisableRemoteKVS");
}
// TODO: Remove this code once encryption knobs are removed
if (testConfig.disableEncryption) {
    g_knobs.setKnob("enable_encryption", KnobValueRef::create(bool{ false }));
    g_knobs.setKnob("enable_tlog_encryption", KnobValueRef::create(bool{ false }));

@ -19,6 +19,8 @@
*/

#include <cinttypes>
#include "fdbclient/BlobGranuleCommon.h"
#include "fdbserver/BlobGranuleServerCommon.actor.h"
#include "fmt/format.h"
#include "fdbclient/BackupAgent.actor.h"
#include "fdbclient/BlobWorkerInterface.h"

@ -2443,6 +2445,47 @@ ACTOR static Future<JsonBuilderObject> blobWorkerStatusFetcher(
return statusObj;
}

ACTOR static Future<JsonBuilderObject> blobRestoreStatusFetcher(Database db, std::set<std::string>* incompleteReason) {

state JsonBuilderObject statusObj;
state std::vector<Future<Optional<TraceEventFields>>> futures;

try {
    Optional<BlobRestoreStatus> status = wait(getRestoreStatus(db, normalKeys));
    if (status.present()) {
        switch (status.get().phase) {
        case BlobRestorePhase::INIT:
            statusObj["blob_full_restore_phase"] = "Initializing";
            break;
        case BlobRestorePhase::LOAD_MANIFEST:
            statusObj["blob_full_restore_phase"] = "Loading manifest";
            break;
        case BlobRestorePhase::MANIFEST_DONE:
            statusObj["blob_full_restore_phase"] = "Manifest loaded";
            break;
        case BlobRestorePhase::MIGRATE:
            statusObj["blob_full_restore_phase"] = "Copying data";
            statusObj["blob_full_restore_progress"] = status.get().progress;
            break;
        case BlobRestorePhase::APPLY_MLOGS:
            statusObj["blob_full_restore_phase"] = "Applying mutation logs";
            statusObj["blob_full_restore_progress"] = status.get().progress;
            break;
        case BlobRestorePhase::DONE:
            statusObj["blob_full_restore_phase"] = "Completed";
            break;
        default:
            statusObj["blob_full_restore_phase"] = "Unexpected phase";
        }
    }
} catch (Error& e) {
    if (e.code() == error_code_actor_cancelled)
        throw;
    incompleteReason->insert("Unable to query blob restore status");
}
return statusObj;
}

static JsonBuilderObject tlogFetcher(int* logFaultTolerance,
                                     const std::vector<TLogSet>& tLogs,
                                     std::unordered_map<NetworkAddress, WorkerInterface> const& address_workers) {

@ -3409,6 +3452,8 @@ ACTOR Future<StatusReply> clusterGetStatus(
JsonBuilderObject blobGranuelsStatus =
    wait(blobWorkerStatusFetcher(blobWorkers, address_workers, &status_incomplete_reasons));
statusObj["blob_granules"] = blobGranuelsStatus;
JsonBuilderObject blobRestoreStatus = wait(blobRestoreStatusFetcher(cx, &status_incomplete_reasons));
statusObj["blob_restore"] = blobRestoreStatus;
}

JsonBuilderArray incompatibleConnectionsArray;

@ -375,7 +375,7 @@ struct TLogData : NonCopyable {
peekMemoryLimiter(SERVER_KNOBS->TLOG_SPILL_REFERENCE_MAX_PEEK_MEMORY_BYTES),
concurrentLogRouterReads(SERVER_KNOBS->CONCURRENT_LOG_ROUTER_READS), ignorePopDeadline(0), dataFolder(folder),
degraded(degraded),
commitLatencyDist(Histogram::getHistogram("tLog"_sr, "commit"_sr, Histogram::Unit::microseconds)) {
commitLatencyDist(Histogram::getHistogram("tLog"_sr, "commit"_sr, Histogram::Unit::milliseconds)) {
cx = openDBOnServer(dbInfo, TaskPriority::DefaultEndpoint, LockAware::True);
}
};

@ -1098,7 +1098,7 @@ ACTOR Future<Void> updatePersistentData(TLogData* self, Reference<LogData> logDa
}
// SOMEDAY: This seems to be running pretty often, should we slow it down???
// This needs a timeout since nothing prevents I/O operations from hanging indefinitely.
wait(ioTimeoutError(self->persistentData->commit(), tLogMaxCreateDuration));
wait(ioTimeoutError(self->persistentData->commit(), tLogMaxCreateDuration, "TLogCommit"));

wait(delay(0, TaskPriority::UpdateStorage));

@ -2160,7 +2160,7 @@ ACTOR Future<Void> doQueueCommit(TLogData* self,
self->largeDiskQueueCommitBytes.set(false);

wait(ioDegradedOrTimeoutError(
    c, SERVER_KNOBS->MAX_STORAGE_COMMIT_TIME, self->degraded, SERVER_KNOBS->TLOG_DEGRADED_DURATION));
    c, SERVER_KNOBS->MAX_STORAGE_COMMIT_TIME, self->degraded, SERVER_KNOBS->TLOG_DEGRADED_DURATION, "TLogCommit"));
if (g_network->isSimulated() && !g_simulator->speedUpSimulation && BUGGIFY_WITH_PROB(0.0001)) {
    wait(delay(6.0));
}

@ -3464,7 +3464,8 @@ ACTOR Future<Void> tLogStart(TLogData* self, InitializeTLogRequest req, Locality
logData->unpoppedRecoveredTagCount = req.allTags.size();
logData->unpoppedRecoveredTags = std::set<Tag>(req.allTags.begin(), req.allTags.end());
wait(ioTimeoutError(initPersistentState(self, logData) || logData->removed,
                    SERVER_KNOBS->TLOG_MAX_CREATE_DURATION));
                    SERVER_KNOBS->TLOG_MAX_CREATE_DURATION,
                    "TLogInit"));

TraceEvent("TLogRecover", self->dbgid)
    .detail("LogId", logData->logId)

@ -3529,7 +3530,8 @@ ACTOR Future<Void> tLogStart(TLogData* self, InitializeTLogRequest req, Locality
} else {
    // Brand new tlog, initialization has already been done by caller
    wait(ioTimeoutError(initPersistentState(self, logData) || logData->removed,
                        SERVER_KNOBS->TLOG_MAX_CREATE_DURATION));
                        SERVER_KNOBS->TLOG_MAX_CREATE_DURATION,
                        "TLogInit"));

    if (logData->recoveryComplete.isSet()) {
        throw worker_removed();

@ -3600,13 +3602,14 @@ ACTOR Future<Void> tLog(IKeyValueStore* persistentData,

TraceEvent("SharedTlog", tlogId);
try {
    wait(ioTimeoutError(persistentData->init(), SERVER_KNOBS->TLOG_MAX_CREATE_DURATION));
    wait(ioTimeoutError(persistentData->init(), SERVER_KNOBS->TLOG_MAX_CREATE_DURATION, "TLogInit"));

    if (restoreFromDisk) {
        wait(restorePersistentState(&self, locality, oldLog, recovered, tlogRequests));
    } else {
        wait(ioTimeoutError(checkEmptyQueue(&self) && initPersistentStorage(&self),
                            SERVER_KNOBS->TLOG_MAX_CREATE_DURATION));
                            SERVER_KNOBS->TLOG_MAX_CREATE_DURATION,
                            "TLogInit"));
    }

    // Disk errors need a chance to kill this actor.

@ -546,7 +546,7 @@ Future<Version> TagPartitionedLogSystem::push(Version prevVersion,
it->tlogPushDistTrackers.push_back(
    Histogram::getHistogram("ToTlog_" + it->logServers[i]->get().interf().uniqueID.toString(),
                            it->logServers[i]->get().interf().address().toString(),
                            Histogram::Unit::microseconds));
                            Histogram::Unit::milliseconds));
}
}
std::vector<Future<Void>> tLogCommitResults;

@ -124,9 +124,17 @@ public:

state int refreshInterval = SERVER_KNOBS->TENANT_CACHE_STORAGE_USAGE_REFRESH_INTERVAL;
state double lastTenantListFetchTime = now();
state double lastTraceTime = 0;

loop {
    state double fetchStartTime = now();

    state bool toTrace = false;
    if (fetchStartTime - lastTraceTime > SERVER_KNOBS->TENANT_CACHE_STORAGE_USAGE_TRACE_INTERVAL) {
        toTrace = true;
        lastTraceTime = fetchStartTime;
    }

    state std::vector<TenantGroupName> groups;
    for (const auto& [group, storage] : tenantCache->tenantStorageMap) {
        groups.push_back(group);

@ -159,6 +167,14 @@ public:
    }
}
tenantCache->tenantStorageMap[group].usage = usage;

if (toTrace) {
    // Trace the storage used by all tenant groups for visibility.
    TraceEvent(SevInfo, "StorageUsageUpdated", tenantCache->id())
        .detail("TenantGroup", group)
        .detail("Quota", tenantCache->tenantStorageMap[group].quota)
        .detail("Usage", tenantCache->tenantStorageMap[group].usage);
}
}

lastTenantListFetchTime = now();

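The tenant cache now decouples its refresh cadence from its trace cadence: refresh often, trace at most once per trace interval. The idiom in a self-contained form (now_s() stands in for FDB's now(); the intervals are knobs there):

#include <chrono>
#include <cstdio>

static double now_s() {
    using namespace std::chrono;
    return duration<double>(steady_clock::now().time_since_epoch()).count();
}

// One loop iteration: decide up front whether this pass should emit the
// verbose trace, so the decision covers every group refreshed in the pass.
void refreshLoopIteration(double& lastTraceTime, double traceInterval) {
    double fetchStartTime = now_s();
    bool toTrace = false;
    if (fetchStartTime - lastTraceTime > traceInterval) {
        toTrace = true;
        lastTraceTime = fetchStartTime;
    }
    // ... refresh usage for every tenant group ...
    if (toTrace)
        std::printf("StorageUsageUpdated\n"); // TraceEvent in the real code
}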
@ -459,7 +459,13 @@ public:
// Since cursors can have async operations pending which modify their state they can't be copied cleanly
Cursor(const Cursor& other) = delete;

~Cursor() { writeOperations.cancel(); }
~Cursor() { cancel(); }

// Cancel outstanding operations. Further use of cursor is not allowed.
void cancel() {
    nextPageReader.cancel();
    writeOperations.cancel();
}

// A read cursor can be initialized from a pop cursor
void initReadOnly(const Cursor& c, bool readExtents = false) {

@ -921,7 +927,15 @@ public:
public:
FIFOQueue() : pager(nullptr) {}

~FIFOQueue() { newTailPage.cancel(); }
~FIFOQueue() { cancel(); }

// Cancel outstanding operations. Further use of queue is not allowed.
void cancel() {
    headReader.cancel();
    tailWriter.cancel();
    headWriter.cancel();
    newTailPage.cancel();
}

FIFOQueue(const FIFOQueue& other) = delete;
void operator=(const FIFOQueue& rhs) = delete;
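The shutdown fix above follows one idiom: the destructor used to cancel only one pending future, so other in-flight operations could outlive the object; delegating to an explicit cancel() covers them all and lets shutdown paths (like the pager shutdown below) cancel eagerly before destruction. A sketch of the shape, assuming flow's Future<T>::cancel():

struct QueueLike {
    Future<int> headReader, tailWriter, headWriter, newTailPage;
    // Cancels every outstanding operation; further use is not allowed,
    // matching the contract stated in the FDB comments above.
    void cancel() {
        headReader.cancel();
        tailWriter.cancel();
        headWriter.cancel();
        newTailPage.cancel();
    }
    ~QueueLike() { cancel(); }
};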
@ -3627,6 +3641,13 @@ public:
}
self->operations.clear();

debug_printf("DWALPager(%s) shutdown cancel queues\n", self->filename.c_str());
self->freeList.cancel();
self->delayedFreeList.cancel();
self->remapQueue.cancel();
self->extentFreeList.cancel();
self->extentUsedList.cancel();

debug_printf("DWALPager(%s) shutdown destroy page cache\n", self->filename.c_str());
wait(self->extentCache.clear());
wait(self->pageCache.clear());

@ -4697,21 +4718,15 @@ public:

if (domainId.present()) {
    ASSERT(keyProvider && keyProvider->enableEncryptionDomain());
    // Temporarily disabling the check, since if a tenant is removed, where the key provider
    // would not find the domain, the data for the tenant may still be in Redwood and being read.
    // TODO(yiwu): re-enable the check.
    /*
    if (domainId.get() != keyProvider->getDefaultEncryptionDomainId() &&
        !keyProvider->keyFitsInDomain(domainId.get(), lowerBound, false)) {
        fprintf(stderr,
                "Page lower bound not in domain: %s %s, domain id %s, lower bound '%s'\n",
                ::toString(id).c_str(),
                ::toString(v).c_str(),
                ::toString(domainId).c_str(),
                lowerBound.printable().c_str());
        return false;
    if (!keyProvider->keyFitsInDomain(domainId.get(), lowerBound, true)) {
        fprintf(stderr,
                "Page lower bound not in domain: %s %s, domain id %s, lower bound '%s'\n",
                ::toString(id).c_str(),
                ::toString(v).c_str(),
                ::toString(domainId).c_str(),
                lowerBound.printable().c_str());
        return false;
    }
    */
}

auto& b = boundariesByPageID[id.front()][v];

@ -4759,45 +4774,27 @@ public:
        ::toString(b->second.domainId).c_str());
return false;
}
// Temporarily disabling the check, since if a tenant is removed, where the key provider
// would not find the domain, the data for the tenant may still be in Redwood and being read.
// TODO(yiwu): re-enable the check.
/*
ASSERT(domainId.present());
auto checkKeyFitsInDomain = [&]() -> bool {
    if (!keyProvider->keyFitsInDomain(domainId.get(), cursor.get().key, b->second.height > 1)) {
        fprintf(stderr,
                "Encryption domain mismatch on %s, %s, domain: %s, key %s\n",
                ::toString(id).c_str(),
                ::toString(v).c_str(),
                ::toString(domainId).c_str(),
                cursor.get().key.printable().c_str());
        return false;
    }
    return true;
    if (!keyProvider->keyFitsInDomain(domainId.get(), cursor.get().key, b->second.height > 1)) {
        fprintf(stderr,
                "Encryption domain mismatch on %s, %s, domain: %s, key %s\n",
                ::toString(id).c_str(),
                ::toString(v).c_str(),
                ::toString(domainId).c_str(),
                cursor.get().key.printable().c_str());
        return false;
    }
    return true;
};
if (domainId.get() != keyProvider->getDefaultEncryptionDomainId()) {
    cursor.moveFirst();
    if (cursor.valid() && !checkKeyFitsInDomain()) {
        return false;
    }
    cursor.moveLast();
    if (cursor.valid() && !checkKeyFitsInDomain()) {
        return false;
    }
} else {
    if (deterministicRandom()->random01() < domainPrefixScanProbability) {
        cursor.moveFirst();
        while (cursor.valid()) {
            if (!checkKeyFitsInDomain()) {
                return false;
            }
            cursor.moveNext();
        }
        domainPrefixScanCount++;
    }
    cursor.moveFirst();
    if (cursor.valid() && !checkKeyFitsInDomain()) {
        return false;
    }
    cursor.moveLast();
    if (cursor.valid() && !checkKeyFitsInDomain()) {
        return false;
    }
*/
}

return true;

@ -5674,8 +5671,8 @@ private:
int64_t defaultDomainId = keyProvider->getDefaultEncryptionDomainId();
int64_t currentDomainId;
size_t prefixLength;
if (count == 0 || (splitByDomain && count > 0)) {
    std::tie(currentDomainId, prefixLength) = keyProvider->getEncryptionDomain(rec.key, domainId);
if (count == 0 || splitByDomain) {
    std::tie(currentDomainId, prefixLength) = keyProvider->getEncryptionDomain(rec.key);
}
if (count == 0) {
    domainId = currentDomainId;

@ -5886,12 +5883,18 @@ private:
if (useEncryptionDomain) {
    ASSERT(pagesToBuild[0].domainId.present());
    int64_t domainId = pagesToBuild[0].domainId.get();
    // We need to make sure we use the domain prefix as the page lower bound, for the first page
    // of a non-default domain on a level. That way we ensure that pages for a domain form a full subtree
    // (i.e. have a single root) in the B-tree.
    if (domainId != self->m_keyProvider->getDefaultEncryptionDomainId() &&
        !self->m_keyProvider->keyFitsInDomain(domainId, pageLowerBound.key, false)) {
        pageLowerBound = RedwoodRecordRef(entries[0].key.substr(0, pagesToBuild[0].domainPrefixLength));
    // We make sure the page lower bound fits in the domain of the page.
    // If the page domain is the default domain, we make sure the page doesn't fall within a domain
    // specific subtree.
    // If the page domain is non-default, in addition, we make the first page of the domain on a level
    // use the domain prefix as the lower bound. Such a lower bound will ensure that pages for a domain
    // form a full subtree (i.e. have a single root) in the B-tree.
    if (!self->m_keyProvider->keyFitsInDomain(domainId, pageLowerBound.key, true)) {
        if (domainId == self->m_keyProvider->getDefaultEncryptionDomainId()) {
            pageLowerBound = RedwoodRecordRef(entries[0].key);
        } else {
            pageLowerBound = RedwoodRecordRef(entries[0].key.substr(0, pagesToBuild[0].domainPrefixLength));
        }
    }
}

@ -163,7 +163,8 @@ ACTOR Future<Void> printRestoreSummary(Database db, Reference<BlobConnectionProv
ACTOR Future<BlobGranuleRestoreVersionVector> listBlobGranules(Database db, Reference<BlobConnectionProvider> blobConn);
ACTOR Future<int64_t> lastBlobEpoc(Database db, Reference<BlobConnectionProvider> blobConn);
ACTOR Future<bool> isFullRestoreMode(Database db, KeyRangeRef range);

ACTOR Future<Void> updateRestoreStatus(Database db, KeyRangeRef range, BlobRestoreStatus status);
ACTOR Future<Optional<BlobRestoreStatus>> getRestoreStatus(Database db, KeyRangeRef range);
#include "flow/unactorcompiler.h"

#endif

@@ -920,7 +920,7 @@ public:
 			}
 			if (fitness == ProcessClass::NeverAssign) {
 				logWorkerUnavailable(
-				    SevDebug, id, "complex", "Worker's fitness is NeverAssign", worker_details, fitness, dcIds);
+				    SevDebug, id, "simple", "Worker's fitness is NeverAssign", worker_details, fitness, dcIds);
 				continue;
 			}
 			if (!dcIds.empty() && dcIds.count(worker_details.interf.locality.dcId()) == 0) {
@@ -1072,7 +1072,7 @@ public:
 			}
 			if (fitness == ProcessClass::NeverAssign) {
 				logWorkerUnavailable(
-				    SevDebug, id, "complex", "Worker's fitness is NeverAssign", worker_details, fitness, dcIds);
+				    SevDebug, id, "deprecated", "Worker's fitness is NeverAssign", worker_details, fitness, dcIds);
 				continue;
 			}
 			if (!dcIds.empty() && dcIds.count(worker_details.interf.locality.dcId()) == 0) {
@@ -110,8 +110,7 @@ struct ConfigFollowerGetChangesReply {
 	Standalone<VectorRef<VersionedConfigCommitAnnotationRef>> annotations;
 
 	ConfigFollowerGetChangesReply() = default;
-	explicit ConfigFollowerGetChangesReply(Version mostRecentVersion,
-	                                       Standalone<VectorRef<VersionedConfigMutationRef>> const& changes,
+	explicit ConfigFollowerGetChangesReply(Standalone<VectorRef<VersionedConfigMutationRef>> const& changes,
 	                                       Standalone<VectorRef<VersionedConfigCommitAnnotationRef>> const& annotations)
 	    : changes(changes), annotations(annotations) {}
 
@@ -284,12 +284,12 @@ public:
 	                                  const std::unordered_set<uint64_t>& excludedPhysicalShards,
 	                                  uint64_t debugID);
 
-	// Step 2: get a remote team which has the input physical shard
-	// Return empty if no such remote team
-	// May return a problematic remote team, and re-selection is required for this case
-	Optional<ShardsAffectedByTeamFailure::Team> tryGetAvailableRemoteTeamWith(uint64_t inputPhysicalShardID,
-	                                                                          StorageMetrics const& moveInMetrics,
-	                                                                          uint64_t debugID);
+	// Step 2: get a remote team which has the input physical shard.
+	// Second field in the returned pair indicates whether this physical shard is available or not.
+	// Return empty if no such remote team.
+	// May return a problematic remote team, and re-selection is required for this case.
+	std::pair<Optional<ShardsAffectedByTeamFailure::Team>, bool>
+	tryGetAvailableRemoteTeamWith(uint64_t inputPhysicalShardID, StorageMetrics const& moveInMetrics, uint64_t debugID);
 	// Invariant:
 	// (1) If forceToUseNewPhysicalShard is set, use the bestTeams selected by getTeam(), and create a new physical
 	// shard for the teams
@@ -90,21 +90,11 @@ public:
 	virtual int64_t getDefaultEncryptionDomainId() const { throw not_implemented(); }
 
 	// Get encryption domain from a key. Return the domain id, and the size of the encryption domain prefix.
 	// It is assumed that all keys with the same encryption domain prefix as the given key falls in the same encryption
-	// domain. If possibleDomainId is given, it is a valid domain id previously returned by the key provider,
-	// potentially for a different key. The possibleDomainId parm is used by TenantAwareEncryptionKeyProvider to speed
-	// up encryption domain lookup.
-	virtual std::tuple<int64_t, size_t> getEncryptionDomain(const KeyRef& key,
-	                                                        Optional<int64_t> possibleDomainId = Optional<int64_t>()) {
-		throw not_implemented();
-	}
+	// domain.
+	virtual std::tuple<int64_t, size_t> getEncryptionDomain(const KeyRef& key) { throw not_implemented(); }
 
 	// Get encryption domain of a page given encoding header.
 	virtual int64_t getEncryptionDomainIdFromHeader(const void* encodingHeader) { throw not_implemented(); }
 
-	// Setting tenant prefix to tenant name map. Used by TenantAwareEncryptionKeyProvider.
-	virtual void setTenantPrefixIndex(Reference<TenantPrefixIndex> tenantPrefixIndex) {}
-
 	// Helper methods.
 
 	// Check if a key fits in an encryption domain.
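For reference, a self-contained sketch of the narrowed interface and a caller, with std::string and std::logic_error standing in for KeyRef and not_implemented(); these are assumptions for illustration, not the FDB declarations:

// Sketch of the single-argument form after the possibleDomainId fast path is removed.
#include <cstdint>
#include <stdexcept>
#include <string>
#include <tuple>

struct PageEncryptionKeyProviderSketch {
    virtual ~PageEncryptionKeyProviderSketch() = default;
    virtual int64_t getDefaultEncryptionDomainId() const { throw std::logic_error("not_implemented"); }
    // One argument now: the domain is derived from the key alone.
    virtual std::tuple<int64_t, size_t> getEncryptionDomain(const std::string& key) {
        throw std::logic_error("not_implemented");
    }
};

void exampleCaller(PageEncryptionKeyProviderSketch& provider, const std::string& key) {
    int64_t domainId;
    size_t prefixLength;
    // Mirrors the std::tie call in the B-tree hunk above.
    std::tie(domainId, prefixLength) = provider.getEncryptionDomain(key);
    (void)domainId;
    (void)prefixLength;
}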
@@ -220,7 +210,7 @@ public:
 
 	int64_t getDefaultEncryptionDomainId() const override { return FDB_DEFAULT_ENCRYPT_DOMAIN_ID; }
 
-	std::tuple<int64_t, size_t> getEncryptionDomain(const KeyRef& key, Optional<int64_t>) override {
+	std::tuple<int64_t, size_t> getEncryptionDomain(const KeyRef& key) override {
 		int64_t domainId;
 		if (key.size() < PREFIX_LENGTH) {
 			domainId = getDefaultEncryptionDomainId();
@@ -291,6 +281,8 @@ class TenantAwareEncryptionKeyProvider : public IPageEncryptionKeyProvider {
 public:
 	using EncodingHeader = ArenaPage::AESEncryptionV1Encoder::Header;
 
+	const StringRef systemKeysPrefix = systemKeys.begin;
+
 	TenantAwareEncryptionKeyProvider(Reference<AsyncVar<ServerDBInfo> const> db) : db(db) {}
 
 	virtual ~TenantAwareEncryptionKeyProvider() = default;
@@ -337,10 +329,10 @@ public:
 
 	int64_t getDefaultEncryptionDomainId() const override { return FDB_DEFAULT_ENCRYPT_DOMAIN_ID; }
 
-	std::tuple<int64_t, size_t> getEncryptionDomain(const KeyRef& key, Optional<int64_t> possibleDomainId) override {
+	std::tuple<int64_t, size_t> getEncryptionDomain(const KeyRef& key) override {
 		// System key.
-		if (key.startsWith(systemKeys.begin)) {
-			return { SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID, 2 };
+		if (key.startsWith(systemKeysPrefix)) {
+			return { SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID, systemKeysPrefix.size() };
 		}
 		// Key smaller than tenant prefix in size belongs to the default domain.
 		if (key.size() < TENANT_PREFIX_SIZE) {
@@ -352,21 +344,7 @@ public:
 		if (tenantId < 0) {
 			return { FDB_DEFAULT_ENCRYPT_DOMAIN_ID, 0 };
 		}
-		// Optimization: Caller guarantee possibleDomainId is a valid domain id that we previously returned.
-		// We can return immediately without checking with tenant map.
-		if (possibleDomainId.present() && possibleDomainId.get() == tenantId) {
-			return { tenantId, TENANT_PREFIX_SIZE };
-		}
-		if (tenantPrefixIndex.isValid()) {
-			auto view = tenantPrefixIndex->atLatest();
-			auto itr = view.find(prefix);
-			if (itr != view.end()) {
-				return { tenantId, TENANT_PREFIX_SIZE };
-			}
-		}
-		// Tenant not found. Tenant must be disabled, or in optional mode.
-		// The prefix does not belong to any tenant. The key belongs to the default domain.
-		return { FDB_DEFAULT_ENCRYPT_DOMAIN_ID, 0 };
+		return { tenantId, TENANT_PREFIX_SIZE };
 	}
 
 	int64_t getEncryptionDomainIdFromHeader(const void* encodingHeader) override {
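A self-contained sketch of the lookup this provider now performs, assuming tenant prefixes are 8-byte big-endian ids as TENANT_PREFIX_SIZE suggests; the names below are stand-ins for illustration, not the FDB API:

// Decode a tenant prefix into {domainId, prefixLength}, in the spirit of
// getEncryptionDomain(key). Hypothetical names; constants are assumptions.
#include <cstdint>
#include <string>
#include <utility>

constexpr size_t kTenantPrefixSize = 8;      // stand-in for TENANT_PREFIX_SIZE
constexpr int64_t kDefaultDomainId = 0;      // stand-in for FDB_DEFAULT_ENCRYPT_DOMAIN_ID

std::pair<int64_t, size_t> encryptionDomainOf(const std::string& key) {
    if (key.size() < kTenantPrefixSize) {
        return { kDefaultDomainId, 0 };      // too short to carry a tenant prefix
    }
    uint64_t raw = 0;
    for (size_t i = 0; i < kTenantPrefixSize; ++i) {
        raw = (raw << 8) | static_cast<uint8_t>(key[i]); // big-endian decode
    }
    auto tenantId = static_cast<int64_t>(raw);
    if (tenantId < 0) {
        return { kDefaultDomainId, 0 };      // negative ids are not valid tenants
    }
    return { tenantId, kTenantPrefixSize };
}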
@@ -375,13 +353,8 @@ public:
 		return header->cipherTextDetails.encryptDomainId;
 	}
 
-	void setTenantPrefixIndex(Reference<TenantPrefixIndex> tenantPrefixIndex) override {
-		this->tenantPrefixIndex = tenantPrefixIndex;
-	}
-
 private:
 	Reference<AsyncVar<ServerDBInfo> const> db;
-	Reference<TenantPrefixIndex> tenantPrefixIndex;
 };
 
 #include "flow/unactorcompiler.h"
@@ -58,7 +58,12 @@ public:
 
 struct MoveKeysParams {
 	UID dataMoveId;
-	KeyRange keys;
+
+	// Only one of `keys` and `ranges` can be set. `ranges` is created mainly for physical shard moves to move a full
+	// physical shard with multiple key ranges.
+	Optional<KeyRange> keys;
+	Optional<std::vector<KeyRange>> ranges;
+
 	std::vector<UID> destinationTeam, healthyDestinations;
 	MoveKeysLock lock;
 	Promise<Void> dataMovementComplete;
@@ -68,6 +73,46 @@ struct MoveKeysParams {
 	UID relocationIntervalId;
 	const DDEnabledState* ddEnabledState = nullptr;
 	CancelConflictingDataMoves cancelConflictingDataMoves = CancelConflictingDataMoves::False;
+
+	MoveKeysParams() {}
+
+	MoveKeysParams(UID dataMoveId,
+	               const KeyRange& keys,
+	               const std::vector<UID>& destinationTeam,
+	               const std::vector<UID>& healthyDestinations,
+	               const MoveKeysLock& lock,
+	               const Promise<Void>& dataMovementComplete,
+	               FlowLock* startMoveKeysParallelismLock,
+	               FlowLock* finishMoveKeysParallelismLock,
+	               bool hasRemote,
+	               UID relocationIntervalId,
+	               const DDEnabledState* ddEnabledState,
+	               CancelConflictingDataMoves cancelConflictingDataMoves)
+	  : dataMoveId(dataMoveId), keys(keys), destinationTeam(destinationTeam), healthyDestinations(healthyDestinations),
+	    lock(lock), dataMovementComplete(dataMovementComplete),
+	    startMoveKeysParallelismLock(startMoveKeysParallelismLock),
+	    finishMoveKeysParallelismLock(finishMoveKeysParallelismLock), hasRemote(hasRemote),
+	    relocationIntervalId(relocationIntervalId), ddEnabledState(ddEnabledState),
+	    cancelConflictingDataMoves(cancelConflictingDataMoves) {}
+
+	MoveKeysParams(UID dataMoveId,
+	               const std::vector<KeyRange>& ranges,
+	               const std::vector<UID>& destinationTeam,
+	               const std::vector<UID>& healthyDestinations,
+	               const MoveKeysLock& lock,
+	               const Promise<Void>& dataMovementComplete,
+	               FlowLock* startMoveKeysParallelismLock,
+	               FlowLock* finishMoveKeysParallelismLock,
+	               bool hasRemote,
+	               UID relocationIntervalId,
+	               const DDEnabledState* ddEnabledState,
+	               CancelConflictingDataMoves cancelConflictingDataMoves)
+	  : dataMoveId(dataMoveId), ranges(ranges), destinationTeam(destinationTeam),
+	    healthyDestinations(healthyDestinations), lock(lock), dataMovementComplete(dataMovementComplete),
+	    startMoveKeysParallelismLock(startMoveKeysParallelismLock),
+	    finishMoveKeysParallelismLock(finishMoveKeysParallelismLock), hasRemote(hasRemote),
+	    relocationIntervalId(relocationIntervalId), ddEnabledState(ddEnabledState),
+	    cancelConflictingDataMoves(cancelConflictingDataMoves) {}
 };
 
 // read the lock value in system keyspace but do not change anything
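A short sketch of the "exactly one of keys/ranges" contract, with std::optional standing in for Flow's Optional; illustrative stand-in types, not the FDB definitions:

// One constructor fills `keys`, the other fills `ranges`; never both.
#include <cassert>
#include <optional>
#include <string>
#include <vector>

struct KeyRangeSketch {
    std::string begin, end;
};

struct MoveKeysParamsSketch {
    std::optional<KeyRangeSketch> keys;                // ordinary single-range move
    std::optional<std::vector<KeyRangeSketch>> ranges; // physical shard move: many ranges at once

    bool valid() const { return keys.has_value() != ranges.has_value(); }
};

int main() {
    MoveKeysParamsSketch p;
    p.ranges = std::vector<KeyRangeSketch>{ { "a", "b" }, { "c", "d" } };
    assert(p.valid()); // exactly one of the two is set
}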
@@ -137,16 +137,16 @@ struct ProxyStats {
 	                       SERVER_KNOBS->LATENCY_SKETCH_ACCURACY),
 	    maxComputeNS(0), minComputeNS(1e12),
 	    commitBatchQueuingDist(
-	        Histogram::getHistogram("CommitProxy"_sr, "CommitBatchQueuing"_sr, Histogram::Unit::microseconds)),
+	        Histogram::getHistogram("CommitProxy"_sr, "CommitBatchQueuing"_sr, Histogram::Unit::milliseconds)),
 	    getCommitVersionDist(
-	        Histogram::getHistogram("CommitProxy"_sr, "GetCommitVersion"_sr, Histogram::Unit::microseconds)),
-	    resolutionDist(Histogram::getHistogram("CommitProxy"_sr, "Resolution"_sr, Histogram::Unit::microseconds)),
+	        Histogram::getHistogram("CommitProxy"_sr, "GetCommitVersion"_sr, Histogram::Unit::milliseconds)),
+	    resolutionDist(Histogram::getHistogram("CommitProxy"_sr, "Resolution"_sr, Histogram::Unit::milliseconds)),
 	    postResolutionDist(
-	        Histogram::getHistogram("CommitProxy"_sr, "PostResolutionQueuing"_sr, Histogram::Unit::microseconds)),
+	        Histogram::getHistogram("CommitProxy"_sr, "PostResolutionQueuing"_sr, Histogram::Unit::milliseconds)),
 	    processingMutationDist(
-	        Histogram::getHistogram("CommitProxy"_sr, "ProcessingMutation"_sr, Histogram::Unit::microseconds)),
-	    tlogLoggingDist(Histogram::getHistogram("CommitProxy"_sr, "TlogLogging"_sr, Histogram::Unit::microseconds)),
-	    replyCommitDist(Histogram::getHistogram("CommitProxy"_sr, "ReplyCommit"_sr, Histogram::Unit::microseconds)) {
+	        Histogram::getHistogram("CommitProxy"_sr, "ProcessingMutation"_sr, Histogram::Unit::milliseconds)),
+	    tlogLoggingDist(Histogram::getHistogram("CommitProxy"_sr, "TlogLogging"_sr, Histogram::Unit::milliseconds)),
+	    replyCommitDist(Histogram::getHistogram("CommitProxy"_sr, "ReplyCommit"_sr, Histogram::Unit::milliseconds)) {
 		specialCounter(cc, "LastAssignedCommitVersion", [this]() { return this->lastCommitVersionAssigned; });
 		specialCounter(cc, "Version", [pVersion]() { return pVersion->get(); });
 		specialCounter(cc, "CommittedVersion", [pCommittedVersion]() { return pCommittedVersion->get(); });
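The unit argument only changes how a wall-clock sample is scaled when it is recorded; a tiny stand-in (assumed behavior, not FDB's Histogram class) shows why switching the declared unit matters:

// Stand-in illustrating the unit scaling assumed above; not FDB's Histogram.
#include <cstdio>

enum class Unit { microseconds, milliseconds };

struct HistogramSketch {
    Unit unit;
    explicit HistogramSketch(Unit u) : unit(u) {}
    void sampleSeconds(double delta) {
        // Scale the sample into the unit declared at construction time.
        double scaled = delta * (unit == Unit::microseconds ? 1e6 : 1e3);
        std::printf("sample=%.1f\n", scaled); // a real histogram would bucket this value
    }
};

int main() {
    HistogramSketch commitBatchQueuing(Unit::milliseconds);
    commitBatchQueuing.sampleSeconds(0.0125); // recorded as 12.5 (ms) rather than 12500.0 (us)
}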
@@ -1284,7 +1284,7 @@ ACTOR Future<Void> tLog(IKeyValueStore* persistentData,
 typedef decltype(&tLog) TLogFn;
 
 ACTOR template <class T>
-Future<T> ioTimeoutError(Future<T> what, double time) {
+Future<T> ioTimeoutError(Future<T> what, double time, const char* context = nullptr) {
 	// Before simulation is sped up, IO operations can take a very long time so limit timeouts
 	// to not end until at least time after simulation is sped up.
 	if (g_network->isSimulated() && !g_simulator->speedUpSimulation) {
@@ -1298,7 +1298,12 @@ Future<T> ioTimeoutError(Future<T> what, double time) {
 		if (g_network->isSimulated() && !g_simulator->getCurrentProcess()->isReliable()) {
 			err = err.asInjectedFault();
 		}
-		TraceEvent(SevError, "IoTimeoutError").error(err);
+		TraceEvent e(SevError, "IoTimeoutError");
+		e.error(err);
+		if (context != nullptr) {
+			e.detail("Context", context);
+		}
+		e.log();
 		throw err;
 	}
 }
@@ -1308,7 +1313,8 @@ ACTOR template <class T>
 Future<T> ioDegradedOrTimeoutError(Future<T> what,
                                    double errTime,
                                    Reference<AsyncVar<bool>> degraded,
-                                   double degradedTime) {
+                                   double degradedTime,
+                                   const char* context = nullptr) {
 	// Before simulation is sped up, IO operations can take a very long time so limit timeouts
 	// to not end until at least time after simulation is sped up.
 	if (g_network->isSimulated() && !g_simulator->speedUpSimulation) {
@@ -1337,7 +1343,12 @@ Future<T> ioDegradedOrTimeoutError(Future<T> what,
 		if (g_network->isSimulated() && !g_simulator->getCurrentProcess()->isReliable()) {
 			err = err.asInjectedFault();
 		}
-		TraceEvent(SevError, "IoTimeoutError").error(err);
+		TraceEvent e(SevError, "IoTimeoutError");
+		e.error(err);
+		if (context != nullptr) {
+			e.detail("Context", context);
+		}
+		e.log();
 		throw err;
 	}
 }
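A hedged sketch of the pattern these two hunks introduce, attaching a caller-supplied context string to the timeout error so the trace says which IO timed out; std::future and std::runtime_error stand in for Flow futures and TraceEvent:

// Stand-in for the context-tagged IO timeout; not the Flow implementation.
#include <chrono>
#include <future>
#include <iostream>
#include <stdexcept>
#include <string>

template <class T>
T ioTimeoutErrorSketch(std::future<T> what, std::chrono::milliseconds time, const char* context = nullptr) {
    if (what.wait_for(time) == std::future_status::timeout) {
        std::string msg = "IoTimeoutError";
        if (context != nullptr) {
            msg += std::string(" Context=") + context; // mirrors e.detail("Context", context)
        }
        std::cerr << msg << '\n';
        throw std::runtime_error(msg);
    }
    return what.get();
}

int main() {
    auto fut = std::async(std::launch::async, [] { return 42; });
    std::cout << ioTimeoutErrorSketch(std::move(fut), std::chrono::milliseconds(100), "StorageCommit") << '\n';
}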
@@ -159,8 +159,7 @@ bool canReplyWith(Error e) {
 
 #define PERSIST_PREFIX "\xff\xff"
 
-FDB_DECLARE_BOOLEAN_PARAM(UnlimitedCommitBytes);
-FDB_DEFINE_BOOLEAN_PARAM(UnlimitedCommitBytes);
+FDB_BOOLEAN_PARAM(UnlimitedCommitBytes);
 
 // Immutable
 static const KeyValueRef persistFormat(PERSIST_PREFIX "Format"_sr, "FoundationDB/StorageServer/1/4"_sr);
@@ -786,7 +785,7 @@ public:
 	std::map<Version, std::vector<CheckpointMetaData>> pendingCheckpoints; // Pending checkpoint requests
 	std::unordered_map<UID, CheckpointMetaData> checkpoints; // Existing and deleting checkpoints
 	TenantMap tenantMap;
-	Reference<TenantPrefixIndex> tenantPrefixIndex;
+	TenantPrefixIndex tenantPrefixIndex;
 	std::map<Version, std::vector<PendingNewShard>>
 	    pendingAddRanges; // Pending requests to add ranges to physical shards
 	std::map<Version, std::vector<KeyRange>>
@@ -805,7 +804,7 @@ public:
 	FetchKeysHistograms()
 	  : latency(Histogram::getHistogram(STORAGESERVER_HISTOGRAM_GROUP,
 	                                    FETCH_KEYS_LATENCY_HISTOGRAM,
-	                                    Histogram::Unit::microseconds)),
+	                                    Histogram::Unit::milliseconds)),
 	    bytes(Histogram::getHistogram(STORAGESERVER_HISTOGRAM_GROUP,
 	                                  FETCH_KEYS_BYTES_HISTOGRAM,
 	                                  Histogram::Unit::bytes)),
@@ -1369,31 +1368,31 @@ public:
 	                          Reference<AsyncVar<ServerDBInfo> const> const& db,
 	                          StorageServerInterface const& ssi,
 	                          Reference<IPageEncryptionKeyProvider> encryptionKeyProvider)
-	  : tenantPrefixIndex(makeReference<TenantPrefixIndex>()), encryptionKeyProvider(encryptionKeyProvider),
-	    shardAware(false), tlogCursorReadsLatencyHistogram(Histogram::getHistogram(STORAGESERVER_HISTOGRAM_GROUP,
-	                                                                               TLOG_CURSOR_READS_LATENCY_HISTOGRAM,
-	                                                                               Histogram::Unit::microseconds)),
+	  : encryptionKeyProvider(encryptionKeyProvider), shardAware(false),
+	    tlogCursorReadsLatencyHistogram(Histogram::getHistogram(STORAGESERVER_HISTOGRAM_GROUP,
+	                                                            TLOG_CURSOR_READS_LATENCY_HISTOGRAM,
+	                                                            Histogram::Unit::milliseconds)),
 	    ssVersionLockLatencyHistogram(Histogram::getHistogram(STORAGESERVER_HISTOGRAM_GROUP,
 	                                                          SS_VERSION_LOCK_LATENCY_HISTOGRAM,
-	                                                          Histogram::Unit::microseconds)),
+	                                                          Histogram::Unit::milliseconds)),
 	    eagerReadsLatencyHistogram(Histogram::getHistogram(STORAGESERVER_HISTOGRAM_GROUP,
 	                                                       EAGER_READS_LATENCY_HISTOGRAM,
-	                                                       Histogram::Unit::microseconds)),
+	                                                       Histogram::Unit::milliseconds)),
 	    fetchKeysPTreeUpdatesLatencyHistogram(Histogram::getHistogram(STORAGESERVER_HISTOGRAM_GROUP,
 	                                                                  FETCH_KEYS_PTREE_UPDATES_LATENCY_HISTOGRAM,
-	                                                                  Histogram::Unit::microseconds)),
+	                                                                  Histogram::Unit::milliseconds)),
 	    tLogMsgsPTreeUpdatesLatencyHistogram(Histogram::getHistogram(STORAGESERVER_HISTOGRAM_GROUP,
 	                                                                 TLOG_MSGS_PTREE_UPDATES_LATENCY_HISTOGRAM,
-	                                                                 Histogram::Unit::microseconds)),
+	                                                                 Histogram::Unit::milliseconds)),
 	    storageUpdatesDurableLatencyHistogram(Histogram::getHistogram(STORAGESERVER_HISTOGRAM_GROUP,
 	                                                                  STORAGE_UPDATES_DURABLE_LATENCY_HISTOGRAM,
-	                                                                  Histogram::Unit::microseconds)),
+	                                                                  Histogram::Unit::milliseconds)),
 	    storageCommitLatencyHistogram(Histogram::getHistogram(STORAGESERVER_HISTOGRAM_GROUP,
 	                                                          STORAGE_COMMIT_LATENCY_HISTOGRAM,
-	                                                          Histogram::Unit::microseconds)),
+	                                                          Histogram::Unit::milliseconds)),
 	    ssDurableVersionUpdateLatencyHistogram(Histogram::getHistogram(STORAGESERVER_HISTOGRAM_GROUP,
 	                                                                   SS_DURABLE_VERSION_UPDATE_LATENCY_HISTOGRAM,
-	                                                                   Histogram::Unit::microseconds)),
+	                                                                   Histogram::Unit::milliseconds)),
 	    readRangeBytesReturnedHistogram(Histogram::getHistogram(STORAGESERVER_HISTOGRAM_GROUP,
 	                                                            SS_READ_RANGE_BYTES_RETURNED_HISTOGRAM,
 	                                                            Histogram::Unit::bytes)),
@@ -5111,7 +5110,7 @@ ACTOR Future<Void> getMappedKeyValuesQ(StorageServer* data, GetMappedKeyValuesRequest req)
 			throw tenant_name_required();
 		}
 
-		if (rangeIntersectsAnyTenant(*(data->tenantPrefixIndex), KeyRangeRef(begin, end), req.version)) {
+		if (rangeIntersectsAnyTenant(data->tenantPrefixIndex, KeyRangeRef(begin, end), req.version)) {
 			throw tenant_name_required();
 		}
 	}
@@ -8616,11 +8615,11 @@ private:
 bool StorageServer::insertTenant(TenantNameRef tenantName, TenantMapEntry tenantEntry, Version version) {
 	if (version >= tenantMap.getLatestVersion()) {
 		tenantMap.createNewVersion(version);
-		tenantPrefixIndex->createNewVersion(version);
+		tenantPrefixIndex.createNewVersion(version);
 
 		tenantMap.insert(tenantName, tenantEntry);
 
-		auto view = tenantPrefixIndex->at(version);
+		auto view = tenantPrefixIndex.at(version);
 		auto itr = view.find(tenantEntry.prefix);
 		TenantNameUniqueSet nameSet;
 		if (itr != view.end()) {
@@ -8628,7 +8627,7 @@ bool StorageServer::insertTenant(TenantNameRef tenantName, TenantMapEntry tenantEntry, Version version) {
 		}
 
 		nameSet.insert(tenantName);
-		tenantPrefixIndex->insert(tenantEntry.prefix, nameSet);
+		tenantPrefixIndex.insert(tenantEntry.prefix, nameSet);
 
 		TraceEvent("InsertTenant", thisServerID).detail("Tenant", tenantName).detail("Version", version);
 		return true;
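A sketch of the read-modify-write on the prefix index above: fetch the name set for a prefix if one exists, add the tenant, and store the set back. std::map and std::set stand in for the versioned TenantPrefixIndex, which writes a new version instead of mutating in place:

// Several tenant names can share one prefix, hence the set per prefix.
#include <map>
#include <set>
#include <string>

using TenantNameUniqueSet = std::set<std::string>;

void insertTenantSketch(std::map<std::string, TenantNameUniqueSet>& prefixIndex,
                        const std::string& prefix,
                        const std::string& tenantName) {
    TenantNameUniqueSet nameSet;
    auto itr = prefixIndex.find(prefix);
    if (itr != prefixIndex.end()) {
        nameSet = itr->second; // start from the existing set for this prefix
    }
    nameSet.insert(tenantName);
    prefixIndex[prefix] = nameSet; // the real index inserts into a new version
}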
@@ -8648,20 +8647,20 @@ void StorageServer::insertTenant(TenantNameRef tenantName, ValueRef value, Version version) {
 void StorageServer::clearTenants(TenantNameRef startTenant, TenantNameRef endTenant, Version version) {
 	if (version >= tenantMap.getLatestVersion()) {
 		tenantMap.createNewVersion(version);
-		tenantPrefixIndex->createNewVersion(version);
+		tenantPrefixIndex.createNewVersion(version);
 
 		auto view = tenantMap.at(version);
 		for (auto itr = view.lower_bound(startTenant); itr != view.lower_bound(endTenant); ++itr) {
-			auto indexView = tenantPrefixIndex->at(version);
+			auto indexView = tenantPrefixIndex.at(version);
 			// Trigger any watches on the prefix associated with the tenant.
 			watches.triggerRange(itr->prefix, strinc(itr->prefix));
 			auto indexItr = indexView.find(itr->prefix);
 			ASSERT(indexItr != indexView.end());
 			TenantNameUniqueSet nameSet = *indexItr;
 			if (nameSet.remove(itr.key())) {
-				tenantPrefixIndex->erase(itr->prefix);
+				tenantPrefixIndex.erase(itr->prefix);
 			} else {
-				tenantPrefixIndex->insert(itr->prefix, nameSet);
+				tenantPrefixIndex.insert(itr->prefix, nameSet);
 			}
 			TraceEvent("EraseTenant", thisServerID).detail("Tenant", itr.key()).detail("Version", version);
 		}
@@ -9348,7 +9347,7 @@ ACTOR Future<Void> updateStorage(StorageServer* data) {
 	    newOldestVersion, desiredVersion, bytesLeft, unlimitedCommitBytes);
 	if (data->tenantMap.getLatestVersion() < newOldestVersion) {
 		data->tenantMap.createNewVersion(newOldestVersion);
-		data->tenantPrefixIndex->createNewVersion(newOldestVersion);
+		data->tenantPrefixIndex.createNewVersion(newOldestVersion);
 	}
 	// We want to forget things from these data structures atomically with changing oldestVersion (and "before",
 	// since oldestVersion.set() may trigger waiting actors) forgetVersionsBeforeAsync visibly forgets
@@ -9356,7 +9355,7 @@ ACTOR Future<Void> updateStorage(StorageServer* data) {
 	Future<Void> finishedForgetting =
 	    data->mutableData().forgetVersionsBeforeAsync(newOldestVersion, TaskPriority::UpdateStorage) &&
 	    data->tenantMap.forgetVersionsBeforeAsync(newOldestVersion, TaskPriority::UpdateStorage) &&
-	    data->tenantPrefixIndex->forgetVersionsBeforeAsync(newOldestVersion, TaskPriority::UpdateStorage);
+	    data->tenantPrefixIndex.forgetVersionsBeforeAsync(newOldestVersion, TaskPriority::UpdateStorage);
 	data->oldestVersion.set(newOldestVersion);
 	wait(finishedForgetting);
 	wait(yield(TaskPriority::UpdateStorage));
@@ -9468,7 +9467,7 @@ ACTOR Future<Void> updateStorage(StorageServer* data) {
 		durableDelay = delay(SERVER_KNOBS->STORAGE_COMMIT_INTERVAL, TaskPriority::UpdateStorage);
 	}
 
-	wait(ioTimeoutError(durable, SERVER_KNOBS->MAX_STORAGE_COMMIT_TIME));
+	wait(ioTimeoutError(durable, SERVER_KNOBS->MAX_STORAGE_COMMIT_TIME, "StorageCommit"));
 	data->storageCommitLatencyHistogram->sampleSeconds(now() - beforeStorageCommit);
 
 	debug_advanceMinCommittedVersion(data->thisServerID, data->storageMinRecoverVersion);
@@ -10165,7 +10164,7 @@ ACTOR Future<bool> restoreDurableState(StorageServer* data, IKeyValueStore* storage)
 
 		data->tenantMap.insert(tenantName, tenantEntry);
 
-		auto view = data->tenantPrefixIndex->at(version);
+		auto view = data->tenantPrefixIndex.at(version);
 		auto itr = view.find(tenantEntry.prefix);
 		TenantNameUniqueSet nameSet;
 		if (itr != view.end()) {
@@ -10173,7 +10172,7 @@ ACTOR Future<bool> restoreDurableState(StorageServer* data, IKeyValueStore* storage)
 		}
 
 		nameSet.insert(tenantName);
-		data->tenantPrefixIndex->insert(tenantEntry.prefix, nameSet);
+		data->tenantPrefixIndex.insert(tenantEntry.prefix, nameSet);
 
 		TraceEvent("RestoringTenant", data->thisServerID)
 		    .detail("Key", tenantMap[tenantMapLoc].key)
@@ -11275,7 +11274,6 @@ ACTOR Future<Void> storageServer(IKeyValueStore* persistentData,
 		self.tag = seedTag;
 	}
 
-	self.encryptionKeyProvider->setTenantPrefixIndex(self.tenantPrefixIndex);
 	self.storage.makeNewStorageServerDurable(self.shardAware);
 	wait(self.storage.commit());
 	++self.counters.kvCommits;
@@ -11358,13 +11356,6 @@ ACTOR Future<Void> storageServer(IKeyValueStore* persistentData,
 		recovered.send(Void());
 		return Void();
 	}
-	// Pass a reference of tenantPrefixIndex to the storage engine to support per-tenant data encryption,
-	// after the tenant map is recovered in restoreDurableState. In case of a storage server reboot,
-	// it is possible that the storage engine is still holding a pre-reboot tenantPrefixIndex, and use that
-	// for its own recovery, before we set the tenantPrefixIndex here.
-	if (self.encryptionKeyProvider.isValid()) {
-		self.encryptionKeyProvider->setTenantPrefixIndex(self.tenantPrefixIndex);
-	}
 	TraceEvent("SSTimeRestoreDurableState", self.thisServerID).detail("TimeTaken", now() - start);
 
 	// if this is a tss storage file, use that as source of truth for this server being a tss instead of the