Merge remote-tracking branch 'origin/main' into features/tenant-lock-fdbcli

# Conflicts:
#	fdbserver/ApplyMetadataMutation.cpp
Markus Pilman 2023-02-15 18:37:18 +01:00
commit b79885d5b9
76 changed files with 4716 additions and 1225 deletions

View File

@ -1044,6 +1044,20 @@ JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDBDatabase_Database_1verify
return (jlong)f;
}
JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDBDatabase_Database_1getClientStatus(JNIEnv* jenv,
jobject,
jlong dbPtr) {
if (!dbPtr) {
throwParamNotNull(jenv);
return 0;
}
FDBDatabase* database = (FDBDatabase*)dbPtr;
FDBFuture* f = fdb_database_get_client_status(database);
return (jlong)f;
}
JNIEXPORT jboolean JNICALL Java_com_apple_foundationdb_FDB_Error_1predicate(JNIEnv* jenv,
jobject,
jint predicate,

View File

@ -0,0 +1,48 @@
/*
* GetClientStatusIntegrationTest.java
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2023 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.apple.foundationdb;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
/**
* Integration tests for the client status API. This requires a running FDB instance to work properly;
* all tests will be skipped if it can't connect to a running instance relatively quickly.
*/
class GetClientStatusIntegrationTest {
public static final int API_VERSION = 720;
private static final FDB fdb = FDB.selectAPIVersion(API_VERSION);
@Test
public void clientStatusIsHealthy() throws Exception {
try (Database db = fdb.open()) {
// Run a simple transaction to make sure the database is fully initialized
db.run(tr -> {
return tr.getReadVersion();
});
// Here we just check if a meaningful client report status is returned
// Different report attributes and error cases are covered by C API tests
String statusStr = new String(db.getClientStatus().join());
Assertions.assertTrue(statusStr.contains("\"Healthy\":true"),
String.format("Healthy:true not found in client status: %s", statusStr));
}
}
}

View File

@ -507,4 +507,21 @@ public interface Database extends AutoCloseable, TransactionContext {
*/
@Override
void close();
/**
* Returns client-side status information
*
* @return a {@code CompletableFuture} containing a JSON string with client status health information
*/
default CompletableFuture<byte[]> getClientStatus() {
return getClientStatus(getExecutor());
}
/**
* Returns client-side status information
*
* @param e the {@link Executor} to use for asynchronous callbacks
* @return a {@code CompletableFuture} containing a JSON string with client status health information
*/
CompletableFuture<byte[]> getClientStatus(Executor e);
}

View File

@ -27,7 +27,6 @@ import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Function;
import com.apple.foundationdb.async.AsyncUtil;
import com.apple.foundationdb.tuple.ByteArrayUtil;
import com.apple.foundationdb.tuple.Tuple;
class FDBDatabase extends NativeObjectWrapper implements Database, OptionConsumer {
@ -270,6 +269,16 @@ class FDBDatabase extends NativeObjectWrapper implements Database, OptionConsume
Database_dispose(cPtr);
}
@Override
public CompletableFuture<byte[]> getClientStatus(Executor e) {
pointerReadLock.lock();
try {
return new FutureKey(Database_getClientStatus(getPtr()), e, eventKeeper);
} finally {
pointerReadLock.unlock();
}
}
private native long Database_openTenant(long cPtr, byte[] tenantName);
private native long Database_createTransaction(long cPtr);
private native void Database_dispose(long cPtr);
@ -281,4 +290,5 @@ class FDBDatabase extends NativeObjectWrapper implements Database, OptionConsume
private native long Database_unblobbifyRange(long cPtr, byte[] beginKey, byte[] endKey);
private native long Database_listBlobbifiedRanges(long cPtr, byte[] beginKey, byte[] endKey, int rangeLimit);
private native long Database_verifyBlobRange(long cPtr, byte[] beginKey, byte[] endKey, long version);
private native long Database_getClientStatus(long cPtr);
}

View File

@ -54,6 +54,7 @@ set(JAVA_INTEGRATION_TESTS
src/integration/com/apple/foundationdb/RepeatableReadMultiThreadClientTest.java
src/integration/com/apple/foundationdb/MappedRangeQueryIntegrationTest.java
src/integration/com/apple/foundationdb/BlobGranuleIntegrationTest.java
src/integration/com/apple/foundationdb/GetClientStatusIntegrationTest.java
)
# Resources that are used in integration testing, but are not explicitly test files (JUnit rules,

View File

@ -75,3 +75,10 @@ add_custom_command(OUTPUT ${package_file}
add_custom_target(python_package DEPENDS ${package_file})
add_dependencies(python_package python_binding)
add_dependencies(packages python_package)
add_fdbclient_test(
NAME python_unit_tests
COMMAND python ${CMAKE_CURRENT_SOURCE_DIR}/tests/unit_tests.py
--cluster-file @CLUSTER_FILE@ --verbose
DISABLE_LOG_DUMP
)

View File

@ -1326,6 +1326,9 @@ class Database(_TransactionCreator):
self.capi.fdb_database_create_transaction(self.dpointer, ctypes.byref(pointer))
return Transaction(pointer.value, self)
def get_client_status(self):
return Key(self.capi.fdb_database_get_client_status(self.dpointer))
class Tenant(_TransactionCreator):
def __init__(self, tpointer):
@ -1456,7 +1459,7 @@ def check_error_code(code, func, arguments):
return None
if sys.maxsize <= 2 ** 32:
if sys.maxsize <= 2**32:
raise Exception("FoundationDB API requires a 64-bit python interpreter!")
if platform.system() == "Windows":
capi_name = "fdb_c.dll"
@ -1710,6 +1713,9 @@ def init_c_api():
_capi.fdb_database_set_option.restype = ctypes.c_int
_capi.fdb_database_set_option.errcheck = check_error_code
_capi.fdb_database_get_client_status.argtypes = [ctypes.c_void_p]
_capi.fdb_database_get_client_status.restype = ctypes.c_void_p
_capi.fdb_tenant_destroy.argtypes = [ctypes.c_void_p]
_capi.fdb_tenant_destroy.restype = None
@ -1891,7 +1897,6 @@ if hasattr(ctypes.pythonapi, "Py_IncRef"):
def _unpin_callback(cb):
ctypes.pythonapi.Py_DecRef(ctypes.py_object(cb))
else:
_active_callbacks = set()
_pin_callback = _active_callbacks.add
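
As a usage illustration of the new Python get_client_status() call, here is a minimal sketch; the cluster-file path is a hypothetical placeholder, and only the "Healthy" flag exercised by the unit test added later in this commit is relied on:

import json
import fdb

fdb.api_version(720)

# Hypothetical cluster file; substitute your own path.
db = fdb.open("fdb.cluster")

# get_client_status() returns a future whose wait() yields raw JSON bytes.
status = json.loads(db.get_client_status().wait())
if status.get("Healthy"):
    print("client connection is healthy")
else:
    print("client reports problems:", status)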

View File

@ -19,37 +19,24 @@
# limitations under the License.
#
import ctypes
import math
import sys
import os
import struct
import threading
import random
import time
import traceback
import json
import math
import os
import random
import struct
import sys
import threading
sys.path[:0] = [os.path.join(os.path.dirname(__file__), '..')]
sys.path[:0] = [os.path.join(os.path.dirname(__file__), "..")]
import fdb
fdb.api_version(int(sys.argv[2]))
import fdb.tuple
from directory_extension import DirectoryExtension
from fdb import six
from fdb.impl import strinc
import fdb.tuple
from directory_extension import DirectoryExtension
from cancellation_timeout_tests import test_timeouts
from cancellation_timeout_tests import test_db_timeouts
from cancellation_timeout_tests import test_cancellation
from cancellation_timeout_tests import test_retry_limits
from cancellation_timeout_tests import test_db_retry_limits
from cancellation_timeout_tests import test_combinations
from size_limit_tests import test_size_limit_option, test_get_approximate_size
from tenant_tests import test_tenants
from unit_tests import run_unit_tests
random.seed(0)
@ -92,13 +79,16 @@ class Stack:
if isinstance(raw[i][1], fdb.Future):
try:
val = raw[i][1].wait()
if val is None or (hasattr(val, 'present') and not val.present()):
raw[i] = (raw[i][0], b'RESULT_NOT_PRESENT')
if val is None or (hasattr(val, "present") and not val.present()):
raw[i] = (raw[i][0], b"RESULT_NOT_PRESENT")
else:
raw[i] = (raw[i][0], val)
except fdb.FDBError as e:
# print('ERROR: %r' % e)
raw[i] = (raw[i][0], fdb.tuple.pack((b'ERROR', str(e.code).encode('ascii'))))
raw[i] = (
raw[i][0],
fdb.tuple.pack((b"ERROR", str(e.code).encode("ascii"))),
)
if count is None:
if with_idx:
@ -113,7 +103,9 @@ class Stack:
class Instruction:
def __init__(self, tr, stack, op, index, isDatabase=False, isTenant=False, isSnapshot=False):
def __init__(
self, tr, stack, op, index, isDatabase=False, isTenant=False, isSnapshot=False
):
self.tr = tr
self.stack = stack
self.op = op
@ -129,151 +121,6 @@ class Instruction:
self.stack.push(self.index, val)
def test_fdb_transactional_generator(db):
try:
@fdb.transactional
def function_that_yields(tr):
yield 0
assert fdb.get_api_version() < 630, "Pre-6.3, a decorator may wrap a function that yields"
except ValueError:
assert fdb.get_api_version() >= 630, "Post-6.3, a decorator should throw if wrapped function yields"
def test_fdb_transactional_returns_generator(db):
try:
def function_that_yields(tr):
yield 0
@fdb.transactional
def function_that_returns(tr):
return function_that_yields(tr)
function_that_returns()
assert fdb.get_api_version() < 630, "Pre-6.3, returning a generator is allowed"
except ValueError:
assert fdb.get_api_version() >= 630, "Post-6.3, returning a generator should throw"
def test_db_options(db):
db.options.set_location_cache_size(100001)
db.options.set_max_watches(100001)
db.options.set_datacenter_id("dc_id")
db.options.set_machine_id("machine_id")
db.options.set_snapshot_ryw_enable()
db.options.set_snapshot_ryw_disable()
db.options.set_transaction_logging_max_field_length(1000)
db.options.set_transaction_timeout(100000)
db.options.set_transaction_timeout(0)
db.options.set_transaction_timeout(0)
db.options.set_transaction_max_retry_delay(100)
db.options.set_transaction_size_limit(100000)
db.options.set_transaction_retry_limit(10)
db.options.set_transaction_retry_limit(-1)
db.options.set_transaction_causal_read_risky()
db.options.set_transaction_include_port_in_address()
@fdb.transactional
def test_options(tr):
tr.options.set_priority_system_immediate()
tr.options.set_priority_batch()
tr.options.set_causal_read_risky()
tr.options.set_causal_write_risky()
tr.options.set_read_your_writes_disable()
tr.options.set_read_system_keys()
tr.options.set_access_system_keys()
tr.options.set_transaction_logging_max_field_length(1000)
tr.options.set_timeout(60 * 1000)
tr.options.set_retry_limit(50)
tr.options.set_max_retry_delay(100)
tr.options.set_used_during_commit_protection_disable()
tr.options.set_debug_transaction_identifier('my_transaction')
tr.options.set_log_transaction()
tr.options.set_read_lock_aware()
tr.options.set_lock_aware()
tr.options.set_include_port_in_address()
tr.get(b'\xff').wait()
def check_watches(db, watches, expected):
for i, watch in enumerate(watches):
if watch.is_ready() or expected:
try:
watch.wait()
if not expected:
assert False, "Watch %d is ready" % i
except fdb.FDBError as e:
tr = db.create_transaction()
tr.on_error(e).wait()
return False
return True
def test_watches(db):
while True:
db[b'w0'] = b'0'
db[b'w3'] = b'3'
watches = [None]
@fdb.transactional
def txn1(tr):
watches[0] = tr.watch(b'w0')
tr.set(b'w0', b'0')
assert not watches[0].is_ready()
txn1(db)
watches.append(db.clear_and_watch(b'w1'))
watches.append(db.set_and_watch(b'w2', b'2'))
watches.append(db.get_and_watch(b'w3'))
assert watches[3][0] == b'3'
watches[3] = watches[3][1]
time.sleep(1)
if not check_watches(db, watches, False):
continue
del db[b'w1']
time.sleep(5)
if not check_watches(db, watches, False):
continue
db[b'w0'] = b'a'
db[b'w1'] = b'b'
del db[b'w2']
db.bit_xor(b'w3', b'\xff\xff')
if check_watches(db, watches, True):
return
@fdb.transactional
def test_locality(tr):
tr.options.set_timeout(60 * 1000)
tr.options.set_read_system_keys() # We do this because the last shard (for now, someday the last N shards) is in the /FF/ keyspace
# This isn't strictly transactional, though we expect it to be given the size of our database
boundary_keys = list(fdb.locality.get_boundary_keys(tr, b'', b'\xff\xff')) + [b'\xff\xff']
end_keys = [tr.get_key(fdb.KeySelector.last_less_than(k)) for k in boundary_keys[1:]]
start_addresses = [fdb.locality.get_addresses_for_key(tr, k) for k in boundary_keys[:-1]]
end_addresses = [fdb.locality.get_addresses_for_key(tr, k) for k in end_keys]
if [set(s.wait()) for s in start_addresses] != [set(e.wait()) for e in end_addresses]:
raise Exception("Locality not internally consistent.")
def test_predicates():
assert fdb.predicates.is_retryable(fdb.FDBError(1020))
assert not fdb.predicates.is_retryable(fdb.FDBError(10))
class Tester:
tr_map = {}
tr_map_lock = threading.RLock()
@ -339,9 +186,9 @@ class Tester:
# if op != "PUSH" and op != "SWAP":
# print("%d. Instruction is %s" % (idx, op))
isDatabase = op.endswith(six.u('_DATABASE'))
isTenant = op.endswith(six.u('_TENANT'))
isSnapshot = op.endswith(six.u('_SNAPSHOT'))
isDatabase = op.endswith(six.u("_DATABASE"))
isTenant = op.endswith(six.u("_TENANT"))
isSnapshot = op.endswith(six.u("_SNAPSHOT"))
if isDatabase:
op = op[:-9]
@ -355,7 +202,9 @@ class Tester:
else:
obj = self.current_transaction()
inst = Instruction(obj, self.stack, op, idx, isDatabase, isTenant, isSnapshot)
inst = Instruction(
obj, self.stack, op, idx, isDatabase, isTenant, isSnapshot
)
try:
if inst.op == six.u("PUSH"):
@ -395,7 +244,7 @@ class Tester:
f = obj.__getitem__(key)
if f == None:
inst.push(b'RESULT_NOT_PRESENT')
inst.push(b"RESULT_NOT_PRESENT")
else:
inst.push(f)
elif inst.op == six.u("GET_ESTIMATED_RANGE_SIZE"):
@ -429,9 +278,22 @@ class Tester:
self.push_range(inst, r)
elif inst.op == six.u("GET_RANGE_STARTS_WITH"):
prefix, limit, reverse, mode = inst.pop(4)
self.push_range(inst, obj.get_range_startswith(prefix, limit, reverse, mode))
self.push_range(
inst, obj.get_range_startswith(prefix, limit, reverse, mode)
)
elif inst.op == six.u("GET_RANGE_SELECTOR"):
begin_key, begin_or_equal, begin_offset, end_key, end_or_equal, end_offset, limit, reverse, mode, prefix = inst.pop(10)
(
begin_key,
begin_or_equal,
begin_offset,
end_key,
end_or_equal,
end_offset,
limit,
reverse,
mode,
prefix,
) = inst.pop(10)
beginSel = fdb.KeySelector(begin_key, begin_or_equal, begin_offset)
endSel = fdb.KeySelector(end_key, end_or_equal, end_offset)
if limit == 0 and mode == -1 and random.random() < 0.5:
@ -534,11 +396,16 @@ class Tester:
prefix = inst.pop()
count = inst.pop()
items = inst.pop(count)
if not fdb.tuple.has_incomplete_versionstamp(items) and random.random() < 0.5:
if (
not fdb.tuple.has_incomplete_versionstamp(items)
and random.random() < 0.5
):
inst.push(b"ERROR: NONE")
else:
try:
packed = fdb.tuple.pack_with_versionstamp(tuple(items), prefix=prefix)
packed = fdb.tuple.pack_with_versionstamp(
tuple(items), prefix=prefix
)
inst.push(b"OK")
inst.push(packed)
except ValueError as e:
@ -568,7 +435,12 @@ class Tester:
elif inst.op == six.u("ENCODE_FLOAT"):
f_bytes = inst.pop()
f = struct.unpack(">f", f_bytes)[0]
if not math.isnan(f) and not math.isinf(f) and not f == -0.0 and f == int(f):
if (
not math.isnan(f)
and not math.isinf(f)
and not f == -0.0
and f == int(f)
):
f = int(f)
inst.push(fdb.tuple.SingleFloat(f))
elif inst.op == six.u("ENCODE_DOUBLE"):
@ -609,7 +481,9 @@ class Tester:
self.tenant = None
elif inst.op == six.u("TENANT_LIST"):
begin, end, limit = inst.pop(3)
tenant_list = fdb.tenant_management.list_tenants(self.db, begin, end, limit)
tenant_list = fdb.tenant_management.list_tenants(
self.db, begin, end, limit
)
result = []
for tenant in tenant_list:
result += [tenant.key]
@ -627,37 +501,16 @@ class Tester:
else:
inst.push(b"NO_ACTIVE_TENANT")
elif inst.op == six.u("UNIT_TESTS"):
try:
test_db_options(db)
test_options(db)
test_watches(db)
test_cancellation(db)
test_retry_limits(db)
test_db_retry_limits(db)
test_timeouts(db)
test_db_timeouts(db)
test_combinations(db)
test_locality(db)
test_predicates()
test_size_limit_option(db)
test_get_approximate_size(db)
if fdb.get_api_version() >= 710:
test_tenants(db)
except fdb.FDBError as e:
print("Unit tests failed: %s" % e.description)
traceback.print_exc()
raise Exception("Unit tests failed: %s" % e.description)
elif inst.op.startswith(six.u('DIRECTORY_')):
run_unit_tests(db)
elif inst.op.startswith(six.u("DIRECTORY_")):
self.directory_extension.process_instruction(inst)
else:
raise Exception("Unknown op %s" % inst.op)
except fdb.FDBError as e:
# print('ERROR: %r' % e)
inst.stack.push(idx, fdb.tuple.pack((b"ERROR", str(e.code).encode('ascii'))))
inst.stack.push(
idx, fdb.tuple.pack((b"ERROR", str(e.code).encode("ascii")))
)
# print(" to %s" % self.stack)
# print()
@ -665,6 +518,6 @@ class Tester:
[thr.join() for thr in self.threads]
if __name__ == '__main__':
t = Tester(db, sys.argv[1].encode('ascii'))
if __name__ == "__main__":
t = Tester(db, sys.argv[1].encode("ascii"))
t.run()

View File

@ -0,0 +1,296 @@
#!/usr/bin/python
#
# unit_tests.py
#
# This source file is part of the FoundationDB open source project
#
# Copyright 2013-2023 Apple Inc. and the FoundationDB project authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import argparse
import os
import sys
import time
import traceback
import json
import fdb
if __name__ == "__main__":
fdb.api_version(720)
from cancellation_timeout_tests import test_timeouts
from cancellation_timeout_tests import test_db_timeouts
from cancellation_timeout_tests import test_cancellation
from cancellation_timeout_tests import test_retry_limits
from cancellation_timeout_tests import test_db_retry_limits
from cancellation_timeout_tests import test_combinations
from size_limit_tests import test_size_limit_option, test_get_approximate_size
from tenant_tests import test_tenants
VERBOSE = False
def log(msg):
if VERBOSE:
print(msg, file=sys.stderr, flush=True)
def test_fdb_transactional_generator(db):
try:
@fdb.transactional
def function_that_yields(tr):
yield 0
assert (
fdb.get_api_version() < 630
), "Pre-6.3, a decorator may wrap a function that yields"
except ValueError:
assert (
fdb.get_api_version() >= 630
), "Post-6.3, a decorator should throw if wrapped function yields"
def test_fdb_transactional_returns_generator(db):
try:
def function_that_yields(tr):
yield 0
@fdb.transactional
def function_that_returns(tr):
return function_that_yields(tr)
function_that_returns()
assert fdb.get_api_version() < 630, "Pre-6.3, returning a generator is allowed"
except ValueError:
assert (
fdb.get_api_version() >= 630
), "Post-6.3, returning a generator should throw"
def test_db_options(db):
db.options.set_location_cache_size(100001)
db.options.set_max_watches(100001)
db.options.set_datacenter_id("dc_id")
db.options.set_machine_id("machine_id")
db.options.set_snapshot_ryw_enable()
db.options.set_snapshot_ryw_disable()
db.options.set_transaction_logging_max_field_length(1000)
db.options.set_transaction_timeout(100000)
db.options.set_transaction_timeout(0)
db.options.set_transaction_timeout(0)
db.options.set_transaction_max_retry_delay(100)
db.options.set_transaction_size_limit(100000)
db.options.set_transaction_retry_limit(10)
db.options.set_transaction_retry_limit(-1)
db.options.set_transaction_causal_read_risky()
db.options.set_transaction_include_port_in_address()
@fdb.transactional
def test_options(tr):
tr.options.set_priority_system_immediate()
tr.options.set_priority_batch()
tr.options.set_causal_read_risky()
tr.options.set_causal_write_risky()
tr.options.set_read_your_writes_disable()
tr.options.set_read_system_keys()
tr.options.set_access_system_keys()
tr.options.set_transaction_logging_max_field_length(1000)
tr.options.set_timeout(60 * 1000)
tr.options.set_retry_limit(50)
tr.options.set_max_retry_delay(100)
tr.options.set_used_during_commit_protection_disable()
tr.options.set_debug_transaction_identifier("my_transaction")
tr.options.set_log_transaction()
tr.options.set_read_lock_aware()
tr.options.set_lock_aware()
tr.options.set_include_port_in_address()
tr.get(b"\xff").wait()
def check_watches(db, watches, expected):
for i, watch in enumerate(watches):
if watch.is_ready() or expected:
try:
watch.wait()
if not expected:
assert False, "Watch %d is ready" % i
except fdb.FDBError as e:
tr = db.create_transaction()
tr.on_error(e).wait()
return False
return True
def test_watches(db):
while True:
db[b"w0"] = b"0"
db[b"w3"] = b"3"
watches = [None]
@fdb.transactional
def txn1(tr):
watches[0] = tr.watch(b"w0")
tr.set(b"w0", b"0")
assert not watches[0].is_ready()
txn1(db)
watches.append(db.clear_and_watch(b"w1"))
watches.append(db.set_and_watch(b"w2", b"2"))
watches.append(db.get_and_watch(b"w3"))
assert watches[3][0] == b"3"
watches[3] = watches[3][1]
time.sleep(1)
if not check_watches(db, watches, False):
continue
del db[b"w1"]
time.sleep(5)
if not check_watches(db, watches, False):
continue
db[b"w0"] = b"a"
db[b"w1"] = b"b"
del db[b"w2"]
db.bit_xor(b"w3", b"\xff\xff")
if check_watches(db, watches, True):
return
@fdb.transactional
def test_locality(tr):
tr.options.set_timeout(60 * 1000)
tr.options.set_read_system_keys() # We do this because the last shard (for now, someday the last N shards) is in the /FF/ keyspace
# This isn't strictly transactional, though we expect it to be given the size of our database
boundary_keys = list(fdb.locality.get_boundary_keys(tr, b"", b"\xff\xff")) + [
b"\xff\xff"
]
end_keys = [
tr.get_key(fdb.KeySelector.last_less_than(k)) for k in boundary_keys[1:]
]
start_addresses = [
fdb.locality.get_addresses_for_key(tr, k) for k in boundary_keys[:-1]
]
end_addresses = [fdb.locality.get_addresses_for_key(tr, k) for k in end_keys]
if [set(s.wait()) for s in start_addresses] != [
set(e.wait()) for e in end_addresses
]:
raise Exception("Locality not internally consistent.")
def test_predicates():
assert fdb.predicates.is_retryable(fdb.FDBError(1020))
assert not fdb.predicates.is_retryable(fdb.FDBError(10))
def test_get_client_status(db):
@fdb.transactional
def simple_txn(tr):
tr.get_read_version().wait()
# Execute a simple transaction
# to make sure the database is initialized
simple_txn(db)
# Here we just check if a meaningful client report status is returned
# Different report attributes and error cases are covered by C API tests
status_str = db.get_client_status().wait()
status = json.loads(status_str)
assert "Healthy" in status
assert status["Healthy"]
def run_unit_tests(db):
try:
log("test_db_options")
test_db_options(db)
log("test_options")
test_options(db)
log("test_watches")
test_watches(db)
log("test_cancellation")
test_cancellation(db)
log("test_retry_limits")
test_retry_limits(db)
log("test_db_retry_limits")
test_db_retry_limits(db)
log("test_timeouts")
test_timeouts(db)
log("test_db_timeouts")
test_db_timeouts(db)
log("test_combinations")
test_combinations(db)
log("test_locality")
test_locality(db)
log("test_predicates")
test_predicates()
log("test_size_limit_option")
test_size_limit_option(db)
log("test_get_approximate_size")
test_get_approximate_size(db)
log("test_get_client_status")
test_get_client_status(db)
if fdb.get_api_version() >= 710:
log("test_tenants")
test_tenants(db)
except fdb.FDBError as e:
print("Unit tests failed: %s" % e.description)
traceback.print_exc()
raise Exception("Unit tests failed: %s" % e.description)
if __name__ == "__main__":
parser = argparse.ArgumentParser(
formatter_class=argparse.RawDescriptionHelpFormatter,
description="""
Unit tests for python FDB API.
""",
)
parser.add_argument(
"--cluster-file",
"-C",
help="FDB cluster file",
required=True,
)
parser.add_argument(
"--verbose",
"-V",
help="Print diagnostic info",
action="store_true",
)
args = parser.parse_args()
if args.verbose:
VERBOSE = True
log("Opening database {}".format(args.cluster_file))
db = fdb.open(args.cluster_file)
run_unit_tests(db)

View File

@ -563,6 +563,7 @@ string(APPEND test_venv_cmd "${Python3_EXECUTABLE} -m venv ${test_venv_dir} ")
string(APPEND test_venv_cmd "&& ${test_venv_activate} ")
string(APPEND test_venv_cmd "&& pip install --upgrade pip ")
string(APPEND test_venv_cmd "&& pip install -r ${CMAKE_SOURCE_DIR}/tests/TestRunner/requirements.txt")
string(APPEND test_venv_cmd "&& (cd ${CMAKE_BINARY_DIR}/bindings/python && python3 setup.py install) ")
add_test(
NAME test_venv_setup
COMMAND bash -c ${test_venv_cmd}
@ -602,6 +603,12 @@ function(add_python_venv_test)
COMMAND ${shell_cmd} ${shell_opt} "${test_venv_activate} && ${T_COMMAND}")
set_tests_properties(${T_NAME} PROPERTIES FIXTURES_REQUIRED test_virtual_env_setup TIMEOUT ${T_TEST_TIMEOUT})
set(test_env_vars "PYTHONPATH=${CMAKE_SOURCE_DIR}/tests/TestRunner:${CMAKE_BINARY_DIR}/tests/TestRunner")
if(APPLE)
set(ld_env_name "DYLD_LIBRARY_PATH")
else()
set(ld_env_name "LD_LIBRARY_PATH")
endif()
set(test_env_vars PROPERTIES ENVIRONMENT "${test_env_vars};${ld_env_name}=${CMAKE_BINARY_DIR}/lib:$ENV{${ld_env_name}}")
if(USE_SANITIZER)
set(test_env_vars "${test_env_vars};${SANITIZER_OPTIONS}")
endif()

View File

@ -58,7 +58,7 @@ class StatFetcher:
class TestPicker:
def __init__(self, test_dir: Path):
def __init__(self, test_dir: Path, binaries: OrderedDict[Version, Path]):
if not test_dir.exists():
raise RuntimeError("{} is neither a directory nor a file".format(test_dir))
self.include_files_regex = re.compile(config.include_test_files)
@ -69,6 +69,7 @@ class TestPicker:
self.tests: OrderedDict[str, TestDescription] = collections.OrderedDict()
self.restart_test: Pattern = re.compile(r".*-\d+\.(txt|toml)")
self.follow_test: Pattern = re.compile(r".*-[2-9]\d*\.(txt|toml)")
self.old_binaries: OrderedDict[Version, Path] = binaries
for subdir in self.test_dir.iterdir():
if subdir.is_dir() and subdir.name in config.test_dirs:
@ -85,6 +86,10 @@ class TestPicker:
else:
self.fetch_stats()
if not self.tests:
raise Exception(
"No tests to run! Please check if tests are included/excluded incorrectly or old binaries are missing for restarting tests")
def add_time(self, test_file: Path, run_time: int, out: SummaryTree) -> None:
# getting the test name is fairly inefficient. But since we only have 100s of tests, I won't bother
test_name: str | None = None
@ -132,6 +137,23 @@ class TestPicker:
or self.exclude_files_regex.search(str(path)) is not None
):
return
# Skip restarting tests that do not have old binaries in the given version range
# In particular, this is only for restarting tests with the "until" keyword,
# since without "until", it will at least run with the current binary.
if is_restarting_test(path):
candidates: List[Path] = []
dirs = path.parent.parts
version_expr = dirs[-1].split("_")
if (version_expr[0] == "from" or version_expr[0] == "to") and len(version_expr) == 4 and version_expr[2] == "until":
max_version = Version.parse(version_expr[3])
min_version = Version.parse(version_expr[1])
for ver, binary in self.old_binaries.items():
if min_version <= ver < max_version:
candidates.append(binary)
if not len(candidates):
# No valid old binary found
return
with path.open("r") as f:
test_name: str | None = None
test_class: str | None = None
@ -263,7 +285,7 @@ class OldBinaries:
max_version = Version.parse(version_expr[3])
candidates: List[Path] = []
for ver, binary in self.binaries.items():
if min_version <= ver <= max_version:
if min_version <= ver < max_version:
candidates.append(binary)
if len(candidates) == 0:
return config.binary
@ -474,7 +496,7 @@ class TestRunner:
self.cluster_file: str | None = None
self.fdb_app_dir: str | None = None
self.binary_chooser = OldBinaries()
self.test_picker = TestPicker(self.test_path)
self.test_picker = TestPicker(self.test_path, self.binary_chooser.binaries)
def backup_sim_dir(self, seed: int):
temp_dir = config.run_dir / str(self.uid)
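
To make the selection rule above concrete, here is a simplified sketch (hypothetical helper names, not the harness's actual Version/TestPicker types) of how a restarting test's "from_<v>_until_<v>" directory name maps to a half-open version range that is checked against the available old binaries:

def parse_version(s):
    # Simplified stand-in for the harness's Version.parse.
    return tuple(int(p) for p in s.split("."))

def has_usable_old_binary(dir_name, old_binaries):
    # dir_name like "from_7.1.0_until_7.2.0"; old_binaries maps version -> binary path.
    parts = dir_name.split("_")
    if len(parts) != 4 or parts[0] not in ("from", "to") or parts[2] != "until":
        return True  # no "until" bound: the test can always run with the current binary
    lo, hi = parse_version(parts[1]), parse_version(parts[3])
    # Half-open range: a binary at exactly the "until" version no longer qualifies.
    return any(lo <= v < hi for v in old_binaries)

print(has_usable_old_binary("from_7.1.0_until_7.2.0", {(7, 1, 25): "/opt/fdb/7.1.25/fdbserver"}))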

View File

@ -35,13 +35,16 @@
namespace fdb_cli {
Optional<std::pair<Optional<ClusterConnectionString>, Optional<DataClusterEntry>>>
parseClusterConfiguration(std::vector<StringRef> const& tokens, DataClusterEntry const& defaults, int startIndex) {
Optional<std::pair<Optional<ClusterConnectionString>, Optional<DataClusterEntry>>> parseClusterConfiguration(
std::vector<StringRef> const& tokens,
DataClusterEntry const& defaults,
int startIndex,
int endIndex) {
Optional<DataClusterEntry> entry;
Optional<ClusterConnectionString> connectionString;
std::set<std::string> usedParams;
for (int tokenNum = startIndex; tokenNum < tokens.size(); ++tokenNum) {
for (int tokenNum = startIndex; tokenNum < endIndex; ++tokenNum) {
StringRef token = tokens[tokenNum];
bool foundEquals;
StringRef param = token.eat("=", &foundEquals);
@ -137,7 +140,7 @@ ACTOR Future<bool> metaclusterRegisterCommand(Reference<IDatabase> db, std::vect
}
DataClusterEntry defaultEntry;
auto config = parseClusterConfiguration(tokens, defaultEntry, 3);
auto config = parseClusterConfiguration(tokens, defaultEntry, 3, tokens.size());
if (!config.present()) {
return false;
} else if (!config.get().first.present()) {
@ -159,16 +162,160 @@ ACTOR Future<bool> metaclusterRemoveCommand(Reference<IDatabase> db, std::vector
fmt::print("Removes the specified data cluster from a metacluster.\n");
fmt::print("If FORCE is specified, then the cluster will be detached even if it has\n"
"tenants assigned to it.\n");
fmt::print("If run on a data cluster, the data cluster will remove its association\n"
"with the metacluster without modifying the management cluster. Doing so\n"
"requires the FORCE option to be set. Use of this mode is required to\n"
"repopulate a management cluster from a data cluster using the\n"
"`metacluster restore' command.\n");
return false;
}
state ClusterNameRef clusterName = tokens[tokens.size() - 1];
wait(MetaclusterAPI::removeCluster(db, clusterName, tokens.size() == 4));
state bool force = tokens.size() == 4;
fmt::print("The cluster `{}' has been removed\n", printable(clusterName).c_str());
state ClusterType clusterType = wait(runTransaction(db, [](Reference<ITransaction> tr) {
tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
return TenantAPI::getClusterType(tr);
}));
if (clusterType == ClusterType::METACLUSTER_DATA && !force) {
if (tokens[2] == "FORCE"_sr) {
fmt::print("ERROR: a cluster name must be specified.\n");
} else {
fmt::print("ERROR: cannot remove a data cluster directly. To remove a data cluster,\n"
"use the `remove' command on the management cluster. To force a data cluster\n"
"to forget its metacluster association without fully removing it, use FORCE.\n");
}
return false;
}
bool updatedDataCluster =
wait(MetaclusterAPI::removeCluster(db, clusterName, clusterType, tokens.size() == 4, 15.0));
if (clusterType == ClusterType::METACLUSTER_MANAGEMENT) {
fmt::print("The cluster `{}' has been removed\n", printable(clusterName).c_str());
if (!updatedDataCluster) {
fmt::print("WARNING: the data cluster could not be updated and may still contains its\n"
"metacluster registration info. To finish removing it, FORCE remove the\n"
"data cluster directly.\n");
}
} else {
ASSERT(updatedDataCluster);
fmt::print("The cluster `{}' has removed its association with its metacluster.\n"
"The metacluster has not been modified.\n",
printable(clusterName).c_str());
}
return true;
}
void printRestoreUsage() {
fmt::print("Usage: metacluster restore <NAME> [dryrun] connection_string=<CONNECTION_STRING>\n"
"<restore_known_data_cluster|repopulate_from_data_cluster> [force_join_new_metacluster]\n\n");
fmt::print("Add a restored data cluster back to a metacluster.\n\n");
fmt::print("Use `dryrun' to report what changes a restore would make and whether any\n");
fmt::print("failures would occur. Without `dryrun', the restore will modify the metacluster\n");
fmt::print("with the changes required to perform the restore.\n\n");
fmt::print("Use `restore_known_data_cluster' to add back a restored copy of a data cluster\n");
fmt::print("that the metacluster is already tracking. This mode should be used if only data\n");
fmt::print("clusters are being restored, and any discrepancies between the management and\n");
fmt::print("data clusters will be resolved using the management cluster metadata.\n");
fmt::print("If `force_join_new_metacluster' is specified, the cluster will try to restore\n");
fmt::print("to a different metacluster than it was originally registered to.\n\n");
fmt::print("Use `repopulate_from_data_cluster' to rebuild a lost management cluster from the\n");
fmt::print("data clusters in a metacluster. This mode should be used if the management\n");
fmt::print("cluster is being restored. If any data clusters are also being restored, the\n");
fmt::print("oldest data clusters should be added first before any non-recovered data\n");
fmt::print("clusters. Any conflicts arising between the added data cluster and existing data\n");
fmt::print("will cause the restore to fail. Before repopulating a metacluster from a data\n");
fmt::print("cluster, that data cluster needs to be detached from its prior metacluster using\n");
fmt::print("the `metacluster remove' command.\n");
}
// metacluster restore command
ACTOR Future<bool> metaclusterRestoreCommand(Reference<IDatabase> db, std::vector<StringRef> tokens) {
if (tokens.size() < 5 || tokens.size() > 7) {
printRestoreUsage();
return false;
}
state bool dryRun = tokens[3] == "dryrun"_sr;
state bool forceJoin = tokens[tokens.size() - 1] == "force_join_new_metacluster"_sr;
if (tokens.size() < 5 + (int)dryRun + (int)forceJoin) {
printRestoreUsage();
return false;
}
state ClusterName clusterName = tokens[2];
state StringRef restoreType = tokens[tokens.size() - 1 - (int)forceJoin];
// connection string
DataClusterEntry defaultEntry;
auto config = parseClusterConfiguration(tokens, defaultEntry, 3 + (int)dryRun, 3 + (int)dryRun + 1);
if (!config.present()) {
return false;
} else if (!config.get().first.present()) {
fmt::print(stderr, "ERROR: connection_string must be configured when registering a cluster.\n");
return false;
}
state std::vector<std::string> messages;
state bool success = true;
try {
if (restoreType == "restore_known_data_cluster"_sr) {
wait(MetaclusterAPI::restoreCluster(db,
clusterName,
config.get().first.get(),
ApplyManagementClusterUpdates::True,
RestoreDryRun(dryRun),
ForceJoinNewMetacluster(forceJoin),
&messages));
} else if (restoreType == "repopulate_from_data_cluster"_sr) {
wait(MetaclusterAPI::restoreCluster(db,
clusterName,
config.get().first.get(),
ApplyManagementClusterUpdates::False,
RestoreDryRun(dryRun),
ForceJoinNewMetacluster(forceJoin),
&messages));
} else {
fmt::print(stderr, "ERROR: unrecognized restore mode `{}'\n", printable(restoreType));
success = false;
}
} catch (Error& e) {
success = false;
fmt::print(stderr, "ERROR: {} ({})\n", e.what(), e.code());
}
if (!messages.empty()) {
fmt::print(success ? stdout : stderr, "\nThe restore reported the following messages:\n\n");
for (int i = 0; i < messages.size(); ++i) {
fmt::print(success ? stdout : stderr, " {}. {}\n", i + 1, messages[i]);
}
if (success) {
fmt::print("\n");
}
}
if (success) {
if (dryRun) {
fmt::print("The restore dry run completed successfully. To perform the restore, run the same command\n");
fmt::print("without the `dryrun' argument.\n");
} else {
fmt::print("The cluster `{}' has been restored\n", printable(clusterName).c_str());
}
}
return success;
}
// metacluster configure command
ACTOR Future<bool> metaclusterConfigureCommand(Reference<IDatabase> db, std::vector<StringRef> tokens) {
if (tokens.size() < 4) {
@ -190,7 +337,7 @@ ACTOR Future<bool> metaclusterConfigureCommand(Reference<IDatabase> db, std::vec
throw cluster_not_found();
}
auto config = parseClusterConfiguration(tokens, metadata.get().entry, 3);
auto config = parseClusterConfiguration(tokens, metadata.get().entry, 3, tokens.size());
if (!config.present()) {
return false;
}
@ -267,6 +414,7 @@ ACTOR Future<bool> metaclusterGetCommand(Reference<IDatabase> db, std::vector<St
obj[msgClusterKey] = metadata.toJson();
fmt::print("{}\n", json_spirit::write_string(json_spirit::mValue(obj), json_spirit::pretty_print).c_str());
} else {
fmt::print(" id: {}\n", metadata.entry.id.toString().c_str());
fmt::print(" connection string: {}\n", metadata.connectionString.toString().c_str());
fmt::print(" cluster state: {}\n", DataClusterEntry::clusterStateToString(metadata.entry.clusterState));
fmt::print(" tenant group capacity: {}\n", metadata.entry.capacity.numTenantGroups);
@ -394,6 +542,8 @@ Future<bool> metaclusterCommand(Reference<IDatabase> db, std::vector<StringRef>
return metaclusterRegisterCommand(db, tokens);
} else if (tokencmp(tokens[1], "remove")) {
return metaclusterRemoveCommand(db, tokens);
} else if (tokencmp(tokens[1], "restore")) {
return metaclusterRestoreCommand(db, tokens);
} else if (tokencmp(tokens[1], "configure")) {
return metaclusterConfigureCommand(db, tokens);
} else if (tokencmp(tokens[1], "list")) {
@ -413,9 +563,8 @@ void metaclusterGenerator(const char* text,
std::vector<std::string>& lc,
std::vector<StringRef> const& tokens) {
if (tokens.size() == 1) {
const char* opts[] = {
"create_experimental", "decommission", "register", "remove", "configure", "list", "get", "status", nullptr
};
const char* opts[] = { "create_experimental", "decommission", "register", "remove", "restore",
"configure", "list", "get", "status", nullptr };
arrayGenerator(text, line, opts, lc);
} else if (tokens.size() > 1 && (tokencmp(tokens[1], "register") || tokencmp(tokens[1], "configure"))) {
const char* opts[] = { "max_tenant_groups=", "connection_string=", nullptr };
@ -424,14 +573,35 @@ void metaclusterGenerator(const char* text,
(tokens.size() == 3 && tokencmp(tokens[1], "get"))) {
const char* opts[] = { "JSON", nullptr };
arrayGenerator(text, line, opts, lc);
} else if (tokens.size() == 2 && tokencmp(tokens[1], "remove")) {
const char* opts[] = { "FORCE", nullptr };
arrayGenerator(text, line, opts, lc);
} else if (tokens.size() > 1 && tokencmp(tokens[1], "restore")) {
if (tokens.size() == 3) {
const char* opts[] = { "dryrun", "connection_string=", nullptr };
arrayGenerator(text, line, opts, lc);
} else {
bool dryrun = tokens[3] == "dryrun"_sr;
if (tokens.size() == 3 + (int)dryrun) {
const char* opts[] = { "connection_string=", nullptr };
arrayGenerator(text, line, opts, lc);
} else if (tokens.size() == 4 + (int)dryrun) {
const char* opts[] = { "restore_known_data_cluster", "repopulate_from_data_cluster", nullptr };
arrayGenerator(text, line, opts, lc);
} else if (tokens.size() == 5 + (int)dryrun) {
const char* opts[] = { "force_join_new_metacluster", nullptr };
arrayGenerator(text, line, opts, lc);
}
}
}
}
std::vector<const char*> metaclusterHintGenerator(std::vector<StringRef> const& tokens, bool inArgument) {
if (tokens.size() == 1) {
return { "<create_experimental|decommission|register|remove|configure|list|get|status>", "[ARGS]" };
} else if (tokencmp(tokens[1], "create_experimental")) {
return { "<NAME> <TENANT_ID_PREFIX>" };
return { "<create_experimental|decommission|register|remove|restore|configure|list|get|status>", "[ARGS]" };
} else if (tokencmp(tokens[1], "create_experimental") && tokens.size() < 4) {
static std::vector<const char*> opts = { "<NAME>", "<TENANT_ID_PREFIX>" };
return std::vector<const char*>(opts.begin() + tokens.size() - 2, opts.end());
} else if (tokencmp(tokens[1], "decommission")) {
return {};
} else if (tokencmp(tokens[1], "register") && tokens.size() < 5) {
@ -449,6 +619,19 @@ std::vector<const char*> metaclusterHintGenerator(std::vector<StringRef> const&
} else {
return {};
}
} else if (tokencmp(tokens[1], "restore") && tokens.size() < 7) {
static std::vector<const char*> opts = { "<NAME>",
"[dryrun]",
"connection_string=<CONNECTION_STRING>",
"<restore_known_data_cluster|repopulate_from_data_cluster>",
"[force_join_new_metacluster]" };
if (tokens.size() < 4 || (tokens[3].size() <= 6 && "dryrun"_sr.startsWith(tokens[3]))) {
return std::vector<const char*>(opts.begin() + tokens.size() - 2, opts.end());
} else if (tokens.size() < 6) {
return std::vector<const char*>(opts.begin() + tokens.size() - 1, opts.end());
} else {
return {};
}
} else if (tokencmp(tokens[1], "configure")) {
static std::vector<const char*> opts = {
"<NAME>", "<max_tenant_groups=<NUM_GROUPS>|connection_string=<CONNECTION_STRING>>"
@ -469,14 +652,16 @@ std::vector<const char*> metaclusterHintGenerator(std::vector<StringRef> const&
CommandFactory metaclusterRegisterFactory(
"metacluster",
CommandHelp("metacluster <create_experimental|decommission|register|remove|configure|list|get|status> [ARGS]",
"view and manage a metacluster",
"`create_experimental' and `decommission' set up or deconfigure a metacluster.\n"
"`register' and `remove' add and remove data clusters from the metacluster.\n"
"`configure' updates the configuration of a data cluster.\n"
"`list' prints a list of data clusters in the metacluster.\n"
"`get' prints the metadata for a particular data cluster.\n"
"`status' prints metacluster metadata.\n"),
CommandHelp(
"metacluster <create_experimental|decommission|register|remove|restore|configure|list|get|status> [ARGS]",
"view and manage a metacluster",
"`create_experimental' and `decommission' set up or deconfigure a metacluster.\n"
"`register' and `remove' add and remove data clusters from the metacluster.\n"
"`configure' updates the configuration of a data cluster.\n"
"`restore' is used to recover from lost management or data clusters.\n"
"`list' prints a list of data clusters in the metacluster.\n"
"`get' prints the metadata for a particular data cluster.\n"
"`status' prints metacluster metadata.\n"),
&metaclusterGenerator,
&metaclusterHintGenerator);

View File

@ -463,13 +463,15 @@ ACTOR Future<bool> tenantGetCommand(Reference<IDatabase> db, std::vector<StringR
std::string tenantState;
std::string tenantGroup;
std::string assignedCluster;
std::string error;
doc.get("id", id);
doc.get("prefix.printable", prefix);
doc.get("tenant_state", tenantState);
bool hasTenantGroup = doc.tryGet("tenant_group.printable", tenantGroup);
bool hasAssignedCluster = doc.tryGet("assigned_cluster", assignedCluster);
bool hasAssignedCluster = doc.tryGet("assigned_cluster.printable", assignedCluster);
bool hasError = doc.tryGet("error", error);
fmt::print(" id: {}\n", id);
fmt::print(" prefix: {}\n", printable(prefix).c_str());
@ -480,6 +482,9 @@ ACTOR Future<bool> tenantGetCommand(Reference<IDatabase> db, std::vector<StringR
if (hasAssignedCluster) {
fmt::print(" assigned cluster: {}\n", printable(assignedCluster).c_str());
}
if (hasError) {
fmt::print(" error: {}\n", error);
}
}
return true;
} catch (Error& e) {

View File

@ -23,6 +23,7 @@
#include "fdbclient/BackupAgent.actor.h"
#include "fdbclient/BlobCipher.h"
#include "fdbclient/CommitProxyInterface.h"
#include "fdbclient/CommitTransaction.h"
#include "fdbclient/FDBTypes.h"
#include "fdbclient/GetEncryptCipherKeys.actor.h"
@ -290,10 +291,28 @@ std::pair<Version, uint32_t> decodeBKMutationLogKey(Key key) {
bigEndian32(*(int32_t*)(key.begin() + backupLogPrefixBytes + sizeof(UID) + sizeof(uint8_t) + sizeof(int64_t))));
}
void _addResult(bool* tenantMapChanging,
VectorRef<MutationRef>* result,
int* mutationSize,
Arena* arena,
MutationRef logValue,
KeyRangeRef tenantMapRange) {
*tenantMapChanging = *tenantMapChanging || TenantAPI::tenantMapChanging(logValue, tenantMapRange);
result->push_back_deep(*arena, logValue);
*mutationSize += logValue.expectedSize();
}
/*
This actor is responsible for taking an original transaction which was added to the backup mutation log (represented
by "value" parameter), breaking it up into the individual MutationRefs (that constitute the transaction), decrypting
each mutation (if needed) and adding/removing prefixes from the mutations. The final mutations are then added to the
"result" vector alongside their encrypted counterparts (which is added to the "encryptedResult" vector)
*/
ACTOR static Future<Void> decodeBackupLogValue(Arena* arena,
VectorRef<MutationRef>* result,
VectorRef<Optional<MutationRef>>* encryptedResult,
int* mutationSize,
bool* tenantMapChanging,
Standalone<StringRef> value,
Key addPrefix,
Key removePrefix,
@ -325,6 +344,7 @@ ACTOR static Future<Void> decodeBackupLogValue(Arena* arena,
state int originalOffset = offset;
state DatabaseConfiguration config = wait(getDatabaseConfiguration(cx));
state KeyRangeRef tenantMapRange = TenantMetadata::tenantMap().subspace;
while (consumed < totalBytes) {
uint32_t type = 0;
@ -410,8 +430,7 @@ ACTOR static Future<Void> decodeBackupLogValue(Arena* arena,
logValue.param1 = logValue.param1.withPrefix(addPrefix, tempArena);
}
logValue.param2 = addPrefix == StringRef() ? allKeys.end : strinc(addPrefix, tempArena);
result->push_back_deep(*arena, logValue);
*mutationSize += logValue.expectedSize();
_addResult(tenantMapChanging, result, mutationSize, arena, logValue, tenantMapRange);
} else {
logValue.param1 = std::max(r.range().begin, range.begin);
logValue.param2 = minKey;
@ -423,8 +442,7 @@ ACTOR static Future<Void> decodeBackupLogValue(Arena* arena,
logValue.param1 = logValue.param1.withPrefix(addPrefix, tempArena);
logValue.param2 = logValue.param2.withPrefix(addPrefix, tempArena);
}
result->push_back_deep(*arena, logValue);
*mutationSize += logValue.expectedSize();
_addResult(tenantMapChanging, result, mutationSize, arena, logValue, tenantMapRange);
}
if (originalLogValue.param1 == logValue.param1 && originalLogValue.param2 == logValue.param2) {
encryptedResult->push_back_deep(*arena, encryptedLogValue);
@ -443,8 +461,7 @@ ACTOR static Future<Void> decodeBackupLogValue(Arena* arena,
if (addPrefix.size()) {
logValue.param1 = logValue.param1.withPrefix(addPrefix, tempArena);
}
result->push_back_deep(*arena, logValue);
*mutationSize += logValue.expectedSize();
_addResult(tenantMapChanging, result, mutationSize, arena, logValue, tenantMapRange);
// If we did not remove/add prefixes to the mutation then keep the original encrypted mutation so we
// do not have to re-encrypt unnecessarily
if (originalLogValue.param1 == logValue.param1 && originalLogValue.param2 == logValue.param2) {
@ -695,6 +712,41 @@ Future<Void> readCommitted(Database cx,
cx, results, Void(), lock, range, groupBy, Terminator::True, AccessSystemKeys::True, LockAware::True);
}
ACTOR Future<Void> sendCommitTransactionRequest(CommitTransactionRequest req,
Key uid,
Version newBeginVersion,
Key rangeBegin,
NotifiedVersion* committedVersion,
int* totalBytes,
int* mutationSize,
PromiseStream<Future<Void>> addActor,
FlowLock* commitLock,
PublicRequestStream<CommitTransactionRequest> commit) {
Key applyBegin = uid.withPrefix(applyMutationsBeginRange.begin);
Key versionKey = BinaryWriter::toValue(newBeginVersion, Unversioned());
Key rangeEnd = getApplyKey(newBeginVersion, uid);
// mutations and encrypted mutations (and their relationship) are described in greater detail in the definition of
// CommitTransactionRef in CommitTransaction.h
req.transaction.mutations.push_back_deep(req.arena, MutationRef(MutationRef::SetValue, applyBegin, versionKey));
req.transaction.encryptedMutations.push_back_deep(req.arena, Optional<MutationRef>());
req.transaction.write_conflict_ranges.push_back_deep(req.arena, singleKeyRange(applyBegin));
req.transaction.mutations.push_back_deep(req.arena, MutationRef(MutationRef::ClearRange, rangeBegin, rangeEnd));
req.transaction.encryptedMutations.push_back_deep(req.arena, Optional<MutationRef>());
req.transaction.write_conflict_ranges.push_back_deep(req.arena, singleKeyRange(rangeBegin));
// The commit request contains no read conflict ranges, so regardless of what read version we
// choose, it's impossible for us to get a transaction_too_old error back, and it's impossible
// for our transaction to be aborted due to conflicts.
req.transaction.read_snapshot = committedVersion->get();
req.flags = req.flags | CommitTransactionRequest::FLAG_IS_LOCK_AWARE;
*totalBytes += *mutationSize;
wait(commitLock->take(TaskPriority::DefaultYield, *mutationSize));
addActor.send(commitLock->releaseWhen(success(commit.getReply(req)), *mutationSize));
return Void();
}
ACTOR Future<int> kvMutationLogToTransactions(Database cx,
PromiseStream<RCGroup> results,
Reference<FlowLock> lock,
@ -717,20 +769,26 @@ ACTOR Future<int> kvMutationLogToTransactions(Database cx,
state CommitTransactionRequest req;
state Version newBeginVersion = invalidVersion;
state int mutationSize = 0;
state bool tenantMapChanging = false;
loop {
try {
state RCGroup group = waitNext(results.getFuture());
state CommitTransactionRequest curReq;
lock->release(group.items.expectedSize());
state int curBatchMutationSize = 0;
tenantMapChanging = false;
BinaryWriter bw(Unversioned());
for (int i = 0; i < group.items.size(); ++i) {
bw.serializeBytes(group.items[i].value);
}
// Parse a single transaction from the backup mutation log
Standalone<StringRef> value = bw.toValue();
wait(decodeBackupLogValue(&req.arena,
&req.transaction.mutations,
&req.transaction.encryptedMutations,
&mutationSize,
wait(decodeBackupLogValue(&curReq.arena,
&curReq.transaction.mutations,
&curReq.transaction.encryptedMutations,
&curBatchMutationSize,
&tenantMapChanging,
value,
addPrefix,
removePrefix,
@ -739,8 +797,48 @@ ACTOR Future<int> kvMutationLogToTransactions(Database cx,
cx,
tenantMap,
provisionalProxy));
// A single call to decodeBackupLogValue (above) will only parse mutations from a single transaction,
// however in the code below we batch the results across several calls to decodeBackupLogValue and send
// it in one big CommitTransactionRequest (so one CTR contains mutations from multiple transactions).
// Generally, this would be fine since the mutations in the log are ordered (and thus so are the results
// after calling decodeBackupLogValue). However in the CommitProxy we do not allow mutations which
// change the tenant map to appear alongside regular normalKey mutations in a single
// CommitTransactionRequest. Thus the code below will immediately send any mutations accumulated thus
// far if the latest call to decodeBackupLogValue contained a transaction which changed the tenant map
// (before processing the mutations which caused the tenant map to change).
if (tenantMapChanging && req.transaction.mutations.size()) {
// If the tenantMap is changing send the previous CommitTransactionRequest to the CommitProxy
TraceEvent("MutationLogRestoreTenantMapChanging").detail("BeginVersion", newBeginVersion);
CODE_PROBE(true, "mutation log tenant map changing");
wait(sendCommitTransactionRequest(req,
uid,
newBeginVersion,
rangeBegin,
committedVersion,
&totalBytes,
&mutationSize,
addActor,
commitLock,
commit));
req = CommitTransactionRequest();
mutationSize = 0;
}
state int i;
for (i = 0; i < curReq.transaction.mutations.size(); i++) {
req.transaction.mutations.push_back_deep(req.arena, curReq.transaction.mutations[i]);
req.transaction.encryptedMutations.push_back_deep(req.arena,
curReq.transaction.encryptedMutations[i]);
}
mutationSize += curBatchMutationSize;
newBeginVersion = group.groupKey + 1;
if (mutationSize >= CLIENT_KNOBS->BACKUP_LOG_WRITE_BATCH_MAX_SIZE) {
// At this point if the tenant map changed we would have already sent any normalKey mutations
// accumulated thus far, so all that's left to do is to send all the mutations in the offending
// transaction that changed the tenant map. This is necessary so that we don't batch these tenant map
// mutations with future normalKey mutations (which will result in the same problem discussed above).
if (tenantMapChanging || mutationSize >= CLIENT_KNOBS->BACKUP_LOG_WRITE_BATCH_MAX_SIZE) {
break;
}
} catch (Error& e) {
@ -756,28 +854,16 @@ ACTOR Future<int> kvMutationLogToTransactions(Database cx,
throw;
}
}
Key applyBegin = uid.withPrefix(applyMutationsBeginRange.begin);
Key versionKey = BinaryWriter::toValue(newBeginVersion, Unversioned());
Key rangeEnd = getApplyKey(newBeginVersion, uid);
req.transaction.mutations.push_back_deep(req.arena, MutationRef(MutationRef::SetValue, applyBegin, versionKey));
req.transaction.encryptedMutations.push_back_deep(req.arena, Optional<MutationRef>());
req.transaction.write_conflict_ranges.push_back_deep(req.arena, singleKeyRange(applyBegin));
req.transaction.mutations.push_back_deep(req.arena, MutationRef(MutationRef::ClearRange, rangeBegin, rangeEnd));
req.transaction.encryptedMutations.push_back_deep(req.arena, Optional<MutationRef>());
req.transaction.write_conflict_ranges.push_back_deep(req.arena, singleKeyRange(rangeBegin));
// The commit request contains no read conflict ranges, so regardless of what read version we
// choose, it's impossible for us to get a transaction_too_old error back, and it's impossible
// for our transaction to be aborted due to conflicts.
req.transaction.read_snapshot = committedVersion->get();
req.flags = req.flags | CommitTransactionRequest::FLAG_IS_LOCK_AWARE;
totalBytes += mutationSize;
wait(commitLock->take(TaskPriority::DefaultYield, mutationSize));
addActor.send(commitLock->releaseWhen(success(commit.getReply(req)), mutationSize));
wait(sendCommitTransactionRequest(req,
uid,
newBeginVersion,
rangeBegin,
committedVersion,
&totalBytes,
&mutationSize,
addActor,
commitLock,
commit));
if (endOfStream) {
return totalBytes;
}
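
The flushing rule described in the comments above can be summarized with a small Python sketch (hypothetical helpers, not the actual flow/actor code): decoded transactions are appended to one pending commit, but a transaction that changes the tenant map forces the pending batch to be flushed first and is then committed on its own, so tenant-map mutations never share a CommitTransactionRequest with ordinary mutations.

BATCH_MAX_SIZE = 1_000_000  # stand-in for BACKUP_LOG_WRITE_BATCH_MAX_SIZE

def apply_mutation_log(decoded_txns, send_commit):
    # decoded_txns yields (mutations, changes_tenant_map, size) in log order;
    # send_commit stands in for building and sending a CommitTransactionRequest.
    batch, batch_size = [], 0
    for mutations, changes_tenant_map, size in decoded_txns:
        if changes_tenant_map and batch:
            # Flush ordinary mutations accumulated so far before the tenant map changes.
            send_commit(batch)
            batch, batch_size = [], 0
        batch.extend(mutations)
        batch_size += size
        if changes_tenant_map or batch_size >= BATCH_MAX_SIZE:
            # A tenant-map-changing transaction is committed by itself; otherwise
            # flush only once the size threshold is reached.
            send_commit(batch)
            batch, batch_size = [], 0
    if batch:
        send_commit(batch)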

File diff suppressed because it is too large

View File

@ -220,9 +220,7 @@ void validateEncryptionHeaderDetails(const BlobGranuleFileEncryptionKeys& eKeys,
const BlobCipherEncryptHeader& header,
const StringRef& ivRef) {
// Validate encryption header 'cipherHeader' details sanity
if (!(header.cipherHeaderDetails.baseCipherId == eKeys.headerCipherKey->getBaseCipherId() &&
header.cipherHeaderDetails.encryptDomainId == eKeys.headerCipherKey->getDomainId() &&
header.cipherHeaderDetails.salt == eKeys.headerCipherKey->getSalt())) {
if (header.cipherHeaderDetails.isValid() && header.cipherHeaderDetails != eKeys.headerCipherKey->details()) {
TraceEvent(SevError, "EncryptionHeader_CipherHeaderMismatch")
.detail("HeaderDomainId", eKeys.headerCipherKey->getDomainId())
.detail("ExpectedHeaderDomainId", header.cipherHeaderDetails.encryptDomainId)
@ -233,9 +231,7 @@ void validateEncryptionHeaderDetails(const BlobGranuleFileEncryptionKeys& eKeys,
throw encrypt_header_metadata_mismatch();
}
// Validate encryption header 'cipherText' details sanity
if (!(header.cipherTextDetails.baseCipherId == eKeys.textCipherKey->getBaseCipherId() &&
header.cipherTextDetails.encryptDomainId == eKeys.textCipherKey->getDomainId() &&
header.cipherTextDetails.salt == eKeys.textCipherKey->getSalt())) {
if (!header.cipherTextDetails.isValid() || header.cipherTextDetails != eKeys.textCipherKey->details()) {
TraceEvent(SevError, "EncryptionHeader_CipherTextMismatch")
.detail("TextDomainId", eKeys.textCipherKey->getDomainId())
.detail("ExpectedTextDomainId", header.cipherTextDetails.encryptDomainId)
@ -257,12 +253,10 @@ void validateEncryptionHeaderDetails(const BlobGranuleFileEncryptionKeys& eKeys,
void validateEncryptionHeaderDetails(const BlobGranuleFileEncryptionKeys& eKeys,
const BlobCipherEncryptHeaderRef& headerRef,
const StringRef& ivRef) {
headerRef.validateEncryptionHeaderDetails(BlobCipherDetails(eKeys.textCipherKey->getDomainId(),
eKeys.textCipherKey->getBaseCipherId(),
eKeys.textCipherKey->getSalt()),
BlobCipherDetails(eKeys.headerCipherKey->getDomainId(),
eKeys.headerCipherKey->getBaseCipherId(),
eKeys.headerCipherKey->getSalt()),
ASSERT(eKeys.textCipherKey.isValid());
headerRef.validateEncryptionHeaderDetails(eKeys.textCipherKey->details(),
eKeys.headerCipherKey.isValid() ? eKeys.headerCipherKey->details()
: BlobCipherDetails(),
ivRef);
}

View File

@ -297,7 +297,8 @@ void ClientKnobs::initialize(Randomize randomize) {
init( METACLUSTER_ASSIGNMENT_CLUSTERS_TO_CHECK, 5 ); if ( randomize && BUGGIFY ) METACLUSTER_ASSIGNMENT_CLUSTERS_TO_CHECK = 1;
init( METACLUSTER_ASSIGNMENT_FIRST_CHOICE_DELAY, 1.0 ); if ( randomize && BUGGIFY ) METACLUSTER_ASSIGNMENT_FIRST_CHOICE_DELAY = deterministicRandom()->random01() * 60;
init( METACLUSTER_ASSIGNMENT_AVAILABILITY_TIMEOUT, 10.0 ); if ( randomize && BUGGIFY ) METACLUSTER_ASSIGNMENT_AVAILABILITY_TIMEOUT = 1 + deterministicRandom()->random01() * 59;
init( TENANT_ENTRY_CACHE_LIST_REFRESH_INTERVAL, 2 ); if( randomize && BUGGIFY ) TENANT_ENTRY_CACHE_LIST_REFRESH_INTERVAL = deterministicRandom()->randomInt(1, 10);
init( METACLUSTER_RESTORE_BATCH_SIZE, 1000 ); if ( randomize && BUGGIFY ) METACLUSTER_RESTORE_BATCH_SIZE = 1 + deterministicRandom()->randomInt(0, 3);
init( TENANT_ENTRY_CACHE_LIST_REFRESH_INTERVAL, 2 ); if( randomize && BUGGIFY ) TENANT_ENTRY_CACHE_LIST_REFRESH_INTERVAL = deterministicRandom()->randomInt(1, 10);
init( CLIENT_ENABLE_USING_CLUSTER_ID_KEY, false );
init( ENABLE_ENCRYPTION_CPU_TIME_LOGGING, false );

View File

@ -505,7 +505,7 @@ public:
struct SnapshotFileBackupEncryptionKeys {
Reference<BlobCipherKey> textCipherKey;
Reference<BlobCipherKey> headerCipherKey;
Optional<Reference<BlobCipherKey>> headerCipherKey;
StringRef ivRef;
};
@ -575,27 +575,24 @@ struct EncryptedRangeFileWriter : public IRangeFileWriter {
wPtr = mutateString(buffer);
}
static void validateEncryptionHeader(Reference<BlobCipherKey> headerCipherKey,
static void validateEncryptionHeader(Optional<Reference<BlobCipherKey>> headerCipherKey,
Reference<BlobCipherKey> textCipherKey,
BlobCipherEncryptHeader& header) {
// Validate encryption header 'cipherHeader' details
if (!(header.cipherHeaderDetails.baseCipherId == headerCipherKey->getBaseCipherId() &&
header.cipherHeaderDetails.encryptDomainId == headerCipherKey->getDomainId() &&
header.cipherHeaderDetails.salt == headerCipherKey->getSalt())) {
if (header.cipherHeaderDetails.isValid() &&
(!headerCipherKey.present() || header.cipherHeaderDetails != headerCipherKey.get()->details())) {
TraceEvent(SevWarn, "EncryptionHeader_CipherHeaderMismatch")
.detail("HeaderDomainId", headerCipherKey->getDomainId())
.detail("HeaderDomainId", headerCipherKey.get()->getDomainId())
.detail("ExpectedHeaderDomainId", header.cipherHeaderDetails.encryptDomainId)
.detail("HeaderBaseCipherId", headerCipherKey->getBaseCipherId())
.detail("HeaderBaseCipherId", headerCipherKey.get()->getBaseCipherId())
.detail("ExpectedHeaderBaseCipherId", header.cipherHeaderDetails.baseCipherId)
.detail("HeaderSalt", headerCipherKey->getSalt())
.detail("HeaderSalt", headerCipherKey.get()->getSalt())
.detail("ExpectedHeaderSalt", header.cipherHeaderDetails.salt);
throw encrypt_header_metadata_mismatch();
}
// Validate encryption text 'cipherText' details sanity
if (!(header.cipherTextDetails.baseCipherId == textCipherKey->getBaseCipherId() &&
header.cipherTextDetails.encryptDomainId == textCipherKey->getDomainId() &&
header.cipherTextDetails.salt == textCipherKey->getSalt())) {
if (!header.cipherTextDetails.isValid() || header.cipherTextDetails != textCipherKey->details()) {
TraceEvent(SevWarn, "EncryptionHeader_CipherTextMismatch")
.detail("TextDomainId", textCipherKey->getDomainId())
.detail("ExpectedTextDomainId", header.cipherTextDetails.encryptDomainId)
@ -614,7 +611,6 @@ struct EncryptedRangeFileWriter : public IRangeFileWriter {
Arena* arena) {
Reference<AsyncVar<ClientDBInfo> const> dbInfo = cx->clientInfo;
TextAndHeaderCipherKeys cipherKeys = wait(getEncryptCipherKeys(dbInfo, header, BlobCipherMetrics::RESTORE));
ASSERT(cipherKeys.cipherHeaderKey.isValid() && cipherKeys.cipherTextKey.isValid());
validateEncryptionHeader(cipherKeys.cipherHeaderKey, cipherKeys.cipherTextKey, header);
DecryptBlobCipherAes256Ctr decryptor(
cipherKeys.cipherTextKey, cipherKeys.cipherHeaderKey, header.iv, BlobCipherMetrics::BACKUP);
@ -638,11 +634,14 @@ struct EncryptedRangeFileWriter : public IRangeFileWriter {
}
ACTOR static Future<Void> encrypt(EncryptedRangeFileWriter* self) {
ASSERT(self->cipherKeys.headerCipherKey.isValid() && self->cipherKeys.textCipherKey.isValid());
// TODO: The header cipher key is not needed for 'no authentication' encryption
ASSERT(self->cipherKeys.headerCipherKey.present() && self->cipherKeys.headerCipherKey.get().isValid() &&
self->cipherKeys.textCipherKey.isValid());
// Ensure that the keys we got are still valid before flushing the block
if (self->cipherKeys.headerCipherKey->isExpired() || self->cipherKeys.headerCipherKey->needsRefresh()) {
if (self->cipherKeys.headerCipherKey.get()->isExpired() ||
self->cipherKeys.headerCipherKey.get()->needsRefresh()) {
Reference<BlobCipherKey> cipherKey =
wait(refreshKey(self, self->cipherKeys.headerCipherKey->getDomainId()));
wait(refreshKey(self, self->cipherKeys.headerCipherKey.get()->getDomainId()));
self->cipherKeys.headerCipherKey = cipherKey;
}
if (self->cipherKeys.textCipherKey->isExpired() || self->cipherKeys.textCipherKey->needsRefresh()) {
@ -852,7 +851,8 @@ struct EncryptedRangeFileWriter : public IRangeFileWriter {
// Start a new block if needed, then write the key and value
ACTOR static Future<Void> writeKV_impl(EncryptedRangeFileWriter* self, Key k, Value v) {
if (!self->cipherKeys.headerCipherKey.isValid() || !self->cipherKeys.textCipherKey.isValid()) {
if (!self->cipherKeys.headerCipherKey.present() || !self->cipherKeys.headerCipherKey.get().isValid() ||
!self->cipherKeys.textCipherKey.isValid()) {
wait(updateEncryptionKeysCtx(self, k));
}
state int toWrite = sizeof(int32_t) + k.size() + sizeof(int32_t) + v.size();
@ -874,7 +874,8 @@ struct EncryptedRangeFileWriter : public IRangeFileWriter {
ACTOR static Future<Void> writeKey_impl(EncryptedRangeFileWriter* self, Key k) {
// TODO (Nim): Is it possible to write empty begin and end keys?
if (k.size() > 0 &&
(!self->cipherKeys.headerCipherKey.isValid() || !self->cipherKeys.textCipherKey.isValid())) {
(!self->cipherKeys.headerCipherKey.present() || !self->cipherKeys.headerCipherKey.get().isValid() ||
!self->cipherKeys.textCipherKey.isValid())) {
wait(updateEncryptionKeysCtx(self, k));
}
// Need to account for extra "empty" value being written in the case of crossing tenant boundaries

View File

@ -21,9 +21,15 @@
#include "fdbclient/Metacluster.h"
#include "fdbclient/MetaclusterManagement.actor.h"
FDB_DEFINE_BOOLEAN_PARAM(AddNewTenants);
FDB_DEFINE_BOOLEAN_PARAM(ApplyManagementClusterUpdates);
FDB_DEFINE_BOOLEAN_PARAM(RemoveMissingTenants);
FDB_DEFINE_BOOLEAN_PARAM(AssignClusterAutomatically);
FDB_DEFINE_BOOLEAN_PARAM(GroupAlreadyExists);
FDB_DEFINE_BOOLEAN_PARAM(IsRestoring);
FDB_DEFINE_BOOLEAN_PARAM(RunOnDisconnectedCluster);
FDB_DEFINE_BOOLEAN_PARAM(RunOnMismatchedCluster);
FDB_DEFINE_BOOLEAN_PARAM(RestoreDryRun);
FDB_DEFINE_BOOLEAN_PARAM(ForceJoinNewMetacluster);
std::string clusterTypeToString(const ClusterType& clusterType) {
switch (clusterType) {
@ -40,6 +46,8 @@ std::string clusterTypeToString(const ClusterType& clusterType) {
std::string DataClusterEntry::clusterStateToString(DataClusterState clusterState) {
switch (clusterState) {
case DataClusterState::REGISTERING:
return "registering";
case DataClusterState::READY:
return "ready";
case DataClusterState::REMOVING:
@ -52,7 +60,9 @@ std::string DataClusterEntry::clusterStateToString(DataClusterState clusterState
}
DataClusterState DataClusterEntry::stringToClusterState(std::string stateStr) {
if (stateStr == "ready") {
if (stateStr == "registering") {
return DataClusterState::REGISTERING;
} else if (stateStr == "ready") {
return DataClusterState::READY;
} else if (stateStr == "removing") {
return DataClusterState::REMOVING;
@ -65,6 +75,7 @@ DataClusterState DataClusterEntry::stringToClusterState(std::string stateStr) {
json_spirit::mObject DataClusterEntry::toJson() const {
json_spirit::mObject obj;
obj["id"] = id.toString();
obj["capacity"] = capacity.toJson();
obj["allocated"] = allocated.toJson();
obj["cluster_state"] = DataClusterEntry::clusterStateToString(clusterState);
@ -82,4 +93,9 @@ MetaclusterMetadata::metaclusterRegistration() {
static KeyBackedObjectProperty<MetaclusterRegistrationEntry, decltype(IncludeVersion())> instance(
"\xff/metacluster/clusterRegistration"_sr, IncludeVersion());
return instance;
}
KeyBackedSet<UID>& MetaclusterMetadata::registrationTombstones() {
static KeyBackedSet<UID> instance("\xff/metacluster/registrationTombstones"_sr);
return instance;
}

View File

@ -174,6 +174,10 @@ std::string TenantMapEntry::toJson() const {
tenantEntry["tenant_group"] = binaryToJson(tenantGroup.get());
}
if (tenantState == TenantState::ERROR && error.size()) {
tenantEntry["error"] = error;
}
return json_spirit::write_string(json_spirit::mValue(tenantEntry));
}
@ -192,6 +196,13 @@ void TenantMapEntry::configure(Standalone<StringRef> parameter, Optional<Value>
}
}
bool TenantMapEntry::operator==(TenantMapEntry const& other) const {
return id == other.id && tenantName == other.tenantName && tenantState == other.tenantState &&
tenantLockState == other.tenantLockState && tenantGroup == other.tenantGroup &&
assignedCluster == other.assignedCluster && configurationSequenceNum == other.configurationSequenceNum &&
renameDestination == other.renameDestination && error == other.error;
}
json_spirit::mObject TenantGroupEntry::toJson() const {
json_spirit::mObject tenantGroupEntry;
if (assignedCluster.present()) {

View File

@ -67,6 +67,16 @@ int64_t extractTenantIdFromKeyRef(StringRef s) {
return TenantAPI::prefixToId(prefix, EnforceValidTenantId::False);
}
bool tenantMapChanging(MutationRef const& mutation, KeyRangeRef const& tenantMapRange) {
if (isSingleKeyMutation((MutationRef::Type)mutation.type) && mutation.param1.startsWith(tenantMapRange.begin)) {
return true;
} else if (mutation.type == MutationRef::ClearRange &&
tenantMapRange.intersects(KeyRangeRef(mutation.param1, mutation.param2))) {
return true;
}
return false;
}
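The new tenantMapChanging helper classifies a mutation as touching the tenant map when a single-key mutation's key sits under the tenant map prefix, or when a ClearRange intersects the tenant map range. A hedged sketch of how a caller might use it (mirrors the commit proxy change later in this diff; assumes <algorithm> and the usual fdbclient Tenant headers are included):

bool batchChangesTenantMap(const VectorRef<MutationRef>& mutations) {
	KeyRangeRef tenantMapRange = TenantMetadata::tenantMap().subspace;
	// True if any mutation in the batch creates, deletes, or clears tenant map entries.
	return std::any_of(mutations.begin(), mutations.end(), [&](const MutationRef& m) {
		return TenantAPI::tenantMapChanging(m, tenantMapRange);
	});
}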
// Validates whether the lastTenantId and the nextTenantId share the same 2-byte prefix
bool nextTenantIdPrefixMatches(int64_t lastTenantId, int64_t nextTenantId) {
if (getTenantIdPrefix(nextTenantId) != getTenantIdPrefix(lastTenantId)) {

View File

@ -23,6 +23,7 @@
#include "fdbrpc/Stats.h"
#include "fdbclient/Knobs.h"
#include "flow/Arena.h"
#include "flow/EncryptUtils.h"
#include "flow/FastRef.h"
@ -76,6 +77,7 @@ public:
// Order of this enum has to match initializer of counterSets.
enum UsageType : int {
TLOG = 0,
TLOG_POST_RESOLUTION,
KV_MEMORY,
KV_REDWOOD,
BLOB_GRANULE,
@ -163,6 +165,10 @@ struct BlobCipherDetails {
// Random salt
EncryptCipherRandomSalt salt{};
static uint32_t getSize() {
return sizeof(EncryptCipherDomainId) + sizeof(EncryptCipherBaseKeyId) + sizeof(EncryptCipherRandomSalt);
}
BlobCipherDetails() {}
BlobCipherDetails(const EncryptCipherDomainId& dId,
const EncryptCipherBaseKeyId& bId,
@ -174,6 +180,11 @@ struct BlobCipherDetails {
}
bool operator!=(const BlobCipherDetails& o) const { return !(*this == o); }
bool isValid() const {
return this->encryptDomainId != INVALID_ENCRYPT_DOMAIN_ID &&
this->baseCipherId != INVALID_ENCRYPT_CIPHER_KEY_ID && this->salt != INVALID_ENCRYPT_RANDOM_SALT;
}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, encryptDomainId, baseCipherId, salt);
@ -194,6 +205,15 @@ struct hash<BlobCipherDetails> {
};
} // namespace std
struct EncryptHeaderCipherDetails {
BlobCipherDetails textCipherDetails;
Optional<BlobCipherDetails> headerCipherDetails;
EncryptHeaderCipherDetails(const BlobCipherDetails& tCipherDetails) : textCipherDetails(tCipherDetails) {}
EncryptHeaderCipherDetails(const BlobCipherDetails& tCipherDetails, const BlobCipherDetails& hCipherDetails)
: textCipherDetails(tCipherDetails), headerCipherDetails(hCipherDetails) {}
};
#pragma pack(push, 1) // exact fit - no padding
// Why BinarySerialization instead of ObjectSerialization?
@ -205,9 +225,9 @@ struct hash<BlobCipherDetails> {
// ----------------------------------------------------------------------------------------------------------
// | S.No | ObjFlags | BinaryFlags | ObjectAlgo | BinaryAlgo | TotalObject | TotalBinary |
// | ----------------- | ----------- | ------------ | ----------- | ---------- | ------------ | ------------ |
// | AesCtrNoAuth | 40 | 3 | 104 | 40 | 208 | 47 |
// | AesCtrHmacSha | 40 | 3 | 184 | 96 | 288 | 103 |
// | AesCtrAesCmac | 40 | 3 | 168 | 80 | 272 | 87 |
// | AesCtrNoAuth | 40 | 3 | 104 | 40 | 208 | 46 |
// | AesCtrHmacSha | 40 | 3 | 184 | 96 | 288 | 102 |
// | AesCtrAesCmac | 40 | 3 | 168 | 80 | 272 | 86 |
// ----------------------------------------------------------------------------------------------------------
struct BlobCipherEncryptHeaderFlagsV1 {
@ -256,8 +276,10 @@ struct BlobCipherEncryptHeaderFlagsV1 {
// 'encrypted buffer', compared to reading only encryptionHeader and ensuring its sanity; for instance:
// backup-files.
template <uint32_t AuthTokenSize>
template <class Params>
struct AesCtrWithAuthV1 {
using Self = AesCtrWithAuthV1<Params>;
// Serializable fields
// Text cipher encryption information
@ -267,40 +289,96 @@ struct AesCtrWithAuthV1 {
// Initialization vector
uint8_t iv[AES_256_IV_LENGTH];
// Authentication token
uint8_t authToken[AuthTokenSize];
uint8_t authToken[Params::authTokenSize];
AesCtrWithAuthV1() {}
AesCtrWithAuthV1() = default;
AesCtrWithAuthV1(const BlobCipherDetails& textDetails,
const BlobCipherDetails& headerDetails,
const uint8_t* ivBuf,
const int ivLen,
Arena& arena)
const int ivLen)
: cipherTextDetails(textDetails), cipherHeaderDetails(headerDetails) {
ASSERT_EQ(ivLen, AES_256_IV_LENGTH);
memcpy(&iv[0], ivBuf, ivLen);
memset(&authToken[0], 0, AuthTokenSize);
memset(&authToken[0], 0, Params::authTokenSize);
}
bool operator==(const AesCtrWithAuthV1<AuthTokenSize>& o) const {
bool operator==(const Self& o) const {
return cipherHeaderDetails == o.cipherHeaderDetails && cipherTextDetails == o.cipherTextDetails &&
memcmp(&iv[0], &o.iv[0], AES_256_IV_LENGTH) == 0 &&
memcmp(&authToken[0], &o.authToken[0], AuthTokenSize) == 0;
memcmp(&authToken[0], &o.authToken[0], Params::authTokenSize) == 0;
}
static Standalone<StringRef> toStringRef(const AesCtrWithAuthV1<AuthTokenSize>& algoHeader, Arena& arena) {
BinaryWriter wr(AssumeVersion(ProtocolVersion::withEncryptionAtRest()));
wr.serializeBytes(&algoHeader, sizeof(AesCtrWithAuthV1<AuthTokenSize>));
return wr.toValue(arena);
}
static uint32_t getSize() { return BlobCipherDetails::getSize() * 2 + AES_256_IV_LENGTH + Params::authTokenSize; }
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, cipherTextDetails, cipherHeaderDetails);
ar.serializeBytes(iv, AES_256_IV_LENGTH);
ar.serializeBytes(authToken, AuthTokenSize);
ar.serializeBytes(authToken, Params::authTokenSize);
}
};
template <class Params>
struct AesCtrWithAuth {
// Serializable fields
// Algorithm header version
uint8_t version = 1;
// List of supported versions.
union {
AesCtrWithAuthV1<Params> v1;
};
AesCtrWithAuth() {
// Only V1 is supported
ASSERT_EQ(1, Params::getDefaultHeaderVersion());
}
AesCtrWithAuth(AesCtrWithAuthV1<Params>& v) : v1(v) {
// Only V1 is supported
ASSERT_EQ(1, Params::getDefaultHeaderVersion());
}
static uint32_t getSize() { return AesCtrWithAuthV1<Params>::getSize() + 1; }
static Standalone<StringRef> toStringRef(const AesCtrWithAuth<Params>& algoHeader) {
BinaryWriter wr(AssumeVersion(ProtocolVersion::withEncryptionAtRest()));
wr << algoHeader;
return wr.toValue();
}
template <class Ar>
void serialize(Ar& ar) {
if (ar.isSerializing) {
ASSERT_EQ(1, version);
}
serializer(ar, version);
if (ar.isDeserializing && version != 1) {
TraceEvent(SevWarn, "BlobCipherEncryptHeaderUnsupportedAlgoHeaderVersion")
.detail("HeaderType", "AesCtrWith" + Params::authAlgoName())
.detail("Version", version);
throw not_implemented();
}
serializer(ar, v1);
}
};
struct AesCtrWithHmacParams {
static constexpr int authTokenSize = AUTH_TOKEN_HMAC_SHA_SIZE;
static std::string authAlgoName() { return "Hmac"; }
static uint8_t getDefaultHeaderVersion() { return CLIENT_KNOBS->ENCRYPT_HEADER_AES_CTR_HMAC_SHA_AUTH_VERSION; }
};
using AesCtrWithHmac = AesCtrWithAuth<AesCtrWithHmacParams>;
struct AesCtrWithCmacParams {
static constexpr int authTokenSize = AUTH_TOKEN_AES_CMAC_SIZE;
static std::string authAlgoName() { return "Cmac"; }
static uint8_t getDefaultHeaderVersion() { return CLIENT_KNOBS->ENCRYPT_HEADER_AES_CTR_AES_CMAC_AUTH_VERSION; }
};
using AesCtrWithCmac = AesCtrWithAuth<AesCtrWithCmacParams>;
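The bare AuthTokenSize template parameter above gives way to a small Params policy struct that bundles the token size, a display name, and the default header version, so they travel together. A standalone illustration of the pattern (simplified, not FDB code; token sizes are illustrative):

#include <cstdint>
#include <cstdio>
#include <string>

template <class Params>
struct AuthHeader {
	uint8_t authToken[Params::authTokenSize] = {};
	static uint32_t getSize() { return Params::authTokenSize + 1; } // +1 for the version byte
	static std::string name() { return "AesCtrWith" + Params::authAlgoName(); }
};

struct HmacParams {
	static constexpr int authTokenSize = 32; // e.g. an HMAC-SHA-256 token
	static std::string authAlgoName() { return "Hmac"; }
};

struct CmacParams {
	static constexpr int authTokenSize = 16; // e.g. an AES-CMAC token
	static std::string authAlgoName() { return "Cmac"; }
};

int main() {
	std::printf("%s header: %u bytes\n", AuthHeader<HmacParams>::name().c_str(), AuthHeader<HmacParams>::getSize());
	std::printf("%s header: %u bytes\n", AuthHeader<CmacParams>::name().c_str(), AuthHeader<CmacParams>::getSize());
	return 0;
}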
struct AesCtrNoAuthV1 {
// Serializable fields
@ -309,8 +387,8 @@ struct AesCtrNoAuthV1 {
// Initialization vector
uint8_t iv[AES_256_IV_LENGTH];
AesCtrNoAuthV1() {}
AesCtrNoAuthV1(const BlobCipherDetails& textDetails, const uint8_t* ivBuf, const int ivLen, Arena& arena)
AesCtrNoAuthV1() = default;
AesCtrNoAuthV1(const BlobCipherDetails& textDetails, const uint8_t* ivBuf, const int ivLen)
: cipherTextDetails(textDetails) {
ASSERT_EQ(ivLen, AES_256_IV_LENGTH);
memcpy(&iv[0], ivBuf, ivLen);
@ -320,11 +398,7 @@ struct AesCtrNoAuthV1 {
return cipherTextDetails == o.cipherTextDetails && memcmp(&iv[0], &o.iv[0], AES_256_IV_LENGTH) == 0;
}
static Standalone<StringRef> toStringRef(const AesCtrNoAuthV1& algoHeader, Arena& arena) {
BinaryWriter wr(AssumeVersion(ProtocolVersion::withEncryptionAtRest()));
wr.serializeBytes(&algoHeader, sizeof(AesCtrNoAuthV1));
return wr.toValue(arena);
}
static uint32_t getSize() { return BlobCipherDetails::getSize() + AES_256_IV_LENGTH; }
template <class Ar>
void serialize(Ar& ar) {
@ -333,27 +407,57 @@ struct AesCtrNoAuthV1 {
}
};
struct BlobCipherEncryptHeaderRef {
struct AesCtrNoAuth {
// Serializable fields
// HeaderFlags version tracker
uint16_t flagsVersion;
// Encryption algorithm header version tracker
uint16_t algoHeaderVersion;
// Algorithm header version
uint8_t version = 1;
// List of supported versions.
union {
AesCtrNoAuthV1 v1;
};
// The on-disk format doesn't store std::variant; since "serializer" doesn't currently support std::variant, the
// (de)serialization code serializes the relevant BlobCipherEncryptHeader and AlgoHeader structs for a given
// 'flagVersion' and 'algoHeaderVersion'. Refer to BlobCipherEncryptHeaderRef::serialize() for more details.
AesCtrNoAuth() {
// Only V1 is supported
ASSERT_EQ(1, CLIENT_KNOBS->ENCRYPT_HEADER_AES_CTR_NO_AUTH_VERSION);
}
AesCtrNoAuth(AesCtrNoAuthV1& v) : v1(v) {
// Only V1 is supported
ASSERT_EQ(1, CLIENT_KNOBS->ENCRYPT_HEADER_AES_CTR_NO_AUTH_VERSION);
}
static uint32_t getSize() { return AesCtrNoAuthV1::getSize() + 1; }
static Standalone<StringRef> toStringRef(const AesCtrNoAuth& algoHeader) {
BinaryWriter wr(AssumeVersion(ProtocolVersion::withEncryptionAtRest()));
wr << algoHeader;
return wr.toValue();
}
template <class Ar>
void serialize(Ar& ar) {
if (ar.isSerializing) {
ASSERT_EQ(1, version);
}
serializer(ar, version);
if (ar.isDeserializing && version != 1) {
TraceEvent(SevWarn, "BlobCipherEncryptHeaderUnsupportedAlgoHeaderVersion")
.detail("HeaderType", "AesCtrNoAuth")
.detail("Version", version);
throw not_implemented();
}
serializer(ar, v1);
}
};
struct BlobCipherEncryptHeaderRef {
// Serializable fields
std::variant<BlobCipherEncryptHeaderFlagsV1> flags;
std::variant<AesCtrNoAuthV1, AesCtrWithAuthV1<AUTH_TOKEN_HMAC_SHA_SIZE>, AesCtrWithAuthV1<AUTH_TOKEN_AES_CMAC_SIZE>>
algoHeader;
std::variant<AesCtrNoAuth, AesCtrWithHmac, AesCtrWithCmac> algoHeader;
BlobCipherEncryptHeaderRef()
: flagsVersion(INVALID_ENCRYPT_HEADERS_FLAG_VERSION),
algoHeaderVersion(INVALID_ENCRYPT_HEADER_ALGO_HEADER_VERSION) {}
BlobCipherEncryptHeaderRef(const BlobCipherEncryptHeaderRef& src)
: flagsVersion(src.flagsVersion), algoHeaderVersion(src.algoHeaderVersion), flags(src.flags),
algoHeader(src.algoHeader) {}
BlobCipherEncryptHeaderRef() = default;
BlobCipherEncryptHeaderRef(const BlobCipherEncryptHeaderRef& src) = default;
static BlobCipherEncryptHeaderRef fromStringRef(const StringRef& header) {
return BinaryReader::fromStringRef<BlobCipherEncryptHeaderRef>(
@ -371,95 +475,21 @@ struct BlobCipherEncryptHeaderRef {
const EncryptAuthTokenMode authMode,
const EncryptAuthTokenAlgo authAlgo);
int flagsVersion() const { return flags.index() + 1; }
int algoHeaderVersion() const {
return std::visit([&](auto& h) { return h.version; }, algoHeader);
}
template <class Ar>
void serialize(Ar& ar) {
// TODO: once native std::variant (de)serialization support is added, this method can be reduced to a much shorter
// implementation
uint8_t encryptMode;
EncryptAuthTokenMode authMode;
EncryptAuthTokenAlgo authAlgo;
serializer(ar, flagsVersion, algoHeaderVersion);
if (ar.isSerializing) {
if (flagsVersion != 1) {
TraceEvent(SevWarn, "BlobCipherEncryptHeaderUnsupportedFlagVersion").detail("Version", flagsVersion);
throw not_implemented();
}
BlobCipherEncryptHeaderFlagsV1 f = std::get<BlobCipherEncryptHeaderFlagsV1>(flags);
encryptMode = f.encryptMode;
authMode = (EncryptAuthTokenMode)f.authTokenMode;
authAlgo = (EncryptAuthTokenAlgo)f.authTokenAlgo;
serializer(ar, f);
if (encryptMode != ENCRYPT_CIPHER_MODE_AES_256_CTR) {
TraceEvent(SevWarn, "BlobCipherEncryptHeaderUnsupportedEncryptMode").detail("Mode", encryptMode);
throw not_implemented();
}
if (algoHeaderVersion != 1) {
TraceEvent(SevWarn, "BlobCipherEncryptHeaderUnsupportedAlgoHeaderVersion")
.detail("Version", algoHeaderVersion);
throw not_implemented();
}
if (authMode == ENCRYPT_HEADER_AUTH_TOKEN_MODE_NONE) {
AesCtrNoAuthV1 noAuth = std::get<AesCtrNoAuthV1>(algoHeader);
serializer(ar, noAuth);
} else {
ASSERT_EQ(authMode, ENCRYPT_HEADER_AUTH_TOKEN_MODE_SINGLE);
if (authAlgo == ENCRYPT_HEADER_AUTH_TOKEN_ALGO_HMAC_SHA) {
AesCtrWithAuthV1<AUTH_TOKEN_HMAC_SHA_SIZE> hmacSha =
std::get<AesCtrWithAuthV1<AUTH_TOKEN_HMAC_SHA_SIZE>>(algoHeader);
serializer(ar, hmacSha);
} else {
ASSERT_EQ(authAlgo, ENCRYPT_HEADER_AUTH_TOKEN_ALGO_AES_CMAC);
AesCtrWithAuthV1<AUTH_TOKEN_AES_CMAC_SIZE> aesCmac =
std::get<AesCtrWithAuthV1<AUTH_TOKEN_AES_CMAC_SIZE>>(algoHeader);
serializer(ar, aesCmac);
}
}
} else if (ar.isDeserializing) {
if (flagsVersion != 1) {
TraceEvent(SevWarn, "BlobCipherEncryptHeaderUnsupportedFlagVersion").detail("Version", flagsVersion);
throw not_implemented();
}
BlobCipherEncryptHeaderFlagsV1 f;
serializer(ar, f);
this->flags = f;
encryptMode = f.encryptMode;
authMode = (EncryptAuthTokenMode)f.authTokenMode;
authAlgo = (EncryptAuthTokenAlgo)f.authTokenAlgo;
if (encryptMode != ENCRYPT_CIPHER_MODE_AES_256_CTR) {
TraceEvent(SevWarn, "BlobCipherEncryptHeaderUnsupportedEncryptMode").detail("Mode", encryptMode);
throw not_implemented();
}
if (algoHeaderVersion != 1) {
TraceEvent(SevWarn, "BlobCipherEncryptHeaderUnsupportedAlgoHeaderVersion")
.detail("Version", algoHeaderVersion);
throw not_implemented();
}
if (authMode == ENCRYPT_HEADER_AUTH_TOKEN_MODE_NONE) {
AesCtrNoAuthV1 noAuth;
serializer(ar, noAuth);
this->algoHeader = noAuth;
} else {
ASSERT_EQ(authMode, ENCRYPT_HEADER_AUTH_TOKEN_MODE_SINGLE);
if (authAlgo == ENCRYPT_HEADER_AUTH_TOKEN_ALGO_HMAC_SHA) {
AesCtrWithAuthV1<AUTH_TOKEN_HMAC_SHA_SIZE> hmacSha;
serializer(ar, hmacSha);
this->algoHeader = hmacSha;
} else {
ASSERT_EQ(authAlgo, ENCRYPT_HEADER_AUTH_TOKEN_ALGO_AES_CMAC);
AesCtrWithAuthV1<AUTH_TOKEN_AES_CMAC_SIZE> aesCmac;
serializer(ar, aesCmac);
this->algoHeader = aesCmac;
}
}
}
serializer(ar, flags, algoHeader);
}
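With flags and algoHeader held as std::variant alternatives, the flags version falls out of the variant index and the algorithm header version is read from whichever alternative is active, which is why serialize() collapses to a single serializer() call. A standalone sketch of that versioning idiom (simplified, not FDB code):

#include <cassert>
#include <cstdint>
#include <variant>

struct FlagsV1 {};
struct NoAuthV1 { uint8_t version = 1; };
struct WithHmacV1 { uint8_t version = 1; };

struct HeaderRef {
	std::variant<FlagsV1> flags;
	std::variant<NoAuthV1, WithHmacV1> algoHeader;
	// Version 1 corresponds to variant index 0, version 2 to index 1, and so on.
	int flagsVersion() const { return static_cast<int>(flags.index()) + 1; }
	int algoHeaderVersion() const {
		return std::visit([](const auto& h) { return static_cast<int>(h.version); }, algoHeader);
	}
};

int main() {
	HeaderRef h;
	h.algoHeader = WithHmacV1{};
	assert(h.flagsVersion() == 1 && h.algoHeaderVersion() == 1);
	return 0;
}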
const uint8_t* getIV() const;
const EncryptHeaderCipherDetails getCipherDetails() const;
EncryptAuthTokenMode getAuthTokenMode() const;
void validateEncryptionHeaderDetails(const BlobCipherDetails& textCipherDetails,
const BlobCipherDetails& headerCipherDetails,
const StringRef& ivRef) const;
@ -604,6 +634,8 @@ public:
return now() + INetwork::TIME_EPS >= expireAtTS ? true : false;
}
BlobCipherDetails details() const { return BlobCipherDetails{ encryptDomainId, baseCipherId, randomSalt }; }
void reset();
private:
@ -833,24 +865,24 @@ public:
static constexpr uint8_t ENCRYPT_HEADER_VERSION = 1;
EncryptBlobCipherAes265Ctr(Reference<BlobCipherKey> tCipherKey,
Reference<BlobCipherKey> hCipherKey,
Optional<Reference<BlobCipherKey>> hCipherKey,
const uint8_t* iv,
const int ivLen,
const EncryptAuthTokenMode mode,
BlobCipherMetrics::UsageType usageType);
EncryptBlobCipherAes265Ctr(Reference<BlobCipherKey> tCipherKey,
Reference<BlobCipherKey> hCipherKey,
Optional<Reference<BlobCipherKey>> hCipherKey,
const uint8_t* iv,
const int ivLen,
const EncryptAuthTokenMode mode,
const EncryptAuthTokenAlgo algo,
BlobCipherMetrics::UsageType usageType);
EncryptBlobCipherAes265Ctr(Reference<BlobCipherKey> tCipherKey,
Reference<BlobCipherKey> hCipherKey,
Optional<Reference<BlobCipherKey>> hCipherKey,
const EncryptAuthTokenMode mode,
BlobCipherMetrics::UsageType usageType);
EncryptBlobCipherAes265Ctr(Reference<BlobCipherKey> tCipherKey,
Reference<BlobCipherKey> hCipherKey,
Optional<Reference<BlobCipherKey>> hCipherKey,
const EncryptAuthTokenMode mode,
const EncryptAuthTokenAlgo algo,
BlobCipherMetrics::UsageType usageType);
@ -865,26 +897,22 @@ public:
private:
void init();
void updateEncryptHeader(const uint8_t*, const int, BlobCipherEncryptHeaderRef* headerRef, Arena& arena);
void updateEncryptHeaderFlagsV1(BlobCipherEncryptHeaderRef* headerRef,
BlobCipherEncryptHeaderFlagsV1* flags,
Arena& arena);
void updateEncryptHeader(const uint8_t*, const int, BlobCipherEncryptHeaderRef* headerRef);
void updateEncryptHeaderFlagsV1(BlobCipherEncryptHeaderRef* headerRef, BlobCipherEncryptHeaderFlagsV1* flags);
void setCipherAlgoHeaderV1(const uint8_t*,
const int,
const BlobCipherEncryptHeaderFlagsV1&,
BlobCipherEncryptHeaderRef*,
Arena&);
void setCipherAlgoHeaderNoAuthV1(const BlobCipherEncryptHeaderFlagsV1&, BlobCipherEncryptHeaderRef*, Arena&);
template <uint32_t S>
BlobCipherEncryptHeaderRef*);
void setCipherAlgoHeaderNoAuthV1(const BlobCipherEncryptHeaderFlagsV1&, BlobCipherEncryptHeaderRef*);
template <class Params>
void setCipherAlgoHeaderWithAuthV1(const uint8_t*,
const int,
const BlobCipherEncryptHeaderFlagsV1&,
BlobCipherEncryptHeaderRef*,
Arena&);
BlobCipherEncryptHeaderRef*);
EVP_CIPHER_CTX* ctx;
Reference<BlobCipherKey> textCipherKey;
Reference<BlobCipherKey> headerCipherKey;
Optional<Reference<BlobCipherKey>> headerCipherKeyOpt;
EncryptAuthTokenMode authTokenMode;
uint8_t iv[AES_256_IV_LENGTH];
BlobCipherMetrics::UsageType usageType;
@ -897,7 +925,7 @@ private:
class DecryptBlobCipherAes256Ctr final : NonCopyable, public ReferenceCounted<DecryptBlobCipherAes256Ctr> {
public:
DecryptBlobCipherAes256Ctr(Reference<BlobCipherKey> tCipherKey,
Reference<BlobCipherKey> hCipherKey,
Optional<Reference<BlobCipherKey>> hCipherKey,
const uint8_t* iv,
BlobCipherMetrics::UsageType usageType);
~DecryptBlobCipherAes256Ctr();
@ -915,7 +943,7 @@ private:
EVP_CIPHER_CTX* ctx;
BlobCipherMetrics::UsageType usageType;
Reference<BlobCipherKey> textCipherKey;
Reference<BlobCipherKey> headerCipherKey;
Optional<Reference<BlobCipherKey>> headerCipherKeyOpt;
bool authTokensValidationDone;
void validateEncryptHeader(const uint8_t*,
@ -932,26 +960,17 @@ private:
const int,
const BlobCipherEncryptHeaderFlagsV1&,
const BlobCipherEncryptHeaderRef&);
template <uint32_t S>
template <class Params>
void validateAuthTokenV1(const uint8_t* ciphertext,
const int ciphertextLen,
const BlobCipherEncryptHeaderFlagsV1&,
const BlobCipherEncryptHeaderRef& header);
void validateHeaderSingleAuthToken(const uint8_t* ciphertext,
const int ciphertextLen,
const BlobCipherEncryptHeaderRef& header,
Arena& arena);
void verifyEncryptHeaderMetadata(const BlobCipherEncryptHeader& header);
void verifyAuthTokens(const uint8_t* ciphertext,
const int ciphertextLen,
const BlobCipherEncryptHeader& header,
Arena& arena);
void verifyAuthTokens(const uint8_t* ciphertext, const int ciphertextLen, const BlobCipherEncryptHeader& header);
void verifyHeaderSingleAuthToken(const uint8_t* ciphertext,
const int ciphertextLen,
const BlobCipherEncryptHeader& header,
Arena& arena);
const BlobCipherEncryptHeader& header);
};
class HmacSha256DigestGen final : NonCopyable {

View File

@ -292,6 +292,7 @@ public:
int METACLUSTER_ASSIGNMENT_CLUSTERS_TO_CHECK;
double METACLUSTER_ASSIGNMENT_FIRST_CHOICE_DELAY;
double METACLUSTER_ASSIGNMENT_AVAILABILITY_TIMEOUT;
int METACLUSTER_RESTORE_BATCH_SIZE;
int TENANT_ENTRY_CACHE_LIST_REFRESH_INTERVAL; // How often the TenantEntryCache is refreshed
bool CLIENT_ENABLE_USING_CLUSTER_ID_KEY;

View File

@ -169,17 +169,23 @@ struct MutationRef {
Arena& arena,
BlobCipherMetrics::UsageType usageType) const {
ASSERT_NE(domainId, INVALID_ENCRYPT_DOMAIN_ID);
auto textCipherItr = cipherKeys.find(domainId);
auto headerCipherItr = cipherKeys.find(ENCRYPT_HEADER_DOMAIN_ID);
ASSERT(textCipherItr != cipherKeys.end() && textCipherItr->second.isValid());
ASSERT(headerCipherItr != cipherKeys.end() && headerCipherItr->second.isValid());
auto getCipherKey = [&](const EncryptCipherDomainId& domainId) {
auto iter = cipherKeys.find(domainId);
ASSERT(iter != cipherKeys.end() && iter->second.isValid());
return iter->second;
};
Reference<BlobCipherKey> textCipherKey = getCipherKey(domainId);
Reference<BlobCipherKey> headerCipherKey;
if (FLOW_KNOBS->ENCRYPT_HEADER_AUTH_TOKEN_ENABLED) {
headerCipherKey = getCipherKey(ENCRYPT_HEADER_DOMAIN_ID);
}
uint8_t iv[AES_256_IV_LENGTH] = { 0 };
deterministicRandom()->randomBytes(iv, AES_256_IV_LENGTH);
BinaryWriter bw(AssumeVersion(ProtocolVersion::withEncryptionAtRest()));
bw << *this;
EncryptBlobCipherAes265Ctr cipher(
textCipherItr->second,
headerCipherItr->second,
textCipherKey,
headerCipherKey,
iv,
AES_256_IV_LENGTH,
getEncryptAuthTokenMode(EncryptAuthTokenMode::ENCRYPT_HEADER_AUTH_TOKEN_MODE_SINGLE),
@ -217,27 +223,24 @@ struct MutationRef {
Arena& arena,
BlobCipherMetrics::UsageType usageType,
StringRef* buf = nullptr) const {
const BlobCipherEncryptHeader* header = encryptionHeader();
auto textCipherItr = cipherKeys.find(header->cipherTextDetails);
auto headerCipherItr = cipherKeys.find(header->cipherHeaderDetails);
ASSERT(textCipherItr != cipherKeys.end() && textCipherItr->second.isValid());
ASSERT(headerCipherItr != cipherKeys.end() && headerCipherItr->second.isValid());
TextAndHeaderCipherKeys textAndHeaderKeys;
textAndHeaderKeys.cipherHeaderKey = headerCipherItr->second;
textAndHeaderKeys.cipherTextKey = textCipherItr->second;
TextAndHeaderCipherKeys textAndHeaderKeys = getCipherKeys(cipherKeys);
return decrypt(textAndHeaderKeys, arena, usageType, buf);
}
TextAndHeaderCipherKeys getCipherKeys(
const std::unordered_map<BlobCipherDetails, Reference<BlobCipherKey>>& cipherKeys) {
const std::unordered_map<BlobCipherDetails, Reference<BlobCipherKey>>& cipherKeys) const {
const BlobCipherEncryptHeader* header = encryptionHeader();
auto textCipherItr = cipherKeys.find(header->cipherTextDetails);
auto headerCipherItr = cipherKeys.find(header->cipherHeaderDetails);
ASSERT(textCipherItr != cipherKeys.end() && textCipherItr->second.isValid());
ASSERT(headerCipherItr != cipherKeys.end() && headerCipherItr->second.isValid());
auto getCipherKey = [&](const BlobCipherDetails& details) -> Reference<BlobCipherKey> {
if (!details.isValid()) {
return {};
}
auto iter = cipherKeys.find(details);
ASSERT(iter != cipherKeys.end() && iter->second.isValid());
return iter->second;
};
TextAndHeaderCipherKeys textAndHeaderKeys;
textAndHeaderKeys.cipherHeaderKey = headerCipherItr->second;
textAndHeaderKeys.cipherTextKey = textCipherItr->second;
textAndHeaderKeys.cipherHeaderKey = getCipherKey(header->cipherHeaderDetails);
textAndHeaderKeys.cipherTextKey = getCipherKey(header->cipherTextDetails);
return textAndHeaderKeys;
}

View File

@ -34,6 +34,7 @@
#include "flow/Knobs.h"
#include "flow/IRandom.h"
#include <algorithm>
#include <unordered_map>
#include <unordered_set>
@ -296,6 +297,7 @@ ACTOR template <class T>
Future<TextAndHeaderCipherKeys> getLatestEncryptCipherKeysForDomain(Reference<AsyncVar<T> const> db,
EncryptCipherDomainId domainId,
BlobCipherMetrics::UsageType usageType) {
// TODO: Do not fetch the header cipher key if authentication is disabled.
std::unordered_set<EncryptCipherDomainId> domainIds = { domainId, ENCRYPT_HEADER_DOMAIN_ID };
std::unordered_map<EncryptCipherDomainId, Reference<BlobCipherKey>> cipherKeys =
wait(getLatestEncryptCipherKeys(db, domainIds, usageType));
@ -317,15 +319,71 @@ ACTOR template <class T>
Future<TextAndHeaderCipherKeys> getEncryptCipherKeys(Reference<AsyncVar<T> const> db,
BlobCipherEncryptHeader header,
BlobCipherMetrics::UsageType usageType) {
std::unordered_set<BlobCipherDetails> cipherDetails{ header.cipherTextDetails, header.cipherHeaderDetails };
state bool authenticatedEncryption = header.flags.authTokenMode != ENCRYPT_HEADER_AUTH_TOKEN_MODE_NONE;
ASSERT(header.cipherTextDetails.isValid());
ASSERT(!authenticatedEncryption || header.cipherHeaderDetails.isValid());
std::unordered_set<BlobCipherDetails> cipherDetails{ header.cipherTextDetails };
if (authenticatedEncryption) {
cipherDetails.insert(header.cipherHeaderDetails);
}
std::unordered_map<BlobCipherDetails, Reference<BlobCipherKey>> cipherKeys =
wait(getEncryptCipherKeys(db, cipherDetails, usageType));
ASSERT(cipherKeys.count(header.cipherTextDetails) > 0);
ASSERT(cipherKeys.count(header.cipherHeaderDetails) > 0);
TextAndHeaderCipherKeys result{ cipherKeys.at(header.cipherTextDetails),
cipherKeys.at(header.cipherHeaderDetails) };
ASSERT(result.cipherTextKey.isValid());
ASSERT(result.cipherHeaderKey.isValid());
TextAndHeaderCipherKeys result;
auto setCipherKey = [&](const BlobCipherDetails& details, TextAndHeaderCipherKeys& result) {
ASSERT(details.isValid());
auto iter = cipherKeys.find(details);
ASSERT(iter != cipherKeys.end() && iter->second.isValid());
isEncryptHeaderDomain(details.encryptDomainId) ? result.cipherHeaderKey = iter->second
: result.cipherTextKey = iter->second;
};
setCipherKey(header.cipherTextDetails, result);
if (authenticatedEncryption) {
setCipherKey(header.cipherHeaderDetails, result);
}
ASSERT(result.cipherTextKey.isValid() && (!authenticatedEncryption || result.cipherHeaderKey.isValid()));
return result;
}
ACTOR template <class T>
Future<TextAndHeaderCipherKeys> getEncryptCipherKeys(Reference<AsyncVar<T> const> db,
BlobCipherEncryptHeaderRef header,
BlobCipherMetrics::UsageType usageType) {
ASSERT(CLIENT_KNOBS->ENABLE_CONFIGURABLE_ENCRYPTION);
state bool authenticatedEncryption = header.getAuthTokenMode() != ENCRYPT_HEADER_AUTH_TOKEN_MODE_NONE;
state EncryptHeaderCipherDetails details = header.getCipherDetails();
ASSERT(details.textCipherDetails.isValid());
ASSERT(!authenticatedEncryption ||
(details.headerCipherDetails.present() && details.headerCipherDetails.get().isValid()));
std::unordered_set<BlobCipherDetails> cipherDetails{ details.textCipherDetails };
if (authenticatedEncryption) {
cipherDetails.insert(details.headerCipherDetails.get());
}
std::unordered_map<BlobCipherDetails, Reference<BlobCipherKey>> cipherKeys =
wait(getEncryptCipherKeys(db, cipherDetails, usageType));
TextAndHeaderCipherKeys result;
auto setCipherKey = [&](const BlobCipherDetails& details, TextAndHeaderCipherKeys& result) {
ASSERT(details.isValid());
auto iter = cipherKeys.find(details);
ASSERT(iter != cipherKeys.end() && iter->second.isValid());
isEncryptHeaderDomain(details.encryptDomainId) ? result.cipherHeaderKey = iter->second
: result.cipherTextKey = iter->second;
};
setCipherKey(details.textCipherDetails, result);
if (authenticatedEncryption) {
setCipherKey(details.headerCipherDetails.get(), result);
}
ASSERT(result.cipherTextKey.isValid() && (!authenticatedEncryption || result.cipherHeaderKey.isValid()));
return result;
}
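A hedged usage sketch for the actors above: fetch exactly the cipher keys the header references and hand them to the decryptor, passing the header key through unchanged (it is only valid when authenticated encryption was used). The helper name is illustrative, and the decrypt call's signature comes from the existing BlobCipher API rather than this diff, so treat it as an assumption:

ACTOR Future<Reference<EncryptBuf>> decryptWithHeader(Reference<AsyncVar<ClientDBInfo> const> dbInfo,
                                                      BlobCipherEncryptHeader header,
                                                      StringRef ciphertext,
                                                      Arena* arena) {
	// Only the cipher details actually present in the header are fetched.
	TextAndHeaderCipherKeys keys = wait(getEncryptCipherKeys(dbInfo, header, BlobCipherMetrics::RESTORE));
	DecryptBlobCipherAes256Ctr decryptor(
	    keys.cipherTextKey, keys.cipherHeaderKey, header.iv, BlobCipherMetrics::RESTORE);
	return decryptor.decrypt(ciphertext.begin(), ciphertext.size(), header, *arena);
}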

View File

@ -57,11 +57,12 @@ std::string clusterTypeToString(const ClusterType& clusterType);
// Represents the various states that a data cluster could be in.
//
// REGISTERING - the data cluster is being registered with the metacluster
// READY - the data cluster is active
// REMOVING - the data cluster is being removed and cannot have its configuration changed or any tenants created
// RESTORING - the data cluster is being restored and cannot have its configuration changed or any tenants
// created/updated/deleted.
enum class DataClusterState { READY, REMOVING, RESTORING };
enum class DataClusterState { REGISTERING, READY, REMOVING, RESTORING };
struct DataClusterEntry {
constexpr static FileIdentifier file_identifier = 929511;
@ -81,9 +82,7 @@ struct DataClusterEntry {
: id(id), capacity(capacity), allocated(allocated) {}
// Returns true if all configurable properties match
bool matchesConfiguration(DataClusterEntry const& other) const {
return id == other.id && capacity == other.capacity;
}
bool matchesConfiguration(DataClusterEntry const& other) const { return capacity == other.capacity; }
bool hasCapacity() const { return allocated < capacity; }
@ -188,6 +187,7 @@ struct Traceable<MetaclusterRegistrationEntry> : std::true_type {
struct MetaclusterMetadata {
// Registration information for a metacluster, stored on both management and data clusters
static KeyBackedObjectProperty<MetaclusterRegistrationEntry, decltype(IncludeVersion())>& metaclusterRegistration();
static KeyBackedSet<UID>& registrationTombstones();
};
#endif

File diff suppressed because it is too large

View File

@ -51,5 +51,20 @@ Future<decltype(std::declval<Function>()(Reference<typename DB::TransactionT>())
}
}
ACTOR template <class Function, class DB>
Future<Void> runTransactionVoid(Reference<DB> db, Function func) {
state Reference<typename DB::TransactionT> tr = db->createTransaction();
loop {
try {
// func should be idempotent; otherwise, retries may yield undefined results
wait(func(tr));
wait(safeThreadFutureToFuture(tr->commit()));
return Void();
} catch (Error& e) {
wait(safeThreadFutureToFuture(tr->onError(e)));
}
}
}
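A hedged usage sketch for runTransactionVoid (the helper and key names here are illustrative, not part of this change): write a single marker key and let the helper own the commit/retry loop. Because the lambda may run more than once, it must be idempotent.

template <class DB>
Future<Void> writeMarker(Reference<DB> db, Key key, Value value) {
	return runTransactionVoid(db, [key, value](Reference<typename DB::TransactionT> tr) {
		tr->set(key, value); // same effect on every retry, so safe to repeat
		return Future<Void>(Void());
	});
}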
#include "flow/unactorcompiler.h"
#endif

View File

@ -106,6 +106,8 @@ struct TenantMapEntry {
return ObjectReader::fromStringRef<TenantMapEntry>(value, IncludeVersion());
}
bool operator==(TenantMapEntry const& other) const;
template <class Ar>
void serialize(Ar& ar) {
serializer(ar,

View File

@ -126,6 +126,7 @@ Future<Void> checkTenantMode(Transaction tr, ClusterType expectedClusterType) {
TenantMode tenantModeForClusterType(ClusterType clusterType, TenantMode tenantMode);
int64_t extractTenantIdFromMutation(MutationRef m);
int64_t extractTenantIdFromKeyRef(StringRef s);
bool tenantMapChanging(MutationRef const& mutation, KeyRangeRef const& tenantMapRange);
bool nextTenantIdPrefixMatches(int64_t lastTenantId, int64_t nextTenantId);
int64_t getMaxAllowableTenantId(int64_t curTenantId);
int64_t getTenantIdPrefix(int64_t tenantId);
@ -449,6 +450,7 @@ Future<Void> configureTenantTransaction(Transaction tr,
TenantMapEntry originalEntry,
TenantMapEntry updatedTenantEntry) {
ASSERT(updatedTenantEntry.id == originalEntry.id);
ASSERT(!updatedTenantEntry.assignedCluster.present());
tr->setOption(FDBTransactionOptions::RAW_ACCESS);
TenantMetadata::tenantMap().set(tr, updatedTenantEntry.id, updatedTenantEntry);
@ -614,7 +616,7 @@ Future<Void> renameTenantTransaction(Transaction tr,
}
if (configureSequenceNum.present()) {
if (entry.configurationSequenceNum >= configureSequenceNum.get()) {
if (entry.configurationSequenceNum > configureSequenceNum.get()) {
return Void();
}
entry.configurationSequenceNum = configureSequenceNum.get();

View File

@ -87,7 +87,15 @@ public:
storageCache(&proxyCommitData_.storageCache), tag_popped(&proxyCommitData_.tag_popped),
tssMapping(&proxyCommitData_.tssMapping), tenantMap(&proxyCommitData_.tenantMap),
tenantNameIndex(&proxyCommitData_.tenantNameIndex), lockedTenants(&proxyCommitData_.lockedTenants),
initialCommit(initialCommit_), provisionalCommitProxy(provisionalCommitProxy_) {}
initialCommit(initialCommit_), provisionalCommitProxy(provisionalCommitProxy_) {
if (encryptMode.isEncryptionEnabled()) {
ASSERT(cipherKeys != nullptr);
ASSERT(cipherKeys->count(SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID) > 0);
if (FLOW_KNOBS->ENCRYPT_HEADER_AUTH_TOKEN_ENABLED) {
ASSERT(cipherKeys->count(ENCRYPT_HEADER_DOMAIN_ID));
}
}
}
ApplyMetadataMutationsImpl(const SpanContext& spanContext_,
ResolverData& resolverData_,
@ -98,7 +106,15 @@ public:
cipherKeys(cipherKeys_), encryptMode(encryptMode), txnStateStore(resolverData_.txnStateStore),
toCommit(resolverData_.toCommit), confChange(resolverData_.confChanges), logSystem(resolverData_.logSystem),
popVersion(resolverData_.popVersion), keyInfo(resolverData_.keyInfo), storageCache(resolverData_.storageCache),
initialCommit(resolverData_.initialCommit), forResolver(true) {}
initialCommit(resolverData_.initialCommit), forResolver(true) {
if (encryptMode.isEncryptionEnabled()) {
ASSERT(cipherKeys != nullptr);
ASSERT(cipherKeys->count(SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID) > 0);
if (FLOW_KNOBS->ENCRYPT_HEADER_AUTH_TOKEN_ENABLED) {
ASSERT(cipherKeys->count(ENCRYPT_HEADER_DOMAIN_ID));
}
}
}
private:
// The following variables are incoming parameters

View File

@ -107,7 +107,9 @@ struct VersionedMessage {
reader >> m;
const BlobCipherEncryptHeader* header = m.encryptionHeader();
cipherDetails.insert(header->cipherTextDetails);
cipherDetails.insert(header->cipherHeaderDetails);
if (header->cipherHeaderDetails.isValid()) {
cipherDetails.insert(header->cipherHeaderDetails);
}
}
}
};

View File

@ -476,6 +476,7 @@ ACTOR Future<BlobGranuleCipherKeysCtx> getLatestGranuleCipherKeys(Reference<Blob
TextAndHeaderCipherKeys systemCipherKeys =
wait(getLatestSystemEncryptCipherKeys(bwData->dbInfo, BlobCipherMetrics::BLOB_GRANULE));
ASSERT(systemCipherKeys.cipherHeaderKey.isValid());
cipherKeysCtx.headerCipherKey = BlobGranuleCipherKey::fromBlobCipherKey(systemCipherKeys.cipherHeaderKey, *arena);
cipherKeysCtx.ivRef = makeString(AES_256_IV_LENGTH, *arena);

View File

@ -999,10 +999,11 @@ ACTOR Future<Void> getResolution(CommitBatchContext* self) {
// Fetch cipher keys if needed.
state Future<std::unordered_map<EncryptCipherDomainId, Reference<BlobCipherKey>>> getCipherKeys;
if (pProxyCommitData->encryptMode.isEncryptionEnabled()) {
static const std::unordered_set<EncryptCipherDomainId> defaultDomainIds = { SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID,
ENCRYPT_HEADER_DOMAIN_ID,
FDB_DEFAULT_ENCRYPT_DOMAIN_ID };
std::unordered_set<EncryptCipherDomainId> encryptDomainIds = defaultDomainIds;
std::unordered_set<EncryptCipherDomainId> encryptDomainIds = { SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID,
FDB_DEFAULT_ENCRYPT_DOMAIN_ID };
if (FLOW_KNOBS->ENCRYPT_HEADER_AUTH_TOKEN_ENABLED) {
encryptDomainIds.insert(ENCRYPT_HEADER_DOMAIN_ID);
}
// For cluster aware encryption only the default domain id is needed
if (pProxyCommitData->encryptMode.mode == EncryptionAtRestMode::DOMAIN_AWARE) {
for (int t = 0; t < trs.size(); t++) {
@ -1010,18 +1011,6 @@ ACTOR Future<Void> getResolution(CommitBatchContext* self) {
int64_t tenantId = tenantInfo.tenantId;
if (tenantId != TenantInfo::INVALID_TENANT) {
encryptDomainIds.emplace(tenantId);
} else {
// Optimization: avoid enumerating mutations if cluster only serves default encryption domains
if (pProxyCommitData->tenantMap.size() > 0) {
for (auto m : trs[t].transaction.mutations) {
EncryptCipherDomainId domainId = getEncryptDetailsFromMutationRef(pProxyCommitData, m);
encryptDomainIds.emplace(domainId);
}
} else {
// Ensure default encryption domain-ids are present.
ASSERT_EQ(encryptDomainIds.count(SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID), 1);
ASSERT_EQ(encryptDomainIds.count(FDB_DEFAULT_ENCRYPT_DOMAIN_ID), 1);
}
}
}
}
@ -1081,17 +1070,6 @@ bool validTenantAccess(MutationRef m, std::map<int64_t, TenantName> const& tenan
return true;
}
inline bool tenantMapChanging(MutationRef const& mutation) {
const KeyRangeRef tenantMapRange = TenantMetadata::tenantMap().subspace;
if (isSingleKeyMutation((MutationRef::Type)mutation.type) && mutation.param1.startsWith(tenantMapRange.begin)) {
return true;
} else if (mutation.type == MutationRef::ClearRange &&
tenantMapRange.intersects(KeyRangeRef(mutation.param1, mutation.param2))) {
return true;
}
return false;
}
// return an iterator to the first tenantId whose idToPrefix(id) >= prefix[0..8] in lexicographic order. If no such id,
// return tenantMap.end()
inline auto lowerBoundTenantId(const StringRef& prefix, const std::map<int64_t, TenantName>& tenantMap) {
@ -1151,10 +1129,14 @@ TEST_CASE("/CommitProxy/SplitRange/LowerBoundTenantId") {
// t1_end), ... [tn_begin, b); The references are allocated on arena;
std::vector<MutationRef> splitClearRangeByTenant(Arena& arena,
const MutationRef& mutation,
const std::map<int64_t, TenantName>& tenantMap) {
const std::map<int64_t, TenantName>& tenantMap,
std::vector<int64_t>* tenantIds = nullptr) {
std::vector<MutationRef> results;
auto it = lowerBoundTenantId(mutation.param1, tenantMap);
while (it != tenantMap.end()) {
if (tenantIds != nullptr) {
tenantIds->push_back(it->first);
}
KeyRef tPrefix = TenantAPI::idToPrefix(arena, it->first);
if (tPrefix >= mutation.param2) {
break;
@ -1298,8 +1280,9 @@ size_t processClearRangeMutation(Arena& arena,
MutationRef& mutation,
int mutationIdx,
int& newMutationSize,
std::vector<std::pair<int, std::vector<MutationRef>>>& idxSplitMutations) {
std::vector<MutationRef> newClears = splitClearRangeByTenant(arena, mutation, tenantMap);
std::vector<std::pair<int, std::vector<MutationRef>>>& idxSplitMutations,
std::vector<int64_t>* tenantIds = nullptr) {
std::vector<MutationRef> newClears = splitClearRangeByTenant(arena, mutation, tenantMap, tenantIds);
if (newClears.size() == 1) {
mutation = newClears[0];
} else if (newClears.size() > 1) {
@ -1377,22 +1360,25 @@ TEST_CASE("/CommitProxy/SplitRange/replaceRawClearRanges") {
Error validateAndProcessTenantAccess(Arena& arena,
VectorRef<MutationRef>& mutations,
ProxyCommitData* const pProxyCommitData,
std::unordered_set<int64_t>& rawAccessTenantIds,
Optional<UID> debugId = Optional<UID>(),
const char* context = "") {
bool changeTenant = false;
bool writeNormalKey = false;
std::vector<int64_t> tids; // tenant ids accessed by the raw access transaction
std::vector<std::pair<int, std::vector<MutationRef>>> idxSplitMutations;
int newMutationSize = mutations.size();
KeyRangeRef tenantMapRange = TenantMetadata::tenantMap().subspace;
for (int i = 0; i < mutations.size(); ++i) {
auto& mutation = mutations[i];
Optional<int64_t> tenantId;
bool validAccess = true;
changeTenant = changeTenant || tenantMapChanging(mutation);
changeTenant = changeTenant || TenantAPI::tenantMapChanging(mutation, tenantMapRange);
if (mutation.type == MutationRef::ClearRange) {
int newClearSize = processClearRangeMutation(
arena, pProxyCommitData->tenantMap, mutation, i, newMutationSize, idxSplitMutations);
arena, pProxyCommitData->tenantMap, mutation, i, newMutationSize, idxSplitMutations, &tids);
if (debugId.present()) {
DisabledTraceEvent(SevDebug, "SplitTenantClearRange", pProxyCommitData->dbgid)
@ -1435,13 +1421,21 @@ Error validateAndProcessTenantAccess(Arena& arena,
.detail("Reason", "Tenant change and normal key write in same transaction");
return illegal_tenant_access();
}
if (tenantId.present()) {
ASSERT(tenantId.get() != TenantInfo::INVALID_TENANT);
tids.push_back(tenantId.get());
}
}
rawAccessTenantIds.insert(tids.begin(), tids.end());
replaceRawClearRanges(arena, mutations, idxSplitMutations, newMutationSize);
return success();
}
Error validateAndProcessTenantAccess(CommitTransactionRequest& tr, ProxyCommitData* const pProxyCommitData) {
// If the validation succeeds, return the list of tenant IDs referred to by the transaction via rawAccessTenantIds.
Error validateAndProcessTenantAccess(CommitTransactionRequest& tr,
ProxyCommitData* const pProxyCommitData,
std::unordered_set<int64_t>& rawAccessTenantIds) {
bool isValid = checkTenantNoWait(pProxyCommitData, tr.tenantInfo.tenantId, "Commit", true);
if (!isValid) {
return tenant_not_found();
@ -1452,17 +1446,25 @@ Error validateAndProcessTenantAccess(CommitTransactionRequest& tr, ProxyCommitDa
// only do the mutation check when the transaction uses the raw_access option and the tenant mode is required
if (pProxyCommitData->getTenantMode() != TenantMode::REQUIRED || tr.tenantInfo.hasTenant()) {
if (tr.tenantInfo.hasTenant()) {
rawAccessTenantIds.insert(tr.tenantInfo.tenantId);
}
return success();
}
return validateAndProcessTenantAccess(
tr.arena, tr.transaction.mutations, pProxyCommitData, tr.debugID, "validateAndProcessTenantAccess");
return validateAndProcessTenantAccess(tr.arena,
tr.transaction.mutations,
pProxyCommitData,
rawAccessTenantIds,
tr.debugID,
"validateAndProcessTenantAccess");
}
// Compute and apply "metadata" effects of each other proxy's most recent batch
void applyMetadataEffect(CommitBatchContext* self) {
bool initialState = self->isMyFirstBatch;
self->firstStateMutations = self->isMyFirstBatch;
KeyRangeRef tenantMapRange = TenantMetadata::tenantMap().subspace;
for (int versionIndex = 0; versionIndex < self->resolution[0].stateMutations.size(); versionIndex++) {
// pProxyCommitData->logAdapter->setNextVersion( ??? ); << Ideally we would be telling the log adapter that the
// pushes in this commit will be in the version at which these state mutations were committed by another proxy,
@ -1484,7 +1486,9 @@ void applyMetadataEffect(CommitBatchContext* self) {
// fail the transaction if it contains both tenant changes and normal key writes
auto& mutations = self->resolution[0].stateMutations[versionIndex][transactionIndex].mutations;
committed =
tenantIds.get().empty() || std::none_of(mutations.begin(), mutations.end(), tenantMapChanging);
tenantIds.get().empty() || std::none_of(mutations.begin(), mutations.end(), [&](MutationRef m) {
return TenantAPI::tenantMapChanging(m, tenantMapRange);
});
// check if all tenant ids are valid if committed == true
committed = committed &&
@ -1590,13 +1594,14 @@ void determineCommittedTransactions(CommitBatchContext* self) {
// This first pass through committed transactions deals with "metadata" effects (modifications of txnStateStore, changes
// to storage servers' responsibilities)
ACTOR Future<Void> applyMetadataToCommittedTransactions(CommitBatchContext* self) {
auto pProxyCommitData = self->pProxyCommitData;
state ProxyCommitData* const pProxyCommitData = self->pProxyCommitData;
state std::unordered_set<int64_t> rawAccessTenantIds;
auto& trs = self->trs;
int t;
for (t = 0; t < trs.size() && !self->forceRecovery; t++) {
if (self->committed[t] == ConflictBatch::TransactionCommitted && (!self->locked || trs[t].isLockAware())) {
Error e = validateAndProcessTenantAccess(trs[t], pProxyCommitData);
Error e = validateAndProcessTenantAccess(trs[t], pProxyCommitData, rawAccessTenantIds);
if (e.code() != error_code_success) {
trs[t].reply.sendError(e);
self->committed[t] = ConflictBatch::TransactionTenantFailure;
@ -1608,8 +1613,7 @@ ACTOR Future<Void> applyMetadataToCommittedTransactions(CommitBatchContext* self
pProxyCommitData->logSystem,
trs[t].transaction.mutations,
SERVER_KNOBS->PROXY_USE_RESOLVER_PRIVATE_MUTATIONS ? nullptr : &self->toCommit,
pProxyCommitData->encryptMode.isEncryptionEnabled() ? &self->cipherKeys
: nullptr,
&self->cipherKeys,
pProxyCommitData->encryptMode,
self->forceRecovery,
self->commitVersion,
@ -1663,6 +1667,23 @@ ACTOR Future<Void> applyMetadataToCommittedTransactions(CommitBatchContext* self
ASSERT(false); // ChangeCoordinatorsRequest should always throw
}
// If there are raw access requests or cross-tenant-boundary clear ranges in the batch, tenant ids for those
// requests are available only after resolution. We need to fetch additional cipher keys for these requests.
if (pProxyCommitData->encryptMode == EncryptionAtRestMode::DOMAIN_AWARE && !rawAccessTenantIds.empty()) {
std::unordered_set<EncryptCipherDomainId> extraDomainIds;
for (auto tenantId : rawAccessTenantIds) {
if (self->cipherKeys.count(tenantId) == 0) {
extraDomainIds.insert(tenantId);
}
}
if (!extraDomainIds.empty()) {
std::unordered_map<EncryptCipherDomainId, Reference<BlobCipherKey>> extraCipherKeys =
wait(getLatestEncryptCipherKeys(
pProxyCommitData->db, extraDomainIds, BlobCipherMetrics::TLOG_POST_RESOLUTION));
self->cipherKeys.insert(extraCipherKeys.begin(), extraCipherKeys.end());
}
}
return Void();
}
@ -1693,30 +1714,6 @@ ACTOR Future<WriteMutationRefVar> writeMutationEncryptedMutation(CommitBatchCont
return encryptedMutation;
}
ACTOR Future<WriteMutationRefVar> writeMutationFetchEncryptKey(CommitBatchContext* self,
int64_t tenantId,
const MutationRef* mutation,
Arena* arena) {
state EncryptCipherDomainId domainId = tenantId;
state MutationRef encryptedMutation;
static_assert(TenantInfo::INVALID_TENANT == INVALID_ENCRYPT_DOMAIN_ID);
ASSERT(self->pProxyCommitData->encryptMode.isEncryptionEnabled());
ASSERT_NE((MutationRef::Type)mutation->type, MutationRef::Type::ClearRange);
domainId = getEncryptDetailsFromMutationRef(self->pProxyCommitData, *mutation);
Reference<BlobCipherKey> cipherKey =
wait(getLatestEncryptCipherKey(self->pProxyCommitData->db, domainId, BlobCipherMetrics::TLOG));
self->cipherKeys[domainId] = cipherKey;
CODE_PROBE(true, "Raw access mutation encryption", probe::decoration::rare);
ASSERT_NE(domainId, INVALID_ENCRYPT_DOMAIN_ID);
encryptedMutation = mutation->encrypt(self->cipherKeys, domainId, *arena, BlobCipherMetrics::TLOG);
self->toCommit.writeTypedMessage(encryptedMutation);
return encryptedMutation;
}
Future<WriteMutationRefVar> writeMutation(CommitBatchContext* self,
int64_t domainId,
const MutationRef* mutation,
@ -1756,14 +1753,10 @@ Future<WriteMutationRefVar> writeMutation(CommitBatchContext* self,
} else {
if (domainId == INVALID_ENCRYPT_DOMAIN_ID) {
domainId = getEncryptDetailsFromMutationRef(self->pProxyCommitData, *mutation);
if (self->cipherKeys.find(domainId) == self->cipherKeys.end()) {
return writeMutationFetchEncryptKey(self, domainId, mutation, arena);
}
CODE_PROBE(true, "Raw access mutation encryption");
}
ASSERT_NE(domainId, INVALID_ENCRYPT_DOMAIN_ID);
ASSERT(self->cipherKeys.count(domainId) > 0);
encryptedMutation = mutation->encrypt(self->cipherKeys, domainId, *arena, BlobCipherMetrics::TLOG);
}
ASSERT(encryptedMutation.isEncrypted());
@ -3222,13 +3215,11 @@ ACTOR Future<Void> processCompleteTransactionStateRequest(TransactionStateResolv
tag_uid[decodeServerTagValue(kv.value)] = decodeServerTagKey(kv.key);
}
state std::unordered_map<EncryptCipherDomainId, Reference<BlobCipherKey>> cipherKeys;
state std::unordered_map<EncryptCipherDomainId, Reference<BlobCipherKey>> systemCipherKeys;
if (pContext->pCommitData->encryptMode.isEncryptionEnabled()) {
static const std::unordered_set<EncryptCipherDomainId> metadataDomainIds = { SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID,
ENCRYPT_HEADER_DOMAIN_ID };
std::unordered_map<EncryptCipherDomainId, Reference<BlobCipherKey>> cks =
wait(getLatestEncryptCipherKeys(pContext->pCommitData->db, metadataDomainIds, BlobCipherMetrics::TLOG));
cipherKeys = cks;
std::unordered_map<EncryptCipherDomainId, Reference<BlobCipherKey>> cks = wait(getLatestEncryptCipherKeys(
pContext->pCommitData->db, ENCRYPT_CIPHER_SYSTEM_DOMAINS, BlobCipherMetrics::TLOG));
systemCipherKeys = cks;
}
loop {
@ -3297,7 +3288,7 @@ ACTOR Future<Void> processCompleteTransactionStateRequest(TransactionStateResolv
Reference<ILogSystem>(),
mutations,
/* pToCommit= */ nullptr,
pContext->pCommitData->encryptMode.isEncryptionEnabled() ? &cipherKeys : nullptr,
&systemCipherKeys,
pContext->pCommitData->encryptMode,
confChanges,
/* version= */ 0,

View File

@ -375,8 +375,10 @@ ACTOR static Future<MutationRef> _decryptMutation(MutationRef mutation, Database
Reference<AsyncVar<ClientDBInfo> const> dbInfo = cx->clientInfo;
state const BlobCipherEncryptHeader* header = mutation.encryptionHeader();
std::unordered_set<BlobCipherDetails> cipherDetails;
cipherDetails.insert(header->cipherHeaderDetails);
cipherDetails.insert(header->cipherTextDetails);
if (header->cipherHeaderDetails.isValid()) {
cipherDetails.insert(header->cipherHeaderDetails);
}
std::unordered_map<BlobCipherDetails, Reference<BlobCipherKey>> getCipherKeysResult =
wait(getEncryptCipherKeys(dbInfo, cipherDetails, BlobCipherMetrics::BACKUP));
return mutation.decrypt(getCipherKeysResult, *arena, BlobCipherMetrics::BACKUP);

View File

@ -398,6 +398,7 @@ public:
ISimulator::ExtraDatabaseMode extraDatabaseMode = ISimulator::ExtraDatabaseMode::Disabled;
// The number of extra database used if the database mode is MULTIPLE
int extraDatabaseCount = 1;
bool extraDatabaseBackupAgents = false;
int minimumReplication = 0;
int minimumRegions = 0;
bool configureLocked = false;
@ -481,6 +482,7 @@ public:
.add("testPriority", &testPriority)
.add("extraDatabaseMode", &extraDatabaseModeStr)
.add("extraDatabaseCount", &extraDatabaseCount)
.add("extraDatabaseBackupAgents", &extraDatabaseBackupAgents)
.add("minimumReplication", &minimumReplication)
.add("minimumRegions", &minimumRegions)
.add("configureLocked", &configureLocked)
@ -2525,22 +2527,23 @@ void setupSimulatedSystem(std::vector<Future<Void>>* systemActors,
LocalityData localities(Optional<Standalone<StringRef>>(), newZoneId, newMachineId, dcUID);
localities.set("data_hall"_sr, dcUID);
systemActors->push_back(reportErrors(simulatedMachine(ClusterConnectionString(extraDatabase),
conn,
extraIps,
sslEnabled,
localities,
processClass,
baseFolder,
false,
machine == useSeedForMachine,
AgentNone,
sslOnly,
whitelistBinPaths,
protocolVersion,
configDBType,
true),
"SimulatedMachine"));
systemActors->push_back(
reportErrors(simulatedMachine(ClusterConnectionString(extraDatabase),
conn,
extraIps,
sslEnabled,
localities,
processClass,
baseFolder,
false,
machine == useSeedForMachine,
testConfig.extraDatabaseBackupAgents ? AgentAddition : AgentNone,
sslOnly,
whitelistBinPaths,
protocolVersion,
configDBType,
true),
"SimulatedMachine"));
++cluster;
}
}
@ -2666,14 +2669,6 @@ ACTOR void setupAndRun(std::string dataFolder,
testConfig.storageEngineExcludeTypes.push_back(5);
}
// The RocksDB storage engine does not support the restarting tests because you cannot consistently get a clean
// snapshot of the storage engine without a snapshotting file system.
// https://github.com/apple/foundationdb/issues/5155
if (std::string_view(testFile).find("restarting") != std::string_view::npos) {
testConfig.storageEngineExcludeTypes.push_back(4);
testConfig.storageEngineExcludeTypes.push_back(5);
}
// The RocksDB engine is not always built with the rest of fdbserver. Don't try to use it if it is not included
// in the build.
if (!rocksDBEnabled) {

View File

@ -1927,7 +1927,9 @@ ACTOR Future<Void> pullAsyncData(StorageCacheData* data) {
if (!cipherKeys.present()) {
const BlobCipherEncryptHeader* header = msg.encryptionHeader();
cipherDetails.insert(header->cipherTextDetails);
cipherDetails.insert(header->cipherHeaderDetails);
if (header->cipherHeaderDetails.isValid()) {
cipherDetails.insert(header->cipherHeaderDetails);
}
collectingCipherKeys = true;
} else {
msg = msg.decrypt(cipherKeys.get(), cloneReader.arena(), BlobCipherMetrics::TLOG);

View File

@ -4970,13 +4970,14 @@ public:
// VersionedBTree takes ownership of pager
VersionedBTree(IPager2* pager,
std::string name,
UID logID,
Reference<AsyncVar<ServerDBInfo> const> db,
Optional<EncryptionAtRestMode> expectedEncryptionMode,
EncodingType encodingType = EncodingType::MAX_ENCODING_TYPE,
Reference<IPageEncryptionKeyProvider> keyProvider = {})
: m_pager(pager), m_db(db), m_expectedEncryptionMode(expectedEncryptionMode), m_encodingType(encodingType),
m_enforceEncodingType(false), m_keyProvider(keyProvider), m_pBuffer(nullptr), m_mutationCount(0), m_name(name),
m_pBoundaryVerifier(DecodeBoundaryVerifier::getVerifier(name)) {
m_logID(logID), m_pBoundaryVerifier(DecodeBoundaryVerifier::getVerifier(name)) {
m_pDecodeCacheMemory = m_pager->getPageCachePenaltySource();
m_lazyClearActor = 0;
m_init = init_impl(this);
@ -5121,7 +5122,7 @@ public:
// default encoding is expected.
if (encodingType == EncodingType::MAX_ENCODING_TYPE) {
encodingType = expectedEncodingType;
if (encodingType == EncodingType::XXHash64 && g_network->isSimulated() && BUGGIFY) {
if (encodingType == EncodingType::XXHash64 && g_network->isSimulated() && m_logID.hash() % 2 == 0) {
encodingType = EncodingType::XOREncryption_TestOnly;
}
} else if (encodingType != expectedEncodingType) {
@ -5592,6 +5593,7 @@ private:
Future<Void> m_latestCommit;
Future<Void> m_init;
std::string m_name;
UID m_logID;
int m_blockSize;
ParentInfoMapT childUpdateTracker;
@ -7966,7 +7968,7 @@ public:
SERVER_KNOBS->REDWOOD_EXTENT_CONCURRENT_READS,
false,
m_error);
m_tree = new VersionedBTree(pager, filename, db, encryptionMode, encodingType, keyProvider);
m_tree = new VersionedBTree(pager, filename, logID, db, encryptionMode, encodingType, keyProvider);
m_init = catchError(init_impl(this));
}
@ -10127,7 +10129,7 @@ TEST_CASE("Lredwood/correctness/btree") {
printf("Initializing...\n");
pager = new DWALPager(
pageSize, extentSize, file, pageCacheBytes, remapCleanupWindowBytes, concurrentExtentReads, pagerMemoryOnly);
state VersionedBTree* btree = new VersionedBTree(pager, file, {}, encryptionMode, encodingType, keyProvider);
state VersionedBTree* btree = new VersionedBTree(pager, file, UID(), {}, encryptionMode, encodingType, keyProvider);
wait(btree->init());
state DecodeBoundaryVerifier* pBoundaries = DecodeBoundaryVerifier::getVerifier(file);
@ -10366,7 +10368,7 @@ TEST_CASE("Lredwood/correctness/btree") {
printf("Reopening btree from disk.\n");
IPager2* pager = new DWALPager(
pageSize, extentSize, file, pageCacheBytes, remapCleanupWindowBytes, concurrentExtentReads, false);
btree = new VersionedBTree(pager, file, {}, encryptionMode, encodingType, keyProvider);
btree = new VersionedBTree(pager, file, UID(), {}, encryptionMode, encodingType, keyProvider);
wait(btree->init());
@ -10415,6 +10417,7 @@ TEST_CASE("Lredwood/correctness/btree") {
concurrentExtentReads,
pagerMemoryOnly),
file,
UID(),
{},
{},
encodingType,
@ -10751,8 +10754,8 @@ TEST_CASE(":/redwood/performance/set") {
DWALPager* pager = new DWALPager(
pageSize, extentSize, file, pageCacheBytes, remapCleanupWindowBytes, concurrentExtentReads, pagerMemoryOnly);
state VersionedBTree* btree =
new VersionedBTree(pager, file, {}, {}, EncodingType::XXHash64, makeReference<NullEncryptionKeyProvider>());
state VersionedBTree* btree = new VersionedBTree(
pager, file, UID(), {}, {}, EncodingType::XXHash64, makeReference<NullEncryptionKeyProvider>());
wait(btree->init());
printf("Initialized. StorageBytes=%s\n", btree->getStorageBytes().toString().c_str());

View File

@ -2270,6 +2270,9 @@ int main(int argc, char* argv[]) {
g_knobs.setKnob("encrypt_header_auth_token_algo",
KnobValue::create((int)ini.GetLongValue(
"META", "encryptHeaderAuthTokenAlgo", FLOW_KNOBS->ENCRYPT_HEADER_AUTH_TOKEN_ALGO)));
g_knobs.setKnob(
"shard_encode_location_metadata",
KnobValue::create(ini.GetBoolValue("META", "enableShardEncodeLocationMetadata", false)));
}
setupAndRun(dataFolder, opts.testFile, opts.restarting, (isRestoring >= 1), opts.whitelistBinPaths);
g_simulator->run();

View File

@ -189,6 +189,7 @@ private:
if (!clusterMetadata.entry.hasCapacity()) {
ASSERT(allocatedItr == clusterAllocatedMap.end());
} else {
ASSERT(allocatedItr != clusterAllocatedMap.end());
ASSERT_EQ(allocatedItr->second, clusterMetadata.entry.allocated.numTenantGroups);
++numFoundInAllocatedMap;
}
@ -218,7 +219,6 @@ private:
std::set<TenantGroupName> processedTenantGroups;
for (auto [tenantId, entry] : managementMetadata.tenantMap) {
ASSERT(entry.assignedCluster.present());
ASSERT(TenantAPI::getTenantIdPrefix(tenantId) == managementMetadata.tenantIdPrefix.get());
// Each tenant should be assigned to the same cluster where it is stored in the cluster tenant index
auto clusterItr = managementMetadata.clusterTenantMap.find(entry.assignedCluster.get());
@ -310,7 +310,7 @@ private:
auto& expectedTenants = self->managementMetadata.clusterTenantMap[clusterName];
std::map<TenantGroupName, int> groupExpectedTenantCounts;
std::set<TenantGroupName> tenantGroupsWithCompletedTenants;
if (!self->allowPartialMetaclusterOperations) {
ASSERT_EQ(dataClusterTenantMap.size(), expectedTenants.size());
} else {
@ -318,13 +318,11 @@ private:
for (auto tenantName : expectedTenants) {
TenantMapEntry const& metaclusterEntry = self->managementMetadata.tenantMap[tenantName];
if (!dataClusterTenantMap.count(tenantName)) {
if (metaclusterEntry.tenantGroup.present()) {
groupExpectedTenantCounts.try_emplace(metaclusterEntry.tenantGroup.get(), 0);
}
ASSERT(metaclusterEntry.tenantState == TenantState::REGISTERING ||
metaclusterEntry.tenantState == TenantState::REMOVING);
metaclusterEntry.tenantState == TenantState::REMOVING ||
metaclusterEntry.tenantState == TenantState::ERROR);
} else if (metaclusterEntry.tenantGroup.present()) {
++groupExpectedTenantCounts[metaclusterEntry.tenantGroup.get()];
tenantGroupsWithCompletedTenants.insert(metaclusterEntry.tenantGroup.get());
}
}
}
@ -334,7 +332,6 @@ private:
TenantMapEntry const& metaclusterEntry = self->managementMetadata.tenantMap[tenantId];
ASSERT(!entry.assignedCluster.present());
ASSERT_EQ(entry.id, metaclusterEntry.id);
ASSERT(TenantAPI::getTenantIdPrefix(entry.id) == self->managementMetadata.tenantIdPrefix.get());
ASSERT(entry.tenantName == metaclusterEntry.tenantName);
ASSERT_EQ(entry.tenantState, TenantState::READY);
@ -360,15 +357,19 @@ private:
ASSERT_LE(dataClusterTenantGroupMap.size(), expectedTenantGroups.size());
for (auto const& name : expectedTenantGroups) {
if (!dataClusterTenantGroupMap.count(name)) {
auto itr = groupExpectedTenantCounts.find(name);
ASSERT(itr != groupExpectedTenantCounts.end());
ASSERT_EQ(itr->second, 0);
auto itr = tenantGroupsWithCompletedTenants.find(name);
ASSERT(itr == tenantGroupsWithCompletedTenants.end());
}
}
}
for (auto const& [name, entry] : dataClusterTenantGroupMap) {
ASSERT(expectedTenantGroups.count(name));
ASSERT(!entry.assignedCluster.present());
expectedTenantGroups.erase(name);
}
for (auto name : expectedTenantGroups) {
ASSERT(tenantGroupsWithCompletedTenants.count(name) == 0);
}
return Void();

View File

@ -153,7 +153,9 @@ private:
for (auto [tenantId, tenantMapEntry] : metadata.tenantMap) {
ASSERT_EQ(tenantId, tenantMapEntry.id);
if (metadata.clusterType != ClusterType::METACLUSTER_DATA) {
ASSERT_LE(tenantId, metadata.lastTenantId);
if (TenantAPI::getTenantIdPrefix(tenantId) == TenantAPI::getTenantIdPrefix(metadata.lastTenantId)) {
ASSERT_LE(tenantId, metadata.lastTenantId);
}
}
ASSERT_EQ(metadata.tenantNameIndex[tenantMapEntry.tenantName], tenantId);
@ -184,6 +186,9 @@ private:
ASSERT(!tenantMapEntry.assignedCluster.present());
ASSERT(!tenantMapEntry.renameDestination.present());
}
// An error string should be set if and only if the tenant state is an error
ASSERT((tenantMapEntry.tenantState == TenantState::ERROR) != tenantMapEntry.error.empty());
}
ASSERT_EQ(metadata.tenantMap.size() + renameCount, metadata.tenantNameIndex.size());

View File

@ -3035,7 +3035,9 @@ ACTOR Future<std::pair<ChangeFeedStreamReply, bool>> getChangeFeedMutations(Stor
if (m.isEncrypted()) {
const BlobCipherEncryptHeader* header = m.encryptionHeader();
cipherDetails.insert(header->cipherTextDetails);
cipherDetails.insert(header->cipherHeaderDetails);
if (header->cipherHeaderDetails.isValid()) {
cipherDetails.insert(header->cipherHeaderDetails);
}
}
}
}
@ -9273,7 +9275,9 @@ ACTOR Future<Void> update(StorageServer* data, bool* pReceivedUpdate) {
if (!cipherKeys.present()) {
const BlobCipherEncryptHeader* header = msg.encryptionHeader();
cipherDetails.insert(header->cipherTextDetails);
cipherDetails.insert(header->cipherHeaderDetails);
if (header->cipherHeaderDetails.isValid()) {
cipherDetails.insert(header->cipherHeaderDetails);
}
collectingCipherKeys = true;
} else {
msg = msg.decrypt(cipherKeys.get(), eager.arena, BlobCipherMetrics::TLOG);

View File

@ -65,6 +65,9 @@ struct DiskFailureInjectionWorkload : FailureInjectionWorkload {
periodicBroadcastInterval = getOption(options, "periodicBroadcastInterval"_sr, periodicBroadcastInterval);
}
// TODO: Currently this workload doesn't play well with MachineAttrition.
void disableFailureInjectionWorkloads(std::set<std::string>& out) const override { out.insert("Attrition"); }
void initFailureInjectionMode(DeterministicRandom& random) override { enabled = clientId == 0; }
Future<Void> setup(Database const& cx) override { return Void(); }

View File

@ -315,8 +315,26 @@ struct EncryptionOpsWorkload : TestWorkload {
auto end = std::chrono::high_resolution_clock::now();
// validate encrypted buffer size and contents (not matching with plaintext)
const uint8_t* headerIV = headerRef->getIV();
ASSERT_EQ(memcmp(&headerIV[0], &iv[0], AES_256_IV_LENGTH), 0);
EncryptHeaderCipherDetails validateDetails = headerRef->getCipherDetails();
ASSERT(validateDetails.textCipherDetails.isValid() &&
validateDetails.textCipherDetails == BlobCipherDetails(textCipherKey->getDomainId(),
textCipherKey->getBaseCipherId(),
textCipherKey->getSalt()));
if (authMode == ENCRYPT_HEADER_AUTH_TOKEN_MODE_NONE) {
ASSERT(!validateDetails.headerCipherDetails.present());
} else {
ASSERT(validateDetails.headerCipherDetails.present() &&
validateDetails.headerCipherDetails.get().isValid() &&
validateDetails.headerCipherDetails.get() == BlobCipherDetails(headerCipherKey->getDomainId(),
headerCipherKey->getBaseCipherId(),
headerCipherKey->getSalt()));
}
ASSERT_EQ(encrypted.size(), len);
ASSERT_EQ(headerRef->flagsVersion, CLIENT_KNOBS->ENCRYPT_HEADER_FLAGS_VERSION);
ASSERT_EQ(headerRef->flagsVersion(), CLIENT_KNOBS->ENCRYPT_HEADER_FLAGS_VERSION);
ASSERT_NE(memcmp(encrypted.begin(), payload, len), 0);
metrics->updateEncryptionTime(std::chrono::duration<double, std::nano>(end - start).count());
@ -334,9 +352,12 @@ struct EncryptionOpsWorkload : TestWorkload {
Reference<BlobCipherKey> cipherKey = getEncryptionKey(header.cipherTextDetails.encryptDomainId,
header.cipherTextDetails.baseCipherId,
header.cipherTextDetails.salt);
Reference<BlobCipherKey> headerCipherKey = getEncryptionKey(header.cipherHeaderDetails.encryptDomainId,
header.cipherHeaderDetails.baseCipherId,
header.cipherHeaderDetails.salt);
Reference<BlobCipherKey> headerCipherKey;
if (header.flags.authTokenMode != EncryptAuthTokenMode::ENCRYPT_HEADER_AUTH_TOKEN_MODE_NONE) {
headerCipherKey = getEncryptionKey(header.cipherHeaderDetails.encryptDomainId,
header.cipherHeaderDetails.baseCipherId,
header.cipherHeaderDetails.salt);
}
ASSERT(cipherKey.isValid());
ASSERT(cipherKey->isEqual(orgCipherKey));
ASSERT(headerCipherKey.isValid() ||
@ -361,35 +382,33 @@ struct EncryptionOpsWorkload : TestWorkload {
Reference<BlobCipherKey> orgCipherKey) {
BlobCipherEncryptHeaderRef headerRef = BlobCipherEncryptHeaderRef::fromStringRef(headerStr);
ASSERT_EQ(headerRef.flagsVersion, CLIENT_KNOBS->ENCRYPT_HEADER_FLAGS_VERSION);
ASSERT_EQ(headerRef.flagsVersion(), CLIENT_KNOBS->ENCRYPT_HEADER_FLAGS_VERSION);
// validate flags
BlobCipherDetails textCipherDetails;
BlobCipherDetails headerCipherDetails;
uint8_t iv[AES_256_IV_LENGTH];
if (std::holds_alternative<AesCtrNoAuthV1>(headerRef.algoHeader)) {
AesCtrNoAuthV1 noAuth = std::get<AesCtrNoAuthV1>(headerRef.algoHeader);
memcpy(&iv[0], &noAuth.iv[0], AES_256_IV_LENGTH);
textCipherDetails = noAuth.cipherTextDetails;
if (std::holds_alternative<AesCtrNoAuth>(headerRef.algoHeader)) {
AesCtrNoAuth noAuth = std::get<AesCtrNoAuth>(headerRef.algoHeader);
memcpy(&iv[0], &noAuth.v1.iv[0], AES_256_IV_LENGTH);
textCipherDetails = noAuth.v1.cipherTextDetails;
headerCipherDetails = BlobCipherDetails();
} else if (std::holds_alternative<AesCtrWithAuthV1<AUTH_TOKEN_HMAC_SHA_SIZE>>(headerRef.algoHeader)) {
AesCtrWithAuthV1<AUTH_TOKEN_HMAC_SHA_SIZE> hmacSha =
std::get<AesCtrWithAuthV1<AUTH_TOKEN_HMAC_SHA_SIZE>>(headerRef.algoHeader);
memcpy(&iv[0], &hmacSha.iv[0], AES_256_IV_LENGTH);
textCipherDetails = hmacSha.cipherTextDetails;
headerCipherDetails = hmacSha.cipherHeaderDetails;
} else if (std::holds_alternative<AesCtrWithHmac>(headerRef.algoHeader)) {
AesCtrWithHmac hmacSha = std::get<AesCtrWithHmac>(headerRef.algoHeader);
memcpy(&iv[0], &hmacSha.v1.iv[0], AES_256_IV_LENGTH);
textCipherDetails = hmacSha.v1.cipherTextDetails;
headerCipherDetails = hmacSha.v1.cipherHeaderDetails;
} else {
ASSERT(std::holds_alternative<AesCtrWithAuthV1<AUTH_TOKEN_AES_CMAC_SIZE>>(headerRef.algoHeader));
AesCtrWithAuthV1<AUTH_TOKEN_AES_CMAC_SIZE> aesCmac =
std::get<AesCtrWithAuthV1<AUTH_TOKEN_AES_CMAC_SIZE>>(headerRef.algoHeader);
memcpy(&iv[0], &aesCmac.iv[0], AES_256_IV_LENGTH);
textCipherDetails = aesCmac.cipherTextDetails;
headerCipherDetails = aesCmac.cipherHeaderDetails;
ASSERT(std::holds_alternative<AesCtrWithCmac>(headerRef.algoHeader));
AesCtrWithCmac aesCmac = std::get<AesCtrWithCmac>(headerRef.algoHeader);
memcpy(&iv[0], &aesCmac.v1.iv[0], AES_256_IV_LENGTH);
textCipherDetails = aesCmac.v1.cipherTextDetails;
headerCipherDetails = aesCmac.v1.cipherHeaderDetails;
}
Reference<BlobCipherKey> cipherKey =
getEncryptionKey(textCipherDetails.encryptDomainId, textCipherDetails.baseCipherId, textCipherDetails.salt);
Reference<BlobCipherKey> headerCipherKey =
headerCipherDetails.encryptDomainId == INVALID_ENCRYPT_DOMAIN_ID
!headerCipherDetails.isValid()
? Reference<BlobCipherKey>() // header cipher key is not needed when no authentication mode is used
: getEncryptionKey(
headerCipherDetails.encryptDomainId, headerCipherDetails.baseCipherId, headerCipherDetails.salt);

View File

@ -332,7 +332,7 @@ struct GetMappedRangeWorkload : ApiWorkload {
}
expectedCnt = std::min(expectedCnt, boundByRecord);
std::cout << "boundByRecord: " << boundByRecord << std::endl;
ASSERT(result.size() == expectedCnt);
ASSERT_LE(result.size(), expectedCnt);
beginSelector = KeySelector(firstGreaterThan(result.back().key));
}
} else {

View File

@ -0,0 +1,362 @@
/*
* MetaclusterManagementConcurrencyWorkload.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cstdint>
#include <limits>
#include "fdbclient/DatabaseContext.h"
#include "fdbclient/FDBOptions.g.h"
#include "fdbclient/FDBTypes.h"
#include "fdbclient/GenericManagementAPI.actor.h"
#include "fdbclient/MetaclusterManagement.actor.h"
#include "fdbclient/ReadYourWrites.h"
#include "fdbclient/RunTransaction.actor.h"
#include "fdbclient/TenantManagement.actor.h"
#include "fdbclient/ThreadSafeTransaction.h"
#include "fdbrpc/simulator.h"
#include "fdbserver/workloads/MetaclusterConsistency.actor.h"
#include "fdbserver/workloads/TenantConsistency.actor.h"
#include "fdbserver/workloads/workloads.actor.h"
#include "fdbserver/Knobs.h"
#include "flow/Error.h"
#include "flow/IRandom.h"
#include "flow/ProtocolVersion.h"
#include "flow/flow.h"
#include "flow/actorcompiler.h" // This must be the last #include.
struct MetaclusterManagementConcurrencyWorkload : TestWorkload {
static constexpr auto NAME = "MetaclusterManagementConcurrency";
Reference<IDatabase> managementDb;
std::map<ClusterName, Database> dataDbs;
std::vector<ClusterName> dataDbIndex;
double testDuration;
MetaclusterManagementConcurrencyWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) {
testDuration = getOption(options, "testDuration"_sr, 120.0);
}
Future<Void> setup(Database const& cx) override { return _setup(cx, this); }
ACTOR static Future<Void> _setup(Database cx, MetaclusterManagementConcurrencyWorkload* self) {
Reference<IDatabase> threadSafeHandle =
wait(unsafeThreadFutureToFuture(ThreadSafeDatabase::createFromExistingDatabase(cx)));
MultiVersionApi::api->selectApiVersion(cx->apiVersion.version());
self->managementDb = MultiVersionDatabase::debugCreateFromExistingDatabase(threadSafeHandle);
ASSERT(g_simulator->extraDatabases.size() > 0);
for (auto connectionString : g_simulator->extraDatabases) {
ClusterConnectionString ccs(connectionString);
self->dataDbIndex.push_back(ClusterName(format("cluster_%08d", self->dataDbs.size())));
self->dataDbs[self->dataDbIndex.back()] =
Database::createSimulatedExtraDatabase(connectionString, cx->defaultTenant);
}
if (self->clientId == 0) {
wait(success(MetaclusterAPI::createMetacluster(
cx.getReference(),
"management_cluster"_sr,
deterministicRandom()->randomInt(TenantAPI::TENANT_ID_PREFIX_MIN_VALUE,
TenantAPI::TENANT_ID_PREFIX_MAX_VALUE + 1))));
}
return Void();
}
ClusterName chooseClusterName() { return dataDbIndex[deterministicRandom()->randomInt(0, dataDbIndex.size())]; }
static Future<Void> verifyClusterRecovered(Database db) {
return success(runTransaction(db.getReference(),
[](Reference<ReadYourWritesTransaction> tr) { return tr->getReadVersion(); }));
}
ACTOR static Future<Void> registerCluster(MetaclusterManagementConcurrencyWorkload* self) {
state ClusterName clusterName = self->chooseClusterName();
state Database dataDb = self->dataDbs[clusterName];
state UID debugId = deterministicRandom()->randomUniqueID();
try {
state DataClusterEntry entry;
entry.capacity.numTenantGroups = deterministicRandom()->randomInt(0, 4);
loop {
TraceEvent(SevDebug, "MetaclusterManagementConcurrencyRegisteringCluster", debugId)
.detail("ClusterName", clusterName)
.detail("NumTenantGroups", entry.capacity.numTenantGroups);
Future<Void> registerFuture =
MetaclusterAPI::registerCluster(self->managementDb,
clusterName,
dataDb.getReference()->getConnectionRecord()->getConnectionString(),
entry);
Optional<Void> result = wait(timeout(registerFuture, deterministicRandom()->randomInt(1, 30)));
if (result.present()) {
TraceEvent(SevDebug, "MetaclusterManagementConcurrencyRegisteredCluster", debugId)
.detail("ClusterName", clusterName)
.detail("NumTenantGroups", entry.capacity.numTenantGroups);
break;
}
}
} catch (Error& e) {
TraceEvent(SevDebug, "MetaclusterManagementConcurrencyRegisterClusterError", debugId)
.error(e)
.detail("ClusterName", clusterName);
if (e.code() != error_code_cluster_already_exists && e.code() != error_code_cluster_not_empty &&
e.code() != error_code_cluster_already_registered && e.code() != error_code_cluster_removed) {
TraceEvent(SevError, "MetaclusterManagementConcurrencyRegisterClusterFailure", debugId)
.error(e)
.detail("ClusterName", clusterName);
ASSERT(false);
}
return Void();
}
wait(verifyClusterRecovered(dataDb));
return Void();
}
ACTOR static Future<Void> removeCluster(MetaclusterManagementConcurrencyWorkload* self) {
state ClusterName clusterName = self->chooseClusterName();
state Database dataDb = self->dataDbs[clusterName];
state UID debugId = deterministicRandom()->randomUniqueID();
try {
loop {
TraceEvent(SevDebug, "MetaclusterManagementConcurrencyRemovingCluster", debugId)
.detail("ClusterName", clusterName);
Future<bool> removeFuture = MetaclusterAPI::removeCluster(
self->managementDb, clusterName, ClusterType::METACLUSTER_MANAGEMENT, false);
Optional<bool> result = wait(timeout(removeFuture, deterministicRandom()->randomInt(1, 30)));
if (result.present()) {
ASSERT(result.get());
TraceEvent(SevDebug, "MetaclusterManagementConcurrencyRemovedCluster", debugId)
.detail("ClusterName", clusterName);
break;
}
}
} catch (Error& e) {
TraceEvent(SevDebug, "MetaclusterManagementConcurrencyRemoveClusterError", debugId)
.error(e)
.detail("ClusterName", clusterName);
if (e.code() != error_code_cluster_not_found && e.code() != error_code_cluster_not_empty) {
TraceEvent(SevError, "MetaclusterManagementConcurrencyRemoveClusterFailure", debugId)
.error(e)
.detail("ClusterName", clusterName);
ASSERT(false);
}
return Void();
}
wait(verifyClusterRecovered(dataDb));
return Void();
}
ACTOR static Future<Void> listClusters(MetaclusterManagementConcurrencyWorkload* self) {
state ClusterName clusterName1 = self->chooseClusterName();
state ClusterName clusterName2 = self->chooseClusterName();
state int limit = deterministicRandom()->randomInt(1, self->dataDbs.size() + 1);
try {
TraceEvent(SevDebug, "MetaclusterManagementConcurrencyListClusters")
.detail("StartClusterName", clusterName1)
.detail("EndClusterName", clusterName2)
.detail("Limit", limit);
std::map<ClusterName, DataClusterMetadata> clusterList =
wait(MetaclusterAPI::listClusters(self->managementDb, clusterName1, clusterName2, limit));
TraceEvent(SevDebug, "MetaclusterManagementConcurrencyListedClusters")
.detail("StartClusterName", clusterName1)
.detail("EndClusterName", clusterName2)
.detail("Limit", limit);
ASSERT(clusterName1 <= clusterName2);
ASSERT(clusterList.size() <= limit);
} catch (Error& e) {
TraceEvent(SevDebug, "MetaclusterManagementConcurrencyListClustersError")
.error(e)
.detail("StartClusterName", clusterName1)
.detail("EndClusterName", clusterName2)
.detail("Limit", limit);
if (e.code() != error_code_inverted_range) {
TraceEvent(SevError, "MetaclusterManagementConcurrencyListClusterFailure")
.error(e)
.detail("ClusterName1", clusterName1)
.detail("ClusterName2", clusterName2);
ASSERT(false);
}
return Void();
}
return Void();
}
ACTOR static Future<Void> getCluster(MetaclusterManagementConcurrencyWorkload* self) {
state ClusterName clusterName = self->chooseClusterName();
state Database dataDb = self->dataDbs[clusterName];
try {
TraceEvent(SevDebug, "MetaclusterManagementConcurrencyGetCluster").detail("ClusterName", clusterName);
DataClusterMetadata clusterMetadata = wait(MetaclusterAPI::getCluster(self->managementDb, clusterName));
TraceEvent(SevDebug, "MetaclusterManagementConcurrencyGotCluster").detail("ClusterName", clusterName);
ASSERT(dataDb.getReference()->getConnectionRecord()->getConnectionString() ==
clusterMetadata.connectionString);
} catch (Error& e) {
TraceEvent(SevDebug, "MetaclusterManagementConcurrencyGetClusterError")
.error(e)
.detail("ClusterName", clusterName);
if (e.code() != error_code_cluster_not_found) {
TraceEvent(SevError, "MetaclusterManagementConcurrencyGetClusterFailure")
.error(e)
.detail("ClusterName", clusterName);
ASSERT(false);
}
return Void();
}
return Void();
}
ACTOR static Future<Optional<DataClusterEntry>> configureImpl(MetaclusterManagementConcurrencyWorkload* self,
ClusterName clusterName,
Optional<int64_t> numTenantGroups,
Optional<ClusterConnectionString> connectionString) {
state Reference<ITransaction> tr = self->managementDb->createTransaction();
loop {
try {
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
Optional<DataClusterMetadata> clusterMetadata =
wait(MetaclusterAPI::tryGetClusterTransaction(tr, clusterName));
state Optional<DataClusterEntry> entry;
if (clusterMetadata.present()) {
if (numTenantGroups.present()) {
entry = clusterMetadata.get().entry;
entry.get().capacity.numTenantGroups = numTenantGroups.get();
}
MetaclusterAPI::updateClusterMetadata(
tr, clusterName, clusterMetadata.get(), connectionString, entry);
wait(buggifiedCommit(tr, BUGGIFY_WITH_PROB(0.1)));
}
return entry;
} catch (Error& e) {
wait(safeThreadFutureToFuture(tr->onError(e)));
}
}
}
ACTOR static Future<Void> configureCluster(MetaclusterManagementConcurrencyWorkload* self) {
state ClusterName clusterName = self->chooseClusterName();
state Database dataDb = self->dataDbs[clusterName];
state UID debugId = deterministicRandom()->randomUniqueID();
state Optional<int64_t> newNumTenantGroups;
state Optional<ClusterConnectionString> connectionString;
if (deterministicRandom()->coinflip()) {
newNumTenantGroups = deterministicRandom()->randomInt(0, 4);
}
if (deterministicRandom()->coinflip()) {
connectionString = dataDb.getReference()->getConnectionRecord()->getConnectionString();
}
try {
loop {
TraceEvent(SevDebug, "MetaclusterManagementConcurrencyConfigureCluster", debugId)
.detail("ClusterName", clusterName)
.detail("NewNumTenantGroups", newNumTenantGroups.orDefault(-1))
.detail("NewConnectionString",
connectionString.map(&ClusterConnectionString::toString).orDefault(""));
Optional<Optional<DataClusterEntry>> result =
wait(timeout(configureImpl(self, clusterName, newNumTenantGroups, connectionString),
deterministicRandom()->randomInt(1, 30)));
if (result.present()) {
TraceEvent(SevDebug, "MetaclusterManagementConcurrencyConfiguredCluster", debugId)
.detail("ClusterName", clusterName)
.detail("NewNumTenantGroups", newNumTenantGroups.orDefault(-1))
.detail("NewConnectionString",
connectionString.map(&ClusterConnectionString::toString).orDefault(""));
break;
}
}
} catch (Error& e) {
TraceEvent(SevDebug, "MetaclusterManagementConcurrencyConfigureClusterError", debugId)
.error(e)
.detail("ClusterName", clusterName)
.detail("NewNumTenantGroups", newNumTenantGroups.orDefault(-1))
.detail("NewConnectionString", connectionString.map(&ClusterConnectionString::toString).orDefault(""));
if (e.code() != error_code_cluster_not_found && e.code() != error_code_cluster_removed &&
e.code() != error_code_invalid_metacluster_operation) {
TraceEvent(SevError, "MetaclusterManagementConcurrencyConfigureClusterFailure")
.error(e)
.detail("ClusterName", clusterName);
ASSERT(false);
}
}
return Void();
}
Future<Void> start(Database const& cx) override { return _start(cx, this); }
ACTOR static Future<Void> _start(Database cx, MetaclusterManagementConcurrencyWorkload* self) {
state double start = now();
// Run a random sequence of metacluster management operations for the duration of the test
while (now() < start + self->testDuration) {
state int operation = deterministicRandom()->randomInt(0, 5);
if (operation == 0) {
wait(registerCluster(self));
} else if (operation == 1) {
wait(removeCluster(self));
} else if (operation == 2) {
wait(listClusters(self));
} else if (operation == 3) {
wait(getCluster(self));
} else if (operation == 4) {
wait(configureCluster(self));
}
}
return Void();
}
Future<bool> check(Database const& cx) override {
if (clientId == 0) {
return _check(cx, this);
} else {
return true;
}
}
ACTOR static Future<bool> _check(Database cx, MetaclusterManagementConcurrencyWorkload* self) {
// The metacluster consistency check runs the tenant consistency check for each cluster
state MetaclusterConsistencyCheck<IDatabase> metaclusterConsistencyCheck(
self->managementDb, AllowPartialMetaclusterOperations::True);
wait(metaclusterConsistencyCheck.run());
return true;
}
void getMetrics(std::vector<PerfMetric>& m) override {}
};
WorkloadFactory<MetaclusterManagementConcurrencyWorkload> MetaclusterManagementConcurrencyWorkloadFactory;

View File

@ -48,6 +48,7 @@ struct MetaclusterManagementWorkload : TestWorkload {
struct DataClusterData {
Database db;
bool registered = false;
bool detached = false;
int tenantGroupCapacity = 0;
std::set<TenantName> tenants;
@ -184,6 +185,7 @@ struct MetaclusterManagementWorkload : TestWorkload {
}
ASSERT(!dataDb->registered);
ASSERT(!dataDb->detached || dataDb->tenants.empty());
dataDb->tenantGroupCapacity = entry.capacity.numTenantGroups;
self->totalTenantGroupCapacity += entry.capacity.numTenantGroups;
@ -196,6 +198,9 @@ struct MetaclusterManagementWorkload : TestWorkload {
if (e.code() == error_code_cluster_already_exists) {
ASSERT(dataDb->registered);
return Void();
} else if (e.code() == error_code_cluster_not_empty) {
ASSERT(dataDb->detached && !dataDb->tenants.empty());
return Void();
}
TraceEvent(SevError, "RegisterClusterFailure").error(e).detail("ClusterName", clusterName);
@ -209,14 +214,16 @@ struct MetaclusterManagementWorkload : TestWorkload {
state ClusterName clusterName = self->chooseClusterName();
state DataClusterData* dataDb = &self->dataDbs[clusterName];
state bool retried = false;
state bool detachCluster = false;
try {
loop {
// TODO: check force removal
Future<Void> removeFuture = MetaclusterAPI::removeCluster(self->managementDb, clusterName, false);
Future<bool> removeFuture = MetaclusterAPI::removeCluster(
self->managementDb, clusterName, ClusterType::METACLUSTER_MANAGEMENT, detachCluster);
try {
Optional<Void> result = wait(timeout(removeFuture, deterministicRandom()->randomInt(1, 30)));
Optional<bool> result = wait(timeout(removeFuture, deterministicRandom()->randomInt(1, 30)));
if (result.present()) {
ASSERT(result.get());
break;
} else {
retried = true;
@ -235,11 +242,16 @@ struct MetaclusterManagementWorkload : TestWorkload {
}
ASSERT(dataDb->registered);
ASSERT(dataDb->tenants.empty());
ASSERT(detachCluster || dataDb->tenants.empty());
self->totalTenantGroupCapacity -= dataDb->tenantGroupCapacity;
dataDb->tenantGroupCapacity = 0;
dataDb->registered = false;
if (!detachCluster) {
dataDb->tenantGroupCapacity = 0;
dataDb->registered = false;
} else {
dataDb->registered = false;
dataDb->detached = true;
}
// Get a version to know that the cluster has recovered
wait(success(runTransaction(dataDb->db.getReference(),
@ -260,6 +272,45 @@ struct MetaclusterManagementWorkload : TestWorkload {
return Void();
}
ACTOR static Future<Void> restoreCluster(MetaclusterManagementWorkload* self) {
state ClusterName clusterName = self->chooseClusterName();
state DataClusterData* dataDb = &self->dataDbs[clusterName];
state bool dryRun = deterministicRandom()->coinflip();
state bool forceJoin = deterministicRandom()->coinflip();
state std::vector<std::string> messages;
try {
loop {
Future<Void> restoreFuture =
MetaclusterAPI::restoreCluster(self->managementDb,
clusterName,
dataDb->db->getConnectionRecord()->getConnectionString(),
ApplyManagementClusterUpdates::True,
RestoreDryRun(dryRun),
ForceJoinNewMetacluster(forceJoin),
&messages);
Optional<Void> result = wait(timeout(restoreFuture, deterministicRandom()->randomInt(1, 30)));
if (result.present()) {
break;
}
}
ASSERT(dataDb->registered);
if (!dryRun) {
dataDb->detached = false;
}
} catch (Error& e) {
if (e.code() == error_code_cluster_not_found) {
ASSERT(!dataDb->registered);
return Void();
}
TraceEvent(SevError, "RestoreClusterFailure").error(e).detail("ClusterName", clusterName);
ASSERT(false);
}
return Void();
}
ACTOR static Future<Void> listClusters(MetaclusterManagementWorkload* self) {
state ClusterName clusterName1 = self->chooseClusterName();
state ClusterName clusterName2 = self->chooseClusterName();
@ -448,11 +499,10 @@ struct MetaclusterManagementWorkload : TestWorkload {
// Choose between two preferred clusters because if we get a partial completion and
// retry, we want the operation to eventually succeed instead of having a chance of
// never re-visiting the original preferred cluster.
state std::pair<ClusterName, ClusterName> preferredClusters;
state Optional<ClusterName> originalPreferredCluster;
state std::vector<ClusterName> preferredClusters;
if (!assignClusterAutomatically) {
preferredClusters.first = self->chooseClusterName();
preferredClusters.second = self->chooseClusterName();
preferredClusters.push_back(self->chooseClusterName());
preferredClusters.push_back(self->chooseClusterName());
}
state TenantMapEntry tenantMapEntry;
@ -463,11 +513,7 @@ struct MetaclusterManagementWorkload : TestWorkload {
loop {
try {
if (!assignClusterAutomatically && (!retried || deterministicRandom()->coinflip())) {
tenantMapEntry.assignedCluster =
deterministicRandom()->coinflip() ? preferredClusters.first : preferredClusters.second;
if (!originalPreferredCluster.present()) {
originalPreferredCluster = tenantMapEntry.assignedCluster.get();
}
tenantMapEntry.assignedCluster = deterministicRandom()->randomChoice(preferredClusters);
}
Future<Void> createFuture =
MetaclusterAPI::createTenant(self->managementDb, tenantMapEntry, assignClusterAutomatically);
@ -484,16 +530,23 @@ struct MetaclusterManagementWorkload : TestWorkload {
ASSERT(entry.present());
tenantMapEntry = entry.get();
break;
} else if (!assignClusterAutomatically && retried &&
originalPreferredCluster.get() != tenantMapEntry.assignedCluster.get() &&
(e.code() == error_code_cluster_no_capacity ||
e.code() == error_code_cluster_not_found ||
e.code() == error_code_invalid_tenant_configuration)) {
// When picking a different assigned cluster, it is possible to leave the
// tenant creation in a partially completed state, which we want to avoid.
// Continue retrying if the new preferred cluster throws errors rather than
// exiting immediately so we can allow the operation to finish.
continue;
} else if (!assignClusterAutomatically && (e.code() == error_code_cluster_no_capacity ||
e.code() == error_code_cluster_not_found ||
e.code() == error_code_invalid_tenant_configuration)) {
state Error error = e;
Optional<TenantMapEntry> entry = wait(MetaclusterAPI::tryGetTenant(self->managementDb, tenant));
if (entry.present() && entry.get().assignedCluster != tenantMapEntry.assignedCluster) {
// When picking a different assigned cluster, it is possible to leave the
// tenant creation in a partially completed state, which we want to avoid.
// Continue retrying if the new preferred cluster throws errors rather than
// exiting immediately so we can allow the operation to finish.
preferredClusters.clear();
preferredClusters.push_back(entry.get().assignedCluster.get());
tenantMapEntry.assignedCluster = entry.get().assignedCluster;
continue;
}
throw error;
} else {
throw;
}
@ -550,6 +603,7 @@ struct MetaclusterManagementWorkload : TestWorkload {
ASSERT(tenantGroup.present());
ASSERT(tenantMapEntry.assignedCluster.present());
auto itr = self->tenantGroups.find(tenantGroup.get());
ASSERT(itr != self->tenantGroups.end());
ASSERT(itr->second.cluster != tenantMapEntry.assignedCluster.get());
return Void();
}
@ -837,7 +891,7 @@ struct MetaclusterManagementWorkload : TestWorkload {
// Run a random sequence of operations for the duration of the test
while (now() < start + self->testDuration) {
state int operation = deterministicRandom()->randomInt(0, 9);
state int operation = deterministicRandom()->randomInt(0, 10);
if (operation == 0) {
wait(registerCluster(self));
} else if (operation == 1) {
@ -856,6 +910,8 @@ struct MetaclusterManagementWorkload : TestWorkload {
wait(configureTenant(self));
} else if (operation == 8) {
wait(renameTenant(self));
} else if (operation == 9) {
wait(restoreCluster(self));
}
}
@ -867,17 +923,16 @@ struct MetaclusterManagementWorkload : TestWorkload {
ClusterName clusterName,
DataClusterData clusterData) {
state Optional<MetaclusterRegistrationEntry> metaclusterRegistration;
state std::vector<std::pair<TenantName, int64_t>> tenants;
state std::vector<std::pair<TenantName, TenantMapEntry>> tenants;
state Reference<ReadYourWritesTransaction> tr = clusterData.db->createTransaction();
loop {
try {
tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
wait(
store(metaclusterRegistration,
MetaclusterMetadata::metaclusterRegistration().get(clusterData.db.getReference())) &&
store(tenants,
TenantAPI::listTenantsTransaction(tr, ""_sr, "\xff\xff"_sr, clusterData.tenants.size() + 1)));
wait(store(metaclusterRegistration, MetaclusterMetadata::metaclusterRegistration().get(tr)) &&
store(tenants,
TenantAPI::listTenantMetadataTransaction(
tr, ""_sr, "\xff\xff"_sr, clusterData.tenants.size() + 1)));
break;
} catch (Error& e) {
wait(safeThreadFutureToFuture(tr->onError(e)));
@ -892,9 +947,11 @@ struct MetaclusterManagementWorkload : TestWorkload {
}
ASSERT(tenants.size() == clusterData.tenants.size());
for (auto [tenantName, tid] : tenants) {
for (auto [tenantName, tenantEntry] : tenants) {
ASSERT(clusterData.tenants.count(tenantName));
ASSERT(self->createdTenants[tenantName].cluster == clusterName);
auto tenantData = self->createdTenants[tenantName];
ASSERT(tenantData.cluster == clusterName);
ASSERT(tenantData.tenantGroup == tenantEntry.tenantGroup);
}
return Void();
@ -922,8 +979,8 @@ struct MetaclusterManagementWorkload : TestWorkload {
std::vector<Future<Void>> removeClusterFutures;
for (auto [clusterName, clusterMetadata] : dataClusters) {
removeClusterFutures.push_back(
MetaclusterAPI::removeCluster(self->managementDb, clusterName, !deleteTenants));
removeClusterFutures.push_back(success(MetaclusterAPI::removeCluster(
self->managementDb, clusterName, ClusterType::METACLUSTER_MANAGEMENT, !deleteTenants)));
}
wait(waitForAll(removeClusterFutures));

File diff suppressed because it is too large

View File

@ -84,6 +84,7 @@ struct SaveAndKillWorkload : TestWorkload {
ini.SetBoolValue("META", "enableTLogEncryption", SERVER_KNOBS->ENABLE_TLOG_ENCRYPTION);
ini.SetBoolValue("META", "enableStorageServerEncryption", SERVER_KNOBS->ENABLE_STORAGE_SERVER_ENCRYPTION);
ini.SetBoolValue("META", "enableBlobGranuleEncryption", SERVER_KNOBS->ENABLE_BLOB_GRANULE_ENCRYPTION);
ini.SetBoolValue("META", "enableShardEncodeLocationMetadata", SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA);
ini.SetBoolValue("META", "encryptHeaderAuthTokenEnabled", FLOW_KNOBS->ENCRYPT_HEADER_AUTH_TOKEN_ENABLED);
ini.SetLongValue("META", "encryptHeaderAuthTokenAlgo", FLOW_KNOBS->ENCRYPT_HEADER_AUTH_TOKEN_ALGO);
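Taken together with the fdbserver change earlier in this diff, this completes the restarting-test round trip: the SaveAndKill workload persists the knob into the META section of the restart ini file before killing the cluster, and the restarted fdbserver reads it back and reapplies it before setupAndRun. A condensed sketch of the two halves (file handling and surrounding plumbing elided):
// Phase 1 (SaveAndKill workload): persist the knob value prior to the restart.
ini.SetBoolValue("META", "enableShardEncodeLocationMetadata", SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA);
// Phase 2 (simulated fdbserver startup): restore it, defaulting to false if the key is absent.
g_knobs.setKnob("shard_encode_location_metadata",
                KnobValue::create(ini.GetBoolValue("META", "enableShardEncodeLocationMetadata", false)));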

View File

@ -47,6 +47,7 @@ struct TenantManagementConcurrencyWorkload : TestWorkload {
int maxTenantGroups;
double testDuration;
bool useMetacluster;
bool createMetacluster;
Reference<IDatabase> mvDb;
Database dataDb;
@ -55,8 +56,11 @@ struct TenantManagementConcurrencyWorkload : TestWorkload {
maxTenants = std::min<int>(1e8 - 1, getOption(options, "maxTenants"_sr, 100));
maxTenantGroups = std::min<int>(2 * maxTenants, getOption(options, "maxTenantGroups"_sr, 20));
testDuration = getOption(options, "testDuration"_sr, 120.0);
createMetacluster = getOption(options, "createMetacluster"_sr, true);
if (clientId == 0) {
if (hasOption(options, "useMetacluster"_sr)) {
useMetacluster = getOption(options, "useMetacluster"_sr, false);
} else if (clientId == 0) {
useMetacluster = deterministicRandom()->coinflip();
} else {
// Other clients read the metacluster state from the database
@ -100,16 +104,22 @@ struct TenantManagementConcurrencyWorkload : TestWorkload {
MultiVersionApi::api->selectApiVersion(cx->apiVersion.version());
self->mvDb = MultiVersionDatabase::debugCreateFromExistingDatabase(threadSafeHandle);
if (self->useMetacluster && self->clientId == 0) {
if (self->useMetacluster && self->createMetacluster && self->clientId == 0) {
wait(success(MetaclusterAPI::createMetacluster(
cx.getReference(),
"management_cluster"_sr,
deterministicRandom()->randomInt(TenantAPI::TENANT_ID_PREFIX_MIN_VALUE,
TenantAPI::TENANT_ID_PREFIX_MAX_VALUE + 1))));
DataClusterEntry entry;
entry.capacity.numTenantGroups = 1e9;
wait(MetaclusterAPI::registerCluster(self->mvDb, "cluster1"_sr, g_simulator->extraDatabases[0], entry));
state int extraDatabaseIdx;
for (extraDatabaseIdx = 0; extraDatabaseIdx < g_simulator->extraDatabases.size(); ++extraDatabaseIdx) {
DataClusterEntry entry;
entry.capacity.numTenantGroups = 1e9;
wait(MetaclusterAPI::registerCluster(self->mvDb,
ClusterName(fmt::format("cluster{}", extraDatabaseIdx)),
g_simulator->extraDatabases[extraDatabaseIdx],
entry));
}
}
state Transaction tr(cx);
@ -145,10 +155,7 @@ struct TenantManagementConcurrencyWorkload : TestWorkload {
}
}
if (self->useMetacluster) {
ASSERT(g_simulator->extraDatabases.size() == 1);
self->dataDb = Database::createSimulatedExtraDatabase(g_simulator->extraDatabases[0], cx->defaultTenant);
} else {
if (!self->useMetacluster) {
self->dataDb = cx;
}
@ -175,27 +182,45 @@ struct TenantManagementConcurrencyWorkload : TestWorkload {
ACTOR static Future<Void> createTenant(TenantManagementConcurrencyWorkload* self) {
state TenantName tenant = self->chooseTenantName();
state TenantMapEntry entry;
state UID debugId = deterministicRandom()->randomUniqueID();
entry.tenantName = tenant;
entry.tenantGroup = self->chooseTenantGroup();
try {
loop {
TraceEvent(SevDebug, "TenantManagementConcurrencyCreatingTenant", debugId)
.detail("TenantName", entry.tenantName)
.detail("TenantGroup", entry.tenantGroup);
Future<Void> createFuture =
self->useMetacluster
? MetaclusterAPI::createTenant(self->mvDb, entry, AssignClusterAutomatically::True)
: success(TenantAPI::createTenant(self->dataDb.getReference(), tenant, entry));
Optional<Void> result = wait(timeout(createFuture, 30));
if (result.present()) {
TraceEvent(SevDebug, "TenantManagementConcurrencyCreatedTenant", debugId)
.detail("TenantName", entry.tenantName)
.detail("TenantGroup", entry.tenantGroup);
break;
}
}
return Void();
} catch (Error& e) {
if (e.code() == error_code_tenant_removed) {
TraceEvent(SevDebug, "TenantManagementConcurrencyCreateTenantError", debugId)
.error(e)
.detail("TenantName", entry.tenantName)
.detail("TenantGroup", entry.tenantGroup);
if (e.code() == error_code_metacluster_no_capacity || e.code() == error_code_cluster_removed) {
ASSERT(self->useMetacluster && !self->createMetacluster);
} else if (e.code() == error_code_tenant_removed) {
ASSERT(self->useMetacluster);
} else if (e.code() != error_code_tenant_already_exists && e.code() != error_code_cluster_no_capacity) {
TraceEvent(SevError, "CreateTenantFailure").error(e).detail("TenantName", tenant);
TraceEvent(SevError, "TenantManagementConcurrencyCreateTenantFailure", debugId)
.error(e)
.detail("TenantName", entry.tenantName)
.detail("TenantGroup", entry.tenantGroup);
ASSERT(false);
}
@ -205,23 +230,36 @@ struct TenantManagementConcurrencyWorkload : TestWorkload {
ACTOR static Future<Void> deleteTenant(TenantManagementConcurrencyWorkload* self) {
state TenantName tenant = self->chooseTenantName();
state UID debugId = deterministicRandom()->randomUniqueID();
try {
loop {
TraceEvent(SevDebug, "TenantManagementConcurrencyDeletingTenant", debugId).detail("TenantName", tenant);
Future<Void> deleteFuture = self->useMetacluster
? MetaclusterAPI::deleteTenant(self->mvDb, tenant)
: TenantAPI::deleteTenant(self->dataDb.getReference(), tenant);
Optional<Void> result = wait(timeout(deleteFuture, 30));
if (result.present()) {
TraceEvent(SevDebug, "TenantManagementConcurrencyDeletedTenant", debugId)
.detail("TenantName", tenant);
break;
}
}
return Void();
} catch (Error& e) {
if (e.code() != error_code_tenant_not_found) {
TraceEvent(SevError, "DeleteTenantFailure").error(e).detail("TenantName", tenant);
TraceEvent(SevDebug, "TenantManagementConcurrencyDeleteTenantError", debugId)
.error(e)
.detail("TenantName", tenant);
if (e.code() == error_code_cluster_removed) {
ASSERT(self->useMetacluster && !self->createMetacluster);
} else if (e.code() != error_code_tenant_not_found) {
TraceEvent(SevError, "TenantManagementConcurrencyDeleteTenantFailure", debugId)
.error(e)
.detail("TenantName", tenant);
ASSERT(false);
}
return Void();
}
@ -257,21 +295,43 @@ struct TenantManagementConcurrencyWorkload : TestWorkload {
ACTOR static Future<Void> configureTenant(TenantManagementConcurrencyWorkload* self) {
state TenantName tenant = self->chooseTenantName();
state std::map<Standalone<StringRef>, Optional<Value>> configParams;
configParams["tenant_group"_sr] = self->chooseTenantGroup();
state Optional<TenantGroupName> tenantGroup = self->chooseTenantGroup();
state UID debugId = deterministicRandom()->randomUniqueID();
configParams["tenant_group"_sr] = tenantGroup;
try {
loop {
TraceEvent(SevDebug, "TenantManagementConcurrencyConfiguringTenant", debugId)
.detail("TenantName", tenant)
.detail("TenantGroup", tenantGroup);
Optional<Void> result = wait(timeout(configureImpl(self, tenant, configParams), 30));
if (result.present()) {
TraceEvent(SevDebug, "TenantManagementConcurrencyConfiguredTenant", debugId)
.detail("TenantName", tenant)
.detail("TenantGroup", tenantGroup);
break;
}
}
return Void();
} catch (Error& e) {
if (e.code() != error_code_tenant_not_found && e.code() != error_code_invalid_tenant_state) {
TraceEvent(SevError, "ConfigureTenantFailure").error(e).detail("TenantName", tenant);
TraceEvent(SevDebug, "TenantManagementConcurrencyConfigureTenantError", debugId)
.error(e)
.detail("TenantName", tenant)
.detail("TenantGroup", tenantGroup);
if (e.code() == error_code_cluster_removed) {
ASSERT(self->useMetacluster && !self->createMetacluster);
} else if (e.code() == error_code_cluster_no_capacity ||
e.code() == error_code_invalid_tenant_configuration) {
ASSERT(self->useMetacluster && !self->createMetacluster);
} else if (e.code() != error_code_tenant_not_found && e.code() != error_code_invalid_tenant_state) {
TraceEvent(SevError, "TenantManagementConcurrencyConfigureTenantFailure", debugId)
.error(e)
.detail("TenantName", tenant)
.detail("TenantGroup", tenantGroup);
ASSERT(false);
}
return Void();
}
@ -280,29 +340,43 @@ struct TenantManagementConcurrencyWorkload : TestWorkload {
ACTOR static Future<Void> renameTenant(TenantManagementConcurrencyWorkload* self) {
state TenantName oldTenant = self->chooseTenantName();
state TenantName newTenant = self->chooseTenantName();
state UID debugId = deterministicRandom()->randomUniqueID();
try {
loop {
TraceEvent(SevDebug, "TenantManagementConcurrencyRenamingTenant", debugId)
.detail("OldTenantName", oldTenant)
.detail("NewTenantName", newTenant);
Future<Void> renameFuture =
self->useMetacluster ? MetaclusterAPI::renameTenant(self->mvDb, oldTenant, newTenant)
: TenantAPI::renameTenant(self->dataDb.getReference(), oldTenant, newTenant);
Optional<Void> result = wait(timeout(renameFuture, 30));
if (result.present()) {
TraceEvent(SevDebug, "TenantManagementConcurrencyRenamedTenant", debugId)
.detail("OldTenantName", oldTenant)
.detail("NewTenantName", newTenant);
break;
}
}
return Void();
} catch (Error& e) {
if (e.code() == error_code_invalid_tenant_state || e.code() == error_code_tenant_removed ||
e.code() == error_code_cluster_no_capacity) {
TraceEvent(SevDebug, "TenantManagementConcurrencyRenameTenantError", debugId)
.error(e)
.detail("OldTenantName", oldTenant)
.detail("NewTenantName", newTenant);
if (e.code() == error_code_cluster_removed) {
ASSERT(self->useMetacluster && !self->createMetacluster);
} else if (e.code() == error_code_invalid_tenant_state || e.code() == error_code_tenant_removed ||
e.code() == error_code_cluster_no_capacity) {
ASSERT(self->useMetacluster);
} else if (e.code() != error_code_tenant_not_found && e.code() != error_code_tenant_already_exists) {
TraceEvent(SevError, "RenameTenantFailure")
TraceEvent(SevDebug, "TenantManagementConcurrencyRenameTenantFailure", debugId)
.error(e)
.detail("OldTenant", oldTenant)
.detail("NewTenant", newTenant);
.detail("OldTenantName", oldTenant)
.detail("NewTenantName", newTenant);
ASSERT(false);
}
return Void();
}

View File

@ -800,6 +800,7 @@ struct TenantManagementWorkload : TestWorkload {
if (!endTenant.present()) {
tenants[beginTenant] = anyExists ? itr->second.tenant->id() : TenantInfo::INVALID_TENANT;
} else if (endTenant.present()) {
anyExists = false;
for (auto itr = self->createdTenants.lower_bound(beginTenant);
itr != self->createdTenants.end() && itr->first < endTenant.get();
++itr) {
@ -1509,12 +1510,19 @@ struct TenantManagementWorkload : TestWorkload {
state bool specialKeysUseInvalidTuple =
operationType == OperationType::SPECIAL_KEYS && deterministicRandom()->random01() < 0.1;
// True if any selected options would change the tenant's configuration and we would expect an update to be
// written
state bool configurationChanging = false;
// Generate a tenant group. Sometimes do this at the same time that we include an invalid option to ensure
// that the configure function still fails
if (!hasInvalidOption || deterministicRandom()->coinflip()) {
newTenantGroup = self->chooseTenantGroup(true);
hasSystemTenantGroup = hasSystemTenantGroup || newTenantGroup.orDefault(""_sr).startsWith("\xff"_sr);
configuration["tenant_group"_sr] = newTenantGroup;
if (exists && itr->second.tenantGroup != newTenantGroup) {
configurationChanging = true;
}
}
if (hasInvalidOption) {
configuration["invalid_option"_sr] = ""_sr;
@ -1530,7 +1538,9 @@ struct TenantManagementWorkload : TestWorkload {
ASSERT(!hasSystemTenantGroup);
ASSERT(!specialKeysUseInvalidTuple);
Versionstamp currentVersionstamp = wait(getLastTenantModification(self, operationType));
ASSERT_GT(currentVersionstamp.version, originalReadVersion);
if (configurationChanging) {
ASSERT_GT(currentVersionstamp.version, originalReadVersion);
}
auto itr = self->createdTenants.find(tenant);
if (itr->second.tenantGroup.present()) {

View File

@ -131,4 +131,8 @@ EncryptAuthTokenAlgo getRandomAuthTokenAlgo() {
bool isReservedEncryptDomain(EncryptCipherDomainId domainId) {
return domainId == SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID || domainId == ENCRYPT_HEADER_DOMAIN_ID ||
domainId == FDB_DEFAULT_ENCRYPT_DOMAIN_ID;
}
bool isEncryptHeaderDomain(EncryptCipherDomainId domainId) {
return domainId == ENCRYPT_HEADER_DOMAIN_ID;
}

View File

@ -28,6 +28,7 @@
#include <limits>
#include <string>
#include <string_view>
#include <unordered_set>
constexpr const int AUTH_TOKEN_HMAC_SHA_SIZE = 32;
constexpr const int AUTH_TOKEN_AES_CMAC_SIZE = 16;
@ -46,6 +47,17 @@ constexpr const EncryptCipherBaseKeyId INVALID_ENCRYPT_CIPHER_KEY_ID = 0;
constexpr const EncryptCipherRandomSalt INVALID_ENCRYPT_RANDOM_SALT = 0;
static const std::unordered_set<EncryptCipherDomainId> ENCRYPT_CIPHER_SYSTEM_DOMAINS = {
SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID,
ENCRYPT_HEADER_DOMAIN_ID
};
static const std::unordered_set<EncryptCipherDomainId> ENCRYPT_CIPHER_DETAULT_DOMAINS = {
SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID,
ENCRYPT_HEADER_DOMAIN_ID,
FDB_DEFAULT_ENCRYPT_DOMAIN_ID,
};
typedef enum {
ENCRYPT_CIPHER_MODE_NONE = 0,
ENCRYPT_CIPHER_MODE_AES_256_CTR = 1,
@ -109,5 +121,6 @@ std::string getEncryptDbgTraceKeyWithTS(std::string_view prefix,
int getEncryptHeaderAuthTokenSize(int algo);
bool isReservedEncryptDomain(EncryptCipherDomainId domainId);
bool isEncryptHeaderDomain(EncryptCipherDomainId domainId);
#endif
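The domain sets introduced above are consumed by the cipher-key helpers; the commit-proxy hunk near the top of this diff, for example, fetches the latest key for every reserved system domain in a single call. A hedged sketch of that usage (ACTOR context and error handling omitted; 'db' stands in for the proxy's database reference used there):
// Sketch only: resolve the latest cipher keys for all reserved system encryption domains.
std::unordered_map<EncryptCipherDomainId, Reference<BlobCipherKey>> systemKeys =
    wait(getLatestEncryptCipherKeys(db, ENCRYPT_CIPHER_SYSTEM_DOMAINS, BlobCipherMetrics::TLOG));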

View File

@ -269,6 +269,9 @@ ERROR( metacluster_no_capacity, 2166, "Metacluster does not have capacity to cre
ERROR( management_cluster_invalid_access, 2167, "Standard transactions cannot be run against the management cluster" )
ERROR( tenant_creation_permanently_failed, 2168, "The tenant creation did not complete in a timely manner and has permanently failed" )
ERROR( cluster_removed, 2169, "The cluster is being removed from the metacluster" )
ERROR( cluster_restoring, 2170, "The cluster is being restored to the metacluster" )
ERROR( invalid_data_cluster, 2171, "The data cluster being restored has no record of its metacluster" )
ERROR( metacluster_mismatch, 2172, "The cluster does not have the expected name or is associated with a different metacluster" )
// 2200 - errors from bindings and official APIs
ERROR( api_version_unset, 2200, "API version is not set" )

View File

@ -303,6 +303,37 @@ inline void load(Archive& ar, boost::container::flat_map<K, V>& value) {
ASSERT(ar.protocolVersion().isValid());
}
template <class Archive, class... Variants>
inline void save(Archive& ar, const std::variant<Variants...> value) {
ar << (uint8_t)value.index();
std::visit([&](auto& inner) { ar << inner; }, value);
ASSERT(ar.protocolVersion().isValid());
}
namespace {
template <class Archive, class Value, class Variant, class... Variants>
inline void loadVariant(Archive& ar, uint8_t index, Value& value) {
if (index == 0) {
Variant v;
ar >> v;
value = v;
} else if constexpr (sizeof...(Variants) > 0) {
loadVariant<Archive, Value, Variants...>(ar, index - 1, value);
} else {
ASSERT(false);
}
}
} // anonymous namespace
template <class Archive, class... Variants>
inline void load(Archive& ar, std::variant<Variants...>& value) {
uint8_t index;
ar >> index;
ASSERT(index < sizeof...(Variants));
loadVariant<Archive, std::variant<Variants...>, Variants...>(ar, index, value);
ASSERT(ar.protocolVersion().isValid());
}
#ifdef _MSC_VER
#pragma intrinsic(memcpy)
#endif
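The save/load overloads added above encode a std::variant as a one-byte index of the active alternative followed by the alternative itself; loadVariant recurses through the parameter pack, decrementing the index until it selects the right type. A minimal round-trip sketch, assuming the usual versioned BinaryWriter/BinaryReader archives (the alternatives chosen here are illustrative only):
// Sketch only: round-trip a variant through the overloads defined above.
std::variant<int32_t, int64_t> original = int64_t{ 42 };
BinaryWriter wr(IncludeVersion());
wr << original; // writes index byte (1), then the 64-bit value
std::variant<int32_t, int64_t> restored;
BinaryReader rd(wr.toValue(), IncludeVersion());
rd >> restored; // loadVariant walks the pack to alternative 1
ASSERT(std::holds_alternative<int64_t>(restored) && std::get<int64_t>(restored) == 42);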

View File

@ -18,6 +18,7 @@ if(WITH_PYTHON)
find_program(OLD_FDBSERVER_BINARY
fdbserver fdbserver.exe
HINTS /opt/foundationdb/old /usr/sbin /usr/libexec /usr/local/sbin /usr/local/libexec)
if(OLD_FDBSERVER_BINARY)
message(STATUS "Use old fdb at ${OLD_FDBSERVER_BINARY}")
else()
@ -28,6 +29,7 @@ if(WITH_PYTHON)
set(fdbserver_location ${CMAKE_BINARY_DIR}/bin/fdbserver)
set(OLD_FDBSERVER_BINARY ${fdbserver_location} CACHE FILEPATH "Old fdbserver binary" FORCE)
endif()
message(WARNING "\
No old fdbserver binary found - using ${fdbserver_location} \
It is recommended to install the current stable version from https://www.foundationdb.org/download/ \
@ -93,11 +95,13 @@ if(WITH_PYTHON)
add_fdb_test(TEST_FILES RocksDBTest.txt IGNORE)
add_fdb_test(TEST_FILES S3BlobStore.txt IGNORE)
add_fdb_test(TEST_FILES SampleNoSimAttrition.txt IGNORE)
if(NOT USE_UBSAN) # TODO re-enable in UBSAN after https://github.com/apple/foundationdb/issues/2410 is resolved
add_fdb_test(TEST_FILES SimpleExternalTest.txt)
else()
add_fdb_test(TEST_FILES SimpleExternalTest.txt IGNORE)
endif()
add_fdb_test(TEST_FILES noSim/SlowTask.txt IGNORE)
add_fdb_test(TEST_FILES SpecificUnitTest.txt IGNORE)
add_fdb_test(TEST_FILES StorageMetricsSampleTests.txt IGNORE)
@ -142,6 +146,7 @@ if(WITH_PYTHON)
add_fdb_test(TEST_FILES fast/BlobGranuleVerifyCycle.toml)
add_fdb_test(TEST_FILES fast/BlobGranuleVerifySmall.toml)
add_fdb_test(TEST_FILES fast/BlobGranuleVerifySmallClean.toml)
# TODO: test occasionally times out due to too many change feed shard parts
add_fdb_test(TEST_FILES fast/BlobGranuleMoveVerifyCycle.toml IGNORE)
add_fdb_test(TEST_FILES fast/BlobRestoreBasic.toml)
@ -174,6 +179,7 @@ if(WITH_PYTHON)
add_fdb_test(TEST_FILES fast/LocalRatekeeper.toml)
add_fdb_test(TEST_FILES fast/LongStackWriteDuringRead.toml)
add_fdb_test(TEST_FILES fast/LowLatency.toml)
# TODO: Fix failures and reenable this test:
add_fdb_test(TEST_FILES fast/LowLatencySingleClog.toml IGNORE)
add_fdb_test(TEST_FILES fast/MemoryLifetime.toml)
@ -217,6 +223,7 @@ if(WITH_PYTHON)
add_fdb_test(TEST_FILES fast/WriteDuringRead.toml)
add_fdb_test(TEST_FILES fast/WriteDuringReadClean.toml)
add_fdb_test(TEST_FILES noSim/RandomUnitTests.toml UNIT)
if(WITH_ROCKSDB_EXPERIMENTAL)
add_fdb_test(TEST_FILES fast/ValidateStorage.toml)
add_fdb_test(TEST_FILES noSim/KeyValueStoreRocksDBTest.toml UNIT)
@ -230,6 +237,7 @@ if(WITH_PYTHON)
add_fdb_test(TEST_FILES fast/PhysicalShardMove.toml IGNORE)
add_fdb_test(TEST_FILES fast/StorageServerCheckpointRestore.toml IGNORE)
endif()
add_fdb_test(TEST_FILES rare/BlobGranuleRanges.toml)
add_fdb_test(TEST_FILES rare/CheckRelocation.toml)
add_fdb_test(TEST_FILES rare/ClogUnclog.toml)
@ -334,7 +342,7 @@ if(WITH_PYTHON)
restarting/from_7.2.0_until_7.3.0/VersionVectorEnableRestart-2.toml)
add_fdb_test(
TEST_FILES restarting/from_7.2.0/DrUpgradeRestart-1.toml
restarting/from_7.2.0/DrUpgradeRestart-2.toml)
restarting/from_7.2.0/DrUpgradeRestart-2.toml)
add_fdb_test(
TEST_FILES restarting/from_7.2.4_until_7.3.0/UpgradeAndBackupRestore-1.toml
restarting/from_7.2.4_until_7.3.0/UpgradeAndBackupRestore-2.toml)
@ -378,7 +386,6 @@ if(WITH_PYTHON)
TEST_FILES restarting/to_7.3.0/CycleTestRestart-1.toml
restarting/to_7.3.0/CycleTestRestart-2.toml)
add_fdb_test(TEST_FILES slow/ApiCorrectness.toml)
add_fdb_test(TEST_FILES slow/ApiCorrectnessAtomicRestore.toml)
add_fdb_test(TEST_FILES slow/ApiCorrectnessSwitchover.toml)
@ -403,6 +410,8 @@ if(WITH_PYTHON)
add_fdb_test(TEST_FILES slow/LongRunning.toml LONG_RUNNING)
add_fdb_test(TEST_FILES slow/LowLatencyWithFailures.toml)
add_fdb_test(TEST_FILES slow/MetaclusterManagement.toml)
add_fdb_test(TEST_FILES slow/MetaclusterManagementConcurrency.toml)
add_fdb_test(TEST_FILES slow/MetaclusterRecovery.toml)
add_fdb_test(TEST_FILES slow/MoveKeysClean.toml)
add_fdb_test(TEST_FILES slow/MoveKeysSideband.toml)
add_fdb_test(TEST_FILES slow/RyowCorrectness.toml)
@ -435,6 +444,7 @@ if(WITH_PYTHON)
add_fdb_test(TEST_FILES slow/ParallelRestoreOldBackupCorrectnessMultiCycles.toml)
add_fdb_test(TEST_FILES slow/ParallelRestoreOldBackupWriteDuringReadAtomicRestore.toml)
add_fdb_test(TEST_FILES ParallelRestoreOldBackupApiCorrectnessAtomicRestore.toml IGNORE)
# Note that status tests are not deterministic.
add_fdb_test(TEST_FILES status/invalid_proc_addresses.txt)
add_fdb_test(TEST_FILES status/local_6_machine_no_replicas_remain.txt)
@ -471,11 +481,14 @@ if(WITH_PYTHON)
endif()
verify_testing()
if(NOT OPEN_FOR_IDE AND NOT WIN32)
create_correctness_package()
if(USE_VALGRIND)
create_valgrind_correctness_package()
endif()
if(ENABLE_LONG_RUNNING_TESTS)
create_long_running_correctness_package()
endif()
@ -488,6 +501,7 @@ if(WITH_PYTHON)
else()
set(ld_env_name "LD_LIBRARY_PATH")
endif()
set(authz_venv_dir ${CMAKE_CURRENT_BINARY_DIR}/authorization_test_venv)
set(authz_venv_activate ". ${authz_venv_dir}/bin/activate")
set(authz_venv_stamp_file ${authz_venv_dir}/venv.ready)
@ -509,34 +523,39 @@ if(WITH_PYTHON)
set(authz_script_dir ${CMAKE_SOURCE_DIR}/tests/authorization)
set(enable_grv_cache 0 1)
set(force_mvc 0 1)
foreach(is_grv_cache_enabled IN LISTS enable_grv_cache)
foreach(is_mvc_forced IN LISTS force_mvc)
if(NOT is_mvc_forced AND is_grv_cache_enabled)
continue() # grv cache requires setting up shared database state, which is only available in MVC mode
endif()
set(authz_test_name "authz")
set(test_opt "")
if(is_grv_cache_enabled)
string(APPEND test_opt " --use-grv-cache")
string(APPEND authz_test_name "_with_grv_cache")
else()
string(APPEND authz_test_name "_no_grv_cache")
endif()
if(is_mvc_forced)
string(APPEND test_opt " --force-multi-version-client")
string(APPEND authz_test_name "_with_forced_mvc")
else()
string(APPEND authz_test_name "_no_forced_mvc")
endif()
set(authz_test_cmd "${authz_venv_activate} && pytest ${authz_script_dir}/authz_test.py -rA --build-dir ${CMAKE_BINARY_DIR} -vvv${test_opt}")
add_test(
NAME ${authz_test_name}
WORKING_DIRECTORY ${authz_venv_dir}
COMMAND bash -c ${authz_test_cmd})
set_tests_properties(${authz_test_name} PROPERTIES ENVIRONMENT "PYTHONPATH=${CMAKE_SOURCE_DIR}/tests/TestRunner;${ld_env_name}=${CMAKE_BINARY_DIR}/lib")
set_tests_properties(${authz_test_name} PROPERTIES FIXTURES_REQUIRED authz_virtual_env)
set_tests_properties(${authz_test_name} PROPERTIES TIMEOUT 120)
endforeach()
endforeach()
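For reference, a minimal Python sketch of the test matrix the nested loops above expand to; only the variable names, test-name suffixes, and flags are taken from the CMake code, the rest is illustrative and not part of the change itself:

# Illustrative sketch of the authz test variants generated above: every
# (grv cache, forced MVC) combination except grv cache without MVC, which is
# skipped because the grv cache needs shared database state that only the
# multi-version client sets up.
from itertools import product

def authz_test_variants():
    variants = []
    for grv_cache, force_mvc in product((False, True), repeat=2):
        if grv_cache and not force_mvc:
            continue  # same skip as the CMake continue() above
        name = "authz"
        opts = []
        if grv_cache:
            opts.append("--use-grv-cache")
            name += "_with_grv_cache"
        else:
            name += "_no_grv_cache"
        if force_mvc:
            opts.append("--force-multi-version-client")
            name += "_with_forced_mvc"
        else:
            name += "_no_forced_mvc"
        variants.append((name, opts))
    return variants

if __name__ == "__main__":
    for name, opts in authz_test_variants():
        print(name, opts)
# Prints three variants:
# authz_no_grv_cache_no_forced_mvc []
# authz_no_grv_cache_with_forced_mvc ['--force-multi-version-client']
# authz_with_grv_cache_with_forced_mvc ['--use-grv-cache', '--force-multi-version-client']

A single variant can then presumably be run on its own with something like ctest -R authz_with_grv_cache_with_forced_mvc from the build directory, since each combination is registered as its own CTest test.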
endif()
else()

View File

@ -2,8 +2,6 @@
testClass = "BlobGranule"
blobGranulesEnabled = true
allowDefaultTenant = false
# FIXME: re-enable rocks at some point
storageEngineExcludeTypes = [4]
[[test]]
testTitle = 'BlobGranuleMoveVerifyCycle'

View File

@ -4,8 +4,6 @@ blobGranulesEnabled = true
allowDefaultTenant = false
injectTargetedSSRestart = true
injectSSDelay = true
# FIXME: re-enable rocks at some point
storageEngineExcludeTypes = [4, 5]
[[test]]
testTitle = 'BlobGranuleVerifyAtomicOps'

View File

@ -4,8 +4,6 @@ blobGranulesEnabled = true
allowDefaultTenant = false
injectTargetedSSRestart = true
injectSSDelay = true
# FIXME: re-enable rocks at some point
storageEngineExcludeTypes = [4, 5]
[[test]]
testTitle = 'BlobGranuleVerifyCycle'

View File

@ -4,8 +4,6 @@ blobGranulesEnabled = true
allowDefaultTenant = false
injectTargetedSSRestart = true
injectSSDelay = true
# FIXME: re-enable rocks at some point
storageEngineExcludeTypes = [4, 5]
[[test]]
testTitle = 'BlobGranuleVerifySmall'

View File

@ -1,8 +1,6 @@
[configuration]
blobGranulesEnabled = true
allowDefaultTenant = false
# FIXME: re-enable rocks at some point
storageEngineExcludeTypes = [4, 5]
testClass = "BlobGranule"
[[test]]

View File

@ -3,8 +3,6 @@ blobGranulesEnabled = true
allowDefaultTenant = false
injectTargetedSSRestart = true
injectSSDelay = true
# FIXME: re-enable rocks at some point
storageEngineExcludeTypes = [4, 5]
[[test]]
testTitle = 'BlobGranuleRanges'

View File

@ -3,7 +3,7 @@ extraMachineCountDC = 2
maxTLogVersion=6
disableHostname=true
disableEncryption=true
storageEngineExcludeTypes=[3, 4]
storageEngineExcludeTypes=[3, 4, 5]
tenantModes=['disabled']
[[knobs]]

View File

@ -1,5 +1,5 @@
[configuration]
storageEngineExcludeTypes = [3]
storageEngineExcludeTypes = [3, 5]
maxTLogVersion = 6
disableTss = true
disableHostname = true

View File

@ -6,10 +6,6 @@ disableEncryption=true
storageEngineExcludeTypes=[4]
tenantModes=['disabled']
[[knobs]]
# This can be removed once the lower bound of this downgrade test is a version that understands the new protocol
shard_encode_location_metadata = false
[[test]]
testTitle = 'CloggedConfigureDatabaseTest'
clearAfterTest = false

View File

@ -5,10 +5,6 @@ disableHostname = true
disableEncryption = true
tenantModes=['disabled']
[[knobs]]
# This can be removed once the lower bound of this downgrade test is a version that understands the new protocol
shard_encode_location_metadata = false
[[test]]
testTitle = 'Clogged'
clearAfterTest = false

View File

@ -5,10 +5,6 @@ disableHostname=true
storageEngineExcludeTypes=[4]
tenantModes=['disabled']
[[knobs]]
# This can be removed once the lower bound of this downgrade test is a version that understands the new protocol
shard_encode_location_metadata = false
[[test]]
testTitle = 'CloggedConfigureDatabaseTest'
clearAfterTest = false

View File

@ -3,10 +3,6 @@ maxTLogVersion = 6
disableTss = true
disableHostname = true
[[knobs]]
# This can be removed once the lower bound of this downgrade test is a version that understands the new protocol
shard_encode_location_metadata = false
[[test]]
testTitle = 'Clogged'
clearAfterTest = false

View File

@ -4,8 +4,6 @@ allowDefaultTenant = false
tenantModes = ['optional', 'required']
injectTargetedSSRestart = true
injectSSDelay = true
# FIXME: re-enable rocks at some point
storageEngineExcludeTypes = [4, 5]
encryptModes = ['domain_aware', 'cluster_aware']
[[knobs]]

View File

@ -2,8 +2,6 @@
blobGranulesEnabled = true
allowDefaultTenant = false
tenantModes = ['optional', 'required']
# FIXME: re-enable rocks at some point
storageEngineExcludeTypes = [4, 5]
encryptModes = ['domain_aware', 'cluster_aware']
[[knobs]]

View File

@ -3,8 +3,6 @@ blobGranulesEnabled = true
allowDefaultTenant = false
injectTargetedSSRestart = true
injectSSDelay = true
# FIXME: re-enable rocks at some point
storageEngineExcludeTypes = [4, 5]
[[test]]
testTitle = 'BlobGranuleVerifyBalance'

View File

@ -1,8 +1,6 @@
[configuration]
blobGranulesEnabled = true
allowDefaultTenant = false
# FIXME: re-enable rocks at some point
storageEngineExcludeTypes = [4, 5]
[[test]]
testTitle = 'BlobGranuleVerifyBalanceClean'

View File

@ -3,8 +3,6 @@ blobGranulesEnabled = true
allowDefaultTenant = false
injectTargetedSSRestart = true
injectSSDelay = true
# FIXME: re-enable rocks at some point
storageEngineExcludeTypes = [4, 5]
[[test]]
testTitle = 'BlobGranuleVerifyLarge'

View File

@ -1,8 +1,6 @@
[configuration]
blobGranulesEnabled = true
allowDefaultTenant = false
# FIXME: re-enable rocks at some point
storageEngineExcludeTypes = [4, 5]
[[test]]
testTitle = 'BlobGranuleVerifyLargeClean'

View File

@ -0,0 +1,22 @@
[configuration]
allowDefaultTenant = false
tenantModes = ['optional', 'required']
allowCreatingTenants = false
extraDatabaseMode = 'Multiple'
extraDatabaseCount = 5
[[test]]
testTitle = 'MetaclusterManagementConcurrencyTest'
clearAfterTest = true
timeout = 2100
runSetup = true
[[test.workload]]
testName = 'MetaclusterManagementConcurrency'
testDuration = 30
[[test.workload]]
testName = 'TenantManagementConcurrency'
useMetacluster = true
createMetacluster = false
testDuration = 30

View File

@ -0,0 +1,19 @@
[configuration]
allowDefaultTenant = false
allowCreatingTenants = false
extraDatabaseMode = 'Multiple'
extraDatabaseCount = 5
extraDatabaseBackupAgents = true
tenantModes = ['optional', 'required']
[[test]]
testTitle = 'MetaclusterRestoreTest'
clearAfterTest = true
timeout = 2100
runSetup = true
simBackupAgents = 'BackupToFile'
[[test.workload]]
testName = 'MetaclusterRestore'
maxTenants = 1000
maxTenantGroups = 20