merge upstream/main

Xiaoxi Wang 2022-05-13 12:22:09 -07:00
commit e59bdbf3a3
101 changed files with 4901 additions and 1769 deletions


@ -306,7 +306,7 @@ endif()
COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py
--build-dir ${CMAKE_BINARY_DIR}
--test-file ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/MixedApiWorkloadSingleThr.toml
--upgrade-path "6.3.23" "7.0.0" "7.2.0"
--upgrade-path "6.3.23" "7.0.0" "7.1.3" "7.2.0"
--process-number 1
)
@ -314,7 +314,7 @@ endif()
COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py
--build-dir ${CMAKE_BINARY_DIR}
--test-file ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/MixedApiWorkloadSingleThr.toml
--upgrade-path "7.0.0" "7.2.0"
--upgrade-path "7.0.0" "7.1.3" "7.2.0"
--process-number 1
)
@ -322,7 +322,7 @@ endif()
COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py
--build-dir ${CMAKE_BINARY_DIR}
--test-file ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/MixedApiWorkloadMultiThr.toml
--upgrade-path "6.3.23" "7.0.0" "7.2.0"
--upgrade-path "6.3.23" "7.0.0" "7.1.3" "7.2.0"
--process-number 3
)
@ -330,9 +330,38 @@ endif()
COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py
--build-dir ${CMAKE_BINARY_DIR}
--test-file ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/MixedApiWorkloadMultiThr.toml
--upgrade-path "7.0.0" "7.2.0"
--upgrade-path "7.0.0" "7.1.3" "7.2.0"
--process-number 3
)
add_test(NAME fdb_c_upgrade_multi_threaded_710api
COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py
--build-dir ${CMAKE_BINARY_DIR}
--test-file ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/MixedApiWorkloadMultiThr.toml
--upgrade-path "7.1.3" "7.2.0" "7.1.3"
--process-number 3
)
add_test(NAME fdb_c_cluster_wiggle
COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py
--build-dir ${CMAKE_BINARY_DIR}
--test-file ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/MixedApiWorkloadMultiThr.toml
--upgrade-path "7.2.0" "wiggle"
--disable-log-dump
--process-number 3
--redundancy double
)
add_test(NAME fdb_c_wiggle_and_upgrade
COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py
--build-dir ${CMAKE_BINARY_DIR}
--test-file ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/MixedApiWorkloadMultiThr.toml
--upgrade-path "7.0.0" "wiggle" "7.2.0"
--disable-log-dump
--process-number 3
--redundancy double
)
endif()
endif()


@ -655,6 +655,7 @@ extern "C" DLLEXPORT FDBFuture* fdb_transaction_get_mapped_range(FDBTransaction*
int target_bytes,
FDBStreamingMode mode,
int iteration,
int matchIndex,
fdb_bool_t snapshot,
fdb_bool_t reverse) {
FDBFuture* r = validate_and_update_parameters(limit, target_bytes, mode, iteration, reverse);
@ -667,6 +668,7 @@ extern "C" DLLEXPORT FDBFuture* fdb_transaction_get_mapped_range(FDBTransaction*
KeySelectorRef(KeyRef(end_key_name, end_key_name_length), end_or_equal, end_offset),
StringRef(mapper_name, mapper_name_length),
GetRangeLimits(limit, target_bytes),
matchIndex,
snapshot,
reverse)
.extractPtr());


@ -384,6 +384,7 @@ DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_transaction_get_mapped_range(FDBTran
int target_bytes,
FDBStreamingMode mode,
int iteration,
int matchIndex,
fdb_bool_t snapshot,
fdb_bool_t reverse);
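For orientation, a minimal sketch (not part of this commit) of calling the extended C API; tr, mapper, and mapperLength are assumed to exist, and the match-index argument is a plain int because fdb_c.h does not itself define named MATCH_INDEX_* constants (those live in fdbclient/FDBTypes.h, where MATCH_INDEX_ALL = 0 and MATCH_INDEX_NONE = 1):
// Hedged sketch: assumes an open FDBTransaction* tr and a packed mapper tuple.
// With matchIndex = 1 (MATCH_INDEX_NONE), only the first and last entries of a
// batch carry the index key; the rest come back empty (see the unit test below).
FDBFuture* f = fdb_transaction_get_mapped_range(
    tr,
    FDB_KEYSEL_FIRST_GREATER_OR_EQUAL((const uint8_t*)"a", 1),
    FDB_KEYSEL_FIRST_GREATER_OR_EQUAL((const uint8_t*)"b", 1),
    (const uint8_t*)mapper, mapperLength,
    /* limit */ 0,
    /* target_bytes */ 0,
    FDB_STREAMING_MODE_WANT_ALL,
    /* iteration */ 0,
    /* matchIndex */ 1,
    /* snapshot */ 0, // mapped ranges are restricted to serializable reads
    /* reverse */ 0);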


@ -33,8 +33,8 @@
namespace FdbApiTester {
constexpr int LONG_WAIT_TIME_US = 1000000;
constexpr int LARGE_NUMBER_OF_RETRIES = 5;
constexpr int LONG_WAIT_TIME_US = 2000000;
constexpr int LARGE_NUMBER_OF_RETRIES = 10;
void TransactionActorBase::complete(fdb_error_t err) {
error = err;


@ -271,6 +271,7 @@ MappedKeyValueArrayFuture Transaction::get_mapped_range(const uint8_t* begin_key
int target_bytes,
FDBStreamingMode mode,
int iteration,
int matchIndex,
fdb_bool_t snapshot,
fdb_bool_t reverse) {
return MappedKeyValueArrayFuture(fdb_transaction_get_mapped_range(tr_,
@ -288,6 +289,7 @@ MappedKeyValueArrayFuture Transaction::get_mapped_range(const uint8_t* begin_key
target_bytes,
mode,
iteration,
matchIndex,
snapshot,
reverse));
}


@ -304,6 +304,7 @@ public:
int target_bytes,
FDBStreamingMode mode,
int iteration,
int matchIndex,
fdb_bool_t snapshot,
fdb_bool_t reverse);


@ -261,6 +261,7 @@ GetMappedRangeResult get_mapped_range(fdb::Transaction& tr,
int target_bytes,
FDBStreamingMode mode,
int iteration,
int matchIndex,
fdb_bool_t snapshot,
fdb_bool_t reverse) {
fdb::MappedKeyValueArrayFuture f1 = tr.get_mapped_range(begin_key_name,
@ -277,6 +278,7 @@ GetMappedRangeResult get_mapped_range(fdb::Transaction& tr,
target_bytes,
mode,
iteration,
matchIndex,
snapshot,
reverse);
@ -951,7 +953,11 @@ std::map<std::string, std::string> fillInRecords(int n) {
return data;
}
GetMappedRangeResult getMappedIndexEntries(int beginId, int endId, fdb::Transaction& tr, std::string mapper) {
GetMappedRangeResult getMappedIndexEntries(int beginId,
int endId,
fdb::Transaction& tr,
std::string mapper,
int matchIndex = MATCH_INDEX_ALL) {
std::string indexEntryKeyBegin = indexEntryKey(beginId);
std::string indexEntryKeyEnd = indexEntryKey(endId);
@ -965,13 +971,17 @@ GetMappedRangeResult getMappedIndexEntries(int beginId, int endId, fdb::Transact
/* target_bytes */ 0,
/* FDBStreamingMode */ FDB_STREAMING_MODE_WANT_ALL,
/* iteration */ 0,
/* matchIndex */ matchIndex,
/* snapshot */ false,
/* reverse */ 0);
}
GetMappedRangeResult getMappedIndexEntries(int beginId, int endId, fdb::Transaction& tr) {
GetMappedRangeResult getMappedIndexEntries(int beginId,
int endId,
fdb::Transaction& tr,
int matchIndex = MATCH_INDEX_ALL) {
std::string mapper = Tuple().append(prefix).append(RECORD).append("{K[3]}"_sr).append("{...}"_sr).pack().toString();
return getMappedIndexEntries(beginId, endId, tr, mapper);
return getMappedIndexEntries(beginId, endId, tr, mapper, matchIndex);
}
TEST_CASE("fdb_transaction_get_mapped_range") {
@ -983,7 +993,8 @@ TEST_CASE("fdb_transaction_get_mapped_range") {
while (1) {
int beginId = 1;
int endId = 19;
auto result = getMappedIndexEntries(beginId, endId, tr);
const int matchIndex = deterministicRandom()->random01() > 0.5 ? MATCH_INDEX_NONE : MATCH_INDEX_ALL;
auto result = getMappedIndexEntries(beginId, endId, tr, matchIndex);
if (result.err) {
fdb::EmptyFuture f1 = tr.on_error(result.err);
@ -998,7 +1009,11 @@ TEST_CASE("fdb_transaction_get_mapped_range") {
int id = beginId;
for (int i = 0; i < expectSize; i++, id++) {
const auto& [key, value, begin, end, range_results] = result.mkvs[i];
CHECK(indexEntryKey(id).compare(key) == 0);
if (matchIndex == MATCH_INDEX_ALL || i == 0 || i == expectSize - 1) {
CHECK(indexEntryKey(id).compare(key) == 0);
} else {
CHECK(EMPTY.compare(key) == 0);
}
CHECK(EMPTY.compare(value) == 0);
CHECK(range_results.size() == SPLIT_SIZE);
for (int split = 0; split < SPLIT_SIZE; split++) {
@ -1024,6 +1039,7 @@ TEST_CASE("fdb_transaction_get_mapped_range_restricted_to_serializable") {
/* target_bytes */ 0,
/* FDBStreamingMode */ FDB_STREAMING_MODE_WANT_ALL,
/* iteration */ 0,
/* matchIndex */ MATCH_INDEX_ALL,
/* snapshot */ true, // Set snapshot to true
/* reverse */ 0);
ASSERT(result.err == error_code_unsupported_operation);
@ -1043,6 +1059,7 @@ TEST_CASE("fdb_transaction_get_mapped_range_restricted_to_ryw_enable") {
/* target_bytes */ 0,
/* FDBStreamingMode */ FDB_STREAMING_MODE_WANT_ALL,
/* iteration */ 0,
/* matchIndex */ MATCH_INDEX_ALL,
/* snapshot */ false,
/* reverse */ 0);
ASSERT(result.err == error_code_unsupported_operation);


@ -960,6 +960,7 @@ JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDBTransaction_Transaction_1
jint targetBytes,
jint streamingMode,
jint iteration,
jint matchIndex,
jboolean snapshot,
jboolean reverse) {
if (!tPtr || !keyBeginBytes || !keyEndBytes || !mapperBytes) {
@ -1007,6 +1008,7 @@ JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDBTransaction_Transaction_1
targetBytes,
(FDBStreamingMode)streamingMode,
iteration,
matchIndex,
snapshot,
reverse);
jenv->ReleaseByteArrayElements(keyBeginBytes, (jbyte*)barrBegin, JNI_ABORT);


@ -192,12 +192,12 @@ class MappedRangeQueryIntegrationTest {
RangeQueryWithIndex mappedRangeQuery = (int begin, int end, Database db) -> db.run(tr -> {
try {
List<MappedKeyValue> kvs =
tr.getMappedRange(KeySelector.firstGreaterOrEqual(indexEntryKey(begin)),
KeySelector.firstGreaterOrEqual(indexEntryKey(end)), MAPPER,
ReadTransaction.ROW_LIMIT_UNLIMITED, false, StreamingMode.WANT_ALL)
.asList()
.get();
List<MappedKeyValue> kvs = tr.getMappedRange(KeySelector.firstGreaterOrEqual(indexEntryKey(begin)),
KeySelector.firstGreaterOrEqual(indexEntryKey(end)), MAPPER,
ReadTransaction.ROW_LIMIT_UNLIMITED,
FDBTransaction.MATCH_INDEX_ALL, false, StreamingMode.WANT_ALL)
.asList()
.get();
Assertions.assertEquals(end - begin, kvs.size());
if (validate) {


@ -32,6 +32,10 @@ import com.apple.foundationdb.async.AsyncUtil;
import com.apple.foundationdb.tuple.ByteArrayUtil;
class FDBTransaction extends NativeObjectWrapper implements Transaction, OptionConsumer {
public static final int MATCH_INDEX_ALL = 0;
public static final int MATCH_INDEX_NONE = 1;
private final Database database;
private final Executor executor;
private final TransactionOptions options;
@ -93,7 +97,8 @@ class FDBTransaction extends NativeObjectWrapper implements Transaction, OptionC
@Override
public AsyncIterable<MappedKeyValue> getMappedRange(KeySelector begin, KeySelector end, byte[] mapper,
int limit, boolean reverse, StreamingMode mode) {
int limit, int matchIndex, boolean reverse,
StreamingMode mode) {
throw new UnsupportedOperationException("getMappedRange is only supported in serializable");
}
@ -346,8 +351,8 @@ class FDBTransaction extends NativeObjectWrapper implements Transaction, OptionC
}
@Override
public AsyncIterable<MappedKeyValue> getMappedRange(KeySelector begin, KeySelector end, byte[] mapper,
int limit, boolean reverse, StreamingMode mode) {
public AsyncIterable<MappedKeyValue> getMappedRange(KeySelector begin, KeySelector end, byte[] mapper, int limit,
int matchIndex, boolean reverse, StreamingMode mode) {
if (mapper == null) {
throw new IllegalArgumentException("Mapper must be non-null");
}
@ -467,9 +472,9 @@ class FDBTransaction extends NativeObjectWrapper implements Transaction, OptionC
begin.toString(), end.toString(), rowLimit, targetBytes, streamingMode,
iteration, Boolean.toString(isSnapshot), Boolean.toString(reverse)));*/
return new FutureMappedResults(
Transaction_getMappedRange(getPtr(), begin.getKey(), begin.orEqual(), begin.getOffset(),
end.getKey(), end.orEqual(), end.getOffset(), mapper, rowLimit,
targetBytes, streamingMode, iteration, isSnapshot, reverse),
Transaction_getMappedRange(getPtr(), begin.getKey(), begin.orEqual(), begin.getOffset(), end.getKey(),
end.orEqual(), end.getOffset(), mapper, rowLimit, targetBytes, streamingMode,
iteration, MATCH_INDEX_ALL, isSnapshot, reverse),
FDB.instance().isDirectBufferQueriesEnabled(), executor, eventKeeper);
} finally {
pointerReadLock.unlock();
@ -809,12 +814,11 @@ class FDBTransaction extends NativeObjectWrapper implements Transaction, OptionC
byte[] keyEnd, boolean orEqualEnd, int offsetEnd,
int rowLimit, int targetBytes, int streamingMode, int iteration,
boolean isSnapshot, boolean reverse);
private native long Transaction_getMappedRange(long cPtr, byte[] keyBegin, boolean orEqualBegin,
int offsetBegin, byte[] keyEnd, boolean orEqualEnd,
int offsetEnd,
byte[] mapper, // Nonnull
int rowLimit, int targetBytes, int streamingMode, int iteration,
boolean isSnapshot, boolean reverse);
private native long Transaction_getMappedRange(long cPtr, byte[] keyBegin, boolean orEqualBegin, int offsetBegin,
byte[] keyEnd, boolean orEqualEnd, int offsetEnd,
byte[] mapper, // Nonnull
int rowLimit, int targetBytes, int streamingMode, int iteration,
int matchIndex, boolean isSnapshot, boolean reverse);
private native void Transaction_addConflictRange(long cPtr,
byte[] keyBegin, byte[] keyEnd, int conflictRangeType);
private native void Transaction_set(long cPtr, byte[] key, byte[] value);


@ -460,7 +460,7 @@ public interface ReadTransaction extends ReadTransactionContext {
* @return a handle to access the results of the asynchronous call
*/
AsyncIterable<MappedKeyValue> getMappedRange(KeySelector begin, KeySelector end, byte[] mapper, int limit,
boolean reverse, StreamingMode mode);
int matchIndex, boolean reverse, StreamingMode mode);
/**
* Gets an estimate for the number of bytes stored in the given range.


@ -123,7 +123,7 @@ set(FORCE_BOOST_BUILD OFF CACHE BOOL "Forces cmake to build boost and ignores an
if(Boost_FOUND AND Boost_filesystem_FOUND AND Boost_context_FOUND AND NOT FORCE_BOOST_BUILD)
add_library(boost_target INTERFACE)
target_link_libraries(boost_target INTERFACE Boost::boost Boost::context_FOUND Boost::filesystem)
target_link_libraries(boost_target INTERFACE Boost::boost Boost::context Boost::filesystem)
elseif(WIN32)
message(FATAL_ERROR "Could not find Boost")
else()

View File

@ -5,3 +5,4 @@ if(NOT WIN32)
add_subdirectory(TraceLogHelper)
add_subdirectory(TestHarness)
endif()
add_subdirectory(mockkms)


@ -376,11 +376,13 @@ namespace SummarizeTest
bool useNewPlugin = (oldServerName == fdbserverName) || versionGreaterThanOrEqual(oldServerName.Split('-').Last(), "5.2.0");
bool useToml = File.Exists(testFile + "-1.toml");
string testFile1 = useToml ? testFile + "-1.toml" : testFile + "-1.txt";
result = RunTest(firstServerName, useNewPlugin ? tlsPluginFile : tlsPluginFile_5_1, summaryFileName, errorFileName, seed, buggify, testFile1, runDir, uid, expectedUnseed, out unseed, out retryableError, logOnRetryableError, useValgrind, false, true, oldServerName, traceToStdout, noSim, faultInjectionEnabled);
bool useValgrindRunOne = useValgrind && firstServerName == fdbserverName;
bool useValgrindRunTwo = useValgrind && secondServerName == fdbserverName;
result = RunTest(firstServerName, useNewPlugin ? tlsPluginFile : tlsPluginFile_5_1, summaryFileName, errorFileName, seed, buggify, testFile1, runDir, uid, expectedUnseed, out unseed, out retryableError, logOnRetryableError, useValgrindRunOne, false, true, oldServerName, traceToStdout, noSim, faultInjectionEnabled);
if (result == 0)
{
string testFile2 = useToml ? testFile + "-2.toml" : testFile + "-2.txt";
result = RunTest(secondServerName, tlsPluginFile, summaryFileName, errorFileName, seed+1, buggify, testFile2, runDir, uid, expectedUnseed, out unseed, out retryableError, logOnRetryableError, useValgrind, true, false, oldServerName, traceToStdout, noSim, faultInjectionEnabled);
result = RunTest(secondServerName, tlsPluginFile, summaryFileName, errorFileName, seed+1, buggify, testFile2, runDir, uid, expectedUnseed, out unseed, out retryableError, logOnRetryableError, useValgrindRunTwo, true, false, oldServerName, traceToStdout, noSim, faultInjectionEnabled);
}
}
else
@ -458,7 +460,7 @@ namespace SummarizeTest
role, IsRunningOnMono() ? "" : "-q", seed, testFile, buggify ? "on" : "off", faultInjectionArg, tlsPluginArg);
}
if (restarting) args = args + " --restarting";
if (useValgrind && !willRestart)
if (useValgrind)
{
valgrindOutputFile = string.Format("valgrind-{0}.xml", seed);
process.StartInfo.FileName = "valgrind";


@ -0,0 +1,18 @@
if(WITH_GO_BINDING)
set(MOCK_KMS_SRC fault_injection.go get_encryption_keys.go mock_kms.go utils.go)
set(MOCK_KMS_TEST_SRC ${MOCK_KMS_SRC} mockkms_test.go)
add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/bin/mockkms
COMMAND go build -o ${CMAKE_BINARY_DIR}/bin/mockkms ${MOCK_KMS_SRC}
DEPENDS ${MOCK_KMS_SRC}
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
add_custom_target(mockkms ALL DEPENDS ${CMAKE_BINARY_DIR}/bin/mockkms)
fdb_install(PROGRAMS ${CMAKE_BINARY_DIR}/bin/mockkms DESTINATION bin COMPONENT server)
add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/bin/mockkms_test
COMMAND go test -c -o ${CMAKE_BINARY_DIR}/bin/mockkms_test ${MOCK_KMS_TEST_SRC}
DEPENDS ${MOCK_KMS_TEST_SRC}
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
add_custom_target(mockkms_test ALL DEPENDS ${CMAKE_BINARY_DIR}/bin/mockkms_test)
add_test(NAME mockkms COMMAND ${CMAKE_BINARY_DIR}/bin/mockkms_test)
endif()


@ -0,0 +1,179 @@
/*
* fault_injection.go
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Interface that allows a client to inject fault(s).
// The module lets a client update the { FaultLocation -> FaultStatus } mapping in a
// thread-safe manner; however, the client is responsible for synchronizing fault-status
// updates across 'getEncryptionKeys' REST requests to obtain predictable results.
package main
import (
"encoding/json"
"fmt"
"io/ioutil"
"log"
"net/http"
"os"
"sync"
)
type Fault struct {
Location int `json:"fault_location"`
Enable bool `json:"enable_fault"`
}
type FaultInjectionRequest struct {
Faults []Fault `json:"faults"`
}
type FaultInjectionResponse struct {
Faults []Fault `json:"faults"`
}
type faultLocMap struct {
locMap map[int]bool
rwLock sync.RWMutex
}
var (
faultLocMapInstance *faultLocMap // Singleton mapping of { FaultLocation -> FaultStatus }
)
// Caller is responsible for thread synchronization. Recommended to be invoked during package init().
func NewFaultLocMap() *faultLocMap {
if faultLocMapInstance == nil {
faultLocMapInstance = &faultLocMap{}
faultLocMapInstance.rwLock = sync.RWMutex{}
faultLocMapInstance.locMap = map[int]bool {
READ_HTTP_REQUEST_BODY : false,
UNMARSHAL_REQUEST_BODY_JSON : false,
UNSUPPORTED_QUERY_MODE : false,
PARSE_HTTP_REQUEST : false,
MARSHAL_RESPONSE : false,
}
}
return faultLocMapInstance
}
func getLocFaultStatus(loc int) (val bool, found bool) {
if faultLocMapInstance == nil {
panic("FaultLocMap not initialized")
}
faultLocMapInstance.rwLock.RLock()
defer faultLocMapInstance.rwLock.RUnlock()
val, found = faultLocMapInstance.locMap[loc]
if !found {
return
}
return
}
func updateLocFaultStatuses(faults []Fault) (updated []Fault, err error) {
if faultLocMapInstance == nil {
panic("FaultLocMap not initialized")
}
updated = []Fault{}
err = nil
faultLocMapInstance.rwLock.Lock()
defer faultLocMapInstance.rwLock.Unlock()
for i := 0; i < len(faults); i++ {
fault := faults[i]
oldVal, found := faultLocMapInstance.locMap[fault.Location]
if !found {
err = fmt.Errorf("Unknown fault_location '%d'", fault.Location)
return
}
faultLocMapInstance.locMap[fault.Location] = fault.Enable
log.Printf("Update Location '%d' oldVal '%t' newVal '%t'", fault.Location, oldVal, fault.Enable)
}
// return the updated faultLocMap
for loc, enable := range faultLocMapInstance.locMap {
var f Fault
f.Location = loc
f.Enable = enable
updated = append(updated, f)
}
return
}
func jsonifyFaultArr(w http.ResponseWriter, faults []Fault) (jResp string) {
resp := FaultInjectionResponse{
Faults: faults,
}
mResp, err := json.Marshal(resp)
if err != nil {
log.Printf("Error marshaling response '%s'", err.Error())
sendErrorResponse(w, err)
return
}
jResp = string(mResp)
return
}
func updateFaultLocMap(w http.ResponseWriter, faults []Fault) {
updated, err := updateLocFaultStatuses(faults)
if err != nil {
sendErrorResponse(w, err)
return
}
fmt.Fprint(w, jsonifyFaultArr(w, updated))
}
func shouldInjectFault(loc int) bool {
status, found := getLocFaultStatus(loc)
if !found {
log.Printf("Unknown fault_location '%d'", loc)
return false
}
return status
}
func handleUpdateFaultInjection(w http.ResponseWriter, r *http.Request) {
byteArr, err := ioutil.ReadAll(r.Body)
if err != nil {
log.Printf("Http request body read error '%s'", err.Error())
sendErrorResponse(w, err)
return
}
req := FaultInjectionRequest{}
err = json.Unmarshal(byteArr, &req)
if err != nil {
log.Printf("Error parsing FaultInjectionRequest '%s'", string(byteArr))
sendErrorResponse(w, err)
return
}
updateFaultLocMap(w, req.Faults)
}
func initFaultLocMap() {
faultLocMapInstance = NewFaultLocMap()
log.Printf("FaultLocMap int done")
}


@ -0,0 +1,321 @@
/*
* get_encryption_keys.go
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// GetEncryptionKeys handler
// The handler is responsible for the following:
// 1. Parse the incoming HttpRequest and validate JSON request structural sanity
// 2. Ability to handle getEncryptionKeys by 'KeyId' or 'DomainId' as requested
// 3. Ability to inject faults if requested
package main
import (
"encoding/json"
"fmt"
"io/ioutil"
"log"
"math/rand"
"net/http"
)
type CipherDetailRes struct {
BaseCipherId uint64 `json:"base_cipher_id"`
EncryptDomainId int64 `json:"encrypt_domain_id"`
BaseCipher string `json:"base_cipher"`
}
type ValidationToken struct {
TokenName string `json:"token_name"`
TokenValue string `json:"token_value"`
}
type CipherDetailReq struct {
BaseCipherId uint64 `json:"base_cipher_id"`
EncryptDomainId int64 `json:"encrypt_domain_id"`
}
type GetEncryptKeysResponse struct {
CipherDetails []CipherDetailRes `json:"cipher_key_details"`
KmsUrls []string `json:"kms_urls"`
}
type GetEncryptKeysRequest struct {
QueryMode string `json:"query_mode"`
CipherDetails []CipherDetailReq `json:"cipher_key_details"`
ValidationTokens []ValidationToken `json:"validation_tokens"`
RefreshKmsUrls bool `json:"refresh_kms_urls"`
}
type cipherMapInstanceSingleton map[uint64][]byte
const (
READ_HTTP_REQUEST_BODY = iota
UNMARSHAL_REQUEST_BODY_JSON
UNSUPPORTED_QUERY_MODE
PARSE_HTTP_REQUEST
MARSHAL_RESPONSE
)
const (
maxCipherKeys = uint64(1024*1024) // Max cipher keys
maxCipherSize = 16 // Max cipher buffer size
)
var (
cipherMapInstance cipherMapInstanceSingleton // Singleton mapping of { baseCipherId -> baseCipher }
)
// const mapping of { Location -> errorString }
func errStrMap() func(int) string {
_errStrMap := map[int]string {
READ_HTTP_REQUEST_BODY : "Http request body read error",
UNMARSHAL_REQUEST_BODY_JSON : "Http request body unmarshal error",
UNSUPPORTED_QUERY_MODE : "Unsupported query_mode",
PARSE_HTTP_REQUEST : "Error parsing GetEncryptionKeys request",
MARSHAL_RESPONSE : "Error marshaling response",
}
return func(key int) string {
return _errStrMap[key]
}
}
// Caller is responsible for thread synchronization. Recommended to be invoked during package init().
func NewCipherMap(maxKeys uint64, cipherSize int) cipherMapInstanceSingleton {
if cipherMapInstance == nil {
cipherMapInstance = make(map[uint64][]byte)
for i := uint64(1); i<= maxKeys; i++ {
cipher := make([]byte, cipherSize)
rand.Read(cipher)
cipherMapInstance[i] = cipher
}
log.Printf("KMS cipher map populate done, maxCiphers '%d'", maxCipherKeys)
}
return cipherMapInstance
}
func getKmsUrls() (urls []string) {
urlCount := rand.Intn(5) + 1
for i := 1; i <= urlCount; i++ {
url := fmt.Sprintf("https://KMS/%d:%d:%d:%d", i, i, i, i)
urls = append(urls, url)
}
return
}
func isEncryptDomainIdValid(id int64) bool {
if id > 0 || id == -1 || id == -2 {
return true
}
return false
}
func abs(x int64) int64 {
if x < 0 {
return -x
}
return x
}
func getBaseCipherIdFromDomainId(domainId int64) (baseCipherId uint64) {
baseCipherId = uint64(1) + uint64(abs(domainId)) % maxCipherKeys
return
}
func getEncryptionKeysByKeyIds(w http.ResponseWriter, byteArr []byte) {
req := GetEncryptKeysRequest{}
err := json.Unmarshal(byteArr, &req)
if err != nil || shouldInjectFault(PARSE_HTTP_REQUEST) {
var e error
if shouldInjectFault(PARSE_HTTP_REQUEST) {
e = fmt.Errorf("[FAULT] %s %s'", errStrMap()(PARSE_HTTP_REQUEST), string(byteArr))
} else {
e = fmt.Errorf("%s %s' err '%v'", errStrMap()(PARSE_HTTP_REQUEST), string(byteArr), err)
}
log.Println(e.Error())
sendErrorResponse(w, e)
return
}
var details []CipherDetailRes
for i := 0; i < len(req.CipherDetails); i++ {
var baseCipherId = uint64(req.CipherDetails[i].BaseCipherId)
var encryptDomainId = int64(req.CipherDetails[i].EncryptDomainId)
if !isEncryptDomainIdValid(encryptDomainId) {
e := fmt.Errorf("EncryptDomainId not valid '%d'", encryptDomainId)
sendErrorResponse(w, e)
return
}
cipher, found := cipherMapInstance[baseCipherId]
if !found {
e := fmt.Errorf("BaseCipherId not found '%d'", baseCipherId)
sendErrorResponse(w, e)
return
}
var detail = CipherDetailRes {
BaseCipherId: baseCipherId,
EncryptDomainId: encryptDomainId,
BaseCipher: string(cipher),
}
details = append(details, detail)
}
var urls []string
if req.RefreshKmsUrls {
urls = getKmsUrls()
}
resp := GetEncryptKeysResponse{
CipherDetails: details,
KmsUrls: urls,
}
mResp, err := json.Marshal(resp)
if err != nil || shouldInjectFault(MARSHAL_RESPONSE) {
var e error
if shouldInjectFault(MARSHAL_RESPONSE) {
e = fmt.Errorf("[FAULT] %s", errStrMap()(MARSHAL_RESPONSE))
} else {
e = fmt.Errorf("%s err '%v'", errStrMap()(MARSHAL_RESPONSE), err)
}
log.Println(e.Error())
sendErrorResponse(w, e)
return
}
fmt.Fprint(w, string(mResp))
}
func getEncryptionKeysByDomainIds(w http.ResponseWriter, byteArr []byte) {
req := GetEncryptKeysRequest{}
err := json.Unmarshal(byteArr, &req)
if err != nil || shouldInjectFault(PARSE_HTTP_REQUEST) {
var e error
if shouldInjectFault(PARSE_HTTP_REQUEST) {
e = fmt.Errorf("[FAULT] %s '%s'", errStrMap()(PARSE_HTTP_REQUEST), string(byteArr))
} else {
e = fmt.Errorf("%s '%s' err '%v'", errStrMap()(PARSE_HTTP_REQUEST), string(byteArr), err)
}
log.Println(e.Error())
sendErrorResponse(w, e)
return
}
var details []CipherDetailRes
for i := 0; i < len(req.CipherDetails); i++ {
var encryptDomainId = int64(req.CipherDetails[i].EncryptDomainId)
if !isEncryptDomainIdValid(encryptDomainId) {
e := fmt.Errorf("EncryptDomainId not valid '%d'", encryptDomainId)
sendErrorResponse(w, e)
return
}
var baseCipherId = getBaseCipherIdFromDomainId(encryptDomainId)
cipher, found := cipherMapInstance[baseCipherId]
if !found {
e := fmt.Errorf("BaseCipherId not found '%d'", baseCipherId)
sendErrorResponse(w, e)
return
}
var detail = CipherDetailRes {
BaseCipherId: baseCipherId,
EncryptDomainId: encryptDomainId,
BaseCipher: string(cipher),
}
details = append(details, detail)
}
var urls []string
if req.RefreshKmsUrls {
urls = getKmsUrls()
}
resp := GetEncryptKeysResponse{
CipherDetails: details,
KmsUrls: urls,
}
mResp, err := json.Marshal(resp)
if err != nil || shouldInjectFault(MARSHAL_RESPONSE) {
var e error
if shouldInjectFault(MARSHAL_RESPONSE) {
e = fmt.Errorf("[FAULT] %s", errStrMap()(MARSHAL_RESPONSE))
} else {
e = fmt.Errorf("%s err '%v'", errStrMap()(MARSHAL_RESPONSE), err)
}
log.Println(e.Error())
sendErrorResponse(w, e)
return
}
fmt.Fprint(w, string(mResp))
}
func handleGetEncryptionKeys(w http.ResponseWriter, r *http.Request) {
byteArr, err := ioutil.ReadAll(r.Body)
if err != nil || shouldInjectFault(READ_HTTP_REQUEST_BODY) {
var e error
if shouldInjectFault(READ_HTTP_REQUEST_BODY) {
e = fmt.Errorf("[FAULT] %s", errStrMap()(READ_HTTP_REQUEST_BODY))
} else {
e = fmt.Errorf("%s err '%v'", errStrMap()(READ_HTTP_REQUEST_BODY), err)
}
log.Println(e.Error())
sendErrorResponse(w, e)
return
}
var arbitrary_json map[string]interface{}
err = json.Unmarshal(byteArr, &arbitrary_json)
if err != nil || shouldInjectFault(UNMARSHAL_REQUEST_BODY_JSON) {
var e error
if shouldInjectFault(UNMARSHAL_REQUEST_BODY_JSON) {
e = fmt.Errorf("[FAULT] %s", errStrMap()(UNMARSHAL_REQUEST_BODY_JSON))
} else {
e = fmt.Errorf("%s err '%v'", errStrMap()(UNMARSHAL_REQUEST_BODY_JSON), err)
}
log.Println(e.Error())
sendErrorResponse(w, e)
return
}
if shouldInjectFault(UNSUPPORTED_QUERY_MODE) {
err = fmt.Errorf("[FAULT] %s '%s'", errStrMap()(UNSUPPORTED_QUERY_MODE), arbitrary_json["query_mode"])
sendErrorResponse(w, err)
return
} else if arbitrary_json["query_mode"] == "lookupByKeyId" {
getEncryptionKeysByKeyIds(w, byteArr)
} else if arbitrary_json["query_mode"] == "lookupByDomainId" {
getEncryptionKeysByDomainIds(w, byteArr)
} else {
err = fmt.Errorf("%s '%s'", errStrMap()(UNSUPPORTED_QUERY_MODE), arbitrary_json["query_mode"])
sendErrorResponse(w, err)
return
}
}
func initEncryptCipherMap() {
cipherMapInstance = NewCipherMap(maxCipherKeys, maxCipherSize)
}


@ -0,0 +1,66 @@
/*
* mock_kms.go
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// FoundationDB Mock KMS (Key Management Solution/Service) interface
// Interface runs an HTTP server handling REST calls simulating FDB communications
// with an external KMS.
package main
import (
"log"
"math/rand"
"net/http"
"sync"
"time"
)
// KMS supported endpoints
const (
getEncryptionKeysEndpoint = "/getEncryptionKeys"
updateFaultInjectionEndpoint = "/updateFaultInjection"
)
// Routine responsible for instantiating the data structures necessary for MockKMS to function
func init () {
var wg sync.WaitGroup
wg.Add(2)
go func(){
initEncryptCipherMap()
wg.Done()
}()
go func(){
initFaultLocMap()
wg.Done()
}()
wg.Wait()
rand.Seed(time.Now().UTC().UnixNano())
}
func main() {
http.HandleFunc(getEncryptionKeysEndpoint, handleGetEncryptionKeys)
http.HandleFunc(updateFaultInjectionEndpoint, handleUpdateFaultInjection)
log.Fatal(http.ListenAndServe(":5001", nil))
}


@ -0,0 +1,302 @@
/*
* mockkms_test.go
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// MockKMS unit tests, the coverage includes:
// 1. Mock HttpRequest creation and HttpResponse writer.
// 2. Construct fake requests to validate the following scenarios:
// 2.1. Request with "unsupported query mode"
// 2.2. Get encryption keys by KeyIds; with and without 'RefreshKmsUrls' flag.
// 2.3. Get encryption keys by DomainIds; with and without 'RefreshKmsUrls' flag.
// 2.4. Random fault injection and response validation
package main
import (
"encoding/json"
"io/ioutil"
"math/rand"
"net/http"
"net/http/httptest"
"strings"
"testing"
)
const (
ByKeyIdReqWithRefreshUrls = `{
"query_mode": "lookupByKeyId",
"cipher_key_details": [
{
"base_cipher_id": 77,
"encrypt_domain_id": 76
},
{
"base_cipher_id": 2,
"encrypt_domain_id": -1
}
],
"validation_tokens": [
{
"token_name": "1",
"token_value":"12344"
},
{
"token_name": "2",
"token_value":"12334"
}
],
"refresh_kms_urls": true
}`
ByKeyIdReqWithoutRefreshUrls = `{
"query_mode": "lookupByKeyId",
"cipher_key_details": [
{
"base_cipher_id": 77,
"encrypt_domain_id": 76
},
{
"base_cipher_id": 2,
"encrypt_domain_id": -1
}
],
"validation_tokens": [
{
"token_name": "1",
"token_value":"12344"
},
{
"token_name": "2",
"token_value":"12334"
}
],
"refresh_kms_urls": false
}`
ByDomainIdReqWithRefreshUrls = `{
"query_mode": "lookupByDomainId",
"cipher_key_details": [
{
"encrypt_domain_id": 76
},
{
"encrypt_domain_id": -1
}
],
"validation_tokens": [
{
"token_name": "1",
"token_value":"12344"
},
{
"token_name": "2",
"token_value":"12334"
}
],
"refresh_kms_urls": true
}`
ByDomainIdReqWithoutRefreshUrls = `{
"query_mode": "lookupByDomainId",
"cipher_key_details": [
{
"encrypt_domain_id": 76
},
{
"encrypt_domain_id": -1
}
],
"validation_tokens": [
{
"token_name": "1",
"token_value":"12344"
},
{
"token_name": "2",
"token_value":"12334"
}
],
"refresh_kms_urls": false
}`
UnsupportedQueryMode = `{
"query_mode": "foo_mode",
"cipher_key_details": [
{
"encrypt_domain_id": 76
},
{
"encrypt_domain_id": -1
}
],
"validation_tokens": [
{
"token_name": "1",
"token_value":"12344"
},
{
"token_name": "2",
"token_value":"12334"
}
],
"refresh_kms_urls": false
}`
)
func unmarshalValidResponse(data []byte, t *testing.T) (resp GetEncryptKeysResponse) {
resp = GetEncryptKeysResponse{}
err := json.Unmarshal(data, &resp)
if err != nil {
t.Errorf("Error unmarshaling valid response '%s' error '%v'", string(data), err)
t.Fail()
}
return
}
func unmarshalErrorResponse(data []byte, t *testing.T) (resp ErrorResponse) {
resp = ErrorResponse{}
err := json.Unmarshal(data, &resp)
if err != nil {
t.Errorf("Error unmarshaling error response resp '%s' error '%v'", string(data), err)
t.Fail()
}
return
}
func checkGetEncryptKeysResponseValidity(resp GetEncryptKeysResponse, t *testing.T) {
if len(resp.CipherDetails) != 2 {
t.Errorf("Unexpected CipherDetails count, expected '%d' actual '%d'", 2, len(resp.CipherDetails))
t.Fail()
}
baseCipherIds := [...]uint64 {uint64(77), uint64(2)}
encryptDomainIds := [...]int64 {int64(76), int64(-1)}
for i := 0; i < len(resp.CipherDetails); i++ {
if resp.CipherDetails[i].BaseCipherId != baseCipherIds[i] {
t.Errorf("Mismatch BaseCipherId, expected '%d' actual '%d'", baseCipherIds[i], resp.CipherDetails[i].BaseCipherId)
t.Fail()
}
if resp.CipherDetails[i].EncryptDomainId != encryptDomainIds[i] {
t.Errorf("Mismatch EncryptDomainId, expected '%d' actual '%d'", encryptDomainIds[i], resp.CipherDetails[i].EncryptDomainId)
t.Fail()
}
if len(resp.CipherDetails[i].BaseCipher) == 0 {
t.Error("Empty BaseCipher")
t.Fail()
}
}
}
func runQueryExpectingErrorResponse(payload string, url string, errSubStr string, t *testing.T) {
body := strings.NewReader(payload)
req := httptest.NewRequest(http.MethodPost, url, body)
w := httptest.NewRecorder()
handleGetEncryptionKeys(w, req)
res := w.Result()
defer res.Body.Close()
data, err := ioutil.ReadAll(res.Body)
if err != nil {
t.Errorf("Error %v", err)
}
resp := unmarshalErrorResponse(data, t)
if !strings.Contains(resp.Err.Detail, errSubStr) {
t.Errorf("Unexpected error response '%s'", resp.Err.Detail)
t.Fail()
}
}
func runQueryExpectingValidResponse(payload string, url string, t *testing.T) {
body := strings.NewReader(payload)
req := httptest.NewRequest(http.MethodPost, url, body)
w := httptest.NewRecorder()
handleGetEncryptionKeys(w, req)
res := w.Result()
defer res.Body.Close()
data, err := ioutil.ReadAll(res.Body)
if err != nil {
t.Errorf("Error %v", err)
}
resp := unmarshalValidResponse(data, t)
checkGetEncryptKeysResponseValidity(resp, t)
}
func TestUnsupportedQueryMode(t *testing.T) {
runQueryExpectingErrorResponse(UnsupportedQueryMode, getEncryptionKeysEndpoint, errStrMap()(UNSUPPORTED_QUERY_MODE), t)
}
func TestGetEncryptionKeysByKeyIdsWithRefreshUrls(t *testing.T) {
runQueryExpectingValidResponse(ByKeyIdReqWithRefreshUrls, getEncryptionKeysEndpoint, t)
}
func TestGetEncryptionKeysByKeyIdsWithoutRefreshUrls(t *testing.T) {
runQueryExpectingValidResponse(ByKeyIdReqWithoutRefreshUrls, getEncryptionKeysEndpoint, t)
}
func TestGetEncryptionKeysByDomainIdsWithRefreshUrls(t *testing.T) {
runQueryExpectingValidResponse(ByDomainIdReqWithRefreshUrls, getEncryptionKeysEndpoint, t)
}
func TestGetEncryptionKeysByDomainIdsWithoutRefreshUrls(t *testing.T) {
runQueryExpectingValidResponse(ByDomainIdReqWithoutRefreshUrls, getEncryptionKeysEndpoint, t)
}
func TestFaultInjection(t *testing.T) {
numIterations := rand.Intn(701) + 86
for i := 0; i < numIterations; i++ {
loc := rand.Intn(MARSHAL_RESPONSE + 1)
f := Fault{}
f.Location = loc
f.Enable = true
var faults []Fault
faults = append(faults, f)
fW := httptest.NewRecorder()
body := strings.NewReader(jsonifyFaultArr(fW, faults))
fReq := httptest.NewRequest(http.MethodPost, updateFaultInjectionEndpoint, body)
handleUpdateFaultInjection(fW, fReq)
if !shouldInjectFault(loc) {
t.Errorf("Expected fault enabled for loc '%d'", loc)
t.Fail()
}
var payload string
lottery := rand.Intn(100)
if lottery < 25 {
payload = ByKeyIdReqWithRefreshUrls
} else if lottery >= 25 && lottery < 50 {
payload = ByKeyIdReqWithoutRefreshUrls
} else if lottery >= 50 && lottery < 75 {
payload = ByDomainIdReqWithRefreshUrls
} else {
payload = ByDomainIdReqWithoutRefreshUrls
}
runQueryExpectingErrorResponse(payload, getEncryptionKeysEndpoint, errStrMap()(loc), t)
// reset Fault
faults[0].Enable = false
fW = httptest.NewRecorder()
body = strings.NewReader(jsonifyFaultArr(fW, faults))
fReq = httptest.NewRequest(http.MethodPost, updateFaultInjectionEndpoint, body)
handleUpdateFaultInjection(fW, fReq)
if shouldInjectFault(loc) {
t.Errorf("Expected fault disabled for loc '%d'", loc)
t.Fail()
}
}
}

contrib/mockkms/utils.go

@ -0,0 +1,51 @@
/*
* utils.go
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"encoding/json"
"fmt"
"log"
"net/http"
)
type ErrorDetail struct {
Detail string `json:"details"`
}
type ErrorResponse struct {
Err ErrorDetail `json:"error"`
}
func sendErrorResponse(w http.ResponseWriter, err error) {
e := ErrorDetail{}
e.Detail = fmt.Sprintf("Error: %s", err.Error())
resp := ErrorResponse{
Err: e,
}
mResp, err := json.Marshal(resp)
if err != nil {
log.Printf("Error marshaling error response '%s'", err.Error())
panic(err)
}
fmt.Fprint(w, string(mResp))
}


@ -194,7 +194,8 @@ class BaseInfo(object):
if protocol_version >= PROTOCOL_VERSION_6_3:
self.dc_id = bb.get_bytes_with_length()
if protocol_version >= PROTOCOL_VERSION_7_1:
self.tenant = bb.get_bytes_with_length()
if bb.get_bytes(1):
self.tenant = bb.get_bytes_with_length()
class GetVersionInfo(BaseInfo):
def __init__(self, bb, protocol_version):
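As a companion, a hedged C++ sketch (an assumption, not part of this commit) of the writer-side encoding this parser change mirrors: flow's BinaryWriter emits an Optional as a one-byte presence flag followed by a length-prefixed value when present, which is what get_bytes(1) and get_bytes_with_length() consume above.
// Hypothetical writer side; names are illustrative only.
BinaryWriter wr(Unversioned());
Optional<TenantNameRef> tenant; // absent -> a single 0x00 byte
wr << tenant; // present -> 0x01, then the 32-bit length and the tenant bytes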


@ -4,6 +4,52 @@
Release Notes
#############
7.1.5
=====
* Fixed an fdbcli kill bug that was not killing processes in parallel. `(PR #7150) <https://github.com/apple/foundationdb/pull/7150>`_
* Fixed a bug that prevents a peer from sending messages on a previously incompatible connection. `(PR #7124) <https://github.com/apple/foundationdb/pull/7124>`_
* Added rocksdb throttling counters to trace event. `(PR #7096) <https://github.com/apple/foundationdb/pull/7096>`_
* Added a backtrace before throwing serialization_failed. `(PR #7155) <https://github.com/apple/foundationdb/pull/7155>`_
7.1.4
=====
* Fixed a bug that prevents client from connecting to a cluster. `(PR #7060) <https://github.com/apple/foundationdb/pull/7060>`_
* Fixed a performance bug that overloads Resolver CPU. `(PR #7068) <https://github.com/apple/foundationdb/pull/7068>`_
* Optimized storage server performance for "get range and flat map" feature. `(PR #7078) <https://github.com/apple/foundationdb/pull/7078>`_
* Optimized both Proxy performance and Resolver (when version vector is enabled) performance. `(PR #7076) <https://github.com/apple/foundationdb/pull/7076>`_
* Fixed a key size limit bug when using tenants. `(PR #6986) <https://github.com/apple/foundationdb/pull/6986>`_
* Fixed operation_failed thrown incorrectly from transactions. `(PR #6993) <https://github.com/apple/foundationdb/pull/6993>`_
* Fixed a version vector bug when GRV cache is used. `(PR #7057) <https://github.com/apple/foundationdb/pull/7057>`_
* Fixed orphaned storage server due to force recovery. `(PR #7028) <https://github.com/apple/foundationdb/pull/7028>`_
* Fixed a bug that a storage server reads stale cluster ID. `(PR #7026) <https://github.com/apple/foundationdb/pull/7026>`_
* Fixed a storage server exclusion status bug that affects wiggling. `(PR #6984) <https://github.com/apple/foundationdb/pull/6984>`_
* Fixed a bug that relocate shard tasks move data to a removed team. `(PR #7023) <https://github.com/apple/foundationdb/pull/7023>`_
* Fixed recruitment thrashing when there are temporarily multiple cluster controllers. `(PR #7001) <https://github.com/apple/foundationdb/pull/7001>`_
* Fixed change feed deletion due to multiple sources race. `(PR #6987) <https://github.com/apple/foundationdb/pull/6987>`_
* Fixed TLog crash if more TLogs are absent than the replication factor. `(PR #6991) <https://github.com/apple/foundationdb/pull/6991>`_
* Added hostname DNS resolution logic for cluster connection string. `(PR #6998) <https://github.com/apple/foundationdb/pull/6998>`_
* Fixed a limit bug in indexPrefetch. `(PR #7005) <https://github.com/apple/foundationdb/pull/7005>`_
7.1.3
=====
* Added logging measuring commit compute duration. `(PR #6906) <https://github.com/apple/foundationdb/pull/6906>`_
* RocksDb used aggregated property metrics for pending compaction bytes. `(PR #6867) <https://github.com/apple/foundationdb/pull/6867>`_
* Fixed a perpetual wiggle bug that would not react to a pause. `(PR #6933) <https://github.com/apple/foundationdb/pull/6933>`_
* Fixed a crash of data distributor. `(PR #6938) <https://github.com/apple/foundationdb/pull/6938>`_
* Added new c libs to client package. `(PR #6921) <https://github.com/apple/foundationdb/pull/6921>`_
* Fixed a bug that prevents a cluster from reaching a fully recovered state after taking a snapshot. `(PR #6892) <https://github.com/apple/foundationdb/pull/6892>`_
7.1.2
=====
* Fixed failing upgrades due to non-persisted initial cluster version. `(PR #6864) <https://github.com/apple/foundationdb/pull/6864>`_
* Fixed a client load balancing bug because ClientDBInfo may be unintentionally not set. `(PR #6878) <https://github.com/apple/foundationdb/pull/6878>`_
* Fixed stuck LogRouter due to races of multiple PeekStream requests. `(PR #6870) <https://github.com/apple/foundationdb/pull/6870>`_
* Fixed a client-side infinite loop due to provisional GRV Proxy ID not set in GetReadVersionReply. `(PR #6849) <https://github.com/apple/foundationdb/pull/6849>`_
7.1.1
=====
* Added new c libs to client package. `(PR #6828) <https://github.com/apple/foundationdb/pull/6828>`_
7.1.0
=====


@ -620,6 +620,7 @@ CSimpleOpt::SOption g_rgBackupListOptions[] = {
#endif
{ OPT_BASEURL, "-b", SO_REQ_SEP },
{ OPT_BASEURL, "--base-url", SO_REQ_SEP },
{ OPT_PROXY, "--proxy", SO_REQ_SEP },
{ OPT_TRACE, "--log", SO_NONE },
{ OPT_TRACE_DIR, "--logdir", SO_REQ_SEP },
{ OPT_TRACE_FORMAT, "--trace-format", SO_REQ_SEP },
@ -3336,6 +3337,10 @@ int main(int argc, char* argv[]) {
}
Optional<std::string> proxy;
std::string p;
if (platform::getEnvironmentVar("HTTP_PROXY", p) || platform::getEnvironmentVar("HTTPS_PROXY", p)) {
proxy = p;
}
std::string destinationContainer;
bool describeDeep = false;
bool describeTimestamps = false;


@ -35,7 +35,10 @@
namespace fdb_cli {
ACTOR Future<bool> profileCommandActor(Reference<ITransaction> tr, std::vector<StringRef> tokens, bool intrans) {
ACTOR Future<bool> profileCommandActor(Database db,
Reference<ITransaction> tr,
std::vector<StringRef> tokens,
bool intrans) {
state bool result = true;
if (tokens.size() == 1) {
printUsage(tokens[0]);
@ -45,7 +48,7 @@ ACTOR Future<bool> profileCommandActor(Reference<ITransaction> tr, std::vector<S
fprintf(stderr, "ERROR: Usage: profile client <get|set>\n");
return false;
}
wait(GlobalConfig::globalConfig().onInitialized());
wait(db->globalConfig->onInitialized());
if (tokencmp(tokens[2], "get")) {
if (tokens.size() != 3) {
fprintf(stderr, "ERROR: Addtional arguments to `get` are not supported.\n");
@ -53,12 +56,12 @@ ACTOR Future<bool> profileCommandActor(Reference<ITransaction> tr, std::vector<S
}
std::string sampleRateStr = "default";
std::string sizeLimitStr = "default";
const double sampleRateDbl = GlobalConfig::globalConfig().get<double>(
fdbClientInfoTxnSampleRate, std::numeric_limits<double>::infinity());
const double sampleRateDbl =
db->globalConfig->get<double>(fdbClientInfoTxnSampleRate, std::numeric_limits<double>::infinity());
if (!std::isinf(sampleRateDbl)) {
sampleRateStr = std::to_string(sampleRateDbl);
}
const int64_t sizeLimit = GlobalConfig::globalConfig().get<int64_t>(fdbClientInfoTxnSizeLimit, -1);
const int64_t sizeLimit = db->globalConfig->get<int64_t>(fdbClientInfoTxnSizeLimit, -1);
if (sizeLimit != -1) {
sizeLimitStr = boost::lexical_cast<std::string>(sizeLimit);
}


@ -782,7 +782,7 @@ void fdbcliCompCmd(std::string const& text, std::vector<std::string>& lc) {
int count = tokens.size();
// for(int i = 0; i < count; i++) {
// printf("Token (%d): `%s'\n", i, tokens[i].toString().c_str());
// printf("Token (%d): `%s'\n", i, tokens[i].toString().c_str());
// }
std::string ntext = "";
@ -1012,6 +1012,36 @@ Future<T> stopNetworkAfter(Future<T> what) {
}
}
ACTOR Future<Void> addInterface(std::map<Key, std::pair<Value, ClientLeaderRegInterface>>* address_interface,
Reference<FlowLock> connectLock,
KeyValue kv) {
wait(connectLock->take());
state FlowLock::Releaser releaser(*connectLock);
state ClientWorkerInterface workerInterf =
BinaryReader::fromStringRef<ClientWorkerInterface>(kv.value, IncludeVersion());
state ClientLeaderRegInterface leaderInterf(workerInterf.address());
choose {
when(Optional<LeaderInfo> rep =
wait(brokenPromiseToNever(leaderInterf.getLeader.getReply(GetLeaderRequest())))) {
StringRef ip_port =
(kv.key.endsWith(LiteralStringRef(":tls")) ? kv.key.removeSuffix(LiteralStringRef(":tls")) : kv.key)
.removePrefix(LiteralStringRef("\xff\xff/worker_interfaces/"));
(*address_interface)[ip_port] = std::make_pair(kv.value, leaderInterf);
if (workerInterf.reboot.getEndpoint().addresses.secondaryAddress.present()) {
Key full_ip_port2 =
StringRef(workerInterf.reboot.getEndpoint().addresses.secondaryAddress.get().toString());
StringRef ip_port2 = full_ip_port2.endsWith(LiteralStringRef(":tls"))
? full_ip_port2.removeSuffix(LiteralStringRef(":tls"))
: full_ip_port2;
(*address_interface)[ip_port2] = std::make_pair(kv.value, leaderInterf);
}
}
when(wait(delay(CLIENT_KNOBS->CLI_CONNECT_TIMEOUT))) {}
}
return Void();
}
ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
state LineNoise& linenoise = *plinenoise;
state bool intrans = false;
@ -1514,9 +1544,62 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
if (tokencmp(tokens[0], "kill")) {
getTransaction(db, managementTenant, tr, options, intrans);
bool _result = wait(makeInterruptable(killCommandActor(db, tr, tokens, &address_interface)));
if (!_result)
is_error = true;
if (tokens.size() == 1) {
state ThreadFuture<RangeResult> wInterfF =
tr->getRange(KeyRangeRef(LiteralStringRef("\xff\xff/worker_interfaces/"),
LiteralStringRef("\xff\xff/worker_interfaces0")),
CLIENT_KNOBS->TOO_MANY);
RangeResult kvs = wait(makeInterruptable(safeThreadFutureToFuture(wInterfF)));
ASSERT(!kvs.more);
auto connectLock = makeReference<FlowLock>(CLIENT_KNOBS->CLI_CONNECT_PARALLELISM);
std::vector<Future<Void>> addInterfs;
for (auto it : kvs) {
addInterfs.push_back(addInterface(&address_interface, connectLock, it));
}
wait(waitForAll(addInterfs));
}
if (tokens.size() == 1 || tokencmp(tokens[1], "list")) {
if (address_interface.size() == 0) {
printf("\nNo addresses can be killed.\n");
} else if (address_interface.size() == 1) {
printf("\nThe following address can be killed:\n");
} else {
printf("\nThe following %zu addresses can be killed:\n", address_interface.size());
}
for (auto it : address_interface) {
printf("%s\n", printable(it.first).c_str());
}
printf("\n");
} else if (tokencmp(tokens[1], "all")) {
for (auto it : address_interface) {
BinaryReader::fromStringRef<ClientWorkerInterface>(it.second.first, IncludeVersion())
.reboot.send(RebootRequest());
}
if (address_interface.size() == 0) {
fprintf(stderr,
"ERROR: no processes to kill. You must run the `kill command before "
"running `kill all.\n");
} else {
printf("Attempted to kill %zu processes\n", address_interface.size());
}
} else {
for (int i = 1; i < tokens.size(); i++) {
if (!address_interface.count(tokens[i])) {
fprintf(stderr, "ERROR: process `%s' not recognized.\n", printable(tokens[i]).c_str());
is_error = true;
break;
}
}
if (!is_error) {
for (int i = 1; i < tokens.size(); i++) {
BinaryReader::fromStringRef<ClientWorkerInterface>(address_interface[tokens[i]].first,
IncludeVersion())
.reboot.send(RebootRequest());
}
printf("Attempted to kill %zu processes\n", tokens.size() - 1);
}
}
continue;
}
@ -1552,7 +1635,7 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
if (tokencmp(tokens[0], "profile")) {
getTransaction(db, managementTenant, tr, options, intrans);
bool _result = wait(makeInterruptable(profileCommandActor(tr, tokens, intrans)));
bool _result = wait(makeInterruptable(profileCommandActor(localDb, tr, tokens, intrans)));
if (!_result)
is_error = true;
continue;


@ -121,10 +121,7 @@ extern const KeyRangeRef processClassTypeSpecialKeyRange;
// Other special keys
inline const KeyRef errorMsgSpecialKey = LiteralStringRef("\xff\xff/error_message");
// helper functions (Copied from fdbcli.actor.cpp)
// decode worker interfaces
ACTOR Future<Void> addInterface(std::map<Key, std::pair<Value, ClientLeaderRegInterface>>* address_interface,
Reference<FlowLock> connectLock,
KeyValue kv);
// get all workers' info
ACTOR Future<bool> getWorkers(Reference<IDatabase> db, std::vector<ProcessData>* workers);
@ -217,7 +214,10 @@ ACTOR Future<bool> clearHealthyZone(Reference<IDatabase> db,
bool clearSSFailureZoneString = false);
ACTOR Future<bool> maintenanceCommandActor(Reference<IDatabase> db, std::vector<StringRef> tokens);
// profile command
ACTOR Future<bool> profileCommandActor(Reference<ITransaction> tr, std::vector<StringRef> tokens, bool intrans);
ACTOR Future<bool> profileCommandActor(Database db,
Reference<ITransaction> tr,
std::vector<StringRef> tokens,
bool intrans);
// setclass command
ACTOR Future<bool> setClassCommandActor(Reference<IDatabase> db, std::vector<StringRef> tokens);
// snapshot command


@ -222,6 +222,8 @@ struct KeyRangeLocationInfo {
: tenantEntry(tenantEntry), range(range), locations(locations) {}
};
class GlobalConfig;
class DatabaseContext : public ReferenceCounted<DatabaseContext>, public FastAllocated<DatabaseContext>, NonCopyable {
public:
static DatabaseContext* allocateOnForeignThread() {
@ -627,6 +629,7 @@ public:
using TransactionT = ReadYourWritesTransaction;
Reference<TransactionT> createTransaction();
std::unique_ptr<GlobalConfig> globalConfig;
EventCacheHolder connectToDatabaseEventCacheHolder;
private:


@ -148,6 +148,9 @@ static const Tag invalidTag{ tagLocalitySpecial, 0 };
static const Tag txsTag{ tagLocalitySpecial, 1 };
static const Tag cacheTag{ tagLocalitySpecial, 2 };
const int MATCH_INDEX_ALL = 0;
const int MATCH_INDEX_NONE = 1;
enum { txsTagOld = -1, invalidTagOld = -100 };
struct TagsAndMessage {


@ -37,12 +37,33 @@ const KeyRef transactionTagSampleCost = LiteralStringRef("config/transaction_tag
const KeyRef samplingFrequency = LiteralStringRef("visibility/sampling/frequency");
const KeyRef samplingWindow = LiteralStringRef("visibility/sampling/window");
GlobalConfig::GlobalConfig(Database& cx) : cx(cx), lastUpdate(0) {}
GlobalConfig::GlobalConfig(const Database& cx) : cx(cx), lastUpdate(0) {}
GlobalConfig& GlobalConfig::globalConfig() {
void* res = g_network->global(INetwork::enGlobalConfig);
ASSERT(res);
return *reinterpret_cast<GlobalConfig*>(res);
void GlobalConfig::applyChanges(Transaction& tr,
const VectorRef<KeyValueRef>& insertions,
const VectorRef<KeyRangeRef>& clears) {
VersionHistory vh{ 0 };
for (const auto& kv : insertions) {
vh.mutations.emplace_back_deep(vh.mutations.arena(), MutationRef(MutationRef::SetValue, kv.key, kv.value));
tr.set(kv.key.withPrefix(globalConfigKeysPrefix), kv.value);
}
for (const auto& range : clears) {
vh.mutations.emplace_back_deep(vh.mutations.arena(),
MutationRef(MutationRef::ClearRange, range.begin, range.end));
tr.clear(
KeyRangeRef(range.begin.withPrefix(globalConfigKeysPrefix), range.end.withPrefix(globalConfigKeysPrefix)));
}
// Record the mutations in this commit into the global configuration history.
Key historyKey = addVersionStampAtEnd(globalConfigHistoryPrefix);
ObjectWriter historyWriter(IncludeVersion());
historyWriter.serialize(vh);
tr.atomicOp(historyKey, historyWriter.toStringRef(), MutationRef::SetVersionstampedKey);
// Write version key to trigger update in cluster controller.
tr.atomicOp(globalConfigVersionKey,
LiteralStringRef("0123456789\x00\x00\x00\x00"), // versionstamp
MutationRef::SetVersionstampedValue);
}
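For illustration, a hypothetical caller of the new static applyChanges (the key, value, and range below are made up; per the header comment later in this diff, keys carry no global-config prefix and the caller must still commit the transaction):
// Hedged sketch, not part of this commit.
Transaction tr(db);
Arena arena;
VectorRef<KeyValueRef> insertions;
insertions.push_back_deep(arena, KeyValueRef("config/sample_knob"_sr, packedValue)); // packedValue: a pre-encoded value, assumed
VectorRef<KeyRangeRef> clears;
clears.push_back_deep(arena, KeyRangeRef("visibility/"_sr, "visibility0"_sr));
GlobalConfig::applyChanges(tr, insertions, clears); // stages the mutations plus a history record
// wait(tr.commit()); // persisting the changes is still the caller's job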
Key GlobalConfig::prefixedKey(KeyRef key) {


@ -33,6 +33,7 @@
#include <unordered_map>
#include "fdbclient/CommitProxyInterface.h"
#include "fdbclient/DatabaseContext.h"
#include "fdbclient/GlobalConfig.h"
#include "fdbclient/ReadYourWrites.h"
@ -66,34 +67,32 @@ struct ConfigValue : ReferenceCounted<ConfigValue> {
class GlobalConfig : NonCopyable {
public:
// Creates a GlobalConfig singleton, accessed by calling
// GlobalConfig::globalConfig(). This function requires a database object
// to allow global configuration to run transactions on the database, and
// an AsyncVar object to watch for changes on. The ClientDBInfo pointer
// Requires a database object to allow global configuration to run
// transactions on the database.
explicit GlobalConfig(const Database& cx);
// Requires an AsyncVar object to watch for changes on. The ClientDBInfo pointer
// should point to a ClientDBInfo object which will contain the updated
// global configuration history when the given AsyncVar changes. This
// function should be called whenever the database object changes, in order
// to allow global configuration to run transactions on the latest
// database.
template <class T>
static void create(Database& cx, Reference<AsyncVar<T> const> db, const ClientDBInfo* dbInfo) {
if (g_network->global(INetwork::enGlobalConfig) == nullptr) {
auto config = new GlobalConfig{ cx };
g_network->setGlobal(INetwork::enGlobalConfig, config);
config->_updater = updater(config, dbInfo);
// Bind changes in `db` to the `dbInfoChanged` AsyncTrigger.
// TODO: Change AsyncTrigger to a Reference
forward(db, std::addressof(config->dbInfoChanged));
} else {
GlobalConfig* config = reinterpret_cast<GlobalConfig*>(g_network->global(INetwork::enGlobalConfig));
config->cx = cx;
}
void init(Reference<AsyncVar<T> const> db, const ClientDBInfo* dbInfo) {
_updater = updater(this, dbInfo);
// Bind changes in `db` to the `dbInfoChanged` AsyncTrigger.
// TODO: Change AsyncTrigger to a Reference
_forward = forward(db, std::addressof(dbInfoChanged));
}
// Returns a reference to the global GlobalConfig object. Clients should
// call this function whenever they need to read a value out of the global
// configuration.
static GlobalConfig& globalConfig();
// Given a list of insertions and clears, applies the necessary changes to
// the given transaction to update the global configuration database. Keys
// in the list of mutations should not include the global configuration
// prefix (`\xff\xff/global_config/`). The caller must still commit the
// given transaction in order to persist the changes.
static void applyChanges(Transaction& tr,
const VectorRef<KeyValueRef>& insertions,
const VectorRef<KeyRangeRef>& clears);
// Use this function to turn a global configuration key defined above into
// the full path needed to set the value in the database.
@ -150,8 +149,6 @@ public:
void trigger(KeyRef key, std::function<void(std::optional<std::any>)> fn);
private:
GlobalConfig(Database& cx);
// The functions below only affect the local copy of the global
// configuration keyspace! To insert or remove values across all nodes you
// must use a transaction (see the note above).
@ -173,6 +170,7 @@ private:
Database cx;
AsyncTrigger dbInfoChanged;
Future<Void> _forward;
Future<Void> _updater;
Promise<Void> initialized;
AsyncTrigger configChanged;
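
The hunk above replaces the process-wide GlobalConfig singleton with an instance owned by each database, initialized via init(). A hedged before/after sketch of a caller (`cx` stands for a DatabaseContext pointer; `key` and `fallback` are illustrative):

// Before this commit: a process-global lookup
double rate = GlobalConfig::globalConfig().get<double>(key, fallback);
// After this commit: per-database state, so multiple clusters in one process
// no longer share (or clobber) a single configuration cache
double rate2 = cx->globalConfig->get<double>(key, fallback);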

View File

@ -68,6 +68,7 @@ public:
const KeySelectorRef& end,
const StringRef& mapper,
GetRangeLimits limits,
int matchIndex = MATCH_INDEX_ALL,
bool snapshot = false,
bool reverse = false) = 0;
virtual ThreadFuture<Standalone<VectorRef<const char*>>> getAddressesForKey(const KeyRef& key) = 0;

View File

@ -74,6 +74,7 @@ public:
KeySelector end,
Key mapper,
GetRangeLimits limits,
int matchIndex = MATCH_INDEX_ALL,
Snapshot = Snapshot::False,
Reverse = Reverse::False) = 0;
virtual Future<Standalone<VectorRef<const char*>>> getAddressesForKey(Key const& key) = 0;
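
Both transaction interfaces above gain a matchIndex parameter defaulting to MATCH_INDEX_ALL, so existing callers keep their old behavior. A hedged sketch of a caller passing it explicitly (the key names and mapper contents are illustrative, not from the repository):

Future<MappedRangeResult> f = tr.getMappedRange(firstGreaterOrEqual("idx/"_sr),
                                                firstGreaterOrEqual("idx0"_sr),
                                                mapper, // a Tuple-packed mapper spec
                                                GetRangeLimits(100),
                                                MATCH_INDEX_ALL, // parameter added by this commit
                                                Snapshot::False,
                                                Reverse::False);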

View File

@ -270,6 +270,9 @@ TEST_CASE("/fdbclient/MonitorLeader/ConnectionString/hostname") {
ACTOR Future<std::vector<NetworkAddress>> tryResolveHostnamesImpl(ClusterConnectionString* self) {
state std::set<NetworkAddress> allCoordinatorsSet;
for (const auto& coord : self->coords) {
allCoordinatorsSet.insert(coord);
}
std::vector<Future<Void>> fs;
for (auto& hostname : self->hostnames) {
fs.push_back(map(hostname.resolve(), [&](Optional<NetworkAddress> const& addr) -> Void {
@ -280,9 +283,6 @@ ACTOR Future<std::vector<NetworkAddress>> tryResolveHostnamesImpl(ClusterConnect
}));
}
wait(waitForAll(fs));
for (const auto& coord : self->coords) {
allCoordinatorsSet.insert(coord);
}
std::vector<NetworkAddress> allCoordinators(allCoordinatorsSet.begin(), allCoordinatorsSet.end());
std::sort(allCoordinators.begin(), allCoordinators.end());
return allCoordinators;
@ -300,7 +300,7 @@ TEST_CASE("/fdbclient/MonitorLeader/PartialResolve") {
INetworkConnections::net()->addMockTCPEndpoint(hn, port, { address });
state ClusterConnectionString cs(connectionString);
ClusterConnectionString cs(connectionString);
state std::vector<NetworkAddress> allCoordinators = wait(cs.tryResolveHostnames());
ASSERT(allCoordinators.size() == 1 &&
std::find(allCoordinators.begin(), allCoordinators.end(), address) != allCoordinators.end());
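
The reordering above inserts the statically known coordinators into the result set before the resolution futures are awaited; the hostname lookups themselves still run concurrently and are joined with waitForAll(). A self-contained sketch of that fan-out/join pattern, with std::async standing in for flow futures and a fake resolver:

#include <future>
#include <set>
#include <string>
#include <vector>

std::set<std::string> resolveAllSketch(const std::vector<std::string>& names) {
    std::set<std::string> out;
    std::vector<std::future<std::string>> fs;
    fs.reserve(names.size());
    for (const auto& n : names)
        fs.push_back(std::async(std::launch::async, [n] { return "addr-of:" + n; }));
    for (auto& f : fs)
        out.insert(f.get()); // join, analogous to wait(waitForAll(fs))
    return out;
}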

View File

@ -158,6 +158,7 @@ ThreadFuture<MappedRangeResult> DLTransaction::getMappedRange(const KeySelectorR
const KeySelectorRef& end,
const StringRef& mapper,
GetRangeLimits limits,
int matchIndex,
bool snapshot,
bool reverse) {
FdbCApi::FDBFuture* f = api->transactionGetMappedRange(tr,
@ -175,6 +176,7 @@ ThreadFuture<MappedRangeResult> DLTransaction::getMappedRange(const KeySelectorR
limits.bytes,
FDB_STREAMING_MODE_EXACT,
0,
matchIndex,
snapshot,
reverse);
return toThreadFuture<MappedRangeResult>(api, f, [](FdbCApi::FDBFuture* f, FdbCApi* api) {
@ -971,10 +973,11 @@ ThreadFuture<MappedRangeResult> MultiVersionTransaction::getMappedRange(const Ke
const KeySelectorRef& end,
const StringRef& mapper,
GetRangeLimits limits,
int matchIndex,
bool snapshot,
bool reverse) {
auto tr = getTransaction();
auto f = tr.transaction ? tr.transaction->getMappedRange(begin, end, mapper, limits, snapshot, reverse)
auto f = tr.transaction ? tr.transaction->getMappedRange(begin, end, mapper, limits, matchIndex, snapshot, reverse)
: makeTimeout<MappedRangeResult>();
return abortableFuture(f, tr.onChange);
}
@ -1609,7 +1612,7 @@ void MultiVersionDatabase::DatabaseState::protocolVersionChanged(ProtocolVersion
// When the protocol version changes, clear the corresponding entry in the shared state map
// so it can be re-initialized. Only do so if there was a valid previous protocol version.
if (dbProtocolVersion.present() && MultiVersionApi::apiVersionAtLeast(710)) {
MultiVersionApi::api->clearClusterSharedStateMapEntry(clusterFilePath);
MultiVersionApi::api->clearClusterSharedStateMapEntry(clusterFilePath, dbProtocolVersion.get());
}
dbProtocolVersion = protocolVersion;
@ -1722,8 +1725,10 @@ void MultiVersionDatabase::DatabaseState::updateDatabase(Reference<IDatabase> ne
}
}
if (db.isValid() && dbProtocolVersion.present() && MultiVersionApi::apiVersionAtLeast(710)) {
auto updateResult = MultiVersionApi::api->updateClusterSharedStateMap(clusterFilePath, db);
auto updateResult =
MultiVersionApi::api->updateClusterSharedStateMap(clusterFilePath, dbProtocolVersion.get(), db);
auto handler = mapThreadFuture<Void, Void>(updateResult, [this](ErrorOr<Void> result) {
TraceEvent("ClusterSharedStateUpdated").detail("ClusterFilePath", clusterFilePath);
dbVar->set(db);
return ErrorOr<Void>(Void());
});
@ -2389,12 +2394,30 @@ void MultiVersionApi::updateSupportedVersions() {
}
}
ThreadFuture<Void> MultiVersionApi::updateClusterSharedStateMap(std::string clusterFilePath, Reference<IDatabase> db) {
ThreadFuture<Void> MultiVersionApi::updateClusterSharedStateMap(std::string clusterFilePath,
ProtocolVersion dbProtocolVersion,
Reference<IDatabase> db) {
MutexHolder holder(lock);
if (clusterSharedStateMap.find(clusterFilePath) == clusterSharedStateMap.end()) {
clusterSharedStateMap[clusterFilePath] = db->createSharedState();
TraceEvent("CreatingClusterSharedState")
.detail("ClusterFilePath", clusterFilePath)
.detail("ProtocolVersion", dbProtocolVersion);
clusterSharedStateMap[clusterFilePath] = { db->createSharedState(), dbProtocolVersion };
} else {
ThreadFuture<DatabaseSharedState*> entry = clusterSharedStateMap[clusterFilePath];
auto& sharedStateInfo = clusterSharedStateMap[clusterFilePath];
if (sharedStateInfo.protocolVersion != dbProtocolVersion) {
// This situation should never happen, because we are connecting to the same cluster,
// so the protocol version must be the same
TraceEvent(SevError, "ClusterStateProtocolVersionMismatch")
.detail("ClusterFilePath", clusterFilePath)
.detail("ProtocolVersionExpected", dbProtocolVersion)
.detail("ProtocolVersionFound", sharedStateInfo.protocolVersion);
return Void();
}
TraceEvent("SettingClusterSharedState")
.detail("ClusterFilePath", clusterFilePath)
.detail("ProtocolVersion", dbProtocolVersion);
ThreadFuture<DatabaseSharedState*> entry = sharedStateInfo.sharedStateFuture;
return mapThreadFuture<DatabaseSharedState*, Void>(entry, [db](ErrorOr<DatabaseSharedState*> result) {
if (result.isError()) {
return ErrorOr<Void>(result.getError());
@ -2407,16 +2430,29 @@ ThreadFuture<Void> MultiVersionApi::updateClusterSharedStateMap(std::string clus
return Void();
}
void MultiVersionApi::clearClusterSharedStateMapEntry(std::string clusterFilePath) {
void MultiVersionApi::clearClusterSharedStateMapEntry(std::string clusterFilePath, ProtocolVersion dbProtocolVersion) {
MutexHolder holder(lock);
auto mapEntry = clusterSharedStateMap.find(clusterFilePath);
// Other database instances on the same cluster path may already have been upgraded and
// may therefore have cleared, or even recreated, the shared state entry
if (mapEntry == clusterSharedStateMap.end()) {
TraceEvent(SevError, "ClusterSharedStateMapEntryNotFound").detail("ClusterFilePath", clusterFilePath);
TraceEvent("ClusterSharedStateMapEntryNotFound").detail("ClusterFilePath", clusterFilePath);
return;
}
auto ssPtr = mapEntry->second.get();
auto sharedStateInfo = mapEntry->second;
if (sharedStateInfo.protocolVersion != dbProtocolVersion) {
TraceEvent("ClusterSharedStateClearSkipped")
.detail("ClusterFilePath", clusterFilePath)
.detail("ProtocolVersionExpected", dbProtocolVersion)
.detail("ProtocolVersionFound", sharedStateInfo.protocolVersion);
return;
}
auto ssPtr = sharedStateInfo.sharedStateFuture.get();
ssPtr->delRef(ssPtr);
clusterSharedStateMap.erase(mapEntry);
TraceEvent("ClusterSharedStateCleared")
.detail("ClusterFilePath", clusterFilePath)
.detail("ProtocolVersion", dbProtocolVersion);
}
std::vector<std::string> parseOptionValues(std::string valueStr) {
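
The shared-state map entries above now carry the protocol version they were created under, and a clear request for a mismatched version is skipped rather than treated as an error. A self-contained sketch of that guard (simplified types; the future is reduced to a raw pointer):

#include <cstdint>
#include <map>
#include <string>

struct SharedStateInfoSketch {
    void* sharedState = nullptr; // stands in for ThreadFuture<DatabaseSharedState*>
    uint64_t protocolVersion = 0;
};

void clearEntrySketch(std::map<std::string, SharedStateInfoSketch>& m,
                      const std::string& clusterFilePath,
                      uint64_t dbProtocolVersion) {
    auto it = m.find(clusterFilePath);
    if (it == m.end())
        return; // another instance on this cluster may have cleared it already
    if (it->second.protocolVersion != dbProtocolVersion)
        return; // entry belongs to a different client version; leave it alone
    m.erase(it);
}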

View File

@ -218,6 +218,7 @@ struct FdbCApi : public ThreadSafeReferenceCounted<FdbCApi> {
int targetBytes,
FDBStreamingMode mode,
int iteration,
int matchIndex,
fdb_bool_t snapshot,
fdb_bool_t reverse);
FDBFuture* (*transactionGetVersionstamp)(FDBTransaction* tr);
@ -349,6 +350,7 @@ public:
const KeySelectorRef& end,
const StringRef& mapper,
GetRangeLimits limits,
int matchIndex,
bool snapshot,
bool reverse) override;
ThreadFuture<Standalone<VectorRef<const char*>>> getAddressesForKey(const KeyRef& key) override;
@ -537,6 +539,7 @@ public:
const KeySelectorRef& end,
const StringRef& mapper,
GetRangeLimits limits,
int matchIndex,
bool snapshot,
bool reverse) override;
ThreadFuture<Standalone<VectorRef<const char*>>> getAddressesForKey(const KeyRef& key) override;
@ -861,8 +864,10 @@ public:
bool callbackOnMainThread;
bool localClientDisabled;
ThreadFuture<Void> updateClusterSharedStateMap(std::string clusterFilePath, Reference<IDatabase> db);
void clearClusterSharedStateMapEntry(std::string clusterFilePath);
ThreadFuture<Void> updateClusterSharedStateMap(std::string clusterFilePath,
ProtocolVersion dbProtocolVersion,
Reference<IDatabase> db);
void clearClusterSharedStateMapEntry(std::string clusterFilePath, ProtocolVersion dbProtocolVersion);
static bool apiVersionAtLeast(int minVersion);
@ -888,7 +893,11 @@ private:
std::map<std::string, std::vector<Reference<ClientInfo>>> externalClients;
// Map of clusterFilePath -> DatabaseSharedState pointer Future
// Upon cluster version upgrade, clear the map entry for that cluster
std::map<std::string, ThreadFuture<DatabaseSharedState*>> clusterSharedStateMap;
struct SharedStateInfo {
ThreadFuture<DatabaseSharedState*> sharedStateFuture;
ProtocolVersion protocolVersion;
};
std::map<std::string, SharedStateInfo> clusterSharedStateMap;
bool networkStartSetup;
volatile bool networkSetup;

View File

@ -23,6 +23,7 @@
#include <algorithm>
#include <cstdio>
#include <iterator>
#include <memory>
#include <regex>
#include <unordered_set>
#include <tuple>
@ -809,12 +810,12 @@ ACTOR static Future<Void> clientStatusUpdateActor(DatabaseContext* cx) {
}
}
cx->clientStatusUpdater.outStatusQ.clear();
wait(GlobalConfig::globalConfig().onInitialized());
double sampleRate = GlobalConfig::globalConfig().get<double>(fdbClientInfoTxnSampleRate,
std::numeric_limits<double>::infinity());
wait(cx->globalConfig->onInitialized());
double sampleRate =
cx->globalConfig->get<double>(fdbClientInfoTxnSampleRate, std::numeric_limits<double>::infinity());
double clientSamplingProbability =
std::isinf(sampleRate) ? CLIENT_KNOBS->CSI_SAMPLING_PROBABILITY : sampleRate;
int64_t sizeLimit = GlobalConfig::globalConfig().get<int64_t>(fdbClientInfoTxnSizeLimit, -1);
int64_t sizeLimit = cx->globalConfig->get<int64_t>(fdbClientInfoTxnSizeLimit, -1);
int64_t clientTxnInfoSizeLimit = sizeLimit == -1 ? CLIENT_KNOBS->CSI_SIZE_LIMIT : sizeLimit;
if (!trChunksQ.empty() && deterministicRandom()->random01() < clientSamplingProbability)
wait(delExcessClntTxnEntriesActor(&tr, clientTxnInfoSizeLimit));
@ -1481,6 +1482,7 @@ DatabaseContext::DatabaseContext(Reference<AsyncVar<Reference<IClusterConnection
cacheListMonitor = monitorCacheList(this);
smoothMidShardSize.reset(CLIENT_KNOBS->INIT_MID_SHARD_BYTES);
globalConfig = std::make_unique<GlobalConfig>(Database(this));
if (apiVersionAtLeast(710)) {
registerSpecialKeysImpl(
@ -1937,13 +1939,12 @@ Future<Void> DatabaseContext::onProxiesChanged() const {
}
bool DatabaseContext::sampleReadTags() const {
double sampleRate = GlobalConfig::globalConfig().get(transactionTagSampleRate, CLIENT_KNOBS->READ_TAG_SAMPLE_RATE);
double sampleRate = globalConfig->get(transactionTagSampleRate, CLIENT_KNOBS->READ_TAG_SAMPLE_RATE);
return sampleRate > 0 && deterministicRandom()->random01() <= sampleRate;
}
bool DatabaseContext::sampleOnCost(uint64_t cost) const {
double sampleCost =
GlobalConfig::globalConfig().get<double>(transactionTagSampleCost, CLIENT_KNOBS->COMMIT_SAMPLE_COST);
double sampleCost = globalConfig->get<double>(transactionTagSampleCost, CLIENT_KNOBS->COMMIT_SAMPLE_COST);
if (sampleCost <= 0)
return false;
return deterministicRandom()->random01() <= (double)cost / sampleCost;
@ -2219,10 +2220,10 @@ Database Database::createDatabase(Reference<IClusterConnectionRecord> connRecord
}
auto database = Database(db);
GlobalConfig::create(
database, Reference<AsyncVar<ClientDBInfo> const>(clientInfo), std::addressof(clientInfo->get()));
GlobalConfig::globalConfig().trigger(samplingFrequency, samplingProfilerUpdateFrequency);
GlobalConfig::globalConfig().trigger(samplingWindow, samplingProfilerUpdateWindow);
database->globalConfig->init(Reference<AsyncVar<ClientDBInfo> const>(clientInfo),
std::addressof(clientInfo->get()));
database->globalConfig->trigger(samplingFrequency, samplingProfilerUpdateFrequency);
database->globalConfig->trigger(samplingWindow, samplingProfilerUpdateWindow);
TraceEvent("ConnectToDatabase", database->dbId)
.detail("Version", FDB_VT_VERSION)
@ -3790,12 +3791,24 @@ PublicRequestStream<GetKeyValuesFamilyRequest> StorageServerInterface::*getRange
}
}
template <class GetKeyValuesFamilyRequest>
void setMatchIndex(GetKeyValuesFamilyRequest& req, int matchIndex) {
if constexpr (std::is_same<GetKeyValuesFamilyRequest, GetKeyValuesRequest>::value) {
// nothing to set for a plain getRange request
} else if constexpr (std::is_same<GetKeyValuesFamilyRequest, GetMappedKeyValuesRequest>::value) {
req.matchIndex = matchIndex;
} else {
UNREACHABLE();
}
}
ACTOR template <class GetKeyValuesFamilyRequest, class GetKeyValuesFamilyReply, class RangeResultFamily>
Future<RangeResultFamily> getExactRange(Reference<TransactionState> trState,
Version version,
KeyRange keys,
Key mapper,
GetRangeLimits limits,
int matchIndex,
Reverse reverse,
UseTenant useTenant) {
state RangeResultFamily output;
@ -3829,6 +3842,7 @@ Future<RangeResultFamily> getExactRange(Reference<TransactionState> trState,
req.version = version;
req.begin = firstGreaterOrEqual(range.begin);
req.end = firstGreaterOrEqual(range.end);
setMatchIndex<GetKeyValuesFamilyRequest>(req, matchIndex);
req.spanContext = span.context;
trState->cx->getLatestCommitVersions(
locations[shard].locations, req.version, trState, req.ssLatestCommitVersions);
@ -4003,6 +4017,7 @@ Future<RangeResultFamily> getRangeFallback(Reference<TransactionState> trState,
KeySelector end,
Key mapper,
GetRangeLimits limits,
int matchIndex,
Reverse reverse,
UseTenant useTenant) {
if (version == latestVersion) {
@ -4028,7 +4043,7 @@ Future<RangeResultFamily> getRangeFallback(Reference<TransactionState> trState,
// or allKeys.begin exists in the database/tenant and will be part of the conflict range anyways
RangeResultFamily _r = wait(getExactRange<GetKeyValuesFamilyRequest, GetKeyValuesFamilyReply, RangeResultFamily>(
trState, version, KeyRangeRef(b, e), mapper, limits, reverse, useTenant));
trState, version, KeyRangeRef(b, e), mapper, limits, matchIndex, reverse, useTenant));
RangeResultFamily r = _r;
if (b == allKeys.begin && ((reverse && !r.more) || !reverse))
@ -4152,6 +4167,7 @@ Future<RangeResultFamily> getRange(Reference<TransactionState> trState,
Key mapper,
GetRangeLimits limits,
Promise<std::pair<Key, Key>> conflictRange,
int matchIndex,
Snapshot snapshot,
Reverse reverse,
UseTenant useTenant = UseTenant::True) {
@ -4204,7 +4220,7 @@ Future<RangeResultFamily> getRange(Reference<TransactionState> trState,
state GetKeyValuesFamilyRequest req;
req.mapper = mapper;
req.arena.dependsOn(mapper.arena());
setMatchIndex<GetKeyValuesFamilyRequest>(req, matchIndex);
req.tenantInfo = useTenant ? trState->getTenantInfo() : TenantInfo();
req.isFetchKeys = (trState->taskID == TaskPriority::FetchKeys);
req.version = readVersion;
@ -4384,6 +4400,7 @@ Future<RangeResultFamily> getRange(Reference<TransactionState> trState,
originalEnd,
mapper,
originalLimits,
matchIndex,
reverse,
useTenant));
getRangeFinished(
@ -4424,6 +4441,7 @@ Future<RangeResultFamily> getRange(Reference<TransactionState> trState,
originalEnd,
mapper,
originalLimits,
matchIndex,
reverse,
useTenant));
getRangeFinished(
@ -5009,6 +5027,7 @@ Future<RangeResult> getRange(Reference<TransactionState> const& trState,
""_sr,
limits,
Promise<std::pair<Key, Key>>(),
MATCH_INDEX_ALL,
Snapshot::True,
reverse,
useTenant);
@ -5363,6 +5382,7 @@ Future<RangeResultFamily> Transaction::getRangeInternal(const KeySelector& begin
const KeySelector& end,
const Key& mapper,
GetRangeLimits limits,
int matchIndex,
Snapshot snapshot,
Reverse reverse) {
++trState->cx->transactionLogicalReads;
@ -5405,7 +5425,7 @@ Future<RangeResultFamily> Transaction::getRangeInternal(const KeySelector& begin
}
return ::getRange<GetKeyValuesFamilyRequest, GetKeyValuesFamilyReply, RangeResultFamily>(
trState, getReadVersion(), b, e, mapper, limits, conflictRange, snapshot, reverse);
trState, getReadVersion(), b, e, mapper, limits, conflictRange, matchIndex, snapshot, reverse);
}
Future<RangeResult> Transaction::getRange(const KeySelector& begin,
@ -5414,17 +5434,18 @@ Future<RangeResult> Transaction::getRange(const KeySelector& begin,
Snapshot snapshot,
Reverse reverse) {
return getRangeInternal<GetKeyValuesRequest, GetKeyValuesReply, RangeResult>(
begin, end, ""_sr, limits, snapshot, reverse);
begin, end, ""_sr, limits, MATCH_INDEX_ALL, snapshot, reverse);
}
Future<MappedRangeResult> Transaction::getMappedRange(const KeySelector& begin,
const KeySelector& end,
const Key& mapper,
GetRangeLimits limits,
int matchIndex,
Snapshot snapshot,
Reverse reverse) {
return getRangeInternal<GetMappedKeyValuesRequest, GetMappedKeyValuesReply, MappedRangeResult>(
begin, end, mapper, limits, snapshot, reverse);
begin, end, mapper, limits, matchIndex, snapshot, reverse);
}
Future<RangeResult> Transaction::getRange(const KeySelector& begin,
@ -7231,10 +7252,10 @@ ACTOR Future<Standalone<VectorRef<ReadHotRangeWithMetrics>>> getReadHotRanges(Da
// condition. Should we abort and wait for the newly split shards to be hot again?
state int nLocs = locations.size();
// if (nLocs > 1) {
// TraceEvent("RHDDebug")
// .detail("NumSSIs", nLocs)
// .detail("KeysBegin", keys.begin.printable().c_str())
// .detail("KeysEnd", keys.end.printable().c_str());
// TraceEvent("RHDDebug")
// .detail("NumSSIs", nLocs)
// .detail("KeysBegin", keys.begin.printable().c_str())
// .detail("KeysEnd", keys.end.printable().c_str());
// }
state std::vector<Future<ReadHotSubRangeReply>> fReplies(nLocs);
KeyRef partBegin, partEnd;
@ -7946,8 +7967,8 @@ void Transaction::checkDeferredError() const {
Reference<TransactionLogInfo> Transaction::createTrLogInfoProbabilistically(const Database& cx) {
if (!cx->isError()) {
double clientSamplingProbability = GlobalConfig::globalConfig().get<double>(
fdbClientInfoTxnSampleRate, CLIENT_KNOBS->CSI_SAMPLING_PROBABILITY);
double clientSamplingProbability =
cx->globalConfig->get<double>(fdbClientInfoTxnSampleRate, CLIENT_KNOBS->CSI_SAMPLING_PROBABILITY);
if (((networkOptions.logClientInfo.present() && networkOptions.logClientInfo.get()) || BUGGIFY) &&
deterministicRandom()->random01() < clientSamplingProbability &&
(!g_network->isSimulated() || !g_simulator.speedUpSimulation)) {
@ -9419,4 +9440,4 @@ int64_t getMaxWriteKeySize(KeyRef const& key, bool hasRawAccess) {
int64_t getMaxClearKeySize(KeyRef const& key) {
return getMaxKeySize(key);
}
}
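
setMatchIndex above only assigns req.matchIndex for the request type that actually has that member, so the assignment must sit in a discarded branch for GetKeyValuesRequest. A self-contained sketch of the same compile-time dispatch:

#include <type_traits>

struct PlainReqSketch {};
struct MappedReqSketch { int matchIndex = 0; };

template <class Req>
void setMatchIndexSketch(Req& req, int matchIndex) {
    if constexpr (std::is_same_v<Req, MappedReqSketch>)
        req.matchIndex = matchIndex; // only instantiated for MappedReqSketch
    // for PlainReqSketch the branch above is discarded, so the missing member
    // never causes a compile error
}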

View File

@ -329,6 +329,7 @@ public:
const KeySelector& end,
const Key& mapper,
GetRangeLimits limits,
int matchIndex = MATCH_INDEX_ALL,
Snapshot = Snapshot::False,
Reverse = Reverse::False);
@ -338,6 +339,7 @@ private:
const KeySelector& end,
const Key& mapper,
GetRangeLimits limits,
int matchIndex,
Snapshot snapshot,
Reverse reverse);

View File

@ -54,6 +54,7 @@ public:
KeySelector end,
Key mapper,
GetRangeLimits limits,
int matchIndex = MATCH_INDEX_ALL,
Snapshot = Snapshot::False,
Reverse = Reverse::False) override {
throw client_invalid_operation();

View File

@ -77,11 +77,12 @@ public:
template <bool reverse>
struct GetMappedRangeReq {
GetMappedRangeReq(KeySelector begin, KeySelector end, Key mapper, GetRangeLimits limits)
: begin(begin), end(end), mapper(mapper), limits(limits) {}
GetMappedRangeReq(KeySelector begin, KeySelector end, Key mapper, int matchIndex, GetRangeLimits limits)
: begin(begin), end(end), mapper(mapper), limits(limits), matchIndex(matchIndex) {}
KeySelector begin, end;
Key mapper;
GetRangeLimits limits;
int matchIndex;
using Result = MappedRangeResult;
};
@ -1140,9 +1141,13 @@ public:
else
read.end = KeySelector(firstGreaterOrEqual(key), key.arena());
}
MappedRangeResult v = wait(ryw->tr.getMappedRange(
read.begin, read.end, read.mapper, read.limits, snapshot, backwards ? Reverse::True : Reverse::False));
MappedRangeResult v = wait(ryw->tr.getMappedRange(read.begin,
read.end,
read.mapper,
read.limits,
read.matchIndex,
snapshot,
backwards ? Reverse::True : Reverse::False));
return v;
}
@ -1677,6 +1682,7 @@ Future<MappedRangeResult> ReadYourWritesTransaction::getMappedRange(KeySelector
KeySelector end,
Key mapper,
GetRangeLimits limits,
int matchIndex,
Snapshot snapshot,
Reverse reverse) {
if (getDatabase()->apiVersionAtLeast(630)) {
@ -1724,9 +1730,9 @@ Future<MappedRangeResult> ReadYourWritesTransaction::getMappedRange(KeySelector
Future<MappedRangeResult> result =
reverse ? RYWImpl::readWithConflictRangeForGetMappedRange(
this, RYWImpl::GetMappedRangeReq<true>(begin, end, mapper, limits), snapshot)
this, RYWImpl::GetMappedRangeReq<true>(begin, end, mapper, matchIndex, limits), snapshot)
: RYWImpl::readWithConflictRangeForGetMappedRange(
this, RYWImpl::GetMappedRangeReq<false>(begin, end, mapper, limits), snapshot);
this, RYWImpl::GetMappedRangeReq<false>(begin, end, mapper, matchIndex, limits), snapshot);
return result;
}

View File

@ -112,6 +112,7 @@ public:
KeySelector end,
Key mapper,
GetRangeLimits limits,
int matchIndex,
Snapshot = Snapshot::False,
Reverse = Reverse::False) override;

View File

@ -185,11 +185,20 @@ Reference<S3BlobStoreEndpoint> S3BlobStoreEndpoint::fromString(const std::string
Optional<std::string> proxyHost, proxyPort;
if (proxy.present()) {
if (!Hostname::isHostname(proxy.get()) && !NetworkAddress::parseOptional(proxy.get()).present()) {
throw format("'%s' is not a valid value for proxy. Format should be either IP:port or host:port.",
proxy.get().c_str());
StringRef proxyRef(proxy.get());
if (proxy.get().find("://") != std::string::npos) {
StringRef proxyPrefix = proxyRef.eat("://");
if (proxyPrefix != "http"_sr) {
throw format("Invalid proxy URL prefix '%s'. Either don't use a prefix, or use http://",
proxyPrefix.toString().c_str());
}
}
StringRef p(proxy.get());
std::string proxyBody = proxyRef.eat().toString();
if (!Hostname::isHostname(proxyBody) && !NetworkAddress::parseOptional(proxyBody).present()) {
throw format("'%s' is not a valid value for proxy. Format should be either IP:port or host:port.",
proxyBody.c_str());
}
StringRef p(proxyBody);
proxyHost = p.eat(":").toString();
proxyPort = p.eat().toString();
}
@ -645,10 +654,24 @@ ACTOR Future<S3BlobStoreEndpoint::ReusableConnection> connect_impl(Reference<S3B
}
}
std::string host = b->host, service = b->service;
if (service.empty())
if (service.empty()) {
if (b->useProxy) {
fprintf(stderr, "ERROR: Port can't be empty when using HTTP proxy.\n");
throw connection_failed();
}
service = b->knobs.secure_connection ? "https" : "http";
state Reference<IConnection> conn =
wait(INetworkConnections::net()->connect(host, service, b->knobs.secure_connection ? true : false));
}
bool isTLS = b->knobs.secure_connection == 1;
if (b->useProxy) {
// TODO(renxuan): Support http proxy + TLS
if (isTLS || b->service == "443") {
fprintf(stderr, "ERROR: TLS is not supported yet when using HTTP proxy.\n");
throw connection_failed();
}
host = b->proxyHost.get();
service = b->proxyPort.get();
}
state Reference<IConnection> conn = wait(INetworkConnections::net()->connect(host, service, isTLS));
wait(conn->connectHandshake());
TraceEvent("S3BlobStoreEndpointNewConnection")
@ -752,6 +775,10 @@ ACTOR Future<Reference<HTTP::Response>> doRequest_impl(Reference<S3BlobStoreEndp
bstore->setAuthHeaders(verb, resource, headers);
}
if (bstore->useProxy) {
// When sending through an HTTP proxy, the request-target has to be in absolute-form.
resource = "http://" + bstore->host + ":" + bstore->service + resource;
}
remoteAddress = rconn.conn->getPeerAddress();
wait(bstore->requestRate->getAllowance(1));
Reference<HTTP::Response> _r = wait(timeoutError(HTTP::doRequest(rconn.conn,
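
With useProxy set, the connection above goes to the proxy's host:port, and the request-target must be rewritten to absolute-form (RFC 7230, section 5.3.2) so the proxy knows the origin server. A minimal sketch of that rewrite (the example URL is illustrative):

#include <string>

std::string toAbsoluteFormSketch(const std::string& host,
                                 const std::string& service,
                                 const std::string& resource) {
    // e.g. "/bucket/object" -> "http://s3.example.com:80/bucket/object"
    return "http://" + host + ":" + service + resource;
}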

View File

@ -857,6 +857,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( ENABLE_ENCRYPTION, false );
init( ENCRYPTION_MODE, "AES-256-CTR");
init( SIM_KMS_MAX_KEYS, 4096);
init( ENCRYPT_PROXY_MAX_DBG_TRACE_LENGTH, 100000);
// KMS connector type
init( KMS_CONNECTOR_TYPE, "RESTKmsConnector");

View File

@ -824,6 +824,7 @@ public:
bool ENABLE_ENCRYPTION;
std::string ENCRYPTION_MODE;
int SIM_KMS_MAX_KEYS;
int ENCRYPT_PROXY_MAX_DBG_TRACE_LENGTH;
// Key Management Service (KMS) Connector
std::string KMS_CONNECTOR_TYPE;

View File

@ -63,6 +63,7 @@ public:
KeySelector end,
Key mapper,
GetRangeLimits limits,
int matchIndex,
Snapshot = Snapshot::False,
Reverse = Reverse::False) override {
throw client_invalid_operation();

View File

@ -1464,11 +1464,9 @@ Future<RangeResult> GlobalConfigImpl::getRange(ReadYourWritesTransaction* ryw,
KeyRangeRef kr,
GetRangeLimits limitsHint) const {
RangeResult result;
auto& globalConfig = GlobalConfig::globalConfig();
KeyRangeRef modified =
KeyRangeRef(kr.begin.removePrefix(getKeyRange().begin), kr.end.removePrefix(getKeyRange().begin));
std::map<KeyRef, Reference<ConfigValue>> values = globalConfig.get(modified);
std::map<KeyRef, Reference<ConfigValue>> values = ryw->getDatabase()->globalConfig->get(modified);
for (const auto& [key, config] : values) {
Key prefixedKey = key.withPrefix(getKeyRange().begin);
if (config.isValid() && config->value.has_value()) {
@ -1519,7 +1517,8 @@ ACTOR Future<Optional<std::string>> globalConfigCommitActor(GlobalConfigImpl* gl
}
}
VersionHistory vh{ 0 };
Standalone<VectorRef<KeyValueRef>> insertions;
Standalone<VectorRef<KeyRangeRef>> clears;
// Transform writes from the special-key-space (\xff\xff/global_config/) to
// the system key space (\xff/globalConfig/), and writes mutations to
@ -1532,36 +1531,17 @@ ACTOR Future<Optional<std::string>> globalConfigCommitActor(GlobalConfigImpl* gl
if (entry.first) {
if (entry.second.present() && iter->begin().startsWith(globalConfig->getKeyRange().begin)) {
Key bareKey = iter->begin().removePrefix(globalConfig->getKeyRange().begin);
vh.mutations.emplace_back_deep(vh.mutations.arena(),
MutationRef(MutationRef::SetValue, bareKey, entry.second.get()));
Key systemKey = bareKey.withPrefix(globalConfigKeysPrefix);
tr.set(systemKey, entry.second.get());
insertions.push_back_deep(insertions.arena(), KeyValueRef(bareKey, entry.second.get()));
} else if (!entry.second.present() && iter->range().begin.startsWith(globalConfig->getKeyRange().begin) &&
iter->range().end.startsWith(globalConfig->getKeyRange().begin)) {
KeyRef bareRangeBegin = iter->range().begin.removePrefix(globalConfig->getKeyRange().begin);
KeyRef bareRangeEnd = iter->range().end.removePrefix(globalConfig->getKeyRange().begin);
vh.mutations.emplace_back_deep(vh.mutations.arena(),
MutationRef(MutationRef::ClearRange, bareRangeBegin, bareRangeEnd));
Key systemRangeBegin = bareRangeBegin.withPrefix(globalConfigKeysPrefix);
Key systemRangeEnd = bareRangeEnd.withPrefix(globalConfigKeysPrefix);
tr.clear(KeyRangeRef(systemRangeBegin, systemRangeEnd));
clears.push_back_deep(clears.arena(), KeyRangeRef(bareRangeBegin, bareRangeEnd));
}
}
++iter;
}
// Record the mutations in this commit into the global configuration history.
Key historyKey = addVersionStampAtEnd(globalConfigHistoryPrefix);
ObjectWriter historyWriter(IncludeVersion());
historyWriter.serialize(vh);
tr.atomicOp(historyKey, historyWriter.toStringRef(), MutationRef::SetVersionstampedKey);
// Write version key to trigger update in cluster controller.
tr.atomicOp(globalConfigVersionKey,
LiteralStringRef("0123456789\x00\x00\x00\x00"), // versionstamp
MutationRef::SetVersionstampedValue);
GlobalConfig::applyChanges(tr, insertions, clears);
return Optional<std::string>();
}
@ -1970,13 +1950,11 @@ ACTOR static Future<RangeResult> ClientProfilingGetRangeActor(ReadYourWritesTran
ASSERT(entry.second.present());
result.push_back_deep(result.arena(), KeyValueRef(sampleRateKey, entry.second.get()));
} else {
Optional<Value> f = wait(ryw->getTransaction().get(fdbClientInfoTxnSampleRate));
std::string sampleRateStr = "default";
if (f.present()) {
const double sampleRateDbl = BinaryReader::fromStringRef<double>(f.get(), Unversioned());
if (!std::isinf(sampleRateDbl)) {
sampleRateStr = boost::lexical_cast<std::string>(sampleRateDbl);
}
const double sampleRateDbl = ryw->getDatabase()->globalConfig->get<double>(
fdbClientInfoTxnSampleRate, std::numeric_limits<double>::infinity());
if (!std::isinf(sampleRateDbl)) {
sampleRateStr = std::to_string(sampleRateDbl);
}
result.push_back_deep(result.arena(), KeyValueRef(sampleRateKey, Value(sampleRateStr)));
}
@ -1990,13 +1968,10 @@ ACTOR static Future<RangeResult> ClientProfilingGetRangeActor(ReadYourWritesTran
ASSERT(entry.second.present());
result.push_back_deep(result.arena(), KeyValueRef(txnSizeLimitKey, entry.second.get()));
} else {
Optional<Value> f = wait(ryw->getTransaction().get(fdbClientInfoTxnSizeLimit));
std::string sizeLimitStr = "default";
if (f.present()) {
const int64_t sizeLimit = BinaryReader::fromStringRef<int64_t>(f.get(), Unversioned());
if (sizeLimit != -1) {
sizeLimitStr = boost::lexical_cast<std::string>(sizeLimit);
}
const int64_t sizeLimit = ryw->getDatabase()->globalConfig->get<int64_t>(fdbClientInfoTxnSizeLimit, -1);
if (sizeLimit != -1) {
sizeLimitStr = boost::lexical_cast<std::string>(sizeLimit);
}
result.push_back_deep(result.arena(), KeyValueRef(txnSizeLimitKey, Value(sizeLimitStr)));
}
@ -2013,43 +1988,49 @@ Future<RangeResult> ClientProfilingImpl::getRange(ReadYourWritesTransaction* ryw
Future<Optional<std::string>> ClientProfilingImpl::commit(ReadYourWritesTransaction* ryw) {
ryw->getTransaction().setOption(FDBTransactionOptions::RAW_ACCESS);
Standalone<VectorRef<KeyValueRef>> insertions;
Standalone<VectorRef<KeyRangeRef>> clears;
// client_txn_sample_rate
Key sampleRateKey = LiteralStringRef("client_txn_sample_rate").withPrefix(getKeyRange().begin);
auto rateEntry = ryw->getSpecialKeySpaceWriteMap()[sampleRateKey];
if (rateEntry.first && rateEntry.second.present()) {
std::string sampleRateStr = rateEntry.second.get().toString();
double sampleRate;
if (sampleRateStr == "default")
sampleRate = std::numeric_limits<double>::infinity();
else {
if (sampleRateStr == "default") {
clears.push_back_deep(clears.arena(),
KeyRangeRef(fdbClientInfoTxnSampleRate, keyAfter(fdbClientInfoTxnSampleRate)));
} else {
try {
sampleRate = boost::lexical_cast<double>(sampleRateStr);
double sampleRate = boost::lexical_cast<double>(sampleRateStr);
Tuple rate = Tuple().appendDouble(sampleRate);
insertions.push_back_deep(insertions.arena(), KeyValueRef(fdbClientInfoTxnSampleRate, rate.pack()));
} catch (boost::bad_lexical_cast& e) {
return Optional<std::string>(ManagementAPIError::toJsonString(
false, "profile", "Invalid transaction sample rate(double): " + sampleRateStr));
}
}
ryw->getTransaction().set(fdbClientInfoTxnSampleRate, BinaryWriter::toValue(sampleRate, Unversioned()));
}
// client_txn_size_limit
Key txnSizeLimitKey = LiteralStringRef("client_txn_size_limit").withPrefix(getKeyRange().begin);
auto sizeLimitEntry = ryw->getSpecialKeySpaceWriteMap()[txnSizeLimitKey];
if (sizeLimitEntry.first && sizeLimitEntry.second.present()) {
std::string sizeLimitStr = sizeLimitEntry.second.get().toString();
int64_t sizeLimit;
if (sizeLimitStr == "default")
sizeLimit = -1;
else {
if (sizeLimitStr == "default") {
clears.push_back_deep(clears.arena(),
KeyRangeRef(fdbClientInfoTxnSizeLimit, keyAfter(fdbClientInfoTxnSizeLimit)));
} else {
try {
sizeLimit = boost::lexical_cast<int64_t>(sizeLimitStr);
int64_t sizeLimit = boost::lexical_cast<int64_t>(sizeLimitStr);
Tuple size = Tuple().append(sizeLimit);
insertions.push_back_deep(insertions.arena(), KeyValueRef(fdbClientInfoTxnSizeLimit, size.pack()));
} catch (boost::bad_lexical_cast& e) {
return Optional<std::string>(ManagementAPIError::toJsonString(
false, "profile", "Invalid transaction size limit(int64_t): " + sizeLimitStr));
}
}
ryw->getTransaction().set(fdbClientInfoTxnSizeLimit, BinaryWriter::toValue(sizeLimit, Unversioned()));
}
GlobalConfig::applyChanges(ryw->getTransaction(), insertions, clears);
return Optional<std::string>();
}
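
In the commit path above, "default" is now expressed as a clear of the single configuration key rather than writing a sentinel value. keyAfter(k) is k with a zero byte appended, so the half-open range [k, keyAfter(k)) covers exactly one key; a sketch:

#include <string>
#include <utility>

std::pair<std::string, std::string> singleKeyRangeSketch(const std::string& k) {
    return { k, k + std::string(1, '\0') }; // analogous to KeyRangeRef(k, keyAfter(k))
}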

View File

@ -426,6 +426,7 @@ struct GetMappedKeyValuesRequest : TimedRequest {
KeyRef mapper;
Version version; // or latestVersion
int limit, limitBytes;
int matchIndex;
bool isFetchKeys;
Optional<TagSet> tags;
Optional<UID> debugID;
@ -451,7 +452,8 @@ struct GetMappedKeyValuesRequest : TimedRequest {
spanContext,
tenantInfo,
arena,
ssLatestCommitVersions);
ssLatestCommitVersions,
matchIndex);
}
};

View File

@ -55,7 +55,9 @@ private:
ASSERT(id >= 0);
prefix = makeString(8 + subspace.size());
uint8_t* data = mutateString(prefix);
memcpy(data, subspace.begin(), subspace.size());
if (subspace.size() > 0) {
memcpy(data, subspace.begin(), subspace.size());
}
int64_t swapped = bigEndian64(id);
memcpy(data + subspace.size(), &swapped, 8);
}
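
The guard above matters because passing a null pointer to memcpy is undefined behavior even when the length is zero, and an empty subspace may have a null begin(). A minimal sketch of the defensive pattern:

#include <cstddef>
#include <cstring>

void copyPrefixSketch(unsigned char* dst, const unsigned char* src, std::size_t n) {
    if (n > 0)
        std::memcpy(dst, src, n); // never call memcpy with a possibly-null src
}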

View File

@ -306,6 +306,7 @@ ThreadFuture<MappedRangeResult> ThreadSafeTransaction::getMappedRange(const KeyS
const KeySelectorRef& end,
const StringRef& mapper,
GetRangeLimits limits,
int matchIndex,
bool snapshot,
bool reverse) {
KeySelector b = begin;
@ -313,9 +314,9 @@ ThreadFuture<MappedRangeResult> ThreadSafeTransaction::getMappedRange(const KeyS
Key h = mapper;
ISingleThreadTransaction* tr = this->tr;
return onMainThread([tr, b, e, h, limits, snapshot, reverse]() -> Future<MappedRangeResult> {
return onMainThread([tr, b, e, h, limits, matchIndex, snapshot, reverse]() -> Future<MappedRangeResult> {
tr->checkDeferredError();
return tr->getMappedRange(b, e, h, limits, Snapshot{ snapshot }, Reverse{ reverse });
return tr->getMappedRange(b, e, h, limits, matchIndex, Snapshot{ snapshot }, Reverse{ reverse });
});
}

View File

@ -136,6 +136,7 @@ public:
const KeySelectorRef& end,
const StringRef& mapper,
GetRangeLimits limits,
int matchIndex,
bool snapshot,
bool reverse) override;
ThreadFuture<Standalone<VectorRef<const char*>>> getAddressesForKey(const KeyRef& key) override;

View File

@ -479,6 +479,7 @@ struct ConnectPacket {
serializer(ar, connectPacketLength);
if (connectPacketLength > sizeof(ConnectPacket) - sizeof(connectPacketLength)) {
ASSERT(!g_network->isSimulated());
TraceEvent("SerializationFailed").backtrace();
throw serialization_failed();
}
@ -816,6 +817,9 @@ ACTOR Future<Void> connectionKeeper(Reference<Peer> self,
.errorUnsuppressed(e)
.suppressFor(1.0)
.detail("PeerAddr", self->destination);
// Since the connection has closed, we need to check the protocol version the next time we connect
self->compatible = true;
}
if (self->destination.isPublic() &&
@ -885,9 +889,9 @@ ACTOR Future<Void> connectionKeeper(Reference<Peer> self,
Peer::Peer(TransportData* transport, NetworkAddress const& destination)
: transport(transport), destination(destination), compatible(true), outgoingConnectionIdle(true),
lastConnectTime(0.0), reconnectionDelay(FLOW_KNOBS->INITIAL_RECONNECTION_TIME), peerReferences(-1),
incompatibleProtocolVersionNewer(false), bytesReceived(0), bytesSent(0), lastDataPacketSentTime(now()),
outstandingReplies(0), pingLatencies(destination.isPublic() ? FLOW_KNOBS->PING_SAMPLE_AMOUNT : 1),
lastLoggedTime(0.0), lastLoggedBytesReceived(0), lastLoggedBytesSent(0), timeoutCount(0),
bytesReceived(0), bytesSent(0), lastDataPacketSentTime(now()), outstandingReplies(0),
pingLatencies(destination.isPublic() ? FLOW_KNOBS->PING_SAMPLE_AMOUNT : 1), lastLoggedTime(0.0),
lastLoggedBytesReceived(0), lastLoggedBytesSent(0), timeoutCount(0),
protocolVersion(Reference<AsyncVar<Optional<ProtocolVersion>>>(new AsyncVar<Optional<ProtocolVersion>>())),
connectOutgoingCount(0), connectIncomingCount(0), connectFailedCount(0),
connectLatencies(destination.isPublic() ? FLOW_KNOBS->NETWORK_CONNECT_SAMPLE_AMOUNT : 1) {
@ -1257,7 +1261,6 @@ ACTOR static Future<Void> connectionReader(TransportData* transport,
state bool expectConnectPacket = true;
state bool compatible = false;
state bool incompatiblePeerCounted = false;
state bool incompatibleProtocolVersionNewer = false;
state NetworkAddress peerAddress;
state ProtocolVersion peerProtocolVersion;
state Reference<AuthorizedTenants> authorizedTenants = makeReference<AuthorizedTenants>();
@ -1323,7 +1326,6 @@ ACTOR static Future<Void> connectionReader(TransportData* transport,
uint64_t connectionId = pkt.connectionId;
if (!pkt.protocolVersion.hasObjectSerializerFlag() ||
!pkt.protocolVersion.isCompatible(g_network->protocolVersion())) {
incompatibleProtocolVersionNewer = pkt.protocolVersion > g_network->protocolVersion();
NetworkAddress addr = pkt.canonicalRemotePort
? NetworkAddress(pkt.canonicalRemoteIp(), pkt.canonicalRemotePort)
: conn->getPeerAddress();
@ -1383,7 +1385,6 @@ ACTOR static Future<Void> connectionReader(TransportData* transport,
.suppressFor(1.0)
.detail("PeerAddr", NetworkAddress(pkt.canonicalRemoteIp(), pkt.canonicalRemotePort));
peer->compatible = compatible;
peer->incompatibleProtocolVersionNewer = incompatibleProtocolVersionNewer;
if (!compatible) {
peer->transport->numIncompatibleConnections++;
incompatiblePeerCounted = true;
@ -1401,7 +1402,6 @@ ACTOR static Future<Void> connectionReader(TransportData* transport,
}
peer = transport->getOrOpenPeer(peerAddress, false);
peer->compatible = compatible;
peer->incompatibleProtocolVersionNewer = incompatibleProtocolVersionNewer;
if (!compatible) {
peer->transport->numIncompatibleConnections++;
incompatiblePeerCounted = true;
@ -1741,8 +1741,7 @@ static ReliablePacket* sendPacket(TransportData* self,
// If there isn't an open connection, a public address, or the peer isn't compatible, we can't send
if (!peer || (peer->outgoingConnectionIdle && !destination.getPrimaryAddress().isPublic()) ||
(peer->incompatibleProtocolVersionNewer &&
destination.token != Endpoint::wellKnownToken(WLTOKEN_PING_PACKET))) {
(!peer->compatible && destination.token != Endpoint::wellKnownToken(WLTOKEN_PING_PACKET))) {
TEST(true); // Can't send to private address without a compatible open connection
return nullptr;
}
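
After the removal of incompatibleProtocolVersionNewer above, the send gate depends only on peer->compatible, which connectionKeeper resets to true whenever a connection closes so the next handshake re-evaluates it. A sketch of the simplified gate, with the peer reduced to the relevant flag:

struct PeerSketch { bool compatible = true; };

bool maySendSketch(const PeerSketch& peer, bool isPingPacket) {
    // only ping packets may cross an incompatible connection
    return peer.compatible || isPingPacket;
}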

View File

@ -159,7 +159,6 @@ struct Peer : public ReferenceCounted<Peer> {
double lastConnectTime;
double reconnectionDelay;
int peerReferences;
bool incompatibleProtocolVersionNewer;
int64_t bytesReceived;
int64_t bytesSent;
double lastDataPacketSentTime;

View File

@ -24,10 +24,15 @@
#include "flow/Arena.h"
#include "flow/Error.h"
#include "flow/IRandom.h"
#include "flow/MkCert.h"
#include "flow/Platform.h"
#include "flow/ScopeExit.h"
#include "flow/Trace.h"
#include "flow/UnitTest.h"
#include <type_traits>
#if defined(HAVE_WOLFSSL)
#include <wolfssl/options.h>
#endif
#include <openssl/ec.h>
#include <openssl/err.h>
#include <openssl/evp.h>
@ -35,16 +40,6 @@
namespace {
template <typename Func>
class ExitGuard {
std::decay_t<Func> fn;
public:
ExitGuard(Func&& fn) : fn(std::forward<Func>(fn)) {}
~ExitGuard() { fn(); }
};
[[noreturn]] void traceAndThrow(const char* type) {
auto te = TraceEvent(SevWarnAlways, type);
te.suppressFor(60);
@ -53,63 +48,11 @@ public:
0,
};
::ERR_error_string_n(err, buf, sizeof(buf));
te.detail("OpenSSLError", buf);
te.detail("OpenSSLError", static_cast<const char*>(buf));
}
throw digital_signature_ops_error();
}
struct KeyPairRef {
StringRef privateKey;
StringRef publicKey;
};
Standalone<KeyPairRef> generateEcdsaKeyPair() {
auto params = std::add_pointer_t<EVP_PKEY>();
{
auto pctx = ::EVP_PKEY_CTX_new_id(EVP_PKEY_EC, nullptr);
ASSERT(pctx);
auto ctxGuard = ExitGuard([pctx]() { ::EVP_PKEY_CTX_free(pctx); });
ASSERT_LT(0, ::EVP_PKEY_paramgen_init(pctx));
ASSERT_LT(0, ::EVP_PKEY_CTX_set_ec_paramgen_curve_nid(pctx, NID_X9_62_prime256v1));
ASSERT_LT(0, ::EVP_PKEY_paramgen(pctx, &params));
ASSERT(params);
}
auto paramsGuard = ExitGuard([params]() { ::EVP_PKEY_free(params); });
// keygen
auto kctx = ::EVP_PKEY_CTX_new(params, nullptr);
ASSERT(kctx);
auto kctxGuard = ExitGuard([kctx]() { ::EVP_PKEY_CTX_free(kctx); });
auto key = std::add_pointer_t<EVP_PKEY>();
{
ASSERT_LT(0, ::EVP_PKEY_keygen_init(kctx));
ASSERT_LT(0, ::EVP_PKEY_keygen(kctx, &key));
}
ASSERT(key);
auto keyGuard = ExitGuard([key]() { ::EVP_PKEY_free(key); });
auto ret = Standalone<KeyPairRef>{};
auto& arena = ret.arena();
{
auto len = 0;
len = ::i2d_PrivateKey(key, nullptr);
ASSERT_LT(0, len);
auto buf = new (arena) uint8_t[len];
auto out = std::add_pointer_t<uint8_t>(buf);
len = ::i2d_PrivateKey(key, &out);
ret.privateKey = StringRef(buf, len);
}
{
auto len = 0;
len = ::i2d_PUBKEY(key, nullptr);
ASSERT_LT(0, len);
auto buf = new (arena) uint8_t[len];
auto out = std::add_pointer_t<uint8_t>(buf);
len = ::i2d_PUBKEY(key, &out);
ret.publicKey = StringRef(buf, len);
}
return ret;
}
} // namespace
Standalone<SignedAuthTokenRef> signToken(AuthTokenRef token, StringRef keyName, StringRef privateKeyDer) {
@ -124,11 +67,11 @@ Standalone<SignedAuthTokenRef> signToken(AuthTokenRef token, StringRef keyName,
if (!key) {
traceAndThrow("SignTokenBadKey");
}
auto keyGuard = ExitGuard([key]() { ::EVP_PKEY_free(key); });
auto keyGuard = ScopeExit([key]() { ::EVP_PKEY_free(key); });
auto mdctx = ::EVP_MD_CTX_create();
if (!mdctx)
traceAndThrow("SignTokenInitFail");
auto mdctxGuard = ExitGuard([mdctx]() { ::EVP_MD_CTX_free(mdctx); });
auto mdctxGuard = ScopeExit([mdctx]() { ::EVP_MD_CTX_free(mdctx); });
if (1 != ::EVP_DigestSignInit(mdctx, nullptr, ::EVP_sha256() /*Parameterize?*/, nullptr, key))
traceAndThrow("SignTokenInitFail");
if (1 != ::EVP_DigestSignUpdate(mdctx, tokenStr.begin(), tokenStr.size()))
@ -150,11 +93,11 @@ bool verifyToken(SignedAuthTokenRef signedToken, StringRef publicKeyDer) {
auto key = ::d2i_PUBKEY(nullptr, &rawPubKeyDer, publicKeyDer.size());
if (!key)
traceAndThrow("VerifyTokenBadKey");
auto keyGuard = ExitGuard([key]() { ::EVP_PKEY_free(key); });
auto keyGuard = ScopeExit([key]() { ::EVP_PKEY_free(key); });
auto mdctx = ::EVP_MD_CTX_create();
if (!mdctx)
traceAndThrow("VerifyTokenInitFail");
auto mdctxGuard = ExitGuard([mdctx]() { ::EVP_MD_CTX_free(mdctx); });
auto mdctxGuard = ScopeExit([mdctx]() { ::EVP_MD_CTX_free(mdctx); });
if (1 != ::EVP_DigestVerifyInit(mdctx, nullptr, ::EVP_sha256(), nullptr, key))
traceAndThrow("VerifyTokenInitFail");
if (1 != ::EVP_DigestVerifyUpdate(mdctx, signedToken.token.begin(), signedToken.token.size()))
@ -179,7 +122,8 @@ void forceLinkTokenSignTests() {}
TEST_CASE("/fdbrpc/TokenSign") {
const auto numIters = 100;
for (auto i = 0; i < numIters; i++) {
auto keyPair = generateEcdsaKeyPair();
auto kpArena = Arena();
auto keyPair = mkcert::KeyPairRef::make(kpArena);
auto token = Standalone<AuthTokenRef>{};
auto& arena = token.arena();
auto& rng = *deterministicRandom();
@ -206,15 +150,15 @@ TEST_CASE("/fdbrpc/TokenSign") {
token.tenants.push_back(arena, genRandomStringRef());
}
auto keyName = genRandomStringRef();
auto signedToken = signToken(token, keyName, keyPair.privateKey);
const auto verifyExpectOk = verifyToken(signedToken, keyPair.publicKey);
auto signedToken = signToken(token, keyName, keyPair.privateKeyDer);
const auto verifyExpectOk = verifyToken(signedToken, keyPair.publicKeyDer);
ASSERT(verifyExpectOk);
// try tampering with signed token by adding one more tenant
token.tenants.push_back(arena, genRandomStringRef());
auto writer = ObjectWriter([&arena](size_t len) { return new (arena) uint8_t[len]; }, IncludeVersion());
writer.serialize(token);
signedToken.token = writer.toStringRef();
const auto verifyExpectFail = verifyToken(signedToken, keyPair.publicKey);
const auto verifyExpectFail = verifyToken(signedToken, keyPair.publicKeyDer);
ASSERT(!verifyExpectFail);
}
printf("%d runs OK\n", numIters);
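
The diff above swaps the file-local ExitGuard for flow's ScopeExit and the ad-hoc ECDSA generator for mkcert::KeyPairRef::make. A self-contained sketch of the ScopeExit idiom being adopted, which guarantees OpenSSL objects are freed on every early return:

#include <utility>

template <typename Fn>
class ScopeExitSketch {
    Fn fn;
public:
    explicit ScopeExitSketch(Fn f) : fn(std::move(f)) {}
    ~ScopeExitSketch() { fn(); }
    ScopeExitSketch(const ScopeExitSketch&) = delete;
    ScopeExitSketch& operator=(const ScopeExitSketch&) = delete;
};

// usage: auto keyGuard = ScopeExitSketch([key] { EVP_PKEY_free(key); });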

View File

@ -101,7 +101,6 @@ Future<ErrorOr<REPLY_TYPE(Req)>> tryGetReplyFromHostname(Req request, Hostname h
resetReply(request);
if (reply.getError().code() == error_code_request_maybe_delivered) {
// Connection failure.
hostname.resetToUnresolved();
INetworkConnections::net()->removeCachedDNS(hostname.host, hostname.service);
}
}
@ -126,7 +125,6 @@ Future<ErrorOr<REPLY_TYPE(Req)>> tryGetReplyFromHostname(Req request,
resetReply(request);
if (reply.getError().code() == error_code_request_maybe_delivered) {
// Connection failure.
hostname.resetToUnresolved();
INetworkConnections::net()->removeCachedDNS(hostname.host, hostname.service);
}
}
@ -149,7 +147,6 @@ Future<REPLY_TYPE(Req)> retryGetReplyFromHostname(Req request, Hostname hostname
// Connection failure.
wait(delay(reconnetInterval));
reconnetInterval = std::min(2 * reconnetInterval, FLOW_KNOBS->HOSTNAME_RECONNECT_MAX_INTERVAL);
hostname.resetToUnresolved();
INetworkConnections::net()->removeCachedDNS(hostname.host, hostname.service);
} else {
throw reply.getError();
@ -179,7 +176,6 @@ Future<REPLY_TYPE(Req)> retryGetReplyFromHostname(Req request,
// Connection failure.
wait(delay(reconnetInterval));
reconnetInterval = std::min(2 * reconnetInterval, FLOW_KNOBS->HOSTNAME_RECONNECT_MAX_INTERVAL);
hostname.resetToUnresolved();
INetworkConnections::net()->removeCachedDNS(hostname.host, hostname.service);
} else {
throw reply.getError();
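
With resetToUnresolved() gone, a connection failure above now just evicts the cached DNS entry and retries, doubling the delay on each attempt up to HOSTNAME_RECONNECT_MAX_INTERVAL. A sketch of that capped exponential backoff:

double nextBackoffSketch(double current, double maxInterval) {
    double next = 2 * current;
    return next < maxInterval ? next : maxInterval;
}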

View File

@ -1,392 +1,391 @@
set(FDBSERVER_SRCS
ApplyMetadataMutation.cpp
ApplyMetadataMutation.h
BackupInterface.h
BackupProgress.actor.cpp
BackupProgress.actor.h
BackupWorker.actor.cpp
BlobGranuleServerCommon.actor.cpp
BlobGranuleServerCommon.actor.h
BlobGranuleValidation.actor.cpp
BlobGranuleValidation.actor.h
BlobManager.actor.cpp
BlobManagerInterface.h
BlobWorker.actor.cpp
ClusterController.actor.cpp
ClusterController.actor.h
ClusterRecovery.actor.cpp
ClusterRecovery.actor.h
CommitProxyServer.actor.cpp
ConfigBroadcaster.actor.cpp
ConfigBroadcaster.h
ConfigDatabaseUnitTests.actor.cpp
ConfigFollowerInterface.cpp
ConfigFollowerInterface.h
ConfigNode.actor.cpp
ConfigNode.h
ConflictSet.h
CoordinatedState.actor.cpp
CoordinatedState.h
Coordination.actor.cpp
CoordinationInterface.h
CoroFlow.h
DataDistribution.actor.cpp
DataDistribution.actor.h
DataDistributionQueue.actor.cpp
DataDistributionTracker.actor.cpp
DataDistributorInterface.h
DBCoreState.h
DDTeamCollection.actor.cpp
DDTeamCollection.h
DiskQueue.actor.cpp
EncryptKeyProxy.actor.cpp
EncryptKeyProxyInterface.h
FDBExecHelper.actor.cpp
FDBExecHelper.actor.h
fdbserver.actor.cpp
GrvProxyServer.actor.cpp
IConfigConsumer.cpp
IConfigConsumer.h
IDiskQueue.h
IKeyValueContainer.h
IKeyValueStore.h
IPager.h
KeyValueStoreCompressTestData.actor.cpp
KeyValueStoreMemory.actor.cpp
KeyValueStoreRocksDB.actor.cpp
KeyValueStoreSQLite.actor.cpp
KmsConnector.h
KmsConnectorInterface.h
KnobProtectiveGroups.cpp
KnobProtectiveGroups.h
Knobs.h
LatencyBandConfig.cpp
LatencyBandConfig.h
LeaderElection.actor.cpp
LeaderElection.h
LocalConfiguration.actor.cpp
LocalConfiguration.h
LogProtocolMessage.h
LogRouter.actor.cpp
LogSystem.cpp
LogSystem.h
LogSystemConfig.cpp
LogSystemConfig.h
LogSystemDiskQueueAdapter.actor.cpp
LogSystemDiskQueueAdapter.h
LogSystemPeekCursor.actor.cpp
MasterInterface.h
masterserver.actor.cpp
MetricLogger.actor.cpp
MetricLogger.actor.h
MoveKeys.actor.cpp
MoveKeys.actor.h
MutationTracking.cpp
MutationTracking.h
networktest.actor.cpp
NetworkTest.h
OldTLogServer_4_6.actor.cpp
OldTLogServer_6_0.actor.cpp
OldTLogServer_6_2.actor.cpp
OTELSpanContextMessage.h
OnDemandStore.actor.cpp
OnDemandStore.h
PaxosConfigConsumer.actor.cpp
PaxosConfigConsumer.h
ProxyCommitData.actor.h
pubsub.actor.cpp
pubsub.h
QuietDatabase.actor.cpp
QuietDatabase.h
RadixTree.h
Ratekeeper.actor.cpp
Ratekeeper.h
RatekeeperInterface.h
RecoveryState.h
RemoteIKeyValueStore.actor.h
RemoteIKeyValueStore.actor.cpp
RESTKmsConnector.actor.h
RESTKmsConnector.actor.cpp
ResolutionBalancer.actor.cpp
ResolutionBalancer.actor.h
Resolver.actor.cpp
ResolverInterface.h
RestoreApplier.actor.cpp
RestoreApplier.actor.h
RestoreCommon.actor.cpp
RestoreCommon.actor.h
RestoreController.actor.cpp
RestoreController.actor.h
RestoreLoader.actor.cpp
RestoreLoader.actor.h
RestoreRoleCommon.actor.cpp
RestoreRoleCommon.actor.h
RestoreUtil.actor.cpp
RestoreUtil.h
RestoreWorker.actor.cpp
RestoreWorker.actor.h
RestoreWorkerInterface.actor.cpp
RestoreWorkerInterface.actor.h
RkTagThrottleCollection.cpp
RkTagThrottleCollection.h
RocksDBCheckpointUtils.actor.cpp
RocksDBCheckpointUtils.actor.h
RoleLineage.actor.cpp
RoleLineage.actor.h
ServerCheckpoint.actor.cpp
ServerCheckpoint.actor.h
ServerDBInfo.actor.h
ServerDBInfo.h
SigStack.cpp
SimKmsConnector.actor.h
SimKmsConnector.actor.cpp
SimpleConfigConsumer.actor.cpp
SimpleConfigConsumer.h
SimulatedCluster.actor.cpp
SimulatedCluster.h
SkipList.cpp
SpanContextMessage.h
Status.actor.cpp
Status.h
StorageCache.actor.cpp
StorageMetrics.actor.h
StorageMetrics.h
storageserver.actor.cpp
TagPartitionedLogSystem.actor.cpp
TagPartitionedLogSystem.actor.h
TagThrottler.actor.cpp
TagThrottler.h
TCInfo.actor.cpp
TCInfo.h
template_fdb.h
tester.actor.cpp
TesterInterface.actor.h
TLogInterface.h
TLogServer.actor.cpp
TransactionTagCounter.cpp
TransactionTagCounter.h
TSSMappingUtil.actor.cpp
TSSMappingUtil.actor.h
VersionedBTree.actor.cpp
VFSAsync.cpp
VFSAsync.h
WaitFailure.actor.cpp
WaitFailure.h
worker.actor.cpp
WorkerInterface.actor.h
workloads/ApiCorrectness.actor.cpp
workloads/ApiWorkload.actor.cpp
workloads/ApiWorkload.h
workloads/AsyncFile.actor.h
workloads/AsyncFile.cpp
workloads/AsyncFileCorrectness.actor.cpp
workloads/AsyncFileRead.actor.cpp
workloads/AsyncFileWrite.actor.cpp
workloads/AtomicOps.actor.cpp
workloads/AtomicOpsApiCorrectness.actor.cpp
workloads/AtomicRestore.actor.cpp
workloads/AtomicSwitchover.actor.cpp
workloads/BackgroundSelectors.actor.cpp
workloads/BackupAndParallelRestoreCorrectness.actor.cpp
workloads/BackupCorrectness.actor.cpp
workloads/BackupToBlob.actor.cpp
workloads/BackupToDBAbort.actor.cpp
workloads/BackupToDBCorrectness.actor.cpp
workloads/BackupToDBUpgrade.actor.cpp
workloads/BlobGranuleCorrectnessWorkload.actor.cpp
workloads/BlobGranuleVerifier.actor.cpp
workloads/BlobStoreWorkload.h
workloads/BulkLoad.actor.cpp
workloads/BulkSetup.actor.h
workloads/Cache.actor.cpp
workloads/ChangeConfig.actor.cpp
workloads/ChangeFeeds.actor.cpp
workloads/ClearSingleRange.actor.cpp
workloads/ClientTransactionProfileCorrectness.actor.cpp
workloads/ClientWorkload.actor.cpp
workloads/ClogSingleConnection.actor.cpp
workloads/CommitBugCheck.actor.cpp
workloads/ConfigIncrement.actor.cpp
workloads/ConfigureDatabase.actor.cpp
workloads/ConflictRange.actor.cpp
workloads/ConsistencyCheck.actor.cpp
workloads/CpuProfiler.actor.cpp
workloads/Cycle.actor.cpp
workloads/DataDistributionMetrics.actor.cpp
workloads/DataLossRecovery.actor.cpp
workloads/DDBalance.actor.cpp
workloads/DDMetrics.actor.cpp
workloads/DDMetricsExclude.actor.cpp
workloads/DifferentClustersSameRV.actor.cpp
workloads/DiskDurability.actor.cpp
workloads/DiskDurabilityTest.actor.cpp
workloads/DiskFailureInjection.actor.cpp
workloads/DummyWorkload.actor.cpp
workloads/EncryptionOps.actor.cpp
workloads/EncryptKeyProxyTest.actor.cpp
workloads/ExternalWorkload.actor.cpp
workloads/FastTriggeredWatches.actor.cpp
workloads/FileSystem.actor.cpp
workloads/Fuzz.cpp
workloads/FuzzApiCorrectness.actor.cpp
workloads/GetMappedRange.actor.cpp
workloads/GetRangeStream.actor.cpp
workloads/HealthMetricsApi.actor.cpp
workloads/HighContentionPrefixAllocatorWorkload.actor.cpp
workloads/Increment.actor.cpp
workloads/IncrementalBackup.actor.cpp
workloads/IndexScan.actor.cpp
workloads/Inventory.actor.cpp
workloads/KillRegion.actor.cpp
workloads/KVStoreTest.actor.cpp
workloads/LocalRatekeeper.actor.cpp
workloads/LockDatabase.actor.cpp
workloads/LockDatabaseFrequently.actor.cpp
workloads/LogMetrics.actor.cpp
workloads/LowLatency.actor.cpp
workloads/MachineAttrition.actor.cpp
workloads/Mako.actor.cpp
workloads/MemoryKeyValueStore.cpp
workloads/MemoryKeyValueStore.h
workloads/MemoryLifetime.actor.cpp
workloads/MetricLogging.actor.cpp
workloads/MiniCycle.actor.cpp
workloads/MutationLogReaderCorrectness.actor.cpp
workloads/ParallelRestore.actor.cpp
workloads/Performance.actor.cpp
workloads/PhysicalShardMove.actor.cpp
workloads/Ping.actor.cpp
workloads/PopulateTPCC.actor.cpp
workloads/PrivateEndpoints.actor.cpp
workloads/ProtocolVersion.actor.cpp
workloads/PubSubMultiples.actor.cpp
workloads/QueuePush.actor.cpp
workloads/RandomClogging.actor.cpp
workloads/RandomMoveKeys.actor.cpp
workloads/RandomSelector.actor.cpp
workloads/ReadAfterWrite.actor.cpp
workloads/ReadHotDetection.actor.cpp
workloads/ReadWrite.actor.cpp
workloads/RemoveServersSafely.actor.cpp
workloads/ReportConflictingKeys.actor.cpp
workloads/RestoreBackup.actor.cpp
workloads/RestoreFromBlob.actor.cpp
workloads/Rollback.actor.cpp
workloads/RyowCorrectness.actor.cpp
workloads/RYWDisable.actor.cpp
workloads/RYWPerformance.actor.cpp
workloads/SaveAndKill.actor.cpp
workloads/SelectorCorrectness.actor.cpp
workloads/Serializability.actor.cpp
workloads/Sideband.actor.cpp
workloads/SidebandSingle.actor.cpp
workloads/SimpleAtomicAdd.actor.cpp
workloads/SkewedReadWrite.actor.cpp
workloads/SlowTaskWorkload.actor.cpp
workloads/SnapTest.actor.cpp
workloads/SpecialKeySpaceCorrectness.actor.cpp
workloads/StatusWorkload.actor.cpp
workloads/Storefront.actor.cpp
workloads/StreamingRangeRead.actor.cpp
workloads/StreamingRead.actor.cpp
workloads/SubmitBackup.actor.cpp
workloads/SuspendProcesses.actor.cpp
workloads/TagThrottleApi.actor.cpp
workloads/TargetedKill.actor.cpp
workloads/TaskBucketCorrectness.actor.cpp
workloads/TenantManagement.actor.cpp
workloads/ThreadSafety.actor.cpp
workloads/Throttling.actor.cpp
workloads/Throughput.actor.cpp
workloads/TimeKeeperCorrectness.actor.cpp
workloads/TPCC.actor.cpp
workloads/TPCCWorkload.h
workloads/TriggerRecovery.actor.cpp
workloads/UDPWorkload.actor.cpp
workloads/UnitPerf.actor.cpp
workloads/UnitTests.actor.cpp
workloads/Unreadable.actor.cpp
workloads/VersionStamp.actor.cpp
workloads/WatchAndWait.actor.cpp
workloads/Watches.actor.cpp
workloads/WatchesSameKeyCorrectness.actor.cpp
workloads/WorkerErrors.actor.cpp
workloads/workloads.actor.h
workloads/WriteBandwidth.actor.cpp
workloads/WriteDuringRead.actor.cpp
workloads/WriteTagThrottling.actor.cpp
)
ApplyMetadataMutation.cpp
ApplyMetadataMutation.h
BackupInterface.h
BackupProgress.actor.cpp
BackupProgress.actor.h
BackupWorker.actor.cpp
BlobGranuleServerCommon.actor.cpp
BlobGranuleServerCommon.actor.h
BlobGranuleValidation.actor.cpp
BlobGranuleValidation.actor.h
BlobManager.actor.cpp
BlobManagerInterface.h
BlobWorker.actor.cpp
ClusterController.actor.cpp
ClusterController.actor.h
ClusterRecovery.actor.cpp
ClusterRecovery.actor.h
CommitProxyServer.actor.cpp
ConfigBroadcaster.actor.cpp
ConfigBroadcaster.h
ConfigDatabaseUnitTests.actor.cpp
ConfigFollowerInterface.cpp
ConfigFollowerInterface.h
ConfigNode.actor.cpp
ConfigNode.h
ConflictSet.h
CoordinatedState.actor.cpp
CoordinatedState.h
Coordination.actor.cpp
CoordinationInterface.h
CoroFlow.h
DataDistribution.actor.cpp
DataDistribution.actor.h
DataDistributionQueue.actor.cpp
DataDistributionTracker.actor.cpp
DataDistributorInterface.h
DBCoreState.h
DDTeamCollection.actor.cpp
DDTeamCollection.h
DiskQueue.actor.cpp
EncryptKeyProxy.actor.cpp
EncryptKeyProxyInterface.h
FDBExecHelper.actor.cpp
FDBExecHelper.actor.h
fdbserver.actor.cpp
GrvProxyServer.actor.cpp
IConfigConsumer.cpp
IConfigConsumer.h
IDiskQueue.h
IKeyValueContainer.h
IKeyValueStore.h
IPager.h
KeyValueStoreCompressTestData.actor.cpp
KeyValueStoreMemory.actor.cpp
KeyValueStoreRocksDB.actor.cpp
KeyValueStoreSQLite.actor.cpp
KmsConnector.h
KmsConnectorInterface.h
KnobProtectiveGroups.cpp
KnobProtectiveGroups.h
Knobs.h
LatencyBandConfig.cpp
LatencyBandConfig.h
LeaderElection.actor.cpp
LeaderElection.h
LocalConfiguration.actor.cpp
LocalConfiguration.h
LogProtocolMessage.h
LogRouter.actor.cpp
LogSystem.cpp
LogSystem.h
LogSystemConfig.cpp
LogSystemConfig.h
LogSystemDiskQueueAdapter.actor.cpp
LogSystemDiskQueueAdapter.h
LogSystemPeekCursor.actor.cpp
MasterInterface.h
masterserver.actor.cpp
MetricLogger.actor.cpp
MetricLogger.actor.h
MoveKeys.actor.cpp
MoveKeys.actor.h
MutationTracking.cpp
MutationTracking.h
networktest.actor.cpp
NetworkTest.h
OldTLogServer_4_6.actor.cpp
OldTLogServer_6_0.actor.cpp
OldTLogServer_6_2.actor.cpp
OTELSpanContextMessage.h
OnDemandStore.actor.cpp
OnDemandStore.h
PaxosConfigConsumer.actor.cpp
PaxosConfigConsumer.h
ProxyCommitData.actor.h
pubsub.actor.cpp
pubsub.h
QuietDatabase.actor.cpp
QuietDatabase.h
RadixTree.h
Ratekeeper.actor.cpp
Ratekeeper.h
RatekeeperInterface.h
RecoveryState.h
RemoteIKeyValueStore.actor.h
RemoteIKeyValueStore.actor.cpp
RESTKmsConnector.h
RESTKmsConnector.actor.cpp
ResolutionBalancer.actor.cpp
ResolutionBalancer.actor.h
Resolver.actor.cpp
ResolverInterface.h
RestoreApplier.actor.cpp
RestoreApplier.actor.h
RestoreCommon.actor.cpp
RestoreCommon.actor.h
RestoreController.actor.cpp
RestoreController.actor.h
RestoreLoader.actor.cpp
RestoreLoader.actor.h
RestoreRoleCommon.actor.cpp
RestoreRoleCommon.actor.h
RestoreUtil.actor.cpp
RestoreUtil.h
RestoreWorker.actor.cpp
RestoreWorker.actor.h
RestoreWorkerInterface.actor.cpp
RestoreWorkerInterface.actor.h
RkTagThrottleCollection.cpp
RkTagThrottleCollection.h
RocksDBCheckpointUtils.actor.cpp
RocksDBCheckpointUtils.actor.h
RoleLineage.actor.cpp
RoleLineage.actor.h
ServerCheckpoint.actor.cpp
ServerCheckpoint.actor.h
ServerDBInfo.actor.h
ServerDBInfo.h
SigStack.cpp
SimKmsConnector.h
SimKmsConnector.actor.cpp
SimpleConfigConsumer.actor.cpp
SimpleConfigConsumer.h
SimulatedCluster.actor.cpp
SimulatedCluster.h
SkipList.cpp
SpanContextMessage.h
Status.actor.cpp
Status.h
StorageCache.actor.cpp
StorageMetrics.actor.cpp
StorageMetrics.h
storageserver.actor.cpp
TagPartitionedLogSystem.actor.cpp
TagPartitionedLogSystem.actor.h
TagThrottler.actor.cpp
TagThrottler.h
TCInfo.actor.cpp
TCInfo.h
template_fdb.h
tester.actor.cpp
TesterInterface.actor.h
TLogInterface.h
TLogServer.actor.cpp
TransactionTagCounter.cpp
TransactionTagCounter.h
TSSMappingUtil.actor.cpp
TSSMappingUtil.actor.h
VersionedBTree.actor.cpp
VFSAsync.cpp
VFSAsync.h
WaitFailure.actor.cpp
WaitFailure.h
worker.actor.cpp
WorkerInterface.actor.h
workloads/ApiCorrectness.actor.cpp
workloads/ApiWorkload.actor.cpp
workloads/ApiWorkload.h
workloads/AsyncFile.actor.h
workloads/AsyncFile.cpp
workloads/AsyncFileCorrectness.actor.cpp
workloads/AsyncFileRead.actor.cpp
workloads/AsyncFileWrite.actor.cpp
workloads/AtomicOps.actor.cpp
workloads/AtomicOpsApiCorrectness.actor.cpp
workloads/AtomicRestore.actor.cpp
workloads/AtomicSwitchover.actor.cpp
workloads/BackgroundSelectors.actor.cpp
workloads/BackupAndParallelRestoreCorrectness.actor.cpp
workloads/BackupCorrectness.actor.cpp
workloads/BackupToBlob.actor.cpp
workloads/BackupToDBAbort.actor.cpp
workloads/BackupToDBCorrectness.actor.cpp
workloads/BackupToDBUpgrade.actor.cpp
workloads/BlobGranuleCorrectnessWorkload.actor.cpp
workloads/BlobGranuleVerifier.actor.cpp
workloads/BlobStoreWorkload.h
workloads/BulkLoad.actor.cpp
workloads/BulkSetup.actor.h
workloads/Cache.actor.cpp
workloads/ChangeConfig.actor.cpp
workloads/ChangeFeeds.actor.cpp
workloads/ClearSingleRange.actor.cpp
workloads/ClientTransactionProfileCorrectness.actor.cpp
workloads/ClientWorkload.actor.cpp
workloads/ClogSingleConnection.actor.cpp
workloads/CommitBugCheck.actor.cpp
workloads/ConfigIncrement.actor.cpp
workloads/ConfigureDatabase.actor.cpp
workloads/ConflictRange.actor.cpp
workloads/ConsistencyCheck.actor.cpp
workloads/CpuProfiler.actor.cpp
workloads/Cycle.actor.cpp
workloads/DataDistributionMetrics.actor.cpp
workloads/DataLossRecovery.actor.cpp
workloads/DDBalance.actor.cpp
workloads/DDMetrics.actor.cpp
workloads/DDMetricsExclude.actor.cpp
workloads/DifferentClustersSameRV.actor.cpp
workloads/DiskDurability.actor.cpp
workloads/DiskDurabilityTest.actor.cpp
workloads/DiskFailureInjection.actor.cpp
workloads/DummyWorkload.actor.cpp
workloads/EncryptionOps.actor.cpp
workloads/EncryptKeyProxyTest.actor.cpp
workloads/ExternalWorkload.actor.cpp
workloads/FastTriggeredWatches.actor.cpp
workloads/FileSystem.actor.cpp
workloads/Fuzz.cpp
workloads/FuzzApiCorrectness.actor.cpp
workloads/GetMappedRange.actor.cpp
workloads/GetRangeStream.actor.cpp
workloads/HealthMetricsApi.actor.cpp
workloads/HighContentionPrefixAllocatorWorkload.actor.cpp
workloads/Increment.actor.cpp
workloads/IncrementalBackup.actor.cpp
workloads/IndexScan.actor.cpp
workloads/Inventory.actor.cpp
workloads/KillRegion.actor.cpp
workloads/KVStoreTest.actor.cpp
workloads/LocalRatekeeper.actor.cpp
workloads/LockDatabase.actor.cpp
workloads/LockDatabaseFrequently.actor.cpp
workloads/LogMetrics.actor.cpp
workloads/LowLatency.actor.cpp
workloads/MachineAttrition.actor.cpp
workloads/Mako.actor.cpp
workloads/MemoryKeyValueStore.cpp
workloads/MemoryKeyValueStore.h
workloads/MemoryLifetime.actor.cpp
workloads/MetricLogging.actor.cpp
workloads/MiniCycle.actor.cpp
workloads/MutationLogReaderCorrectness.actor.cpp
workloads/ParallelRestore.actor.cpp
workloads/Performance.actor.cpp
workloads/PhysicalShardMove.actor.cpp
workloads/Ping.actor.cpp
workloads/PopulateTPCC.actor.cpp
workloads/PrivateEndpoints.actor.cpp
workloads/ProtocolVersion.actor.cpp
workloads/PubSubMultiples.actor.cpp
workloads/QueuePush.actor.cpp
workloads/RandomClogging.actor.cpp
workloads/RandomMoveKeys.actor.cpp
workloads/RandomSelector.actor.cpp
workloads/ReadAfterWrite.actor.cpp
workloads/ReadHotDetection.actor.cpp
workloads/ReadWrite.actor.cpp
workloads/RemoveServersSafely.actor.cpp
workloads/ReportConflictingKeys.actor.cpp
workloads/RestoreBackup.actor.cpp
workloads/RestoreFromBlob.actor.cpp
workloads/Rollback.actor.cpp
workloads/RyowCorrectness.actor.cpp
workloads/RYWDisable.actor.cpp
workloads/RYWPerformance.actor.cpp
workloads/SaveAndKill.actor.cpp
workloads/SelectorCorrectness.actor.cpp
workloads/Serializability.actor.cpp
workloads/Sideband.actor.cpp
workloads/SidebandSingle.actor.cpp
workloads/SimpleAtomicAdd.actor.cpp
workloads/SlowTaskWorkload.actor.cpp
workloads/SnapTest.actor.cpp
workloads/SpecialKeySpaceCorrectness.actor.cpp
workloads/StatusWorkload.actor.cpp
workloads/Storefront.actor.cpp
workloads/StreamingRangeRead.actor.cpp
workloads/StreamingRead.actor.cpp
workloads/SubmitBackup.actor.cpp
workloads/SuspendProcesses.actor.cpp
workloads/TagThrottleApi.actor.cpp
workloads/TargetedKill.actor.cpp
workloads/TaskBucketCorrectness.actor.cpp
workloads/TenantManagement.actor.cpp
workloads/ThreadSafety.actor.cpp
workloads/Throttling.actor.cpp
workloads/Throughput.actor.cpp
workloads/TimeKeeperCorrectness.actor.cpp
workloads/TPCC.actor.cpp
workloads/TPCCWorkload.h
workloads/TriggerRecovery.actor.cpp
workloads/UDPWorkload.actor.cpp
workloads/UnitPerf.actor.cpp
workloads/UnitTests.actor.cpp
workloads/Unreadable.actor.cpp
workloads/VersionStamp.actor.cpp
workloads/WatchAndWait.actor.cpp
workloads/Watches.actor.cpp
workloads/WatchesSameKeyCorrectness.actor.cpp
workloads/WorkerErrors.actor.cpp
workloads/workloads.actor.h
workloads/WriteBandwidth.actor.cpp
workloads/WriteDuringRead.actor.cpp
workloads/WriteTagThrottling.actor.cpp
)
if (${COROUTINE_IMPL} STREQUAL libcoro)
list(APPEND FDBSERVER_SRCS CoroFlowCoro.actor.cpp)
else ()
list(APPEND FDBSERVER_SRCS CoroFlow.actor.cpp)
endif ()
if(${COROUTINE_IMPL} STREQUAL libcoro)
list(APPEND FDBSERVER_SRCS CoroFlowCoro.actor.cpp)
else()
list(APPEND FDBSERVER_SRCS CoroFlow.actor.cpp)
endif()
add_library(fdb_sqlite STATIC
sqlite/btree.h
sqlite/hash.h
sqlite/sqlite3.h
sqlite/sqlite3ext.h
sqlite/sqliteInt.h
sqlite/sqliteLimit.h
sqlite/sqlite3.amalgamation.c)
if (WITH_ROCKSDB_EXPERIMENTAL)
add_definitions(-DSSD_ROCKSDB_EXPERIMENTAL)
include(CompileRocksDB)
# CompileRocksDB sets `lz4_LIBRARIES` to the shared lib; we want to link
# statically, so find the static library here.
find_library(lz4_STATIC_LIBRARIES
NAMES liblz4.a REQUIRED)
if (WITH_LIBURING)
find_package(uring)
endif ()
endif ()
include(CompileRocksDB)
# CompileRocksDB sets `lz4_LIBRARIES` to the shared lib; we want to link
# statically, so find the static library here.
find_library(lz4_STATIC_LIBRARIES
NAMES liblz4.a REQUIRED)
if (WITH_LIBURING)
find_package(uring)
endif()
endif()
# Suppress warnings in sqlite since it's third party
if (NOT WIN32)
target_compile_definitions(fdb_sqlite PRIVATE $<$<CONFIG:Debug>:NDEBUG>)
target_compile_options(fdb_sqlite BEFORE PRIVATE -w) # disable warnings for third party
endif ()
if(NOT WIN32)
target_compile_definitions(fdb_sqlite PRIVATE $<$<CONFIG:Debug>:NDEBUG>)
target_compile_options(fdb_sqlite BEFORE PRIVATE -w) # disable warnings for third party
endif()
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/workloads)
add_flow_target(EXECUTABLE NAME fdbserver SRCS ${FDBSERVER_SRCS})
target_include_directories(fdbserver PRIVATE
${CMAKE_SOURCE_DIR}/bindings/c
${CMAKE_BINARY_DIR}/bindings/c
${CMAKE_CURRENT_BINARY_DIR}/workloads
${CMAKE_CURRENT_SOURCE_DIR}/workloads)
if (WITH_ROCKSDB_EXPERIMENTAL)
add_dependencies(fdbserver rocksdb)
if (WITH_LIBURING)
target_include_directories(fdbserver PRIVATE ${ROCKSDB_INCLUDE_DIR} ${uring_INCLUDE_DIR})
target_link_libraries(fdbserver PRIVATE fdbclient fdb_sqlite ${ROCKSDB_LIBRARIES} ${uring_LIBRARIES} ${lz4_STATIC_LIBRARIES})
target_compile_definitions(fdbserver PRIVATE BOOST_ASIO_HAS_IO_URING=1 BOOST_ASIO_DISABLE_EPOLL=1)
else ()
target_include_directories(fdbserver PRIVATE ${ROCKSDB_INCLUDE_DIR})
target_link_libraries(fdbserver PRIVATE fdbclient fdb_sqlite ${ROCKSDB_LIBRARIES} ${lz4_STATIC_LIBRARIES})
target_compile_definitions(fdbserver PRIVATE)
endif ()
else ()
target_link_libraries(fdbserver PRIVATE fdbclient fdb_sqlite)
endif ()
add_dependencies(fdbserver rocksdb)
if(WITH_LIBURING)
target_include_directories(fdbserver PRIVATE ${ROCKSDB_INCLUDE_DIR} ${uring_INCLUDE_DIR})
target_link_libraries(fdbserver PRIVATE fdbclient fdb_sqlite ${ROCKSDB_LIBRARIES} ${uring_LIBRARIES} ${lz4_STATIC_LIBRARIES})
target_compile_definitions(fdbserver PRIVATE BOOST_ASIO_HAS_IO_URING=1 BOOST_ASIO_DISABLE_EPOLL=1)
else()
target_include_directories(fdbserver PRIVATE ${ROCKSDB_INCLUDE_DIR})
target_link_libraries(fdbserver PRIVATE fdbclient fdb_sqlite ${ROCKSDB_LIBRARIES} ${lz4_STATIC_LIBRARIES})
target_compile_definitions(fdbserver PRIVATE)
endif()
else()
target_link_libraries(fdbserver PRIVATE fdbclient fdb_sqlite)
endif()
target_link_libraries(fdbserver PRIVATE toml11_target jemalloc)
# target_compile_definitions(fdbserver PRIVATE -DENABLE_SAMPLING)
if (GPERFTOOLS_FOUND)
target_link_libraries(fdbserver PRIVATE gperftools)
endif ()
target_link_libraries(fdbserver PRIVATE gperftools)
endif()
if (NOT OPEN_FOR_IDE)
if (GENERATE_DEBUG_PACKAGES)
fdb_install(TARGETS fdbserver DESTINATION sbin COMPONENT server)
else ()
add_custom_target(prepare_fdbserver_install ALL DEPENDS strip_only_fdbserver)
fdb_install(PROGRAMS ${CMAKE_BINARY_DIR}/packages/bin/fdbserver DESTINATION sbin COMPONENT server)
endif ()
endif ()
if(NOT OPEN_FOR_IDE)
if(GENERATE_DEBUG_PACKAGES)
fdb_install(TARGETS fdbserver DESTINATION sbin COMPONENT server)
else()
add_custom_target(prepare_fdbserver_install ALL DEPENDS strip_only_fdbserver)
fdb_install(PROGRAMS ${CMAKE_BINARY_DIR}/packages/bin/fdbserver DESTINATION sbin COMPONENT server)
endif()
endif()


@@ -3170,4 +3170,4 @@ TEST_CASE("/fdbserver/clustercontroller/shouldTriggerFailoverDueToDegradedServer
return Void();
}
} // namespace


@@ -24,9 +24,9 @@
#include "fdbserver/KmsConnector.h"
#include "fdbserver/KmsConnectorInterface.h"
#include "fdbserver/Knobs.h"
#include "fdbserver/RESTKmsConnector.actor.h"
#include "fdbserver/RESTKmsConnector.h"
#include "fdbserver/ServerDBInfo.actor.h"
#include "fdbserver/SimKmsConnector.actor.h"
#include "fdbserver/SimKmsConnector.h"
#include "fdbserver/WorkerInterface.actor.h"
#include "fdbserver/ServerDBInfo.h"
#include "flow/Arena.h"
@@ -42,6 +42,7 @@
#include "flow/network.h"
#include <boost/mpl/not.hpp>
#include <string>
#include <utility>
#include <memory>
@@ -162,10 +163,17 @@ ACTOR Future<Void> getCipherKeysByBaseCipherKeyIds(Reference<EncryptKeyProxyData
// for the rest, reach out to the KMS to fetch the required details
std::vector<std::pair<EncryptCipherBaseKeyId, EncryptCipherDomainId>> lookupCipherIds;
state std::vector<EKPBaseCipherDetails> cachedCipherDetails;
state EKPGetBaseCipherKeysByIdsRequest keysByIds = req;
state EKPGetBaseCipherKeysByIdsReply keyIdsReply;
state Optional<TraceEvent> dbgTrace =
keysByIds.debugId.present() ? TraceEvent("GetByKeyIds", ekpProxyData->myId) : Optional<TraceEvent>();
if (dbgTrace.present()) {
dbgTrace.get().setMaxEventLength(SERVER_KNOBS->ENCRYPT_PROXY_MAX_DBG_TRACE_LENGTH);
dbgTrace.get().detail("DbgId", keysByIds.debugId.get());
}
// Dedup the requested pair<baseCipherId, encryptDomainId>
// TODO: endpoint serialization of std::unordered_set isn't working at the moment
@@ -176,12 +184,28 @@ ACTOR Future<Void> getCipherKeysByBaseCipherKeyIds(Reference<EncryptKeyProxyData
dedupedCipherIds.emplace(item);
}
if (dbgTrace.present()) {
dbgTrace.get().detail("NKeys", dedupedCipherIds.size());
for (const auto& item : dedupedCipherIds) {
// Record {encryptDomainId, baseCipherId} queried
dbgTrace.get().detail(getEncryptDbgTraceKey(ENCRYPT_DBG_TRACE_QUERY_PREFIX, item.second, item.first), "");
}
}
for (const auto& item : dedupedCipherIds) {
const auto itr = ekpProxyData->baseCipherKeyIdCache.find(item.first);
if (itr != ekpProxyData->baseCipherKeyIdCache.end()) {
ASSERT(itr->second.isValid());
cachedCipherDetails.emplace_back(
itr->second.domainId, itr->second.baseCipherId, itr->second.baseCipherKey, keyIdsReply.arena);
if (dbgTrace.present()) {
// {encryptDomainId, baseCipherId} forms a unique tuple across encryption domains
dbgTrace.get().detail(getEncryptDbgTraceKey(ENCRYPT_DBG_TRACE_CACHED_PREFIX,
itr->second.domainId,
itr->second.baseCipherId),
"");
}
} else {
lookupCipherIds.emplace_back(std::make_pair(item.first, item.second));
}
@@ -192,7 +216,7 @@ ACTOR Future<Void> getCipherKeysByBaseCipherKeyIds(Reference<EncryptKeyProxyData
if (!lookupCipherIds.empty()) {
try {
KmsConnLookupEKsByKeyIdsReq keysByIdsReq(lookupCipherIds);
KmsConnLookupEKsByKeyIdsReq keysByIdsReq(lookupCipherIds, keysByIds.debugId);
KmsConnLookupEKsByKeyIdsRep keysByIdsRep = wait(kmsConnectorInf.ekLookupByIds.getReply(keysByIdsReq));
for (const auto& item : keysByIdsRep.cipherKeyDetails) {
@@ -206,13 +230,20 @@ ACTOR Future<Void> getCipherKeysByBaseCipherKeyIds(Reference<EncryptKeyProxyData
for (auto& item : keysByIdsRep.cipherKeyDetails) {
// DomainId isn't available here; the caller must know the encryption domainId
ekpProxyData->insertIntoBaseCipherIdCache(item.encryptDomainId, item.encryptKeyId, item.encryptKey);
if (dbgTrace.present()) {
// {encryptDomainId, baseCipherId} forms a unique tuple across encryption domains
dbgTrace.get().detail(
getEncryptDbgTraceKey(ENCRYPT_DBG_TRACE_INSERT_PREFIX, item.encryptDomainId, item.encryptKeyId),
"");
}
}
} catch (Error& e) {
if (!canReplyWith(e)) {
TraceEvent("GetCipherKeysByIds", ekpProxyData->myId).error(e);
TraceEvent("GetCipherKeysByKeyIds", ekpProxyData->myId).error(e);
throw;
}
TraceEvent("GetCipherKeysByIds", ekpProxyData->myId).detail("ErrorCode", e.code());
TraceEvent("GetCipherKeysByKeyIds", ekpProxyData->myId).detail("ErrorCode", e.code());
ekpProxyData->sendErrorResponse(keysByIds.reply, e);
return Void();
}
@@ -237,6 +268,13 @@ ACTOR Future<Void> getLatestCipherKeys(Reference<EncryptKeyProxyData> ekpProxyDa
state EKPGetLatestBaseCipherKeysRequest latestKeysReq = req;
state EKPGetLatestBaseCipherKeysReply latestCipherReply;
state Arena& arena = latestCipherReply.arena;
state Optional<TraceEvent> dbgTrace =
latestKeysReq.debugId.present() ? TraceEvent("GetByDomIds", ekpProxyData->myId) : Optional<TraceEvent>();
if (dbgTrace.present()) {
dbgTrace.get().setMaxEventLength(SERVER_KNOBS->ENCRYPT_PROXY_MAX_DBG_TRACE_LENGTH);
dbgTrace.get().detail("DbgId", latestKeysReq.debugId.get());
}
// Dedup the requested domainIds.
// TODO: endpoint serialization of std::unordered_set isn't working at the moment
@@ -245,6 +283,14 @@ ACTOR Future<Void> getLatestCipherKeys(Reference<EncryptKeyProxyData> ekpProxyDa
dedupedDomainIds.emplace(id);
}
if (dbgTrace.present()) {
dbgTrace.get().detail("NKeys", dedupedDomainIds.size());
for (EncryptCipherDomainId id : dedupedDomainIds) {
// log encryptDomainIds queried
dbgTrace.get().detail(getEncryptDbgTraceKey(ENCRYPT_DBG_TRACE_QUERY_PREFIX, id), "");
}
}
// First, check if the requested information is already cached by the server.
// Ensure the cached information is within FLOW_KNOBS->ENCRYPT_CIPHER_KEY_CACHE_TTL time window.
@@ -253,6 +299,12 @@ ACTOR Future<Void> getLatestCipherKeys(Reference<EncryptKeyProxyData> ekpProxyDa
const auto itr = ekpProxyData->baseCipherDomainIdCache.find(id);
if (itr != ekpProxyData->baseCipherDomainIdCache.end() && itr->second.isValid()) {
cachedCipherDetails.emplace_back(id, itr->second.baseCipherId, itr->second.baseCipherKey, arena);
if (dbgTrace.present()) {
// {encryptDomainId, baseCipherId} forms a unique tuple across encryption domains
dbgTrace.get().detail(
getEncryptDbgTraceKey(ENCRYPT_DBG_TRACE_CACHED_PREFIX, id, itr->second.baseCipherId), "");
}
} else {
lookupCipherDomains.emplace_back(id);
}
@@ -263,7 +315,7 @@ ACTOR Future<Void> getLatestCipherKeys(Reference<EncryptKeyProxyData> ekpProxyDa
if (!lookupCipherDomains.empty()) {
try {
KmsConnLookupEKsByDomainIdsReq keysByDomainIdReq(lookupCipherDomains);
KmsConnLookupEKsByDomainIdsReq keysByDomainIdReq(lookupCipherDomains, latestKeysReq.debugId);
KmsConnLookupEKsByDomainIdsRep keysByDomainIdRep =
wait(kmsConnectorInf.ekLookupByDomainIds.getReply(keysByDomainIdReq));
@@ -273,6 +325,13 @@ ACTOR Future<Void> getLatestCipherKeys(Reference<EncryptKeyProxyData> ekpProxyDa
// Record the fetched cipher details to the local cache for the future references
ekpProxyData->insertIntoBaseDomainIdCache(item.encryptDomainId, item.encryptKeyId, item.encryptKey);
if (dbgTrace.present()) {
// {encryptDomainId, baseCipherId} forms a unique tuple across encryption domains
dbgTrace.get().detail(
getEncryptDbgTraceKey(ENCRYPT_DBG_TRACE_INSERT_PREFIX, item.encryptDomainId, item.encryptKeyId),
"");
}
}
} catch (Error& e) {
if (!canReplyWith(e)) {
@@ -298,13 +357,16 @@ ACTOR Future<Void> getLatestCipherKeys(Reference<EncryptKeyProxyData> ekpProxyDa
ACTOR Future<Void> refreshEncryptionKeysCore(Reference<EncryptKeyProxyData> ekpProxyData,
KmsConnectorInterface kmsConnectorInf) {
state UID debugId = deterministicRandom()->randomUniqueID();
ASSERT(g_network->isSimulated());
TraceEvent("RefreshEKs_Start", ekpProxyData->myId).detail("KmsConnInf", kmsConnectorInf.id());
state TraceEvent t("RefreshEKs_Start", ekpProxyData->myId);
t.setMaxEventLength(SERVER_KNOBS->ENCRYPT_PROXY_MAX_DBG_TRACE_LENGTH);
t.detail("KmsConnInf", kmsConnectorInf.id());
t.detail("DebugId", debugId);
try {
KmsConnLookupEKsByDomainIdsReq req;
req.debugId = debugId;
req.encryptDomainIds.reserve(ekpProxyData->baseCipherDomainIdCache.size());
for (auto& item : ekpProxyData->baseCipherDomainIdCache) {
@@ -313,16 +375,20 @@ ACTOR Future<Void> refreshEncryptionKeysCore(Reference<EncryptKeyProxyData> ekpP
KmsConnLookupEKsByDomainIdsRep rep = wait(kmsConnectorInf.ekLookupByDomainIds.getReply(req));
for (auto& item : rep.cipherKeyDetails) {
ekpProxyData->insertIntoBaseDomainIdCache(item.encryptDomainId, item.encryptKeyId, item.encryptKey);
// {encryptDomainId, baseCipherId} forms a unique tuple across encryption domains
t.detail(getEncryptDbgTraceKey(ENCRYPT_DBG_TRACE_INSERT_PREFIX, item.encryptDomainId, item.encryptKeyId),
"");
}
ekpProxyData->baseCipherKeysRefreshed += rep.cipherKeyDetails.size();
TraceEvent("RefreshEKs_Done", ekpProxyData->myId).detail("KeyCount", rep.cipherKeyDetails.size());
t.detail("nKeys", rep.cipherKeyDetails.size());
} catch (Error& e) {
if (!canReplyWith(e)) {
TraceEvent("RefreshEncryptionKeys_Error").error(e);
TraceEvent("RefreshEKs_Error").error(e);
throw e;
}
TraceEvent("RefreshEncryptionKeys").detail("ErrorCode", e.code());
TraceEvent("RefreshEKs").detail("ErrorCode", e.code());
++ekpProxyData->numEncryptionKeyRefreshErrors;
}


@@ -125,6 +125,7 @@ struct EKPGetBaseCipherKeysByIdsRequest {
constexpr static FileIdentifier file_identifier = 4930263;
UID requesterID;
std::vector<std::pair<uint64_t, int64_t>> baseCipherIds;
Optional<UID> debugId;
ReplyPromise<EKPGetBaseCipherKeysByIdsReply> reply;
EKPGetBaseCipherKeysByIdsRequest() : requesterID(deterministicRandom()->randomUniqueID()) {}
@@ -133,7 +134,7 @@ struct EKPGetBaseCipherKeysByIdsRequest {
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, requesterID, baseCipherIds, reply);
serializer(ar, requesterID, baseCipherIds, debugId, reply);
}
};
@@ -156,17 +157,16 @@ struct EKPGetLatestBaseCipherKeysReply {
struct EKPGetLatestBaseCipherKeysRequest {
constexpr static FileIdentifier file_identifier = 1910123;
UID requesterID;
std::vector<uint64_t> encryptDomainIds;
Optional<UID> debugId;
ReplyPromise<EKPGetLatestBaseCipherKeysReply> reply;
EKPGetLatestBaseCipherKeysRequest() : requesterID(deterministicRandom()->randomUniqueID()) {}
explicit EKPGetLatestBaseCipherKeysRequest(UID uid, const std::vector<uint64_t>& ids)
: requesterID(uid), encryptDomainIds(ids) {}
EKPGetLatestBaseCipherKeysRequest() {}
explicit EKPGetLatestBaseCipherKeysRequest(const std::vector<uint64_t>& ids) : encryptDomainIds(ids) {}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, requesterID, encryptDomainIds, reply);
serializer(ar, encryptDomainIds, debugId, reply);
}
};
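A minimal usage sketch of the updated request type (the domain ids and the assignment below are illustrative, not taken from the patch):

    EKPGetLatestBaseCipherKeysRequest req(std::vector<uint64_t>{ 1, 2, 3 });
    req.debugId = deterministicRandom()->randomUniqueID(); // optional tracing id carried through EKP to the KMS connector

Note that debugId is serialized between encryptDomainIds and reply, so both endpoints must agree on this field order.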


@@ -268,7 +268,7 @@ static auto fork_child(const std::string& path, std::vector<char*>& paramList) {
static void setupTraceWithOutput(TraceEvent& event, size_t bytesRead, char* outputBuffer) {
// get some errors printed for spawned process
std::cout << "Output bytesRead: " << bytesRead << std::endl;
std::cout << "output buffer: " << std::string(outputBuffer) << std::endl;
std::cout << "output buffer: " << std::string_view(outputBuffer, bytesRead) << std::endl;
if (bytesRead == 0)
return;
ASSERT(bytesRead <= SERVER_KNOBS->MAX_FORKED_PROCESS_OUTPUT);
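The std::string_view change above is the substantive fix: std::string(outputBuffer) copies until the first NUL byte, which a raw pipe buffer is not guaranteed to contain, while the (pointer, length) form prints exactly bytesRead bytes. A self-contained sketch of the difference (buffer contents are hypothetical):

    #include <cstddef>
    #include <iostream>
    #include <string_view>

    int main() {
        char outputBuffer[16] = { 'o', 'k' }; // in the real code, bytes past bytesRead are not meaningful
        std::size_t bytesRead = 2;
        // Bounded read: prints "ok" and never scans past bytesRead looking for a terminator.
        std::cout << "output buffer: " << std::string_view(outputBuffer, bytesRead) << std::endl;
        return 0;
    }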


@@ -101,16 +101,18 @@ struct KmsConnLookupEKsByKeyIdsRep {
struct KmsConnLookupEKsByKeyIdsReq {
constexpr static FileIdentifier file_identifier = 6913396;
std::vector<std::pair<EncryptCipherBaseKeyId, EncryptCipherDomainId>> encryptKeyIds;
Optional<UID> debugId;
ReplyPromise<KmsConnLookupEKsByKeyIdsRep> reply;
KmsConnLookupEKsByKeyIdsReq() {}
explicit KmsConnLookupEKsByKeyIdsReq(
const std::vector<std::pair<EncryptCipherBaseKeyId, EncryptCipherDomainId>>& keyIds)
: encryptKeyIds(keyIds) {}
const std::vector<std::pair<EncryptCipherBaseKeyId, EncryptCipherDomainId>>& keyIds,
Optional<UID> dbgId)
: encryptKeyIds(keyIds), debugId(dbgId) {}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, encryptKeyIds, reply);
serializer(ar, encryptKeyIds, debugId, reply);
}
};
@@ -130,14 +132,16 @@ struct KmsConnLookupEKsByDomainIdsRep {
struct KmsConnLookupEKsByDomainIdsReq {
constexpr static FileIdentifier file_identifier = 9918682;
std::vector<EncryptCipherDomainId> encryptDomainIds;
Optional<UID> debugId;
ReplyPromise<KmsConnLookupEKsByDomainIdsRep> reply;
KmsConnLookupEKsByDomainIdsReq() {}
explicit KmsConnLookupEKsByDomainIdsReq(const std::vector<EncryptCipherDomainId>& ids) : encryptDomainIds(ids) {}
explicit KmsConnLookupEKsByDomainIdsReq(const std::vector<EncryptCipherDomainId>& ids, Optional<UID> dbgId)
: encryptDomainIds(ids), debugId(dbgId) {}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, encryptDomainIds, reply);
serializer(ar, encryptDomainIds, debugId, reply);
}
};


@@ -20,6 +20,7 @@
#include <cinttypes>
#include <vector>
#include <type_traits>
#include "fdbclient/FDBOptions.g.h"
#include "fdbclient/SystemData.h"
@@ -669,6 +670,60 @@ ACTOR Future<Void> reconfigureAfter(Database cx,
return Void();
}
struct QuietDatabaseChecker {
double start = now();
constexpr static double maxDDRunTime = 1000.0;
struct Impl {
double start;
std::string const& phase;
std::vector<std::string> failReasons;
Impl(double start, const std::string& phase) : start(start), phase(phase) {}
template <class T, class Comparison = std::less_equal<>>
Impl& add(BaseTraceEvent& evt,
const char* name,
T value,
T expected,
Comparison const& cmp = std::less_equal<>()) {
std::string k = fmt::format("{}Gate", name);
evt.detail(name, value).detail(k.c_str(), expected);
if (!cmp(value, expected)) {
failReasons.push_back(name);
}
return *this;
}
bool success() {
bool timedOut = now() - start > maxDDRunTime;
if (!failReasons.empty()) {
std::string traceMessage = fmt::format("QuietDatabase{}Fail", phase);
std::string reasons = fmt::format("{}", fmt::join(failReasons, ", "));
TraceEvent(timedOut ? SevError : SevWarnAlways, traceMessage.c_str())
.detail("Reasons", reasons)
.detail("FailedAfter", now() - start)
.detail("Timeout", maxDDRunTime);
if (timedOut) {
// this bool is just created to make the assertion more readable
bool ddGotStuck = true;
// This assertion is here to make the test fail more quickly. If quietDatabase takes this
// long without completing, we can assume that the test will eventually time out. However,
// timeouts are more annoying to debug. This will hopefully be easier to track down.
ASSERT(!ddGotStuck || !g_network->isSimulated());
}
return false;
}
return true;
}
};
Impl startIteration(std::string const& phase) const {
Impl res(start, phase);
return res;
}
};
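To see how the pieces compose, here is a hypothetical single iteration (the metric names and gate values are invented for illustration; the real call site follows in the diff below):

    QuietDatabaseChecker checker;
    std::string phase = "Start"; // named variable: Impl holds a reference to the phase string
    auto check = checker.startIteration(phase);
    TraceEvent evt("QuietDatabaseStart");
    check.add(evt, "DataInFlight", int64_t(0), int64_t(2e6)) // passes: 0 <= 2e6
        .add(evt, "StorageServersRecruiting", false, false, std::equal_to<>()); // equality gate
    evt.log();
    bool quiet = check.success(); // false resets the caller's success streak; past maxDDRunTime it also asserts in simulation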
// Waits until a database quiets down (no data in flight, small tlog queue, low storage queue (SQ), no active data distribution). This
// requires the database to be available and healthy in order to succeed.
ACTOR Future<Void> waitForQuietDatabase(Database cx,
@@ -680,6 +735,7 @@ ACTOR Future<Void> waitForQuietDatabase(Database cx,
int64_t maxDataDistributionQueueSize = 0,
int64_t maxPoppedVersionLag = 30e6,
int64_t maxVersionOffset = 1e6) {
state QuietDatabaseChecker checker;
state Future<Void> reconfig =
reconfigureAfter(cx, 100 + (deterministicRandom()->random01() * 100), dbInfo, "QuietDatabase");
state Future<int64_t> dataInFlight;
@@ -732,35 +788,26 @@ ACTOR Future<Void> waitForQuietDatabase(Database cx,
success(teamCollectionValid) && success(storageQueueSize) && success(dataDistributionActive) &&
success(storageServersRecruiting) && success(versionOffset));
TraceEvent(("QuietDatabase" + phase).c_str())
.detail("DataInFlight", dataInFlight.get())
.detail("DataInFlightGate", dataInFlightGate)
.detail("MaxTLogQueueSize", tLogQueueInfo.get().first)
.detail("MaxTLogQueueGate", maxTLogQueueGate)
.detail("MaxTLogPoppedVersionLag", tLogQueueInfo.get().second)
.detail("MaxTLogPoppedVersionLagGate", maxPoppedVersionLag)
.detail("DataDistributionQueueSize", dataDistributionQueueSize.get())
.detail("DataDistributionQueueSizeGate", maxDataDistributionQueueSize)
.detail("TeamCollectionValid", teamCollectionValid.get())
.detail("MaxStorageQueueSize", storageQueueSize.get())
.detail("MaxStorageServerQueueGate", maxStorageServerQueueGate)
.detail("DataDistributionActive", dataDistributionActive.get())
.detail("StorageServersRecruiting", storageServersRecruiting.get())
.detail("RecoveryCount", dbInfo->get().recoveryCount)
.detail("VersionOffset", versionOffset.get())
.detail("NumSuccesses", numSuccesses);
maxVersionOffset += dbInfo->get().recoveryCount * SERVER_KNOBS->MAX_VERSIONS_IN_FLIGHT;
if (dataInFlight.get() > dataInFlightGate || tLogQueueInfo.get().first > maxTLogQueueGate ||
tLogQueueInfo.get().second > maxPoppedVersionLag ||
dataDistributionQueueSize.get() > maxDataDistributionQueueSize ||
storageQueueSize.get() > maxStorageServerQueueGate || !dataDistributionActive.get() ||
storageServersRecruiting.get() || versionOffset.get() > maxVersionOffset ||
!teamCollectionValid.get()) {
wait(delay(1.0));
numSuccesses = 0;
} else {
auto check = checker.startIteration(phase);
std::string evtType = "QuietDatabase" + phase;
TraceEvent evt(evtType.c_str());
check.add(evt, "DataInFlight", dataInFlight.get(), dataInFlightGate)
.add(evt, "MaxTLogQueueSize", tLogQueueInfo.get().first, maxTLogQueueGate)
.add(evt, "MaxTLogPoppedVersionLag", tLogQueueInfo.get().second, maxPoppedVersionLag)
.add(evt, "DataDistributionQueueSize", dataDistributionQueueSize.get(), maxDataDistributionQueueSize)
.add(evt, "TeamCollectionValid", teamCollectionValid.get(), true, std::equal_to<>())
.add(evt, "MaxStorageQueueSize", storageQueueSize.get(), maxStorageServerQueueGate)
.add(evt, "DataDistributionActive", dataDistributionActive.get(), true, std::equal_to<>())
.add(evt, "StorageServersRecruiting", storageServersRecruiting.get(), false, std::equal_to<>())
.add(evt, "VersionOffset", versionOffset.get(), maxVersionOffset);
evt.detail("RecoveryCount", dbInfo->get().recoveryCount).detail("NumSuccesses", numSuccesses);
evt.log();
if (check.success()) {
if (++numSuccesses == 3) {
auto msg = "QuietDatabase" + phase + "Done";
TraceEvent(msg.c_str()).log();
@@ -768,6 +815,9 @@ ACTOR Future<Void> waitForQuietDatabase(Database cx,
} else {
wait(delay(g_network->isSimulated() ? 2.0 : 30.0));
}
} else {
wait(delay(1.0));
numSuccesses = 0;
}
} catch (Error& e) {
TraceEvent(("QuietDatabase" + phase + "Error").c_str()).errorUnsuppressed(e);


@@ -18,7 +18,7 @@
* limitations under the License.
*/
#include "fdbserver/RESTKmsConnector.actor.h"
#include "fdbserver/RESTKmsConnector.h"
#include "fdbclient/FDBTypes.h"
#include "fdbclient/rapidjson/document.h"
@@ -61,6 +61,7 @@ const char* REFRESH_KMS_URLS_TAG = "refresh_kms_urls";
const char* VALIDATION_TOKENS_TAG = "validation_tokens";
const char* VALIDATION_TOKEN_NAME_TAG = "token_name";
const char* VALIDATION_TOKEN_VALUE_TAG = "token_value";
const char* DEBUG_UID_TAG = "debug_uid";
const char* TOKEN_NAME_FILE_SEP = "#";
const char* TOKEN_TUPLE_SEP = ",";
@@ -280,9 +281,9 @@ void parseKmsResponse(Reference<RESTKmsConnectorCtx> ctx,
// "kms_urls" : [
// "url1", "url2", ...
// ],
// "error" : {
// "error" : { // Optional, populated by the KMS, if present, rest of payload is ignored.
// "details": <details>
// } // Optional, populated by the KMS, if present, rest of payload is ignored.
// }
// }
if (resp->code != HTTP::HTTP_STATUS_CODE_OK) {
@@ -397,6 +398,20 @@ void addRefreshKmsUrlsSectionToJsonDoc(Reference<RESTKmsConnectorCtx> ctx,
doc.AddMember(key, refreshUrls, doc.GetAllocator());
}
void addDebugUidSectionToJsonDoc(Reference<RESTKmsConnectorCtx> ctx, rapidjson::Document& doc, Optional<UID> dbgId) {
if (!dbgId.present()) {
// Debug id not present; do nothing
return;
}
rapidjson::Value key(DEBUG_UID_TAG, doc.GetAllocator());
rapidjson::Value debugIdVal;
const std::string dbgIdStr = dbgId.get().toString();
debugIdVal.SetString(dbgIdStr.c_str(), dbgIdStr.size(), doc.GetAllocator());
// Append 'debug_uid' object to the parent document
doc.AddMember(key, debugIdVal, doc.GetAllocator());
}
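For illustration, with a debug id present the serialized request gains one top-level string member; the call below is a sketch (ctx assumed in scope, and the UID value shown is made up):

    rapidjson::Document doc;
    doc.SetObject(); // AddMember requires an object root
    addDebugUidSectionToJsonDoc(ctx, doc, Optional<UID>(deterministicRandom()->randomUniqueID()));
    // doc now serializes with a member like: "debug_uid" : "0123456789abcdef0123456789abcdef"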
StringRef getEncryptKeysByKeyIdsRequestBody(Reference<RESTKmsConnectorCtx> ctx,
const KmsConnLookupEKsByKeyIdsReq& req,
const bool refreshKmsUrls,
@@ -424,6 +439,7 @@ StringRef getEncryptKeysByKeyIdsRequestBody(Reference<RESTKmsConnectorCtx> ctx,
// }
// ]
// "refresh_kms_urls" = 1/0
// "debug_uid" = <uid-string> // Optional debug info to trace requests across FDB <--> KMS
// }
rapidjson::Document doc;
@@ -458,9 +474,12 @@ StringRef getEncryptKeysByKeyIdsRequestBody(Reference<RESTKmsConnectorCtx> ctx,
// Append 'validation_tokens' as json array
addValidationTokensSectionToJsonDoc(ctx, doc);
// Append "refresh_kms_urls'
// Append 'refresh_kms_urls'
addRefreshKmsUrlsSectionToJsonDoc(ctx, doc, refreshKmsUrls);
// Append 'debug_uid' section if needed
addDebugUidSectionToJsonDoc(ctx, doc, req.debugId);
// Serialize json to string
rapidjson::StringBuffer sb;
rapidjson::Writer<rapidjson::StringBuffer> writer(sb);
@@ -574,6 +593,7 @@ StringRef getEncryptKeysByDomainIdsRequestBody(Reference<RESTKmsConnectorCtx> ct
// }
// ]
// "refresh_kms_urls" = 1/0
// "debug_uid" = <uid-string> // Optional debug info to trace requests across FDB <--> KMS
// }
rapidjson::Document doc;
@@ -604,6 +624,9 @@ StringRef getEncryptKeysByDomainIdsRequestBody(Reference<RESTKmsConnectorCtx> ct
// Append 'refresh_kms_urls'
addRefreshKmsUrlsSectionToJsonDoc(ctx, doc, refreshKmsUrls);
// Append 'debug_uid' section if needed
addDebugUidSectionToJsonDoc(ctx, doc, req.debugId);
// Serialize json to string
rapidjson::StringBuffer sb;
rapidjson::Writer<rapidjson::StringBuffer> writer(sb);
@@ -1007,13 +1030,16 @@ void testGetEncryptKeysByKeyIdsRequestBody(Reference<RESTKmsConnectorCtx> ctx, A
}
bool refreshKmsUrls = deterministicRandom()->randomInt(0, 100) < 50;
if (deterministicRandom()->randomInt(0, 100) < 40) {
req.debugId = deterministicRandom()->randomUniqueID();
}
StringRef requestBodyRef = getEncryptKeysByKeyIdsRequestBody(ctx, req, refreshKmsUrls, arena);
TraceEvent("FetchKeysByKeyIds", ctx->uid).setMaxFieldLength(10000).detail("JsonReqStr", requestBodyRef.toString());
TraceEvent("FetchKeysByKeyIds", ctx->uid).setMaxFieldLength(100000).detail("JsonReqStr", requestBodyRef.toString());
Reference<HTTP::Response> httpResp = makeReference<HTTP::Response>();
httpResp->code = HTTP::HTTP_STATUS_CODE_OK;
getFakeKmsResponse(requestBodyRef, true, httpResp);
TraceEvent("FetchKeysByKeyIds", ctx->uid).setMaxFieldLength(10000).detail("HttpRespStr", httpResp->content);
TraceEvent("FetchKeysByKeyIds", ctx->uid).setMaxFieldLength(100000).detail("HttpRespStr", httpResp->content);
std::vector<EncryptCipherKeyDetails> cipherDetails;
parseKmsResponse(ctx, httpResp, &arena, &cipherDetails);


@@ -18,14 +18,10 @@
* limitations under the License.
*/
#ifndef REST_KMS_CONNECTOR_H
#define REST_KMS_CONNECTOR_H
#pragma once
#if defined(NO_INTELLISENSE) && !defined(FDBSERVER_RESTKMSCONNECTOR_ACTOR_G_H)
#define FDBSERVER_RESTKMSCONNECTOR_ACTOR_G_H
#include "fdbserver/RESTKmsConnector.actor.g.h"
#elif !defined(FDBSERVER_RESTKMSCONNECTOR_ACTOR_H)
#define FDBSERVER_RESTKMSCONNECTOR_ACTOR_H
#include "fdbserver/KmsConnector.h"
class RESTKmsConnector : public KmsConnector {


@@ -27,7 +27,7 @@
#include "fdbserver/RestoreLoader.actor.h"
#include "fdbserver/RestoreRoleCommon.actor.h"
#include "fdbserver/MutationTracking.h"
#include "fdbserver/StorageMetrics.actor.h"
#include "fdbserver/StorageMetrics.h"
#include "flow/actorcompiler.h" // This must be the last #include.


@@ -18,7 +18,7 @@
* limitations under the License.
*/
#include "fdbserver/SimKmsConnector.actor.h"
#include "fdbserver/SimKmsConnector.h"
#include "fdbrpc/sim_validation.h"
#include "fdbserver/Knobs.h"
@@ -29,6 +29,7 @@
#include "flow/FastRef.h"
#include "flow/IRandom.h"
#include "flow/ITrace.h"
#include "flow/Trace.h"
#include "flow/network.h"
#include "flow/UnitTest.h"
@@ -79,6 +80,14 @@ ACTOR Future<Void> simKmsConnectorCore_impl(KmsConnectorInterface interf) {
when(KmsConnLookupEKsByKeyIdsReq req = waitNext(interf.ekLookupByIds.getFuture())) {
state KmsConnLookupEKsByKeyIdsReq keysByIdsReq = req;
state KmsConnLookupEKsByKeyIdsRep keysByIdsRep;
state Optional<TraceEvent> dbgKIdTrace = keysByIdsReq.debugId.present()
? TraceEvent("SimKmsGetByKeyIds", interf.id())
: Optional<TraceEvent>();
if (dbgKIdTrace.present()) {
dbgKIdTrace.get().setMaxEventLength(100000);
dbgKIdTrace.get().detail("DbgId", keysByIdsReq.debugId.get());
}
// Lookup corresponding EncryptKeyCtx for input keyId
for (const auto& item : req.encryptKeyIds) {
@@ -89,6 +98,12 @@ ACTOR Future<Void> simKmsConnectorCore_impl(KmsConnectorInterface interf) {
itr->first,
StringRef(keysByIdsRep.arena, itr->second.get()->key),
keysByIdsRep.arena);
if (dbgKIdTrace.present()) {
// {encryptDomainId, baseCipherId} forms a unique tuple across encryption domains
dbgKIdTrace.get().detail(
getEncryptDbgTraceKey(ENCRYPT_DBG_TRACE_RESULT_PREFIX, item.second, itr->first), "");
}
} else {
success = false;
break;
@@ -102,16 +117,29 @@ ACTOR Future<Void> simKmsConnectorCore_impl(KmsConnectorInterface interf) {
when(KmsConnLookupEKsByDomainIdsReq req = waitNext(interf.ekLookupByDomainIds.getFuture())) {
state KmsConnLookupEKsByDomainIdsReq keysByDomainIdReq = req;
state KmsConnLookupEKsByDomainIdsRep keysByDomainIdRep;
state Optional<TraceEvent> dbgDIdTrace = keysByDomainIdReq.debugId.present()
? TraceEvent("SimKmsGetsByDomIds", interf.id())
: Optional<TraceEvent>();
// Map encryptionDomainId to corresponding EncryptKeyCtx element using a modulo operation. This would
// mean multiple domains get mapped to the same encryption key, which is fine; the EncryptKeyStore
// guarantees that the keyId -> plaintext encryptKey mapping is idempotent.
if (dbgDIdTrace.present()) {
dbgDIdTrace.get().detail("DbgId", keysByDomainIdReq.debugId.get());
}
// Map encryptionDomainId to corresponding EncryptKeyCtx element using a modulo operation. This
// would mean multiple domains get mapped to the same encryption key, which is fine; the
// EncryptKeyStore guarantees that the keyId -> plaintext encryptKey mapping is idempotent.
for (EncryptCipherDomainId domainId : req.encryptDomainIds) {
EncryptCipherBaseKeyId keyId = 1 + abs(domainId) % SERVER_KNOBS->SIM_KMS_MAX_KEYS;
const auto& itr = ctx->simEncryptKeyStore.find(keyId);
if (itr != ctx->simEncryptKeyStore.end()) {
keysByDomainIdRep.cipherKeyDetails.emplace_back(
domainId, keyId, StringRef(itr->second.get()->key), keysByDomainIdRep.arena);
if (dbgDIdTrace.present()) {
// {encryptDomainId, baseCipherId} forms a unique tuple across encryption domains
dbgDIdTrace.get().detail(
getEncryptDbgTraceKey(ENCRYPT_DBG_TRACE_RESULT_PREFIX, domainId, keyId), "");
}
} else {
success = false;
break;
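To make the modulo mapping concrete (with a hypothetical knob value SIM_KMS_MAX_KEYS = 4): domainId 10 yields keyId = 1 + (10 % 4) = 3, and domainId -10 yields the same keyId 3 through abs(), so distinct domains can deliberately share one simulated key; the idempotent keyId -> plaintext key guarantee noted above is what makes that sharing safe.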


@@ -18,19 +18,13 @@
* limitations under the License.
*/
#ifndef SIM_KMS_CONNECTOR_H
#define SIM_KMS_CONNECTOR_H
#pragma once
#if defined(NO_INTELLISENSE) && !defined(FDBSERVER_SIMKMSCONNECTOR_ACTOR_G_H)
#define FDBSERVER_SIMKMSCONNECTOR_ACTOR_G_H
#include "fdbserver/SimKmsConnector.actor.g.h"
#elif !defined(FDBSERVER_SIMKMSCONNECTOR_ACTOR_H)
#define FDBSERVER_SIMKMSCONNECTOR_ACTOR_H
#include "fdbserver/KmsConnector.h"
#include "flow/BlobCipher.h"
#include "flow/actorcompiler.h" // This must be the last #include.
class SimKmsConnector : public KmsConnector {
public:
SimKmsConnector() = default;


@@ -2362,6 +2362,7 @@ ACTOR void setupAndRun(std::string dataFolder,
allowList.addTrustedSubnet("abcd::/16"sv);
state bool allowDefaultTenant = testConfig.allowDefaultTenant;
state bool allowDisablingTenants = testConfig.allowDisablingTenants;
state bool allowCreatingTenants = true;
// The RocksDB storage engine does not support the restarting tests because you cannot consistently get a clean
// snapshot of the storage engine without a snapshotting file system.
@@ -2372,6 +2373,7 @@ ACTOR void setupAndRun(std::string dataFolder,
// Disable the default tenant in restarting tests for now
// TODO: persist the chosen default tenant in the restartInfo.ini file for the second test
allowDefaultTenant = false;
allowCreatingTenants = false;
}
// TODO: Currently backup and restore related simulation tests are failing when run with rocksDB storage engine
@@ -2425,9 +2427,11 @@ ACTOR void setupAndRun(std::string dataFolder,
TEST(true); // Simulation start
state Optional<TenantName> defaultTenant;
state Standalone<VectorRef<TenantNameRef>> tenantsToCreate;
state TenantMode tenantMode = TenantMode::DISABLED;
if (allowDefaultTenant && deterministicRandom()->random01() < 0.5) {
defaultTenant = "SimulatedDefaultTenant"_sr;
tenantsToCreate.push_back_deep(tenantsToCreate.arena(), defaultTenant.get());
if (deterministicRandom()->random01() < 0.9) {
tenantMode = TenantMode::REQUIRED;
} else {
@@ -2437,9 +2441,18 @@ ACTOR void setupAndRun(std::string dataFolder,
tenantMode = TenantMode::OPTIONAL_TENANT;
}
if (allowCreatingTenants && tenantMode != TenantMode::DISABLED && deterministicRandom()->random01() < 0.5) {
int numTenants = deterministicRandom()->randomInt(1, 6);
for (int i = 0; i < numTenants; ++i) {
tenantsToCreate.push_back_deep(tenantsToCreate.arena(),
TenantNameRef(format("SimulatedExtraTenant%04d", i)));
}
}
TraceEvent("SimulatedClusterTenantMode")
.detail("UsingTenant", defaultTenant)
.detail("TenantRequired", tenantMode.toString());
.detail("TenantRequired", tenantMode.toString())
.detail("TotalTenants", tenantsToCreate.size());
try {
// systemActors.push_back( startSystemMonitor(dataFolder) );
@@ -2481,7 +2494,8 @@ ACTOR void setupAndRun(std::string dataFolder,
startingConfiguration,
LocalityData(),
UnitTestParameters(),
defaultTenant),
defaultTenant,
tenantsToCreate),
isBuggifyEnabled(BuggifyType::General) ? 36000.0 : 5400.0));
} catch (Error& e) {
TraceEvent(SevError, "SetupAndRunError").error(e);


@@ -0,0 +1,224 @@
/*
* StorageMetrics.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "flow/UnitTest.h"
#include "fdbserver/StorageMetrics.h"
#include "flow/actorcompiler.h" // This must be the last #include.
TEST_CASE("/fdbserver/StorageMetricSample/simple") {
StorageMetricSample s(1000);
s.sample.insert(LiteralStringRef("Apple"), 1000);
s.sample.insert(LiteralStringRef("Banana"), 2000);
s.sample.insert(LiteralStringRef("Cat"), 1000);
s.sample.insert(LiteralStringRef("Cathode"), 1000);
s.sample.insert(LiteralStringRef("Dog"), 1000);
ASSERT(s.getEstimate(KeyRangeRef(LiteralStringRef("A"), LiteralStringRef("D"))) == 5000);
ASSERT(s.getEstimate(KeyRangeRef(LiteralStringRef("A"), LiteralStringRef("E"))) == 6000);
ASSERT(s.getEstimate(KeyRangeRef(LiteralStringRef("B"), LiteralStringRef("C"))) == 2000);
// ASSERT(s.splitEstimate(KeyRangeRef(LiteralStringRef("A"), LiteralStringRef("D")), 3500) ==
// LiteralStringRef("Cat"));
return Void();
}
TEST_CASE("/fdbserver/StorageMetricSample/rangeSplitPoints/simple") {
int64_t sampleUnit = SERVER_KNOBS->BYTES_READ_UNITS_PER_SAMPLE;
StorageServerMetrics ssm;
ssm.byteSample.sample.insert(LiteralStringRef("A"), 200 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Absolute"), 800 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Apple"), 1000 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Bah"), 20 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Banana"), 80 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Bob"), 200 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("But"), 100 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Cat"), 300 * sampleUnit);
std::vector<KeyRef> t = ssm.getSplitPoints(
KeyRangeRef(LiteralStringRef("A"), LiteralStringRef("C")), 2000 * sampleUnit, Optional<Key>());
ASSERT(t.size() == 1 && t[0] == LiteralStringRef("Bah"));
return Void();
}
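The expected answer follows from the cumulative sample mass: walking from "A", the totals are 200 ("A") + 800 ("Absolute") + 1000 ("Apple") = 2000 units, which exactly fills the 2000-unit chunk target, so the next sampled key, "Bah", is returned as the only split point.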
TEST_CASE("/fdbserver/StorageMetricSample/rangeSplitPoints/multipleReturnedPoints") {
int64_t sampleUnit = SERVER_KNOBS->BYTES_READ_UNITS_PER_SAMPLE;
StorageServerMetrics ssm;
ssm.byteSample.sample.insert(LiteralStringRef("A"), 200 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Absolute"), 800 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Apple"), 1000 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Bah"), 20 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Banana"), 80 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Bob"), 200 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("But"), 100 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Cat"), 300 * sampleUnit);
std::vector<KeyRef> t = ssm.getSplitPoints(
KeyRangeRef(LiteralStringRef("A"), LiteralStringRef("C")), 600 * sampleUnit, Optional<Key>());
ASSERT(t.size() == 3 && t[0] == LiteralStringRef("Absolute") && t[1] == LiteralStringRef("Apple") &&
t[2] == LiteralStringRef("Bah"));
return Void();
}
TEST_CASE("/fdbserver/StorageMetricSample/rangeSplitPoints/noneSplitable") {
int64_t sampleUnit = SERVER_KNOBS->BYTES_READ_UNITS_PER_SAMPLE;
StorageServerMetrics ssm;
ssm.byteSample.sample.insert(LiteralStringRef("A"), 200 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Absolute"), 800 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Apple"), 1000 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Bah"), 20 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Banana"), 80 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Bob"), 200 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("But"), 100 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Cat"), 300 * sampleUnit);
std::vector<KeyRef> t = ssm.getSplitPoints(
KeyRangeRef(LiteralStringRef("A"), LiteralStringRef("C")), 10000 * sampleUnit, Optional<Key>());
ASSERT(t.size() == 0);
return Void();
}
TEST_CASE("/fdbserver/StorageMetricSample/rangeSplitPoints/chunkTooLarge") {
int64_t sampleUnit = SERVER_KNOBS->BYTES_READ_UNITS_PER_SAMPLE;
StorageServerMetrics ssm;
ssm.byteSample.sample.insert(LiteralStringRef("A"), 20 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Absolute"), 80 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Apple"), 10 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Bah"), 20 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Banana"), 80 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Bob"), 20 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("But"), 10 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Cat"), 30 * sampleUnit);
std::vector<KeyRef> t = ssm.getSplitPoints(
KeyRangeRef(LiteralStringRef("A"), LiteralStringRef("C")), 1000 * sampleUnit, Optional<Key>());
ASSERT(t.size() == 0);
return Void();
}
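Both empty results above come from the same arithmetic: the sampled mass of [A, C) is roughly 2400 units against a 10000-unit target in the noneSplitable case and roughly 240 units against a 1000-unit target in the chunkTooLarge case, so neither range accumulates a full chunk and getSplitPoints returns no keys.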
TEST_CASE("/fdbserver/StorageMetricSample/readHotDetect/simple") {
int64_t sampleUnit = SERVER_KNOBS->BYTES_READ_UNITS_PER_SAMPLE;
StorageServerMetrics ssm;
ssm.bytesReadSample.sample.insert(LiteralStringRef("Apple"), 1000 * sampleUnit);
ssm.bytesReadSample.sample.insert(LiteralStringRef("Banana"), 2000 * sampleUnit);
ssm.bytesReadSample.sample.insert(LiteralStringRef("Cat"), 1000 * sampleUnit);
ssm.bytesReadSample.sample.insert(LiteralStringRef("Cathode"), 1000 * sampleUnit);
ssm.bytesReadSample.sample.insert(LiteralStringRef("Dog"), 1000 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("A"), 20 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Absolute"), 80 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Apple"), 1000 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Bah"), 20 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Banana"), 80 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Bob"), 200 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("But"), 100 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Cat"), 300 * sampleUnit);
std::vector<ReadHotRangeWithMetrics> t =
ssm.getReadHotRanges(KeyRangeRef(LiteralStringRef("A"), LiteralStringRef("C")), 2.0, 200 * sampleUnit, 0);
ASSERT(t.size() == 1 && (*t.begin()).keys.begin == LiteralStringRef("Bah") &&
(*t.begin()).keys.end == LiteralStringRef("Bob"));
return Void();
}
TEST_CASE("/fdbserver/StorageMetricSample/readHotDetect/moreThanOneRange") {
int64_t sampleUnit = SERVER_KNOBS->BYTES_READ_UNITS_PER_SAMPLE;
StorageServerMetrics ssm;
ssm.bytesReadSample.sample.insert(LiteralStringRef("Apple"), 1000 * sampleUnit);
ssm.bytesReadSample.sample.insert(LiteralStringRef("Banana"), 2000 * sampleUnit);
ssm.bytesReadSample.sample.insert(LiteralStringRef("Cat"), 1000 * sampleUnit);
ssm.bytesReadSample.sample.insert(LiteralStringRef("Cathode"), 1000 * sampleUnit);
ssm.bytesReadSample.sample.insert(LiteralStringRef("Dog"), 1000 * sampleUnit);
ssm.bytesReadSample.sample.insert(LiteralStringRef("Final"), 2000 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("A"), 20 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Absolute"), 80 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Apple"), 1000 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Bah"), 20 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Banana"), 80 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Bob"), 200 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("But"), 100 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Cat"), 300 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Dah"), 300 * sampleUnit);
std::vector<ReadHotRangeWithMetrics> t =
ssm.getReadHotRanges(KeyRangeRef(LiteralStringRef("A"), LiteralStringRef("D")), 2.0, 200 * sampleUnit, 0);
ASSERT(t.size() == 2 && (*t.begin()).keys.begin == LiteralStringRef("Bah") &&
(*t.begin()).keys.end == LiteralStringRef("Bob"));
ASSERT(t.at(1).keys.begin == LiteralStringRef("Cat") && t.at(1).keys.end == LiteralStringRef("Dah"));
return Void();
}
TEST_CASE("/fdbserver/StorageMetricSample/readHotDetect/consecutiveRanges") {
int64_t sampleUnit = SERVER_KNOBS->BYTES_READ_UNITS_PER_SAMPLE;
StorageServerMetrics ssm;
ssm.bytesReadSample.sample.insert(LiteralStringRef("Apple"), 1000 * sampleUnit);
ssm.bytesReadSample.sample.insert(LiteralStringRef("Banana"), 2000 * sampleUnit);
ssm.bytesReadSample.sample.insert(LiteralStringRef("Bucket"), 2000 * sampleUnit);
ssm.bytesReadSample.sample.insert(LiteralStringRef("Cat"), 1000 * sampleUnit);
ssm.bytesReadSample.sample.insert(LiteralStringRef("Cathode"), 1000 * sampleUnit);
ssm.bytesReadSample.sample.insert(LiteralStringRef("Dog"), 5000 * sampleUnit);
ssm.bytesReadSample.sample.insert(LiteralStringRef("Final"), 2000 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("A"), 20 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Absolute"), 80 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Apple"), 1000 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Bah"), 20 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Banana"), 80 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Bob"), 200 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("But"), 100 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Cat"), 300 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Dah"), 300 * sampleUnit);
std::vector<ReadHotRangeWithMetrics> t =
ssm.getReadHotRanges(KeyRangeRef(LiteralStringRef("A"), LiteralStringRef("D")), 2.0, 200 * sampleUnit, 0);
ASSERT(t.size() == 2 && (*t.begin()).keys.begin == LiteralStringRef("Bah") &&
(*t.begin()).keys.end == LiteralStringRef("But"));
ASSERT(t.at(1).keys.begin == LiteralStringRef("Cat") && t.at(1).keys.end == LiteralStringRef("Dah"));
return Void();
}


@@ -1,817 +0,0 @@
/*
* StorageMetrics.actor.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Included via StorageMetrics.h
#include "fdbclient/FDBTypes.h"
#include "fdbrpc/simulator.h"
#include "flow/UnitTest.h"
#include "fdbclient/StorageServerInterface.h"
#include "fdbclient/KeyRangeMap.h"
#include "fdbserver/Knobs.h"
#include "flow/actorcompiler.h" // This must be the last #include.
const StringRef STORAGESERVER_HISTOGRAM_GROUP = LiteralStringRef("StorageServer");
const StringRef FETCH_KEYS_LATENCY_HISTOGRAM = LiteralStringRef("FetchKeysLatency");
const StringRef FETCH_KEYS_BYTES_HISTOGRAM = LiteralStringRef("FetchKeysSize");
const StringRef FETCH_KEYS_BYTES_PER_SECOND_HISTOGRAM = LiteralStringRef("FetchKeysBandwidth");
const StringRef TLOG_CURSOR_READS_LATENCY_HISTOGRAM = LiteralStringRef("TLogCursorReadsLatency");
const StringRef SS_VERSION_LOCK_LATENCY_HISTOGRAM = LiteralStringRef("SSVersionLockLatency");
const StringRef EAGER_READS_LATENCY_HISTOGRAM = LiteralStringRef("EagerReadsLatency");
const StringRef FETCH_KEYS_PTREE_UPDATES_LATENCY_HISTOGRAM = LiteralStringRef("FetchKeysPTreeUpdatesLatency");
const StringRef TLOG_MSGS_PTREE_UPDATES_LATENCY_HISTOGRAM = LiteralStringRef("TLogMsgsPTreeUpdatesLatency");
const StringRef STORAGE_UPDATES_DURABLE_LATENCY_HISTOGRAM = LiteralStringRef("StorageUpdatesDurableLatency");
const StringRef STORAGE_COMMIT_LATENCY_HISTOGRAM = LiteralStringRef("StorageCommitLatency");
const StringRef SS_DURABLE_VERSION_UPDATE_LATENCY_HISTOGRAM = LiteralStringRef("SSDurableVersionUpdateLatency");
struct StorageMetricSample {
IndexedSet<Key, int64_t> sample;
int64_t metricUnitsPerSample;
StorageMetricSample(int64_t metricUnitsPerSample) : metricUnitsPerSample(metricUnitsPerSample) {}
int64_t getEstimate(KeyRangeRef keys) const { return sample.sumRange(keys.begin, keys.end); }
KeyRef splitEstimate(KeyRangeRef range, int64_t offset, bool front = true) const {
auto fwd_split = sample.index(front ? sample.sumTo(sample.lower_bound(range.begin)) + offset
: sample.sumTo(sample.lower_bound(range.end)) - offset);
if (fwd_split == sample.end() || *fwd_split >= range.end)
return range.end;
if (!front && *fwd_split <= range.begin)
return range.begin;
auto bck_split = fwd_split;
// Butterfly search - start at the midpoint implied by the requested offset, then walk outward in both directions.
while ((fwd_split != sample.end() && *fwd_split < range.end) ||
(bck_split != sample.begin() && *bck_split > range.begin)) {
if (bck_split != sample.begin() && *bck_split > range.begin) {
auto it = bck_split;
bck_split.decrementNonEnd();
KeyRef split = keyBetween(KeyRangeRef(
bck_split != sample.begin() ? std::max<KeyRef>(*bck_split, range.begin) : range.begin, *it));
if (!front || (getEstimate(KeyRangeRef(range.begin, split)) > 0 &&
split.size() <= CLIENT_KNOBS->SPLIT_KEY_SIZE_LIMIT))
return split;
}
if (fwd_split != sample.end() && *fwd_split < range.end) {
auto it = fwd_split;
++it;
KeyRef split = keyBetween(
KeyRangeRef(*fwd_split, it != sample.end() ? std::min<KeyRef>(*it, range.end) : range.end));
if (front || (getEstimate(KeyRangeRef(split, range.end)) > 0 &&
split.size() <= CLIENT_KNOBS->SPLIT_KEY_SIZE_LIMIT))
return split;
fwd_split = it;
}
}
// If we didn't return above, we didn't find anything.
TraceEvent(SevWarn, "CannotSplitLastSampleKey").detail("Range", range).detail("Offset", offset);
return front ? range.end : range.begin;
}
};
TEST_CASE("/fdbserver/StorageMetricSample/simple") {
StorageMetricSample s(1000);
s.sample.insert(LiteralStringRef("Apple"), 1000);
s.sample.insert(LiteralStringRef("Banana"), 2000);
s.sample.insert(LiteralStringRef("Cat"), 1000);
s.sample.insert(LiteralStringRef("Cathode"), 1000);
s.sample.insert(LiteralStringRef("Dog"), 1000);
ASSERT(s.getEstimate(KeyRangeRef(LiteralStringRef("A"), LiteralStringRef("D"))) == 5000);
ASSERT(s.getEstimate(KeyRangeRef(LiteralStringRef("A"), LiteralStringRef("E"))) == 6000);
ASSERT(s.getEstimate(KeyRangeRef(LiteralStringRef("B"), LiteralStringRef("C"))) == 2000);
// ASSERT(s.splitEstimate(KeyRangeRef(LiteralStringRef("A"), LiteralStringRef("D")), 3500) ==
// LiteralStringRef("Cat"));
return Void();
}
struct TransientStorageMetricSample : StorageMetricSample {
Deque<std::pair<double, std::pair<Key, int64_t>>> queue;
TransientStorageMetricSample(int64_t metricUnitsPerSample) : StorageMetricSample(metricUnitsPerSample) {}
// Returns the sampled metric value (possibly 0, possibly increased by the sampling factor)
int64_t addAndExpire(KeyRef key, int64_t metric, double expiration) {
int64_t x = add(key, metric);
if (x)
queue.emplace_back(expiration, std::make_pair(*sample.find(key), -x));
return x;
}
// FIXME: both versions of erase are broken, because they do not remove items in the queue that will subtract a
// metric from the value sometime in the future
int64_t erase(KeyRef key) {
auto it = sample.find(key);
if (it == sample.end())
return 0;
int64_t x = sample.getMetric(it);
sample.erase(it);
return x;
}
void erase(KeyRangeRef keys) { sample.erase(keys.begin, keys.end); }
void poll(KeyRangeMap<std::vector<PromiseStream<StorageMetrics>>>& waitMap, StorageMetrics m) {
double now = ::now();
while (queue.size() && queue.front().first <= now) {
KeyRef key = queue.front().second.first;
int64_t delta = queue.front().second.second;
ASSERT(delta != 0);
if (sample.addMetric(key, delta) == 0)
sample.erase(key);
StorageMetrics deltaM = m * delta;
auto v = waitMap[key];
for (int i = 0; i < v.size(); i++) {
TEST(true); // TransientStorageMetricSample poll update
v[i].send(deltaM);
}
queue.pop_front();
}
}
void poll() {
double now = ::now();
while (queue.size() && queue.front().first <= now) {
KeyRef key = queue.front().second.first;
int64_t delta = queue.front().second.second;
ASSERT(delta != 0);
if (sample.addMetric(key, delta) == 0)
sample.erase(key);
queue.pop_front();
}
}
private:
bool roll(KeyRef key, int64_t metric) const {
return deterministicRandom()->random01() <
(double)metric / metricUnitsPerSample; //< SOMEDAY: Better randomInt64?
}
int64_t add(KeyRef key, int64_t metric) {
if (!metric)
return 0;
int64_t mag = metric < 0 ? -metric : metric;
if (mag < metricUnitsPerSample) {
if (!roll(key, mag))
return 0;
metric = metric < 0 ? -metricUnitsPerSample : metricUnitsPerSample;
}
if (sample.addMetric(key, metric) == 0)
sample.erase(key);
return metric;
}
};
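// The private add()/roll() pair above keeps the sample unbiased: a metric smaller than
// metricUnitsPerSample is either dropped or inflated to exactly one sampling unit, with
// probability metric/metricUnitsPerSample, so its expected contribution equals the true value.
// A self-contained sketch of that estimator for positive metrics (hypothetical name, and
// std::mt19937_64 standing in for deterministicRandom()):
#include <cstdint>
#include <random>
// Threshold sampling: values below `unit` are kept with probability value/unit and rounded up
// to `unit`, so E[sampleMetric(value)] == value.
int64_t sampleMetric(int64_t value, int64_t unit, std::mt19937_64& rng) {
	if (value >= unit)
		return value; // large values are always recorded exactly
	std::uniform_real_distribution<double> u(0.0, 1.0);
	return u(rng) < double(value) / double(unit) ? unit : 0;
}
// The real add() also handles negative deltas (expirations) by sampling on the magnitude and
// preserving the sign.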
struct StorageServerMetrics {
KeyRangeMap<std::vector<PromiseStream<StorageMetrics>>> waitMetricsMap;
StorageMetricSample byteSample;
TransientStorageMetricSample iopsSample,
bandwidthSample; // FIXME: iops and bandwidth calculations are not effectively tested, since they aren't
// currently used by data distribution
TransientStorageMetricSample bytesReadSample;
StorageServerMetrics()
: byteSample(0), iopsSample(SERVER_KNOBS->IOPS_UNITS_PER_SAMPLE),
bandwidthSample(SERVER_KNOBS->BANDWIDTH_UNITS_PER_SAMPLE),
bytesReadSample(SERVER_KNOBS->BYTES_READ_UNITS_PER_SAMPLE) {}
// Get the current estimated metrics for the given keys
StorageMetrics getMetrics(KeyRangeRef const& keys) const {
StorageMetrics result;
result.bytes = byteSample.getEstimate(keys);
result.bytesPerKSecond =
bandwidthSample.getEstimate(keys) * SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS;
result.iosPerKSecond =
iopsSample.getEstimate(keys) * SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS;
result.bytesReadPerKSecond =
bytesReadSample.getEstimate(keys) * SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS;
return result;
}
// Called when metrics should change (IO for a given key)
// Notifies waiting WaitMetricsRequests through waitMetricsMap, and updates metricsAverageQueue and metricsSampleMap
void notify(KeyRef key, StorageMetrics& metrics) {
ASSERT(metrics.bytes == 0); // ShardNotifyMetrics
if (g_network->isSimulated()) {
TEST(metrics.bytesPerKSecond != 0); // ShardNotifyMetrics bytes
TEST(metrics.iosPerKSecond != 0); // ShardNotifyMetrics ios
TEST(metrics.bytesReadPerKSecond != 0); // ShardNotifyMetrics bytesRead
}
double expire = now() + SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL;
StorageMetrics notifyMetrics;
if (metrics.bytesPerKSecond)
notifyMetrics.bytesPerKSecond = bandwidthSample.addAndExpire(key, metrics.bytesPerKSecond, expire) *
SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS;
if (metrics.iosPerKSecond)
notifyMetrics.iosPerKSecond = iopsSample.addAndExpire(key, metrics.iosPerKSecond, expire) *
SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS;
if (metrics.bytesReadPerKSecond)
notifyMetrics.bytesReadPerKSecond = bytesReadSample.addAndExpire(key, metrics.bytesReadPerKSecond, expire) *
SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS;
if (!notifyMetrics.allZero()) {
auto& v = waitMetricsMap[key];
for (int i = 0; i < v.size(); i++) {
if (g_network->isSimulated()) {
TEST(true); // shard notify metrics
}
// ShardNotifyMetrics
v[i].send(notifyMetrics);
}
}
}
// Because read sampling is called on every read, use this specialized function to avoid the overhead of
// branch misses and unnecessary stack allocation, which adds up under heavy load.
void notifyBytesReadPerKSecond(KeyRef key, int64_t in) {
double expire = now() + SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL;
int64_t bytesReadPerKSecond =
bytesReadSample.addAndExpire(key, in, expire) * SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS;
if (bytesReadPerKSecond > 0) {
StorageMetrics notifyMetrics;
notifyMetrics.bytesReadPerKSecond = bytesReadPerKSecond;
auto& v = waitMetricsMap[key];
for (int i = 0; i < v.size(); i++) {
TEST(true); // ShardNotifyMetrics
v[i].send(notifyMetrics);
}
}
}
// Called by StorageServerDisk when the size of a key in byteSample changes, to notify WaitMetricsRequest
// Should not be called for keys past allKeys.end
void notifyBytes(RangeMap<Key, std::vector<PromiseStream<StorageMetrics>>, KeyRangeRef>::iterator shard,
int64_t bytes) {
ASSERT(shard.end() <= allKeys.end);
StorageMetrics notifyMetrics;
notifyMetrics.bytes = bytes;
for (int i = 0; i < shard.value().size(); i++) {
TEST(true); // notifyBytes
shard.value()[i].send(notifyMetrics);
}
}
// Called by StorageServerDisk when the size of a key in byteSample changes, to notify WaitMetricsRequest
void notifyBytes(KeyRef key, int64_t bytes) {
if (key >= allKeys.end) // Do not notify on changes to internal storage server state
return;
notifyBytes(waitMetricsMap.rangeContaining(key), bytes);
}
// Called when a range of keys becomes unassigned (and therefore not readable), to notify waiting
// WaitMetricsRequests (and possibly other types of wait requests in the future)
void notifyNotReadable(KeyRangeRef keys) {
auto rs = waitMetricsMap.intersectingRanges(keys);
for (auto r = rs.begin(); r != rs.end(); ++r) {
auto& v = r->value();
TEST(v.size()); // notifyNotReadable() sending errors to intersecting ranges
for (int n = 0; n < v.size(); n++)
v[n].sendError(wrong_shard_server());
}
}
// Called periodically (~1 sec intervals) to remove older IOs from the averages
// Removes old entries from metricsAverageQueue, updates metricsSampleMap accordingly, and notifies
// WaitMetricsRequests through waitMetricsMap.
void poll() {
{
StorageMetrics m;
m.bytesPerKSecond = SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS;
bandwidthSample.poll(waitMetricsMap, m);
}
{
StorageMetrics m;
m.iosPerKSecond = SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS;
iopsSample.poll(waitMetricsMap, m);
}
{
StorageMetrics m;
m.bytesReadPerKSecond = SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS;
bytesReadSample.poll(waitMetricsMap, m);
}
// byteSample doesn't need polling because we never call addAndExpire() on it
}
// static void waitMetrics( StorageServerMetrics* const& self, WaitMetricsRequest const& req );
// This function can run on untrusted user data. We must validate all divisions carefully.
KeyRef getSplitKey(int64_t remaining,
int64_t estimated,
int64_t limits,
int64_t used,
int64_t infinity,
bool isLastShard,
const StorageMetricSample& sample,
double divisor,
KeyRef const& lastKey,
KeyRef const& key,
bool hasUsed) const {
ASSERT(remaining >= 0);
ASSERT(limits > 0);
ASSERT(divisor > 0);
if (limits < infinity / 2) {
int64_t expectedSize;
if (isLastShard || remaining > estimated) {
double remaining_divisor = (double(remaining) / limits) + 0.5;
expectedSize = remaining / remaining_divisor;
} else {
// If we are here, then estimated >= remaining >= 0
double estimated_divisor = (double(estimated) / limits) + 0.5;
expectedSize = remaining / estimated_divisor;
}
if (remaining > expectedSize) {
// This does the conversion from native units to bytes using the divisor.
double offset = (expectedSize - used) / divisor;
if (offset <= 0)
return hasUsed ? lastKey : key;
return sample.splitEstimate(
KeyRangeRef(lastKey, key),
offset * ((1.0 - SERVER_KNOBS->SPLIT_JITTER_AMOUNT) +
2 * deterministicRandom()->random01() * SERVER_KNOBS->SPLIT_JITTER_AMOUNT));
}
}
return key;
}
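// For intuition, a worked example with made-up numbers: with remaining = 350 and limits = 100 in
// the same native units (and the remaining > estimated branch taken), remaining_divisor =
// 350/100 + 0.5 = 4.0 and expectedSize = 350/4.0 = 87. Adding 0.5 to the quotient deliberately
// targets pieces a bit smaller than the limit so the final fragment is not left tiny. Since
// remaining (350) > expectedSize (87), a split is attempted at offset (87 - used) / divisor into
// the sample, scaled by a random factor in [1 - SPLIT_JITTER_AMOUNT, 1 + SPLIT_JITTER_AMOUNT)
// so that shards do not all split at identical boundaries.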
void splitMetrics(SplitMetricsRequest req) const {
try {
SplitMetricsReply reply;
KeyRef lastKey = req.keys.begin;
StorageMetrics used = req.used;
StorageMetrics estimated = req.estimated;
StorageMetrics remaining = getMetrics(req.keys) + used;
//TraceEvent("SplitMetrics").detail("Begin", req.keys.begin).detail("End", req.keys.end).detail("Remaining", remaining.bytes).detail("Used", used.bytes);
while (true) {
if (remaining.bytes < 2 * SERVER_KNOBS->MIN_SHARD_BYTES)
break;
KeyRef key = req.keys.end;
bool hasUsed = used.bytes != 0 || used.bytesPerKSecond != 0 || used.iosPerKSecond != 0;
key = getSplitKey(remaining.bytes,
estimated.bytes,
req.limits.bytes,
used.bytes,
req.limits.infinity,
req.isLastShard,
byteSample,
1,
lastKey,
key,
hasUsed);
if (used.bytes < SERVER_KNOBS->MIN_SHARD_BYTES)
key = std::max(key,
byteSample.splitEstimate(KeyRangeRef(lastKey, req.keys.end),
SERVER_KNOBS->MIN_SHARD_BYTES - used.bytes));
key = getSplitKey(remaining.iosPerKSecond,
estimated.iosPerKSecond,
req.limits.iosPerKSecond,
used.iosPerKSecond,
req.limits.infinity,
req.isLastShard,
iopsSample,
SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS,
lastKey,
key,
hasUsed);
key = getSplitKey(remaining.bytesPerKSecond,
estimated.bytesPerKSecond,
req.limits.bytesPerKSecond,
used.bytesPerKSecond,
req.limits.infinity,
req.isLastShard,
bandwidthSample,
SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS,
lastKey,
key,
hasUsed);
ASSERT(key != lastKey || hasUsed);
if (key == req.keys.end)
break;
reply.splits.push_back_deep(reply.splits.arena(), key);
StorageMetrics diff = (getMetrics(KeyRangeRef(lastKey, key)) + used);
remaining -= diff;
estimated -= diff;
used = StorageMetrics();
lastKey = key;
}
reply.used = getMetrics(KeyRangeRef(lastKey, req.keys.end)) + used;
req.reply.send(reply);
} catch (Error& e) {
req.reply.sendError(e);
}
}
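// A rough trace of one loop iteration, with illustrative numbers: with remaining.bytes = 900 MB,
// req.limits.bytes = 250 MB and nothing yet used, the byte dimension proposes a key roughly
// 220 MB into the range; the MIN_SHARD_BYTES clamp then pushes that key later if the fragment
// would be too small, and the iops and bandwidth dimensions can only move it earlier (each call
// receives the current key as its upper bound). The chosen key is recorded as a split point, the
// metrics consumed so far are subtracted from remaining and estimated, and the scan resumes from
// that key with used reset to zero.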
void getStorageMetrics(GetStorageMetricsRequest req,
StorageBytes sb,
double bytesInputRate,
int64_t versionLag,
double lastUpdate) const {
GetStorageMetricsReply rep;
// SOMEDAY: make bytes dynamic with hard disk space
rep.load = getMetrics(allKeys);
if (sb.free < 1e9) {
TraceEvent(SevWarn, "PhysicalDiskMetrics")
.suppressFor(60.0)
.detail("Free", sb.free)
.detail("Total", sb.total)
.detail("Available", sb.available)
.detail("Load", rep.load.bytes);
}
rep.available.bytes = sb.available;
rep.available.iosPerKSecond = 10e6;
rep.available.bytesPerKSecond = 100e9;
rep.available.bytesReadPerKSecond = 100e9;
rep.capacity.bytes = sb.total;
rep.capacity.iosPerKSecond = 10e6;
rep.capacity.bytesPerKSecond = 100e9;
rep.capacity.bytesReadPerKSecond = 100e9;
rep.bytesInputRate = bytesInputRate;
rep.versionLag = versionLag;
rep.lastUpdate = lastUpdate;
req.reply.send(rep);
}
Future<Void> waitMetrics(WaitMetricsRequest req, Future<Void> delay);
// Given a read hot shard, this function will divide the shard into chunks and find those chunks whose
// readBytes/sizeBytes ratio exceeds the `readDensityRatio`. Please make sure to run the unit tests in
// `StorageMetricsSampleTests.txt` after making changes.
std::vector<ReadHotRangeWithMetrics> getReadHotRanges(KeyRangeRef shard,
double readDensityRatio,
int64_t baseChunkSize,
int64_t minShardReadBandwidthPerKSeconds) const {
std::vector<ReadHotRangeWithMetrics> toReturn;
double shardSize = (double)byteSample.getEstimate(shard);
int64_t shardReadBandwidth = bytesReadSample.getEstimate(shard);
if (shardReadBandwidth * SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS <=
minShardReadBandwidthPerKSeconds) {
return toReturn;
}
if (shardSize <= baseChunkSize) {
// Shard is small, use it as is
if (bytesReadSample.getEstimate(shard) > (readDensityRatio * shardSize)) {
toReturn.emplace_back(shard,
bytesReadSample.getEstimate(shard) / shardSize,
bytesReadSample.getEstimate(shard) /
SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL);
}
return toReturn;
}
KeyRef beginKey = shard.begin;
auto endKey =
byteSample.sample.index(byteSample.sample.sumTo(byteSample.sample.lower_bound(beginKey)) + baseChunkSize);
while (endKey != byteSample.sample.end()) {
if (*endKey > shard.end) {
endKey = byteSample.sample.lower_bound(shard.end);
if (*endKey == beginKey) {
// Break without incrementing endKey; otherwise the loop would be stuck here forever.
break;
}
}
if (*endKey == beginKey) {
++endKey;
continue;
}
if (bytesReadSample.getEstimate(KeyRangeRef(beginKey, *endKey)) >
(readDensityRatio * std::max(baseChunkSize, byteSample.getEstimate(KeyRangeRef(beginKey, *endKey))))) {
auto range = KeyRangeRef(beginKey, *endKey);
if (!toReturn.empty() && toReturn.back().keys.end == range.begin) {
// If two consecutive chunks are both over the ratio, merge them.
range = KeyRangeRef(toReturn.back().keys.begin, *endKey);
toReturn.pop_back();
}
toReturn.emplace_back(
range,
(double)bytesReadSample.getEstimate(range) / std::max(baseChunkSize, byteSample.getEstimate(range)),
bytesReadSample.getEstimate(range) / SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL);
}
beginKey = *endKey;
endKey = byteSample.sample.index(byteSample.sample.sumTo(byteSample.sample.lower_bound(beginKey)) +
baseChunkSize);
}
return toReturn;
}
void getReadHotRanges(ReadHotSubRangeRequest req) const {
ReadHotSubRangeReply reply;
auto _ranges = getReadHotRanges(req.keys,
SERVER_KNOBS->SHARD_MAX_READ_DENSITY_RATIO,
SERVER_KNOBS->READ_HOT_SUB_RANGE_CHUNK_SIZE,
SERVER_KNOBS->SHARD_READ_HOT_BANDWITH_MIN_PER_KSECONDS);
reply.readHotRanges = VectorRef(_ranges.data(), _ranges.size());
req.reply.send(reply);
}
std::vector<KeyRef> getSplitPoints(KeyRangeRef range, int64_t chunkSize, Optional<Key> prefixToRemove) {
std::vector<KeyRef> toReturn;
KeyRef beginKey = range.begin;
IndexedSet<Key, int64_t>::iterator endKey =
byteSample.sample.index(byteSample.sample.sumTo(byteSample.sample.lower_bound(beginKey)) + chunkSize);
while (endKey != byteSample.sample.end()) {
if (*endKey > range.end) {
break;
}
if (*endKey == beginKey) {
++endKey;
continue;
}
KeyRef splitPoint = *endKey;
if (prefixToRemove.present()) {
splitPoint = splitPoint.removePrefix(prefixToRemove.get());
}
toReturn.push_back(splitPoint);
beginKey = *endKey;
endKey =
byteSample.sample.index(byteSample.sample.sumTo(byteSample.sample.lower_bound(beginKey)) + chunkSize);
}
return toReturn;
}
void getSplitPoints(SplitRangeRequest req, Optional<Key> prefix) {
SplitRangeReply reply;
KeyRangeRef range = req.keys;
if (prefix.present()) {
range = range.withPrefix(prefix.get(), req.arena);
}
std::vector<KeyRef> points = getSplitPoints(range, req.chunkSize, prefix);
reply.splitPoints.append_deep(reply.splitPoints.arena(), points.data(), points.size());
req.reply.send(reply);
}
private:
static void collapse(KeyRangeMap<int>& map, KeyRef const& key) {
auto range = map.rangeContaining(key);
if (range == map.ranges().begin() || range == map.ranges().end())
return;
int value = range->value();
auto prev = range;
--prev;
if (prev->value() != value)
return;
KeyRange keys = KeyRangeRef(prev->begin(), range->end());
map.insert(keys, value);
}
static void add(KeyRangeMap<int>& map, KeyRangeRef const& keys, int delta) {
auto rs = map.modify(keys);
for (auto r = rs.begin(); r != rs.end(); ++r)
r->value() += delta;
collapse(map, keys.begin);
collapse(map, keys.end);
}
};
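// The private collapse()/add() helpers keep a reference-counted KeyRangeMap in canonical form:
// add() bumps a count across a range, then re-merges boundary ranges whose counts have become
// equal. A hypothetical trace with toy keys, on a map that starts as one zero range:
//   add(map, ["b","d"), +1):
//     before:  ["", "\xff") -> 0
//     modify:  ["", "b") -> 0   ["b", "d") -> 1   ["d", "\xff") -> 0
//     collapse("b"): neighbour counts differ (0 vs 1), nothing merges
//     collapse("d"): neighbour counts differ (1 vs 0), nothing merges
//   a later add(map, ["b","d"), -1) restores every count to 0, and the two collapse() calls
//   merge the three ranges back into ["", "\xff") -> 0.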
TEST_CASE("/fdbserver/StorageMetricSample/rangeSplitPoints/simple") {
int64_t sampleUnit = SERVER_KNOBS->BYTES_READ_UNITS_PER_SAMPLE;
StorageServerMetrics ssm;
ssm.byteSample.sample.insert(LiteralStringRef("A"), 200 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Absolute"), 800 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Apple"), 1000 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Bah"), 20 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Banana"), 80 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Bob"), 200 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("But"), 100 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Cat"), 300 * sampleUnit);
std::vector<KeyRef> t = ssm.getSplitPoints(
KeyRangeRef(LiteralStringRef("A"), LiteralStringRef("C")), 2000 * sampleUnit, Optional<Key>());
ASSERT(t.size() == 1 && t[0] == LiteralStringRef("Bah"));
return Void();
}
TEST_CASE("/fdbserver/StorageMetricSample/rangeSplitPoints/multipleReturnedPoints") {
int64_t sampleUnit = SERVER_KNOBS->BYTES_READ_UNITS_PER_SAMPLE;
StorageServerMetrics ssm;
ssm.byteSample.sample.insert(LiteralStringRef("A"), 200 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Absolute"), 800 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Apple"), 1000 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Bah"), 20 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Banana"), 80 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Bob"), 200 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("But"), 100 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Cat"), 300 * sampleUnit);
std::vector<KeyRef> t = ssm.getSplitPoints(
KeyRangeRef(LiteralStringRef("A"), LiteralStringRef("C")), 600 * sampleUnit, Optional<Key>());
ASSERT(t.size() == 3 && t[0] == LiteralStringRef("Absolute") && t[1] == LiteralStringRef("Apple") &&
t[2] == LiteralStringRef("Bah"));
return Void();
}
TEST_CASE("/fdbserver/StorageMetricSample/rangeSplitPoints/noneSplitable") {
int64_t sampleUnit = SERVER_KNOBS->BYTES_READ_UNITS_PER_SAMPLE;
StorageServerMetrics ssm;
ssm.byteSample.sample.insert(LiteralStringRef("A"), 200 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Absolute"), 800 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Apple"), 1000 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Bah"), 20 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Banana"), 80 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Bob"), 200 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("But"), 100 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Cat"), 300 * sampleUnit);
std::vector<KeyRef> t = ssm.getSplitPoints(
KeyRangeRef(LiteralStringRef("A"), LiteralStringRef("C")), 10000 * sampleUnit, Optional<Key>());
ASSERT(t.size() == 0);
return Void();
}
TEST_CASE("/fdbserver/StorageMetricSample/rangeSplitPoints/chunkTooLarge") {
int64_t sampleUnit = SERVER_KNOBS->BYTES_READ_UNITS_PER_SAMPLE;
StorageServerMetrics ssm;
ssm.byteSample.sample.insert(LiteralStringRef("A"), 20 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Absolute"), 80 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Apple"), 10 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Bah"), 20 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Banana"), 80 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Bob"), 20 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("But"), 10 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Cat"), 30 * sampleUnit);
std::vector<KeyRef> t = ssm.getSplitPoints(
KeyRangeRef(LiteralStringRef("A"), LiteralStringRef("C")), 1000 * sampleUnit, Optional<Key>());
ASSERT(t.size() == 0);
return Void();
}
TEST_CASE("/fdbserver/StorageMetricSample/readHotDetect/simple") {
int64_t sampleUnit = SERVER_KNOBS->BYTES_READ_UNITS_PER_SAMPLE;
StorageServerMetrics ssm;
ssm.bytesReadSample.sample.insert(LiteralStringRef("Apple"), 1000 * sampleUnit);
ssm.bytesReadSample.sample.insert(LiteralStringRef("Banana"), 2000 * sampleUnit);
ssm.bytesReadSample.sample.insert(LiteralStringRef("Cat"), 1000 * sampleUnit);
ssm.bytesReadSample.sample.insert(LiteralStringRef("Cathode"), 1000 * sampleUnit);
ssm.bytesReadSample.sample.insert(LiteralStringRef("Dog"), 1000 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("A"), 20 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Absolute"), 80 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Apple"), 1000 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Bah"), 20 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Banana"), 80 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Bob"), 200 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("But"), 100 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Cat"), 300 * sampleUnit);
std::vector<ReadHotRangeWithMetrics> t =
ssm.getReadHotRanges(KeyRangeRef(LiteralStringRef("A"), LiteralStringRef("C")), 2.0, 200 * sampleUnit, 0);
ASSERT(t.size() == 1 && (*t.begin()).keys.begin == LiteralStringRef("Bah") &&
(*t.begin()).keys.end == LiteralStringRef("Bob"));
return Void();
}
TEST_CASE("/fdbserver/StorageMetricSample/readHotDetect/moreThanOneRange") {
int64_t sampleUnit = SERVER_KNOBS->BYTES_READ_UNITS_PER_SAMPLE;
StorageServerMetrics ssm;
ssm.bytesReadSample.sample.insert(LiteralStringRef("Apple"), 1000 * sampleUnit);
ssm.bytesReadSample.sample.insert(LiteralStringRef("Banana"), 2000 * sampleUnit);
ssm.bytesReadSample.sample.insert(LiteralStringRef("Cat"), 1000 * sampleUnit);
ssm.bytesReadSample.sample.insert(LiteralStringRef("Cathode"), 1000 * sampleUnit);
ssm.bytesReadSample.sample.insert(LiteralStringRef("Dog"), 1000 * sampleUnit);
ssm.bytesReadSample.sample.insert(LiteralStringRef("Final"), 2000 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("A"), 20 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Absolute"), 80 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Apple"), 1000 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Bah"), 20 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Banana"), 80 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Bob"), 200 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("But"), 100 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Cat"), 300 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Dah"), 300 * sampleUnit);
std::vector<ReadHotRangeWithMetrics> t =
ssm.getReadHotRanges(KeyRangeRef(LiteralStringRef("A"), LiteralStringRef("D")), 2.0, 200 * sampleUnit, 0);
ASSERT(t.size() == 2 && (*t.begin()).keys.begin == LiteralStringRef("Bah") &&
(*t.begin()).keys.end == LiteralStringRef("Bob"));
ASSERT(t.at(1).keys.begin == LiteralStringRef("Cat") && t.at(1).keys.end == LiteralStringRef("Dah"));
return Void();
}
TEST_CASE("/fdbserver/StorageMetricSample/readHotDetect/consecutiveRanges") {
int64_t sampleUnit = SERVER_KNOBS->BYTES_READ_UNITS_PER_SAMPLE;
StorageServerMetrics ssm;
ssm.bytesReadSample.sample.insert(LiteralStringRef("Apple"), 1000 * sampleUnit);
ssm.bytesReadSample.sample.insert(LiteralStringRef("Banana"), 2000 * sampleUnit);
ssm.bytesReadSample.sample.insert(LiteralStringRef("Bucket"), 2000 * sampleUnit);
ssm.bytesReadSample.sample.insert(LiteralStringRef("Cat"), 1000 * sampleUnit);
ssm.bytesReadSample.sample.insert(LiteralStringRef("Cathode"), 1000 * sampleUnit);
ssm.bytesReadSample.sample.insert(LiteralStringRef("Dog"), 5000 * sampleUnit);
ssm.bytesReadSample.sample.insert(LiteralStringRef("Final"), 2000 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("A"), 20 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Absolute"), 80 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Apple"), 1000 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Bah"), 20 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Banana"), 80 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Bob"), 200 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("But"), 100 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Cat"), 300 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Dah"), 300 * sampleUnit);
std::vector<ReadHotRangeWithMetrics> t =
ssm.getReadHotRanges(KeyRangeRef(LiteralStringRef("A"), LiteralStringRef("D")), 2.0, 200 * sampleUnit, 0);
ASSERT(t.size() == 2 && (*t.begin()).keys.begin == LiteralStringRef("Bah") &&
(*t.begin()).keys.end == LiteralStringRef("But"));
ASSERT(t.at(1).keys.begin == LiteralStringRef("Cat") && t.at(1).keys.end == LiteralStringRef("Dah"));
return Void();
}
// Contains information about whether or not a key-value pair should be included in a byte sample
// Also contains size information about the byte sample
struct ByteSampleInfo {
bool inSample;
// Actual size of the key value pair
int64_t size;
// The recorded size of the sample (max of bytesPerSample, size)
int64_t sampledSize;
};
// Determines whether a key-value pair should be included in a byte sample
// Also returns size information about the sample
ByteSampleInfo isKeyValueInSample(KeyValueRef keyValue);
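// isKeyValueInSample is only declared here. As a sketch of the shape such a predicate can take,
// consistent with the ByteSampleInfo comments above (the probability formula, the hash-based
// coin flip, and the function name below are assumptions for illustration, not the actual
// implementation):
#include <algorithm>
#include <cstdint>
ByteSampleInfo isKeyValueInSampleSketch(KeyValueRef keyValue, int64_t bytesPerSample, uint64_t keyHash) {
	ByteSampleInfo info;
	info.size = keyValue.key.size() + keyValue.value.size();
	// Include with probability min(1, size / bytesPerSample); deciding via a hash of the key
	// makes repeated calls agree on the same pair.
	double p = std::min(1.0, double(info.size) / double(bytesPerSample));
	info.inSample = double(keyHash % (1u << 24)) < p * double(1u << 24);
	info.sampledSize = std::max<int64_t>(info.size, bytesPerSample);
	return info;
}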
#include "flow/unactorcompiler.h"

View File

@@ -18,8 +18,597 @@
* limitations under the License.
*/
#if defined(NO_INTELLISENSE)
#include "fdbserver/StorageMetrics.actor.g.h"
#else
#include "fdbserver/StorageMetrics.actor.h"
#endif
#pragma once
#include "fdbclient/FDBTypes.h"
#include "fdbrpc/simulator.h"
#include "flow/UnitTest.h"
#include "fdbclient/StorageServerInterface.h"
#include "fdbclient/KeyRangeMap.h"
#include "fdbserver/Knobs.h"
const StringRef STORAGESERVER_HISTOGRAM_GROUP = "StorageServer"_sr;
const StringRef FETCH_KEYS_LATENCY_HISTOGRAM = "FetchKeysLatency"_sr;
const StringRef FETCH_KEYS_BYTES_HISTOGRAM = "FetchKeysSize"_sr;
const StringRef FETCH_KEYS_BYTES_PER_SECOND_HISTOGRAM = "FetchKeysBandwidth"_sr;
const StringRef TLOG_CURSOR_READS_LATENCY_HISTOGRAM = "TLogCursorReadsLatency"_sr;
const StringRef SS_VERSION_LOCK_LATENCY_HISTOGRAM = "SSVersionLockLatency"_sr;
const StringRef EAGER_READS_LATENCY_HISTOGRAM = "EagerReadsLatency"_sr;
const StringRef FETCH_KEYS_PTREE_UPDATES_LATENCY_HISTOGRAM = "FetchKeysPTreeUpdatesLatency"_sr;
const StringRef TLOG_MSGS_PTREE_UPDATES_LATENCY_HISTOGRAM = "TLogMsgsPTreeUpdatesLatency"_sr;
const StringRef STORAGE_UPDATES_DURABLE_LATENCY_HISTOGRAM = "StorageUpdatesDurableLatency"_sr;
const StringRef STORAGE_COMMIT_LATENCY_HISTOGRAM = "StorageCommitLatency"_sr;
const StringRef SS_DURABLE_VERSION_UPDATE_LATENCY_HISTOGRAM = "SSDurableVersionUpdateLatency"_sr;
struct StorageMetricSample {
IndexedSet<Key, int64_t> sample;
int64_t metricUnitsPerSample;
explicit StorageMetricSample(int64_t metricUnitsPerSample) : metricUnitsPerSample(metricUnitsPerSample) {}
int64_t getEstimate(KeyRangeRef keys) const { return sample.sumRange(keys.begin, keys.end); }
KeyRef splitEstimate(KeyRangeRef range, int64_t offset, bool front = true) const {
auto fwd_split = sample.index(front ? sample.sumTo(sample.lower_bound(range.begin)) + offset
: sample.sumTo(sample.lower_bound(range.end)) - offset);
if (fwd_split == sample.end() || *fwd_split >= range.end)
return range.end;
if (!front && *fwd_split <= range.begin)
return range.begin;
auto bck_split = fwd_split;
// Butterfly search - start at midpoint then go in both directions.
while ((fwd_split != sample.end() && *fwd_split < range.end) ||
(bck_split != sample.begin() && *bck_split > range.begin)) {
if (bck_split != sample.begin() && *bck_split > range.begin) {
auto it = bck_split;
bck_split.decrementNonEnd();
KeyRef split = keyBetween(KeyRangeRef(
bck_split != sample.begin() ? std::max<KeyRef>(*bck_split, range.begin) : range.begin, *it));
if (!front || (getEstimate(KeyRangeRef(range.begin, split)) > 0 &&
split.size() <= CLIENT_KNOBS->SPLIT_KEY_SIZE_LIMIT))
return split;
}
if (fwd_split != sample.end() && *fwd_split < range.end) {
auto it = fwd_split;
++it;
KeyRef split = keyBetween(
KeyRangeRef(*fwd_split, it != sample.end() ? std::min<KeyRef>(*it, range.end) : range.end));
if (front || (getEstimate(KeyRangeRef(split, range.end)) > 0 &&
split.size() <= CLIENT_KNOBS->SPLIT_KEY_SIZE_LIMIT))
return split;
fwd_split = it;
}
}
// If we didn't return above, we didn't find anything.
TraceEvent(SevWarn, "CannotSplitLastSampleKey").detail("Range", range).detail("Offset", offset);
return front ? range.end : range.begin;
}
};
struct TransientStorageMetricSample : StorageMetricSample {
Deque<std::pair<double, std::pair<Key, int64_t>>> queue;
explicit TransientStorageMetricSample(int64_t metricUnitsPerSample) : StorageMetricSample(metricUnitsPerSample) {}
// Returns the sampled metric value (possibly 0, possibly increased by the sampling factor)
int64_t addAndExpire(KeyRef key, int64_t metric, double expiration) {
int64_t x = add(key, metric);
if (x)
queue.emplace_back(expiration, std::make_pair(*sample.find(key), -x));
return x;
}
// FIXME: both versions of erase are broken, because they do not remove items in the queue that will subtract a
// metric from the value at some point in the future
int64_t erase(KeyRef key) {
auto it = sample.find(key);
if (it == sample.end())
return 0;
int64_t x = sample.getMetric(it);
sample.erase(it);
return x;
}
void erase(KeyRangeRef keys) { sample.erase(keys.begin, keys.end); }
void poll(KeyRangeMap<std::vector<PromiseStream<StorageMetrics>>>& waitMap, StorageMetrics m) {
double now = ::now();
while (queue.size() && queue.front().first <= now) {
KeyRef key = queue.front().second.first;
int64_t delta = queue.front().second.second;
ASSERT(delta != 0);
if (sample.addMetric(key, delta) == 0)
sample.erase(key);
StorageMetrics deltaM = m * delta;
auto v = waitMap[key];
for (int i = 0; i < v.size(); i++) {
TEST(true); // TransientStorageMetricSample poll update
v[i].send(deltaM);
}
queue.pop_front();
}
}
void poll() {
double now = ::now();
while (queue.size() && queue.front().first <= now) {
KeyRef key = queue.front().second.first;
int64_t delta = queue.front().second.second;
ASSERT(delta != 0);
if (sample.addMetric(key, delta) == 0)
sample.erase(key);
queue.pop_front();
}
}
private:
bool roll(KeyRef key, int64_t metric) const {
return deterministicRandom()->random01() <
(double)metric / metricUnitsPerSample; //< SOMEDAY: Better randomInt64?
}
int64_t add(KeyRef key, int64_t metric) {
if (!metric)
return 0;
int64_t mag = metric < 0 ? -metric : metric;
if (mag < metricUnitsPerSample) {
if (!roll(key, mag))
return 0;
metric = metric < 0 ? -metricUnitsPerSample : metricUnitsPerSample;
}
if (sample.addMetric(key, metric) == 0)
sample.erase(key);
return metric;
}
};
struct StorageServerMetrics {
KeyRangeMap<std::vector<PromiseStream<StorageMetrics>>> waitMetricsMap;
StorageMetricSample byteSample;
TransientStorageMetricSample iopsSample,
bandwidthSample; // FIXME: iops and bandwidth calculations are not effectively tested, since they aren't
// currently used by data distribution
TransientStorageMetricSample bytesReadSample;
StorageServerMetrics()
: byteSample(0), iopsSample(SERVER_KNOBS->IOPS_UNITS_PER_SAMPLE),
bandwidthSample(SERVER_KNOBS->BANDWIDTH_UNITS_PER_SAMPLE),
bytesReadSample(SERVER_KNOBS->BYTES_READ_UNITS_PER_SAMPLE) {}
// Get the current estimated metrics for the given keys
StorageMetrics getMetrics(KeyRangeRef const& keys) const {
StorageMetrics result;
result.bytes = byteSample.getEstimate(keys);
result.bytesPerKSecond =
bandwidthSample.getEstimate(keys) * SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS;
result.iosPerKSecond =
iopsSample.getEstimate(keys) * SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS;
result.bytesReadPerKSecond =
bytesReadSample.getEstimate(keys) * SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS;
return result;
}
// Called when metrics should change (IO for a given key)
// Notifies waiting WaitMetricsRequests through waitMetricsMap, and updates metricsAverageQueue and metricsSampleMap
void notify(KeyRef key, StorageMetrics& metrics) {
ASSERT(metrics.bytes == 0); // ShardNotifyMetrics
if (g_network->isSimulated()) {
TEST(metrics.bytesPerKSecond != 0); // ShardNotifyMetrics bytes
TEST(metrics.iosPerKSecond != 0); // ShardNotifyMetrics ios
TEST(metrics.bytesReadPerKSecond != 0); // ShardNotifyMetrics bytesRead
}
double expire = now() + SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL;
StorageMetrics notifyMetrics;
if (metrics.bytesPerKSecond)
notifyMetrics.bytesPerKSecond = bandwidthSample.addAndExpire(key, metrics.bytesPerKSecond, expire) *
SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS;
if (metrics.iosPerKSecond)
notifyMetrics.iosPerKSecond = iopsSample.addAndExpire(key, metrics.iosPerKSecond, expire) *
SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS;
if (metrics.bytesReadPerKSecond)
notifyMetrics.bytesReadPerKSecond = bytesReadSample.addAndExpire(key, metrics.bytesReadPerKSecond, expire) *
SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS;
if (!notifyMetrics.allZero()) {
auto& v = waitMetricsMap[key];
for (int i = 0; i < v.size(); i++) {
if (g_network->isSimulated()) {
TEST(true); // shard notify metrics
}
// ShardNotifyMetrics
v[i].send(notifyMetrics);
}
}
}
// Because read sampling is called on every read, use this specialized function to avoid the overhead of
// branch misses and unnecessary stack allocation, which adds up under heavy load.
void notifyBytesReadPerKSecond(KeyRef key, int64_t in) {
double expire = now() + SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL;
int64_t bytesReadPerKSecond =
bytesReadSample.addAndExpire(key, in, expire) * SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS;
if (bytesReadPerKSecond > 0) {
StorageMetrics notifyMetrics;
notifyMetrics.bytesReadPerKSecond = bytesReadPerKSecond;
auto& v = waitMetricsMap[key];
for (int i = 0; i < v.size(); i++) {
TEST(true); // ShardNotifyMetrics
v[i].send(notifyMetrics);
}
}
}
// Called by StorageServerDisk when the size of a key in byteSample changes, to notify WaitMetricsRequest
// Should not be called for keys past allKeys.end
void notifyBytes(RangeMap<Key, std::vector<PromiseStream<StorageMetrics>>, KeyRangeRef>::iterator shard,
int64_t bytes) {
ASSERT(shard.end() <= allKeys.end);
StorageMetrics notifyMetrics;
notifyMetrics.bytes = bytes;
for (int i = 0; i < shard.value().size(); i++) {
TEST(true); // notifyBytes
shard.value()[i].send(notifyMetrics);
}
}
// Called by StorageServerDisk when the size of a key in byteSample changes, to notify WaitMetricsRequest
void notifyBytes(KeyRef key, int64_t bytes) {
if (key >= allKeys.end) // Do not notify on changes to internal storage server state
return;
notifyBytes(waitMetricsMap.rangeContaining(key), bytes);
}
// Called when a range of keys becomes unassigned (and therefore not readable), to notify waiting
// WaitMetricsRequests (and possibly other types of wait requests in the future)
void notifyNotReadable(KeyRangeRef keys) {
auto rs = waitMetricsMap.intersectingRanges(keys);
for (auto r = rs.begin(); r != rs.end(); ++r) {
auto& v = r->value();
TEST(v.size()); // notifyNotReadable() sending errors to intersecting ranges
for (int n = 0; n < v.size(); n++)
v[n].sendError(wrong_shard_server());
}
}
// Called periodically (~1 sec intervals) to remove older IOs from the averages
// Removes old entries from metricsAverageQueue, updates metricsSampleMap accordingly, and notifies
// WaitMetricsRequests through waitMetricsMap.
void poll() {
{
StorageMetrics m;
m.bytesPerKSecond = SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS;
bandwidthSample.poll(waitMetricsMap, m);
}
{
StorageMetrics m;
m.iosPerKSecond = SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS;
iopsSample.poll(waitMetricsMap, m);
}
{
StorageMetrics m;
m.bytesReadPerKSecond = SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS;
bytesReadSample.poll(waitMetricsMap, m);
}
// byteSample doesn't need polling because we never call addAndExpire() on it
}
// static void waitMetrics( StorageServerMetrics* const& self, WaitMetricsRequest const& req );
// This function can run on untrusted user data. We must validate all divisions carefully.
KeyRef getSplitKey(int64_t remaining,
int64_t estimated,
int64_t limits,
int64_t used,
int64_t infinity,
bool isLastShard,
const StorageMetricSample& sample,
double divisor,
KeyRef const& lastKey,
KeyRef const& key,
bool hasUsed) const {
ASSERT(remaining >= 0);
ASSERT(limits > 0);
ASSERT(divisor > 0);
if (limits < infinity / 2) {
int64_t expectedSize;
if (isLastShard || remaining > estimated) {
double remaining_divisor = (double(remaining) / limits) + 0.5;
expectedSize = remaining / remaining_divisor;
} else {
// If we are here, then estimated >= remaining >= 0
double estimated_divisor = (double(estimated) / limits) + 0.5;
expectedSize = remaining / estimated_divisor;
}
if (remaining > expectedSize) {
// This does the conversion from native units to bytes using the divisor.
double offset = (expectedSize - used) / divisor;
if (offset <= 0)
return hasUsed ? lastKey : key;
return sample.splitEstimate(
KeyRangeRef(lastKey, key),
offset * ((1.0 - SERVER_KNOBS->SPLIT_JITTER_AMOUNT) +
2 * deterministicRandom()->random01() * SERVER_KNOBS->SPLIT_JITTER_AMOUNT));
}
}
return key;
}
void splitMetrics(SplitMetricsRequest req) const {
try {
SplitMetricsReply reply;
KeyRef lastKey = req.keys.begin;
StorageMetrics used = req.used;
StorageMetrics estimated = req.estimated;
StorageMetrics remaining = getMetrics(req.keys) + used;
//TraceEvent("SplitMetrics").detail("Begin", req.keys.begin).detail("End", req.keys.end).detail("Remaining", remaining.bytes).detail("Used", used.bytes);
while (true) {
if (remaining.bytes < 2 * SERVER_KNOBS->MIN_SHARD_BYTES)
break;
KeyRef key = req.keys.end;
bool hasUsed = used.bytes != 0 || used.bytesPerKSecond != 0 || used.iosPerKSecond != 0;
key = getSplitKey(remaining.bytes,
estimated.bytes,
req.limits.bytes,
used.bytes,
req.limits.infinity,
req.isLastShard,
byteSample,
1,
lastKey,
key,
hasUsed);
if (used.bytes < SERVER_KNOBS->MIN_SHARD_BYTES)
key = std::max(key,
byteSample.splitEstimate(KeyRangeRef(lastKey, req.keys.end),
SERVER_KNOBS->MIN_SHARD_BYTES - used.bytes));
key = getSplitKey(remaining.iosPerKSecond,
estimated.iosPerKSecond,
req.limits.iosPerKSecond,
used.iosPerKSecond,
req.limits.infinity,
req.isLastShard,
iopsSample,
SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS,
lastKey,
key,
hasUsed);
key = getSplitKey(remaining.bytesPerKSecond,
estimated.bytesPerKSecond,
req.limits.bytesPerKSecond,
used.bytesPerKSecond,
req.limits.infinity,
req.isLastShard,
bandwidthSample,
SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS,
lastKey,
key,
hasUsed);
ASSERT(key != lastKey || hasUsed);
if (key == req.keys.end)
break;
reply.splits.push_back_deep(reply.splits.arena(), key);
StorageMetrics diff = (getMetrics(KeyRangeRef(lastKey, key)) + used);
remaining -= diff;
estimated -= diff;
used = StorageMetrics();
lastKey = key;
}
reply.used = getMetrics(KeyRangeRef(lastKey, req.keys.end)) + used;
req.reply.send(reply);
} catch (Error& e) {
req.reply.sendError(e);
}
}
void getStorageMetrics(GetStorageMetricsRequest req,
StorageBytes sb,
double bytesInputRate,
int64_t versionLag,
double lastUpdate) const {
GetStorageMetricsReply rep;
// SOMEDAY: make bytes dynamic with hard disk space
rep.load = getMetrics(allKeys);
if (sb.free < 1e9) {
TraceEvent(SevWarn, "PhysicalDiskMetrics")
.suppressFor(60.0)
.detail("Free", sb.free)
.detail("Total", sb.total)
.detail("Available", sb.available)
.detail("Load", rep.load.bytes);
}
rep.available.bytes = sb.available;
rep.available.iosPerKSecond = 10e6;
rep.available.bytesPerKSecond = 100e9;
rep.available.bytesReadPerKSecond = 100e9;
rep.capacity.bytes = sb.total;
rep.capacity.iosPerKSecond = 10e6;
rep.capacity.bytesPerKSecond = 100e9;
rep.capacity.bytesReadPerKSecond = 100e9;
rep.bytesInputRate = bytesInputRate;
rep.versionLag = versionLag;
rep.lastUpdate = lastUpdate;
req.reply.send(rep);
}
Future<Void> waitMetrics(WaitMetricsRequest req, Future<Void> delay);
// Given a read hot shard, this function will divide the shard into chunks and find those chunks whose
// readBytes/sizeBytes ratio exceeds the `readDensityRatio`. Please make sure to run the unit tests in
// `StorageMetricsSampleTests.txt` after making changes.
std::vector<ReadHotRangeWithMetrics> getReadHotRanges(KeyRangeRef shard,
double readDensityRatio,
int64_t baseChunkSize,
int64_t minShardReadBandwidthPerKSeconds) const {
std::vector<ReadHotRangeWithMetrics> toReturn;
double shardSize = (double)byteSample.getEstimate(shard);
int64_t shardReadBandwidth = bytesReadSample.getEstimate(shard);
if (shardReadBandwidth * SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS <=
minShardReadBandwidthPerKSeconds) {
return toReturn;
}
if (shardSize <= baseChunkSize) {
// Shard is small, use it as is
if (bytesReadSample.getEstimate(shard) > (readDensityRatio * shardSize)) {
toReturn.emplace_back(shard,
bytesReadSample.getEstimate(shard) / shardSize,
bytesReadSample.getEstimate(shard) /
SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL);
}
return toReturn;
}
KeyRef beginKey = shard.begin;
auto endKey =
byteSample.sample.index(byteSample.sample.sumTo(byteSample.sample.lower_bound(beginKey)) + baseChunkSize);
while (endKey != byteSample.sample.end()) {
if (*endKey > shard.end) {
endKey = byteSample.sample.lower_bound(shard.end);
if (*endKey == beginKey) {
// Break without incrementing endKey; otherwise the loop would be stuck here forever.
break;
}
}
if (*endKey == beginKey) {
++endKey;
continue;
}
if (bytesReadSample.getEstimate(KeyRangeRef(beginKey, *endKey)) >
(readDensityRatio * std::max(baseChunkSize, byteSample.getEstimate(KeyRangeRef(beginKey, *endKey))))) {
auto range = KeyRangeRef(beginKey, *endKey);
if (!toReturn.empty() && toReturn.back().keys.end == range.begin) {
// If two consecutive chunks are both over the ratio, merge them.
range = KeyRangeRef(toReturn.back().keys.begin, *endKey);
toReturn.pop_back();
}
toReturn.emplace_back(
range,
(double)bytesReadSample.getEstimate(range) / std::max(baseChunkSize, byteSample.getEstimate(range)),
bytesReadSample.getEstimate(range) / SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL);
}
beginKey = *endKey;
endKey = byteSample.sample.index(byteSample.sample.sumTo(byteSample.sample.lower_bound(beginKey)) +
baseChunkSize);
}
return toReturn;
}
void getReadHotRanges(ReadHotSubRangeRequest req) const {
ReadHotSubRangeReply reply;
auto _ranges = getReadHotRanges(req.keys,
SERVER_KNOBS->SHARD_MAX_READ_DENSITY_RATIO,
SERVER_KNOBS->READ_HOT_SUB_RANGE_CHUNK_SIZE,
SERVER_KNOBS->SHARD_READ_HOT_BANDWITH_MIN_PER_KSECONDS);
reply.readHotRanges = VectorRef(_ranges.data(), _ranges.size());
req.reply.send(reply);
}
std::vector<KeyRef> getSplitPoints(KeyRangeRef range, int64_t chunkSize, Optional<Key> prefixToRemove) const {
std::vector<KeyRef> toReturn;
KeyRef beginKey = range.begin;
IndexedSet<Key, int64_t>::const_iterator endKey =
byteSample.sample.index(byteSample.sample.sumTo(byteSample.sample.lower_bound(beginKey)) + chunkSize);
while (endKey != byteSample.sample.end()) {
if (*endKey > range.end) {
break;
}
if (*endKey == beginKey) {
++endKey;
continue;
}
KeyRef splitPoint = *endKey;
if (prefixToRemove.present()) {
splitPoint = splitPoint.removePrefix(prefixToRemove.get());
}
toReturn.push_back(splitPoint);
beginKey = *endKey;
endKey =
byteSample.sample.index(byteSample.sample.sumTo(byteSample.sample.lower_bound(beginKey)) + chunkSize);
}
return toReturn;
}
void getSplitPoints(SplitRangeRequest req, Optional<Key> prefix) const {
SplitRangeReply reply;
KeyRangeRef range = req.keys;
if (prefix.present()) {
range = range.withPrefix(prefix.get(), req.arena);
}
std::vector<KeyRef> points = getSplitPoints(range, req.chunkSize, prefix);
reply.splitPoints.append_deep(reply.splitPoints.arena(), points.data(), points.size());
req.reply.send(reply);
}
private:
static void collapse(KeyRangeMap<int>& map, KeyRef const& key) {
auto range = map.rangeContaining(key);
if (range == map.ranges().begin() || range == map.ranges().end())
return;
int value = range->value();
auto prev = range;
--prev;
if (prev->value() != value)
return;
KeyRange keys = KeyRangeRef(prev->begin(), range->end());
map.insert(keys, value);
}
static void add(KeyRangeMap<int>& map, KeyRangeRef const& keys, int delta) {
auto rs = map.modify(keys);
for (auto r = rs.begin(); r != rs.end(); ++r)
r->value() += delta;
collapse(map, keys.begin);
collapse(map, keys.end);
}
};
// Contains information about whether or not a key-value pair should be included in a byte sample
// Also contains size information about the byte sample
struct ByteSampleInfo {
bool inSample;
// Actual size of the key value pair
int64_t size;
// The recorded size of the sample (max of bytesPerSample, size)
int64_t sampledSize;
};
// Determines whether a key-value pair should be included in a byte sample
// Also returns size information about the sample
ByteSampleInfo isKeyValueInSample(KeyValueRef keyValue);

View File

@@ -1784,9 +1784,11 @@ Future<Void> tLogPeekMessages(PromiseType replyPromise,
state Version poppedVer = poppedVersion(logData, reqTag);
auto tagData = logData->getTagData(reqTag);
bool tagRecovered = tagData && !tagData->unpoppedRecovered;
if (SERVER_KNOBS->ENABLE_VERSION_VECTOR && poppedVer <= reqBegin &&
reqBegin > logData->persistentDataDurableVersion && !reqOnlySpilled && reqTag.locality >= 0 &&
!reqReturnIfBlocked) {
!reqReturnIfBlocked && tagRecovered) {
state double startTime = now();
// TODO (version vector) check if this should be included in "status details" json
// TODO (version vector) all tags may be too many, instead, standard deviation?

View File

@@ -122,15 +122,17 @@ ACTOR Future<Void> testerServerCore(TesterInterface interf,
enum test_location_t { TEST_HERE, TEST_ON_SERVERS, TEST_ON_TESTERS };
enum test_type_t { TEST_TYPE_FROM_FILE, TEST_TYPE_CONSISTENCY_CHECK, TEST_TYPE_UNIT_TESTS };
ACTOR Future<Void> runTests(Reference<IClusterConnectionRecord> connRecord,
test_type_t whatToRun,
test_location_t whereToRun,
int minTestersExpected,
std::string fileName = std::string(),
StringRef startingConfiguration = StringRef(),
LocalityData locality = LocalityData(),
UnitTestParameters testOptions = UnitTestParameters(),
Optional<TenantName> defaultTenant = Optional<TenantName>());
ACTOR Future<Void> runTests(
Reference<IClusterConnectionRecord> connRecord,
test_type_t whatToRun,
test_location_t whereToRun,
int minTestersExpected,
std::string fileName = std::string(),
StringRef startingConfiguration = StringRef(),
LocalityData locality = LocalityData(),
UnitTestParameters testOptions = UnitTestParameters(),
Optional<TenantName> defaultTenant = Optional<TenantName>(),
Standalone<VectorRef<TenantNameRef>> tenantsToCreate = Standalone<VectorRef<TenantNameRef>>());
#include "flow/unactorcompiler.h"
#endif

View File

@@ -3723,7 +3723,8 @@ ACTOR Future<GetMappedKeyValuesReply> mapKeyValues(StorageServer* data,
StringRef mapper,
// To provide span context, tags, debug ID to underlying lookups.
GetMappedKeyValuesRequest* pOriginalReq,
Optional<Key> tenantPrefix) {
Optional<Key> tenantPrefix,
int matchIndex) {
state GetMappedKeyValuesReply result;
result.version = input.version;
result.more = input.more;
@@ -3741,15 +3742,20 @@ ACTOR Future<GetMappedKeyValuesReply> mapKeyValues(StorageServer* data,
TraceEvent("MapperNotTuple").error(e).detail("Mapper", mapper.printable());
throw mapper_not_tuple();
}
state KeyValueRef* it = input.data.begin();
state std::vector<Optional<Tuple>> vt;
state bool isRangeQuery = false;
preprocessMappedKey(mappedKeyFormatTuple, vt, isRangeQuery);
for (; it != input.data.end(); it++) {
state int sz = input.data.size();
state int i = 0;
for (; i < sz; i++) {
KeyValueRef* it = &input.data[i];
state MappedKeyValueRef kvm;
kvm.key = it->key;
kvm.value = it->value;
// Need to keep the boundary, so that the caller can use it as a continuation.
if ((i == 0 || i == sz - 1) || matchIndex == MATCH_INDEX_ALL) {
kvm.key = it->key;
kvm.value = it->value;
}
state Key mappedKey = constructMappedKey(it, vt, mappedKeyTuple, mappedKeyFormatTuple);
// Make sure the mappedKey is always available, so that it remains valid even if we fetch the key asynchronously.
@@ -3773,6 +3779,118 @@ ACTOR Future<GetMappedKeyValuesReply> mapKeyValues(StorageServer* data,
return result;
}
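// A note on the new matchIndex parameter: even when the client asks for only a subset of
// matches, the first and last rows of each batch always carry their original key and value,
// because the client uses those boundary keys to build the continuation for the next
// getMappedRange call. The rule reduces to a small predicate (MATCH_INDEX_ALL as used above;
// other matchIndex modes are assumed to omit the original pair):
bool shouldCarryOriginalKV(int i, int sz, int matchIndex) {
	// Boundary rows are always kept so the reply can serve as a continuation.
	return i == 0 || i == sz - 1 || matchIndex == MATCH_INDEX_ALL;
}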
bool rangeIntersectsAnyTenant(TenantPrefixIndex& prefixIndex, KeyRangeRef range, Version ver) {
auto view = prefixIndex.at(ver);
auto beginItr = view.lastLessOrEqual(range.begin);
auto endItr = view.lastLess(range.end);
// If the begin and end reference different spots in the tenant index, then the tenant pointed to
// by endItr intersects the range
if (beginItr != endItr) {
return true;
}
// If the iterators point to the same entry and that entry contains begin, then we are wholly in
// one tenant
if (beginItr != view.end() && range.begin.startsWith(beginItr.key())) {
return true;
}
return false;
}
TEST_CASE("/fdbserver/storageserver/rangeIntersectsAnyTenant") {
std::map<TenantName, TenantMapEntry> entries = { std::make_pair("tenant0"_sr, TenantMapEntry(0, ""_sr)),
std::make_pair("tenant2"_sr, TenantMapEntry(2, ""_sr)),
std::make_pair("tenant3"_sr, TenantMapEntry(3, ""_sr)),
std::make_pair("tenant4"_sr, TenantMapEntry(4, ""_sr)),
std::make_pair("tenant6"_sr, TenantMapEntry(6, ""_sr)) };
TenantPrefixIndex index;
index.createNewVersion(1);
for (auto entry : entries) {
index.insert(entry.second.prefix, entry.first);
}
// Before all tenants
ASSERT(!rangeIntersectsAnyTenant(index, KeyRangeRef(""_sr, "\x00"_sr), index.getLatestVersion()));
// After all tenants
ASSERT(!rangeIntersectsAnyTenant(index, KeyRangeRef("\xfe"_sr, "\xff"_sr), index.getLatestVersion()));
// In between tenants
ASSERT(!rangeIntersectsAnyTenant(
index,
KeyRangeRef(TenantMapEntry::idToPrefix(1), TenantMapEntry::idToPrefix(1).withSuffix("\xff"_sr)),
index.getLatestVersion()));
// In between tenants with end intersecting tenant start
ASSERT(!rangeIntersectsAnyTenant(
index, KeyRangeRef(TenantMapEntry::idToPrefix(5), entries["tenant6"_sr].prefix), index.getLatestVersion()));
// Entire tenants
ASSERT(rangeIntersectsAnyTenant(
index, KeyRangeRef(entries["tenant0"_sr].prefix, TenantMapEntry::idToPrefix(1)), index.getLatestVersion()));
ASSERT(rangeIntersectsAnyTenant(
index, KeyRangeRef(entries["tenant2"_sr].prefix, entries["tenant3"_sr].prefix), index.getLatestVersion()));
// Partial tenants
ASSERT(rangeIntersectsAnyTenant(
index,
KeyRangeRef(entries["tenant0"_sr].prefix, entries["tenant0"_sr].prefix.withSuffix("foo"_sr)),
index.getLatestVersion()));
ASSERT(rangeIntersectsAnyTenant(
index,
KeyRangeRef(entries["tenant3"_sr].prefix.withSuffix("foo"_sr), entries["tenant4"_sr].prefix),
index.getLatestVersion()));
ASSERT(rangeIntersectsAnyTenant(index,
KeyRangeRef(entries["tenant4"_sr].prefix.withSuffix("bar"_sr),
entries["tenant4"_sr].prefix.withSuffix("foo"_sr)),
index.getLatestVersion()));
// Begin outside, end inside tenant
ASSERT(rangeIntersectsAnyTenant(
index,
KeyRangeRef(TenantMapEntry::idToPrefix(1), entries["tenant2"_sr].prefix.withSuffix("foo"_sr)),
index.getLatestVersion()));
ASSERT(rangeIntersectsAnyTenant(
index,
KeyRangeRef(TenantMapEntry::idToPrefix(1), entries["tenant3"_sr].prefix.withSuffix("foo"_sr)),
index.getLatestVersion()));
// Begin inside, end outside tenant
ASSERT(rangeIntersectsAnyTenant(
index,
KeyRangeRef(entries["tenant3"_sr].prefix.withSuffix("foo"_sr), TenantMapEntry::idToPrefix(5)),
index.getLatestVersion()));
ASSERT(rangeIntersectsAnyTenant(
index,
KeyRangeRef(entries["tenant4"_sr].prefix.withSuffix("foo"_sr), TenantMapEntry::idToPrefix(5)),
index.getLatestVersion()));
// Both inside different tenants
ASSERT(rangeIntersectsAnyTenant(index,
KeyRangeRef(entries["tenant0"_sr].prefix.withSuffix("foo"_sr),
entries["tenant2"_sr].prefix.withSuffix("foo"_sr)),
index.getLatestVersion()));
ASSERT(rangeIntersectsAnyTenant(index,
KeyRangeRef(entries["tenant0"_sr].prefix.withSuffix("foo"_sr),
entries["tenant3"_sr].prefix.withSuffix("foo"_sr)),
index.getLatestVersion()));
ASSERT(rangeIntersectsAnyTenant(index,
KeyRangeRef(entries["tenant2"_sr].prefix.withSuffix("foo"_sr),
entries["tenant6"_sr].prefix.withSuffix("foo"_sr)),
index.getLatestVersion()));
// Both outside tenants with tenant in the middle
ASSERT(rangeIntersectsAnyTenant(
index, KeyRangeRef(""_sr, TenantMapEntry::idToPrefix(1).withSuffix("foo"_sr)), index.getLatestVersion()));
ASSERT(rangeIntersectsAnyTenant(index, KeyRangeRef(""_sr, "\xff"_sr), index.getLatestVersion()));
ASSERT(rangeIntersectsAnyTenant(
index, KeyRangeRef(TenantMapEntry::idToPrefix(5).withSuffix("foo"_sr), "\xff"_sr), index.getLatestVersion()));
return Void();
}
// Most of the actor is copied from getKeyValuesQ. I tried to use templates but things became nearly impossible
// after combining actor shenanigans with template shenanigans.
ACTOR Future<Void> getMappedKeyValuesQ(StorageServer* data, GetMappedKeyValuesRequest req)
@@ -3859,13 +3977,7 @@ ACTOR Future<Void> getMappedKeyValuesQ(StorageServer* data, GetMappedKeyValuesRe
throw tenant_name_required();
}
auto view = data->tenantPrefixIndex.at(req.version);
auto beginItr = view.lastLessOrEqual(begin);
if (beginItr != view.end() && !begin.startsWith(beginItr.key())) {
++beginItr;
}
auto endItr = view.lastLessOrEqual(end);
if (beginItr != endItr) {
if (rangeIntersectsAnyTenant(data->tenantPrefixIndex, KeyRangeRef(begin, end), req.version)) {
throw tenant_name_required();
}
}
@@ -3920,7 +4032,7 @@ ACTOR Future<Void> getMappedKeyValuesQ(StorageServer* data, GetMappedKeyValuesRe
try {
// Map the scanned range to another list of keys and look up.
GetMappedKeyValuesReply _r =
wait(mapKeyValues(data, getKeyValuesReply, req.mapper, &req, tenantPrefix));
wait(mapKeyValues(data, getKeyValuesReply, req.mapper, &req, tenantPrefix, req.matchIndex));
r = _r;
} catch (Error& e) {
TraceEvent("MapError").error(e);

View File

@@ -1548,7 +1548,8 @@ ACTOR Future<Void> runTests(Reference<AsyncVar<Optional<struct ClusterController
std::vector<TestSpec> tests,
StringRef startingConfiguration,
LocalityData locality,
Optional<TenantName> defaultTenant) {
Optional<TenantName> defaultTenant,
Standalone<VectorRef<TenantNameRef>> tenantsToCreate) {
state Database cx;
state Reference<AsyncVar<ServerDBInfo>> dbInfo(new AsyncVar<ServerDBInfo>);
state Future<Void> ccMonitor = monitorServerDBInfo(cc, LocalityData(), dbInfo); // FIXME: locality
@@ -1624,9 +1625,14 @@ ACTOR Future<Void> runTests(Reference<AsyncVar<Optional<struct ClusterController
}
}
if (useDB && defaultTenant.present()) {
TraceEvent("CreatingDefaultTenant").detail("Tenant", defaultTenant.get());
wait(ManagementAPI::createTenant(cx.getReference(), defaultTenant.get()));
if (useDB) {
std::vector<Future<Void>> tenantFutures;
for (auto tenant : tenantsToCreate) {
TraceEvent("CreatingTenant").detail("Tenant", tenant);
tenantFutures.push_back(ManagementAPI::createTenant(cx.getReference(), tenant));
}
wait(waitForAll(tenantFutures));
}
if (useDB && waitForQuiescenceBegin) {
@@ -1708,7 +1714,8 @@ ACTOR Future<Void> runTests(Reference<AsyncVar<Optional<struct ClusterController
int minTestersExpected,
StringRef startingConfiguration,
LocalityData locality,
Optional<TenantName> defaultTenant) {
Optional<TenantName> defaultTenant,
Standalone<VectorRef<TenantNameRef>> tenantsToCreate) {
state int flags = (at == TEST_ON_SERVERS ? 0 : GetWorkersRequest::TESTER_CLASS_ONLY) |
GetWorkersRequest::NON_EXCLUDED_PROCESSES_ONLY;
state Future<Void> testerTimeout = delay(600.0); // wait 600 sec for testers to show up
@@ -1739,7 +1746,7 @@ ACTOR Future<Void> runTests(Reference<AsyncVar<Optional<struct ClusterController
for (int i = 0; i < workers.size(); i++)
ts.push_back(workers[i].interf.testerInterface);
wait(runTests(cc, ci, ts, tests, startingConfiguration, locality, defaultTenant));
wait(runTests(cc, ci, ts, tests, startingConfiguration, locality, defaultTenant, tenantsToCreate));
return Void();
}
@@ -1777,7 +1784,8 @@ ACTOR Future<Void> runTests(Reference<IClusterConnectionRecord> connRecord,
StringRef startingConfiguration,
LocalityData locality,
UnitTestParameters testOptions,
Optional<TenantName> defaultTenant) {
Optional<TenantName> defaultTenant,
Standalone<VectorRef<TenantNameRef>> tenantsToCreate) {
state TestSet testSet;
state std::unique_ptr<KnobProtectiveGroup> knobProtectiveGroup(nullptr);
auto cc = makeReference<AsyncVar<Optional<ClusterControllerFullInterface>>>();
@@ -1861,11 +1869,19 @@ ACTOR Future<Void> runTests(Reference<IClusterConnectionRecord> connRecord,
actors.push_back(
reportErrors(monitorServerDBInfo(cc, LocalityData(), db), "MonitorServerDBInfo")); // FIXME: Locality
actors.push_back(reportErrors(testerServerCore(iTesters[0], connRecord, db, locality), "TesterServerCore"));
tests = runTests(cc, ci, iTesters, testSet.testSpecs, startingConfiguration, locality, defaultTenant);
tests = runTests(
cc, ci, iTesters, testSet.testSpecs, startingConfiguration, locality, defaultTenant, tenantsToCreate);
} else {
tests = reportErrors(
runTests(cc, ci, testSet.testSpecs, at, minTestersExpected, startingConfiguration, locality, defaultTenant),
"RunTests");
tests = reportErrors(runTests(cc,
ci,
testSet.testSpecs,
at,
minTestersExpected,
startingConfiguration,
locality,
defaultTenant,
tenantsToCreate),
"RunTests");
}
choose {

View File

@@ -167,9 +167,9 @@ Database openDBOnServer(Reference<AsyncVar<ServerDBInfo> const> const& db,
enableLocalityLoadBalance,
taskID,
lockAware);
GlobalConfig::create(cx, db, std::addressof(db->get().client));
GlobalConfig::globalConfig().trigger(samplingFrequency, samplingProfilerUpdateFrequency);
GlobalConfig::globalConfig().trigger(samplingWindow, samplingProfilerUpdateWindow);
cx->globalConfig->init(db, std::addressof(db->get().client));
cx->globalConfig->trigger(samplingFrequency, samplingProfilerUpdateFrequency);
cx->globalConfig->trigger(samplingWindow, samplingProfilerUpdateWindow);
return cx;
}
@@ -1606,16 +1606,16 @@ ACTOR Future<Void> workerServer(Reference<IClusterConnectionRecord> connRecord,
state Database db =
Database::createDatabase(metricsConnFile, Database::API_VERSION_LATEST, IsInternal::True, locality);
metricsLogger = runMetrics(db, KeyRef(metricsPrefix));
db->globalConfig->trigger(samplingFrequency, samplingProfilerUpdateFrequency);
} catch (Error& e) {
TraceEvent(SevWarnAlways, "TDMetricsBadClusterFile").error(e).detail("ConnFile", metricsConnFile);
}
} else {
auto lockAware = metricsPrefix.size() && metricsPrefix[0] == '\xff' ? LockAware::True : LockAware::False;
metricsLogger =
runMetrics(openDBOnServer(dbInfo, TaskPriority::DefaultEndpoint, lockAware), KeyRef(metricsPrefix));
auto database = openDBOnServer(dbInfo, TaskPriority::DefaultEndpoint, lockAware);
metricsLogger = runMetrics(database, KeyRef(metricsPrefix));
database->globalConfig->trigger(samplingFrequency, samplingProfilerUpdateFrequency);
}
GlobalConfig::globalConfig().trigger(samplingFrequency, samplingProfilerUpdateFrequency);
}
errorForwarders.add(resetAfter(degraded,

View File

@ -56,6 +56,7 @@ struct TransactionWrapper : public ReferenceCounted<TransactionWrapper> {
KeySelector& end,
Key& mapper,
GetRangeLimits limits,
int matchIndex,
Snapshot snapshot,
Reverse reverse) = 0;
@ -128,9 +129,10 @@ struct FlowTransactionWrapper : public TransactionWrapper {
KeySelector& end,
Key& mapper,
GetRangeLimits limits,
int matchIndex,
Snapshot snapshot,
Reverse reverse) override {
return transaction.getMappedRange(begin, end, mapper, limits, snapshot, reverse);
return transaction.getMappedRange(begin, end, mapper, limits, matchIndex, snapshot, reverse);
}
// Gets the key from the database specified by a given key selector
@ -203,9 +205,11 @@ struct ThreadTransactionWrapper : public TransactionWrapper {
KeySelector& end,
Key& mapper,
GetRangeLimits limits,
int matchIndex,
Snapshot snapshot,
Reverse reverse) override {
return unsafeThreadFutureToFuture(transaction->getMappedRange(begin, end, mapper, limits, snapshot, reverse));
return unsafeThreadFutureToFuture(
transaction->getMappedRange(begin, end, mapper, limits, matchIndex, snapshot, reverse));
}
// Gets the key from the database specified by a given key selector
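For illustration, a minimal sketch of how a caller threads the new matchIndex argument through this wrapper; the selectors and mapper below are assumed values, and MATCH_INDEX_ALL / MATCH_INDEX_NONE are the modes exercised by the GetMappedRange workload further down.
// Sketch, inside an ACTOR holding a Reference<TransactionWrapper> tr (illustrative keys):
KeySelector begin = KeySelector(firstGreaterOrEqual("index/000010"_sr));
KeySelector end = KeySelector(firstGreaterOrEqual("index/000490"_sr));
Key mapper = "{K[3]}"_sr; // assumed mapper; the workload builds its mapper with Tuple
// MATCH_INDEX_ALL keeps every index entry key in the result; MATCH_INDEX_NONE
// elides non-boundary index keys and returns only the mapped records.
MappedRangeResult result = wait(tr->getMappedRange(
    begin, end, mapper, GetRangeLimits(100), MATCH_INDEX_ALL, Snapshot::False, Reverse::False));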

View File

@ -72,7 +72,11 @@ struct EncryptKeyProxyTestWorkload : TestWorkload {
state int nAttempts = 0;
loop {
EKPGetLatestBaseCipherKeysRequest req(deterministicRandom()->randomUniqueID(), self->domainIds);
EKPGetLatestBaseCipherKeysRequest req;
req.encryptDomainIds = self->domainIds;
if (deterministicRandom()->randomInt(0, 100) < 50) {
req.debugId = deterministicRandom()->randomUniqueID();
}
ErrorOr<EKPGetLatestBaseCipherKeysReply> rep = wait(self->ekpInf.getLatestBaseCipherKeys.tryGetReply(req));
if (rep.present()) {
@ -82,7 +86,7 @@ struct EncryptKeyProxyTestWorkload : TestWorkload {
for (const uint64_t id : self->domainIds) {
bool found = false;
for (const auto& item : rep.get().baseCipherDetails) {
if (item.baseCipherId == id) {
if (item.encryptDomainId == id) {
found = true;
break;
}
@ -131,7 +135,11 @@ struct EncryptKeyProxyTestWorkload : TestWorkload {
// assertions. However, in simulation runs, RPCs can be force-failed to inject retries; hence, the code leverages
// tryGetReply to ensure at-most-once delivery of the message. Further, assertions are relaxed to account for
// cache warm-up due to retries.
EKPGetLatestBaseCipherKeysRequest req(deterministicRandom()->randomUniqueID(), self->domainIds);
EKPGetLatestBaseCipherKeysRequest req;
req.encryptDomainIds = self->domainIds;
if (deterministicRandom()->randomInt(0, 100) < 50) {
req.debugId = deterministicRandom()->randomUniqueID();
}
ErrorOr<EKPGetLatestBaseCipherKeysReply> rep = wait(self->ekpInf.getLatestBaseCipherKeys.tryGetReply(req));
if (rep.present()) {
ASSERT(!rep.get().error.present());
@ -140,7 +148,7 @@ struct EncryptKeyProxyTestWorkload : TestWorkload {
for (const uint64_t id : self->domainIds) {
bool found = false;
for (const auto& item : rep.get().baseCipherDetails) {
if (item.baseCipherId == id) {
if (item.encryptDomainId == id) {
found = true;
break;
}
@ -176,7 +184,11 @@ struct EncryptKeyProxyTestWorkload : TestWorkload {
self->domainIds.emplace_back(self->minDomainId + i);
}
EKPGetLatestBaseCipherKeysRequest req(deterministicRandom()->randomUniqueID(), self->domainIds);
EKPGetLatestBaseCipherKeysRequest req;
req.encryptDomainIds = self->domainIds;
if (deterministicRandom()->randomInt(0, 100) < 50) {
req.debugId = deterministicRandom()->randomUniqueID();
}
EKPGetLatestBaseCipherKeysReply rep = wait(self->ekpInf.getLatestBaseCipherKeys.getReply(req));
ASSERT(!rep.error.present());
@ -184,7 +196,7 @@ struct EncryptKeyProxyTestWorkload : TestWorkload {
for (const uint64_t id : self->domainIds) {
bool found = false;
for (const auto& item : rep.baseCipherDetails) {
if (item.baseCipherId == id) {
if (item.encryptDomainId == id) {
found = true;
break;
}
@ -200,14 +212,24 @@ struct EncryptKeyProxyTestWorkload : TestWorkload {
}
state int numIterations = deterministicRandom()->randomInt(512, 786);
for (; numIterations > 0; numIterations--) {
for (; numIterations > 0;) {
int idx = deterministicRandom()->randomInt(1, self->cipherIds.size());
int nIds = deterministicRandom()->randomInt(1, self->cipherIds.size());
EKPGetBaseCipherKeysByIdsRequest req;
if (deterministicRandom()->randomInt(0, 100) < 50) {
req.debugId = deterministicRandom()->randomUniqueID();
}
for (int i = idx; i < nIds && i < self->cipherIds.size(); i++) {
req.baseCipherIds.emplace_back(std::make_pair(self->cipherIds[i], 1));
}
if (req.baseCipherIds.empty()) {
// No keys to query; continue
continue;
} else {
numIterations--;
}
expectedHits = req.baseCipherIds.size();
EKPGetBaseCipherKeysByIdsReply rep = wait(self->ekpInf.getBaseCipherKeysByIds.getReply(req));
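The three request sites above repeat the same optional-debugId pattern; a hypothetical helper (name and template are assumptions, not part of this change) would capture it:
// Hypothetical helper; Req is any EKP request type carrying an Optional<UID> debugId.
template <class Req>
void maybeAttachDebugId(Req& req) {
    // Tag roughly half of the requests with a debugId so debug-trace propagation
    // is exercised without flooding the logs.
    if (deterministicRandom()->randomInt(0, 100) < 50) {
        req.debugId = deterministicRandom()->randomUniqueID();
    }
}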

View File

@ -226,7 +226,7 @@ struct EncryptionOpsWorkload : TestWorkload {
Reference<BlobCipherKey> cipherKey = cipherKeyCache->getCipherKey(domainId, baseCipherId, salt);
if (simCacheMiss) {
TraceEvent("SimKeyCacheMiss").detail("EncyrptDomainId", domainId).detail("BaseCipherId", baseCipherId);
TraceEvent("SimKeyCacheMiss").detail("EncryptDomainId", domainId).detail("BaseCipherId", baseCipherId);
// simulate KeyCache miss that may happen during decryption; insert a CipherKey with known 'salt'
cipherKeyCache->insertCipherKey(domainId,
baseCipherId,

View File

@ -901,6 +901,7 @@ struct FuzzApiCorrectnessWorkload : TestWorkload {
Key autoCoordinatorSpecialKey =
LiteralStringRef("auto_coordinators")
.withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::MANAGEMENT).begin);
KeyRangeRef actorLineageRange = SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::ACTORLINEAGE);
// Reading this particular special key may throw timed_out
Key statusJsonSpecialKey = LiteralStringRef("\xff\xff/status/json");
@ -920,8 +921,9 @@ struct FuzzApiCorrectnessWorkload : TestWorkload {
error_code_timed_out,
ExceptionContract::possibleIf(key1 <= statusJsonSpecialKey && statusJsonSpecialKey < key2)),
std::make_pair(error_code_special_keys_api_failure,
ExceptionContract::possibleIf(key1 <= autoCoordinatorSpecialKey &&
autoCoordinatorSpecialKey < key2)),
ExceptionContract::possibleIf(
(key1 <= autoCoordinatorSpecialKey && autoCoordinatorSpecialKey < key2) ||
actorLineageRange.intersects(KeyRangeRef(key1, key2)))),
std::make_pair(error_code_accessed_unreadable, ExceptionContract::Possible),
std::make_pair(error_code_tenant_not_found,
ExceptionContract::possibleIf(!workload->canUseTenant(tr->getTenant()))),
@ -956,6 +958,7 @@ struct FuzzApiCorrectnessWorkload : TestWorkload {
Key autoCoordinatorSpecialKey =
LiteralStringRef("auto_coordinators")
.withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::MANAGEMENT).begin);
KeyRangeRef actorLineageRange = SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::ACTORLINEAGE);
Key statusJsonSpecialKey = LiteralStringRef("\xff\xff/status/json");
contract = {
@ -975,8 +978,9 @@ struct FuzzApiCorrectnessWorkload : TestWorkload {
error_code_timed_out,
ExceptionContract::possibleIf(key1 <= statusJsonSpecialKey && statusJsonSpecialKey < key2)),
std::make_pair(error_code_special_keys_api_failure,
ExceptionContract::possibleIf((key1 <= autoCoordinatorSpecialKey) &&
(autoCoordinatorSpecialKey < key2))),
ExceptionContract::possibleIf(
(key1 <= autoCoordinatorSpecialKey && autoCoordinatorSpecialKey < key2) ||
actorLineageRange.intersects(KeyRangeRef(key1, key2)))),
std::make_pair(error_code_accessed_unreadable, ExceptionContract::Possible),
std::make_pair(error_code_tenant_not_found,
ExceptionContract::possibleIf(!workload->canUseTenant(tr->getTenant()))),

View File

@ -145,10 +145,18 @@ struct GetMappedRangeWorkload : ApiWorkload {
}
// Return true if a retry is needed.
static bool validateRecord(int expectedId, const MappedKeyValueRef* it, GetMappedRangeWorkload* self) {
static bool validateRecord(int expectedId,
const MappedKeyValueRef* it,
GetMappedRangeWorkload* self,
int matchIndex,
bool isBoundary) {
// std::cout << "validateRecord expectedId " << expectedId << " it->key " << printable(it->key) << "
// indexEntryKey(expectedId) " << printable(indexEntryKey(expectedId)) << std::endl;
ASSERT(it->key == indexEntryKey(expectedId));
if (matchIndex == MATCH_INDEX_ALL || isBoundary) {
ASSERT(it->key == indexEntryKey(expectedId));
} else {
ASSERT(it->key == EMPTY);
}
ASSERT(it->value == EMPTY);
if (self->SPLIT_RECORDS) {
@ -189,7 +197,8 @@ struct GetMappedRangeWorkload : ApiWorkload {
Key mapper,
int limit,
int expectedBeginId,
GetMappedRangeWorkload* self) {
GetMappedRangeWorkload* self,
int matchIndex) {
std::cout << "start scanMappedRangeWithLimits beginSelector:" << beginSelector.toString()
<< " endSelector:" << endSelector.toString() << " expectedBeginId:" << expectedBeginId
@ -197,8 +206,13 @@ struct GetMappedRangeWorkload : ApiWorkload {
loop {
state Reference<TransactionWrapper> tr = self->createTransaction();
try {
MappedRangeResult result = wait(tr->getMappedRange(
beginSelector, endSelector, mapper, GetRangeLimits(limit), self->snapshot, Reverse::False));
MappedRangeResult result = wait(tr->getMappedRange(beginSelector,
endSelector,
mapper,
GetRangeLimits(limit),
matchIndex,
self->snapshot,
Reverse::False));
// showResult(result);
if (self->BAD_MAPPER) {
TraceEvent("GetMappedRangeWorkloadShouldNotReachable").detail("ResultSize", result.size());
@ -208,8 +222,10 @@ struct GetMappedRangeWorkload : ApiWorkload {
ASSERT(result.size() <= limit);
int expectedId = expectedBeginId;
bool needRetry = false;
for (const MappedKeyValueRef* it = result.begin(); it != result.end(); it++) {
if (validateRecord(expectedId, it, self)) {
int cnt = 0;
const MappedKeyValueRef* it = result.begin();
for (; cnt < result.size(); cnt++, it++) {
if (validateRecord(expectedId, it, self, matchIndex, cnt == 0 || cnt == result.size() - 1)) {
needRetry = true;
break;
}
@ -236,7 +252,12 @@ struct GetMappedRangeWorkload : ApiWorkload {
}
}
ACTOR Future<Void> scanMappedRange(Database cx, int beginId, int endId, Key mapper, GetMappedRangeWorkload* self) {
ACTOR Future<Void> scanMappedRange(Database cx,
int beginId,
int endId,
Key mapper,
GetMappedRangeWorkload* self,
int matchIndex) {
Key beginTuple = Tuple().append(prefix).append(INDEX).append(indexKey(beginId)).getDataAsStandalone();
state KeySelector beginSelector = KeySelector(firstGreaterOrEqual(beginTuple));
Key endTuple = Tuple().append(prefix).append(INDEX).append(indexKey(endId)).getDataAsStandalone();
@ -244,14 +265,15 @@ struct GetMappedRangeWorkload : ApiWorkload {
state int limit = 100;
state int expectedBeginId = beginId;
while (true) {
MappedRangeResult result = wait(
self->scanMappedRangeWithLimits(cx, beginSelector, endSelector, mapper, limit, expectedBeginId, self));
MappedRangeResult result = wait(self->scanMappedRangeWithLimits(
cx, beginSelector, endSelector, mapper, limit, expectedBeginId, self, matchIndex));
expectedBeginId += result.size();
if (result.more) {
if (result.empty()) {
// This is usually not expected.
std::cout << "not result but have more, try again" << std::endl;
} else {
// auto& reqAndResult = std::get<GetRangeReqAndResultRef>(result.back().reqAndResult);
beginSelector = KeySelector(firstGreaterThan(result.back().key));
}
} else {
@ -296,6 +318,7 @@ struct GetMappedRangeWorkload : ApiWorkload {
endSelector,
mapper,
GetRangeLimits(GetRangeLimits::ROW_LIMIT_UNLIMITED),
MATCH_INDEX_ALL,
self->snapshot,
Reverse::False);
}
@ -394,7 +417,8 @@ struct GetMappedRangeWorkload : ApiWorkload {
Key mapper = getMapper(self);
// The scanned range must not be too large, to avoid hitting get_mapped_key_values_has_more. We have a unit test
// validating that the error is thrown when the range is large.
wait(self->scanMappedRange(cx, 10, 490, mapper, self));
int matchIndex = deterministicRandom()->random01() > 0.5 ? MATCH_INDEX_NONE : MATCH_INDEX_ALL;
wait(self->scanMappedRange(cx, 10, 490, mapper, self, matchIndex));
return Void();
}
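Note the boundary special-case: even with MATCH_INDEX_NONE, the first and last records of a batch keep their index keys, which is what lets the scan build its continuation selector from result.back().key. The rule checked by validateRecord, restated as a sketch:
// isBoundary is true for the first and last record of a batch
// (cnt == 0 || cnt == result.size() - 1 in the loop above).
bool expectIndexKeyPresent(int matchIndex, bool isBoundary) {
    return matchIndex == MATCH_INDEX_ALL || isBoundary;
}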

View File

@ -1175,7 +1175,7 @@ TEST_CASE("flow/BlobCipher") {
TraceEvent("MultiAuthMode_Done").log();
}
// Validate dropping encyrptDomainId cached keys
// Validate dropping encryptDomainId cached keys
const EncryptCipherDomainId candidate = deterministicRandom()->randomInt(minDomainId, maxDomainId);
cipherKeyCache->resetEncryptDomainId(candidate);
std::vector<Reference<BlobCipherKey>> cachedKeys = cipherKeyCache->getAllCiphers(candidate);

View File

@ -19,6 +19,8 @@ set(FLOW_SRCS
Error.cpp
Error.h
EventTypes.actor.h
EncryptUtils.h
EncryptUtils.cpp
FastAlloc.cpp
FastAlloc.h
FastRef.h
@ -46,6 +48,8 @@ set(FLOW_SRCS
Knobs.cpp
Knobs.h
MetricSample.h
MkCert.h
MkCert.cpp
Net2.actor.cpp
Net2Packet.cpp
Net2Packet.h
@ -54,6 +58,7 @@ set(FLOW_SRCS
Platform.h
Profiler.actor.cpp
Profiler.h
ScopeExit.h
SendBufferIterator.h
SignalSafeUnwind.cpp
SignalSafeUnwind.h
@ -191,3 +196,16 @@ if(APPLE)
target_link_libraries(flow PRIVATE ${IO_KIT} ${CORE_FOUNDATION})
target_link_libraries(flow_sampling PRIVATE ${IO_KIT} ${CORE_FOUNDATION})
endif()
add_executable(mkcert MkCertCli.cpp)
target_link_libraries(mkcert PUBLIC fmt::fmt flow)
add_executable(mtls_unittest TLSTest.cpp)
target_link_libraries(mtls_unittest PUBLIC fmt::fmt flow)
if(USE_SANITIZER)
target_link_libraries(mtls_unittest PUBLIC boost_asan)
else()
target_link_libraries(mtls_unittest PUBLIC boost_target)
endif()
add_test(NAME mutual_tls_unittest
COMMAND $<TARGET_FILE:mtls_unittest>)

38
flow/EncryptUtils.cpp Normal file
View File

@ -0,0 +1,38 @@
/*
* EncryptUtils.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "flow/EncryptUtils.h"
#include "flow/Trace.h"
#include <boost/format.hpp>
std::string getEncryptDbgTraceKey(std::string_view prefix,
EncryptCipherDomainId domainId,
Optional<EncryptCipherBaseKeyId> baseCipherId) {
// Construct the TraceEvent field key, ensuring its uniqueness and compliance with the TraceEvent field validator
// and log parsing tools
if (baseCipherId.present()) {
boost::format fmter("%s.%lld.%llu");
return boost::str(boost::format(fmter % prefix % domainId % baseCipherId.get()));
} else {
boost::format fmter("%s.%lld");
return boost::str(boost::format(fmter % prefix % domainId));
}
}
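A usage sketch with illustrative values: the prefix (from flow/EncryptUtils.h), the domain id, and the optional base cipher id are joined into a dot-separated trace field key.
// Illustrative values only.
std::string k1 = getEncryptDbgTraceKey(ENCRYPT_DBG_TRACE_QUERY_PREFIX, 1001, 7); // "Qry.1001.7"
std::string k2 = getEncryptDbgTraceKey(ENCRYPT_DBG_TRACE_CACHED_PREFIX, 1001); // "Chd.1001"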

View File

@ -22,8 +22,12 @@
#define ENCRYPT_UTILS_H
#pragma once
#include "flow/Arena.h"
#include <cstdint>
#include <limits>
#include <string>
#include <string_view>
#define ENCRYPT_INVALID_DOMAIN_ID 0
#define ENCRYPT_INVALID_CIPHER_KEY_ID 0
@ -50,7 +54,7 @@ static_assert(EncryptCipherMode::ENCRYPT_CIPHER_MODE_LAST <= std::numeric_limits
// EncryptionHeader authentication modes
// 1. NONE - No 'authentication token' generation needed for EncryptionHeader i.e. no protection against header OR
// cipherText 'tampering' and/or bit rot/flip corruptions.
// 2. Single/Multi - Encyrption header would generate one or more 'authentication tokens' to protect the header against
// 2. Single/Multi - Encryption header would generate one or more 'authentication tokens' to protect the header against
// 'tampering' and/or bit rot/flip corruptions. Refer to BlobCipher.h for detailed usage recommendations.
// 3. LAST - Invalid mode, used for static asserts.
@ -64,4 +68,13 @@ typedef enum {
static_assert(EncryptAuthTokenMode::ENCRYPT_HEADER_AUTH_TOKEN_LAST <= std::numeric_limits<uint8_t>::max(),
"EncryptHeaderAuthToken value overflow");
constexpr std::string_view ENCRYPT_DBG_TRACE_CACHED_PREFIX = "Chd";
constexpr std::string_view ENCRYPT_DBG_TRACE_QUERY_PREFIX = "Qry";
constexpr std::string_view ENCRYPT_DBG_TRACE_INSERT_PREFIX = "Ins";
constexpr std::string_view ENCRYPT_DBG_TRACE_RESULT_PREFIX = "Res";
// Utility interface to construct TraceEvent key for debugging
std::string getEncryptDbgTraceKey(std::string_view prefix,
EncryptCipherDomainId domainId,
Optional<EncryptCipherBaseKeyId> baseCipherId = Optional<EncryptCipherBaseKeyId>());
#endif

View File

@ -39,47 +39,19 @@ Hostname Hostname::parse(const std::string& s) {
return Hostname(f.substr(0, colonPos), f.substr(colonPos + 1), isTLS);
}
void Hostname::resetToUnresolved() {
if (status == Hostname::RESOLVED) {
status = UNRESOLVED;
resolvedAddress = Optional<NetworkAddress>();
}
}
ACTOR Future<Optional<NetworkAddress>> resolveImpl(Hostname* self) {
loop {
if (self->status == Hostname::UNRESOLVED) {
self->status = Hostname::RESOLVING;
try {
std::vector<NetworkAddress> addresses =
wait(INetworkConnections::net()->resolveTCPEndpointWithDNSCache(self->host, self->service));
NetworkAddress address = addresses[deterministicRandom()->randomInt(0, addresses.size())];
address.flags = 0; // Reset the parsed address to public
address.fromHostname = NetworkAddressFromHostname::True;
if (self->isTLS) {
address.flags |= NetworkAddress::FLAG_TLS;
}
self->resolvedAddress = address;
self->status = Hostname::RESOLVED;
self->resolveFinish.trigger();
return self->resolvedAddress.get();
} catch (...) {
self->status = Hostname::UNRESOLVED;
self->resolveFinish.trigger();
self->resolvedAddress = Optional<NetworkAddress>();
return Optional<NetworkAddress>();
}
} else if (self->status == Hostname::RESOLVING) {
wait(self->resolveFinish.onTrigger());
if (self->status == Hostname::RESOLVED) {
return self->resolvedAddress.get();
}
// Otherwise, this means other threads failed on resolve, so here we go back to the loop and try to resolve
// again.
} else {
// status is RESOLVED, nothing to do.
return self->resolvedAddress.get();
try {
std::vector<NetworkAddress> addresses =
wait(INetworkConnections::net()->resolveTCPEndpointWithDNSCache(self->host, self->service));
NetworkAddress address = addresses[deterministicRandom()->randomInt(0, addresses.size())];
address.flags = 0; // Reset the parsed address to public
address.fromHostname = NetworkAddressFromHostname::True;
if (self->isTLS) {
address.flags |= NetworkAddress::FLAG_TLS;
}
return address;
} catch (...) {
return Optional<NetworkAddress>();
}
}
@ -109,24 +81,19 @@ Future<NetworkAddress> Hostname::resolveWithRetry() {
}
Optional<NetworkAddress> Hostname::resolveBlocking() {
if (status != RESOLVED) {
try {
std::vector<NetworkAddress> addresses =
INetworkConnections::net()->resolveTCPEndpointBlockingWithDNSCache(host, service);
NetworkAddress address = addresses[deterministicRandom()->randomInt(0, addresses.size())];
address.flags = 0; // Reset the parsed address to public
address.fromHostname = NetworkAddressFromHostname::True;
if (isTLS) {
address.flags |= NetworkAddress::FLAG_TLS;
}
resolvedAddress = address;
status = RESOLVED;
} catch (...) {
status = UNRESOLVED;
resolvedAddress = Optional<NetworkAddress>();
try {
std::vector<NetworkAddress> addresses =
INetworkConnections::net()->resolveTCPEndpointBlockingWithDNSCache(host, service);
NetworkAddress address = addresses[deterministicRandom()->randomInt(0, addresses.size())];
address.flags = 0; // Reset the parsed address to public
address.fromHostname = NetworkAddressFromHostname::True;
if (isTLS) {
address.flags |= NetworkAddress::FLAG_TLS;
}
return address;
} catch (...) {
return Optional<NetworkAddress>();
}
return resolvedAddress;
}
TEST_CASE("/flow/Hostname/hostname") {
@ -179,49 +146,36 @@ TEST_CASE("/flow/Hostname/hostname") {
ASSERT(!Hostname::isHostname(hn12s));
ASSERT(!Hostname::isHostname(hn13s));
ASSERT(hn1.status == Hostname::UNRESOLVED && !hn1.resolvedAddress.present());
ASSERT(hn2.status == Hostname::UNRESOLVED && !hn2.resolvedAddress.present());
ASSERT(hn3.status == Hostname::UNRESOLVED && !hn3.resolvedAddress.present());
ASSERT(hn4.status == Hostname::UNRESOLVED && !hn4.resolvedAddress.present());
state Optional<NetworkAddress> optionalAddress = wait(hn2.resolve());
ASSERT(!optionalAddress.present());
state Optional<NetworkAddress> emptyAddress = wait(hn2.resolve());
ASSERT(hn2.status == Hostname::UNRESOLVED && !hn2.resolvedAddress.present() && !emptyAddress.present());
optionalAddress = hn2.resolveBlocking();
ASSERT(!optionalAddress.present());
state NetworkAddress address;
try {
NetworkAddress _ = wait(timeoutError(hn2.resolveWithRetry(), 1));
wait(timeoutError(store(address, hn2.resolveWithRetry()), 1));
} catch (Error& e) {
ASSERT(e.code() == error_code_timed_out);
}
ASSERT(hn2.status == Hostname::UNRESOLVED && !hn2.resolvedAddress.present());
emptyAddress = hn2.resolveBlocking();
ASSERT(hn2.status == Hostname::UNRESOLVED && !hn2.resolvedAddress.present() && !emptyAddress.present());
ASSERT(address == NetworkAddress());
state NetworkAddress addressSource = NetworkAddress::parse("127.0.0.0:1234");
INetworkConnections::net()->addMockTCPEndpoint("host-name", "1234", { addressSource });
// Test resolve.
state Optional<NetworkAddress> optionalAddress = wait(hn2.resolve());
ASSERT(hn2.status == Hostname::RESOLVED);
ASSERT(hn2.resolvedAddress.get() == addressSource && optionalAddress.get() == addressSource);
wait(store(optionalAddress, hn2.resolve()));
ASSERT(optionalAddress.present() && optionalAddress.get() == addressSource);
optionalAddress = Optional<NetworkAddress>();
// Test resolveBlocking.
optionalAddress = hn2.resolveBlocking();
ASSERT(optionalAddress.present() && optionalAddress.get() == addressSource);
optionalAddress = Optional<NetworkAddress>();
// Test resolveWithRetry.
hn2.resetToUnresolved();
ASSERT(hn2.status == Hostname::UNRESOLVED && !hn2.resolvedAddress.present());
state NetworkAddress address = wait(hn2.resolveWithRetry());
ASSERT(hn2.status == Hostname::RESOLVED);
ASSERT(hn2.resolvedAddress.get() == addressSource && address == addressSource);
// Test resolveBlocking.
hn2.resetToUnresolved();
ASSERT(hn2.status == Hostname::UNRESOLVED && !hn2.resolvedAddress.present());
optionalAddress = hn2.resolveBlocking();
ASSERT(hn2.status == Hostname::RESOLVED);
ASSERT(hn2.resolvedAddress.get() == addressSource && optionalAddress.get() == addressSource);
optionalAddress = Optional<NetworkAddress>();
wait(store(address, hn2.resolveWithRetry()));
ASSERT(address == addressSource);
return Void();
}

View File

@ -33,16 +33,6 @@ struct Hostname {
Hostname(const std::string& host, const std::string& service, bool isTLS)
: host(host), service(service), isTLS(isTLS) {}
Hostname() : host(""), service(""), isTLS(false) {}
Hostname(const Hostname& rhs) { operator=(rhs); }
Hostname& operator=(const Hostname& rhs) {
// Copy everything except AsyncTrigger resolveFinish.
host = rhs.host;
service = rhs.service;
isTLS = rhs.isTLS;
resolvedAddress = rhs.resolvedAddress;
status = rhs.status;
return *this;
}
bool operator==(const Hostname& r) const { return host == r.host && service == r.service && isTLS == r.isTLS; }
bool operator!=(const Hostname& r) const { return !(*this == r); }
@ -72,20 +62,15 @@ struct Hostname {
std::string toString() const { return host + ":" + service + (isTLS ? ":tls" : ""); }
Optional<NetworkAddress> resolvedAddress;
enum HostnameStatus { UNRESOLVED, RESOLVING, RESOLVED };
// The resolve functions below use DNS cache.
Future<Optional<NetworkAddress>> resolve();
Future<NetworkAddress> resolveWithRetry();
Optional<NetworkAddress> resolveBlocking(); // This one should only be used when resolving asynchronously is
// impossible. For all other cases, resolve() should be preferred.
void resetToUnresolved();
HostnameStatus status = UNRESOLVED;
AsyncTrigger resolveFinish;
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, host, service, isTLS, resolvedAddress, status);
serializer(ar, host, service, isTLS);
}
};
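With the status machinery removed, each resolve call is stateless; a caller sketch under that assumption:
// Sketch (inside an ACTOR): resolve() yields an empty Optional on failure,
// while resolveWithRetry() keeps retrying until resolution succeeds.
state Hostname hn = Hostname::parse("host-name:4500:tls");
Optional<NetworkAddress> addr = wait(hn.resolve());
if (!addr.present()) {
    NetworkAddress a = wait(hn.resolveWithRetry());
}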

View File

@ -90,6 +90,7 @@ public:
uint64_t second() const { return part[1]; }
static UID fromString(std::string const&);
static UID fromStringThrowsOnFailure(std::string const&);
template <class Ar>
void serialize_unversioned(

394
flow/MkCert.cpp Normal file
View File

@ -0,0 +1,394 @@
/*
* MkCert.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "flow/Arena.h"
#include "flow/IRandom.h"
#include "flow/MkCert.h"
#include "flow/ScopeExit.h"
#include <limits>
#include <memory>
#include <string>
#include <cstring>
#include <openssl/bio.h>
#include <openssl/ec.h>
#include <openssl/err.h>
#include <openssl/evp.h>
#include <openssl/objects.h>
#include <openssl/pem.h>
#include <openssl/x509.h>
#include <openssl/x509v3.h>
namespace {
[[noreturn]] void traceAndThrow(const char* condition, int line) {
auto te = TraceEvent(SevWarnAlways, "MkCertOrKeyError");
te.suppressFor(10).detail("Line", line).detail("Condition", condition);
if (auto err = ::ERR_get_error()) {
char buf[256]{
0,
};
::ERR_error_string_n(err, buf, sizeof(buf));
te.detail("OpenSSLError", static_cast<const char*>(buf));
}
throw tls_error();
}
} // anonymous namespace
#define OSSL_ASSERT(condition) \
do { \
if (!(condition)) \
traceAndThrow(#condition, __LINE__); \
} while (false)
namespace mkcert {
// Helper functions working with OpenSSL native types
std::shared_ptr<X509> readX509CertPem(StringRef x509CertPem);
std::shared_ptr<EVP_PKEY> readPrivateKeyPem(StringRef privateKeyPem);
std::shared_ptr<EVP_PKEY> makeEllipticCurveKeyPairNative();
StringRef writeX509CertPem(Arena& arena, const std::shared_ptr<X509>& nativeCert);
StringRef writePrivateKeyPem(Arena& arena, const std::shared_ptr<EVP_PKEY>& nativePrivateKey);
struct CertAndKeyNative {
std::shared_ptr<X509> cert;
std::shared_ptr<EVP_PKEY> privateKey;
bool valid() const noexcept { return cert && privateKey; }
// self-signed cert case
bool null() const noexcept { return !cert && !privateKey; }
using SelfType = CertAndKeyNative;
using PemType = CertAndKeyRef;
static SelfType fromPem(PemType certAndKey) {
auto ret = SelfType{};
if (certAndKey.empty())
return ret;
auto [certPem, keyPem] = certAndKey;
// either both set or both unset
ASSERT(!certPem.empty() && !keyPem.empty());
ret.cert = readX509CertPem(certPem);
ret.privateKey = readPrivateKeyPem(keyPem);
return ret;
}
PemType toPem(Arena& arena) {
auto ret = PemType{};
if (null())
return ret;
ASSERT(valid());
ret.certPem = writeX509CertPem(arena, cert);
ret.privateKeyPem = writePrivateKeyPem(arena, privateKey);
return ret;
}
};
CertAndKeyNative makeCertNative(CertSpecRef spec, CertAndKeyNative issuer);
void printCert(FILE* out, StringRef certPem) {
auto x = readX509CertPem(certPem);
OSSL_ASSERT(0 < ::X509_print_fp(out, x.get()));
}
void printPrivateKey(FILE* out, StringRef privateKeyPem) {
auto key = readPrivateKeyPem(privateKeyPem);
auto bio = ::BIO_new_fp(out, BIO_NOCLOSE);
OSSL_ASSERT(bio);
auto bioGuard = ScopeExit([bio]() { ::BIO_free(bio); });
OSSL_ASSERT(0 < ::EVP_PKEY_print_private(bio, key.get(), 0, nullptr));
}
std::shared_ptr<EVP_PKEY> makeEllipticCurveKeyPairNative() {
auto params = std::add_pointer_t<EVP_PKEY>();
{
auto pctx = ::EVP_PKEY_CTX_new_id(EVP_PKEY_EC, nullptr);
OSSL_ASSERT(pctx);
auto ctxGuard = ScopeExit([pctx]() { ::EVP_PKEY_CTX_free(pctx); });
OSSL_ASSERT(0 < ::EVP_PKEY_paramgen_init(pctx));
OSSL_ASSERT(0 < ::EVP_PKEY_CTX_set_ec_paramgen_curve_nid(pctx, NID_X9_62_prime256v1));
OSSL_ASSERT(0 < ::EVP_PKEY_paramgen(pctx, &params));
OSSL_ASSERT(params);
}
auto paramsGuard = ScopeExit([params]() { ::EVP_PKEY_free(params); });
// keygen
auto kctx = ::EVP_PKEY_CTX_new(params, nullptr);
OSSL_ASSERT(kctx);
auto kctxGuard = ScopeExit([kctx]() { ::EVP_PKEY_CTX_free(kctx); });
auto key = std::add_pointer_t<EVP_PKEY>();
OSSL_ASSERT(0 < ::EVP_PKEY_keygen_init(kctx));
OSSL_ASSERT(0 < ::EVP_PKEY_keygen(kctx, &key));
OSSL_ASSERT(key);
return std::shared_ptr<EVP_PKEY>(key, &::EVP_PKEY_free);
}
std::shared_ptr<X509> readX509CertPem(StringRef x509CertPem) {
ASSERT(!x509CertPem.empty());
auto bio_mem = ::BIO_new_mem_buf(x509CertPem.begin(), x509CertPem.size());
OSSL_ASSERT(bio_mem);
auto bioGuard = ScopeExit([bio_mem]() { ::BIO_free(bio_mem); });
auto ret = ::PEM_read_bio_X509(bio_mem, nullptr, nullptr, nullptr);
OSSL_ASSERT(ret);
return std::shared_ptr<X509>(ret, &::X509_free);
}
std::shared_ptr<EVP_PKEY> readPrivateKeyPem(StringRef privateKeyPem) {
ASSERT(!privateKeyPem.empty());
auto bio_mem = ::BIO_new_mem_buf(privateKeyPem.begin(), privateKeyPem.size());
OSSL_ASSERT(bio_mem);
auto bioGuard = ScopeExit([bio_mem]() { ::BIO_free(bio_mem); });
auto ret = ::PEM_read_bio_PrivateKey(bio_mem, nullptr, nullptr, nullptr);
OSSL_ASSERT(ret);
return std::shared_ptr<EVP_PKEY>(ret, &::EVP_PKEY_free);
}
StringRef writeX509CertPem(Arena& arena, const std::shared_ptr<X509>& nativeCert) {
auto mem = ::BIO_new(::BIO_s_mem());
OSSL_ASSERT(mem);
auto memGuard = ScopeExit([mem]() { ::BIO_free(mem); });
OSSL_ASSERT(::PEM_write_bio_X509(mem, nativeCert.get()));
auto bioBuf = std::add_pointer_t<char>{};
auto const len = ::BIO_get_mem_data(mem, &bioBuf);
ASSERT_GT(len, 0);
auto buf = new (arena) uint8_t[len];
::memcpy(buf, bioBuf, len);
return StringRef(buf, static_cast<int>(len));
}
StringRef writePrivateKeyPem(Arena& arena, const std::shared_ptr<EVP_PKEY>& nativePrivateKey) {
auto mem = ::BIO_new(::BIO_s_mem());
OSSL_ASSERT(mem);
auto memGuard = ScopeExit([mem]() { ::BIO_free(mem); });
OSSL_ASSERT(::PEM_write_bio_PrivateKey(mem, nativePrivateKey.get(), nullptr, nullptr, 0, 0, nullptr));
auto bioBuf = std::add_pointer_t<char>{};
auto const len = ::BIO_get_mem_data(mem, &bioBuf);
ASSERT_GT(len, 0);
auto buf = new (arena) uint8_t[len];
::memcpy(buf, bioBuf, len);
return StringRef(buf, static_cast<int>(len));
}
KeyPairRef KeyPairRef::make(Arena& arena) {
auto keypair = makeEllipticCurveKeyPairNative();
auto ret = KeyPairRef{};
{
auto len = 0;
len = ::i2d_PrivateKey(keypair.get(), nullptr);
ASSERT_LT(0, len);
auto buf = new (arena) uint8_t[len];
auto out = std::add_pointer_t<uint8_t>(buf);
len = ::i2d_PrivateKey(keypair.get(), &out);
ret.privateKeyDer = StringRef(buf, len);
}
{
auto len = 0;
len = ::i2d_PUBKEY(keypair.get(), nullptr);
ASSERT_LT(0, len);
auto buf = new (arena) uint8_t[len];
auto out = std::add_pointer_t<uint8_t>(buf);
len = ::i2d_PUBKEY(keypair.get(), &out);
ret.publicKeyDer = StringRef(buf, len);
}
return ret;
}
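KeyPairRef::make relies on OpenSSL's standard i2d two-pass idiom; a generic restatement (the helper name is an assumption):
// First call sizes the buffer (null output pointer); the second call serializes.
// i2d_* advances the output cursor, so pass a copy of the buffer pointer.
StringRef derEncodePrivateKey(Arena& arena, EVP_PKEY* key) {
    int len = ::i2d_PrivateKey(key, nullptr); // sizing pass
    ASSERT_LT(0, len);
    auto buf = new (arena) uint8_t[len];
    auto out = buf; // cursor that i2d_PrivateKey advances
    len = ::i2d_PrivateKey(key, &out);
    return StringRef(buf, len);
}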
CertAndKeyNative makeCertNative(CertSpecRef spec, CertAndKeyNative issuer) {
// issuer key/cert must be both set or both null (self-signed case)
ASSERT(issuer.valid() || issuer.null());
auto const isSelfSigned = issuer.null();
auto nativeKeyPair = makeEllipticCurveKeyPairNative();
auto newX = ::X509_new();
OSSL_ASSERT(newX);
auto x509Guard = ScopeExit([&newX]() {
if (newX)
::X509_free(newX);
});
auto smartX = std::shared_ptr<X509>(newX, &::X509_free);
newX = nullptr;
auto x = smartX.get();
OSSL_ASSERT(0 < ::X509_set_version(x, 2 /*X509_VERSION_3*/));
auto serialPtr = ::X509_get_serialNumber(x);
OSSL_ASSERT(serialPtr);
OSSL_ASSERT(0 < ::ASN1_INTEGER_set(serialPtr, spec.serialNumber));
auto notBefore = ::X509_getm_notBefore(x);
OSSL_ASSERT(notBefore);
OSSL_ASSERT(::X509_gmtime_adj(notBefore, spec.offsetNotBefore));
auto notAfter = ::X509_getm_notAfter(x);
OSSL_ASSERT(notAfter);
OSSL_ASSERT(::X509_gmtime_adj(notAfter, spec.offsetNotAfter));
OSSL_ASSERT(0 < ::X509_set_pubkey(x, nativeKeyPair.get()));
auto subjectName = ::X509_get_subject_name(x);
OSSL_ASSERT(subjectName);
for (const auto& entry : spec.subjectName) {
// field names are expected to be null-terminated
auto fieldName = entry.field.toString();
OSSL_ASSERT(0 <
::X509_NAME_add_entry_by_txt(
subjectName, fieldName.c_str(), MBSTRING_ASC, entry.bytes.begin(), entry.bytes.size(), -1, 0));
}
auto issuerName = ::X509_get_issuer_name(x);
OSSL_ASSERT(issuerName);
OSSL_ASSERT(::X509_set_issuer_name(x, (isSelfSigned ? subjectName : ::X509_get_subject_name(issuer.cert.get()))));
auto ctx = X509V3_CTX{};
X509V3_set_ctx_nodb(&ctx);
::X509V3_set_ctx(&ctx, (isSelfSigned ? x : issuer.cert.get()), x, nullptr, nullptr, 0);
for (const auto& entry : spec.extensions) {
// extension field names and values are expected to be null-terminated
auto extName = entry.field.toString();
auto extValue = entry.bytes.toString();
auto extNid = ::OBJ_txt2nid(extName.c_str());
if (extNid == NID_undef) {
TraceEvent(SevWarnAlways, "MkCertInvalidExtName").suppressFor(10).detail("Name", extName);
throw tls_error();
}
#ifdef OPENSSL_IS_BORINGSSL
auto ext = ::X509V3_EXT_conf_nid(nullptr, &ctx, extNid, const_cast<char*>(extValue.c_str()));
#else
auto ext = ::X509V3_EXT_conf_nid(nullptr, &ctx, extNid, extValue.c_str());
#endif
OSSL_ASSERT(ext);
auto extGuard = ScopeExit([ext]() { ::X509_EXTENSION_free(ext); });
OSSL_ASSERT(::X509_add_ext(x, ext, -1));
}
OSSL_ASSERT(::X509_sign(x, (isSelfSigned ? nativeKeyPair.get() : issuer.privateKey.get()), ::EVP_sha256()));
auto ret = CertAndKeyNative{};
ret.cert = smartX;
ret.privateKey = nativeKeyPair;
return ret;
}
CertAndKeyRef CertAndKeyRef::make(Arena& arena, CertSpecRef spec, CertAndKeyRef issuerPem) {
auto issuer = CertAndKeyNative::fromPem(issuerPem);
auto newCertAndKey = makeCertNative(spec, issuer);
return newCertAndKey.toPem(arena);
}
CertSpecRef CertSpecRef::make(Arena& arena, CertKind kind) {
auto spec = CertSpecRef{};
spec.serialNumber = static_cast<long>(deterministicRandom()->randomInt64(0, 1e10));
spec.offsetNotBefore = 0; // now
spec.offsetNotAfter = 60 * 60 * 24 * 365; // 1 year from now
auto& subject = spec.subjectName;
subject.push_back(arena, { "countryName"_sr, "DE"_sr });
subject.push_back(arena, { "localityName"_sr, "Berlin"_sr });
subject.push_back(arena, { "organizationName"_sr, "FoundationDB"_sr });
subject.push_back(arena, { "commonName"_sr, kind.getCommonName("FDB Testing Services"_sr, arena) });
auto& ext = spec.extensions;
if (kind.isCA()) {
ext.push_back(arena, { "basicConstraints"_sr, "critical, CA:TRUE"_sr });
ext.push_back(arena, { "keyUsage"_sr, "critical, digitalSignature, keyCertSign, cRLSign"_sr });
} else {
ext.push_back(arena, { "basicConstraints"_sr, "critical, CA:FALSE"_sr });
ext.push_back(arena, { "keyUsage"_sr, "critical, digitalSignature, keyEncipherment"_sr });
ext.push_back(arena, { "extendedKeyUsage"_sr, "serverAuth, clientAuth"_sr });
}
ext.push_back(arena, { "subjectKeyIdentifier"_sr, "hash"_sr });
if (!kind.isRootCA())
ext.push_back(arena, { "authorityKeyIdentifier"_sr, "keyid, issuer"_sr });
return spec;
}
StringRef concatCertChain(Arena& arena, CertChainRef chain) {
auto len = 0;
for (const auto& entry : chain) {
len += entry.certPem.size();
}
if (len == 0)
return StringRef();
auto buf = new (arena) uint8_t[len];
auto offset = 0;
for (auto const& entry : chain) {
::memcpy(&buf[offset], entry.certPem.begin(), entry.certPem.size());
offset += entry.certPem.size();
}
UNSTOPPABLE_ASSERT(offset == len);
return StringRef(buf, len);
}
CertChainRef makeCertChain(Arena& arena, VectorRef<CertSpecRef> specs, CertAndKeyRef rootAuthority) {
ASSERT_GT(specs.size(), 0);
// if rootAuthority is empty, use last element in specs to make root CA
auto const needRootCA = rootAuthority.empty();
if (needRootCA) {
int const chainLength = specs.size();
auto chain = new (arena) CertAndKeyRef[chainLength];
auto caNative = makeCertNative(specs.back(), CertAndKeyNative{} /* empty issuer == self-signed */);
chain[chainLength - 1] = caNative.toPem(arena);
for (auto i = chainLength - 2; i >= 0; i--) {
auto cnkNative = makeCertNative(specs[i], caNative);
chain[i] = cnkNative.toPem(arena);
caNative = cnkNative;
}
return CertChainRef(chain, chainLength);
} else {
int const chainLength = specs.size() + 1; /* account for deep-copied rootAuthority */
auto chain = new (arena) CertAndKeyRef[chainLength];
auto caNative = CertAndKeyNative::fromPem(rootAuthority);
chain[chainLength - 1] = rootAuthority.deepCopy(arena);
for (auto i = chainLength - 2; i >= 0; i--) {
auto cnkNative = makeCertNative(specs[i], caNative);
chain[i] = cnkNative.toPem(arena);
caNative = cnkNative;
}
return CertChainRef(chain, chainLength);
}
}
VectorRef<CertSpecRef> makeCertChainSpec(Arena& arena, unsigned length, ESide side) {
if (!length)
return {};
auto specs = new (arena) CertSpecRef[length];
auto const isServerSide = side == ESide::Server;
for (auto i = 0u; i < length; i++) {
auto kind = CertKind{};
if (i == 0u)
kind = isServerSide ? CertKind(Server{}) : CertKind(Client{});
else if (i == length - 1)
kind = isServerSide ? CertKind(ServerRootCA{}) : CertKind(ClientRootCA{});
else
kind = isServerSide ? CertKind(ServerIntermediateCA{ i }) : CertKind(ClientIntermediateCA{ i });
specs[i] = CertSpecRef::make(arena, kind);
}
return VectorRef<CertSpecRef>(specs, length);
}
CertChainRef makeCertChain(Arena& arena, unsigned length, ESide side) {
if (!length)
return {};
// temporary arena for writing up specs
auto tmpArena = Arena();
auto specs = makeCertChainSpec(tmpArena, length, side);
return makeCertChain(arena, specs, {} /*root*/);
}
StringRef CertKind::getCommonName(StringRef prefix, Arena& arena) const {
auto const side = std::string(isClientSide() ? " Client" : " Server");
if (isIntermediateCA()) {
auto const level = isClientSide() ? get<ClientIntermediateCA>().level : get<ServerIntermediateCA>().level;
return prefix.withSuffix(fmt::format("{} Intermediate {}", side, level), arena);
} else if (isRootCA()) {
return prefix.withSuffix(fmt::format("{} Root", side), arena);
} else {
return prefix.withSuffix(side, arena);
}
}
} // namespace mkcert

166
flow/MkCert.h Normal file
View File

@ -0,0 +1,166 @@
/*
* MkCert.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MKCERT_H
#define MKCERT_H
#include "flow/Arena.h"
#include "flow/Error.h"
#include <fmt/format.h>
#include <string>
#include <type_traits>
#include <variant>
namespace mkcert {
void printCert(FILE* out, StringRef certPem);
void printPrivateKey(FILE* out, StringRef privateKeyPem);
struct KeyPairRef {
using SelfType = KeyPairRef;
// Make new Elliptic Curve private-public key pair in DER
static SelfType make(Arena& arena);
StringRef privateKeyDer;
StringRef publicKeyDer;
};
struct Asn1EntryRef {
// field must match one of the ASN.1 object short/long names: e.g. "C", "countryName", "CN", "commonName",
// "subjectAltName", ...
StringRef field;
StringRef bytes;
};
struct ServerRootCA {};
struct ServerIntermediateCA {
unsigned level;
};
struct Server {};
struct ClientRootCA {};
struct ClientIntermediateCA {
unsigned level;
};
struct Client {};
struct CertKind {
CertKind() noexcept = default;
template <class Kind>
CertKind(Kind kind) noexcept : value(std::in_place_type<Kind>, kind) {}
template <class Kind>
bool is() const noexcept {
return std::holds_alternative<Kind>(value);
}
template <class Kind>
Kind const& get() const {
return std::get<Kind>(value);
}
bool isServerSide() const noexcept { return is<ServerRootCA>() || is<ServerIntermediateCA>() || is<Server>(); }
bool isClientSide() const noexcept { return !isServerSide(); }
bool isRootCA() const noexcept { return is<ServerRootCA>() || is<ClientRootCA>(); }
bool isIntermediateCA() const noexcept { return is<ServerIntermediateCA>() || is<ClientIntermediateCA>(); }
bool isLeaf() const noexcept { return is<Server>() || is<Client>(); }
bool isCA() const noexcept { return !isLeaf(); }
StringRef getCommonName(StringRef prefix, Arena& arena) const;
std::variant<ServerRootCA, ServerIntermediateCA, Server, ClientRootCA, ClientIntermediateCA, Client> value;
};
struct CertSpecRef {
using SelfType = CertSpecRef;
long serialNumber;
// offset in number of seconds relative to now, i.e. cert creation
long offsetNotBefore;
long offsetNotAfter;
VectorRef<Asn1EntryRef> subjectName;
VectorRef<Asn1EntryRef> extensions;
// make test-only sample certificate whose fields are inferred from CertKind
static SelfType make(Arena& arena, CertKind kind);
};
struct CertAndKeyRef {
using SelfType = CertAndKeyRef;
StringRef certPem;
StringRef privateKeyPem;
void printCert(FILE* out) {
if (!certPem.empty()) {
::mkcert::printCert(out, certPem);
}
}
void printPrivateKey(FILE* out) {
if (!privateKeyPem.empty()) {
::mkcert::printPrivateKey(out, privateKeyPem);
}
}
bool empty() const noexcept { return certPem.empty() && privateKeyPem.empty(); }
SelfType deepCopy(Arena& arena) {
auto ret = SelfType{};
if (!certPem.empty())
ret.certPem = StringRef(arena, certPem);
if (!privateKeyPem.empty())
ret.privateKeyPem = StringRef(arena, privateKeyPem);
return ret;
}
// Empty (default) issuer produces a self-signed certificate
static SelfType make(Arena& arena, CertSpecRef spec, CertAndKeyRef issuer);
};
using CertChainRef = VectorRef<CertAndKeyRef>;
// Concatenate chain of PEMs to one StringRef
StringRef concatCertChain(Arena& arena, CertChainRef chain);
enum class ESide : int { Server, Client };
// Generate a chain of valid cert specs that have consistent subject/issuer names and
// are valid for a typical server/client TLS scenario
// The 'side' parameter makes a difference in the commonName ("CN") field of the produced specs
VectorRef<CertSpecRef> makeCertChainSpec(Arena& arena, unsigned length, ESide side);
// For empty (default) rootAuthority, the last item in specs is used to generate rootAuthority
// Otherwise, rootAuthority is deep-copied into the last element of the returned chain
CertChainRef makeCertChain(Arena& arena, VectorRef<CertSpecRef> specs, CertAndKeyRef rootAuthority);
// Make a stub cert chain of the given length, including the root authority
// Note: side affects nothing other than the choice of common names
CertChainRef makeCertChain(Arena& arena, unsigned depth, ESide side);
} // namespace mkcert
#endif /*MKCERT_H*/
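An end-to-end usage sketch of this API, mirroring what ChainSpec::makeChain in MkCertCli.cpp below does (file output omitted):
// Build a 3-deep server chain (leaf, intermediate, root CA) and derive the
// PEM blobs a server would be configured with.
Arena arena;
mkcert::CertChainRef chain = mkcert::makeCertChain(arena, 3, mkcert::ESide::Server);
StringRef caPem = chain.back().certPem; // trust anchor handed to clients
auto served = chain;
served.pop_back(); // the root CA itself is not served
StringRef certPem = mkcert::concatCertChain(arena, served); // leaf-to-CA order
StringRef keyPem = chain.front().privateKeyPem; // leaf private key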

326
flow/MkCertCli.cpp Normal file
View File

@ -0,0 +1,326 @@
/*
* MkCertCli.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cstdlib>
#include <fstream>
#include <string>
#include <string_view>
#include <thread>
#include <fmt/format.h>
#include "flow/Arena.h"
#include "flow/Error.h"
#include "flow/MkCert.h"
#include "flow/network.h"
#include "flow/Platform.h"
#include "flow/ScopeExit.h"
#include "flow/SimpleOpt.h"
#include "flow/TLSConfig.actor.h"
#include "flow/Trace.h"
enum EMkCertOpt : int {
OPT_HELP,
OPT_SERVER_CHAIN_LEN,
OPT_CLIENT_CHAIN_LEN,
OPT_SERVER_CERT_FILE,
OPT_SERVER_KEY_FILE,
OPT_SERVER_CA_FILE,
OPT_CLIENT_CERT_FILE,
OPT_CLIENT_KEY_FILE,
OPT_CLIENT_CA_FILE,
OPT_EXPIRE_SERVER_CERT,
OPT_EXPIRE_CLIENT_CERT,
OPT_PRINT_SERVER_CERT,
OPT_PRINT_CLIENT_CERT,
OPT_PRINT_ARGUMENTS,
};
CSimpleOpt::SOption gOptions[] = { { OPT_HELP, "--help", SO_NONE },
{ OPT_HELP, "-h", SO_NONE },
{ OPT_SERVER_CHAIN_LEN, "--server-chain-length", SO_REQ_SEP },
{ OPT_SERVER_CHAIN_LEN, "-S", SO_REQ_SEP },
{ OPT_CLIENT_CHAIN_LEN, "--client-chain-length", SO_REQ_SEP },
{ OPT_CLIENT_CHAIN_LEN, "-C", SO_REQ_SEP },
{ OPT_SERVER_CERT_FILE, "--server-cert-file", SO_REQ_SEP },
{ OPT_SERVER_KEY_FILE, "--server-key-file", SO_REQ_SEP },
{ OPT_SERVER_CA_FILE, "--server-ca-file", SO_REQ_SEP },
{ OPT_CLIENT_CERT_FILE, "--client-cert-file", SO_REQ_SEP },
{ OPT_CLIENT_KEY_FILE, "--client-key-file", SO_REQ_SEP },
{ OPT_CLIENT_CA_FILE, "--client-ca-file", SO_REQ_SEP },
{ OPT_EXPIRE_SERVER_CERT, "--expire-server-cert", SO_NONE },
{ OPT_EXPIRE_CLIENT_CERT, "--expire-client-cert", SO_NONE },
{ OPT_PRINT_SERVER_CERT, "--print-server-cert", SO_NONE },
{ OPT_PRINT_CLIENT_CERT, "--print-client-cert", SO_NONE },
{ OPT_PRINT_ARGUMENTS, "--print-args", SO_NONE },
SO_END_OF_OPTIONS };
template <size_t Len>
void printOptionUsage(std::string_view option, const char*(&&optionDescLines)[Len]) {
constexpr std::string_view optionIndent{ " " };
constexpr std::string_view descIndent{ " " };
fmt::print(stdout, "{}{}\n", optionIndent, option);
for (auto descLine : optionDescLines)
fmt::print(stdout, "{}{}\n", descIndent, descLine);
fmt::print("\n");
}
void printUsage(std::string_view binary) {
fmt::print(stdout,
"mkcert: FDB test certificate chain generator\n\n"
"Usage: {} [OPTIONS...]\n\n",
binary);
printOptionUsage("--server-chain-length LENGTH, -S LENGTH (default: 3)",
{ "Length of server certificate chain including root CA certificate." });
printOptionUsage("--client-chain-length LENGTH, -C LENGTH (default: 2)",
{ "Length of client certificate chain including root CA certificate.",
"Use zero-length to test to setup untrusted clients." });
printOptionUsage("--server-cert-file PATH (default: 'server_cert.pem')",
{ "Output filename for server certificate chain excluding root CA.",
"Intended for SERVERS to use as 'tls_certificate_file'.",
"Certificates are concatenated in leaf-to-CA order." });
printOptionUsage("--server-key-file PATH (default: 'server_key.pem')",
{ "Output filename for server private key matching its leaf certificate.",
"Intended for SERVERS to use as 'tls_key_file'" });
printOptionUsage("--server-ca-file PATH (default: 'server_ca.pem')",
{ "Output filename for server's root CA certificate.",
"Content same as '--server-cert-file' for '--server-chain-length' == 1.",
"Intended for CLIENTS to use as 'tls_ca_file': i.e. cert issuer to trust." });
printOptionUsage("--client-cert-file PATH (default: 'client_cert.pem')",
{ "Output filename for client certificate chain excluding root CA.",
"Intended for CLIENTS to use as 'tls_certificate_file'.",
"Certificates are concatenated in leaf-to-CA order." });
printOptionUsage("--client-key-file PATH (default: 'client_key.pem')",
{ "Output filename for client private key matching its leaf certificate.",
"Intended for CLIENTS to use as 'tls_key_file'" });
printOptionUsage("--client-ca-file PATH (default: 'client_ca.pem')",
{ "Output filename for client's root CA certificate.",
"Content same as '--client-cert-file' for '--client-chain-length' == 1.",
"Intended for SERVERS to use as 'tls_ca_file': i.e. cert issuer to trust." });
printOptionUsage("--expire-server-cert (default: no)",
{ "Deliberately expire server's leaf certificate for testing." });
printOptionUsage("--expire-client-cert (default: no)",
{ "Deliberately expire client's leaf certificate for testing." });
printOptionUsage("--print-server-cert (default: no)",
{ "Print generated server certificate chain including root in human readable form.",
"Printed certificates are in leaf-to-CA order.",
"If --print-client-cert is also used, server chain precedes client's." });
printOptionUsage("--print-client-cert (default: no)",
{ "Print generated client certificate chain including root in human readable form.",
"Printed certificates are in leaf-to-CA order.",
"If --print-server-cert is also used, server chain precedes client's." });
printOptionUsage("--print-args (default: no)", { "Print chain generation arguments." });
}
struct ChainSpec {
unsigned length;
std::string certFile;
std::string keyFile;
std::string caFile;
mkcert::ESide side;
bool expireLeaf;
void transformPathToAbs() {
certFile = abspath(certFile);
keyFile = abspath(keyFile);
caFile = abspath(caFile);
}
void print() {
fmt::print(stdout, "{}-side:\n", side == mkcert::ESide::Server ? "Server" : "Client");
fmt::print(stdout, " Chain length: {}\n", length);
fmt::print(stdout, " Certificate file: {}\n", certFile);
fmt::print(stdout, " Key file: {}\n", keyFile);
fmt::print(stdout, " CA file: {}\n", caFile);
fmt::print(stdout, " Expire cert: {}\n", expireLeaf);
}
mkcert::CertChainRef makeChain(Arena& arena);
};
mkcert::CertChainRef ChainSpec::makeChain(Arena& arena) {
auto checkStream = [](std::ofstream& fs, std::string_view filename) {
if (!fs) {
throw std::runtime_error(fmt::format("cannot open '{}' for writing", filename));
}
};
auto ofsCert = std::ofstream(certFile, std::ofstream::out | std::ofstream::trunc);
checkStream(ofsCert, certFile);
auto ofsKey = std::ofstream(keyFile, std::ofstream::out | std::ofstream::trunc);
checkStream(ofsKey, keyFile);
auto ofsCa = std::ofstream(caFile, std::ofstream::out | std::ofstream::trunc);
checkStream(ofsCa, caFile);
if (!length)
return {};
auto specs = mkcert::makeCertChainSpec(arena, length, side);
if (expireLeaf) {
specs[0].offsetNotBefore = -60l * 60 * 24 * 365;
specs[0].offsetNotAfter = -10l;
}
auto chain = mkcert::makeCertChain(arena, specs, {} /*generate root CA*/);
auto ca = chain.back().certPem;
ofsCa.write(reinterpret_cast<char const*>(ca.begin()), ca.size());
auto chainMinusRoot = chain;
if (chainMinusRoot.size() > 1)
chainMinusRoot.pop_back();
auto cert = mkcert::concatCertChain(arena, chainMinusRoot);
ofsCert.write(reinterpret_cast<char const*>(cert.begin()), cert.size());
auto key = chain[0].privateKeyPem;
ofsKey.write(reinterpret_cast<char const*>(key.begin()), key.size());
ofsCert.close();
ofsKey.close();
ofsCa.close();
return chain;
}
int main(int argc, char** argv) {
// default chain specs
auto serverArgs = ChainSpec{ 3u /*length*/, "server_cert.pem", "server_key.pem",
"server_ca.pem", mkcert::ESide::Server, false /* expireLeaf */ };
auto clientArgs = ChainSpec{ 2u /*length*/, "client_cert.pem", "client_key.pem",
"client_ca.pem", mkcert::ESide::Client, false /* expireLeaf */ };
auto printServerCert = false;
auto printClientCert = false;
auto printArguments = false;
auto args = CSimpleOpt(argc, argv, gOptions, SO_O_EXACT | SO_O_HYPHEN_TO_UNDERSCORE);
while (args.Next()) {
if (auto err = args.LastError()) {
switch (err) {
case SO_ARG_INVALID_DATA:
fmt::print(stderr, "ERROR: invalid argument to option '{}'\n", args.OptionText());
return FDB_EXIT_ERROR;
case SO_ARG_INVALID:
fmt::print(stderr, "ERROR: argument given to no-argument option '{}'\n", args.OptionText());
return FDB_EXIT_ERROR;
case SO_ARG_MISSING:
fmt::print(stderr, "ERROR: argument missing for option '{}'\n", args.OptionText());
return FDB_EXIT_ERROR;
case SO_OPT_INVALID:
fmt::print(stderr, "ERROR: unknown option '{}'\n", args.OptionText());
return FDB_EXIT_ERROR;
default:
fmt::print(stderr, "ERROR: unknown error {} with option '{}'\n", err, args.OptionText());
return FDB_EXIT_ERROR;
}
} else {
auto const optId = args.OptionId();
switch (optId) {
case OPT_HELP:
printUsage(argv[0]);
return FDB_EXIT_SUCCESS;
case OPT_SERVER_CHAIN_LEN:
try {
serverArgs.length = std::stoul(args.OptionArg());
} catch (std::exception const& ex) {
fmt::print(stderr, "ERROR: Invalid chain length ({})\n", ex.what());
return FDB_EXIT_ERROR;
}
break;
case OPT_CLIENT_CHAIN_LEN:
try {
clientArgs.length = std::stoul(args.OptionArg());
} catch (std::exception const& ex) {
fmt::print(stderr, "ERROR: Invalid chain length ({})\n", ex.what());
return FDB_EXIT_ERROR;
}
break;
case OPT_SERVER_CERT_FILE:
serverArgs.certFile.assign(args.OptionArg());
break;
case OPT_SERVER_KEY_FILE:
serverArgs.keyFile.assign(args.OptionArg());
break;
case OPT_SERVER_CA_FILE:
serverArgs.caFile.assign(args.OptionArg());
break;
case OPT_CLIENT_CERT_FILE:
clientArgs.certFile.assign(args.OptionArg());
break;
case OPT_CLIENT_KEY_FILE:
clientArgs.keyFile.assign(args.OptionArg());
break;
case OPT_CLIENT_CA_FILE:
clientArgs.caFile.assign(args.OptionArg());
break;
case OPT_EXPIRE_SERVER_CERT:
serverArgs.expireLeaf = true;
break;
case OPT_EXPIRE_CLIENT_CERT:
clientArgs.expireLeaf = true;
break;
case OPT_PRINT_SERVER_CERT:
printServerCert = true;
break;
case OPT_PRINT_CLIENT_CERT:
printClientCert = true;
break;
case OPT_PRINT_ARGUMENTS:
printArguments = true;
break;
default:
fmt::print(stderr, "ERROR: Unknown option {}\n", args.OptionText());
return FDB_EXIT_ERROR;
}
}
}
// Need to involve flow for the TraceEvent.
try {
platformInit();
Error::init();
g_network = newNet2(TLSConfig());
openTraceFile(NetworkAddress(), 10 << 20, 10 << 20, ".", "mkcert");
auto thread = std::thread([]() {
TraceEvent::setNetworkThread();
g_network->run();
});
auto cleanUpGuard = ScopeExit([&thread]() {
flushTraceFileVoid();
g_network->stop();
thread.join();
});
serverArgs.transformPathToAbs();
clientArgs.transformPathToAbs();
if (printArguments) {
serverArgs.print();
clientArgs.print();
}
auto arena = Arena();
auto serverChain = serverArgs.makeChain(arena);
auto clientChain = clientArgs.makeChain(arena);
if (printServerCert || printClientCert) {
if (printServerCert) {
for (auto i = 0; i < serverChain.size(); i++) {
mkcert::printCert(stdout, serverChain[i].certPem);
}
}
if (printClientCert) {
for (auto i = 0; i < clientChain.size(); i++) {
mkcert::printCert(stdout, clientChain[i].certPem);
}
}
} else {
fmt::print("OK\n");
}
return FDB_EXIT_SUCCESS;
} catch (const Error& e) {
fmt::print(stderr, "error: {}\n", e.name());
return FDB_EXIT_MAIN_ERROR;
} catch (const std::exception& e) {
fmt::print(stderr, "exception: {}\n", e.what());
return FDB_EXIT_MAIN_EXCEPTION;
}
}

36
flow/ScopeExit.h Normal file
View File

@ -0,0 +1,36 @@
/*
* ScopeExit.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FLOW_SCOPE_EXIT_H
#define FLOW_SCOPE_EXIT_H
#pragma once
// Execute lambda as this object goes out of scope
template <typename Func>
class ScopeExit {
std::decay_t<Func> fn;
public:
ScopeExit(Func&& fn) : fn(std::forward<Func>(fn)) {}
~ScopeExit() { fn(); }
};
#endif /*FLOW_SCOPE_EXIT_H*/
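Usage matches the guards throughout MkCert.cpp above; a standalone sketch:
#include <cstdio>
// The lambda runs when `guard` is destroyed, including on early return or throw.
void writeGreeting(const char* path) {
    std::FILE* f = std::fopen(path, "w");
    if (!f)
        return;
    auto guard = ScopeExit([f]() { std::fclose(f); });
    std::fputs("hello\n", f); // the file is closed as writeGreeting exits
}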

254
flow/TLSTest.cpp Normal file
View File

@ -0,0 +1,254 @@
/*
* TLSTest.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <boost/asio.hpp>
#include <boost/asio/ssl.hpp>
#include <boost/bind.hpp>
#include <fmt/format.h>
#include <algorithm>
#include <iostream>
#include <cstdio>
#include <cstdlib>
#include "flow/Arena.h"
#include "flow/MkCert.h"
std::FILE* outp = stdout;
template <class... Args>
void log(Args&&... args) {
auto buf = fmt::memory_buffer{};
fmt::format_to(std::back_inserter(buf), std::forward<Args>(args)...);
fmt::print(outp, "{}\n", std::string_view(buf.data(), buf.size()));
}
template <class... Args>
void logc(Args&&... args) {
auto buf = fmt::memory_buffer{};
fmt::format_to(std::back_inserter(buf), "[CLIENT] ");
fmt::format_to(std::back_inserter(buf), std::forward<Args>(args)...);
fmt::print(outp, "{}\n", std::string_view(buf.data(), buf.size()));
}
template <class... Args>
void logs(Args&&... args) {
auto buf = fmt::memory_buffer{};
fmt::format_to(std::back_inserter(buf), "[SERVER] ");
fmt::format_to(std::back_inserter(buf), std::forward<Args>(args)...);
fmt::print(outp, "{}\n", std::string_view(buf.data(), buf.size()));
}
using namespace boost::asio;
using ip::tcp;
using ec_type = boost::system::error_code;
using socket_type = ssl::stream<tcp::socket&>;
using work_guard_type = executor_work_guard<io_context::executor_type>;
const_buffer toBuffer(StringRef s) {
ASSERT(!s.empty());
return const_buffer(s.begin(), s.size());
}
void trustRootCaCert(ssl::context& ctx, StringRef certPem) {
if (!certPem.empty())
ctx.add_certificate_authority(const_buffer(certPem.begin(), certPem.size()));
}
void useChain(ssl::context& ctx, mkcert::CertChainRef chain) {
auto arena = Arena();
auto chainStr = concatCertChain(arena, chain);
if (!chainStr.empty())
ctx.use_certificate_chain(toBuffer(chainStr));
auto keyPem = chain.front().privateKeyPem;
if (!keyPem.empty())
ctx.use_private_key(toBuffer(keyPem), ssl::context::pem);
}
void initCerts(ssl::context& ctx, mkcert::CertChainRef myChain, StringRef peerRootPem) {
trustRootCaCert(ctx, peerRootPem);
if (myChain.size() > 1)
myChain.pop_back();
if (!myChain.empty())
useChain(ctx, myChain);
}
void initSslContext(ssl::context& ctx,
mkcert::CertChainRef myChain,
mkcert::CertChainRef peerChain,
mkcert::ESide side) {
ctx.set_options(ssl::context::default_workarounds);
ctx.set_verify_mode(ssl::context::verify_peer |
(side == mkcert::ESide::Server ? 0 : ssl::verify_fail_if_no_peer_cert));
initCerts(ctx, myChain, peerChain.empty() ? StringRef() : peerChain.back().certPem);
}
template <>
struct fmt::formatter<tcp::endpoint> {
constexpr auto parse(format_parse_context& ctx) -> decltype(ctx.begin()) { return ctx.begin(); }
template <class FormatContext>
auto format(const tcp::endpoint& ep, FormatContext& ctx) -> decltype(ctx.out()) {
return fmt::format_to(ctx.out(), "{}:{}", ep.address().to_string(), ep.port());
}
};
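// Run a single TLS handshake test case. A positive chain length builds a
// valid certificate chain of that depth, zero makes that side present no
// certificate at all, and a negative length builds a chain whose leaf
// certificate has already expired.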
void runTlsTest(int serverChainLen, int clientChainLen) {
log("==== BEGIN TESTCASE ====");
auto clientSsl = ssl::context(ssl::context::tls);
auto serverSsl = ssl::context(ssl::context::tls);
auto const expectHandshakeOk = clientChainLen >= 0 && serverChainLen > 0;
auto const expectTrusted = clientChainLen != 0;
log("cert chain length: server {}, client {}", serverChainLen, clientChainLen);
auto arena = Arena();
auto serverChain = mkcert::CertChainRef{};
auto clientChain = mkcert::CertChainRef{};
if (serverChainLen) {
auto tmpArena = Arena();
auto specs = mkcert::makeCertChainSpec(tmpArena, std::labs(serverChainLen), mkcert::ESide::Server);
if (serverChainLen < 0) {
specs[0].offsetNotBefore = -60l * 60 * 24 * 365;
specs[0].offsetNotAfter = -10l; // cert that expired 10 seconds ago
}
serverChain = mkcert::makeCertChain(arena, specs, {} /* create root CA cert from spec*/);
}
if (clientChainLen) {
auto tmpArena = Arena();
auto specs = mkcert::makeCertChainSpec(tmpArena, std::labs(clientChainLen), mkcert::ESide::Client);
if (clientChainLen < 0) {
specs[0].offsetNotBefore = -60l * 60 * 24 * 365;
specs[0].offsetNotAfter = -10l; // cert that expired 10 seconds ago
}
clientChain = mkcert::makeCertChain(arena, specs, {} /* create root CA cert from spec*/);
}
initSslContext(clientSsl, clientChain, serverChain, mkcert::ESide::Client);
log("client SSL contexts initialized");
initSslContext(serverSsl, serverChain, clientChain, mkcert::ESide::Server);
log("server SSL contexts initialized");
auto io = io_context();
auto serverWorkGuard = work_guard_type(io.get_executor());
auto clientWorkGuard = work_guard_type(io.get_executor());
auto const ip = ip::address::from_string("127.0.0.1");
auto acceptor = tcp::acceptor(io, tcp::endpoint(ip, 0));
auto const serverAddr = acceptor.local_endpoint();
logs("server listening at {}", serverAddr);
auto serverSock = tcp::socket(io);
auto serverSslSock = socket_type(serverSock, serverSsl);
enum class ESockState { AssumedUntrusted, Trusted };
auto serverSockState = ESockState::AssumedUntrusted;
auto clientSockState = ESockState::AssumedUntrusted;
auto handshakeOk = true;
serverSslSock.set_verify_callback([&serverSockState, &handshakeOk](bool preverify, ssl::verify_context&) {
logs("client preverify: {}", preverify);
switch (serverSockState) {
case ESockState::AssumedUntrusted:
if (!preverify)
return handshakeOk = false;
serverSockState = ESockState::Trusted;
break;
case ESockState::Trusted:
if (!preverify)
return handshakeOk = false;
break;
default:
break;
}
// once an untrusted connection passes preverify, it is considered trusted
return true;
});
acceptor.async_accept(serverSock, [&serverSslSock, &serverWorkGuard, &handshakeOk](const ec_type& ec) {
if (ec) {
logs("accept error: {}", ec.message());
handshakeOk = false;
serverWorkGuard.reset();
} else {
logs("accepted connection from {}", serverSslSock.next_layer().remote_endpoint());
serverSslSock.async_handshake(ssl::stream_base::handshake_type::server,
[&serverWorkGuard, &handshakeOk](const ec_type& ec) {
if (ec) {
logs("server handshake returned {}", ec.message());
handshakeOk = false;
} else {
logs("handshake OK");
}
serverWorkGuard.reset();
});
}
});
auto clientSock = tcp::socket(io);
auto clientSslSock = socket_type(clientSock, clientSsl);
clientSslSock.set_verify_callback([&clientSockState](bool preverify, ssl::verify_context&) {
logc("server preverify: {}", preverify);
switch (clientSockState) {
case ESockState::AssumedUntrusted:
if (!preverify)
return false;
clientSockState = ESockState::Trusted;
break;
case ESockState::Trusted:
if (!preverify)
return false;
break;
default:
break;
}
// once an untrusted connection passes preverify, it is considered trusted
return true;
});
clientSock.async_connect(serverAddr,
[&clientWorkGuard, &clientSock, &clientSslSock, &handshakeOk](const ec_type& ec) {
if (ec) {
logc("connect error: {}", ec.message());
handshakeOk = false;
clientWorkGuard.reset();
} else {
logc("connected to {}", clientSock.remote_endpoint());
clientSslSock.async_handshake(ssl::stream_base::handshake_type::client,
[&clientWorkGuard, &handshakeOk](const ec_type& ec) {
if (ec) {
logc("handshake returned: {}", ec.message());
handshakeOk = false;
} else {
logc("handshake OK");
}
clientWorkGuard.reset();
});
}
});
io.run();
ASSERT_EQ(expectHandshakeOk, handshakeOk);
if (expectHandshakeOk) {
ASSERT_EQ(expectTrusted, (serverSockState == ESockState::Trusted));
log("Test OK: Handshake passed and connection {} as expected",
serverSockState == ESockState::Trusted ? "trusted" : "untrusted");
} else {
log("Test OK: Handshake failed as expected");
}
}
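// Expected outcomes for the test matrix in main() below: the handshake
// succeeds only when the server presents a valid (unexpired) chain and the
// client presents either a valid chain or none at all; a certificate-less
// client is accepted but the connection stays untrusted.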
int main() {
std::pair<int, int> inputs[] = { { 3, 2 }, { 4, 0 }, { -3, 1 }, { 3, -2 }, { -3, 0 },
{ 0, 0 }, { 0, 1 }, { 1, 3 }, { -1, -3 }, { 1, 0 } };
for (auto input : inputs) {
auto [serverChainLen, clientChainLen] = input;
runTlsTest(serverChainLen, clientChainLen);
}
return 0;
}

View File

@ -130,6 +130,19 @@ UID UID::fromString(std::string const& s) {
return UID(a, b);
}
UID UID::fromStringThrowsOnFailure(std::string const& s) {
if (s.size() != 32) {
// invalid string size
throw operation_failed();
}
uint64_t a = 0, b = 0;
int r = sscanf(s.c_str(), "%16" SCNx64 "%16" SCNx64, &a, &b);
if (r != 2) {
throw operation_failed();
}
return UID(a, b);
}
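// Example (hypothetical input string): this variant reports malformed input
// by throwing operation_failed(), so callers can recover:
//   try {
//       UID id = UID::fromStringThrowsOnFailure(userInput);
//   } catch (Error& e) {
//       ASSERT(e.code() == error_code_operation_failed);
//   }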
std::string UID::shortString() const {
return format("%016llx", part[0]);
}

View File

@ -707,7 +707,7 @@ private:
// Binds an AsyncTrigger object to an AsyncVar, so that the AsyncTrigger fires
// whenever the AsyncVar changes. Returning Future<Void> instead of void lets
// the caller hold on to the actor and cancel the forwarding loop when needed.
ACTOR template <class T>
void forward(Reference<AsyncVar<T> const> from, AsyncTrigger* to) {
Future<Void> forward(Reference<AsyncVar<T> const> from, AsyncTrigger* to) {
loop {
wait(from->onChange());
to->trigger();

View File

@ -25,6 +25,7 @@
_AssumeVersion::_AssumeVersion(ProtocolVersion version) : v(version) {
if (!version.isValid()) {
ASSERT(!g_network->isSimulated());
TraceEvent("SerializationFailed").backtrace();
throw serialization_failed();
}
}
@ -34,6 +35,7 @@ const void* BinaryReader::readBytes(int bytes) {
const char* e = b + bytes;
if (e > end) {
ASSERT(!g_network->isSimulated());
TraceEvent("SerializationFailed").backtrace();
throw serialization_failed();
}
begin = e;

View File

@ -277,6 +277,12 @@ if(WITH_PYTHON)
add_fdb_test(
TEST_FILES restarting/from_7.1.0/ConfigureStorageMigrationTestRestart-1.toml
restarting/from_7.1.0/ConfigureStorageMigrationTestRestart-2.toml)
add_fdb_test(
TEST_FILES restarting/from_7.1.0/VersionVectorDisableRestart-1.toml
restarting/from_7.1.0/VersionVectorDisableRestart-2.toml)
add_fdb_test(
TEST_FILES restarting/from_7.1.0/VersionVectorEnableRestart-1.toml
restarting/from_7.1.0/VersionVectorEnableRestart-2.toml)
add_fdb_test(TEST_FILES slow/ApiCorrectness.toml)

View File

@ -7,6 +7,10 @@ import os
import socket
import time
CLUSTER_UPDATE_TIMEOUT_SEC = 10
EXCLUDE_SERVERS_TIMEOUT_SEC = 120
RETRY_INTERVAL_SEC = 0.5
def _get_free_port_internal():
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
@ -94,6 +98,7 @@ logdir = {logdir}
port=None,
ip_address=None,
blob_granules_enabled: bool = False,
redundancy: str = "single"
):
self.basedir = Path(basedir)
self.etc = self.basedir.joinpath("etc")
@ -110,6 +115,7 @@ logdir = {logdir}
self.log.mkdir(exist_ok=True)
self.data.mkdir(exist_ok=True)
self.process_number = process_number
self.redundancy = redundancy
self.ip_address = "127.0.0.1" if ip_address is None else ip_address
self.first_port = port
self.blob_granules_enabled = blob_granules_enabled
@ -119,7 +125,9 @@ logdir = {logdir}
if self.first_port is not None:
self.last_used_port = int(self.first_port) - 1
self.server_ports = [self.__next_port() for _ in range(self.process_number)]
self.server_ports = {server_id: self.__next_port() for server_id in range(self.process_number)}
self.server_by_port = {port: server_id for server_id, port in self.server_ports.items()}
self.next_server_id = self.process_number
self.cluster_desc = random_secret_string(8)
self.cluster_secret = random_secret_string(8)
self.env_vars = {}
@ -127,6 +135,8 @@ logdir = {logdir}
self.process = None
self.fdbmonitor_logfile = None
self.use_legacy_conf_syntax = False
self.coordinators = set()
self.active_servers = set(self.server_ports.keys())
if create_config:
self.create_cluster_file()
@ -163,11 +173,15 @@ logdir = {logdir}
# E.g., port = 4000, process_number = 5
# Then 4000,4001,4002,4003,4004 will be used as ports
# If port number is not given, we will randomly pick free ports
for port in self.server_ports:
f.write("[fdbserver.{server_port}]\n".format(server_port=port))
for server_id in self.active_servers:
f.write("[fdbserver.{server_port}]\n".format(server_port=self.server_ports[server_id]))
if self.use_legacy_conf_syntax:
f.write("machine_id = {}\n".format(server_id))
else:
f.write("machine-id = {}\n".format(server_id))
if self.blob_granules_enabled:
# make last process a blob_worker class
f.write("class = blob_worker")
f.write("class = blob_worker\n")
f.flush()
os.fsync(f.fileno())
@ -183,6 +197,7 @@ logdir = {logdir}
server_port=self.server_ports[0],
)
)
self.coordinators = {0}
def start_cluster(self):
assert not self.running, "Can't start a server that is already running"
@ -212,7 +227,8 @@ logdir = {logdir}
sec = 0
while sec < timeout_sec:
in_use = False
for port in self.server_ports:
for server_id in self.active_servers:
port = self.server_ports[server_id]
if is_port_in_use(port):
print("Port {} in use. Waiting for it to be released".format(port))
in_use = True
@ -230,37 +246,60 @@ logdir = {logdir}
    def __exit__(self, exc_type, exc_value, traceback):
self.stop_cluster()
def __fdbcli_exec(self, cmd, stdout, stderr, timeout):
args = [self.fdbcli_binary, "-C", self.cluster_file, "--exec", cmd]
res = subprocess.run(args, env=self.process_env(), stderr=stderr, stdout=stdout, timeout=timeout)
assert res.returncode == 0, "fdbcli command {} failed with {}".format(cmd, res.returncode)
return res.stdout
# Execute a fdbcli command
def fdbcli_exec(self, cmd, timeout=None):
self.__fdbcli_exec(cmd, None, None, timeout)
# Execute a fdbcli command and return its output
def fdbcli_exec_and_get(self, cmd, timeout=None):
return self.__fdbcli_exec(cmd, subprocess.PIPE, None, timeout)
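    # Example (hypothetical usage): capture fdbcli output for inspection.
    #   output = cluster.fdbcli_exec_and_get("status minimal")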
def create_database(self, storage="ssd", enable_tenants=True):
db_config = "configure new single {}".format(storage)
db_config = "configure new {} {}".format(self.redundancy, storage)
if enable_tenants:
db_config += " tenant_mode=optional_experimental"
if self.blob_granules_enabled:
db_config += " blob_granules_enabled:=1"
args = [self.fdbcli_binary, "-C", self.cluster_file, "--exec", db_config]
res = subprocess.run(args, env=self.process_env())
assert res.returncode == 0, "Create database failed with {}".format(
res.returncode
)
self.fdbcli_exec(db_config)
if self.blob_granules_enabled:
bg_args = [
self.fdbcli_binary,
"-C",
self.cluster_file,
"--exec",
"blobrange start \\x00 \\xff",
]
bg_res = subprocess.run(bg_args, env=self.process_env())
assert bg_res.returncode == 0, "Start blob granules failed with {}".format(
bg_res.returncode
)
self.fdbcli_exec("blobrange start \\x00 \\xff")
# Get cluster status using fdbcli
def get_status(self):
args = [self.fdbcli_binary, "-C", self.cluster_file, "--exec", "status json"]
res = subprocess.run(args, env=self.process_env(), stdout=subprocess.PIPE)
assert res.returncode == 0, "Get status failed with {}".format(res.returncode)
return json.loads(res.stdout)
status_output = self.fdbcli_exec_and_get("status json")
return json.loads(status_output)
# Get the set of servers from the cluster status matching the given filter
def get_servers_from_status(self, filter):
status = self.get_status()
if "processes" not in status["cluster"]:
return {}
servers_found = set()
addresses = [proc_info["address"] for proc_info in status["cluster"]["processes"].values() if filter(proc_info)]
for addr in addresses:
port = int(addr.split(":", 1)[1])
assert port in self.server_by_port, "Unknown server port {}".format(port)
servers_found.add(self.server_by_port[port])
return servers_found
# Get the set of all servers from the cluster status
def get_all_servers_from_status(self):
return self.get_servers_from_status(lambda _: True)
# Get the set of all servers with coordinator role from the cluster status
def get_coordinators_from_status(self):
def is_coordinator(proc_status):
return any(entry["role"] == "coordinator" for entry in proc_status["roles"])
return self.get_servers_from_status(is_coordinator)
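    # A hypothetical example of the same filter API, reusing the "roles" field
    # of the status document (this filter is not used elsewhere in this file):
    #
    #   def is_storage(proc_info):
    #       return any(entry["role"] == "storage" for entry in proc_info["roles"])
    #   storage_servers = cluster.get_servers_from_status(is_storage)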
def process_env(self):
env = dict(os.environ)
@ -269,3 +308,102 @@ logdir = {logdir}
def set_env_var(self, var_name, var_val):
self.env_vars[var_name] = var_val
# Add a new server process to the cluster and return its ID
# Need to call save_config to apply the changes
def add_server(self):
server_id = self.next_server_id
assert server_id not in self.server_ports, "Server ID {} is already in use".format(server_id)
self.next_server_id += 1
port = self.__next_port()
self.server_ports[server_id] = port
self.server_by_port[port] = server_id
self.active_servers.add(server_id)
return server_id
# Remove the server with the given ID from the cluster
# Need to call save_config to apply the changes
def remove_server(self, server_id):
assert server_id in self.active_servers, "Server {} does not exist".format(server_id)
self.active_servers.remove(server_id)
# Wait until changes to the set of servers (additions & removals) are applied
def wait_for_server_update(self, timeout=CLUSTER_UPDATE_TIMEOUT_SEC):
time_limit = time.time() + timeout
servers_found = set()
        while time.time() <= time_limit:
            servers_found = self.get_all_servers_from_status()
            if servers_found == self.active_servers:
                break
            time.sleep(RETRY_INTERVAL_SEC)
        assert servers_found == self.active_servers, \
            "Failed to apply server changes after {}sec. Expected: {}, Actual: {}".format(
                timeout, self.active_servers, servers_found)
# Apply changes to the set of the coordinators, based on the current value of self.coordinators
def update_coordinators(self):
urls = ["{}:{}".format(self.ip_address, self.server_ports[id]) for id in self.coordinators]
self.fdbcli_exec("coordinators {}".format(" ".join(urls)))
# Wait until the changes to the set of the coordinators are applied
def wait_for_coordinator_update(self, timeout=CLUSTER_UPDATE_TIMEOUT_SEC):
time_limit = time.time() + timeout
coord_found = set()
        while time.time() <= time_limit:
            coord_found = self.get_coordinators_from_status()
            if coord_found == self.coordinators:
                break
            time.sleep(RETRY_INTERVAL_SEC)
        assert coord_found == self.coordinators, \
            "Failed to apply coordinator changes after {}sec. Expected: {}, Actual: {}".format(
                timeout, self.coordinators, coord_found)
        # Check that the cluster file was successfully updated too
        with open(self.cluster_file, "r") as f:
            connection_string = f.read()
        for server_id in self.coordinators:
            assert connection_string.find(str(self.server_ports[server_id])) != -1, \
                "Missing coordinator {} port {} in the cluster file".format(server_id, self.server_ports[server_id])
# Exclude the servers with the given ID from the cluster, i.e. move out their data
# The method waits until the changes are applied
def exclude_servers(self, server_ids):
urls = ["{}:{}".format(self.ip_address, self.server_ports[id]) for id in server_ids]
self.fdbcli_exec("exclude FORCE {}".format(" ".join(urls)), timeout=EXCLUDE_SERVERS_TIMEOUT_SEC)
# Perform a cluster wiggle: replace all servers with new ones
def cluster_wiggle(self):
old_servers = self.active_servers.copy()
new_servers = set()
print("Starting cluster wiggle")
print("Old servers: {} on ports {}".format(old_servers, [
self.server_ports[server_id] for server_id in old_servers]))
print("Old coordinators: {}".format(self.coordinators))
# Step 1: add new servers
start_time = time.time()
for _ in range(len(old_servers)):
new_servers.add(self.add_server())
print("New servers: {} on ports {}".format(new_servers, [
self.server_ports[server_id] for server_id in new_servers]))
self.save_config()
self.wait_for_server_update()
print("New servers successfully added to the cluster. Time: {}s".format(time.time()-start_time))
# Step 2: change coordinators
start_time = time.time()
        new_coordinators = set(random.sample(list(new_servers), len(self.coordinators)))
print("New coordinators: {}".format(new_coordinators))
self.coordinators = new_coordinators.copy()
self.update_coordinators()
self.wait_for_coordinator_update()
print("Coordinators successfully changed. Time: {}s".format(time.time()-start_time))
# Step 3: exclude old servers from the cluster, i.e. move out their data
start_time = time.time()
self.exclude_servers(old_servers)
print("Old servers successfully excluded from the cluster. Time: {}s".format(time.time()-start_time))
# Step 4: remove the old servers
start_time = time.time()
for server_id in old_servers:
self.remove_server(server_id)
self.save_config()
self.wait_for_server_update()
print("Old servers successfully removed from the cluster. Time: {}s".format(time.time()-start_time))

View File

@ -21,6 +21,8 @@ from local_cluster import LocalCluster, random_secret_string
SUPPORTED_PLATFORMS = ["x86_64"]
SUPPORTED_VERSIONS = [
"7.2.0",
"7.1.3",
"7.1.2",
"7.1.1",
"7.1.0",
"7.0.0",
@ -65,6 +67,7 @@ SUPPORTED_VERSIONS = [
"5.1.7",
"5.1.6",
]
CLUSTER_ACTIONS = ["wiggle"]
FDB_DOWNLOAD_ROOT = "https://github.com/apple/foundationdb/releases/download/"
LOCAL_OLD_BINARY_REPO = "/opt/foundationdb/old/"
CURRENT_VERSION = "7.2.0"
@ -128,19 +131,15 @@ def read_to_str(filename):
class UpgradeTest:
def __init__(
self,
build_dir: str,
upgrade_path: list,
process_number: int = 1,
port: str = None,
args
):
self.build_dir = Path(build_dir).resolve()
assert self.build_dir.exists(), "{} does not exist".format(build_dir)
assert self.build_dir.is_dir(), "{} is not a directory".format(build_dir)
self.upgrade_path = upgrade_path
for version in upgrade_path:
assert version in SUPPORTED_VERSIONS, "Unsupported version {}".format(
version
)
self.build_dir = Path(args.build_dir).resolve()
assert self.build_dir.exists(), "{} does not exist".format(args.build_dir)
assert self.build_dir.is_dir(), "{} is not a directory".format(args.build_dir)
self.upgrade_path = args.upgrade_path
self.used_versions = set(self.upgrade_path).difference(set(CLUSTER_ACTIONS))
for version in self.used_versions:
assert version in SUPPORTED_VERSIONS, "Unsupported version or cluster action {}".format(version)
self.platform = platform.machine()
assert self.platform in SUPPORTED_PLATFORMS, "Unsupported platform {}".format(
self.platform
@ -153,15 +152,15 @@ class UpgradeTest:
self.local_binary_repo = None
self.download_old_binaries()
self.create_external_lib_dir()
init_version = upgrade_path[0]
init_version = self.upgrade_path[0]
self.cluster = LocalCluster(
self.tmp_dir,
self.binary_path(init_version, "fdbserver"),
self.binary_path(init_version, "fdbmonitor"),
self.binary_path(init_version, "fdbcli"),
process_number,
port=port,
args.process_number,
create_config=False,
redundancy=args.redundancy
)
self.cluster.create_cluster_file()
self.configure_version(init_version)
@ -271,7 +270,7 @@ class UpgradeTest:
# Download all old binaries required for testing the specified upgrade path
def download_old_binaries(self):
for version in self.upgrade_path:
for version in self.used_versions:
if version == CURRENT_VERSION:
continue
@ -297,7 +296,7 @@ class UpgradeTest:
def create_external_lib_dir(self):
self.external_lib_dir = self.tmp_dir.joinpath("client_libs")
self.external_lib_dir.mkdir(parents=True)
for version in self.upgrade_path:
for version in self.used_versions:
src_file_path = self.lib_dir(version).joinpath("libfdb_c.so")
assert src_file_path.exists(), "{} does not exist".format(src_file_path)
target_file_path = self.external_lib_dir.joinpath(
@ -317,7 +316,7 @@ class UpgradeTest:
time.sleep(1)
continue
num_proc = len(status["cluster"]["processes"])
if num_proc < self.cluster.process_number:
if num_proc != self.cluster.process_number:
print(
"Health check: {} of {} processes found. Retrying".format(
num_proc, self.cluster.process_number
@ -325,11 +324,6 @@ class UpgradeTest:
)
time.sleep(1)
continue
assert (
num_proc == self.cluster.process_number
), "Number of processes: expected: {}, actual: {}".format(
self.cluster.process_number, num_proc
)
for (_, proc_stat) in status["cluster"]["processes"].items():
proc_ver = proc_stat["version"]
assert (
@ -374,7 +368,7 @@ class UpgradeTest:
# Determine FDB API version matching the upgrade path
def determine_api_version(self):
self.api_version = api_version_from_str(CURRENT_VERSION)
for version in self.upgrade_path:
for version in self.used_versions:
self.api_version = min(api_version_from_str(version), self.api_version)
# Start the tester to generate the workload specified by the test file
@ -432,7 +426,6 @@ class UpgradeTest:
os._exit(1)
# Perform a progress check: Trigger it and wait until it is completed
def progress_check(self):
self.progress_event.clear()
os.write(self.ctrl_pipe, b"CHECK\n")
@ -468,11 +461,15 @@ class UpgradeTest:
try:
self.health_check()
self.progress_check()
for version in self.upgrade_path[1:]:
random_sleep(0.0, 2.0)
self.upgrade_to(version)
self.health_check()
self.progress_check()
random_sleep(0.0, 2.0)
for entry in self.upgrade_path[1:]:
if entry == "wiggle":
self.cluster.cluster_wiggle()
else:
assert entry in self.used_versions, "Unexpected entry in the upgrade path: {}".format(entry)
self.upgrade_to(entry)
self.health_check()
self.progress_check()
os.write(self.ctrl_pipe, b"STOP\n")
finally:
os.close(self.ctrl_pipe)
@ -615,7 +612,8 @@ if __name__ == "__main__":
parser.add_argument(
"--upgrade-path",
nargs="+",
help="Cluster upgrade path: a space separated list of versions",
help="Cluster upgrade path: a space separated list of versions.\n" +
"The list may also contain cluster change actions: {}".format(CLUSTER_ACTIONS),
default=[CURRENT_VERSION],
)
parser.add_argument(
@ -630,6 +628,12 @@ if __name__ == "__main__":
type=int,
default=0,
)
parser.add_argument(
"--redundancy",
help="Database redundancy level (default: single)",
type=str,
default="single",
)
parser.add_argument(
"--disable-log-dump",
help="Do not dump cluster log on error",
@ -643,11 +647,14 @@ if __name__ == "__main__":
args.process_number = random.randint(1, 5)
print("Testing with {} processes".format(args.process_number))
assert len(args.upgrade_path) > 0, "Upgrade path must be specified"
    assert args.upgrade_path[0] in SUPPORTED_VERSIONS, "Upgrade path must begin with a valid version number"
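    # Example invocation (paths abbreviated), mixing version upgrades with a
    # cluster action:
    #   upgrade_test.py --build-dir <build-dir> --test-file <workload.toml> \
    #       --upgrade-path "7.1.2" "7.2.0" "wiggle" --redundancy double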
if args.run_with_gdb:
RUN_WITH_GDB = True
errcode = 1
with UpgradeTest(args.build_dir, args.upgrade_path, args.process_number) as test:
with UpgradeTest(args) as test:
print("log-dir: {}".format(test.log))
print("etc-dir: {}".format(test.etc))
print("data-dir: {}".format(test.data))

View File

@ -0,0 +1,60 @@
[[knobs]]
enable_version_vector = true
enable_version_vector_tlog_unicast = true
proxy_use_resolver_private_mutations = true
[[test]]
testTitle='VersionVectorDowngrade'
clearAfterTest=false
[[test.workload]]
testName='Cycle'
transactionsPerSecond=2500.0
nodeCount=1000
testDuration=30.0
expectedRate=0
keyPrefix = 'cycle'
[[test.workload]]
testName = 'Cycle'
nodeCount = 1000
transactionsPerSecond = 2500.0
testDuration = 30.0
expectedRate = 0
keyPrefix = '!'
[[test.workload]]
testName = 'Cycle'
nodeCount = 1000
transactionsPerSecond = 2500.0
testDuration = 30.0
expectedRate = 0
keyPrefix = 'ZZZ'
[[test.workload]]
testName='RandomClogging'
testDuration=10.0
[[test.workload]]
testName='Rollback'
meanDelay=10.0
testDuration=10.0
[[test.workload]]
testName='Attrition'
machinesToKill=10
machinesToLeave=3
reboot=true
testDuration=10.0
[[test.workload]]
testName='Attrition'
machinesToKill=10
machinesToLeave=3
reboot=true
testDuration=10.0
[[test.workload]]
testName='SaveAndKill'
restartInfoLocation='simfdb/restartInfo.ini'
testDuration=60.0

View File

@ -0,0 +1,54 @@
[[knobs]]
enable_version_vector = false
enable_version_vector_tlog_unicast = false
[[test]]
testTitle='VersionVectorDowngrade'
runSetup=false
[[test.workload]]
testName='Cycle'
transactionsPerSecond=2500.0
nodeCount=1000
testDuration=30.0
expectedRate=0
keyPrefix = 'cycle'
[[test.workload]]
testName = 'Cycle'
nodeCount = 1000
transactionsPerSecond = 2500.0
testDuration = 30.0
expectedRate = 0
keyPrefix = '!'
[[test.workload]]
testName = 'Cycle'
nodeCount = 1000
transactionsPerSecond = 2500.0
testDuration = 30.0
expectedRate = 0
keyPrefix = 'ZZZ'
[[test.workload]]
testName='RandomClogging'
testDuration=10.0
[[test.workload]]
testName='Rollback'
meanDelay=10.0
testDuration=10.0
[[test.workload]]
testName='Attrition'
machinesToKill=10
machinesToLeave=3
reboot=true
testDuration=10.0
[[test.workload]]
testName='Attrition'
machinesToKill=10
machinesToLeave=3
reboot=true
testDuration=60.0

View File

@ -0,0 +1,59 @@
[[knobs]]
enable_version_vector = false
enable_version_vector_tlog_unicast = false
[[test]]
testTitle='VersionVectorUpgrade'
clearAfterTest=false
[[test.workload]]
testName='Cycle'
transactionsPerSecond=2500.0
nodeCount=1000
testDuration=30.0
expectedRate=0
keyPrefix = 'cycle'
[[test.workload]]
testName = 'Cycle'
nodeCount = 1000
transactionsPerSecond = 2500.0
testDuration = 30.0
expectedRate = 0
keyPrefix = '!'
[[test.workload]]
testName = 'Cycle'
nodeCount = 1000
transactionsPerSecond = 2500.0
testDuration = 30.0
expectedRate = 0
keyPrefix = 'ZZZ'
[[test.workload]]
testName='RandomClogging'
testDuration=10.0
[[test.workload]]
testName='Rollback'
meanDelay=10.0
testDuration=10.0
[[test.workload]]
testName='Attrition'
machinesToKill=10
machinesToLeave=3
reboot=true
testDuration=10.0
[[test.workload]]
testName='Attrition'
machinesToKill=10
machinesToLeave=3
reboot=true
testDuration=10.0
[[test.workload]]
testName='SaveAndKill'
restartInfoLocation='simfdb/restartInfo.ini'
testDuration=60.0
