diff --git a/.gitignore b/.gitignore index 7b23facbe3..8902f61d74 100644 --- a/.gitignore +++ b/.gitignore @@ -81,6 +81,11 @@ compile_commands.json flow/actorcompiler/obj flow/coveragetool/obj +# IDE indexing (commonly used tools) +/compile_commands.json +/.ccls-cache +/.clangd + # Temporary and user configuration files *~ *.orig @@ -89,5 +94,3 @@ flow/coveragetool/obj .envrc .DS_Store temp/ -/compile_commands.json -/.ccls-cache diff --git a/CMakeLists.txt b/CMakeLists.txt index 2d497b3f73..8b622025c7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -179,9 +179,6 @@ set(SEED "0x${SEED_}" CACHE STRING "Random seed for testing") ################################################################################ include(CompileBoost) -if(WITH_TLS) - add_subdirectory(FDBLibTLS) -endif() add_subdirectory(flow) add_subdirectory(fdbrpc) add_subdirectory(fdbclient) @@ -192,7 +189,9 @@ if(NOT WIN32) else() add_subdirectory(fdbservice) endif() -add_subdirectory(bindings) +if(WITH_PYTHON) + add_subdirectory(bindings) +endif() add_subdirectory(fdbbackup) add_subdirectory(tests) if(WITH_DOCUMENTATION) @@ -210,13 +209,13 @@ endif() # process compile commands for IDE ################################################################################ -if (CMAKE_EXPORT_COMPILE_COMMANDS) +if (CMAKE_EXPORT_COMPILE_COMMANDS AND WITH_PYTHON) add_custom_command( - OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/compile_commands.json - COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/build/gen_compile_db.py - ARGS -b ${CMAKE_CURRENT_BINARY_DIR} -s ${CMAKE_CURRENT_SOURCE_DIR} -o ${CMAKE_CURRENT_SOURCE_DIR}/compile_commands.json ${CMAKE_CURRENT_BINARY_DIR}/compile_commands.json - DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/build/gen_compile_db.py ${CMAKE_CURRENT_BINARY_DIR}/compile_commands.json - COMMENT "Build compile commands for IDE" + OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/compile_commands.json + COMMAND $<TARGET_FILE:Python::Interpreter> ${CMAKE_CURRENT_SOURCE_DIR}/build/gen_compile_db.py + ARGS -b ${CMAKE_CURRENT_BINARY_DIR} -s ${CMAKE_CURRENT_SOURCE_DIR} -o ${CMAKE_CURRENT_SOURCE_DIR}/compile_commands.json ${CMAKE_CURRENT_BINARY_DIR}/compile_commands.json + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/build/gen_compile_db.py ${CMAKE_CURRENT_BINARY_DIR}/compile_commands.json + COMMENT "Build compile commands for IDE" ) add_custom_target(processed_compile_commands ALL DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/compile_commands.json ${CMAKE_CURRENT_BINARY_DIR}/compile_commands.json) endif() diff --git a/FDBLibTLS/FDBLibTLSPolicy.cpp b/FDBLibTLS/FDBLibTLSPolicy.cpp index d22f7d8f67..1fb9f65277 100644 --- a/FDBLibTLS/FDBLibTLSPolicy.cpp +++ b/FDBLibTLS/FDBLibTLSPolicy.cpp @@ -300,7 +300,7 @@ bool FDBLibTLSPolicy::set_verify_peers(int count, const uint8_t* verify_peers[], } Reference<FDBLibTLSVerify> verify = Reference<FDBLibTLSVerify>(new FDBLibTLSVerify(verifyString.substr(start))); verify_rules.push_back(verify); - } catch ( const std::runtime_error& e ) { + } catch ( const std::runtime_error& ) { verify_rules.clear(); std::string verifyString((const char*)verify_peers[i], verify_peers_len[i]); TraceEvent(SevError, "FDBLibTLSVerifyPeersParseError").detail("Config", verifyString); diff --git a/Makefile b/Makefile index 79f2cb05ec..87a477b83e 100644 --- a/Makefile +++ b/Makefile @@ -51,7 +51,7 @@ ifeq ($(PLATFORM),Linux) CXXFLAGS += -std=c++17 BOOST_BASEDIR ?= /opt - TLS_LIBDIR ?= /usr/local/lib + TLS_LIBDIR ?= /usr/local/lib64 DLEXT := so java_DLEXT := so TARGET_LIBC_VERSION ?= 2.11 @@ -67,7 +67,7 @@ else ifeq ($(PLATFORM),Darwin) .LIBPATTERNS := lib%.dylib lib%.a BOOST_BASEDIR ?= ${HOME} - TLS_LIBDIR ?= /usr/local/lib + TLS_LIBDIR 
?= /usr/local/lib64 DLEXT := dylib java_DLEXT := jnilib else @@ -112,8 +112,8 @@ CFLAGS += -DTLS_DISABLED FDB_TLS_LIB := TLS_LIBS := else -FDB_TLS_LIB := lib/libFDBLibTLS.a -TLS_LIBS += $(addprefix $(TLS_LIBDIR)/,libtls.a libssl.a libcrypto.a) +FDB_TLS_LIB := +TLS_LIBS += $(addprefix $(TLS_LIBDIR)/,libssl.a libcrypto.a) endif CXXFLAGS += -Wno-deprecated -DBOOST_ERROR_CODE_HEADER_ONLY -DBOOST_SYSTEM_NO_DEPRECATED @@ -126,9 +126,6 @@ VPATH += $(addprefix :,$(filter-out lib,$(patsubst -L%,%,$(filter -L%,$(LDFLAGS) CS_PROJECTS := flow/actorcompiler flow/coveragetool fdbclient/vexillographer CPP_PROJECTS := flow fdbrpc fdbclient fdbbackup fdbserver fdbcli bindings/c bindings/java fdbmonitor bindings/flow/tester bindings/flow -ifndef TLS_DISABLED -CPP_PROJECTS += FDBLibTLS -endif OTHER_PROJECTS := bindings/python bindings/ruby bindings/go CS_MK_GENERATED := $(CS_PROJECTS:=/generated.mk) diff --git a/README.md b/README.md index edbec64cda..510943a6cc 100755 --- a/README.md +++ b/README.md @@ -33,6 +33,10 @@ CMake-based build system. Both of them should currently work for most users, and CMake should be the preferred choice as it will eventually become the only build system available. +If compiling for local development, please set -DUSE_WERROR=ON in +cmake. Our CI compiles with -Werror on, so this way you'll find out about +compiler warnings that break the build earlier. + ## CMake To build with CMake, generally the following is required (works on Linux and @@ -129,9 +133,9 @@ If you want to create a package you have to tell cmake what platform it is for. And then you can build by simply calling `cpack`. So for debian, call: ``` -cmake -DINSTALL_LAYOUT=DEB +cmake make -cpack +cpack -G DEB ``` For RPM simply replace `DEB` with `RPM`. @@ -151,9 +155,9 @@ To generate an installable package, you have to call CMake with the corresponding arguments and then use cpack to generate the package: ```sh -cmake -DINSTALL_LAYOUT=OSX +cmake make -cpack +cpack -G productbuild ``` ### Windows diff --git a/bindings/c/CMakeLists.txt b/bindings/c/CMakeLists.txt index c80dc44b3f..9d05990e0c 100644 --- a/bindings/c/CMakeLists.txt +++ b/bindings/c/CMakeLists.txt @@ -38,6 +38,21 @@ else() endif() add_dependencies(fdb_c fdb_c_generated fdb_c_options) target_link_libraries(fdb_c PUBLIC $<BUILD_INTERFACE:fdbclient>) +if(APPLE) + set(symbols ${CMAKE_CURRENT_BINARY_DIR}/fdb_c.symbols) + add_custom_command(OUTPUT ${symbols} + COMMAND $<TARGET_FILE:Python::Interpreter> ${CMAKE_CURRENT_SOURCE_DIR}/symbolify.py + ${CMAKE_CURRENT_SOURCE_DIR}/foundationdb/fdb_c.h + ${symbols} + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/symbolify.py ${CMAKE_CURRENT_SOURCE_DIR}/foundationdb/fdb_c.h + COMMENT "Generate exported_symbols_list") + add_custom_target(exported_symbols_list DEPENDS ${symbols}) + add_dependencies(fdb_c exported_symbols_list) + target_link_options(fdb_c PRIVATE "LINKER:-no_weak_exports,-exported_symbols_list,${symbols}") +elseif(WIN32) +else() + target_link_options(fdb_c PRIVATE "LINKER:--version-script=${CMAKE_CURRENT_SOURCE_DIR}/fdb_c.map,-z,nodelete") +endif() target_include_directories(fdb_c PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}> $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}> diff --git a/bindings/c/ThreadCleanup.cpp b/bindings/c/ThreadCleanup.cpp index 20b49cf8e5..966e38b800 100644 --- a/bindings/c/ThreadCleanup.cpp +++ b/bindings/c/ThreadCleanup.cpp @@ -34,6 +34,10 @@ BOOL WINAPI DllMain( HINSTANCE dll, DWORD reason, LPVOID reserved ) { #elif defined( __unixish__ ) +#ifdef __INTEL_COMPILER +#pragma warning ( disable:2415 ) +#endif + static pthread_key_t threadDestructorKey; static void threadDestructor(void*) { @@ -57,4 +61,4 @@ static int 
threadDestructorKeyInit = initThreadDestructorKey(); #else #error Port me! -#endif \ No newline at end of file +#endif diff --git a/bindings/c/fdb_c.cpp b/bindings/c/fdb_c.cpp index dfa6f1d6bb..762c65596f 100644 --- a/bindings/c/fdb_c.cpp +++ b/bindings/c/fdb_c.cpp @@ -107,7 +107,12 @@ fdb_error_t fdb_network_set_option( FDBNetworkOption option, } fdb_error_t fdb_setup_network_impl() { - CATCH_AND_RETURN( API->setupNetwork(); ); + CATCH_AND_RETURN( + try { + API->setupNetwork(); + } catch (boost::system::system_error& e) { + return error_code_tls_error; + } ); } fdb_error_t fdb_setup_network_v13( const char* localAddress ) { @@ -627,6 +632,13 @@ fdb_error_t fdb_transaction_add_conflict_range( FDBTransaction*tr, uint8_t const } +extern "C" DLLEXPORT +FDBFuture* fdb_transaction_get_estimated_range_size_bytes( FDBTransaction* tr, uint8_t const* begin_key_name, + int begin_key_name_length, uint8_t const* end_key_name, int end_key_name_length ) { + KeyRangeRef range(KeyRef(begin_key_name, begin_key_name_length), KeyRef(end_key_name, end_key_name_length)); + return (FDBFuture*)(TXN(tr)->getEstimatedRangeSizeBytes(range).extractPtr()); +} + #include "fdb_c_function_pointers.g.h" #define FDB_API_CHANGED(func, ver) if (header_version < ver) fdb_api_ptr_##func = (void*)&(func##_v##ver##_PREV); else if (fdb_api_ptr_##func == (void*)&fdb_api_ptr_unimpl) fdb_api_ptr_##func = (void*)&(func##_impl); diff --git a/bindings/c/foundationdb/fdb_c.h b/bindings/c/foundationdb/fdb_c.h index 22fee464e5..b5dfa63d13 100644 --- a/bindings/c/foundationdb/fdb_c.h +++ b/bindings/c/foundationdb/fdb_c.h @@ -256,6 +256,10 @@ extern "C" { int end_key_name_length, FDBConflictRangeType type); + DLLEXPORT WARN_UNUSED_RESULT FDBFuture* + fdb_transaction_get_estimated_range_size_bytes( FDBTransaction* tr, uint8_t const* begin_key_name, + int begin_key_name_length, uint8_t const* end_key_name, int end_key_name_length); + #define FDB_KEYSEL_LAST_LESS_THAN(k, l) k, l, 0, 0 #define FDB_KEYSEL_LAST_LESS_OR_EQUAL(k, l) k, l, 1, 0 #define FDB_KEYSEL_FIRST_GREATER_THAN(k, l) k, l, 1, 1 diff --git a/bindings/c/symbolify.py b/bindings/c/symbolify.py new file mode 100644 index 0000000000..55d8dc81fd --- /dev/null +++ b/bindings/c/symbolify.py @@ -0,0 +1,10 @@ +if __name__ == '__main__': + import re + import sys + r = re.compile('DLLEXPORT[^(]*(fdb_[^(]*)[(]') + (fdb_c_h, symbols_file) = sys.argv[1:] + with open(fdb_c_h, 'r') as f: + symbols = sorted(set('_' + m.group(1) for m in r.finditer(f.read()))) + with open(symbols_file, 'w') as f: + f.write('\n'.join(symbols)) + f.write('\n') diff --git a/bindings/c/test/test.h b/bindings/c/test/test.h index 0c7b62c16b..7169689f76 100644 --- a/bindings/c/test/test.h +++ b/bindings/c/test/test.h @@ -236,7 +236,7 @@ void* runNetwork() { FDBDatabase* openDatabase(struct ResultSet *rs, pthread_t *netThread) { checkError(fdb_setup_network(), "setup network", rs); - pthread_create(netThread, NULL, &runNetwork, NULL); + pthread_create(netThread, NULL, (void*)(&runNetwork), NULL); FDBDatabase *db; checkError(fdb_create_database(NULL, &db), "create database", rs); diff --git a/bindings/flow/fdb_flow.actor.cpp b/bindings/flow/fdb_flow.actor.cpp index 742a60cd7e..d1a0c07bb1 100644 --- a/bindings/flow/fdb_flow.actor.cpp +++ b/bindings/flow/fdb_flow.actor.cpp @@ -82,7 +82,7 @@ void fdb_flow_test() { fdb->setupNetwork(); startThread(networkThread, fdb); - g_network = newNet2( false ); + g_network = newNet2(false); openTraceFile(NetworkAddress(), 1000000, 1000000, "."); systemMonitor(); @@ -131,6 +131,8 @@ 
namespace FDB { GetRangeLimits limits = GetRangeLimits(), bool snapshot = false, bool reverse = false, FDBStreamingMode streamingMode = FDB_STREAMING_MODE_SERIAL) override; + + Future<int64_t> getEstimatedRangeSizeBytes(const KeyRange& keys) override; void addReadConflictRange(KeyRangeRef const& keys) override; void addReadConflictKey(KeyRef const& key) override; @@ -345,6 +347,14 @@ namespace FDB { } ); } + Future<int64_t> TransactionImpl::getEstimatedRangeSizeBytes(const KeyRange& keys) { + return backToFuture<int64_t>(fdb_transaction_get_estimated_range_size_bytes(tr, keys.begin.begin(), keys.begin.size(), keys.end.begin(), keys.end.size()), [](Reference<CFuture> f) { + int64_t bytes; + throw_on_error(fdb_future_get_int64(f->f, &bytes)); + return bytes; + }); + } + void TransactionImpl::addReadConflictRange(KeyRangeRef const& keys) { throw_on_error( fdb_transaction_add_conflict_range( tr, keys.begin.begin(), keys.begin.size(), keys.end.begin(), keys.end.size(), FDB_CONFLICT_RANGE_TYPE_READ ) ); } diff --git a/bindings/flow/fdb_flow.h b/bindings/flow/fdb_flow.h index 2e34f20d59..66049cae0c 100644 --- a/bindings/flow/fdb_flow.h +++ b/bindings/flow/fdb_flow.h @@ -89,6 +89,8 @@ namespace FDB { streamingMode); } + virtual Future<int64_t> getEstimatedRangeSizeBytes(const KeyRange& keys) = 0; + virtual void addReadConflictRange(KeyRangeRef const& keys) = 0; virtual void addReadConflictKey(KeyRef const& key) = 0; diff --git a/bindings/flow/tester/Tester.actor.cpp b/bindings/flow/tester/Tester.actor.cpp index a257c05c78..4d0c50ba27 100644 --- a/bindings/flow/tester/Tester.actor.cpp +++ b/bindings/flow/tester/Tester.actor.cpp @@ -216,19 +216,19 @@ ACTOR Future< Standalone<RangeResultRef> > getRange(Reference<Transaction> tr, K } } -ACTOR static Future<Void> debugPrintRange(Reference<Transaction> tr, std::string subspace, std::string msg) { - if (!tr) - return Void(); - - Standalone<RangeResultRef> results = wait(getRange(tr, KeyRange(KeyRangeRef(subspace + '\x00', subspace + '\xff')))); - printf("==================================================DB:%s:%s, count:%d\n", msg.c_str(), - StringRef(subspace).printable().c_str(), results.size()); - for (auto & s : results) { - printf("=====key:%s, value:%s\n", StringRef(s.key).printable().c_str(), StringRef(s.value).printable().c_str()); - } - - return Void(); -} +//ACTOR static Future<Void> debugPrintRange(Reference<Transaction> tr, std::string subspace, std::string msg) { +// if (!tr) +// return Void(); +// +// Standalone<RangeResultRef> results = wait(getRange(tr, KeyRange(KeyRangeRef(subspace + '\x00', subspace + '\xff')))); +// printf("==================================================DB:%s:%s, count:%d\n", msg.c_str(), +// StringRef(subspace).printable().c_str(), results.size()); +// for (auto & s : results) { +// printf("=====key:%s, value:%s\n", StringRef(s.key).printable().c_str(), StringRef(s.value).printable().c_str()); +// } +// +// return Void(); +//} ACTOR Future<Void> stackSub(FlowTesterStack* stack) { if (stack->data.size() < 2) diff --git a/bindings/flow/tester/local.mk b/bindings/flow/tester/local.mk index 83444774bd..3bfc3e4be6 100644 --- a/bindings/flow/tester/local.mk +++ b/bindings/flow/tester/local.mk @@ -23,6 +23,7 @@ fdb_flow_tester_CFLAGS := -Ibindings/c $(fdbrpc_CFLAGS) fdb_flow_tester_LDFLAGS := -Llib $(fdbrpc_LDFLAGS) -lfdb_c fdb_flow_tester_LIBS := lib/libfdb_flow.a lib/libflow.a lib/libfdb_c.$(DLEXT) +fdb_flow_tester_STATIC_LIBS := $(TLS_LIBS) fdb_flow_tester: lib/libfdb_c.$(DLEXT) @mkdir -p bindings/flow/bin diff --git a/bindings/go/src/fdb/fdb_darwin.go b/bindings/go/src/fdb/fdb_darwin.go new file mode 100644 index 0000000000..c4157af70b --- /dev/null +++ 
b/bindings/go/src/fdb/fdb_darwin.go @@ -0,0 +1,5 @@ +package fdb + +//#cgo CFLAGS: -I/usr/local/include/ +//#cgo LDFLAGS: -L/usr/local/lib/ +import "C" diff --git a/bindings/go/src/fdb/fdb_windows.go b/bindings/go/src/fdb/fdb_windows.go new file mode 100644 index 0000000000..a27cd11eb7 --- /dev/null +++ b/bindings/go/src/fdb/fdb_windows.go @@ -0,0 +1,5 @@ +package fdb + +//#cgo CFLAGS: -I"C:/Program Files/foundationdb/include" +//#cgo LDFLAGS: -L"C:/Program Files/foundationdb/bin" -lfdb_c +import "C" diff --git a/bindings/go/src/fdb/generated.go b/bindings/go/src/fdb/generated.go index c5dfa08d4f..909de9a1a0 100644 --- a/bindings/go/src/fdb/generated.go +++ b/bindings/go/src/fdb/generated.go @@ -88,6 +88,13 @@ func (o NetworkOptions) SetTraceFormat(param string) error { return o.setOpt(34, []byte(param)) } +// Select clock source for trace files. now (default) or realtime are supported. +// +// Parameter: Trace clock source +func (o NetworkOptions) SetTraceClockSource(param string) error { + return o.setOpt(35, []byte(param)) +} + // Set internal tuning or debugging knobs // // Parameter: knob_name=knob_value diff --git a/bindings/go/src/fdb/range.go b/bindings/go/src/fdb/range.go index 492a4df95f..584f23cb2b 100644 --- a/bindings/go/src/fdb/range.go +++ b/bindings/go/src/fdb/range.go @@ -54,7 +54,8 @@ type RangeOptions struct { // Reverse indicates that the read should be performed in lexicographic // (false) or reverse lexicographic (true) order. When Reverse is true and // Limit is non-zero, the last Limit key-value pairs in the range are - // returned. + // returned. Reading ranges in reverse is supported natively by the + // database and should have minimal extra cost. Reverse bool } diff --git a/bindings/go/src/fdb/snapshot.go b/bindings/go/src/fdb/snapshot.go index 18c77d79bb..ca21818729 100644 --- a/bindings/go/src/fdb/snapshot.go +++ b/bindings/go/src/fdb/snapshot.go @@ -86,3 +86,11 @@ func (s Snapshot) GetReadVersion() FutureInt64 { func (s Snapshot) GetDatabase() Database { return s.transaction.db } + +func (s Snapshot) GetEstimatedRangeSizeBytes(r ExactRange) FutureInt64 { + beginKey, endKey := r.FDBRangeKeys() + return s.getEstimatedRangeSizeBytes( + beginKey.FDBKey(), + endKey.FDBKey(), + ) +} diff --git a/bindings/go/src/fdb/transaction.go b/bindings/go/src/fdb/transaction.go index ff6679c30b..069564bc7f 100644 --- a/bindings/go/src/fdb/transaction.go +++ b/bindings/go/src/fdb/transaction.go @@ -39,6 +39,7 @@ type ReadTransaction interface { GetReadVersion() FutureInt64 GetDatabase() Database Snapshot() Snapshot + GetEstimatedRangeSizeBytes(r ExactRange) FutureInt64 ReadTransactor } @@ -305,6 +306,28 @@ func (t Transaction) GetRange(r Range, options RangeOptions) RangeResult { return t.getRange(r, options, false) } +func (t *transaction) getEstimatedRangeSizeBytes(beginKey Key, endKey Key) FutureInt64 { + return &futureInt64{ + future: newFuture(C.fdb_transaction_get_estimated_range_size_bytes( + t.ptr, + byteSliceToPtr(beginKey), + C.int(len(beginKey)), + byteSliceToPtr(endKey), + C.int(len(endKey)), + )), + } +} + +// GetEstimatedRangeSizeBytes will get the byte size of the key range based on the +// byte sample collected by FDB +func (t Transaction) GetEstimatedRangeSizeBytes(r ExactRange) FutureInt64 { + beginKey, endKey := r.FDBRangeKeys() + return t.getEstimatedRangeSizeBytes( + beginKey.FDBKey(), + endKey.FDBKey(), + ) +} + func (t *transaction) getReadVersion() FutureInt64 { return &futureInt64{ future: newFuture(C.fdb_transaction_get_read_version(t.ptr)), diff --git 
a/bindings/java/JavaWorkload.cpp b/bindings/java/JavaWorkload.cpp index 2bb1f611fd..e47208b6e6 100644 --- a/bindings/java/JavaWorkload.cpp +++ b/bindings/java/JavaWorkload.cpp @@ -368,9 +368,11 @@ struct JVM { { { "send", "(JZ)V", reinterpret_cast(&promiseSend) } }); auto fdbClass = getClass("com/apple/foundationdb/FDB"); jmethodID selectMethod = - env->GetStaticMethodID(fdbClass, "selectAPIVersion", "(IZ)Lcom/apple/foundationdb/FDB;"); + env->GetStaticMethodID(fdbClass, "selectAPIVersion", "(I)Lcom/apple/foundationdb/FDB;"); checkException(); - env->CallStaticObjectMethod(fdbClass, selectMethod, jint(700), jboolean(false)); + auto fdbInstance = env->CallStaticObjectMethod(fdbClass, selectMethod, jint(700)); + checkException(); + env->CallObjectMethod(fdbInstance, getMethod(fdbClass, "disableShutdownHook", "()V")); checkException(); } diff --git a/bindings/java/fdbJNI.cpp b/bindings/java/fdbJNI.cpp index 121a32ae25..232e8392fe 100644 --- a/bindings/java/fdbJNI.cpp +++ b/bindings/java/fdbJNI.cpp @@ -646,6 +646,35 @@ JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDBTransaction_Transaction_1 return (jlong)f; } +JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDBTransaction_Transaction_1getEstimatedRangeSizeBytes(JNIEnv *jenv, jobject, jlong tPtr, + jbyteArray beginKeyBytes, jbyteArray endKeyBytes) { + if( !tPtr || !beginKeyBytes || !endKeyBytes) { + throwParamNotNull(jenv); + return 0; + } + FDBTransaction *tr = (FDBTransaction *)tPtr; + + uint8_t *startKey = (uint8_t *)jenv->GetByteArrayElements( beginKeyBytes, JNI_NULL ); + if(!startKey) { + if( !jenv->ExceptionOccurred() ) + throwRuntimeEx( jenv, "Error getting handle to native resources" ); + return 0; + } + + uint8_t *endKey = (uint8_t *)jenv->GetByteArrayElements(endKeyBytes, JNI_NULL); + if (!endKey) { + jenv->ReleaseByteArrayElements( beginKeyBytes, (jbyte *)startKey, JNI_ABORT ); + if( !jenv->ExceptionOccurred() ) + throwRuntimeEx( jenv, "Error getting handle to native resources" ); + return 0; + } + + FDBFuture *f = fdb_transaction_get_estimated_range_size_bytes( tr, startKey, jenv->GetArrayLength( beginKeyBytes ), endKey, jenv->GetArrayLength( endKeyBytes ) ); + jenv->ReleaseByteArrayElements( beginKeyBytes, (jbyte *)startKey, JNI_ABORT ); + jenv->ReleaseByteArrayElements( endKeyBytes, (jbyte *)endKey, JNI_ABORT ); + return (jlong)f; +} + JNIEXPORT void JNICALL Java_com_apple_foundationdb_FDBTransaction_Transaction_1set(JNIEnv *jenv, jobject, jlong tPtr, jbyteArray keyBytes, jbyteArray valueBytes) { if( !tPtr || !keyBytes || !valueBytes ) { throwParamNotNull(jenv); diff --git a/bindings/java/src/main/com/apple/foundationdb/FDB.java b/bindings/java/src/main/com/apple/foundationdb/FDB.java index ed62d0bd46..b945a5dc69 100644 --- a/bindings/java/src/main/com/apple/foundationdb/FDB.java +++ b/bindings/java/src/main/com/apple/foundationdb/FDB.java @@ -85,6 +85,8 @@ public class FDB { private volatile boolean netStarted = false; private volatile boolean netStopped = false; volatile boolean warnOnUnclosed = true; + private boolean useShutdownHook = true; + private Thread shutdownHook; private final Semaphore netRunning = new Semaphore(1); private final NetworkOptions options; @@ -104,15 +106,8 @@ public class FDB { * Called only once to create the FDB singleton. 
*/ private FDB(int apiVersion) { - this(apiVersion, true); - } - - private FDB(int apiVersion, boolean controlRuntime) { this.apiVersion = apiVersion; options = new NetworkOptions(this::Network_setOption); - if (controlRuntime) { - Runtime.getRuntime().addShutdownHook(new Thread(this::stopNetwork)); - } } /** @@ -167,9 +162,9 @@ public class FDB { * object.

* * Warning: When using the multi-version client API, setting an API version that - * is not supported by a particular client library will prevent that client from + * is not supported by a particular client library will prevent that client from being used to connect to the cluster. In particular, you should not advance - * the API version of your application after upgrading your client until the + * the API version of your application after upgrading your client until the * cluster has also been upgraded. * * @param version the API version required * * @return the FoundationDB API object */ public static FDB selectAPIVersion(final int version) throws FDBException { - return selectAPIVersion(version, true); - } - - /** - This function is called from C++ if the VM is controlled directly from FDB - */ - private static synchronized FDB selectAPIVersion(final int version, boolean controlRuntime) throws FDBException { if(singleton != null) { if(version != singleton.getAPIVersion()) { throw new IllegalArgumentException( @@ -197,9 +185,26 @@ public class FDB { throw new IllegalArgumentException("API version not supported (maximum 700)"); Select_API_version(version); - FDB fdb = new FDB(version, controlRuntime); + singleton = new FDB(version); - return singleton = fdb; + return singleton; + } + + /** + * Disables the shutdown hook that stops the network thread upon process shutdown. This is useful if you need to run + * your own shutdown hook that uses the FDB instance and you need to avoid race conditions + * with the default shutdown hook. The replacement shutdown hook should stop the network thread manually + * by calling {@link #stopNetwork}. + */ + public synchronized void disableShutdownHook() { + useShutdownHook = false; + if(shutdownHook != null) { + // If this method was called after network thread started and shutdown hook was installed, + // remove this hook + Runtime.getRuntime().removeShutdownHook(shutdownHook); + // Release thread reference for GC + shutdownHook = null; + } } /** @@ -405,6 +410,11 @@ public class FDB { if(netStarted) { return; } + if(useShutdownHook) { + // Register shutdown hook that stops network thread if user did not opt out + shutdownHook = new Thread(this::stopNetwork, "fdb-shutdown-hook"); + Runtime.getRuntime().addShutdownHook(shutdownHook); + } Network_setup(); netStarted = true; @@ -497,4 +507,4 @@ public class FDB { private native boolean Error_predicate(int predicate, int code); private native long Database_create(String clusterFilePath) throws FDBException; -} +} \ No newline at end of file
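Editor's note: the hook-control change above is easier to follow with a usage sketch. The following is a minimal, hypothetical example (not part of this patch) of a client that needs its own shutdown ordering; `selectAPIVersion`, `disableShutdownHook`, `stopNetwork`, and `open` are the binding methods shown in this diff, while the class name and the work done inside the hook are illustrative assumptions:

```java
import com.apple.foundationdb.Database;
import com.apple.foundationdb.FDB;

public class CustomShutdownExample {
    public static void main(String[] args) {
        FDB fdb = FDB.selectAPIVersion(700);
        // Opt out before the network starts; after this patch the built-in
        // hook is only registered lazily inside startNetwork().
        fdb.disableShutdownHook();
        // A replacement hook must stop the network thread itself.
        Runtime.getRuntime().addShutdownHook(new Thread(() -> {
            // ... finish any work that still needs the database ...
            fdb.stopNetwork();
        }, "my-fdb-shutdown"));
        try (Database db = fdb.open()) {
            // ... application work ...
        }
    }
}
```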
diff --git a/bindings/java/src/main/com/apple/foundationdb/FDBTransaction.java b/bindings/java/src/main/com/apple/foundationdb/FDBTransaction.java index d6f1e4f935..09be8a353a 100644 --- a/bindings/java/src/main/com/apple/foundationdb/FDBTransaction.java +++ b/bindings/java/src/main/com/apple/foundationdb/FDBTransaction.java @@ -70,6 +70,16 @@ class FDBTransaction extends NativeObjectWrapper implements Transaction, OptionC return getKey_internal(selector, true); } + @Override + public CompletableFuture<Long> getEstimatedRangeSizeBytes(byte[] begin, byte[] end) { + return FDBTransaction.this.getEstimatedRangeSizeBytes(begin, end); + } + + @Override + public CompletableFuture<Long> getEstimatedRangeSizeBytes(Range range) { + return FDBTransaction.this.getEstimatedRangeSizeBytes(range); + } + /////////////////// // getRange -> KeySelectors /////////////////// @@ -257,6 +267,21 @@ class FDBTransaction extends NativeObjectWrapper implements Transaction, OptionC } } + @Override + public CompletableFuture<Long> getEstimatedRangeSizeBytes(byte[] begin, byte[] end) { + pointerReadLock.lock(); + try { + return new FutureInt64(Transaction_getEstimatedRangeSizeBytes(getPtr(), begin, end), executor); + } finally { + pointerReadLock.unlock(); + } + } + + @Override + public CompletableFuture<Long> getEstimatedRangeSizeBytes(Range range) { + return this.getEstimatedRangeSizeBytes(range.begin, range.end); + } + /////////////////// // getRange -> KeySelectors /////////////////// @@ -659,4 +684,5 @@ class FDBTransaction extends NativeObjectWrapper implements Transaction, OptionC private native long Transaction_watch(long ptr, byte[] key) throws FDBException; private native void Transaction_cancel(long cPtr); private native long Transaction_getKeyLocations(long cPtr, byte[] key); + private native long Transaction_getEstimatedRangeSizeBytes(long cPtr, byte[] keyBegin, byte[] keyEnd); } diff --git a/bindings/java/src/main/com/apple/foundationdb/ReadTransaction.java index 63a5fa73c6..3dd11b77ff 100644 --- a/bindings/java/src/main/com/apple/foundationdb/ReadTransaction.java +++ b/bindings/java/src/main/com/apple/foundationdb/ReadTransaction.java @@ -184,7 +184,9 @@ public interface ReadTransaction extends ReadTransactionContext { * first keys in the range. Pass {@link #ROW_LIMIT_UNLIMITED} if this query * should not limit the number of results. If {@code reverse} is {@code true} rows * will be limited starting at the end of the range. - * @param reverse return results starting at the end of the range in reverse order + * @param reverse return results starting at the end of the range in reverse order. + * Reading ranges in reverse is supported natively by the database and should + * have minimal extra cost. * * @return a handle to access the results of the asynchronous call */ @@ -205,11 +207,22 @@ public interface ReadTransaction extends ReadTransactionContext { * first keys in the range. Pass {@link #ROW_LIMIT_UNLIMITED} if this query * should not limit the number of results. If {@code reverse} is {@code true} rows * will be limited starting at the end of the range. - * @param reverse return results starting at the end of the range in reverse order + * @param reverse return results starting at the end of the range in reverse order. + * Reading ranges in reverse is supported natively by the database and should + * have minimal extra cost. * @param mode provide a hint about how the results are to be used. This * can provide speed improvements or efficiency gains based on the caller's * knowledge of the upcoming access pattern. * + * <p> + * When converting the result of this query to a list using {@link AsyncIterable#asList()} with the {@code ITERATOR} streaming + * mode, the query is automatically modified to fetch results in larger batches. This is done because it is + * known in advance that the {@link AsyncIterable#asList()} function will fetch all results in the range. If a limit is specified, + * the {@code EXACT} streaming mode will be used, and otherwise it will use {@code WANT_ALL}. + * + * To achieve comparable performance when iterating over an entire range without using {@link AsyncIterable#asList()}, the same + * streaming mode would need to be used. + * </p> * @return a handle to access the results of the asynchronous call */ AsyncIterable<KeyValue> getRange(KeySelector begin, KeySelector end, @@ -263,7 +276,9 @@ public interface ReadTransaction extends ReadTransactionContext { * first keys in the range. Pass {@link #ROW_LIMIT_UNLIMITED} if this query * should not limit the number of results. If {@code reverse} is {@code true} rows * will be limited starting at the end of the range. - * @param reverse return results starting at the end of the range in reverse order + * @param reverse return results starting at the end of the range in reverse order. + * Reading ranges in reverse is supported natively by the database and should + * have minimal extra cost. * * @return a handle to access the results of the asynchronous call */ @@ -284,11 +299,22 @@ public interface ReadTransaction extends ReadTransactionContext { * first keys in the range. Pass {@link #ROW_LIMIT_UNLIMITED} if this query * should not limit the number of results. If {@code reverse} is {@code true} rows * will be limited starting at the end of the range. - * @param reverse return results starting at the end of the range in reverse order + * @param reverse return results starting at the end of the range in reverse order. + * Reading ranges in reverse is supported natively by the database and should + * have minimal extra cost. * @param mode provide a hint about how the results are to be used. This * can provide speed improvements or efficiency gains based on the caller's * knowledge of the upcoming access pattern. * + * <p> + * When converting the result of this query to a list using {@link AsyncIterable#asList()} with the {@code ITERATOR} streaming + * mode, the query is automatically modified to fetch results in larger batches. This is done because it is + * known in advance that the {@link AsyncIterable#asList()} function will fetch all results in the range. If a limit is specified, + * the {@code EXACT} streaming mode will be used, and otherwise it will use {@code WANT_ALL}. + * + * To achieve comparable performance when iterating over an entire range without using {@link AsyncIterable#asList()}, the same + * streaming mode would need to be used. + * </p> * @return a handle to access the results of the asynchronous call */ AsyncIterable<KeyValue> getRange(byte[] begin, byte[] end, @@ -351,7 +377,9 @@ public interface ReadTransaction extends ReadTransactionContext { * first keys in the range. Pass {@link #ROW_LIMIT_UNLIMITED} if this query * should not limit the number of results. If {@code reverse} is {@code true} rows * will be limited starting at the end of the range. - * @param reverse return results starting at the end of the range in reverse order + * @param reverse return results starting at the end of the range in reverse order. + * Reading ranges in reverse is supported natively by the database and should + * have minimal extra cost. * * @return a handle to access the results of the asynchronous call */ @@ -375,16 +403,47 @@ public interface ReadTransaction extends ReadTransactionContext { * first keys in the range. Pass {@link #ROW_LIMIT_UNLIMITED} if this query * should not limit the number of results. If {@code reverse} is {@code true} rows * will be limited starting at the end of the range. - * @param reverse return results starting at the end of the range in reverse order + * @param reverse return results starting at the end of the range in reverse order. + * Reading ranges in reverse is supported natively by the database and should + * have minimal extra cost. * @param mode provide a hint about how the results are to be used. This * can provide speed improvements or efficiency gains based on the caller's * knowledge of the upcoming access pattern. * + * <p> + * When converting the result of this query to a list using {@link AsyncIterable#asList()} with the {@code ITERATOR} streaming + * mode, the query is automatically modified to fetch results in larger batches. This is done because it is + * known in advance that the {@link AsyncIterable#asList()} function will fetch all results in the range. If a limit is specified, + * the {@code EXACT} streaming mode will be used, and otherwise it will use {@code WANT_ALL}. + * + * To achieve comparable performance when iterating over an entire range without using {@link AsyncIterable#asList()}, the same + * streaming mode would need to be used. + * </p>
* @return a handle to access the results of the asynchronous call */ AsyncIterable<KeyValue> getRange(Range range, int limit, boolean reverse, StreamingMode mode); + + /** + * Gets an estimate for the number of bytes stored in the given range. + * + * @param begin the beginning of the range (inclusive) + * @param end the end of the range (exclusive) + * + * @return a handle to access the results of the asynchronous call + */ + CompletableFuture<Long> getEstimatedRangeSizeBytes(byte[] begin, byte[] end); + + /** + * Gets an estimate for the number of bytes stored in the given range. + * + * @param range the range of the keys + * + * @return a handle to access the results of the asynchronous call + */ + CompletableFuture<Long> getEstimatedRangeSizeBytes(Range range); + /** * Returns a set of options that can be set on a {@code Transaction} *
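Editor's note: taken together, the Java surface added above can be exercised as follows. This is a minimal sketch (not part of the patch) that assumes a reachable cluster; everything except `getEstimatedRangeSizeBytes` is pre-existing binding API, and the key prefix is an arbitrary example:

```java
import com.apple.foundationdb.Database;
import com.apple.foundationdb.FDB;
import com.apple.foundationdb.Range;
import com.apple.foundationdb.tuple.Tuple;

public class RangeSizeExample {
    public static void main(String[] args) {
        FDB fdb = FDB.selectAPIVersion(700);
        try (Database db = fdb.open()) {
            long size = db.run(tr -> {
                // Estimate of the bytes stored under the "user" prefix,
                // derived from the storage servers' byte sample, so it is
                // cheap to compute but approximate.
                Range r = Range.startsWith(Tuple.from("user").pack());
                return tr.getEstimatedRangeSizeBytes(r).join();
            });
            System.out.println("~" + size + " bytes");
        }
    }
}
```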
diff --git a/bindings/java/src/main/com/apple/foundationdb/directory/DirectoryLayer.java b/bindings/java/src/main/com/apple/foundationdb/directory/DirectoryLayer.java index be802b0cb6..5ea5f3945c 100644 --- a/bindings/java/src/main/com/apple/foundationdb/directory/DirectoryLayer.java +++ b/bindings/java/src/main/com/apple/foundationdb/directory/DirectoryLayer.java @@ -817,9 +817,9 @@ public class DirectoryLayer implements Directory { private static long unpackLittleEndian(byte[] bytes) { assert bytes.length == 8; - int value = 0; + long value = 0; for(int i = 0; i < 8; ++i) { - value += (bytes[i] << (i * 8)); + value += (Byte.toUnsignedLong(bytes[i]) << (i * 8)); } return value; } diff --git a/bindings/python/fdb/impl.py index c7d33f0fe9..974fcdce98 100644 --- a/bindings/python/fdb/impl.py +++ b/bindings/python/fdb/impl.py @@ -449,6 +449,17 @@ class TransactionRead(_FDBBase): if isinstance(key, slice): return self.get_range(key.start, key.stop, reverse=(key.step == -1)) return self.get(key) + + def get_estimated_range_size_bytes(self, beginKey, endKey): + if beginKey is None: + beginKey = b'' + if endKey is None: + endKey = b'\xff' + return FutureInt64(self.capi.fdb_transaction_get_estimated_range_size_bytes( + self.tpointer, + beginKey, len(beginKey), + endKey, len(endKey) + )) class Transaction(TransactionRead): @@ -1424,6 +1435,9 @@ def init_c_api(): ctypes.c_int, ctypes.c_int] _capi.fdb_transaction_get_range.restype = ctypes.c_void_p + _capi.fdb_transaction_get_estimated_range_size_bytes.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p, ctypes.c_int] + _capi.fdb_transaction_get_estimated_range_size_bytes.restype = ctypes.c_void_p + _capi.fdb_transaction_add_conflict_range.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p, ctypes.c_int, ctypes.c_int] _capi.fdb_transaction_add_conflict_range.restype = ctypes.c_int _capi.fdb_transaction_add_conflict_range.errcheck = check_error_code diff --git a/bindings/ruby/lib/fdbimpl.rb b/bindings/ruby/lib/fdbimpl.rb index b1deb1123c..e5e5266197 100644 --- a/bindings/ruby/lib/fdbimpl.rb +++ b/bindings/ruby/lib/fdbimpl.rb @@ -108,6 +108,7 @@ module FDB attach_function :fdb_transaction_get, [ :pointer, :pointer, :int, :int ], :pointer attach_function :fdb_transaction_get_key, [ :pointer, :pointer, :int, :int, :int, :int ], :pointer attach_function :fdb_transaction_get_range, [ :pointer, :pointer, :int, :int, :int, :pointer, :int, :int, :int, :int, :int, :int, :int, :int, :int ], :pointer + attach_function :fdb_transaction_get_estimated_range_size_bytes, [ :pointer, :pointer, :int, :pointer, :int ], :pointer attach_function :fdb_transaction_set, [ :pointer, :pointer, :int, :pointer, :int ], :void attach_function :fdb_transaction_clear, [ :pointer, :pointer, :int ], :void attach_function :fdb_transaction_clear_range, [ :pointer, :pointer, :int, :pointer, :int ], :void @@ -817,6 +818,13 @@ module FDB prefix = prefix.dup.force_encoding "BINARY" get_range(prefix, FDB.strinc(prefix), options, &block) end + + def get_estimated_range_size_bytes(beginKey, endKey) + bkey = FDB.key_to_bytes(beginKey) + ekey = FDB.key_to_bytes(endKey) + Int64Future.new(FDBC.fdb_transaction_get_estimated_range_size_bytes(@tpointer, bkey, bkey.bytesize, ekey, ekey.bytesize)) + end + end TransactionRead.class_variable_set("@@StreamingMode", @@StreamingMode) diff --git a/build/Dockerfile b/build/Dockerfile index 4e11066125..c8a84818b8 100644 --- a/build/Dockerfile +++ b/build/Dockerfile @@ -1,6 +1,4 @@ FROM centos:6 -LABEL version=0.1.9 -ENV DOCKER_IMAGEVER=0.1.9 # Install dependencies for developer tools, bindings,\ # documentation, actorcompiler, and packaging tools\ @@ -8,9 +6,10 @@ RUN yum install -y yum-utils &&\ yum-config-manager --enable rhel-server-rhscl-7-rpms &&\ yum -y install centos-release-scl epel-release &&\ yum -y install devtoolset-8-8.1-1.el6 java-1.8.0-openjdk-devel \ + devtoolset-8-gcc-8.3.1-3.1.el6 devtoolset-8-gcc-c++-8.3.1-3.1.el6 \ rh-python36-python-devel devtoolset-8-valgrind-devel \ mono-core rh-ruby24 golang python27 rpm-build debbuild \ - python-pip npm dos2unix valgrind-devel ccache distcc devtoolset-8-libubsan-devel libubsan-devel &&\ + python-pip dos2unix valgrind-devel ccache distcc devtoolset-8-libubsan-devel libubsan-devel &&\ pip install boto3==1.1.1 USER root @@ -19,32 +18,42 @@ RUN adduser --comment '' fdb && chown fdb /opt # wget of bintray without forcing UTF-8 encoding results in 403 Forbidden RUN cd /opt/ &&\ - curl -L https://dl.bintray.com/boostorg/release/1.67.0/source/boost_1_67_0.tar.bz2 > boost_1_67_0.tar.bz2 &&\ - echo "2684c972994ee57fc5632e03bf044746f6eb45d4920c343937a465fd67a5adba boost_1_67_0.tar.bz2" > boost-sha.txt &&\ - sha256sum -c boost-sha.txt &&\ + curl -L https://dl.bintray.com/boostorg/release/1.67.0/source/boost_1_67_0.tar.bz2 -o boost_1_67_0.tar.bz2 &&\ + echo "2684c972994ee57fc5632e03bf044746f6eb45d4920c343937a465fd67a5adba boost_1_67_0.tar.bz2" > boost-sha-67.txt &&\ + sha256sum -c boost-sha-67.txt &&\ tar -xjf boost_1_67_0.tar.bz2 &&\ - rm -rf boost_1_67_0.tar.bz2 boost-sha.txt boost_1_67_0/libs + rm -rf boost_1_67_0.tar.bz2 boost-sha-67.txt boost_1_67_0/libs &&\ + curl -L https://dl.bintray.com/boostorg/release/1.72.0/source/boost_1_72_0.tar.bz2 -o boost_1_72_0.tar.bz2 &&\ + echo "59c9b274bc451cf91a9ba1dd2c7fdcaf5d60b1b3aa83f2c9fa143417cc660722 boost_1_72_0.tar.bz2" > boost-sha-72.txt &&\ + sha256sum -c boost-sha-72.txt &&\ + tar -xjf boost_1_72_0.tar.bz2 &&\ + rm -rf boost_1_72_0.tar.bz2 boost-sha-72.txt boost_1_72_0/libs # install cmake -RUN curl -L https://github.com/Kitware/CMake/releases/download/v3.13.4/cmake-3.13.4-Linux-x86_64.tar.gz > /tmp/cmake.tar.gz &&\ +RUN curl -L https://github.com/Kitware/CMake/releases/download/v3.13.4/cmake-3.13.4-Linux-x86_64.tar.gz -o /tmp/cmake.tar.gz &&\ echo "563a39e0a7c7368f81bfa1c3aff8b590a0617cdfe51177ddc808f66cc0866c76 /tmp/cmake.tar.gz" > /tmp/cmake-sha.txt &&\ sha256sum -c /tmp/cmake-sha.txt &&\ cd /tmp && tar xf cmake.tar.gz &&\ cp -r cmake-3.13.4-Linux-x86_64/* /usr/local/ &&\ rm -rf cmake.tar.gz cmake-3.13.4-Linux-x86_64 cmake-sha.txt -# install LibreSSL -RUN cd /tmp && curl -L https://github.com/ninja-build/ninja/archive/v1.9.0.zip > ninja.zip &&\ +# 
install Ninja +RUN cd /tmp && curl -L https://github.com/ninja-build/ninja/archive/v1.9.0.zip -o ninja.zip &&\ unzip ninja.zip && cd ninja-1.9.0 && scl enable devtoolset-8 -- ./configure.py --bootstrap && cp ninja /usr/bin &&\ - cd .. && rm -rf ninja-1.9.0 ninja.zip &&\ - curl -L https://ftp.openbsd.org/pub/OpenBSD/LibreSSL/libressl-2.8.2.tar.gz > /tmp/libressl.tar.gz &&\ - cd /tmp && echo "b8cb31e59f1294557bfc80f2a662969bc064e83006ceef0574e2553a1c254fd5 libressl.tar.gz" > libressl-sha.txt &&\ - sha256sum -c libressl-sha.txt && tar xf libressl.tar.gz &&\ - cd libressl-2.8.2 && cd /tmp/libressl-2.8.2 && scl enable devtoolset-8 -- ./configure --prefix=/usr/local/stow/libressl CFLAGS="-fPIC -O3" --prefix=/usr/local &&\ - cd /tmp/libressl-2.8.2 && scl enable devtoolset-8 -- make -j`nproc` install &&\ - rm -rf /tmp/libressl-2.8.2 /tmp/libressl.tar.gz + cd .. && rm -rf ninja-1.9.0 ninja.zip +# install openssl +RUN cd /tmp && curl -L https://www.openssl.org/source/openssl-1.1.1d.tar.gz -o openssl.tar.gz &&\ + echo "1e3a91bc1f9dfce01af26026f856e064eab4c8ee0a8f457b5ae30b40b8b711f2 openssl.tar.gz" > openssl-sha.txt &&\ + sha256sum -c openssl-sha.txt && tar -xzf openssl.tar.gz &&\ + cd openssl-1.1.1d && scl enable devtoolset-8 -- ./config CFLAGS="-fPIC -O3" --prefix=/usr/local &&\ + scl enable devtoolset-8 -- make -j`nproc` && scl enable devtoolset-8 -- make -j1 install &&\ + ln -sv /usr/local/lib64/lib*.so.1.1 /usr/lib64/ &&\ + cd /tmp/ && rm -rf /tmp/openssl-1.1.1d /tmp/openssl.tar.gz + +LABEL version=0.1.12 +ENV DOCKER_IMAGEVER=0.1.12 ENV JAVA_HOME=/usr/lib/jvm/java-1.8.0 ENV CC=/opt/rh/devtoolset-8/root/usr/bin/gcc ENV CXX=/opt/rh/devtoolset-8/root/usr/bin/g++ -CMD scl enable devtoolset-8 python27 rh-python36 rh-ruby24 -- bash +CMD scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash diff --git a/build/docker-compose.yaml b/build/docker-compose.yaml index 1853cc80fd..2f6fe49b42 100644 --- a/build/docker-compose.yaml +++ b/build/docker-compose.yaml @@ -2,7 +2,7 @@ version: "3" services: common: &common - image: foundationdb/foundationdb-build:0.1.9 + image: foundationdb/foundationdb-build:0.1.12 build-setup: &build-setup <<: *common @@ -36,11 +36,11 @@ services: release-packages: &release-packages <<: *release-setup - command: scl enable devtoolset-8 python27 rh-python36 rh-ruby24 -- bash -c 'make -j "$${MAKEJOBS}" packages' + command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'make -j "$${MAKEJOBS}" packages' snapshot-packages: &snapshot-packages <<: *build-setup - command: scl enable devtoolset-8 python27 rh-python36 rh-ruby24 -- bash -c 'make -j "$${MAKEJOBS}" packages' + command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'make -j "$${MAKEJOBS}" packages' prb-packages: <<: *snapshot-packages @@ -48,11 +48,11 @@ services: release-bindings: &release-bindings <<: *release-setup - command: scl enable devtoolset-8 python27 rh-python36 rh-ruby24 -- bash -c 'make -j "$${MAKEJOBS}" bindings' + command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'make -j "$${MAKEJOBS}" bindings' snapshot-bindings: &snapshot-bindings <<: *build-setup - command: scl enable devtoolset-8 python27 rh-python36 rh-ruby24 -- bash -c 'make -j "$${MAKEJOBS}" bindings' + command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'make -j "$${MAKEJOBS}" bindings' prb-bindings: <<: *snapshot-bindings @@ -60,7 +60,7 @@ services: snapshot-cmake: &snapshot-cmake <<: *build-setup - command: scl enable devtoolset-8 python27 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && 
cd "$${BUILD_DIR}" && cmake -G "Ninja" -DCMAKE_COLOR_MAKEFILE=0 -DFDB_RELEASE=0 -DVALGRIND=0 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && ninja -v -j "$${MAKEJOBS}" "packages" "strip_targets" && cpack' + command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -G "Ninja" -DCMAKE_COLOR_MAKEFILE=0 -DFDB_RELEASE=0 -DVALGRIND=0 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && ninja -v -j "$${MAKEJOBS}" "packages" "strip_targets" && cpack' prb-cmake: <<: *snapshot-cmake @@ -68,7 +68,7 @@ services: snapshot-ctest: &snapshot-ctest <<: *build-setup - command: scl enable devtoolset-8 python27 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -G "Ninja" -DCMAKE_COLOR_MAKEFILE=0 -DFDB_RELEASE=1 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && ninja -v -j "$${MAKEJOBS}" && ctest -L fast -j "$${MAKEJOBS}" --output-on-failure' + command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -G "Ninja" -DCMAKE_COLOR_MAKEFILE=0 -DFDB_RELEASE=1 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && ninja -v -j "$${MAKEJOBS}" && ctest -L fast -j "$${MAKEJOBS}" --output-on-failure' prb-ctest: <<: *snapshot-ctest @@ -76,7 +76,7 @@ services: snapshot-correctness: &snapshot-correctness <<: *build-setup - command: scl enable devtoolset-8 python27 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -G "Ninja" -DCMAKE_COLOR_MAKEFILE=0 -DFDB_RELEASE=1 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && ninja -v -j "$${MAKEJOBS}" && ctest -j "$${MAKEJOBS}" --output-on-failure' + command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -G "Ninja" -DCMAKE_COLOR_MAKEFILE=0 -DFDB_RELEASE=1 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && ninja -v -j "$${MAKEJOBS}" && ctest -j "$${MAKEJOBS}" --output-on-failure' prb-correctness: <<: *snapshot-correctness diff --git a/cmake/ConfigureCompiler.cmake b/cmake/ConfigureCompiler.cmake index f212f58018..092683e393 100644 --- a/cmake/ConfigureCompiler.cmake +++ b/cmake/ConfigureCompiler.cmake @@ -11,6 +11,7 @@ set(USE_LIBCXX OFF CACHE BOOL "Use libc++") set(USE_CCACHE OFF CACHE BOOL "Use ccache for compilation if available") set(RELATIVE_DEBUG_PATHS OFF CACHE BOOL "Use relative file paths in debug info") set(STATIC_LINK_LIBCXX ON CACHE BOOL "Statically link libstdcpp/libc++") +set(USE_WERROR OFF CACHE BOOL "Compile with -Werror. 
Recommended for local development and CI.") set(rel_debug_paths OFF) if(RELATIVE_DEBUG_PATHS) @@ -86,15 +87,22 @@ if(WIN32) # see: https://docs.microsoft.com/en-us/windows/desktop/WinProg/using-the-windows-headers # this sets the windows target version to Windows Server 2003 set(WINDOWS_TARGET 0x0502) - add_compile_options(/W3 /EHsc /bigobj $<$:/Zi> /MP /FC) + if(CMAKE_CXX_FLAGS MATCHES "/W[0-4]") + # TODO: This doesn't seem to be good style, but I couldn't find a better way so far + string(REGEX REPLACE "/W[0-4]" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + endif() + add_compile_options(/W0 /EHsc /bigobj $<$:/Zi> /MP /FC /Gm-) add_compile_definitions(_WIN32_WINNT=${WINDOWS_TARGET} WINVER=${WINDOWS_TARGET} NTDDI_VERSION=0x05020000 BOOST_ALL_NO_LIB) set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MT") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /MTd") else() set(GCC NO) set(CLANG NO) + set(ICC NO) if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang") set(CLANG YES) + elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel") + set(ICC YES) else() # This is not a very good test. However, as we do not really support many architectures # this is good enough for now @@ -230,8 +238,7 @@ else() -Wno-error=unused-command-line-argument) endif() endif() - if (CMAKE_GENERATOR STREQUAL Xcode) - else() + if (USE_WERROR) add_compile_options(-Werror) endif() if (GCC) @@ -240,6 +247,9 @@ else() # Otherwise `state [[maybe_unused]] int x;` will issue a warning. # https://stackoverflow.com/questions/50646334/maybe-unused-on-member-variable-gcc-warns-incorrectly-that-attribute-is add_compile_options(-Wno-attributes) + elseif(ICC) + add_compile_options(-wd1879 -wd1011) + add_link_options(-static-intel) endif() add_compile_options(-Wno-error=format -Wunused-variable diff --git a/cmake/FDBComponents.cmake b/cmake/FDBComponents.cmake index 69e93cec06..6ada101d39 100644 --- a/cmake/FDBComponents.cmake +++ b/cmake/FDBComponents.cmake @@ -9,21 +9,32 @@ if(USE_VALGRIND) endif() ################################################################################ -# LibreSSL +# SSL ################################################################################ set(DISABLE_TLS OFF CACHE BOOL "Don't try to find LibreSSL and always build without TLS support") if(DISABLE_TLS) set(WITH_TLS OFF) else() - set(LIBRESSL_USE_STATIC_LIBS TRUE) - find_package(LibreSSL) - if(LibreSSL_FOUND) + set(OPENSSL_USE_STATIC_LIBS TRUE) + find_package(OpenSSL) + if(NOT OPENSSL_FOUND) + set(LIBRESSL_USE_STATIC_LIBS TRUE) + find_package(LibreSSL) + if (LIBRESSL_FOUND) + add_library(OpenSSL::SSL ALIAS LibreSSL) + endif() + endif() + if(OPENSSL_FOUND OR LIBRESSL_FOUND) set(WITH_TLS ON) add_compile_options(-DHAVE_OPENSSL) else() - message(STATUS "LibreSSL NOT Found - Will compile without TLS Support") - message(STATUS "You can set LibreSSL_ROOT to the LibreSSL install directory to help cmake find it") + message(STATUS "Neither OpenSSL nor LibreSSL were found - Will compile without TLS Support") + message(STATUS "You can set OPENSSL_ROOT_DIR or LibreSSL_ROOT to the LibreSSL install directory to help cmake find it") + set(WITH_TLS OFF) + endif() + if(WIN32) + message(STATUS "TLS is temporarilty disabled on macOS while libressl -> openssl transition happens") set(WITH_TLS OFF) endif() endif() @@ -33,7 +44,7 @@ endif() ################################################################################ set(WITH_JAVA OFF) -find_package(JNI 1.8 REQUIRED) +find_package(JNI 1.8) find_package(Java 1.8 
COMPONENTS Development) if(JNI_FOUND AND Java_FOUND AND Java_Development_FOUND) set(WITH_JAVA ON) @@ -51,7 +62,7 @@ find_package(Python COMPONENTS Interpreter) if(Python_Interpreter_FOUND) set(WITH_PYTHON ON) else() - message(FATAL_ERROR "Could not found a suitable python interpreter") + #message(FATAL_ERROR "Could not find a suitable python interpreter") set(WITH_PYTHON OFF) endif() @@ -59,8 +70,8 @@ endif() # Pip ################################################################################ -find_package(Virtualenv) -if (Virtualenv_FOUND) +find_package(Python3 COMPONENTS Interpreter) +if (Python3_Interpreter_FOUND) set(WITH_DOCUMENTATION ON) else() set(WITH_DOCUMENTATION OFF) endif() @@ -102,6 +113,8 @@ function(print_components) message(STATUS "Build Ruby bindings: ${WITH_RUBY}") message(STATUS "Build Python sdist (make package): ${WITH_PYTHON}") message(STATUS "Build Documentation (make html): ${WITH_DOCUMENTATION}") + message(STATUS "Build Bindings (depends on Python): ${WITH_PYTHON}") + message(STATUS "Configure CTest (depends on Python): ${WITH_PYTHON}") message(STATUS "=========================================") endfunction() diff --git a/cmake/FindVirtualenv.cmake b/cmake/FindVirtualenv.cmake deleted file mode 100644 index ace748f672..0000000000 --- a/cmake/FindVirtualenv.cmake +++ /dev/null @@ -1,20 +0,0 @@ -find_program(_VIRTUALENV_EXE virtualenv) - -# get version and test that program actually works -if(_VIRTUALENV_EXE) - execute_process( - COMMAND ${_VIRTUALENV_EXE} --version - RESULT_VARIABLE ret_code - OUTPUT_VARIABLE version_string - ERROR_VARIABLE error_output - OUTPUT_STRIP_TRAILING_WHITESPACE) - if(ret_code EQUAL 0 AND NOT ERROR_VARIABLE) - # we found a working virtualenv - set(VIRTUALENV_EXE ${_VIRTUALENV_EXE}) - set(VIRTUALENV_VERSION version_string) - endif() -endif() - -find_package_handle_standard_args(Virtualenv - REQUIRED_VARS VIRTUALENV_EXE - VERSION_VAR ${VIRTUALENV_VERSION}) diff --git a/cmake/FlowCommands.cmake b/cmake/FlowCommands.cmake index 2fea45a8c2..a653336abe 100644 --- a/cmake/FlowCommands.cmake +++ b/cmake/FlowCommands.cmake @@ -186,12 +186,12 @@ function(add_flow_target) if(WIN32) add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${generated}" COMMAND $<TARGET_FILE:actorcompiler> "${CMAKE_CURRENT_SOURCE_DIR}/${src}" "${CMAKE_CURRENT_BINARY_DIR}/${generated}" ${actor_compiler_flags} - DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${src}" actorcompiler ${actor_exe} + DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${src}" COMMENT "Compile actor: ${src}") else() add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${generated}" COMMAND ${MONO_EXECUTABLE} ${actor_exe} "${CMAKE_CURRENT_SOURCE_DIR}/${src}" "${CMAKE_CURRENT_BINARY_DIR}/${generated}" ${actor_compiler_flags} > /dev/null - DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${src}" actorcompiler ${actor_exe} + DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${src}" COMMENT "Compile actor: ${src}") endif() else() @@ -221,15 +221,18 @@ function(add_flow_target) get_filename_component(dname ${CMAKE_CURRENT_SOURCE_DIR} NAME) string(REGEX REPLACE "\\..*" "" fname ${src}) string(REPLACE / _ fname ${fname}) - set_source_files_properties(${src} PROPERTIES COMPILE_DEFINITIONS FNAME=${dname}_${fname}) + #set_source_files_properties(${src} PROPERTIES COMPILE_DEFINITIONS FNAME=${dname}_${fname}) endforeach() set_property(TARGET ${AFT_NAME} PROPERTY SOURCE_FILES ${AFT_SRCS}) set_property(TARGET ${AFT_NAME} PROPERTY COVERAGE_FILTERS ${AFT_SRCS}) add_custom_target(${AFT_NAME}_actors DEPENDS ${generated_files}) + add_dependencies(${AFT_NAME}_actors actorcompiler) 
add_dependencies(${AFT_NAME} ${AFT_NAME}_actors) - assert_no_version_h(${AFT_NAME}_actors) + if(NOT WIN32) + assert_no_version_h(${AFT_NAME}_actors) + endif() generate_coverage_xml(${AFT_NAME}) if(strip_target) strip_debug_symbols(${AFT_NAME}) diff --git a/design/backup.md b/design/backup.md index 80247462eb..072e8fb729 100644 --- a/design/backup.md +++ b/design/backup.md @@ -17,7 +17,7 @@ KV ranges {(a-b, v0), (c-d, v1), (e-f, v2) ... (y-z, v10)}. With mutation log recorded all along, we can still use the simple backup-restore scheme described above on sub keyspaces seperately. Assuming we did record mutation log from v0 to vn, that allows us to restore - + * Keyspace a-b to any version between v0 and vn * Keyspace c-d to any version between v1 and vn * Keyspace y-z to any version between v10 and vn diff --git a/design/recovery-internals.md b/design/recovery-internals.md index 3ede735e13..c9d8631ddc 100644 --- a/design/recovery-internals.md +++ b/design/recovery-internals.md @@ -67,7 +67,7 @@ The transaction system state before the recovery is the starting point for the c ## Phase 2: LOCKING_CSTATE -This phase locks the coordinated state (cstate) to make sure there is only one master who can change the cstate. Otherwise, we may end up with more than one master accepting commits after the recovery. To achieve that, the master needs to get currently alive tLogs’ interfaces and sends commands to tLogs to lock their states, preventing them from accepting any further writes. +This phase locks the coordinated state (cstate) to make sure there is only one master who can change the cstate. Otherwise, we may end up with more than one master accepting commits after the recovery. To achieve that, the master needs to get currently alive tLogs’ interfaces and sends commands to tLogs to lock their states, preventing them from accepting any further writes. Recall that `ServerDBInfo` has master's interface and is propogated by CC to every process in a cluster. The current running tLogs can use the master interface in its `ServerDBInfo` to send itself's interface to master. Master simply waits on receiving the `TLogRejoinRequest` streams: for each tLog’s interface received, the master compares the interface ID with the tLog ID read from cstate. Once the master collects enough old tLog interfaces, it will use the interfaces to lock those tLogs. diff --git a/documentation/CMakeLists.txt b/documentation/CMakeLists.txt index 0f0fd8c02d..ccd60a2bbd 100644 --- a/documentation/CMakeLists.txt +++ b/documentation/CMakeLists.txt @@ -10,7 +10,7 @@ set(pip_command ${venv_dir}/bin/pip${EXE_SUFFIX}) set(python_command ${venv_dir}/bin/python${EXE_SUFFIX}) add_custom_command(OUTPUT ${venv_dir}/venv_setup - COMMAND ${VIRTUALENV_EXE} venv && + COMMAND ${Python3_EXECUTABLE} -m venv venv && ${CMAKE_COMMAND} -E copy ${sphinx_dir}/.pip.conf ${venv_dir}/pip.conf && . 
${venv_dir}/bin/activate && ${pip_command} install --upgrade pip && @@ -86,7 +86,7 @@ else() endif() add_custom_target(docpreview - COMMAND ${python_command} -m SimpleHTTPServer ${port} + COMMAND ${python_command} -m http.server ${port} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/html USES_TERMINAL) add_dependencies(docpreview html) diff --git a/documentation/sphinx/Makefile b/documentation/sphinx/Makefile index ea672b8aa4..dc91435b93 100644 --- a/documentation/sphinx/Makefile +++ b/documentation/sphinx/Makefile @@ -18,11 +18,6 @@ SPHINXBUILD = $(VENVDIR)/bin/sphinx-build SPHINXAUTOBUILD = $(VENVDIR)/bin/sphinx-autobuild TEMPLATEDIR = $(ROOTDIR)/_templates -# virtualenv for sphinx-build -VENV_VERSION ?= virtualenv-13.0.1 -VENV_URL_BASE ?= https://pypi.python.org -VENV_URL ?= $(VENV_URL_BASE)/packages/source/v/virtualenv/$(VENV_VERSION).tar.gz - # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter @@ -68,9 +63,7 @@ buildsphinx: if [ ! -e $(SPHINXBUILD) ]; then \ mkdir $(BUILDDIR); \ cd $(BUILDDIR); \ - curl -OL $(VENV_URL); \ - tar zxvf $(VENV_VERSION).tar.gz; \ - python2 ./$(VENV_VERSION)/virtualenv.py venv; \ + python3 -m venv venv; \ fi . $(VENVDIR)/bin/activate && \ cp .pip.conf $(VENVDIR)/pip.conf && \ diff --git a/documentation/sphinx/extensions/rubydomain.py b/documentation/sphinx/extensions/rubydomain.py index 540f8487d3..1e5fb0bce4 100755 --- a/documentation/sphinx/extensions/rubydomain.py +++ b/documentation/sphinx/extensions/rubydomain.py @@ -502,7 +502,7 @@ class RubyModuleIndex(Index): ignores = self.domain.env.config['modindex_common_prefix'] ignores = sorted(ignores, key=len, reverse=True) # list of all modules, sorted by module name - modules = sorted(self.domain.data['modules'].iteritems(), + modules = sorted(iter(self.domain.data['modules'].items()), key=lambda x: x[0].lower()) # sort out collapsable modules prev_modname = '' @@ -551,7 +551,7 @@ class RubyModuleIndex(Index): collapse = len(modules) - num_toplevels < num_toplevels # sort by first letter - content = sorted(content.iteritems()) + content = sorted(content.items()) return content, collapse @@ -609,10 +609,10 @@ class RubyDomain(Domain): ] def clear_doc(self, docname): - for fullname, (fn, _) in self.data['objects'].items(): + for fullname, (fn, _) in list(self.data['objects'].items()): if fn == docname: del self.data['objects'][fullname] - for modname, (fn, _, _, _) in self.data['modules'].items(): + for modname, (fn, _, _, _) in list(self.data['modules'].items()): if fn == docname: del self.data['modules'][modname] @@ -704,9 +704,9 @@ class RubyDomain(Domain): contnode, name) def get_objects(self): - for modname, info in self.data['modules'].iteritems(): + for modname, info in self.data['modules'].items(): yield (modname, modname, 'module', info[0], 'module-' + modname, 0) - for refname, (docname, type) in self.data['objects'].iteritems(): + for refname, (docname, type) in self.data['objects'].items(): yield (refname, refname, type, docname, refname, 1) diff --git a/documentation/sphinx/source/administration.rst b/documentation/sphinx/source/administration.rst index be115ad585..4041ce5eda 100644 --- a/documentation/sphinx/source/administration.rst +++ b/documentation/sphinx/source/administration.rst @@ -177,7 +177,7 @@ You can add new machines to a cluster at any time: 5) If you have previously :ref:`excluded ` a machine from the cluster, you will need to take it off the exclusion list using the ``include `` command of fdbcli before it can be a full 
participant in the cluster. - .. note:: Addresses have the form ``IP``:``PORT``. This form is used even if TLS is enabled. +.. note:: Addresses have the form ``IP``:``PORT``. This form is used even if TLS is enabled. .. _removing-machines-from-a-cluster: @@ -192,26 +192,26 @@ To temporarily or permanently remove one or more machines from a FoundationDB cl 3) Use the ``exclude`` command in ``fdbcli`` on the machines you plan to remove: - :: +:: - user@host1$ fdbcli - Using cluster file `/etc/foundationdb/fdb.cluster'. + user@host1$ fdbcli + Using cluster file `/etc/foundationdb/fdb.cluster'. - The database is available. + The database is available. - Welcome to the fdbcli. For help, type `help'. - fdb> exclude 1.2.3.4 1.2.3.5 1.2.3.6 - Waiting for state to be removed from all excluded servers. This may take a while. - It is now safe to remove these machines or processes from the cluster. + Welcome to the fdbcli. For help, type `help'. + fdb> exclude 1.2.3.4 1.2.3.5 1.2.3.6 + Waiting for state to be removed from all excluded servers. This may take a while. + It is now safe to remove these machines or processes from the cluster. - - ``exclude`` can be used to exclude either machines (by specifying an IP address) or individual processes (by specifying an ``IP``:``PORT`` pair). - .. note:: Addresses have the form ``IP``:``PORT``. This form is used even if TLS is enabled. - - Excluding a server doesn't shut it down immediately; data on the machine is first moved away. When the ``exclude`` command completes successfully (by returning control to the command prompt), the machines that you specified are no longer required to maintain the configured redundancy mode. A large amount of data might need to be transferred first, so be patient. When the process is complete, the excluded machine or process can be shut down without fault tolerance or availability consequences. - - If you interrupt the exclude command with Ctrl-C after seeing the "waiting for state to be removed" message, the exclusion work will continue in the background. Repeating the command will continue waiting for the exclusion to complete. To reverse the effect of the ``exclude`` command, use the ``include`` command. +``exclude`` can be used to exclude either machines (by specifying an IP address) or individual processes (by specifying an ``IP``:``PORT`` pair). + +.. note:: Addresses have the form ``IP``:``PORT``. This form is used even if TLS is enabled. + +Excluding a server doesn't shut it down immediately; data on the machine is first moved away. When the ``exclude`` command completes successfully (by returning control to the command prompt), the machines that you specified are no longer required to maintain the configured redundancy mode. A large amount of data might need to be transferred first, so be patient. When the process is complete, the excluded machine or process can be shut down without fault tolerance or availability consequences. + +If you interrupt the exclude command with Ctrl-C after seeing the "waiting for state to be removed" message, the exclusion work will continue in the background. Repeating the command will continue waiting for the exclusion to complete. To reverse the effect of the ``exclude`` command, use the ``include`` command. Excluding a server with the ``failed`` flag will shut it down immediately; it will assume that it has already become unrecoverable or unreachable, and will not attempt to move the data on the machine away. 
This may break the guarantee required to maintain the configured redundancy mode, which will be checked internally, and the command may be denied if the guarantee is violated. This safety check can be ignored by using the command ``exclude FORCE failed``. @@ -320,9 +320,9 @@ Running backups Number of backups currently running. Different backups c Running DRs Number of DRs currently running. Different DRs could be streaming different prefixes and/or to different DR clusters. ====================== ========================================================================================================== -The "Memory availability" is a conservative estimate of the minimal RAM available to any ``fdbserver`` process across all machines in the cluster. This value is calculated in two steps. Memory available per process is first calculated *for each machine* by taking: +The "Memory availability" is a conservative estimate of the minimal RAM available to any ``fdbserver`` process across all machines in the cluster. This value is calculated in two steps. Memory available per process is first calculated *for each machine* by taking:: - availability = ((total - committed) + sum(processSize)) / processes + availability = ((total - committed) + sum(processSize)) / processes where: @@ -693,12 +693,18 @@ Upgrades from 6.1.x will keep all your old data and configuration settings. Data Upgrading from 6.0.x -------------------- -Upgrades from 6.0.x will keep all your old data and configuration settings. Data distribution will slowly reorganize how data is spread across storage servers. +Upgrades from 6.0.x will keep all your old data and configuration settings. Upgrading from 5.2.x -------------------- -Upgrades from 5.2.x will keep all your old data and configuration settings. +Upgrades from 5.2.x will keep all your old data and configuration settings. The affinities of certain roles for running on processes that haven't set a process class have changed, which may result in those roles running in different locations after upgrading. To avoid this, set process classes as needed. The following changes were made: + +* The proxies and master no longer prefer ``resolution`` or ``transaction`` class processes to processes with unset class. +* The resolver no longer prefers ``transaction`` class processes to processes with unset class. +* The cluster controller no longer prefers ``master``, ``resolution`` or ``proxy`` class processes to processes with unset class. + +See :ref:`guidelines-process-class-config` for recommendations on setting process classes. All of the above roles will prefer ``stateless`` class processes to ones that don't set a class. Upgrading from 5.0.x - 5.1.x ---------------------------- diff --git a/documentation/sphinx/source/api-c.rst index f3fe41a64f..d31c735112 100644 --- a/documentation/sphinx/source/api-c.rst +++ b/documentation/sphinx/source/api-c.rst @@ -51,8 +51,6 @@ .. |timeout-database-option| replace:: FIXME .. |causal-read-risky-transaction-option| replace:: FIXME .. |causal-read-risky-database-option| replace:: FIXME -.. |include-port-in-address-database-option| replace:: FIXME -.. |include-port-in-address-transaction-option| replace:: FIXME .. |transaction-logging-max-field-length-database-option| replace:: FIXME ..
|transaction-logging-max-field-length-transaction-option| replace:: FIXME @@ -530,8 +528,7 @@ Applications must provide error handling and an appropriate retry loop around th |snapshot| ``reverse`` - - If non-zero, key-value pairs will be returned in reverse lexicographical order beginning at the end of the range. + If non-zero, key-value pairs will be returned in reverse lexicographical order beginning at the end of the range. Reading ranges in reverse is supported natively by the database and should have minimal extra cost. .. type:: FDBStreamingMode @@ -539,31 +536,31 @@ Applications must provide error handling and an appropriate retry loop around th ``FDB_STREAMING_MODE_ITERATOR`` - The caller is implementing an iterator (most likely in a binding to a higher level language). The amount of data returned depends on the value of the ``iteration`` parameter to :func:`fdb_transaction_get_range()`. + The caller is implementing an iterator (most likely in a binding to a higher level language). The amount of data returned depends on the value of the ``iteration`` parameter to :func:`fdb_transaction_get_range()`. ``FDB_STREAMING_MODE_SMALL`` - Data is returned in small batches (not much more expensive than reading individual key-value pairs). + Data is returned in small batches (not much more expensive than reading individual key-value pairs). ``FDB_STREAMING_MODE_MEDIUM`` - Data is returned in batches between _SMALL and _LARGE. + Data is returned in batches between _SMALL and _LARGE. ``FDB_STREAMING_MODE_LARGE`` - Data is returned in batches large enough to be, in a high-concurrency environment, nearly as efficient as possible. If the caller does not need the entire range, some disk and network bandwidth may be wasted. The batch size may be still be too small to allow a single client to get high throughput from the database. + Data is returned in batches large enough to be, in a high-concurrency environment, nearly as efficient as possible. If the caller does not need the entire range, some disk and network bandwidth may be wasted. The batch size may still be too small to allow a single client to get high throughput from the database. ``FDB_STREAMING_MODE_SERIAL`` - Data is returned in batches large enough that an individual client can get reasonable read bandwidth from the database. If the caller does not need the entire range, considerable disk and network bandwidth may be wasted. + Data is returned in batches large enough that an individual client can get reasonable read bandwidth from the database. If the caller does not need the entire range, considerable disk and network bandwidth may be wasted. ``FDB_STREAMING_MODE_WANT_ALL`` - The caller intends to consume the entire range and would like it all transferred as early as possible. + The caller intends to consume the entire range and would like it all transferred as early as possible. ``FDB_STREAMING_MODE_EXACT`` - The caller has passed a specific row limit and wants that many rows delivered in a single batch. + The caller has passed a specific row limit and wants that many rows delivered in a single batch. .. function:: void fdb_transaction_set(FDBTransaction* transaction, uint8_t const* key_name, int key_name_length, uint8_t const* value, int value_length) diff --git a/documentation/sphinx/source/api-common.rst.inc index 65a5885c30..30f5439485 100644 --- a/documentation/sphinx/source/api-common.rst.inc +++ b/documentation/sphinx/source/api-common.rst.inc @@ -242,6 +242,9 @@ ..
|option-trace-format-blurb| replace:: Select the format of the trace files for this FoundationDB client. xml (the default) and json are supported. +.. |option-trace-clock-source-blurb| replace:: + Select the clock source for trace files. now (the default) and realtime are supported. + .. |network-options-warning| replace:: It is an error to set these options after the first call to |open-func| anywhere in your application. @@ -329,10 +332,6 @@ Transactions do not require the strict causal consistency guarantee that FoundationDB provides by default. The read version will be committed, and usually will be the latest committed, but might not be the latest committed in the event of a simultaneous fault and misbehaving clock. Enabling this option is equivalent to calling |causal-read-risky-transaction-option| on each transaction created by this database. -.. |option-db-include-port-in-address-blurb| replace:: - - Addresses returned by get_addresses_for_key include the port when enabled. This will be enabled by default in api version 700, and this option will be deprecated. Enabling this option is equivalent to calling |include-port-in-address-transaction-option| on each transaction created by this database. - .. |option-db-snapshot-ryw-enable-blurb| replace:: If this option has been set an equal or more times with this database than the disable option, snapshot reads *will* see the effects of prior writes in the same transaction. Enabling this option is equivalent to calling |snapshot-ryw-enable-transaction-option| on each transaction created by this database. @@ -372,10 +371,6 @@ This transaction does not require the strict causal consistency guarantee that FoundationDB provides by default. The read version will be committed, and usually will be the latest committed, but might not be the latest committed in the event of a simultaneous fault and misbehaving clock. One can set this for all transactions by calling |causal-read-risky-database-option|. -.. |option-include-port-in-address-blurb| replace:: - - Addresses returned by get_addresses_for_key include the port when enabled. This will be enabled by default in api version 700, and this option will be deprecated. One can set this for all transactions by calling |include-port-in-address-database-option|. - .. |option-causal-write-risky-blurb| replace:: The application either knows that this transaction will be self-conflicting (at least one read overlaps at least one set or clear), or is willing to accept a small risk that the transaction could be committed a second time after its commit apparently succeeds. This option provides a small performance benefit. diff --git a/documentation/sphinx/source/api-python.rst index c9e7f834b6..2b5efe7d9c 100644 --- a/documentation/sphinx/source/api-python.rst +++ b/documentation/sphinx/source/api-python.rst @@ -26,7 +26,6 @@ .. |max-retry-delay-database-option| replace:: :func:`Database.options.set_transaction_max_retry_delay` .. |transaction-size-limit-database-option| replace:: :func:`Database.options.set_transaction_size_limit` .. |causal-read-risky-database-option| replace:: :func:`Database.options.set_transaction_causal_read_risky` -.. |include-port-in-address-database-option| replace:: :func:`Database.options.set_transaction_include_port_in_address` .. |transaction-logging-max-field-length-database-option| replace:: :func:`Database.options.set_transaction_logging_max_field_length` ..
|snapshot-ryw-enable-database-option| replace:: :func:`Database.options.set_snapshot_ryw_enable` .. |snapshot-ryw-disable-database-option| replace:: :func:`Database.options.set_snapshot_ryw_disable` @@ -39,7 +38,6 @@ .. |snapshot-ryw-enable-transaction-option| replace:: :func:`Transaction.options.set_snapshot_ryw_enable` .. |snapshot-ryw-disable-transaction-option| replace:: :func:`Transaction.options.set_snapshot_ryw_disable` .. |causal-read-risky-transaction-option| replace:: :func:`Transaction.options.set_causal_read_risky` -.. |include-port-in-address-transaction-option| replace:: :func:`Transaction.options.set_include_port_in_address` .. |transaction-logging-max-field-length-transaction-option| replace:: :func:`Transaction.options.set_transaction_logging_max_field_length` .. |lazy-iterator-object| replace:: generator .. |key-meth| replace:: :meth:`Subspace.key` @@ -145,6 +143,10 @@ After importing the ``fdb`` module and selecting an API version, you probably wa |option-trace-format-blurb| + .. method :: fdb.options.set_trace_clock_source(source) + + |option-trace-clock-source-blurb| + .. method :: fdb.options.set_disable_multi_version_client_api() |option-disable-multi-version-client-api| @@ -291,7 +293,7 @@ A |database-blurb1| |database-blurb2| If ``limit`` is specified, then only the first ``limit`` keys (and their values) in the range will be returned. - If ``reverse`` is True, then the last ``limit`` keys in the range will be returned in reverse order. + If ``reverse`` is True, then the last ``limit`` keys in the range will be returned in reverse order. Reading ranges in reverse is supported natively by the database and should have minimal extra cost. If ``streaming_mode`` is specified, it must be a value from the :data:`StreamingMode` enumeration. It provides a hint to FoundationDB about how to retrieve the specified range. This option should generally not be specified, allowing FoundationDB to retrieve the full range very efficiently. @@ -400,10 +402,6 @@ Database options |option-db-causal-read-risky-blurb| -.. method:: Database.options.set_transaction_include_port_in_address() - - |option-db-include-port-in-address-blurb| - .. method:: Database.options.set_transaction_logging_max_field_length(size_limit) |option-db-tr-transaction-logging-max-field-length-blurb| @@ -507,7 +505,7 @@ Reading data If ``limit`` is specified, then only the first ``limit`` keys (and their values) in the range will be returned. - If ``reverse`` is True, then the last ``limit`` keys in the range will be returned in reverse order. + If ``reverse`` is True, then the last ``limit`` keys in the range will be returned in reverse order. Reading ranges in reverse is supported natively by the database and should have minimal extra cost. If ``streaming_mode`` is specified, it must be a value from the :data:`StreamingMode` enumeration. It provides a hint to FoundationDB about how the returned container is likely to be used. The default is :data:`StreamingMode.iterator`. @@ -829,10 +827,6 @@ Transaction options |option-causal-read-risky-blurb| -.. method:: Transaction.options.set_include_port_in_address - - |option-include-port-in-address-blurb| - .. method:: Transaction.options.set_causal_write_risky |option-causal-write-risky-blurb| diff --git a/documentation/sphinx/source/api-ruby.rst b/documentation/sphinx/source/api-ruby.rst index 70226e7fc6..46a1293b36 100644 --- a/documentation/sphinx/source/api-ruby.rst +++ b/documentation/sphinx/source/api-ruby.rst @@ -24,7 +24,6 @@ .. 
|max-retry-delay-database-option| replace:: :meth:`Database.options.set_transaction_max_retry_delay` .. |transaction-size-limit-database-option| replace:: :func:`Database.options.set_transaction_size_limit` .. |causal-read-risky-database-option| replace:: :meth:`Database.options.set_transaction_causal_read_risky` -.. |include-port-in-address-database-option| replace:: :meth:`Database.options.set_transaction_include_port_in_address` .. |snapshot-ryw-enable-database-option| replace:: :meth:`Database.options.set_snapshot_ryw_enable` .. |snapshot-ryw-disable-database-option| replace:: :meth:`Database.options.set_snapshot_ryw_disable` .. |transaction-logging-max-field-length-database-option| replace:: :meth:`Database.options.set_transaction_logging_max_field_length` @@ -37,7 +36,6 @@ .. |snapshot-ryw-enable-transaction-option| replace:: :meth:`Transaction.options.set_snapshot_ryw_enable` .. |snapshot-ryw-disable-transaction-option| replace:: :meth:`Transaction.options.set_snapshot_ryw_disable` .. |causal-read-risky-transaction-option| replace:: :meth:`Transaction.options.set_causal_read_risky` -.. |include-port-in-address-transaction-option| replace:: :meth:`Transaction.options.set_include_port_in_address` .. |transaction-logging-max-field-length-transaction-option| replace:: :meth:`Transaction.options.set_transaction_logging_max_field_length` .. |lazy-iterator-object| replace:: :class:`Enumerator` .. |key-meth| replace:: :meth:`Subspace.key` @@ -128,6 +126,10 @@ After requiring the ``FDB`` gem and selecting an API version, you probably want |option-trace-format-blurb| + .. method:: FDB.options.set_trace_clock_source(source) -> nil + + |option-trace-clock-source-blurb| + .. method:: FDB.options.set_disable_multi_version_client_api() -> nil |option-disable-multi-version-client-api| @@ -211,21 +213,21 @@ Key selectors Creates a key selector with the given reference key, equality flag, and offset. It is usually more convenient to obtain a key selector with one of the following methods: - .. classmethod:: last_less_than(key) -> KeySelector + .. classmethod:: last_less_than(key) -> KeySelector - Returns a key selector referencing the last (greatest) key in the database less than the specified key. + Returns a key selector referencing the last (greatest) key in the database less than the specified key. - .. classmethod:: KeySelector.last_less_or_equal(key) -> KeySelector + .. classmethod:: KeySelector.last_less_or_equal(key) -> KeySelector - Returns a key selector referencing the last (greatest) key less than, or equal to, the specified key. + Returns a key selector referencing the last (greatest) key less than, or equal to, the specified key. - .. classmethod:: KeySelector.first_greater_than(key) -> KeySelector + .. classmethod:: KeySelector.first_greater_than(key) -> KeySelector - Returns a key selector referencing the first (least) key greater than the specified key. + Returns a key selector referencing the first (least) key greater than the specified key. - .. classmethod:: KeySelector.first_greater_or_equal(key) -> KeySelector + .. classmethod:: KeySelector.first_greater_or_equal(key) -> KeySelector - Returns a key selector referencing the first key greater than, or equal to, the specified key. + Returns a key selector referencing the first key greater than, or equal to, the specified key. .. 
method:: KeySelector.+(offset) -> KeySelector @@ -281,16 +283,16 @@ A |database-blurb1| |database-blurb2| The ``options`` hash accepts the following optional parameters: - ``:limit`` - Only the first ``limit`` keys (and their values) in the range will be returned. + ``:limit`` + Only the first ``limit`` keys (and their values) in the range will be returned. - ``:reverse`` - If ``true``, then the keys in the range will be returned in reverse order. + ``:reverse`` + If ``true``, then the keys in the range will be returned in reverse order. Reading ranges in reverse is supported natively by the database and should have minimal extra cost. - If ``:limit`` is also specified, the *last* ``limit`` keys in the range will be returned in reverse order. + If ``:limit`` is also specified, the *last* ``limit`` keys in the range will be returned in reverse order. - ``:streaming_mode`` - A valid |streaming-mode|, which provides a hint to FoundationDB about how to retrieve the specified range. This option should generally not be specified, allowing FoundationDB to retrieve the full range very efficiently. + ``:streaming_mode`` + A valid |streaming-mode|, which provides a hint to FoundationDB about how to retrieve the specified range. This option should generally not be specified, allowing FoundationDB to retrieve the full range very efficiently. .. method:: Database.get_range(begin, end, options={}) {|kv| block } -> nil @@ -392,10 +394,6 @@ Database options |option-db-causal-read-risky-blurb| -.. method:: Database.options.set_transaction_include_port_in_address() -> nil - - |option-db-include-port-in-address-blurb| - .. method:: Database.options.set_transaction_logging_max_field_length(size_limit) -> nil |option-db-tr-transaction-logging-max-field-length-blurb| @@ -459,16 +457,16 @@ Reading data The ``options`` hash accepts the following optional parameters: - ``:limit`` - Only the first ``limit`` keys (and their values) in the range will be returned. + ``:limit`` + Only the first ``limit`` keys (and their values) in the range will be returned. - ``:reverse`` - If true, then the keys in the range will be returned in reverse order. + ``:reverse`` + If ``true``, then the keys in the range will be returned in reverse order. Reading ranges in reverse is supported natively by the database and should have minimal extra cost. - If ``:limit`` is also specified, the *last* ``limit`` keys in the range will be returned in reverse order. + If ``:limit`` is also specified, the *last* ``limit`` keys in the range will be returned in reverse order. - ``:streaming_mode`` - A valid |streaming-mode|, which provides a hint to FoundationDB about how the returned enumerable is likely to be used. The default is ``:iterator``. + ``:streaming_mode`` + A valid |streaming-mode|, which provides a hint to FoundationDB about how the returned enumerable is likely to be used. The default is ``:iterator``. .. method:: Transaction.get_range(begin, end, options={}) {|kv| block } -> nil @@ -771,10 +769,6 @@ Transaction options |option-causal-read-risky-blurb| -.. method:: Transaction.options.set_include_port_in_address() -> nil - - |option-include-port-in-address-blurb| - .. 
method:: Transaction.options.set_causal_write_risky() -> nil |option-causal-write-risky-blurb| diff --git a/documentation/sphinx/source/backups.rst index 6a48fae42e..1a30a6e4b1 100644 --- a/documentation/sphinx/source/backups.rst +++ b/documentation/sphinx/source/backups.rst @@ -31,7 +31,7 @@ While a cluster is being used as the destination for a DR operation it will be l Limitations =========== -Backup data is not encrypted on disk, in a blob store account, or in transit to a destination blob store account or database. +Backup data is not encrypted at rest on disk or in a blob store account. Tools =========== @@ -159,15 +159,14 @@ The Blob Credential File format is JSON with the following schema: } } -SSL Support +TLS Support =========== -By default, backup will communicate over https. To configure https, the following environment variables are used: +In-flight traffic for blob store or disaster recovery backups can be encrypted by setting the following environment variables. The same settings are also available as command-line flags and can be specified in ``foundationdb.conf`` for backup agents. ============================ ==================================================== Environment Variable Purpose ============================ ==================================================== -``FDB_TLS_PLUGIN`` Path to the file to be loaded as the TLS plugin ``FDB_TLS_CERTIFICATE_FILE`` Path to the file from which the local certificates can be loaded, used by the plugin ``FDB_TLS_KEY_FILE`` Path to the file from which to load the private @@ -177,8 +176,11 @@ Environment Variable Purpose ``FDB_TLS_CA_FILE`` Path to the file containing the CA certificates to trust. Specify to override the default openssl location. +``FDB_TLS_VERIFY_PEERS`` The byte string used to verify peer + certificates and sessions. ============================ ==================================================== +Blob store backups can be configured to use HTTPS/TLS by setting the ``secure_connection`` or ``sc`` backup URL option to ``1``, which is the default. Disaster recovery backups are secured by using TLS for both the source and target clusters and setting the TLS options for the ``fdbdr`` and ``dr_agent`` commands. ``fdbbackup`` command line tool =============================== diff --git a/documentation/sphinx/source/cap-theorem.rst index c5c3c64d55..42942d2f8c 100644 --- a/documentation/sphinx/source/cap-theorem.rst +++ b/documentation/sphinx/source/cap-theorem.rst @@ -9,9 +9,9 @@ What is the CAP Theorem? In 2000, Eric Brewer conjectured that a distributed system cannot simultaneously provide all three of the following desirable properties: - * Consistency: A read sees all previously completed writes. - * Availability: Reads and writes always succeed. - * Partition tolerance: Guaranteed properties are maintained even when network failures prevent some machines from communicating with others. +* Consistency: A read sees all previously completed writes. +* Availability: Reads and writes always succeed. +* Partition tolerance: Guaranteed properties are maintained even when network failures prevent some machines from communicating with others. In 2002, Gilbert and Lynch proved this in the asynchronous and partially synchronous network models, so it is now commonly called the `CAP Theorem `_.
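Returning to the backup TLS settings above, a minimal sketch of exporting them before launching a backup agent; the certificate paths, the verify-peers string, and the agent's ``-C`` cluster-file flag are illustrative assumptions, not prescribed values.

```python
import os
import subprocess

# Hypothetical credential locations; substitute your deployment's paths.
tls_env = {
    "FDB_TLS_CERTIFICATE_FILE": "/etc/foundationdb/backup-cert.pem",
    "FDB_TLS_KEY_FILE": "/etc/foundationdb/backup-key.pem",
    "FDB_TLS_CA_FILE": "/etc/foundationdb/ca.pem",
    "FDB_TLS_VERIFY_PEERS": "Check.Valid=1",  # example verify-peers byte string
}

# The backup agent inherits the TLS settings from its environment.
subprocess.run(["backup_agent", "-C", "/etc/foundationdb/fdb.cluster"],
               env={**os.environ, **tls_env}, check=True)
```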
diff --git a/documentation/sphinx/source/configuration.rst b/documentation/sphinx/source/configuration.rst index 6c0b6e5cf0..671141313d 100644 --- a/documentation/sphinx/source/configuration.rst +++ b/documentation/sphinx/source/configuration.rst @@ -27,11 +27,11 @@ System requirements * Or, an unsupported Linux distribution with: * Kernel version between 2.6.33 and 3.0.x (inclusive) or 3.7 or greater - * Works with .deb or .rpm packages + * Preferably .deb or .rpm package support * Or, macOS 10.7 or later - .. warning:: The macOS version of the FoundationDB server is intended for use on locally accessible development machines only. Other uses are not supported. + .. warning:: The macOS and Windows versions of the FoundationDB server are intended for use on locally accessible development machines only. Other uses are not supported. * 4GB **ECC** RAM (per fdbserver process) * Storage @@ -387,6 +387,8 @@ FoundationDB will never use processes on the same machine for the replication of FoundationDB replicates data to three machines, and at least three available machines are required to make progress. This is the recommended mode for a cluster of five or more machines in a single datacenter. + .. note:: When running in cloud environments with managed disks that are already replicated and persistent, ``double`` replication may still be considered for 5+ machine clusters. This will result in lower availability fault tolerance for planned or unplanned failures and lower total read throughput, but offers a reasonable tradeoff for cost. + ``three_data_hall`` mode FoundationDB stores data in triplicate, with one copy on a storage server in each of three data halls. The transaction logs are replicated four times, with two data halls containing two replicas apiece. Four available machines (two in each of two data halls) are therefore required to make progress. This configuration enables the cluster to remain available after losing a single data hall and one machine in another data hall. @@ -395,7 +397,7 @@ Datacenter-aware mode In addition to the more commonly used modes listed above, this version of FoundationDB has support for redundancy across multiple datacenters. - .. note:: When using the datacenter-aware mode, all ``fdbserver`` processes should be passed a valid datacenter identifier on the command line. +.. note:: When using the datacenter-aware mode, all ``fdbserver`` processes should be passed a valid datacenter identifier on the command line. ``three_datacenter`` mode *(for 5+ machines in 3 datacenters)* @@ -622,23 +624,23 @@ The ``satellite_redundancy_mode`` is configured per region, and specifies how ma ``one_satellite_single`` mode - Keep one copy of the mutation log in the satellite datacenter with the highest priority. If the highest priority satellite is unavailable it will put the transaction log in the satellite datacenter with the next highest priority. +Keep one copy of the mutation log in the satellite datacenter with the highest priority. If the highest priority satellite is unavailable it will put the transaction log in the satellite datacenter with the next highest priority. ``one_satellite_double`` mode - Keep two copies of the mutation log in the satellite datacenter with the highest priority. +Keep two copies of the mutation log in the satellite datacenter with the highest priority. ``one_satellite_triple`` mode - Keep three copies of the mutation log in the satellite datacenter with the highest priority. 
+Keep three copies of the mutation log in the satellite datacenter with the highest priority. ``two_satellite_safe`` mode - Keep two copies of the mutation log in each of the two satellite datacenters with the highest priorities, for a total of four copies of each mutation. This mode will protect against the simultaneous loss of both the primary and one of the satellite datacenters. If only one satellite is available, it will fall back to only storing two copies of the mutation log in the remaining datacenter. +Keep two copies of the mutation log in each of the two satellite datacenters with the highest priorities, for a total of four copies of each mutation. This mode will protect against the simultaneous loss of both the primary and one of the satellite datacenters. If only one satellite is available, it will fall back to only storing two copies of the mutation log in the remaining datacenter. ``two_satellite_fast`` mode - Keep two copies of the mutation log in each of the two satellite datacenters with the highest priorities, for a total of four copies of each mutation. FoundationDB will only synchronously wait for one of the two satellite datacenters to make the mutations durable before considering a commit successful. This will reduce tail latencies caused by network issues between datacenters. If only one satellite is available, it will fall back to only storing two copies of the mutation log in the remaining datacenter. +Keep two copies of the mutation log in each of the two satellite datacenters with the highest priorities, for a total of four copies of each mutation. FoundationDB will only synchronously wait for one of the two satellite datacenters to make the mutations durable before considering a commit successful. This will reduce tail latencies caused by network issues between datacenters. If only one satellite is available, it will fall back to only storing two copies of the mutation log in the remaining datacenter. .. warning:: In release 6.0 this is implemented by waiting for all but 2 of the transaction logs. If ``satellite_logs`` is set to more than 4, FoundationDB will still need to wait for replies from both datacenters. @@ -696,17 +698,17 @@ Migrating a database to use a region configuration To configure an existing database to regions, do the following steps: - 1. Ensure all processes have their dcid locality set on the command line. All processes should exist in the same datacenter. If converting from a ``three_datacenter`` configuration, first configure down to using a single datacenter by changing the replication mode. Then exclude the machines in all datacenters but the one that will become the initial active region. +1. Ensure all processes have their dcid locality set on the command line. All processes should exist in the same datacenter. If converting from a ``three_datacenter`` configuration, first configure down to using a single datacenter by changing the replication mode. Then exclude the machines in all datacenters but the one that will become the initial active region. - 2. Configure the region configuration. The datacenter with all the existing processes should have a non-negative priority. The region which will eventually store the remote replica should be added with a negative priority. +2. Configure the region configuration. The datacenter with all the existing processes should have a non-negative priority. The region which will eventually store the remote replica should be added with a negative priority. - 3. 
Add processes to the cluster in the remote region. These processes will not take data yet, but need to be added to the cluster. If they are added before the region configuration is set they will be assigned data like any other FoundationDB process, which will lead to high latencies. +3. Add processes to the cluster in the remote region. These processes will not take data yet, but need to be added to the cluster. If they are added before the region configuration is set they will be assigned data like any other FoundationDB process, which will lead to high latencies. - 4. Configure ``usable_regions=2``. This will cause the cluster to start copying data between the regions. +4. Configure ``usable_regions=2``. This will cause the cluster to start copying data between the regions. - 5. Watch ``status`` and wait until data movement is complete. This will signal that the remote datacenter has a full replica of all of the data in the database. +5. Watch ``status`` and wait until data movement is complete. This will signal that the remote datacenter has a full replica of all of the data in the database. - 6. Change the region configuration to have a non-negative priority for the primary datacenters in both regions. This will enable automatic failover between regions. +6. Change the region configuration to have a non-negative priority for the primary datacenters in both regions. This will enable automatic failover between regions.
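A sketch of driving steps 4 and 5 non-interactively via ``fdbcli --exec``, which runs a single command and exits; the cluster-file path is an assumption for illustration.

```python
import subprocess

def fdbcli(cmd, cluster="/etc/foundationdb/fdb.cluster"):
    # Run one fdbcli command against the given cluster file and return its output.
    result = subprocess.run(["fdbcli", "-C", cluster, "--exec", cmd],
                            capture_output=True, text=True, check=True)
    return result.stdout

print(fdbcli("configure usable_regions=2"))  # step 4: start copying data to the remote region
print(fdbcli("status"))                      # step 5: poll until data movement completes
```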
Handling datacenter failures ---------------------------- @@ -717,9 +719,9 @@ When a primary datacenter fails, the cluster will go into a degraded state. It w To drop the dead datacenter do the following steps: - 1. Configure the region configuration so that the dead datacenter has a negative priority. +1. Configure the region configuration so that the dead datacenter has a negative priority. - 2. Configure ``usable_regions=1``. +2. Configure ``usable_regions=1``. If you are running in a configuration without a satellite datacenter, or you have lost all machines in a region simultaneously, the ``force_recovery_with_data_loss`` command from ``fdbcli`` allows you to force a recovery to the other region. This will discard the portion of the mutation log which did not make it across the WAN. Once the database has recovered, immediately follow the previous steps to drop the dead region the normal way. Region change safety -------------------- The steps described above for both adding and removing replicas are enforced by ``fdbcli``. The following are the specific conditions checked by ``fdbcli``: - * You cannot change the ``regions`` configuration while also changing ``usable_regions``. - - * You can only change ``usable_regions`` when exactly one region has priority >= 0. - - * When ``usable_regions`` > 1, all regions with priority >= 0 must have a full replica of the data. - - * All storage servers must be in one of the regions specified by the region configuration. +* You cannot change the ``regions`` configuration while also changing ``usable_regions``. +* You can only change ``usable_regions`` when exactly one region has priority >= 0. +* When ``usable_regions`` > 1, all regions with priority >= 0 must have a full replica of the data. +* All storage servers must be in one of the regions specified by the region configuration. Monitoring ---------- @@ -768,13 +767,10 @@ Region configuration is better in almost all ways than the ``three_datacenter`` Known limitations ----------------- -The 6.0 release still has a number of rough edges related to region configuration. This is a collection of all the issues that have been pointed out in the sections above. These issues should be significantly improved in future releases of FoundationDB: +The 6.2 release still has a number of rough edges related to region configuration. This is a collection of all the issues that have been pointed out in the sections above. These issues should be significantly improved in future releases of FoundationDB: - * FoundationDB supports replicating data to at most two regions. - - * ``two_satellite_fast`` does not hide latency properly when configured with more than 4 satellite transaction logs. - - * While a datacenter has failed, the maximum write throughput of the cluster will be roughly 1/3 of normal performance. +* FoundationDB supports replicating data to at most two regions. +* ``two_satellite_fast`` does not hide latency properly when configured with more than 4 satellite transaction logs. .. _guidelines-process-class-config: diff --git a/documentation/sphinx/source/data-modeling.rst index 24f3aad203..e039250f68 100644 --- a/documentation/sphinx/source/data-modeling.rst +++ b/documentation/sphinx/source/data-modeling.rst @@ -53,8 +53,6 @@ .. |timeout-database-option| replace:: FIXME .. |causal-read-risky-database-option| replace:: FIXME .. |causal-read-risky-transaction-option| replace:: FIXME -.. |include-port-in-address-database-option| replace:: FIXME -.. |include-port-in-address-transaction-option| replace:: FIXME .. |transaction-logging-max-field-length-transaction-option| replace:: FIXME .. |transaction-logging-max-field-length-database-option| replace:: FIXME @@ -269,6 +267,18 @@ Using the table name as the subspace, we could implement the common row-oriented cols[c] = v return cols + +Versionstamps +------------- + +A common data model is to index your data with a sequencing prefix to allow log scans or tails of recent data. This index requires a unique, monotonically increasing value, like an AUTO_INCREMENT PRIMARY KEY in SQL. This could be implemented at the client level by reading the current value for conflict checks before every increment. A better solution is the versionstamp, which is generated at commit time with no read conflict ranges, providing a unique sequence ID in a single conflict-free write. + +FoundationDB already versions every commit to provide its MVCC guarantees and transactional integrity. Versionstamps write the transaction's commit version as a value to an arbitrary key as part of the same transaction, allowing the client to leverage the version's unique and serial properties. Because the versionstamp is generated at commit time, the versionstamped key cannot be read in the same transaction that writes it, and the versionstamp's value will be unknown until the transaction is committed. After the transaction is committed, the versionstamp can be obtained. + +The versionstamp guarantees uniqueness and monotonically increasing values for the entire lifetime of a single FDB cluster. This is even true if the cluster is restored from a backup, as a restored cluster will begin at a higher version than when the backup was taken. Special care must be taken when moving data between two FoundationDB clusters containing versionstamps, as the differing cluster versions might break the monotonicity. +
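As a minimal illustration of this pattern, the following Python sketch appends log entries under commit-time versionstamped keys; the subspace name and payload are assumptions, and the byte layout is detailed in the next paragraph.

```python
import fdb
fdb.api_version(620)
db = fdb.open()
log = fdb.Subspace(('log',))  # illustrative subspace for the sequenced index

@fdb.transactional
def append_event(tr, event):
    # Pack a key containing an incomplete versionstamp; the cluster substitutes
    # the real commit version at commit time, with no read conflict ranges.
    key = log.pack_with_versionstamp((fdb.tuple.Versionstamp(),))
    tr.set_versionstamped_key(key, event)

append_event(db, b'user signed up')  # key is readable only after the commit
```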
+There are two concepts of versionstamp, depending on your context. At the fdb_c client level, or any binding outside of the Tuple layer, the 'versionstamp' is 10 bytes: the transaction's commit version (8 bytes) and transaction batch order (2 bytes). The user can manually add 2 additional bytes to provide application-level ordering. The Tuple layer provides a useful API for getting and setting both the 10 byte system version and the 2 byte user version. In the context of the Tuple layer, the 'versionstamp' is all 12 bytes. For examples of how to use the versionstamp in the Python binding, see the :doc:`api-python` documentation. + .. _data-modeling-entity-relationship: Entity-relationship models @@ -531,25 +541,25 @@ How you map your application data to keys and values can have a dramatic impact * Structure keys so that range reads can efficiently retrieve the most frequently accessed data. - * If you perform a range read that is, in total, much more than 1 kB, try to restrict your range as much as you can while still retrieving the needed data. + * If you perform a range read that is, in total, much more than 1 kB, try to restrict your range as much as you can while still retrieving the needed data. * Structure keys so that no single key needs to be updated too frequently, which can cause transaction conflicts. - * If a key is updated more than 10-100 times per second, try to split it into multiple keys. - * For example, if a key is storing a counter, split the counter into N separate counters that are randomly incremented by clients. The total value of the counter can then read by adding up the N individual ones. + * If a key is updated more than 10-100 times per second, try to split it into multiple keys. + * For example, if a key is storing a counter, split the counter into N separate counters that are randomly incremented by clients. The total value of the counter can then be read by adding up the N individual ones. * Keep key sizes small. - * Try to keep key sizes below 1 kB. (Performance will be best with key sizes below 32 bytes and *cannot* be more than 10 kB.) - * When using the tuple layer to encode keys (as is recommended), select short strings or small integers for tuple elements. Small integers will encode to just two bytes. - * If your key sizes are above 1 kB, try either to move data from the key to the value, split the key into multiple keys, or encode the parts of the key more efficiently (remembering to preserve any important ordering). + * Try to keep key sizes below 1 kB. (Performance will be best with key sizes below 32 bytes and *cannot* be more than 10 kB.) + * When using the tuple layer to encode keys (as is recommended), select short strings or small integers for tuple elements. Small integers will encode to just two bytes. + * If your key sizes are above 1 kB, try either to move data from the key to the value, split the key into multiple keys, or encode the parts of the key more efficiently (remembering to preserve any important ordering). * Keep value sizes moderate. - * Try to keep value sizes below 10 kB.
(Value sizes *cannot* be more than 100 kB.) + * If your value sizes are above 10 kB, consider splitting the value across multiple keys. + * If you read values with sizes above 1 kB but use only a part of each value, consider splitting the values using multiple keys. + * If you frequently perform individual reads on a set of values that total to fewer than 200 bytes, try either to combine the values into a single value or to store the values in adjacent keys and use a range read. Large Values and Blobs ---------------------- diff --git a/documentation/sphinx/source/developer-guide.rst b/documentation/sphinx/source/developer-guide.rst index 5cda16c32a..ef1496f5c8 100644 --- a/documentation/sphinx/source/developer-guide.rst +++ b/documentation/sphinx/source/developer-guide.rst @@ -53,8 +53,6 @@ .. |timeout-database-option| replace:: FIXME .. |causal-read-risky-database-option| replace:: FIXME .. |causal-read-risky-transaction-option| replace:: FIXME -.. |include-port-in-address-database-option| replace:: FIXME -.. |include-port-in-address-transaction-option| replace:: FIXME .. |transaction-logging-max-field-length-transaction-option| replace:: FIXME .. |transaction-logging-max-field-length-database-option| replace:: FIXME diff --git a/documentation/sphinx/source/downloads.rst b/documentation/sphinx/source/downloads.rst index c8eea1bf86..b375025a59 100644 --- a/documentation/sphinx/source/downloads.rst +++ b/documentation/sphinx/source/downloads.rst @@ -10,38 +10,38 @@ macOS The macOS installation package is supported on macOS 10.7+. It includes the client and (optionally) the server. -* `FoundationDB-6.2.15.pkg `_ +* `FoundationDB-6.2.16.pkg `_ Ubuntu ------ The Ubuntu packages are supported on 64-bit Ubuntu 12.04+, but beware of the Linux kernel bug in Ubuntu 12.x. -* `foundationdb-clients-6.2.15-1_amd64.deb `_ -* `foundationdb-server-6.2.15-1_amd64.deb `_ (depends on the clients package) +* `foundationdb-clients-6.2.16-1_amd64.deb `_ +* `foundationdb-server-6.2.16-1_amd64.deb `_ (depends on the clients package) RHEL/CentOS EL6 --------------- The RHEL/CentOS EL6 packages are supported on 64-bit RHEL/CentOS 6.x. -* `foundationdb-clients-6.2.15-1.el6.x86_64.rpm `_ -* `foundationdb-server-6.2.15-1.el6.x86_64.rpm `_ (depends on the clients package) +* `foundationdb-clients-6.2.16-1.el6.x86_64.rpm `_ +* `foundationdb-server-6.2.16-1.el6.x86_64.rpm `_ (depends on the clients package) RHEL/CentOS EL7 --------------- The RHEL/CentOS EL7 packages are supported on 64-bit RHEL/CentOS 7.x. -* `foundationdb-clients-6.2.15-1.el7.x86_64.rpm `_ -* `foundationdb-server-6.2.15-1.el7.x86_64.rpm `_ (depends on the clients package) +* `foundationdb-clients-6.2.16-1.el7.x86_64.rpm `_ +* `foundationdb-server-6.2.16-1.el7.x86_64.rpm `_ (depends on the clients package) Windows ------- The Windows installer is supported on 64-bit Windows XP and later. It includes the client and (optionally) the server. 
-* `foundationdb-6.2.15-x64.msi `_ +* `foundationdb-6.2.16-x64.msi `_ API Language Bindings ===================== @@ -58,18 +58,18 @@ On macOS and Windows, the FoundationDB Python API bindings are installed as part If you need to use the FoundationDB Python API from other Python installations or paths, use the Python package manager ``pip`` (``pip install foundationdb``) or download the Python package: -* `foundationdb-6.2.15.tar.gz `_ +* `foundationdb-6.2.16.tar.gz `_ Ruby 1.9.3/2.0.0+ ----------------- -* `fdb-6.2.15.gem `_ +* `fdb-6.2.16.gem `_ Java 8+ ------- -* `fdb-java-6.2.15.jar `_ -* `fdb-java-6.2.15-javadoc.jar `_ +* `fdb-java-6.2.16.jar `_ +* `fdb-java-6.2.16-javadoc.jar `_ Go 1.11+ -------- diff --git a/documentation/sphinx/source/mr-status-json-schemas.rst.inc index c30f9932db..c7e52d5e7b 100644 --- a/documentation/sphinx/source/mr-status-json-schemas.rst.inc +++ b/documentation/sphinx/source/mr-status-json-schemas.rst.inc @@ -557,6 +557,7 @@ "auto_proxies":3, "auto_resolvers":1, "auto_logs":3, + "backup_worker_enabled":1, "proxies":5 // this field will be absent if a value has not been explicitly set }, "data":{ diff --git a/documentation/sphinx/source/old-release-notes/release-notes-100.rst index e313cc9fc1..e82bf84069 100644 --- a/documentation/sphinx/source/old-release-notes/release-notes-100.rst +++ b/documentation/sphinx/source/old-release-notes/release-notes-100.rst @@ -5,7 +5,7 @@ Release Notes 1.0.1 ===== - * Fix segmentation fault in client when there are a very large number of dependent operations in a transaction and certain errors occur. +* Fix segmentation fault in client when there are a very large number of dependent operations in a transaction and certain errors occur. 1.0.0 ===== @@ -21,34 +21,34 @@ There are only minor technical differences between this release and the 0.3.0 re Java ---- - * ``clear(Range)`` replaces the now deprecated ``clearRangeStartsWith()``. +* ``clear(Range)`` replaces the now deprecated ``clearRangeStartsWith()``. Python ------ - * Windows installer supports Python 3. +* Windows installer supports Python 3. Node and Ruby ------------- - * String option parameters are converted to UTF-8. +* String option parameters are converted to UTF-8. All --- - * API version updated to 100. See the :ref:`API version upgrade guide ` for upgrade details. - * Runs on Mac OS X 10.7. - * Improvements to installation packages, including package paths and directory modes. - * Eliminated cases of excessive resource usage in the locality API. - * Watches are disabled when read-your-writes functionality is disabled. - * Fatal error paths now call ``_exit()`` instead instead of ``exit()``. +* API version updated to 100. See the :ref:`API version upgrade guide ` for upgrade details. +* Runs on Mac OS X 10.7. +* Improvements to installation packages, including package paths and directory modes. +* Eliminated cases of excessive resource usage in the locality API. +* Watches are disabled when read-your-writes functionality is disabled. +* Fatal error paths now call ``_exit()`` instead of ``exit()``. Fixes ----- - * A few Python API entry points failed to respect the ``as_foundationdb_key()`` convenience interface. - * ``fdbcli`` could print commit version numbers incorrectly in Windows. - * Multiple watches set on the same key were not correctly triggered by a subsequent write in the same transaction.
+* A few Python API entry points failed to respect the ``as_foundationdb_key()`` convenience interface. +* ``fdbcli`` could print commit version numbers incorrectly in Windows. +* Multiple watches set on the same key were not correctly triggered by a subsequent write in the same transaction. Earlier release notes --------------------- diff --git a/documentation/sphinx/source/old-release-notes/release-notes-600.rst index 01a4e07f07..a934dcae29 100644 --- a/documentation/sphinx/source/old-release-notes/release-notes-600.rst +++ b/documentation/sphinx/source/old-release-notes/release-notes-600.rst @@ -156,6 +156,7 @@ Other Changes * Does not support upgrades from any version older than 5.0. * Normalized the capitalization of trace event names and attributes. `(PR #455) `_ +* Various stateless roles now have a higher affinity for running on processes with an unset process class, which may result in those roles changing location upon upgrade. See :ref:`version-specific-upgrading` for details. `(PR #526) `_ * Increased the memory requirements of the transaction log by 400MB. [6.0.5] `(PR #673) `_ Earlier release notes diff --git a/documentation/sphinx/source/old-release-notes/release-notes-620.rst index be9517818d..4f86c1dab2 100644 --- a/documentation/sphinx/source/old-release-notes/release-notes-620.rst +++ b/documentation/sphinx/source/old-release-notes/release-notes-620.rst @@ -2,6 +2,14 @@ Release Notes ############# +6.2.16 ====== + +Fixes ----- + +* Storage servers could fail to advance their version correctly in response to empty commits. `(PR #2617) `_. + 6.2.15 ====== diff --git a/documentation/sphinx/source/tls.rst index d527f8887c..bfdac3fc88 100644 --- a/documentation/sphinx/source/tls.rst +++ b/documentation/sphinx/source/tls.rst @@ -128,9 +128,9 @@ Certificate file default location The default behavior when the certificate or key file is not specified is to look for a file named ``fdb.pem`` in the current working directory. If this file is not present, an attempt is made to load a file from a system-dependent location as follows: - * Linux: ``/etc/foundationdb/fdb.pem`` - * macOS: ``/usr/local/etc/foundationdb/fdb.pem`` - * Windows: ``C:\ProgramData\foundationdb\fdb.pem`` +* Linux: ``/etc/foundationdb/fdb.pem`` +* macOS: ``/usr/local/etc/foundationdb/fdb.pem`` +* Windows: ``C:\ProgramData\foundationdb\fdb.pem`` Default Peer Verification ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -152,9 +152,9 @@ Automatic TLS certificate refresh The TLS certificate will be automatically refreshed on a configurable cadence. The server will inspect the CA, certificate, and key files in the specified locations periodically, and will begin using the new versions if the following criteria are met: - * They are changed, judging by the last modified time. - * They are valid certificates. - * The key file matches the certificate file. +* They are changed, judging by the last modified time. +* They are valid certificates. +* The key file matches the certificate file. The refresh rate is controlled by ``--knob_tls_cert_refresh_delay_seconds``. Setting it to 0 will disable the refresh.
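For concreteness, a hedged sketch of launching a server with a custom refresh cadence; every flag besides the knob named above is an illustrative assumption.

```python
import subprocess

# Inspect the TLS files every hour; a value of 0 disables the automatic refresh.
subprocess.run([
    "fdbserver",
    "--knob_tls_cert_refresh_delay_seconds", "3600",
    "-C", "/etc/foundationdb/fdb.cluster",  # cluster file path (assumption)
    "-p", "auto:4500",                      # public address (assumption)
])
```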
diff --git a/fdbbackup/FileConverter.actor.cpp b/fdbbackup/FileConverter.actor.cpp index 424139d38c..46beea723b 100644 --- a/fdbbackup/FileConverter.actor.cpp +++ b/fdbbackup/FileConverter.actor.cpp @@ -172,7 +172,7 @@ struct MutationFilesReadProgress : public ReferenceCounted()); const uint8_t* message = reader.consume(msgSize); - BinaryReader rd(message, msgSize, AssumeVersion(currentProtocolVersion)); + ArenaReader rd(buf.arena(), StringRef(message, msgSize), AssumeVersion(currentProtocolVersion)); MutationRef m; rd >> m; count++; @@ -468,7 +468,7 @@ ACTOR Future convert(ConvertParams params) { arena = Arena(); } - BinaryReader rd(data.message, AssumeVersion(currentProtocolVersion)); + ArenaReader rd(data.arena, data.message, AssumeVersion(currentProtocolVersion)); MutationRef m; rd >> m; std::cout << data.version.toString() << " m = " << m.toString() << "\n"; diff --git a/fdbbackup/backup.actor.cpp b/fdbbackup/backup.actor.cpp index 3ce4989d26..a43ce79578 100644 --- a/fdbbackup/backup.actor.cpp +++ b/fdbbackup/backup.actor.cpp @@ -37,7 +37,6 @@ #include "fdbclient/json_spirit/json_spirit_writer_template.h" #include "fdbrpc/Platform.h" -#include "fdbrpc/TLSConnection.h" #include #include @@ -2204,6 +2203,7 @@ ACTOR Future runFastRestoreAgent(Database db, std::string tagName, std::st if (performRestore) { if (dbVersion == invalidVersion) { + TraceEvent("FastRestoreAgent").detail("TargetRestoreVersion", "Largest restorable version"); BackupDescription desc = wait(IBackupContainer::openContainer(container)->describeBackup()); if (!desc.maxRestorableVersion.present()) { fprintf(stderr, "The specified backup is not restorable to any version.\n"); @@ -2211,6 +2211,7 @@ ACTOR Future runFastRestoreAgent(Database db, std::string tagName, std::st } dbVersion = desc.maxRestorableVersion.get(); + TraceEvent("FastRestoreAgent").detail("TargetRestoreVersion", dbVersion); } Version _restoreVersion = wait(fastRestore(db, KeyRef(tagName), KeyRef(container), waitForDone, dbVersion, verbose, range, KeyRef(addPrefix), KeyRef(removePrefix))); @@ -3223,22 +3224,22 @@ int main(int argc, char* argv[]) { blobCredentials.push_back(args->OptionArg()); break; #ifndef TLS_DISABLED - case TLSOptions::OPT_TLS_PLUGIN: + case TLSParams::OPT_TLS_PLUGIN: args->OptionArg(); break; - case TLSOptions::OPT_TLS_CERTIFICATES: + case TLSParams::OPT_TLS_CERTIFICATES: tlsCertPath = args->OptionArg(); break; - case TLSOptions::OPT_TLS_PASSWORD: + case TLSParams::OPT_TLS_PASSWORD: tlsPassword = args->OptionArg(); break; - case TLSOptions::OPT_TLS_CA_FILE: + case TLSParams::OPT_TLS_CA_FILE: tlsCAPath = args->OptionArg(); break; - case TLSOptions::OPT_TLS_KEY: + case TLSParams::OPT_TLS_KEY: tlsKeyPath = args->OptionArg(); break; - case TLSOptions::OPT_TLS_VERIFY_PEERS: + case TLSParams::OPT_TLS_VERIFY_PEERS: tlsVerifyPeers = args->OptionArg(); break; #endif @@ -3853,6 +3854,13 @@ int main(int argc, char* argv[]) { } catch (Error& e) { TraceEvent(SevError, "MainError").error(e); status = FDB_EXIT_MAIN_ERROR; + } catch (boost::system::system_error& e) { + if (g_network) { + TraceEvent(SevError, "MainError").error(unknown_error()).detail("RootException", e.what()); + } else { + fprintf(stderr, "ERROR: %s (%d)\n", e.what(), e.code().value()); + } + status = FDB_EXIT_MAIN_EXCEPTION; } catch (std::exception& e) { TraceEvent(SevError, "MainError").error(unknown_error()).detail("RootException", e.what()); status = FDB_EXIT_MAIN_EXCEPTION; diff --git a/fdbcli/fdbcli.actor.cpp b/fdbcli/fdbcli.actor.cpp index b22f5907fc..bf16223cc4 100644 --- 
a/fdbcli/fdbcli.actor.cpp +++ b/fdbcli/fdbcli.actor.cpp @@ -32,7 +32,6 @@ #include "fdbclient/FDBOptions.g.h" #include "flow/DeterministicRandom.h" -#include "fdbrpc/TLSConnection.h" #include "fdbrpc/Platform.h" #include "flow/SimpleOpt.h" @@ -1383,7 +1382,7 @@ void printStatus(StatusObjectReader statusObj, StatusClient::StatusLevel level, NetworkAddress parsedAddress; try { parsedAddress = NetworkAddress::parse(address); - } catch (Error& e) { + } catch (Error&) { // Groups all invalid IP address/port pair in the end of this detail group. line = format(" %-22s (invalid IP address or port)", address.c_str()); IPAddress::IPAddressStore maxIp; @@ -1602,9 +1601,9 @@ ACTOR Future timeWarning( double when, const char* msg ) { return Void(); } -ACTOR Future checkStatus(Future f, Reference clusterFile, bool displayDatabaseAvailable = true) { +ACTOR Future checkStatus(Future f, Database db, bool displayDatabaseAvailable = true) { wait(f); - StatusObject s = wait(StatusClient::statusFetcher(clusterFile)); + StatusObject s = wait(StatusClient::statusFetcher(db)); printf("\n"); printStatus(s, StatusClient::MINIMAL, displayDatabaseAvailable); printf("\n"); @@ -1646,7 +1645,7 @@ ACTOR Future configure( Database db, std::vector tokens, Refere state Optional conf; if( tokens[startToken] == LiteralStringRef("auto") ) { - StatusObject s = wait( makeInterruptable(StatusClient::statusFetcher( ccf )) ); + StatusObject s = wait( makeInterruptable(StatusClient::statusFetcher( db )) ); if(warn.isValid()) warn.cancel(); @@ -1776,6 +1775,10 @@ ACTOR Future configure( Database db, std::vector tokens, Refere printf("Configuration changed\n"); ret=false; break; + case ConfigurationResult::LOCKED_NOT_NEW: + printf("ERROR: `only new databases can be configured as locked`\n"); + ret = true; + break; default: ASSERT(false); ret=true; @@ -1916,10 +1919,10 @@ ACTOR Future fileConfigure(Database db, std::string filePath, bool isNewDa ACTOR Future coordinators( Database db, std::vector tokens, bool isClusterTLS ) { state StringRef setName; StringRef nameTokenBegin = LiteralStringRef("description="); - for(auto t = tokens.begin()+1; t != tokens.end(); ++t) - if (t->startsWith(nameTokenBegin)) { - setName = t->substr(nameTokenBegin.size()); - std::copy( t+1, tokens.end(), t ); + for(auto tok = tokens.begin()+1; tok != tokens.end(); ++tok) + if (tok->startsWith(nameTokenBegin)) { + setName = tok->substr(nameTokenBegin.size()); + std::copy( tok+1, tokens.end(), tok ); tokens.resize( tokens.size()-1 ); break; } @@ -2091,7 +2094,7 @@ ACTOR Future exclude( Database db, std::vector tokens, Referenc return true; } } - StatusObject status = wait( makeInterruptable( StatusClient::statusFetcher( ccf ) ) ); + StatusObject status = wait( makeInterruptable( StatusClient::statusFetcher( db ) ) ); state std::string errorString = "ERROR: Could not calculate the impact of this exclude on the total free space in the cluster.\n" "Please try the exclude again in 30 seconds.\n" @@ -2537,22 +2540,22 @@ struct CLIOptions { #ifndef TLS_DISABLED // TLS Options - case TLSOptions::OPT_TLS_PLUGIN: + case TLSParams::OPT_TLS_PLUGIN: args.OptionArg(); break; - case TLSOptions::OPT_TLS_CERTIFICATES: + case TLSParams::OPT_TLS_CERTIFICATES: tlsCertPath = args.OptionArg(); break; - case TLSOptions::OPT_TLS_CA_FILE: + case TLSParams::OPT_TLS_CA_FILE: tlsCAPath = args.OptionArg(); break; - case TLSOptions::OPT_TLS_KEY: + case TLSParams::OPT_TLS_KEY: tlsKeyPath = args.OptionArg(); break; - case TLSOptions::OPT_TLS_PASSWORD: + case TLSParams::OPT_TLS_PASSWORD: 
tlsPassword = args.OptionArg(); break; - case TLSOptions::OPT_TLS_VERIFY_PEERS: + case TLSParams::OPT_TLS_VERIFY_PEERS: tlsVerifyPeers = args.OptionArg(); break; #endif @@ -2603,7 +2606,7 @@ ACTOR Future addInterface( std::mapCLI_CONNECT_TIMEOUT)) ) {} } return Void(); } @@ -2666,7 +2669,7 @@ ACTOR Future cli(CLIOptions opt, LineNoise* plinenoise) { if (!opt.exec.present()) { if(opt.initialStatusCheck) { - Future checkStatusF = checkStatus(Void(), db->getConnectionFile()); + Future checkStatusF = checkStatus(Void(), db); wait(makeInterruptable(success(checkStatusF))); } else { @@ -2704,7 +2707,7 @@ ACTOR Future cli(CLIOptions opt, LineNoise* plinenoise) { linenoise.historyAdd(line); } - warn = checkStatus(timeWarning(5.0, "\nWARNING: Long delay (Ctrl-C to interrupt)\n"), db->getConnectionFile()); + warn = checkStatus(timeWarning(5.0, "\nWARNING: Long delay (Ctrl-C to interrupt)\n"), db); try { state UID randomID = deterministicRandom()->randomUniqueID(); @@ -2849,7 +2852,7 @@ ACTOR Future cli(CLIOptions opt, LineNoise* plinenoise) { continue; } - StatusObject s = wait(makeInterruptable(StatusClient::statusFetcher(db->getConnectionFile()))); + StatusObject s = wait(makeInterruptable(StatusClient::statusFetcher(db))); if (!opt.exec.present()) printf("\n"); printStatus(s, level); @@ -3787,5 +3790,8 @@ int main(int argc, char **argv) { } catch (Error& e) { printf("ERROR: %s (%d)\n", e.what(), e.code()); return 1; + } catch (boost::system::system_error& e) { + printf("ERROR: %s (%d)\n", e.what(), e.code().value()); + return 1; } } diff --git a/fdbclient/Atomic.h b/fdbclient/Atomic.h index 3e5a71abff..f32950cc70 100644 --- a/fdbclient/Atomic.h +++ b/fdbclient/Atomic.h @@ -24,15 +24,15 @@ #include "fdbclient/CommitTransaction.h" -static ValueRef doLittleEndianAdd(const Optional& existingValueOptional, const ValueRef& otherOperand, Arena& ar) { +inline ValueRef doLittleEndianAdd(const Optional& existingValueOptional, const ValueRef& otherOperand, Arena& ar) { const ValueRef& existingValue = existingValueOptional.present() ? existingValueOptional.get() : StringRef(); if(!existingValue.size()) return otherOperand; if(!otherOperand.size()) return otherOperand; - + uint8_t* buf = new (ar) uint8_t [otherOperand.size()]; int i = 0; int carry = 0; - + for(i = 0; i& existingValueOptiona carry = sum >> 8; } - return StringRef(buf, i); + return StringRef(buf, i); } -static ValueRef doAnd(const Optional& existingValueOptional, const ValueRef& otherOperand, Arena& ar) { +inline ValueRef doAnd(const Optional& existingValueOptional, const ValueRef& otherOperand, Arena& ar) { const ValueRef& existingValue = existingValueOptional.present() ? existingValueOptional.get() : StringRef(); if(!otherOperand.size()) return otherOperand; - + uint8_t* buf = new (ar) uint8_t [otherOperand.size()]; int i = 0; - + for(i = 0; i& existingValueOptional, const Val return StringRef(buf, i); } -static ValueRef doAndV2(const Optional& existingValueOptional, const ValueRef& otherOperand, Arena& ar) { +inline ValueRef doAndV2(const Optional& existingValueOptional, const ValueRef& otherOperand, Arena& ar) { if (!existingValueOptional.present()) return otherOperand; return doAnd(existingValueOptional, otherOperand, ar); } -static ValueRef doOr(const Optional& existingValueOptional, const ValueRef& otherOperand, Arena& ar) { +inline ValueRef doOr(const Optional& existingValueOptional, const ValueRef& otherOperand, Arena& ar) { const ValueRef& existingValue = existingValueOptional.present() ? 
existingValueOptional.get() : StringRef(); if(!existingValue.size()) return otherOperand; if(!otherOperand.size()) return otherOperand; uint8_t* buf = new (ar) uint8_t [otherOperand.size()]; int i = 0; - + for(i = 0; i& existingValueOptional, const Valu return StringRef(buf, i); } -static ValueRef doXor(const Optional& existingValueOptional, const ValueRef& otherOperand, Arena& ar) { +inline ValueRef doXor(const Optional& existingValueOptional, const ValueRef& otherOperand, Arena& ar) { const ValueRef& existingValue = existingValueOptional.present() ? existingValueOptional.get() : StringRef(); if(!existingValue.size()) return otherOperand; if(!otherOperand.size()) return otherOperand; - + uint8_t* buf = new (ar) uint8_t [otherOperand.size()]; int i = 0; - + for(i = 0; i& existingValueOptional, const Val return StringRef(buf, i); } -static ValueRef doAppendIfFits(const Optional& existingValueOptional, const ValueRef& otherOperand, Arena& ar) { +inline ValueRef doAppendIfFits(const Optional& existingValueOptional, const ValueRef& otherOperand, Arena& ar) { const ValueRef& existingValue = existingValueOptional.present() ? existingValueOptional.get() : StringRef(); if(!existingValue.size()) return otherOperand; if(!otherOperand.size()) return existingValue; @@ -123,7 +123,7 @@ static ValueRef doAppendIfFits(const Optional& existingValueOptional, return StringRef(buf, i+j); } -static ValueRef doMax(const Optional& existingValueOptional, const ValueRef& otherOperand, Arena& ar) { +inline ValueRef doMax(const Optional& existingValueOptional, const ValueRef& otherOperand, Arena& ar) { const ValueRef& existingValue = existingValueOptional.present() ? existingValueOptional.get() : StringRef(); if (!existingValue.size()) return otherOperand; if (!otherOperand.size()) return otherOperand; @@ -155,7 +155,7 @@ static ValueRef doMax(const Optional& existingValueOptional, const Val return otherOperand; } -static ValueRef doByteMax(const Optional& existingValueOptional, const ValueRef& otherOperand, Arena& ar) { +inline ValueRef doByteMax(const Optional& existingValueOptional, const ValueRef& otherOperand, Arena& ar) { if (!existingValueOptional.present()) return otherOperand; const ValueRef& existingValue = existingValueOptional.get(); @@ -165,7 +165,7 @@ static ValueRef doByteMax(const Optional& existingValueOptional, const return otherOperand; } -static ValueRef doMin(const Optional& existingValueOptional, const ValueRef& otherOperand, Arena& ar) { +inline ValueRef doMin(const Optional& existingValueOptional, const ValueRef& otherOperand, Arena& ar) { if (!otherOperand.size()) return otherOperand; const ValueRef& existingValue = existingValueOptional.present() ? 
existingValueOptional.get() : StringRef(); @@ -203,16 +203,16 @@ static ValueRef doMin(const Optional& existingValueOptional, const Val return otherOperand; } -static ValueRef doMinV2(const Optional& existingValueOptional, const ValueRef& otherOperand, Arena& ar) { +inline ValueRef doMinV2(const Optional& existingValueOptional, const ValueRef& otherOperand, Arena& ar) { if (!existingValueOptional.present()) return otherOperand; return doMin(existingValueOptional, otherOperand, ar); } -static ValueRef doByteMin(const Optional& existingValueOptional, const ValueRef& otherOperand, Arena& ar) { +inline ValueRef doByteMin(const Optional& existingValueOptional, const ValueRef& otherOperand, Arena& ar) { if (!existingValueOptional.present()) return otherOperand; - + const ValueRef& existingValue = existingValueOptional.get(); if (existingValue < otherOperand) return existingValue; @@ -220,7 +220,7 @@ static ValueRef doByteMin(const Optional& existingValueOptional, const return otherOperand; } -static Optional doCompareAndClear(const Optional& existingValueOptional, +inline Optional doCompareAndClear(const Optional& existingValueOptional, const ValueRef& otherOperand, Arena& ar) { if (!existingValueOptional.present() || existingValueOptional.get() == otherOperand) { // Clear the value. @@ -241,7 +241,7 @@ static void placeVersionstamp( uint8_t* destination, Version version, uint16_t t /* * Returns the range corresponding to the specified versionstamp key. */ -static KeyRangeRef getVersionstampKeyRange(Arena& arena, const KeyRef &key, Version minVersion, const KeyRef &maxKey) { +inline KeyRangeRef getVersionstampKeyRange(Arena& arena, const KeyRef &key, Version minVersion, const KeyRef &maxKey) { KeyRef begin(arena, key); KeyRef end(arena, key); @@ -264,7 +264,7 @@ static KeyRangeRef getVersionstampKeyRange(Arena& arena, const KeyRef &key, Vers return KeyRangeRef(begin, std::min(end, maxKey)); } -static void transformVersionstampKey( StringRef& key, Version version, uint16_t transactionNumber ) { +inline void transformVersionstampKey( StringRef& key, Version version, uint16_t transactionNumber ) { if (key.size() < 4) throw client_invalid_operation(); @@ -277,7 +277,7 @@ static void transformVersionstampKey( StringRef& key, Version version, uint16_t placeVersionstamp( mutateString(key) + pos, version, transactionNumber ); } -static void transformVersionstampMutation( MutationRef& mutation, StringRef MutationRef::* param, Version version, uint16_t transactionNumber ) { +inline void transformVersionstampMutation( MutationRef& mutation, StringRef MutationRef::* param, Version version, uint16_t transactionNumber ) { if ((mutation.*param).size() >= 4) { int32_t pos; memcpy(&pos, (mutation.*param).end() - sizeof(int32_t), sizeof(int32_t)); diff --git a/fdbclient/BackupAgent.actor.h b/fdbclient/BackupAgent.actor.h index 897345d00a..ae6717c619 100644 --- a/fdbclient/BackupAgent.actor.h +++ b/fdbclient/BackupAgent.actor.h @@ -782,6 +782,11 @@ public: return configSpace.pack(LiteralStringRef(__FUNCTION__)); } + // Set to true when all backup workers for saving mutation logs have been started. 
+ KeyBackedProperty allWorkerStarted() { + return configSpace.pack(LiteralStringRef(__FUNCTION__)); + } + // Stop differntial logging if already started or don't start after completing KV ranges KeyBackedProperty stopWhenDone() { return configSpace.pack(LiteralStringRef(__FUNCTION__)); diff --git a/fdbclient/BackupContainer.actor.cpp b/fdbclient/BackupContainer.actor.cpp index 934220e575..acc2a85a69 100644 --- a/fdbclient/BackupContainer.actor.cpp +++ b/fdbclient/BackupContainer.actor.cpp @@ -329,17 +329,19 @@ public: } // The innermost folder covers 100,000 seconds (1e11 versions) which is 5,000 mutation log files at current settings. - static std::string logVersionFolderString(Version v) { - return format("logs/%s/", versionFolderString(v, 11).c_str()); + static std::string logVersionFolderString(Version v, bool mlogs) { + return format("%s/%s/", (mlogs ? "mlogs" : "logs"), versionFolderString(v, 11).c_str()); } Future> writeLogFile(Version beginVersion, Version endVersion, int blockSize) override { - return writeFile(logVersionFolderString(beginVersion) + format("log,%lld,%lld,%s,%d", beginVersion, endVersion, deterministicRandom()->randomUniqueID().toString().c_str(), blockSize)); + return writeFile(logVersionFolderString(beginVersion, false) + + format("log,%lld,%lld,%s,%d", beginVersion, endVersion, + deterministicRandom()->randomUniqueID().toString().c_str(), blockSize)); } Future> writeTaggedLogFile(Version beginVersion, Version endVersion, int blockSize, uint16_t tagId) override { - return writeFile(logVersionFolderString(beginVersion) + + return writeFile(logVersionFolderString(beginVersion, true) + format("log,%lld,%lld,%s,%d,%d", beginVersion, endVersion, deterministicRandom()->randomUniqueID().toString().c_str(), blockSize, tagId)); } @@ -355,8 +357,23 @@ public: return writeFile(snapshotFolderString(snapshotBeginVersion) + format("/%d/", snapshotFileCount / (BUGGIFY ? 1 : 5000)) + fileName); } + // Find what should be the filename of a path by finding whatever is after the last forward or backward slash, or failing to find those, the whole string. + static std::string fileNameOnly(std::string path) { + // Find the last forward slash position, defaulting to 0 if not found + int pos = path.find_last_of('/'); + if(pos == std::string::npos) { + pos = 0; + } + // Find the last backward slash position after pos, and update pos if found + int b = path.find_last_of('\\', pos); + if(b != std::string::npos) { + pos = b; + } + return path.substr(pos + 1); + } + static bool pathToRangeFile(RangeFile &out, std::string path, int64_t size) { - std::string name = basename(path); + std::string name = fileNameOnly(path); RangeFile f; f.fileName = path; f.fileSize = size; @@ -369,7 +386,7 @@ public: } static bool pathToLogFile(LogFile &out, std::string path, int64_t size) { - std::string name = basename(path); + std::string name = fileNameOnly(path); LogFile f; f.fileName = path; f.fileSize = size; @@ -387,7 +404,7 @@ public: } static bool pathToKeyspaceSnapshotFile(KeyspaceSnapshotFile &out, std::string path) { - std::string name = basename(path); + std::string name = fileNameOnly(path); KeyspaceSnapshotFile f; f.fileName = path; int len; @@ -517,10 +534,12 @@ public: // so start at an earlier version adjusted by how many versions a file could contain. // // Get the cleaned (without slashes) first and last folders that could contain relevant results. 
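(Editor's note on the fileNameOnly helper added above; the BackupContainer diff resumes below.) The helper replaces platform basename() so that both forward and backward slashes are handled regardless of OS. A behaviorally similar sketch that searches both separators in one call, shown here only for illustration:

    #include <string>

    // Sketch: return the final path component, treating '/' and '\\' as
    // separators. Mirrors the intent of fileNameOnly() above; not the PR's code.
    std::string fileNameOnlySketch(const std::string& path) {
        size_t pos = path.find_last_of("/\\");
        return pos == std::string::npos ? path : path.substr(pos + 1);
    }

Using size_t for the search result, as here, also sidesteps the helper's pattern of storing find_last_of's return value in an int before comparing it to std::string::npos, which depends on implementation-defined narrowing.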
- std::string firstPath = cleanFolderString(logVersionFolderString( - std::max(0, beginVersion - CLIENT_KNOBS->BACKUP_MAX_LOG_RANGES * CLIENT_KNOBS->LOG_RANGE_BLOCK_SIZE) - )); - std::string lastPath = cleanFolderString(logVersionFolderString(targetVersion)); + bool mlogs = false; // tagged mutation logs + std::string firstPath = cleanFolderString( + logVersionFolderString(std::max(0, beginVersion - CLIENT_KNOBS->BACKUP_MAX_LOG_RANGES * + CLIENT_KNOBS->LOG_RANGE_BLOCK_SIZE), + mlogs)); + std::string lastPath = cleanFolderString(logVersionFolderString(targetVersion, mlogs)); std::function pathFilter = [=](const std::string &folderPath) { // Remove slashes in the given folder path so that the '/' positions in the version folder string do not matter diff --git a/fdbclient/CommitTransaction.h b/fdbclient/CommitTransaction.h index 540157e5c9..be5821cfa4 100644 --- a/fdbclient/CommitTransaction.h +++ b/fdbclient/CommitTransaction.h @@ -23,6 +23,7 @@ #pragma once #include "fdbclient/FDBTypes.h" +#include "fdbserver/Knobs.h" // The versioned message has wire format : -1, version, messages static const int32_t VERSION_HEADER = -1; @@ -49,7 +50,7 @@ static const char* typeString[] = { "SetValue", "AndV2", "CompareAndClear"}; -struct MutationRef { +struct MutationRef { static const int OVERHEAD_BYTES = 12; //12 is the size of Header in MutationList entries enum Type : uint8_t { SetValue = 0, @@ -82,8 +83,18 @@ struct MutationRef { MutationRef() {} MutationRef( Type t, StringRef a, StringRef b ) : type(t), param1(a), param2(b) {} MutationRef( Arena& to, const MutationRef& from ) : type(from.type), param1( to, from.param1 ), param2( to, from.param2 ) {} - int totalSize() const { return OVERHEAD_BYTES + param1.size() + param2.size(); } + int totalSize() const { return OVERHEAD_BYTES + param1.size() + param2.size(); } int expectedSize() const { return param1.size() + param2.size(); } + int weightedTotalSize() const { + // AtomicOp can cause more workload to FDB cluster than the same-size set mutation; + // Amplify atomicOp size to consider such extra workload. + // A good value for FASTRESTORE_ATOMICOP_WEIGHT needs experimental evaluations. 
+ if (isAtomicOp()) { + return totalSize() * SERVER_KNOBS->FASTRESTORE_ATOMICOP_WEIGHT; + } else { + return totalSize(); + } + } std::string toString() const { if (type < MutationRef::MAX_ATOMIC_OP) { @@ -94,6 +105,8 @@ struct MutationRef { } } + bool isAtomicOp() const { return (ATOMIC_MASK & (1 << type)) != 0; } + template void serialize( Ar& ar ) { serializer(ar, type, param1, param2); diff --git a/fdbclient/DatabaseBackupAgent.actor.cpp b/fdbclient/DatabaseBackupAgent.actor.cpp index d9e05da706..8d2fe7ea4e 100644 --- a/fdbclient/DatabaseBackupAgent.actor.cpp +++ b/fdbclient/DatabaseBackupAgent.actor.cpp @@ -1840,6 +1840,9 @@ public: tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); tr->setOption(FDBTransactionOptions::LOCK_AWARE); + + //This commit must happen on the first proxy to ensure that the applier has flushed all mutations from previous DRs + tr->setOption(FDBTransactionOptions::COMMIT_ON_FIRST_PROXY); // We will use the global status for now to ensure that multiple backups do not start place with different tags state int status = wait(backupAgent->getStateValue(tr, logUidCurrent)); @@ -1959,8 +1962,8 @@ public: } if (!g_network->isSimulated() && !forceAction) { - state StatusObject srcStatus = wait(StatusClient::statusFetcher(backupAgent->taskBucket->src->getConnectionFile())); - StatusObject destStatus = wait(StatusClient::statusFetcher(dest->getConnectionFile())); + state StatusObject srcStatus = wait(StatusClient::statusFetcher(backupAgent->taskBucket->src)); + StatusObject destStatus = wait(StatusClient::statusFetcher(dest)); checkAtomicSwitchOverConfig(srcStatus, destStatus, tagName); } @@ -2274,6 +2277,7 @@ public: state Reference tr(new ReadYourWritesTransaction(cx)); tr->setOption(FDBTransactionOptions::LOCK_AWARE); state std::string statusText; + state int retries = 0; loop{ try { @@ -2291,27 +2295,33 @@ public: tr->setOption(FDBTransactionOptions::LOCK_AWARE); state Future> fPaused = tr->get(backupAgent->taskBucket->getPauseKey()); + state Future> fErrorValues = errorLimit > 0 ? 
tr->getRange(backupAgent->errors.get(BinaryWriter::toValue(logUid, Unversioned())).range(), errorLimit, false, true) : Future>(); + state Future> fBackupUid = tr->get(backupAgent->states.get(BinaryWriter::toValue(logUid, Unversioned())).pack(DatabaseBackupAgent::keyFolderId)); + state Future> fBackupVerison = tr->get(BinaryWriter::toValue(logUid, Unversioned()).withPrefix(applyMutationsBeginRange.begin)); + state Future> fTagName = tr->get(backupAgent->states.get(BinaryWriter::toValue(logUid, Unversioned())).pack(BackupAgentBase::keyConfigBackupTag)); + state Future> fStopVersionKey = tr->get(backupAgent->states.get(BinaryWriter::toValue(logUid, Unversioned())).pack(BackupAgentBase::keyStateStop)); + state Future> fBackupKeysPacked = tr->get(backupAgent->config.get(BinaryWriter::toValue(logUid, Unversioned())).pack(BackupAgentBase::keyConfigBackupRanges)); + int backupStateInt = wait(backupAgent->getStateValue(tr, logUid)); state BackupAgentBase::enumState backupState = (BackupAgentBase::enumState)backupStateInt; - + if (backupState == DatabaseBackupAgent::STATE_NEVERRAN) { statusText += "No previous backups found.\n"; } else { state std::string tagNameDisplay; - Optional tagName = wait(tr->get(backupAgent->states.get(BinaryWriter::toValue(logUid, Unversioned())).pack(BackupAgentBase::keyConfigBackupTag))); + Optional tagName = wait(fTagName); // Define the display tag name if (tagName.present()) { tagNameDisplay = tagName.get().toString(); } - state Optional uid = wait(tr->get(backupAgent->config.get(BinaryWriter::toValue(logUid, Unversioned())).pack(BackupAgentBase::keyFolderId))); - state Optional stopVersionKey = wait(tr->get(backupAgent->states.get(BinaryWriter::toValue(logUid, Unversioned())).pack(BackupAgentBase::keyStateStop))); + state Optional stopVersionKey = wait(fStopVersionKey); + + Optional backupKeysPacked = wait(fBackupKeysPacked); state Standalone> backupRanges; - Optional backupKeysPacked = wait(tr->get(backupAgent->config.get(BinaryWriter::toValue(logUid, Unversioned())).pack(BackupAgentBase::keyConfigBackupRanges))); - if (backupKeysPacked.present()) { BinaryReader br(backupKeysPacked.get(), IncludeVersion()); br >> backupRanges; @@ -2347,7 +2357,7 @@ public: // Append the errors, if requested if (errorLimit > 0) { - Standalone values = wait(tr->getRange(backupAgent->errors.get(BinaryWriter::toValue(logUid, Unversioned())).range(), errorLimit, false, true)); + Standalone values = wait( fErrorValues ); // Display the errors, if any if (values.size() > 0) { @@ -2364,10 +2374,9 @@ public: //calculate time differential - state Optional backupUid = wait(tr->get(backupAgent->states.get(BinaryWriter::toValue(logUid, Unversioned())).pack(DatabaseBackupAgent::keyFolderId))); + Optional backupUid = wait(fBackupUid); if(backupUid.present()) { - Optional v = wait(tr->get(BinaryWriter::toValue(logUid, Unversioned()).withPrefix(applyMutationsBeginRange.begin))); - + Optional v = wait(fBackupVerison); if (v.present()) { state Version destApplyBegin = BinaryReader::fromStringRef(v.get(), Unversioned()); Version sourceVersion = wait(srcReadVersion); @@ -2384,6 +2393,11 @@ public: break; } catch (Error &e) { + retries++; + if(retries > 5) { + statusText += format("\nWARNING: Could not fetch full DR status: %s\n", e.name()); + return statusText; + } wait(tr->onError(e)); } } diff --git a/fdbclient/DatabaseConfiguration.cpp b/fdbclient/DatabaseConfiguration.cpp index 425fd0b051..11938f29fa 100644 --- a/fdbclient/DatabaseConfiguration.cpp +++ b/fdbclient/DatabaseConfiguration.cpp @@ -41,6 
+41,7 @@ void DatabaseConfiguration::resetInternal() { tLogPolicy = storagePolicy = remoteTLogPolicy = Reference(); remoteDesiredTLogCount = -1; remoteTLogReplicationFactor = repopulateRegionAntiQuorum = 0; + backupWorkerEnabled = false; } void parse( int* i, ValueRef const& v ) { @@ -322,6 +323,8 @@ StatusObject DatabaseConfiguration::toJSON(bool noPolicies) const { if (autoDesiredTLogCount != CLIENT_KNOBS->DEFAULT_AUTO_LOGS) { result["auto_logs"] = autoDesiredTLogCount; } + + result["backup_worker_enabled"] = (int32_t)backupWorkerEnabled; } return result; @@ -434,6 +437,7 @@ bool DatabaseConfiguration::setInternal(KeyRef key, ValueRef value) { else if (ck == LiteralStringRef("remote_logs")) parse(&remoteDesiredTLogCount, value); else if (ck == LiteralStringRef("remote_log_replicas")) parse(&remoteTLogReplicationFactor, value); else if (ck == LiteralStringRef("remote_log_policy")) parseReplicationPolicy(&remoteTLogPolicy, value); + else if (ck == LiteralStringRef("backup_worker_enabled")) { parse((&type), value); backupWorkerEnabled = (type != 0); } else if (ck == LiteralStringRef("usable_regions")) parse(&usableRegions, value); else if (ck == LiteralStringRef("repopulate_anti_quorum")) parse(&repopulateRegionAntiQuorum, value); else if (ck == LiteralStringRef("regions")) parse(®ions, value); diff --git a/fdbclient/DatabaseConfiguration.h b/fdbclient/DatabaseConfiguration.h index 0fdae09956..e085e003bd 100644 --- a/fdbclient/DatabaseConfiguration.h +++ b/fdbclient/DatabaseConfiguration.h @@ -178,6 +178,9 @@ struct DatabaseConfiguration { int32_t remoteTLogReplicationFactor; Reference remoteTLogPolicy; + // Backup Workers + bool backupWorkerEnabled; + //Data centers int32_t usableRegions; int32_t repopulateRegionAntiQuorum; diff --git a/fdbclient/DatabaseContext.h b/fdbclient/DatabaseContext.h index 7637723dee..c8740bc072 100644 --- a/fdbclient/DatabaseContext.h +++ b/fdbclient/DatabaseContext.h @@ -216,6 +216,10 @@ public: Future clientInfoMonitor; Future connected; + Reference>> statusClusterInterface; + Future statusLeaderMon; + double lastStatusFetch; + int apiVersion; int mvCacheInsertLocation; diff --git a/fdbclient/FDBTypes.h b/fdbclient/FDBTypes.h index 26177e5d98..6111b23138 100644 --- a/fdbclient/FDBTypes.h +++ b/fdbclient/FDBTypes.h @@ -73,7 +73,7 @@ struct Tag { } template - force_inline void serialize_unversioned(Ar& ar) { + force_inline void serialize_unversioned(Ar& ar) { serializer(ar, locality, id); } }; @@ -162,11 +162,11 @@ void uniquify( Collection& c ) { c.resize( std::unique(c.begin(), c.end()) - c.begin() ); } -static std::string describe( const Tag item ) { +inline std::string describe( const Tag item ) { return format("%d:%d", item.locality, item.id); } -static std::string describe( const int item ) { +inline std::string describe( const int item ) { return format("%d", item); } @@ -176,17 +176,17 @@ static std::string describe(const std::string& s) { } template -static std::string describe( Reference const& item ) { +std::string describe( Reference const& item ) { return item->toString(); } template -static std::string describe( T const& item ) { +std::string describe( T const& item ) { return item.toString(); } template -static std::string describe( std::map const& items, int max_items = -1 ) { +std::string describe( std::map const& items, int max_items = -1 ) { if(!items.size()) return "[no items]"; @@ -202,7 +202,7 @@ static std::string describe( std::map const& items, int max_items = -1 ) { } template -static std::string describeList( T const& items, int 
max_items ) { +std::string describeList( T const& items, int max_items ) { if(!items.size()) return "[no items]"; @@ -218,12 +218,12 @@ static std::string describeList( T const& items, int max_items ) { } template -static std::string describe( std::vector const& items, int max_items = -1 ) { +std::string describe( std::vector const& items, int max_items = -1 ) { return describeList(items, max_items); } template -static std::string describe( std::set const& items, int max_items = -1 ) { +std::string describe( std::set const& items, int max_items = -1 ) { return describeList(items, max_items); } @@ -414,7 +414,7 @@ typedef Standalone Key; typedef Standalone Value; typedef Standalone KeyRange; typedef Standalone KeyValue; -typedef Standalone KeySelector; +typedef Standalone KeySelector; enum { invalidVersion = -1, latestVersion = -2 }; @@ -578,7 +578,7 @@ struct KeyRangeWith : KeyRange { } }; template -static inline KeyRangeWith keyRangeWith( const KeyRangeRef& range, const Val& value ) { +KeyRangeWith keyRangeWith( const KeyRangeRef& range, const Val& value ) { return KeyRangeWith(range, value); } @@ -663,6 +663,7 @@ struct KeyValueStoreType { this->type = END; } operator StoreType() const { return StoreType(type); } + StoreType storeType() const { return StoreType(type); } template void serialize(Ar& ar) { serializer(ar, type); } @@ -856,7 +857,7 @@ struct AddressExclusion { } }; -static bool addressExcluded( std::set const& exclusions, NetworkAddress const& addr ) { +inline bool addressExcluded( std::set const& exclusions, NetworkAddress const& addr ) { return exclusions.count( AddressExclusion(addr.ip, addr.port) ) || exclusions.count( AddressExclusion(addr.ip) ); } diff --git a/fdbclient/FileBackupAgent.actor.cpp b/fdbclient/FileBackupAgent.actor.cpp index c051adf052..0eec26fa8a 100644 --- a/fdbclient/FileBackupAgent.actor.cpp +++ b/fdbclient/FileBackupAgent.actor.cpp @@ -23,6 +23,7 @@ #include "fdbclient/DatabaseContext.h" #include "fdbclient/ManagementAPI.actor.h" #include "fdbclient/Status.h" +#include "fdbclient/SystemData.h" #include "fdbclient/KeyBackedTypes.h" #include "fdbclient/JsonBuilder.h" @@ -899,6 +900,29 @@ namespace fileBackup { return LiteralStringRef("OnSetAddTask"); } + // Clears the backup ID from "backupStartedKey" to pause backup workers. + ACTOR static Future clearBackupStartID(Reference tr, UID backupUid) { + // If backup worker is not enabled, exit early. + Optional started = wait(tr->get(backupStartedKey)); + std::vector> ids; + if (started.present()) { + ids = decodeBackupStartedValue(started.get()); + } + auto it = std::find_if(ids.begin(), ids.end(), + [=](const std::pair& p) { return p.first == backupUid; }); + if (it != ids.end()) { + ids.erase(it); + } + + if (ids.empty()) { + TraceEvent("ClearBackup").detail("BackupID", backupUid); + tr->clear(backupStartedKey); + } else { + tr->set(backupStartedKey, encodeBackupStartedValue(ids)); + } + return Void(); + } + // Backup and Restore taskFunc definitions will inherit from one of the following classes which // servers to catch and log to the appropriate config any error that execute/finish didn't catch and log. 
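(Editor's note, stepping out of the diff briefly.) clearBackupStartID above treats the value stored under backupStartedKey as a serialized list of (backup UID, start version) pairs shared by every running backup: each backup removes only its own UID, and the key is cleared outright once the list is empty so that backup workers can pause. A minimal sketch of the value's round trip, assuming the flow BinaryWriter/BinaryReader conventions that the encodeBackupStartedValue/decodeBackupStartedValue names suggest:

    // Sketch only: assumes flow serialization handles
    // std::vector<std::pair<UID, Version>>, as it does for similar types.
    Value encodeBackupStartedValueSketch(const std::vector<std::pair<UID, Version>>& ids) {
        BinaryWriter wr(IncludeVersion());
        wr << ids;
        return wr.toValue();
    }

    std::vector<std::pair<UID, Version>> decodeBackupStartedValueSketch(ValueRef value) {
        std::vector<std::pair<UID, Version>> ids;
        if (value.size() > 0) {
            BinaryReader reader(value, IncludeVersion());
            reader >> ids;
        }
        return ids;
    }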
struct RestoreTaskFuncBase : TaskFuncBase { @@ -953,7 +977,7 @@ namespace fileBackup { } Params; std::string toString(Reference task) { - return format("beginKey '%s' endKey '%s' addTasks %d", + return format("beginKey '%s' endKey '%s' addTasks %d", Params.beginKey().get(task).printable().c_str(), Params.endKey().get(task).printable().c_str(), Params.addBackupRangeTasks().get(task) @@ -965,7 +989,7 @@ namespace fileBackup { Future execute(Database cx, Reference tb, Reference fb, Reference task) { return _execute(cx, tb, fb, task); }; Future finish(Reference tr, Reference tb, Reference fb, Reference task) { return _finish(tr, tb, fb, task); }; - // Finish (which flushes/syncs) the file, and then in a single transaction, make some range backup progress durable. + // Finish (which flushes/syncs) the file, and then in a single transaction, make some range backup progress durable. // This means: // - increment the backup config's range bytes written // - update the range file map @@ -1540,7 +1564,7 @@ namespace fileBackup { } // The number of shards 'behind' the snapshot is the count of how may additional shards beyond normal are being dispatched, if any. - int countShardsBehind = std::max(0, countShardsToDispatch + snapshotBatchSize.get() - countShardsExpectedPerNormalWindow); + int countShardsBehind = std::max(0, countShardsToDispatch + snapshotBatchSize.get() - countShardsExpectedPerNormalWindow); Params.shardsBehind().set(task, countShardsBehind); TraceEvent("FileBackupSnapshotDispatchStats") @@ -1591,7 +1615,7 @@ namespace fileBackup { state int64_t oldBatchSize = snapshotBatchSize.get(); state int64_t newBatchSize = oldBatchSize + rangesToAdd.size(); - // Now add the selected ranges in a single transaction. + // Now add the selected ranges in a single transaction. 
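(Editor's note: the transaction beginning just below uses FoundationDB's standard retry idiom: run the body in a loop and route every error through onError(), which delays and retries on retryable errors and rethrows the rest. A generic sketch of that shape, not this task's exact body, for readers unfamiliar with flow:)

    // Generic flow retry-loop sketch (requires the actor compiler).
    ACTOR Future<Void> runWithRetrySketch(Reference<ReadYourWritesTransaction> tr) {
        loop {
            try {
                tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
                tr->setOption(FDBTransactionOptions::LOCK_AWARE);
                // ... stage this step's reads and writes here ...
                wait(tr->commit());
                return Void();
            } catch (Error& e) {
                // Retryable errors delay and loop; others rethrow.
                wait(tr->onError(e));
            }
        }
    }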
tr->reset(); loop { try { @@ -1844,7 +1868,7 @@ namespace fileBackup { for (auto &range : ranges) { rc.push_back(readCommitted(cx, results, lock, range, false, true, true)); } - + state Future sendEOS = map(errorOr(waitForAll(rc)), [=](ErrorOr const &result) { if(result.isError()) results.sendError(result.getError()); @@ -2050,7 +2074,7 @@ namespace fileBackup { state Optional tag; state Optional latestSnapshotEndVersion; - wait(store(stopWhenDone, config.stopWhenDone().getOrThrow(tr)) + wait(store(stopWhenDone, config.stopWhenDone().getOrThrow(tr)) && store(restorableVersion, config.getLatestRestorableVersion(tr)) && store(backupState, config.stateEnum().getOrThrow(tr)) && store(tag, config.tag().get(tr)) @@ -2148,8 +2172,9 @@ namespace fileBackup { tr->setOption(FDBTransactionOptions::COMMIT_ON_FIRST_PROXY); state Key destUidValue = wait(backup.destUidValue().getOrThrow(tr)); - wait( eraseLogData(tr, backup.getUidAsKey(), destUidValue) ); - + + wait(eraseLogData(tr, backup.getUidAsKey(), destUidValue) && clearBackupStartID(tr, uid)); + backup.stateEnum().set(tr, EBackupState::STATE_COMPLETED); wait(taskBucket->finish(tr, task)); @@ -2282,7 +2307,7 @@ namespace fileBackup { state Optional firstSnapshotEndVersion; state Optional tag; - wait(store(stopWhenDone, config.stopWhenDone().getOrThrow(tr)) + wait(store(stopWhenDone, config.stopWhenDone().getOrThrow(tr)) && store(backupState, config.stateEnum().getOrThrow(tr)) && store(restorableVersion, config.getLatestRestorableVersion(tr)) && store(firstSnapshotEndVersion, config.firstSnapshotEndVersion().get(tr)) @@ -2346,8 +2371,8 @@ namespace fileBackup { ACTOR static Future _execute(Database cx, Reference taskBucket, Reference futureBucket, Reference task) { wait(checkTaskVersion(cx, task, StartFullBackupTaskFunc::name, StartFullBackupTaskFunc::version)); + state Reference tr(new ReadYourWritesTransaction(cx)); loop{ - state Reference tr(new ReadYourWritesTransaction(cx)); try { tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); tr->setOption(FDBTransactionOptions::LOCK_AWARE); @@ -2361,7 +2386,56 @@ namespace fileBackup { } } - return Void(); + // Check if backup worker is enabled + DatabaseConfiguration dbConfig = wait(getDatabaseConfiguration(cx)); + if (!dbConfig.backupWorkerEnabled) { + wait(success(changeConfig(cx, "backup_worker_enabled:=1", true))); + } + + // Set the "backupStartedKey" and wait for all backup worker started + tr->reset(); + state BackupConfig config(task); + loop { + state Future watchFuture; + try { + tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); + tr->setOption(FDBTransactionOptions::LOCK_AWARE); + state Future keepRunning = taskBucket->keepRunning(tr, task); + + state Future> started = tr->get(backupStartedKey); + state Future> taskStarted = tr->get(config.allWorkerStarted().key); + wait(success(started) && success(taskStarted)); + + std::vector> ids; + if (started.get().present()) { + ids = decodeBackupStartedValue(started.get().get()); + } + const UID uid = config.getUid(); + auto it = std::find_if(ids.begin(), ids.end(), + [uid](const std::pair& p) { return p.first == uid; }); + if (it == ids.end()) { + ids.emplace_back(uid, Params.beginVersion().get(task)); + } else { + Params.beginVersion().set(task, it->second); + } + + tr->set(backupStartedKey, encodeBackupStartedValue(ids)); + + // The task may be restarted. Set the watch if started key has NOT been set. 
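(Editor's note: the code below relies on the FoundationDB watch idiom. A watch is only armed once the transaction that created it commits, so the order is: create the watch future inside the transaction, commit, and only then wait on the future. A generic sketch, not this task's code:)

    // Watch idiom sketch: commit arms the watch; wait on it afterwards.
    ACTOR Future<Void> waitForKeyChangeSketch(Database cx, Key key) {
        state Reference<ReadYourWritesTransaction> tr(new ReadYourWritesTransaction(cx));
        loop {
            try {
                state Future<Void> watchFuture = tr->watch(key);
                wait(tr->commit());
                wait(watchFuture); // fires when the key's value changes
                return Void();
            } catch (Error& e) {
                wait(tr->onError(e));
            }
        }
    }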
+ if (!taskStarted.get().present()) { + watchFuture = tr->watch(config.allWorkerStarted().key); + } + + wait(keepRunning); + wait(tr->commit()); + if (!taskStarted.get().present()) { + wait(watchFuture); + } + return Void(); + } catch (Error &e) { + wait(tr->onError(e)); + } + } } ACTOR static Future _finish(Reference tr, Reference taskBucket, Reference futureBucket, Reference task) { @@ -2396,6 +2470,7 @@ namespace fileBackup { wait(success(FileBackupFinishedTask::addTask(tr, taskBucket, task, TaskCompletionKey::noSignal(), backupFinished))); wait(taskBucket->finish(tr, task)); + return Void(); } @@ -2477,12 +2552,12 @@ namespace fileBackup { std::string toString(Reference task) { return format("fileName '%s' readLen %lld readOffset %lld", - Params.inputFile().get(task).fileName.c_str(), + Params.inputFile().get(task).fileName.c_str(), Params.readLen().get(task), Params.readOffset().get(task)); } }; - + struct RestoreRangeTaskFunc : RestoreFileTaskFuncBase { static struct : InputParams { // The range of data that the (possibly empty) data represented, which is set if it intersects the target restore range @@ -2707,7 +2782,7 @@ namespace fileBackup { // Create a restore config from the current task and bind it to the new task. wait(RestoreConfig(parentTask).toTask(tr, task)); - + Params.inputFile().set(task, rf); Params.readOffset().set(task, offset); Params.readLen().set(task, len); @@ -3019,7 +3094,7 @@ namespace fileBackup { } // Start moving through the file list and queuing up blocks. Only queue up to RESTORE_DISPATCH_ADDTASK_SIZE blocks per Dispatch task - // and target batchSize total per batch but a batch must end on a complete version boundary so exceed the limit if necessary + // and target batchSize total per batch but a batch must end on a complete version boundary so exceed the limit if necessary // to reach the end of a version of files. state std::vector> addTaskFutures; state Version endVersion = files[0].version; @@ -3067,12 +3142,12 @@ namespace fileBackup { ++blocksDispatched; --remainingInBatch; } - + // Stop if we've reached the addtask limit if(blocksDispatched == taskBatchSize) break; - // We just completed an entire file so the next task should start at the file after this one within endVersion (or later) + // We just completed an entire file so the next task should start at the file after this one within endVersion (or later) // if this iteration ends up being the last for this task beginFile = beginFile + '\x00'; beginBlock = 0; @@ -3110,7 +3185,7 @@ namespace fileBackup { .detail("RemainingInBatch", remainingInBatch); wait(success(RestoreDispatchTaskFunc::addTask(tr, taskBucket, task, endVersion, beginFile, beginBlock, batchSize, remainingInBatch, TaskCompletionKey::joinWith((allPartsDone))))); - + // If adding to existing batch then task is joined with a batch future so set done future. // Note that this must be done after joining at least one task with the batch future in case all other blockers already finished. Future setDone = addingToExistingBatch ? 
onDone->set(tr, taskBucket) : Void(); @@ -3123,7 +3198,7 @@ namespace fileBackup { // Increment the number of blocks dispatched in the restore config restore.filesBlocksDispatched().atomicOp(tr, blocksDispatched, MutationRef::Type::AddValue); - // If beginFile is not empty then we had to stop in the middle of a version (possibly within a file) so we cannot end + // If beginFile is not empty then we had to stop in the middle of a version (possibly within a file) so we cannot end // the batch here because we do not know if we got all of the files and blocks from the last version queued, so // make sure remainingInBatch is at least 1. if(!beginFile.empty()) @@ -3260,7 +3335,7 @@ namespace fileBackup { wait( tr->onError(e) ); } } - + tr = Reference( new ReadYourWritesTransaction(cx) ); //Commit a dummy transaction before returning success, to ensure the mutation applier has stopped submitting mutations @@ -3523,6 +3598,7 @@ public: ACTOR static Future submitBackup(FileBackupAgent* backupAgent, Reference tr, Key outContainer, int snapshotIntervalSeconds, std::string tagName, Standalone> backupRanges, bool stopWhenDone) { tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); tr->setOption(FDBTransactionOptions::LOCK_AWARE); + tr->setOption(FDBTransactionOptions::COMMIT_ON_FIRST_PROXY); TraceEvent(SevInfo, "FBA_SubmitBackup") .detail("TagName", tagName.c_str()) @@ -3773,7 +3849,7 @@ public: throw backup_unneeded(); } - // If the backup is already restorable then 'mostly' abort it - cancel all tasks via the tag + // If the backup is already restorable then 'mostly' abort it - cancel all tasks via the tag // and clear the mutation logging config and data - but set its state as COMPLETED instead of ABORTED. state Optional latestRestorableVersion = wait(config.getLatestRestorableVersion(tr)); @@ -3790,7 +3866,8 @@ public: state Key destUidValue = wait(config.destUidValue().getOrThrow(tr)); wait(success(tr->getReadVersion())); - wait( eraseLogData(tr, config.getUidAsKey(), destUidValue) ); + wait(eraseLogData(tr, config.getUidAsKey(), destUidValue) && + fileBackup::clearBackupStartID(tr, config.getUid())); config.stateEnum().set(tr, EBackupState::STATE_COMPLETED); @@ -3829,8 +3906,9 @@ public: // Cancel backup task through tag wait(tag.cancel(tr)); - - wait(eraseLogData(tr, config.getUidAsKey(), destUidValue)); + + wait(eraseLogData(tr, config.getUidAsKey(), destUidValue) && + fileBackup::clearBackupStartID(tr, config.getUid())); config.stateEnum().set(tr, EBackupState::STATE_ABORTED); @@ -3940,7 +4018,7 @@ public: wait( store(snapshotInterval, config.snapshotIntervalSeconds().getOrThrow(tr)) && store(logBytesWritten, config.logBytesWritten().getD(tr)) && store(rangeBytesWritten, config.rangeBytesWritten().getD(tr)) - && store(stopWhenDone, config.stopWhenDone().getOrThrow(tr)) + && store(stopWhenDone, config.stopWhenDone().getOrThrow(tr)) && store(snapshotBegin, getTimestampedVersion(tr, config.snapshotBeginVersion().get(tr))) && store(snapshotTargetEnd, getTimestampedVersion(tr, config.snapshotTargetEndVersion().get(tr))) && store(latestLogEnd, getTimestampedVersion(tr, config.latestLogEndVersion().get(tr))) @@ -4039,7 +4117,7 @@ public: state Reference bc; state Optional latestRestorableVersion; state Version recentReadVersion; - + wait( store(latestRestorableVersion, config.getLatestRestorableVersion(tr)) && store(bc, config.backupContainer().getOrThrow(tr)) && store(recentReadVersion, tr->getReadVersion()) @@ -4090,7 +4168,7 @@ public: && store(rangeBytesWritten, 
config.rangeBytesWritten().get(tr)) && store(latestLogEndVersion, config.latestLogEndVersion().get(tr)) && store(latestSnapshotEndVersion, config.latestSnapshotEndVersion().get(tr)) - && store(stopWhenDone, config.stopWhenDone().getOrThrow(tr)) + && store(stopWhenDone, config.stopWhenDone().getOrThrow(tr)) ); wait( store(latestSnapshotEndVersionTimestamp, getTimestampFromVersion(latestSnapshotEndVersion, tr)) @@ -4104,7 +4182,7 @@ public: statusText += format("Current snapshot progress target is %3.2f%% (>100%% means the snapshot is supposed to be done)\n", 100.0 * (recentReadVersion - snapshotBeginVersion) / (snapshotTargetEndVersion - snapshotBeginVersion)) ; else statusText += "The initial snapshot is still running.\n"; - + statusText += format("\nDetails:\n LogBytes written - %ld\n RangeBytes written - %ld\n " "Last complete log version and timestamp - %s, %s\n " "Last complete snapshot version and timestamp - %s, %s\n " @@ -4255,7 +4333,7 @@ public: wait( ryw_tr->onError(e) ); } } - + //Lock src, record commit version state Transaction tr(cx); state Version commitVersion; @@ -4395,4 +4473,3 @@ void FileBackupAgent::setLastRestorable(Reference tr, Future FileBackupAgent::waitBackup(Database cx, std::string tagName, bool stopWhenDone, Reference *pContainer, UID *pUID) { return FileBackupAgentImpl::waitBackup(this, cx, tagName, stopWhenDone, pContainer, pUID); } - diff --git a/fdbclient/IClientApi.h b/fdbclient/IClientApi.h index b3e6217054..154ac9723f 100644 --- a/fdbclient/IClientApi.h +++ b/fdbclient/IClientApi.h @@ -48,6 +48,7 @@ public: virtual ThreadFuture> getVersionstamp() = 0; virtual void addReadConflictRange(const KeyRangeRef& keys) = 0; + virtual ThreadFuture getEstimatedRangeSizeBytes(const KeyRangeRef& keys) = 0; virtual void atomicOp(const KeyRef& key, const ValueRef& value, uint32_t operationType) = 0; virtual void set(const KeyRef& key, const ValueRef& value) = 0; diff --git a/fdbclient/Knobs.cpp b/fdbclient/Knobs.cpp index 3c745ce694..22d33038bb 100644 --- a/fdbclient/Knobs.cpp +++ b/fdbclient/Knobs.cpp @@ -46,6 +46,7 @@ ClientKnobs::ClientKnobs(bool randomize) { init( CLIENT_EXAMPLE_AMOUNT, 20 ); init( MAX_CLIENT_STATUS_AGE, 1.0 ); init( MAX_PROXY_CONNECTIONS, 5 ); if( randomize && BUGGIFY ) MAX_PROXY_CONNECTIONS = 1; + init( STATUS_IDLE_TIMEOUT, 120.0 ); // wrong_shard_server sometimes comes from the only nonfailed server, so we need to avoid a fast spin @@ -200,7 +201,8 @@ ClientKnobs::ClientKnobs(bool randomize) { init( CONSISTENCY_CHECK_RATE_LIMIT_MAX, 50e6 ); // Limit in per sec init( CONSISTENCY_CHECK_ONE_ROUND_TARGET_COMPLETION_TIME, 7 * 24 * 60 * 60 ); // 7 days - - //fdbcli - init( CLI_CONNECT_PARALLELISM, 10 ); + + //fdbcli + init( CLI_CONNECT_PARALLELISM, 400 ); + init( CLI_CONNECT_TIMEOUT, 10.0 ); } diff --git a/fdbclient/Knobs.h b/fdbclient/Knobs.h index 25b26b0d12..b2a2348061 100644 --- a/fdbclient/Knobs.h +++ b/fdbclient/Knobs.h @@ -45,6 +45,7 @@ public: int CLIENT_EXAMPLE_AMOUNT; double MAX_CLIENT_STATUS_AGE; int MAX_PROXY_CONNECTIONS; + double STATUS_IDLE_TIMEOUT; // wrong_shard_server sometimes comes from the only nonfailed server, so we need to avoid a fast spin double WRONG_SHARD_SERVER_DELAY; // SOMEDAY: This delay can limit performance of retrieving data when the cache is mostly wrong (e.g. 
dumping the database after a test) @@ -190,10 +191,11 @@ public: int CONSISTENCY_CHECK_RATE_LIMIT_MAX; int CONSISTENCY_CHECK_ONE_ROUND_TARGET_COMPLETION_TIME; - - //fdbcli - int CLI_CONNECT_PARALLELISM; + // fdbcli + int CLI_CONNECT_PARALLELISM; + double CLI_CONNECT_TIMEOUT; + ClientKnobs(bool randomize = false); }; diff --git a/fdbclient/ManagementAPI.actor.cpp b/fdbclient/ManagementAPI.actor.cpp index de8912f4e8..f160431bdf 100644 --- a/fdbclient/ManagementAPI.actor.cpp +++ b/fdbclient/ManagementAPI.actor.cpp @@ -57,6 +57,13 @@ std::map configForToken( std::string const& mode ) { return out; } + if (mode == "locked") { + // Setting this key is interpreted as an instruction to use the normal version-stamp-based mechanism for locking + // the database. + out[databaseLockedKey.toString()] = deterministicRandom()->randomUniqueID().toString(); + return out; + } + size_t pos; // key:=value is unvalidated and unchecked @@ -112,8 +119,8 @@ std::map configForToken( std::string const& mode ) { // Add any new store types to fdbserver/workloads/ConfigureDatabase, too if (storeType.present()) { - out[p+"log_engine"] = format("%d", logType.get()); - out[p+"storage_engine"] = format("%d", storeType.get()); + out[p+"log_engine"] = format("%d", logType.get().storeType()); + out[p+"storage_engine"] = format("%d", KeyValueStoreType::StoreType(storeType.get())); return out; } @@ -305,6 +312,17 @@ ACTOR Future changeConfig( Database cx, std::map locked; + { + auto iter = m.find(databaseLockedKey.toString()); + if (iter != m.end()) { + if (!creating) { + return ConfigurationResult::LOCKED_NOT_NEW; + } + locked = UID::fromString(iter->second); + m.erase(iter); + } + } if (creating) { m[initIdKey.toString()] = deterministicRandom()->randomUniqueID().toString(); if (!isCompleteConfiguration(m)) { @@ -491,6 +509,15 @@ ACTOR Future changeConfig( Database cx, std::mapfirst) ); } + if (locked.present()) { + ASSERT(creating); + tr.atomicOp(databaseLockedKey, + BinaryWriter::toValue(locked.get(), Unversioned()) + .withPrefix(LiteralStringRef("0123456789")) + .withSuffix(LiteralStringRef("\x00\x00\x00\x00")), + MutationRef::SetVersionstampedValue); + } + for (auto i = m.begin(); i != m.end(); ++i) { tr.set( StringRef(i->first), StringRef(i->second) ); } @@ -963,9 +990,13 @@ ACTOR Future changeQuorum( Database cx, ReferenceisSimulated()) { for(int i = 0; i < (desiredCoordinators.size()/2)+1; i++) { - auto address = NetworkAddress(desiredCoordinators[i].ip,desiredCoordinators[i].port,true,false); - g_simulator.protectedAddresses.insert(address); - TraceEvent("ProtectCoordinator").detail("Address", address).backtrace(); + auto addresses = g_simulator.getProcessByAddress(desiredCoordinators[i])->addresses; + + g_simulator.protectedAddresses.insert(addresses.address); + if(addresses.secondaryAddress.present()) { + g_simulator.protectedAddresses.insert(addresses.secondaryAddress.get()); + } + TraceEvent("ProtectCoordinator").detail("Address", desiredCoordinators[i]).backtrace(); } } @@ -1124,8 +1155,7 @@ struct AutoQuorumChange : IQuorumChange { *err = CoordinatorsResult::NOT_ENOUGH_MACHINES; return vector(); } - desiredCount = std::max(oldCoordinators.size(), (workers.size() - 1) | 1); - chosen.resize(desiredCount); + chosen.resize((chosen.size() - 1) | 1); } return chosen; @@ -1521,10 +1551,14 @@ ACTOR Future> checkForExcludingServers(Database cx, vec state bool ok = true; inProgressExclusion.clear(); for(auto& s : serverList) { - auto addr = decodeServerListValue( s.value ).address(); - if ( addressExcluded(exclusions, 
addr) ) { + auto addresses = decodeServerListValue( s.value ).getKeyValues.getEndpoint().addresses; + if ( addressExcluded(exclusions, addresses.address) ) { ok = false; - inProgressExclusion.insert(addr); + inProgressExclusion.insert(addresses.address); + } + if ( addresses.secondaryAddress.present() && addressExcluded(exclusions, addresses.secondaryAddress.get()) ) { + ok = false; + inProgressExclusion.insert(addresses.secondaryAddress.get()); } } diff --git a/fdbclient/ManagementAPI.actor.h b/fdbclient/ManagementAPI.actor.h index 6834bbfcfb..11f0cba2fb 100644 --- a/fdbclient/ManagementAPI.actor.h +++ b/fdbclient/ManagementAPI.actor.h @@ -61,7 +61,8 @@ public: NOT_ENOUGH_WORKERS, REGION_REPLICATION_MISMATCH, DCID_MISSING, - SUCCESS + LOCKED_NOT_NEW, + SUCCESS, }; }; diff --git a/fdbclient/MasterProxyInterface.h b/fdbclient/MasterProxyInterface.h index 2e0e2cd8bb..b188ee761c 100644 --- a/fdbclient/MasterProxyInterface.h +++ b/fdbclient/MasterProxyInterface.h @@ -82,6 +82,7 @@ struct ClientDBInfo { constexpr static FileIdentifier file_identifier = 5355080; UID id; // Changes each time anything else changes vector< MasterProxyInterface > proxies; + Optional firstProxy; //not serialized, used for commitOnFirstProxy when the proxies vector has been shrunk double clientTxnInfoSampleRate; int64_t clientTxnInfoSizeLimit; Optional forward; diff --git a/fdbclient/MonitorLeader.actor.cpp b/fdbclient/MonitorLeader.actor.cpp index 6a5c4195de..90b569941f 100644 --- a/fdbclient/MonitorLeader.actor.cpp +++ b/fdbclient/MonitorLeader.actor.cpp @@ -511,7 +511,7 @@ ACTOR Future asyncDeserializeClusterInterface(Reference> s Reference>> outKnownLeader) { state Reference>> knownLeader( new AsyncVar>{}); - state Future deserializer = asyncDeserialize(serializedInfo, knownLeader, FLOW_KNOBS->USE_OBJECT_SERIALIZER); + state Future deserializer = asyncDeserialize(serializedInfo, knownLeader); loop { choose { when(wait(deserializer)) { UNSTOPPABLE_ASSERT(false); } @@ -655,15 +655,10 @@ ACTOR Future monitorLeaderForProxies( Key clusterKey, vectorUSE_OBJECT_SERIALIZER) { - ObjectReader reader(leader.get().first.serializedInfo.begin(), IncludeVersion()); - ClusterControllerClientInterface res; - reader.deserialize(res); - knownLeader->set(res); - } else { - ClusterControllerClientInterface res = BinaryReader::fromStringRef( leader.get().first.serializedInfo, IncludeVersion() ); - knownLeader->set(res); - } + ObjectReader reader(leader.get().first.serializedInfo.begin(), IncludeVersion()); + ClusterControllerClientInterface res; + reader.deserialize(res); + knownLeader->set(res); } } wait( nomineeChange.onTrigger() || allActors ); @@ -685,6 +680,7 @@ void shrinkProxyList( ClientDBInfo& ni, std::vector& lastProxyUIDs, std::ve TraceEvent("ConnectedProxy").detail("Proxy", lastProxies[i].id()); } } + ni.firstProxy = ni.proxies[0]; ni.proxies = lastProxies; } } diff --git a/fdbclient/MonitorLeader.h b/fdbclient/MonitorLeader.h index 0eae5151f7..020a113a6b 100644 --- a/fdbclient/MonitorLeader.h +++ b/fdbclient/MonitorLeader.h @@ -61,7 +61,9 @@ Future monitorProxies( Reference void shrinkProxyList( ClientDBInfo& ni, std::vector& lastProxyUIDs, std::vector& lastProxies ); +#ifndef __INTEL_COMPILER #pragma region Implementation +#endif Future monitorLeaderInternal( Reference const& connFile, Reference> const& outSerializedLeaderInfo ); @@ -69,7 +71,7 @@ template struct LeaderDeserializer { Future operator()(const Reference>& serializedInfo, const Reference>>& outKnownLeader) { - return asyncDeserialize(serializedInfo, 
outKnownLeader, FLOW_KNOBS->USE_OBJECT_SERIALIZER); + return asyncDeserialize(serializedInfo, outKnownLeader); } }; @@ -93,6 +95,8 @@ Future monitorLeader(Reference const& connFile, return m || deserializer( serializedInfo, outKnownLeader ); } +#ifndef __INTEL_COMPILER #pragma endregion +#endif #endif diff --git a/fdbclient/MultiVersionTransaction.actor.cpp b/fdbclient/MultiVersionTransaction.actor.cpp index 767805d785..40c8616d12 100644 --- a/fdbclient/MultiVersionTransaction.actor.cpp +++ b/fdbclient/MultiVersionTransaction.actor.cpp @@ -145,6 +145,20 @@ ThreadFuture> DLTransaction::getVersionstamp() { }); } +ThreadFuture DLTransaction::getEstimatedRangeSizeBytes(const KeyRangeRef& keys) { + if (!api->transactionGetEstimatedRangeSizeBytes) { + return unsupported_operation(); + } + FdbCApi::FDBFuture *f = api->transactionGetEstimatedRangeSizeBytes(tr, keys.begin.begin(), keys.begin.size(), keys.end.begin(), keys.end.size()); + + return toThreadFuture(api, f, [](FdbCApi::FDBFuture *f, FdbCApi *api) { + int64_t sampledSize; + FdbCApi::fdb_error_t error = api->futureGetInt64(f, &sampledSize); + ASSERT(!error); + return sampledSize; + }); +} + void DLTransaction::addReadConflictRange(const KeyRangeRef& keys) { throwIfError(api->transactionAddConflictRange(tr, keys.begin.begin(), keys.begin.size(), keys.end.begin(), keys.end.size(), FDBConflictRangeTypes::READ)); } @@ -307,6 +321,7 @@ void DLApi::init() { loadClientFunction(&api->transactionReset, lib, fdbCPath, "fdb_transaction_reset"); loadClientFunction(&api->transactionCancel, lib, fdbCPath, "fdb_transaction_cancel"); loadClientFunction(&api->transactionAddConflictRange, lib, fdbCPath, "fdb_transaction_add_conflict_range"); + loadClientFunction(&api->transactionGetEstimatedRangeSizeBytes, lib, fdbCPath, "fdb_transaction_get_estimated_range_size_bytes", headerVersion >= 700); loadClientFunction(&api->futureGetInt64, lib, fdbCPath, headerVersion >= 620 ? "fdb_future_get_int64" : "fdb_future_get_version"); loadClientFunction(&api->futureGetError, lib, fdbCPath, "fdb_future_get_error"); @@ -547,6 +562,12 @@ void MultiVersionTransaction::addReadConflictRange(const KeyRangeRef& keys) { } } +ThreadFuture MultiVersionTransaction::getEstimatedRangeSizeBytes(const KeyRangeRef& keys) { + auto tr = getTransaction(); + auto f = tr.transaction ? 
tr.transaction->getEstimatedRangeSizeBytes(keys) : ThreadFuture(Never()); + return abortableFuture(f, tr.onChange); +} + void MultiVersionTransaction::atomicOp(const KeyRef& key, const ValueRef& value, uint32_t operationType) { auto tr = getTransaction(); if(tr.transaction) { diff --git a/fdbclient/MultiVersionTransaction.h b/fdbclient/MultiVersionTransaction.h index e71f6e895f..a657f49cbb 100644 --- a/fdbclient/MultiVersionTransaction.h +++ b/fdbclient/MultiVersionTransaction.h @@ -81,6 +81,9 @@ struct FdbCApi : public ThreadSafeReferenceCounted { void (*transactionClear)(FDBTransaction *tr, uint8_t const *keyName, int keyNameLength); void (*transactionClearRange)(FDBTransaction *tr, uint8_t const *beginKeyName, int beginKeyNameLength, uint8_t const *endKeyName, int endKeyNameLength); void (*transactionAtomicOp)(FDBTransaction *tr, uint8_t const *keyName, int keyNameLength, uint8_t const *param, int paramLength, FDBMutationTypes::Option operationType); + + FDBFuture* (*transactionGetEstimatedRangeSizeBytes)(FDBTransaction* tr, uint8_t const* begin_key_name, + int begin_key_name_length, uint8_t const* end_key_name, int end_key_name_length); FDBFuture* (*transactionCommit)(FDBTransaction *tr); fdb_error_t (*transactionGetCommittedVersion)(FDBTransaction *tr, int64_t *outVersion); @@ -129,6 +132,7 @@ public: ThreadFuture> getRange( const KeyRangeRef& keys, GetRangeLimits limits, bool snapshot=false, bool reverse=false) override; ThreadFuture>> getAddressesForKey(const KeyRef& key) override; ThreadFuture> getVersionstamp() override; + ThreadFuture getEstimatedRangeSizeBytes(const KeyRangeRef& keys) override; void addReadConflictRange(const KeyRangeRef& keys) override; @@ -228,6 +232,7 @@ public: ThreadFuture> getVersionstamp() override; void addReadConflictRange(const KeyRangeRef& keys) override; + ThreadFuture getEstimatedRangeSizeBytes(const KeyRangeRef& keys) override; void atomicOp(const KeyRef& key, const ValueRef& value, uint32_t operationType) override; void set(const KeyRef& key, const ValueRef& value) override; diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index cf7b9e790f..2d89d78b2c 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -47,13 +47,13 @@ #include "fdbrpc/LoadBalance.h" #include "fdbrpc/Net2FileSystem.h" #include "fdbrpc/simulator.h" -#include "fdbrpc/TLSConnection.h" #include "flow/flow.h" #include "flow/ActorCollection.h" #include "flow/DeterministicRandom.h" #include "flow/Knobs.h" #include "flow/Platform.h" #include "flow/SystemMonitor.h" +#include "flow/TLSPolicy.h" #include "flow/UnitTest.h" #include "flow/genericactors.actor.h" #include "flow/serialize.h" @@ -107,12 +107,15 @@ Future loadBalance( } // namespace NetworkOptions networkOptions; -Reference tlsOptions; +TLSParams tlsParams; +static Reference tlsPolicy; -static void initTLSOptions() { - if (!tlsOptions) { - tlsOptions = Reference(new TLSOptions()); +static void initTLSPolicy() { +#ifndef TLS_DISABLED + if (!tlsPolicy) { + tlsPolicy = Reference(new TLSPolicy(TLSPolicy::Is::CLIENT)); } +#endif } static const Key CLIENT_LATENCY_INFO_PREFIX = LiteralStringRef("client_latency/"); @@ -296,24 +299,6 @@ ACTOR Future databaseLogger( DatabaseContext *cx ) { } } -ACTOR static Future > getSampleVersionStamp(Transaction *tr) { - loop{ - try { - tr->reset(); - tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE); - wait(success(tr->get(LiteralStringRef("\xff/StatusJsonTestKey62793")))); - state Future > vstamp = tr->getVersionstamp(); - 
-			tr->makeSelfConflicting();
-			wait(tr->commit());
-			Standalone<StringRef> val = wait(vstamp);
-			return val;
-		}
-		catch (Error& e) {
-			wait(tr->onError(e));
-		}
-	}
-}
-
 struct TrInfoChunk {
 	ValueRef value;
 	Key key;
@@ -1001,6 +986,7 @@ Database Database::createDatabase( Reference<ClusterConnectionFile> connFile, in
 		auto publicIP = determinePublicIPAutomatically( connFile->getConnectionString() );
 		selectTraceFormatter(networkOptions.traceFormat);
+		selectTraceClockSource(networkOptions.traceClockSource);
 		openTraceFile(NetworkAddress(publicIP, ::getpid()), networkOptions.traceRollSize, networkOptions.traceMaxLogsSize, networkOptions.traceDirectory.get(), "trace", networkOptions.traceLogGroup);
 
 		TraceEvent("ClientStart")
@@ -1073,6 +1059,14 @@ void setNetworkOption(FDBNetworkOptions::Option option, Optional<StringRef> valu
 				throw invalid_option_value();
 			}
 			break;
+		case FDBNetworkOptions::TRACE_CLOCK_SOURCE:
+			validateOptionValue(value, true);
+			networkOptions.traceClockSource = value.get().toString();
+			if (!validateTraceClockSource(networkOptions.traceClockSource)) {
+				fprintf(stderr, "Unrecognized trace clock source: `%s'\n", networkOptions.traceClockSource.c_str());
+				throw invalid_option_value();
+			}
+			break;
 		case FDBNetworkOptions::KNOB: {
 			validateOptionValue(value, true);
 
@@ -1100,49 +1094,46 @@ void setNetworkOption(FDBNetworkOptions::Option option, Optional<StringRef> valu
 			break;
 		case FDBNetworkOptions::TLS_CERT_PATH:
 			validateOptionValue(value, true);
-			initTLSOptions();
-			tlsOptions->set_cert_file( value.get().toString() );
+			tlsParams.tlsCertPath = value.get().toString();
 			break;
-		case FDBNetworkOptions::TLS_CERT_BYTES:
-			initTLSOptions();
-			tlsOptions->set_cert_data( value.get().toString() );
-			break;
-		case FDBNetworkOptions::TLS_CA_PATH:
+		case FDBNetworkOptions::TLS_CERT_BYTES: {
 			validateOptionValue(value, true);
-			initTLSOptions();
-			tlsOptions->set_ca_file( value.get().toString() );
+			tlsParams.tlsCertBytes = value.get().toString();
 			break;
-		case FDBNetworkOptions::TLS_CA_BYTES:
+		}
+		case FDBNetworkOptions::TLS_CA_PATH: {
 			validateOptionValue(value, true);
-			initTLSOptions();
-			tlsOptions->set_ca_data(value.get().toString());
+			tlsParams.tlsCAPath = value.get().toString();
 			break;
+		}
+		case FDBNetworkOptions::TLS_CA_BYTES: {
+			validateOptionValue(value, true);
+			tlsParams.tlsCABytes = value.get().toString();
+			break;
+		}
 		case FDBNetworkOptions::TLS_PASSWORD:
 			validateOptionValue(value, true);
-			initTLSOptions();
-			tlsOptions->set_key_password(value.get().toString());
+			tlsParams.tlsPassword = value.get().toString();
 			break;
 		case FDBNetworkOptions::TLS_KEY_PATH:
-			validateOptionValue(value, true);
-			initTLSOptions();
-			tlsOptions->set_key_file( value.get().toString() );
+			validateOptionValue(value, true);
+			tlsParams.tlsKeyPath = value.get().toString();
 			break;
-		case FDBNetworkOptions::TLS_KEY_BYTES:
+		case FDBNetworkOptions::TLS_KEY_BYTES: {
 			validateOptionValue(value, true);
-			initTLSOptions();
-			tlsOptions->set_key_data( value.get().toString() );
+			tlsParams.tlsKeyBytes = value.get().toString();
 			break;
+		}
 		case FDBNetworkOptions::TLS_VERIFY_PEERS:
 			validateOptionValue(value, true);
-			initTLSOptions();
-			try {
-				tlsOptions->set_verify_peers({ value.get().toString() });
-			} catch( Error& e ) {
+			initTLSPolicy();
+#ifndef TLS_DISABLED
+			if (!tlsPolicy->set_verify_peers({ value.get().toString() })) {
 				TraceEvent(SevWarnAlways, "TLSValidationSetError")
-					.error( e )
 					.detail("Input", value.get().toString() );
 				throw invalid_option_value();
 			}
+#endif
 			break;
 		case FDBNetworkOptions::CLIENT_BUGGIFY_ENABLE:
 			enableBuggify(true, BuggifyType::Client);
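For context, everything the reworked block above stores in tlsParams/tlsPolicy, plus the new TRACE_CLOCK_SOURCE case, is driven from client code through network options set before the network starts. A minimal sketch, assuming the generated FDB_NET_OPTION_* constants for the option names in fdb.options and the API version 700 referenced elsewhere in this diff; the file paths and verify-peers expression are illustrative only:

    #define FDB_API_VERSION 700
    #include <foundationdb/fdb_c.h>
    #include <string.h>

    // Thin helper; real code must check the returned fdb_error_t.
    static void set_net_opt(FDBNetworkOption opt, const char* val) {
        fdb_network_set_option(opt, (const uint8_t*)val, (int)strlen(val));
    }

    int main(void) {
        fdb_select_api_version(FDB_API_VERSION);
        // These now populate tlsParams / tlsPolicy instead of the removed TLSOptions object.
        set_net_opt(FDB_NET_OPTION_TLS_CERT_PATH, "/etc/foundationdb/cert.pem");
        set_net_opt(FDB_NET_OPTION_TLS_KEY_PATH, "/etc/foundationdb/key.pem");
        set_net_opt(FDB_NET_OPTION_TLS_VERIFY_PEERS, "Check.Valid=1");
        // New in this change: trace timestamps from "now" (default) or "realtime".
        set_net_opt(FDB_NET_OPTION_TRACE_CLOCK_SOURCE, "realtime");
        return fdb_setup_network(); // fdb_run_network() then runs on a dedicated thread (omitted).
    }

The TLS settings only take effect when setupNetwork() constructs Net2 with tlsPolicy and tlsParams (next hunk), so the sketch configures everything before fdb_setup_network().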
@@ -1200,15 +1191,11 @@ void setupNetwork(uint64_t transportId, bool useMetrics) {
 	if (!networkOptions.logClientInfo.present())
 		networkOptions.logClientInfo = true;
 
-	g_network = newNet2(false, useMetrics || networkOptions.traceDirectory.present());
+	initTLSPolicy();
+
+	g_network = newNet2(false, useMetrics || networkOptions.traceDirectory.present(), tlsPolicy, tlsParams);
 	FlowTransport::createInstance(true, transportId);
 	Net2FileSystem::newFileSystem();
-
-	initTLSOptions();
-
-#ifndef TLS_DISABLED
-	tlsOptions->register_network();
-#endif
 }
 
 void runNetwork() {
@@ -2655,6 +2642,9 @@ void TransactionOptions::reset(Database const& cx) {
 	maxBackoff = CLIENT_KNOBS->DEFAULT_MAX_BACKOFF;
 	sizeLimit = CLIENT_KNOBS->TRANSACTION_SIZE_LIMIT;
 	lockAware = cx->lockAware;
+	if (cx->apiVersionAtLeast(700)) {
+		includePort = true;
+	}
 }
 
 void Transaction::reset() {
@@ -2771,8 +2761,8 @@ ACTOR void checkWrites( Database cx, Future<Void> committed, Promise<Void> outCo
 			} else {
 				Optional<Value> val = wait( tr.get( it->range().begin ) );
 				if( !val.present() || val.get() != m.setValue ) {
-					TraceEvent evt = TraceEvent(SevError, "CheckWritesFailed")
-						.detail("Class", "Set")
+					TraceEvent evt(SevError, "CheckWritesFailed");
+					evt.detail("Class", "Set")
 						.detail("Key", it->range().begin)
 						.detail("Expected", m.setValue);
 					if( !val.present() )
@@ -2866,8 +2856,12 @@ ACTOR static Future<Void> tryCommit( Database cx, Reference
 		req.debugID = commitID;
 		state Future<CommitID> reply;
 		if (options.commitOnFirstProxy) {
-			const std::vector<MasterProxyInterface>& proxies = cx->clientInfo->get().proxies;
-			reply = proxies.size() ? throwErrorOr ( brokenPromiseToMaybeDelivered ( proxies[0].commit.tryGetReply(req) ) ) : Never();
+			if(cx->clientInfo->get().firstProxy.present()) {
+				reply = throwErrorOr ( brokenPromiseToMaybeDelivered ( cx->clientInfo->get().firstProxy.get().commit.tryGetReply(req) ) );
+			} else {
+				const std::vector<MasterProxyInterface>& proxies = cx->clientInfo->get().proxies;
+				reply = proxies.size() ?
+				    throwErrorOr ( brokenPromiseToMaybeDelivered ( proxies[0].commit.tryGetReply(req) ) ) : Never();
+			}
 		} else {
 			reply = loadBalance( cx->getMasterProxies(info.useProvisionalProxies), &MasterProxyInterface::commit, req, TaskPriority::DefaultPromiseEndpoint, true );
 		}
@@ -3398,6 +3392,46 @@ Future<Void> Transaction::onError( Error const& e ) {
 	return e;
 }
 
+ACTOR Future<StorageMetrics> getStorageMetricsLargeKeyRange(Database cx, KeyRangeRef keys);
+
+ACTOR Future<StorageMetrics> doGetStorageMetrics(Database cx, KeyRangeRef keys, Reference<LocationInfo> locationInfo) {
+	loop {
+		try {
+			WaitMetricsRequest req(keys, StorageMetrics(), StorageMetrics());
+			req.min.bytes = 0;
+			req.max.bytes = -1;
+			StorageMetrics m = wait(
+			    loadBalance(locationInfo, &StorageServerInterface::waitMetrics, req, TaskPriority::DataDistribution));
+			return m;
+		} catch (Error& e) {
+			if (e.code() != error_code_wrong_shard_server && e.code() != error_code_all_alternatives_failed) {
+				TraceEvent(SevError, "WaitStorageMetricsError").error(e);
+				throw;
+			}
+			wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, TaskPriority::DataDistribution));
+			cx->invalidateCache(keys);
+			StorageMetrics m = wait(getStorageMetricsLargeKeyRange(cx, keys));
+			return m;
+		}
+	}
+}
+
+ACTOR Future<StorageMetrics> getStorageMetricsLargeKeyRange(Database cx, KeyRangeRef keys) {
+
+	vector<pair<KeyRange, Reference<LocationInfo>>> locations = wait(getKeyRangeLocations(
+	    cx, keys, std::numeric_limits<int>::max(), false, &StorageServerInterface::waitMetrics, TransactionInfo(TaskPriority::DataDistribution)));
+	state int nLocs = locations.size();
+	state vector<Future<StorageMetrics>> fx(nLocs);
+	state StorageMetrics total;
+	for (int i = 0; i < nLocs; i++) {
+		fx[i] = doGetStorageMetrics(cx, locations[i].first, locations[i].second);
+	}
+	wait(waitForAll(fx));
+	for (int i = 0; i < nLocs; i++) {
+		total += fx[i].get();
+	}
+	return total;
+}
 
 ACTOR Future<Void> trackBoundedStorageMetrics(
 	KeyRange keys,
@@ -3419,14 +3453,11 @@ ACTOR Future<Void> trackBoundedStorageMetrics(
 	}
 }
 
-ACTOR Future< StorageMetrics > waitStorageMetricsMultipleLocations(
-	vector< pair<KeyRange,Reference<LocationInfo>> > locations,
-	StorageMetrics min,
-	StorageMetrics max,
-	StorageMetrics permittedError)
-{
+ACTOR Future<StorageMetrics> waitStorageMetricsMultipleLocations(
+    vector<pair<KeyRange, Reference<LocationInfo>>> locations, StorageMetrics min, StorageMetrics max,
+    StorageMetrics permittedError) {
 	state int nLocs = locations.size();
-	state vector< Future<StorageMetrics> > fx( nLocs );
+	state vector<Future<StorageMetrics>> fx(nLocs);
 	state StorageMetrics total;
 	state PromiseStream<StorageMetrics> deltas;
 	state vector<Future<Void>> wx( fx.size() );
@@ -3434,18 +3465,17 @@ ACTOR Future< StorageMetrics > waitStorageMetricsMultipleLocations(
 	state StorageMetrics maxPlus = max + halfErrorPerMachine * (nLocs-1);
 	state StorageMetrics minMinus = min - halfErrorPerMachine * (nLocs-1);
 
-	for(int i=0; i<nLocs; i++) {
+	for (int i = 0; i < nLocs; i++) {
 		WaitMetricsRequest req(locations[i].first, StorageMetrics(), StorageMetrics());
 		req.min.bytes = 0;
 		req.max.bytes = -1;
 		fx[i] = loadBalance(locations[i].second->locations(), &StorageServerInterface::waitMetrics, req, TaskPriority::DataDistribution);
 	}
-	wait( waitForAll(fx) );
+	wait(waitForAll(fx));
 
 	// invariant: true total is between (total-permittedError/2, total+permittedError/2)
-	for(int i=0; i<nLocs; i++)
+	for (int i = 0; i < nLocs; i++)
 		total += fx[i].get();
@@ ... @@ ACTOR Future< std::pair<Optional<StorageMetrics>, int> > waitStorageMetrics(
 		try {
 			Future<StorageMetrics> fx;
 			if (locations.size() > 1) {
-				fx = waitStorageMetricsMultipleLocations( locations, min, max, permittedError );
+				fx = waitStorageMetricsMultipleLocations(locations, min, max, permittedError);
 			} else {
 				WaitMetricsRequest req( keys, min, max );
 				fx = loadBalance(locations[0].second->locations(), &StorageServerInterface::waitMetrics, req,
@@ -3526,9 +3556,13 @@ Future< std::pair<Optional<StorageMetrics>, int> > Transaction::waitStorageMetri
 }
 
 Future< StorageMetrics > Transaction::getStorageMetrics( KeyRange const& keys, int shardLimit ) {
-	StorageMetrics m;
-	m.bytes = -1;
-	return extractMetrics( ::waitStorageMetrics( cx,
-	    keys, StorageMetrics(), m, StorageMetrics(), shardLimit, -1 ) );
+	if (shardLimit > 0) {
+		StorageMetrics m;
+		m.bytes = -1;
+		return extractMetrics(::waitStorageMetrics(cx, keys, StorageMetrics(), m, StorageMetrics(), shardLimit, -1));
+	} else {
+		return ::getStorageMetricsLargeKeyRange(cx, keys);
+	}
 }
 
 ACTOR Future< Standalone<VectorRef<KeyRef>> > splitStorageMetrics( Database cx, KeyRange keys, StorageMetrics limit, StorageMetrics estimated )
diff --git a/fdbclient/NativeAPI.actor.h b/fdbclient/NativeAPI.actor.h
index 0eb470969f..d53adfec52 100644
--- a/fdbclient/NativeAPI.actor.h
+++ b/fdbclient/NativeAPI.actor.h
@@ -55,9 +55,10 @@ struct NetworkOptions {
 	std::string clusterFile;
 	Optional<std::string> traceDirectory;
 	uint64_t traceRollSize;
-	uint64_t traceMaxLogsSize; 
+	uint64_t traceMaxLogsSize;
 	std::string traceLogGroup;
 	std::string traceFormat;
+	std::string traceClockSource;
 	Optional<bool> logClientInfo;
 	Standalone<VectorRef<ClientVersionRef>> supportedVersions;
 	bool slowTaskProfilingEnabled;
@@ -66,7 +67,7 @@ struct NetworkOptions {
 	NetworkOptions()
 		: localAddress(""), clusterFile(""), traceDirectory(Optional<std::string>()),
 		  traceRollSize(TRACE_DEFAULT_ROLL_SIZE), traceMaxLogsSize(TRACE_DEFAULT_MAX_LOGS_SIZE), traceLogGroup("default"),
-		  traceFormat("xml"), slowTaskProfilingEnabled(false) {}
+		  traceFormat("xml"), traceClockSource("now"), slowTaskProfilingEnabled(false) {}
 };
 
 class Database {
@@ -244,6 +245,7 @@ public:
 	Future< Void > warmRange( Database cx, KeyRange keys );
 
 	Future< std::pair<Optional<StorageMetrics>, int> > waitStorageMetrics( KeyRange const& keys, StorageMetrics const& min, StorageMetrics const& max, StorageMetrics const& permittedError, int shardLimit, int expectedShardCount );
+	// Pass a negative value for `shardLimit` to indicate no limit on the shard number.
 	Future< StorageMetrics > getStorageMetrics( KeyRange const& keys, int shardLimit );
 	Future< Standalone<VectorRef<KeyRef>> > splitStorageMetrics( KeyRange const& keys, StorageMetrics const& limit, StorageMetrics const& estimated );
 
diff --git a/fdbclient/RYWIterator.cpp b/fdbclient/RYWIterator.cpp
index 3f8decfaab..7e9960b40f 100644
--- a/fdbclient/RYWIterator.cpp
+++ b/fdbclient/RYWIterator.cpp
@@ -334,31 +334,31 @@ ACTOR Standalone<RangeResultRef> getRange( Transaction* tr, KeySelector begin, K
 
 
 
-static void printWriteMap(WriteMap *p) {
-	WriteMap::iterator it(p);
-	for (it.skip(allKeys.begin); it.beginKey() < allKeys.end; ++it) {
-		if (it.is_cleared_range()) {
-			printf("CLEARED ");
-		}
-		if (it.is_conflict_range()) {
-			printf("CONFLICT ");
-		}
-		if (it.is_operation()) {
-			printf("OPERATION ");
-			printf(it.is_independent() ? "INDEPENDENT " : "DEPENDENT ");
-		}
-		if (it.is_unmodified_range()) {
-			printf("UNMODIFIED ");
-		}
-		if (it.is_unreadable()) {
-			printf("UNREADABLE ");
-		}
-		printf(": \"%s\" -> \"%s\"\n",
-		       printable(it.beginKey().toStandaloneStringRef()).c_str(),
-		       printable(it.endKey().toStandaloneStringRef()).c_str());
-	}
-	printf("\n");
-}
+//static void printWriteMap(WriteMap *p) {
+//	WriteMap::iterator it(p);
+//	for (it.skip(allKeys.begin); it.beginKey() < allKeys.end; ++it) {
+//		if (it.is_cleared_range()) {
+//			printf("CLEARED ");
+//		}
+//		if (it.is_conflict_range()) {
+//			printf("CONFLICT ");
+//		}
+//		if (it.is_operation()) {
+//			printf("OPERATION ");
+//			printf(it.is_independent() ?
"INDEPENDENT " : "DEPENDENT "); +// } +// if (it.is_unmodified_range()) { +// printf("UNMODIFIED "); +// } +// if (it.is_unreadable()) { +// printf("UNREADABLE "); +// } +// printf(": \"%s\" -> \"%s\"\n", +// printable(it.beginKey().toStandaloneStringRef()).c_str(), +// printable(it.endKey().toStandaloneStringRef()).c_str()); +// } +// printf("\n"); +//} static int getWriteMapCount(WriteMap *p) { // printWriteMap(p); diff --git a/fdbclient/ReadYourWrites.actor.cpp b/fdbclient/ReadYourWrites.actor.cpp index 611b0ea41b..0c5c245b81 100644 --- a/fdbclient/ReadYourWrites.actor.cpp +++ b/fdbclient/ReadYourWrites.actor.cpp @@ -1165,8 +1165,8 @@ Optional getValueFromJSON(StatusObject statusObj) { } } -ACTOR Future> getJSON(Reference clusterFile) { - StatusObject statusObj = wait(StatusClient::statusFetcher(clusterFile)); +ACTOR Future> getJSON(Database db) { + StatusObject statusObj = wait(StatusClient::statusFetcher(db)); return getValueFromJSON(statusObj); } @@ -1194,7 +1194,7 @@ Future< Optional > ReadYourWritesTransaction::get( const Key& key, bool s if (key == LiteralStringRef("\xff\xff/status/json")){ if (tr.getDatabase().getPtr() && tr.getDatabase()->getConnectionFile()) { - return getJSON(tr.getDatabase()->getConnectionFile()); + return getJSON(tr.getDatabase()); } else { return Optional(); @@ -1343,6 +1343,16 @@ Future< Standalone >> ReadYourWritesTransaction::getAddre return result; } +Future ReadYourWritesTransaction::getEstimatedRangeSizeBytes(const KeyRangeRef& keys) { + if(checkUsedDuringCommit()) { + throw used_during_commit(); + } + if( resetPromise.isSet() ) + return resetPromise.getFuture().getError(); + + return map(waitOrError(tr.getStorageMetrics(keys, -1), resetPromise.getFuture()), [](const StorageMetrics& m) { return m.bytes; }); +} + void ReadYourWritesTransaction::addReadConflictRange( KeyRangeRef const& keys ) { if(checkUsedDuringCommit()) { throw used_during_commit(); diff --git a/fdbclient/ReadYourWrites.h b/fdbclient/ReadYourWrites.h index fc766617bf..f4ebd92e4b 100644 --- a/fdbclient/ReadYourWrites.h +++ b/fdbclient/ReadYourWrites.h @@ -84,6 +84,7 @@ public: } [[nodiscard]] Future>> getAddressesForKey(const Key& key); + Future getEstimatedRangeSizeBytes( const KeyRangeRef& keys ); void addReadConflictRange( KeyRangeRef const& keys ); void makeSelfConflicting() { tr.makeSelfConflicting(); } diff --git a/fdbclient/RestoreWorkerInterface.actor.h b/fdbclient/RestoreWorkerInterface.actor.h index cc008cad2d..82c6e9b25d 100644 --- a/fdbclient/RestoreWorkerInterface.actor.h +++ b/fdbclient/RestoreWorkerInterface.actor.h @@ -29,6 +29,7 @@ #define FDBCLIENT_RESTORE_WORKER_INTERFACE_ACTOR_H #include +#include #include "flow/Stats.h" #include "flow/flow.h" #include "fdbrpc/fdbrpc.h" @@ -51,6 +52,7 @@ struct RestoreSendMutationsToAppliersRequest; struct RestoreSendVersionedMutationsRequest; struct RestoreSysInfo; struct RestoreApplierInterface; +struct RestoreFinishRequest; // RestoreSysInfo includes information each (type of) restore roles should know. // At this moment, it only include appliers. We keep the name for future extension. 
@@ -129,7 +131,7 @@ struct RestoreLoaderInterface : RestoreRoleInterface {
 	RequestStream<RestoreSendMutationsToAppliersRequest> sendMutations;
 	RequestStream<RestoreVersionBatchRequest> initVersionBatch;
 	RequestStream<RestoreSimpleRequest> collectRestoreRoleInterfaces;
-	RequestStream<RestoreVersionBatchRequest> finishRestore;
+	RequestStream<RestoreFinishRequest> finishRestore;
 
 	bool operator==(RestoreWorkerInterface const& r) const { return id() == r.id(); }
 	bool operator!=(RestoreWorkerInterface const& r) const { return id() != r.id(); }
@@ -166,7 +168,7 @@ struct RestoreApplierInterface : RestoreRoleInterface {
 	RequestStream<RestoreVersionBatchRequest> applyToDB;
 	RequestStream<RestoreVersionBatchRequest> initVersionBatch;
 	RequestStream<RestoreSimpleRequest> collectRestoreRoleInterfaces;
-	RequestStream<RestoreVersionBatchRequest> finishRestore;
+	RequestStream<RestoreFinishRequest> finishRestore;
 
 	bool operator==(RestoreWorkerInterface const& r) const { return id() == r.id(); }
 	bool operator!=(RestoreWorkerInterface const& r) const { return id() != r.id(); }
@@ -337,6 +339,7 @@ struct RestoreRecruitRoleRequest : TimedRequest {
 	std::string toString() { return printable(); }
 };
 
+// Static info. across version batches
 struct RestoreSysInfoRequest : TimedRequest {
 	constexpr static FileIdentifier file_identifier = 75960741;
 
@@ -364,18 +367,21 @@ struct RestoreLoadFileReply : TimedRequest {
 
 	LoadingParam param;
 	MutationsVec samples; // sampled mutations
+	bool isDuplicated; // true if loader thinks the request is a duplicated one
 
 	RestoreLoadFileReply() = default;
-	explicit RestoreLoadFileReply(LoadingParam param, MutationsVec samples) : param(param), samples(samples) {}
+	explicit RestoreLoadFileReply(LoadingParam param, MutationsVec samples, bool isDuplicated)
+	  : param(param), samples(samples), isDuplicated(isDuplicated) {}
 
 	template <class Ar>
 	void serialize(Ar& ar) {
-		serializer(ar, param, samples);
+		serializer(ar, param, samples, isDuplicated);
 	}
 
 	std::string toString() {
 		std::stringstream ss;
-		ss << "LoadingParam:" << param.toString() << " samples.size:" << samples.size();
+		ss << "LoadingParam:" << param.toString() << " samples.size:" << samples.size()
+		   << " isDuplicated:" << isDuplicated;
 		return ss.str();
 	}
 };
@@ -384,21 +390,22 @@ struct RestoreLoadFileReply : TimedRequest {
 struct RestoreLoadFileRequest : TimedRequest {
 	constexpr static FileIdentifier file_identifier = 26557364;
 
+	int batchIndex;
 	LoadingParam param;
 
 	ReplyPromise<RestoreLoadFileReply> reply;
 
 	RestoreLoadFileRequest() = default;
-	explicit RestoreLoadFileRequest(LoadingParam& param) : param(param){};
+	explicit RestoreLoadFileRequest(int batchIndex, LoadingParam& param) : batchIndex(batchIndex), param(param){};
 
 	template <class Ar>
 	void serialize(Ar& ar) {
-		serializer(ar, param, reply);
+		serializer(ar, batchIndex, param, reply);
 	}
 
 	std::string toString() {
 		std::stringstream ss;
-		ss << "RestoreLoadFileRequest param:" << param.toString();
+		ss << "RestoreLoadFileRequest batchIndex:" << batchIndex << " param:" << param.toString();
 		return ss.str();
 	}
 };
@@ -406,24 +413,25 @@ struct RestoreLoadFileRequest : TimedRequest {
 struct RestoreSendMutationsToAppliersRequest : TimedRequest {
 	constexpr static FileIdentifier file_identifier = 68827305;
 
+	int batchIndex; // version batch index
 	std::map<Key, UID> rangeToApplier;
 	bool useRangeFile; // Send mutations parsed from range file?
 	ReplyPromise<RestoreCommonReply> reply;
 
 	RestoreSendMutationsToAppliersRequest() = default;
-	explicit RestoreSendMutationsToAppliersRequest(std::map<Key, UID> rangeToApplier, bool useRangeFile)
-	  : rangeToApplier(rangeToApplier), useRangeFile(useRangeFile) {}
+	explicit RestoreSendMutationsToAppliersRequest(int batchIndex, std::map<Key, UID> rangeToApplier, bool useRangeFile)
+	  : batchIndex(batchIndex), rangeToApplier(rangeToApplier), useRangeFile(useRangeFile) {}
 
 	template <class Ar>
 	void serialize(Ar& ar) {
-		serializer(ar, rangeToApplier, useRangeFile, reply);
+		serializer(ar, batchIndex, rangeToApplier, useRangeFile, reply);
 	}
 
 	std::string toString() {
 		std::stringstream ss;
-		ss << "RestoreSendMutationsToAppliersRequest keyToAppliers.size:" << rangeToApplier.size()
-		   << " useRangeFile:" << useRangeFile;
+		ss << "RestoreSendMutationsToAppliersRequest batchIndex:" << batchIndex
+		   << " keyToAppliers.size:" << rangeToApplier.size() << " useRangeFile:" << useRangeFile;
 		return ss.str();
 	}
 };
@@ -431,6 +439,7 @@ struct RestoreSendMutationsToAppliersRequest : TimedRequest {
 struct RestoreSendVersionedMutationsRequest : TimedRequest {
 	constexpr static FileIdentifier file_identifier = 69764565;
 
+	int batchIndex; // version batch index
 	RestoreAsset asset; // Unique identifier for the current restore asset
 
 	Version prevVersion, version; // version is the commitVersion of the mutation vector.
@@ -440,41 +449,65 @@ struct RestoreSendVersionedMutationsRequest : TimedRequest {
 	ReplyPromise<RestoreCommonReply> reply;
 
 	RestoreSendVersionedMutationsRequest() = default;
-	explicit RestoreSendVersionedMutationsRequest(const RestoreAsset& asset, Version prevVersion, Version version,
-	                                              bool isRangeFile, MutationsVec mutations)
-	  : asset(asset), prevVersion(prevVersion), version(version), isRangeFile(isRangeFile), mutations(mutations) {}
+	explicit RestoreSendVersionedMutationsRequest(int batchIndex, const RestoreAsset& asset, Version prevVersion,
+	                                              Version version, bool isRangeFile, MutationsVec mutations)
+	  : batchIndex(batchIndex), asset(asset), prevVersion(prevVersion), version(version), isRangeFile(isRangeFile),
+	    mutations(mutations) {}
 
 	std::string toString() {
 		std::stringstream ss;
-		ss << "RestoreAsset:" << asset.toString() << " prevVersion:" << prevVersion << " version:" << version
-		   << " isRangeFile:" << isRangeFile << " mutations.size:" << mutations.size();
+		ss << "VersionBatchIndex:" << batchIndex << " RestoreAsset:" << asset.toString()
+		   << " prevVersion:" << prevVersion << " version:" << version << " isRangeFile:" << isRangeFile
+		   << " mutations.size:" << mutations.size();
 		return ss.str();
 	}
 
 	template <class Ar>
 	void serialize(Ar& ar) {
-		serializer(ar, asset, prevVersion, version, isRangeFile, mutations, reply);
+		serializer(ar, batchIndex, asset, prevVersion, version, isRangeFile, mutations, reply);
 	}
 };
 
 struct RestoreVersionBatchRequest : TimedRequest {
-	constexpr static FileIdentifier file_identifier = 13018413;
+	constexpr static FileIdentifier file_identifier = 97223537;
 
-	int batchID;
+	int batchIndex;
 
 	ReplyPromise<RestoreCommonReply> reply;
 
 	RestoreVersionBatchRequest() = default;
-	explicit RestoreVersionBatchRequest(int batchID) : batchID(batchID) {}
+	explicit RestoreVersionBatchRequest(int batchIndex) : batchIndex(batchIndex) {}
 
 	template <class Ar>
 	void serialize(Ar& ar) {
-		serializer(ar, batchID, reply);
+		serializer(ar, batchIndex, reply);
 	}
 
 	std::string toString() {
 		std::stringstream ss;
-		ss << "RestoreVersionBatchRequest BatchID:" << batchID;
+		ss << "RestoreVersionBatchRequest batchIndex:" << batchIndex;
 		return ss.str();
 	}
 };
 
+struct RestoreFinishRequest : TimedRequest {
+	constexpr static FileIdentifier file_identifier = 13018413;
+
+	bool terminate; // role exits if terminate = true
+
+	ReplyPromise<RestoreCommonReply> reply;
+
+	RestoreFinishRequest() = default;
+	explicit RestoreFinishRequest(bool terminate) : terminate(terminate) {}
+
+	template <class Ar>
+	void serialize(Ar& ar) {
+		serializer(ar, terminate, reply);
+	}
+
+	std::string toString() {
+		std::stringstream ss;
+		ss << "RestoreFinishRequest terminate:" << terminate;
 		return ss.str();
 	}
 };
@@ -530,7 +563,7 @@ std::string getRoleStr(RestoreRole role);
 
 ////--- Interface functions
 ACTOR Future<Void> _restoreWorker(Database cx, LocalityData locality);
-ACTOR Future<Void> restoreWorker(Reference<ClusterConnectionFile> ccf, LocalityData locality);
+ACTOR Future<Void> restoreWorker(Reference<ClusterConnectionFile> ccf, LocalityData locality, std::string coordFolder);
 
 #include "flow/unactorcompiler.h"
 #endif
diff --git a/fdbclient/Schemas.cpp b/fdbclient/Schemas.cpp
index e855b07faa..c4253bd0af 100644
--- a/fdbclient/Schemas.cpp
+++ b/fdbclient/Schemas.cpp
@@ -590,7 +590,8 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
 		"auto_proxies":3,
 		"auto_resolvers":1,
 		"auto_logs":3,
-		"proxies":5
+		"proxies":5,
+		"backup_worker_enabled":1
 	},
 	"data":{
 		"least_operating_space_bytes_log_server":0,
diff --git a/fdbclient/Status.h b/fdbclient/Status.h
index 6d7384abfb..8a6e49ff25 100644
--- a/fdbclient/Status.h
+++ b/fdbclient/Status.h
@@ -68,7 +68,7 @@ struct StatusValue : json_spirit::mValue {
 	StatusValue(json_spirit::mValue const& o) : json_spirit::mValue(o) {}
 };
 
-static StatusObject makeMessage(const char *name, const char *description) {
+inline StatusObject makeMessage(const char *name, const char *description) {
 	StatusObject out;
 	out["name"] = name;
 	out["description"] = description;
@@ -88,7 +88,7 @@ template <> inline bool JSONDoc::get(const std::string path, StatusObje
 }
 
 // Takes an object by reference so make usage look clean and avoid the client doing object["messages"] which will create the key.
-static bool findMessagesByName(StatusObjectReader object, std::set<std::string> to_find) {
+inline bool findMessagesByName(StatusObjectReader object, std::set<std::string> to_find) {
 	if (!object.has("messages") || object.last().type() != json_spirit::array_type)
 		return false;
 
diff --git a/fdbclient/StatusClient.actor.cpp b/fdbclient/StatusClient.actor.cpp
index ae7aef1190..ab5dce1aa9 100644
--- a/fdbclient/StatusClient.actor.cpp
+++ b/fdbclient/StatusClient.actor.cpp
@@ -451,7 +451,7 @@ StatusObject getClientDatabaseStatus(StatusObjectReader client, StatusObjectRead
 	return databaseStatus;
 }
 
-ACTOR Future<StatusObject> statusFetcherImpl( Reference<ClusterConnectionFile> f ) {
+ACTOR Future<StatusObject> statusFetcherImpl( Reference<ClusterConnectionFile> f, Reference<AsyncVar<Optional<ClusterInterface>>> clusterInterface) {
 	if (!g_network) throw network_not_setup();
 
 	state StatusObject statusObj;
@@ -461,13 +461,10 @@
 	// This could be read from the JSON but doing so safely is ugly so using a real var.
 	state bool quorum_reachable = false;
 	state int coordinatorsFaultTolerance = 0;
-	state Reference<AsyncVar<Optional<ClusterInterface>>> clusterInterface(new AsyncVar<Optional<ClusterInterface>>);
 
 	try {
 		state int64_t clientTime = time(0);
-		state Future<Void> leaderMon = monitorLeader(f, clusterInterface);
-
 		StatusObject _statusObjClient = wait(clientStatusFetcher(f, &clientMessages, &quorum_reachable, &coordinatorsFaultTolerance));
 		statusObjClient = _statusObjClient;
 
@@ -547,6 +544,23 @@
 	return statusObj;
 }
 
-Future<StatusObject> StatusClient::statusFetcher( Reference<ClusterConnectionFile> clusterFile ) {
-	return statusFetcherImpl(clusterFile);
+ACTOR Future<Void> timeoutMonitorLeader(Database db) {
+	state Future<Void> leadMon = monitorLeader(db->getConnectionFile(), db->statusClusterInterface);
+	loop {
+		wait(delay(CLIENT_KNOBS->STATUS_IDLE_TIMEOUT + 0.00001 + db->lastStatusFetch - now()));
+		if(now() - db->lastStatusFetch > CLIENT_KNOBS->STATUS_IDLE_TIMEOUT) {
+			db->statusClusterInterface = Reference<AsyncVar<Optional<ClusterInterface>>>();
+			return Void();
+		}
+	}
+}
+
+Future<StatusObject> StatusClient::statusFetcher( Database db ) {
+	db->lastStatusFetch = now();
+	if(!db->statusClusterInterface) {
+		db->statusClusterInterface = Reference<AsyncVar<Optional<ClusterInterface>>>(new AsyncVar<Optional<ClusterInterface>>);
+		db->statusLeaderMon = timeoutMonitorLeader(db);
+	}
+
+	return statusFetcherImpl(db->getConnectionFile(), db->statusClusterInterface);
 }
diff --git a/fdbclient/StatusClient.h b/fdbclient/StatusClient.h
index 5a78b9b20f..6b780163a4 100755
--- a/fdbclient/StatusClient.h
+++ b/fdbclient/StatusClient.h
@@ -23,11 +23,12 @@
 
 #include "flow/flow.h"
 #include "fdbclient/Status.h"
+#include "fdbclient/DatabaseContext.h"
 
 class StatusClient {
 public:
 	enum StatusLevel { MINIMAL = 0, NORMAL = 1, DETAILED = 2, JSON = 3 };
-	static Future<StatusObject> statusFetcher(Reference<ClusterConnectionFile> clusterFile);
+	static Future<StatusObject> statusFetcher(Database db);
 };
 
 #endif
\ No newline at end of file
diff --git a/fdbclient/StorageServerInterface.h b/fdbclient/StorageServerInterface.h
index c706978f67..67f3bd5e57 100644
--- a/fdbclient/StorageServerInterface.h
+++ b/fdbclient/StorageServerInterface.h
@@ -394,7 +394,7 @@ struct SplitMetricsRequest {
 struct GetStorageMetricsReply {
 	constexpr static FileIdentifier file_identifier = 15491478;
 	StorageMetrics load;
-	StorageMetrics free;
+	StorageMetrics available;
 	StorageMetrics capacity;
 	double bytesInputRate;
 
@@ -402,7 +402,7 @@ struct GetStorageMetricsReply {
 
 	template <class Ar>
 	void serialize(Ar& ar) {
-		serializer(ar, load, free, capacity, bytesInputRate);
+		serializer(ar, load, available, capacity, bytesInputRate);
 	}
 };
 
diff --git a/fdbclient/SystemData.cpp b/fdbclient/SystemData.cpp
index a9df1ec223..96d1c7e5d7 100644
--- a/fdbclient/SystemData.cpp
+++ b/fdbclient/SystemData.cpp
@@ -580,6 +580,19 @@ WorkerBackupStatus decodeBackupProgressValue(const ValueRef& value) {
 	return status;
 }
 
+Value encodeBackupStartedValue(const std::vector<std::pair<UID, Version>>& ids) {
+	BinaryWriter wr(IncludeVersion());
+	wr << ids;
+	return wr.toValue();
+}
+
+std::vector<std::pair<UID, Version>> decodeBackupStartedValue(const ValueRef& value) {
+	std::vector<std::pair<UID, Version>> ids;
+	BinaryReader reader(value, IncludeVersion());
+	if (value.size() > 0) reader >> ids;
+	return ids;
+}
+
 const KeyRef coordinatorsKey = LiteralStringRef("\xff/coordinators");
 const KeyRef logsKey = LiteralStringRef("\xff/logs");
 const KeyRef minRequiredCommitVersionKey = LiteralStringRef("\xff/minRequiredCommitVersion");
@@ -794,20 +807,22 @@ const KeyRangeRef restoreApplierKeys(LiteralStringRef("\xff\x02/restoreApplier/"
 const KeyRef restoreApplierTxnValue = LiteralStringRef("1");
 
 // restoreApplierKeys: track atomic transaction progress to ensure applying atomicOp exactly once
-// Version is passed in as LittleEndian, it must be converted to BigEndian to maintain ordering in lexical order
-const Key restoreApplierKeyFor(UID const& applierID, Version version) {
+// Version and batchIndex are passed in as LittleEndian,
+// they must be converted to BigEndian to maintain ordering in lexical order
+const Key restoreApplierKeyFor(UID const& applierID, int64_t batchIndex, Version version) {
 	BinaryWriter wr(Unversioned());
 	wr.serializeBytes(restoreApplierKeys.begin);
-	wr << applierID << bigEndian64(version);
+	wr << applierID << bigEndian64(batchIndex) << bigEndian64(version);
 	return wr.toValue();
 }
 
-std::pair<UID, Version> decodeRestoreApplierKey(ValueRef const& key) {
+std::tuple<UID, int64_t, Version> decodeRestoreApplierKey(ValueRef const& key) {
 	BinaryReader rd(key, Unversioned());
 	UID applierID;
+	int64_t batchIndex;
 	Version version;
-	rd >> applierID >> version;
-	return std::make_pair(applierID, bigEndian64(version));
+	rd >> applierID >> batchIndex >> version;
+	return std::make_tuple(applierID, bigEndian64(batchIndex), bigEndian64(version));
 }
 
 // Encode restore worker key for workerID
diff --git a/fdbclient/SystemData.h b/fdbclient/SystemData.h
index 4aaf63978f..30b69941fa 100644
--- a/fdbclient/SystemData.h
+++ b/fdbclient/SystemData.h
@@ -188,8 +188,10 @@ const Value backupProgressValue(const WorkerBackupStatus& status);
 UID decodeBackupProgressKey(const KeyRef& key);
 WorkerBackupStatus decodeBackupProgressValue(const ValueRef& value);
 
-// "\xff/backupStarted"
+// "\xff/backupStarted" := "[[vector<pair<UID, Version>>]]"
 extern const KeyRef backupStartedKey;
+Value encodeBackupStartedValue(const std::vector<std::pair<UID, Version>>& ids);
+std::vector<std::pair<UID, Version>> decodeBackupStartedValue(const ValueRef& value);
 
 extern const KeyRef coordinatorsKey;
 extern const KeyRef logsKey;
@@ -334,8 +336,8 @@ extern const KeyRangeRef restoreRequestKeys;
 extern const KeyRangeRef restoreApplierKeys;
 extern const KeyRef restoreApplierTxnValue;
 
-const Key restoreApplierKeyFor(UID const& applierID, Version version);
-std::pair<UID, Version> decodeRestoreApplierKey(ValueRef const& key);
+const Key restoreApplierKeyFor(UID const& applierID, int64_t batchIndex, Version version);
+std::tuple<UID, int64_t, Version> decodeRestoreApplierKey(ValueRef const& key);
 const Key restoreWorkerKeyFor(UID const& workerID);
 const Value restoreWorkerInterfaceValue(RestoreWorkerInterface const& server);
 RestoreWorkerInterface decodeRestoreWorkerInterfaceValue(ValueRef const& value);
diff --git a/fdbclient/TaskBucket.actor.cpp b/fdbclient/TaskBucket.actor.cpp
index f40b0090a9..66d3b9571f 100644
--- a/fdbclient/TaskBucket.actor.cpp
+++ b/fdbclient/TaskBucket.actor.cpp
@@ -230,7 +230,7 @@ public:
 		return task;
 	}
 
-	// Verify that the user configured task verification key still has the user specificied value
+	// Verify that the user configured task verification key still has the user specified value
 	ACTOR static Future<bool> taskVerify(Reference<TaskBucket> tb, Reference<ReadYourWritesTransaction> tr, Reference<Task> task) {
 
 		if (task->params.find(Task::reservedTaskParamValidKey) == task->params.end()) {
diff --git a/fdbclient/ThreadSafeTransaction.actor.cpp b/fdbclient/ThreadSafeTransaction.actor.cpp
index c71482b3b9..afafa8747c 100644
--- a/fdbclient/ThreadSafeTransaction.actor.cpp
+++ b/fdbclient/ThreadSafeTransaction.actor.cpp
@@ -64,9 +64,9 @@ void ThreadSafeDatabase::setOption( FDBDatabaseOptions::Option option, Optional<
 	Standalone<Optional<StringRef>> passValue = value;
 
 	// ThreadSafeDatabase is not allowed to do anything with options except pass them through to RYW.
-	onMainThreadVoid( [db, option, passValue](){ 
+	onMainThreadVoid( [db, option, passValue](){
 		db->checkDeferredError();
-		db->setOption(option, passValue.contents()); 
+		db->setOption(option, passValue.contents());
 	}, &db->deferredError );
 }
 
@@ -77,7 +77,7 @@ ThreadSafeDatabase::ThreadSafeDatabase(std::string connFilename, int apiVersion)
 	// but run its constructor on the main thread
 	DatabaseContext *db = this->db = DatabaseContext::allocateOnForeignThread();
 
-	onMainThreadVoid([db, connFile, apiVersion](){ 
+	onMainThreadVoid([db, connFile, apiVersion](){
 		try {
 			Database::createDatabase(Reference<ClusterConnectionFile>(connFile), apiVersion, false, LocalityData(), db).extractPtr();
 		}
@@ -157,6 +157,17 @@ ThreadFuture< Key > ThreadSafeTransaction::getKey( const KeySelectorRef& key, bo
 	} );
 }
 
+ThreadFuture<int64_t> ThreadSafeTransaction::getEstimatedRangeSizeBytes( const KeyRangeRef& keys ) {
+	KeyRange r = keys;
+
+	ReadYourWritesTransaction *tr = this->tr;
+	return onMainThread( [tr, r]() -> Future<int64_t> {
+		tr->checkDeferredError();
+		return tr->getEstimatedRangeSizeBytes(r);
+	} );
+}
+
+
 ThreadFuture< Standalone<RangeResultRef> > ThreadSafeTransaction::getRange( const KeySelectorRef& begin, const KeySelectorRef& end, int limit, bool snapshot, bool reverse ) {
 	KeySelector b = begin;
 	KeySelector e = end;
@@ -292,7 +303,7 @@ void ThreadSafeTransaction::setOption( FDBTransactionOptions::Option option, Opt
 		TraceEvent("UnknownTransactionOption").detail("Option", option);
 		throw invalid_option();
 	}
-	
+
 	ReadYourWritesTransaction *tr = this->tr;
 	Standalone<Optional<StringRef>> passValue = value;
 
diff --git a/fdbclient/ThreadSafeTransaction.h b/fdbclient/ThreadSafeTransaction.h
index c5832cec45..61b64aa4b4 100644
--- a/fdbclient/ThreadSafeTransaction.h
+++ b/fdbclient/ThreadSafeTransaction.h
@@ -71,6 +71,7 @@ public:
 	}
 	ThreadFuture<Standalone<VectorRef<const char*>>> getAddressesForKey(const KeyRef& key) override;
 	ThreadFuture<Standalone<StringRef>> getVersionstamp() override;
+	ThreadFuture<int64_t> getEstimatedRangeSizeBytes(const KeyRangeRef& keys) override;
 
 	void addReadConflictRange( const KeyRangeRef& keys ) override;
 	void makeSelfConflicting();
diff --git a/fdbclient/md5/md5.c b/fdbclient/md5/md5.c
index 52d96accd3..1032ccfdaf 100644
--- a/fdbclient/md5/md5.c
+++ b/fdbclient/md5/md5.c
@@ -35,7 +35,7 @@
  * compile-time configuration.
  */
 
-#ifndef HAVE_OPENSSL
+#if !defined(HAVE_OPENSSL) || defined(TLS_DISABLED)
 
 #include <string.h>
 
diff --git a/fdbclient/md5/md5.h b/fdbclient/md5/md5.h
index e73fb29c35..5731872376 100644
--- a/fdbclient/md5/md5.h
+++ b/fdbclient/md5/md5.h
@@ -23,7 +23,7 @@
  * See md5.c for more information.
  */
 
-#ifdef HAVE_OPENSSL
+#if defined(HAVE_OPENSSL) && !defined(TLS_DISABLED)
 #include <openssl/md5.h>
 #elif !defined(_MD5_H)
 #define _MD5_H
 
diff --git a/fdbclient/vexillographer/fdb.options b/fdbclient/vexillographer/fdb.options
index 890dea4864..5be79357e8 100644
--- a/fdbclient/vexillographer/fdb.options
+++ b/fdbclient/vexillographer/fdb.options
@@ -51,6 +51,9 @@
 description is not currently required but encouraged.
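One detail worth spelling out from the SystemData.cpp hunk above: restoreApplierKeyFor runs batchIndex and version through bigEndian64 before serializing because FDB keys compare as raw byte strings, so multi-byte integers must be laid out most-significant byte first for numeric order and key order to agree. A self-contained illustration; bigEndian64 here is a stand-in for flow's helper and assumes a little-endian host with GCC/Clang builtins:

    #include <cassert>
    #include <cstdint>
    #include <string>

    // Stand-in for flow's bigEndian64() on a little-endian machine.
    static uint64_t bigEndian64(uint64_t v) { return __builtin_bswap64(v); }

    static std::string keyFor(uint64_t version) {
        uint64_t be = bigEndian64(version);
        return std::string(reinterpret_cast<const char*>(&be), sizeof(be));
    }

    int main() {
        // 1 < 256 numerically, and the big-endian keys sort the same way.
        // Little-endian bytes would reverse them: 256 serializes as
        // 00 01 00 ... and would sort before 1's 01 00 00 ....
        assert(keyFor(1) < keyFor(256));
        return 0;
    }

decodeRestoreApplierKey undoes the layout with the same bigEndian64 swap after reading each field back, so callers see ordinary little-endian integers.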