Merge remote-tracking branch 'origin/master' into run-minio-joshua
This commit is contained in:
commit
51cbc494f1
|
@ -38,7 +38,7 @@ ITLSPolicy *FDBLibTLSPlugin::create_policy() {
|
|||
if (rc < 0) {
|
||||
// Log the failure from tls_init during our constructor.
|
||||
TraceEvent(SevError, "FDBLibTLSInitError").detail("LibTLSErrorMessage", "failed to initialize libtls");
|
||||
return NULL;
|
||||
return nullptr;
|
||||
}
|
||||
return new FDBLibTLSPolicy(Reference<FDBLibTLSPlugin>::addRef(this));
|
||||
}
|
||||
|
@ -47,5 +47,5 @@ extern "C" BOOST_SYMBOL_EXPORT void *get_tls_plugin(const char *plugin_type_name
|
|||
if (strcmp(plugin_type_name_and_version, FDBLibTLSPlugin::get_plugin_type_name_and_version()) == 0) {
|
||||
return new FDBLibTLSPlugin;
|
||||
}
|
||||
return NULL;
|
||||
return nullptr;
|
||||
}
|
||||
|
|
|
@ -37,11 +37,11 @@
|
|||
#include <string.h>
|
||||
#include <limits.h>
|
||||
|
||||
FDBLibTLSPolicy::FDBLibTLSPolicy(Reference<FDBLibTLSPlugin> plugin):
|
||||
plugin(plugin), tls_cfg(NULL), roots(NULL), session_created(false), ca_data_set(false),
|
||||
cert_data_set(false), key_data_set(false), verify_peers_set(false) {
|
||||
FDBLibTLSPolicy::FDBLibTLSPolicy(Reference<FDBLibTLSPlugin> plugin)
|
||||
: plugin(plugin), tls_cfg(nullptr), roots(nullptr), session_created(false), ca_data_set(false), cert_data_set(false),
|
||||
key_data_set(false), verify_peers_set(false) {
|
||||
|
||||
if ((tls_cfg = tls_config_new()) == NULL) {
|
||||
if ((tls_cfg = tls_config_new()) == nullptr) {
|
||||
TraceEvent(SevError, "FDBLibTLSConfigError");
|
||||
throw std::runtime_error("FDBLibTLSConfigError");
|
||||
}
|
||||
|
@ -55,29 +55,31 @@ FDBLibTLSPolicy::~FDBLibTLSPolicy() {
|
|||
tls_config_free(tls_cfg);
|
||||
}
|
||||
|
||||
ITLSSession* FDBLibTLSPolicy::create_session(bool is_client, const char* servername, TLSSendCallbackFunc send_func, void* send_ctx, TLSRecvCallbackFunc recv_func, void* recv_ctx, void* uid) {
|
||||
ITLSSession* FDBLibTLSPolicy::create_session(bool is_client, const char* servername, TLSSendCallbackFunc send_func,
|
||||
void* send_ctx, TLSRecvCallbackFunc recv_func, void* recv_ctx, void* uid) {
|
||||
if (is_client) {
|
||||
// If verify peers has been set then there is no point specifying a
|
||||
// servername, since this will be ignored - the servername should be
|
||||
// matched by the verify criteria instead.
|
||||
if (verify_peers_set && servername != NULL) {
|
||||
if (verify_peers_set && servername != nullptr) {
|
||||
TraceEvent(SevError, "FDBLibTLSVerifyPeersWithServerName");
|
||||
return NULL;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// If verify peers has not been set, then require a server name to
|
||||
// avoid an accidental lack of name validation.
|
||||
if (!verify_peers_set && servername == NULL) {
|
||||
if (!verify_peers_set && servername == nullptr) {
|
||||
TraceEvent(SevError, "FDBLibTLSNoServerName");
|
||||
return NULL;
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
session_created = true;
|
||||
try {
|
||||
return new FDBLibTLSSession(Reference<FDBLibTLSPolicy>::addRef(this), is_client, servername, send_func, send_ctx, recv_func, recv_ctx, uid);
|
||||
return new FDBLibTLSSession(Reference<FDBLibTLSPolicy>::addRef(this), is_client, servername, send_func,
|
||||
send_ctx, recv_func, recv_ctx, uid);
|
||||
} catch ( ... ) {
|
||||
return NULL;
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -87,8 +89,7 @@ static int password_cb(char *buf, int size, int rwflag, void *u) {
|
|||
|
||||
if (size < 0)
|
||||
return 0;
|
||||
if (u == NULL)
|
||||
return 0;
|
||||
if (u == nullptr) return 0;
|
||||
|
||||
plen = strlen(password);
|
||||
if (plen > size)
|
||||
|
@ -102,24 +103,24 @@ static int password_cb(char *buf, int size, int rwflag, void *u) {
|
|||
}
|
||||
|
||||
struct stack_st_X509* FDBLibTLSPolicy::parse_cert_pem(const uint8_t* cert_pem, size_t cert_pem_len) {
|
||||
struct stack_st_X509 *certs = NULL;
|
||||
X509 *cert = NULL;
|
||||
BIO *bio = NULL;
|
||||
struct stack_st_X509* certs = nullptr;
|
||||
X509* cert = nullptr;
|
||||
BIO* bio = nullptr;
|
||||
int errnum;
|
||||
|
||||
if (cert_pem_len > INT_MAX)
|
||||
goto err;
|
||||
if ((bio = BIO_new_mem_buf((void *)cert_pem, cert_pem_len)) == NULL) {
|
||||
if ((bio = BIO_new_mem_buf((void*)cert_pem, cert_pem_len)) == nullptr) {
|
||||
TraceEvent(SevError, "FDBLibTLSOutOfMemory");
|
||||
goto err;
|
||||
}
|
||||
if ((certs = sk_X509_new_null()) == NULL) {
|
||||
if ((certs = sk_X509_new_null()) == nullptr) {
|
||||
TraceEvent(SevError, "FDBLibTLSOutOfMemory");
|
||||
goto err;
|
||||
}
|
||||
|
||||
ERR_clear_error();
|
||||
while ((cert = PEM_read_bio_X509(bio, NULL, password_cb, NULL)) != NULL) {
|
||||
while ((cert = PEM_read_bio_X509(bio, nullptr, password_cb, nullptr)) != nullptr) {
|
||||
if (!sk_X509_push(certs, cert)) {
|
||||
TraceEvent(SevError, "FDBLibTLSOutOfMemory");
|
||||
goto err;
|
||||
|
@ -150,7 +151,7 @@ struct stack_st_X509* FDBLibTLSPolicy::parse_cert_pem(const uint8_t* cert_pem, s
|
|||
X509_free(cert);
|
||||
BIO_free(bio);
|
||||
|
||||
return NULL;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
bool FDBLibTLSPolicy::set_ca_data(const uint8_t* ca_data, int ca_len) {
|
||||
|
@ -166,8 +167,7 @@ bool FDBLibTLSPolicy::set_ca_data(const uint8_t* ca_data, int ca_len) {
|
|||
if (ca_len < 0)
|
||||
return false;
|
||||
sk_X509_pop_free(roots, X509_free);
|
||||
if ((roots = parse_cert_pem(ca_data, ca_len)) == NULL)
|
||||
return false;
|
||||
if ((roots = parse_cert_pem(ca_data, ca_len)) == nullptr) return false;
|
||||
|
||||
if (tls_config_set_ca_mem(tls_cfg, ca_data, ca_len) == -1) {
|
||||
TraceEvent(SevError, "FDBLibTLSCAError").detail("LibTLSErrorMessage", tls_config_error(tls_cfg));
|
||||
|
@ -200,8 +200,8 @@ bool FDBLibTLSPolicy::set_cert_data(const uint8_t* cert_data, int cert_len) {
|
|||
}
|
||||
|
||||
bool FDBLibTLSPolicy::set_key_data(const uint8_t* key_data, int key_len, const char* password) {
|
||||
EVP_PKEY *key = NULL;
|
||||
BIO *bio = NULL;
|
||||
EVP_PKEY* key = nullptr;
|
||||
BIO* bio = nullptr;
|
||||
bool rc = false;
|
||||
|
||||
if (key_data_set) {
|
||||
|
@ -213,16 +213,16 @@ bool FDBLibTLSPolicy::set_key_data(const uint8_t* key_data, int key_len, const c
|
|||
goto err;
|
||||
}
|
||||
|
||||
if (password != NULL) {
|
||||
if (password != nullptr) {
|
||||
char *data;
|
||||
long len;
|
||||
|
||||
if ((bio = BIO_new_mem_buf((void *)key_data, key_len)) == NULL) {
|
||||
if ((bio = BIO_new_mem_buf((void*)key_data, key_len)) == nullptr) {
|
||||
TraceEvent(SevError, "FDBLibTLSOutOfMemory");
|
||||
goto err;
|
||||
}
|
||||
ERR_clear_error();
|
||||
if ((key = PEM_read_bio_PrivateKey(bio, NULL, password_cb, (void *)password)) == NULL) {
|
||||
if ((key = PEM_read_bio_PrivateKey(bio, nullptr, password_cb, (void*)password)) == nullptr) {
|
||||
int errnum = ERR_peek_error();
|
||||
char errbuf[256];
|
||||
|
||||
|
@ -236,11 +236,11 @@ bool FDBLibTLSPolicy::set_key_data(const uint8_t* key_data, int key_len, const c
|
|||
goto err;
|
||||
}
|
||||
BIO_free(bio);
|
||||
if ((bio = BIO_new(BIO_s_mem())) == NULL) {
|
||||
if ((bio = BIO_new(BIO_s_mem())) == nullptr) {
|
||||
TraceEvent(SevError, "FDBLibTLSOutOfMemory");
|
||||
goto err;
|
||||
}
|
||||
if (!PEM_write_bio_PrivateKey(bio, key, NULL, NULL, 0, NULL, NULL)) {
|
||||
if (!PEM_write_bio_PrivateKey(bio, key, nullptr, nullptr, 0, nullptr, nullptr)) {
|
||||
TraceEvent(SevError, "FDBLibTLSOutOfMemory");
|
||||
goto err;
|
||||
}
|
||||
|
|
|
@ -60,14 +60,16 @@ static ssize_t tls_write_func(struct tls *ctx, const void *buf, size_t buflen, v
|
|||
return (ssize_t)rv;
|
||||
}
|
||||
|
||||
FDBLibTLSSession::FDBLibTLSSession(Reference<FDBLibTLSPolicy> policy, bool is_client, const char* servername, TLSSendCallbackFunc send_func, void* send_ctx, TLSRecvCallbackFunc recv_func, void* recv_ctx, void* uidptr) :
|
||||
tls_ctx(NULL), tls_sctx(NULL), is_client(is_client), policy(policy), send_func(send_func), send_ctx(send_ctx),
|
||||
recv_func(recv_func), recv_ctx(recv_ctx), handshake_completed(false), lastVerifyFailureLogged(0.0) {
|
||||
FDBLibTLSSession::FDBLibTLSSession(Reference<FDBLibTLSPolicy> policy, bool is_client, const char* servername,
|
||||
TLSSendCallbackFunc send_func, void* send_ctx, TLSRecvCallbackFunc recv_func,
|
||||
void* recv_ctx, void* uidptr)
|
||||
: tls_ctx(nullptr), tls_sctx(nullptr), is_client(is_client), policy(policy), send_func(send_func), send_ctx(send_ctx),
|
||||
recv_func(recv_func), recv_ctx(recv_ctx), handshake_completed(false), lastVerifyFailureLogged(0.0) {
|
||||
if (uidptr)
|
||||
uid = * (UID*) uidptr;
|
||||
|
||||
if (is_client) {
|
||||
if ((tls_ctx = tls_client()) == NULL) {
|
||||
if ((tls_ctx = tls_client()) == nullptr) {
|
||||
TraceEvent(SevError, "FDBLibTLSClientError", uid);
|
||||
throw std::runtime_error("FDBLibTLSClientError");
|
||||
}
|
||||
|
@ -82,7 +84,7 @@ FDBLibTLSSession::FDBLibTLSSession(Reference<FDBLibTLSPolicy> policy, bool is_cl
|
|||
throw std::runtime_error("FDBLibTLSConnectError");
|
||||
}
|
||||
} else {
|
||||
if ((tls_sctx = tls_server()) == NULL) {
|
||||
if ((tls_sctx = tls_server()) == nullptr) {
|
||||
TraceEvent(SevError, "FDBLibTLSServerError", uid);
|
||||
throw std::runtime_error("FDBLibTLSServerError");
|
||||
}
|
||||
|
@ -108,14 +110,13 @@ FDBLibTLSSession::~FDBLibTLSSession() {
|
|||
|
||||
bool match_criteria_entry(const std::string& criteria, ASN1_STRING* entry, MatchType mt) {
|
||||
bool rc = false;
|
||||
ASN1_STRING* asn_criteria = NULL;
|
||||
unsigned char* criteria_utf8 = NULL;
|
||||
ASN1_STRING* asn_criteria = nullptr;
|
||||
unsigned char* criteria_utf8 = nullptr;
|
||||
int criteria_utf8_len = 0;
|
||||
unsigned char* entry_utf8 = NULL;
|
||||
unsigned char* entry_utf8 = nullptr;
|
||||
int entry_utf8_len = 0;
|
||||
|
||||
if ((asn_criteria = ASN1_IA5STRING_new()) == NULL)
|
||||
goto err;
|
||||
if ((asn_criteria = ASN1_IA5STRING_new()) == nullptr) goto err;
|
||||
if (ASN1_STRING_set(asn_criteria, criteria.c_str(), criteria.size()) != 1)
|
||||
goto err;
|
||||
if ((criteria_utf8_len = ASN1_STRING_to_UTF8(&criteria_utf8, asn_criteria)) < 1)
|
||||
|
@ -152,8 +153,7 @@ bool match_name_criteria(X509_NAME *name, NID nid, const std::string& criteria,
|
|||
return false;
|
||||
if (X509_NAME_get_index_by_NID(name, nid, idx) != -1)
|
||||
return false;
|
||||
if ((name_entry = X509_NAME_get_entry(name, idx)) == NULL)
|
||||
return false;
|
||||
if ((name_entry = X509_NAME_get_entry(name, idx)) == nullptr) return false;
|
||||
|
||||
return match_criteria_entry(criteria, name_entry->value, mt);
|
||||
}
|
||||
|
@ -169,8 +169,9 @@ bool match_extension_criteria(X509 *cert, NID nid, const std::string& value, Mat
|
|||
}
|
||||
std::string value_gen = value.substr(0, pos);
|
||||
std::string value_val = value.substr(pos+1, value.npos);
|
||||
STACK_OF(GENERAL_NAME)* sans = reinterpret_cast<STACK_OF(GENERAL_NAME)*>(X509_get_ext_d2i(cert, nid, NULL, NULL));
|
||||
if (sans == NULL) {
|
||||
STACK_OF(GENERAL_NAME)* sans =
|
||||
reinterpret_cast<STACK_OF(GENERAL_NAME)*>(X509_get_ext_d2i(cert, nid, nullptr, nullptr));
|
||||
if (sans == nullptr) {
|
||||
return false;
|
||||
}
|
||||
int num_sans = sk_GENERAL_NAME_num( sans );
|
||||
|
@ -231,10 +232,10 @@ bool match_criteria(X509* cert, X509_NAME* subject, NID nid, const std::string&
|
|||
}
|
||||
|
||||
std::tuple<bool,std::string> FDBLibTLSSession::check_verify(Reference<FDBLibTLSVerify> verify, struct stack_st_X509 *certs) {
|
||||
X509_STORE_CTX *store_ctx = NULL;
|
||||
X509_STORE_CTX* store_ctx = nullptr;
|
||||
X509_NAME *subject, *issuer;
|
||||
bool rc = false;
|
||||
X509* cert = NULL;
|
||||
X509* cert = nullptr;
|
||||
// if returning false, give a reason string
|
||||
std::string reason = "";
|
||||
|
||||
|
@ -243,12 +244,12 @@ std::tuple<bool,std::string> FDBLibTLSSession::check_verify(Reference<FDBLibTLSV
|
|||
return std::make_tuple(true, reason);
|
||||
|
||||
// Verify the certificate.
|
||||
if ((store_ctx = X509_STORE_CTX_new()) == NULL) {
|
||||
if ((store_ctx = X509_STORE_CTX_new()) == nullptr) {
|
||||
TraceEvent(SevError, "FDBLibTLSOutOfMemory", uid);
|
||||
reason = "Out of memory";
|
||||
goto err;
|
||||
}
|
||||
if (!X509_STORE_CTX_init(store_ctx, NULL, sk_X509_value(certs, 0), certs)) {
|
||||
if (!X509_STORE_CTX_init(store_ctx, nullptr, sk_X509_value(certs, 0), certs)) {
|
||||
reason = "Store ctx init";
|
||||
goto err;
|
||||
}
|
||||
|
@ -264,7 +265,7 @@ std::tuple<bool,std::string> FDBLibTLSSession::check_verify(Reference<FDBLibTLSV
|
|||
|
||||
// Check subject criteria.
|
||||
cert = sk_X509_value(store_ctx->chain, 0);
|
||||
if ((subject = X509_get_subject_name(cert)) == NULL) {
|
||||
if ((subject = X509_get_subject_name(cert)) == nullptr) {
|
||||
reason = "Cert subject error";
|
||||
goto err;
|
||||
}
|
||||
|
@ -276,7 +277,7 @@ std::tuple<bool,std::string> FDBLibTLSSession::check_verify(Reference<FDBLibTLSV
|
|||
}
|
||||
|
||||
// Check issuer criteria.
|
||||
if ((issuer = X509_get_issuer_name(cert)) == NULL) {
|
||||
if ((issuer = X509_get_issuer_name(cert)) == nullptr) {
|
||||
reason = "Cert issuer error";
|
||||
goto err;
|
||||
}
|
||||
|
@ -289,7 +290,7 @@ std::tuple<bool,std::string> FDBLibTLSSession::check_verify(Reference<FDBLibTLSV
|
|||
|
||||
// Check root criteria - this is the subject of the final certificate in the stack.
|
||||
cert = sk_X509_value(store_ctx->chain, sk_X509_num(store_ctx->chain) - 1);
|
||||
if ((subject = X509_get_subject_name(cert)) == NULL) {
|
||||
if ((subject = X509_get_subject_name(cert)) == nullptr) {
|
||||
reason = "Root subject error";
|
||||
goto err;
|
||||
}
|
||||
|
@ -310,7 +311,7 @@ std::tuple<bool,std::string> FDBLibTLSSession::check_verify(Reference<FDBLibTLSV
|
|||
}
|
||||
|
||||
bool FDBLibTLSSession::verify_peer() {
|
||||
struct stack_st_X509 *certs = NULL;
|
||||
struct stack_st_X509* certs = nullptr;
|
||||
const uint8_t *cert_pem;
|
||||
size_t cert_pem_len;
|
||||
bool rc = false;
|
||||
|
@ -323,12 +324,11 @@ bool FDBLibTLSSession::verify_peer() {
|
|||
if (policy->verify_rules.empty())
|
||||
return true;
|
||||
|
||||
if ((cert_pem = tls_peer_cert_chain_pem(tls_ctx, &cert_pem_len)) == NULL) {
|
||||
if ((cert_pem = tls_peer_cert_chain_pem(tls_ctx, &cert_pem_len)) == nullptr) {
|
||||
TraceEvent(SevError, "FDBLibTLSNoCertError", uid);
|
||||
goto err;
|
||||
}
|
||||
if ((certs = policy->parse_cert_pem(cert_pem, cert_pem_len)) == NULL)
|
||||
goto err;
|
||||
if ((certs = policy->parse_cert_pem(cert_pem, cert_pem_len)) == nullptr) goto err;
|
||||
|
||||
// Any matching rule is sufficient.
|
||||
for (auto &verify_rule: policy->verify_rules) {
|
||||
|
|
|
@ -147,7 +147,7 @@ static NID abbrevToNID(std::string const& sn) {
|
|||
|
||||
static X509Location locationForNID(NID nid) {
|
||||
const char* name = OBJ_nid2ln(nid);
|
||||
if (name == NULL) {
|
||||
if (name == nullptr) {
|
||||
throw std::runtime_error("locationForNID");
|
||||
}
|
||||
if (strncmp(name, "X509v3", 6) == 0) {
|
||||
|
|
|
@ -111,6 +111,12 @@ if(NOT WIN32)
|
|||
set_property(TARGET mako PROPERTY SKIP_BUILD_RPATH TRUE)
|
||||
target_link_libraries(mako PRIVATE fdb_c)
|
||||
|
||||
# Make sure that fdb_c.h is compatible with c90
|
||||
add_executable(fdb_c90_test test/fdb_c90_test.c)
|
||||
set_property(TARGET fdb_c90_test PROPERTY C_STANDARD 90)
|
||||
target_compile_options(fdb_c90_test PRIVATE -Wall -Wextra -Wpedantic -Werror)
|
||||
target_link_libraries(fdb_c90_test PRIVATE fdb_c)
|
||||
|
||||
add_fdbclient_test(
|
||||
NAME fdb_c_setup_tests
|
||||
COMMAND $<TARGET_FILE:fdb_c_setup_tests>)
|
||||
|
|
|
@ -389,6 +389,10 @@ fdb_error_t fdb_database_create_transaction( FDBDatabase* d,
|
|||
*out_transaction = (FDBTransaction*)tr.extractPtr(); );
|
||||
}
|
||||
|
||||
extern "C" DLLEXPORT FDBFuture* fdb_database_reboot_worker(FDBDatabase* db, uint8_t const* address, int address_length,
|
||||
fdb_bool_t check, int duration) {
|
||||
return (FDBFuture*)(DB(db)->rebootWorker(StringRef(address, address_length), check, duration).extractPtr());
|
||||
}
|
||||
|
||||
extern "C" DLLEXPORT
|
||||
void fdb_transaction_destroy( FDBTransaction* tr ) {
|
||||
|
|
|
@ -45,12 +45,17 @@
|
|||
#define WARN_UNUSED_RESULT
|
||||
#endif
|
||||
|
||||
// With default settings, gcc will not warn about unprototyped functions being called, so it
|
||||
// is easy to erroneously call a function which is not available at FDB_API_VERSION and then
|
||||
// get an error only at runtime. These macros ensure a compile error in such cases, and
|
||||
// attempt to make the compile error slightly informative.
|
||||
#define This_FoundationDB_API_function_is_removed_at_this_FDB_API_VERSION() [=====]
|
||||
#define FDB_REMOVED_FUNCTION This_FoundationDB_API_function_is_removed_at_this_FDB_API_VERSION(0)
|
||||
/*
|
||||
* With default settings, gcc will not warn about unprototyped functions being
|
||||
* called, so it is easy to erroneously call a function which is not available
|
||||
* at FDB_API_VERSION and then get an error only at runtime. These macros
|
||||
* ensure a compile error in such cases, and attempt to make the compile error
|
||||
* slightly informative.
|
||||
*/
|
||||
#define This_FoundationDB_API_function_is_removed_at_this_FDB_API_VERSION() \
|
||||
[== == = ]
|
||||
#define FDB_REMOVED_FUNCTION \
|
||||
This_FoundationDB_API_function_is_removed_at_this_FDB_API_VERSION(0)
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
|
@ -173,6 +178,10 @@ extern "C" {
|
|||
fdb_database_create_transaction( FDBDatabase* d,
|
||||
FDBTransaction** out_transaction );
|
||||
|
||||
DLLEXPORT WARN_UNUSED_RESULT FDBFuture*
|
||||
fdb_database_reboot_worker( FDBDatabase* db, uint8_t const* address,
|
||||
int address_length, fdb_bool_t check, int duration);
|
||||
|
||||
DLLEXPORT void fdb_transaction_destroy( FDBTransaction* tr);
|
||||
|
||||
DLLEXPORT void fdb_transaction_cancel( FDBTransaction* tr);
|
||||
|
@ -244,12 +253,15 @@ extern "C" {
|
|||
fdb_transaction_get_committed_version( FDBTransaction* tr,
|
||||
int64_t* out_version );
|
||||
|
||||
// This function intentionally returns an FDBFuture instead of an integer directly,
|
||||
// so that calling this API can see the effect of previous mutations on the transaction.
|
||||
// Specifically, mutations are applied asynchronously by the main thread. In order to
|
||||
// see them, this call has to be serviced by the main thread too.
|
||||
DLLEXPORT WARN_UNUSED_RESULT FDBFuture*
|
||||
fdb_transaction_get_approximate_size(FDBTransaction* tr);
|
||||
/*
|
||||
* This function intentionally returns an FDBFuture instead of an integer
|
||||
* directly, so that calling this API can see the effect of previous
|
||||
* mutations on the transaction. Specifically, mutations are applied
|
||||
* asynchronously by the main thread. In order to see them, this call has to
|
||||
* be serviced by the main thread too.
|
||||
*/
|
||||
DLLEXPORT WARN_UNUSED_RESULT FDBFuture *
|
||||
fdb_transaction_get_approximate_size(FDBTransaction *tr);
|
||||
|
||||
DLLEXPORT WARN_UNUSED_RESULT FDBFuture*
|
||||
fdb_get_server_protocol(const char* clusterFilePath);
|
||||
|
@ -301,7 +313,7 @@ extern "C" {
|
|||
typedef struct FDB_cluster FDBCluster;
|
||||
|
||||
typedef enum {
|
||||
// This option is only a placeholder for C compatibility and should not be used
|
||||
/* This option is only a placeholder for C compatibility and should not be used */
|
||||
FDB_CLUSTER_OPTION_DUMMY_DO_NOT_USE=-1
|
||||
} FDBClusterOption;
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,9 @@
|
|||
#define FDB_API_VERSION 700
|
||||
#include <foundationdb/fdb_c.h>
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
(void)argc;
|
||||
(void)argv;
|
||||
fdb_select_api_version(700);
|
||||
return 0;
|
||||
}
|
|
@ -92,6 +92,12 @@ void Future::cancel() {
|
|||
return fdb_future_get_keyvalue_array(future_, out_kv, out_count, out_more);
|
||||
}
|
||||
|
||||
// Database
|
||||
Int64Future Database::reboot_worker(FDBDatabase* db, const uint8_t* address, int address_length, fdb_bool_t check,
|
||||
int duration) {
|
||||
return Int64Future(fdb_database_reboot_worker(db, address, address_length, check, duration));
|
||||
}
|
||||
|
||||
// Transaction
|
||||
|
||||
Transaction::Transaction(FDBDatabase* db) {
|
||||
|
|
|
@ -77,7 +77,6 @@ class Future {
|
|||
FDBFuture* future_;
|
||||
};
|
||||
|
||||
|
||||
class Int64Future : public Future {
|
||||
public:
|
||||
// Call this function instead of fdb_future_get_int64 when using the
|
||||
|
@ -86,6 +85,7 @@ class Int64Future : public Future {
|
|||
|
||||
private:
|
||||
friend class Transaction;
|
||||
friend class Database;
|
||||
Int64Future(FDBFuture* f) : Future(f) {}
|
||||
};
|
||||
|
||||
|
@ -147,6 +147,13 @@ class EmptyFuture : public Future {
|
|||
EmptyFuture(FDBFuture* f) : Future(f) {}
|
||||
};
|
||||
|
||||
// Wrapper around FDBDatabase, providing database-level API
|
||||
class Database final {
|
||||
public:
|
||||
static Int64Future reboot_worker(FDBDatabase* db, const uint8_t* address, int address_length, fdb_bool_t check,
|
||||
int duration);
|
||||
};
|
||||
|
||||
// Wrapper around FDBTransaction, providing the same set of calls as the C API.
|
||||
// Handles cleanup of memory, removing the need to call
|
||||
// fdb_transaction_destroy.
|
||||
|
|
|
@ -37,6 +37,7 @@
|
|||
|
||||
#define DOCTEST_CONFIG_IMPLEMENT
|
||||
#include "doctest.h"
|
||||
#include "fdbclient/rapidjson/document.h"
|
||||
|
||||
#include "fdb_api.hpp"
|
||||
|
||||
|
@ -1967,6 +1968,65 @@ TEST_CASE("special-key-space tracing get range") {
|
|||
}
|
||||
}
|
||||
|
||||
std::string get_valid_status_json() {
|
||||
fdb::Transaction tr(db);
|
||||
while (1) {
|
||||
fdb::ValueFuture f1 = tr.get("\xff\xff/status/json", false);
|
||||
fdb_error_t err = wait_future(f1);
|
||||
if (err) {
|
||||
fdb::EmptyFuture f2 = tr.on_error(err);
|
||||
fdb_check(wait_future(f2));
|
||||
continue;
|
||||
}
|
||||
|
||||
int out_present;
|
||||
char *val;
|
||||
int vallen;
|
||||
fdb_check(f1.get(&out_present, (const uint8_t **)&val, &vallen));
|
||||
assert(out_present);
|
||||
std::string statusJsonStr(val, vallen);
|
||||
rapidjson::Document statusJson;
|
||||
statusJson.Parse(statusJsonStr.c_str());
|
||||
// make sure it is available
|
||||
bool available = statusJson["client"]["database_status"]["available"].GetBool();
|
||||
if (!available)
|
||||
continue; // cannot reach to the cluster, retry
|
||||
return statusJsonStr;
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("fdb_database_reboot_worker") {
|
||||
std::string status_json = get_valid_status_json();
|
||||
rapidjson::Document statusJson;
|
||||
statusJson.Parse(status_json.c_str());
|
||||
CHECK(statusJson.HasMember("cluster"));
|
||||
CHECK(statusJson["cluster"].HasMember("generation"));
|
||||
int old_generation = statusJson["cluster"]["generation"].GetInt();
|
||||
CHECK(statusJson["cluster"].HasMember("processes"));
|
||||
// Make sure we only have one process in the cluster
|
||||
// Thus, rebooting the worker ensures a recovery
|
||||
// Configuration changes may break the contract here
|
||||
CHECK(statusJson["cluster"]["processes"].MemberCount() == 1);
|
||||
auto processPtr = statusJson["cluster"]["processes"].MemberBegin();
|
||||
CHECK(processPtr->value.HasMember("address"));
|
||||
std::string network_address = processPtr->value["address"].GetString();
|
||||
while (1) {
|
||||
fdb::Int64Future f =
|
||||
fdb::Database::reboot_worker(db, (const uint8_t*)network_address.c_str(), network_address.size(), false, 0);
|
||||
fdb_check(wait_future(f));
|
||||
int64_t successful;
|
||||
fdb_check(f.get(&successful));
|
||||
if (successful) break; // retry rebooting until success
|
||||
}
|
||||
status_json = get_valid_status_json();
|
||||
statusJson.Parse(status_json.c_str());
|
||||
CHECK(statusJson.HasMember("cluster"));
|
||||
CHECK(statusJson["cluster"].HasMember("generation"));
|
||||
int new_generation = statusJson["cluster"]["generation"].GetInt();
|
||||
// The generation number should increase after the recovery
|
||||
CHECK(new_generation > old_generation);
|
||||
}
|
||||
|
||||
TEST_CASE("fdb_error_predicate") {
|
||||
CHECK(fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE, 1007)); // transaction_too_old
|
||||
CHECK(fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE, 1020)); // not_committed
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
|
||||
#include "fdb_flow.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <stdio.h>
|
||||
#include <cinttypes>
|
||||
|
||||
|
@ -101,6 +102,7 @@ namespace FDB {
|
|||
|
||||
Reference<Transaction> createTransaction() override;
|
||||
void setDatabaseOption(FDBDatabaseOption option, Optional<StringRef> value = Optional<StringRef>()) override;
|
||||
Future<int64_t> rebootWorker(const StringRef& address, bool check = false, int duration = 0) override;
|
||||
|
||||
private:
|
||||
FDBDatabase* db;
|
||||
|
@ -284,6 +286,16 @@ namespace FDB {
|
|||
throw_on_error(fdb_database_set_option(db, option, nullptr, 0));
|
||||
}
|
||||
|
||||
Future<int64_t> DatabaseImpl::rebootWorker(const StringRef &address, bool check, int duration) {
|
||||
return backToFuture<int64_t>( fdb_database_reboot_worker(db, address.begin(), address.size(), check, duration), [](Reference<CFuture> f) {
|
||||
int64_t res;
|
||||
|
||||
throw_on_error(fdb_future_get_int64( f->f, &res ) );
|
||||
|
||||
return res;
|
||||
} );
|
||||
}
|
||||
|
||||
TransactionImpl::TransactionImpl(FDBDatabase* db) {
|
||||
throw_on_error(fdb_database_create_transaction(db, &tr));
|
||||
}
|
||||
|
|
|
@ -124,6 +124,7 @@ namespace FDB {
|
|||
virtual ~Database(){};
|
||||
virtual Reference<Transaction> createTransaction() = 0;
|
||||
virtual void setDatabaseOption(FDBDatabaseOption option, Optional<StringRef> value = Optional<StringRef>()) = 0;
|
||||
virtual Future<int64_t> rebootWorker(const StringRef& address, bool check = false, int duration = 0) = 0;
|
||||
};
|
||||
|
||||
class API {
|
||||
|
|
|
@ -1,17 +1,27 @@
|
|||
FROM centos:6
|
||||
|
||||
# Clean yum cache, disable default Base repo and enable Vault
|
||||
RUN yum clean all &&\
|
||||
sed -i -e 's/gpgcheck=1/enabled=0/g' /etc/yum.repos.d/CentOS-Base.repo &&\
|
||||
sed -i -e 's/enabled=0/enabled=1/g' /etc/yum.repos.d/CentOS-Vault.repo &&\
|
||||
sed -i -n '/6.1/q;p' /etc/yum.repos.d/CentOS-Vault.repo &&\
|
||||
sed -i -e "s/6\.0/$(cut -d\ -f3 /etc/redhat-release)/g" /etc/yum.repos.d/CentOS-Vault.repo &&\
|
||||
yum install -y yum-utils &&\
|
||||
yum-config-manager --enable rhel-server-rhscl-7-rpms &&\
|
||||
yum -y install centos-release-scl-rh epel-release \
|
||||
http://opensource.wandisco.com/centos/6/git/x86_64/wandisco-git-release-6-1.noarch.rpm &&\
|
||||
sed -i -e 's/#baseurl=/baseurl=/g' -e 's/mirror.centos.org/vault.centos.org/g' \
|
||||
-e 's/mirrorlist=/#mirrorlist=/g' /etc/yum.repos.d/CentOS-SCLo-scl-rh.repo &&\
|
||||
yum clean all
|
||||
|
||||
# Install dependencies for developer tools, bindings,\
|
||||
# documentation, actorcompiler, and packaging tools\
|
||||
RUN yum install -y yum-utils &&\
|
||||
yum-config-manager --enable rhel-server-rhscl-7-rpms &&\
|
||||
yum -y install centos-release-scl epel-release \
|
||||
http://opensource.wandisco.com/centos/6/git/x86_64/wandisco-git-release-6-1.noarch.rpm &&\
|
||||
yum -y install devtoolset-8-8.1-1.el6 java-1.8.0-openjdk-devel \
|
||||
devtoolset-8-gcc-8.3.1 devtoolset-8-gcc-c++-8.3.1 \
|
||||
devtoolset-8-libubsan-devel devtoolset-8-libasan-devel devtoolset-8-valgrind-devel \
|
||||
rh-python36-python-devel rh-ruby24 golang python27 rpm-build \
|
||||
mono-core debbuild python-pip dos2unix valgrind-devel ccache \
|
||||
distcc wget git lz4 lz4-devel lz4-static &&\
|
||||
RUN yum -y install devtoolset-8-8.1-1.el6 java-1.8.0-openjdk-devel \
|
||||
devtoolset-8-gcc-8.3.1 devtoolset-8-gcc-c++-8.3.1 \
|
||||
devtoolset-8-libubsan-devel devtoolset-8-libasan-devel devtoolset-8-valgrind-devel \
|
||||
rh-python36-python-devel rh-ruby24 golang python27 rpm-build \
|
||||
mono-core debbuild python-pip dos2unix valgrind-devel ccache \
|
||||
distcc wget libxslt git lz4 lz4-devel lz4-static &&\
|
||||
pip install boto3==1.1.1
|
||||
|
||||
USER root
|
||||
|
@ -41,6 +51,8 @@ RUN curl -L https://github.com/Kitware/CMake/releases/download/v3.13.4/cmake-3.1
|
|||
|
||||
# install Ninja
|
||||
RUN cd /tmp && curl -L https://github.com/ninja-build/ninja/archive/v1.9.0.zip -o ninja.zip &&\
|
||||
echo "8e2e654a418373f10c22e4cc9bdbe9baeca8527ace8d572e0b421e9d9b85b7ef ninja.zip" > /tmp/ninja-sha.txt &&\
|
||||
sha256sum -c /tmp/ninja-sha.txt &&\
|
||||
unzip ninja.zip && cd ninja-1.9.0 && scl enable devtoolset-8 -- ./configure.py --bootstrap && cp ninja /usr/bin &&\
|
||||
cd .. && rm -rf ninja-1.9.0 ninja.zip
|
||||
|
||||
|
@ -53,17 +65,59 @@ RUN cd /tmp && curl -L https://www.openssl.org/source/openssl-1.1.1h.tar.gz -o o
|
|||
ln -sv /usr/local/lib64/lib*.so.1.1 /usr/lib64/ &&\
|
||||
cd /tmp/ && rm -rf /tmp/openssl-1.1.1h /tmp/openssl.tar.gz
|
||||
|
||||
# Install toml11
|
||||
RUN cd /tmp && curl -L https://github.com/ToruNiina/toml11/archive/v3.4.0.tar.gz > toml.tar.gz &&\
|
||||
echo "bc6d733efd9216af8c119d8ac64a805578c79cc82b813e4d1d880ca128bd154d toml.tar.gz" > toml-sha256.txt &&\
|
||||
sha256sum -c toml-sha256.txt &&\
|
||||
tar xf toml.tar.gz && rm -rf build && mkdir build && cd build && scl enable devtoolset-8 -- cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -Dtoml11_BUILD_TEST=OFF ../toml11-3.4.0 &&\
|
||||
scl enable devtoolset-8 -- cmake --build . --target install && cd / && rm -rf tmp/build && rm -rf tmp/toml11-3.4.0
|
||||
|
||||
RUN cd /opt/ && curl -L https://github.com/facebook/rocksdb/archive/v6.10.1.tar.gz -o rocksdb.tar.gz &&\
|
||||
echo "d573d2f15cdda883714f7e0bc87b814a8d4a53a82edde558f08f940e905541ee rocksdb.tar.gz" > rocksdb-sha.txt &&\
|
||||
sha256sum -c rocksdb-sha.txt && tar xf rocksdb.tar.gz && rm -rf rocksdb.tar.gz rocksdb-sha.txt
|
||||
|
||||
RUN cd /opt/ && curl -L https://github.com/manticoresoftware/manticoresearch/raw/master/misc/junit/ctest2junit.xsl -o ctest2junit.xsl
|
||||
|
||||
# Setting this environment variable switches from OpenSSL to BoringSSL
|
||||
#ENV OPENSSL_ROOT_DIR=/opt/boringssl
|
||||
|
||||
# install BoringSSL: TODO: They don't seem to have releases(?) I picked today's master SHA.
|
||||
RUN cd /opt &&\
|
||||
git clone https://boringssl.googlesource.com/boringssl &&\
|
||||
cd boringssl &&\
|
||||
git checkout e796cc65025982ed1fb9ef41b3f74e8115092816 &&\
|
||||
mkdir build
|
||||
|
||||
# ninja doesn't respect CXXFLAGS, and the boringssl CMakeLists doesn't expose an option to define __STDC_FORMAT_MACROS
|
||||
# also, enable -fPIC.
|
||||
# this is moderately uglier than creating a patchfile, but easier to maintain.
|
||||
RUN cd /opt/boringssl &&\
|
||||
for f in crypto/fipsmodule/rand/fork_detect_test.cc \
|
||||
include/openssl/bn.h \
|
||||
ssl/test/bssl_shim.cc ; do \
|
||||
perl -p -i -e 's/#include <inttypes.h>/#define __STDC_FORMAT_MACROS 1\n#include <inttypes.h>/g;' $f ; \
|
||||
done &&\
|
||||
perl -p -i -e 's/-Werror/-Werror -fPIC/' CMakeLists.txt &&\
|
||||
git diff
|
||||
|
||||
RUN cd /opt/boringssl/build &&\
|
||||
scl enable devtoolset-8 rh-python36 rh-ruby24 -- cmake -GNinja -DCMAKE_BUILD_TYPE=Release .. &&\
|
||||
scl enable devtoolset-8 rh-python36 rh-ruby24 -- ninja &&\
|
||||
./ssl/ssl_test &&\
|
||||
mkdir -p ../lib && cp crypto/libcrypto.a ssl/libssl.a ../lib
|
||||
|
||||
# Localize time zone
|
||||
ARG TIMEZONEINFO=America/Los_Angeles
|
||||
RUN rm -f /etc/localtime && ln -s /usr/share/zoneinfo/${TIMEZONEINFO} /etc/localtime
|
||||
|
||||
LABEL version=0.1.19
|
||||
ENV DOCKER_IMAGEVER=0.1.19
|
||||
LABEL version=0.1.22
|
||||
ENV DOCKER_IMAGEVER=0.1.22
|
||||
ENV JAVA_HOME=/usr/lib/jvm/java-1.8.0
|
||||
ENV CC=/opt/rh/devtoolset-8/root/usr/bin/gcc
|
||||
ENV CXX=/opt/rh/devtoolset-8/root/usr/bin/g++
|
||||
|
||||
ENV CCACHE_NOHASHDIR=true
|
||||
ENV CCACHE_UMASK=0000
|
||||
ENV CCACHE_SLOPPINESS="file_macro,time_macros,include_file_mtime,include_file_ctime,file_stat_matches"
|
||||
|
||||
CMD scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
FROM foundationdb/foundationdb-build:0.1.19
|
||||
ARG IMAGE_TAG=0.1.21
|
||||
FROM foundationdb/foundationdb-build:${IMAGE_TAG}
|
||||
|
||||
USER root
|
||||
|
||||
|
@ -50,8 +51,8 @@ RUN cp -iv /usr/local/bin/clang++ /usr/local/bin/clang++.deref &&\
|
|||
ldconfig &&\
|
||||
rm -rf /mnt/artifacts
|
||||
|
||||
LABEL version=0.11.10
|
||||
ENV DOCKER_IMAGEVER=0.11.10
|
||||
LABEL version=0.11.13
|
||||
ENV DOCKER_IMAGEVER=0.11.13
|
||||
|
||||
ENV CLANGCC=/usr/local/bin/clang.de8a65ef
|
||||
ENV CLANGCXX=/usr/local/bin/clang++.de8a65ef
|
||||
|
@ -63,8 +64,5 @@ ENV CC=/usr/local/bin/clang.de8a65ef
|
|||
ENV CXX=/usr/local/bin/clang++.de8a65ef
|
||||
ENV USE_LD=LLD
|
||||
ENV USE_LIBCXX=1
|
||||
ENV CCACHE_NOHASHDIR=true
|
||||
ENV CCACHE_UMASK=0000
|
||||
ENV CCACHE_SLOPPINESS="file_macro,time_macros,include_file_mtime,include_file_ctime,file_stat_matches"
|
||||
|
||||
CMD scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash
|
||||
|
|
|
@ -2,7 +2,7 @@ version: "3"
|
|||
|
||||
services:
|
||||
common: &common
|
||||
image: foundationdb/foundationdb-build:0.1.19
|
||||
image: foundationdb/foundationdb-build:0.1.22
|
||||
|
||||
build-setup: &build-setup
|
||||
<<: *common
|
||||
|
|
|
@ -273,7 +273,6 @@ else()
|
|||
-Wno-unused-function
|
||||
-Wno-unused-local-typedef
|
||||
-Wno-unused-parameter
|
||||
-Wno-unused-value
|
||||
-Wno-self-assign
|
||||
)
|
||||
if (USE_CCACHE)
|
||||
|
|
|
@ -426,6 +426,25 @@ An |database-blurb1| Modifications to a database are performed via transactions.
|
|||
``*out_transaction``
|
||||
Set to point to the newly created :type:`FDBTransaction`.
|
||||
|
||||
.. function:: FDBFuture* fdb_database_reboot_worker(FDBDatabase* database, uint8_t const* address, int address_length, fdb_bool_t check, int duration)
|
||||
|
||||
Reboot the specified process in the database.
|
||||
|
||||
|future-return0| a :type:`int64_t` which represents whether the reboot request is sent or not. In particular, 1 means request sent and 0 means failure (e.g. the process with the specified address does not exist). |future-return1| call :func:`fdb_future_get_int64()` to extract the result, |future-return2|
|
||||
|
||||
``address``
|
||||
A pointer to the network address of the process.
|
||||
|
||||
``address_length``
|
||||
|length-of| ``address``.
|
||||
|
||||
``check``
|
||||
whether to perform a storage engine integrity check. In particular, the check-on-reboot is implemented by writing a check/validation file on disk as breadcrumb for the process to find after reboot, at which point it will eat the breadcrumb file and pass true to the integrityCheck parameter of the openKVStore() factory method.
|
||||
|
||||
``duration``
|
||||
If positive, the process will be first suspended for ``duration`` seconds before being rebooted.
|
||||
|
||||
|
||||
Transaction
|
||||
===========
|
||||
|
||||
|
|
|
@ -474,6 +474,11 @@ Prints a list of currently active transaction tag throttles, or recommended tran
|
|||
|
||||
``LIMIT`` - The number of throttles to print. Defaults to 100.
|
||||
|
||||
triggerddteaminfolog
|
||||
--------------------
|
||||
|
||||
The ``triggerddteaminfolog`` command would trigger the data distributor to log very detailed teams information into trace event logs.
|
||||
|
||||
unlock
|
||||
------
|
||||
|
||||
|
|
|
@ -10,38 +10,38 @@ macOS
|
|||
|
||||
The macOS installation package is supported on macOS 10.7+. It includes the client and (optionally) the server.
|
||||
|
||||
* `FoundationDB-6.3.9.pkg <https://www.foundationdb.org/downloads/6.3.9/macOS/installers/FoundationDB-6.3.9.pkg>`_
|
||||
* `FoundationDB-6.3.10.pkg <https://www.foundationdb.org/downloads/6.3.10/macOS/installers/FoundationDB-6.3.10.pkg>`_
|
||||
|
||||
Ubuntu
|
||||
------
|
||||
|
||||
The Ubuntu packages are supported on 64-bit Ubuntu 12.04+, but beware of the Linux kernel bug in Ubuntu 12.x.
|
||||
|
||||
* `foundationdb-clients-6.3.9-1_amd64.deb <https://www.foundationdb.org/downloads/6.3.9/ubuntu/installers/foundationdb-clients_6.3.9-1_amd64.deb>`_
|
||||
* `foundationdb-server-6.3.9-1_amd64.deb <https://www.foundationdb.org/downloads/6.3.9/ubuntu/installers/foundationdb-server_6.3.9-1_amd64.deb>`_ (depends on the clients package)
|
||||
* `foundationdb-clients-6.3.10-1_amd64.deb <https://www.foundationdb.org/downloads/6.3.10/ubuntu/installers/foundationdb-clients_6.3.10-1_amd64.deb>`_
|
||||
* `foundationdb-server-6.3.10-1_amd64.deb <https://www.foundationdb.org/downloads/6.3.10/ubuntu/installers/foundationdb-server_6.3.10-1_amd64.deb>`_ (depends on the clients package)
|
||||
|
||||
RHEL/CentOS EL6
|
||||
---------------
|
||||
|
||||
The RHEL/CentOS EL6 packages are supported on 64-bit RHEL/CentOS 6.x.
|
||||
|
||||
* `foundationdb-clients-6.3.9-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.9/rhel6/installers/foundationdb-clients-6.3.9-1.el6.x86_64.rpm>`_
|
||||
* `foundationdb-server-6.3.9-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.9/rhel6/installers/foundationdb-server-6.3.9-1.el6.x86_64.rpm>`_ (depends on the clients package)
|
||||
* `foundationdb-clients-6.3.10-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.10/rhel6/installers/foundationdb-clients-6.3.10-1.el6.x86_64.rpm>`_
|
||||
* `foundationdb-server-6.3.10-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.10/rhel6/installers/foundationdb-server-6.3.10-1.el6.x86_64.rpm>`_ (depends on the clients package)
|
||||
|
||||
RHEL/CentOS EL7
|
||||
---------------
|
||||
|
||||
The RHEL/CentOS EL7 packages are supported on 64-bit RHEL/CentOS 7.x.
|
||||
|
||||
* `foundationdb-clients-6.3.9-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.9/rhel7/installers/foundationdb-clients-6.3.9-1.el7.x86_64.rpm>`_
|
||||
* `foundationdb-server-6.3.9-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.9/rhel7/installers/foundationdb-server-6.3.9-1.el7.x86_64.rpm>`_ (depends on the clients package)
|
||||
* `foundationdb-clients-6.3.10-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.10/rhel7/installers/foundationdb-clients-6.3.10-1.el7.x86_64.rpm>`_
|
||||
* `foundationdb-server-6.3.10-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.10/rhel7/installers/foundationdb-server-6.3.10-1.el7.x86_64.rpm>`_ (depends on the clients package)
|
||||
|
||||
Windows
|
||||
-------
|
||||
|
||||
The Windows installer is supported on 64-bit Windows XP and later. It includes the client and (optionally) the server.
|
||||
|
||||
* `foundationdb-6.3.9-x64.msi <https://www.foundationdb.org/downloads/6.3.9/windows/installers/foundationdb-6.3.9-x64.msi>`_
|
||||
* `foundationdb-6.3.10-x64.msi <https://www.foundationdb.org/downloads/6.3.10/windows/installers/foundationdb-6.3.10-x64.msi>`_
|
||||
|
||||
API Language Bindings
|
||||
=====================
|
||||
|
@ -58,18 +58,18 @@ On macOS and Windows, the FoundationDB Python API bindings are installed as part
|
|||
|
||||
If you need to use the FoundationDB Python API from other Python installations or paths, use the Python package manager ``pip`` (``pip install foundationdb``) or download the Python package:
|
||||
|
||||
* `foundationdb-6.3.9.tar.gz <https://www.foundationdb.org/downloads/6.3.9/bindings/python/foundationdb-6.3.9.tar.gz>`_
|
||||
* `foundationdb-6.3.10.tar.gz <https://www.foundationdb.org/downloads/6.3.10/bindings/python/foundationdb-6.3.10.tar.gz>`_
|
||||
|
||||
Ruby 1.9.3/2.0.0+
|
||||
-----------------
|
||||
|
||||
* `fdb-6.3.9.gem <https://www.foundationdb.org/downloads/6.3.9/bindings/ruby/fdb-6.3.9.gem>`_
|
||||
* `fdb-6.3.10.gem <https://www.foundationdb.org/downloads/6.3.10/bindings/ruby/fdb-6.3.10.gem>`_
|
||||
|
||||
Java 8+
|
||||
-------
|
||||
|
||||
* `fdb-java-6.3.9.jar <https://www.foundationdb.org/downloads/6.3.9/bindings/java/fdb-java-6.3.9.jar>`_
|
||||
* `fdb-java-6.3.9-javadoc.jar <https://www.foundationdb.org/downloads/6.3.9/bindings/java/fdb-java-6.3.9-javadoc.jar>`_
|
||||
* `fdb-java-6.3.10.jar <https://www.foundationdb.org/downloads/6.3.10/bindings/java/fdb-java-6.3.10.jar>`_
|
||||
* `fdb-java-6.3.10-javadoc.jar <https://www.foundationdb.org/downloads/6.3.10/bindings/java/fdb-java-6.3.10-javadoc.jar>`_
|
||||
|
||||
Go 1.11+
|
||||
--------
|
||||
|
|
Binary file not shown.
After Width: | Height: | Size: 2.5 MiB |
Binary file not shown.
After Width: | Height: | Size: 449 KiB |
Binary file not shown.
After Width: | Height: | Size: 467 KiB |
|
@ -0,0 +1,469 @@
|
|||
##############################
|
||||
FDB Read and Write Path
|
||||
##############################
|
||||
|
||||
| Author: Meng Xu
|
||||
| Reviewer: Evan Tschannen, Jingyu Zhou
|
||||
| Audience: FDB developers, SREs and expert users.
|
||||
|
||||
This document explains how FDB works at high level in database terms without mentioning FDB internal concepts.
|
||||
|
||||
We first discuss the read path and the write path separately for a single transaction.
|
||||
We then describe how the read path and write path work together for a read and write transaction.
|
||||
In the last section, we illustrate how multiple outstanding write transactions are processed and *ordered* in FDB.
|
||||
The processing order of multiple transactions is important because it affects the parallelism of transaction processing and the write throughput.
|
||||
|
||||
The content is based on FDB 6.2 and is true for FDB 6.3. A new timestamp proxy role is introduced in post FDB 6.3,
|
||||
which affects the read path. We will discuss the timestamp proxy role in the future version of this document.
|
||||
|
||||
.. image:: /images/FDB_read_path.png
|
||||
|
||||
Components
|
||||
=================
|
||||
|
||||
FDB is built on top of several key components.
|
||||
The terms below are common database or distributed system terms, instead of FDB specific terms.
|
||||
|
||||
**Timestamp generator.** It serves logical time, which defines happen-before relation:
|
||||
An event at t1 happens before another event at t2, if t1 < t2.
|
||||
The logical time is used to order events in FDB distributed systems, and it is used by concurrency control to decide if two transactions have conflicts.
|
||||
The logical time is the timestamp for a transaction.
|
||||
|
||||
* A read-only transaction has only one timestamp which is assigned when the transaction is created;
|
||||
* A read-write transaction has one timestamp at the transaction’s creation time and one timestamp at its commit time.
|
||||
|
||||
|
||||
**Concurrency Control.** It decides if two transactions can be executed concurrently without violating Strict Serializable Isolation (SSI) property.
|
||||
It uses the Optimistic Concurrency Control (OCC) mechanism described in [SSI] to achieve that.
|
||||
|
||||
**Client.** It is a library, an FDB application uses, to access the database.
|
||||
It exposes the transaction concept to applications.
|
||||
Client in FDB is a *fat* client that does multiple complex operations:
|
||||
(1) It calculates read and write conflict ranges for transactions;
|
||||
(2) it batches a transaction's operations and sends them all together at commit for better throughput;
|
||||
(3) it automatically retries failed transactions.
|
||||
|
||||
**Proxies.** It is a subsystem that acts like reverse proxies to serve clients’ requests. Its main purposes are:
|
||||
|
||||
* Serve read requests by (1) serving the logical time to the client; and (2) providing which storage server has data for a key;
|
||||
* Process write transactions on behalf of clients and return the results;
|
||||
|
||||
Each proxy has the system’s metadata, called transaction state store (txnStateStore). The metadata decides:
|
||||
(1) which key should go to which storage servers in the storage system;
|
||||
(2) which key should go to which processes in the durable queuing system;
|
||||
(3) is the database locked; etc.
|
||||
|
||||
The metadata on all proxies are consistent at any given timestamp.
|
||||
To achieve that, when a proxy has a metadata mutation that changes the metadata at the timestamp V1,
|
||||
the mutation is propagated to all proxies (through the concurrency control component), and
|
||||
its effect is applied on all proxies before any proxy can process transactions after the timestamp V1.
|
||||
|
||||
**Durable queuing system.** It is a queuing system for write traffic.
|
||||
Its producers are proxies that send transaction mutation data for durability purpose.
|
||||
Its consumers are storage systems that index data and serve read requests.
|
||||
The queuing system is partitioned for the key-space.
|
||||
A shard (i.e., key-range) is mapped to *k* log processes in the queuing system, where *k* is the replication factor.
|
||||
The mapping between shard and storage servers decides the mapping between shard and log processes.
|
||||
|
||||
**Storage system.** It is a collection of storage servers (SS), each of which is a sqlite database running on a single thread.
|
||||
It indexes data and serves read requests.
|
||||
Each SS has an in-memory p-tree data structure that stores the past 5-second mutations and an on-disk sqlite data.
|
||||
The in-memory data structure can serve multiple versions of key-values in the past 5 seconds.
|
||||
Due to memory limit, the in-memory data cannot hold more than 5 seconds’ multi-version key-values,
|
||||
which is the root cause why FDB’s transactions cannot be longer than 5 seconds.
|
||||
The on-disk sqlite data has only the most-recent key-value.
|
||||
|
||||
**Zookeeper like system.** The system solves two main problems:
|
||||
|
||||
* Store the configuration of the transaction system, which includes information such as generations of queuing systems and their processes.
|
||||
The system used to be zookeeper. FDB later replaced it with its own implementation.
|
||||
|
||||
* Service discovery. Processes in the zookeeper-like system serve as well-known endpoints for clients to connect to the cluster.
|
||||
These well-known endpoints return the list of proxies to clients.
|
||||
|
||||
|
||||
|
||||
Read path of a transaction
|
||||
==================================
|
||||
|
||||
Fig. 1 above shows a high-level view of the read path. An application uses FDB client library to read data.
|
||||
It creates a transaction and calls its read() function. The read() operation will lead to several steps.
|
||||
|
||||
* **Step 1 (Timestamp request)**: The read operation needs a timestamp.
|
||||
The client initiates the timestamp request through an RPC to proxy. The request will trigger Step 2 and Step 3;
|
||||
|
||||
* To improve throughput and reduce load on the server side, each client dynamically batches the timestamp requests.
|
||||
A client keeps adding requests to the current batch until
|
||||
*when* the number of requests in a batch exceeds a configurable threshold or
|
||||
*when* the batching times out at a dynamically computed threshold.
|
||||
Each batch sends only one timestamp request to proxy and all requests in the same batch share the same timestamp.
|
||||
|
||||
* **Step 2 (Get latest commit version)**: When the timestamp request arrives at a proxy,
|
||||
the proxy wants to get the largest commit version as the return value.
|
||||
So it contacts the rest of (n-1) proxies for their latest commit versions and
|
||||
uses the largest one as the return value for Step 1.
|
||||
|
||||
* O(n^2) communication cost: Because each proxy needs to contact the rest of (n-1) proxies to serve clients’ timestamp request,
|
||||
the communication cost is n*(n-1), where n is the number of proxies;
|
||||
|
||||
* Batching: To reduce communication cost, each proxy batches clients’ timestamp requests for a configurable time period (say 1ms) and
|
||||
returns the same timestamp for requests in the same batch.
|
||||
|
||||
* **Step 3 (Confirm proxy’s liveness)**: To prevent proxies that are no longer a part of the system (such as due to network partition) from serving requests,
|
||||
each proxy contacts the queuing system for each timestamp request to confirm it is still a valid proxy
|
||||
(i.e., not replaced by a newer generation proxy process).
|
||||
This is based on the FDB property that at most one active queuing system is available at any given time.
|
||||
|
||||
* Why do we need this step? This is to achieve consensus (i.e., external consistency).
|
||||
Compared to serializable isolation, Strict Serializable Isolation (SSI) requires external consistency.
|
||||
It means the timestamp received by clients cannot decrease. If we do not have this step and a network partition happens,
|
||||
a set of old proxies that are disconnected from the rest of systems can still serve timestamp requests to clients.
|
||||
These timestamps can be smaller than the new generation of proxies, which breaks the external consistency in SSI.
|
||||
|
||||
* O(n * m) communication cost: To confirm a proxy’s liveness, the proxy has to contact all members in the queuing system to
|
||||
ensure the queuing system is still active. This causes *m* network communication, where *m* is the number of processes in the queuing system.
|
||||
A system with n proxies will have O(n * m) network communications at the step 3. In our deployment, n is typically equal to m;
|
||||
|
||||
* Do FDB production clusters have this overhead? No. Our production clusters disable the external consistency by
|
||||
configuring the knob ALWAYS_CAUSAL_READ_RISKY.
|
||||
|
||||
* **Step 4 (Locality request)**: The client gets which storage servers have its requested keys by sending another RPC to proxy.
|
||||
This step returns a set of *k* storage server interfaces, where k is the replication factor;
|
||||
|
||||
* Client cache mechanism: The key location will be cached in client.
|
||||
Future requests will use the cache to directly read from storage servers,
|
||||
which saves a trip to proxy. If location is stale, read will return error and client will retry and refresh the cache.
|
||||
|
||||
* **Step 5 (Get data request)**: The client uses the location information from step 4 to directly query keys from corresponding storage servers.
|
||||
* Direct read from client’s memory: If a key’s value exists in the client’s memory, the client reads it directly from its local memory.
|
||||
This happens when a client updates a key’s value and later reads it.
|
||||
This optimization reduces the amount of unnecessary requests to storage servers.
|
||||
|
||||
* Load balance: Each data exists on k storage servers, where k is the replication factor.
|
||||
To balance the load across the k replicas, client has a load balancing algorithm to balance the number of requests to each replica.
|
||||
|
||||
* Transaction succeed: If the storage server has the data at the read timestamp, the client will receive the data and return succeed.
|
||||
|
||||
* Transaction too old error: If the read request’s timestamp is older than 5 seconds,
|
||||
storage server may have already flushed the data from its in-memory multi-version data structure to its on-disk single-version data structure.
|
||||
This means storage server does not have the data older than 5 seconds. So client will receive transaction too old error.
|
||||
The client will retry with a new timestamp.
|
||||
One scenario that can lead to the error is when it takes too long for a client to send the read request after it gets the timestamp.
|
||||
|
||||
* Future transaction error: Each storage server pulls data in increasing order of data’s timestamp from the queuing system.
|
||||
Let’s define a storage server’s timestamp as the largest timestamp of data the storage server has.
|
||||
If the read request’s timestamp is larger than the storage server’s timestamp,
|
||||
the storage server will reply future-transaction-error to the client.
|
||||
The client will retry. One scenario that can lead to the error is when the connection between the SS and the queuing system is slow.
|
||||
|
||||
* Wrong shard error: If keys in the request or result depend on data outside this storage server OR
|
||||
if a large selector offset prevents all data from being read in one range read.
|
||||
Client will invalidate its locality cache for the key and retry the read request at the failed key.
|
||||
|
||||
Implementation of FDB read path
|
||||
------------------------------------------
|
||||
|
||||
* **Step 1 (Timestamp request)**:
|
||||
* Each read request tries to get a timestamp if its transaction has not got one:
|
||||
https://github.com/apple/foundationdb/blob/4b0fba6ea89b51b82df7868ca24b81f6997db4e4/fdbclient/NativeAPI.actor.cpp#L2104
|
||||
* Client batches the get-timestamp requests:
|
||||
https://github.com/apple/foundationdb/blob/4b0fba6ea89b51b82df7868ca24b81f6997db4e4/fdbclient/NativeAPI.actor.cpp#L3172
|
||||
* Dynamic batching algorithm:
|
||||
https://github.com/apple/foundationdb/blob/4b0fba6ea89b51b82df7868ca24b81f6997db4e4/fdbclient/NativeAPI.actor.cpp#L3101-L3104
|
||||
|
||||
* **Step 2 (Get latest commit version)**: Contacting (n-1) proxies for commit version:
|
||||
https://github.com/apple/foundationdb/blob/4b0fba6ea89b51b82df7868ca24b81f6997db4e4/fdbserver/MasterProxyServer.actor.cpp#L1196
|
||||
|
||||
* **Step 3 (Confirm proxy’s liveness)**:
|
||||
* We typically set our clusters’ knob ALWAYS_CAUSAL_READ_RISKY to 1 to skip this step
|
||||
* Proxy confirms the queuing system is alive:
|
||||
https://github.com/apple/foundationdb/blob/4b0fba6ea89b51b82df7868ca24b81f6997db4e4/fdbserver/MasterProxyServer.actor.cpp#L1199
|
||||
* How is confirmEpochLive(..) implemented for the above item:
|
||||
https://github.com/apple/foundationdb/blob/4b0fba6ea89b51b82df7868ca24b81f6997db4e4/fdbserver/TagPartitionedLogSystem.actor.cpp#L1216-L1225
|
||||
|
||||
* **Step 4 (Locality request)**:
|
||||
https://github.com/apple/foundationdb/blob/4b0fba6ea89b51b82df7868ca24b81f6997db4e4/fdbclient/NativeAPI.actor.cpp#L1312-L1313
|
||||
|
||||
* **Step 5 (Get data request)**:
|
||||
* Logics of handling get value request:
|
||||
https://github.com/apple/foundationdb/blob/4b0fba6ea89b51b82df7868ca24b81f6997db4e4/fdbclient/NativeAPI.actor.cpp#L1306-L1396
|
||||
* Load balance algorithm: The loadBalance() at
|
||||
https://github.com/apple/foundationdb/blob/4b0fba6ea89b51b82df7868ca24b81f6997db4e4/fdbclient/NativeAPI.actor.cpp#L1342-L1344
|
||||
|
||||
|
||||
|
||||
Write path of a transaction
|
||||
================================
|
||||
|
||||
Suppose a client has a write-only transaction. Fig. 2 below shows the write path in a non-HA cluster.
|
||||
We will discuss how a transaction with both read and write works in the next section.
|
||||
|
||||
.. image:: /images/FDB_write_path.png
|
||||
|
||||
To simplify the explanation, the steps below do not include transaction batching on proxy,
|
||||
which is a typical database technique to increase transaction throughput.
|
||||
|
||||
* **Step 1 (Client buffers write mutations):** Client buffers all writes in a transaction until commit is called on the transaction.
|
||||
In the rest of document, a write is also named as a mutation.
|
||||
|
||||
* Client is a fat client that preprocess transactions:
|
||||
(a) For atomic operations, if client knows the key value, it will convert atomic operations to set operations;
|
||||
(b) For version stamp atomic operations, client adds extra bytes to key or value for the version stamp;
|
||||
(c) If a key has multiple operations, client coalesces them to one operation whenever possible.
|
||||
|
||||
* How client buffers mutations:
|
||||
https://github.com/apple/foundationdb/blob/07e354c499158630d760283aa845440cbeaaa1ca/fdbclient/NativeAPI.actor.cpp#L2345-L2361
|
||||
|
||||
* **Step 2 (Client commits the transaction):** When a client calls commit(), it performs several operations:
|
||||
|
||||
* **Step 2-1**: Add extra conflict ranges that are added by user but cannot be calculated from mutations.
|
||||
|
||||
* **Step 2-2**: Get a timestamp as the transaction’s start time. The timestamp does not need causal consistency because the transaction has no read.
|
||||
* This request goes to one of proxies. The proxy will contact all other (n-1) proxies to get the most recent commit version as it does in read path.
|
||||
The proxy does not need to contact log systems to confirm its liveness because it does not need causal consistency.
|
||||
|
||||
* **Step 2-3**: Sends the transaction’s information to a proxy. Load balancer in client decides which proxy will be used to handle a transaction.
|
||||
A transaction’s information includes:
|
||||
|
||||
* All of its mutations;
|
||||
* Read and write conflict range;
|
||||
* Transaction options that control a transaction’s behavior. For example, should the transaction write when the DB is locked?
|
||||
Shall the transaction use the first proxy in the proxy list to commit?
|
||||
|
||||
* Implementation:
|
||||
* Transaction commit function: https://github.com/apple/foundationdb/blob/07e354c499158630d760283aa845440cbeaaa1ca/fdbclient/NativeAPI.actor.cpp#L2895-L2899
|
||||
* Major work of commit in client side is done at here: https://github.com/apple/foundationdb/blob/07e354c499158630d760283aa845440cbeaaa1ca/fdbclient/NativeAPI.actor.cpp#L2784-L2868
|
||||
* Step 2-1: Add extra conflict ranges: https://github.com/apple/foundationdb/blob/07e354c499158630d760283aa845440cbeaaa1ca/fdbclient/NativeAPI.actor.cpp#L2826-L2828
|
||||
* Step 2-2: getReadVersion at commit which does not need external consistency because we do not have read in the transaction: https://github.com/apple/foundationdb/blob/07e354c499158630d760283aa845440cbeaaa1ca/fdbclient/NativeAPI.actor.cpp#L2822-L2823
|
||||
* Step 2-3: Send transaction to a proxy via RPC: https://github.com/apple/foundationdb/blob/07e354c499158630d760283aa845440cbeaaa1ca/fdbclient/NativeAPI.actor.cpp#L2691-L2700
|
||||
|
||||
* When a proxy receives clients’ transactions, it commits the transaction on behalf of clients with Step 3 - 9.
|
||||
|
||||
* **Step 3 (Proxy gets commit timestamp)**: The proxy gets the timestamp of the transaction’s commit time from the time oracle through an RPC call.
|
||||
|
||||
* To improve transaction throughput and reduce network communication overhead,
|
||||
each proxy dynamically batches transactions and processes them in batches.
|
||||
A proxy keeps batching transactions until the batch time exceeds a configurable timeout value or
|
||||
until the number of transactions exceeds a configurable value or
|
||||
until the total bytes of the batch exceeds a dynamically calculated desired size.
|
||||
|
||||
* The network overhead is 1 network communication per batch of commit transactions;
|
||||
|
||||
* How is the dynamically calculated batch size calculated: https://github.com/apple/foundationdb/blob/4b0fba6ea89b51b82df7868ca24b81f6997db4e4/fdbserver/MasterProxyServer.actor.cpp#L1770-L1774
|
||||
* How commit transactions are batched: https://github.com/apple/foundationdb/blob/4b0fba6ea89b51b82df7868ca24b81f6997db4e4/fdbserver/MasterProxyServer.actor.cpp#L416-L486
|
||||
* How each transaction batch is handled: https://github.com/apple/foundationdb/blob/4b0fba6ea89b51b82df7868ca24b81f6997db4e4/fdbserver/MasterProxyServer.actor.cpp#L523-L1174
|
||||
* Where the proxy sends the commit timestamp request to the timestamp generator: https://github.com/apple/foundationdb/blob/4b0fba6ea89b51b82df7868ca24b81f6997db4e4/fdbserver/MasterProxyServer.actor.cpp#L586-L587
|
||||
|
||||
* **Step 4 (Proxy builds transactions’ conflict ranges)**: Because the concurrency control component may have multiple processes,
|
||||
each of which is responsible for resolving conflicts in a key range,
|
||||
the proxy needs to build one transaction-conflict-resolution request for each concurrency control process:
|
||||
For each transaction, the proxy splits its read and write conflict ranges based on concurrency control process’ responsible ranges.
|
||||
The proxy will create k conflict resolution requests for each transaction, where k is the number of processes in the concurrency control component.
|
||||
|
||||
* Implementation: https://github.com/apple/foundationdb/blob/4086e3a2750b776cc8bfb0f0e463fe00ac905595/fdbserver/MasterProxyServer.actor.cpp#L607-L618
|
||||
|
||||
* **Step 5 (Proxy sends conflict resolution requests to concurrency control)**:
|
||||
Each concurrency control process is responsible for checking conflicts in a key range.
|
||||
Each process checks if the transaction has conflicts with other transactions in its key-range.
|
||||
Each process returns the conflict checking result back to the proxy.
|
||||
|
||||
* What is conflict range?
|
||||
* A transaction’s write conflict range includes any key and key-ranges that are modified in the transactions.
|
||||
* A transaction’s read conflict range includes any key and key-ranges that are read in the transaction.
|
||||
* Client can also use transaction options to add explicit read-conflict-range or write-conflict-range.
|
||||
Example: https://github.com/apple/foundationdb/blob/4b0fba6ea89b51b82df7868ca24b81f6997db4e4/fdbclient/NativeAPI.actor.cpp#L2634-L2635
|
||||
|
||||
* **Piggy-back metadata change**. If the transaction changes database’s metadata, such as locking the database,
|
||||
the change is considered as a special mutation and also checked for conflicts by the concurrency control component.
|
||||
The primary difference between metadata mutation and normal mutations is that the metadata change must be propagated to all proxies
|
||||
so that all proxies have a consistent view of database’s metadata.
|
||||
This is achieved by piggy-backing metadata change in the reply from resolver to proxies.
|
||||
|
||||
* Implementation
|
||||
* Create conflict resolution requests for a batch of transactions: https://github.com/apple/foundationdb/blob/07e354c499158630d760283aa845440cbeaaa1ca/fdbserver/MasterProxyServer.actor.cpp#L607-L618
|
||||
* Metadata mutations are sent from proxy to concurrency control processes: https://github.com/apple/foundationdb/blob/07e354c499158630d760283aa845440cbeaaa1ca/fdbserver/MasterProxyServer.actor.cpp#L366-L369
|
||||
|
||||
* **Step 6 (Resolve conflicts among concurrent transactions)**:
|
||||
Each concurrency control process checks conflicts among transactions based on the theory in [1].
|
||||
In a nutshell, it checks for read-write conflicts. Suppose two transactions operate on the same key.
|
||||
If a write transaction’s time overlaps between another read-write transaction’s start time and commit time,
|
||||
only one transaction can commit: the one that arrives first at all concurrency control processes will commit.
|
||||
|
||||
* Implementation
|
||||
* Proxy sends conflict checking request: https://github.com/apple/foundationdb/blob/07e354c499158630d760283aa845440cbeaaa1ca/fdbserver/MasterProxyServer.actor.cpp#L626-L629
|
||||
* Concurrency control process handles the request: https://github.com/apple/foundationdb/blob/07e354c499158630d760283aa845440cbeaaa1ca/fdbserver/Resolver.actor.cpp#L320-L322
|
||||
|
||||
* **Step 7 (Proxy’s post resolution processing)**:
|
||||
Once the proxy receives conflict-resolution replies from all concurrency control processes, it performs three steps
|
||||
|
||||
* **Step 7-1 (Apply metadata effect caused by other proxies)**: As mentioned above, when a proxy changes database’s metadata,
|
||||
the metadata mutations will be propagated via the concurrency control component to other proxies.
|
||||
So the proxy needs to first compute and apply these metadata mutations onto the proxy’s local states.
|
||||
Otherwise, the proxy will operate in a different view of database’s metadata.
|
||||
|
||||
* For example, if one proxy locks the database in a committed transaction at time t1, all other proxies should have seen the lock immediately after t1. Since another proxy may have transactions in flight already at t1, the proxy must first apply the “lock“ effect before it can process its in-flight transactions.
|
||||
* How metadata effect is applied in implementation: https://github.com/apple/foundationdb/blob/07e354c499158630d760283aa845440cbeaaa1ca/fdbserver/MasterProxyServer.actor.cpp#L678-L719
|
||||
|
||||
* **Step 7-2 (Determine which transactions are committed)**: Proxy combines results from all concurrency control processes.
|
||||
Only if all concurrency control processes say a transaction is committed, will the transaction be considered as committed by the proxy.
|
||||
|
||||
* Implementation: https://github.com/apple/foundationdb/blob/07e354c499158630d760283aa845440cbeaaa1ca/fdbserver/MasterProxyServer.actor.cpp#L721-L757
|
||||
|
||||
* **Step 7-3 (Apply metadata effect caused by this proxy)**: For each committed transaction,
|
||||
this proxy applies its metadata mutations to the proxy’s local state.
|
||||
|
||||
* Note: These metadata mutations are also sent to concurrency control processes and propagated to other proxies at Step 5.
|
||||
This step is to apply metadata effect on its own proxy’s states.
|
||||
* Implementation: https://github.com/apple/foundationdb/blob/07e354c499158630d760283aa845440cbeaaa1ca/fdbserver/MasterProxyServer.actor.cpp#L763-L777
|
||||
|
||||
* **Step 7-4 (Assign mutations to storage servers and serialize them)**:
|
||||
In order to let the rest of system (the queuing system and storage system) know which process a mutation should be routed to,
|
||||
the proxy needs to add tags to mutations.
|
||||
The proxy serializes mutations with the same tag into the same message and sends the serialized message to the queuing system.
|
||||
|
||||
* Implementation of adding tags and serializing mutations into messages: https://github.com/apple/foundationdb/blob/07e354c499158630d760283aa845440cbeaaa1ca/fdbserver/MasterProxyServer.actor.cpp#L800-L910
|
||||
* The lines that add tags to a mutation and serialize it: https://github.com/apple/foundationdb/blob/07e354c499158630d760283aa845440cbeaaa1ca/fdbserver/MasterProxyServer.actor.cpp#L846-L847
|
||||
|
||||
* **Step 7-5 (Duplicate and serialize mutations to backup system keyspace)**:
|
||||
When backup or disaster recovery (DR) is enabled, each proxy captures mutation streams into a dedicated system keyspace.
|
||||
Mutations in a transaction batch are serialized as a single mutation in a dedicated system keyspace.
|
||||
|
||||
* How mutations are duplicated for backup and DR: https://github.com/apple/foundationdb/blob/07e354c499158630d760283aa845440cbeaaa1ca/fdbserver/MasterProxyServer.actor.cpp#L912-L986
|
||||
* Note: FDB will have a new backup system that avoids duplicating mutations to the system keyspace.
|
||||
Its design is similar to database’s Change Data Capture (CDC) design. The new backup system is not production-ready yet.
|
||||
|
||||
* **Step 8 (Make mutation messages durable in the queuing system)**:
|
||||
Proxy sends serialized mutation messages to the queuing system.
|
||||
The queuing system will append the mutation to an append-only file, fsync it, and send the response back.
|
||||
Each message has a tag, which decides which process in the queuing system the message should be sent to.
|
||||
The queuing system returns to the proxy the minimum known committed version, which is the smallest commit version among all proxies.
|
||||
The minimum known committed version is used when the system recovers from faults.
|
||||
|
||||
* Sending messages to the queuing system is abstracted into a push() operation: https://github.com/apple/foundationdb/blob/07e354c499158630d760283aa845440cbeaaa1ca/fdbserver/MasterProxyServer.actor.cpp#L1045
|
||||
* The minimum known committed version is called minKnownCommittedVersion. It is updated for each commit: https://github.com/apple/foundationdb/blob/07e354c499158630d760283aa845440cbeaaa1ca/fdbserver/MasterProxyServer.actor.cpp#L1067
|
||||
|
||||
* **Step 9 (Reply to client)**: Proxy replies the transaction’s result to client.
|
||||
If the transaction fails (say due to transaction conflicts), proxy sends the error message to the client.
|
||||
|
||||
* Reply to clients based on different transaction’s results: https://github.com/apple/foundationdb/blob/07e354c499158630d760283aa845440cbeaaa1ca/fdbserver/MasterProxyServer.actor.cpp#L1117-L1138
|
||||
|
||||
* **Step 10 (Storage systems pull data from queuing system)**:
|
||||
Storage system asynchronously pulls data from queuing system and indexes data for read path.
|
||||
|
||||
* Each SS has a primary process (called primary tLog) in the queuing system from which it pulls the SS’s data.
|
||||
Each SS only gets in-ordered streams of mutations that are owned by the SS.
|
||||
|
||||
* In failure scenario when a SS cannot reach the primary tLog, the SS will pull data from different tLogs that have part of the SS’s data.
|
||||
The SS will then merge the stream of data from different tLogs.
|
||||
|
||||
* Each SS does not make its pulled data durable to disk until the data becomes
|
||||
at least 5 seconds older than the most recent data the SS has pulled.
|
||||
This allows each SS to roll back at least 5 seconds of mutations.
|
||||
|
||||
* Why do we need roll back feature for SS? This comes from an optimization used in FDB.
|
||||
To make a mutation available in a SS as soon as possible,
|
||||
a SS may fetch a mutation from the queuing system that has not been fully replicated.
|
||||
The mutation’s transaction may be aborted in rare situations, such as
|
||||
when FDB has to recover from faults and decides to throw away the last few non-fully-durable transactions.
|
||||
SSes must throw away data in the aborted transactions.
|
||||
|
||||
* Why does SS not make data durable until 5 seconds later?
|
||||
This is because today’s SS does not support rolling back data that has already been made durable on disk.
|
||||
To support roll back, SS keeps data that might be rolled back in memory.
|
||||
When roll-back is needed, SS just throws away the in-memory data. This simplifies the SS implementation.
|
||||
|
||||
|
||||
* Each storage process pulls data from the queuing system: https://github.com/apple/foundationdb/blob/07e354c499158630d760283aa845440cbeaaa1ca/fdbserver/storageserver.actor.cpp#L3593-L3599
|
||||
|
||||
|
||||
|
||||
Read write path of a transaction
|
||||
====================================
|
||||
|
||||
This section uses an example transaction to describe how a transaction with both read and write operation works in FDB.
|
||||
|
||||
Suppose application creates the following transaction, where *Future<int>* is an object that holds an asynchronous call and
|
||||
becomes ready when the async call returns, and *wait()* is a synchronous point when the code waits for futures to be ready.
|
||||
The following code reads key k1 and k2 from database, increases k1’s value by 1 and write back k1’s new value into database.
|
||||
|
||||
**Example Transaction** ::
|
||||
|
||||
Line1: Transaction tr;
|
||||
Line2: Future<int> fv1 = tr.get(k1);
|
||||
Line3: Future<int> fv2 = tr.get(k2);
|
||||
Line4: v1 = wait(fv1);
|
||||
Line5: v2 = wait(fv2);
|
||||
Line6: tr.set(v1+v2);
|
||||
Line7: tr.commit();
|
||||
|
||||
The transaction starts with the read path:
|
||||
|
||||
* When tr.get() is called, FDB client issues a timestamp request to proxies *if* the transaction has not set its start timestamp.
|
||||
The logic is the Step 1 in the read path;
|
||||
|
||||
* Batching timestamp requests. When another tr.get() is called, it will try to get a timestamp as well. If we let every get request follow the Step 1 in the read path, the performance overhead (especially network communication) will be significant. In addition, this is not necessary because a transaction has only one start timestamp. To solve this problem, client chooses to batch timestamp requests from the same transaction and only issues one timestamp request when the transaction size reaches a preconfigured threshold or when the transaction duration reaches the batching timeout threshold.
|
||||
* Timestamp requests are batched: https://github.com/apple/foundationdb/blob/4086e3a2750b776cc8bfb0f0e463fe00ac905595/fdbclient/NativeAPI.actor.cpp#L3185
|
||||
* Thresholds for client to send the timestamp request: https://github.com/apple/foundationdb/blob/4086e3a2750b776cc8bfb0f0e463fe00ac905595/fdbclient/NativeAPI.actor.cpp#L3095-L3098
|
||||
|
||||
* Each read request, i.e., tr.get operation in the example, will follow the read path to get data from storage servers, except that they will share the same timestamp;
|
||||
* These read requests are sent to FDB cluster in parallel.
|
||||
The ordering of which read request will be ready first depends on requests’ network path and storage servers’ load.
|
||||
* In the example, tr.get(k2) may return result earlier than tr.get(k1).
|
||||
|
||||
* Client will likely block at the synchronization point at Line 4, until the value is returned from the cluster.
|
||||
* To maximize clients’ performance, a client can issue multiple transactions concurrently.
|
||||
When one transaction is blocked at the synchronization point,
|
||||
the client can switch to work on the other transactions concurrently.
|
||||
|
||||
* Client may or may not block at the synchronization point at Line 5.
|
||||
If tr.get(k2) returns earlier than tr.get(k1), the future fv2 is already ready when the client arrives at Line 5.
|
||||
|
||||
* At Line 6, client starts the write path. Because the transaction already has its start timestamp,
|
||||
client does not need to request for the transaction’s start time any more and can skip the Step 2-2 in the write path.
|
||||
|
||||
* At Line 7, client commits the transaction, which will trigger the operations from Step 2 in the write path.
|
||||
|
||||
|
||||
A transaction can get more complex than the example above.
|
||||
|
||||
* A transaction can have more write operations between Line 6 and Line 7.
|
||||
Those writes will be buffered in client’s memory, which is the Step 1 in the write path.
|
||||
Only when the client calls commit() will the rest of the steps in the write path be triggered;
|
||||
|
||||
* A transaction can have read operations between Line 6 and Line 7 as well.
|
||||
|
||||
* A transaction may return commit_unknown_result, which indicates the transaction may or may not succeed.
|
||||
If application simply retries the transaction, the transaction may get executed twice.
|
||||
To solve this problem, the application can add a transaction id to the transaction and
|
||||
check if the transaction id exists on the commit_unknown_result error.
|
||||
|
||||
|
||||
|
||||
Concurrency and ordering of multiple write transactions
|
||||
=======================================================================
|
||||
|
||||
FDB orders concurrent transactions in increasing order of the transactions’ commit timestamp.
|
||||
The ordering is enforced in the timestamp generator, the concurrency control component and the durable queuing system.
|
||||
|
||||
* When timestamp generator serves the commit timestamp request from a proxy,
|
||||
the reply includes not only the commit timestamp but also the latest commit timestamp the generator has sent out.
|
||||
For example, the timestamp generator just gave out the commit timestamp 50.
|
||||
When the next request arrives, the generator’s timestamp is 100 and the generator replies (50, 100).
|
||||
When the second request arrives and the generator’s timestamp is 200, the generator replies (100, 200).
|
||||
|
||||
* When a proxy sends conflict resolution requests to concurrency control processes or durable requests to the queuing system,
|
||||
each request includes both the current transaction’s commit timestamp and the previous transaction’s commit timestamp.
|
||||
|
||||
* Each concurrency control process and each process in the queuing system always process requests in the strict order of the request’s commit version.
|
||||
The semantics is: do not process a request whose commit timestamp is V2 until the request at its previous commit timestamp V1 has been processed.
|
||||
|
||||
|
||||
We use the following example and draw its swimlane diagram to illustrate how two write transactions are ordered in FDB.
|
||||
The diagram with notes can be viewed at `here <https://lucid.app/lucidchart/6336dbe3-cff4-4c46-995a-4ca3d9260696/view?page=0_0#?folder_id=home&browser=icon>`_.
|
||||
|
||||
.. image:: /images/FDB_multiple_txn_swimlane_diagram.png
|
||||
|
||||
Reference
|
||||
============
|
||||
|
||||
[SSI] Serializable Snapshot Isolation in PostgreSQL. https://arxiv.org/pdf/1208.4179.pdf
|
|
@ -6,6 +6,11 @@ Release Notes
|
|||
======
|
||||
* Fix invalid memory access on data distributor when snapshotting large clusters. `(PR #4076) <https://github.com/apple/foundationdb/pull/4076>`_
|
||||
* Add human-readable DateTime to trace events `(PR #4087) <https://github.com/apple/foundationdb/pull/4087>`_
|
||||
* Proxy rejects transaction batch that exceeds MVCC window `(PR #4113) <https://github.com/apple/foundationdb/pull/4113>`_
|
||||
* Add a command in fdbcli to manually trigger the detailed teams information loggings in data distribution. `(PR #4060) <https://github.com/apple/foundationdb/pull/4060>`_
|
||||
* Add documentation on read and write Path. `(PR #4099) <https://github.com/apple/foundationdb/pull/4099>`_
|
||||
* Add a histogram to expose commit batching window on Proxies. `(PR #4166) <https://github.com/apple/foundationdb/pull/4166>`_
|
||||
* Fix double counting of range reads in TransactionMetrics. `(PR #4130) <https://github.com/apple/foundationdb/pull/4130>`_
|
||||
|
||||
6.2.28
|
||||
======
|
||||
|
|
|
@ -4,6 +4,12 @@ Release Notes
|
|||
|
||||
6.3.10
|
||||
======
|
||||
* Make fault tolerance metric calculation in HA clusters consistent with 6.2 branch. `(PR #4175) <https://github.com/apple/foundationdb/pull/4175>`_
|
||||
* Bug fix, stack overflow in redwood storage engine. `(PR #4161) <https://github.com/apple/foundationdb/pull/4161>`_
|
||||
* Bug fix, getting certain special keys fail. `(PR #4128) <https://github.com/apple/foundationdb/pull/4128>`_
|
||||
* Prevent slow task on TLog by yielding while processing ignored pop requests. `(PR #4112) <https://github.com/apple/foundationdb/pull/4112>`_
|
||||
* Support reading xxhash3 sqlite checksums. `(PR #4104) <https://github.com/apple/foundationdb/pull/4104>`_
|
||||
* Fix a race between submit and abort backup. `(PR #3935) <https://github.com/apple/foundationdb/pull/3935>`_
|
||||
|
||||
Packaging
|
||||
---------
|
||||
|
@ -132,7 +138,7 @@ Fixes from previous versions
|
|||
* The 6.3.3 patch release includes all fixes from the patch release 6.2.23. :doc:`(6.2 Release Notes) </release-notes/release-notes-620>`
|
||||
* The 6.3.5 patch release includes all fixes from the patch releases 6.2.24 and 6.2.25. :doc:`(6.2 Release Notes) </release-notes/release-notes-620>`
|
||||
* The 6.3.9 patch release includes all fixes from the patch releases 6.2.26. :doc:`(6.2 Release Notes) </release-notes/release-notes-620>`
|
||||
* The 6.3.10 patch release includes all fixes from the patch releases 6.2.27. :doc:`(6.2 Release Notes) </release-notes/release-notes-620>`
|
||||
* The 6.3.10 patch release includes all fixes from the patch releases 6.2.27-6.2.29 :doc:`(6.2 Release Notes) </release-notes/release-notes-620>`
|
||||
|
||||
Fixes only impacting 6.3.0+
|
||||
---------------------------
|
||||
|
|
|
@ -35,7 +35,7 @@ Status
|
|||
Bindings
|
||||
--------
|
||||
* Python: The function ``get_estimated_range_size_bytes`` will now throw an error if the ``begin_key`` or ``end_key`` is ``None``. `(PR #3394) <https://github.com/apple/foundationdb/pull/3394>`_
|
||||
|
||||
* C: Added a function, ``fdb_database_reboot_worker``, to reboot or suspend the specified process. `(PR #4094) <https://github.com/apple/foundationdb/pull/4094>`_
|
||||
|
||||
Other Changes
|
||||
-------------
|
||||
|
|
|
@ -28,6 +28,8 @@ These documents explain the engineering design of FoundationDB, with detailed in
|
|||
|
||||
* :doc:`kv-architecture` provides a description of every major role a process in FoundationDB can fulfill.
|
||||
|
||||
* :doc:`read-write-path` describes how FDB read and write path works.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:titlesonly:
|
||||
|
@ -45,3 +47,4 @@ These documents explain the engineering design of FoundationDB, with detailed in
|
|||
flow
|
||||
testing
|
||||
kv-architecture
|
||||
read-write-path
|
||||
|
|
|
@ -201,6 +201,15 @@ struct VersionedMutations {
|
|||
Arena arena; // The arena that contains the mutations.
|
||||
};
|
||||
|
||||
struct VersionedKVPart {
|
||||
Arena arena;
|
||||
Version version;
|
||||
int32_t part;
|
||||
StringRef kv;
|
||||
VersionedKVPart(Arena arena, Version version, int32_t part, StringRef kv)
|
||||
: arena(arena), version(version), part(part), kv(kv) {}
|
||||
};
|
||||
|
||||
/*
|
||||
* Model a decoding progress for a mutation file. Usage is:
|
||||
*
|
||||
|
@ -217,7 +226,10 @@ struct VersionedMutations {
|
|||
* pairs, the decoding of mutation batch needs to look ahead one more pair. So
|
||||
* at any time this object might have two blocks of data in memory.
|
||||
*/
|
||||
struct DecodeProgress {
|
||||
class DecodeProgress {
|
||||
std::vector<VersionedKVPart> keyValues;
|
||||
|
||||
public:
|
||||
DecodeProgress() = default;
|
||||
template <class U>
|
||||
DecodeProgress(const LogFile& file, U &&values)
|
||||
|
@ -227,9 +239,9 @@ struct DecodeProgress {
|
|||
// However, we could have unfinished version in the buffer when EOF is true,
|
||||
// which means we should look for data in the next file. The caller
|
||||
// should call getUnfinishedBuffer() to get these left data.
|
||||
bool finished() { return (eof && keyValues.empty()) || (leftover && !keyValues.empty()); }
|
||||
bool finished() const { return (eof && keyValues.empty()) || (leftover && !keyValues.empty()); }
|
||||
|
||||
std::vector<std::tuple<Arena, Version, int32_t, StringRef>>&& getUnfinishedBuffer() && { return std::move(keyValues); }
|
||||
std::vector<VersionedKVPart>&& getUnfinishedBuffer() && { return std::move(keyValues); }
|
||||
|
||||
// Returns all mutations of the next version in a batch.
|
||||
Future<VersionedMutations> getNextBatch() { return getNextBatchImpl(this); }
|
||||
|
@ -239,7 +251,7 @@ struct DecodeProgress {
|
|||
// The following are private APIs:
|
||||
|
||||
// Returns true if value contains complete data.
|
||||
bool isValueComplete(StringRef value) {
|
||||
static bool isValueComplete(StringRef value) {
|
||||
StringRefReader reader(value, restore_corrupted_data());
|
||||
|
||||
reader.consume<uint64_t>(); // Consume the includeVersion
|
||||
|
@ -260,41 +272,41 @@ struct DecodeProgress {
|
|||
wait(readAndDecodeFile(self));
|
||||
}
|
||||
|
||||
auto& tuple = self->keyValues[0];
|
||||
ASSERT(std::get<2>(tuple) == 0); // first part number must be 0.
|
||||
const auto& kv = self->keyValues[0];
|
||||
ASSERT(kv.part == 0);
|
||||
|
||||
// decode next versions, check if they are continuous parts
|
||||
int idx = 1; // next kv pair in "keyValues"
|
||||
int bufSize = std::get<3>(tuple).size();
|
||||
int bufSize = kv.kv.size();
|
||||
for (int lastPart = 0; idx < self->keyValues.size(); idx++, lastPart++) {
|
||||
if (idx == self->keyValues.size()) break;
|
||||
|
||||
auto next_tuple = self->keyValues[idx];
|
||||
if (std::get<1>(tuple) != std::get<1>(next_tuple)) {
|
||||
const auto& nextKV = self->keyValues[idx];
|
||||
if (kv.version != nextKV.version) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (lastPart + 1 != std::get<2>(next_tuple)) {
|
||||
TraceEvent("DecodeError").detail("Part1", lastPart).detail("Part2", std::get<2>(next_tuple));
|
||||
if (lastPart + 1 != nextKV.part) {
|
||||
TraceEvent("DecodeError").detail("Part1", lastPart).detail("Part2", nextKV.part);
|
||||
throw restore_corrupted_data();
|
||||
}
|
||||
bufSize += std::get<3>(next_tuple).size();
|
||||
bufSize += nextKV.kv.size();
|
||||
}
|
||||
|
||||
VersionedMutations m;
|
||||
m.version = std::get<1>(tuple);
|
||||
m.version = kv.version;
|
||||
TraceEvent("Decode").detail("Version", m.version).detail("Idx", idx).detail("Q", self->keyValues.size());
|
||||
StringRef value = std::get<3>(tuple);
|
||||
StringRef value = kv.kv;
|
||||
if (idx > 1) {
|
||||
// Stitch parts into one and then decode one by one
|
||||
Standalone<StringRef> buf = self->combineValues(idx, bufSize);
|
||||
value = buf;
|
||||
m.arena = buf.arena();
|
||||
}
|
||||
if (self->isValueComplete(value)) {
|
||||
if (isValueComplete(value)) {
|
||||
m.mutations = decode_value(value);
|
||||
if (m.arena.getSize() == 0) {
|
||||
m.arena = std::get<0>(tuple);
|
||||
m.arena = kv.arena;
|
||||
}
|
||||
self->keyValues.erase(self->keyValues.begin(), self->keyValues.begin() + idx);
|
||||
return m;
|
||||
|
@ -317,7 +329,7 @@ struct DecodeProgress {
|
|||
Standalone<StringRef> buf = makeString(len);
|
||||
int n = 0;
|
||||
for (int i = 0; i < idx; i++) {
|
||||
const auto& value = std::get<3>(keyValues[i]);
|
||||
const auto& value = keyValues[i].kv;
|
||||
memcpy(mutateString(buf) + n, value.begin(), value.size());
|
||||
n += value.size();
|
||||
}
|
||||
|
@ -363,12 +375,9 @@ struct DecodeProgress {
|
|||
// The (version, part) in a block can be out of order, i.e., (3, 0)
|
||||
// can be followed by (4, 0), and then (3, 1). So we need to sort them
|
||||
// first by version, and then by part number.
|
||||
std::sort(keyValues.begin(), keyValues.end(),
|
||||
[](const std::tuple<Arena, Version, int32_t, StringRef>& a,
|
||||
const std::tuple<Arena, Version, int32_t, StringRef>& b) {
|
||||
return std::get<1>(a) == std::get<1>(b) ? std::get<2>(a) < std::get<2>(b)
|
||||
: std::get<1>(a) < std::get<1>(b);
|
||||
});
|
||||
std::sort(keyValues.begin(), keyValues.end(), [](const VersionedKVPart& a, const VersionedKVPart& b) {
|
||||
return a.version == b.version ? a.part < b.part : a.version < b.version;
|
||||
});
|
||||
return;
|
||||
} catch (Error& e) {
|
||||
TraceEvent(SevWarn, "CorruptBlock").error(e).detail("Offset", reader.rptr - buf.begin());
|
||||
|
@ -419,8 +428,6 @@ struct DecodeProgress {
|
|||
int64_t offset = 0;
|
||||
bool eof = false;
|
||||
bool leftover = false; // Done but has unfinished version batch data left
|
||||
// A (version, part_number)'s mutations and memory arena.
|
||||
std::vector<std::tuple<Arena, Version, int32_t, StringRef>> keyValues;
|
||||
};
|
||||
|
||||
ACTOR Future<Void> decode_logs(DecodeParams params) {
|
||||
|
@ -445,7 +452,7 @@ ACTOR Future<Void> decode_logs(DecodeParams params) {
|
|||
|
||||
state int i = 0;
|
||||
// Previous file's unfinished version data
|
||||
state std::vector<std::tuple<Arena, Version, int32_t, StringRef>> left;
|
||||
state std::vector<VersionedKVPart> left;
|
||||
for (; i < logs.size(); i++) {
|
||||
if (logs[i].fileSize == 0) continue;
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -152,17 +152,16 @@ public:
|
|||
|
||||
//Applies all enabled transaction options to the given transaction
|
||||
void apply(Reference<ReadYourWritesTransaction> tr) {
|
||||
for(auto itr = transactionOptions.options.begin(); itr != transactionOptions.options.end(); ++itr)
|
||||
tr->setOption(itr->first, itr->second.castTo<StringRef>());
|
||||
for (const auto& [name, value] : transactionOptions.options) {
|
||||
tr->setOption(name, value.castTo<StringRef>());
|
||||
}
|
||||
}
|
||||
|
||||
//Returns true if any options have been set
|
||||
bool hasAnyOptionsEnabled() {
|
||||
return !transactionOptions.options.empty();
|
||||
}
|
||||
bool hasAnyOptionsEnabled() const { return !transactionOptions.options.empty(); }
|
||||
|
||||
//Prints a list of enabled options, along with their parameters (if any)
|
||||
void print() {
|
||||
void print() const {
|
||||
bool found = false;
|
||||
found = found || transactionOptions.print();
|
||||
|
||||
|
@ -171,14 +170,10 @@ public:
|
|||
}
|
||||
|
||||
//Returns a vector of the names of all documented options
|
||||
std::vector<std::string> getValidOptions() {
|
||||
return transactionOptions.getValidOptions();
|
||||
}
|
||||
std::vector<std::string> getValidOptions() const { return transactionOptions.getValidOptions(); }
|
||||
|
||||
//Prints the help string obtained by invoking `help options'
|
||||
void printHelpString() {
|
||||
transactionOptions.printHelpString();
|
||||
}
|
||||
void printHelpString() const { transactionOptions.printHelpString(); }
|
||||
|
||||
private:
|
||||
//Sets a transaction option. If intrans == true, then this option is also applied to the passed in transaction.
|
||||
|
@ -219,7 +214,7 @@ private:
|
|||
}
|
||||
|
||||
//Prints a list of all enabled options in this group
|
||||
bool print() {
|
||||
bool print() const {
|
||||
bool found = false;
|
||||
|
||||
for(auto itr = legalOptions.begin(); itr != legalOptions.end(); ++itr) {
|
||||
|
@ -238,7 +233,7 @@ private:
|
|||
}
|
||||
|
||||
//Returns true if the specified option is documented
|
||||
bool isDocumented(typename T::Option option) {
|
||||
bool isDocumented(typename T::Option option) const {
|
||||
FDBOptionInfo info = T::optionInfo.getMustExist(option);
|
||||
|
||||
std::string deprecatedStr = "Deprecated";
|
||||
|
@ -246,7 +241,7 @@ private:
|
|||
}
|
||||
|
||||
//Returns a vector of the names of all documented options
|
||||
std::vector<std::string> getValidOptions() {
|
||||
std::vector<std::string> getValidOptions() const {
|
||||
std::vector<std::string> ret;
|
||||
|
||||
for (auto itr = legalOptions.begin(); itr != legalOptions.end(); ++itr)
|
||||
|
@ -258,7 +253,7 @@ private:
|
|||
|
||||
//Prints a help string for each option in this group. Any options with no comment
|
||||
//are excluded from this help string. Lines are wrapped to 80 characters.
|
||||
void printHelpString() {
|
||||
void printHelpString() const {
|
||||
for(auto itr = legalOptions.begin(); itr != legalOptions.end(); ++itr) {
|
||||
if(isDocumented(itr->second)) {
|
||||
FDBOptionInfo info = T::optionInfo.getMustExist(itr->second);
|
||||
|
@ -615,6 +610,9 @@ void initHelp() {
|
|||
CommandHelp("unlock <UID>", "unlock the database with the provided lockUID",
|
||||
"Unlocks the database with the provided lockUID. This is a potentially dangerous operation, so the "
|
||||
"user will be asked to enter a passphrase to confirm their intent.");
|
||||
helpMap["triggerddteaminfolog"] =
|
||||
CommandHelp("triggerddteaminfolog", "trigger the data distributor teams logging",
|
||||
"Trigger the data distributor to log detailed information about its teams.");
|
||||
|
||||
hiddenCommands.insert("expensive_data_check");
|
||||
hiddenCommands.insert("datadistribution");
|
||||
|
@ -629,12 +627,12 @@ void printVersion() {
|
|||
|
||||
void printHelpOverview() {
|
||||
printf("\nList of commands:\n\n");
|
||||
for (auto i = helpMap.begin(); i != helpMap.end(); ++i)
|
||||
if (i->second.short_desc.size())
|
||||
printf(" %s:\n %s\n", i->first.c_str(), i->second.short_desc.c_str());
|
||||
printf("\nFor information on a specific command, type `help <command>'.");
|
||||
printf("\nFor information on escaping keys and values, type `help escaping'.");
|
||||
printf("\nFor information on available options, type `help options'.\n\n");
|
||||
for (const auto& [command, help] : helpMap) {
|
||||
if (help.short_desc.size()) printf(" %s:\n %s\n", command.c_str(), help.short_desc.c_str());
|
||||
printf("\nFor information on a specific command, type `help <command>'.");
|
||||
printf("\nFor information on escaping keys and values, type `help escaping'.");
|
||||
printf("\nFor information on available options, type `help options'.\n\n");
|
||||
}
|
||||
}
|
||||
|
||||
void printHelp(StringRef command) {
|
||||
|
@ -1774,6 +1772,23 @@ int printStatusFromJSON( std::string const& jsonFileName ) {
|
|||
}
|
||||
}
|
||||
|
||||
ACTOR Future<Void> triggerDDTeamInfoLog(Database db) {
|
||||
state ReadYourWritesTransaction tr(db);
|
||||
loop {
|
||||
try {
|
||||
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
||||
std::string v = deterministicRandom()->randomUniqueID().toString();
|
||||
tr.set(triggerDDTeamInfoPrintKey, v);
|
||||
wait(tr.commit());
|
||||
printf("Triggered team info logging in data distribution.\n");
|
||||
return Void();
|
||||
} catch (Error& e) {
|
||||
wait(tr.onError(e));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ACTOR Future<Void> timeWarning( double when, const char* msg ) {
|
||||
wait( delay(when) );
|
||||
fputs( msg, stderr );
|
||||
|
@ -2005,16 +2020,18 @@ ACTOR Future<bool> fileConfigure(Database db, std::string filePath, bool isNewDa
|
|||
configString = "new";
|
||||
}
|
||||
|
||||
for(auto kv : configJSON) {
|
||||
for (const auto& [name, value] : configJSON) {
|
||||
if(!configString.empty()) {
|
||||
configString += " ";
|
||||
}
|
||||
if( kv.second.type() == json_spirit::int_type ) {
|
||||
configString += kv.first + ":=" + format("%d", kv.second.get_int());
|
||||
} else if( kv.second.type() == json_spirit::str_type ) {
|
||||
configString += kv.second.get_str();
|
||||
} else if( kv.second.type() == json_spirit::array_type ) {
|
||||
configString += kv.first + "=" + json_spirit::write_string(json_spirit::mValue(kv.second.get_array()), json_spirit::Output_options::none);
|
||||
if (value.type() == json_spirit::int_type) {
|
||||
configString += name + ":=" + format("%d", value.get_int());
|
||||
} else if (value.type() == json_spirit::str_type) {
|
||||
configString += value.get_str();
|
||||
} else if (value.type() == json_spirit::array_type) {
|
||||
configString +=
|
||||
name + "=" +
|
||||
json_spirit::write_string(json_spirit::mValue(value.get_array()), json_spirit::Output_options::none);
|
||||
} else {
|
||||
printUsage(LiteralStringRef("fileconfigure"));
|
||||
return true;
|
||||
|
@ -2229,8 +2246,7 @@ ACTOR Future<bool> exclude( Database db, std::vector<StringRef> tokens, Referenc
|
|||
}
|
||||
|
||||
printf("There are currently %zu servers or processes being excluded from the database:\n", excl.size());
|
||||
for(auto& e : excl)
|
||||
printf(" %s\n", e.toString().c_str());
|
||||
for (const auto& e : excl) printf(" %s\n", e.toString().c_str());
|
||||
|
||||
printf("To find out whether it is safe to remove one or more of these\n"
|
||||
"servers from the cluster, type `exclude <addresses>'.\n"
|
||||
|
@ -2435,7 +2451,7 @@ ACTOR Future<bool> exclude( Database db, std::vector<StringRef> tokens, Referenc
|
|||
|
||||
bool foundCoordinator = false;
|
||||
auto ccs = ClusterConnectionFile( ccf->getFilename() ).getConnectionString();
|
||||
for( auto& c : ccs.coordinators()) {
|
||||
for (const auto& c : ccs.coordinators()) {
|
||||
if (std::count(exclusionVector.begin(), exclusionVector.end(), AddressExclusion(c.ip, c.port)) ||
|
||||
std::count(exclusionVector.begin(), exclusionVector.end(), AddressExclusion(c.ip))) {
|
||||
printf("WARNING: %s is a coordinator!\n", c.toString().c_str());
|
||||
|
@ -2483,7 +2499,7 @@ ACTOR Future<bool> setClass( Database db, std::vector<StringRef> tokens ) {
|
|||
std::sort(workers.begin(), workers.end(), ProcessData::sort_by_address());
|
||||
|
||||
printf("There are currently %zu processes in the database:\n", workers.size());
|
||||
for(auto& w : workers)
|
||||
for (const auto& w : workers)
|
||||
printf(" %s: %s (%s)\n", w.address.toString().c_str(), w.processClass.toString().c_str(), w.processClass.sourceString().c_str());
|
||||
return false;
|
||||
}
|
||||
|
@ -2841,22 +2857,25 @@ struct CLIOptions {
|
|||
ClientKnobs* clientKnobs = new ClientKnobs;
|
||||
CLIENT_KNOBS = clientKnobs;
|
||||
|
||||
for(auto k=knobs.begin(); k!=knobs.end(); ++k) {
|
||||
for (const auto& [knob, value] : knobs) {
|
||||
try {
|
||||
if (!flowKnobs->setKnob( k->first, k->second ) &&
|
||||
!clientKnobs->setKnob( k->first, k->second ))
|
||||
{
|
||||
fprintf(stderr, "WARNING: Unrecognized knob option '%s'\n", k->first.c_str());
|
||||
TraceEvent(SevWarnAlways, "UnrecognizedKnobOption").detail("Knob", printable(k->first));
|
||||
if (!flowKnobs->setKnob(knob, value) && !clientKnobs->setKnob(knob, value)) {
|
||||
fprintf(stderr, "WARNING: Unrecognized knob option '%s'\n", knob.c_str());
|
||||
TraceEvent(SevWarnAlways, "UnrecognizedKnobOption").detail("Knob", printable(knob));
|
||||
}
|
||||
} catch (Error& e) {
|
||||
if (e.code() == error_code_invalid_option_value) {
|
||||
fprintf(stderr, "WARNING: Invalid value '%s' for knob option '%s'\n", k->second.c_str(), k->first.c_str());
|
||||
TraceEvent(SevWarnAlways, "InvalidKnobValue").detail("Knob", printable(k->first)).detail("Value", printable(k->second));
|
||||
fprintf(stderr, "WARNING: Invalid value '%s' for knob option '%s'\n", value.c_str(), knob.c_str());
|
||||
TraceEvent(SevWarnAlways, "InvalidKnobValue")
|
||||
.detail("Knob", printable(knob))
|
||||
.detail("Value", printable(value));
|
||||
}
|
||||
else {
|
||||
fprintf(stderr, "ERROR: Failed to set knob option '%s': %s\n", k->first.c_str(), e.what());
|
||||
TraceEvent(SevError, "FailedToSetKnob").detail("Knob", printable(k->first)).detail("Value", printable(k->second)).error(e);
|
||||
fprintf(stderr, "ERROR: Failed to set knob option '%s': %s\n", knob.c_str(), e.what());
|
||||
TraceEvent(SevError, "FailedToSetKnob")
|
||||
.detail("Knob", printable(knob))
|
||||
.detail("Value", printable(value))
|
||||
.error(e);
|
||||
exit_code = FDB_EXIT_ERROR;
|
||||
}
|
||||
}
|
||||
|
@ -3240,6 +3259,11 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
|
|||
continue;
|
||||
}
|
||||
|
||||
if (tokencmp(tokens[0], "triggerddteaminfolog")) {
|
||||
wait(triggerDDTeamInfoLog(db));
|
||||
continue;
|
||||
}
|
||||
|
||||
if (tokencmp(tokens[0], "configure")) {
|
||||
bool err = wait(configure(db, tokens, db->getConnectionFile(), &linenoise, warn));
|
||||
if (err) is_error = true;
|
||||
|
|
|
@ -54,10 +54,10 @@ static Future<T> joinErrorGroup(Future<T> f, Promise<Void> p) {
|
|||
// using multi-part upload and beginning to transfer each part as soon as it is large enough.
|
||||
// All write operations file operations must be sequential and contiguous.
|
||||
// Limits on part sizes, upload speed, and concurrent uploads are taken from the S3BlobStoreEndpoint being used.
|
||||
class AsyncFileS3BlobStoreWrite : public IAsyncFile, public ReferenceCounted<AsyncFileS3BlobStoreWrite> {
|
||||
class AsyncFileS3BlobStoreWrite final : public IAsyncFile, public ReferenceCounted<AsyncFileS3BlobStoreWrite> {
|
||||
public:
|
||||
virtual void addref() { ReferenceCounted<AsyncFileS3BlobStoreWrite>::addref(); }
|
||||
virtual void delref() { ReferenceCounted<AsyncFileS3BlobStoreWrite>::delref(); }
|
||||
void addref() override { ReferenceCounted<AsyncFileS3BlobStoreWrite>::addref(); }
|
||||
void delref() override { ReferenceCounted<AsyncFileS3BlobStoreWrite>::delref(); }
|
||||
|
||||
struct Part : ReferenceCounted<Part> {
|
||||
Part(int n, int minSize)
|
||||
|
@ -256,10 +256,10 @@ public:
|
|||
};
|
||||
|
||||
// This class represents a read-only file that lives in an S3-style blob store. It reads using the REST API.
|
||||
class AsyncFileS3BlobStoreRead : public IAsyncFile, public ReferenceCounted<AsyncFileS3BlobStoreRead> {
|
||||
class AsyncFileS3BlobStoreRead final : public IAsyncFile, public ReferenceCounted<AsyncFileS3BlobStoreRead> {
|
||||
public:
|
||||
virtual void addref() { ReferenceCounted<AsyncFileS3BlobStoreRead>::addref(); }
|
||||
virtual void delref() { ReferenceCounted<AsyncFileS3BlobStoreRead>::delref(); }
|
||||
void addref() override { ReferenceCounted<AsyncFileS3BlobStoreRead>::addref(); }
|
||||
void delref() override { ReferenceCounted<AsyncFileS3BlobStoreRead>::delref(); }
|
||||
|
||||
Future<int> read(void* data, int length, int64_t offset) override;
|
||||
|
||||
|
|
|
@ -40,7 +40,7 @@ Future<Version> timeKeeperVersionFromDatetime(std::string const &datetime, Datab
|
|||
// TODO: Move the log file and range file format encoding/decoding stuff to this file and behind interfaces.
|
||||
class IBackupFile {
|
||||
public:
|
||||
IBackupFile(const std::string& fileName) : m_fileName(fileName), m_offset(0) {}
|
||||
IBackupFile(const std::string& fileName) : m_fileName(fileName) {}
|
||||
virtual ~IBackupFile() {}
|
||||
// Backup files are append-only and cannot have more than 1 append outstanding at once.
|
||||
virtual Future<Void> append(const void *data, int len) = 0;
|
||||
|
@ -48,16 +48,13 @@ public:
|
|||
inline std::string getFileName() const {
|
||||
return m_fileName;
|
||||
}
|
||||
inline int64_t size() const {
|
||||
return m_offset;
|
||||
}
|
||||
virtual int64_t size() const = 0;
|
||||
virtual void addref() = 0;
|
||||
virtual void delref() = 0;
|
||||
|
||||
Future<Void> appendStringRefWithLen(Standalone<StringRef> s);
|
||||
protected:
|
||||
std::string m_fileName;
|
||||
int64_t m_offset;
|
||||
};
|
||||
|
||||
// Structures for various backup components
|
||||
|
|
|
@ -44,7 +44,7 @@ public:
|
|||
blobName = this->blobName, data, length, offset] {
|
||||
std::ostringstream oss(std::ios::out | std::ios::binary);
|
||||
client->download_blob_to_stream(containerName, blobName, offset, length, oss);
|
||||
auto str = oss.str();
|
||||
auto str = std::move(oss).str();
|
||||
memcpy(data, str.c_str(), str.size());
|
||||
return static_cast<int>(str.size());
|
||||
});
|
||||
|
@ -127,9 +127,11 @@ public:
|
|||
|
||||
class BackupFile final : public IBackupFile, ReferenceCounted<BackupFile> {
|
||||
Reference<IAsyncFile> m_file;
|
||||
int64_t m_offset;
|
||||
|
||||
public:
|
||||
BackupFile(const std::string& fileName, Reference<IAsyncFile> file) : IBackupFile(fileName), m_file(file) {}
|
||||
BackupFile(const std::string& fileName, Reference<IAsyncFile> file)
|
||||
: IBackupFile(fileName), m_file(file), m_offset(0) {}
|
||||
Future<Void> append(const void* data, int len) override {
|
||||
Future<Void> r = m_file->write(data, len, m_offset);
|
||||
m_offset += len;
|
||||
|
@ -142,6 +144,7 @@ public:
|
|||
return Void();
|
||||
});
|
||||
}
|
||||
int64_t size() const override { return m_offset; }
|
||||
void addref() override { ReferenceCounted<BackupFile>::addref(); }
|
||||
void delref() override { ReferenceCounted<BackupFile>::delref(); }
|
||||
};
|
||||
|
|
|
@ -159,8 +159,7 @@ public:
|
|||
state int i;
|
||||
|
||||
// Validate each filename, update version range
|
||||
for (i = 0; i < fileNames.size(); ++i) {
|
||||
auto const& f = fileNames[i];
|
||||
for (const auto& f : fileNames) {
|
||||
if (pathToRangeFile(rf, f, 0)) {
|
||||
fileArray.push_back(f);
|
||||
if (rf.version < minVer) minVer = rf.version;
|
||||
|
|
|
@ -30,16 +30,38 @@ namespace {
|
|||
|
||||
class BackupFile : public IBackupFile, ReferenceCounted<BackupFile> {
|
||||
public:
|
||||
BackupFile(std::string fileName, Reference<IAsyncFile> file, std::string finalFullPath)
|
||||
: IBackupFile(fileName), m_file(file), m_finalFullPath(finalFullPath) {}
|
||||
BackupFile(const std::string& fileName, Reference<IAsyncFile> file, const std::string& finalFullPath)
|
||||
: IBackupFile(fileName), m_file(file), m_finalFullPath(finalFullPath), m_writeOffset(0) {
|
||||
m_buffer.reserve(m_buffer.arena(), CLIENT_KNOBS->BACKUP_LOCAL_FILE_WRITE_BLOCK);
|
||||
}
|
||||
|
||||
Future<Void> append(const void* data, int len) {
|
||||
Future<Void> r = m_file->write(data, len, m_offset);
|
||||
m_offset += len;
|
||||
m_buffer.append(m_buffer.arena(), (const uint8_t*)data, len);
|
||||
|
||||
if (m_buffer.size() >= CLIENT_KNOBS->BACKUP_LOCAL_FILE_WRITE_BLOCK) {
|
||||
return flush(CLIENT_KNOBS->BACKUP_LOCAL_FILE_WRITE_BLOCK);
|
||||
}
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
Future<Void> flush(int size) {
|
||||
ASSERT(size <= m_buffer.size());
|
||||
|
||||
// Keep a reference to the old buffer
|
||||
Standalone<VectorRef<uint8_t>> old = m_buffer;
|
||||
// Make a new buffer, initialized with the excess bytes over the block size from the old buffer
|
||||
m_buffer = Standalone<VectorRef<uint8_t>>(old.slice(size, old.size()));
|
||||
|
||||
// Write the old buffer to the underlying file and update the write offset
|
||||
Future<Void> r = holdWhile(old, m_file->write(old.begin(), size, m_writeOffset));
|
||||
m_writeOffset += size;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
ACTOR static Future<Void> finish_impl(Reference<BackupFile> f) {
|
||||
wait(f->flush(f->m_buffer.size()));
|
||||
wait(f->m_file->truncate(f->size())); // Some IAsyncFile implementations extend in whole block sizes.
|
||||
wait(f->m_file->sync());
|
||||
std::string name = f->m_file->getFilename();
|
||||
|
@ -48,6 +70,8 @@ public:
|
|||
return Void();
|
||||
}
|
||||
|
||||
int64_t size() const { return m_buffer.size() + m_writeOffset; }
|
||||
|
||||
Future<Void> finish() { return finish_impl(Reference<BackupFile>::addRef(this)); }
|
||||
|
||||
void addref() override { return ReferenceCounted<BackupFile>::addref(); }
|
||||
|
@ -55,6 +79,8 @@ public:
|
|||
|
||||
private:
|
||||
Reference<IAsyncFile> m_file;
|
||||
Standalone<VectorRef<uint8_t>> m_buffer;
|
||||
int64_t m_writeOffset;
|
||||
std::string m_finalFullPath;
|
||||
};
|
||||
|
||||
|
@ -72,7 +98,7 @@ ACTOR static Future<BackupContainerFileSystem::FilesAndSizesT> listFiles_impl(st
|
|||
[](std::string const& f) { return StringRef(f).endsWith(LiteralStringRef(".lnk")); }),
|
||||
files.end());
|
||||
|
||||
for (auto& f : files) {
|
||||
for (const auto& f : files) {
|
||||
// Hide .part or .temp files.
|
||||
StringRef s(f);
|
||||
if (!s.endsWith(LiteralStringRef(".part")) && !s.endsWith(LiteralStringRef(".temp")))
|
||||
|
@ -147,7 +173,7 @@ Future<std::vector<std::string>> BackupContainerLocalDirectory::listURLs(const s
|
|||
std::vector<std::string> dirs = platform::listDirectories(path);
|
||||
std::vector<std::string> results;
|
||||
|
||||
for (auto& r : dirs) {
|
||||
for (const auto& r : dirs) {
|
||||
if (r == "." || r == "..") continue;
|
||||
results.push_back(std::string("file://") + joinPath(path, r));
|
||||
}
|
||||
|
|
|
@ -36,7 +36,7 @@ public:
|
|||
state std::string basePath = INDEXFOLDER + '/';
|
||||
S3BlobStoreEndpoint::ListResult contents = wait(bstore->listObjects(bucket, basePath));
|
||||
std::vector<std::string> results;
|
||||
for (auto& f : contents.objects) {
|
||||
for (const auto& f : contents.objects) {
|
||||
results.push_back(
|
||||
bstore->getResourceURL(f.name.substr(basePath.size()), format("bucket=%s", bucket.c_str())));
|
||||
}
|
||||
|
@ -45,7 +45,8 @@ public:
|
|||
|
||||
class BackupFile : public IBackupFile, ReferenceCounted<BackupFile> {
|
||||
public:
|
||||
BackupFile(std::string fileName, Reference<IAsyncFile> file) : IBackupFile(fileName), m_file(file) {}
|
||||
BackupFile(std::string fileName, Reference<IAsyncFile> file)
|
||||
: IBackupFile(fileName), m_file(file), m_offset(0) {}
|
||||
|
||||
Future<Void> append(const void* data, int len) {
|
||||
Future<Void> r = m_file->write(data, len, m_offset);
|
||||
|
@ -61,11 +62,14 @@ public:
|
|||
});
|
||||
}
|
||||
|
||||
int64_t size() const override { return m_offset; }
|
||||
|
||||
void addref() final { return ReferenceCounted<BackupFile>::addref(); }
|
||||
void delref() final { return ReferenceCounted<BackupFile>::delref(); }
|
||||
|
||||
private:
|
||||
Reference<IAsyncFile> m_file;
|
||||
int64_t m_offset;
|
||||
};
|
||||
|
||||
ACTOR static Future<BackupContainerFileSystem::FilesAndSizesT> listFiles(
|
||||
|
@ -82,7 +86,7 @@ public:
|
|||
state S3BlobStoreEndpoint::ListResult result = wait(bc->m_bstore->listObjects(
|
||||
bc->m_bucket, bc->dataPath(path), '/', std::numeric_limits<int>::max(), rawPathFilter));
|
||||
BackupContainerFileSystem::FilesAndSizesT files;
|
||||
for (auto& o : result.objects) {
|
||||
for (const auto& o : result.objects) {
|
||||
ASSERT(o.name.size() >= prefixTrim);
|
||||
files.push_back({ o.name.substr(prefixTrim), o.size });
|
||||
}
|
||||
|
@ -135,15 +139,13 @@ BackupContainerS3BlobStore::BackupContainerS3BlobStore(Reference<S3BlobStoreEndp
|
|||
: m_bstore(bstore), m_name(name), m_bucket("FDB_BACKUPS_V2") {
|
||||
|
||||
// Currently only one parameter is supported, "bucket"
|
||||
for (auto& kv : params) {
|
||||
if (kv.first == "bucket") {
|
||||
m_bucket = kv.second;
|
||||
for (const auto& [name, value] : params) {
|
||||
if (name == "bucket") {
|
||||
m_bucket = value;
|
||||
continue;
|
||||
}
|
||||
TraceEvent(SevWarn, "BackupContainerS3BlobStoreInvalidParameter")
|
||||
.detail("Name", kv.first)
|
||||
.detail("Value", kv.second);
|
||||
IBackupContainer::lastOpenError = format("Unknown URL parameter: '%s'", kv.first.c_str());
|
||||
TraceEvent(SevWarn, "BackupContainerS3BlobStoreInvalidParameter").detail("Name", name).detail("Value", value);
|
||||
IBackupContainer::lastOpenError = format("Unknown URL parameter: '%s'", name.c_str());
|
||||
throw backup_invalid_url();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,46 +23,39 @@
|
|||
#define FDBCLIENT_CLIENTLOGEVENTS_H
|
||||
|
||||
namespace FdbClientLogEvents {
|
||||
typedef int EventType;
|
||||
enum { GET_VERSION_LATENCY = 0,
|
||||
GET_LATENCY = 1,
|
||||
GET_RANGE_LATENCY = 2,
|
||||
COMMIT_LATENCY = 3,
|
||||
ERROR_GET = 4,
|
||||
ERROR_GET_RANGE = 5,
|
||||
ERROR_COMMIT = 6,
|
||||
enum class EventType {
|
||||
GET_VERSION_LATENCY = 0,
|
||||
GET_LATENCY = 1,
|
||||
GET_RANGE_LATENCY = 2,
|
||||
COMMIT_LATENCY = 3,
|
||||
ERROR_GET = 4,
|
||||
ERROR_GET_RANGE = 5,
|
||||
ERROR_COMMIT = 6,
|
||||
UNSET
|
||||
};
|
||||
|
||||
EVENTTYPEEND // End of EventType
|
||||
};
|
||||
enum class TransactionPriorityType { PRIORITY_DEFAULT = 0, PRIORITY_BATCH = 1, PRIORITY_IMMEDIATE = 2, UNSET };
|
||||
|
||||
typedef int TrasactionPriorityType;
|
||||
enum {
|
||||
PRIORITY_DEFAULT = 0,
|
||||
PRIORITY_BATCH = 1,
|
||||
PRIORITY_IMMEDIATE = 2,
|
||||
PRIORITY_END
|
||||
};
|
||||
struct Event {
|
||||
Event(EventType t, double ts, const Optional<Standalone<StringRef>>& dc) : type(t), startTs(ts) {
|
||||
if (dc.present()) dcId = dc.get();
|
||||
}
|
||||
Event() {}
|
||||
|
||||
struct Event {
|
||||
Event(EventType t, double ts, const Optional<Standalone<StringRef>> &dc) : type(t), startTs(ts){
|
||||
if (dc.present())
|
||||
dcId = dc.get();
|
||||
}
|
||||
Event() { }
|
||||
|
||||
template <typename Ar> Ar& serialize(Ar &ar) {
|
||||
if (ar.protocolVersion().version() >= (uint64_t) 0x0FDB00B063010001LL) {
|
||||
return serializer(ar, type, startTs, dcId);
|
||||
} else {
|
||||
return serializer(ar, type, startTs);
|
||||
}
|
||||
template <typename Ar>
|
||||
Ar& serialize(Ar& ar) {
|
||||
if (ar.protocolVersion().version() >= (uint64_t)0x0FDB00B063010001LL) {
|
||||
return serializer(ar, type, startTs, dcId);
|
||||
} else {
|
||||
return serializer(ar, type, startTs);
|
||||
}
|
||||
}
|
||||
|
||||
EventType type{ EVENTTYPEEND };
|
||||
double startTs{ 0 };
|
||||
Key dcId{};
|
||||
EventType type{ EventType::UNSET };
|
||||
double startTs{ 0 };
|
||||
Key dcId{};
|
||||
|
||||
void logEvent(std::string id, int maxFieldLength) const {}
|
||||
void logEvent(std::string id, int maxFieldLength) const {}
|
||||
};
|
||||
|
||||
struct EventGetVersion : public Event {
|
||||
|
@ -96,9 +89,9 @@ namespace FdbClientLogEvents {
|
|||
}
|
||||
|
||||
double latency;
|
||||
TrasactionPriorityType priorityType {PRIORITY_END};
|
||||
TransactionPriorityType priorityType{ TransactionPriorityType::UNSET };
|
||||
|
||||
void logEvent(std::string id, int maxFieldLength) const {
|
||||
void logEvent(std::string id, int maxFieldLength) const {
|
||||
TraceEvent("TransactionTrace_GetVersion")
|
||||
.detail("TransactionID", id)
|
||||
.detail("Latency", latency)
|
||||
|
@ -108,23 +101,25 @@ namespace FdbClientLogEvents {
|
|||
|
||||
// Version V3 of EventGetVersion starting at 6.3
|
||||
struct EventGetVersion_V3 : public Event {
|
||||
EventGetVersion_V3(double ts, const Optional<Standalone<StringRef>> &dcId, double lat, TransactionPriority priority, Version version) : Event(GET_VERSION_LATENCY, ts, dcId), latency(lat), readVersion(version) {
|
||||
switch(priority) {
|
||||
EventGetVersion_V3(double ts, const Optional<Standalone<StringRef>>& dcId, double lat,
|
||||
TransactionPriority priority, Version version)
|
||||
: Event(EventType::GET_VERSION_LATENCY, ts, dcId), latency(lat), readVersion(version) {
|
||||
switch(priority) {
|
||||
// Unfortunately, the enum serialized here disagrees with the enum used elsewhere for the values used by each priority
|
||||
case TransactionPriority::IMMEDIATE:
|
||||
priorityType = PRIORITY_IMMEDIATE;
|
||||
break;
|
||||
priorityType = TransactionPriorityType::PRIORITY_IMMEDIATE;
|
||||
break;
|
||||
case TransactionPriority::DEFAULT:
|
||||
priorityType = PRIORITY_DEFAULT;
|
||||
break;
|
||||
priorityType = TransactionPriorityType::PRIORITY_DEFAULT;
|
||||
break;
|
||||
case TransactionPriority::BATCH:
|
||||
priorityType = PRIORITY_BATCH;
|
||||
break;
|
||||
priorityType = TransactionPriorityType::PRIORITY_BATCH;
|
||||
break;
|
||||
default:
|
||||
ASSERT(false);
|
||||
}
|
||||
}
|
||||
EventGetVersion_V3() { }
|
||||
}
|
||||
EventGetVersion_V3() { }
|
||||
|
||||
template <typename Ar> Ar& serialize(Ar &ar) {
|
||||
if (!ar.isDeserializing)
|
||||
|
@ -134,8 +129,8 @@ namespace FdbClientLogEvents {
|
|||
}
|
||||
|
||||
double latency;
|
||||
TrasactionPriorityType priorityType {PRIORITY_END};
|
||||
Version readVersion;
|
||||
TransactionPriorityType priorityType{ TransactionPriorityType::UNSET };
|
||||
Version readVersion;
|
||||
|
||||
void logEvent(std::string id, int maxFieldLength) const {
|
||||
TraceEvent("TransactionTrace_GetVersion")
|
||||
|
@ -147,8 +142,9 @@ namespace FdbClientLogEvents {
|
|||
};
|
||||
|
||||
struct EventGet : public Event {
|
||||
EventGet(double ts, const Optional<Standalone<StringRef>> &dcId, double lat, int size, const KeyRef &in_key) : Event(GET_LATENCY, ts, dcId), latency(lat), valueSize(size), key(in_key) { }
|
||||
EventGet() { }
|
||||
EventGet(double ts, const Optional<Standalone<StringRef>>& dcId, double lat, int size, const KeyRef& in_key)
|
||||
: Event(EventType::GET_LATENCY, ts, dcId), latency(lat), valueSize(size), key(in_key) {}
|
||||
EventGet() { }
|
||||
|
||||
template <typename Ar> Ar& serialize(Ar &ar) {
|
||||
if (!ar.isDeserializing)
|
||||
|
@ -173,8 +169,11 @@ namespace FdbClientLogEvents {
|
|||
};
|
||||
|
||||
struct EventGetRange : public Event {
|
||||
EventGetRange(double ts, const Optional<Standalone<StringRef>> &dcId, double lat, int size, const KeyRef &start_key, const KeyRef & end_key) : Event(GET_RANGE_LATENCY, ts, dcId), latency(lat), rangeSize(size), startKey(start_key), endKey(end_key) { }
|
||||
EventGetRange() { }
|
||||
EventGetRange(double ts, const Optional<Standalone<StringRef>>& dcId, double lat, int size,
|
||||
const KeyRef& start_key, const KeyRef& end_key)
|
||||
: Event(EventType::GET_RANGE_LATENCY, ts, dcId), latency(lat), rangeSize(size), startKey(start_key),
|
||||
endKey(end_key) {}
|
||||
EventGetRange() { }
|
||||
|
||||
template <typename Ar> Ar& serialize(Ar &ar) {
|
||||
if (!ar.isDeserializing)
|
||||
|
@ -252,9 +251,11 @@ namespace FdbClientLogEvents {
|
|||
|
||||
// Version V2 of EventGetVersion starting at 6.3
|
||||
struct EventCommit_V2 : public Event {
|
||||
EventCommit_V2(double ts, const Optional<Standalone<StringRef>> &dcId, double lat, int mut, int bytes, Version version, const CommitTransactionRequest &commit_req)
|
||||
: Event(COMMIT_LATENCY, ts, dcId), latency(lat), numMutations(mut), commitBytes(bytes), commitVersion(version), req(commit_req) { }
|
||||
EventCommit_V2() { }
|
||||
EventCommit_V2(double ts, const Optional<Standalone<StringRef>>& dcId, double lat, int mut, int bytes,
|
||||
Version version, const CommitTransactionRequest& commit_req)
|
||||
: Event(EventType::COMMIT_LATENCY, ts, dcId), latency(lat), numMutations(mut), commitBytes(bytes),
|
||||
commitVersion(version), req(commit_req) {}
|
||||
EventCommit_V2() { }
|
||||
|
||||
template <typename Ar> Ar& serialize(Ar &ar) {
|
||||
if (!ar.isDeserializing)
|
||||
|
@ -306,8 +307,9 @@ namespace FdbClientLogEvents {
|
|||
};
|
||||
|
||||
struct EventGetError : public Event {
|
||||
EventGetError(double ts, const Optional<Standalone<StringRef>> &dcId, int err_code, const KeyRef &in_key) : Event(ERROR_GET, ts, dcId), errCode(err_code), key(in_key) { }
|
||||
EventGetError() { }
|
||||
EventGetError(double ts, const Optional<Standalone<StringRef>>& dcId, int err_code, const KeyRef& in_key)
|
||||
: Event(EventType::ERROR_GET, ts, dcId), errCode(err_code), key(in_key) {}
|
||||
EventGetError() { }
|
||||
|
||||
template <typename Ar> Ar& serialize(Ar &ar) {
|
||||
if (!ar.isDeserializing)
|
||||
|
@ -330,8 +332,10 @@ namespace FdbClientLogEvents {
|
|||
};
|
||||
|
||||
struct EventGetRangeError : public Event {
|
||||
EventGetRangeError(double ts, const Optional<Standalone<StringRef>> &dcId, int err_code, const KeyRef &start_key, const KeyRef & end_key) : Event(ERROR_GET_RANGE, ts, dcId), errCode(err_code), startKey(start_key), endKey(end_key) { }
|
||||
EventGetRangeError() { }
|
||||
EventGetRangeError(double ts, const Optional<Standalone<StringRef>>& dcId, int err_code,
|
||||
const KeyRef& start_key, const KeyRef& end_key)
|
||||
: Event(EventType::ERROR_GET_RANGE, ts, dcId), errCode(err_code), startKey(start_key), endKey(end_key) {}
|
||||
EventGetRangeError() { }
|
||||
|
||||
template <typename Ar> Ar& serialize(Ar &ar) {
|
||||
if (!ar.isDeserializing)
|
||||
|
@ -356,8 +360,10 @@ namespace FdbClientLogEvents {
|
|||
};
|
||||
|
||||
struct EventCommitError : public Event {
|
||||
EventCommitError(double ts, const Optional<Standalone<StringRef>> &dcId, int err_code, const CommitTransactionRequest &commit_req) : Event(ERROR_COMMIT, ts, dcId), errCode(err_code), req(commit_req) { }
|
||||
EventCommitError() { }
|
||||
EventCommitError(double ts, const Optional<Standalone<StringRef>>& dcId, int err_code,
|
||||
const CommitTransactionRequest& commit_req)
|
||||
: Event(EventType::ERROR_COMMIT, ts, dcId), errCode(err_code), req(commit_req) {}
|
||||
EventCommitError() { }
|
||||
|
||||
template <typename Ar> Ar& serialize(Ar &ar) {
|
||||
if (!ar.isDeserializing)
|
||||
|
|
|
@ -93,7 +93,5 @@ struct ProfilerRequest {
|
|||
serializer(ar, reply, type, action, duration, outputFile);
|
||||
}
|
||||
};
|
||||
BINARY_SERIALIZABLE( ProfilerRequest::Type );
|
||||
BINARY_SERIALIZABLE( ProfilerRequest::Action );
|
||||
|
||||
#endif
|
||||
|
|
|
@ -51,9 +51,19 @@ struct RegionInfo {
|
|||
int32_t priority;
|
||||
|
||||
Reference<IReplicationPolicy> satelliteTLogPolicy;
|
||||
|
||||
// Number of tLogs that should be recruited in satellite datacenters.
|
||||
int32_t satelliteDesiredTLogCount;
|
||||
|
||||
// Total number of copies made for each mutation across all satellite tLogs in all DCs.
|
||||
int32_t satelliteTLogReplicationFactor;
|
||||
|
||||
// Number of tLog replies we can ignore when waiting for quorum. Hence, effective quorum is
|
||||
// satelliteDesiredTLogCount - satelliteTLogWriteAntiQuorum. Locality of individual tLogs is not taken
|
||||
// into account.
|
||||
int32_t satelliteTLogWriteAntiQuorum;
|
||||
|
||||
// Number of satellite datacenters for current region, as set by `satellite_redundancy_mode`.
|
||||
int32_t satelliteTLogUsableDcs;
|
||||
|
||||
Reference<IReplicationPolicy> satelliteTLogPolicyFallback;
|
||||
|
@ -63,27 +73,32 @@ struct RegionInfo {
|
|||
|
||||
std::vector<SatelliteInfo> satellites;
|
||||
|
||||
RegionInfo() : priority(0), satelliteDesiredTLogCount(-1), satelliteTLogReplicationFactor(0), satelliteTLogWriteAntiQuorum(0), satelliteTLogUsableDcs(1),
|
||||
satelliteTLogReplicationFactorFallback(0), satelliteTLogWriteAntiQuorumFallback(0), satelliteTLogUsableDcsFallback(0) {}
|
||||
RegionInfo()
|
||||
: priority(0), satelliteDesiredTLogCount(-1), satelliteTLogReplicationFactor(0), satelliteTLogWriteAntiQuorum(0),
|
||||
satelliteTLogUsableDcs(1), satelliteTLogReplicationFactorFallback(0), satelliteTLogWriteAntiQuorumFallback(0),
|
||||
satelliteTLogUsableDcsFallback(0) {}
|
||||
|
||||
struct sort_by_priority {
|
||||
bool operator ()(RegionInfo const&a, RegionInfo const& b) const { return a.priority > b.priority; }
|
||||
bool operator()(RegionInfo const& a, RegionInfo const& b) const { return a.priority > b.priority; }
|
||||
};
|
||||
|
||||
template <class Ar>
|
||||
void serialize(Ar& ar) {
|
||||
serializer(ar, dcId, priority, satelliteTLogPolicy, satelliteDesiredTLogCount, satelliteTLogReplicationFactor, satelliteTLogWriteAntiQuorum, satelliteTLogUsableDcs,
|
||||
satelliteTLogPolicyFallback, satelliteTLogReplicationFactorFallback, satelliteTLogWriteAntiQuorumFallback, satelliteTLogUsableDcsFallback, satellites);
|
||||
serializer(ar, dcId, priority, satelliteTLogPolicy, satelliteDesiredTLogCount, satelliteTLogReplicationFactor,
|
||||
satelliteTLogWriteAntiQuorum, satelliteTLogUsableDcs, satelliteTLogPolicyFallback,
|
||||
satelliteTLogReplicationFactorFallback, satelliteTLogWriteAntiQuorumFallback,
|
||||
satelliteTLogUsableDcsFallback, satellites);
|
||||
}
|
||||
};
|
||||
|
||||
struct DatabaseConfiguration {
|
||||
DatabaseConfiguration();
|
||||
|
||||
void applyMutation( MutationRef mutation );
|
||||
bool set( KeyRef key, ValueRef value ); // Returns true if a configuration option that requires recovery to take effect is changed
|
||||
bool clear( KeyRangeRef keys );
|
||||
Optional<ValueRef> get( KeyRef key ) const;
|
||||
void applyMutation(MutationRef mutation);
|
||||
bool set(KeyRef key,
|
||||
ValueRef value); // Returns true if a configuration option that requires recovery to take effect is changed
|
||||
bool clear(KeyRangeRef keys);
|
||||
Optional<ValueRef> get(KeyRef key) const;
|
||||
|
||||
bool isValid() const;
|
||||
|
||||
|
@ -92,63 +107,75 @@ struct DatabaseConfiguration {
|
|||
std::string toString() const;
|
||||
StatusObject toJSON(bool noPolicies = false) const;
|
||||
StatusArray getRegionJSON() const;
|
||||
|
||||
RegionInfo getRegion( Optional<Key> dcId ) const {
|
||||
if(!dcId.present()) {
|
||||
|
||||
RegionInfo getRegion(Optional<Key> dcId) const {
|
||||
if (!dcId.present()) {
|
||||
return RegionInfo();
|
||||
}
|
||||
for(auto& r : regions) {
|
||||
if(r.dcId == dcId.get()) {
|
||||
for (auto& r : regions) {
|
||||
if (r.dcId == dcId.get()) {
|
||||
return r;
|
||||
}
|
||||
}
|
||||
return RegionInfo();
|
||||
}
|
||||
|
||||
int expectedLogSets( Optional<Key> dcId ) const {
|
||||
int expectedLogSets(Optional<Key> dcId) const {
|
||||
int result = 1;
|
||||
if(dcId.present() && getRegion(dcId.get()).satelliteTLogReplicationFactor > 0 && usableRegions > 1) {
|
||||
if (dcId.present() && getRegion(dcId.get()).satelliteTLogReplicationFactor > 0 && usableRegions > 1) {
|
||||
result++;
|
||||
}
|
||||
|
||||
if(usableRegions > 1) {
|
||||
|
||||
if (usableRegions > 1) {
|
||||
result++;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Counts the number of DCs required including remote and satellites for current database configuraiton.
|
||||
int32_t minDatacentersRequired() const {
|
||||
int minRequired = 0;
|
||||
for(auto& r : regions) {
|
||||
for (auto& r : regions) {
|
||||
minRequired += 1 + r.satellites.size();
|
||||
}
|
||||
return minRequired;
|
||||
}
|
||||
|
||||
int32_t minZonesRequiredPerDatacenter() const {
|
||||
int minRequired = std::max( remoteTLogReplicationFactor, std::max(tLogReplicationFactor, storageTeamSize) );
|
||||
for(auto& r : regions) {
|
||||
minRequired = std::max( minRequired, r.satelliteTLogReplicationFactor/std::max(1, r.satelliteTLogUsableDcs) );
|
||||
int minRequired = std::max(remoteTLogReplicationFactor, std::max(tLogReplicationFactor, storageTeamSize));
|
||||
for (auto& r : regions) {
|
||||
minRequired =
|
||||
std::max(minRequired, r.satelliteTLogReplicationFactor / std::max(1, r.satelliteTLogUsableDcs));
|
||||
}
|
||||
return minRequired;
|
||||
}
|
||||
|
||||
//Killing an entire datacenter counts as killing one zone in modes that support it
|
||||
// Retuns the maximum number of discrete failures a cluster can tolerate.
|
||||
// In HA mode, `fullyReplicatedRegions` is set to false initially when data is being
|
||||
// replicated to remote, and will be true later. `forAvailablity` is set to true
|
||||
// if we want to account the number for machines that can recruit new tLogs/SS after failures.
|
||||
// Killing an entire datacenter counts as killing one zone in modes that support it
|
||||
int32_t maxZoneFailuresTolerated(int fullyReplicatedRegions, bool forAvailability) const {
|
||||
int worstSatellite = regions.size() ? std::numeric_limits<int>::max() : 0;
|
||||
int regionsWithNonNegativePriority = 0;
|
||||
for(auto& r : regions) {
|
||||
if(r.priority >= 0) {
|
||||
for (auto& r : regions) {
|
||||
if (r.priority >= 0) {
|
||||
regionsWithNonNegativePriority++;
|
||||
}
|
||||
worstSatellite = std::min(worstSatellite, r.satelliteTLogReplicationFactor - r.satelliteTLogWriteAntiQuorum);
|
||||
if(r.satelliteTLogUsableDcsFallback > 0) {
|
||||
worstSatellite = std::min(worstSatellite, r.satelliteTLogReplicationFactorFallback - r.satelliteTLogWriteAntiQuorumFallback);
|
||||
worstSatellite =
|
||||
std::min(worstSatellite, r.satelliteTLogReplicationFactor - r.satelliteTLogWriteAntiQuorum);
|
||||
if (r.satelliteTLogUsableDcsFallback > 0) {
|
||||
worstSatellite = std::min(worstSatellite, r.satelliteTLogReplicationFactorFallback -
|
||||
r.satelliteTLogWriteAntiQuorumFallback);
|
||||
}
|
||||
}
|
||||
if(usableRegions > 1 && fullyReplicatedRegions > 1 && worstSatellite > 0 && (!forAvailability || regionsWithNonNegativePriority > 1)) {
|
||||
return 1 + std::min(std::max(tLogReplicationFactor - 1 - tLogWriteAntiQuorum, worstSatellite - 1), storageTeamSize - 1);
|
||||
} else if(worstSatellite > 0) {
|
||||
return std::min(tLogReplicationFactor + worstSatellite - 2 - tLogWriteAntiQuorum, storageTeamSize - 1);
|
||||
if (usableRegions > 1 && fullyReplicatedRegions > 1 && worstSatellite > 0 &&
|
||||
(!forAvailability || regionsWithNonNegativePriority > 1)) {
|
||||
return 1 + std::min(std::max(tLogReplicationFactor - 1 - tLogWriteAntiQuorum, worstSatellite - 1),
|
||||
storageTeamSize - 1);
|
||||
} else if (worstSatellite > 0) {
|
||||
// Primary and Satellite tLogs are synchronously replicated, hence we can lose all but 1.
|
||||
return std::min(tLogReplicationFactor + worstSatellite - 1 - tLogWriteAntiQuorum, storageTeamSize - 1);
|
||||
}
|
||||
return std::min(tLogReplicationFactor - 1 - tLogWriteAntiQuorum, storageTeamSize - 1);
|
||||
}
|
||||
|
@ -187,13 +214,13 @@ struct DatabaseConfiguration {
|
|||
// Backup Workers
|
||||
bool backupWorkerEnabled;
|
||||
|
||||
//Data centers
|
||||
int32_t usableRegions;
|
||||
// Data centers
|
||||
int32_t usableRegions; // Number of regions which have a replica of the database.
|
||||
int32_t repopulateRegionAntiQuorum;
|
||||
std::vector<RegionInfo> regions;
|
||||
|
||||
// Excluded servers (no state should be here)
|
||||
bool isExcludedServer( NetworkAddressList ) const;
|
||||
bool isExcludedServer(NetworkAddressList) const;
|
||||
std::set<AddressExclusion> getExcludedServers() const;
|
||||
|
||||
int32_t getDesiredCommitProxies() const {
|
||||
|
@ -204,17 +231,33 @@ struct DatabaseConfiguration {
|
|||
if (grvProxyCount == -1) return autoGrvProxyCount;
|
||||
return grvProxyCount;
|
||||
}
|
||||
int32_t getDesiredResolvers() const { if(resolverCount == -1) return autoResolverCount; return resolverCount; }
|
||||
int32_t getDesiredLogs() const { if(desiredTLogCount == -1) return autoDesiredTLogCount; return desiredTLogCount; }
|
||||
int32_t getDesiredRemoteLogs() const { if(remoteDesiredTLogCount == -1) return getDesiredLogs(); return remoteDesiredTLogCount; }
|
||||
int32_t getDesiredSatelliteLogs( Optional<Key> dcId ) const {
|
||||
auto desired = getRegion(dcId).satelliteDesiredTLogCount;
|
||||
if(desired == -1) return autoDesiredTLogCount; return desired;
|
||||
int32_t getDesiredResolvers() const {
|
||||
if (resolverCount == -1) return autoResolverCount;
|
||||
return resolverCount;
|
||||
}
|
||||
int32_t getDesiredLogs() const {
|
||||
if (desiredTLogCount == -1) return autoDesiredTLogCount;
|
||||
return desiredTLogCount;
|
||||
}
|
||||
int32_t getDesiredRemoteLogs() const {
|
||||
if (remoteDesiredTLogCount == -1) return getDesiredLogs();
|
||||
return remoteDesiredTLogCount;
|
||||
}
|
||||
int32_t getDesiredSatelliteLogs(Optional<Key> dcId) const {
|
||||
auto desired = getRegion(dcId).satelliteDesiredTLogCount;
|
||||
if (desired == -1) return autoDesiredTLogCount;
|
||||
return desired;
|
||||
}
|
||||
int32_t getRemoteTLogReplicationFactor() const {
|
||||
if (remoteTLogReplicationFactor == 0) return tLogReplicationFactor;
|
||||
return remoteTLogReplicationFactor;
|
||||
}
|
||||
Reference<IReplicationPolicy> getRemoteTLogPolicy() const {
|
||||
if (remoteTLogReplicationFactor == 0) return tLogPolicy;
|
||||
return remoteTLogPolicy;
|
||||
}
|
||||
int32_t getRemoteTLogReplicationFactor() const { if(remoteTLogReplicationFactor == 0) return tLogReplicationFactor; return remoteTLogReplicationFactor; }
|
||||
Reference<IReplicationPolicy> getRemoteTLogPolicy() const { if(remoteTLogReplicationFactor == 0) return tLogPolicy; return remoteTLogPolicy; }
|
||||
|
||||
bool operator == ( DatabaseConfiguration const& rhs ) const {
|
||||
bool operator==(DatabaseConfiguration const& rhs) const {
|
||||
const_cast<DatabaseConfiguration*>(this)->makeConfigurationImmutable();
|
||||
const_cast<DatabaseConfiguration*>(&rhs)->makeConfigurationImmutable();
|
||||
return rawConfiguration == rhs.rawConfiguration;
|
||||
|
@ -226,8 +269,7 @@ struct DatabaseConfiguration {
|
|||
if (!ar.isDeserializing) makeConfigurationImmutable();
|
||||
serializer(ar, rawConfiguration);
|
||||
if (ar.isDeserializing) {
|
||||
for(auto c=rawConfiguration.begin(); c!=rawConfiguration.end(); ++c)
|
||||
setInternal(c->key, c->value);
|
||||
for (auto c = rawConfiguration.begin(); c != rawConfiguration.end(); ++c) setInternal(c->key, c->value);
|
||||
setDefaultReplicationPolicy();
|
||||
}
|
||||
}
|
||||
|
@ -235,13 +277,13 @@ struct DatabaseConfiguration {
|
|||
void fromKeyValues(Standalone<VectorRef<KeyValueRef>> rawConfig);
|
||||
|
||||
private:
|
||||
Optional< std::map<std::string, std::string> > mutableConfiguration; // If present, rawConfiguration is not valid
|
||||
Standalone<VectorRef<KeyValueRef>> rawConfiguration; // sorted by key
|
||||
Optional<std::map<std::string, std::string>> mutableConfiguration; // If present, rawConfiguration is not valid
|
||||
Standalone<VectorRef<KeyValueRef>> rawConfiguration; // sorted by key
|
||||
|
||||
void makeConfigurationMutable();
|
||||
void makeConfigurationImmutable();
|
||||
|
||||
bool setInternal( KeyRef key, ValueRef value );
|
||||
bool setInternal(KeyRef key, ValueRef value);
|
||||
void resetInternal();
|
||||
void setDefaultReplicationPolicy();
|
||||
|
||||
|
|
|
@ -205,6 +205,9 @@ public:
|
|||
Future<Void> switchConnectionFile(Reference<ClusterConnectionFile> standby);
|
||||
Future<Void> connectionFileChanged();
|
||||
bool switchable = false;
|
||||
|
||||
// Management API, Attempt to kill or suspend a process, return 1 for success, 0 for failure
|
||||
Future<int64_t> rebootWorker(StringRef address, bool check = false, int duration = 0);
|
||||
|
||||
//private:
|
||||
explicit DatabaseContext( Reference<AsyncVar<Reference<ClusterConnectionFile>>> connectionFile, Reference<AsyncVar<ClientDBInfo>> clientDBInfo,
|
||||
|
@ -303,6 +306,7 @@ public:
|
|||
Counter transactionsCommitCompleted;
|
||||
Counter transactionKeyServerLocationRequests;
|
||||
Counter transactionKeyServerLocationRequestsCompleted;
|
||||
Counter transactionStatusRequests;
|
||||
Counter transactionsTooOld;
|
||||
Counter transactionsFutureVersions;
|
||||
Counter transactionsNotCommitted;
|
||||
|
|
|
@ -71,9 +71,7 @@ struct Tag {
|
|||
bool operator != ( const Tag& r ) const { return locality!=r.locality || id!=r.id; }
|
||||
bool operator < ( const Tag& r ) const { return locality < r.locality || (locality == r.locality && id < r.id); }
|
||||
|
||||
int toTagDataIndex() {
|
||||
return locality >= 0 ? 2 * locality : 1 - (2 * locality);
|
||||
}
|
||||
int toTagDataIndex() const { return locality >= 0 ? 2 * locality : 1 - (2 * locality); }
|
||||
|
||||
std::string toString() const {
|
||||
return format("%d:%d", locality, id);
|
||||
|
|
|
@ -84,6 +84,9 @@ public:
|
|||
|
||||
virtual void addref() = 0;
|
||||
virtual void delref() = 0;
|
||||
|
||||
// Management API, Attempt to kill or suspend a process, return 1 for success, 0 for failure
|
||||
virtual ThreadFuture<int64_t> rebootWorker(const StringRef& address, bool check, int duration) = 0;
|
||||
};
|
||||
|
||||
class IClientApi {
|
||||
|
|
|
@ -123,6 +123,7 @@ void ClientKnobs::initialize(bool randomize) {
|
|||
init( TASKBUCKET_MAX_TASK_KEYS, 1000 ); if( randomize && BUGGIFY ) TASKBUCKET_MAX_TASK_KEYS = 20;
|
||||
|
||||
//Backup
|
||||
init( BACKUP_LOCAL_FILE_WRITE_BLOCK, 1024*1024 ); if( randomize && BUGGIFY ) BACKUP_LOCAL_FILE_WRITE_BLOCK = 100;
|
||||
init( BACKUP_CONCURRENT_DELETES, 100 );
|
||||
init( BACKUP_SIMULATED_LIMIT_BYTES, 1e6 ); if( randomize && BUGGIFY ) BACKUP_SIMULATED_LIMIT_BYTES = 1000;
|
||||
init( BACKUP_GET_RANGE_LIMIT_BYTES, 1e6 );
|
||||
|
|
|
@ -120,6 +120,7 @@ public:
|
|||
int TASKBUCKET_MAX_TASK_KEYS;
|
||||
|
||||
// Backup
|
||||
int BACKUP_LOCAL_FILE_WRITE_BLOCK;
|
||||
int BACKUP_CONCURRENT_DELETES;
|
||||
int BACKUP_SIMULATED_LIMIT_BYTES;
|
||||
int BACKUP_GET_RANGE_LIMIT_BYTES;
|
||||
|
|
|
@ -284,6 +284,20 @@ Reference<ITransaction> DLDatabase::createTransaction() {
|
|||
void DLDatabase::setOption(FDBDatabaseOptions::Option option, Optional<StringRef> value) {
|
||||
throwIfError(api->databaseSetOption(db, option, value.present() ? value.get().begin() : nullptr, value.present() ? value.get().size() : 0));
|
||||
}
|
||||
|
||||
ThreadFuture<int64_t> DLDatabase::rebootWorker(const StringRef& address, bool check, int duration) {
|
||||
if(!api->databaseRebootWorker) {
|
||||
return unsupported_operation();
|
||||
}
|
||||
|
||||
FdbCApi::FDBFuture *f = api->databaseRebootWorker(db, address.begin(), address.size(), check, duration);
|
||||
return toThreadFuture<int64_t>(api, f, [](FdbCApi::FDBFuture *f, FdbCApi *api) {
|
||||
int64_t res;
|
||||
FdbCApi::fdb_error_t error = api->futureGetInt64(f, &res);
|
||||
ASSERT(!error);
|
||||
return res;
|
||||
});
|
||||
}
|
||||
|
||||
// DLApi
|
||||
template<class T>
|
||||
|
@ -319,6 +333,7 @@ void DLApi::init() {
|
|||
loadClientFunction(&api->databaseCreateTransaction, lib, fdbCPath, "fdb_database_create_transaction");
|
||||
loadClientFunction(&api->databaseSetOption, lib, fdbCPath, "fdb_database_set_option");
|
||||
loadClientFunction(&api->databaseDestroy, lib, fdbCPath, "fdb_database_destroy");
|
||||
loadClientFunction(&api->databaseRebootWorker, lib, fdbCPath, "fdb_database_reboot_worker", headerVersion >= 700);
|
||||
|
||||
loadClientFunction(&api->transactionSetOption, lib, fdbCPath, "fdb_transaction_set_option");
|
||||
loadClientFunction(&api->transactionDestroy, lib, fdbCPath, "fdb_transaction_destroy");
|
||||
|
@ -781,6 +796,13 @@ void MultiVersionDatabase::setOption(FDBDatabaseOptions::Option option, Optional
|
|||
}
|
||||
}
|
||||
|
||||
ThreadFuture<int64_t> MultiVersionDatabase::rebootWorker(const StringRef& address, bool check, int duration) {
|
||||
if (dbState->db) {
|
||||
return dbState->db->rebootWorker(address, check, duration);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void MultiVersionDatabase::Connector::connect() {
|
||||
addref();
|
||||
onMainThreadVoid([this]() {
|
||||
|
|
|
@ -65,7 +65,8 @@ struct FdbCApi : public ThreadSafeReferenceCounted<FdbCApi> {
|
|||
//Database
|
||||
fdb_error_t (*databaseCreateTransaction)(FDBDatabase *database, FDBTransaction **tr);
|
||||
fdb_error_t (*databaseSetOption)(FDBDatabase *database, FDBDatabaseOptions::Option option, uint8_t const *value, int valueLength);
|
||||
void (*databaseDestroy)(FDBDatabase *database);
|
||||
void (*databaseDestroy)(FDBDatabase *database);
|
||||
FDBFuture* (*databaseRebootWorker)(FDBDatabase *database, uint8_t const *address, int addressLength, fdb_bool_t check, int duration);
|
||||
|
||||
//Transaction
|
||||
fdb_error_t (*transactionSetOption)(FDBTransaction *tr, FDBTransactionOptions::Option option, uint8_t const *value, int valueLength);
|
||||
|
@ -109,6 +110,7 @@ struct FdbCApi : public ThreadSafeReferenceCounted<FdbCApi> {
|
|||
fdb_error_t (*futureGetDatabase)(FDBFuture *f, FDBDatabase **outDb);
|
||||
fdb_error_t (*futureGetInt64)(FDBFuture *f, int64_t *outValue);
|
||||
fdb_error_t (*futureGetUInt64)(FDBFuture *f, uint64_t *outValue);
|
||||
fdb_error_t (*futureGetBool) (FDBFuture *f, bool *outValue);
|
||||
fdb_error_t (*futureGetError)(FDBFuture *f);
|
||||
fdb_error_t (*futureGetKey)(FDBFuture *f, uint8_t const **outKey, int *outKeyLength);
|
||||
fdb_error_t (*futureGetValue)(FDBFuture *f, fdb_bool_t *outPresent, uint8_t const **outValue, int *outValueLength);
|
||||
|
@ -194,6 +196,8 @@ public:
|
|||
void addref() override { ThreadSafeReferenceCounted<DLDatabase>::addref(); }
|
||||
void delref() override { ThreadSafeReferenceCounted<DLDatabase>::delref(); }
|
||||
|
||||
ThreadFuture<int64_t> rebootWorker(const StringRef& address, bool check, int duration) override;
|
||||
|
||||
private:
|
||||
const Reference<FdbCApi> api;
|
||||
FdbCApi::FDBDatabase* db; // Always set if API version >= 610, otherwise guaranteed to be set when onReady future is set
|
||||
|
@ -325,6 +329,8 @@ public:
|
|||
|
||||
static Reference<IDatabase> debugCreateFromExistingDatabase(Reference<IDatabase> db);
|
||||
|
||||
ThreadFuture<int64_t> rebootWorker(const StringRef& address, bool check, int duration);
|
||||
|
||||
private:
|
||||
struct DatabaseState;
|
||||
|
||||
|
|
|
@ -868,13 +868,13 @@ DatabaseContext::DatabaseContext(Reference<AsyncVar<Reference<ClusterConnectionF
|
|||
transactionsCommitStarted("CommitStarted", cc), transactionsCommitCompleted("CommitCompleted", cc),
|
||||
transactionKeyServerLocationRequests("KeyServerLocationRequests", cc),
|
||||
transactionKeyServerLocationRequestsCompleted("KeyServerLocationRequestsCompleted", cc),
|
||||
transactionsTooOld("TooOld", cc), transactionsFutureVersions("FutureVersions", cc),
|
||||
transactionsNotCommitted("NotCommitted", cc), transactionsMaybeCommitted("MaybeCommitted", cc),
|
||||
transactionsResourceConstrained("ResourceConstrained", cc), transactionsThrottled("Throttled", cc),
|
||||
transactionsProcessBehind("ProcessBehind", cc), outstandingWatches(0), latencies(1000), readLatencies(1000),
|
||||
commitLatencies(1000), GRVLatencies(1000), mutationsPerCommit(1000), bytesPerCommit(1000), mvCacheInsertLocation(0),
|
||||
healthMetricsLastUpdated(0), detailedHealthMetricsLastUpdated(0), internal(internal), transactionTracingEnabled(true),
|
||||
smoothMidShardSize(CLIENT_KNOBS->SHARD_STAT_SMOOTH_AMOUNT),
|
||||
transactionStatusRequests("StatusRequests", cc), transactionsTooOld("TooOld", cc),
|
||||
transactionsFutureVersions("FutureVersions", cc), transactionsNotCommitted("NotCommitted", cc),
|
||||
transactionsMaybeCommitted("MaybeCommitted", cc), transactionsResourceConstrained("ResourceConstrained", cc),
|
||||
transactionsThrottled("Throttled", cc), transactionsProcessBehind("ProcessBehind", cc), outstandingWatches(0),
|
||||
latencies(1000), readLatencies(1000), commitLatencies(1000), GRVLatencies(1000), mutationsPerCommit(1000),
|
||||
bytesPerCommit(1000), mvCacheInsertLocation(0), healthMetricsLastUpdated(0), detailedHealthMetricsLastUpdated(0),
|
||||
internal(internal), transactionTracingEnabled(true), smoothMidShardSize(CLIENT_KNOBS->SHARD_STAT_SMOOTH_AMOUNT),
|
||||
transactionsExpensiveClearCostEstCount("ExpensiveClearCostEstCount", cc),
|
||||
specialKeySpace(std::make_unique<SpecialKeySpace>(specialKeys.begin, specialKeys.end, /* test */ false)) {
|
||||
dbId = deterministicRandom()->randomUniqueID();
|
||||
|
@ -974,6 +974,7 @@ DatabaseContext::DatabaseContext(Reference<AsyncVar<Reference<ClusterConnectionF
|
|||
[](ReadYourWritesTransaction* ryw) -> Future<Optional<Value>> {
|
||||
if (ryw->getDatabase().getPtr() &&
|
||||
ryw->getDatabase()->getConnectionFile()) {
|
||||
++ryw->getDatabase()->transactionStatusRequests;
|
||||
return getJSON(ryw->getDatabase());
|
||||
} else {
|
||||
return Optional<Value>();
|
||||
|
@ -1042,13 +1043,14 @@ DatabaseContext::DatabaseContext(const Error& err)
|
|||
transactionsCommitStarted("CommitStarted", cc), transactionsCommitCompleted("CommitCompleted", cc),
|
||||
transactionKeyServerLocationRequests("KeyServerLocationRequests", cc),
|
||||
transactionKeyServerLocationRequestsCompleted("KeyServerLocationRequestsCompleted", cc),
|
||||
transactionsTooOld("TooOld", cc), transactionsFutureVersions("FutureVersions", cc),
|
||||
transactionsNotCommitted("NotCommitted", cc), transactionsMaybeCommitted("MaybeCommitted", cc),
|
||||
transactionsResourceConstrained("ResourceConstrained", cc), transactionsThrottled("Throttled", cc),
|
||||
transactionsProcessBehind("ProcessBehind", cc), latencies(1000), readLatencies(1000), commitLatencies(1000),
|
||||
GRVLatencies(1000), mutationsPerCommit(1000), bytesPerCommit(1000),
|
||||
transactionStatusRequests("StatusRequests", cc), transactionsTooOld("TooOld", cc),
|
||||
transactionsFutureVersions("FutureVersions", cc), transactionsNotCommitted("NotCommitted", cc),
|
||||
transactionsMaybeCommitted("MaybeCommitted", cc), transactionsResourceConstrained("ResourceConstrained", cc),
|
||||
transactionsThrottled("Throttled", cc), transactionsProcessBehind("ProcessBehind", cc), latencies(1000),
|
||||
readLatencies(1000), commitLatencies(1000), GRVLatencies(1000), mutationsPerCommit(1000), bytesPerCommit(1000),
|
||||
smoothMidShardSize(CLIENT_KNOBS->SHARD_STAT_SMOOTH_AMOUNT),
|
||||
transactionsExpensiveClearCostEstCount("ExpensiveClearCostEstCount", cc), internal(false), transactionTracingEnabled(true) {}
|
||||
transactionsExpensiveClearCostEstCount("ExpensiveClearCostEstCount", cc), internal(false),
|
||||
transactionTracingEnabled(true) {}
|
||||
|
||||
Database DatabaseContext::create(Reference<AsyncVar<ClientDBInfo>> clientInfo, Future<Void> clientInfoMonitor, LocalityData clientLocality, bool enableLocalityLoadBalance, TaskPriority taskID, bool lockAware, int apiVersion, bool switchable) {
|
||||
return Database( new DatabaseContext( Reference<AsyncVar<Reference<ClusterConnectionFile>>>(), clientInfo, clientInfoMonitor, taskID, clientLocality, enableLocalityLoadBalance, lockAware, true, apiVersion, switchable ) );
|
||||
|
@ -1104,7 +1106,7 @@ bool DatabaseContext::getCachedLocations( const KeyRangeRef& range, vector<std::
|
|||
Reference<LocationInfo> DatabaseContext::setCachedLocation( const KeyRangeRef& keys, const vector<StorageServerInterface>& servers ) {
|
||||
vector<Reference<ReferencedInterface<StorageServerInterface>>> serverRefs;
|
||||
serverRefs.reserve(servers.size());
|
||||
for(auto& interf : servers) {
|
||||
for (const auto& interf : servers) {
|
||||
serverRefs.push_back( StorageServerInfo::getInterface( this, interf, clientLocality ) );
|
||||
}
|
||||
|
||||
|
@ -1850,17 +1852,17 @@ Future< vector< pair<KeyRange,Reference<LocationInfo>> > > getKeyRangeLocations(
|
|||
}
|
||||
|
||||
bool foundFailed = false;
|
||||
for(auto& it : locations) {
|
||||
for (const auto& [range, locInfo] : locations) {
|
||||
bool onlyEndpointFailed = false;
|
||||
for(int i = 0; i < it.second->size(); i++) {
|
||||
if( IFailureMonitor::failureMonitor().onlyEndpointFailed(it.second->get(i, member).getEndpoint()) ) {
|
||||
for (int i = 0; i < locInfo->size(); i++) {
|
||||
if (IFailureMonitor::failureMonitor().onlyEndpointFailed(locInfo->get(i, member).getEndpoint())) {
|
||||
onlyEndpointFailed = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if( onlyEndpointFailed ) {
|
||||
cx->invalidateCache( it.first.begin );
|
||||
cx->invalidateCache(range.begin);
|
||||
foundFailed = true;
|
||||
}
|
||||
}
|
||||
|
@ -1911,6 +1913,7 @@ ACTOR Future<Optional<Value>> getValue( Future<Version> version, Key key, Databa
|
|||
{
|
||||
state Version ver = wait( version );
|
||||
state Span span("NAPI:getValue"_loc, info.spanID);
|
||||
span.addTag("key"_sr, key);
|
||||
cx->validateVersion(ver);
|
||||
|
||||
loop {
|
||||
|
@ -2535,7 +2538,6 @@ ACTOR Future<Standalone<RangeResultRef>> getRange( Database cx, Reference<Transa
|
|||
}
|
||||
|
||||
++cx->transactionPhysicalReads;
|
||||
++cx->transactionGetRangeRequests;
|
||||
state GetKeyValuesReply rep;
|
||||
try {
|
||||
if (CLIENT_BUGGIFY) {
|
||||
|
@ -4779,3 +4781,53 @@ ACTOR Future<bool> checkSafeExclusions(Database cx, vector<AddressExclusion> exc
|
|||
|
||||
return (ddCheck && coordinatorCheck);
|
||||
}
|
||||
|
||||
ACTOR Future<Void> addInterfaceActor( std::map<Key,std::pair<Value,ClientLeaderRegInterface>>* address_interface, Reference<FlowLock> connectLock, KeyValue kv) {
|
||||
wait(connectLock->take());
|
||||
state FlowLock::Releaser releaser(*connectLock);
|
||||
state ClientWorkerInterface workerInterf = BinaryReader::fromStringRef<ClientWorkerInterface>(kv.value, IncludeVersion());
|
||||
state ClientLeaderRegInterface leaderInterf(workerInterf.address());
|
||||
choose {
|
||||
when( Optional<LeaderInfo> rep = wait( brokenPromiseToNever(leaderInterf.getLeader.getReply(GetLeaderRequest())) ) ) {
|
||||
StringRef ip_port =
|
||||
kv.key.endsWith(LiteralStringRef(":tls")) ? kv.key.removeSuffix(LiteralStringRef(":tls")) : kv.key;
|
||||
(*address_interface)[ip_port] = std::make_pair(kv.value, leaderInterf);
|
||||
|
||||
if(workerInterf.reboot.getEndpoint().addresses.secondaryAddress.present()) {
|
||||
Key full_ip_port2 =
|
||||
StringRef(workerInterf.reboot.getEndpoint().addresses.secondaryAddress.get().toString());
|
||||
StringRef ip_port2 = full_ip_port2.endsWith(LiteralStringRef(":tls")) ? full_ip_port2.removeSuffix(LiteralStringRef(":tls")) : full_ip_port2;
|
||||
(*address_interface)[ip_port2] = std::make_pair(kv.value, leaderInterf);
|
||||
}
|
||||
}
|
||||
when( wait(delay(CLIENT_KNOBS->CLI_CONNECT_TIMEOUT)) ) {} // NOTE : change timeout time here if necessary
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<int64_t> rebootWorkerActor(DatabaseContext* cx, ValueRef addr, bool check, int duration) {
|
||||
// ignore negative value
|
||||
if (duration < 0) duration = 0;
|
||||
// fetch the addresses of all workers
|
||||
state std::map<Key,std::pair<Value,ClientLeaderRegInterface>> address_interface;
|
||||
if (!cx->getConnectionFile())
|
||||
return 0;
|
||||
Standalone<RangeResultRef> kvs = wait(getWorkerInterfaces(cx->getConnectionFile()));
|
||||
ASSERT(!kvs.more);
|
||||
// Note: reuse this knob from fdbcli, change it if necessary
|
||||
Reference<FlowLock> connectLock(new FlowLock(CLIENT_KNOBS->CLI_CONNECT_PARALLELISM));
|
||||
std::vector<Future<Void>> addInterfs;
|
||||
for( const auto& it : kvs ) {
|
||||
addInterfs.push_back(addInterfaceActor(&address_interface, connectLock, it));
|
||||
}
|
||||
wait(waitForAll(addInterfs));
|
||||
if (!address_interface.count(addr)) return 0;
|
||||
|
||||
BinaryReader::fromStringRef<ClientWorkerInterface>(address_interface[addr].first, IncludeVersion())
|
||||
.reboot.send(RebootRequest(false, check, duration));
|
||||
return 1;
|
||||
}
|
||||
|
||||
Future<int64_t> DatabaseContext::rebootWorker(StringRef addr, bool check, int duration) {
|
||||
return rebootWorkerActor(this, addr, check, duration);
|
||||
}
|
||||
|
|
|
@ -1238,6 +1238,7 @@ Future< Optional<Value> > ReadYourWritesTransaction::get( const Key& key, bool s
|
|||
} else {
|
||||
if (key == LiteralStringRef("\xff\xff/status/json")) {
|
||||
if (tr.getDatabase().getPtr() && tr.getDatabase()->getConnectionFile()) {
|
||||
++tr.getDatabase()->transactionStatusRequests;
|
||||
return getJSON(tr.getDatabase());
|
||||
} else {
|
||||
return Optional<Value>();
|
||||
|
|
|
@ -58,7 +58,9 @@ struct TransactionDebugInfo : public ReferenceCounted<TransactionDebugInfo> {
|
|||
|
||||
//Values returned by a ReadYourWritesTransaction will contain a reference to the transaction's arena. Therefore, keeping a reference to a value
|
||||
//longer than its creating transaction would hold all of the memory generated by the transaction
|
||||
class ReadYourWritesTransaction : NonCopyable, public ReferenceCounted<ReadYourWritesTransaction>, public FastAllocated<ReadYourWritesTransaction> {
|
||||
class ReadYourWritesTransaction final : NonCopyable,
|
||||
public ReferenceCounted<ReadYourWritesTransaction>,
|
||||
public FastAllocated<ReadYourWritesTransaction> {
|
||||
public:
|
||||
static ReadYourWritesTransaction* allocateOnForeignThread() {
|
||||
ReadYourWritesTransaction *tr = (ReadYourWritesTransaction*)ReadYourWritesTransaction::operator new( sizeof(ReadYourWritesTransaction) );
|
||||
|
@ -115,9 +117,6 @@ public:
|
|||
void operator=(ReadYourWritesTransaction&& r) noexcept;
|
||||
ReadYourWritesTransaction(ReadYourWritesTransaction&& r) noexcept;
|
||||
|
||||
virtual void addref() { ReferenceCounted<ReadYourWritesTransaction>::addref(); }
|
||||
virtual void delref() { ReferenceCounted<ReadYourWritesTransaction>::delref(); }
|
||||
|
||||
void cancel();
|
||||
void reset();
|
||||
void debugTransaction(UID dID) { tr.debugTransaction(dID); }
|
||||
|
|
|
@ -249,7 +249,7 @@ Reference<S3BlobStoreEndpoint> S3BlobStoreEndpoint::fromString(std::string const
|
|||
}
|
||||
}
|
||||
|
||||
std::string S3BlobStoreEndpoint::getResourceURL(std::string resource, std::string params) {
|
||||
std::string S3BlobStoreEndpoint::getResourceURL(std::string resource, std::string params) const {
|
||||
std::string hostPort = host;
|
||||
if (!service.empty()) {
|
||||
hostPort.append(":");
|
||||
|
@ -271,14 +271,14 @@ std::string S3BlobStoreEndpoint::getResourceURL(std::string resource, std::strin
|
|||
params.append(knobParams);
|
||||
}
|
||||
|
||||
for (auto& kv : extraHeaders) {
|
||||
for (const auto& [k, v] : extraHeaders) {
|
||||
if (!params.empty()) {
|
||||
params.append("&");
|
||||
}
|
||||
params.append("header=");
|
||||
params.append(HTTP::urlEncode(kv.first));
|
||||
params.append(HTTP::urlEncode(k));
|
||||
params.append(":");
|
||||
params.append(HTTP::urlEncode(kv.second));
|
||||
params.append(HTTP::urlEncode(v));
|
||||
}
|
||||
|
||||
if (!params.empty()) r.append("?").append(params);
|
||||
|
@ -563,12 +563,12 @@ ACTOR Future<Reference<HTTP::Response>> doRequest_impl(Reference<S3BlobStoreEndp
|
|||
headers["Accept"] = "application/xml";
|
||||
|
||||
// Merge extraHeaders into headers
|
||||
for (auto& kv : bstore->extraHeaders) {
|
||||
std::string& fieldValue = headers[kv.first];
|
||||
for (const auto& [k, v] : bstore->extraHeaders) {
|
||||
std::string& fieldValue = headers[k];
|
||||
if (!fieldValue.empty()) {
|
||||
fieldValue.append(",");
|
||||
}
|
||||
fieldValue.append(kv.second);
|
||||
fieldValue.append(v);
|
||||
}
|
||||
|
||||
// For requests with content to upload, the request timeout should be at least twice the amount of time
|
||||
|
|
|
@ -123,7 +123,7 @@ public:
|
|||
|
||||
// Get a normalized version of this URL with the given resource and any non-default BlobKnob values as URL
|
||||
// parameters in addition to the passed params string
|
||||
std::string getResourceURL(std::string resource, std::string params);
|
||||
std::string getResourceURL(std::string resource, std::string params) const;
|
||||
|
||||
struct ReusableConnection {
|
||||
Reference<IConnection> conn;
|
||||
|
|
|
@ -451,7 +451,7 @@ struct ReadHotRangeWithMetrics {
|
|||
ReadHotRangeWithMetrics(Arena& arena, const ReadHotRangeWithMetrics& rhs)
|
||||
: keys(arena, rhs.keys), density(rhs.density), readBandwidth(rhs.readBandwidth) {}
|
||||
|
||||
int expectedSize() { return keys.expectedSize() + sizeof(density) + sizeof(readBandwidth); }
|
||||
int expectedSize() const { return keys.expectedSize() + sizeof(density) + sizeof(readBandwidth); }
|
||||
|
||||
template <class Ar>
|
||||
void serialize(Ar& ar) {
|
||||
|
|
|
@ -203,10 +203,6 @@ const KeyRangeRef writeConflictRangeKeysRange =
|
|||
LiteralStringRef("\xff\xff/transaction/write_conflict_range/\xff\xff"));
|
||||
|
||||
// "\xff/cacheServer/[[UID]] := StorageServerInterface"
|
||||
// This will be added by the cache server on initialization and removed by DD
|
||||
// TODO[mpilman]: We will need a way to map uint16_t ids to UIDs in a future
|
||||
// versions. For now caches simply cache everything so the ids
|
||||
// are not yet meaningful.
|
||||
const KeyRangeRef storageCacheServerKeys(LiteralStringRef("\xff/cacheServer/"),
|
||||
LiteralStringRef("\xff/cacheServer0"));
|
||||
const KeyRef storageCacheServersPrefix = storageCacheServerKeys.begin;
|
||||
|
@ -598,6 +594,8 @@ ProcessClass decodeProcessClassValue( ValueRef const& value ) {
|
|||
const KeyRangeRef configKeys( LiteralStringRef("\xff/conf/"), LiteralStringRef("\xff/conf0") );
|
||||
const KeyRef configKeysPrefix = configKeys.begin;
|
||||
|
||||
const KeyRef triggerDDTeamInfoPrintKey(LiteralStringRef("\xff/triggerDDTeamInfoPrint"));
|
||||
|
||||
const KeyRangeRef excludedServersKeys( LiteralStringRef("\xff/conf/excluded/"), LiteralStringRef("\xff/conf/excluded0") );
|
||||
const KeyRef excludedServersPrefix = excludedServersKeys.begin;
|
||||
const KeyRef excludedServersVersionKey = LiteralStringRef("\xff/conf/excluded");
|
||||
|
|
|
@ -45,6 +45,9 @@ extern const KeyRangeRef specialKeys; // [FF][FF] to [FF][FF][FF], some client f
|
|||
extern const KeyRef afterAllKeys;
|
||||
|
||||
// "\xff/keyServers/[[begin]]" := "[[vector<serverID>, vector<serverID>]|[vector<Tag>, vector<Tag>]]"
|
||||
// An internal mapping of where shards are located in the database. [[begin]] is the start of the shard range
|
||||
// and the result is a list of serverIDs or Tags where these shards are located. These values can be changed
|
||||
// as data movement occurs.
|
||||
extern const KeyRangeRef keyServersKeys, keyServersKeyServersKeys;
|
||||
extern const KeyRef keyServersPrefix, keyServersEnd, keyServersKeyServersKey;
|
||||
const Key keyServersKey( const KeyRef& k );
|
||||
|
@ -63,6 +66,10 @@ void decodeKeyServersValue( std::map<Tag, UID> const& tag_uid, const ValueRef& v
|
|||
std::vector<UID>& src, std::vector<UID>& dest );
|
||||
|
||||
// "\xff/storageCacheServer/[[UID]] := StorageServerInterface"
|
||||
// This will be added by the cache server on initialization and removed by DD
|
||||
// TODO[mpilman]: We will need a way to map uint16_t ids to UIDs in a future
|
||||
// versions. For now caches simply cache everything so the ids
|
||||
// are not yet meaningful.
|
||||
extern const KeyRangeRef storageCacheServerKeys;
|
||||
extern const KeyRef storageCacheServersPrefix, storageCacheServersEnd;
|
||||
const Key storageCacheServerKey(UID id);
|
||||
|
@ -75,7 +82,11 @@ const Key storageCacheKey( const KeyRef& k );
|
|||
const Value storageCacheValue( const std::vector<uint16_t>& serverIndices );
|
||||
void decodeStorageCacheValue( const ValueRef& value, std::vector<uint16_t>& serverIndices );
|
||||
|
||||
// "\xff/serverKeys/[[serverID]]/[[begin]]" := "" | "1" | "2"
|
||||
// "\xff/serverKeys/[[serverID]]/[[begin]]" := "[[serverKeysTrue]]" |" [[serverKeysFalse]]"
|
||||
// An internal mapping of what shards any given server currently has ownership of
|
||||
// Using the serverID as a prefix, then followed by the beginning of the shard range
|
||||
// as the key, the value indicates whether the shard does or does not exist on the server.
|
||||
// These values can be changed as data movement occurs.
|
||||
extern const KeyRef serverKeysPrefix;
|
||||
extern const ValueRef serverKeysTrue, serverKeysFalse;
|
||||
const Key serverKeysKey( UID serverID, const KeyRef& keys );
|
||||
|
@ -103,6 +114,8 @@ const Key cacheChangeKeyFor( uint16_t idx );
|
|||
uint16_t cacheChangeKeyDecodeIndex( const KeyRef& key );
|
||||
|
||||
// "\xff/serverTag/[[serverID]]" = "[[Tag]]"
|
||||
// Provides the Tag for the given serverID. Used to access a
|
||||
// storage server's corresponding TLog in order to apply mutations.
|
||||
extern const KeyRangeRef serverTagKeys;
|
||||
extern const KeyRef serverTagPrefix;
|
||||
extern const KeyRangeRef serverTagMaxKeys;
|
||||
|
@ -122,6 +135,8 @@ Tag decodeServerTagValue( ValueRef const& );
|
|||
const Key serverTagConflictKeyFor( Tag );
|
||||
|
||||
// "\xff/tagLocalityList/[[datacenterID]]" := "[[tagLocality]]"
|
||||
// Provides the tagLocality for the given datacenterID
|
||||
// See "FDBTypes.h" struct Tag for more details on tagLocality
|
||||
extern const KeyRangeRef tagLocalityListKeys;
|
||||
extern const KeyRef tagLocalityListPrefix;
|
||||
const Key tagLocalityListKeyFor( Optional<Value> dcID );
|
||||
|
@ -130,6 +145,8 @@ Optional<Value> decodeTagLocalityListKey( KeyRef const& );
|
|||
int8_t decodeTagLocalityListValue( ValueRef const& );
|
||||
|
||||
// "\xff\x02/datacenterReplicas/[[datacenterID]]" := "[[replicas]]"
|
||||
// Provides the number of replicas for the given datacenterID.
|
||||
// Used in the initialization of the Data Distributor.
|
||||
extern const KeyRangeRef datacenterReplicasKeys;
|
||||
extern const KeyRef datacenterReplicasPrefix;
|
||||
const Key datacenterReplicasKeyFor( Optional<Value> dcID );
|
||||
|
@ -138,6 +155,8 @@ Optional<Value> decodeDatacenterReplicasKey( KeyRef const& );
|
|||
int decodeDatacenterReplicasValue( ValueRef const& );
|
||||
|
||||
// "\xff\x02/tLogDatacenters/[[datacenterID]]"
|
||||
// The existence of an empty string as a value signifies that the datacenterID is valid
|
||||
// (as opposed to having no value at all)
|
||||
extern const KeyRangeRef tLogDatacentersKeys;
|
||||
extern const KeyRef tLogDatacentersPrefix;
|
||||
const Key tLogDatacentersKeyFor( Optional<Value> dcID );
|
||||
|
@ -170,29 +189,46 @@ ProcessClass decodeProcessClassValue( ValueRef const& );
|
|||
UID decodeProcessClassKeyOld( KeyRef const& key );
|
||||
|
||||
// "\xff/conf/[[option]]" := "value"
|
||||
// An umbrella prefix for options mostly used by the DatabaseConfiguration class.
|
||||
// See DatabaseConfiguration.cpp ::setInternal for more examples.
|
||||
extern const KeyRangeRef configKeys;
|
||||
extern const KeyRef configKeysPrefix;
|
||||
|
||||
// Change the value of this key to anything and that will trigger detailed data distribution team info log.
|
||||
extern const KeyRef triggerDDTeamInfoPrintKey;
|
||||
|
||||
// The differences between excluded and failed can be found in "command-line-interface.rst"
|
||||
// and in the help message of the fdbcli command "exclude".
|
||||
|
||||
// "\xff/conf/excluded/1.2.3.4" := ""
|
||||
// "\xff/conf/excluded/1.2.3.4:4000" := ""
|
||||
// These are inside configKeysPrefix since they represent a form of configuration and they are convenient
|
||||
// to track in the same way by the tlog and recovery process, but they are ignored by the DatabaseConfiguration
|
||||
// class.
|
||||
// The existence of an empty string as a value signifies that the provided IP has been excluded.
|
||||
// (as opposed to having no value at all)
|
||||
extern const KeyRef excludedServersPrefix;
|
||||
extern const KeyRangeRef excludedServersKeys;
|
||||
extern const KeyRef excludedServersVersionKey; // The value of this key shall be changed by any transaction that modifies the excluded servers list
|
||||
const AddressExclusion decodeExcludedServersKey( KeyRef const& key ); // where key.startsWith(excludedServersPrefix)
|
||||
std::string encodeExcludedServersKey( AddressExclusion const& );
|
||||
|
||||
// "\xff/conf/failed/1.2.3.4" := ""
|
||||
// "\xff/conf/failed/1.2.3.4:4000" := ""
|
||||
// These are inside configKeysPrefix since they represent a form of configuration and they are convenient
|
||||
// to track in the same way by the tlog and recovery process, but they are ignored by the DatabaseConfiguration
|
||||
// class.
|
||||
// The existence of an empty string as a value signifies that the provided IP has been marked as failed.
|
||||
// (as opposed to having no value at all)
|
||||
extern const KeyRef failedServersPrefix;
|
||||
extern const KeyRangeRef failedServersKeys;
|
||||
extern const KeyRef failedServersVersionKey; // The value of this key shall be changed by any transaction that modifies the failed servers list
|
||||
const AddressExclusion decodeFailedServersKey( KeyRef const& key ); // where key.startsWith(failedServersPrefix)
|
||||
std::string encodeFailedServersKey( AddressExclusion const& );
|
||||
|
||||
// "\xff/workers/[[processID]]" := ""
|
||||
// Asynchronously updated by the cluster controller, this is a list of fdbserver processes that have joined the cluster
|
||||
// and are currently (recently) available
|
||||
// "\xff/workers/[[processID]]" := ""
|
||||
// Asynchronously updated by the cluster controller, this is a list of fdbserver processes that have joined the cluster
|
||||
// and are currently (recently) available
|
||||
extern const KeyRangeRef workerListKeys;
|
||||
extern const KeyRef workerListPrefix;
|
||||
const Key workerListKeyFor(StringRef processID );
|
||||
|
@ -200,7 +236,9 @@ const Value workerListValue( ProcessData const& );
|
|||
Key decodeWorkerListKey( KeyRef const& );
|
||||
ProcessData decodeWorkerListValue( ValueRef const& );
|
||||
|
||||
// "\xff\x02/backupProgress/[[workerID]]" := "[[WorkerBackupStatus]]"
|
||||
// "\xff\x02/backupProgress/[[workerID]]" := "[[WorkerBackupStatus]]"
|
||||
// Provides the progress for the given backup worker.
|
||||
// See "FDBTypes.h" struct WorkerBackupStatus for more details on the return type value.
|
||||
extern const KeyRangeRef backupProgressKeys;
|
||||
extern const KeyRef backupProgressPrefix;
|
||||
const Key backupProgressKeyFor(UID workerID);
|
||||
|
@ -214,18 +252,31 @@ extern const KeyRef backupStartedKey;
|
|||
Value encodeBackupStartedValue(const std::vector<std::pair<UID, Version>>& ids);
|
||||
std::vector<std::pair<UID, Version>> decodeBackupStartedValue(const ValueRef& value);
|
||||
|
||||
// The key to signal backup workers that they should pause or resume.
|
||||
// The key to signal backup workers that they should resume or pause.
|
||||
// "\xff\x02/backupPaused" := "[[0|1]]"
|
||||
// 0 = Send a signal to resume/already resumed.
|
||||
// 1 = Send a signal to pause/already paused.
|
||||
extern const KeyRef backupPausedKey;
|
||||
|
||||
// "\xff/coordinators" = "[[ClusterConnectionString]]"
|
||||
// Set to the encoded structure of the cluster's current set of coordinators.
|
||||
// Changed when performing quorumChange.
|
||||
// See "CoordinationInterface.h" struct ClusterConnectionString for more details
|
||||
extern const KeyRef coordinatorsKey;
|
||||
|
||||
// "\xff/logs" = "[[LogsValue]]"
|
||||
// Used during master recovery in order to communicate
|
||||
// and store info about the logs system.
|
||||
extern const KeyRef logsKey;
|
||||
|
||||
// "\xff/minRequiredCommitVersion" = "[[Version]]"
|
||||
// Used during backup/recovery to restrict version requirements
|
||||
extern const KeyRef minRequiredCommitVersionKey;
|
||||
|
||||
const Value logsValue( const vector<std::pair<UID, NetworkAddress>>& logs, const vector<std::pair<UID, NetworkAddress>>& oldLogs );
|
||||
std::pair<vector<std::pair<UID, NetworkAddress>>,vector<std::pair<UID, NetworkAddress>>> decodeLogsValue( const ValueRef& value );
|
||||
|
||||
// The "global keys" are send to each storage server any time they are changed
|
||||
// The "global keys" are sent to each storage server any time they are changed
|
||||
extern const KeyRef globalKeysPrefix;
|
||||
extern const KeyRef lastEpochEndKey;
|
||||
extern const KeyRef lastEpochEndPrivateKey;
|
||||
|
@ -253,6 +304,7 @@ extern const KeyRef tagThrottleLimitKey;
|
|||
extern const KeyRef tagThrottleCountKey;
|
||||
|
||||
// Log Range constant variables
|
||||
// Used in the backup pipeline to track mutations
|
||||
// \xff/logRanges/[16-byte UID][begin key] := serialize( make_pair([end key], [destination key prefix]), IncludeVersion() )
|
||||
extern const KeyRangeRef logRangesRange;
|
||||
|
||||
|
@ -397,8 +449,16 @@ std::pair<Key,Version> decodeHealthyZoneValue( ValueRef const& );
|
|||
extern const KeyRangeRef testOnlyTxnStateStorePrefixRange;
|
||||
|
||||
// Snapshot + Incremental Restore
|
||||
|
||||
// "\xff/writeRecovery" = "[[writeRecoveryKeyTrue]]"
|
||||
// Flag used for the snapshot-restore pipeline in order to avoid
|
||||
// anomalous behaviour with multiple recoveries.
|
||||
extern const KeyRef writeRecoveryKey;
|
||||
extern const ValueRef writeRecoveryKeyTrue;
|
||||
|
||||
// "\xff/snapshotEndVersion" = "[[Version]]"
|
||||
// Written by master server during recovery if recovering from a snapshot.
|
||||
// Allows incremental restore to read and set starting version for consistency.
|
||||
extern const KeyRef snapshotEndVersionKey;
|
||||
|
||||
#pragma clang diagnostic pop
|
||||
|
|
|
@ -169,7 +169,7 @@ struct TagThrottleValue {
|
|||
template<class Ar>
|
||||
void serialize(Ar& ar) {
|
||||
if(ar.protocolVersion().hasTagThrottleValueReason()) {
|
||||
serializer(ar, tpsRate, expirationTime, initialDuration, reinterpret_cast<uint8_t&>(reason));
|
||||
serializer(ar, tpsRate, expirationTime, initialDuration, reason);
|
||||
}
|
||||
else if(ar.protocolVersion().hasTagThrottleValue()) {
|
||||
serializer(ar, tpsRate, expirationTime, initialDuration);
|
||||
|
@ -216,8 +216,6 @@ namespace ThrottleApi {
|
|||
Future<Void> enableAuto(Database const& db, bool const& enabled);
|
||||
};
|
||||
|
||||
BINARY_SERIALIZABLE(TransactionPriority);
|
||||
|
||||
template<class Value>
|
||||
using TransactionTagMap = std::unordered_map<TransactionTag, Value, std::hash<TransactionTagRef>>;
|
||||
|
||||
|
|
|
@ -68,6 +68,14 @@ void ThreadSafeDatabase::setOption( FDBDatabaseOptions::Option option, Optional<
|
|||
}, &db->deferredError );
|
||||
}
|
||||
|
||||
ThreadFuture<int64_t> ThreadSafeDatabase::rebootWorker(const StringRef& address, bool check, int duration) {
|
||||
DatabaseContext *db = this->db;
|
||||
Key addressKey = address;
|
||||
return onMainThread( [db, addressKey, check, duration]() -> Future<int64_t> {
|
||||
return db->rebootWorker(addressKey, check, duration);
|
||||
} );
|
||||
}
|
||||
|
||||
ThreadSafeDatabase::ThreadSafeDatabase(std::string connFilename, int apiVersion) {
|
||||
ClusterConnectionFile *connFile = new ClusterConnectionFile(ClusterConnectionFile::lookupClusterFileName(connFilename).first);
|
||||
|
||||
|
|
|
@ -41,6 +41,8 @@ public:
|
|||
void addref() { ThreadSafeReferenceCounted<ThreadSafeDatabase>::addref(); }
|
||||
void delref() { ThreadSafeReferenceCounted<ThreadSafeDatabase>::delref(); }
|
||||
|
||||
ThreadFuture<int64_t> rebootWorker(const StringRef& address, bool check, int duration);
|
||||
|
||||
private:
|
||||
friend class ThreadSafeTransaction;
|
||||
DatabaseContext* db;
|
||||
|
|
|
@ -52,8 +52,8 @@ namespace vexillographer
|
|||
{
|
||||
string parameterComment = "";
|
||||
if (o.scope.ToString().EndsWith("Option"))
|
||||
parameterComment = String.Format("{0}// {1}\n", indent, "Parameter: " + o.getParameterComment());
|
||||
return String.Format("{0}// {2}\n{5}{0}{1}{3}={4}", indent, prefix, o.comment, o.name.ToUpper(), o.code, parameterComment);
|
||||
parameterComment = String.Format("{0}/* {1} */\n", indent, "Parameter: " + o.getParameterComment());
|
||||
return String.Format("{0}/* {2} */\n{5}{0}{1}{3}={4}", indent, prefix, o.comment, o.name.ToUpper(), o.code, parameterComment);
|
||||
}
|
||||
|
||||
private static void writeCEnum(TextWriter outFile, Scope scope, IEnumerable<Option> options)
|
||||
|
|
|
@ -142,7 +142,7 @@ struct OpenFileInfo : NonCopyable {
|
|||
|
||||
struct AFCPage;
|
||||
|
||||
class AsyncFileCached : public IAsyncFile, public ReferenceCounted<AsyncFileCached> {
|
||||
class AsyncFileCached final : public IAsyncFile, public ReferenceCounted<AsyncFileCached> {
|
||||
friend struct AFCPage;
|
||||
|
||||
public:
|
||||
|
@ -221,11 +221,11 @@ public:
|
|||
|
||||
std::string getFilename() const override { return filename; }
|
||||
|
||||
virtual void addref() {
|
||||
void addref() override {
|
||||
ReferenceCounted<AsyncFileCached>::addref();
|
||||
//TraceEvent("AsyncFileCachedAddRef").detail("Filename", filename).detail("Refcount", debugGetReferenceCount()).backtrace();
|
||||
}
|
||||
virtual void delref() {
|
||||
void delref() override {
|
||||
if (delref_no_destroy()) {
|
||||
// If this is ever ThreadSafeReferenceCounted...
|
||||
// setrefCountUnsafe(0);
|
||||
|
|
|
@ -52,7 +52,7 @@ DESCR struct SlowAioSubmit {
|
|||
int64_t largestTruncate;
|
||||
};
|
||||
|
||||
class AsyncFileKAIO : public IAsyncFile, public ReferenceCounted<AsyncFileKAIO> {
|
||||
class AsyncFileKAIO final : public IAsyncFile, public ReferenceCounted<AsyncFileKAIO> {
|
||||
public:
|
||||
|
||||
#if KAIO_LOGGING
|
||||
|
@ -179,8 +179,8 @@ public:
|
|||
static int get_eventfd() { return ctx.evfd; }
|
||||
static void setTimeout(double ioTimeout) { ctx.setIOTimeout(ioTimeout); }
|
||||
|
||||
virtual void addref() { ReferenceCounted<AsyncFileKAIO>::addref(); }
|
||||
virtual void delref() { ReferenceCounted<AsyncFileKAIO>::delref(); }
|
||||
void addref() override { ReferenceCounted<AsyncFileKAIO>::addref(); }
|
||||
void delref() override { ReferenceCounted<AsyncFileKAIO>::delref(); }
|
||||
|
||||
Future<int> read(void* data, int length, int64_t offset) override {
|
||||
++countFileLogicalReads;
|
||||
|
|
|
@ -53,7 +53,7 @@ Future<T> sendErrorOnShutdown( Future<T> in ) {
|
|||
}
|
||||
}
|
||||
|
||||
class AsyncFileDetachable sealed : public IAsyncFile, public ReferenceCounted<AsyncFileDetachable>{
|
||||
class AsyncFileDetachable final : public IAsyncFile, public ReferenceCounted<AsyncFileDetachable> {
|
||||
private:
|
||||
Reference<IAsyncFile> file;
|
||||
Future<Void> shutdown;
|
||||
|
@ -125,7 +125,7 @@ public:
|
|||
|
||||
//An async file implementation which wraps another async file and will randomly destroy sectors that it is writing when killed
|
||||
//This is used to simulate a power failure which prevents all written data from being persisted to disk
|
||||
class AsyncFileNonDurable sealed : public IAsyncFile, public ReferenceCounted<AsyncFileNonDurable>{
|
||||
class AsyncFileNonDurable final : public IAsyncFile, public ReferenceCounted<AsyncFileNonDurable> {
|
||||
public:
|
||||
UID id;
|
||||
std::string filename;
|
||||
|
|
|
@ -35,7 +35,7 @@
|
|||
#undef min
|
||||
#undef max
|
||||
|
||||
class AsyncFileWinASIO : public IAsyncFile, public ReferenceCounted<AsyncFileWinASIO> {
|
||||
class AsyncFileWinASIO final : public IAsyncFile, public ReferenceCounted<AsyncFileWinASIO> {
|
||||
public:
|
||||
static void init() {}
|
||||
|
||||
|
@ -84,8 +84,8 @@ public:
|
|||
return buf.st_mtime;
|
||||
}
|
||||
|
||||
virtual void addref() { ReferenceCounted<AsyncFileWinASIO>::addref(); }
|
||||
virtual void delref() { ReferenceCounted<AsyncFileWinASIO>::delref(); }
|
||||
void addref() override { ReferenceCounted<AsyncFileWinASIO>::addref(); }
|
||||
void delref() override { ReferenceCounted<AsyncFileWinASIO>::delref(); }
|
||||
|
||||
int64_t debugFD() const override { return (int64_t)(const_cast<decltype(file)&>(file).native_handle()); }
|
||||
|
||||
|
|
|
@ -855,6 +855,9 @@ static bool checkCompatible(const PeerCompatibilityPolicy& policy, ProtocolVersi
|
|||
return version.version() == policy.version.version();
|
||||
case RequirePeer::AtLeast:
|
||||
return version.version() >= policy.version.version();
|
||||
default:
|
||||
ASSERT(false);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -108,7 +108,7 @@ ACTOR static Future<Void> incrementalDeleteHelper( std::string filename, bool mu
|
|||
return Void();
|
||||
}
|
||||
|
||||
Future<Void> IAsyncFileSystem::incrementalDeleteFile( std::string filename, bool mustBeDurable ) {
|
||||
Future<Void> IAsyncFileSystem::incrementalDeleteFile(const std::string& filename, bool mustBeDurable) {
|
||||
return uncancellable(incrementalDeleteHelper(
|
||||
filename,
|
||||
mustBeDurable,
|
||||
|
|
|
@ -88,17 +88,17 @@ typedef void (*runCycleFuncPtr)();
|
|||
class IAsyncFileSystem {
|
||||
public:
|
||||
// Opens a file for asynchronous I/O
|
||||
virtual Future< Reference<class IAsyncFile> > open( std::string filename, int64_t flags, int64_t mode ) = 0;
|
||||
virtual Future<Reference<class IAsyncFile>> open(const std::string& filename, int64_t flags, int64_t mode) = 0;
|
||||
|
||||
// Deletes the given file. If mustBeDurable, returns only when the file is guaranteed to be deleted even after a power failure.
|
||||
virtual Future< Void > deleteFile( std::string filename, bool mustBeDurable ) = 0;
|
||||
virtual Future<Void> deleteFile(const std::string& filename, bool mustBeDurable) = 0;
|
||||
|
||||
// Unlinks a file and then deletes it slowly by truncating the file repeatedly.
|
||||
// If mustBeDurable, returns only when the file is guaranteed to be deleted even after a power failure.
|
||||
virtual Future<Void> incrementalDeleteFile( std::string filename, bool mustBeDurable );
|
||||
virtual Future<Void> incrementalDeleteFile(const std::string& filename, bool mustBeDurable);
|
||||
|
||||
// Returns the time of the last modification of the file.
|
||||
virtual Future<std::time_t> lastWriteTime( std::string filename ) = 0;
|
||||
virtual Future<std::time_t> lastWriteTime(const std::string& filename) = 0;
|
||||
|
||||
static IAsyncFileSystem* filesystem() { return filesystem(g_network); }
|
||||
static runCycleFuncPtr runCycleFunc() { return reinterpret_cast<runCycleFuncPtr>(reinterpret_cast<flowGlobalType>(g_network->global(INetwork::enRunCycleFunc))); }
|
||||
|
|
|
@ -40,8 +40,7 @@
|
|||
#include "fdbrpc/AsyncFileWriteChecker.h"
|
||||
|
||||
// Opens a file for asynchronous I/O
|
||||
Future< Reference<class IAsyncFile> > Net2FileSystem::open( std::string filename, int64_t flags, int64_t mode )
|
||||
{
|
||||
Future<Reference<class IAsyncFile>> Net2FileSystem::open(const std::string& filename, int64_t flags, int64_t mode) {
|
||||
#ifdef __linux__
|
||||
if (checkFileSystem) {
|
||||
dev_t fileDeviceId = getDeviceId(filename);
|
||||
|
@ -75,22 +74,19 @@ Future< Reference<class IAsyncFile> > Net2FileSystem::open( std::string filename
|
|||
}
|
||||
|
||||
// Deletes the given file. If mustBeDurable, returns only when the file is guaranteed to be deleted even after a power failure.
|
||||
Future< Void > Net2FileSystem::deleteFile( std::string filename, bool mustBeDurable )
|
||||
{
|
||||
Future<Void> Net2FileSystem::deleteFile(const std::string& filename, bool mustBeDurable) {
|
||||
return Net2AsyncFile::deleteFile(filename, mustBeDurable);
|
||||
}
|
||||
|
||||
Future< std::time_t > Net2FileSystem::lastWriteTime( std::string filename ) {
|
||||
Future<std::time_t> Net2FileSystem::lastWriteTime(const std::string& filename) {
|
||||
return Net2AsyncFile::lastWriteTime( filename );
|
||||
}
|
||||
|
||||
void Net2FileSystem::newFileSystem(double ioTimeout, std::string fileSystemPath)
|
||||
{
|
||||
void Net2FileSystem::newFileSystem(double ioTimeout, const std::string& fileSystemPath) {
|
||||
g_network->setGlobal(INetwork::enFileSystem, (flowGlobalType) new Net2FileSystem(ioTimeout, fileSystemPath));
|
||||
}
|
||||
|
||||
Net2FileSystem::Net2FileSystem(double ioTimeout, std::string fileSystemPath)
|
||||
{
|
||||
Net2FileSystem::Net2FileSystem(double ioTimeout, const std::string& fileSystemPath) {
|
||||
Net2AsyncFile::init();
|
||||
#ifdef __linux__
|
||||
if (!FLOW_KNOBS->DISABLE_POSIX_KERNEL_AIO)
|
||||
|
|
|
@ -24,25 +24,25 @@
|
|||
|
||||
#include "fdbrpc/IAsyncFile.h"
|
||||
|
||||
class Net2FileSystem : public IAsyncFileSystem {
|
||||
class Net2FileSystem final : public IAsyncFileSystem {
|
||||
public:
|
||||
// Opens a file for asynchronous I/O
|
||||
virtual Future< Reference<class IAsyncFile> > open( std::string filename, int64_t flags, int64_t mode );
|
||||
Future<Reference<class IAsyncFile>> open(const std::string& filename, int64_t flags, int64_t mode) override;
|
||||
|
||||
// Deletes the given file. If mustBeDurable, returns only when the file is guaranteed to be deleted even after a power failure.
|
||||
virtual Future< Void > deleteFile( std::string filename, bool mustBeDurable );
|
||||
Future<Void> deleteFile(const std::string& filename, bool mustBeDurable) override;
|
||||
|
||||
// Returns the time of the last modification of the file.
|
||||
virtual Future< std::time_t > lastWriteTime( std::string filename );
|
||||
Future<std::time_t> lastWriteTime(const std::string& filename) override;
|
||||
|
||||
//void init();
|
||||
static void stop();
|
||||
|
||||
Net2FileSystem(double ioTimeout=0.0, std::string fileSystemPath = "");
|
||||
Net2FileSystem(double ioTimeout = 0.0, const std::string& fileSystemPath = "");
|
||||
|
||||
virtual ~Net2FileSystem() {}
|
||||
|
||||
static void newFileSystem(double ioTimeout=0.0, std::string fileSystemPath = "");
|
||||
static void newFileSystem(double ioTimeout = 0.0, const std::string& fileSystemPath = "");
|
||||
|
||||
#ifdef __linux__
|
||||
dev_t fileSystemDeviceId;
|
||||
|
|
|
@ -152,15 +152,16 @@ Future<Void> traceCounters(std::string const& traceEventName, UID const& traceEv
|
|||
|
||||
class LatencyBands {
|
||||
public:
|
||||
LatencyBands(std::string name, UID id, double loggingInterval) : name(name), id(id), loggingInterval(loggingInterval), cc(nullptr), filteredCount(nullptr) {}
|
||||
LatencyBands(std::string name, UID id, double loggingInterval)
|
||||
: name(name), id(id), loggingInterval(loggingInterval) {}
|
||||
|
||||
void addThreshold(double value) {
|
||||
if(value > 0 && bands.count(value) == 0) {
|
||||
if(bands.size() == 0) {
|
||||
ASSERT(!cc && !filteredCount);
|
||||
cc = new CounterCollection(name, id.toString());
|
||||
logger = traceCounters(name, id, loggingInterval, cc, id.toString() + "/" + name);
|
||||
filteredCount = new Counter("Filtered", *cc);
|
||||
cc = std::make_unique<CounterCollection>(name, id.toString());
|
||||
logger = traceCounters(name, id, loggingInterval, cc.get(), id.toString() + "/" + name);
|
||||
filteredCount = std::make_unique<Counter>("Filtered", *cc);
|
||||
insertBand(std::numeric_limits<double>::infinity());
|
||||
}
|
||||
|
||||
|
@ -181,18 +182,9 @@ public:
|
|||
|
||||
void clearBands() {
|
||||
logger = Void();
|
||||
|
||||
for(auto itr : bands) {
|
||||
delete itr.second;
|
||||
}
|
||||
|
||||
bands.clear();
|
||||
|
||||
delete filteredCount;
|
||||
delete cc;
|
||||
|
||||
filteredCount = nullptr;
|
||||
cc = nullptr;
|
||||
filteredCount.reset();
|
||||
cc.reset();
|
||||
}
|
||||
|
||||
~LatencyBands() {
|
||||
|
@ -200,18 +192,18 @@ public:
|
|||
}
|
||||
|
||||
private:
|
||||
std::map<double, Counter*> bands;
|
||||
Counter *filteredCount;
|
||||
std::map<double, std::unique_ptr<Counter>> bands;
|
||||
std::unique_ptr<Counter> filteredCount;
|
||||
|
||||
std::string name;
|
||||
UID id;
|
||||
double loggingInterval;
|
||||
|
||||
CounterCollection *cc;
|
||||
std::unique_ptr<CounterCollection> cc;
|
||||
Future<Void> logger;
|
||||
|
||||
void insertBand(double value) {
|
||||
bands.insert(std::make_pair(value, new Counter(format("Band%f", value), *cc)));
|
||||
bands.emplace(std::make_pair(value, std::make_unique<Counter>(format("Band%f", value), *cc)));
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -111,7 +111,7 @@ struct NetSAV final : SAV<T>, FlowReceiver, FastAllocated<NetSAV<T>> {
|
|||
};
|
||||
|
||||
template <class T>
|
||||
class ReplyPromise sealed : public ComposedIdentifier<T, 1> {
|
||||
class ReplyPromise final : public ComposedIdentifier<T, 1> {
|
||||
public:
|
||||
template <class U>
|
||||
void send(U&& value) const {
|
||||
|
|
|
@ -97,18 +97,9 @@ void ISimulator::displayWorkers() const
|
|||
int openCount = 0;
|
||||
|
||||
struct SimClogging {
|
||||
double getSendDelay( NetworkAddress from, NetworkAddress to ) {
|
||||
return halfLatency();
|
||||
double tnow = now();
|
||||
double t = tnow + halfLatency();
|
||||
double getSendDelay(NetworkAddress from, NetworkAddress to) const { return halfLatency(); }
|
||||
|
||||
if (!g_simulator.speedUpSimulation && clogSendUntil.count( to.ip ))
|
||||
t = std::max( t, clogSendUntil[ to.ip ] );
|
||||
|
||||
return t - tnow;
|
||||
}
|
||||
|
||||
double getRecvDelay( NetworkAddress from, NetworkAddress to ) {
|
||||
double getRecvDelay(NetworkAddress from, NetworkAddress to) {
|
||||
auto pair = std::make_pair( from.ip, to.ip );
|
||||
|
||||
double tnow = now();
|
||||
|
@ -1905,6 +1896,10 @@ public:
|
|||
return _localAddress;
|
||||
}
|
||||
|
||||
boost::asio::ip::udp::socket::native_handle_type native_handle() override {
|
||||
return 0;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
Future<Reference<IUDPSocket>> Sim2::createUDPSocket(NetworkAddress toAddr) {
|
||||
|
@ -2041,8 +2036,7 @@ int sf_open( const char* filename, int flags, int convFlags, int mode ) {
|
|||
#endif
|
||||
|
||||
// Opens a file for asynchronous I/O
|
||||
Future< Reference<class IAsyncFile> > Sim2FileSystem::open( std::string filename, int64_t flags, int64_t mode )
|
||||
{
|
||||
Future<Reference<class IAsyncFile>> Sim2FileSystem::open(const std::string& filename, int64_t flags, int64_t mode) {
|
||||
ASSERT( (flags & IAsyncFile::OPEN_ATOMIC_WRITE_AND_CREATE) ||
|
||||
!(flags & IAsyncFile::OPEN_CREATE) ||
|
||||
StringRef(filename).endsWith(LiteralStringRef(".fdb-lock")) ); // We don't use "ordinary" non-atomic file creation right now except for folder locking, and we don't have code to simulate its unsafeness.
|
||||
|
@ -2079,12 +2073,11 @@ Future< Reference<class IAsyncFile> > Sim2FileSystem::open( std::string filename
|
|||
}
|
||||
|
||||
// Deletes the given file. If mustBeDurable, returns only when the file is guaranteed to be deleted even after a power failure.
|
||||
Future< Void > Sim2FileSystem::deleteFile( std::string filename, bool mustBeDurable )
|
||||
{
|
||||
Future<Void> Sim2FileSystem::deleteFile(const std::string& filename, bool mustBeDurable) {
|
||||
return Sim2::deleteFileImpl(&g_sim2, filename, mustBeDurable);
|
||||
}
|
||||
|
||||
Future< std::time_t > Sim2FileSystem::lastWriteTime( std::string filename ) {
|
||||
Future<std::time_t> Sim2FileSystem::lastWriteTime(const std::string& filename) {
|
||||
// TODO: update this map upon file writes.
|
||||
static std::map<std::string, double> fileWrites;
|
||||
if (BUGGIFY && deterministicRandom()->random01() < 0.01) {
|
||||
|
|
|
@ -36,12 +36,16 @@ enum ClogMode { ClogDefault, ClogAll, ClogSend, ClogReceive };
|
|||
|
||||
class ISimulator : public INetwork {
|
||||
public:
|
||||
ISimulator() : desiredCoordinators(1), physicalDatacenters(1), processesPerMachine(0), listenersPerProcess(1), isStopped(false), lastConnectionFailure(0), connectionFailuresDisableDuration(0), speedUpSimulation(false), allSwapsDisabled(false), backupAgents(WaitForType), drAgents(WaitForType), extraDB(nullptr), allowLogSetKills(true), usableRegions(1) {}
|
||||
ISimulator()
|
||||
: desiredCoordinators(1), physicalDatacenters(1), processesPerMachine(0), listenersPerProcess(1),
|
||||
isStopped(false), lastConnectionFailure(0), connectionFailuresDisableDuration(0), speedUpSimulation(false),
|
||||
allSwapsDisabled(false), backupAgents(BackupAgentType::WaitForType), drAgents(BackupAgentType::WaitForType), extraDB(nullptr),
|
||||
allowLogSetKills(true), usableRegions(1) {}
|
||||
|
||||
// Order matters!
|
||||
enum KillType { KillInstantly, InjectFaults, RebootAndDelete, RebootProcessAndDelete, Reboot, RebootProcess, None };
|
||||
|
||||
enum BackupAgentType { NoBackupAgents, WaitForType, BackupToFile, BackupToDB };
|
||||
enum class BackupAgentType { NoBackupAgents, WaitForType, BackupToFile, BackupToDB };
|
||||
|
||||
// Subclasses may subclass ProcessInfo as well
|
||||
struct MachineInfo;
|
||||
|
@ -89,7 +93,7 @@ public:
|
|||
bool isAvailable() const { return !isExcluded() && isReliable(); }
|
||||
bool isExcluded() const { return excluded; }
|
||||
bool isCleared() const { return cleared; }
|
||||
std::string getReliableInfo() {
|
||||
std::string getReliableInfo() const {
|
||||
std::stringstream ss;
|
||||
ss << "failed:" << failed << " fault_injection_p1:" << fault_injection_p1
|
||||
<< " fault_injection_p2:" << fault_injection_p2;
|
||||
|
@ -123,7 +127,7 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
const Reference<IListener> getListener(const NetworkAddress& addr) {
|
||||
Reference<IListener> getListener(const NetworkAddress& addr) const {
|
||||
auto listener = listenerMap.find(addr);
|
||||
ASSERT( listener != listenerMap.end());
|
||||
return listener->second;
|
||||
|
@ -153,7 +157,7 @@ public:
|
|||
std::set<std::string> closingFiles;
|
||||
Optional<Standalone<StringRef>> machineId;
|
||||
|
||||
MachineInfo() : machineProcess(0) {}
|
||||
MachineInfo() : machineProcess(nullptr) {}
|
||||
};
|
||||
|
||||
ProcessInfo* getProcess( Endpoint const& endpoint ) { return getProcessByAddress(endpoint.getPrimaryAddress()); }
|
||||
|
@ -178,15 +182,13 @@ public:
|
|||
virtual bool isAvailable() const = 0;
|
||||
virtual bool datacenterDead(Optional<Standalone<StringRef>> dcId) const = 0;
|
||||
virtual void displayWorkers() const;
|
||||
|
||||
virtual ProtocolVersion protocolVersion() = 0;
|
||||
|
||||
virtual void addRole(NetworkAddress const& address, std::string const& role) {
|
||||
void addRole(NetworkAddress const& address, std::string const& role) {
|
||||
roleAddresses[address][role] ++;
|
||||
TraceEvent("RoleAdd").detail("Address", address).detail("Role", role).detail("NumRoles", roleAddresses[address].size()).detail("Value", roleAddresses[address][role]);
|
||||
}
|
||||
|
||||
virtual void removeRole(NetworkAddress const& address, std::string const& role) {
|
||||
void removeRole(NetworkAddress const& address, std::string const& role) {
|
||||
auto addressIt = roleAddresses.find(address);
|
||||
if (addressIt != roleAddresses.end()) {
|
||||
auto rolesIt = addressIt->second.find(role);
|
||||
|
@ -215,7 +217,7 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
virtual std::string getRoles(NetworkAddress const& address, bool skipWorkers = true) const {
|
||||
std::string getRoles(NetworkAddress const& address, bool skipWorkers = true) const {
|
||||
auto addressIt = roleAddresses.find(address);
|
||||
std::string roleText;
|
||||
if (addressIt != roleAddresses.end()) {
|
||||
|
@ -229,20 +231,20 @@ public:
|
|||
return roleText;
|
||||
}
|
||||
|
||||
virtual void clearAddress(NetworkAddress const& address) {
|
||||
void clearAddress(NetworkAddress const& address) {
|
||||
clearedAddresses[address]++;
|
||||
TraceEvent("ClearAddress").detail("Address", address).detail("Value", clearedAddresses[address]);
|
||||
}
|
||||
virtual bool isCleared(NetworkAddress const& address) const {
|
||||
bool isCleared(NetworkAddress const& address) const {
|
||||
return clearedAddresses.find(address) != clearedAddresses.end();
|
||||
}
|
||||
|
||||
virtual void excludeAddress(NetworkAddress const& address) {
|
||||
void excludeAddress(NetworkAddress const& address) {
|
||||
excludedAddresses[address]++;
|
||||
TraceEvent("ExcludeAddress").detail("Address", address).detail("Value", excludedAddresses[address]);
|
||||
}
|
||||
|
||||
virtual void includeAddress(NetworkAddress const& address) {
|
||||
void includeAddress(NetworkAddress const& address) {
|
||||
auto addressIt = excludedAddresses.find(address);
|
||||
if (addressIt != excludedAddresses.end()) {
|
||||
if (addressIt->second > 1) {
|
||||
|
@ -258,29 +260,27 @@ public:
|
|||
TraceEvent(SevWarn,"IncludeAddress").detail("Address", address).detail("Result", "Missing");
|
||||
}
|
||||
}
|
||||
virtual void includeAllAddresses() {
|
||||
void includeAllAddresses() {
|
||||
TraceEvent("IncludeAddressAll").detail("AddressTotal", excludedAddresses.size());
|
||||
excludedAddresses.clear();
|
||||
}
|
||||
virtual bool isExcluded(NetworkAddress const& address) const {
|
||||
bool isExcluded(NetworkAddress const& address) const {
|
||||
return excludedAddresses.find(address) != excludedAddresses.end();
|
||||
}
|
||||
|
||||
virtual void disableSwapToMachine(Optional<Standalone<StringRef>> zoneId ) {
|
||||
swapsDisabled.insert(zoneId);
|
||||
}
|
||||
virtual void enableSwapToMachine(Optional<Standalone<StringRef>> zoneId ) {
|
||||
void disableSwapToMachine(Optional<Standalone<StringRef>> zoneId) { swapsDisabled.insert(zoneId); }
|
||||
void enableSwapToMachine(Optional<Standalone<StringRef>> zoneId) {
|
||||
swapsDisabled.erase(zoneId);
|
||||
allSwapsDisabled = false;
|
||||
}
|
||||
virtual bool canSwapToMachine(Optional<Standalone<StringRef>> zoneId ) {
|
||||
bool canSwapToMachine(Optional<Standalone<StringRef>> zoneId) const {
|
||||
return swapsDisabled.count( zoneId ) == 0 && !allSwapsDisabled && !extraDB;
|
||||
}
|
||||
virtual void enableSwapsToAll() {
|
||||
void enableSwapsToAll() {
|
||||
swapsDisabled.clear();
|
||||
allSwapsDisabled = false;
|
||||
}
|
||||
virtual void disableSwapsToAll() {
|
||||
void disableSwapsToAll() {
|
||||
swapsDisabled.clear();
|
||||
allSwapsDisabled = true;
|
||||
}
|
||||
|
@ -335,15 +335,12 @@ public:
|
|||
bool hasDiffProtocolProcess; // true if simulator is testing a process with a different version
|
||||
bool setDiffProtocol; // true if a process with a different protocol version has been started
|
||||
|
||||
virtual flowGlobalType global(int id) const { return getCurrentProcess()->global(id); };
|
||||
virtual void setGlobal(size_t id, flowGlobalType v) { getCurrentProcess()->setGlobal(id,v); };
|
||||
flowGlobalType global(int id) const final { return getCurrentProcess()->global(id); };
|
||||
void setGlobal(size_t id, flowGlobalType v) final { getCurrentProcess()->setGlobal(id, v); };
|
||||
|
||||
virtual void disableFor(const std::string& desc, double time) {
|
||||
disabledMap[desc] = time;
|
||||
}
|
||||
void disableFor(const std::string& desc, double time) { disabledMap[desc] = time; }
|
||||
|
||||
virtual double checkDisabled(const std::string& desc) const
|
||||
{
|
||||
double checkDisabled(const std::string& desc) const {
|
||||
auto iter = disabledMap.find(desc);
|
||||
if (iter != disabledMap.end()) {
|
||||
return iter->second;
|
||||
|
@ -386,12 +383,12 @@ extern Future<Void> waitUntilDiskReady(Reference<DiskParameters> parameters, int
|
|||
class Sim2FileSystem : public IAsyncFileSystem {
|
||||
public:
|
||||
// Opens a file for asynchronous I/O
|
||||
virtual Future< Reference<class IAsyncFile> > open( std::string filename, int64_t flags, int64_t mode );
|
||||
Future<Reference<class IAsyncFile>> open(const std::string& filename, int64_t flags, int64_t mode) override;
|
||||
|
||||
// Deletes the given file. If mustBeDurable, returns only when the file is guaranteed to be deleted even after a power failure.
|
||||
virtual Future< Void > deleteFile( std::string filename, bool mustBeDurable );
|
||||
Future<Void> deleteFile(const std::string& filename, bool mustBeDurable) override;
|
||||
|
||||
virtual Future< std::time_t > lastWriteTime( std::string filename );
|
||||
Future<std::time_t> lastWriteTime(const std::string& filename) override;
|
||||
|
||||
Sim2FileSystem() {}
|
||||
|
||||
|
|
|
@ -70,7 +70,7 @@ public:
|
|||
void delref() { ReferenceCounted<BackupProgress>::delref(); }
|
||||
|
||||
private:
|
||||
std::set<Tag> enumerateLogRouterTags(int logRouterTags) {
|
||||
std::set<Tag> enumerateLogRouterTags(int logRouterTags) const {
|
||||
std::set<Tag> tags;
|
||||
for (int i = 0; i < logRouterTags; i++) {
|
||||
tags.insert(Tag(tagLocalityLogRouter, i));
|
||||
|
|
|
@ -288,9 +288,10 @@ bool isWhitelisted(const vector<Standalone<StringRef>>& binPathVec, StringRef bi
|
|||
return std::find(binPathVec.begin(), binPathVec.end(), binPath) != binPathVec.end();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> addBackupMutations(ProxyCommitData* self, std::map<Key, MutationListRef>* logRangeMutations,
|
||||
LogPushData* toCommit, Version commitVersion, double* computeDuration, double* computeStart) {
|
||||
state std::map<Key, MutationListRef>::iterator logRangeMutation = logRangeMutations->begin();
|
||||
ACTOR Future<Void> addBackupMutations(ProxyCommitData* self, const std::map<Key, MutationListRef>* logRangeMutations,
|
||||
LogPushData* toCommit, Version commitVersion, double* computeDuration,
|
||||
double* computeStart) {
|
||||
state std::map<Key, MutationListRef>::const_iterator logRangeMutation = logRangeMutations->cbegin();
|
||||
state int32_t version = commitVersion / CLIENT_KNOBS->LOG_RANGE_BLOCK_SIZE;
|
||||
state int yieldBytes = 0;
|
||||
state BinaryWriter valueWriter(Unversioned());
|
||||
|
@ -298,8 +299,7 @@ ACTOR Future<Void> addBackupMutations(ProxyCommitData* self, std::map<Key, Mutat
|
|||
toCommit->addTransactionInfo(SpanID());
|
||||
|
||||
// Serialize the log range mutations within the map
|
||||
for (; logRangeMutation != logRangeMutations->end(); ++logRangeMutation)
|
||||
{
|
||||
for (; logRangeMutation != logRangeMutations->cend(); ++logRangeMutation) {
|
||||
//FIXME: this is re-implementing the serialize function of MutationListRef in order to have a yield
|
||||
valueWriter = BinaryWriter(IncludeVersion(ProtocolVersion::withBackupMutations()));
|
||||
valueWriter << logRangeMutation->second.totalSize();
|
||||
|
@ -391,6 +391,7 @@ struct CommitBatchContext {
|
|||
Optional<UID> debugID;
|
||||
|
||||
bool forceRecovery = false;
|
||||
bool rejected = false; // If rejected due to long queue length
|
||||
|
||||
int64_t localBatchNumber;
|
||||
LogPushData toCommit;
|
||||
|
@ -527,6 +528,20 @@ void CommitBatchContext::evaluateBatchSize() {
|
|||
}
|
||||
}
|
||||
|
||||
// Try to identify recovery transaction and backup's apply mutations (blind writes).
|
||||
// Both cannot be rejected and are approximated by looking at first mutation
|
||||
// starting with 0xff.
|
||||
bool canReject(const std::vector<CommitTransactionRequest>& trs) {
|
||||
for (const auto& tr : trs) {
|
||||
if (tr.transaction.mutations.empty()) continue;
|
||||
if (tr.transaction.mutations[0].param1.startsWith(LiteralStringRef("\xff")) ||
|
||||
tr.transaction.read_conflict_ranges.empty()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
ACTOR Future<Void> preresolutionProcessing(CommitBatchContext* self) {
|
||||
|
||||
state ProxyCommitData* const pProxyCommitData = self->pProxyCommitData;
|
||||
|
@ -535,6 +550,7 @@ ACTOR Future<Void> preresolutionProcessing(CommitBatchContext* self) {
|
|||
state const int latencyBucket = self->latencyBucket;
|
||||
state const Optional<UID>& debugID = self->debugID;
|
||||
state Span span("MP:preresolutionProcessing"_loc, self->span.context);
|
||||
state double timeStart = now();
|
||||
|
||||
if (self->localBatchNumber - self->pProxyCommitData->latestLocalCommitBatchResolving.get() >
|
||||
SERVER_KNOBS->RESET_MASTER_BATCHES &&
|
||||
|
@ -549,6 +565,33 @@ ACTOR Future<Void> preresolutionProcessing(CommitBatchContext* self) {
|
|||
// Pre-resolution the commits
|
||||
TEST(pProxyCommitData->latestLocalCommitBatchResolving.get() < localBatchNumber - 1); // Wait for local batch
|
||||
wait(pProxyCommitData->latestLocalCommitBatchResolving.whenAtLeast(localBatchNumber - 1));
|
||||
double queuingDelay = g_network->now() - timeStart;
|
||||
if ((queuingDelay > (double)SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS / SERVER_KNOBS->VERSIONS_PER_SECOND ||
|
||||
(g_network->isSimulated() && BUGGIFY_WITH_PROB(0.01))) &&
|
||||
SERVER_KNOBS->PROXY_REJECT_BATCH_QUEUED_TOO_LONG && canReject(trs)) {
|
||||
// Disabled for the recovery transaction. otherwise, recovery can't finish and keeps doing more recoveries.
|
||||
TEST(true); // Reject transactions in the batch
|
||||
TraceEvent(SevWarnAlways, "ProxyReject", pProxyCommitData->dbgid)
|
||||
.suppressFor(0.1)
|
||||
.detail("QDelay", queuingDelay)
|
||||
.detail("Transactions", trs.size())
|
||||
.detail("BatchNumber", localBatchNumber);
|
||||
ASSERT(pProxyCommitData->latestLocalCommitBatchResolving.get() == localBatchNumber - 1);
|
||||
pProxyCommitData->latestLocalCommitBatchResolving.set(localBatchNumber);
|
||||
|
||||
wait(pProxyCommitData->latestLocalCommitBatchLogging.whenAtLeast(localBatchNumber - 1));
|
||||
ASSERT(pProxyCommitData->latestLocalCommitBatchLogging.get() == localBatchNumber - 1);
|
||||
pProxyCommitData->latestLocalCommitBatchLogging.set(localBatchNumber);
|
||||
for (const auto& tr : trs) {
|
||||
tr.reply.sendError(transaction_too_old());
|
||||
}
|
||||
++pProxyCommitData->stats.commitBatchOut;
|
||||
pProxyCommitData->stats.txnCommitOut += trs.size();
|
||||
pProxyCommitData->stats.txnConflicts += trs.size();
|
||||
self->rejected = true;
|
||||
return Void();
|
||||
}
|
||||
|
||||
self->releaseDelay = delay(
|
||||
std::min(SERVER_KNOBS->MAX_PROXY_COMPUTE,
|
||||
self->batchOperations * pProxyCommitData->commitComputePerOperation[latencyBucket]),
|
||||
|
@ -1288,6 +1331,7 @@ ACTOR Future<Void> commitBatch(
|
|||
|
||||
/////// Phase 1: Pre-resolution processing (CPU bound except waiting for a version # which is separately pipelined and *should* be available by now (unless empty commit); ordered; currently atomic but could yield)
|
||||
wait(CommitBatch::preresolutionProcessing(&context));
|
||||
if (context.rejected) return Void();
|
||||
|
||||
/////// Phase 2: Resolution (waiting on the network; pipelined)
|
||||
wait(CommitBatch::getResolution(&context));
|
||||
|
|
|
@ -189,11 +189,18 @@ struct CoordinatedStateImpl {
|
|||
}
|
||||
};
|
||||
|
||||
CoordinatedState::CoordinatedState( ServerCoordinators const& coord ) : impl( new CoordinatedStateImpl(coord) ) { }
|
||||
CoordinatedState::~CoordinatedState() { delete impl; }
|
||||
Future<Value> CoordinatedState::read() { return CoordinatedStateImpl::read(impl); }
|
||||
Future<Void> CoordinatedState::onConflict() { return CoordinatedStateImpl::onConflict(impl); }
|
||||
Future<Void> CoordinatedState::setExclusive(Value v) { return CoordinatedStateImpl::setExclusive(impl,v); }
|
||||
CoordinatedState::CoordinatedState(ServerCoordinators const& coord)
|
||||
: impl(std::make_unique<CoordinatedStateImpl>(coord)) {}
|
||||
CoordinatedState::~CoordinatedState() = default;
|
||||
Future<Value> CoordinatedState::read() {
|
||||
return CoordinatedStateImpl::read(impl.get());
|
||||
}
|
||||
Future<Void> CoordinatedState::onConflict() {
|
||||
return CoordinatedStateImpl::onConflict(impl.get());
|
||||
}
|
||||
Future<Void> CoordinatedState::setExclusive(Value v) {
|
||||
return CoordinatedStateImpl::setExclusive(impl.get(), v);
|
||||
}
|
||||
uint64_t CoordinatedState::getConflict() { return impl->getConflict(); }
|
||||
|
||||
struct MovableValue {
|
||||
|
@ -306,20 +313,15 @@ struct MovableCoordinatedStateImpl {
|
|||
}
|
||||
};
|
||||
|
||||
void MovableCoordinatedState::operator=(MovableCoordinatedState&& av) {
|
||||
if(impl) {
|
||||
delete impl;
|
||||
}
|
||||
impl = av.impl;
|
||||
av.impl = 0;
|
||||
MovableCoordinatedState& MovableCoordinatedState::operator=(MovableCoordinatedState&&) = default;
|
||||
MovableCoordinatedState::MovableCoordinatedState(class ServerCoordinators const& coord)
|
||||
: impl(std::make_unique<MovableCoordinatedStateImpl>(coord)) {}
|
||||
MovableCoordinatedState::~MovableCoordinatedState() = default;
|
||||
Future<Value> MovableCoordinatedState::read() {
|
||||
return MovableCoordinatedStateImpl::read(impl.get());
|
||||
}
|
||||
MovableCoordinatedState::MovableCoordinatedState( class ServerCoordinators const& coord ) : impl( new MovableCoordinatedStateImpl(coord) ) {}
|
||||
MovableCoordinatedState::~MovableCoordinatedState() {
|
||||
if(impl) {
|
||||
delete impl;
|
||||
}
|
||||
}
|
||||
Future<Value> MovableCoordinatedState::read() { return MovableCoordinatedStateImpl::read(impl); }
|
||||
Future<Void> MovableCoordinatedState::onConflict() { return impl->onConflict(); }
|
||||
Future<Void> MovableCoordinatedState::setExclusive(Value v) { return impl->setExclusive(v); }
|
||||
Future<Void> MovableCoordinatedState::move( ClusterConnectionString const& nc ) { return MovableCoordinatedStateImpl::move(impl, nc); }
|
||||
Future<Void> MovableCoordinatedState::move(ClusterConnectionString const& nc) {
|
||||
return MovableCoordinatedStateImpl::move(impl.get(), nc);
|
||||
}
|
||||
|
|
|
@ -54,13 +54,13 @@ public:
|
|||
|
||||
uint64_t getConflict();
|
||||
private:
|
||||
struct CoordinatedStateImpl *impl;
|
||||
std::unique_ptr<struct CoordinatedStateImpl> impl;
|
||||
};
|
||||
|
||||
class MovableCoordinatedState : NonCopyable {
|
||||
public:
|
||||
MovableCoordinatedState( class ServerCoordinators const& );
|
||||
void operator=(MovableCoordinatedState&& av);
|
||||
MovableCoordinatedState& operator=(MovableCoordinatedState&& av);
|
||||
~MovableCoordinatedState();
|
||||
|
||||
Future<Value> read();
|
||||
|
@ -76,7 +76,7 @@ public:
|
|||
// (and therefore the caller should die).
|
||||
|
||||
private:
|
||||
struct MovableCoordinatedStateImpl *impl;
|
||||
std::unique_ptr<struct MovableCoordinatedStateImpl> impl;
|
||||
};
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -108,15 +108,15 @@ struct TCMachineInfo : public ReferenceCounted<TCMachineInfo> {
|
|||
machineID = locality.zoneId().get();
|
||||
}
|
||||
|
||||
std::string getServersIDStr() {
|
||||
std::string getServersIDStr() const {
|
||||
std::stringstream ss;
|
||||
if (serversOnMachine.empty()) return "[unset]";
|
||||
|
||||
for (auto& server : serversOnMachine) {
|
||||
for (const auto& server : serversOnMachine) {
|
||||
ss << server->id.toString() << " ";
|
||||
}
|
||||
|
||||
return ss.str();
|
||||
return std::move(ss).str();
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -139,21 +139,21 @@ public:
|
|||
sort(machineIDs.begin(), machineIDs.end());
|
||||
}
|
||||
|
||||
int size() {
|
||||
int size() const {
|
||||
ASSERT(machines.size() == machineIDs.size());
|
||||
return machineIDs.size();
|
||||
}
|
||||
|
||||
std::string getMachineIDsStr() {
|
||||
std::string getMachineIDsStr() const {
|
||||
std::stringstream ss;
|
||||
|
||||
if (machineIDs.empty()) return "[unset]";
|
||||
|
||||
for (auto& id : machineIDs) {
|
||||
for (const auto& id : machineIDs) {
|
||||
ss << id.contents().toString() << " ";
|
||||
}
|
||||
|
||||
return ss.str();
|
||||
return std::move(ss).str();
|
||||
}
|
||||
|
||||
bool operator==(TCMachineTeamInfo& rhs) const { return this->machineIDs == rhs.machineIDs; }
|
||||
|
@ -199,18 +199,18 @@ public:
|
|||
return servers.size();
|
||||
}
|
||||
vector<UID> const& getServerIDs() const override { return serverIDs; }
|
||||
const vector<Reference<TCServerInfo>>& getServers() { return servers; }
|
||||
const vector<Reference<TCServerInfo>>& getServers() const { return servers; }
|
||||
|
||||
std::string getServerIDsStr() const {
|
||||
std::stringstream ss;
|
||||
|
||||
if (serverIDs.empty()) return "[unset]";
|
||||
|
||||
for (auto& id : serverIDs) {
|
||||
for (const auto& id : serverIDs) {
|
||||
ss << id.toString() << " ";
|
||||
}
|
||||
|
||||
return ss.str();
|
||||
return std::move(ss).str();
|
||||
}
|
||||
|
||||
void addDataInFlightToTeam(int64_t delta) override {
|
||||
|
@ -701,15 +701,15 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
|||
// The following kills a reference cycle between the teamTracker actor and the TCTeamInfo that both holds and is
|
||||
// held by the actor It also ensures that the trackers are done fiddling with healthyTeamCount before we free
|
||||
// this
|
||||
for(int i=0; i < teams.size(); i++) {
|
||||
teams[i]->tracker.cancel();
|
||||
for (auto& team : teams) {
|
||||
team->tracker.cancel();
|
||||
}
|
||||
// The commented TraceEvent log is useful in detecting what is running during the destruction
|
||||
// TraceEvent("DDTeamCollectionDestructed", distributorId)
|
||||
// .detail("Primary", primary)
|
||||
// .detail("TeamTrackerDestroyed", teams.size());
|
||||
for(int i=0; i < badTeams.size(); i++) {
|
||||
badTeams[i]->tracker.cancel();
|
||||
for (auto& badTeam : badTeams) {
|
||||
badTeam->tracker.cancel();
|
||||
}
|
||||
// TraceEvent("DDTeamCollectionDestructed", distributorId)
|
||||
// .detail("Primary", primary)
|
||||
|
@ -717,9 +717,9 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
|||
// The following makes sure that, even if a reference to a team is held in the DD Queue, the tracker will be
|
||||
// stopped
|
||||
// before the server_status map to which it has a pointer, is destroyed.
|
||||
for(auto it = server_info.begin(); it != server_info.end(); ++it) {
|
||||
it->second->tracker.cancel();
|
||||
it->second->collection = nullptr;
|
||||
for (auto& [_, info] : server_info) {
|
||||
info->tracker.cancel();
|
||||
info->collection = nullptr;
|
||||
}
|
||||
// TraceEvent("DDTeamCollectionDestructed", distributorId)
|
||||
// .detail("Primary", primary)
|
||||
|
@ -799,9 +799,9 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
|||
self->lastMedianAvailableSpaceUpdate = now();
|
||||
std::vector<double> teamAvailableSpace;
|
||||
teamAvailableSpace.reserve(self->teams.size());
|
||||
for( int i = 0; i < self->teams.size(); i++ ) {
|
||||
if (self->teams[i]->isHealthy()) {
|
||||
teamAvailableSpace.push_back(self->teams[i]->getMinAvailableSpaceRatio());
|
||||
for (const auto& team : self->teams) {
|
||||
if (team->isHealthy()) {
|
||||
teamAvailableSpace.push_back(team->getMinAvailableSpaceRatio());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1135,14 +1135,14 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
|||
double varTeams = 0;
|
||||
|
||||
std::map<Optional<Standalone<StringRef>>, int> machineTeams;
|
||||
for(auto s = server_info.begin(); s != server_info.end(); ++s) {
|
||||
if(!server_status.get(s->first).isUnhealthy()) {
|
||||
int stc = s->second->teams.size();
|
||||
for (const auto& [id, info] : server_info) {
|
||||
if (!server_status.get(id).isUnhealthy()) {
|
||||
int stc = info->teams.size();
|
||||
minTeams = std::min(minTeams, stc);
|
||||
maxTeams = std::max(maxTeams, stc);
|
||||
varTeams += (stc - teamsPerServer)*(stc - teamsPerServer);
|
||||
// Use zoneId as server's machine id
|
||||
machineTeams[s->second->lastKnownInterface.locality.zoneId()] += stc;
|
||||
machineTeams[info->lastKnownInterface.locality.zoneId()] += stc;
|
||||
}
|
||||
}
|
||||
varTeams /= teamsPerServer*teamsPerServer;
|
||||
|
@ -1167,14 +1167,17 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
|||
.detail("MachineMaxTeams", maxMachineTeams);
|
||||
}
|
||||
|
||||
int overlappingMembers( vector<UID> &team ) {
|
||||
int overlappingMembers(const vector<UID>& team) const {
|
||||
if (team.empty()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int maxMatchingServers = 0;
|
||||
UID& serverID = team[0];
|
||||
for (auto& usedTeam : server_info[serverID]->teams) {
|
||||
const UID& serverID = team[0];
|
||||
const auto it = server_info.find(serverID);
|
||||
ASSERT(it != server_info.end());
|
||||
const auto& usedTeams = it->second->teams;
|
||||
for (const auto& usedTeam : usedTeams) {
|
||||
auto used = usedTeam->getServerIDs();
|
||||
int teamIdx = 0;
|
||||
int usedIdx = 0;
|
||||
|
@ -2758,178 +2761,189 @@ ACTOR Future<Void> printSnapshotTeamsInfo(Reference<DDTeamCollection> self) {
|
|||
state int traceEventsPrinted = 0;
|
||||
state std::vector<const UID*> serverIDs;
|
||||
state double lastPrintTime = 0;
|
||||
state ReadYourWritesTransaction tr(self->cx);
|
||||
loop {
|
||||
wait(self->printDetailedTeamsInfo.onTrigger());
|
||||
if (now() - lastPrintTime < SERVER_KNOBS->DD_TEAMS_INFO_PRINT_INTERVAL) {
|
||||
continue;
|
||||
}
|
||||
lastPrintTime = now();
|
||||
try {
|
||||
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
state Future<Void> watchFuture = tr.watch(triggerDDTeamInfoPrintKey);
|
||||
wait(tr.commit());
|
||||
wait(self->printDetailedTeamsInfo.onTrigger() || watchFuture);
|
||||
tr.reset();
|
||||
if (now() - lastPrintTime < SERVER_KNOBS->DD_TEAMS_INFO_PRINT_INTERVAL) {
|
||||
continue;
|
||||
}
|
||||
lastPrintTime = now();
|
||||
|
||||
traceEventsPrinted = 0;
|
||||
traceEventsPrinted = 0;
|
||||
|
||||
double snapshotStart = now();
|
||||
double snapshotStart = now();
|
||||
|
||||
configuration = self->configuration;
|
||||
server_info = self->server_info;
|
||||
teams = self->teams;
|
||||
machine_info = self->machine_info;
|
||||
machineTeams = self->machineTeams;
|
||||
// internedLocalityRecordKeyNameStrings = self->machineLocalityMap._keymap->_lookuparray;
|
||||
// machineLocalityMapEntryArraySize = self->machineLocalityMap.size();
|
||||
// machineLocalityMapRecordArray = self->machineLocalityMap.getRecordArray();
|
||||
std::vector<const UID*> _uids = self->machineLocalityMap.getObjects();
|
||||
serverIDs = _uids;
|
||||
configuration = self->configuration;
|
||||
server_info = self->server_info;
|
||||
teams = self->teams;
|
||||
machine_info = self->machine_info;
|
||||
machineTeams = self->machineTeams;
|
||||
// internedLocalityRecordKeyNameStrings = self->machineLocalityMap._keymap->_lookuparray;
|
||||
// machineLocalityMapEntryArraySize = self->machineLocalityMap.size();
|
||||
// machineLocalityMapRecordArray = self->machineLocalityMap.getRecordArray();
|
||||
std::vector<const UID*> _uids = self->machineLocalityMap.getObjects();
|
||||
serverIDs = _uids;
|
||||
|
||||
auto const& keys = self->server_status.getKeys();
|
||||
for (auto const& key : keys) {
|
||||
server_status.emplace(key, self->server_status.get(key));
|
||||
}
|
||||
auto const& keys = self->server_status.getKeys();
|
||||
for (auto const& key : keys) {
|
||||
server_status.emplace(key, self->server_status.get(key));
|
||||
}
|
||||
|
||||
TraceEvent("DDPrintSnapshotTeasmInfo", self->distributorId)
|
||||
.detail("SnapshotSpeed", now() - snapshotStart)
|
||||
.detail("Primary", self->primary);
|
||||
TraceEvent("DDPrintSnapshotTeasmInfo", self->distributorId)
|
||||
.detail("SnapshotSpeed", now() - snapshotStart)
|
||||
.detail("Primary", self->primary);
|
||||
|
||||
// Print to TraceEvents
|
||||
TraceEvent("DDConfig", self->distributorId)
|
||||
.detail("StorageTeamSize", configuration.storageTeamSize)
|
||||
.detail("DesiredTeamsPerServer", SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER)
|
||||
.detail("MaxTeamsPerServer", SERVER_KNOBS->MAX_TEAMS_PER_SERVER)
|
||||
.detail("Primary", self->primary);
|
||||
// Print to TraceEvents
|
||||
TraceEvent("DDConfig", self->distributorId)
|
||||
.detail("StorageTeamSize", configuration.storageTeamSize)
|
||||
.detail("DesiredTeamsPerServer", SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER)
|
||||
.detail("MaxTeamsPerServer", SERVER_KNOBS->MAX_TEAMS_PER_SERVER)
|
||||
.detail("Primary", self->primary);
|
||||
|
||||
TraceEvent("ServerInfo", self->distributorId)
|
||||
.detail("Size", server_info.size())
|
||||
.detail("Primary", self->primary);
|
||||
state int i;
|
||||
state std::map<UID, Reference<TCServerInfo>>::iterator server = server_info.begin();
|
||||
for (i = 0; i < server_info.size(); i++) {
|
||||
TraceEvent("ServerInfo", self->distributorId)
|
||||
.detail("ServerInfoIndex", i)
|
||||
.detail("ServerID", server->first.toString())
|
||||
.detail("ServerTeamOwned", server->second->teams.size())
|
||||
.detail("MachineID", server->second->machine->machineID.contents().toString())
|
||||
.detail("Size", server_info.size())
|
||||
.detail("Primary", self->primary);
|
||||
server++;
|
||||
if (++traceEventsPrinted % SERVER_KNOBS->DD_TEAMS_INFO_PRINT_YIELD_COUNT == 0) {
|
||||
wait(yield());
|
||||
}
|
||||
}
|
||||
|
||||
server = server_info.begin();
|
||||
for (i = 0; i < server_info.size(); i++) {
|
||||
const UID& uid = server->first;
|
||||
TraceEvent("ServerStatus", self->distributorId)
|
||||
.detail("ServerUID", uid)
|
||||
.detail("Healthy", !server_status.at(uid).isUnhealthy())
|
||||
.detail("MachineIsValid", server_info[uid]->machine.isValid())
|
||||
.detail("MachineTeamSize",
|
||||
server_info[uid]->machine.isValid() ? server_info[uid]->machine->machineTeams.size() : -1)
|
||||
.detail("Primary", self->primary);
|
||||
server++;
|
||||
if (++traceEventsPrinted % SERVER_KNOBS->DD_TEAMS_INFO_PRINT_YIELD_COUNT == 0) {
|
||||
wait(yield());
|
||||
}
|
||||
}
|
||||
|
||||
TraceEvent("ServerTeamInfo", self->distributorId).detail("Size", teams.size()).detail("Primary", self->primary);
|
||||
for (i = 0; i < teams.size(); i++) {
|
||||
const auto& team = teams[i];
|
||||
TraceEvent("ServerTeamInfo", self->distributorId)
|
||||
.detail("TeamIndex", i)
|
||||
.detail("Healthy", team->isHealthy())
|
||||
.detail("TeamSize", team->size())
|
||||
.detail("MemberIDs", team->getServerIDsStr())
|
||||
.detail("Primary", self->primary);
|
||||
if (++traceEventsPrinted % SERVER_KNOBS->DD_TEAMS_INFO_PRINT_YIELD_COUNT == 0) {
|
||||
wait(yield());
|
||||
}
|
||||
}
|
||||
|
||||
TraceEvent("MachineInfo", self->distributorId)
|
||||
.detail("Size", machine_info.size())
|
||||
.detail("Primary", self->primary);
|
||||
state std::map<Standalone<StringRef>, Reference<TCMachineInfo>>::iterator machine = machine_info.begin();
|
||||
state bool isMachineHealthy = false;
|
||||
for (i = 0; i < machine_info.size(); i++) {
|
||||
Reference<TCMachineInfo> _machine = machine->second;
|
||||
if (!_machine.isValid() || machine_info.find(_machine->machineID) == machine_info.end() ||
|
||||
_machine->serversOnMachine.empty()) {
|
||||
isMachineHealthy = false;
|
||||
}
|
||||
|
||||
// Healthy machine has at least one healthy server
|
||||
for (auto& server : _machine->serversOnMachine) {
|
||||
if (!server_status.at(server->id).isUnhealthy()) {
|
||||
isMachineHealthy = true;
|
||||
state int i;
|
||||
state std::map<UID, Reference<TCServerInfo>>::iterator server = server_info.begin();
|
||||
for (i = 0; i < server_info.size(); i++) {
|
||||
TraceEvent("ServerInfo", self->distributorId)
|
||||
.detail("ServerInfoIndex", i)
|
||||
.detail("ServerID", server->first.toString())
|
||||
.detail("ServerTeamOwned", server->second->teams.size())
|
||||
.detail("MachineID", server->second->machine->machineID.contents().toString())
|
||||
.detail("Primary", self->primary);
|
||||
server++;
|
||||
if (++traceEventsPrinted % SERVER_KNOBS->DD_TEAMS_INFO_PRINT_YIELD_COUNT == 0) {
|
||||
wait(yield());
|
||||
}
|
||||
}
|
||||
|
||||
server = server_info.begin();
|
||||
for (i = 0; i < server_info.size(); i++) {
|
||||
const UID& uid = server->first;
|
||||
TraceEvent("ServerStatus", self->distributorId)
|
||||
.detail("ServerUID", uid)
|
||||
.detail("Healthy", !server_status.at(uid).isUnhealthy())
|
||||
.detail("MachineIsValid", server_info[uid]->machine.isValid())
|
||||
.detail("MachineTeamSize",
|
||||
server_info[uid]->machine.isValid() ? server_info[uid]->machine->machineTeams.size() : -1)
|
||||
.detail("Primary", self->primary);
|
||||
server++;
|
||||
if (++traceEventsPrinted % SERVER_KNOBS->DD_TEAMS_INFO_PRINT_YIELD_COUNT == 0) {
|
||||
wait(yield());
|
||||
}
|
||||
}
|
||||
|
||||
TraceEvent("ServerTeamInfo", self->distributorId)
|
||||
.detail("Size", teams.size())
|
||||
.detail("Primary", self->primary);
|
||||
for (i = 0; i < teams.size(); i++) {
|
||||
const auto& team = teams[i];
|
||||
TraceEvent("ServerTeamInfo", self->distributorId)
|
||||
.detail("TeamIndex", i)
|
||||
.detail("Healthy", team->isHealthy())
|
||||
.detail("TeamSize", team->size())
|
||||
.detail("MemberIDs", team->getServerIDsStr())
|
||||
.detail("Primary", self->primary);
|
||||
if (++traceEventsPrinted % SERVER_KNOBS->DD_TEAMS_INFO_PRINT_YIELD_COUNT == 0) {
|
||||
wait(yield());
|
||||
}
|
||||
}
|
||||
|
||||
isMachineHealthy = false;
|
||||
TraceEvent("MachineInfo", self->distributorId)
|
||||
.detail("MachineInfoIndex", i)
|
||||
.detail("Healthy", isMachineHealthy)
|
||||
.detail("MachineID", machine->first.contents().toString())
|
||||
.detail("MachineTeamOwned", machine->second->machineTeams.size())
|
||||
.detail("ServerNumOnMachine", machine->second->serversOnMachine.size())
|
||||
.detail("ServersID", machine->second->getServersIDStr())
|
||||
.detail("Size", machine_info.size())
|
||||
.detail("Primary", self->primary);
|
||||
machine++;
|
||||
if (++traceEventsPrinted % SERVER_KNOBS->DD_TEAMS_INFO_PRINT_YIELD_COUNT == 0) {
|
||||
wait(yield());
|
||||
}
|
||||
}
|
||||
state std::map<Standalone<StringRef>, Reference<TCMachineInfo>>::iterator machine = machine_info.begin();
|
||||
state bool isMachineHealthy = false;
|
||||
for (i = 0; i < machine_info.size(); i++) {
|
||||
Reference<TCMachineInfo> _machine = machine->second;
|
||||
if (!_machine.isValid() || machine_info.find(_machine->machineID) == machine_info.end() ||
|
||||
_machine->serversOnMachine.empty()) {
|
||||
isMachineHealthy = false;
|
||||
}
|
||||
|
||||
// Healthy machine has at least one healthy server
|
||||
for (auto& server : _machine->serversOnMachine) {
|
||||
if (!server_status.at(server->id).isUnhealthy()) {
|
||||
isMachineHealthy = true;
|
||||
}
|
||||
}
|
||||
|
||||
isMachineHealthy = false;
|
||||
TraceEvent("MachineInfo", self->distributorId)
|
||||
.detail("MachineInfoIndex", i)
|
||||
.detail("Healthy", isMachineHealthy)
|
||||
.detail("MachineID", machine->first.contents().toString())
|
||||
.detail("MachineTeamOwned", machine->second->machineTeams.size())
|
||||
.detail("ServerNumOnMachine", machine->second->serversOnMachine.size())
|
||||
.detail("ServersID", machine->second->getServersIDStr())
|
||||
.detail("Primary", self->primary);
|
||||
machine++;
|
||||
if (++traceEventsPrinted % SERVER_KNOBS->DD_TEAMS_INFO_PRINT_YIELD_COUNT == 0) {
|
||||
wait(yield());
|
||||
}
|
||||
}
|
||||
|
||||
TraceEvent("MachineTeamInfo", self->distributorId)
|
||||
.detail("Size", machineTeams.size())
|
||||
.detail("Primary", self->primary);
|
||||
for (i = 0; i < machineTeams.size(); i++) {
|
||||
const auto& team = machineTeams[i];
|
||||
TraceEvent("MachineTeamInfo", self->distributorId)
|
||||
.detail("TeamIndex", i)
|
||||
.detail("MachineIDs", team->getMachineIDsStr())
|
||||
.detail("ServerTeams", team->serverTeams.size())
|
||||
.detail("Size", machineTeams.size())
|
||||
.detail("Primary", self->primary);
|
||||
if (++traceEventsPrinted % SERVER_KNOBS->DD_TEAMS_INFO_PRINT_YIELD_COUNT == 0) {
|
||||
wait(yield());
|
||||
for (i = 0; i < machineTeams.size(); i++) {
|
||||
const auto& team = machineTeams[i];
|
||||
TraceEvent("MachineTeamInfo", self->distributorId)
|
||||
.detail("TeamIndex", i)
|
||||
.detail("MachineIDs", team->getMachineIDsStr())
|
||||
.detail("ServerTeams", team->serverTeams.size())
|
||||
.detail("Primary", self->primary);
|
||||
if (++traceEventsPrinted % SERVER_KNOBS->DD_TEAMS_INFO_PRINT_YIELD_COUNT == 0) {
|
||||
wait(yield());
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: re-enable the following logging or remove them.
|
||||
// TraceEvent("LocalityRecordKeyName", self->distributorId)
|
||||
// .detail("Size", internedLocalityRecordKeyNameStrings.size())
|
||||
// .detail("Primary", self->primary);
|
||||
// for (i = 0; i < internedLocalityRecordKeyNameStrings.size(); i++) {
|
||||
// TraceEvent("LocalityRecordKeyIndexName", self->distributorId)
|
||||
// .detail("KeyIndex", i)
|
||||
// .detail("KeyName", internedLocalityRecordKeyNameStrings[i])
|
||||
// .detail("Primary", self->primary);
|
||||
// if (++traceEventsPrinted % SERVER_KNOBS->DD_TEAMS_INFO_PRINT_YIELD_COUNT == 0) {
|
||||
// wait(yield());
|
||||
// }
|
||||
// }
|
||||
|
||||
// TraceEvent("MachineLocalityMap", self->distributorId)
|
||||
// .detail("Size", machineLocalityMapEntryArraySize)
|
||||
// .detail("Primary", self->primary);
|
||||
// for (i = 0; i < serverIDs.size(); i++) {
|
||||
// const auto& serverID = serverIDs[i];
|
||||
// Reference<LocalityRecord> record = machineLocalityMapRecordArray[i];
|
||||
// if (record.isValid()) {
|
||||
// TraceEvent("MachineLocalityMap", self->distributorId)
|
||||
// .detail("LocalityIndex", i)
|
||||
// .detail("UID", serverID->toString())
|
||||
// .detail("LocalityRecord", record->toString())
|
||||
// .detail("Primary", self->primary);
|
||||
// } else {
|
||||
// TraceEvent("MachineLocalityMap", self->distributorId)
|
||||
// .detail("LocalityIndex", i)
|
||||
// .detail("UID", serverID->toString())
|
||||
// .detail("LocalityRecord", "[NotFound]")
|
||||
// .detail("Primary", self->primary);
|
||||
// }
|
||||
// if (++traceEventsPrinted % SERVER_KNOBS->DD_TEAMS_INFO_PRINT_YIELD_COUNT == 0) {
|
||||
// wait(yield());
|
||||
// }
|
||||
// }
|
||||
} catch (Error& e) {
|
||||
wait(tr.onError(e));
|
||||
}
|
||||
|
||||
// TODO: re-enable the following logging or remove them.
|
||||
// TraceEvent("LocalityRecordKeyName", self->distributorId)
|
||||
// .detail("Size", internedLocalityRecordKeyNameStrings.size())
|
||||
// .detail("Primary", self->primary);
|
||||
// for (i = 0; i < internedLocalityRecordKeyNameStrings.size(); i++) {
|
||||
// TraceEvent("LocalityRecordKeyIndexName", self->distributorId)
|
||||
// .detail("KeyIndex", i)
|
||||
// .detail("KeyName", internedLocalityRecordKeyNameStrings[i])
|
||||
// .detail("Primary", self->primary);
|
||||
// if (++traceEventsPrinted % SERVER_KNOBS->DD_TEAMS_INFO_PRINT_YIELD_COUNT == 0) {
|
||||
// wait(yield());
|
||||
// }
|
||||
// }
|
||||
|
||||
// TraceEvent("MachineLocalityMap", self->distributorId)
|
||||
// .detail("Size", machineLocalityMapEntryArraySize)
|
||||
// .detail("Primary", self->primary);
|
||||
// for (i = 0; i < serverIDs.size(); i++) {
|
||||
// const auto& serverID = serverIDs[i];
|
||||
// Reference<LocalityRecord> record = machineLocalityMapRecordArray[i];
|
||||
// if (record.isValid()) {
|
||||
// TraceEvent("MachineLocalityMap", self->distributorId)
|
||||
// .detail("LocalityIndex", i)
|
||||
// .detail("UID", serverID->toString())
|
||||
// .detail("LocalityRecord", record->toString())
|
||||
// .detail("Primary", self->primary);
|
||||
// } else {
|
||||
// TraceEvent("MachineLocalityMap", self->distributorId)
|
||||
// .detail("LocalityIndex", i)
|
||||
// .detail("UID", serverID->toString())
|
||||
// .detail("LocalityRecord", "[NotFound]")
|
||||
// .detail("Primary", self->primary);
|
||||
// }
|
||||
// if (++traceEventsPrinted % SERVER_KNOBS->DD_TEAMS_INFO_PRINT_YIELD_COUNT == 0) {
|
||||
// wait(yield());
|
||||
// }
|
||||
// }
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5401,7 +5415,7 @@ ACTOR Future<Void> dataDistributor(DataDistributorInterface di, Reference<AsyncV
|
|||
return Void();
|
||||
}
|
||||
|
||||
DDTeamCollection* testTeamCollection(int teamSize, Reference<IReplicationPolicy> policy, int processCount) {
|
||||
std::unique_ptr<DDTeamCollection> testTeamCollection(int teamSize, Reference<IReplicationPolicy> policy, int processCount) {
|
||||
Database database =
|
||||
DatabaseContext::create(makeReference<AsyncVar<ClientDBInfo>>(), Never(), LocalityData(), false);
|
||||
|
||||
|
@ -5409,11 +5423,11 @@ DDTeamCollection* testTeamCollection(int teamSize, Reference<IReplicationPolicy>
|
|||
conf.storageTeamSize = teamSize;
|
||||
conf.storagePolicy = policy;
|
||||
|
||||
DDTeamCollection* collection =
|
||||
new DDTeamCollection(database, UID(0, 0), MoveKeysLock(), PromiseStream<RelocateShard>(),
|
||||
makeReference<ShardsAffectedByTeamFailure>(), conf, {}, {}, Future<Void>(Void()),
|
||||
makeReference<AsyncVar<bool>>(true), true, makeReference<AsyncVar<bool>>(false),
|
||||
PromiseStream<GetMetricsRequest>());
|
||||
auto collection =
|
||||
std::unique_ptr<DDTeamCollection>(new DDTeamCollection(database, UID(0, 0), MoveKeysLock(), PromiseStream<RelocateShard>(),
|
||||
makeReference<ShardsAffectedByTeamFailure>(), conf, {}, {}, Future<Void>(Void()),
|
||||
makeReference<AsyncVar<bool>>(true), true, makeReference<AsyncVar<bool>>(false),
|
||||
PromiseStream<GetMetricsRequest>()));
|
||||
|
||||
for (int id = 1; id <= processCount; ++id) {
|
||||
UID uid(id, 0);
|
||||
|
@ -5423,7 +5437,7 @@ DDTeamCollection* testTeamCollection(int teamSize, Reference<IReplicationPolicy>
|
|||
interface.locality.set(LiteralStringRef("zoneid"), Standalone<StringRef>(std::to_string(id % 5)));
|
||||
interface.locality.set(LiteralStringRef("data_hall"), Standalone<StringRef>(std::to_string(id % 3)));
|
||||
collection->server_info[uid] =
|
||||
makeReference<TCServerInfo>(interface, collection, ProcessClass(), true, collection->storageServerSet);
|
||||
makeReference<TCServerInfo>(interface, collection.get(), ProcessClass(), true, collection->storageServerSet);
|
||||
collection->server_status.set(uid, ServerStatus(false, false, interface.locality));
|
||||
collection->checkAndCreateMachine(collection->server_info[uid]);
|
||||
}
|
||||
|
@ -5431,7 +5445,7 @@ DDTeamCollection* testTeamCollection(int teamSize, Reference<IReplicationPolicy>
|
|||
return collection;
|
||||
}
|
||||
|
||||
DDTeamCollection* testMachineTeamCollection(int teamSize, Reference<IReplicationPolicy> policy, int processCount) {
|
||||
std::unique_ptr<DDTeamCollection> testMachineTeamCollection(int teamSize, Reference<IReplicationPolicy> policy, int processCount) {
|
||||
Database database =
|
||||
DatabaseContext::create(makeReference<AsyncVar<ClientDBInfo>>(), Never(), LocalityData(), false);
|
||||
|
||||
|
@ -5439,11 +5453,11 @@ DDTeamCollection* testMachineTeamCollection(int teamSize, Reference<IReplication
|
|||
conf.storageTeamSize = teamSize;
|
||||
conf.storagePolicy = policy;
|
||||
|
||||
DDTeamCollection* collection =
|
||||
new DDTeamCollection(database, UID(0, 0), MoveKeysLock(), PromiseStream<RelocateShard>(),
|
||||
makeReference<ShardsAffectedByTeamFailure>(), conf, {}, {}, Future<Void>(Void()),
|
||||
makeReference<AsyncVar<bool>>(true), true, makeReference<AsyncVar<bool>>(false),
|
||||
PromiseStream<GetMetricsRequest>());
|
||||
auto collection =
|
||||
std::unique_ptr<DDTeamCollection>(new DDTeamCollection(database, UID(0, 0), MoveKeysLock(), PromiseStream<RelocateShard>(),
|
||||
makeReference<ShardsAffectedByTeamFailure>(), conf, {}, {}, Future<Void>(Void()),
|
||||
makeReference<AsyncVar<bool>>(true), true, makeReference<AsyncVar<bool>>(false),
|
||||
PromiseStream<GetMetricsRequest>()));
|
||||
|
||||
for (int id = 1; id <= processCount; id++) {
|
||||
UID uid(id, 0);
|
||||
|
@ -5463,7 +5477,7 @@ DDTeamCollection* testMachineTeamCollection(int teamSize, Reference<IReplication
|
|||
interface.locality.set(LiteralStringRef("data_hall"), Standalone<StringRef>(std::to_string(data_hall_id)));
|
||||
interface.locality.set(LiteralStringRef("dcid"), Standalone<StringRef>(std::to_string(dc_id)));
|
||||
collection->server_info[uid] =
|
||||
makeReference<TCServerInfo>(interface, collection, ProcessClass(), true, collection->storageServerSet);
|
||||
makeReference<TCServerInfo>(interface, collection.get(), ProcessClass(), true, collection->storageServerSet);
|
||||
|
||||
collection->server_status.set(uid, ServerStatus(false, false, interface.locality));
|
||||
}
|
||||
|
@ -5483,14 +5497,12 @@ TEST_CASE("DataDistribution/AddTeamsBestOf/UseMachineID") {
|
|||
int maxTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * processSize;
|
||||
|
||||
Reference<IReplicationPolicy> policy = Reference<IReplicationPolicy>(new PolicyAcross(teamSize, "zoneid", Reference<IReplicationPolicy>(new PolicyOne())));
|
||||
state DDTeamCollection* collection = testMachineTeamCollection(teamSize, policy, processSize);
|
||||
state std::unique_ptr<DDTeamCollection> collection = testMachineTeamCollection(teamSize, policy, processSize);
|
||||
|
||||
collection->addTeamsBestOf(30, desiredTeams, maxTeams);
|
||||
|
||||
ASSERT(collection->sanityCheckTeams() == true);
|
||||
|
||||
delete (collection);
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
|
@ -5503,7 +5515,7 @@ TEST_CASE("DataDistribution/AddTeamsBestOf/NotUseMachineID") {
|
|||
int maxTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * processSize;
|
||||
|
||||
Reference<IReplicationPolicy> policy = Reference<IReplicationPolicy>(new PolicyAcross(teamSize, "zoneid", Reference<IReplicationPolicy>(new PolicyOne())));
|
||||
state DDTeamCollection* collection = testMachineTeamCollection(teamSize, policy, processSize);
|
||||
state std::unique_ptr<DDTeamCollection> collection = testMachineTeamCollection(teamSize, policy, processSize);
|
||||
|
||||
if (collection == nullptr) {
|
||||
fprintf(stderr, "collection is null\n");
|
||||
|
@ -5514,8 +5526,6 @@ TEST_CASE("DataDistribution/AddTeamsBestOf/NotUseMachineID") {
|
|||
collection->addTeamsBestOf(30, desiredTeams, maxTeams);
|
||||
collection->sanityCheckTeams(); // Server team may happen to be on the same machine team, although unlikely
|
||||
|
||||
if (collection) delete (collection);
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
|
@ -5524,12 +5534,10 @@ TEST_CASE("DataDistribution/AddAllTeams/isExhaustive") {
|
|||
state int processSize = 10;
|
||||
state int desiredTeams = SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * processSize;
|
||||
state int maxTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * processSize;
|
||||
state DDTeamCollection* collection = testTeamCollection(3, policy, processSize);
|
||||
state std::unique_ptr<DDTeamCollection> collection = testTeamCollection(3, policy, processSize);
|
||||
|
||||
int result = collection->addTeamsBestOf(200, desiredTeams, maxTeams);
|
||||
|
||||
delete(collection);
|
||||
|
||||
// The maximum number of available server teams without considering machine locality is 120
|
||||
// The maximum number of available server teams with machine locality constraint is 120 - 40, because
|
||||
// the 40 (5*4*2) server teams whose servers come from the same machine are invalid.
|
||||
|
@ -5544,12 +5552,10 @@ TEST_CASE("/DataDistribution/AddAllTeams/withLimit") {
|
|||
state int desiredTeams = SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * processSize;
|
||||
state int maxTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * processSize;
|
||||
|
||||
state DDTeamCollection* collection = testTeamCollection(3, policy, processSize);
|
||||
state std::unique_ptr<DDTeamCollection> collection = testTeamCollection(3, policy, processSize);
|
||||
|
||||
int result = collection->addTeamsBestOf(10, desiredTeams, maxTeams);
|
||||
|
||||
delete(collection);
|
||||
|
||||
ASSERT(result >= 10);
|
||||
|
||||
return Void();
|
||||
|
@ -5563,7 +5569,7 @@ TEST_CASE("/DataDistribution/AddTeamsBestOf/SkippingBusyServers") {
|
|||
state int maxTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * processSize;
|
||||
state int teamSize = 3;
|
||||
//state int targetTeamsPerServer = SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * (teamSize + 1) / 2;
|
||||
state DDTeamCollection* collection = testTeamCollection(teamSize, policy, processSize);
|
||||
state std::unique_ptr<DDTeamCollection> collection = testTeamCollection(teamSize, policy, processSize);
|
||||
|
||||
collection->addTeam(std::set<UID>({ UID(1, 0), UID(2, 0), UID(3, 0) }), true);
|
||||
collection->addTeam(std::set<UID>({ UID(1, 0), UID(3, 0), UID(4, 0) }), true);
|
||||
|
@ -5578,8 +5584,6 @@ TEST_CASE("/DataDistribution/AddTeamsBestOf/SkippingBusyServers") {
|
|||
//ASSERT(teamCount <= targetTeamsPerServer);
|
||||
}
|
||||
|
||||
delete(collection);
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
|
@ -5594,7 +5598,7 @@ TEST_CASE("/DataDistribution/AddTeamsBestOf/NotEnoughServers") {
|
|||
state int desiredTeams = SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * processSize;
|
||||
state int maxTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * processSize;
|
||||
state int teamSize = 3;
|
||||
state DDTeamCollection* collection = testTeamCollection(teamSize, policy, processSize);
|
||||
state std::unique_ptr<DDTeamCollection> collection = testTeamCollection(teamSize, policy, processSize);
|
||||
|
||||
collection->addTeam(std::set<UID>({ UID(1, 0), UID(2, 0), UID(3, 0) }), true);
|
||||
collection->addTeam(std::set<UID>({ UID(1, 0), UID(3, 0), UID(4, 0) }), true);
|
||||
|
@ -5617,8 +5621,6 @@ TEST_CASE("/DataDistribution/AddTeamsBestOf/NotEnoughServers") {
|
|||
ASSERT(teamCount >= 1);
|
||||
}
|
||||
|
||||
delete(collection);
|
||||
|
||||
// If we find all available teams, result will be 8 because we prebuild 2 teams
|
||||
ASSERT(result == 8);
|
||||
|
||||
|
|
|
@ -85,8 +85,8 @@ struct GetTeamRequest {
|
|||
GetTeamRequest() {}
|
||||
GetTeamRequest( bool wantsNewServers, bool wantsTrueBest, bool preferLowerUtilization, bool teamMustHaveShards, double inflightPenalty = 1.0 )
|
||||
: wantsNewServers( wantsNewServers ), wantsTrueBest( wantsTrueBest ), preferLowerUtilization( preferLowerUtilization ), teamMustHaveShards( teamMustHaveShards ), inflightPenalty( inflightPenalty ) {}
|
||||
|
||||
std::string getDesc() {
|
||||
|
||||
std::string getDesc() const {
|
||||
std::stringstream ss;
|
||||
|
||||
ss << "WantsNewServers:" << wantsNewServers << " WantsTrueBest:" << wantsTrueBest
|
||||
|
@ -94,11 +94,11 @@ struct GetTeamRequest {
|
|||
<< " teamMustHaveShards:" << teamMustHaveShards
|
||||
<< " inflightPenalty:" << inflightPenalty << ";";
|
||||
ss << "CompleteSources:";
|
||||
for (auto& cs : completeSources) {
|
||||
for (const auto& cs : completeSources) {
|
||||
ss << cs.toString() << ",";
|
||||
}
|
||||
|
||||
return ss.str();
|
||||
return std::move(ss).str();
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -162,9 +162,9 @@ public:
|
|||
// no longer in the map), the servers will be set for all contained shards and added to all
|
||||
// intersecting shards.
|
||||
|
||||
int getNumberOfShards( UID ssID );
|
||||
int getNumberOfShards(UID ssID) const;
|
||||
vector<KeyRange> getShardsFor( Team team );
|
||||
bool hasShards(Team team);
|
||||
bool hasShards(Team team) const;
|
||||
|
||||
//The first element of the pair is either the source for non-moving shards or the destination team for in-flight shards
|
||||
//The second element of the pair is all previous sources for in-flight shards
|
||||
|
|
|
@ -319,7 +319,7 @@ ACTOR Future<Void> readHotDetector(DataDistributionTracker* self) {
|
|||
loop {
|
||||
try {
|
||||
Standalone<VectorRef<ReadHotRangeWithMetrics>> readHotRanges = wait(tr.getReadHotRanges(keys));
|
||||
for (auto& keyRange : readHotRanges) {
|
||||
for (const auto& keyRange : readHotRanges) {
|
||||
TraceEvent("ReadHotRangeLog")
|
||||
.detail("ReadDensity", keyRange.density)
|
||||
.detail("ReadBandwidth", keyRange.readBandwidth)
|
||||
|
@ -394,12 +394,14 @@ ACTOR Future<Void> changeSizes( DataDistributionTracker* self, KeyRange keys, in
|
|||
wait( yield(TaskPriority::DataDistribution) );
|
||||
|
||||
int64_t newShardsStartingSize = 0;
|
||||
for ( int i = 0; i < sizes.size(); i++ )
|
||||
newShardsStartingSize += sizes[i].get();
|
||||
for (const auto& size : sizes) {
|
||||
newShardsStartingSize += size.get();
|
||||
}
|
||||
|
||||
int64_t newSystemShardsStartingSize = 0;
|
||||
for ( int i = 0; i < systemSizes.size(); i++ )
|
||||
newSystemShardsStartingSize += systemSizes[i].get();
|
||||
for (const auto& systemSize : systemSizes) {
|
||||
newSystemShardsStartingSize += systemSize.get();
|
||||
}
|
||||
|
||||
int64_t totalSizeEstimate = self->dbSizeEstimate->get();
|
||||
/*TraceEvent("TrackerChangeSizes")
|
||||
|
@ -944,13 +946,14 @@ vector<KeyRange> ShardsAffectedByTeamFailure::getShardsFor( Team team ) {
|
|||
return r;
|
||||
}
|
||||
|
||||
bool ShardsAffectedByTeamFailure::hasShards(Team team) {
|
||||
bool ShardsAffectedByTeamFailure::hasShards(Team team) const {
|
||||
auto it = team_shards.lower_bound(std::pair<Team, KeyRange>(team, KeyRangeRef()));
|
||||
return it != team_shards.end() && it->first == team;
|
||||
}
|
||||
|
||||
int ShardsAffectedByTeamFailure::getNumberOfShards( UID ssID ) {
|
||||
return storageServerShards[ssID];
|
||||
int ShardsAffectedByTeamFailure::getNumberOfShards(UID ssID) const {
|
||||
auto it = storageServerShards.find(ssID);
|
||||
return it == storageServerShards.end() ? 0 : it->second;
|
||||
}
|
||||
|
||||
std::pair<vector<ShardsAffectedByTeamFailure::Team>,vector<ShardsAffectedByTeamFailure::Team>> ShardsAffectedByTeamFailure::getTeamsFor( KeyRangeRef keys ) {
|
||||
|
|
|
@ -27,15 +27,15 @@ void ExecCmdValueString::setCmdValueString(StringRef pCmdValueString) {
|
|||
parseCmdValue();
|
||||
}
|
||||
|
||||
StringRef ExecCmdValueString::getCmdValueString() {
|
||||
StringRef ExecCmdValueString::getCmdValueString() const {
|
||||
return cmdValueString.toString();
|
||||
}
|
||||
|
||||
StringRef ExecCmdValueString::getBinaryPath() {
|
||||
StringRef ExecCmdValueString::getBinaryPath() const {
|
||||
return binaryPath;
|
||||
}
|
||||
|
||||
VectorRef<StringRef> ExecCmdValueString::getBinaryArgs() {
|
||||
VectorRef<StringRef> ExecCmdValueString::getBinaryArgs() const {
|
||||
return binaryArgs;
|
||||
}
|
||||
|
||||
|
@ -57,7 +57,7 @@ void ExecCmdValueString::parseCmdValue() {
|
|||
return;
|
||||
}
|
||||
|
||||
void ExecCmdValueString::dbgPrint() {
|
||||
void ExecCmdValueString::dbgPrint() const {
|
||||
auto te = TraceEvent("ExecCmdValueString");
|
||||
|
||||
te.detail("CmdValueString", cmdValueString.toString());
|
||||
|
|
|
@ -25,13 +25,13 @@ public: // ctor & dtor
|
|||
explicit ExecCmdValueString(StringRef cmdValueString);
|
||||
|
||||
public: // interfaces
|
||||
StringRef getBinaryPath();
|
||||
VectorRef<StringRef> getBinaryArgs();
|
||||
StringRef getBinaryPath() const;
|
||||
VectorRef<StringRef> getBinaryArgs() const;
|
||||
void setCmdValueString(StringRef cmdValueString);
|
||||
StringRef getCmdValueString(void);
|
||||
StringRef getCmdValueString(void) const;
|
||||
|
||||
public: // helper functions
|
||||
void dbgPrint();
|
||||
void dbgPrint() const;
|
||||
|
||||
private: // functions
|
||||
void parseCmdValue();
|
||||
|
|
|
@ -279,7 +279,7 @@ private:
|
|||
StringRef p1, p2;
|
||||
OpRef() {}
|
||||
OpRef(Arena& a, OpRef const& o) : op(o.op), p1(a, o.p1), p2(a, o.p2) {}
|
||||
size_t expectedSize() { return p1.expectedSize() + p2.expectedSize(); }
|
||||
size_t expectedSize() const { return p1.expectedSize() + p2.expectedSize(); }
|
||||
};
|
||||
struct OpHeader {
|
||||
int op;
|
||||
|
|
|
@ -381,6 +381,7 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
|
|||
init( PROXY_COMPUTE_GROWTH_RATE, 0.01 );
|
||||
init( TXN_STATE_SEND_AMOUNT, 4 );
|
||||
init( REPORT_TRANSACTION_COST_ESTIMATION_DELAY, 0.1 );
|
||||
init( PROXY_REJECT_BATCH_QUEUED_TOO_LONG, true );
|
||||
|
||||
init( RESET_MASTER_BATCHES, 200 );
|
||||
init( RESET_RESOLVER_BATCHES, 200 );
|
||||
|
|
|
@ -312,6 +312,7 @@ public:
|
|||
double PROXY_COMPUTE_GROWTH_RATE;
|
||||
int TXN_STATE_SEND_AMOUNT;
|
||||
double REPORT_TRANSACTION_COST_ESTIMATION_DELAY;
|
||||
bool PROXY_REJECT_BATCH_QUEUED_TOO_LONG;
|
||||
|
||||
int RESET_MASTER_BATCHES;
|
||||
int RESET_RESOLVER_BATCHES;
|
||||
|
|
|
@ -182,7 +182,7 @@ void commitMessages( LogRouterData* self, Version version, const std::vector<Tag
|
|||
}
|
||||
|
||||
int msgSize = 0;
|
||||
for(auto& i : taggedMessages) {
|
||||
for (const auto& i : taggedMessages) {
|
||||
msgSize += i.message.size();
|
||||
}
|
||||
|
||||
|
@ -199,7 +199,7 @@ void commitMessages( LogRouterData* self, Version version, const std::vector<Tag
|
|||
|
||||
block.pop_front(block.size());
|
||||
|
||||
for(auto& msg : taggedMessages) {
|
||||
for (const auto& msg : taggedMessages) {
|
||||
if(msg.message.size() > block.capacity() - block.size()) {
|
||||
self->messageBlocks.emplace_back(version, block);
|
||||
block = Standalone<VectorRef<uint8_t>>();
|
||||
|
@ -207,7 +207,7 @@ void commitMessages( LogRouterData* self, Version version, const std::vector<Tag
|
|||
}
|
||||
|
||||
block.append(block.arena(), msg.message.begin(), msg.message.size());
|
||||
for(auto& tag : msg.tags) {
|
||||
for (const auto& tag : msg.tags) {
|
||||
auto tagData = self->getTagData(tag);
|
||||
if(!tagData) {
|
||||
tagData = self->createTagData(tag, 0, 0);
|
||||
|
|
|
@ -329,11 +329,11 @@ struct ILogSystem {
|
|||
|
||||
//if hasMessage() returns true, getMessage(), getMessageWithTags(), or reader() can be called.
|
||||
//does not modify the cursor
|
||||
virtual bool hasMessage() = 0;
|
||||
virtual bool hasMessage() const = 0;
|
||||
|
||||
//pre: only callable if hasMessage() returns true
|
||||
//return the tags associated with the message for the current sequence
|
||||
virtual VectorRef<Tag> getTags() = 0;
|
||||
virtual VectorRef<Tag> getTags() const = 0;
|
||||
|
||||
//pre: only callable if hasMessage() returns true
|
||||
//returns the arena containing the contents of getMessage(), getMessageWithTags(), and reader()
|
||||
|
@ -374,32 +374,32 @@ struct ILogSystem {
|
|||
// (1) the failure monitor detects that the servers associated with the cursor is failed
|
||||
// (2) the interface is not present
|
||||
// (3) the cursor cannot return any more results
|
||||
virtual bool isActive() = 0;
|
||||
virtual bool isActive() const = 0;
|
||||
|
||||
//returns true if the cursor cannot return any more results
|
||||
virtual bool isExhausted() = 0;
|
||||
virtual bool isExhausted() const = 0;
|
||||
|
||||
// Returns the smallest possible message version which the current message (if any) or a subsequent message might have
|
||||
// (If hasMessage(), this is therefore the message version of the current message)
|
||||
virtual const LogMessageVersion& version() = 0;
|
||||
virtual const LogMessageVersion& version() const = 0;
|
||||
|
||||
//So far, the cursor has returned all messages which both satisfy the criteria passed to peek() to create the cursor AND have (popped(),0) <= message version number <= version()
|
||||
//Other messages might have been skipped
|
||||
virtual Version popped() = 0;
|
||||
virtual Version popped() const = 0;
|
||||
|
||||
// Returns the maximum version known to have been pushed (not necessarily durably) into the log system (0 is always a possible result!)
|
||||
virtual Version getMaxKnownVersion() { return 0; }
|
||||
virtual Version getMaxKnownVersion() const { return 0; }
|
||||
|
||||
virtual Version getMinKnownCommittedVersion() = 0;
|
||||
virtual Version getMinKnownCommittedVersion() const = 0;
|
||||
|
||||
virtual Optional<UID> getPrimaryPeekLocation() = 0;
|
||||
virtual Optional<UID> getPrimaryPeekLocation() const = 0;
|
||||
|
||||
virtual void addref() = 0;
|
||||
|
||||
virtual void delref() = 0;
|
||||
};
|
||||
|
||||
struct ServerPeekCursor : IPeekCursor, ReferenceCounted<ServerPeekCursor> {
|
||||
struct ServerPeekCursor final : IPeekCursor, ReferenceCounted<ServerPeekCursor> {
|
||||
Reference<AsyncVar<OptionalInterface<TLogInterface>>> interf;
|
||||
const Tag tag;
|
||||
|
||||
|
@ -428,37 +428,33 @@ struct ILogSystem {
|
|||
ServerPeekCursor( Reference<AsyncVar<OptionalInterface<TLogInterface>>> const& interf, Tag tag, Version begin, Version end, bool returnIfBlocked, bool parallelGetMore );
|
||||
ServerPeekCursor( TLogPeekReply const& results, LogMessageVersion const& messageVersion, LogMessageVersion const& end, TagsAndMessage const& message, bool hasMsg, Version poppedVersion, Tag tag );
|
||||
|
||||
virtual Reference<IPeekCursor> cloneNoMore();
|
||||
virtual void setProtocolVersion( ProtocolVersion version );
|
||||
virtual Arena& arena();
|
||||
virtual ArenaReader* reader();
|
||||
virtual bool hasMessage();
|
||||
virtual void nextMessage();
|
||||
virtual StringRef getMessage();
|
||||
virtual StringRef getMessageWithTags();
|
||||
virtual VectorRef<Tag> getTags();
|
||||
virtual void advanceTo(LogMessageVersion n);
|
||||
virtual Future<Void> getMore(TaskPriority taskID = TaskPriority::TLogPeekReply);
|
||||
virtual Future<Void> onFailed();
|
||||
virtual bool isActive();
|
||||
virtual bool isExhausted();
|
||||
virtual const LogMessageVersion& version();
|
||||
virtual Version popped();
|
||||
virtual Version getMinKnownCommittedVersion();
|
||||
virtual Optional<UID> getPrimaryPeekLocation();
|
||||
Reference<IPeekCursor> cloneNoMore() override;
|
||||
void setProtocolVersion(ProtocolVersion version) override;
|
||||
Arena& arena() override;
|
||||
ArenaReader* reader() override;
|
||||
bool hasMessage() const override;
|
||||
void nextMessage() override;
|
||||
StringRef getMessage() override;
|
||||
StringRef getMessageWithTags() override;
|
||||
VectorRef<Tag> getTags() const override;
|
||||
void advanceTo(LogMessageVersion n) override;
|
||||
Future<Void> getMore(TaskPriority taskID = TaskPriority::TLogPeekReply) override;
|
||||
Future<Void> onFailed() override;
|
||||
bool isActive() const override;
|
||||
bool isExhausted() const override;
|
||||
const LogMessageVersion& version() const override;
|
||||
Version popped() const override;
|
||||
Version getMinKnownCommittedVersion() const override;
|
||||
Optional<UID> getPrimaryPeekLocation() const override;
|
||||
|
||||
virtual void addref() {
|
||||
ReferenceCounted<ServerPeekCursor>::addref();
|
||||
}
|
||||
void addref() override { ReferenceCounted<ServerPeekCursor>::addref(); }
|
||||
|
||||
virtual void delref() {
|
||||
ReferenceCounted<ServerPeekCursor>::delref();
|
||||
}
|
||||
void delref() override { ReferenceCounted<ServerPeekCursor>::delref(); }
|
||||
|
||||
virtual Version getMaxKnownVersion() { return results.maxKnownVersion; }
|
||||
Version getMaxKnownVersion() const override { return results.maxKnownVersion; }
|
||||
};
|
||||
|
||||
struct MergedPeekCursor : IPeekCursor, ReferenceCounted<MergedPeekCursor> {
|
||||
struct MergedPeekCursor final : IPeekCursor, ReferenceCounted<MergedPeekCursor> {
|
||||
Reference<LogSet> logSet;
|
||||
std::vector< Reference<IPeekCursor> > serverCursors;
|
||||
std::vector<LocalityEntry> locations;
|
||||
|
@ -476,37 +472,33 @@ struct ILogSystem {
|
|||
MergedPeekCursor( std::vector<Reference<AsyncVar<OptionalInterface<TLogInterface>>>> const& logServers, int bestServer, int readQuorum, Tag tag, Version begin, Version end, bool parallelGetMore, std::vector<LocalityData> const& tLogLocalities, Reference<IReplicationPolicy> const tLogPolicy, int tLogReplicationFactor );
|
||||
MergedPeekCursor( std::vector< Reference<IPeekCursor> > const& serverCursors, LogMessageVersion const& messageVersion, int bestServer, int readQuorum, Optional<LogMessageVersion> nextVersion, Reference<LogSet> logSet, int tLogReplicationFactor );
|
||||
|
||||
virtual Reference<IPeekCursor> cloneNoMore();
|
||||
virtual void setProtocolVersion( ProtocolVersion version );
|
||||
virtual Arena& arena();
|
||||
virtual ArenaReader* reader();
|
||||
Reference<IPeekCursor> cloneNoMore() override;
|
||||
void setProtocolVersion(ProtocolVersion version) override;
|
||||
Arena& arena() override;
|
||||
ArenaReader* reader() override;
|
||||
void calcHasMessage();
|
||||
void updateMessage(bool usePolicy);
|
||||
virtual bool hasMessage();
|
||||
virtual void nextMessage();
|
||||
virtual StringRef getMessage();
|
||||
virtual StringRef getMessageWithTags();
|
||||
virtual VectorRef<Tag> getTags();
|
||||
virtual void advanceTo(LogMessageVersion n);
|
||||
virtual Future<Void> getMore(TaskPriority taskID = TaskPriority::TLogPeekReply);
|
||||
virtual Future<Void> onFailed();
|
||||
virtual bool isActive();
|
||||
virtual bool isExhausted();
|
||||
virtual const LogMessageVersion& version();
|
||||
virtual Version popped();
|
||||
virtual Version getMinKnownCommittedVersion();
|
||||
virtual Optional<UID> getPrimaryPeekLocation();
|
||||
bool hasMessage() const override;
|
||||
void nextMessage() override;
|
||||
StringRef getMessage() override;
|
||||
StringRef getMessageWithTags() override;
|
||||
VectorRef<Tag> getTags() const override;
|
||||
void advanceTo(LogMessageVersion n) override;
|
||||
Future<Void> getMore(TaskPriority taskID = TaskPriority::TLogPeekReply) override;
|
||||
Future<Void> onFailed() override;
|
||||
bool isActive() const override;
|
||||
bool isExhausted() const override;
|
||||
const LogMessageVersion& version() const override;
|
||||
Version popped() const override;
|
||||
Version getMinKnownCommittedVersion() const override;
|
||||
Optional<UID> getPrimaryPeekLocation() const override;
|
||||
|
||||
virtual void addref() {
|
||||
ReferenceCounted<MergedPeekCursor>::addref();
|
||||
}
|
||||
void addref() override { ReferenceCounted<MergedPeekCursor>::addref(); }
|
||||
|
||||
virtual void delref() {
|
||||
ReferenceCounted<MergedPeekCursor>::delref();
|
||||
}
|
||||
void delref() override { ReferenceCounted<MergedPeekCursor>::delref(); }
|
||||
};
|
||||
|
||||
struct SetPeekCursor : IPeekCursor, ReferenceCounted<SetPeekCursor> {
|
||||
struct SetPeekCursor final : IPeekCursor, ReferenceCounted<SetPeekCursor> {
|
||||
std::vector<Reference<LogSet>> logSets;
|
||||
std::vector< std::vector< Reference<IPeekCursor> > > serverCursors;
|
||||
Tag tag;
|
||||
|
@ -523,72 +515,64 @@ struct ILogSystem {
|
|||
SetPeekCursor( std::vector<Reference<LogSet>> const& logSets, int bestSet, int bestServer, Tag tag, Version begin, Version end, bool parallelGetMore );
|
||||
SetPeekCursor( std::vector<Reference<LogSet>> const& logSets, std::vector< std::vector< Reference<IPeekCursor> > > const& serverCursors, LogMessageVersion const& messageVersion, int bestSet, int bestServer, Optional<LogMessageVersion> nextVersion, bool useBestSet );
|
||||
|
||||
virtual Reference<IPeekCursor> cloneNoMore();
|
||||
virtual void setProtocolVersion( ProtocolVersion version );
|
||||
virtual Arena& arena();
|
||||
virtual ArenaReader* reader();
|
||||
Reference<IPeekCursor> cloneNoMore() override;
|
||||
void setProtocolVersion(ProtocolVersion version) override;
|
||||
Arena& arena() override;
|
||||
ArenaReader* reader() override;
|
||||
void calcHasMessage();
|
||||
void updateMessage(int logIdx, bool usePolicy);
|
||||
virtual bool hasMessage();
|
||||
virtual void nextMessage();
|
||||
virtual StringRef getMessage();
|
||||
virtual StringRef getMessageWithTags();
|
||||
virtual VectorRef<Tag> getTags();
|
||||
virtual void advanceTo(LogMessageVersion n);
|
||||
virtual Future<Void> getMore(TaskPriority taskID = TaskPriority::TLogPeekReply);
|
||||
virtual Future<Void> onFailed();
|
||||
virtual bool isActive();
|
||||
virtual bool isExhausted();
|
||||
virtual const LogMessageVersion& version();
|
||||
virtual Version popped();
|
||||
virtual Version getMinKnownCommittedVersion();
|
||||
virtual Optional<UID> getPrimaryPeekLocation();
|
||||
bool hasMessage() const override;
|
||||
void nextMessage() override;
|
||||
StringRef getMessage() override;
|
||||
StringRef getMessageWithTags() override;
|
||||
VectorRef<Tag> getTags() const override;
|
||||
void advanceTo(LogMessageVersion n) override;
|
||||
Future<Void> getMore(TaskPriority taskID = TaskPriority::TLogPeekReply) override;
|
||||
Future<Void> onFailed() override;
|
||||
bool isActive() const override;
|
||||
bool isExhausted() const override;
|
||||
const LogMessageVersion& version() const override;
|
||||
Version popped() const override;
|
||||
Version getMinKnownCommittedVersion() const override;
|
||||
Optional<UID> getPrimaryPeekLocation() const override;
|
||||
|
||||
virtual void addref() {
|
||||
ReferenceCounted<SetPeekCursor>::addref();
|
||||
}
|
||||
void addref() override { ReferenceCounted<SetPeekCursor>::addref(); }
|
||||
|
||||
virtual void delref() {
|
||||
ReferenceCounted<SetPeekCursor>::delref();
|
||||
}
|
||||
void delref() override { ReferenceCounted<SetPeekCursor>::delref(); }
|
||||
};
|
||||
|
||||
struct MultiCursor : IPeekCursor, ReferenceCounted<MultiCursor> {
|
||||
struct MultiCursor final : IPeekCursor, ReferenceCounted<MultiCursor> {
|
||||
std::vector<Reference<IPeekCursor>> cursors;
|
||||
std::vector<LogMessageVersion> epochEnds;
|
||||
Version poppedVersion;
|
||||
|
||||
MultiCursor( std::vector<Reference<IPeekCursor>> cursors, std::vector<LogMessageVersion> epochEnds );
|
||||
|
||||
virtual Reference<IPeekCursor> cloneNoMore();
|
||||
virtual void setProtocolVersion( ProtocolVersion version );
|
||||
virtual Arena& arena();
|
||||
virtual ArenaReader* reader();
|
||||
virtual bool hasMessage();
|
||||
virtual void nextMessage();
|
||||
virtual StringRef getMessage();
|
||||
virtual StringRef getMessageWithTags();
|
||||
virtual VectorRef<Tag> getTags();
|
||||
virtual void advanceTo(LogMessageVersion n);
|
||||
virtual Future<Void> getMore(TaskPriority taskID = TaskPriority::TLogPeekReply);
|
||||
virtual Future<Void> onFailed();
|
||||
virtual bool isActive();
|
||||
virtual bool isExhausted();
|
||||
virtual const LogMessageVersion& version();
|
||||
virtual Version popped();
|
||||
virtual Version getMinKnownCommittedVersion();
|
||||
virtual Optional<UID> getPrimaryPeekLocation();
|
||||
Reference<IPeekCursor> cloneNoMore() override;
|
||||
void setProtocolVersion(ProtocolVersion version) override;
|
||||
Arena& arena() override;
|
||||
ArenaReader* reader() override;
|
||||
bool hasMessage() const override;
|
||||
void nextMessage() override;
|
||||
StringRef getMessage() override;
|
||||
StringRef getMessageWithTags() override;
|
||||
VectorRef<Tag> getTags() const override;
|
||||
void advanceTo(LogMessageVersion n) override;
|
||||
Future<Void> getMore(TaskPriority taskID = TaskPriority::TLogPeekReply) override;
|
||||
Future<Void> onFailed() override;
|
||||
bool isActive() const override;
|
||||
bool isExhausted() const override;
|
||||
const LogMessageVersion& version() const override;
|
||||
Version popped() const override;
|
||||
Version getMinKnownCommittedVersion() const override;
|
||||
Optional<UID> getPrimaryPeekLocation() const override;
|
||||
|
||||
virtual void addref() {
|
||||
ReferenceCounted<MultiCursor>::addref();
|
||||
}
|
||||
void addref() override { ReferenceCounted<MultiCursor>::addref(); }
|
||||
|
||||
virtual void delref() {
|
||||
ReferenceCounted<MultiCursor>::delref();
|
||||
}
|
||||
void delref() override { ReferenceCounted<MultiCursor>::delref(); }
|
||||
};
|
||||
|
||||
struct BufferedCursor : IPeekCursor, ReferenceCounted<BufferedCursor> {
|
||||
struct BufferedCursor final : IPeekCursor, ReferenceCounted<BufferedCursor> {
|
||||
struct BufferedMessage {
|
||||
Arena arena;
|
||||
StringRef message;
|
||||
|
@ -632,39 +616,35 @@ struct ILogSystem {
|
|||
BufferedCursor( std::vector<Reference<IPeekCursor>> cursors, Version begin, Version end, bool withTags, bool collectTags, bool canDiscardPopped );
|
||||
BufferedCursor( std::vector<Reference<AsyncVar<OptionalInterface<TLogInterface>>>> const& logServers, Tag tag, Version begin, Version end, bool parallelGetMore );
|
||||
|
||||
virtual Reference<IPeekCursor> cloneNoMore();
|
||||
virtual void setProtocolVersion( ProtocolVersion version );
|
||||
virtual Arena& arena();
|
||||
virtual ArenaReader* reader();
|
||||
virtual bool hasMessage();
|
||||
virtual void nextMessage();
|
||||
virtual StringRef getMessage();
|
||||
virtual StringRef getMessageWithTags();
|
||||
virtual VectorRef<Tag> getTags();
|
||||
virtual void advanceTo(LogMessageVersion n);
|
||||
virtual Future<Void> getMore(TaskPriority taskID = TaskPriority::TLogPeekReply);
|
||||
virtual Future<Void> onFailed();
|
||||
virtual bool isActive();
|
||||
virtual bool isExhausted();
|
||||
virtual const LogMessageVersion& version();
|
||||
virtual Version popped();
|
||||
virtual Version getMinKnownCommittedVersion();
|
||||
virtual Optional<UID> getPrimaryPeekLocation();
|
||||
Reference<IPeekCursor> cloneNoMore() override;
|
||||
void setProtocolVersion(ProtocolVersion version) override;
|
||||
Arena& arena() override;
|
||||
ArenaReader* reader() override;
|
||||
bool hasMessage() const override;
|
||||
void nextMessage() override;
|
||||
StringRef getMessage() override;
|
||||
StringRef getMessageWithTags() override;
|
||||
VectorRef<Tag> getTags() const override;
|
||||
void advanceTo(LogMessageVersion n) override;
|
||||
Future<Void> getMore(TaskPriority taskID = TaskPriority::TLogPeekReply) override;
|
||||
Future<Void> onFailed() override;
|
||||
bool isActive() const override;
|
||||
bool isExhausted() const override;
|
||||
const LogMessageVersion& version() const override;
|
||||
Version popped() const override;
|
||||
Version getMinKnownCommittedVersion() const override;
|
||||
Optional<UID> getPrimaryPeekLocation() const override;
|
||||
|
||||
virtual void addref() {
|
||||
ReferenceCounted<BufferedCursor>::addref();
|
||||
}
|
||||
void addref() override { ReferenceCounted<BufferedCursor>::addref(); }
|
||||
|
||||
virtual void delref() {
|
||||
ReferenceCounted<BufferedCursor>::delref();
|
||||
}
|
||||
void delref() override { ReferenceCounted<BufferedCursor>::delref(); }
|
||||
};
|
||||
|
||||
virtual void addref() = 0;
|
||||
virtual void delref() = 0;
|
||||
|
||||
virtual std::string describe() = 0;
|
||||
virtual UID getDebugID() = 0;
|
||||
virtual std::string describe() const = 0;
|
||||
virtual UID getDebugID() const = 0;
|
||||
|
||||
virtual void toCoreState( DBCoreState& ) = 0;
|
||||
|
||||
|
@ -736,9 +716,10 @@ struct ILogSystem {
|
|||
// Ensures that any calls to push or confirmEpochLive in the current epoch but strictly later than change_epoch will not return
|
||||
// Whenever changes in the set of available log servers require restarting recovery with a different end sequence, outLogSystem will be changed to a new ILogSystem
|
||||
|
||||
virtual Version getEnd() = 0;
|
||||
// Call only on an ILogSystem obtained from recoverAndEndEpoch()
|
||||
// Returns the first unreadable version number of the recovered epoch (i.e. message version numbers < (get_end(), 0) will be readable)
|
||||
virtual Version getEnd() const = 0;
|
||||
// Call only on an ILogSystem obtained from recoverAndEndEpoch()
|
||||
// Returns the first unreadable version number of the recovered epoch (i.e. message version numbers < (get_end(), 0)
|
||||
// will be readable)
|
||||
|
||||
// Returns the start version of current epoch for backup workers.
|
||||
virtual Version getBackupStartVersion() const = 0;
|
||||
|
@ -759,15 +740,17 @@ struct ILogSystem {
|
|||
// Call only on an ILogSystem obtained from recoverAndEndEpoch()
|
||||
// Returns an ILogSystem representing a new epoch immediately following this one. The new epoch is only provisional until the caller updates the coordinated DBCoreState
|
||||
|
||||
virtual LogSystemConfig getLogSystemConfig() = 0;
|
||||
// Returns the physical configuration of this LogSystem, that could be used to construct an equivalent LogSystem using fromLogSystemConfig()
|
||||
virtual LogSystemConfig getLogSystemConfig() const = 0;
|
||||
// Returns the physical configuration of this LogSystem, that could be used to construct an equivalent LogSystem
|
||||
// using fromLogSystemConfig()
|
||||
|
||||
virtual Standalone<StringRef> getLogsValue() = 0;
|
||||
virtual Standalone<StringRef> getLogsValue() const = 0;
|
||||
|
||||
virtual Future<Void> onLogSystemConfigChange() = 0;
|
||||
// Returns when the log system configuration has changed due to a tlog rejoin.
|
||||
|
||||
virtual void getPushLocations(VectorRef<Tag> tags, std::vector<int>& locations, bool allLocations = false) = 0;
|
||||
virtual void getPushLocations(VectorRef<Tag> tags, std::vector<int>& locations,
|
||||
bool allLocations = false) const = 0;
|
||||
|
||||
void getPushLocations(std::vector<Tag> const& tags, std::vector<int>& locations, bool allLocations = false) {
|
||||
getPushLocations(VectorRef<Tag>((Tag*)&tags.front(), tags.size()), locations, allLocations);
|
||||
|
@ -788,9 +771,9 @@ struct ILogSystem {
|
|||
|
||||
// Returns the pseudo tag to be popped for the given process class. If the
|
||||
// process class doesn't use pseudo tag, return the same tag.
|
||||
virtual Tag getPseudoPopTag(Tag tag, ProcessClass::ClassType type) = 0;
|
||||
virtual Tag getPseudoPopTag(Tag tag, ProcessClass::ClassType type) const = 0;
|
||||
|
||||
virtual bool hasPseudoLocality(int8_t locality) = 0;
|
||||
virtual bool hasPseudoLocality(int8_t locality) const = 0;
|
||||
|
||||
// Returns the actual version to be popped from the log router tag for the given pseudo tag.
|
||||
// For instance, a pseudo tag (-8, 2) means the actual popping tag is (-2, 2). Assuming there
|
||||
|
|
|
@ -78,7 +78,7 @@ struct serializable_traits<OptionalInterface<Interface>> : std::true_type {
|
|||
}
|
||||
};
|
||||
|
||||
|
||||
// Contains a generation of tLogs for an individual DC.
|
||||
struct TLogSet {
|
||||
constexpr static FileIdentifier file_identifier = 6302317;
|
||||
std::vector<OptionalInterface<TLogInterface>> tLogs;
|
||||
|
@ -203,7 +203,6 @@ enum class LogSystemType {
|
|||
empty = 0, // Never used.
|
||||
tagPartitioned = 2,
|
||||
};
|
||||
BINARY_SERIALIZABLE(LogSystemType);
|
||||
|
||||
struct LogSystemConfig {
|
||||
constexpr static FileIdentifier file_identifier = 16360847;
|
||||
|
|
|
@ -62,7 +62,7 @@ ArenaReader* ILogSystem::ServerPeekCursor::reader() {
|
|||
return &rd;
|
||||
}
|
||||
|
||||
bool ILogSystem::ServerPeekCursor::hasMessage() {
|
||||
bool ILogSystem::ServerPeekCursor::hasMessage() const {
|
||||
//TraceEvent("SPC_HasMessage", randomID).detail("HasMsg", hasMsg);
|
||||
return hasMsg;
|
||||
}
|
||||
|
@ -116,7 +116,7 @@ StringRef ILogSystem::ServerPeekCursor::getMessageWithTags() {
|
|||
return rawMessage;
|
||||
}
|
||||
|
||||
VectorRef<Tag> ILogSystem::ServerPeekCursor::getTags() {
|
||||
VectorRef<Tag> ILogSystem::ServerPeekCursor::getTags() const {
|
||||
return messageAndTags.tags;
|
||||
}
|
||||
|
||||
|
@ -329,7 +329,7 @@ Future<Void> ILogSystem::ServerPeekCursor::onFailed() {
|
|||
return serverPeekOnFailed(this);
|
||||
}
|
||||
|
||||
bool ILogSystem::ServerPeekCursor::isActive() {
|
||||
bool ILogSystem::ServerPeekCursor::isActive() const {
|
||||
if( !interf->get().present() )
|
||||
return false;
|
||||
if( messageVersion >= end )
|
||||
|
@ -337,22 +337,29 @@ bool ILogSystem::ServerPeekCursor::isActive() {
|
|||
return IFailureMonitor::failureMonitor().getState( interf->get().interf().peekMessages.getEndpoint() ).isAvailable();
|
||||
}
|
||||
|
||||
bool ILogSystem::ServerPeekCursor::isExhausted() {
|
||||
bool ILogSystem::ServerPeekCursor::isExhausted() const {
|
||||
return messageVersion >= end;
|
||||
}
|
||||
|
||||
const LogMessageVersion& ILogSystem::ServerPeekCursor::version() { return messageVersion; } // Call only after nextMessage(). The sequence of the current message, or results.end if nextMessage() has returned false.
|
||||
const LogMessageVersion& ILogSystem::ServerPeekCursor::version() const {
|
||||
return messageVersion;
|
||||
} // Call only after nextMessage(). The sequence of the current message, or results.end if nextMessage() has returned
|
||||
// false.
|
||||
|
||||
Version ILogSystem::ServerPeekCursor::getMinKnownCommittedVersion() { return results.minKnownCommittedVersion; }
|
||||
Version ILogSystem::ServerPeekCursor::getMinKnownCommittedVersion() const {
|
||||
return results.minKnownCommittedVersion;
|
||||
}
|
||||
|
||||
Optional<UID> ILogSystem::ServerPeekCursor::getPrimaryPeekLocation() {
|
||||
Optional<UID> ILogSystem::ServerPeekCursor::getPrimaryPeekLocation() const {
|
||||
if(interf) {
|
||||
return interf->get().id();
|
||||
}
|
||||
return Optional<UID>();
|
||||
}
|
||||
|
||||
Version ILogSystem::ServerPeekCursor::popped() { return poppedVersion; }
|
||||
Version ILogSystem::ServerPeekCursor::popped() const {
|
||||
return poppedVersion;
|
||||
}
|
||||
|
||||
ILogSystem::MergedPeekCursor::MergedPeekCursor( vector< Reference<ILogSystem::IPeekCursor> > const& serverCursors, Version begin )
|
||||
: serverCursors(serverCursors), bestServer(-1), readQuorum(serverCursors.size()), tag(invalidTag), currentCursor(0), hasNextMessage(false),
|
||||
|
@ -486,7 +493,7 @@ void ILogSystem::MergedPeekCursor::updateMessage(bool usePolicy) {
|
|||
}
|
||||
}
|
||||
|
||||
bool ILogSystem::MergedPeekCursor::hasMessage() {
|
||||
bool ILogSystem::MergedPeekCursor::hasMessage() const {
|
||||
return hasNextMessage;
|
||||
}
|
||||
|
||||
|
@ -504,7 +511,7 @@ StringRef ILogSystem::MergedPeekCursor::getMessageWithTags() {
|
|||
return serverCursors[currentCursor]->getMessageWithTags();
|
||||
}
|
||||
|
||||
VectorRef<Tag> ILogSystem::MergedPeekCursor::getTags() {
|
||||
VectorRef<Tag> ILogSystem::MergedPeekCursor::getTags() const {
|
||||
return serverCursors[currentCursor]->getTags();
|
||||
}
|
||||
|
||||
|
@ -569,29 +576,31 @@ Future<Void> ILogSystem::MergedPeekCursor::onFailed() {
|
|||
return Never();
|
||||
}
|
||||
|
||||
bool ILogSystem::MergedPeekCursor::isActive() {
|
||||
bool ILogSystem::MergedPeekCursor::isActive() const {
|
||||
ASSERT(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
bool ILogSystem::MergedPeekCursor::isExhausted() {
|
||||
bool ILogSystem::MergedPeekCursor::isExhausted() const {
|
||||
return serverCursors[currentCursor]->isExhausted();
|
||||
}
|
||||
|
||||
const LogMessageVersion& ILogSystem::MergedPeekCursor::version() { return messageVersion; }
|
||||
const LogMessageVersion& ILogSystem::MergedPeekCursor::version() const {
|
||||
return messageVersion;
|
||||
}
|
||||
|
||||
Version ILogSystem::MergedPeekCursor::getMinKnownCommittedVersion() {
|
||||
Version ILogSystem::MergedPeekCursor::getMinKnownCommittedVersion() const {
|
||||
return serverCursors[currentCursor]->getMinKnownCommittedVersion();
|
||||
}
|
||||
|
||||
Optional<UID> ILogSystem::MergedPeekCursor::getPrimaryPeekLocation() {
|
||||
Optional<UID> ILogSystem::MergedPeekCursor::getPrimaryPeekLocation() const {
|
||||
if(bestServer >= 0) {
|
||||
return serverCursors[bestServer]->getPrimaryPeekLocation();
|
||||
}
|
||||
return Optional<UID>();
|
||||
}
|
||||
|
||||
Version ILogSystem::MergedPeekCursor::popped() {
|
||||
Version ILogSystem::MergedPeekCursor::popped() const {
|
||||
Version poppedVersion = 0;
|
||||
for (auto& c : serverCursors)
|
||||
poppedVersion = std::max(poppedVersion, c->popped());
|
||||
|
@ -761,7 +770,7 @@ void ILogSystem::SetPeekCursor::updateMessage(int logIdx, bool usePolicy) {
|
|||
}
|
||||
}
|
||||
|
||||
bool ILogSystem::SetPeekCursor::hasMessage() {
|
||||
bool ILogSystem::SetPeekCursor::hasMessage() const {
|
||||
return hasNextMessage;
|
||||
}
|
||||
|
||||
|
@ -777,7 +786,7 @@ StringRef ILogSystem::SetPeekCursor::getMessage() { return serverCursors[current
|
|||
|
||||
StringRef ILogSystem::SetPeekCursor::getMessageWithTags() { return serverCursors[currentSet][currentCursor]->getMessageWithTags(); }
|
||||
|
||||
VectorRef<Tag> ILogSystem::SetPeekCursor::getTags() {
|
||||
VectorRef<Tag> ILogSystem::SetPeekCursor::getTags() const {
|
||||
return serverCursors[currentSet][currentCursor]->getTags();
|
||||
}
|
||||
|
||||
|
@ -881,29 +890,31 @@ Future<Void> ILogSystem::SetPeekCursor::onFailed() {
|
|||
return Never();
|
||||
}
|
||||
|
||||
bool ILogSystem::SetPeekCursor::isActive() {
|
||||
bool ILogSystem::SetPeekCursor::isActive() const {
|
||||
ASSERT(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
bool ILogSystem::SetPeekCursor::isExhausted() {
|
||||
bool ILogSystem::SetPeekCursor::isExhausted() const {
|
||||
return serverCursors[currentSet][currentCursor]->isExhausted();
|
||||
}
|
||||
|
||||
const LogMessageVersion& ILogSystem::SetPeekCursor::version() { return messageVersion; }
|
||||
const LogMessageVersion& ILogSystem::SetPeekCursor::version() const {
|
||||
return messageVersion;
|
||||
}
|
||||
|
||||
Version ILogSystem::SetPeekCursor::getMinKnownCommittedVersion() {
|
||||
Version ILogSystem::SetPeekCursor::getMinKnownCommittedVersion() const {
|
||||
return serverCursors[currentSet][currentCursor]->getMinKnownCommittedVersion();
|
||||
}
|
||||
|
||||
Optional<UID> ILogSystem::SetPeekCursor::getPrimaryPeekLocation() {
|
||||
Optional<UID> ILogSystem::SetPeekCursor::getPrimaryPeekLocation() const {
|
||||
if(bestServer >= 0 && bestSet >= 0) {
|
||||
return serverCursors[bestSet][bestServer]->getPrimaryPeekLocation();
|
||||
}
|
||||
return Optional<UID>();
|
||||
}
|
||||
|
||||
Version ILogSystem::SetPeekCursor::popped() {
|
||||
Version ILogSystem::SetPeekCursor::popped() const {
|
||||
Version poppedVersion = 0;
|
||||
for (auto& cursors : serverCursors) {
|
||||
for(auto& c : cursors) {
|
||||
|
@ -935,7 +946,7 @@ ArenaReader* ILogSystem::MultiCursor::reader() {
|
|||
return cursors.back()->reader();
|
||||
}
|
||||
|
||||
bool ILogSystem::MultiCursor::hasMessage() {
|
||||
bool ILogSystem::MultiCursor::hasMessage() const {
|
||||
return cursors.back()->hasMessage();
|
||||
}
|
||||
|
||||
|
@ -951,7 +962,7 @@ StringRef ILogSystem::MultiCursor::getMessageWithTags() {
|
|||
return cursors.back()->getMessageWithTags();
|
||||
}
|
||||
|
||||
VectorRef<Tag> ILogSystem::MultiCursor::getTags() {
|
||||
VectorRef<Tag> ILogSystem::MultiCursor::getTags() const {
|
||||
return cursors.back()->getTags();
|
||||
}
|
||||
|
||||
|
@ -981,27 +992,27 @@ Future<Void> ILogSystem::MultiCursor::onFailed() {
|
|||
return cursors.back()->onFailed();
|
||||
}
|
||||
|
||||
bool ILogSystem::MultiCursor::isActive() {
|
||||
bool ILogSystem::MultiCursor::isActive() const {
|
||||
return cursors.back()->isActive();
|
||||
}
|
||||
|
||||
bool ILogSystem::MultiCursor::isExhausted() {
|
||||
bool ILogSystem::MultiCursor::isExhausted() const {
|
||||
return cursors.back()->isExhausted();
|
||||
}
|
||||
|
||||
const LogMessageVersion& ILogSystem::MultiCursor::version() {
|
||||
const LogMessageVersion& ILogSystem::MultiCursor::version() const {
|
||||
return cursors.back()->version();
|
||||
}
|
||||
|
||||
Version ILogSystem::MultiCursor::getMinKnownCommittedVersion() {
|
||||
Version ILogSystem::MultiCursor::getMinKnownCommittedVersion() const {
|
||||
return cursors.back()->getMinKnownCommittedVersion();
|
||||
}
|
||||
|
||||
Optional<UID> ILogSystem::MultiCursor::getPrimaryPeekLocation() {
|
||||
Optional<UID> ILogSystem::MultiCursor::getPrimaryPeekLocation() const {
|
||||
return cursors.back()->getPrimaryPeekLocation();
|
||||
}
|
||||
|
||||
Version ILogSystem::MultiCursor::popped() {
|
||||
Version ILogSystem::MultiCursor::popped() const {
|
||||
return std::max(poppedVersion, cursors.back()->popped());
|
||||
}
|
||||
|
||||
|
@ -1069,7 +1080,7 @@ ArenaReader* ILogSystem::BufferedCursor::reader() {
|
|||
return cursors[0]->reader();
|
||||
}
|
||||
|
||||
bool ILogSystem::BufferedCursor::hasMessage() {
|
||||
bool ILogSystem::BufferedCursor::hasMessage() const {
|
||||
return hasNextMessage;
|
||||
}
|
||||
|
||||
|
@ -1093,7 +1104,7 @@ StringRef ILogSystem::BufferedCursor::getMessageWithTags() {
|
|||
return messages[messageIndex].message;
|
||||
}
|
||||
|
||||
VectorRef<Tag> ILogSystem::BufferedCursor::getTags() {
|
||||
VectorRef<Tag> ILogSystem::BufferedCursor::getTags() const {
|
||||
ASSERT(withTags);
|
||||
return messages[messageIndex].tags;
|
||||
}
|
||||
|
@ -1222,32 +1233,32 @@ Future<Void> ILogSystem::BufferedCursor::onFailed() {
|
|||
return Never();
|
||||
}
|
||||
|
||||
bool ILogSystem::BufferedCursor::isActive() {
|
||||
bool ILogSystem::BufferedCursor::isActive() const {
|
||||
ASSERT(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
bool ILogSystem::BufferedCursor::isExhausted() {
|
||||
bool ILogSystem::BufferedCursor::isExhausted() const {
|
||||
ASSERT(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
const LogMessageVersion& ILogSystem::BufferedCursor::version() {
|
||||
const LogMessageVersion& ILogSystem::BufferedCursor::version() const {
|
||||
if(hasNextMessage) {
|
||||
return messages[messageIndex].version;
|
||||
}
|
||||
return messageVersion;
|
||||
}
|
||||
|
||||
Version ILogSystem::BufferedCursor::getMinKnownCommittedVersion() {
|
||||
Version ILogSystem::BufferedCursor::getMinKnownCommittedVersion() const {
|
||||
return minKnownCommittedVersion;
|
||||
}
|
||||
|
||||
Optional<UID> ILogSystem::BufferedCursor::getPrimaryPeekLocation() {
|
||||
Optional<UID> ILogSystem::BufferedCursor::getPrimaryPeekLocation() const {
|
||||
return Optional<UID>();
|
||||
}
|
||||
|
||||
Version ILogSystem::BufferedCursor::popped() {
|
||||
Version ILogSystem::BufferedCursor::popped() const {
|
||||
if(initialPoppedVersion == poppedVersion) {
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -114,7 +114,7 @@ private:
|
|||
m_is_inline = isInline;
|
||||
}
|
||||
|
||||
StringRef getKey() {
|
||||
StringRef getKey() const {
|
||||
if (m_is_inline) {
|
||||
return StringRef(&key.inlineData[0], m_inline_length);
|
||||
} else {
|
||||
|
@ -122,9 +122,9 @@ private:
|
|||
}
|
||||
}
|
||||
|
||||
inline int getKeySize() { return m_is_inline ? m_inline_length : key.data.size(); }
|
||||
inline int getKeySize() const { return m_is_inline ? m_inline_length : key.data.size(); }
|
||||
|
||||
inline int16_t getFirstByte() {
|
||||
inline int16_t getFirstByte() const {
|
||||
if (m_is_inline) {
|
||||
return m_inline_length == 0 ? LEAF_BYTE : key.inlineData[0];
|
||||
} else {
|
||||
|
@ -132,7 +132,7 @@ private:
|
|||
}
|
||||
}
|
||||
|
||||
inline size_type getArenaSize() { return m_is_inline ? 0 : arena.getSize(); }
|
||||
inline size_type getArenaSize() const { return m_is_inline ? 0 : arena.getSize(); }
|
||||
|
||||
uint32_t m_is_leaf : 1;
|
||||
uint32_t m_is_fixed : 1; // if true, then we have fixed number of children (3)
|
||||
|
|
|
@ -729,8 +729,7 @@ ACTOR Future<Void> monitorServerListChange(
|
|||
self->lastSSListFetchedTimestamp = now();
|
||||
|
||||
std::map<UID, StorageServerInterface> newServers;
|
||||
for (int i = 0; i < results.size(); i++) {
|
||||
const StorageServerInterface& ssi = results[i].first;
|
||||
for (const auto& [ssi, _] : results) {
|
||||
const UID serverId = ssi.id();
|
||||
newServers[serverId] = ssi;
|
||||
|
||||
|
|
|
@ -28,7 +28,6 @@
|
|||
// is slightly more detailed and is used by the status infrastructure. But I'm scared to make changes to the former so close to 1.0 release, so I'm making the latter.
|
||||
|
||||
enum class RecoveryState { UNINITIALIZED = 0, READING_CSTATE = 1, LOCKING_CSTATE = 2, RECRUITING = 3, RECOVERY_TRANSACTION = 4, WRITING_CSTATE = 5, ACCEPTING_COMMITS = 6, ALL_LOGS_RECRUITED = 7, STORAGE_RECOVERED = 8, FULLY_RECOVERED = 9 };
|
||||
BINARY_SERIALIZABLE( RecoveryState );
|
||||
|
||||
namespace RecoveryStatus {
|
||||
enum RecoveryStatus {
|
||||
|
|
|
@ -80,7 +80,6 @@ using VersionedMutationsVec = Standalone<VectorRef<VersionedMutation>>;
|
|||
using SampledMutationsVec = Standalone<VectorRef<SampledMutation>>;
|
||||
|
||||
enum class RestoreRole { Invalid = 0, Controller = 1, Loader, Applier };
|
||||
BINARY_SERIALIZABLE(RestoreRole);
|
||||
std::string getRoleStr(RestoreRole role);
|
||||
extern const std::vector<std::string> RestoreRoleStr;
|
||||
extern int numRoles;
|
||||
|
@ -130,4 +129,4 @@ struct RestoreSimpleRequest : TimedRequest {
|
|||
|
||||
bool isRangeMutation(MutationRef m);
|
||||
|
||||
#endif // FDBSERVER_RESTOREUTIL_H
|
||||
#endif // FDBSERVER_RESTOREUTIL_H
|
||||
|
|
|
@ -62,18 +62,18 @@ T simulate( const T& in ) {
|
|||
ACTOR Future<Void> runBackup( Reference<ClusterConnectionFile> connFile ) {
|
||||
state std::vector<Future<Void>> agentFutures;
|
||||
|
||||
while (g_simulator.backupAgents == ISimulator::WaitForType) {
|
||||
while (g_simulator.backupAgents == ISimulator::BackupAgentType::WaitForType) {
|
||||
wait(delay(1.0));
|
||||
}
|
||||
|
||||
if (g_simulator.backupAgents == ISimulator::BackupToFile) {
|
||||
if (g_simulator.backupAgents == ISimulator::BackupAgentType::BackupToFile) {
|
||||
Database cx = Database::createDatabase(connFile, -1);
|
||||
|
||||
state FileBackupAgent fileAgent;
|
||||
state double backupPollDelay = 1.0 / CLIENT_KNOBS->BACKUP_AGGREGATE_POLL_RATE;
|
||||
agentFutures.push_back(fileAgent.run(cx, &backupPollDelay, CLIENT_KNOBS->SIM_BACKUP_TASKS_PER_AGENT));
|
||||
|
||||
while (g_simulator.backupAgents == ISimulator::BackupToFile) {
|
||||
while (g_simulator.backupAgents == ISimulator::BackupAgentType::BackupToFile) {
|
||||
wait(delay(1.0));
|
||||
}
|
||||
|
||||
|
@ -89,11 +89,11 @@ ACTOR Future<Void> runBackup( Reference<ClusterConnectionFile> connFile ) {
|
|||
ACTOR Future<Void> runDr( Reference<ClusterConnectionFile> connFile ) {
|
||||
state std::vector<Future<Void>> agentFutures;
|
||||
|
||||
while (g_simulator.drAgents == ISimulator::WaitForType) {
|
||||
while (g_simulator.drAgents == ISimulator::BackupAgentType::WaitForType) {
|
||||
wait(delay(1.0));
|
||||
}
|
||||
|
||||
if (g_simulator.drAgents == ISimulator::BackupToDB) {
|
||||
if (g_simulator.drAgents == ISimulator::BackupAgentType::BackupToDB) {
|
||||
Database cx = Database::createDatabase(connFile, -1);
|
||||
|
||||
auto extraFile = makeReference<ClusterConnectionFile>(*g_simulator.extraDB);
|
||||
|
@ -110,7 +110,7 @@ ACTOR Future<Void> runDr( Reference<ClusterConnectionFile> connFile ) {
|
|||
agentFutures.push_back(extraAgent.run(cx, &dr1PollDelay, CLIENT_KNOBS->SIM_BACKUP_TASKS_PER_AGENT));
|
||||
agentFutures.push_back(dbAgent.run(extraDB, &dr2PollDelay, CLIENT_KNOBS->SIM_BACKUP_TASKS_PER_AGENT));
|
||||
|
||||
while (g_simulator.drAgents == ISimulator::BackupToDB) {
|
||||
while (g_simulator.drAgents == ISimulator::BackupAgentType::BackupToDB) {
|
||||
wait(delay(1.0));
|
||||
}
|
||||
|
||||
|
|
|
@ -209,9 +209,9 @@ void sortPoints(std::vector<KeyInfo>& points) {
|
|||
|
||||
class SkipList : NonCopyable {
|
||||
private:
|
||||
static const int MaxLevels = 26;
|
||||
static constexpr int MaxLevels = 26;
|
||||
|
||||
int randomLevel() {
|
||||
int randomLevel() const {
|
||||
uint32_t i = uint32_t(skfastrand()) >> (32 - (MaxLevels - 1));
|
||||
int level = 0;
|
||||
while (i & 1) {
|
||||
|
@ -225,9 +225,9 @@ private:
|
|||
// Represent a node in the SkipList. The node has multiple (i.e., level) pointers to
|
||||
// other nodes, and keeps a record of the max versions for each level.
|
||||
struct Node {
|
||||
int level() { return nPointers - 1; }
|
||||
int level() const { return nPointers - 1; }
|
||||
uint8_t* value() { return end() + nPointers * (sizeof(Node*) + sizeof(Version)); }
|
||||
int length() { return valueLength; }
|
||||
int length() const { return valueLength; }
|
||||
|
||||
// Returns the next node pointer at the given level.
|
||||
Node* getNext(int level) { return *((Node**)end() + level); }
|
||||
|
@ -235,7 +235,7 @@ private:
|
|||
void setNext(int level, Node* n) { *((Node**)end() + level) = n; }
|
||||
|
||||
// Returns the max version at the given level.
|
||||
Version getMaxVersion(int i) { return ((Version*)(end() + nPointers * sizeof(Node*)))[i]; }
|
||||
Version getMaxVersion(int i) const { return ((Version*)(end() + nPointers * sizeof(Node*)))[i]; }
|
||||
// Sets the max version at the given level.
|
||||
void setMaxVersion(int i, Version v) { ((Version*)(end() + nPointers * sizeof(Node*)))[i] = v; }
|
||||
|
||||
|
@ -289,9 +289,10 @@ private:
|
|||
}
|
||||
|
||||
private:
|
||||
int getNodeSize() { return sizeof(Node) + valueLength + nPointers * (sizeof(Node*) + sizeof(Version)); }
|
||||
int getNodeSize() const { return sizeof(Node) + valueLength + nPointers * (sizeof(Node*) + sizeof(Version)); }
|
||||
// Returns the first Node* pointer
|
||||
uint8_t* end() { return (uint8_t*)(this + 1); }
|
||||
uint8_t const* end() const { return (uint8_t const*)(this + 1); }
|
||||
int nPointers, valueLength;
|
||||
};
|
||||
|
||||
|
@ -365,7 +366,7 @@ public:
|
|||
;
|
||||
}
|
||||
|
||||
force_inline bool finished() { return level == 0; }
|
||||
force_inline bool finished() const { return level == 0; }
|
||||
|
||||
// Returns if the finger value is found in the SkipList.
|
||||
force_inline Node* found() const {
|
||||
|
@ -636,7 +637,7 @@ private:
|
|||
this->state = 0;
|
||||
}
|
||||
|
||||
bool noConflict() { return true; }
|
||||
bool noConflict() const { return true; }
|
||||
bool conflict() {
|
||||
*result = true;
|
||||
if (conflictingKeyRange != nullptr) conflictingKeyRange->push_back(*cKRArena, indexInTx);
|
||||
|
|
|
@ -1703,6 +1703,7 @@ ACTOR static Future<vector<std::pair<GrvProxyInterface, EventMap>>> getGrvProxie
|
|||
return results;
|
||||
}
|
||||
|
||||
// Returns the number of zones eligble for recruiting new tLogs after failures, to maintain the current replication factor.
|
||||
static int getExtraTLogEligibleZones(const vector<WorkerDetails>& workers, const DatabaseConfiguration& configuration) {
|
||||
std::set<StringRef> allZones;
|
||||
std::map<Key,std::set<StringRef>> dcId_zone;
|
||||
|
@ -2067,7 +2068,14 @@ static JsonBuilderObject tlogFetcher(int* logFaultTolerance, const std::vector<T
|
|||
if(currentFaultTolerance >= 0) {
|
||||
localSetsWithNonNegativeFaultTolerance++;
|
||||
}
|
||||
minFaultTolerance = std::min(minFaultTolerance, currentFaultTolerance);
|
||||
|
||||
if (tLogs[i].locality == tagLocalitySatellite) {
|
||||
// FIXME: This hack to bump satellite fault tolerance, is to make it consistent
|
||||
// with 6.2.
|
||||
minFaultTolerance = std::min(minFaultTolerance, currentFaultTolerance + 1);
|
||||
} else {
|
||||
minFaultTolerance = std::min(minFaultTolerance, currentFaultTolerance);
|
||||
}
|
||||
}
|
||||
|
||||
if (tLogs[i].isLocal && tLogs[i].locality == tagLocalitySatellite) {
|
||||
|
|
|
@ -88,24 +88,21 @@ struct AddingCacheRange : NonCopyable {
|
|||
bool isTransferred() const { return phase == Waiting; }
|
||||
};
|
||||
|
||||
struct CacheRangeInfo : ReferenceCounted<CacheRangeInfo>, NonCopyable {
|
||||
AddingCacheRange* adding;
|
||||
class CacheRangeInfo : public ReferenceCounted<CacheRangeInfo>, NonCopyable {
|
||||
CacheRangeInfo(KeyRange keys, std::unique_ptr<AddingCacheRange> &&adding, StorageCacheData* readWrite)
|
||||
: adding(std::move(adding)), readWrite(readWrite), keys(keys)
|
||||
{
|
||||
}
|
||||
|
||||
public:
|
||||
std::unique_ptr<AddingCacheRange> adding;
|
||||
struct StorageCacheData* readWrite;
|
||||
KeyRange keys;
|
||||
uint64_t changeCounter;
|
||||
|
||||
CacheRangeInfo(KeyRange keys, AddingCacheRange* adding, StorageCacheData* readWrite)
|
||||
: adding(adding), readWrite(readWrite), keys(keys)
|
||||
{
|
||||
}
|
||||
|
||||
~CacheRangeInfo() {
|
||||
delete adding;
|
||||
}
|
||||
|
||||
static CacheRangeInfo* newNotAssigned(KeyRange keys) { return new CacheRangeInfo(keys, nullptr, nullptr); }
|
||||
static CacheRangeInfo* newReadWrite(KeyRange keys, StorageCacheData* data) { return new CacheRangeInfo(keys, nullptr, data); }
|
||||
static CacheRangeInfo* newAdding(StorageCacheData* data, KeyRange keys) { return new CacheRangeInfo(keys, new AddingCacheRange(data, keys), nullptr); }
|
||||
static CacheRangeInfo* newAdding(StorageCacheData* data, KeyRange keys) { return new CacheRangeInfo(keys, std::make_unique<AddingCacheRange>(data, keys), nullptr); }
|
||||
|
||||
bool isReadable() const { return readWrite!=nullptr; }
|
||||
bool isAdding() const { return adding!=nullptr; }
|
||||
|
|
|
@ -227,7 +227,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
|
|||
ReferenceCounted<TagPartitionedLogSystem>::delref();
|
||||
}
|
||||
|
||||
std::string describe() final {
|
||||
std::string describe() const final {
|
||||
std::string result;
|
||||
for( int i = 0; i < tLogs.size(); i++ ) {
|
||||
result += format("%d: ", i);
|
||||
|
@ -238,7 +238,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
|
|||
return result;
|
||||
}
|
||||
|
||||
UID getDebugID() final { return dbgid; }
|
||||
UID getDebugID() const final { return dbgid; }
|
||||
|
||||
void addPseudoLocality(int8_t locality) {
|
||||
ASSERT(locality < 0);
|
||||
|
@ -248,7 +248,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
|
|||
}
|
||||
}
|
||||
|
||||
Tag getPseudoPopTag(Tag tag, ProcessClass::ClassType type) final {
|
||||
Tag getPseudoPopTag(Tag tag, ProcessClass::ClassType type) const final {
|
||||
switch (type) {
|
||||
case ProcessClass::LogRouterClass:
|
||||
if (tag.locality == tagLocalityLogRouter) {
|
||||
|
@ -270,7 +270,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
|
|||
return tag;
|
||||
}
|
||||
|
||||
bool hasPseudoLocality(int8_t locality) final { return pseudoLocalities.count(locality) > 0; }
|
||||
bool hasPseudoLocality(int8_t locality) const final { return pseudoLocalities.count(locality) > 0; }
|
||||
|
||||
// Return the min version of all pseudoLocalities, i.e., logRouter and backupTag
|
||||
Version popPseudoLocalityTag(Tag tag, Version upTo) final {
|
||||
|
@ -1364,7 +1364,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
|
|||
return newEpoch( Reference<TagPartitionedLogSystem>::addRef(this), recr, fRemoteWorkers, config, recoveryCount, primaryLocality, remoteLocality, allTags, recruitmentStalled );
|
||||
}
|
||||
|
||||
LogSystemConfig getLogSystemConfig() final {
|
||||
LogSystemConfig getLogSystemConfig() const final {
|
||||
LogSystemConfig logSystemConfig(epoch);
|
||||
logSystemConfig.logSystemType = logSystemType;
|
||||
logSystemConfig.expectedLogSets = expectedLogSets;
|
||||
|
@ -1389,7 +1389,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
|
|||
return logSystemConfig;
|
||||
}
|
||||
|
||||
Standalone<StringRef> getLogsValue() final {
|
||||
Standalone<StringRef> getLogsValue() const final {
|
||||
vector<std::pair<UID, NetworkAddress>> logs;
|
||||
vector<std::pair<UID, NetworkAddress>> oldLogs;
|
||||
for(auto& t : tLogs) {
|
||||
|
@ -1434,19 +1434,19 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
|
|||
return waitForAny(changes);
|
||||
}
|
||||
|
||||
Version getEnd() final {
|
||||
Version getEnd() const final {
|
||||
ASSERT( recoverAt.present() );
|
||||
return recoverAt.get() + 1;
|
||||
}
|
||||
|
||||
Version getPeekEnd() {
|
||||
Version getPeekEnd() const {
|
||||
if (recoverAt.present())
|
||||
return getEnd();
|
||||
else
|
||||
return std::numeric_limits<Version>::max();
|
||||
}
|
||||
|
||||
void getPushLocations(VectorRef<Tag> tags, std::vector<int>& locations, bool allLocations) final {
|
||||
void getPushLocations(VectorRef<Tag> tags, std::vector<int>& locations, bool allLocations) const final {
|
||||
int locationOffset = 0;
|
||||
for(auto& log : tLogs) {
|
||||
if(log->isLocal && log->logServers.size()) {
|
||||
|
|
|
@ -1649,11 +1649,10 @@ public:
|
|||
|
||||
if (!cacheEntry.initialized()) {
|
||||
debug_printf("DWALPager(%s) issuing actual read of %s\n", filename.c_str(), toString(pageID).c_str());
|
||||
cacheEntry.readFuture = readPhysicalPage(this, (PhysicalPageID)pageID);
|
||||
cacheEntry.readFuture = forwardError(readPhysicalPage(this, (PhysicalPageID)pageID), errorPromise);
|
||||
cacheEntry.writeFuture = Void();
|
||||
}
|
||||
|
||||
cacheEntry.readFuture = forwardError(cacheEntry.readFuture, errorPromise);
|
||||
return cacheEntry.readFuture;
|
||||
}
|
||||
|
||||
|
@ -1837,6 +1836,7 @@ public:
|
|||
state Version minStopVersion = cutoff.version - (BUGGIFY ? deterministicRandom()->randomInt(0, 10) : (self->remapCleanupWindow * SERVER_KNOBS->REDWOOD_REMAP_CLEANUP_LAG));
|
||||
self->remapDestinationsSimOnly.clear();
|
||||
|
||||
state int sinceYield = 0;
|
||||
loop {
|
||||
state Optional<RemappedPage> p = wait(self->remapQueue.pop(cutoff));
|
||||
debug_printf("DWALPager(%s) remapCleanup popped %s\n", self->filename.c_str(), ::toString(p).c_str());
|
||||
|
@ -1855,6 +1855,11 @@ public:
|
|||
if (self->remapCleanupStop && p.get().version >= minStopVersion) {
|
||||
break;
|
||||
}
|
||||
|
||||
if(++sinceYield >= 100) {
|
||||
sinceYield = 0;
|
||||
wait(yield());
|
||||
}
|
||||
}
|
||||
|
||||
debug_printf("DWALPager(%s) remapCleanup stopped (stop=%d)\n", self->filename.c_str(), self->remapCleanupStop);
|
||||
|
|
|
@ -888,7 +888,7 @@ void restoreRoleFilesHelper(std::string dirSrc, std::string dirToMove, std::stri
|
|||
}
|
||||
|
||||
namespace {
|
||||
enum Role {
|
||||
enum class ServerRole {
|
||||
ConsistencyCheck,
|
||||
CreateTemplateDatabase,
|
||||
DSLTest,
|
||||
|
@ -916,7 +916,7 @@ struct CLIOptions {
|
|||
int maxLogs = 0;
|
||||
bool maxLogsSet = false;
|
||||
|
||||
Role role = FDBD;
|
||||
ServerRole role = ServerRole::FDBD;
|
||||
uint32_t randomSeed = platform::getRandomSeed();
|
||||
|
||||
const char* testFile = "tests/default.txt";
|
||||
|
@ -1051,35 +1051,35 @@ private:
|
|||
case OPT_ROLE:
|
||||
sRole = args.OptionArg();
|
||||
if (!strcmp(sRole, "fdbd"))
|
||||
role = FDBD;
|
||||
role = ServerRole::FDBD;
|
||||
else if (!strcmp(sRole, "simulation"))
|
||||
role = Simulation;
|
||||
role = ServerRole::Simulation;
|
||||
else if (!strcmp(sRole, "test"))
|
||||
role = Test;
|
||||
role = ServerRole::Test;
|
||||
else if (!strcmp(sRole, "multitest"))
|
||||
role = MultiTester;
|
||||
role = ServerRole::MultiTester;
|
||||
else if (!strcmp(sRole, "skiplisttest"))
|
||||
role = SkipListTest;
|
||||
role = ServerRole::SkipListTest;
|
||||
else if (!strcmp(sRole, "search"))
|
||||
role = SearchMutations;
|
||||
role = ServerRole::SearchMutations;
|
||||
else if (!strcmp(sRole, "dsltest"))
|
||||
role = DSLTest;
|
||||
role = ServerRole::DSLTest;
|
||||
else if (!strcmp(sRole, "versionedmaptest"))
|
||||
role = VersionedMapTest;
|
||||
role = ServerRole::VersionedMapTest;
|
||||
else if (!strcmp(sRole, "createtemplatedb"))
|
||||
role = CreateTemplateDatabase;
|
||||
role = ServerRole::CreateTemplateDatabase;
|
||||
else if (!strcmp(sRole, "networktestclient"))
|
||||
role = NetworkTestClient;
|
||||
role = ServerRole::NetworkTestClient;
|
||||
else if (!strcmp(sRole, "networktestserver"))
|
||||
role = NetworkTestServer;
|
||||
role = ServerRole::NetworkTestServer;
|
||||
else if (!strcmp(sRole, "restore"))
|
||||
role = Restore;
|
||||
role = ServerRole::Restore;
|
||||
else if (!strcmp(sRole, "kvfileintegritycheck"))
|
||||
role = KVFileIntegrityCheck;
|
||||
role = ServerRole::KVFileIntegrityCheck;
|
||||
else if (!strcmp(sRole, "kvfilegeneratesums"))
|
||||
role = KVFileGenerateIOLogChecksums;
|
||||
role = ServerRole::KVFileGenerateIOLogChecksums;
|
||||
else if (!strcmp(sRole, "consistencycheck"))
|
||||
role = ConsistencyCheck;
|
||||
role = ServerRole::ConsistencyCheck;
|
||||
else {
|
||||
fprintf(stderr, "ERROR: Unknown role `%s'\n", sRole);
|
||||
printHelpTeaser(argv[0]);
|
||||
|
@ -1241,6 +1241,10 @@ private:
|
|||
openTracer(TracerType::DISABLED);
|
||||
} else if (tracer == "logfile" || tracer == "file" || tracer == "log_file") {
|
||||
openTracer(TracerType::LOG_FILE);
|
||||
} else if (tracer == "network_async") {
|
||||
openTracer(TracerType::NETWORK_ASYNC);
|
||||
} else if (tracer == "network_lossy") {
|
||||
openTracer(TracerType::NETWORK_LOSSY);
|
||||
} else {
|
||||
fprintf(stderr, "ERROR: Unknown or unsupported tracer: `%s'", args.OptionArg());
|
||||
printHelpTeaser(argv[0]);
|
||||
|
@ -1432,8 +1436,8 @@ private:
|
|||
bool autoPublicAddress =
|
||||
std::any_of(publicAddressStrs.begin(), publicAddressStrs.end(),
|
||||
[](const std::string& addr) { return StringRef(addr).startsWith(LiteralStringRef("auto:")); });
|
||||
if ((role != Simulation && role != CreateTemplateDatabase && role != KVFileIntegrityCheck &&
|
||||
role != KVFileGenerateIOLogChecksums) ||
|
||||
if ((role != ServerRole::Simulation && role != ServerRole::CreateTemplateDatabase &&
|
||||
role != ServerRole::KVFileIntegrityCheck && role != ServerRole::KVFileGenerateIOLogChecksums) ||
|
||||
autoPublicAddress) {
|
||||
|
||||
if (seedSpecified && !fileExists(connFile)) {
|
||||
|
@ -1480,7 +1484,7 @@ private:
|
|||
flushAndExit(FDB_EXIT_ERROR);
|
||||
}
|
||||
|
||||
if (role == ConsistencyCheck) {
|
||||
if (role == ServerRole::ConsistencyCheck) {
|
||||
if (!publicAddressStrs.empty()) {
|
||||
fprintf(stderr, "ERROR: Public address cannot be specified for consistency check processes\n");
|
||||
printHelpTeaser(argv[0]);
|
||||
|
@ -1490,18 +1494,18 @@ private:
|
|||
publicAddresses.address = NetworkAddress(publicIP, ::getpid());
|
||||
}
|
||||
|
||||
if (role == Simulation) {
|
||||
if (role == ServerRole::Simulation) {
|
||||
Optional<bool> buggifyOverride = checkBuggifyOverride(testFile);
|
||||
if (buggifyOverride.present()) buggifyEnabled = buggifyOverride.get();
|
||||
}
|
||||
|
||||
if (role == SearchMutations && !targetKey) {
|
||||
if (role == ServerRole::SearchMutations && !targetKey) {
|
||||
fprintf(stderr, "ERROR: please specify a target key\n");
|
||||
printHelpTeaser(argv[0]);
|
||||
flushAndExit(FDB_EXIT_ERROR);
|
||||
}
|
||||
|
||||
if (role == NetworkTestClient && !testServersStr.size()) {
|
||||
if (role == ServerRole::NetworkTestClient && !testServersStr.size()) {
|
||||
fprintf(stderr, "ERROR: please specify --testservers\n");
|
||||
printHelpTeaser(argv[0]);
|
||||
flushAndExit(FDB_EXIT_ERROR);
|
||||
|
@ -1561,7 +1565,7 @@ int main(int argc, char* argv[]) {
|
|||
const auto opts = CLIOptions::parseArgs(argc, argv);
|
||||
const auto role = opts.role;
|
||||
|
||||
if (role == Simulation) printf("Random seed is %u...\n", opts.randomSeed);
|
||||
if (role == ServerRole::Simulation) printf("Random seed is %u...\n", opts.randomSeed);
|
||||
|
||||
if (opts.zoneId.present())
|
||||
printf("ZoneId set to %s, dcId to %s\n", printable(opts.zoneId).c_str(), printable(opts.dcId).c_str());
|
||||
|
@ -1581,7 +1585,7 @@ int main(int argc, char* argv[]) {
|
|||
CLIENT_KNOBS = clientKnobs;
|
||||
|
||||
if (!serverKnobs->setKnob("log_directory", opts.logFolder)) ASSERT(false);
|
||||
if (role != Simulation) {
|
||||
if (role != ServerRole::Simulation) {
|
||||
if (!serverKnobs->setKnob("commit_batches_mem_bytes_hard_limit", std::to_string(opts.memLimit)))
|
||||
ASSERT(false);
|
||||
}
|
||||
|
@ -1608,9 +1612,9 @@ int main(int argc, char* argv[]) {
|
|||
if (!serverKnobs->setKnob("server_mem_limit", std::to_string(opts.memLimit))) ASSERT(false);
|
||||
|
||||
// Reinitialize knobs in order to update knobs that are dependent on explicitly set knobs
|
||||
flowKnobs->initialize(true, role == Simulation);
|
||||
flowKnobs->initialize(true, role == ServerRole::Simulation);
|
||||
clientKnobs->initialize(true);
|
||||
serverKnobs->initialize(true, clientKnobs, role == Simulation);
|
||||
serverKnobs->initialize(true, clientKnobs, role == ServerRole::Simulation);
|
||||
|
||||
// evictionPolicyStringToEnum will throw an exception if the string is not recognized as a valid
|
||||
EvictablePageCache::evictionPolicyStringToEnum(flowKnobs->CACHE_EVICTION_POLICY);
|
||||
|
@ -1620,17 +1624,17 @@ int main(int argc, char* argv[]) {
|
|||
flushAndExit(FDB_EXIT_ERROR);
|
||||
}
|
||||
|
||||
if (role == SkipListTest) {
|
||||
if (role == ServerRole::SkipListTest) {
|
||||
skipListTest();
|
||||
flushAndExit(FDB_EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
if (role == DSLTest) {
|
||||
if (role == ServerRole::DSLTest) {
|
||||
dsltest();
|
||||
flushAndExit(FDB_EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
if (role == VersionedMapTest) {
|
||||
if (role == ServerRole::VersionedMapTest) {
|
||||
versionedMapTest();
|
||||
flushAndExit(FDB_EXIT_SUCCESS);
|
||||
}
|
||||
|
@ -1642,7 +1646,7 @@ int main(int argc, char* argv[]) {
|
|||
|
||||
std::vector<Future<Void>> listenErrors;
|
||||
|
||||
if (role == Simulation || role == CreateTemplateDatabase) {
|
||||
if (role == ServerRole::Simulation || role == ServerRole::CreateTemplateDatabase) {
|
||||
//startOldSimulator();
|
||||
startNewSimulator();
|
||||
openTraceFile(NetworkAddress(), opts.rollsize, opts.maxLogsSize, opts.logFolder, "trace", opts.logGroup);
|
||||
|
@ -1652,7 +1656,8 @@ int main(int argc, char* argv[]) {
|
|||
g_network->addStopCallback( Net2FileSystem::stop );
|
||||
FlowTransport::createInstance(false, 1);
|
||||
|
||||
const bool expectsPublicAddress = (role == FDBD || role == NetworkTestServer || role == Restore);
|
||||
const bool expectsPublicAddress =
|
||||
(role == ServerRole::FDBD || role == ServerRole::NetworkTestServer || role == ServerRole::Restore);
|
||||
if (opts.publicAddressStrs.empty()) {
|
||||
if (expectsPublicAddress) {
|
||||
fprintf(stderr, "ERROR: The -p or --public_address option is required\n");
|
||||
|
@ -1745,7 +1750,7 @@ int main(int argc, char* argv[]) {
|
|||
|
||||
Future<Optional<Void>> f;
|
||||
|
||||
if (role == Simulation) {
|
||||
if (role == ServerRole::Simulation) {
|
||||
TraceEvent("Simulation").detail("TestFile", opts.testFile);
|
||||
|
||||
auto histogramReportActor = histogramReport();
|
||||
|
@ -1869,7 +1874,7 @@ int main(int argc, char* argv[]) {
|
|||
}
|
||||
setupAndRun(dataFolder, opts.testFile, opts.restarting, (isRestoring >= 1), opts.whitelistBinPaths);
|
||||
g_simulator.run();
|
||||
} else if (role == FDBD) {
|
||||
} else if (role == ServerRole::FDBD) {
|
||||
// Update the global blob credential files list so that both fast
|
||||
// restore workers and backup workers can access blob storage.
|
||||
std::vector<std::string>* pFiles =
|
||||
|
@ -1913,40 +1918,40 @@ int main(int argc, char* argv[]) {
|
|||
f = stopAfter(waitForAll(actors));
|
||||
g_network->run();
|
||||
}
|
||||
} else if (role == MultiTester) {
|
||||
} else if (role == ServerRole::MultiTester) {
|
||||
setupRunLoopProfiler();
|
||||
f = stopAfter(runTests(opts.connectionFile, TEST_TYPE_FROM_FILE,
|
||||
opts.testOnServers ? TEST_ON_SERVERS : TEST_ON_TESTERS, opts.minTesterCount,
|
||||
opts.testFile, StringRef(), opts.localities));
|
||||
g_network->run();
|
||||
} else if (role == Test) {
|
||||
} else if (role == ServerRole::Test) {
|
||||
setupRunLoopProfiler();
|
||||
auto m = startSystemMonitor(opts.dataFolder, opts.dcId, opts.zoneId, opts.zoneId);
|
||||
f = stopAfter(runTests(opts.connectionFile, TEST_TYPE_FROM_FILE, TEST_HERE, 1, opts.testFile, StringRef(),
|
||||
opts.localities));
|
||||
g_network->run();
|
||||
} else if (role == ConsistencyCheck) {
|
||||
} else if (role == ServerRole::ConsistencyCheck) {
|
||||
setupRunLoopProfiler();
|
||||
|
||||
auto m = startSystemMonitor(opts.dataFolder, opts.dcId, opts.zoneId, opts.zoneId);
|
||||
f = stopAfter(runTests(opts.connectionFile, TEST_TYPE_CONSISTENCY_CHECK, TEST_HERE, 1, opts.testFile,
|
||||
StringRef(), opts.localities));
|
||||
g_network->run();
|
||||
} else if (role == CreateTemplateDatabase) {
|
||||
} else if (role == ServerRole::CreateTemplateDatabase) {
|
||||
createTemplateDatabase();
|
||||
} else if (role == NetworkTestClient) {
|
||||
} else if (role == ServerRole::NetworkTestClient) {
|
||||
f = stopAfter(networkTestClient(opts.testServersStr));
|
||||
g_network->run();
|
||||
} else if (role == NetworkTestServer) {
|
||||
} else if (role == ServerRole::NetworkTestServer) {
|
||||
f = stopAfter( networkTestServer() );
|
||||
g_network->run();
|
||||
} else if (role == Restore) {
|
||||
} else if (role == ServerRole::Restore) {
|
||||
f = stopAfter(restoreWorker(opts.connectionFile, opts.localities, opts.dataFolder));
|
||||
g_network->run();
|
||||
} else if (role == KVFileIntegrityCheck) {
|
||||
} else if (role == ServerRole::KVFileIntegrityCheck) {
|
||||
f = stopAfter(KVFileCheck(opts.kvFile, true));
|
||||
g_network->run();
|
||||
} else if (role == KVFileGenerateIOLogChecksums) {
|
||||
} else if (role == ServerRole::KVFileGenerateIOLogChecksums) {
|
||||
Optional<Void> result;
|
||||
try {
|
||||
GenerateIOLogChecksumFile(opts.kvFile);
|
||||
|
@ -1968,7 +1973,7 @@ int main(int argc, char* argv[]) {
|
|||
TraceEvent("ElapsedTime").detail("SimTime", now()-startNow).detail("RealTime", timer()-start)
|
||||
.detail("RandomUnseed", unseed);
|
||||
|
||||
if (role==Simulation){
|
||||
if (role == ServerRole::Simulation) {
|
||||
printf("Unseed: %d\n", unseed);
|
||||
printf("Elapsed: %f simsec, %f real seconds\n", now()-startNow, timer()-start);
|
||||
}
|
||||
|
@ -2005,7 +2010,7 @@ int main(int argc, char* argv[]) {
|
|||
cout << " " << i->second << " " << i->first << endl;*/
|
||||
// cout << " " << Actor::allActors[i]->getName() << endl;
|
||||
|
||||
if (role == Simulation) {
|
||||
if (role == ServerRole::Simulation) {
|
||||
unsigned long sevErrorEventsLogged = TraceEvent::CountEventsLoggedAt(SevError);
|
||||
if (sevErrorEventsLogged > 0) {
|
||||
printf("%lu SevError events logged\n", sevErrorEventsLogged);
|
||||
|
|
|
@ -118,25 +118,22 @@ struct AddingShard : NonCopyable {
|
|||
bool isTransferred() const { return phase == Waiting; }
|
||||
};
|
||||
|
||||
struct ShardInfo : ReferenceCounted<ShardInfo>, NonCopyable {
|
||||
AddingShard* adding;
|
||||
class ShardInfo : public ReferenceCounted<ShardInfo>, NonCopyable {
|
||||
ShardInfo(KeyRange keys, std::unique_ptr<AddingShard> &&adding, StorageServer* readWrite)
|
||||
: adding(std::move(adding)), readWrite(readWrite), keys(keys)
|
||||
{
|
||||
}
|
||||
|
||||
public:
|
||||
std::unique_ptr<AddingShard> adding;
|
||||
struct StorageServer* readWrite;
|
||||
KeyRange keys;
|
||||
uint64_t changeCounter;
|
||||
|
||||
ShardInfo(KeyRange keys, AddingShard* adding, StorageServer* readWrite)
|
||||
: adding(adding), readWrite(readWrite), keys(keys)
|
||||
{
|
||||
}
|
||||
|
||||
~ShardInfo() {
|
||||
delete adding;
|
||||
}
|
||||
|
||||
static ShardInfo* newNotAssigned(KeyRange keys) { return new ShardInfo(keys, nullptr, nullptr); }
|
||||
static ShardInfo* newReadWrite(KeyRange keys, StorageServer* data) { return new ShardInfo(keys, nullptr, data); }
|
||||
static ShardInfo* newAdding(StorageServer* data, KeyRange keys) { return new ShardInfo(keys, new AddingShard(data, keys), nullptr); }
|
||||
static ShardInfo* addingSplitLeft( KeyRange keys, AddingShard* oldShard) { return new ShardInfo(keys, new AddingShard(oldShard, keys), nullptr); }
|
||||
static ShardInfo* newAdding(StorageServer* data, KeyRange keys) { return new ShardInfo(keys, std::make_unique<AddingShard>(data, keys), nullptr); }
|
||||
static ShardInfo* addingSplitLeft( KeyRange keys, AddingShard* oldShard) { return new ShardInfo(keys, std::make_unique<AddingShard>(oldShard, keys), nullptr); }
|
||||
|
||||
bool isReadable() const { return readWrite!=nullptr; }
|
||||
bool notAssigned() const { return !readWrite && !adding; }
|
||||
|
@ -974,6 +971,7 @@ ACTOR Future<Version> waitForVersionNoTooOld( StorageServer* data, Version versi
|
|||
ACTOR Future<Void> getValueQ( StorageServer* data, GetValueRequest req ) {
|
||||
state int64_t resultSize = 0;
|
||||
Span span("SS:getValue"_loc, { req.spanContext });
|
||||
span.addTag("key"_sr, req.key);
|
||||
|
||||
try {
|
||||
++data->counters.getValueQueries;
|
||||
|
@ -2358,9 +2356,9 @@ ACTOR Future<Void> fetchKeys( StorageServer *data, AddingShard* shard ) {
|
|||
// The remaining unfetched keys [nfk,keys.end) will become a separate AddingShard with its own fetchKeys.
|
||||
shard->server->addShard( ShardInfo::addingSplitLeft( KeyRangeRef(keys.begin, nfk), shard ) );
|
||||
shard->server->addShard( ShardInfo::newAdding( data, KeyRangeRef(nfk, keys.end) ) );
|
||||
shard = data->shards.rangeContaining( keys.begin ).value()->adding;
|
||||
shard = data->shards.rangeContaining( keys.begin ).value()->adding.get();
|
||||
warningLogger = logFetchKeysWarning(shard);
|
||||
AddingShard* otherShard = data->shards.rangeContaining( nfk ).value()->adding;
|
||||
AddingShard* otherShard = data->shards.rangeContaining( nfk ).value()->adding.get();
|
||||
keys = shard->keys;
|
||||
|
||||
// Split our prior updates. The ones that apply to our new, restricted key range will go back into shard->updates,
|
||||
|
@ -2879,7 +2877,6 @@ private:
|
|||
ACTOR Future<Void> update( StorageServer* data, bool* pReceivedUpdate )
|
||||
{
|
||||
state double start;
|
||||
state Span span("SS:update"_loc);
|
||||
try {
|
||||
// If we are disk bound and durableVersion is very old, we need to block updates or we could run out of memory
|
||||
// This is often referred to as the storage server e-brake (emergency brake)
|
||||
|
@ -3022,6 +3019,7 @@ ACTOR Future<Void> update( StorageServer* data, bool* pReceivedUpdate )
|
|||
|
||||
state Version ver = invalidVersion;
|
||||
cloneCursor2->setProtocolVersion(data->logProtocol);
|
||||
state SpanID spanContext = SpanID();
|
||||
for (;cloneCursor2->hasMessage(); cloneCursor2->nextMessage()) {
|
||||
if(mutationBytes > SERVER_KNOBS->DESIRED_UPDATE_BYTES) {
|
||||
mutationBytes = 0;
|
||||
|
@ -3051,12 +3049,15 @@ ACTOR Future<Void> update( StorageServer* data, bool* pReceivedUpdate )
|
|||
else if (rd.protocolVersion().hasSpanContext() && SpanContextMessage::isNextIn(rd)) {
|
||||
SpanContextMessage scm;
|
||||
rd >> scm;
|
||||
span.addParent(scm.spanContext);
|
||||
spanContext = scm.spanContext;
|
||||
}
|
||||
else {
|
||||
MutationRef msg;
|
||||
rd >> msg;
|
||||
|
||||
Span span("SS:update"_loc, { spanContext });
|
||||
span.addTag("key"_sr, msg.param1);
|
||||
|
||||
if (ver != invalidVersion) { // This change belongs to a version < minVersion
|
||||
DEBUG_MUTATION("SSPeek", ver, msg).detail("ServerID", data->thisServerID);
|
||||
if (ver == 1) {
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue