From 1b259dbc9dce5441790aef0f9071cbb37e295ba7 Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Wed, 26 Sep 2018 10:27:55 -0700 Subject: [PATCH 001/226] Initial work to remove clusters from our C API --- bindings/c/fdb_c.cpp | 41 +++++++++--- bindings/c/foundationdb/fdb_c.h | 40 +++++++----- fdbclient/IClientApi.h | 2 +- fdbclient/MultiVersionTransaction.actor.cpp | 72 +++++++++++---------- fdbclient/MultiVersionTransaction.h | 19 +++--- fdbclient/ThreadSafeTransaction.actor.cpp | 45 ++++++------- fdbclient/ThreadSafeTransaction.h | 5 +- 7 files changed, 122 insertions(+), 102 deletions(-) diff --git a/bindings/c/fdb_c.cpp b/bindings/c/fdb_c.cpp index 990d7c0419..b63657cafd 100644 --- a/bindings/c/fdb_c.cpp +++ b/bindings/c/fdb_c.cpp @@ -31,17 +31,18 @@ int g_api_version = 0; * * type mapping: * FDBFuture -> ThreadSingleAssignmentVarBase - * FDBCluster -> char * FDBDatabase -> IDatabase * FDBTransaction -> ITransaction */ #define TSAVB(f) ((ThreadSingleAssignmentVarBase*)(f)) #define TSAV(T, f) ((ThreadSingleAssignmentVar*)(f)) -#define CLUSTER(c) ((char*)c) #define DB(d) ((IDatabase*)d) #define TXN(t) ((ITransaction*)t) +// Legacy (pre API version 610) +#define CLUSTER(c) ((char*)c) + /* * While we could just use the MultiVersionApi instance directly, this #define allows us to swap in any other IClientApi instance (e.g. from ThreadSafeApi) */ @@ -134,7 +135,7 @@ fdb_error_t fdb_add_network_thread_completion_hook(void (*hook)(void*), void *ho extern "C" DLLEXPORT -FDBFuture* fdb_cluster_configure_database( FDBCluster* c, int config_type, +FDBFuture* fdb_cluster_configure_database_v12( FDBCluster* c, int config_type, int config_mode, uint8_t const* db_name, int db_name_length ) { @@ -235,14 +236,14 @@ fdb_error_t fdb_future_get_key( FDBFuture* f, uint8_t const** out_key, } extern "C" DLLEXPORT -fdb_error_t fdb_future_get_cluster( FDBFuture* f, FDBCluster** out_cluster ) { +fdb_error_t fdb_future_get_cluster_v609( FDBFuture* f, FDBCluster** out_cluster ) { CATCH_AND_RETURN( *out_cluster = (FDBCluster*) ( (TSAV( char*, f )->get() ) ); ); } extern "C" DLLEXPORT -fdb_error_t fdb_future_get_database( FDBFuture* f, FDBDatabase** out_database ) { +fdb_error_t fdb_future_get_database_v609( FDBFuture* f, FDBDatabase** out_database ) { CATCH_AND_RETURN( *out_database = (FDBDatabase*) ( (TSAV( Reference, f )->get() ).extractPtr() ); ); @@ -294,7 +295,7 @@ fdb_error_t fdb_future_get_string_array( } extern "C" DLLEXPORT -FDBFuture* fdb_create_cluster( const char* cluster_file_path ) { +FDBFuture* fdb_create_cluster_v609( const char* cluster_file_path ) { char *path = NULL; if(cluster_file_path) { path = new char[strlen(cluster_file_path) + 1]; @@ -304,7 +305,7 @@ FDBFuture* fdb_create_cluster( const char* cluster_file_path ) { } extern "C" DLLEXPORT -fdb_error_t fdb_cluster_set_option( FDBCluster* c, +fdb_error_t fdb_cluster_set_option_v609( FDBCluster* c, FDBClusterOption option, uint8_t const* value, int value_length ) @@ -314,19 +315,32 @@ fdb_error_t fdb_cluster_set_option( FDBCluster* c, } extern "C" DLLEXPORT -void fdb_cluster_destroy( FDBCluster* c ) { +void fdb_cluster_destroy_v609( FDBCluster* c ) { CATCH_AND_DIE( delete[] CLUSTER(c); ); } extern "C" DLLEXPORT -FDBFuture* fdb_cluster_create_database( FDBCluster* c, uint8_t const* db_name, +FDBFuture* fdb_cluster_create_database_v609( FDBCluster* c, uint8_t const* db_name, int db_name_length ) { if(strncmp((const char*)db_name, "DB", db_name_length) != 0) { return (FDBFuture*)ThreadFuture>(invalid_database_name()).extractPtr(); } - return 
(FDBFuture*) API->createDatabase( c ? CLUSTER(c) : "").extractPtr();
+	FDBDatabase *db;
+	fdb_error_t err = fdb_create_database(CLUSTER(c), &db);
+	if(err) {
+		return (FDBFuture*)ThreadFuture<Reference<IDatabase>>(Error(err)).extractPtr();
+	}
+
+	return (FDBFuture*)ThreadFuture<Reference<IDatabase>>(Reference<IDatabase>(DB(db))).extractPtr();
+}
+
+extern "C" DLLEXPORT
+fdb_error_t fdb_create_database( const char* cluster_file_path, FDBDatabase** out_database ) {
+	CATCH_AND_RETURN(
+		*out_database = (FDBDatabase*)API->createDatabase( cluster_file_path ? cluster_file_path : "" ).extractPtr();
+	);
 }
 
 extern "C" DLLEXPORT
@@ -659,6 +673,12 @@ fdb_error_t fdb_select_api_version_impl( int runtime_version, int header_version
 	// Versioned API changes -- descending order by version (new changes at top)
 	// FDB_API_CHANGED( function, ver ) means there is a new implementation as of ver, and a function function_(ver-1) is the old implementation
 	// FDB_API_REMOVED( function, ver ) means the function was removed as of ver, and function_(ver-1) is the old implementation
+	FDB_API_REMOVED( fdb_create_cluster, 610 );
+	FDB_API_REMOVED( fdb_cluster_create_database, 610 );
+	FDB_API_REMOVED( fdb_cluster_set_option, 610 );
+	FDB_API_REMOVED( fdb_cluster_destroy, 610 );
+	FDB_API_REMOVED( fdb_future_get_cluster, 610 );
+	FDB_API_REMOVED( fdb_future_get_database, 610 );
 	FDB_API_CHANGED( fdb_future_get_error, 23 );
 	FDB_API_REMOVED( fdb_future_is_error, 23 );
 	FDB_API_CHANGED( fdb_future_get_keyvalue_array, 14 );
 	FDB_API_REMOVED( fdb_get_keyvalue_array, 14 );
 	FDB_API_REMOVED( fdb_get_key_array, 14 );
 	FDB_API_REMOVED( fdb_get_range, 14 );
 	FDB_API_CHANGED( fdb_transaction_get, 14 );
 	FDB_API_CHANGED( fdb_setup_network, 14 );
 	FDB_API_CHANGED( fdb_transaction_set_option, 14 );
+	FDB_API_REMOVED( fdb_cluster_configure_database, 13 );
 	/* End versioned API changes */
 
 	return error_code_success;
diff --git a/bindings/c/foundationdb/fdb_c.h b/bindings/c/foundationdb/fdb_c.h
index a9e1bd20b7..0950acca7c 100644
--- a/bindings/c/foundationdb/fdb_c.h
+++ b/bindings/c/foundationdb/fdb_c.h
@@ -62,7 +62,6 @@ extern "C" {
 	/* Pointers to these opaque types represent objects in the FDB API */
 	typedef struct FDB_future FDBFuture;
-	typedef struct FDB_cluster FDBCluster;
 	typedef struct FDB_database FDBDatabase;
 	typedef struct FDB_transaction FDBTransaction;
 
@@ -128,12 +127,6 @@ extern "C" {
 	fdb_future_get_key( FDBFuture* f, uint8_t const** out_key,
 	                    int* out_key_length );
 
-	DLLEXPORT WARN_UNUSED_RESULT fdb_error_t
-	fdb_future_get_cluster( FDBFuture* f, FDBCluster** out_cluster );
-
-	DLLEXPORT WARN_UNUSED_RESULT fdb_error_t
-	fdb_future_get_database( FDBFuture* f, FDBDatabase** out_database );
-
 	DLLEXPORT WARN_UNUSED_RESULT fdb_error_t
 	fdb_future_get_value( FDBFuture* f, fdb_bool_t *out_present,
 	                      uint8_t const** out_value,
@@ -148,17 +141,8 @@ extern "C" {
 	DLLEXPORT WARN_UNUSED_RESULT fdb_error_t
 	fdb_future_get_string_array(FDBFuture* f, const char*** out_strings, int* out_count);
 
-	DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_create_cluster( const char* cluster_file_path );
-
-	DLLEXPORT void fdb_cluster_destroy( FDBCluster* c );
-
 	DLLEXPORT WARN_UNUSED_RESULT fdb_error_t
-	fdb_cluster_set_option( FDBCluster* c, FDBClusterOption option,
-	                        uint8_t const* value, int value_length );
-
-	DLLEXPORT WARN_UNUSED_RESULT FDBFuture*
-	fdb_cluster_create_database( FDBCluster* c, uint8_t const* db_name,
-	                             int db_name_length );
+	fdb_create_database( const char* cluster_file_path, FDBDatabase** out_database );
 
 	DLLEXPORT void fdb_database_destroy( FDBDatabase* d );
 
@@ -269,6 +253,28 @@ extern "C" {
 
 	/* LEGACY API VERSIONS */
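For concreteness, the two open paths that the declarations above and below describe, as a minimal sketch (illustrative only, not part of the patch; error handling mostly elided):

	/* API version >= 610: a single synchronous call */
	FDBDatabase *db = NULL;
	fdb_error_t err = fdb_create_database("fdb.cluster", &db);
	if(err) { /* handle error */ }

	/* Pre-610 equivalent: two futures and an intermediate FDBCluster */
	FDBFuture *cf = fdb_create_cluster("fdb.cluster");
	fdb_future_block_until_ready(cf);
	FDBCluster *cluster = NULL;
	fdb_future_get_cluster(cf, &cluster);
	fdb_future_destroy(cf);
	FDBFuture *df = fdb_cluster_create_database(cluster, (uint8_t const*)"DB", 2);
	fdb_future_block_until_ready(df);
	fdb_future_get_database(df, &db);
	fdb_future_destroy(df);
	fdb_cluster_destroy(cluster);

The declarations guarded below keep exactly this pre-610 surface compiling for clients that select an older API version.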
+#if FDB_API_VERSION < 610 + typedef struct FDB_cluster FDBCluster; + + DLLEXPORT WARN_UNUSED_RESULT fdb_error_t + fdb_future_get_cluster( FDBFuture* f, FDBCluster** out_cluster ); + + DLLEXPORT WARN_UNUSED_RESULT fdb_error_t + fdb_future_get_database( FDBFuture* f, FDBDatabase** out_database ); + + DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_create_cluster( const char* cluster_file_path ); + + DLLEXPORT void fdb_cluster_destroy( FDBCluster* c ); + + DLLEXPORT WARN_UNUSED_RESULT fdb_error_t + fdb_cluster_set_option( FDBCluster* c, FDBClusterOption option, + uint8_t const* value, int value_length ); + + DLLEXPORT WARN_UNUSED_RESULT FDBFuture* + fdb_cluster_create_database( FDBCluster* c, uint8_t const* db_name, + int db_name_length ); +#endif + #if FDB_API_VERSION < 23 DLLEXPORT WARN_UNUSED_RESULT fdb_error_t fdb_future_get_error( FDBFuture* f, diff --git a/fdbclient/IClientApi.h b/fdbclient/IClientApi.h index df609d1c57..56ded563ea 100644 --- a/fdbclient/IClientApi.h +++ b/fdbclient/IClientApi.h @@ -94,7 +94,7 @@ public: virtual void runNetwork() = 0; virtual void stopNetwork() = 0; - virtual ThreadFuture> createDatabase(const char *clusterFilePath) = 0; + virtual Reference createDatabase(const char *clusterFilePath) = 0; virtual void addNetworkThreadCompletionHook(void (*hook)(void*), void *hookParameter) = 0; }; diff --git a/fdbclient/MultiVersionTransaction.actor.cpp b/fdbclient/MultiVersionTransaction.actor.cpp index 9c13330d6f..0e1ad17fb6 100644 --- a/fdbclient/MultiVersionTransaction.actor.cpp +++ b/fdbclient/MultiVersionTransaction.actor.cpp @@ -251,11 +251,7 @@ void DLApi::init() { loadClientFunction(&api->setupNetwork, lib, fdbCPath, "fdb_setup_network"); loadClientFunction(&api->runNetwork, lib, fdbCPath, "fdb_run_network"); loadClientFunction(&api->stopNetwork, lib, fdbCPath, "fdb_stop_network"); - loadClientFunction(&api->createCluster, lib, fdbCPath, "fdb_create_cluster"); - - loadClientFunction(&api->clusterCreateDatabase, lib, fdbCPath, "fdb_cluster_create_database"); - loadClientFunction(&api->clusterSetOption, lib, fdbCPath, "fdb_cluster_set_option"); - loadClientFunction(&api->clusterDestroy, lib, fdbCPath, "fdb_cluster_destroy"); + loadClientFunction(&api->createDatabase, lib, fdbCPath, "fdb_create_database", headerVersion >= 610); loadClientFunction(&api->databaseCreateTransaction, lib, fdbCPath, "fdb_database_create_transaction"); loadClientFunction(&api->databaseSetOption, lib, fdbCPath, "fdb_database_set_option"); @@ -282,7 +278,6 @@ void DLApi::init() { loadClientFunction(&api->transactionCancel, lib, fdbCPath, "fdb_transaction_cancel"); loadClientFunction(&api->transactionAddConflictRange, lib, fdbCPath, "fdb_transaction_add_conflict_range"); - loadClientFunction(&api->futureGetCluster, lib, fdbCPath, "fdb_future_get_cluster"); loadClientFunction(&api->futureGetDatabase, lib, fdbCPath, "fdb_future_get_database"); loadClientFunction(&api->futureGetVersion, lib, fdbCPath, "fdb_future_get_version"); loadClientFunction(&api->futureGetError, lib, fdbCPath, "fdb_future_get_error"); @@ -293,6 +288,11 @@ void DLApi::init() { loadClientFunction(&api->futureSetCallback, lib, fdbCPath, "fdb_future_set_callback"); loadClientFunction(&api->futureCancel, lib, fdbCPath, "fdb_future_cancel"); loadClientFunction(&api->futureDestroy, lib, fdbCPath, "fdb_future_destroy"); + + loadClientFunction(&api->createCluster, lib, fdbCPath, "fdb_create_cluster", headerVersion < 610); + loadClientFunction(&api->clusterCreateDatabase, lib, fdbCPath, "fdb_cluster_create_database", headerVersion < 
610); + loadClientFunction(&api->clusterDestroy, lib, fdbCPath, "fdb_cluster_destroy", headerVersion < 610); + loadClientFunction(&api->futureGetCluster, lib, fdbCPath, "fdb_future_get_cluster", headerVersion < 610); } void DLApi::selectApiVersion(int apiVersion) { @@ -346,7 +346,7 @@ void DLApi::stopNetwork() { } } -ThreadFuture> DLApi::createDatabase(const char *clusterFilePath) { +ThreadFuture> DLApi::createDatabase609(const char *clusterFilePath) { FdbCApi::FDBFuture *f = api->createCluster(clusterFilePath); auto clusterFuture = toThreadFuture(api, f, [](FdbCApi::FDBFuture *f, FdbCApi *api) { @@ -374,6 +374,24 @@ ThreadFuture> DLApi::createDatabase(const char *clusterFile }); } +Reference DLApi::createDatabase(const char *clusterFilePath) { + if(headerVersion >= 610) { + FdbCApi::FDBDatabase *db; + api->createDatabase(clusterFilePath, &db); + return Reference(new DLDatabase(api, db)); + } + else { + auto f = DLApi::createDatabase609(clusterFilePath); + + f.blockUntilReady(); + if(f.isError()) { + throw f.getError(); + } + + return f.get(); + } +} + void DLApi::addNetworkThreadCompletionHook(void (*hook)(void*), void *hookParameter) { MutexHolder holder(lock); threadCompletionHooks.push_back(std::make_pair(hook, hookParameter)); @@ -634,25 +652,15 @@ void MultiVersionDatabase::Connector::connect() { connectionFuture.cancel(); } - ThreadFuture> dbFuture = client->api->createDatabase(clusterFilePath.c_str()); - connectionFuture = flatMapThreadFuture, Void>(dbFuture, [this](ErrorOr> db) { - if(db.isError()) { - return ErrorOr>(db.getError()); + candidateDatabase = client->api->createDatabase(clusterFilePath.c_str()); + tr = candidateDatabase->createTransaction(); + connectionFuture = mapThreadFuture(tr->getReadVersion(), [this](ErrorOr v) { + // If the version attempt returns an error, we regard that as a connection (except operation_cancelled) + if(v.isError() && v.getError().code() == error_code_operation_cancelled) { + return ErrorOr(v.getError()); } else { - candidateDatabase = db.get(); - tr = db.get()->createTransaction(); - auto versionFuture = mapThreadFuture(tr->getReadVersion(), [this](ErrorOr v) { - // If the version attempt returns an error, we regard that as a connection (except operation_cancelled) - if(v.isError() && v.getError().code() == error_code_operation_cancelled) { - return ErrorOr(v.getError()); - } - else { - return ErrorOr(Void()); - } - }); - - return ErrorOr>(versionFuture); + return ErrorOr(Void()); } }); @@ -1113,11 +1121,11 @@ void MultiVersionApi::addNetworkThreadCompletionHook(void (*hook)(void*), void * } } -ThreadFuture> MultiVersionApi::createDatabase(const char *clusterFilePath) { +Reference MultiVersionApi::createDatabase(const char *clusterFilePath) { lock.enter(); if(!networkSetup) { lock.leave(); - return network_not_setup(); + throw network_not_setup(); } lock.leave(); @@ -1126,21 +1134,15 @@ ThreadFuture> MultiVersionApi::createDatabase(const char *c return Reference(new MultiVersionDatabase(this, clusterFile, Reference())); } - auto databaseFuture = localClient->api->createDatabase(clusterFilePath); + auto db = localClient->api->createDatabase(clusterFilePath); if(bypassMultiClientApi) { - return databaseFuture; + return db; } else { for(auto it : externalClients) { TraceEvent("CreatingDatabaseOnExternalClient").detail("LibraryPath", it.second->libPath).detail("Failed", it.second->failed); } - return mapThreadFuture, Reference>(databaseFuture, [this, clusterFile](ErrorOr> database) { - if(database.isError()) { - return database; - } - - return 
ErrorOr<Reference<IDatabase>>(Reference<IDatabase>(new MultiVersionDatabase(this, clusterFile, database.get())));
-		});
+		return Reference<IDatabase>(new MultiVersionDatabase(this, clusterFile, db));
 	}
 }
 
diff --git a/fdbclient/MultiVersionTransaction.h b/fdbclient/MultiVersionTransaction.h
index 99c111b024..8402ac76e9 100644
--- a/fdbclient/MultiVersionTransaction.h
+++ b/fdbclient/MultiVersionTransaction.h
@@ -55,12 +55,7 @@ struct FdbCApi : public ThreadSafeReferenceCounted<FdbCApi> {
 	fdb_error_t (*setupNetwork)();
 	fdb_error_t (*runNetwork)();
 	fdb_error_t (*stopNetwork)();
-	FDBFuture* (*createCluster)(const char *clusterFilePath);
-
-	//Cluster
-	FDBFuture* (*clusterCreateDatabase)(FDBCluster *cluster, uint8_t *dbName, int dbNameLength);
-	fdb_error_t (*clusterSetOption)(FDBCluster *cluster, FDBClusterOptions::Option option, uint8_t const *value, int valueLength);
-	void (*clusterDestroy)(FDBCluster *cluster);
+	fdb_error_t (*createDatabase)(const char *clusterFilePath, FDBDatabase **db);
 
 	//Database
 	fdb_error_t (*databaseCreateTransaction)(FDBDatabase *database, FDBTransaction **tr);
@@ -98,7 +93,6 @@ struct FdbCApi : public ThreadSafeReferenceCounted<FdbCApi> {
 	                                         uint8_t const *endKeyName, int endKeyNameLength, FDBConflictRangeTypes::Option);
 
 	//Future
-	fdb_error_t (*futureGetCluster)(FDBFuture *f, FDBCluster **outCluster);
 	fdb_error_t (*futureGetDatabase)(FDBFuture *f, FDBDatabase **outDb);
 	fdb_error_t (*futureGetVersion)(FDBFuture *f, int64_t *outVersion);
 	fdb_error_t (*futureGetError)(FDBFuture *f);
@@ -109,6 +103,12 @@ struct FdbCApi : public ThreadSafeReferenceCounted<FdbCApi> {
 	fdb_error_t (*futureSetCallback)(FDBFuture *f, FDBCallback callback, void *callback_parameter);
 	void (*futureCancel)(FDBFuture *f);
 	void (*futureDestroy)(FDBFuture *f);
+
+	//Legacy Support
+	FDBFuture* (*createCluster)(const char *clusterFilePath);
+	FDBFuture* (*clusterCreateDatabase)(FDBCluster *cluster, uint8_t *dbName, int dbNameLength);
+	void (*clusterDestroy)(FDBCluster *cluster);
+	fdb_error_t (*futureGetCluster)(FDBFuture *f, FDBCluster **outCluster);
 };
 
 class DLTransaction : public ITransaction, ThreadSafeReferenceCounted<DLTransaction> {
@@ -185,7 +185,8 @@ public:
 	void runNetwork();
 	void stopNetwork();
 
-	ThreadFuture<Reference<IDatabase>> createDatabase(const char *clusterFilePath);
+	Reference<IDatabase> createDatabase(const char *clusterFilePath);
+	ThreadFuture<Reference<IDatabase>> createDatabase609(const char *clusterFilePath); // legacy database creation
 
 	void addNetworkThreadCompletionHook(void (*hook)(void*), void *hookParameter);
 
@@ -355,7 +356,7 @@ public:
 	void stopNetwork();
 	void addNetworkThreadCompletionHook(void (*hook)(void*), void *hookParameter);
 
-	ThreadFuture<Reference<IDatabase>> createDatabase(const char *clusterFilePath);
+	Reference<IDatabase> createDatabase(const char *clusterFilePath);
 
 	static MultiVersionApi* api;
 	Reference<ClientInfo> getLocalClient();
diff --git a/fdbclient/ThreadSafeTransaction.actor.cpp b/fdbclient/ThreadSafeTransaction.actor.cpp
index fcfa45c44d..ac3bae657c 100644
--- a/fdbclient/ThreadSafeTransaction.actor.cpp
+++ b/fdbclient/ThreadSafeTransaction.actor.cpp
@@ -30,20 +30,8 @@
 
 // Users of ThreadSafeTransaction might share Reference<ThreadSafeTransaction> between different threads as long as they don't call addRef (e.g. C API follows this).
 // Therefore, it is unsafe to call (explicitly or implicitly) this->addRef in any of these functions.
-Reference<IDatabase> constructThreadSafeDatabase( Database db ) {
-	return Reference<IDatabase>( new ThreadSafeDatabase(db.extractPtr()) );
-}
-Future<Reference<IDatabase>> createThreadSafeDatabase( std::string connFilename, int apiVersion ) {
-	Database db = Database::createDatabase( connFilename, apiVersion );
-	return constructThreadSafeDatabase( db );
-}
-ThreadFuture<Reference<IDatabase>> ThreadSafeDatabase::create( std::string connFilename, int apiVersion ) {
-	if (!g_network) return ThreadFuture<Reference<IDatabase>>(network_not_setup());
-	return onMainThread( [connFilename, apiVersion](){ return createThreadSafeDatabase( connFilename, apiVersion ); } );
-}
 ThreadFuture<Void> ThreadSafeDatabase::onConnected() {
-	DatabaseContext* db = this->db;
-	return onMainThread( [db]() -> Future<Void> {
+	return onMainThread( [this]() -> Future<Void> {
 		db->checkDeferredError();
 		return db->onConnected();
 	} );
@@ -52,7 +40,9 @@ ThreadFuture<Void> ThreadSafeDatabase::onConnected() {
 ThreadFuture<Reference<IDatabase>> ThreadSafeDatabase::createFromExistingDatabase(Database db) {
 	return onMainThread( [db](){
 		db->checkDeferredError();
-		return Future<Reference<IDatabase>>(constructThreadSafeDatabase(db));
+		DatabaseContext *cx = db.getPtr();
+		cx->addref();
+		return Future<Reference<IDatabase>>(Reference<IDatabase>(new ThreadSafeDatabase(cx)));
 	});
 }
 
@@ -60,20 +50,22 @@ Reference<ITransaction> ThreadSafeDatabase::createTransaction() {
 	return Reference<ITransaction>(new ThreadSafeTransaction(this));
 }
 
-Database ThreadSafeDatabase::unsafeGetDatabase() const {
-	db->addref();
-	return Database(db);
+void ThreadSafeDatabase::setOption( FDBDatabaseOptions::Option option, Optional<StringRef> value) {
+	Standalone<Optional<StringRef>> passValue = value;
+	onMainThreadVoid( [this, option, passValue](){ db->setOption(option, passValue.contents()); }, &db->deferredError );
 }
 
-void ThreadSafeDatabase::setOption( FDBDatabaseOptions::Option option, Optional<StringRef> value) {
-	DatabaseContext *db = this->db;
-	Standalone<Optional<StringRef>> passValue = value;
-	onMainThreadVoid( [db, option, passValue](){ db->setOption(option, passValue.contents()); }, &db->deferredError );
+ThreadSafeDatabase::ThreadSafeDatabase(std::string connFilename, int apiVersion) {
+	db = NULL; // All accesses to db happen on the main thread, so this pointer will be set by the time anybody uses it
+
+	onMainThreadVoid([this, connFilename, apiVersion](){
+		Database db = Database::createDatabase(connFilename, apiVersion);
+		this->db = db.extractPtr();
+	}, NULL);
 }
 
 ThreadSafeDatabase::~ThreadSafeDatabase() {
-	DatabaseContext* db = this->db;
-	onMainThreadVoid( [db](){ db->delref(); }, NULL );
+	onMainThreadVoid( [this](){ db->delref(); }, NULL );
 }
 
 ThreadSafeTransaction::ThreadSafeTransaction( ThreadSafeDatabase *cx ) {
@@ -84,10 +76,9 @@ ThreadSafeTransaction::ThreadSafeTransaction( ThreadSafeDatabase *cx ) {
 	// because the reference count of the DatabaseContext is solely managed from the main thread. If cx is destructed
 	// immediately after this call, it will defer the DatabaseContext::delref (and onMainThread preserves the order of
 	// these operations).
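The constructor below relies on that ordering guarantee; as an illustrative timeline (a sketch, not code from the patch):

	// foreign thread                           main-thread queue (FIFO)
	// ThreadSafeTransaction tr(cx);     -->    1) cx->db->addref(); construct the RYW transaction
	// cx destroyed immediately after    -->    2) db->delref()  (enqueued by ~ThreadSafeDatabase)
	//
	// onMainThreadVoid() executes requests in enqueue order, so (1) always runs
	// before (2) and the DatabaseContext refcount never transiently reaches zero.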
- DatabaseContext* db = cx->db; ReadYourWritesTransaction *tr = this->tr = ReadYourWritesTransaction::allocateOnForeignThread(); // No deferred error -- if the construction of the RYW transaction fails, we have no where to put it - onMainThreadVoid( [tr,db](){ db->addref(); new (tr) ReadYourWritesTransaction( Database(db) ); }, NULL ); + onMainThreadVoid( [tr,cx](){ cx->db->addref(); new (tr) ReadYourWritesTransaction( Database(cx->db) ); }, NULL ); } ThreadSafeTransaction::~ThreadSafeTransaction() { @@ -357,8 +348,8 @@ void ThreadSafeApi::stopNetwork() { ::stopNetwork(); } -ThreadFuture> ThreadSafeApi::createDatabase(const char *clusterFilePath) { - return ThreadSafeDatabase::create(clusterFilePath, apiVersion); +Reference ThreadSafeApi::createDatabase(const char *clusterFilePath) { + return Reference(new ThreadSafeDatabase(clusterFilePath, apiVersion)); } void ThreadSafeApi::addNetworkThreadCompletionHook(void (*hook)(void*), void *hookParameter) { diff --git a/fdbclient/ThreadSafeTransaction.h b/fdbclient/ThreadSafeTransaction.h index b1ddc0d6e0..b8645d5b14 100644 --- a/fdbclient/ThreadSafeTransaction.h +++ b/fdbclient/ThreadSafeTransaction.h @@ -30,7 +30,6 @@ class ThreadSafeDatabase : public IDatabase, public ThreadSafeReferenceCounted { public: ~ThreadSafeDatabase(); - static ThreadFuture> create( std::string connFilename, int apiVersion=-1 ); static ThreadFuture> createFromExistingDatabase(Database cx); Reference createTransaction(); @@ -46,9 +45,9 @@ private: friend class ThreadSafeTransaction; DatabaseContext* db; public: // Internal use only + ThreadSafeDatabase( std::string connFilename, int apiVersion ); ThreadSafeDatabase( DatabaseContext* db ) : db(db) {} DatabaseContext* unsafeGetPtr() const { return db; } - Database unsafeGetDatabase() const; // This is thread unsafe (ONLY call from the network thread), but respects reference counting }; class ThreadSafeTransaction : public ITransaction, ThreadSafeReferenceCounted, NonCopyable { @@ -119,7 +118,7 @@ public: void runNetwork(); void stopNetwork(); - ThreadFuture> createDatabase(const char *clusterFilePath); + Reference createDatabase(const char *clusterFilePath); void addNetworkThreadCompletionHook(void (*hook)(void*), void *hookParameter); From d5357d2439b16aa29a4e0ab786da3b74640b6aeb Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Tue, 2 Oct 2018 15:28:46 -0700 Subject: [PATCH 002/226] Update all of the bindings to use the new C API and deprecate or remove unneeded parts of their APIs. 
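In each binding this collapses the two-step cluster/database open into a single call; sketched in the flow bindings' terms (illustrative, not code from the patch):

	// Before: open a cluster handle, then the single database named "DB"
	auto cluster = fdb->createCluster(clusterFilename);
	auto db = cluster->createDatabase();

	// After: one call; Cluster and createCluster() survive only as deprecated shims
	auto db = fdb->createDatabase(clusterFilename);

The per-language diffs below follow the same pattern (Go's OpenDatabase, Java's FDB.open, Python's fdb.open, and the Ruby equivalent).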
--- .gitignore | 4 +- bindings/c/foundationdb/fdb_c.h | 5 + bindings/flow/fdb_flow.actor.cpp | 24 ++--- bindings/flow/fdb_flow.h | 14 ++- bindings/flow/tester/Tester.actor.cpp | 8 +- bindings/go/src/_stacktester/stacktester.go | 2 +- bindings/go/src/fdb/cluster.go | 41 ++------ bindings/go/src/fdb/fdb.go | 99 +++++++++---------- bindings/java/fdbJNI.cpp | 91 ++--------------- .../main/com/apple/foundationdb/Cluster.java | 54 +++------- .../apple/foundationdb/ClusterOptions.java | 5 + .../main/com/apple/foundationdb/Database.java | 3 +- .../src/main/com/apple/foundationdb/FDB.java | 45 ++++----- .../com/apple/foundationdb/FutureCluster.java | 40 -------- .../apple/foundationdb/FutureDatabase.java | 40 -------- .../foundationdb/test/AsyncStackTester.java | 5 +- bindings/python/fdb/__init__.py | 9 +- bindings/python/fdb/impl.py | 90 ++++++----------- bindings/ruby/lib/fdb.rb | 5 + bindings/ruby/lib/fdbimpl.rb | 63 +++--------- bindings/ruby/lib/fdbimpl_v609.rb | 62 ++++++++++++ .../sphinx/source/administration.rst | 2 +- documentation/sphinx/source/api-c.rst | 65 +++--------- .../sphinx/source/api-common.rst.inc | 4 +- documentation/sphinx/source/api-python.rst | 35 +------ documentation/sphinx/source/api-ruby.rst | 28 +----- documentation/sphinx/source/data-modeling.rst | 1 - .../sphinx/source/developer-guide.rst | 1 - fdbclient/DatabaseContext.h | 8 +- fdbclient/NativeAPI.actor.cpp | 18 ++-- fdbclient/ReadYourWrites.actor.cpp | 2 +- fdbclient/ThreadSafeTransaction.actor.cpp | 15 ++- fdbclient/vexillographer/java.cs | 2 - fdbclient/vexillographer/vexillographer.cs | 3 - fdbserver/Status.actor.cpp | 0 35 files changed, 298 insertions(+), 595 deletions(-) create mode 100644 bindings/java/src/main/com/apple/foundationdb/ClusterOptions.java delete mode 100644 bindings/java/src/main/com/apple/foundationdb/FutureCluster.java delete mode 100644 bindings/java/src/main/com/apple/foundationdb/FutureDatabase.java create mode 100644 bindings/ruby/lib/fdbimpl_v609.rb mode change 100755 => 100644 fdbserver/Status.actor.cpp diff --git a/.gitignore b/.gitignore index 13942d1317..b0ea7ba212 100644 --- a/.gitignore +++ b/.gitignore @@ -16,7 +16,9 @@ packaging/msi/FDBInstaller.msi *.pom bindings/java/pom*.xml bindings/java/src*/main/overview.html -bindings/java/src*/main/com/apple/foundationdb/*Options.java +bindings/java/src*/main/com/apple/foundationdb/NetworkOptions.java +bindings/java/src*/main/com/apple/foundationdb/DatabaseOptions.java +bindings/java/src*/main/com/apple/foundationdb/TransactionOptions.java bindings/java/src*/main/com/apple/foundationdb/StreamingMode.java bindings/java/src*/main/com/apple/foundationdb/MutationType.java bindings/java/src*/main/com/apple/foundationdb/ConflictRangeType.java diff --git a/bindings/c/foundationdb/fdb_c.h b/bindings/c/foundationdb/fdb_c.h index 0950acca7c..8d6991ebcf 100644 --- a/bindings/c/foundationdb/fdb_c.h +++ b/bindings/c/foundationdb/fdb_c.h @@ -266,6 +266,11 @@ extern "C" { DLLEXPORT void fdb_cluster_destroy( FDBCluster* c ); + typedef enum { + // This option is only a placeholder for C compatibility and should not be used + FDB_CLUSTER_OPTION_DUMMY_DO_NOT_USE=-1 + } FDBClusterOption; + DLLEXPORT WARN_UNUSED_RESULT fdb_error_t fdb_cluster_set_option( FDBCluster* c, FDBClusterOption option, uint8_t const* value, int value_length ); diff --git a/bindings/flow/fdb_flow.actor.cpp b/bindings/flow/fdb_flow.actor.cpp index 630bd2bf19..b011dfa1c1 100644 --- a/bindings/flow/fdb_flow.actor.cpp +++ b/bindings/flow/fdb_flow.actor.cpp @@ -34,8 +34,7 @@ THREAD_FUNC 
networkThread(void* fdb) { ACTOR Future _test() { API *fdb = FDB::API::selectAPIVersion(600); - auto c = fdb->createCluster( std::string() ); - auto db = c->createDatabase(); + auto db = fdb->createDatabase(); state Reference tr( new Transaction(db) ); // tr->setVersion(1); @@ -189,13 +188,13 @@ namespace FDB { } Reference API::createCluster( std::string const& connFilename ) { - CFuture f( fdb_create_cluster( connFilename.c_str() ) ); - f.blockUntilReady(); + return Reference(new Cluster(connFilename)); + } - FDBCluster* c; - throw_on_error( fdb_future_get_cluster( f.f, &c ) ); - - return Reference( new Cluster(c) ); + Reference API::createDatabase(std::string const& connFilename) { + FDBDatabase *db; + throw_on_error(fdb_create_database(connFilename.c_str(), &db)); + return Reference(new DatabaseContext(db)); } int API::getAPIVersion() const { @@ -203,14 +202,7 @@ namespace FDB { } Reference Cluster::createDatabase() { - const char *dbName = "DB"; - CFuture f( fdb_cluster_create_database( c, (uint8_t*)dbName, (int)strlen(dbName) ) ); - f.blockUntilReady(); - - FDBDatabase* db; - throw_on_error( fdb_future_get_database( f.f, &db ) ); - - return Reference( new DatabaseContext(db) ); + return API::getInstance()->createDatabase(connFilename.c_str()); } void DatabaseContext::setDatabaseOption(FDBDatabaseOption option, Optional value) { diff --git a/bindings/flow/fdb_flow.h b/bindings/flow/fdb_flow.h index e24fa1c6db..4e10fa6b40 100644 --- a/bindings/flow/fdb_flow.h +++ b/bindings/flow/fdb_flow.h @@ -44,20 +44,21 @@ namespace FDB { private: FDBDatabase* db; explicit DatabaseContext( FDBDatabase* db ) : db(db) {} + + friend class API; }; + // Deprecated: Use createDatabase instead. class Cluster : public ReferenceCounted, NonCopyable { public: - ~Cluster() { - fdb_cluster_destroy( c ); - } + ~Cluster() {} Reference createDatabase(); private: - explicit Cluster( FDBCluster* c ) : c(c) {} - FDBCluster* c; + explicit Cluster( std::string connFilename ) : connFilename(connFilename) {} + std::string connFilename; friend class API; }; @@ -73,8 +74,11 @@ namespace FDB { void runNetwork(); void stopNetwork(); + // Deprecated: Use createDatabase instead. Reference createCluster( std::string const& connFilename ); + Reference createDatabase( std::string const& connFilename="" ); + bool evaluatePredicate(FDBErrorPredicate pred, Error const& e); int getAPIVersion() const; diff --git a/bindings/flow/tester/Tester.actor.cpp b/bindings/flow/tester/Tester.actor.cpp index 7d9fd7f983..87fc0e9a35 100644 --- a/bindings/flow/tester/Tester.actor.cpp +++ b/bindings/flow/tester/Tester.actor.cpp @@ -28,7 +28,7 @@ #include #endif -// Otherwise we have to type setupNetwork(), Cluster::createCluster(), etc. +// Otherwise we have to type setupNetwork(), FDB::open(), etc. 
using namespace FDB; std::map optionInfo; @@ -1714,8 +1714,7 @@ ACTOR void startTest(std::string clusterFilename, StringRef prefix, int apiVersi startThread(networkThread, fdb); // Connect to the default cluster/database, and create a transaction - auto cluster = fdb->createCluster(clusterFilename); - Reference db = cluster->createDatabase(); + auto db = fdb->createDatabase(clusterFilename); Reference data = Reference(new FlowTesterData(fdb)); wait(runTest(data, db, prefix)); @@ -1744,8 +1743,7 @@ ACTOR void _test_versionstamp() { fdb->setupNetwork(); startThread(networkThread, fdb); - auto c = fdb->createCluster(std::string()); - auto db = c->createDatabase(); + auto db = fdb->createDatabase(); state Reference tr(new Transaction(db)); state Future> ftrVersion = tr->getVersionstamp(); diff --git a/bindings/go/src/_stacktester/stacktester.go b/bindings/go/src/_stacktester/stacktester.go index c5cbd8852f..514901c43a 100644 --- a/bindings/go/src/_stacktester/stacktester.go +++ b/bindings/go/src/_stacktester/stacktester.go @@ -872,7 +872,7 @@ func main() { log.Fatal("API version not equal to value selected") } - db, e = fdb.Open(clusterFile, []byte("DB")) + db, e = fdb.OpenDatabase(clusterFile) if e != nil { log.Fatal(e) } diff --git a/bindings/go/src/fdb/cluster.go b/bindings/go/src/fdb/cluster.go index a900f65699..6098c602f7 100644 --- a/bindings/go/src/fdb/cluster.go +++ b/bindings/go/src/fdb/cluster.go @@ -28,47 +28,18 @@ package fdb */ import "C" -import ( - "runtime" -) - +// Deprecated: Use OpenDatabase or OpenDefault to obtain a database handle directly // Cluster is a handle to a FoundationDB cluster. Cluster is a lightweight // object that may be efficiently copied, and is safe for concurrent use by // multiple goroutines. -// -// It is generally preferable to use Open or OpenDefault to obtain a database -// handle directly. type Cluster struct { - *cluster + clusterFileName string } -type cluster struct { - ptr *C.FDBCluster -} - -func (c *cluster) destroy() { - C.fdb_cluster_destroy(c.ptr) -} - -// OpenDatabase returns a database handle from the FoundationDB cluster. It is -// generally preferable to use Open or OpenDefault to obtain a database handle -// directly. +// Deprecated: Use OpenDatabase or OpenDefault to obtain a database handle directly +// OpenDatabase returns a database handle from the FoundationDB cluster. // -// In the current release, the database name must be []byte("DB"). +// The database name must be []byte("DB"). 
func (c Cluster) OpenDatabase(dbName []byte) (Database, error) { - f := C.fdb_cluster_create_database(c.ptr, byteSliceToPtr(dbName), C.int(len(dbName))) - fdb_future_block_until_ready(f) - - var outd *C.FDBDatabase - - if err := C.fdb_future_get_database(f, &outd); err != 0 { - return Database{}, Error{int(err)} - } - - C.fdb_future_destroy(f) - - d := &database{outd} - runtime.SetFinalizer(d, (*database).destroy) - - return Database{d}, nil + return Open(c.clusterFileName, dbName) } diff --git a/bindings/go/src/fdb/fdb.go b/bindings/go/src/fdb/fdb.go index 76cf5ac67e..28b674e270 100644 --- a/bindings/go/src/fdb/fdb.go +++ b/bindings/go/src/fdb/fdb.go @@ -30,6 +30,7 @@ package fdb import "C" import ( + "bytes" "fmt" "log" "runtime" @@ -192,17 +193,10 @@ var apiVersion int var networkStarted bool var networkMutex sync.Mutex -type DatabaseId struct { - clusterFile string - dbName string -} - -var openClusters map[string]Cluster -var openDatabases map[DatabaseId]Database +var openDatabases map[string]Database func init() { - openClusters = make(map[string]Cluster) - openDatabases = make(map[DatabaseId]Database) + openDatabases = make(map[string]Database) } func startNetwork() error { @@ -222,10 +216,9 @@ func startNetwork() error { return nil } -// StartNetwork initializes the FoundationDB client networking engine. It is not -// necessary to call StartNetwork when using the fdb.Open or fdb.OpenDefault -// functions to obtain a database handle. StartNetwork must not be called more -// than once. +// Deprecated: the network is started automatically when a database is opened. +// StartNetwork initializes the FoundationDB client networking engine. StartNetwork +// must not be called more than once. func StartNetwork() error { networkMutex.Lock() defer networkMutex.Unlock() @@ -237,17 +230,15 @@ func StartNetwork() error { return startNetwork() } -// DefaultClusterFile should be passed to fdb.Open or fdb.CreateCluster to allow -// the FoundationDB C library to select the platform-appropriate default cluster -// file on the current machine. +// DefaultClusterFile should be passed to fdb.Open to allow the FoundationDB C +// library to select the platform-appropriate default cluster file on the current machine. const DefaultClusterFile string = "" -// OpenDefault returns a database handle to the default database from the -// FoundationDB cluster identified by the DefaultClusterFile on the current -// machine. The FoundationDB client networking engine will be initialized first, -// if necessary. +// OpenDefault returns a database handle to the FoundationDB cluster identified +// by the DefaultClusterFile on the current machine. The FoundationDB client +// networking engine will be initialized first, if necessary. func OpenDefault() (Database, error) { - return Open(DefaultClusterFile, []byte("DB")) + return OpenDatabase(DefaultClusterFile) } // MustOpenDefault is like OpenDefault but panics if the default database cannot @@ -260,13 +251,9 @@ func MustOpenDefault() Database { return db } -// Open returns a database handle to the named database from the FoundationDB -// cluster identified by the provided cluster file and database name. The -// FoundationDB client networking engine will be initialized first, if -// necessary. -// -// In the current release, the database name must be []byte("DB"). -func Open(clusterFile string, dbName []byte) (Database, error) { +// Open returns a database handle to the FoundationDB cluster identified +// by the provided cluster file and database name. 
+func OpenDatabase(clusterFile string) (Database, error) { networkMutex.Lock() defer networkMutex.Unlock() @@ -283,27 +270,36 @@ func Open(clusterFile string, dbName []byte) (Database, error) { } } - cluster, ok := openClusters[clusterFile] + db, ok := openDatabases[clusterFile] if !ok { - cluster, e = createCluster(clusterFile) + db, e = createDatabase(clusterFile) if e != nil { return Database{}, e } - openClusters[clusterFile] = cluster - } - - db, ok := openDatabases[DatabaseId{clusterFile, string(dbName)}] - if !ok { - db, e = cluster.OpenDatabase(dbName) - if e != nil { - return Database{}, e - } - openDatabases[DatabaseId{clusterFile, string(dbName)}] = db + openDatabases[clusterFile] = db } return db, nil } +func MustOpenDatabase(clusterFile string) Database { + db, err := OpenDatabase(clusterFile) + if err != nil { + panic(err) + } + return db +} + +// Deprecated: Use OpenDatabase instead +// The database name must be []byte("DB"). +func Open(clusterFile string, dbName []byte) (Database, error) { + if bytes.Compare(dbName, []byte("DB")) != 0 { + return Database{}, Error{2013} // invalid_database_name + } + return OpenDatabase(clusterFile) +} + +// Deprecated: Use MustOpenDatabase instead // MustOpen is like Open but panics if the database cannot be opened. func MustOpen(clusterFile string, dbName []byte) Database { db, err := Open(clusterFile, dbName) @@ -314,6 +310,10 @@ func MustOpen(clusterFile string, dbName []byte) Database { } func createCluster(clusterFile string) (Cluster, error) { + return Cluster{clusterFile}, nil +} + +func createDatabase(clusterFile string) (Database, error) { var cf *C.char if len(clusterFile) != 0 { @@ -321,23 +321,18 @@ func createCluster(clusterFile string) (Cluster, error) { defer C.free(unsafe.Pointer(cf)) } - f := C.fdb_create_cluster(cf) - fdb_future_block_until_ready(f) - - var outc *C.FDBCluster - - if err := C.fdb_future_get_cluster(f, &outc); err != 0 { - return Cluster{}, Error{int(err)} + var outdb *C.FDBDatabase + if err := C.fdb_create_database(cf, &outdb); err != 0 { + return Database{}, Error{int(err)} } - C.fdb_future_destroy(f) + db := &database{outdb} + runtime.SetFinalizer(db, (*database).destroy) - c := &cluster{outc} - runtime.SetFinalizer(c, (*cluster).destroy) - - return Cluster{c}, nil + return Database{db}, nil } +// Deprecated: Use OpenDatabase instead. // CreateCluster returns a cluster handle to the FoundationDB cluster identified // by the provided cluster file. 
func CreateCluster(clusterFile string) (Cluster, error) { diff --git a/bindings/java/fdbJNI.cpp b/bindings/java/fdbJNI.cpp index 0e41b8d2bc..f248f042a7 100644 --- a/bindings/java/fdbJNI.cpp +++ b/bindings/java/fdbJNI.cpp @@ -480,38 +480,6 @@ JNIEXPORT jbyteArray JNICALL Java_com_apple_foundationdb_FutureKey_FutureKey_1ge return result; } -JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FutureCluster_FutureCluster_1get(JNIEnv *jenv, jobject, jlong future) { - if( !future ) { - throwParamNotNull(jenv); - return 0; - } - FDBFuture *f = (FDBFuture *)future; - - FDBCluster *cluster; - fdb_error_t err = fdb_future_get_cluster(f, &cluster); - if( err ) { - safeThrow( jenv, getThrowable( jenv, err ) ); - return 0; - } - return (jlong)cluster; -} - -JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FutureDatabase_FutureDatabase_1get(JNIEnv *jenv, jobject, jlong future) { - if( !future ) { - throwParamNotNull(jenv); - return 0; - } - FDBFuture *f = (FDBFuture *)future; - - FDBDatabase *database; - fdb_error_t err = fdb_future_get_database(f, &database); - if( err ) { - safeThrow( jenv, getThrowable( jenv, err ) ); - return 0; - } - return (jlong)database; -} - JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDBDatabase_Database_1createTransaction(JNIEnv *jenv, jobject, jlong dbPtr) { if( !dbPtr ) { throwParamNotNull(jenv); @@ -564,69 +532,28 @@ JNIEXPORT jboolean JNICALL Java_com_apple_foundationdb_FDB_Error_1predicate(JNIE return (jboolean)fdb_error_predicate(predicate, code); } -JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDB_Cluster_1create(JNIEnv *jenv, jobject, jstring clusterFileName) { +JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDB_Database_1create(JNIEnv *jenv, jobject, jstring clusterFileName) { const char* fileName = 0; if(clusterFileName != 0) { fileName = jenv->GetStringUTFChars(clusterFileName, 0); - if( jenv->ExceptionOccurred() ) + if(jenv->ExceptionOccurred()) { return 0; - } - FDBFuture *cluster = fdb_create_cluster( fileName ); - if(clusterFileName != 0) - jenv->ReleaseStringUTFChars( clusterFileName, fileName ); - return (jlong)cluster; -} - -JNIEXPORT void JNICALL Java_com_apple_foundationdb_Cluster_Cluster_1setOption(JNIEnv *jenv, jobject, jlong cPtr, jint code, jbyteArray value) { - if( !cPtr ) { - throwParamNotNull(jenv); - return; - } - FDBCluster *c = (FDBCluster *)cPtr; - uint8_t *barr = NULL; - int size = 0; - - if(value != 0) { - barr = (uint8_t *)jenv->GetByteArrayElements( value, NULL ); - if (!barr) { - throwRuntimeEx( jenv, "Error getting handle to native resources" ); - return; } - size = jenv->GetArrayLength( value ); } - fdb_error_t err = fdb_cluster_set_option( c, (FDBClusterOption)code, barr, size ); - if(value != 0) - jenv->ReleaseByteArrayElements( value, (jbyte *)barr, JNI_ABORT ); - if( err ) { - safeThrow( jenv, getThrowable( jenv, err ) ); - } -} -JNIEXPORT void JNICALL Java_com_apple_foundationdb_Cluster_Cluster_1dispose(JNIEnv *jenv, jobject, jlong cPtr) { - if( !cPtr ) { - throwParamNotNull(jenv); - return; - } - fdb_cluster_destroy( (FDBCluster *)cPtr ); -} + FDBDatabase *db; + fdb_error_t err = fdb_create_database(fileName, &db); -JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_Cluster_Cluster_1createDatabase(JNIEnv *jenv, jobject, jlong cPtr, jbyteArray dbNameBytes) { - if( !cPtr || !dbNameBytes ) { - throwParamNotNull(jenv); - return 0; + if(clusterFileName != 0) { + jenv->ReleaseStringUTFChars(clusterFileName, fileName); } - FDBCluster *cluster = (FDBCluster *)cPtr; - uint8_t *barr = (uint8_t 
*)jenv->GetByteArrayElements( dbNameBytes, NULL ); - if (!barr) { - throwRuntimeEx( jenv, "Error getting handle to native resources" ); + if(err) { + safeThrow(jenv, getThrowable(jenv, err)); return 0; } - int size = jenv->GetArrayLength( dbNameBytes ); - FDBFuture * f = fdb_cluster_create_database( cluster, barr, size ); - jenv->ReleaseByteArrayElements( dbNameBytes, (jbyte *)barr, JNI_ABORT ); - return (jlong)f; + return (jlong)db; } JNIEXPORT void JNICALL Java_com_apple_foundationdb_FDBTransaction_Transaction_1setVersion(JNIEnv *jenv, jobject, jlong tPtr, jlong version) { diff --git a/bindings/java/src/main/com/apple/foundationdb/Cluster.java b/bindings/java/src/main/com/apple/foundationdb/Cluster.java index 299c65f80c..fbbcca6d4a 100644 --- a/bindings/java/src/main/com/apple/foundationdb/Cluster.java +++ b/bindings/java/src/main/com/apple/foundationdb/Cluster.java @@ -20,33 +20,29 @@ package com.apple.foundationdb; -import java.nio.charset.Charset; import java.util.concurrent.Executor; /** * The {@code Cluster} represents a connection to a physical set of cooperating machines - * running FoundationDB. A {@code Cluster} is opened with a reference to a cluster file.
+ * running FoundationDB. A {@code Cluster} is opened with a reference to a cluster file.
+ *
+ * This class is deprecated. Use {@link FDB#open} to open a {@link Database} directly.
*
* Note: {@code Cluster} objects must be {@link #close closed} when no longer in use * in order to free any associated resources. */ +@Deprecated public class Cluster extends NativeObjectWrapper { private ClusterOptions options; private final Executor executor; + private final String clusterFile; - private static final Charset UTF8 = Charset.forName("UTF-8"); + protected Cluster(String clusterFile, Executor executor) { + super(0); - protected Cluster(long cPtr, Executor executor) { - super(cPtr); this.executor = executor; - this.options = new ClusterOptions((code, parameter) -> { - pointerReadLock.lock(); - try { - Cluster_setOption(getPtr(), code, parameter); - } finally { - pointerReadLock.unlock(); - } - }); + this.options = new ClusterOptions((code, parameter) -> {}); + this.clusterFile = clusterFile; } /** @@ -59,19 +55,8 @@ public class Cluster extends NativeObjectWrapper { return options; } - @Override - protected void finalize() throws Throwable { - try { - checkUnclosed("Cluster"); - close(); - } - finally { - super.finalize(); - } - } - /** - * Creates a connection to a specific database on an FDB cluster. + * Creates a connection to the database on an FDB cluster. * * @return a {@code Future} that will be set to a {@code Database} upon * successful connection. @@ -81,7 +66,7 @@ public class Cluster extends NativeObjectWrapper { } /** - * Creates a connection to a specific database on an FDB cluster. + * Creates a connection to the database on an FDB cluster. * * @param e the {@link Executor} to use when executing asynchronous callbacks for the database * @@ -89,22 +74,9 @@ public class Cluster extends NativeObjectWrapper { * successful connection. */ public Database openDatabase(Executor e) throws FDBException { - FutureDatabase futureDatabase; - pointerReadLock.lock(); - try { - futureDatabase = new FutureDatabase(Cluster_createDatabase(getPtr(), "DB".getBytes(UTF8)), e); - } finally { - pointerReadLock.unlock(); - } - return futureDatabase.join(); + return FDB.instance().open(clusterFile, e); } @Override - protected void closeInternal(long cPtr) { - Cluster_dispose(cPtr); - } - - private native void Cluster_dispose(long cPtr); - private native long Cluster_createDatabase(long cPtr, byte[] dbName); - private native void Cluster_setOption(long cPtr, int code, byte[] value) throws FDBException; + protected void closeInternal(long cPtr) {} } diff --git a/bindings/java/src/main/com/apple/foundationdb/ClusterOptions.java b/bindings/java/src/main/com/apple/foundationdb/ClusterOptions.java new file mode 100644 index 0000000000..8eee964277 --- /dev/null +++ b/bindings/java/src/main/com/apple/foundationdb/ClusterOptions.java @@ -0,0 +1,5 @@ +package com.apple.foundationdb; /** + * A set of options that can be set on a {@link Cluster}. + * + * There are currently no options available. + */ public class ClusterOptions extends OptionsSet { public ClusterOptions( OptionConsumer consumer ) { super(consumer); } } \ No newline at end of file diff --git a/bindings/java/src/main/com/apple/foundationdb/Database.java b/bindings/java/src/main/com/apple/foundationdb/Database.java index 628e1da471..886b289876 100644 --- a/bindings/java/src/main/com/apple/foundationdb/Database.java +++ b/bindings/java/src/main/com/apple/foundationdb/Database.java @@ -26,8 +26,7 @@ import java.util.function.Function; /** * A mutable, lexicographically ordered mapping from binary keys to binary values. - * A {@code Database} is stored on a FoundationDB {@link Cluster}. 
- * {@link Transaction}s are used to manipulate data within a single + * {@link Transaction}s are used to manipulate data within a single * {@code Database} -- multiple, concurrent * {@code Transaction}s on a {@code Database} enforce ACID properties.
*
diff --git a/bindings/java/src/main/com/apple/foundationdb/FDB.java b/bindings/java/src/main/com/apple/foundationdb/FDB.java index 2bc931deac..d72f518cc0 100644 --- a/bindings/java/src/main/com/apple/foundationdb/FDB.java +++ b/bindings/java/src/main/com/apple/foundationdb/FDB.java @@ -54,8 +54,8 @@ import java.util.concurrent.atomic.AtomicInteger; * to call {@link #open}. *
*

 * <b>Client networking</b><br>

- * The network is started either implicitly with a call to a variant of {@link #open()} or - * {@link #createCluster()}, or started explicitly with a call to {@link #startNetwork()}. + * The network is started either implicitly with a call to a variant of {@link #open()} + * or started explicitly with a call to {@link #startNetwork()}. *
* */ @@ -114,8 +114,8 @@ public class FDB { * Returns a set of options that can be set on a the FoundationDB API. Generally, * these options to the top level of the API affect the networking engine and * therefore must be set before the network engine is started. The network is started - * by calls to {@link #startNetwork()} and implicitly by calls to {@link #open()} and - * {@link #createCluster()} (and their respective variants). + * by calls to {@link #startNetwork()} and implicitly by a call to {@link #open()} and + * and its variants. * * @return a set of options affecting this instance of the FoundationDB API */ @@ -218,11 +218,14 @@ public class FDB { * If the FoundationDB network has not been started, it will be started in the course of this call * as if {@link FDB#startNetwork()} had been called. * + * This function is deprecated. Use {@link #open()} instead. + * * @return a {@code CompletableFuture} that will be set to a FoundationDB {@code Cluster}. * * @throws FDBException on errors encountered starting the FoundationDB networking engine * @throws IllegalStateException if the network had been previously stopped */ + @Deprecated public Cluster createCluster() throws IllegalStateException, FDBException { return createCluster(null); } @@ -232,6 +235,8 @@ public class FDB { * has not been started, it will be started in the course of this call as if * {@link #startNetwork()} had been called. * + * This function is deprecated. Use {@link #open(String)} instead. + * * @param clusterFilePath the * cluster file * defining the FoundationDB cluster. This can be {@code null} if the @@ -243,6 +248,7 @@ public class FDB { * @throws FDBException on errors encountered starting the FoundationDB networking engine * @throws IllegalStateException if the network had been previously stopped */ + @Deprecated public Cluster createCluster(String clusterFilePath) throws IllegalStateException, FDBException { return createCluster(clusterFilePath, DEFAULT_EXECUTOR); } @@ -253,6 +259,8 @@ public class FDB { * {@link Executor} will be used as the default for the execution of all callbacks that * are produced from using the resulting {@link Cluster}. * + * This function is deprecated. Use {@link #open(String, Executor)} instead. + * * @param clusterFilePath the * cluster file * defining the FoundationDB cluster. This can be {@code null} if the @@ -265,16 +273,10 @@ public class FDB { * @throws FDBException on errors encountered starting the FoundationDB networking engine * @throws IllegalStateException if the network had been previously stopped */ + @Deprecated public Cluster createCluster(String clusterFilePath, Executor e) throws FDBException, IllegalStateException { - FutureCluster f; - synchronized (this) { - if (!isConnected()) { - startNetwork(); - } - f = new FutureCluster(Cluster_create(clusterFilePath), e); - } - return f.join(); + return new Cluster(clusterFilePath, e); } /** @@ -318,26 +320,21 @@ public class FDB { * @return a {@code CompletableFuture} that will be set to a FoundationDB {@link Database} */ public Database open(String clusterFilePath, Executor e) throws FDBException { - FutureCluster f; - synchronized (this) { - if (!isConnected()) { + synchronized(this) { + if(!isConnected()) { startNetwork(); } - f = new FutureCluster(Cluster_create(clusterFilePath), e); } - Cluster c = f.join(); - Database db = c.openDatabase(e); - c.close(); - return db; + return new FDBDatabase(Database_create(clusterFilePath), e); } /** * Initializes networking. Can only be called once. 
This version of * {@code startNetwork()} will create a new thread and execute the networking - * event loop on that thread. This method is called upon {@link Database} or - * {@link Cluster} creation by default if the network has not yet - * been started. If one wishes to control what thread the network runs on, + * event loop on that thread. This method is called upon {@link Database} + * creation by default if the network has not yet been started. If one + * wishes to control what thread the network runs on, * one should use the version of {@link #startNetwork(Executor) startNetwork()} * that takes an {@link Executor}.
*
@@ -472,5 +469,5 @@ public class FDB { private native boolean Error_predicate(int predicate, int code); - private native long Cluster_create(String clusterFileName); + private native long Database_create(String clusterFilePath) throws FDBException; } diff --git a/bindings/java/src/main/com/apple/foundationdb/FutureCluster.java b/bindings/java/src/main/com/apple/foundationdb/FutureCluster.java deleted file mode 100644 index 03f6d62056..0000000000 --- a/bindings/java/src/main/com/apple/foundationdb/FutureCluster.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * FutureCluster.java - * - * This source file is part of the FoundationDB open source project - * - * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.apple.foundationdb; - -import java.util.concurrent.Executor; - -class FutureCluster extends NativeFuture { - private final Executor executor; - - protected FutureCluster(long cPtr, Executor executor) { - super(cPtr); - this.executor = executor; - registerMarshalCallback(executor); - } - - @Override - protected Cluster getIfDone_internal(long cPtr) throws FDBException { - return new Cluster(FutureCluster_get(cPtr), executor); - } - - private native long FutureCluster_get(long cPtr) throws FDBException; -} diff --git a/bindings/java/src/main/com/apple/foundationdb/FutureDatabase.java b/bindings/java/src/main/com/apple/foundationdb/FutureDatabase.java deleted file mode 100644 index f534a00810..0000000000 --- a/bindings/java/src/main/com/apple/foundationdb/FutureDatabase.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * FutureDatabase.java - * - * This source file is part of the FoundationDB open source project - * - * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package com.apple.foundationdb; - -import java.util.concurrent.Executor; - -class FutureDatabase extends NativeFuture { - private final Executor executor; - - FutureDatabase(long cPtr, Executor executor) { - super(cPtr); - this.executor = executor; - registerMarshalCallback(executor); - } - - @Override - protected Database getIfDone_internal(long cPtr) throws FDBException { - return new FDBDatabase(FutureDatabase_get(cPtr), executor); - } - - private native long FutureDatabase_get(long cPtr) throws FDBException; -} diff --git a/bindings/java/src/test/com/apple/foundationdb/test/AsyncStackTester.java b/bindings/java/src/test/com/apple/foundationdb/test/AsyncStackTester.java index a244b83cd8..617586fe9d 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/AsyncStackTester.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/AsyncStackTester.java @@ -33,7 +33,6 @@ import java.util.Map; import java.util.concurrent.CompletableFuture; import java.util.function.Function; -import com.apple.foundationdb.Cluster; import com.apple.foundationdb.Database; import com.apple.foundationdb.FDB; import com.apple.foundationdb.FDBException; @@ -723,9 +722,7 @@ public class AsyncStackTester { throw new IllegalStateException("API version not correctly set to " + apiVersion); } //ExecutorService executor = Executors.newFixedThreadPool(2); - Cluster cl = fdb.createCluster(args.length > 2 ? args[2] : null); - - Database db = cl.openDatabase(); + Database db = fdb.open(args.length > 2 ? args[2] : null); Context c = new AsynchronousContext(db, prefix); //System.out.println("Starting test..."); diff --git a/bindings/python/fdb/__init__.py b/bindings/python/fdb/__init__.py index 6c3482723b..0742d78f3e 100644 --- a/bindings/python/fdb/__init__.py +++ b/bindings/python/fdb/__init__.py @@ -85,13 +85,10 @@ def api_version(ver): 'FDBError', 'predicates', 'Future', - 'Cluster', 'Database', 'Transaction', 'KeyValue', 'KeySelector', - 'init', - 'create_cluster', 'open', 'transactional', 'options', @@ -100,6 +97,12 @@ def api_version(ver): _add_symbols(fdb.impl, list) + if ver < 610: + globals()["init"] = getattr(fdb.impl, "init") + globals()["open"] = getattr(fdb.impl, "open_v609") + globals()["create_cluster"] = getattr(fdb.impl, "create_cluster") + globals()["Cluster"] = getattr(fdb.impl, "Cluster") + if ver > 22: import fdb.locality diff --git a/bindings/python/fdb/impl.py b/bindings/python/fdb/impl.py index 66079cf09c..f8110af5a1 100644 --- a/bindings/python/fdb/impl.py +++ b/bindings/python/fdb/impl.py @@ -51,11 +51,6 @@ class _ErrorPredicates(object): self._parent = parent -class _ClusterOptions(object): - def __init__(self, cluster): - self._parent = weakref.proxy(cluster) - - class _DatabaseOptions(object): def __init__(self, db): self._parent = weakref.proxy(db) @@ -158,7 +153,7 @@ def fill_operations(): add_operation("bit_" + fname, v) -for scope in ['ClusterOption', 'DatabaseOption', 'TransactionOption', 'NetworkOption']: +for scope in ['DatabaseOption', 'TransactionOption', 'NetworkOption']: fill_options(scope) fill_options('ErrorPredicate', True) @@ -600,11 +595,6 @@ class Future(_FDBBase): def block_until_ready(self): self.capi.fdb_future_block_until_ready(self.fpointer) - # Depending on the event_model, block_until_ready may be remapped to do something asynchronous or - # just fail. really_block_until_ready() is always fdb_future_block_until_ready() and is used e.g. 
- # for database and cluster futures that should always be available very quickly - really_block_until_ready = block_until_ready - def on_ready(self, callback): def cb_and_delref(ignore): _unpin_callback(cbfunc[0]) @@ -878,7 +868,7 @@ class FormerFuture(_FDBBase): pass -class Database(FormerFuture): +class Database(_FDBBase): def __init__(self, dpointer): self.dpointer = dpointer self.options = _DatabaseOptions(self) @@ -1097,33 +1087,27 @@ class Database(FormerFuture): fill_operations() -class Cluster(FormerFuture): - def __init__(self, cpointer): - self.cpointer = cpointer - self.options = _ClusterOptions(self) - - def __del__(self): - # print('Destroying cluster 0x%x' % self.cpointer) - self.capi.fdb_cluster_destroy(self.cpointer) +class Cluster(_FDBBase): + def __init__(self, cluster_file): + self.cluster_file = cluster_file def open_database(self, name): - name = paramToBytes(name) - f = Future(self.capi.fdb_cluster_create_database(self.cpointer, name, len(name))) - f.really_block_until_ready() - dpointer = ctypes.c_void_p() - self.capi.fdb_future_get_database(f.fpointer, ctypes.byref(dpointer)) - return Database(dpointer) + if name != b'DB': + raise FDBError(2013) # invalid_database_name + + return create_database(self.cluster_file) def _set_option(self, option, param, length): - self.capi.fdb_cluster_set_option(self.cpointer, option, param, length) + pass +def create_database(cluster_file=None): + pointer = ctypes.c_void_p() + _FDBBase.capi.fdb_create_database(optionalParamToBytes(cluster_file)[0], ctypes.byref(pointer)) + return Database(pointer) + def create_cluster(cluster_file=None): - f = Future(_FDBBase.capi.fdb_create_cluster(optionalParamToBytes(cluster_file)[0])) - cpointer = ctypes.c_void_p() - f.really_block_until_ready() - _FDBBase.capi.fdb_future_get_cluster(f.fpointer, ctypes.byref(cpointer)) - return Cluster(cpointer) + return Cluster(cluster_file) class KeySelector(object): @@ -1363,14 +1347,6 @@ _capi.fdb_future_get_key.argtypes = [ctypes.c_void_p, ctypes.POINTER(ctypes.POIN _capi.fdb_future_get_key.restype = ctypes.c_int _capi.fdb_future_get_key.errcheck = check_error_code -_capi.fdb_future_get_cluster.argtypes = [ctypes.c_void_p, ctypes.POINTER(ctypes.c_void_p)] -_capi.fdb_future_get_cluster.restype = ctypes.c_int -_capi.fdb_future_get_cluster.errcheck = check_error_code - -_capi.fdb_future_get_database.argtypes = [ctypes.c_void_p, ctypes.POINTER(ctypes.c_void_p)] -_capi.fdb_future_get_database.restype = ctypes.c_int -_capi.fdb_future_get_database.errcheck = check_error_code - _capi.fdb_future_get_value.argtypes = [ctypes.c_void_p, ctypes.POINTER(ctypes.c_int), ctypes.POINTER(ctypes.POINTER(ctypes.c_byte)), ctypes.POINTER(ctypes.c_int)] _capi.fdb_future_get_value.restype = ctypes.c_int @@ -1385,18 +1361,9 @@ _capi.fdb_future_get_string_array.argtypes = [ctypes.c_void_p, ctypes.POINTER(ct _capi.fdb_future_get_string_array.restype = int _capi.fdb_future_get_string_array.errcheck = check_error_code -_capi.fdb_create_cluster.argtypes = [ctypes.c_char_p] -_capi.fdb_create_cluster.restype = ctypes.c_void_p - -_capi.fdb_cluster_destroy.argtypes = [ctypes.c_void_p] -_capi.fdb_cluster_destroy.restype = None - -_capi.fdb_cluster_create_database.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int] -_capi.fdb_cluster_create_database.restype = ctypes.c_void_p - -_capi.fdb_cluster_set_option.argtypes = [ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p, ctypes.c_int] -_capi.fdb_cluster_set_option.restype = ctypes.c_int -_capi.fdb_cluster_set_option.errcheck = 
check_error_code +_capi.fdb_create_database.argtypes = [ctypes.c_char_p, ctypes.POINTER(ctypes.c_void_p)] +_capi.fdb_create_database.restype = ctypes.c_int +_capi.fdb_create_database.errcheck = check_error_code _capi.fdb_database_destroy.argtypes = [ctypes.c_void_p] _capi.fdb_database_destroy.restype = None @@ -1655,13 +1622,12 @@ def init_v13(local_address, event_model=None): return init(event_model) -open_clusters = {} open_databases = {} cacheLock = threading.Lock() -def open(cluster_file=None, database_name=b'DB', event_model=None): +def open(cluster_file=None, event_model=None): """Opens the given database (or the default database of the cluster indicated by the fdb.cluster file in a platform-specific location, if no cluster_file or database_name is provided). Initializes the FDB interface as required.""" @@ -1671,17 +1637,21 @@ def open(cluster_file=None, database_name=b'DB', event_model=None): init(event_model=event_model) with cacheLock: - if cluster_file not in open_clusters: - open_clusters[cluster_file] = create_cluster(cluster_file) + if cluster_file not in open_databases: + open_databases[cluster_file] = create_database(cluster_file) - if (cluster_file, database_name) not in open_databases: - open_databases[(cluster_file, database_name)] = open_clusters[cluster_file].open_database(database_name) + return open_databases[(cluster_file)] + - return open_databases[(cluster_file, database_name)] +def open_v609(cluster_file=None, database_name=b'DB', event_model=None): + if database_name != b'DB': + raise FDBError(2013) # invalid_database_name + + return open(cluster_file, event_model) def open_v13(cluster_id_path, database_name, local_address=None, event_model=None): - return open(cluster_id_path, database_name, event_model) + return open_v609(cluster_id_path, database_name, event_model) import atexit diff --git a/bindings/ruby/lib/fdb.rb b/bindings/ruby/lib/fdb.rb index 7a6dd4edca..08aab30080 100644 --- a/bindings/ruby/lib/fdb.rb +++ b/bindings/ruby/lib/fdb.rb @@ -72,6 +72,11 @@ module FDB require_relative 'fdbtuple' require_relative 'fdbdirectory' + + if version < 610 + require_relative 'fdbimpl_v609' + end + if version > 22 require_relative 'fdblocality' end diff --git a/bindings/ruby/lib/fdbimpl.rb b/bindings/ruby/lib/fdbimpl.rb index 565bc915b9..ba4f8c0b89 100644 --- a/bindings/ruby/lib/fdbimpl.rb +++ b/bindings/ruby/lib/fdbimpl.rb @@ -83,17 +83,12 @@ module FDB attach_function :fdb_future_get_error, [ :pointer ], :fdb_error attach_function :fdb_future_get_version, [ :pointer, :pointer ], :fdb_error attach_function :fdb_future_get_key, [ :pointer, :pointer, :pointer ], :fdb_error - attach_function :fdb_future_get_cluster, [ :pointer, :pointer ], :fdb_error - attach_function :fdb_future_get_database, [ :pointer, :pointer ], :fdb_error attach_function :fdb_future_get_value, [ :pointer, :pointer, :pointer, :pointer ], :fdb_error attach_function :fdb_future_get_keyvalue_array, [ :pointer, :pointer, :pointer, :pointer ], :fdb_error attach_function :fdb_future_get_string_array, [ :pointer, :pointer, :pointer ], :fdb_error - attach_function :fdb_create_cluster, [ :string ], :pointer - attach_function :fdb_cluster_destroy, [ :pointer ], :void - attach_function :fdb_cluster_set_option, [ :pointer, :int, :pointer, :int ], :fdb_error + attach_function :fdb_create_database, [ :string, :pointer ], :fdb_error - attach_function :fdb_cluster_create_database, [ :pointer, :pointer, :int ], :pointer attach_function :fdb_database_destroy, [ :pointer ], :void attach_function 
:fdb_database_set_option, [ :pointer, :int, :pointer, :int ], :fdb_error
@@ -156,7 +151,7 @@ module FDB
       @@ffi_callbacks
     end
 
-    [ "Network", "Cluster", "Database", "Transaction" ].each do |scope|
+    [ "Network", "Database", "Transaction" ].each do |scope|
       klass = FDB.const_set("#{scope}Options", Class.new)
       klass.class_eval do
         define_method(:initialize) do |setfunc|
@@ -242,6 +237,10 @@ module FDB
     nil
   end
 
+  class << self
+    private :init
+  end
+
  def self.stop()
    FDBC.check_error FDBC.fdb_stop_network
  end
@@ -254,11 +253,10 @@ module FDB
     end
   end
 
-  @@open_clusters = {}
   @@open_databases = {}
   @@cache_lock = Mutex.new
 
-  def self.open( cluster_file = nil, database_name = "DB" )
+  def self.open( cluster_file = nil )
     @@network_thread_monitor.synchronize do
       if ! @@network_thread
         init
@@ -266,15 +264,13 @@ module FDB
     end
 
     @@cache_lock.synchronize do
-      if ! @@open_clusters.has_key? cluster_file
-        @@open_clusters[cluster_file] = create_cluster( cluster_file )
+      if ! @@open_databases.has_key? cluster_file
+        dpointer = FFI::MemoryPointer.new :pointer
+        FDBC.check_error FDBC.fdb_create_database(cluster_file, dpointer)
+        @@open_databases[cluster_file] = Database.new dpointer.get_pointer(0)
       end
 
-      if ! @@open_databases.has_key? [cluster_file, database_name]
-        @@open_databases[[cluster_file, database_name]] = @@open_clusters[cluster_file].open_database(database_name)
-      end
-
-      @@open_databases[[cluster_file, database_name]]
+      @@open_databases[cluster_file]
     end
   end
@@ -503,41 +499,6 @@ module FDB
     end
   end
 
-  def self.create_cluster(cluster=nil)
-    f = FDBC.fdb_create_cluster(cluster)
-    cpointer = FFI::MemoryPointer.new :pointer
-    FDBC.check_error FDBC.fdb_future_block_until_ready(f)
-    FDBC.check_error FDBC.fdb_future_get_cluster(f, cpointer)
-    Cluster.new cpointer.get_pointer(0)
-  end
-
-  class Cluster < FormerFuture
-    attr_reader :options
-
-    def self.finalize(ptr)
-      proc do
-        # puts "Destroying cluster #{ptr}"
-        FDBC.fdb_cluster_destroy(ptr)
-      end
-    end
-
-    def initialize(cpointer)
-      @cpointer = cpointer
-      @options = ClusterOptions.new lambda { |code, param|
-        FDBC.check_error FDBC.fdb_cluster_set_option(cpointer, code, param, param.nil? ? 0 : param.bytesize)
-      }
-      ObjectSpace.define_finalizer(self, self.class.finalize(@cpointer))
-    end
-
-    def open_database(name="DB")
-      f = FDBC.fdb_cluster_create_database(@cpointer, name, name.bytesize)
-      dpointer = FFI::MemoryPointer.new :pointer
-      FDBC.check_error FDBC.fdb_future_block_until_ready(f)
-      FDBC.check_error FDBC.fdb_future_get_database(f, dpointer)
-      Database.new dpointer.get_pointer(0)
-    end
-  end
-
   class Database < FormerFuture
     attr_reader :options
 
diff --git a/bindings/ruby/lib/fdbimpl_v609.rb b/bindings/ruby/lib/fdbimpl_v609.rb
new file mode 100644
index 0000000000..4c0d6b1dd0
--- /dev/null
+++ b/bindings/ruby/lib/fdbimpl_v609.rb
@@ -0,0 +1,62 @@
+#encoding: BINARY
+
+#
+# fdbimpl_v609.rb
+#
+# This source file is part of the FoundationDB open source project
+#
+# Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +# FoundationDB Ruby API + +# Documentation for this API can be found at +# https://apple.github.io/foundationdb/api-ruby.html + +module FDB + class << self + alias_method :open_impl, :open + def open( cluster_file = nil, database_name = "DB" ) + if database_name != "DB" + raise Error.new(2013) # invalid_database_name + end + + open_impl(cluster_file) + end + + def create_cluster(cluster_file_path=nil) + Cluster.new cluster_file_path + end + + public :init + end + + class ClusterOptions + end + + class Cluster < FormerFuture + attr_reader :options + + def initialize(cluster_file_path) + @cluster_file_path = cluster_file_path + @options = ClusterOptions.new + end + + def open_database(name="DB") + FDB.open(@cluster_file_path, name) + end + end + +end diff --git a/documentation/sphinx/source/administration.rst b/documentation/sphinx/source/administration.rst index 56674dc099..a78379ff8f 100644 --- a/documentation/sphinx/source/administration.rst +++ b/documentation/sphinx/source/administration.rst @@ -82,7 +82,7 @@ Specifying the cluster file All FoundationDB components can be configured to use a specified cluster file: * The ``fdbcli`` tool allows a cluster file to be passed on the command line using the ``-C`` option. -* The :doc:`client APIs ` allow a cluster file to be passed when connecting to a cluster, usually via ``open()`` or ``create_cluster()``. +* The :doc:`client APIs ` allow a cluster file to be passed when connecting to a cluster, usually via ``open()``. * A FoundationDB server or ``backup-agent`` allow a cluster file to be specified in :ref:`foundationdb.conf `. In addition, FoundationDB allows you to use the environment variable ``FDB_CLUSTER_FILE`` to specify a cluster file. This approach is helpful if you operate or access more than one cluster. diff --git a/documentation/sphinx/source/api-c.rst b/documentation/sphinx/source/api-c.rst index 741f1ded53..e79d2f9567 100644 --- a/documentation/sphinx/source/api-c.rst +++ b/documentation/sphinx/source/api-c.rst @@ -13,7 +13,6 @@ .. |reset-func-name| replace:: :func:`reset ` .. |reset-func| replace:: :func:`fdb_transaction_reset()` .. |cancel-func| replace:: :func:`fdb_transaction_cancel()` -.. |init-func| replace:: FIXME .. |open-func| replace:: FIXME .. |set-cluster-file-func| replace:: FIXME .. |set-local-address-func| replace:: FIXME @@ -292,22 +291,6 @@ See :ref:`developer-guide-programming-with-futures` for further (language-indepe |future-memory-mine| -.. function:: fdb_error_t fdb_future_get_cluster(FDBFuture* future, FDBCluster** out_cluster) - - Extracts a value of type :type:`FDBCluster*` from an :type:`FDBFuture` into a caller-provided variable. |future-warning| - - |future-get-return1| |future-get-return2|. - - |future-memory-yours1| :type:`FDBCluster` |future-memory-yours2| :func:`fdb_cluster_destroy()` |future-memory-yours3| - -.. function:: fdb_error_t fdb_future_get_database(FDBFuture* future, FDBDatabase** out_database) - - Extracts a value of type :type:`FDBDatabase*` from an :type:`FDBFuture` into a caller-provided variable. |future-warning| - - |future-get-return1| |future-get-return2|. - - |future-memory-yours1| :type:`FDBDatabase` |future-memory-yours2| ``fdb_database_destroy(*out_database)`` |future-memory-yours3| - .. 
function:: fdb_error_t fdb_future_get_value(FDBFuture* future, fdb_bool_t* out_present, uint8_t const** out_value, int* out_value_length)
 
    Extracts a database value from an :type:`FDBFuture` into caller-provided variables. |future-warning|
@@ -379,42 +362,6 @@ See :ref:`developer-guide-programming-with-futures` for further (language-indepe
    :data:`value_length`
       The length of the value pointed to by :data:`value`.
 
-Cluster
-=======
-
-.. type:: FDBCluster
-
-   An opaque type that represents a Cluster in the FoundationDB C API.
-
-.. function:: FDBFuture* fdb_create_cluster(const char* cluster_file_path)
-
-   |future-return0| an :type:`FDBCluster` object. |future-return1| call :func:`fdb_future_get_cluster()` to extract the :type:`FDBCluster` object, |future-return2|
-
-   :data:`cluster_file_path`
-      A NULL-terminated string giving a local path of a :ref:`cluster file <foundationdb-cluster-file>` (often called 'fdb.cluster') which contains connection information for the FoundationDB cluster. If cluster_file_path is NULL or an empty string, then a :ref:`default cluster file <default-cluster-file>` will be used.
-
-.. function:: void fdb_cluster_destroy(FDBCluster* cluster)
-
-   Destroys an :type:`FDBCluster` object. It must be called exactly once for each successful call to :func:`fdb_future_get_cluster()`. This function only destroys a handle to the cluster -- your cluster will be fine!
-
-.. function:: fdb_error_t fdb_cluster_set_option(FDBCluster* cluster, FDBClusterOption option, uint8_t const* value, int value_length)
-
-   Called to set an option on an :type:`FDBCluster`. |option-parameter| :func:`fdb_cluster_set_option()` returns.
-
-.. type:: FDBClusterOption
-
-   |option-doc|
-
-.. function:: FDBFuture* fdb_cluster_create_database(FDBCluster *cluster, uint8_t const* db_name, int db_name_length)
-
-   |future-return0| an :type:`FDBDatabase` object. |future-return1| call :func:`fdb_future_get_database()` to extract the :type:`FDBDatabase` object, |future-return2|
-
-   :data:`db_name`
-      A pointer to the name of the database to be opened. |no-null| In the current FoundationDB API, the database name *must* be "DB".
-
-   :data:`db_name_length`
-      |length-of| :data:`db_name`.
-
 Database
 ========
 
@@ -424,9 +371,19 @@ An |database-blurb1| Modifications to a database are performed via transactions.
 
    An opaque type that represents a database in the FoundationDB C API.
 
+.. function:: fdb_error_t fdb_create_database(const char* cluster_file_path, FDBDatabase** out_database)
+
+   Creates a new database connected to the specified cluster. The caller assumes ownership of the :type:`FDBDatabase` object and must destroy it with :func:`fdb_database_destroy()`.
+
+   :data:`cluster_file_path`
+      A NULL-terminated string giving a local path of a :ref:`cluster file <foundationdb-cluster-file>` (often called 'fdb.cluster') which contains connection information for the FoundationDB cluster. If cluster_file_path is NULL or an empty string, then a :ref:`default cluster file <default-cluster-file>` will be used.
+
+   :data:`*out_database`
+      Set to point to the newly created :type:`FDBDatabase`.
+
 .. function:: void fdb_database_destroy(FDBDatabase* database)
 
-   Destroys an :type:`FDBDatabase` object. It must be called exactly once for each successful call to :func:`fdb_future_get_database()`. This function only destroys a handle to the database -- your database will be fine!
+   Destroys an :type:`FDBDatabase` object. It must be called exactly once for each successful call to :func:`fdb_create_database()`. This function only destroys a handle to the database -- your database will be fine!
 
.. 
function:: fdb_error_t fdb_database_set_option( FDBDatabase* database, FDBDatabaseOption option, uint8_t const* value, int value_length )
diff --git a/documentation/sphinx/source/api-common.rst.inc b/documentation/sphinx/source/api-common.rst.inc
index cbbc036340..40325fe38a 100644
--- a/documentation/sphinx/source/api-common.rst.inc
+++ b/documentation/sphinx/source/api-common.rst.inc
@@ -234,7 +234,7 @@
 .. |network-options-warning| replace::
 
-    It is an error to set these options after the first call to |open-func| or |init-func| anywhere in your application.
+    It is an error to set these options after the first call to |open-func| anywhere in your application.
 
 .. |tls-options-burb| replace::
 
@@ -398,7 +398,7 @@
     Cancels |future-type-string| and its associated asynchronous operation. If called before the future is ready, attempts to access its value will |error-raise-type| an :ref:`operation_cancelled <developer-guide-error-codes>` |error-type|. Cancelling a future which is already ready has no effect. Note that even if a future is not ready, its associated asynchronous operation may have successfully completed and be unable to be cancelled.
 
 .. |fdb-open-blurb| replace::
-    Initializes the FoundationDB API, connects to the cluster specified by the :ref:`cluster file <foundationdb-cluster-file>`, and opens the database with the specified name. This function is often called without any parameters, using only the defaults. If no cluster file is passed, FoundationDB automatically :ref:`determines a cluster file <default-cluster-file>` with which to connect to a cluster.
+    Initializes the FoundationDB API and connects to the cluster specified by the :ref:`cluster file <foundationdb-cluster-file>`. This function is often called without any parameters, using only the defaults. If no cluster file is passed, FoundationDB automatically :ref:`determines a cluster file <default-cluster-file>` with which to connect to a cluster.
 
 .. |fdb-transactional-unknown-result-note| replace::
     In some failure scenarios, it is possible that your transaction will be executed twice. See :ref:`developer-guide-unknown-results` for more information.
diff --git a/documentation/sphinx/source/api-python.rst b/documentation/sphinx/source/api-python.rst
index 9c1acd3fb7..cc71525808 100644
--- a/documentation/sphinx/source/api-python.rst
+++ b/documentation/sphinx/source/api-python.rst
@@ -14,7 +14,6 @@
 .. |reset-func-name| replace:: :func:`reset <fdb.Transaction.reset>`
 .. |reset-func| replace:: :func:`Transaction.reset`
 .. |cancel-func| replace:: :func:`Transaction.cancel`
-.. |init-func| replace:: :func:`fdb.init`
 .. |open-func| replace:: :func:`fdb.open`
 .. |on-error-func| replace:: :meth:`Transaction.on_error`
 .. |null-type| replace:: ``None``
@@ -86,33 +85,18 @@ For API changes between version 13 and |api-version| (for the purpose of porting
 Opening a database
 ==================
 
-After importing the ``fdb`` module and selecting an API version, you probably want to open a :class:`Database`. The simplest way of doing this is using :func:`open`::
+After importing the ``fdb`` module and selecting an API version, you probably want to open a :class:`Database` using :func:`open`::
 
     import fdb
     fdb.api_version(600)
     db = fdb.open()
 
-.. function:: open( cluster_file=None, db_name="DB", event_model=None )
+.. function:: open( cluster_file=None, event_model=None )
 
     |fdb-open-blurb|
 
     .. param event_model:: Can be used to select alternate :ref:`api-python-event-models`
 
-    .. note:: In this release, db_name must be "DB".
-
-    .. note:: ``fdb.open()`` combines the effect of :func:`init`, :func:`create_cluster`, and :meth:`Cluster.open_database`.
-
-.. 
function:: init() - - Initializes the FoundationDB API, creating a thread for the FoundationDB client and initializing the client's networking engine. :func:`init()` can only be called once. If called subsequently or after :func:`open`, it will raise an ``client_invalid_operation`` error. - -.. function:: create_cluster( cluster_file=None ) - - Connects to the cluster specified by :ref:`cluster_file `, or by a :ref:`default cluster file ` if - ``cluster_file`` is None. :func:`init` must be called first. - - Returns a |future-type| :class:`Cluster` object. - .. data:: options |network-options-blurb| @@ -175,19 +159,6 @@ After importing the ``fdb`` module and selecting an API version, you probably wa |option-tls-key-bytes| -Cluster objects -=============== - -.. class:: Cluster - -.. method:: Cluster.open_database(name="DB") - - Opens a database with the given name. - - Returns a |future-type| :class:`Database` object. - - .. note:: In this release, name **must** be "DB". - .. _api-python-keys: Keys and values @@ -966,7 +937,7 @@ The following streaming modes are available: Event models ============ -By default, the FoundationDB Python API assumes that the calling program uses threads (as provided by the ``threading`` module) for concurrency. This means that blocking operations will block the current Python thread. This behavior can be changed by specifying the optional ``event_model`` parameter to the :func:`open` or :func:`init` functions. +By default, the FoundationDB Python API assumes that the calling program uses threads (as provided by the ``threading`` module) for concurrency. This means that blocking operations will block the current Python thread. This behavior can be changed by specifying the optional ``event_model`` parameter to the :func:`open` function. The following event models are available: diff --git a/documentation/sphinx/source/api-ruby.rst b/documentation/sphinx/source/api-ruby.rst index 34f777e2d4..87a12c31f4 100644 --- a/documentation/sphinx/source/api-ruby.rst +++ b/documentation/sphinx/source/api-ruby.rst @@ -12,7 +12,6 @@ .. |reset-func-name| replace:: :meth:`reset ` .. |reset-func| replace:: :meth:`Transaction.reset` .. |cancel-func| replace:: :meth:`Transaction.cancel` -.. |init-func| replace:: :func:`FDB.init` .. |open-func| replace:: :func:`FDB.open` .. |on-error-func| replace:: :meth:`Transaction.on_error` .. |null-type| replace:: ``nil`` @@ -75,28 +74,16 @@ For API changes between version 14 and |api-version| (for the purpose of porting Opening a database ================== -After requiring the ``FDB`` gem and selecting an API version, you probably want to open a :class:`Database`. The simplest way of doing this is using :func:`open`:: +After requiring the ``FDB`` gem and selecting an API version, you probably want to open a :class:`Database` using :func:`open`:: require 'fdb' FDB.api_version 600 db = FDB.open -.. function:: open( cluster_file=nil, db_name="DB" ) -> Database +.. function:: open( cluster_file=nil ) -> Database |fdb-open-blurb| - .. note:: In this release, db_name must be "DB". - - .. note:: ``fdb.open`` combines the effect of :func:`init`, :func:`create_cluster`, and :meth:`Cluster.open_database`. - -.. function:: init() -> nil - - Initializes the FoundationDB API, creating a thread for the FoundationDB client and initializing the client's networking engine. :func:`init` can only be called once. If called subsequently or after :func:`open`, it will raise a ``client_invalid_operation`` error. - -.. 
function:: create_cluster(cluster_file=nil) -> Cluster - - Connects to the cluster specified by :ref:`cluster_file `, or by a :ref:`default cluster file ` if ``cluster_file`` is ``nil``. - .. global:: FDB.options |network-options-blurb| @@ -160,17 +147,6 @@ After requiring the ``FDB`` gem and selecting an API version, you probably want .. method :: FDB.options.set_disable_multi_version_client_api() -> nil -Cluster objects -=============== - -.. class:: Cluster - -.. method:: Cluster.open_database(name="DB") -> Database - - Opens a database with the given name. - - .. note:: In this release, name **must** be "DB". - .. _api-ruby-keys: Keys and values diff --git a/documentation/sphinx/source/data-modeling.rst b/documentation/sphinx/source/data-modeling.rst index a12dfe8fec..92352b9a22 100644 --- a/documentation/sphinx/source/data-modeling.rst +++ b/documentation/sphinx/source/data-modeling.rst @@ -12,7 +12,6 @@ .. |get-key-func| replace:: get_key() .. |get-range-func| replace:: get_range() .. |commit-func| replace:: FIXME -.. |init-func| replace:: FIXME .. |open-func| replace:: FIXME .. |set-cluster-file-func| replace:: FIXME .. |set-local-address-func| replace:: FIXME diff --git a/documentation/sphinx/source/developer-guide.rst b/documentation/sphinx/source/developer-guide.rst index 01b01e4d14..30b8d2c542 100644 --- a/documentation/sphinx/source/developer-guide.rst +++ b/documentation/sphinx/source/developer-guide.rst @@ -12,7 +12,6 @@ .. |get-key-func| replace:: get_key() .. |get-range-func| replace:: get_range() .. |commit-func| replace:: ``commit()`` -.. |init-func| replace:: FIXME .. |open-func| replace:: FIXME .. |set-cluster-file-func| replace:: FIXME .. |set-local-address-func| replace:: FIXME diff --git a/fdbclient/DatabaseContext.h b/fdbclient/DatabaseContext.h index fd8f3dee7e..d2b16a6de0 100644 --- a/fdbclient/DatabaseContext.h +++ b/fdbclient/DatabaseContext.h @@ -78,8 +78,12 @@ public: Error deferredError; bool lockAware; + bool isError() { + return deferredError.code() != invalid_error_code; + } + void checkDeferredError() { - if( deferredError.code() != invalid_error_code ) { + if(isError()) { throw deferredError; } } @@ -94,6 +98,8 @@ public: Future clientInfoMonitor, Standalone dbId, int taskID, LocalityData const& clientLocality, bool enableLocalityLoadBalance, bool lockAware, int apiVersion = Database::API_VERSION_LATEST ); + explicit DatabaseContext( const Error &err ); + // Key DB-specific information AsyncTrigger masterProxiesChangeTrigger; Future monitorMasterProxiesInfoChange; diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 6de4ad19fb..bf9c978004 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -496,6 +496,8 @@ DatabaseContext::DatabaseContext( clientStatusUpdater.actor = clientStatusUpdateActor(this); } +DatabaseContext::DatabaseContext( const Error &err ) : deferredError(err), latencies(1000), readLatencies(1000), commitLatencies(1000), GRVLatencies(1000), mutationsPerCommit(1000), bytesPerCommit(1000) {} + ACTOR static Future monitorClientInfo( Reference>> clusterInterface, Reference ccf, Reference> outInfo ) { try { loop { @@ -1851,8 +1853,9 @@ Transaction::Transaction( Database const& cx ) : cx(cx), info(cx->taskID), backoff(CLIENT_KNOBS->DEFAULT_BACKOFF), committedVersion(invalidVersion), versionstampPromise(Promise>()), numErrors(0), trLogInfo(createTrLogInfoProbabilistically(cx)) { setPriority(GetReadVersionRequest::PRIORITY_DEFAULT); - if(cx->lockAware) + if(cx->lockAware) { options.lockAware 
= true; + } } Transaction::~Transaction() { @@ -3047,11 +3050,14 @@ Future< Standalone> > Transaction::splitStorageMetrics( KeyRan void Transaction::checkDeferredError() { cx->checkDeferredError(); } Reference Transaction::createTrLogInfoProbabilistically(const Database &cx) { - double clientSamplingProbability = std::isinf(cx->clientInfo->get().clientTxnInfoSampleRate) ? CLIENT_KNOBS->CSI_SAMPLING_PROBABILITY : cx->clientInfo->get().clientTxnInfoSampleRate; - if (((networkOptions.logClientInfo.present() && networkOptions.logClientInfo.get()) || BUGGIFY) && g_random->random01() < clientSamplingProbability && (!g_network->isSimulated() || !g_simulator.speedUpSimulation)) - return Reference(new TransactionLogInfo()); - else - return Reference(); + if(!cx->isError()) { + double clientSamplingProbability = std::isinf(cx->clientInfo->get().clientTxnInfoSampleRate) ? CLIENT_KNOBS->CSI_SAMPLING_PROBABILITY : cx->clientInfo->get().clientTxnInfoSampleRate; + if (((networkOptions.logClientInfo.present() && networkOptions.logClientInfo.get()) || BUGGIFY) && g_random->random01() < clientSamplingProbability && (!g_network->isSimulated() || !g_simulator.speedUpSimulation)) { + return Reference(new TransactionLogInfo()); + } + } + + return Reference(); } void enableClientInfoLogging() { diff --git a/fdbclient/ReadYourWrites.actor.cpp b/fdbclient/ReadYourWrites.actor.cpp index ff24a5432c..71a4a84333 100644 --- a/fdbclient/ReadYourWrites.actor.cpp +++ b/fdbclient/ReadYourWrites.actor.cpp @@ -1097,7 +1097,7 @@ public: } }; -ReadYourWritesTransaction::ReadYourWritesTransaction( Database const& cx ) : cache(&arena), writes(&arena), tr(cx), retries(0), creationTime(now()), commitStarted(false), options(tr) {} +ReadYourWritesTransaction::ReadYourWritesTransaction( Database const& cx ) : cache(&arena), writes(&arena), tr(cx), retries(0), creationTime(now()), commitStarted(false), options(tr), deferredError(cx->deferredError) {} ACTOR Future timebomb(double totalSeconds, Promise resetPromise) { if(totalSeconds == 0.0) { diff --git a/fdbclient/ThreadSafeTransaction.actor.cpp b/fdbclient/ThreadSafeTransaction.actor.cpp index ac3bae657c..87b050834a 100644 --- a/fdbclient/ThreadSafeTransaction.actor.cpp +++ b/fdbclient/ThreadSafeTransaction.actor.cpp @@ -58,9 +58,18 @@ void ThreadSafeDatabase::setOption( FDBDatabaseOptions::Option option, Optional< ThreadSafeDatabase::ThreadSafeDatabase(std::string connFilename, int apiVersion) { db = NULL; // All accesses to db happen on the main thread, so this pointer will be set by the time anybody uses it - onMainThreadVoid([this, connFilename, apiVersion](){ - Database db = Database::createDatabase(connFilename, apiVersion); - this->db = db.extractPtr(); + Reference connFile = Reference(new ClusterConnectionFile(ClusterConnectionFile::lookupClusterFileName(connFilename).first)); + onMainThreadVoid([this, connFile, apiVersion](){ + try { + Database db = Database::createDatabase(connFile, apiVersion); + this->db = db.extractPtr(); + } + catch(Error &e) { + this->db = new DatabaseContext(e); + } + catch(...) 
{ + this->db = new DatabaseContext(unknown_error()); + } }, NULL); } diff --git a/fdbclient/vexillographer/java.cs b/fdbclient/vexillographer/java.cs index ba21eeec51..6dbffc7b80 100644 --- a/fdbclient/vexillographer/java.cs +++ b/fdbclient/vexillographer/java.cs @@ -85,8 +85,6 @@ namespace vexillographer { { Scope.NetworkOption, new ScopeOptions(true, "A set of options that can be set globally for the {@link FDB FoundationDB API}.") }, - { Scope.ClusterOption, new ScopeOptions(true, - "A set of options that can be set on a {@link Cluster}.") }, { Scope.DatabaseOption, new ScopeOptions(true, "A set of options that can be set on a {@link Database}.") }, { Scope.TransactionOption, new ScopeOptions(true, diff --git a/fdbclient/vexillographer/vexillographer.cs b/fdbclient/vexillographer/vexillographer.cs index d6711cc3ee..fab303c06c 100644 --- a/fdbclient/vexillographer/vexillographer.cs +++ b/fdbclient/vexillographer/vexillographer.cs @@ -30,7 +30,6 @@ namespace vexillographer public enum Scope { NetworkOption, - ClusterOption, DatabaseOption, TransactionOption, StreamingMode, @@ -172,8 +171,6 @@ namespace vexillographer { case Scope.NetworkOption: return "NET_OPTION"; - case Scope.ClusterOption: - return "CLUSTER_OPTION"; case Scope.DatabaseOption: return "DB_OPTION"; case Scope.TransactionOption: diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp old mode 100755 new mode 100644 From 7a40c9bfff9739e28c6c73ae1eb7edafb36e6170 Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Wed, 3 Oct 2018 12:32:22 -0700 Subject: [PATCH 003/226] Add some documentation for bindings changes --- documentation/sphinx/source/release-notes.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/documentation/sphinx/source/release-notes.rst b/documentation/sphinx/source/release-notes.rst index e67ece128e..7d715c6c31 100644 --- a/documentation/sphinx/source/release-notes.rst +++ b/documentation/sphinx/source/release-notes.rst @@ -20,6 +20,16 @@ Status Bindings -------- +* The API to create a database has been simplified across the bindings. All changes are backward compatible with previous API versions, with one exception in Java noted below. +* C: `FDBCluster` objects and related methods (`fdb_create_cluster`, `fdb_cluster_create_database`, `fdb_cluster_set_option`, `fdb_cluster_destroy`, `fdb_future_get_cluster`) have been removed. +* C: Added `fdb_create_database` that creates a new `FDBDatabase` object synchronously and removed `fdb_future_get_database`. +* Python: Removed `fdb.init`, `fdb.create_cluster`, and `fdb.Cluster`. `fdb.open` no longer accepts a `database_name` parameter. +* Java: Deprecated `FDB.createCluster` and `Cluster`. The preferred way to get a `Database` is by using `FDB.open`, which should work in both new and old API versions. +* Java: Removed `Cluster(long cPtr, Executor executor)` constructor. This is API breaking for any code that has subclassed the `Cluster` class, and is not protected by API versioning. +* Ruby: Removed `FDB.init`, `FDB.create_cluster`, and `FDB.Cluster`. `FDB.open` no longer accepts a `database_name` parameter. +* Golang: Deprecated `fdb.StartNetwork`, `fdb.Open`, `fdb.MustOpen`, and `fdb.CreateCluster` and added `fdb.OpenDatabase` and `fdb.MustOpenDatabase`. The preferred way to start the network and get a `Database` is by using `FDB.OpenDatabase` or `FDB.OpenDefault`. +* Flow: Deprecated `API::createCluster` and `Cluster` and added `API::createDatabase`. The preferred way to get a `Database` is by using `API::createDatabase`. 
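+
+For illustration, the new C flow amounts to roughly the following sketch (assumes the v610 header, a network thread already started via ``fdb_setup_network()`` and ``fdb_run_network()``, and the default cluster file; error handling abbreviated)::
+
+    #define FDB_API_VERSION 610
+    #include <foundationdb/fdb_c.h>
+    #include <stdio.h>
+
+    /* Returns a database handle, or NULL on failure; the caller must
+       eventually release the handle with fdb_database_destroy(). */
+    FDBDatabase* open_default_database(void) {
+        FDBDatabase *db = NULL;
+        fdb_error_t err = fdb_create_database(NULL, &db); /* NULL selects the default cluster file */
+        if (err) {
+            fprintf(stderr, "fdb_create_database: %s\n", fdb_get_error(err));
+            return NULL;
+        }
+        return db;
+    }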
+ Other Changes ------------- From 96b9cbcfcf456b423b0b0b6a83cabd0ba7c11a7a Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Mon, 26 Nov 2018 13:07:36 -0800 Subject: [PATCH 004/226] fdb_c.cpp needs access to the legacy types, the C binding test uses the new API, and a couple javadoc links fixed. --- bindings/c/fdb_c.cpp | 1 + bindings/c/foundationdb/fdb_c.h | 14 ++++++++------ bindings/c/test/test.h | 16 +--------------- .../src/main/com/apple/foundationdb/Cluster.java | 2 +- 4 files changed, 11 insertions(+), 22 deletions(-) diff --git a/bindings/c/fdb_c.cpp b/bindings/c/fdb_c.cpp index fe4704039d..cb63feea70 100644 --- a/bindings/c/fdb_c.cpp +++ b/bindings/c/fdb_c.cpp @@ -19,6 +19,7 @@ */ #define FDB_API_VERSION 610 +#define FDB_INCLUDE_LEGACY_TYPES #include "fdbclient/MultiVersionTransaction.h" #include "foundationdb/fdb_c.h" diff --git a/bindings/c/foundationdb/fdb_c.h b/bindings/c/foundationdb/fdb_c.h index d96ecdad44..5c15eb6634 100644 --- a/bindings/c/foundationdb/fdb_c.h +++ b/bindings/c/foundationdb/fdb_c.h @@ -253,9 +253,16 @@ extern "C" { /* LEGACY API VERSIONS */ -#if FDB_API_VERSION < 610 +#if FDB_API_VERSION < 610 || defined FDB_INCLUDE_LEGACY_TYPES typedef struct FDB_cluster FDBCluster; + typedef enum { + // This option is only a placeholder for C compatibility and should not be used + FDB_CLUSTER_OPTION_DUMMY_DO_NOT_USE=-1 + } FDBClusterOption; +#endif + +#if FDB_API_VERSION < 610 DLLEXPORT WARN_UNUSED_RESULT fdb_error_t fdb_future_get_cluster( FDBFuture* f, FDBCluster** out_cluster ); @@ -266,11 +273,6 @@ extern "C" { DLLEXPORT void fdb_cluster_destroy( FDBCluster* c ); - typedef enum { - // This option is only a placeholder for C compatibility and should not be used - FDB_CLUSTER_OPTION_DUMMY_DO_NOT_USE=-1 - } FDBClusterOption; - DLLEXPORT WARN_UNUSED_RESULT fdb_error_t fdb_cluster_set_option( FDBCluster* c, FDBClusterOption option, uint8_t const* value, int value_length ); diff --git a/bindings/c/test/test.h b/bindings/c/test/test.h index 415cc23ccc..03051f30d6 100644 --- a/bindings/c/test/test.h +++ b/bindings/c/test/test.h @@ -236,22 +236,8 @@ FDBDatabase* openDatabase(struct ResultSet *rs, pthread_t *netThread) { checkError(fdb_setup_network(), "setup network", rs); pthread_create(netThread, NULL, &runNetwork, NULL); - FDBFuture *f = fdb_create_cluster(NULL); - checkError(fdb_future_block_until_ready(f), "block for cluster", rs); - - FDBCluster *cluster; - checkError(fdb_future_get_cluster(f, &cluster), "get cluster", rs); - - fdb_future_destroy(f); - - f = fdb_cluster_create_database(cluster, (uint8_t*)"DB", 2); - checkError(fdb_future_block_until_ready(f), "block for database", rs); - FDBDatabase *db; - checkError(fdb_future_get_database(f, &db), "get database", rs); - - fdb_future_destroy(f); - fdb_cluster_destroy(cluster); + checkError(fdb_create_database(NULL, &db), "create database", rs); return db; } diff --git a/bindings/java/src/main/com/apple/foundationdb/Cluster.java b/bindings/java/src/main/com/apple/foundationdb/Cluster.java index fbbcca6d4a..58e4b5f269 100644 --- a/bindings/java/src/main/com/apple/foundationdb/Cluster.java +++ b/bindings/java/src/main/com/apple/foundationdb/Cluster.java @@ -26,7 +26,7 @@ import java.util.concurrent.Executor; * The {@code Cluster} represents a connection to a physical set of cooperating machines * running FoundationDB. A {@code Cluster} is opened with a reference to a cluster file. * - * This class is deprecated. Use {@link #FDB.open} to open a {@link #Database} directly
+ * This class is deprecated. Use {@link FDB#open} to open a {@link Database} directly.
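+ * For example: {@code Database db = FDB.selectAPIVersion(610).open();}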
*
* Note: {@code Cluster} objects must be {@link #close closed} when no longer in use * in order to free any associated resources. From eb2f27b8e557a036aad71bfc93ede97125eb8ab2 Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Fri, 30 Nov 2018 10:46:04 -0800 Subject: [PATCH 005/226] Work in progress implementation of server-side latency tracking. The intent of this is to be able to measure the number of requests that achieve certain latency targets across the system relative to the total number of requests. --- fdbclient/MasterProxyInterface.h | 7 +- fdbclient/StorageServerInterface.h | 7 +- fdbserver/MasterProxyServer.actor.cpp | 68 +++++++--- fdbserver/Status.actor.cpp | 185 +++++++++++++++++++------- fdbserver/storageserver.actor.cpp | 38 +++++- flow/Stats.h | 53 +++++++- 6 files changed, 281 insertions(+), 77 deletions(-) diff --git a/fdbclient/MasterProxyInterface.h b/fdbclient/MasterProxyInterface.h index d04d1ec400..3145cea838 100644 --- a/fdbclient/MasterProxyInterface.h +++ b/fdbclient/MasterProxyInterface.h @@ -25,6 +25,7 @@ #include "FDBTypes.h" #include "StorageServerInterface.h" #include "CommitTransaction.h" +#include "flow/Stats.h" struct MasterProxyInterface { enum { LocationAwareLoadBalance = 1 }; @@ -74,7 +75,7 @@ struct CommitID { CommitID( Version version, uint16_t txnBatchId ) : version(version), txnBatchId(txnBatchId) {} }; -struct CommitTransactionRequest { +struct CommitTransactionRequest : TimedRequest { enum { FLAG_IS_LOCK_AWARE = 0x1, FLAG_FIRST_IN_BATCH = 0x2 @@ -93,7 +94,7 @@ struct CommitTransactionRequest { template void serialize(Ar& ar) { - ar & transaction & reply & arena & flags & debugID; + ar & *(TimedRequest*)this & transaction & reply & arena & flags & debugID; } }; @@ -120,7 +121,7 @@ struct GetReadVersionReply { } }; -struct GetReadVersionRequest { +struct GetReadVersionRequest : TimedRequest { enum { PRIORITY_SYSTEM_IMMEDIATE = 15 << 24, // Highest possible priority, always executed even if writes are otherwise blocked PRIORITY_DEFAULT = 8 << 24, diff --git a/fdbclient/StorageServerInterface.h b/fdbclient/StorageServerInterface.h index 93906b7651..17042d11ff 100644 --- a/fdbclient/StorageServerInterface.h +++ b/fdbclient/StorageServerInterface.h @@ -27,6 +27,7 @@ #include "fdbrpc/QueueModel.h" #include "fdbrpc/fdbrpc.h" #include "fdbrpc/LoadBalance.actor.h" +#include "flow/Stats.h" struct StorageServerInterface { enum { @@ -107,7 +108,7 @@ struct GetValueReply : public LoadBalancedReply { } }; -struct GetValueRequest { +struct GetValueRequest : TimedRequest { Key key; Version version; Optional debugID; @@ -150,7 +151,7 @@ struct GetKeyValuesReply : public LoadBalancedReply { } }; -struct GetKeyValuesRequest { +struct GetKeyValuesRequest : TimedRequest { Arena arena; KeySelectorRef begin, end; Version version; // or latestVersion @@ -178,7 +179,7 @@ struct GetKeyReply : public LoadBalancedReply { } }; -struct GetKeyRequest { +struct GetKeyRequest : TimedRequest { Arena arena; KeySelectorRef sel; Version version; // or latestVersion diff --git a/fdbserver/MasterProxyServer.actor.cpp b/fdbserver/MasterProxyServer.actor.cpp index f31764f964..c85b409239 100644 --- a/fdbserver/MasterProxyServer.actor.cpp +++ b/fdbserver/MasterProxyServer.actor.cpp @@ -55,19 +55,33 @@ struct ProxyStats { Counter conflictRanges; Version lastCommitVersionAssigned; + LatencyBands commitLatencyBands; + LatencyBands grvLatencyBands; + Future logger; explicit ProxyStats(UID id, Version* pVersion, NotifiedVersion* pCommittedVersion, int64_t *commitBatchesMemBytesCountPtr) : 
cc("ProxyStats", id.toString()), txnStartIn("TxnStartIn", cc), txnStartOut("TxnStartOut", cc), txnStartBatch("TxnStartBatch", cc), txnSystemPriorityStartIn("TxnSystemPriorityStartIn", cc), txnSystemPriorityStartOut("TxnSystemPriorityStartOut", cc), txnBatchPriorityStartIn("TxnBatchPriorityStartIn", cc), txnBatchPriorityStartOut("TxnBatchPriorityStartOut", cc), txnDefaultPriorityStartIn("TxnDefaultPriorityStartIn", cc), txnDefaultPriorityStartOut("TxnDefaultPriorityStartOut", cc), txnCommitIn("TxnCommitIn", cc), txnCommitVersionAssigned("TxnCommitVersionAssigned", cc), txnCommitResolving("TxnCommitResolving", cc), txnCommitResolved("TxnCommitResolved", cc), txnCommitOut("TxnCommitOut", cc), - txnCommitOutSuccess("TxnCommitOutSuccess", cc), txnConflicts("TxnConflicts", cc), commitBatchIn("CommitBatchIn", cc), commitBatchOut("CommitBatchOut", cc), mutationBytes("MutationBytes", cc), mutations("Mutations", cc), conflictRanges("ConflictRanges", cc), lastCommitVersionAssigned(0) + txnCommitOutSuccess("TxnCommitOutSuccess", cc), txnConflicts("TxnConflicts", cc), commitBatchIn("CommitBatchIn", cc), commitBatchOut("CommitBatchOut", cc), mutationBytes("MutationBytes", cc), mutations("Mutations", cc), conflictRanges("ConflictRanges", cc), lastCommitVersionAssigned(0), commitLatencyBands("CommitLatency", cc), + grvLatencyBands("GRVLatency", cc) { specialCounter(cc, "LastAssignedCommitVersion", [this](){return this->lastCommitVersionAssigned;}); specialCounter(cc, "Version", [pVersion](){return *pVersion; }); specialCounter(cc, "CommittedVersion", [pCommittedVersion](){ return pCommittedVersion->get(); }); specialCounter(cc, "CommitBatchesMemBytesCount", [commitBatchesMemBytesCountPtr]() { return *commitBatchesMemBytesCountPtr; }); logger = traceCounters("ProxyMetrics", id, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, &cc, "ProxyMetrics"); + + commitLatencyBands.addThreshold(0.001); + commitLatencyBands.addThreshold(0.01); + commitLatencyBands.addThreshold(0.1); + commitLatencyBands.addThreshold(1); + + grvLatencyBands.addThreshold(0.001); + grvLatencyBands.addThreshold(0.01); + grvLatencyBands.addThreshold(0.1); + grvLatencyBands.addThreshold(1); } }; @@ -458,11 +472,13 @@ ACTOR Future commitBatch( ResolutionRequestBuilder requests( self, commitVersion, prevVersion, self->version ); int conflictRangeCount = 0; + state int64_t maxTransactionBytes = 0; for (int t = 0; tdbgid).detail("Snapshot", trs[t].transaction.read_snapshot); //for(auto& m : trs[t].transaction.mutations) + maxTransactionBytes = std::max(maxTransactionBytes, trs[t].transaction.expectedSize()); // TraceEvent("MPTransactionsDump", self->dbgid).detail("Mutation", m.toString()); } self->stats.conflictRanges += conflictRangeCount; @@ -952,16 +968,21 @@ ACTOR Future commitBatch( } // Send replies to clients - for (int t = 0; t < trs.size(); t++) - { + double endTime = timer(); + for (int t = 0; t < trs.size(); t++) { if (committed[t] == ConflictBatch::TransactionCommitted && (!locked || trs[t].isLockAware())) { ASSERT_WE_THINK(commitVersion != invalidVersion); trs[t].reply.send(CommitID(commitVersion, t)); } - else if (committed[t] == ConflictBatch::TransactionTooOld) + else if (committed[t] == ConflictBatch::TransactionTooOld) { trs[t].reply.sendError(transaction_too_old()); - else + } + else { trs[t].reply.sendError(not_committed()); + } + + // TODO: filter if pipelined with large commit + self->stats.commitLatencyBands.addMeasurement(endTime - trs[t].requestTime, maxTransactionBytes > 1e6); } ++self->stats.commitBatchOut; @@ -1049,6 +1070,17 
@@ ACTOR Future fetchVersions(ProxyCommitData *commitData) { } } +ACTOR Future sendGrvReplies(Future replyFuture, std::vector requests, ProxyStats *stats) { + GetReadVersionReply reply = wait(replyFuture); + double end = timer(); + for(GetReadVersionRequest request : requests) { + stats->grvLatencyBands.addMeasurement(end - request.requestTime); + request.reply.send(reply); + } + + return Void(); +} + ACTOR static Future transactionStarter( MasterProxyInterface proxy, MasterInterface master, @@ -1098,7 +1130,7 @@ ACTOR static Future transactionStarter( int defaultPriTransactionsStarted[2] = { 0, 0 }; int batchPriTransactionsStarted[2] = { 0, 0 }; - vector>> start(2); // start[0] is transactions starting with !(flags&CAUSAL_READ_RISKY), start[1] is transactions starting with flags&CAUSAL_READ_RISKY + vector> start(2); // start[0] is transactions starting with !(flags&CAUSAL_READ_RISKY), start[1] is transactions starting with flags&CAUSAL_READ_RISKY Optional debugID; double leftToStart = 0; @@ -1114,7 +1146,6 @@ ACTOR static Future transactionStarter( if (!debugID.present()) debugID = g_nondeterministic_random->randomUniqueID(); g_traceBatch.addAttach("TransactionAttachID", req.debugID.get().first(), debugID.get().first()); } - start[req.flags & 1].push_back(std::move(req.reply)); static_assert(GetReadVersionRequest::FLAG_CAUSAL_READ_RISKY == 1, "Implementation dependent on flag value"); transactionsStarted[req.flags&1] += tc; if (req.priority() >= GetReadVersionRequest::PRIORITY_SYSTEM_IMMEDIATE) @@ -1124,6 +1155,7 @@ ACTOR static Future transactionStarter( else batchPriTransactionsStarted[req.flags & 1] += tc; + start[req.flags & 1].push_back(std::move(req)); static_assert(GetReadVersionRequest::FLAG_CAUSAL_READ_RISKY == 1, "Implementation dependent on flag value"); transactionQueue.pop(); } @@ -1141,20 +1173,22 @@ ACTOR static Future transactionStarter( .detail("TransactionBudget", transactionBudget) .detail("LastLeftToStart", leftToStart);*/ - // dynamic batching - ReplyPromise GRVReply; - if (start[0].size()){ - start[0].push_back(GRVReply); // for now, base dynamic batching on the time for normal requests (not read_risky) - addActor.send(timeReply(GRVReply.getFuture(), replyTimes)); - } - transactionCount += transactionsStarted[0] + transactionsStarted[1]; transactionBudget = std::max(std::min(nTransactionsToStart - transactionsStarted[0] - transactionsStarted[1], SERVER_KNOBS->START_TRANSACTION_MAX_BUDGET_SIZE), -SERVER_KNOBS->START_TRANSACTION_MAX_BUDGET_SIZE); - if (debugID.present()) + + if (debugID.present()) { g_traceBatch.addEvent("TransactionDebug", debugID.get().first(), "MasterProxyServer.masterProxyServerCore.Broadcast"); - for (int i = 0; i readVersionReply = getLiveCommittedVersion(commitData, i, &otherProxies, debugID, transactionsStarted[i], systemTransactionsStarted[i], defaultPriTransactionsStarted[i], batchPriTransactionsStarted[i]); + addActor.send(sendGrvReplies(readVersionReply, start[i], &commitData->stats)); + + // for now, base dynamic batching on the time for normal requests (not read_risky) + if (i == 0) { + addActor.send(timeReply(readVersionReply, replyTimes)); + } } } } diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp index 709ad6109f..e5b769858d 100644 --- a/fdbserver/Status.actor.cpp +++ b/fdbserver/Status.actor.cpp @@ -160,39 +160,52 @@ static Optional> getWorker(std::map bands; + + for(auto itr = metrics.begin(); itr != metrics.end(); ++itr) { + bool regularMeasurement = itr->first.substr(0, regularPrefix.size()) == regularPrefix; + 
if(!regularMeasurement && itr->first.substr(0, filteredPrefix.size()) != filteredPrefix) { + continue; + } + + std::string band = itr->first.substr(regularMeasurement ? regularPrefix.size() : filteredPrefix.size()); + //bands[band][regularMeasurement ? "counted" : "filtered"] = StatusCounter(itr->second).getCounter(); + latency[band] = StatusCounter(itr->second).getCounter(); + } + /*for(auto itr : bands) { + latency[itr.first] = itr.second; + }*/ + obj["read_latency_bands"] = latency; + Version version = parseInt64(metrics.getValue("Version")); Version durableVersion = parseInt64(metrics.getValue("DurableVersion")); @@ -453,8 +487,11 @@ struct RolesInfo { if(e.code() != error_code_attribute_not_found) throw e; } - if (pDataLagSeconds) + + if (pDataLagSeconds) { *pDataLagSeconds = dataLagSeconds; + } + return roles.insert( std::make_pair(iface.address(), obj ))->second; } JsonBuilderObject& addRole(std::string const& role, TLogInterface& iface, TraceEventFields const& metrics, Version* pMetricVersion) { @@ -483,6 +520,38 @@ struct RolesInfo { *pMetricVersion = metricVersion; return roles.insert( std::make_pair(iface.address(), obj ))->second; } + JsonBuilderObject& addRole(std::string const& role, MasterProxyInterface& iface, TraceEventFields const& metrics) { + JsonBuilderObject obj; + obj["id"] = iface.id().shortString(); + obj["role"] = role; + try { + std::string grvPrefix = "GRVLatency"; + std::string commitPrefix = "CommitLatency"; + + JsonBuilderObject grvLatency; + JsonBuilderObject commitLatency; + + for(auto itr = metrics.begin(); itr != metrics.end(); ++itr) { + if(itr->first.substr(0, grvPrefix.size()) == grvPrefix) { + std::string band = itr->first.substr(grvPrefix.size()); + grvLatency[band] = StatusCounter(itr->second).getCounter(); + } + else if(itr->first.substr(0, commitPrefix.size()) == commitPrefix) { + std::string band = itr->first.substr(commitPrefix.size()); + commitLatency[band] = StatusCounter(itr->second).getCounter(); + } + } + + obj["grv_latency_bands"] = grvLatency; + obj["commit_latency_bands"] = commitLatency; + } catch (Error &e) { + if(e.code() != error_code_attribute_not_found) { + throw e; + } + } + + return roles.insert( std::make_pair(iface.address(), obj ))->second; + } template JsonBuilderObject& addRole(std::string const& role, InterfaceType& iface) { return addRole(iface.address(), role, iface.id()); @@ -509,6 +578,7 @@ ACTOR static Future processStatusFetcher( std::map processIssues, vector> storageServers, vector> tLogs, + vector> proxies, Database cx, Optional configuration, std::set *incomplete_reasons) { @@ -567,13 +637,10 @@ ACTOR static Future processStatusFetcher( roles.addRole("master", db->get().master); roles.addRole("cluster_controller", db->get().clusterInterface.clientInterface); - state Reference proxies = cx->getMasterProxies(); - if (proxies) { - state int proxyIndex; - for(proxyIndex = 0; proxyIndex < proxies->size(); proxyIndex++) { - roles.addRole( "proxy", proxies->getInterface(proxyIndex) ); - Void _ = wait(yield()); - } + state std::vector>::iterator proxy; + for(proxy = proxies.begin(); proxy != proxies.end(); ++proxy) { + roles.addRole( "proxy", proxy->first, proxy->second ); + Void _ = wait(yield()); } state std::vector>::iterator log; @@ -1217,10 +1284,10 @@ namespace std } ACTOR template -static Future>> getServerMetrics(vector servers, std::unordered_map address_workers, std::string suffix) { +static Future>> getServerMetrics(vector servers, std::unordered_map address_workers, std::string eventName, bool useId) { state 
vector>> futures; for (auto s : servers) { - futures.push_back(latestEventOnWorker(address_workers[s.address()], s.id().toString() + suffix)); + futures.push_back(latestEventOnWorker(address_workers[s.address()], (useId ? s.id().toString() + "/" + eventName : eventName))); } Void _ = wait(waitForAll(futures)); @@ -1234,13 +1301,26 @@ static Future>> getServerMetrics(vecto ACTOR static Future>> getStorageServersAndMetrics(Database cx, std::unordered_map address_workers) { vector servers = wait(timeoutError(getStorageServers(cx, true), 5.0)); - vector> results = wait(getServerMetrics(servers, address_workers, "/StorageMetrics")); + vector> results = wait(getServerMetrics(servers, address_workers, "StorageMetrics", true)); return results; } ACTOR static Future>> getTLogsAndMetrics(Reference> db, std::unordered_map address_workers) { vector servers = db->get().logSystemConfig.allPresentLogs(); - vector> results = wait(getServerMetrics(servers, address_workers, "/TLogMetrics")); + vector> results = wait(getServerMetrics(servers, address_workers, "TLogMetrics", true)); + return results; +} + +ACTOR static Future>> getProxiesAndMetrics(Database cx, std::unordered_map address_workers) { + Reference proxyInfo = cx->getMasterProxies(); + std::vector servers; + if(proxyInfo) { + for(int i = 0; i < proxyInfo->size(); ++i) { + servers.push_back(proxyInfo->getInterface(i)); + } + } + + vector> results = wait(getServerMetrics(servers, address_workers, "ProxyMetrics", false)); return results; } @@ -1782,6 +1862,7 @@ ACTOR Future clusterGetStatus( state std::map processIssues = getProcessIssuesAsMessages(workerIssues); state vector> storageServers; state vector> tLogs; + state vector> proxies; state JsonBuilderObject qos; state JsonBuilderObject data_overlay; @@ -1816,10 +1897,13 @@ ACTOR Future clusterGetStatus( // Start getting storage servers now (using system priority) concurrently. Using sys priority because having storage servers // in status output is important to give context to error messages in status that reference a storage server role ID. 
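+	// address_workers (built below) maps each worker's network address to its interface so that
+	// per-role metric events (storage, TLog, proxy) can be fetched from the process hosting the role.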
state std::unordered_map address_workers; - for (auto worker : workers) + for (auto worker : workers) { address_workers[worker.first.address()] = worker.first; + } + state Future>>> storageServerFuture = errorOr(getStorageServersAndMetrics(cx, address_workers)); state Future>>> tLogFuture = errorOr(getTLogsAndMetrics(db, address_workers)); + state Future>>> proxyFuture = errorOr(getProxiesAndMetrics(cx, address_workers)); state int minReplicasRemaining = -1; std::vector> futures2; @@ -1876,16 +1960,27 @@ ACTOR Future clusterGetStatus( if (_storageServers.present()) { storageServers = _storageServers.get(); } - else + else { messages.push_back(JsonBuilder::makeMessage("storage_servers_error", "Timed out trying to retrieve storage servers.")); + } // ...also tlogs ErrorOr>> _tLogs = wait(tLogFuture); if (_tLogs.present()) { tLogs = _tLogs.get(); } - else + else { messages.push_back(JsonBuilder::makeMessage("log_servers_error", "Timed out trying to retrieve log servers.")); + } + + // ...also proxies + ErrorOr>> _proxies = wait(proxyFuture); + if (_proxies.present()) { + proxies = _proxies.get(); + } + else { + messages.push_back(JsonBuilder::makeMessage("proxies_error", "Timed out trying to retrieve proxies.")); + } } else { // Set layers status to { _valid: false, error: "configurationMissing"} @@ -1895,7 +1990,7 @@ ACTOR Future clusterGetStatus( statusObj["layers"] = layers; } - JsonBuilderObject processStatus = wait(processStatusFetcher(db, workers, pMetrics, mMetrics, latestError, traceFileOpenErrors, programStarts, processIssues, storageServers, tLogs, cx, configuration, &status_incomplete_reasons)); + JsonBuilderObject processStatus = wait(processStatusFetcher(db, workers, pMetrics, mMetrics, latestError, traceFileOpenErrors, programStarts, processIssues, storageServers, tLogs, proxies, cx, configuration, &status_incomplete_reasons)); statusObj["processes"] = processStatus; statusObj["clients"] = clientStatusFetcher(clientVersionMap, traceLogGroupMap); diff --git a/fdbserver/storageserver.actor.cpp b/fdbserver/storageserver.actor.cpp index 4b903f4065..63b0787c8b 100644 --- a/fdbserver/storageserver.actor.cpp +++ b/fdbserver/storageserver.actor.cpp @@ -55,6 +55,9 @@ using std::make_pair; #define SHORT_CIRCUT_ACTUAL_STORAGE 0 +int64_t MAX_RESULT_SIZE = 1e4; +int64_t MAX_SELECTOR_OFFSET = 1e2; + struct StorageServer; class ValueOrClearToRef { public: @@ -413,6 +416,8 @@ public: Counter loops; Counter fetchWaitingMS, fetchWaitingCount, fetchExecutingMS, fetchExecutingCount; + LatencyBands readLatencyBands; + Counters(StorageServer* self) : cc("StorageServer", self->thisServerID.toString()), getKeyQueries("GetKeyQueries", cc), @@ -437,7 +442,8 @@ public: fetchWaitingMS("FetchWaitingMS", cc), fetchWaitingCount("FetchWaitingCount", cc), fetchExecutingMS("FetchExecutingMS", cc), - fetchExecutingCount("FetchExecutingCount", cc) + fetchExecutingCount("FetchExecutingCount", cc), + readLatencyBands("ReadLatency", cc) { specialCounter(cc, "LastTLogVersion", [self](){ return self->lastTLogVersion; }); specialCounter(cc, "Version", [self](){ return self->version.get(); }); @@ -459,6 +465,12 @@ public: specialCounter(cc, "KvstoreBytesFree", [self](){ return self->storage.getStorageBytes().free; }); specialCounter(cc, "KvstoreBytesAvailable", [self](){ return self->storage.getStorageBytes().available; }); specialCounter(cc, "KvstoreBytesTotal", [self](){ return self->storage.getStorageBytes().total; }); + + readLatencyBands.addThreshold(0.0001); + readLatencyBands.addThreshold(0.001); + 
readLatencyBands.addThreshold(0.01); + readLatencyBands.addThreshold(0.1); + readLatencyBands.addThreshold(1); } } counters; @@ -705,15 +717,16 @@ ACTOR Future waitForVersionNoTooOld( StorageServer* data, Version versi } ACTOR Future getValueQ( StorageServer* data, GetValueRequest req ) { - state double startTime = timer(); + state int64_t resultSize = 0; + try { - // Active load balancing runs at a very high priority (to obtain accurate queue lengths) - // so we need to downgrade here ++data->counters.getValueQueries; ++data->counters.allQueries; ++data->readQueueSizeMetric; data->maxQueryQueue = std::max( data->maxQueryQueue, data->counters.allQueries.getValue() - data->counters.finishedQueries.getValue()); + // Active load balancing runs at a very high priority (to obtain accurate queue lengths) + // so we need to downgrade here Void _ = wait( delay(0, TaskDefaultEndpoint) ); if( req.debugID.present() ) @@ -760,7 +773,8 @@ ACTOR Future getValueQ( StorageServer* data, GetValueRequest req ) { if (v.present()) { ++data->counters.rowsQueried; - data->counters.bytesQueried += v.get().size(); + resultSize = v.get().size(); + data->counters.bytesQueried += resultSize; } if( req.debugID.present() ) @@ -776,6 +790,7 @@ ACTOR Future getValueQ( StorageServer* data, GetValueRequest req ) { ++data->counters.finishedQueries; --data->readQueueSizeMetric; + data->counters.readLatencyBands.addMeasurement(timer()-req.requestTime, resultSize > MAX_RESULT_SIZE); return Void(); }; @@ -1211,6 +1226,8 @@ ACTOR Future getKeyValues( StorageServer* data, GetKeyValuesRequest req ) // Throws a wrong_shard_server if the keys in the request or result depend on data outside this server OR if a large selector offset prevents // all data from being read in one range read { + state int64_t resultSize = 0; + ++data->counters.getRangeQueries; ++data->counters.allQueries; ++data->readQueueSizeMetric; @@ -1299,8 +1316,9 @@ ACTOR Future getKeyValues( StorageServer* data, GetKeyValuesRequest req ) r.penalty = data->getPenalty(); req.reply.send( r ); + resultSize = req.limitBytes - remainingLimitBytes; + data->counters.bytesQueried += resultSize; data->counters.rowsQueried += r.data.size(); - data->counters.bytesQueried += req.limitBytes - remainingLimitBytes; } } catch (Error& e) { if (e.code() == error_code_internal_error || e.code() == error_code_actor_cancelled) throw; @@ -1309,11 +1327,14 @@ ACTOR Future getKeyValues( StorageServer* data, GetKeyValuesRequest req ) ++data->counters.finishedQueries; --data->readQueueSizeMetric; + data->counters.readLatencyBands.addMeasurement(timer()-req.requestTime, resultSize > MAX_RESULT_SIZE || abs(req.begin.offset) > MAX_SELECTOR_OFFSET || abs(req.end.offset) > MAX_SELECTOR_OFFSET); return Void(); } ACTOR Future getKey( StorageServer* data, GetKeyRequest req ) { + state int64_t resultSize = 0; + ++data->counters.getKeyQueries; ++data->counters.allQueries; ++data->readQueueSizeMetric; @@ -1340,8 +1361,10 @@ ACTOR Future getKey( StorageServer* data, GetKeyRequest req ) { updated = firstGreaterOrEqual(k)+offset-1; // first thing on next shard OR (large offset case) keyAfter largest key retrieved in range read else updated = KeySelectorRef(k,true,0); //found + + resultSize = k.size(); + data->counters.bytesQueried += resultSize; ++data->counters.rowsQueried; - data->counters.bytesQueried += k.size(); GetKeyReply reply(updated); reply.penalty = data->getPenalty(); @@ -1355,6 +1378,7 @@ ACTOR Future getKey( StorageServer* data, GetKeyRequest req ) { ++data->counters.finishedQueries; 
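 	// The "filtered" flag on the measurement below routes oversized results and large selector offsets
 	// into the parallel Filtered* band counters, so they stay visible without skewing the main distribution.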
 	--data->readQueueSizeMetric;
+	data->counters.readLatencyBands.addMeasurement(timer()-req.requestTime, resultSize > MAX_RESULT_SIZE || abs(req.sel.offset) > MAX_SELECTOR_OFFSET);
 
 	return Void();
 }
 
diff --git a/flow/Stats.h b/flow/Stats.h
index 90e2f43432..bf19c9df8a 100644
--- a/flow/Stats.h
+++ b/flow/Stats.h
@@ -36,6 +36,17 @@ MyCounters() : foo("foo", cc), bar("bar", cc), baz("baz", cc) {}
 #include "flow.h"
 #include "TDMetric.actor.h"
 
+struct TimedRequest {
+	double requestTime;
+
+	TimedRequest() {
+		requestTime = timer();
+	}
+
+	template <class Ar>
+	void serialize(Ar& ar) {}
+};
+
 struct ICounter {
 	// All counters have a name and value
 	virtual std::string const& getName() const = 0;
@@ -60,7 +71,7 @@ struct CounterCollection {
 	std::string id;
 };
 
-struct Counter : ICounter {
+struct Counter : ICounter, NonCopyable {
 public:
 	typedef int64_t Value;
 
@@ -88,7 +99,7 @@ private:
 };
 
 template <class F>
-struct SpecialCounter : ICounter, FastAllocated<SpecialCounter<F>> {
+struct SpecialCounter : ICounter, FastAllocated<SpecialCounter<F>>, NonCopyable {
 	SpecialCounter(CounterCollection& collection, std::string const& name, F && f) : name(name), f(f) { collection.counters.push_back(this); collection.counters_to_remove.push_back(this); }
 	virtual void remove() { delete this; }
 
@@ -108,6 +119,47 @@ struct SpecialCounter : ICounter, FastAllocated<SpecialCounter<F>> {
 template <class F>
 static void specialCounter(CounterCollection& collection, std::string const& name, F && f) { new SpecialCounter<F>(collection, name, std::move(f)); }
 
+class LatencyBands {
+public:
+	LatencyBands(std::string name, CounterCollection &cc) : name(name), cc(cc) {
+		addThreshold(std::numeric_limits<double>::infinity());
+	}
+
+	void addThreshold(double value) {
+		if(value > 0 && bands.count(value) == 0) {
+			bands.insert(std::make_pair(value, new Counter(format("%s%f", name.c_str(), value), cc)));
+			filteredBands.insert(std::make_pair(value, new Counter(format("Filtered%s%f", name.c_str(), value), cc)));
+		}
+	}
+
+	void addMeasurement(double measurement, bool filtered=false) {
+		const auto &targetBands = filtered ? filteredBands : bands;
+		auto itr = targetBands.upper_bound(measurement);
+		if(itr == targetBands.end()) {
+			fprintf(stderr, "Can't add measurement %lf\n", measurement);
+		}
+		ASSERT(itr != targetBands.end());
+		++(*itr->second);
+	}
+
+	~LatencyBands() {
+		for(auto itr = bands.begin(); itr != bands.end(); ++itr) {
+			delete itr->second;
+		}
+		for(auto itr = filteredBands.begin(); itr != filteredBands.end(); ++itr) {
+			delete itr->second;
+		}
+	}
+
+private:
+	std::map<double, Counter*> bands;
+	std::map<double, Counter*> filteredBands;
+
+	std::string name;
+	CounterCollection &cc;
+};
+
+
 Future<Void> traceCounters(std::string const& traceEventName, UID const& traceEventID, double const& interval, CounterCollection* const& counters, std::string const& trackLatestName = std::string());
 
 #endif
\ No newline at end of file

From 0401d04380dc0569e255da0c393ce5f54ae596e1 Mon Sep 17 00:00:00 2001
From: Pieter Joost van de Sande
Date: Tue, 8 Jan 2019 09:44:16 +0100
Subject: [PATCH 006/226] fdb.Key implements fmt.Stringer interface

Add the ability to get a human-readable string representation of a
fdb.Key by satisfying the fmt.Stringer interface. This allows keys to
be used with the fmt package.
---
 bindings/go/src/fdb/fdb.go | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/bindings/go/src/fdb/fdb.go b/bindings/go/src/fdb/fdb.go
index c73a27541f..1f6b8d2827 100644
--- a/bindings/go/src/fdb/fdb.go
+++ b/bindings/go/src/fdb/fdb.go
@@ -30,6 +30,7 @@ package fdb
 import "C"
 
 import (
+	"encoding/hex"
 	"fmt"
 	"log"
 	"runtime"
@@ -372,6 +373,12 @@ type KeyConvertible interface {
 // bytes. Key implements the KeyConvertible interface.
type Key []byte +// String returns human readable hexadecimal encoding of the key. +func (k Key) String() string { + return hex.EncodeToString(k) +} + + // FDBKey allows Key to (trivially) satisfy the KeyConvertible interface. func (k Key) FDBKey() Key { return k From 4722fa066700680682e36ff0316d47414d1f03cd Mon Sep 17 00:00:00 2001 From: Pieter Joost van de Sande Date: Wed, 9 Jan 2019 00:03:44 +0100 Subject: [PATCH 007/226] change fdb string representation to common format --- bindings/go/src/fdb/fdb.go | 18 ++++++++++++++++++ bindings/go/src/fdb/fdb_test.go | 19 ++++++++++++++++++- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/bindings/go/src/fdb/fdb.go b/bindings/go/src/fdb/fdb.go index 1f6b8d2827..b992c40b17 100644 --- a/bindings/go/src/fdb/fdb.go +++ b/bindings/go/src/fdb/fdb.go @@ -34,6 +34,7 @@ import ( "fmt" "log" "runtime" + "strings" "sync" "unsafe" ) @@ -384,6 +385,23 @@ func (k Key) FDBKey() Key { return k } +// String describes the key as a hexadecimal encoded string. +func (k Key) String() string { + var sb strings.Builder + for _, b := range k { + if b >= 32 && b < 127 && b != '\\' { + sb.WriteByte(b) + continue + } + if b == '\\' { + sb.WriteString("\\\\") + continue + } + sb.WriteString(fmt.Sprintf("\\x%x", b)) + } + return sb.String() +} + func panicToError(e *error) { if r := recover(); r != nil { fe, ok := r.(Error) diff --git a/bindings/go/src/fdb/fdb_test.go b/bindings/go/src/fdb/fdb_test.go index ee175e59b0..562ab5ff13 100644 --- a/bindings/go/src/fdb/fdb_test.go +++ b/bindings/go/src/fdb/fdb_test.go @@ -24,8 +24,9 @@ package fdb_test import ( "fmt" - "github.com/apple/foundationdb/bindings/go/src/fdb" "testing" + + "github.com/apple/foundationdb/bindings/go/src/fdb" ) func ExampleOpenDefault() { @@ -261,3 +262,19 @@ func ExampleRangeIterator() { // banana is bar // cherry is baz } + +func TestKeyToString(t *testing.T) { + cases := []struct { + key fdb.Key + expect string + }{ + {fdb.Key("plain-text"), "plain-text"}, + {fdb.Key("\xbdascii☻☺"), "\\xbdascii\\xe2\\x98\\xbb\\xe2\\x98\\xba"}, + } + + for i, c := range cases { + if s := c.key.String(); s != c.expect { + t.Errorf("got '%v', want '%v' at case %v", s, c.expect, i) + } + } +} From 696a876fd43fc3e9da133d79440f411be61dd16e Mon Sep 17 00:00:00 2001 From: Pieter Joost van de Sande Date: Wed, 9 Jan 2019 00:17:23 +0100 Subject: [PATCH 008/226] fix String duplication for fdb.Key --- bindings/go/src/fdb/fdb.go | 7 ------- 1 file changed, 7 deletions(-) diff --git a/bindings/go/src/fdb/fdb.go b/bindings/go/src/fdb/fdb.go index b992c40b17..5dea6d0383 100644 --- a/bindings/go/src/fdb/fdb.go +++ b/bindings/go/src/fdb/fdb.go @@ -30,7 +30,6 @@ package fdb import "C" import ( - "encoding/hex" "fmt" "log" "runtime" @@ -374,12 +373,6 @@ type KeyConvertible interface { // bytes. Key implements the KeyConvertible interface. type Key []byte -// String returns human readable hexadecimal encoding of the key. -func (k Key) String() string { - return hex.EncodeToString(k) -} - - // FDBKey allows Key to (trivially) satisfy the KeyConvertible interface. func (k Key) FDBKey() Key { return k From 9f37e1f9b8cd7636dd69dcb4c32c2e43e5828e64 Mon Sep 17 00:00:00 2001 From: Alvin Moore Date: Wed, 9 Jan 2019 08:19:39 -0800 Subject: [PATCH 009/226] Switch from wget to curl in order to support the download scheme of file://. 
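Two things motivate the switch. curl accepts file:// URLs, so FDB_WEBSITE can
point at the website directory that is now ADDed into the image, allowing the
runtime image to be built without network access. It also sidesteps a latent
bug in download_multiversion_libraries.bash: wget's lowercase -o names the log
file (the download target is the capital -O), so the old loop wrote a log into
$destination rather than the library itself, while curl's -o writes the
downloaded body to the given path, which is what the script intended.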
---
 packaging/docker/Dockerfile                           | 10 ++++++----
 packaging/docker/download_multiversion_libraries.bash |  4 ++--
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/packaging/docker/Dockerfile b/packaging/docker/Dockerfile
index b5cd01ee00..7468276c72 100644
--- a/packaging/docker/Dockerfile
+++ b/packaging/docker/Dockerfile
@@ -22,7 +22,7 @@ FROM ubuntu:18.04
 
 # Install dependencies
 
 RUN apt-get update && \
-	apt-get install -y wget=1.19.4-1ubuntu2 \
+	apt-get install -y curl=7.58.0-2ubuntu3.5 \
 		dnsutils=1:9.11.3+dfsg-1ubuntu1.3 && \
 	rm -r /var/lib/apt/lists/*
 
@@ -32,7 +32,9 @@ ARG FDB_VERSION
 ARG FDB_WEBSITE=https://www.foundationdb.org
 
 WORKDIR /var/fdb/tmp
-RUN wget $FDB_WEBSITE/downloads/$FDB_VERSION/linux/fdb_$FDB_VERSION.tar.gz && \
+ADD website /mnt/website
+RUN ls -l /mnt/website
+RUN curl $FDB_WEBSITE/downloads/$FDB_VERSION/linux/fdb_$FDB_VERSION.tar.gz -o fdb_$FDB_VERSION.tar.gz && \
 	tar -xzf fdb_$FDB_VERSION.tar.gz --strip-components=1 && \
 	rm fdb_$FDB_VERSION.tar.gz && \
 	chmod u+x fdb* && \
@@ -47,7 +49,7 @@ ARG FDB_ADDITIONAL_VERSIONS="5.1.7"
 
 COPY download_multiversion_libraries.bash scripts/
 
-RUN wget $FDB_WEBSITE/downloads/$FDB_VERSION/linux/libfdb_c_$FDB_VERSION.so -O /usr/lib/libfdb_c.so && \
+RUN curl $FDB_WEBSITE/downloads/$FDB_VERSION/linux/libfdb_c_$FDB_VERSION.so -o /usr/lib/libfdb_c.so && \
 	bash scripts/download_multiversion_libraries.bash $FDB_WEBSITE $FDB_ADDITIONAL_VERSIONS
 
 # Set Up Runtime Scripts and Directories
@@ -68,4 +70,4 @@ ENV FDB_CLUSTER_FILE /var/fdb/fdb.cluster
 ENV FDB_NETWORKING_MODE container
 ENV FDB_COORDINATOR ""
 ENV FDB_CLUSTER_FILE_CONTENTS ""
-ENV FDB_PROCESS_CLASS unset
\ No newline at end of file
+ENV FDB_PROCESS_CLASS unset
diff --git a/packaging/docker/download_multiversion_libraries.bash b/packaging/docker/download_multiversion_libraries.bash
index 4df401c6ea..1cd5770ff3 100644
--- a/packaging/docker/download_multiversion_libraries.bash
+++ b/packaging/docker/download_multiversion_libraries.bash
@@ -27,5 +27,5 @@ for version in $*; do
 	origin=$website/downloads/$version/linux/libfdb_c_$version.so
 	destination=/usr/lib/fdb/multiversion/libfdb_c_$version.so
 	echo "Downloading $origin to $destination"
-	wget $origin -o $destination
-done
\ No newline at end of file
+	curl $origin -o $destination
+done

From b96c21210a492c0eb4d7019463cb106a140f20e1 Mon Sep 17 00:00:00 2001
From: Pieter Joost van de Sande
Date: Thu, 10 Jan 2019 08:44:30 +0100
Subject: [PATCH 010/226] fix padding in key printing

---
 bindings/go/src/fdb/fdb.go      | 2 +-
 bindings/go/src/fdb/fdb_test.go | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/bindings/go/src/fdb/fdb.go b/bindings/go/src/fdb/fdb.go
index 5dea6d0383..4037ac3360 100644
--- a/bindings/go/src/fdb/fdb.go
+++ b/bindings/go/src/fdb/fdb.go
@@ -390,7 +390,7 @@ func (k Key) String() string {
 			sb.WriteString("\\\\")
 			continue
 		}
-		sb.WriteString(fmt.Sprintf("\\x%x", b))
+		sb.WriteString(fmt.Sprintf("\\x%02x", b))
 	}
 	return sb.String()
 }
diff --git a/bindings/go/src/fdb/fdb_test.go b/bindings/go/src/fdb/fdb_test.go
index 562ab5ff13..768e93c8fe 100644
--- a/bindings/go/src/fdb/fdb_test.go
+++ b/bindings/go/src/fdb/fdb_test.go
@@ -269,6 +269,7 @@ func TestKeyToString(t *testing.T) {
 		expect string
 	}{
 		{fdb.Key("plain-text"), "plain-text"},
+		{fdb.Key([]byte{0}), "\\x00"},
 		{fdb.Key("\xbdascii☻☺"), "\\xbdascii\\xe2\\x98\\xbb\\xe2\\x98\\xba"},
 	}

From ead5bb5bc6ddcca03e0d0b981e2f925e326788c7 Mon Sep 17 00:00:00 2001
From: Pieter Joost van de Sande
Date: Thu, 10 Jan 2019 08:57:11 +0100
Subject: [PATCH 011/226] add
Printable example to docs --- bindings/go/src/fdb/fdb.go | 9 ++++++++- bindings/go/src/fdb/fdb_test.go | 7 ++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/bindings/go/src/fdb/fdb.go b/bindings/go/src/fdb/fdb.go index 4037ac3360..212e712a31 100644 --- a/bindings/go/src/fdb/fdb.go +++ b/bindings/go/src/fdb/fdb.go @@ -380,8 +380,15 @@ func (k Key) FDBKey() Key { // String describes the key as a hexadecimal encoded string. func (k Key) String() string { + return Printable(k) +} + +// Printable returns a human readable version of a byte array. The bytes that correspond with +// ASCII printable characters [32-127) are passed through. Other bytes are +// replaced with \x followed by a two character zero-padded hex code for byte. +func Printable(d []byte) string { var sb strings.Builder - for _, b := range k { + for _, b := range d { if b >= 32 && b < 127 && b != '\\' { sb.WriteByte(b) continue diff --git a/bindings/go/src/fdb/fdb_test.go b/bindings/go/src/fdb/fdb_test.go index 768e93c8fe..ed9478878a 100644 --- a/bindings/go/src/fdb/fdb_test.go +++ b/bindings/go/src/fdb/fdb_test.go @@ -268,8 +268,8 @@ func TestKeyToString(t *testing.T) { key fdb.Key expect string }{ - {fdb.Key("plain-text"), "plain-text"}, {fdb.Key([]byte{0}), "\\x00"}, + {fdb.Key("plain-text"), "plain-text"}, {fdb.Key("\xbdascii☻☺"), "\\xbdascii\\xe2\\x98\\xbb\\xe2\\x98\\xba"}, } @@ -279,3 +279,8 @@ func TestKeyToString(t *testing.T) { } } } + +func ExamplePrintable() { + fmt.Println(fdb.Printable([]byte{0, 1, 2, 'a', 'b', 'c', '1', '2', '3', '!', '?', 255})) + // Output: \x00\x01\x02abc123!?\xff +} From bfa97d7ff206a38b7e39cbd962b1871f3014d962 Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Thu, 10 Jan 2019 12:28:14 -0800 Subject: [PATCH 012/226] Address review comments --- bindings/c/fdb_c.cpp | 11 ------ bindings/go/src/fdb/fdb.go | 8 ++--- bindings/java/fdbJNI.cpp | 8 ++--- .../apple/foundationdb/ClusterOptions.java | 34 +++++++++++++++++-- .../main/com/apple/foundationdb/Database.java | 2 +- .../src/main/com/apple/foundationdb/FDB.java | 8 ++--- bindings/python/fdb/impl.py | 4 +-- documentation/sphinx/source/release-notes.rst | 2 +- 8 files changed, 44 insertions(+), 33 deletions(-) diff --git a/bindings/c/fdb_c.cpp b/bindings/c/fdb_c.cpp index 52e055c7f2..90afdf243d 100644 --- a/bindings/c/fdb_c.cpp +++ b/bindings/c/fdb_c.cpp @@ -134,16 +134,6 @@ fdb_error_t fdb_add_network_thread_completion_hook(void (*hook)(void*), void *ho CATCH_AND_RETURN( API->addNetworkThreadCompletionHook(hook, hook_parameter); ); } - -extern "C" DLLEXPORT -FDBFuture* fdb_cluster_configure_database_v12( FDBCluster* c, int config_type, - int config_mode, uint8_t const* db_name, - int db_name_length ) -{ - // Obsolete, but needed for linker compatibility with api version 12 and below - return (FDBFuture*)ThreadFuture(client_invalid_operation()).extractPtr(); -} - extern "C" DLLEXPORT void fdb_future_cancel( FDBFuture* f ) { CATCH_AND_DIE( @@ -693,7 +683,6 @@ fdb_error_t fdb_select_api_version_impl( int runtime_version, int header_version FDB_API_CHANGED( fdb_transaction_get, 14 ); FDB_API_CHANGED( fdb_setup_network, 14 ); FDB_API_CHANGED( fdb_transaction_set_option, 14 ); - FDB_API_REMOVED( fdb_cluster_configure_database, 13 ); /* End versioned API changes */ return error_code_success; diff --git a/bindings/go/src/fdb/fdb.go b/bindings/go/src/fdb/fdb.go index 336d8a713d..b1e6f45513 100644 --- a/bindings/go/src/fdb/fdb.go +++ b/bindings/go/src/fdb/fdb.go @@ -230,7 +230,7 @@ func StartNetwork() error { return startNetwork() } 
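 // A migration sketch, not part of this patch, using only entry points named in
 // the release notes for this series: the deprecated two-step open
 //
 //	cluster, _ := fdb.CreateCluster(fdb.DefaultClusterFile)
 //	db, _ := cluster.OpenDatabase([]byte("DB"))
 //
 // collapses into a single call now that clusters are gone from the C API:
 //
 //	db, _ := fdb.OpenDatabase(fdb.DefaultClusterFile)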
-// DefaultClusterFile should be passed to fdb.Open to allow the FoundationDB C +// DefaultClusterFile should be passed to fdb.Open to allow the FoundationDB C // library to select the platform-appropriate default cluster file on the current machine. const DefaultClusterFile string = "" @@ -309,10 +309,6 @@ func MustOpen(clusterFile string, dbName []byte) Database { return db } -func createCluster(clusterFile string) (Cluster, error) { - return Cluster{clusterFile}, nil -} - func createDatabase(clusterFile string) (Database, error) { var cf *C.char @@ -347,7 +343,7 @@ func CreateCluster(clusterFile string) (Cluster, error) { return Cluster{}, errNetworkNotSetup } - return createCluster(clusterFile) + return Cluster{clusterFile}, nil } func byteSliceToPtr(b []byte) *C.uint8_t { diff --git a/bindings/java/fdbJNI.cpp b/bindings/java/fdbJNI.cpp index 2a7e47f451..19590d60e3 100644 --- a/bindings/java/fdbJNI.cpp +++ b/bindings/java/fdbJNI.cpp @@ -38,14 +38,14 @@ #error Missing thread local storage #endif -static JavaVM* g_jvm = 0; -static thread_local JNIEnv* g_thread_jenv = 0; // Defined for the network thread once it is running, and for any thread that has called registerCallback +static JavaVM* g_jvm = nullptr; +static thread_local JNIEnv* g_thread_jenv = nullptr; // Defined for the network thread once it is running, and for any thread that has called registerCallback static thread_local jmethodID g_IFutureCallback_call_methodID = 0; static thread_local bool is_external = false; void detachIfExternalThread(void *ignore) { if(is_external && g_thread_jenv != 0) { - g_thread_jenv = 0; + g_thread_jenv = nullptr; g_IFutureCallback_call_methodID = 0; g_jvm->DetachCurrentThread(); } @@ -533,7 +533,7 @@ JNIEXPORT jboolean JNICALL Java_com_apple_foundationdb_FDB_Error_1predicate(JNIE } JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDB_Database_1create(JNIEnv *jenv, jobject, jstring clusterFileName) { - const char* fileName = 0; + const char* fileName = nullptr; if(clusterFileName != 0) { fileName = jenv->GetStringUTFChars(clusterFileName, 0); if(jenv->ExceptionOccurred()) { diff --git a/bindings/java/src/main/com/apple/foundationdb/ClusterOptions.java b/bindings/java/src/main/com/apple/foundationdb/ClusterOptions.java index 8eee964277..4f08154e0f 100644 --- a/bindings/java/src/main/com/apple/foundationdb/ClusterOptions.java +++ b/bindings/java/src/main/com/apple/foundationdb/ClusterOptions.java @@ -1,5 +1,33 @@ -package com.apple.foundationdb; /** +/* + * ClusterOptions.java + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2019 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.apple.foundationdb; + +/** * A set of options that can be set on a {@link Cluster}. * - * There are currently no options available. 
- */ public class ClusterOptions extends OptionsSet { public ClusterOptions( OptionConsumer consumer ) { super(consumer); } } \ No newline at end of file + * @deprecated There are no cluster options. + */ +@Deprecated +public class ClusterOptions extends OptionsSet { + public ClusterOptions( OptionConsumer consumer ) { + super(consumer); + } +} diff --git a/bindings/java/src/main/com/apple/foundationdb/Database.java b/bindings/java/src/main/com/apple/foundationdb/Database.java index 886b289876..86e06e42a9 100644 --- a/bindings/java/src/main/com/apple/foundationdb/Database.java +++ b/bindings/java/src/main/com/apple/foundationdb/Database.java @@ -26,7 +26,7 @@ import java.util.function.Function; /** * A mutable, lexicographically ordered mapping from binary keys to binary values. - * {@link Transaction}s are used to manipulate data within a single + * {@link Transaction}s are used to manipulate data within a single * {@code Database} -- multiple, concurrent * {@code Transaction}s on a {@code Database} enforce ACID properties.
*
diff --git a/bindings/java/src/main/com/apple/foundationdb/FDB.java b/bindings/java/src/main/com/apple/foundationdb/FDB.java
index 94634153bb..e95ed3132c 100644
--- a/bindings/java/src/main/com/apple/foundationdb/FDB.java
+++ b/bindings/java/src/main/com/apple/foundationdb/FDB.java
@@ -114,7 +114,7 @@ public class FDB {
	 * Returns a set of options that can be set on the FoundationDB API. Generally,
	 * these options, which apply to the top level of the API, affect the networking engine and
	 * therefore must be set before the network engine is started. The network is started
-	 * by calls to {@link #startNetwork()} and implicitly by a call to {@link #open()} and
+	 * by calls to {@link #startNetwork()} or implicitly by a call to {@link #open()}
	 * and its variants.
	 *
	 * @return a set of options affecting this instance of the FoundationDB API
@@ -218,7 +218,7 @@ public class FDB {
	 * If the FoundationDB network has not been started, it will be started in the course of this call
	 * as if {@link FDB#startNetwork()} had been called.
	 *
-	 * This function is deprecated. Use {@link #open()} instead.
+	 * @deprecated Use {@link #open()} instead.
	 *
	 * @return a {@code CompletableFuture} that will be set to a FoundationDB {@code Cluster}.
	 *
@@ -235,7 +235,7 @@ public class FDB {
	 * has not been started, it will be started in the course of this call as if
	 * {@link #startNetwork()} had been called.
	 *
-	 * This function is deprecated. Use {@link #open(String)} instead.
+	 * @deprecated Use {@link #open(String)} instead.
	 *
	 * @param clusterFilePath the
	 * cluster file
@@ -259,7 +259,7 @@ public class FDB {
	 * {@link Executor} will be used as the default for the execution of all callbacks that
	 * are produced from using the resulting {@link Cluster}.
	 *
-	 * This function is deprecated. Use {@link #open(String, Executor)} instead.
+	 * @deprecated Use {@link #open(String, Executor)} instead.
	 *
	 * @param clusterFilePath the
	 * cluster file
diff --git a/bindings/python/fdb/impl.py b/bindings/python/fdb/impl.py
index 532ad5cf89..d1231563c1 100644
--- a/bindings/python/fdb/impl.py
+++ b/bindings/python/fdb/impl.py
@@ -1090,6 +1090,7 @@ fill_operations()
 class Cluster(_FDBBase):
     def __init__(self, cluster_file):
         self.cluster_file = cluster_file
+        self.options = None
 
     def open_database(self, name):
         if name != b'DB':
@@ -1097,9 +1098,6 @@ class Cluster(_FDBBase):
 
         return create_database(self.cluster_file)
 
-    def _set_option(self, option, param, length):
-        pass
-
 
 def create_database(cluster_file=None):
     pointer = ctypes.c_void_p()
diff --git a/documentation/sphinx/source/release-notes.rst b/documentation/sphinx/source/release-notes.rst
index bfd14b6efb..19c9ce45d2 100644
--- a/documentation/sphinx/source/release-notes.rst
+++ b/documentation/sphinx/source/release-notes.rst
@@ -25,7 +25,7 @@ Bindings
 * C: Added `fdb_create_database` that creates a new `FDBDatabase` object synchronously and removed `fdb_future_get_database`.
 * Python: Removed `fdb.init`, `fdb.create_cluster`, and `fdb.Cluster`. `fdb.open` no longer accepts a `database_name` parameter.
 * Java: Deprecated `FDB.createCluster` and `Cluster`. The preferred way to get a `Database` is by using `FDB.open`, which should work in both new and old API versions.
-* Java: Removed `Cluster(long cPtr, Executor executor)` constructor.
This is API breaking for any code that has subclassed the `Cluster` class and is not protected by API versioning. * Ruby: Removed `FDB.init`, `FDB.create_cluster`, and `FDB.Cluster`. `FDB.open` no longer accepts a `database_name` parameter. * Golang: Deprecated `fdb.StartNetwork`, `fdb.Open`, `fdb.MustOpen`, and `fdb.CreateCluster` and added `fdb.OpenDatabase` and `fdb.MustOpenDatabase`. The preferred way to start the network and get a `Database` is by using `FDB.OpenDatabase` or `FDB.OpenDefault`. * Flow: Deprecated `API::createCluster` and `Cluster` and added `API::createDatabase`. The preferred way to get a `Database` is by using `API::createDatabase`. From 8451c0cfc12c8d46b24ea2f5c1e28a1b1fb24608 Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Thu, 10 Jan 2019 13:13:21 -0800 Subject: [PATCH 013/226] Favor nullptr in JNI code. --- bindings/java/fdbJNI.cpp | 90 ++++++++++++++++++++-------------------- 1 file changed, 45 insertions(+), 45 deletions(-) diff --git a/bindings/java/fdbJNI.cpp b/bindings/java/fdbJNI.cpp index 19590d60e3..d93c34b0f0 100644 --- a/bindings/java/fdbJNI.cpp +++ b/bindings/java/fdbJNI.cpp @@ -25,7 +25,7 @@ #include -#define JNI_NULL 0 +#define JNI_NULL nullptr #if defined(__GNUG__) #define thread_local __thread @@ -40,13 +40,13 @@ static JavaVM* g_jvm = nullptr; static thread_local JNIEnv* g_thread_jenv = nullptr; // Defined for the network thread once it is running, and for any thread that has called registerCallback -static thread_local jmethodID g_IFutureCallback_call_methodID = 0; +static thread_local jmethodID g_IFutureCallback_call_methodID = JNI_NULL; static thread_local bool is_external = false; void detachIfExternalThread(void *ignore) { - if(is_external && g_thread_jenv != 0) { + if(is_external && g_thread_jenv != nullptr) { g_thread_jenv = nullptr; - g_IFutureCallback_call_methodID = 0; + g_IFutureCallback_call_methodID = JNI_NULL; g_jvm->DetachCurrentThread(); } } @@ -58,7 +58,7 @@ void throwOutOfMem(JNIEnv *jenv) { if(jenv->ExceptionOccurred()) return; - if( jenv->ThrowNew( illegalArgClass, NULL ) != 0 ) { + if( jenv->ThrowNew( illegalArgClass, nullptr ) != 0 ) { if( !jenv->ExceptionOccurred() ) { jenv->FatalError("Could not throw OutOfMemoryError"); } else { @@ -68,7 +68,7 @@ void throwOutOfMem(JNIEnv *jenv) { } } -static jthrowable getThrowable(JNIEnv *jenv, fdb_error_t e, const char* msg = NULL) { +static jthrowable getThrowable(JNIEnv *jenv, fdb_error_t e, const char* msg = nullptr) { jclass excepClass = jenv->FindClass("com/apple/foundationdb/FDBException"); if(jenv->ExceptionOccurred()) return JNI_NULL; @@ -128,11 +128,11 @@ static bool findCallbackMethods(JNIEnv *jenv) { } static void callCallback( FDBFuture* f, void* data ) { - if (g_thread_jenv == 0) { + if (g_thread_jenv == nullptr) { // We are on an external thread and must attach to the JVM. // The shutdown hook will later detach this thread. is_external = true; - if( g_jvm != 0 && g_jvm->AttachCurrentThreadAsDaemon((void **) &g_thread_jenv, JNI_NULL) == JNI_OK ) { + if( g_jvm != nullptr && g_jvm->AttachCurrentThreadAsDaemon((void **) &g_thread_jenv, nullptr) == JNI_OK ) { if( !findCallbackMethods( g_thread_jenv ) ) { g_thread_jenv->FatalError("FDB: Could not find callback method.\n"); } @@ -169,9 +169,9 @@ JNIEXPORT void JNICALL Java_com_apple_foundationdb_NativeFuture_Future_1register } FDBFuture *f = (FDBFuture *)future; - // This is documented as not throwing, but simply returning NULL on OMM. + // This is documented as not throwing, but simply returning null on OOM. 
// As belt and suspenders, we will check for pending exceptions and then, - // if there are none and the result is NULL, we'll throw our own OMM. + // if there are none and the result is null, we'll throw our own OOM. callback = jenv->NewGlobalRef( callback ); if( !callback ) { if( !jenv->ExceptionOccurred() ) @@ -280,7 +280,7 @@ JNIEXPORT jobject JNICALL Java_com_apple_foundationdb_FutureStrings_FutureString jclass str_clazz = jenv->FindClass("java/lang/String"); if( jenv->ExceptionOccurred() ) return JNI_NULL; - jobjectArray arr = jenv->NewObjectArray(count, str_clazz, NULL); + jobjectArray arr = jenv->NewObjectArray(count, str_clazz, JNI_NULL); if( !arr ) { if( !jenv->ExceptionOccurred() ) throwOutOfMem(jenv); @@ -327,7 +327,7 @@ JNIEXPORT jobject JNICALL Java_com_apple_foundationdb_FutureResults_FutureResult return JNI_NULL; } - jbyteArray lastKey = NULL; + jbyteArray lastKey = JNI_NULL; if(count) { lastKey = jenv->NewByteArray(kvs[count - 1].key_length); if( !lastKey ) { @@ -378,7 +378,7 @@ JNIEXPORT jobject JNICALL Java_com_apple_foundationdb_FutureResults_FutureResult throwOutOfMem(jenv); return JNI_NULL; } - uint8_t *keyvalues_barr = (uint8_t *)jenv->GetByteArrayElements(keyValueArray, NULL); + uint8_t *keyvalues_barr = (uint8_t *)jenv->GetByteArrayElements(keyValueArray, JNI_NULL); if (!keyvalues_barr) { throwRuntimeEx( jenv, "Error getting handle to native resources" ); return JNI_NULL; @@ -393,7 +393,7 @@ JNIEXPORT jobject JNICALL Java_com_apple_foundationdb_FutureResults_FutureResult return JNI_NULL; } - jint *length_barr = jenv->GetIntArrayElements(lengthArray, NULL); + jint *length_barr = jenv->GetIntArrayElements(lengthArray, JNI_NULL); if( !length_barr ) { if( !jenv->ExceptionOccurred() ) throwOutOfMem(jenv); @@ -509,11 +509,11 @@ JNIEXPORT void JNICALL Java_com_apple_foundationdb_FDBDatabase_Database_1setOpti return; } FDBDatabase *c = (FDBDatabase *)dPtr; - uint8_t *barr = NULL; + uint8_t *barr = nullptr; int size = 0; - if(value != 0) { - barr = (uint8_t *)jenv->GetByteArrayElements( value, NULL ); + if(value != JNI_NULL) { + barr = (uint8_t *)jenv->GetByteArrayElements( value, JNI_NULL ); if (!barr) { throwRuntimeEx( jenv, "Error getting handle to native resources" ); return; @@ -521,7 +521,7 @@ JNIEXPORT void JNICALL Java_com_apple_foundationdb_FDBDatabase_Database_1setOpti size = jenv->GetArrayLength( value ); } fdb_error_t err = fdb_database_set_option( c, (FDBDatabaseOption)code, barr, size ); - if(value != 0) + if(value != JNI_NULL) jenv->ReleaseByteArrayElements( value, (jbyte *)barr, JNI_ABORT ); if( err ) { safeThrow( jenv, getThrowable( jenv, err ) ); @@ -534,8 +534,8 @@ JNIEXPORT jboolean JNICALL Java_com_apple_foundationdb_FDB_Error_1predicate(JNIE JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDB_Database_1create(JNIEnv *jenv, jobject, jstring clusterFileName) { const char* fileName = nullptr; - if(clusterFileName != 0) { - fileName = jenv->GetStringUTFChars(clusterFileName, 0); + if(clusterFileName != JNI_NULL) { + fileName = jenv->GetStringUTFChars(clusterFileName, JNI_NULL); if(jenv->ExceptionOccurred()) { return 0; } @@ -544,7 +544,7 @@ JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDB_Database_1create(JNIEnv FDBDatabase *db; fdb_error_t err = fdb_create_database(fileName, &db); - if(clusterFileName != 0) { + if(clusterFileName != JNI_NULL) { jenv->ReleaseStringUTFChars(clusterFileName, fileName); } @@ -582,7 +582,7 @@ JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDBTransaction_Transaction_1 } FDBTransaction *tr = (FDBTransaction 
*)tPtr; - uint8_t *barr = (uint8_t *)jenv->GetByteArrayElements( keyBytes, NULL ); + uint8_t *barr = (uint8_t *)jenv->GetByteArrayElements( keyBytes, JNI_NULL ); if(!barr) { if( !jenv->ExceptionOccurred() ) throwRuntimeEx( jenv, "Error getting handle to native resources" ); @@ -602,7 +602,7 @@ JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDBTransaction_Transaction_1 } FDBTransaction *tr = (FDBTransaction *)tPtr; - uint8_t *barr = (uint8_t *)jenv->GetByteArrayElements( keyBytes, NULL ); + uint8_t *barr = (uint8_t *)jenv->GetByteArrayElements( keyBytes, JNI_NULL ); if(!barr) { if( !jenv->ExceptionOccurred() ) throwRuntimeEx( jenv, "Error getting handle to native resources" ); @@ -624,14 +624,14 @@ JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDBTransaction_Transaction_1 } FDBTransaction *tr = (FDBTransaction *)tPtr; - uint8_t *barrBegin = (uint8_t *)jenv->GetByteArrayElements( keyBeginBytes, NULL ); + uint8_t *barrBegin = (uint8_t *)jenv->GetByteArrayElements( keyBeginBytes, JNI_NULL ); if (!barrBegin) { if( !jenv->ExceptionOccurred() ) throwRuntimeEx( jenv, "Error getting handle to native resources" ); return 0; } - uint8_t *barrEnd = (uint8_t *)jenv->GetByteArrayElements( keyEndBytes, NULL ); + uint8_t *barrEnd = (uint8_t *)jenv->GetByteArrayElements( keyEndBytes, JNI_NULL ); if (!barrEnd) { jenv->ReleaseByteArrayElements( keyBeginBytes, (jbyte *)barrBegin, JNI_ABORT ); if( !jenv->ExceptionOccurred() ) @@ -655,14 +655,14 @@ JNIEXPORT void JNICALL Java_com_apple_foundationdb_FDBTransaction_Transaction_1s } FDBTransaction *tr = (FDBTransaction *)tPtr; - uint8_t *barrKey = (uint8_t *)jenv->GetByteArrayElements( keyBytes, NULL ); + uint8_t *barrKey = (uint8_t *)jenv->GetByteArrayElements( keyBytes, JNI_NULL ); if (!barrKey) { if( !jenv->ExceptionOccurred() ) throwRuntimeEx( jenv, "Error getting handle to native resources" ); return; } - uint8_t *barrValue = (uint8_t *)jenv->GetByteArrayElements( valueBytes, NULL ); + uint8_t *barrValue = (uint8_t *)jenv->GetByteArrayElements( valueBytes, JNI_NULL ); if (!barrValue) { jenv->ReleaseByteArrayElements( keyBytes, (jbyte *)barrKey, JNI_ABORT ); if( !jenv->ExceptionOccurred() ) @@ -684,7 +684,7 @@ JNIEXPORT void JNICALL Java_com_apple_foundationdb_FDBTransaction_Transaction_1c } FDBTransaction *tr = (FDBTransaction *)tPtr; - uint8_t *barr = (uint8_t *)jenv->GetByteArrayElements( keyBytes, NULL ); + uint8_t *barr = (uint8_t *)jenv->GetByteArrayElements( keyBytes, JNI_NULL ); if (!barr) { if( !jenv->ExceptionOccurred() ) throwRuntimeEx( jenv, "Error getting handle to native resources" ); @@ -702,14 +702,14 @@ JNIEXPORT void JNICALL Java_com_apple_foundationdb_FDBTransaction_Transaction_1c } FDBTransaction *tr = (FDBTransaction *)tPtr; - uint8_t *barrKeyBegin = (uint8_t *)jenv->GetByteArrayElements( keyBeginBytes, NULL ); + uint8_t *barrKeyBegin = (uint8_t *)jenv->GetByteArrayElements( keyBeginBytes, JNI_NULL ); if (!barrKeyBegin) { if( !jenv->ExceptionOccurred() ) throwRuntimeEx( jenv, "Error getting handle to native resources" ); return; } - uint8_t *barrKeyEnd = (uint8_t *)jenv->GetByteArrayElements( keyEndBytes, NULL ); + uint8_t *barrKeyEnd = (uint8_t *)jenv->GetByteArrayElements( keyEndBytes, JNI_NULL ); if (!barrKeyEnd) { jenv->ReleaseByteArrayElements( keyBeginBytes, (jbyte *)barrKeyBegin, JNI_ABORT ); if( !jenv->ExceptionOccurred() ) @@ -732,14 +732,14 @@ JNIEXPORT void JNICALL Java_com_apple_foundationdb_FDBTransaction_Transaction_1m } FDBTransaction *tr = (FDBTransaction *)tPtr; - uint8_t *barrKey = (uint8_t 
*)jenv->GetByteArrayElements( key, NULL ); + uint8_t *barrKey = (uint8_t *)jenv->GetByteArrayElements( key, JNI_NULL ); if (!barrKey) { if( !jenv->ExceptionOccurred() ) throwRuntimeEx( jenv, "Error getting handle to native resources" ); return; } - uint8_t *barrValue = (uint8_t *)jenv->GetByteArrayElements( value, NULL ); + uint8_t *barrValue = (uint8_t *)jenv->GetByteArrayElements( value, JNI_NULL ); if (!barrValue) { jenv->ReleaseByteArrayElements( key, (jbyte *)barrKey, JNI_ABORT ); if( !jenv->ExceptionOccurred() ) @@ -772,11 +772,11 @@ JNIEXPORT void JNICALL Java_com_apple_foundationdb_FDBTransaction_Transaction_1s return; } FDBTransaction *tr = (FDBTransaction *)tPtr; - uint8_t *barr = NULL; + uint8_t *barr = nullptr; int size = 0; - if(value != 0) { - barr = (uint8_t *)jenv->GetByteArrayElements( value, NULL ); + if(value != JNI_NULL) { + barr = (uint8_t *)jenv->GetByteArrayElements( value, JNI_NULL ); if (!barr) { if( !jenv->ExceptionOccurred() ) throwRuntimeEx( jenv, "Error getting handle to native resources" ); @@ -785,7 +785,7 @@ JNIEXPORT void JNICALL Java_com_apple_foundationdb_FDBTransaction_Transaction_1s size = jenv->GetArrayLength( value ); } fdb_error_t err = fdb_transaction_set_option( tr, (FDBTransactionOption)code, barr, size ); - if(value != 0) + if(value != JNI_NULL) jenv->ReleaseByteArrayElements( value, (jbyte *)barr, JNI_ABORT ); if( err ) { safeThrow( jenv, getThrowable( jenv, err ) ); @@ -824,7 +824,7 @@ JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDBTransaction_Transaction_1 } FDBTransaction *tr = (FDBTransaction *)tPtr; - uint8_t *barr = (uint8_t *)jenv->GetByteArrayElements( key, NULL ); + uint8_t *barr = (uint8_t *)jenv->GetByteArrayElements( key, JNI_NULL ); if (!barr) { if( !jenv->ExceptionOccurred() ) throwRuntimeEx( jenv, "Error getting handle to native resources" ); @@ -871,7 +871,7 @@ JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDBTransaction_Transaction_1 } FDBTransaction *tr = (FDBTransaction *)tPtr; - uint8_t *barr = (uint8_t *)jenv->GetByteArrayElements( key, NULL ); + uint8_t *barr = (uint8_t *)jenv->GetByteArrayElements( key, JNI_NULL ); if (!barr) { if( !jenv->ExceptionOccurred() ) throwRuntimeEx( jenv, "Error getting handle to native resources" ); @@ -900,7 +900,7 @@ JNIEXPORT void JNICALL Java_com_apple_foundationdb_FDBTransaction_Transaction_1a } FDBTransaction *tr = (FDBTransaction *)tPtr; - uint8_t *begin_barr = (uint8_t *)jenv->GetByteArrayElements( keyBegin, NULL ); + uint8_t *begin_barr = (uint8_t *)jenv->GetByteArrayElements( keyBegin, JNI_NULL ); if (!begin_barr) { if( !jenv->ExceptionOccurred() ) throwRuntimeEx( jenv, "Error getting handle to native resources" ); @@ -908,7 +908,7 @@ JNIEXPORT void JNICALL Java_com_apple_foundationdb_FDBTransaction_Transaction_1a } int begin_size = jenv->GetArrayLength( keyBegin ); - uint8_t *end_barr = (uint8_t *)jenv->GetByteArrayElements( keyEnd, NULL ); + uint8_t *end_barr = (uint8_t *)jenv->GetByteArrayElements( keyEnd, JNI_NULL ); if (!end_barr) { jenv->ReleaseByteArrayElements( keyBegin, (jbyte *)begin_barr, JNI_ABORT ); if( !jenv->ExceptionOccurred() ) @@ -953,10 +953,10 @@ JNIEXPORT void JNICALL Java_com_apple_foundationdb_FDB_Select_1API_1version(JNIE } JNIEXPORT void JNICALL Java_com_apple_foundationdb_FDB_Network_1setOption(JNIEnv *jenv, jobject, jint code, jbyteArray value) { - uint8_t *barr = NULL; + uint8_t *barr = nullptr; int size = 0; - if(value != 0) { - barr = (uint8_t *)jenv->GetByteArrayElements( value, NULL ); + if(value != JNI_NULL) { + barr = (uint8_t 
*)jenv->GetByteArrayElements( value, JNI_NULL ); if (!barr) { if( !jenv->ExceptionOccurred() ) throwRuntimeEx( jenv, "Error getting handle to native resources" ); @@ -965,7 +965,7 @@ JNIEXPORT void JNICALL Java_com_apple_foundationdb_FDB_Network_1setOption(JNIEnv size = jenv->GetArrayLength( value ); } fdb_error_t err = fdb_network_set_option((FDBNetworkOption)code, barr, size); - if(value != 0) + if(value != JNI_NULL) jenv->ReleaseByteArrayElements( value, (jbyte *)barr, JNI_ABORT ); if( err ) { safeThrow( jenv, getThrowable( jenv, err ) ); @@ -987,7 +987,7 @@ JNIEXPORT void JNICALL Java_com_apple_foundationdb_FDB_Network_1run(JNIEnv *jenv return; } - fdb_error_t hookErr = fdb_add_network_thread_completion_hook( &detachIfExternalThread, NULL ); + fdb_error_t hookErr = fdb_add_network_thread_completion_hook( &detachIfExternalThread, nullptr ); if( hookErr ) { safeThrow( jenv, getThrowable( jenv, hookErr ) ); } From 679a0e27fdd3b05dd834146d867cce02612751bb Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Thu, 10 Jan 2019 14:53:42 -0800 Subject: [PATCH 014/226] Fix two memory issues that occurred when a database object was destroyed. --- fdbclient/ThreadSafeTransaction.actor.cpp | 12 ++++++++---- fdbclient/ThreadSafeTransaction.h | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/fdbclient/ThreadSafeTransaction.actor.cpp b/fdbclient/ThreadSafeTransaction.actor.cpp index 450d9a0a6a..fdc59e2783 100644 --- a/fdbclient/ThreadSafeTransaction.actor.cpp +++ b/fdbclient/ThreadSafeTransaction.actor.cpp @@ -47,7 +47,7 @@ ThreadFuture> ThreadSafeDatabase::createFromExistingDatabas } Reference ThreadSafeDatabase::createTransaction() { - return Reference(new ThreadSafeTransaction(this)); + return Reference(new ThreadSafeTransaction(Reference::addRef(this))); } void ThreadSafeDatabase::setOption( FDBDatabaseOptions::Option option, Optional value) { @@ -74,10 +74,11 @@ ThreadSafeDatabase::ThreadSafeDatabase(std::string connFilename, int apiVersion) } ThreadSafeDatabase::~ThreadSafeDatabase() { - onMainThreadVoid( [this](){ db->delref(); }, NULL ); + DatabaseContext *db = this->db; + onMainThreadVoid( [db](){ db->delref(); }, NULL ); } -ThreadSafeTransaction::ThreadSafeTransaction( ThreadSafeDatabase *cx ) { +ThreadSafeTransaction::ThreadSafeTransaction( Reference db ) { // Allocate memory for the transaction from this thread (so the pointer is known for subsequent method calls) // but run its constructor on the main thread @@ -87,7 +88,10 @@ ThreadSafeTransaction::ThreadSafeTransaction( ThreadSafeDatabase *cx ) { // these operations). 
ReadYourWritesTransaction *tr = this->tr = ReadYourWritesTransaction::allocateOnForeignThread(); // No deferred error -- if the construction of the RYW transaction fails, we have no where to put it - onMainThreadVoid( [tr,cx](){ cx->db->addref(); new (tr) ReadYourWritesTransaction( Database(cx->db) ); }, NULL ); + onMainThreadVoid( [tr, db](){ + db->db->addref(); + new (tr) ReadYourWritesTransaction( Database(db->db) ); + }, NULL ); } ThreadSafeTransaction::~ThreadSafeTransaction() { diff --git a/fdbclient/ThreadSafeTransaction.h b/fdbclient/ThreadSafeTransaction.h index 3038170ba4..de40b6ae6a 100644 --- a/fdbclient/ThreadSafeTransaction.h +++ b/fdbclient/ThreadSafeTransaction.h @@ -52,7 +52,7 @@ public: // Internal use only class ThreadSafeTransaction : public ITransaction, ThreadSafeReferenceCounted, NonCopyable { public: - explicit ThreadSafeTransaction( ThreadSafeDatabase *cx ); + explicit ThreadSafeTransaction( Reference db ); ~ThreadSafeTransaction(); void cancel(); From 627b785a24ab29a76a417a8e4f7a3bf045e0f459 Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Fri, 11 Jan 2019 10:04:05 -0800 Subject: [PATCH 015/226] Fix: null handling for cluster file paths didn't work after merge --- bindings/c/fdb_c.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bindings/c/fdb_c.cpp b/bindings/c/fdb_c.cpp index 90afdf243d..6885e8c1c3 100644 --- a/bindings/c/fdb_c.cpp +++ b/bindings/c/fdb_c.cpp @@ -334,7 +334,7 @@ FDBFuture* fdb_cluster_create_database_v609( FDBCluster* c, uint8_t const* db_na extern "C" DLLEXPORT fdb_error_t fdb_create_database( const char* cluster_file_path, FDBDatabase** out_database ) { CATCH_AND_RETURN( - *out_database = (FDBDatabase*)API->createDatabase( cluster_file_path ).extractPtr(); + *out_database = (FDBDatabase*)API->createDatabase( cluster_file_path ? cluster_file_path : "" ).extractPtr(); ); } From c36f97ccb21c9395002ef917295a1ed1695bbe7d Mon Sep 17 00:00:00 2001 From: Alvin Moore Date: Fri, 11 Jan 2019 19:54:36 -0800 Subject: [PATCH 016/226] Added website directory to docker packaging in order to allow the building of runtime docker images --- .gitignore | 1 + packaging/docker/website/.gitkeep | 0 2 files changed, 1 insertion(+) create mode 100644 packaging/docker/website/.gitkeep diff --git a/.gitignore b/.gitignore index 7859816fcd..ac15104005 100644 --- a/.gitignore +++ b/.gitignore @@ -49,6 +49,7 @@ bindings/go/godoc bindings/java/.classstamp* bindings/java/classes*/ bindings/java/javadoc*/ +packaging/docker/website # Testing and logging packaging/msi/*.log diff --git a/packaging/docker/website/.gitkeep b/packaging/docker/website/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 From 401f8a677429a539ad7bf19c2991cea6d721fbdd Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Mon, 14 Jan 2019 10:41:54 -0800 Subject: [PATCH 017/226] Python and Ruby bindings displayed an unhelpful error if trying to load an old incompatible fdb_c with a new binding. 
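The shape of the fix, in a minimal sketch (the CDLL path below is illustrative;
the real loader probes per-platform library locations): ctypes signature setup
moves out of module import time and into init_c_api(), which fdb.api_version()
invokes only after fdb_select_api_version_impl has succeeded. Loading an old
libfdb_c that predates an entry point such as fdb_create_database therefore
surfaces as a version error from the handshake rather than an AttributeError
while the module is being imported.

    import ctypes

    _capi = ctypes.CDLL('libfdb_c.so')  # illustrative; see fdb/impl.py for the real loading logic

    def init_c_api():
        # Reached only once the API version handshake has passed, so a missing
        # symbol in an old library can no longer break module import.
        _capi.fdb_create_database.argtypes = [ctypes.c_char_p, ctypes.POINTER(ctypes.c_void_p)]
        _capi.fdb_create_database.restype = ctypes.c_int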
--- bindings/python/fdb/__init__.py | 2 + bindings/python/fdb/impl.py | 221 ++++++++++++++++---------------- bindings/ruby/lib/fdb.rb | 2 + bindings/ruby/lib/fdbimpl.rb | 102 +++++++-------- 4 files changed, 167 insertions(+), 160 deletions(-) diff --git a/bindings/python/fdb/__init__.py b/bindings/python/fdb/__init__.py index f26ff3acba..abbc7ec18c 100644 --- a/bindings/python/fdb/__init__.py +++ b/bindings/python/fdb/__init__.py @@ -81,6 +81,8 @@ def api_version(ver): elif err != 0: raise RuntimeError('FoundationDB API error') + fdb.impl.init_c_api() + list = ( 'FDBError', 'predicates', diff --git a/bindings/python/fdb/impl.py b/bindings/python/fdb/impl.py index d1231563c1..1b1120027e 100644 --- a/bindings/python/fdb/impl.py +++ b/bindings/python/fdb/impl.py @@ -1285,159 +1285,160 @@ def optionalParamToBytes(v): _FDBBase.capi = _capi -_capi.fdb_select_api_version_impl.argtypes = [ctypes.c_int, ctypes.c_int] -_capi.fdb_select_api_version_impl.restype = ctypes.c_int +def init_c_api(): + _capi.fdb_select_api_version_impl.argtypes = [ctypes.c_int, ctypes.c_int] + _capi.fdb_select_api_version_impl.restype = ctypes.c_int -_capi.fdb_get_error.argtypes = [ctypes.c_int] -_capi.fdb_get_error.restype = ctypes.c_char_p + _capi.fdb_get_error.argtypes = [ctypes.c_int] + _capi.fdb_get_error.restype = ctypes.c_char_p -_capi.fdb_error_predicate.argtypes = [ctypes.c_int, ctypes.c_int] -_capi.fdb_error_predicate.restype = ctypes.c_int + _capi.fdb_error_predicate.argtypes = [ctypes.c_int, ctypes.c_int] + _capi.fdb_error_predicate.restype = ctypes.c_int -_capi.fdb_setup_network.argtypes = [] -_capi.fdb_setup_network.restype = ctypes.c_int -_capi.fdb_setup_network.errcheck = check_error_code + _capi.fdb_setup_network.argtypes = [] + _capi.fdb_setup_network.restype = ctypes.c_int + _capi.fdb_setup_network.errcheck = check_error_code -_capi.fdb_network_set_option.argtypes = [ctypes.c_int, ctypes.c_void_p, ctypes.c_int] -_capi.fdb_network_set_option.restype = ctypes.c_int -_capi.fdb_network_set_option.errcheck = check_error_code + _capi.fdb_network_set_option.argtypes = [ctypes.c_int, ctypes.c_void_p, ctypes.c_int] + _capi.fdb_network_set_option.restype = ctypes.c_int + _capi.fdb_network_set_option.errcheck = check_error_code -_capi.fdb_run_network.argtypes = [] -_capi.fdb_run_network.restype = ctypes.c_int -_capi.fdb_run_network.errcheck = check_error_code + _capi.fdb_run_network.argtypes = [] + _capi.fdb_run_network.restype = ctypes.c_int + _capi.fdb_run_network.errcheck = check_error_code -_capi.fdb_stop_network.argtypes = [] -_capi.fdb_stop_network.restype = ctypes.c_int -_capi.fdb_stop_network.errcheck = check_error_code + _capi.fdb_stop_network.argtypes = [] + _capi.fdb_stop_network.restype = ctypes.c_int + _capi.fdb_stop_network.errcheck = check_error_code -_capi.fdb_future_destroy.argtypes = [ctypes.c_void_p] -_capi.fdb_future_destroy.restype = None + _capi.fdb_future_destroy.argtypes = [ctypes.c_void_p] + _capi.fdb_future_destroy.restype = None -_capi.fdb_future_release_memory.argtypes = [ctypes.c_void_p] -_capi.fdb_future_release_memory.restype = None + _capi.fdb_future_release_memory.argtypes = [ctypes.c_void_p] + _capi.fdb_future_release_memory.restype = None -_capi.fdb_future_cancel.argtypes = [ctypes.c_void_p] -_capi.fdb_future_cancel.restype = None + _capi.fdb_future_cancel.argtypes = [ctypes.c_void_p] + _capi.fdb_future_cancel.restype = None -_capi.fdb_future_block_until_ready.argtypes = [ctypes.c_void_p] -_capi.fdb_future_block_until_ready.restype = ctypes.c_int 
-_capi.fdb_future_block_until_ready.errcheck = check_error_code + _capi.fdb_future_block_until_ready.argtypes = [ctypes.c_void_p] + _capi.fdb_future_block_until_ready.restype = ctypes.c_int + _capi.fdb_future_block_until_ready.errcheck = check_error_code -_capi.fdb_future_is_ready.argtypes = [ctypes.c_void_p] -_capi.fdb_future_is_ready.restype = ctypes.c_int + _capi.fdb_future_is_ready.argtypes = [ctypes.c_void_p] + _capi.fdb_future_is_ready.restype = ctypes.c_int -_CBFUNC = ctypes.CFUNCTYPE(None, ctypes.c_void_p) + _CBFUNC = ctypes.CFUNCTYPE(None, ctypes.c_void_p) -_capi.fdb_future_set_callback.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p] -_capi.fdb_future_set_callback.restype = int -_capi.fdb_future_set_callback.errcheck = check_error_code + _capi.fdb_future_set_callback.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p] + _capi.fdb_future_set_callback.restype = int + _capi.fdb_future_set_callback.errcheck = check_error_code -_capi.fdb_future_get_error.argtypes = [ctypes.c_void_p] -_capi.fdb_future_get_error.restype = int -_capi.fdb_future_get_error.errcheck = check_error_code + _capi.fdb_future_get_error.argtypes = [ctypes.c_void_p] + _capi.fdb_future_get_error.restype = int + _capi.fdb_future_get_error.errcheck = check_error_code -_capi.fdb_future_get_version.argtypes = [ctypes.c_void_p, ctypes.POINTER(ctypes.c_int64)] -_capi.fdb_future_get_version.restype = ctypes.c_int -_capi.fdb_future_get_version.errcheck = check_error_code + _capi.fdb_future_get_version.argtypes = [ctypes.c_void_p, ctypes.POINTER(ctypes.c_int64)] + _capi.fdb_future_get_version.restype = ctypes.c_int + _capi.fdb_future_get_version.errcheck = check_error_code -_capi.fdb_future_get_key.argtypes = [ctypes.c_void_p, ctypes.POINTER(ctypes.POINTER(ctypes.c_byte)), - ctypes.POINTER(ctypes.c_int)] -_capi.fdb_future_get_key.restype = ctypes.c_int -_capi.fdb_future_get_key.errcheck = check_error_code + _capi.fdb_future_get_key.argtypes = [ctypes.c_void_p, ctypes.POINTER(ctypes.POINTER(ctypes.c_byte)), + ctypes.POINTER(ctypes.c_int)] + _capi.fdb_future_get_key.restype = ctypes.c_int + _capi.fdb_future_get_key.errcheck = check_error_code -_capi.fdb_future_get_value.argtypes = [ctypes.c_void_p, ctypes.POINTER(ctypes.c_int), - ctypes.POINTER(ctypes.POINTER(ctypes.c_byte)), ctypes.POINTER(ctypes.c_int)] -_capi.fdb_future_get_value.restype = ctypes.c_int -_capi.fdb_future_get_value.errcheck = check_error_code + _capi.fdb_future_get_value.argtypes = [ctypes.c_void_p, ctypes.POINTER(ctypes.c_int), + ctypes.POINTER(ctypes.POINTER(ctypes.c_byte)), ctypes.POINTER(ctypes.c_int)] + _capi.fdb_future_get_value.restype = ctypes.c_int + _capi.fdb_future_get_value.errcheck = check_error_code -_capi.fdb_future_get_keyvalue_array.argtypes = [ctypes.c_void_p, ctypes.POINTER( - ctypes.POINTER(KeyValueStruct)), ctypes.POINTER(ctypes.c_int), ctypes.POINTER(ctypes.c_int)] -_capi.fdb_future_get_keyvalue_array.restype = int -_capi.fdb_future_get_keyvalue_array.errcheck = check_error_code + _capi.fdb_future_get_keyvalue_array.argtypes = [ctypes.c_void_p, ctypes.POINTER( + ctypes.POINTER(KeyValueStruct)), ctypes.POINTER(ctypes.c_int), ctypes.POINTER(ctypes.c_int)] + _capi.fdb_future_get_keyvalue_array.restype = int + _capi.fdb_future_get_keyvalue_array.errcheck = check_error_code -_capi.fdb_future_get_string_array.argtypes = [ctypes.c_void_p, ctypes.POINTER(ctypes.POINTER(ctypes.c_char_p)), ctypes.POINTER(ctypes.c_int)] -_capi.fdb_future_get_string_array.restype = int -_capi.fdb_future_get_string_array.errcheck = 
check_error_code + _capi.fdb_future_get_string_array.argtypes = [ctypes.c_void_p, ctypes.POINTER(ctypes.POINTER(ctypes.c_char_p)), ctypes.POINTER(ctypes.c_int)] + _capi.fdb_future_get_string_array.restype = int + _capi.fdb_future_get_string_array.errcheck = check_error_code -_capi.fdb_create_database.argtypes = [ctypes.c_char_p, ctypes.POINTER(ctypes.c_void_p)] -_capi.fdb_create_database.restype = ctypes.c_int -_capi.fdb_create_database.errcheck = check_error_code + _capi.fdb_create_database.argtypes = [ctypes.c_char_p, ctypes.POINTER(ctypes.c_void_p)] + _capi.fdb_create_database.restype = ctypes.c_int + _capi.fdb_create_database.errcheck = check_error_code -_capi.fdb_database_destroy.argtypes = [ctypes.c_void_p] -_capi.fdb_database_destroy.restype = None + _capi.fdb_database_destroy.argtypes = [ctypes.c_void_p] + _capi.fdb_database_destroy.restype = None -_capi.fdb_database_create_transaction.argtypes = [ctypes.c_void_p, ctypes.POINTER(ctypes.c_void_p)] -_capi.fdb_database_create_transaction.restype = ctypes.c_int -_capi.fdb_database_create_transaction.errcheck = check_error_code + _capi.fdb_database_create_transaction.argtypes = [ctypes.c_void_p, ctypes.POINTER(ctypes.c_void_p)] + _capi.fdb_database_create_transaction.restype = ctypes.c_int + _capi.fdb_database_create_transaction.errcheck = check_error_code -_capi.fdb_database_set_option.argtypes = [ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p, ctypes.c_int] -_capi.fdb_database_set_option.restype = ctypes.c_int -_capi.fdb_database_set_option.errcheck = check_error_code + _capi.fdb_database_set_option.argtypes = [ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p, ctypes.c_int] + _capi.fdb_database_set_option.restype = ctypes.c_int + _capi.fdb_database_set_option.errcheck = check_error_code -_capi.fdb_transaction_destroy.argtypes = [ctypes.c_void_p] -_capi.fdb_transaction_destroy.restype = None + _capi.fdb_transaction_destroy.argtypes = [ctypes.c_void_p] + _capi.fdb_transaction_destroy.restype = None -_capi.fdb_transaction_cancel.argtypes = [ctypes.c_void_p] -_capi.fdb_transaction_cancel.restype = None + _capi.fdb_transaction_cancel.argtypes = [ctypes.c_void_p] + _capi.fdb_transaction_cancel.restype = None -_capi.fdb_transaction_set_read_version.argtypes = [ctypes.c_void_p, ctypes.c_int64] -_capi.fdb_transaction_set_read_version.restype = None + _capi.fdb_transaction_set_read_version.argtypes = [ctypes.c_void_p, ctypes.c_int64] + _capi.fdb_transaction_set_read_version.restype = None -_capi.fdb_transaction_get_read_version.argtypes = [ctypes.c_void_p] -_capi.fdb_transaction_get_read_version.restype = ctypes.c_void_p + _capi.fdb_transaction_get_read_version.argtypes = [ctypes.c_void_p] + _capi.fdb_transaction_get_read_version.restype = ctypes.c_void_p -_capi.fdb_transaction_get.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int, ctypes.c_int] -_capi.fdb_transaction_get.restype = ctypes.c_void_p + _capi.fdb_transaction_get.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int, ctypes.c_int] + _capi.fdb_transaction_get.restype = ctypes.c_void_p -_capi.fdb_transaction_get_key.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_int] -_capi.fdb_transaction_get_key.restype = ctypes.c_void_p + _capi.fdb_transaction_get_key.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_int] + _capi.fdb_transaction_get_key.restype = ctypes.c_void_p -_capi.fdb_transaction_get_range.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int, ctypes.c_int, 
ctypes.c_int, ctypes.c_void_p, - ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_int, - ctypes.c_int, ctypes.c_int] -_capi.fdb_transaction_get_range.restype = ctypes.c_void_p + _capi.fdb_transaction_get_range.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_void_p, + ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_int, + ctypes.c_int, ctypes.c_int] + _capi.fdb_transaction_get_range.restype = ctypes.c_void_p -_capi.fdb_transaction_add_conflict_range.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p, ctypes.c_int, ctypes.c_int] -_capi.fdb_transaction_add_conflict_range.restype = ctypes.c_int -_capi.fdb_transaction_add_conflict_range.errcheck = check_error_code + _capi.fdb_transaction_add_conflict_range.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p, ctypes.c_int, ctypes.c_int] + _capi.fdb_transaction_add_conflict_range.restype = ctypes.c_int + _capi.fdb_transaction_add_conflict_range.errcheck = check_error_code -_capi.fdb_transaction_get_addresses_for_key.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int] -_capi.fdb_transaction_get_addresses_for_key.restype = ctypes.c_void_p + _capi.fdb_transaction_get_addresses_for_key.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int] + _capi.fdb_transaction_get_addresses_for_key.restype = ctypes.c_void_p -_capi.fdb_transaction_set_option.argtypes = [ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p, ctypes.c_int] -_capi.fdb_transaction_set_option.restype = ctypes.c_int -_capi.fdb_transaction_set_option.errcheck = check_error_code + _capi.fdb_transaction_set_option.argtypes = [ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p, ctypes.c_int] + _capi.fdb_transaction_set_option.restype = ctypes.c_int + _capi.fdb_transaction_set_option.errcheck = check_error_code -_capi.fdb_transaction_atomic_op.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p, ctypes.c_int, ctypes.c_int] -_capi.fdb_transaction_atomic_op.restype = None + _capi.fdb_transaction_atomic_op.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p, ctypes.c_int, ctypes.c_int] + _capi.fdb_transaction_atomic_op.restype = None -_capi.fdb_transaction_set.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p, ctypes.c_int] -_capi.fdb_transaction_set.restype = None + _capi.fdb_transaction_set.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p, ctypes.c_int] + _capi.fdb_transaction_set.restype = None -_capi.fdb_transaction_clear.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int] -_capi.fdb_transaction_clear.restype = None + _capi.fdb_transaction_clear.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int] + _capi.fdb_transaction_clear.restype = None -_capi.fdb_transaction_clear_range.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p, ctypes.c_int] -_capi.fdb_transaction_clear_range.restype = None + _capi.fdb_transaction_clear_range.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p, ctypes.c_int] + _capi.fdb_transaction_clear_range.restype = None -_capi.fdb_transaction_watch.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int] -_capi.fdb_transaction_watch.restype = ctypes.c_void_p + _capi.fdb_transaction_watch.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int] + _capi.fdb_transaction_watch.restype = ctypes.c_void_p 
-_capi.fdb_transaction_commit.argtypes = [ctypes.c_void_p] -_capi.fdb_transaction_commit.restype = ctypes.c_void_p + _capi.fdb_transaction_commit.argtypes = [ctypes.c_void_p] + _capi.fdb_transaction_commit.restype = ctypes.c_void_p -_capi.fdb_transaction_get_committed_version.argtypes = [ctypes.c_void_p, ctypes.POINTER(ctypes.c_int64)] -_capi.fdb_transaction_get_committed_version.restype = ctypes.c_int -_capi.fdb_transaction_get_committed_version.errcheck = check_error_code + _capi.fdb_transaction_get_committed_version.argtypes = [ctypes.c_void_p, ctypes.POINTER(ctypes.c_int64)] + _capi.fdb_transaction_get_committed_version.restype = ctypes.c_int + _capi.fdb_transaction_get_committed_version.errcheck = check_error_code -_capi.fdb_transaction_get_versionstamp.argtypes = [ctypes.c_void_p] -_capi.fdb_transaction_get_versionstamp.restype = ctypes.c_void_p + _capi.fdb_transaction_get_versionstamp.argtypes = [ctypes.c_void_p] + _capi.fdb_transaction_get_versionstamp.restype = ctypes.c_void_p -_capi.fdb_transaction_on_error.argtypes = [ctypes.c_void_p, ctypes.c_int] -_capi.fdb_transaction_on_error.restype = ctypes.c_void_p + _capi.fdb_transaction_on_error.argtypes = [ctypes.c_void_p, ctypes.c_int] + _capi.fdb_transaction_on_error.restype = ctypes.c_void_p -_capi.fdb_transaction_reset.argtypes = [ctypes.c_void_p] -_capi.fdb_transaction_reset.restype = None + _capi.fdb_transaction_reset.argtypes = [ctypes.c_void_p] + _capi.fdb_transaction_reset.restype = None if hasattr(ctypes.pythonapi, 'Py_IncRef'): def _pin_callback(cb): diff --git a/bindings/ruby/lib/fdb.rb b/bindings/ruby/lib/fdb.rb index d4083be41a..c2431d8bf3 100644 --- a/bindings/ruby/lib/fdb.rb +++ b/bindings/ruby/lib/fdb.rb @@ -70,6 +70,8 @@ module FDB raise "FoundationDB API version error" end + FDBC.init_c_api() + require_relative 'fdbtuple' require_relative 'fdbdirectory' diff --git a/bindings/ruby/lib/fdbimpl.rb b/bindings/ruby/lib/fdbimpl.rb index dfae83b58c..dcbe246f89 100644 --- a/bindings/ruby/lib/fdbimpl.rb +++ b/bindings/ruby/lib/fdbimpl.rb @@ -64,59 +64,61 @@ module FDB typedef :int, :fdb_error typedef :int, :fdb_bool - attach_function :fdb_get_error, [ :fdb_error ], :string - - attach_function :fdb_network_set_option, [ :int, :pointer, :int ], :fdb_error - attach_function :fdb_setup_network, [ ], :fdb_error - attach_function :fdb_run_network, [ ], :fdb_error, :blocking => true - attach_function :fdb_stop_network, [ ], :fdb_error - - attach_function :fdb_future_cancel, [ :pointer ], :void - attach_function :fdb_future_release_memory, [ :pointer ], :void - attach_function :fdb_future_destroy, [ :pointer ], :void - attach_function :fdb_future_block_until_ready, [ :pointer ], :fdb_error, :blocking => true - attach_function :fdb_future_is_ready, [ :pointer ], :fdb_bool - - callback :fdb_future_callback, [ :pointer, :pointer ], :void - attach_function :fdb_future_set_callback, [ :pointer, :fdb_future_callback, :pointer ], :fdb_error - - attach_function :fdb_future_get_error, [ :pointer ], :fdb_error - attach_function :fdb_future_get_version, [ :pointer, :pointer ], :fdb_error - attach_function :fdb_future_get_key, [ :pointer, :pointer, :pointer ], :fdb_error - attach_function :fdb_future_get_value, [ :pointer, :pointer, :pointer, :pointer ], :fdb_error - attach_function :fdb_future_get_keyvalue_array, [ :pointer, :pointer, :pointer, :pointer ], :fdb_error - attach_function :fdb_future_get_string_array, [ :pointer, :pointer, :pointer ], :fdb_error - - attach_function :fdb_create_database, [ :string, :pointer ], :fdb_error - - 
attach_function :fdb_database_destroy, [ :pointer ], :void - attach_function :fdb_database_set_option, [ :pointer, :int, :pointer, :int ], :fdb_error - - attach_function :fdb_database_create_transaction, [ :pointer, :pointer ], :fdb_error - attach_function :fdb_transaction_destroy, [ :pointer ], :void - attach_function :fdb_transaction_cancel, [ :pointer ], :void - attach_function :fdb_transaction_atomic_op, [ :pointer, :pointer, :int, :pointer, :int, :int ], :void - attach_function :fdb_transaction_add_conflict_range, [ :pointer, :pointer, :int, :pointer, :int, :int ], :int - attach_function :fdb_transaction_get_addresses_for_key, [ :pointer, :pointer, :int ], :pointer - attach_function :fdb_transaction_set_option, [ :pointer, :int, :pointer, :int ], :fdb_error - attach_function :fdb_transaction_set_read_version, [ :pointer, :int64 ], :void - attach_function :fdb_transaction_get_read_version, [ :pointer ], :pointer - attach_function :fdb_transaction_get, [ :pointer, :pointer, :int, :int ], :pointer - attach_function :fdb_transaction_get_key, [ :pointer, :pointer, :int, :int, :int, :int ], :pointer - attach_function :fdb_transaction_get_range, [ :pointer, :pointer, :int, :int, :int, :pointer, :int, :int, :int, :int, :int, :int, :int, :int, :int ], :pointer - attach_function :fdb_transaction_set, [ :pointer, :pointer, :int, :pointer, :int ], :void - attach_function :fdb_transaction_clear, [ :pointer, :pointer, :int ], :void - attach_function :fdb_transaction_clear_range, [ :pointer, :pointer, :int, :pointer, :int ], :void - attach_function :fdb_transaction_watch, [ :pointer, :pointer, :int ], :pointer - attach_function :fdb_transaction_commit, [ :pointer ], :pointer - attach_function :fdb_transaction_get_committed_version, [ :pointer, :pointer ], :fdb_error - attach_function :fdb_transaction_get_versionstamp, [ :pointer ], :pointer - attach_function :fdb_transaction_on_error, [ :pointer, :fdb_error ], :pointer - attach_function :fdb_transaction_reset, [ :pointer ], :void - attach_function :fdb_select_api_version_impl, [ :int, :int ], :fdb_error attach_function :fdb_get_max_api_version, [ ], :int + def self.init_c_api + attach_function :fdb_get_error, [ :fdb_error ], :string + + attach_function :fdb_network_set_option, [ :int, :pointer, :int ], :fdb_error + attach_function :fdb_setup_network, [ ], :fdb_error + attach_function :fdb_run_network, [ ], :fdb_error, :blocking => true + attach_function :fdb_stop_network, [ ], :fdb_error + + attach_function :fdb_future_cancel, [ :pointer ], :void + attach_function :fdb_future_release_memory, [ :pointer ], :void + attach_function :fdb_future_destroy, [ :pointer ], :void + attach_function :fdb_future_block_until_ready, [ :pointer ], :fdb_error, :blocking => true + attach_function :fdb_future_is_ready, [ :pointer ], :fdb_bool + + callback :fdb_future_callback, [ :pointer, :pointer ], :void + attach_function :fdb_future_set_callback, [ :pointer, :fdb_future_callback, :pointer ], :fdb_error + + attach_function :fdb_future_get_error, [ :pointer ], :fdb_error + attach_function :fdb_future_get_version, [ :pointer, :pointer ], :fdb_error + attach_function :fdb_future_get_key, [ :pointer, :pointer, :pointer ], :fdb_error + attach_function :fdb_future_get_value, [ :pointer, :pointer, :pointer, :pointer ], :fdb_error + attach_function :fdb_future_get_keyvalue_array, [ :pointer, :pointer, :pointer, :pointer ], :fdb_error + attach_function :fdb_future_get_string_array, [ :pointer, :pointer, :pointer ], :fdb_error + + attach_function :fdb_create_database, [ 
:string, :pointer ], :fdb_error + + attach_function :fdb_database_destroy, [ :pointer ], :void + attach_function :fdb_database_set_option, [ :pointer, :int, :pointer, :int ], :fdb_error + + attach_function :fdb_database_create_transaction, [ :pointer, :pointer ], :fdb_error + attach_function :fdb_transaction_destroy, [ :pointer ], :void + attach_function :fdb_transaction_cancel, [ :pointer ], :void + attach_function :fdb_transaction_atomic_op, [ :pointer, :pointer, :int, :pointer, :int, :int ], :void + attach_function :fdb_transaction_add_conflict_range, [ :pointer, :pointer, :int, :pointer, :int, :int ], :int + attach_function :fdb_transaction_get_addresses_for_key, [ :pointer, :pointer, :int ], :pointer + attach_function :fdb_transaction_set_option, [ :pointer, :int, :pointer, :int ], :fdb_error + attach_function :fdb_transaction_set_read_version, [ :pointer, :int64 ], :void + attach_function :fdb_transaction_get_read_version, [ :pointer ], :pointer + attach_function :fdb_transaction_get, [ :pointer, :pointer, :int, :int ], :pointer + attach_function :fdb_transaction_get_key, [ :pointer, :pointer, :int, :int, :int, :int ], :pointer + attach_function :fdb_transaction_get_range, [ :pointer, :pointer, :int, :int, :int, :pointer, :int, :int, :int, :int, :int, :int, :int, :int, :int ], :pointer + attach_function :fdb_transaction_set, [ :pointer, :pointer, :int, :pointer, :int ], :void + attach_function :fdb_transaction_clear, [ :pointer, :pointer, :int ], :void + attach_function :fdb_transaction_clear_range, [ :pointer, :pointer, :int, :pointer, :int ], :void + attach_function :fdb_transaction_watch, [ :pointer, :pointer, :int ], :pointer + attach_function :fdb_transaction_commit, [ :pointer ], :pointer + attach_function :fdb_transaction_get_committed_version, [ :pointer, :pointer ], :fdb_error + attach_function :fdb_transaction_get_versionstamp, [ :pointer ], :pointer + attach_function :fdb_transaction_on_error, [ :pointer, :fdb_error ], :pointer + attach_function :fdb_transaction_reset, [ :pointer ], :void + end + class KeyValueStruct < FFI::Struct pack 4 layout :key, :pointer, From 65aa5105c7799c963ced720308c13cbd8bcb4fdb Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Mon, 14 Jan 2019 10:53:03 -0800 Subject: [PATCH 018/226] Fix: The multi-version client would block when trying to connect to an external cluster. 
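The blocking happened because DLApi::createDatabase called blockUntilReady() on the legacy cluster-to-database future chain before returning. The patch below instead lets a DLDatabase be constructed from a future that is not yet ready and exposes onReady(), which the multi-version connection monitor chains asynchronously. A minimal sketch of that shape, using standard futures and stand-in names rather than the actual FDB classes:

    #include <future>
    #include <utility>

    // A handle returned to the caller immediately; readiness is observed via
    // onReady() instead of being awaited inside the factory function.
    class LazyHandle {
    public:
        explicit LazyHandle(std::shared_future<void> ready) : ready_(std::move(ready)) {}
        std::shared_future<void> onReady() const { return ready_; }
    private:
        std::shared_future<void> ready_;
    };

    int main() {
        std::promise<void> connected;
        LazyHandle db(connected.get_future().share()); // handed out at once
        connected.set_value();                         // "connection" completes later
        db.onReady().wait();                           // consumers block only here
    }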
--- fdbclient/MultiVersionTransaction.actor.cpp | 68 ++++++++++++++------- fdbclient/MultiVersionTransaction.h | 11 ++-- 2 files changed, 53 insertions(+), 26 deletions(-) diff --git a/fdbclient/MultiVersionTransaction.actor.cpp b/fdbclient/MultiVersionTransaction.actor.cpp index 6649f3babe..6b9be3c679 100644 --- a/fdbclient/MultiVersionTransaction.actor.cpp +++ b/fdbclient/MultiVersionTransaction.actor.cpp @@ -212,6 +212,21 @@ void DLTransaction::reset() { } // DLDatabase +DLDatabase::DLDatabase(Reference api, ThreadFuture dbFuture) : api(api), db(nullptr) { + ready = mapThreadFuture(dbFuture, [this](ErrorOr db){ + if(db.isError()) { + return ErrorOr(db.getError()); + } + + this->db = db.get(); + return ErrorOr(Void()); + }); +} + +ThreadFuture DLDatabase::onReady() { + return ready; +} + Reference DLDatabase::createTransaction() { FdbCApi::FDBTransaction *tr; api->databaseCreateTransaction(db, &tr); @@ -346,7 +361,7 @@ void DLApi::stopNetwork() { } } -ThreadFuture> DLApi::createDatabase609(const char *clusterFilePath) { +Reference DLApi::createDatabase609(const char *clusterFilePath) { FdbCApi::FDBFuture *f = api->createCluster(clusterFilePath); auto clusterFuture = toThreadFuture(api, f, [](FdbCApi::FDBFuture *f, FdbCApi *api) { @@ -356,22 +371,24 @@ ThreadFuture> DLApi::createDatabase609(const char *clusterF }); Reference innerApi = api; - return flatMapThreadFuture>(clusterFuture, [innerApi](ErrorOr cluster) { + auto dbFuture = flatMapThreadFuture(clusterFuture, [innerApi](ErrorOr cluster) { if(cluster.isError()) { - return ErrorOr>>(cluster.getError()); + return ErrorOr>(cluster.getError()); } - auto dbFuture = toThreadFuture>(innerApi, innerApi->clusterCreateDatabase(cluster.get(), (uint8_t*)"DB", 2), [](FdbCApi::FDBFuture *f, FdbCApi *api) { + auto innerDbFuture = toThreadFuture(innerApi, innerApi->clusterCreateDatabase(cluster.get(), (uint8_t*)"DB", 2), [](FdbCApi::FDBFuture *f, FdbCApi *api) { FdbCApi::FDBDatabase *db; api->futureGetDatabase(f, &db); - return Reference(new DLDatabase(Reference::addRef(api), db)); + return db; }); - return ErrorOr>>(mapThreadFuture, Reference>(dbFuture, [cluster, innerApi](ErrorOr> db) { + return ErrorOr>(mapThreadFuture(innerDbFuture, [cluster, innerApi](ErrorOr db) { innerApi->clusterDestroy(cluster.get()); return db; })); }); + + return Reference(new DLDatabase(api, dbFuture)); } Reference DLApi::createDatabase(const char *clusterFilePath) { @@ -381,14 +398,7 @@ Reference DLApi::createDatabase(const char *clusterFilePath) { return Reference(new DLDatabase(api, db)); } else { - auto f = DLApi::createDatabase609(clusterFilePath); - - f.blockUntilReady(); - if(f.isError()) { - throw f.getError(); - } - - return f.get(); + return DLApi::createDatabase609(clusterFilePath); } } @@ -653,17 +663,31 @@ void MultiVersionDatabase::Connector::connect() { } candidateDatabase = client->api->createDatabase(clusterFilePath.c_str()); - tr = candidateDatabase->createTransaction(); - connectionFuture = mapThreadFuture(tr->getReadVersion(), [this](ErrorOr v) { - // If the version attempt returns an error, we regard that as a connection (except operation_cancelled) - if(v.isError() && v.getError().code() == error_code_operation_cancelled) { - return ErrorOr(v.getError()); - } - else { - return ErrorOr(Void()); + if(client->external) { + connectionFuture = candidateDatabase.castTo()->onReady(); + } + else { + connectionFuture = ThreadFuture(Void()); + } + + connectionFuture = flatMapThreadFuture(connectionFuture, [this](ErrorOr ready) { + if(ready.isError()) { + 
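+					// An error from the external database's onReady() future is passed
+					// through to the connection monitor as a failed connection attempt.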
return ErrorOr>(ready.getError()); } + + tr = candidateDatabase->createTransaction(); + return ErrorOr>(mapThreadFuture(tr->getReadVersion(), [this](ErrorOr v) { + // If the version attempt returns an error, we regard that as a connection (except operation_cancelled) + if(v.isError() && v.getError().code() == error_code_operation_cancelled) { + return ErrorOr(v.getError()); + } + else { + return ErrorOr(Void()); + } + })); }); + int userParam; connectionFuture.callOrSetAsCallback(this, userParam, 0); } diff --git a/fdbclient/MultiVersionTransaction.h b/fdbclient/MultiVersionTransaction.h index 61c8be7c85..05edcf3687 100644 --- a/fdbclient/MultiVersionTransaction.h +++ b/fdbclient/MultiVersionTransaction.h @@ -159,9 +159,12 @@ private: class DLDatabase : public IDatabase, ThreadSafeReferenceCounted { public: - DLDatabase(Reference api, FdbCApi::FDBDatabase *db) : api(api), db(db) {} + DLDatabase(Reference api, FdbCApi::FDBDatabase *db) : api(api), db(db), ready(Void()) {} + DLDatabase(Reference api, ThreadFuture dbFuture); ~DLDatabase() { api->databaseDestroy(db); } + ThreadFuture onReady(); + Reference createTransaction(); void setOption(FDBDatabaseOptions::Option option, Optional value = Optional()); @@ -170,7 +173,8 @@ public: private: const Reference api; - FdbCApi::FDBDatabase* const db; + FdbCApi::FDBDatabase* db; // Always set if API version >= 610, otherwise guaranteed to be set when onReady future is set + ThreadFuture ready; }; class DLApi : public IClientApi { @@ -186,7 +190,7 @@ public: void stopNetwork(); Reference createDatabase(const char *clusterFilePath); - ThreadFuture> createDatabase609(const char *clusterFilePath); // legacy database creation + Reference createDatabase609(const char *clusterFilePath); // legacy database creation void addNetworkThreadCompletionHook(void (*hook)(void*), void *hookParameter); @@ -328,7 +332,6 @@ private: Reference db; const Reference>> dbVar; - ThreadFuture> dbFuture; ThreadFuture changed; bool cancelled; From e9ffe09b8b1522a0ef6cde948c1cb92f78d12e61 Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Mon, 14 Jan 2019 10:55:28 -0800 Subject: [PATCH 019/226] Go bindings reported the wrong required version when loading an incompatible version of fdb_c. --- bindings/go/src/fdb/fdb.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bindings/go/src/fdb/fdb.go b/bindings/go/src/fdb/fdb.go index 76cf5ac67e..54992f902a 100644 --- a/bindings/go/src/fdb/fdb.go +++ b/bindings/go/src/fdb/fdb.go @@ -138,7 +138,7 @@ func APIVersion(version int) error { if e == 2203 { maxSupportedVersion := C.fdb_get_max_api_version() if headerVersion > int(maxSupportedVersion) { - return fmt.Errorf("This version of the FoundationDB Go binding is not supported by the installed FoundationDB C library. The binding requires a library that supports API version %d, but the installed library supports a maximum version of %d.", version, maxSupportedVersion) + return fmt.Errorf("This version of the FoundationDB Go binding is not supported by the installed FoundationDB C library. The binding requires a library that supports API version %d, but the installed library supports a maximum version of %d.", headerVersion, maxSupportedVersion) } return fmt.Errorf("API version %d is not supported by the installed FoundationDB C library.", version) } From 8e05e95045ac366167533a2bfe72f78f567b1617 Mon Sep 17 00:00:00 2001 From: "A.J. 
Beamon" Date: Fri, 18 Jan 2019 16:18:34 -0800 Subject: [PATCH 020/226] Added the ability to configure the latency band settings by setting a special key in \xff keyspace. --- fdbcli/fdbcli.actor.cpp | 2 +- fdbclient/ManagementAPI.actor.cpp | 185 +++++++++++++------------- fdbclient/ManagementAPI.h | 2 +- fdbclient/Schemas.cpp | 45 ++++++- fdbclient/Schemas.h | 3 +- fdbclient/SystemData.cpp | 3 + fdbclient/SystemData.h | 3 + fdbserver/ClusterController.actor.cpp | 38 ++++++ fdbserver/LatencyBandConfig.cpp | 120 +++++++++++++++++ fdbserver/LatencyBandConfig.h | 106 +++++++++++++++ fdbserver/MasterProxyServer.actor.cpp | 46 +++++-- fdbserver/ServerDBInfo.h | 4 +- fdbserver/Status.actor.cpp | 49 ++++--- fdbserver/fdbserver.vcxproj | 4 +- fdbserver/fdbserver.vcxproj.filters | 5 +- fdbserver/storageserver.actor.cpp | 44 ++++-- flow/Stats.h | 45 ++++--- 17 files changed, 551 insertions(+), 153 deletions(-) create mode 100644 fdbserver/LatencyBandConfig.cpp create mode 100644 fdbserver/LatencyBandConfig.h diff --git a/fdbcli/fdbcli.actor.cpp b/fdbcli/fdbcli.actor.cpp index 61058409af..dbd5a94c57 100644 --- a/fdbcli/fdbcli.actor.cpp +++ b/fdbcli/fdbcli.actor.cpp @@ -1650,7 +1650,7 @@ ACTOR Future fileConfigure(Database db, std::string filePath, bool isNewDa StatusObject configJSON = config.get_obj(); json_spirit::mValue schema; - if(!json_spirit::read_string( JSONSchemas::configurationSchema.toString(), schema )) { + if(!json_spirit::read_string( JSONSchemas::clusterConfigurationSchema.toString(), schema )) { ASSERT(false); } diff --git a/fdbclient/ManagementAPI.actor.cpp b/fdbclient/ManagementAPI.actor.cpp index 86eb89ac09..67a430d634 100644 --- a/fdbclient/ManagementAPI.actor.cpp +++ b/fdbclient/ManagementAPI.actor.cpp @@ -1553,120 +1553,121 @@ void schemaCoverage( std::string const& spath, bool covered ) { } } -bool schemaMatch( StatusObject const schema, StatusObject const result, std::string& errorStr, Severity sev, bool checkCoverage, std::string path, std::string schema_path ) { +bool schemaMatch( json_spirit::mValue const& schemaValue, json_spirit::mValue const& resultValue, std::string& errorStr, Severity sev, bool checkCoverage, std::string path, std::string schemaPath ) { // Returns true if everything in `result` is permitted by `schema` - - // Really this should recurse on "values" rather than "objects"? - bool ok = true; try { - for(auto& rkv : result) { - auto& key = rkv.first; - auto& rv = rkv.second; - std::string kpath = path + "." + key; - std::string spath = schema_path + "." + key; + if(normJSONType(schemaValue.type()) != normJSONType(resultValue.type())) { + errorStr += format("ERROR: Incorrect value type for key `%s'\n", path.c_str()); + TraceEvent(sev, "SchemaMismatch").detail("Path", path).detail("SchemaType", schemaValue.type()).detail("ValueType", resultValue.type()); + return false; + } - if(checkCoverage) schemaCoverage(spath); + if(resultValue.type() == json_spirit::obj_type) { + auto& result = resultValue.get_obj(); + auto& schema = schemaValue.get_obj(); - if (!schema.count(key)) { - errorStr += format("ERROR: Unknown key `%s'\n", kpath.c_str()); - TraceEvent(sev, "SchemaMismatch").detail("Path", kpath).detail("SchemaPath", spath); - ok = false; - continue; - } - auto& sv = schema.at(key); + for(auto& rkv : result) { + auto& key = rkv.first; + auto& rv = rkv.second; + std::string kpath = path + "." + key; + std::string spath = schemaPath + "." 
+ key; - if (sv.type() == json_spirit::obj_type && sv.get_obj().count("$enum")) { - auto& enum_values = sv.get_obj().at("$enum").get_array(); + if(checkCoverage) { + schemaCoverage(spath); + } - bool any_match = false; - for(auto& enum_item : enum_values) - if (enum_item == rv) { - any_match = true; - if(checkCoverage) schemaCoverage(spath + ".$enum." + enum_item.get_str()); - break; + if(!schema.count(key)) { + errorStr += format("ERROR: Unknown key `%s'\n", kpath.c_str()); + TraceEvent(sev, "SchemaMismatch").detail("Path", kpath).detail("SchemaPath", spath); + ok = false; + continue; + } + auto& sv = schema.at(key); + + if(sv.type() == json_spirit::obj_type && sv.get_obj().count("$enum")) { + auto& enum_values = sv.get_obj().at("$enum").get_array(); + + bool any_match = false; + for(auto& enum_item : enum_values) + if(enum_item == rv) { + any_match = true; + if(checkCoverage) { + schemaCoverage(spath + ".$enum." + enum_item.get_str()); + } + break; + } + if(!any_match) { + errorStr += format("ERROR: Unknown value `%s' for key `%s'\n", json_spirit::write_string(rv).c_str(), kpath.c_str()); + TraceEvent(sev, "SchemaMismatch").detail("Path", kpath).detail("SchemaEnumItems", enum_values.size()).detail("Value", json_spirit::write_string(rv)); + if(checkCoverage) { + schemaCoverage(spath + ".$enum." + json_spirit::write_string(rv)); + } + ok = false; } - if (!any_match) { - errorStr += format("ERROR: Unknown value `%s' for key `%s'\n", json_spirit::write_string(rv).c_str(), kpath.c_str()); - TraceEvent(sev, "SchemaMismatch").detail("Path", kpath).detail("SchemaEnumItems", enum_values.size()).detail("Value", json_spirit::write_string(rv)); - if(checkCoverage) schemaCoverage(spath + ".$enum." + json_spirit::write_string(rv)); - ok = false; - } - } else if (sv.type() == json_spirit::obj_type && sv.get_obj().count("$map")) { - if (rv.type() != json_spirit::obj_type) { - errorStr += format("ERROR: Expected an object as the value for key `%s'\n", kpath.c_str()); - TraceEvent(sev, "SchemaMismatch").detail("Path", kpath).detail("SchemaType", sv.type()).detail("ValueType", rv.type()); - ok = false; - continue; - } - if(sv.get_obj().at("$map").type() != json_spirit::obj_type) { - continue; - } - auto& schema_obj = sv.get_obj().at("$map").get_obj(); - auto& value_obj = rv.get_obj(); - - if(checkCoverage) schemaCoverage(spath + ".$map"); - - for(auto& value_pair : value_obj) { - auto vpath = kpath + "[" + value_pair.first + "]"; - auto upath = spath + ".$map"; - if (value_pair.second.type() != json_spirit::obj_type) { - errorStr += format("ERROR: Expected an object for `%s'\n", vpath.c_str()); - TraceEvent(sev, "SchemaMismatch").detail("Path", vpath).detail("ValueType", value_pair.second.type()); + } else if(sv.type() == json_spirit::obj_type && sv.get_obj().count("$map")) { + if(rv.type() != json_spirit::obj_type) { + errorStr += format("ERROR: Expected an object as the value for key `%s'\n", kpath.c_str()); + TraceEvent(sev, "SchemaMismatch").detail("Path", kpath).detail("SchemaType", sv.type()).detail("ValueType", rv.type()); ok = false; continue; } - if (!schemaMatch(schema_obj, value_pair.second.get_obj(), errorStr, sev, checkCoverage, vpath, upath)) - ok = false; - } - } else { - // The schema entry isn't an operator, so it asserts a type and (depending on the type) recursive schema definition - if (normJSONType(sv.type()) != normJSONType(rv.type())) { - errorStr += format("ERROR: Incorrect value type for key `%s'\n", kpath.c_str()); - TraceEvent(sev, "SchemaMismatch").detail("Path", 
kpath).detail("SchemaType", sv.type()).detail("ValueType", rv.type()); - ok = false; - continue; - } - if (rv.type() == json_spirit::array_type) { - auto& value_array = rv.get_array(); - auto& schema_array = sv.get_array(); - if (!schema_array.size()) { - // An empty schema array means that the value array is required to be empty - if (value_array.size()) { - errorStr += format("ERROR: Expected an empty array for key `%s'\n", kpath.c_str()); - TraceEvent(sev, "SchemaMismatch").detail("Path", kpath).detail("SchemaSize", schema_array.size()).detail("ValueSize", value_array.size()); + if(sv.get_obj().at("$map").type() != json_spirit::obj_type) { + continue; + } + auto& schemaVal = sv.get_obj().at("$map"); + auto& valueObj = rv.get_obj(); + + if(checkCoverage) { + schemaCoverage(spath + ".$map"); + } + + for(auto& valuePair : valueObj) { + auto vpath = kpath + "[" + valuePair.first + "]"; + auto upath = spath + ".$map"; + if (valuePair.second.type() != json_spirit::obj_type) { + errorStr += format("ERROR: Expected an object for `%s'\n", vpath.c_str()); + TraceEvent(sev, "SchemaMismatch").detail("Path", vpath).detail("ValueType", valuePair.second.type()); ok = false; continue; } - } else if (schema_array.size() == 1 && schema_array[0].type() == json_spirit::obj_type) { - // A one item schema array means that all items in the value must match the first item in the schema - auto& schema_obj = schema_array[0].get_obj(); - int index = 0; - for(auto &value_item : value_array) { - if (value_item.type() != json_spirit::obj_type) { - errorStr += format("ERROR: Expected all array elements to be objects for key `%s'\n", kpath.c_str()); - TraceEvent(sev, "SchemaMismatch").detail("Path", kpath + format("[%d]",index)).detail("ValueType", value_item.type()); - ok = false; - continue; - } - if (!schemaMatch(schema_obj, value_item.get_obj(), errorStr, sev, checkCoverage, kpath + format("[%d]", index), spath + "[0]")) - ok = false; - index++; + if(!schemaMatch(schemaVal, valuePair.second, errorStr, sev, checkCoverage, vpath, upath)) { + ok = false; } - } else - ASSERT(false); // Schema doesn't make sense - } else if (rv.type() == json_spirit::obj_type) { - auto& schema_obj = sv.get_obj(); - auto& value_obj = rv.get_obj(); - if (!schemaMatch(schema_obj, value_obj, errorStr, sev, checkCoverage, kpath, spath)) + } + } else { + if(!schemaMatch(sv, rv, errorStr, sev, checkCoverage, kpath, spath)) { ok = false; + } } } + } else if(resultValue.type() == json_spirit::array_type) { + auto& valueArray = resultValue.get_array(); + auto& schemaArray = schemaValue.get_array(); + if(!schemaArray.size()) { + // An empty schema array means that the value array is required to be empty + if(valueArray.size()) { + errorStr += format("ERROR: Expected an empty array for key `%s'\n", path.c_str()); + TraceEvent(sev, "SchemaMismatch").detail("Path", path).detail("SchemaSize", schemaArray.size()).detail("ValueSize", valueArray.size()); + return false; + } + } else if(schemaArray.size() == 1) { + // A one item schema array means that all items in the value must match the first item in the schema + int index = 0; + for(auto &valueItem : valueArray) { + if(!schemaMatch(schemaArray[0], valueItem, errorStr, sev, checkCoverage, path + format("[%d]", index), schemaPath + "[0]")) { + ok = false; + } + index++; + } + } else { + ASSERT(false); // Schema doesn't make sense + } } return ok; } catch (std::exception& e) { - TraceEvent(SevError, "SchemaMatchException").detail("What", e.what()).detail("Path", path).detail("SchemaPath", schema_path); 
+ TraceEvent(SevError, "SchemaMatchException").detail("What", e.what()).detail("Path", path).detail("SchemaPath", schemaPath); throw unknown_error(); } } diff --git a/fdbclient/ManagementAPI.h b/fdbclient/ManagementAPI.h index ce36d60f88..bebe2c1bdb 100644 --- a/fdbclient/ManagementAPI.h +++ b/fdbclient/ManagementAPI.h @@ -177,6 +177,6 @@ Future waitForPrimaryDC( Database const& cx, StringRef const& dcId ); Future> getCoordinators( Database const& cx ); void schemaCoverage( std::string const& spath, bool covered=true ); -bool schemaMatch( StatusObject const schema, StatusObject const result, std::string& errorStr, Severity sev=SevError, bool checkCoverage=false, std::string path = std::string(), std::string schema_path = std::string() ); +bool schemaMatch( json_spirit::mValue const& schema, json_spirit::mValue const& result, std::string& errorStr, Severity sev=SevError, bool checkCoverage=false, std::string path = std::string(), std::string schema_path = std::string() ); #endif diff --git a/fdbclient/Schemas.cpp b/fdbclient/Schemas.cpp index ddea34555d..13b90b4b14 100644 --- a/fdbclient/Schemas.cpp +++ b/fdbclient/Schemas.cpp @@ -124,6 +124,27 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema( "hz":0.0, "counter":0, "roughness":0.0 + }, + "grv_latency_bands":{ + "0.01": { + "hz":0.0, + "counter":0, + "roughness":0.0 + } + }, + "read_latency_bands":{ + "0.01": { + "hz":0.0, + "counter":0, + "roughness":0.0 + } + }, + "commit_latency_bands":{ + "0.01": { + "hz":0.0, + "counter":0, + "roughness":0.0 + } } } ], @@ -603,7 +624,7 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema( } })statusSchema"); -const KeyRef JSONSchemas::configurationSchema = LiteralStringRef(R"configSchema( +const KeyRef JSONSchemas::clusterConfigurationSchema = LiteralStringRef(R"configSchema( { "create":{ "$enum":[ @@ -670,3 +691,25 @@ const KeyRef JSONSchemas::configurationSchema = LiteralStringRef(R"configSchema( "auto_logs":3, "proxies":5 })configSchema"); + +const KeyRef JSONSchemas::latencyBandConfigurationSchema = LiteralStringRef(R"configSchema( +{ + "get_read_version":{ + "bands":[ + 0.0 + ] + }, + "read":{ + "bands":[ + 0.0 + ], + "max_key_selector_offset":0, + "max_read_bytes":0 + }, + "commit":{ + "bands":[ + 0.0 + ], + "max_commit_bytes":0 + } +})configSchema"); diff --git a/fdbclient/Schemas.h b/fdbclient/Schemas.h index 8070fceac3..d71a88f7ba 100644 --- a/fdbclient/Schemas.h +++ b/fdbclient/Schemas.h @@ -28,7 +28,8 @@ struct JSONSchemas { static const KeyRef statusSchema; - static const KeyRef configurationSchema; + static const KeyRef clusterConfigurationSchema; + static const KeyRef latencyBandConfigurationSchema; }; #endif /* FDBCLIENT_SCHEMAS_H */ diff --git a/fdbclient/SystemData.cpp b/fdbclient/SystemData.cpp index 2952ed3e75..be5faf661a 100644 --- a/fdbclient/SystemData.cpp +++ b/fdbclient/SystemData.cpp @@ -434,6 +434,9 @@ const KeyRangeRef fdbClientInfoPrefixRange(LiteralStringRef("\xff\x02/fdbClientI const KeyRef fdbClientInfoTxnSampleRate = LiteralStringRef("\xff\x02/fdbClientInfo/client_txn_sample_rate/"); const KeyRef fdbClientInfoTxnSizeLimit = LiteralStringRef("\xff\x02/fdbClientInfo/client_txn_size_limit/"); +// Request latency measurement key +const KeyRef latencyBandConfigKey = LiteralStringRef("\xff\x02/latencyBandConfig"); + // Keyspace to maintain wall clock to version map const KeyRangeRef timeKeeperPrefixRange(LiteralStringRef("\xff\x02/timeKeeper/map/"), LiteralStringRef("\xff\x02/timeKeeper/map0")); const KeyRef 
timeKeeperVersionKey = LiteralStringRef("\xff\x02/timeKeeper/version"); diff --git a/fdbclient/SystemData.h b/fdbclient/SystemData.h index 4b50f6ff3f..6832948831 100644 --- a/fdbclient/SystemData.h +++ b/fdbclient/SystemData.h @@ -212,6 +212,9 @@ extern const KeyRangeRef fdbClientInfoPrefixRange; extern const KeyRef fdbClientInfoTxnSampleRate; extern const KeyRef fdbClientInfoTxnSizeLimit; +// Request latency measurement key +extern const KeyRef latencyBandConfigKey; + // Keyspace to maintain wall clock to version map extern const KeyRangeRef timeKeeperPrefixRange; extern const KeyRef timeKeeperVersionKey; diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 8e7de1247f..c9707adde4 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -31,6 +31,7 @@ #include "ClusterRecruitmentInterface.h" #include "ServerDBInfo.h" #include "Status.h" +#include "fdbserver/LatencyBandConfig.h" #include #include "fdbclient/DatabaseContext.h" #include "RecoveryState.h" @@ -1960,6 +1961,42 @@ ACTOR Future monitorProcessClasses(ClusterControllerData *self) { } } +ACTOR Future monitorServerInfoConfig(ClusterControllerData::DBInfo* db) { + loop { + state ReadYourWritesTransaction tr(db->db); + loop { + try { + tr.setOption(FDBTransactionOptions::READ_SYSTEM_KEYS); + tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE); + tr.setOption(FDBTransactionOptions::READ_LOCK_AWARE); + + Optional configVal = wait(tr.get(latencyBandConfigKey)); + Optional config; + if(configVal.present()) { + config = LatencyBandConfig::parse(configVal.get()); + } + + ServerDBInfo serverInfo = db->serverInfo->get(); + if(config != serverInfo.latencyBandConfig) { + TraceEvent("LatencyBandConfigChanged").detail("Present", config.present()); + serverInfo.id = g_random->randomUniqueID(); + serverInfo.latencyBandConfig = config; + db->serverInfo->set(serverInfo); + } + + state Future configChangeFuture = tr.watch(latencyBandConfigKey); + Void _ = wait(tr.commit()); + Void _ = wait(configChangeFuture); + + break; + } + catch (Error &e) { + Void _ = wait(tr.onError(e)); + } + } + } +} + ACTOR Future monitorClientTxnInfoConfigs(ClusterControllerData::DBInfo* db) { loop { state ReadYourWritesTransaction tr(db->db); @@ -2183,6 +2220,7 @@ ACTOR Future clusterControllerCore( ClusterControllerFullInterface interf, addActor.send( statusServer( interf.clientInterface.databaseStatus.getFuture(), &self, coordinators)); addActor.send( timeKeeper(&self) ); addActor.send( monitorProcessClasses(&self) ); + addActor.send( monitorServerInfoConfig(&self.db) ); addActor.send( monitorClientTxnInfoConfigs(&self.db) ); addActor.send( updatedChangingDatacenters(&self) ); addActor.send( updatedChangedDatacenters(&self) ); diff --git a/fdbserver/LatencyBandConfig.cpp b/fdbserver/LatencyBandConfig.cpp new file mode 100644 index 0000000000..9f84f24ded --- /dev/null +++ b/fdbserver/LatencyBandConfig.cpp @@ -0,0 +1,120 @@ +/* + * LatencyBandConfig.cpp + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2019 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "fdbserver/LatencyBandConfig.h" + +#include "fdbclient/ManagementAPI.h" +#include "fdbclient/Schemas.h" + +bool operator==(LatencyBandConfig::RequestConfig const& lhs, LatencyBandConfig::RequestConfig const& rhs) { + return typeid(lhs) == typeid(rhs) && lhs.isEqual(rhs); +} + +bool operator!=(LatencyBandConfig::RequestConfig const& lhs, LatencyBandConfig::RequestConfig const& rhs) { + return !(lhs == rhs); +} + +bool LatencyBandConfig::RequestConfig::isEqual(RequestConfig const& r) const { + return bands == r.bands; +}; + +void LatencyBandConfig::RequestConfig::fromJson(JSONDoc json) { + json_spirit::mArray bandsArray; + if(json.get("bands", bandsArray)) { + for(auto b : bandsArray) { + bands.insert(b.get_real()); + } + } +} + +void LatencyBandConfig::ReadConfig::fromJson(JSONDoc json) { + RequestConfig::fromJson(json); + + int value; + if(json.get("max_read_bytes", value)) { + maxReadBytes = value; + } + if(json.get("max_key_selector_offset", value)) { + maxKeySelectorOffset = value; + } +} + +bool LatencyBandConfig::ReadConfig::isEqual(RequestConfig const& r) const { + ReadConfig const& other = static_cast(r); + return RequestConfig::isEqual(r) && maxReadBytes == other.maxReadBytes && maxKeySelectorOffset == other.maxKeySelectorOffset; +} + +void LatencyBandConfig::CommitConfig::fromJson(JSONDoc json) { + RequestConfig::fromJson(json); + + int value; + if(json.get("max_commit_bytes", value)) { + maxCommitBytes = value; + } +} + +bool LatencyBandConfig::CommitConfig::isEqual(RequestConfig const& r) const { + CommitConfig const& other = static_cast(r); + return RequestConfig::isEqual(r) && maxCommitBytes == other.maxCommitBytes; +} + +Optional LatencyBandConfig::parse(ValueRef configurationString) { + Optional config; + if(configurationString.size() == 0) { + return config; + } + + json_spirit::mValue parsedConfig; + if(!json_spirit::read_string(configurationString.toString(), parsedConfig)) { + TraceEvent(SevWarnAlways, "InvalidLatencyBandConfiguration").detail("Reason", "InvalidJSON").detail("Configuration", printable(configurationString)); + return config; + } + + json_spirit::mObject configJson = parsedConfig.get_obj(); + + json_spirit::mValue schema; + if(!json_spirit::read_string(JSONSchemas::latencyBandConfigurationSchema.toString(), schema)) { + ASSERT(false); + } + + std::string errorStr; + if(!schemaMatch(schema.get_obj(), configJson, errorStr)) { + TraceEvent(SevWarnAlways, "InvalidLatencyBandConfiguration").detail("Reason", "SchemaMismatch").detail("Configuration", printable(configurationString)).detail("Error", errorStr); + return config; + } + + JSONDoc configDoc(configJson); + + config = LatencyBandConfig(); + + config.get().grvConfig.fromJson(configDoc.subDoc("get_read_version")); + config.get().readConfig.fromJson(configDoc.subDoc("read")); + config.get().commitConfig.fromJson(configDoc.subDoc("commit")); + + return config; +} + +bool LatencyBandConfig::operator==(LatencyBandConfig const& r) const { + return grvConfig == r.grvConfig && readConfig == r.readConfig && commitConfig == r.commitConfig; +} + +bool 
LatencyBandConfig::operator!=(LatencyBandConfig const& r) const { + return !(*this == r); +} diff --git a/fdbserver/LatencyBandConfig.h b/fdbserver/LatencyBandConfig.h new file mode 100644 index 0000000000..f355006dd9 --- /dev/null +++ b/fdbserver/LatencyBandConfig.h @@ -0,0 +1,106 @@ +/* + * LatencyBandConfig.h + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2019 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FDBSERVER_LATENCYBANDCONFIG_H +#define FDBSERVER_LATENCYBANDCONFIG_H +#pragma once + +#include "fdbclient/FDBTypes.h" +#include "fdbclient/JSONDoc.h" + +struct LatencyBandConfig { + struct RequestConfig { + std::set bands; + + friend bool operator==(RequestConfig const& lhs, RequestConfig const& rhs); + friend bool operator!=(RequestConfig const& lhs, RequestConfig const& rhs); + + virtual void fromJson(JSONDoc json); + + template + void serialize(Ar& ar) { + uint64_t bandsSize = (uint64_t)bands.size(); + ar & bandsSize; + + if(ar.isDeserializing) { + double band; + for(uint64_t i = 0; i < bandsSize; i++) { + ar & band; + bands.insert(band); + } + } + else { + for(double band : bands) { + ar & band; + } + } + } + + protected: + virtual bool isEqual(RequestConfig const& r) const; + }; + + struct GrvConfig : RequestConfig {}; + + struct ReadConfig : RequestConfig { + Optional maxReadBytes; + Optional maxKeySelectorOffset; + + virtual void fromJson(JSONDoc json); + + template + void serialize(Ar& ar) { + ar & *(RequestConfig*)this & maxReadBytes & maxKeySelectorOffset; + } + + protected: + virtual bool isEqual(RequestConfig const& r) const; + }; + + struct CommitConfig : RequestConfig { + Optional maxCommitBytes; + + virtual void fromJson(JSONDoc json); + + template + void serialize(Ar& ar) { + ar & *(RequestConfig*)this & maxCommitBytes; + } + + protected: + virtual bool isEqual(RequestConfig const& r) const; + }; + + GrvConfig grvConfig; + ReadConfig readConfig; + CommitConfig commitConfig; + + template + void serialize(Ar& ar) { + ar & grvConfig & readConfig & commitConfig; + } + + static Optional parse(ValueRef configurationString); + + bool operator==(LatencyBandConfig const& r) const; + bool operator!=(LatencyBandConfig const& r) const; +}; + +#endif diff --git a/fdbserver/MasterProxyServer.actor.cpp b/fdbserver/MasterProxyServer.actor.cpp index c85b409239..e573d74646 100644 --- a/fdbserver/MasterProxyServer.actor.cpp +++ b/fdbserver/MasterProxyServer.actor.cpp @@ -38,6 +38,7 @@ #include "flow/Stats.h" #include "ApplyMetadataMutation.h" #include "RecoveryState.h" +#include "fdbserver/LatencyBandConfig.h" #include "fdbclient/Atomic.h" #include "flow/TDMetric.actor.h" @@ -72,16 +73,6 @@ struct ProxyStats { specialCounter(cc, "CommittedVersion", [pCommittedVersion](){ return pCommittedVersion->get(); }); specialCounter(cc, "CommitBatchesMemBytesCount", [commitBatchesMemBytesCountPtr]() { return *commitBatchesMemBytesCountPtr; }); logger = 
traceCounters("ProxyMetrics", id, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, &cc, "ProxyMetrics"); - - commitLatencyBands.addThreshold(0.001); - commitLatencyBands.addThreshold(0.01); - commitLatencyBands.addThreshold(0.1); - commitLatencyBands.addThreshold(1); - - grvLatencyBands.addThreshold(0.001); - grvLatencyBands.addThreshold(0.01); - grvLatencyBands.addThreshold(0.1); - grvLatencyBands.addThreshold(1); } }; @@ -222,6 +213,8 @@ struct ProxyCommitData { Version lastTxsPop; bool popRemoteTxs; + Optional latencyBandConfig; + //The tag related to a storage server rarely change, so we keep a vector of tags for each key range to be slightly more CPU efficient. //When a tag related to a storage server does change, we empty out all of these vectors to signify they must be repopulated. //We do not repopulate them immediately to avoid a slow task. @@ -982,7 +975,10 @@ ACTOR Future commitBatch( } // TODO: filter if pipelined with large commit - self->stats.commitLatencyBands.addMeasurement(endTime - trs[t].requestTime, maxTransactionBytes > 1e6); + if(self->latencyBandConfig.present()) { + bool filter = maxTransactionBytes > self->latencyBandConfig.get().commitConfig.maxCommitBytes.orDefault(std::numeric_limits::max()); + self->stats.commitLatencyBands.addMeasurement(endTime - trs[t].requestTime, filter); + } } ++self->stats.commitBatchOut; @@ -1439,6 +1435,34 @@ ACTOR Future masterProxyServerCore( } commitData.logSystem->pop(commitData.lastTxsPop, txsTag, 0, tagLocalityRemoteLog); } + + Optional newLatencyBandConfig = db->get().latencyBandConfig; + + if(newLatencyBandConfig.present() != commitData.latencyBandConfig.present() + || (newLatencyBandConfig.present() && newLatencyBandConfig.get().grvConfig != commitData.latencyBandConfig.get().grvConfig)) + { + TraceEvent("LatencyBandGrvUpdatingConfig").detail("Present", newLatencyBandConfig.present()); + commitData.stats.grvLatencyBands.clearBands(); + if(newLatencyBandConfig.present()) { + for(auto band : newLatencyBandConfig.get().grvConfig.bands) { + commitData.stats.grvLatencyBands.addThreshold(band); + } + } + } + + if(newLatencyBandConfig.present() != commitData.latencyBandConfig.present() + || (newLatencyBandConfig.present() && newLatencyBandConfig.get().commitConfig != commitData.latencyBandConfig.get().commitConfig)) + { + TraceEvent("LatencyBandCommitUpdatingConfig").detail("Present", newLatencyBandConfig.present()); + commitData.stats.commitLatencyBands.clearBands(); + if(newLatencyBandConfig.present()) { + for(auto band : newLatencyBandConfig.get().commitConfig.bands) { + commitData.stats.commitLatencyBands.addThreshold(band); + } + } + } + + commitData.latencyBandConfig = newLatencyBandConfig; } when(Void _ = wait(onError)) {} when(std::pair, int> batchedRequests = waitNext(batchedCommits.getFuture())) { diff --git a/fdbserver/ServerDBInfo.h b/fdbserver/ServerDBInfo.h index 641a0fa3c2..7a0613f73e 100644 --- a/fdbserver/ServerDBInfo.h +++ b/fdbserver/ServerDBInfo.h @@ -26,6 +26,7 @@ #include "MasterInterface.h" #include "LogSystemConfig.h" #include "RecoveryState.h" +#include "LatencyBandConfig.h" struct ServerDBInfo { // This structure contains transient information which is broadcast to all workers for a database, @@ -44,6 +45,7 @@ struct ServerDBInfo { LocalityData myLocality; // (Not serialized) Locality information, if available, for the *local* process LogSystemConfig logSystemConfig; std::vector priorCommittedLogServers; // If !fullyRecovered and logSystemConfig refers to a new log system which may not have been committed to the 
coordinated state yet, then priorCommittedLogServers are the previous, fully committed generation which need to stay alive in case this recovery fails + Optional latencyBandConfig; ServerDBInfo() : recoveryCount(0), recoveryState(RecoveryState::UNINITIALIZED) {} explicit ServerDBInfo(StringRef const& dbName) : dbName(dbName), recoveryCount(0), recoveryState(RecoveryState::UNINITIALIZED) {} @@ -53,7 +55,7 @@ struct ServerDBInfo { template void serialize( Ar& ar ) { - ar & id & clusterInterface & client & master & resolvers & dbName & recoveryCount & masterLifetime & logSystemConfig & priorCommittedLogServers & recoveryState; + ar & id & clusterInterface & client & master & resolvers & dbName & recoveryCount & masterLifetime & logSystemConfig & priorCommittedLogServers & recoveryState & latencyBandConfig; } }; diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp index e5b769858d..1b5cb791bf 100644 --- a/fdbserver/Status.actor.cpp +++ b/fdbserver/Status.actor.cpp @@ -434,26 +434,27 @@ struct RolesInfo { obj["mutation_bytes"] = StatusCounter(metrics.getValue("MutationBytes")).getStatus(); obj["mutations"] = StatusCounter(metrics.getValue("Mutations")).getStatus(); - std::string regularPrefix = "ReadLatency"; - std::string filteredPrefix = "FilteredReadLatency"; + std::string latencyBandPrefix = "ReadLatency"; JsonBuilderObject latency; std::map bands; + bool found = false; for(auto itr = metrics.begin(); itr != metrics.end(); ++itr) { - bool regularMeasurement = itr->first.substr(0, regularPrefix.size()) == regularPrefix; - if(!regularMeasurement && itr->first.substr(0, filteredPrefix.size()) != filteredPrefix) { - continue; - } + if(itr->first.substr(0, latencyBandPrefix.size()) == latencyBandPrefix) { + found = true; + std::string band = itr->first.substr(latencyBandPrefix.size()); + latency[band] = StatusCounter(itr->second).getCounter(); + } - std::string band = itr->first.substr(regularMeasurement ? regularPrefix.size() : filteredPrefix.size()); - //bands[band][regularMeasurement ? 
"counted" : "filtered"] = StatusCounter(itr->second).getCounter(); - latency[band] = StatusCounter(itr->second).getCounter(); + std::string value; + if(metrics.tryGetValue("Filtered" + latencyBandPrefix, value)) { + latency["filtered"] = StatusCounter(value).getCounter(); + } + } + if(found) { + obj["read_latency_bands"] = latency; } - /*for(auto itr : bands) { - latency[itr.first] = itr.second; - }*/ - obj["read_latency_bands"] = latency; Version version = parseInt64(metrics.getValue("Version")); Version durableVersion = parseInt64(metrics.getValue("DurableVersion")); @@ -531,19 +532,37 @@ struct RolesInfo { JsonBuilderObject grvLatency; JsonBuilderObject commitLatency; + bool grvFound = false; + bool commitFound = false; for(auto itr = metrics.begin(); itr != metrics.end(); ++itr) { if(itr->first.substr(0, grvPrefix.size()) == grvPrefix) { + grvFound = true; std::string band = itr->first.substr(grvPrefix.size()); grvLatency[band] = StatusCounter(itr->second).getCounter(); } else if(itr->first.substr(0, commitPrefix.size()) == commitPrefix) { + commitFound = true; std::string band = itr->first.substr(commitPrefix.size()); commitLatency[band] = StatusCounter(itr->second).getCounter(); } } - obj["grv_latency_bands"] = grvLatency; - obj["commit_latency_bands"] = commitLatency; + if(grvFound) { + std::string value; + if(metrics.tryGetValue("Filtered" + grvPrefix, value)) { + grvLatency["filtered"] = StatusCounter(value).getCounter(); + } + + obj["grv_latency_bands"] = grvLatency; + } + if(commitFound) { + std::string value; + if(metrics.tryGetValue("Filtered" + commitPrefix, value)) { + commitLatency["filtered"] = StatusCounter(value).getCounter(); + } + + obj["commit_latency_bands"] = commitLatency; + } } catch (Error &e) { if(e.code() != error_code_attribute_not_found) { throw e; diff --git a/fdbserver/fdbserver.vcxproj b/fdbserver/fdbserver.vcxproj index 368e6b2f61..5383e24f17 100644 --- a/fdbserver/fdbserver.vcxproj +++ b/fdbserver/fdbserver.vcxproj @@ -1,4 +1,4 @@ - + @@ -53,6 +53,7 @@ + @@ -154,6 +155,7 @@ + diff --git a/fdbserver/fdbserver.vcxproj.filters b/fdbserver/fdbserver.vcxproj.filters index 55c7e15281..c254c4789e 100644 --- a/fdbserver/fdbserver.vcxproj.filters +++ b/fdbserver/fdbserver.vcxproj.filters @@ -1,4 +1,4 @@ - + @@ -292,7 +292,6 @@ sqlite - workloads @@ -300,6 +299,7 @@ workloads + @@ -362,6 +362,7 @@ + diff --git a/fdbserver/storageserver.actor.cpp b/fdbserver/storageserver.actor.cpp index c7e0d9565d..d11fb6ce72 100644 --- a/fdbserver/storageserver.actor.cpp +++ b/fdbserver/storageserver.actor.cpp @@ -47,6 +47,7 @@ #include "LogSystem.h" #include "RecoveryState.h" #include "LogProtocolMessage.h" +#include "fdbserver/LatencyBandConfig.h" #include "flow/TDMetric.actor.h" using std::make_pair; @@ -55,9 +56,6 @@ using std::make_pair; #define SHORT_CIRCUT_ACTUAL_STORAGE 0 -int64_t MAX_RESULT_SIZE = 1e4; -int64_t MAX_SELECTOR_OFFSET = 1e2; - struct StorageServer; class ValueOrClearToRef { public: @@ -406,6 +404,8 @@ public: return val; } + Optional latencyBandConfig; + struct Counters { CounterCollection cc; Counter allQueries, getKeyQueries, getValueQueries, getRangeQueries, finishedQueries, rowsQueried, bytesQueried, watchQueries; @@ -465,12 +465,6 @@ public: specialCounter(cc, "KvstoreBytesFree", [self](){ return self->storage.getStorageBytes().free; }); specialCounter(cc, "KvstoreBytesAvailable", [self](){ return self->storage.getStorageBytes().available; }); specialCounter(cc, "KvstoreBytesTotal", [self](){ return self->storage.getStorageBytes().total; }); - - 
readLatencyBands.addThreshold(0.0001); - readLatencyBands.addThreshold(0.001); - readLatencyBands.addThreshold(0.01); - readLatencyBands.addThreshold(0.1); - readLatencyBands.addThreshold(1); } } counters; @@ -790,7 +784,10 @@ ACTOR Future getValueQ( StorageServer* data, GetValueRequest req ) { ++data->counters.finishedQueries; --data->readQueueSizeMetric; - data->counters.readLatencyBands.addMeasurement(timer()-req.requestTime, resultSize > MAX_RESULT_SIZE); + if(data->latencyBandConfig.present()) { + int maxReadBytes = data->latencyBandConfig.get().readConfig.maxReadBytes.orDefault(std::numeric_limits::max()); + data->counters.readLatencyBands.addMeasurement(timer()-req.requestTime, resultSize > maxReadBytes); + } return Void(); }; @@ -1327,7 +1324,12 @@ ACTOR Future getKeyValues( StorageServer* data, GetKeyValuesRequest req ) ++data->counters.finishedQueries; --data->readQueueSizeMetric; - data->counters.readLatencyBands.addMeasurement(timer()-req.requestTime, resultSize > MAX_RESULT_SIZE || abs(req.begin.offset) > MAX_SELECTOR_OFFSET || abs(req.end.offset) > MAX_SELECTOR_OFFSET); + + if(data->latencyBandConfig.present()) { + int maxReadBytes = data->latencyBandConfig.get().readConfig.maxReadBytes.orDefault(std::numeric_limits::max()); + int maxSelectorOffset = data->latencyBandConfig.get().readConfig.maxKeySelectorOffset.orDefault(std::numeric_limits::max()); + data->counters.readLatencyBands.addMeasurement(timer()-req.requestTime, resultSize > maxReadBytes || abs(req.begin.offset) > maxSelectorOffset || abs(req.end.offset) > maxSelectorOffset); + } return Void(); } @@ -1378,7 +1380,11 @@ ACTOR Future getKey( StorageServer* data, GetKeyRequest req ) { ++data->counters.finishedQueries; --data->readQueueSizeMetric; - data->counters.readLatencyBands.addMeasurement(timer()-req.requestTime, resultSize > MAX_RESULT_SIZE || abs(req.sel.offset > MAX_SELECTOR_OFFSET)); + if(data->latencyBandConfig.present()) { + int maxReadBytes = data->latencyBandConfig.get().readConfig.maxReadBytes.orDefault(std::numeric_limits::max()); + int maxSelectorOffset = data->latencyBandConfig.get().readConfig.maxKeySelectorOffset.orDefault(std::numeric_limits::max()); + data->counters.readLatencyBands.addMeasurement(timer()-req.requestTime, resultSize > maxReadBytes || abs(req.sel.offset) > maxSelectorOffset); + } return Void(); } @@ -3303,6 +3309,20 @@ ACTOR Future storageServerCore( StorageServer* self, StorageServerInterfac doUpdate = Void(); } } + + Optional newLatencyBandConfig = self->db->get().latencyBandConfig; + if(newLatencyBandConfig.present() != self->latencyBandConfig.present() + || (newLatencyBandConfig.present() && newLatencyBandConfig.get().readConfig != self->latencyBandConfig.get().readConfig)) + { + self->latencyBandConfig = newLatencyBandConfig; + self->counters.readLatencyBands.clearBands(); + TraceEvent("LatencyBandReadUpdatingConfig").detail("Present", newLatencyBandConfig.present()); + if(self->latencyBandConfig.present()) { + for(auto band : self->latencyBandConfig.get().readConfig.bands) { + self->counters.readLatencyBands.addThreshold(band); + } + } + } } when( GetValueRequest req = waitNext(ssi.getValue.getFuture()) ) { // Warning: This code is executed at extremely high priority (TaskLoadBalancedEndpoint), so downgrade before doing real work diff --git a/flow/Stats.h b/flow/Stats.h index bf19c9df8a..d088093b85 100644 --- a/flow/Stats.h +++ b/flow/Stats.h @@ -121,39 +121,54 @@ static void specialCounter(CounterCollection& collection, std::string const& nam class LatencyBands { public: - 
LatencyBands(std::string name, CounterCollection &cc) : name(name), cc(cc) {
-		addThreshold(std::numeric_limits<double>::infinity());
-	}
+	LatencyBands(std::string name, CounterCollection &cc) : name(name), cc(cc), filteredCount(nullptr) {}
 
 	void addThreshold(double value) {
 		if(value > 0 && bands.count(value) == 0) {
-			bands.insert(std::make_pair(value, new Counter(format("%s%f", name.c_str(), value), cc)));
-			filteredBands.insert(std::make_pair(value, new Counter(format("Filtered%s%f", name.c_str(), value), cc)));
+			if(bands.size() == 0) {
+				filteredCount = new Counter(format("Filtered%s", name.c_str()), cc);
+				insertBand(std::numeric_limits<double>::infinity());
+			}
+
+			insertBand(value);
 		}
 	}
 
 	void addMeasurement(double measurement, bool filtered=false) {
-		const auto &targetBands = filtered ? filteredBands : bands;
-		auto itr = targetBands.upper_bound(measurement);
-		if(itr == targetBands.end()) {
-			fprintf(stderr, "Can't add measurement %lf\n", measurement);
+		if(filtered && filteredCount) {
+			++(*filteredCount);
 		}
-		ASSERT(itr != targetBands.end());
-		++(*itr->second);
+		else if(bands.size() > 0) {
+			auto itr = bands.upper_bound(measurement);
+			ASSERT(itr != bands.end());
+			++(*itr->second);
+		}
+	}
+
+	void clearBands() {
+		for(auto itr : bands) {
+			delete itr.second;
+		}
+
+		bands.clear();
+
+		delete filteredCount;
+		filteredCount = nullptr; // avoid a dangling pointer and double delete if bands are cleared again
+	}
 
 	~LatencyBands() {
-		for(auto itr = bands.begin(); itr != bands.end(); ++itr) {
-			delete itr->second;
-		}
+		clearBands();
 	}
 
 private:
 	std::map<double, Counter*> bands;
-	std::map<double, Counter*> filteredBands;
+	Counter *filteredCount;
 
 	std::string name;
 	CounterCollection &cc;
+
+	void insertBand(double value) {
+		bands.insert(std::make_pair(value, new Counter(format("%s%f", name.c_str(), value), cc)));
+	}
 };

From fb6e9c1b09d3d9d83fd66f93a9a6a636ade836b8 Mon Sep 17 00:00:00 2001
From: Tim Guggenmos
Date: Mon, 21 Jan 2019 12:16:55 +0100
Subject: [PATCH 021/226] 'Directoris' -> 'Directories'

---
 packaging/docker/Dockerfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/packaging/docker/Dockerfile b/packaging/docker/Dockerfile
index b5cd01ee00..fc584ee20c 100644
--- a/packaging/docker/Dockerfile
+++ b/packaging/docker/Dockerfile
@@ -50,7 +50,7 @@ COPY download_multiversion_libraries.bash scripts/
 RUN wget $FDB_WEBSITE/downloads/$FDB_VERSION/linux/libfdb_c_$FDB_VERSION.so -O /usr/lib/libfdb_c.so && \
 	bash scripts/download_multiversion_libraries.bash $FDB_WEBSITE $FDB_ADDITIONAL_VERSIONS
 
-# Set Up Runtime Scripts and Directoris
+# Set Up Runtime Scripts and Directories
 COPY fdb.bash scripts/
 COPY create_server_environment.bash scripts/
@@ -68,4 +68,4 @@ ENV FDB_CLUSTER_FILE /var/fdb/fdb.cluster
 ENV FDB_NETWORKING_MODE container
 ENV FDB_COORDINATOR ""
 ENV FDB_CLUSTER_FILE_CONTENTS ""
-ENV FDB_PROCESS_CLASS unset
\ No newline at end of file
+ENV FDB_PROCESS_CLASS unset

From 5a6f82a75b46a13948a079eb57d8189871710821 Mon Sep 17 00:00:00 2001
From: Andrew Noyes
Date: Fri, 18 Jan 2019 15:40:59 -0800
Subject: [PATCH 022/226] bit_length python 2.6 compat

---
 bindings/python/fdb/tuple.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/bindings/python/fdb/tuple.py b/bindings/python/fdb/tuple.py
index f9092e0b6a..6c1153f317 100644
--- a/bindings/python/fdb/tuple.py
+++ b/bindings/python/fdb/tuple.py
@@ -24,6 +24,7 @@ import ctypes
 import uuid
 import struct
 import math
+import sys
 from bisect import bisect_left
 
 from fdb import six
@@ -306,6 +307,14 @@ def _reduce_children(child_values):
     return bytes_list, version_pos
 
 
+if sys.version_info < (2, 7):
+    def _bit_length(x):
+        return
len(bin(x)) - 2 +else: + def _bit_length(x): + return x.bit_length() + + def _encode(value, nested=False): # returns [code][data] (code != 0xFF) # encoded values are self-terminating @@ -324,7 +333,7 @@ def _encode(value, nested=False): return b''.join([six.int2byte(INT_ZERO_CODE)]), -1 elif value > 0: if value >= _size_limits[-1]: - length = (value.bit_length() + 7) // 8 + length = (_bit_length(value) + 7) // 8 data = [six.int2byte(POS_INT_END), six.int2byte(length)] for i in _range(length - 1, -1, -1): data.append(six.int2byte((value >> (8 * i)) & 0xff)) @@ -334,7 +343,7 @@ def _encode(value, nested=False): return six.int2byte(INT_ZERO_CODE + n) + struct.pack(">Q", value)[-n:], -1 else: if -value >= _size_limits[-1]: - length = (value.bit_length() + 7) // 8 + length = (_bit_length(value) + 7) // 8 value += (1 << (length * 8)) - 1 data = [six.int2byte(NEG_INT_START), six.int2byte(length ^ 0xff)] for i in _range(length - 1, -1, -1): From 4bca5dad732e81f67ef97dc6061b889bbfe90115 Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Tue, 22 Jan 2019 10:09:58 -0800 Subject: [PATCH 023/226] Fix _bit_length for 0 and negative numbers --- bindings/python/fdb/tuple.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bindings/python/fdb/tuple.py b/bindings/python/fdb/tuple.py index 6c1153f317..c05f1c2659 100644 --- a/bindings/python/fdb/tuple.py +++ b/bindings/python/fdb/tuple.py @@ -309,7 +309,9 @@ def _reduce_children(child_values): if sys.version_info < (2, 7): def _bit_length(x): - return len(bin(x)) - 2 + s = bin(x) # binary representation: bin(-37) --> '-0b100101' + s = s.lstrip('-0b') # remove leading zeros and minus sign + return len(s) else: def _bit_length(x): return x.bit_length() From 58964af7e1813ea836e5d5c2611d5f0320aaa1df Mon Sep 17 00:00:00 2001 From: mpilman Date: Tue, 22 Jan 2019 14:25:20 -0800 Subject: [PATCH 024/226] ctest improvements - #1058 - A set of CMake variables controls whether to keep the simfdb directory and the traces and whether we want to aggregate the traces into a single file - Test labels now contain the directory they are in so that one can now run `ctest -R fast/` - A different binary can be used for restart tests. CMake will automatically look for an installed fdb and use that by default. If none is found, it will use the built one but it will also print a warning - CMake will throw an error if there are any text files in the tests directory that are not associated with a test. - Moved testing from fdbserver/CMakeLists.txt to tests/CMakeLists.txt - Moved fdb testing functions to its own cmake module --- CMakeLists.txt | 1 + cmake/AddFdbTest.cmake | 119 ++++++++++++++++++ fdbserver/CMakeLists.txt | 220 --------------------------------- fdbserver/fdbserver.actor.cpp | 2 +- tests/CMakeLists.txt | 186 ++++++++++++++++++++++++++++ tests/TestRunner/TestRunner.py | 163 ++++++++++++++---------- 6 files changed, 406 insertions(+), 285 deletions(-) create mode 100644 cmake/AddFdbTest.cmake create mode 100644 tests/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 75839e8326..f9b2cd1587 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -202,6 +202,7 @@ add_subdirectory(fdbcli) add_subdirectory(fdbmonitor) add_subdirectory(bindings) add_subdirectory(fdbbackup) +add_subdirectory(tests) include(CPack) diff --git a/cmake/AddFdbTest.cmake b/cmake/AddFdbTest.cmake new file mode 100644 index 0000000000..36b6c6accb --- /dev/null +++ b/cmake/AddFdbTest.cmake @@ -0,0 +1,119 @@ +# This configures the fdb testing system in cmake. 
Currently this simply means +# that it will get a list of all test files and store this list in a parent scope +# so that we can later verify that all of them were assigned to a test. +# +# - TEST_DIRECTORY The directory where all the tests are +# - ERROR_ON_ADDITIONAL_FILES if this is passed verify_fdb_tests will print +# an error if there are any .txt files in the test directory that do not +# correspond to a test or are not ignored by a pattern +# - IGNORE_PATTERNS regular expressions. All files that match any of those +# expressions don't need to be associated with a test +function(configure_testing) + set(options ERROR_ON_ADDITIONAL_FILES) + set(oneValueArgs TEST_DIRECTORY) + set(multiValueArgs IGNORE_PATTERNS) + cmake_parse_arguments(CONFIGURE_TESTING "${options}" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}") + set(no_tests YES) + if(CONFIGURE_TESTING_ERROR_ON_ADDITIONAL_FILES) + file(GLOB_RECURSE candidates "${CONFIGURE_TESTING_TEST_DIRECTORY}/*.txt") + foreach(candidate IN LISTS candidates) + set(candidate_is_test YES) + foreach(pattern IN LISTS CONFIGURE_TESTING_IGNORE_PATTERNS) + if("${candidate}" MATCHES "${pattern}") + set(candidate_is_test NO) + endif() + endforeach() + if(candidate_is_test) + if(no_tests) + set(no_tests NO) + set(fdb_test_files "${candidate}") + else() + set(fdb_test_files "${fdb_test_files};${candidate}") + endif() + endif() + endforeach() + set(fdb_test_files "${fdb_test_files}" PARENT_SCOPE) + endif() +endfunction() + +function(verify_testing) + foreach(test_file IN LISTS fdb_test_files) + message(SEND_ERROR "${test_file} found but it is not associated with a test") + endforeach() +endfunction() + +# This will add a test that can be run by ctest. This macro can be called +# with the following arguments: +# +# - UNIT will run the test as a unit test (it won't bring up a whole simulated system) +# - TEST_NAME followed by the name of the test +# - TIMEOUT followed by a timeout - reaching the timeout makes the test fail (default is +# 3600 seconds). The timeout is reached whenever the test ran too long in either simulated +# time or real time - whichever is smaller. +# - TEST_FILES followed by typically one test file. The test runner will run +# all these tests in serialized order and within the same directory.
This is +# useful for restart tests +function(add_fdb_test) + set(options UNIT IGNORE) + set(oneValueArgs TEST_NAME TIMEOUT) + set(multiValueArgs TEST_FILES) + cmake_parse_arguments(ADD_FDB_TEST "${options}" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}") + set(this_test_timeout ${ADD_FDB_TEST_TIMEOUT}) + if(NOT this_test_timeout) + set(this_test_timeout 3600) + endif() + set(test_type "simulation") + set(fdb_test_files_ "${fdb_test_files}") + foreach(test_file IN LISTS ADD_FDB_TEST_TEST_FILES) + list(REMOVE_ITEM fdb_test_files_ "${CMAKE_CURRENT_SOURCE_DIR}/${test_file}") + endforeach() + set(fdb_test_files "${fdb_test_files_}" PARENT_SCOPE) + list(LENGTH ADD_FDB_TEST_TEST_FILES NUM_TEST_FILES) + if(ADD_FDB_TEST_IGNORE AND NOT RUN_IGNORED_TESTS) + return() + endif() + if(ADD_FDB_TEST_UNIT) + set(test_type "test") + endif() + list(GET ADD_FDB_TEST_TEST_FILES 0 first_file) + string(REGEX REPLACE "^(.*)\\.txt$" "\\1" test_name ${first_file}) + if("${test_name}" MATCHES "-[0-9]$") + string(REGEX REPLACE "(.*)-[0-9]$" "\\1" test_name ${test_name}) + message(STATUS "new testname ${test_name}") + endif() + if (NOT "${ADD_FDB_TEST_TEST_NAME}" STREQUAL "") + set(test_name ${ADD_FDB_TEST_TEST_NAME}) + endif() + if(ADD_FDB_TEST_UNIT) + message(STATUS + "ADDING UNIT TEST ${test_name}") + else() + message(STATUS + "ADDING SIMULATOR TEST ${test_name}") + endif() + set(test_files "") + foreach(curr_test_file ${ADD_FDB_TEST_TEST_FILES}) + set(test_files "${test_files} ${curr_test_file}") + endforeach() + set(BUGGIFY_OPTION "") + if (ENABLE_BUGGIFY) + set(BUGGIFY_OPTION "-B") + endif() + list(TRANSFORM ADD_FDB_TEST_TEST_FILES PREPEND "${CMAKE_CURRENT_SOURCE_DIR}/") + add_test(NAME ${test_name} + COMMAND ${PYTHON_EXECUTABLE} ${TestRunner} + -n ${test_name} + -b ${PROJECT_BINARY_DIR} + -t ${test_type} + -O ${OLD_FDBSERVER_BINARY} + --aggregate-traces ${TEST_AGGREGATE_TRACES} + --keep-logs ${TEST_KEEP_LOGS} + --keep-simdirs ${TEST_KEEP_SIMDIR} + --seed ${SEED} + ${BUGGIFY_OPTION} + ${ADD_FDB_TEST_TEST_FILES} + WORKING_DIRECTORY ${PROJECT_BINARY_DIR}) + get_filename_component(test_dir_full ${first_file} DIRECTORY) + get_filename_component(test_dir ${test_dir_full} NAME) + set_tests_properties(${test_name} PROPERTIES TIMEOUT ${this_test_timeout} LABELS "${test_dir}") +endfunction() diff --git a/fdbserver/CMakeLists.txt b/fdbserver/CMakeLists.txt index 1ff2c37e68..37adfa2366 100644 --- a/fdbserver/CMakeLists.txt +++ b/fdbserver/CMakeLists.txt @@ -183,223 +183,3 @@ target_link_libraries(fdbserver PRIVATE fdbclient) install(TARGETS fdbserver DESTINATION ${FDB_SBIN_DIR} COMPONENT server) -################################################################################ -# Testing -################################################################################ - -set(ENABLE_BUGGIFY OFF CACHE BOOL "Enable buggify for tests") - -set(TestRunner "${PROJECT_SOURCE_DIR}/tests/TestRunner/TestRunner.py") - -configure_file(${PROJECT_SOURCE_DIR}/tests/CTestCustom.ctest ${PROJECT_BINARY_DIR}/CTestCustom.ctest @ONLY) - -# This will add a test that can be run by ctest. This macro can be called -# with the following arguments: -# -# - UNIT will run the test as a unit test (it won't bring up a whole simulated system) -# - TEST_NAME followed the name of the test -# - TIMEOUT followed by a timeout - reaching the timeout makes the test fail (default is -# 3600 seconds). The timeout will be reached whenever it ran either too long in simulated -# time or in real time - whatever is smaller.
-# - TEST_FILES followed by typically one test file. The test runner will run -# all these tests in serialized order and within the same directory. This is -# useful for restart tests -function(add_fdb_test) - set(options UNIT) - set(oneValueArgs TEST_NAME TIMEOUT) - set(multiValueArgs TEST_FILES) - cmake_parse_arguments(ADD_FDB_TEST "${options}" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}") - set(this_test_timeout ${ADD_FDB_TEST_TIMEOUT}) - if(NOT this_test_timeout) - set(this_test_timeout 3600) - endif() - set(test_type "simulation") - list(LENGTH ADD_FDB_TEST_TEST_FILES NUM_TEST_FILES) - if(ADD_FDB_TEST_UNIT) - set(test_type "test") - endif() - list(GET ADD_FDB_TEST_TEST_FILES 0 first_file) - get_filename_component(test_name ${first_file} NAME_WE) - if (NOT "${ADD_FDB_TEST_TEST_NAME}" STREQUAL "") - set(test_name ${ADD_FDB_TEST_TEST_NAME}) - endif() - if(ADD_FDB_TEST_UNIT) - message(STATUS - "ADDING UNIT TEST ${test_name}") - else() - message(STATUS - "ADDING SIMULATOR TEST ${test_name}") - endif() - set(test_files "") - foreach(curr_test_file ${ADD_FDB_TEST_TEST_FILES}) - set(test_files "${test_files} ${curr_test_file}") - endforeach() - set(BUGGIFY_OPTION "") - if (ENABLE_BUGGIFY) - set(BUGGIFY_OPTION "-B") - endif() - add_test(NAME ${test_name} - COMMAND ${PYTHON_EXECUTABLE} ${TestRunner} - -n ${test_name} - -b ${PROJECT_BINARY_DIR} - -t ${test_type} - --seed ${SEED} - ${BUGGIFY_OPTION} - ${ADD_FDB_TEST_TEST_FILES} - WORKING_DIRECTORY ${PROJECT_BINARY_DIR}) - get_filename_component(test_dir_full ${first_file} DIRECTORY) - get_filename_component(test_dir ${test_dir_full} NAME) - set_tests_properties(${test_name} PROPERTIES TIMEOUT ${this_test_timeout} LABELS "${test_dir}") -endfunction() - -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/AsyncFileCorrectness.txt UNIT) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/AsyncFileMix.txt UNIT) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/AsyncFileRead.txt UNIT) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/AsyncFileReadRandom.txt UNIT) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/AsyncFileWrite.txt UNIT) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/BackupContainers.txt) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/BandwidthThrottle.txt) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/BigInsert.txt) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/BlobStore.txt) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/ConsistencyCheck.txt) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/DiskDurability.txt) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/FileSystem.txt) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/Happy.txt) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/IncrementalDelete.txt) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/KVStoreMemTest.txt UNIT) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/KVStoreReadMostly.txt UNIT) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/KVStoreTest.txt UNIT) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/KVStoreTestRead.txt UNIT) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/KVStoreTestWrite.txt UNIT) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/KVStoreValueSize.txt UNIT) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/LayerStatusMerge.txt) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/PureNetwork.txt) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/RRW2500.txt) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/RandomRead.txt) 
-#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/RandomReadWrite.txt) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/ReadAbsent.txt) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/ReadHalfAbsent.txt) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/SlowTask.txt) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/SpecificUnitTest.txt) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/StreamingWrite.txt) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/ThreadSafety.txt) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/TraceEventMetrics.txt) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/default.txt) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/errors.txt) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/fail.txt) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/killall.txt) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/latency.txt) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/performance-fs.txt) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/performance.txt) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/ping.TXT) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/pingServers.TXT) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/pt.TXT) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/randomSelector.txt) -#add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/selectorCorrectness.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/fast/AtomicBackupCorrectness.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/fast/AtomicBackupToDBCorrectness.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/fast/AtomicOps.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/fast/AtomicOpsApiCorrectness.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/fast/BackupCorrectness.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/fast/BackupCorrectnessClean.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/fast/BackupToDBCorrectness.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/fast/BackupToDBCorrectnessClean.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/fast/CloggedSideband.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/fast/ConstrainedRandomSelector.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/fast/CycleAndLock.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/fast/CycleTest.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/fast/FuzzApiCorrectness.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/fast/FuzzApiCorrectnessClean.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/fast/IncrementTest.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/fast/InventoryTestAlmostReadOnly.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/fast/InventoryTestSomeWrites.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/fast/LongStackWriteDuringRead.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/fast/LowLatency.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/fast/MemoryLifetime.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/fast/MoveKeysCycle.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/fast/RandomSelector.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/fast/RandomUnitTests.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/fast/SelectorCorrectness.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/fast/Sideband.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/fast/SidebandWithStatus.txt) 
-add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/fast/SwizzledRollbackSideband.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/fast/SystemRebootTestCycle.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/fast/TaskBucketCorrectness.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/fast/TimeKeeperCorrectness.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/fast/Unreadable.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/fast/VersionStamp.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/fast/Watches.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/fast/WriteDuringRead.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/fast/WriteDuringReadClean.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/rare/CheckRelocation.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/rare/ClogUnclog.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/rare/CloggedCycleWithKills.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/rare/ConflictRangeCheck.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/rare/ConflictRangeRYOWCheck.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/rare/CycleRollbackClogged.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/rare/CycleWithKills.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/rare/FuzzTest.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/rare/InventoryTestHeavyWrites.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/rare/LargeApiCorrectness.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/rare/LargeApiCorrectnessStatus.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/rare/RYWDisable.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/rare/RandomReadWriteTest.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/rare/SwizzledLargeApiCorrectness.txt) -add_fdb_test( - TEST_FILES ${PROJECT_SOURCE_DIR}/tests/restarting/CycleTestRestart-1.txt - ${PROJECT_SOURCE_DIR}/tests/restarting/CycleTestRestart-2.txt - TEST_NAME CycleTestRestart) -add_fdb_test( - TEST_FILES ${PROJECT_SOURCE_DIR}/tests/restarting/StorefrontTestRestart-1.txt - ${PROJECT_SOURCE_DIR}/tests/restarting/StorefrontTestRestart-2.txt - TEST_NAME StorefrontTestRestart) -add_fdb_test( - TEST_FILES ${PROJECT_SOURCE_DIR}/tests/restarting/from_5.1.7/DrUpgradeRestart-1.txt - ${PROJECT_SOURCE_DIR}/tests/restarting/from_5.1.7/DrUpgradeRestart-2.txt - TEST_NAME DrUpgradeRestart) -add_fdb_test( - TEST_FILES ${PROJECT_SOURCE_DIR}/tests/restarting/from_5.2.0/ClientTransactionProfilingCorrectness-1.txt - ${PROJECT_SOURCE_DIR}/tests/restarting/from_5.2.0/ClientTransactionProfilingCorrectness-2.txt - TEST_NAME ClientTransactionProfilingCorrectness) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/slow/ApiCorrectness.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/slow/ApiCorrectnessAtomicRestore.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/slow/ApiCorrectnessSwitchover.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/slow/ClogWithRollbacks.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/slow/CloggedCycleTest.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/slow/CloggedStorefront.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/slow/CommitBug.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/slow/ConfigureTest.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/slow/CycleRollbackPlain.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/slow/DDBalanceAndRemove.txt) 
-add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/slow/DDBalanceAndRemoveStatus.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/slow/FastTriggeredWatches.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/slow/LowLatencyWithFailures.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/slow/MoveKeysClean.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/slow/MoveKeysSideband.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/slow/RyowCorrectness.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/slow/Serializability.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/slow/SharedBackupCorrectness.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/slow/SharedBackupToDBCorrectness.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/slow/StorefrontTest.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/slow/SwizzledApiCorrectness.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/slow/SwizzledCycleTest.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/slow/SwizzledDdBalance.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/slow/SwizzledRollbackTimeLapse.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/slow/SwizzledRollbackTimeLapseIncrement.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/slow/VersionStampBackupToDB.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/slow/VersionStampSwitchover.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/slow/WriteDuringReadAtomicRestore.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/slow/WriteDuringReadSwitchover.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/slow/ddbalance.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/status/invalid_proc_addresses.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/status/local_6_machine_no_replicas_remain.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/status/separate_1_of_3_coordinators_remain.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/status/separate_2_of_3_coordinators_remain.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/status/separate_cannot_write_cluster_file.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/status/separate_idle.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/status/separate_initializing.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/status/separate_no_coordinators.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/status/separate_no_database.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/status/separate_no_servers.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/status/separate_not_enough_servers.txt) -add_fdb_test(TEST_FILES ${PROJECT_SOURCE_DIR}/tests/status/single_process_too_many_config_params.txt) diff --git a/fdbserver/fdbserver.actor.cpp b/fdbserver/fdbserver.actor.cpp index 3943966253..b4b4ca00b0 100644 --- a/fdbserver/fdbserver.actor.cpp +++ b/fdbserver/fdbserver.actor.cpp @@ -329,7 +329,7 @@ UID getSharedMemoryMachineId() { criticalError(FDB_EXIT_ERROR, "SharedMemoryError", "Could not locate or create shared memory - 'machineId'"); return *machineId; } - catch (boost::interprocess::interprocess_exception &) { + catch (boost::interprocess::interprocess_exception &e) { try { //If the shared memory already exists, open it read-only in case it was created by another user boost::interprocess::managed_shared_memory segment(boost::interprocess::open_read_only, "fdbserver"); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt new 
file mode 100644 index 0000000000..0a0b1d9d1b --- /dev/null +++ b/tests/CMakeLists.txt @@ -0,0 +1,186 @@ +include(AddFdbTest) + +# We need some variables to configure the test setup +set(ENABLE_BUGGIFY OFF CACHE BOOL "Enable buggify for tests") +set(RUN_IGNORED_TESTS OFF CACHE BOOL "Run tests that are marked for ignore") +set(TEST_KEEP_LOGS "FAILED" CACHE STRING "Which logs to keep (NONE, FAILED, ALL)") +set(TEST_KEEP_SIMDIR "NONE" CACHE STRING "Which simfdb directories to keep (NONE, FAILED, ALL)") +set(TEST_AGGREGATE_TRACES "NONE" CACHE STRING "Create aggregated trace files (NONE, FAILED, ALL)") + +# for the restart test we ideally want to use the last stable fdbserver +# to test upgrades + +find_program(OLD_FDBSERVER_BINARY + fdbserver + HINTS /usr/sbin /usr/libexec /usr/local/sbin /usr/local/libexec) +if(OLD_FDBSERVER_BINARY) + message(STATUS "Use old fdb at ${OLD_FDBSERVER_BINARY}") +else() + set(fdbserver_location ${CMAKE_BINARY_DIR}/bin/fdbserver) + set(OLD_FDBSERVER_BINARY ${fdbserver_location} CACHE FILEPATH "Old fdbserver binary" FORCE) + message(WARNING "\ +No old fdbserver binary found - using ${fdbserver_location} \ +It is recommended to install the current stable version from https://www.foundationdb.org/download/ \ +Or provide a path to another fdbserver") +endif() + +set(TestRunner "${PROJECT_SOURCE_DIR}/tests/TestRunner/TestRunner.py") + +configure_file(${PROJECT_SOURCE_DIR}/tests/CTestCustom.ctest ${PROJECT_BINARY_DIR}/CTestCustom.ctest @ONLY) + +configure_testing(TEST_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" + ERROR_ON_ADDITIONAL_FILES + IGNORE_PATTERNS ".*/CMakeLists.txt") + +add_fdb_test(TEST_FILES AsyncFileCorrectness.txt UNIT IGNORE) +add_fdb_test(TEST_FILES AsyncFileMix.txt UNIT IGNORE) +add_fdb_test(TEST_FILES AsyncFileRead.txt UNIT IGNORE) +add_fdb_test(TEST_FILES AsyncFileReadRandom.txt UNIT IGNORE) +add_fdb_test(TEST_FILES AsyncFileWrite.txt UNIT IGNORE) +add_fdb_test(TEST_FILES BackupContainers.txt IGNORE) +add_fdb_test(TEST_FILES BandwidthThrottle.txt IGNORE) +add_fdb_test(TEST_FILES BigInsert.txt IGNORE) +add_fdb_test(TEST_FILES BlobStore.txt IGNORE) +add_fdb_test(TEST_FILES ConsistencyCheck.txt IGNORE) +add_fdb_test(TEST_FILES DiskDurability.txt IGNORE) +add_fdb_test(TEST_FILES FileSystem.txt IGNORE) +add_fdb_test(TEST_FILES Happy.txt IGNORE) +add_fdb_test(TEST_FILES IncrementalDelete.txt IGNORE) +add_fdb_test(TEST_FILES KillRegionCycle.txt IGNORE) +add_fdb_test(TEST_FILES KVStoreMemTest.txt UNIT IGNORE) +add_fdb_test(TEST_FILES KVStoreReadMostly.txt UNIT IGNORE) +add_fdb_test(TEST_FILES KVStoreTest.txt UNIT IGNORE) +add_fdb_test(TEST_FILES KVStoreTestRead.txt UNIT IGNORE) +add_fdb_test(TEST_FILES KVStoreTestWrite.txt UNIT IGNORE) +add_fdb_test(TEST_FILES KVStoreValueSize.txt UNIT IGNORE) +add_fdb_test(TEST_FILES LayerStatusMerge.txt IGNORE) +add_fdb_test(TEST_FILES PureNetwork.txt IGNORE) +add_fdb_test(TEST_FILES RRW2500.txt IGNORE) +add_fdb_test(TEST_FILES RandomRead.txt IGNORE) +add_fdb_test(TEST_FILES RandomReadWrite.txt IGNORE) +add_fdb_test(TEST_FILES ReadAbsent.txt IGNORE) +add_fdb_test(TEST_FILES ReadHalfAbsent.txt IGNORE) +add_fdb_test(TEST_FILES RedwoodCorrectness.txt IGNORE) +add_fdb_test(TEST_FILES RedwoodPerfTests.txt IGNORE) +add_fdb_test(TEST_FILES SlowTask.txt IGNORE) +add_fdb_test(TEST_FILES SpecificUnitTest.txt IGNORE) +add_fdb_test(TEST_FILES StreamingWrite.txt IGNORE) +add_fdb_test(TEST_FILES ThreadSafety.txt IGNORE) +add_fdb_test(TEST_FILES TraceEventMetrics.txt IGNORE) +add_fdb_test(TEST_FILES default.txt IGNORE)
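+# A hedged usage sketch (the cmake/ctest invocations below are assumed, not part +# of this patch): specs marked IGNORE above stay out of the default run unless +# RUN_IGNORED_TESTS is switched on, and because add_fdb_test derives test names +# and labels from the file's directory, whole suites can be selected at once: +#   cmake -DRUN_IGNORED_TESTS=ON -DENABLE_BUGGIFY=ON <path-to-source> +#   ctest -R fast/ -j8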
+add_fdb_test(TEST_FILES errors.txt IGNORE) +add_fdb_test(TEST_FILES fail.txt IGNORE) +add_fdb_test(TEST_FILES killall.txt IGNORE) +add_fdb_test(TEST_FILES latency.txt IGNORE) +add_fdb_test(TEST_FILES performance-fs.txt IGNORE) +add_fdb_test(TEST_FILES performance.txt IGNORE) +add_fdb_test(TEST_FILES ping.TXT IGNORE) +add_fdb_test(TEST_FILES pingServers.TXT IGNORE) +add_fdb_test(TEST_FILES pt.TXT IGNORE) +add_fdb_test(TEST_FILES randomSelector.txt IGNORE) +add_fdb_test(TEST_FILES selectorCorrectness.txt IGNORE) +add_fdb_test(TEST_FILES fast/AtomicBackupCorrectness.txt) +add_fdb_test(TEST_FILES fast/AtomicBackupToDBCorrectness.txt) +add_fdb_test(TEST_FILES fast/AtomicOps.txt) +add_fdb_test(TEST_FILES fast/AtomicOpsApiCorrectness.txt) +add_fdb_test(TEST_FILES fast/BackupCorrectness.txt) +add_fdb_test(TEST_FILES fast/BackupCorrectnessClean.txt) +add_fdb_test(TEST_FILES fast/BackupToDBCorrectness.txt) +add_fdb_test(TEST_FILES fast/BackupToDBCorrectnessClean.txt) +add_fdb_test(TEST_FILES fast/CloggedSideband.txt) +add_fdb_test(TEST_FILES fast/ConstrainedRandomSelector.txt) +add_fdb_test(TEST_FILES fast/CycleAndLock.txt) +add_fdb_test(TEST_FILES fast/CycleTest.txt) +add_fdb_test(TEST_FILES fast/FuzzApiCorrectness.txt) +add_fdb_test(TEST_FILES fast/FuzzApiCorrectnessClean.txt) +add_fdb_test(TEST_FILES fast/IncrementTest.txt) +add_fdb_test(TEST_FILES fast/InventoryTestAlmostReadOnly.txt) +add_fdb_test(TEST_FILES fast/InventoryTestSomeWrites.txt) +add_fdb_test(TEST_FILES fast/LongStackWriteDuringRead.txt) +add_fdb_test(TEST_FILES fast/LowLatency.txt) +add_fdb_test(TEST_FILES fast/MemoryLifetime.txt) +add_fdb_test(TEST_FILES fast/MoveKeysCycle.txt) +add_fdb_test(TEST_FILES fast/RandomSelector.txt) +add_fdb_test(TEST_FILES fast/RandomUnitTests.txt) +add_fdb_test(TEST_FILES fast/SelectorCorrectness.txt) +add_fdb_test(TEST_FILES fast/Sideband.txt) +add_fdb_test(TEST_FILES fast/SidebandWithStatus.txt) +add_fdb_test(TEST_FILES fast/SwizzledRollbackSideband.txt) +add_fdb_test(TEST_FILES fast/SystemRebootTestCycle.txt) +add_fdb_test(TEST_FILES fast/TaskBucketCorrectness.txt) +add_fdb_test(TEST_FILES fast/TimeKeeperCorrectness.txt) +add_fdb_test(TEST_FILES fast/Unreadable.txt) +add_fdb_test(TEST_FILES fast/VersionStamp.txt) +add_fdb_test(TEST_FILES fast/Watches.txt) +add_fdb_test(TEST_FILES fast/WriteDuringRead.txt) +add_fdb_test(TEST_FILES fast/WriteDuringReadClean.txt) +add_fdb_test(TEST_FILES rare/CheckRelocation.txt) +add_fdb_test(TEST_FILES rare/ClogUnclog.txt) +add_fdb_test(TEST_FILES rare/CloggedCycleWithKills.txt) +add_fdb_test(TEST_FILES rare/ConflictRangeCheck.txt) +add_fdb_test(TEST_FILES rare/ConflictRangeRYOWCheck.txt) +add_fdb_test(TEST_FILES rare/CycleRollbackClogged.txt) +add_fdb_test(TEST_FILES rare/CycleWithKills.txt) +add_fdb_test(TEST_FILES rare/FuzzTest.txt) +add_fdb_test(TEST_FILES rare/InventoryTestHeavyWrites.txt) +add_fdb_test(TEST_FILES rare/LargeApiCorrectness.txt) +add_fdb_test(TEST_FILES rare/LargeApiCorrectnessStatus.txt) +add_fdb_test(TEST_FILES rare/RYWDisable.txt) +add_fdb_test(TEST_FILES rare/RandomReadWriteTest.txt) +add_fdb_test(TEST_FILES rare/SwizzledLargeApiCorrectness.txt) +add_fdb_test( + TEST_FILES restarting/CycleTestRestart-1.txt + restarting/CycleTestRestart-2.txt) +add_fdb_test( + TEST_FILES restarting/StorefrontTestRestart-1.txt + restarting/StorefrontTestRestart-2.txt) +add_fdb_test( + TEST_FILES restarting/from_5.1.7/DrUpgradeRestart-1.txt + restarting/from_5.1.7/DrUpgradeRestart-2.txt) +add_fdb_test( + TEST_FILES 
restarting/from_5.2.0/ClientTransactionProfilingCorrectness-1.txt + restarting/from_5.2.0/ClientTransactionProfilingCorrectness-2.txt) +add_fdb_test(TEST_FILES slow/ApiCorrectness.txt) +add_fdb_test(TEST_FILES slow/ApiCorrectnessAtomicRestore.txt) +add_fdb_test(TEST_FILES slow/ApiCorrectnessSwitchover.txt) +add_fdb_test(TEST_FILES slow/ClogWithRollbacks.txt) +add_fdb_test(TEST_FILES slow/CloggedCycleTest.txt) +add_fdb_test(TEST_FILES slow/CloggedStorefront.txt) +add_fdb_test(TEST_FILES slow/CommitBug.txt) +add_fdb_test(TEST_FILES slow/ConfigureTest.txt) +add_fdb_test(TEST_FILES slow/CycleRollbackPlain.txt) +add_fdb_test(TEST_FILES slow/DDBalanceAndRemove.txt) +add_fdb_test(TEST_FILES slow/DDBalanceAndRemoveStatus.txt) +add_fdb_test(TEST_FILES slow/FastTriggeredWatches.txt) +add_fdb_test(TEST_FILES slow/LowLatencyWithFailures.txt) +add_fdb_test(TEST_FILES slow/MoveKeysClean.txt) +add_fdb_test(TEST_FILES slow/MoveKeysSideband.txt) +add_fdb_test(TEST_FILES slow/RyowCorrectness.txt) +add_fdb_test(TEST_FILES slow/Serializability.txt) +add_fdb_test(TEST_FILES slow/SharedBackupCorrectness.txt) +add_fdb_test(TEST_FILES slow/SharedBackupToDBCorrectness.txt) +add_fdb_test(TEST_FILES slow/StorefrontTest.txt) +add_fdb_test(TEST_FILES slow/SwizzledApiCorrectness.txt) +add_fdb_test(TEST_FILES slow/SwizzledCycleTest.txt) +add_fdb_test(TEST_FILES slow/SwizzledDdBalance.txt) +add_fdb_test(TEST_FILES slow/SwizzledRollbackTimeLapse.txt) +add_fdb_test(TEST_FILES slow/SwizzledRollbackTimeLapseIncrement.txt) +add_fdb_test(TEST_FILES slow/VersionStampBackupToDB.txt) +add_fdb_test(TEST_FILES slow/VersionStampSwitchover.txt) +add_fdb_test(TEST_FILES slow/WriteDuringReadAtomicRestore.txt) +add_fdb_test(TEST_FILES slow/WriteDuringReadSwitchover.txt) +add_fdb_test(TEST_FILES slow/ddbalance.txt) +add_fdb_test(TEST_FILES status/invalid_proc_addresses.txt) +add_fdb_test(TEST_FILES status/local_6_machine_no_replicas_remain.txt) +add_fdb_test(TEST_FILES status/separate_1_of_3_coordinators_remain.txt) +add_fdb_test(TEST_FILES status/separate_2_of_3_coordinators_remain.txt) +add_fdb_test(TEST_FILES status/separate_cannot_write_cluster_file.txt) +add_fdb_test(TEST_FILES status/separate_idle.txt) +add_fdb_test(TEST_FILES status/separate_initializing.txt) +add_fdb_test(TEST_FILES status/separate_no_coordinators.txt) +add_fdb_test(TEST_FILES status/separate_no_database.txt) +add_fdb_test(TEST_FILES status/separate_no_servers.txt) +add_fdb_test(TEST_FILES status/separate_not_enough_servers.txt) +add_fdb_test(TEST_FILES status/single_process_too_many_config_params.txt) + +verify_testing() diff --git a/tests/TestRunner/TestRunner.py b/tests/TestRunner/TestRunner.py index 07ee5d009f..ee4d240159 100755 --- a/tests/TestRunner/TestRunner.py +++ b/tests/TestRunner/TestRunner.py @@ -14,6 +14,7 @@ import functools import multiprocessing import re import shutil +import io _logger = None @@ -38,14 +39,36 @@ def init_logging(loglevel, logdir): class LogParser: - def __init__(self, basedir, name, infile, out): + def __init__(self, basedir, name, infile, out, aggregationPolicy): self.basedir = basedir self.name = name self.infile = infile - self.out = out self.backtraces = [] self.result = True self.address_re = re.compile(r'(0x[0-9a-f]+\s+)+') + self.aggregationPolicy = aggregationPolicy + self.outStream = None + if self.aggregationPolicy == 'NONE': + self.out = None + elif self.aggregationPolicy != 'ALL': + self.out = out + self.outStream = io.StringIO() + else: + self.out = out + + def write(self, txt): + if self.aggregationPolicy ==
'NONE': + pass + elif not self.result or self.aggregationPolicy == 'ALL': + self.out.write(txt) + else: + self.outStream.write(txt) + + def fail(self): + self.result = False + if self.aggregationPolicy == 'FAILED': + self.out.write(self.outStream.getvalue()) + self.outStream = None def writeHeader(self): pass @@ -93,11 +116,8 @@ class LogParser: continue if 'Type' not in obj: continue - # FIXME: I don't know if this is actually a failure or not... - #if obj['Type'] == 'TestFailure': - # self.result = False if obj['Severity'] == '40': - self.result = False + self.fail() if self.name is not None: obj['testname'] = self.name if self.sanitizeBacktrace(obj) is not None and backtraces == 0: @@ -122,7 +142,7 @@ class LogParser: if return_code != 0: return_code_trace['Severity'] = '40' return_code_trace['Type'] = 'TestFailure' - self.result = False + self.fail() else: return_code_trace['Severity'] = '10' return_code_trace['Type'] = 'ReturnCode' @@ -134,8 +154,8 @@ class LogParser: class JSONParser(LogParser): - def __init__(self, basedir, name, infile, out): - super().__init__(basedir, name, infile, out) + def __init__(self, basedir, name, infile, out, aggregationPolicy): + super().__init__(basedir, name, infile, out, aggregationPolicy) def processLine(self, line, linenr): try: @@ -144,9 +164,8 @@ class JSONParser(LogParser): self.log_trace_parse_error(linenr, e) def writeObject(self, obj): - json.dump(obj, self.out) - self.out.write('\n') - + self.write(json.dumps(obj)) + self.write('\n') class XMLParser(LogParser): @@ -176,20 +195,20 @@ class XMLParser: def warning(self, exception): self.warnings.append(exception) - def __init__(self, basedir, name, infile, out): - super().__init__(basedir, name, infile, out) + def __init__(self, basedir, name, infile, out, aggregationPolicy): + super().__init__(basedir, name, infile, out, aggregationPolicy) def writeHeader(self): - self.out.write('<?xml version="1.0"?>\n<Trace>\n') + self.write('<?xml version="1.0"?>\n<Trace>\n') def writeFooter(self): - self.out.write("</Trace>") + self.write("</Trace>") def writeObject(self, obj): - self.out.write('<Event') + self.write('<Event') for key, value in obj.items(): - self.out.write(' {}="{}"'.format(key, value)) + self.write(' {}="{}"'.format(key, value)) - self.out.write('/>\n') + self.write('/>\n') def processLine(self, line, linenr): if linenr < 3: @@ -222,57 +241,55 @@ def get_traces(d, log_format): return traces -def process_traces(basedir, testname, path, out, log_format, return_codes): +def process_traces(basedir, testname, path, out, aggregationPolicy, log_format, return_codes): res = True backtraces = [] parser = None + if log_format == 'json': + parser = JSONParser(basedir, testname, None, out, aggregationPolicy) + else: + parser = XMLParser(basedir, testname, None, out, aggregationPolicy) + parser.processReturnCodes(return_codes) + res = parser.result for trace in get_traces(path, log_format): if log_format == 'json': - parser = JSONParser(basedir, testname, trace, out) + parser = JSONParser(basedir, testname, trace, out, aggregationPolicy) else: - parser = XMLParser(basedir, testname, trace, out) - parser.processTraces() - res = res and parser.result - if log_format == 'json': - parser = JSONParser(basedir, testname, trace, out) - else: - parser = XMLParser(basedir, testname, trace, out) - parser.processReturnCodes(return_codes) - return res and parser.result + parser = XMLParser(basedir, testname, trace, out, aggregationPolicy) + if not res: + parser.fail() + parser.processTraces() + res = res and parser.result + return res -def run_simulation_test(basedir, - testtype, - testname, - testfiles, - log_format, - restart=False, - buggify=False, - seed=None): - pargs = [os.path.join(basedir,
'bin', 'fdbserver'), - '-r', testtype] - if testtype == 'test': +def run_simulation_test(basedir, options): + fdbserver = os.path.join(basedir, 'bin', 'fdbserver') + pargs = [fdbserver, + '-r', options.testtype] + if options.testtype == 'test': pargs.append('-C') pargs.append(os.path.join(args.builddir, 'fdb.cluster')) td = TestDirectory(basedir) - if restart: - pargs.append('-R') - if buggify: + if options.buggify: pargs.append('-b') pargs.append('on') # FIXME: include these lines as soon as json support is added #pargs.append('--trace_format') #pargs.append(log_format) test_dir = td.get_current_test_dir() - if seed is not None: + if options.seed is not None: pargs.append('-s') - pargs.append(str(args.seed)) + pargs.append("{}".format(int(options.seed, 0))) wd = os.path.join(test_dir, - 'test_{}'.format(testname.replace('/', '_'))) + 'test_{}'.format(options.name.replace('/', '_'))) os.mkdir(wd) return_codes = {} # {command: return_code} first = True - for testfile in testfiles: + for testfile in options.testfile: tmp = list(pargs) + if first and options.old_binary is not None and len(options.testfile) > 1: + _logger.info("Run old binary at {}".format(options.old_binary)) + tmp[0] = options.old_binary if not first: tmp.append('-R') first = False @@ -288,18 +305,32 @@ def run_simulation_test(basedir, return_codes[command] = proc.returncode if proc.returncode != 0: break - outfile = os.path.join(test_dir, 'traces.{}'.format(log_format)) + outfile = os.path.join(test_dir, 'traces.{}'.format(options.log_format)) res = True - with open(outfile, 'a') as f: - os.lockf(f.fileno(), os.F_LOCK, 0) - pos = f.tell() - res = process_traces(basedir, testname, - os.path.join(os.getcwd(), wd), f, log_format, - return_codes) - f.seek(pos) - os.lockf(f.fileno(), os.F_ULOCK, 0) - if res: - shutil.rmtree(wd) + if options.aggregate_traces == 'NONE': + res = process_traces(basedir, options.name, + wd, None, 'NONE', + options.log_format, return_codes) + else: + with open(outfile, 'a') as f: + os.lockf(f.fileno(), os.F_LOCK, 0) + pos = f.tell() + res = process_traces(basedir, options.name, + wd, f, options.aggregate_traces, + options.log_format, return_codes) + f.seek(pos) + os.lockf(f.fileno(), os.F_ULOCK, 0) + if options.keep_logs == 'NONE' or options.keep_logs == 'FAILED' and res: + print("Deleting old logs in {}".format(wd)) + traces = get_traces(wd, options.log_format) + for trace in traces: + os.remove(trace) + if options.keep_simdirs == 'NONE' or options.keep_simdirs == 'FAILED' and res: + print("Delete {}".format(os.path.join(wd, 'simfdb'))) + shutil.rmtree(os.path.join(wd, 'simfdb')) + if len(os.listdir(wd)) == 0: + print("Delete {} - empty".format(wd)) + os.rmdir(wd) return res @@ -313,8 +344,6 @@ if __name__ == '__main__': default='simulation', help='The type of test to run, choices are [{}]'.format( ', '.join(testtypes))), - parser.add_argument('-R', '--restart', action='store_true', - help='Mark as restart test') parser.add_argument('-B', '--buggify', action='store_true', help='Enable buggify') parser.add_argument('--logdir', default='logs', @@ -327,13 +356,19 @@ if __name__ == '__main__': help='The seed to use for this test') parser.add_argument('-F', '--log-format', required=False, default='xml', choices=['xml', 'json'], help='Log format (json or xml)') + parser.add_argument('-O', '--old-binary', required=False, default=None, + help='Path to the old binary to use for upgrade tests') + parser.add_argument('--aggregate-traces', default='NONE', + choices=['NONE', 'FAILED', 'ALL']) + 
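+    # A hedged sketch of how these policies compose (the invocation below is an
+    # assumed example, not from this patch): 'NONE' drops parser output, 'FAILED'
+    # buffers it and flushes it only once a test fails, 'ALL' writes it through:
+    #   TestRunner.py -n CycleTest -b ./build -t simulation \
+    #       --aggregate-traces FAILED --keep-logs FAILED fast/CycleTest.txt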
parser.add_argument('--keep-logs', default='FAILED', + choices=['NONE', 'FAILED', 'ALL']) + parser.add_argument('--keep-simdirs', default='NONE', + choices=['NONE', 'FAILED', 'ALL']) parser.add_argument('testfile', nargs="+", help='The tests to run') args = parser.parse_args() init_logging(args.loglevel, args.logdir) basedir = os.getcwd() if args.builddir is not None: basedir = args.builddir - res = run_simulation_test(basedir, args.testtype, args.name, - args.testfile, args.log_format, args.restart, args.buggify, - args.seed) + res = run_simulation_test(basedir, args) sys.exit(0 if res else 1) From 1c2dc20007466b151b5a2dea11806289d7d20aa6 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Tue, 22 Jan 2019 16:57:37 -0800 Subject: [PATCH 025/226] Note dr_agent prerequisite for "fdbdr switch" If the user forgets to start a dr_agent with the right arguments on the destination cluster, the switch cannot complete. --- documentation/sphinx/source/backups.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/documentation/sphinx/source/backups.rst b/documentation/sphinx/source/backups.rst index 009dad0da5..0aae1e72f1 100644 --- a/documentation/sphinx/source/backups.rst +++ b/documentation/sphinx/source/backups.rst @@ -501,7 +501,7 @@ The ``start`` subcommand is used to start a DR backup. If there is already a DR The ``switch`` subcommand is used to swap the source and destination database clusters of an active DR in differential mode. This means the destination will be unlocked and start streaming data into the source database, which will subsequently be locked. -This command requires both databases to be available. While the switch command is working, both databases will be locked for a few seconds. +This command requires both databases to be available. On the destination cluster, a ``dr_agent`` that points to the source cluster must be running. While the switch command is working, both databases will be locked for a few seconds. .. program:: fdbdr abort From c167ed5457b6da7164582eef0f7f7b7f2ee1849b Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Thu, 24 Jan 2019 09:59:38 -0800 Subject: [PATCH 026/226] Don't link flow into fdbmonitor for cmake It seems that fdbmonitor is not meant to depend on flow, since it redefines symbols such as `joinPath` --- fdbmonitor/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/fdbmonitor/CMakeLists.txt b/fdbmonitor/CMakeLists.txt index 361b25b44b..b62f985461 100644 --- a/fdbmonitor/CMakeLists.txt +++ b/fdbmonitor/CMakeLists.txt @@ -4,6 +4,5 @@ add_executable(fdbmonitor ${FDBMONITOR_SRCS}) # FIXME: This include directory is an ugly hack. 
We probably want to fix this # as soon as we get rid of the old build system target_include_directories(fdbmonitor PRIVATE ${CMAKE_BINARY_DIR}/fdbclient) -target_link_libraries(fdbmonitor flow) install(TARGETS fdbmonitor DESTINATION "${FDB_LIB_DIR}/foundationdb" COMPONENT server) From a50f2d953ac29d50816b7115bc8ef2da307338c7 Mon Sep 17 00:00:00 2001 From: Alex Miller <35046903+alexmiller-apple@users.noreply.github.com> Date: Thu, 24 Jan 2019 13:15:14 -0800 Subject: [PATCH 027/226] Update tests/CMakeLists.txt set buggify to on by default Co-Authored-By: mpilman --- tests/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 0a0b1d9d1b..dfe242ff39 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,7 +1,7 @@ include(AddFdbTest) # We need some variables to configure the test setup -set(ENABLE_BUGGIFY OFF CACHE BOOL "Enable buggify for tests") +set(ENABLE_BUGGIFY ON CACHE BOOL "Enable buggify for tests") set(RUN_IGNORED_TESTS OFF CACHE BOOL "Run tests that are marked for ignore") set(TEST_KEEP_LOGS "FAILED" CACHE STRING "Which locks to keep (NONE, FAILED, ALL)") set(TEST_KEEP_SIMDIR "NONE" CACHE STRING "Which simfdb directories to keep (NONE, FAILED, ALL)") From 5b8c98fd22a400d1ff2c8d22d1e72a47b4b6c27f Mon Sep 17 00:00:00 2001 From: Alex Miller <35046903+alexmiller-apple@users.noreply.github.com> Date: Thu, 24 Jan 2019 13:15:42 -0800 Subject: [PATCH 028/226] Update tests/CMakeLists.txt Co-Authored-By: mpilman --- tests/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index dfe242ff39..9bbcf472b4 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -170,6 +170,7 @@ add_fdb_test(TEST_FILES slow/VersionStampSwitchover.txt) add_fdb_test(TEST_FILES slow/WriteDuringReadAtomicRestore.txt) add_fdb_test(TEST_FILES slow/WriteDuringReadSwitchover.txt) add_fdb_test(TEST_FILES slow/ddbalance.txt) +# Note that status tests are not deterministic. 
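+# (A hedged aside, not from this patch: a status test failure can depend on
+# scheduling/timing rather than on the simulation seed, so it may not reproduce
+# under the same --seed; `ctest --rerun-failed` is a reasonable first step.)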
add_fdb_test(TEST_FILES status/invalid_proc_addresses.txt) add_fdb_test(TEST_FILES status/local_6_machine_no_replicas_remain.txt) add_fdb_test(TEST_FILES status/separate_1_of_3_coordinators_remain.txt) From 83a9d5790f2c52ce09cec0edaf055e025a0064ed Mon Sep 17 00:00:00 2001 From: Alex Miller <35046903+alexmiller-apple@users.noreply.github.com> Date: Thu, 24 Jan 2019 13:15:59 -0800 Subject: [PATCH 029/226] Update tests/CMakeLists.txt Co-Authored-By: mpilman --- tests/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 9bbcf472b4..540421e1b7 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -136,7 +136,7 @@ add_fdb_test( restarting/StorefrontTestRestart-2.txt) add_fdb_test( TEST_FILES restarting/from_5.1.7/DrUpgradeRestart-1.txt - restarting/from_5.1.7/DrUpgradeRestart-2.txt) + restarting/from_5.1.7/DrUpgradeRestart-2.txt IGNORE) add_fdb_test( TEST_FILES restarting/from_5.2.0/ClientTransactionProfilingCorrectness-1.txt restarting/from_5.2.0/ClientTransactionProfilingCorrectness-2.txt) From f5e5f02a7f32b2a554a81daa1039fd5d8b1abf64 Mon Sep 17 00:00:00 2001 From: Alex Miller <35046903+alexmiller-apple@users.noreply.github.com> Date: Thu, 24 Jan 2019 13:16:06 -0800 Subject: [PATCH 030/226] Update tests/CMakeLists.txt Co-Authored-By: mpilman --- tests/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 540421e1b7..d30e342335 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -139,7 +139,7 @@ add_fdb_test( restarting/from_5.1.7/DrUpgradeRestart-2.txt IGNORE) add_fdb_test( TEST_FILES restarting/from_5.2.0/ClientTransactionProfilingCorrectness-1.txt - restarting/from_5.2.0/ClientTransactionProfilingCorrectness-2.txt) + restarting/from_5.2.0/ClientTransactionProfilingCorrectness-2.txt IGNORE) add_fdb_test(TEST_FILES slow/ApiCorrectness.txt) add_fdb_test(TEST_FILES slow/ApiCorrectnessAtomicRestore.txt) add_fdb_test(TEST_FILES slow/ApiCorrectnessSwitchover.txt) From efe06d71cef3037b1bde34602c885221e28f2fc7 Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Thu, 24 Jan 2019 13:27:16 -0800 Subject: [PATCH 031/226] Change profilingEnabled to thread_local to eliminate a race. --- flow/Platform.cpp | 4 ++-- flow/Profiler.actor.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/flow/Platform.cpp b/flow/Platform.cpp index 43fdd1ea17..02d696b6b8 100644 --- a/flow/Platform.cpp +++ b/flow/Platform.cpp @@ -2594,13 +2594,13 @@ extern volatile size_t net2backtraces_max; extern volatile bool net2backtraces_overflow; extern volatile int net2backtraces_count; extern volatile double net2liveness; -extern volatile int profilingEnabled; +extern volatile thread_local int profilingEnabled; extern void initProfiling(); volatile thread_local bool profileThread = false; #endif -volatile int profilingEnabled = 1; +volatile thread_local int profilingEnabled = 1; void setProfilingEnabled(int enabled) { profilingEnabled = enabled; diff --git a/flow/Profiler.actor.cpp b/flow/Profiler.actor.cpp index efaf36f120..fb95e87603 100644 --- a/flow/Profiler.actor.cpp +++ b/flow/Profiler.actor.cpp @@ -33,7 +33,7 @@ #include "flow/Platform.h" -extern volatile int profilingEnabled; +extern volatile thread_local int profilingEnabled; static uint64_t gettid() { return syscall(__NR_gettid); } From 7fde75c439a9bc0c023300352114af6a45dad93c Mon Sep 17 00:00:00 2001 From: "A.J. 
Beamon" Date: Thu, 24 Jan 2019 13:28:45 -0800 Subject: [PATCH 032/226] Avoid possibly concurrent reassignment of dl_iterate_phdr. --- flow/SignalSafeUnwind.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/flow/SignalSafeUnwind.cpp b/flow/SignalSafeUnwind.cpp index 7b143546aa..f53abd7343 100644 --- a/flow/SignalSafeUnwind.cpp +++ b/flow/SignalSafeUnwind.cpp @@ -25,13 +25,14 @@ int64_t dl_iterate_phdr_calls = 0; #ifdef __linux__ #include +#include static bool phdr_cache_initialized = false; static std::vector< std::vector > phdr_cache; static int (*chain_dl_iterate_phdr)( int (*callback) (struct dl_phdr_info *info, size_t size, void *data), - void *data) = 0; + void *data) = nullptr; static int phdr_cache_add( struct dl_phdr_info *info, size_t size, void *data ) { phdr_cache.push_back( std::vector((uint8_t*)info, (uint8_t*)info + size) ); @@ -39,10 +40,14 @@ static int phdr_cache_add( struct dl_phdr_info *info, size_t size, void *data ) } static void initChain() { + static std::once_flag flag; + // Ensure that chain_dl_iterate_phdr points to the "real" function that we are overriding - *(void**)&chain_dl_iterate_phdr = dlsym(RTLD_NEXT, "dl_iterate_phdr"); - if (!chain_dl_iterate_phdr) + std::call_once(flag, [](){ *(void**)&chain_dl_iterate_phdr = dlsym(RTLD_NEXT, "dl_iterate_phdr"); }); + + if (!chain_dl_iterate_phdr) { criticalError(FDB_EXIT_ERROR, "SignalSafeUnwindError", "Unable to find dl_iterate_phdr symbol"); + } } void initSignalSafeUnwind() { From 5420808dbde6b1cdba4048e881cc3f8a102d9cc3 Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Thu, 24 Jan 2019 13:48:51 -0800 Subject: [PATCH 033/226] Add a release note. --- documentation/sphinx/source/release-notes.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/documentation/sphinx/source/release-notes.rst b/documentation/sphinx/source/release-notes.rst index 6f48648539..9f7111c342 100644 --- a/documentation/sphinx/source/release-notes.rst +++ b/documentation/sphinx/source/release-notes.rst @@ -8,6 +8,8 @@ Release Notes Features -------- +* Get read version, read, and commit requests are counted and aggregated by server-side latency in configurable latency bands and output in JSON status. `(PR #1084) `_ + Performance ----------- From 4925b61d0e5e3bb26c3cf694fc8647a67249fc7d Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Thu, 24 Jan 2019 13:49:22 -0800 Subject: [PATCH 034/226] Bump protocol version. 
--- flow/Net2.actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flow/Net2.actor.cpp b/flow/Net2.actor.cpp index 15ca0ba9b4..64b7d1715f 100644 --- a/flow/Net2.actor.cpp +++ b/flow/Net2.actor.cpp @@ -55,7 +55,7 @@ using namespace boost::asio::ip; // // xyzdev // vvvv -const uint64_t currentProtocolVersion = 0x0FDB00B061020001LL; +const uint64_t currentProtocolVersion = 0x0FDB00B061030001LL; const uint64_t compatibleProtocolVersionMask = 0xffffffffffff0000LL; const uint64_t minValidProtocolVersion = 0x0FDB00A200060001LL; From 79637f07ac50e156ee3ed5ff3ee419786dc9ec8c Mon Sep 17 00:00:00 2001 From: mpilman Date: Thu, 24 Jan 2019 14:43:12 -0800 Subject: [PATCH 035/226] Fixed several minor code issues These will become a problem as soon as we switch to C++17 --- fdbclient/DatabaseConfiguration.cpp | 2 +- fdbserver/OldTLogServer.actor.cpp | 4 ++-- fdbserver/PrefixTree.h | 12 ++++++------ fdbserver/tester.actor.cpp | 2 +- fdbservice/ThreadPool.h | 6 +++--- flow/Deque.h | 10 ++++++---- flow/IndexedSet.cpp | 6 ++++-- 7 files changed, 23 insertions(+), 19 deletions(-) diff --git a/fdbclient/DatabaseConfiguration.cpp b/fdbclient/DatabaseConfiguration.cpp index b6fe21a106..0bc0fd6d22 100644 --- a/fdbclient/DatabaseConfiguration.cpp +++ b/fdbclient/DatabaseConfiguration.cpp @@ -63,7 +63,7 @@ void parse( std::vector* regions, ValueRef const& v ) { RegionInfo info; json_spirit::mArray datacenters; dc.get("datacenters", datacenters); - bool nonSatelliteDatacenters = 0; + int nonSatelliteDatacenters = 0; for (StatusObjectReader s : datacenters) { std::string idStr; if (s.has("satellite") && s.last().get_int() == 1) { diff --git a/fdbserver/OldTLogServer.actor.cpp b/fdbserver/OldTLogServer.actor.cpp index b0d92885b7..2ac0e7d83c 100644 --- a/fdbserver/OldTLogServer.actor.cpp +++ b/fdbserver/OldTLogServer.actor.cpp @@ -1014,7 +1014,7 @@ namespace oldTLog { state Reference logData; loop { - bool foundCount = 0; + int foundCount = 0; for(auto it : self->id_data) { if(!it.second->stopped) { logData = it.second; @@ -1023,7 +1023,7 @@ namespace oldTLog { } ASSERT(foundCount < 2); - if(!foundCount) { + if(foundCount == 0) { wait( self->newLogData.onTrigger() ); continue; } diff --git a/fdbserver/PrefixTree.h b/fdbserver/PrefixTree.h index 3c261d1029..2f67c20ccd 100644 --- a/fdbserver/PrefixTree.h +++ b/fdbserver/PrefixTree.h @@ -32,8 +32,8 @@ static inline int commonPrefixLength(uint8_t const* ap, uint8_t const* bp, int c const int wordEnd = cl - sizeof(Word) + 1; for(; i < wordEnd; i += sizeof(Word)) { - register Word a = *(Word *)ap; - register Word b = *(Word *)bp; + Word a = *(Word *)ap; + Word b = *(Word *)bp; if(a != b) { return i + ctzll(a ^ b) / 8; } @@ -238,14 +238,14 @@ struct PrefixTree { void init(const Node *n) { node = n; - register union { + union { const uint8_t *p8; const uint16_t *p16; }; p8 = (const uint8_t *)&n->flags + 1; - register int flags = n->flags; - register bool large = flags & USE_LARGE_LENGTHS; + int flags = n->flags; + bool large = flags & USE_LARGE_LENGTHS; prefixLen = large ? *p16++ : *p8++; @@ -265,7 +265,7 @@ struct PrefixTree { if(flags & HAS_VALUE) rightPos += (large ? 
*p16++ : *p8++); - register int header = 2; // flags byte, first prefix len byte + int header = 2; // flags byte, first prefix len byte if(large) ++header; // second prefix len byte if(flags & HAS_SPLIT) diff --git a/fdbserver/tester.actor.cpp b/fdbserver/tester.actor.cpp index 9fbbbd49d2..49eae1b034 100644 --- a/fdbserver/tester.actor.cpp +++ b/fdbserver/tester.actor.cpp @@ -394,7 +394,7 @@ void sendResult( ReplyPromise& reply, Optional> const& result ) { } ACTOR Future runWorkloadAsync( Database cx, WorkloadInterface workIface, TestWorkload *workload, double databasePingDelay ) { - state auto_ptr delw(workload); + state unique_ptr delw(workload); state Optional> setupResult; state Optional> startResult; state Optional> checkResult; diff --git a/fdbservice/ThreadPool.h b/fdbservice/ThreadPool.h index f5fb3282b6..1064232cdf 100644 --- a/fdbservice/ThreadPool.h +++ b/fdbservice/ThreadPool.h @@ -52,7 +52,7 @@ public: T *object, ULONG flags = WT_EXECUTELONGFUNCTION) { typedef std::pair CallbackType; - std::auto_ptr p(new CallbackType(function, object)); + std::unique_ptr p(new CallbackType(function, object)); if (::QueueUserWorkItem(ThreadProc, p.get(), flags)) { @@ -72,9 +72,9 @@ private: { typedef std::pair CallbackType; - std::auto_ptr p(static_cast(context)); + std::unique_ptr p(static_cast(context)); (p->second->*p->first)(); return 0; } -}; \ No newline at end of file +}; diff --git a/flow/Deque.h b/flow/Deque.h index 7b224397ab..9a10e3fadb 100644 --- a/flow/Deque.h +++ b/flow/Deque.h @@ -103,11 +103,13 @@ public: end++; } - template - void emplace_back(U && val) { + template + reference emplace_back(U&&... val) { if (full()) grow(); - new (&arr[end&mask]) T(std::forward(val)); + new (&arr[end&mask]) T(std::forward(val)...); + reference result = arr[end & mask]; end++; + return result; } void pop_back() { @@ -181,4 +183,4 @@ private: } }; -#endif \ No newline at end of file +#endif diff --git a/flow/IndexedSet.cpp b/flow/IndexedSet.cpp index 60b70be0a5..905ad2cf83 100644 --- a/flow/IndexedSet.cpp +++ b/flow/IndexedSet.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include "flow/UnitTest.h" template @@ -383,11 +384,12 @@ TEST_CASE("/flow/IndexedSet/comparison to std::set") { TEST_CASE("/flow/IndexedSet/all numbers") { IndexedSet is; + std::mt19937_64 urng(g_random->randomUInt32()); std::vector allNumbers; for (int i = 0; i<1000000; i++) allNumbers.push_back(i); - std::random_shuffle(allNumbers.begin(), allNumbers.end()); + std::shuffle(allNumbers.begin(), allNumbers.end(), urng); for (int i = 0; i Date: Thu, 24 Jan 2019 15:41:27 -0800 Subject: [PATCH 036/226] Account for file mv's/rm's in java bindings cmake --- bindings/java/CMakeLists.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bindings/java/CMakeLists.txt b/bindings/java/CMakeLists.txt index 1d352b0302..162d7e980a 100644 --- a/bindings/java/CMakeLists.txt +++ b/bindings/java/CMakeLists.txt @@ -11,6 +11,7 @@ set(JAVA_BINDING_SRCS src/main/com/apple/foundationdb/async/CloseableAsyncIterator.java src/main/com/apple/foundationdb/async/package-info.java src/main/com/apple/foundationdb/Cluster.java + src/main/com/apple/foundationdb/ClusterOptions.java src/main/com/apple/foundationdb/Database.java src/main/com/apple/foundationdb/directory/Directory.java src/main/com/apple/foundationdb/directory/DirectoryAlreadyExistsException.java @@ -28,8 +29,6 @@ set(JAVA_BINDING_SRCS src/main/com/apple/foundationdb/FDB.java src/main/com/apple/foundationdb/FDBDatabase.java 
src/main/com/apple/foundationdb/FDBTransaction.java - src/main/com/apple/foundationdb/FutureCluster.java - src/main/com/apple/foundationdb/FutureDatabase.java src/main/com/apple/foundationdb/FutureKey.java src/main/com/apple/foundationdb/FutureResult.java src/main/com/apple/foundationdb/FutureResults.java From 6b34d62918970018b3337e574b5dd07420727d4b Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Thu, 24 Jan 2019 15:59:18 -0800 Subject: [PATCH 037/226] Fix GENERATED_JAVA_DIR --- bindings/java/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bindings/java/CMakeLists.txt b/bindings/java/CMakeLists.txt index 162d7e980a..5a972aac00 100644 --- a/bindings/java/CMakeLists.txt +++ b/bindings/java/CMakeLists.txt @@ -97,7 +97,7 @@ set(JAVA_TESTS_SRCS src/test/com/apple/foundationdb/test/WatchTest.java src/test/com/apple/foundationdb/test/WhileTrueTest.java) -set(GENERATED_JAVA_DIR ${CMAKE_CURRENT_BINARY_DIR}/src/main/com/foundationdb) +set(GENERATED_JAVA_DIR ${CMAKE_CURRENT_BINARY_DIR}/src/main/com/apple/foundationdb) file(MAKE_DIRECTORY ${GENERATED_JAVA_DIR}) set(GENERATED_JAVA_FILES From 1c1e42396c7bb3e5608d4fcb9dd1f59e9e08ef02 Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Thu, 24 Jan 2019 15:59:30 -0800 Subject: [PATCH 038/226] ClusterOptions.java is no longer generated --- bindings/java/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/bindings/java/CMakeLists.txt b/bindings/java/CMakeLists.txt index 5a972aac00..a47a7ad11f 100644 --- a/bindings/java/CMakeLists.txt +++ b/bindings/java/CMakeLists.txt @@ -101,7 +101,6 @@ set(GENERATED_JAVA_DIR ${CMAKE_CURRENT_BINARY_DIR}/src/main/com/apple/foundation file(MAKE_DIRECTORY ${GENERATED_JAVA_DIR}) set(GENERATED_JAVA_FILES - ${GENERATED_JAVA_DIR}/ClusterOptions.java ${GENERATED_JAVA_DIR}/ConflictRangeType.java ${GENERATED_JAVA_DIR}/DatabaseOptions.java ${GENERATED_JAVA_DIR}/MutationType.java From 2173e0acdae9b16e6e89c78d231affbd4d3ecc94 Mon Sep 17 00:00:00 2001 From: "A.J. 
Beamon" Date: Fri, 25 Jan 2019 09:30:02 -0800 Subject: [PATCH 039/226] Remove ClusterOptions from generated sources in Java --- bindings/java/local.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bindings/java/local.mk b/bindings/java/local.mk index d91bf93804..30f9e25152 100644 --- a/bindings/java/local.mk +++ b/bindings/java/local.mk @@ -50,7 +50,7 @@ else ifeq ($(PLATFORM),osx) java_ARCH := x86_64 endif -JAVA_GENERATED_SOURCES := bindings/java/src/main/com/apple/foundationdb/NetworkOptions.java bindings/java/src/main/com/apple/foundationdb/ClusterOptions.java bindings/java/src/main/com/apple/foundationdb/DatabaseOptions.java bindings/java/src/main/com/apple/foundationdb/TransactionOptions.java bindings/java/src/main/com/apple/foundationdb/StreamingMode.java bindings/java/src/main/com/apple/foundationdb/ConflictRangeType.java bindings/java/src/main/com/apple/foundationdb/MutationType.java bindings/java/src/main/com/apple/foundationdb/FDBException.java +JAVA_GENERATED_SOURCES := bindings/java/src/main/com/apple/foundationdb/NetworkOptions.java bindings/java/src/main/com/apple/foundationdb/DatabaseOptions.java bindings/java/src/main/com/apple/foundationdb/TransactionOptions.java bindings/java/src/main/com/apple/foundationdb/StreamingMode.java bindings/java/src/main/com/apple/foundationdb/ConflictRangeType.java bindings/java/src/main/com/apple/foundationdb/MutationType.java bindings/java/src/main/com/apple/foundationdb/FDBException.java JAVA_SOURCES := $(JAVA_GENERATED_SOURCES) bindings/java/src/main/com/apple/foundationdb/*.java bindings/java/src/main/com/apple/foundationdb/async/*.java bindings/java/src/main/com/apple/foundationdb/tuple/*.java bindings/java/src/main/com/apple/foundationdb/directory/*.java bindings/java/src/main/com/apple/foundationdb/subspace/*.java bindings/java/src/test/com/apple/foundationdb/test/*.java From ec995ebef060569f0a2eef754842a0a03a113d53 Mon Sep 17 00:00:00 2001 From: "A.J. 
Beamon" Date: Fri, 25 Jan 2019 11:32:49 -0800 Subject: [PATCH 040/226] _CBFUNC should be defined outside init_c_api, both because it doesn't depend on the C API and because it is used elsewhere and wasn't available globally as previously written --- bindings/python/fdb/impl.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bindings/python/fdb/impl.py b/bindings/python/fdb/impl.py index 1b1120027e..7adeedbbac 100644 --- a/bindings/python/fdb/impl.py +++ b/bindings/python/fdb/impl.py @@ -1284,6 +1284,7 @@ def optionalParamToBytes(v): _FDBBase.capi = _capi +_CBFUNC = ctypes.CFUNCTYPE(None, ctypes.c_void_p) def init_c_api(): _capi.fdb_select_api_version_impl.argtypes = [ctypes.c_int, ctypes.c_int] @@ -1327,8 +1328,6 @@ def init_c_api(): _capi.fdb_future_is_ready.argtypes = [ctypes.c_void_p] _capi.fdb_future_is_ready.restype = ctypes.c_int - _CBFUNC = ctypes.CFUNCTYPE(None, ctypes.c_void_p) - _capi.fdb_future_set_callback.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p] _capi.fdb_future_set_callback.restype = int _capi.fdb_future_set_callback.errcheck = check_error_code From dc2b7404153dc832fa5980f128555c01ac790278 Mon Sep 17 00:00:00 2001 From: Trevor Clinkenbeard Date: Fri, 25 Jan 2019 13:15:19 -0800 Subject: [PATCH 041/226] Added server_overloaded error and client message --- documentation/sphinx/source/mr-status.rst | 1 + flow/error_definitions.h | 1 + 2 files changed, 2 insertions(+) diff --git a/documentation/sphinx/source/mr-status.rst b/documentation/sphinx/source/mr-status.rst index 88d1cf2ea2..4a3ef63453 100644 --- a/documentation/sphinx/source/mr-status.rst +++ b/documentation/sphinx/source/mr-status.rst @@ -322,6 +322,7 @@ JSON Path Name Descriptio client.messages inconsistent_cluster_file Cluster file is not up to date. It contains the connection string ‘’. The current connection string is ‘’. This must mean that file permissions or other platform issues have prevented the file from being updated. To change coordinators without manual intervention, the cluster file and its containing folder must be writable by all servers and clients. If a majority of the coordinators referenced by the old connection string are lost, the database will stop working until the correct cluster file is distributed to all processes. client.messages no_cluster_controller Unable to locate a cluster controller within 2 seconds. Check that there are server processes running. client.messages quorum_not_reachable Unable to reach a quorum of coordinators. +client.messages server_overloaded Server is under too much load and cannot respond. client.messages status_incomplete_client Could not retrieve client status information. client.messages status_incomplete_cluster Could not retrieve cluster status information. client.messages status_incomplete_coordinators Could not fetch coordinator info. 
diff --git a/flow/error_definitions.h b/flow/error_definitions.h index 9d560e1280..b6624c3ba9 100755 --- a/flow/error_definitions.h +++ b/flow/error_definitions.h @@ -81,6 +81,7 @@ ERROR( please_reboot, 1207, "Reboot of server process requested" ) ERROR( please_reboot_delete, 1208, "Reboot of server process requested, with deletion of state" ) ERROR( master_proxy_failed, 1209, "Master terminating because a Proxy failed" ) ERROR( master_resolver_failed, 1210, "Master terminating because a Resolver failed" ) +ERROR( server_overloaded, 1211, "Server is under too much load and cannot respond" ) // 15xx Platform errors ERROR( platform_error, 1500, "Platform error" ) From 768d7678be31a06644efcd42b15a1a180291566f Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Fri, 25 Jan 2019 13:47:12 -0800 Subject: [PATCH 042/226] Add trace_format network option --- bindings/go/src/fdb/generated.go | 7 +++++++ documentation/sphinx/source/api-common.rst.inc | 3 +++ documentation/sphinx/source/api-python.rst | 4 ++++ documentation/sphinx/source/api-ruby.rst | 4 ++++ fdbclient/NativeAPI.actor.cpp | 5 +++++ fdbclient/NativeAPI.h | 8 +++++--- fdbclient/vexillographer/fdb.options | 3 +++ 7 files changed, 31 insertions(+), 3 deletions(-) diff --git a/bindings/go/src/fdb/generated.go b/bindings/go/src/fdb/generated.go index c42757d677..aea915cb22 100644 --- a/bindings/go/src/fdb/generated.go +++ b/bindings/go/src/fdb/generated.go @@ -92,6 +92,13 @@ func (o NetworkOptions) SetTraceLogGroup(param string) error { return o.setOpt(33, []byte(param)) } +// Selects trace output format for this client. xml (the default) and json are supported. +// +// Parameter: trace format +func (o NetworkOptions) SetTraceFormat(param string) error { + return o.setOpt(34, []byte(param)) +} + // Set internal tuning or debugging knobs // // Parameter: knob_name=knob_value diff --git a/documentation/sphinx/source/api-common.rst.inc b/documentation/sphinx/source/api-common.rst.inc index 6052c382fc..75ed4f62cd 100644 --- a/documentation/sphinx/source/api-common.rst.inc +++ b/documentation/sphinx/source/api-common.rst.inc @@ -232,6 +232,9 @@ .. |option-trace-roll-size-blurb| replace:: Sets the maximum size in bytes of a single trace output file for this FoundationDB client. +.. |option-trace-format-blurb| replace:: + Select the format of the trace files for this FoundationDB client. xml (the default) and json are supported. + .. |network-options-warning| replace:: It is an error to set these options after the first call to |open-func| anywhere in your application. diff --git a/documentation/sphinx/source/api-python.rst b/documentation/sphinx/source/api-python.rst index ed0e93fdf8..11f04d652a 100644 --- a/documentation/sphinx/source/api-python.rst +++ b/documentation/sphinx/source/api-python.rst @@ -117,6 +117,10 @@ After importing the ``fdb`` module and selecting an API version, you probably wa |option-trace-roll-size-blurb| + .. method :: fdb.options.set_trace_format(format) + + |option-trace-format-blurb| + .. method :: fdb.options.set_disable_multi_version_client_api() |option-disable-multi-version-client-api| diff --git a/documentation/sphinx/source/api-ruby.rst b/documentation/sphinx/source/api-ruby.rst index ef3c33f423..b8c5a8cdc3 100644 --- a/documentation/sphinx/source/api-ruby.rst +++ b/documentation/sphinx/source/api-ruby.rst @@ -104,6 +104,10 @@ After requiring the ``FDB`` gem and selecting an API version, you probably want |option-trace-roll-size-blurb| + .. 
method:: FDB.options.set_trace_format(format) -> nil + + |option-trace-format-blurb| + |option-disable-multi-version-client-api| .. method :: FDB.options.set_callbacks_on_external_threads() -> nil diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 3ed2c3e767..54cc2303c4 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -743,6 +743,7 @@ void Cluster::init( Reference connFile, bool startClientI initTraceEventMetrics(); auto publicIP = determinePublicIPAutomatically( connFile->getConnectionString() ); + selectTraceFormatter(networkOptions.traceFormat); openTraceFile(NetworkAddress(publicIP, ::getpid()), networkOptions.traceRollSize, networkOptions.traceMaxLogsSize, networkOptions.traceDirectory.get(), "trace", networkOptions.traceLogGroup); TraceEvent("ClientStart") @@ -795,6 +796,10 @@ void setNetworkOption(FDBNetworkOptions::Option option, Optional valu if(value.present()) networkOptions.traceLogGroup = value.get().toString(); break; + case FDBNetworkOptions::TRACE_FORMAT: + validateOptionValue(value, true); + networkOptions.traceFormat = value.get().toString(); + break; case FDBNetworkOptions::KNOB: { validateOptionValue(value, true); diff --git a/fdbclient/NativeAPI.h b/fdbclient/NativeAPI.h index a64a6d1381..1c55751552 100644 --- a/fdbclient/NativeAPI.h +++ b/fdbclient/NativeAPI.h @@ -47,14 +47,16 @@ struct NetworkOptions { uint64_t traceRollSize; uint64_t traceMaxLogsSize; std::string traceLogGroup; + std::string traceFormat; Optional logClientInfo; Standalone> supportedVersions; bool slowTaskProfilingEnabled; // The default values, TRACE_DEFAULT_ROLL_SIZE and TRACE_DEFAULT_MAX_LOGS_SIZE are located in Trace.h. - NetworkOptions() : localAddress(""), clusterFile(""), traceDirectory(Optional()), traceRollSize(TRACE_DEFAULT_ROLL_SIZE), traceMaxLogsSize(TRACE_DEFAULT_MAX_LOGS_SIZE), traceLogGroup("default"), - slowTaskProfilingEnabled(false) - { } + NetworkOptions() + : localAddress(""), clusterFile(""), traceDirectory(Optional()), + traceRollSize(TRACE_DEFAULT_ROLL_SIZE), traceMaxLogsSize(TRACE_DEFAULT_MAX_LOGS_SIZE), traceLogGroup("default"), + traceFormat("xml"), slowTaskProfilingEnabled(false) {} }; class Database { diff --git a/fdbclient/vexillographer/fdb.options b/fdbclient/vexillographer/fdb.options index 89b07c125a..7f79d577fe 100644 --- a/fdbclient/vexillographer/fdb.options +++ b/fdbclient/vexillographer/fdb.options @@ -48,6 +48,9 @@ description is not currently required but encouraged. true From 0116f6cf8ccb522041d47d06d56a5eb23e3a3102 Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Tue, 12 Feb 2019 10:38:12 -0800 Subject: [PATCH 120/226] Add editor-generated files to .gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index f510ee3941..979a625f2e 100644 --- a/.gitignore +++ b/.gitignore @@ -76,6 +76,8 @@ foundationdb.VC.db foundationdb.VC.VC.opendb ipch/ compile_commands.json +flow/actorcompiler/obj +flow/coveragetool/obj # Temporary and user configuration files *~ From d348846a10736d970a263aeb4637dca9a044a504 Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Tue, 12 Feb 2019 10:32:45 -0800 Subject: [PATCH 121/226] Introduce ErrorMessagePolicy class This encapsulates logic for how to report particular error messages. So far only used for reporting an actor not containing a wait statement, as this warning should be suppressed in certain contexts. 
We could change other error messages to use this class, but since they don't have any logic I don't think the extra indirection is appropriate. --- flow/actorcompiler/ActorParser.cs | 26 ++++++++++++++++++++------ flow/actorcompiler/ParseTree.cs | 1 + flow/actorcompiler/Program.cs | 3 ++- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/flow/actorcompiler/ActorParser.cs b/flow/actorcompiler/ActorParser.cs index b2ada01e45..69077728e2 100644 --- a/flow/actorcompiler/ActorParser.cs +++ b/flow/actorcompiler/ActorParser.cs @@ -36,6 +36,18 @@ namespace actorcompiler } }; + class ErrorMessagePolicy + { + public bool ActorWithoutWaitEnabled = true; + public void HandleActorWithoutWait(String sourceFile, Actor actor) + { + if (ActorWithoutWaitEnabled && !actor.isTestCase) + { + Console.Error.WriteLine("{0}:{1}: warning: ACTOR {2} does not contain a wait() statement", sourceFile, actor.SourceLine, actor.name); + } + } + } + class Token { public string Value; @@ -200,10 +212,12 @@ namespace actorcompiler Token[] tokens; string sourceFile; + ErrorMessagePolicy errorMessagePolicy; - public ActorParser(string text, string sourceFile) + public ActorParser(string text, string sourceFile, ErrorMessagePolicy errorMessagePolicy) { this.sourceFile = sourceFile; + this.errorMessagePolicy = errorMessagePolicy; tokens = Tokenize(text).Select(t=>new Token{ Value=t }).ToArray(); CountParens(); //if (sourceFile.EndsWith(".h")) LineNumbersEnabled = false; @@ -872,21 +886,21 @@ namespace actorcompiler var body = range(heading.End+1, tokens.Length) .TakeWhile(t => t.BraceDepth > toks.First().BraceDepth); - bool warnOnNoWait = false; if (head_token.Value == "ACTOR") { ParseActorHeading(actor, heading); - warnOnNoWait = true; } - else if (head_token.Value == "TEST_CASE") + else if (head_token.Value == "TEST_CASE") { ParseTestCaseHeading(actor, heading); + actor.isTestCase = true; + } else head_token.Assert("ACTOR or TEST_CASE expected!", t => false); actor.body = ParseCodeBlock(body); - if (!actor.body.containsWait() && warnOnNoWait) - Console.Error.WriteLine("{0}:{1}: warning: ACTOR {2} does not contain a wait() statement", sourceFile, actor.SourceLine, actor.name); + if (!actor.body.containsWait()) + this.errorMessagePolicy.HandleActorWithoutWait(sourceFile, actor); end = body.End + 1; } diff --git a/flow/actorcompiler/ParseTree.cs b/flow/actorcompiler/ParseTree.cs index 26aad6f827..f161008ce2 100644 --- a/flow/actorcompiler/ParseTree.cs +++ b/flow/actorcompiler/ParseTree.cs @@ -234,6 +234,7 @@ namespace actorcompiler public string testCaseParameters = null; public string nameSpace = null; public bool isForwardDeclaration = false; + public bool isTestCase = false; }; class Descr diff --git a/flow/actorcompiler/Program.cs b/flow/actorcompiler/Program.cs index 57fb1da85f..8ecb2d125a 100644 --- a/flow/actorcompiler/Program.cs +++ b/flow/actorcompiler/Program.cs @@ -38,11 +38,12 @@ namespace actorcompiler } Console.WriteLine("actorcompiler {0}", string.Join(" ", args)); string input = args[0], output = args[1], outputtmp = args[1] + ".tmp"; + ErrorMessagePolicy errorMessagePolicy = new ErrorMessagePolicy(); try { var inputData = File.ReadAllText(input); using (var outputStream = new StreamWriter(outputtmp)) - new ActorParser(inputData, input.Replace('\\', '/')).Write(outputStream, output.Replace('\\', '/')); + new ActorParser(inputData, input.Replace('\\', '/'), errorMessagePolicy).Write(outputStream, output.Replace('\\', '/')); if (File.Exists(output)) { File.SetAttributes(output, FileAttributes.Normal); 
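Before the command-line plumbing lands in the next two patches, it may help to see the new hook from the caller's side. Below is a minimal sketch of invoking the parser with the warning suppressed; ErrorMessagePolicy, ActorParser, and Write are the types from the patch above, while the Example wrapper and the file names are illustrative only:

```csharp
using System.IO;

namespace actorcompiler
{
    static class Example
    {
        static void Compile()
        {
            // Suppress the "ACTOR without wait()" warning, e.g. for test-only sources.
            var policy = new ErrorMessagePolicy { ActorWithoutWaitEnabled = false };

            string input = "FlowTests.actor.cpp";  // hypothetical input file
            string output = input + ".g.cpp";      // hypothetical output file

            // Same call pattern as Program.cs above, with the policy threaded through.
            var inputData = File.ReadAllText(input);
            using (var outputStream = new StreamWriter(output + ".tmp"))
                new ActorParser(inputData, input.Replace('\\', '/'), policy)
                    .Write(outputStream, output.Replace('\\', '/'));
        }
    }
}
```

The next two patches wire exactly this up: one exposes it as a --disable-actor-without-wait-error flag in Program.cs, and one passes that flag for the fdbrpc test sources (ActorFuzz, FlowTests, dsltest) in cmake.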
From 8b0e593f83bb2903c267c619cd61a927fde15883 Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Tue, 12 Feb 2019 10:37:08 -0800 Subject: [PATCH 122/226] Add --disable-actor-without-wait-error flag to actorcompiler --- flow/actorcompiler/Program.cs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/flow/actorcompiler/Program.cs b/flow/actorcompiler/Program.cs index 8ecb2d125a..dfce4ad014 100644 --- a/flow/actorcompiler/Program.cs +++ b/flow/actorcompiler/Program.cs @@ -33,12 +33,16 @@ namespace actorcompiler if (args.Length < 2) { Console.WriteLine("Usage:"); - Console.WriteLine(" actorcompiler [input] [output]"); + Console.WriteLine(" actorcompiler <input> <output> [--disable-actor-without-wait-error]"); return 100; } Console.WriteLine("actorcompiler {0}", string.Join(" ", args)); string input = args[0], output = args[1], outputtmp = args[1] + ".tmp"; ErrorMessagePolicy errorMessagePolicy = new ErrorMessagePolicy(); + if (args.Contains("--disable-actor-without-wait-error")) + { + errorMessagePolicy.ActorWithoutWaitEnabled = false; + } try { var inputData = File.ReadAllText(input); From 874a58cb4f631f1fe21b5b16ec699c1994efb06b Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Tue, 12 Feb 2019 10:55:57 -0800 Subject: [PATCH 123/226] Suppress actor without wait for tests in cmake --- fdbrpc/CMakeLists.txt | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fdbrpc/CMakeLists.txt b/fdbrpc/CMakeLists.txt index bd14736152..eaafd1e793 100644 --- a/fdbrpc/CMakeLists.txt +++ b/fdbrpc/CMakeLists.txt @@ -1,5 +1,4 @@ set(FDBRPC_SRCS - ActorFuzz.actor.cpp AsyncFileCached.actor.h AsyncFileEIO.actor.h AsyncFileKAIO.actor.h @@ -11,9 +10,7 @@ set(FDBRPC_SRCS AsyncFileWriteChecker.cpp batcher.actor.h crc32c.cpp - dsltest.actor.cpp FailureMonitor.actor.cpp - FlowTests.actor.cpp FlowTransport.actor.cpp genericactors.actor.h genericactors.actor.cpp @@ -55,8 +52,14 @@ if(NOT WIN32) list(APPEND FDBRPC_SRCS libcoroutine/context.c libeio/eio.c) endif() -actor_set(FDBRPC_BUILD "${FDBRPC_SRCS}") +set(FDBRPC_SRCS_ALLOW_ACTOR_WITHOUT_WAIT + ActorFuzz.actor.cpp + FlowTests.actor.cpp + dsltest.actor.cpp) + +actor_set(FDBRPC_BUILD "${FDBRPC_SRCS};${FDBRPC_SRCS_ALLOW_ACTOR_WITHOUT_WAIT}") add_library(fdbrpc STATIC ${FDBRPC_BUILD}) actor_compile(fdbrpc "${FDBRPC_SRCS}") +actor_compile(fdbrpc "${FDBRPC_SRCS_ALLOW_ACTOR_WITHOUT_WAIT}" DISABLE_ACTOR_WITHOUT_WAIT) target_include_directories(fdbrpc PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/libeio) target_link_libraries(fdbrpc PUBLIC flow) From df3454114e3f3040992f55d0d78ac75f916ab056 Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Tue, 12 Feb 2019 11:03:08 -0800 Subject: [PATCH 124/226] Add TODO --- flow/actorcompiler/ActorParser.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/flow/actorcompiler/ActorParser.cs b/flow/actorcompiler/ActorParser.cs index 69077728e2..07d8a6e0e1 100644 --- a/flow/actorcompiler/ActorParser.cs +++ b/flow/actorcompiler/ActorParser.cs @@ -43,6 +43,7 @@ namespace actorcompiler { if (ActorWithoutWaitEnabled && !actor.isTestCase) { + // TODO(atn34): Once cmake is the only build system we can make this an error instead of a warning.
Console.Error.WriteLine("{0}:{1}: warning: ACTOR {2} does not contain a wait() statement", sourceFile, actor.SourceLine, actor.name); } } From 2f83e595cfd98daaa970a76204d2bcec0ec1df0b Mon Sep 17 00:00:00 2001 From: Colin <6332295+coadler@users.noreply.github.com> Date: Sat, 19 Jan 2019 21:47:49 -0600 Subject: [PATCH 125/226] Disable modules in Golang binding install script --- bindings/go/fdb-go-install.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bindings/go/fdb-go-install.sh b/bindings/go/fdb-go-install.sh index 897e694d5d..69a9e1e46c 100755 --- a/bindings/go/fdb-go-install.sh +++ b/bindings/go/fdb-go-install.sh @@ -11,6 +11,9 @@ # library. # +# This script will not work correctly with Go Modules enabled. +GO111MODULE=off + DESTDIR="${DESTDIR:-}" FDBVER="${FDBVER:-}" REMOTE="${REMOTE:-github.com}" From dc38c68c76b2e2f65b75d4f214c62328a1a7a5d3 Mon Sep 17 00:00:00 2001 From: Colin Adler Date: Mon, 21 Jan 2019 22:56:34 -0600 Subject: [PATCH 126/226] Add support for modules Because of how fdb-go-install.sh functions, it's necessary to use the folder that the script installs to, located at $GOPATH/src/github.com/apple/foundationdb/bindings/go. Since modules will use the version from $GOPATH/pkg/mod/github.com/apple/foundationdb/bindings/go, which has a checksum stored in go.sum, it's not possible to use this package currently with modules enabled. I believe the solution to this problem is to use replace directives: https://github.com/golang/go/wiki/Modules#when-should-i-use-the-replace-directive After using the install script, adding `replace github.com/apple/foundationdb/bindings/go => $GOPATH/src/github.com/apple/foundationdb/bindings/go` will allow Go programs to be built normally with modules. Note: $GOPATH cannot be used directly and must be expanded into an absolute or relative path. This go.mod must be added for replace directives to work correctly. --- bindings/go/Gopkg.lock | 9 --------- bindings/go/Gopkg.toml | 2 -- bindings/go/fdb-go-install.sh | 8 +++++++- bindings/go/go.mod | 4 ++++ 4 files changed, 11 insertions(+), 12 deletions(-) delete mode 100644 bindings/go/Gopkg.lock delete mode 100644 bindings/go/Gopkg.toml create mode 100644 bindings/go/go.mod diff --git a/bindings/go/Gopkg.lock b/bindings/go/Gopkg.lock deleted file mode 100644 index bef2d0092e..0000000000 --- a/bindings/go/Gopkg.lock +++ /dev/null @@ -1,9 +0,0 @@ -# This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'. - - -[solve-meta] - analyzer-name = "dep" - analyzer-version = 1 - inputs-digest = "ab4fef131ee828e96ba67d31a7d690bd5f2f42040c6766b1b12fe856f87e0ff7" - solver-name = "gps-cdcl" - solver-version = 1 diff --git a/bindings/go/Gopkg.toml b/bindings/go/Gopkg.toml deleted file mode 100644 index 525774ebad..0000000000 --- a/bindings/go/Gopkg.toml +++ /dev/null @@ -1,2 +0,0 @@ -# The FoundationDB go bindings currently have no external golang dependencies outside of -# the go standard library. diff --git a/bindings/go/fdb-go-install.sh b/bindings/go/fdb-go-install.sh index 69a9e1e46c..770a0a38e9 100755 --- a/bindings/go/fdb-go-install.sh +++ b/bindings/go/fdb-go-install.sh @@ -317,7 +317,7 @@ else fi fi - # Step 5: Explain CGO flags. 
if [[ "${status}" -eq 0 && ("${operation}" == "localinstall" || "${operation}" == "install" ) ]] ; then echo @@ -327,6 +327,12 @@ else echo " CGO_CPPFLAGS=\"${cgo_cppflags}\"" echo " CGO_CFLAGS=\"${cgo_cflags}\"" echo " CGO_LDFLAGS=\"${cgo_ldflags}\"" + echo + echo "If you use modules, it may be necessary to add this replace directive in your go.mod:" + echo " replace github.com/apple/foundationdb/bindings/go => ${GOPATH}/src/${REMOTE}/${FDBREPO}/bindings/go" + echo + echo "Note: this replace directive is an absolute path and will only work for your system." + echo "For shared repositories you may need to use a relative path pointing to \$GOPATH/src/github.com/apple/foundationdb." fi fi fi diff --git a/bindings/go/go.mod b/bindings/go/go.mod new file mode 100644 index 0000000000..16e502baaf --- /dev/null +++ b/bindings/go/go.mod @@ -0,0 +1,4 @@ +module github.com/apple/foundationdb/bindings/go + +// The FoundationDB go bindings currently have no external golang dependencies outside of +// the go standard library. From 676e1ceb73b99af588224235173ddd1b164dca57 Mon Sep 17 00:00:00 2001 From: Colin Adler Date: Tue, 12 Feb 2019 11:56:31 -0600 Subject: [PATCH 127/226] fixup! Add support for modules --- bindings/go/README.md | 18 ++++++++++++++++++ bindings/go/fdb-go-install.sh | 9 --------- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/bindings/go/README.md b/bindings/go/README.md index 94faa9bb9d..f213fb48e3 100644 --- a/bindings/go/README.md +++ b/bindings/go/README.md @@ -31,3 +31,21 @@ Documentation * [API documentation](https://godoc.org/github.com/apple/foundationdb/bindings/go/src/fdb) * [Tutorial](https://apple.github.io/foundationdb/class-scheduling.html) + +Modules +------- + +If you used the bindings with modules before the addition of the `go.mod` file, +it may be necessary to update the import path in your `go.mod`. + +By default, a module enabled `go get` will add something like this to your `go.mod`: + + github.com/apple/foundationdb vx.x.x-xxxxxxxxxxxxxx-xxxxxxxxxxxx + +You will need to delete that line, then run `go get github.com/apple/foundationdb/bindings/go@version`. +You should now have a line like this in your `go.mod`: + + github.com/apple/foundationdb/bindings/go vx.x.x-xxxxxxxxxxxxxx-xxxxxxxxxxxx + +Note: `@version` is only necessary if you previously locked to a +specific version or commit, in which case you'd replace `version` with a commit hash or tag. diff --git a/bindings/go/fdb-go-install.sh b/bindings/go/fdb-go-install.sh index 770a0a38e9..834ebe36e1 100755 --- a/bindings/go/fdb-go-install.sh +++ b/bindings/go/fdb-go-install.sh @@ -11,9 +11,6 @@ # library. # -# This script will not work correctly with Go Modules enabled. -GO111MODULE=off - DESTDIR="${DESTDIR:-}" FDBVER="${FDBVER:-}" REMOTE="${REMOTE:-github.com}" @@ -327,12 +324,6 @@ else echo " CGO_CPPFLAGS=\"${cgo_cppflags}\"" echo " CGO_CFLAGS=\"${cgo_cflags}\"" echo " CGO_LDFLAGS=\"${cgo_ldflags}\"" - echo - echo "If you use modules, it may be necessary to add this replace directive in your go.mod:" - echo " replace github.com/apple/foundationdb/bindings/go => ${GOPATH}/src/${REMOTE}/${FDBREPO}/bindings/go" - echo - echo "Note: this replace directive is an absolute path and will only work for your system." - echo "For shared repositories you may need to use a relative path pointing to \$GOPATH/src/github.com/apple/foundationdb." 
fi fi fi From 789bc6d4ee8761920e8eb2f2e6c5fd158b664277 Mon Sep 17 00:00:00 2001 From: Colin Adler Date: Tue, 12 Feb 2019 11:57:20 -0600 Subject: [PATCH 128/226] fixup! fixup! Add support for modules --- bindings/go/fdb-go-install.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bindings/go/fdb-go-install.sh b/bindings/go/fdb-go-install.sh index 834ebe36e1..897e694d5d 100755 --- a/bindings/go/fdb-go-install.sh +++ b/bindings/go/fdb-go-install.sh @@ -314,7 +314,7 @@ else fi fi - # Step 5: Explain CGO flags and modules usage. + # Step 5: Explain CGO flags. if [[ "${status}" -eq 0 && ("${operation}" == "localinstall" || "${operation}" == "install" ) ]] ; then echo From 8be2de8cc2526ddffe5a9eb7b27a3922922436eb Mon Sep 17 00:00:00 2001 From: Colin Adler Date: Tue, 12 Feb 2019 12:08:21 -0600 Subject: [PATCH 129/226] fixup! fixup! fixup! Add support for modules --- bindings/go/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bindings/go/README.md b/bindings/go/README.md index f213fb48e3..ad42d48ccd 100644 --- a/bindings/go/README.md +++ b/bindings/go/README.md @@ -35,7 +35,7 @@ Documentation Modules ------- -If you used the bindings with modules before the addition of the `go.mod` file, +If you used the bindings with modules before the addition of the `go.mod` file in the foundation repo, it may be necessary to update the import path in your `go.mod`. By default, a module enabled `go get` will add something like this to your `go.mod`: From 1b04f9a71a47620ffd01f314629b8b58053cf18a Mon Sep 17 00:00:00 2001 From: Colin Adler Date: Tue, 12 Feb 2019 13:24:42 -0600 Subject: [PATCH 130/226] fixup! fixup! fixup! fixup! Add support for modules --- bindings/go/fdb-go-install.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bindings/go/fdb-go-install.sh b/bindings/go/fdb-go-install.sh index 897e694d5d..ff3c739cc8 100755 --- a/bindings/go/fdb-go-install.sh +++ b/bindings/go/fdb-go-install.sh @@ -11,6 +11,9 @@ # library. # +# Currently, this script doesn't work with modules enabled. +GO111MODULE=off + DESTDIR="${DESTDIR:-}" FDBVER="${FDBVER:-}" REMOTE="${REMOTE:-github.com}" From c325465dac5bb3c6b3c1efd3c84cb59217ac124e Mon Sep 17 00:00:00 2001 From: Alvin Moore Date: Tue, 12 Feb 2019 16:22:56 -0800 Subject: [PATCH 131/226] Added support for a type of correctness service run against the fdbserver binary via docker-compose --- build/docker-compose.yaml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/build/docker-compose.yaml b/build/docker-compose.yaml index a86ba594f6..90ba9936c4 100644 --- a/build/docker-compose.yaml +++ b/build/docker-compose.yaml @@ -61,12 +61,20 @@ services: snapshot-ctest: &snapshot-ctest <<: *build-setup - command: bash -c 'if [ -f CMakeLists.txt ]; then mkdir -p work && cd work && cmake .. && make -j "$${MAKEJOBS}" && ctest; fi' + command: bash -c 'if [ -f CMakeLists.txt ]; then mkdir -p work && cd work && cmake .. && make -j "$${MAKEJOBS}" && ctest -L fast -j "$${MAKEJOBS}" --output-on-failure; fi' prb-ctest: <<: *snapshot-ctest + snapshot-correctness: &snapshot-correctness + <<: *build-setup + command: bash -c 'if [ -f CMakeLists.txt ]; then mkdir -p work && cd work && cmake .. 
&& make -j "$${MAKEJOBS}" && ctest -j "$${MAKEJOBS}" --output-on-failure; fi' + + prb-correctness: + <<: *snapshot-correctness + + shell: <<: *build-setup volumes: From c365b030c8c895f56f6b1dd6a411901c4ec3a115 Mon Sep 17 00:00:00 2001 From: Alvin Moore Date: Tue, 12 Feb 2019 17:01:04 -0800 Subject: [PATCH 132/226] Added support for variable build directory --- build/docker-compose.yaml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/build/docker-compose.yaml b/build/docker-compose.yaml index 90ba9936c4..dd3561d5e5 100644 --- a/build/docker-compose.yaml +++ b/build/docker-compose.yaml @@ -12,12 +12,14 @@ services: working_dir: /foundationdb environment: - MAKEJOBS=1 + - BUILD_DIR=./work release-setup: &release-setup <<: *build-setup environment: - MAKEJOBS=1 - RELEASE=true + - BUILD_DIR=./work snapshot-setup: &snapshot-setup <<: *build-setup @@ -53,7 +55,7 @@ services: snapshot-cmake: &snapshot-cmake <<: *build-setup - command: bash -c 'if [ -f CMakeLists.txt ]; then mkdir -p work && cd work && cmake .. && make -j "$${MAKEJOBS}"; fi' + command: bash -c 'if [ -f CMakeLists.txt ]; then mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake .. && make -j "$${MAKEJOBS}"; fi' prb-cmake: <<: *snapshot-cmake @@ -61,7 +63,7 @@ services: snapshot-ctest: &snapshot-ctest <<: *build-setup - command: bash -c 'if [ -f CMakeLists.txt ]; then mkdir -p work && cd work && cmake .. && make -j "$${MAKEJOBS}" && ctest -L fast -j "$${MAKEJOBS}" --output-on-failure; fi' + command: bash -c 'if [ -f CMakeLists.txt ]; then mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake .. && make -j "$${MAKEJOBS}" && ctest -L fast -j "$${MAKEJOBS}" --output-on-failure; fi' prb-ctest: <<: *snapshot-ctest @@ -69,7 +71,7 @@ services: snapshot-correctness: &snapshot-correctness <<: *build-setup - command: bash -c 'if [ -f CMakeLists.txt ]; then mkdir -p work && cd work && cmake .. && make -j "$${MAKEJOBS}" && ctest -j "$${MAKEJOBS}" --output-on-failure; fi' + command: bash -c 'if [ -f CMakeLists.txt ]; then mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake .. && make -j "$${MAKEJOBS}" && ctest -j "$${MAKEJOBS}" --output-on-failure; fi' prb-correctness: <<: *snapshot-correctness From f257cca1bcd2fd7532fdd3f7c4f92478665c6ef3 Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Tue, 12 Feb 2019 14:03:56 -0800 Subject: [PATCH 133/226] Update `Void _ = wait(...)` occurrences --- flow/README.md | 6 +++--- flow/actorcompiler/ActorCompiler.cs | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/flow/README.md b/flow/README.md index 0434051f90..f711c0fcd5 100644 --- a/flow/README.md +++ b/flow/README.md @@ -101,7 +101,7 @@ From 6.1, `wait()` on `Void` actors shouldn't assign the resulting value. 
So, th ```c++ Future<Void> asyncTask(); //defined elsewhere -Void _ = wait(asyncTask()); +wait(asyncTask()); ``` becomes @@ -303,7 +303,7 @@ some operation more than once: ```c++ ACTOR Future<Void> periodically(PromiseStream<Void> ps, int seconds) { loop { - Void _ = wait( delay( seconds ) ); + wait( delay( seconds ) ); ps.send(Void()); } } @@ -494,7 +494,7 @@ ACTOR Future<Void> foo(StringRef param) ACTOR Future<Void> bar() { Standalone<StringRef> str("string"); - Void _ = wait(foo(str)); + wait(foo(str)); return Void(); } ``` diff --git a/flow/actorcompiler/ActorCompiler.cs b/flow/actorcompiler/ActorCompiler.cs index d506e8da3b..2e0b56cd10 100644 --- a/flow/actorcompiler/ActorCompiler.cs +++ b/flow/actorcompiler/ActorCompiler.cs @@ -816,7 +816,7 @@ namespace actorcompiler if (firstChoice) { // Do this check only after evaluating the expression for the first wait expression, so that expression cannot be short circuited by cancellation. - // So Void _ = wait( expr() ) will always evaluate `expr()`, but choose { when ( Void _ = wait( expr1() ) ) {} when (Void _ = wait( expr2() ) {} } need + // So wait( expr() ) will always evaluate `expr()`, but choose { when ( Void _ = wait( expr1() ) ) {} when (Void _ = wait( expr2() ) {} } need // not evaluate `expr2()`. firstChoice = false; LineNumber(cx.target, stmt.FirstSourceLine); From 067a445e06337bb53251a4b86bbf2e1f8109be50 Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Tue, 12 Feb 2019 16:07:17 -0800 Subject: [PATCH 134/226] Replace unused _ variables with wait(success(...)) --- .../flow/tester/DirectoryTester.actor.cpp | 8 ++--- bindings/flow/tester/Tester.actor.cpp | 2 +- fdbbackup/backup.actor.cpp | 4 +-- fdbcli/fdbcli.actor.cpp | 6 ++-- fdbclient/BackupContainer.actor.cpp | 2 +- fdbclient/DatabaseBackupAgent.actor.cpp | 32 ++++++++--------- fdbclient/FileBackupAgent.actor.cpp | 36 +++++++++---------- fdbclient/HTTP.actor.cpp | 4 +-- fdbclient/ManagementAPI.actor.cpp | 2 +- fdbclient/NativeAPI.actor.cpp | 4 +-- fdbclient/TaskBucket.actor.cpp | 2 +- fdbrpc/AsyncFileNonDurable.actor.h | 10 +++--- fdbserver/DiskQueue.actor.cpp | 4 +-- fdbserver/KeyValueStoreSQLite.actor.cpp | 2 +- fdbserver/MasterProxyServer.actor.cpp | 2 +- fdbserver/QuietDatabase.actor.cpp | 4 +-- fdbserver/Status.actor.cpp | 2 +- fdbserver/VersionedBTree.actor.cpp | 2 +- fdbserver/workloads/AsyncFileRead.actor.cpp | 4 +-- fdbserver/workloads/AtomicRestore.actor.cpp | 2 +- .../workloads/AtomicSwitchover.actor.cpp | 6 ++-- .../workloads/BackupCorrectness.actor.cpp | 2 +- fdbserver/workloads/BackupToDBAbort.actor.cpp | 2 +- .../workloads/BackupToDBCorrectness.actor.cpp | 2 +- .../workloads/BackupToDBUpgrade.actor.cpp | 4 +-- fdbserver/workloads/BulkLoad.actor.cpp | 2 +- fdbserver/workloads/ChangeConfig.actor.cpp | 12 +++---- .../workloads/ConfigureDatabase.actor.cpp | 8 ++--- fdbserver/workloads/KillRegion.actor.cpp | 6 ++-- fdbserver/workloads/LowLatency.actor.cpp | 2 +- .../workloads/MachineAttrition.actor.cpp | 2 +- fdbserver/workloads/Ping.actor.cpp | 2 +- fdbserver/workloads/RYWDisable.actor.cpp | 2 +- fdbserver/workloads/RYWPerformance.actor.cpp | 18 +++++----- fdbserver/workloads/RandomMoveKeys.actor.cpp | 2 +- fdbserver/workloads/Throughput.actor.cpp | 2 +- flow/actorcompiler/ActorCompiler.cs | 2 +- 37 files changed, 105 insertions(+), 105 deletions(-) diff --git a/bindings/flow/tester/DirectoryTester.actor.cpp b/bindings/flow/tester/DirectoryTester.actor.cpp index 1b2d36766f..5e08efc3f2 100644 --- a/bindings/flow/tester/DirectoryTester.actor.cpp +++ b/bindings/flow/tester/DirectoryTester.actor.cpp @@ -334,17 +334,17 @@ struct 
DirectoryRemoveIfExistsFunc : InstructionFunc { if(count.getInt(0) == 0) { logOp(format("remove_if_exists %s", pathToString(directory->getPath()).c_str())); - bool _ = wait(executeMutation(instruction, [this] () { + wait(success(executeMutation(instruction, [this] () { return directory->removeIfExists(instruction->tr); - })); + }))); } else { IDirectory::Path path = wait(popPath(data)); logOp(format("remove_if_exists %s", pathToString(combinePaths(directory->getPath(), path)).c_str())); - bool _ = wait(executeMutation(instruction, [this, path] () { + wait(success(executeMutation(instruction, [this, path] () { return directory->removeIfExists(instruction->tr, path); - })); + }))); } return Void(); diff --git a/bindings/flow/tester/Tester.actor.cpp b/bindings/flow/tester/Tester.actor.cpp index eba46e1d8b..d0afc2ded3 100644 --- a/bindings/flow/tester/Tester.actor.cpp +++ b/bindings/flow/tester/Tester.actor.cpp @@ -353,7 +353,7 @@ struct PopFunc : InstructionFunc { ACTOR static Future call(Reference data, Reference instruction) { state std::vector items = data->stack.pop(); for(StackItem item : items) { - Standalone _ = wait(item.value); + wait(success(item.value)); } return Void(); } diff --git a/fdbbackup/backup.actor.cpp b/fdbbackup/backup.actor.cpp index 673e5d9a1c..49f81e4b4f 100644 --- a/fdbbackup/backup.actor.cpp +++ b/fdbbackup/backup.actor.cpp @@ -1599,7 +1599,7 @@ ACTOR Future submitBackup(Database db, std::string url, int snapshotInterv // Wait for the backup to complete, if requested if (waitForCompletion) { printf("Submitted and now waiting for the backup on tag `%s' to complete.\n", printable(StringRef(tagName)).c_str()); - int _ = wait(backupAgent.waitBackup(db, tagName)); + wait(success(backupAgent.waitBackup(db, tagName))); } else { // Check if a backup agent is running @@ -1803,7 +1803,7 @@ ACTOR Future discontinueBackup(Database db, std::string tagName, bool wait // Wait for the backup to complete, if requested if (waitForCompletion) { printf("Discontinued and now waiting for the backup on tag `%s' to complete.\n", printable(StringRef(tagName)).c_str()); - int _ = wait(backupAgent.waitBackup(db, tagName)); + wait(success(backupAgent.waitBackup(db, tagName))); } else { printf("The backup on tag `%s' was successfully discontinued.\n", printable(StringRef(tagName)).c_str()); diff --git a/fdbcli/fdbcli.actor.cpp b/fdbcli/fdbcli.actor.cpp index f23984f6fc..81b4dcbb29 100644 --- a/fdbcli/fdbcli.actor.cpp +++ b/fdbcli/fdbcli.actor.cpp @@ -2592,7 +2592,7 @@ ACTOR Future cli(CLIOptions opt, LineNoise* plinenoise) { } if( tokencmp(tokens[0], "waitopen")) { - Version _ = wait( getTransaction(db,tr,options,intrans)->getReadVersion() ); + wait(success( getTransaction(db,tr,options,intrans)->getReadVersion() )); continue; } @@ -3203,10 +3203,10 @@ ACTOR Future cli(CLIOptions opt, LineNoise* plinenoise) { is_error = true; } else { if(tokencmp(tokens[1], "on")) { - int _ = wait(setDDMode(db, 1)); + wait(success(setDDMode(db, 1))); printf("Data distribution is enabled\n"); } else if(tokencmp(tokens[1], "off")) { - int _ = wait(setDDMode(db, 0)); + wait(success(setDDMode(db, 0))); printf("Data distribution is disabled\n"); } else { printf("Usage: datadistribution \n"); diff --git a/fdbclient/BackupContainer.actor.cpp b/fdbclient/BackupContainer.actor.cpp index 61528980c0..f4a92f4f2c 100644 --- a/fdbclient/BackupContainer.actor.cpp +++ b/fdbclient/BackupContainer.actor.cpp @@ -345,7 +345,7 @@ public: state Reference f = wait(bc->readFile(snapshot.fileName)); int64_t size = wait(f->size()); 
state Standalone buf = makeString(size); - int _ = wait(f->read(mutateString(buf), buf.size(), 0)); + wait(success(f->read(mutateString(buf), buf.size(), 0))); json_spirit::mValue json; json_spirit::read_string(buf.toString(), json); JSONDoc doc(json); diff --git a/fdbclient/DatabaseBackupAgent.actor.cpp b/fdbclient/DatabaseBackupAgent.actor.cpp index e261ca8af4..dbaf25229e 100644 --- a/fdbclient/DatabaseBackupAgent.actor.cpp +++ b/fdbclient/DatabaseBackupAgent.actor.cpp @@ -712,7 +712,7 @@ namespace dbBackup { if (endVersion <= beginVersion) { wait(delay(FLOW_KNOBS->PREVENT_FAST_SPIN_DELAY)); - Key _ = wait(CopyLogsTaskFunc::addTask(tr, taskBucket, task, prevBeginVersion, beginVersion, TaskCompletionKey::signal(onDone))); + wait(success(CopyLogsTaskFunc::addTask(tr, taskBucket, task, prevBeginVersion, beginVersion, TaskCompletionKey::signal(onDone)))); wait(taskBucket->finish(tr, task)); return Void(); } @@ -755,7 +755,7 @@ namespace dbBackup { } else { if(appliedVersion <= stopVersionData) { wait(delay(FLOW_KNOBS->PREVENT_FAST_SPIN_DELAY)); - Key _ = wait(CopyLogsTaskFunc::addTask(tr, taskBucket, task, prevBeginVersion, beginVersion, TaskCompletionKey::signal(onDone))); + wait(success(CopyLogsTaskFunc::addTask(tr, taskBucket, task, prevBeginVersion, beginVersion, TaskCompletionKey::signal(onDone)))); wait(taskBucket->finish(tr, task)); return Void(); } @@ -880,7 +880,7 @@ namespace dbBackup { if (task->params.find(FinishedFullBackupTaskFunc::keyInsertTask) != task->params.end()) { state Reference onDone = futureBucket->unpack(task->params[Task::reservedTaskParamKeyDone]); - Key _ = wait(FinishedFullBackupTaskFunc::addTask(tr, taskBucket, task, TaskCompletionKey::signal(onDone))); + wait(success(FinishedFullBackupTaskFunc::addTask(tr, taskBucket, task, TaskCompletionKey::signal(onDone)))); wait(taskBucket->finish(tr, task)); return Void(); } @@ -926,7 +926,7 @@ namespace dbBackup { if (endVersion <= beginVersion) { wait(delay(FLOW_KNOBS->PREVENT_FAST_SPIN_DELAY)); - Key _ = wait(CopyDiffLogsTaskFunc::addTask(tr, taskBucket, task, prevBeginVersion, beginVersion, TaskCompletionKey::signal(onDone))); + wait(success(CopyDiffLogsTaskFunc::addTask(tr, taskBucket, task, prevBeginVersion, beginVersion, TaskCompletionKey::signal(onDone)))); wait(taskBucket->finish(tr, task)); return Void(); } @@ -1336,13 +1336,13 @@ namespace dbBackup { if (task->params[BackupAgentBase::destUid].size() == 0) { TraceEvent("DBA_CopyDiffLogsUpgradeTaskFuncAbortInUpgrade"); - Key _ = wait(AbortOldBackupTaskFunc::addTask(tr, taskBucket, task, TaskCompletionKey::signal(onDone))); + wait(success(AbortOldBackupTaskFunc::addTask(tr, taskBucket, task, TaskCompletionKey::signal(onDone)))); } else { Version beginVersion = BinaryReader::fromStringRef(task->params[DatabaseBackupAgent::keyBeginVersion], Unversioned()); Subspace config = Subspace(databaseBackupPrefixRange.begin).get(BackupAgentBase::keyConfig).get(task->params[DatabaseBackupAgent::keyConfigLogUid]); tr->set(config.pack(BackupAgentBase::destUid), task->params[BackupAgentBase::destUid]); tr->set(config.pack(BackupAgentBase::keyDrVersion), BinaryWriter::toValue(DatabaseBackupAgent::LATEST_DR_VERSION, Unversioned())); - Key _ = wait(CopyDiffLogsTaskFunc::addTask(tr, taskBucket, task, 0, beginVersion, TaskCompletionKey::signal(onDone))); + wait(success(CopyDiffLogsTaskFunc::addTask(tr, taskBucket, task, 0, beginVersion, TaskCompletionKey::signal(onDone)))); } wait(taskBucket->finish(tr, task)); @@ -1409,7 +1409,7 @@ namespace dbBackup { // Start the complete task, if 
differential is not enabled if (stopWhenDone.present()) { // After the Backup completes, clear the backup subspace and update the status - Key _ = wait(FinishedFullBackupTaskFunc::addTask(tr, taskBucket, task, TaskCompletionKey::noSignal())); + wait(success(FinishedFullBackupTaskFunc::addTask(tr, taskBucket, task, TaskCompletionKey::noSignal()))); } else { // Start the writing of logs, if differential tr->set(states.pack(DatabaseBackupAgent::keyStateStatus), StringRef(BackupAgentBase::getStateText(BackupAgentBase::STATE_DIFFERENTIAL))); @@ -1417,10 +1417,10 @@ namespace dbBackup { allPartsDone = futureBucket->future(tr); Version prevBeginVersion = BinaryReader::fromStringRef(task->params[DatabaseBackupAgent::keyPrevBeginVersion], Unversioned()); - Key _ = wait(CopyDiffLogsTaskFunc::addTask(tr, taskBucket, task, prevBeginVersion, restoreVersion, TaskCompletionKey::joinWith(allPartsDone))); + wait(success(CopyDiffLogsTaskFunc::addTask(tr, taskBucket, task, prevBeginVersion, restoreVersion, TaskCompletionKey::joinWith(allPartsDone)))); // After the Backup completes, clear the backup subspace and update the status - Key _ = wait(FinishedFullBackupTaskFunc::addTask(tr, taskBucket, task, TaskCompletionKey::noSignal(), allPartsDone)); + wait(success(FinishedFullBackupTaskFunc::addTask(tr, taskBucket, task, TaskCompletionKey::noSignal(), allPartsDone))); } wait(taskBucket->finish(tr, task)); @@ -1574,7 +1574,7 @@ namespace dbBackup { if(task->params[DatabaseBackupAgent::keyDatabasesInSync] != std::string("t")) { for (; rangeCount < backupRanges.size(); ++rangeCount) { - Key _ = wait(BackupRangeTaskFunc::addTask(tr, taskBucket, task, backupRanges[rangeCount].begin, backupRanges[rangeCount].end, TaskCompletionKey::joinWith(kvBackupRangeComplete))); + wait(success(BackupRangeTaskFunc::addTask(tr, taskBucket, task, backupRanges[rangeCount].begin, backupRanges[rangeCount].end, TaskCompletionKey::joinWith(kvBackupRangeComplete)))); } } else { @@ -1582,13 +1582,13 @@ namespace dbBackup { } // After the BackupRangeTask completes, set the stop key which will stop the BackupLogsTask - Key _ = wait(FinishFullBackupTaskFunc::addTask(tr, taskBucket, task, TaskCompletionKey::noSignal(), kvBackupRangeComplete)); + wait(success(FinishFullBackupTaskFunc::addTask(tr, taskBucket, task, TaskCompletionKey::noSignal(), kvBackupRangeComplete))); // Backup the logs which will create BackupLogRange tasks - Key _ = wait(CopyLogsTaskFunc::addTask(tr, taskBucket, task, 0, beginVersion, TaskCompletionKey::joinWith(kvBackupComplete))); + wait(success(CopyLogsTaskFunc::addTask(tr, taskBucket, task, 0, beginVersion, TaskCompletionKey::joinWith(kvBackupComplete)))); // After the Backup completes, clear the backup subspace and update the status - Key _ = wait(BackupRestorableTaskFunc::addTask(tr, taskBucket, task, TaskCompletionKey::noSignal(), kvBackupComplete)); + wait(success(BackupRestorableTaskFunc::addTask(tr, taskBucket, task, TaskCompletionKey::noSignal(), kvBackupComplete))); wait(taskBucket->finish(tr, task)); return Void(); @@ -1906,7 +1906,7 @@ public: throw; } - int _ = wait( backupAgent->waitBackup(dest, tagName, true) ); + wait(success( backupAgent->waitBackup(dest, tagName, true) )); TraceEvent("DBA_SwitchoverStopped"); @@ -1940,7 +1940,7 @@ public: TraceEvent("DBA_SwitchoverSubmitted"); - int _ = wait( drAgent.waitSubmitted(backupAgent->taskBucket->src, tagName) ); + wait(success( drAgent.waitSubmitted(backupAgent->taskBucket->src, tagName) )); TraceEvent("DBA_SwitchoverStarted"); @@ -2150,7 +2150,7 @@ public: 
loop{ try { - Version _ = wait(tr->getReadVersion()); //get the read version before getting a version from the source database to prevent the time differential from going negative + wait(success(tr->getReadVersion())); //get the read version before getting a version from the source database to prevent the time differential from going negative state Transaction scrTr(backupAgent->taskBucket->src); scrTr.setOption(FDBTransactionOptions::LOCK_AWARE); diff --git a/fdbclient/FileBackupAgent.actor.cpp b/fdbclient/FileBackupAgent.actor.cpp index cb99985e4c..7d8bf6d49a 100755 --- a/fdbclient/FileBackupAgent.actor.cpp +++ b/fdbclient/FileBackupAgent.actor.cpp @@ -1201,7 +1201,7 @@ namespace fileBackup { if (nextKey != endKey) { // Add task to cover nextKey to the end, using the priority of the current task - Key _ = wait(addTask(tr, taskBucket, task, task->getPriority(), nextKey, endKey, TaskCompletionKey::joinWith(onDone), Reference(), task->getPriority())); + wait(success(addTask(tr, taskBucket, task, task->getPriority(), nextKey, endKey, TaskCompletionKey::joinWith(onDone), Reference(), task->getPriority()))); } return Void(); @@ -2051,13 +2051,13 @@ namespace fileBackup { state int priority = latestSnapshotEndVersion.present() ? 1 : 0; // Add the initial log range task to read/copy the mutations and the next logs dispatch task which will run after this batch is done - Key _ = wait(BackupLogRangeTaskFunc::addTask(tr, taskBucket, task, priority, beginVersion, endVersion, TaskCompletionKey::joinWith(logDispatchBatchFuture))); - Key _ = wait(BackupLogsDispatchTask::addTask(tr, taskBucket, task, priority, beginVersion, endVersion, TaskCompletionKey::signal(onDone), logDispatchBatchFuture)); + wait(success(BackupLogRangeTaskFunc::addTask(tr, taskBucket, task, priority, beginVersion, endVersion, TaskCompletionKey::joinWith(logDispatchBatchFuture)))); + wait(success(BackupLogsDispatchTask::addTask(tr, taskBucket, task, priority, beginVersion, endVersion, TaskCompletionKey::signal(onDone), logDispatchBatchFuture))); // Do not erase at the first time if (prevBeginVersion > 0) { state Key destUidValue = wait(config.destUidValue().getOrThrow(tr)); - Key _ = wait(EraseLogRangeTaskFunc::addTask(tr, taskBucket, config.getUid(), TaskCompletionKey::joinWith(logDispatchBatchFuture), destUidValue, beginVersion)); + wait(success(EraseLogRangeTaskFunc::addTask(tr, taskBucket, config.getUid(), TaskCompletionKey::joinWith(logDispatchBatchFuture), destUidValue, beginVersion))); } wait(taskBucket->finish(tr, task)); @@ -2108,7 +2108,7 @@ namespace fileBackup { tr->setOption(FDBTransactionOptions::COMMIT_ON_FIRST_PROXY); state Key destUidValue = wait(backup.destUidValue().getOrThrow(tr)); - Key _ = wait(EraseLogRangeTaskFunc::addTask(tr, taskBucket, backup.getUid(), TaskCompletionKey::noSignal(), destUidValue)); + wait(success(EraseLogRangeTaskFunc::addTask(tr, taskBucket, backup.getUid(), TaskCompletionKey::noSignal(), destUidValue))); backup.stateEnum().set(tr, EBackupState::STATE_COMPLETED); @@ -2348,12 +2348,12 @@ namespace fileBackup { wait(config.initNewSnapshot(tr, 0)); // Using priority 1 for both of these to at least start both tasks soon - Key _ = wait(BackupSnapshotDispatchTask::addTask(tr, taskBucket, task, 1, TaskCompletionKey::joinWith(backupFinished))); - Key _ = wait(BackupLogsDispatchTask::addTask(tr, taskBucket, task, 1, 0, beginVersion, TaskCompletionKey::joinWith(backupFinished))); + wait(success(BackupSnapshotDispatchTask::addTask(tr, taskBucket, task, 1, 
TaskCompletionKey::joinWith(backupFinished)))); + wait(success(BackupLogsDispatchTask::addTask(tr, taskBucket, task, 1, 0, beginVersion, TaskCompletionKey::joinWith(backupFinished)))); // If a clean stop is requested, the log and snapshot tasks will quit after the backup is restorable, then the following // task will clean up and set the completed state. - Key _ = wait(FileBackupFinishedTask::addTask(tr, taskBucket, task, TaskCompletionKey::noSignal(), backupFinished)); + wait(success(FileBackupFinishedTask::addTask(tr, taskBucket, task, TaskCompletionKey::noSignal(), backupFinished))); wait(taskBucket->finish(tr, task)); return Void(); @@ -2845,7 +2845,7 @@ namespace fileBackup { if(!addingToExistingBatch && applyLag > (BUGGIFY ? 1 : CLIENT_KNOBS->CORE_VERSIONSPERSECOND * 300)) { // Wait a small amount of time and then re-add this same task. wait(delay(FLOW_KNOBS->PREVENT_FAST_SPIN_DELAY)); - Key _ = wait(RestoreDispatchTaskFunc::addTask(tr, taskBucket, task, beginVersion, "", 0, batchSize, remainingInBatch)); + wait(success(RestoreDispatchTaskFunc::addTask(tr, taskBucket, task, beginVersion, "", 0, batchSize, remainingInBatch))); TraceEvent("FileRestoreDispatch") .detail("RestoreUID", restore.getUid()) @@ -2885,7 +2885,7 @@ namespace fileBackup { // If adding to existing batch then blocks could be in progress so create a new Dispatch task that waits for them to finish if(addingToExistingBatch) { // Setting next begin to restoreVersion + 1 so that any files in the file map at the restore version won't be dispatched again. - Key _ = wait(RestoreDispatchTaskFunc::addTask(tr, taskBucket, task, restoreVersion + 1, "", 0, batchSize, 0, TaskCompletionKey::noSignal(), allPartsDone)); + wait(success(RestoreDispatchTaskFunc::addTask(tr, taskBucket, task, restoreVersion + 1, "", 0, batchSize, 0, TaskCompletionKey::noSignal(), allPartsDone))); TraceEvent("FileRestoreDispatch") .detail("RestoreUID", restore.getUid()) @@ -2899,7 +2899,7 @@ namespace fileBackup { } else if(beginVersion < restoreVersion) { // If beginVersion is less than restoreVersion then do one more dispatch task to get there - Key _ = wait(RestoreDispatchTaskFunc::addTask(tr, taskBucket, task, restoreVersion, "", 0, batchSize)); + wait(success(RestoreDispatchTaskFunc::addTask(tr, taskBucket, task, restoreVersion, "", 0, batchSize))); TraceEvent("FileRestoreDispatch") .detail("RestoreUID", restore.getUid()) @@ -2913,7 +2913,7 @@ namespace fileBackup { } else if(applyLag == 0) { // If apply lag is 0 then we are done so create the completion task - Key _ = wait(RestoreCompleteTaskFunc::addTask(tr, taskBucket, task, TaskCompletionKey::noSignal())); + wait(success(RestoreCompleteTaskFunc::addTask(tr, taskBucket, task, TaskCompletionKey::noSignal()))); TraceEvent("FileRestoreDispatch") .detail("RestoreUID", restore.getUid()) @@ -2926,7 +2926,7 @@ namespace fileBackup { } else { // Applying of mutations is not yet finished so wait a small amount of time and then re-add this same task. 
wait(delay(FLOW_KNOBS->PREVENT_FAST_SPIN_DELAY)); - Key _ = wait(RestoreDispatchTaskFunc::addTask(tr, taskBucket, task, beginVersion, "", 0, batchSize)); + wait(success(RestoreDispatchTaskFunc::addTask(tr, taskBucket, task, beginVersion, "", 0, batchSize))); TraceEvent("FileRestoreDispatch") .detail("RestoreUID", restore.getUid()) @@ -3320,7 +3320,7 @@ namespace fileBackup { if(firstVersion == invalidVersion) { wait(restore.logError(tr->getDatabase(), restore_missing_data(), "StartFullRestore: The backup had no data.", this)); std::string tag = wait(restore.tag().getD(tr)); - ERestoreState _ = wait(abortRestore(tr, StringRef(tag))); + wait(success(abortRestore(tr, StringRef(tag)))); return Void(); } @@ -3331,7 +3331,7 @@ namespace fileBackup { restore.setApplyEndVersion(tr, firstVersion); // Apply range data and log data in order - Key _ = wait(RestoreDispatchTaskFunc::addTask(tr, taskBucket, task, 0, "", 0, CLIENT_KNOBS->RESTORE_DISPATCH_BATCH_SIZE)); + wait(success(RestoreDispatchTaskFunc::addTask(tr, taskBucket, task, 0, "", 0, CLIENT_KNOBS->RESTORE_DISPATCH_BATCH_SIZE))); wait(taskBucket->finish(tr, task)); return Void(); @@ -3672,7 +3672,7 @@ public: state Key destUidValue = wait(config.destUidValue().getOrThrow(tr)); state Version endVersion = wait(tr->getReadVersion()); - Key _ = wait(fileBackup::EraseLogRangeTaskFunc::addTask(tr, backupAgent->taskBucket, config.getUid(), TaskCompletionKey::noSignal(), destUidValue)); + wait(success(fileBackup::EraseLogRangeTaskFunc::addTask(tr, backupAgent->taskBucket, config.getUid(), TaskCompletionKey::noSignal(), destUidValue))); config.stateEnum().set(tr, EBackupState::STATE_COMPLETED); @@ -3712,7 +3712,7 @@ public: // Cancel backup task through tag wait(tag.cancel(tr)); - Key _ = wait(fileBackup::EraseLogRangeTaskFunc::addTask(tr, backupAgent->taskBucket, config.getUid(), TaskCompletionKey::noSignal(), destUidValue)); + wait(success(fileBackup::EraseLogRangeTaskFunc::addTask(tr, backupAgent->taskBucket, config.getUid(), TaskCompletionKey::noSignal(), destUidValue))); config.stateEnum().set(tr, EBackupState::STATE_ABORTED); @@ -4010,7 +4010,7 @@ public: } } - int _ = wait( waitBackup(backupAgent, cx, tagName.toString(), true) ); + wait(success( waitBackup(backupAgent, cx, tagName.toString(), true) )); TraceEvent("AS_BackupStopped"); ryw_tr->reset(); diff --git a/fdbclient/HTTP.actor.cpp b/fdbclient/HTTP.actor.cpp index 0517e33b1d..eb458f0906 100644 --- a/fdbclient/HTTP.actor.cpp +++ b/fdbclient/HTTP.actor.cpp @@ -124,7 +124,7 @@ namespace HTTP { // Next search will start at the current end of the buffer - delim size + 1 if(sPos >= lookBack) sPos -= lookBack; - int _ = wait(read_into_string(conn, buf, CLIENT_KNOBS->HTTP_READ_SIZE)); + wait(success(read_into_string(conn, buf, CLIENT_KNOBS->HTTP_READ_SIZE))); } } @@ -132,7 +132,7 @@ namespace HTTP { ACTOR Future read_fixed_into_string(Reference conn, int len, std::string *buf, size_t pos) { state int stop_size = pos + len; while(buf->size() < stop_size) - int _ = wait(read_into_string(conn, buf, CLIENT_KNOBS->HTTP_READ_SIZE)); + wait(success(read_into_string(conn, buf, CLIENT_KNOBS->HTTP_READ_SIZE))); return Void(); } diff --git a/fdbclient/ManagementAPI.actor.cpp b/fdbclient/ManagementAPI.actor.cpp index 04695e41d8..72653131ae 100644 --- a/fdbclient/ManagementAPI.actor.cpp +++ b/fdbclient/ManagementAPI.actor.cpp @@ -1522,7 +1522,7 @@ ACTOR Future forceRecovery (Reference clusterFile) wait(clusterInterface->onChange()); } - ErrorOr _ = wait(clusterInterface->get().get().forceRecovery.tryGetReply( 
ForceRecoveryRequest() )); + wait(success(clusterInterface->get().get().forceRecovery.tryGetReply( ForceRecoveryRequest() ))); return Void(); } diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index e305325814..7242714af1 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -256,7 +256,7 @@ ACTOR static Future > getSampleVersionStamp(Transaction *t try { tr->reset(); tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE); - Optional _ = wait(tr->get(LiteralStringRef("\xff/StatusJsonTestKey62793"))); + wait(success(tr->get(LiteralStringRef("\xff/StatusJsonTestKey62793")))); state Future > vstamp = tr->getVersionstamp(); tr->makeSelfConflicting(); wait(tr->commit()); @@ -1204,7 +1204,7 @@ ACTOR Future warmRange_impl( Transaction *self, Database cx, KeyRange keys try { tr.setOption( FDBTransactionOptions::LOCK_AWARE ); tr.setOption( FDBTransactionOptions::CAUSAL_READ_RISKY ); - Version _ = wait( tr.getReadVersion() ); + wait(success( tr.getReadVersion() )); break; } catch( Error &e ) { wait( tr.onError(e) ); diff --git a/fdbclient/TaskBucket.actor.cpp b/fdbclient/TaskBucket.actor.cpp index e0b2243ff0..2b6da02146 100644 --- a/fdbclient/TaskBucket.actor.cpp +++ b/fdbclient/TaskBucket.actor.cpp @@ -591,7 +591,7 @@ public: bool is_busy = wait(isBusy(tr, taskBucket)); if (!is_busy) { - Key _ = wait(addIdle(tr, taskBucket)); + wait(success(addIdle(tr, taskBucket))); } Optional val = wait(tr->get(taskBucket->active.key())); diff --git a/fdbrpc/AsyncFileNonDurable.actor.h b/fdbrpc/AsyncFileNonDurable.actor.h index 7ad97fb262..da111dcc1d 100644 --- a/fdbrpc/AsyncFileNonDurable.actor.h +++ b/fdbrpc/AsyncFileNonDurable.actor.h @@ -44,7 +44,7 @@ Future sendErrorOnProcess( ISimulator::ProcessInfo* const& process, Promis ACTOR template Future sendErrorOnShutdown( Future in ) { choose { - when( ISimulator::KillType _ = wait( g_simulator.getCurrentProcess()->shutdownSignal.getFuture() ) ) { + when( wait(success( g_simulator.getCurrentProcess()->shutdownSignal.getFuture() )) ) { throw io_error().asInjectedFault(); } when( T rep = wait( in ) ) { @@ -64,14 +64,14 @@ public: } ACTOR Future doShutdown( AsyncFileDetachable* self ) { - ISimulator::KillType _ = wait( g_simulator.getCurrentProcess()->shutdownSignal.getFuture() ); + wait(success( g_simulator.getCurrentProcess()->shutdownSignal.getFuture() )); self->file = Reference(); return Void(); } ACTOR static Future> open( Future> wrappedFile ) { choose { - when( ISimulator::KillType _ = wait( g_simulator.getCurrentProcess()->shutdownSignal.getFuture() ) ) { + when( wait(success( g_simulator.getCurrentProcess()->shutdownSignal.getFuture() )) ) { throw io_error().asInjectedFault(); } when( Reference f = wait( wrappedFile ) ) { @@ -637,14 +637,14 @@ private: if(durable) wait(allModifications); else - ErrorOr _ = wait(errorOr(allModifications)); + wait(success(errorOr(allModifications))); if(!durable) { //Sometimes sync the file if writes were made durably. Before a file is first synced, it is stored in a temporary file and then renamed to the correct //location once sync is called. 
By not calling sync, we simulate a failure to fsync the directory storing the file if(self->hasBeenSynced && writeDurable && g_random->random01() < 0.5) { TEST(true); //AsyncFileNonDurable kill was durable and synced - ErrorOr _ = wait(errorOr(self->file->sync())); + wait(success(errorOr(self->file->sync()))); } //Setting this promise could trigger the deletion of the AsyncFileNonDurable; after this none of its members should be used diff --git a/fdbserver/DiskQueue.actor.cpp b/fdbserver/DiskQueue.actor.cpp index d9277e4392..57fef766d1 100644 --- a/fdbserver/DiskQueue.actor.cpp +++ b/fdbserver/DiskQueue.actor.cpp @@ -410,7 +410,7 @@ public: // Wait for all reads and writes on the file, and all actors referencing self, to be finished state Error error = success(); try { - ErrorOr _ = wait(errorOr(self->lastCommit)); + wait(success(errorOr(self->lastCommit))); while (self->recoveryActorCount.get(false)) wait( self->recoveryActorCount.onChange(false) ); @@ -941,7 +941,7 @@ private: for( fileNum=0; fileNum<2; fileNum++) { state int sizeNum; for( sizeNum=0; sizeNum < self->rawQueue->files[fileNum].size; sizeNum += sizeof(Page) ) { - int _ = wait( self->rawQueue->files[fileNum].f->read( testPage.get(), sizeof(Page), sizeNum ) ); + wait(success( self->rawQueue->files[fileNum].f->read( testPage.get(), sizeof(Page), sizeNum ) )); TraceEvent("PageData").detail("File", self->rawQueue->files[fileNum].dbgFilename).detail("SizeNum", sizeNum).detail("Seq", testPage->seq).detail("Hash", testPage->checkHash()).detail("Popped", testPage->popped); } } diff --git a/fdbserver/KeyValueStoreSQLite.actor.cpp b/fdbserver/KeyValueStoreSQLite.actor.cpp index 31bee11cdf..c557d61740 100644 --- a/fdbserver/KeyValueStoreSQLite.actor.cpp +++ b/fdbserver/KeyValueStoreSQLite.actor.cpp @@ -2006,7 +2006,7 @@ ACTOR Future KVFileCheck(std::string filename, bool integrity) { ASSERT(store != nullptr); // Wait for integrity check to finish - Optional _ = wait(store->readValue(StringRef())); + wait(success(store->readValue(StringRef()))); if(store->getError().isError()) wait(store->getError()); diff --git a/fdbserver/MasterProxyServer.actor.cpp b/fdbserver/MasterProxyServer.actor.cpp index 75a2cc5800..d6e603c101 100644 --- a/fdbserver/MasterProxyServer.actor.cpp +++ b/fdbserver/MasterProxyServer.actor.cpp @@ -1387,7 +1387,7 @@ ACTOR Future masterProxyServerCore( addActor.send(readRequestServer(proxy, &commitData)); // wait for txnStateStore recovery - Optional _ = wait(commitData.txnStateStore->readValue(StringRef())); + wait(success(commitData.txnStateStore->readValue(StringRef()))); int commitBatchByteLimit = (int)std::min(SERVER_KNOBS->COMMIT_TRANSACTION_BATCH_BYTES_MAX, diff --git a/fdbserver/QuietDatabase.actor.cpp b/fdbserver/QuietDatabase.actor.cpp index f925a8bad7..392cb690ea 100644 --- a/fdbserver/QuietDatabase.actor.cpp +++ b/fdbserver/QuietDatabase.actor.cpp @@ -291,12 +291,12 @@ ACTOR Future repairDeadDatacenter(Database cx, Referenceget().recoveryState < RecoveryState::STORAGE_RECOVERED ) { wait( dbInfo->onChange() ); } TraceEvent(SevWarnAlways, "DisablingFearlessConfiguration").detail("Location", context).detail("Stage", "Usable_Regions"); - ConfigurationResult::Type _ = wait( changeConfig( cx, "usable_regions=1", true ) ); + wait(success( changeConfig( cx, "usable_regions=1", true ) )); } } return Void(); diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp index bd72fb2430..74abc79962 100644 --- a/fdbserver/Status.actor.cpp +++ b/fdbserver/Status.actor.cpp @@ -861,7 +861,7 @@ ACTOR static Future
doGrvProbe(Transaction *tr, OptionalsetOption(priority.get()); } - Version _ = wait(tr->getReadVersion()); + wait(success(tr->getReadVersion())); return timer_monotonic() - start; } catch(Error &e) { diff --git a/fdbserver/VersionedBTree.actor.cpp b/fdbserver/VersionedBTree.actor.cpp index 4ef9e06851..c68c512cdc 100644 --- a/fdbserver/VersionedBTree.actor.cpp +++ b/fdbserver/VersionedBTree.actor.cpp @@ -1317,7 +1317,7 @@ private: self->printMutationBuffer(mutations); - VersionedChildrenT _ = wait(commitSubtree(self, mutations, self->m_pager->getReadSnapshot(latestVersion), self->m_root, beginKey, endKey)); + wait(success(commitSubtree(self, mutations, self->m_pager->getReadSnapshot(latestVersion), self->m_root, beginKey, endKey))); self->m_pager->setLatestVersion(writeVersion); debug_printf("%s: Committing pager %lld\n", self->m_name.c_str(), writeVersion); diff --git a/fdbserver/workloads/AsyncFileRead.actor.cpp b/fdbserver/workloads/AsyncFileRead.actor.cpp index 57c31aba8e..56ded4d6a0 100644 --- a/fdbserver/workloads/AsyncFileRead.actor.cpp +++ b/fdbserver/workloads/AsyncFileRead.actor.cpp @@ -260,14 +260,14 @@ struct AsyncFileReadWorkload : public AsyncFileWorkload begin = now(); if (self->ioLog) self->ioLog->logIOIssue(writeFlag, begin); - int _ = wait( uncancellable + wait(success( uncancellable ( holdWhile ( self->fileHandle, holdWhile(self->readBuffers[bufferIndex], r) ) - ) ); + ) )); if (self->ioLog) self->ioLog->logIOCompletion(writeFlag, begin, now()); self->bytesRead += self->readSize; diff --git a/fdbserver/workloads/AtomicRestore.actor.cpp b/fdbserver/workloads/AtomicRestore.actor.cpp index 356e0e9591..7d6eef01f2 100644 --- a/fdbserver/workloads/AtomicRestore.actor.cpp +++ b/fdbserver/workloads/AtomicRestore.actor.cpp @@ -74,7 +74,7 @@ struct AtomicRestoreWorkload : TestWorkload { } TraceEvent("AtomicRestore_Wait"); - int _ = wait( backupAgent.waitBackup(cx, BackupAgentBase::getDefaultTagName(), false) ); + wait(success( backupAgent.waitBackup(cx, BackupAgentBase::getDefaultTagName(), false) )); TraceEvent("AtomicRestore_BackupStart"); wait( delay(self->restoreAfter * g_random->random01()) ); TraceEvent("AtomicRestore_RestoreStart"); diff --git a/fdbserver/workloads/AtomicSwitchover.actor.cpp b/fdbserver/workloads/AtomicSwitchover.actor.cpp index 06ad3aff0d..78b84c2a68 100644 --- a/fdbserver/workloads/AtomicSwitchover.actor.cpp +++ b/fdbserver/workloads/AtomicSwitchover.actor.cpp @@ -153,19 +153,19 @@ struct AtomicSwitchoverWorkload : TestWorkload { state DatabaseBackupAgent restoreAgent(self->extraDB); TraceEvent("AS_Wait1"); - int _ = wait( backupAgent.waitBackup(self->extraDB, BackupAgentBase::getDefaultTag(), false) ); + wait(success( backupAgent.waitBackup(self->extraDB, BackupAgentBase::getDefaultTag(), false) )); TraceEvent("AS_Ready1"); wait( delay(g_random->random01()*self->switch1delay) ); TraceEvent("AS_Switch1"); wait( backupAgent.atomicSwitchover(self->extraDB, BackupAgentBase::getDefaultTag(), self->backupRanges, StringRef(), StringRef()) ); TraceEvent("AS_Wait2"); - int _ = wait( restoreAgent.waitBackup(cx, BackupAgentBase::getDefaultTag(), false) ); + wait(success( restoreAgent.waitBackup(cx, BackupAgentBase::getDefaultTag(), false) )); TraceEvent("AS_Ready2"); wait( delay(g_random->random01()*self->switch2delay) ); TraceEvent("AS_Switch2"); wait( restoreAgent.atomicSwitchover(cx, BackupAgentBase::getDefaultTag(), self->backupRanges, StringRef(), StringRef()) ); TraceEvent("AS_Wait3"); - int _ = wait( backupAgent.waitBackup(self->extraDB, 
BackupAgentBase::getDefaultTag(), false) ); + wait(success( backupAgent.waitBackup(self->extraDB, BackupAgentBase::getDefaultTag(), false) )); TraceEvent("AS_Ready3"); wait( delay(g_random->random01()*self->stopDelay) ); TraceEvent("AS_Abort"); diff --git a/fdbserver/workloads/BackupCorrectness.actor.cpp b/fdbserver/workloads/BackupCorrectness.actor.cpp index 16441c139b..a5016090eb 100644 --- a/fdbserver/workloads/BackupCorrectness.actor.cpp +++ b/fdbserver/workloads/BackupCorrectness.actor.cpp @@ -289,7 +289,7 @@ struct BackupAndRestoreCorrectnessWorkload : TestWorkload { // Try doing a restore without clearing the keys if (rowCount > 0) { try { - Version _ = wait(backupAgent->restore(cx, self->backupTag, KeyRef(lastBackupContainer), true, -1, true, normalKeys, Key(), Key(), self->locked)); + wait(success(backupAgent->restore(cx, self->backupTag, KeyRef(lastBackupContainer), true, -1, true, normalKeys, Key(), Key(), self->locked))); TraceEvent(SevError, "BARW_RestoreAllowedOverwrittingDatabase", randomID); ASSERT(false); } diff --git a/fdbserver/workloads/BackupToDBAbort.actor.cpp b/fdbserver/workloads/BackupToDBAbort.actor.cpp index 841c1e3aad..4f6df212f4 100644 --- a/fdbserver/workloads/BackupToDBAbort.actor.cpp +++ b/fdbserver/workloads/BackupToDBAbort.actor.cpp @@ -75,7 +75,7 @@ struct BackupToDBAbort : TestWorkload { TraceEvent("BDBA_Start").detail("Delay", self->abortDelay); wait(delay(self->abortDelay)); TraceEvent("BDBA_Wait"); - int _ = wait( backupAgent.waitBackup(self->extraDB, BackupAgentBase::getDefaultTag(), false) ); + wait(success( backupAgent.waitBackup(self->extraDB, BackupAgentBase::getDefaultTag(), false) )); TraceEvent("BDBA_Lock"); wait(lockDatabase(cx, self->lockid)); TraceEvent("BDBA_Abort"); diff --git a/fdbserver/workloads/BackupToDBCorrectness.actor.cpp b/fdbserver/workloads/BackupToDBCorrectness.actor.cpp index 9200860c33..38d2509d22 100644 --- a/fdbserver/workloads/BackupToDBCorrectness.actor.cpp +++ b/fdbserver/workloads/BackupToDBCorrectness.actor.cpp @@ -524,7 +524,7 @@ struct BackupToDBCorrectnessWorkload : TestWorkload { throw; } - int _ = wait(restoreAgent.waitBackup(cx, self->restoreTag)); + wait(success(restoreAgent.waitBackup(cx, self->restoreTag))); wait(restoreAgent.unlockBackup(cx, self->restoreTag)); } diff --git a/fdbserver/workloads/BackupToDBUpgrade.actor.cpp b/fdbserver/workloads/BackupToDBUpgrade.actor.cpp index f493ad78cb..f2ecf5a3d2 100644 --- a/fdbserver/workloads/BackupToDBUpgrade.actor.cpp +++ b/fdbserver/workloads/BackupToDBUpgrade.actor.cpp @@ -126,7 +126,7 @@ struct BackupToDBUpgradeWorkload : TestWorkload { } } - int _ = wait( backupAgent->waitBackup(self->extraDB, tag, false) ); + wait(success( backupAgent->waitBackup(self->extraDB, tag, false) )); return Void(); } @@ -445,7 +445,7 @@ struct BackupToDBUpgradeWorkload : TestWorkload { throw; } - int _ = wait(restoreAgent.waitBackup(cx, self->restoreTag)); + wait(success(restoreAgent.waitBackup(cx, self->restoreTag))); wait(restoreAgent.unlockBackup(cx, self->restoreTag)); wait(checkData(self->extraDB, logUid, logUid, self->backupTag, &backupAgent)); diff --git a/fdbserver/workloads/BulkLoad.actor.cpp b/fdbserver/workloads/BulkLoad.actor.cpp index 8bace8b23e..9d422b1ffc 100644 --- a/fdbserver/workloads/BulkLoad.actor.cpp +++ b/fdbserver/workloads/BulkLoad.actor.cpp @@ -83,7 +83,7 @@ struct BulkLoadWorkload : TestWorkload { for(int i = 0; i < self->writesPerTransaction; i++) tr.set( format( "/bulkload/%04x/%04x/%08x", self->clientId, actorId, idx + i ), self->value ); 
tr.makeSelfConflicting(); - Version _ = wait( tr.getReadVersion() ); + wait(success( tr.getReadVersion() )); wait( tr.commit() ); break; } catch (Error& e) { diff --git a/fdbserver/workloads/ChangeConfig.actor.cpp b/fdbserver/workloads/ChangeConfig.actor.cpp index 75f484da69..f43c42a745 100644 --- a/fdbserver/workloads/ChangeConfig.actor.cpp +++ b/fdbserver/workloads/ChangeConfig.actor.cpp @@ -61,15 +61,15 @@ struct ChangeConfigWorkload : TestWorkload { wait(delay(5*g_random->random01())); if (self->configMode.size()) { - ConfigurationResult::Type _ = wait(changeConfig(extraDB, self->configMode, true)); + wait(success(changeConfig(extraDB, self->configMode, true))); TraceEvent("WaitForReplicasExtra"); wait( waitForFullReplication( extraDB ) ); TraceEvent("WaitForReplicasExtraEnd"); } if (self->networkAddresses.size()) { if (self->networkAddresses == "auto") - CoordinatorsResult::Type _ = wait(changeQuorum(extraDB, autoQuorumChange())); + wait(success(changeQuorum(extraDB, autoQuorumChange()))); else - CoordinatorsResult::Type _ = wait(changeQuorum(extraDB, specifiedQuorumChange(NetworkAddress::parseList(self->networkAddresses)))); + wait(success(changeQuorum(extraDB, specifiedQuorumChange(NetworkAddress::parseList(self->networkAddresses))))); } wait(delay(5*g_random->random01())); } @@ -86,16 +86,16 @@ struct ChangeConfigWorkload : TestWorkload { } if( self->configMode.size() ) { - ConfigurationResult::Type _ = wait( changeConfig( cx, self->configMode, true ) ); + wait(success( changeConfig( cx, self->configMode, true ) )); TraceEvent("WaitForReplicas"); wait( waitForFullReplication( cx ) ); TraceEvent("WaitForReplicasEnd"); } if( self->networkAddresses.size() ) { if (self->networkAddresses == "auto") - CoordinatorsResult::Type _ = wait( changeQuorum( cx, autoQuorumChange() ) ); + wait(success( changeQuorum( cx, autoQuorumChange() ) )); else - CoordinatorsResult::Type _ = wait( changeQuorum( cx, specifiedQuorumChange(NetworkAddress::parseList( self->networkAddresses )) ) ); + wait(success( changeQuorum( cx, specifiedQuorumChange(NetworkAddress::parseList( self->networkAddresses )) ) )); } if(!extraConfigureBefore) { diff --git a/fdbserver/workloads/ConfigureDatabase.actor.cpp b/fdbserver/workloads/ConfigureDatabase.actor.cpp index 58b37e5cc3..851ccba40b 100644 --- a/fdbserver/workloads/ConfigureDatabase.actor.cpp +++ b/fdbserver/workloads/ConfigureDatabase.actor.cpp @@ -236,7 +236,7 @@ struct ConfigureDatabaseWorkload : TestWorkload { } ACTOR Future _setup( Database cx, ConfigureDatabaseWorkload *self ) { - ConfigurationResult::Type _ = wait( changeConfig( cx, "single", true ) ); + wait(success( changeConfig( cx, "single", true ) )); return Void(); } @@ -329,7 +329,7 @@ struct ConfigureDatabaseWorkload : TestWorkload { if (g_random->random01() < 0.5) config += " proxies=" + format("%d", randomRoleNumber()); if (g_random->random01() < 0.5) config += " resolvers=" + format("%d", randomRoleNumber()); - ConfigurationResult::Type _ = wait( changeConfig( cx, config, false ) ); + wait(success( changeConfig( cx, config, false ) )); //TraceEvent("ConfigureTestConfigureEnd").detail("NewConfig", newConfig); } @@ -338,11 +338,11 @@ struct ConfigureDatabaseWorkload : TestWorkload { auto ch = autoQuorumChange(); if (g_random->randomInt(0,2)) ch = nameQuorumChange( format("NewName%d", g_random->randomInt(0,100)), ch ); - CoordinatorsResult::Type _ = wait( changeQuorum( cx, ch ) ); + wait(success( changeQuorum( cx, ch ) )); //TraceEvent("ConfigureTestConfigureEnd").detail("NewQuorum", s); } else if ( 
randomChoice == 5) { - ConfigurationResult::Type _ = wait( changeConfig( cx, storeTypes[g_random->randomInt( 0, sizeof(storeTypes)/sizeof(storeTypes[0]))], true ) ); + wait(success( changeConfig( cx, storeTypes[g_random->randomInt( 0, sizeof(storeTypes)/sizeof(storeTypes[0]))], true ) )); } else { ASSERT(false); diff --git a/fdbserver/workloads/KillRegion.actor.cpp b/fdbserver/workloads/KillRegion.actor.cpp index e44bc0ea02..71a5215fe9 100644 --- a/fdbserver/workloads/KillRegion.actor.cpp +++ b/fdbserver/workloads/KillRegion.actor.cpp @@ -57,7 +57,7 @@ struct KillRegionWorkload : TestWorkload { ACTOR static Future _setup( KillRegionWorkload *self, Database cx ) { TraceEvent("ForceRecovery_DisablePrimaryBegin"); - ConfigurationResult::Type _ = wait( changeConfig( cx, g_simulator.disablePrimary, true ) ); + wait(success( changeConfig( cx, g_simulator.disablePrimary, true ) )); TraceEvent("ForceRecovery_WaitForRemote"); wait( waitForPrimaryDC(cx, LiteralStringRef("1")) ); TraceEvent("ForceRecovery_DisablePrimaryComplete"); @@ -67,11 +67,11 @@ struct KillRegionWorkload : TestWorkload { ACTOR static Future killRegion( KillRegionWorkload *self, Database cx ) { ASSERT( g_network->isSimulated() ); TraceEvent("ForceRecovery_DisableRemoteBegin"); - ConfigurationResult::Type _ = wait( changeConfig( cx, g_simulator.disableRemote, true ) ); + wait(success( changeConfig( cx, g_simulator.disableRemote, true ) )); TraceEvent("ForceRecovery_WaitForPrimary"); wait( waitForPrimaryDC(cx, LiteralStringRef("0")) ); TraceEvent("ForceRecovery_DisableRemoteComplete"); - ConfigurationResult::Type _ = wait( changeConfig( cx, g_simulator.originalRegions, true ) ); + wait(success( changeConfig( cx, g_simulator.originalRegions, true ) )); TraceEvent("ForceRecovery_RestoreOriginalComplete"); wait( delay( g_random->random01() * self->testDuration ) ); diff --git a/fdbserver/workloads/LowLatency.actor.cpp b/fdbserver/workloads/LowLatency.actor.cpp index c157b47f3b..a71db828f1 100644 --- a/fdbserver/workloads/LowLatency.actor.cpp +++ b/fdbserver/workloads/LowLatency.actor.cpp @@ -65,7 +65,7 @@ struct LowLatencyWorkload : TestWorkload { try { tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE); tr.setOption(FDBTransactionOptions::LOCK_AWARE); - Version _ = wait(tr.getReadVersion()); + wait(success(tr.getReadVersion())); break; } catch( Error &e ) { wait( tr.onError(e) ); diff --git a/fdbserver/workloads/MachineAttrition.actor.cpp b/fdbserver/workloads/MachineAttrition.actor.cpp index a18b5e1b53..f5ea081226 100644 --- a/fdbserver/workloads/MachineAttrition.actor.cpp +++ b/fdbserver/workloads/MachineAttrition.actor.cpp @@ -158,7 +158,7 @@ struct MachineAttritionWorkload : TestWorkload { try { tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE); tr.setOption(FDBTransactionOptions::LOCK_AWARE); - Version _ = wait(tr.getReadVersion()); + wait(success(tr.getReadVersion())); break; } catch( Error &e ) { wait( tr.onError(e) ); diff --git a/fdbserver/workloads/Ping.actor.cpp b/fdbserver/workloads/Ping.actor.cpp index f61f8b1aec..2203b7c99f 100644 --- a/fdbserver/workloads/Ping.actor.cpp +++ b/fdbserver/workloads/Ping.actor.cpp @@ -225,7 +225,7 @@ struct PingWorkload : TestWorkload { } // ACTOR Future receptionLogger( PingWorkload* self, Future done, NetworkAddress to, UID id ) { - // PingReply _ = wait( done ); + // wait(success( done )); // if( now() > self->testStart + 29 && now() < self->testStart + 31 ) // TraceEvent("PayloadReplyReceived", id).detail("To", to); // return Void(); diff --git 
a/fdbserver/workloads/RYWDisable.actor.cpp b/fdbserver/workloads/RYWDisable.actor.cpp index 56a8e2f62e..8fdb6e0fa1 100644 --- a/fdbserver/workloads/RYWDisable.actor.cpp +++ b/fdbserver/workloads/RYWDisable.actor.cpp @@ -69,7 +69,7 @@ struct RYWDisableWorkload : TestWorkload { Future> _ = tr.get( self->keyForIndex(g_random->randomInt(0, self->nodes))); } else if( opType == 2 ) { //TraceEvent("RYWGetAndWait"); - Optional _ = wait( tr.get( self->keyForIndex(g_random->randomInt(0, self->nodes))) ); + wait(success( tr.get( self->keyForIndex(g_random->randomInt(0, self->nodes))) )); } else { //TraceEvent("RYWNoOp"); shouldError = false; diff --git a/fdbserver/workloads/RYWPerformance.actor.cpp b/fdbserver/workloads/RYWPerformance.actor.cpp index 8e432f888c..32a4e51064 100644 --- a/fdbserver/workloads/RYWPerformance.actor.cpp +++ b/fdbserver/workloads/RYWPerformance.actor.cpp @@ -97,24 +97,24 @@ struct RYWPerformanceWorkload : TestWorkload { tr->set( self->keyForIndex(i), LiteralStringRef("foo")); } } else if( type == 4 ) { - Standalone _ = wait( tr->getRange(KeyRangeRef(self->keyForIndex(0),self->keyForIndex(self->nodes)),self->nodes )); + wait(success( tr->getRange(KeyRangeRef(self->keyForIndex(0),self->keyForIndex(self->nodes)),self->nodes ))); } else if( type == 5 ) { - Standalone _ = wait( tr->getRange(KeyRangeRef(self->keyForIndex(0),self->keyForIndex(self->nodes)),self->nodes )); + wait(success( tr->getRange(KeyRangeRef(self->keyForIndex(0),self->keyForIndex(self->nodes)),self->nodes ))); for( i = 0; i < self->nodes; i++ ) { tr->set( self->keyForIndex(i), LiteralStringRef("foo")); } } else if( type == 6 ) { - Standalone _ = wait( tr->getRange(KeyRangeRef(self->keyForIndex(0),self->keyForIndex(self->nodes)),self->nodes )); + wait(success( tr->getRange(KeyRangeRef(self->keyForIndex(0),self->keyForIndex(self->nodes)),self->nodes ))); for( i = 0; i < self->nodes; i+= 2 ) { tr->set( self->keyForIndex(i), LiteralStringRef("foo")); } } else if( type == 7 ) { - Standalone _ = wait( tr->getRange(KeyRangeRef(self->keyForIndex(0),self->keyForIndex(self->nodes)),self->nodes )); + wait(success( tr->getRange(KeyRangeRef(self->keyForIndex(0),self->keyForIndex(self->nodes)),self->nodes ))); for( i = 0; i < self->nodes; i++ ) { tr->clear( self->keyForIndex(i) ); } } else if( type == 8 ) { - Standalone _ = wait( tr->getRange(KeyRangeRef(self->keyForIndex(0),self->keyForIndex(self->nodes)),self->nodes )); + wait(success( tr->getRange(KeyRangeRef(self->keyForIndex(0),self->keyForIndex(self->nodes)),self->nodes ))); for( i = 0; i < self->nodes; i += 2 ) { tr->clear( KeyRangeRef( self->keyForIndex(i), self->keyForIndex(i+1) ) ); } @@ -175,7 +175,7 @@ struct RYWPerformanceWorkload : TestWorkload { state double startTime = timer(); for( i = 0; i < self->nodes; i++ ) { - Optional _ = wait( tr.get(self->keyForIndex(self->nodes/2))); + wait(success( tr.get(self->keyForIndex(self->nodes/2)))); } fprintf(stderr, "%f", self->nodes / (timer() - startTime)); @@ -198,7 +198,7 @@ struct RYWPerformanceWorkload : TestWorkload { state double startTime = timer(); for( i = 0; i < self->nodes; i++ ) { - Optional _ = wait( tr.get(self->keyForIndex(i))); + wait(success( tr.get(self->keyForIndex(i)))); } fprintf(stderr, "%f", self->nodes / (timer() - startTime)); @@ -221,7 +221,7 @@ struct RYWPerformanceWorkload : TestWorkload { state double startTime = timer(); for( i = 0; i < self->ranges; i++ ) { - Standalone _ = wait( tr.getRange(KeyRangeRef(self->keyForIndex(0),self->keyForIndex(self->nodes)),self->nodes )); + wait(success( 
tr.getRange(KeyRangeRef(self->keyForIndex(0),self->keyForIndex(self->nodes)),self->nodes ))); } fprintf(stderr, "%f", self->ranges / (timer() - startTime)); @@ -246,7 +246,7 @@ struct RYWPerformanceWorkload : TestWorkload { state double startTime = timer(); for( i = 0; i < self->nodes; i++ ) { - Optional _ = wait( tr.get(self->keyForIndex(self->nodes/2)) ); + wait(success( tr.get(self->keyForIndex(self->nodes/2)) )); tr.set( self->keyForIndex(self->nodes/2), self->keyForIndex(i) ); } diff --git a/fdbserver/workloads/RandomMoveKeys.actor.cpp b/fdbserver/workloads/RandomMoveKeys.actor.cpp index 06d406feea..5b915d386b 100644 --- a/fdbserver/workloads/RandomMoveKeys.actor.cpp +++ b/fdbserver/workloads/RandomMoveKeys.actor.cpp @@ -70,7 +70,7 @@ struct MoveKeysWorkload : TestWorkload { wait( timeout( reportErrors( self->worker( cx, self ), "MoveKeysWorkloadWorkerError" ), self->testDuration, Void() ) ); // Always set the DD mode back, even if we die with an error TraceEvent("RMKDoneMoving"); - int _ = wait( setDDMode( cx, oldMode ) ); + wait(success( setDDMode( cx, oldMode ) )); TraceEvent("RMKDoneModeSetting"); } return Void(); diff --git a/fdbserver/workloads/Throughput.actor.cpp b/fdbserver/workloads/Throughput.actor.cpp index e1d3a89bd2..0fadf8af4b 100644 --- a/fdbserver/workloads/Throughput.actor.cpp +++ b/fdbserver/workloads/Throughput.actor.cpp @@ -92,7 +92,7 @@ struct RWTransactor : ITransactor { loop { try { state double t_start = now(); - Version _ = wait( tr.getReadVersion() ); + wait(success( tr.getReadVersion() )); state double t_rv = now(); state double rrLatency = -t_rv * self->reads; diff --git a/flow/actorcompiler/ActorCompiler.cs b/flow/actorcompiler/ActorCompiler.cs index 2e0b56cd10..71080d613f 100644 --- a/flow/actorcompiler/ActorCompiler.cs +++ b/flow/actorcompiler/ActorCompiler.cs @@ -816,7 +816,7 @@ namespace actorcompiler if (firstChoice) { // Do this check only after evaluating the expression for the first wait expression, so that expression cannot be short circuited by cancellation. - // So wait( expr() ) will always evaluate `expr()`, but choose { when ( Void _ = wait( expr2() ) {} } need + // So wait( expr() ) will always evaluate `expr()`, but choose { when ( wait(success( expr2() )) {} } need // not evaluate `expr2()`. firstChoice = false; LineNumber(cx.target, stmt.FirstSourceLine); From 65136a2ecd4780fd2ba08146612f34170ec415f6 Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Tue, 12 Feb 2019 15:38:15 -0800 Subject: [PATCH 135/226] Forward declare actors with ACTOR keyword. #1148 There are several more occurrences of this, but they're in .h files that now need to be .actor.h files. This gets the easy ones out of the way. 
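
The long run of hunks above is one mechanical rewrite: `Type _ = wait(f)` becomes `wait(success(f))`. success() converts a Future<T> into a Future<Void>, so an actor can block on completion without binding the unused result to a throwaway variable. A minimal sketch of the before and after, assuming flow's actor compiler and a hypothetical actor slowSquare():

    ACTOR Future<int> slowSquare(int x) {
        wait(delay(1.0));
        return x * x;
    }

    ACTOR Future<Void> example() {
        // Before: int _ = wait(slowSquare(2));  // binds an unused dummy value
        wait(success(slowSquare(2)));            // after: waits, discards the int
        return Void();
    }
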
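For the forward-declaration change this message describes, a hedged sketch with a hypothetical actor: marking the forward declaration itself with ACTOR lets the actor compiler emit the matching prototype, instead of hand-writing the generated form that takes `const&` parameters:

    // Old style: a hand-written copy of the compiler-generated signature.
    //   Future<int> fetchCount(int const& base);
    // New style: forward declare the actor and let the compiler expand it.
    ACTOR Future<int> fetchCount(int base);

    ACTOR Future<int> doubled(int base) {
        int n = wait(fetchCount(base));  // callable before the body appears
        return 2 * n;
    }

    ACTOR Future<int> fetchCount(int base) {
        wait(delay(0.1));
        return base + 1;
    }
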
--- fdbclient/NativeAPI.actor.cpp | 7 +++++-- fdbserver/pubsub.actor.cpp | 3 +-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 7242714af1..b25293d5a0 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -1052,7 +1052,8 @@ Future> getRange( bool const& reverse, TransactionInfo const& info); -Future> getValue( Future const& version, Key const& key, Database const& cx, TransactionInfo const& info, Reference const& trLogInfo ) ; +ACTOR Future> getValue(Future version, Key key, Database cx, TransactionInfo info, + Reference trLogInfo); ACTOR Future> fetchServerInterface( Database cx, TransactionInfo info, UID id, Future ver = latestVersion ) { Optional val = wait( getValue(ver, serverListKeyFor(id), cx, info, Reference()) ); @@ -1353,7 +1354,9 @@ ACTOR Future waitForCommittedVersion( Database cx, Version version ) { } } -Future readVersionBatcher( DatabaseContext* const& cx, FutureStream< std::pair< Promise, Optional > > const& versionStream, uint32_t const& flags ); +ACTOR Future readVersionBatcher( + DatabaseContext* cx, FutureStream, Optional>> versionStream, + uint32_t flags); ACTOR Future< Void > watchValue( Future version, Key key, Optional value, Database cx, int readVersionFlags, TransactionInfo info ) { diff --git a/fdbserver/pubsub.actor.cpp b/fdbserver/pubsub.actor.cpp index 6069513152..7bd60f1a3c 100644 --- a/fdbserver/pubsub.actor.cpp +++ b/fdbserver/pubsub.actor.cpp @@ -396,8 +396,7 @@ ACTOR Future getMessage(Transaction *tr, Feed feed, MessageId id) { return m; } -Future> _listInboxMessages(Database const& cx, - uint64_t const& inbox, int const& count, uint64_t const& cursor); +ACTOR Future> _listInboxMessages(Database cx, uint64_t inbox, int count, uint64_t cursor); // inboxes with MANY fast feeds may be punished by the following checks // SOMEDAY: add a check on global lists (or on dispatching list) From 601b229c0551a29b316fab7f61fdb2886d6f8163 Mon Sep 17 00:00:00 2001 From: Alex Miller Date: Thu, 7 Feb 2019 17:02:14 -0800 Subject: [PATCH 136/226] Add a no-op breakpoint_me for easy gdb'ing. --- flow/Error.cpp | 4 ++++ flow/Error.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/flow/Error.cpp b/flow/Error.cpp index 6a6c31e9ff..e2d096b8dd 100644 --- a/flow/Error.cpp +++ b/flow/Error.cpp @@ -117,3 +117,7 @@ void ErrorCodeTable::addCode(int code, const char *name, const char *description bool isAssertDisabled(int line) { return FLOW_KNOBS && (FLOW_KNOBS->DISABLE_ASSERTS == -1 || FLOW_KNOBS->DISABLE_ASSERTS == line); } + +void breakpoint_me() { + return; +} diff --git a/flow/Error.h b/flow/Error.h index 545fccb78a..df1170344c 100644 --- a/flow/Error.h +++ b/flow/Error.h @@ -98,6 +98,8 @@ extern bool isAssertDisabled( int line ); catch(Error &e) { criticalError(FDB_EXIT_ABORT, "AbortOnError", e.what()); } \ catch(...) 
{ criticalError(FDB_EXIT_ABORT, "AbortOnError", "Aborted due to unknown error"); } +EXTERNC void breakpoint_me(); + #ifdef FDB_CLEAN_BUILD # define NOT_IN_CLEAN BOOST_STATIC_ASSERT_MSG(0, "This code can not be enabled in a clean build."); #else From 2570b37e6ec46cb08b0a29721815337cad73993f Mon Sep 17 00:00:00 2001 From: Alex Miller Date: Thu, 7 Feb 2019 17:02:15 -0800 Subject: [PATCH 137/226] Add function to read pages from RawDiskQueue_TwoFiles --- fdbserver/DiskQueue.actor.cpp | 11 +++++++++++ flow/Arena.h | 8 ++++++++ 2 files changed, 19 insertions(+) diff --git a/fdbserver/DiskQueue.actor.cpp b/fdbserver/DiskQueue.actor.cpp index 57fef766d1..7198f3e892 100644 --- a/fdbserver/DiskQueue.actor.cpp +++ b/fdbserver/DiskQueue.actor.cpp @@ -143,6 +143,8 @@ public: Future setPoppedPage( int file, int64_t page, int64_t debugSeq ) { return setPoppedPage(this, file, page, debugSeq); } + // FIXME: let the caller pass in where to write the data. + Future> read(int file, int page, int nPages) { return read(this, file, page, nPages); } Future> readNextPage() { return readNextPage(this); } Future truncateBeforeLastReadPage() { return truncateBeforeLastReadPage(this); } @@ -540,6 +542,15 @@ public: } } + ACTOR static Future> read(RawDiskQueue_TwoFiles* self, int file, int pageOffset, int nPages) { + state TrackMe trackMe(self); + state const size_t bytesRequested = nPages * sizeof(Page); + state Standalone result = makeAlignedString(sizeof(Page), bytesRequested); + int bytesRead = wait( self->files[file].f->read( mutateString(result), bytesRequested, pageOffset*sizeof(Page) ) ); + ASSERT_WE_THINK(bytesRead == bytesRequested); + return result; + } + Future fillReadingBuffer() { // If we're right at the end of a file... if ( readingPage*sizeof(Page) >= (size_t)files[readingFile].size ) { diff --git a/flow/Arena.h b/flow/Arena.h index 65399f6acb..e89dc4dc6a 100644 --- a/flow/Arena.h +++ b/flow/Arena.h @@ -547,6 +547,14 @@ inline static Standalone makeString( int length ) { return returnString; } +inline static Standalone makeAlignedString( int alignment, int length ) { + Standalone returnString; + uint8_t *outData = new (returnString.arena()) uint8_t[alignment + length]; + outData = (uint8_t*)((((uintptr_t)outData + (alignment - 1)) / alignment) * alignment); + ((StringRef&)returnString) = StringRef(outData, length); + return returnString; +} + inline static StringRef makeString( int length, Arena& arena ) { uint8_t *outData = new (arena) uint8_t[length]; return StringRef(outData, length); From dbf7cefcd8f3fc52b4a2785bc41dbae9361c00ad Mon Sep 17 00:00:00 2001 From: Alex Miller Date: Thu, 7 Feb 2019 17:19:56 -0800 Subject: [PATCH 138/226] Add firstPages to DiskQueue --- fdbserver/DiskQueue.actor.cpp | 47 ++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/fdbserver/DiskQueue.actor.cpp b/fdbserver/DiskQueue.actor.cpp index 7198f3e892..1d23334449 100644 --- a/fdbserver/DiskQueue.actor.cpp +++ b/fdbserver/DiskQueue.actor.cpp @@ -121,6 +121,13 @@ public: fileExtensionBytes = 8<<10; files[0].dbgFilename = filename(0); files[1].dbgFilename = filename(1); + // We issue reads into firstPages, so it needs to be 4k aligned. 
+ firstPages.reserve(firstPages.arena(), 2); + void* pageMemory = operator new (sizeof(Page) * 3, firstPages.arena()); + firstPages[0] = (Page*)((((uintptr_t)pageMemory + 4095) / 4096) * 4096); + memset(firstPages[0], 0, sizeof(Page)); + firstPages[1] = (Page*)((uintptr_t)firstPages[0] + 4096); + memset(firstPages[1], 0, sizeof(Page)); stallCount.init(LiteralStringRef("RawDiskQueue.StallCount")); } @@ -180,6 +187,7 @@ public: } }; File files[2]; // After readFirstAndLastPages(), files[0] is logically before files[1] (pushes are always into files[1]) + Standalone> firstPages; std::string basename; std::string fileExtension; @@ -245,6 +253,7 @@ public: dbg_file0BeginSeq += files[0].size; std::swap(files[0], files[1]); + std::swap(firstPages[0], firstPages[1]); files[1].popped = 0; writingPos = 0; } else { @@ -261,6 +270,10 @@ public: } } + if (writingPos == 0) { + *firstPages[1] = *(const Page*)pageData.begin(); + } + /*TraceEvent("RDQWrite", this->dbgid).detail("File1name", files[1].dbgFilename).detail("File1size", files[1].size) .detail("WritingPos", writingPos).detail("WritingBytes", pageData.size());*/ files[1].size = std::max( files[1].size, writingPos + pageData.size() ); @@ -445,12 +458,8 @@ public: ACTOR static UNCANCELLABLE Future> readFirstAndLastPages(RawDiskQueue_TwoFiles* self, compare_pages compare) { state TrackMe trackMe(self); - state StringBuffer result( self->dbgid ); try { - result.alignReserve( sizeof(Page), sizeof(Page)*3 ); - state Page* firstPage = (Page*)result.append(sizeof(Page)*3); - // Open both files or create both files wait( openFiles(self) ); @@ -466,20 +475,19 @@ public: } // Read the first pages - memset(firstPage, 0, sizeof(Page)*2); vector> reads; for(int i=0; i<2; i++) if( self->files[i].size > 0) - reads.push_back( self->files[i].f->read( &firstPage[i], sizeof(Page), 0 ) ); + reads.push_back( self->files[i].f->read( self->firstPages[i], sizeof(Page), 0 ) ); wait( waitForAll(reads) ); // Determine which file comes first - if ( compare( &firstPage[1], &firstPage[0] ) ) { - std::swap( firstPage[0], firstPage[1] ); + if ( compare( self->firstPages[1], self->firstPages[0] ) ) { + std::swap( self->firstPages[0], self->firstPages[1] ); std::swap( self->files[0], self->files[1] ); } - if ( !compare( &firstPage[1], &firstPage[1] ) ) { + if ( !compare( self->firstPages[1], self->firstPages[1] ) ) { // Both files are invalid... the queue is empty! 
// Begin pushing at the beginning of files[1] @@ -500,12 +508,13 @@ public: return Standalone(); } - // A page in files[1] is "valid" iff compare(&firstPage[1], page) + // A page in files[1] is "valid" iff compare(self->firstPages[1], page) // Binary search to find a page in files[1] that is "valid" but the next page is not valid // Invariant: the page at begin is valid, and the page at end is invalid state int64_t begin = 0; state int64_t end = self->files[1].size/sizeof(Page); - state Page *middlePage = &firstPage[2]; + state Standalone middlePageAllocation = makeAlignedString(sizeof(Page), sizeof(Page)); + state Page *middlePage = (Page*)middlePageAllocation.begin(); while ( begin + 1 != end ) { state int64_t middle = (begin+end)/2; ASSERT( middle > begin && middle < end ); // So the loop always changes begin or end @@ -513,7 +522,7 @@ public: int len = wait( self->files[1].f->read( middlePage, sizeof(Page), middle*sizeof(Page) ) ); ASSERT( len == sizeof(Page) ); - bool middleValid = compare( &firstPage[1], middlePage ); + bool middleValid = compare( self->firstPages[1], middlePage ); TraceEvent("RDQBS", self->dbgid).detail("Begin", begin).detail("End", end).detail("Middle", middle).detail("Valid", middleValid).detail("File0Name", self->files[0].dbgFilename); @@ -524,16 +533,16 @@ public: } // Now by the invariant and the loop condition, begin is a valid page and begin+1 is an invalid page // Check that begin+1 is invalid - int len = wait( self->files[1].f->read( &firstPage[2], sizeof(Page), (begin+1)*sizeof(Page) ) ); - ASSERT( !(len == sizeof(Page) && compare( &firstPage[1], &firstPage[2] )) ); + int len1 = wait( self->files[1].f->read( middlePage, sizeof(Page), (begin+1)*sizeof(Page) ) ); + ASSERT( !(len1 == sizeof(Page) && compare( self->firstPages[1], middlePage )) ); // Read it - int len = wait( self->files[1].f->read( &firstPage[2], sizeof(Page), begin*sizeof(Page) ) ); - ASSERT( len == sizeof(Page) && compare( &firstPage[1], &firstPage[2] ) ); + int len2 = wait( self->files[1].f->read( middlePage, sizeof(Page), begin*sizeof(Page) ) ); + ASSERT( len2 == sizeof(Page) && compare( self->firstPages[1], middlePage ) ); TraceEvent("RDQEndFound", self->dbgid).detail("File0Name", self->files[0].dbgFilename).detail("Pos", begin).detail("FileSize", self->files[1].size); - return result.str; + return middlePageAllocation; } catch (Error& e) { bool ok = e.code() == error_code_file_not_found; TraceEvent(ok ? SevInfo : SevError, "RDQReadFirstAndLastPagesError", self->dbgid).error(e, true).detail("File0Name", self->files[0].dbgFilename); @@ -610,6 +619,9 @@ public: state TrackMe trackMe(self); TraceEvent("DQTruncateFile", self->dbgid).detail("File", file).detail("Pos", pos).detail("File0Name", self->files[0].dbgFilename); state Reference f = self->files[file].f; // Hold onto a reference in the off-chance that the DQ is removed from underneath us. + if (pos == 0) { + memset(self->firstPages[file], 0, _PAGE_SIZE); + } wait( f->zeroRange( pos, self->files[file].size-pos ) ); wait(self->files[file].syncQueue->onSync()); // We intentionally don't return the f->zero future, so that TrackMe is destructed after f->zero finishes. 
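
Both makeAlignedString() in the previous patch and the firstPages allocation above round a raw pointer up with the same integer arithmetic. A self-contained worked example (standard C++, not code from the patch):

    #include <cassert>
    #include <cstdint>

    // Round p up to the next multiple of alignment.
    uintptr_t roundUp(uintptr_t p, uintptr_t alignment) {
        return (p + (alignment - 1)) / alignment * alignment;
    }

    int main() {
        assert(roundUp(0x1001, 4096) == 0x2000);  // just past a boundary: next page
        assert(roundUp(0x2000, 4096) == 0x2000);  // already aligned: unchanged
        // Reserving alignment + length bytes guarantees `length` usable bytes
        // after rounding, since at most alignment - 1 bytes are skipped in front.
        return 0;
    }
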
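The recovery scan above relies on valid pages forming a prefix of files[1], which is what makes the begin-valid/end-invalid binary search correct. Stripped of the I/O, the shape of the search, with isValid() as a hypothetical stand-in for reading a page and checking compare():

    bool isValid(int64_t page);  // hypothetical: read the page, check hash/seq

    // Precondition: page 0 is valid and page `end` (one past the file) is not.
    // Invariant: `begin` is always valid and `end` always invalid, so the
    // loop converges on the last valid page.
    int64_t findLastValid(int64_t end) {
        int64_t begin = 0;
        while (begin + 1 != end) {
            int64_t middle = (begin + end) / 2;  // strictly between begin and end
            if (isValid(middle))
                begin = middle;
            else
                end = middle;
        }
        return begin;
    }
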
@@ -640,6 +652,7 @@ public: if (swap) { std::swap(self->files[0], self->files[1]); + std::swap(self->firstPages[0], self->firstPages[1]); self->files[0].popped = self->files[0].size; } From 018d12fe9008861bebd484febc7cbdf2e446bf83 Mon Sep 17 00:00:00 2001 From: Alex Miller Date: Thu, 7 Feb 2019 17:02:17 -0800 Subject: [PATCH 139/226] use firstpages instead of recoveryfirstpages --- fdbserver/DiskQueue.actor.cpp | 39 ++++++++++++++--------------------- 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/fdbserver/DiskQueue.actor.cpp b/fdbserver/DiskQueue.actor.cpp index 1d23334449..2d364e74f6 100644 --- a/fdbserver/DiskQueue.actor.cpp +++ b/fdbserver/DiskQueue.actor.cpp @@ -933,7 +933,6 @@ private: TraceEvent("DQRecovered", self->dbgid).detail("LastPoppedSeq", self->lastPoppedSeq).detail("PoppedSeq", self->poppedSeq).detail("NextPageSeq", self->nextPageSeq).detail("File0Name", self->rawQueue->files[0].dbgFilename); self->recovered = true; ASSERT( self->poppedSeq <= self->endLocation() ); - self->recoveryFirstPages = Standalone(); TEST( result.size() == 0 ); // End of queue at border between reads TEST( result.size() != 0 ); // Partial read at end of queue @@ -945,18 +944,15 @@ private: } ACTOR static Future findStart( DiskQueue* self ) { - Standalone epbuf = wait( self->rawQueue->readFirstAndLastPages( &comparePages ) ); - ASSERT( epbuf.size() % sizeof(Page) == 0 ); - self->recoveryFirstPages = epbuf; + Standalone lastPageData = wait( self->rawQueue->readFirstAndLastPages( &comparePages ) ); - if (!epbuf.size()) { + if (!lastPageData.size()) { // There are no valid pages, so apparently this is a completely empty queue self->nextReadLocation = 0; return false; } - int n = epbuf.size() / sizeof(Page); - Page* lastPage = (Page*)epbuf.end() - 1; + Page* lastPage = (Page*)lastPageData.begin(); self->nextReadLocation = self->poppedSeq = lastPage->popped; /* @@ -980,28 +976,25 @@ private: void findPhysicalLocation( loc_t loc, int* file, int64_t* page, const char* context ) { bool ok = false; - Page*p = (Page*)recoveryFirstPages.begin(); TraceEvent(SevInfo, "FindPhysicalLocation", dbgid) - .detail("RecoveryFirstPages", recoveryFirstPages.size()) - .detail("Page0Valid", p[0].checkHash()) - .detail("Page0Seq", p[0].seq) - .detail("Page1Valid", p[1].checkHash()) - .detail("Page1Seq", p[1].seq) + .detail("Page0Valid", firstPages(0).checkHash()) + .detail("Page0Seq", firstPages(0).seq) + .detail("Page1Valid", firstPages(1).checkHash()) + .detail("Page1Seq", firstPages(1).seq) .detail("Location", loc) .detail("Context", context) .detail("File0Name", rawQueue->files[0].dbgFilename); - for(int i=recoveryFirstPages.size() / sizeof(Page) - 2; i>=0; i--) - if ( p[i].checkHash() && p[i].seq <= (size_t)loc ) { + for(int i = 1; i >= 0; i--) + if ( firstPages(i).checkHash() && firstPages(i).seq <= (size_t)loc ) { *file = i; - *page = (loc - p[i].seq)/sizeof(Page); + *page = (loc - firstPages(i).seq)/sizeof(Page); TraceEvent("FoundPhysicalLocation", dbgid) .detail("PageIndex", i) .detail("PageLocation", *page) - .detail("RecoveryFirstPagesSize", recoveryFirstPages.size()) .detail("SizeofPage", sizeof(Page)) - .detail("PageSequence", p[i].seq) + .detail("PageSequence", firstPages(i).seq) .detail("Location", loc) .detail("Context", context) .detail("File0Name", rawQueue->files[0].dbgFilename); @@ -1010,11 +1003,10 @@ private: } if (!ok) TraceEvent(SevError, "DiskQueueLocationError", dbgid) - .detail("RecoveryFirstPages", recoveryFirstPages.size()) - .detail("Page0Valid", p[0].checkHash()) - 
.detail("Page0Seq", p[0].seq) - .detail("Page1Valid", p[1].checkHash()) - .detail("Page1Seq", p[1].seq) + .detail("Page0Valid", firstPages(0).checkHash()) + .detail("Page0Seq", firstPages(0).seq) + .detail("Page1Valid", firstPages(1).checkHash()) + .detail("Page1Seq", firstPages(1).seq) .detail("Location", loc) .detail("Context", context) .detail("File0Name", rawQueue->files[0].dbgFilename); @@ -1053,7 +1045,6 @@ private: Arena readBufArena; Page* readBufPage; int readBufPos; - Standalone recoveryFirstPages; }; //A class wrapping DiskQueue which durably allows uncommitted data to be popped From 40fe29c29b9e196b6e1fdeca9b5d5a0831248e86 Mon Sep 17 00:00:00 2001 From: Alex Miller Date: Thu, 7 Feb 2019 17:02:18 -0800 Subject: [PATCH 140/226] Abstract TrackMe into a reusable CRTP class. --- fdbserver/DiskQueue.actor.cpp | 45 +++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/fdbserver/DiskQueue.actor.cpp b/fdbserver/DiskQueue.actor.cpp index 2d364e74f6..508dc7e97e 100644 --- a/fdbserver/DiskQueue.actor.cpp +++ b/fdbserver/DiskQueue.actor.cpp @@ -109,7 +109,35 @@ private: } }; -class RawDiskQueue_TwoFiles { +template +class Tracked { +protected: + struct TrackMe : NonCopyable { + T* self; + explicit TrackMe( T* self ) : self(self) { + self->actorCount++; + if (self->actorCount == 1) self->actorCountIsZero.set(false); + } + ~TrackMe() { + self->actorCount--; + if (self->actorCount == 0) self->actorCountIsZero.set(true); + } + }; + + Future onSafeToDestruct() { + if (actorCountIsZero.get()) { + return Void(); + } else { + return actorCountIsZero.onChange(); + } + } + +private: + int actorCount = 0; + AsyncVar actorCountIsZero = true; +}; + +class RawDiskQueue_TwoFiles : public Tracked { public: RawDiskQueue_TwoFiles( std::string basename, std::string fileExtension, UID dbgid, int64_t fileSizeWarningLimit ) : basename(basename), fileExtension(fileExtension), onError(delayed(error.getFuture())), onStopped(stopped.getFuture()), @@ -212,20 +240,8 @@ public: int64_t fileExtensionBytes; - AsyncMap recoveryActorCount; - Int64MetricHandle stallCount; - struct TrackMe : NonCopyable { - RawDiskQueue_TwoFiles* self; - TrackMe( RawDiskQueue_TwoFiles* self ) : self(self) { - self->recoveryActorCount.set(false, self->recoveryActorCount.get(false)+1); - } - ~TrackMe() { - self->recoveryActorCount.set(false, self->recoveryActorCount.get(false)-1); - } - }; - Future truncateFile(int file, int64_t pos) { return truncateFile(this, file, pos); } Future push(StringRef pageData, vector>& toSync) { @@ -426,8 +442,7 @@ public: state Error error = success(); try { wait(success(errorOr(self->lastCommit))); - while (self->recoveryActorCount.get(false)) - wait( self->recoveryActorCount.onChange(false) ); + wait( self->onSafeToDestruct() ); for(int i=0; i<2; i++) self->files[i].f.clear(); From 2d2b03a9ff724af2446cec5c163bfff50843a7ce Mon Sep 17 00:00:00 2001 From: Alex Miller Date: Thu, 7 Feb 2019 17:02:19 -0800 Subject: [PATCH 141/226] prepare DiskQueue for actors --- fdbserver/DiskQueue.actor.cpp | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/fdbserver/DiskQueue.actor.cpp b/fdbserver/DiskQueue.actor.cpp index 508dc7e97e..b1ba5c1cb6 100644 --- a/fdbserver/DiskQueue.actor.cpp +++ b/fdbserver/DiskQueue.actor.cpp @@ -680,7 +680,7 @@ public: } }; -class DiskQueue : public IDiskQueue { +class DiskQueue : public IDiskQueue, public Tracked { public: DiskQueue( std::string basename, std::string fileExtension, UID dbgid, int64_t 
fileSizeWarningLimit ) : rawQueue( new RawDiskQueue_TwoFiles(basename, fileExtension, dbgid, fileSizeWarningLimit) ), dbgid(dbgid), anyPopped(false), nextPageSeq(0), poppedSeq(0), lastPoppedSeq(0), @@ -782,11 +782,18 @@ public: virtual Future getError() { return rawQueue->getError(); } virtual Future onClosed() { return rawQueue->onClosed(); } + virtual void dispose() { TraceEvent("DQDestroy", dbgid).detail("LastPoppedSeq", lastPoppedSeq).detail("PoppedSeq", poppedSeq).detail("NextPageSeq", nextPageSeq).detail("File0Name", rawQueue->files[0].dbgFilename); - rawQueue->dispose(); - delete this; + dispose(this); } + ACTOR static void dispose(DiskQueue* self) { + wait( self->onSafeToDestruct() ); + TraceEvent("DQDestroyDone", self->dbgid).detail("File0Name", self->rawQueue->files[0].dbgFilename); + self->rawQueue->dispose(); + delete self; + } + virtual void close() { TraceEvent("DQClose", dbgid) .detail("LastPoppedSeq", lastPoppedSeq) @@ -794,8 +801,13 @@ public: .detail("NextPageSeq", nextPageSeq) .detail("PoppedCommitted", rawQueue->dbg_file0BeginSeq + rawQueue->files[0].popped + rawQueue->files[1].popped) .detail("File0Name", rawQueue->files[0].dbgFilename); - rawQueue->close(); - delete this; + close(this); + } + ACTOR static void close(DiskQueue* self) { + wait( self->onSafeToDestruct() ); + TraceEvent("DQCloseDone", self->dbgid).detail("File0Name", self->rawQueue->files[0].dbgFilename); + self->rawQueue->close(); + delete self; } virtual StorageBytes getStorageBytes() { From f1c31e2305bcf68851eec184c5dc0471d1d5ed43 Mon Sep 17 00:00:00 2001 From: Alex Miller Date: Thu, 7 Feb 2019 17:02:20 -0800 Subject: [PATCH 142/226] Add a read function to disk queue --- fdbserver/DiskQueue.actor.cpp | 38 +++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/fdbserver/DiskQueue.actor.cpp b/fdbserver/DiskQueue.actor.cpp index b1ba5c1cb6..e0d14713be 100644 --- a/fdbserver/DiskQueue.actor.cpp +++ b/fdbserver/DiskQueue.actor.cpp @@ -22,6 +22,7 @@ #include "fdbrpc/IAsyncFile.h" #include "fdbserver/Knobs.h" #include "fdbrpc/simulator.h" +#include "flow/genericactors.actor.h" #include "flow/actorcompiler.h" // This must be the last #include. 
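
A condensed sketch of how the Tracked CRTP base from two patches back combines with the fire-and-forget dispose()/close() actors rewritten above; the class and its members here are illustrative, not from the tree:

    class ExampleQueue : public Tracked<ExampleQueue> {
    public:
        // Fire-and-forget: the object frees itself once no actor references it.
        void close() { deferredClose(this); }
        Future<Void> doWork() { return doWork(this); }

    private:
        ACTOR static Future<Void> doWork(ExampleQueue* self) {
            state TrackMe trackMe(self);  // RAII: bumps the live-actor count
            wait(delay(1.0));             // *self must stay alive across this wait
            return Void();                // ~TrackMe decrements the count
        }

        ACTOR static void deferredClose(ExampleQueue* self) {
            wait(self->onSafeToDestruct());  // resolves when the count hits zero
            delete self;
        }
    };
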
typedef bool(*compare_pages)(void*,void*); @@ -570,6 +571,7 @@ public: state TrackMe trackMe(self); state const size_t bytesRequested = nPages * sizeof(Page); state Standalone result = makeAlignedString(sizeof(Page), bytesRequested); + if (file == 1) ASSERT_WE_THINK(pageOffset * sizeof(Page) + bytesRequested <= self->writingPos ); int bytesRead = wait( self->files[file].f->read( mutateString(result), bytesRequested, pageOffset*sizeof(Page) ) ); ASSERT_WE_THINK(bytesRead == bytesRequested); return result; @@ -879,6 +881,38 @@ private: } } + ACTOR static Future> readPages(DiskQueue *self, location start, location end) { + state TrackMe trackme(self); + state int fromFile; + state int toFile; + state int64_t fromPage; + state int64_t toPage; + state uint64_t file0size = self->firstPages(1).seq - self->firstPages(0).seq; + ASSERT(end > start); + ASSERT(start.lo >= self->firstPages(0).seq); + self->findPhysicalLocation(start.lo, &fromFile, &fromPage, "read"); + self->findPhysicalLocation(end.lo-1, &toFile, &toPage, "read"); + if (fromFile == 0) { ASSERT( fromPage < file0size / _PAGE_SIZE ); } + if (toFile == 0) { ASSERT( toPage < file0size / _PAGE_SIZE ); } + if (fromFile == 1) { ASSERT( fromPage < self->rawQueue->writingPos / _PAGE_SIZE ); } + if (toFile == 1) { ASSERT( toPage < self->rawQueue->writingPos / _PAGE_SIZE ); } + if (fromFile == toFile) { + ASSERT(toPage >= fromPage); + Standalone pagedData = wait( self->rawQueue->read( fromFile, fromPage, toPage - fromPage + 1 ) ); + ASSERT(pagedData.size() == (toPage - fromPage + 1) * _PAGE_SIZE ); + return pagedData; + } else { + ASSERT(fromFile == 0); + state Standalone firstChunk; + state Standalone secondChunk; + wait( store(firstChunk, self->rawQueue->read( fromFile, fromPage, ( file0size / sizeof(Page) ) - fromPage )) && + store(secondChunk, self->rawQueue->read( toFile, 0, toPage + 1 )) ); + ASSERT(firstChunk.size() == ( ( file0size / sizeof(Page) ) - fromPage ) * _PAGE_SIZE ); + ASSERT(secondChunk.size() == (toPage + 1) * _PAGE_SIZE); + return firstChunk.withSuffix(secondChunk); + } + } + void readFromBuffer( StringBuffer* result, int* bytes ) { // extract up to bytes from readBufPage into result int len = std::min( readBufPage->payloadSize - readBufPos, *bytes ); @@ -1001,6 +1035,10 @@ private: return true; } + Page& firstPages(int i) { + return *(Page*)rawQueue->firstPages[i]; + } + void findPhysicalLocation( loc_t loc, int* file, int64_t* page, const char* context ) { bool ok = false; From efa8aa7e2e23dfbc99fb4878b5a877656793ce26 Mon Sep 17 00:00:00 2001 From: Alex Miller Date: Thu, 7 Feb 2019 17:02:21 -0800 Subject: [PATCH 143/226] Adjust findPhysicalLocation to not spam. Context is now optional, so that our high-volume calls don't get logged, but low-volume calls still get logged the same way that they did before. 
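
A minimal illustration of the convention this message describes, using a hypothetical function rather than the patch's own:

    // A null context marks a high-volume call site; a label requests logging.
    void findThing(int64_t loc, const char* context) {
        if (context)
            TraceEvent("FindThing").detail("Location", loc).detail("Context", context);
        // ... the lookup itself behaves the same either way ...
    }

    void callers() {
        findThing(1234, "recovery");  // low-volume path: logged as before
        findThing(1234, nullptr);     // per-read hot path: silent
    }
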
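Returning to readPages() earlier in this patch: when a range straddles the two files, both reads are issued concurrently via the store(out, future) && store(out2, future2) combination. A hedged sketch of that idiom, with readWhole() as a hypothetical helper:

    Future<Standalone<StringRef>> readWhole(Reference<IAsyncFile> f);  // hypothetical

    ACTOR Future<Standalone<StringRef>> readBoth(Reference<IAsyncFile> f0,
                                                 Reference<IAsyncFile> f1) {
        state Standalone<StringRef> first;
        state Standalone<StringRef> second;
        // store(x, f) assigns f's result to x when f completes; && combines the
        // two Future<Void>s, so the reads run in parallel and we resume only
        // when both are done.
        wait(store(first, readWhole(f0)) && store(second, readWhole(f1)));
        return first.withSuffix(second);  // concatenate into one buffer
    }
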
--- fdbserver/DiskQueue.actor.cpp | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/fdbserver/DiskQueue.actor.cpp b/fdbserver/DiskQueue.actor.cpp index e0d14713be..38d68cd9aa 100644 --- a/fdbserver/DiskQueue.actor.cpp +++ b/fdbserver/DiskQueue.actor.cpp @@ -890,8 +890,8 @@ private: state uint64_t file0size = self->firstPages(1).seq - self->firstPages(0).seq; ASSERT(end > start); ASSERT(start.lo >= self->firstPages(0).seq); - self->findPhysicalLocation(start.lo, &fromFile, &fromPage, "read"); - self->findPhysicalLocation(end.lo-1, &toFile, &toPage, "read"); + self->findPhysicalLocation(start.lo, &fromFile, &fromPage, nullptr); + self->findPhysicalLocation(end.lo-1, &toFile, &toPage, nullptr); if (fromFile == 0) { ASSERT( fromPage < file0size / _PAGE_SIZE ); } if (toFile == 0) { ASSERT( toPage < file0size / _PAGE_SIZE ); } if (fromFile == 1) { ASSERT( fromPage < self->rawQueue->writingPos / _PAGE_SIZE ); } @@ -1042,7 +1042,8 @@ private: void findPhysicalLocation( loc_t loc, int* file, int64_t* page, const char* context ) { bool ok = false; - TraceEvent(SevInfo, "FindPhysicalLocation", dbgid) + if (context) + TraceEvent(SevInfo, "FindPhysicalLocation", dbgid) .detail("Page0Valid", firstPages(0).checkHash()) .detail("Page0Seq", firstPages(0).seq) .detail("Page1Valid", firstPages(1).checkHash()) @@ -1055,14 +1056,15 @@ private: if ( firstPages(i).checkHash() && firstPages(i).seq <= (size_t)loc ) { *file = i; *page = (loc - firstPages(i).seq)/sizeof(Page); - TraceEvent("FoundPhysicalLocation", dbgid) - .detail("PageIndex", i) - .detail("PageLocation", *page) - .detail("SizeofPage", sizeof(Page)) - .detail("PageSequence", firstPages(i).seq) - .detail("Location", loc) - .detail("Context", context) - .detail("File0Name", rawQueue->files[0].dbgFilename); + if (context) + TraceEvent("FoundPhysicalLocation", dbgid) + .detail("PageIndex", i) + .detail("PageLocation", *page) + .detail("SizeofPage", sizeof(Page)) + .detail("PageSequence", firstPages(i).seq) + .detail("Location", loc) + .detail("Context", context) + .detail("File0Name", rawQueue->files[0].dbgFilename); ok = true; break; } @@ -1073,7 +1075,7 @@ private: .detail("Page1Valid", firstPages(1).checkHash()) .detail("Page1Seq", firstPages(1).seq) .detail("Location", loc) - .detail("Context", context) + .detail("Context", context ? 
context : "") .detail("File0Name", rawQueue->files[0].dbgFilename); ASSERT( ok ); } From 9886386a837a9453907d931c490ecfa5eb9d19bc Mon Sep 17 00:00:00 2001 From: Alex Miller Date: Thu, 7 Feb 2019 17:02:22 -0800 Subject: [PATCH 144/226] temporarily verify commited data as a test for read --- fdbserver/DiskQueue.actor.cpp | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/fdbserver/DiskQueue.actor.cpp b/fdbserver/DiskQueue.actor.cpp index 38d68cd9aa..8d32f048b7 100644 --- a/fdbserver/DiskQueue.actor.cpp +++ b/fdbserver/DiskQueue.actor.cpp @@ -331,7 +331,9 @@ public: TEST(2==syncFiles.size()); // push spans both files wait( pushed ); - delete pageMem; + if (!g_network->isSimulated()) { + delete pageMem; + } pageMem = 0; Future sync = syncFiles[0]->onSync(); @@ -352,7 +354,9 @@ public: committed.send(Void()); } catch (Error& e) { - delete pageMem; + if (!g_network->isSimulated()) { + delete pageMem; + } TEST(true); // push error TEST(2==syncFiles.size()); // push spanning both files error TraceEvent(SevError, "RDQPushAndCommitError", dbgid).error(e, true).detail("InitialFilename0", filename); @@ -769,6 +773,9 @@ public: lastCommittedSeq = backPage().endSeq(); auto f = rawQueue->pushAndCommit( pushed_page_buffer->ref(), pushed_page_buffer, poppedSeq/sizeof(Page) - lastPoppedSeq/sizeof(Page) ); + if (g_network->isSimulated()) { + verifyCommit(this, f, pushed_page_buffer, ((Page*)pushed_page_buffer->ref().begin())->seq, lastCommittedSeq); + } lastPoppedSeq = poppedSeq; pushed_page_buffer = 0; return f; @@ -881,6 +888,16 @@ private: } } + ACTOR static void verifyCommit(DiskQueue* self, Future commitSynced, StringBuffer* buffer, loc_t start, loc_t end) { + state TrackMe trackme(self); + wait( commitSynced ); + Standalone pagedData = wait( readPages(self, start, end) ); + const int startOffset = start % _PAGE_SIZE; + const int dataLen = end - start; + ASSERT( pagedData.substr(startOffset, dataLen).compare( buffer->ref().substr(0, dataLen) ) == 0 ); + delete buffer; + } + ACTOR static Future> readPages(DiskQueue *self, location start, location end) { state TrackMe trackme(self); state int fromFile; From 63eb62cd36001b029375ab2388946a8ef295e9eb Mon Sep 17 00:00:00 2001 From: Alex Miller Date: Thu, 7 Feb 2019 17:02:23 -0800 Subject: [PATCH 145/226] Fix a bug when a read was delayed until after the entire disk queue has been rewritten. 
--- fdbserver/DiskQueue.actor.cpp | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/fdbserver/DiskQueue.actor.cpp b/fdbserver/DiskQueue.actor.cpp index 8d32f048b7..bf0359604b 100644 --- a/fdbserver/DiskQueue.actor.cpp +++ b/fdbserver/DiskQueue.actor.cpp @@ -890,11 +890,18 @@ private: ACTOR static void verifyCommit(DiskQueue* self, Future commitSynced, StringBuffer* buffer, loc_t start, loc_t end) { state TrackMe trackme(self); - wait( commitSynced ); - Standalone pagedData = wait( readPages(self, start, end) ); - const int startOffset = start % _PAGE_SIZE; - const int dataLen = end - start; - ASSERT( pagedData.substr(startOffset, dataLen).compare( buffer->ref().substr(0, dataLen) ) == 0 ); + try { + wait( commitSynced ); + Standalone pagedData = wait( readPages(self, start, end) ); + const int startOffset = start % _PAGE_SIZE; + const int dataLen = end - start; + ASSERT( pagedData.substr(startOffset, dataLen).compare( buffer->ref().substr(0, dataLen) ) == 0 ); + } catch (Error& e) { + if (e.code() != error_code_io_error) { + delete buffer; + throw; + } + } delete buffer; } @@ -916,7 +923,18 @@ private: if (fromFile == toFile) { ASSERT(toPage >= fromPage); Standalone pagedData = wait( self->rawQueue->read( fromFile, fromPage, toPage - fromPage + 1 ) ); + if ( self->firstPages(0).seq > start.lo ) { + // Simulation allows for reads to be delayed and executed after overlapping subsequent + // write operations. This means that by the time our read was executed, it's possible + // that both disk queue files have been completely overwritten. + // I'm not clear what is the actual contract for read/write in this case, so simulation + // might be a bit overly aggressive here, but it's behavior we need to tolerate. + throw io_error(); + } + ASSERT( ((Page*)pagedData.begin())->seq == start.lo / _PAGE_SIZE * _PAGE_SIZE ); ASSERT(pagedData.size() == (toPage - fromPage + 1) * _PAGE_SIZE ); + + ASSERT( ((Page*)pagedData.end() - 1)->seq == (end.lo - 1) / _PAGE_SIZE * _PAGE_SIZE ); return pagedData; } else { ASSERT(fromFile == 0); @@ -924,8 +942,14 @@ private: state Standalone secondChunk; wait( store(firstChunk, self->rawQueue->read( fromFile, fromPage, ( file0size / sizeof(Page) ) - fromPage )) && store(secondChunk, self->rawQueue->read( toFile, 0, toPage + 1 )) ); + if ( self->firstPages(0).seq > start.lo ) { + // See above. + throw io_error(); + } ASSERT(firstChunk.size() == ( ( file0size / sizeof(Page) ) - fromPage ) * _PAGE_SIZE ); + ASSERT( ((Page*)firstChunk.begin())->seq == start.lo / _PAGE_SIZE * _PAGE_SIZE ); ASSERT(secondChunk.size() == (toPage + 1) * _PAGE_SIZE); + ASSERT( ((Page*)secondChunk.end() - 1)->seq == (end.lo - 1) / _PAGE_SIZE * _PAGE_SIZE ); return firstChunk.withSuffix(secondChunk); } } From 2f49acc8a046e01d5b666e5f09cc5ddfc53592ea Mon Sep 17 00:00:00 2001 From: Alex Miller Date: Thu, 7 Feb 2019 17:02:24 -0800 Subject: [PATCH 146/226] Add a read function. --- fdbserver/DiskQueue.actor.cpp | 55 +++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/fdbserver/DiskQueue.actor.cpp b/fdbserver/DiskQueue.actor.cpp index bf0359604b..34e92f709b 100644 --- a/fdbserver/DiskQueue.actor.cpp +++ b/fdbserver/DiskQueue.actor.cpp @@ -110,6 +110,7 @@ private: } }; +// We use a Tracked instead of a Reference when the shutdown/destructor code would need to wait(). 
template class Tracked { protected: @@ -954,6 +955,60 @@ private: } } + ACTOR static Future> read(DiskQueue *self, location start, location end) { + // This `state` is unnecessary, but works around pagedData wrongly becoming const + // due to the actor compiler. + state Standalone pagedData = wait(readPages(self, start, end)); + ASSERT(start.lo % sizeof(Page) == 0 || + start.lo % sizeof(Page) >= sizeof(PageHeader)); + int startingOffset = start.lo % sizeof(Page); + if (startingOffset > 0) startingOffset -= sizeof(PageHeader); + ASSERT(end.lo % sizeof(Page) == 0 || + end.lo % sizeof(Page) > sizeof(PageHeader)); + int endingOffset = end.lo % sizeof(Page); + if (endingOffset == 0) endingOffset = sizeof(Page); + if (endingOffset > 0) endingOffset -= sizeof(PageHeader); + + if ((end.lo-1)/sizeof(Page)*sizeof(Page) == start.lo/sizeof(Page)*sizeof(Page)) { + // start and end are on the same page + ASSERT(pagedData.size() == sizeof(Page)); + pagedData.contents() = pagedData.substr(sizeof(PageHeader) + startingOffset, endingOffset - startingOffset); + return pagedData; + } else { + // FIXME: This allocation is excessive and unnecessary. We know the overhead per page that + // we'll be stripping out (sizeof(PageHeader)), so we should be able to do a smaller + // allocation. But we should be able to re-use the space allocated for pagedData, which + // would mean not having to allocate 2x the space for a read. + Standalone unpagedData = makeString(pagedData.size()); + uint8_t *buf = mutateString(unpagedData); + memset(buf, 0, unpagedData.size()); + const Page *data = reinterpret_cast(pagedData.begin()); + + // Only start copying from `start` in the first page. + if( data->payloadSize > startingOffset ) { + memcpy(buf, data->payload+startingOffset, data->payloadSize-startingOffset); + buf += data->payloadSize-startingOffset; + } + data++; + + // Copy all the middle pages + while (data->seq != ((end.lo-1)/sizeof(Page)*sizeof(Page))) { + // These pages can have varying amounts of data, as pages with partial + // data will be zero-filled when commit is called. + memcpy(buf, data->payload, data->payloadSize); + buf += data->payloadSize; + data++; + } + + // Copy only until `end` in the last page. + memcpy(buf, data->payload, std::min(endingOffset, data->payloadSize)); + buf += std::min(endingOffset, data->payloadSize); + + unpagedData.contents() = unpagedData.substr(0, buf - unpagedData.begin()); + return unpagedData; + } + } + void readFromBuffer( StringBuffer* result, int* bytes ) { // extract up to bytes from readBufPage into result int len = std::min( readBufPage->payloadSize - readBufPos, *bytes ); From 8b21d1ac8f69301426833baae031ec321b9c9810 Mon Sep 17 00:00:00 2001 From: Alex Miller Date: Thu, 7 Feb 2019 17:02:25 -0800 Subject: [PATCH 147/226] Add a standalone recovery initialization function. --- fdbserver/DiskQueue.actor.cpp | 40 +++++++++++++++++---------- fdbserver/IDiskQueue.h | 8 ++++++ fdbserver/LogSystemDiskQueueAdapter.h | 1 + 3 files changed, 34 insertions(+), 15 deletions(-) diff --git a/fdbserver/DiskQueue.actor.cpp b/fdbserver/DiskQueue.actor.cpp index 34e92f709b..06debb8488 100644 --- a/fdbserver/DiskQueue.actor.cpp +++ b/fdbserver/DiskQueue.actor.cpp @@ -689,9 +689,10 @@ public: class DiskQueue : public IDiskQueue, public Tracked { public: + // FIXME: Is setting lastCommittedSeq to -1 instead of 0 necessary? 
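// Editor's note: a standalone sketch (plain C++, hypothetical names) of the
// unpaging arithmetic behind the read() function added in the previous patch:
// each on-disk page is pageSize bytes, starts with a headerSize header, and
// carries payloadSize valid bytes (assumed here to satisfy
// headerSize + payloadSize <= pageSize); a logical read concatenates only the
// payload bytes. The real code additionally trims the first page to the
// requested start offset, trims the last page to the requested end offset,
// and takes each page's actual payload length from its header.
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<uint8_t> unpageSketch(const std::vector<uint8_t>& paged,
                                  size_t pageSize, size_t headerSize, size_t payloadSize) {
	std::vector<uint8_t> out;
	for (size_t off = 0; off + pageSize <= paged.size(); off += pageSize) {
		// Skip the page header, keep the payload bytes of this page.
		out.insert(out.end(), paged.begin() + off + headerSize,
		           paged.begin() + off + headerSize + payloadSize);
	}
	return out;
}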
DiskQueue( std::string basename, std::string fileExtension, UID dbgid, int64_t fileSizeWarningLimit ) : rawQueue( new RawDiskQueue_TwoFiles(basename, fileExtension, dbgid, fileSizeWarningLimit) ), dbgid(dbgid), anyPopped(false), nextPageSeq(0), poppedSeq(0), lastPoppedSeq(0), - nextReadLocation(-1), readBufPage(NULL), readBufPos(0), pushed_page_buffer(NULL), recovered(false), lastCommittedSeq(0), warnAlwaysForMemory(true) + nextReadLocation(-1), readBufPage(NULL), readBufPos(0), pushed_page_buffer(NULL), recovered(false), initialized(false), lastCommittedSeq(-1), warnAlwaysForMemory(true) { } @@ -786,8 +787,11 @@ public: rawQueue->stall(); } + virtual Future initializeRecovery() { return initializeRecovery( this ); } virtual Future> readNext( int bytes ) { return readNext(this, bytes); } + // FIXME: getNextReadLocation should ASSERT( initialized ), but the memory storage engine needs + // to be changed to understand the new intiailizeRecovery protocol. virtual location getNextReadLocation() { return nextReadLocation; } virtual Future getError() { return rawQueue->getError(); } @@ -1028,21 +1032,14 @@ private: ASSERT( !self->recovered ); - if (self->nextReadLocation < 0) { - bool nonempty = wait( findStart(self) ); - if (!nonempty) { - // The constructor has already put everything in the right state for an empty queue - self->recovered = true; - ASSERT( self->poppedSeq <= self->endLocation() ); + if (!self->initialized) { + bool recoveryComplete = wait( initializeRecovery(self) ); - //The next read location isn't necessarily the end of the last commit, but this is sufficient for helping us check an ASSERTion - self->lastCommittedSeq = self->nextReadLocation; + if (recoveryComplete) { + ASSERT( self->poppedSeq <= self->endLocation() ); return Standalone(); } - self->readBufPos = self->nextReadLocation % sizeof(Page) - sizeof(PageHeader); - if (self->readBufPos < 0) { self->nextReadLocation -= self->readBufPos; self->readBufPos = 0; } - TraceEvent("DQRecStart", self->dbgid).detail("ReadBufPos", self->readBufPos).detail("NextReadLoc", self->nextReadLocation).detail("File0Name", self->rawQueue->files[0].dbgFilename); } loop { @@ -1100,13 +1097,19 @@ private: return result.str; } - ACTOR static Future findStart( DiskQueue* self ) { + ACTOR static Future initializeRecovery( DiskQueue* self ) { + if (self->initialized) { + return self->recovered; + } Standalone lastPageData = wait( self->rawQueue->readFirstAndLastPages( &comparePages ) ); + self->initialized = true; if (!lastPageData.size()) { // There are no valid pages, so apparently this is a completely empty queue self->nextReadLocation = 0; - return false; + self->lastCommittedSeq = 0; + self->recovered = true; + return true; } Page* lastPage = (Page*)lastPageData.begin(); @@ -1128,10 +1131,15 @@ private: self->findPhysicalLocation( self->poppedSeq, &file, &page, "poppedSeq" ); self->rawQueue->setStartPage( file, page ); - return true; + self->readBufPos = self->nextReadLocation % sizeof(Page) - sizeof(PageHeader); + if (self->readBufPos < 0) { self->nextReadLocation -= self->readBufPos; self->readBufPos = 0; } + TraceEvent("DQRecStart", self->dbgid).detail("ReadBufPos", self->readBufPos).detail("NextReadLoc", self->nextReadLocation).detail("File0Name", self->rawQueue->files[0].dbgFilename); + + return false; } Page& firstPages(int i) { + ASSERT( initialized ); return *(Page*)rawQueue->firstPages[i]; } @@ -1204,6 +1212,7 @@ private: // Recovery state bool recovered; + bool initialized; loc_t nextReadLocation; Arena readBufArena; Page* 
readBufPage; @@ -1226,6 +1235,7 @@ public: void close() { queue->close(); delete this; } //IDiskQueue + Future initializeRecovery() { return queue->initializeRecovery(); } Future> readNext( int bytes ) { return readNext(this, bytes); } virtual location getNextReadLocation() { return queue->getNextReadLocation(); } diff --git a/fdbserver/IDiskQueue.h b/fdbserver/IDiskQueue.h index 438d39fdc0..0cd3498c91 100644 --- a/fdbserver/IDiskQueue.h +++ b/fdbserver/IDiskQueue.h @@ -41,6 +41,14 @@ public: } }; + //! Find the first and last pages in the disk queue, and initialize invariants. + //! + //! Most importantly, most invariants only hold after this function returns, and + //! some functions assert that the IDiskQueue has been initialized. + //! + //! \returns True, if DiskQueue is now considered in a recovered state. + //! False, if the caller should call readNext until recovered is true. + virtual Future initializeRecovery() = 0; // Before calling push or commit, the caller *must* perform recovery by calling readNext() until it returns less than the requested number of bytes. // Thereafter it may not be called again. virtual Future> readNext( int bytes ) = 0; // Return the next bytes in the queue (beginning, the first time called, with the first unpopped byte) diff --git a/fdbserver/LogSystemDiskQueueAdapter.h b/fdbserver/LogSystemDiskQueueAdapter.h index b5bc26c934..4983340b69 100644 --- a/fdbserver/LogSystemDiskQueueAdapter.h +++ b/fdbserver/LogSystemDiskQueueAdapter.h @@ -67,6 +67,7 @@ public: virtual void close(); // IDiskQueue interface + virtual Future initializeRecovery() { return false; } virtual Future> readNext( int bytes ); virtual IDiskQueue::location getNextReadLocation(); virtual IDiskQueue::location push( StringRef contents ); From 6c7229ec0785b6c655e27735479b806eb599a3dc Mon Sep 17 00:00:00 2001 From: Alex Miller Date: Thu, 7 Feb 2019 17:02:26 -0800 Subject: [PATCH 148/226] read fix while recovery --- fdbserver/DiskQueue.actor.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fdbserver/DiskQueue.actor.cpp b/fdbserver/DiskQueue.actor.cpp index 06debb8488..aca58340b5 100644 --- a/fdbserver/DiskQueue.actor.cpp +++ b/fdbserver/DiskQueue.actor.cpp @@ -923,8 +923,9 @@ private: self->findPhysicalLocation(end.lo-1, &toFile, &toPage, nullptr); if (fromFile == 0) { ASSERT( fromPage < file0size / _PAGE_SIZE ); } if (toFile == 0) { ASSERT( toPage < file0size / _PAGE_SIZE ); } - if (fromFile == 1) { ASSERT( fromPage < self->rawQueue->writingPos / _PAGE_SIZE ); } - if (toFile == 1) { ASSERT( toPage < self->rawQueue->writingPos / _PAGE_SIZE ); } + // FIXME I think there's something with nextReadLocation we can do here when initialized && !recovered. 
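// Editor's note: a compact sketch (hypothetical names) of the recovery
// lifecycle introduced by initializeRecovery() in the previous patch, which is
// why the writingPos assertions immediately below are now gated on
// self->recovered: firstPages is only meaningful once initialized, and
// writingPos only bounds valid pages once the tail of the queue has been
// fully replayed.
enum class RecoveryPhaseSketch {
	Constructed,  // nothing read from disk yet; most invariants do not hold
	Initialized,  // initializeRecovery() done: first/last pages located
	Recovered     // readNext() consumed the tail; push()/commit() now legal
};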
+ if (fromFile == 1 && self->recovered) { ASSERT( fromPage < self->rawQueue->writingPos / _PAGE_SIZE ); } + if (toFile == 1 && self->recovered) { ASSERT( toPage < self->rawQueue->writingPos / _PAGE_SIZE ); } if (fromFile == toFile) { ASSERT(toPage >= fromPage); Standalone pagedData = wait( self->rawQueue->read( fromFile, fromPage, toPage - fromPage + 1 ) ); From 12123f41d6ce85c888f0282cc9b1161a249d2132 Mon Sep 17 00:00:00 2001 From: Alex Miller Date: Thu, 7 Feb 2019 17:02:27 -0800 Subject: [PATCH 149/226] Plumb a read function up the stack to IDiskQueue --- fdbserver/DiskQueue.actor.cpp | 5 +++++ fdbserver/IDiskQueue.h | 1 + fdbserver/LogSystemDiskQueueAdapter.h | 1 + 3 files changed, 7 insertions(+) diff --git a/fdbserver/DiskQueue.actor.cpp b/fdbserver/DiskQueue.actor.cpp index aca58340b5..98a621fe63 100644 --- a/fdbserver/DiskQueue.actor.cpp +++ b/fdbserver/DiskQueue.actor.cpp @@ -713,6 +713,7 @@ public: } return endLocation(); } + virtual void pop( location upTo ) { ASSERT( !upTo.hi ); ASSERT( !recovered || upTo.lo <= endLocation() ); @@ -732,6 +733,8 @@ public: } } + virtual Future> read(location from, location to) { return read(this, from, to); } + int getMaxPayload() { return Page::maxPayload; } @@ -1241,6 +1244,8 @@ public: virtual location getNextReadLocation() { return queue->getNextReadLocation(); } + virtual Future> read( location start, location end ) { return queue->read( start, end ); } + virtual location push( StringRef contents ) { pushed = queue->push(contents); return pushed; diff --git a/fdbserver/IDiskQueue.h b/fdbserver/IDiskQueue.h index 0cd3498c91..70f5f9670a 100644 --- a/fdbserver/IDiskQueue.h +++ b/fdbserver/IDiskQueue.h @@ -54,6 +54,7 @@ public: virtual Future> readNext( int bytes ) = 0; // Return the next bytes in the queue (beginning, the first time called, with the first unpopped byte) virtual location getNextReadLocation() = 0; // Returns a location >= the location of all bytes previously returned by readNext(), and <= the location of all bytes subsequently returned + virtual Future> read( location start, location end ) = 0; virtual location push( StringRef contents ) = 0; // Appends the given bytes to the byte stream. Returns a location token representing the *end* of the contents. virtual void pop( location upTo ) = 0; // Removes all bytes before the given location token from the byte stream. virtual Future commit() = 0; // returns when all prior pushes and pops are durable. If commit does not return (due to close or a crash), any prefix of the pushed bytes and any prefix of the popped bytes may be durable. 
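// Editor's note: the caller protocol implied by the contract comments above,
// restated as hedged pseudo-flow (hypothetical caller, error handling
// omitted), not code from the patch:
//
//   state bool recovered = wait( queue->initializeRecovery() );
//   while (!recovered) {
//       Standalone<StringRef> chunk = wait( queue->readNext(N) );
//       if (chunk.size() < N) break;  // readNext returning short => recovery done
//   }
//   // Only now may push()/pop()/commit() be used; read(start, end) can then
//   // fetch a previously pushed, committed range by its location tokens.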
diff --git a/fdbserver/LogSystemDiskQueueAdapter.h b/fdbserver/LogSystemDiskQueueAdapter.h index 4983340b69..771f4ae4c6 100644 --- a/fdbserver/LogSystemDiskQueueAdapter.h +++ b/fdbserver/LogSystemDiskQueueAdapter.h @@ -70,6 +70,7 @@ public: virtual Future initializeRecovery() { return false; } virtual Future> readNext( int bytes ); virtual IDiskQueue::location getNextReadLocation(); + virtual Future> read( location start, location end ) { ASSERT(false); throw internal_error(); } virtual IDiskQueue::location push( StringRef contents ); virtual void pop( IDiskQueue::location upTo ); virtual Future commit(); From 3a38bff8eef43b2e368d8c039546ff2a3885c34c Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Wed, 13 Feb 2019 10:30:35 -0800 Subject: [PATCH 150/226] Use DISABLE_ACTOR_WITHOUT_WAIT_WARNING consistently --- cmake/FlowCommands.cmake | 4 ++-- fdbrpc/CMakeLists.txt | 6 +++--- flow/actorcompiler/ActorParser.cs | 4 ++-- flow/actorcompiler/Program.cs | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/cmake/FlowCommands.cmake b/cmake/FlowCommands.cmake index cd3cdea4ec..255689e7b6 100644 --- a/cmake/FlowCommands.cmake +++ b/cmake/FlowCommands.cmake @@ -14,7 +14,7 @@ endmacro() set(ACTOR_TARGET_COUNTER "0") macro(actor_compile target srcs) - set(options DISABLE_ACTOR_WITHOUT_WAIT) + set(options DISABLE_ACTOR_WITHOUT_WAIT_WARNING) set(oneValueArg) set(multiValueArgs) cmake_parse_arguments(ACTOR_COMPILE "${options}" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}") @@ -27,7 +27,7 @@ macro(actor_compile target srcs) string(REPLACE ".actor.cpp" ".actor.g.cpp" tmp ${src}) endif() set(actor_compiler_flags "") - if(ACTOR_COMPILE_DISABLE_ACTOR_WITHOUT_WAIT) + if(ACTOR_COMPILE_DISABLE_ACTOR_WITHOUT_WAIT_WARNING) set(actor_compiler_flags "--disable-actor-without-wait-error") endif() if(tmp) diff --git a/fdbrpc/CMakeLists.txt b/fdbrpc/CMakeLists.txt index eaafd1e793..3447f4e066 100644 --- a/fdbrpc/CMakeLists.txt +++ b/fdbrpc/CMakeLists.txt @@ -52,14 +52,14 @@ if(NOT WIN32) list(APPEND FDBRPC_SRCS libcoroutine/context.c libeio/eio.c) endif() -set(FDBRPC_SRCS_ALLOW_ACTOR_WITHOUT_WAIT +set(FDBRPC_SRCS_DISABLE_ACTOR_WITHOUT_WAIT_WARNING ActorFuzz.actor.cpp FlowTests.actor.cpp dsltest.actor.cpp) -actor_set(FDBRPC_BUILD "${FDBRPC_SRCS};${FDBRPC_SRCS_ALLOW_ACTOR_WITHOUT_WAIT}") +actor_set(FDBRPC_BUILD "${FDBRPC_SRCS};${FDBRPC_SRCS_DISABLE_ACTOR_WITHOUT_WAIT_WARNING}") add_library(fdbrpc STATIC ${FDBRPC_BUILD}) actor_compile(fdbrpc "${FDBRPC_SRCS}") -actor_compile(fdbrpc "${FDBRPC_SRCS_ALLOW_ACTOR_WITHOUT_WAIT}" DISABLE_ACTOR_WITHOUT_WAIT) +actor_compile(fdbrpc "${FDBRPC_SRCS_DISABLE_ACTOR_WITHOUT_WAIT_WARNING}" DISABLE_ACTOR_WITHOUT_WAIT_WARNING) target_include_directories(fdbrpc PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/libeio) target_link_libraries(fdbrpc PUBLIC flow) diff --git a/flow/actorcompiler/ActorParser.cs b/flow/actorcompiler/ActorParser.cs index 07d8a6e0e1..ca6d709444 100644 --- a/flow/actorcompiler/ActorParser.cs +++ b/flow/actorcompiler/ActorParser.cs @@ -38,10 +38,10 @@ namespace actorcompiler class ErrorMessagePolicy { - public bool ActorWithoutWaitEnabled = true; + public bool DisableActorWithoutWaitWarning = false; public void HandleActorWithoutWait(String sourceFile, Actor actor) { - if (ActorWithoutWaitEnabled && !actor.isTestCase) + if (!DisableActorWithoutWaitWarning && !actor.isTestCase) { // TODO(atn34): Once cmake is the only build system we can make this an error instead of a warning. 
Console.Error.WriteLine("{0}:{1}: warning: ACTOR {2} does not contain a wait() statement", sourceFile, actor.SourceLine, actor.name); diff --git a/flow/actorcompiler/Program.cs b/flow/actorcompiler/Program.cs index dfce4ad014..9035998355 100644 --- a/flow/actorcompiler/Program.cs +++ b/flow/actorcompiler/Program.cs @@ -33,7 +33,7 @@ namespace actorcompiler if (args.Length < 2) { Console.WriteLine("Usage:"); - Console.WriteLine(" actorcompiler [--disable-actor-without-wait-error]"); + Console.WriteLine(" actorcompiler [--disable-actor-without-wait-warning]"); return 100; } Console.WriteLine("actorcompiler {0}", string.Join(" ", args)); @@ -41,7 +41,7 @@ namespace actorcompiler ErrorMessagePolicy errorMessagePolicy = new ErrorMessagePolicy(); if (args.Contains("--disable-actor-without-wait-error")) { - errorMessagePolicy.ActorWithoutWaitEnabled = false; + errorMessagePolicy.DisableActorWithoutWaitWarning = true; } try { From 1ea58c1e5ed40bc2b567834d2b028b496796ca1d Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Wed, 13 Feb 2019 10:44:58 -0800 Subject: [PATCH 151/226] Actually change name of flag --- cmake/FlowCommands.cmake | 2 +- flow/actorcompiler/Program.cs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/FlowCommands.cmake b/cmake/FlowCommands.cmake index 255689e7b6..4eb4c4735d 100644 --- a/cmake/FlowCommands.cmake +++ b/cmake/FlowCommands.cmake @@ -28,7 +28,7 @@ macro(actor_compile target srcs) endif() set(actor_compiler_flags "") if(ACTOR_COMPILE_DISABLE_ACTOR_WITHOUT_WAIT_WARNING) - set(actor_compiler_flags "--disable-actor-without-wait-error") + set(actor_compiler_flags "--disable-actor-without-wait-warning") endif() if(tmp) if(WIN32) diff --git a/flow/actorcompiler/Program.cs b/flow/actorcompiler/Program.cs index 9035998355..d483a8eacb 100644 --- a/flow/actorcompiler/Program.cs +++ b/flow/actorcompiler/Program.cs @@ -39,7 +39,7 @@ namespace actorcompiler Console.WriteLine("actorcompiler {0}", string.Join(" ", args)); string input = args[0], output = args[1], outputtmp = args[1] + ".tmp"; ErrorMessagePolicy errorMessagePolicy = new ErrorMessagePolicy(); - if (args.Contains("--disable-actor-without-wait-error")) + if (args.Contains("--disable-actor-without-wait-warning")) { errorMessagePolicy.DisableActorWithoutWaitWarning = true; } From 2ab921b2f2ad1d3e9f57cba6f3e307bcab231569 Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Wed, 13 Feb 2019 10:56:22 -0800 Subject: [PATCH 152/226] Add release note for previous Go bindings fix. --- documentation/sphinx/source/release-notes.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/documentation/sphinx/source/release-notes.rst b/documentation/sphinx/source/release-notes.rst index 8838a2d73a..51345e3151 100644 --- a/documentation/sphinx/source/release-notes.rst +++ b/documentation/sphinx/source/release-notes.rst @@ -8,9 +8,10 @@ Release Notes Fixes ----- +* The Go bindings reported an incorrect required version when trying to load an incompatible fdb_c library. `(PR #1053) `_ * The ``include`` command in fdbcli would falsly include all machines with IP addresses that have the included IP address as a prefix (for example ``include 1.0.0.1`` would also include - ``1.0.0.10``) `(PR #1121) `_ + ``1.0.0.10``). `(PR #1121) `_ 6.0.18 ====== From 8a17905621393657153067e1f02a0492429a4155 Mon Sep 17 00:00:00 2001 From: "A.J. 
Beamon" Date: Thu, 14 Feb 2019 08:08:44 -0800 Subject: [PATCH 153/226] Add a couple new files to CMakeLists --- fdbserver/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fdbserver/CMakeLists.txt b/fdbserver/CMakeLists.txt index 37adfa2366..ac4c205827 100644 --- a/fdbserver/CMakeLists.txt +++ b/fdbserver/CMakeLists.txt @@ -27,6 +27,8 @@ set(FDBSERVER_SRCS KeyValueStoreSQLite.actor.cpp Knobs.cpp Knobs.h + LatencyBandConfig.cpp + LatencyBandConfig.h LeaderElection.actor.cpp LeaderElection.h LogProtocolMessage.h From 886e7ab2ba75af1c8a2d4f6233b6bbc750800239 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Thu, 13 Dec 2018 13:31:37 -0800 Subject: [PATCH 154/226] Add a new DataDistributor role. Let cluster controller to start a new data distributor role by sending a message to a chosen worker. Change MasterInterface usage in DataDistribution to masterId Add DataDistributor rejoin handling. This allows the data distributor to tell the new cluster controller of its existence so that the controller doesn't spawn a new one. I.e., there should be only ONE data distributor in the cluster. If DataDistributor (DD) doesn't join in a while, then ClusterController (CC) tries to recruit one as DD. CC also monitors DD and restarts one if it failed. The Proxy is also monitoring the DD. If DD failed, the Proxy will ask CC for the new DD. Add GetRecoveryInfo RPC to master server, which is called by data distributor to obtain the recovery Transaction version from the master server. --- fdbrpc/Locality.cpp | 24 ++ fdbrpc/Locality.h | 7 +- fdbrpc/simulator.h | 1 + fdbserver/ClusterController.actor.cpp | 115 +++++++- fdbserver/ClusterRecruitmentInterface.h | 44 ++- fdbserver/DataDistribution.actor.cpp | 311 ++++++++++++++++------ fdbserver/DataDistribution.h | 15 +- fdbserver/DataDistributionQueue.actor.cpp | 48 ++-- fdbserver/DataDistributorInterface.h | 72 +++++ fdbserver/MasterInterface.h | 54 ++-- fdbserver/MasterProxyServer.actor.cpp | 64 ++++- fdbserver/QuietDatabase.actor.cpp | 125 +++++---- fdbserver/WorkerInterface.h | 16 +- fdbserver/fdbserver.vcxproj | 1 + fdbserver/masterserver.actor.cpp | 19 +- fdbserver/worker.actor.cpp | 13 +- 16 files changed, 711 insertions(+), 218 deletions(-) create mode 100644 fdbserver/DataDistributorInterface.h diff --git a/fdbrpc/Locality.cpp b/fdbrpc/Locality.cpp index 42a87f081e..704070fb03 100644 --- a/fdbrpc/Locality.cpp +++ b/fdbrpc/Locality.cpp @@ -148,6 +148,30 @@ ProcessClass::Fitness ProcessClass::machineClassFitness( ClusterRole role ) cons default: return ProcessClass::WorstFit; } + case ProcessClass::DataDistributor: + // TODO: understand all the fitnesses and choose from them. 
+ switch( _class ) { + case ProcessClass::DataDistributorClass: + return ProcessClass::BestFit; + case ProcessClass::StatelessClass: + return ProcessClass::GoodFit; + case ProcessClass::MasterClass: + return ProcessClass::OkayFit; + case ProcessClass::ResolutionClass: + return ProcessClass::OkayFit; + case ProcessClass::TransactionClass: + return ProcessClass::OkayFit; + case ProcessClass::ProxyClass: + return ProcessClass::OkayFit; + case ProcessClass::LogRouterClass: + return ProcessClass::OkayFit; + case ProcessClass::UnsetClass: + return ProcessClass::UnsetFit; + case ProcessClass::TesterClass: + return ProcessClass::NeverAssign; + default: + return ProcessClass::WorstFit; + } default: return ProcessClass::NeverAssign; } diff --git a/fdbrpc/Locality.h b/fdbrpc/Locality.h index e98b894dd4..b90526ba62 100644 --- a/fdbrpc/Locality.h +++ b/fdbrpc/Locality.h @@ -26,9 +26,9 @@ struct ProcessClass { // This enum is stored in restartInfo.ini for upgrade tests, so be very careful about changing the existing items! - enum ClassType { UnsetClass, StorageClass, TransactionClass, ResolutionClass, TesterClass, ProxyClass, MasterClass, StatelessClass, LogClass, ClusterControllerClass, LogRouterClass, InvalidClass = -1 }; + enum ClassType { UnsetClass, StorageClass, TransactionClass, ResolutionClass, TesterClass, ProxyClass, MasterClass, StatelessClass, LogClass, ClusterControllerClass, LogRouterClass, DataDistributorClass, InvalidClass = -1 }; enum Fitness { BestFit, GoodFit, UnsetFit, OkayFit, WorstFit, ExcludeFit, NeverAssign }; //cannot be larger than 7 because of leader election mask - enum ClusterRole { Storage, TLog, Proxy, Master, Resolver, LogRouter, ClusterController, NoRole }; + enum ClusterRole { Storage, TLog, Proxy, Master, Resolver, LogRouter, ClusterController, DataDistributor, NoRole }; enum ClassSource { CommandLineSource, AutoSource, DBSource, InvalidSource = -1 }; int16_t _class; int16_t _source; @@ -48,6 +48,7 @@ public: else if (s=="log") _class = LogClass; else if (s=="router") _class = LogRouterClass; else if (s=="cluster_controller") _class = ClusterControllerClass; + else if (s=="data_distributor") _class = DataDistributorClass; else _class = InvalidClass; } @@ -63,6 +64,7 @@ public: else if (classStr=="log") _class = LogClass; else if (classStr=="router") _class = LogRouterClass; else if (classStr=="cluster_controller") _class = ClusterControllerClass; + else if (classStr=="data_distributor") _class = DataDistributorClass; else _class = InvalidClass; if (sourceStr=="command_line") _source = CommandLineSource; @@ -93,6 +95,7 @@ public: case LogClass: return "log"; case LogRouterClass: return "router"; case ClusterControllerClass: return "cluster_controller"; + case DataDistributorClass: return "data_distributor"; default: return "invalid"; } } diff --git a/fdbrpc/simulator.h b/fdbrpc/simulator.h index 0f0b116f2d..59d5d900ba 100644 --- a/fdbrpc/simulator.h +++ b/fdbrpc/simulator.h @@ -94,6 +94,7 @@ public: case ProcessClass::LogClass: return true; case ProcessClass::LogRouterClass: return false; case ProcessClass::ClusterControllerClass: return false; + case ProcessClass::DataDistributorClass: return false; default: return false; } } diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 49616ef04b..a09f9fa56a 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -22,6 +22,7 @@ #include "flow/ActorCollection.h" #include "fdbclient/NativeAPI.h" #include "fdbserver/CoordinationInterface.h" 
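// Editor's note: DataDistributorInterface.h is created by this patch but its
// body does not appear in this excerpt. Judging from the usages below (.id,
// .waitFailure, .isValid()), it plausibly resembles this hedged sketch:
//
//   struct DataDistributorInterface {
//       UID id;
//       RequestStream<ReplyPromise<Void>> waitFailure; // monitored via waitFailureClient
//       bool isValid() const { return id != UID(); }   // default-constructed == "none"
//       template <class Ar> void serialize(Ar& ar) { serializer(ar, id, waitFailure); }
//   };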
+#include "fdbserver/DataDistributorInterface.h" #include "fdbserver/Knobs.h" #include "fdbserver/MoveKeys.h" #include "fdbserver/WorkerInterface.h" @@ -1017,6 +1018,7 @@ public: Optional remoteStartTime; Version datacenterVersionDifference; bool versionDifferenceUpdated; + AsyncVar dataDistributorInterface; ClusterControllerData( ClusterControllerFullInterface const& ccInterface, LocalityData const& locality ) : id(ccInterface.id()), ac(false), outstandingRequestChecker(Void()), gotProcessClasses(false), gotFullyRecoveredConfig(false), startTime(now()), datacenterVersionDifference(0), versionDifferenceUpdated(false) @@ -1332,7 +1334,7 @@ ACTOR Future workerAvailabilityWatch( WorkerInterface worker, ProcessClass checkOutstandingRequests( cluster ); } } - when( wait( failed ) ) { // remove workers that have failed + when( wait( failed ) ) { // remote workers that have failed WorkerInfo& failedWorkerInfo = cluster->id_worker[ worker.locality.processId() ]; if (!failedWorkerInfo.reply.isSet()) { failedWorkerInfo.reply.send( RegisterWorkerReply(failedWorkerInfo.processClass, failedWorkerInfo.priorityInfo) ); @@ -2216,6 +2218,112 @@ ACTOR Future updateDatacenterVersionDifference( ClusterControllerData *sel } } +ACTOR Future clusterGetDistributorInterface( ClusterControllerData *self, UID reqId, ReplyPromise reqReply ) { + TraceEvent("CCGetDistributorInterfaceRequest", reqId); + state Future distributorOnchange = Never(); + + while ( !self->dataDistributorInterface.get().isValid() ) { + wait( self->dataDistributorInterface.onChange() ); + TraceEvent("CCGetDistributorInterfaceID", self->dataDistributorInterface.get().id) + .detail("Endpoint", self->dataDistributorInterface.get().waitFailure.getEndpoint().token); + } + + GetDistributorInterfaceReply reply(self->dataDistributorInterface.get()); + TraceEvent("CCGetDistributorInterfaceReply", reqId) + .detail("DataDistributorId", reply.distributorInterface.id) + .detail("Endpoint", reply.distributorInterface.waitFailure.getEndpoint().token); + reqReply.send( reply ); + return Void(); +} + +ACTOR Future startDataDistributor( ClusterControllerData *self ) { + state Optional dcId = self->clusterControllerDcId; + while ( !dcId.present() || !self->masterProcessId.present() ) { + wait( delay(SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY) ); + dcId = self->clusterControllerDcId; + } + ASSERT(dcId.present()); + + loop { + std::map>, int> id_used; + id_used[self->clusterControllerProcessId]++; + id_used[self->masterProcessId]++; + state WorkerFitnessInfo data_distributor = self->getWorkerForRoleInDatacenter(dcId, ProcessClass::DataDistributor, ProcessClass::NeverAssign, self->db.config, id_used); + state InitializeDataDistributorRequest req; + req.reqId = g_random->randomUniqueID(); + TraceEvent("DataDistributor", req.reqId).detail("Recruit", data_distributor.worker.first.address()); + + choose { + when ( DataDistributorInterface dataDistributor = wait( data_distributor.worker.first.dataDistributor.getReply(req) ) ) { + TraceEvent("DataDistributor", req.reqId).detail("Recruited", data_distributor.worker.first.address()); + return dataDistributor; + } + when ( wait ( delay(SERVER_KNOBS->WORKER_FAILURE_TIME) ) ) {} + } + } +} + +ACTOR Future waitDDRejoinOrStartDD( ClusterControllerData *self, ClusterControllerFullInterface *clusterInterface ) { + state PromiseStream> addActor; + state Future collection = actorCollection( addActor.getFuture() ); + state Future newDistributor = Never(); + state Future distributorFailed = Never(); + + // wait for a while to see if 
existing data distributor will join. + loop choose { + when ( DataDistributorRejoinRequest req = waitNext( clusterInterface->dataDistributorRejoin.getFuture() ) ) { + TraceEvent("ClusterController", self->id).detail("DataDistributorRejoinID", req.dataDistributor.id); + self->dataDistributorInterface.set( req.dataDistributor ); + distributorFailed = waitFailureClient( self->dataDistributorInterface.get().waitFailure, SERVER_KNOBS->WORKER_FAILURE_TIME ); + req.reply.send(true); + break; + } + when ( wait( delay(SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY) ) ) { break; } + } + + if ( !self->dataDistributorInterface.get().isValid() ) { // No rejoin happened + newDistributor = startDataDistributor( self ); + } + + // Wait on failures and restart it. + loop choose { + when ( DataDistributorInterface distributorInterf = wait( newDistributor ) ) { + TraceEvent ev("ClusterController", self->id); + const UID myDdId = self->dataDistributorInterface.get().id; + if ( myDdId == UID() ) { + ev.detail("NewDataDistributorID", distributorInterf.id); + self->dataDistributorInterface.set( distributorInterf ); + distributorFailed = waitFailureClient( self->dataDistributorInterface.get().waitFailure, SERVER_KNOBS->WORKER_FAILURE_TIME ); + } else { + ev.detail("MyDataDistributorID", myDdId).detail("DiscardDataDistributorID", distributorInterf.id); + } + newDistributor = Never(); + } + when ( wait( distributorFailed ) ) { + distributorFailed = Never(); + TraceEvent("ClusterController", self->id).detail("DataDistributorFailed", self->dataDistributorInterface.get().id) + .detail("Endpoint", self->dataDistributorInterface.get().waitFailure.getEndpoint().token); + self->dataDistributorInterface.set( DataDistributorInterface() ); // clear the ID + newDistributor = startDataDistributor( self ); + } + when ( DataDistributorRejoinRequest req = waitNext( clusterInterface->dataDistributorRejoin.getFuture() ) ) { + if ( !self->dataDistributorInterface.get().isValid() ) { + self->dataDistributorInterface.set( req.dataDistributor ); + distributorFailed = waitFailureClient( self->dataDistributorInterface.get().waitFailure, SERVER_KNOBS->WORKER_FAILURE_TIME ); + TraceEvent("ClusterController", self->id).detail("DataDistributorRejoined", req.dataDistributor.id); + } else { + const UID myDdId = self->dataDistributorInterface.get().id; + const bool success = myDdId == req.dataDistributor.id; + req.reply.send(success); + TraceEvent("ClusterController", self->id) + .detail("DataDistributorRejoin", success ? 
"OK" : "Failed") + .detail("OldDataDistributorID", myDdId) + .detail("ReqID", req.dataDistributor.id); + } + } + } +} + ACTOR Future clusterControllerCore( ClusterControllerFullInterface interf, Future leaderFail, ServerCoordinators coordinators, LocalityData locality ) { state ClusterControllerData self( interf, locality ); state Future coordinationPingDelay = delay( SERVER_KNOBS->WORKER_COORDINATION_PING_DELAY ); @@ -2223,7 +2331,6 @@ ACTOR Future clusterControllerCore( ClusterControllerFullInterface interf, state PromiseStream> addActor; state Future> error = errorOr( actorCollection( addActor.getFuture() ) ); - auto pSelf = &self; addActor.send( failureDetectionServer( self.id, &self.db, interf.clientInterface.failureMonitoring.getFuture() ) ); addActor.send( clusterWatchDatabase( &self, &self.db ) ); // Start the master database addActor.send( self.updateWorkerList.init( self.db.db ) ); @@ -2235,6 +2342,7 @@ ACTOR Future clusterControllerCore( ClusterControllerFullInterface interf, addActor.send( updatedChangingDatacenters(&self) ); addActor.send( updatedChangedDatacenters(&self) ); addActor.send( updateDatacenterVersionDifference(&self) ); + addActor.send( waitDDRejoinOrStartDD(&self, &interf) ); //printf("%s: I am the cluster controller\n", g_network->getLocalAddress().toString().c_str()); loop choose { @@ -2322,6 +2430,9 @@ ACTOR Future clusterControllerCore( ClusterControllerFullInterface interf, when( ReplyPromise ping = waitNext( interf.clientInterface.ping.getFuture() ) ) { ping.send( Void() ); } + when ( GetDistributorInterfaceRequest req = waitNext( interf.getDistributorInterface.getFuture() ) ) { + addActor.send( clusterGetDistributorInterface( &self, req.reqId, req.reply ) ); + } } } diff --git a/fdbserver/ClusterRecruitmentInterface.h b/fdbserver/ClusterRecruitmentInterface.h index f3d0860c29..2c152b6dc0 100644 --- a/fdbserver/ClusterRecruitmentInterface.h +++ b/fdbserver/ClusterRecruitmentInterface.h @@ -26,6 +26,7 @@ #include "fdbclient/StorageServerInterface.h" #include "fdbclient/MasterProxyInterface.h" #include "fdbclient/DatabaseConfiguration.h" +#include "fdbserver/DataDistributorInterface.h" #include "fdbserver/MasterInterface.h" #include "fdbserver/RecoveryState.h" #include "fdbserver/TLogInterface.h" @@ -42,6 +43,8 @@ struct ClusterControllerFullInterface { RequestStream< struct GetWorkersRequest > getWorkers; RequestStream< struct RegisterMasterRequest > registerMaster; RequestStream< struct GetServerDBInfoRequest > getServerDBInfo; + RequestStream< struct DataDistributorRejoinRequest > dataDistributorRejoin; // sent by dataDistributor (may or may not rebooted) to communicate with a new CC + RequestStream< struct GetDistributorInterfaceRequest > getDistributorInterface; // sent by proxies & QuietDatabase.actor.cpp UID id() const { return clientInterface.id(); } bool operator == (ClusterControllerFullInterface const& r) const { return id() == r.id(); } @@ -56,12 +59,14 @@ struct ClusterControllerFullInterface { getWorkers.getEndpoint( TaskClusterController ); registerMaster.getEndpoint( TaskClusterController ); getServerDBInfo.getEndpoint( TaskClusterController ); + dataDistributorRejoin.getEndpoint( TaskClusterController ); + getDistributorInterface.getEndpoint( TaskClusterController ); } template void serialize( Ar& ar ) { ASSERT( ar.protocolVersion() >= 0x0FDB00A200040001LL ); - serializer(ar, clientInterface, recruitFromConfiguration, recruitRemoteFromConfiguration, recruitStorage, registerWorker, getWorkers, registerMaster, getServerDBInfo); + serializer(ar, 
clientInterface, recruitFromConfiguration, recruitRemoteFromConfiguration, recruitStorage, registerWorker, getWorkers, registerMaster, getServerDBInfo, dataDistributorRejoin, getDistributorInterface); } }; @@ -229,6 +234,43 @@ struct GetServerDBInfoRequest { } }; +struct GetDistributorInterfaceReply { + DataDistributorInterface distributorInterface; + + GetDistributorInterfaceReply() {} + explicit GetDistributorInterfaceReply(DataDistributorInterface di): distributorInterface(di) {} + template + void serialize(Ar& ar) { + serializer(ar, distributorInterface); + } +}; + +struct GetDistributorInterfaceRequest { + UID reqId; + ReplyPromise< struct GetDistributorInterfaceReply > reply; + + GetDistributorInterfaceRequest() {} + explicit GetDistributorInterfaceRequest(UID id) : reqId(id) {} + + template + void serialize(Ar& ar) { + serializer(ar, reqId, reply); + } +}; + +struct DataDistributorRejoinRequest { + DataDistributorInterface dataDistributor; + ReplyPromise reply; + + DataDistributorRejoinRequest() { } + explicit DataDistributorRejoinRequest(DataDistributorInterface di) : dataDistributor(di) {} + + template + void serialize(Ar& ar) { + serializer(ar, dataDistributor, reply); + } +}; + #include "fdbserver/ServerDBInfo.h" // include order hack #endif diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index c320dd3afc..4da46241e2 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -29,6 +29,7 @@ #include "fdbserver/WaitFailure.h" #include "fdbserver/ServerDBInfo.h" #include "fdbserver/IKeyValueStore.h" +#include "fdbserver/Ratekeeper.h" #include "fdbclient/ManagementAPI.h" #include "fdbrpc/Replication.h" #include "flow/UnitTest.h" @@ -394,7 +395,7 @@ ACTOR Future waitForAllDataRemoved( Database cx, UID serverID, Version add } // Read keyservers, return unique set of teams -ACTOR Future> getInitialDataDistribution( Database cx, UID masterId, MoveKeysLock moveKeysLock, std::vector> remoteDcIds ) { +ACTOR Future> getInitialDataDistribution( Database cx, UID distributorId, MoveKeysLock moveKeysLock, std::vector> remoteDcIds ) { state Reference result = Reference(new InitialDataDistribution); state Key beginKey = allKeys.begin; @@ -444,7 +445,7 @@ ACTOR Future> getInitialDataDistribution( Dat wait( tr.onError(e) ); ASSERT(!succeeded); //We shouldn't be retrying if we have already started modifying result in this loop - TraceEvent("GetInitialTeamsRetry", masterId); + TraceEvent("GetInitialTeamsRetry", distributorId); } } @@ -531,7 +532,7 @@ ACTOR Future> getInitialDataDistribution( Dat wait( tr.onError(e) ); ASSERT(!succeeded); //We shouldn't be retrying if we have already started modifying result in this loop - TraceEvent("GetInitialTeamsKeyServersRetry", masterId); + TraceEvent("GetInitialTeamsKeyServersRetry", distributorId); } } @@ -550,7 +551,7 @@ Future storageServerTracker( TCServerInfo* const& server, ServerStatusMap* const& statusMap, MoveKeysLock const& lock, - UID const& masterId, + UID const& distributorId, std::map>* const& other_servers, Optional> >> const& changes, Promise const& errorOut, @@ -563,7 +564,7 @@ struct DDTeamCollection : ReferenceCounted { PromiseStream> addActor; Database cx; - UID masterId; + UID distributorId; DatabaseConfiguration configuration; bool doBuildTeams; @@ -644,7 +645,7 @@ struct DDTeamCollection : ReferenceCounted { DDTeamCollection( Database const& cx, - UID masterId, + UID distributorId, MoveKeysLock const& lock, PromiseStream const& output, Reference const& 
shardsAffectedByTeamFailure, @@ -654,14 +655,14 @@ struct DDTeamCollection : ReferenceCounted { Optional> >> const& serverChanges, Future readyToStart, Reference> zeroHealthyTeams, bool primary, Reference> processingUnhealthy) - :cx(cx), masterId(masterId), lock(lock), output(output), shardsAffectedByTeamFailure(shardsAffectedByTeamFailure), doBuildTeams(true), teamBuilder( Void() ), badTeamRemover( Void() ), + :cx(cx), distributorId(distributorId), lock(lock), output(output), shardsAffectedByTeamFailure(shardsAffectedByTeamFailure), doBuildTeams(true), teamBuilder( Void() ), badTeamRemover( Void() ), configuration(configuration), serverChanges(serverChanges), readyToStart(readyToStart), checkTeamDelay( delay( SERVER_KNOBS->CHECK_TEAM_DELAY, TaskDataDistribution) ), initialFailureReactionDelay( delayed( readyToStart, SERVER_KNOBS->INITIAL_FAILURE_REACTION_DELAY, TaskDataDistribution ) ), healthyTeamCount( 0 ), storageServerSet(new LocalityMap()), initializationDoneActor(logOnCompletion(readyToStart && initialFailureReactionDelay, this)), optimalTeamCount( 0 ), recruitingStream(0), restartRecruiting( SERVER_KNOBS->DEBOUNCE_RECRUITING_DELAY ), unhealthyServers(0), includedDCs(includedDCs), otherTrackedDCs(otherTrackedDCs), zeroHealthyTeams(zeroHealthyTeams), zeroOptimalTeams(true), primary(primary), processingUnhealthy(processingUnhealthy) { if(!primary || configuration.usableRegions == 1) { - TraceEvent("DDTrackerStarting", masterId) + TraceEvent("DDTrackerStarting", distributorId) .detail( "State", "Inactive" ) .trackLatest( "DDTrackerStarting" ); } @@ -690,7 +691,7 @@ struct DDTeamCollection : ReferenceCounted { wait(delay(SERVER_KNOBS->LOG_ON_COMPLETION_DELAY, TaskDataDistribution)); if(!self->primary || self->configuration.usableRegions == 1) { - TraceEvent("DDTrackerStarting", self->masterId) + TraceEvent("DDTrackerStarting", self->distributorId) .detail( "State", "Active" ) .trackLatest( "DDTrackerStarting" ); } @@ -963,7 +964,7 @@ struct DDTeamCollection : ReferenceCounted { for(auto it : servers) { serverIds.push_back(it->id); } - TraceEvent(SevWarnAlways, "CannotAddSubset", self->masterId).detail("Servers", describe(serverIds)); + TraceEvent(SevWarnAlways, "CannotAddSubset", self->distributorId).detail("Servers", describe(serverIds)); } } } @@ -1023,7 +1024,7 @@ struct DDTeamCollection : ReferenceCounted { TraceEvent( minTeams>0 ? 
SevInfo : SevWarn, - "DataDistributionTeamQuality", masterId) + "DataDistributionTeamQuality", distributorId) .detail("Servers", serverCount) .detail("Teams", teamCount) .detail("TeamsPerServer", teamsPerServer) @@ -1422,7 +1423,7 @@ struct DDTeamCollection : ReferenceCounted { addMachineTeam(machines); addedMachineTeams++; } else { - TraceEvent(SevWarn, "DataDistributionBuildTeams", masterId) + TraceEvent(SevWarn, "DataDistributionBuildTeams", distributorId) .detail("Primary", primary) .detail("Reason", "Unable to make desired machine Teams"); break; @@ -1742,7 +1743,7 @@ struct DDTeamCollection : ReferenceCounted { } } - TraceEvent("BuildTeamsBegin", self->masterId).detail("DesiredTeams", desiredTeams).detail("MaxTeams", maxTeams).detail("BadTeams", self->badTeams.size()) + TraceEvent("BuildTeamsBegin", self->distributorId).detail("DesiredTeams", desiredTeams).detail("MaxTeams", maxTeams).detail("BadTeams", self->badTeams.size()) .detail("UniqueMachines", uniqueMachines).detail("TeamSize", self->configuration.storageTeamSize).detail("Servers", serverCount) .detail("CurrentTrackedTeams", self->teams.size()).detail("HealthyTeamCount", teamCount).detail("TotalTeamCount", totalTeamCount); @@ -1794,7 +1795,7 @@ struct DDTeamCollection : ReferenceCounted { } vector desiredServerVector( desiredServerSet.begin(), desiredServerSet.end() ); - TraceEvent(SevWarn, "NoHealthyTeams", masterId) + TraceEvent(SevWarn, "NoHealthyTeams", distributorId) .detail("CurrentTeamCount", teams.size()) .detail("ServerCount", server_info.size()) .detail("NonFailedServerCount", desiredServerVector.size()); @@ -1813,19 +1814,19 @@ struct DDTeamCollection : ReferenceCounted { } allServers.push_back( newServer.id() ); - TraceEvent("AddedStorageServer", masterId).detail("ServerID", newServer.id()).detail("ProcessClass", processClass.toString()).detail("WaitFailureToken", newServer.waitFailure.getEndpoint().token).detail("Address", newServer.waitFailure.getEndpoint().address); + TraceEvent("AddedStorageServer", distributorId).detail("ServerID", newServer.id()).detail("ProcessClass", processClass.toString()).detail("WaitFailureToken", newServer.waitFailure.getEndpoint().token).detail("Address", newServer.waitFailure.getEndpoint().address); auto &r = server_info[newServer.id()] = Reference( new TCServerInfo( newServer, processClass, includedDCs.empty() || std::find(includedDCs.begin(), includedDCs.end(), newServer.locality.dcId()) != includedDCs.end(), storageServerSet ) ); // Establish the relation between server and machine checkAndCreateMachine(r); - r->tracker = storageServerTracker( this, cx, r.getPtr(), &server_status, lock, masterId, &server_info, serverChanges, errorOut, addedVersion ); + r->tracker = storageServerTracker( this, cx, r.getPtr(), &server_status, lock, distributorId, &server_info, serverChanges, errorOut, addedVersion ); doBuildTeams = true; // Adding a new server triggers to build new teams restartTeamBuilder.trigger(); } bool removeTeam( Reference team ) { - TraceEvent("RemovedTeam", masterId).detail("Team", team->getDesc()); + TraceEvent("RemovedTeam", distributorId).detail("Team", team->getDesc()); bool found = false; for(int t=0; t { } void removeServer( UID removedServer ) { - TraceEvent("RemovedStorageServer", masterId).detail("ServerID", removedServer); + TraceEvent("RemovedStorageServer", distributorId).detail("ServerID", removedServer); // ASSERT( !shardsAffectedByTeamFailure->getServersForTeam( t ) for all t in teams that contain removedServer ) Reference removedServerInfo = 
server_info[removedServer]; @@ -2021,7 +2022,7 @@ struct DDTeamCollection : ReferenceCounted { doBuildTeams = true; restartTeamBuilder.trigger(); - TraceEvent("DataDistributionTeamCollectionUpdate", masterId) + TraceEvent("DataDistributionTeamCollectionUpdate", distributorId) .detail("Teams", teams.size()) .detail("BadTeams", badTeams.size()) .detail("Servers", allServers.size()); @@ -2042,13 +2043,13 @@ ACTOR Future teamTracker( DDTeamCollection* self, Reference te state bool firstCheck = true; if(logTeamEvents) { - TraceEvent("TeamTrackerStarting", self->masterId).detail("Reason", "Initial wait complete (sc)").detail("Team", team->getDesc()); + TraceEvent("TeamTrackerStarting", self->distributorId).detail("Reason", "Initial wait complete (sc)").detail("Team", team->getDesc()); } self->priority_teams[team->getPriority()]++; try { loop { - TraceEvent("TeamHealthChangeDetected", self->masterId) + TraceEvent("TeamHealthChangeDetected", self->distributorId) .detail("Primary", self->primary) .detail("IsReady", self->initialFailureReactionDelay.isReady()); // Check if the number of degraded machines has changed @@ -2103,7 +2104,7 @@ ACTOR Future teamTracker( DDTeamCollection* self, Reference te if (serversLeft != lastServersLeft || anyUndesired != lastAnyUndesired || anyWrongConfiguration != lastWrongConfiguration || recheck) { // NOTE: do not check wrongSize if(logTeamEvents) { - TraceEvent("TeamHealthChanged", self->masterId) + TraceEvent("TeamHealthChanged", self->distributorId) .detail("Team", team->getDesc()).detail("ServersLeft", serversLeft) .detail("LastServersLeft", lastServersLeft).detail("ContainsUndesiredServer", anyUndesired) .detail("HealthyTeamsCount", self->healthyTeamCount).detail("IsWrongConfiguration", anyWrongConfiguration); @@ -2128,12 +2129,12 @@ ACTOR Future teamTracker( DDTeamCollection* self, Reference te self->zeroHealthyTeams->set(self->healthyTeamCount == 0); if( self->healthyTeamCount == 0 ) { - TraceEvent(SevWarn, "ZeroTeamsHealthySignalling", self->masterId) + TraceEvent(SevWarn, "ZeroTeamsHealthySignalling", self->distributorId) .detail("SignallingTeam", team->getDesc()) .detail("Primary", self->primary); } - TraceEvent("TeamHealthDifference", self->masterId) + TraceEvent("TeamHealthDifference", self->distributorId) .detail("LastOptimal", lastOptimal) .detail("LastHealthy", lastHealthy) .detail("Optimal", optimal) @@ -2168,7 +2169,7 @@ ACTOR Future teamTracker( DDTeamCollection* self, Reference te } if(logTeamEvents) { - TraceEvent("TeamPriorityChange", self->masterId).detail("Priority", team->getPriority()); + TraceEvent("TeamPriorityChange", self->distributorId).detail("Priority", team->getPriority()); } lastZeroHealthy = self->zeroHealthyTeams->get(); //set this again in case it changed from this teams health changing @@ -2216,7 +2217,7 @@ ACTOR Future teamTracker( DDTeamCollection* self, Reference te self->output.send(rs); if(g_random->random01() < 0.01) { - TraceEvent("SendRelocateToDDQx100", self->masterId) + TraceEvent("SendRelocateToDDQx100", self->distributorId) .detail("Team", team->getDesc()) .detail("KeyBegin", printable(rs.keys.begin)) .detail("KeyEnd", printable(rs.keys.end)) @@ -2227,7 +2228,7 @@ ACTOR Future teamTracker( DDTeamCollection* self, Reference te } } else { if(logTeamEvents) { - TraceEvent("TeamHealthNotReady", self->masterId).detail("HealthyTeamCount", self->healthyTeamCount); + TraceEvent("TeamHealthNotReady", self->distributorId).detail("HealthyTeamCount", self->healthyTeamCount); } } } @@ -2243,7 +2244,7 @@ ACTOR Future teamTracker( 
DDTeamCollection* self, Reference te ASSERT( self->healthyTeamCount >= 0 ); if( self->healthyTeamCount == 0 ) { - TraceEvent(SevWarn, "ZeroTeamsHealthySignalling", self->masterId).detail("SignallingTeam", team->getDesc()); + TraceEvent(SevWarn, "ZeroTeamsHealthySignalling", self->distributorId).detail("SignallingTeam", team->getDesc()); self->zeroHealthyTeams->set(true); } } @@ -2273,7 +2274,7 @@ ACTOR Future trackExcludedServers( DDTeamCollection* self ) { excluded.insert( addr ); } - TraceEvent("DDExcludedServersChanged", self->masterId).detail("Rows", results.size()).detail("Exclusions", excluded.size()); + TraceEvent("DDExcludedServersChanged", self->distributorId).detail("Rows", results.size()).detail("Exclusions", excluded.size()); // Reset and reassign self->excludedServers based on excluded, but weonly // want to trigger entries that are different @@ -2412,7 +2413,7 @@ ACTOR Future removeBadTeams(DDTeamCollection* self) { } } wait(self->addSubsetComplete.getFuture()); - TraceEvent("DDRemovingBadTeams", self->masterId).detail("Primary", self->primary); + TraceEvent("DDRemovingBadTeams", self->distributorId).detail("Primary", self->primary); for(auto it : self->badTeams) { it->tracker.cancel(); } @@ -2461,7 +2462,7 @@ ACTOR Future storageServerFailureTracker( self->doBuildTeams = true; } - TraceEvent("StatusMapChange", self->masterId).detail("ServerID", interf.id()).detail("Status", status->toString()) + TraceEvent("StatusMapChange", self->distributorId).detail("ServerID", interf.id()).detail("Status", status->toString()) .detail("Available", IFailureMonitor::failureMonitor().getState(interf.waitFailure.getEndpoint()).isAvailable()); } when ( wait( status->isUnhealthy() ? waitForAllDataRemoved(cx, interf.id(), addedVersion) : Never() ) ) { break; } @@ -2479,7 +2480,7 @@ ACTOR Future storageServerTracker( TCServerInfo *server, //This actor is owned by this TCServerInfo ServerStatusMap *statusMap, MoveKeysLock lock, - UID masterId, + UID distributorId, std::map>* other_servers, Optional> >> changes, Promise errorOut, @@ -2509,7 +2510,7 @@ ACTOR Future storageServerTracker( for(auto i = other_servers->begin(); i != other_servers->end(); ++i) { if (i->second.getPtr() != server && i->second->lastKnownInterface.address() == server->lastKnownInterface.address()) { auto& statusInfo = statusMap->get( i->first ); - TraceEvent("SameAddress", masterId) + TraceEvent("SameAddress", distributorId) .detail("Failed", statusInfo.isFailed) .detail("Undesired", statusInfo.isUndesired) .detail("Server", server->id).detail("OtherServer", i->second->id) @@ -2522,7 +2523,7 @@ ACTOR Future storageServerTracker( if(!statusMap->get( i->second->id ).isUnhealthy()) { if(self->shardsAffectedByTeamFailure->getNumberOfShards(i->second->id) >= self->shardsAffectedByTeamFailure->getNumberOfShards(server->id)) { - TraceEvent(SevWarn, "UndesiredStorageServer", masterId) + TraceEvent(SevWarn, "UndesiredStorageServer", distributorId) .detail("Server", server->id) .detail("Address", server->lastKnownInterface.address()) .detail("OtherServer", i->second->id) @@ -2544,7 +2545,7 @@ ACTOR Future storageServerTracker( if( server->lastKnownClass.machineClassFitness( ProcessClass::Storage ) > ProcessClass::UnsetFit ) { if( self->optimalTeamCount > 0 ) { - TraceEvent(SevWarn, "UndesiredStorageServer", masterId) + TraceEvent(SevWarn, "UndesiredStorageServer", distributorId) .detail("Server", server->id) .detail("OptimalTeamCount", self->optimalTeamCount) .detail("Fitness", 
server->lastKnownClass.machineClassFitness(ProcessClass::Storage)); @@ -2555,7 +2556,7 @@ ACTOR Future storageServerTracker( //If this storage server has the wrong key-value store type, then mark it undesired so it will be replaced with a server having the correct type if(hasWrongStoreTypeOrDC) { - TraceEvent(SevWarn, "UndesiredStorageServer", masterId).detail("Server", server->id).detail("StoreType", "?"); + TraceEvent(SevWarn, "UndesiredStorageServer", distributorId).detail("Server", server->id).detail("StoreType", "?"); status.isUndesired = true; status.isWrongConfiguration = true; } @@ -2565,7 +2566,7 @@ ACTOR Future storageServerTracker( AddressExclusion addr( a.ip, a.port ); AddressExclusion ipaddr( a.ip ); if (self->excludedServers.get( addr ) || self->excludedServers.get( ipaddr )) { - TraceEvent(SevWarn, "UndesiredStorageServer", masterId).detail("Server", server->id) + TraceEvent(SevWarn, "UndesiredStorageServer", distributorId).detail("Server", server->id) .detail("Excluded", self->excludedServers.get( addr ) ? addr.toString() : ipaddr.toString()); status.isUndesired = true; status.isWrongConfiguration = true; @@ -2586,7 +2587,7 @@ ACTOR Future storageServerTracker( choose { when( wait( failureTracker ) ) { // The server is failed AND all data has been removed from it, so permanently remove it. - TraceEvent("StatusMapChange", masterId).detail("ServerID", server->id).detail("Status", "Removing"); + TraceEvent("StatusMapChange", distributorId).detail("ServerID", server->id).detail("Status", "Removing"); if(changes.present()) { changes.get().send( std::make_pair(server->id, Optional()) ); } @@ -2598,7 +2599,7 @@ ACTOR Future storageServerTracker( // Remove server from FF/serverList wait( removeStorageServer( cx, server->id, lock ) ); - TraceEvent("StatusMapChange", masterId).detail("ServerID", server->id).detail("Status", "Removed"); + TraceEvent("StatusMapChange", distributorId).detail("ServerID", server->id).detail("Status", "Removed"); // Sets removeSignal (alerting dataDistributionTeamCollection to remove the storage server from its own data structures) server->removed.send( Void() ); self->removedServers.send( server->id ); @@ -2609,7 +2610,7 @@ ACTOR Future storageServerTracker( bool localityChanged = server->lastKnownInterface.locality != newInterface.first.locality; bool machineLocalityChanged = server->lastKnownInterface.locality.zoneId().get() != newInterface.first.locality.zoneId().get(); - TraceEvent("StorageServerInterfaceChanged", masterId).detail("ServerID", server->id) + TraceEvent("StorageServerInterfaceChanged", distributorId).detail("ServerID", server->id) .detail("NewWaitFailureToken", newInterface.first.waitFailure.getEndpoint().token) .detail("OldWaitFailureToken", server->lastKnownInterface.waitFailure.getEndpoint().token) .detail("LocalityChanged", localityChanged); @@ -2705,10 +2706,10 @@ ACTOR Future storageServerTracker( self->restartRecruiting.trigger(); } when( wait( otherChanges.empty() ? 
Never() : quorum( otherChanges, 1 ) ) ) { - TraceEvent("SameAddressChangedStatus", masterId).detail("ServerID", server->id); + TraceEvent("SameAddressChangedStatus", distributorId).detail("ServerID", server->id); } when( KeyValueStoreType type = wait( storeTracker ) ) { - TraceEvent("KeyValueStoreTypeChanged", masterId) + TraceEvent("KeyValueStoreTypeChanged", distributorId) .detail("ServerID", server->id) .detail("StoreType", type.toString()) .detail("DesiredType", self->configuration.storageServerStoreType.toString()); @@ -2732,17 +2733,17 @@ ACTOR Future storageServerTracker( //Monitor whether or not storage servers are being recruited. If so, then a database cannot be considered quiet ACTOR Future monitorStorageServerRecruitment(DDTeamCollection* self) { state bool recruiting = false; - TraceEvent("StorageServerRecruitment", self->masterId) + TraceEvent("StorageServerRecruitment", self->distributorId) .detail("State", "Idle") - .trackLatest(("StorageServerRecruitment_" + self->masterId.toString()).c_str()); + .trackLatest(("StorageServerRecruitment_" + self->distributorId.toString()).c_str()); loop { if( !recruiting ) { while(self->recruitingStream.get() == 0) { wait( self->recruitingStream.onChange() ); } - TraceEvent("StorageServerRecruitment", self->masterId) + TraceEvent("StorageServerRecruitment", self->distributorId) .detail("State", "Recruiting") - .trackLatest(("StorageServerRecruitment_" + self->masterId.toString()).c_str()); + .trackLatest(("StorageServerRecruitment_" + self->distributorId.toString()).c_str()); recruiting = true; } else { loop { @@ -2751,9 +2752,9 @@ ACTOR Future monitorStorageServerRecruitment(DDTeamCollection* self) { when( wait( self->recruitingStream.get() == 0 ? delay(SERVER_KNOBS->RECRUITMENT_IDLE_DELAY, TaskDataDistribution) : Future(Never()) ) ) { break; } } } - TraceEvent("StorageServerRecruitment", self->masterId) + TraceEvent("StorageServerRecruitment", self->distributorId) .detail("State", "Idle") - .trackLatest(("StorageServerRecruitment_" + self->masterId.toString()).c_str()); + .trackLatest(("StorageServerRecruitment_" + self->distributorId.toString()).c_str()); recruiting = false; } } @@ -2843,7 +2844,7 @@ ACTOR Future storageRecruiter( DDTeamCollection* self, ReferencemasterId); + TraceEvent(SevWarn, "DDRecruitingEmergency", self->distributorId); } if(!fCandidateWorker.isValid() || fCandidateWorker.isReady() || rsr.excludeAddresses != lastRequest.excludeAddresses || rsr.criticalRecruitment != lastRequest.criticalRecruitment) { @@ -2880,7 +2881,7 @@ ACTOR Future updateReplicasKey(DDTeamCollection* self, Optional dcId) wait(self->initialFailureReactionDelay && waitForAll(serverUpdates)); loop { while(self->zeroHealthyTeams->get() || self->processingUnhealthy->get()) { - TraceEvent("DDUpdatingStalled", self->masterId).detail("DcId", printable(dcId)).detail("ZeroHealthy", self->zeroHealthyTeams->get()).detail("ProcessingUnhealthy", self->processingUnhealthy->get()); + TraceEvent("DDUpdatingStalled", self->distributorId).detail("DcId", printable(dcId)).detail("ZeroHealthy", self->zeroHealthyTeams->get()).detail("ProcessingUnhealthy", self->processingUnhealthy->get()); wait(self->zeroHealthyTeams->onChange() || self->processingUnhealthy->onChange()); } wait(delay(FLOW_KNOBS->PREVENT_FAST_SPIN_DELAY, TaskLowPriority)); //After the team trackers wait on the initial failure reaction delay, they yield. We want to make sure every tracker has had the opportunity to send their relocations to the queue. 
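// Editor's note: a standalone restatement (plain C++, hypothetical names) of
// the single-distributor invariant the cluster controller enforces in this
// patch: a rejoin request is accepted when no distributor is registered, and
// is otherwise acknowledged only if it comes from the distributor that is
// already registered, so the cluster never runs two data distributors.
#include <cstdint>

struct DistributorRegistrySketch {
	uint64_t currentId = 0; // 0 == no distributor registered
	// Returns true iff the rejoining distributor becomes, or already is, current.
	bool handleRejoin(uint64_t rejoinId) {
		if (currentId == 0) { currentId = rejoinId; return true; }
		return currentId == rejoinId;
	}
};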
@@ -2888,14 +2889,14 @@ ACTOR Future updateReplicasKey(DDTeamCollection* self, Optional dcId) break; } } - TraceEvent("DDUpdatingReplicas", self->masterId).detail("DcId", printable(dcId)).detail("Replicas", self->configuration.storageTeamSize); + TraceEvent("DDUpdatingReplicas", self->distributorId).detail("DcId", printable(dcId)).detail("Replicas", self->configuration.storageTeamSize); state Transaction tr(self->cx); loop { try { Optional val = wait( tr.get(datacenterReplicasKeyFor(dcId)) ); state int oldReplicas = val.present() ? decodeDatacenterReplicasValue(val.get()) : 0; if(oldReplicas == self->configuration.storageTeamSize) { - TraceEvent("DDUpdatedAlready", self->masterId).detail("DcId", printable(dcId)).detail("Replicas", self->configuration.storageTeamSize); + TraceEvent("DDUpdatedAlready", self->distributorId).detail("DcId", printable(dcId)).detail("Replicas", self->configuration.storageTeamSize); return Void(); } if(oldReplicas < self->configuration.storageTeamSize) { @@ -2903,7 +2904,7 @@ ACTOR Future updateReplicasKey(DDTeamCollection* self, Optional dcId) } tr.set(datacenterReplicasKeyFor(dcId), datacenterReplicasValue(self->configuration.storageTeamSize)); wait( tr.commit() ); - TraceEvent("DDUpdatedReplicas", self->masterId).detail("DcId", printable(dcId)).detail("Replicas", self->configuration.storageTeamSize).detail("OldReplicas", oldReplicas); + TraceEvent("DDUpdatedReplicas", self->distributorId).detail("DcId", printable(dcId)).detail("Replicas", self->configuration.storageTeamSize).detail("OldReplicas", oldReplicas); return Void(); } catch( Error &e ) { wait( tr.onError(e) ); @@ -2935,9 +2936,9 @@ ACTOR Future dataDistributionTeamCollection( initData = Reference(); self->addActor.send(serverGetTeamRequests(tci, self)); - TraceEvent("DDTeamCollectionBegin", self->masterId).detail("Primary", self->primary); + TraceEvent("DDTeamCollectionBegin", self->distributorId).detail("Primary", self->primary); wait( self->readyToStart || error ); - TraceEvent("DDTeamCollectionReadyToStart", self->masterId).detail("Primary", self->primary); + TraceEvent("DDTeamCollectionReadyToStart", self->distributorId).detail("Primary", self->primary); if(self->badTeamRemover.isReady()) { self->badTeamRemover = removeBadTeams(self); @@ -2978,7 +2979,7 @@ ACTOR Future dataDistributionTeamCollection( } } - TraceEvent("TotalDataInFlight", self->masterId) + TraceEvent("TotalDataInFlight", self->distributorId) .detail("Primary", self->primary) .detail("TotalBytes", self->getDebugTotalDataInFlight()) .detail("UnhealthyServers", self->unhealthyServers) @@ -2993,7 +2994,7 @@ ACTOR Future dataDistributionTeamCollection( } } catch (Error& e) { if (e.code() != error_code_movekeys_conflict) - TraceEvent(SevError, "DataDistributionTeamCollectionError", self->masterId).error(e); + TraceEvent(SevError, "DataDistributionTeamCollectionError", self->distributorId).error(e); throw e; } } @@ -3099,9 +3100,8 @@ ACTOR Future pollMoveKeysLock( Database cx, MoveKeysLock lock ) { ACTOR Future dataDistribution( Reference> db, - MasterInterface mi, DatabaseConfiguration configuration, + UID myId, DatabaseConfiguration configuration, PromiseStream< std::pair> > serverChanges, - Reference logSystem, Version recoveryCommitVersion, std::vector> primaryDcId, std::vector> remoteDcIds, @@ -3148,25 +3148,25 @@ ACTOR Future dataDistribution( loop { try { loop { - TraceEvent("DDInitTakingMoveKeysLock", mi.id()); - state MoveKeysLock lock = wait( takeMoveKeysLock( cx, mi.id() ) ); - TraceEvent("DDInitTookMoveKeysLock", mi.id()); - 
state Reference initData = wait( getInitialDataDistribution(cx, mi.id(), lock, configuration.usableRegions > 1 ? remoteDcIds : std::vector>() ) ); + TraceEvent("DDInitTakingMoveKeysLock", myId); + state MoveKeysLock lock = wait( takeMoveKeysLock( cx, myId ) ); + TraceEvent("DDInitTookMoveKeysLock", myId); + state Reference initData = wait( getInitialDataDistribution(cx, myId, lock, configuration.usableRegions > 1 ? remoteDcIds : std::vector>() ) ); if(initData->shards.size() > 1) { - TraceEvent("DDInitGotInitialDD", mi.id()) + TraceEvent("DDInitGotInitialDD", myId) .detail("B", printable(initData->shards.end()[-2].key)) .detail("E", printable(initData->shards.end()[-1].key)) .detail("Src", describe(initData->shards.end()[-2].primarySrc)) .detail("Dest", describe(initData->shards.end()[-2].primaryDest)) .trackLatest("InitialDD"); } else { - TraceEvent("DDInitGotInitialDD", mi.id()).detail("B","").detail("E", "").detail("Src", "[no items]").detail("Dest", "[no items]").trackLatest("InitialDD"); + TraceEvent("DDInitGotInitialDD", myId).detail("B","").detail("E", "").detail("Src", "[no items]").detail("Dest", "[no items]").trackLatest("InitialDD"); } - if (initData->mode) break; // mode may be set true by system operator using fdbcli - TraceEvent("DataDistributionDisabled", mi.id()); + if (initData->mode) break; + TraceEvent("DataDistributionDisabled", myId); - TraceEvent("MovingData", mi.id()) + TraceEvent("MovingData", myId) .detail( "InFlight", 0 ) .detail( "InQueue", 0 ) .detail( "AverageShardSize", -1 ) @@ -3175,8 +3175,8 @@ ACTOR Future dataDistribution( .detail( "HighestPriority", 0 ) .trackLatest( "MovingData" ); - TraceEvent("TotalDataInFlight", mi.id()).detail("Primary", true).detail("TotalBytes", 0).detail("UnhealthyServers", 0).detail("HighestPriority", 0).trackLatest("TotalDataInFlight"); - TraceEvent("TotalDataInFlight", mi.id()).detail("Primary", false).detail("TotalBytes", 0).detail("UnhealthyServers", 0).detail("HighestPriority", configuration.usableRegions > 1 ? 0 : -1).trackLatest("TotalDataInFlightRemote"); + TraceEvent("TotalDataInFlight", myId).detail("Primary", true).detail("TotalBytes", 0).detail("UnhealthyServers", 0).detail("HighestPriority", 0).trackLatest("TotalDataInFlight"); + TraceEvent("TotalDataInFlight", myId).detail("Primary", false).detail("TotalBytes", 0).detail("UnhealthyServers", 0).detail("HighestPriority", configuration.usableRegions > 1 ? 
0 : -1).trackLatest("TotalDataInFlightRemote"); wait( waitForDataDistributionEnabled(cx) ); TraceEvent("DataDistributionEnabled"); @@ -3241,20 +3241,20 @@ ACTOR Future dataDistribution( } actors.push_back( pollMoveKeysLock(cx, lock) ); - actors.push_back( reportErrorsExcept( dataDistributionTracker( initData, cx, output, shardsAffectedByTeamFailure, getShardMetrics, getAverageShardBytes.getFuture(), readyToStart, anyZeroHealthyTeams, mi.id() ), "DDTracker", mi.id(), &normalDDQueueErrors() ) ); - actors.push_back( reportErrorsExcept( dataDistributionQueue( cx, output, input.getFuture(), getShardMetrics, processingUnhealthy, tcis, shardsAffectedByTeamFailure, lock, getAverageShardBytes, mi, storageTeamSize, lastLimited, recoveryCommitVersion ), "DDQueue", mi.id(), &normalDDQueueErrors() ) ); + actors.push_back( reportErrorsExcept( dataDistributionTracker( initData, cx, output, shardsAffectedByTeamFailure, getShardMetrics, getAverageShardBytes.getFuture(), readyToStart, anyZeroHealthyTeams, myId ), "DDTracker", myId, &normalDDQueueErrors() ) ); + actors.push_back( reportErrorsExcept( dataDistributionQueue( cx, output, input.getFuture(), getShardMetrics, processingUnhealthy, tcis, shardsAffectedByTeamFailure, lock, getAverageShardBytes, myId, storageTeamSize, lastLimited, recoveryCommitVersion ), "DDQueue", myId, &normalDDQueueErrors() ) ); vector teamCollectionsPtrs; - Reference primaryTeamCollection( new DDTeamCollection(cx, mi.id(), lock, output, shardsAffectedByTeamFailure, configuration, primaryDcId, configuration.usableRegions > 1 ? remoteDcIds : std::vector>(), serverChanges, readyToStart.getFuture(), zeroHealthyTeams[0], true, processingUnhealthy) ); + Reference primaryTeamCollection( new DDTeamCollection(cx, myId, lock, output, shardsAffectedByTeamFailure, configuration, primaryDcId, configuration.usableRegions > 1 ? 
remoteDcIds : std::vector>(), serverChanges, readyToStart.getFuture(), zeroHealthyTeams[0], true, processingUnhealthy) ); teamCollectionsPtrs.push_back(primaryTeamCollection.getPtr()); if (configuration.usableRegions > 1) { - Reference remoteTeamCollection( new DDTeamCollection(cx, mi.id(), lock, output, shardsAffectedByTeamFailure, configuration, remoteDcIds, Optional>>(), serverChanges, readyToStart.getFuture() && remoteRecovered, zeroHealthyTeams[1], false, processingUnhealthy) ); + Reference remoteTeamCollection( new DDTeamCollection(cx, myId, lock, output, shardsAffectedByTeamFailure, configuration, remoteDcIds, Optional>>(), serverChanges, readyToStart.getFuture() && remoteRecovered, zeroHealthyTeams[1], false, processingUnhealthy) ); teamCollectionsPtrs.push_back(remoteTeamCollection.getPtr()); remoteTeamCollection->teamCollections = teamCollectionsPtrs; - actors.push_back( reportErrorsExcept( dataDistributionTeamCollection( remoteTeamCollection, initData, tcis[1], db ), "DDTeamCollectionSecondary", mi.id(), &normalDDQueueErrors() ) ); + actors.push_back( reportErrorsExcept( dataDistributionTeamCollection( remoteTeamCollection, initData, tcis[1], db ), "DDTeamCollectionSecondary", myId, &normalDDQueueErrors() ) ); } primaryTeamCollection->teamCollections = teamCollectionsPtrs; - actors.push_back( reportErrorsExcept( dataDistributionTeamCollection( primaryTeamCollection, initData, tcis[0], db ), "DDTeamCollectionPrimary", mi.id(), &normalDDQueueErrors() ) ); + actors.push_back( reportErrorsExcept( dataDistributionTeamCollection( primaryTeamCollection, initData, tcis[0], db ), "DDTeamCollectionPrimary", myId, &normalDDQueueErrors() ) ); actors.push_back(yieldPromiseStream(output.getFuture(), input)); wait( waitForAll( actors ) ); @@ -3272,6 +3272,165 @@ ACTOR Future dataDistribution( } } +struct DataDistributorData : NonCopyable, ReferenceCounted { + Reference> dbInfo; + Reference> configuration; + std::vector> primaryDcId; + std::vector> remoteDcIds; + AsyncTrigger configurationTrigger; + UID ddId; + PromiseStream< std::pair> > ddStorageServerChanges; + PromiseStream> addActor; + + DataDistributorData(Reference> const& db, Reference> const& dbConfig, UID id, PromiseStream> const& addActor) + : dbInfo(db), configuration(dbConfig), ddId(id), addActor(addActor) {} +}; + +ACTOR Future configurationMonitor( Reference self ) { + state Database cx = openDBOnServer(self->dbInfo, TaskDefaultEndpoint, true, true); + loop { + state ReadYourWritesTransaction tr(cx); + + loop { + try { + tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); + Standalone results = wait( tr.getRange( configKeys, CLIENT_KNOBS->TOO_MANY ) ); + ASSERT( !results.more && results.size() < CLIENT_KNOBS->TOO_MANY ); + + DatabaseConfiguration conf; + conf.fromKeyValues( (VectorRef) results ); + if ( conf != self->configuration->get() ) { + TraceEvent("DataDistributor", self->ddId).detail("UpdateConfiguration", conf.toString()); + self->configuration->set( conf ); + self->configurationTrigger.trigger(); + } + + state Future watchFuture = tr.watch(excludedServersVersionKey); + wait( tr.commit() ); + wait( watchFuture ); + break; + } catch (Error& e) { + wait( tr.onError(e) ); + } + } + } +} + +static std::set const& normalDataDistributorErrors() { + static std::set s; + if (s.empty()) { + s.insert( error_code_worker_removed ); + s.insert( error_code_broken_promise ); + s.insert( error_code_actor_cancelled ); + s.insert( error_code_please_reboot ); + s.insert( error_code_movekeys_conflict ); + } + return s; +} + +static 
std::set const& normalRateKeeperErrors() { + static std::set s; + if (s.empty()) { + s.insert( error_code_worker_removed ); + s.insert( error_code_broken_promise ); + s.insert( error_code_actor_cancelled ); + s.insert( error_code_please_reboot ); + } + return s; +} + +ACTOR Future dataDistributor(DataDistributorInterface di, Reference> db ) { + state UID lastClusterControllerID(0,0); + state PromiseStream> addActor; + state Reference> configuration( new AsyncVar(DatabaseConfiguration()) ); + state Reference self( new DataDistributorData(db, configuration, di.id, addActor) ); + state Future collection = actorCollection( self->addActor.getFuture() ); + state Future trigger = self->configurationTrigger.onTrigger(); + state Version recoveryTransactionVersion = invalidVersion; + + TraceEvent("NewDataDistributorID", di.id); + self->addActor.send( waitFailureServer(di.waitFailure.getFuture()) ); + self->addActor.send( configurationMonitor( self ) ); + + loop choose { + // Get configuration from the master. Can't use configurationMonitor for it + // because the transaction read needs ratekeeper, which is not started yet. + when ( GetRecoveryInfoReply infoReply = wait( brokenPromiseToNever(self->dbInfo->get().master.getRecoveryInfo.getReply(GetRecoveryInfoRequest(di.id)) )) ) { + configuration->set( infoReply.configuration ); + recoveryTransactionVersion = infoReply.recoveryTransactionVersion; + TraceEvent("DataDistributor", di.id) + .detail("RecoveryVersion", infoReply.recoveryTransactionVersion) + .detail("Configuration", configuration->get().toString()); + // TODO: is remoteRecovered.getFuture() as Void() in dataDistribution() correct? + break; + } + when ( wait(self->dbInfo->onChange()) ) {} + } + + const std::vector& regions = self->configuration->get().regions; + TraceEvent ev("DataDistributor", di.id); + if ( regions.size() > 0 ) { + self->primaryDcId.push_back( regions[0].dcId ); + ev.detail("PrimaryDcID", regions[0].dcId.toHexString()); + } + if ( regions.size() > 1 ) { + self->remoteDcIds.push_back( regions[1].dcId ); + ev.detail("SecondaryDcID", regions[1].dcId.toHexString()); + } + + try { + PromiseStream< std::pair> > ddStorageServerChanges; + state double lastLimited = 0; + TraceEvent("DataDistributor", di.id).detail("StartDD", "RK"); + self->addActor.send( reportErrorsExcept( dataDistribution( self->dbInfo, di.id, self->configuration->get(), ddStorageServerChanges, recoveryTransactionVersion, self->primaryDcId, self->remoteDcIds, &lastLimited, Void() ), "DataDistribution", di.id, &normalDataDistributorErrors() ) ); + self->addActor.send( reportErrorsExcept( rateKeeper( self->dbInfo, ddStorageServerChanges, di.getRateInfo.getFuture(), self->configuration->get(), &lastLimited ), "Ratekeeper", di.id, &normalRateKeeperErrors() ) ); + + state Future reply; + loop { + if ( self->dbInfo->get().clusterInterface.id() != lastClusterControllerID ) { + // Rejoin the new cluster controller + DataDistributorRejoinRequest req(di); + TraceEvent("DataDistributorRejoining", di.id) + .detail("OldClusterControllerID", lastClusterControllerID) + .detail("ClusterControllerID", self->dbInfo->get().clusterInterface.id()); + reply = self->dbInfo->get().clusterInterface.dataDistributorRejoin.getReply(req); + } else { + reply = Never(); + } + choose { + when (bool success = wait(brokenPromiseToNever(reply))) { + if (success) { + lastClusterControllerID = self->dbInfo->get().clusterInterface.id(); + TraceEvent("DataDistributorRejoined", di.id) + .detail("ClusterControllerID", lastClusterControllerID); + } else 
{
+					TraceEvent("DataDistributorRejoinFailed", di.id); // Another data distributor probably already exists.
+					break;
+				}
+			}
+			when (wait(self->dbInfo->onChange())) {}
+			when (wait(trigger)) { break; } // TODO: confirm that breaking out here is right, since the configuration changed.
+			when (wait(collection)) {
+				ASSERT(false);
+				throw internal_error();
+			}
+		}
+	}
+	catch ( Error &err ) {
+		if ( normalDataDistributorErrors().count(err.code()) == 0 ) {
+			TraceEvent("DataDistributorError", di.id).error(err);
+			throw err;
+		}
+		TraceEvent("DataDistributorTerminated", di.id).error(err);
+	}
+
+	while ( !self->addActor.isEmpty() ) {
+		self->addActor.getFuture().pop();
+	}
+	return Void();
+}
+
 DDTeamCollection* testTeamCollection(int teamSize, IRepPolicyRef policy, int processCount) {
 	Database database = DatabaseContext::create(
 		Reference<AsyncVar<ClientDBInfo>>(new AsyncVar<ClientDBInfo>()),
diff --git a/fdbserver/DataDistribution.h b/fdbserver/DataDistribution.h
index 902cf4c48a..6160f48054 100644
--- a/fdbserver/DataDistribution.h
+++ b/fdbserver/DataDistribution.h
@@ -200,17 +200,6 @@ struct InitialDataDistribution : ReferenceCounted<InitialDataDistribution> {
 	vector<DDShardInfo> shards;
 };
 
-Future<Void> dataDistribution(
-	Reference<AsyncVar<struct ServerDBInfo>> const& db,
-	MasterInterface const& mi, DatabaseConfiguration const& configuration,
-	PromiseStream< std::pair<UID, Optional<StorageServerInterface>> > const& serverChanges,
-	Reference<ILogSystem> const& logSystem,
-	Version const& recoveryCommitVersion,
-	std::vector<Optional<Key>> const& primaryDcId,
-	std::vector<Optional<Key>> const& remoteDcIds,
-	double* const& lastLimited,
-	Future<Void> const& remoteRecovered);
-
 Future<Void> dataDistributionTracker(
 	Reference<InitialDataDistribution> const& initData,
 	Database const& cx,
@@ -220,7 +209,7 @@ Future<Void> dataDistributionTracker(
 	FutureStream<Promise<int64_t>> const& getAverageShardBytes,
 	Promise<Void> const& readyToStart,
 	Reference<AsyncVar<bool>> const& zeroHealthyTeams,
-	UID const& masterId);
+	UID const& distributorId);
 
 Future<Void> dataDistributionQueue(
 	Database const& cx,
@@ -232,7 +221,7 @@ Future<Void> dataDistributionQueue(
 	Reference<ShardsAffectedByTeamFailure> const& shardsAffectedByTeamFailure,
 	MoveKeysLock const& lock,
 	PromiseStream<Promise<int64_t>> const& getAverageShardBytes,
-	MasterInterface const& mi,
+	UID const& distributorId,
 	int const& teamSize,
 	double* const& lastLimited,
 	Version const& recoveryVersion);
diff --git a/fdbserver/DataDistributionQueue.actor.cpp b/fdbserver/DataDistributionQueue.actor.cpp
index 38cc158e43..9a5ff2e0fc 100644
--- a/fdbserver/DataDistributionQueue.actor.cpp
+++ b/fdbserver/DataDistributionQueue.actor.cpp
@@ -331,7 +331,7 @@ void complete( RelocateData const& relocation, std::map<UID, Busyness> & busymap
 Future<Void> dataDistributionRelocator( struct DDQueueData* const& self, RelocateData const& rd );
 
 struct DDQueueData {
-	MasterInterface mi;
+	UID distributorId;
 	MoveKeysLock lock;
 	Database cx;
 	Version recoveryVersion;
@@ -394,11 +394,11 @@ struct DDQueueData {
 		priority_relocations[priority]--;
 	}
 
-	DDQueueData( MasterInterface mi, MoveKeysLock lock, Database cx, std::vector<TeamCollectionInterface> teamCollections,
+	DDQueueData( UID mid, MoveKeysLock lock, Database cx, std::vector<TeamCollectionInterface> teamCollections,
 		Reference<ShardsAffectedByTeamFailure> sABTF, PromiseStream<Promise<int64_t>> getAverageShardBytes, int teamSize,
 		PromiseStream<RelocateShard> output, FutureStream<RelocateShard> input, PromiseStream<GetMetricsRequest> getShardMetrics, double* lastLimited, Version recoveryVersion ) :
 			activeRelocations( 0 ), queuedRelocations( 0 ), bytesWritten ( 0 ), teamCollections( teamCollections ),
-			shardsAffectedByTeamFailure( sABTF ), getAverageShardBytes( getAverageShardBytes ), mi( mi ), lock( lock ),
+			shardsAffectedByTeamFailure( sABTF ), getAverageShardBytes( getAverageShardBytes ), distributorId( mid ), lock( lock ),
			cx( cx ), teamSize( teamSize ), output( output ), input( input ), getShardMetrics( getShardMetrics ),
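			// Note: only the distributor's UID is retained here; the old MasterInterface handle was used solely to stamp trace events.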
startMoveKeysParallelismLock( SERVER_KNOBS->DD_MOVE_KEYS_PARALLELISM ), finishMoveKeysParallelismLock( SERVER_KNOBS->DD_MOVE_KEYS_PARALLELISM ), lastLimited(lastLimited), recoveryVersion(recoveryVersion), suppressIntervals(0), lastInterval(0), unhealthyRelocations(0), rawProcessingUnhealthy( new AsyncVar(false) ) {} @@ -506,7 +506,7 @@ struct DDQueueData { } } - ACTOR Future getSourceServersForRange( Database cx, MasterInterface mi, RelocateData input, PromiseStream output ) { + ACTOR Future getSourceServersForRange( Database cx, RelocateData input, PromiseStream output ) { state std::set servers; state Transaction tr(cx); @@ -637,14 +637,14 @@ struct DDQueueData { rrs.keys = affectedQueuedItems[r]; rrs.interval = TraceInterval("QueuedRelocation"); - /*TraceEvent(rrs.interval.begin(), mi.id()); + /*TraceEvent(rrs.interval.begin(), distributorId); .detail("KeyBegin", printable(rrs.keys.begin)).detail("KeyEnd", printable(rrs.keys.end)) .detail("Priority", rrs.priority).detail("WantsNewServers", rrs.wantsNewServers);*/ queuedRelocations++; startRelocation(rrs.priority); fetchingSourcesQueue.insert( rrs ); - getSourceActors.insert( rrs.keys, getSourceServersForRange( cx, mi, rrs, fetchSourceServersComplete ) ); + getSourceActors.insert( rrs.keys, getSourceServersForRange( cx, rrs, fetchSourceServersComplete ) ); } else { RelocateData newData( rrs ); newData.keys = affectedQueuedItems[r]; @@ -657,7 +657,7 @@ struct DDQueueData { if( serverQueue.erase(rrs) > 0 ) { if( !foundActiveRelocation ) { newData.interval = TraceInterval("QueuedRelocation"); - /*TraceEvent(newData.interval.begin(), mi.id()); + /*TraceEvent(newData.interval.begin(), distributorId); .detail("KeyBegin", printable(newData.keys.begin)).detail("KeyEnd", printable(newData.keys.end)) .detail("Priority", newData.priority).detail("WantsNewServers", newData.wantsNewServers);*/ queuedRelocations++; @@ -677,7 +677,7 @@ struct DDQueueData { } } - /*TraceEvent("ReceivedRelocateShard", mi.id()) + /*TraceEvent("ReceivedRelocateShard", distributorId) .detail("KeyBegin", printable(rd.keys.begin)) .detail("KeyEnd", printable(rd.keys.end)) .detail("Priority", rd.priority) @@ -701,7 +701,7 @@ struct DDQueueData { for(int i = 0; i < rd.src.size() && i < teamSize * 2; i++) busyString += describe(rd.src[i]) + " - (" + busymap[ rd.src[i] ].toString() + "); "; - TraceEvent(title, mi.id()) + TraceEvent(title, distributorId) .detail("KeyBegin", printable(rd.keys.begin)) .detail("KeyEnd", printable(rd.keys.end)) .detail("Priority", rd.priority) @@ -759,7 +759,7 @@ struct DDQueueData { !rd.keys.contains( it->range() ) && it->value().priority >= rd.priority && rd.priority < PRIORITY_TEAM_UNHEALTHY ) { - /*TraceEvent("OverlappingInFlight", mi.id()) + /*TraceEvent("OverlappingInFlight", distributorId) .detail("KeyBegin", printable(it->value().keys.begin)) .detail("KeyEnd", printable(it->value().keys.end)) .detail("Priority", it->value().priority); */ @@ -792,7 +792,7 @@ struct DDQueueData { //logRelocation( rd, "LaunchingRelocation" ); - //TraceEvent(rd.interval.end(), mi.id()).detail("Result","Success"); + //TraceEvent(rd.interval.end(), distributorId).detail("Result","Success"); queuedRelocations--; finishRelocation(rd.priority); @@ -832,7 +832,7 @@ struct DDQueueData { TraceEvent(SevWarnAlways, "LaunchingQueueSlowx1000").detail("Elapsed", now() - startTime ); /*if( startedHere > 0 ) { - TraceEvent("StartedDDRelocators", mi.id()) + TraceEvent("StartedDDRelocators", distributorId) .detail("QueueSize", queuedRelocations) .detail("StartedHere", startedHere) 
.detail("ActiveRelocations", activeRelocations); @@ -853,7 +853,7 @@ ACTOR Future dataDistributionRelocator( DDQueueData *self, RelocateData rd state PromiseStream dataTransferComplete( self->dataTransferComplete ); state PromiseStream relocationComplete( self->relocationComplete ); state bool signalledTransferComplete = false; - state UID masterId = self->mi.id(); + state UID distributorId = self->distributorId; state ParallelTCInfo healthyDestinations; state bool anyHealthy = false; @@ -867,7 +867,7 @@ ACTOR Future dataDistributionRelocator( DDQueueData *self, RelocateData rd self->suppressIntervals++; } - TraceEvent(relocateShardInterval.begin(), masterId) + TraceEvent(relocateShardInterval.begin(), distributorId) .detail("KeyBegin", printable(rd.keys.begin)).detail("KeyEnd", printable(rd.keys.end)) .detail("Priority", rd.priority).detail("RelocationID", relocateShardInterval.pairID).detail("SuppressedEventCount", self->suppressIntervals); @@ -928,7 +928,7 @@ ACTOR Future dataDistributionRelocator( DDQueueData *self, RelocateData rd TEST(true); //did not find a healthy destination team on the first attempt stuckCount++; - TraceEvent(stuckCount > 50 ? SevWarnAlways : SevWarn, "BestTeamStuck", masterId) + TraceEvent(stuckCount > 50 ? SevWarnAlways : SevWarn, "BestTeamStuck", distributorId) .suppressFor(1.0) .detail("Count", stuckCount) .detail("TeamCollectionId", tciIndex) @@ -981,7 +981,7 @@ ACTOR Future dataDistributionRelocator( DDQueueData *self, RelocateData rd //FIXME: do not add data in flight to servers that were already in the src. healthyDestinations.addDataInFlightToTeam(+metrics.bytes); - TraceEvent(relocateShardInterval.severity, "RelocateShardHasDestination", masterId) + TraceEvent(relocateShardInterval.severity, "RelocateShardHasDestination", distributorId) .detail("PairId", relocateShardInterval.pairID) .detail("DestinationTeam", describe(destIds)) .detail("ExtraIds", describe(extraIds)); @@ -1027,7 +1027,7 @@ ACTOR Future dataDistributionRelocator( DDQueueData *self, RelocateData rd error = e; } - //TraceEvent("RelocateShardFinished", masterId).detail("RelocateId", relocateShardInterval.pairID); + //TraceEvent("RelocateShardFinished", distributorId).detail("RelocateId", relocateShardInterval.pairID); if( error.code() != error_code_move_to_removed_server ) { if( !error.code() ) { @@ -1042,7 +1042,7 @@ ACTOR Future dataDistributionRelocator( DDQueueData *self, RelocateData rd // onFinished.send( rs ); if( !error.code() ) { - TraceEvent(relocateShardInterval.end(), masterId).detail("Result","Success"); + TraceEvent(relocateShardInterval.end(), distributorId).detail("Result","Success"); if(rd.keys.begin == keyServersPrefix) { TraceEvent("MovedKeyServerKeys").detail("Dest", describe(destIds)).trackLatest("MovedKeyServers"); } @@ -1066,7 +1066,7 @@ ACTOR Future dataDistributionRelocator( DDQueueData *self, RelocateData rd } } } catch (Error& e) { - TraceEvent(relocateShardInterval.end(), masterId).error(e, true); + TraceEvent(relocateShardInterval.end(), distributorId).error(e, true); if( !signalledTransferComplete ) dataTransferComplete.send( rd ); @@ -1100,7 +1100,7 @@ ACTOR Future rebalanceTeams( DDQueueData* self, int priority, Reference shards = self->shardsAffectedByTeamFailure->getShardsFor( ShardsAffectedByTeamFailure::Team( sourceTeam->getServerIDs(), primary ) ); for( int i = 0; i < shards.size(); i++ ) { if( moveShard == shards[i] ) { - TraceEvent(priority == PRIORITY_REBALANCE_OVERUTILIZED_TEAM ? 
"BgDDMountainChopper" : "BgDDValleyFiller", self->mi.id()) + TraceEvent(priority == PRIORITY_REBALANCE_OVERUTILIZED_TEAM ? "BgDDMountainChopper" : "BgDDValleyFiller", self->distributorId) .detail("SourceBytes", sourceBytes) .detail("DestBytes", destBytes) .detail("ShardBytes", metrics.bytes) @@ -1195,12 +1195,12 @@ ACTOR Future dataDistributionQueue( Reference shardsAffectedByTeamFailure, MoveKeysLock lock, PromiseStream> getAverageShardBytes, - MasterInterface mi, + UID distributorId, int teamSize, double* lastLimited, Version recoveryVersion) { - state DDQueueData self( mi, lock, cx, teamCollections, shardsAffectedByTeamFailure, getAverageShardBytes, teamSize, output, input, getShardMetrics, lastLimited, recoveryVersion ); + state DDQueueData self( distributorId, lock, cx, teamCollections, shardsAffectedByTeamFailure, getAverageShardBytes, teamSize, output, input, getShardMetrics, lastLimited, recoveryVersion ); state std::set serversToLaunchFrom; state KeyRange keysToLaunchFrom; state RelocateData launchData; @@ -1286,7 +1286,7 @@ ACTOR Future dataDistributionQueue( highPriorityRelocations += it->second; } - TraceEvent("MovingData", mi.id()) + TraceEvent("MovingData", distributorId) .detail( "InFlight", self.activeRelocations ) .detail( "InQueue", self.queuedRelocations ) .detail( "AverageShardSize", req.getFuture().isReady() ? req.getFuture().get() : -1 ) @@ -1303,7 +1303,7 @@ ACTOR Future dataDistributionQueue( } catch (Error& e) { if (e.code() != error_code_broken_promise && // FIXME: Get rid of these broken_promise errors every time we are killed by the master dying e.code() != error_code_movekeys_conflict) - TraceEvent(SevError, "DataDistributionQueueError", mi.id()).error(e); + TraceEvent(SevError, "DataDistributionQueueError", distributorId).error(e); throw e; } } diff --git a/fdbserver/DataDistributorInterface.h b/fdbserver/DataDistributorInterface.h new file mode 100644 index 0000000000..df11ceef9e --- /dev/null +++ b/fdbserver/DataDistributorInterface.h @@ -0,0 +1,72 @@ +/* + * DataDistributorInterface.h + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef FOUNDATIONDB_DATADISTRIBUTORINTERFACE_H
+#define FOUNDATIONDB_DATADISTRIBUTORINTERFACE_H
+
+#include "fdbrpc/fdbrpc.h"
+
+struct DataDistributorInterface {
+	UID id;
+	RequestStream<ReplyPromise<Void>> waitFailure;
+	RequestStream< struct GetRateInfoRequest > getRateInfo;
+
+	DataDistributorInterface() {}
+
+	NetworkAddress address() const { return getRateInfo.getEndpoint().address; }
+	bool operator== (const DataDistributorInterface& r) const {
+		return id == r.id;
+	}
+	bool operator!= (const DataDistributorInterface& r) const {
+		return !(*this == r);
+	}
+	bool isValid() const { return id != UID(); }
+
+	template <class Archive>
+	void serialize(Archive& ar) {
+		serializer(ar, id, waitFailure, getRateInfo);
+	}
+};
+
+struct GetRateInfoRequest {
+	UID requesterID;
+	int64_t totalReleasedTransactions;
+	ReplyPromise<struct GetRateInfoReply> reply;
+
+	GetRateInfoRequest() {}
+	GetRateInfoRequest( UID const& requesterID, int64_t totalReleasedTransactions ) : requesterID(requesterID), totalReleasedTransactions(totalReleasedTransactions) {}
+
+	template <class Ar>
+	void serialize(Ar& ar) {
+		serializer(ar, requesterID, totalReleasedTransactions, reply);
+	}
+};
+
+struct GetRateInfoReply {
+	double transactionRate;
+	double leaseDuration;
+
+	template <class Ar>
+	void serialize(Ar& ar) {
+		serializer(ar, transactionRate, leaseDuration);
+	}
+};
+
+#endif //FOUNDATIONDB_DATADISTRIBUTORINTERFACE_H
diff --git a/fdbserver/MasterInterface.h b/fdbserver/MasterInterface.h
index d4b5f396ab..948bb4de59 100644
--- a/fdbserver/MasterInterface.h
+++ b/fdbserver/MasterInterface.h
@@ -25,6 +25,7 @@
 #include "fdbclient/FDBTypes.h"
 #include "fdbclient/StorageServerInterface.h"
 #include "fdbclient/CommitTransaction.h"
+#include "fdbclient/DatabaseConfiguration.h"
 #include "fdbserver/TLogInterface.h"
 
 typedef uint64_t DBRecoveryCount;
@@ -32,10 +33,10 @@ typedef uint64_t DBRecoveryCount;
 struct MasterInterface {
 	LocalityData locality;
 	RequestStream< ReplyPromise<Void> > waitFailure;
-	RequestStream< struct GetRateInfoRequest > getRateInfo;
 	RequestStream< struct TLogRejoinRequest > tlogRejoin; // sent by tlog (whether or not rebooted) to communicate with a new master
 	RequestStream< struct ChangeCoordinatorsRequest > changeCoordinators;
 	RequestStream< struct GetCommitVersionRequest > getCommitVersion;
+	RequestStream< struct GetRecoveryInfoRequest > getRecoveryInfo;
 
 	NetworkAddress address() const { return changeCoordinators.getEndpoint().address; }
 
@@ -43,7 +44,7 @@ struct MasterInterface {
 	template <class Archive>
 	void serialize(Archive& ar) {
 		ASSERT( ar.protocolVersion() >= 0x0FDB00A200040001LL );
-		serializer(ar, locality, waitFailure, getRateInfo, tlogRejoin, changeCoordinators, getCommitVersion);
+		serializer(ar, locality, waitFailure, tlogRejoin, changeCoordinators, getCommitVersion, getRecoveryInfo);
 	}
 
 	void initEndpoints() {
@@ -51,30 +52,6 @@ struct MasterInterface {
 	}
 };
 
-struct GetRateInfoRequest {
-	UID requesterID;
-	int64_t totalReleasedTransactions;
-	ReplyPromise<struct GetRateInfoReply> reply;
-
-	GetRateInfoRequest() {}
-	GetRateInfoRequest( UID const& requesterID, int64_t totalReleasedTransactions ) : requesterID(requesterID), totalReleasedTransactions(totalReleasedTransactions) {}
-
-	template <class Ar>
-	void serialize(Ar& ar) {
-		serializer(ar, requesterID, totalReleasedTransactions, reply);
-	}
-};
-
-struct GetRateInfoReply {
-	double transactionRate;
-	double leaseDuration;
-
-	template <class Ar>
-	void serialize(Ar& ar) {
-		serializer(ar, transactionRate, leaseDuration);
-	}
-};
-
 struct TLogRejoinRequest {
 	TLogInterface myInterface;
	ReplyPromise<bool> reply; // false means someone else registered, so we should
re-register. true means this master is recovered, so don't send again to the same master. @@ -157,6 +134,31 @@ struct GetCommitVersionRequest { } }; +struct GetRecoveryInfoReply { + Version recoveryTransactionVersion; + DatabaseConfiguration configuration; + + GetRecoveryInfoReply() : recoveryTransactionVersion(invalidVersion) {} + explicit GetRecoveryInfoReply(Version v, DatabaseConfiguration c) : recoveryTransactionVersion(v), configuration(c) {} + + template + void serialize(Ar& ar) { + serializer(ar, recoveryTransactionVersion, configuration); + } +}; + +struct GetRecoveryInfoRequest { + UID reqId; + ReplyPromise reply; + + GetRecoveryInfoRequest() {} + explicit GetRecoveryInfoRequest(UID id) : reqId(id) {} + template + void serialize(Ar& ar) { + serializer(ar, reqId, reply); + } +}; + struct LifetimeToken { UID ccID; int64_t count; diff --git a/fdbserver/MasterProxyServer.actor.cpp b/fdbserver/MasterProxyServer.actor.cpp index 200eaa7200..8330f4197d 100644 --- a/fdbserver/MasterProxyServer.actor.cpp +++ b/fdbserver/MasterProxyServer.actor.cpp @@ -87,28 +87,63 @@ Future forwardValue(Promise out, Future in) int getBytes(Promise const& r) { return 0; } -ACTOR Future getRate(UID myID, MasterInterface master, int64_t* inTransactionCount, double* outTransactionRate) { - state Future nextRequestTimer = Void(); +ACTOR Future monitorDataDistributor(UID myID, Reference> db, Reference> dataDistributor) { + state Future distributorFailure = Never(); + state Future reply = brokenPromiseToNever( db->get().clusterInterface.getDistributorInterface.getReply( GetDistributorInterfaceRequest(myID) ) ); + + loop choose { + when ( GetDistributorInterfaceReply r = wait( reply ) ) { + reply = Never(); + dataDistributor->set( r.distributorInterface ); + distributorFailure = waitFailureClient( dataDistributor->get().waitFailure, SERVER_KNOBS->WORKER_FAILURE_TIME ); + TraceEvent("Proxy", myID).detail("DataDistributorChangedID", dataDistributor->get().id) + .detail("Endpoint", dataDistributor->get().waitFailure.getEndpoint().token); + } + when ( wait( db->onChange() ) ) { + distributorFailure = Never(); + reply = brokenPromiseToNever( db->get().clusterInterface.getDistributorInterface.getReply( GetDistributorInterfaceRequest(myID) ) ); + } + when ( wait( distributorFailure ) ) { + distributorFailure = Never(); + TraceEvent("Proxy", myID) + .detail("CC", db->get().clusterInterface.id()) + .detail("DataDistributorFailed", dataDistributor->get().id) + .detail("Token", dataDistributor->get().waitFailure.getEndpoint().token); + wait( delay(0.001) ); + reply = brokenPromiseToNever( db->get().clusterInterface.getDistributorInterface.getReply( GetDistributorInterfaceRequest(myID) ) ); + } + } +} + +ACTOR Future getRate(UID myID, Reference> db, int64_t* inTransactionCount, double* outTransactionRate, Reference> dataDistributor) { + state Future nextRequestTimer = Never(); state Future leaseTimeout = Never(); - state Future reply; + state Future reply = Never(); state int64_t lastTC = 0; - loop choose{ - when(wait(nextRequestTimer)) { - nextRequestTimer = Never(); - reply = brokenPromiseToNever(master.getRateInfo.getReply(GetRateInfoRequest(myID, *inTransactionCount))); + loop choose { + when ( wait( dataDistributor->onChange() ) ) { + if ( dataDistributor->get().isValid() ) { + nextRequestTimer = Void(); // trigger GetRate request + } else { + nextRequestTimer = Never(); + } } - when(GetRateInfoReply rep = wait(reply)) { + when ( wait( nextRequestTimer ) ) { + nextRequestTimer = Never(); + reply = 
brokenPromiseToNever(dataDistributor->get().getRateInfo.getReply(GetRateInfoRequest(myID, *inTransactionCount))); + } + when ( GetRateInfoReply rep = wait(reply) ) { reply = Never(); *outTransactionRate = rep.transactionRate; - //TraceEvent("MasterProxyRate", myID).detail("Rate", rep.transactionRate).detail("Lease", rep.leaseDuration).detail("ReleasedTransactions", *inTransactionCount - lastTC); + TraceEvent("MasterProxyRate", myID).detail("Rate", rep.transactionRate).detail("Lease", rep.leaseDuration).detail("ReleasedTransactions", *inTransactionCount - lastTC); lastTC = *inTransactionCount; leaseTimeout = delay(rep.leaseDuration); nextRequestTimer = delayJittered(rep.leaseDuration / 2); } - when(wait(leaseTimeout)) { + when ( wait(leaseTimeout ) ) { *outTransactionRate = 0; - //TraceEvent("MasterProxyRate", myID).detail("Rate", 0).detail("Lease", "Expired"); + TraceEvent("MasterProxyRate", myID).detail("Rate", 0).detail("Lease", "Expired"); leaseTimeout = Never(); } } @@ -1079,7 +1114,6 @@ ACTOR Future sendGrvReplies(Future replyFuture, std:: ACTOR static Future transactionStarter( MasterProxyInterface proxy, - MasterInterface master, Reference> db, PromiseStream> addActor, ProxyCommitData* commitData @@ -1096,7 +1130,6 @@ ACTOR static Future transactionStarter( state vector otherProxies; state PromiseStream replyTimes; - addActor.send(getRate(proxy.id(), master, &transactionCount, &transactionRate)); addActor.send(queueTransactionStartRequests(&transactionQueue, proxy.getConsistentReadVersion.getFuture(), GRVTimer, &lastGRVTime, &GRVBatchTime, replyTimes.getFuture(), &commitData->stats)); // Get a list of the other proxies that go together with us @@ -1106,6 +1139,9 @@ ACTOR static Future transactionStarter( if (mp != proxy) otherProxies.push_back(mp); } + state Reference> dataDistributor( new AsyncVar(DataDistributorInterface()) ); + addActor.send( getRate(proxy.id(), db, &transactionCount, &transactionRate, dataDistributor) ); // do this after correct CC is obtained. + addActor.send( monitorDataDistributor(proxy.id(), db, dataDistributor) ); ASSERT(db->get().recoveryState >= RecoveryState::ACCEPTING_COMMITS); // else potentially we could return uncommitted read versions (since self->committedVersion is only a committed version if this recovery succeeds) @@ -1413,7 +1449,7 @@ ACTOR Future masterProxyServerCore( TraceEvent(SevInfo, "CommitBatchesMemoryLimit").detail("BytesLimit", commitBatchesMemoryLimit); addActor.send(monitorRemoteCommitted(&commitData, db)); - addActor.send(transactionStarter(proxy, master, db, addActor, &commitData)); + addActor.send(transactionStarter(proxy, db, addActor, &commitData)); addActor.send(readRequestServer(proxy, &commitData)); // wait for txnStateStore recovery diff --git a/fdbserver/QuietDatabase.actor.cpp b/fdbserver/QuietDatabase.actor.cpp index 392cb690ea..7e587abb3f 100644 --- a/fdbserver/QuietDatabase.actor.cpp +++ b/fdbserver/QuietDatabase.actor.cpp @@ -30,6 +30,8 @@ #include "fdbclient/ManagementAPI.h" #include "flow/actorcompiler.h" // This must be the last #include. 
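(The QuietDatabase helpers below stop asking the master for these metrics and instead locate the data distributor's worker. Callers get back both halves they need: the worker interface, for eventLogRequest, and the distributor's UID, for UID-scoped trackLatest lookups. A hedged usage sketch, assuming the getDataDistributorWorker actor defined just below; the caller shown is hypothetical:

	// Hypothetical caller: resolve the distributor once, then reuse both halves of the pair.
	DistributorPair p = wait( getDataDistributorWorker( cx, dbInfo ) );
	WorkerInterface distributorWorker = p.first;  // target for EventLogRequest queries
	UID distributorUID = p.second;                // key suffix for "StorageServerRecruitment_<UID>" events

This is the same pattern waitForQuietDatabase uses further down.)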
+using DistributorPair = std::pair<WorkerInterface, UID>;
+
 ACTOR Future<vector<std::pair<WorkerInterface, ProcessClass>>> getWorkers( Reference<AsyncVar<ServerDBInfo>> dbInfo, int flags = 0 ) {
 	loop {
 		choose {
@@ -64,17 +66,50 @@ ACTOR Future<WorkerInterface> getMasterWorker( Database cx, Reference<AsyncVar<ServerDBInfo>> dbInfo )
+ACTOR Future<DistributorPair> getDataDistributorWorker( Database cx, Reference<AsyncVar<ServerDBInfo>> dbInfo ) {
+	state Future<vector<std::pair<WorkerInterface, ProcessClass>>> newWorkers = getWorkers( dbInfo );
+	state vector<std::pair<WorkerInterface, ProcessClass>> workers;
+	TraceEvent("GetDataDistributorWorker").detail("Stage", "GettingWorkers");
+
+	loop choose {
+		when ( wait( dbInfo->onChange() ) ) {
+			newWorkers = getWorkers( dbInfo );
+		}
+		when ( vector<std::pair<WorkerInterface, ProcessClass>> w = wait( newWorkers ) ) {
+			workers = w;
+			newWorkers = Never();
+		}
+		when ( GetDistributorInterfaceReply reply = wait( brokenPromiseToNever( dbInfo->get().clusterInterface.getDistributorInterface.getReply( GetDistributorInterfaceRequest() ) ) ) ) {
+			const DataDistributorInterface& ddInterf = reply.distributorInterface;
+
+			for( int i = 0; i < workers.size(); i++ ) {
+				if( workers[i].first.address() == ddInterf.address() ) {
+					TraceEvent("GetDataDistributorWorker").detail("Stage", "GotWorkers").detail("DataDistributorId", ddInterf.id).detail("WorkerId", workers[i].first.id());
+					return std::make_pair(workers[i].first, ddInterf.id);
+				}
+			}
+
+			TraceEvent(SevWarn, "GetDataDistributorWorker")
+				.detail("Error", "DataDistributorWorkerNotFound")
+				.detail("DataDistributorId", ddInterf.id)
+				.detail("DataDistributorAddress", ddInterf.address())
+				.detail("WorkerCount", workers.size());
+		}
+	}
+}
+
-//Gets the number of bytes in flight from the master
-ACTOR Future<int64_t> getDataInFlight( Database cx, WorkerInterface masterWorker ) {
+//Gets the number of bytes in flight from the data distributor
+ACTOR Future<int64_t> getDataInFlight( Database cx, WorkerInterface distributorWorker ) {
 	try {
-		TraceEvent("DataInFlight").detail("Stage", "ContactingMaster");
-		TraceEventFields md = wait( timeoutError(masterWorker.eventLogRequest.getReply(
+		TraceEvent("DataInFlight").detail("Stage", "ContactingDataDistributor");
+		TraceEventFields md = wait( timeoutError(distributorWorker.eventLogRequest.getReply(
 			EventLogRequest( LiteralStringRef("TotalDataInFlight") ) ), 1.0 ) );
 		int64_t dataInFlight;
 		sscanf(md.getValue("TotalBytes").c_str(), "%lld", &dataInFlight);
 		return dataInFlight;
 	} catch( Error &e ) {
-		TraceEvent("QuietDatabaseFailure", masterWorker.id()).error(e).detail("Reason", "Failed to extract DataInFlight");
+		TraceEvent("QuietDatabaseFailure", distributorWorker.id()).error(e).detail("Reason", "Failed to extract DataInFlight");
 		throw;
 	}
 
@@ -83,8 +118,8 @@ ACTOR Future<int64_t> getDataInFlight( Database cx, WorkerInterface masterWorker
-//Gets the number of bytes in flight from the master
-//Convenience method that first finds the master worker from a zookeeper interface
+//Gets the number of bytes in flight from the data distributor
+//Convenience method that first finds the data distributor's worker
 ACTOR Future<int64_t> getDataInFlight( Database cx, Reference<AsyncVar<ServerDBInfo>> dbInfo ) {
-	WorkerInterface masterWorker = wait(getMasterWorker(cx, dbInfo));
-	int64_t dataInFlight = wait(getDataInFlight(cx, masterWorker));
+	DistributorPair distributorPair = wait( getDataDistributorWorker(cx, dbInfo) );
+	int64_t dataInFlight = wait(getDataInFlight(cx, distributorPair.first));
 	return dataInFlight;
 }
 
@@ -101,7 +136,7 @@ int64_t getQueueSize( TraceEventFields md ) {
 }
 
 // This is not robust in the face of a TLog failure
-ACTOR Future<int64_t> getMaxTLogQueueSize( Database cx, Reference<AsyncVar<ServerDBInfo>> dbInfo, WorkerInterface masterWorker ) {
+ACTOR Future<int64_t> getMaxTLogQueueSize( Database cx, Reference<AsyncVar<ServerDBInfo>> dbInfo ) {
 	TraceEvent("MaxTLogQueueSize").detail("Stage", "ContactingLogs");
 
 	state std::vector<std::pair<WorkerInterface, ProcessClass>> workers = wait(getWorkers(dbInfo));
@@ -139,12 +174,6 @@ ACTOR Future<int64_t> getMaxTLogQueueSize( Database cx, Reference<AsyncVar<ServerDBInfo>> dbInfo,
 
-//Convenience method that first finds the master worker from a zookeeper interface
-ACTOR Future<int64_t> getMaxTLogQueueSize( Database cx, Reference<AsyncVar<ServerDBInfo>> dbInfo
) { - WorkerInterface masterWorker = wait(getMasterWorker(cx, dbInfo)); - int64_t maxQueueSize = wait(getMaxTLogQueueSize(cx, dbInfo, masterWorker)); - return maxQueueSize; -} - ACTOR Future> getStorageServers( Database cx, bool use_system_priority = false) { state Transaction tr( cx ); if (use_system_priority) @@ -167,7 +196,7 @@ ACTOR Future> getStorageServers( Database cx, boo } //Gets the maximum size of all the storage server queues -ACTOR Future getMaxStorageServerQueueSize( Database cx, Reference> dbInfo, WorkerInterface masterWorker ) { +ACTOR Future getMaxStorageServerQueueSize( Database cx, Reference> dbInfo ) { TraceEvent("MaxStorageServerQueueSize").detail("Stage", "ContactingStorageServers"); Future> serversFuture = getStorageServers(cx); @@ -202,7 +231,7 @@ ACTOR Future getMaxStorageServerQueueSize( Database cx, Reference getMaxStorageServerQueueSize( Database cx, Reference getMaxStorageServerQueueSize( Database cx, Reference> dbInfo ) { - WorkerInterface masterWorker = wait(getMasterWorker(cx, dbInfo)); - int64_t maxQueueSize = wait(getMaxStorageServerQueueSize(cx, dbInfo, masterWorker)); - return maxQueueSize; -} - //Gets the size of the data distribution queue. If reportInFlight is true, then data in flight is considered part of the queue -ACTOR Future getDataDistributionQueueSize( Database cx, WorkerInterface masterWorker, bool reportInFlight) { +ACTOR Future getDataDistributionQueueSize( Database cx, WorkerInterface distributorWorker, bool reportInFlight) { try { - TraceEvent("DataDistributionQueueSize").detail("Stage", "ContactingMaster"); + TraceEvent("DataDistributionQueueSize").detail("Stage", "ContactingDataDistributor"); - TraceEventFields movingDataMessage = wait( timeoutError(masterWorker.eventLogRequest.getReply( + TraceEventFields movingDataMessage = wait( timeoutError(distributorWorker.eventLogRequest.getReply( EventLogRequest( LiteralStringRef("MovingData") ) ), 1.0 ) ); TraceEvent("DataDistributionQueueSize").detail("Stage", "GotString"); @@ -239,7 +260,7 @@ ACTOR Future getDataDistributionQueueSize( Database cx, WorkerInterface return inQueue; } catch( Error &e ) { - TraceEvent("QuietDatabaseFailure", masterWorker.id()).detail("Reason", "Failed to extract DataDistributionQueueSize"); + TraceEvent("QuietDatabaseFailure", distributorWorker.id()).detail("Reason", "Failed to extract DataDistributionQueueSize"); throw; } } @@ -247,37 +268,39 @@ ACTOR Future getDataDistributionQueueSize( Database cx, WorkerInterface //Gets the size of the data distribution queue. 
If reportInFlight is true, then data in flight is considered part of the queue //Convenience method that first finds the master worker from a zookeeper interface ACTOR Future getDataDistributionQueueSize( Database cx, Reference> dbInfo, bool reportInFlight ) { - WorkerInterface masterWorker = wait(getMasterWorker(cx, dbInfo)); - int64_t inQueue = wait(getDataDistributionQueueSize( cx, masterWorker, reportInFlight)); + DistributorPair distributorPair = wait( getDataDistributorWorker(cx, dbInfo) ); + int64_t inQueue = wait( getDataDistributionQueueSize( cx, distributorPair.first, reportInFlight) ); return inQueue; } -//Checks that data distribution is active -ACTOR Future getDataDistributionActive( Database cx, WorkerInterface masterWorker ) { +// Checks that data distribution is active +ACTOR Future getDataDistributionActive( Database cx, WorkerInterface distributorWorker ) { try { - TraceEvent("DataDistributionActive").detail("Stage", "ContactingMaster"); + TraceEvent("DataDistributionActive").detail("Stage", "ContactingDataDistributor"); - TraceEventFields activeMessage = wait( timeoutError(masterWorker.eventLogRequest.getReply( + TraceEventFields activeMessage = wait( timeoutError(distributorWorker.eventLogRequest.getReply( EventLogRequest( LiteralStringRef("DDTrackerStarting") ) ), 1.0 ) ); return activeMessage.getValue("State") == "Active"; } catch( Error &e ) { - TraceEvent("QuietDatabaseFailure", masterWorker.id()).detail("Reason", "Failed to extract DataDistributionActive"); + TraceEvent("QuietDatabaseFailure", distributorWorker.id()).detail("Reason", "Failed to extract DataDistributionActive"); throw; } } -//Checks to see if any storage servers are being recruited -ACTOR Future getStorageServersRecruiting( Database cx, Reference> dbInfo, WorkerInterface masterWorker ) { +// Checks to see if any storage servers are being recruited +ACTOR Future getStorageServersRecruiting( Database cx, Reference> dbInfo, WorkerInterface distributorWorker, UID distributorUID ) { try { - TraceEvent("StorageServersRecruiting").detail("Stage", "ContactingMaster"); - - TraceEventFields recruitingMessage = wait( timeoutError(masterWorker.eventLogRequest.getReply( - EventLogRequest( StringRef( "StorageServerRecruitment_" + dbInfo->get().master.id().toString()) ) ), 1.0 ) ); + TraceEvent("StorageServersRecruiting").detail("Stage", "ContactingDataDistributor"); + TraceEventFields recruitingMessage = wait( timeoutError(distributorWorker.eventLogRequest.getReply( + EventLogRequest( StringRef( "StorageServerRecruitment_" + distributorUID.toString()) ) ), 1.0 ) ); + TraceEvent("StorageServersRecruiting").detail("Message", recruitingMessage.toString()); return recruitingMessage.getValue("State") == "Recruiting"; } catch( Error &e ) { - TraceEvent("QuietDatabaseFailure", masterWorker.id()).detail("Reason", "Failed to extract StorageServersRecruiting").detail("MasterID", dbInfo->get().master.id()); + TraceEvent("QuietDatabaseFailure", distributorWorker.id()) + .detail("Reason", "Failed to extract StorageServersRecruiting") + .detail("DataDistributorID", distributorUID); throw; } } @@ -323,16 +346,18 @@ ACTOR Future waitForQuietDatabase( Database cx, Reference dataInFlight = getDataInFlight( cx, masterWorker); - state Future tLogQueueSize = getMaxTLogQueueSize( cx, dbInfo, masterWorker ); - state Future dataDistributionQueueSize = getDataDistributionQueueSize( cx, masterWorker, dataInFlightGate == 0); - state Future storageQueueSize = getMaxStorageServerQueueSize( cx, dbInfo, masterWorker ); - state Future 
dataDistributionActive = getDataDistributionActive( cx, masterWorker ); - state Future storageServersRecruiting = getStorageServersRecruiting ( cx, dbInfo, masterWorker ); + state Future dataInFlight = getDataInFlight( cx, distributorWorker); + state Future tLogQueueSize = getMaxTLogQueueSize( cx, dbInfo ); + state Future dataDistributionQueueSize = getDataDistributionQueueSize( cx, distributorWorker, dataInFlightGate == 0); + state Future storageQueueSize = getMaxStorageServerQueueSize( cx, dbInfo ); + state Future dataDistributionActive = getDataDistributionActive( cx, distributorWorker ); + state Future storageServersRecruiting = getStorageServersRecruiting ( cx, dbInfo, distributorWorker, distributorUID ); wait( success( dataInFlight ) && success( tLogQueueSize ) && success( dataDistributionQueueSize ) && success( storageQueueSize ) && success( dataDistributionActive ) && success( storageServersRecruiting ) ); diff --git a/fdbserver/WorkerInterface.h b/fdbserver/WorkerInterface.h index b9de0c4484..0c9998f20a 100644 --- a/fdbserver/WorkerInterface.h +++ b/fdbserver/WorkerInterface.h @@ -22,6 +22,7 @@ #define FDBSERVER_WORKERINTERFACE_H #pragma once +#include "fdbserver/DataDistributorInterface.h" #include "fdbserver/MasterInterface.h" #include "fdbserver/TLogInterface.h" #include "fdbserver/ResolverInterface.h" @@ -40,6 +41,7 @@ struct WorkerInterface { RequestStream< struct InitializeTLogRequest > tLog; RequestStream< struct RecruitMasterRequest > master; RequestStream< struct InitializeMasterProxyRequest > masterProxy; + RequestStream< struct InitializeDataDistributorRequest > dataDistributor; RequestStream< struct InitializeResolverRequest > resolver; RequestStream< struct InitializeStorageRequest > storage; RequestStream< struct InitializeLogRouterRequest > logRouter; @@ -62,7 +64,7 @@ struct WorkerInterface { template void serialize(Ar& ar) { - serializer(ar, clientInterface, locality, tLog, master, masterProxy, resolver, storage, logRouter, debugPing, coordinationPing, waitFailure, setMetricsRate, eventLogRequest, traceBatchDumpRequest, testerInterface, diskStoreRequest); + serializer(ar, clientInterface, locality, tLog, master, masterProxy, dataDistributor, resolver, storage, logRouter, debugPing, coordinationPing, waitFailure, setMetricsRate, eventLogRequest, traceBatchDumpRequest, testerInterface, diskStoreRequest); } }; @@ -133,6 +135,16 @@ struct InitializeMasterProxyRequest { } }; +struct InitializeDataDistributorRequest { + UID reqId; + ReplyPromise reply; + + template + void serialize( Ar& ar ) { + serializer(ar, reqId, reply); + } +}; + struct InitializeResolverRequest { uint64_t recoveryCount; int proxyCount; @@ -281,6 +293,7 @@ struct Role { static const Role CLUSTER_CONTROLLER; static const Role TESTER; static const Role LOG_ROUTER; + static const Role DATA_DISTRIBUTOR; std::string roleName; std::string abbreviation; @@ -330,6 +343,7 @@ Future tLog( class IKeyValueStore* const& persistentData, class IDiskQueue Future monitorServerDBInfo( Reference>> const& ccInterface, Reference const&, LocalityData const&, Reference> const& dbInfo ); Future resolver( ResolverInterface const& proxy, InitializeResolverRequest const&, Reference> const& db ); Future logRouter( TLogInterface const& interf, InitializeLogRouterRequest const& req, Reference> const& db ); +Future dataDistributor( DataDistributorInterface const& ddi, Reference> const& db ); void registerThreadForProfiling(); void updateCpuProfiler(ProfilerRequest req); diff --git a/fdbserver/fdbserver.vcxproj 
b/fdbserver/fdbserver.vcxproj index 3c6924588b..befce949f3 100644 --- a/fdbserver/fdbserver.vcxproj +++ b/fdbserver/fdbserver.vcxproj @@ -158,6 +158,7 @@ + diff --git a/fdbserver/masterserver.actor.cpp b/fdbserver/masterserver.actor.cpp index 142586cdd8..5258dad0da 100644 --- a/fdbserver/masterserver.actor.cpp +++ b/fdbserver/masterserver.actor.cpp @@ -911,6 +911,16 @@ ACTOR Future provideVersions(Reference self) { } } +ACTOR Future provideRecoveryInfo( Reference self ) { + loop choose { + when( GetRecoveryInfoRequest req = waitNext(self->myInterface.getRecoveryInfo.getFuture()) ) { + TraceEvent("MasterGetRecoveryInfo", self->dbgid).detail("ReqID", req.reqId); + GetRecoveryInfoReply reply(self->recoveryTransactionVersion, self->configuration); + req.reply.send( reply ); + } + } +} + std::pair findRange( CoalescedKeyRangeMap& key_resolver, Standalone>& movedRanges, int src, int dest ) { auto ranges = key_resolver.ranges(); auto prev = ranges.begin(); @@ -1031,7 +1041,6 @@ static std::set const& normalMasterErrors() { s.insert( error_code_no_more_servers ); s.insert( error_code_master_recovery_failed ); s.insert( error_code_coordinated_state_conflict ); - s.insert( error_code_movekeys_conflict ); s.insert( error_code_master_max_versions_in_flight ); s.insert( error_code_worker_removed ); s.insert( error_code_new_coordinators_timed_out ); @@ -1349,13 +1358,7 @@ ACTOR Future masterCore( Reference self ) { .detail("RecoveryDuration", recoveryDuration) .trackLatest("MasterRecoveryState"); - // Now that the master is recovered we can start auxiliary services that happen to run here - { - PromiseStream< std::pair> > ddStorageServerChanges; - state double lastLimited = 0; - self->addActor.send( reportErrorsExcept( dataDistribution( self->dbInfo, self->myInterface, self->configuration, ddStorageServerChanges, self->logSystem, self->recoveryTransactionVersion, self->primaryDcId, self->remoteDcIds, &lastLimited, remoteRecovered.getFuture() ), "DataDistribution", self->dbgid, &normalMasterErrors() ) ); - self->addActor.send( reportErrors( rateKeeper( self->dbInfo, ddStorageServerChanges, self->myInterface.getRateInfo.getFuture(), self->configuration, &lastLimited ), "Ratekeeper", self->dbgid) ); - } + self->addActor.send( provideRecoveryInfo(self) ); if( self->resolvers.size() > 1 ) self->addActor.send( resolutionBalancing(self) ); diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index c2d868e9d3..dedd4398a8 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -31,6 +31,7 @@ #include "fdbserver/IDiskQueue.h" #include "fdbclient/DatabaseContext.h" #include "fdbserver/ClusterRecruitmentInterface.h" +#include "fdbserver/DataDistributorInterface.h" #include "fdbserver/ServerDBInfo.h" #include "fdbserver/CoordinationInterface.h" #include "fdbclient/FailureMonitorClient.h" @@ -703,7 +704,6 @@ ACTOR Future workerServer( Reference connFile, Refe startRole( Role::MASTER, recruited.id(), interf.id() ); DUMPTOKEN( recruited.waitFailure ); - DUMPTOKEN( recruited.getRateInfo ); DUMPTOKEN( recruited.tlogRejoin ); DUMPTOKEN( recruited.changeCoordinators ); DUMPTOKEN( recruited.getCommitVersion ); @@ -713,6 +713,16 @@ ACTOR Future workerServer( Reference connFile, Refe errorForwarders.add( zombie(recruited, forwardError( errors, Role::MASTER, recruited.id(), masterProcess )) ); req.reply.send(recruited); } + when ( InitializeDataDistributorRequest req = waitNext(interf.dataDistributor.getFuture()) ) { + DataDistributorInterface recruited; + recruited.id = req.reqId; + 
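// The new role inherits its UID from the recruitment request; startRole and forwardError below bind its lifetime and failures to this worker process.
+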
TraceEvent("DataDistributorReceived", req.reqId).detail("Addr", interf.address()).detail("WorkerId", interf.id()); + startRole( Role::DATA_DISTRIBUTOR, req.reqId, interf.id()); + + Future dataDistributorProcess = dataDistributor( recruited, dbInfo ); + errorForwarders.add( forwardError( errors, Role::DATA_DISTRIBUTOR, req.reqId, dataDistributorProcess ) ); + req.reply.send(recruited); + } when( InitializeTLogRequest req = waitNext(interf.tLog.getFuture()) ) { auto& logData = sharedLogs[req.storeType]; logData.second.send(req); @@ -1086,3 +1096,4 @@ const Role Role::RESOLVER("Resolver", "RV"); const Role Role::CLUSTER_CONTROLLER("ClusterController", "CC"); const Role Role::TESTER("Tester", "TS"); const Role Role::LOG_ROUTER("LogRouter", "LR"); +const Role Role::DATA_DISTRIBUTOR("DataDistributor", "DD"); From 1818aab205dd817f11b3108077a91fb70d21f065 Mon Sep 17 00:00:00 2001 From: Evan Tschannen <36455792+etschannen@users.noreply.github.com> Date: Thu, 17 Jan 2019 11:31:52 -0800 Subject: [PATCH 155/226] Apply suggestions from code review Co-Authored-By: jzhou77 --- fdbserver/ClusterController.actor.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index a09f9fa56a..80d64796c5 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -1334,7 +1334,7 @@ ACTOR Future workerAvailabilityWatch( WorkerInterface worker, ProcessClass checkOutstandingRequests( cluster ); } } - when( wait( failed ) ) { // remote workers that have failed + when( wait( failed ) ) { // remove workers that have failed WorkerInfo& failedWorkerInfo = cluster->id_worker[ worker.locality.processId() ]; if (!failedWorkerInfo.reply.isSet()) { failedWorkerInfo.reply.send( RegisterWorkerReply(failedWorkerInfo.processClass, failedWorkerInfo.priorityInfo) ); @@ -2220,7 +2220,7 @@ ACTOR Future updateDatacenterVersionDifference( ClusterControllerData *sel ACTOR Future clusterGetDistributorInterface( ClusterControllerData *self, UID reqId, ReplyPromise reqReply ) { TraceEvent("CCGetDistributorInterfaceRequest", reqId); - state Future distributorOnchange = Never(); + state Future distributorOnChange = Never(); while ( !self->dataDistributorInterface.get().isValid() ) { wait( self->dataDistributorInterface.onChange() ); @@ -2238,11 +2238,9 @@ ACTOR Future clusterGetDistributorInterface( ClusterControllerData *self, ACTOR Future startDataDistributor( ClusterControllerData *self ) { state Optional dcId = self->clusterControllerDcId; - while ( !dcId.present() || !self->masterProcessId.present() ) { + while ( !self->clusterControllerProcessId.present() || !self->masterProcessId.present() ) { wait( delay(SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY) ); - dcId = self->clusterControllerDcId; } - ASSERT(dcId.present()); loop { std::map>, int> id_used; From c35d1bf2ef043b44c83db51d16577911bc7bbc67 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Thu, 17 Jan 2019 09:47:14 -0800 Subject: [PATCH 156/226] Fix according Alex's comment --- fdbrpc/Locality.cpp | 3 --- fdbserver/CMakeLists.txt | 1 + 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/fdbrpc/Locality.cpp b/fdbrpc/Locality.cpp index 704070fb03..7ce5dbfee6 100644 --- a/fdbrpc/Locality.cpp +++ b/fdbrpc/Locality.cpp @@ -149,7 +149,6 @@ ProcessClass::Fitness ProcessClass::machineClassFitness( ClusterRole role ) cons return ProcessClass::WorstFit; } case ProcessClass::DataDistributor: - // TODO: understand all the fitnesses and choose from 
them. switch( _class ) { case ProcessClass::DataDistributorClass: return ProcessClass::BestFit; @@ -163,8 +162,6 @@ ProcessClass::Fitness ProcessClass::machineClassFitness( ClusterRole role ) cons return ProcessClass::OkayFit; case ProcessClass::ProxyClass: return ProcessClass::OkayFit; - case ProcessClass::LogRouterClass: - return ProcessClass::OkayFit; case ProcessClass::UnsetClass: return ProcessClass::UnsetFit; case ProcessClass::TesterClass: diff --git a/fdbserver/CMakeLists.txt b/fdbserver/CMakeLists.txt index ac4c205827..398898d3c1 100644 --- a/fdbserver/CMakeLists.txt +++ b/fdbserver/CMakeLists.txt @@ -13,6 +13,7 @@ set(FDBSERVER_SRCS DataDistribution.h DataDistributionQueue.actor.cpp DataDistributionTracker.actor.cpp + DataDistributorInterface.h DBCoreState.h DiskQueue.actor.cpp fdbserver.actor.cpp From 04901607142fee1ca021e165edcc6d6df950dd3f Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Thu, 17 Jan 2019 11:32:33 -0800 Subject: [PATCH 157/226] Fix according to Evan's comments Use getRateInfo's endpoint as the ID for the DataDistributorInterface. For now, added a "rejoined" flag for ClusterControllerData and Proxy. TODO: move DataDistributorInterface into ServerDBInfo. --- fdbserver/ClusterController.actor.cpp | 55 ++++++++++++------------- fdbserver/ClusterRecruitmentInterface.h | 2 +- fdbserver/DataDistribution.actor.cpp | 41 ++++++++---------- fdbserver/DataDistributorInterface.h | 11 +++-- fdbserver/MasterProxyServer.actor.cpp | 17 ++++---- fdbserver/QuietDatabase.actor.cpp | 6 +-- fdbserver/worker.actor.cpp | 7 ++-- 7 files changed, 66 insertions(+), 73 deletions(-) diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 80d64796c5..c95b4657ff 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -1019,6 +1019,7 @@ public: Version datacenterVersionDifference; bool versionDifferenceUpdated; AsyncVar dataDistributorInterface; + bool rejoined = false; ClusterControllerData( ClusterControllerFullInterface const& ccInterface, LocalityData const& locality ) : id(ccInterface.id()), ac(false), outstandingRequestChecker(Void()), gotProcessClasses(false), gotFullyRecoveredConfig(false), startTime(now()), datacenterVersionDifference(0), versionDifferenceUpdated(false) @@ -2220,17 +2221,15 @@ ACTOR Future updateDatacenterVersionDifference( ClusterControllerData *sel ACTOR Future clusterGetDistributorInterface( ClusterControllerData *self, UID reqId, ReplyPromise reqReply ) { TraceEvent("CCGetDistributorInterfaceRequest", reqId); - state Future distributorOnChange = Never(); - - while ( !self->dataDistributorInterface.get().isValid() ) { + while ( !self->rejoined ) { wait( self->dataDistributorInterface.onChange() ); - TraceEvent("CCGetDistributorInterfaceID", self->dataDistributorInterface.get().id) - .detail("Endpoint", self->dataDistributorInterface.get().waitFailure.getEndpoint().token); + TraceEvent("CCGetDistributorInterfaceID", self->dataDistributorInterface.get().id()) + .detail("Endpoint", self->dataDistributorInterface.get().waitFailure.getEndpoint().token); } GetDistributorInterfaceReply reply(self->dataDistributorInterface.get()); TraceEvent("CCGetDistributorInterfaceReply", reqId) - .detail("DataDistributorId", reply.distributorInterface.id) + .detail("DataDistributorId", reply.distributorInterface.id()) .detail("Endpoint", reply.distributorInterface.waitFailure.getEndpoint().token); reqReply.send( reply ); return Void(); @@ -2270,16 +2269,17 @@ ACTOR Future waitDDRejoinOrStartDD( 
ClusterControllerData *self, ClusterCo // wait for a while to see if existing data distributor will join. loop choose { when ( DataDistributorRejoinRequest req = waitNext( clusterInterface->dataDistributorRejoin.getFuture() ) ) { - TraceEvent("ClusterController", self->id).detail("DataDistributorRejoinID", req.dataDistributor.id); + TraceEvent("ClusterController", self->id).detail("DataDistributorRejoinID", req.dataDistributor.id()); self->dataDistributorInterface.set( req.dataDistributor ); + self->rejoined = true; distributorFailed = waitFailureClient( self->dataDistributorInterface.get().waitFailure, SERVER_KNOBS->WORKER_FAILURE_TIME ); - req.reply.send(true); + req.reply.send( Void() ); break; } when ( wait( delay(SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY) ) ) { break; } } - if ( !self->dataDistributorInterface.get().isValid() ) { // No rejoin happened + if ( !self->rejoined ) { newDistributor = startDataDistributor( self ); } @@ -2287,36 +2287,35 @@ ACTOR Future waitDDRejoinOrStartDD( ClusterControllerData *self, ClusterCo loop choose { when ( DataDistributorInterface distributorInterf = wait( newDistributor ) ) { TraceEvent ev("ClusterController", self->id); - const UID myDdId = self->dataDistributorInterface.get().id; - if ( myDdId == UID() ) { - ev.detail("NewDataDistributorID", distributorInterf.id); - self->dataDistributorInterface.set( distributorInterf ); - distributorFailed = waitFailureClient( self->dataDistributorInterface.get().waitFailure, SERVER_KNOBS->WORKER_FAILURE_TIME ); - } else { - ev.detail("MyDataDistributorID", myDdId).detail("DiscardDataDistributorID", distributorInterf.id); - } + const UID myDdId = self->dataDistributorInterface.get().id(); + ev.detail("NewDataDistributorID", distributorInterf.id()); + self->dataDistributorInterface.set( distributorInterf ); + self->rejoined = true; + distributorFailed = waitFailureClient( self->dataDistributorInterface.get().waitFailure, SERVER_KNOBS->WORKER_FAILURE_TIME ); newDistributor = Never(); } when ( wait( distributorFailed ) ) { distributorFailed = Never(); - TraceEvent("ClusterController", self->id).detail("DataDistributorFailed", self->dataDistributorInterface.get().id) - .detail("Endpoint", self->dataDistributorInterface.get().waitFailure.getEndpoint().token); - self->dataDistributorInterface.set( DataDistributorInterface() ); // clear the ID + TraceEvent("ClusterController", self->id) + .detail("DataDistributorFailed", self->dataDistributorInterface.get().id()) + .detail("Endpoint", self->dataDistributorInterface.get().waitFailure.getEndpoint().token); + self->rejoined = false; newDistributor = startDataDistributor( self ); } when ( DataDistributorRejoinRequest req = waitNext( clusterInterface->dataDistributorRejoin.getFuture() ) ) { - if ( !self->dataDistributorInterface.get().isValid() ) { + if ( !self->rejoined ) { self->dataDistributorInterface.set( req.dataDistributor ); distributorFailed = waitFailureClient( self->dataDistributorInterface.get().waitFailure, SERVER_KNOBS->WORKER_FAILURE_TIME ); - TraceEvent("ClusterController", self->id).detail("DataDistributorRejoined", req.dataDistributor.id); + self->rejoined = true; + TraceEvent("ClusterController", self->id).detail("DataDistributorRejoined", req.dataDistributor.id()); } else { - const UID myDdId = self->dataDistributorInterface.get().id; - const bool success = myDdId == req.dataDistributor.id; - req.reply.send(success); + const UID myDdId = self->dataDistributorInterface.get().id(); + const bool success = myDdId == req.dataDistributor.id(); + 
req.reply.send( Void() ); TraceEvent("ClusterController", self->id) - .detail("DataDistributorRejoin", success ? "OK" : "Failed") - .detail("OldDataDistributorID", myDdId) - .detail("ReqID", req.dataDistributor.id); + .detail("DataDistributorRejoin", success ? "OK" : "Failed") + .detail("OldDataDistributorID", myDdId) + .detail("ReqID", req.dataDistributor.id()); } } } diff --git a/fdbserver/ClusterRecruitmentInterface.h b/fdbserver/ClusterRecruitmentInterface.h index 2c152b6dc0..670a5930e3 100644 --- a/fdbserver/ClusterRecruitmentInterface.h +++ b/fdbserver/ClusterRecruitmentInterface.h @@ -260,7 +260,7 @@ struct GetDistributorInterfaceRequest { struct DataDistributorRejoinRequest { DataDistributorInterface dataDistributor; - ReplyPromise reply; + ReplyPromise reply; DataDistributorRejoinRequest() { } explicit DataDistributorRejoinRequest(DataDistributorInterface di) : dataDistributor(di) {} diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index 4da46241e2..6c31dae243 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -3294,6 +3294,7 @@ ACTOR Future configurationMonitor( Reference self ) { loop { try { tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); + tr.setOption( FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE ); Standalone results = wait( tr.getRange( configKeys, CLIENT_KNOBS->TOO_MANY ) ); ASSERT( !results.more && results.size() < CLIENT_KNOBS->TOO_MANY ); @@ -3343,22 +3344,22 @@ ACTOR Future dataDistributor(DataDistributorInterface di, Reference> addActor; state Reference> configuration( new AsyncVar(DatabaseConfiguration()) ); - state Reference self( new DataDistributorData(db, configuration, di.id, addActor) ); + state Reference self( new DataDistributorData(db, configuration, di.id(), addActor) ); state Future collection = actorCollection( self->addActor.getFuture() ); state Future trigger = self->configurationTrigger.onTrigger(); state Version recoveryTransactionVersion = invalidVersion; - TraceEvent("NewDataDistributorID", di.id); + TraceEvent("NewDataDistributorID", di.id()); self->addActor.send( waitFailureServer(di.waitFailure.getFuture()) ); self->addActor.send( configurationMonitor( self ) ); loop choose { // Get configuration from the master. Can't use configurationMonitor for it // because the transaction read needs ratekeeper, which is not started yet. - when ( GetRecoveryInfoReply infoReply = wait( brokenPromiseToNever(self->dbInfo->get().master.getRecoveryInfo.getReply(GetRecoveryInfoRequest(di.id)) )) ) { + when ( GetRecoveryInfoReply infoReply = wait( brokenPromiseToNever(self->dbInfo->get().master.getRecoveryInfo.getReply(GetRecoveryInfoRequest(di.id())) )) ) { configuration->set( infoReply.configuration ); recoveryTransactionVersion = infoReply.recoveryTransactionVersion; - TraceEvent("DataDistributor", di.id) + TraceEvent("DataDistributor", di.id()) .detail("RecoveryVersion", infoReply.recoveryTransactionVersion) .detail("Configuration", configuration->get().toString()); // TODO: is remoteRecovered.getFuture() as Void() in dataDistribution() correct? 
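The hunks above and the rejoin loop that follows lean heavily on brokenPromiseToNever(): when the process holding a reply promise dies, the reply fails with broken_promise, and the wrapper converts that error into a future that never becomes ready, so the surrounding choose/when keeps waiting on its other branches (for example, dbInfo->onChange() announcing a new cluster controller) instead of throwing. A minimal plain-C++ sketch of that error-to-never mapping; the ReplyState enum and everything around it are invented for illustration and are not the FoundationDB types:

#include <iostream>

// A reply can be pending, ready, or failed with broken_promise (the
// process holding the promise died before answering).
enum class ReplyState { Pending, Ready, BrokenPromise };

// Sketch of the brokenPromiseToNever() idea: treat a broken promise as a
// reply that simply never arrives, rather than as an error.
ReplyState brokenPromiseToNever(ReplyState s) {
    return (s == ReplyState::BrokenPromise) ? ReplyState::Pending : s;
}

int main() {
    ReplyState reply = ReplyState::BrokenPromise;  // old controller died
    if (brokenPromiseToNever(reply) != ReplyState::Ready) {
        // Only a dbInfo change (a new controller registering) wakes the
        // waiting loop, which then resends the rejoin request.
        std::cout << "waiting for a new cluster controller\n";
    }
}

The design choice is that a dead counterparty looks the same as a slow one, so a single recovery path (watch for a new interface, then resend) covers both cases.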
@@ -3368,7 +3369,7 @@ ACTOR Future dataDistributor(DataDistributorInterface di, Reference& regions = self->configuration->get().regions; - TraceEvent ev("DataDistributor", di.id); + TraceEvent ev("DataDistributor", di.id()); if ( regions.size() > 0 ) { self->primaryDcId.push_back( regions[0].dcId ); ev.detail("PrimaryDcID", regions[0].dcId.toHexString()); @@ -3381,16 +3382,16 @@ ACTOR Future dataDistributor(DataDistributorInterface di, Reference> > ddStorageServerChanges; state double lastLimited = 0; - TraceEvent("DataDistributor", di.id).detail("StartDD", "RK"); - self->addActor.send( reportErrorsExcept( dataDistribution( self->dbInfo, di.id, self->configuration->get(), ddStorageServerChanges, recoveryTransactionVersion, self->primaryDcId, self->remoteDcIds, &lastLimited, Void() ), "DataDistribution", di.id, &normalDataDistributorErrors() ) ); - self->addActor.send( reportErrorsExcept( rateKeeper( self->dbInfo, ddStorageServerChanges, di.getRateInfo.getFuture(), self->configuration->get(), &lastLimited ), "Ratekeeper", di.id, &normalRateKeeperErrors() ) ); + TraceEvent("DataDistributor", di.id()).detail("StartDD", "RK"); + self->addActor.send( reportErrorsExcept( dataDistribution( self->dbInfo, di.id(), self->configuration->get(), ddStorageServerChanges, recoveryTransactionVersion, self->primaryDcId, self->remoteDcIds, &lastLimited, Void() ), "DataDistribution", di.id(), &normalDataDistributorErrors() ) ); + self->addActor.send( reportErrorsExcept( rateKeeper( self->dbInfo, ddStorageServerChanges, di.getRateInfo.getFuture(), self->configuration->get(), &lastLimited ), "Ratekeeper", di.id(), &normalRateKeeperErrors() ) ); - state Future reply; + state Future reply; loop { if ( self->dbInfo->get().clusterInterface.id() != lastClusterControllerID ) { // Rejoin the new cluster controller DataDistributorRejoinRequest req(di); - TraceEvent("DataDistributorRejoining", di.id) + TraceEvent("DataDistributorRejoining", di.id()) .detail("OldClusterControllerID", lastClusterControllerID) .detail("ClusterControllerID", self->dbInfo->get().clusterInterface.id()); reply = self->dbInfo->get().clusterInterface.dataDistributorRejoin.getReply(req); @@ -3398,15 +3399,10 @@ ACTOR Future dataDistributor(DataDistributorInterface di, ReferencedbInfo->get().clusterInterface.id(); - TraceEvent("DataDistributorRejoined", di.id) - .detail("ClusterControllerID", lastClusterControllerID); - } else { - TraceEvent("DataDistributorRejoinFailed", di.id); // Probably distributor exists. - break; - } + when (wait(brokenPromiseToNever(reply))) { + lastClusterControllerID = self->dbInfo->get().clusterInterface.id(); + TraceEvent("DataDistributorRejoined", di.id()) + .detail("ClusterControllerID", lastClusterControllerID); } when (wait(self->dbInfo->onChange())) {} when (wait(trigger)) { break; } // TODO: maybe break here? Since configuration changed. @@ -3419,15 +3415,12 @@ ACTOR Future dataDistributor(DataDistributorInterface di, ReferenceaddActor.isEmpty() ) { - self->addActor.getFuture().pop(); - } return Void(); } diff --git a/fdbserver/DataDistributorInterface.h b/fdbserver/DataDistributorInterface.h index df11ceef9e..7135c6d066 100644 --- a/fdbserver/DataDistributorInterface.h +++ b/fdbserver/DataDistributorInterface.h @@ -3,7 +3,7 @@ * * This source file is part of the FoundationDB open source project * - * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors + * Copyright 2013-2019 Apple Inc. 
and the FoundationDB project authors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,24 +24,23 @@ #include "fdbrpc/fdbrpc.h" struct DataDistributorInterface { - UID id; RequestStream> waitFailure; - RequestStream< struct GetRateInfoRequest > getRateInfo; + RequestStream getRateInfo; DataDistributorInterface() {} + UID id() const { return getRateInfo.getEndpoint().token; } NetworkAddress address() const { return getRateInfo.getEndpoint().address; } bool operator== (const DataDistributorInterface& r) const { - return id == r.id; + return id() == r.id(); } bool operator!= (const DataDistributorInterface& r) const { return !(*this == r); } - bool isValid() const { return id != UID(); } template void serialize(Archive& ar) { - serializer(ar, id, waitFailure, getRateInfo); + serializer(ar, waitFailure, getRateInfo); } }; diff --git a/fdbserver/MasterProxyServer.actor.cpp b/fdbserver/MasterProxyServer.actor.cpp index 8330f4197d..bf724bb0c6 100644 --- a/fdbserver/MasterProxyServer.actor.cpp +++ b/fdbserver/MasterProxyServer.actor.cpp @@ -87,7 +87,7 @@ Future forwardValue(Promise out, Future in) int getBytes(Promise const& r) { return 0; } -ACTOR Future monitorDataDistributor(UID myID, Reference> db, Reference> dataDistributor) { +ACTOR Future monitorDataDistributor(UID myID, Reference> db, Reference> dataDistributor, bool *rejoined) { state Future distributorFailure = Never(); state Future reply = brokenPromiseToNever( db->get().clusterInterface.getDistributorInterface.getReply( GetDistributorInterfaceRequest(myID) ) ); @@ -95,8 +95,9 @@ ACTOR Future monitorDataDistributor(UID myID, Referenceset( r.distributorInterface ); + *rejoined = true; distributorFailure = waitFailureClient( dataDistributor->get().waitFailure, SERVER_KNOBS->WORKER_FAILURE_TIME ); - TraceEvent("Proxy", myID).detail("DataDistributorChangedID", dataDistributor->get().id) + TraceEvent("Proxy", myID).detail("DataDistributorChangedID", dataDistributor->get().id()) .detail("Endpoint", dataDistributor->get().waitFailure.getEndpoint().token); } when ( wait( db->onChange() ) ) { @@ -105,9 +106,10 @@ ACTOR Future monitorDataDistributor(UID myID, Referenceget().clusterInterface.id()) - .detail("DataDistributorFailed", dataDistributor->get().id) + .detail("DataDistributorFailed", dataDistributor->get().id()) .detail("Token", dataDistributor->get().waitFailure.getEndpoint().token); wait( delay(0.001) ); reply = brokenPromiseToNever( db->get().clusterInterface.getDistributorInterface.getReply( GetDistributorInterfaceRequest(myID) ) ); @@ -115,7 +117,7 @@ ACTOR Future monitorDataDistributor(UID myID, Reference getRate(UID myID, Reference> db, int64_t* inTransactionCount, double* outTransactionRate, Reference> dataDistributor) { +ACTOR Future getRate(UID myID, Reference> db, int64_t* inTransactionCount, double* outTransactionRate, Reference> dataDistributor, bool *rejoined) { state Future nextRequestTimer = Never(); state Future leaseTimeout = Never(); state Future reply = Never(); @@ -123,7 +125,7 @@ ACTOR Future getRate(UID myID, Reference> db, int64 loop choose { when ( wait( dataDistributor->onChange() ) ) { - if ( dataDistributor->get().isValid() ) { + if ( *rejoined ) { nextRequestTimer = Void(); // trigger GetRate request } else { nextRequestTimer = Never(); @@ -1140,8 +1142,9 @@ ACTOR static Future transactionStarter( otherProxies.push_back(mp); } state Reference> dataDistributor( new AsyncVar(DataDistributorInterface()) ); - addActor.send( 
getRate(proxy.id(), db, &transactionCount, &transactionRate, dataDistributor) ); // do this after correct CC is obtained. - addActor.send( monitorDataDistributor(proxy.id(), db, dataDistributor) ); + bool rejoined = false; + addActor.send( getRate(proxy.id(), db, &transactionCount, &transactionRate, dataDistributor, &rejoined) ); // do this after correct CC is obtained. + addActor.send( monitorDataDistributor(proxy.id(), db, dataDistributor, &rejoined) ); ASSERT(db->get().recoveryState >= RecoveryState::ACCEPTING_COMMITS); // else potentially we could return uncommitted read versions (since self->committedVersion is only a committed version if this recovery succeeds) diff --git a/fdbserver/QuietDatabase.actor.cpp b/fdbserver/QuietDatabase.actor.cpp index 7e587abb3f..bc8000c7df 100644 --- a/fdbserver/QuietDatabase.actor.cpp +++ b/fdbserver/QuietDatabase.actor.cpp @@ -85,14 +85,14 @@ ACTOR Future getDataDistributorWorker( Database cx, Reference workerServer( Reference connFile, Refe } when ( InitializeDataDistributorRequest req = waitNext(interf.dataDistributor.getFuture()) ) { DataDistributorInterface recruited; - recruited.id = req.reqId; - TraceEvent("DataDistributorReceived", req.reqId).detail("Addr", interf.address()).detail("WorkerId", interf.id()); - startRole( Role::DATA_DISTRIBUTOR, req.reqId, interf.id()); + TraceEvent("DataDistributorReceived", req.reqId).detail("Addr", interf.address()).detail("DataDistributorId", recruited.id()); + startRole( Role::DATA_DISTRIBUTOR, recruited.id(), interf.id() ); Future dataDistributorProcess = dataDistributor( recruited, dbInfo ); - errorForwarders.add( forwardError( errors, Role::DATA_DISTRIBUTOR, req.reqId, dataDistributorProcess ) ); + errorForwarders.add( forwardError( errors, Role::DATA_DISTRIBUTOR, recruited.id(), dataDistributorProcess ) ); req.reply.send(recruited); } when( InitializeTLogRequest req = waitNext(interf.tLog.getFuture()) ) { From ef868f599cec63dc261baff40c3cf3597ec7818f Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Fri, 18 Jan 2019 11:30:18 -0800 Subject: [PATCH 158/226] Add DataDistributorInterface to ServerDBInfo Also change the Proxy and QuietDatabase to use the DataDistributorInterface. --- fdbserver/ClusterController.actor.cpp | 67 +++++++++++++++------------ fdbserver/DataDistribution.actor.cpp | 2 +- fdbserver/DataDistributorInterface.h | 7 ++- fdbserver/MasterProxyServer.actor.cpp | 47 ++++--------------- fdbserver/MoveKeys.actor.cpp | 2 +- fdbserver/QuietDatabase.actor.cpp | 35 +++++--------- fdbserver/ServerDBInfo.h | 4 +- fdbserver/worker.actor.cpp | 8 ++-- 8 files changed, 74 insertions(+), 98 deletions(-) diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index c95b4657ff..5bd671d583 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -107,7 +107,13 @@ public: serverInfo( new AsyncVar( ServerDBInfo() ) ), db( DatabaseContext::create( clientInfo, Future(), LocalityData(), true, TaskDefaultEndpoint, true ) ) // SOMEDAY: Locality! 
{ + } + void setDistributor(DataDistributorInterface distributorInterf) { + ServerDBInfo newInfo = serverInfo->get(); + newInfo.id = g_random->randomUniqueID(); + newInfo.distributor = distributorInterf; + serverInfo->set( newInfo ); } }; @@ -1018,6 +1024,7 @@ public: Optional remoteStartTime; Version datacenterVersionDifference; bool versionDifferenceUpdated; + PromiseStream> addActor; AsyncVar dataDistributorInterface; bool rejoined = false; @@ -1103,6 +1110,7 @@ ACTOR Future clusterWatchDatabase( ClusterControllerData* cluster, Cluster dbInfo.masterLifetime = db->serverInfo->get().masterLifetime; ++dbInfo.masterLifetime; dbInfo.clusterInterface = db->serverInfo->get().clusterInterface; + dbInfo.distributor = db->serverInfo->get().distributor; TraceEvent("CCWDB", cluster->id).detail("Lifetime", dbInfo.masterLifetime.toString()).detail("ChangeID", dbInfo.id); db->serverInfo->set( dbInfo ); @@ -2241,7 +2249,7 @@ ACTOR Future startDataDistributor( ClusterControllerDa wait( delay(SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY) ); } - loop { + while (true) { std::map>, int> id_used; id_used[self->clusterControllerProcessId]++; id_used[self->masterProcessId]++; @@ -2250,19 +2258,18 @@ ACTOR Future startDataDistributor( ClusterControllerDa req.reqId = g_random->randomUniqueID(); TraceEvent("DataDistributor", req.reqId).detail("Recruit", data_distributor.worker.first.address()); - choose { - when ( DataDistributorInterface dataDistributor = wait( data_distributor.worker.first.dataDistributor.getReply(req) ) ) { - TraceEvent("DataDistributor", req.reqId).detail("Recruited", data_distributor.worker.first.address()); - return dataDistributor; - } - when ( wait ( delay(SERVER_KNOBS->WORKER_FAILURE_TIME) ) ) {} + ErrorOr distributor = wait( data_distributor.worker.first.dataDistributor.getReplyUnlessFailedFor(req, 1, 0) ); + if (distributor.present()) { + TraceEvent("DataDistributor", req.reqId).detail("Recruited", data_distributor.worker.first.address()); + return distributor.get(); } + TraceEvent("DataDistributor", req.reqId) + .detail("RecruitFailed", data_distributor.worker.first.address()) + .error(distributor.getError()); } } ACTOR Future waitDDRejoinOrStartDD( ClusterControllerData *self, ClusterControllerFullInterface *clusterInterface ) { - state PromiseStream> addActor; - state Future collection = actorCollection( addActor.getFuture() ); state Future newDistributor = Never(); state Future distributorFailed = Never(); @@ -2271,6 +2278,7 @@ ACTOR Future waitDDRejoinOrStartDD( ClusterControllerData *self, ClusterCo when ( DataDistributorRejoinRequest req = waitNext( clusterInterface->dataDistributorRejoin.getFuture() ) ) { TraceEvent("ClusterController", self->id).detail("DataDistributorRejoinID", req.dataDistributor.id()); self->dataDistributorInterface.set( req.dataDistributor ); + self->db.setDistributor( req.dataDistributor ); self->rejoined = true; distributorFailed = waitFailureClient( self->dataDistributorInterface.get().waitFailure, SERVER_KNOBS->WORKER_FAILURE_TIME ); req.reply.send( Void() ); @@ -2288,9 +2296,10 @@ ACTOR Future waitDDRejoinOrStartDD( ClusterControllerData *self, ClusterCo when ( DataDistributorInterface distributorInterf = wait( newDistributor ) ) { TraceEvent ev("ClusterController", self->id); const UID myDdId = self->dataDistributorInterface.get().id(); - ev.detail("NewDataDistributorID", distributorInterf.id()); + ev.detail("NewDataDistributorID", distributorInterf.id()).detail("Valid", distributorInterf.isValid()); self->dataDistributorInterface.set( 
distributorInterf ); self->rejoined = true; + self->db.setDistributor( distributorInterf ); distributorFailed = waitFailureClient( self->dataDistributorInterface.get().waitFailure, SERVER_KNOBS->WORKER_FAILURE_TIME ); newDistributor = Never(); } @@ -2300,6 +2309,7 @@ ACTOR Future waitDDRejoinOrStartDD( ClusterControllerData *self, ClusterCo .detail("DataDistributorFailed", self->dataDistributorInterface.get().id()) .detail("Endpoint", self->dataDistributorInterface.get().waitFailure.getEndpoint().token); self->rejoined = false; + // self->db.setDistributor( DataDistributorInterface() ); newDistributor = startDataDistributor( self ); } when ( DataDistributorRejoinRequest req = waitNext( clusterInterface->dataDistributorRejoin.getFuture() ) ) { @@ -2307,6 +2317,7 @@ ACTOR Future waitDDRejoinOrStartDD( ClusterControllerData *self, ClusterCo self->dataDistributorInterface.set( req.dataDistributor ); distributorFailed = waitFailureClient( self->dataDistributorInterface.get().waitFailure, SERVER_KNOBS->WORKER_FAILURE_TIME ); self->rejoined = true; + self->db.setDistributor( req.dataDistributor ); TraceEvent("ClusterController", self->id).detail("DataDistributorRejoined", req.dataDistributor.id()); } else { const UID myDdId = self->dataDistributorInterface.get().id(); @@ -2325,21 +2336,19 @@ ACTOR Future clusterControllerCore( ClusterControllerFullInterface interf, state ClusterControllerData self( interf, locality ); state Future coordinationPingDelay = delay( SERVER_KNOBS->WORKER_COORDINATION_PING_DELAY ); state uint64_t step = 0; - state PromiseStream> addActor; - state Future> error = errorOr( actorCollection( addActor.getFuture() ) ); + state Future> error = errorOr( actorCollection( self.addActor.getFuture() ) ); - addActor.send( failureDetectionServer( self.id, &self.db, interf.clientInterface.failureMonitoring.getFuture() ) ); - addActor.send( clusterWatchDatabase( &self, &self.db ) ); // Start the master database - addActor.send( self.updateWorkerList.init( self.db.db ) ); - addActor.send( statusServer( interf.clientInterface.databaseStatus.getFuture(), &self, coordinators)); - addActor.send( timeKeeper(&self) ); - addActor.send( monitorProcessClasses(&self) ); - addActor.send( monitorServerInfoConfig(&self.db) ); - addActor.send( monitorClientTxnInfoConfigs(&self.db) ); - addActor.send( updatedChangingDatacenters(&self) ); - addActor.send( updatedChangedDatacenters(&self) ); - addActor.send( updateDatacenterVersionDifference(&self) ); - addActor.send( waitDDRejoinOrStartDD(&self, &interf) ); + self.addActor.send( failureDetectionServer( self.id, &self.db, interf.clientInterface.failureMonitoring.getFuture() ) ); + self.addActor.send( clusterWatchDatabase( &self, &self.db ) ); // Start the master database + self.addActor.send( self.updateWorkerList.init( self.db.db ) ); + self.addActor.send( statusServer( interf.clientInterface.databaseStatus.getFuture(), &self, coordinators)); + self.addActor.send( timeKeeper(&self) ); + self.addActor.send( monitorProcessClasses(&self) ); + self.addActor.send( monitorClientTxnInfoConfigs(&self.db) ); + self.addActor.send( updatedChangingDatacenters(&self) ); + self.addActor.send( updatedChangedDatacenters(&self) ); + self.addActor.send( updateDatacenterVersionDifference(&self) ); + self.addActor.send( waitDDRejoinOrStartDD(&self, &interf) ); //printf("%s: I am the cluster controller\n", g_network->getLocalAddress().toString().c_str()); loop choose { @@ -2355,13 +2364,13 @@ ACTOR Future clusterControllerCore( ClusterControllerFullInterface interf, return 
Void(); } when( OpenDatabaseRequest req = waitNext( interf.clientInterface.openDatabase.getFuture() ) ) { - addActor.send( clusterOpenDatabase( &self.db, req.knownClientInfoID, req.issues.toString(), req.supportedVersions, req.traceLogGroup, req.reply ) ); + self.addActor.send( clusterOpenDatabase( &self.db, req.knownClientInfoID, req.issues.toString(), req.supportedVersions, req.traceLogGroup, req.reply ) ); } when( RecruitFromConfigurationRequest req = waitNext( interf.recruitFromConfiguration.getFuture() ) ) { - addActor.send( clusterRecruitFromConfiguration( &self, req ) ); + self.addActor.send( clusterRecruitFromConfiguration( &self, req ) ); } when( RecruitRemoteFromConfigurationRequest req = waitNext( interf.recruitRemoteFromConfiguration.getFuture() ) ) { - addActor.send( clusterRecruitRemoteFromConfiguration( &self, req ) ); + self.addActor.send( clusterRecruitRemoteFromConfiguration( &self, req ) ); } when( RecruitStorageRequest req = waitNext( interf.recruitStorage.getFuture() ) ) { clusterRecruitStorage( &self, req ); @@ -2416,7 +2425,7 @@ ACTOR Future clusterControllerCore( ClusterControllerFullInterface interf, clusterRegisterMaster( &self, req ); } when( GetServerDBInfoRequest req = waitNext( interf.getServerDBInfo.getFuture() ) ) { - addActor.send( clusterGetServerInfo( &self.db, req.knownServerInfoID, req.issues.toString(), req.incompatiblePeers, req.reply ) ); + self.addActor.send( clusterGetServerInfo( &self.db, req.knownServerInfoID, req.issues.toString(), req.incompatiblePeers, req.reply ) ); } when( wait( leaderFail ) ) { // We are no longer the leader if this has changed. @@ -2428,7 +2437,7 @@ ACTOR Future clusterControllerCore( ClusterControllerFullInterface interf, ping.send( Void() ); } when ( GetDistributorInterfaceRequest req = waitNext( interf.getDistributorInterface.getFuture() ) ) { - addActor.send( clusterGetDistributorInterface( &self, req.reqId, req.reply ) ); + self.addActor.send( clusterGetDistributorInterface( &self, req.reqId, req.reply ) ); } } } diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index 6c31dae243..ffdb35838b 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -3349,7 +3349,7 @@ ACTOR Future dataDistributor(DataDistributorInterface di, Reference trigger = self->configurationTrigger.onTrigger(); state Version recoveryTransactionVersion = invalidVersion; - TraceEvent("NewDataDistributorID", di.id()); + TraceEvent("NewDataDistributorID", di.id()).detail("Valid", di.isValid()); self->addActor.send( waitFailureServer(di.waitFailure.getFuture()) ); self->addActor.send( configurationMonitor( self ) ); diff --git a/fdbserver/DataDistributorInterface.h b/fdbserver/DataDistributorInterface.h index 7135c6d066..b210f256cb 100644 --- a/fdbserver/DataDistributorInterface.h +++ b/fdbserver/DataDistributorInterface.h @@ -26,9 +26,12 @@ struct DataDistributorInterface { RequestStream> waitFailure; RequestStream getRateInfo; + bool valid; - DataDistributorInterface() {} + DataDistributorInterface() : valid(false) {} + explicit DataDistributorInterface(bool v) : valid(v) {} + bool isValid() const { return valid; } UID id() const { return getRateInfo.getEndpoint().token; } NetworkAddress address() const { return getRateInfo.getEndpoint().address; } bool operator== (const DataDistributorInterface& r) const { @@ -40,7 +43,7 @@ struct DataDistributorInterface { template void serialize(Archive& ar) { - serializer(ar, waitFailure, getRateInfo); + serializer(ar, waitFailure, 
getRateInfo, valid); } }; diff --git a/fdbserver/MasterProxyServer.actor.cpp b/fdbserver/MasterProxyServer.actor.cpp index bf724bb0c6..2b35a1085d 100644 --- a/fdbserver/MasterProxyServer.actor.cpp +++ b/fdbserver/MasterProxyServer.actor.cpp @@ -87,53 +87,27 @@ Future forwardValue(Promise out, Future in) int getBytes(Promise const& r) { return 0; } -ACTOR Future monitorDataDistributor(UID myID, Reference> db, Reference> dataDistributor, bool *rejoined) { - state Future distributorFailure = Never(); - state Future reply = brokenPromiseToNever( db->get().clusterInterface.getDistributorInterface.getReply( GetDistributorInterfaceRequest(myID) ) ); - - loop choose { - when ( GetDistributorInterfaceReply r = wait( reply ) ) { - reply = Never(); - dataDistributor->set( r.distributorInterface ); - *rejoined = true; - distributorFailure = waitFailureClient( dataDistributor->get().waitFailure, SERVER_KNOBS->WORKER_FAILURE_TIME ); - TraceEvent("Proxy", myID).detail("DataDistributorChangedID", dataDistributor->get().id()) - .detail("Endpoint", dataDistributor->get().waitFailure.getEndpoint().token); - } - when ( wait( db->onChange() ) ) { - distributorFailure = Never(); - reply = brokenPromiseToNever( db->get().clusterInterface.getDistributorInterface.getReply( GetDistributorInterfaceRequest(myID) ) ); - } - when ( wait( distributorFailure ) ) { - distributorFailure = Never(); - *rejoined = false; - TraceEvent("Proxy", myID) - .detail("CC", db->get().clusterInterface.id()) - .detail("DataDistributorFailed", dataDistributor->get().id()) - .detail("Token", dataDistributor->get().waitFailure.getEndpoint().token); - wait( delay(0.001) ); - reply = brokenPromiseToNever( db->get().clusterInterface.getDistributorInterface.getReply( GetDistributorInterfaceRequest(myID) ) ); - } - } -} - -ACTOR Future getRate(UID myID, Reference> db, int64_t* inTransactionCount, double* outTransactionRate, Reference> dataDistributor, bool *rejoined) { +ACTOR Future getRate(UID myID, Reference> db, int64_t* inTransactionCount, double* outTransactionRate) { state Future nextRequestTimer = Never(); state Future leaseTimeout = Never(); state Future reply = Never(); state int64_t lastTC = 0; loop choose { - when ( wait( dataDistributor->onChange() ) ) { - if ( *rejoined ) { + when ( wait( db->onChange() ) ) { + if ( db->get().distributor.isValid() ) { + TraceEvent("Proxy", myID) + .detail("DataDistributorChangedID", db->get().distributor.id()); nextRequestTimer = Void(); // trigger GetRate request } else { + TraceEvent("Proxy", myID) + .detail("DataDistributorDied", db->get().distributor.id()); nextRequestTimer = Never(); } } when ( wait( nextRequestTimer ) ) { nextRequestTimer = Never(); - reply = brokenPromiseToNever(dataDistributor->get().getRateInfo.getReply(GetRateInfoRequest(myID, *inTransactionCount))); + reply = brokenPromiseToNever(db->get().distributor.getRateInfo.getReply(GetRateInfoRequest(myID, *inTransactionCount))); } when ( GetRateInfoReply rep = wait(reply) ) { reply = Never(); @@ -1132,6 +1106,7 @@ ACTOR static Future transactionStarter( state vector otherProxies; state PromiseStream replyTimes; + addActor.send( getRate(proxy.id(), db, &transactionCount, &transactionRate) ); addActor.send(queueTransactionStartRequests(&transactionQueue, proxy.getConsistentReadVersion.getFuture(), GRVTimer, &lastGRVTime, &GRVBatchTime, replyTimes.getFuture(), &commitData->stats)); // Get a list of the other proxies that go together with us @@ -1141,10 +1116,6 @@ ACTOR static Future transactionStarter( if (mp != proxy) 
otherProxies.push_back(mp); } - state Reference> dataDistributor( new AsyncVar(DataDistributorInterface()) ); - bool rejoined = false; - addActor.send( getRate(proxy.id(), db, &transactionCount, &transactionRate, dataDistributor, &rejoined) ); // do this after correct CC is obtained. - addActor.send( monitorDataDistributor(proxy.id(), db, dataDistributor, &rejoined) ); ASSERT(db->get().recoveryState >= RecoveryState::ACCEPTING_COMMITS); // else potentially we could return uncommitted read versions (since self->committedVersion is only a committed version if this recovery succeeds) diff --git a/fdbserver/MoveKeys.actor.cpp b/fdbserver/MoveKeys.actor.cpp index eb590d89e1..f8b6529690 100644 --- a/fdbserver/MoveKeys.actor.cpp +++ b/fdbserver/MoveKeys.actor.cpp @@ -391,7 +391,7 @@ ACTOR Future waitForShardReady( StorageServerInterface server, KeyRange ke loop { try { std::pair rep = wait( server.getShardState.getReply( GetShardStateRequest(keys, mode), TaskMoveKeys ) ); - if (rep.first >= minVersion && (recoveryVersion == invalidVersion || rep.second >= recoveryVersion)) { + if (rep.first >= minVersion) { return Void(); } wait( delayJittered( SERVER_KNOBS->SHARD_READY_DELAY, TaskMoveKeys ) ); diff --git a/fdbserver/QuietDatabase.actor.cpp b/fdbserver/QuietDatabase.actor.cpp index bc8000c7df..43a288fd23 100644 --- a/fdbserver/QuietDatabase.actor.cpp +++ b/fdbserver/QuietDatabase.actor.cpp @@ -68,34 +68,23 @@ ACTOR Future getMasterWorker( Database cx, Reference getDataDistributorWorker( Database cx, Reference> dbInfo ) { - state Future>> newWorkers = getWorkers( dbInfo ); - state vector> workers; TraceEvent("GetDataDistributorWorker").detail("Stage", "GettingWorkers"); - loop choose { - when ( wait( dbInfo->onChange() ) ) { - newWorkers = getWorkers( dbInfo ); - } - when ( vector> w = wait( newWorkers ) ) { - workers = w; - newWorkers = Never(); - } - when ( GetDistributorInterfaceReply reply = wait( brokenPromiseToNever( dbInfo->get().clusterInterface.getDistributorInterface.getReply( GetDistributorInterfaceRequest() ) ) ) ) { - const DataDistributorInterface& ddInterf = reply.distributorInterface; + loop { + state vector> workers = wait( getWorkers( dbInfo ) ); - for( int i = 0; i < workers.size(); i++ ) { - if( workers[i].first.address() == ddInterf.address() ) { - TraceEvent("GetDataDistributorWorker").detail("Stage", "GotWorkers").detail("DataDistributorId", ddInterf.id()).detail("WorkerId", workers[i].first.id()); - return std::make_pair(workers[i].first, ddInterf.id()); - } + for( int i = 0; i < workers.size(); i++ ) { + if( workers[i].first.address() == dbInfo->get().distributor.address() ) { + TraceEvent("GetDataDistributorWorker").detail("Stage", "GotWorkers").detail("DataDistributorId", dbInfo->get().distributor.id()).detail("WorkerId", workers[i].first.id()); + return std::make_pair(workers[i].first, dbInfo->get().distributor.id()); } - - TraceEvent(SevWarn, "GetDataDistributorWorker") - .detail("Error", "DataDistributorWorkerNotFound") - .detail("DataDistributorId", ddInterf.id()) - .detail("DataDistributorAddress", ddInterf.address()) - .detail("WorkerCount", workers.size()); } + + TraceEvent(SevWarn, "GetDataDistributorWorker") + .detail("Error", "DataDistributorWorkerNotFound") + .detail("DataDistributorId", dbInfo->get().distributor.id()) + .detail("DataDistributorAddress", dbInfo->get().distributor.address()) + .detail("WorkerCount", workers.size()); } } diff --git a/fdbserver/ServerDBInfo.h b/fdbserver/ServerDBInfo.h index 0b51f4bd83..8d315ad5b0 100644 --- 
a/fdbserver/ServerDBInfo.h +++ b/fdbserver/ServerDBInfo.h @@ -23,6 +23,7 @@ #pragma once #include "fdbserver/ClusterRecruitmentInterface.h" +#include "fdbserver/DataDistributorInterface.h" #include "fdbserver/MasterInterface.h" #include "fdbserver/LogSystemConfig.h" #include "fdbserver/RecoveryState.h" @@ -36,6 +37,7 @@ struct ServerDBInfo { UID id; // Changes each time any other member changes ClusterControllerFullInterface clusterInterface; ClientDBInfo client; // After a successful recovery, eventually proxies that communicate with it + DataDistributorInterface distributor; // The best guess of current data distributor, which might be unknown. MasterInterface master; // The best guess as to the most recent master, which might still be recovering vector resolvers; DBRecoveryCount recoveryCount; // A recovery count from DBCoreState. A successful master recovery increments it twice; unsuccessful recoveries may increment it once. Depending on where the current master is in its recovery process, this might not have been written by the current master. @@ -53,7 +55,7 @@ struct ServerDBInfo { template void serialize( Ar& ar ) { - serializer(ar, id, clusterInterface, client, master, resolvers, recoveryCount, masterLifetime, logSystemConfig, priorCommittedLogServers, recoveryState, latencyBandConfig); + serializer(ar, id, clusterInterface, client, distributor, master, resolvers, recoveryCount, recoveryState, masterLifetime, logSystemConfig, priorCommittedLogServers, latencyBandConfig); } }; diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index 06da4ffe73..577a7f93d0 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -505,7 +505,8 @@ ACTOR Future monitorServerDBInfo( Referenceget().present() ? brokenPromiseToNever( ccInterface->get().get().getServerDBInfo.getReply( req ) ) : Never() ) ) { - TraceEvent("GotServerDBInfoChange").detail("ChangeID", ni.id).detail("MasterID", ni.master.id()); + TraceEvent("GotServerDBInfoChange").detail("ChangeID", ni.id).detail("MasterID", ni.master.id()) + .detail("DataDistributorID", ni.distributor.id()); ServerDBInfo localInfo = ni; localInfo.myLocality = locality; dbInfo->set(localInfo); @@ -714,8 +715,9 @@ ACTOR Future workerServer( Reference connFile, Refe req.reply.send(recruited); } when ( InitializeDataDistributorRequest req = waitNext(interf.dataDistributor.getFuture()) ) { - DataDistributorInterface recruited; - TraceEvent("DataDistributorReceived", req.reqId).detail("Addr", interf.address()).detail("DataDistributorId", recruited.id()); + DataDistributorInterface recruited(true); + TraceEvent("DataDistributorReceived", req.reqId).detail("Addr", interf.address()) + .detail("DataDistributorId", recruited.id()); startRole( Role::DATA_DISTRIBUTOR, recruited.id(), interf.id() ); Future dataDistributorProcess = dataDistributor( recruited, dbInfo ); From 3f7bbc68aad031d8c14c1e94796d20c0d3d7963d Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Fri, 18 Jan 2019 14:36:11 -0800 Subject: [PATCH 159/226] Remove getDistributorInterface from cluster controller --- fdbserver/ClusterController.actor.cpp | 19 ----------------- fdbserver/ClusterRecruitmentInterface.h | 28 +------------------------ 2 files changed, 1 insertion(+), 46 deletions(-) diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 5bd671d583..fadeee3810 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -2227,22 +2227,6 @@ ACTOR Future updateDatacenterVersionDifference( 
ClusterControllerData *sel } } -ACTOR Future clusterGetDistributorInterface( ClusterControllerData *self, UID reqId, ReplyPromise reqReply ) { - TraceEvent("CCGetDistributorInterfaceRequest", reqId); - while ( !self->rejoined ) { - wait( self->dataDistributorInterface.onChange() ); - TraceEvent("CCGetDistributorInterfaceID", self->dataDistributorInterface.get().id()) - .detail("Endpoint", self->dataDistributorInterface.get().waitFailure.getEndpoint().token); - } - - GetDistributorInterfaceReply reply(self->dataDistributorInterface.get()); - TraceEvent("CCGetDistributorInterfaceReply", reqId) - .detail("DataDistributorId", reply.distributorInterface.id()) - .detail("Endpoint", reply.distributorInterface.waitFailure.getEndpoint().token); - reqReply.send( reply ); - return Void(); -} - ACTOR Future startDataDistributor( ClusterControllerData *self ) { state Optional dcId = self->clusterControllerDcId; while ( !self->clusterControllerProcessId.present() || !self->masterProcessId.present() ) { @@ -2436,9 +2420,6 @@ ACTOR Future clusterControllerCore( ClusterControllerFullInterface interf, when( ReplyPromise ping = waitNext( interf.clientInterface.ping.getFuture() ) ) { ping.send( Void() ); } - when ( GetDistributorInterfaceRequest req = waitNext( interf.getDistributorInterface.getFuture() ) ) { - self.addActor.send( clusterGetDistributorInterface( &self, req.reqId, req.reply ) ); - } } } diff --git a/fdbserver/ClusterRecruitmentInterface.h b/fdbserver/ClusterRecruitmentInterface.h index 670a5930e3..8e021be991 100644 --- a/fdbserver/ClusterRecruitmentInterface.h +++ b/fdbserver/ClusterRecruitmentInterface.h @@ -44,7 +44,6 @@ struct ClusterControllerFullInterface { RequestStream< struct RegisterMasterRequest > registerMaster; RequestStream< struct GetServerDBInfoRequest > getServerDBInfo; RequestStream< struct DataDistributorRejoinRequest > dataDistributorRejoin; // sent by dataDistributor (may or may not rebooted) to communicate with a new CC - RequestStream< struct GetDistributorInterfaceRequest > getDistributorInterface; // sent by proxies & QuietDatabase.actor.cpp UID id() const { return clientInterface.id(); } bool operator == (ClusterControllerFullInterface const& r) const { return id() == r.id(); } @@ -60,13 +59,12 @@ struct ClusterControllerFullInterface { registerMaster.getEndpoint( TaskClusterController ); getServerDBInfo.getEndpoint( TaskClusterController ); dataDistributorRejoin.getEndpoint( TaskClusterController ); - getDistributorInterface.getEndpoint( TaskClusterController ); } template void serialize( Ar& ar ) { ASSERT( ar.protocolVersion() >= 0x0FDB00A200040001LL ); - serializer(ar, clientInterface, recruitFromConfiguration, recruitRemoteFromConfiguration, recruitStorage, registerWorker, getWorkers, registerMaster, getServerDBInfo, dataDistributorRejoin, getDistributorInterface); + serializer(ar, clientInterface, recruitFromConfiguration, recruitRemoteFromConfiguration, recruitStorage, registerWorker, getWorkers, registerMaster, getServerDBInfo, dataDistributorRejoin); } }; @@ -234,30 +232,6 @@ struct GetServerDBInfoRequest { } }; -struct GetDistributorInterfaceReply { - DataDistributorInterface distributorInterface; - - GetDistributorInterfaceReply() {} - explicit GetDistributorInterfaceReply(DataDistributorInterface di): distributorInterface(di) {} - template - void serialize(Ar& ar) { - serializer(ar, distributorInterface); - } -}; - -struct GetDistributorInterfaceRequest { - UID reqId; - ReplyPromise< struct GetDistributorInterfaceReply > reply; - - 
GetDistributorInterfaceRequest() {}
- explicit GetDistributorInterfaceRequest(UID id) : reqId(id) {}
-
- template
- void serialize(Ar& ar) {
- serializer(ar, reqId, reply);
- }
-};
-
 struct DataDistributorRejoinRequest {
 DataDistributorInterface dataDistributor;
 ReplyPromise reply;

From 7a205b173200f78cadf4b1d5ceb3c5be027c16c4 Mon Sep 17 00:00:00 2001
From: Jingyu Zhou
Date: Fri, 18 Jan 2019 15:09:51 -0800
Subject: [PATCH 160/226] Move remoteRecovered to dataDistributionTeamCollection()

Let the remote DC wait until fully recovered before team collection starts.

---
 fdbserver/DataDistribution.actor.cpp | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp
index ffdb35838b..a835db3ac9 100644
--- a/fdbserver/DataDistribution.actor.cpp
+++ b/fdbserver/DataDistribution.actor.cpp
@@ -2938,6 +2938,9 @@ ACTOR Future dataDistributionTeamCollection(
 TraceEvent("DDTeamCollectionBegin", self->distributorId).detail("Primary", self->primary);
 wait( self->readyToStart || error );
+ while(!self->primary && db->get().recoveryState < RecoveryState::FULLY_RECOVERED) {
+ wait( db->onChange() );
+ }
 TraceEvent("DDTeamCollectionReadyToStart", self->distributorId).detail("Primary", self->primary);
 if(self->badTeamRemover.isReady()) {
@@ -3105,8 +3108,7 @@ ACTOR Future dataDistribution(
 Version recoveryCommitVersion,
 std::vector> primaryDcId,
 std::vector> remoteDcIds,
- double* lastLimited,
- Future remoteRecovered)
+ double* lastLimited)
 {
 state Database cx = openDBOnServer(db, TaskDataDistributionLaunch, true, true);
 cx->locationCacheSize = SERVER_KNOBS->DD_LOCATION_CACHE_SIZE;
@@ -3248,7 +3250,7 @@ ACTOR Future dataDistribution(
 Reference primaryTeamCollection( new DDTeamCollection(cx, myId, lock, output, shardsAffectedByTeamFailure, configuration, primaryDcId, configuration.usableRegions > 1 ?
break; } when ( wait(self->dbInfo->onChange()) ) {} @@ -3383,7 +3384,7 @@ ACTOR Future dataDistributor(DataDistributorInterface di, Reference> > ddStorageServerChanges; state double lastLimited = 0; TraceEvent("DataDistributor", di.id()).detail("StartDD", "RK"); - self->addActor.send( reportErrorsExcept( dataDistribution( self->dbInfo, di.id(), self->configuration->get(), ddStorageServerChanges, recoveryTransactionVersion, self->primaryDcId, self->remoteDcIds, &lastLimited, Void() ), "DataDistribution", di.id(), &normalDataDistributorErrors() ) ); + self->addActor.send( reportErrorsExcept( dataDistribution( self->dbInfo, di.id(), self->configuration->get(), ddStorageServerChanges, recoveryTransactionVersion, self->primaryDcId, self->remoteDcIds, &lastLimited ), "DataDistribution", di.id(), &normalDataDistributorErrors() ) ); self->addActor.send( reportErrorsExcept( rateKeeper( self->dbInfo, ddStorageServerChanges, di.getRateInfo.getFuture(), self->configuration->get(), &lastLimited ), "Ratekeeper", di.id(), &normalRateKeeperErrors() ) ); state Future reply; From efd000dd1116f381de181483cc81577b0da262a7 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Fri, 18 Jan 2019 15:26:47 -0800 Subject: [PATCH 161/226] Remove distributor interface from ClusterControllerData This information is now kept in ServerDBInfo. --- fdbserver/ClusterController.actor.cpp | 29 +++++++++------------------ 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index fadeee3810..7203172075 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -1025,8 +1025,6 @@ public: Version datacenterVersionDifference; bool versionDifferenceUpdated; PromiseStream> addActor; - AsyncVar dataDistributorInterface; - bool rejoined = false; ClusterControllerData( ClusterControllerFullInterface const& ccInterface, LocalityData const& locality ) : id(ccInterface.id()), ac(false), outstandingRequestChecker(Void()), gotProcessClasses(false), gotFullyRecoveredConfig(false), startTime(now()), datacenterVersionDifference(0), versionDifferenceUpdated(false) @@ -2261,17 +2259,15 @@ ACTOR Future waitDDRejoinOrStartDD( ClusterControllerData *self, ClusterCo loop choose { when ( DataDistributorRejoinRequest req = waitNext( clusterInterface->dataDistributorRejoin.getFuture() ) ) { TraceEvent("ClusterController", self->id).detail("DataDistributorRejoinID", req.dataDistributor.id()); - self->dataDistributorInterface.set( req.dataDistributor ); self->db.setDistributor( req.dataDistributor ); - self->rejoined = true; - distributorFailed = waitFailureClient( self->dataDistributorInterface.get().waitFailure, SERVER_KNOBS->WORKER_FAILURE_TIME ); + distributorFailed = waitFailureClient( req.dataDistributor.waitFailure, SERVER_KNOBS->WORKER_FAILURE_TIME ); req.reply.send( Void() ); break; } when ( wait( delay(SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY) ) ) { break; } } - if ( !self->rejoined ) { + if ( !self->db.serverInfo->get().distributor.isValid() ) { newDistributor = startDataDistributor( self ); } @@ -2279,32 +2275,27 @@ ACTOR Future waitDDRejoinOrStartDD( ClusterControllerData *self, ClusterCo loop choose { when ( DataDistributorInterface distributorInterf = wait( newDistributor ) ) { TraceEvent ev("ClusterController", self->id); - const UID myDdId = self->dataDistributorInterface.get().id(); + const UID myDdId = self->db.serverInfo->get().distributor.id(); ev.detail("NewDataDistributorID", 
distributorInterf.id()).detail("Valid", distributorInterf.isValid()); - self->dataDistributorInterface.set( distributorInterf ); - self->rejoined = true; self->db.setDistributor( distributorInterf ); - distributorFailed = waitFailureClient( self->dataDistributorInterface.get().waitFailure, SERVER_KNOBS->WORKER_FAILURE_TIME ); + distributorFailed = waitFailureClient( distributorInterf.waitFailure, SERVER_KNOBS->WORKER_FAILURE_TIME ); newDistributor = Never(); } when ( wait( distributorFailed ) ) { distributorFailed = Never(); TraceEvent("ClusterController", self->id) - .detail("DataDistributorFailed", self->dataDistributorInterface.get().id()) - .detail("Endpoint", self->dataDistributorInterface.get().waitFailure.getEndpoint().token); - self->rejoined = false; - // self->db.setDistributor( DataDistributorInterface() ); + .detail("DataDistributorFailed", self->db.serverInfo->get().distributor.id()) + .detail("Endpoint", self->db.serverInfo->get().distributor.getRateInfo.getEndpoint().token); + self->db.setDistributor( DataDistributorInterface() ); newDistributor = startDataDistributor( self ); } when ( DataDistributorRejoinRequest req = waitNext( clusterInterface->dataDistributorRejoin.getFuture() ) ) { - if ( !self->rejoined ) { - self->dataDistributorInterface.set( req.dataDistributor ); - distributorFailed = waitFailureClient( self->dataDistributorInterface.get().waitFailure, SERVER_KNOBS->WORKER_FAILURE_TIME ); - self->rejoined = true; + if ( !self->db.serverInfo->get().distributor.isValid() ) { self->db.setDistributor( req.dataDistributor ); + distributorFailed = waitFailureClient( req.dataDistributor.waitFailure, SERVER_KNOBS->WORKER_FAILURE_TIME ); TraceEvent("ClusterController", self->id).detail("DataDistributorRejoined", req.dataDistributor.id()); } else { - const UID myDdId = self->dataDistributorInterface.get().id(); + const UID myDdId = self->db.serverInfo->get().distributor.id(); const bool success = myDdId == req.dataDistributor.id(); req.reply.send( Void() ); TraceEvent("ClusterController", self->id) From e0a7162cf8b9a3ec27c919a88c5742ff65ee66a9 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Fri, 18 Jan 2019 15:43:43 -0800 Subject: [PATCH 162/226] Add a failure timeout knob for data distributor. Set default time to 1.0s. 
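The new knob follows the standard ServerKnobs pattern visible in the Knobs.cpp hunk below: init() sets the production default (1.0 seconds), and in randomized simulation BUGGIFY may override it with a deliberately bad value (10.0 seconds) to exercise slow failure detection. A compilable toy version of the pattern; the Knobs class and coinFlip() are illustrative stand-ins for the real knob table and BUGGIFY machinery:

#include <cstdlib>
#include <iostream>

struct Knobs {
    double DD_FAILURE_TIME;
    double WORKER_FAILURE_TIME;

    explicit Knobs(bool randomize) {
        init(DD_FAILURE_TIME, 1.0);
        // In simulation, occasionally make distributor failure detection
        // very slow, so recovery paths that depend on it get exercised.
        if (randomize && coinFlip()) DD_FAILURE_TIME = 10.0;
        init(WORKER_FAILURE_TIME, 1.0);
    }

private:
    static void init(double& knob, double def) { knob = def; }
    static bool coinFlip() { return std::rand() % 2 == 0; }
};

int main() {
    Knobs production(false), simulated(true);
    std::cout << "production DD_FAILURE_TIME = " << production.DD_FAILURE_TIME
              << ", simulated DD_FAILURE_TIME = " << simulated.DD_FAILURE_TIME << "\n";
}

A short default lets the cluster controller re-recruit a dead distributor quickly (waitFailureClient uses DD_FAILURE_TIME as its timeout in the hunks above), while the randomized large value probes the slow-detection paths.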
--- fdbserver/ClusterController.actor.cpp | 8 ++++---- fdbserver/Knobs.cpp | 1 + fdbserver/Knobs.h | 1 + 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 7203172075..c385fb01b3 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -2260,7 +2260,7 @@ ACTOR Future waitDDRejoinOrStartDD( ClusterControllerData *self, ClusterCo when ( DataDistributorRejoinRequest req = waitNext( clusterInterface->dataDistributorRejoin.getFuture() ) ) { TraceEvent("ClusterController", self->id).detail("DataDistributorRejoinID", req.dataDistributor.id()); self->db.setDistributor( req.dataDistributor ); - distributorFailed = waitFailureClient( req.dataDistributor.waitFailure, SERVER_KNOBS->WORKER_FAILURE_TIME ); + distributorFailed = waitFailureClient( req.dataDistributor.waitFailure, SERVER_KNOBS->DD_FAILURE_TIME ); req.reply.send( Void() ); break; } @@ -2278,7 +2278,7 @@ ACTOR Future waitDDRejoinOrStartDD( ClusterControllerData *self, ClusterCo const UID myDdId = self->db.serverInfo->get().distributor.id(); ev.detail("NewDataDistributorID", distributorInterf.id()).detail("Valid", distributorInterf.isValid()); self->db.setDistributor( distributorInterf ); - distributorFailed = waitFailureClient( distributorInterf.waitFailure, SERVER_KNOBS->WORKER_FAILURE_TIME ); + distributorFailed = waitFailureClient( distributorInterf.waitFailure, SERVER_KNOBS->DD_FAILURE_TIME ); newDistributor = Never(); } when ( wait( distributorFailed ) ) { @@ -2292,17 +2292,17 @@ ACTOR Future waitDDRejoinOrStartDD( ClusterControllerData *self, ClusterCo when ( DataDistributorRejoinRequest req = waitNext( clusterInterface->dataDistributorRejoin.getFuture() ) ) { if ( !self->db.serverInfo->get().distributor.isValid() ) { self->db.setDistributor( req.dataDistributor ); - distributorFailed = waitFailureClient( req.dataDistributor.waitFailure, SERVER_KNOBS->WORKER_FAILURE_TIME ); + distributorFailed = waitFailureClient( req.dataDistributor.waitFailure, SERVER_KNOBS->DD_FAILURE_TIME ); TraceEvent("ClusterController", self->id).detail("DataDistributorRejoined", req.dataDistributor.id()); } else { const UID myDdId = self->db.serverInfo->get().distributor.id(); const bool success = myDdId == req.dataDistributor.id(); - req.reply.send( Void() ); TraceEvent("ClusterController", self->id) .detail("DataDistributorRejoin", success ? 
"OK" : "Failed") .detail("OldDataDistributorID", myDdId) .detail("ReqID", req.dataDistributor.id()); } + req.reply.send( Void() ); } } } diff --git a/fdbserver/Knobs.cpp b/fdbserver/Knobs.cpp index 320198ce1c..8ddaeb28db 100644 --- a/fdbserver/Knobs.cpp +++ b/fdbserver/Knobs.cpp @@ -165,6 +165,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) { init( DD_LOCATION_CACHE_SIZE, 2000000 ); if( randomize && BUGGIFY ) DD_LOCATION_CACHE_SIZE = 3; init( MOVEKEYS_LOCK_POLLING_DELAY, 5.0 ); init( DEBOUNCE_RECRUITING_DELAY, 5.0 ); + init( DD_FAILURE_TIME, 1.0 ); if( randomize && BUGGIFY ) DD_FAILURE_TIME = 10.0; // Redwood Storage Engine init( PREFIX_TREE_IMMEDIATE_KEY_SIZE_LIMIT, 30 ); diff --git a/fdbserver/Knobs.h b/fdbserver/Knobs.h index dcb4f74766..9b084d80f2 100644 --- a/fdbserver/Knobs.h +++ b/fdbserver/Knobs.h @@ -128,6 +128,7 @@ public: int64_t DD_LOCATION_CACHE_SIZE; double MOVEKEYS_LOCK_POLLING_DELAY; double DEBOUNCE_RECRUITING_DELAY; + double DD_FAILURE_TIME; // Redwood Storage Engine int PREFIX_TREE_IMMEDIATE_KEY_SIZE_LIMIT; From f5242bda7c1f3bceed0652baa62a27f5a06838b8 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Sat, 19 Jan 2019 16:25:05 -0800 Subject: [PATCH 163/226] Update data distributor to use configuration monitor This enable removal of GetRecoveryInfoRequest from master interface. Remove recoveryTransactionVersion from dataDistribution(). --- fdbserver/DataDistribution.actor.cpp | 73 +++++++++++++++------------- 1 file changed, 39 insertions(+), 34 deletions(-) diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index a835db3ac9..1ec924bd87 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -3105,7 +3105,6 @@ ACTOR Future dataDistribution( Reference> db, UID myId, DatabaseConfiguration configuration, PromiseStream< std::pair> > serverChanges, - Version recoveryCommitVersion, std::vector> primaryDcId, std::vector> remoteDcIds, double* lastLimited) @@ -3244,7 +3243,7 @@ ACTOR Future dataDistribution( actors.push_back( pollMoveKeysLock(cx, lock) ); actors.push_back( reportErrorsExcept( dataDistributionTracker( initData, cx, output, shardsAffectedByTeamFailure, getShardMetrics, getAverageShardBytes.getFuture(), readyToStart, anyZeroHealthyTeams, myId ), "DDTracker", myId, &normalDDQueueErrors() ) ); - actors.push_back( reportErrorsExcept( dataDistributionQueue( cx, output, input.getFuture(), getShardMetrics, processingUnhealthy, tcis, shardsAffectedByTeamFailure, lock, getAverageShardBytes, myId, storageTeamSize, lastLimited, recoveryCommitVersion ), "DDQueue", myId, &normalDDQueueErrors() ) ); + actors.push_back( reportErrorsExcept( dataDistributionQueue( cx, output, input.getFuture(), getShardMetrics, processingUnhealthy, tcis, shardsAffectedByTeamFailure, lock, getAverageShardBytes, myId, storageTeamSize, lastLimited, invalidVersion ), "DDQueue", myId, &normalDDQueueErrors() ) ); vector teamCollectionsPtrs; Reference primaryTeamCollection( new DDTeamCollection(cx, myId, lock, output, shardsAffectedByTeamFailure, configuration, primaryDcId, configuration.usableRegions > 1 ? 
remoteDcIds : std::vector>(), serverChanges, readyToStart.getFuture(), zeroHealthyTeams[0], true, processingUnhealthy) ); @@ -3286,6 +3285,22 @@ struct DataDistributorData : NonCopyable, ReferenceCounted DataDistributorData(Reference> const& db, Reference> const& dbConfig, UID id, PromiseStream> const& addActor) : dbInfo(db), configuration(dbConfig), ddId(id), addActor(addActor) {} + + void refreshDcIds() { + primaryDcId.clear(); + remoteDcIds.clear(); + + const std::vector& regions = configuration->get().regions; + TraceEvent ev("DataDistributor", ddId); + if ( regions.size() > 0 ) { + primaryDcId.push_back( regions[0].dcId ); + ev.detail("PrimaryDcID", regions[0].dcId.toHexString()); + } + if ( regions.size() > 1 ) { + remoteDcIds.push_back( regions[1].dcId ); + ev.detail("SecondaryDcID", regions[1].dcId.toHexString()); + } + } }; ACTOR Future configurationMonitor( Reference self ) { @@ -3349,42 +3364,21 @@ ACTOR Future dataDistributor(DataDistributorInterface di, Reference self( new DataDistributorData(db, configuration, di.id(), addActor) ); state Future collection = actorCollection( self->addActor.getFuture() ); state Future trigger = self->configurationTrigger.onTrigger(); - state Version recoveryTransactionVersion = invalidVersion; - TraceEvent("NewDataDistributorID", di.id()).detail("Valid", di.isValid()); + TraceEvent("NewDataDistributorID", di.id()); self->addActor.send( waitFailureServer(di.waitFailure.getFuture()) ); self->addActor.send( configurationMonitor( self ) ); loop choose { - // Get configuration from the master. Can't use configurationMonitor for it - // because the transaction read needs ratekeeper, which is not started yet. - when ( GetRecoveryInfoReply infoReply = wait( brokenPromiseToNever(self->dbInfo->get().master.getRecoveryInfo.getReply(GetRecoveryInfoRequest(di.id())) )) ) { - configuration->set( infoReply.configuration ); - recoveryTransactionVersion = infoReply.recoveryTransactionVersion; - TraceEvent("DataDistributor", di.id()) - .detail("RecoveryVersion", infoReply.recoveryTransactionVersion) - .detail("Configuration", configuration->get().toString()); - break; - } + when ( wait( trigger ) ) { break; } when ( wait(self->dbInfo->onChange()) ) {} } - - const std::vector& regions = self->configuration->get().regions; - TraceEvent ev("DataDistributor", di.id()); - if ( regions.size() > 0 ) { - self->primaryDcId.push_back( regions[0].dcId ); - ev.detail("PrimaryDcID", regions[0].dcId.toHexString()); - } - if ( regions.size() > 1 ) { - self->remoteDcIds.push_back( regions[1].dcId ); - ev.detail("SecondaryDcID", regions[1].dcId.toHexString()); - } + self->refreshDcIds(); try { - PromiseStream< std::pair> > ddStorageServerChanges; + state PromiseStream< std::pair> > ddStorageServerChanges; state double lastLimited = 0; - TraceEvent("DataDistributor", di.id()).detail("StartDD", "RK"); - self->addActor.send( reportErrorsExcept( dataDistribution( self->dbInfo, di.id(), self->configuration->get(), ddStorageServerChanges, recoveryTransactionVersion, self->primaryDcId, self->remoteDcIds, &lastLimited ), "DataDistribution", di.id(), &normalDataDistributorErrors() ) ); + state Future distributor = reportErrorsExcept( dataDistribution( self->dbInfo, di.id(), self->configuration->get(), ddStorageServerChanges, self->primaryDcId, self->remoteDcIds, &lastLimited ), "DataDistribution", di.id(), &normalDataDistributorErrors() ); self->addActor.send( reportErrorsExcept( rateKeeper( self->dbInfo, ddStorageServerChanges, di.getRateInfo.getFuture(), self->configuration->get(), 
&lastLimited ), "Ratekeeper", di.id(), &normalRateKeeperErrors() ) ); state Future reply; @@ -3393,20 +3387,31 @@ ACTOR Future dataDistributor(DataDistributorInterface di, ReferencedbInfo->get().clusterInterface.id()); + .detail("OldClusterControllerID", lastClusterControllerID) + .detail("ClusterControllerID", self->dbInfo->get().clusterInterface.id()); reply = self->dbInfo->get().clusterInterface.dataDistributorRejoin.getReply(req); + lastClusterControllerID = self->dbInfo->get().clusterInterface.id(); } else { reply = Never(); } + + trigger = self->configurationTrigger.onTrigger(); choose { when (wait(brokenPromiseToNever(reply))) { - lastClusterControllerID = self->dbInfo->get().clusterInterface.id(); TraceEvent("DataDistributorRejoined", di.id()) - .detail("ClusterControllerID", lastClusterControllerID); + .detail("ClusterControllerID", lastClusterControllerID); + } + when (wait(self->dbInfo->onChange())) { + const DataDistributorInterface& distributor = self->dbInfo->get().distributor; + if ( distributor.isValid() && distributor.id() != di.id() ) { + TraceEvent("DataDistributor", di.id()).detail("FoundAnotherDdID", distributor.id()); + break; + } + } + when (wait(trigger)) { + self->refreshDcIds(); + distributor = reportErrorsExcept( dataDistribution( self->dbInfo, di.id(), self->configuration->get(), ddStorageServerChanges, self->primaryDcId, self->remoteDcIds, &lastLimited ), "DataDistribution", di.id(), &normalDataDistributorErrors() ); } - when (wait(self->dbInfo->onChange())) {} - when (wait(trigger)) { break; } // TODO: maybe break here? Since configuration changed. when (wait(collection)) { ASSERT(false); throw internal_error(); From aea602d9c78fd86ce64a29ff6744a7f9291c0fe6 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Sat, 19 Jan 2019 16:37:40 -0800 Subject: [PATCH 164/226] Remove getRecoveryInfo from master interface. 
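For context, the endpoint removed here followed the standard flow request/reply idiom: a RequestStream on the interface, a request struct carrying a ReplyPromise, and a server actor answering in a loop. A minimal sketch of that idiom (illustrative shape only, with placeholder names like myId; not the exact removed code):

    // Client side: send a request on the interface's stream, wait for the reply.
    GetRecoveryInfoReply info = wait( master.getRecoveryInfo.getReply( GetRecoveryInfoRequest(myId) ) );

    // Server side: answer requests as they arrive on the stream.
    loop choose {
        when( GetRecoveryInfoRequest req = waitNext( myInterface.getRecoveryInfo.getFuture() ) ) {
            req.reply.send( GetRecoveryInfoReply( recoveryTransactionVersion, configuration ) );
        }
    }

Note that serializer() writes members positionally, so dropping getRecoveryInfo from MasterInterface::serialize() changes the interface's wire format; that is acceptable here because the previous patch removed the sole caller.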
--- fdbserver/MasterInterface.h | 28 +--------------------------- fdbserver/masterserver.actor.cpp | 12 ------------ 2 files changed, 1 insertion(+), 39 deletions(-) diff --git a/fdbserver/MasterInterface.h b/fdbserver/MasterInterface.h index 948bb4de59..5912bbaaff 100644 --- a/fdbserver/MasterInterface.h +++ b/fdbserver/MasterInterface.h @@ -36,7 +36,6 @@ struct MasterInterface { RequestStream< struct TLogRejoinRequest > tlogRejoin; // sent by tlog (whether or not rebooted) to communicate with a new master RequestStream< struct ChangeCoordinatorsRequest > changeCoordinators; RequestStream< struct GetCommitVersionRequest > getCommitVersion; - RequestStream< struct GetRecoveryInfoRequest > getRecoveryInfo; NetworkAddress address() const { return changeCoordinators.getEndpoint().address; } @@ -44,7 +43,7 @@ struct MasterInterface { template void serialize(Archive& ar) { ASSERT( ar.protocolVersion() >= 0x0FDB00A200040001LL ); - serializer(ar, locality, waitFailure, tlogRejoin, changeCoordinators, getCommitVersion, getRecoveryInfo); + serializer(ar, locality, waitFailure, tlogRejoin, changeCoordinators, getCommitVersion); } void initEndpoints() { @@ -134,31 +133,6 @@ struct GetCommitVersionRequest { } }; -struct GetRecoveryInfoReply { - Version recoveryTransactionVersion; - DatabaseConfiguration configuration; - - GetRecoveryInfoReply() : recoveryTransactionVersion(invalidVersion) {} - explicit GetRecoveryInfoReply(Version v, DatabaseConfiguration c) : recoveryTransactionVersion(v), configuration(c) {} - - template - void serialize(Ar& ar) { - serializer(ar, recoveryTransactionVersion, configuration); - } -}; - -struct GetRecoveryInfoRequest { - UID reqId; - ReplyPromise reply; - - GetRecoveryInfoRequest() {} - explicit GetRecoveryInfoRequest(UID id) : reqId(id) {} - template - void serialize(Ar& ar) { - serializer(ar, reqId, reply); - } -}; - struct LifetimeToken { UID ccID; int64_t count; diff --git a/fdbserver/masterserver.actor.cpp b/fdbserver/masterserver.actor.cpp index 5258dad0da..376152bf43 100644 --- a/fdbserver/masterserver.actor.cpp +++ b/fdbserver/masterserver.actor.cpp @@ -911,16 +911,6 @@ ACTOR Future provideVersions(Reference self) { } } -ACTOR Future provideRecoveryInfo( Reference self ) { - loop choose { - when( GetRecoveryInfoRequest req = waitNext(self->myInterface.getRecoveryInfo.getFuture()) ) { - TraceEvent("MasterGetRecoveryInfo", self->dbgid).detail("ReqID", req.reqId); - GetRecoveryInfoReply reply(self->recoveryTransactionVersion, self->configuration); - req.reply.send( reply ); - } - } -} - std::pair findRange( CoalescedKeyRangeMap& key_resolver, Standalone>& movedRanges, int src, int dest ) { auto ranges = key_resolver.ranges(); auto prev = ranges.begin(); @@ -1358,8 +1348,6 @@ ACTOR Future masterCore( Reference self ) { .detail("RecoveryDuration", recoveryDuration) .trackLatest("MasterRecoveryState"); - self->addActor.send( provideRecoveryInfo(self) ); - if( self->resolvers.size() > 1 ) self->addActor.send( resolutionBalancing(self) ); From c38b2a8c38eb24069d324a6f12032515daabd0e4 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Tue, 22 Jan 2019 16:26:00 -0800 Subject: [PATCH 165/226] Change masterId to distributorId in tracker. This reflects the change of moving data distribution out of master server. 
--- fdbserver/DataDistributionTracker.actor.cpp | 26 ++++++++++----------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/fdbserver/DataDistributionTracker.actor.cpp b/fdbserver/DataDistributionTracker.actor.cpp index 9b877ee645..f4d6e589f2 100644 --- a/fdbserver/DataDistributionTracker.actor.cpp +++ b/fdbserver/DataDistributionTracker.actor.cpp @@ -64,7 +64,7 @@ struct ShardTrackedData { struct DataDistributionTracker { Database cx; - UID masterId; + UID distributorId; KeyRangeMap< ShardTrackedData > shards; ActorCollection sizeChanges; @@ -79,8 +79,8 @@ struct DataDistributionTracker { Promise readyToStart; Reference> anyZeroHealthyTeams; - DataDistributionTracker(Database cx, UID masterId, Promise const& readyToStart, PromiseStream const& output, Reference shardsAffectedByTeamFailure, Reference> anyZeroHealthyTeams) - : cx(cx), masterId( masterId ), dbSizeEstimate( new AsyncVar() ), + DataDistributionTracker(Database cx, UID distributorId, Promise const& readyToStart, PromiseStream const& output, Reference shardsAffectedByTeamFailure, Reference> anyZeroHealthyTeams) + : cx(cx), distributorId( distributorId ), dbSizeEstimate( new AsyncVar() ), maxShardSize( new AsyncVar>() ), sizeChanges(false), readyToStart(readyToStart), output( output ), shardsAffectedByTeamFailure(shardsAffectedByTeamFailure), anyZeroHealthyTeams(anyZeroHealthyTeams) {} @@ -328,7 +328,7 @@ ACTOR Future shardSplitter( int numShards = splitKeys.size() - 1; if( g_random->random01() < 0.01 ) { - TraceEvent("RelocateShardStartSplitx100", self->masterId) + TraceEvent("RelocateShardStartSplitx100", self->distributorId) .detail("Begin", printable(keys.begin)) .detail("End", printable(keys.end)) .detail("MaxBytes", shardBounds.max.bytes) @@ -449,7 +449,7 @@ Future shardMerger( //restarting shard tracker will derefenced values in the shard map, so make a copy KeyRange mergeRange = merged; - TraceEvent("RelocateShardMergeMetrics", self->masterId) + TraceEvent("RelocateShardMergeMetrics", self->distributorId) .detail("OldKeys", printable(keys)) .detail("NewKeys", printable(mergeRange)) .detail("EndingSize", endingStats.bytes) @@ -495,7 +495,7 @@ ACTOR Future shardEvaluator( } } - /*TraceEvent("ShardEvaluator", self->masterId) + /*TraceEvent("ShardEvaluator", self->distributorId) .detail("TrackerId", trackerID) .detail("ShouldSplit", shouldSplit) .detail("ShouldMerge", shouldMerge) @@ -531,7 +531,7 @@ ACTOR Future shardTracker( // Since maxShardSize will become present for all shards at once, avoid slow tasks with a short delay wait( delay( 0, TaskDataDistribution ) ); - /*TraceEvent("ShardTracker", self->masterId) + /*TraceEvent("ShardTracker", self->distributorId) .detail("Begin", printable(keys.begin)) .detail("End", printable(keys.end)) .detail("TrackerID", trackerID) @@ -571,7 +571,7 @@ void restartShardTrackers( DataDistributionTracker* self, KeyRangeRef keys, Opti // we can use the old size if it is available. This will be the case when merging shards. 
if( startingSize.present() ) { ASSERT( ranges.size() == 1 ); - /*TraceEvent("ShardTrackerSizePreset", self->masterId) + /*TraceEvent("ShardTrackerSizePreset", self->distributorId) .detail("Keys", printable(keys)) .detail("Size", startingSize.get().metrics.bytes) .detail("Merges", startingSize.get().merges);*/ @@ -589,7 +589,7 @@ void restartShardTrackers( DataDistributionTracker* self, KeyRangeRef keys, Opti ACTOR Future trackInitialShards(DataDistributionTracker *self, Reference initData) { - TraceEvent("TrackInitialShards", self->masterId).detail("InitialShardCount", initData->shards.size()); + TraceEvent("TrackInitialShards", self->distributorId).detail("InitialShardCount", initData->shards.size()); //This line reduces the priority of shard initialization to prevent interference with failure monitoring. //SOMEDAY: Figure out what this priority should actually be @@ -659,9 +659,9 @@ ACTOR Future dataDistributionTracker( FutureStream> getAverageShardBytes, Promise readyToStart, Reference> anyZeroHealthyTeams, - UID masterId) + UID distributorId) { - state DataDistributionTracker self(cx, masterId, readyToStart, output, shardsAffectedByTeamFailure, anyZeroHealthyTeams); + state DataDistributionTracker self(cx, distributorId, readyToStart, output, shardsAffectedByTeamFailure, anyZeroHealthyTeams); state Future loggingTrigger = Void(); try { wait( trackInitialShards( &self, initData ) ); @@ -672,7 +672,7 @@ ACTOR Future dataDistributionTracker( req.send( self.maxShardSize->get().get() / 2 ); } when( wait( loggingTrigger ) ) { - TraceEvent("DDTrackerStats", self.masterId) + TraceEvent("DDTrackerStats", self.distributorId) .detail("Shards", self.shards.size()) .detail("TotalSizeBytes", self.dbSizeEstimate->get()) .trackLatest( "DDTrackerStats" ); @@ -685,7 +685,7 @@ ACTOR Future dataDistributionTracker( when( wait( self.sizeChanges.getResult() ) ) {} } } catch (Error& e) { - TraceEvent(SevError, "DataDistributionTrackerError", self.masterId).error(e); + TraceEvent(SevError, "DataDistributionTrackerError", self.distributorId).error(e); throw e; } } From 99e109d6c59d2ebf4f167a0db8845a8d2fefe2a5 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Wed, 23 Jan 2019 15:53:17 -0800 Subject: [PATCH 166/226] Fix timeout error due to lost exception Found in tests: a move keys conflict exception was not handled because its Future object was never waited on. As a result, the data distributor did not die, and the database checker couldn't get the metric and kept retrying until timeout.
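The hazard, sketched with the names used in this patch (arguments elided for brevity): an error raised inside an actor is stored in the actor's Future, and if nothing ever waits on that Future the error is silently dropped.

    state Future<Void> distributor = reportErrorsExcept(
        dataDistribution( /* ... */ ), "DataDistribution", di.id(),
        &normalDataDistributorErrors() );

    // Fix: route the Future into the actor collection, whose getResult() is
    // waited on in the main loop, so a movekeys_conflict propagates and
    // terminates the role instead of vanishing.
    self->addActor.send( distributor );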
--- fdbserver/DataDistribution.actor.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index 1ec924bd87..13725c90cc 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -3379,6 +3379,7 @@ ACTOR Future dataDistributor(DataDistributorInterface di, Reference> > ddStorageServerChanges; state double lastLimited = 0; state Future distributor = reportErrorsExcept( dataDistribution( self->dbInfo, di.id(), self->configuration->get(), ddStorageServerChanges, self->primaryDcId, self->remoteDcIds, &lastLimited ), "DataDistribution", di.id(), &normalDataDistributorErrors() ); + self->addActor.send( distributor ); self->addActor.send( reportErrorsExcept( rateKeeper( self->dbInfo, ddStorageServerChanges, di.getRateInfo.getFuture(), self->configuration->get(), &lastLimited ), "Ratekeeper", di.id(), &normalRateKeeperErrors() ) ); state Future reply; @@ -3397,22 +3398,25 @@ ACTOR Future dataDistributor(DataDistributorInterface di, ReferenceconfigurationTrigger.onTrigger(); choose { - when (wait(brokenPromiseToNever(reply))) { + when ( wait( brokenPromiseToNever(reply) ) ) { TraceEvent("DataDistributorRejoined", di.id()) .detail("ClusterControllerID", lastClusterControllerID); } - when (wait(self->dbInfo->onChange())) { + when ( wait( self->dbInfo->onChange() ) ) { const DataDistributorInterface& distributor = self->dbInfo->get().distributor; if ( distributor.isValid() && distributor.id() != di.id() ) { - TraceEvent("DataDistributor", di.id()).detail("FoundAnotherDdID", distributor.id()); + TraceEvent("DataDistributorExit", di.id()).detail("CurrentLiveID", distributor.id()); break; } } - when (wait(trigger)) { + when ( wait( trigger ) ) { + TraceEvent("DataDistributorRestart", di.id()) .detail("ClusterControllerID", lastClusterControllerID); self->refreshDcIds(); distributor = reportErrorsExcept( dataDistribution( self->dbInfo, di.id(), self->configuration->get(), ddStorageServerChanges, self->primaryDcId, self->remoteDcIds, &lastLimited ), "DataDistribution", di.id(), &normalDataDistributorErrors() ); + self->addActor.send( distributor ); } - when (wait(collection)) { + when ( wait( collection ) ) { ASSERT(false); throw internal_error(); } From 3135f1d84bec51585f61b51c5e78eb0ef1c106bf Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Tue, 22 Jan 2019 21:19:31 -0800 Subject: [PATCH 167/226] Cluster controller ignores distributor rejoin After the controller starts a data distributor, it waits on that one and ignores any rejoin requests received later. Add remoteRecovered() to data distribution for remote team collection.
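remoteRecovered() gates the remote team collection on the recovery state machine reaching ALL_LOGS_RECRUITED, and its future is combined with the existing readiness signal using flow's operator&& on Future<Void>, which yields a future that becomes ready only once both inputs are ready (the same combinator as the success(a) && success(b) chain in QuietDatabase). Roughly:

    // Remote team collection readiness = DD-wide readyToStart AND remote logs recruited.
    Future<Void> remoteReady = readyToStart.getFuture() && remoteRecovered(db);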
--- fdbserver/ClusterController.actor.cpp | 3 +++ fdbserver/DataDistribution.actor.cpp | 27 +++++++++++++++++---------- fdbserver/QuietDatabase.actor.cpp | 11 +++++------ 3 files changed, 25 insertions(+), 16 deletions(-) diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index c385fb01b3..5abcf85cb2 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -2290,6 +2290,8 @@ ACTOR Future waitDDRejoinOrStartDD( ClusterControllerData *self, ClusterCo newDistributor = startDataDistributor( self ); } when ( DataDistributorRejoinRequest req = waitNext( clusterInterface->dataDistributorRejoin.getFuture() ) ) { + TraceEvent("ClusterController", self->id).detail("DataDistributorRejoinIgnored", req.dataDistributor.id()); + /* if ( !self->db.serverInfo->get().distributor.isValid() ) { self->db.setDistributor( req.dataDistributor ); distributorFailed = waitFailureClient( req.dataDistributor.waitFailure, SERVER_KNOBS->DD_FAILURE_TIME ); @@ -2302,6 +2304,7 @@ ACTOR Future waitDDRejoinOrStartDD( ClusterControllerData *self, ClusterCo .detail("OldDataDistributorID", myDdId) .detail("ReqID", req.dataDistributor.id()); } + */ req.reply.send( Void() ); } } diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index 13725c90cc..b70b2066be 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -2919,6 +2919,15 @@ ACTOR Future serverGetTeamRequests(TeamCollectionInterface tci, DDTeamColl } } +ACTOR Future remoteRecovered( Reference> db ) { + TraceEvent("DDTrackerStarting"); + while ( db->get().recoveryState < RecoveryState::ALL_LOGS_RECRUITED ) { + TraceEvent("DDTrackerStarting").detail("RecoveryState", (int)db->get().recoveryState); + wait( db->onChange() ); + } + return Void(); +} + // Keep track of servers and teams -- serves requests for getRandomTeam ACTOR Future dataDistributionTeamCollection( Reference teamCollection, @@ -2938,9 +2947,6 @@ ACTOR Future dataDistributionTeamCollection( TraceEvent("DDTeamCollectionBegin", self->distributorId).detail("Primary", self->primary); wait( self->readyToStart || error ); - while(!self->primary && db->get().recoveryState < RecoveryState::FULLY_RECOVERED) { - wait( db->onChange() ); - } TraceEvent("DDTeamCollectionReadyToStart", self->distributorId).detail("Primary", self->primary); if(self->badTeamRemover.isReady()) { @@ -3249,7 +3255,7 @@ ACTOR Future dataDistribution( Reference primaryTeamCollection( new DDTeamCollection(cx, myId, lock, output, shardsAffectedByTeamFailure, configuration, primaryDcId, configuration.usableRegions > 1 ? 
remoteDcIds : std::vector>(), serverChanges, readyToStart.getFuture(), zeroHealthyTeams[0], true, processingUnhealthy) ); teamCollectionsPtrs.push_back(primaryTeamCollection.getPtr()); if (configuration.usableRegions > 1) { - Reference remoteTeamCollection( new DDTeamCollection(cx, myId, lock, output, shardsAffectedByTeamFailure, configuration, remoteDcIds, Optional>>(), serverChanges, readyToStart.getFuture(), zeroHealthyTeams[1], false, processingUnhealthy) ); + Reference remoteTeamCollection( new DDTeamCollection(cx, myId, lock, output, shardsAffectedByTeamFailure, configuration, remoteDcIds, Optional>>(), serverChanges, readyToStart.getFuture() && remoteRecovered(db), zeroHealthyTeams[1], false, processingUnhealthy) ); teamCollectionsPtrs.push_back(remoteTeamCollection.getPtr()); remoteTeamCollection->teamCollections = teamCollectionsPtrs; actors.push_back( reportErrorsExcept( dataDistributionTeamCollection( remoteTeamCollection, initData, tcis[1], db ), "DDTeamCollectionSecondary", myId, &normalDDQueueErrors() ) ); @@ -3370,10 +3376,12 @@ ACTOR Future dataDistributor(DataDistributorInterface di, ReferenceaddActor.send( configurationMonitor( self ) ); loop choose { - when ( wait( trigger ) ) { break; } + when ( wait( trigger ) ) { + self->refreshDcIds(); + break; + } when ( wait(self->dbInfo->onChange()) ) {} } - self->refreshDcIds(); try { state PromiseStream< std::pair> > ddStorageServerChanges; @@ -3398,12 +3406,10 @@ ACTOR Future dataDistributor(DataDistributorInterface di, ReferenceconfigurationTrigger.onTrigger(); choose { - when ( wait( brokenPromiseToNever(reply) ) ) { - TraceEvent("DataDistributorRejoined", di.id()) - .detail("ClusterControllerID", lastClusterControllerID); - } + when ( wait( brokenPromiseToNever(reply) ) ) {} when ( wait( self->dbInfo->onChange() ) ) { const DataDistributorInterface& distributor = self->dbInfo->get().distributor; + TraceEvent("DataDistributor", di.id()).detail("IncomingID", distributor.id()).detail("Valid", distributor.isValid()); if ( distributor.isValid() && distributor.id() != di.id() ) { TraceEvent("DataDistributorExit", di.id()).detail("CurrentLiveID", distributor.id()); break; @@ -3413,6 +3419,7 @@ ACTOR Future dataDistributor(DataDistributorInterface di, ReferencerefreshDcIds(); + TraceEvent("DataDistributor", di.id()).detail("RestartDistribution", self->configuration->get().toString()); distributor = reportErrorsExcept( dataDistribution( self->dbInfo, di.id(), self->configuration->get(), ddStorageServerChanges, self->primaryDcId, self->remoteDcIds, &lastLimited ), "DataDistribution", di.id(), &normalDataDistributorErrors() ); self->addActor.send( distributor ); } diff --git a/fdbserver/QuietDatabase.actor.cpp b/fdbserver/QuietDatabase.actor.cpp index 43a288fd23..88f0357da8 100644 --- a/fdbserver/QuietDatabase.actor.cpp +++ b/fdbserver/QuietDatabase.actor.cpp @@ -75,7 +75,7 @@ ACTOR Future getDataDistributorWorker( Database cx, Referenceget().distributor.address() ) { - TraceEvent("GetDataDistributorWorker").detail("Stage", "GotWorkers").detail("DataDistributorId", dbInfo->get().distributor.id()).detail("WorkerId", workers[i].first.id()); + TraceEvent("GetDataDistributorWorker").detail("Stage", "GotWorkers").detail("DataDistributorId", dbInfo->get().distributor.id()).detail("WorkerId", workers[i].first.id()); return std::make_pair(workers[i].first, dbInfo->get().distributor.id()); } } @@ -88,7 +88,7 @@ ACTOR Future getDataDistributorWorker( Database cx, Reference getDataInFlight( Database cx, WorkerInterface distributorWorker ) { try { 
TraceEvent("DataInFlight").detail("Stage", "ContactingDataDistributor"); @@ -104,8 +104,7 @@ ACTOR Future getDataInFlight( Database cx, WorkerInterface distributorW } -//Gets the number of bytes in flight from the master -//Convenience method that first finds the master worker from a zookeeper interface +// Gets the number of bytes in flight from the data distributor. ACTOR Future getDataInFlight( Database cx, Reference> dbInfo ) { DistributorPair distributorPair = wait( getDataDistributorWorker(cx, dbInfo) ); int64_t dataInFlight = wait(getDataInFlight(cx, distributorPair.first)); @@ -278,7 +277,7 @@ ACTOR Future getDataDistributionActive( Database cx, WorkerInterface distr } // Checks to see if any storage servers are being recruited -ACTOR Future getStorageServersRecruiting( Database cx, Reference> dbInfo, WorkerInterface distributorWorker, UID distributorUID ) { +ACTOR Future getStorageServersRecruiting( Database cx, WorkerInterface distributorWorker, UID distributorUID ) { try { TraceEvent("StorageServersRecruiting").detail("Stage", "ContactingDataDistributor"); TraceEventFields recruitingMessage = wait( timeoutError(distributorWorker.eventLogRequest.getReply( @@ -346,7 +345,7 @@ ACTOR Future waitForQuietDatabase( Database cx, Reference dataDistributionQueueSize = getDataDistributionQueueSize( cx, distributorWorker, dataInFlightGate == 0); state Future storageQueueSize = getMaxStorageServerQueueSize( cx, dbInfo ); state Future dataDistributionActive = getDataDistributionActive( cx, distributorWorker ); - state Future storageServersRecruiting = getStorageServersRecruiting ( cx, dbInfo, distributorWorker, distributorUID ); + state Future storageServersRecruiting = getStorageServersRecruiting ( cx, distributorWorker, distributorUID ); wait( success( dataInFlight ) && success( tLogQueueSize ) && success( dataDistributionQueueSize ) && success( storageQueueSize ) && success( dataDistributionActive ) && success( storageServersRecruiting ) ); From be5c962bb7af3b81ae1e1fa22e0f2eff1045af13 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Thu, 24 Jan 2019 16:58:31 -0800 Subject: [PATCH 168/226] Add a new configuration version key \xff/conf/version This fixes a bug found by the upgrade test, where the data distributor's configuration monitor was watching excludedServersVersionKey, which doesn't change in the ChangeConfig workload. As a result, the data distributor was not aware of configuration changes. Add this new key and make sure it is updated by every configuration change so that the monitor can detect such changes.
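The key is maintained with the usual watch/bump pattern. Writers bump it to a fresh random value inside the same transaction as the configuration change and declare a read conflict on it, so concurrent configuration changes conflict instead of racing; the reader installs a watch, commits, and sleeps until the value changes. Sketch (mirroring the code in this patch):

    // Writer: every configuration-mutating transaction also does
    tr.addReadConflictRange( singleKeyRange(configVersionKey) );
    tr.set( configVersionKey, g_random->randomUniqueID().toString() );

    // Reader (configurationMonitor): re-read the config, then block on a watch.
    state Future<Void> watchFuture = tr.watch( configVersionKey );
    wait( tr.commit() );   // the watch becomes active once the transaction commits
    wait( watchFuture );   // fires when some other transaction sets the key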
--- fdbclient/ManagementAPI.actor.cpp | 15 +++++++++++++-- fdbclient/SystemData.cpp | 2 ++ fdbclient/SystemData.h | 5 +++++ fdbserver/ClusterController.actor.cpp | 14 -------------- fdbserver/DataDistribution.actor.cpp | 14 ++++++++------ 5 files changed, 28 insertions(+), 22 deletions(-) diff --git a/fdbclient/ManagementAPI.actor.cpp b/fdbclient/ManagementAPI.actor.cpp index 72dbc69fe1..5c11328568 100644 --- a/fdbclient/ManagementAPI.actor.cpp +++ b/fdbclient/ManagementAPI.actor.cpp @@ -287,6 +287,7 @@ ACTOR Future changeConfig( Database cx, std::map tooLong = delay(4.5); + state std::string versionKey = g_random->randomUniqueID().toString(); loop { try { tr.setOption( FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE ); @@ -432,6 +433,9 @@ ACTOR Future changeConfig( Database cx, std::mapfirst), StringRef(i->second) ); + tr.addReadConflictRange( singleKeyRange(configVersionKey) ); + tr.set( configVersionKey, versionKey ); + wait( tr.commit() ); break; } catch (Error& e) { @@ -1125,6 +1129,7 @@ Reference autoQuorumChange( int desired ) { return Reference excludeServers( Database cx, vector servers ) { state Transaction tr(cx); state std::string versionKey = g_random->randomUniqueID().toString(); + state std::string excludeVersionKey = g_random->randomUniqueID().toString(); loop { try { tr.setOption( FDBTransactionOptions::ACCESS_SYSTEM_KEYS ); @@ -1132,7 +1137,9 @@ ACTOR Future excludeServers( Database cx, vector servers tr.setOption( FDBTransactionOptions::LOCK_AWARE ); tr.addReadConflictRange( singleKeyRange(excludedServersVersionKey) ); //To conflict with parallel includeServers - tr.set( excludedServersVersionKey, versionKey ); + tr.addReadConflictRange( singleKeyRange(configVersionKey) ); + tr.set( configVersionKey, versionKey ); + tr.set( excludedServersVersionKey, excludeVersionKey ); for(auto& s : servers) tr.set( encodeExcludedServersKey(s), StringRef() ); @@ -1150,6 +1157,7 @@ ACTOR Future includeServers( Database cx, vector servers state bool includeAll = false; state Transaction tr(cx); state std::string versionKey = g_random->randomUniqueID().toString(); + state std::string excludeVersionKey = g_random->randomUniqueID().toString(); loop { try { tr.setOption( FDBTransactionOptions::ACCESS_SYSTEM_KEYS ); @@ -1159,8 +1167,11 @@ ACTOR Future includeServers( Database cx, vector servers // includeServers might be used in an emergency transaction, so make sure it is retry-self-conflicting and CAUSAL_WRITE_RISKY tr.setOption( FDBTransactionOptions::CAUSAL_WRITE_RISKY ); tr.addReadConflictRange( singleKeyRange(excludedServersVersionKey) ); + tr.addReadConflictRange( singleKeyRange(configVersionKey) ); + + tr.set( configVersionKey, versionKey ); + tr.set( excludedServersVersionKey, excludeVersionKey ); - tr.set( excludedServersVersionKey, versionKey ); for(auto& s : servers ) { if (!s.isValid()) { tr.clear( excludedServersKeys ); diff --git a/fdbclient/SystemData.cpp b/fdbclient/SystemData.cpp index 9959eeda37..d4288cb104 100644 --- a/fdbclient/SystemData.cpp +++ b/fdbclient/SystemData.cpp @@ -381,6 +381,8 @@ std::string encodeExcludedServersKey( AddressExclusion const& addr ) { return excludedServersPrefix.toString() + as; } +const KeyRef configVersionKey = LiteralStringRef("\xff/conf/version"); + const KeyRangeRef workerListKeys( LiteralStringRef("\xff/worker/"), LiteralStringRef("\xff/worker0") ); const KeyRef workerListPrefix = workerListKeys.begin; diff --git a/fdbclient/SystemData.h b/fdbclient/SystemData.h index 9b2276589b..d07a1789d7 100644 --- a/fdbclient/SystemData.h +++ 
b/fdbclient/SystemData.h @@ -133,6 +133,11 @@ extern const KeyRef excludedServersVersionKey; // The value of this key shall b const AddressExclusion decodeExcludedServersKey( KeyRef const& key ); // where key.startsWith(excludedServersPrefix) std::string encodeExcludedServersKey( AddressExclusion const& ); +// "\xff/conf/version" := "" +// This is the key representing the version of the configuration, which should be updated for each +// new configuration. +extern const KeyRef configVersionKey; + // "\xff/workers/[[processID]]" := "" // Asynchronously updated by the cluster controller, this is a list of fdbserver processes that have joined the cluster // and are currently (recently) available diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 5abcf85cb2..9439e8d39f 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -2291,20 +2291,6 @@ ACTOR Future waitDDRejoinOrStartDD( ClusterControllerData *self, ClusterCo } when ( DataDistributorRejoinRequest req = waitNext( clusterInterface->dataDistributorRejoin.getFuture() ) ) { TraceEvent("ClusterController", self->id).detail("DataDistributorRejoinIgnored", req.dataDistributor.id()); - /* - if ( !self->db.serverInfo->get().distributor.isValid() ) { - self->db.setDistributor( req.dataDistributor ); - distributorFailed = waitFailureClient( req.dataDistributor.waitFailure, SERVER_KNOBS->DD_FAILURE_TIME ); - TraceEvent("ClusterController", self->id).detail("DataDistributorRejoined", req.dataDistributor.id()); - } else { - const UID myDdId = self->db.serverInfo->get().distributor.id(); - const bool success = myDdId == req.dataDistributor.id(); - TraceEvent("ClusterController", self->id) - .detail("DataDistributorRejoin", success ? 
"OK" : "Failed") - .detail("OldDataDistributorID", myDdId) - .detail("ReqID", req.dataDistributor.id()); - } - */ req.reply.send( Void() ); } } diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index b70b2066be..3d89549e35 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -3289,8 +3289,8 @@ struct DataDistributorData : NonCopyable, ReferenceCounted PromiseStream< std::pair> > ddStorageServerChanges; PromiseStream> addActor; - DataDistributorData(Reference> const& db, Reference> const& dbConfig, UID id, PromiseStream> const& addActor) - : dbInfo(db), configuration(dbConfig), ddId(id), addActor(addActor) {} + DataDistributorData(Reference> const& db, Reference> const& dbConfig, UID id) + : dbInfo(db), configuration(dbConfig), ddId(id) {} void refreshDcIds() { primaryDcId.clear(); @@ -3316,6 +3316,7 @@ ACTOR Future configurationMonitor( Reference self ) { loop { try { + TraceEvent("DataDistributor", self->ddId).detail("MonitorConfiguration", "Starting"); tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); tr.setOption( FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE ); Standalone results = wait( tr.getRange( configKeys, CLIENT_KNOBS->TOO_MANY ) ); @@ -3329,7 +3330,7 @@ ACTOR Future configurationMonitor( Reference self ) { self->configurationTrigger.trigger(); } - state Future watchFuture = tr.watch(excludedServersVersionKey); + state Future watchFuture = tr.watch(configVersionKey); wait( tr.commit() ); wait( watchFuture ); break; @@ -3365,9 +3366,8 @@ static std::set const& normalRateKeeperErrors() { ACTOR Future dataDistributor(DataDistributorInterface di, Reference> db ) { state UID lastClusterControllerID(0,0); - state PromiseStream> addActor; state Reference> configuration( new AsyncVar(DatabaseConfiguration()) ); - state Reference self( new DataDistributorData(db, configuration, di.id(), addActor) ); + state Reference self( new DataDistributorData(db, configuration, di.id()) ); state Future collection = actorCollection( self->addActor.getFuture() ); state Future trigger = self->configurationTrigger.onTrigger(); @@ -3378,12 +3378,14 @@ ACTOR Future dataDistributor(DataDistributorInterface di, ReferencerefreshDcIds(); + trigger = self->configurationTrigger.onTrigger(); break; } when ( wait(self->dbInfo->onChange()) ) {} } try { + TraceEvent("DataDistributorRunning", di.id()); state PromiseStream< std::pair> > ddStorageServerChanges; state double lastLimited = 0; state Future distributor = reportErrorsExcept( dataDistribution( self->dbInfo, di.id(), self->configuration->get(), ddStorageServerChanges, self->primaryDcId, self->remoteDcIds, &lastLimited ), "DataDistribution", di.id(), &normalDataDistributorErrors() ); @@ -3404,7 +3406,6 @@ ACTOR Future dataDistributor(DataDistributorInterface di, ReferenceconfigurationTrigger.onTrigger(); choose { when ( wait( brokenPromiseToNever(reply) ) ) {} when ( wait( self->dbInfo->onChange() ) ) { @@ -3422,6 +3423,7 @@ ACTOR Future dataDistributor(DataDistributorInterface di, Referenceconfiguration->get().toString()); distributor = reportErrorsExcept( dataDistribution( self->dbInfo, di.id(), self->configuration->get(), ddStorageServerChanges, self->primaryDcId, self->remoteDcIds, &lastLimited ), "DataDistribution", di.id(), &normalDataDistributorErrors() ); self->addActor.send( distributor ); + trigger = self->configurationTrigger.onTrigger(); } when ( wait( collection ) ) { ASSERT(false); From 6a655143e8b20cc6cc81a70883af882e940bf39b Mon Sep 17 00:00:00 2001 
From: Jingyu Zhou Date: Thu, 24 Jan 2019 21:19:11 -0800 Subject: [PATCH 169/226] A follow-on fix for config key usage And some trace event cleanups. --- fdbclient/ManagementAPI.actor.cpp | 4 ++++ fdbserver/ClusterController.actor.cpp | 11 +++++------ fdbserver/DataDistribution.actor.cpp | 2 +- fdbserver/masterserver.actor.cpp | 2 +- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/fdbclient/ManagementAPI.actor.cpp b/fdbclient/ManagementAPI.actor.cpp index 5c11328568..a912578cce 100644 --- a/fdbclient/ManagementAPI.actor.cpp +++ b/fdbclient/ManagementAPI.actor.cpp @@ -702,6 +702,7 @@ ConfigureAutoResult parseConfig( StatusObject const& status ) { ACTOR Future autoConfig( Database cx, ConfigureAutoResult conf ) { state Transaction tr(cx); + state std::string versionKey = g_random->randomUniqueID().toString(); if(!conf.address_class.size()) return ConfigurationResult::INCOMPLETE_CONFIGURATION; //FIXME: correct return type @@ -751,6 +752,9 @@ ACTOR Future autoConfig( Database cx, ConfigureAutoRe tr.set(kv.first, kv.second); } + tr.addReadConflictRange( singleKeyRange(configVersionKey) ); + tr.set( configVersionKey, versionKey ); + wait( tr.commit() ); return ConfigurationResult::SUCCESS; } catch( Error &e ) { diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 9439e8d39f..cda78a48f7 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -2274,9 +2274,9 @@ ACTOR Future waitDDRejoinOrStartDD( ClusterControllerData *self, ClusterCo // Wait on failures and restart it. loop choose { when ( DataDistributorInterface distributorInterf = wait( newDistributor ) ) { - TraceEvent ev("ClusterController", self->id); - const UID myDdId = self->db.serverInfo->get().distributor.id(); - ev.detail("NewDataDistributorID", distributorInterf.id()).detail("Valid", distributorInterf.isValid()); + TraceEvent("ClusterController", self->id) + .detail("DataDistributorID", distributorInterf.id()) + .detail("Valid", distributorInterf.isValid()); self->db.setDistributor( distributorInterf ); distributorFailed = waitFailureClient( distributorInterf.waitFailure, SERVER_KNOBS->DD_FAILURE_TIME ); newDistributor = Never(); @@ -2284,13 +2284,12 @@ ACTOR Future waitDDRejoinOrStartDD( ClusterControllerData *self, ClusterCo when ( wait( distributorFailed ) ) { distributorFailed = Never(); TraceEvent("ClusterController", self->id) - .detail("DataDistributorFailed", self->db.serverInfo->get().distributor.id()) - .detail("Endpoint", self->db.serverInfo->get().distributor.getRateInfo.getEndpoint().token); + .detail("DataDistributorDied", self->db.serverInfo->get().distributor.id()); self->db.setDistributor( DataDistributorInterface() ); newDistributor = startDataDistributor( self ); } when ( DataDistributorRejoinRequest req = waitNext( clusterInterface->dataDistributorRejoin.getFuture() ) ) { - TraceEvent("ClusterController", self->id).detail("DataDistributorRejoinIgnored", req.dataDistributor.id()); + // TraceEvent("ClusterController", self->id).detail("DataDistributorRejoinIgnored", req.dataDistributor.id()); req.reply.send( Void() ); } } diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index 3d89549e35..0d665501ce 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -3437,7 +3437,7 @@ ACTOR Future dataDistributor(DataDistributorInterface di, Reference configurationMonitor( Reference self ) { self->registrationTrigger.trigger(); } - state Future 
watchFuture = tr.watch(excludedServersVersionKey); + state Future watchFuture = tr.watch(configVersionKey); wait(tr.commit()); wait(watchFuture); break; From 39e4a591540fd86e2cda94d2ef143f11743d0e93 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Mon, 28 Jan 2019 09:25:15 -0800 Subject: [PATCH 170/226] Add used worker IDs to cluster controller This "usedIds" is updated when receiving a master registration message, so that when recruiting new data distributor, existing assignment is known. --- fdbserver/ClusterController.actor.cpp | 60 +++++++++++++++++++++++---- fdbserver/DataDistributorInterface.h | 6 ++- fdbserver/worker.actor.cpp | 5 +-- 3 files changed, 59 insertions(+), 12 deletions(-) diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index cda78a48f7..81c76fa0e5 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -27,6 +27,7 @@ #include "fdbserver/MoveKeys.h" #include "fdbserver/WorkerInterface.h" #include "fdbserver/LeaderElection.h" +#include "fdbserver/LogSystemConfig.h" #include "fdbserver/WaitFailure.h" #include "fdbserver/ClusterRecruitmentInterface.h" #include "fdbserver/ServerDBInfo.h" @@ -999,8 +1000,48 @@ public: return false; } + void updateUsedIds(RegisterMasterRequest const& req) { + auto dbInfo = db.serverInfo->get(); + std::map>, int> idUsed; + idUsed[clusterControllerProcessId]++; + idUsed[masterProcessId]++; + const auto& distributorInterf = dbInfo.distributor; + if (distributorInterf.isValid()) { + idUsed[distributorInterf.locality.processId()]++; + } + for (const auto& tlog : req.logSystemConfig.tLogs) { + for (const auto& locality: tlog.tLogLocalities) { + if (locality.processId().present()) { + idUsed[locality.processId()]++; + } else { + TraceEvent("UsedID").detail("Tlog", locality.toString()); + } + } + } + for (const MasterProxyInterface& interf : req.proxies) { + ASSERT(interf.locality.processId().present()); + idUsed[interf.locality.processId()]++; + } + for (const ResolverInterface& interf: req.resolvers) { + ASSERT(interf.locality.processId().present()); + idUsed[interf.locality.processId()]++; + } + usedIds.set( idUsed ); + } + + void traceUsedIds() { + auto idUsed = usedIds.get(); + for (const auto& it : idUsed) { + TraceEvent ev("UsedID"); + if (it.first.present()) ev.detail("Key", it.first.get().contents().toString()); + ev.detail("Value", idUsed[it.first]); + ev.detail("Address", id_worker[it.first].interf.address().toString()); + } + } + std::map< Optional>, WorkerInfo > id_worker; - std::map< Optional>, ProcessClass > id_class; //contains the mapping from process id to process class from the database + std::map< Optional>, ProcessClass > id_class; //contains the mapping from process id to process class from the database + AsyncVar>, int>> usedIds; // current used process IDs reported by master Standalone lastProcessClasses; bool gotProcessClasses; bool gotFullyRecoveredConfig; @@ -1658,6 +1699,9 @@ void clusterRegisterMaster( ClusterControllerData* self, RegisterMasterRequest c self->db.serverInfo->set( dbInfo ); } + self->updateUsedIds(req); + self->traceUsedIds(); + checkOutstandingRequests(self); } @@ -2232,20 +2276,22 @@ ACTOR Future startDataDistributor( ClusterControllerDa } while (true) { - std::map>, int> id_used; - id_used[self->clusterControllerProcessId]++; - id_used[self->masterProcessId]++; + if ( self->usedIds.get().size() == 0 ) { + wait( self->usedIds.onChange() ); + } + self->traceUsedIds(); + std::map>, int> id_used = self->usedIds.get(); state 
WorkerFitnessInfo data_distributor = self->getWorkerForRoleInDatacenter(dcId, ProcessClass::DataDistributor, ProcessClass::NeverAssign, self->db.config, id_used); state InitializeDataDistributorRequest req; req.reqId = g_random->randomUniqueID(); - TraceEvent("DataDistributor", req.reqId).detail("Recruit", data_distributor.worker.first.address()); + TraceEvent("DataDistributorReqID", req.reqId).detail("Recruit", data_distributor.worker.first.address()); ErrorOr distributor = wait( data_distributor.worker.first.dataDistributor.getReplyUnlessFailedFor(req, 1, 0) ); if (distributor.present()) { - TraceEvent("DataDistributor", req.reqId).detail("Recruited", data_distributor.worker.first.address()); + TraceEvent("DataDistributorReqID", req.reqId).detail("Recruited", data_distributor.worker.first.address()); return distributor.get(); } - TraceEvent("DataDistributor", req.reqId) + TraceEvent("DataDistributorReqID", req.reqId) .detail("RecruitFailed", data_distributor.worker.first.address()) .error(distributor.getError()); } diff --git a/fdbserver/DataDistributorInterface.h b/fdbserver/DataDistributorInterface.h index b210f256cb..ddd59cc701 100644 --- a/fdbserver/DataDistributorInterface.h +++ b/fdbserver/DataDistributorInterface.h @@ -22,14 +22,16 @@ #define FOUNDATIONDB_DATADISTRIBUTORINTERFACE_H #include "fdbrpc/fdbrpc.h" +#include "fdbrpc/Locality.h" struct DataDistributorInterface { RequestStream> waitFailure; RequestStream getRateInfo; + struct LocalityData locality; bool valid; DataDistributorInterface() : valid(false) {} - explicit DataDistributorInterface(bool v) : valid(v) {} + explicit DataDistributorInterface(struct LocalityData l) : locality(l), valid(true) {} bool isValid() const { return valid; } UID id() const { return getRateInfo.getEndpoint().token; } @@ -43,7 +45,7 @@ struct DataDistributorInterface { template void serialize(Archive& ar) { - serializer(ar, waitFailure, getRateInfo, valid); + serializer(ar, waitFailure, getRateInfo, locality, valid); } }; diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index 577a7f93d0..7fa0010c71 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -715,9 +715,8 @@ ACTOR Future workerServer( Reference connFile, Refe req.reply.send(recruited); } when ( InitializeDataDistributorRequest req = waitNext(interf.dataDistributor.getFuture()) ) { - DataDistributorInterface recruited(true); - TraceEvent("DataDistributorReceived", req.reqId).detail("Addr", interf.address()) - .detail("DataDistributorId", recruited.id()); + DataDistributorInterface recruited(locality); + TraceEvent("DataDistributorReceived", req.reqId).detail("DataDistributorId", recruited.id()); startRole( Role::DATA_DISTRIBUTOR, recruited.id(), interf.id() ); Future dataDistributorProcess = dataDistributor( recruited, dbInfo ); From 00f225322991ffcbd4a37b200b00aedb9e1b92f2 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Mon, 28 Jan 2019 11:29:39 -0800 Subject: [PATCH 171/226] Piggyback data distributor interface in worker registration This allows cluster controller to know data distributor during worker registration phase, thus avoiding recruiting a new data distributor after starting. Also change the worker to skip creating a new data distributor if there is already one running on the worker, which can trigger operation timeout in tests. 
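The worker keeps its locally running distributor in an AsyncVar<Optional<DataDistributorInterface>> and attaches the current value to every registration message, so a newly elected cluster controller learns of an existing distributor without a separate rejoin round trip. Registration loop, simplified from this patch:

    loop {
        RegisterWorkerRequest request( interf, initialClass, processClass,
                                       asyncPriorityInfo->get(), requestGeneration++,
                                       ddInterf->get() );   // piggybacked; may be empty
        Future<RegisterWorkerReply> registrationReply = ccInterface->get().present()
            ? brokenPromiseToNever( ccInterface->get().get().registerWorker.getReply( request ) )
            : Never();
        // ... handle the reply, or re-register when the cluster controller
        // or the local distributor interface changes
    }

On the controller side, registerWorker() adopts req.distributorInterf only when no valid distributor is currently known, which also lets the worker skip recruiting a second distributor on the same process.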
--- fdbserver/ClusterController.actor.cpp | 21 +++++++++++---- fdbserver/ClusterRecruitmentInterface.h | 7 ++--- fdbserver/worker.actor.cpp | 34 ++++++++++++++++++------- 3 files changed, 45 insertions(+), 17 deletions(-) diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 81c76fa0e5..9e7798e283 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -89,6 +89,7 @@ public: struct DBInfo { Reference> clientInfo; Reference> serverInfo; + Future distributorFailed; ProcessIssuesMap clientsWithIssues, workersWithIssues; std::map incompatibleConnections; ClientVersionMap clientVersionMap; @@ -106,6 +107,7 @@ public: DBInfo() : masterRegistrationCount(0), recoveryStalled(false), forceRecovery(false), unfinishedRecoveries(0), logGenerations(0), clientInfo( new AsyncVar( ClientDBInfo() ) ), serverInfo( new AsyncVar( ServerDBInfo() ) ), + distributorFailed( Never() ), db( DatabaseContext::create( clientInfo, Future(), LocalityData(), true, TaskDefaultEndpoint, true ) ) // SOMEDAY: Locality! { } @@ -115,6 +117,11 @@ public: newInfo.id = g_random->randomUniqueID(); newInfo.distributor = distributorInterf; serverInfo->set( newInfo ); + if ( distributorInterf.isValid() ) { + distributorFailed = waitFailureClient( distributorInterf.waitFailure, SERVER_KNOBS->DD_FAILURE_TIME ); + } else { + distributorFailed = Never(); + } } }; @@ -1759,6 +1766,11 @@ void registerWorker( RegisterWorkerRequest req, ClusterControllerData *self ) { } } + if ( req.distributorInterf.present() && !self->db.serverInfo->get().distributor.isValid() ) { + const DataDistributorInterface& di = req.distributorInterf.get(); + TraceEvent("ClusterController").detail("RegisterDataDistributor", di.id()).detail("Valid", di.isValid()); + self->db.setDistributor( di ); + } if( info == self->id_worker.end() ) { self->id_worker[w.locality.processId()] = WorkerInfo( workerAvailabilityWatch( w, newProcessClass, self ), req.reply, req.generation, w, req.initialClass, newProcessClass, newPriorityInfo ); checkOutstandingRequests( self ); @@ -2299,18 +2311,19 @@ ACTOR Future startDataDistributor( ClusterControllerDa ACTOR Future waitDDRejoinOrStartDD( ClusterControllerData *self, ClusterControllerFullInterface *clusterInterface ) { state Future newDistributor = Never(); - state Future distributorFailed = Never(); // wait for a while to see if existing data distributor will join. 
loop choose { when ( DataDistributorRejoinRequest req = waitNext( clusterInterface->dataDistributorRejoin.getFuture() ) ) { TraceEvent("ClusterController", self->id).detail("DataDistributorRejoinID", req.dataDistributor.id()); self->db.setDistributor( req.dataDistributor ); - distributorFailed = waitFailureClient( req.dataDistributor.waitFailure, SERVER_KNOBS->DD_FAILURE_TIME ); req.reply.send( Void() ); break; } when ( wait( delay(SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY) ) ) { break; } + when ( wait( self->db.serverInfo->onChange() ) ) { // Rejoins via worker registration + if ( self->db.serverInfo->get().distributor.isValid() ) break; + } } if ( !self->db.serverInfo->get().distributor.isValid() ) { @@ -2324,11 +2337,9 @@ ACTOR Future waitDDRejoinOrStartDD( ClusterControllerData *self, ClusterCo .detail("DataDistributorID", distributorInterf.id()) .detail("Valid", distributorInterf.isValid()); self->db.setDistributor( distributorInterf ); - distributorFailed = waitFailureClient( distributorInterf.waitFailure, SERVER_KNOBS->DD_FAILURE_TIME ); newDistributor = Never(); } - when ( wait( distributorFailed ) ) { - distributorFailed = Never(); + when ( wait( self->db.distributorFailed ) ) { TraceEvent("ClusterController", self->id) .detail("DataDistributorDied", self->db.serverInfo->get().distributor.id()); self->db.setDistributor( DataDistributorInterface() ); diff --git a/fdbserver/ClusterRecruitmentInterface.h b/fdbserver/ClusterRecruitmentInterface.h index 8e021be991..c3363027bf 100644 --- a/fdbserver/ClusterRecruitmentInterface.h +++ b/fdbserver/ClusterRecruitmentInterface.h @@ -169,15 +169,16 @@ struct RegisterWorkerRequest { ProcessClass processClass; ClusterControllerPriorityInfo priorityInfo; Generation generation; + Optional distributorInterf; ReplyPromise reply; RegisterWorkerRequest() : priorityInfo(ProcessClass::UnsetFit, false, ClusterControllerPriorityInfo::FitnessUnknown) {} - RegisterWorkerRequest(WorkerInterface wi, ProcessClass initialClass, ProcessClass processClass, ClusterControllerPriorityInfo priorityInfo, Generation generation) : - wi(wi), initialClass(initialClass), processClass(processClass), priorityInfo(priorityInfo), generation(generation) {} + RegisterWorkerRequest(WorkerInterface wi, ProcessClass initialClass, ProcessClass processClass, ClusterControllerPriorityInfo priorityInfo, Generation generation, Optional ddInterf) : + wi(wi), initialClass(initialClass), processClass(processClass), priorityInfo(priorityInfo), generation(generation), distributorInterf(ddInterf) {} template void serialize( Ar& ar ) { - serializer(ar, wi, initialClass, processClass, priorityInfo, generation, reply); + serializer(ar, wi, initialClass, processClass, priorityInfo, generation, distributorInterf, reply); } }; diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index 7fa0010c71..a27c2a28bb 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -135,7 +135,7 @@ ACTOR Future handleIOErrors( Future actor, IClosable* store, UID id, } } -ACTOR Future workerHandleErrors(FutureStream errors) { +ACTOR Future workerHandleErrors(FutureStream errors, Reference>> ddInterf) { loop choose { when( ErrorInfo _err = waitNext(errors) ) { ErrorInfo err = _err; @@ -151,6 +151,9 @@ ACTOR Future workerHandleErrors(FutureStream errors) { } endRole(err.role, err.id, "Error", ok, err.error); + if (err.role == Role::DATA_DISTRIBUTOR && ddInterf->get().present()) { + ddInterf->set(Optional()); + } if (err.error.code() == error_code_please_reboot || err.error.code() 
== error_code_io_timeout) throw err.error; } @@ -267,13 +270,20 @@ std::vector< DiskStore > getDiskStores( std::string folder ) { return result; } -ACTOR Future registrationClient( Reference>> ccInterface, WorkerInterface interf, Reference> asyncPriorityInfo, ProcessClass initialClass) { +ACTOR Future registrationClient( + Reference>> ccInterface, + WorkerInterface interf, + Reference> asyncPriorityInfo, + ProcessClass initialClass, + Reference>> ddInterf) { // Keeps the cluster controller (as it may be re-elected) informed that this worker exists // The cluster controller uses waitFailureClient to find out if we die, and returns from registrationReply (requiring us to re-register) + // The registration request piggybacks optional distributor interface if it exists. state Generation requestGeneration = 0; state ProcessClass processClass = initialClass; loop { - Future registrationReply = ccInterface->get().present() ? brokenPromiseToNever( ccInterface->get().get().registerWorker.getReply( RegisterWorkerRequest(interf, initialClass, processClass, asyncPriorityInfo->get(), requestGeneration++) ) ) : Never(); + RegisterWorkerRequest request(interf, initialClass, processClass, asyncPriorityInfo->get(), requestGeneration++, ddInterf->get()); + Future registrationReply = ccInterface->get().present() ? brokenPromiseToNever( ccInterface->get().get().registerWorker.getReply(request) ) : Never(); choose { when ( RegisterWorkerReply reply = wait( registrationReply )) { processClass = reply.processClass; @@ -522,7 +532,8 @@ ACTOR Future monitorServerDBInfo( Reference workerServer( Reference connFile, Reference>> ccInterface, LocalityData locality, Reference> asyncPriorityInfo, ProcessClass initialClass, std::string folder, int64_t memoryLimit, std::string metricsConnFile, std::string metricsPrefix, Promise recoveredDiskFiles) { state PromiseStream< ErrorInfo > errors; - state Future handleErrors = workerHandleErrors( errors.getFuture() ); // Needs to be stopped last + state Reference>> ddInterf( new AsyncVar>() ); + state Future handleErrors = workerHandleErrors( errors.getFuture(), ddInterf ); // Needs to be stopped last state ActorCollection errorForwarders(false); state Future loggingTrigger = Void(); state double loggingDelay = SERVER_KNOBS->WORKER_LOGGING_INTERVAL; @@ -650,7 +661,7 @@ ACTOR Future workerServer( Reference connFile, Refe wait(waitForAll(recoveries)); recoveredDiskFiles.send(Void()); - errorForwarders.add( registrationClient( ccInterface, interf, asyncPriorityInfo, initialClass ) ); + errorForwarders.add( registrationClient( ccInterface, interf, asyncPriorityInfo, initialClass, ddInterf ) ); TraceEvent("RecoveriesComplete", interf.id()); @@ -716,11 +727,16 @@ ACTOR Future workerServer( Reference connFile, Refe } when ( InitializeDataDistributorRequest req = waitNext(interf.dataDistributor.getFuture()) ) { DataDistributorInterface recruited(locality); - TraceEvent("DataDistributorReceived", req.reqId).detail("DataDistributorId", recruited.id()); - startRole( Role::DATA_DISTRIBUTOR, recruited.id(), interf.id() ); + if ( ddInterf->get().present() ) { + recruited = ddInterf->get().get(); + } else { + startRole( Role::DATA_DISTRIBUTOR, recruited.id(), interf.id() ); - Future dataDistributorProcess = dataDistributor( recruited, dbInfo ); - errorForwarders.add( forwardError( errors, Role::DATA_DISTRIBUTOR, recruited.id(), dataDistributorProcess ) ); + Future dataDistributorProcess = dataDistributor( recruited, dbInfo ); + errorForwarders.add( forwardError( errors, Role::DATA_DISTRIBUTOR, 
recruited.id(), dataDistributorProcess ) ); + ddInterf->set(Optional(recruited)); + } + TraceEvent("DataDistributorReceived", req.reqId).detail("DataDistributorId", recruited.id()); req.reply.send(recruited); } when( InitializeTLogRequest req = waitNext(interf.tLog.getFuture()) ) { From 7897616164b6f22b649d90e33bbccb96e9e01fc6 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Tue, 29 Jan 2019 10:14:11 -0800 Subject: [PATCH 172/226] Fix wait failure bug on cluster controller The setDistributor() sets an AsyncVar and then runs waitFailureClient. This ordering is wrong because the AsyncVar::set triggers the other loop to run first, which will wait on Never(). The correct code should wait on the Future returned by the waitFailureClient. --- fdbserver/ClusterController.actor.cpp | 15 +++++++++------ fdbserver/DataDistribution.actor.cpp | 6 +++--- fdbserver/TLogServer.actor.cpp | 1 - fdbserver/worker.actor.cpp | 1 + 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 9e7798e283..45be11c892 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -116,12 +116,12 @@ public: ServerDBInfo newInfo = serverInfo->get(); newInfo.id = g_random->randomUniqueID(); newInfo.distributor = distributorInterf; - serverInfo->set( newInfo ); if ( distributorInterf.isValid() ) { distributorFailed = waitFailureClient( distributorInterf.waitFailure, SERVER_KNOBS->DD_FAILURE_TIME ); } else { distributorFailed = Never(); } + serverInfo->set( newInfo ); } }; @@ -2296,14 +2296,14 @@ ACTOR Future startDataDistributor( ClusterControllerDa state WorkerFitnessInfo data_distributor = self->getWorkerForRoleInDatacenter(dcId, ProcessClass::DataDistributor, ProcessClass::NeverAssign, self->db.config, id_used); state InitializeDataDistributorRequest req; req.reqId = g_random->randomUniqueID(); - TraceEvent("DataDistributorReqID", req.reqId).detail("Recruit", data_distributor.worker.first.address()); + TraceEvent("ClusterController_DataDistributorReqID", req.reqId).detail("Recruit", data_distributor.worker.first.address()); ErrorOr distributor = wait( data_distributor.worker.first.dataDistributor.getReplyUnlessFailedFor(req, 1, 0) ); if (distributor.present()) { - TraceEvent("DataDistributorReqID", req.reqId).detail("Recruited", data_distributor.worker.first.address()); + TraceEvent("ClusterController_DataDistributorReqID", req.reqId).detail("Recruited", data_distributor.worker.first.address()); return distributor.get(); } - TraceEvent("DataDistributorReqID", req.reqId) + TraceEvent("ClusterController_DataDistributorReqID", req.reqId) .detail("RecruitFailed", data_distributor.worker.first.address()) .error(distributor.getError()); } @@ -2315,14 +2315,17 @@ ACTOR Future waitDDRejoinOrStartDD( ClusterControllerData *self, ClusterCo // wait for a while to see if existing data distributor will join. 
loop choose { when ( DataDistributorRejoinRequest req = waitNext( clusterInterface->dataDistributorRejoin.getFuture() ) ) { - TraceEvent("ClusterController", self->id).detail("DataDistributorRejoinID", req.dataDistributor.id()); + TraceEvent("ClusterController_Rejoin", self->id).detail("DataDistributorID", req.dataDistributor.id()); self->db.setDistributor( req.dataDistributor ); req.reply.send( Void() ); break; } when ( wait( delay(SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY) ) ) { break; } when ( wait( self->db.serverInfo->onChange() ) ) { // Rejoins via worker registration - if ( self->db.serverInfo->get().distributor.isValid() ) break; + if ( self->db.serverInfo->get().distributor.isValid() ) { + TraceEvent("ClusterController_InfoChange", self->id).detail("DataDistributorID", self->db.serverInfo->get().distributor.id()); + break; + } } } diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index 0d665501ce..ed8eeb355c 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -3413,7 +3413,7 @@ ACTOR Future dataDistributor(DataDistributorInterface di, Reference dataDistributor(DataDistributorInterface di, Reference updateLogSystem(TLogData* self, Reference logData, L ACTOR Future tLogStart( TLogData* self, InitializeTLogRequest req, LocalityData locality ) { state TLogInterface recruited(self->dbgid, locality); - recruited.locality = locality; recruited.initEndpoints(); DUMPTOKEN( recruited.peekMessages ); diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index a27c2a28bb..0d8af70e71 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -290,6 +290,7 @@ ACTOR Future registrationClient( asyncPriorityInfo->set( reply.priorityInfo ); } when ( wait( ccInterface->onChange() )) { } + when ( wait( ddInterf->onChange() ) ) {} } } } From 8c61de318f7c26fba3865167d5087db621568c69 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Wed, 30 Jan 2019 09:05:12 -0800 Subject: [PATCH 173/226] Fix segfault and no_more_servers errors --- fdbserver/ClusterController.actor.cpp | 51 ++++++++++++++++----------- fdbserver/DataDistribution.actor.cpp | 31 +++++++--------- 2 files changed, 42 insertions(+), 40 deletions(-) diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 45be11c892..8af069062b 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -1033,22 +1033,23 @@ public: ASSERT(interf.locality.processId().present()); idUsed[interf.locality.processId()]++; } - usedIds.set( idUsed ); + usedIds.swap( idUsed ); + usedIdsTrigger.trigger(); } void traceUsedIds() { - auto idUsed = usedIds.get(); - for (const auto& it : idUsed) { + for (const auto& it : usedIds) { TraceEvent ev("UsedID"); if (it.first.present()) ev.detail("Key", it.first.get().contents().toString()); - ev.detail("Value", idUsed[it.first]); + ev.detail("Value", usedIds[it.first]); ev.detail("Address", id_worker[it.first].interf.address().toString()); } } std::map< Optional>, WorkerInfo > id_worker; std::map< Optional>, ProcessClass > id_class; //contains the mapping from process id to process class from the database - AsyncVar>, int>> usedIds; // current used process IDs reported by master + std::map< Optional>, int> usedIds; // current used process IDs reported by master + AsyncTrigger usedIdsTrigger; Standalone lastProcessClasses; bool gotProcessClasses; bool gotFullyRecoveredConfig; @@ -1089,6 +1090,7 @@ public: ~ClusterControllerData() { 
ac.clear(false); id_worker.clear(); + usedIds.clear(); } }; @@ -1395,6 +1397,7 @@ ACTOR Future workerAvailabilityWatch( WorkerInterface worker, ProcessClass failedWorkerInfo.reply.send( RegisterWorkerReply(failedWorkerInfo.processClass, failedWorkerInfo.priorityInfo) ); } cluster->id_worker.erase( worker.locality.processId() ); + cluster->usedIds.erase( worker.locality.processId() ); cluster->updateWorkerList.set( worker.locality.processId(), Optional() ); return Void(); } @@ -2283,29 +2286,35 @@ ACTOR Future updateDatacenterVersionDifference( ClusterControllerData *sel ACTOR Future startDataDistributor( ClusterControllerData *self ) { state Optional dcId = self->clusterControllerDcId; + state InitializeDataDistributorRequest req; while ( !self->clusterControllerProcessId.present() || !self->masterProcessId.present() ) { wait( delay(SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY) ); } while (true) { - if ( self->usedIds.get().size() == 0 ) { - wait( self->usedIds.onChange() ); - } - self->traceUsedIds(); - std::map>, int> id_used = self->usedIds.get(); - state WorkerFitnessInfo data_distributor = self->getWorkerForRoleInDatacenter(dcId, ProcessClass::DataDistributor, ProcessClass::NeverAssign, self->db.config, id_used); - state InitializeDataDistributorRequest req; - req.reqId = g_random->randomUniqueID(); - TraceEvent("ClusterController_DataDistributorReqID", req.reqId).detail("Recruit", data_distributor.worker.first.address()); + try { + if ( self->usedIds.size() == 0 ) { + wait( self->usedIdsTrigger.onTrigger() ); + } + self->traceUsedIds(); + std::map>, int> id_used = self->usedIds; + state WorkerFitnessInfo data_distributor = self->getWorkerForRoleInDatacenter(dcId, ProcessClass::DataDistributor, ProcessClass::NeverAssign, self->db.config, id_used); + req.reqId = g_random->randomUniqueID(); + TraceEvent("ClusterController_DataDistributorReqID", req.reqId).detail("Recruit", data_distributor.worker.first.address()); - ErrorOr distributor = wait( data_distributor.worker.first.dataDistributor.getReplyUnlessFailedFor(req, 1, 0) ); - if (distributor.present()) { - TraceEvent("ClusterController_DataDistributorReqID", req.reqId).detail("Recruited", data_distributor.worker.first.address()); - return distributor.get(); + ErrorOr distributor = wait( data_distributor.worker.first.dataDistributor.getReplyUnlessFailedFor(req, 1, 0) ); + if (distributor.present()) { + TraceEvent("ClusterController_DataDistributorReqID", req.reqId).detail("Recruited", data_distributor.worker.first.address()); + return distributor.get(); + } + } + catch (Error& e) { + TraceEvent("ClusterController_DataDistributorReqID", req.reqId).error(e); + if ( e.code() != error_code_no_more_servers ) { + throw; + } + wait( delay(SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY) ); } - TraceEvent("ClusterController_DataDistributorReqID", req.reqId) - .detail("RecruitFailed", data_distributor.worker.first.address()) - .error(distributor.getError()); } } diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index ed8eeb355c..9ac6369536 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -3316,7 +3316,7 @@ ACTOR Future configurationMonitor( Reference self ) { loop { try { - TraceEvent("DataDistributor", self->ddId).detail("MonitorConfiguration", "Starting"); + TraceEvent("DataDistributor_MonitorConfigurationStart", self->ddId); tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); tr.setOption( FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE ); Standalone 
results = wait( tr.getRange( configKeys, CLIENT_KNOBS->TOO_MANY ) ); @@ -3325,7 +3325,7 @@ ACTOR Future configurationMonitor( Reference self ) { DatabaseConfiguration conf; conf.fromKeyValues( (VectorRef) results ); if ( conf != self->configuration->get() ) { - TraceEvent("DataDistributor", self->ddId).detail("UpdateConfiguration", conf.toString()); + TraceEvent("DataDistributor_UpdateConfiguration", self->ddId).detail("Config", conf.toString()); self->configuration->set( conf ); self->configurationTrigger.trigger(); } @@ -3371,7 +3371,7 @@ ACTOR Future dataDistributor(DataDistributorInterface di, Reference collection = actorCollection( self->addActor.getFuture() ); state Future trigger = self->configurationTrigger.onTrigger(); - TraceEvent("NewDataDistributorID", di.id()); + TraceEvent("DataDistributor_Starting", di.id()); self->addActor.send( waitFailureServer(di.waitFailure.getFuture()) ); self->addActor.send( configurationMonitor( self ) ); @@ -3385,7 +3385,7 @@ ACTOR Future dataDistributor(DataDistributorInterface di, Reference> > ddStorageServerChanges; state double lastLimited = 0; state Future distributor = reportErrorsExcept( dataDistribution( self->dbInfo, di.id(), self->configuration->get(), ddStorageServerChanges, self->primaryDcId, self->remoteDcIds, &lastLimited ), "DataDistribution", di.id(), &normalDataDistributorErrors() ); @@ -3397,7 +3397,7 @@ ACTOR Future dataDistributor(DataDistributorInterface di, ReferencedbInfo->get().clusterInterface.id() != lastClusterControllerID ) { // Rejoin the new cluster controller DataDistributorRejoinRequest req(di); - TraceEvent("DataDistributorRejoining", di.id()) + TraceEvent("DataDistributor_Rejoining", di.id()) .detail("OldClusterControllerID", lastClusterControllerID) .detail("ClusterControllerID", self->dbInfo->get().clusterInterface.id()); reply = self->dbInfo->get().clusterInterface.dataDistributorRejoin.getReply(req); @@ -3407,20 +3407,13 @@ ACTOR Future dataDistributor(DataDistributorInterface di, ReferencedbInfo->onChange() ) ) { - const DataDistributorInterface& distributor = self->dbInfo->get().distributor; - TraceEvent("DataDistributor", di.id()).detail("IncomingID", distributor.id()).detail("Valid", distributor.isValid()); - if ( distributor.isValid() && distributor.id() != di.id() ) { - TraceEvent("DataDistributorExit", di.id()).detail("CurrentLiveID", distributor.id()); - // break; - } - } + when ( wait( brokenPromiseToNever(reply) ) ) { reply = Never(); } + when ( wait( self->dbInfo->onChange() ) ) {} when ( wait( trigger ) ) { - TraceEvent("DataDistributorRestart", di.id()) - .detail("ClusterControllerID", lastClusterControllerID); + TraceEvent("DataDistributor_Restart", di.id()) + .detail("ClusterControllerID", lastClusterControllerID) + .detail("Configuration", self->configuration->get().toString()); self->refreshDcIds(); - TraceEvent("DataDistributor", di.id()).detail("RestartDistribution", self->configuration->get().toString()); distributor = reportErrorsExcept( dataDistribution( self->dbInfo, di.id(), self->configuration->get(), ddStorageServerChanges, self->primaryDcId, self->remoteDcIds, &lastLimited ), "DataDistribution", di.id(), &normalDataDistributorErrors() ); self->addActor.send( distributor ); trigger = self->configurationTrigger.onTrigger(); @@ -3434,10 +3427,10 @@ ACTOR Future dataDistributor(DataDistributorInterface di, Reference Date: Thu, 31 Jan 2019 09:01:59 -0800 Subject: [PATCH 174/226] Update fdbserver/DataDistributorInterface.h Co-Authored-By: jzhou77 --- fdbserver/DataDistributorInterface.h | 2 
+- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbserver/DataDistributorInterface.h b/fdbserver/DataDistributorInterface.h index ddd59cc701..0f20892eaf 100644 --- a/fdbserver/DataDistributorInterface.h +++ b/fdbserver/DataDistributorInterface.h @@ -31,7 +31,7 @@ struct DataDistributorInterface { bool valid; DataDistributorInterface() : valid(false) {} - explicit DataDistributorInterface(struct LocalityData l) : locality(l), valid(true) {} + explicit DataDistributorInterface(const struct LocalityData& l) : locality(l), valid(true) {} bool isValid() const { return valid; } UID id() const { return getRateInfo.getEndpoint().token; } From 5fb48083cd14d6c2151386507b6c7116227accb1 Mon Sep 17 00:00:00 2001 From: Evan Tschannen <36455792+etschannen@users.noreply.github.com> Date: Thu, 31 Jan 2019 09:02:18 -0800 Subject: [PATCH 175/226] Update fdbserver/ClusterController.actor.cpp Co-Authored-By: jzhou77 --- fdbserver/ClusterController.actor.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 8af069062b..737d8fbfe7 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -1090,7 +1090,6 @@ public: ~ClusterControllerData() { ac.clear(false); id_worker.clear(); - usedIds.clear(); } }; From 107b361396ab579d06429581825e47e70d2d8e70 Mon Sep 17 00:00:00 2001 From: Evan Tschannen <36455792+etschannen@users.noreply.github.com> Date: Thu, 31 Jan 2019 09:02:27 -0800 Subject: [PATCH 176/226] Update fdbclient/SystemData.h Co-Authored-By: jzhou77 --- fdbclient/SystemData.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbclient/SystemData.h b/fdbclient/SystemData.h index d07a1789d7..eba7caa0ce 100644 --- a/fdbclient/SystemData.h +++ b/fdbclient/SystemData.h @@ -133,7 +133,7 @@ extern const KeyRef excludedServersVersionKey; // The value of this key shall b const AddressExclusion decodeExcludedServersKey( KeyRef const& key ); // where key.startsWith(excludedServersPrefix) std::string encodeExcludedServersKey( AddressExclusion const& ); -// "\xff/conf/version" := "" +// "\xff/conf/confChange" := "" // This is the key representing the version of the configuration, which should be updated for each // new configuration. extern const KeyRef configVersionKey; From abc3c01fb2a498883b5f18704c5bb7d67b76da13 Mon Sep 17 00:00:00 2001 From: Evan Tschannen <36455792+etschannen@users.noreply.github.com> Date: Thu, 31 Jan 2019 09:02:36 -0800 Subject: [PATCH 177/226] Update fdbclient/SystemData.cpp Co-Authored-By: jzhou77 --- fdbclient/SystemData.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbclient/SystemData.cpp b/fdbclient/SystemData.cpp index d4288cb104..3c591392e4 100644 --- a/fdbclient/SystemData.cpp +++ b/fdbclient/SystemData.cpp @@ -381,7 +381,7 @@ std::string encodeExcludedServersKey( AddressExclusion const& addr ) { return excludedServersPrefix.toString() + as; } -const KeyRef configVersionKey = LiteralStringRef("\xff/conf/version"); +const KeyRef configVersionKey = LiteralStringRef("\xff/conf/confChange"); const KeyRangeRef workerListKeys( LiteralStringRef("\xff/worker/"), LiteralStringRef("\xff/worker0") ); const KeyRef workerListPrefix = workerListKeys.begin; From b3d163311433d21107388671f45d3a25da6101d2 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Wed, 30 Jan 2019 19:53:15 -0800 Subject: [PATCH 178/226] Fix bugs of missing request The quiet database check can fail to send out requests and then report a timeout. This seems to be caused by reusing a request that carries the same ReplyPromise. Another bug: the proxy can wait unnecessarily long for a database change even though the distributor is already known to it.
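The first bug comes down to reply promises being one-shot: a retry loop that reuses the same request object silently loses every reply after the first, because the promise inside it has already been consumed. A minimal sketch of the same pitfall in standard C++ (illustrative only, not FDB code):

    #include <future>
    #include <iostream>

    int main() {
        // Stand-in for the ReplyPromise carried inside a request.
        std::promise<int> reply;
        std::future<int> f = reply.get_future();

        reply.set_value(1);                 // first attempt fulfills the promise
        std::cout << f.get() << "\n";

        try {
            reply.set_value(2);             // a retried request must not reuse it
        } catch (const std::future_error& e) {
            std::cout << "second reply lost: " << e.what() << "\n";
        }
        return 0;
    }

This is why the hunk below moves the state InitializeDataDistributorRequest req; declaration into the retry loop: each attempt gets a fresh request, and therefore a fresh reply promise.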
--- fdbserver/ClusterController.actor.cpp | 4 ++-- fdbserver/DataDistribution.actor.cpp | 2 +- fdbserver/MasterProxyServer.actor.cpp | 5 +++-- fdbserver/QuietDatabase.actor.cpp | 21 +++++++++------------ 4 files changed, 15 insertions(+), 17 deletions(-) diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 737d8fbfe7..188ffc530c 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -1042,7 +1042,7 @@ public: TraceEvent ev("UsedID"); if (it.first.present()) ev.detail("Key", it.first.get().contents().toString()); ev.detail("Value", usedIds[it.first]); - ev.detail("Address", id_worker[it.first].interf.address().toString()); + ev.detail("Locality", id_worker[it.first].interf.locality.toString()); } } @@ -2285,7 +2285,6 @@ ACTOR Future updateDatacenterVersionDifference( ClusterControllerData *sel ACTOR Future startDataDistributor( ClusterControllerData *self ) { state Optional dcId = self->clusterControllerDcId; - state InitializeDataDistributorRequest req; while ( !self->clusterControllerProcessId.present() || !self->masterProcessId.present() ) { wait( delay(SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY) ); } @@ -2298,6 +2297,7 @@ ACTOR Future startDataDistributor( ClusterControllerDa self->traceUsedIds(); std::map>, int> id_used = self->usedIds; state WorkerFitnessInfo data_distributor = self->getWorkerForRoleInDatacenter(dcId, ProcessClass::DataDistributor, ProcessClass::NeverAssign, self->db.config, id_used); + state InitializeDataDistributorRequest req; req.reqId = g_random->randomUniqueID(); TraceEvent("ClusterController_DataDistributorReqID", req.reqId).detail("Recruit", data_distributor.worker.first.address()); diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index 9ac6369536..ecf1e2b23d 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -3170,7 +3170,7 @@ ACTOR Future dataDistribution( TraceEvent("DDInitGotInitialDD", myId).detail("B","").detail("E", "").detail("Src", "[no items]").detail("Dest", "[no items]").trackLatest("InitialDD"); } - if (initData->mode) break; + if (initData->mode) break; // mode may be set true by system operator using fdbcli TraceEvent("DataDistributionDisabled", myId); TraceEvent("MovingData", myId) diff --git a/fdbserver/MasterProxyServer.actor.cpp b/fdbserver/MasterProxyServer.actor.cpp index 2b35a1085d..1718fb845d 100644 --- a/fdbserver/MasterProxyServer.actor.cpp +++ b/fdbserver/MasterProxyServer.actor.cpp @@ -93,6 +93,7 @@ ACTOR Future getRate(UID myID, Reference> db, int64 state Future reply = Never(); state int64_t lastTC = 0; + if (db->get().distributor.isValid()) nextRequestTimer = Void(); loop choose { when ( wait( db->onChange() ) ) { if ( db->get().distributor.isValid() ) { @@ -112,14 +113,14 @@ ACTOR Future getRate(UID myID, Reference> db, int64 when ( GetRateInfoReply rep = wait(reply) ) { reply = Never(); *outTransactionRate = rep.transactionRate; - TraceEvent("MasterProxyRate", myID).detail("Rate", rep.transactionRate).detail("Lease", rep.leaseDuration).detail("ReleasedTransactions", *inTransactionCount - lastTC); + // TraceEvent("MasterProxyRate", myID).detail("Rate", rep.transactionRate).detail("Lease", rep.leaseDuration).detail("ReleasedTransactions", *inTransactionCount - 
lastTC); lastTC = *inTransactionCount; leaseTimeout = delay(rep.leaseDuration); nextRequestTimer = delayJittered(rep.leaseDuration / 2); } when ( wait(leaseTimeout ) ) { *outTransactionRate = 0; - TraceEvent("MasterProxyRate", myID).detail("Rate", 0).detail("Lease", "Expired"); + // TraceEvent("MasterProxyRate", myID).detail("Rate", 0).detail("Lease", "Expired"); leaseTimeout = Never(); } } diff --git a/fdbserver/QuietDatabase.actor.cpp b/fdbserver/QuietDatabase.actor.cpp index 88f0357da8..d902d6c06a 100644 --- a/fdbserver/QuietDatabase.actor.cpp +++ b/fdbserver/QuietDatabase.actor.cpp @@ -30,8 +30,6 @@ #include "fdbclient/ManagementAPI.h" #include "flow/actorcompiler.h" // This must be the last #include. -using DistributorPair = std::pair; - ACTOR Future>> getWorkers( Reference> dbInfo, int flags = 0 ) { loop { choose { @@ -67,7 +65,7 @@ ACTOR Future getMasterWorker( Database cx, Reference getDataDistributorWorker( Database cx, Reference> dbInfo ) { +ACTOR Future getDataDistributorWorker( Database cx, Reference> dbInfo ) { TraceEvent("GetDataDistributorWorker").detail("Stage", "GettingWorkers"); loop { @@ -76,7 +74,7 @@ ACTOR Future getDataDistributorWorker( Database cx, Referenceget().distributor.address() ) { TraceEvent("GetDataDistributorWorker").detail("Stage", "GotWorkers").detail("DataDistributorId", dbInfo->get().distributor.id()).detail("WorkerId", workers[i].first.id()); - return std::make_pair(workers[i].first, dbInfo->get().distributor.id()); + return workers[i].first; } } @@ -106,8 +104,8 @@ ACTOR Future getDataInFlight( Database cx, WorkerInterface distributorW // Gets the number of bytes in flight from the data distributor. ACTOR Future getDataInFlight( Database cx, Reference> dbInfo ) { - DistributorPair distributorPair = wait( getDataDistributorWorker(cx, dbInfo) ); - int64_t dataInFlight = wait(getDataInFlight(cx, distributorPair.first)); + WorkerInterface distributorInterf = wait( getDataDistributorWorker(cx, dbInfo) ); + int64_t dataInFlight = wait(getDataInFlight(cx, distributorInterf)); return dataInFlight; } @@ -256,8 +254,8 @@ ACTOR Future getDataDistributionQueueSize( Database cx, WorkerInterface //Gets the size of the data distribution queue. 
If reportInFlight is true, then data in flight is considered part of the queue //Convenience method that first finds the master worker from a zookeeper interface ACTOR Future getDataDistributionQueueSize( Database cx, Reference> dbInfo, bool reportInFlight ) { - DistributorPair distributorPair = wait( getDataDistributorWorker(cx, dbInfo) ); - int64_t inQueue = wait( getDataDistributionQueueSize( cx, distributorPair.first, reportInFlight) ); + WorkerInterface distributorInterf = wait( getDataDistributorWorker(cx, dbInfo) ); + int64_t inQueue = wait( getDataDistributionQueueSize( cx, distributorInterf, reportInFlight) ); return inQueue; } @@ -335,10 +333,9 @@ ACTOR Future waitForQuietDatabase( Database cx, Referenceget().distributor.id(); + TraceEvent("QuietDatabaseGotDataDistributor", distributorUID).detail("Locality", distributorWorker.locality.toString()); state Future dataInFlight = getDataInFlight( cx, distributorWorker); state Future tLogQueueSize = getMaxTLogQueueSize( cx, dbInfo ); From 578473a974af2ab3d22cd5e2a8cacd2968ea0740 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Thu, 31 Jan 2019 10:10:41 -0800 Subject: [PATCH 179/226] Various review comments fixes --- fdbserver/ClusterController.actor.cpp | 16 +++++++--------- fdbserver/DataDistributorInterface.h | 7 ++++--- fdbserver/worker.actor.cpp | 11 +++++------ flow/genericactors.actor.h | 10 ++++++++++ 4 files changed, 26 insertions(+), 18 deletions(-) diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 188ffc530c..4c6cfa9f5c 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -1016,12 +1016,10 @@ public: if (distributorInterf.isValid()) { idUsed[distributorInterf.locality.processId()]++; } - for (const auto& tlog : req.logSystemConfig.tLogs) { - for (const auto& locality: tlog.tLogLocalities) { - if (locality.processId().present()) { - idUsed[locality.processId()]++; - } else { - TraceEvent("UsedID").detail("Tlog", locality.toString()); + for (const auto& tlogset : req.logSystemConfig.tLogs) { + for (const auto& tlog: tlogset.tLogs) { + if (tlog.present()) { + idUsed[tlog.interf().locality.processId()]++; } } } @@ -1703,14 +1701,14 @@ void clusterRegisterMaster( ClusterControllerData* self, RegisterMasterRequest c dbInfo.recoveryCount = req.recoveryCount; } + self->updateUsedIds(req); + self->traceUsedIds(); + if( isChanged ) { dbInfo.id = g_random->randomUniqueID(); self->db.serverInfo->set( dbInfo ); } - self->updateUsedIds(req); - self->traceUsedIds(); - checkOutstandingRequests(self); } diff --git a/fdbserver/DataDistributorInterface.h b/fdbserver/DataDistributorInterface.h index 0f20892eaf..5223ac824b 100644 --- a/fdbserver/DataDistributorInterface.h +++ b/fdbserver/DataDistributorInterface.h @@ -18,8 +18,8 @@ * limitations under the License. 
*/ -#ifndef FOUNDATIONDB_DATADISTRIBUTORINTERFACE_H -#define FOUNDATIONDB_DATADISTRIBUTORINTERFACE_H +#ifndef FDBSERVER_DATADISTRIBUTORINTERFACE_H +#define FDBSERVER_DATADISTRIBUTORINTERFACE_H #include "fdbrpc/fdbrpc.h" #include "fdbrpc/Locality.h" @@ -33,6 +33,7 @@ struct DataDistributorInterface { DataDistributorInterface() : valid(false) {} explicit DataDistributorInterface(const struct LocalityData& l) : locality(l), valid(true) {} + void initEndpoints() {} bool isValid() const { return valid; } UID id() const { return getRateInfo.getEndpoint().token; } NetworkAddress address() const { return getRateInfo.getEndpoint().address; } @@ -73,4 +74,4 @@ struct GetRateInfoReply { } }; -#endif //FOUNDATIONDB_DATADISTRIBUTORINTERFACE_H +#endif //FDBSERVER_DATADISTRIBUTORINTERFACE_H diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index 0d8af70e71..ace99334f4 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -135,7 +135,7 @@ ACTOR Future handleIOErrors( Future actor, IClosable* store, UID id, } } -ACTOR Future workerHandleErrors(FutureStream errors, Reference>> ddInterf) { +ACTOR Future workerHandleErrors(FutureStream errors) { loop choose { when( ErrorInfo _err = waitNext(errors) ) { ErrorInfo err = _err; @@ -151,9 +151,6 @@ ACTOR Future workerHandleErrors(FutureStream errors, Reference< } endRole(err.role, err.id, "Error", ok, err.error); - if (err.role == Role::DATA_DISTRIBUTOR && ddInterf->get().present()) { - ddInterf->set(Optional()); - } if (err.error.code() == error_code_please_reboot || err.error.code() == error_code_io_timeout) throw err.error; } @@ -534,7 +531,7 @@ ACTOR Future workerServer( Reference connFile, Refe Reference> asyncPriorityInfo, ProcessClass initialClass, std::string folder, int64_t memoryLimit, std::string metricsConnFile, std::string metricsPrefix, Promise recoveredDiskFiles) { state PromiseStream< ErrorInfo > errors; state Reference>> ddInterf( new AsyncVar>() ); - state Future handleErrors = workerHandleErrors( errors.getFuture(), ddInterf ); // Needs to be stopped last + state Future handleErrors = workerHandleErrors( errors.getFuture() ); // Needs to be stopped last state ActorCollection errorForwarders(false); state Future loggingTrigger = Void(); state double loggingDelay = SERVER_KNOBS->WORKER_LOGGING_INTERVAL; @@ -728,13 +725,15 @@ ACTOR Future workerServer( Reference connFile, Refe } when ( InitializeDataDistributorRequest req = waitNext(interf.dataDistributor.getFuture()) ) { DataDistributorInterface recruited(locality); + recruited.initEndpoints(); + if ( ddInterf->get().present() ) { recruited = ddInterf->get().get(); } else { startRole( Role::DATA_DISTRIBUTOR, recruited.id(), interf.id() ); Future dataDistributorProcess = dataDistributor( recruited, dbInfo ); - errorForwarders.add( forwardError( errors, Role::DATA_DISTRIBUTOR, recruited.id(), dataDistributorProcess ) ); + errorForwarders.add( forwardError( errors, Role::DATA_DISTRIBUTOR, recruited.id(), setWhenDoneOrError( dataDistributorProcess, ddInterf, Optional() ) ) ); ddInterf->set(Optional(recruited)); } TraceEvent("DataDistributorReceived", req.reqId).detail("DataDistributorId", recruited.id()); diff --git a/flow/genericactors.actor.h b/flow/genericactors.actor.h index 33cc3a2fbb..3ad083006b 100644 --- a/flow/genericactors.actor.h +++ b/flow/genericactors.actor.h @@ -775,6 +775,16 @@ Future setAfter( Reference> var, double time, T val ) { return Void(); } +ACTOR template +Future setWhenDoneOrError( Future condition, Reference> var, T val ) { + try { + 
wait( condition ); + } + catch ( Error& e ) {} + var->set( val ); + return Void(); +} + Future allTrue( const std::vector>& all ); Future anyTrue( std::vector>> const& input, Reference> const& output ); Future cancelOnly( std::vector> const& futures ); From 816f8b1ae1880e18c5a79d15722ad37962f69bd0 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Thu, 31 Jan 2019 10:51:25 -0800 Subject: [PATCH 180/226] Per review comments Add a knob for starting distributor delay. Move distributor failed variable to a local loop. --- fdbserver/ClusterController.actor.cpp | 56 +++++++++++++-------------- fdbserver/Knobs.cpp | 1 + fdbserver/Knobs.h | 1 + 3 files changed, 30 insertions(+), 28 deletions(-) diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 4c6cfa9f5c..5080d3d613 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -89,7 +89,6 @@ public: struct DBInfo { Reference> clientInfo; Reference> serverInfo; - Future distributorFailed; ProcessIssuesMap clientsWithIssues, workersWithIssues; std::map incompatibleConnections; ClientVersionMap clientVersionMap; @@ -107,7 +106,6 @@ public: DBInfo() : masterRegistrationCount(0), recoveryStalled(false), forceRecovery(false), unfinishedRecoveries(0), logGenerations(0), clientInfo( new AsyncVar( ClientDBInfo() ) ), serverInfo( new AsyncVar( ServerDBInfo() ) ), - distributorFailed( Never() ), db( DatabaseContext::create( clientInfo, Future(), LocalityData(), true, TaskDefaultEndpoint, true ) ) // SOMEDAY: Locality! { } @@ -116,11 +114,6 @@ public: ServerDBInfo newInfo = serverInfo->get(); newInfo.id = g_random->randomUniqueID(); newInfo.distributor = distributorInterf; - if ( distributorInterf.isValid() ) { - distributorFailed = waitFailureClient( distributorInterf.waitFailure, SERVER_KNOBS->DD_FAILURE_TIME ); - } else { - distributorFailed = Never(); - } serverInfo->set( newInfo ); } }; @@ -2289,10 +2282,10 @@ ACTOR Future startDataDistributor( ClusterControllerDa while (true) { try { - if ( self->usedIds.size() == 0 ) { - wait( self->usedIdsTrigger.onTrigger() ); + while ( self->db.serverInfo->get().recoveryState < RecoveryState::ACCEPTING_COMMITS ) { + wait( self->db.serverInfo->onChange() ); } - self->traceUsedIds(); + std::map>, int> id_used = self->usedIds; state WorkerFitnessInfo data_distributor = self->getWorkerForRoleInDatacenter(dcId, ProcessClass::DataDistributor, ProcessClass::NeverAssign, self->db.config, id_used); state InitializeDataDistributorRequest req; @@ -2310,7 +2303,7 @@ ACTOR Future startDataDistributor( ClusterControllerDa if ( e.code() != error_code_no_more_servers ) { throw; } - wait( delay(SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY) ); + wait( delay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY) ); } } } @@ -2326,7 +2319,7 @@ ACTOR Future waitDDRejoinOrStartDD( ClusterControllerData *self, ClusterCo req.reply.send( Void() ); break; } - when ( wait( delay(SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY) ) ) { break; } + when ( wait( delay(SERVER_KNOBS->WAIT_FOR_DISTRIBUTOR_JOIN_DELAY) ) ) { break; } when ( wait( self->db.serverInfo->onChange() ) ) { // Rejoins via worker registration if ( self->db.serverInfo->get().distributor.isValid() ) { TraceEvent("ClusterController_InfoChange", self->id).detail("DataDistributorID", self->db.serverInfo->get().distributor.id()); @@ -2340,23 +2333,30 @@ ACTOR Future waitDDRejoinOrStartDD( ClusterControllerData *self, ClusterCo } // Wait on failures and restart it. 
- loop choose { - when ( DataDistributorInterface distributorInterf = wait( newDistributor ) ) { - TraceEvent("ClusterController", self->id) - .detail("DataDistributorID", distributorInterf.id()) - .detail("Valid", distributorInterf.isValid()); - self->db.setDistributor( distributorInterf ); - newDistributor = Never(); + loop { + state Future distributorFailed = Never(); + if ( self->db.serverInfo->get().distributor.isValid() ) { + distributorFailed = waitFailureClient( self->db.serverInfo->get().distributor.waitFailure, SERVER_KNOBS->DD_FAILURE_TIME ); } - when ( wait( self->db.distributorFailed ) ) { - TraceEvent("ClusterController", self->id) - .detail("DataDistributorDied", self->db.serverInfo->get().distributor.id()); - self->db.setDistributor( DataDistributorInterface() ); - newDistributor = startDataDistributor( self ); - } - when ( DataDistributorRejoinRequest req = waitNext( clusterInterface->dataDistributorRejoin.getFuture() ) ) { - // TraceEvent("ClusterController", self->id).detail("DataDistributorRejoinIgnored", req.dataDistributor.id()); - req.reply.send( Void() ); + + choose { + when ( DataDistributorInterface distributorInterf = wait( newDistributor ) ) { + TraceEvent("ClusterController", self->id) + .detail("DataDistributorID", distributorInterf.id()) + .detail("Valid", distributorInterf.isValid()); + self->db.setDistributor( distributorInterf ); + newDistributor = Never(); + } + when ( wait( distributorFailed ) ) { + TraceEvent("ClusterController", self->id) + .detail("DataDistributorDied", self->db.serverInfo->get().distributor.id()); + self->db.setDistributor( DataDistributorInterface() ); + newDistributor = startDataDistributor( self ); + } + when ( DataDistributorRejoinRequest req = waitNext( clusterInterface->dataDistributorRejoin.getFuture() ) ) { + // TraceEvent("ClusterController", self->id).detail("DataDistributorRejoinIgnored", req.dataDistributor.id()); + req.reply.send( Void() ); + } } } } diff --git a/fdbserver/Knobs.cpp b/fdbserver/Knobs.cpp index 8ddaeb28db..4698632078 100644 --- a/fdbserver/Knobs.cpp +++ b/fdbserver/Knobs.cpp @@ -296,6 +296,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) { init( WAIT_FOR_GOOD_RECRUITMENT_DELAY, 1.0 ); init( WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY, 5.0 ); init( ATTEMPT_RECRUITMENT_DELAY, 0.035 ); + init( WAIT_FOR_DISTRIBUTOR_JOIN_DELAY, 1.0 ); init( WORKER_FAILURE_TIME, 1.0 ); if( randomize && BUGGIFY ) WORKER_FAILURE_TIME = 10.0; init( CHECK_OUTSTANDING_INTERVAL, 0.5 ); if( randomize && BUGGIFY ) CHECK_OUTSTANDING_INTERVAL = 0.001; init( VERSION_LAG_METRIC_INTERVAL, 0.5 ); if( randomize && BUGGIFY ) VERSION_LAG_METRIC_INTERVAL = 10.0; diff --git a/fdbserver/Knobs.h b/fdbserver/Knobs.h index 9b084d80f2..be5e7b626e 100644 --- a/fdbserver/Knobs.h +++ b/fdbserver/Knobs.h @@ -235,6 +235,7 @@ public: double WAIT_FOR_GOOD_RECRUITMENT_DELAY; double WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY; double ATTEMPT_RECRUITMENT_DELAY; + double WAIT_FOR_DISTRIBUTOR_JOIN_DELAY; double WORKER_FAILURE_TIME; double CHECK_OUTSTANDING_INTERVAL; double INCOMPATIBLE_PEERS_LOGGING_INTERVAL; From 21066b013a069cd14e13434ab5014aa475d9e0fc Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Thu, 31 Jan 2019 11:13:46 -0800 Subject: [PATCH 181/226] Remove DataDistributorRejoinRequest This is no longer needed, since worker registration piggybacks distributor interface now. 
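The replacement mechanism is simply an optional field on the existing registration message. A hypothetical sketch of the shape (field names are illustrative; the real RegisterWorkerRequest carries more state):

    #include <cstdint>
    #include <iostream>
    #include <optional>

    struct DistributorInterface { uint64_t id; };

    struct RegisterWorkerRequest {
        uint64_t workerId;
        // Piggybacked if this worker is already running a distributor.
        std::optional<DistributorInterface> distributor;
    };

    void registerWorker(const RegisterWorkerRequest& req) {
        if (req.distributor)
            std::cout << "worker " << req.workerId << " rejoins distributor "
                      << req.distributor->id << "\n";
        else
            std::cout << "worker " << req.workerId << " registered\n";
    }

    int main() {
        registerWorker({1, std::nullopt});             // ordinary worker
        registerWorker({2, DistributorInterface{42}}); // implicit rejoin
        return 0;
    }

Because workers re-register whenever the cluster controller changes, the rejoin happens for free and the dedicated request type removed below is no longer needed.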
--- fdbserver/ClusterController.actor.cpp | 10 ------ fdbserver/ClusterRecruitmentInterface.h | 17 +--------- fdbserver/DataDistribution.actor.cpp | 44 +++++++------------------ 3 files changed, 13 insertions(+), 58 deletions(-) diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 5080d3d613..477fe054c9 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -2313,12 +2313,6 @@ ACTOR Future waitDDRejoinOrStartDD( ClusterControllerData *self, ClusterCo // wait for a while to see if existing data distributor will join. loop choose { - when ( DataDistributorRejoinRequest req = waitNext( clusterInterface->dataDistributorRejoin.getFuture() ) ) { - TraceEvent("ClusterController_Rejoin", self->id).detail("DataDistributorID", req.dataDistributor.id()); - self->db.setDistributor( req.dataDistributor ); - req.reply.send( Void() ); - break; - } when ( wait( delay(SERVER_KNOBS->WAIT_FOR_DISTRIBUTOR_JOIN_DELAY) ) ) { break; } when ( wait( self->db.serverInfo->onChange() ) ) { // Rejoins via worker registration if ( self->db.serverInfo->get().distributor.isValid() ) { @@ -2353,10 +2347,6 @@ ACTOR Future waitDDRejoinOrStartDD( ClusterControllerData *self, ClusterCo self->db.setDistributor( DataDistributorInterface() ); newDistributor = startDataDistributor( self ); } - when ( DataDistributorRejoinRequest req = waitNext( clusterInterface->dataDistributorRejoin.getFuture() ) ) { - // TraceEvent("ClusterController", self->id).detail("DataDistributorRejoinIgnored", req.dataDistributor.id()); - req.reply.send( Void() ); - } } } } diff --git a/fdbserver/ClusterRecruitmentInterface.h b/fdbserver/ClusterRecruitmentInterface.h index c3363027bf..56dfde330d 100644 --- a/fdbserver/ClusterRecruitmentInterface.h +++ b/fdbserver/ClusterRecruitmentInterface.h @@ -43,7 +43,6 @@ struct ClusterControllerFullInterface { RequestStream< struct GetWorkersRequest > getWorkers; RequestStream< struct RegisterMasterRequest > registerMaster; RequestStream< struct GetServerDBInfoRequest > getServerDBInfo; - RequestStream< struct DataDistributorRejoinRequest > dataDistributorRejoin; // sent by dataDistributor (may or may not rebooted) to communicate with a new CC UID id() const { return clientInterface.id(); } bool operator == (ClusterControllerFullInterface const& r) const { return id() == r.id(); } @@ -58,13 +57,12 @@ struct ClusterControllerFullInterface { getWorkers.getEndpoint( TaskClusterController ); registerMaster.getEndpoint( TaskClusterController ); getServerDBInfo.getEndpoint( TaskClusterController ); - dataDistributorRejoin.getEndpoint( TaskClusterController ); } template void serialize( Ar& ar ) { ASSERT( ar.protocolVersion() >= 0x0FDB00A200040001LL ); - serializer(ar, clientInterface, recruitFromConfiguration, recruitRemoteFromConfiguration, recruitStorage, registerWorker, getWorkers, registerMaster, getServerDBInfo, dataDistributorRejoin); + serializer(ar, clientInterface, recruitFromConfiguration, recruitRemoteFromConfiguration, recruitStorage, registerWorker, getWorkers, registerMaster, getServerDBInfo); } }; @@ -233,19 +231,6 @@ struct GetServerDBInfoRequest { } }; -struct DataDistributorRejoinRequest { - DataDistributorInterface dataDistributor; - ReplyPromise reply; - - DataDistributorRejoinRequest() { } - explicit DataDistributorRejoinRequest(DataDistributorInterface di) : dataDistributor(di) {} - - template - void serialize(Ar& ar) { - serializer(ar, dataDistributor, reply); - } -}; - #include "fdbserver/ServerDBInfo.h" // 
include order hack #endif diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index ecf1e2b23d..fca508380d 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -3369,16 +3369,14 @@ ACTOR Future dataDistributor(DataDistributorInterface di, Reference> configuration( new AsyncVar(DatabaseConfiguration()) ); state Reference self( new DataDistributorData(db, configuration, di.id()) ); state Future collection = actorCollection( self->addActor.getFuture() ); - state Future trigger = self->configurationTrigger.onTrigger(); TraceEvent("DataDistributor_Starting", di.id()); self->addActor.send( waitFailureServer(di.waitFailure.getFuture()) ); self->addActor.send( configurationMonitor( self ) ); loop choose { - when ( wait( trigger ) ) { + when ( wait( self->configurationTrigger.onTrigger() ) ) { self->refreshDcIds(); - trigger = self->configurationTrigger.onTrigger(); break; } when ( wait(self->dbInfo->onChange()) ) {} @@ -3392,36 +3390,18 @@ ACTOR Future dataDistributor(DataDistributorInterface di, ReferenceaddActor.send( distributor ); self->addActor.send( reportErrorsExcept( rateKeeper( self->dbInfo, ddStorageServerChanges, di.getRateInfo.getFuture(), self->configuration->get(), &lastLimited ), "Ratekeeper", di.id(), &normalRateKeeperErrors() ) ); - state Future reply; - loop { - if ( self->dbInfo->get().clusterInterface.id() != lastClusterControllerID ) { - // Rejoin the new cluster controller - DataDistributorRejoinRequest req(di); - TraceEvent("DataDistributor_Rejoining", di.id()) - .detail("OldClusterControllerID", lastClusterControllerID) - .detail("ClusterControllerID", self->dbInfo->get().clusterInterface.id()); - reply = self->dbInfo->get().clusterInterface.dataDistributorRejoin.getReply(req); - lastClusterControllerID = self->dbInfo->get().clusterInterface.id(); - } else { - reply = Never(); + loop choose { + when ( wait( self->configurationTrigger.onTrigger() ) ) { + TraceEvent("DataDistributor_Restart", di.id()) + .detail("ClusterControllerID", lastClusterControllerID) + .detail("Configuration", self->configuration->get().toString()); + self->refreshDcIds(); + distributor = reportErrorsExcept( dataDistribution( self->dbInfo, di.id(), self->configuration->get(), ddStorageServerChanges, self->primaryDcId, self->remoteDcIds, &lastLimited ), "DataDistribution", di.id(), &normalDataDistributorErrors() ); + self->addActor.send( distributor ); } - - choose { - when ( wait( brokenPromiseToNever(reply) ) ) { reply = Never(); } - when ( wait( self->dbInfo->onChange() ) ) {} - when ( wait( trigger ) ) { - TraceEvent("DataDistributor_Restart", di.id()) - .detail("ClusterControllerID", lastClusterControllerID) - .detail("Configuration", self->configuration->get().toString()); - self->refreshDcIds(); - distributor = reportErrorsExcept( dataDistribution( self->dbInfo, di.id(), self->configuration->get(), ddStorageServerChanges, self->primaryDcId, self->remoteDcIds, &lastLimited ), "DataDistribution", di.id(), &normalDataDistributorErrors() ); - self->addActor.send( distributor ); - trigger = self->configurationTrigger.onTrigger(); - } - when ( wait( collection ) ) { - ASSERT(false); - throw internal_error(); - } + when ( wait( collection ) ) { + ASSERT(false); + throw internal_error(); } } } From 62c67a50e529da63abb370b814682c90425ce040 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Thu, 31 Jan 2019 22:23:49 -0800 Subject: [PATCH 182/226] Fix segfault error The usedIds is updated by master registration request, which 
populates the usedIds map. However, this request may contain processes that the cluster controller is not aware of, i.e., that are not in the id_worker map. This was fine until I added tracing of usedIds, which silently inserted an empty entry into the id_worker map for each unknown process. This new entry can cause a crash when its LocalityData is accessed. Remove the AsyncTrigger for usedIds, and change to serverInfo->onChange. Use const & to avoid unnecessary copies in WorkerInterface's LocalityData and getExtraTLogEligibleMachines(). --- fdbserver/ApplyMetadataMutation.h | 2 +- fdbserver/ClusterController.actor.cpp | 39 +++++++++++++++++---------- fdbserver/Status.actor.cpp | 2 +- fdbserver/WorkerInterface.h | 2 +- 4 files changed, 28 insertions(+), 17 deletions(-) diff --git a/fdbserver/ApplyMetadataMutation.h b/fdbserver/ApplyMetadataMutation.h index 65c3817a8f..85f438749b 100644 --- a/fdbserver/ApplyMetadataMutation.h +++ b/fdbserver/ApplyMetadataMutation.h @@ -149,7 +149,7 @@ static void applyMetadataMutations(UID const& dbgid, Arena &arena, VectorRef(m.param2) != txnStateStore->readValue(m.param1).get().castTo()) { // FIXME: Make this check more specific, here or by reading configuration whenever there is a change - if(!m.param1.startsWith( excludedServersPrefix ) && m.param1 != excludedServersVersionKey) { + if(!m.param1.startsWith( excludedServersPrefix ) && m.param1 != excludedServersVersionKey && m.param1 != configVersionKey) { auto t = txnStateStore->readValue(m.param1).get(); TraceEvent("MutationRequiresRestart", dbgid).detail("M", m.toString()).detail("PrevValue", t.present() ? printable(t.get()) : "(none)").detail("ToCommit", toCommit!=NULL); if(confChange) *confChange = true; diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 477fe054c9..5629f1cbc0 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -517,6 +517,9 @@ public: id_used[masterProcessId]++; id_used[clusterControllerProcessId]++; + if (db.serverInfo->get().distributor.isValid()) { + id_used[db.serverInfo->get().distributor.locality.processId()]++; + } std::set> remoteDC; remoteDC.insert(req.dcId); @@ -556,6 +559,9 @@ public: std::map< Optional>, int> id_used; id_used[masterProcessId]++; id_used[clusterControllerProcessId]++; + if (db.serverInfo->get().distributor.isValid()) { + id_used[db.serverInfo->get().distributor.locality.processId()]++; + } ASSERT(dcId.present()); @@ -685,6 +691,9 @@ public: std::map< Optional>, int> id_used; id_used[masterProcessId]++; id_used[clusterControllerProcessId]++; + if (db.serverInfo->get().distributor.isValid()) { + id_used[db.serverInfo->get().distributor.locality.processId()]++; + } auto tlogs = getWorkersForTlogs( req.configuration, req.configuration.tLogReplicationFactor, req.configuration.getDesiredLogs(), req.configuration.tLogPolicy, id_used ); for(int i = 0; i < tlogs.size(); i++) { @@ -907,6 +916,9 @@ public: std::map< Optional>, int> id_used; id_used[clusterControllerProcessId]++; + if (db.serverInfo->get().distributor.isValid()) { + id_used[db.serverInfo->get().distributor.locality.processId()]++; + } WorkerFitnessInfo mworker = getWorkerForRoleInDatacenter(clusterControllerDcId, ProcessClass::Master, ProcessClass::NeverAssign, db.config, id_used, true); if ( oldMasterFit < mworker.fitness ) @@ -1005,9 +1017,8 @@ public: std::map>, int> idUsed; idUsed[clusterControllerProcessId]++; idUsed[masterProcessId]++; - const auto& distributorInterf = dbInfo.distributor; - if (distributorInterf.isValid()) 
{ - idUsed[distributorInterf.locality.processId()]++; + if (dbInfo.distributor.isValid()) { + idUsed[dbInfo.distributor.locality.processId()]++; } for (const auto& tlogset : req.logSystemConfig.tLogs) { for (const auto& tlog: tlogset.tLogs) { @@ -1025,7 +1036,6 @@ public: idUsed[interf.locality.processId()]++; } usedIds.swap( idUsed ); - usedIdsTrigger.trigger(); } void traceUsedIds() { @@ -1033,14 +1043,18 @@ public: TraceEvent ev("UsedID"); if (it.first.present()) ev.detail("Key", it.first.get().contents().toString()); ev.detail("Value", usedIds[it.first]); - ev.detail("Locality", id_worker[it.first].interf.locality.toString()); + if (id_worker.find(it.first) != id_worker.end()) { + ev.detail("Locality", id_worker[it.first].interf.locality.toString()); + ev.detail("Addr", id_worker[it.first].interf.address().toString()); + } else { + ev.detail("Locality", "Not found!"); + } } } std::map< Optional>, WorkerInfo > id_worker; std::map< Optional>, ProcessClass > id_class; //contains the mapping from process id to process class from the database std::map< Optional>, int> usedIds; // current used process IDs reported by master - AsyncTrigger usedIdsTrigger; Standalone lastProcessClasses; bool gotProcessClasses; bool gotFullyRecoveredConfig; @@ -1084,14 +1098,6 @@ public: } }; -template -vector values( std::map const& map ) { - vector t; - for(auto i = map.begin(); i!=map.end(); ++i) - t.push_back(i->second); - return t; -} - ACTOR Future clusterWatchDatabase( ClusterControllerData* cluster, ClusterControllerData::DBInfo* db ) { state MasterInterface iMaster; @@ -1113,6 +1119,9 @@ ACTOR Future clusterWatchDatabase( ClusterControllerData* cluster, Cluster //This should always be possible, because we can recruit the master on the same process as the cluster controller. std::map< Optional>, int> id_used; id_used[cluster->clusterControllerProcessId]++; + if (cluster->db.serverInfo->get().distributor.isValid()) { + id_used[cluster->db.serverInfo->get().distributor.locality.processId()]++; + } state WorkerFitnessInfo masterWorker = cluster->getWorkerForRoleInDatacenter(cluster->clusterControllerDcId, ProcessClass::Master, ProcessClass::NeverAssign, db->config, id_used); if( ( masterWorker.worker.second.machineClassFitness( ProcessClass::Master ) > SERVER_KNOBS->EXPECTED_MASTER_FITNESS || masterWorker.worker.first.locality.processId() == cluster->clusterControllerProcessId ) && now() - cluster->startTime < SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY ) { @@ -1694,6 +1703,8 @@ void clusterRegisterMaster( ClusterControllerData* self, RegisterMasterRequest c dbInfo.recoveryCount = req.recoveryCount; } + // The master may tell us processes that we are not aware of. Thus, when + // using usedIds, proceed with caution as id_workers may not have the process. 
self->updateUsedIds(req); self->traceUsedIds(); diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp index 8460459962..59ff7745be 100644 --- a/fdbserver/Status.actor.cpp +++ b/fdbserver/Status.actor.cpp @@ -1344,7 +1344,7 @@ ACTOR static Future>> getProxie return results; } -static int getExtraTLogEligibleMachines(vector> workers, DatabaseConfiguration configuration) { +static int getExtraTLogEligibleMachines(const vector>& workers, const DatabaseConfiguration& configuration) { std::set allMachines; std::map> dcId_machine; for(auto const& worker : workers) { diff --git a/fdbserver/WorkerInterface.h b/fdbserver/WorkerInterface.h index 0c9998f20a..da3e836cd1 100644 --- a/fdbserver/WorkerInterface.h +++ b/fdbserver/WorkerInterface.h @@ -60,7 +60,7 @@ struct WorkerInterface { NetworkAddress address() const { return tLog.getEndpoint().address; } WorkerInterface() {} - WorkerInterface( LocalityData locality ) : locality( locality ) {} + WorkerInterface( const LocalityData& locality ) : locality( locality ) {} template void serialize(Ar& ar) { From 5deeec29e3c3807d07360eeb84948516ec5df1dc Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Mon, 4 Feb 2019 16:58:51 -0800 Subject: [PATCH 183/226] Fix a bug where a team is not rebuilt after storage failure When two failures happened to a team and one of the servers recovered, the current logic skips building a new team, which is wrong. --- fdbserver/DataDistribution.actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index fca508380d..c805722f52 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -2458,7 +2458,7 @@ ACTOR Future storageServerFailureTracker( wait(delay(SERVER_KNOBS->DATA_DISTRIBUTION_FAILURE_REACTION_TIME - elapsed)); } status->isFailed = !status->isFailed; - if(!status->isFailed && !server->teams.size()) { + if( !status->isFailed && (!server->teams.size() || self->zeroHealthyTeams->get()) ) { self->doBuildTeams = true; } From 961d71538e0b056da5e98376894b0bb1e207e696 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Wed, 6 Feb 2019 16:42:22 -0800 Subject: [PATCH 184/226] A follow-on fix to ensure team building for zero teams --- fdbserver/DataDistribution.actor.cpp | 34 +++++++++++++--------------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index c805722f52..f2835003ce 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -568,6 +568,7 @@ struct DDTeamCollection : ReferenceCounted { DatabaseConfiguration configuration; bool doBuildTeams; + Future checkBuildTeam; Future teamBuilder; AsyncTrigger restartTeamBuilder; @@ -1667,6 +1668,8 @@ struct DDTeamCollection : ReferenceCounted { for (auto& server : serverTeam) { score += server_info[server]->teams.size(); } + TraceEvent("BuildMachineTeams").detail("Score", score).detail("BestScore", bestScore) + .detail("TeamSize", serverTeam.size()).detail("StorageTeamSize", configuration.storageTeamSize); if (score < bestScore) { bestScore = score; bestServerTeam = serverTeam; @@ -1752,22 +1755,11 @@ struct DDTeamCollection : ReferenceCounted { state int teamsToBuild = std::min(desiredTeams - teamCount, maxTeams - totalTeamCount); if (teamsToBuild > 0) { - std::set desiredServerSet; - for (auto i = self->server_info.begin(); i != self->server_info.end(); ++i) { - if 
(!self->server_status.get(i->first).isUnhealthy()) { - desiredServerSet.insert(i->second->id); - } - } - - vector desiredServerVector( desiredServerSet.begin(), desiredServerSet.end() ); - - state vector> builtTeams; - int addedTeams = self->addTeamsBestOf(teamsToBuild); if (addedTeams <= 0 && self->teams.size() == 0) { TraceEvent(SevWarn, "NoTeamAfterBuildTeam") - .detail("TeamNum", self->teams.size()) - .detail("Debug", "Check information below"); + .detail("TeamNum", self->teams.size()) + .detail("Debug", "Check information below"); // Debug: set true for traceAllInfo() to print out more information self->traceAllInfo(); } @@ -1793,12 +1785,12 @@ struct DDTeamCollection : ReferenceCounted { desc += i->first.shortString() + " (" + i->second->lastKnownInterface.toString() + "), "; } } - vector desiredServerVector( desiredServerSet.begin(), desiredServerSet.end() ); + checkBuildTeam = DDTeamCollection::checkBuildTeams(this); TraceEvent(SevWarn, "NoHealthyTeams", distributorId) .detail("CurrentTeamCount", teams.size()) .detail("ServerCount", server_info.size()) - .detail("NonFailedServerCount", desiredServerVector.size()); + .detail("NonFailedServerCount", desiredServerSet.size()); } bool shouldHandleServer(const StorageServerInterface &newServer) { @@ -2130,8 +2122,9 @@ ACTOR Future teamTracker( DDTeamCollection* self, Reference te if( self->healthyTeamCount == 0 ) { TraceEvent(SevWarn, "ZeroTeamsHealthySignalling", self->distributorId) - .detail("SignallingTeam", team->getDesc()) - .detail("Primary", self->primary); + .detail("SignallingTeam", team->getDesc()) + .detail("Primary", self->primary); + self->checkBuildTeam = DDTeamCollection::checkBuildTeams(self); } TraceEvent("TeamHealthDifference", self->distributorId) @@ -2246,6 +2239,7 @@ ACTOR Future teamTracker( DDTeamCollection* self, Reference te if( self->healthyTeamCount == 0 ) { TraceEvent(SevWarn, "ZeroTeamsHealthySignalling", self->distributorId).detail("SignallingTeam", team->getDesc()); self->zeroHealthyTeams->set(true); + self->checkBuildTeam = DDTeamCollection::checkBuildTeams(self); } } throw; @@ -2460,6 +2454,7 @@ ACTOR Future storageServerFailureTracker( status->isFailed = !status->isFailed; if( !status->isFailed && (!server->teams.size() || self->zeroHealthyTeams->get()) ) { self->doBuildTeams = true; + self->checkBuildTeam = DDTeamCollection::checkBuildTeams(self); } TraceEvent("StatusMapChange", self->distributorId).detail("ServerID", interf.id()).detail("Status", status->toString()) .detail("Available", IFailureMonitor::failureMonitor().getState(interf.waitFailure.getEndpoint()).isAvailable()); @@ -2580,8 +2575,11 @@ ACTOR Future storageServerTracker( if(hasWrongStoreTypeOrDC) self->restartRecruiting.trigger(); - if( lastIsUnhealthy && !status.isUnhealthy() && !server->teams.size() ) + TraceEvent("StatusMapChange", distributorId).detail("Status", status.toString()).detail("LastIsUnhealthy", lastIsUnhealthy); + if ( lastIsUnhealthy && !status.isUnhealthy() && (!server->teams.size() || self->zeroHealthyTeams->get()) ) { self->doBuildTeams = true; + self->checkBuildTeam = DDTeamCollection::checkBuildTeams(self); + } lastIsUnhealthy = status.isUnhealthy(); choose { From 07dab561337e2645dbed49f292907de270e1613a Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Thu, 7 Feb 2019 15:31:03 -0800 Subject: [PATCH 185/226] Fix a data movement stuck bug When moving keys to a team, if one of the servers in the target team died, the move can become stuck. This is because the DDTeamCollection waits for all the data movement of the failed server to be completed. However, in this case, because the movement has not finished yet, checking the database tells us there is no key associated with this server and that it is safe to go ahead. In reality, only the in-memory structure knows there is pending movement, i.e., the unfinished move causes some keys to still be attributed to the failed server. Thus, the server can't be removed yet. Fix by adding a check against the in-memory structure in waitForAllDataRemoved(). Use const& to optimize a few function parameters.
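In other words, a server is only removable when two views agree: the database no longer assigns it any keys, and the in-memory shard map shows no in-flight moves still targeting it. A simplified stand-alone sketch of the combined check (hypothetical types; the real checks are canRemoveStorageServer() and ShardsAffectedByTeamFailure::getNumberOfShards()):

    #include <iostream>
    #include <string>
    #include <unordered_map>

    // Pretend the persisted view already reports no assigned keys.
    bool databaseSaysEmpty(const std::string& /*serverId*/) { return true; }

    struct InMemoryShardMap {
        std::unordered_map<std::string, int> shardsPerServer;
        int numberOfShards(const std::string& id) const {
            auto it = shardsPerServer.find(id);
            return it == shardsPerServer.end() ? 0 : it->second;
        }
    };

    bool safeToRemove(const std::string& id, const InMemoryShardMap& inFlight) {
        // Both views must agree before the server can be removed.
        return databaseSaysEmpty(id) && inFlight.numberOfShards(id) == 0;
    }

    int main() {
        InMemoryShardMap m{{{"ss1", 3}}};  // ss1 is still the target of an unfinished move
        std::cout << std::boolalpha
                  << safeToRemove("ss1", m) << "\n"   // false: wait for the move
                  << safeToRemove("ss2", m) << "\n";  // true: nothing pending
        return 0;
    }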
--- fdbserver/DataDistribution.actor.cpp | 53 ++++++++++++----------- fdbserver/DataDistributionQueue.actor.cpp | 4 +- fdbserver/QuietDatabase.actor.cpp | 2 +- 3 files changed, 30 insertions(+), 29 deletions(-) diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index f2835003ce..c6c0c8071e 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -370,30 +370,6 @@ struct ServerStatus { }; typedef AsyncMap ServerStatusMap; -ACTOR Future waitForAllDataRemoved( Database cx, UID serverID, Version addedVersion ) { - state Transaction tr(cx); - loop { - try { - tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE); - Version ver = wait( tr.getReadVersion() ); - - //we cannot remove a server immediately after adding it, because a perfectly timed master recovery could cause us to not store the mutations sent to the short lived storage server. - if(ver > addedVersion + SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS) { - bool canRemove = wait( canRemoveStorageServer( &tr, serverID ) ); - if (canRemove) { - return Void(); - } - } - - // Wait for any change to the serverKeys for this server - wait( delay(SERVER_KNOBS->ALL_DATA_REMOVED_DELAY, TaskDataDistribution) ); - tr.reset(); - } catch (Error& e) { - wait( tr.onError(e) ); - } - } -} - // Read keyservers, return unique set of teams ACTOR Future> getInitialDataDistribution( Database cx, UID distributorId, MoveKeysLock moveKeysLock, std::vector> remoteDcIds ) { state Reference result = Reference(new InitialDataDistribution); @@ -2162,7 +2138,8 @@ ACTOR Future teamTracker( DDTeamCollection* self, Reference te } if(logTeamEvents) { - TraceEvent("TeamPriorityChange", self->distributorId).detail("Priority", team->getPriority()); + TraceEvent("TeamPriorityChange", self->distributorId).detail("Priority", team->getPriority()) + .detail("Info", team->getDesc()); } lastZeroHealthy = self->zeroHealthyTeams->get(); //set this again in case it changed from this teams health changing @@ -2415,6 +2392,30 @@ ACTOR Future removeBadTeams(DDTeamCollection* self) { return Void(); } +ACTOR Future waitForAllDataRemoved( Database cx, UID serverID, Version addedVersion, DDTeamCollection* teams ) { + state Transaction tr(cx); + loop { + try { + tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE); + Version ver = wait( tr.getReadVersion() ); + + //we cannot remove a server immediately after adding it, because a perfectly timed master recovery could cause us to not store the mutations sent to the short lived storage server. 
+ if(ver > addedVersion + SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS) { + bool canRemove = wait( canRemoveStorageServer( &tr, serverID ) ); + if (canRemove && teams->shardsAffectedByTeamFailure->getNumberOfShards(serverID) == 0) { + return Void(); + } + } + + // Wait for any change to the serverKeys for this server + wait( delay(SERVER_KNOBS->ALL_DATA_REMOVED_DELAY, TaskDataDistribution) ); + tr.reset(); + } catch (Error& e) { + wait( tr.onError(e) ); + } + } +} + ACTOR Future storageServerFailureTracker( DDTeamCollection* self, TCServerInfo *server, @@ -2460,7 +2461,7 @@ ACTOR Future storageServerFailureTracker( TraceEvent("StatusMapChange", self->distributorId).detail("ServerID", interf.id()).detail("Status", status->toString()) .detail("Available", IFailureMonitor::failureMonitor().getState(interf.waitFailure.getEndpoint()).isAvailable()); } - when ( wait( status->isUnhealthy() ? waitForAllDataRemoved(cx, interf.id(), addedVersion) : Never() ) ) { break; } + when ( wait( status->isUnhealthy() ? waitForAllDataRemoved(cx, interf.id(), addedVersion, self) : Never() ) ) { break; } } } diff --git a/fdbserver/DataDistributionQueue.actor.cpp b/fdbserver/DataDistributionQueue.actor.cpp index 9a5ff2e0fc..5a76a0dcae 100644 --- a/fdbserver/DataDistributionQueue.actor.cpp +++ b/fdbserver/DataDistributionQueue.actor.cpp @@ -684,7 +684,7 @@ struct DDQueueData { .detail("AffectedRanges", affectedQueuedItems.size()); */ } - void completeSourceFetch( RelocateData results ) { + void completeSourceFetch( const RelocateData& results ) { ASSERT( fetchingSourcesQueue.count( results ) ); //logRelocation( results, "GotSourceServers" ); @@ -696,7 +696,7 @@ struct DDQueueData { } - void logRelocation( RelocateData rd, const char *title ) { + void logRelocation( const RelocateData& rd, const char *title ) { std::string busyString; for(int i = 0; i < rd.src.size() && i < teamSize * 2; i++) busyString += describe(rd.src[i]) + " - (" + busymap[ rd.src[i] ].toString() + "); "; diff --git a/fdbserver/QuietDatabase.actor.cpp b/fdbserver/QuietDatabase.actor.cpp index d902d6c06a..44f346d119 100644 --- a/fdbserver/QuietDatabase.actor.cpp +++ b/fdbserver/QuietDatabase.actor.cpp @@ -110,7 +110,7 @@ ACTOR Future getDataInFlight( Database cx, Reference Date: Fri, 8 Feb 2019 14:57:00 -0800 Subject: [PATCH 186/226] Fix segfault during configuration change This bug was introduced in cee23ee3. During a configuration change, the data distributor is restarted, which destroys the previous DDTeamCollection and cancels all of its teamTracker() actors. In this case, even though the healthy team count reaches 0, there is no need to try to rebuild teams. The bug is triggered when a team rebuild is attempted while the DDTeamCollection is already destroyed. 
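The hazard generalizes: any queued piece of work that captures the collection must be dropped, or must check liveness, once the collection is gone. FDB relies on actor cancellation for this; reduced to plain C++ with hypothetical types, a weak reference shows the same discipline:

    #include <functional>
    #include <iostream>
    #include <memory>
    #include <vector>

    struct TeamCollection {                    // stand-in for DDTeamCollection
        void buildTeams() { std::cout << "building teams\n"; }
    };

    int main() {
        std::vector<std::function<void()>> pending;  // queued continuations
        auto tc = std::make_shared<TeamCollection>();

        // Capture weakly: the continuation must not touch a destroyed owner.
        pending.push_back([w = std::weak_ptr<TeamCollection>(tc)] {
            if (auto alive = w.lock()) alive->buildTeams();
            else std::cout << "owner destroyed; rebuild skipped\n";
        });

        pending.front()();  // owner alive: rebuild runs
        tc.reset();         // configuration change destroys the collection
        pending.front()();  // safely skipped instead of a use-after-free
        return 0;
    }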
--- fdbserver/DataDistribution.actor.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index c6c0c8071e..11a4abd0eb 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -2216,7 +2216,6 @@ ACTOR Future teamTracker( DDTeamCollection* self, Reference te if( self->healthyTeamCount == 0 ) { TraceEvent(SevWarn, "ZeroTeamsHealthySignalling", self->distributorId).detail("SignallingTeam", team->getDesc()); self->zeroHealthyTeams->set(true); - self->checkBuildTeam = DDTeamCollection::checkBuildTeams(self); } } throw; From 0e4791219280d96003b56b72584dd0e100d3cae6 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Sun, 10 Feb 2019 10:19:36 -0800 Subject: [PATCH 187/226] Fix an out-of-memory error --- fdbserver/ClusterController.actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 44bd2a919c..5629f1cbc0 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -2314,8 +2314,8 @@ ACTOR Future startDataDistributor( ClusterControllerDa if ( e.code() != error_code_no_more_servers ) { throw; } - wait( delay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY) ); } + wait( delay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY) ); } } From a7d1111a10223594eccde2fe4d12caca8216c4ef Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Sun, 10 Feb 2019 08:58:56 -0800 Subject: [PATCH 188/226] Make servers and serverIDs private for TCTeamInfo Make both accessible through public member functions instead. --- fdbserver/DataDistribution.actor.cpp | 62 +++++++++++++++------------- 1 file changed, 34 insertions(+), 28 deletions(-) diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index 11a4abd0eb..69f822953d 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -179,9 +179,11 @@ public: }; class TCTeamInfo : public ReferenceCounted, public IDataDistributionTeam { -public: +private: vector< Reference > servers; vector serverIDs; + +public: Reference machineTeam; Future tracker; bool healthy; @@ -206,8 +208,12 @@ public: v.push_back(servers[i]->lastKnownInterface); return v; } - virtual int size() { return servers.size(); } + virtual int size() { + ASSERT(servers.size() == serverIDs.size()); + return servers.size(); + } virtual vector const& getServerIDs() { return serverIDs; } + const vector>& getServers() { return servers; } virtual std::string getServerIDsStr() { std::stringstream ss; @@ -748,17 +754,17 @@ struct DDTeamCollection : ReferenceCounted { for( int j = 0; j < teamList.size(); j++ ) { if( teamList[j]->isHealthy() && (!req.preferLowerUtilization || teamList[j]->hasHealthyFreeSpace())) { int sharedMembers = 0; - for( int k = 0; k < teamList[j]->serverIDs.size(); k++ ) - if( sources.count( teamList[j]->serverIDs[k] ) ) + for( const UID& id : teamList[j]->getServerIDs() ) + if( sources.count( id ) ) sharedMembers++; - if( !foundExact && sharedMembers == teamList[j]->serverIDs.size() ) { + if( !foundExact && sharedMembers == teamList[j]->size() ) { foundExact = true; bestOption = Optional>(); similarTeams.clear(); } - if( (sharedMembers == teamList[j]->serverIDs.size()) || (!foundExact && req.wantsTrueBest) ) { + if( (sharedMembers == teamList[j]->size()) || (!foundExact && req.wantsTrueBest) ) { int64_t loadBytes = SOME_SHARED * teamList[j]->getLoadBytes(true, req.inflightPenalty); if( 
!bestOption.present() || ( req.preferLowerUtilization && loadBytes < bestLoadBytes ) || ( !req.preferLowerUtilization && loadBytes > bestLoadBytes ) ) { bestLoadBytes = loadBytes; @@ -841,15 +847,16 @@ struct DDTeamCollection : ReferenceCounted { auto& teamList = self->server_info[ req.completeSources[i] ]->teams; for( int j = 0; j < teamList.size(); j++ ) { bool found = true; - for( int k = 0; k < teamList[j]->serverIDs.size(); k++ ) { - if( !completeSources.count( teamList[j]->serverIDs[k] ) ) { + auto serverIDs = teamList[j]->getServerIDs(); + for( int k = 0; k < teamList[j]->size(); k++ ) { + if( !completeSources.count( serverIDs[k] ) ) { found = false; break; } } - if(found && teamList[j]->serverIDs.size() > bestSize) { + if(found && teamList[j]->size() > bestSize) { bestOption = teamList[j]; - bestSize = teamList[j]->serverIDs.size(); + bestSize = teamList[j]->size(); } } break; @@ -882,7 +889,7 @@ struct DDTeamCollection : ReferenceCounted { for(; idx < self->badTeams.size(); idx++ ) { servers.clear(); - for(auto server : self->badTeams[idx]->servers) { + for(const auto& server : self->badTeams[idx]->getServers()) { if(server->inDesiredDC && !self->server_status.get(server->id).isUnhealthy()) { servers.push_back(server); } @@ -1067,7 +1074,7 @@ struct DDTeamCollection : ReferenceCounted { void addTeam(const vector>& newTeamServers, bool isInitialTeam) { Reference teamInfo(new TCTeamInfo(newTeamServers)); - bool badTeam = !satisfiesPolicy(teamInfo->servers) || teamInfo->servers.size() != configuration.storageTeamSize; + bool badTeam = !satisfiesPolicy(teamInfo->getServers()) || teamInfo->size() != configuration.storageTeamSize; teamInfo->tracker = teamTracker(this, teamInfo, badTeam); // ASSERT( teamInfo->serverIDs.size() > 0 ); //team can be empty at DB initialization @@ -1186,7 +1193,7 @@ struct DDTeamCollection : ReferenceCounted { TraceEvent("ServerTeamInfo") .detail("TeamIndex", i++) .detail("Healthy", team->isHealthy()) - .detail("ServerNumber", team->serverIDs.size()) + .detail("ServerNumber", team->size()) .detail("MemberIDs", team->getServerIDsStr()); } } @@ -1506,15 +1513,15 @@ struct DDTeamCollection : ReferenceCounted { // Check if it is true bool isOnSameMachineTeam(Reference& team) { std::vector> machineIDs; - for (auto& server : team->servers) { + for (const auto& server : team->getServers()) { if (!server->machine.isValid()) return false; machineIDs.push_back(server->machine->machineID); } std::sort(machineIDs.begin(), machineIDs.end()); int numExistance = 0; - for (auto& server : team->servers) { - for (auto& candidateMachineTeam : server->machine->machineTeams) { + for (const auto& server : team->getServers()) { + for (const auto& candidateMachineTeam : server->machine->machineTeams) { std::sort(candidateMachineTeam->machineIDs.begin(), candidateMachineTeam->machineIDs.end()); if (machineIDs == candidateMachineTeam->machineIDs) { numExistance++; @@ -1522,7 +1529,7 @@ struct DDTeamCollection : ReferenceCounted { } } } - return (numExistance == team->servers.size()); + return (numExistance == team->size()); } // Sanity check the property of teams in unit test @@ -1805,7 +1812,7 @@ struct DDTeamCollection : ReferenceCounted { } } - for(auto& server : team->servers) { + for(const auto& server : team->getServers()) { for(int t = 0; tteams.size(); t++) { if( server->teams[t] == team ) { ASSERT(found); @@ -1847,7 +1854,7 @@ struct DDTeamCollection : ReferenceCounted { // Check if the serverTeam belongs to a machine team; If not, create the machine team Reference 
checkAndCreateMachineTeam(Reference serverTeam) { std::vector> machineIDs; - for (auto& server : serverTeam->servers) { + for (auto& server : serverTeam->getServers()) { Reference machine = server->machine; machineIDs.push_back(machine->machineID); } @@ -1999,7 +2006,7 @@ struct DDTeamCollection : ReferenceCounted { // Track a team and issue RelocateShards when the level of degradation changes ACTOR Future teamTracker( DDTeamCollection* self, Reference team, bool badTeam ) { - state int lastServersLeft = team->getServerIDs().size(); + state int lastServersLeft = team->size(); state bool lastAnyUndesired = false; state bool logTeamEvents = g_network->isSimulated() || !badTeam; state bool lastReady = false; @@ -2022,14 +2029,13 @@ ACTOR Future teamTracker( DDTeamCollection* self, Reference te .detail("IsReady", self->initialFailureReactionDelay.isReady()); // Check if the number of degraded machines has changed state vector> change; - auto servers = team->getServerIDs(); bool anyUndesired = false; bool anyWrongConfiguration = false; int serversLeft = 0; - for(auto s = servers.begin(); s != servers.end(); ++s) { - change.push_back( self->server_status.onChange( *s ) ); - auto& status = self->server_status.get(*s); + for (const UID& uid : team->getServerIDs()) { + change.push_back( self->server_status.onChange( uid ) ); + auto& status = self->server_status.get(uid); if (!status.isFailed) { serversLeft++; } @@ -2164,7 +2170,7 @@ ACTOR Future teamTracker( DDTeamCollection* self, Reference te bool found = false; for( int k = 0; k < info->teams.size(); k++ ) { - if( info->teams[k]->serverIDs == t.servers ) { + if( info->teams[k]->getServerIDs() == t.servers ) { maxPriority = std::max( maxPriority, info->teams[k]->getPriority() ); found = true; break; @@ -2192,7 +2198,7 @@ ACTOR Future teamTracker( DDTeamCollection* self, Reference te .detail("KeyBegin", printable(rs.keys.begin)) .detail("KeyEnd", printable(rs.keys.end)) .detail("Priority", rs.priority) - .detail("TeamFailedMachines", team->getServerIDs().size()-serversLeft) + .detail("TeamFailedMachines", team->size() - serversLeft) .detail("TeamOKMachines", serversLeft); } } @@ -2657,7 +2663,7 @@ ACTOR Future storageServerTracker( // Get the newBadTeams due to the locality change vector> newBadTeams; for (auto& serverTeam : server->teams) { - if (!self->satisfiesPolicy(serverTeam->servers)) { + if (!self->satisfiesPolicy(serverTeam->getServers())) { newBadTeams.push_back(serverTeam); continue; } @@ -2677,7 +2683,7 @@ ACTOR Future storageServerTracker( bool addedNewBadTeam = false; for(auto it : newBadTeams) { if( self->removeTeam(it) ) { - self->addTeam(it->servers, true); + self->addTeam(it->getServers(), true); addedNewBadTeam = true; } } From 8afe84d31ba74bb41b9e71c58d783459bd6519e4 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Mon, 11 Feb 2019 11:41:51 -0800 Subject: [PATCH 189/226] Fix an ordering bug for buildTeam When zeroHealthyTeams signals and the storage server becomes healthy, we could attempt buildTeam before the ServerStatusMap is updated. As a result, the healthy server is not available for use. Fix by delaying the buildTeam until after the status map is updated.
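The fix follows a publish-then-act discipline: record the intent to rebuild, apply the status-map update, and only then trigger the build so that it observes the new state. A compressed sketch of that reordering, using hypothetical stand-in types rather than the Flow ACTOR code:

    #include <iostream>
    #include <map>
    #include <string>

    struct Status { bool failed = true; };

    int main() {
        std::map<std::string, Status> statusMap;  // stand-in for ServerStatusMap
        bool doBuildTeam = false;

        Status s;               // the server has just transitioned to healthy
        s.failed = false;
        doBuildTeam = true;     // record intent only; do not build yet

        statusMap["ss-1"] = s;  // publish the new status first
        if (doBuildTeam) {      // the build now sees the healthy server
            doBuildTeam = false;
            std::cout << "buildTeams sees ss-1 as healthy\n";
        }
    }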
--- fdbserver/DataDistribution.actor.cpp | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index 69f822953d..b5aff23611 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -1708,9 +1708,10 @@ struct DDTeamCollection : ReferenceCounted { } uniqueMachines = machines.size(); TraceEvent("BuildTeams") - .detail("ServerNumber", self->server_info.size()) - .detail("UniqueMachines", uniqueMachines) - .detail("StorageTeamSize", self->configuration.storageTeamSize); + .detail("ServerNumber", self->server_info.size()) + .detail("UniqueMachines", uniqueMachines) + .detail("Primary", self->primary) + .detail("StorageTeamSize", self->configuration.storageTeamSize); // If there are too few machines to even build teams or there are too few represented datacenters, build no new teams if( uniqueMachines >= self->configuration.storageTeamSize ) { @@ -2145,7 +2146,7 @@ ACTOR Future teamTracker( DDTeamCollection* self, Reference te if(logTeamEvents) { TraceEvent("TeamPriorityChange", self->distributorId).detail("Priority", team->getPriority()) - .detail("Info", team->getDesc()); + .detail("Info", team->getDesc()).detail("ZeroHealthyTeams", self->zeroHealthyTeams->get()); } lastZeroHealthy = self->zeroHealthyTeams->get(); //set this again in case it changed from this teams health changing @@ -2252,7 +2253,7 @@ ACTOR Future trackExcludedServers( DDTeamCollection* self ) { TraceEvent("DDExcludedServersChanged", self->distributorId).detail("Rows", results.size()).detail("Exclusions", excluded.size()); - // Reset and reassign self->excludedServers based on excluded, but weonly + // Reset and reassign self->excludedServers based on excluded, but we only // want to trigger entries that are different auto old = self->excludedServers.getKeys(); for(auto& o : old) @@ -2430,6 +2431,7 @@ ACTOR Future storageServerFailureTracker( Version addedVersion ) { state StorageServerInterface interf = server->lastKnownInterface; + state bool doBuildTeam = false; loop { if( statusMap->get(interf.id()).initialized ) { bool unhealthy = statusMap->get(interf.id()).isUnhealthy(); @@ -2444,6 +2446,11 @@ ACTOR Future storageServerFailureTracker( } statusMap->set( interf.id(), *status ); + if (doBuildTeam) { + doBuildTeam = false; + self->doBuildTeams = true; + self->checkBuildTeam = DDTeamCollection::checkBuildTeams(self); + } if( status->isFailed ) self->restartRecruiting.trigger(); @@ -2459,8 +2466,7 @@ ACTOR Future storageServerFailureTracker( } status->isFailed = !status->isFailed; if( !status->isFailed && (!server->teams.size() || self->zeroHealthyTeams->get()) ) { - self->doBuildTeams = true; - self->checkBuildTeam = DDTeamCollection::checkBuildTeams(self); + doBuildTeam = true; } TraceEvent("StatusMapChange", self->distributorId).detail("ServerID", interf.id()).detail("Status", status->toString()) From fc3a7849639f685b6cac477dc8a90b444c4d88fe Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Tue, 12 Feb 2019 14:02:21 -0800 Subject: [PATCH 190/226] Fix another build team bug The buildTeam() can create teams with undesired storage servers, which are considered unhealthy. As a result, the data movement can become stuck. Fix this by adding an ACTOR monitorHealthyTeams that builds teams every second whenever there are no healthy teams. Clean up the storageServerTracker() interface.
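The new monitorHealthyTeams actor (gated by the DD_ZERO_HEALTHY_TEAM_DELAY knob added below) acts as a watchdog: while no team is healthy, it re-requests a team build at a fixed cadence instead of relying on one-shot triggers that can be missed. Since Flow's choose/when loop has no standard-C++ equivalent, here is a thread-based sketch of the same idea; the counters and the fixed three-iteration loop are illustrative only.

    #include <chrono>
    #include <iostream>
    #include <thread>

    int main() {
        int healthyTeamCount = 0;  // stand-in for DDTeamCollection state
        bool doBuildTeams = false;

        for (int tick = 0; tick < 3; ++tick) {
            std::this_thread::sleep_for(std::chrono::seconds(1));  // the delay knob
            if (healthyTeamCount == 0) {
                doBuildTeams = true;  // checkBuildTeams() would run here
                std::cout << "no healthy teams; requesting another build\n";
            }
        }
        if (doBuildTeams) std::cout << "a build was requested\n";
    }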
--- fdbserver/DataDistribution.actor.cpp | 103 +++++++++++++-------------- fdbserver/Knobs.cpp | 1 + fdbserver/Knobs.h | 1 + 3 files changed, 51 insertions(+), 54 deletions(-) diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index b5aff23611..884f3faafb 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -531,11 +531,6 @@ Future storageServerTracker( struct DDTeamCollection* const& self, Database const& cx, TCServerInfo* const& server, - ServerStatusMap* const& statusMap, - MoveKeysLock const& lock, - UID const& distributorId, - std::map>* const& other_servers, - Optional> >> const& changes, Promise const& errorOut, Version const& addedVersion); @@ -550,7 +545,6 @@ struct DDTeamCollection : ReferenceCounted { DatabaseConfiguration configuration; bool doBuildTeams; - Future checkBuildTeam; Future teamBuilder; AsyncTrigger restartTeamBuilder; @@ -1769,7 +1763,6 @@ struct DDTeamCollection : ReferenceCounted { desc += i->first.shortString() + " (" + i->second->lastKnownInterface.toString() + "), "; } } - checkBuildTeam = DDTeamCollection::checkBuildTeams(this); TraceEvent(SevWarn, "NoHealthyTeams", distributorId) .detail("CurrentTeamCount", teams.size()) @@ -1796,7 +1789,7 @@ struct DDTeamCollection : ReferenceCounted { // Establish the relation between server and machine checkAndCreateMachine(r); - r->tracker = storageServerTracker( this, cx, r.getPtr(), &server_status, lock, distributorId, &server_info, serverChanges, errorOut, addedVersion ); + r->tracker = storageServerTracker( this, cx, r.getPtr(), errorOut, addedVersion ); doBuildTeams = true; // Adding a new server triggers to build new teams restartTeamBuilder.trigger(); } @@ -2107,7 +2100,6 @@ ACTOR Future teamTracker( DDTeamCollection* self, Reference te TraceEvent(SevWarn, "ZeroTeamsHealthySignalling", self->distributorId) .detail("SignallingTeam", team->getDesc()) .detail("Primary", self->primary); - self->checkBuildTeam = DDTeamCollection::checkBuildTeams(self); } TraceEvent("TeamHealthDifference", self->distributorId) @@ -2426,15 +2418,14 @@ ACTOR Future storageServerFailureTracker( DDTeamCollection* self, TCServerInfo *server, Database cx, - ServerStatusMap *statusMap, ServerStatus *status, Version addedVersion ) { state StorageServerInterface interf = server->lastKnownInterface; state bool doBuildTeam = false; loop { - if( statusMap->get(interf.id()).initialized ) { - bool unhealthy = statusMap->get(interf.id()).isUnhealthy(); + if( self->server_status.get(interf.id()).initialized ) { + bool unhealthy = self->server_status.get(interf.id()).isUnhealthy(); if(unhealthy && !status->isUnhealthy()) { self->unhealthyServers--; } @@ -2445,11 +2436,10 @@ ACTOR Future storageServerFailureTracker( self->unhealthyServers++; } - statusMap->set( interf.id(), *status ); + self->server_status.set( interf.id(), *status ); if (doBuildTeam) { doBuildTeam = false; self->doBuildTeams = true; - self->checkBuildTeam = DDTeamCollection::checkBuildTeams(self); } if( status->isFailed ) self->restartRecruiting.trigger(); @@ -2485,11 +2475,6 @@ ACTOR Future storageServerTracker( DDTeamCollection* self, Database cx, TCServerInfo *server, //This actor is owned by this TCServerInfo - ServerStatusMap *statusMap, - MoveKeysLock lock, - UID distributorId, - std::map>* other_servers, - Optional> >> changes, Promise errorOut, Version addedVersion) { @@ -2502,8 +2487,8 @@ ACTOR Future storageServerTracker( state Future storeTracker = keyValueStoreTypeTracker( self, server ); state 
bool hasWrongStoreTypeOrDC = false; - if(changes.present()) { - changes.get().send( std::make_pair(server->id, server->lastKnownInterface) ); + if(self->serverChanges.present()) { + self->serverChanges.get().send( std::make_pair(server->id, server->lastKnownInterface) ); } try { @@ -2514,33 +2499,33 @@ ACTOR Future storageServerTracker( // If there is any other server on this exact NetworkAddress, this server is undesired and will eventually be eliminated state std::vector> otherChanges; std::vector> wakeUpTrackers; - for(auto i = other_servers->begin(); i != other_servers->end(); ++i) { - if (i->second.getPtr() != server && i->second->lastKnownInterface.address() == server->lastKnownInterface.address()) { - auto& statusInfo = statusMap->get( i->first ); - TraceEvent("SameAddress", distributorId) + for(const auto& i : self->server_info) { + if (i.second.getPtr() != server && i.second->lastKnownInterface.address() == server->lastKnownInterface.address()) { + auto& statusInfo = self->server_status.get( i.first ); + TraceEvent("SameAddress", self->distributorId) .detail("Failed", statusInfo.isFailed) .detail("Undesired", statusInfo.isUndesired) - .detail("Server", server->id).detail("OtherServer", i->second->id) + .detail("Server", server->id).detail("OtherServer", i.second->id) .detail("Address", server->lastKnownInterface.address()) .detail("NumShards", self->shardsAffectedByTeamFailure->getNumberOfShards(server->id)) - .detail("OtherNumShards", self->shardsAffectedByTeamFailure->getNumberOfShards(i->second->id)) - .detail("OtherHealthy", !statusMap->get( i->second->id ).isUnhealthy()); + .detail("OtherNumShards", self->shardsAffectedByTeamFailure->getNumberOfShards(i.second->id)) + .detail("OtherHealthy", !self->server_status.get( i.second->id ).isUnhealthy()); // wait for the server's ip to be changed - otherChanges.push_back(statusMap->onChange(i->second->id)); - if(!statusMap->get( i->second->id ).isUnhealthy()) { - if(self->shardsAffectedByTeamFailure->getNumberOfShards(i->second->id) >= self->shardsAffectedByTeamFailure->getNumberOfShards(server->id)) + otherChanges.push_back(self->server_status.onChange(i.second->id)); + if(!self->server_status.get( i.second->id ).isUnhealthy()) { + if(self->shardsAffectedByTeamFailure->getNumberOfShards(i.second->id) >= self->shardsAffectedByTeamFailure->getNumberOfShards(server->id)) { - TraceEvent(SevWarn, "UndesiredStorageServer", distributorId) + TraceEvent(SevWarn, "UndesiredStorageServer", self->distributorId) .detail("Server", server->id) .detail("Address", server->lastKnownInterface.address()) - .detail("OtherServer", i->second->id) + .detail("OtherServer", i.second->id) .detail("NumShards", self->shardsAffectedByTeamFailure->getNumberOfShards(server->id)) - .detail("OtherNumShards", self->shardsAffectedByTeamFailure->getNumberOfShards(i->second->id)); + .detail("OtherNumShards", self->shardsAffectedByTeamFailure->getNumberOfShards(i.second->id)); status.isUndesired = true; } else - wakeUpTrackers.push_back(i->second->wakeUpTracker); + wakeUpTrackers.push_back(i.second->wakeUpTracker); } } } @@ -2552,7 +2537,7 @@ ACTOR Future storageServerTracker( if( server->lastKnownClass.machineClassFitness( ProcessClass::Storage ) > ProcessClass::UnsetFit ) { if( self->optimalTeamCount > 0 ) { - TraceEvent(SevWarn, "UndesiredStorageServer", distributorId) + TraceEvent(SevWarn, "UndesiredStorageServer", self->distributorId) .detail("Server", server->id) .detail("OptimalTeamCount", self->optimalTeamCount) .detail("Fitness", 
server->lastKnownClass.machineClassFitness(ProcessClass::Storage)); @@ -2563,7 +2548,7 @@ ACTOR Future storageServerTracker( //If this storage server has the wrong key-value store type, then mark it undesired so it will be replaced with a server having the correct type if(hasWrongStoreTypeOrDC) { - TraceEvent(SevWarn, "UndesiredStorageServer", distributorId).detail("Server", server->id).detail("StoreType", "?"); + TraceEvent(SevWarn, "UndesiredStorageServer", self->distributorId).detail("Server", server->id).detail("StoreType", "?"); status.isUndesired = true; status.isWrongConfiguration = true; } @@ -2573,7 +2558,7 @@ ACTOR Future storageServerTracker( AddressExclusion addr( a.ip, a.port ); AddressExclusion ipaddr( a.ip ); if (self->excludedServers.get( addr ) || self->excludedServers.get( ipaddr )) { - TraceEvent(SevWarn, "UndesiredStorageServer", distributorId).detail("Server", server->id) + TraceEvent(SevWarn, "UndesiredStorageServer", self->distributorId).detail("Server", server->id) .detail("Excluded", self->excludedServers.get( addr ) ? addr.toString() : ipaddr.toString()); status.isUndesired = true; status.isWrongConfiguration = true; @@ -2581,25 +2566,25 @@ ACTOR Future storageServerTracker( otherChanges.push_back( self->excludedServers.onChange( addr ) ); otherChanges.push_back( self->excludedServers.onChange( ipaddr ) ); - failureTracker = storageServerFailureTracker( self, server, cx, statusMap, &status, addedVersion ); + failureTracker = storageServerFailureTracker( self, server, cx, &status, addedVersion ); //We need to recruit new storage servers if the key value store type has changed if(hasWrongStoreTypeOrDC) self->restartRecruiting.trigger(); - TraceEvent("StatusMapChange", distributorId).detail("Status", status.toString()).detail("LastIsUnhealthy", lastIsUnhealthy); + TraceEvent("StatusMapChange", self->distributorId).detail("Status", status.toString()) + .detail("Server", server->id).detail("LastIsUnhealthy", lastIsUnhealthy); if ( lastIsUnhealthy && !status.isUnhealthy() && (!server->teams.size() || self->zeroHealthyTeams->get()) ) { self->doBuildTeams = true; - self->checkBuildTeam = DDTeamCollection::checkBuildTeams(self); } lastIsUnhealthy = status.isUnhealthy(); choose { when( wait( failureTracker ) ) { // The server is failed AND all data has been removed from it, so permanently remove it. 
- TraceEvent("StatusMapChange", distributorId).detail("ServerID", server->id).detail("Status", "Removing"); - if(changes.present()) { - changes.get().send( std::make_pair(server->id, Optional()) ); + TraceEvent("StatusMapChange", self->distributorId).detail("ServerID", server->id).detail("Status", "Removing"); + if(self->serverChanges.present()) { + self->serverChanges.get().send( std::make_pair(server->id, Optional()) ); } if(server->updated.canBeSet()) { @@ -2607,9 +2592,9 @@ ACTOR Future storageServerTracker( } // Remove server from FF/serverList - wait( removeStorageServer( cx, server->id, lock ) ); + wait( removeStorageServer( cx, server->id, self->lock ) ); - TraceEvent("StatusMapChange", distributorId).detail("ServerID", server->id).detail("Status", "Removed"); + TraceEvent("StatusMapChange", self->distributorId).detail("ServerID", server->id).detail("Status", "Removed"); // Sets removeSignal (alerting dataDistributionTeamCollection to remove the storage server from its own data structures) server->removed.send( Void() ); self->removedServers.send( server->id ); @@ -2620,7 +2605,7 @@ ACTOR Future storageServerTracker( bool localityChanged = server->lastKnownInterface.locality != newInterface.first.locality; bool machineLocalityChanged = server->lastKnownInterface.locality.zoneId().get() != newInterface.first.locality.zoneId().get(); - TraceEvent("StorageServerInterfaceChanged", distributorId).detail("ServerID", server->id) + TraceEvent("StorageServerInterfaceChanged", self->distributorId).detail("ServerID", server->id) .detail("NewWaitFailureToken", newInterface.first.waitFailure.getEndpoint().token) .detail("OldWaitFailureToken", server->lastKnownInterface.waitFailure.getEndpoint().token) .detail("LocalityChanged", localityChanged); @@ -2702,8 +2687,8 @@ ACTOR Future storageServerTracker( } interfaceChanged = server->onInterfaceChanged; - if(changes.present()) { - changes.get().send( std::make_pair(server->id, server->lastKnownInterface) ); + if(self->serverChanges.present()) { + self->serverChanges.get().send( std::make_pair(server->id, server->lastKnownInterface) ); } // We rely on the old failureTracker being actorCancelled since the old actor now has a pointer to an invalid location status = ServerStatus( status.isFailed, status.isUndesired, server->lastKnownInterface.locality ); @@ -2716,10 +2701,10 @@ ACTOR Future storageServerTracker( self->restartRecruiting.trigger(); } when( wait( otherChanges.empty() ? 
Never() : quorum( otherChanges, 1 ) ) ) { - TraceEvent("SameAddressChangedStatus", distributorId).detail("ServerID", server->id); + TraceEvent("SameAddressChangedStatus", self->distributorId).detail("ServerID", server->id); } when( KeyValueStoreType type = wait( storeTracker ) ) { - TraceEvent("KeyValueStoreTypeChanged", distributorId) + TraceEvent("KeyValueStoreTypeChanged", self->distributorId) .detail("ServerID", server->id) .detail("StoreType", type.toString()) .detail("DesiredType", self->configuration.storageServerStoreType.toString()); @@ -2808,9 +2793,6 @@ ACTOR Future initializeStorage( DDTeamCollection* self, RecruitStorageRepl TraceEvent(SevWarn, "DDRecruitmentError").detail("Reason", "Server ID already recruited"); self->doBuildTeams = true; - if( self->healthyTeamCount == 0 ) { - wait( self->checkBuildTeams( self ) ); - } } self->restartRecruiting.trigger(); @@ -2938,6 +2920,18 @@ ACTOR Future remoteRecovered( Reference> db return Void(); } +ACTOR Future monitorHealthyTeams( DDTeamCollection* self ) { + state Future checkHealth; + loop choose { + when ( wait( delay( SERVER_KNOBS->DD_ZERO_HEALTHY_TEAM_DELAY ) ) ) { + if ( self->healthyTeamCount == 0 ) { + self->doBuildTeams = true; + checkHealth = DDTeamCollection::checkBuildTeams(self); + } + } + } +} + // Keep track of servers and teams -- serves requests for getRandomTeam ACTOR Future dataDistributionTeamCollection( Reference teamCollection, @@ -2973,6 +2967,7 @@ ACTOR Future dataDistributionTeamCollection( self->addActor.send(monitorStorageServerRecruitment( self )); self->addActor.send(waitServerListChange( self, serverRemoved.getFuture() )); self->addActor.send(trackExcludedServers( self )); + self->addActor.send(monitorHealthyTeams( self )); // SOMEDAY: Monitor FF/serverList for (new) servers that aren't in allServers and add or remove them diff --git a/fdbserver/Knobs.cpp b/fdbserver/Knobs.cpp index 4698632078..82c93ca4c5 100644 --- a/fdbserver/Knobs.cpp +++ b/fdbserver/Knobs.cpp @@ -166,6 +166,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) { init( MOVEKEYS_LOCK_POLLING_DELAY, 5.0 ); init( DEBOUNCE_RECRUITING_DELAY, 5.0 ); init( DD_FAILURE_TIME, 1.0 ); if( randomize && BUGGIFY ) DD_FAILURE_TIME = 10.0; + init( DD_ZERO_HEALTHY_TEAM_DELAY, 1.0 ); // Redwood Storage Engine init( PREFIX_TREE_IMMEDIATE_KEY_SIZE_LIMIT, 30 ); diff --git a/fdbserver/Knobs.h b/fdbserver/Knobs.h index be5e7b626e..0e12ba974a 100644 --- a/fdbserver/Knobs.h +++ b/fdbserver/Knobs.h @@ -129,6 +129,7 @@ public: double MOVEKEYS_LOCK_POLLING_DELAY; double DEBOUNCE_RECRUITING_DELAY; double DD_FAILURE_TIME; + double DD_ZERO_HEALTHY_TEAM_DELAY; // Redwood Storage Engine int PREFIX_TREE_IMMEDIATE_KEY_SIZE_LIMIT; From a4b2c9ef883adeb4c5bc0d4a515eddbd3aaba321 Mon Sep 17 00:00:00 2001 From: Evan Tschannen <36455792+etschannen@users.noreply.github.com> Date: Wed, 13 Feb 2019 11:53:44 -0800 Subject: [PATCH 191/226] Update fdbserver/ClusterController.actor.cpp Co-Authored-By: jzhou77 --- fdbserver/ClusterController.actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 5629f1cbc0..e672fd8094 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -110,7 +110,7 @@ public: { } - void setDistributor(DataDistributorInterface distributorInterf) { + void setDistributor(const DataDistributorInterface& distributorInterf) { ServerDBInfo newInfo = serverInfo->get(); newInfo.id = 
g_random->randomUniqueID(); newInfo.distributor = distributorInterf; From 171a69c810cd1430f38262f4cc74f4417a031f7b Mon Sep 17 00:00:00 2001 From: Evan Tschannen <36455792+etschannen@users.noreply.github.com> Date: Wed, 13 Feb 2019 11:54:35 -0800 Subject: [PATCH 192/226] Update fdbserver/ClusterController.actor.cpp Co-Authored-By: jzhou77 --- fdbserver/ClusterController.actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index e672fd8094..d2a0501e5a 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -2291,7 +2291,7 @@ ACTOR Future startDataDistributor( ClusterControllerDa wait( delay(SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY) ); } - while (true) { + loop { try { while ( self->db.serverInfo->get().recoveryState < RecoveryState::ACCEPTING_COMMITS ) { wait( self->db.serverInfo->onChange() ); From 038144adb1705fbbe708d232f00bff64143e6b04 Mon Sep 17 00:00:00 2001 From: Evan Tschannen <36455792+etschannen@users.noreply.github.com> Date: Wed, 13 Feb 2019 13:11:07 -0800 Subject: [PATCH 193/226] Update fdbserver/DataDistribution.actor.cpp Co-Authored-By: jzhou77 --- fdbserver/DataDistribution.actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index 884f3faafb..2f7612db97 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -3380,7 +3380,7 @@ ACTOR Future dataDistributor(DataDistributorInterface di, ReferenceaddActor.send( configurationMonitor( self ) ); loop choose { - when ( wait( self->configurationTrigger.onTrigger() ) ) { + when ( wait( self->configuration.onChange() ) ) { self->refreshDcIds(); break; } From bf6da81bf9f378f0e0f4f0f30354e921e99499b3 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Tue, 12 Feb 2019 16:07:23 -0800 Subject: [PATCH 194/226] Remove recovery version from data distribution queue This parameter is no longer used/needed. 
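This is pure plumbing removal: recoveryVersion was threaded from dataDistributionQueue through moveKeys, finishMoveKeys, and waitForShardReady but never read. A before/after sketch with simplified, hypothetical signatures (the real functions take many more parameters):

    #include <cstdint>
    #include <iostream>

    using Version = int64_t;

    // after: the dead recoveryVersion parameter is gone from the whole chain
    void waitForShardReady(Version minVersion) {
        std::cout << "waiting for shard at version " << minVersion << "\n";
    }

    // before: void moveKeys(Version readVersion, Version recoveryVersion, ...);
    void moveKeys(Version readVersion) {
        waitForShardReady(readVersion);
    }

    int main() { moveKeys(42); }

Dropping the parameter at every layer keeps the call sites consistent and removes the temptation to pass invalidVersion as a placeholder, which is what some callers below were doing.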
--- fdbserver/DataDistribution.actor.cpp | 2 +- fdbserver/DataDistribution.h | 3 +-- fdbserver/DataDistributionQueue.actor.cpp | 14 ++++++-------- fdbserver/MoveKeys.actor.cpp | 11 +++++------ fdbserver/MoveKeys.h | 1 - fdbserver/workloads/RandomMoveKeys.actor.cpp | 2 +- 6 files changed, 14 insertions(+), 19 deletions(-) diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index 2f7612db97..ca9358d0c3 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -3254,7 +3254,7 @@ ACTOR Future dataDistribution( actors.push_back( pollMoveKeysLock(cx, lock) ); actors.push_back( reportErrorsExcept( dataDistributionTracker( initData, cx, output, shardsAffectedByTeamFailure, getShardMetrics, getAverageShardBytes.getFuture(), readyToStart, anyZeroHealthyTeams, myId ), "DDTracker", myId, &normalDDQueueErrors() ) ); - actors.push_back( reportErrorsExcept( dataDistributionQueue( cx, output, input.getFuture(), getShardMetrics, processingUnhealthy, tcis, shardsAffectedByTeamFailure, lock, getAverageShardBytes, myId, storageTeamSize, lastLimited, invalidVersion ), "DDQueue", myId, &normalDDQueueErrors() ) ); + actors.push_back( reportErrorsExcept( dataDistributionQueue( cx, output, input.getFuture(), getShardMetrics, processingUnhealthy, tcis, shardsAffectedByTeamFailure, lock, getAverageShardBytes, myId, storageTeamSize, lastLimited ), "DDQueue", myId, &normalDDQueueErrors() ) ); vector teamCollectionsPtrs; Reference primaryTeamCollection( new DDTeamCollection(cx, myId, lock, output, shardsAffectedByTeamFailure, configuration, primaryDcId, configuration.usableRegions > 1 ? remoteDcIds : std::vector>(), serverChanges, readyToStart.getFuture(), zeroHealthyTeams[0], true, processingUnhealthy) ); diff --git a/fdbserver/DataDistribution.h b/fdbserver/DataDistribution.h index 6160f48054..d5c4a20658 100644 --- a/fdbserver/DataDistribution.h +++ b/fdbserver/DataDistribution.h @@ -223,8 +223,7 @@ Future dataDistributionQueue( PromiseStream> const& getAverageShardBytes, UID const& distributorId, int const& teamSize, - double* const& lastLimited, - Version const& recoveryVersion); + double* const& lastLimited); //Holds the permitted size and IO Bounds for a shard struct ShardSizeBounds { diff --git a/fdbserver/DataDistributionQueue.actor.cpp b/fdbserver/DataDistributionQueue.actor.cpp index 5a76a0dcae..9405c46d28 100644 --- a/fdbserver/DataDistributionQueue.actor.cpp +++ b/fdbserver/DataDistributionQueue.actor.cpp @@ -334,7 +334,6 @@ struct DDQueueData { UID distributorId; MoveKeysLock lock; Database cx; - Version recoveryVersion; std::vector teamCollections; Reference shardsAffectedByTeamFailure; @@ -396,11 +395,11 @@ struct DDQueueData { DDQueueData( UID mid, MoveKeysLock lock, Database cx, std::vector teamCollections, Reference sABTF, PromiseStream> getAverageShardBytes, - int teamSize, PromiseStream output, FutureStream input, PromiseStream getShardMetrics, double* lastLimited, Version recoveryVersion ) : + int teamSize, PromiseStream output, FutureStream input, PromiseStream getShardMetrics, double* lastLimited ) : activeRelocations( 0 ), queuedRelocations( 0 ), bytesWritten ( 0 ), teamCollections( teamCollections ), shardsAffectedByTeamFailure( sABTF ), getAverageShardBytes( getAverageShardBytes ), distributorId( mid ), lock( lock ), cx( cx ), teamSize( teamSize ), output( output ), input( input ), getShardMetrics( getShardMetrics ), startMoveKeysParallelismLock( SERVER_KNOBS->DD_MOVE_KEYS_PARALLELISM ), - finishMoveKeysParallelismLock( 
SERVER_KNOBS->DD_MOVE_KEYS_PARALLELISM ), lastLimited(lastLimited), recoveryVersion(recoveryVersion), + finishMoveKeysParallelismLock( SERVER_KNOBS->DD_MOVE_KEYS_PARALLELISM ), lastLimited(lastLimited), suppressIntervals(0), lastInterval(0), unhealthyRelocations(0), rawProcessingUnhealthy( new AsyncVar(false) ) {} void validate() { @@ -988,7 +987,7 @@ ACTOR Future dataDistributionRelocator( DDQueueData *self, RelocateData rd state Error error = success(); state Promise dataMovementComplete; - state Future doMoveKeys = moveKeys(self->cx, rd.keys, destIds, healthyIds, self->lock, dataMovementComplete, &self->startMoveKeysParallelismLock, &self->finishMoveKeysParallelismLock, self->recoveryVersion, self->teamCollections.size() > 1, relocateShardInterval.pairID ); + state Future doMoveKeys = moveKeys(self->cx, rd.keys, destIds, healthyIds, self->lock, dataMovementComplete, &self->startMoveKeysParallelismLock, &self->finishMoveKeysParallelismLock, self->teamCollections.size() > 1, relocateShardInterval.pairID ); state Future pollHealth = signalledTransferComplete ? Never() : delay( SERVER_KNOBS->HEALTH_POLL_TIME, TaskDataDistributionLaunch ); try { loop { @@ -999,7 +998,7 @@ ACTOR Future dataDistributionRelocator( DDQueueData *self, RelocateData rd healthyIds.insert(healthyIds.end(), extraIds.begin(), extraIds.end()); extraIds.clear(); ASSERT(totalIds == destIds.size()); // Sanity check the destIDs before we move keys - doMoveKeys = moveKeys(self->cx, rd.keys, destIds, healthyIds, self->lock, Promise(), &self->startMoveKeysParallelismLock, &self->finishMoveKeysParallelismLock, self->recoveryVersion, self->teamCollections.size() > 1, relocateShardInterval.pairID ); + doMoveKeys = moveKeys(self->cx, rd.keys, destIds, healthyIds, self->lock, Promise(), &self->startMoveKeysParallelismLock, &self->finishMoveKeysParallelismLock, self->teamCollections.size() > 1, relocateShardInterval.pairID ); } else { self->fetchKeysComplete.insert( rd ); break; @@ -1197,10 +1196,9 @@ ACTOR Future dataDistributionQueue( PromiseStream> getAverageShardBytes, UID distributorId, int teamSize, - double* lastLimited, - Version recoveryVersion) + double* lastLimited) { - state DDQueueData self( distributorId, lock, cx, teamCollections, shardsAffectedByTeamFailure, getAverageShardBytes, teamSize, output, input, getShardMetrics, lastLimited, recoveryVersion ); + state DDQueueData self( distributorId, lock, cx, teamCollections, shardsAffectedByTeamFailure, getAverageShardBytes, teamSize, output, input, getShardMetrics, lastLimited ); state std::set serversToLaunchFrom; state KeyRange keysToLaunchFrom; state RelocateData launchData; diff --git a/fdbserver/MoveKeys.actor.cpp b/fdbserver/MoveKeys.actor.cpp index f8b6529690..06768f6e9b 100644 --- a/fdbserver/MoveKeys.actor.cpp +++ b/fdbserver/MoveKeys.actor.cpp @@ -387,7 +387,7 @@ ACTOR Future startMoveKeys( Database occ, KeyRange keys, vector serve return Void(); } -ACTOR Future waitForShardReady( StorageServerInterface server, KeyRange keys, Version minVersion, Version recoveryVersion, GetShardStateRequest::waitMode mode){ +ACTOR Future waitForShardReady( StorageServerInterface server, KeyRange keys, Version minVersion, GetShardStateRequest::waitMode mode ) { loop { try { std::pair rep = wait( server.getShardState.getReply( GetShardStateRequest(keys, mode), TaskMoveKeys ) ); @@ -431,7 +431,7 @@ ACTOR Future checkFetchingState( Database cx, vector dest, KeyRange k } auto si = decodeServerListValue(serverListValues[s].get()); ASSERT( si.id() == dest[s] ); - requests.push_back( 
waitForShardReady( si, keys, tr.getReadVersion().get(), invalidVersion, GetShardStateRequest::FETCHING ) ); + requests.push_back( waitForShardReady( si, keys, tr.getReadVersion().get(), GetShardStateRequest::FETCHING ) ); } wait( timeoutError( waitForAll( requests ), @@ -452,7 +452,7 @@ ACTOR Future checkFetchingState( Database cx, vector dest, KeyRange k // keyServers[k].dest must be the same for all k in keys // Set serverKeys[dest][keys] = true; serverKeys[src][keys] = false for all src not in dest // Should be cancelled and restarted if keyServers[keys].dest changes (?so this is no longer true?) -ACTOR Future finishMoveKeys( Database occ, KeyRange keys, vector destinationTeam, MoveKeysLock lock, FlowLock *finishMoveKeysParallelismLock, Version recoveryVersion, bool hasRemote, UID relocationIntervalId ) +ACTOR Future finishMoveKeys( Database occ, KeyRange keys, vector destinationTeam, MoveKeysLock lock, FlowLock *finishMoveKeysParallelismLock, bool hasRemote, UID relocationIntervalId ) { state TraceInterval interval("RelocateShard_FinishMoveKeys"); state TraceInterval waitInterval(""); @@ -626,7 +626,7 @@ ACTOR Future finishMoveKeys( Database occ, KeyRange keys, vector dest } for(int s=0; sSERVER_READY_QUORUM_TIMEOUT, Void(), TaskMoveKeys ) ); int count = dest.size() - newDestinations.size(); for(int s=0; s moveKeys( Promise dataMovementComplete, FlowLock *startMoveKeysParallelismLock, FlowLock *finishMoveKeysParallelismLock, - Version recoveryVersion, bool hasRemote, UID relocationIntervalId) { @@ -891,7 +890,7 @@ ACTOR Future moveKeys( state Future completionSignaller = checkFetchingState( cx, healthyDestinations, keys, dataMovementComplete, relocationIntervalId ); - wait( finishMoveKeys( cx, keys, destinationTeam, lock, finishMoveKeysParallelismLock, recoveryVersion, hasRemote, relocationIntervalId ) ); + wait( finishMoveKeys( cx, keys, destinationTeam, lock, finishMoveKeysParallelismLock, hasRemote, relocationIntervalId ) ); //This is defensive, but make sure that we always say that the movement is complete before moveKeys completes completionSignaller.cancel(); diff --git a/fdbserver/MoveKeys.h b/fdbserver/MoveKeys.h index 7f6a4f5f5a..d37eb858cd 100644 --- a/fdbserver/MoveKeys.h +++ b/fdbserver/MoveKeys.h @@ -59,7 +59,6 @@ Future moveKeys( Promise const& dataMovementComplete, FlowLock* const& startMoveKeysParallelismLock, FlowLock* const& finishMoveKeysParallelismLock, - Version const& recoveryVersion, bool const& hasRemote, UID const& relocationIntervalId); // for logging only // Eventually moves the given keys to the given destination team diff --git a/fdbserver/workloads/RandomMoveKeys.actor.cpp b/fdbserver/workloads/RandomMoveKeys.actor.cpp index 5b915d386b..efab223682 100644 --- a/fdbserver/workloads/RandomMoveKeys.actor.cpp +++ b/fdbserver/workloads/RandomMoveKeys.actor.cpp @@ -133,7 +133,7 @@ struct MoveKeysWorkload : TestWorkload { try { state Promise signal; - wait( moveKeys( cx, keys, destinationTeamIDs, destinationTeamIDs, lock, signal, &fl1, &fl2, invalidVersion, false, relocateShardInterval.pairID ) ); + wait( moveKeys( cx, keys, destinationTeamIDs, destinationTeamIDs, lock, signal, &fl1, &fl2, false, relocateShardInterval.pairID ) ); TraceEvent(relocateShardInterval.end()).detail("Result","Success"); return Void(); } catch (Error& e) { From 5e6577cc8270d6e270febc98572ba455815cd52d Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Tue, 12 Feb 2019 15:50:44 -0800 Subject: [PATCH 195/226] Final cleanup per review comments Make distributor interface optional in 
ServerDBInfo and many other small changes. --- fdbserver/ClusterController.actor.cpp | 138 ++++++++++---------------- fdbserver/DataDistribution.actor.cpp | 32 ++---- fdbserver/DataDistributorInterface.h | 8 +- fdbserver/MasterProxyServer.actor.cpp | 14 +-- fdbserver/QuietDatabase.actor.cpp | 13 ++- fdbserver/ServerDBInfo.h | 2 +- fdbserver/worker.actor.cpp | 3 +- flow/genericactors.actor.h | 4 +- 8 files changed, 85 insertions(+), 129 deletions(-) diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index d2a0501e5a..7fc2e0da73 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -116,6 +116,13 @@ public: newInfo.distributor = distributorInterf; serverInfo->set( newInfo ); } + + void clearDistributor() { + ServerDBInfo newInfo = serverInfo->get(); + newInfo.id = g_random->randomUniqueID(); + newInfo.distributor = Optional(); + serverInfo->set( newInfo ); + } }; struct UpdateWorkerList { @@ -511,15 +518,19 @@ public: return result; } + void updateKnownIds(std::map< Optional>, int>* id_used) { + (*id_used)[masterProcessId]++; + (*id_used)[clusterControllerProcessId]++; + if (db.serverInfo->get().distributor.present()) { + (*id_used)[db.serverInfo->get().distributor.get().locality.processId()]++; + } + } + RecruitRemoteFromConfigurationReply findRemoteWorkersForConfiguration( RecruitRemoteFromConfigurationRequest const& req ) { RecruitRemoteFromConfigurationReply result; std::map< Optional>, int> id_used; - id_used[masterProcessId]++; - id_used[clusterControllerProcessId]++; - if (db.serverInfo->get().distributor.isValid()) { - id_used[db.serverInfo->get().distributor.locality.processId()]++; - } + updateKnownIds(&id_used); std::set> remoteDC; remoteDC.insert(req.dcId); @@ -557,11 +568,7 @@ public: ErrorOr findWorkersForConfiguration( RecruitFromConfigurationRequest const& req, Optional dcId ) { RecruitFromConfigurationReply result; std::map< Optional>, int> id_used; - id_used[masterProcessId]++; - id_used[clusterControllerProcessId]++; - if (db.serverInfo->get().distributor.isValid()) { - id_used[db.serverInfo->get().distributor.locality.processId()]++; - } + updateKnownIds(&id_used); ASSERT(dcId.present()); @@ -689,11 +696,7 @@ public: } else { RecruitFromConfigurationReply result; std::map< Optional>, int> id_used; - id_used[masterProcessId]++; - id_used[clusterControllerProcessId]++; - if (db.serverInfo->get().distributor.isValid()) { - id_used[db.serverInfo->get().distributor.locality.processId()]++; - } + updateKnownIds(&id_used); auto tlogs = getWorkersForTlogs( req.configuration, req.configuration.tLogReplicationFactor, req.configuration.getDesiredLogs(), req.configuration.tLogPolicy, id_used ); for(int i = 0; i < tlogs.size(); i++) { @@ -916,8 +919,8 @@ public: std::map< Optional>, int> id_used; id_used[clusterControllerProcessId]++; - if (db.serverInfo->get().distributor.isValid()) { - id_used[db.serverInfo->get().distributor.locality.processId()]++; + if (db.serverInfo->get().distributor.present()) { + id_used[db.serverInfo->get().distributor.get().locality.processId()]++; } WorkerFitnessInfo mworker = getWorkerForRoleInDatacenter(clusterControllerDcId, ProcessClass::Master, ProcessClass::NeverAssign, db.config, id_used, true); @@ -1012,49 +1015,31 @@ public: return false; } - void updateUsedIds(RegisterMasterRequest const& req) { - auto dbInfo = db.serverInfo->get(); + std::map< Optional>, int> getUsedIds() { std::map>, int> idUsed; - idUsed[clusterControllerProcessId]++; - 
idUsed[masterProcessId]++; - if (dbInfo.distributor.isValid()) { - idUsed[dbInfo.distributor.locality.processId()]++; - } - for (const auto& tlogset : req.logSystemConfig.tLogs) { + updateKnownIds(&idUsed); + + auto dbInfo = db.serverInfo->get(); + for (const auto& tlogset : dbInfo.logSystemConfig.tLogs) { for (const auto& tlog: tlogset.tLogs) { if (tlog.present()) { idUsed[tlog.interf().locality.processId()]++; } } } - for (const MasterProxyInterface& interf : req.proxies) { + for (const MasterProxyInterface& interf : dbInfo.client.proxies) { ASSERT(interf.locality.processId().present()); idUsed[interf.locality.processId()]++; } - for (const ResolverInterface& interf: req.resolvers) { + for (const ResolverInterface& interf: dbInfo.resolvers) { ASSERT(interf.locality.processId().present()); idUsed[interf.locality.processId()]++; } - usedIds.swap( idUsed ); - } - - void traceUsedIds() { - for (const auto& it : usedIds) { - TraceEvent ev("UsedID"); - if (it.first.present()) ev.detail("Key", it.first.get().contents().toString()); - ev.detail("Value", usedIds[it.first]); - if (id_worker.find(it.first) != id_worker.end()) { - ev.detail("Locality", id_worker[it.first].interf.locality.toString()); - ev.detail("Addr", id_worker[it.first].interf.address().toString()); - } else { - ev.detail("Locality", "Not found!"); - } - } + return idUsed; } std::map< Optional>, WorkerInfo > id_worker; std::map< Optional>, ProcessClass > id_class; //contains the mapping from process id to process class from the database - std::map< Optional>, int> usedIds; // current used process IDs reported by master Standalone lastProcessClasses; bool gotProcessClasses; bool gotFullyRecoveredConfig; @@ -1119,8 +1104,8 @@ ACTOR Future clusterWatchDatabase( ClusterControllerData* cluster, Cluster //This should always be possible, because we can recruit the master on the same process as the cluster controller. std::map< Optional>, int> id_used; id_used[cluster->clusterControllerProcessId]++; - if (cluster->db.serverInfo->get().distributor.isValid()) { - id_used[cluster->db.serverInfo->get().distributor.locality.processId()]++; + if (cluster->db.serverInfo->get().distributor.present()) { + id_used[cluster->db.serverInfo->get().distributor.get().locality.processId()]++; } state WorkerFitnessInfo masterWorker = cluster->getWorkerForRoleInDatacenter(cluster->clusterControllerDcId, ProcessClass::Master, ProcessClass::NeverAssign, db->config, id_used); if( ( masterWorker.worker.second.machineClassFitness( ProcessClass::Master ) > SERVER_KNOBS->EXPECTED_MASTER_FITNESS || masterWorker.worker.first.locality.processId() == cluster->clusterControllerProcessId ) @@ -1396,7 +1381,6 @@ ACTOR Future workerAvailabilityWatch( WorkerInterface worker, ProcessClass failedWorkerInfo.reply.send( RegisterWorkerReply(failedWorkerInfo.processClass, failedWorkerInfo.priorityInfo) ); } cluster->id_worker.erase( worker.locality.processId() ); - cluster->usedIds.erase( worker.locality.processId() ); cluster->updateWorkerList.set( worker.locality.processId(), Optional() ); return Void(); } @@ -1703,11 +1687,6 @@ void clusterRegisterMaster( ClusterControllerData* self, RegisterMasterRequest c dbInfo.recoveryCount = req.recoveryCount; } - // The master may tell us processes that we are not aware of. Thus, when - // using usedIds, proceed with caution as id_workers may not have the process. 
- self->updateUsedIds(req); - self->traceUsedIds(); - if( isChanged ) { dbInfo.id = g_random->randomUniqueID(); self->db.serverInfo->set( dbInfo ); @@ -1770,9 +1749,9 @@ void registerWorker( RegisterWorkerRequest req, ClusterControllerData *self ) { } } - if ( req.distributorInterf.present() && !self->db.serverInfo->get().distributor.isValid() ) { + if ( req.distributorInterf.present() && !self->db.serverInfo->get().distributor.present() ) { const DataDistributorInterface& di = req.distributorInterf.get(); - TraceEvent("ClusterController").detail("RegisterDataDistributor", di.id()).detail("Valid", di.isValid()); + TraceEvent("ClusterController_RegisterDataDistributor", self->id).detail("DDID", di.id()); self->db.setDistributor( di ); } if( info == self->id_worker.end() ) { @@ -2297,20 +2276,20 @@ ACTOR Future startDataDistributor( ClusterControllerDa wait( self->db.serverInfo->onChange() ); } - std::map>, int> id_used = self->usedIds; + std::map>, int> id_used = self->getUsedIds(); state WorkerFitnessInfo data_distributor = self->getWorkerForRoleInDatacenter(dcId, ProcessClass::DataDistributor, ProcessClass::NeverAssign, self->db.config, id_used); state InitializeDataDistributorRequest req; req.reqId = g_random->randomUniqueID(); - TraceEvent("ClusterController_DataDistributorReqID", req.reqId).detail("Recruit", data_distributor.worker.first.address()); + TraceEvent("ClusterController_DataDistributorRecruit", req.reqId).detail("Addr", data_distributor.worker.first.address()); - ErrorOr distributor = wait( data_distributor.worker.first.dataDistributor.getReplyUnlessFailedFor(req, 1, 0) ); + ErrorOr distributor = wait( data_distributor.worker.first.dataDistributor.getReplyUnlessFailedFor(req, SERVER_KNOBS->WAIT_FOR_DISTRIBUTOR_JOIN_DELAY, 0) ); if (distributor.present()) { - TraceEvent("ClusterController_DataDistributorReqID", req.reqId).detail("Recruited", data_distributor.worker.first.address()); + TraceEvent("ClusterController_DataDistributorRecruited", req.reqId).detail("Addr", data_distributor.worker.first.address()); return distributor.get(); } } catch (Error& e) { - TraceEvent("ClusterController_DataDistributorReqID", req.reqId).error(e); + TraceEvent("ClusterController_DataDistributorRecruitError", req.reqId).error(e); if ( e.code() != error_code_no_more_servers ) { throw; } @@ -2320,44 +2299,29 @@ ACTOR Future startDataDistributor( ClusterControllerDa } ACTOR Future waitDDRejoinOrStartDD( ClusterControllerData *self, ClusterControllerFullInterface *clusterInterface ) { - state Future newDistributor = Never(); + state Future initialDelay = delay(SERVER_KNOBS->WAIT_FOR_DISTRIBUTOR_JOIN_DELAY); // wait for a while to see if existing data distributor will join. 
loop choose { - when ( wait( delay(SERVER_KNOBS->WAIT_FOR_DISTRIBUTOR_JOIN_DELAY) ) ) { break; } - when ( wait( self->db.serverInfo->onChange() ) ) { // Rejoins via worker registration - if ( self->db.serverInfo->get().distributor.isValid() ) { - TraceEvent("ClusterController_InfoChange", self->id).detail("DataDistributorID", self->db.serverInfo->get().distributor.id()); + when ( wait(initialDelay) ) { break; } + when ( wait(self->db.serverInfo->onChange()) ) { // Rejoins via worker registration + if ( self->db.serverInfo->get().distributor.present() ) { + TraceEvent("ClusterController_InfoChange", self->id) + .detail("DataDistributorID", self->db.serverInfo->get().distributor.get().id()); break; } } } - if ( !self->db.serverInfo->get().distributor.isValid() ) { - newDistributor = startDataDistributor( self ); - } - - // Wait on failures and restart it. loop { - state Future distributorFailed = Never(); - if ( self->db.serverInfo->get().distributor.isValid() ) { - distributorFailed = waitFailureClient( self->db.serverInfo->get().distributor.waitFailure, SERVER_KNOBS->DD_FAILURE_TIME ); - } - - choose { - when ( DataDistributorInterface distributorInterf = wait( newDistributor ) ) { - TraceEvent("ClusterController", self->id) - .detail("DataDistributorID", distributorInterf.id()) - .detail("Valid", distributorInterf.isValid()); - self->db.setDistributor( distributorInterf ); - newDistributor = Never(); - } - when ( wait( distributorFailed ) ) { - TraceEvent("ClusterController", self->id) - .detail("DataDistributorDied", self->db.serverInfo->get().distributor.id()); - self->db.setDistributor( DataDistributorInterface() ); - newDistributor = startDataDistributor( self ); - } + if ( self->db.serverInfo->get().distributor.present() ) { + wait( waitFailureClient( self->db.serverInfo->get().distributor.get().waitFailure, SERVER_KNOBS->DD_FAILURE_TIME ) ); + TraceEvent("ClusterController", self->id) + .detail("DataDistributorDied", self->db.serverInfo->get().distributor.get().id()); + self->db.clearDistributor(); + } else { + DataDistributorInterface distributorInterf = wait( startDataDistributor(self) ); + self->db.setDistributor( distributorInterf ); } } } diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index ca9358d0c3..5877c1695a 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -2422,7 +2422,6 @@ ACTOR Future storageServerFailureTracker( Version addedVersion ) { state StorageServerInterface interf = server->lastKnownInterface; - state bool doBuildTeam = false; loop { if( self->server_status.get(interf.id()).initialized ) { bool unhealthy = self->server_status.get(interf.id()).isUnhealthy(); @@ -2437,10 +2436,6 @@ ACTOR Future storageServerFailureTracker( } self->server_status.set( interf.id(), *status ); - if (doBuildTeam) { - doBuildTeam = false; - self->doBuildTeams = true; - } if( status->isFailed ) self->restartRecruiting.trigger(); @@ -2455,8 +2450,8 @@ ACTOR Future storageServerFailureTracker( wait(delay(SERVER_KNOBS->DATA_DISTRIBUTION_FAILURE_REACTION_TIME - elapsed)); } status->isFailed = !status->isFailed; - if( !status->isFailed && (!server->teams.size() || self->zeroHealthyTeams->get()) ) { - doBuildTeam = true; + if(!status->isFailed && !server->teams.size()) { + self->doBuildTeams = true; } TraceEvent("StatusMapChange", self->distributorId).detail("ServerID", interf.id()).detail("Status", status->toString()) @@ -2572,9 +2567,7 @@ ACTOR Future storageServerTracker( if(hasWrongStoreTypeOrDC) 
self->restartRecruiting.trigger(); - TraceEvent("StatusMapChange", self->distributorId).detail("Status", status.toString()) - .detail("Server", server->id).detail("LastIsUnhealthy", lastIsUnhealthy); - if ( lastIsUnhealthy && !status.isUnhealthy() && (!server->teams.size() || self->zeroHealthyTeams->get()) ) { + if ( lastIsUnhealthy && !status.isUnhealthy() && !server->teams.size() ) { self->doBuildTeams = true; } lastIsUnhealthy = status.isUnhealthy(); @@ -2921,14 +2914,12 @@ ACTOR Future remoteRecovered( Reference> db } ACTOR Future monitorHealthyTeams( DDTeamCollection* self ) { - state Future checkHealth; loop choose { - when ( wait( delay( SERVER_KNOBS->DD_ZERO_HEALTHY_TEAM_DELAY ) ) ) { - if ( self->healthyTeamCount == 0 ) { - self->doBuildTeams = true; - checkHealth = DDTeamCollection::checkBuildTeams(self); - } + when ( wait(self->zeroHealthyTeams->get() ? delay(SERVER_KNOBS->DD_ZERO_HEALTHY_TEAM_DELAY) : Never()) ) { + self->doBuildTeams = true; + wait( DDTeamCollection::checkBuildTeams(self) ); } + when ( wait(self->zeroHealthyTeams->onChange()) ) {} } } @@ -3289,7 +3280,6 @@ struct DataDistributorData : NonCopyable, ReferenceCounted Reference> configuration; std::vector> primaryDcId; std::vector> remoteDcIds; - AsyncTrigger configurationTrigger; UID ddId; PromiseStream< std::pair> > ddStorageServerChanges; PromiseStream> addActor; @@ -3332,7 +3322,6 @@ ACTOR Future configurationMonitor( Reference self ) { if ( conf != self->configuration->get() ) { TraceEvent("DataDistributor_UpdateConfiguration", self->ddId).detail("Config", conf.toString()); self->configuration->set( conf ); - self->configurationTrigger.trigger(); } state Future watchFuture = tr.watch(configVersionKey); @@ -3380,7 +3369,7 @@ ACTOR Future dataDistributor(DataDistributorInterface di, ReferenceaddActor.send( configurationMonitor( self ) ); loop choose { - when ( wait( self->configuration.onChange() ) ) { + when ( wait( self->configuration->onChange() ) ) { self->refreshDcIds(); break; } @@ -3392,22 +3381,21 @@ ACTOR Future dataDistributor(DataDistributorInterface di, Reference> > ddStorageServerChanges; state double lastLimited = 0; state Future distributor = reportErrorsExcept( dataDistribution( self->dbInfo, di.id(), self->configuration->get(), ddStorageServerChanges, self->primaryDcId, self->remoteDcIds, &lastLimited ), "DataDistribution", di.id(), &normalDataDistributorErrors() ); - self->addActor.send( distributor ); self->addActor.send( reportErrorsExcept( rateKeeper( self->dbInfo, ddStorageServerChanges, di.getRateInfo.getFuture(), self->configuration->get(), &lastLimited ), "Ratekeeper", di.id(), &normalRateKeeperErrors() ) ); loop choose { - when ( wait( self->configurationTrigger.onTrigger() ) ) { + when ( wait( self->configuration->onChange() ) ) { TraceEvent("DataDistributor_Restart", di.id()) .detail("ClusterControllerID", lastClusterControllerID) .detail("Configuration", self->configuration->get().toString()); self->refreshDcIds(); distributor = reportErrorsExcept( dataDistribution( self->dbInfo, di.id(), self->configuration->get(), ddStorageServerChanges, self->primaryDcId, self->remoteDcIds, &lastLimited ), "DataDistribution", di.id(), &normalDataDistributorErrors() ); - self->addActor.send( distributor ); } when ( wait( collection ) ) { ASSERT(false); throw internal_error(); } + when ( wait( distributor ) ) {} } } catch ( Error &err ) { diff --git a/fdbserver/DataDistributorInterface.h b/fdbserver/DataDistributorInterface.h index 5223ac824b..ac60a11afe 100644 --- 
a/fdbserver/DataDistributorInterface.h +++ b/fdbserver/DataDistributorInterface.h @@ -28,13 +28,11 @@ struct DataDistributorInterface { RequestStream> waitFailure; RequestStream getRateInfo; struct LocalityData locality; - bool valid; - DataDistributorInterface() : valid(false) {} - explicit DataDistributorInterface(const struct LocalityData& l) : locality(l), valid(true) {} + DataDistributorInterface() {} + explicit DataDistributorInterface(const struct LocalityData& l) : locality(l) {} void initEndpoints() {} - bool isValid() const { return valid; } UID id() const { return getRateInfo.getEndpoint().token; } NetworkAddress address() const { return getRateInfo.getEndpoint().address; } bool operator== (const DataDistributorInterface& r) const { @@ -46,7 +44,7 @@ struct DataDistributorInterface { template void serialize(Archive& ar) { - serializer(ar, waitFailure, getRateInfo, locality, valid); + serializer(ar, waitFailure, getRateInfo, locality); } }; diff --git a/fdbserver/MasterProxyServer.actor.cpp b/fdbserver/MasterProxyServer.actor.cpp index 1718fb845d..2a5914c4f0 100644 --- a/fdbserver/MasterProxyServer.actor.cpp +++ b/fdbserver/MasterProxyServer.actor.cpp @@ -93,22 +93,22 @@ ACTOR Future getRate(UID myID, Reference> db, int64 state Future reply = Never(); state int64_t lastTC = 0; - if (db->get().distributor.isValid()) nextRequestTimer = Void(); + if (db->get().distributor.present()) nextRequestTimer = Void(); loop choose { when ( wait( db->onChange() ) ) { - if ( db->get().distributor.isValid() ) { - TraceEvent("Proxy", myID) - .detail("DataDistributorChangedID", db->get().distributor.id()); + if ( db->get().distributor.present() ) { + TraceEvent("Proxy_DataDistributorChanged", myID) + .detail("DDID", db->get().distributor.get().id()); nextRequestTimer = Void(); // trigger GetRate request } else { - TraceEvent("Proxy", myID) - .detail("DataDistributorDied", db->get().distributor.id()); + TraceEvent("Proxy_DataDistributorDied", myID); nextRequestTimer = Never(); + reply = Never(); } } when ( wait( nextRequestTimer ) ) { nextRequestTimer = Never(); - reply = brokenPromiseToNever(db->get().distributor.getRateInfo.getReply(GetRateInfoRequest(myID, *inTransactionCount))); + reply = brokenPromiseToNever(db->get().distributor.get().getRateInfo.getReply(GetRateInfoRequest(myID, *inTransactionCount))); } when ( GetRateInfoReply rep = wait(reply) ) { reply = Never(); diff --git a/fdbserver/QuietDatabase.actor.cpp b/fdbserver/QuietDatabase.actor.cpp index 44f346d119..c76e70de1f 100644 --- a/fdbserver/QuietDatabase.actor.cpp +++ b/fdbserver/QuietDatabase.actor.cpp @@ -70,18 +70,21 @@ ACTOR Future getDataDistributorWorker( Database cx, Reference> workers = wait( getWorkers( dbInfo ) ); + if (!dbInfo->get().distributor.present()) continue; for( int i = 0; i < workers.size(); i++ ) { - if( workers[i].first.address() == dbInfo->get().distributor.address() ) { - TraceEvent("GetDataDistributorWorker").detail("Stage", "GotWorkers").detail("DataDistributorId", dbInfo->get().distributor.id()).detail("WorkerId", workers[i].first.id()); + if( workers[i].first.address() == dbInfo->get().distributor.get().address() ) { + TraceEvent("GetDataDistributorWorker").detail("Stage", "GotWorkers") + .detail("DataDistributorId", dbInfo->get().distributor.get().id()) + .detail("WorkerId", workers[i].first.id()); return workers[i].first; } } TraceEvent(SevWarn, "GetDataDistributorWorker") .detail("Error", "DataDistributorWorkerNotFound") - .detail("DataDistributorId", dbInfo->get().distributor.id()) - 
.detail("DataDistributorAddress", dbInfo->get().distributor.address()) + .detail("DataDistributorId", dbInfo->get().distributor.get().id()) + .detail("DataDistributorAddress", dbInfo->get().distributor.get().address()) .detail("WorkerCount", workers.size()); } } @@ -334,7 +337,7 @@ ACTOR Future waitForQuietDatabase( Database cx, Referenceget().distributor.id(); + UID distributorUID = dbInfo->get().distributor.get().id(); TraceEvent("QuietDatabaseGotDataDistributor", distributorUID).detail("Locality", distributorWorker.locality.toString()); state Future dataInFlight = getDataInFlight( cx, distributorWorker); diff --git a/fdbserver/ServerDBInfo.h b/fdbserver/ServerDBInfo.h index 8d315ad5b0..abb7be412c 100644 --- a/fdbserver/ServerDBInfo.h +++ b/fdbserver/ServerDBInfo.h @@ -37,7 +37,7 @@ struct ServerDBInfo { UID id; // Changes each time any other member changes ClusterControllerFullInterface clusterInterface; ClientDBInfo client; // After a successful recovery, eventually proxies that communicate with it - DataDistributorInterface distributor; // The best guess of current data distributor, which might be unknown. + Optional distributor; // The best guess of current data distributor. MasterInterface master; // The best guess as to the most recent master, which might still be recovering vector resolvers; DBRecoveryCount recoveryCount; // A recovery count from DBCoreState. A successful master recovery increments it twice; unsuccessful recoveries may increment it once. Depending on where the current master is in its recovery process, this might not have been written by the current master. diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index ace99334f4..873587ae6e 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -514,7 +514,7 @@ ACTOR Future monitorServerDBInfo( Referenceget().present() ? brokenPromiseToNever( ccInterface->get().get().getServerDBInfo.getReply( req ) ) : Never() ) ) { TraceEvent("GotServerDBInfoChange").detail("ChangeID", ni.id).detail("MasterID", ni.master.id()) - .detail("DataDistributorID", ni.distributor.id()); + .detail("DataDistributorID", ni.distributor.present() ? ni.distributor.get().id() : UID()); ServerDBInfo localInfo = ni; localInfo.myLocality = locality; dbInfo->set(localInfo); @@ -729,6 +729,7 @@ ACTOR Future workerServer( Reference connFile, Refe if ( ddInterf->get().present() ) { recruited = ddInterf->get().get(); + TEST(true); // Recruited while already a data distributor. 
} else { startRole( Role::DATA_DISTRIBUTOR, recruited.id(), interf.id() ); diff --git a/flow/genericactors.actor.h b/flow/genericactors.actor.h index 3ad083006b..11aa4f6c67 100644 --- a/flow/genericactors.actor.h +++ b/flow/genericactors.actor.h @@ -780,7 +780,9 @@ Future setWhenDoneOrError( Future condition, Reference> try { wait( condition ); } - catch ( Error& e ) {} + catch ( Error& e ) { + if (e.code() == error_code_actor_cancelled) throw; + } var->set( val ); return Void(); } From f601aa740750536c5bf6696968e4c37430cfe5a7 Mon Sep 17 00:00:00 2001 From: mpilman Date: Thu, 7 Feb 2019 22:18:31 -0800 Subject: [PATCH 196/226] Make Java an optional dependency --- CMakeLists.txt | 27 +++++++++-------------- bindings/CMakeLists.txt | 4 +++- bindings/java/CMakeLists.txt | 4 ---- cmake/FDBComponents.cmake | 42 ++++++++++++++++++++++++++++++++++++ 4 files changed, 55 insertions(+), 22 deletions(-) create mode 100644 cmake/FDBComponents.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 38c93090c7..911da1ee39 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,7 +21,7 @@ project(fdb VERSION 6.1.0 DESCRIPTION "FoundationDB is a scalable, fault-tolerant, ordered key-value store with full ACID transactions." HOMEPAGE_URL "http://www.foundationdb.org/" - LANGUAGES ASM C CXX Java) + LANGUAGES ASM C CXX) if(WIN32) # C# is currently only supported on Windows. @@ -57,29 +57,16 @@ find_package(PythonLibs 3.4 REQUIRED) ################################################################################ -# LibreSSL +# Compiler configuration ################################################################################ -set(DISABLE_TLS OFF CACHE BOOL "Don't try to find LibreSSL and always build without TLS support") -if(DISABLE_TLS) - set(WITH_TLS FALSE) -else() - set(LIBRESSL_USE_STATIC_LIBS TRUE) - find_package(LibreSSL) - if(LibreSSL_FOUND) - set(WITH_TLS TRUE) - else() - message(STATUS "LibreSSL NOT Found - Will compile without TLS Support") - message(STATUS "You can set LibreSSL_ROOT to the LibreSSL install directory to help cmake find it") - set(WITH_TLS FALSE) - endif() -endif() +include(ConfigureCompiler) ################################################################################ # Compiler configuration ################################################################################ -include(ConfigureCompiler) +include(FDBComponents) ################################################################################ # Get repository information @@ -256,3 +243,9 @@ if (CMAKE_EXPORT_COMPILE_COMMANDS) ) add_custom_target(procossed_compile_commands ALL DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/compile_commands.json ${CMAKE_CURRENT_BINARY_DIR}/compile_commands.json) endif() + +################################################################################ +# Inform user which components we are going to build +################################################################################ + +print_components() diff --git a/bindings/CMakeLists.txt b/bindings/CMakeLists.txt index c3c7cadf79..a5a3ca6a6a 100644 --- a/bindings/CMakeLists.txt +++ b/bindings/CMakeLists.txt @@ -1,3 +1,5 @@ add_subdirectory(c) add_subdirectory(python) -add_subdirectory(java) +if(BUILD_JAVA) + add_subdirectory(java) +endif() diff --git a/bindings/java/CMakeLists.txt b/bindings/java/CMakeLists.txt index e22d5b01a7..917f80187e 100644 --- a/bindings/java/CMakeLists.txt +++ b/bindings/java/CMakeLists.txt @@ -1,7 +1,3 @@ -include(UseJava) -find_package(JNI 1.8 REQUIRED) -find_package(Java 1.8 COMPONENTS Development REQUIRED) - 
set(JAVA_BINDING_SRCS
  src/main/com/apple/foundationdb/async/AsyncIterable.java
  src/main/com/apple/foundationdb/async/AsyncIterator.java
diff --git a/cmake/FDBComponents.cmake b/cmake/FDBComponents.cmake
new file mode 100644
index 0000000000..7fa1f95bfc
--- /dev/null
+++ b/cmake/FDBComponents.cmake
@@ -0,0 +1,42 @@
+################################################################################
+# Java Bindings
+################################################################################
+
+set(BUILD_JAVA OFF)
+find_package(JNI 1.8)
+find_package(Java 1.8 COMPONENTS Development)
+if(JNI_FOUND AND Java_FOUND AND Java_Development_FOUND)
+  set(BUILD_JAVA ON)
+  include(UseJava)
+  enable_language(Java)
+endif()
+
+################################################################################
+# LibreSSL
+################################################################################
+
+set(DISABLE_TLS OFF CACHE BOOL "Don't try to find LibreSSL and always build without TLS support")
+if(DISABLE_TLS)
+  set(WITH_TLS OFF)
+else()
+  set(LIBRESSL_USE_STATIC_LIBS TRUE)
+  find_package(LibreSSL)
+  if(LibreSSL_FOUND)
+    set(WITH_TLS ON)
+  else()
+    message(STATUS "LibreSSL NOT Found - Will compile without TLS Support")
+    message(STATUS "You can set LibreSSL_ROOT to the LibreSSL install directory to help cmake find it")
+    set(WITH_TLS OFF)
+  endif()
+endif()
+
+
+function(print_components)
+  message(STATUS "=============================")
+  message(STATUS "   Components Build Overview ")
+  message(STATUS "=============================")
+  message(STATUS "Build Python Bindings: ON")
+  message(STATUS "Build Java Bindings:   ${BUILD_JAVA}")
+  message(STATUS "Build with TLS support: ${WITH_TLS}")
+  message(STATUS "=============================")
+endfunction()

From 1ed149e6241e34afe22dbc2fdc32e41184e28717 Mon Sep 17 00:00:00 2001
From: mpilman
Date: Thu, 7 Feb 2019 23:46:35 -0800
Subject: [PATCH 197/226] Add option to build fat jar

---
 bindings/java/CMakeLists.txt | 62 +++++++++++++++++++++++++++++++++---
 1 file changed, 58 insertions(+), 4 deletions(-)

diff --git a/bindings/java/CMakeLists.txt b/bindings/java/CMakeLists.txt
index 917f80187e..47391874b0 100644
--- a/bindings/java/CMakeLists.txt
+++ b/bindings/java/CMakeLists.txt
@@ -125,11 +125,65 @@ set_target_properties(fdb_java PROPERTIES
 set(CMAKE_JAVA_COMPILE_FLAGS "-source" "1.8" "-target" "1.8")
 set(CMAKE_JNI_TARGET TRUE)
 set(JAR_VERSION "${FDB_MAJOR}.${FDB_MINOR}.${FDB_REVISION}")
-add_jar(fdb-java ${JAVA_BINDING_SRCS} ${GENERATED_JAVA_FILES}
-  OUTPUT_DIR ${PROJECT_BINARY_DIR}/lib)
+add_jar(fdb-java ${JAVA_BINDING_SRCS} ${GENERATED_JAVA_FILES} ${CMAKE_SOURCE_DIR}/LICENSE
+  OUTPUT_DIR ${PROJECT_BINARY_DIR}/lib VERSION ${CMAKE_PROJECT_VERSION})
 add_dependencies(fdb-java fdb_java_options fdb_java)
 add_jar(foundationdb-tests SOURCES ${JAVA_TESTS_SRCS} INCLUDE_JARS fdb-java)
 add_dependencies(foundationdb-tests fdb_java_options)
-install_jar(fdb-java DESTINATION ${FDB_SHARE_DIR}/java COMPONENT clients)
-install(TARGETS fdb_java DESTINATION ${FDB_LIB_DIR} COMPONENT clients)
+install_jar(fdb-java DESTINATION ${FDB_SHARE_DIR}/java COMPONENT java)
+install(TARGETS fdb_java DESTINATION ${FDB_LIB_DIR} COMPONENT java)
+
+set(BUILD_FAT_JAR OFF CACHE BOOL "Build a Jar that includes the jni libraries")
+set(FAT_JAR_BINARIES "NOTFOUND" CACHE STRING
+  "Path of a directory structure with libraries to include in fat jar (a lib directory)")
+
+if(BUILD_FAT_JAR)
+  set(jar_destination ${CMAKE_BINARY_DIR}/fat_jar)
+  set(unpack_dir
${CMAKE_CURRENT_BINARY_DIR}/fat_jar)
+  file(MAKE_DIRECTORY ${jar_destination})
+  file(MAKE_DIRECTORY ${unpack_dir})
+  message(STATUS "Building fat jar to ${jar_destination}")
+  get_property(jar_path TARGET fdb-java PROPERTY JAR_FILE)
+  add_custom_command(OUTPUT ${unpack_dir}/META-INF/MANIFEST.MF
+    COMMAND ${Java_JAR_EXECUTABLE} xf ${jar_path}
+    WORKING_DIRECTORY ${unpack_dir}
+    COMMENT "Unpack jar-file")
+  add_custom_target(unpack_jar DEPENDS ${unpack_dir}/META-INF/MANIFEST.MF)
+  add_dependencies(unpack_jar fdb_java)
+  add_custom_command(OUTPUT ${unpack_dir}/LICENSE
+    COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/LICENSE ${unpack_dir}
+    COMMENT "copy license")
+  add_custom_target(copy_license DEPENDS ${unpack_dir}/LICENSE)
+  add_dependencies(unpack_jar copy_license)
+  if(FAT_JAR_BINARIES)
+    add_custom_command(OUTPUT ${unpack_dir}/lib
+      COMMAND ${CMAKE_COMMAND} -E copy_directory ${FAT_JAR_BINARIES} ${unpack_dir}
+      COMMENT "copy additional libraries"
+      DEPENDS ${unpack_dir}/META-INF/MANIFEST.MF)
+    add_custom_target(copy_libs DEPENDS ${unpack_dir}/lib)
+    add_dependencies(unpack_jar copy_libs)
+  endif()
+  if(WIN32)
+    set(lib_destination "windows/amd64")
+  elseif(APPLE)
+    set(lib_destination "osx/x86_64")
+  else()
+    set(lib_destination "linux/amd64")
+  endif()
+  set(lib_destination "${unpack_dir}/lib/${lib_destination}")
+  file(MAKE_DIRECTORY ${lib_destination})
+  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/lib_copied
+    COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:fdb_java> ${lib_destination} &&
+      ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/lib_copied
+    COMMENT "Copy library")
+  add_custom_target(copy_lib DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/lib_copied)
+  add_dependencies(copy_lib unpack_jar)
+  set(target_jar ${jar_destination}/fdb-java-${CMAKE_PROJECT_VERSION}.jar)
+  add_custom_command(OUTPUT ${target_jar}
+    COMMAND ${Java_JAR_EXECUTABLE} cf ${target_jar} .
+    WORKING_DIRECTORY ${unpack_dir}
+    COMMENT "Build ${jar_destination}/fdb-java-${CMAKE_PROJECT_VERSION}.jar")
+  add_custom_target(fat-jar ALL DEPENDS ${target_jar})
+  add_dependencies(fat-jar copy_lib)
+endif()

From 70545e07d814d476c0fbe35b507d4b2290b34c82 Mon Sep 17 00:00:00 2001
From: mpilman
Date: Fri, 8 Feb 2019 00:03:16 -0800
Subject: [PATCH 198/226] Added documentation on how to build the jar-files

---
 README.md               | 10 ++++++++
 bindings/java/README.md | 54 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+)
 create mode 100644 bindings/java/README.md

diff --git a/README.md b/README.md
index 76782d7d88..7153ef8add 100755
--- a/README.md
+++ b/README.md
@@ -104,6 +104,16 @@ cmake -DLibreSSL_ROOT=/usr/local/libressl-2.8.3/ ../foundationdb
 FoundationDB will build just fine without LibreSSL, however, the resulting
 binaries won't support TLS connections.
 
+### Language Bindings
+
+The language bindings supported by cmake each have a `README.md` file in the
+corresponding `bindings/lang` directory.
+
+Generally, cmake will build all language bindings for which it can find all
+necessary dependencies. After each successful cmake run, cmake will tell you
+which language bindings it is going to build.
+
+
 ### Generating compile_commands.json
 
 CMake can build a compilation database for you.
However, the default generated
diff --git a/bindings/java/README.md b/bindings/java/README.md
new file mode 100644
index 0000000000..3b53dc5bb8
--- /dev/null
+++ b/bindings/java/README.md
@@ -0,0 +1,54 @@
+FoundationDB logo
+
+FoundationDB is a distributed database designed to handle large volumes of structured data across clusters of commodity servers. It organizes data as an ordered key-value store and employs ACID transactions for all operations. It is especially well-suited for read/write workloads but also has excellent performance for write-intensive workloads. Users interact with the database using an API language binding.
+
+To learn more about FoundationDB, visit [foundationdb.org](https://www.foundationdb.org/)
+
+## FoundationDB Java Bindings
+
+In order to build the java bindings,
+[JDK](http://www.oracle.com/technetwork/java/javase/downloads/index.html) >= 8
+has to be installed. CMake will try to find a JDK installation; if it finds
+one, it will automatically build the java bindings.
+
+If you have Java installed but cmake fails to find it, set the
+`JAVA_HOME` environment variable.
+
+### Fat Jar
+
+By default, the generated jar file will depend on an installed libfdb_java
+(provided with the generated RPM/DEB file on Linux). However, users usually find
+a Jar-file that contains this library more convenient. This is also what you
+will get if you download the jar file from Maven.
+
+If you want to build a jar file that contains the library, enable the cmake
+variable `BUILD_FAT_JAR`. You can do this with the following command:
+
+```
+cmake -DBUILD_FAT_JAR=ON
+```
+
+This will add the JNI library for the current architecture to the jar file.
+
+#### Multi-Platform Jar-File
+
+If you want to create a jar file that can run on more than one supported
+architecture (the official one supports MacOS, Linux, and Windows), you can do
+that by executing the following steps:
+
+1. Create a directory called `lib` somewhere on your file system.
+1. Create a subdirectory for each *additional* platform you want to support
+   (`windows` for windows, `osx` for MacOS, and `linux` for Linux).
+1. Under each of those create a subdirectory with the name of the architecture
+   (currently only `amd64` is supported; note that on MacOS it has to be
+   called `x86_64`, while it is `amd64` on all others).
+1. Set the cmake variable `FAT_JAR_BINARIES` to this `lib` directory (see the
+   sketch after this section). For example, if you created this directory
+   structure under `/foo/bar`, the corresponding cmake command would be:
+
+```
+cmake -DFAT_JAR_BINARIES=/foo/bar/lib
+```
+
+After building (with `make` or `Visual Studio`) you will find a
+jar-file in the `fat_jar` directory in your build directory.
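+
+For reference, the `lib` directory passed via `FAT_JAR_BINARIES` could look
+like the following sketch (the library file names are illustrative assumptions
+and depend on how each platform build produced them):
+
+```
+lib/
+├── linux/amd64/libfdb_java.so
+├── osx/x86_64/libfdb_java.jnilib
+└── windows/amd64/fdb_java.dll
+```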
From 543e6b900077606609397358b0b6b418b8560599 Mon Sep 17 00:00:00 2001 From: mpilman Date: Wed, 6 Feb 2019 17:54:01 -0800 Subject: [PATCH 199/226] intermediate commit --- CMakeLists.txt | 1 + cmake/FindSphinx.cmake | 7 +++++++ documentation/CMakeLists.txt | 16 ++++++++++++++++ 3 files changed, 24 insertions(+) create mode 100644 cmake/FindSphinx.cmake create mode 100644 documentation/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 911da1ee39..e79e44b0a5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -222,6 +222,7 @@ endif() add_subdirectory(bindings) add_subdirectory(fdbbackup) add_subdirectory(tests) +add_subdirectory(documentation) if(WIN32) add_subdirectory(packaging/msi) diff --git a/cmake/FindSphinx.cmake b/cmake/FindSphinx.cmake new file mode 100644 index 0000000000..8d3715d880 --- /dev/null +++ b/cmake/FindSphinx.cmake @@ -0,0 +1,7 @@ +find_program(SPHINXBUILD + sphinx-build + DOC "Sphinx-build tool") + +find_package_handle_standard_args(Sphinx + FOUND_VAR SPHINX_FOUND + REQUIRED_VARS SPHINXBUILD) diff --git a/documentation/CMakeLists.txt b/documentation/CMakeLists.txt new file mode 100644 index 0000000000..2678ef9ac4 --- /dev/null +++ b/documentation/CMakeLists.txt @@ -0,0 +1,16 @@ +find_package(Sphinx) +if(SPHINX_FOUND) + set(SPHINX_OPTIONS "" CACHE STRING "Additional Options to pass to sphinx-build") + set(SPHINX_PAPER_SIZE "letter" CACHE STRING "Paper size for documents generated with sphinx") + set(SPHINX_OPTS "${SPHINX_OPTIONS} -c . -d ${CMAKE_CURRENT_BINARY_DIR}/doctrees -D latex_paper_size=${SPHINX_PAPER_SIZE}") + file(GLOB_RECURSE RST_FILES "source/*rst") + file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/html ${CMAKE_CURRENT_BINARY_DIR}/doctrees) + add_custom_command( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/html/index.html + COMMAND ${SPHINXBUILD} -b html ${SPHINX_OPTS} ${CMAKE_CURRENT_BINARY_DIR}/html + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/sphinx + DEPENDS ${RST_FILES}) + add_custom_target(html DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/html/index.html) +else() + message(WARNING "Cound not find sphinx-build - will not build documentatioin") +endif() From 308db3772c6cd0c0d2e069fcf63a9a14ef048ad2 Mon Sep 17 00:00:00 2001 From: mpilman Date: Fri, 8 Feb 2019 13:46:50 -0800 Subject: [PATCH 200/226] build sphinx documentation --- CMakeLists.txt | 12 +++++- cmake/FindVirtualenv.cmake | 20 ++++++++++ documentation/CMakeLists.txt | 73 ++++++++++++++++++++++++++++-------- 3 files changed, 89 insertions(+), 16 deletions(-) create mode 100644 cmake/FindVirtualenv.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index e79e44b0a5..9b9e58e2ad 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -55,6 +55,14 @@ find_package(PythonInterp 3.4 REQUIRED) set(Python_ADDITIONAL_VERSIONS 3.4 3.5 3.5) find_package(PythonLibs 3.4 REQUIRED) +################################################################################ +# Pip +################################################################################ + +find_package(Virtualenv) +if (Virtualenv_FOUND) + set(BUILD_DOCUMENTATION ON) +endif() ################################################################################ # Compiler configuration @@ -222,7 +230,9 @@ endif() add_subdirectory(bindings) add_subdirectory(fdbbackup) add_subdirectory(tests) -add_subdirectory(documentation) +if(BUILD_DOCUMENTATION) + add_subdirectory(documentation) +endif() if(WIN32) add_subdirectory(packaging/msi) diff --git a/cmake/FindVirtualenv.cmake b/cmake/FindVirtualenv.cmake new file mode 100644 index 0000000000..ace748f672 --- /dev/null 
From 308db3772c6cd0c0d2e069fcf63a9a14ef048ad2 Mon Sep 17 00:00:00 2001
From: mpilman
Date: Fri, 8 Feb 2019 13:46:50 -0800
Subject: [PATCH 200/226] build sphinx documentation

---
 CMakeLists.txt               | 12 +++++++++-
 cmake/FindVirtualenv.cmake   | 20 ++++++++++++++++
 documentation/CMakeLists.txt | 73 ++++++++++++++++++++++++++++++++++----------
 3 files changed, 89 insertions(+), 16 deletions(-)
 create mode 100644 cmake/FindVirtualenv.cmake

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e79e44b0a5..9b9e58e2ad 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -55,6 +55,14 @@ find_package(PythonInterp 3.4 REQUIRED)
 set(Python_ADDITIONAL_VERSIONS 3.4 3.5 3.5)
 find_package(PythonLibs 3.4 REQUIRED)
 
+################################################################################
+# Pip
+################################################################################
+
+find_package(Virtualenv)
+if (Virtualenv_FOUND)
+  set(BUILD_DOCUMENTATION ON)
+endif()
 
 ################################################################################
 # Compiler configuration
@@ -222,7 +230,9 @@ endif()
 add_subdirectory(bindings)
 add_subdirectory(fdbbackup)
 add_subdirectory(tests)
-add_subdirectory(documentation)
+if(BUILD_DOCUMENTATION)
+  add_subdirectory(documentation)
+endif()
 
 if(WIN32)
   add_subdirectory(packaging/msi)
diff --git a/cmake/FindVirtualenv.cmake b/cmake/FindVirtualenv.cmake
new file mode 100644
index 0000000000..ace748f672
--- /dev/null
+++ b/cmake/FindVirtualenv.cmake
@@ -0,0 +1,20 @@
+find_program(_VIRTUALENV_EXE virtualenv)
+
+# get version and test that program actually works
+if(_VIRTUALENV_EXE)
+  execute_process(
+    COMMAND ${_VIRTUALENV_EXE} --version
+    RESULT_VARIABLE ret_code
+    OUTPUT_VARIABLE version_string
+    ERROR_VARIABLE error_output
+    OUTPUT_STRIP_TRAILING_WHITESPACE)
+  if(ret_code EQUAL 0 AND NOT error_output)
+    # we found a working virtualenv
+    set(VIRTUALENV_EXE ${_VIRTUALENV_EXE})
+    set(VIRTUALENV_VERSION ${version_string})
+  endif()
+endif()
+
+find_package_handle_standard_args(Virtualenv
+  REQUIRED_VARS VIRTUALENV_EXE
+  VERSION_VAR VIRTUALENV_VERSION)
diff --git a/documentation/CMakeLists.txt b/documentation/CMakeLists.txt
index 2678ef9ac4..2f09c009f6 100644
--- a/documentation/CMakeLists.txt
+++ b/documentation/CMakeLists.txt
@@ -1,16 +1,59 @@
-find_package(Sphinx)
-if(SPHINX_FOUND)
-  set(SPHINX_OPTIONS "" CACHE STRING "Additional Options to pass to sphinx-build")
-  set(SPHINX_PAPER_SIZE "letter" CACHE STRING "Paper size for documents generated with sphinx")
-  set(SPHINX_OPTS "${SPHINX_OPTIONS} -c . -d ${CMAKE_CURRENT_BINARY_DIR}/doctrees -D latex_paper_size=${SPHINX_PAPER_SIZE}")
-  file(GLOB_RECURSE RST_FILES "source/*rst")
-  file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/html ${CMAKE_CURRENT_BINARY_DIR}/doctrees)
-  add_custom_command(
-    OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/html/index.html
-    COMMAND ${SPHINXBUILD} -b html ${SPHINX_OPTS} ${CMAKE_CURRENT_BINARY_DIR}/html
-    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/sphinx
-    DEPENDS ${RST_FILES})
-  add_custom_target(html DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/html/index.html)
-else()
-  message(WARNING "Could not find sphinx-build - will not build documentation")
+# build a virtualenv
+set(sphinx_dir ${CMAKE_CURRENT_SOURCE_DIR}/sphinx)
+set(venv_dir ${CMAKE_CURRENT_BINARY_DIR}/venv)
+set(EXE_SUFFIX "")
+if(WIN32)
+  set(EXE_SUFFIX ".exe")
 endif()
+set(pip_command ${venv_dir}/bin/pip${EXE_SUFFIX})
+set(python_command ${venv_dir}/bin/python${EXE_SUFFIX})
+
+add_custom_command(OUTPUT ${venv_dir}/venv_setup
+  COMMAND ${VIRTUALENV_EXE} venv &&
+    ${CMAKE_COMMAND} -E copy ${sphinx_dir}/.pip.conf ${venv_dir}/pip.conf &&
+    . 
${venv_dir}/bin/activate && + ${pip_command} install --upgrade pip && + ${pip_command} install --upgrade -r ${sphinx_dir}/requirements.txt && + ${pip_command} install sphinx-autobuild && # somehow this is missing in requirements.txt + ${CMAKE_COMMAND} -E touch ${venv_dir}/venv_setup + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + COMMENT "Set up virtualenv") +add_custom_target(buildsphinx DEPENDS ${venv_dir}/venv_setup) + +file(GLOB_RECURSE SRCS *.rst) + +function(add_documentation_target) + set(options) + set(oneValueArgs GENERATOR SPHINX_COMMAND DOCTREE) + set(multiValueArgs ADDITIONAL_ARGUMENTS) + cmake_parse_arguments(ADT "${options}" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}") + if(NOT ADT_GENERATOR) + message(ERROR "GENERATOR is a required argument to add_documentation_target") + endif() + set(target ${ADT_GENERATOR}) + set(SPHINX_COMMAND "${venv_dir}/bin/sphinx-build") + if(ADT_SPHINX_COMMAND) + set(SPHINX_COMMAND "${venv_dir}/bin/${ADT_SPHINX_COMMAND}") + endif() + set(doctree "doctree") + if (ADT_DOCTREE) + set(doctree "${ADT_DOCTREE}") + endif() + set(out_dir ${CMAKE_CURRENT_BINARY_DIR}/${target}) + add_custom_command( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${target}_done + COMMAND ${CMAKE_COMMAND} -E make_directory ${out_dir} && + ${python_command} ${SPHINX_COMMAND} -b ${target} + -d ${doctree} -c ${sphinx_dir} + ${sphinx_dir}/source + ${CMAKE_CURRENT_BINARY_DIR}/${target} && + ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/${target}_done + DEPENDS ${SRCS} + WORKING_DIRECTORY ${venv_dir}) + message(STATUS "add_custom_target(${target} DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${target}_done)") + add_custom_target(${target} DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${target}_done) + add_dependencies(${target} buildsphinx) +endfunction() + +message(STATUS "Add html target") +add_documentation_target(GENERATOR html) From 0d01dc073b8479f1183a2bb2946df80162b492ff Mon Sep 17 00:00:00 2001 From: mpilman Date: Fri, 8 Feb 2019 14:10:13 -0800 Subject: [PATCH 201/226] add package_html target to create tgz of html --- CMakeLists.txt | 2 +- documentation/CMakeLists.txt | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9b9e58e2ad..56e1775d42 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,7 +17,7 @@ # See the License for the specific language governing permissions and # limitations under the License. cmake_minimum_required(VERSION 3.12) -project(fdb +project(foundationdb VERSION 6.1.0 DESCRIPTION "FoundationDB is a scalable, fault-tolerant, ordered key-value store with full ACID transactions." HOMEPAGE_URL "http://www.foundationdb.org/" diff --git a/documentation/CMakeLists.txt b/documentation/CMakeLists.txt index 2f09c009f6..ac409fbbc2 100644 --- a/documentation/CMakeLists.txt +++ b/documentation/CMakeLists.txt @@ -57,3 +57,11 @@ endfunction() message(STATUS "Add html target") add_documentation_target(GENERATOR html) + +set(tar_file ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_PROJECT_NAME}-${CMAKE_PROJECT_VERSION}.tar.gz) +add_custom_command( + OUTPUT ${tar_file} + COMMAND ${CMAKE_COMMAND} -E tar czf ${tar_file} . 
+ WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/html) +add_custom_target(package_html DEPENDS ${tar_file}) +add_dependencies(package_html html) From 75f692b9317b42bc3237e940228cf23d996dea35 Mon Sep 17 00:00:00 2001 From: mpilman Date: Fri, 8 Feb 2019 16:51:13 -0800 Subject: [PATCH 202/226] simplify actorcompiler and target to compile coveragetool --- CMakeLists.txt | 11 ++-- bindings/java/CMakeLists.txt | 3 +- cmake/CompileActorCompiler.cmake | 21 ------ cmake/CompileCoverageTool.cmake | 25 +++++++ cmake/EnableCsharp.cmake | 27 ++++++++ cmake/FlowCommands.cmake | 108 ++++++++++++++++++------------- fdbbackup/CMakeLists.txt | 4 +- fdbcli/CMakeLists.txt | 4 +- fdbclient/CMakeLists.txt | 4 +- fdbrpc/CMakeLists.txt | 7 +- fdbserver/CMakeLists.txt | 4 +- flow/CMakeLists.txt | 4 +- 12 files changed, 130 insertions(+), 92 deletions(-) create mode 100644 cmake/CompileCoverageTool.cmake create mode 100644 cmake/EnableCsharp.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 56e1775d42..32d92bed24 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,12 +23,6 @@ project(foundationdb HOMEPAGE_URL "http://www.foundationdb.org/" LANGUAGES ASM C CXX) -if(WIN32) - # C# is currently only supported on Windows. - # On other platforms we find mono manually - enable_language(CSharp) -endif() - set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH};${PROJECT_SOURCE_DIR}/cmake") message (STATUS "${PROJECT_SOURCE_DIR} ${PROJECT_BINARY_DIR}") if("${PROJECT_SOURCE_DIR}" STREQUAL "${PROJECT_BINARY_DIR}") @@ -122,10 +116,15 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/fdbclient/versions.h.cmake ${CMAKE_CU # Flow ################################################################################ +# Flow and other tools are written in C# - so we need that dependency +include(EnableCSharp) + # First thing we need is the actor compiler - and to compile and run the # actor compiler, we need mono include(CompileActorCompiler) +include(CompileCoverageTool) + # with the actor compiler, we can now make the flow commands available include(FlowCommands) diff --git a/bindings/java/CMakeLists.txt b/bindings/java/CMakeLists.txt index 47391874b0..75191cc670 100644 --- a/bindings/java/CMakeLists.txt +++ b/bindings/java/CMakeLists.txt @@ -148,9 +148,10 @@ if(BUILD_FAT_JAR) add_custom_command(OUTPUT ${unpack_dir}/META-INF/MANIFEST.MF COMMAND ${Java_JAR_EXECUTABLE} xf ${jar_path} WORKING_DIRECTORY ${unpack_dir} + DEPENDS "${jar_path}" COMMENT "Unpack jar-file") add_custom_target(unpack_jar DEPENDS ${unpack_dir}/META-INF/MANIFEST.MF) - add_dependencies(unpack_jar fdb_java) + add_dependencies(unpack_jar fdb-java) add_custom_command(OUTPUT ${unpack_dir}/LICENSE COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/LICENSE ${unpack_dir} COMMENT "copy license") diff --git a/cmake/CompileActorCompiler.cmake b/cmake/CompileActorCompiler.cmake index 4c39dc7b8e..2f737b6179 100644 --- a/cmake/CompileActorCompiler.cmake +++ b/cmake/CompileActorCompiler.cmake @@ -16,27 +16,6 @@ if(WIN32) "System.Data" "System.Xml") else() - find_program(MONO_EXECUTABLE mono) - find_program(MCS_EXECUTABLE dmcs) - - if (NOT MCS_EXECUTABLE) - find_program(MCS_EXECUTABLE mcs) - endif() - - set(MONO_FOUND FALSE CACHE INTERNAL "") - - if (NOT MCS_EXECUTABLE) - find_program(MCS_EXECUTABLE mcs) - endif() - - if (MONO_EXECUTABLE AND MCS_EXECUTABLE) - set(MONO_FOUND True CACHE INTERNAL "") - endif() - - if (NOT MONO_FOUND) - message(FATAL_ERROR "Could not find mono") - endif() - set(ACTOR_COMPILER_REFERENCES 
"-r:System,System.Core,System.Xml.Linq,System.Data.DataSetExtensions,Microsoft.CSharp,System.Data,System.Xml") diff --git a/cmake/CompileCoverageTool.cmake b/cmake/CompileCoverageTool.cmake new file mode 100644 index 0000000000..f7bfddc438 --- /dev/null +++ b/cmake/CompileCoverageTool.cmake @@ -0,0 +1,25 @@ +set(COVERAGETOOL_SRCS + ${CMAKE_CURRENT_SOURCE_DIR}/flow/coveragetool/Program.cs + ${CMAKE_CURRENT_SOURCE_DIR}/flow/coveragetool/Properties/AssemblyInfo.cs) +if(WIN32) + add_executable(coveragetool ${COVERAGETOOL_SRCS}) + target_compile_options(coveragetool PRIVATE "/langversion:6") + set_property(TARGET coveragetool PROPERTY VS_DOTNET_REFERENCES + "System" + "ystem.Core" + "System.Xml.Linq" + "ystem.Data.DataSetExtensions" + "Microsoft.CSharp" + "ystem.Data" + "System.Xml") +else() + set(COVERAGETOOL_COMPILER_REFERENCES + "-r:System,System.Core,System.Xml.Linq,System.Data.DataSetExtensions,Microsoft.CSharp,System.Data,System.Xml") + + add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/coveragetool.exe + COMMAND ${MCS_EXECUTABLE} ARGS ${COVERAGETOOL_COMPILER_REFERENCES} ${COVERAGETOOL_SRCS} "-target:exe" "-out:coveragetool.exe" + DEPENDS ${COVERAGETOOL_SRCS} + COMMENT "Compile coveragetool" VERBATIM) + add_custom_target(coveragetool DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/coveragetool.exe) + set(coveragetool_exe "${CMAKE_CURRENT_BINARY_DIR}/coveragetool.exe") +endif() diff --git a/cmake/EnableCsharp.cmake b/cmake/EnableCsharp.cmake new file mode 100644 index 0000000000..89a38bab1a --- /dev/null +++ b/cmake/EnableCsharp.cmake @@ -0,0 +1,27 @@ +if(WIN32) + # C# is currently only supported on Windows. + # On other platforms we find mono manually + enable_language(CSharp) +else() + # for other platforms we currently use mono + find_program(MONO_EXECUTABLE mono) + find_program(MCS_EXECUTABLE dmcs) + + if (NOT MCS_EXECUTABLE) + find_program(MCS_EXECUTABLE mcs) + endif() + + set(MONO_FOUND FALSE CACHE INTERNAL "") + + if (NOT MCS_EXECUTABLE) + find_program(MCS_EXECUTABLE mcs) + endif() + + if (MONO_EXECUTABLE AND MCS_EXECUTABLE) + set(MONO_FOUND True CACHE INTERNAL "") + endif() + + if (NOT MONO_FOUND) + message(FATAL_ERROR "Could not find mono") + endif() +endif() diff --git a/cmake/FlowCommands.cmake b/cmake/FlowCommands.cmake index 4eb4c4735d..61a54d18e6 100644 --- a/cmake/FlowCommands.cmake +++ b/cmake/FlowCommands.cmake @@ -1,53 +1,71 @@ -macro(actor_set varname srcs) - set(${varname}) - foreach(src ${srcs}) - set(tmp "${src}") +function(add_flow_target) + set(options EXECUTABLE STATIC_LIBRARY + DYNAMIC_LIBRARY) + set(oneValueArgs NAME) + set(multiValueArgs SRCS DISABLE_ACTOR_WITHOUT_WAIT_WARNING) + cmake_parse_arguments(AFT "${options}" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}") + if(NOT AFT_NAME) + message(ERROR "add_flow_target requires option NAME") + endif() + if(NOT AFT_SRCS) + message(ERROR "No sources provided") + endif() + foreach(src IN LISTS AFT_SRCS AFT_DISABLE_ACTOR_WITHOUT_WAIT_WARNING) + if(${src} MATCHES ".*\\.actor\\.(h|cpp)") + list(APPEND actors ${src}) if(${src} MATCHES ".*\\.h") - continue() - elseif(${src} MATCHES ".*\\.actor\\.cpp") - string(REPLACE ".actor.cpp" ".actor.g.cpp" tmp ${src}) - set(tmp "${CMAKE_CURRENT_BINARY_DIR}/${tmp}") - endif() - set(${varname} "${${varname}};${tmp}") - endforeach() -endmacro() - -set(ACTOR_TARGET_COUNTER "0") -macro(actor_compile target srcs) - set(options DISABLE_ACTOR_WITHOUT_WAIT_WARNING) - set(oneValueArg) - set(multiValueArgs) - cmake_parse_arguments(ACTOR_COMPILE "${options}" "${oneValueArgs}" "${multiValueArgs}" 
"${ARGN}") - set(_tmp_out "") - foreach(src ${srcs}) - set(tmp "") - if(${src} MATCHES ".*\\.actor\\.h") - string(REPLACE ".actor.h" ".actor.g.h" tmp ${src}) - elseif(${src} MATCHES ".*\\.actor\\.cpp") - string(REPLACE ".actor.cpp" ".actor.g.cpp" tmp ${src}) - endif() - set(actor_compiler_flags "") - if(ACTOR_COMPILE_DISABLE_ACTOR_WITHOUT_WAIT_WARNING) - set(actor_compiler_flags "--disable-actor-without-wait-warning") - endif() - if(tmp) + string(REPLACE ".actor.h" ".actor.g.h" generated ${src}) + else() + string(REPLACE ".actor.cpp" ".actor.g.cpp" generated ${src}) + endif() + set(actor_compiler_flags "") + foreach(s IN LISTS AFT_DISABLE_ACTOR_WITHOUT_WAIT_WARNING) + if("${s}" STREQUAL "${src}") + set(actor_compiler_flags "--disable-actor-without-wait-warning") + break() + endif() + endforeach() + list(APPEND sources ${generated}) + list(APPEND generated_files ${CMAKE_CURRENT_BINARY_DIR}/${generated}) if(WIN32) - add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${tmp}" - COMMAND $ "${CMAKE_CURRENT_SOURCE_DIR}/${src}" "${CMAKE_CURRENT_BINARY_DIR}/${tmp}" ${actor_compiler_flags} - DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${src}" actorcompiler ${actor_exe} + add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${generated}" + COMMAND $ "${CMAKE_CURRENT_SOURCE_DIR}/${src}" "${CMAKE_CURRENT_BINARY_DIR}/${generated}" ${actor_compiler_flags} ${actor_compiler_flags} + DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${src}" actorcompiler COMMENT "Compile actor: ${src}") else() - add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${tmp}" - COMMAND ${MONO_EXECUTABLE} ${actor_exe} "${CMAKE_CURRENT_SOURCE_DIR}/${src}" "${CMAKE_CURRENT_BINARY_DIR}/${tmp}" ${actor_compiler_flags} > /dev/null - DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${src}" actorcompiler ${actor_exe} + add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${generated}" + COMMAND ${MONO_EXECUTABLE} ${actor_exe} "${CMAKE_CURRENT_SOURCE_DIR}/${src}" "${CMAKE_CURRENT_BINARY_DIR}/${generated}" ${actor_compiler_flags} ${actor_compiler_flags} > /dev/null + DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${src}" actorcompiler COMMENT "Compile actor: ${src}") endif() - set(_tmp_out "${_tmp_out};${CMAKE_CURRENT_BINARY_DIR}/${tmp}") + else() + list(APPEND sources ${src}) endif() endforeach() - MATH(EXPR ACTOR_TARGET_COUNTER "${ACTOR_TARGET_COUNTER}+1") - add_custom_target(${target}_actors_${ACTOR_TARGET_COUNTER} DEPENDS ${_tmp_out}) - add_dependencies(${target} ${target}_actors_${ACTOR_TARGET_COUNTER}) - target_include_directories(${target} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) - target_include_directories(${target} PUBLIC ${CMAKE_CURRENT_BINARY_DIR}) -endmacro() + if(AFT_EXECUTABLE) + set(target_type exec) + add_executable(${AFT_NAME} ${sources}) + endif() + if(AFT_STATIC_LIBRARY) + if(target_type) + message(FATAL_ERROR "add_flow_target can only be of one type") + endif() + add_library(${AFT_NAME} STATIC ${sources}) + endif() + if(AFT_DYNAMIC_LIBRARY) + if(target_type) + message(FATAL_ERROR "add_flow_target can only be of one type") + endif() + add_library(${AFT_NAME} DYNAMIC ${sources}) + endif() + if(AFT_OBJECT_LIBRARY) + if(target_type) + message(FATAL_ERROR "add_flow_target can only be of one type") + endif() + add_library(${AFT_NAME} OBJECT ${sources}) + endif() + + add_custom_target(${AFT_NAME}_actors DEPENDS ${generated_files}) + add_dependencies(${AFT_NAME} ${AFT_NAME}_actors) + target_include_directories(${AFT_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) +endfunction() diff --git a/fdbbackup/CMakeLists.txt 
b/fdbbackup/CMakeLists.txt index dd6f46fa5b..c35aec3c3c 100644 --- a/fdbbackup/CMakeLists.txt +++ b/fdbbackup/CMakeLists.txt @@ -1,9 +1,7 @@ set(FDBBACKUP_SRCS backup.actor.cpp) -actor_set(FDBBACKUP_BUILD "${FDBBACKUP_SRCS}") -add_executable(fdbbackup "${FDBBACKUP_BUILD}") -actor_compile(fdbbackup "${FDBBACKUP_SRCS}") +add_flow_target(EXECUTABLE NAME fdbbackup SRCS ${FDBBACKUP_SRCS}) target_link_libraries(fdbbackup PRIVATE fdbclient) install(TARGETS fdbbackup DESTINATION ${FDB_BIN_DIR} COMPONENT clients) diff --git a/fdbcli/CMakeLists.txt b/fdbcli/CMakeLists.txt index 3cfbb7b793..251bbd86ef 100644 --- a/fdbcli/CMakeLists.txt +++ b/fdbcli/CMakeLists.txt @@ -8,9 +8,7 @@ if(NOT WIN32) list(APPEND FDBCLI_SRCS linenoise/linenoise.c) endif() -actor_set(FDBCLI_BUILD "${FDBCLI_SRCS}") -add_executable(fdbcli "${FDBCLI_BUILD}") -actor_compile(fdbcli "${FDBCLI_SRCS}") +add_flow_target(EXECUTABLE NAME fdbcli SRCS ${FDBCLI_SRCS}) target_link_libraries(fdbcli PRIVATE fdbclient) install(TARGETS fdbcli DESTINATION ${FDB_BIN_DIR} COMPONENT clients) diff --git a/fdbclient/CMakeLists.txt b/fdbclient/CMakeLists.txt index 1f7a00e11a..d0be44e3a8 100644 --- a/fdbclient/CMakeLists.txt +++ b/fdbclient/CMakeLists.txt @@ -87,8 +87,6 @@ set(FDBCLIENT_SRCS vexillographer_compile(TARGET fdboptions LANG cpp OUT ${CMAKE_CURRENT_BINARY_DIR}/FDBOptions.g OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/FDBOptions.g.h ${CMAKE_CURRENT_BINARY_DIR}/FDBOptions.g.cpp) -actor_set(FDBCLIENT_BUILD "${FDBCLIENT_SRCS}") -add_library(fdbclient STATIC ${FDBCLIENT_BUILD}) +add_flow_target(STATIC_LIBRARY NAME fdbclient SRCS ${FDBCLIENT_SRCS}) add_dependencies(fdbclient fdboptions) -actor_compile(fdbclient "${FDBCLIENT_SRCS}") target_link_libraries(fdbclient PUBLIC fdbrpc) diff --git a/fdbrpc/CMakeLists.txt b/fdbrpc/CMakeLists.txt index 3447f4e066..a873c5696c 100644 --- a/fdbrpc/CMakeLists.txt +++ b/fdbrpc/CMakeLists.txt @@ -57,9 +57,8 @@ set(FDBRPC_SRCS_DISABLE_ACTOR_WITHOUT_WAIT_WARNING FlowTests.actor.cpp dsltest.actor.cpp) -actor_set(FDBRPC_BUILD "${FDBRPC_SRCS};${FDBRPC_SRCS_DISABLE_ACTOR_WITHOUT_WAIT_WARNING}") -add_library(fdbrpc STATIC ${FDBRPC_BUILD}) -actor_compile(fdbrpc "${FDBRPC_SRCS}") -actor_compile(fdbrpc "${FDBRPC_SRCS_DISABLE_ACTOR_WITHOUT_WAIT_WARNING}" DISABLE_ACTOR_WITHOUT_WAIT_WARNING) +add_flow_target(STATIC_LIBRARY NAME fdbrpc + SRCS ${FDBRPC_SRCS} + DISABLE_ACTOR_WITHOUT_WAIT_WARNING ${FDBRPC_SRCS_DISABLE_ACTOR_WITHOUT_WAIT_WARNING}) target_include_directories(fdbrpc PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/libeio) target_link_libraries(fdbrpc PUBLIC flow) diff --git a/fdbserver/CMakeLists.txt b/fdbserver/CMakeLists.txt index ac4c205827..9d20584f7e 100644 --- a/fdbserver/CMakeLists.txt +++ b/fdbserver/CMakeLists.txt @@ -175,9 +175,7 @@ set(FDBSERVER_SRCS file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/workloads) -actor_set(FDBSERVER_BUILD "${FDBSERVER_SRCS}") -add_executable(fdbserver ${FDBSERVER_BUILD}) -actor_compile(fdbserver "${FDBSERVER_SRCS}") +add_flow_target(EXECUTABLE NAME fdbserver SRCS ${FDBSERVER_SRCS}) target_include_directories(fdbserver PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/workloads ${CMAKE_CURRENT_SOURCE_DIR}/workloads) diff --git a/flow/CMakeLists.txt b/flow/CMakeLists.txt index a827c7ba71..bd78d737c7 100644 --- a/flow/CMakeLists.txt +++ b/flow/CMakeLists.txt @@ -77,9 +77,7 @@ set(FLOW_SRCS configure_file(${CMAKE_CURRENT_SOURCE_DIR}/hgVersion.h.cmake ${CMAKE_CURRENT_BINARY_DIR}/hgVersion.h) -actor_set(FLOW_BUILD "${FLOW_SRCS}") -add_library(flow STATIC ${FLOW_BUILD}) -actor_compile(flow "${FLOW_SRCS}") 
+add_flow_target(STATIC_LIBRARY NAME flow SRCS ${FLOW_SRCS})
 target_include_directories(flow SYSTEM PUBLIC ${CMAKE_THREAD_LIBS_INIT})
 target_include_directories(flow PUBLIC ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
 if (NOT APPLE AND NOT WIN32)

From cd6b72b49210844369f2b41fa439a41667563437 Mon Sep 17 00:00:00 2001
From: mpilman
Date: Sat, 9 Feb 2019 13:59:51 -0800
Subject: [PATCH 203/226] generate coverage.target.xml files

---
 cmake/FlowCommands.cmake | 76 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 75 insertions(+), 1 deletion(-)

diff --git a/cmake/FlowCommands.cmake b/cmake/FlowCommands.cmake
index 61a54d18e6..f05431ba5f 100644
--- a/cmake/FlowCommands.cmake
+++ b/cmake/FlowCommands.cmake
@@ -1,8 +1,78 @@
+define_property(TARGET PROPERTY SOURCE_FILES
+  BRIEF_DOCS "Source files a flow target is built from"
+  FULL_DOCS "When compiling a flow target, this property contains a list of the non-generated source files. \
+This property is set by the add_flow_target function")
+
+define_property(TARGET PROPERTY COVERAGE_FILTERS
+  BRIEF_DOCS "List of filters for the coverage tool"
+  FULL_DOCS "Holds a list of regular expressions. All filenames matching any regular \
+expression in this list will be ignored when the coverage.target.xml file is \
+generated. This property is set through the add_flow_target function.")
+
+function(generate_coverage_xml)
+  if(NOT (${ARGC} EQUAL "1"))
+    message(ERROR "generate_coverage_xml expects one argument")
+  endif()
+  set(target_name ${ARGV0})
+  get_target_property(sources ${target_name} SOURCE_FILES)
+  get_target_property(filters ${target_name} COVERAGE_FILTER_OUT)
+  foreach(src IN LISTS sources)
+    set(include TRUE)
+    foreach(f IN LISTS filters)
+      if("${f}" MATCHES "${src}")
+        set(include FALSE)
+      endif()
+    endforeach()
+    if(include)
+      list(APPEND in_files ${src})
+    endif()
+  endforeach()
+  set(target_file ${CMAKE_CURRENT_SOURCE_DIR}/coverage_target_${target_name})
+  # we can't get the target's output dir through a generator expression as this would
+  # create a cyclic dependency.
+ # Instead we follow the following rules: + # - For executable we place the coverage file into the directory EXECUTABLE_OUTPUT_PATH + # - For static libraries we place it into the directory LIBRARY_OUTPUT_PATH + # - For dynamic libraries we place it into LIBRARY_OUTPUT_PATH on Linux and MACOS + # and to EXECUTABLE_OUTPUT_PATH on Windows + get_target_property(type ${target_name} TYPE) + # STATIC_LIBRARY, MODULE_LIBRARY, SHARED_LIBRARY, OBJECT_LIBRARY, INTERFACE_LIBRARY, EXECUTABLE + if(type STREQUAL "STATIC_LIBRARY") + set(target_file ${LIBRARY_OUTPUT_PATH}/coverage.${target_name}.xml) + elseif(type STREQUAL "SHARED_LIBRARY") + if(WIN32) + set(target_file ${EXECUTABLE_OUTPUT_PATH}/coverage.${target_name}.xml) + else() + set(target_file ${LIBRARY_OUTPUT_PATH}/coverage.${target_name}.xml) + endif() + elseif(type STREQUAL "EXECUTABLE") + set(target_file ${EXECUTABLE_OUTPUT_PATH}/coverage.${target_name}.xml) + endif() + if(WIN32) + add_custom_command( + OUTPUT ${target_file} + COMMAND $ ${target_file} ${in_files} + DEPENDS ${in_files} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + COMMENT "Generate coverage xml") + else() + add_custom_command( + OUTPUT ${target_file} + COMMAND ${MONO_EXECUTABLE} ${coveragetool_exe} ${target_file} ${in_files} + DEPENDS ${in_files} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + COMMENT "Generate coverage xml") + endif() + add_custom_target(coverage_${target_name} DEPENDS ${target_file}) + add_dependencies(coverage_${target_name} coveragetool) + add_dependencies(${target_name} coverage_${target_name}) +endfunction() + function(add_flow_target) set(options EXECUTABLE STATIC_LIBRARY DYNAMIC_LIBRARY) set(oneValueArgs NAME) - set(multiValueArgs SRCS DISABLE_ACTOR_WITHOUT_WAIT_WARNING) + set(multiValueArgs SRCS COVERAGE_FILTER_OUT DISABLE_ACTOR_WITHOUT_WAIT_WARNING) cmake_parse_arguments(AFT "${options}" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}") if(NOT AFT_NAME) message(ERROR "add_flow_target requires option NAME") @@ -65,7 +135,11 @@ function(add_flow_target) add_library(${AFT_NAME} OBJECT ${sources}) endif() + set_property(TARGET ${AFT_NAME} PROPERTY SOURCE_FILES ${AFT_SRCS}) + set_property(TARGET ${AFT_NAME} PROPERTY COVERAGE_FILTERS ${AFT_SRCS}) + add_custom_target(${AFT_NAME}_actors DEPENDS ${generated_files}) add_dependencies(${AFT_NAME} ${AFT_NAME}_actors) + generate_coverage_xml(${AFT_NAME}) target_include_directories(${AFT_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) endfunction() From 0d7d1a2b63b14fb3523abef5d11ec37474c08cf7 Mon Sep 17 00:00:00 2001 From: mpilman Date: Sat, 9 Feb 2019 14:33:36 -0800 Subject: [PATCH 204/226] added IDE support for cmake --- CMakeLists.txt | 2 ++ README.md | 23 ++++++++++++++++ cmake/ConfigureCompiler.cmake | 1 - cmake/FlowCommands.cmake | 50 ++++++++++++++++++++++------------- 4 files changed, 57 insertions(+), 19 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 32d92bed24..32259daf13 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -39,6 +39,8 @@ endif() set(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin) set(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib) +set(OPEN_FOR_IDE OFF CACHE BOOL "Open this in an IDE (won't compile/link)") + ################################################################################ # Packages used for bindings ################################################################################ diff --git a/README.md b/README.md index 7153ef8add..7a65543b1b 100755 --- a/README.md +++ b/README.md @@ -125,6 +125,29 @@ directory. 
 This can then be used for tools like code-completion and code navigation in
 flow. It is not yet perfect (it will show a few errors) but we are constantly
 working on improving the development experience.
+### Using IDEs
+
+CMake has built-in support for a number of popular IDEs. However, because flow
+files are precompiled with the actor compiler, an IDE will not be very useful as
+a user will only be presented with the generated code - which is not what they
+want to edit and get IDE features for.
+
+The good news is that it is possible to generate project files for editing
+flow with a supported IDE. There is a cmake option called `OPEN_FOR_IDE` which
+will generate a project which can be opened in an IDE for editing. You won't be
+able to build this project, but you will be able to edit the files and get most
+edit and navigation features your IDE supports.
+
+For example, if you want to use Xcode to make changes to FoundationDB you can
+create an Xcode project with the following command:
+
+```
+cmake -G Xcode -DOPEN_FOR_IDE=ON
+```
+
+You should create a second build directory which you will use for building
+(probably with make or ninja) and debugging.
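+
+A typical workflow is then to keep the IDE project and the real build in two
+separate directories; a minimal sketch (the directory names and the use of
+Xcode and make here are assumptions about your environment):
+
+```
+mkdir ide-project && cd ide-project
+cmake -G Xcode -DOPEN_FOR_IDE=ON ../foundationdb
+cd .. && mkdir build && cd build
+cmake ../foundationdb && make
+```
+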
 ### Linux
 
 There are no special requirements for Linux. However, we are currently working
diff --git a/cmake/ConfigureCompiler.cmake b/cmake/ConfigureCompiler.cmake
index 1d29e2df7a..3c475e1521 100644
--- a/cmake/ConfigureCompiler.cmake
+++ b/cmake/ConfigureCompiler.cmake
@@ -4,7 +4,6 @@ set(USE_VALGRIND OFF CACHE BOOL "Compile for valgrind usage")
 set(USE_GOLD_LINKER OFF CACHE BOOL "Use gold linker")
 set(ALLOC_INSTRUMENTATION OFF CACHE BOOL "Instrument alloc")
 set(WITH_UNDODB OFF CACHE BOOL "Use rr or undodb")
-set(OPEN_FOR_IDE OFF CACHE BOOL "Open this in an IDE (won't compile/link)")
 set(FDB_RELEASE OFF CACHE BOOL "This is a building of a final release")
 
 add_compile_options(-DCMAKE_BUILD)
diff --git a/cmake/FlowCommands.cmake b/cmake/FlowCommands.cmake
index f05431ba5f..ecdbdb6d08 100644
--- a/cmake/FlowCommands.cmake
+++ b/cmake/FlowCommands.cmake
@@ -11,7 +11,7 @@
 generated. This property is set through the add_flow_target function.")
 
 function(generate_coverage_xml)
   if(NOT (${ARGC} EQUAL "1"))
-    message(ERROR "generate_coverage_xml expects one argument")
+    message(FATAL_ERROR "generate_coverage_xml expects one argument")
   endif()
   set(target_name ${ARGV0})
   get_target_property(sources ${target_name} SOURCE_FILES)
@@ -75,12 +75,31 @@ function(add_flow_target)
   set(multiValueArgs SRCS COVERAGE_FILTER_OUT DISABLE_ACTOR_WITHOUT_WAIT_WARNING)
   cmake_parse_arguments(AFT "${options}" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}")
   if(NOT AFT_NAME)
-    message(ERROR "add_flow_target requires option NAME")
+    message(FATAL_ERROR "add_flow_target requires option NAME")
   endif()
   if(NOT AFT_SRCS)
-    message(ERROR "No sources provided")
+    message(FATAL_ERROR "No sources provided")
   endif()
-  foreach(src IN LISTS AFT_SRCS AFT_DISABLE_ACTOR_WITHOUT_WAIT_WARNING)
+  if(OPEN_FOR_IDE)
+    set(sources ${AFT_SRCS} ${AFT_DISABLE_ACTOR_WITHOUT_WAIT_WARNING})
+    if(AFT_EXECUTABLE)
+      set(target_type exec)
+      add_executable(${AFT_NAME} ${sources})
+    endif()
+    if(AFT_STATIC_LIBRARY)
+      if(target_type)
+        message(FATAL_ERROR "add_flow_target can only be of one type")
+      endif()
+      add_library(${AFT_NAME} STATIC ${sources})
+    endif()
+    if(AFT_DYNAMIC_LIBRARY)
+      if(target_type)
+        message(FATAL_ERROR "add_flow_target can only be of one type")
+      endif()
+      add_library(${AFT_NAME} SHARED ${sources})
+    endif()
+  else()
+    foreach(src IN LISTS AFT_SRCS AFT_DISABLE_ACTOR_WITHOUT_WAIT_WARNING)
     if(${src} MATCHES ".*\\.actor\\.(h|cpp)")
       list(APPEND actors ${src})
       if(${src} MATCHES ".*\\.h")
@@ -108,8 +127,8 @@ function(add_flow_target)
           DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${src}" actorcompiler
           COMMENT "Compile actor: ${src}")
       endif()
-    else()
-      list(APPEND sources ${src})
+      else()
+        list(APPEND sources ${src})
     endif()
   endforeach()
   if(AFT_EXECUTABLE)
@@ -128,18 +147,13 @@ function(add_flow_target)
       endif()
       add_library(${AFT_NAME} SHARED ${sources})
     endif()
-    if(AFT_OBJECT_LIBRARY)
-      if(target_type)
-        message(FATAL_ERROR "add_flow_target can only be of one type")
-      endif()
-      add_library(${AFT_NAME} OBJECT ${sources})
+
+    set_property(TARGET ${AFT_NAME} PROPERTY SOURCE_FILES ${AFT_SRCS})
+    set_property(TARGET ${AFT_NAME} PROPERTY COVERAGE_FILTERS ${AFT_SRCS})
+
+    add_custom_target(${AFT_NAME}_actors DEPENDS ${generated_files})
+    add_dependencies(${AFT_NAME} ${AFT_NAME}_actors)
+    generate_coverage_xml(${AFT_NAME})
   endif()
-
-  set_property(TARGET ${AFT_NAME} PROPERTY SOURCE_FILES ${AFT_SRCS})
-  set_property(TARGET ${AFT_NAME} PROPERTY COVERAGE_FILTERS ${AFT_SRCS})
-
-  add_custom_target(${AFT_NAME}_actors DEPENDS ${generated_files})
-  add_dependencies(${AFT_NAME} ${AFT_NAME}_actors)
-  generate_coverage_xml(${AFT_NAME})
   target_include_directories(${AFT_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
 endfunction()

From 75f692b9317b42bc3237e940228cf23d996dea35 Mon Sep 17 00:00:00 2001
From: mpilman
Date: Sat, 9 Feb 2019 15:13:25 -0800
Subject: [PATCH 205/226] Build c_performance_test and ryw_benchmark

addresses #1035
---
 bindings/c/CMakeLists.txt | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/bindings/c/CMakeLists.txt b/bindings/c/CMakeLists.txt
index e256c459c9..98341c08be 100644
--- a/bindings/c/CMakeLists.txt
+++ b/bindings/c/CMakeLists.txt
@@ -41,6 +41,12 @@ if(WIN32)
   enable_language(ASM_MASM)
   set_property(SOURCE ${asm_file} PROPERTY LANGUAGE ASM_MASM)
 endif()
+
+add_executable(fdb_c_performance_test test/performance_test.c test/test.h)
+target_link_libraries(fdb_c_performance_test PRIVATE
fdb_c) +add_executable(fdb_c_ryw_benchmark test/ryw_benchmark.c test/test.h) +target_link_libraries(fdb_c_ryw_benchmark PRIVATE fdb_c) + # TODO: re-enable once the old vcxproj-based build system is removed. #generate_export_header(fdb_c EXPORT_MACRO_NAME "DLLEXPORT" # EXPORT_FILE_NAME ${CMAKE_CURRENT_BINARY_DIR}/foundationdb/fdb_c_export.h) From 50a01fb8ce277f9f97bea74bb883a1c016c309ef Mon Sep 17 00:00:00 2001 From: mpilman Date: Sat, 9 Feb 2019 15:53:52 -0800 Subject: [PATCH 206/226] Build flow-binding --- bindings/CMakeLists.txt | 1 + bindings/flow/CMakeLists.txt | 41 +++++++++++++++++++++++++++++ bindings/flow/tester/CMakeLists.txt | 6 +++++ 3 files changed, 48 insertions(+) create mode 100644 bindings/flow/CMakeLists.txt create mode 100644 bindings/flow/tester/CMakeLists.txt diff --git a/bindings/CMakeLists.txt b/bindings/CMakeLists.txt index a5a3ca6a6a..077fac75fb 100644 --- a/bindings/CMakeLists.txt +++ b/bindings/CMakeLists.txt @@ -1,4 +1,5 @@ add_subdirectory(c) +add_subdirectory(flow) add_subdirectory(python) if(BUILD_JAVA) add_subdirectory(java) diff --git a/bindings/flow/CMakeLists.txt b/bindings/flow/CMakeLists.txt new file mode 100644 index 0000000000..ef7965ffad --- /dev/null +++ b/bindings/flow/CMakeLists.txt @@ -0,0 +1,41 @@ +set(SRCS + DirectoryLayer.actor.cpp + DirectoryLayer.h + DirectoryPartition.h + DirectorySubspace.cpp + DirectorySubspace.h + FDBLoanerTypes.h + HighContentionAllocator.actor.cpp + HighContentionAllocator.h + IDirectory.h + Node.actor.cpp + Subspace.cpp + Subspace.h + Tuple.cpp + Tuple.h + fdb_flow.actor.cpp + fdb_flow.h) + +add_flow_target(NAME fdb_flow SRCS ${SRCS} STATIC_LIBRARY) +target_link_libraries(fdb_flow PUBLIC fdb_c) + +add_subdirectory(tester) + +# generate flow-package +foreach(f IN LISTS SRCS) + if(f MATCHES ".*\\.h$") + list(APPEND headers ${CMAKE_CURRENT_SOURCE_DIR}/${f}) + endif() +endforeach() +file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/packages) +file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/packages) +set(package_dir ${CMAKE_CURRENT_BINARY_DIR}/packages/fdb-flow-${CMAKE_PROJECT_VERSION}) +set(tar_file ${CMAKE_BINARY_DIR}/packages/fdb-flow-${CMAKE_PROJECT_VERSION}.tar.gz) +add_custom_command(OUTPUT ${tar_file} + COMMAND + ${CMAKE_COMMAND} -E make_directory ${package_dir} && + ${CMAKE_COMMAND} -E copy $ ${headers} ${package_dir} && + ${CMAKE_COMMAND} -E tar czf ${tar_file} ${package_dir} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/packages + COMMENT "Build fdb_flow package") +add_custom_target(package_flow DEPENDS ${tar_file}) diff --git a/bindings/flow/tester/CMakeLists.txt b/bindings/flow/tester/CMakeLists.txt new file mode 100644 index 0000000000..4e017cddba --- /dev/null +++ b/bindings/flow/tester/CMakeLists.txt @@ -0,0 +1,6 @@ +set(TEST_SRCS + DirectoryTester.actor.cpp + Tester.actor.cpp + Tester.actor.h) +add_flow_target(NAME fdb_flow_tester EXECUTABLE SRCS ${TEST_SRCS}) +target_link_libraries(fdb_flow_tester fdb_flow) From 44cb835ffedb2da8332386e1c46364693e5232d7 Mon Sep 17 00:00:00 2001 From: mpilman Date: Sat, 9 Feb 2019 16:31:29 -0800 Subject: [PATCH 207/226] build python sdist --- CMakeLists.txt | 4 +--- bindings/python/CMakeLists.txt | 38 ++++++++++++++++++++++++++-------- 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 32259daf13..acf11efe28 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -47,9 +47,7 @@ set(OPEN_FOR_IDE OFF CACHE BOOL "Open this in an IDE (won't compile/link)") set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake/") 
-find_package(PythonInterp 3.4 REQUIRED)
-set(Python_ADDITIONAL_VERSIONS 3.4 3.5 3.5)
-find_package(PythonLibs 3.4 REQUIRED)
+find_package(Python COMPONENTS Interpreter)
 
 ################################################################################
 # Pip
diff --git a/bindings/python/CMakeLists.txt b/bindings/python/CMakeLists.txt
index 0613bd7e90..4589582a50 100644
--- a/bindings/python/CMakeLists.txt
+++ b/bindings/python/CMakeLists.txt
@@ -5,7 +5,9 @@ set(SRCS
   fdb/locality.py
   fdb/six.py
   fdb/subspace_impl.py
-  fdb/tuple.py)
+  fdb/tuple.py
+  README.rst
+  MANIFEST.in)
 
 if(APPLE)
   list(APPEND SRCS fdb/libfdb_c.dylib.pth)
@@ -20,17 +22,10 @@ foreach(src ${SRCS})
   if(NOT EXISTS ${dirname})
     file(MAKE_DIRECTORY ${PROJECT_BINARY_DIR}/bindings/python/${dirname})
   endif()
-  set(copy_command "cp")
   set(from_path ${CMAKE_CURRENT_SOURCE_DIR}/${src})
   set(to_path ${CMAKE_CURRENT_BINARY_DIR}/${src})
-  if (WIN32)
-    set(copy_command "copy")
-    # copy on Windows doesn't understand '/' separators
-    string(REPLACE "/" "\\" from_path "${from_path}")
-    string(REPLACE "/" "\\" to_path "${to_path}")
-  endif()
   add_custom_command(OUTPUT ${PROJECT_BINARY_DIR}/bindings/python/${src}
-    COMMAND ${copy_command} ${from_path} ${to_path}
+    COMMAND ${CMAKE_COMMAND} -E copy ${from_path} ${to_path}
     DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${src}
     WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
     COMMENT "copy ${src}")
@@ -47,3 +42,28 @@
 add_dependencies(python_binding fdb_python_options)
 
 set(out_files "${out_files};${options_file}")
 install(FILES ${out_files} DESTINATION ${FDB_PYTHON_INSTALL_DIR} COMPONENT clients)
+
+# Create sdist
+configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.cmake ${CMAKE_CURRENT_BINARY_DIR}/setup.py)
+configure_file(${CMAKE_SOURCE_DIR}/LICENSE ${CMAKE_CURRENT_BINARY_DIR}/LICENSE COPYONLY)
+find_program(pycodestyle pycodestyle)
+if (pycodestyle)
+  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/check_py_code_style
+    COMMAND ${pycodestyle} bindings/python --config=${CMAKE_CURRENT_SOURCE_DIR}/setup.cfg &&
+      ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/check_py_code_style
+    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+    DEPENDS ${out_files}
+    COMMENT "Check python code style")
+  add_custom_target(fdb_python_check DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/check_py_code_style)
+else()
+  add_custom_target(fdb_python_check COMMAND ${CMAKE_COMMAND} -E echo "Skipped Python style check!
Missing: pycodestyle") +endif() +set(package_file_name foundationdb-${FDB_VERSION}.tar.gz) +set(package_file ${CMAKE_BINARY_DIR}/packages/${package_file_name}) +add_custom_command(OUTPUT ${package_file} + COMMAND $ setup.py sdist && + ${CMAKE_COMMAND} -E copy dist/${package_file_name} ${package_file} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + COMMENT "Create Python sdist package") +add_custom_target(python_package DEPENDS ${package_file}) +add_dependencies(python_package python_binding) From 9b7dcc4ed50db1628efc3cefdd3b04c8330ef10e Mon Sep 17 00:00:00 2001 From: mpilman Date: Sat, 9 Feb 2019 22:08:34 -0800 Subject: [PATCH 208/226] flow, python, and go bindings --- CMakeLists.txt | 11 -- bindings/CMakeLists.txt | 3 + bindings/flow/CMakeLists.txt | 1 + bindings/go/CMakeLists.txt | 110 ++++++++++++++++++ .../go/src/_util/translate_fdb_options.go | 92 ++++++++++----- bindings/python/CMakeLists.txt | 1 + bindings/python/setup.py.cmake | 38 ++++++ cmake/FDBComponents.cmake | 74 +++++++++--- documentation/CMakeLists.txt | 3 +- 9 files changed, 270 insertions(+), 63 deletions(-) create mode 100644 bindings/go/CMakeLists.txt create mode 100644 bindings/python/setup.py.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index acf11efe28..b129f2ec1e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -47,17 +47,6 @@ set(OPEN_FOR_IDE OFF CACHE BOOL "Open this in an IDE (won't compile/link)") set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake/") -find_package(Python COMPONENTS Interpreter) - -################################################################################ -# Pip -################################################################################ - -find_package(Virtualenv) -if (Virtualenv_FOUND) - set(BUILD_DOCUMENTATION ON) -endif() - ################################################################################ # Compiler configuration ################################################################################ diff --git a/bindings/CMakeLists.txt b/bindings/CMakeLists.txt index 077fac75fb..2ef14e428c 100644 --- a/bindings/CMakeLists.txt +++ b/bindings/CMakeLists.txt @@ -4,3 +4,6 @@ add_subdirectory(python) if(BUILD_JAVA) add_subdirectory(java) endif() +if(WITH_GO) + add_subdirectory(go) +endif() diff --git a/bindings/flow/CMakeLists.txt b/bindings/flow/CMakeLists.txt index ef7965ffad..a959bf729a 100644 --- a/bindings/flow/CMakeLists.txt +++ b/bindings/flow/CMakeLists.txt @@ -39,3 +39,4 @@ add_custom_command(OUTPUT ${tar_file} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/packages COMMENT "Build fdb_flow package") add_custom_target(package_flow DEPENDS ${tar_file}) +add_dependencies(packages package_flow) diff --git a/bindings/go/CMakeLists.txt b/bindings/go/CMakeLists.txt new file mode 100644 index 0000000000..86ae628b07 --- /dev/null +++ b/bindings/go/CMakeLists.txt @@ -0,0 +1,110 @@ +set(SRCS + src/_stacktester/directory.go + src/fdb/directory/allocator.go + src/fdb/directory/node.go + src/fdb/futures.go + src/fdb/subspace/subspace.go + src/_stacktester/stacktester.go + src/fdb/directory/directory.go + src/fdb/doc.go + src/fdb/transaction.go + src/fdb/directory/directoryLayer.go + src/fdb/errors.go + src/fdb/keyselector.go + src/fdb/tuple/tuple.go + src/fdb/cluster.go + src/fdb/directory/directoryPartition.go + src/fdb/fdb.go + src/fdb/range.go + src/fdb/tuple/tuple_test.go + src/fdb/database.go + src/fdb/directory/directorySubspace.go + src/fdb/fdb_test.go + src/fdb/snapshot.go) + +set(GOPATH ${CMAKE_CURRENT_BINARY_DIR}) +set(GO_PACKAGE_ROOT 
github.com/apple/foundationdb/bindings/go) +set(GO_IMPORT_PATH ${GO_PACKAGE_ROOT}/src) +set(GO_DEST ${GOPATH}/src/${GO_PACKAGE_ROOT}) + +set(GO_PACKAGE_OUTDIR ${GOPATH}/pkg/${GOPLATFORM}/${GO_IMPORT_PATH}) + +file(MAKE_DIRECTORY ${GOPATH} + ${GO_DEST}) +set(go_options_file ${GO_DEST}/src/fdb/generated.go) + +set(go_env GOPATH=${GOPATH}) + +foreach(src_file IN LISTS SRCS) + set(dest_file ${GO_DEST}/${src_file}) + get_filename_component(dest_dir ${dest_file} DIRECTORY) + list(APPEND SRCS_OUT ${dest_file}) + add_custom_command(OUTPUT ${dest_file} + COMMAND ${CMAKE_COMMAND} -E make_directory ${dest_dir} && + ${CMAKE_COMMAND} -E copy ${src_file} ${dest_file} + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${src_file} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + COMMENT "Creating fdb_go_path") +endforeach() +add_custom_target(copy_go_sources DEPENDS ${SRCS_OUT}) +add_custom_command(OUTPUT ${go_options_file} + COMMAND ${GO_EXECUTABLE} run ${CMAKE_CURRENT_SOURCE_DIR}/src/_util/translate_fdb_options.go + -in ${CMAKE_SOURCE_DIR}/fdbclient/vexillographer/fdb.options + -out ${go_options_file} + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/src/_util/translate_fdb_options.go + ${CMAKE_SOURCE_DIR}/fdbclient/vexillographer/fdb.options + COMMENT "Generate FDBOptions for GO") +add_custom_target(go_options_file DEPENDS ${go_options_file}) +add_dependencies(go_options_file copy_go_sources) + +function(build_go_package) + set(options LIBRARY EXECUTABLE) + set(oneValueArgs NAME PATH) + set(multiValueArgs) + cmake_parse_arguments(BGP "${options}" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}") + + if(NOT BGP_NAME OR NOT BGP_PATH) + message(FATAL_ERROR "NAME and PATH arguments are missing") + endif() + if(BGP_LIBRARY AND BGP_EXECUTABLE) + message(FATAL_ERROR "Package can't be a library and an executable") + endif() + if(NOT BGP_LIBRARY AND NOT BGP_EXECUTABLE) + message(FATAL_ERROR "Missing type") + endif() + + if(BGP_LIBRARY) + if(WIN32) + set(outfile ${GO_PACKAGE_OUTDIR}/${BGP_NAME}.lib) + else() + set(outfile ${GO_PACKAGE_OUTDIR}/${BGP_NAME}.a) + endif() + else() + if(WIN32) + set(outfile ${GO_PACKAGE_OUTDIR}/${BGP_NAME}.exe) + else() + set(outfile ${GO_PACKAGE_OUTDIR}/${BGP_NAME}) + endif() + endif() + add_custom_command(OUTPUT ${outfile} + COMMAND ${CMAKE_COMMAND} -E env ${go_env} + ${GO_EXECUTABLE} install ${GO_IMPORT_PATH}/${BGP_PATH} + DEPENDS ${fdb_options_file} + COMMENT "Building ${BGP_NAME}") + add_custom_target(${BGP_NAME} ALL DEPENDS ${outfile}) +endfunction() + +build_go_package(LIBRARY NAME fdb_go PATH fdb) +add_dependencies(fdb_go fdb_c go_options_file) + +build_go_package(LIBRARY NAME tuple_go PATH fdb/tuple) +add_dependencies(tuple_go fdb_go) + +build_go_package(LIBRARY NAME subspace_go PATH fdb/subspace) +add_dependencies(subspace_go tuple_go) + +build_go_package(LIBRARY NAME directory_go PATH fdb/directory) +add_dependencies(directory_go tuple_go) + +build_go_package(EXECUTABLE NAME fdb_go_tester PATH _stacktester) +add_dependencies(fdb_go_tester directory_go) diff --git a/bindings/go/src/_util/translate_fdb_options.go b/bindings/go/src/_util/translate_fdb_options.go index 37d64af6c6..f0527683da 100644 --- a/bindings/go/src/_util/translate_fdb_options.go +++ b/bindings/go/src/_util/translate_fdb_options.go @@ -23,6 +23,7 @@ package main import ( + "flag" "encoding/xml" "fmt" "go/doc" @@ -30,6 +31,7 @@ import ( "log" "os" "strings" + "io" ) type Option struct { @@ -48,22 +50,22 @@ type Options struct { Scope []Scope } -func writeOptString(receiver string, function string, opt Option) { - fmt.Printf(`func (o 
%s) %s(param string) error { +func writeOptString(w io.Writer, receiver string, function string, opt Option) { + fmt.Fprintf(w, `func (o %s) %s(param string) error { return o.setOpt(%d, []byte(param)) } `, receiver, function, opt.Code) } -func writeOptBytes(receiver string, function string, opt Option) { - fmt.Printf(`func (o %s) %s(param []byte) error { +func writeOptBytes(w io.Writer, receiver string, function string, opt Option) { + fmt.Fprintf(w, `func (o %s) %s(param []byte) error { return o.setOpt(%d, param) } `, receiver, function, opt.Code) } -func writeOptInt(receiver string, function string, opt Option) { - fmt.Printf(`func (o %s) %s(param int64) error { +func writeOptInt(w io.Writer, receiver string, function string, opt Option) { + fmt.Fprintf(w, `func (o %s) %s(param int64) error { b, e := int64ToBytes(param) if e != nil { return e @@ -73,36 +75,36 @@ func writeOptInt(receiver string, function string, opt Option) { `, receiver, function, opt.Code) } -func writeOptNone(receiver string, function string, opt Option) { - fmt.Printf(`func (o %s) %s() error { +func writeOptNone(w io.Writer, receiver string, function string, opt Option) { + fmt.Fprintf(w, `func (o %s) %s() error { return o.setOpt(%d, nil) } `, receiver, function, opt.Code) } -func writeOpt(receiver string, opt Option) { +func writeOpt(w io.Writer, receiver string, opt Option) { function := "Set" + translateName(opt.Name) - fmt.Println() + fmt.Fprintln(w) if opt.Description != "" { - fmt.Printf("// %s\n", opt.Description) + fmt.Fprintf(w, "// %s\n", opt.Description) if opt.ParamDesc != "" { - fmt.Printf("//\n// Parameter: %s\n", opt.ParamDesc) + fmt.Fprintf(w, "//\n// Parameter: %s\n", opt.ParamDesc) } } else { - fmt.Printf("// Not yet implemented.\n") + fmt.Fprintf(w, "// Not yet implemented.\n") } switch opt.ParamType { case "String": - writeOptString(receiver, function, opt) + writeOptString(w, receiver, function, opt) case "Bytes": - writeOptBytes(receiver, function, opt) + writeOptBytes(w, receiver, function, opt) case "Int": - writeOptInt(receiver, function, opt) + writeOptInt(w, receiver, function, opt) case "": - writeOptNone(receiver, function, opt) + writeOptNone(w, receiver, function, opt) default: log.Fatalf("Totally unexpected ParamType %s", opt.ParamType) } @@ -112,9 +114,9 @@ func translateName(old string) string { return strings.Replace(strings.Title(strings.Replace(old, "_", " ", -1)), " ", "", -1) } -func writeMutation(opt Option) { +func writeMutation(w io.Writer, opt Option) { tname := translateName(opt.Name) - fmt.Printf(` + fmt.Fprintf(w, ` // %s func (t Transaction) %s(key KeyConvertible, param []byte) { t.atomicOp(key.FDBKey(), param, %d) @@ -122,23 +124,38 @@ func (t Transaction) %s(key KeyConvertible, param []byte) { `, opt.Description, tname, opt.Code) } -func writeEnum(scope Scope, opt Option, delta int) { - fmt.Println() +func writeEnum(w io.Writer, scope Scope, opt Option, delta int) { + fmt.Fprintln(w) if opt.Description != "" { - doc.ToText(os.Stdout, opt.Description, "\t// ", "", 73) + doc.ToText(w, opt.Description, "\t// ", "", 73) // fmt.Printf(" // %s\n", opt.Description) } - fmt.Printf(" %s %s = %d\n", scope.Name+translateName(opt.Name), scope.Name, opt.Code+delta) + fmt.Fprintf(w, " %s %s = %d\n", scope.Name+translateName(opt.Name), scope.Name, opt.Code+delta) } func main() { + var inFile string + var outFile string + flag.StringVar(&inFile, "in", "stdin", "Input file") + flag.StringVar(&outFile, "out", "stdout", "Output file") + flag.Parse() + var err error v := Options{} - data, 
err := ioutil.ReadAll(os.Stdin) - if err != nil { - log.Fatal(err) + var data []byte + + if inFile == "stdin" { + data, err = ioutil.ReadAll(os.Stdin) + if err != nil { + log.Fatal(err) + } + } else { + data, err = ioutil.ReadFile(inFile) + if err != nil { + log.Fatal(err) + } } err = xml.Unmarshal(data, &v) @@ -146,7 +163,17 @@ func main() { log.Fatal(err) } - fmt.Print(`/* + var out *os.File + if outFile == "stdout" { + out = os.Stdout + } else { + out, err = os.Create(outFile) + if err != nil { + log.Fatal(err) + } + } + + fmt.Fprint(out, `/* * generated.go * * This source file is part of the FoundationDB open source project @@ -197,7 +224,7 @@ func int64ToBytes(i int64) ([]byte, error) { for _, opt := range scope.Option { if !opt.Hidden { - writeOpt(receiver, opt) + writeOpt(out, receiver, opt) } } continue @@ -206,7 +233,7 @@ func int64ToBytes(i int64) ([]byte, error) { if scope.Name == "MutationType" { for _, opt := range scope.Option { if !opt.Hidden { - writeMutation(opt) + writeMutation(out, opt) } } continue @@ -223,16 +250,17 @@ func int64ToBytes(i int64) ([]byte, error) { scope.Name = "conflictRangeType" } - fmt.Printf(` + fmt.Fprintf(out, ` type %s int const ( `, scope.Name) for _, opt := range scope.Option { if !opt.Hidden { - writeEnum(scope, opt, d) + writeEnum(out, scope, opt, d) } } - fmt.Println(")") + fmt.Fprintln(out, ")") } + out.Close() } diff --git a/bindings/python/CMakeLists.txt b/bindings/python/CMakeLists.txt index 4589582a50..cb7a236e1f 100644 --- a/bindings/python/CMakeLists.txt +++ b/bindings/python/CMakeLists.txt @@ -67,3 +67,4 @@ add_custom_command(OUTPUT ${package_file} COMMENT "Create Python sdist package") add_custom_target(python_package DEPENDS ${package_file}) add_dependencies(python_package python_binding) +add_dependencies(packages python_package) diff --git a/bindings/python/setup.py.cmake b/bindings/python/setup.py.cmake new file mode 100644 index 0000000000..da8bf77c82 --- /dev/null +++ b/bindings/python/setup.py.cmake @@ -0,0 +1,38 @@ +from distutils.core import setup + +try: + with open("README.rst") as f: + long_desc = f.read() +except: + long_desc = "" + +setup(name="foundationdb", + version="${FDB_VERSION}", + author="FoundationDB", + author_email="fdb-dist@apple.com", + description="Python bindings for the FoundationDB database", + url="https://www.foundationdb.org", + packages=['fdb'], + package_data={'fdb': ["fdb/*.py"]}, + long_description=long_desc, + classifiers=[ + 'Development Status :: 5 - Production/Stable', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: Apache Software License', + 'Operating System :: MacOS :: MacOS X', + 'Operating System :: Microsoft :: Windows', + 'Operating System :: POSIX :: Linux', + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 2.6', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.0', + 'Programming Language :: Python :: 3.1', + 'Programming Language :: Python :: 3.2', + 'Programming Language :: Python :: 3.3', + 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: Implementation :: CPython', + 'Topic :: Database', + 'Topic :: Database :: Front-Ends' + ] + ) diff --git a/cmake/FDBComponents.cmake b/cmake/FDBComponents.cmake index 7fa1f95bfc..e020acaf70 100644 --- a/cmake/FDBComponents.cmake +++ b/cmake/FDBComponents.cmake @@ -1,16 +1,3 @@ -################################################################################ -# Java Bindings 
-################################################################################ - -set(BUILD_JAVA OFF) -find_package(JNI 1.8 REQUIRED) -find_package(Java 1.8 COMPONENTS Development) -if(JNI_FOUND AND Java_FOUND AND Java_Development_FOUND) - set(BUILD_JAVA ON) - include(UseJava) - enable_language(Java) -endif() - ################################################################################ # LibreSSL ################################################################################ @@ -30,13 +17,62 @@ else() endif() endif() +################################################################################ +# Java Bindings +################################################################################ + +set(BUILD_JAVA OFF) +find_package(JNI 1.8 REQUIRED) +find_package(Java 1.8 COMPONENTS Development) +if(JNI_FOUND AND Java_FOUND AND Java_Development_FOUND) + set(BUILD_JAVA ON) + include(UseJava) + enable_language(Java) +endif() + +################################################################################ +# Python Bindings +################################################################################ + +find_package(Python COMPONENTS Interpreter) +if(Python_Interpreter_FOUND) + set(WITH_PYTHON ON) +else() + set(WITH_PYTHON OFF) +endif() + +################################################################################ +# Pip +################################################################################ + +find_package(Virtualenv) +if (Virtualenv_FOUND) + set(BUILD_DOCUMENTATION ON) +endif() + +################################################################################ +# GO +################################################################################ + +find_program(GO_EXECUTABLE go) +if(GO_EXECUTABLE) + set(WITH_GO ON) +else() + set(WITH_GO OFF) +endif() + +file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/packages) +add_custom_target(packages) + function(print_components) - message(STATUS "=============================") + message(STATUS "=========================================") message(STATUS " Components Build Overview ") - message(STATUS "=============================") - message(STATUS "Build Python Bindings: ON") - message(STATUS "Build Java Bindings: ${BUILD_JAVA}") - message(STATUS "Build with TLS support: ${WITH_TLS}") - message(STATUS "=============================") + message(STATUS "=========================================") + message(STATUS "Build Java Bindings: ${BUILD_JAoA}") + message(STATUS "Build with TLS support: ${WITH_TLS}") + message(STATUS "Build GO bindings: ${WITH_GO}") + message(STATUS "Build Python sdist (make package): ${WITH_PYTHON}") + message(STATUS "Build Documentation (make html): ${BUILD_DOCUMENTATION}") + message(STATUS "=========================================") endfunction() diff --git a/documentation/CMakeLists.txt b/documentation/CMakeLists.txt index ac409fbbc2..20262c29ae 100644 --- a/documentation/CMakeLists.txt +++ b/documentation/CMakeLists.txt @@ -58,10 +58,11 @@ endfunction() message(STATUS "Add html target") add_documentation_target(GENERATOR html) -set(tar_file ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_PROJECT_NAME}-${CMAKE_PROJECT_VERSION}.tar.gz) +set(tar_file ${CMAKE_BINARY_DIR}/packages/${CMAKE_PROJECT_NAME}-docs-${FDB_VERSION}.tar.gz) add_custom_command( OUTPUT ${tar_file} COMMAND ${CMAKE_COMMAND} -E tar czf ${tar_file} . 
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/html) add_custom_target(package_html DEPENDS ${tar_file}) add_dependencies(package_html html) +add_dependencies(packages package_html) From 7ae3cc8ce4d473af71d8c23f8914d53c877fe41c Mon Sep 17 00:00:00 2001 From: mpilman Date: Sun, 10 Feb 2019 09:41:29 -0800 Subject: [PATCH 209/226] Fix go binding dependency paths Before this commit, the go bindings would always be recompiled --- bindings/go/CMakeLists.txt | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/bindings/go/CMakeLists.txt b/bindings/go/CMakeLists.txt index 86ae628b07..666ad1164d 100644 --- a/bindings/go/CMakeLists.txt +++ b/bindings/go/CMakeLists.txt @@ -27,6 +27,14 @@ set(GO_PACKAGE_ROOT github.com/apple/foundationdb/bindings/go) set(GO_IMPORT_PATH ${GO_PACKAGE_ROOT}/src) set(GO_DEST ${GOPATH}/src/${GO_PACKAGE_ROOT}) +if(APPLE) + set(GOPLATFORM darwin_amd64) +elseif(WIN32) + set(GOPLATFORM windows_amd64) +else() + set(GOPLATFORM linux_amd64) +endif() + set(GO_PACKAGE_OUTDIR ${GOPATH}/pkg/${GOPLATFORM}/${GO_IMPORT_PATH}) file(MAKE_DIRECTORY ${GOPATH} @@ -75,15 +83,16 @@ function(build_go_package) if(BGP_LIBRARY) if(WIN32) - set(outfile ${GO_PACKAGE_OUTDIR}/${BGP_NAME}.lib) + set(outfile ${GO_PACKAGE_OUTDIR}/${BGP_PATH}.lib) else() - set(outfile ${GO_PACKAGE_OUTDIR}/${BGP_NAME}.a) + set(outfile ${GO_PACKAGE_OUTDIR}/${BGP_PATH}.a) endif() else() + get_filename_component(exec_filename ${BGP_PATH} NAME) if(WIN32) - set(outfile ${GO_PACKAGE_OUTDIR}/${BGP_NAME}.exe) + set(outfile ${GOPATH}/bin/${exec_filename}.exe) else() - set(outfile ${GO_PACKAGE_OUTDIR}/${BGP_NAME}) + set(outfile ${GOPATH}/bin/${exec_filename}) endif() endif() add_custom_command(OUTPUT ${outfile} From 458c14ffcd8e7bd2204162712be469428540d65f Mon Sep 17 00:00:00 2001 From: mpilman Date: Sun, 10 Feb 2019 10:05:52 -0800 Subject: [PATCH 210/226] Ruby bindings for cmake + gem generation make packages will now also generate a gem file and put it into the packages directory within the build directory --- bindings/CMakeLists.txt | 3 +++ bindings/ruby/CMakeLists.txt | 16 ++++++++++++++++ bindings/ruby/fdb.gemspec.cmake | 22 ++++++++++++++++++++++ cmake/CompileVexillographer.cmake | 16 ++++++++++++---- cmake/FDBComponents.cmake | 19 ++++++++++++++++++- 5 files changed, 71 insertions(+), 5 deletions(-) create mode 100644 bindings/ruby/CMakeLists.txt create mode 100644 bindings/ruby/fdb.gemspec.cmake diff --git a/bindings/CMakeLists.txt b/bindings/CMakeLists.txt index 2ef14e428c..106e65ba16 100644 --- a/bindings/CMakeLists.txt +++ b/bindings/CMakeLists.txt @@ -7,3 +7,6 @@ endif() if(WITH_GO) add_subdirectory(go) endif() +if(WITH_RUBY) + add_subdirectory(ruby) +endif() diff --git a/bindings/ruby/CMakeLists.txt b/bindings/ruby/CMakeLists.txt new file mode 100644 index 0000000000..0ec41bb68f --- /dev/null +++ b/bindings/ruby/CMakeLists.txt @@ -0,0 +1,16 @@ +# we put this generated file into the src dir, as it +# greatly simplifies debugging +vexillographer_compile(TARGET ruby_options LANG ruby + OUT ${CMAKE_CURRENT_SOURCE_DIR}/lib/fdboptions.rb ALL) +configure_file(fdb.gemspec.cmake fdb.gemspec) + +set(gem_file fdb-${FDB_VERSION}.gem) +set(gem_target ${CMAKE_BINARY_DIR}/packages/${gem_file}) +add_custom_command(OUTPUT ${gem_target} + COMMAND ${GEM_COMMAND} build fdb.gemspec && + ${CMAKE_COMMAND} -E copy ${gem_file} ${gem_target} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + COMMENT "Building ruby gem") +add_custom_target(gem_package DEPENDS ${gem_target}) +add_dependencies(gem_package ruby_options) 
+add_dependencies(packages gem_package) diff --git a/bindings/ruby/fdb.gemspec.cmake b/bindings/ruby/fdb.gemspec.cmake new file mode 100644 index 0000000000..bc32a59622 --- /dev/null +++ b/bindings/ruby/fdb.gemspec.cmake @@ -0,0 +1,22 @@ +# -*- mode: ruby; -*- + +Gem::Specification.new do |s| + s.name = 'fdb' + s.version = '${FDB_VERSION}' + s.date = Time.new.strftime '%Y-%m-%d' + s.summary = "Ruby bindings for the FoundationDB database" + s.description = <<-EOF +Ruby bindings for the FoundationDB database. + +Complete documentation of the FoundationDB Ruby API can be found at: +https://apple.github.io/foundationdb/api-ruby.html. +EOF + s.authors = ["FoundationDB"] + s.email = 'fdb-dist@apple.com' + s.files = ["${CMAKE_SOURCE_DIR}/LICENSE", "${CMAKE_CURRENT_SOURCE_DIR}/lib/fdb.rb", "${CMAKE_CURRENT_SOURCE_DIR}/lib/fdbdirectory.rb", "${CMAKE_CURRENT_SOURCE_DIR}/lib/fdbimpl.rb", "${CMAKE_CURRENT_SOURCE_DIR}/lib/fdblocality.rb", "${CMAKE_CURRENT_SOURCE_DIR}/lib/fdboptions.rb", "${CMAKE_CURRENT_SOURCE_DIR}/lib/fdbsubspace.rb", "${CMAKE_CURRENT_SOURCE_DIR}/lib/fdbtuple.rb", "${CMAKE_CURRENT_SOURCE_DIR}/lib/fdbimpl_v609.rb"] + s.homepage = 'https://www.foundationdb.org' + s.license = 'Apache v2' + s.add_dependency('ffi', '>= 1.1.5') + s.required_ruby_version = '>= 1.9.3' + s.requirements << 'These bindings require the FoundationDB client. The client can be obtained from https://www.foundationdb.org/download/.' +end diff --git a/cmake/CompileVexillographer.cmake b/cmake/CompileVexillographer.cmake index ac82a75665..36ddbf9dc0 100644 --- a/cmake/CompileVexillographer.cmake +++ b/cmake/CompileVexillographer.cmake @@ -25,10 +25,14 @@ else() add_custom_target(vexillographer DEPENDS ${VEXILLOGRAPHER_EXE}) endif() -macro(vexillographer_compile) +function(vexillographer_compile) + set(CX_OPTIONS ALL) set(CX_ONE_VALUE_ARGS TARGET LANG OUT) set(CX_MULTI_VALUE_ARGS OUTPUT) - cmake_parse_arguments(VX "" "${CX_ONE_VALUE_ARGS}" "${CX_MULTI_VALUE_ARGS}" "${ARGN}") + cmake_parse_arguments(VX "${CX_OPTIONS}" "${CX_ONE_VALUE_ARGS}" "${CX_MULTI_VALUE_ARGS}" "${ARGN}") + if(NOT VX_OUTPUT) + set(VX_OUTPUT ${VX_OUT}) + endif() if(WIN32) add_custom_command( OUTPUT ${VX_OUTPUT} @@ -42,5 +46,9 @@ macro(vexillographer_compile) DEPENDS ${CMAKE_SOURCE_DIR}/fdbclient/vexillographer/fdb.options vexillographer COMMENT "Generate FDBOptions ${VX_LANG} files") endif() - add_custom_target(${VX_TARGET} DEPENDS ${VX_OUTPUT}) -endmacro() + if(VX_ALL) + add_custom_target(${VX_TARGET} ALL DEPENDS ${VX_OUTPUT}) + else() + add_custom_target(${VX_TARGET} DEPENDS ${VX_OUTPUT}) + endif() +endfunction() diff --git a/cmake/FDBComponents.cmake b/cmake/FDBComponents.cmake index e020acaf70..7fffc2abd8 100644 --- a/cmake/FDBComponents.cmake +++ b/cmake/FDBComponents.cmake @@ -61,6 +61,21 @@ else() set(WITH_GO OFF) endif() +################################################################################ +# Ruby +################################################################################ + +find_package(Ruby) +set(WITH_RUBY OFF) +if(RUBY_FOUND) + get_filename_component(ruby_exec_dir ${RUBY_EXECUTABLE} DIRECTORY) + find_program(GEM_EXECUTABLE gem HINTS ${ruby_exec_dir}) + if(GEM_EXECUTABLE) + set(GEM_COMMAND ${RUBY_EXECUTABLE} ${GEM_EXECUTABLE}) + set(WITH_RUBY ON) + endif() +endif() + file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/packages) add_custom_target(packages) @@ -71,8 +86,10 @@ function(print_components) message(STATUS "=========================================") message(STATUS "Build Java Bindings: ${BUILD_JAoA}") message(STATUS "Build 
with TLS support: ${WITH_TLS}")
-  message(STATUS "Build GO bindings:   ${WITH_GO}")
+  message(STATUS "Build Go bindings:   ${WITH_GO}")
+  message(STATUS "Build Ruby bindings: ${WITH_RUBY}")
   message(STATUS "Build Python sdist (make package): ${WITH_PYTHON}")
   message(STATUS "Build Documentation (make html): ${BUILD_DOCUMENTATION}")
+  message(STATUS "Build Documentation (make html): ${BUILD_DOCUMENTATION}")
   message(STATUS "=========================================")
 endfunction()

From ffaaaf5028378498213fc9142131e5160b1717b8 Mon Sep 17 00:00:00 2001
From: mpilman
Date: Sun, 10 Feb 2019 10:10:53 -0800
Subject: [PATCH 211/226] build fat-jar with packages

The packages target will now build the fat jar and it will place the
jar file into the packages directory
---
 bindings/java/CMakeLists.txt | 98 ++++++++++++++++++------------------
 bindings/java/README.md      | 15 +++---
 2 files changed, 55 insertions(+), 58 deletions(-)

diff --git a/bindings/java/CMakeLists.txt b/bindings/java/CMakeLists.txt
index 75191cc670..9cfd3d7a93 100644
--- a/bindings/java/CMakeLists.txt
+++ b/bindings/java/CMakeLists.txt
@@ -134,57 +134,55 @@ add_dependencies(foundationdb-tests fdb_java_options)
 install_jar(fdb-java DESTINATION ${FDB_SHARE_DIR}/java COMPONENT java)
 install(TARGETS fdb_java DESTINATION ${FDB_LIB_DIR} COMPONENT java)

-set(BUILD_FAT_JAR OFF CACHE BOOL "Build a Jar that includes the jni libraries")
 set(FAT_JAR_BINARIES "NOTFOUND" CACHE STRING
   "Path of a directory structure with libraries to include in fat jar (a lib directory)")
-if(BUILD_FAT_JAR)
-  set(jar_destination ${CMAKE_BINARY_DIR}/fat_jar)
-  set(unpack_dir ${CMAKE_CURRENT_BINARY_DIR}/fat_jar)
-  file(MAKE_DIRECTORY ${jar_destination})
-  file(MAKE_DIRECTORY ${unpack_dir})
-  message(STATUS "Building fat jar to ${jar_destination}")
-  get_property(jar_path TARGET fdb-java PROPERTY JAR_FILE)
-  add_custom_command(OUTPUT ${unpack_dir}/META-INF/MANIFEST.MF
-    COMMAND ${Java_JAR_EXECUTABLE} xf ${jar_path}
-    WORKING_DIRECTORY ${unpack_dir}
-    DEPENDS "${jar_path}"
-    COMMENT "Unpack jar-file")
-  add_custom_target(unpack_jar DEPENDS ${unpack_dir}/META-INF/MANIFEST.MF)
-  add_dependencies(unpack_jar fdb-java)
-  add_custom_command(OUTPUT ${unpack_dir}/LICENSE
-    COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/LICENSE ${unpack_dir}
-    COMMENT "copy license")
-  add_custom_target(copy_license DEPENDS ${unpack_dir}/LICENSE)
-  add_dependencies(unpack_jar copy_license)
-  if(FAT_JAR_BINARIES)
-    add_custom_command(OUTPUT ${unpack_dir}/lib
-      COMMAND ${CMAKE_COMMAND} -E copy_directory ${FAT_JAR_BINARIES} ${unpack_dir}
-      COMMENT "copy additional libraries"
-      DEPENDS ${unpack_dir}/META-INF/MANIFEST.MF)
-    add_custom_target(copy_libs DEPENDS ${unpack_dir}/lib)
-    add_dependencies(unpack_jar copy_libs)
-  endif()
-  if(WIN32)
-    set(lib_destination "windows/amd64")
-  elseif(APPLE)
-    set(lib_destination "osx/x86_64")
-  else()
-    set(lib_destination "linux/amd64")
-  endif()
-  set(lib_destination "${unpack_dir}/lib/${lib_destination}")
-  file(MAKE_DIRECTORY ${lib_destination})
-  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/lib_copied
-    COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:fdb_java> ${lib_destination} &&
-      ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/lib_copied
-    COMMENT "Copy library")
-  add_custom_target(copy_lib DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/lib_copied)
-  add_dependencies(copy_lib unpack_jar)
-  set(target_jar ${jar_destination}/fdb-java-${CMAKE_PROJECT_VERSION}.jar)
-  add_custom_command(OUTPUT ${target_jar}
-    COMMAND ${Java_JAR_EXECUTABLE} cf ${target_jar} .
-    WORKING_DIRECTORY ${unpack_dir}
-    COMMENT "Build ${jar_destination}/fdb-java-${CMAKE_PROJECT_VERSION}.jar")
-  add_custom_target(fat-jar ALL DEPENDS ${target_jar})
-  add_dependencies(fat-jar copy_lib)
-endif()
+set(jar_destination ${CMAKE_BINARY_DIR}/packages)
+set(unpack_dir ${CMAKE_CURRENT_BINARY_DIR}/fat_jar)
+file(MAKE_DIRECTORY ${jar_destination})
+file(MAKE_DIRECTORY ${unpack_dir})
+message(STATUS "Building fat jar to ${jar_destination}")
+get_property(jar_path TARGET fdb-java PROPERTY JAR_FILE)
+add_custom_command(OUTPUT ${unpack_dir}/META-INF/MANIFEST.MF
+  COMMAND ${Java_JAR_EXECUTABLE} xf ${jar_path}
+  WORKING_DIRECTORY ${unpack_dir}
+  DEPENDS "${jar_path}"
+  COMMENT "Unpack jar-file")
+add_custom_target(unpack_jar DEPENDS ${unpack_dir}/META-INF/MANIFEST.MF)
+add_dependencies(unpack_jar fdb-java)
+add_custom_command(OUTPUT ${unpack_dir}/LICENSE
+  COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/LICENSE ${unpack_dir}
+  COMMENT "copy license")
+add_custom_target(copy_license DEPENDS ${unpack_dir}/LICENSE)
+add_dependencies(unpack_jar copy_license)
+if(FAT_JAR_BINARIES)
+  add_custom_command(OUTPUT ${unpack_dir}/lib
+    COMMAND ${CMAKE_COMMAND} -E copy_directory ${FAT_JAR_BINARIES} ${unpack_dir}
+    COMMENT "copy additional libraries"
+    DEPENDS ${unpack_dir}/META-INF/MANIFEST.MF)
+  add_custom_target(copy_libs DEPENDS ${unpack_dir}/lib)
+  add_dependencies(unpack_jar copy_libs)
+endif()
+if(WIN32)
+  set(lib_destination "windows/amd64")
+elseif(APPLE)
+  set(lib_destination "osx/x86_64")
+else()
+  set(lib_destination "linux/amd64")
+endif()
+set(lib_destination "${unpack_dir}/lib/${lib_destination}")
+file(MAKE_DIRECTORY ${lib_destination})
+add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/lib_copied
+  COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:fdb_java> ${lib_destination} &&
+    ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/lib_copied
+  COMMENT "Copy library")
+add_custom_target(copy_lib DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/lib_copied)
+add_dependencies(copy_lib unpack_jar)
+set(target_jar ${jar_destination}/fdb-java-${CMAKE_PROJECT_VERSION}.jar)
+add_custom_command(OUTPUT ${target_jar}
+  COMMAND ${Java_JAR_EXECUTABLE} cf ${target_jar} .
+  WORKING_DIRECTORY ${unpack_dir}
+  COMMENT "Build ${jar_destination}/fdb-java-${CMAKE_PROJECT_VERSION}.jar")
+add_custom_target(fat-jar DEPENDS ${target_jar})
+add_dependencies(fat-jar copy_lib)
+add_dependencies(packages fat-jar)
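One detail in the jar rules above deserves a comment: `cmake -E copy` leaves no
tracked artifact for the copied JNI library, so the recipe touches the
`lib_copied` stamp file and later steps depend on that. The same pattern in
isolation, with all names hypothetical:

```cmake
# Stamp-file pattern: give an otherwise untracked side effect a file CMake can track.
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/stage.stamp
  COMMAND ${CMAKE_COMMAND} -E copy_directory
          ${CMAKE_CURRENT_SOURCE_DIR}/payload ${CMAKE_CURRENT_BINARY_DIR}/staging
  COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/stage.stamp
  COMMENT "Stage payload directory")
add_custom_target(stage ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/stage.stamp)
```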
diff --git a/bindings/java/README.md b/bindings/java/README.md
index 3b53dc5bb8..dddbe5a09e 100644
--- a/bindings/java/README.md
+++ b/bindings/java/README.md
@@ -21,14 +21,12 @@ By default, the generated jar file will depend on an installed libfdb_java
 a Jar-file that contains this library more convenient. This is also what
 you will get if you download the jar file from Maven.

-If you want to build a jar file that contains the library enable the cmake
-variable `BUILD_FAT_JAR`. You can do this with the following command:
+This file can be generated by compiling the `packages` target. For example with
+make, you can run:

+``` shell
+make packages
 ```
-cmake -DBUILD_FAT_JAR
-```
-
-This will add the jni library of for the current architecture to the jar file.

 #### Multi-Platform Jar-File

@@ -50,5 +48,6 @@ that by executing the following steps:
 cmake -DFAT_JAR_BINARIES=/foo/bar/lib
 ```

-After executing building (with `make` or `Visual Studio`) you will find a
-jar-file in the `fat-jar` directory in your build directory.
+After executing building the packages (with `make packages` or the packages +target in `Visual Studio`) you will find a jar-file in the `packages` +directory in your build directory. From 723f208cd164015a5650ade97543e5cd1cc92b16 Mon Sep 17 00:00:00 2001 From: mpilman Date: Sun, 10 Feb 2019 10:30:18 -0800 Subject: [PATCH 212/226] Fix cmake output --- cmake/FDBComponents.cmake | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cmake/FDBComponents.cmake b/cmake/FDBComponents.cmake index 7fffc2abd8..c2a9a4307e 100644 --- a/cmake/FDBComponents.cmake +++ b/cmake/FDBComponents.cmake @@ -28,6 +28,8 @@ if(JNI_FOUND AND Java_FOUND AND Java_Development_FOUND) set(BUILD_JAVA ON) include(UseJava) enable_language(Java) +else() + set(BUILD_JAVA OFF) endif() ################################################################################ @@ -48,6 +50,8 @@ endif() find_package(Virtualenv) if (Virtualenv_FOUND) set(BUILD_DOCUMENTATION ON) +else() + set(BUILD_DOCUMENTATION OFF) endif() ################################################################################ @@ -84,12 +88,11 @@ function(print_components) message(STATUS "=========================================") message(STATUS " Components Build Overview ") message(STATUS "=========================================") - message(STATUS "Build Java Bindings: ${BUILD_JAoA}") + message(STATUS "Build Java Bindings: ${BUILD_JAVA}") message(STATUS "Build with TLS support: ${WITH_TLS}") message(STATUS "Build Go bindings: ${WITH_GO}") message(STATUS "Build Ruby bindings: ${WITH_RUBY}") message(STATUS "Build Python sdist (make package): ${WITH_PYTHON}") message(STATUS "Build Documentation (make html): ${BUILD_DOCUMENTATION}") - message(STATUS "Build Documentation (make html): ${BUILD_DOCUMENTATION}") message(STATUS "=========================================") endfunction() From f6ab5770023ac6652a9ca7e2ca2f521a2f425ef0 Mon Sep 17 00:00:00 2001 From: mpilman Date: Sun, 10 Feb 2019 10:53:05 -0800 Subject: [PATCH 213/226] Fix Windows issue with libressl --- cmake/ConfigureCompiler.cmake | 3 --- cmake/FDBComponents.cmake | 1 + 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/cmake/ConfigureCompiler.cmake b/cmake/ConfigureCompiler.cmake index 3c475e1521..ba56b1b067 100644 --- a/cmake/ConfigureCompiler.cmake +++ b/cmake/ConfigureCompiler.cmake @@ -7,9 +7,6 @@ set(WITH_UNDODB OFF CACHE BOOL "Use rr or undodb") set(FDB_RELEASE OFF CACHE BOOL "This is a building of a final release") add_compile_options(-DCMAKE_BUILD) -if(WITH_TLS) - add_compile_options(-DHAVE_OPENSSL) -endif() find_package(Threads REQUIRED) if(ALLOC_INSTRUMENTATION) diff --git a/cmake/FDBComponents.cmake b/cmake/FDBComponents.cmake index c2a9a4307e..7468732aa1 100644 --- a/cmake/FDBComponents.cmake +++ b/cmake/FDBComponents.cmake @@ -10,6 +10,7 @@ else() find_package(LibreSSL) if(LibreSSL_FOUND) set(WITH_TLS ON) + add_compile_options(-DHAVE_OPENSSL) else() message(STATUS "LibreSSL NOT Found - Will compile without TLS Support") message(STATUS "You can set LibreSSL_ROOT to the LibreSSL install directory to help cmake find it") From 6529e2790b38a10262e7aea6149ebe42138b047d Mon Sep 17 00:00:00 2001 From: mpilman Date: Sun, 10 Feb 2019 12:48:15 -0800 Subject: [PATCH 214/226] several minor bug fixes --- CMakeLists.txt | 2 +- bindings/go/CMakeLists.txt | 4 +++- cmake/FDBComponents.cmake | 15 ++++++--------- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b129f2ec1e..650819df48 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt 
@@ -106,7 +106,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/fdbclient/versions.h.cmake ${CMAKE_CU ################################################################################ # Flow and other tools are written in C# - so we need that dependency -include(EnableCSharp) +include(EnableCsharp) # First thing we need is the actor compiler - and to compile and run the # actor compiler, we need mono diff --git a/bindings/go/CMakeLists.txt b/bindings/go/CMakeLists.txt index 666ad1164d..793089a3f7 100644 --- a/bindings/go/CMakeLists.txt +++ b/bindings/go/CMakeLists.txt @@ -41,7 +41,9 @@ file(MAKE_DIRECTORY ${GOPATH} ${GO_DEST}) set(go_options_file ${GO_DEST}/src/fdb/generated.go) -set(go_env GOPATH=${GOPATH}) +set(go_env GOPATH=${GOPATH} + C_INCLUDE_PATH=${CMAKE_BINARY_DIR}/bindings/c/foundationdb:${CMAKE_SOURCE_DIR}/bindings/c + CGO_LDFLAGS=-L${CMAKE_BINARY_DIR}/lib) foreach(src_file IN LISTS SRCS) set(dest_file ${GO_DEST}/${src_file}) diff --git a/cmake/FDBComponents.cmake b/cmake/FDBComponents.cmake index 7468732aa1..f219829394 100644 --- a/cmake/FDBComponents.cmake +++ b/cmake/FDBComponents.cmake @@ -60,7 +60,8 @@ endif() ################################################################################ find_program(GO_EXECUTABLE go) -if(GO_EXECUTABLE) +# building the go binaries is currently not supported on Windows +if(GO_EXECUTABLE AND NOT WIN32) set(WITH_GO ON) else() set(WITH_GO OFF) @@ -70,15 +71,11 @@ endif() # Ruby ################################################################################ -find_package(Ruby) +find_program(GEM_EXECUTABLE gem) set(WITH_RUBY OFF) -if(RUBY_FOUND) - get_filename_component(ruby_exec_dir ${RUBY_EXECUTABLE} DIRECTORY) - find_program(GEM_EXECUTABLE gem HINTS ${ruby_exec_dir}) - if(GEM_EXECUTABLE) - set(GEM_COMMAND ${RUBY_EXECUTABLE} ${GEM_EXECUTABLE}) - set(WITH_RUBY ON) - endif() +if(GEM_EXECUTABLE) + set(GEM_COMMAND ${RUBY_EXECUTABLE} ${GEM_EXECUTABLE}) + set(WITH_RUBY ON) endif() file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/packages) From 5f25e5cb3235e76988fab376b46562378b40ec20 Mon Sep 17 00:00:00 2001 From: mpilman Date: Sun, 10 Feb 2019 15:50:11 -0800 Subject: [PATCH 215/226] docker container and docker-compose for cmake This is still in the testing-phase. 
However, it should be mostly functional --- build/cmake/Dockerfile | 44 ++++++ build/cmake/build.sh | 235 +++++++++++++++++++++++++++++++++ build/cmake/docker-compose.yml | 53 ++++++++ cmake/InstallLayout.cmake | 14 +- 4 files changed, 345 insertions(+), 1 deletion(-) create mode 100644 build/cmake/Dockerfile create mode 100644 build/cmake/build.sh create mode 100644 build/cmake/docker-compose.yml diff --git a/build/cmake/Dockerfile b/build/cmake/Dockerfile new file mode 100644 index 0000000000..09f7b80cad --- /dev/null +++ b/build/cmake/Dockerfile @@ -0,0 +1,44 @@ +FROM centos:6 +LABEL version=0.0.4 + +RUN yum install -y yum-utils +RUN yum-config-manager --enable rhel-server-rhscl-7-rpms +RUN yum -y install centos-release-scl +RUN yum install -y devtoolset-7 + +# install cmake +RUN curl -L https://github.com/Kitware/CMake/releases/download/v3.13.4/cmake-3.13.4-Linux-x86_64.tar.gz > /tmp/cmake.tar.gz &&\ + echo "563a39e0a7c7368f81bfa1c3aff8b590a0617cdfe51177ddc808f66cc0866c76 /tmp/cmake.tar.gz" > /tmp/cmake-sha.txt &&\ + sha256sum -c /tmp/cmake-sha.txt &&\ + cd /tmp && tar xf cmake.tar.gz && cp -r cmake-3.13.4-Linux-x86_64/* /usr/local/ + +# install boost +RUN curl -L https://dl.bintray.com/boostorg/release/1.67.0/source/boost_1_67_0.tar.bz2 > /tmp/boost.tar.bz2 &&\ + cd /tmp && echo "2684c972994ee57fc5632e03bf044746f6eb45d4920c343937a465fd67a5adba boost.tar.bz2" > boost-sha.txt &&\ + sha256sum -c boost-sha.txt && tar xf boost.tar.bz2 && cp -r boost_1_67_0/boost /usr/local/include/ &&\ + rm -rf boost.tar.bz2 boost_1_67_0 + +# install mono (for actorcompiler) +RUN yum install -y epel-release +RUN yum install -y mono-core + +# install Java +RUN yum install -y java-1.8.0-openjdk-devel + +# install LibreSSL +RUN curl https://ftp.openbsd.org/pub/OpenBSD/LibreSSL/libressl-2.8.2.tar.gz > /tmp/libressl.tar.gz &&\ + cd /tmp && echo "b8cb31e59f1294557bfc80f2a662969bc064e83006ceef0574e2553a1c254fd5 libressl.tar.gz" > libressl-sha.txt &&\ + sha256sum -c libressl-sha.txt && tar xf libressl.tar.gz &&\ + cd libressl-2.8.2 && cd /tmp/libressl-2.8.2 && scl enable devtoolset-7 -- ./configure --prefix=/usr/local/stow/libressl CFLAGS="-fPIC -O3" --prefix=/usr/local &&\ + cd /tmp/libressl-2.8.2 && scl enable devtoolset-7 -- make -j`nproc` install &&\ + rm -rf /tmp/libressl-2.8.2 /tmp/libressl.tar.gz + + +# install dependencies for bindings and documentation +# python 2.7 is required for the documentation +RUN yum install -y rh-python36-python-devel rh-ruby24 golang python27 + +# install packaging tools +RUN yum install -y rpm-build debbuild + +CMD scl enable devtoolset-7 python27 rh-python36 rh-ruby24 -- bash diff --git a/build/cmake/build.sh b/build/cmake/build.sh new file mode 100644 index 0000000000..79150f5acf --- /dev/null +++ b/build/cmake/build.sh @@ -0,0 +1,235 @@ +#!env bash + +arguments_usage() { + cat < Date: Sun, 10 Feb 2019 20:51:36 -0800 Subject: [PATCH 216/226] docker-compose can now build rpm weirdly it still generates a foundationd-unspecified rpm. 
However, it is empty and can be ignored for now --- CMakeLists.txt | 46 +-------------------- bindings/java/CMakeLists.txt | 7 +++- bindings/python/CMakeLists.txt | 3 +- build/cmake/build.sh | 2 +- cmake/InstallLayout.cmake | 73 +++++++++++++++++++++++----------- fdbbackup/CMakeLists.txt | 24 +++++------ 6 files changed, 72 insertions(+), 83 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 650819df48..f56f7857cd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -123,50 +123,6 @@ include(FlowCommands) include(CompileVexillographer) -# This macro can be used to install symlinks, which turns out to be -# non-trivial due to CMake version differences and limitations on how -# files can be installed when building binary packages. -# -# The rule for binary packaging is that files (including symlinks) must -# be installed with the standard CMake install() macro. -# -# The rule for non-binary packaging is that CMake 2.6 cannot install() -# symlinks, but can create the symlink at install-time via scripting. -# Though, we assume that CMake 2.6 isn't going to be used to generate -# packages because versions later than 2.8.3 are superior for that purpose. -# -# _filepath: the absolute path to the file to symlink -# _sympath: absolute path of the installed symlink - -macro(InstallSymlink _filepath _sympath) - get_filename_component(_symname ${_sympath} NAME) - get_filename_component(_installdir ${_sympath} PATH) - - if (BINARY_PACKAGING_MODE) - execute_process(COMMAND "${CMAKE_COMMAND}" -E create_symlink - ${_filepath} - ${CMAKE_CURRENT_BINARY_DIR}/${_symname}) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${_symname} - DESTINATION ${_installdir} - COMPONENT clients) - else () - # scripting the symlink installation at install time should work - # for CMake 2.6.x and 2.8.x - install(CODE " - if (\"\$ENV{DESTDIR}\" STREQUAL \"\") - execute_process(COMMAND \"${CMAKE_COMMAND}\" -E create_symlink - ${_filepath} - ${_installdir}/${_symname}) - else () - execute_process(COMMAND \"${CMAKE_COMMAND}\" -E create_symlink - ${_filepath} - \$ENV{DESTDIR}/${_installdir}/${_symname}) - endif () - " - COMPONENT clients) - endif () -endmacro(InstallSymlink) - ################################################################################ # Generate config file ################################################################################ @@ -248,3 +204,5 @@ endif() ################################################################################ print_components() + +message(STATUS "CPACK_COMPONENTS_ALL ${CPACK_COMPONENTS_ALL}") diff --git a/bindings/java/CMakeLists.txt b/bindings/java/CMakeLists.txt index 9cfd3d7a93..b55579cbfc 100644 --- a/bindings/java/CMakeLists.txt +++ b/bindings/java/CMakeLists.txt @@ -131,8 +131,11 @@ add_dependencies(fdb-java fdb_java_options fdb_java) add_jar(foundationdb-tests SOURCES ${JAVA_TESTS_SRCS} INCLUDE_JARS fdb-java) add_dependencies(foundationdb-tests fdb_java_options) -install_jar(fdb-java DESTINATION ${FDB_SHARE_DIR}/java COMPONENT java) -install(TARGETS fdb_java DESTINATION ${FDB_LIB_DIR} COMPONENT java) +# TODO[mpilman]: The java RPM will require some more effort (mostly on debian). However, +# most people will use the fat-jar, so it is not clear how high this priority is. 
+ +#install_jar(fdb-java DESTINATION ${FDB_SHARE_DIR}/java COMPONENT java) +#install(TARGETS fdb_java DESTINATION ${FDB_LIB_DIR} COMPONENT java) set(FAT_JAR_BINARIES "NOTFOUND" CACHE STRING "Path of a directory structure with libraries to include in fat jar (a lib directory)") diff --git a/bindings/python/CMakeLists.txt b/bindings/python/CMakeLists.txt index cb7a236e1f..57259e7337 100644 --- a/bindings/python/CMakeLists.txt +++ b/bindings/python/CMakeLists.txt @@ -41,7 +41,8 @@ vexillographer_compile(TARGET fdb_python_options LANG python OUT ${options_file} add_dependencies(python_binding fdb_python_options) set(out_files "${out_files};${options_file}") -install(FILES ${out_files} DESTINATION ${FDB_PYTHON_INSTALL_DIR} COMPONENT clients) +# TODO[mpilman]: it is not clear whether we want to have rpms for python +#install(FILES ${out_files} DESTINATION ${FDB_PYTHON_INSTALL_DIR} COMPONENT python) # Create sdist configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.cmake ${CMAKE_CURRENT_BINARY_DIR}/setup.py) diff --git a/build/cmake/build.sh b/build/cmake/build.sh index 79150f5acf..c0de2cd11d 100644 --- a/build/cmake/build.sh +++ b/build/cmake/build.sh @@ -132,7 +132,7 @@ rpm() { then break fi - cpack + fakeroot cpack __res=$? if [ ${__res} -ne 0 ] then diff --git a/cmake/InstallLayout.cmake b/cmake/InstallLayout.cmake index a67fcdbeeb..de7b4c3329 100644 --- a/cmake/InstallLayout.cmake +++ b/cmake/InstallLayout.cmake @@ -1,3 +1,30 @@ +################################################################################ +# Helper Functions +################################################################################ + +function(install_symlink) + set(options "") + set(one_value_options COMPONENT FROM TO) + set(multi_value_options) + cmake_parse_arguments(SYM "${options}" "${one_value_options}" "${multi_value_options}" "${ARGN}") + + if(NOT SYM_COMPONENT OR NOT SYM_FROM OR NOT SYM_TO) + message(FATA_ERROR "Invalid call to install_symlink") + endif() + get_filename_component(dest_dir ${SYM_TO} DIRECTORY) + install(CODE "execute_process(COMMAND ${CMAKE_COMMAND} -E make_directory ${dest_dir})" COMPONENT ${SYM_COMPONENT}) + install(CODE "execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink ${SYM_FROM} ${SYM_TO})" COMPONENT ${SYM_COMPONENT}) + install(CODE "message(\"-- Created symlink: ${SYM_FROM} -> ${SYM_TO}\")") +endfunction() +function(install_mkdir) + set(options "") + set(one_value_options COMPONENT NAME) + set(multi_value_options) + cmake_parse_arguments(MK "${options}" "${one_value_options}" "${multi_value_options}" "${ARGN}") + install(CODE "execute_process(COMMAND ${CMAKE_COMMAND} -E make_directory ${MK_NAME})" COMPONENT ${MK_COMPONENT}) + install(CODE "message(\"-- Created directory: ${MK_NAME}\")") +endfunction() + if(NOT INSTALL_LAYOUT) if(WIN32) set(DEFAULT_INSTALL_LAYOUT "WIN") @@ -56,19 +83,24 @@ elseif(DIR_LAYOUT MATCHES "OSX") set(FDB_INCLUDE_INSTALL_DIR "usr/local/include") set(FDB_PYTHON_INSTALL_DIR "Library/Python/2.7/site-packages/fdb") set(FDB_SHARE_DIR "usr/local/share") -elseif(DIR_LAYOUT MATCHES "RPM") - set(CPACK_GENERATOR RPM) - set(CPACK_PACKAGE_INSTALL_DIRECTORY "/") +else() + if(DIR_LAYOUT MATCHES "RPM") + set(CPACK_GENERATOR RPM) + else() + # DEB + set(CPACK_GENERATOR "DEB") + set(LIBSUFFIX "") + endif() + set(CMAKE_INSTALL_PREFIX "/") + set(CPACK_PACKAGING_INSTALL_PREFIX "/") + set(FDB_CONFIG_DIR "etc/foundationdb") set(FDB_LIB_DIR "usr/lib${LIBSUFFIX}") - set(FDB_LIBEXEC_DIR "usr/${FDB_LIB_DIR}") + set(FDB_LIBEXEC_DIR ${FDB_LIB_DIR}) set(FDB_BIN_DIR "usr/bin") 
set(FDB_SBIN_DIR "usr/sbin") set(FDB_INCLUDE_INSTALL_DIR "usr/include") set(FDB_PYTHON_INSTALL_DIR "${FDB_LIB_DIR}/python2.7/site-packages/fdb") set(FDB_SHARE_DIR "usr/share") -else() - # DEB - set(CPACK_GENERATOR "DEB") endif() if(INSTALL_LAYOUT MATCHES "OSX") @@ -135,7 +167,9 @@ if(INSTALL_LAYOUT MATCHES "RPM") "/lib/systemd" "/lib/systemd/system" "/etc/rc.d/init.d") - set(CPACK_RPM_DEBUGINFO_PACKAGE ON) + set(CPACK_RPM_server_DEBUGINFO_PACKAGE ON) + set(CPACK_RPM_clients_DEBUGINFO_PACKAGE ON) + set(CPACK_RPM_BUILD_SOURCE_DIRS_PREFIX /usr/src) set(CPACK_RPM_COMPONENT_INSTALL ON) set(CPACK_RPM_clients_PRE_INSTALL_SCRIPT_FILE ${CMAKE_SOURCE_DIR}/packaging/rpm/scripts/preclients.sh) @@ -149,6 +183,12 @@ if(INSTALL_LAYOUT MATCHES "RPM") ${CMAKE_SOURCE_DIR}/packaging/rpm/scripts/preunserver.sh) set(CPACK_RPM_server_PACKAGE_REQUIRES "foundationdb-clients = ${FDB_MAJOR}.${FDB_MINOR}.${FDB_PATCH}") + #set(CPACK_RPM_java_PACKAGE_REQUIRES + # "foundationdb-clients = ${FDB_MAJOR}.${FDB_MINOR}.${FDB_PATCH}") + set(CPACK_RPM_python_PACKAGE_REQUIRES + "foundationdb-clients = ${FDB_MAJOR}.${FDB_MINOR}.${FDB_PATCH}") + install_mkdir(NAME "var/log/foundationdb" COMPONENT server) + install_mkdir(NAME "var/lib/foundationdb" COMPONENT server) endif() ################################################################################ @@ -160,8 +200,8 @@ if(INSTALL_LAYOUT MATCHES "DEB") set(CPACK_DEBIAN_PACKAGE_SECTION "database") set(CPACK_DEBIAN_ENABLE_COMPONENT_DEPENDS ON) - set(CPACK_DEBIAN_server_PACKAGE_DEPENDS "adduser, libc6 (>= 2.11), python (>= 2.6)") - set(CPACK_DEBIAN_clients_PACKAGE_DEPENDS "adduser, libc6 (>= 2.11)") + set(CPACK_DEBIAN_server_PACKAGE_DEPENDS "adduser, libc6 (>= 2.12), python (>= 2.6), foundationdb-clients (= ${FDB_VERSION})") + set(CPACK_DEBIAN_clients_PACKAGE_DEPENDS "adduser, libc6 (>= 2.12)") set(CPACK_DEBIAN_PACKAGE_HOMEPAGE "https://www.foundationdb.org") set(CPACK_DEBIAN_clients_PACKAGE_CONTROL_EXTRA ${CMAKE_SOURCE_DIR}/packaging/deb/DEBIAN-foundationdb-clients/postinst) @@ -255,16 +295,3 @@ if((INSTALL_LAYOUT MATCHES "RPM") OR (INSTALL_LAYOUT MATCHES "DEB")) endif() endif() endif() - -################################################################################ -# Helper Macros -################################################################################ - -macro(install_symlink filepath sympath compondent) - install(CODE "execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink ${filepath} ${sympath})" COMPONENT ${component}) - install(CODE "message(\"-- Created symlink: ${sympath} -> ${filepath}\")") -endmacro() -macro(install_mkdir dirname component) - install(CODE "execute_process(COMMAND ${CMAKE_COMMAND} -E make_directory ${dirname})" COMPONENT ${component}) - install(CODE "message(\"-- Created directory: ${dirname}\")") -endmacro() diff --git a/fdbbackup/CMakeLists.txt b/fdbbackup/CMakeLists.txt index c35aec3c3c..35bed6a2c9 100644 --- a/fdbbackup/CMakeLists.txt +++ b/fdbbackup/CMakeLists.txt @@ -5,19 +5,19 @@ add_flow_target(EXECUTABLE NAME fdbbackup SRCS ${FDBBACKUP_SRCS}) target_link_libraries(fdbbackup PRIVATE fdbclient) install(TARGETS fdbbackup DESTINATION ${FDB_BIN_DIR} COMPONENT clients) -install(PROGRAMS $ - DESTINATION ${FDB_LIB_DIR}/foundationdb/backup_agent - RENAME backup_agent +install_symlink( + FROM ${FDB_BIN_DIR}/fdbbackup + TO ${FDB_LIB_DIR}/foundationdb/backup_agent COMPONENT clients) -install(PROGRAMS $ - DESTINATION ${FDB_BIN_DIR} - RENAME fdbrestore +install_symlink( + FROM ${FDB_BIN_DIR}/fdbbackup + TO ${FDB_BIN_DIR}/fdbrestore COMPONENT 
From 990e084fa290fba01b56d2ce2293a961654017fb Mon Sep 17 00:00:00 2001
From: mpilman
Date: Sun, 10 Feb 2019 21:01:26 -0800
Subject: [PATCH 217/226] Debian packages are successfully generated

---
 build/cmake/build.sh           |  3 ++-
 build/cmake/docker-compose.yml |  8 ++++----
 cmake/InstallLayout.cmake      | 10 +++++-----
 3 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/build/cmake/build.sh b/build/cmake/build.sh
index c0de2cd11d..8b39b413d6 100644
--- a/build/cmake/build.sh
+++ b/build/cmake/build.sh
@@ -158,7 +158,7 @@ deb() {
         then
             break
         fi
-        cpack
+        fakeroot cpack
         __res=$?
         if [ ${__res} -ne 0 ]
         then
@@ -213,6 +213,7 @@ main() {
             rpm
             ;;
         deb )
+            deb
             ;;
         linux-pkgs)
             rpm
diff --git a/build/cmake/docker-compose.yml b/build/cmake/docker-compose.yml
index e5e6e9805d..02a9d9172e 100644
--- a/build/cmake/docker-compose.yml
+++ b/build/cmake/docker-compose.yml
@@ -31,14 +31,14 @@ services:
     <<: *build-setup
     command: scl enable devtoolset-7 rh-ruby24 rh-python36 python27 -- bash ../src/build/cmake/build.sh build/fast

-  rpm-fast: &rpm-fast
-    <<: *build-setup
-    command: scl enable devtoolset-7 rh-ruby24 rh-python36 python27 -- bash ../src/build/cmake/build.sh rpm/fast
-
   rpm: &rpm
     <<: *build-setup
     command: scl enable devtoolset-7 rh-ruby24 rh-python36 python27 -- bash ../src/build/cmake/build.sh rpm

+  deb: &deb
+    <<: *build-setup
+    command: scl enable devtoolset-7 rh-ruby24 rh-python36 python27 -- bash ../src/build/cmake/build.sh deb
+
   package: &package
     <<: *build-setup
     command: scl enable devtoolset-7 rh-ruby24 rh-python36 python27 -- bash ../src/build/cmake/build.sh package
diff --git a/cmake/InstallLayout.cmake b/cmake/InstallLayout.cmake
index de7b4c3329..65d884c54e 100644
--- a/cmake/InstallLayout.cmake
+++ b/cmake/InstallLayout.cmake
@@ -200,12 +200,12 @@ if(INSTALL_LAYOUT MATCHES "DEB")
   set(CPACK_DEBIAN_PACKAGE_SECTION "database")
   set(CPACK_DEBIAN_ENABLE_COMPONENT_DEPENDS ON)

-  set(CPACK_DEBIAN_server_PACKAGE_DEPENDS "adduser, libc6 (>= 2.12), python (>= 2.6), foundationdb-clients (= ${FDB_VERSION})")
-  set(CPACK_DEBIAN_clients_PACKAGE_DEPENDS "adduser, libc6 (>= 2.12)")
+  set(CPACK_DEBIAN_SERVER_PACKAGE_DEPENDS "adduser, libc6 (>= 2.12), python (>= 2.6), foundationdb-clients (= ${FDB_VERSION})")
+  set(CPACK_DEBIAN_CLIENTS_PACKAGE_DEPENDS "adduser, libc6 (>= 2.12)")
   set(CPACK_DEBIAN_PACKAGE_HOMEPAGE "https://www.foundationdb.org")
-  set(CPACK_DEBIAN_clients_PACKAGE_CONTROL_EXTRA
-    ${CMAKE_SOURCE_DIR}/packaging/deb/DEBIAN-foundationdb-clients/postinst)
-  set(CPACK_DEBIAN_server_PACKAGE_CONTROL_EXTRA
+  set(CPACK_DEBIAN_CLIENTS_PACKAGE_CONTROL_EXTRA
+    ${CMAKE_SOURCE_DIR}/packaging/deb/DEBIAN-foundationdb-CLIENTS/postinst)
+  set(CPACK_DEBIAN_SERVER_PACKAGE_CONTROL_EXTRA
     ${CMAKE_SOURCE_DIR}/packaging/deb/DEBIAN-foundationdb-server/conffiles
     ${CMAKE_SOURCE_DIR}/packaging/deb/DEBIAN-foundationdb-server/preinst
     ${CMAKE_SOURCE_DIR}/packaging/deb/DEBIAN-foundationdb-server/postinst
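The InstallLayout change in this patch is easy to miss: with component-based
packaging, CPack's DEB generator looks up per-component settings as
`CPACK_DEBIAN_<COMPONENT>_*` with the component name upper-cased, so the earlier
lowercase `server`/`clients` spellings were silently ignored. A minimal sketch
of the convention (project name, tool, and dependency values are hypothetical):

```cmake
cmake_minimum_required(VERSION 3.6)
project(pkg_demo C)

add_executable(mytool main.c)  # main.c is a placeholder source file
install(TARGETS mytool DESTINATION bin COMPONENT clients)

set(CPACK_DEB_COMPONENT_INSTALL ON)
# The component was registered as lowercase "clients", but CPackDeb reads:
set(CPACK_DEBIAN_CLIENTS_PACKAGE_DEPENDS "libc6 (>= 2.12)")
include(CPack)
```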
From 4266429dcfb6d7c13afa6aa4dc157ce9d6302050 Mon Sep 17 00:00:00 2001
From: mpilman
Date: Sun, 10 Feb 2019 21:14:14 -0800
Subject: [PATCH 218/226] Fixed python dependency

---
 bindings/c/CMakeLists.txt | 2 +-
 cmake/AddFdbTest.cmake    | 2 +-
 cmake/FDBComponents.cmake | 1 +
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/bindings/c/CMakeLists.txt b/bindings/c/CMakeLists.txt
index 98341c08be..3851398e44 100644
--- a/bindings/c/CMakeLists.txt
+++ b/bindings/c/CMakeLists.txt
@@ -16,7 +16,7 @@ elseif(WIN32)
 endif()

 add_custom_command(OUTPUT ${asm_file} ${CMAKE_CURRENT_BINARY_DIR}/fdb_c_function_pointers.g.h
-  COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/generate_asm.py ${platform}
+  COMMAND $<TARGET_FILE:Python::Interpreter> ${CMAKE_CURRENT_SOURCE_DIR}/generate_asm.py ${platform}
       ${CMAKE_CURRENT_SOURCE_DIR}/fdb_c.cpp
       ${asm_file}
       ${CMAKE_CURRENT_BINARY_DIR}/fdb_c_function_pointers.g.h
diff --git a/cmake/AddFdbTest.cmake b/cmake/AddFdbTest.cmake
index 36b6c6accb..7a11008d4e 100644
--- a/cmake/AddFdbTest.cmake
+++ b/cmake/AddFdbTest.cmake
@@ -101,7 +101,7 @@ function(add_fdb_test)
   endif()
   list(TRANSFORM ADD_FDB_TEST_TEST_FILES PREPEND "${CMAKE_CURRENT_SOURCE_DIR}/")
   add_test(NAME ${test_name}
-    COMMAND ${PYTHON_EXECUTABLE} ${TestRunner}
+    COMMAND $<TARGET_FILE:Python::Interpreter> ${TestRunner}
     -n ${test_name}
     -b ${PROJECT_BINARY_DIR}
     -t ${test_type}
diff --git a/cmake/FDBComponents.cmake b/cmake/FDBComponents.cmake
index f219829394..81c2f59ee9 100644
--- a/cmake/FDBComponents.cmake
+++ b/cmake/FDBComponents.cmake
@@ -41,6 +41,7 @@ find_package(Python COMPONENTS Interpreter)
 if(Python_Interpreter_FOUND)
   set(WITH_PYTHON ON)
 else()
+  message(FATAL_ERROR "Could not found a suitable python interpreter")
   set(WITH_PYTHON OFF)
 endif()

From 46119dce0ddf5b6f756fa5653351a7ae24078c58 Mon Sep 17 00:00:00 2001
From: mpilman
Date: Sun, 10 Feb 2019 21:29:03 -0800
Subject: [PATCH 219/226] Disabled c-bindin tests on Windows

---
 bindings/c/CMakeLists.txt | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/bindings/c/CMakeLists.txt b/bindings/c/CMakeLists.txt
index 3851398e44..7fce9f112b 100644
--- a/bindings/c/CMakeLists.txt
+++ b/bindings/c/CMakeLists.txt
@@ -42,10 +42,13 @@ if(WIN32)
   set_property(SOURCE ${asm_file} PROPERTY LANGUAGE ASM_MASM)
 endif()

-add_executable(fdb_c_performance_test test/performance_test.c test/test.h)
-target_link_libraries(fdb_c_performance_test PRIVATE fdb_c)
-add_executable(fdb_c_ryw_benchmark test/ryw_benchmark.c test/test.h)
-target_link_libraries(fdb_c_ryw_benchmark PRIVATE fdb_c)
+# The tests don't build on windows
+if(NOT WIN32)
+  add_executable(fdb_c_performance_test test/performance_test.c test/test.h)
+  target_link_libraries(fdb_c_performance_test PRIVATE fdb_c)
+  add_executable(fdb_c_ryw_benchmark test/ryw_benchmark.c test/test.h)
+  target_link_libraries(fdb_c_ryw_benchmark PRIVATE fdb_c)
+endif()

 # TODO: re-enable once the old vcxproj-based build system is removed.
 #generate_export_header(fdb_c EXPORT_MACRO_NAME "DLLEXPORT"

From a20e58bb4c00934a772d510031817cf42863dd75 Mon Sep 17 00:00:00 2001
From: mpilman
Date: Sun, 10 Feb 2019 21:30:50 -0800
Subject: [PATCH 220/226] Added linux-pkgs command to docker-compose file

---
 build/cmake/docker-compose.yml |  4 ++++
 cmake/InstallLayout.cmake      | 27 +++++++++++----------------
 2 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/build/cmake/docker-compose.yml b/build/cmake/docker-compose.yml
index 02a9d9172e..373d759361 100644
--- a/build/cmake/docker-compose.yml
+++ b/build/cmake/docker-compose.yml
@@ -39,6 +39,10 @@ services:
     <<: *build-setup
     command: scl enable devtoolset-7 rh-ruby24 rh-python36 python27 -- bash ../src/build/cmake/build.sh deb
 
+  linux-pkgs:
+    <<: *build-setup
+    command: scl enable devtoolset-7 rh-ruby24 rh-python36 python27 -- bash ../src/build/cmake/build.sh linux-pkgs
+
   package: &package
     <<: *build-setup
     command: scl enable devtoolset-7 rh-ruby24 rh-python36 python27 -- bash ../src/build/cmake/build.sh package
diff --git a/cmake/InstallLayout.cmake b/cmake/InstallLayout.cmake
index 65d884c54e..aed423eeac 100644
--- a/cmake/InstallLayout.cmake
+++ b/cmake/InstallLayout.cmake
@@ -275,23 +275,18 @@ if((INSTALL_LAYOUT MATCHES "RPM") OR (INSTALL_LAYOUT MATCHES "DEB"))
     RESULT_VARIABLE IS_SYSTEMD
     OUTPUT_QUIET
     ERROR_QUIET)
-  if(IS_SYSTEMD EQUAL "0")
-    configure_file(${CMAKE_SOURCE_DIR}/packaging/rpm/foundationdb.service
-      ${CMAKE_BINARY_DIR}/packaging/rpm/foundationdb.service)
-    install(FILES ${CMAKE_BINARY_DIR}/packaging/rpm/foundationdb.service
-      DESTINATION "lib/systemd/system"
+  install(FILES ${CMAKE_SOURCE_DIR}/packaging/rpm/foundationdb.service
+    DESTINATION "lib/systemd/system"
+    COMPONENT server)
+  if(INSTALL_LAYOUT MATCHES "RPM")
+    install(PROGRAMS ${CMAKE_SOURCE_DIR}/packaging/rpm/foundationdb-init
+      DESTINATION "etc/rc.d/init.d"
+      RENAME "foundationdb"
       COMPONENT server)
   else()
-    if(INSTALL_LAYOUT MATCHES "RPM")
-      install(PROGRAMS ${CMAKE_SOURCE_DIR}/packaging/rpm/foundationdb-init
-        DESTINATION "etc/rc.d/init.d"
-        RENAME "foundationdb"
-        COMPONENT server)
-    else()
-      install(PROGRAMS ${CMAKE_SOURCE_DIR}/packaging/deb/foundationdb-init
-        DESTINATION "etc/init.d"
-        RENAME "foundationdb"
-        COMPONENT server)
-    endif()
+    install(PROGRAMS ${CMAKE_SOURCE_DIR}/packaging/deb/foundationdb-init
+      DESTINATION "etc/init.d"
+      RENAME "foundationdb"
+      COMPONENT server)
   endif()
 endif()

From b06aa3b8ea04c0762b33bc04b07e3f287cee733a Mon Sep 17 00:00:00 2001
From: mpilman
Date: Sun, 10 Feb 2019 22:51:40 -0800
Subject: [PATCH 221/226] Several bug fixes - debian package working

Did some minor manual testing on the debian package
---
 build/cmake/debian-test/Dockerfile |  3 +++
 cmake/InstallLayout.cmake          | 34 +++++++++++++-----------------
 fdbbackup/CMakeLists.txt           | 16 +++++++-------
 3 files changed, 26 insertions(+), 27 deletions(-)
 create mode 100644 build/cmake/debian-test/Dockerfile

diff --git a/build/cmake/debian-test/Dockerfile b/build/cmake/debian-test/Dockerfile
new file mode 100644
index 0000000000..94ecd6dbef
--- /dev/null
+++ b/build/cmake/debian-test/Dockerfile
@@ -0,0 +1,3 @@
+FROM ubuntu:16.04
+
+RUN apt-get update
diff --git a/cmake/InstallLayout.cmake b/cmake/InstallLayout.cmake
index aed423eeac..f3384c7cf4 100644
--- a/cmake/InstallLayout.cmake
+++ b/cmake/InstallLayout.cmake
@@ -4,25 +4,16 @@
 function(install_symlink)
   set(options "")
-  set(one_value_options COMPONENT FROM TO)
+  set(one_value_options COMPONENT TO DESTINATION)
   set(multi_value_options)
   cmake_parse_arguments(SYM "${options}" "${one_value_options}" "${multi_value_options}" "${ARGN}")
-  if(NOT SYM_COMPONENT OR NOT SYM_FROM OR NOT SYM_TO)
-    message(FATA_ERROR "Invalid call to install_symlink")
-  endif()
-  get_filename_component(dest_dir ${SYM_TO} DIRECTORY)
-  install(CODE "execute_process(COMMAND ${CMAKE_COMMAND} -E make_directory ${dest_dir})" COMPONENT ${SYM_COMPONENT})
-  install(CODE "execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink ${SYM_FROM} ${SYM_TO})" COMPONENT ${SYM_COMPONENT})
-  install(CODE "message(\"-- Created symlink: ${SYM_FROM} -> ${SYM_TO}\")")
-endfunction()
-function(install_mkdir)
-  set(options "")
-  set(one_value_options COMPONENT NAME)
-  set(multi_value_options)
-  cmake_parse_arguments(MK "${options}" "${one_value_options}" "${multi_value_options}" "${ARGN}")
-  install(CODE "execute_process(COMMAND ${CMAKE_COMMAND} -E make_directory ${MK_NAME})" COMPONENT ${MK_COMPONENT})
-  install(CODE "message(\"-- Created directory: ${MK_NAME}\")")
+
+  file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/symlinks)
+  get_filename_component(fname ${SYM_DESTINATION} NAME)
+  get_filename_component(dest_dir ${SYM_DESTINATION} DIRECTORY)
+  set(sl ${CMAKE_CURRENT_BINARY_DIR}/symlinks/${fname})
+  execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink ${SYM_TO} ${sl})
+  install(FILES ${sl} DESTINATION ${dest_dir} COMPONENT ${SYM_COMPONENT})
 endfunction()
 
 if(NOT INSTALL_LAYOUT)
@@ -95,6 +86,7 @@ else()
   set(CPACK_PACKAGING_INSTALL_PREFIX "/")
   set(FDB_CONFIG_DIR "etc/foundationdb")
   set(FDB_LIB_DIR "usr/lib${LIBSUFFIX}")
+  set(FDB_LIB_NOSUFFIX "usr/lib")
   set(FDB_LIBEXEC_DIR ${FDB_LIB_DIR})
   set(FDB_BIN_DIR "usr/bin")
   set(FDB_SBIN_DIR "usr/sbin")
@@ -149,6 +141,12 @@ endif()
 ################################################################################
 # Configuration for RPM
 ################################################################################
+################################################################################
+
+if(UNIX AND NOT APPLE)
+  install(DIRECTORY DESTINATION "var/log/foundationdb" COMPONENT server)
+  install(DIRECTORY DESTINATION "var/lib/foundationdb/data" COMPONENT server)
+endif()
 
 if(INSTALL_LAYOUT MATCHES "RPM")
   set(CPACK_RPM_server_USER_FILELIST
@@ -187,8 +185,6 @@ if(INSTALL_LAYOUT MATCHES "RPM")
 #    "foundationdb-clients = ${FDB_MAJOR}.${FDB_MINOR}.${FDB_PATCH}")
   set(CPACK_RPM_python_PACKAGE_REQUIRES
     "foundationdb-clients = ${FDB_MAJOR}.${FDB_MINOR}.${FDB_PATCH}")
-  install_mkdir(NAME "var/log/foundationdb" COMPONENT server)
-  install_mkdir(NAME "var/lib/foundationdb" COMPONENT server)
 endif()
@@ -204,7 +200,7 @@ if(INSTALL_LAYOUT MATCHES "DEB")
   set(CPACK_DEBIAN_CLIENTS_PACKAGE_DEPENDS "adduser, libc6 (>= 2.12)")
   set(CPACK_DEBIAN_PACKAGE_HOMEPAGE "https://www.foundationdb.org")
   set(CPACK_DEBIAN_CLIENTS_PACKAGE_CONTROL_EXTRA
-    ${CMAKE_SOURCE_DIR}/packaging/deb/DEBIAN-foundationdb-CLIENTS/postinst)
+    ${CMAKE_SOURCE_DIR}/packaging/deb/DEBIAN-foundationdb-clients/postinst)
   set(CPACK_DEBIAN_SERVER_PACKAGE_CONTROL_EXTRA
     ${CMAKE_SOURCE_DIR}/packaging/deb/DEBIAN-foundationdb-server/conffiles
     ${CMAKE_SOURCE_DIR}/packaging/deb/DEBIAN-foundationdb-server/preinst
diff --git a/fdbbackup/CMakeLists.txt b/fdbbackup/CMakeLists.txt
index 35bed6a2c9..9091d7aeb1 100644
--- a/fdbbackup/CMakeLists.txt
+++ b/fdbbackup/CMakeLists.txt
@@ -6,18 +6,18 @@ target_link_libraries(fdbbackup PRIVATE fdbclient)
 install(TARGETS fdbbackup DESTINATION ${FDB_BIN_DIR} COMPONENT clients)
 install_symlink(
-  FROM ${FDB_BIN_DIR}/fdbbackup
-  TO ${FDB_LIB_DIR}/foundationdb/backup_agent
+  TO /${FDB_BIN_DIR}/fdbbackup
+  DESTINATION ${FDB_LIB_DIR}/foundationdb/backup_agent/backup_agent
   COMPONENT clients)
 install_symlink(
-  FROM ${FDB_BIN_DIR}/fdbbackup
-  TO ${FDB_BIN_DIR}/fdbrestore
+  TO /${FDB_BIN_DIR}/fdbbackup
+  DESTINATION ${FDB_BIN_DIR}/fdbrestore
   COMPONENT clients)
 install_symlink(
-  FROM ${FDB_BIN_DIR}/fdbbackup
-  TO ${FDB_BIN_DIR}/dr_agent
+  TO /${FDB_BIN_DIR}/fdbbackup
+  DESTINATION ${FDB_BIN_DIR}/dr_agent
   COMPONENT clients)
 install_symlink(
-  FROM ${FDB_BIN_DIR}/fdbbackup
-  TO ${FDB_BIN_DIR}/fdbdr
+  TO /${FDB_BIN_DIR}/fdbbackup
+  DESTINATION ${FDB_BIN_DIR}/fdbdr
   COMPONENT clients)

From a0b20ef2c1a5fa650002bc9d35e62df1b1961bf4 Mon Sep 17 00:00:00 2001
From: mpilman
Date: Sun, 10 Feb 2019 23:12:44 -0800
Subject: [PATCH 222/226] Add initscripts dependency to foundationdb-server

---
 build/cmake/centos-test/Dockerfile | 3 +++
 cmake/InstallLayout.cmake          | 3 ++-
 2 files changed, 5 insertions(+), 1 deletion(-)
 create mode 100644 build/cmake/centos-test/Dockerfile

diff --git a/build/cmake/centos-test/Dockerfile b/build/cmake/centos-test/Dockerfile
new file mode 100644
index 0000000000..3a2f07a4f8
--- /dev/null
+++ b/build/cmake/centos-test/Dockerfile
@@ -0,0 +1,3 @@
+FROM centos:6
+
+RUN yum install -y yum-utils
diff --git a/cmake/InstallLayout.cmake b/cmake/InstallLayout.cmake
index f3384c7cf4..f56ba83acb 100644
--- a/cmake/InstallLayout.cmake
+++ b/cmake/InstallLayout.cmake
@@ -180,7 +180,8 @@ if(INSTALL_LAYOUT MATCHES "RPM")
   set(CPACK_RPM_server_PRE_UNINSTALL_SCRIPT_FILE
     ${CMAKE_SOURCE_DIR}/packaging/rpm/scripts/preunserver.sh)
   set(CPACK_RPM_server_PACKAGE_REQUIRES
-    "foundationdb-clients = ${FDB_MAJOR}.${FDB_MINOR}.${FDB_PATCH}")
+    "foundationdb-clients = ${FDB_MAJOR}.${FDB_MINOR}.${FDB_PATCH}, initscripts >= 9.03")
+  set(CPACK_RPM_server_PACKAGE_RE)
 #set(CPACK_RPM_java_PACKAGE_REQUIRES
 #  "foundationdb-clients = ${FDB_MAJOR}.${FDB_MINOR}.${FDB_PATCH}")
   set(CPACK_RPM_python_PACKAGE_REQUIRES

From 3401c1f771f8acff7cc4203390079565ad9e9257 Mon Sep 17 00:00:00 2001
From: mpilman
Date: Mon, 11 Feb 2019 09:16:40 -0800
Subject: [PATCH 223/226] Add option to fail cmake if not all dependencies are
 found

---
 cmake/FDBComponents.cmake | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/cmake/FDBComponents.cmake b/cmake/FDBComponents.cmake
index 81c2f59ee9..a4df73b5cc 100644
--- a/cmake/FDBComponents.cmake
+++ b/cmake/FDBComponents.cmake
@@ -1,3 +1,5 @@
+set(FORCE_ALL_COMPONENTS OFF CACHE BOOL "Fails cmake if not all dependencies are found")
+
 ################################################################################
 # LibreSSL
 ################################################################################
@@ -82,7 +84,6 @@ endif()
 
 file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/packages)
 add_custom_target(packages)
-
 function(print_components)
   message(STATUS "=========================================")
   message(STATUS "   Components Build Overview             ")
@@ -95,3 +96,10 @@ function(print_components)
   message(STATUS "Build Documentation (make html): ${BUILD_DOCUMENTATION}")
   message(STATUS "=========================================")
 endfunction()
+
+if(FORCE_ALL_COMPONENTS)
+  if(NOT BUILD_JAVA OR NOT WITH_TLS OR NOT WITH_GO OR NOT WITH_RUBY OR NOT WITH_PYTHON OR NOT BUILD_DOCUMENTATION)
+    print_components()
+    message(FATAL_ERROR "FORCE_ALL_COMPONENTS is set but not all dependencies could be found")
+  endif()
+endif()

From 0754ce1a435b90e548d402f77ad0da079280be90 Mon Sep 17 00:00:00 2001
From: mpilman
Date: Mon, 11 Feb 2019 14:40:20 -0800
Subject: [PATCH 224/226] Fixed formatting of go code

---
 bindings/go/src/_util/translate_fdb_options.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bindings/go/src/_util/translate_fdb_options.go b/bindings/go/src/_util/translate_fdb_options.go
index f0527683da..418a6b2e1c 100644
--- a/bindings/go/src/_util/translate_fdb_options.go
+++ b/bindings/go/src/_util/translate_fdb_options.go
@@ -23,15 +23,15 @@
 package main
 
 import (
-	"flag"
 	"encoding/xml"
+	"flag"
 	"fmt"
 	"go/doc"
+	"io"
 	"io/ioutil"
 	"log"
 	"os"
 	"strings"
-	"io"
 )
 
 type Option struct {

From 0bbbfb6ee33fe4a684d6488ff03c45beefca073a Mon Sep 17 00:00:00 2001
From: mpilman
Date: Thu, 14 Feb 2019 16:37:48 -0800
Subject: [PATCH 225/226] Address Andrew's review comments

---
 CMakeLists.txt                 |  2 +-
 README.md                      |  8 ++++----
 bindings/CMakeLists.txt        |  2 +-
 build/cmake/build.sh           |  4 ++--
 build/cmake/docker-compose.yml |  2 +-
 cmake/FDBComponents.cmake      | 16 ++++++++--------
 6 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index f56f7857cd..c5e384ab73 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -174,7 +174,7 @@ endif()
 add_subdirectory(bindings)
 add_subdirectory(fdbbackup)
 add_subdirectory(tests)
-if(BUILD_DOCUMENTATION)
+if(WITH_DOCUMENTATION)
   add_subdirectory(documentation)
 endif()
 
diff --git a/README.md b/README.md
index 7a65543b1b..bedd5c4798 100755
--- a/README.md
+++ b/README.md
@@ -116,14 +116,14 @@ which language bindings it is going to build.
 
 ### Generating compile_commands.json
 
-CMake can build a compilation database for you. However, the default generatd
+CMake can build a compilation database for you. However, the default generated
 one is not too useful as it operates on the generated files. When running make,
 the build system will create another `compile_commands.json` file in the source
 directory. This can than be used for tools like
 [CCLS](https://github.com/MaskRay/ccls),
 [CQuery](https://github.com/cquery-project/cquery), etc. This way you can get
 code-completion and code navigation in flow. It is not yet perfect (it will show
-a few errors) but we are constantly working on improving the developement experience.
+a few errors) but we are constantly working on improving the development experience.
 
 ### Using IDEs
 
@@ -132,7 +132,7 @@ files are precompiled with the actor compiler, an IDE will not be very useful
 as a user will only be presented with the generated code - which is not what
 she wants to edit and get IDE features for.
 
-The good news is, that it is possible to generate project files for edititing
+The good news is, that it is possible to generate project files for editing
 flow with a supported IDE. There is a cmake option called `OPEN_FOR_IDE` which
 will generate a project which can be opened in an IDE for editing. You won't be
 able to build this project, but you will be able to edit the files and get most
@@ -206,7 +206,7 @@ that Visual Studio is used to compile.
 1. This should succeed. In which case you can build using msbuild:
    `msbuild /p:Configuration=Release fdb.sln`. You can also open the resulting
    solution in Visual Studio and compile from there. However, be aware that
-   using Visual Studio for developement is currently not supported as Visual
+   using Visual Studio for development is currently not supported as Visual
   Studio will only know about the generated files.
 
 If you want TLS support to be enabled under Windows you currently have to build
diff --git a/bindings/CMakeLists.txt b/bindings/CMakeLists.txt
index 106e65ba16..0af761e714 100644
--- a/bindings/CMakeLists.txt
+++ b/bindings/CMakeLists.txt
@@ -1,7 +1,7 @@
 add_subdirectory(c)
 add_subdirectory(flow)
 add_subdirectory(python)
-if(BUILD_JAVA)
+if(WITH_JAVA)
   add_subdirectory(java)
 endif()
 if(WITH_GO)
diff --git a/build/cmake/build.sh b/build/cmake/build.sh
index 8b39b413d6..02f6224a79 100644
--- a/build/cmake/build.sh
+++ b/build/cmake/build.sh
@@ -1,4 +1,4 @@
-#!env bash
+#!/usr/bin/env bash
 
 arguments_usage() {
 	cat <

Date: Sat, 16 Feb 2019 13:43:25 -0800
Subject: [PATCH 226/226] Update docs regarding versionstamp operations

Since API version 520, the last 4 bytes are used as an index for both
the key and value transformations.
---
 documentation/sphinx/source/api-common.rst.inc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/documentation/sphinx/source/api-common.rst.inc b/documentation/sphinx/source/api-common.rst.inc
index 75ed4f62cd..80f26fcd9b 100644
--- a/documentation/sphinx/source/api-common.rst.inc
+++ b/documentation/sphinx/source/api-common.rst.inc
@@ -127,13 +127,13 @@
     Performs lexicographic comparison of byte strings. If the existing value in the database is not present, then ``param`` is stored. Otherwise the larger of the two values is then stored in the database.
 
 .. |atomic-set-versionstamped-key-1| replace::
-    Transforms ``key`` using a versionstamp for the transaction. This key must be at least 12 bytes long. The final 2 bytes will be interpreted as a 16-bit little-endian integer denoting an index into the key at which to perform the transformation, and then trimmed off the key. The 10 bytes in the key beginning at the index will be overwritten with the versionstamp. If the index plus 10 bytes points past the end of the key, the result will be an error. Sets the transformed key in the database to ``param``.
+    Transforms ``key`` using a versionstamp for the transaction. This key must be at least 14 bytes long. The final 4 bytes will be interpreted as a 32-bit little-endian integer denoting an index into the key at which to perform the transformation, and then trimmed off the key. The 10 bytes in the key beginning at the index will be overwritten with the versionstamp. If the index plus 10 bytes points past the end of the key, the result will be an error. Sets the transformed key in the database to ``param``.
 
 .. |atomic-set-versionstamped-key-2| replace::
     This operation is not compatible with |read-your-writes-disable-option| and will generate an error if used with it.
 
 .. |atomic-set-versionstamped-value| replace::
-    Transforms ``param`` using a versionstamp for the transaction. This parameter must be at least 10 bytes long, and the first 10 bytes will be overwritten with the versionstamp. Sets ``key`` in the database to the transformed parameter.
+    Transforms ``param`` using a versionstamp for the transaction. This parameter must be at least 14 bytes long. The final 4 bytes will be interpreted as a 32-bit little-endian integer denoting an index into the parameter at which to perform the transformation, and then trimmed off the key. The 10 bytes in the parameter beginning at the index will be overwritten with the versionstamp. If the index plus 10 bytes points past the end of the parameter, the result will be an error. Sets ``key`` in the database to the transformed parameter.
 
 .. |atomic-versionstamps-1| replace::
     A versionstamp is a 10 byte, unique, monotonically (but not sequentially) increasing value for each committed transaction. The first 8 bytes are the committed version of the database. The last 2 bytes are monotonic in the serialization order for transactions.
@@ -145,7 +145,7 @@
     At this time, versionstamped keys are not compatible with the Tuple layer except in Java and Python. Note that this implies versionstamped keys may not be used with the Subspace and Directory layers except in those languages.
 
 .. |atomic-versionstamps-tuple-warning-value| replace::
-    At this time, versionstamped values are not compatible with the Tuple layer.
+    At this time, versionstamped values are not compatible with the Tuple layer except in Java and Python. Note that this implies versionstamped values may not be used with the Subspace and Directory layers except in those languages.
 
 .. |api-version| replace:: 610
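
To make the layout described by the last patch concrete, here is a minimal
sketch of both mutations as a C client built against this header (API version
610, where the post-520 offset format applies) could issue them. The two helper
functions and the buffer sizes are illustrative assumptions, not part of fdb_c;
only fdb_transaction_atomic_op and the FDB_MUTATION_TYPE_* constants are the
real API. Transaction setup, error handling, and commit are omitted.

#define FDB_API_VERSION 610
#include <foundationdb/fdb_c.h>
#include <stdint.h>
#include <string.h>

/* Illustrative helper (not part of fdb_c): writes the 4-byte little-endian
 * offset that API versions >= 520 expect at the end of the key/param. */
static void write_offset_le(uint8_t* p, uint32_t offset) {
    p[0] = (uint8_t)(offset);
    p[1] = (uint8_t)(offset >> 8);
    p[2] = (uint8_t)(offset >> 16);
    p[3] = (uint8_t)(offset >> 24);
}

/* Key layout: [prefix][10-byte versionstamp placeholder][4-byte LE offset].
 * The placeholder is overwritten with the commit versionstamp, and the
 * trailing 4 bytes are trimmed off before the key is stored. */
static void set_versionstamped_key(FDBTransaction* tr, const uint8_t* prefix,
                                   int prefix_len, const uint8_t* value,
                                   int value_len) {
    uint8_t key[128];
    if (prefix_len < 0 || prefix_len > (int)sizeof(key) - 14)
        return; /* keeps the sketch simple; real code would report an error */
    memcpy(key, prefix, (size_t)prefix_len);
    memset(key + prefix_len, 0, 10);                 /* placeholder bytes */
    write_offset_le(key + prefix_len + 10, (uint32_t)prefix_len);
    fdb_transaction_atomic_op(tr, key, prefix_len + 14, value, value_len,
                              FDB_MUTATION_TYPE_SET_VERSIONSTAMPED_KEY);
}

/* The value form is symmetric: param must be at least 14 bytes, with the
 * placeholder at the index named by the trailing 4 bytes (here index 0). */
static void set_versionstamped_value(FDBTransaction* tr, const uint8_t* key,
                                     int key_len) {
    uint8_t param[14];
    memset(param, 0, 10);           /* placeholder overwritten at commit */
    write_offset_le(param + 10, 0); /* transformation starts at index 0 */
    fdb_transaction_atomic_op(tr, key, key_len, param, (int)sizeof(param),
                              FDB_MUTATION_TYPE_SET_VERSIONSTAMPED_VALUE);
}

Before API version 520 the key form used a 2-byte offset and the value form
had no offset at all; that difference is exactly what the wording change in
PATCH 226 documents.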