FastRestore:Refactor code and add missing files
Add RestoreWorker.actor.cpp and RestoreWorkerInterface.actor.h back.
This commit is contained in:
parent
022b555b69
commit
701676dbd2
|
@ -48,7 +48,6 @@ static const char* typeString[] = { "SetValue",
|
|||
|
||||
struct MutationRef;
|
||||
std::string getHexString(StringRef input);
|
||||
std::string getHexKey(StringRef input, int skip);
|
||||
|
||||
struct MutationRef {
|
||||
static const int OVERHEAD_BYTES = 12; //12 is the size of Header in MutationList entries
|
||||
|
|
|
@ -184,6 +184,4 @@ typedef Standalone<MutationListRef> MutationList;
|
|||
template <class Ar> void load( Ar& ar, MutationListRef& r ) { r.serialize_load(ar); }
|
||||
template <class Ar> void save( Ar& ar, MutationListRef const& r ) { r.serialize_save(ar); }
|
||||
|
||||
void printMutationListRefHex(MutationListRef m, std::string prefix);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -256,7 +256,6 @@ void splitMutation(Reference<RestoreLoaderData> self, MutationRef m, Arena& mve
|
|||
ASSERT(mvector.empty());
|
||||
ASSERT(nodeIDs.empty());
|
||||
// key range [m->param1, m->param2)
|
||||
// printf("SPLITMUTATION: orignal mutation:%s\n", m.toString().c_str());
|
||||
std::map<Standalone<KeyRef>, UID>::iterator itlow, itup; //we will return [itlow, itup)
|
||||
itlow = self->range2Applier.lower_bound(m.param1); // lower_bound returns the iterator that is >= m.param1
|
||||
if ( itlow->first > m.param1 ) {
|
||||
|
@ -266,7 +265,6 @@ void splitMutation(Reference<RestoreLoaderData> self, MutationRef m, Arena& mve
|
|||
}
|
||||
|
||||
itup = self->range2Applier.upper_bound(m.param2); // upper_bound returns the iterator that is > m.param2; return rmap::end if no keys are considered to go after m.param2.
|
||||
// printf("SPLITMUTATION: itlow_key:%s itup_key:%s\n", itlow->first.toString().c_str(), itup == self->range2Applier.end() ? "[end]" : itup->first.toString().c_str());
|
||||
ASSERT( itup == self->range2Applier.end() || itup->first > m.param2 );
|
||||
|
||||
std::map<Standalone<KeyRef>, UID>::iterator itApplier;
|
||||
|
@ -290,14 +288,11 @@ void splitMutation(Reference<RestoreLoaderData> self, MutationRef m, Arena& mve
|
|||
} else {
|
||||
curm.param2 = itlow->first;
|
||||
}
|
||||
// printf("SPLITMUTATION: mvector.push_back:%s\n", curm.toString().c_str());
|
||||
ASSERT( curm.param1 <= curm.param2 );
|
||||
mvector.push_back_deep(mvector_arena, curm);
|
||||
nodeIDs.push_back(nodeIDs_arena, itApplier->second);
|
||||
}
|
||||
|
||||
// printf("SPLITMUTATION: mvector.size:%d\n", mvector.size());
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -62,7 +62,7 @@ struct RestoreLoaderData : RestoreRoleData, public ReferenceCounted<RestoreLoade
|
|||
role = RestoreRole::Loader;
|
||||
}
|
||||
|
||||
~RestoreLoaderData() {}
|
||||
~RestoreLoaderData() {} = default;
|
||||
|
||||
std::string describeNode() {
|
||||
std::stringstream ss;
|
||||
|
|
|
@ -60,13 +60,6 @@ ACTOR Future<Void> handleInitVersionBatchRequest(RestoreVersionBatchRequest req,
|
|||
if ( !self->versionBatchStart ) {
|
||||
self->versionBatchStart = true;
|
||||
self->resetPerVersionBatch();
|
||||
// if ( self->role == RestoreRole::Applier) {
|
||||
// RestoreApplierData* applier = (RestoreApplierData*) self.getPtr();
|
||||
// applier->dbApplier = Optional<Future<Void>>(); // reset dbApplier for next version batch
|
||||
// // if ( applier->dbApplier.present() ) {
|
||||
// // applier->dbApplier.~Optional(); // reset dbApplier for next version batch
|
||||
// // }
|
||||
// }
|
||||
}
|
||||
TraceEvent("FastRestore").detail("InitVersionBatch", req.batchID)
|
||||
.detail("Role", getRoleStr(self->role)).detail("Node", self->id());
|
||||
|
@ -93,91 +86,3 @@ std::string getHexString(StringRef input) {
|
|||
}
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
std::string getHexKey(StringRef input, int skip) {
|
||||
std::stringstream ss;
|
||||
for (int i = 0; i<skip; i++) {
|
||||
if ( i % 4 == 0 )
|
||||
ss << " ";
|
||||
ss << std::setfill('0') << std::setw(2) << std::hex << (int) input[i]; // [] operator moves the pointer in step of unit8
|
||||
}
|
||||
ss << "||";
|
||||
|
||||
//hashvalue
|
||||
ss << std::setfill('0') << std::setw(2) << std::hex << (int) input[skip]; // [] operator moves the pointer in step of unit8
|
||||
ss << "|";
|
||||
|
||||
// commitversion in 64bit
|
||||
int count = 0;
|
||||
for (int i = skip+1; i<input.size() && i < skip+1+8; i++) {
|
||||
if ( count++ % 4 == 0 )
|
||||
ss << " ";
|
||||
ss << std::setfill('0') << std::setw(2) << std::hex << (int) input[i]; // [] operator moves the pointer in step of unit8
|
||||
}
|
||||
// part value
|
||||
count = 0;
|
||||
for (int i = skip+1+8; i<input.size(); i++) {
|
||||
if ( count++ % 4 == 0 )
|
||||
ss << " ";
|
||||
ss << std::setfill('0') << std::setw(2) << std::hex << (int) input[i]; // [] operator moves the pointer in step of unit8
|
||||
}
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
|
||||
void printMutationListRefHex(MutationListRef m, std::string prefix) {
|
||||
MutationListRef::Iterator iter = m.begin();
|
||||
for ( ;iter != m.end(); ++iter) {
|
||||
printf("%s mType:%04x param1:%s param2:%s param1_size:%d, param2_size:%d\n", prefix.c_str(), iter->type,
|
||||
getHexString(iter->param1).c_str(), getHexString(iter->param2).c_str(), iter->param1.size(), iter->param2.size());
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
void printBackupLogKeyHex(Standalone<StringRef> key_input, std::string prefix) {
|
||||
std::stringstream ss;
|
||||
// const int version_size = 12;
|
||||
// const int header_size = 12;
|
||||
StringRef val = key_input.contents();
|
||||
StringRefReaderMX reader(val, restore_corrupted_data());
|
||||
|
||||
int count_size = 0;
|
||||
// Get the version
|
||||
uint64_t version = reader.consume<uint64_t>();
|
||||
count_size += 8;
|
||||
uint32_t val_length_decode = reader.consume<uint32_t>();
|
||||
count_size += 4;
|
||||
|
||||
printf("----------------------------------------------------------\n");
|
||||
printf("To decode value:%s at version:%ld\n", getHexString(val).c_str(), version);
|
||||
if ( val_length_decode != (val.size() - 12) ) {
|
||||
fprintf(stderr, "%s[PARSE ERROR]!!! val_length_decode:%d != val.size:%d\n", prefix.c_str(), val_length_decode, val.size());
|
||||
} else {
|
||||
printf("%s[PARSE SUCCESS] val_length_decode:%d == (val.size:%d - 12)\n", prefix.c_str(), val_length_decode, val.size());
|
||||
}
|
||||
|
||||
// Get the mutation header
|
||||
while (1) {
|
||||
// stop when reach the end of the string
|
||||
if(reader.eof() ) { //|| *reader.rptr == 0xFF
|
||||
//printf("Finish decode the value\n");
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
uint32_t type = reader.consume<uint32_t>();//reader.consumeNetworkUInt32();
|
||||
uint32_t kLen = reader.consume<uint32_t>();//reader.consumeNetworkUInt32();
|
||||
uint32_t vLen = reader.consume<uint32_t>();//reader.consumeNetworkUInt32();
|
||||
const uint8_t *k = reader.consume(kLen);
|
||||
const uint8_t *v = reader.consume(vLen);
|
||||
count_size += 4 * 3 + kLen + vLen;
|
||||
|
||||
if ( kLen < 0 || kLen > val.size() || vLen < 0 || vLen > val.size() ) {
|
||||
printf("%s[PARSE ERROR]!!!! kLen:%d(0x%04x) vLen:%d(0x%04x)\n", prefix.c_str(), kLen, kLen, vLen, vLen);
|
||||
}
|
||||
|
||||
printf("%s---DedoceBackupMutation: Type:%d K:%s V:%s k_size:%d v_size:%d\n", prefix.c_str(),
|
||||
type, getHexString(KeyRef(k, kLen)).c_str(), getHexString(KeyRef(v, vLen)).c_str(), kLen, vLen);
|
||||
|
||||
}
|
||||
}
|
|
@ -0,0 +1,447 @@
|
|||
/*
|
||||
* Restore.actor.cpp
|
||||
*
|
||||
* This source file is part of the FoundationDB open source project
|
||||
*
|
||||
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <ctime>
|
||||
#include <climits>
|
||||
#include <numeric>
|
||||
#include <algorithm>
|
||||
#include <boost/algorithm/string/split.hpp>
|
||||
#include <boost/algorithm/string/classification.hpp>
|
||||
|
||||
#include "fdbclient/NativeAPI.actor.h"
|
||||
#include "fdbclient/SystemData.h"
|
||||
#include "fdbclient/BackupAgent.actor.h"
|
||||
#include "fdbclient/ManagementAPI.actor.h"
|
||||
#include "fdbclient/MutationList.h"
|
||||
#include "fdbclient/BackupContainer.h"
|
||||
#include "fdbrpc/IAsyncFile.h"
|
||||
#include "flow/genericactors.actor.h"
|
||||
#include "flow/Hash3.h"
|
||||
#include "flow/ActorCollection.h"
|
||||
#include "fdbserver/RestoreUtil.h"
|
||||
#include "fdbserver/RestoreWorkerInterface.actor.h"
|
||||
#include "fdbserver/RestoreCommon.actor.h"
|
||||
#include "fdbserver/RestoreRoleCommon.actor.h"
|
||||
#include "fdbserver/RestoreLoader.actor.h"
|
||||
#include "fdbserver/RestoreApplier.actor.h"
|
||||
#include "fdbserver/RestoreMaster.actor.h"
|
||||
|
||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
|
||||
|
||||
FastRestoreOpConfig opConfig;
|
||||
|
||||
int NUM_APPLIERS = 40;
|
||||
|
||||
int restoreStatusIndex = 0;
|
||||
|
||||
class RestoreConfig;
|
||||
struct RestoreWorkerData; // Only declare the struct exist but we cannot use its field
|
||||
|
||||
void initRestoreWorkerConfig();
|
||||
|
||||
ACTOR Future<Void> handlerTerminateWorkerRequest(RestoreSimpleRequest req, Reference<RestoreWorkerData> self, RestoreWorkerInterface workerInterf, Database cx);
|
||||
ACTOR Future<Void> monitorWorkerLiveness(Reference<RestoreWorkerData> self);
|
||||
ACTOR Future<Void> handleRecruitRoleRequest(RestoreRecruitRoleRequest req, Reference<RestoreWorkerData> self, ActorCollection *actors, Database cx);
|
||||
ACTOR Future<Void> collectRestoreWorkerInterface(Reference<RestoreWorkerData> self, Database cx, int min_num_workers = 2);
|
||||
ACTOR Future<Void> recruitRestoreRoles(Reference<RestoreWorkerData> self);
|
||||
ACTOR Future<Void> monitorleader(Reference<AsyncVar<RestoreWorkerInterface>> leader, Database cx, RestoreWorkerInterface myWorkerInterf);
|
||||
ACTOR Future<Void> startRestoreWorkerLeader(Reference<RestoreWorkerData> self, RestoreWorkerInterface workerInterf, Database cx);
|
||||
ACTOR Future<Void> handleRestoreSysInfoRequest(RestoreSysInfoRequest req, Reference<RestoreWorkerData> self);
|
||||
|
||||
template<> Tuple Codec<ERestoreState>::pack(ERestoreState const &val);
|
||||
template<> ERestoreState Codec<ERestoreState>::unpack(Tuple const &val);
|
||||
|
||||
// Each restore worker (a process) is assigned for a role.
|
||||
// MAYBE Later: We will support multiple restore roles on a worker
|
||||
struct RestoreWorkerData : NonCopyable, public ReferenceCounted<RestoreWorkerData> {
|
||||
UID workerID;
|
||||
std::map<UID, RestoreWorkerInterface> workerInterfaces; // UID is worker's node id, RestoreWorkerInterface is worker's communication workerInterface
|
||||
|
||||
// Restore Roles
|
||||
Optional<RestoreLoaderInterface> loaderInterf;
|
||||
Reference<RestoreLoaderData> loaderData;
|
||||
Optional<RestoreApplierInterface> applierInterf;
|
||||
Reference<RestoreApplierData> applierData;
|
||||
Reference<RestoreMasterData> masterData;
|
||||
|
||||
uint32_t inProgressFlag = 0; // To avoid race between duplicate message delivery that invokes the same actor multiple times
|
||||
|
||||
UID id() const { return workerID; };
|
||||
|
||||
RestoreWorkerData() = default;
|
||||
|
||||
~RestoreWorkerData() {
|
||||
printf("[Exit] Worker:%s RestoreWorkerData is deleted\n", workerID.toString().c_str());
|
||||
}
|
||||
|
||||
std::string describeNode() {
|
||||
std::stringstream ss;
|
||||
ss << "RestoreWorker workerID:" << workerID.toString();
|
||||
return ss.str();
|
||||
}
|
||||
};
|
||||
|
||||
// Remove the worker interface from restoreWorkerKey and remove its roles interfaces from their keys.
|
||||
ACTOR Future<Void> handlerTerminateWorkerRequest(RestoreSimpleRequest req, Reference<RestoreWorkerData> self, RestoreWorkerInterface workerInterf, Database cx) {
|
||||
wait( runRYWTransaction( cx, [=](Reference<ReadYourWritesTransaction> tr) -> Future<Void> {
|
||||
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
|
||||
tr->clear(restoreWorkerKeyFor(workerInterf.id()));
|
||||
return Void();
|
||||
}) );
|
||||
|
||||
TraceEvent("FastRestore").detail("HandleTerminateWorkerReq", self->id());
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
// Assume only 1 role on a restore worker.
|
||||
// Future: Multiple roles in a restore worker
|
||||
ACTOR Future<Void> handleRecruitRoleRequest(RestoreRecruitRoleRequest req, Reference<RestoreWorkerData> self, ActorCollection *actors, Database cx) {
|
||||
// Already recruited a role
|
||||
if (self->loaderInterf.present()) {
|
||||
ASSERT( req.role == RestoreRole::Loader );
|
||||
req.reply.send(RestoreRecruitRoleReply(self->id(), RestoreRole::Loader, self->loaderInterf.get()));
|
||||
return Void();
|
||||
} else if (self->applierInterf.present()) {
|
||||
req.reply.send(RestoreRecruitRoleReply(self->id(), RestoreRole::Applier, self->applierInterf.get()));
|
||||
return Void();
|
||||
}
|
||||
|
||||
if (req.role == RestoreRole::Loader) {
|
||||
ASSERT( !self->loaderInterf.present() );
|
||||
self->loaderInterf = RestoreLoaderInterface();
|
||||
self->loaderInterf.get().initEndpoints();
|
||||
RestoreLoaderInterface &recruited = self->loaderInterf.get();
|
||||
DUMPTOKEN(recruited.setApplierKeyRangeVectorRequest);
|
||||
DUMPTOKEN(recruited.initVersionBatch);
|
||||
DUMPTOKEN(recruited.collectRestoreRoleInterfaces);
|
||||
DUMPTOKEN(recruited.finishRestore);
|
||||
self->loaderData = Reference<RestoreLoaderData>( new RestoreLoaderData(self->loaderInterf.get().id(), req.nodeIndex) );
|
||||
actors->add( restoreLoaderCore(self->loaderData, self->loaderInterf.get(), cx) );
|
||||
TraceEvent("FastRestore").detail("LoaderRecruited", self->loaderData->id());
|
||||
req.reply.send(RestoreRecruitRoleReply(self->id(), RestoreRole::Loader, self->loaderInterf.get()));
|
||||
} else if (req.role == RestoreRole::Applier) {
|
||||
ASSERT( !self->applierInterf.present() );
|
||||
self->applierInterf = RestoreApplierInterface();
|
||||
self->applierInterf.get().initEndpoints();
|
||||
RestoreApplierInterface &recruited = self->applierInterf.get();
|
||||
DUMPTOKEN(recruited.sendMutationVector);
|
||||
DUMPTOKEN(recruited.applyToDB);
|
||||
DUMPTOKEN(recruited.initVersionBatch);
|
||||
DUMPTOKEN(recruited.collectRestoreRoleInterfaces);
|
||||
DUMPTOKEN(recruited.finishRestore);
|
||||
self->applierData = Reference<RestoreApplierData>( new RestoreApplierData(self->applierInterf.get().id(), req.nodeIndex) );
|
||||
actors->add( restoreApplierCore(self->applierData, self->applierInterf.get(), cx) );
|
||||
TraceEvent("FastRestore").detail("ApplierRecruited", self->applierData->id());
|
||||
req.reply.send(RestoreRecruitRoleReply(self->id(), RestoreRole::Applier, self->applierInterf.get()));
|
||||
} else {
|
||||
TraceEvent(SevError, "FastRestore").detail("HandleRecruitRoleRequest", "UnknownRole"); //.detail("Request", req.printable());
|
||||
}
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
// Assume: Only update the local data if it (applierInterf) has not been set
|
||||
ACTOR Future<Void> handleRestoreSysInfoRequest(RestoreSysInfoRequest req, Reference<RestoreWorkerData> self) {
|
||||
TraceEvent("FastRestore").detail("HandleRestoreSysInfoRequest", self->id());
|
||||
// Applier does not need to know appliers interfaces
|
||||
if ( !self->loaderData.isValid() ) {
|
||||
req.reply.send(RestoreCommonReply(self->id()));
|
||||
return Void();
|
||||
}
|
||||
// The loader has received the appliers interfaces
|
||||
if ( !self->loaderData->appliersInterf.empty() ) {
|
||||
req.reply.send(RestoreCommonReply(self->id()));
|
||||
return Void();
|
||||
}
|
||||
|
||||
self->loaderData->appliersInterf = req.sysInfo.appliers;
|
||||
|
||||
req.reply.send(RestoreCommonReply(self->id()) );
|
||||
return Void();
|
||||
}
|
||||
|
||||
|
||||
// Read restoreWorkersKeys from DB to get each restore worker's restore workerInterface and set it to self->workerInterfaces
|
||||
// This is done before we assign restore roles for restore workers
|
||||
ACTOR Future<Void> collectRestoreWorkerInterface(Reference<RestoreWorkerData> self, Database cx, int min_num_workers) {
|
||||
state Transaction tr(cx);
|
||||
state vector<RestoreWorkerInterface> agents; // agents is cmdsInterf
|
||||
|
||||
loop {
|
||||
try {
|
||||
self->workerInterfaces.clear();
|
||||
agents.clear();
|
||||
tr.reset();
|
||||
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
|
||||
Standalone<RangeResultRef> agentValues = wait(tr.getRange(restoreWorkersKeys, CLIENT_KNOBS->TOO_MANY));
|
||||
ASSERT(!agentValues.more);
|
||||
// If agentValues.size() < min_num_workers, we should wait for coming workers to register their workerInterface before we read them once for all
|
||||
if(agentValues.size() >= min_num_workers) {
|
||||
for(auto& it : agentValues) {
|
||||
agents.push_back(BinaryReader::fromStringRef<RestoreWorkerInterface>(it.value, IncludeVersion()));
|
||||
// Save the RestoreWorkerInterface for the later operations
|
||||
self->workerInterfaces.insert(std::make_pair(agents.back().id(), agents.back()));
|
||||
}
|
||||
break;
|
||||
}
|
||||
wait( delay(5.0) );
|
||||
} catch( Error &e ) {
|
||||
wait( tr.onError(e) );
|
||||
}
|
||||
}
|
||||
ASSERT(agents.size() >= min_num_workers); // ASSUMPTION: We must have at least 1 loader and 1 applier
|
||||
|
||||
TraceEvent("FastRestore").detail("CollectWorkerInterfaceNumWorkers", self->workerInterfaces.size());
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
|
||||
// Periodically send worker heartbeat to
|
||||
ACTOR Future<Void> monitorWorkerLiveness(Reference<RestoreWorkerData> self) {
|
||||
ASSERT( !self->workerInterfaces.empty() );
|
||||
|
||||
state std::map<UID, RestoreWorkerInterface>::iterator workerInterf;
|
||||
loop {
|
||||
std::vector<std::pair<UID, RestoreSimpleRequest>> requests;
|
||||
for (auto& worker : self->workerInterfaces) {
|
||||
requests.push_back(std::make_pair(worker.first, RestoreSimpleRequest()));
|
||||
}
|
||||
wait( sendBatchRequests(&RestoreWorkerInterface::heartbeat, self->workerInterfaces, requests) );
|
||||
wait( delay(60.0) );
|
||||
}
|
||||
}
|
||||
|
||||
void initRestoreWorkerConfig() {
|
||||
opConfig.num_loaders = g_network->isSimulated() ? 3 : opConfig.num_loaders;
|
||||
opConfig.num_appliers = g_network->isSimulated() ? 3 : opConfig.num_appliers;
|
||||
opConfig.transactionBatchSizeThreshold = g_network->isSimulated() ? 512 : opConfig.transactionBatchSizeThreshold; // Byte
|
||||
TraceEvent("FastRestore").detail("InitOpConfig", "Result")
|
||||
.detail("NumLoaders", opConfig.num_loaders).detail("NumAppliers", opConfig.num_appliers)
|
||||
.detail("TxnBatchSize", opConfig.transactionBatchSizeThreshold);
|
||||
}
|
||||
|
||||
// RestoreWorker that has restore master role: Recruite a role for each worker
|
||||
ACTOR Future<Void> recruitRestoreRoles(Reference<RestoreWorkerData> self) {
|
||||
TraceEvent("FastRestore").detail("RecruitRestoreRoles", self->workerInterfaces.size())
|
||||
.detail("NumLoaders", opConfig.num_loaders).detail("NumAppliers", opConfig.num_appliers);
|
||||
|
||||
ASSERT( self->masterData.isValid() );
|
||||
ASSERT( opConfig.num_loaders > 0 && opConfig.num_appliers > 0 );
|
||||
ASSERT( opConfig.num_loaders + opConfig.num_appliers <= self->workerInterfaces.size() ); // We assign 1 role per worker for now
|
||||
|
||||
// Assign a role to each worker
|
||||
state int nodeIndex = 0;
|
||||
state RestoreRole role;
|
||||
std::map<UID, RestoreRecruitRoleRequest> requests;
|
||||
for (auto &workerInterf : self->workerInterfaces) {
|
||||
if ( nodeIndex >= 0 && nodeIndex < opConfig.num_appliers ) {
|
||||
// [0, numApplier) are appliers
|
||||
role = RestoreRole::Applier;
|
||||
} else if ( nodeIndex >= opConfig.num_appliers && nodeIndex < opConfig.num_loaders + opConfig.num_appliers ) {
|
||||
// [numApplier, numApplier + numLoader) are loaders
|
||||
role = RestoreRole::Loader;
|
||||
}
|
||||
|
||||
TraceEvent("FastRestore").detail("Role", getRoleStr(role)).detail("WorkerNode", workerInterf.first);
|
||||
requests[workerInterf.first] = RestoreRecruitRoleRequest(role, nodeIndex);
|
||||
nodeIndex++;
|
||||
}
|
||||
|
||||
state std::vector<RestoreRecruitRoleReply> replies;
|
||||
wait( getBatchReplies(&RestoreWorkerInterface::recruitRole, self->workerInterfaces, requests, &replies) );
|
||||
for (auto& reply : replies) {
|
||||
if ( reply.role == RestoreRole::Applier ) {
|
||||
ASSERT_WE_THINK(reply.applier.present());
|
||||
self->masterData->appliersInterf[reply.applier.get().id()] = reply.applier.get();
|
||||
} else if ( reply.role == RestoreRole::Loader ) {
|
||||
ASSERT_WE_THINK(reply.loader.present());
|
||||
self->masterData->loadersInterf[reply.loader.get().id()] = reply.loader.get();
|
||||
} else {
|
||||
TraceEvent(SevError, "FastRestore").detail("RecruitRestoreRoles_InvalidRole", reply.role);
|
||||
}
|
||||
}
|
||||
TraceEvent("FastRestore").detail("RecruitRestoreRolesDone", self->workerInterfaces.size());
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> distributeRestoreSysInfo(Reference<RestoreWorkerData> self) {
|
||||
ASSERT( self->masterData.isValid() );
|
||||
ASSERT( !self->masterData->loadersInterf.empty() );
|
||||
RestoreSysInfo sysInfo(self->masterData->appliersInterf);
|
||||
std::vector<std::pair<UID, RestoreSysInfoRequest>> requests;
|
||||
for (auto &worker : self->workerInterfaces) {
|
||||
requests.push_back( std::make_pair(worker.first, RestoreSysInfoRequest(sysInfo)) );
|
||||
}
|
||||
|
||||
TraceEvent("FastRestore").detail("DistributeRestoreSysInfo", self->workerInterfaces.size());
|
||||
wait( sendBatchRequests(&RestoreWorkerInterface::updateRestoreSysInfo, self->workerInterfaces, requests) );
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
// RestoreWorkerLeader is the worker that runs RestoreMaster role
|
||||
ACTOR Future<Void> startRestoreWorkerLeader(Reference<RestoreWorkerData> self, RestoreWorkerInterface workerInterf, Database cx) {
|
||||
self->masterData = Reference<RestoreMasterData>(new RestoreMasterData());
|
||||
// We must wait for enough time to make sure all restore workers have registered their workerInterfaces into the DB
|
||||
printf("[INFO][Master] NodeID:%s Restore master waits for agents to register their workerKeys\n",
|
||||
workerInterf.id().toString().c_str());
|
||||
wait( delay(10.0) );
|
||||
printf("[INFO][Master] NodeID:%s starts configuring roles for workers\n", workerInterf.id().toString().c_str());
|
||||
|
||||
wait( collectRestoreWorkerInterface(self, cx, opConfig.num_loaders + opConfig.num_appliers) );
|
||||
|
||||
// TODO: Needs to keep this monitor's future. May use actorCollection
|
||||
state Future<Void> workersFailureMonitor = monitorWorkerLiveness(self);
|
||||
|
||||
// recruitRestoreRoles must be after collectWorkerInterface
|
||||
wait( recruitRestoreRoles(self) );
|
||||
|
||||
wait( distributeRestoreSysInfo(self) );
|
||||
|
||||
wait( startRestoreMaster(self->masterData, cx) );
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> startRestoreWorker(Reference<RestoreWorkerData> self, RestoreWorkerInterface interf, Database cx) {
|
||||
state double lastLoopTopTime;
|
||||
state ActorCollection actors(false); // Collect the main actor for each role
|
||||
state Future<Void> exitRole = Never();
|
||||
|
||||
loop {
|
||||
double loopTopTime = now();
|
||||
double elapsedTime = loopTopTime - lastLoopTopTime;
|
||||
if( elapsedTime > 0.050 ) {
|
||||
if (g_random->random01() < 0.01)
|
||||
TraceEvent(SevWarn, "SlowRestoreLoaderLoopx100").detail("NodeDesc", self->describeNode()).detail("Elapsed", elapsedTime);
|
||||
}
|
||||
lastLoopTopTime = loopTopTime;
|
||||
state std::string requestTypeStr = "[Init]";
|
||||
|
||||
try {
|
||||
choose {
|
||||
when ( RestoreSimpleRequest req = waitNext(interf.heartbeat.getFuture()) ) {
|
||||
requestTypeStr = "heartbeat";
|
||||
actors.add( handleHeartbeat(req, interf.id()) );
|
||||
}
|
||||
when ( RestoreRecruitRoleRequest req = waitNext(interf.recruitRole.getFuture()) ) {
|
||||
requestTypeStr = "recruitRole";
|
||||
actors.add( handleRecruitRoleRequest(req, self, &actors, cx) );
|
||||
}
|
||||
when ( RestoreSysInfoRequest req = waitNext(interf.updateRestoreSysInfo.getFuture()) ) {
|
||||
requestTypeStr = "updateRestoreSysInfo";
|
||||
actors.add( handleRestoreSysInfoRequest(req, self) );
|
||||
}
|
||||
when ( RestoreSimpleRequest req = waitNext(interf.terminateWorker.getFuture()) ) {
|
||||
// Destroy the worker at the end of the restore
|
||||
requestTypeStr = "terminateWorker";
|
||||
exitRole = handlerTerminateWorkerRequest(req, self, interf, cx);
|
||||
}
|
||||
when ( wait(exitRole) ) {
|
||||
TraceEvent("FastRestore").detail("RestoreWorkerCore", "ExitRole").detail("NodeID", self->id());
|
||||
break;
|
||||
}
|
||||
}
|
||||
} catch (Error &e) {
|
||||
TraceEvent(SevWarn, "FastRestore").detail("RestoreWorkerError", e.what()).detail("RequestType", requestTypeStr);
|
||||
break;
|
||||
// if ( requestTypeStr.find("[Init]") != std::string::npos ) {
|
||||
// TraceEvent(SevError, "FastRestore").detail("RestoreWorkerUnexpectedExit", "RequestType_Init");
|
||||
// break;
|
||||
// }
|
||||
}
|
||||
}
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> _restoreWorker(Database cx, LocalityData locality) {
|
||||
state ActorCollection actors(false);
|
||||
state Future<Void> myWork = Never();
|
||||
state Reference<AsyncVar<RestoreWorkerInterface>> leader = Reference<AsyncVar<RestoreWorkerInterface>>(
|
||||
new AsyncVar<RestoreWorkerInterface>() );
|
||||
|
||||
state RestoreWorkerInterface myWorkerInterf;
|
||||
myWorkerInterf.initEndpoints();
|
||||
state Reference<RestoreWorkerData> self = Reference<RestoreWorkerData>(new RestoreWorkerData());
|
||||
self->workerID = myWorkerInterf.id();
|
||||
initRestoreWorkerConfig();
|
||||
|
||||
wait( monitorleader(leader, cx, myWorkerInterf) );
|
||||
|
||||
printf("Wait for leader\n");
|
||||
wait(delay(1));
|
||||
if (leader->get() == myWorkerInterf) {
|
||||
// Restore master worker: doLeaderThings();
|
||||
myWork = startRestoreWorkerLeader(self, myWorkerInterf, cx);
|
||||
} else {
|
||||
// Restore normal worker (for RestoreLoader and RestoreApplier roles): doWorkerThings();
|
||||
myWork = startRestoreWorker(self, myWorkerInterf, cx);
|
||||
}
|
||||
|
||||
wait(myWork);
|
||||
return Void();
|
||||
}
|
||||
|
||||
|
||||
|
||||
// RestoreMaster is the leader
|
||||
ACTOR Future<Void> monitorleader(Reference<AsyncVar<RestoreWorkerInterface>> leader, Database cx, RestoreWorkerInterface myWorkerInterf) {
|
||||
state ReadYourWritesTransaction tr(cx);
|
||||
//state Future<Void> leaderWatch;
|
||||
state RestoreWorkerInterface leaderInterf;
|
||||
loop {
|
||||
try {
|
||||
tr.reset();
|
||||
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
|
||||
Optional<Value> leaderValue = wait(tr.get(restoreLeaderKey));
|
||||
if(leaderValue.present()) {
|
||||
leaderInterf = BinaryReader::fromStringRef<RestoreWorkerInterface>(leaderValue.get(), IncludeVersion());
|
||||
// Register my interface as an worker
|
||||
tr.set(restoreWorkerKeyFor(myWorkerInterf.id()), restoreWorkerInterfaceValue(myWorkerInterf));
|
||||
} else {
|
||||
// Workers compete to be the leader
|
||||
tr.set(restoreLeaderKey, BinaryWriter::toValue(myWorkerInterf, IncludeVersion()));
|
||||
leaderInterf = myWorkerInterf;
|
||||
}
|
||||
wait( tr.commit() );
|
||||
leader->set(leaderInterf);
|
||||
break;
|
||||
} catch( Error &e ) {
|
||||
wait( tr.onError(e) );
|
||||
}
|
||||
}
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> restoreWorker(Reference<ClusterConnectionFile> ccf, LocalityData locality) {
|
||||
Database cx = Database::createDatabase(ccf->getFilename(), Database::API_VERSION_LATEST,locality);
|
||||
wait(_restoreWorker(cx, locality));
|
||||
return Void();
|
||||
}
|
||||
|
|
@ -0,0 +1,544 @@
|
|||
/*
|
||||
* RestoreWorkerInterface.actor.h
|
||||
*
|
||||
* This source file is part of the FoundationDB open source project
|
||||
*
|
||||
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// This file declare and define the interface for RestoreWorker and restore roles
|
||||
// which are RestoreMaster, RestoreLoader, and RestoreApplier
|
||||
|
||||
|
||||
#pragma once
|
||||
#if defined(NO_INTELLISENSE) && !defined(FDBSERVER_RESTORE_WORKER_INTERFACE_ACTOR_G_H)
|
||||
#define FDBSERVER_RESTORE_WORKER_INTERFACE_ACTOR_G_H
|
||||
#include "fdbserver/RestoreWorkerInterface.actor.g.h"
|
||||
#elif !defined(FDBSERVER_RESTORE_WORKER_INTERFACE_ACTOR_H)
|
||||
#define FDBSERVER_RESTORE_WORKER_INTERFACE_ACTOR_H
|
||||
|
||||
|
||||
#include <sstream>
|
||||
#include "flow/Stats.h"
|
||||
#include "fdbrpc/fdbrpc.h"
|
||||
#include "fdbrpc/Locality.h"
|
||||
#include "fdbclient/FDBTypes.h"
|
||||
#include "fdbclient/CommitTransaction.h"
|
||||
#include "fdbserver/CoordinationInterface.h"
|
||||
#include "fdbserver/Knobs.h"
|
||||
#include "fdbserver/RestoreUtil.h"
|
||||
|
||||
#include "flow/actorcompiler.h" // has to be last include
|
||||
|
||||
#define DUMPTOKEN( name ) TraceEvent("DumpToken", recruited.id()).detail("Name", #name).detail("Token", name.getEndpoint().token)
|
||||
|
||||
class RestoreConfig;
|
||||
|
||||
struct RestoreCommonReply;
|
||||
struct RestoreRecruitRoleRequest;
|
||||
struct RestoreSysInfoRequest;
|
||||
struct RestoreLoadFileRequest;
|
||||
struct RestoreVersionBatchRequest;
|
||||
struct RestoreSendMutationVectorVersionedRequest;
|
||||
struct RestoreSetApplierKeyRangeVectorRequest;
|
||||
struct RestoreSysInfo;
|
||||
struct RestoreApplierInterface;
|
||||
|
||||
|
||||
struct RestoreSysInfo {
|
||||
std::map<UID, RestoreApplierInterface> appliers;
|
||||
|
||||
RestoreSysInfo() = default;
|
||||
explicit RestoreSysInfo(std::map<UID, RestoreApplierInterface> appliers) : appliers(appliers) {}
|
||||
|
||||
template <class Ar>
|
||||
void serialize(Ar& ar) {
|
||||
serializer(ar, appliers);
|
||||
}
|
||||
};
|
||||
|
||||
struct RestoreWorkerInterface {
|
||||
UID interfID;
|
||||
|
||||
RequestStream<RestoreSimpleRequest> heartbeat;
|
||||
RequestStream<RestoreRecruitRoleRequest> recruitRole;
|
||||
RequestStream<RestoreSysInfoRequest> updateRestoreSysInfo;
|
||||
RequestStream<RestoreSimpleRequest> terminateWorker;
|
||||
|
||||
bool operator == (RestoreWorkerInterface const& r) const { return id() == r.id(); }
|
||||
bool operator != (RestoreWorkerInterface const& r) const { return id() != r.id(); }
|
||||
|
||||
UID id() const { return interfID; } //cmd.getEndpoint().token;
|
||||
|
||||
NetworkAddress address() const { return recruitRole.getEndpoint().addresses.address; }
|
||||
|
||||
void initEndpoints() {
|
||||
heartbeat.getEndpoint( TaskClusterController );
|
||||
recruitRole.getEndpoint( TaskClusterController );// Q: Why do we need this?
|
||||
updateRestoreSysInfo.getEndpoint(TaskClusterController);
|
||||
terminateWorker.getEndpoint( TaskClusterController );
|
||||
|
||||
interfID = g_random->randomUniqueID();
|
||||
}
|
||||
|
||||
template <class Ar>
|
||||
void serialize( Ar& ar ) {
|
||||
serializer(ar, interfID, heartbeat, updateRestoreSysInfo, recruitRole, terminateWorker);
|
||||
}
|
||||
};
|
||||
|
||||
struct RestoreRoleInterface {
|
||||
UID nodeID;
|
||||
RestoreRole role;
|
||||
|
||||
RestoreRoleInterface() {
|
||||
role = RestoreRole::Invalid;
|
||||
}
|
||||
|
||||
explicit RestoreRoleInterface(RestoreRoleInterface const& interf) : nodeID(interf.nodeID), role(interf.role) {};
|
||||
|
||||
UID id() const { return nodeID; }
|
||||
|
||||
std::string toString() {
|
||||
std::stringstream ss;
|
||||
ss << "Role:" << getRoleStr(role) << " interfID:" << nodeID.toString();
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
template <class Ar>
|
||||
void serialize( Ar& ar ) {
|
||||
serializer(ar, nodeID, role);
|
||||
}
|
||||
};
|
||||
|
||||
struct RestoreLoaderInterface : RestoreRoleInterface {
|
||||
RequestStream<RestoreSimpleRequest> heartbeat;
|
||||
RequestStream<RestoreSetApplierKeyRangeVectorRequest> setApplierKeyRangeVectorRequest;
|
||||
RequestStream<RestoreLoadFileRequest> loadFile;
|
||||
RequestStream<RestoreVersionBatchRequest> initVersionBatch;
|
||||
RequestStream<RestoreSimpleRequest> collectRestoreRoleInterfaces; // TODO: Change to collectRestoreRoleInterfaces
|
||||
RequestStream<RestoreVersionBatchRequest> finishRestore;
|
||||
|
||||
bool operator == (RestoreWorkerInterface const& r) const { return id() == r.id(); }
|
||||
bool operator != (RestoreWorkerInterface const& r) const { return id() != r.id(); }
|
||||
|
||||
RestoreLoaderInterface () {
|
||||
role = RestoreRole::Loader;
|
||||
nodeID = g_random->randomUniqueID();
|
||||
}
|
||||
|
||||
NetworkAddress address() const { return heartbeat.getEndpoint().addresses.address; }
|
||||
|
||||
void initEndpoints() {
|
||||
heartbeat.getEndpoint( TaskClusterController );
|
||||
setApplierKeyRangeVectorRequest.getEndpoint( TaskClusterController );
|
||||
loadFile.getEndpoint( TaskClusterController );
|
||||
initVersionBatch.getEndpoint( TaskClusterController );
|
||||
collectRestoreRoleInterfaces.getEndpoint( TaskClusterController );
|
||||
finishRestore.getEndpoint( TaskClusterController );
|
||||
}
|
||||
|
||||
template <class Ar>
|
||||
void serialize( Ar& ar ) {
|
||||
serializer(ar, * (RestoreRoleInterface*) this, heartbeat,
|
||||
setApplierKeyRangeVectorRequest, loadFile,
|
||||
initVersionBatch, collectRestoreRoleInterfaces, finishRestore);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
struct RestoreApplierInterface : RestoreRoleInterface {
|
||||
RequestStream<RestoreSimpleRequest> heartbeat;
|
||||
RequestStream<RestoreSendMutationVectorVersionedRequest> sendMutationVector;
|
||||
RequestStream<RestoreVersionBatchRequest> applyToDB;
|
||||
RequestStream<RestoreVersionBatchRequest> initVersionBatch;
|
||||
RequestStream<RestoreSimpleRequest> collectRestoreRoleInterfaces;
|
||||
RequestStream<RestoreVersionBatchRequest> finishRestore;
|
||||
|
||||
|
||||
bool operator == (RestoreWorkerInterface const& r) const { return id() == r.id(); }
|
||||
bool operator != (RestoreWorkerInterface const& r) const { return id() != r.id(); }
|
||||
|
||||
RestoreApplierInterface() {
|
||||
role = RestoreRole::Applier;
|
||||
nodeID = g_random->randomUniqueID();
|
||||
}
|
||||
|
||||
NetworkAddress address() const { return heartbeat.getEndpoint().addresses.address; }
|
||||
|
||||
void initEndpoints() {
|
||||
heartbeat.getEndpoint( TaskClusterController );
|
||||
sendMutationVector.getEndpoint( TaskClusterController );
|
||||
applyToDB.getEndpoint( TaskClusterController );
|
||||
initVersionBatch.getEndpoint( TaskClusterController );
|
||||
collectRestoreRoleInterfaces.getEndpoint( TaskClusterController );
|
||||
finishRestore.getEndpoint( TaskClusterController );
|
||||
}
|
||||
|
||||
template <class Ar>
|
||||
void serialize( Ar& ar ) {
|
||||
serializer(ar, * (RestoreRoleInterface*) this, heartbeat,
|
||||
sendMutationVector, applyToDB, initVersionBatch, collectRestoreRoleInterfaces, finishRestore);
|
||||
}
|
||||
|
||||
std::string toString() {
|
||||
return nodeID.toString();
|
||||
}
|
||||
};
|
||||
|
||||
// TODO: MX: It is probably better to specify the (beginVersion, endVersion] for each loadingParam. beginVersion (endVersion) is the version the applier is before (after) it receives the request.
|
||||
struct LoadingParam {
|
||||
bool isRangeFile;
|
||||
Key url;
|
||||
Version prevVersion;
|
||||
Version endVersion;
|
||||
Version version;
|
||||
std::string filename;
|
||||
int64_t offset;
|
||||
int64_t length;
|
||||
int64_t blockSize;
|
||||
KeyRange restoreRange;
|
||||
Key addPrefix;
|
||||
Key removePrefix;
|
||||
Key mutationLogPrefix;
|
||||
|
||||
// TODO: Compare all fields for loadingParam
|
||||
bool operator == ( const LoadingParam& r ) const { return isRangeFile == r.isRangeFile && filename == r.filename; }
|
||||
bool operator != ( const LoadingParam& r ) const { return isRangeFile != r.isRangeFile || filename != r.filename; }
|
||||
bool operator < ( const LoadingParam& r ) const {
|
||||
return (isRangeFile < r.isRangeFile) ||
|
||||
(isRangeFile == r.isRangeFile && filename < r.filename);
|
||||
}
|
||||
|
||||
template <class Ar>
|
||||
void serialize(Ar& ar) {
|
||||
serializer(ar, isRangeFile, url, prevVersion, endVersion, version, filename, offset, length, blockSize, restoreRange, addPrefix, removePrefix, mutationLogPrefix);
|
||||
}
|
||||
|
||||
std::string toString() {
|
||||
std::stringstream str;
|
||||
str << "isRangeFile:" << isRangeFile << "url:" << url.toString() << " prevVersion:" << prevVersion << " endVersion:" << endVersion << " version:" << version
|
||||
<< " filename:" << filename << " offset:" << offset << " length:" << length << " blockSize:" << blockSize
|
||||
<< " restoreRange:" << restoreRange.toString()
|
||||
<< " addPrefix:" << addPrefix.toString() << " removePrefix:" << removePrefix.toString();
|
||||
return str.str();
|
||||
}
|
||||
};
|
||||
|
||||
struct RestoreRecruitRoleReply : TimedRequest {
|
||||
UID id;
|
||||
RestoreRole role;
|
||||
Optional<RestoreLoaderInterface> loader;
|
||||
Optional<RestoreApplierInterface> applier;
|
||||
|
||||
RestoreRecruitRoleReply() = default;
|
||||
explicit RestoreRecruitRoleReply(UID id, RestoreRole role, RestoreLoaderInterface const& loader): id(id), role(role), loader(loader) {}
|
||||
explicit RestoreRecruitRoleReply(UID id, RestoreRole role, RestoreApplierInterface const& applier): id(id), role(role), applier(applier) {}
|
||||
|
||||
template <class Ar>
|
||||
void serialize( Ar& ar ) {
|
||||
serializer(ar, id, role, loader, applier);
|
||||
}
|
||||
|
||||
std::string toString() {
|
||||
std::stringstream ss;
|
||||
ss << "roleInterf role:" << getRoleStr(role) << " replyID:" << id.toString();
|
||||
if (loader.present()) {
|
||||
ss << "loader:" << loader.get().toString();
|
||||
}
|
||||
if (applier.present()) {
|
||||
ss << "applier:" << applier.get().toString();
|
||||
}
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
};
|
||||
|
||||
struct RestoreRecruitRoleRequest : TimedRequest {
|
||||
RestoreRole role;
|
||||
int nodeIndex; // Each role is a node
|
||||
|
||||
ReplyPromise<RestoreRecruitRoleReply> reply;
|
||||
|
||||
RestoreRecruitRoleRequest() : role(RestoreRole::Invalid) {}
|
||||
explicit RestoreRecruitRoleRequest(RestoreRole role, int nodeIndex) : role(role), nodeIndex(nodeIndex){}
|
||||
|
||||
template <class Ar>
|
||||
void serialize( Ar& ar ) {
|
||||
serializer(ar, role, nodeIndex, reply);
|
||||
}
|
||||
|
||||
std::string printable() {
|
||||
std::stringstream ss;
|
||||
ss << "RestoreRecruitRoleRequest Role:" << getRoleStr(role) << " NodeIndex:" << nodeIndex;
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
std::string toString() {
|
||||
return printable();
|
||||
}
|
||||
};
|
||||
|
||||
struct RestoreSysInfoRequest : TimedRequest {
|
||||
RestoreSysInfo sysInfo;
|
||||
|
||||
ReplyPromise<RestoreCommonReply> reply;
|
||||
|
||||
RestoreSysInfoRequest() = default;
|
||||
explicit RestoreSysInfoRequest(RestoreSysInfo sysInfo) : sysInfo(sysInfo) {}
|
||||
|
||||
template <class Ar>
|
||||
void serialize(Ar& ar) {
|
||||
serializer(ar, sysInfo, reply);
|
||||
}
|
||||
|
||||
std::string toString() {
|
||||
std::stringstream ss;
|
||||
ss << "RestoreSysInfoRequest";
|
||||
return ss.str();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// Sample_Range_File and Assign_Loader_Range_File, Assign_Loader_Log_File
|
||||
struct RestoreLoadFileRequest : TimedRequest {
|
||||
LoadingParam param;
|
||||
|
||||
ReplyPromise<RestoreCommonReply> reply;
|
||||
|
||||
RestoreLoadFileRequest() = default;
|
||||
explicit RestoreLoadFileRequest(LoadingParam param) : param(param) {}
|
||||
|
||||
template <class Ar>
|
||||
void serialize( Ar& ar ) {
|
||||
serializer(ar, param, reply);
|
||||
}
|
||||
|
||||
std::string toString() {
|
||||
std::stringstream ss;
|
||||
ss << "RestoreLoadFileRequest param:" << param.toString();
|
||||
return ss.str();
|
||||
}
|
||||
};
|
||||
|
||||
struct RestoreSendMutationVectorVersionedRequest : TimedRequest {
|
||||
Version prevVersion, version; // version is the commitVersion of the mutation vector.
|
||||
bool isRangeFile;
|
||||
Standalone<VectorRef<MutationRef>> mutations; // All mutations are at version
|
||||
|
||||
ReplyPromise<RestoreCommonReply> reply;
|
||||
|
||||
RestoreSendMutationVectorVersionedRequest() = default;
|
||||
explicit RestoreSendMutationVectorVersionedRequest(Version prevVersion, Version version, bool isRangeFile, VectorRef<MutationRef> mutations) :
|
||||
prevVersion(prevVersion), version(version), isRangeFile(isRangeFile), mutations(mutations) {}
|
||||
|
||||
std::string toString() {
|
||||
std::stringstream ss;
|
||||
ss << "prevVersion:" << prevVersion << " version:" << version << " isRangeFile:" << isRangeFile << " mutations.size:" << mutations.size();
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
template <class Ar>
|
||||
void serialize( Ar& ar ) {
|
||||
serializer(ar, prevVersion, version, isRangeFile, mutations, reply);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
struct RestoreVersionBatchRequest : TimedRequest {
|
||||
int batchID;
|
||||
|
||||
ReplyPromise<RestoreCommonReply> reply;
|
||||
|
||||
RestoreVersionBatchRequest() = default;
|
||||
explicit RestoreVersionBatchRequest(int batchID) : batchID(batchID) {}
|
||||
|
||||
template <class Ar>
|
||||
void serialize( Ar& ar ) {
|
||||
serializer(ar, batchID, reply);
|
||||
}
|
||||
|
||||
std::string toString() {
|
||||
std::stringstream ss;
|
||||
ss << "RestoreVersionBatchRequest BatchID:" << batchID;
|
||||
return ss.str();
|
||||
}
|
||||
};
|
||||
|
||||
struct RestoreSetApplierKeyRangeVectorRequest : TimedRequest {
|
||||
std::map<Standalone<KeyRef>, UID> range2Applier;
|
||||
|
||||
ReplyPromise<RestoreCommonReply> reply;
|
||||
|
||||
RestoreSetApplierKeyRangeVectorRequest() = default;
|
||||
explicit RestoreSetApplierKeyRangeVectorRequest(std::map<Standalone<KeyRef>, UID> range2Applier) : range2Applier(range2Applier) {}
|
||||
|
||||
template <class Ar>
|
||||
void serialize( Ar& ar ) {
|
||||
serializer(ar, range2Applier, reply);
|
||||
}
|
||||
|
||||
std::string toString() {
|
||||
std::stringstream ss;
|
||||
ss << "RestoreVersionBatchRequest range2ApplierSize:" << range2Applier.size();
|
||||
return ss.str();
|
||||
}
|
||||
};
|
||||
|
||||
struct RestoreRequest {
|
||||
//Database cx;
|
||||
int index;
|
||||
Key tagName;
|
||||
Key url;
|
||||
bool waitForComplete;
|
||||
Version targetVersion;
|
||||
bool verbose;
|
||||
KeyRange range;
|
||||
Key addPrefix;
|
||||
Key removePrefix;
|
||||
bool lockDB;
|
||||
UID randomUid;
|
||||
|
||||
int testData;
|
||||
std::vector<int> restoreRequests;
|
||||
//Key restoreTag;
|
||||
|
||||
ReplyPromise< struct RestoreReply > reply;
|
||||
|
||||
RestoreRequest() : testData(0) {}
|
||||
explicit RestoreRequest(int testData) : testData(testData) {}
|
||||
explicit RestoreRequest(int testData, std::vector<int> &restoreRequests) : testData(testData), restoreRequests(restoreRequests) {}
|
||||
|
||||
explicit RestoreRequest(const int index, const Key &tagName, const Key &url, bool waitForComplete, Version targetVersion, bool verbose,
|
||||
const KeyRange &range, const Key &addPrefix, const Key &removePrefix, bool lockDB,
|
||||
const UID &randomUid) : index(index), tagName(tagName), url(url), waitForComplete(waitForComplete),
|
||||
targetVersion(targetVersion), verbose(verbose), range(range),
|
||||
addPrefix(addPrefix), removePrefix(removePrefix), lockDB(lockDB),
|
||||
randomUid(randomUid) {}
|
||||
|
||||
template <class Ar>
|
||||
void serialize(Ar& ar) {
|
||||
serializer(ar, index , tagName , url , waitForComplete , targetVersion , verbose , range , addPrefix , removePrefix , lockDB , randomUid ,
|
||||
testData , restoreRequests , reply);
|
||||
}
|
||||
|
||||
//Q: Should I convert this toString() to a function to dump RestoreRequest to TraceEvent?
|
||||
std::string toString() const {
|
||||
std::stringstream ss;
|
||||
ss << "index:" << std::to_string(index) << " tagName:" << tagName.contents().toString() << " url:" << url.contents().toString()
|
||||
<< " waitForComplete:" << std::to_string(waitForComplete) << " targetVersion:" << std::to_string(targetVersion)
|
||||
<< " verbose:" << std::to_string(verbose) << " range:" << range.toString() << " addPrefix:" << addPrefix.contents().toString()
|
||||
<< " removePrefix:" << removePrefix.contents().toString() << " lockDB:" << std::to_string(lockDB) << " randomUid:" << randomUid.toString();
|
||||
return ss.str();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
struct RestoreReply {
|
||||
int replyData;
|
||||
|
||||
RestoreReply() : replyData(0) {}
|
||||
explicit RestoreReply(int replyData) : replyData(replyData) {}
|
||||
|
||||
template <class Ar>
|
||||
void serialize(Ar& ar) {
|
||||
serializer(ar, replyData);
|
||||
}
|
||||
};
|
||||
|
||||
std::string getRoleStr(RestoreRole role);
|
||||
|
||||
////--- Interface functions
|
||||
Future<Void> _restoreWorker(Database const& cx, LocalityData const& locality);
|
||||
Future<Void> restoreWorker(Reference<ClusterConnectionFile> const& ccf, LocalityData const& locality);
|
||||
|
||||
|
||||
// Send each request in requests via channel of the request's interface.
|
||||
// Do not expect a meaningful reply
|
||||
// The UID in a request is the UID of the interface to handle the request
|
||||
ACTOR template <class Interface, class Request>
|
||||
//Future< REPLY_TYPE(Request) >
|
||||
Future<Void> sendBatchRequests(
|
||||
RequestStream<Request> Interface::* channel,
|
||||
std::map<UID, Interface> interfaces,
|
||||
std::vector<std::pair<UID, Request>> requests) {
|
||||
|
||||
if ( requests.empty() ) {
|
||||
return Void();
|
||||
}
|
||||
|
||||
loop{
|
||||
try {
|
||||
std::vector<Future<REPLY_TYPE(Request)>> cmdReplies;
|
||||
for(auto& request : requests) {
|
||||
RequestStream<Request> const* stream = & (interfaces[request.first].*channel);
|
||||
cmdReplies.push_back( stream->getReply(request.second) );
|
||||
}
|
||||
|
||||
// Alex: Unless you want to do some action when it timeout multiple times, you should use timout. Otherwise, getReply will automatically keep retrying for you.
|
||||
std::vector<REPLY_TYPE(Request)> reps = wait( timeoutError(getAll(cmdReplies), SERVER_KNOBS->FASTRESTORE_FAILURE_TIMEOUT) ); //tryGetReply. Use GetReply. // Alex: you probably do NOT need the timeoutError.
|
||||
//wait( waitForAll(cmdReplies) ); //tryGetReply. Use GetReply. // Alex: you probably do NOT need the timeoutError.
|
||||
break;
|
||||
} catch (Error &e) {
|
||||
if ( e.code() == error_code_operation_cancelled ) break;
|
||||
fprintf(stdout, "sendBatchRequests Error code:%d, error message:%s\n", e.code(), e.what());
|
||||
for (auto& request : requests ) {
|
||||
TraceEvent(SevWarn, "FastRestore").detail("SendBatchRequests", requests.size())
|
||||
.detail("RequestID", request.first).detail("Request", request.second.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
// Similar to sendBatchRequests except that the caller expect to process the reply.
|
||||
// This actor can be combined with sendBatchRequests(...)
|
||||
ACTOR template <class Interface, class Request>
|
||||
//Future< REPLY_TYPE(Request) >
|
||||
Future<Void> getBatchReplies(
|
||||
RequestStream<Request> Interface::* channel,
|
||||
std::map<UID, Interface> interfaces,
|
||||
std::map<UID, Request> requests,
|
||||
std::vector<REPLY_TYPE(Request)>* replies) {
|
||||
|
||||
if ( requests.empty() ) {
|
||||
return Void();
|
||||
}
|
||||
|
||||
loop{
|
||||
try {
|
||||
std::vector<Future<REPLY_TYPE(Request)>> cmdReplies;
|
||||
for(auto& request : requests) {
|
||||
RequestStream<Request> const* stream = & (interfaces[request.first].*channel);
|
||||
cmdReplies.push_back( stream->getReply(request.second) );
|
||||
}
|
||||
|
||||
// Alex: Unless you want to do some action when it timeout multiple times, you should use timout. Otherwise, getReply will automatically keep retrying for you.
|
||||
std::vector<REPLY_TYPE(Request)> reps = wait( timeoutError(getAll(cmdReplies), SERVER_KNOBS->FASTRESTORE_FAILURE_TIMEOUT) ); //tryGetReply. Use GetReply. // Alex: you probably do NOT need the timeoutError.
|
||||
*replies = reps;
|
||||
break;
|
||||
} catch (Error &e) {
|
||||
if ( e.code() == error_code_operation_cancelled ) break;
|
||||
fprintf(stdout, "getBatchReplies Error code:%d, error message:%s\n", e.code(), e.what());
|
||||
}
|
||||
}
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
|
||||
#include "flow/unactorcompiler.h"
|
||||
#endif
|
|
@ -26,7 +26,6 @@
|
|||
#include "fdbserver/RestoreWorkerInterface.actor.h"
|
||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
|
||||
|
||||
//A workload which test the correctness of backup and restore process
|
||||
struct BackupAndParallelRestoreCorrectnessWorkload : TestWorkload {
|
||||
double backupAfter, restoreAfter, abortAndRestartAfter;
|
||||
|
|
Loading…
Reference in New Issue