FastRestore: Refactor to smaller components

The current code uses one restore interface to handle the work
for all restore roles, i.e., master, loader and applier.
This makes the code harder to review, maintain, and scale.

This commit splits the restore into multiple roles, mimicking the FDB
transaction system:
1) It uses a RestoreWorker as the process to host restore roles;
   This commit assumes one restore role per RestoreWorker; but
   it should be easy to extend to support multiple roles per RestoreWorker;
2) It creates 3 restore roles:
   RestoreMaster: Coordinate the restore process and send commands to the other two roles;
   RestoreLoader: Parse backup files to mutations and send mutations to appliers;
   RestoreApplier: Sort received mutations and apply them to DB in order.

Compilable version. To be tested in correctness.
This commit is contained in:
Meng Xu 2019-05-09 20:55:44 -07:00
parent 25c75f4222
commit a08a6776f5
17 changed files with 4623 additions and 4042 deletions

View File

@ -18,6 +18,8 @@
* limitations under the License.
*/
#ifndef FDBCLIENT_BackupContainer_H
#define FDBCLIENT_BackupContainer_H
#pragma once
#include "flow/flow.h"
@ -27,6 +29,8 @@
#include "fdbclient/ReadYourWrites.h"
#include <vector>
class ReadYourWritesTransaction;
Future<Optional<int64_t>> timeKeeperEpochsFromVersion(Version const &v, Reference<ReadYourWritesTransaction> const &tr);
Future<Version> timeKeeperVersionFromDatetime(std::string const &datetime, Database const &db);
@ -255,3 +259,4 @@ private:
std::string URL;
};
#endif

View File

@ -601,6 +601,14 @@ const KeyRangeRef restoreWorkersKeys(
LiteralStringRef("\xff\x02/restoreWorkers/"),
LiteralStringRef("\xff\x02/restoreWorkers0")
);
// Key range for restore-loader entries: every key that starts with "\xff\x02/restoreLoaders/".
// The end key replaces the trailing '/' with '0' (the next byte value), which makes the range
// cover exactly the keys carrying that prefix. restoreLoaderKeyFor() builds keys inside this range.
const KeyRangeRef restoreLoaderKeys(
LiteralStringRef("\xff\x02/restoreLoaders/"),
LiteralStringRef("\xff\x02/restoreLoaders0")
);
// Key range for restore-applier entries: every key that starts with "\xff\x02/restoreAppliers/".
// The end key replaces the trailing '/' with '0' (the next byte value), which makes the range
// cover exactly the keys carrying that prefix. restoreApplierKeyFor() builds keys inside this range.
const KeyRangeRef restoreApplierKeys(
LiteralStringRef("\xff\x02/restoreAppliers/"),
LiteralStringRef("\xff\x02/restoreAppliers0")
);
const KeyRef restoreStatusKey = LiteralStringRef("\xff\x02/restoreStatus/");
@ -611,24 +619,64 @@ const KeyRangeRef restoreRequestKeys(
LiteralStringRef("\xff\x02/restoreRequests0")
);
// Encode restore agent key for agentID
const Key restoreWorkerKeyFor( UID const& agentID ) {
// Encode restore worker key for workerID
const Key restoreWorkerKeyFor( UID const& workerID ) {
BinaryWriter wr(Unversioned());
wr.serializeBytes( restoreWorkersKeys.begin );
wr << agentID;
wr << workerID;
return wr.toValue();
}
// Encode restore role (loader or applier) for roleID
// Builds the key for a restore loader: the restoreLoaderKeys prefix followed by
// the Unversioned() serialization of roleID.
const Key restoreLoaderKeyFor( UID const& roleID ) {
    BinaryWriter keyWriter(Unversioned());
    keyWriter.serializeBytes( restoreLoaderKeys.begin );
    keyWriter << roleID;
    return keyWriter.toValue();
}
// Builds the key for a restore applier: the restoreApplierKeys prefix followed by
// the Unversioned() serialization of roleID.
const Key restoreApplierKeyFor( UID const& roleID ) {
    BinaryWriter keyWriter(Unversioned());
    keyWriter.serializeBytes( restoreApplierKeys.begin );
    keyWriter << roleID;
    return keyWriter.toValue();
}
// Encode restore agent value
const Value restoreCommandInterfaceValue( RestoreInterface const& cmdInterf ) {
const Value restoreWorkerInterfaceValue( RestoreWorkerInterface const& cmdInterf ) {
BinaryWriter wr(IncludeVersion());
wr << cmdInterf;
return wr.toValue();
}
RestoreInterface decodeRestoreCommandInterfaceValue( ValueRef const& value ) {
RestoreInterface s;
RestoreWorkerInterface decodeRestoreWorkerInterfaceValue( ValueRef const& value ) {
RestoreWorkerInterface s;
BinaryReader reader( value, IncludeVersion() );
reader >> s;
return s;
}
// Serializes a RestoreLoaderInterface into a Value using the IncludeVersion() writer mode.
// decodeRestoreLoaderInterfaceValue() is the matching reader.
const Value restoreLoaderInterfaceValue( RestoreLoaderInterface const& cmdInterf ) {
    BinaryWriter valueWriter(IncludeVersion());
    valueWriter << cmdInterf;
    return valueWriter.toValue();
}
// Deserializes a RestoreLoaderInterface from a value that was written with the
// IncludeVersion() mode (see restoreLoaderInterfaceValue()).
RestoreLoaderInterface decodeRestoreLoaderInterfaceValue( ValueRef const& value ) {
    BinaryReader valueReader( value, IncludeVersion() );
    RestoreLoaderInterface loaderInterf;
    valueReader >> loaderInterf;
    return loaderInterf;
}
// Serializes a RestoreApplierInterface into a Value using the IncludeVersion() writer mode.
// decodeRestoreApplierInterfaceValue() is the matching reader.
const Value restoreApplierInterfaceValue( RestoreApplierInterface const& cmdInterf ) {
    BinaryWriter valueWriter(IncludeVersion());
    valueWriter << cmdInterf;
    return valueWriter.toValue();
}
RestoreApplierInterface decodeRestoreApplierInterfaceValue( ValueRef const& value ) {
RestoreApplierInterface s;
BinaryReader reader( value, IncludeVersion() );
reader >> s;
return s;

View File

@ -27,6 +27,9 @@
#include "fdbclient/FDBTypes.h"
#include "fdbclient/StorageServerInterface.h"
#include "fdbserver/RestoreWorkerInterface.h"
struct RestoreLoaderInterface;
struct RestoreApplierInterface;
struct RestoreMasterInterface;
extern const KeyRangeRef normalKeys; // '' to systemKeys.begin
extern const KeyRangeRef systemKeys; // [FF] to [FF][FF]
@ -275,6 +278,9 @@ extern const KeyRangeRef monitorConfKeys;
extern const KeyRef restoreLeaderKey;
extern const KeyRangeRef restoreWorkersKeys;
extern const KeyRangeRef restoreRolesKeys;
extern const KeyRangeRef restoreLoaderKeys;
extern const KeyRangeRef restoreApplierKeys;
extern const KeyRef restoreStatusKey;
@ -282,9 +288,16 @@ extern const KeyRef restoreRequestTriggerKey;
extern const KeyRef restoreRequestDoneKey;
extern const KeyRangeRef restoreRequestKeys;
const Key restoreWorkerKeyFor( UID const& agentID );
const Value restoreCommandInterfaceValue( RestoreInterface const& server );
RestoreInterface decodeRestoreCommandInterfaceValue( ValueRef const& value );
const Key restoreWorkerKeyFor( UID const& workerID );
const Key restoreLoaderKeyFor( UID const& roleID );
const Key restoreApplierKeyFor( UID const& roleID );
const Value restoreWorkerInterfaceValue(RestoreWorkerInterface const& server );
RestoreWorkerInterface decodeRestoreWorkerInterfaceValue( ValueRef const& value );
const Value restoreLoaderInterfaceValue(RestoreLoaderInterface const& server );
RestoreLoaderInterface decodeRestoreLoaderInterfaceValue( ValueRef const& value );
const Value restoreApplierInterfaceValue(RestoreApplierInterface const& server );
RestoreApplierInterface decodeRestoreApplierInterfaceValue( ValueRef const& value );
// MX: parallel restore
const Value restoreRequestTriggerValue (int const numRequests);

File diff suppressed because it is too large Load Diff

View File

@ -17,3 +17,453 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "fdbclient/NativeAPI.actor.h"
#include "fdbclient/SystemData.h"
// Backup agent header
#include "fdbclient/BackupAgent.actor.h"
//#include "FileBackupAgent.h"
#include "fdbclient/ManagementAPI.actor.h"
#include "fdbclient/MutationList.h"
#include "fdbclient/BackupContainer.h"
#include "fdbserver/RestoreCommon.actor.h"
#include "fdbserver/RestoreUtil.h"
#include "fdbserver/RestoreRoleCommon.actor.h"
#include "fdbserver/RestoreApplier.actor.h"
#include "flow/actorcompiler.h" // This must be the last #include.
ACTOR Future<Void> handleGetApplierKeyRangeRequest(RestoreGetApplierKeyRangeRequest req, Reference<RestoreApplierData> self);
ACTOR Future<Void> handleSetApplierKeyRangeRequest(RestoreSetApplierKeyRangeRequest req, Reference<RestoreApplierData> self);
ACTOR Future<Void> handleCalculateApplierKeyRangeRequest(RestoreCalculateApplierKeyRangeRequest req, Reference<RestoreApplierData> self);
ACTOR Future<Void> handleSendSampleMutationVectorRequest(RestoreSendMutationVectorRequest req, Reference<RestoreApplierData> self);
ACTOR Future<Void> handleSendMutationVectorRequest(RestoreSendMutationVectorRequest req, Reference<RestoreApplierData> self);
ACTOR Future<Void> handleApplyToDBRequest(RestoreSimpleRequest req, Reference<RestoreApplierData> self, Database cx);
// Request loop of a restore applier role.
//
// Each iteration waits for the next request on any stream of applierInterf and dispatches it:
//  - heartbeat, get/set/calculate key range, initVersionBatch and collectRestoreRoleInterfaces
//    are handled inline, i.e. the loop waits for the handler before taking the next request;
//  - sampled mutation vectors, mutation vectors and applyToDB are added to `actors` and the
//    loop moves on without waiting for them.
//
// Errors thrown while waiting/handling are logged. If requestTypeStr still reads "[Init]" when
// the error is caught the loop exits and the actor returns; otherwise the loop keeps serving.
ACTOR Future<Void> restoreApplierCore(Reference<RestoreApplierData> self, RestoreApplierInterface applierInterf, Database cx) {
    state ActorCollection actors(false);
    // Seed with the current time: the first iteration must not compute an elapsed time from an
    // uninitialized double.
    state double lastLoopTopTime = now();
    loop {
        double loopTopTime = now();
        double elapsedTime = loopTopTime - lastLoopTopTime;
        // Sampled (1%) warning when one trip around the loop took longer than 50ms.
        if( elapsedTime > 0.050 ) {
            if (g_random->random01() < 0.01)
                TraceEvent(SevWarn, "SlowRestoreApplierLoopx100").detail("NodeDesc", self->describeNode()).detail("Elapsed", elapsedTime);
        }
        lastLoopTopTime = loopTopTime;
        state std::string requestTypeStr = "[Init]";

        try {
            choose {
                when ( RestoreSimpleRequest req = waitNext(applierInterf.heartbeat.getFuture()) ) {
                    requestTypeStr = "heartbeat";
                    wait(handleHeartbeat(req, applierInterf.id()));
                }
                when ( RestoreGetApplierKeyRangeRequest req = waitNext(applierInterf.getApplierKeyRangeRequest.getFuture()) ) {
                    requestTypeStr = "getApplierKeyRangeRequest";
                    wait(handleGetApplierKeyRangeRequest(req, self));
                }
                when ( RestoreSetApplierKeyRangeRequest req = waitNext(applierInterf.setApplierKeyRangeRequest.getFuture()) ) {
                    requestTypeStr = "setApplierKeyRangeRequest";
                    wait(handleSetApplierKeyRangeRequest(req, self));
                }
                when ( RestoreCalculateApplierKeyRangeRequest req = waitNext(applierInterf.calculateApplierKeyRange.getFuture()) ) {
                    requestTypeStr = "calculateApplierKeyRange";
                    wait(handleCalculateApplierKeyRangeRequest(req, self));
                }
                when ( RestoreSendMutationVectorRequest req = waitNext(applierInterf.sendSampleMutationVector.getFuture()) ) {
                    requestTypeStr = "sendSampleMutationVector";
                    actors.add( handleSendSampleMutationVectorRequest(req, self));
                }
                when ( RestoreSendMutationVectorRequest req = waitNext(applierInterf.sendMutationVector.getFuture()) ) {
                    requestTypeStr = "sendMutationVector";
                    actors.add( handleSendMutationVectorRequest(req, self) );
                }
                when ( RestoreSimpleRequest req = waitNext(applierInterf.applyToDB.getFuture()) ) {
                    requestTypeStr = "applyToDB";
                    actors.add( handleApplyToDBRequest(req, self, cx) );
                }
                when ( RestoreVersionBatchRequest req = waitNext(applierInterf.initVersionBatch.getFuture()) ) {
                    requestTypeStr = "initVersionBatch";
                    wait(handleInitVersionBatchRequest(req, self));
                }
                // TODO: To modify the interface for the following when condition
                when ( RestoreSimpleRequest req = waitNext(applierInterf.collectRestoreRoleInterfaces.getFuture()) ) {
                    // Step: Find other worker's workerInterfaces
                    // NOTE: This must be after wait(configureRolesHandler()) because we must ensure all workers have registered their workerInterfaces into DB before we can read the workerInterface.
                    // TODO: Wait until all workers have registered their workerInterface.
                    // NOTE(review): requestTypeStr is not updated in this branch, so an error thrown by
                    // this handler is reported as "[Init]" and ends the loop -- confirm that is intended.
                    wait( handleCollectRestoreRoleInterfaceRequest(req, self, cx) );
                }
            }
        } catch (Error &e) {
            fprintf(stdout, "[ERROR] Applier handle received request:%s error. error code:%d, error message:%s\n",
                    requestTypeStr.c_str(), e.code(), e.what());

            // An error before any request type was recorded is treated as fatal for this role.
            if ( requestTypeStr.find("[Init]") != std::string::npos ) {
                printf("Exit due to error at requestType:%s", requestTypeStr.c_str());
                break;
            }
        }
    }

    return Void();
}
// Computes the lower bounds of the key ranges for req.numAppliers appliers and replies with
// the number of ranges produced (GetKeyRangeNumberReply).
// The bounds are obtained from self->calculateAppliersKeyRanges() and are also stored in
// self->keyRangeLowerBounds.
ACTOR Future<Void> handleCalculateApplierKeyRangeRequest(RestoreCalculateApplierKeyRangeRequest req, Reference<RestoreApplierData> self) {
state int numMutations = 0; // NOTE(review): never read in this actor
// Result of this invocation; starts empty and is only filled further down
state std::vector<Standalone<KeyRef>> keyRangeLowerBounds;
// Poll every 5s until no Calculate_Applier_KeyRange handler is marked in progress on this node
while (self->isInProgress(RestoreCommandEnum::Calculate_Applier_KeyRange)) {
printf("[DEBUG] NODE:%s Calculate_Applier_KeyRange wait for 5s\n", self->describeNode().c_str());
wait(delay(5.0));
}
wait( delay(1.0) );
// Handle duplicate message
// We need to recalculate the value for duplicate message! Because the reply to duplicate message may arrive earlier!
// NOTE(review): keyRangeLowerBounds is still empty here (nothing above assigns it), so this
// branch is not taken and a duplicate command falls through to the recalculation below.
if (self->isCmdProcessed(req.cmdID) && !keyRangeLowerBounds.empty() ) {
printf("[DEBUG] Node:%s skip duplicate cmd:%s\n", self->describeNode().c_str(), req.cmdID.toString().c_str());
req.reply.send(GetKeyRangeNumberReply(keyRangeLowerBounds.size()));
return Void();
}
// From here until clearInProgressFlag() other handlers of this command type will wait in the loop above
self->setInProgressFlag(RestoreCommandEnum::Calculate_Applier_KeyRange);
// Applier will calculate applier key range
printf("[INFO][Applier] CMD:%s, Node:%s Calculate key ranges for %d appliers\n",
req.cmdID.toString().c_str(), self->describeNode().c_str(), req.numAppliers);
//ASSERT(req.cmd == (RestoreCommandEnum) req.cmdID.phase);
if ( keyRangeLowerBounds.empty() ) {
keyRangeLowerBounds = self->calculateAppliersKeyRanges(req.numAppliers); // keyRangeIndex is the number of key ranges requested
// Keep a copy on the role data; the Get_Applier_KeyRange handler reads self->keyRangeLowerBounds
self->keyRangeLowerBounds = keyRangeLowerBounds;
}
printf("[INFO][Applier] CMD:%s, NodeID:%s: num of key ranges:%ld\n",
req.cmdID.toString().c_str(), self->describeNode().c_str(), keyRangeLowerBounds.size());
// Reply carries the number of key ranges, which may differ from req.numAppliers
req.reply.send(GetKeyRangeNumberReply(keyRangeLowerBounds.size()));
self->processedCmd[req.cmdID] = 1; // We should not skip this command in the following phase. Otherwise, the handler in other phases may return a wrong number of appliers
self->clearInProgressFlag(RestoreCommandEnum::Calculate_Applier_KeyRange);
return Void();
}
// Replies with the key range assigned to slot req.applierIndex:
//   lower bound = self->keyRangeLowerBounds[applierIndex]
//   upper bound = the next slot's lower bound, or normalKeys.end for the last slot.
//
// Duplicate commands are deliberately NOT short-circuited: the master needs a reply that
// carries valid bounds, so every delivery recomputes and sends the full reply.
//
// If applierIndex is outside keyRangeLowerBounds the request cannot be answered with valid
// bounds; the handler logs the problem and fails the reply with internal_error() instead of
// indexing out of bounds.
ACTOR Future<Void> handleGetApplierKeyRangeRequest(RestoreGetApplierKeyRangeRequest req, Reference<RestoreApplierData> self) {
    // Poll every 5s until no Get_Applier_KeyRange handler is marked in progress on this node.
    while (self->isInProgress(RestoreCommandEnum::Get_Applier_KeyRange)) {
        printf("[DEBUG] NODE:%s Get_Applier_KeyRange wait for 5s\n", self->describeNode().c_str());
        wait(delay(5.0));
    }

    wait( delay(1.0) );

    self->setInProgressFlag(RestoreCommandEnum::Get_Applier_KeyRange);

    if ( req.applierIndex < 0 || req.applierIndex >= self->keyRangeLowerBounds.size() ) {
        printf("[INFO][Applier] NodeID:%s Get_Applier_KeyRange keyRangeIndex is out of range. keyIndex:%d keyRagneSize:%ld\n",
               self->describeNode().c_str(), req.applierIndex, self->keyRangeLowerBounds.size());
        // Release the flag before leaving so later requests are not blocked forever.
        self->clearInProgressFlag(RestoreCommandEnum::Get_Applier_KeyRange);
        req.reply.sendError(internal_error());
        return Void();
    }
    //ASSERT(req.cmd == (RestoreCommandEnum) req.cmdID.phase);

    printf("[INFO][Applier] NodeID:%s replies Get_Applier_KeyRange. keyRangeIndex:%d lower_bound_of_keyRange:%s\n",
           self->describeNode().c_str(), req.applierIndex, getHexString(self->keyRangeLowerBounds[req.applierIndex]).c_str());

    KeyRef lowerBound = self->keyRangeLowerBounds[req.applierIndex];
    // The last slot extends to the end of the normal key space.
    KeyRef upperBound = (req.applierIndex + 1) < self->keyRangeLowerBounds.size() ? self->keyRangeLowerBounds[req.applierIndex+1] : normalKeys.end;

    req.reply.send(GetKeyRangeReply(self->id(), req.cmdID, req.applierIndex, lowerBound, upperBound));
    self->clearInProgressFlag(RestoreCommandEnum::Get_Applier_KeyRange);

    return Void();
}
// Assign key range to applier
// Records in self->range2Applier that applier req.applierID owns the key range whose
// inclusive lower bound is req.range.begin, then acknowledges the command.
// A command whose cmdID was already processed is acknowledged without touching the map.
ACTOR Future<Void> handleSetApplierKeyRangeRequest(RestoreSetApplierKeyRangeRequest req, Reference<RestoreApplierData> self) {
    // Poll once per second until no Assign_Applier_KeyRange handler is marked in progress.
    loop {
        if ( !self->isInProgress(RestoreCommandEnum::Assign_Applier_KeyRange) ) {
            break;
        }
        printf("[DEBUG] NODE:%s handleSetApplierKeyRangeRequest wait for 1s\n", self->describeNode().c_str());
        wait(delay(1.0));
    }

    // First delivery of this command: apply the assignment and remember the command ID.
    if ( !self->isCmdProcessed(req.cmdID) ) {
        self->setInProgressFlag(RestoreCommandEnum::Assign_Applier_KeyRange);
        self->range2Applier[req.range.begin] = req.applierID;
        self->processedCmd[req.cmdID] = 1;
        self->clearInProgressFlag(RestoreCommandEnum::Assign_Applier_KeyRange);
    }

    // Both the first delivery and duplicates get the same acknowledgement.
    req.reply.send(RestoreCommonReply(self->id(), req.cmdID));

    return Void();
}
// Applier receive mutation from loader
// Applier side of "loader sends mutations": deep-copies every mutation in req.mutations into
// self->kvOps[req.commitVersion] and acknowledges the command.
// A command whose cmdID was already processed is acknowledged without buffering anything.
ACTOR Future<Void> handleSendMutationVectorRequest(RestoreSendMutationVectorRequest req, Reference<RestoreApplierData> self) {
    //wait( delay(1.0) ); //Q: Why adding this delay will cause segmentation fault?
    if ( debug_verbose ) {
        printf("[VERBOSE_DEBUG] Node:%s receive mutation number:%d\n", self->describeNode().c_str(), req.mutations.size());
    }

    // NOTE: We have insert operation to self->kvOps. For the same worker, we should only allow one actor of this kind to run at any time!
    // Otherwise, race condition may happen!
    while (self->isInProgress(RestoreCommandEnum::Loader_Send_Mutations_To_Applier)) {
        printf("[DEBUG] NODE:%s sendMutation wait for 1s\n", self->describeNode().c_str());
        wait(delay(1.0));
    }

    // Handle duplicate cmd
    if ( self->isCmdProcessed(req.cmdID) ) {
        req.reply.send(RestoreCommonReply(self->id(), req.cmdID));
        return Void();
    }
    self->setInProgressFlag(RestoreCommandEnum::Loader_Send_Mutations_To_Applier);

    // Applier will cache the mutations at each version. Once receive all mutations, applier will apply them to DB
    // There is no wait() below this point, so plain locals are sufficient.
    const uint64_t commitVersion = req.commitVersion;
    VectorRef<MutationRef> mutations(req.mutations);
    printf("[DEBUG] Node:%s receive %d mutations at version:%ld\n", self->describeNode().c_str(), mutations.size(), commitVersion);

    // operator[] creates the (empty) per-version vector on first use; look it up once
    // instead of twice per mutation.
    auto& versionOps = self->kvOps[commitVersion];
    int numMutations = 0;
    for (int mIndex = 0; mIndex < mutations.size(); mIndex++) {
        MutationRef mutation = mutations[mIndex];
        // Deep copy: the request's memory does not outlive this handler.
        versionOps.push_back_deep(versionOps.arena(), mutation);
        numMutations++;
        if ( numMutations % 100000 == 1 ) { // Should be different value in simulation and in real mode
            printf("[INFO][Applier] Node:%s Receives %d mutations. cur_mutation:%s\n",
                   self->describeNode().c_str(), numMutations, mutation.toString().c_str());
        }
    }

    req.reply.send(RestoreCommonReply(self->id(), req.cmdID));
    // Avoid race condition when this actor is called twice on the same command
    self->processedCmd[req.cmdID] = 1;
    self->clearInProgressFlag(RestoreCommandEnum::Loader_Send_Mutations_To_Applier);

    return Void();
}
// Applier side of "loader sends sampled mutations": for every mutation in req.mutations,
// increments the per-key counter self->keyOpsCount[mutation.param1] and
// self->numSampledMutations, then acknowledges the command.
// A command whose cmdID was already processed is acknowledged without counting anything.
ACTOR Future<Void> handleSendSampleMutationVectorRequest(RestoreSendMutationVectorRequest req, Reference<RestoreApplierData> self) {
    // NOTE(review): this resets the counter on every request (duplicates included), so
    // numSampledMutations only reflects the most recent request -- confirm that is intended.
    self->numSampledMutations = 0;
    //ASSERT(req.cmd == (RestoreCommandEnum) req.cmdID.phase);

    // NOTE: We have insert operation to self->keyOpsCount. For the same worker, we should only allow one actor of this kind to run at any time!
    // Otherwise, race condition may happen!
    while (self->isInProgress(RestoreCommandEnum::Loader_Send_Sample_Mutation_To_Applier)) {
        printf("[DEBUG] NODE:%s handleSendSampleMutationVectorRequest wait for 1s\n", self->describeNode().c_str());
        wait(delay(1.0));
    }

    // Handle duplicate message
    if (self->isCmdProcessed(req.cmdID)) {
        printf("[DEBUG] NODE:%s skip duplicate cmd:%s\n", self->describeNode().c_str(), req.cmdID.toString().c_str());
        req.reply.send(RestoreCommonReply(self->id(), req.cmdID));
        return Void();
    }
    self->setInProgressFlag(RestoreCommandEnum::Loader_Send_Sample_Mutation_To_Applier);

    // There is no wait() below this point, so plain locals are sufficient.
    VectorRef<MutationRef> mutations(req.mutations);
    for (int mIndex = 0; mIndex < mutations.size(); mIndex++) {
        MutationRef mutation = mutations[mIndex];
        // NOTE: We may receive the same mutation more than once due to network package lost.
        // Since sampling is just an estimation and the network should be stable enough, we do NOT handle the duplication for now
        // In a very unreliable network, we may get many duplicate messages and get a bad key-range splits for appliers. But the restore should still work except for running slower.
        // operator[] inserts a zero counter for a key seen for the first time.
        self->keyOpsCount[mutation.param1]++;
        self->numSampledMutations++;

        if ( debug_verbose && self->numSampledMutations % 1000 == 1 ) {
            printf("[Sampling][Applier] Node:%s Receives %d sampled mutations. cur_mutation:%s\n",
                   self->describeNode().c_str(), self->numSampledMutations, mutation.toString().c_str());
        }
    }

    req.reply.send(RestoreCommonReply(self->id(), req.cmdID));
    self->processedCmd[req.cmdID] = 1;

    self->clearInProgressFlag(RestoreCommandEnum::Loader_Send_Sample_Mutation_To_Applier);

    return Void();
}
// Applies every buffered mutation in self->kvOps to the database, in map (version) order and
// in vector order within a version, then clears kvOps and acknowledges the command.
//
// Mutations are written through one ReadYourWritesTransaction that is committed whenever the
// accumulated expectedSize() reaches transactionBatchSizeThreshold, plus one final commit.
// (prevIt, prevIndex) is the checkpoint of the first mutation that is NOT yet committed; after
// a transaction error the loop resumes from that checkpoint.
//
// A command whose cmdID was already processed is acknowledged without applying anything.
ACTOR Future<Void> handleApplyToDBRequest(RestoreSimpleRequest req, Reference<RestoreApplierData> self, Database cx) {
    state bool isPrint = false; //Debug message
    state std::string typeStr = "";

    // Wait in case the applyToDB request was delivered twice;
    while (self->inProgressApplyToDB) {
        printf("[DEBUG] NODE:%s inProgressApplyToDB wait for 5s\n", self->describeNode().c_str());
        wait(delay(5.0));
    }

    if ( self->isCmdProcessed(req.cmdID) ) {
        printf("[DEBUG] NODE:%s skip duplicate cmd:%s\n", self->describeNode().c_str(), req.cmdID.toString().c_str());
        req.reply.send(RestoreCommonReply(self->id(), req.cmdID));
        return Void();
    }

    self->inProgressApplyToDB = true;

    // Assume the process will not crash when it apply mutations to DB. The reply message can be lost though
    if (self->kvOps.empty()) {
        printf("Node:%s kvOps is empty. No-op for apply to DB\n", self->describeNode().c_str());
        req.reply.send(RestoreCommonReply(self->id(), req.cmdID));
        self->processedCmd[req.cmdID] = 1;
        self->inProgressApplyToDB = false;
        return Void();
    }

    self->sanityCheckMutationOps();

    if ( debug_verbose ) {
        TraceEvent("ApplyKVOPsToDB").detail("MapSize", self->kvOps.size());
        printf("ApplyKVOPsToDB num_of_version:%ld\n", self->kvOps.size());
    }
    state std::map<Version, Standalone<VectorRef<MutationRef>>>::iterator it = self->kvOps.begin();
    state std::map<Version, Standalone<VectorRef<MutationRef>>>::iterator prevIt = it;
    state int index = 0;
    state int prevIndex = index;
    state int count = 0;
    state Reference<ReadYourWritesTransaction> tr(new ReadYourWritesTransaction(cx));
    state int numVersion = 0;
    state double transactionSize = 0;
    loop {
        try {
            tr->reset();
            tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
            tr->setOption(FDBTransactionOptions::LOCK_AWARE);
            transactionSize = 0;

            for ( ; it != self->kvOps.end(); ++it ) {
                numVersion++;
                if ( debug_verbose ) {
                    TraceEvent("ApplyKVOPsToDB\t").detail("Version", it->first).detail("OpNum", it->second.size());
                }

                state MutationRef m;
                for ( ; index < it->second.size(); ++index ) {
                    m = it->second[index];
                    if ( m.type >= MutationRef::Type::SetValue && m.type <= MutationRef::Type::MAX_ATOMIC_OP )
                        typeStr = typeString[m.type];
                    else {
                        printf("ApplyKVOPsToDB MutationType:%d is out of range\n", m.type);
                    }

                    if ( debug_verbose && count % 1000 == 1 ) {
                        printf("ApplyKVOPsToDB Node:%s num_mutation:%d Version:%08lx num_of_ops:%d\n",
                               self->describeNode().c_str(), count, it->first, it->second.size());
                    }

                    if ( debug_verbose ) {
                        printf("[VERBOSE_DEBUG] Node:%s apply mutation:%s\n", self->describeNode().c_str(), m.toString().c_str());
                    }

                    if ( m.type == MutationRef::SetValue ) {
                        tr->set(m.param1, m.param2);
                    } else if ( m.type == MutationRef::ClearRange ) {
                        KeyRangeRef mutationRange(m.param1, m.param2);
                        tr->clear(mutationRange);
                    } else if ( isAtomicOp((MutationRef::Type) m.type) ) {
                        // TODO: Have not de-duplicated the mutations for multiple network delivery
                        // atomicOp( const KeyRef& key, const ValueRef& operand, uint32_t operationType )
                        tr->atomicOp(m.param1, m.param2, m.type);
                    } else {
                        printf("[WARNING] mtype:%d (%s) unhandled\n", m.type, typeStr.c_str());
                    }
                    ++count;
                    transactionSize += m.expectedSize();

                    // Commit once the accumulated mutation size reaches the batch threshold.
                    if ( transactionSize >= transactionBatchSizeThreshold ) {
                        wait(tr->commit());
                        tr->reset();
                        tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
                        tr->setOption(FDBTransactionOptions::LOCK_AWARE);
                        // Mutation `index` is now durable, so the checkpoint is the NEXT mutation.
                        // Resuming at `index` itself would re-apply it, which double-applies atomic ops.
                        // index + 1 may equal it->second.size(); the inner loop condition handles that.
                        prevIt = it;
                        prevIndex = index + 1;
                        transactionSize = 0;
                    }

                    if ( isPrint ) {
                        printf("\tApplyKVOPsToDB Version:%016lx MType:%s K:%s, V:%s K_size:%d V_size:%d\n", it->first, typeStr.c_str(),
                               getHexString(m.param1).c_str(), getHexString(m.param2).c_str(), m.param1.size(), m.param2.size());

                        TraceEvent("ApplyKVOPsToDB\t\t").detail("Version", it->first)
                            .detail("MType", m.type).detail("MTypeStr", typeStr)
                            .detail("MKey", getHexString(m.param1))
                            .detail("MValueSize", m.param2.size())
                            .detail("MValue", getHexString(m.param2));
                    }
                }
                // The next version starts from its first mutation.
                index = 0;
            }
            // Last transaction
            if (transactionSize > 0) {
                wait(tr->commit());
            }
            break;
        } catch(Error &e) {
            printf("ApplyKVOPsToDB transaction error:%s.\n", e.what());
            wait(tr->onError(e));
            // Rewind to the first mutation that has not been committed yet.
            it = prevIt;
            index = prevIndex;
            transactionSize = 0;
        }
    }

    self->kvOps.clear();
    printf("Node:%s ApplyKVOPsToDB number of kv mutations:%d\n", self->describeNode().c_str(), count);

    req.reply.send(RestoreCommonReply(self->id(), req.cmdID));
    printf("self->processedCmd size:%d req.cmdID:%s\n", self->processedCmd.size(), req.cmdID.toString().c_str());
    self->processedCmd[req.cmdID] = 1;
    self->inProgressApplyToDB = false;

    return Void();
}

View File

@ -21,7 +21,7 @@
// Declear RestoreApplier interface and actors
#pragma once
#if defined(NO_INTELLISENSE) && !defined(FDBSERVER_RestoreApplierInterface_H)
#if defined(NO_INTELLISENSE) && !defined(FDBSERVER_RestoreApplierInterface_G_H)
#define FDBSERVER_RestoreApplierInterface_G_H
#include "fdbserver/RestoreApplier.actor.g.h"
#elif !defined(FDBSERVER_RestoreApplierInterface_H)
@ -35,5 +35,150 @@
#include "fdbserver/CoordinationInterface.h"
#include "fdbrpc/Locality.h"
#include "fdbserver/RestoreUtil.h"
#include "fdbserver/RestoreRoleCommon.actor.h"
#include "fdbserver/RestoreWorkerInterface.h"
#include "flow/actorcompiler.h" // has to be last include
extern double transactionBatchSizeThreshold;
// State owned by one restore applier role.
// Holds the sampled key statistics used to split the key space between appliers, the
// resulting key-range assignment, and the mutations buffered per version until they are
// applied to the database.
struct RestoreApplierData : RestoreRoleData, public ReferenceCounted<RestoreApplierData> {
    // range2Applier is in master and loader node. Loader node uses this to determine which applier a mutation should be sent
    std::map<Standalone<KeyRef>, UID> range2Applier; // KeyRef is the inclusive lower bound of the key range the applier (UID) is responsible for
    std::map<Standalone<KeyRef>, int> keyOpsCount; // The number of operations per key which is used to determine the key-range boundary for appliers
    int numSampledMutations = 0; // The total number of mutations received from sampled data.

    // For master applier to hold the lower bound of key ranges for each appliers
    std::vector<Standalone<KeyRef>> keyRangeLowerBounds;

    // TODO: This block of variables may be moved to RestoreRoleData
    bool inProgressApplyToDB = false;

    // Mutations buffered per version: version -> mutations received for that version
    std::map<Version, Standalone<VectorRef<MutationRef>>> kvOps;

    void addref() { return ReferenceCounted<RestoreApplierData>::addref(); }
    void delref() { return ReferenceCounted<RestoreApplierData>::delref(); }

    // Gives the role a fresh random node ID; nodeIndex starts at 0.
    RestoreApplierData() {
        nodeID = g_random->randomUniqueID();
        nodeIndex = 0;
    }

    ~RestoreApplierData() {}

    // Short human-readable identification of this node for log lines.
    std::string describeNode() {
        std::stringstream ss;
        ss << "NodeID:" << nodeID.toString() << " nodeIndex:" << nodeIndex;
        return ss.str();
    }

    // Resets the base-class per-batch state, the applyToDB flag and the buffered mutations.
    void resetPerVersionBatch() {
        RestoreRoleData::resetPerVersionBatch();

        inProgressApplyToDB = false;
        kvOps.clear();
    }

    // Debug aid: prints whether kvOps is ordered by version and contains only known
    // mutation types. It only reports; it never aborts.
    void sanityCheckMutationOps() {
        if (kvOps.empty())
            return;

        if ( isKVOpsSorted() ) {
            printf("[CORRECT] KVOps is sorted by version\n");
        } else {
            printf("[ERROR]!!! KVOps is NOT sorted by version\n");
        }

        if ( allOpsAreKnown() ) {
            printf("[CORRECT] KVOps all operations are known.\n");
        } else {
            printf("[ERROR]!!! KVOps has unknown mutation op. Exit...\n");
        }
    }

    // True when iterating kvOps never visits a version smaller than the previous one.
    bool isKVOpsSorted() {
        bool ret = true;
        auto prev = kvOps.begin();
        for ( auto it = kvOps.begin(); it != kvOps.end(); ++it ) {
            if ( prev->first > it->first ) {
                ret = false;
                break;
            }
            prev = it;
        }
        return ret;
    }

    // True when every buffered mutation is a SetValue, a ClearRange or an atomic op.
    // Every offending mutation is printed (the scan does not stop at the first one).
    bool allOpsAreKnown() {
        bool ret = true;
        for ( auto it = kvOps.begin(); it != kvOps.end(); ++it ) {
            for ( auto m = it->second.begin(); m != it->second.end(); ++m ) {
                if ( m->type == MutationRef::SetValue || m->type == MutationRef::ClearRange
                    || isAtomicOp((MutationRef::Type) m->type) )
                    continue;
                else {
                    printf("[ERROR] Unknown mutation type:%d\n", m->type);
                    ret = false;
                }
            }
        }

        return ret;
    }

    // Splits the sampled keys into at most numAppliers ranges of roughly equal sampled
    // operation count and returns the inclusive lower bound of each range, in key order.
    // Fewer than numAppliers bounds are returned when there are not enough distinct sampled
    // keys; surplus bounds (integer division can yield one extra) are merged into the last range.
    std::vector<Standalone<KeyRef>> calculateAppliersKeyRanges(int numAppliers) {
        ASSERT(numAppliers > 0);
        std::vector<Standalone<KeyRef>> lowerBounds;
        // Total is recomputed from keyOpsCount; the member of the same name is not used here.
        int numSampledMutations = 0;
        for (auto &count : keyOpsCount) {
            numSampledMutations += count.second;
        }

        //intervalLength = (numSampledMutations - remainder) / (numApplier - 1)
        int intervalLength = std::max(numSampledMutations / numAppliers, 1); // minimal length is 1
        int curCount = 0;
        int curInterval = 0;

        printf("[INFO] Node:%s calculateAppliersKeyRanges(): numSampledMutations:%d numAppliers:%d intervalLength:%d\n",
               describeNode().c_str(),
               numSampledMutations, numAppliers, intervalLength);
        for (auto &count : keyOpsCount) {
            // Start a new range each time the running count crosses the next interval boundary.
            if (curCount >= curInterval * intervalLength) {
                printf("[INFO] Node:%s calculateAppliersKeyRanges(): Add a new key range [%d]:%s: curCount:%d\n",
                       describeNode().c_str(), curInterval, count.first.toString().c_str(), curCount);
                lowerBounds.push_back(count.first); // The lower bound of the current key range
                curInterval++;
            }
            curCount += count.second;
        }

        if ( lowerBounds.size() != numAppliers ) {
            printf("[WARNING] calculateAppliersKeyRanges() WE MAY NOT USE ALL APPLIERS efficiently! num_keyRanges:%ld numAppliers:%d\n",
                   lowerBounds.size(), numAppliers);
            printLowerBounds(lowerBounds);
        }

        //ASSERT(lowerBounds.size() <= numAppliers + 1); // We may have at most numAppliers + 1 key ranges
        // Only trim when there are MORE ranges than appliers. Trimming at equality would always
        // leave at least one applier without a range, and none at all when numAppliers == 1.
        if ( lowerBounds.size() > numAppliers ) {
            printf("[WARNING] Key ranges number:%ld > numAppliers:%d. Merge the last ones\n", lowerBounds.size(), numAppliers);
        }

        while ( lowerBounds.size() > numAppliers ) {
            printf("[WARNING] Key ranges number:%ld > numAppliers:%d. Merge the last ones\n", lowerBounds.size(), numAppliers);
            // Dropping the last lower bound merges that range into its predecessor.
            lowerBounds.pop_back();
        }

        return lowerBounds;
    }
};
ACTOR Future<Void> restoreApplierCore(Reference<RestoreApplierData> self, RestoreApplierInterface applierInterf, Database cx);
#include "flow/unactorcompiler.h"
#endif

View File

@ -32,7 +32,7 @@
#include "fdbrpc/IAsyncFile.h"
#include "fdbclient/BackupAgent.actor.h"
#include "flow/genericactors.actor.h"
#include "flow/actorcompiler.h" // has to be last include
// RestoreConfig copied from FileBackupAgent.actor.cpp
// We copy RestoreConfig instead of using (and potentially changing) it in place to avoid conflict with the existing code

File diff suppressed because it is too large Load Diff

View File

@ -21,7 +21,7 @@
// Declear RestoreLoader interface and actors
#pragma once
#if defined(NO_INTELLISENSE) && !defined(FDBSERVER_RestoreLoaderInterface_H)
#if defined(NO_INTELLISENSE) && !defined(FDBSERVER_RestoreLoaderInterface_G_H)
#define FDBSERVER_RestoreLoaderInterface_G_H
#include "fdbserver/RestoreLoader.actor.g.h"
#elif !defined(FDBSERVER_RestoreLoaderInterface_H)
@ -35,5 +35,111 @@
#include "fdbserver/CoordinationInterface.h"
#include "fdbrpc/Locality.h"
#include "fdbserver/RestoreUtil.h"
#include "fdbserver/RestoreCommon.actor.h"
#include "fdbserver/RestoreRoleCommon.actor.h"
#include "fdbserver/RestoreWorkerInterface.h"
#include "fdbclient/BackupContainer.h"
#include "flow/actorcompiler.h" // has to be last include
// State owned by a RestoreLoader role: the key-range -> applier routing table, sampling
// counters, the set of files already handled, and scratch buffers used while parsing
// backup files into mutations.
struct RestoreLoaderData : RestoreRoleData, public ReferenceCounted<RestoreLoaderData> {
public:
	// range2Applier is in master and loader node. Loader node uses this to determine which applier a mutation should be sent
	std::map<Standalone<KeyRef>, UID> range2Applier; // KeyRef is the inclusive lower bound of the key range the applier (UID) is responsible for
	std::map<Standalone<KeyRef>, int> keyOpsCount; // The number of operations per key which is used to determine the key-range boundary for appliers
	int numSampledMutations = 0; // The total number of mutations received from sampled data.

	// Loader's state to handle the duplicate delivery of loading commands
	std::map<std::string, int> processedFiles; // first is filename of processed file, second is not used

	// Temporary data structure for parsing range and log files into (version, <K, V, mutationType>)
	std::map<Version, Standalone<VectorRef<MutationRef>>> kvOps;
	// Must use StandAlone to save mutations, otherwise, the mutationref memory will be corrupted
	std::map<Standalone<StringRef>, Standalone<StringRef>> mutationMap; // Key is the unique identifier for a batch of mutation logs at the same version
	// NOTE(review): the original comment was truncated ("Record the most recent ...");
	// presumably this tracks the latest part number seen per mutation-log key - confirm.
	std::map<Standalone<StringRef>, uint32_t> mutationPartMap;

	Reference<IBackupContainer> bc; // Backup container is used to read backup files
	Key bcUrl; // The url used to get the bc

	CMDUID cmdID;

	// Performance statistics
	double curWorkloadSize = 0;

	void addref() { return ReferenceCounted<RestoreLoaderData>::addref(); }
	void delref() { return ReferenceCounted<RestoreLoaderData>::delref(); }

	// Every loader starts with a fresh random ID and index 0. The numeric counters are
	// zeroed by their in-class initializers so they are never read uninitialized before
	// the first resetPerVersionBatch().
	RestoreLoaderData() {
		nodeID = g_random->randomUniqueID();
		nodeIndex = 0;
	}

	~RestoreLoaderData() {}

	// Human-readable identity used as a prefix in log lines.
	std::string describeNode() {
		std::stringstream ss;
		ss << "[Role: Loader] [NodeID:" << nodeID.toString().c_str()
		   << "] [NodeIndex:" << std::to_string(nodeIndex) << "]";
		return ss.str();
	}

	// Drop everything that is only valid for one version batch (also resets the base-class state).
	void resetPerVersionBatch() {
		printf("[INFO]Node:%s resetPerVersionBatch\n", nodeID.toString().c_str());
		RestoreRoleData::resetPerVersionBatch();

		range2Applier.clear();
		keyOpsCount.clear();
		numSampledMutations = 0;

		processedFiles.clear();

		kvOps.clear();
		mutationMap.clear();
		mutationPartMap.clear();

		curWorkloadSize = 0;
	}

	// IDs of all appliers that currently own a key range; may be empty.
	vector<UID> getBusyAppliers() {
		vector<UID> busyAppliers;
		for (auto &app : range2Applier) {
			busyAppliers.push_back(app.second);
		}
		return busyAppliers;
	}

	// Same list as getBusyAppliers(), but asserts that at least one applier has a key range.
	std::vector<UID> getWorkingApplierIDs() {
		std::vector<UID> applierIDs;
		for ( auto &applier : range2Applier ) {
			applierIDs.push_back(applier.second);
		}

		ASSERT( !applierIDs.empty() );
		return applierIDs;
	}

	// Open the backup container for url unless the same url is already open.
	void initBackupContainer(Key url) {
		if ( bcUrl == url && bc.isValid() ) {
			return;
		}
		printf("initBackupContainer, url:%s\n", url.toString().c_str());
		bcUrl = url;
		bc = IBackupContainer::openContainer(url.toString());
	}

	// Debug dump of the key-range -> applier routing table.
	void printAppliersKeyRange() {
		printf("[INFO] The mapping of KeyRange_start --> Applier ID\n");
		// applier type: std::map<Standalone<KeyRef>, UID>
		for (auto &applier : range2Applier) {
			printf("\t[INFO]%s -> %s\n", getHexString(applier.first).c_str(), applier.second.toString().c_str());
		}
	}
};
ACTOR Future<Void> restoreLoaderCore(Reference<RestoreLoaderData> self, RestoreLoaderInterface loaderInterf, Database cx);
#include "flow/unactorcompiler.h"
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,264 @@
/*
* RestoreMasterInterface.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Declare the RestoreMaster interface and actors
#pragma once
#if defined(NO_INTELLISENSE) && !defined(FDBSERVER_RestoreMasterInterface_G_H)
#define FDBSERVER_RestoreMasterInterface_G_H
#include "fdbserver/RestoreMaster.actor.g.h"
#elif !defined(FDBSERVER_RestoreMasterInterface_H)
#define FDBSERVER_RestoreMasterInterface_H
#include <sstream>
#include "flow/Stats.h"
#include "fdbclient/FDBTypes.h"
#include "fdbclient/CommitTransaction.h"
#include "fdbrpc/fdbrpc.h"
#include "fdbserver/CoordinationInterface.h"
#include "fdbrpc/Locality.h"
#include "fdbserver/RestoreUtil.h"
#include "fdbserver/RestoreRoleCommon.actor.h"
#include "flow/actorcompiler.h" // has to be last include
extern double loadBatchSizeThresholdB;
extern int restoreStatusIndex;
// State owned by the RestoreMaster role: the key-range -> applier assignment, the list of
// backup files, and the cursor used to carve those files into version batches.
struct RestoreMasterData : RestoreRoleData, public ReferenceCounted<RestoreMasterData> {
	// range2Applier is in master and loader node. Loader node uses this to determine which applier a mutation should be sent
	std::map<Standalone<KeyRef>, UID> range2Applier; // KeyRef is the inclusive lower bound of the key range the applier (UID) is responsible for

	CMDUID cmdID; // Command id used to record the progress

	// Temporary variables to hold files and data to restore
	std::vector<RestoreFileFR> allFiles; // All backup files to be processed in all version batches
	std::vector<RestoreFileFR> files; // Backup files to be parsed and applied: range and log files in 1 version batch
	std::map<Version, Version> forbiddenVersions; // forbidden version range [first, second)

	// In each version batch, we process the files in [curBackupFilesBeginIndex, curBackupFilesEndIndex] in RestoreMasterData.allFiles.
	// The struct has no constructor, so the counters are zero-initialized here to make sure
	// they are never read while indeterminate.
	long curBackupFilesBeginIndex = 0;
	long curBackupFilesEndIndex = 0;
	double totalWorkloadSize = 0;
	double curWorkloadSize = 0;
	int batchIndex = 0;

	Reference<IBackupContainer> bc; // Backup container is used to read backup files
	Key bcUrl; // The url used to get the bc

	void addref() { return ReferenceCounted<RestoreMasterData>::addref(); }
	void delref() { return ReferenceCounted<RestoreMasterData>::delref(); }

	// Debug dump of every backup file known to the master.
	void printAllBackupFilesInfo() {
		printf("[INFO] All backup files: num:%ld\n", allFiles.size());
		for (int i = 0; i < allFiles.size(); ++i) {
			printf("\t[INFO][File %d] %s\n", i, allFiles[i].toString().c_str());
		}
	}

	// Human-readable identity used as a prefix in log lines.
	std::string describeNode() {
		std::stringstream ss;
		ss << "Master versionBatch:" << batchIndex;
		return ss.str();
	}

	// Fill in beginVersion/endVersion for every entry of `files` and copy the result into `allFiles`.
	// Range files use their snapshot version for both ends; log files encode the versions in their name.
	void constructFilesWithVersionRange() {
		printf("[INFO] constructFilesWithVersionRange for num_files:%ld\n", files.size());
		allFiles.clear();
		for (int i = 0; i < files.size(); i++) {
			printf("\t[File:%d] Start %s\n", i, files[i].toString().c_str());
			Version beginVersion = 0;
			Version endVersion = 0;
			if ( files[i].isRange ) {
				// No need to parse range filename to get endVersion
				beginVersion = files[i].version;
				endVersion = beginVersion;
			} else { // Log file
				// Refer to pathToLogFile() in BackupContainer.actor.cpp
				const std::string &fullName = files[i].fileName;
				const size_t slashPos = fullName.find_last_of("/");
				const bool hasSlash = (slashPos != std::string::npos);
				const int pos = hasSlash ? (int) slashPos : -1;
				// A name without any '/' used to make substr() throw; give it the leading
				// slash the pattern below expects instead.
				const std::string fileName = hasSlash ? fullName.substr(slashPos) : "/" + fullName;
				printf("\t[File:%d] Log filename:%s, pos:%d\n", i, fileName.c_str(), pos);

				// Only the two leading version fields are needed; scan them into long long
				// so the conversion specifiers match the argument types on every platform.
				long long parsedBegin = 0;
				long long parsedEnd = 0;
				const int matched = sscanf(fileName.c_str(), "/log,%lld,%lld", &parsedBegin, &parsedEnd);
				if ( matched != 2 ) {
					fprintf(stderr, "\t[File:%d][ERROR] Cannot parse begin/end version from log filename:%s\n", i, fileName.c_str());
				}
				beginVersion = parsedBegin;
				endVersion = parsedEnd;
				printf("\t[File:%d] Log filename:%s produces beginVersion:%lld endVersion:%lld\n", i, fileName.c_str(),
				       (long long) beginVersion, (long long) endVersion);
			}
			files[i].beginVersion = beginVersion;
			files[i].endVersion = endVersion;
			printf("\t[File:%d] End %s\n", i, files[i].toString().c_str());
			ASSERT(beginVersion <= endVersion);
			allFiles.push_back( files[i] );
		}
	}

	// Debug dump of the files selected for the current version batch.
	void printBackupFilesInfo() {
		printf("[INFO] The backup files for current batch to load and apply: num:%ld\n", files.size());
		for (int i = 0; i < files.size(); ++i) {
			printf("\t[INFO][File %d] %s\n", i, files[i].toString().c_str());
		}
	}

	// Record [beginVersion, endVersion) of every log file as a forbidden version range.
	void buildForbiddenVersionRange() {
		printf("[INFO] Build forbidden version ranges for all backup files: num:%ld\n", allFiles.size());
		for (int i = 0; i < allFiles.size(); ++i) {
			if (!allFiles[i].isRange) {
				forbiddenVersions.insert(std::make_pair(allFiles[i].beginVersion, allFiles[i].endVersion));
			}
		}
	}

	// Return true if any two forbidden ranges overlap.
	// The map is ordered by begin version, so it is enough to compare each range with its
	// immediate predecessor; prevRange must therefore advance together with curRange.
	bool isForbiddenVersionRangeOverlapped() {
		printf("[INFO] Check if forbidden version ranges is overlapped: num of ranges:%ld\n", forbiddenVersions.size());
		if (forbiddenVersions.empty()) {
			return false;
		}

		std::map<Version, Version>::iterator prevRange = forbiddenVersions.begin();
		std::map<Version, Version>::iterator curRange = prevRange;
		++curRange; // safe: the map has at least one element
		for ( ; curRange != forbiddenVersions.end(); prevRange = curRange, ++curRange ) {
			if ( curRange->first < prevRange->second ) {
				return true; // overlapped
			}
		}

		return false; // not overlapped
	}

	// Debug dump of the forbidden version ranges.
	void printForbiddenVersionRange() {
		printf("[INFO] Number of forbidden version ranges:%ld\n", forbiddenVersions.size());
		int i = 0;
		for (auto &range : forbiddenVersions) {
			printf("\t[INFO][Range%d] [%ld, %ld)\n", i, range.first, range.second);
			++i;
		}
	}

	// endVersion is begin version for range file, because range file takes snapshot at the same version
	// endVersion is the end version (excluded) for mutations recorded in log file
	bool isVersionInForbiddenRange(Version endVersion, bool isRange) {
		if ( !isRange ) {
			// The log file does NOT include mutations at the endVersion, so its endVersion is always a
			// valid version batch boundary as long as the forbidden version ranges do not overlap.
			return false;
		}
		// The range file includes mutations at the endVersion.
		for (auto &range : forbiddenVersions) {
			if (endVersion >= range.first && endVersion < range.second) {
				return true;
			}
		}
		return false;
	}

	// Debug dump of the key-range -> applier routing table.
	void printAppliersKeyRange() {
		printf("[INFO] The mapping of KeyRange_start --> Applier ID\n");
		// applier type: std::map<Standalone<KeyRef>, UID>
		for (auto &applier : range2Applier) {
			printf("\t[INFO]%s -> %s\n", getHexString(applier.first).c_str(), applier.second.toString().c_str());
		}
	}

	// True when no file of the current batch contains any data.
	bool isBackupEmpty() {
		for (int i = 0; i < files.size(); ++i) {
			if (files[i].fileSize > 0) {
				return false;
			}
		}
		return true;
	}

	// Open the backup container for url unless the same url is already open.
	void initBackupContainer(Key url) {
		if ( bcUrl == url && bc.isValid() ) {
			return;
		}
		printf("initBackupContainer, url:%s\n", url.toString().c_str());
		bcUrl = url;
		bc = IBackupContainer::openContainer(url.toString());
	}

	// Collect the set of backup files to be used for a version batch
	// Return true if there is still files to be restored; false otherwise.
	// This function will change the process' RestoreMasterData
	bool collectFilesForOneVersionBatch() {
		files.clear();
		curWorkloadSize = 0;
		Version endVersion = -1;
		bool isRange = false;
		bool validVersion = false;
		// Step: Find backup files in each version batch and restore them.
		while ( curBackupFilesBeginIndex < allFiles.size() ) {
			// Find the curBackupFilesEndIndex, such that the to-be-loaded files size (curWorkloadSize) is as close to loadBatchSizeThresholdB as possible,
			// and curBackupFilesEndIndex must not belong to the forbidden version range!
			if ( curBackupFilesEndIndex < allFiles.size() ) {
				endVersion = allFiles[curBackupFilesEndIndex].endVersion;
				isRange = allFiles[curBackupFilesEndIndex].isRange;
				validVersion = !isVersionInForbiddenRange(endVersion, isRange);
				curWorkloadSize += allFiles[curBackupFilesEndIndex].fileSize;
				printf("[DEBUG][Batch:%d] Calculate backup files for a version batch: endVersion:%lld isRange:%d validVersion:%d curWorkloadSize:%.2fB curBackupFilesBeginIndex:%ld curBackupFilesEndIndex:%ld, files.size:%ld\n",
				       batchIndex, (long long) endVersion, isRange, validVersion, curWorkloadSize, curBackupFilesBeginIndex, curBackupFilesEndIndex, allFiles.size());
			}
			if ( (validVersion && curWorkloadSize >= loadBatchSizeThresholdB) || curBackupFilesEndIndex >= allFiles.size() ) {
				if ( curBackupFilesEndIndex >= allFiles.size() && curWorkloadSize <= 0 ) {
					printf("Restore finishes: curBackupFilesEndIndex:%ld, allFiles.size:%ld, curWorkloadSize:%.2f\n",
					       curBackupFilesEndIndex, allFiles.size(), curWorkloadSize );
				}
				// Construct the files [curBackupFilesBeginIndex, curBackupFilesEndIndex]
				if ( curBackupFilesBeginIndex < allFiles.size() ) {
					for (long fileIndex = curBackupFilesBeginIndex; fileIndex <= curBackupFilesEndIndex && fileIndex < allFiles.size(); fileIndex++) {
						files.push_back(allFiles[fileIndex]);
					}
				}
				printBackupFilesInfo();
				totalWorkloadSize += curWorkloadSize;
				break;
			} else {
				if ( !validVersion && curWorkloadSize >= loadBatchSizeThresholdB ) {
					// Now: just move to the next file. We will eventually find a valid version but load more than loadBatchSizeThresholdB
					printf("[WARNING] The loading batch size will be larger than expected! curBatchSize:%.2fB, expectedBatchSize:%.2fB, endVersion:%lld\n",
					       curWorkloadSize, loadBatchSizeThresholdB, (long long) endVersion);
					// TODO: Roll back to find a valid version
				}
				// In every remaining case the batch keeps growing by one file.
				curBackupFilesEndIndex++;
			}
		}

		return (files.size() > 0);
	}
};
ACTOR Future<Void> startRestoreMaster(Reference<RestoreMasterData> self, Database cx);
#include "flow/unactorcompiler.h"
#endif

View File

@ -0,0 +1,324 @@
/*
* RestoreRoleCommon.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "fdbclient/NativeAPI.actor.h"
#include "fdbclient/MutationList.h"
#include "fdbserver/RestoreUtil.h"
#include "fdbserver/RestoreRoleCommon.actor.h"
#include "fdbserver/RestoreLoader.actor.h"
#include "fdbserver/RestoreApplier.actor.h"
#include "fdbserver/RestoreMaster.actor.h"
#include "flow/actorcompiler.h" // This must be the last #include.
class Database;
struct RestoreWorkerData;
// Reply to a heartbeat probe on behalf of the worker whose id is `id`.
// Shared by the restore loader and the restore applier.
ACTOR Future<Void> handleHeartbeat(RestoreSimpleRequest req, UID id) {
	wait( delay(0.1) ); // To avoid warning

	RestoreCommonReply ack(id, req.cmdID);
	req.reply.send(ack);

	return Void();
}
// Restore Worker: read the loader and applier interfaces published under the restore system
// keys and cache them in self. The whole read is retried through tr.onError() on failure.
ACTOR Future<Void> _collectRestoreRoleInterfaces(Reference<RestoreRoleData> self, Database cx) {
	state Transaction tr(cx);

	printf("[INFO][Worker] Node:%s Get the handleCollectRestoreRoleInterfaceRequest for all workers\n", self->describeNode().c_str());
	loop {
		try {
			// Start every attempt from a clean slate so a retry never mixes old and new entries.
			self->clearInterfaces();
			tr.reset();
			tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
			tr.setOption(FDBTransactionOptions::LOCK_AWARE);
			state Standalone<RangeResultRef> loaderAgentValues = wait( tr.getRange(restoreLoaderKeys, CLIENT_KNOBS->TOO_MANY) );
			state Standalone<RangeResultRef> applierAgentValues = wait( tr.getRange(restoreApplierKeys, CLIENT_KNOBS->TOO_MANY) );
			ASSERT(!loaderAgentValues.more);
			ASSERT(!applierAgentValues.more);

			// Save the loader and applier interfaces for the later operations.
			// Iterating an empty result is a no-op, so no explicit size check is needed.
			for (auto &kv : loaderAgentValues) {
				RestoreLoaderInterface decodedLoader = BinaryReader::fromStringRef<RestoreLoaderInterface>(kv.value, IncludeVersion());
				self->loadersInterf[decodedLoader.id()] = decodedLoader;
			}
			for (auto &kv : applierAgentValues) {
				RestoreApplierInterface decodedApplier = BinaryReader::fromStringRef<RestoreApplierInterface>(kv.value, IncludeVersion());
				self->appliersInterf[decodedApplier.id()] = decodedApplier;
				// The last applier read ends up as the master applier.
				self->masterApplierInterf = decodedApplier; // TODO: Set masterApplier in a more deterministic way
			}
			break;
		} catch( Error &e ) {
			printf("[WARNING] Node:%s handleCollectRestoreRoleInterfaceRequest() transaction error:%s\n", self->describeNode().c_str(), e.what());
			wait( tr.onError(e) );
		}
		// Only reached after a failed attempt, because the success path breaks out of the loop.
		printf("[WARNING] Node:%s handleCollectRestoreRoleInterfaceRequest should always succeed in the first loop! Something goes wrong!\n", self->describeNode().c_str());
	}

	return Void();
}
// Restore worker: serve a "collect restore role interfaces" command.
// Waits until no other instance of this command is in progress, answers duplicates
// immediately, otherwise refreshes the cached loader/applier interfaces and replies.
// NOTE(review): the original comment says RestoreRoleData will be cast to RestoreLoaderData or
// RestoreApplierData based on its type; no such cast is visible in this function - confirm.
ACTOR Future<Void> handleCollectRestoreRoleInterfaceRequest(RestoreSimpleRequest req, Reference<RestoreRoleData> self, Database cx) {
// Poll every 5 seconds while another request of the same kind is being processed.
while (self->isInProgress(RestoreCommandEnum::Collect_RestoreRoleInterface)) {
printf("[DEBUG] NODE:%s handleCollectRestoreRoleInterfaceRequest wait for 5s\n", self->describeNode().c_str());
wait(delay(5.0));
}
// Handle duplicate, assuming cmdUID is always unique for the same workload
if ( self->isCmdProcessed(req.cmdID) ) {
printf("[DEBUG] NODE:%s skip duplicate cmd:%s\n", self->describeNode().c_str(), req.cmdID.toString().c_str());
req.reply.send(RestoreCommonReply(self->id(), req.cmdID));
return Void();
}
// Mark the command as running before the first wait so concurrent duplicates block in the loop above.
self->setInProgressFlag(RestoreCommandEnum::Collect_RestoreRoleInterface);
wait( _collectRestoreRoleInterfaces(self, cx) );
req.reply.send(RestoreCommonReply(self->id(), req.cmdID));
// Remember the command id so a re-delivered request is answered without redoing the work.
self->processedCmd[req.cmdID] = 1;
self->clearInProgressFlag(RestoreCommandEnum::Collect_RestoreRoleInterface);
return Void();
}
// Serve an "init version batch" command: reset the per-version-batch state of this role and reply.
// Waits until no other instance of this command is in progress and answers duplicates immediately.
ACTOR Future<Void> handleInitVersionBatchRequest(RestoreVersionBatchRequest req, Reference<RestoreRoleData> self) {
// wait( delay(1.0) );
printf("[Batch:%d] Node:%s Start...\n", req.batchID, self->describeNode().c_str());
// Poll every 5 seconds while another request of the same kind is being processed.
while (self->isInProgress(RestoreCommandEnum::Reset_VersionBatch)) {
printf("[DEBUG] NODE:%s handleVersionBatchRequest wait for 5s\n", self->describeNode().c_str());
wait(delay(5.0));
}
// Handle duplicate, assuming cmdUID is always unique for the same workload
if ( self->isCmdProcessed(req.cmdID) ) {
printf("[DEBUG] NODE:%s skip duplicate cmd:%s\n", self->describeNode().c_str(), req.cmdID.toString().c_str());
req.reply.send(RestoreCommonReply(self->id(), req.cmdID));
return Void();
}
self->setInProgressFlag(RestoreCommandEnum::Reset_VersionBatch);
// NOTE(review): self is a Reference<RestoreRoleData> and resetPerVersionBatch() is not declared
// virtual in RestoreRoleData, so this presumably runs only the base-class reset - confirm that
// role-specific state does not need to be cleared here as well.
self->resetPerVersionBatch();
req.reply.send(RestoreCommonReply(self->id(), req.cmdID));
// Recorded after the reset above, which clears processedCmd, so this entry survives it.
self->processedCmd[req.cmdID] = 1;
self->clearInProgressFlag(RestoreCommandEnum::Reset_VersionBatch);
// The actor completes once the reply has been sent and the bookkeeping is updated.
return Void();
}
//-------Helper functions
// Render input as lowercase two-digit hex bytes, grouped four bytes at a time by a space.
// A '|' is inserted before byte 12 (end of the 12-byte version of a value) and an '@'
// before byte 24 (end of version + header).
std::string getHexString(StringRef input) {
	const int versionEnd = 12;           // The end of 12bytes, which is the version size for value
	const int headerEnd = 12 + 12;       // The end of version + header
	const int numBytes = input.size();

	std::stringstream out;
	// Fill character and base are sticky stream state; only the width must be set per byte.
	out << std::setfill('0') << std::hex;
	for (int pos = 0; pos < numBytes; ++pos) {
		if ( pos % 4 == 0 ) {
			out << " ";
		}
		if ( pos == versionEnd ) {
			out << "|";
		}
		if ( pos == headerEnd ) {
			out << "@";
		}
		out << std::setw(2) << (int) input[pos]; // [] operator moves the pointer in step of uint8
	}
	return out.str();
}
// Render a backup-log key as hex: the first `skip` bytes (prefix), then "||", one hash byte,
// "|", up to 8 bytes of commit version, and finally the remaining bytes (the part value).
// Bytes are printed as two lowercase hex digits, grouped four at a time by a space.
// `skip` is clamped to [0, input.size()], so a key shorter than expected is printed as far
// as it goes instead of being read out of bounds.
std::string getHexKey(StringRef input, int skip) {
	const int size = input.size();
	const int prefixLen = skip < 0 ? 0 : (skip > size ? size : skip);

	std::stringstream ss;
	// Fill character and base are sticky stream state; only the width must be set per byte.
	ss << std::setfill('0') << std::hex;

	for (int i = 0; i < prefixLen; i++) {
		if ( i % 4 == 0 )
			ss << " ";
		ss << std::setw(2) << (int) input[i]; // [] operator moves the pointer in step of uint8
	}
	ss << "||";

	// hashvalue (absent when the key ends right after the prefix)
	if ( prefixLen < size ) {
		ss << std::setw(2) << (int) input[prefixLen];
	}
	ss << "|";

	// commitversion in 64bit
	const int versionBegin = prefixLen + 1;
	const int versionEnd = versionBegin + 8;
	int count = 0;
	for (int i = versionBegin; i < size && i < versionEnd; i++) {
		if ( count++ % 4 == 0 )
			ss << " ";
		ss << std::setw(2) << (int) input[i];
	}

	// part value
	count = 0;
	for (int i = versionEnd; i < size; i++) {
		if ( count++ % 4 == 0 )
			ss << " ";
		ss << std::setw(2) << (int) input[i];
	}

	return ss.str();
}
// Debug helper: print every mutation of m as one line (type, hex-encoded params, param sizes),
// each line starting with prefix.
void printMutationListRefHex(MutationListRef m, std::string prefix) {
	for (MutationListRef::Iterator mutation = m.begin(); mutation != m.end(); ++mutation) {
		const std::string param1Hex = getHexString(mutation->param1);
		const std::string param2Hex = getHexString(mutation->param2);
		printf("%s mType:%04x param1:%s param2:%s param1_size:%d, param2_size:%d\n", prefix.c_str(), mutation->type,
		       param1Hex.c_str(), param2Hex.c_str(), mutation->param1.size(), mutation->param2.size());
	}
}
// Debug helper: decode and print a backup mutation log value.
// The backup log value (i.e., the value in the kv pair) has the following format
//   version(12B)|mutationRef|MutationRef|....
// A mutationRef has the format: |type_4B|param1_size_4B|param2_size_4B|param1|param2.
// Note: The data is stored in little endian, and it is consumed here as little endian.
// The per-mutation lines are only printed when debug_verbose is set. A malformed value makes
// the reader throw restore_corrupted_data (or end_of_stream) after the parse error is reported.
void printBackupMutationRefValueHex(Standalone<StringRef> val_input, std::string prefix) {
	const int version_size = 12; // 8-byte version followed by the 4-byte payload length
	StringRef val = val_input.contents();
	StringRefReaderMX reader(val, restore_corrupted_data());

	// The version is skipped, not printed; the payload length is validated below.
	reader.consume<uint64_t>();
	const uint32_t val_length_decode = reader.consume<uint32_t>();

	printf("----------------------------------------------------------\n");
	printf("To decode value:%s\n", getHexString(val).c_str());
	if ( val_length_decode != (uint32_t) (val.size() - version_size) ) {
		fprintf(stderr, "%s[PARSE ERROR]!!! val_length_decode:%u != val.size:%d\n", prefix.c_str(), val_length_decode, val.size());
	} else if ( debug_verbose ) {
		printf("%s[PARSE SUCCESS] val_length_decode:%u == (val.size:%d - 12)\n", prefix.c_str(), val_length_decode, val.size());
	}

	// Decode mutations until the end of the value is reached.
	while ( !reader.eof() ) {
		const uint32_t type = reader.consume<uint32_t>();
		const uint32_t kLen = reader.consume<uint32_t>();
		const uint32_t vLen = reader.consume<uint32_t>();

		// Report implausible lengths BEFORE consuming the payload: consume() throws on an
		// overrun, so a check placed after it could never be reached.
		if ( kLen > (uint32_t) val.size() || vLen > (uint32_t) val.size() ) {
			fprintf(stderr, "%s[PARSE ERROR]!!!! kLen:%u(0x%04x) vLen:%u(0x%04x)\n", prefix.c_str(), kLen, kLen, vLen, vLen);
		}

		const uint8_t *k = reader.consume(kLen);
		const uint8_t *v = reader.consume(vLen);

		if ( debug_verbose ) {
			printf("%s---DedodeBackupMutation: Type:%u K:%s V:%s k_size:%u v_size:%u\n", prefix.c_str(),
			       type, getHexString(KeyRef(k, kLen)).c_str(), getHexString(KeyRef(v, vLen)).c_str(), kLen, vLen);
		}
	}
	if ( debug_verbose ) {
		printf("----------------------------------------------------------\n");
	}
}
// Debug helper: decode key_input with the same layout as a backup mutation log value
// (8-byte version, 4-byte payload length, then |type_4B|param1_size_4B|param2_size_4B|param1|param2
// records, all little endian) and print every record unconditionally.
// A malformed input makes the reader throw restore_corrupted_data (or end_of_stream) after the
// parse error is reported.
void printBackupLogKeyHex(Standalone<StringRef> key_input, std::string prefix) {
	const int version_size = 12; // 8-byte version followed by the 4-byte payload length
	StringRef val = key_input.contents();
	StringRefReaderMX reader(val, restore_corrupted_data());

	// The version is skipped, not printed; the payload length is validated below.
	reader.consume<uint64_t>();
	const uint32_t val_length_decode = reader.consume<uint32_t>();

	printf("----------------------------------------------------------\n");
	printf("To decode value:%s\n", getHexString(val).c_str());
	if ( val_length_decode != (uint32_t) (val.size() - version_size) ) {
		fprintf(stderr, "%s[PARSE ERROR]!!! val_length_decode:%u != val.size:%d\n", prefix.c_str(), val_length_decode, val.size());
	} else {
		printf("%s[PARSE SUCCESS] val_length_decode:%u == (val.size:%d - 12)\n", prefix.c_str(), val_length_decode, val.size());
	}

	// Decode records until the end of the input is reached.
	while ( !reader.eof() ) {
		const uint32_t type = reader.consume<uint32_t>();
		const uint32_t kLen = reader.consume<uint32_t>();
		const uint32_t vLen = reader.consume<uint32_t>();

		// Report implausible lengths BEFORE consuming the payload: consume() throws on an
		// overrun, so a check placed after it could never be reached.
		if ( kLen > (uint32_t) val.size() || vLen > (uint32_t) val.size() ) {
			printf("%s[PARSE ERROR]!!!! kLen:%u(0x%04x) vLen:%u(0x%04x)\n", prefix.c_str(), kLen, kLen, vLen, vLen);
		}

		const uint8_t *k = reader.consume(kLen);
		const uint8_t *v = reader.consume(vLen);

		printf("%s---DedoceBackupMutation: Type:%u K:%s V:%s k_size:%u v_size:%u\n", prefix.c_str(),
		       type, getHexString(KeyRef(k, kLen)).c_str(), getHexString(KeyRef(v, vLen)).c_str(), kLen, vLen);
	}

	printf("----------------------------------------------------------\n");
}
// Debug helper: when debug_verbose is set, print every key of lowerBounds in hex, one per line.
void printLowerBounds(std::vector<Standalone<KeyRef>> lowerBounds) {
	if ( !debug_verbose ) {
		return;
	}

	printf("[INFO] Print out %ld keys in the lowerbounds\n", lowerBounds.size());
	int position = 0;
	for (const auto &bound : lowerBounds) {
		printf("\t[INFO][%d] %s\n", position, getHexString(bound).c_str());
		++position;
	}
}
// Debug helper: print the key range assigned to every applier, numbering the appliers
// in map (UID) order starting from 0.
void printApplierKeyRangeInfo(std::map<UID, Standalone<KeyRangeRef>> appliers) {
	printf("[INFO] appliers num:%ld\n", appliers.size());
	int index = 0;
	for (auto &applier : appliers) {
		printf("\t[INFO][Applier:%d] ID:%s --> KeyRange:%s\n", index, applier.first.toString().c_str(), applier.second.toString().c_str());
		++index; // without this every applier was reported as Applier:0
	}
}

View File

@ -0,0 +1,200 @@
/*
* RestoreRoleCommon.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Declare common structs and functions used in fast restore
#pragma once
#if defined(NO_INTELLISENSE) && !defined(FDBSERVER_RestoreRoleCommon_G_H)
#define FDBSERVER_RestoreRoleCommon_G_H
#include "fdbserver/RestoreRoleCommon.actor.g.h"
#elif !defined(FDBSERVER_RestoreRoleCommon_H)
#define FDBSERVER_RestoreRoleCommon_H
#include <sstream>
#include "flow/Stats.h"
#include "fdbclient/FDBTypes.h"
#include "fdbclient/CommitTransaction.h"
#include "fdbrpc/fdbrpc.h"
#include "fdbserver/CoordinationInterface.h"
#include "fdbrpc/Locality.h"
#include "fdbserver/RestoreUtil.h"
#include "fdbserver/RestoreWorkerInterface.h"
extern bool debug_verbose;
extern double mutationVectorThreshold;
struct RestoreRoleInterface;
struct RestoreLoaderInterface;
struct RestoreApplierInterface;
struct RestoreRoleData;
struct RestoreMasterData;
struct RestoreSimpleRequest;
ACTOR Future<Void> handleHeartbeat(RestoreSimpleRequest req, UID id);
ACTOR Future<Void> handleCollectRestoreRoleInterfaceRequest(RestoreSimpleRequest req, Reference<RestoreRoleData> self, Database cx);
ACTOR Future<Void> handleInitVersionBatchRequest(RestoreVersionBatchRequest req, Reference<RestoreRoleData> self);
ACTOR Future<Void> _collectRestoreRoleInterfaces(Reference<RestoreRoleData> self, Database cx);
// Helper class for reading restore data from a buffer and throwing the right errors.
// This struct is mostly copied from StringRefReader. We add a sanity check in this struct.
// TODO: Merge this struct with StringRefReader.
struct StringRefReaderMX {
StringRefReaderMX(StringRef s = StringRef(), Error e = Error()) : rptr(s.begin()), end(s.end()), failure_error(e), str_size(s.size()) {}
// Return remainder of data as a StringRef
StringRef remainder() {
return StringRef(rptr, end - rptr);
}
// Return a pointer to len bytes at the current read position and advance read pos
//Consume a little-Endian data. Since we only run on little-Endian machine, the data on storage is little Endian
const uint8_t * consume(unsigned int len) {
if(rptr == end && len != 0)
throw end_of_stream();
const uint8_t *p = rptr;
rptr += len;
if(rptr > end) {
printf("[ERROR] StringRefReaderMX throw error! string length:%d\n", str_size);
printf("!!!!!!!!!!!![ERROR]!!!!!!!!!!!!!! Worker may die due to the error. Master will stuck when a worker die\n");
throw failure_error;
}
return p;
}
// Return a T from the current read position and advance read pos
template<typename T> const T consume() {
return *(const T *)consume(sizeof(T));
}
// Functions for consuming big endian (network byte oselfer) integers.
// Consumes a big endian number, swaps it to little endian, and returns it.
const int32_t consumeNetworkInt32() { return (int32_t)bigEndian32((uint32_t)consume< int32_t>());}
const uint32_t consumeNetworkUInt32() { return bigEndian32( consume<uint32_t>());}
const int64_t consumeNetworkInt64() { return (int64_t)bigEndian64((uint32_t)consume< int64_t>());}
const uint64_t consumeNetworkUInt64() { return bigEndian64( consume<uint64_t>());}
bool eof() { return rptr == end; }
const uint8_t *rptr, *end;
const int str_size;
Error failure_error;
};
// State common to the restore roles: identity, the known loader/applier interfaces,
// the set of commands already processed, and a bit set of commands currently in progress.
struct RestoreRoleData : NonCopyable, public ReferenceCounted<RestoreRoleData> {
public:
	RestoreRole role;
	UID nodeID;        // ID of this restore role
	int nodeIndex = 0; // Index of this restore role, which is continuous and easy for debugging

	std::map<UID, RestoreLoaderInterface> loadersInterf;
	std::map<UID, RestoreApplierInterface> appliersInterf;
	RestoreApplierInterface masterApplierInterf;

	std::map<CMDUID, int> processedCmd; // key: id of a processed command; the value is always set to 1
	uint32_t inProgressFlag = 0;        // bit i is set while the command with enum value i is being processed

	RestoreRoleData() : role(RestoreRole::Invalid) {}

	~RestoreRoleData() {}

	UID id() const { return nodeID; }

	// True if a command with this id has already been handled.
	bool isCmdProcessed(CMDUID const &cmdID) {
		return processedCmd.find(cmdID) != processedCmd.end();
	}

	// Bit of inProgressFlag that represents phaseEnum. The flag is 32 bits wide, so the
	// enum value must be in [0, 32); anything else would be an invalid shift.
	static uint32_t phaseMask(RestoreCommandEnum phaseEnum) {
		const int phase = (int) phaseEnum;
		ASSERT(phase >= 0 && phase < 32);
		return uint32_t(1) << phase;
	}

	// Helper functions to set/clear the flag when a worker is in the middle of processing an actor.
	void setInProgressFlag(RestoreCommandEnum phaseEnum) {
		inProgressFlag |= phaseMask(phaseEnum);
	}

	void clearInProgressFlag(RestoreCommandEnum phaseEnum) {
		inProgressFlag &= ~phaseMask(phaseEnum);
	}

	bool isInProgress(RestoreCommandEnum phaseEnum) {
		return (inProgressFlag & phaseMask(phaseEnum)) != 0;
	}

	// Forget the processed commands and in-progress flags of the previous version batch.
	void resetPerVersionBatch() {
		processedCmd.clear();
		inProgressFlag = 0;
	}

	// Drop the cached loader and applier interfaces (masterApplierInterf is left untouched).
	void clearInterfaces() {
		loadersInterf.clear();
		appliersInterf.clear();
	}

	// Human-readable identity used as a prefix in log lines.
	std::string describeNode() {
		std::stringstream ss;
		ss << "RestoreRoleData role:" << getRoleStr(role);
		return ss.str();
	}

	// TODO: To remove this function
	std::vector<UID> getApplierIDs() {
		std::vector<UID> applierIDs;
		for (auto &applier : appliersInterf) {
			applierIDs.push_back(applier.first);
		}
		return applierIDs;
	}

	// TODO: To remove this function
	std::vector<UID> getLoaderIDs() {
		std::vector<UID> loaderIDs;
		for (auto &loader : loadersInterf) {
			loaderIDs.push_back(loader.first);
		}
		return loaderIDs;
	}

	// TODO: To remove this function
	// Loader IDs first, then applier IDs.
	std::vector<UID> getWorkerIDs() {
		std::vector<UID> workerIDs;
		for (auto &loader : loadersInterf) {
			workerIDs.push_back(loader.first);
		}
		for (auto &applier : appliersInterf) {
			workerIDs.push_back(applier.first);
		}
		return workerIDs;
	}
};
void printLowerBounds(std::vector<Standalone<KeyRef>> lowerBounds);
void printApplierKeyRangeInfo(std::map<UID, Standalone<KeyRangeRef>> appliers);
#endif

View File

@ -0,0 +1,70 @@
/*
* RestoreUtil.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "fdbserver/RestoreUtil.h"
#include "flow/actorcompiler.h" // This must be the last #include.
// Display names of the restore roles, indexed by the integer value of RestoreRole.
std::vector<std::string> RestoreRoleStr = {"Invalid", "Master", "Loader", "Applier"};
int numRoles = RestoreRoleStr.size();

// Return the display name of role, or "[Unset]" (after logging an error) when the
// integer value of role has no entry in RestoreRoleStr.
std::string getRoleStr(RestoreRole role) {
	const int roleIndex = (int) role;
	const bool hasName = (roleIndex >= 0) && (roleIndex < numRoles);
	if ( !hasName ) {
		printf("[ERROR] role:%d is out of scope\n", roleIndex);
		return "[Unset]";
	}
	return RestoreRoleStr[roleIndex];
}
// CMDUID implementation
// Switches this command UID to newPhase and restarts the command index at 0.
// The batch field is left unchanged. The transition is logged to stdout.
void CMDUID::initPhase(RestoreCommandEnum newPhase) {
    // RestoreCommandEnum is a scoped enum, so it is not promoted to int when passed through
    // printf's variadic arguments; cast explicitly so both arguments match the %d specifiers.
    printf("CMDID, current phase:%d, new phase:%d\n", (int) phase, (int) newPhase);
    phase = (uint16_t) newPhase;
    cmdID = 0;
}
// Advances to the following phase number and restarts the command index at 0.
void CMDUID::nextPhase() {
    cmdID = 0;
    ++phase;
}
// Moves to the next command index within the current phase.
void CMDUID::nextCmd() {
    cmdID += 1;
}
// Returns the stored phase number converted back to RestoreCommandEnum.
RestoreCommandEnum CMDUID::getPhase() {
    return static_cast<RestoreCommandEnum>(phase);
}
// Stores newPhase as the current phase; unlike initPhase(), the command index is not reset.
void CMDUID::setPhase(RestoreCommandEnum newPhase) {
    phase = static_cast<uint16_t>(newPhase);
}
// Stores newBatchIndex in the 16-bit batch field (the int argument is narrowed to uint16_t).
void CMDUID::setBatch(int newBatchIndex) {
    batch = static_cast<uint16_t>(newBatchIndex);
}
uint64_t CMDUID::getIndex() {
return cmdID;
}
// Formats the UID as "<batch>|<phase>|<cmdID>": batch and phase are zero-padded to at least
// 4 digits and cmdID to at least 16 digits.
std::string CMDUID::toString() const {
    // batch and phase are uint16_t and cmdID is uint64_t. Cast each argument to the exact type
    // its conversion specifier expects; the previous %ld/%lld specifiers did not match the
    // (promoted) argument types.
    return format("%04u|%04u|%016llu", (unsigned) batch, (unsigned) phase, (unsigned long long) cmdID);
}

146
fdbserver/RestoreUtil.h Normal file
View File

@ -0,0 +1,146 @@
/*
* RestoreUtil.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// This file defines the common data structures and functions
// that are used by both RestoreWorker and the restore roles (Master, Loader, and Applier)
#ifndef FDBSERVER_RESTOREUTIL_H
#define FDBSERVER_RESTOREUTIL_H
#pragma once
#include "fdbclient/Tuple.h"
#include "flow/flow.h"
#include "flow/Stats.h"
#include "fdbrpc/fdbrpc.h"
#include "fdbrpc/IAsyncFile.h"
// RestoreCommandEnum is also used as the phase ID for CMDUID.
// Enumerators are numbered implicitly starting from Init = 0; the trailing "//N" comment on a
// line gives the integer value of the last enumerator on that line.
enum class RestoreCommandEnum {Init = 0,
Set_Role, Set_Role_Done, //2
Sample_Range_File, Sample_Log_File, Sample_File_Done, //5
Loader_Send_Sample_Mutation_To_Applier, Loader_Send_Sample_Mutation_To_Applier_Done, //7
Calculate_Applier_KeyRange, Get_Applier_KeyRange, Get_Applier_KeyRange_Done, //10
Assign_Applier_KeyRange, Assign_Applier_KeyRange_Done, //12
Assign_Loader_Range_File, Assign_Loader_Log_File, Assign_Loader_File_Done,//15
Loader_Send_Mutations_To_Applier, Loader_Send_Mutations_To_Applier_Done,//17
Apply_Mutation_To_DB, Apply_Mutation_To_DB_Skip, //19
Loader_Notify_Appler_To_Apply_Mutation, //20
Notify_Loader_ApplierKeyRange, Notify_Loader_ApplierKeyRange_Done, //22
Finish_Restore, Reset_VersionBatch, Set_WorkerInterface, Collect_RestoreRoleInterface, //26
Heart_Beat}; //27
// NOTE(review): presumably makes the enum usable with flow's binary serializer - confirm against the macro definition.
BINARY_SERIALIZABLE(RestoreCommandEnum);
// Roles in the restore system. Invalid is 0; Master, Loader and Applier take the values 1, 2 and 3.
enum class RestoreRole {Invalid = 0, Master = 1, Loader, Applier};
BINARY_SERIALIZABLE( RestoreRole );
// Display names for RestoreRole values, indexed by the role's integer value, and the number of names.
extern std::vector<std::string> RestoreRoleStr;
extern int numRoles;
// Returns the display name of role, or "[Unset]" when its integer value is outside [0, numRoles).
std::string getRoleStr(RestoreRole role);
// Identifier of a restore command, made of three parts:
//   batch - batch index, changed through setBatch(); the non-copy constructors start it at 0
//   phase - a RestoreCommandEnum value stored as uint16_t
//   cmdID - index of the command within its phase
// Equality and ordering compare (batch, phase, cmdID) in that order.
class CMDUID {
public:
    uint16_t batch;
    uint16_t phase;
    uint64_t cmdID;

    CMDUID() : batch(0), phase(0), cmdID(0) {}
    // a is the phase and b the command index; batch starts at 0.
    CMDUID( uint16_t a, uint64_t b ) : batch(0), phase(a), cmdID(b) {}
    CMDUID(const CMDUID &cmd) : batch(cmd.batch), phase(cmd.phase), cmdID(cmd.cmdID) {}

    void initPhase(RestoreCommandEnum phase);

    void nextPhase(); // Set to the next phase.

    void nextCmd(); // Increase the command index at the same phase

    RestoreCommandEnum getPhase();
    void setPhase(RestoreCommandEnum newPhase);
    void setBatch(int newBatchIndex);

    uint64_t getIndex();

    std::string toString() const;

    bool operator == ( const CMDUID& r ) const {
        return batch == r.batch && phase == r.phase && cmdID == r.cmdID;
    }
    bool operator != ( const CMDUID& r ) const {
        return !(*this == r);
    }
    // Lexicographic order: batch first, then phase, then cmdID.
    bool operator < ( const CMDUID& r ) const {
        if (batch != r.batch) {
            return batch < r.batch;
        }
        if (phase != r.phase) {
            return phase < r.phase;
        }
        return cmdID < r.cmdID;
    }

    // Changing this serialization format will affect key definitions, so can't simply be versioned!
    template <class Ar>
    void serialize_unversioned(Ar& ar) {
        serializer(ar, batch, phase, cmdID);
    }
};
// Reads a CMDUID from ar using its unversioned field layout.
template <class Ar>
void load( Ar& ar, CMDUID& uid ) {
    uid.serialize_unversioned(ar);
}
template <class Ar> void save( Ar& ar, CMDUID const& uid ) { const_cast<CMDUID&>(uid).serialize_unversioned(ar); }
// Status figures for a fast restore, in two groups ("cur" and "total") of
// workload size, running time and speed.
// NOTE(review): units, and what "cur" covers (presumably the current version batch), are not
// visible in this header - confirm against the code that fills these fields in.
// The struct has no constructor or member initializers, so the fields hold indeterminate
// values until they are assigned.
struct FastRestoreStatus {
double curWorkloadSize;
double curRunningTime;
double curSpeed;
double totalWorkloadSize;
double totalRunningTime;
double totalSpeed;
};
// Common restore request/response interface
// Reply type
// Reply carrying the ID of the responding server and the command being answered.
struct RestoreCommonReply {
    UID id; // unique ID of the server who sends the reply
    CMDUID cmdID; // The restore command for the reply

    RestoreCommonReply() : id(), cmdID() {}
    explicit RestoreCommonReply(UID id, CMDUID cmdID) : id(id), cmdID(cmdID) {}

    // Renders the reply as "ServerNodeID:<id> CMDID:<cmdID>".
    std::string toString() const {
        std::string text = "ServerNodeID:";
        text += id.toString();
        text += " CMDID:";
        text += cmdID.toString();
        return text;
    }

    template <class Ar>
    void serialize(Ar& ar) {
        serializer(ar, id, cmdID);
    }
};
// Request that carries only a command ID; it is answered with a RestoreCommonReply.
struct RestoreSimpleRequest : TimedRequest {
    CMDUID cmdID;

    ReplyPromise<RestoreCommonReply> reply;

    RestoreSimpleRequest() : cmdID() {}
    explicit RestoreSimpleRequest(CMDUID cmdID) : cmdID(cmdID) {}

    // Serializes the command ID followed by the reply promise.
    template <class Ar>
    void serialize( Ar& ar ) {
        serializer(ar, cmdID, reply);
    }
};
#endif //FDBSERVER_RESTOREUTIL_H

View File

@ -18,8 +18,10 @@
* limitations under the License.
*/
#ifndef FDBSERVER_RestoreWorkerInterface_H
#define FDBSERVER_RestoreWorkerInterface_H
// Declare and define the interface for restore worker/loader/applier
#ifndef FDBSERVER_RESTORE_WORKER_INTERFACE_H
#define FDBSERVER_RESTORE_WORKER_INTERFACE_H
#pragma once
#include <sstream>
@ -30,11 +32,12 @@
#include "fdbserver/CoordinationInterface.h"
#include "fdbrpc/Locality.h"
#include "fdbserver/RestoreUtil.h"
//#include "fdbserver/RestoreRoleCommon.actor.h"
#include "flow/actorcompiler.h" // has to be last include
class RestoreConfig;
enum class RestoreRole {Invalid = 0, Master = 1, Loader, Applier};
extern std::vector<std::string> RestoreRoleStr;
BINARY_SERIALIZABLE( RestoreRole );
// Timeout threshold in seconds for restore commands
@ -43,8 +46,7 @@ extern int FastRestore_Failure_Timeout;
struct RestoreCommonReply;
struct GetKeyRangeReply;
struct GetKeyRangeReply;
struct RestoreSetRoleRequest;
struct RestoreSimpleRequest;
struct RestoreRecruitRoleRequest;
struct RestoreLoadFileRequest;
struct RestoreGetApplierKeyRangeRequest;
struct RestoreSetApplierKeyRangeRequest;
@ -54,124 +56,87 @@ struct RestoreCalculateApplierKeyRangeRequest;
struct RestoreSendMutationVectorRequest;
struct RestoreSetApplierKeyRangeVectorRequest;
// RestoreCommandEnum is also used as the phase ID for CMDUID.
// Enumerators are numbered implicitly starting from Init = 0; the trailing "//N" comment on a
// line gives the integer value of the last enumerator on that line.
enum class RestoreCommandEnum {Init = 0,
Set_Role, Set_Role_Done, //2
Sample_Range_File, Sample_Log_File, Sample_File_Done, //5
Loader_Send_Sample_Mutation_To_Applier, Loader_Send_Sample_Mutation_To_Applier_Done, //7
Calculate_Applier_KeyRange, Get_Applier_KeyRange, Get_Applier_KeyRange_Done, //10
Assign_Applier_KeyRange, Assign_Applier_KeyRange_Done, //12
Assign_Loader_Range_File, Assign_Loader_Log_File, Assign_Loader_File_Done,//15
Loader_Send_Mutations_To_Applier, Loader_Send_Mutations_To_Applier_Done,//17
Apply_Mutation_To_DB, Apply_Mutation_To_DB_Skip, //19
Loader_Notify_Appler_To_Apply_Mutation, //20
Notify_Loader_ApplierKeyRange, Notify_Loader_ApplierKeyRange_Done, //22
Finish_Restore, RESET_VersionBatch, Set_WorkerInterface}; //25
BINARY_SERIALIZABLE(RestoreCommandEnum);
// Restore command's UID. uint64_t part[2];
// part[0] is the phase id, part[1] is the command index in the phase.
// TODO: Add another field to indicate version-batch round
class CMDUID {
public:
uint16_t batch;
uint16_t phase;
uint64_t cmdID;
CMDUID() : batch(0), phase(0), cmdID(0) { }
CMDUID( uint16_t a, uint64_t b ) { batch = 0; phase=a; cmdID=b; }
CMDUID(const CMDUID &cmd) { batch = cmd.batch; phase = cmd.phase; cmdID = cmd.cmdID; }
struct RestoreWorkerInterface {
UID interfID;
void initPhase(RestoreCommandEnum phase);
RequestStream<RestoreSimpleRequest> heartbeat;
RequestStream<RestoreRecruitRoleRequest> recruitRole;
RequestStream<RestoreSimpleRequest> terminateWorker;
void nextPhase(); // Set to the next phase.
bool operator == (RestoreWorkerInterface const& r) const { return id() == r.id(); }
bool operator != (RestoreWorkerInterface const& r) const { return id() != r.id(); }
void nextCmd(); // Increase the command index at the same phase
UID id() const { return interfID; } //cmd.getEndpoint().token;
RestoreCommandEnum getPhase();
void setPhase(RestoreCommandEnum newPhase);
void setBatch(int newBatchIndex);
NetworkAddress address() const { return recruitRole.getEndpoint().addresses.address; }
uint64_t getIndex();
void initEndpoints() {
heartbeat.getEndpoint( TaskClusterController );
recruitRole.getEndpoint( TaskClusterController );// Q: Why do we need this?
terminateWorker.getEndpoint( TaskClusterController );
std::string toString() const;
bool operator == ( const CMDUID& r ) const { return batch == r.batch && phase == r.phase && cmdID == r.cmdID; }
bool operator != ( const CMDUID& r ) const { return batch != r.batch || phase != r.phase || cmdID != r.cmdID; }
bool operator < ( const CMDUID& r ) const { return batch < r.batch || (batch == r.batch && phase < r.phase) || (batch == r.batch && phase == r.phase && cmdID < r.cmdID); }
//uint64_t hash() const { return first(); }
//uint64_t first() const { return part[0]; }
//uint64_t second() const { return part[1]; }
interfID = g_random->randomUniqueID();
}
template <class Ar>
void serialize_unversioned(Ar& ar) { // Changing this serialization format will affect key definitions, so can't simply be versioned!
serializer(ar, batch, phase, cmdID);
void serialize( Ar& ar ) {
serializer(ar, interfID, heartbeat, recruitRole, terminateWorker);
}
};
template <class Ar> void load( Ar& ar, CMDUID& uid ) { uid.serialize_unversioned(ar); }
template <class Ar> void save( Ar& ar, CMDUID const& uid ) { const_cast<CMDUID&>(uid).serialize_unversioned(ar); }
struct RestoreRoleInterface {
public:
RestoreRole role;
// NOTE: is cmd's Endpoint token the same with the request's token for the same node?
struct RestoreInterface {
RestoreRoleInterface() {
role = RestoreRole::Invalid;
}
};
struct RestoreLoaderInterface : RestoreRoleInterface {
public:
UID nodeID;
RequestStream<RestoreSimpleRequest> heartbeat;
RequestStream<RestoreSetRoleRequest> setRole;
RequestStream<RestoreLoadFileRequest> sampleRangeFile;
RequestStream<RestoreLoadFileRequest> sampleLogFile;
RequestStream<RestoreSendMutationVectorRequest> sendSampleMutationVector;
RequestStream<RestoreCalculateApplierKeyRangeRequest> calculateApplierKeyRange;
RequestStream<RestoreGetApplierKeyRangeRequest> getApplierKeyRangeRequest;
RequestStream<RestoreSetApplierKeyRangeRequest> setApplierKeyRangeRequest; // To delete
RequestStream<RestoreSetApplierKeyRangeVectorRequest> setApplierKeyRangeVectorRequest;
RequestStream<RestoreLoadFileRequest> loadRangeFile;
RequestStream<RestoreLoadFileRequest> loadLogFile;
RequestStream<RestoreSendMutationVectorRequest> sendMutationVector;
RequestStream<RestoreSimpleRequest> applyToDB;
RequestStream<RestoreVersionBatchRequest> initVersionBatch;
RequestStream<RestoreSimpleRequest> setWorkerInterface;
RequestStream<RestoreSimpleRequest> collectRestoreRoleInterfaces; // TODO: Change to collectRestoreRoleInterfaces
RequestStream<RestoreSimpleRequest> finishRestore;
// ToDelete
// RequestStream< struct RestoreCommand > cmd; // Restore commands from master to loader and applier
// RequestStream< struct RestoreRequest > request; // Restore requests used by loader and applier
bool operator == (RestoreWorkerInterface const& r) const { return id() == r.id(); }
bool operator != (RestoreWorkerInterface const& r) const { return id() != r.id(); }
bool operator == (RestoreInterface const& r) const { return id() == r.id(); }
bool operator != (RestoreInterface const& r) const { return id() != r.id(); }
UID id() const { return nodeID; }
UID id() const { return nodeID; } //cmd.getEndpoint().token;
NetworkAddress address() const { return setRole.getEndpoint().addresses.address; }
NetworkAddress address() const { return heartbeat.getEndpoint().addresses.address; }
void initEndpoints() {
heartbeat.getEndpoint( TaskClusterController );
setRole.getEndpoint( TaskClusterController );// Q: Why do we need this?
sampleRangeFile.getEndpoint( TaskClusterController );
sampleLogFile.getEndpoint( TaskClusterController );
sendSampleMutationVector.getEndpoint( TaskClusterController );
calculateApplierKeyRange.getEndpoint( TaskClusterController );
getApplierKeyRangeRequest.getEndpoint( TaskClusterController );
setApplierKeyRangeRequest.getEndpoint( TaskClusterController );
setApplierKeyRangeVectorRequest.getEndpoint( TaskClusterController );
loadRangeFile.getEndpoint( TaskClusterController );
loadLogFile.getEndpoint( TaskClusterController );
sendMutationVector.getEndpoint( TaskClusterController );
applyToDB.getEndpoint( TaskClusterController );
initVersionBatch.getEndpoint( TaskClusterController );
setWorkerInterface.getEndpoint( TaskClusterController );
collectRestoreRoleInterfaces.getEndpoint( TaskClusterController );
finishRestore.getEndpoint( TaskClusterController );
nodeID = g_random->randomUniqueID();
@ -179,10 +144,73 @@ struct RestoreInterface {
template <class Ar>
void serialize( Ar& ar ) {
serializer(ar, nodeID, heartbeat, setRole, sampleRangeFile, sampleLogFile, sendSampleMutationVector,
calculateApplierKeyRange, getApplierKeyRangeRequest, setApplierKeyRangeRequest, setApplierKeyRangeVectorRequest,
loadRangeFile, loadLogFile, sendMutationVector, applyToDB, initVersionBatch, setWorkerInterface,
finishRestore);
serializer(ar, nodeID, heartbeat, sampleRangeFile, sampleLogFile,
setApplierKeyRangeVectorRequest, loadRangeFile, loadLogFile,
initVersionBatch, collectRestoreRoleInterfaces, finishRestore);
}
};
struct RestoreApplierInterface : RestoreRoleInterface {
public:
UID nodeID;
RequestStream<RestoreSimpleRequest> heartbeat;
RequestStream<RestoreCalculateApplierKeyRangeRequest> calculateApplierKeyRange;
RequestStream<RestoreGetApplierKeyRangeRequest> getApplierKeyRangeRequest;
RequestStream<RestoreSetApplierKeyRangeRequest> setApplierKeyRangeRequest;
RequestStream<RestoreSendMutationVectorRequest> sendSampleMutationVector;
RequestStream<RestoreSendMutationVectorRequest> sendMutationVector;
RequestStream<RestoreSimpleRequest> applyToDB;
RequestStream<RestoreVersionBatchRequest> initVersionBatch;
RequestStream<RestoreSimpleRequest> collectRestoreRoleInterfaces;
RequestStream<RestoreSimpleRequest> finishRestore;
bool operator == (RestoreWorkerInterface const& r) const { return id() == r.id(); }
bool operator != (RestoreWorkerInterface const& r) const { return id() != r.id(); }
UID id() const { return nodeID; }
NetworkAddress address() const { return heartbeat.getEndpoint().addresses.address; }
void initEndpoints() {
heartbeat.getEndpoint( TaskClusterController );
calculateApplierKeyRange.getEndpoint( TaskClusterController );
getApplierKeyRangeRequest.getEndpoint( TaskClusterController );
setApplierKeyRangeRequest.getEndpoint( TaskClusterController );
sendSampleMutationVector.getEndpoint( TaskClusterController );
sendMutationVector.getEndpoint( TaskClusterController );
applyToDB.getEndpoint( TaskClusterController );
initVersionBatch.getEndpoint( TaskClusterController );
collectRestoreRoleInterfaces.getEndpoint( TaskClusterController );
finishRestore.getEndpoint( TaskClusterController );
nodeID = g_random->randomUniqueID();
}
template <class Ar>
void serialize( Ar& ar ) {
serializer(ar, nodeID, heartbeat, calculateApplierKeyRange,
getApplierKeyRangeRequest, setApplierKeyRangeRequest,
sendSampleMutationVector, sendMutationVector,
applyToDB, initVersionBatch, collectRestoreRoleInterfaces, finishRestore);
}
std::string toString() {
return nodeID.toString();
}
};
@ -215,21 +243,26 @@ struct LoadingParam {
};
struct RestoreSetRoleRequest : TimedRequest {
struct RestoreRecruitRoleRequest : TimedRequest {
CMDUID cmdID;
RestoreRole role;
int nodeIndex;
UID masterApplierID;
int nodeIndex; // Each role is a node
ReplyPromise<RestoreCommonReply> reply;
RestoreSetRoleRequest() : cmdID(CMDUID()), role(RestoreRole::Invalid) {}
explicit RestoreSetRoleRequest(CMDUID cmdID, RestoreRole role, int nodeIndex, UID masterApplierID) :
cmdID(cmdID), role(role), nodeIndex(nodeIndex), masterApplierID(masterApplierID) {}
RestoreRecruitRoleRequest() : cmdID(CMDUID()), role(RestoreRole::Invalid) {}
explicit RestoreRecruitRoleRequest(CMDUID cmdID, RestoreRole role, int nodeIndex) :
cmdID(cmdID), role(role), nodeIndex(nodeIndex){}
template <class Ar>
void serialize( Ar& ar ) {
serializer(ar, cmdID, role, nodeIndex, masterApplierID, reply);
serializer(ar, cmdID, role, nodeIndex, reply);
}
std::string printable() {
std::stringstream ss;
ss << "CMDID:" << cmdID.toString() << " Role:" << getRoleStr(role) << " NodeIndex:" << nodeIndex;
return ss.str();
}
};
@ -265,20 +298,6 @@ struct RestoreSendMutationVectorRequest : TimedRequest {
}
};
// CalculateApplierKeyRange, applyToDB
// Request that carries only a command ID; it is answered with a RestoreCommonReply.
struct RestoreSimpleRequest : TimedRequest {
    CMDUID cmdID;

    ReplyPromise<RestoreCommonReply> reply;

    RestoreSimpleRequest() : cmdID() {}
    explicit RestoreSimpleRequest(CMDUID cmdID) : cmdID(cmdID) {}

    // Serializes the command ID followed by the reply promise.
    template <class Ar>
    void serialize( Ar& ar ) {
        serializer(ar, cmdID, reply);
    }
};
struct RestoreCalculateApplierKeyRangeRequest : TimedRequest {
CMDUID cmdID;
@ -358,28 +377,6 @@ struct RestoreSetApplierKeyRangeVectorRequest : TimedRequest {
}
};
// Reply type
// Reply carrying the ID of the responding server and the command being answered.
struct RestoreCommonReply {
    UID id; // unique ID of the server who sends the reply
    CMDUID cmdID; // The restore command for the reply

    RestoreCommonReply() : id(), cmdID() {}
    explicit RestoreCommonReply(UID id, CMDUID cmdID) : id(id), cmdID(cmdID) {}

    // Renders the reply as "ServerNodeID:<id> CMDID:<cmdID>".
    std::string toString() const {
        std::string text = "ServerNodeID:";
        text += id.toString();
        text += " CMDID:";
        text += cmdID.toString();
        return text;
    }

    template <class Ar>
    void serialize(Ar& ar) {
        serializer(ar, id, cmdID);
    }
};
struct GetKeyRangeReply : RestoreCommonReply {
int index;
Standalone<KeyRef> lowerBound; // inclusive

View File

@ -53,7 +53,10 @@
<ActorCompiler Include="workloads\SaveAndKill.actor.cpp" />
<ActorCompiler Include="Resolver.actor.cpp" />
<ActorCompiler Include="Restore.actor.cpp" />
<ActorCompiler Include="RestoreUtil.actor.cpp" />
<ActorCompiler Include="RestoreCommon.actor.cpp" />
<ActorCompiler Include="RestoreRoleCommon.actor.cpp" />
<ActorCompiler Include="RestoreMaster.actor.cpp" />
<ActorCompiler Include="RestoreLoader.actor.cpp" />
<ActorCompiler Include="RestoreApplier.actor.cpp" />
<ActorCompiler Include="LogSystemDiskQueueAdapter.actor.cpp" />
@ -199,7 +202,13 @@
<ClInclude Include="RatekeeperInterface.h" />
<ClInclude Include="RecoveryState.h" />
<ClInclude Include="ResolverInterface.h" />
<ClInclude Include="RestoreWorkerInterface.h" />
<ClInclude Include="RestoreUtil.h" />
<ActorCompiler Include="RestoreRoleCommon.actor.h">
<EnableCompile>false</EnableCompile>
</ActorCompiler>
<ActorCompiler Include="RestoreMaster.actor.h">
<EnableCompile>false</EnableCompile>
</ActorCompiler>
<ActorCompiler Include="RestoreLoader.actor.h">
<EnableCompile>false</EnableCompile>
</ActorCompiler>
@ -209,6 +218,7 @@
<ActorCompiler Include="RestoreCommon.actor.h">
<EnableCompile>false</EnableCompile>
</ActorCompiler>
<ClInclude Include="RestoreWorkerInterface.h" />
<ClInclude Include="ServerDBInfo.h" />
<ClInclude Include="SimulatedCluster.h" />
<ClInclude Include="sqlite\btree.h" />