FastRestore: Filter out empty files before distributing workload

and clean up unused code
This commit is contained in:
Meng Xu 2020-01-07 15:24:38 -08:00
parent c29e380076
commit a2b26906e8
3 changed files with 8 additions and 24 deletions

View File

@ -246,16 +246,12 @@ struct RestoreAsset {
}
};
// TODO: It is probably better to specify the (beginVersion, endVersion] for each loadingParam.
// beginVersion (endVersion) is the version the applier is before (after) it receives the request.
struct LoadingParam {
constexpr static FileIdentifier file_identifier = 17023837;
bool isRangeFile;
Key url;
//Version prevVersion;
Version rangeVersion; // range file's version
// Version endVersion; // range file's mutations are all at the endVersion
int64_t blockSize;
RestoreAsset asset;
@ -271,14 +267,11 @@ struct LoadingParam {
// Hands the LoadingParam members to serializer() together with the archive `ar`.
// The members are passed in this order: isRangeFile, url, rangeVersion, blockSize, asset.
template <class Ar>
void serialize(Ar& ar) {
// Earlier field list, which used prevVersion and endVersion in place of rangeVersion:
// serializer(ar, isRangeFile, url, prevVersion, endVersion, blockSize, asset);
serializer(ar, isRangeFile, url, rangeVersion, blockSize, asset);
}
std::string toString() {
std::stringstream str;
// str << "isRangeFile:" << isRangeFile << " url:" << url.toString() << " prevVersion:" << prevVersion
// << " endVersion:" << endVersion << " blockSize:" << blockSize << " RestoreAsset:" << asset.toString();
str << "isRangeFile:" << isRangeFile << " url:" << url.toString()
<< " rangeVersion:" << rangeVersion << " blockSize:" << blockSize << " RestoreAsset:" << asset.toString();
return str.str();

View File

@ -273,9 +273,6 @@ ACTOR static Future<Void> loadFilesOnLoaders(Reference<RestoreMasterData> self,
std::vector<std::pair<UID, RestoreLoadFileRequest>> requests;
std::map<UID, RestoreLoaderInterface>::iterator loader = self->loadersInterf.begin();
// TODO: Remove files that are empty before proceed
// ASSERT(files->size() > 0); // files should not be empty
Version prevVersion = 0;
for (auto& file : *files) {
// NOTE: Cannot skip empty files because empty files, e.g., log file, still need to generate dummy mutation to
@ -285,9 +282,6 @@ ACTOR static Future<Void> loadFilesOnLoaders(Reference<RestoreMasterData> self,
}
// Prepare loading
LoadingParam param;
//param.prevVersion = 0; // Each file's NotifiedVersion starts from 0
// param.endVersion = file.isRange ? file.version : file.endVersion;
param.url = request.url;
param.isRangeFile = file.isRange;
param.rangeVersion = file.isRange ? file.version : -1;
@ -304,9 +298,8 @@ ACTOR static Future<Void> loadFilesOnLoaders(Reference<RestoreMasterData> self,
prevVersion = param.asset.endVersion;
// Log file to be loaded
TraceEvent("FastRestore").detail("LoadParam", param.toString()).detail("LoaderID", loader->first.toString());
ASSERT_WE_THINK(param.asset.len >= 0); // we may load an empty file
ASSERT_WE_THINK(param.asset.len > 0); // TODO: ensure empty files are not included here
ASSERT_WE_THINK(param.asset.offset >= 0);
ASSERT_WE_THINK(param.asset.offset <= file.fileSize);
ASSERT_WE_THINK(param.asset.beginVersion <= param.asset.endVersion);

View File

@ -127,7 +127,7 @@ struct RestoreMasterData : RestoreRoleData, public ReferenceCounted<RestoreMaste
// Input: Get the size of data in backup files in version range [prevVersion, nextVersion)
// Return: param1: the size of data at nextVersion, param2: the minimum range file index whose version >
// nextVersion, param3: log files with data in [prevVersion, nextVersion]
// nextVersion, param3: log files with data in [prevVersion, nextVersion)
std::tuple<double, int, std::vector<RestoreFileFR>> getVersionSize(Version prevVersion, Version nextVersion,
const std::vector<RestoreFileFR>& rangeFiles,
int rangeIdx,
@ -170,9 +170,9 @@ struct RestoreMasterData : RestoreRoleData, public ReferenceCounted<RestoreMaste
// Split backup files into version batches, each of which has similar data size
// Input: sorted range files, sorted log files;
// Output: a set of version batches whose size is less than opConfig.batchSizeThreshold
// and each mutation data in backup files is included in the version batches exactly once
// Assumption 1: input files has no empty files
// Assumption 2: range files at one version > batchSizeThreshold
// and each mutation data in backup files is included in the version batches exactly once.
// Assumption 1: input files have no empty files;
// Assumption 2: range files at one version > batchSizeThreshold.
void buildVersionBatches(const std::vector<RestoreFileFR>& rangeFiles, const std::vector<RestoreFileFR>& logFiles,
std::map<Version, VersionBatch>* versionBatches) {
// Version batch range [beginVersion, endVersion)
@ -204,9 +204,9 @@ struct RestoreMasterData : RestoreRoleData, public ReferenceCounted<RestoreMaste
}
} else {
TraceEvent(SevError, "FastRestoreBuildVersionBatch")
.detail("RangeIdx", rangeIdx)
.detail("RangeIndex", rangeIdx)
.detail("RangeFiles", rangeFiles.size())
.detail("LogIdx", logIdx)
.detail("LogIndex", logIdx)
.detail("LogFiles", logFiles.size());
}
} else {
@ -257,8 +257,7 @@ struct RestoreMasterData : RestoreRoleData, public ReferenceCounted<RestoreMaste
if (batchSize < 1) {
// [vb.endVersion, nextVersion) > opConfig.batchSizeThreshold. We should split the version range
if (prevEndVersion >= nextVersion) {
// If range files at one version > batchSizeThreshold, DBA should increase the
// batchSizeThreshold
// If range files at one version > batchSizeThreshold, DBA should increase batchSizeThreshold
TraceEvent(SevError, "FastRestoreBuildVersionBatch")
.detail("NextVersion", nextVersion)
.detail("PrevEndVersion", prevEndVersion)
@ -273,7 +272,6 @@ struct RestoreMasterData : RestoreRoleData, public ReferenceCounted<RestoreMaste
continue;
}
// Finalize the current version batch
// vb.endVersion = nextVersion;
vb.size = batchSize;
versionBatches->emplace(vb.beginVersion, vb); // copy vb to versionBatch
// start finding the next version batch