Filter partitioned logs with subset relationship
If a log file's progress is not saved, a new log file is generated with the same begin version. As a result, one log file can contain a subset of the contents of another log file. During restore, we should filter out files whose contents are a subset of another file's contents.
This commit is contained in:
parent
696ce6aa82
commit
20df67ee6a
|
@ -81,10 +81,10 @@ std::vector<LogFile> getRelevantLogFiles(const std::vector<LogFile>& files, Vers
|
|||
std::vector<LogFile> sorted;
|
||||
int i = 0;
|
||||
for (int j = 1; j < filtered.size(); j++) {
|
||||
if (!filtered[i].sameContent(filtered[j])) {
|
||||
if (!filtered[i].isSubset(filtered[j])) {
|
||||
sorted.push_back(filtered[i]);
|
||||
i = j;
|
||||
}
|
||||
i = j;
|
||||
}
|
||||
if (i < filtered.size()) {
|
||||
sorted.push_back(filtered[i]);
|
||||
|
|
|
@ -1155,16 +1155,19 @@ std::cout << "lastBegin " << lastBegin << ", lastEnd " << lastEnd << ", end " <<
|
|||
return true;
|
||||
}
|
||||
|
||||
// Returns log files that are not duplicated.
|
||||
// Returns log files that are neither duplicates nor subsets of another log file.
|
||||
// If a log file's progress is not saved, a new log file will be generated
|
||||
// with the same begin version. So we can have a file that contains a subset
|
||||
// of contents in another log file.
|
||||
// PRE-CONDITION: logs are already sorted.
|
||||
static std::vector<LogFile> filterDuplicates(const std::vector<LogFile>& logs) {
|
||||
std::vector<LogFile> filtered;
|
||||
int i = 0;
|
||||
for (int j = 1; j < logs.size(); j++) {
|
||||
if (!logs[i].sameContent(logs[j])) {
|
||||
if (!logs[i].isSubset(logs[j])) {
|
||||
filtered.push_back(logs[i]);
|
||||
i = j;
|
||||
}
|
||||
i = j;
|
||||
}
|
||||
if (i < logs.size()) filtered.push_back(logs[i]);
|
||||
return filtered;
|
||||
|
@ -1180,7 +1183,7 @@ std::cout << "lastBegin " << lastBegin << ", lastEnd " << lastEnd << ", end " <<
|
|||
Version end = getPartitionedLogsContinuousEndVersion(logs, file.beginVersion);
|
||||
std::cout << " determine " << file.toString() << " , end " << end << "\n\n";
|
||||
if (end > file.beginVersion) {
|
||||
desc->minLogBegin = file.beginVersion;
|
||||
// desc->minLogBegin = file.beginVersion;
|
||||
// contiguousLogEnd is not inclusive, so +1 here.
|
||||
desc->contiguousLogEnd.get() = end + 1;
|
||||
return;
|
||||
|
|
|
@ -82,9 +82,10 @@ struct LogFile {
|
|||
return beginVersion == rhs.beginVersion ? endVersion < rhs.endVersion : beginVersion < rhs.beginVersion;
|
||||
}
|
||||
|
||||
// Returns if two log files have the same content by comparing version range and tag ID.
|
||||
bool sameContent(const LogFile& rhs) const {
|
||||
return beginVersion == rhs.beginVersion && endVersion == rhs.endVersion && tagId == rhs.tagId;
|
||||
// Returns whether this log file's content is a subset of the given file's content,
|
||||
// by comparing version range and tag ID.
|
||||
bool isSubset(const LogFile& rhs) const {
|
||||
return beginVersion == rhs.beginVersion && endVersion <= rhs.endVersion && tagId == rhs.tagId;
|
||||
}
|
||||
|
||||
std::string toString() const {
|
||||
|
|
Loading…
Reference in New Issue