Fix duplicate file removal for subset version ranges
Partitioned log files can have a version range that is a strict subset of another log file's version range. This case was not handled properly: we used to assume that overlapping only happens between files with the same begin version.
This commit is contained in:
parent
1a1f572f29
commit
4c75c61f39
|
@ -1156,11 +1156,13 @@ public:
|
||||||
// If a log file's progress is not saved, a new log file will be generated
|
// If a log file's progress is not saved, a new log file will be generated
|
||||||
// with the same begin version. So we can have a file that contains a subset
|
// with the same begin version. So we can have a file that contains a subset
|
||||||
// of contents in another log file.
|
// of contents in another log file.
|
||||||
// PRE-CONDITION: logs are already sorted.
|
// PRE-CONDITION: logs are already sorted by (tagId, beginVersion, endVersion).
|
||||||
static std::vector<LogFile> filterDuplicates(const std::vector<LogFile>& logs) {
|
static std::vector<LogFile> filterDuplicates(const std::vector<LogFile>& logs) {
|
||||||
std::vector<LogFile> filtered;
|
std::vector<LogFile> filtered;
|
||||||
int i = 0;
|
int i = 0;
|
||||||
for (int j = 1; j < logs.size(); j++) {
|
for (int j = 1; j < logs.size(); j++) {
|
||||||
|
if (logs[j].isSubset(logs[i])) continue;
|
||||||
|
|
||||||
if (!logs[i].isSubset(logs[j])) {
|
if (!logs[i].isSubset(logs[j])) {
|
||||||
filtered.push_back(logs[i]);
|
filtered.push_back(logs[i]);
|
||||||
}
|
}
|
||||||
|
@ -1196,9 +1198,13 @@ public:
|
||||||
for (int i = 0; i < logs.size(); i++) {
|
for (int i = 0; i < logs.size(); i++) {
|
||||||
ASSERT(logs[i].tagId >= 0 && logs[i].tagId < logs[i].totalTags);
|
ASSERT(logs[i].tagId >= 0 && logs[i].tagId < logs[i].totalTags);
|
||||||
auto& indices = tagIndices[logs[i].tagId];
|
auto& indices = tagIndices[logs[i].tagId];
|
||||||
// filter out if indices.back() is subset of files[i]
|
// filter out if indices.back() is subset of files[i] or vice versa
|
||||||
if (!indices.empty() && logs[indices.back()].isSubset(logs[i])) {
|
if (!indices.empty()) {
|
||||||
indices.back() = i;
|
if (logs[indices.back()].isSubset(logs[i])) {
|
||||||
|
indices.back() = i;
|
||||||
|
} else if (!logs[i].isSubset(logs[indices.back()])) {
|
||||||
|
indices.push_back(i);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
indices.push_back(i);
|
indices.push_back(i);
|
||||||
}
|
}
|
||||||
|
|
|
@ -85,7 +85,7 @@ struct LogFile {
|
||||||
// Returns if this log file contains a subset of content of the given file
|
// Returns if this log file contains a subset of content of the given file
|
||||||
// by comparing version range and tag ID.
|
// by comparing version range and tag ID.
|
||||||
bool isSubset(const LogFile& rhs) const {
|
bool isSubset(const LogFile& rhs) const {
|
||||||
return beginVersion == rhs.beginVersion && endVersion <= rhs.endVersion && tagId == rhs.tagId;
|
return beginVersion >= rhs.beginVersion && endVersion <= rhs.endVersion && tagId == rhs.tagId;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string toString() const {
|
std::string toString() const {
|
||||||
|
|
Loading…
Reference in New Issue