Restore kv (#9030)
* Allow multiple key ranges in CheckpointRequest; include the DataMove ID in CheckpointMetaData.
* Use UID dataMoveId instead of Optional<UID>.
* Implemented ShardedRocks::checkpoint().
* Implemented createCheckpoint().
* Reworked getCheckpointMetaData*() to operate on a single keyrange.
* Added getCheckpointMetaDataForRange().
* Minor fixes for NativeAPI.actor.cpp.
* Replaced UID CheckpointMetaData::ssId with std::vector<UID> CheckpointMetaData::src.
* Implemented getCheckpointMetaData() and completed checkpoint creation and fetch in a test.
* Refactored CheckpointRequest and CheckpointMetaData: renamed `dataMoveId` to `actionId` and made it Optional.
* Fixed the CheckpointMetaData constructor.
* Implemented ShardedRocksDB::restore().
* Tested checkpoint restore and added a range check so that the target ranges can be a subset of the checkpoint ranges.
* Added a test that partially restores a checkpoint.
* Refactor: added checkpointRestore().
* Sort ranges for comparison.
* Check restore ranges are empty; add ranges in the main thread.
* Fixed a GetCheckpointMetaData range-check issue.
* Refactored CheckpointReader for column-family checkpoints.
* Added CheckpointAsKeyValues as a parameter of newCheckpointReader().
* Added PhysicalShard::restoreKvs().
* Added `ranges` to fetchCheckpoint.
* Added RocksDBCheckpointKeyValues::ranges.
* Added ICheckpointIterator and implemented it for RocksDBCheckpointReader.
* Refactored OpenAction for CheckpointReader and handled failure cases.
* Use RocksDBCheckpointIterator::end() in readRange.
* Set the CheckpointReader timeout and other RocksDB read options.
* Implemented fetchCheckpointRange().
* Added more CheckpointReader tests, including a fetchCheckpointRanges test.
* Implemented restore of kv-based checkpoints.
* Improved the CheckpointRestore test to cover kv, non-kv, and partial restore.
* Added and fixed a merge test.
* Cleanups and resolved review comments.

Co-authored-by: He Liu <heliu@apple.com>
parent 827c3b63ce
commit ec6716ff2e
@@ -8794,32 +8794,39 @@ static Future<Void> createCheckpointImpl(T tr,
     ASSERT(actionId.present());
     TraceEvent(SevDebug, "CreateCheckpointTransactionBegin").detail("Ranges", describe(ranges));

-    // Only the first range is used to look up the location, since we assume all ranges are hosted by a single shard.
-    // The operation will fail on the storage server otherwise.
-    state RangeResult keyServers = wait(krmGetRanges(tr, keyServersPrefix, ranges[0]));
-    ASSERT(!keyServers.more);
-
     state RangeResult UIDtoTagMap = wait(tr->getRange(serverTagKeys, CLIENT_KNOBS->TOO_MANY));
     ASSERT(!UIDtoTagMap.more && UIDtoTagMap.size() < CLIENT_KNOBS->TOO_MANY);

-    if (format == DataMoveRocksCF) {
+    state std::unordered_map<UID, std::vector<KeyRange>> rangeMap;
+    state std::unordered_map<UID, std::vector<UID>> srcMap;
+    for (const auto& range : ranges) {
+        RangeResult keyServers = wait(krmGetRanges(tr, keyServersPrefix, range));
+        ASSERT(!keyServers.more);
+        for (int i = 0; i < keyServers.size() - 1; ++i) {
+            const KeyRangeRef currentRange(keyServers[i].key, keyServers[i + 1].key);
             std::vector<UID> src;
             std::vector<UID> dest;
             UID srcId;
             UID destId;
-        decodeKeyServersValue(UIDtoTagMap, keyServers[0].value, src, dest, srcId, destId);
+            decodeKeyServersValue(UIDtoTagMap, keyServers[i].value, src, dest, srcId, destId);
+            rangeMap[srcId].push_back(currentRange);
+            srcMap.emplace(srcId, src);
+        }
+    }

+    if (format == DataMoveRocksCF) {
+        for (const auto& [srcId, ranges] : rangeMap) {
             // The checkpoint request is sent to all replicas, in case any of them is unhealthy.
             // An alternative is to choose a healthy replica.
             const UID checkpointID = UID(srcId.first(), deterministicRandom()->randomUInt64());
-        CheckpointMetaData checkpoint(ranges, format, src, checkpointID, actionId.get());
+            CheckpointMetaData checkpoint(ranges, format, srcMap[srcId], checkpointID, actionId.get());
             checkpoint.setState(CheckpointMetaData::Pending);
             tr->set(checkpointKeyFor(checkpointID), checkpointValue(checkpoint));

             TraceEvent(SevDebug, "CreateCheckpointTransactionShard")
                 .detail("CheckpointKey", checkpointKeyFor(checkpointID))
-            .detail("CheckpointMetaData", checkpoint.toString())
-            .detail("ReadVersion", tr->getReadVersion().get());
+                .detail("CheckpointMetaData", checkpoint.toString());
+        }
     } else {
         throw not_implemented();
     }
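For context on the hunk above: createCheckpointImpl() now groups the requested ranges by the source shard that hosts them, so one CheckpointMetaData record is written per source team instead of assuming a single shard hosts every range. Below is a minimal, standalone sketch of that grouping step; OwnerId, Range, and lookupOwner() are hypothetical stand-ins for FDB's UID, KeyRange, and the krmGetRanges()/decodeKeyServersValue() lookup, not the real APIs.

#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

// Hypothetical stand-ins for FDB's UID and KeyRange types.
using OwnerId = std::string;
struct Range {
    std::string begin, end;
};

// Hypothetical lookup: in the real code this is krmGetRanges() + decodeKeyServersValue().
OwnerId lookupOwner(const Range& r) {
    return r.begin < "m" ? "shard-A" : "shard-B";
}

int main() {
    std::vector<Range> requested = { { "a", "c" }, { "c", "f" }, { "q", "t" } };

    // Group the requested ranges by the shard that currently hosts them,
    // mirroring rangeMap/srcMap in createCheckpointImpl(): one checkpoint
    // record is then created per owner instead of one for all ranges.
    std::unordered_map<OwnerId, std::vector<Range>> rangeMap;
    for (const auto& r : requested) {
        rangeMap[lookupOwner(r)].push_back(r);
    }

    for (const auto& [owner, ranges] : rangeMap) {
        std::cout << owner << " gets " << ranges.size() << " range(s)\n";
    }
}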
@@ -8975,6 +8982,7 @@ ACTOR Future<std::vector<CheckpointMetaData>> getCheckpointMetaData(Database cx,
                                                                     double timeout) {
     state std::vector<Future<std::vector<CheckpointMetaData>>> futures;

+    // TODO(heliu): Avoid sending requests to the same shard.
     for (const auto& range : ranges) {
         futures.push_back(getCheckpointMetaDataForRange(cx, range, version, format, actionId, timeout));
     }
@@ -545,18 +545,55 @@ struct PhysicalShard {

     // Restore from the checkpoint.
     rocksdb::Status restore(const CheckpointMetaData& checkpoint) {
+        const CheckpointFormat format = checkpoint.getFormat();
+        rocksdb::Status status;
+        if (format == DataMoveRocksCF) {
             rocksdb::ExportImportFilesMetaData metaData = getMetaData(checkpoint);
             rocksdb::ImportColumnFamilyOptions importOptions;
             importOptions.move_files = true;
-        rocksdb::Status status = db->CreateColumnFamilyWithImport(getCFOptions(), id, importOptions, metaData, &cf);
+            status = db->CreateColumnFamilyWithImport(getCFOptions(), id, importOptions, metaData, &cf);

             if (!status.ok()) {
-            logRocksDBError(status, "Restore");
+                logRocksDBError(status, "RocksImportColumnFamily");
-            return status;
+            }
+        } else if (format == RocksDBKeyValues) {
+            std::vector<std::string> sstFiles;
+            const RocksDBCheckpointKeyValues rcp = getRocksKeyValuesCheckpoint(checkpoint);
+            for (const auto& file : rcp.fetchedFiles) {
+                TraceEvent(SevDebug, "RocksDBRestoreFile")
+                    .detail("Shard", id)
+                    .detail("CheckpointID", checkpoint.checkpointID)
+                    .detail("File", file.toString());
+                sstFiles.push_back(file.path);
             }

+            if (!sstFiles.empty()) {
+                ASSERT(cf != nullptr);
+                rocksdb::IngestExternalFileOptions ingestOptions;
+                ingestOptions.move_files = true;
+                ingestOptions.write_global_seqno = false;
+                ingestOptions.verify_checksums_before_ingest = true;
+                status = db->IngestExternalFile(cf, sstFiles, ingestOptions);
+                if (!status.ok()) {
+                    logRocksDBError(status, "RocksIngestExternalFile");
+                }
+            } else {
+                TraceEvent(SevWarn, "RocksDBServeRestoreEmptyRange")
+                    .detail("Shard", id)
+                    .detail("RocksKeyValuesCheckpoint", rcp.toString())
+                    .detail("Checkpoint", checkpoint.toString());
+            }
+            TraceEvent(SevInfo, "RocksDBServeRestoreEnd")
+                .detail("Shard", id)
+                .detail("Checkpoint", checkpoint.toString());
+        } else {
+            throw not_implemented();
+        }
+
+        if (status.ok() && !this->isInitialized) {
             readIterPool = std::make_shared<ReadIteratorPool>(db, cf, id);
             this->isInitialized.store(true);
+        }
         return status;
     }

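The new RocksDBKeyValues branch above is, at its core, a plain RocksDB external-file ingestion. The standalone sketch below shows that call with the same IngestExternalFileOptions values used in the diff; the helper name and its surrounding structure are hypothetical, and error handling is reduced to returning the status.

#include <rocksdb/db.h>
#include <rocksdb/options.h>

#include <string>
#include <vector>

// Hypothetical helper mirroring the RocksDBKeyValues branch of
// PhysicalShard::restore(): ingest fetched SST files into an existing
// column family with the same options used in the diff.
rocksdb::Status ingestFetchedSstFiles(rocksdb::DB* db,
                                      rocksdb::ColumnFamilyHandle* cf,
                                      const std::vector<std::string>& sstFiles) {
    if (sstFiles.empty()) {
        // Nothing was fetched for this shard; the diff logs a warning here.
        return rocksdb::Status::OK();
    }
    rocksdb::IngestExternalFileOptions ingestOptions;
    ingestOptions.move_files = true;                     // Move, don't copy, the SSTs.
    ingestOptions.write_global_seqno = false;            // Leave the files unmodified.
    ingestOptions.verify_checksums_before_ingest = true; // Catch corrupted fetches early.
    return db->IngestExternalFile(cf, sstFiles, ingestOptions);
}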
@@ -2236,6 +2273,9 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {

         rocksdb::Status status;
         rocksdb::WriteBatch writeBatch;
+        rocksdb::WriteOptions options;
+        options.sync = !SERVER_KNOBS->ROCKSDB_UNSAFE_AUTO_FSYNC;
+
         if (format == DataMoveRocksCF) {
             CheckpointMetaData& checkpoint = a.checkpoints.front();
             std::sort(a.ranges.begin(), a.ranges.end(), KeyRangeRef::ArbitraryOrder());
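The hunk above hoists the RocksDB WriteOptions used throughout the restore path: both the metadata update and the revert DeleteRange batch are written with options.sync unless the unsafe-auto-fsync knob is set. A minimal standalone sketch of a synced WriteBatch write, including a range delete as used by the revert path further down, follows; the database path and keys are illustrative only.

#include <rocksdb/db.h>
#include <rocksdb/options.h>
#include <rocksdb/write_batch.h>

#include <cassert>

int main() {
    rocksdb::Options dbOptions;
    dbOptions.create_if_missing = true;

    rocksdb::DB* db = nullptr;
    rocksdb::Status status = rocksdb::DB::Open(dbOptions, "/tmp/restore-sketch-db", &db);
    assert(status.ok());

    // Batch several mutations, including a range delete (used by the revert
    // path in the diff), and persist them with a synced write.
    rocksdb::WriteBatch writeBatch;
    writeBatch.Put("shardMapping/keyA", "shard-1");
    writeBatch.DeleteRange(db->DefaultColumnFamily(), "TestKeyA", "TestKeyB");

    rocksdb::WriteOptions writeOptions;
    writeOptions.sync = true; // In the diff: !SERVER_KNOBS->ROCKSDB_UNSAFE_AUTO_FSYNC.
    status = db->Write(writeOptions, &writeBatch);
    assert(status.ok());

    delete db;
}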
@@ -2293,8 +2333,92 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
                 }
             }
         } else if (format == RocksDBKeyValues) {
-            a.done.sendError(not_implemented());
+            // Make sure the files are complete for the desired ranges.
+            std::vector<KeyRange> fetchedRanges;
+            std::vector<KeyRange> intendedRanges(a.ranges.begin(), a.ranges.end());
+            std::vector<RocksDBCheckpointKeyValues> rkvs;
+            for (const auto& checkpoint : a.checkpoints) {
+                rkvs.push_back(getRocksKeyValuesCheckpoint(checkpoint));
+                for (const auto& file : rkvs.back().fetchedFiles) {
+                    fetchedRanges.push_back(file.range);
+                }
+            }
+            // Verify that the collective fetchedRanges is the same as the collective intendedRanges.
+            std::sort(fetchedRanges.begin(), fetchedRanges.end(), KeyRangeRef::ArbitraryOrder());
+            std::sort(intendedRanges.begin(), intendedRanges.end(), KeyRangeRef::ArbitraryOrder());
+            int i = 0, j = 0;
+            while (i < fetchedRanges.size() && j < intendedRanges.size()) {
+                if (fetchedRanges[i].begin != intendedRanges[j].begin) {
+                    break;
+                } else if (fetchedRanges[i] == intendedRanges[j]) {
+                    ++i;
+                    ++j;
+                } else if (fetchedRanges[i].contains(intendedRanges[j])) {
+                    fetchedRanges[i] = KeyRangeRef(intendedRanges[j].end, fetchedRanges[i].end);
+                    ++j;
+                } else if (intendedRanges[j].contains(fetchedRanges[i])) {
+                    intendedRanges[j] = KeyRangeRef(fetchedRanges[i].end, intendedRanges[j].end);
+                    ++i;
+                } else {
+                    break;
+                }
+            }
+            if (i != fetchedRanges.size() || j != intendedRanges.size()) {
+                TraceEvent(SevError, "ShardedRocksDBRestoreFailed", logId)
+                    .detail("Reason", "RestoreFilesRangesMismatch")
+                    .detail("Ranges", describe(a.ranges))
+                    .detail("FetchedFiles", describe(rkvs));
+                a.done.sendError(failed_to_restore_checkpoint());
                 return;
+            }
+
+            if (!ps->initialized()) {
+                TraceEvent(SevDebug, "ShardedRocksRestoreInitPS", logId)
+                    .detail("Path", a.path)
+                    .detail("Checkpoints", describe(a.checkpoints))
+                    .detail("PhysicalShard", ps->toString());
+                status = ps->init();
+            }
+            if (!status.ok()) {
+                logRocksDBError(status, "RestoreInitPhysicalShard");
+                a.done.sendError(statusToError(status));
+                return;
+            }
+
+            for (const auto& checkpoint : a.checkpoints) {
+                status = ps->restore(checkpoint);
+                if (!status.ok()) {
+                    TraceEvent(SevWarnAlways, "ShardedRocksIngestFileError", logId)
+                        .detail("Error", status.ToString())
+                        .detail("Path", a.path)
+                        .detail("Checkpoint", checkpoint.toString())
+                        .detail("PhysicalShard", ps->toString());
+                    break;
+                }
+            }
+
+            if (!status.ok()) {
+                logRocksDBError(status, "RestoreIngestFile");
+                for (const auto& range : a.ranges) {
+                    writeBatch.DeleteRange(ps->cf, toSlice(range.begin), toSlice(range.end));
+                }
+                TraceEvent(SevInfo, "ShardedRocksRevertRestore", logId)
+                    .detail("Path", a.path)
+                    .detail("Checkpoints", describe(a.checkpoints))
+                    .detail("PhysicalShard", ps->toString())
+                    .detail("RestoreRanges", describe(a.ranges));
+
+                rocksdb::Status s = a.shardManager->getDb()->Write(options, &writeBatch);
+                if (!s.ok()) {
+                    TraceEvent(SevError, "ShardedRocksRevertRestoreError", logId)
+                        .detail("Error", s.ToString())
+                        .detail("Path", a.path)
+                        .detail("PhysicalShard", ps->toString())
+                        .detail("RestoreRanges", describe(a.ranges));
+                }
+                a.done.sendError(statusToError(status));
+                return;
+            }
         } else if (format == RocksDB) {
             a.done.sendError(not_implemented());
             return;
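The coverage check added above walks two sorted range lists with a pair of cursors and shrinks whichever range extends past the other; restore proceeds only if both lists are consumed exactly. The sketch below is a simplified, self-contained version of the same sweep over string-keyed ranges; the Range type and sample data are illustrative, not FDB's KeyRange.

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

// Illustrative half-open range [begin, end) over string keys.
struct Range {
    std::string begin, end;
    bool operator==(const Range& o) const { return begin == o.begin && end == o.end; }
    bool contains(const Range& o) const { return begin <= o.begin && o.end <= end; }
};

// Returns true iff the fetched ranges collectively cover exactly the intended ranges,
// mirroring the two-cursor sweep in the RocksDBKeyValues restore path above.
bool coversExactly(std::vector<Range> fetched, std::vector<Range> intended) {
    auto byBegin = [](const Range& a, const Range& b) { return a.begin < b.begin; };
    std::sort(fetched.begin(), fetched.end(), byBegin);
    std::sort(intended.begin(), intended.end(), byBegin);

    size_t i = 0, j = 0;
    while (i < fetched.size() && j < intended.size()) {
        if (fetched[i].begin != intended[j].begin) {
            break; // Gap or misaligned overlap.
        } else if (fetched[i] == intended[j]) {
            ++i;
            ++j;
        } else if (fetched[i].contains(intended[j])) {
            fetched[i].begin = intended[j].end; // Consume the matched prefix.
            ++j;
        } else if (intended[j].contains(fetched[i])) {
            intended[j].begin = fetched[i].end;
            ++i;
        } else {
            break;
        }
    }
    return i == fetched.size() && j == intended.size();
}

int main() {
    std::vector<Range> fetched = { { "a", "c" }, { "c", "f" } };
    std::vector<Range> intended = { { "a", "f" } };
    std::cout << std::boolalpha << coversExactly(fetched, intended) << "\n"; // true
}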
@@ -2305,9 +2429,6 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
             a.shardManager->populateRangeMappingMutations(&writeBatch, range, /*isAdd=*/true);
         }

-        rocksdb::WriteOptions options;
-        options.sync = !SERVER_KNOBS->ROCKSDB_UNSAFE_AUTO_FSYNC;
-
         status = a.shardManager->getDb()->Write(options, &writeBatch);
         if (!status.ok()) {
             logRocksDBError(status, "RestorePersistMetaData");
@@ -2316,8 +2437,7 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
         }
         TraceEvent(SevDebug, "ShardedRocksRestoredMetaDataPersisted", logId)
             .detail("Path", a.path)
-            .detail("Checkpoints", describe(a.checkpoints))
-            .detail("RocksDBCF", getRocksCF(a.checkpoints[0]).toString());
+            .detail("Checkpoints", describe(a.checkpoints));
         a.shardManager->getMetaDataShard()->refreshReadIteratorPool();
         a.done.send(Void());
         TraceEvent(SevInfo, "ShardedRocksDBRestoreEnd", logId)
@@ -856,9 +856,8 @@ ACTOR Future<Void> fetchCheckpointRange(Database cx,
         }
     } else {
         if (totalBytes > 0) {
-            RocksDBCheckpoint rcp = getRocksCheckpoint(*metaData);
+            RocksDBCheckpointKeyValues rcp = getRocksKeyValuesCheckpoint(*metaData);
             rcp.fetchedFiles.emplace_back(localFile, range, totalBytes);
-            rcp.checkpointDir = dir;
             metaData->serializedCheckpoint = ObjectWriter::toValue(rcp, IncludeVersion());
         }
         if (!fileExists(localFile)) {
@@ -783,6 +783,7 @@ public:

    std::map<Version, std::vector<CheckpointMetaData>> pendingCheckpoints; // Pending checkpoint requests
    std::unordered_map<UID, CheckpointMetaData> checkpoints; // Existing and deleting checkpoints
+   std::unordered_map<UID, ICheckpointReader*> liveCheckpointReaders; // Active checkpoint readers
    VersionedMap<int64_t, TenantName> tenantMap;
    std::map<Version, std::vector<PendingNewShard>>
        pendingAddRanges; // Pending requests to add ranges to physical shards
@@ -2476,9 +2477,15 @@ ACTOR Future<Void> fetchCheckpointKeyValuesQ(StorageServer* self, FetchCheckpoin
    }

    state ICheckpointReader* reader = nullptr;
+   auto crIt = self->liveCheckpointReaders.find(req.checkpointID);
+   if (crIt != self->liveCheckpointReaders.end()) {
+       reader = crIt->second;
+   } else {
+       reader = newCheckpointReader(it->second, CheckpointAsKeyValues::True, self->thisServerID);
+   }
+
    state std::unique_ptr<ICheckpointIterator> iter;
    try {
-       reader = newCheckpointReader(it->second, CheckpointAsKeyValues::True, self->thisServerID);
        wait(reader->init(BinaryWriter::toValue(req.range, IncludeVersion())));
        iter = reader->getIterator(req.range);

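The storage-server change above keeps per-checkpoint readers in self->liveCheckpointReaders so that concurrent FetchCheckpointKeyValues requests for the same checkpoint share one reader instead of reopening it. A stripped-down sketch of that get-or-create-and-release pattern follows; Reader and the registry here are illustrative stand-ins, not the actual ICheckpointReader interface.

#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>

// Illustrative stand-in for ICheckpointReader.
struct Reader {
    std::string checkpointId;
    int users = 0;
};

// Registry keyed by checkpoint ID, mirroring StorageServer::liveCheckpointReaders.
std::unordered_map<std::string, std::shared_ptr<Reader>> liveReaders;

// Reuse an existing reader for this checkpoint if one is live; otherwise create it.
std::shared_ptr<Reader> getOrCreateReader(const std::string& checkpointId) {
    auto it = liveReaders.find(checkpointId);
    if (it == liveReaders.end()) {
        auto reader = std::make_shared<Reader>();
        reader->checkpointId = checkpointId;
        it = liveReaders.emplace(checkpointId, reader).first;
    }
    it->second->users++;
    return it->second;
}

// When a request finishes and the reader is no longer in use, drop it from the
// registry (the diff additionally calls reader->close() at this point).
void releaseReader(const std::shared_ptr<Reader>& reader) {
    if (--reader->users == 0) {
        liveReaders.erase(reader->checkpointId);
    }
}

int main() {
    auto r1 = getOrCreateReader("ckpt-1");
    auto r2 = getOrCreateReader("ckpt-1"); // Reuses the same reader.
    std::cout << (r1 == r2) << "\n";       // 1
    releaseReader(r2);
    releaseReader(r1);
    std::cout << liveReaders.size() << "\n"; // 0
}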
@@ -2525,6 +2532,7 @@ ACTOR Future<Void> fetchCheckpointKeyValuesQ(StorageServer* self, FetchCheckpoin

    iter.reset();
    if (!reader->inUse()) {
+       self->liveCheckpointReaders.erase(req.checkpointID);
        wait(reader->close());
    }
    return Void();
@@ -82,6 +82,7 @@ struct PhysicalShardMoveWorkLoad : TestWorkload {
            { "TestKeyAB"_sr, "TestValueAB"_sr },
            { "TestKeyAD"_sr, "TestValueAD"_sr },
            { "TestKeyB"_sr, "TestValueB"_sr },
+           { "TestKeyBA"_sr, "TestValueBA"_sr },
            { "TestKeyC"_sr, "TestValueC"_sr },
            { "TestKeyD"_sr, "TestValueD"_sr },
            { "TestKeyE"_sr, "TestValueE"_sr },
@@ -117,12 +118,13 @@ struct PhysicalShardMoveWorkLoad : TestWorkload {
                                                            teamSize,
                                                            includes,
                                                            excludes)));

-       state std::vector<KeyRange> testRanges;
-       testRanges.push_back(KeyRangeRef("TestKeyA"_sr, "TestKeyAC"_sr));
-       wait(self->checkpointRestore(self, cx, testRanges, &kvs));
        TraceEvent(SevDebug, "TestMovedRange").detail("Range", KeyRangeRef("TestKeyA"_sr, "TestKeyB"_sr));

+       state std::vector<KeyRange> checkpointRanges;
+       checkpointRanges.push_back(KeyRangeRef("TestKeyA"_sr, "TestKeyAC"_sr));
+       wait(self->checkpointRestore(self, cx, checkpointRanges, checkpointRanges, CheckpointAsKeyValues::True, &kvs));
+       wait(self->checkpointRestore(self, cx, checkpointRanges, checkpointRanges, CheckpointAsKeyValues::False, &kvs));

        // Move range [TestKeyD, TestKeyF) to sh0;
        includes.insert(teamA.begin(), teamA.end());
        state std::vector<UID> teamE = wait(self->moveShard(self,
@@ -142,13 +144,15 @@ struct PhysicalShardMoveWorkLoad : TestWorkload {
                wait(self->getStorageServerShards(cx, teamA[teamIdx], KeyRangeRef("TestKeyD"_sr, "TestKeyF"_sr)));
            ASSERT(shards.size() == 1);
            ASSERT(shards[0].desiredId == sh0);
+           ASSERT(shards[0].id == sh0);
            TraceEvent("TestStorageServerShards", teamA[teamIdx]).detail("Shards", describe(shards));
        }

-       testRanges.clear();
-       testRanges.push_back(KeyRangeRef("TestKeyA"_sr, "TestKeyB"_sr));
-       testRanges.push_back(KeyRangeRef("TestKeyD"_sr, "TestKeyF"_sr));
-       wait(self->checkpointRestore(self, cx, testRanges, &kvs));
+       checkpointRanges.clear();
+       checkpointRanges.push_back(KeyRangeRef("TestKeyA"_sr, "TestKeyB"_sr));
+       checkpointRanges.push_back(KeyRangeRef("TestKeyD"_sr, "TestKeyE"_sr));
+       wait(self->checkpointRestore(self, cx, checkpointRanges, checkpointRanges, CheckpointAsKeyValues::True, &kvs));
+       wait(self->checkpointRestore(self, cx, checkpointRanges, checkpointRanges, CheckpointAsKeyValues::False, &kvs));

        // Move range [TestKeyB, TestKeyC) to sh1, on the same server.
        includes.insert(teamA.begin(), teamA.end());
@@ -171,6 +175,14 @@ struct PhysicalShardMoveWorkLoad : TestWorkload {
            TraceEvent("TestStorageServerShards", teamA[teamIdx]).detail("Shards", describe(shards));
        }

+       checkpointRanges.clear();
+       checkpointRanges.push_back(KeyRangeRef("TestKeyA"_sr, "TestKeyB"_sr));
+       checkpointRanges.push_back(KeyRangeRef("TestKeyB"_sr, "TestKeyC"_sr));
+       std::vector<KeyRange> restoreRanges;
+       restoreRanges.push_back(KeyRangeRef("TestKeyA"_sr, "TestKeyB"_sr));
+       restoreRanges.push_back(KeyRangeRef("TestKeyB"_sr, "TestKeyC"_sr));
+       wait(self->checkpointRestore(self, cx, checkpointRanges, restoreRanges, CheckpointAsKeyValues::True, &kvs));
+
        state std::vector<UID> teamC = wait(self->moveShard(self,
                                                            cx,
                                                            UID(sh2, deterministicRandom()->randomUInt64()),
@@ -202,11 +214,13 @@ struct PhysicalShardMoveWorkLoad : TestWorkload {

    ACTOR Future<Void> checkpointRestore(PhysicalShardMoveWorkLoad* self,
                                         Database cx,
-                                        std::vector<KeyRange> testRanges,
+                                        std::vector<KeyRange> checkpointRanges,
+                                        std::vector<KeyRange> restoreRanges,
+                                        CheckpointAsKeyValues asKeyValues,
                                         std::map<Key, Value>* kvs) {

        // Create checkpoint.
-       TraceEvent(SevDebug, "TestCreatingCheckpoint").detail("Ranges", describe(testRanges));
+       TraceEvent(SevDebug, "TestCreatingCheckpoint").detail("Ranges", describe(checkpointRanges));
        state Transaction tr(cx);
        state CheckpointFormat format = DataMoveRocksCF;
        state UID dataMoveId = deterministicRandom()->randomUniqueID();
@@ -216,7 +230,7 @@ struct PhysicalShardMoveWorkLoad : TestWorkload {
            try {
                tr.setOption(FDBTransactionOptions::LOCK_AWARE);
                tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
-               wait(createCheckpoint(&tr, testRanges, format, dataMoveId));
+               wait(createCheckpoint(&tr, checkpointRanges, format, dataMoveId));
                wait(tr.commit());
                version = tr.getCommittedVersion();
                break;
@@ -230,9 +244,10 @@ struct PhysicalShardMoveWorkLoad : TestWorkload {
        loop {
            records.clear();
            try {
-               wait(store(records, getCheckpointMetaData(cx, testRanges, version, format, Optional<UID>(dataMoveId))));
+               wait(store(records,
+                          getCheckpointMetaData(cx, checkpointRanges, version, format, Optional<UID>(dataMoveId))));
                TraceEvent(SevDebug, "TestCheckpointMetaDataFetched")
-                   .detail("Range", describe(testRanges))
+                   .detail("Range", describe(checkpointRanges))
                    .detail("Version", version)
                    .detail("Checkpoints", describe(records));

|
||||||
} catch (Error& e) {
|
} catch (Error& e) {
|
||||||
TraceEvent("TestFetchCheckpointMetadataError")
|
TraceEvent("TestFetchCheckpointMetadataError")
|
||||||
.errorUnsuppressed(e)
|
.errorUnsuppressed(e)
|
||||||
.detail("Range", describe(testRanges))
|
.detail("Range", describe(checkpointRanges))
|
||||||
.detail("Version", version);
|
.detail("Version", version);
|
||||||
|
|
||||||
// The checkpoint was just created, we don't expect this error.
|
// The checkpoint was just created, we don't expect this error.
|
||||||
|
@ -249,17 +264,33 @@ struct PhysicalShardMoveWorkLoad : TestWorkload {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fetch checkpoint.
|
// Fetch checkpoint.
|
||||||
state std::string pwd = platform::getWorkingDirectory();
|
state std::string checkpointDir = abspath("checkpoints");
|
||||||
state std::string folder = pwd + "/checkpoints";
|
platform::eraseDirectoryRecursive(checkpointDir);
|
||||||
platform::eraseDirectoryRecursive(folder);
|
ASSERT(platform::createDirectory(checkpointDir));
|
||||||
ASSERT(platform::createDirectory(folder));
|
|
||||||
state std::vector<CheckpointMetaData> fetchedCheckpoints;
|
state std::vector<CheckpointMetaData> fetchedCheckpoints;
|
||||||
state int i = 0;
|
state int i = 0;
|
||||||
for (; i < records.size(); ++i) {
|
for (; i < records.size(); ++i) {
|
||||||
loop {
|
loop {
|
||||||
TraceEvent(SevDebug, "TestFetchingCheckpoint").detail("Checkpoint", records[i].toString());
|
TraceEvent(SevDebug, "TestFetchingCheckpoint").detail("Checkpoint", records[i].toString());
|
||||||
try {
|
try {
|
||||||
CheckpointMetaData record = wait(fetchCheckpoint(cx, records[i], folder));
|
state CheckpointMetaData record;
|
||||||
|
if (asKeyValues) {
|
||||||
|
std::vector<KeyRange> fetchRanges;
|
||||||
|
for (const auto& range : restoreRanges) {
|
||||||
|
for (const auto& cRange : records[i].ranges) {
|
||||||
|
if (cRange.contains(range)) {
|
||||||
|
fetchRanges.push_back(range);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ASSERT(!fetchRanges.empty());
|
||||||
|
wait(store(record, fetchCheckpointRanges(cx, records[i], checkpointDir, fetchRanges)));
|
||||||
|
ASSERT(record.getFormat() == RocksDBKeyValues);
|
||||||
|
} else {
|
||||||
|
wait(store(record, fetchCheckpoint(cx, records[i], checkpointDir)));
|
||||||
|
ASSERT(record.getFormat() == format);
|
||||||
|
}
|
||||||
fetchedCheckpoints.push_back(record);
|
fetchedCheckpoints.push_back(record);
|
||||||
TraceEvent(SevDebug, "TestCheckpointFetched").detail("Checkpoint", record.toString());
|
TraceEvent(SevDebug, "TestCheckpointFetched").detail("Checkpoint", record.toString());
|
||||||
break;
|
break;
|
||||||
|
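When fetching as key-values, the workload above only requests the restore ranges that fall entirely inside one of the checkpoint's ranges. Below is a standalone sketch of that selection step, again using an illustrative string-keyed Range rather than FDB's KeyRange.

#include <iostream>
#include <string>
#include <vector>

// Illustrative half-open range [begin, end) over string keys.
struct Range {
    std::string begin, end;
    bool contains(const Range& o) const { return begin <= o.begin && o.end <= end; }
};

// Keep only the desired ranges that are fully covered by some checkpoint range,
// mirroring how the workload builds fetchRanges before calling fetchCheckpointRanges().
std::vector<Range> selectFetchRanges(const std::vector<Range>& desired,
                                     const std::vector<Range>& checkpointRanges) {
    std::vector<Range> fetchRanges;
    for (const auto& range : desired) {
        for (const auto& cRange : checkpointRanges) {
            if (cRange.contains(range)) {
                fetchRanges.push_back(range);
                break;
            }
        }
    }
    return fetchRanges;
}

int main() {
    std::vector<Range> checkpointRanges = { { "TestKeyA", "TestKeyB" }, { "TestKeyD", "TestKeyF" } };
    std::vector<Range> desired = { { "TestKeyA", "TestKeyAC" }, { "TestKeyB", "TestKeyC" } };
    // Only [TestKeyA, TestKeyAC) is covered by a checkpoint range, so one range is selected.
    std::cout << selectFetchRanges(desired, checkpointRanges).size() << "\n"; // 1
}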
@@ -280,7 +311,7 @@ struct PhysicalShardMoveWorkLoad : TestWorkload {
            rocksDBTestDir, deterministicRandom()->randomUniqueID(), KeyValueStoreType::SSD_SHARDED_ROCKSDB);
        wait(kvStore->init());
        try {
-           wait(kvStore->restore(shardId, testRanges, fetchedCheckpoints));
+           wait(kvStore->restore(shardId, restoreRanges, fetchedCheckpoints));
        } catch (Error& e) {
            TraceEvent(SevError, "TestRestoreCheckpointError")
                .errorUnsuppressed(e)
@@ -291,6 +322,7 @@ struct PhysicalShardMoveWorkLoad : TestWorkload {

        // Validate the restored kv-store.
        RangeResult kvRange = wait(kvStore->readRange(normalKeys));
+       ASSERT(!kvRange.more);
        std::unordered_map<Key, Value> kvsKvs;
        for (int i = 0; i < kvRange.size(); ++i) {
            kvsKvs[kvRange[i].key] = kvRange[i].value;
@@ -305,22 +337,25 @@ struct PhysicalShardMoveWorkLoad : TestWorkload {
            return false;
        };

+       int count = 0;
        for (const auto& [key, value] : *kvs) {
+           if (containsKey(restoreRanges, key)) {
+               TraceEvent(SevDebug, "TestExpectKeyValueMatch").detail("Key", key).detail("Value", value);
                auto it = kvsKvs.find(key);
-           if (containsKey(testRanges, key)) {
-               TraceEvent(SevVerbose, "TestExpectKeyValueMatch").detail("Key", key).detail("Value", value);
-               ASSERT(it->second == value);
-           } else {
-               TraceEvent(SevVerbose, "TestExpectKeyNotExist").detail("Key", key);
-               ASSERT(it == kvsKvs.end());
+               ASSERT(it != kvsKvs.end() && it->second == value);
+               ++count;
            }
        }

+       ASSERT(kvsKvs.size() == count);
+
        TraceEvent(SevDebug, "TestCheckpointVerified").detail("Checkpoint", describe(fetchedCheckpoints));

        Future<Void> close = kvStore->onClosed();
        kvStore->dispose();
        wait(close);
+       platform::eraseDirectoryRecursive(rocksDBTestDir);
+       platform::eraseDirectoryRecursive(checkpointDir);

        TraceEvent(SevDebug, "TestRocksDBClosed").detail("Checkpoint", describe(fetchedCheckpoints));