Merge pull request #3945 from sfc-gh-clin/add-consistencycheck-to-special-keys

Add consistencycheck command to special keys
This commit is contained in:
Andrew Noyes 2020-10-26 10:59:17 -07:00 committed by GitHub
commit a15bc32b14
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 128 additions and 12 deletions

View File

@ -942,7 +942,7 @@ that process, and wait for necessary data to be moved away.
#. ``\xff\xff/management/excluded/<exclusion>`` Read/write. Indicates that the cluster should move data away from processes matching ``<exclusion>``, so that they can be safely removed. See :ref:`removing machines from a cluster <removing-machines-from-a-cluster>` for documentation for the corresponding fdbcli command.
#. ``\xff\xff/management/failed/<exclusion>`` Read/write. Indicates that the cluster should consider matching processes as permanently failed. This allows the cluster to avoid maintaining extra state and doing extra work in the hope that these processes come back. See :ref:`removing machines from a cluster <removing-machines-from-a-cluster>` for documentation for the corresponding fdbcli command.
#. ``\xff\xff/management/inProgressExclusion/<address>`` Read-only. Indicates that the process matching ``<address>`` matches an exclusion, but still has necessary data and can't yet be safely removed.
#. ``\xff\xff/management/in_progress_exclusion/<address>`` Read-only. Indicates that the process matching ``<address>`` matches an exclusion, but still has necessary data and can't yet be safely removed.
#. ``\xff\xff/management/options/excluded/force`` Read/write. Setting this key disables safety checks for writes to ``\xff\xff/management/excluded/<exclusion>``. Setting this key only has an effect in the current transaction and is not persisted on commit.
#. ``\xff\xff/management/options/failed/force`` Read/write. Setting this key disables safety checks for writes to ``\xff\xff/management/failed/<exclusion>``. Setting this key only has an effect in the current transaction and is not persisted on commit.

View File

@ -929,7 +929,7 @@ DatabaseContext::DatabaseContext(Reference<AsyncVar<Reference<ClusterConnectionF
registerSpecialKeySpaceModule(
SpecialKeySpace::MODULE::MANAGEMENT, SpecialKeySpace::IMPLTYPE::READONLY,
std::make_unique<ExclusionInProgressRangeImpl>(
KeyRangeRef(LiteralStringRef("inProgressExclusion/"), LiteralStringRef("inProgressExclusion0"))
KeyRangeRef(LiteralStringRef("in_progress_exclusion/"), LiteralStringRef("in_progress_exclusion0"))
.withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::MANAGEMENT).begin)));
registerSpecialKeySpaceModule(
SpecialKeySpace::MODULE::CONFIGURATION, SpecialKeySpace::IMPLTYPE::READWRITE,
@ -943,8 +943,13 @@ DatabaseContext::DatabaseContext(Reference<AsyncVar<Reference<ClusterConnectionF
.withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::CONFIGURATION).begin)));
registerSpecialKeySpaceModule(
SpecialKeySpace::MODULE::MANAGEMENT, SpecialKeySpace::IMPLTYPE::READWRITE,
std::make_unique<LockDatabaseImpl>(singleKeyRange(LiteralStringRef("dbLocked"))
std::make_unique<LockDatabaseImpl>(singleKeyRange(LiteralStringRef("db_locked"))
.withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::MANAGEMENT).begin)));
registerSpecialKeySpaceModule(
SpecialKeySpace::MODULE::MANAGEMENT, SpecialKeySpace::IMPLTYPE::READWRITE,
std::make_unique<ConsistencyCheckImpl>(
singleKeyRange(LiteralStringRef("consistency_check_suspended"))
.withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::MANAGEMENT).begin)));
}
if (apiVersionAtLeast(630)) {
registerSpecialKeySpaceModule(SpecialKeySpace::MODULE::TRANSACTION, SpecialKeySpace::IMPLTYPE::READONLY,

View File

@ -46,7 +46,9 @@ std::unordered_map<std::string, KeyRange> SpecialKeySpace::managementApiCommandT
.withPrefix(moduleToBoundary[MODULE::MANAGEMENT].begin) },
{ "failed", KeyRangeRef(LiteralStringRef("failed/"), LiteralStringRef("failed0"))
.withPrefix(moduleToBoundary[MODULE::MANAGEMENT].begin) },
{ "lock", singleKeyRange(LiteralStringRef("dbLocked")).withPrefix(moduleToBoundary[MODULE::MANAGEMENT].begin) }
{ "lock", singleKeyRange(LiteralStringRef("db_locked")).withPrefix(moduleToBoundary[MODULE::MANAGEMENT].begin) },
{ "consistencycheck", singleKeyRange(LiteralStringRef("consistency_check_suspended"))
.withPrefix(moduleToBoundary[MODULE::MANAGEMENT].begin) }
};
std::set<std::string> SpecialKeySpace::options = { "excluded/force", "failed/force" };
@ -391,13 +393,33 @@ void SpecialKeySpace::clear(ReadYourWritesTransaction* ryw, const KeyRef& key) {
return impl->clear(ryw, key);
}
bool validateSnakeCaseNaming(const KeyRef& k) {
KeyRef key(k);
// Remove prefix \xff\xff
ASSERT(key.startsWith(specialKeys.begin));
key = key.removePrefix(specialKeys.begin);
// Suffix can be \xff\xff or \x00 in single key range
if (key.endsWith(specialKeys.begin))
key = key.removeSuffix(specialKeys.end);
else if (key.endsWith(LiteralStringRef("\x00")))
key = key.removeSuffix(LiteralStringRef("\x00"));
for (const char& c : key.toString()) {
// only small letters, numbers, '/', '_' is allowed
ASSERT((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '/' || c == '_');
}
return true;
}
void SpecialKeySpace::registerKeyRange(SpecialKeySpace::MODULE module, SpecialKeySpace::IMPLTYPE type,
const KeyRangeRef& kr, SpecialKeyRangeReadImpl* impl) {
// module boundary check
if (module == SpecialKeySpace::MODULE::TESTONLY)
if (module == SpecialKeySpace::MODULE::TESTONLY) {
ASSERT(normalKeys.contains(kr));
else
} else {
ASSERT(moduleToBoundary.at(module).contains(kr));
ASSERT(validateSnakeCaseNaming(kr.begin) &&
validateSnakeCaseNaming(kr.end)); // validate keys follow snake case naming style
}
// make sure the registered range is not overlapping with existing ones
// Note: kr.end should not be the same as another range's begin, although it should work even they are the same
for (auto iter = readImpls.rangeContaining(kr.begin); true; ++iter) {
@ -1123,8 +1145,7 @@ Future<Standalone<RangeResultRef>> ProcessClassSourceRangeImpl::getRange(ReadYou
return getProcessClassSourceActor(ryw, getKeyRange().begin, kr);
}
ACTOR Future<Standalone<RangeResultRef>> getLockedKeyActor(ReadYourWritesTransaction* ryw,
KeyRangeRef kr) {
ACTOR Future<Standalone<RangeResultRef>> getLockedKeyActor(ReadYourWritesTransaction* ryw, KeyRangeRef kr) {
ryw->getTransaction().setOption(FDBTransactionOptions::LOCK_AWARE);
Optional<Value> val = wait(ryw->getTransaction().get(databaseLockedKey));
Standalone<RangeResultRef> result;
@ -1137,7 +1158,7 @@ ACTOR Future<Standalone<RangeResultRef>> getLockedKeyActor(ReadYourWritesTransac
LockDatabaseImpl::LockDatabaseImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {}
Future<Standalone<RangeResultRef>> LockDatabaseImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const {
// sigle key range, the queried range should always be the same as the underlying range
// single key range, the queried range should always be the same as the underlying range
ASSERT(kr == getKeyRange());
auto lockEntry = ryw->getSpecialKeySpaceWriteMap()[SpecialKeySpace::getManagementApiCommandPrefix("lock")];
if (!ryw->readYourWritesDisabled() && lockEntry.first) {
@ -1192,3 +1213,44 @@ Future<Optional<std::string>> LockDatabaseImpl::commit(ReadYourWritesTransaction
return unlockDatabaseCommitActor(ryw);
}
}
ACTOR Future<Standalone<RangeResultRef>> getConsistencyCheckKeyActor(ReadYourWritesTransaction* ryw, KeyRangeRef kr) {
ryw->getTransaction().setOption(FDBTransactionOptions::LOCK_AWARE);
ryw->getTransaction().setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
Optional<Value> val = wait(ryw->getTransaction().get(fdbShouldConsistencyCheckBeSuspended));
bool ccSuspendSetting = val.present() ? BinaryReader::fromStringRef<bool>(val.get(), Unversioned()) : false;
Standalone<RangeResultRef> result;
if (ccSuspendSetting) {
result.push_back_deep(result.arena(), KeyValueRef(kr.begin, ValueRef()));
}
return result;
}
ConsistencyCheckImpl::ConsistencyCheckImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {}
Future<Standalone<RangeResultRef>> ConsistencyCheckImpl::getRange(ReadYourWritesTransaction* ryw,
KeyRangeRef kr) const {
// single key range, the queried range should always be the same as the underlying range
ASSERT(kr == getKeyRange());
auto entry = ryw->getSpecialKeySpaceWriteMap()[SpecialKeySpace::getManagementApiCommandPrefix("consistencycheck")];
if (!ryw->readYourWritesDisabled() && entry.first) {
// ryw enabled and we have written to the special key
Standalone<RangeResultRef> result;
if (entry.second.present()) {
result.push_back_deep(result.arena(), KeyValueRef(kr.begin, entry.second.get()));
}
return result;
} else {
return getConsistencyCheckKeyActor(ryw, kr);
}
}
Future<Optional<std::string>> ConsistencyCheckImpl::commit(ReadYourWritesTransaction* ryw) {
auto entry =
ryw->getSpecialKeySpaceWriteMap()[SpecialKeySpace::getManagementApiCommandPrefix("consistencycheck")].second;
ryw->getTransaction().setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
ryw->getTransaction().setOption(FDBTransactionOptions::LOCK_AWARE);
ryw->getTransaction().set(fdbShouldConsistencyCheckBeSuspended,
BinaryWriter::toValue(entry.present(), Unversioned()));
return Optional<std::string>();
}

View File

@ -312,5 +312,12 @@ public:
Future<Optional<std::string>> commit(ReadYourWritesTransaction* ryw) override;
};
class ConsistencyCheckImpl : public SpecialKeyRangeRWImpl {
public:
explicit ConsistencyCheckImpl(KeyRangeRef kr);
Future<Standalone<RangeResultRef>> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override;
Future<Optional<std::string>> commit(ReadYourWritesTransaction* ryw) override;
};
#include "flow/unactorcompiler.h"
#endif

View File

@ -753,7 +753,6 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload {
TraceEvent(SevDebug, "EmptyWorkerListInSetClassTest");
}
} catch (Error& e) {
if (e.code() == error_code_actor_cancelled) throw;
wait(tx->onError(e));
}
}
@ -769,7 +768,6 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload {
break;
} catch (Error& e) {
TraceEvent(SevDebug, "DatabaseLockFailure").error(e);
if (e.code() == error_code_actor_cancelled) throw;
// In case commit_unknown_result is thrown by buggify, we may try to lock more than once
// The second lock commit will throw special_keys_api_failure error
if (e.code() == error_code_special_keys_api_failure) {
@ -815,10 +813,54 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload {
} catch (Error& e) {
TraceEvent(SevDebug, "DatabaseUnlockFailure").error(e);
ASSERT(e.code() != error_code_database_locked);
if (e.code() == error_code_actor_cancelled) throw;
wait(tx->onError(e));
}
}
// test consistencycheck which only used by ConsistencyCheck Workload
// Note: we have exclusive ownership of fdbShouldConsistencyCheckBeSuspended,
// no existing workloads can modify the key
{
try {
tx->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
Optional<Value> val1 = wait(tx->get(fdbShouldConsistencyCheckBeSuspended));
state bool ccSuspendSetting =
val1.present() ? BinaryReader::fromStringRef<bool>(val1.get(), Unversioned()) : false;
Optional<Value> val2 =
wait(tx->get(SpecialKeySpace::getManagementApiCommandPrefix("consistencycheck")));
// Make sure the read result from special key consistency with the system key
ASSERT(ccSuspendSetting ? val2.present() : !val2.present());
tx->setOption(FDBTransactionOptions::SPECIAL_KEY_SPACE_ENABLE_WRITES);
// Make sure by default, consistencycheck is enabled
ASSERT(!ccSuspendSetting);
// Disable consistencycheck
tx->set(SpecialKeySpace::getManagementApiCommandPrefix("consistencycheck"), ValueRef());
wait(tx->commit());
tx->reset();
// Read system key to make sure it is disabled
tx->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
Optional<Value> val3 = wait(tx->get(fdbShouldConsistencyCheckBeSuspended));
bool ccSuspendSetting2 =
val3.present() ? BinaryReader::fromStringRef<bool>(val3.get(), Unversioned()) : false;
ASSERT(ccSuspendSetting2);
tx->reset();
} catch (Error& e) {
wait(tx->onError(e));
}
}
// make sure we enable consistencycheck by the end
{
loop {
try {
tx->setOption(FDBTransactionOptions::SPECIAL_KEY_SPACE_ENABLE_WRITES);
tx->clear(SpecialKeySpace::getManagementApiCommandPrefix("consistencycheck"));
wait(tx->commit());
tx->reset();
break;
} catch (Error& e) {
wait(tx->onError(e));
}
}
}
return Void();
}
};