Merge pull request #4877 from sfc-gh-clin/fix-datadistribution-special-key

Fix the place when changing dd system key without changing the lock key
This commit is contained in:
Chaoguang Lin 2021-06-01 15:52:33 -07:00 committed by GitHub
commit 5f04c2c7f5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 49 additions and 4 deletions

View File

@ -971,7 +971,7 @@ For example, you can change a process type or update coordinators by manipulatin
#. ``\xff\xff/configuration/process/class_type/<address> := <class_type>`` Read/write. Reading keys in the range will retrieve processes' class types. Setting keys in the range will update processes' class types. The process matching ``<address>`` will be assigned to the given class type if the commit is successful. The valid class types are ``storage``, ``transaction``, ``resolution``, etc. A full list of class types can be found via the ``fdbcli`` command ``help setclass``. Clearing keys is forbidden in the range. Instead, you can set the type as ``default``, which will clear the assigned class type if one exists. For more details, see the help text of the ``fdbcli`` command ``setclass``.
#. ``\xff\xff/configuration/process/class_source/<address> := <class_source>`` Read-only. Reading keys in the range will retrieve processes' class source. The class source is one of ``command_line``, ``configure_auto``, ``set_class`` and ``invalid``, indicating the source that the process's class type comes from.
#. ``\xff\xff/configuration/coordinators/processes := <ip:port>,<ip:port>,...,<ip:port>`` Read/write. A single key, if read, will return a comma delimited string of coordinators's network addresses. Thus to provide a new set of cooridinators, set the key with a correct formatted string of new coordinators' network addresses. As there's always the need to have coordinators, clear on the key is forbidden and a transaction will fail with the ``special_keys_api_failure`` error if the clear is committed. For more details, see help text of ``fdbcli`` command ``coordinators``.
#. ``\xff\xff/configuration/coordinators/processes := <ip:port>,<ip:port>,...,<ip:port>`` Read/write. A single key, if read, will return a comma delimited string of coordinators' network addresses. Thus to provide a new set of coordinators, set the key with a correctly formatted string of new coordinators' network addresses. As there's always the need to have coordinators, clear on the key is forbidden and a transaction will fail with the ``special_keys_api_failure`` error if the clear is committed. For more details, see help text of ``fdbcli`` command ``coordinators``.
#. ``\xff\xff/configuration/coordinators/cluster_description := <new_description>`` Read/write. A single key, if read, will return the cluster description. Thus modifying the key will update the cluster description. The new description needs to match ``[A-Za-z0-9_]+``, otherwise, the ``special_keys_api_failure`` error will be thrown. In addition, clear on the key is meaningless thus forbidden. For more details, see help text of ``fdbcli`` command ``coordinators``.
The ``<address>`` here is the network address of the corresponding process. Thus the general form is ``ip:port``.

View File

@ -2058,9 +2058,20 @@ Future<Optional<std::string>> DataDistributionImpl::commit(ReadYourWritesTransac
try {
int mode = boost::lexical_cast<int>(iter->value().second.get().toString());
Value modeVal = BinaryWriter::toValue(mode, Unversioned());
if (mode == 0 || mode == 1)
if (mode == 0 || mode == 1) {
// Whenever configuration changes or DD related system keyspace is changed,
// actor must grab the moveKeysLockOwnerKey and update moveKeysLockWriteKey.
// This prevents concurrent write to the same system keyspace.
// When the owner of the DD related system keyspace changes, DD will reboot
BinaryWriter wrMyOwner(Unversioned());
wrMyOwner << dataDistributionModeLock;
ryw->getTransaction().set(moveKeysLockOwnerKey, wrMyOwner.toValue());
BinaryWriter wrLastWrite(Unversioned());
wrLastWrite << deterministicRandom()->randomUniqueID();
ryw->getTransaction().set(moveKeysLockWriteKey, wrLastWrite.toValue());
// set mode
ryw->getTransaction().set(dataDistributionModeKey, modeVal);
else
} else
msg = ManagementAPIError::toJsonString(false,
"datadistribution",
"Please set the value of the data_distribution/mode to "

View File

@ -624,7 +624,7 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload {
ACTOR Future<Void> managementApiCorrectnessActor(Database cx_, SpecialKeySpaceCorrectnessWorkload* self) {
// All management api related tests
Database cx = cx_->clone();
state Database cx = cx_->clone();
state Reference<ReadYourWritesTransaction> tx = makeReference<ReadYourWritesTransaction>(cx);
// test ordered option keys
{
@ -1429,6 +1429,40 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload {
}
}
}
// Make sure that when we change dd related special keys, we grab the two system keys,
// i.e. moveKeysLockOwnerKey and moveKeysLockWriteKey.
// Approach: tr1 sets the datadistribution "mode" special key and commits, while tr2 is pinned
// to tr1's read version, reads one of the two lock keys, and then tries to commit a write.
// The block only exits once a not_committed error is caught.
{
state Reference<ReadYourWritesTransaction> tr1(new ReadYourWritesTransaction(cx));
state Reference<ReadYourWritesTransaction> tr2(new ReadYourWritesTransaction(cx));
loop {
try {
// pin tr2 to the same read version as tr1, taken before tr1 commits
Version readVersion = wait(tr1->getReadVersion());
tr2->setVersion(readVersion);
// options are (re)applied on every iteration, since the error path below resets tr1
// and hands tr2 to onError
tr1->setOption(FDBTransactionOptions::SPECIAL_KEY_SPACE_ENABLE_WRITES);
tr2->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
// write "1" to the "mode" key under the management API prefix of the "datadistribution" command
KeyRef ddPrefix = SpecialKeySpace::getManagementApiCommandPrefix("datadistribution");
tr1->set(LiteralStringRef("mode").withPrefix(ddPrefix), LiteralStringRef("1"));
wait(tr1->commit());
// randomly read the moveKeysLockOwnerKey/moveKeysLockWriteKey
// both of them should be grabbed when changing dd mode
wait(success(
tr2->get(deterministicRandom()->coinflip() ? moveKeysLockOwnerKey : moveKeysLockWriteKey)));
// tr2 should never succeed, just write to a key to make it not a read-only transaction
tr2->set(LiteralStringRef("unused_key"), LiteralStringRef(""));
wait(tr2->commit());
ASSERT(false); // commit should always fail due to conflict
} catch (Error& e) {
// NOTE(review): this handler also sees errors thrown by tr1 (getReadVersion/commit), so a
// not_committed from tr1->commit() would end the loop as well — confirm that cannot happen.
if (e.code() != error_code_not_committed) {
// when buggify is enabled, it's possible we get other retriable errors
// NOTE(review): the error is always passed to tr2->onError, even when tr1 raised it — confirm intended.
wait(tr2->onError(e));
tr1->reset();
} else {
// loop until we get conflict error
break;
}
}
}
}
return Void();
}
};