2017-05-26 04:48:44 +08:00
* CommitTransaction.h
* This source file is part of the FoundationDB open source project
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
2018-02-22 02:25:11 +08:00
2017-05-26 04:48:44 +08:00
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
2018-02-22 02:25:11 +08:00
2017-05-26 04:48:44 +08:00
* http://www.apache.org/licenses/LICENSE-2.0
2018-02-22 02:25:11 +08:00
2017-05-26 04:48:44 +08:00
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* See the License for the specific language governing permissions and
* limitations under the License.
#pragma once
2018-10-20 01:30:13 +08:00
#include "fdbclient/FDBTypes.h"
2020-02-12 03:44:53 +08:00
#include "fdbserver/Knobs.h"
2017-05-26 04:48:44 +08:00
2019-09-06 02:30:02 +08:00
// Marker value that leads a versioned message.
// Wire format of a versioned message: VERSION_HEADER (-1), version, messages.
static const int32_t VERSION_HEADER = -1;
2019-01-31 17:23:32 +08:00
static const char* typeString[] = { "SetValue",
2019-07-23 06:44:49 +08:00
2017-11-18 15:36:09 +08:00
2020-02-12 03:44:53 +08:00
// A single mutation: a type code plus two StringRef parameters.
// CommitTransactionRef::set() builds SetValue mutations as (key, value) and
// CommitTransactionRef::clear() builds ClearRange mutations as (range begin, range end).
struct MutationRef {
2017-05-26 04:48:44 +08:00
// Fixed per-mutation byte count that totalSize() adds on top of the two parameter lengths.
static const int OVERHEAD_BYTES = 12; //12 is the size of Header in MutationList entries
2019-01-31 17:23:32 +08:00
// Mutation type codes, stored in one byte.
// NOTE(review): only the first entries of this enum are present in this view.
enum Type : uint8_t {
SetValue = 0,
Reserved_For_LogProtocolMessage /* See fdbserver/LogProtocolMessage.h */,
2017-05-26 04:48:44 +08:00
// The type code is held as a raw uint8_t (not as Type) for serialization purposes.
uint8_t type;
// The two operands of the mutation; their meaning depends on `type`.
StringRef param1, param2;
MutationRef() {}
MutationRef( Type t, StringRef a, StringRef b ) : type(t), param1(a), param2(b) {}
MutationRef( Arena& to, const MutationRef& from ) : type(from.type), param1( to, from.param1 ), param2( to, from.param2 ) {}
2020-02-12 03:44:53 +08:00
// Size of this mutation including the fixed per-entry overhead:
// OVERHEAD_BYTES plus the lengths of both parameters.
int totalSize() const {
	const auto firstLen = param1.size();
	const auto secondLen = param2.size();
	return OVERHEAD_BYTES + firstLen + secondLen;
}
2017-05-26 04:48:44 +08:00
// Combined length of the two parameters, without the OVERHEAD_BYTES header.
int expectedSize() const {
	const auto firstLen = param1.size();
	const auto secondLen = param2.size();
	return firstLen + secondLen;
}
2020-02-12 03:44:53 +08:00
// Size of this mutation with atomic operations weighted differently from other mutations.
// For non-atomic mutations this is exactly totalSize().
int weightedTotalSize() const {
2020-02-19 08:41:19 +08:00
// An atomic op can put more load on the FDB cluster than a set mutation of the same size,
// so its size is amplified to account for that extra work.
// A good value for FASTRESTORE_ATOMICOP_WEIGHT needs experimental evaluation.
2020-02-12 03:44:53 +08:00
if (isAtomicOp()) {
// NOTE(review): this branch is empty in this view, so nothing is returned for atomic ops here.
// Presumably it should return totalSize() scaled by FASTRESTORE_ATOMICOP_WEIGHT; confirm against the full file.
} else {
return totalSize();
2017-05-26 04:48:44 +08:00
// Formats the mutation as text: the type name followed by printable(param1) and printable(param2).
// The name is looked up in typeString only when type is below MAX_ATOMIC_OP; any other
// type value is rendered with the literal name "Invalid".
std::string toString() const {
2017-08-08 09:45:42 +08:00
if (type < MutationRef::MAX_ATOMIC_OP) {
return format("code: %s param1: %s param2: %s", typeString[type], printable(param1).c_str(), printable(param2).c_str());
else {
2019-12-13 13:55:50 +08:00
return format("code: Invalid param1: %s param2: %s", printable(param1).c_str(), printable(param2).c_str());
2017-08-08 09:45:42 +08:00
2017-05-26 04:48:44 +08:00
2020-02-12 03:44:53 +08:00
// True when the bit for this mutation's type is set in ATOMIC_MASK.
// NOTE(review): `1<<type` is not range-checked here; the shift is only well-defined while
// type is smaller than the bit width of int. Presumably callers keep type below MAX_ATOMIC_OP; confirm.
bool isAtomicOp() const {
return (ATOMIC_MASK & (1<<type)) != 0;
2017-05-26 04:48:44 +08:00
// Hands type, param1 and param2, in that order, to serializer() for the archive `ar`.
template <class Ar>
void serialize( Ar& ar ) {
2018-12-29 02:49:26 +08:00
serializer(ar, type, param1, param2);
2017-05-26 04:48:44 +08:00
// Bit masks over Type values: bit N stands for the mutation type whose enum value is N.
// Each mask records which mutation types have a particular property; the masks back
// predicates such as isSingleKeyMutation(), isAtomicOp() and isNonAssociativeOp().
2019-01-31 17:23:32 +08:00
enum {
// Types for which isAtomicOp() returns true.
ATOMIC_MASK = (1 << AddValue) | (1 << And) | (1 << Or) | (1 << Xor) | (1 << AppendIfFits) | (1 << Max) |
(1 << Min) | (1 << SetVersionstampedKey) | (1 << SetVersionstampedValue) | (1 << ByteMin) |
(1 << ByteMax) | (1 << MinV2) | (1 << AndV2) | (1 << CompareAndClear),
// Types for which isNonAssociativeOp() returns true.
NON_ASSOCIATIVE_MASK = (1 << AddValue) | (1 << Or) | (1 << Xor) | (1 << Max) | (1 << Min) |
(1 << SetVersionstampedKey) | (1 << SetVersionstampedValue) | (1 << MinV2) |
(1 << CompareAndClear)
2017-05-26 04:48:44 +08:00
// A 'single key mutation' is one which affects exactly the value of the key specified by its param1.
// Implemented as a test of the type's bit in MutationRef::SINGLE_KEY_MASK. The shift is not
// range-checked here; see isValidMutationType() for the check that makes a type safe to pass.
static inline bool isSingleKeyMutation(MutationRef::Type type) {
return (MutationRef::SINGLE_KEY_MASK & (1<<type)) != 0;
// Returns true if the given raw value can be safely cast to MutationRef::Type and passed to
// isSingleKeyMutation, isAtomicOp, etc., i.e. if it is below MutationRef::MAX_ATOMIC_OP.
// It does NOT mean that the type is a valid type of a MutationRef in any particular context.
static inline bool isValidMutationType(uint32_t type) {
return (type < MutationRef::MAX_ATOMIC_OP);
// An 'atomic operation' is a single key mutation which sets the key specified by its param1 to a
// nontrivial function of the previous value of the key and param2, and thus requires a
// read/modify/write to implement. (Basically a single key mutation other than a set.)
// Implemented as a test of the type's bit in MutationRef::ATOMIC_MASK; the shift is not range-checked here.
static inline bool isAtomicOp(MutationRef::Type mutationType) {
return (MutationRef::ATOMIC_MASK & (1<<mutationType)) != 0;
// Returns true for operations which do not obey the associative law (i.e. a*(b*c) == (a*b)*c) in all cases.
// When a, b, and c have equal lengths, even these operations are associative.
// Implemented as a test of the type's bit in MutationRef::NON_ASSOCIATIVE_MASK; the shift is not range-checked here.
static inline bool isNonAssociativeOp(MutationRef::Type mutationType) {
return (MutationRef::NON_ASSOCIATIVE_MASK & (1<<mutationType)) != 0;
// The contents of a transaction to commit: read conflict ranges, write conflict ranges,
// mutations, and the read snapshot version.
struct CommitTransactionRef {
Fix VersionStamp problems by instead adding a COMMIT_ON_FIRST_PROXY transaction option.
Simulation identified the fact that we can violate the
VersionStamps-are-always-increasing promise via the following series of events:
1. On proxy 0, dumpData adds commit requests to proxy 0's commit promise stream
2. To any proxy, a client submits the first transaction of abortBackup, which stops further dumpData calls on proxy 0.
3. To any proxy that is not proxy 0, submit a transaction that checks if it needs to upgrade the destination version.
4. The transaction from (3) is committed
5. Transactions from (1) are committed
This is possible because the dumpData transactions have no read conflict
ranges, and thus it's impossible to make them abort due to "conflicting"
transactions. There's also no promise that if client C sends a commit to proxy
A, and later a client D sends a commit to proxy B, that B must log its commit
after A. (We only promise that if C is told it was committed before D is told
it was committed, then A committed before B.)
There was a failed attempt to fix this problem. We tried to add read conflict
ranges to dumpData transactions so that they could be aborted by "conflicting"
transactions. However, this failed because this now means that dumpData
transactions require conflict resolution, and the stale read version that they
use can cause them to be aborted with a transaction_too_old error.
(Transactions that don't have read conflict ranges will never return
transaction_too_old, because with no reads, the read snapshot version is
effectively meaningless.) This was never previously possible, so the existing
code doesn't retry commits, and to make things more complicated, the dumpData
commits must be applied in order. This would require either adding
dependencies to transactions (if A is going to commit then B must also be/have
committed), which would be complicated, or submitting transactions with a fixed
read version, and replaying the failed commits with a higher read version once
we get a transaction_too_old error, which would unacceptably slow down the
maximum throughput of dumpData.
Thus, we've instead elected to add a special transaction option that bypasses
proxy load balancing for commits, and always commits against proxy 0. We can
know for certain that after the transaction from (2) is committed, all of the
dumpData transactions that will be committed have been added to the commit
promise stream on proxy 0. Thus, if we enqueue another transaction against
proxy 0, we can know that it will be placed into the promise stream after all
of the dumpData transactions, thus providing the semantics that we require: no
dumpData transaction can commit after the destination version upgrade
2017-12-20 08:44:07 +08:00
// Creates an empty transaction: the three vectors are default-constructed
// and the read snapshot version starts at 0.
CommitTransactionRef()
	: read_snapshot(0)
{}
2017-05-26 04:48:44 +08:00
// Builds a transaction from `from`: each vector is constructed from the Arena `a` and the
// matching vector of `from`, and read_snapshot is copied by value.
// NOTE(review): presumably the VectorRef(a, ...) constructors copy the contents into `a`; confirm against VectorRef.
CommitTransactionRef(Arena &a, const CommitTransactionRef &from)
: read_conflict_ranges(a, from.read_conflict_ranges),
write_conflict_ranges(a, from.write_conflict_ranges),
mutations(a, from.mutations),
read_snapshot(from.read_snapshot) {
// Key ranges recorded as read conflicts of the transaction.
VectorRef< KeyRangeRef > read_conflict_ranges;
// Key ranges recorded as write conflicts; set() and clear() append to this.
VectorRef< KeyRangeRef > write_conflict_ranges;
// Mutations of the transaction; set() and clear() append to this.
VectorRef< MutationRef > mutations;
// Read snapshot version; 0 after default construction.
Version read_snapshot;
// Hands read_conflict_ranges, write_conflict_ranges, mutations and read_snapshot, in that order,
// to serializer() for the archive `ar`.
template <class Ar>
force_inline void serialize( Ar& ar ) {
2018-12-29 02:49:26 +08:00
serializer(ar, read_conflict_ranges, write_conflict_ranges, mutations, read_snapshot);
2017-05-26 04:48:44 +08:00
// Convenience for internal code required to manipulate these without the Native API.
// Appends a SetValue mutation (param1 = key, param2 = value) via push_back_deep, and appends
// the range returned by singleKeyRange(key, arena) to write_conflict_ranges.
void set( Arena& arena, KeyRef const& key, ValueRef const& value ) {
mutations.push_back_deep(arena, MutationRef(MutationRef::SetValue, key, value));
write_conflict_ranges.push_back(arena, singleKeyRange(key, arena));
// Appends a ClearRange mutation (param1 = keys.begin, param2 = keys.end) via push_back_deep,
// and appends `keys` itself to write_conflict_ranges, also via push_back_deep.
void clear( Arena& arena, KeyRangeRef const& keys ) {
mutations.push_back_deep(arena, MutationRef(MutationRef::ClearRange, keys.begin, keys.end));
write_conflict_ranges.push_back_deep(arena, keys);
// Sum of expectedSize() over the two conflict-range vectors and the mutation vector.
// read_snapshot is not included in the total.
size_t expectedSize() const {
return read_conflict_ranges.expectedSize() + write_conflict_ranges.expectedSize() + mutations.expectedSize();
// Declarations only; the definitions are not part of this view.
// NOTE(review): presumably debugging hooks that take a context label, a version, and the mutation
// or key range of interest; confirm what the bool result means against the definitions.
bool debugMutation( const char* context, Version version, MutationRef const& m );
bool debugKeyRange( const char* context, Version version, KeyRangeRef const& keyRange );