Merge branch 'master' into transaction-tagging

# Conflicts:
#	fdbclient/DatabaseContext.h
This commit is contained in:
A.J. Beamon 2020-05-09 07:50:29 -07:00
commit 02307ba7b6
86 changed files with 2213 additions and 1028 deletions

View File

@ -449,7 +449,7 @@ FDBFuture* fdb_transaction_get_range_impl(
/* _ITERATOR mode maps to one of the known streaming modes
depending on iteration */
static const int mode_bytes_array[] = {CLIENT_KNOBS->BYTE_LIMIT_UNLIMITED, 256, 1000, 4096, 80000};
const int mode_bytes_array[] = { CLIENT_KNOBS->BYTE_LIMIT_UNLIMITED, 256, 1000, 4096, 80000 };
/* The progression used for FDB_STREAMING_MODE_ITERATOR.
Goes from small -> medium -> large. Then 1.5 * previous until serial. */

View File

@ -88,13 +88,20 @@ func (o NetworkOptions) SetTraceFormat(param string) error {
return o.setOpt(34, []byte(param))
}
// Select clock source for trace files. now (default) or realtime are supported.
// Select clock source for trace files. now (the default) or realtime are supported.
//
// Parameter: Trace clock source
func (o NetworkOptions) SetTraceClockSource(param string) error {
return o.setOpt(35, []byte(param))
}
// Once provided, this string replaces the port/PID portion of the trace log
// file names.
//
// Parameter: The identifier that will be part of all trace file names
func (o NetworkOptions) SetTraceFileIdentifier(param string) error {
	identifier := []byte(param)
	return o.setOpt(36, identifier)
}
// Set internal tuning or debugging knobs
//
// Parameter: knob_name=knob_value
@ -223,11 +230,16 @@ func (o NetworkOptions) SetDisableClientStatisticsLogging() error {
return o.setOpt(70, nil)
}
// Enables debugging feature to perform slow task profiling. Requires trace logging to be enabled. WARNING: this feature is not recommended for use in production.
// Deprecated
func (o NetworkOptions) SetEnableSlowTaskProfiling() error {
return o.setOpt(71, nil)
}
// Enables debugging feature to perform run loop profiling. Requires trace logging to be enabled. WARNING: this feature is not recommended for use in production.
//
// NOTE(review): this uses option code 71, the same code as the deprecated
// SetEnableSlowTaskProfiling above — presumably a rename of that option;
// confirm against the option definitions (fdb.options).
func (o NetworkOptions) SetEnableRunLoopProfiling() error {
return o.setOpt(71, nil)
}
// Enable client buggify - will make requests randomly fail (intended for client testing)
func (o NetworkOptions) SetClientBuggifyEnable() error {
return o.setOpt(80, nil)
@ -441,6 +453,11 @@ func (o TransactionOptions) SetTransactionLoggingMaxFieldLength(param int64) err
return o.setOpt(405, int64ToBytes(param))
}
// Sets an identifier for server tracing of this transaction. When committed,
// this identifier triggers logging when each part of the transaction authority
// encounters it, which is helpful in diagnosing slowness in misbehaving
// clusters. The identifier is randomly generated. When there is also a
// debug_transaction_identifier, both IDs are logged together.
func (o TransactionOptions) SetServerRequestTracing() error {
return o.setOpt(406, nil)
}
// Set a timeout in milliseconds which, when elapsed, will cause the transaction automatically to be cancelled. Valid parameter values are ``[0, INT_MAX]``. If set to 0, will disable all timeouts. All pending and any future uses of the transaction will throw an exception. The transaction can be used again after it is reset. Prior to API version 610, like all other transaction options, the timeout must be reset after a call to ``onError``. If the API version is 610 or greater, the timeout is not reset after an ``onError`` call. This allows the user to specify a longer timeout on specific transactions than the default timeout specified through the ``transaction_timeout`` database option without the shorter database timeout cancelling transactions that encounter a retryable error. Note that at all API versions, it is safe and legal to set the timeout each time the transaction begins, so most code written assuming the older behavior can be upgraded to the newer behavior without requiring any modification, and the caller is not required to implement special logic in retry loops to only conditionally set this option.
//
// Parameter: value in milliseconds of timeout
@ -499,6 +516,11 @@ func (o TransactionOptions) SetUseProvisionalProxies() error {
return o.setOpt(711, nil)
}
// Enables retrieval of the keys that caused this transaction to conflict with
// other transactions.
func (o TransactionOptions) SetReportConflictingKeys() error {
	const reportConflictingKeysCode = 712
	return o.setOpt(reportConflictingKeysCode, nil)
}
type StreamingMode int
const (
@ -636,15 +658,15 @@ type ErrorPredicate int
const (
// Returns ``true`` if the error indicates the operations in the
// transactions should be retried because of transient error.
// Returns ``true`` if the error indicates the operations in the transactions
// should be retried because of transient error.
ErrorPredicateRetryable ErrorPredicate = 50000
// Returns ``true`` if the error indicates the transaction may have
// succeeded, though not in a way the system can verify.
// Returns ``true`` if the error indicates the transaction may have succeeded,
// though not in a way the system can verify.
ErrorPredicateMaybeCommitted ErrorPredicate = 50001
// Returns ``true`` if the error indicates the transaction has not
// committed, though in a way that can be retried.
// Returns ``true`` if the error indicates the transaction has not committed,
// though in a way that can be retried.
ErrorPredicateRetryableNotCommitted ErrorPredicate = 50002
)

View File

@ -8,6 +8,7 @@ env_set(ALLOC_INSTRUMENTATION OFF BOOL "Instrument alloc")
env_set(WITH_UNDODB OFF BOOL "Use rr or undodb")
env_set(USE_ASAN OFF BOOL "Compile with address sanitizer")
env_set(USE_UBSAN OFF BOOL "Compile with undefined behavior sanitizer")
env_set(USE_TSAN OFF BOOL "Compile with thread sanitizer")
env_set(FDB_RELEASE OFF BOOL "This is a building of a final release")
env_set(USE_CCACHE OFF BOOL "Use ccache for compilation if available")
env_set(RELATIVE_DEBUG_PATHS OFF BOOL "Use relative file paths in debug info")
@ -81,6 +82,7 @@ include(CheckFunctionExists)
set(CMAKE_REQUIRED_INCLUDES stdlib.h malloc.h)
set(CMAKE_REQUIRED_LIBRARIES c)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_C_STANDARD 11)
if(WIN32)
# see: https://docs.microsoft.com/en-us/windows/desktop/WinProg/using-the-windows-headers
@ -164,6 +166,15 @@ else()
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=undefined ${CMAKE_THREAD_LIBS_INIT}")
endif()
if(USE_TSAN)
add_compile_options(
-fsanitize=thread
-DUSE_SANITIZER)
set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -fsanitize=thread")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fsanitize=thread")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=thread ${CMAKE_THREAD_LIBS_INIT}")
endif()
if(PORTABLE_BINARY)
message(STATUS "Create a more portable binary")
set(CMAKE_MODULE_LINKER_FLAGS "-static-libstdc++ -static-libgcc ${CMAKE_MODULE_LINKER_FLAGS}")

View File

@ -73,6 +73,7 @@ set(FDBCLIENT_SRCS
Tuple.h
VersionedMap.actor.h
VersionedMap.h
VersionedMap.cpp
WriteMap.h
json_spirit/json_spirit_error_position.h
json_spirit/json_spirit_reader_template.h

View File

@ -309,6 +309,9 @@ public:
std::shared_ptr<SpecialKeySpace> specialKeySpace;
std::shared_ptr<ConflictingKeysImpl> cKImpl;
std::shared_ptr<ReadConflictRangeImpl> rCRImpl;
std::shared_ptr<WriteConflictRangeImpl> wCRImpl;
static bool debugUseTags;
static const std::vector<std::string> debugTransactionTagChoices;
};

View File

@ -268,6 +268,10 @@ struct KeyRangeRef {
return KeyRangeRef( begin.withPrefix(prefix), end.withPrefix(prefix) );
}
KeyRangeRef withPrefix(const StringRef& prefix, Arena& arena) const {
return KeyRangeRef(begin.withPrefix(prefix, arena), end.withPrefix(prefix, arena));
}
KeyRangeRef removePrefix( const StringRef& prefix ) const {
return KeyRangeRef( begin.removePrefix(prefix), end.removePrefix(prefix) );
}

View File

@ -3691,8 +3691,7 @@ public:
auto range = backupRanges[restoreIndex];
Standalone<StringRef> restoreTag(backupTag.toString() + "_" + std::to_string(restoreIndex));
// Register the restore request in DB, which will be picked up by the restore worker leader
struct RestoreRequest restoreRequest(restoreIndex, restoreTag, bcUrl, true, targetVersion, true,
range, Key(), Key(), lockDB,
struct RestoreRequest restoreRequest(restoreIndex, restoreTag, bcUrl, targetVersion, range,
deterministicRandom()->randomUniqueID());
tr->set(restoreRequestKeyFor(restoreRequest.index), restoreRequestValue(restoreRequest));
}

View File

@ -40,8 +40,9 @@ struct MasterProxyInterface {
enum { LocationAwareLoadBalance = 1 };
enum { AlwaysFresh = 1 };
LocalityData locality;
Optional<Key> processId;
bool provisional;
Endpoint base;
RequestStream< struct CommitTransactionRequest > commit;
RequestStream< struct GetReadVersionRequest > getConsistentReadVersion; // Returns a version which (1) is committed, and (2) is >= the latest version reported committed (by a commit response) when this request was sent
// (at some point between when this request is sent and when its response is received, the latest version reported committed)
@ -64,17 +65,34 @@ struct MasterProxyInterface {
template <class Archive>
void serialize(Archive& ar) {
serializer(ar, locality, provisional, commit, getConsistentReadVersion, getKeyServersLocations,
waitFailure, getStorageServerRejoinInfo, getRawCommittedVersion,
txnState, getHealthMetrics, proxySnapReq, exclusionSafetyCheckReq);
serializer(ar, processId, provisional, base);
if( Archive::isDeserializing ) {
commit = RequestStream< struct CommitTransactionRequest >( base.getAdjustedEndpoint(0) );
getConsistentReadVersion = RequestStream< struct GetReadVersionRequest >( base.getAdjustedEndpoint(1) );
getKeyServersLocations = RequestStream< struct GetKeyServerLocationsRequest >( base.getAdjustedEndpoint(2) );
getStorageServerRejoinInfo = RequestStream< struct GetStorageServerRejoinInfoRequest >( base.getAdjustedEndpoint(3) );
waitFailure = RequestStream<ReplyPromise<Void>>( base.getAdjustedEndpoint(4) );
getRawCommittedVersion = RequestStream< struct GetRawCommittedVersionRequest >( base.getAdjustedEndpoint(5) );
txnState = RequestStream< struct TxnStateRequest >( base.getAdjustedEndpoint(6) );
getHealthMetrics = RequestStream< struct GetHealthMetricsRequest >( base.getAdjustedEndpoint(7) );
proxySnapReq = RequestStream< struct ProxySnapRequest >( base.getAdjustedEndpoint(8) );
exclusionSafetyCheckReq = RequestStream< struct ExclusionSafetyCheckRequest >( base.getAdjustedEndpoint(9) );
}
}
void initEndpoints() {
getConsistentReadVersion.getEndpoint(TaskPriority::ReadSocket);
getRawCommittedVersion.getEndpoint(TaskPriority::ProxyGetRawCommittedVersion);
commit.getEndpoint(TaskPriority::ReadSocket);
getStorageServerRejoinInfo.getEndpoint(TaskPriority::ProxyStorageRejoin);
getKeyServersLocations.getEndpoint(TaskPriority::ReadSocket); //priority lowered to TaskPriority::DefaultEndpoint on the proxy
std::vector<std::pair<FlowReceiver*, TaskPriority>> streams;
streams.push_back(commit.getReceiver(TaskPriority::ReadSocket));
streams.push_back(getConsistentReadVersion.getReceiver(TaskPriority::ReadSocket));
streams.push_back(getKeyServersLocations.getReceiver(TaskPriority::ReadSocket)); //priority lowered to TaskPriority::DefaultEndpoint on the proxy
streams.push_back(getStorageServerRejoinInfo.getReceiver(TaskPriority::ProxyStorageRejoin));
streams.push_back(waitFailure.getReceiver());
streams.push_back(getRawCommittedVersion.getReceiver(TaskPriority::ProxyGetRawCommittedVersion));
streams.push_back(txnState.getReceiver());
streams.push_back(getHealthMetrics.getReceiver());
streams.push_back(proxySnapReq.getReceiver());
streams.push_back(exclusionSafetyCheckReq.getReceiver());
base = FlowTransport::transport().addEndpoints(streams);
}
};

View File

@ -530,7 +530,9 @@ DatabaseContext::DatabaseContext(Reference<AsyncVar<Reference<ClusterConnectionF
commitLatencies(1000), GRVLatencies(1000), mutationsPerCommit(1000), bytesPerCommit(1000), mvCacheInsertLocation(0),
healthMetricsLastUpdated(0), detailedHealthMetricsLastUpdated(0), internal(internal),
specialKeySpace(std::make_shared<SpecialKeySpace>(normalKeys.begin, specialKeys.end)),
cKImpl(std::make_shared<ConflictingKeysImpl>(conflictingKeysRange)) {
cKImpl(std::make_shared<ConflictingKeysImpl>(conflictingKeysRange)),
rCRImpl(std::make_shared<ReadConflictRangeImpl>(readConflictRangeKeysRange)),
wCRImpl(std::make_shared<WriteConflictRangeImpl>(writeConflictRangeKeysRange)) {
dbId = deterministicRandom()->randomUniqueID();
connected = clientInfo->get().proxies.size() ? Void() : clientInfo->onChange();
@ -551,6 +553,8 @@ DatabaseContext::DatabaseContext(Reference<AsyncVar<Reference<ClusterConnectionF
clientStatusUpdater.actor = clientStatusUpdateActor(this);
throttleExpirer = recurring([this](){ expireThrottles(); }, CLIENT_KNOBS->TAG_THROTTLE_EXPIRATION_INTERVAL);
specialKeySpace->registerKeyRange(conflictingKeysRange, cKImpl.get());
specialKeySpace->registerKeyRange(readConflictRangeKeysRange, rCRImpl.get());
specialKeySpace->registerKeyRange(writeConflictRangeKeysRange, wCRImpl.get());
}
DatabaseContext::DatabaseContext( const Error &err ) : deferredError(err), cc("TransactionMetrics"), transactionReadVersions("ReadVersions", cc), transactionReadVersionsThrottled("ReadVersionsThrottled", cc),
@ -2489,7 +2493,7 @@ void Transaction::atomicOp(const KeyRef& key, const ValueRef& operand, MutationR
t.mutations.push_back( req.arena, MutationRef( operationType, r.begin, v ) );
if( addConflictRange )
if (addConflictRange && operationType != MutationRef::SetVersionstampedKey)
t.write_conflict_ranges.push_back( req.arena, r );
TEST(true); //NativeAPI atomic operation
@ -3116,7 +3120,7 @@ void Transaction::setOption( FDBTransactionOptions::Option option, Optional<Stri
case FDBTransactionOptions::DEBUG_TRANSACTION_IDENTIFIER:
validateOptionValue(value, true);
if (value.get().size() > 100) {
if (value.get().size() > 100 || value.get().size() == 0) {
throw invalid_option_value();
}
@ -3143,7 +3147,7 @@ void Transaction::setOption( FDBTransactionOptions::Option option, Optional<Stri
case FDBTransactionOptions::LOG_TRANSACTION:
validateOptionValue(value, false);
if (trLogInfo) {
if (trLogInfo && !trLogInfo->identifier.empty()) {
trLogInfo->logTo(TransactionLogInfo::TRACE_LOG);
}
else {

View File

@ -306,6 +306,15 @@ public:
TransactionOptions options;
double startTime;
Reference<TransactionLogInfo> trLogInfo;
const vector<Future<std::pair<Key, Key>>>& getExtraReadConflictRanges() const { return extraConflictRanges; }
Standalone<VectorRef<KeyRangeRef>> readConflictRanges() const {
return Standalone<VectorRef<KeyRangeRef>>(tr.transaction.read_conflict_ranges, tr.arena);
}
Standalone<VectorRef<KeyRangeRef>> writeConflictRanges() const {
return Standalone<VectorRef<KeyRangeRef>>(tr.transaction.write_conflict_ranges, tr.arena);
}
private:
Future<Version> getReadVersion(uint32_t flags);
Database cx;

View File

@ -72,7 +72,7 @@ RYWIterator& RYWIterator::operator++() {
if (end_key_cmp <= 0) ++cache;
if (end_key_cmp >= 0) ++writes;
begin_key_cmp = -end_key_cmp;
end_key_cmp = cache.endKey().cmp(writes.endKey());
end_key_cmp = cache.endKey().compare(writes.endKey());
return *this;
}
@ -80,7 +80,7 @@ RYWIterator& RYWIterator::operator--() {
if (begin_key_cmp >= 0) --cache;
if (begin_key_cmp <= 0) --writes;
end_key_cmp = -begin_key_cmp;
begin_key_cmp = cache.beginKey().cmp(writes.beginKey());
begin_key_cmp = cache.beginKey().compare(writes.beginKey());
return *this;
}
@ -117,8 +117,8 @@ void RYWIterator::dbg() {
}
void RYWIterator::updateCmp() {
begin_key_cmp = cache.beginKey().cmp(writes.beginKey());
end_key_cmp = cache.endKey().cmp(writes.endKey());
begin_key_cmp = cache.beginKey().compare(writes.beginKey());
end_key_cmp = cache.endKey().compare(writes.endKey());
}
void testESR() {
@ -157,13 +157,13 @@ void testESR() {
printf("Error: '%s' cmp '%s' = %d\n", printable(ssrs[i]).c_str(), printable(ssrs[j]).c_str(), c2);
return;
}
/*
int c = ssrs[i] < ssrs[j] ? -1 : ssrs[i] == ssrs[j] ? 0 : 1;
int c2 = srs[i].cmp(srs[j]);
int c2 = srs[i].compare(srs[j]);
if ( c != (0<c2)-(c2<0) ) {
printf("Error: '%s' cmp '%s' = %d\n", printable(ssrs[i]).c_str(), printable(ssrs[j]).c_str(), c2);
return;
printf("Error: '%s' cmp '%s' = %d\n", printable(ssrs[i]).c_str(), printable(ssrs[j]).c_str(), c2);
return;
}*/
/*
@ -413,8 +413,8 @@ TEST_CASE("/fdbclient/WriteMap/setVersionstampedKey") {
it.skip(allKeys.begin);
ASSERT(it.beginKey() < allKeys.end);
ASSERT(it.beginKey().cmp(LiteralStringRef("")) == 0);
ASSERT(it.endKey().cmp(LiteralStringRef("stamp:XXXXXXXX\x06\x00\x00\x00")) == 0);
ASSERT(it.beginKey().compare(LiteralStringRef("")) == 0);
ASSERT(it.endKey().compare(LiteralStringRef("stamp:XXXXXXXX\x06\x00\x00\x00")) == 0);
ASSERT(!it.is_cleared_range());
ASSERT(!it.is_conflict_range());
ASSERT(!it.is_operation());
@ -423,8 +423,8 @@ TEST_CASE("/fdbclient/WriteMap/setVersionstampedKey") {
++it;
ASSERT(it.beginKey() < allKeys.end);
ASSERT(it.beginKey().cmp(LiteralStringRef("stamp:XXXXXXXX\x06\x00\x00\x00")) == 0);
ASSERT(it.endKey().cmp(LiteralStringRef("stamp:XXXXXXXX\x06\x00\x00\x00\x00")) == 0);
ASSERT(it.beginKey().compare(LiteralStringRef("stamp:XXXXXXXX\x06\x00\x00\x00")) == 0);
ASSERT(it.endKey().compare(LiteralStringRef("stamp:XXXXXXXX\x06\x00\x00\x00\x00")) == 0);
ASSERT(!it.is_cleared_range());
ASSERT(it.is_conflict_range());
ASSERT(it.is_operation());
@ -434,8 +434,8 @@ TEST_CASE("/fdbclient/WriteMap/setVersionstampedKey") {
++it;
ASSERT(it.beginKey() < allKeys.end);
ASSERT(it.beginKey().cmp(LiteralStringRef("stamp:XXXXXXXX\x06\x00\x00\x00\x00")) == 0);
ASSERT(it.endKey().cmp(LiteralStringRef("stamp:ZZZZZZZZZZ")) == 0);
ASSERT(it.beginKey().compare(LiteralStringRef("stamp:XXXXXXXX\x06\x00\x00\x00\x00")) == 0);
ASSERT(it.endKey().compare(LiteralStringRef("stamp:ZZZZZZZZZZ")) == 0);
ASSERT(!it.is_cleared_range());
ASSERT(!it.is_conflict_range());
ASSERT(!it.is_operation());
@ -444,8 +444,8 @@ TEST_CASE("/fdbclient/WriteMap/setVersionstampedKey") {
++it;
ASSERT(it.beginKey() < allKeys.end);
ASSERT(it.beginKey().cmp(LiteralStringRef("stamp:ZZZZZZZZZZ")) == 0);
ASSERT(it.endKey().cmp(LiteralStringRef("stamp:ZZZZZZZZZZ\x00")) == 0);
ASSERT(it.beginKey().compare(LiteralStringRef("stamp:ZZZZZZZZZZ")) == 0);
ASSERT(it.endKey().compare(LiteralStringRef("stamp:ZZZZZZZZZZ\x00")) == 0);
ASSERT(!it.is_cleared_range());
ASSERT(it.is_conflict_range());
ASSERT(it.is_operation());
@ -455,8 +455,8 @@ TEST_CASE("/fdbclient/WriteMap/setVersionstampedKey") {
++it;
ASSERT(it.beginKey() < allKeys.end);
ASSERT(it.beginKey().cmp(LiteralStringRef("stamp:ZZZZZZZZZZ\x00")) == 0);
ASSERT(it.endKey().cmp(LiteralStringRef("\xff\xff")) == 0);
ASSERT(it.beginKey().compare(LiteralStringRef("stamp:ZZZZZZZZZZ\x00")) == 0);
ASSERT(it.endKey().compare(LiteralStringRef("\xff\xff")) == 0);
ASSERT(!it.is_cleared_range());
ASSERT(!it.is_conflict_range());
ASSERT(!it.is_operation());
@ -486,8 +486,8 @@ TEST_CASE("/fdbclient/WriteMap/setVersionstampedValue") {
it.skip(allKeys.begin);
ASSERT(it.beginKey() < allKeys.end);
ASSERT(it.beginKey().cmp(LiteralStringRef("")) == 0);
ASSERT(it.endKey().cmp(LiteralStringRef("stamp")) == 0);
ASSERT(it.beginKey().compare(LiteralStringRef("")) == 0);
ASSERT(it.endKey().compare(LiteralStringRef("stamp")) == 0);
ASSERT(!it.is_cleared_range());
ASSERT(!it.is_conflict_range());
ASSERT(!it.is_operation());
@ -496,8 +496,8 @@ TEST_CASE("/fdbclient/WriteMap/setVersionstampedValue") {
++it;
ASSERT(it.beginKey() < allKeys.end);
ASSERT(it.beginKey().cmp(LiteralStringRef("stamp")) == 0);
ASSERT(it.endKey().cmp(LiteralStringRef("stamp\x00")) == 0);
ASSERT(it.beginKey().compare(LiteralStringRef("stamp")) == 0);
ASSERT(it.endKey().compare(LiteralStringRef("stamp\x00")) == 0);
ASSERT(!it.is_cleared_range());
ASSERT(it.is_conflict_range());
ASSERT(it.is_operation());
@ -507,8 +507,8 @@ TEST_CASE("/fdbclient/WriteMap/setVersionstampedValue") {
++it;
ASSERT(it.beginKey() < allKeys.end);
ASSERT(it.beginKey().cmp(LiteralStringRef("stamp\x00")) == 0);
ASSERT(it.endKey().cmp(LiteralStringRef("stamp123")) == 0);
ASSERT(it.beginKey().compare(LiteralStringRef("stamp\x00")) == 0);
ASSERT(it.endKey().compare(LiteralStringRef("stamp123")) == 0);
ASSERT(!it.is_cleared_range());
ASSERT(!it.is_conflict_range());
ASSERT(!it.is_operation());
@ -517,8 +517,8 @@ TEST_CASE("/fdbclient/WriteMap/setVersionstampedValue") {
++it;
ASSERT(it.beginKey() < allKeys.end);
ASSERT(it.beginKey().cmp(LiteralStringRef("stamp123")) == 0);
ASSERT(it.endKey().cmp(LiteralStringRef("stamp123\x00")) == 0);
ASSERT(it.beginKey().compare(LiteralStringRef("stamp123")) == 0);
ASSERT(it.endKey().compare(LiteralStringRef("stamp123\x00")) == 0);
ASSERT(!it.is_cleared_range());
ASSERT(it.is_conflict_range());
ASSERT(it.is_operation());
@ -528,8 +528,8 @@ TEST_CASE("/fdbclient/WriteMap/setVersionstampedValue") {
++it;
ASSERT(it.beginKey() < allKeys.end);
ASSERT(it.beginKey().cmp(LiteralStringRef("stamp123\x00")) == 0);
ASSERT(it.endKey().cmp(LiteralStringRef("\xff\xff")) == 0);
ASSERT(it.beginKey().compare(LiteralStringRef("stamp123\x00")) == 0);
ASSERT(it.endKey().compare(LiteralStringRef("\xff\xff")) == 0);
ASSERT(!it.is_cleared_range());
ASSERT(!it.is_conflict_range());
ASSERT(!it.is_operation());

View File

@ -1040,6 +1040,18 @@ public:
wait( ryw->resetPromise.getFuture() || ready );
if( ryw->options.readYourWritesDisabled ) {
// Stash away conflict ranges to read after commit
ryw->nativeReadRanges = ryw->tr.readConflictRanges();
ryw->nativeWriteRanges = ryw->tr.writeConflictRanges();
for (const auto& f : ryw->tr.getExtraReadConflictRanges()) {
if (f.isReady() && f.get().first < f.get().second)
ryw->nativeReadRanges.push_back(
ryw->nativeReadRanges.arena(),
KeyRangeRef(f.get().first, f.get().second)
.withPrefix(readConflictRangeKeysRange.begin, ryw->nativeReadRanges.arena()));
}
if (ryw->resetPromise.isSet())
throw ryw->resetPromise.getFuture().getError();
wait( ryw->resetPromise.getFuture() || ryw->tr.commit() );
@ -1132,7 +1144,7 @@ public:
ReadYourWritesTransaction::ReadYourWritesTransaction(Database const& cx)
: cache(&arena), writes(&arena), tr(cx), retries(0), approximateSize(0), creationTime(now()), commitStarted(false),
options(tr), deferredError(cx->deferredError) {
options(tr), deferredError(cx->deferredError), versionStampFuture(tr.getVersionstamp()) {
std::copy(cx.getTransactionDefaults().begin(), cx.getTransactionDefaults().end(),
std::back_inserter(persistentOptions));
applyPersistentOptions();
@ -1290,7 +1302,7 @@ Future< Standalone<RangeResultRef> > ReadYourWritesTransaction::getRange(
}
// special key space are only allowed to query if both begin and end are in \xff\xff, \xff\xff\xff
if (specialKeys.contains(begin.getKey()) && specialKeys.contains(end.getKey()))
if (specialKeys.contains(begin.getKey()) && end.getKey() <= specialKeys.end)
return getDatabase()->specialKeySpace->getRange(Reference<ReadYourWritesTransaction>::addRef(this), begin, end,
limits, reverse);
@ -1545,6 +1557,104 @@ void ReadYourWritesTransaction::getWriteConflicts( KeyRangeMap<bool> *result ) {
}
}
Standalone<RangeResultRef> ReadYourWritesTransaction::getReadConflictRangeIntersecting(KeyRangeRef kr) {
ASSERT(readConflictRangeKeysRange.contains(kr));
ASSERT(!tr.options.checkWritesEnabled)
Standalone<RangeResultRef> result;
if (!options.readYourWritesDisabled) {
kr = kr.removePrefix(readConflictRangeKeysRange.begin);
auto iter = readConflicts.rangeContainingKeyBefore(kr.begin);
if (iter->begin() == allKeys.begin && !iter->value()) {
++iter; // Conventionally '' is missing from the result range if it's not part of a read conflict
}
for (; iter->begin() < kr.end; ++iter) {
if (kr.begin <= iter->begin() && iter->begin() < kr.end) {
result.push_back(result.arena(),
KeyValueRef(iter->begin().withPrefix(readConflictRangeKeysRange.begin, result.arena()),
iter->value() ? LiteralStringRef("1") : LiteralStringRef("0")));
}
}
} else {
CoalescedKeyRefRangeMap<ValueRef> readConflicts{ LiteralStringRef("0"), specialKeys.end };
for (const auto& range : tr.readConflictRanges())
readConflicts.insert(range.withPrefix(readConflictRangeKeysRange.begin, result.arena()),
LiteralStringRef("1"));
for (const auto& range : nativeReadRanges)
readConflicts.insert(range.withPrefix(readConflictRangeKeysRange.begin, result.arena()),
LiteralStringRef("1"));
for (const auto& f : tr.getExtraReadConflictRanges()) {
if (f.isReady() && f.get().first < f.get().second)
readConflicts.insert(KeyRangeRef(f.get().first, f.get().second)
.withPrefix(readConflictRangeKeysRange.begin, result.arena()),
LiteralStringRef("1"));
}
auto beginIter = readConflicts.rangeContaining(kr.begin);
if (beginIter->begin() != kr.begin) ++beginIter;
for (auto it = beginIter; it->begin() < kr.end; ++it) {
result.push_back(result.arena(), KeyValueRef(it->begin(), it->value()));
}
}
return result;
}
Standalone<RangeResultRef> ReadYourWritesTransaction::getWriteConflictRangeIntersecting(KeyRangeRef kr) {
ASSERT(writeConflictRangeKeysRange.contains(kr));
Standalone<RangeResultRef> result;
// Memory owned by result
CoalescedKeyRefRangeMap<ValueRef> writeConflicts{ LiteralStringRef("0"), specialKeys.end };
if (!options.readYourWritesDisabled) {
KeyRangeRef strippedWriteRangePrefix = kr.removePrefix(writeConflictRangeKeysRange.begin);
WriteMap::iterator it(&writes);
it.skip(strippedWriteRangePrefix.begin);
if (it.beginKey() > allKeys.begin) --it;
for (; it.beginKey() < strippedWriteRangePrefix.end; ++it) {
if (it.is_conflict_range())
writeConflicts.insert(
KeyRangeRef(it.beginKey().toArena(result.arena()), it.endKey().toArena(result.arena()))
.withPrefix(writeConflictRangeKeysRange.begin, result.arena()),
LiteralStringRef("1"));
}
} else {
for (const auto& range : tr.writeConflictRanges())
writeConflicts.insert(range.withPrefix(writeConflictRangeKeysRange.begin, result.arena()),
LiteralStringRef("1"));
for (const auto& range : nativeWriteRanges)
writeConflicts.insert(range.withPrefix(writeConflictRangeKeysRange.begin, result.arena()),
LiteralStringRef("1"));
}
for (const auto& k : versionStampKeys) {
KeyRange range;
if (versionStampFuture.isValid() && versionStampFuture.isReady() && !versionStampFuture.isError()) {
const auto& stamp = versionStampFuture.get();
StringRef key(range.arena(), k); // Copy
ASSERT(k.size() >= 4);
int32_t pos;
memcpy(&pos, k.end() - sizeof(int32_t), sizeof(int32_t));
pos = littleEndian32(pos);
ASSERT(pos >= 0 && pos + stamp.size() <= key.size());
memcpy(mutateString(key) + pos, stamp.begin(), stamp.size());
*(mutateString(key) + key.size() - 4) = '\x00';
// singleKeyRange, but share begin and end's memory
range = KeyRangeRef(key.substr(0, key.size() - 4), key.substr(0, key.size() - 3));
} else {
range = getVersionstampKeyRange(result.arena(), k, tr.getCachedReadVersion().orDefault(0), getMaxReadKey());
}
writeConflicts.insert(range.withPrefix(writeConflictRangeKeysRange.begin, result.arena()),
LiteralStringRef("1"));
}
auto beginIter = writeConflicts.rangeContaining(kr.begin);
if (beginIter->begin() != kr.begin) ++beginIter;
for (auto it = beginIter; it->begin() < kr.end; ++it) {
result.push_back(result.arena(), KeyValueRef(it->begin(), it->value()));
}
return result;
}
void ReadYourWritesTransaction::atomicOp( const KeyRef& key, const ValueRef& operand, uint32_t operationType ) {
bool addWriteConflict = !options.getAndResetWriteConflictDisabled();
@ -1593,6 +1703,8 @@ void ReadYourWritesTransaction::atomicOp( const KeyRef& key, const ValueRef& ope
TEST(options.readYourWritesDisabled); // SetVersionstampedKey without ryw enabled
// this does validation of the key and needs to be performed before the readYourWritesDisabled path
KeyRangeRef range = getVersionstampKeyRange(arena, k, tr.getCachedReadVersion().orDefault(0), getMaxReadKey());
versionStampKeys.push_back(arena, k);
addWriteConflict = false;
if(!options.readYourWritesDisabled) {
writeRangeToNativeTransaction(range);
writes.addUnmodifiedAndUnreadableRange(range);
@ -1911,6 +2023,9 @@ void ReadYourWritesTransaction::operator=(ReadYourWritesTransaction&& r) BOOST_N
cache.arena = &arena;
writes.arena = &arena;
persistentOptions = std::move(r.persistentOptions);
nativeReadRanges = std::move(r.nativeReadRanges);
nativeWriteRanges = std::move(r.nativeWriteRanges);
versionStampKeys = std::move(r.versionStampKeys);
}
ReadYourWritesTransaction::ReadYourWritesTransaction(ReadYourWritesTransaction&& r) BOOST_NOEXCEPT :
@ -1935,6 +2050,9 @@ ReadYourWritesTransaction::ReadYourWritesTransaction(ReadYourWritesTransaction&&
watchMap = std::move( r.watchMap );
r.resetPromise = Promise<Void>();
persistentOptions = std::move(r.persistentOptions);
nativeReadRanges = std::move(r.nativeReadRanges);
nativeWriteRanges = std::move(r.nativeWriteRanges);
versionStampKeys = std::move(r.versionStampKeys);
}
Future<Void> ReadYourWritesTransaction::onError(Error const& e) {
@ -1969,6 +2087,9 @@ void ReadYourWritesTransaction::resetRyow() {
cache = SnapshotCache(&arena);
writes = WriteMap(&arena);
readConflicts = CoalescedKeyRefRangeMap<bool>();
versionStampKeys = VectorRef<KeyRef>();
nativeReadRanges = Standalone<VectorRef<KeyRangeRef>>();
nativeWriteRanges = Standalone<VectorRef<KeyRangeRef>>();
watchMap.clear();
reading = AndFuture();
approximateSize = 0;
@ -1999,6 +2120,7 @@ void ReadYourWritesTransaction::reset() {
options.reset(tr);
transactionDebugInfo.clear();
tr.fullReset();
versionStampFuture = tr.getVersionstamp();
std::copy(tr.getDatabase().getTransactionDefaults().begin(), tr.getDatabase().getTransactionDefaults().end(), std::back_inserter(persistentOptions));
resetRyow();
}

View File

@ -119,7 +119,10 @@ public:
void reset();
void debugTransaction(UID dID) { tr.debugTransaction(dID); }
Future<Void> debug_onIdle() { return reading; }
Future<Void> debug_onIdle() { return reading; }
// Wait for all reads that are currently pending to complete
Future<Void> pendingReads() { return resetPromise.getFuture() || reading; }
// Used by ThreadSafeTransaction for exceptions thrown in void methods
Error deferredError;
@ -135,6 +138,12 @@ public:
const TransactionInfo& getTransactionInfo() const {
return tr.info;
}
// Read from the special key space readConflictRangeKeysRange
Standalone<RangeResultRef> getReadConflictRangeIntersecting(KeyRangeRef kr);
// Read from the special key space writeConflictRangeKeysRange
Standalone<RangeResultRef> getWriteConflictRangeIntersecting(KeyRangeRef kr);
private:
friend class RYWImpl;
@ -152,6 +161,14 @@ private:
double creationTime;
bool commitStarted;
// For reading conflict ranges from the special key space
VectorRef<KeyRef> versionStampKeys;
Future<Standalone<StringRef>> versionStampFuture;
Standalone<VectorRef<KeyRangeRef>>
nativeReadRanges; // Used to read conflict ranges after committing an ryw disabled transaction
Standalone<VectorRef<KeyRangeRef>>
nativeWriteRanges; // Used to read conflict ranges after committing an ryw disabled transaction
Reference<TransactionDebugInfo> transactionDebugInfo;
void resetTimeout();

View File

@ -540,42 +540,27 @@ struct RestoreRequest {
int index;
Key tagName;
Key url;
bool waitForComplete;
Version targetVersion;
bool verbose;
KeyRange range;
Key addPrefix;
Key removePrefix;
bool lockDB;
UID randomUid;
std::vector<int> restoreRequests;
// Key restoreTag;
ReplyPromise<struct RestoreCommonReply> reply;
RestoreRequest() = default;
explicit RestoreRequest(const int index, const Key& tagName, const Key& url, bool waitForComplete,
Version targetVersion, bool verbose, const KeyRange& range, const Key& addPrefix,
const Key& removePrefix, bool lockDB, const UID& randomUid)
: index(index), tagName(tagName), url(url), waitForComplete(waitForComplete), targetVersion(targetVersion),
verbose(verbose), range(range), addPrefix(addPrefix), removePrefix(removePrefix), lockDB(lockDB),
randomUid(randomUid) {}
explicit RestoreRequest(const int index, const Key& tagName, const Key& url, Version targetVersion,
const KeyRange& range, const UID& randomUid)
: index(index), tagName(tagName), url(url), targetVersion(targetVersion), range(range), randomUid(randomUid) {}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, index, tagName, url, waitForComplete, targetVersion, verbose, range, addPrefix, removePrefix,
lockDB, randomUid, restoreRequests, reply);
serializer(ar, index, tagName, url, targetVersion, range, randomUid, reply);
}
std::string toString() const {
std::stringstream ss;
ss << "index:" << std::to_string(index) << " tagName:" << tagName.contents().toString()
<< " url:" << url.contents().toString() << " waitForComplete:" << std::to_string(waitForComplete)
<< " targetVersion:" << std::to_string(targetVersion) << " verbose:" << std::to_string(verbose)
<< " range:" << range.toString() << " addPrefix:" << addPrefix.contents().toString()
<< " removePrefix:" << removePrefix.contents().toString() << " lockDB:" << std::to_string(lockDB)
<< " randomUid:" << randomUid.toString();
<< " url:" << url.contents().toString() << " targetVersion:" << std::to_string(targetVersion)
<< " range:" << range.toString() << " randomUid:" << randomUid.toString();
return ss.str();
}
};

View File

@ -71,7 +71,7 @@ struct ExtStringRef {
int size() const { return base.size() + extra_zero_bytes; }
int cmp(ExtStringRef const& rhs) const {
int compare(ExtStringRef const& rhs) const {
int cbl = std::min(base.size(), rhs.base.size());
if (cbl > 0) {
int c = memcmp(base.begin(), rhs.base.begin(), cbl);
@ -82,7 +82,7 @@ struct ExtStringRef {
if (base[i]) return 1;
for(int i=cbl; i<rhs.base.size(); i++)
if (rhs.base[i]) return -1;
return size() - rhs.size();
return ::compare(size(), rhs.size());
}
bool startsWith( const ExtStringRef& s ) const {
@ -114,13 +114,21 @@ private:
int extra_zero_bytes;
};
inline bool operator == (const ExtStringRef& lhs, const ExtStringRef& rhs ) {
return lhs.size() == rhs.size() && !lhs.cmp(rhs);
return lhs.size() == rhs.size() && !lhs.compare(rhs);
}
inline bool operator != (const ExtStringRef& lhs, const ExtStringRef& rhs ) { return !(lhs==rhs); }
inline bool operator < ( const ExtStringRef& lhs, const ExtStringRef& rhs ) { return lhs.cmp(rhs)<0; }
inline bool operator > ( const ExtStringRef& lhs, const ExtStringRef& rhs ) { return lhs.cmp(rhs)>0; }
inline bool operator <= ( const ExtStringRef& lhs, const ExtStringRef& rhs ) { return lhs.cmp(rhs)<=0; }
inline bool operator >= ( const ExtStringRef& lhs, const ExtStringRef& rhs ) { return lhs.cmp(rhs)>=0; }
inline bool operator<(const ExtStringRef& lhs, const ExtStringRef& rhs) {
return lhs.compare(rhs) < 0;
}
inline bool operator>(const ExtStringRef& lhs, const ExtStringRef& rhs) {
return lhs.compare(rhs) > 0;
}
inline bool operator<=(const ExtStringRef& lhs, const ExtStringRef& rhs) {
return lhs.compare(rhs) <= 0;
}
inline bool operator>=(const ExtStringRef& lhs, const ExtStringRef& rhs) {
return lhs.compare(rhs) >= 0;
}
template<>
struct Traceable<ExtStringRef> : std::true_type {
@ -152,25 +160,10 @@ private:
{
values.push_back( arena, kv );
}
int compare(Entry const& r) const { return ::compare(beginKey, r.beginKey); }
bool operator < (Entry const& r) const {
return beginKey < r.beginKey;
}
bool operator < (StringRef const& r) const {
return beginKey < r;
}
bool operator <= (Entry const& r) const {
return beginKey <= r.beginKey;
}
bool operator <= (StringRef const& r) const {
return beginKey <= r;
}
bool operator == (Entry const& r) const {
return beginKey == r.beginKey;
}
bool operator == (StringRef const& r) const {
return beginKey == r;
}
int segments() const { return 2*(values.size()+1); }
};

View File

@ -243,6 +243,26 @@ Future<Optional<Value>> SpecialKeySpace::get(Reference<ReadYourWritesTransaction
return getActor(this, ryw, key);
}
ReadConflictRangeImpl::ReadConflictRangeImpl(KeyRangeRef kr) : SpecialKeyRangeBaseImpl(kr) {}
ACTOR static Future<Standalone<RangeResultRef>> getReadConflictRangeImpl(Reference<ReadYourWritesTransaction> ryw,
KeyRange kr) {
wait(ryw->pendingReads());
return ryw->getReadConflictRangeIntersecting(kr);
}
Future<Standalone<RangeResultRef>> ReadConflictRangeImpl::getRange(Reference<ReadYourWritesTransaction> ryw,
KeyRangeRef kr) const {
return getReadConflictRangeImpl(ryw, kr);
}
WriteConflictRangeImpl::WriteConflictRangeImpl(KeyRangeRef kr) : SpecialKeyRangeBaseImpl(kr) {}
Future<Standalone<RangeResultRef>> WriteConflictRangeImpl::getRange(Reference<ReadYourWritesTransaction> ryw,
KeyRangeRef kr) const {
return ryw->getWriteConflictRangeIntersecting(kr);
}
ConflictingKeysImpl::ConflictingKeysImpl(KeyRangeRef kr) : SpecialKeyRangeBaseImpl(kr) {}
Future<Standalone<RangeResultRef>> ConflictingKeysImpl::getRange(Reference<ReadYourWritesTransaction> ryw,

View File

@ -95,5 +95,19 @@ public:
KeyRangeRef kr) const override;
};
class ReadConflictRangeImpl : public SpecialKeyRangeBaseImpl {
public:
explicit ReadConflictRangeImpl(KeyRangeRef kr);
Future<Standalone<RangeResultRef>> getRange(Reference<ReadYourWritesTransaction> ryw,
KeyRangeRef kr) const override;
};
class WriteConflictRangeImpl : public SpecialKeyRangeBaseImpl {
public:
explicit WriteConflictRangeImpl(KeyRangeRef kr);
Future<Standalone<RangeResultRef>> getRange(Reference<ReadYourWritesTransaction> ryw,
KeyRangeRef kr) const override;
};
#include "flow/unactorcompiler.h"
#endif

View File

@ -54,6 +54,7 @@ struct StorageServerInterface {
LocalityData locality;
UID uniqueID;
Endpoint base;
RequestStream<struct GetValueRequest> getValue;
RequestStream<struct GetKeyRequest> getKey;
@ -65,13 +66,13 @@ struct StorageServerInterface {
RequestStream<struct GetShardStateRequest> getShardState;
RequestStream<struct WaitMetricsRequest> waitMetrics;
RequestStream<struct SplitMetricsRequest> splitMetrics;
RequestStream<struct ReadHotSubRangeRequest> getReadHotRanges;
RequestStream<struct GetStorageMetricsRequest> getStorageMetrics;
RequestStream<ReplyPromise<Void>> waitFailure;
RequestStream<struct StorageQueuingMetricsRequest> getQueuingMetrics;
RequestStream<ReplyPromise<KeyValueStoreType>> getKeyValueStoreType;
RequestStream<struct WatchValueRequest> watchValue;
RequestStream<struct ReadHotSubRangeRequest> getReadHotRanges;
explicit StorageServerInterface(UID uid) : uniqueID( uid ) {}
StorageServerInterface() : uniqueID( deterministicRandom()->randomUniqueID() ) {}
@ -85,22 +86,50 @@ struct StorageServerInterface {
// StorageServerInterface is persisted in the database and in the tLog's data structures, so changes here have to be
// versioned carefully!
if constexpr (!is_fb_function<Ar>) {
serializer(ar, uniqueID, locality, getValue, getKey, getKeyValues, getShardState, waitMetrics, splitMetrics,
getReadHotRanges, getStorageMetrics, waitFailure, getQueuingMetrics, getKeyValueStoreType);
if (ar.protocolVersion().hasWatches()) serializer(ar, watchValue);
if (ar.protocolVersion().hasSmallEndpoints()) {
serializer(ar, uniqueID, locality, base);
if( Ar::isDeserializing ) {
getValue = RequestStream<struct GetValueRequest>( base.getAdjustedEndpoint(0) );
getKey = RequestStream<struct GetKeyRequest>( base.getAdjustedEndpoint(1) );
getKeyValues = RequestStream<struct GetKeyValuesRequest>( base.getAdjustedEndpoint(2) );
getShardState = RequestStream<struct GetShardStateRequest>( base.getAdjustedEndpoint(3) );
waitMetrics = RequestStream<struct WaitMetricsRequest>( base.getAdjustedEndpoint(4) );
splitMetrics = RequestStream<struct SplitMetricsRequest>( base.getAdjustedEndpoint(5) );
getStorageMetrics = RequestStream<struct GetStorageMetricsRequest>( base.getAdjustedEndpoint(6) );
waitFailure = RequestStream<ReplyPromise<Void>>( base.getAdjustedEndpoint(7) );
getQueuingMetrics = RequestStream<struct StorageQueuingMetricsRequest>( base.getAdjustedEndpoint(8) );
getKeyValueStoreType = RequestStream<ReplyPromise<KeyValueStoreType>>( base.getAdjustedEndpoint(9) );
watchValue = RequestStream<struct WatchValueRequest>( base.getAdjustedEndpoint(10) );
getReadHotRanges = RequestStream<struct ReadHotSubRangeRequest>( base.getAdjustedEndpoint(11) );
}
} else {
serializer(ar, uniqueID, locality, getValue, getKey, getKeyValues, getShardState, waitMetrics, splitMetrics,
getReadHotRanges, getStorageMetrics, waitFailure, getQueuingMetrics, getKeyValueStoreType,
watchValue);
ASSERT(Ar::isDeserializing);
if constexpr (is_fb_function<Ar>) {
ASSERT(false);
}
serializer(ar, uniqueID, locality, getValue, getKey, getKeyValues, getShardState, waitMetrics,
splitMetrics, getStorageMetrics, waitFailure, getQueuingMetrics, getKeyValueStoreType);
if (ar.protocolVersion().hasWatches()) serializer(ar, watchValue);
base = getValue.getEndpoint();
}
}
bool operator == (StorageServerInterface const& s) const { return uniqueID == s.uniqueID; }
bool operator < (StorageServerInterface const& s) const { return uniqueID < s.uniqueID; }
void initEndpoints() {
getValue.getEndpoint( TaskPriority::LoadBalancedEndpoint );
getKey.getEndpoint( TaskPriority::LoadBalancedEndpoint );
getKeyValues.getEndpoint( TaskPriority::LoadBalancedEndpoint );
std::vector<std::pair<FlowReceiver*, TaskPriority>> streams;
streams.push_back(getValue.getReceiver(TaskPriority::LoadBalancedEndpoint));
streams.push_back(getKey.getReceiver(TaskPriority::LoadBalancedEndpoint));
streams.push_back(getKeyValues.getReceiver(TaskPriority::LoadBalancedEndpoint));
streams.push_back(getShardState.getReceiver());
streams.push_back(waitMetrics.getReceiver());
streams.push_back(splitMetrics.getReceiver());
streams.push_back(getStorageMetrics.getReceiver());
streams.push_back(waitFailure.getReceiver());
streams.push_back(getQueuingMetrics.getReceiver());
streams.push_back(getKeyValueStoreType.getReceiver());
streams.push_back(watchValue.getReceiver());
streams.push_back(getReadHotRanges.getReceiver());
base = FlowTransport::transport().addEndpoints(streams);
}
};

View File

@ -49,16 +49,31 @@ const Value keyServersValue( Standalone<RangeResultRef> result, const std::vecto
std::vector<Tag> srcTag;
std::vector<Tag> destTag;
bool foundOldLocality = false;
for (const KeyValueRef kv : result) {
UID uid = decodeServerTagKey(kv.key);
if (std::find(src.begin(), src.end(), uid) != src.end()) {
srcTag.push_back( decodeServerTagValue(kv.value) );
if(srcTag.back().locality == tagLocalityUpgraded) {
foundOldLocality = true;
break;
}
}
if (std::find(dest.begin(), dest.end(), uid) != dest.end()) {
destTag.push_back( decodeServerTagValue(kv.value) );
if(destTag.back().locality == tagLocalityUpgraded) {
foundOldLocality = true;
break;
}
}
}
if(foundOldLocality || src.size() != srcTag.size() || dest.size() != destTag.size()) {
ASSERT_WE_THINK(foundOldLocality);
BinaryWriter wr(IncludeVersion()); wr << src << dest;
return wr.toValue();
}
return keyServersValue(srcTag, destTag);
}
const Value keyServersValue( const std::vector<Tag>& srcTag, const std::vector<Tag>& destTag ) {
@ -68,7 +83,7 @@ const Value keyServersValue( const std::vector<Tag>& srcTag, const std::vector<T
}
void decodeKeyServersValue( Standalone<RangeResultRef> result, const ValueRef& value,
std::vector<UID>& src, std::vector<UID>& dest ) {
std::vector<UID>& src, std::vector<UID>& dest, bool missingIsError ) {
if (value.size() == 0) {
src.clear();
dest.clear();
@ -106,13 +121,37 @@ void decodeKeyServersValue( Standalone<RangeResultRef> result, const ValueRef& v
}
std::sort(src.begin(), src.end());
std::sort(dest.begin(), dest.end());
if(missingIsError && (src.size() != srcTag.size() || dest.size() != destTag.size())) {
TraceEvent(SevError, "AttemptedToDecodeMissingTag");
for (const KeyValueRef kv : result) {
Tag tag = decodeServerTagValue(kv.value);
UID serverID = decodeServerTagKey(kv.key);
TraceEvent("TagUIDMap").detail("Tag", tag.toString()).detail("UID", serverID.toString());
}
for(auto& it : srcTag) {
TraceEvent("SrcTag").detail("Tag", it.toString());
}
for(auto& it : destTag) {
TraceEvent("DestTag").detail("Tag", it.toString());
}
ASSERT(false);
}
}
const KeyRangeRef conflictingKeysRange = KeyRangeRef(LiteralStringRef("\xff\xff/transaction/conflicting_keys/"),
LiteralStringRef("\xff\xff/transaction/conflicting_keys/\xff"));
const KeyRangeRef conflictingKeysRange =
KeyRangeRef(LiteralStringRef("\xff\xff/transaction/conflicting_keys/"),
LiteralStringRef("\xff\xff/transaction/conflicting_keys/\xff\xff"));
const ValueRef conflictingKeysTrue = LiteralStringRef("1");
const ValueRef conflictingKeysFalse = LiteralStringRef("0");
const KeyRangeRef readConflictRangeKeysRange =
KeyRangeRef(LiteralStringRef("\xff\xff/transaction/read_conflict_range/"),
LiteralStringRef("\xff\xff/transaction/read_conflict_range/\xff\xff"));
const KeyRangeRef writeConflictRangeKeysRange =
KeyRangeRef(LiteralStringRef("\xff\xff/transaction/write_conflict_range/"),
LiteralStringRef("\xff\xff/transaction/write_conflict_range/\xff\xff"));
// "\xff/storageCache/[[begin]]" := "[[vector<uint16_t>]]"
const KeyRangeRef storageCacheKeys( LiteralStringRef("\xff/storageCache/"), LiteralStringRef("\xff/storageCache0") );
const KeyRef storageCachePrefix = storageCacheKeys.begin;

View File

@ -58,7 +58,7 @@ const Value keyServersValue(
const std::vector<Tag>& destTag = std::vector<Tag>());
// `result` must be the full result of getting serverTagKeys
void decodeKeyServersValue( Standalone<RangeResultRef> result, const ValueRef& value,
std::vector<UID>& src, std::vector<UID>& dest );
std::vector<UID>& src, std::vector<UID>& dest, bool missingIsError = true );
// "\xff/storageCache/[[begin]]" := "[[vector<uint16_t>]]"
extern const KeyRangeRef storageCacheKeys;
@ -77,6 +77,8 @@ bool serverHasKey( ValueRef storedValue );
extern const KeyRangeRef conflictingKeysRange;
extern const ValueRef conflictingKeysTrue, conflictingKeysFalse;
extern const KeyRangeRef writeConflictRangeKeysRange;
extern const KeyRangeRef readConflictRangeKeysRange;
extern const KeyRef cacheKeysPrefix;

View File

@ -0,0 +1,56 @@
#include "fdbclient/VersionedMap.h"
#include "flow/TreeBenchmark.h"
#include "flow/UnitTest.h"
template <typename K>
struct VersionedMapHarness {
using map = VersionedMap<K, int>;
using key_type = K;
struct result {
typename map::iterator it;
result(typename map::iterator it) : it(it) {}
result& operator++() {
++it;
return *this;
}
const K& operator*() const { return it.key(); }
const K& operator->() const { return it.key(); }
bool operator==(result const& k) const { return it == k.it; }
};
map s;
void insert(K const& k) { s.insert(k, 1); }
result find(K const& k) const { return result(s.atLatest().find(k)); }
result not_found() const { return result(s.atLatest().end()); }
result begin() const { return result(s.atLatest().begin()); }
result end() const { return result(s.atLatest().end()); }
result lower_bound(K const& k) const { return result(s.atLatest().lower_bound(k)); }
result upper_bound(K const& k) const { return result(s.atLatest().upper_bound(k)); }
void erase(K const& k) { s.erase(k); }
};
TEST_CASE("performance/map/int/VersionedMap") {
VersionedMapHarness<int> tree;
treeBenchmark(tree, *randomInt);
return Void();
}
TEST_CASE("performance/map/StringRef/VersionedMap") {
Arena arena;
VersionedMapHarness<StringRef> tree;
treeBenchmark(tree, [&arena]() { return randomStr(arena); });
return Void();
}
void forceLinkVersionedMapTests() {}

View File

@ -67,7 +67,63 @@ namespace PTreeImpl {
PTree(PTree const&);
};
template<class T>
template <class T>
class PTreeFinger {
using PTreeFingerEntry = PTree<T> const*;
// This finger size supports trees with up to exp(96/4.3) ~= 4,964,514,749 entries.
// see also: check().
static constexpr size_t N = 96;
PTreeFingerEntry entries_[N];
size_t size_ = 0;
size_t bound_sz_ = 0;
public:
PTreeFinger() {}
// Explicit copy constructors ensure we copy the live values in entries_.
PTreeFinger(PTreeFinger const& f) { *this = f; }
PTreeFinger(PTreeFinger&& f) { *this = f; }
PTreeFinger& operator=(PTreeFinger const& f) {
size_ = f.size_;
bound_sz_ = f.bound_sz_;
std::copy(f.entries_, f.entries_ + size_, entries_);
return *this;
}
PTreeFinger& operator=(PTreeFinger&& f) {
size_ = std::exchange(f.size_, 0);
bound_sz_ = f.bound_sz_;
std::copy(f.entries_, f.entries_ + size_, entries_);
return *this;
}
size_t size() const { return size_; }
PTree<T> const* back() const { return entries_[size_ - 1]; }
void pop_back() { size_--; }
void clear() { size_ = 0; }
PTree<T> const* operator[](size_t i) const { return entries_[i]; }
void resize(size_t sz) {
size_ = sz;
ASSERT(size_ < N);
}
void push_back(PTree<T> const* node) {
entries_[size_++] = { node };
ASSERT(size_ < N);
}
void push_for_bound(PTree<T> const* node, bool less) {
push_back(node);
bound_sz_ = less ? size_ : bound_sz_;
}
// remove the end of the finger so that the last entry is less than the probe
void trim_to_bound() { size_ = bound_sz_; }
};
template<class T>
static Reference<PTree<T>> update( Reference<PTree<T>> const& node, bool which, Reference<PTree<T>> const& ptr, Version at ) {
if (ptr.getPtr() == node->child(which, at).getPtr()/* && node->replacedVersion <= at*/) {
return node;
@ -109,38 +165,41 @@ namespace PTreeImpl {
template<class T, class X>
bool contains(const Reference<PTree<T>>& p, Version at, const X& x) {
if (!p) return false;
bool less = x < p->data;
if (!less && !(p->data<x)) return true; // x == p->data
int cmp = compare(x, p->data);
bool less = cmp < 0;
if (cmp == 0) return true;
return contains(p->child(!less, at), at, x);
}
template<class T, class X>
void lower_bound(const Reference<PTree<T>>& p, Version at, const X& x, std::vector<const PTree<T>*>& f){
if (!p) {
while (f.size() && !(x < f.back()->data))
f.pop_back();
return;
// TODO: Remove the number of invocations of operator<, and replace with something closer to memcmp.
// and same for upper_bound.
template <class T, class X>
void lower_bound(const Reference<PTree<T>>& p, Version at, const X& x, PTreeFinger<T>& f) {
if (!p) {
f.trim_to_bound();
return;
}
f.push_back(p.getPtr());
bool less = x < p->data;
if (!less && !(p->data<x)) return; // x == p->data
lower_bound(p->child(!less, at), at, x, f);
}
int cmp = compare(x, p->data);
bool less = cmp < 0;
f.push_for_bound(p.getPtr(), less);
if (cmp == 0) return;
lower_bound(p->child(!less, at), at, x, f);
}
template<class T, class X>
void upper_bound(const Reference<PTree<T>>& p, Version at, const X& x, std::vector<const PTree<T>*>& f){
if (!p) {
while (f.size() && !(x < f.back()->data))
f.pop_back();
return;
template <class T, class X>
void upper_bound(const Reference<PTree<T>>& p, Version at, const X& x, PTreeFinger<T>& f) {
if (!p) {
f.trim_to_bound();
return;
}
f.push_back(p.getPtr());
upper_bound(p->child(!(x < p->data), at), at, x, f);
}
template<class T, bool forward>
void move(Version at, std::vector<const PTree<T>*>& f){
ASSERT(f.size());
bool less = x < p->data;
f.push_for_bound(p.getPtr(), less);
upper_bound(p->child(!less, at), at, x, f);
}
template <class T, bool forward>
void move(Version at, PTreeFinger<T>& f) {
ASSERT(f.size());
const PTree<T> *n;
n = f.back();
if (n->child(forward, at)){
@ -155,11 +214,11 @@ namespace PTreeImpl {
f.pop_back();
} while (f.size() && f.back()->child(forward, at).getPtr() == n);
}
}
}
template<class T, bool forward>
int halfMove(Version at, std::vector<const PTree<T>*>& f) {
// Post: f[:return_value] is the finger that would have been returned by move<forward>(at,f), and f[:original_length_of_f] is unmodified
template <class T, bool forward>
int halfMove(Version at, PTreeFinger<T>& f) {
// Post: f[:return_value] is the finger that would have been returned by move<forward>(at,f), and f[:original_length_of_f] is unmodified
ASSERT(f.size());
const PTree<T> *n;
n = f.back();
@ -178,35 +237,35 @@ namespace PTreeImpl {
} while (s && f[s-1]->child(forward, at).getPtr() == n);
return s;
}
}
}
template<class T>
void next(Version at, std::vector<const PTree<T>*>& f){
move<T,true>(at, f);
}
template<class T>
void previous(Version at, std::vector<const PTree<T>*>& f){
move<T,false>(at, f);
}
template <class T>
void next(Version at, PTreeFinger<T>& f) {
move<T,true>(at, f);
}
template<class T>
int halfNext(Version at, std::vector<const PTree<T>*>& f){
return halfMove<T,true>(at, f);
}
template<class T>
int halfPrevious(Version at, std::vector<const PTree<T>*>& f){
return halfMove<T,false>(at, f);
}
template <class T>
void previous(Version at, PTreeFinger<T>& f) {
move<T,false>(at, f);
}
template<class T>
T get(std::vector<const PTree<T>*>& f){
ASSERT(f.size());
template <class T>
int halfNext(Version at, PTreeFinger<T>& f) {
return halfMove<T,true>(at, f);
}
template <class T>
int halfPrevious(Version at, PTreeFinger<T>& f) {
return halfMove<T,false>(at, f);
}
template <class T>
T get(PTreeFinger<T>& f) {
ASSERT(f.size());
return f.back()->data;
}
}
// Modifies p to point to a PTree with x inserted
// Modifies p to point to a PTree with x inserted
template<class T>
void insert(Reference<PTree<T>>& p, Version at, const T& x) {
if (!p){
@ -235,24 +294,24 @@ namespace PTreeImpl {
return lastNode(p->right(at), at);
}
template<class T, bool last>
void firstOrLastFinger(const Reference<PTree<T>>& p, Version at, std::vector<const PTree<T>*>& f) {
if (!p) return;
template <class T, bool last>
void firstOrLastFinger(const Reference<PTree<T>>& p, Version at, PTreeFinger<T>& f) {
if (!p) return;
f.push_back(p.getPtr());
firstOrLastFinger<T, last>(p->child(last, at), at, f);
}
template<class T>
void first(const Reference<PTree<T>>& p, Version at, std::vector<const PTree<T>*>& f) {
return firstOrLastFinger<T, false>(p, at, f);
}
}
template<class T>
void last(const Reference<PTree<T>>& p, Version at, std::vector<const PTree<T>*>& f) {
return firstOrLastFinger<T, true>(p, at, f);
}
template <class T>
void first(const Reference<PTree<T>>& p, Version at, PTreeFinger<T>& f) {
return firstOrLastFinger<T, false>(p, at, f);
}
// modifies p to point to a PTree with the root of p removed
template <class T>
void last(const Reference<PTree<T>>& p, Version at, PTreeFinger<T>& f) {
return firstOrLastFinger<T, true>(p, at, f);
}
// modifies p to point to a PTree with the root of p removed
template<class T>
void removeRoot(Reference<PTree<T>>& p, Version at) {
if (!p->right(at))
@ -272,24 +331,27 @@ namespace PTreeImpl {
template<class T, class X>
void remove(Reference<PTree<T>>& p, Version at, const X& x) {
if (!p) ASSERT(false); // attempt to remove item not present in PTree
if (x < p->data) {
int cmp = compare(x, p->data);
if (cmp < 0) {
Reference<PTree<T>> child = p->child(0, at);
remove(child, at, x);
p = update(p, 0, child, at);
} else if (p->data < x) {
} else if (cmp > 0) {
Reference<PTree<T>> child = p->child(1, at);
remove(child, at, x);
p = update(p, 1, child, at);
} else
} else {
removeRoot(p, at);
}
}
template<class T, class X>
void remove(Reference<PTree<T>>& p, Version at, const X& begin, const X& end) {
if (!p) return;
int beginDir, endDir;
if (begin < p->data) beginDir = -1;
else if (p->data < begin) beginDir = +1;
int beginCmp = compare(begin, p->data);
if (beginCmp < 0) beginDir = -1;
else if (beginCmp > 0) beginDir = +1;
else beginDir = 0;
if (!(p->data < end)) endDir = -1;
else endDir = +1;
@ -364,7 +426,9 @@ namespace PTreeImpl {
if (!right) return left;
Reference<PTree<T>> r = Reference<PTree<T>>(new PTree<T>(lastNode(left, at)->data, at));
ASSERT( r->data < firstNode(right, at)->data);
if (EXPENSIVE_VALIDATION) {
ASSERT( r->data < firstNode(right, at)->data);
}
Reference<PTree<T>> a = left;
remove(a, at, r->data);
@ -513,6 +577,7 @@ class VersionedMap : NonCopyable {
//private:
public:
typedef PTreeImpl::PTree<MapPair<K,std::pair<T,Version>>> PTreeT;
typedef PTreeImpl::PTreeFinger<MapPair<K, std::pair<T, Version>>> PTreeFingerT;
typedef Reference< PTreeT > Tree;
Version oldestVersion, latestVersion;
@ -589,7 +654,7 @@ public:
UNSTOPPABLE_ASSERT(r->first == newOldestVersion);
vector<Tree> toFree;
std::vector<Tree> toFree;
toFree.reserve(10000);
auto newBegin = r;
Tree *lastRoot = nullptr;
@ -679,7 +744,7 @@ public:
friend class VersionedMap<K,T>;
Tree root;
Version at;
vector< PTreeT const* > finger;
PTreeFingerT finger;
};
class ViewAtVersion {

View File

@ -107,18 +107,35 @@ struct WriteMapEntry {
WriteMapEntry( KeyRef const& key, OperationStack && stack, bool following_keys_cleared, bool following_keys_conflict, bool is_conflict, bool following_keys_unreadable, bool is_unreadable ) : key(key), stack(std::move(stack)), following_keys_cleared(following_keys_cleared), following_keys_conflict(following_keys_conflict), is_conflict(is_conflict), following_keys_unreadable(following_keys_unreadable), is_unreadable(is_unreadable) {}
int compare(StringRef const& r) const { return key.compare(r); }
int compare(ExtStringRef const& r) const { return -r.compare(key); }
std::string toString() const { return printable(key); }
};
inline int compare(StringRef const& l, WriteMapEntry const& r) {
return l.compare(r.key);
}
inline int compare(ExtStringRef const& l, WriteMapEntry const& r) {
return l.compare(r.key);
}
inline bool operator < ( const WriteMapEntry& lhs, const WriteMapEntry& rhs ) { return lhs.key < rhs.key; }
inline bool operator < ( const WriteMapEntry& lhs, const StringRef& rhs ) { return lhs.key < rhs; }
inline bool operator < ( const StringRef& lhs, const WriteMapEntry& rhs ) { return lhs < rhs.key; }
inline bool operator < ( const WriteMapEntry& lhs, const ExtStringRef& rhs ) { return rhs.cmp(lhs.key)>0; }
inline bool operator < ( const ExtStringRef& lhs, const WriteMapEntry& rhs ) { return lhs.cmp(rhs.key)<0; }
inline bool operator<(const WriteMapEntry& lhs, const ExtStringRef& rhs) {
return rhs.compare(lhs.key) > 0;
}
inline bool operator<(const ExtStringRef& lhs, const WriteMapEntry& rhs) {
return lhs.compare(rhs.key) < 0;
}
class WriteMap {
private:
typedef PTreeImpl::PTree< WriteMapEntry > PTreeT;
typedef PTreeImpl::PTree<WriteMapEntry> PTreeT;
typedef PTreeImpl::PTreeFinger<WriteMapEntry> PTreeFingerT;
typedef Reference<PTreeT> Tree;
public:
@ -374,7 +391,7 @@ public:
Tree tree;
Version at;
int beginLen, endLen;
vector< PTreeT const* > finger;
PTreeFingerT finger;
bool offset; // false-> the operation stack at entry(); true-> the following cleared or unmodified range
};

View File

@ -415,7 +415,7 @@ private:
return data.result.get();
}
static volatile int32_t want_poll;
static std::atomic<int32_t> want_poll;
ACTOR static void poll_eio() {
while (eio_poll() == -1)
@ -445,7 +445,7 @@ private:
};
#ifdef FILESYSTEM_IMPL
volatile int32_t AsyncFileEIO::want_poll = 0;
std::atomic<int32_t> AsyncFileEIO::want_poll = 0;
#endif
#include "flow/unactorcompiler.h"

View File

@ -23,7 +23,6 @@
#pragma once
#include "flow/flow.h"
#include "flow/IndexedSet.h"
#include "fdbrpc/FlowTransport.h" // Endpoint
#include <unordered_map>

View File

@ -52,6 +52,7 @@ class EndpointMap : NonCopyable {
public:
EndpointMap();
void insert( NetworkMessageReceiver* r, Endpoint::Token& token, TaskPriority priority );
const Endpoint& insert( NetworkAddressList localAddresses, std::vector<std::pair<FlowReceiver*, TaskPriority>> const& streams );
NetworkMessageReceiver* get( Endpoint::Token const& token );
TaskPriority getPriority( Endpoint::Token const& token );
void remove( Endpoint::Token const& token, NetworkMessageReceiver* r );
@ -96,6 +97,41 @@ void EndpointMap::insert( NetworkMessageReceiver* r, Endpoint::Token& token, Tas
data[index].receiver = r;
}
const Endpoint& EndpointMap::insert( NetworkAddressList localAddresses, std::vector<std::pair<FlowReceiver*, TaskPriority>> const& streams ) {
int adjacentFree = 0;
int adjacentStart = -1;
firstFree = -1;
for(int i = 0; i < data.size(); i++) {
if(data[i].receiver) {
adjacentFree = 0;
} else {
data[i].nextFree = firstFree;
firstFree = i;
if(adjacentStart == -1 && ++adjacentFree == streams.size()) {
adjacentStart = i+1-adjacentFree;
firstFree = data[adjacentStart].nextFree;
}
}
}
if(adjacentStart == -1) {
data.resize( data.size()+streams.size()-adjacentFree );
adjacentStart = data.size()-streams.size();
if(adjacentFree > 0) {
firstFree = data[adjacentStart].nextFree;
}
}
UID base = deterministicRandom()->randomUniqueID();
for(int i=0; i<streams.size(); i++) {
int index = adjacentStart+i;
streams[i].first->setEndpoint( Endpoint( localAddresses, UID( base.first() | TOKEN_STREAM_FLAG, (base.second()&0xffffffff00000000LL) | index) ) );
data[index].token() = Endpoint::Token( base.first() | TOKEN_STREAM_FLAG, (base.second()&0xffffffff00000000LL) | static_cast<uint32_t>(streams[i].second) );
data[index].receiver = (NetworkMessageReceiver*) streams[i].first;
}
return streams[0].first->getEndpoint(TaskPriority::DefaultEndpoint);
}
NetworkMessageReceiver* EndpointMap::get( Endpoint::Token const& token ) {
uint32_t index = token.second();
if ( index < data.size() && data[index].token().first() == token.first() && ((data[index].token().second()&0xffffffff00000000LL)|index)==token.second() )
@ -559,7 +595,7 @@ ACTOR Future<Void> connectionKeeper( Reference<Peer> self,
// Don't immediately mark connection as failed. To stay closed to earlier behaviour of centralized
// failure monitoring, wait until connection stays failed for FLOW_KNOBS->FAILURE_DETECTION_DELAY timeout.
retryConnect = self->destination.isPublic() && e.code() == error_code_connection_failed;
retryConnect = true;
if (e.code() == error_code_connection_failed) {
if (!self->destination.isPublic()) {
// Can't connect back to non-public addresses.
@ -1240,6 +1276,10 @@ void FlowTransport::addEndpoint( Endpoint& endpoint, NetworkMessageReceiver* rec
self->endpoints.insert( receiver, endpoint.token, taskID );
}
const Endpoint& FlowTransport::addEndpoints( std::vector<std::pair<FlowReceiver*, TaskPriority>> const& streams ) {
return self->endpoints.insert( self->localAddresses, streams );
}
void FlowTransport::removeEndpoint( const Endpoint& endpoint, NetworkMessageReceiver* receiver ) {
self->endpoints.remove(endpoint.token, receiver);
}

View File

@ -65,6 +65,12 @@ public:
return addresses.getTLSAddress();
}
Endpoint getAdjustedEndpoint( uint32_t index ) {
uint32_t newIndex = token.second();
newIndex += index;
return Endpoint( addresses, UID(token.first(), (token.second()&0xffffffff00000000LL) | newIndex) );
}
bool operator == (Endpoint const& r) const {
return getPrimaryAddress() == r.getPrimaryAddress() && token == r.token;
}
@ -180,6 +186,8 @@ public:
void addEndpoint( Endpoint& endpoint, NetworkMessageReceiver*, TaskPriority taskID );
// Sets endpoint to be a new local endpoint which delivers messages to the given receiver
const Endpoint& addEndpoints( std::vector<std::pair<struct FlowReceiver*, TaskPriority>> const& streams );
void removeEndpoint( const Endpoint&, NetworkMessageReceiver* );
// The given local endpoint no longer delivers messages to the given receiver or uses resources

View File

@ -922,6 +922,14 @@ void filterLocalityDataForPolicy(const std::set<std::string>& keys, LocalityData
}
}
void filterLocalityDataForPolicyDcAndProcess(Reference<IReplicationPolicy> policy, LocalityData* ld) {
if (!policy) return;
std::set<std::string> keys = policy->attributeKeys();
keys.insert(LocalityData::keyDcId.toString());
keys.insert(LocalityData::keyProcessId.toString());
filterLocalityDataForPolicy(policy->attributeKeys(), ld);
}
void filterLocalityDataForPolicy(Reference<IReplicationPolicy> policy, LocalityData* ld) {
if (!policy) return;
filterLocalityDataForPolicy(policy->attributeKeys(), ld);

View File

@ -85,6 +85,7 @@ extern bool validateAllCombinations(
bool bCheckIfValid = true);
/// Remove all pieces of locality information from the LocalityData that will not be used when validating the policy.
void filterLocalityDataForPolicyDcAndProcess(Reference<IReplicationPolicy> policy, LocalityData* ld);
void filterLocalityDataForPolicy(Reference<IReplicationPolicy> policy, LocalityData* ld);
void filterLocalityDataForPolicy(Reference<IReplicationPolicy> policy, std::vector<LocalityData>* vld);

View File

@ -28,7 +28,7 @@
#include "fdbrpc/FailureMonitor.h"
#include "fdbrpc/networksender.actor.h"
struct FlowReceiver : private NetworkMessageReceiver {
struct FlowReceiver : public NetworkMessageReceiver {
// Common endpoint code for NetSAV<> and NetNotifiedQueue<>
FlowReceiver() : m_isLocalEndpoint(false), m_stream(false) {
@ -60,6 +60,12 @@ struct FlowReceiver : private NetworkMessageReceiver {
return endpoint;
}
// Adopt an externally-constructed endpoint as this receiver's local endpoint.
// May only be called once, before any endpoint has been assigned (enforced by
// the ASSERT); marks the receiver as a local endpoint so it is treated as
// message-delivering on this process.
void setEndpoint(Endpoint const& e) {
	ASSERT(!endpoint.isValid());
	m_isLocalEndpoint = true;
	endpoint = e;
}
void makeWellKnownEndpoint(Endpoint::Token token, TaskPriority taskID) {
ASSERT(!endpoint.isValid());
m_isLocalEndpoint = true;
@ -392,6 +398,10 @@ public:
bool isEmpty() const { return !queue->isReady(); }
uint32_t size() const { return queue->size(); }
// Expose the underlying queue as a FlowReceiver paired with the TaskPriority
// its messages should be delivered at. Used to register many streams at once
// (e.g. via FlowTransport::addEndpoints) instead of one endpoint at a time.
std::pair<FlowReceiver*, TaskPriority> getReceiver( TaskPriority taskID = TaskPriority::DefaultEndpoint ) {
	// static_cast replaces the old C-style cast: NetNotifiedQueue<T> derives
	// from FlowReceiver (struct inheritance is public), so this is a checked
	// derived-to-base pointer conversion rather than an unchecked cast.
	return std::make_pair(static_cast<FlowReceiver*>(queue), taskID);
}
private:
NetNotifiedQueue<T>* queue;
};

View File

@ -366,19 +366,19 @@ tvdiff (struct timeval *tv1, struct timeval *tv2)
+ ((tv2->tv_usec - tv1->tv_usec) >> 10);
}
static unsigned int started, idle, wanted = 4;
static _Atomic(unsigned int) started, idle, wanted = 4;
static void (*want_poll_cb) (void);
static void (*done_poll_cb) (void);
static unsigned int max_poll_time; /* reslock */
static unsigned int max_poll_reqs; /* reslock */
static unsigned int nreqs; /* reqlock */
static unsigned int nready; /* reqlock */
static unsigned int npending; /* reqlock */
static unsigned int max_idle = 4; /* maximum number of threads that can idle indefinitely */
static unsigned int idle_timeout = 10; /* number of seconds after which an idle thread exits */
static _Atomic(unsigned int) max_poll_time; /* reslock */
static _Atomic(unsigned int) max_poll_reqs; /* reslock */
static _Atomic(unsigned int) nreqs; /* reqlock */
static _Atomic(unsigned int) nready; /* reqlock */
static _Atomic(unsigned int) npending; /* reqlock */
static _Atomic(unsigned int) max_idle = 4; /* maximum number of threads that can idle indefinitely */
static _Atomic(unsigned int) idle_timeout = 10; /* number of seconds after which an idle thread exits */
static xmutex_t wrklock;
static xmutex_t reslock;
@ -435,9 +435,7 @@ static unsigned int
etp_nreqs (void)
{
int retval;
if (WORDACCESS_UNSAFE) X_LOCK (reqlock);
retval = nreqs;
if (WORDACCESS_UNSAFE) X_UNLOCK (reqlock);
return retval;
}
@ -446,9 +444,7 @@ etp_nready (void)
{
unsigned int retval;
if (WORDACCESS_UNSAFE) X_LOCK (reqlock);
retval = nready;
if (WORDACCESS_UNSAFE) X_UNLOCK (reqlock);
return retval;
}
@ -458,9 +454,7 @@ etp_npending (void)
{
unsigned int retval;
if (WORDACCESS_UNSAFE) X_LOCK (reqlock);
retval = npending;
if (WORDACCESS_UNSAFE) X_UNLOCK (reqlock);
return retval;
}
@ -470,9 +464,7 @@ etp_nthreads (void)
{
unsigned int retval;
if (WORDACCESS_UNSAFE) X_LOCK (reqlock);
retval = started;
if (WORDACCESS_UNSAFE) X_UNLOCK (reqlock);
return retval;
}
@ -744,33 +736,25 @@ etp_submit (ETP_REQ *req)
static void ecb_cold
etp_set_max_poll_time (double nseconds)
{
if (WORDACCESS_UNSAFE) X_LOCK (reslock);
max_poll_time = nseconds * EIO_TICKS;
if (WORDACCESS_UNSAFE) X_UNLOCK (reslock);
}
static void ecb_cold
etp_set_max_poll_reqs (unsigned int maxreqs)
{
if (WORDACCESS_UNSAFE) X_LOCK (reslock);
max_poll_reqs = maxreqs;
if (WORDACCESS_UNSAFE) X_UNLOCK (reslock);
}
static void ecb_cold
etp_set_max_idle (unsigned int nthreads)
{
if (WORDACCESS_UNSAFE) X_LOCK (reqlock);
max_idle = nthreads;
if (WORDACCESS_UNSAFE) X_UNLOCK (reqlock);
}
static void ecb_cold
etp_set_idle_timeout (unsigned int seconds)
{
if (WORDACCESS_UNSAFE) X_LOCK (reqlock);
idle_timeout = seconds;
if (WORDACCESS_UNSAFE) X_UNLOCK (reqlock);
}
static void ecb_cold

View File

@ -1,18 +1,6 @@
#ifndef XTHREAD_H_
#define XTHREAD_H_
/* whether word reads are potentially non-atomic.
* this is conservative, likely most arches this runs
* on have atomic word read/writes.
*/
#ifndef WORDACCESS_UNSAFE
# if __i386 || __x86_64
# define WORDACCESS_UNSAFE 0
# else
# define WORDACCESS_UNSAFE 1
# endif
#endif
/////////////////////////////////////////////////////////////////////////////
#ifdef _WIN32

View File

@ -1054,7 +1054,7 @@ public:
m->machine = &machine;
machine.processes.push_back(m);
currentlyRebootingProcesses.erase(addresses.address);
m->excluded = g_simulator.isExcluded(addresses.address);
m->excluded = g_simulator.isExcluded(NetworkAddress(ip, port, true, false));
m->cleared = g_simulator.isCleared(addresses.address);
m->setGlobal(enTDMetrics, (flowGlobalType) &m->tdmetrics);

View File

@ -1025,7 +1025,7 @@ public:
for( auto& logSet : dbi.logSystemConfig.tLogs ) {
for( auto& it : logSet.tLogs ) {
auto tlogWorker = id_worker.find(it.interf().locality.processId());
auto tlogWorker = id_worker.find(it.interf().filteredLocality.processId());
if ( tlogWorker == id_worker.end() )
return false;
if ( tlogWorker->second.priorityInfo.isExcluded )
@ -1042,7 +1042,7 @@ public:
}
for( auto& it : logSet.logRouters ) {
auto tlogWorker = id_worker.find(it.interf().locality.processId());
auto tlogWorker = id_worker.find(it.interf().filteredLocality.processId());
if ( tlogWorker == id_worker.end() )
return false;
if ( tlogWorker->second.priorityInfo.isExcluded )
@ -1067,7 +1067,7 @@ public:
// Get proxy classes
std::vector<WorkerDetails> proxyClasses;
for(auto& it : dbi.client.proxies ) {
auto proxyWorker = id_worker.find(it.locality.processId());
auto proxyWorker = id_worker.find(it.processId);
if ( proxyWorker == id_worker.end() )
return false;
if ( proxyWorker->second.priorityInfo.isExcluded )
@ -1260,11 +1260,11 @@ public:
auto& dbInfo = db.serverInfo->get();
for (const auto& tlogset : dbInfo.logSystemConfig.tLogs) {
for (const auto& tlog: tlogset.tLogs) {
if (tlog.present() && tlog.interf().locality.processId() == processId) return true;
if (tlog.present() && tlog.interf().filteredLocality.processId() == processId) return true;
}
}
for (const MasterProxyInterface& interf : dbInfo.client.proxies) {
if (interf.locality.processId() == processId) return true;
if (interf.processId == processId) return true;
}
for (const ResolverInterface& interf: dbInfo.resolvers) {
if (interf.locality.processId() == processId) return true;
@ -1291,13 +1291,13 @@ public:
for (const auto& tlogset : dbInfo.logSystemConfig.tLogs) {
for (const auto& tlog: tlogset.tLogs) {
if (tlog.present()) {
idUsed[tlog.interf().locality.processId()]++;
idUsed[tlog.interf().filteredLocality.processId()]++;
}
}
}
for (const MasterProxyInterface& interf : dbInfo.client.proxies) {
ASSERT(interf.locality.processId().present());
idUsed[interf.locality.processId()]++;
ASSERT(interf.processId.present());
idUsed[interf.processId]++;
}
for (const ResolverInterface& interf: dbInfo.resolvers) {
ASSERT(interf.locality.processId().present());

View File

@ -367,9 +367,10 @@ public:
const T* upperBound() const { return upper; }
DeltaTree* tree;
private:
Arena arena;
DeltaTree* tree;
DecodedNode* root;
const T* lower;
const T* upper;
@ -470,7 +471,8 @@ public:
newNode->prev = prev;
newNode->next = next;
ASSERT(deltaSize == k.writeDelta(raw->delta(tree->largeNodes), *base, commonPrefix));
int written = k.writeDelta(raw->delta(tree->largeNodes), *base, commonPrefix);
ASSERT(deltaSize == written);
raw->delta(tree->largeNodes).setPrefixSource(basePrev);
// Initialize node's item from the delta (instead of copying into arena) to avoid unnecessary arena space

View File

@ -77,65 +77,98 @@ ACTOR Future<int> spawnProcess(std::string binPath, std::vector<std::string> par
return 0;
}
#else
ACTOR Future<int> spawnProcess(std::string binPath, std::vector<std::string> paramList, double maxWaitTime, bool isSync, double maxSimDelayTime)
pid_t fork_child(const std::string& path,
std::vector<char*>& paramList)
{
state std::string argsString;
for (auto const& elem : paramList) {
argsString += elem + ",";
pid_t pid = fork();
if (pid == -1) {
return -1;
}
TraceEvent("SpawnProcess").detail("Cmd", binPath).detail("Args", argsString);
if (pid == 0) {
execv(const_cast<char*>(path.c_str()), &paramList[0]);
_exit(EXIT_FAILURE);
}
return pid;
}
state int err = 0;
state double runTime = 0;
state boost::process::child c(binPath, boost::process::args(paramList),
boost::process::std_err > boost::process::null);
// for async calls in simulator, always delay by a deterministic amount of time and do the call
// synchronously, otherwise the predictability of the simulator breaks
ACTOR Future<int> spawnProcess(std::string path, std::vector<std::string> args, double maxWaitTime, bool isSync, double maxSimDelayTime)
{
// for async calls in simulator, always delay by a deterministic amount of time and then
// do the call synchronously, otherwise the predictability of the simulator breaks
if (!isSync && g_network->isSimulated()) {
double snapDelay = std::max(maxSimDelayTime - 1, 0.0);
// add some randomness
snapDelay += deterministicRandom()->random01();
TraceEvent("SnapDelaySpawnProcess")
.detail("SnapDelay", snapDelay);
.detail("SnapDelay", snapDelay);
wait(delay(snapDelay));
}
if (!isSync && !g_network->isSimulated()) {
while (c.running() && runTime <= maxWaitTime) {
wait(delay(0.1));
runTime += 0.1;
}
} else {
if (g_network->isSimulated()) {
// to keep the simulator deterministic, wait till the process exits,
// hence giving a large wait time
c.wait_for(std::chrono::hours(24));
ASSERT(!c.running());
} else {
int maxWaitTimeInt = static_cast<int>(maxWaitTime + 1.0);
c.wait_for(std::chrono::seconds(maxWaitTimeInt));
}
std::vector<char*> paramList;
for (int i = 0; i < args.size(); i++) {
paramList.push_back(const_cast<char*>(args[i].c_str()));
}
paramList.push_back(nullptr);
state std::string allArgs;
for (int i = 0; i < args.size(); i++) {
allArgs += args[i];
}
if (c.running()) {
TraceEvent(SevWarnAlways, "ChildTermination")
.detail("Cmd", binPath)
.detail("Args", argsString);
c.terminate();
err = -1;
if (!c.wait_for(std::chrono::seconds(1))) {
TraceEvent(SevWarnAlways, "SpawnProcessFailedToExit")
.detail("Cmd", binPath)
.detail("Args", argsString);
state pid_t pid = fork_child(path, paramList);
if (pid == -1) {
TraceEvent(SevWarnAlways, "SpawnProcess: Command failed to spawn")
.detail("Cmd", path)
.detail("Args", allArgs);
return -1;
} else if (pid > 0) {
state int status = -1;
state double runTime = 0;
while (true) {
if (runTime > maxWaitTime) {
// timing out
TraceEvent(SevWarnAlways, "SpawnProcess : Command failed, timeout")
.detail("Cmd", path)
.detail("Args", allArgs);
return -1;
}
int err = waitpid(pid, &status, WNOHANG);
if (err < 0) {
TraceEvent(SevWarnAlways, "SpawnProcess : Command failed")
.detail("Cmd", path)
.detail("Args", allArgs)
.detail("Errno", WIFEXITED(status) ? WEXITSTATUS(status) : -1);
return -1;
} else if (err == 0) {
// child process has not completed yet
if (isSync || g_network->isSimulated()) {
// synchronously sleep
threadSleep(0.1);
} else {
// yield for other actors to run
wait(delay(0.1));
}
runTime += 0.1;
} else {
// child process completed
if (!(WIFEXITED(status) && WEXITSTATUS(status) == 0)) {
TraceEvent(SevWarnAlways, "SpawnProcess : Command failed")
.detail("Cmd", path)
.detail("Args", allArgs)
.detail("Errno", WIFEXITED(status) ? WEXITSTATUS(status) : -1);
return WIFEXITED(status) ? WEXITSTATUS(status) : -1;
}
TraceEvent("SpawnProcess : Command status")
.detail("Cmd", path)
.detail("Args", allArgs)
.detail("Errno", WIFEXITED(status) ? WEXITSTATUS(status) : 0);
return 0;
}
}
} else {
err = c.exit_code();
}
TraceEvent("SpawnProcess")
.detail("Cmd", binPath)
.detail("Error", err);
return err;
return -1;
}
#endif
@ -148,6 +181,7 @@ ACTOR Future<int> execHelper(ExecCmdValueString* execArg, UID snapUID, std::stri
// get bin path
auto snapBin = execArg->getBinaryPath();
std::vector<std::string> paramList;
paramList.push_back(snapBin.toString());
// get user passed arguments
auto listArgs = execArg->getBinaryArgs();
for (auto elem : listArgs) {
@ -174,6 +208,7 @@ ACTOR Future<int> execHelper(ExecCmdValueString* execArg, UID snapUID, std::stri
folderTo = folder + "-snap-" + uidStr.toString() + "-" + role;
std::vector<std::string> paramList;
std::string mkdirBin = "/bin/mkdir";
paramList.push_back(mkdirBin);
paramList.push_back(folderTo);
cmdErr = spawnProcess(mkdirBin, paramList, maxWaitTime, false /*isSync*/, maxSimDelayTime);
wait(success(cmdErr));
@ -181,6 +216,7 @@ ACTOR Future<int> execHelper(ExecCmdValueString* execArg, UID snapUID, std::stri
if (err == 0) {
std::vector<std::string> paramList;
std::string cpBin = "/bin/cp";
paramList.push_back(cpBin);
paramList.push_back("-a");
paramList.push_back(folderFrom);
paramList.push_back(folderTo);

View File

@ -27,33 +27,6 @@
#include "flow/flow.h"
#include "fdbclient/FDBTypes.h"
#define REDWOOD_DEBUG 0
#define debug_printf_stream stdout
#define debug_printf_always(...) \
{ \
fprintf(debug_printf_stream, "%s %f %04d ", g_network->getLocalAddress().toString().c_str(), now(), __LINE__); \
fprintf(debug_printf_stream, __VA_ARGS__); \
fflush(debug_printf_stream); \
}
#define debug_printf_noop(...)
#if defined(NO_INTELLISENSE)
#if REDWOOD_DEBUG
#define debug_printf debug_printf_always
#else
#define debug_printf debug_printf_noop
#endif
#else
// To get error-checking on debug_printf statements in IDE
#define debug_printf printf
#endif
#define BEACON debug_printf_always("HERE\n")
#define TRACE \
debug_printf_always("%s: %s line %d %s\n", __FUNCTION__, __FILE__, __LINE__, platform::get_backtrace().c_str());
#ifndef VALGRIND
#define VALGRIND_MAKE_MEM_UNDEFINED(x, y)
#define VALGRIND_MAKE_MEM_DEFINED(x, y)

View File

@ -425,7 +425,7 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
init( POLICY_RATING_TESTS, 200 ); if( randomize && BUGGIFY ) POLICY_RATING_TESTS = 20;
init( POLICY_GENERATIONS, 100 ); if( randomize && BUGGIFY ) POLICY_GENERATIONS = 10;
init( DBINFO_SEND_AMOUNT, 2 );
init( DBINFO_SEND_AMOUNT, 5 );
init( DBINFO_BATCH_DELAY, 0.1 );
//Move Keys
@ -619,6 +619,9 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
init( FASTRESTORE_LOADER_SEND_MUTATION_MSG_BYTES, 1.0 * 1024.0 * 1024.0 ); if( randomize && BUGGIFY ) { FASTRESTORE_LOADER_SEND_MUTATION_MSG_BYTES = deterministicRandom()->random01() * 10.0 * 1024.0 * 1024.0 + 1; }
init( FASTRESTORE_GET_RANGE_VERSIONS_EXPENSIVE, false ); if( randomize && BUGGIFY ) { FASTRESTORE_GET_RANGE_VERSIONS_EXPENSIVE = deterministicRandom()->random01() < 0.5 ? true : false; }
init( FASTRESTORE_REQBATCH_PARALLEL, 50 ); if( randomize && BUGGIFY ) { FASTRESTORE_REQBATCH_PARALLEL = deterministicRandom()->random01() * 100 + 1; }
init( FASTRESTORE_REQBATCH_LOG, false ); if( randomize && BUGGIFY ) { FASTRESTORE_REQBATCH_LOG = deterministicRandom()->random01() < 0.2 ? true : false; }
init( FASTRESTORE_TXN_CLEAR_MAX, 1000 ); if( randomize && BUGGIFY ) { FASTRESTORE_TXN_CLEAR_MAX = deterministicRandom()->random01() * 100 + 1; }
init( FASTRESTORE_TXN_RETRY_MAX, 10 ); if( randomize && BUGGIFY ) { FASTRESTORE_TXN_RETRY_MAX = deterministicRandom()->random01() * 100 + 1; }
// clang-format on

View File

@ -551,6 +551,9 @@ public:
int64_t FASTRESTORE_LOADER_SEND_MUTATION_MSG_BYTES; // desired size of mutation message sent from loader to appliers
bool FASTRESTORE_GET_RANGE_VERSIONS_EXPENSIVE; // parse each range file to get (range, version) it has?
int64_t FASTRESTORE_REQBATCH_PARALLEL; // number of requests to wait on for getBatchReplies()
bool FASTRESTORE_REQBATCH_LOG; // verbose log information for getReplyBatches
int FASTRESTORE_TXN_CLEAR_MAX; // threshold to start tracking each clear op in a txn
int FASTRESTORE_TXN_RETRY_MAX; // threshold to start output error on too many retries
ServerKnobs();
void initialize(bool randomize = false, ClientKnobs* clientKnobs = NULL, bool isSimulated = false);

View File

@ -529,7 +529,7 @@ ACTOR Future<Void> logRouterCore(
loop choose {
when( wait( dbInfoChange ) ) {
dbInfoChange = db->onChange();
logRouterData.allowPops = db->get().recoveryState == RecoveryState::FULLY_RECOVERED;
logRouterData.allowPops = db->get().recoveryState == RecoveryState::FULLY_RECOVERED && db->get().recoveryCount >= req.recoveryCount;
logRouterData.logSystem->set(ILogSystem::fromServerDBInfo( logRouterData.dbgid, db->get(), true ));
}
when( TLogPeekRequest req = waitNext( interf.peekMessages.getFuture() ) ) {

View File

@ -233,7 +233,7 @@ struct LogSystemConfig {
if(!tLogs[i].isLocal) {
for( int j = 0; j < tLogs[i].tLogs.size(); j++ ) {
if( tLogs[i].tLogs[j].present() ) {
return tLogs[i].tLogs[j].interf().locality.dcId();
return tLogs[i].tLogs[j].interf().filteredLocality.dcId();
}
}
}
@ -277,7 +277,7 @@ struct LogSystemConfig {
for( auto& tLogSet : tLogs ) {
for( auto& tLog : tLogSet.tLogs ) {
if( tLogSet.locality >= 0 ) {
if( tLog.present() && tLog.interf().locality.dcId() == dcId ) {
if( tLog.present() && tLog.interf().filteredLocality.dcId() == dcId ) {
matchingLocalities[tLogSet.locality]++;
} else {
allLocalities[tLogSet.locality]++;
@ -290,7 +290,7 @@ struct LogSystemConfig {
for( auto& tLogSet : oldLog.tLogs ) {
for( auto& tLog : tLogSet.tLogs ) {
if( tLogSet.locality >= 0 ) {
if( tLog.present() && tLog.interf().locality.dcId() == dcId ) {
if( tLog.present() && tLog.interf().filteredLocality.dcId() == dcId ) {
matchingLocalities[tLogSet.locality]++;
} else {
allLocalities[tLogSet.locality]++;

View File

@ -33,6 +33,7 @@ typedef uint64_t DBRecoveryCount;
struct MasterInterface {
constexpr static FileIdentifier file_identifier = 5979145;
LocalityData locality;
Endpoint base;
RequestStream< ReplyPromise<Void> > waitFailure;
RequestStream< struct TLogRejoinRequest > tlogRejoin; // sent by tlog (whether or not rebooted) to communicate with a new master
RequestStream< struct ChangeCoordinatorsRequest > changeCoordinators;
@ -48,12 +49,24 @@ struct MasterInterface {
if constexpr (!is_fb_function<Archive>) {
ASSERT(ar.protocolVersion().isValid());
}
serializer(ar, locality, waitFailure, tlogRejoin, changeCoordinators, getCommitVersion, notifyBackupWorkerDone);
serializer(ar, locality, base);
if( Archive::isDeserializing ) {
waitFailure = RequestStream< ReplyPromise<Void> >( base.getAdjustedEndpoint(0) );
tlogRejoin = RequestStream< struct TLogRejoinRequest >( base.getAdjustedEndpoint(1) );
changeCoordinators = RequestStream< struct ChangeCoordinatorsRequest >( base.getAdjustedEndpoint(2) );
getCommitVersion = RequestStream< struct GetCommitVersionRequest >( base.getAdjustedEndpoint(3) );
notifyBackupWorkerDone = RequestStream<struct BackupWorkerDoneRequest>( base.getAdjustedEndpoint(4) );
}
}
void initEndpoints() {
getCommitVersion.getEndpoint( TaskPriority::GetConsistentReadVersion );
tlogRejoin.getEndpoint( TaskPriority::MasterTLogRejoin );
std::vector<std::pair<FlowReceiver*, TaskPriority>> streams;
streams.push_back(waitFailure.getReceiver());
streams.push_back(tlogRejoin.getReceiver(TaskPriority::MasterTLogRejoin));
streams.push_back(changeCoordinators.getReceiver());
streams.push_back(getCommitVersion.getReceiver(TaskPriority::GetConsistentReadVersion));
streams.push_back(notifyBackupWorkerDone.getReceiver());
base = FlowTransport::transport().addEndpoints(streams);
}
};

View File

@ -1690,10 +1690,12 @@ ACTOR static Future<Void> rejoinServer( MasterProxyInterface proxy, ProxyCommitD
rep.history.push_back(std::make_pair(decodeServerTagHistoryKey(history[i].key), decodeServerTagValue(history[i].value)));
}
auto localityKey = commitData->txnStateStore->readValue(tagLocalityListKeyFor(req.dcId)).get();
rep.newLocality = false;
if( localityKey.present() ) {
rep.newLocality = false;
int8_t locality = decodeTagLocalityListValue(localityKey.get());
if(locality != rep.tag.locality) {
if(rep.tag.locality != tagLocalityUpgraded && locality != rep.tag.locality) {
TraceEvent(SevWarnAlways, "SSRejoinedWithChangedLocality").detail("Tag", rep.tag.toString()).detail("DcId", req.dcId).detail("NewLocality", locality);
} else if(locality != rep.tag.locality) {
uint16_t tagId = 0;
std::vector<uint16_t> usedTags;
auto tagKeys = commitData->txnStateStore->readRange(serverTagKeys).get();
@ -1722,6 +1724,8 @@ ACTOR static Future<Void> rejoinServer( MasterProxyInterface proxy, ProxyCommitD
}
rep.newTag = Tag(locality, tagId);
}
} else if(rep.tag.locality != tagLocalityUpgraded) {
TraceEvent(SevWarnAlways, "SSRejoinedWithUnknownLocality").detail("Tag", rep.tag.toString()).detail("DcId", req.dcId);
} else {
rep.newLocality = true;
int8_t maxTagLocality = -1;

View File

@ -47,8 +47,8 @@ ACTOR Future<Void> restoreApplierCore(RestoreApplierInterface applierInterf, int
state Future<Void> exitRole = Never();
state Future<Void> updateProcessStatsTimer = delay(SERVER_KNOBS->FASTRESTORE_UPDATE_PROCESS_STATS_INTERVAL);
actors.add(traceProcessMetrics(self, "Applier"));
actors.add(traceRoleVersionBatchProgress(self, "Applier"));
actors.add(traceProcessMetrics(self, "RestoreApplier"));
actors.add(traceRoleVersionBatchProgress(self, "RestoreApplier"));
loop {
state std::string requestTypeStr = "[Init]";
@ -113,6 +113,7 @@ ACTOR static Future<Void> handleSendMutationVectorRequest(RestoreSendVersionedMu
state NotifiedVersion& curMsgIndex = batchData->processedFileState[req.asset];
TraceEvent(SevInfo, "FastRestoreApplierPhaseReceiveMutations", self->id())
.suppressFor(1.0)
.detail("BatchIndex", req.batchIndex)
.detail("RestoreAsset", req.asset.toString())
.detail("RestoreAssetMesssageIndex", curMsgIndex.get())
@ -157,6 +158,7 @@ ACTOR static Future<Void> handleSendMutationVectorRequest(RestoreSendVersionedMu
req.reply.send(RestoreCommonReply(self->id(), isDuplicated));
TraceEvent(SevInfo, "FastRestoreApplierPhaseReceiveMutationsDone", self->id())
.suppressFor(1.0)
.detail("BatchIndex", req.batchIndex)
.detail("RestoreAsset", req.asset.toString())
.detail("ProcessedMessageIndex", curMsgIndex.get())
@ -165,8 +167,16 @@ ACTOR static Future<Void> handleSendMutationVectorRequest(RestoreSendVersionedMu
}
// Clear all ranges in input ranges
ACTOR static Future<Void> applyClearRangeMutations(Standalone<VectorRef<KeyRangeRef>> ranges, Database cx) {
ACTOR static Future<Void> applyClearRangeMutations(Standalone<VectorRef<KeyRangeRef>> ranges, double delayTime,
Database cx, UID applierID, int batchIndex) {
state Reference<ReadYourWritesTransaction> tr(new ReadYourWritesTransaction(cx));
state int retries = 0;
state double numOps = 0;
wait(delay(delayTime + deterministicRandom()->random01() * delayTime));
TraceEvent("FastRestoreApplierClearRangeMutationsStart", applierID)
.detail("BatchIndex", batchIndex)
.detail("Ranges", ranges.size())
.detail("DelayTime", delayTime);
loop {
try {
tr->reset();
@ -176,10 +186,25 @@ ACTOR static Future<Void> applyClearRangeMutations(Standalone<VectorRef<KeyRange
debugFRMutation("FastRestoreApplierApplyClearRangeMutation", 0,
MutationRef(MutationRef::ClearRange, range.begin, range.end));
tr->clear(range);
++numOps;
if (numOps >= SERVER_KNOBS->FASTRESTORE_TXN_CLEAR_MAX) {
TraceEvent(SevWarnAlways, "FastRestoreApplierClearRangeMutationsTooManyClearsInTxn")
.suppressFor(1.0)
.detail("Clears", numOps)
.detail("Ranges", ranges.size())
.detail("Range", range.toString());
}
}
wait(tr->commit());
break;
} catch (Error& e) {
retries++;
if (retries > SERVER_KNOBS->FASTRESTORE_TXN_RETRY_MAX) {
TraceEvent(SevWarnAlways, "RestoreApplierApplyClearRangeMutationsStuck", applierID)
.detail("BatchIndex", batchIndex)
.detail("ClearRanges", ranges.size())
.error(e);
}
wait(tr->onError(e));
}
}
@ -188,13 +213,17 @@ ACTOR static Future<Void> applyClearRangeMutations(Standalone<VectorRef<KeyRange
// Get keys in incompleteStagingKeys and precompute the stagingKey which is stored in batchData->stagingKeys
ACTOR static Future<Void> getAndComputeStagingKeys(
std::map<Key, std::map<Key, StagingKey>::iterator> incompleteStagingKeys, Database cx, UID applierID) {
std::map<Key, std::map<Key, StagingKey>::iterator> incompleteStagingKeys, double delayTime, Database cx,
UID applierID, int batchIndex) {
state Reference<ReadYourWritesTransaction> tr(new ReadYourWritesTransaction(cx));
state std::vector<Future<Optional<Value>>> fValues;
state int retries = 0;
wait(delay(delayTime + deterministicRandom()->random01() * delayTime));
TraceEvent("FastRestoreApplierGetAndComputeStagingKeysStart", applierID)
.detail("GetKeys", incompleteStagingKeys.size());
.detail("BatchIndex", batchIndex)
.detail("GetKeys", incompleteStagingKeys.size())
.detail("DelayTime", delayTime);
loop {
try {
tr->reset();
@ -207,11 +236,12 @@ ACTOR static Future<Void> getAndComputeStagingKeys(
break;
} catch (Error& e) {
if (retries++ > 10) {
TraceEvent(SevError, "FastRestoreApplierGetAndComputeStagingKeysGetKeysStuck")
TraceEvent(SevError, "FastRestoreApplierGetAndComputeStagingKeysGetKeysStuck", applierID)
.detail("BatchIndex", batchIndex)
.detail("GetKeys", incompleteStagingKeys.size())
.error(e);
break;
}
wait(tr->onError(e));
fValues.clear();
}
@ -220,31 +250,31 @@ ACTOR static Future<Void> getAndComputeStagingKeys(
ASSERT(fValues.size() == incompleteStagingKeys.size());
int i = 0;
for (auto& key : incompleteStagingKeys) {
if (!fValues[i].get().present()) {
TraceEvent(SevDebug, "FastRestoreApplierGetAndComputeStagingKeysNoBaseValueInDB")
if (!fValues[i].get().present()) { // Debug info to understand which key does not exist in DB
TraceEvent(SevWarn, "FastRestoreApplierGetAndComputeStagingKeysNoBaseValueInDB", applierID)
.detail("BatchIndex", batchIndex)
.detail("Key", key.first)
.detail("Reason", "Not found in DB")
.detail("PendingMutations", key.second->second.pendingMutations.size())
.detail("StagingKeyType", (int)key.second->second.type);
for (auto& vm : key.second->second.pendingMutations) {
TraceEvent(SevDebug, "FastRestoreApplierGetAndComputeStagingKeysNoBaseValueInDB")
TraceEvent(SevWarn, "FastRestoreApplierGetAndComputeStagingKeysNoBaseValueInDB")
.detail("PendingMutationVersion", vm.first.toString())
.detail("PendingMutation", vm.second.toString());
}
key.second->second.precomputeResult("GetAndComputeStagingKeysNoBaseValueInDB");
i++;
continue;
key.second->second.precomputeResult("GetAndComputeStagingKeysNoBaseValueInDB", applierID, batchIndex);
} else {
// The key's version ideally should be the most recently committed version.
// But as long as it is > 1 and less than the start version of the version batch, it is the same result.
MutationRef m(MutationRef::SetValue, key.first, fValues[i].get().get());
key.second->second.add(m, LogMessageVersion(1));
key.second->second.precomputeResult("GetAndComputeStagingKeys");
i++;
key.second->second.precomputeResult("GetAndComputeStagingKeys", applierID, batchIndex);
}
i++;
}
TraceEvent("FastRestoreApplierGetAndComputeStagingKeysDone", applierID)
.detail("BatchIndex", batchIndex)
.detail("GetKeys", incompleteStagingKeys.size());
return Void();
@ -253,43 +283,44 @@ ACTOR static Future<Void> getAndComputeStagingKeys(
ACTOR static Future<Void> precomputeMutationsResult(Reference<ApplierBatchData> batchData, UID applierID,
int64_t batchIndex, Database cx) {
// Apply range mutations (i.e., clearRange) to database cx
TraceEvent("FastRestoreApplerPhasePrecomputeMutationsResult", applierID)
TraceEvent("FastRestoreApplerPhasePrecomputeMutationsResultStart", applierID)
.detail("BatchIndex", batchIndex)
.detail("Step", "Applying clear range mutations to DB")
.detail("ClearRanges", batchData->stagingKeyRanges.size());
state std::vector<Future<Void>> fClearRanges;
std::vector<Standalone<VectorRef<KeyRangeRef>>> clearBuf;
clearBuf.push_back(Standalone<VectorRef<KeyRangeRef>>());
Standalone<VectorRef<KeyRangeRef>> clearRanges = clearBuf.back();
Standalone<VectorRef<KeyRangeRef>> clearRanges;
double curTxnSize = 0;
double delayTime = 0;
for (auto& rangeMutation : batchData->stagingKeyRanges) {
KeyRangeRef range(rangeMutation.mutation.param1, rangeMutation.mutation.param2);
debugFRMutation("FastRestoreApplierPrecomputeMutationsResultClearRange", rangeMutation.version.version,
MutationRef(MutationRef::ClearRange, range.begin, range.end));
clearRanges.push_back(clearRanges.arena(), range);
clearRanges.push_back_deep(clearRanges.arena(), range);
curTxnSize += range.expectedSize();
if (curTxnSize >= SERVER_KNOBS->FASTRESTORE_TXN_BATCH_MAX_BYTES) {
fClearRanges.push_back(applyClearRangeMutations(clearRanges, cx));
clearBuf.push_back(Standalone<VectorRef<KeyRangeRef>>());
clearRanges = clearBuf.back();
fClearRanges.push_back(applyClearRangeMutations(clearRanges, delayTime, cx, applierID, batchIndex));
delayTime += 0.1;
clearRanges = Standalone<VectorRef<KeyRangeRef>>();
curTxnSize = 0;
}
}
if (curTxnSize > 0) {
fClearRanges.push_back(applyClearRangeMutations(clearRanges, cx));
fClearRanges.push_back(applyClearRangeMutations(clearRanges, delayTime, cx, applierID, batchIndex));
}
// Apply range mutations (i.e., clearRange) to stagingKeyRanges
TraceEvent("FastRestoreApplerPhasePrecomputeMutationsResult", applierID)
.detail("BatchIndex", batchIndex)
.detail("Step", "Applying clear range mutations to staging keys")
.detail("ClearRanges", batchData->stagingKeyRanges.size());
.detail("ClearRanges", batchData->stagingKeyRanges.size())
.detail("FutureClearRanges", fClearRanges.size());
for (auto& rangeMutation : batchData->stagingKeyRanges) {
ASSERT(rangeMutation.mutation.param1 <= rangeMutation.mutation.param2);
std::map<Key, StagingKey>::iterator lb = batchData->stagingKeys.lower_bound(rangeMutation.mutation.param1);
std::map<Key, StagingKey>::iterator ub = batchData->stagingKeys.lower_bound(rangeMutation.mutation.param2);
while (lb != ub) {
if (lb->first >= rangeMutation.mutation.param2) {
TraceEvent(SevError, "FastRestoreApplerPhasePrecomputeMutationsResult_IncorrectUpperBound")
TraceEvent(SevError, "FastRestoreApplerPhasePrecomputeMutationsResultIncorrectUpperBound")
.detail("Key", lb->first)
.detail("ClearRangeUpperBound", rangeMutation.mutation.param2)
.detail("UsedUpperBound", ub->first);
@ -301,6 +332,10 @@ ACTOR static Future<Void> precomputeMutationsResult(Reference<ApplierBatchData>
lb++;
}
}
TraceEvent("FastRestoreApplerPhasePrecomputeMutationsResult", applierID)
.detail("BatchIndex", batchIndex)
.detail("Step", "Wait on applying clear range mutations to DB")
.detail("FutureClearRanges", fClearRanges.size());
wait(waitForAll(fClearRanges));
TraceEvent("FastRestoreApplerPhasePrecomputeMutationsResult", applierID)
@ -313,6 +348,7 @@ ACTOR static Future<Void> precomputeMutationsResult(Reference<ApplierBatchData>
std::map<Key, std::map<Key, StagingKey>::iterator> incompleteStagingKeys;
std::map<Key, StagingKey>::iterator stagingKeyIter = batchData->stagingKeys.begin();
int numKeysInBatch = 0;
double delayTime = 0; // Start transactions at different times to avoid overwhelming FDB.
for (; stagingKeyIter != batchData->stagingKeys.end(); stagingKeyIter++) {
if (!stagingKeyIter->second.hasBaseValue()) {
incompleteStagingKeys.emplace(stagingKeyIter->first, stagingKeyIter);
@ -320,13 +356,16 @@ ACTOR static Future<Void> precomputeMutationsResult(Reference<ApplierBatchData>
numKeysInBatch++;
}
if (numKeysInBatch == SERVER_KNOBS->FASTRESTORE_APPLIER_FETCH_KEYS_SIZE) {
fGetAndComputeKeys.push_back(getAndComputeStagingKeys(incompleteStagingKeys, cx, applierID));
fGetAndComputeKeys.push_back(
getAndComputeStagingKeys(incompleteStagingKeys, delayTime, cx, applierID, batchIndex));
delayTime += 0.1;
numKeysInBatch = 0;
incompleteStagingKeys.clear();
}
}
if (numKeysInBatch > 0) {
fGetAndComputeKeys.push_back(getAndComputeStagingKeys(incompleteStagingKeys, cx, applierID));
fGetAndComputeKeys.push_back(
getAndComputeStagingKeys(incompleteStagingKeys, delayTime, cx, applierID, batchIndex));
}
TraceEvent("FastRestoreApplerPhasePrecomputeMutationsResult", applierID)
@ -337,7 +376,7 @@ ACTOR static Future<Void> precomputeMutationsResult(Reference<ApplierBatchData>
for (stagingKeyIter = batchData->stagingKeys.begin(); stagingKeyIter != batchData->stagingKeys.end();
stagingKeyIter++) {
if (stagingKeyIter->second.hasBaseValue()) {
stagingKeyIter->second.precomputeResult("HasBaseValue");
stagingKeyIter->second.precomputeResult("HasBaseValue", applierID, batchIndex);
}
}
@ -420,7 +459,7 @@ ACTOR static Future<Void> applyStagingKeys(Reference<ApplierBatchData> batchData
std::map<Key, StagingKey>::iterator cur = begin;
double txnSize = 0;
std::vector<Future<Void>> fBatches;
TraceEvent("FastRestoreApplerPhaseApplyStagingKeys", applierID)
TraceEvent("FastRestoreApplerPhaseApplyStagingKeysStart", applierID)
.detail("BatchIndex", batchIndex)
.detail("StagingKeys", batchData->stagingKeys.size());
while (cur != batchData->stagingKeys.end()) {
@ -458,23 +497,29 @@ ACTOR Future<Void> writeMutationsToDB(UID applierID, int64_t batchIndex, Referen
ACTOR static Future<Void> handleApplyToDBRequest(RestoreVersionBatchRequest req, Reference<RestoreApplierData> self,
Database cx) {
TraceEvent("FastRestoreApplierPhaseHandleApplyToDBStart", self->id())
.detail("BatchIndex", req.batchIndex)
.detail("FinishedBatch", self->finishedBatch.get());
// Ensure batch (i-1) is applied before batch i
wait(self->finishedBatch.whenAtLeast(req.batchIndex - 1));
state bool isDuplicated = true;
Reference<ApplierBatchData> batchData = self->batch[req.batchIndex];
TraceEvent("FastRestoreApplierPhaseHandleApplyToDB", self->id())
.detail("BatchIndex", req.batchIndex)
.detail("FinishedBatch", self->finishedBatch.get())
.detail("HasStarted", batchData->dbApplier.present())
.detail("PreviousVersionBatchState", batchData->vbState.get());
batchData->vbState = ApplierVersionBatchState::WRITE_TO_DB;
if (self->finishedBatch.get() == req.batchIndex - 1) {
Reference<ApplierBatchData> batchData = self->batch[req.batchIndex];
TraceEvent("FastRestoreApplierPhaseHandleApplyToDBRunning", self->id())
.detail("BatchIndex", req.batchIndex)
.detail("FinishedBatch", self->finishedBatch.get())
.detail("HasStarted", batchData->dbApplier.present())
.detail("WroteToDBDone", batchData->dbApplier.present() ? batchData->dbApplier.get().isReady() : 0)
.detail("PreviousVersionBatchState", batchData->vbState.get());
ASSERT(batchData.isValid());
if (!batchData->dbApplier.present()) {
isDuplicated = false;
batchData->dbApplier = Never();
batchData->dbApplier = writeMutationsToDB(self->id(), req.batchIndex, batchData, cx);
batchData->vbState = ApplierVersionBatchState::WRITE_TO_DB;
}
ASSERT(batchData->dbApplier.present());
@ -485,14 +530,22 @@ ACTOR static Future<Void> handleApplyToDBRequest(RestoreVersionBatchRequest req,
// Avoid setting finishedBatch when finishedBatch > req.batchIndex
if (self->finishedBatch.get() == req.batchIndex - 1) {
self->finishedBatch.set(req.batchIndex);
self->batch[req.batchIndex]->vbState = ApplierVersionBatchState::DONE;
// Free memory for the version batch
self->batch.erase(req.batchIndex);
if (self->delayedActors > 0) {
self->checkMemory.trigger();
}
}
}
if (self->delayedActors > 0) {
self->checkMemory.trigger();
}
req.reply.send(RestoreCommonReply(self->id(), isDuplicated));
TraceEvent("FastRestoreApplierPhaseHandleApplyToDBDone", self->id())
.detail("BatchIndex", req.batchIndex)
.detail("FinishedBatch", self->finishedBatch.get())
.detail("IsDuplicated", isDuplicated);
return Void();
}

View File

@ -117,8 +117,9 @@ struct StagingKey {
// Precompute the final value of the key.
// TODO: Look at the last LogMessageVersion, if it set or clear, we can ignore the rest of versions.
void precomputeResult(const char* context) {
TraceEvent(SevDebug, "FastRestoreApplierPrecomputeResult")
void precomputeResult(const char* context, UID applierID, int batchIndex) {
TraceEvent(SevDebug, "FastRestoreApplierPrecomputeResult", applierID)
.detail("BatchIndex", batchIndex)
.detail("Context", context)
.detail("Version", version.toString())
.detail("Key", key)
@ -136,7 +137,9 @@ struct StagingKey {
MutationRef m = lb->second;
if (m.type == MutationRef::SetValue || m.type == MutationRef::ClearRange) {
if (std::tie(type, key, val) != std::tie(m.type, m.param1, m.param2)) {
TraceEvent(SevError, "FastRestoreApplierPrecomputeResultUnhandledSituation")
TraceEvent(SevError, "FastRestoreApplierPrecomputeResultUnhandledSituation", applierID)
.detail("BatchIndex", batchIndex)
.detail("Context", context)
.detail("BufferedType", getTypeString(type))
.detail("PendingType", getTypeString(m.type))
.detail("BufferedVal", val.toString())
@ -167,11 +170,15 @@ struct StagingKey {
type = MutationRef::SetValue; // Precomputed result should be set to DB.
} else if (mutation.type == MutationRef::SetValue || mutation.type == MutationRef::ClearRange) {
type = MutationRef::SetValue; // Precomputed result should be set to DB.
TraceEvent(SevError, "FastRestoreApplierPrecomputeResultUnexpectedSet")
TraceEvent(SevError, "FastRestoreApplierPrecomputeResultUnexpectedSet", applierID)
.detail("BatchIndex", batchIndex)
.detail("Context", context)
.detail("MutationType", getTypeString(mutation.type))
.detail("Version", lb->first.toString());
} else {
TraceEvent(SevWarnAlways, "FastRestoreApplierPrecomputeResultSkipUnexpectedBackupMutation")
TraceEvent(SevWarnAlways, "FastRestoreApplierPrecomputeResultSkipUnexpectedBackupMutation", applierID)
.detail("BatchIndex", batchIndex)
.detail("Context", context)
.detail("MutationType", getTypeString(mutation.type))
.detail("Version", lb->first.toString());
}
@ -218,7 +225,8 @@ public:
static const int INIT = 1;
static const int RECEIVE_MUTATIONS = 2;
static const int WRITE_TO_DB = 3;
static const int INVALID = 4;
static const int DONE = 4;
static const int INVALID = 5;
explicit ApplierVersionBatchState(int newState) {
vbState = newState;

View File

@ -281,18 +281,23 @@ Future<Void> getBatchReplies(RequestStream<Request> Interface::*channel, std::ma
ongoingReplies.clear();
ongoingRepliesIndex.clear();
for (int i = 0; i < cmdReplies.size(); ++i) {
// TraceEvent(SevDebug, "FastRestoreGetBatchReplies")
// .detail("Requests", requests.size())
// .detail("OutstandingReplies", oustandingReplies)
// .detail("ReplyIndex", i)
// .detail("ReplyReady", cmdReplies[i].isReady())
// .detail("RequestNode", requests[i].first)
// .detail("Request", requests[i].second.toString());
if (SERVER_KNOBS->FASTRESTORE_REQBATCH_LOG) {
TraceEvent(SevInfo, "FastRestoreGetBatchReplies")
.suppressFor(1.0)
.detail("Requests", requests.size())
.detail("OutstandingReplies", oustandingReplies)
.detail("ReplyIndex", i)
.detail("ReplyIsReady", cmdReplies[i].isReady())
.detail("ReplyIsError", cmdReplies[i].isError())
.detail("RequestNode", requests[i].first)
.detail("Request", requests[i].second.toString());
}
if (!cmdReplies[i].isReady()) { // still wait for reply
ongoingReplies.push_back(cmdReplies[i]);
ongoingRepliesIndex.push_back(i);
}
}
ASSERT(ongoingReplies.size() == oustandingReplies);
if (ongoingReplies.empty()) {
break;
} else {
@ -356,7 +361,7 @@ Future<Void> getBatchReplies(RequestStream<Request> Interface::*channel, std::ma
// fprintf(stdout, "sendBatchRequests Error code:%d, error message:%s\n", e.code(), e.what());
TraceEvent(SevWarn, "FastRestoreSendBatchRequests").error(e);
for (auto& request : requests) {
TraceEvent(SevWarn, "FastRestoreLoader")
TraceEvent(SevWarn, "FastRestoreSendBatchRequests")
.detail("SendBatchRequests", requests.size())
.detail("RequestID", request.first)
.detail("Request", request.second.toString());

View File

@ -67,7 +67,7 @@ ACTOR Future<Void> restoreLoaderCore(RestoreLoaderInterface loaderInterf, int no
state Future<Void> exitRole = Never();
state Future<Void> updateProcessStatsTimer = delay(SERVER_KNOBS->FASTRESTORE_UPDATE_PROCESS_STATS_INTERVAL);
actors.add(traceProcessMetrics(self, "Loader"));
actors.add(traceProcessMetrics(self, "RestoreLoader"));
loop {
state std::string requestTypeStr = "[Init]";
@ -336,6 +336,8 @@ ACTOR Future<Void> handleLoadFileRequest(RestoreLoadFileRequest req, Reference<R
.detail("NotProcessed", !paramExist)
.detail("Processed", isReady)
.detail("CurrentMemory", getSystemStatistics().processMemory);
// Loader destroy batchData once the batch finishes and self->finishedBatch.set(req.batchIndex);
ASSERT(self->finishedBatch.get() < req.batchIndex);
wait(isSchedulable(self, req.batchIndex, __FUNCTION__));
@ -376,6 +378,8 @@ ACTOR Future<Void> handleSendMutationsRequest(RestoreSendMutationsToAppliersRequ
.detail("BatchIndex", req.batchIndex)
.detail("UseRangeFile", req.useRangeFile)
.detail("LoaderSendStatus", batchStatus->toString());
// Loader destroy batchData once the batch finishes and self->finishedBatch.set(req.batchIndex);
ASSERT(self->finishedBatch.get() < req.batchIndex);
// Ensure each file is sent exactly once by using batchStatus->sendAllLogs and batchStatus->sendAllRanges
if (!req.useRangeFile) {
@ -945,6 +949,9 @@ ACTOR Future<Void> handleFinishVersionBatchRequest(RestoreVersionBatchRequest re
wait(self->finishedBatch.whenAtLeast(req.batchIndex - 1));
if (self->finishedBatch.get() == req.batchIndex - 1) {
self->finishedBatch.set(req.batchIndex);
// Clean up batchData
self->batch.erase(req.batchIndex);
self->status.erase(req.batchIndex);
}
if (self->delayedActors > 0) {
self->checkMemory.trigger();

View File

@ -177,6 +177,7 @@ struct RestoreLoaderData : RestoreRoleData, public ReferenceCounted<RestoreLoade
void resetPerRestoreRequest() {
batch.clear();
status.clear();
finishedBatch = NotifiedVersion(0);
}
void initBackupContainer(Key url) {

View File

@ -81,6 +81,7 @@ ACTOR Future<Void> startRestoreMaster(Reference<RestoreWorkerData> masterWorker,
actors.add(updateHeartbeatTime(self));
actors.add(checkRolesLiveness(self));
actors.add(traceProcessMetrics(self, "RestoreMaster"));
wait(startProcessRestoreRequests(self, cx));
} catch (Error& e) {
@ -315,7 +316,8 @@ ACTOR static Future<Version> processRestoreRequest(Reference<RestoreMasterData>
TraceEvent("FastRestoreMasterDispatchVersionBatches")
.detail("BatchIndex", batchIndex)
.detail("BatchSize", versionBatch->size)
.detail("RunningVersionBatches", self->runningVersionBatches.get());
.detail("RunningVersionBatches", self->runningVersionBatches.get())
.detail("VersionBatches", versionBatches.size());
self->batch[batchIndex] = Reference<MasterBatchData>(new MasterBatchData());
self->batchStatus[batchIndex] = Reference<MasterBatchStatus>(new MasterBatchStatus());
fBatches.push_back(distributeWorkloadPerVersionBatch(self, batchIndex, cx, request, *versionBatch));
@ -402,6 +404,7 @@ ACTOR static Future<Void> loadFilesOnLoaders(Reference<MasterBatchData> batchDat
++paramIdx;
}
TraceEvent(files->size() != paramIdx ? SevError : SevInfo, "FastRestoreMasterPhaseLoadFiles")
.detail("BatchIndex", batchIndex)
.detail("Files", files->size())
.detail("LoadParams", paramIdx);
@ -561,6 +564,9 @@ ACTOR static Future<Void> distributeWorkloadPerVersionBatch(Reference<RestoreMas
void splitKeyRangeForAppliers(Reference<MasterBatchData> batchData,
std::map<UID, RestoreApplierInterface> appliersInterf, int batchIndex) {
ASSERT(batchData->samplesSize >= 0);
// Sanity check: samples should not be used after freed
ASSERT((batchData->samplesSize > 0 && !batchData->samples.empty()) ||
batchData->samplesSize == 0 && batchData->samples.empty());
int numAppliers = appliersInterf.size();
double slotSize = std::max(batchData->samplesSize / numAppliers, 1.0);
double cumulativeSize = slotSize;
@ -619,6 +625,7 @@ void splitKeyRangeForAppliers(Reference<MasterBatchData> batchData,
.detail("BatchIndex", batchIndex)
.detail("SamplingSize", batchData->samplesSize)
.detail("SlotSize", slotSize);
batchData->samples.clear();
}
ACTOR static Future<Standalone<VectorRef<RestoreRequest>>> collectRestoreRequests(Database cx) {

View File

@ -57,6 +57,9 @@ ACTOR Future<Void> handleInitVersionBatchRequest(RestoreVersionBatchRequest req,
.detail("BatchIndex", req.batchIndex)
.detail("Role", getRoleStr(self->role))
.detail("VersionBatchNotifiedVersion", self->versionBatchId.get());
// Loader destroy batchData once the batch finishes and self->finishedBatch.set(req.batchIndex);
ASSERT(self->finishedBatch.get() < req.batchIndex);
// batchId is continuous. (req.batchIndex-1) is the id of the just finished batch.
wait(self->versionBatchId.whenAtLeast(req.batchIndex - 1));
@ -110,7 +113,8 @@ ACTOR Future<Void> isSchedulable(Reference<RestoreRoleData> self, int actorBatch
}
if (memory < memoryThresholdBytes || self->finishedBatch.get() + 1 == actorBatchIndex) {
if (memory >= memoryThresholdBytes) {
TraceEvent(SevWarn, "FastRestoreMemoryUsageAboveThreshold")
TraceEvent(SevWarn, "FastRestoreMemoryUsageAboveThreshold", self->id())
.detail("Role", getRoleStr(self->role))
.detail("BatchIndex", actorBatchIndex)
.detail("FinishedBatch", self->finishedBatch.get())
.detail("Actor", name)
@ -119,10 +123,12 @@ ACTOR Future<Void> isSchedulable(Reference<RestoreRoleData> self, int actorBatch
self->delayedActors--;
break;
} else {
TraceEvent(SevDebug, "FastRestoreMemoryUsageAboveThresholdWait")
TraceEvent(SevInfo, "FastRestoreMemoryUsageAboveThresholdWait", self->id())
.detail("Role", getRoleStr(self->role))
.detail("BatchIndex", actorBatchIndex)
.detail("Actor", name)
.detail("CurrentMemory", memory);
// TODO: Set FASTRESTORE_WAIT_FOR_MEMORY_LATENCY to a large value. It should be able to avoided
wait(delay(SERVER_KNOBS->FASTRESTORE_WAIT_FOR_MEMORY_LATENCY) || self->checkMemory.onTrigger());
}
}

View File

@ -104,8 +104,6 @@ public:
NotifiedVersion versionBatchId; // The index of the version batch that has been initialized and put into pipeline
NotifiedVersion finishedBatch; // The highest batch index all appliers have applied mutations
bool versionBatchStart = false;
RestoreRoleData() : role(RestoreRole::Invalid), cpuUsage(0.0), memory(0.0), residentMemory(0.0), delayedActors(0){};
virtual ~RestoreRoleData() = default;

View File

@ -33,9 +33,11 @@ struct TLogInterface {
enum { LocationAwareLoadBalance = 1 };
enum { AlwaysFresh = 1 };
LocalityData locality;
LocalityData filteredLocality;
UID uniqueID;
UID sharedTLogID;
Endpoint base;
RequestStream< struct TLogPeekRequest > peekMessages;
RequestStream< struct TLogPopRequest > popMessages;
@ -50,21 +52,30 @@ struct TLogInterface {
RequestStream< struct TLogSnapRequest> snapRequest;
TLogInterface() {}
explicit TLogInterface(const LocalityData& locality) : uniqueID( deterministicRandom()->randomUniqueID() ), locality(locality) { sharedTLogID = uniqueID; }
TLogInterface(UID sharedTLogID, const LocalityData& locality) : uniqueID( deterministicRandom()->randomUniqueID() ), sharedTLogID(sharedTLogID), locality(locality) {}
TLogInterface(UID uniqueID, UID sharedTLogID, const LocalityData& locality) : uniqueID(uniqueID), sharedTLogID(sharedTLogID), locality(locality) {}
explicit TLogInterface(const LocalityData& locality) : uniqueID( deterministicRandom()->randomUniqueID() ), filteredLocality(locality) { sharedTLogID = uniqueID; }
TLogInterface(UID sharedTLogID, const LocalityData& locality) : uniqueID( deterministicRandom()->randomUniqueID() ), sharedTLogID(sharedTLogID), filteredLocality(locality) {}
TLogInterface(UID uniqueID, UID sharedTLogID, const LocalityData& locality) : uniqueID(uniqueID), sharedTLogID(sharedTLogID), filteredLocality(locality) {}
UID id() const { return uniqueID; }
UID getSharedTLogID() const { return sharedTLogID; }
std::string toString() const { return id().shortString(); }
bool operator == ( TLogInterface const& r ) const { return id() == r.id(); }
NetworkAddress address() const { return peekMessages.getEndpoint().getPrimaryAddress(); }
Optional<NetworkAddress> secondaryAddress() const { return peekMessages.getEndpoint().addresses.secondaryAddress; }
void initEndpoints() {
getQueuingMetrics.getEndpoint( TaskPriority::TLogQueuingMetrics );
popMessages.getEndpoint( TaskPriority::TLogPop );
peekMessages.getEndpoint( TaskPriority::TLogPeek );
confirmRunning.getEndpoint( TaskPriority::TLogConfirmRunning );
commit.getEndpoint( TaskPriority::TLogCommit );
std::vector<std::pair<FlowReceiver*, TaskPriority>> streams;
streams.push_back(peekMessages.getReceiver(TaskPriority::TLogPeek));
streams.push_back(popMessages.getReceiver(TaskPriority::TLogPop));
streams.push_back(commit.getReceiver(TaskPriority::TLogCommit));
streams.push_back(lock.getReceiver());
streams.push_back(getQueuingMetrics.getReceiver(TaskPriority::TLogQueuingMetrics));
streams.push_back(confirmRunning.getReceiver(TaskPriority::TLogConfirmRunning));
streams.push_back(waitFailure.getReceiver());
streams.push_back(recoveryFinished.getReceiver());
streams.push_back(disablePopRequest.getReceiver());
streams.push_back(enablePopRequest.getReceiver());
streams.push_back(snapRequest.getReceiver());
base = FlowTransport::transport().addEndpoints(streams);
}
template <class Ar>
@ -72,9 +83,20 @@ struct TLogInterface {
if constexpr (!is_fb_function<Ar>) {
ASSERT(ar.isDeserializing || uniqueID != UID());
}
serializer(ar, uniqueID, sharedTLogID, locality, peekMessages, popMessages
, commit, lock, getQueuingMetrics, confirmRunning, waitFailure, recoveryFinished
, disablePopRequest, enablePopRequest, snapRequest);
serializer(ar, uniqueID, sharedTLogID, filteredLocality, base);
if( Ar::isDeserializing ) {
peekMessages = RequestStream< struct TLogPeekRequest >( base.getAdjustedEndpoint(0) );
popMessages = RequestStream< struct TLogPopRequest >( base.getAdjustedEndpoint(1) );
commit = RequestStream< struct TLogCommitRequest >( base.getAdjustedEndpoint(2) );
lock = RequestStream< ReplyPromise< struct TLogLockResult > >( base.getAdjustedEndpoint(3) );
getQueuingMetrics = RequestStream< struct TLogQueuingMetricsRequest >( base.getAdjustedEndpoint(4) );
confirmRunning = RequestStream< struct TLogConfirmRunningRequest >( base.getAdjustedEndpoint(5) );
waitFailure = RequestStream< ReplyPromise<Void> >( base.getAdjustedEndpoint(6) );
recoveryFinished = RequestStream< struct TLogRecoveryFinishedRequest >( base.getAdjustedEndpoint(7) );
disablePopRequest = RequestStream< struct TLogDisablePopRequest >( base.getAdjustedEndpoint(8) );
enablePopRequest = RequestStream< struct TLogEnablePopRequest >( base.getAdjustedEndpoint(9) );
snapRequest = RequestStream< struct TLogSnapRequest >( base.getAdjustedEndpoint(10) );
}
}
};

View File

@ -356,7 +356,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
newState.tLogs.emplace_back(*t);
newState.tLogs.back().tLogLocalities.clear();
for (const auto& log : t->logServers) {
newState.tLogs.back().tLogLocalities.push_back(log->get().interf().locality);
newState.tLogs.back().tLogLocalities.push_back(log->get().interf().filteredLocality);
}
}
}
@ -1677,7 +1677,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
// trackRejoins listens for rejoin requests from the tLogs that we are recovering from, to learn their TLogInterfaces
state std::vector<LogLockInfo> lockResults;
state std::vector<Reference<AsyncVar<OptionalInterface<TLogInterface>>>> allLogServers;
state std::vector<std::pair<Reference<AsyncVar<OptionalInterface<TLogInterface>>>,Reference<IReplicationPolicy>>> allLogServers;
state std::vector<Reference<LogSet>> logServers;
state std::vector<OldLogData> oldLogData;
state std::vector<std::vector<Reference<AsyncVar<bool>>>> logFailed;
@ -1686,8 +1686,9 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
for (const CoreTLogSet& coreSet : prevState.tLogs) {
logServers.emplace_back(new LogSet(coreSet));
std::vector<Reference<AsyncVar<bool>>> failed;
for (const auto& logVar : logServers.back()->logServers) {
allLogServers.push_back(logVar);
allLogServers.push_back(std::make_pair(logVar,coreSet.tLogPolicy));
failed.emplace_back(new AsyncVar<bool>());
failureTrackers.push_back(monitorLog(logVar, failed.back()));
}
@ -1698,7 +1699,9 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
oldLogData.emplace_back(oldTlogData);
for (const auto& logSet : oldLogData.back().tLogs) {
allLogServers.insert(allLogServers.end(), logSet->logServers.begin(), logSet->logServers.end());
for (const auto& logVar : logSet->logServers) {
allLogServers.push_back(std::make_pair(logVar,logSet->tLogPolicy));
}
}
}
state Future<Void> rejoins = trackRejoins( dbgid, allLogServers, rejoinRequests );
@ -2458,7 +2461,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
return logSystem;
}
ACTOR static Future<Void> trackRejoins( UID dbgid, std::vector<Reference<AsyncVar<OptionalInterface<TLogInterface>>>> logServers, FutureStream< struct TLogRejoinRequest > rejoinRequests ) {
ACTOR static Future<Void> trackRejoins( UID dbgid, std::vector<std::pair<Reference<AsyncVar<OptionalInterface<TLogInterface>>>,Reference<IReplicationPolicy>>> logServers, FutureStream< struct TLogRejoinRequest > rejoinRequests ) {
state std::map<UID, ReplyPromise<TLogRejoinReply>> lastReply;
try {
@ -2466,15 +2469,18 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
TLogRejoinRequest req = waitNext( rejoinRequests );
int pos = -1;
for( int i = 0; i < logServers.size(); i++ ) {
if( logServers[i]->get().id() == req.myInterface.id() ) {
if( logServers[i].first->get().id() == req.myInterface.id() ) {
pos = i;
break;
}
}
if ( pos != -1 ) {
TraceEvent("TLogJoinedMe", dbgid).detail("TLog", req.myInterface.id()).detail("Address", req.myInterface.commit.getEndpoint().getPrimaryAddress().toString());
if( !logServers[pos]->get().present() || req.myInterface.commit.getEndpoint() != logServers[pos]->get().interf().commit.getEndpoint())
logServers[pos]->setUnconditional( OptionalInterface<TLogInterface>(req.myInterface) );
if( !logServers[pos].first->get().present() || req.myInterface.commit.getEndpoint() != logServers[pos].first->get().interf().commit.getEndpoint()) {
TLogInterface interf = req.myInterface;
filterLocalityDataForPolicyDcAndProcess(logServers[pos].second, &interf.filteredLocality);
logServers[pos].first->setUnconditional( OptionalInterface<TLogInterface>(interf) );
}
lastReply[req.myInterface.id()].send(TLogRejoinReply{ false });
lastReply[req.myInterface.id()] = req.reply;
}

File diff suppressed because it is too large Load Diff

View File

@ -1866,7 +1866,7 @@ int main(int argc, char* argv[]) {
vector<Future<Void>> actors(listenErrors.begin(), listenErrors.end());
actors.push_back(restoreWorker(opts.connectionFile, opts.localities, dataFolder));
f = stopAfter(waitForAll(actors));
printf("Fast restore worker exits\n");
printf("Fast restore worker started\n");
g_network->run();
printf("g_network->run() done\n");
} else { // Call fdbd roles in conventional way

View File

@ -501,7 +501,6 @@ ACTOR Future<Standalone<CommitTransactionRef>> provisionalMaster( Reference<Mast
// Register a fake master proxy (to be provided right here) to make ourselves available to clients
parent->provisionalProxies = vector<MasterProxyInterface>(1);
parent->provisionalProxies[0].provisional = true;
parent->provisionalProxies[0].locality = parent->myInterface.locality;
parent->provisionalProxies[0].initEndpoints();
state Future<Void> waitFailure = waitFailureServer(parent->provisionalProxies[0].waitFailure.getFuture());
parent->registrationTrigger.trigger();

View File

@ -818,7 +818,6 @@ ACTOR Future<Void> monitorTraceLogIssues(Reference<AsyncVar<std::set<std::string
state bool pingTimeout = false;
loop {
wait(delay(SERVER_KNOBS->TRACE_LOG_FLUSH_FAILURE_CHECK_INTERVAL_SECONDS));
TraceEvent("CrashDebugPingActionSetupInWorker");
Future<Void> pingAck = pingTraceLogWriterThread();
try {
wait(timeoutError(pingAck, SERVER_KNOBS->TRACE_LOG_PING_TIMEOUT_SECONDS));
@ -1280,7 +1279,7 @@ ACTOR Future<Void> workerServer(
}
when( InitializeMasterProxyRequest req = waitNext(interf.masterProxy.getFuture()) ) {
MasterProxyInterface recruited;
recruited.locality = locality;
recruited.processId = locality.processId();
recruited.provisional = false;
recruited.initEndpoints();

View File

@ -670,7 +670,7 @@ struct ConsistencyCheckWorkload : TestWorkload
Standalone<RangeResultRef> UIDtoTagMap = wait( tr.getRange( serverTagKeys, CLIENT_KNOBS->TOO_MANY ) );
ASSERT( !UIDtoTagMap.more && UIDtoTagMap.size() < CLIENT_KNOBS->TOO_MANY );
decodeKeyServersValue(UIDtoTagMap, keyLocations[shard].value, sourceStorageServers, destStorageServers);
decodeKeyServersValue(UIDtoTagMap, keyLocations[shard].value, sourceStorageServers, destStorageServers, false);
//If the destStorageServers is non-empty, then this shard is being relocated
state bool isRelocating = destStorageServers.size() > 0;
@ -1482,8 +1482,8 @@ struct ConsistencyCheckWorkload : TestWorkload
TraceEvent("ConsistencyCheck_LogRouterNotInNonExcludedWorkers").detail("Id", logRouter.id());
return false;
}
if (logRouter.interf().locality.dcId() != expectedRemoteDcId) {
TraceEvent("ConsistencyCheck_LogRouterNotBestDC").detail("expectedDC", getOptionalString(expectedRemoteDcId)).detail("ActualDC", getOptionalString(logRouter.interf().locality.dcId()));
if (logRouter.interf().filteredLocality.dcId() != expectedRemoteDcId) {
TraceEvent("ConsistencyCheck_LogRouterNotBestDC").detail("expectedDC", getOptionalString(expectedRemoteDcId)).detail("ActualDC", getOptionalString(logRouter.interf().filteredLocality.dcId()));
return false;
}
}

View File

@ -652,7 +652,7 @@ struct FuzzApiCorrectnessWorkload : TestWorkload {
limit = deterministicRandom()->randomInt(0, INT_MAX)+1;
}
bool isSpecialKeyRange = specialKeys.contains(keysel1.getKey()) && specialKeys.contains(keysel2.getKey());
bool isSpecialKeyRange = specialKeys.contains(keysel1.getKey()) && keysel2.getKey() <= specialKeys.end;
contract = {
std::make_pair( error_code_range_limits_invalid, ExceptionContract::possibleButRequiredIf(limit < 0) ),
@ -685,7 +685,7 @@ struct FuzzApiCorrectnessWorkload : TestWorkload {
keysel2 = makeKeySel();
limits = makeRangeLimits();
bool isSpecialKeyRange = specialKeys.contains(keysel1.getKey()) && specialKeys.contains(keysel2.getKey());
bool isSpecialKeyRange = specialKeys.contains(keysel1.getKey()) && keysel2.getKey() <= specialKeys.end;
contract = {
std::make_pair( error_code_range_limits_invalid, ExceptionContract::possibleButRequiredIf( !limits.isReached() && !limits.isValid()) ),
@ -729,7 +729,7 @@ struct FuzzApiCorrectnessWorkload : TestWorkload {
limit = deterministicRandom()->randomInt(0, INT_MAX)+1;
}
bool isSpecialKeyRange = specialKeys.contains(key1) && specialKeys.contains(key2);
bool isSpecialKeyRange = specialKeys.contains(key1) && key2 <= specialKeys.end;
contract = {
std::make_pair( error_code_inverted_range, ExceptionContract::requiredIf(key1 > key2) ),
@ -764,7 +764,7 @@ struct FuzzApiCorrectnessWorkload : TestWorkload {
key2 = makeKey();
limits = makeRangeLimits();
bool isSpecialKeyRange = specialKeys.contains(key1) && specialKeys.contains(key2);
bool isSpecialKeyRange = specialKeys.contains(key1) && key2 <= specialKeys.end;
contract = {
std::make_pair( error_code_inverted_range, ExceptionContract::requiredIf(key1 > key2) ),

View File

@ -42,7 +42,7 @@ public:
struct SpecialKeySpaceCorrectnessWorkload : TestWorkload {
int actorCount, minKeysPerRange, maxKeysPerRange, rangeCount, keyBytes, valBytes;
int actorCount, minKeysPerRange, maxKeysPerRange, rangeCount, keyBytes, valBytes, conflictRangeSizeFactor;
double testDuration, absoluteRandomProb, transactionsPerSecond;
PerfIntCounter wrongResults, keysCount;
Reference<ReadYourWritesTransaction> ryw; // used to store all populated data
@ -60,6 +60,9 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload {
transactionsPerSecond = getOption(options, LiteralStringRef("transactionsPerSecond"), 100.0);
actorCount = getOption(options, LiteralStringRef("actorCount"), 1);
absoluteRandomProb = getOption(options, LiteralStringRef("absoluteRandomProb"), 0.5);
// Controls the relative size of read/write conflict ranges and the number of random getranges
conflictRangeSizeFactor = getOption(options, LiteralStringRef("conflictRangeSizeFactor"), 10);
ASSERT(conflictRangeSizeFactor >= 1);
}
virtual std::string description() { return "SpecialKeySpaceCorrectness"; }
@ -72,6 +75,7 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload {
double getCheckTimeout() override { return std::numeric_limits<double>::max(); }
Future<Void> _setup(Database cx, SpecialKeySpaceCorrectnessWorkload* self) {
cx->specialKeySpace = std::make_shared<SpecialKeySpace>();
if (self->clientId == 0) {
self->ryw = Reference(new ReadYourWritesTransaction(cx));
self->ryw->setVersion(100);
@ -97,7 +101,11 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload {
return Void();
}
ACTOR Future<Void> _start(Database cx, SpecialKeySpaceCorrectnessWorkload* self) {
if (self->clientId == 0) wait(timeout(self->getRangeCallActor(cx, self), self->testDuration, Void()));
if (self->clientId == 0) {
wait(timeout(self->getRangeCallActor(cx, self) && testConflictRanges(cx, /*read*/ true, self) &&
testConflictRanges(cx, /*read*/ false, self),
self->testDuration, Void()));
}
return Void();
}
@ -161,6 +169,7 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload {
.detail("TestValue", printable(res2[i].value));
return false;
}
TEST(true); // Special key space keys equal
}
return true;
}
@ -201,6 +210,131 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload {
return GetRangeLimits(rowLimits, byteLimits);
}
ACTOR static Future<Void> testConflictRanges(Database cx_, bool read, SpecialKeySpaceCorrectnessWorkload* self) {
state StringRef prefix = read ? readConflictRangeKeysRange.begin : writeConflictRangeKeysRange.begin;
TEST(read); // test read conflict range special key implementation
TEST(!read); // test write conflict range special key implementation
// Get a default special key range instance
Database cx = cx_->clone();
state Reference<ReadYourWritesTransaction> tx = Reference(new ReadYourWritesTransaction(cx));
state Reference<ReadYourWritesTransaction> referenceTx = Reference(new ReadYourWritesTransaction(cx));
state bool ryw = deterministicRandom()->coinflip();
if (!ryw) {
tx->setOption(FDBTransactionOptions::READ_YOUR_WRITES_DISABLE);
}
referenceTx->setVersion(100); // Prevent this from doing a GRV or committing
referenceTx->clear(normalKeys);
referenceTx->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
int numKeys = deterministicRandom()->randomInt(1, self->conflictRangeSizeFactor) * 4;
state std::vector<std::string> keys; // Must all be distinct
keys.resize(numKeys);
int lastKey = 0;
for (auto& key : keys) {
key = std::to_string(lastKey++);
}
if (deterministicRandom()->coinflip()) {
// Include beginning of keyspace
keys.push_back("");
}
if (deterministicRandom()->coinflip()) {
// Include end of keyspace
keys.push_back("\xff");
}
std::mt19937 g(deterministicRandom()->randomUInt32());
std::shuffle(keys.begin(), keys.end(), g);
// First half of the keys will be ranges, the other keys will mix in some read boundaries that aren't range
// boundaries
std::sort(keys.begin(), keys.begin() + keys.size() / 2);
for (auto iter = keys.begin(); iter + 1 < keys.begin() + keys.size() / 2; iter += 2) {
Standalone<KeyRangeRef> range = KeyRangeRef(*iter, *(iter + 1));
if (read) {
tx->addReadConflictRange(range);
// Add it twice so that we can observe the de-duplication that should get done
tx->addReadConflictRange(range);
} else {
tx->addWriteConflictRange(range);
tx->addWriteConflictRange(range);
}
// TODO test that fails if we don't wait on tx->pendingReads()
referenceTx->set(range.begin, LiteralStringRef("1"));
referenceTx->set(range.end, LiteralStringRef("0"));
}
if (!read && deterministicRandom()->coinflip()) {
try {
wait(tx->commit());
} catch (Error& e) {
if (e.code() == error_code_actor_cancelled) throw;
return Void();
}
TEST(true); // Read write conflict range of committed transaction
}
for (int i = 0; i < self->conflictRangeSizeFactor; ++i) {
GetRangeLimits limit;
KeySelector begin;
KeySelector end;
loop {
begin = firstGreaterOrEqual(deterministicRandom()->randomChoice(keys));
end = firstGreaterOrEqual(deterministicRandom()->randomChoice(keys));
if (begin.getKey() <= end.getKey()) break;
}
bool reverse = deterministicRandom()->coinflip();
auto correctResultFuture = referenceTx->getRange(begin, end, limit, false, reverse);
ASSERT(correctResultFuture.isReady());
begin.setKey(begin.getKey().withPrefix(prefix, begin.arena()));
end.setKey(end.getKey().withPrefix(prefix, begin.arena()));
auto testResultFuture = tx->getRange(begin, end, limit, false, reverse);
ASSERT(testResultFuture.isReady());
auto correct_iter = correctResultFuture.get().begin();
auto test_iter = testResultFuture.get().begin();
bool had_error = false;
while (correct_iter != correctResultFuture.get().end() && test_iter != testResultFuture.get().end()) {
if (correct_iter->key != test_iter->key.removePrefix(prefix) ||
correct_iter->value != test_iter->value) {
TraceEvent(SevError, "TestFailure")
.detail("Reason", "Mismatched keys")
.detail("ConflictType", read ? "read" : "write")
.detail("CorrectKey", correct_iter->key)
.detail("TestKey", test_iter->key)
.detail("CorrectValue", correct_iter->value)
.detail("TestValue", test_iter->value)
.detail("Begin", begin.toString())
.detail("End", end.toString())
.detail("Ryw", ryw);
had_error = true;
}
++correct_iter;
++test_iter;
}
while (correct_iter != correctResultFuture.get().end()) {
TraceEvent(SevError, "TestFailure")
.detail("Reason", "Extra correct key")
.detail("ConflictType", read ? "read" : "write")
.detail("CorrectKey", correct_iter->key)
.detail("CorrectValue", correct_iter->value)
.detail("Begin", begin.toString())
.detail("End", end.toString())
.detail("Ryw", ryw);
++correct_iter;
had_error = true;
}
while (test_iter != testResultFuture.get().end()) {
TraceEvent(SevError, "TestFailure")
.detail("Reason", "Extra test key")
.detail("ConflictType", read ? "read" : "write")
.detail("TestKey", test_iter->key)
.detail("TestValue", test_iter->value)
.detail("Begin", begin.toString())
.detail("End", end.toString())
.detail("Ryw", ryw);
++test_iter;
had_error = true;
}
if (had_error) break;
}
return Void();
}
};
WorkloadFactory<SpecialKeySpaceCorrectnessWorkload> SpecialKeySpaceCorrectnessFactory("SpecialKeySpaceCorrectness");

View File

@ -91,7 +91,7 @@ struct TPCCMetrics {
++failedCounter;
}
TraceEvent("TransactionComplete")
.detail("Type", txnType)
.detail("TransactionType", txnType)
.detail("Latency", responseTime)
.detail("Begin", txnStartTime)
.detail("End", txnStartTime + responseTime)

View File

@ -25,6 +25,7 @@
void forceLinkIndexedSetTests();
void forceLinkDequeTests();
void forceLinkFlowTests();
void forceLinkVersionedMapTests();
struct UnitTestWorkload : TestWorkload {
bool enabled;
@ -43,6 +44,7 @@ struct UnitTestWorkload : TestWorkload {
forceLinkIndexedSetTests();
forceLinkDequeTests();
forceLinkFlowTests();
forceLinkVersionedMapTests();
}
virtual std::string description() { return "UnitTests"; }

View File

@ -530,11 +530,12 @@ public:
int expectedSize() const { return size(); }
int compare(StringRef const& other) const {
if (std::min(size(), other.size()) > 0) {
int c = memcmp(begin(), other.begin(), std::min(size(), other.size()));
size_t minSize = std::min(size(), other.size());
if (minSize != 0) {
int c = memcmp(begin(), other.begin(), minSize);
if (c != 0) return c;
}
return size() - other.size();
return ::compare(size(), other.size());
}
// Removes bytes from begin up to and including the sep string, returns StringRef of the part before sep

View File

@ -61,6 +61,7 @@ set(FLOW_SRCS
ThreadSafeQueue.h
Trace.cpp
Trace.h
TreeBenchmark.h
UnitTest.cpp
UnitTest.h
XmlTraceLogFormatter.cpp

View File

@ -41,11 +41,22 @@ struct KeyValueMapPair {
KeyValueMapPair(KeyRef key, ValueRef value)
: arena(key.expectedSize() + value.expectedSize()), key(arena, key), value(arena, value) {}
int compare(KeyValueMapPair const& r) const { return ::compare(key, r.key); }
template <class CompatibleWithKey>
int compare(CompatibleWithKey const& r) const {
return ::compare(key, r);
}
bool operator<(KeyValueMapPair const& r) const { return key < r.key; }
bool operator==(KeyValueMapPair const& r) const { return key == r.key; }
bool operator!=(KeyValueMapPair const& r) const { return key != r.key; }
};
template <class CompatibleWithKey>
int compare(CompatibleWithKey const& l, KeyValueMapPair const& r) {
return ::compare(l, r.key);
}
template <class CompatibleWithKey>
bool operator<(KeyValueMapPair const& l, CompatibleWithKey const& r) {
return l.key < r;

View File

@ -34,6 +34,32 @@
#endif
#include <functional>
// Until we move to C++20, we'll need something to take the place of operator<=>.
// This is as good a place as any, I guess.
// Three-way comparison for integral types: returns a negative, zero, or
// positive int according to whether l is less than, equal to, or greater
// than r. Stands in for operator<=> until the codebase moves to C++20.
template <typename T>
typename std::enable_if<std::is_integral<T>::value, int>::type compare(T l, T r) {
	// Branchless: each relational result is 0 or 1, so the difference is
	// -1, 0, or 1. GCC also emits branchless code for the equivalent
	// ternary chain (l < r ? -1 : l == r ? 0 : 1), but this form performs
	// slightly better in benchmarks as of this writing.
	return static_cast<int>(l > r) - static_cast<int>(l < r);
}
// Fallback three-way comparison for non-integral types: defers to the left
// operand's member compare (e.g. StringRef::compare, UID::compare below),
// which is expected to return negative/zero/positive.
template <typename T, typename U>
typename std::enable_if<!std::is_integral<T>::value, int>::type compare(T const& l, U const& r) {
	return l.compare(r);
}
// Lexicographic three-way comparison for std::pair: the first components
// decide the order unless they compare equal, in which case the second
// components break the tie.
template <class K, class V>
int compare(std::pair<K, V> const& l, std::pair<K, V> const& r) {
	const int firstCmp = compare(l.first, r.first);
	return firstCmp != 0 ? firstCmp : compare(l.second, r.second);
}
class UID {
uint64_t part[2];
public:
@ -44,6 +70,12 @@ public:
std::string shortString() const;
bool isValid() const { return part[0] || part[1]; }
	// Three-way comparison: orders UIDs by the high 64-bit word first,
	// breaking ties with the low word. Returns negative/zero/positive
	// (see the free ::compare helpers defined earlier in this file).
	int compare(const UID& r) const {
		if (int cmp = ::compare(part[0], r.part[0])) {
			return cmp;
		}
		return ::compare(part[1], r.part[1]);
	}
bool operator == ( const UID& r ) const { return part[0]==r.part[0] && part[1]==r.part[1]; }
bool operator != ( const UID& r ) const { return part[0]!=r.part[0] || part[1]!=r.part[1]; }
bool operator < ( const UID& r ) const { return part[0] < r.part[0] || (part[0] == r.part[0] && part[1] < r.part[1]); }

View File

@ -92,12 +92,16 @@ public:
void send( T const& t ) { // Can be called safely from another thread. Call send or sendError at most once.
Promise<Void> signal;
tagAndForward( &promise, t, signal.getFuture() );
g_network->onMainThread( std::move(signal), incrementPriorityIfEven( g_network->getCurrentTask() ) );
g_network->onMainThread(std::move(signal), g_network->isOnMainThread()
? incrementPriorityIfEven(g_network->getCurrentTask())
: TaskPriority::DefaultOnMainThread);
}
void sendError( Error const& e ) { // Can be called safely from another thread. Call send or sendError at most once.
Promise<Void> signal;
tagAndForwardError( &promise, e, signal.getFuture() );
g_network->onMainThread( std::move(signal), incrementPriorityIfEven( g_network->getCurrentTask() ) );
g_network->onMainThread(std::move(signal), g_network->isOnMainThread()
? incrementPriorityIfEven(g_network->getCurrentTask())
: TaskPriority::DefaultOnMainThread);
}
private:
Promise<T> promise;

View File

@ -31,8 +31,8 @@
#include <cstring>
#include <deque>
#include <random>
#include "flow/TreeBenchmark.h"
#include "flow/UnitTest.h"
template <class Node>
int ISGetHeight(Node* n){
if (!n) return 0;
@ -137,7 +137,123 @@ TEST_CASE("/flow/IndexedSet/erase 400k of 1M") {
return Void();
}
/*TEST_CASE("/flow/IndexedSet/performance") {
// Randomized stress test: applies a random mix of inserts and erases to an
// IndexedSet<int, int> (every element carries metric weight 3), then erases a
// random key range and verifies that element counts, range sums, and tree
// balance all remain consistent.
TEST_CASE("/flow/IndexedSet/random ops") {
	for (int t = 0; t < 100; t++) {
		IndexedSet<int, int> is;
		// Number of random mutation operations for this round.
		int rr = deterministicRandom()->randomInt(0, 600) * deterministicRandom()->randomInt(0, 600);
		for (int n = 0; n < rr; n++) {
			// Erase with probability proportional to the current total metric
			// (keeps the set size roughly bounded); otherwise insert.
			if (deterministicRandom()->random01() < (double)is.sumTo(is.end()) / rr * 2)
				is.erase(is.lower_bound(deterministicRandom()->randomInt(0, 10000000)));
			else
				is.insert(deterministicRandom()->randomInt(0, 10000000), 3);
		}
		// Pick a random half-open key range [b, e) to range-erase.
		int b = deterministicRandom()->randomInt(0, 10000000);
		// int e = b + deterministicRandom()->randomInt(0, 10);
		int e = deterministicRandom()->randomInt(0, 10000000);
		if (e < b) std::swap(b, e);
		auto ib = is.lower_bound(b);
		auto ie = is.lower_bound(e);
		// Each element was inserted with metric 3, so metric sums divided by 3
		// give element counts.
		int original_count = is.sumTo(is.end()) / 3;
		int original_incount = is.sumRange(ib, ie) / 3;
		// printf("\n#%d Erasing %d of %d items\n", t, original_incount, original_count);
		is.erase(ib, ie);
		is.testonly_assertBalanced();

		// Walk the surviving elements, counting any that should have been erased.
		int count = 0, incount = 0;
		for (auto i : is) {
			++count;
			if (i >= b && i < e) {
				// printf("Remaining item: %d (%d - %d)\n", i, b, e);
				incount++;
			}
		}
		// printf("%d items remain, totalling %d\n", count, is.sumTo(is.end()));
		// printf("%d items remain in erased range\n", incount);

		// Nothing in [b, e) survived; the count dropped by exactly the erased
		// amount; and the total metric still matches 3 per element.
		ASSERT(incount == 0);
		ASSERT(count == original_count - original_incount);
		ASSERT(is.sumTo(is.end()) == count * 3);
	}
	return Void();
}
// Basic correctness of Map keyed by std::string: subscript insertion, find on
// hit and miss, and lower/upper bound lookups.
TEST_CASE("/flow/IndexedSet/strings") {
	Map<std::string, int> myMap;
	std::map<std::string, int> aMap;
	myMap["Hello"] = 1;
	myMap["Planet"] = 5;
	// NOTE(review): aMap is populated as a mirror but never checked below —
	// presumably a leftover from an earlier version of this test.
	for (auto i = myMap.begin(); i != myMap.end(); ++i) aMap[i->key] = i->value;
	ASSERT(myMap.find(std::string("Hello"))->value == 1);
	ASSERT(myMap.find(std::string("World")) == myMap.end());
	ASSERT(myMap["Hello"] == 1);
	// upper_bound("A") is the smallest key > "A" ("Hello");
	// lower_bound("M") is the smallest key >= "M" ("Planet").
	auto a = myMap.upper_bound("A")->key;
	auto x = myMap.lower_bound("M")->key;
	ASSERT((a + x) == (std::string) "HelloPlanet");
	return Void();
}
// Adapter giving IndexedSet<K, int> the uniform tree interface expected by
// treeBenchmark (flow/TreeBenchmark.h): insert/find/erase plus bound lookups,
// with the IndexedSet iterator serving directly as the result type.
template <typename K>
struct IndexedSetHarness {
	using map = IndexedSet<K, int>;
	using result = typename map::iterator;
	using key_type = K;

	map s;

	// Every key is inserted with metric weight 1.
	void insert(K const& k) { s.insert(K(k), 1); }
	result find(K const& k) const { return s.find(k); }
	result not_found() const { return s.end(); }
	result begin() const { return s.begin(); }
	result end() const { return s.end(); }
	result lower_bound(K const& k) const { return s.lower_bound(k); }
	result upper_bound(K const& k) const { return s.upper_bound(k); }
	void erase(K const& k) { s.erase(k); }
};
// Benchmarks comparing IndexedSet against std::map via the shared
// treeBenchmark driver, over random StringRef keys and random int keys.
// These print Kop/s figures; they assert nothing about timing.
TEST_CASE("performance/map/StringRef/IndexedSet") {
	Arena arena;
	IndexedSetHarness<StringRef> is;
	treeBenchmark(is, [&arena]() { return randomStr(arena); });
	return Void();
}

TEST_CASE("performance/map/StringRef/StdMap") {
	Arena arena;
	MapHarness<StringRef> is;
	treeBenchmark(is, [&arena]() { return randomStr(arena); });
	return Void();
}

TEST_CASE("performance/map/int/IndexedSet") {
	IndexedSetHarness<int> is;
	treeBenchmark(is, &randomInt);
	return Void();
}

TEST_CASE("performance/map/int/StdMap") {
	MapHarness<int> is;
	treeBenchmark(is, &randomInt);
	return Void();
}
TEST_CASE("performance/flow/IndexedSet/integers") {
std::mt19937_64 urng(deterministicRandom()->randomUInt32());
std::vector<int> x;
for (int i = 0; i<1000000; i++)
x.push_back(deterministicRandom()->randomInt(0, 10000000));
@ -151,7 +267,6 @@ TEST_CASE("/flow/IndexedSet/erase 400k of 1M") {
double end = timer();
double kps = x.size() / 1000.0 / (end - start);
printf("%0.1f Kinsert/sec\n", kps);
ASSERT(kps >= 500); //< Or something?
start = timer();
for (int i = 0; i<x.size(); i++)
@ -159,7 +274,6 @@ TEST_CASE("/flow/IndexedSet/erase 400k of 1M") {
end = timer();
kps = x.size() / 1000.0 / (end - start);
printf("%0.1f Kfind/sec\n", kps);
ASSERT(kps >= 500);
{
//std::set<int> ss;
@ -194,7 +308,7 @@ TEST_CASE("/flow/IndexedSet/erase 400k of 1M") {
is.testonly_assertBalanced();
std::random_shuffle(x.begin(), x.end());
std::shuffle(x.begin(), x.end(), urng);
start = timer();
for (int i = 0; i<x.size(); i++) {
is.erase(x[i]);
@ -204,87 +318,41 @@ TEST_CASE("/flow/IndexedSet/erase 400k of 1M") {
printf("%0.1f Kerase/sec\n", x.size() / 1000.0 / (end - start));
is.testonly_assertBalanced();
for (int i = 0; i<x.size() / 2; i++)
for (int i = 0; i < x.size() / 2; i++) {
ASSERT(is.find(x[i]) == is.end());
}*/
TEST_CASE("/flow/IndexedSet/random ops") {
for (int t = 0; t<100; t++) {
IndexedSet<int, int> is;
int rr = deterministicRandom()->randomInt(0, 600) * deterministicRandom()->randomInt(0, 600);
for (int n = 0; n<rr; n++) {
if (deterministicRandom()->random01() < (double)is.sumTo(is.end()) / rr * 2)
is.erase(is.lower_bound(deterministicRandom()->randomInt(0, 10000000)));
else
is.insert(deterministicRandom()->randomInt(0, 10000000), 3);
}
int b = deterministicRandom()->randomInt(0, 10000000);
//int e = b + deterministicRandom()->randomInt(0, 10);
int e = deterministicRandom()->randomInt(0, 10000000);
if (e<b) std::swap(b, e);
auto ib = is.lower_bound(b);
auto ie = is.lower_bound(e);
int original_count = is.sumTo(is.end())/3;
int original_incount = is.sumRange(ib, ie)/3;
//printf("\n#%d Erasing %d of %d items\n", t, original_incount, original_count);
is.erase(ib, ie);
is.testonly_assertBalanced();
int count = 0, incount = 0;
for (auto i : is) {
++count;
if (i >= b && i < e) {
//printf("Remaining item: %d (%d - %d)\n", i, b, e);
incount++;
}
}
//printf("%d items remain, totalling %d\n", count, is.sumTo(is.end()));
//printf("%d items remain in erased range\n", incount);
ASSERT(incount == 0);
ASSERT(count == original_count - original_incount);
ASSERT(is.sumTo(is.end()) == count*3);
}
return Void();
}
TEST_CASE("/flow/IndexedSet/strings") {
TEST_CASE("performance/flow/IndexedSet/strings") {
constexpr size_t count = 1000000;
Map< std::string, int > myMap;
std::map< std::string, int > aMap;
myMap["Hello"] = 1;
myMap["Planet"] = 5;
for (auto i = myMap.begin(); i != myMap.end(); ++i)
aMap[i->key] = i->value;
double start, end;
int tt = 0;
ASSERT(myMap.find("Hello")->value == 1);
ASSERT(myMap.find("World") == myMap.end());
ASSERT(myMap["Hello"] == 1);
std::string const hello{ "Hello" };
myMap[hello] = 1;
aMap["Hello"] = 1;
auto a = myMap.upper_bound("A")->key;
auto x = myMap.lower_bound("M")->key;
start = timer();
ASSERT((a + x) == (std::string)"HelloPlanet");
for (size_t i = 0; i < count; i++) {
tt += myMap.find(hello)->value;
}
end = timer();
/* This was a performance test:
ASSERT(tt == count);
double start = timer();
volatile int tt=0;
for(int i=0; i<1000000; i++)
tt += myMap.find( "Hello" )->value;
double end = timer();
printf("%0.1f Map.KfindStr/sec\n", 1000000/1000.0/(end-start));
printf("%0.1f Map.KfindStr/sec\n", count / 1000.0 / (end - start));
start = timer();
for(int i=0; i<1000000; i++)
aMap.find( "Hello" );
end = timer();
printf("%0.1f std::map.KfindStr/sec\n", 1000000/1000.0/(end-start));
*/
start = timer();
for (size_t i = 0; i < count; i++) {
aMap.find(hello);
}
end = timer();
printf("%0.1f std::map.KfindStr/sec\n", count / 1000.0 / (end - start));
return Void();
}
@ -340,6 +408,7 @@ TEST_CASE("/flow/IndexedSet/data constructor and destructor calls match") {
~Counter() { count--; }
Counter(const Counter& r) :value(r.value) { count++; }
void operator=(const Counter& r) { value = r.value; }
int compare(const Counter& r) const { return ::compare(value, r.value); }
bool operator<(const Counter& r) const { return value < r.value; }
};
IndexedSet<Counter, NoMetric> mySet;

View File

@ -22,6 +22,7 @@
#define FLOW_INDEXEDSET_H
#pragma once
#include "flow/Arena.h"
#include "flow/Platform.h"
#include "flow/FastAlloc.h"
#include "flow/Trace.h"
@ -199,7 +200,7 @@ private:
Node *root;
Metric eraseHalf( Node* start, Node* end, int eraseDir, int& heightDelta, std::vector<Node*>& toFree );
Metric eraseHalf(Node* start, Node* end, int eraseDir, int& heightDelta, std::vector<Node*>& toFree);
void erase( iterator begin, iterator end, std::vector<Node*>& toFree );
void replacePointer( Node* oldNode, Node* newNode ) {
@ -252,6 +253,11 @@ public:
MapPair(MapPair&& r) BOOST_NOEXCEPT : key(std::move(r.key)), value(std::move(r.value)) {}
void operator=(MapPair&& r) BOOST_NOEXCEPT { key = std::move(r.key); value = std::move(r.value); }
int compare(MapPair<Key, Value> const& r) const { return ::compare(key, r.key); }
template <class CompatibleWithKey>
int compare(CompatibleWithKey const& r) const {
return ::compare(key, r);
}
bool operator<(MapPair<Key,Value> const& r) const { return key < r.key; }
bool operator<=(MapPair<Key,Value> const& r) const { return key <= r.key; }
bool operator==(MapPair<Key,Value> const& r) const { return key == r.key; }
@ -260,6 +266,11 @@ public:
//private: MapPair( const MapPair& );
};
template <class Key, class Value, class CompatibleWithKey>
inline int compare(CompatibleWithKey const& l, MapPair<Key, Value> const& r) {
return compare(l, r.key);
}
template <class Key, class Value>
inline MapPair<typename std::decay<Key>::type, typename std::decay<Value>::type> mapPair(Key&& key, Value&& value) { return MapPair<typename std::decay<Key>::type, typename std::decay<Value>::type>(std::forward<Key>(key), std::forward<Value>(value)); }
@ -614,8 +625,8 @@ typename IndexedSet<T,Metric>::iterator IndexedSet<T,Metric>::insert(T_&& data,
int d; // direction
// traverse to find insert point
while (true){
d = t->data < data;
if (!d && !(data < t->data)) { // t->data == data
int cmp = compare(data, t->data);
if (cmp == 0) {
Node *returnNode = t;
if(replaceExisting) {
t->data = std::forward<T_>(data);
@ -633,6 +644,7 @@ typename IndexedSet<T,Metric>::iterator IndexedSet<T,Metric>::insert(T_&& data,
return returnNode;
}
d = cmp > 0;
Node *nextT = t->child[d];
if (!nextT) break;
t = nextT;
@ -689,7 +701,7 @@ int IndexedSet<T,Metric>::insert(const std::vector<std::pair<T,Metric>>& dataVec
int d = 1; // direction
if(blockStart == NULL || (blockEnd != NULL && data >= blockEnd->data)) {
blockEnd = NULL;
if (root == NULL){
if (root == NULL) {
root = new Node(std::move(data), metric);
num_inserted++;
blockStart = root;
@ -699,11 +711,12 @@ int IndexedSet<T,Metric>::insert(const std::vector<std::pair<T,Metric>>& dataVec
Node *t = root;
// traverse to find insert point
bool foundNode = false;
while (true){
d = t->data < data;
if (!d)
while (true) {
int cmp = compare(data, t->data);
d = cmp > 0;
if (d == 0)
blockEnd = t;
if (!d && !(data < t->data)) { // t->data == data
if (cmp == 0) {
Node *returnNode = t;
if(replaceExisting) {
num_inserted++;
@ -784,7 +797,8 @@ int IndexedSet<T,Metric>::insert(const std::vector<std::pair<T,Metric>>& dataVec
}
template <class T, class Metric>
Metric IndexedSet<T,Metric>::eraseHalf( Node* start, Node* end, int eraseDir, int& heightDelta, std::vector<Node*>& toFree ) {
Metric IndexedSet<T, Metric>::eraseHalf(Node* start, Node* end, int eraseDir, int& heightDelta,
std::vector<Node*>& toFree) {
// Removes all nodes between start (inclusive) and end (exclusive) from the set, where start is equal to end or one of its descendants
// eraseDir 1 means erase the right half (nodes > at) of the left subtree of end. eraseDir 0 means the left half of the right subtree
// toFree is extended with the roots of completely removed subtrees
@ -860,7 +874,7 @@ void IndexedSet<T,Metric>::erase( typename IndexedSet<T,Metric>::iterator begin,
// Removes all nodes in the set between first and last, inclusive.
// toFree is extended with the roots of completely removed subtrees.
ASSERT(!end.i || (begin.i && *begin <= *end));
ASSERT(!end.i || (begin.i && (::compare(*begin, *end) <= 0)));
if(begin == end)
return;
@ -876,8 +890,8 @@ void IndexedSet<T,Metric>::erase( typename IndexedSet<T,Metric>::iterator begin,
// Erase all matching nodes that descend from subRoot, by first erasing descendants of subRoot->child[0] and then erasing the descendants of subRoot->child[1]
// subRoot is not removed from the tree at this time
metricDelta = metricDelta + eraseHalf( first, subRoot, 1, leftHeightDelta, toFree );
metricDelta = metricDelta + eraseHalf( last, subRoot, 0, rightHeightDelta, toFree );
metricDelta = metricDelta + eraseHalf(first, subRoot, 1, leftHeightDelta, toFree);
metricDelta = metricDelta + eraseHalf(last, subRoot, 0, rightHeightDelta, toFree);
// Change in the height of subRoot due to past activity, before subRoot is rebalanced. subRoot->balance already reflects changes in height to its children.
int heightDelta = leftHeightDelta + rightHeightDelta;
@ -995,10 +1009,9 @@ template <class Key>
typename IndexedSet<T,Metric>::iterator IndexedSet<T,Metric>::find(const Key &key) const {
Node* t = root;
while (t){
int d = t->data < key;
if (!d && !(key < t->data)) // t->data == key
return iterator(t);
t = t->child[d];
int cmp = compare(key, t->data);
if (cmp == 0) return iterator(t);
t = t->child[cmp > 0];
}
return end();
}
@ -1009,14 +1022,15 @@ template <class Key>
typename IndexedSet<T,Metric>::iterator IndexedSet<T,Metric>::lower_bound(const Key &key) const {
Node* t = root;
if (!t) return iterator();
bool less;
while (true) {
Node *n = t->child[ t->data < key ];
less = t->data < key;
Node* n = t->child[less];
if (!n) break;
t = n;
}
if (t->data < key)
moveIterator<1>(t);
if (less) moveIterator<1>(t);
return iterator(t);
}
@ -1027,14 +1041,15 @@ template <class Key>
typename IndexedSet<T,Metric>::iterator IndexedSet<T,Metric>::upper_bound(const Key &key) const {
Node* t = root;
if (!t) return iterator();
bool not_less;
while (true) {
Node *n = t->child[ !(key < t->data) ];
not_less = !(key < t->data);
Node* n = t->child[not_less];
if (!n) break;
t = n;
}
if (!(key < t->data))
moveIterator<1>(t);
if (not_less) moveIterator<1>(t);
return iterator(t);
}

View File

@ -91,6 +91,7 @@ public: // introduced features
PROTOCOL_VERSION_FEATURE(0x0FDB00B063000000LL, UnifiedTLogSpilling);
PROTOCOL_VERSION_FEATURE(0x0FDB00B063010000LL, BackupWorker);
PROTOCOL_VERSION_FEATURE(0x0FDB00B063010000LL, ReportConflictingKeys);
PROTOCOL_VERSION_FEATURE(0x0FDB00B063010000LL, SmallEndpoints);
};
// These impact both communications and the deserialization of certain database and IKeyValueStore keys.

View File

@ -22,7 +22,7 @@
int64_t dl_iterate_phdr_calls = 0;
#ifdef __linux__
#if defined(__linux__) && !defined(USE_SANITIZER)
#include <link.h>
#include <mutex>

View File

@ -28,7 +28,6 @@
#define FLOW_TDMETRIC_ACTOR_H
#include "flow/flow.h"
#include "flow/IndexedSet.h"
#include "flow/network.h"
#include "flow/Knobs.h"
#include "flow/genericactors.actor.h"
@ -56,9 +55,21 @@ struct MetricNameRef {
int expectedSize() const {
return type.expectedSize() + name.expectedSize();
}
inline int compare(MetricNameRef const& r) const {
int cmp;
if ((cmp = type.compare(r.type))) {
return cmp;
}
if ((cmp = name.compare(r.name))) {
return cmp;
}
return id.compare(r.id);
}
};
extern std::string reduceFilename(std::string const &filename);
inline bool operator < (const MetricNameRef& l, const MetricNameRef& r ) {
int cmp = l.type.compare(r.type);
if(cmp == 0) {

View File

@ -197,12 +197,8 @@ public:
};
void blockUntilReady() {
if(isReadyUnsafe()) {
ThreadSpinLockHolder holder(mutex);
ASSERT(isReadyUnsafe());
}
else {
BlockCallback cb( *this );
if (!isReady()) {
BlockCallback cb(*this);
}
}

View File

@ -22,6 +22,8 @@
#define FLOW_THREADPRIMITIVES_H
#pragma once
#include <atomic>
#include "flow/Error.h"
#include "flow/Trace.h"
@ -45,7 +47,7 @@
class ThreadSpinLock {
public:
// #ifdef _WIN32
ThreadSpinLock(bool initiallyLocked=false) : isLocked(initiallyLocked) {
ThreadSpinLock() {
#if VALGRIND
ANNOTATE_RWLOCK_CREATE(this);
#endif
@ -56,31 +58,26 @@ public:
#endif
}
void enter() {
while (interlockedCompareExchange(&isLocked, 1, 0) == 1)
_mm_pause();
while (isLocked.test_and_set(std::memory_order_acquire)) _mm_pause();
#if VALGRIND
ANNOTATE_RWLOCK_ACQUIRED(this, true);
#endif
}
void leave() {
#if defined(__linux__)
__sync_synchronize();
#endif
isLocked = 0;
#if defined(__linux__)
__sync_synchronize();
#endif
isLocked.clear(std::memory_order_release);
#if VALGRIND
ANNOTATE_RWLOCK_RELEASED(this, true);
#endif
}
void assertNotEntered() {
ASSERT( !isLocked );
ASSERT(!isLocked.test_and_set(std::memory_order_acquire));
isLocked.clear(std::memory_order_release);
}
private:
ThreadSpinLock(const ThreadSpinLock&);
void operator=(const ThreadSpinLock&);
volatile int32_t isLocked;
std::atomic_flag isLocked = ATOMIC_FLAG_INIT;
};
class ThreadSpinLockHolder {

View File

@ -1053,7 +1053,7 @@ TraceEvent& TraceEvent::suppressFor( double duration, bool logSuppressedEventCou
}
}
else {
TraceEvent(SevWarnAlways, "SuppressionFromNonNetworkThread").detail("Type", type);
TraceEvent(SevWarnAlways, "SuppressionFromNonNetworkThread").detail("Event", type);
detail("__InvalidSuppression__", ""); // Choosing a detail name that is unlikely to collide with other names
}
}

126
flow/TreeBenchmark.h Normal file
View File

@ -0,0 +1,126 @@
/*
 * TreeBenchmark.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2020-2020 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FLOW_TREEBENCHMARK_H
#define FLOW_TREEBENCHMARK_H
#pragma once
#include "flow/flow.h"
#include <random>
// RAII timer for a benchmark phase: records the start time at construction
// and, on destruction, prints "<name>: <rate> Kop/s" for opCount operations.
struct opTimer {
	double start = timer();
	char const* name;
	int opCount;

	opTimer(char const* name, int opCount) : name(name), opCount(opCount) {}

	~opTimer() { printf("%s: %0.1f Kop/s\n", name, (opCount / 1000.0) / (timer() - start)); }
};
// Applies f to every element of container t, timing the whole pass with an
// opTimer (one "op" per element) and printing the rate under the given name.
template <typename F, typename T>
void timedRun(char const* name, T& t, F f) {
	opTimer timer(name, t.size());
	for (auto& i : t) {
		f(i);
	}
}
// Adapter giving std::map<K, int> the uniform tree interface expected by
// treeBenchmark: insert/find/erase plus bound lookups. The nested result
// type wraps a map iterator and exposes only the key, so the driver can
// treat set-like and map-like trees identically.
template <typename K>
struct MapHarness {
	using map = std::map<K, int>;
	using key_type = K;

	// Key-only view of a map const_iterator.
	struct result {
		typename map::const_iterator it;

		result(typename map::const_iterator it) : it(it) {}

		result& operator++() {
			it++;
			return *this;
		}

		const K& operator*() const { return (*it).first; }
		// operator-> must yield a pointer for member access to compile; the
		// previous version returned const K&, which made any use of '->'
		// ill-formed (it only built because the member was never instantiated).
		const K* operator->() const { return &it->first; }
		bool operator==(result const& k) const { return it == k.it; }
		// Provided alongside operator== so result supports the '!=' checks
		// the benchmark driver performs (e.g. find(k) != not_found()).
		bool operator!=(result const& k) const { return it != k.it; }
	};

	map s;

	// Every key maps to a dummy value of 1.
	void insert(K const& k) { s.insert(std::pair<K, int>(k, 1)); }
	result find(K const& k) const { return result(s.find(k)); }
	result not_found() const { return result(s.end()); }
	result begin() const { return result(s.begin()); }
	result end() const { return result(s.end()); }
	result lower_bound(K const& k) const { return result(s.lower_bound(k)); }
	result upper_bound(K const& k) const { return result(s.upper_bound(k)); }
	void erase(K const& k) { s.erase(k); }
};
// Generic benchmark driver: generates one million keys with generateKey, then
// times insert, find, lower_bound, upper_bound, an in-order scan, sorted
// finds, and randomized erase against any tree exposing the harness interface
// (key_type, insert/find/erase, bounds, begin/end/not_found).
template <typename T, typename F>
void treeBenchmark(T& tree, F generateKey) {
	// Shuffle seed drawn from the deterministic RNG so runs are reproducible.
	std::mt19937_64 urng(deterministicRandom()->randomUInt32());

	using key = typename T::key_type;

	int keyCount = 1000000;

	std::vector<key> keys;
	for (int i = 0; i < keyCount; i++) {
		keys.push_back(generateKey());
	}

	timedRun("insert", keys, [&tree](key const& k) { tree.insert(k); });
	timedRun("find", keys, [&tree](key const& k) { ASSERT(tree.find(k) != tree.not_found()); });

	timedRun("lower_bound", keys, [&tree](key const & k) { ASSERT(tree.lower_bound(k) != tree.not_found()); });
	timedRun("upper_bound", keys, [&tree](key const & k) { tree.upper_bound(k); });

	// Sort and deduplicate so the scan below can be checked against a known
	// in-order sequence of distinct keys.
	std::sort(keys.begin(), keys.end());
	keys.resize(std::unique(keys.begin(), keys.end()) - keys.begin());

	auto iter = tree.lower_bound(*keys.begin());
	timedRun("scan", keys, [&tree, &iter](key const& k) {
		ASSERT(k == *iter);
		++iter;
	});
	ASSERT(iter == tree.end());

	timedRun("find (sorted)", keys, [&tree](key const& k) { ASSERT(tree.find(k) != tree.end()); });

	// Erase every key in random order; the tree must end up empty.
	std::shuffle(keys.begin(), keys.end(), urng);
	timedRun("erase", keys, [&tree](key const& k) { tree.erase(k); });
	ASSERT(tree.begin() == tree.end());
}
// Key generator for the StringRef benchmarks: a random 100-character
// alphanumeric string allocated in the given arena.
static inline StringRef randomStr(Arena& arena) {
	size_t keySz = 100;
	return StringRef(arena, deterministicRandom()->randomAlphaNumeric(keySz));
}

// Key generator for the int benchmarks: uniform over [0, INT32_MAX).
static inline int randomInt() {
	return deterministicRandom()->randomInt(0, INT32_MAX);
}
#endif // FLOW_TREEBENCHMARK_H

View File

@ -2926,7 +2926,7 @@ static class VDSOInitHelper {
/* Each function is empty and called (via a macro) only in debug mode.
The arguments are captured by dynamic tools at runtime. */
#if DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL == 0 && !defined(__native_client__)
#if DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL == 0 && !defined(__native_client__) && !__has_feature(thread_sanitizer)
#if __has_feature(memory_sanitizer)
#include <sanitizer/msan_interface.h>

View File

@ -170,17 +170,17 @@ if(WITH_PYTHON)
TEST_FILES restarting/StorefrontTestRestart-1.txt
restarting/StorefrontTestRestart-2.txt)
add_fdb_test(
TEST_FILES SnapTestAttrition-1.txt
SnapTestAttrition-2.txt IGNORE)
TEST_FILES restarting/from_6.2.0/SnapTestAttrition-1.txt
restarting/from_6.2.0/SnapTestAttrition-2.txt)
add_fdb_test(
TEST_FILES SnapTestSimpleRestart-1.txt
SnapTestSimpleRestart-2.txt IGNORE)
TEST_FILES restarting/from_6.2.0/SnapTestSimpleRestart-1.txt
restarting/from_6.2.0/SnapTestSimpleRestart-2.txt)
add_fdb_test(
TEST_FILES SnapTestRestart-1.txt
SnapTestRestart-2.txt IGNORE)
TEST_FILES restarting/from_6.2.0/SnapTestRestart-1.txt
restarting/from_6.2.0/SnapTestRestart-2.txt)
add_fdb_test(
TEST_FILES SnapCycleRestart-1.txt
SnapCycleRestart-2.txt IGNORE)
TEST_FILES restarting/from_6.2.0/SnapCycleRestart-1.txt
restarting/from_6.2.0/SnapCycleRestart-2.txt)
add_fdb_test(
TEST_FILES restarting/from_5.1.7/DrUpgradeRestart-1.txt
restarting/from_5.1.7/DrUpgradeRestart-2.txt)