Merge pull request #3996 from sfc-gh-tclinkenbeard/merge

Merge 6.3 into master
This commit is contained in:
Xin Dong 2020-11-03 14:53:25 -08:00 committed by GitHub
commit a9366f39b5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
25 changed files with 3136 additions and 1664 deletions

View File

@ -8,10 +8,10 @@ RUN yum install -y yum-utils &&\
http://opensource.wandisco.com/centos/6/git/x86_64/wandisco-git-release-6-1.noarch.rpm &&\
yum -y install devtoolset-8-8.1-1.el6 java-1.8.0-openjdk-devel \
devtoolset-8-gcc-8.3.1 devtoolset-8-gcc-c++-8.3.1 \
devtoolset-8-libubsan-devel devtoolset-8-valgrind-devel \
devtoolset-8-libubsan-devel devtoolset-8-libasan-devel devtoolset-8-valgrind-devel \
rh-python36-python-devel rh-ruby24 golang python27 rpm-build \
mono-core debbuild python-pip dos2unix valgrind-devel ccache \
distcc wget git &&\
distcc wget git lz4 lz4-devel lz4-static &&\
pip install boto3==1.1.1
USER root
@ -61,8 +61,8 @@ RUN cd /opt/ && curl -L https://github.com/facebook/rocksdb/archive/v6.10.1.tar.
ARG TIMEZONEINFO=America/Los_Angeles
RUN rm -f /etc/localtime && ln -s /usr/share/zoneinfo/${TIMEZONEINFO} /etc/localtime
LABEL version=0.1.17
ENV DOCKER_IMAGEVER=0.1.17
LABEL version=0.1.19
ENV DOCKER_IMAGEVER=0.1.19
ENV JAVA_HOME=/usr/lib/jvm/java-1.8.0
ENV CC=/opt/rh/devtoolset-8/root/usr/bin/gcc
ENV CXX=/opt/rh/devtoolset-8/root/usr/bin/g++

View File

@ -1,4 +1,4 @@
FROM foundationdb/foundationdb-build:0.1.17
FROM foundationdb/foundationdb-build:0.1.19
USER root
@ -50,8 +50,8 @@ RUN cp -iv /usr/local/bin/clang++ /usr/local/bin/clang++.deref &&\
ldconfig &&\
rm -rf /mnt/artifacts
LABEL version=0.11.9
ENV DOCKER_IMAGEVER=0.11.9
LABEL version=0.11.10
ENV DOCKER_IMAGEVER=0.11.10
ENV CLANGCC=/usr/local/bin/clang.de8a65ef
ENV CLANGCXX=/usr/local/bin/clang++.de8a65ef

View File

@ -2,7 +2,7 @@ version: "3"
services:
common: &common
image: foundationdb/foundationdb-build:0.1.17
image: foundationdb/foundationdb-build:0.1.19
build-setup: &build-setup
<<: *common

View File

@ -18,7 +18,7 @@ if (RocksDB_FOUND)
-DWITH_CORE_TOOLS=OFF
-DWITH_BENCHMARK_TOOLS=OFF
-DWITH_BZ2=OFF
-DWITH_LZ4=OFF
-DWITH_LZ4=ON
-DWITH_SNAPPY=OFF
-DWITH_ZLIB=OFF
-DWITH_ZSTD=OFF
@ -45,7 +45,7 @@ else()
-DWITH_CORE_TOOLS=OFF
-DWITH_BENCHMARK_TOOLS=OFF
-DWITH_BZ2=OFF
-DWITH_LZ4=OFF
-DWITH_LZ4=ON
-DWITH_SNAPPY=OFF
-DWITH_ZLIB=OFF
-DWITH_ZSTD=OFF

File diff suppressed because it is too large Load Diff

View File

@ -2,6 +2,12 @@
Release Notes
#############
6.2.28
======
* Log detailed team collection information when median available space ratio of all teams is too low. `(PR #3912) <https://github.com/apple/foundationdb/pull/3912>`_
* Bug fix, blob client did not support authentication key sizes over 64 bytes. `(PR #3964) <https://github.com/apple/foundationdb/pull/3964>`_
6.2.27
======
* For clusters with a large number of shards, avoid slow tasks in the data distributor by adding yields to the shard map destruction. `(PR #3834) <https://github.com/apple/foundationdb/pull/3834>`_

View File

@ -2,8 +2,8 @@
Release Notes
#############
6.3.9
=====
6.3.10
======
Features
--------
@ -63,6 +63,7 @@ Fixes
* Fix an issue where ``fdbcli --exec 'exclude no_wait ...'`` would incorrectly report that processes can safely be removed from the cluster. [6.3.5] `(PR #3566) <https://github.com/apple/foundationdb/pull/3566>`_
* Commit latencies could become large because of inaccurate compute estimates. [6.3.9] `(PR #3845) <https://github.com/apple/foundationdb/pull/3845>`_
* Added a timeout on TLS handshakes to prevent them from hanging indefinitely. [6.3.9] `(PR #3850) <https://github.com/apple/foundationdb/pull/3850>`_
* Bug fix, blob client did not support authentication key sizes over 64 bytes. `(PR #3964) <https://github.com/apple/foundationdb/pull/3964>`_
Status
------
@ -121,6 +122,8 @@ Fixes from previous versions
* The 6.3.1 patch release includes all fixes from the patch releases 6.2.21 and 6.2.22. :doc:`(6.2 Release Notes) </release-notes/release-notes-620>`
* The 6.3.3 patch release includes all fixes from the patch release 6.2.23. :doc:`(6.2 Release Notes) </release-notes/release-notes-620>`
* The 6.3.5 patch release includes all fixes from the patch releases 6.2.24 and 6.2.25. :doc:`(6.2 Release Notes) </release-notes/release-notes-620>`
* The 6.3.9 patch release includes all fixes from the patch release 6.2.26. :doc:`(6.2 Release Notes) </release-notes/release-notes-620>`
* The 6.3.10 patch release includes all fixes from the patch release 6.2.27. :doc:`(6.2 Release Notes) </release-notes/release-notes-620>`
Fixes only impacting 6.3.0+
---------------------------

View File

@ -427,7 +427,8 @@ public:
return runRYWTransaction(cx, [=](Reference<ReadYourWritesTransaction> tr){ return discontinueBackup(tr, tagName); });
}
Future<Void> abortBackup(Database cx, Key tagName, bool partial = false, bool abortOldBackup = false, bool dstOnly = false);
Future<Void> abortBackup(Database cx, Key tagName, bool partial = false, bool abortOldBackup = false,
bool dstOnly = false, bool waitForDestUID = false);
Future<std::string> getStatus(Database cx, int errorLimit, Key tagName);

File diff suppressed because it is too large Load Diff

View File

@ -1597,6 +1597,7 @@ namespace dbBackup {
wait(tr->commit());
break;
} catch (Error &e) {
TraceEvent("SetDestUidOrBeginVersionError").error(e, true);
wait(tr->onError(e));
}
}
@ -2167,7 +2168,8 @@ public:
return Void();
}
ACTOR static Future<Void> abortBackup(DatabaseBackupAgent* backupAgent, Database cx, Key tagName, bool partial, bool abortOldBackup, bool dstOnly) {
ACTOR static Future<Void> abortBackup(DatabaseBackupAgent* backupAgent, Database cx, Key tagName, bool partial,
bool abortOldBackup, bool dstOnly, bool waitForDestUID) {
state Reference<ReadYourWritesTransaction> tr(new ReadYourWritesTransaction(cx));
state Key logUidValue, destUidValue;
state UID logUid, destUid;
@ -2187,14 +2189,19 @@ public:
state Future<UID> destUidFuture = backupAgent->getDestUid(tr, logUid);
wait(success(statusFuture) && success(destUidFuture));
UID destUid = destUidFuture.get();
if (destUid.isValid()) {
destUidValue = BinaryWriter::toValue(destUid, Unversioned());
}
EBackupState status = statusFuture.get();
if (!backupAgent->isRunnable(status)) {
throw backup_unneeded();
}
UID destUid = destUidFuture.get();
if (destUid.isValid()) {
destUidValue = BinaryWriter::toValue(destUid, Unversioned());
} else if (destUidValue.size() == 0 && waitForDestUID) {
// Give DR task a chance to update destUid to avoid the problem of
// leftover version key. If we got a commit_unknown_result before,
// reuse the previous destUidValue.
throw not_committed();
}
Optional<Value> _backupUid = wait(tr->get(backupAgent->states.get(logUidValue).pack(DatabaseBackupAgent::keyFolderId)));
backupUid = _backupUid.get();
@ -2215,6 +2222,7 @@ public:
break;
}
catch (Error &e) {
TraceEvent("DBA_AbortError").error(e, true);
wait(tr->onError(e));
}
}
@ -2523,8 +2531,9 @@ Future<Void> DatabaseBackupAgent::discontinueBackup(Reference<ReadYourWritesTran
return DatabaseBackupAgentImpl::discontinueBackup(this, tr, tagName);
}
Future<Void> DatabaseBackupAgent::abortBackup(Database cx, Key tagName, bool partial, bool abortOldBackup, bool dstOnly){
return DatabaseBackupAgentImpl::abortBackup(this, cx, tagName, partial, abortOldBackup, dstOnly);
Future<Void> DatabaseBackupAgent::abortBackup(Database cx, Key tagName, bool partial, bool abortOldBackup, bool dstOnly,
bool waitForDestUID) {
return DatabaseBackupAgentImpl::abortBackup(this, cx, tagName, partial, abortOldBackup, dstOnly, waitForDestUID);
}
Future<std::string> DatabaseBackupAgent::getStatus(Database cx, int errorLimit, Key tagName) {

View File

@ -4462,7 +4462,6 @@ Future< StorageMetrics > Transaction::getStorageMetrics( KeyRange const& keys, i
ACTOR Future<Standalone<VectorRef<DDMetricsRef>>> waitDataDistributionMetricsList(Database cx, KeyRange keys,
int shardLimit) {
state Future<Void> clientTimeout = delay(5.0);
loop {
choose {
when(wait(cx->onProxiesChanged())) {}
@ -4474,7 +4473,6 @@ ACTOR Future<Standalone<VectorRef<DDMetricsRef>>> waitDataDistributionMetricsLis
}
return rep.get().storageMetricsList;
}
when(wait(clientTimeout)) { throw timed_out(); }
}
}
}

View File

@ -991,7 +991,12 @@ Future<std::vector<std::string>> S3BlobStoreEndpoint::listBuckets() {
std::string S3BlobStoreEndpoint::hmac_sha1(std::string const& msg) {
std::string key = secret;
// First pad the key to 64 bytes.
// Hash key to shorten it if it is longer than SHA1 block size
if(key.size() > 64) {
key = SHA1::from_string(key);
}
// Pad key up to SHA1 block size if needed
key.append(64 - key.size(), '\0');
std::string kipad = key;

View File

@ -540,25 +540,34 @@ Future<Standalone<RangeResultRef>> ConflictingKeysImpl::getRange(ReadYourWritesT
}
ACTOR Future<Standalone<RangeResultRef>> ddMetricsGetRangeActor(ReadYourWritesTransaction* ryw, KeyRangeRef kr) {
try {
auto keys = kr.removePrefix(ddStatsRange.begin);
Standalone<VectorRef<DDMetricsRef>> resultWithoutPrefix =
wait(waitDataDistributionMetricsList(ryw->getDatabase(), keys, CLIENT_KNOBS->STORAGE_METRICS_SHARD_LIMIT));
Standalone<RangeResultRef> result;
for (const auto& ddMetricsRef : resultWithoutPrefix) {
// each begin key is the previous end key, thus we only encode the begin key in the result
KeyRef beginKey = ddMetricsRef.beginKey.withPrefix(ddStatsRange.begin, result.arena());
// Use json string encoded in utf-8 to encode the values, easy for adding more fields in the future
json_spirit::mObject statsObj;
statsObj["shard_bytes"] = ddMetricsRef.shardBytes;
std::string statsString =
json_spirit::write_string(json_spirit::mValue(statsObj), json_spirit::Output_options::raw_utf8);
ValueRef bytes(result.arena(), statsString);
result.push_back(result.arena(), KeyValueRef(beginKey, bytes));
loop {
try {
auto keys = kr.removePrefix(ddStatsRange.begin);
Standalone<VectorRef<DDMetricsRef>> resultWithoutPrefix = wait(
waitDataDistributionMetricsList(ryw->getDatabase(), keys, CLIENT_KNOBS->STORAGE_METRICS_SHARD_LIMIT));
Standalone<RangeResultRef> result;
for (const auto& ddMetricsRef : resultWithoutPrefix) {
// each begin key is the previous end key, thus we only encode the begin key in the result
KeyRef beginKey = ddMetricsRef.beginKey.withPrefix(ddStatsRange.begin, result.arena());
// Use json string encoded in utf-8 to encode the values, easy for adding more fields in the future
json_spirit::mObject statsObj;
statsObj["shard_bytes"] = ddMetricsRef.shardBytes;
std::string statsString =
json_spirit::write_string(json_spirit::mValue(statsObj), json_spirit::Output_options::raw_utf8);
ValueRef bytes(result.arena(), statsString);
result.push_back(result.arena(), KeyValueRef(beginKey, bytes));
}
return result;
} catch (Error& e) {
state Error err(e);
if (e.code() == error_code_operation_failed) {
TraceEvent(SevWarnAlways, "DataDistributorNotPresent")
.detail("Operation", "DDMetricsReqestThroughSpecialKeys");
wait(delayJittered(FLOW_KNOBS->PREVENT_FAST_SPIN_DELAY));
continue;
}
throw err;
}
return result;
} catch (Error& e) {
throw;
}
}

View File

@ -35,8 +35,10 @@ void TagSet::addTag(TransactionTagRef tag) {
throw too_many_tags();
}
auto result = tags.insert(TransactionTagRef(arena, tag));
if(result.second) {
TransactionTagRef tagRef(arena, tag);
auto it = find(tags.begin(), tags.end(), tagRef);
if (it == tags.end()) {
tags.push_back(std::move(tagRef));
bytes += tag.size();
}
}

View File

@ -40,7 +40,7 @@ typedef Standalone<TransactionTagRef> TransactionTag;
class TagSet {
public:
typedef std::set<TransactionTagRef>::const_iterator const_iterator;
typedef std::vector<TransactionTagRef>::const_iterator const_iterator;
TagSet() : bytes(0) {}
@ -54,51 +54,35 @@ public:
const_iterator end() const {
return tags.end();
}
void clear() {
tags.clear();
bytes = 0;
}
//private:
Arena arena;
std::set<TransactionTagRef> tags;
size_t bytes;
};
template <>
struct dynamic_size_traits<TagSet> : std::true_type {
// May be called multiple times during one serialization
template <class Context>
static size_t size(const TagSet& t, Context&) {
return t.tags.size() + t.bytes;
}
// Guaranteed to be called only once during serialization
template <class Context>
static void save(uint8_t* out, const TagSet& t, Context& c) {
void save(uint8_t* out, Context& c) const {
uint8_t *start = out;
for (const auto& tag : t.tags) {
for (const auto& tag : *this) {
*(out++) = (uint8_t)tag.size();
std::copy(tag.begin(), tag.end(), out);
out += tag.size();
}
ASSERT((size_t)(out-start) == size(t, c));
ASSERT((size_t)(out - start) == size() + bytes);
}
// Context is an arbitrary type that is plumbed by reference throughout the
// load call tree.
template <class Context>
static void load(const uint8_t* data, size_t size, TagSet& t, Context& context) {
void load(const uint8_t* data, size_t size, Context& context) {
//const uint8_t *start = data;
const uint8_t *end = data + size;
while(data < end) {
uint8_t len = *(data++);
TransactionTagRef tag(context.tryReadZeroCopy(data, len), len);
// Tags are already deduplicated
const auto& tag = tags.emplace_back(context.tryReadZeroCopy(data, len), len);
data += len;
t.tags.insert(tag);
t.bytes += tag.size();
bytes += tag.size();
}
ASSERT(data == end);
@ -106,7 +90,41 @@ struct dynamic_size_traits<TagSet> : std::true_type {
// Deserialized tag sets share the arena with the request that contained them
// For this reason, persisting a TagSet that shares memory with other request
// members should be done with caution.
t.arena = context.arena();
arena = context.arena();
}
size_t getBytes() const { return bytes; }
const Arena& getArena() const { return arena; }
private:
size_t bytes;
Arena arena;
// Currently there are never >= 256 tags, so
// std::vector is faster than std::set. This may
// change if we allow more tags in the future.
std::vector<TransactionTagRef> tags;
};
template <>
struct dynamic_size_traits<TagSet> : std::true_type {
// May be called multiple times during one serialization
template <class Context>
static size_t size(const TagSet& t, Context&) {
return t.size() + t.getBytes();
}
// Guaranteed to be called only once during serialization
template <class Context>
static void save(uint8_t* out, const TagSet& t, Context& c) {
t.save(out, c);
}
// Context is an arbitrary type that is plumbed by reference throughout the
// load call tree.
template <class Context>
static void load(const uint8_t* data, size_t size, TagSet& t, Context& context) {
t.load(data, size, context);
}
};
@ -208,4 +226,4 @@ using PrioritizedTransactionTagMap = std::map<TransactionPriority, TransactionTa
template <class Value>
using UIDTransactionTagMap = std::unordered_map<UID, TransactionTagMap<Value>>;
#endif
#endif

View File

@ -45,7 +45,7 @@ double Counter::getRate() const {
}
double Counter::getRoughness() const {
double elapsed = now() - roughness_interval_start;
double elapsed = last_event - roughness_interval_start;
if(elapsed == 0) {
return -1;
}

View File

@ -239,6 +239,10 @@ if (WITH_ROCKSDB_EXPERIMENTAL)
set(PORTABLE_ROCKSDB 1)
include(CompileRocksDB)
# CompileRocksDB sets `lz4_LIBRARIES` to be the shared lib, we want to link
# statically, so find the static library here.
find_library(lz4_STATIC_LIBRARIES
NAMES liblz4.a REQUIRED)
endif()
# Suppress warnings in sqlite since it's third party
@ -259,7 +263,7 @@ target_include_directories(fdbserver PRIVATE
if (WITH_ROCKSDB_EXPERIMENTAL)
add_dependencies(fdbserver rocksdb)
target_include_directories(fdbserver PRIVATE ${ROCKSDB_INCLUDE_DIR})
target_link_libraries(fdbserver PRIVATE fdbclient fdb_sqlite ${ROCKSDB_LIBRARIES})
target_link_libraries(fdbserver PRIVATE fdbclient fdb_sqlite ${ROCKSDB_LIBRARIES} ${lz4_STATIC_LIBRARIES})
else()
target_link_libraries(fdbserver PRIVATE fdbclient fdb_sqlite)
endif()

View File

@ -622,6 +622,8 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
int lowestUtilizationTeam;
int highestUtilizationTeam;
AsyncTrigger printDetailedTeamsInfo;
void resetLocalitySet() {
storageServerSet = Reference<LocalitySet>(new LocalityMap<UID>());
LocalityMap<UID>* storageServerMap = (LocalityMap<UID>*) storageServerSet.getPtr();
@ -801,6 +803,13 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
} else {
self->medianAvailableSpace = SERVER_KNOBS->MIN_AVAILABLE_SPACE_RATIO;
}
if (self->medianAvailableSpace < SERVER_KNOBS->TARGET_AVAILABLE_SPACE_RATIO) {
TraceEvent(SevWarn, "DDTeamMedianAvailableSpaceTooSmall", self->distributorId)
.detail("MedianAvailableSpaceRatio", self->medianAvailableSpace)
.detail("TargetAvailableSpaceRatio", SERVER_KNOBS->TARGET_AVAILABLE_SPACE_RATIO)
.detail("Primary", self->primary);
self->printDetailedTeamsInfo.trigger();
}
}
bool foundSrc = false;
@ -2719,6 +2728,196 @@ ACTOR Future<Void> waitUntilHealthy(DDTeamCollection* self, double extraDelay =
}
}
// Take a snapshot of necessary data structures from `DDTeamCollection` and print them out with yields to avoid slow
// task on the run loop.
ACTOR Future<Void> printSnapshotTeamsInfo(Reference<DDTeamCollection> self) {
state DatabaseConfiguration configuration;
state std::map<UID, Reference<TCServerInfo>> server_info;
state std::map<UID, ServerStatus> server_status;
state vector<Reference<TCTeamInfo>> teams;
state std::map<Standalone<StringRef>, Reference<TCMachineInfo>> machine_info;
state std::vector<Reference<TCMachineTeamInfo>> machineTeams;
// state std::vector<std::string> internedLocalityRecordKeyNameStrings;
// state int machineLocalityMapEntryArraySize;
// state std::vector<Reference<LocalityRecord>> machineLocalityMapRecordArray;
state int traceEventsPrinted = 0;
state std::vector<const UID*> serverIDs;
state double lastPrintTime = 0;
loop {
wait(self->printDetailedTeamsInfo.onTrigger());
if (now() - lastPrintTime < SERVER_KNOBS->DD_TEAMS_INFO_PRINT_INTERVAL) {
continue;
}
lastPrintTime = now();
traceEventsPrinted = 0;
double snapshotStart = now();
configuration = self->configuration;
server_info = self->server_info;
teams = self->teams;
machine_info = self->machine_info;
machineTeams = self->machineTeams;
// internedLocalityRecordKeyNameStrings = self->machineLocalityMap._keymap->_lookuparray;
// machineLocalityMapEntryArraySize = self->machineLocalityMap.size();
// machineLocalityMapRecordArray = self->machineLocalityMap.getRecordArray();
std::vector<const UID*> _uids = self->machineLocalityMap.getObjects();
serverIDs = _uids;
auto const& keys = self->server_status.getKeys();
for (auto const& key : keys) {
server_status.emplace(key, self->server_status.get(key));
}
TraceEvent("DDPrintSnapshotTeasmInfo", self->distributorId)
.detail("SnapshotSpeed", now() - snapshotStart)
.detail("Primary", self->primary);
// Print to TraceEvents
TraceEvent("DDConfig", self->distributorId)
.detail("StorageTeamSize", configuration.storageTeamSize)
.detail("DesiredTeamsPerServer", SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER)
.detail("MaxTeamsPerServer", SERVER_KNOBS->MAX_TEAMS_PER_SERVER)
.detail("Primary", self->primary);
TraceEvent("ServerInfo", self->distributorId)
.detail("Size", server_info.size())
.detail("Primary", self->primary);
state int i;
state std::map<UID, Reference<TCServerInfo>>::iterator server = server_info.begin();
for (i = 0; i < server_info.size(); i++) {
TraceEvent("ServerInfo", self->distributorId)
.detail("ServerInfoIndex", i)
.detail("ServerID", server->first.toString())
.detail("ServerTeamOwned", server->second->teams.size())
.detail("MachineID", server->second->machine->machineID.contents().toString())
.detail("Primary", self->primary);
server++;
if (++traceEventsPrinted % SERVER_KNOBS->DD_TEAMS_INFO_PRINT_YIELD_COUNT == 0) {
wait(yield());
}
}
server = server_info.begin();
for (i = 0; i < server_info.size(); i++) {
const UID& uid = server->first;
TraceEvent("ServerStatus", self->distributorId)
.detail("ServerUID", uid)
.detail("Healthy", !server_status.at(uid).isUnhealthy())
.detail("MachineIsValid", server_info[uid]->machine.isValid())
.detail("MachineTeamSize",
server_info[uid]->machine.isValid() ? server_info[uid]->machine->machineTeams.size() : -1)
.detail("Primary", self->primary);
server++;
if (++traceEventsPrinted % SERVER_KNOBS->DD_TEAMS_INFO_PRINT_YIELD_COUNT == 0) {
wait(yield());
}
}
TraceEvent("ServerTeamInfo", self->distributorId).detail("Size", teams.size()).detail("Primary", self->primary);
for (i = 0; i < teams.size(); i++) {
const auto& team = teams[i];
TraceEvent("ServerTeamInfo", self->distributorId)
.detail("TeamIndex", i)
.detail("Healthy", team->isHealthy())
.detail("TeamSize", team->size())
.detail("MemberIDs", team->getServerIDsStr())
.detail("Primary", self->primary);
if (++traceEventsPrinted % SERVER_KNOBS->DD_TEAMS_INFO_PRINT_YIELD_COUNT == 0) {
wait(yield());
}
}
TraceEvent("MachineInfo", self->distributorId)
.detail("Size", machine_info.size())
.detail("Primary", self->primary);
state std::map<Standalone<StringRef>, Reference<TCMachineInfo>>::iterator machine = machine_info.begin();
state bool isMachineHealthy = false;
for (i = 0; i < machine_info.size(); i++) {
Reference<TCMachineInfo> _machine = machine->second;
if (!_machine.isValid() || machine_info.find(_machine->machineID) == machine_info.end() ||
_machine->serversOnMachine.empty()) {
isMachineHealthy = false;
}
// Healthy machine has at least one healthy server
for (auto& server : _machine->serversOnMachine) {
if (!server_status.at(server->id).isUnhealthy()) {
isMachineHealthy = true;
}
}
isMachineHealthy = false;
TraceEvent("MachineInfo", self->distributorId)
.detail("MachineInfoIndex", i)
.detail("Healthy", isMachineHealthy)
.detail("MachineID", machine->first.contents().toString())
.detail("MachineTeamOwned", machine->second->machineTeams.size())
.detail("ServerNumOnMachine", machine->second->serversOnMachine.size())
.detail("ServersID", machine->second->getServersIDStr())
.detail("Primary", self->primary);
machine++;
if (++traceEventsPrinted % SERVER_KNOBS->DD_TEAMS_INFO_PRINT_YIELD_COUNT == 0) {
wait(yield());
}
}
TraceEvent("MachineTeamInfo", self->distributorId)
.detail("Size", machineTeams.size())
.detail("Primary", self->primary);
for (i = 0; i < machineTeams.size(); i++) {
const auto& team = machineTeams[i];
TraceEvent("MachineTeamInfo", self->distributorId)
.detail("TeamIndex", i)
.detail("MachineIDs", team->getMachineIDsStr())
.detail("ServerTeams", team->serverTeams.size())
.detail("Primary", self->primary);
if (++traceEventsPrinted % SERVER_KNOBS->DD_TEAMS_INFO_PRINT_YIELD_COUNT == 0) {
wait(yield());
}
}
// TODO: re-enable the following logging or remove them.
// TraceEvent("LocalityRecordKeyName", self->distributorId)
// .detail("Size", internedLocalityRecordKeyNameStrings.size())
// .detail("Primary", self->primary);
// for (i = 0; i < internedLocalityRecordKeyNameStrings.size(); i++) {
// TraceEvent("LocalityRecordKeyIndexName", self->distributorId)
// .detail("KeyIndex", i)
// .detail("KeyName", internedLocalityRecordKeyNameStrings[i])
// .detail("Primary", self->primary);
// if (++traceEventsPrinted % SERVER_KNOBS->DD_TEAMS_INFO_PRINT_YIELD_COUNT == 0) {
// wait(yield());
// }
// }
// TraceEvent("MachineLocalityMap", self->distributorId)
// .detail("Size", machineLocalityMapEntryArraySize)
// .detail("Primary", self->primary);
// for (i = 0; i < serverIDs.size(); i++) {
// const auto& serverID = serverIDs[i];
// Reference<LocalityRecord> record = machineLocalityMapRecordArray[i];
// if (record.isValid()) {
// TraceEvent("MachineLocalityMap", self->distributorId)
// .detail("LocalityIndex", i)
// .detail("UID", serverID->toString())
// .detail("LocalityRecord", record->toString())
// .detail("Primary", self->primary);
// } else {
// TraceEvent("MachineLocalityMap", self->distributorId)
// .detail("LocalityIndex", i)
// .detail("UID", serverID->toString())
// .detail("LocalityRecord", "[NotFound]")
// .detail("Primary", self->primary);
// }
// if (++traceEventsPrinted % SERVER_KNOBS->DD_TEAMS_INFO_PRINT_YIELD_COUNT == 0) {
// wait(yield());
// }
// }
}
}
ACTOR Future<Void> removeBadTeams(DDTeamCollection* self) {
wait(self->initialFailureReactionDelay);
wait(waitUntilHealthy(self));
@ -4719,12 +4918,15 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributorData> self,
reportErrorsExcept(dataDistributionTeamCollection(remoteTeamCollection, initData, tcis[1],
self->dbInfo, ddEnabledState),
"DDTeamCollectionSecondary", self->ddId, &normalDDQueueErrors()));
actors.push_back(printSnapshotTeamsInfo(remoteTeamCollection));
}
primaryTeamCollection->teamCollections = teamCollectionsPtrs;
self->teamCollection = primaryTeamCollection.getPtr();
actors.push_back(reportErrorsExcept(
dataDistributionTeamCollection(primaryTeamCollection, initData, tcis[0], self->dbInfo, ddEnabledState),
"DDTeamCollectionPrimary", self->ddId, &normalDDQueueErrors()));
actors.push_back(printSnapshotTeamsInfo(primaryTeamCollection));
actors.push_back(yieldPromiseStream(output.getFuture(), input));
wait( waitForAll( actors ) );

View File

@ -243,6 +243,8 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
init( DD_SS_FAILURE_VERSIONLAG, 250000000 );
init( DD_SS_ALLOWED_VERSIONLAG, 200000000 ); if( randomize && BUGGIFY ) { DD_SS_FAILURE_VERSIONLAG = deterministicRandom()->randomInt(15000000, 500000000); DD_SS_ALLOWED_VERSIONLAG = 0.75 * DD_SS_FAILURE_VERSIONLAG; }
init( DD_SS_STUCK_TIME_LIMIT, 300.0 ); if( randomize && BUGGIFY ) { DD_SS_STUCK_TIME_LIMIT = 200.0 + deterministicRandom()->random01() * 100.0; }
init( DD_TEAMS_INFO_PRINT_INTERVAL, 60 ); if( randomize && BUGGIFY ) DD_TEAMS_INFO_PRINT_INTERVAL = 10;
init( DD_TEAMS_INFO_PRINT_YIELD_COUNT, 100 ); if( randomize && BUGGIFY ) DD_TEAMS_INFO_PRINT_YIELD_COUNT = deterministicRandom()->random01() * 1000 + 1;
// TeamRemover
init( TR_FLAG_DISABLE_MACHINE_TEAM_REMOVER, false ); if( randomize && BUGGIFY ) TR_FLAG_DISABLE_MACHINE_TEAM_REMOVER = deterministicRandom()->random01() < 0.1 ? true : false; // false by default. disable the consistency check when it's true

View File

@ -191,6 +191,8 @@ public:
int64_t DD_SS_FAILURE_VERSIONLAG; // Allowed SS version lag from the current read version before marking it as failed.
int64_t DD_SS_ALLOWED_VERSIONLAG; // SS will be marked as healthy if it's version lag goes below this value.
double DD_SS_STUCK_TIME_LIMIT; // If a storage server is not getting new versions for this amount of time, then it becomes undesired.
int DD_TEAMS_INFO_PRINT_INTERVAL;
int DD_TEAMS_INFO_PRINT_YIELD_COUNT;
// TeamRemover to remove redundant teams
bool TR_FLAG_DISABLE_MACHINE_TEAM_REMOVER; // disable the machineTeamRemover actor

View File

@ -487,6 +487,7 @@ struct RolesInfo {
obj["data_lag"] = getLagObject(versionLag);
obj["durability_lag"] = getLagObject(version - durableVersion);
dataLagSeconds = versionLag / (double)SERVER_KNOBS->VERSIONS_PER_SECOND;
TraceEventFields const& busiestReadTag = metrics.at("BusiestReadTag");
if(busiestReadTag.size()) {

View File

@ -484,7 +484,7 @@ public:
TEST(true); // Tracking tag on storage server
double cost = costFunction(bytes);
for(auto& tag : tags.get()) {
int64_t &count = intervalCounts[TransactionTag(tag, tags.get().arena)];
int64_t& count = intervalCounts[TransactionTag(tag, tags.get().getArena())];
count += cost;
if(count > busiestTagCount) {
busiestTagCount = count;

View File

@ -332,6 +332,7 @@ struct BackupToDBCorrectnessWorkload : TestWorkload {
try {
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
// Check the left over tasks
// We have to wait for the list to empty since an abort and get status
@ -537,9 +538,11 @@ struct BackupToDBCorrectnessWorkload : TestWorkload {
TraceEvent("BARW_AbortBackupExtra", randomID).detail("BackupTag", printable(self->backupTag));
try {
wait(backupAgent.abortBackup(self->extraDB, self->backupTag));
}
catch (Error& e) {
// This abort can race with submitBackup such that destUID may
// not be set yet. Adding "waitForDestUID" flag to avoid the race.
wait(backupAgent.abortBackup(self->extraDB, self->backupTag, /*partial=*/false,
/*abortOldBackup=*/false, /*dstOnly=*/false, /*waitForDestUID*/ true));
} catch (Error& e) {
TraceEvent("BARW_AbortBackupExtraException", randomID).error(e);
if (e.code() != error_code_backup_unneeded)
throw;

View File

@ -143,6 +143,7 @@ struct BackupToDBUpgradeWorkload : TestWorkload {
try {
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
// Check the left over tasks
// We have to wait for the list to empty since an abort and get status

View File

@ -1191,6 +1191,13 @@ struct FuzzApiCorrectnessWorkload : TestWorkload {
{
op = -1;
}
// do not test the option since it's already used by the workload
if (op == FDBTransactionOptions::SPECIAL_KEY_SPACE_RELAXED)
op = -1;
// disable for now(see issue#3934, pr#3930)
if (op == FDBTransactionOptions::CHECK_WRITES_ENABLE)
op = -1;
double orv = deterministicRandom()->random01();
if (orv >= 0.25) {