Merge branch 'release-6.1' into merge-release-6.1-into-master

# Conflicts:
#	documentation/sphinx/source/release-notes.rst
#	fdbserver/DataDistribution.actor.cpp
This commit is contained in:
A.J. Beamon 2019-07-05 13:52:29 -07:00
commit 2a56e011ea
17 changed files with 392 additions and 70 deletions

View File

@ -68,6 +68,10 @@ class ResultSet(object):
self.tester_results[name] = results
@staticmethod
def _min_tuple(t1, t2):
    # Return whichever tuple orders first according to fdb.tuple.compare.
    # t1 is chosen only when it compares strictly less than t2; otherwise
    # (including when the two compare equal) t2 is returned.
    if fdb.tuple.compare(t1, t2) < 0:
        return t1
    return t2
def check_for_errors(self):
if len(self.tester_results) == 1:
return (0, False)
@ -97,7 +101,7 @@ class ResultSet(object):
# If these results aren't using sequence numbers, then we match two results based on whether they share the same key
else:
min_key = min([r.key(self.specification) for r in results.values()])
min_key = reduce(ResultSet._min_tuple, [r.key(self.specification) for r in results.values()])
results = {i: r for i, r in results.items() if Result.tuples_match(r.key(self.specification), min_key)}
# Increment the indices for those testers which produced a result in this iteration

View File

@ -10,38 +10,38 @@ macOS
The macOS installation package is supported on macOS 10.7+. It includes the client and (optionally) the server.
* `FoundationDB-6.1.10.pkg <https://www.foundationdb.org/downloads/6.1.10/macOS/installers/FoundationDB-6.1.10.pkg>`_
* `FoundationDB-6.1.11.pkg <https://www.foundationdb.org/downloads/6.1.11/macOS/installers/FoundationDB-6.1.11.pkg>`_
Ubuntu
------
The Ubuntu packages are supported on 64-bit Ubuntu 12.04+, but beware of the Linux kernel bug in Ubuntu 12.x.
* `foundationdb-clients-6.1.10-1_amd64.deb <https://www.foundationdb.org/downloads/6.1.10/ubuntu/installers/foundationdb-clients_6.1.10-1_amd64.deb>`_
* `foundationdb-server-6.1.10-1_amd64.deb <https://www.foundationdb.org/downloads/6.1.10/ubuntu/installers/foundationdb-server_6.1.10-1_amd64.deb>`_ (depends on the clients package)
* `foundationdb-clients-6.1.11-1_amd64.deb <https://www.foundationdb.org/downloads/6.1.11/ubuntu/installers/foundationdb-clients_6.1.11-1_amd64.deb>`_
* `foundationdb-server-6.1.11-1_amd64.deb <https://www.foundationdb.org/downloads/6.1.11/ubuntu/installers/foundationdb-server_6.1.11-1_amd64.deb>`_ (depends on the clients package)
RHEL/CentOS EL6
---------------
The RHEL/CentOS EL6 packages are supported on 64-bit RHEL/CentOS 6.x.
* `foundationdb-clients-6.1.10-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.10/rhel6/installers/foundationdb-clients-6.1.10-1.el6.x86_64.rpm>`_
* `foundationdb-server-6.1.10-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.10/rhel6/installers/foundationdb-server-6.1.10-1.el6.x86_64.rpm>`_ (depends on the clients package)
* `foundationdb-clients-6.1.11-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.11/rhel6/installers/foundationdb-clients-6.1.11-1.el6.x86_64.rpm>`_
* `foundationdb-server-6.1.11-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.11/rhel6/installers/foundationdb-server-6.1.11-1.el6.x86_64.rpm>`_ (depends on the clients package)
RHEL/CentOS EL7
---------------
The RHEL/CentOS EL7 packages are supported on 64-bit RHEL/CentOS 7.x.
* `foundationdb-clients-6.1.10-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.10/rhel7/installers/foundationdb-clients-6.1.10-1.el7.x86_64.rpm>`_
* `foundationdb-server-6.1.10-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.10/rhel7/installers/foundationdb-server-6.1.10-1.el7.x86_64.rpm>`_ (depends on the clients package)
* `foundationdb-clients-6.1.11-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.11/rhel7/installers/foundationdb-clients-6.1.11-1.el7.x86_64.rpm>`_
* `foundationdb-server-6.1.11-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.11/rhel7/installers/foundationdb-server-6.1.11-1.el7.x86_64.rpm>`_ (depends on the clients package)
Windows
-------
The Windows installer is supported on 64-bit Windows XP and later. It includes the client and (optionally) the server.
* `foundationdb-6.1.10-x64.msi <https://www.foundationdb.org/downloads/6.1.10/windows/installers/foundationdb-6.1.10-x64.msi>`_
* `foundationdb-6.1.11-x64.msi <https://www.foundationdb.org/downloads/6.1.11/windows/installers/foundationdb-6.1.11-x64.msi>`_
API Language Bindings
=====================
@ -58,18 +58,18 @@ On macOS and Windows, the FoundationDB Python API bindings are installed as part
If you need to use the FoundationDB Python API from other Python installations or paths, download the Python package:
* `foundationdb-6.1.10.tar.gz <https://www.foundationdb.org/downloads/6.1.10/bindings/python/foundationdb-6.1.10.tar.gz>`_
* `foundationdb-6.1.11.tar.gz <https://www.foundationdb.org/downloads/6.1.11/bindings/python/foundationdb-6.1.11.tar.gz>`_
Ruby 1.9.3/2.0.0+
-----------------
* `fdb-6.1.10.gem <https://www.foundationdb.org/downloads/6.1.10/bindings/ruby/fdb-6.1.10.gem>`_
* `fdb-6.1.11.gem <https://www.foundationdb.org/downloads/6.1.11/bindings/ruby/fdb-6.1.11.gem>`_
Java 8+
-------
* `fdb-java-6.1.10.jar <https://www.foundationdb.org/downloads/6.1.10/bindings/java/fdb-java-6.1.10.jar>`_
* `fdb-java-6.1.10-javadoc.jar <https://www.foundationdb.org/downloads/6.1.10/bindings/java/fdb-java-6.1.10-javadoc.jar>`_
* `fdb-java-6.1.11.jar <https://www.foundationdb.org/downloads/6.1.11/bindings/java/fdb-java-6.1.11.jar>`_
* `fdb-java-6.1.11-javadoc.jar <https://www.foundationdb.org/downloads/6.1.11/bindings/java/fdb-java-6.1.11-javadoc.jar>`_
Go 1.11+
--------

View File

@ -2,6 +2,14 @@
Release Notes
#############
6.1.11
======
Fixes
-----
* Machines which were added to a cluster immediately after the cluster was upgraded to 6.1 would not be given data. `(PR #1764) <https://github.com/apple/foundationdb/pull/1764>`_
6.1.10
======

View File

@ -1309,7 +1309,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
// Five steps to create each machine team, which are documented in the function
// Reuse ReplicationPolicy selectReplicas func to select machine team
// return number of added machine teams
int addBestMachineTeams(int targetMachineTeamsToBuild) {
int addBestMachineTeams(int targetMachineTeamsToBuild, int remainingMachineTeamBudget) {
int addedMachineTeams = 0;
int machineTeamsToBuild = 0;
@ -1327,7 +1327,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
int loopCount = 0;
// Add a team in each iteration
while (addedMachineTeams < machineTeamsToBuild) {
while (addedMachineTeams < machineTeamsToBuild || addedMachineTeams < remainingMachineTeamBudget) {
// Step 2: Get least used machines from which we choose machines as a machine team
std::vector<Reference<TCMachineInfo>> leastUsedMachines; // A less used machine has less number of teams
int minTeamCount = std::numeric_limits<int>::max();
@ -1377,6 +1377,8 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
// that have the least-utilized server
team.clear();
auto success = machineLocalityMap.selectReplicas(configuration.storagePolicy, forcedAttributes, team);
// NOTE: selectReplicas() should always return success when storageTeamSize = 1
ASSERT_WE_THINK(configuration.storageTeamSize > 1 || (configuration.storageTeamSize == 1 && success));
if (!success) {
break;
}
@ -1430,6 +1432,9 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
addMachineTeam(machines);
addedMachineTeams++;
// Update the remaining machine team budget because the budget may decrease by
// any value between 1 and storageTeamSize
remainingMachineTeamBudget = getRemainingMachineTeamBudget();
} else {
TraceEvent(SevWarn, "DataDistributionBuildTeams", distributorId)
.detail("Primary", primary)
@ -1589,6 +1594,32 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
return totalHealthyMachineCount;
}
// Walks server_info and reports, as (min, max), the smallest and largest number of
// server teams that any single server belongs to. Servers whose status reports
// isUnhealthy() are ignored.
// If every server is skipped, the minimum is left at the int64_t maximum and the
// maximum is left at 0.
std::pair<int64_t, int64_t> calculateMinMaxServerTeamNumOnServer() {
	int64_t fewestTeams = std::numeric_limits<int64_t>::max();
	int64_t mostTeams = 0;
	for (auto& entry : server_info) {
		if (!server_status.get(entry.first).isUnhealthy()) {
			fewestTeams = std::min(static_cast<int64_t>(entry.second->teams.size()), fewestTeams);
			mostTeams = std::max(static_cast<int64_t>(entry.second->teams.size()), mostTeams);
		}
	}
	return std::make_pair(fewestTeams, mostTeams);
}
// Walks machine_info and reports, as (min, max), the smallest and largest number of
// machine teams that any single machine belongs to. Machines for which
// isMachineHealthy() returns false are ignored.
// If every machine is skipped, the minimum is left at the int64_t maximum and the
// maximum is left at 0.
std::pair<int64_t, int64_t> calculateMinMaxMachineTeamNumOnMachine() {
	int64_t fewestTeams = std::numeric_limits<int64_t>::max();
	int64_t mostTeams = 0;
	for (auto& entry : machine_info) {
		if (isMachineHealthy(entry.second)) {
			fewestTeams = std::min<int64_t>(static_cast<int64_t>(entry.second->machineTeams.size()), fewestTeams);
			mostTeams = std::max<int64_t>(static_cast<int64_t>(entry.second->machineTeams.size()), mostTeams);
		}
	}
	return std::make_pair(fewestTeams, mostTeams);
}
// Sanity check
bool isServerTeamNumberCorrect(Reference<TCMachineTeamInfo>& mt) {
int num = 0;
@ -1639,12 +1670,41 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
return healthyTeamCount;
}
// Each machine is expected to belong to SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER machine teams.
// The remaining machine team budget is the sum, over every machine in machine_info, of how many
// machine teams that machine is still short of that number. Machines already at or above it
// contribute nothing.
int getRemainingMachineTeamBudget() {
	int budget = 0;
	for (auto& machineEntry : machine_info) {
		int teamsOnMachine = machineEntry.second->machineTeams.size();
		int shortfall = static_cast<int>(SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER - teamsOnMachine);
		if (shortfall > 0) {
			budget += shortfall;
		}
	}
	// The budget is intentionally over-provisioned: when a new machine team is built we do not
	// know how many machines' shortfalls it will count against. For example, right after a new
	// machine is added, a new machine team consumes only 1 unit of the budget.
	return budget;
}
// Each server is expected to belong to SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER server teams.
// The remaining server team budget is the sum, over every server in server_info, of how many
// teams that server is still short of that number. Servers already at or above it contribute
// nothing.
int getRemainingServerTeamBudget() {
	int budget = 0;
	for (auto& serverEntry : server_info) {
		int teamsOnServer = serverEntry.second->teams.size();
		int shortfall = static_cast<int>(SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER - teamsOnServer);
		if (shortfall > 0) {
			budget += shortfall;
		}
	}
	return budget;
}
// Create server teams based on machine teams
// Before the number of machine teams reaches the threshold, build a machine team for each server team
// When it reaches the threshold, first try to build a server team with existing machine teams; if failed,
// build an extra machine team and record the event in trace
int addTeamsBestOf(int teamsToBuild, int desiredTeamNumber, int maxTeamNumber) {
ASSERT(teamsToBuild > 0);
int addTeamsBestOf(int teamsToBuild, int desiredTeamNumber, int maxTeamNumber, int remainingTeamBudget) {
ASSERT(teamsToBuild >= 0);
ASSERT_WE_THINK(machine_info.size() > 0 || server_info.size() == 0);
int addedMachineTeams = 0;
@ -1655,27 +1715,28 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
// When we change configuration, we may have machine teams with storageTeamSize in the old configuration.
int healthyMachineTeamCount = getHealthyMachineTeamCount();
int totalMachineTeamCount = machineTeams.size();
int totalHealthyMachineCount = calculateHealthyMachineCount();
int remainingMachineTeamBudget = getRemainingMachineTeamBudget();
int desiredMachineTeams = SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * totalHealthyMachineCount;
int maxMachineTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * totalHealthyMachineCount;
// machineTeamsToBuild mimics how the teamsToBuild is calculated in buildTeams()
int machineTeamsToBuild =
std::min(desiredMachineTeams - healthyMachineTeamCount, maxMachineTeams - totalMachineTeamCount);
int machineTeamsToBuild = std::max(
0, std::min(desiredMachineTeams - healthyMachineTeamCount, maxMachineTeams - totalMachineTeamCount));
TraceEvent("BuildMachineTeams")
.detail("TotalHealthyMachine", totalHealthyMachineCount)
.detail("HealthyMachineTeamCount", healthyMachineTeamCount)
.detail("DesiredMachineTeams", desiredMachineTeams)
.detail("MaxMachineTeams", maxMachineTeams)
.detail("MachineTeamsToBuild", machineTeamsToBuild);
.detail("MachineTeamsToBuild", machineTeamsToBuild)
.detail("RemainingMachineTeamBudget", remainingMachineTeamBudget);
// Pre-build all machine teams until we have the desired number of machine teams
if (machineTeamsToBuild > 0) {
addedMachineTeams = addBestMachineTeams(machineTeamsToBuild);
if (machineTeamsToBuild > 0 || remainingMachineTeamBudget > 0) {
addedMachineTeams = addBestMachineTeams(machineTeamsToBuild, remainingMachineTeamBudget);
}
while (addedTeams < teamsToBuild) {
while (addedTeams < teamsToBuild || addedTeams < remainingTeamBudget) {
// Step 1: Create 1 best machine team
std::vector<UID> bestServerTeam;
int bestScore = std::numeric_limits<int>::max();
@ -1752,6 +1813,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
// Step 4: Add the server team
addTeam(bestServerTeam.begin(), bestServerTeam.end(), false);
addedTeams++;
remainingTeamBudget = getRemainingServerTeamBudget();
if (++loopCount > 2 * teamsToBuild * (configuration.storageTeamSize + 1)) {
break;
@ -1760,10 +1822,14 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
healthyMachineTeamCount = getHealthyMachineTeamCount();
std::pair<uint64_t, uint64_t> minMaxTeamNumberOnServer = calculateMinMaxServerTeamNumOnServer();
std::pair<uint64_t, uint64_t> minMaxMachineTeamNumberOnMachine = calculateMinMaxMachineTeamNumOnMachine();
TraceEvent("TeamCollectionInfo", distributorId)
.detail("Primary", primary)
.detail("AddedTeamNumber", addedTeams)
.detail("AimToBuildTeamNumber", teamsToBuild)
.detail("RemainingTeamBudget", remainingTeamBudget)
.detail("CurrentTeamNumber", teams.size())
.detail("DesiredTeamNumber", desiredTeamNumber)
.detail("MaxTeamNumber", maxTeamNumber)
@ -1773,6 +1839,11 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
.detail("DesiredMachineTeams", desiredMachineTeams)
.detail("MaxMachineTeams", maxMachineTeams)
.detail("TotalHealthyMachine", totalHealthyMachineCount)
.detail("MinTeamNumberOnServer", minMaxTeamNumberOnServer.first)
.detail("MaxTeamNumberOnServer", minMaxTeamNumberOnServer.second)
.detail("MinMachineTeamNumberOnMachine", minMaxMachineTeamNumberOnMachine.first)
.detail("MaxMachineTeamNumberOnMachine", minMaxMachineTeamNumberOnMachine.second)
.detail("DoBuildTeams", doBuildTeams)
.trackLatest("TeamCollectionInfo");
return addedTeams;
@ -1789,10 +1860,14 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
int maxMachineTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * totalHealthyMachineCount;
int healthyMachineTeamCount = getHealthyMachineTeamCount();
std::pair<uint64_t, uint64_t> minMaxTeamNumberOnServer = calculateMinMaxServerTeamNumOnServer();
std::pair<uint64_t, uint64_t> minMaxMachineTeamNumberOnMachine = calculateMinMaxMachineTeamNumOnMachine();
TraceEvent("TeamCollectionInfo", distributorId)
.detail("Primary", primary)
.detail("AddedTeamNumber", 0)
.detail("AimToBuildTeamNumber", 0)
.detail("RemainingTeamBudget", 0)
.detail("CurrentTeamNumber", teams.size())
.detail("DesiredTeamNumber", desiredServerTeams)
.detail("MaxTeamNumber", maxServerTeams)
@ -1802,14 +1877,22 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
.detail("DesiredMachineTeams", desiredMachineTeams)
.detail("MaxMachineTeams", maxMachineTeams)
.detail("TotalHealthyMachine", totalHealthyMachineCount)
.detail("MinTeamNumberOnServer", minMaxTeamNumberOnServer.first)
.detail("MaxTeamNumberOnServer", minMaxTeamNumberOnServer.second)
.detail("MinMachineTeamNumberOnMachine", minMaxMachineTeamNumberOnMachine.first)
.detail("MaxMachineTeamNumberOnMachine", minMaxMachineTeamNumberOnMachine.second)
.detail("DoBuildTeams", doBuildTeams)
.trackLatest("TeamCollectionInfo");
// Debug purpose
// if (healthyMachineTeamCount > desiredMachineTeams || machineTeams.size() > maxMachineTeams) {
// // When the number of machine teams is over the limit, print out the current team info.
// traceAllInfo(true);
// }
// Advance time so that we will not have multiple TeamCollectionInfo at the same time, otherwise
// simulation test will randomly pick one TeamCollectionInfo trace, which could be the one before build teams
// wait(delay(0.01));
// Debug purpose
// if (healthyMachineTeamCount > desiredMachineTeams || machineTeams.size() > maxMachineTeams) {
// // When the number of machine teams is over the limit, print out the current team info.
// traceAllInfo(true);
// }
}
// Use the current set of known processes (from server_info) to compute an optimized set of storage server teams.
@ -1856,10 +1939,14 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
totalTeamCount++;
}
}
// Each server is expected to have SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER,
// remainingTeamBudget is the number of teams needed to ensure every server has
// SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER teams
int remainingTeamBudget = self->getRemainingServerTeamBudget();
// teamsToBuild is calculated such that we will not build too many teams in the situation
// when all (or most of) teams become unhealthy temporarily and then healthy again
state int teamsToBuild = std::min(desiredTeams - teamCount, maxTeams - totalTeamCount);
state int teamsToBuild = std::max(0, std::min(desiredTeams - teamCount, maxTeams - totalTeamCount));
TraceEvent("BuildTeamsBegin", self->distributorId)
.detail("TeamsToBuild", teamsToBuild)
@ -1876,13 +1963,13 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
.detail("MachineCount", self->machine_info.size())
.detail("DesiredTeamsPerServer", SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER);
if (teamsToBuild > 0) {
if (teamsToBuild > 0 || remainingTeamBudget > 0) {
state vector<std::vector<UID>> builtTeams;
// addTeamsBestOf() will not add more teams than needed.
// If the team number is more than the desired, the extra teams are added in the code path when
// a team is added as an initial team
int addedTeams = self->addTeamsBestOf(teamsToBuild, desiredTeams, maxTeams);
int addedTeams = self->addTeamsBestOf(teamsToBuild, desiredTeams, maxTeams, remainingTeamBudget);
if (addedTeams <= 0 && self->teams.size() == 0) {
TraceEvent(SevWarn, "NoTeamAfterBuildTeam")
@ -1898,10 +1985,14 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
int maxMachineTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * totalHealthyMachineCount;
int healthyMachineTeamCount = self->getHealthyMachineTeamCount();
std::pair<uint64_t, uint64_t> minMaxTeamNumberOnServer = self->calculateMinMaxServerTeamNumOnServer();
std::pair<uint64_t, uint64_t> minMaxMachineTeamNumberOnMachine = self->calculateMinMaxMachineTeamNumOnMachine();
TraceEvent("TeamCollectionInfo", self->distributorId)
.detail("Primary", self->primary)
.detail("AddedTeamNumber", 0)
.detail("AimToBuildTeamNumber", teamsToBuild)
.detail("RemainingTeamBudget", remainingTeamBudget)
.detail("CurrentTeamNumber", self->teams.size())
.detail("DesiredTeamNumber", desiredTeams)
.detail("MaxTeamNumber", maxTeams)
@ -1911,6 +2002,11 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
.detail("DesiredMachineTeams", desiredMachineTeams)
.detail("MaxMachineTeams", maxMachineTeams)
.detail("TotalHealthyMachine", totalHealthyMachineCount)
.detail("MinTeamNumberOnServer", minMaxTeamNumberOnServer.first)
.detail("MaxTeamNumberOnServer", minMaxTeamNumberOnServer.second)
.detail("MinMachineTeamNumberOnMachine", minMaxMachineTeamNumberOnMachine.first)
.detail("MaxMachineTeamNumberOnMachine", minMaxMachineTeamNumberOnMachine.second)
.detail("DoBuildTeams", self->doBuildTeams)
.trackLatest("TeamCollectionInfo");
}
}
@ -2308,6 +2404,16 @@ ACTOR Future<Void> teamRemover(DDTeamCollection* self) {
team = mt->serverTeams[teamIndex];
ASSERT(team->machineTeam->machineIDs == mt->machineIDs); // Sanity check
// Check if a server will have 0 team after the team is removed
for (auto& s : team->getServers()) {
if (s->teams.size() == 0) {
TraceEvent(SevError, "TeamRemoverTooAggressive")
.detail("Server", s->id)
.detail("Team", team->getServerIDsStr());
self->traceAllInfo(true);
}
}
// The team will be marked as a bad team
bool foundTeam = self->removeTeam(team);
ASSERT(foundTeam == true);
@ -2734,12 +2840,18 @@ ACTOR Future<Void> waitHealthyZoneChange( DDTeamCollection* self ) {
if(val.present()) {
auto p = decodeHealthyZoneValue(val.get());
if(p.second > tr.getReadVersion().get()) {
healthyZoneTimeout = delay((p.second - tr.getReadVersion().get())/(double)SERVER_KNOBS->VERSIONS_PER_SECOND);
self->healthyZone.set(p.first);
} else {
double timeoutSeconds = (p.second - tr.getReadVersion().get())/(double)SERVER_KNOBS->VERSIONS_PER_SECOND;
healthyZoneTimeout = delay(timeoutSeconds);
if(self->healthyZone.get() != p.first) {
TraceEvent("MaintenanceZoneStart", self->distributorId).detail("ZoneID", printable(p.first)).detail("EndVersion", p.second).detail("Duration", timeoutSeconds);
self->healthyZone.set(p.first);
}
} else if(self->healthyZone.get().present()) {
TraceEvent("MaintenanceZoneEnd", self->distributorId);
self->healthyZone.set(Optional<Key>());
}
} else {
} else if(self->healthyZone.get().present()) {
TraceEvent("MaintenanceZoneEnd", self->distributorId);
self->healthyZone.set(Optional<Key>());
}
@ -2840,6 +2952,7 @@ ACTOR Future<Void> storageServerFailureTracker(
}
if(status->isFailed && self->healthyZone.get().present() && self->clearHealthyZoneFuture.isReady()) {
self->clearHealthyZoneFuture = clearHealthyZone(self->cx);
TraceEvent("MaintenanceZoneCleared", self->distributorId);
self->healthyZone.set(Optional<Key>());
}
@ -2953,11 +3066,14 @@ ACTOR Future<Void> storageServerTracker(
if(hasWrongStoreTypeOrDC)
self->restartRecruiting.trigger();
if ( lastIsUnhealthy && !status.isUnhealthy() && !server->teams.size() ) {
if (lastIsUnhealthy && !status.isUnhealthy() &&
server->teams.size() < SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER) {
self->doBuildTeams = true;
self->restartTeamBuilder.trigger(); // This does not trigger building teams if there exist healthy teams
}
lastIsUnhealthy = status.isUnhealthy();
state bool recordTeamCollectionInfo = false;
choose {
when( wait( failureTracker ) ) {
// The server is failed AND all data has been removed from it, so permanently remove it.
@ -3061,7 +3177,8 @@ ACTOR Future<Void> storageServerTracker(
self->badTeamRemover = removeBadTeams(self);
self->addActor.send(self->badTeamRemover);
// The team number changes, so we need to update the team number info
self->traceTeamCollectionInfo();
// self->traceTeamCollectionInfo();
recordTeamCollectionInfo = true;
}
}
@ -3069,10 +3186,13 @@ ACTOR Future<Void> storageServerTracker(
// We rely on the old failureTracker being actorCancelled since the old actor now has a pointer to an invalid location
status = ServerStatus( status.isFailed, status.isUndesired, server->lastKnownInterface.locality );
// self->traceTeamCollectionInfo();
recordTeamCollectionInfo = true;
//Restart the storeTracker for the new interface
storeTracker = keyValueStoreTypeTracker(self, server);
hasWrongStoreTypeOrDC = false;
self->restartTeamBuilder.trigger();
if(restartRecruiting)
self->restartRecruiting.trigger();
}
@ -3093,6 +3213,10 @@ ACTOR Future<Void> storageServerTracker(
server->wakeUpTracker = Promise<Void>();
}
}
if (recordTeamCollectionInfo) {
self->traceTeamCollectionInfo();
}
}
} catch( Error &e ) {
if (e.code() != error_code_actor_cancelled && errorOut.canBeSet())
@ -3842,7 +3966,7 @@ TEST_CASE("DataDistribution/AddTeamsBestOf/UseMachineID") {
Reference<IReplicationPolicy> policy = Reference<IReplicationPolicy>(new PolicyAcross(teamSize, "zoneid", Reference<IReplicationPolicy>(new PolicyOne())));
state DDTeamCollection* collection = testMachineTeamCollection(teamSize, policy, processSize);
collection->addTeamsBestOf(30, desiredTeams, maxTeams);
collection->addTeamsBestOf(30, desiredTeams, maxTeams, 30);
ASSERT(collection->sanityCheckTeams() == true);
@ -3867,8 +3991,8 @@ TEST_CASE("DataDistribution/AddTeamsBestOf/NotUseMachineID") {
return Void();
}
collection->addBestMachineTeams(30); // Create machine teams to help debug
collection->addTeamsBestOf(30, desiredTeams, maxTeams);
collection->addBestMachineTeams(30, 30); // Create machine teams to help debug
collection->addTeamsBestOf(30, desiredTeams, maxTeams, 30);
collection->sanityCheckTeams(); // Server team may happen to be on the same machine team, although unlikely
if (collection) delete (collection);
@ -3883,7 +4007,7 @@ TEST_CASE("DataDistribution/AddAllTeams/isExhaustive") {
state int maxTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * processSize;
state DDTeamCollection* collection = testTeamCollection(3, policy, processSize);
int result = collection->addTeamsBestOf(200, desiredTeams, maxTeams);
int result = collection->addTeamsBestOf(200, desiredTeams, maxTeams, 200);
delete(collection);
@ -3903,11 +4027,11 @@ TEST_CASE("/DataDistribution/AddAllTeams/withLimit") {
state DDTeamCollection* collection = testTeamCollection(3, policy, processSize);
int result = collection->addTeamsBestOf(10, desiredTeams, maxTeams);
int result = collection->addTeamsBestOf(10, desiredTeams, maxTeams, 10);
delete(collection);
ASSERT(result == 10);
ASSERT(result >= 10);
return Void();
}
@ -3923,9 +4047,9 @@ TEST_CASE("/DataDistribution/AddTeamsBestOf/SkippingBusyServers") {
collection->addTeam(std::set<UID>({ UID(1, 0), UID(2, 0), UID(3, 0) }), true);
collection->addTeam(std::set<UID>({ UID(1, 0), UID(3, 0), UID(4, 0) }), true);
int result = collection->addTeamsBestOf(8, desiredTeams, maxTeams);
int result = collection->addTeamsBestOf(8, desiredTeams, maxTeams, 8);
ASSERT(result == 8);
ASSERT(result >= 8);
for(auto process = collection->server_info.begin(); process != collection->server_info.end(); process++) {
auto teamCount = process->second->teams.size();
@ -3953,8 +4077,8 @@ TEST_CASE("/DataDistribution/AddTeamsBestOf/NotEnoughServers") {
collection->addTeam(std::set<UID>({ UID(1, 0), UID(2, 0), UID(3, 0) }), true);
collection->addTeam(std::set<UID>({ UID(1, 0), UID(3, 0), UID(4, 0) }), true);
collection->addBestMachineTeams(10);
int result = collection->addTeamsBestOf(10, desiredTeams, maxTeams);
collection->addBestMachineTeams(10, 10);
int result = collection->addTeamsBestOf(10, desiredTeams, maxTeams, 10);
if (collection->machineTeams.size() != 10 || result != 8) {
collection->traceAllInfo(true); // Debug message

View File

@ -291,6 +291,15 @@ ACTOR Future<bool> getTeamCollectionValid(Database cx, WorkerInterface dataDistr
int64_t desiredMachineTeamNumber = boost::lexical_cast<int64_t>(teamCollectionInfoMessage.getValue("DesiredMachineTeams"));
int64_t maxMachineTeamNumber = boost::lexical_cast<int64_t>(teamCollectionInfoMessage.getValue("MaxMachineTeams"));
int64_t minServerTeamOnServer =
boost::lexical_cast<int64_t>(teamCollectionInfoMessage.getValue("MinTeamNumberOnServer"));
int64_t maxServerTeamOnServer =
boost::lexical_cast<int64_t>(teamCollectionInfoMessage.getValue("MaxTeamNumberOnServer"));
int64_t minMachineTeamOnMachine =
boost::lexical_cast<int64_t>(teamCollectionInfoMessage.getValue("MinMachineTeamNumberOnMachine"));
int64_t maxMachineTeamOnMachine =
boost::lexical_cast<int64_t>(teamCollectionInfoMessage.getValue("MaxMachineTeamNumberOnMachine"));
// Team number is always valid when we disable teamRemover. This avoids false positive in simulation test
if (SERVER_KNOBS->TR_FLAG_DISABLE_TEAM_REMOVER) {
TraceEvent("GetTeamCollectionValid")
@ -300,7 +309,10 @@ ACTOR Future<bool> getTeamCollectionValid(Database cx, WorkerInterface dataDistr
// The if condition should be consistent with the condition in teamRemover() that decides
// if redundant teams exist.
if (healthyMachineTeamCount > desiredMachineTeamNumber) {
if (healthyMachineTeamCount > desiredMachineTeamNumber ||
(minMachineTeamOnMachine <= 0 && SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER == 3)) {
// When DESIRED_TEAMS_PER_SERVER == 1, we see that minMachineTeamOnMachine can be 0 in one out of 30k test
// cases. Only check DESIRED_TEAMS_PER_SERVER == 3 for now since it is the most commonly used configuration.
TraceEvent("GetTeamCollectionValid")
.detail("CurrentTeamNumber", currentTeamNumber)
.detail("DesiredTeamNumber", desiredTeamNumber)
@ -308,7 +320,13 @@ ACTOR Future<bool> getTeamCollectionValid(Database cx, WorkerInterface dataDistr
.detail("CurrentHealthyMachineTeamNumber", healthyMachineTeamCount)
.detail("DesiredMachineTeams", desiredMachineTeamNumber)
.detail("CurrentMachineTeamNumber", currentMachineTeamNumber)
.detail("MaxMachineTeams", maxMachineTeamNumber);
.detail("MaxMachineTeams", maxMachineTeamNumber)
.detail("MinTeamNumberOnServer", minServerTeamOnServer)
.detail("MaxTeamNumberOnServer", maxServerTeamOnServer)
.detail("MinMachineTeamNumberOnMachine", minMachineTeamOnMachine)
.detail("MaxMachineTeamNumberOnMachine", maxMachineTeamOnMachine)
.detail("DesiredTeamsPerServer", SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER)
.detail("MaxTeamsPerServer", SERVER_KNOBS->MAX_TEAMS_PER_SERVER);
return false;
} else {
return true;

View File

@ -70,5 +70,6 @@ ENV FDB_PORT 4500
ENV FDB_CLUSTER_FILE /var/fdb/fdb.cluster
ENV FDB_NETWORKING_MODE container
ENV FDB_COORDINATOR ""
ENV FDB_COORDINATOR_PORT 4500
ENV FDB_CLUSTER_FILE_CONTENTS ""
ENV FDB_PROCESS_CLASS unset

View File

@ -57,6 +57,13 @@ helpful when setting up a larger cluster inside a docker network, for instance
when using Docker Compose. The name you provide must be resolvable through the
DNS on the container you are running.
### FDB_COORDINATOR_PORT
The port to use for connecting to the FDB coordinator process. This should be
set by other processes in a multi-process cluster to the same value as the
`FDB_PORT` environment variable of the coordinator process. It will default
to 4500, which is also the default for `FDB_PORT`.
# Copying Into Other Images
You can also use this image to provide files for images that are clients of a
@ -68,4 +75,4 @@ files you may want to copy are:
library, which you can use if you are setting up a multiversion client.
* `/var/fdb/scripts/create_cluster_file.bash`: A script for setting up the
cluster file based on an `FDB_COORDINATOR` environment variable.
* `/usr/bin/fdbcli`: The FoundationDB CLI.
* `/usr/bin/fdbcli`: The FoundationDB CLI.

View File

@ -39,7 +39,8 @@ function create_cluster_file() {
echo "Failed to look up coordinator address for $FDB_COORDINATOR" 1>&2
exit 1
fi
echo "docker:docker@$coordinator_ip:4500" > $FDB_CLUSTER_FILE
coordinator_port=${FDB_COORDINATOR_PORT:-4500}
echo "docker:docker@$coordinator_ip:$coordinator_port" > $FDB_CLUSTER_FILE
else
echo "FDB_COORDINATOR environment variable not defined" 1>&2
exit 1
@ -47,5 +48,5 @@ function create_cluster_file() {
}
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
create_cluster_file "$@"
fi
create_cluster_file "$@"
fi

View File

@ -43,4 +43,4 @@ function create_server_environment() {
fi
create_cluster_file
}
}

View File

@ -23,7 +23,7 @@
source /var/fdb/scripts/create_server_environment.bash
create_server_environment
source /var/fdb/.fdbenv
echo "Starting FDB server on $PUBLIC_IP:4500"
fdbserver --listen_address 0.0.0.0:$FDB_PORT --public_address $PUBLIC_IP:4500 \
echo "Starting FDB server on $PUBLIC_IP:$FDB_PORT"
fdbserver --listen_address 0.0.0.0:$FDB_PORT --public_address $PUBLIC_IP:$FDB_PORT \
--datadir /var/fdb/data --logdir /var/fdb/logs \
--locality_zoneid=`hostname` --locality_machineid=`hostname` --class $FDB_PROCESS_CLASS
--locality_zoneid=`hostname` --locality_machineid=`hostname` --class $FDB_PROCESS_CLASS

View File

@ -0,0 +1,45 @@
# Local Docker-based FoundationDB Cluster
This contains a sample `docker-compose.yaml` and some simple startup and teardown
scripts for running a simple single-instance FoundationDB using the Docker image
specified in this repository. This uses the `host` networking option to expose
the server process to its host machine.
This depends on having the FoundationDB client installed on your host machine
to work properly. This can be done using one of the client packages available
on our [Download](https://www.foundationdb.org/download/) page. The startup
scripts included here depend on `fdbcli` from one of those packages, and any
client that wishes to connect will need a copy of the FoundationDB native client
in addition to its binding of choice. Both the CLI and the native client
are installed in all of our client packages.
Once those dependencies are installed, one can build the FoundationDB Docker
image:
```
docker build --build-arg FDB_VERSION=6.1.8 -t foundationdb:6.1.8 ../..
```
Then one can start the cluster by running:
```
./start.bash
```
This starts up a single instance FoundationDB cluster using the `docker-compose.yaml`
and configures it as a new database. This will write the cluster file information to
`docker.cluster`. One should then be able to access the cluster through the CLI
or one of the bindings by using this cluster file. For example:
```
fdbcli --exec status -C docker.cluster
```
To stop the cluster, one can run:
```
./stop.bash
```
Note that all data are lost between reboots of the processes as they have not
been configured to use a persistent volume (but write to Docker's temporary file system).

View File

@ -0,0 +1,32 @@
# docker-compose.yaml
#
# This source file is part of the FoundationDB open source project
#
# Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Specification for a one-node cluster that can be accessed from the host.
# The user must specify the FDB_PORT on which it is run.
# Compose file format version.
version: '3'
services:
# The single FoundationDB server process that makes up the one-node cluster.
fdb:
image: foundationdb:6.1.8
# Publish the server's port on the host under the same port number (TCP only).
ports:
- $FDB_PORT:$FDB_PORT/tcp
environment:
# NOTE(review): presumably selects how the container determines its public
# address; the handling lives outside this file -- confirm.
FDB_NETWORKING_MODE: host
# Port written into the container's cluster file for the coordinator; set to
# the server's own port because this process is its own coordinator.
FDB_COORDINATOR_PORT: $FDB_PORT
# Port on which fdbserver listens and which it advertises publicly.
FDB_PORT: $FDB_PORT

View File

@ -0,0 +1,39 @@
#! /bin/bash
#
# start.bash
#
# This source file is part of the FoundationDB open source project
#
# Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Fail fast on command errors and on use of unset variables.
set -eu

# Cluster file written for host-side clients; overridable through the environment.
FDB_CLUSTER_FILE="${FDB_CLUSTER_FILE:-docker.cluster}"
# Port used for the cluster; defaults to 4550 when FDB_PORT is unset or empty.
FDB_PORT="${FDB_PORT:-4550}"

# Pass the resolved port to docker-compose for this invocation only.
FDB_PORT="$FDB_PORT" docker-compose up -d fdb

# Point the cluster file at the local address. The path is quoted so that a
# value containing spaces or glob characters is treated as a single file name
# instead of triggering an "ambiguous redirect" error.
echo "docker:docker@127.0.0.1:$FDB_PORT" > "$FDB_CLUSTER_FILE"

# Attempt to connect. Configure the database if necessary.
if ! fdbcli -C "$FDB_CLUSTER_FILE" --exec status --timeout 1 ; then
    if ! fdbcli -C "$FDB_CLUSTER_FILE" --exec "configure new single memory ; status" --timeout 10 ; then
        # Report the failure on stderr, matching the other scripts' error handling.
        echo "Unable to configure new FDB cluster." 1>&2
        exit 1
    fi
fi

echo "Can now connect to docker-based FDB cluster using $FDB_CLUSTER_FILE."

View File

@ -0,0 +1,28 @@
#! /bin/bash
#
# stop.bash
#
# This source file is part of the FoundationDB open source project
#
# Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Abort on any failing command or on a reference to an unset variable.
set -eu

# Fall back to port 4550 when FDB_PORT is unset or empty.
: "${FDB_PORT:=4550}"

# Hand the port to docker-compose for this one invocation and tear the services down.
FDB_PORT="$FDB_PORT" docker-compose down

echo "Docker-based FDB cluster is now down."

View File

@ -24,9 +24,9 @@ RUN apt-get update; apt-get install -y dnsutils
RUN mkdir -p /app
WORKDIR /app
COPY --from=foundationdb:5.2.5 /usr/lib/libfdb_c.so /usr/lib
COPY --from=foundationdb:5.2.5 /usr/bin/fdbcli /usr/bin/
COPY --from=foundationdb:5.2.5 /var/fdb/scripts/create_cluster_file.bash /app
COPY --from=foundationdb:6.1.8 /usr/lib/libfdb_c.so /usr/lib
COPY --from=foundationdb:6.1.8 /usr/bin/fdbcli /usr/bin/
COPY --from=foundationdb:6.1.8 /var/fdb/scripts/create_cluster_file.bash /app
COPY requirements.txt /app
RUN pip install -r requirements.txt
@ -38,4 +38,4 @@ RUN chmod u+x /app/start.bash
CMD /app/start.bash
ENV FLASK_APP=server.py
ENV FLASK_ENV=development
ENV FLASK_ENV=development

View File

@ -19,18 +19,33 @@
version: '3'
services:
fdb:
image: foundationdb:5.2.5
environment:
FDB_COORDINATOR: fdb-coordinator
# Specify three fdbserver processes.
fdb-coordinator:
image: foundationdb:5.2.5
image: foundationdb:6.1.8
environment:
FDB_COORDINATOR: fdb-coordinator
fdb-server-1:
depends_on:
- fdb-coordinator
image: foundationdb:6.1.8
environment:
FDB_COORDINATOR: fdb-coordinator
fdb-server-2:
depends_on:
- fdb-coordinator
image: foundationdb:6.1.8
environment:
FDB_COORDINATOR: fdb-coordinator
# Bring up the application so that it depends on the cluster.
app:
depends_on:
- fdb-coordinator
- fdb-server-1
- fdb-server-2
build:
context: app
ports:
- 5000:5000
- 5000:5000/tcp
environment:
FDB_COORDINATOR: fdb-coordinator
FDB_COORDINATOR: fdb-coordinator

View File

@ -32,7 +32,7 @@
<Wix xmlns='http://schemas.microsoft.com/wix/2006/wi'>
<Product Name='$(var.Title)'
Id='{001349F6-30BD-4854-ABD6-A8D30CB31677}'
Id='{8E0DAD6E-4CA7-45A0-9D24-BA18FFC47547}'
UpgradeCode='{A95EA002-686E-4164-8356-C715B7F8B1C8}'
Version='$(var.Version)'
Manufacturer='$(var.Manufacturer)'