Merge branch 'master' of github.com:apple/foundationdb into feature-redwood

Stephen Atherton 2018-06-18 22:45:27 -07:00
commit e5c48d453a
35 changed files with 336 additions and 214 deletions

View File

@@ -61,10 +61,10 @@ extern "C" {
#endif
/* Pointers to these opaque types represent objects in the FDB API */
typedef struct future FDBFuture;
typedef struct cluster FDBCluster;
typedef struct database FDBDatabase;
typedef struct transaction FDBTransaction;
typedef struct FDB_future FDBFuture;
typedef struct FDB_cluster FDBCluster;
typedef struct FDB_database FDBDatabase;
typedef struct FDB_transaction FDBTransaction;
typedef int fdb_error_t;
typedef int fdb_bool_t;
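The rename touches only the tags of the opaque structs; client code handles these types exclusively through pointers, so the C ABI is unchanged. A minimal illustrative sketch of caller code (the wait_and_check helper and the API version value are assumptions of this sketch; the fdb_future_* calls are the public C API):

#define FDB_API_VERSION 520               /* assumed era-appropriate version; must precede the include */
#include <foundationdb/fdb_c.h>

/* Hypothetical helper: callers only ever hold FDBFuture* and friends, so
   renaming `struct future` to `struct FDB_future` is invisible here. */
fdb_error_t wait_and_check(FDBFuture* f) {
    fdb_error_t err = fdb_future_block_until_ready(f); /* block this thread until ready */
    if (!err) err = fdb_future_get_error(f);           /* surface any asynchronous error */
    fdb_future_destroy(f);                             /* release the opaque object */
    return err;
}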

View File

@@ -26,11 +26,15 @@ CLEAN_TARGETS += GENNAME()_clean
GENNAME()_ALL_SOURCES := $(addprefix GENDIR/,GENSOURCES)
GENNAME()_BUILD_SOURCES := $(patsubst %.actor.cpp,%.actor.g.cpp,$(filter-out %.h %.hpp,$(GENNAME()_ALL_SOURCES)))
GENNAME()_GENERATED_SOURCES := $(patsubst %.actor.h,%.actor.g.h,$(patsubst %.actor.cpp,%.actor.g.cpp,$(filter %.actor.h %.actor.cpp,$(GENNAME()_ALL_SOURCES))))
GENNAME()_GENERATED_SOURCES := $(patsubst %.actor.h,%.actor.g.h,$(patsubst %.actor.cpp,${OBJDIR}/%.actor.g.cpp,$(filter %.actor.h %.actor.cpp,$(GENNAME()_ALL_SOURCES))))
GENERATED_SOURCES += $(GENNAME()_GENERATED_SOURCES)
-include GENDIR/local.mk
# We need to include the current directory for .g.actor.cpp files emitted into
# .objs that use includes not based at the root of fdb.
GENNAME()_CFLAGS := -I GENDIR -I ${OBJDIR}/GENDIR ${GENNAME()_CFLAGS}
# If we have any static libs, we have to wrap them in the appropriate
# compiler flag magic
ifeq ($(GENNAME()_STATIC_LIBS),)
@@ -54,30 +58,34 @@ GENNAME: GENTARGET
-include $(GENNAME()_DEPS)
GENDIR/%.actor.g.cpp: GENDIR/%.actor.cpp $(ACTORCOMPILER)
$(OBJDIR)/GENDIR/%.actor.g.cpp: GENDIR/%.actor.cpp $(ACTORCOMPILER)
@echo "Actorcompiling $<"
@mkdir -p $(OBJDIR)/$(<D)
@$(MONO) $(ACTORCOMPILER) $< $@ >/dev/null
GENDIR/%.actor.g.h: GENDIR/%.actor.h $(ACTORCOMPILER)
@if [ -e $< ]; then echo "Actorcompiling $<" ; $(MONO) $(ACTORCOMPILER) $< $@ >/dev/null ; fi
.PRECIOUS: GENDIR/%.actor.g.cpp GENDIR/%.actor.g.h
.PRECIOUS: $(OBJDIR)/GENDIR/%.actor.g.cpp GENDIR/%.actor.g.h
# The order-only dependency on the generated .h files is to force make
# to actor compile all headers before attempting compilation of any .c
# or .cpp files. We have no mechanism to detect dependencies on
# generated headers before compilation.
define run-gplusplus-GENNAME() =
@mkdir -p $(DEPSDIR)/$(<D) && \
mkdir -p $(OBJDIR)/$(<D) && \
$(CCACHE_CXX) $(CFLAGS) $(CXXFLAGS) $(GENNAME()_CFLAGS) $(GENNAME()_CXXFLAGS) -MMD -MT $@ -MF $(DEPSDIR)/$<.d.tmp -c $< -o $@ && \
cp $(DEPSDIR)/$<.d.tmp $(DEPSDIR)/$<.d && \
sed -e 's/#.*//' -e 's/^[^:]*: *//' -e 's/ *\\$$//' -e '/^$$/ d' -e 's/$$/ :/' < $(DEPSDIR)/$<.d.tmp >> $(DEPSDIR)/$<.d && \
rm $(DEPSDIR)/$<.d.tmp
endef
$(OBJDIR)/GENDIR/%.cpp.o: GENDIR/%.cpp $(ALL_MAKEFILES) | $(filter %.h,$(GENERATED_SOURCES))
@echo "Compiling $<"
ifeq ($(VERBOSE),1)
@echo "$(CCACHE_CXX) $(CFLAGS) $(CXXFLAGS) $(GENNAME()_CFLAGS) $(GENNAME()_CXXFLAGS) -MMD -MT $@ -MF $(DEPSDIR)/$<.d.tmp -c $< -o $@"
endif
@mkdir -p $(DEPSDIR)/$(<D) && \
mkdir -p $(OBJDIR)/$(<D) && \
$(CCACHE_CXX) $(CFLAGS) $(CXXFLAGS) $(GENNAME()_CFLAGS) $(GENNAME()_CXXFLAGS) -MMD -MT $@ -MF $(DEPSDIR)/$<.d.tmp -c $< -o $@ && \
cp $(DEPSDIR)/$<.d.tmp $(DEPSDIR)/$<.d && \
sed -e 's/#.*//' -e 's/^[^:]*: *//' -e 's/ *\\$$//' -e '/^$$/ d' -e 's/$$/ :/' < $(DEPSDIR)/$<.d.tmp >> $(DEPSDIR)/$<.d && \
rm $(DEPSDIR)/$<.d.tmp
${run-gplusplus-GENNAME()}
$(OBJDIR)/GENDIR/%.cpp.o: $(OBJDIR)/GENDIR/%.cpp $(ALL_MAKEFILES) | $(filter %.h,$(GENERATED_SOURCES))
${run-gplusplus-GENNAME()}
$(OBJDIR)/GENDIR/%.c.o: GENDIR/%.c $(ALL_MAKEFILES) | $(filter %.h,$(GENERATED_SOURCES))
@echo "Compiling $<"

View File

@@ -361,7 +361,7 @@
"remote_redundancy_mode":"remote_single",
"remote_log_replicas":3,
"remote_logs":5,
"storage_quorum":1,
"usable_regions":1,
"storage_replicas":1,
"resolvers":1,
"storage_replication_policy":"(zoneid^3x1)",

View File

@@ -25,6 +25,11 @@ Fixes
* Watches registered on a lagging storage server would take a long time to trigger.
* The cluster controller would not start a new generation until it recovered its files from disk.
Status
------
* The replication factor in status JSON is stored under "redundancy_mode" instead of "redundancy":"factor". `(PR #492) <https://github.com/apple/foundationdb/pull/492>`_
Other Changes
-------------

View File

@@ -29,14 +29,14 @@ DatabaseConfiguration::DatabaseConfiguration()
void DatabaseConfiguration::resetInternal() {
// does NOT reset rawConfiguration
initialized = false;
masterProxyCount = resolverCount = desiredTLogCount = tLogWriteAntiQuorum = tLogReplicationFactor = durableStorageQuorum = storageTeamSize = -1;
masterProxyCount = resolverCount = desiredTLogCount = tLogWriteAntiQuorum = tLogReplicationFactor = storageTeamSize = -1;
tLogDataStoreType = storageServerStoreType = KeyValueStoreType::END;
autoMasterProxyCount = CLIENT_KNOBS->DEFAULT_AUTO_PROXIES;
autoResolverCount = CLIENT_KNOBS->DEFAULT_AUTO_RESOLVERS;
autoDesiredTLogCount = CLIENT_KNOBS->DEFAULT_AUTO_LOGS;
usableRegions = 1;
regions.clear();
tLogPolicy = storagePolicy = remoteTLogPolicy = IRepPolicyRef();
remoteDesiredTLogCount = -1;
remoteTLogReplicationFactor = 0;
}
@@ -144,12 +144,10 @@ bool DatabaseConfiguration::isValid() const {
if( !(initialized &&
tLogWriteAntiQuorum >= 0 &&
tLogReplicationFactor >= 1 &&
durableStorageQuorum >= 1 &&
storageTeamSize >= 1 &&
getDesiredProxies() >= 1 &&
getDesiredLogs() >= 1 &&
getDesiredResolvers() >= 1 &&
durableStorageQuorum <= storageTeamSize &&
tLogDataStoreType != KeyValueStoreType::END &&
storageServerStoreType != KeyValueStoreType::END &&
autoMasterProxyCount >= 1 &&
@@ -159,8 +157,10 @@ bool DatabaseConfiguration::isValid() const {
tLogPolicy &&
getDesiredRemoteLogs() >= 1 &&
remoteTLogReplicationFactor >= 0 &&
usableRegions >= 1 &&
usableRegions <= 2 &&
regions.size() <= 2 &&
( remoteTLogReplicationFactor == 0 || ( remoteTLogPolicy && regions.size() == 2 && durableStorageQuorum == storageTeamSize ) ) &&
( usableRegions == 1 || regions.size() == 2 ) &&
( regions.size() == 0 || regions[0].priority >= 0 ) ) ) {
return false;
}
@@ -194,16 +194,16 @@ StatusObject DatabaseConfiguration::toJSON(bool noPolicies) const {
std::string tlogInfo = tLogPolicy->info();
std::string storageInfo = storagePolicy->info();
bool customRedundancy = false;
if( durableStorageQuorum == storageTeamSize && tLogWriteAntiQuorum == 0 ) {
if( tLogReplicationFactor == 1 && durableStorageQuorum == 1 ) {
if( tLogWriteAntiQuorum == 0 ) {
if( tLogReplicationFactor == 1 && storageTeamSize == 1 ) {
result["redundancy_mode"] = "single";
} else if( tLogReplicationFactor == 2 && durableStorageQuorum == 2 ) {
} else if( tLogReplicationFactor == 2 && storageTeamSize == 2 ) {
result["redundancy_mode"] = "double";
} else if( tLogReplicationFactor == 4 && durableStorageQuorum == 6 && tlogInfo == "dcid^2 x zoneid^2 x 1" && storageInfo == "dcid^3 x zoneid^2 x 1" ) {
} else if( tLogReplicationFactor == 4 && storageTeamSize == 6 && tlogInfo == "dcid^2 x zoneid^2 x 1" && storageInfo == "dcid^3 x zoneid^2 x 1" ) {
result["redundancy_mode"] = "three_datacenter";
} else if( tLogReplicationFactor == 3 && durableStorageQuorum == 3 ) {
} else if( tLogReplicationFactor == 3 && storageTeamSize == 3 ) {
result["redundancy_mode"] = "triple";
} else if( tLogReplicationFactor == 4 && durableStorageQuorum == 3 && tlogInfo == "data_hall^2 x zoneid^2 x 1" && storageInfo == "data_hall^3 x 1" ) {
} else if( tLogReplicationFactor == 4 && storageTeamSize == 3 && tlogInfo == "data_hall^2 x zoneid^2 x 1" && storageInfo == "data_hall^3 x 1" ) {
result["redundancy_mode"] = "three_data_hall";
} else {
customRedundancy = true;
@@ -214,7 +214,6 @@ StatusObject DatabaseConfiguration::toJSON(bool noPolicies) const {
if(customRedundancy) {
result["storage_replicas"] = storageTeamSize;
result["storage_quorum"] = durableStorageQuorum;
result["log_replicas"] = tLogReplicationFactor;
result["log_anti_quorum"] = tLogWriteAntiQuorum;
if(!noPolicies) result["storage_replication_policy"] = storagePolicy->info();
@@ -233,18 +232,17 @@ StatusObject DatabaseConfiguration::toJSON(bool noPolicies) const {
result["storage_engine"] = "custom";
}
if( remoteTLogReplicationFactor == 0 ) {
result["remote_redundancy_mode"] = "remote_none";
} else if( remoteTLogReplicationFactor == 1 ) {
if( remoteTLogReplicationFactor == 1 ) {
result["remote_redundancy_mode"] = "remote_single";
} else if( remoteTLogReplicationFactor == 2 ) {
result["remote_redundancy_mode"] = "remote_double";
} else if( remoteTLogReplicationFactor == 3 ) {
result["remote_redundancy_mode"] = "remote_triple";
} else {
} else if( remoteTLogReplicationFactor > 3 ) {
result["remote_log_replicas"] = remoteTLogReplicationFactor;
if(noPolicies && remoteTLogPolicy) result["remote_log_policy"] = remoteTLogPolicy->info();
}
result["usable_regions"] = usableRegions;
if(regions.size()) {
StatusArray regionArr;
@@ -334,7 +332,6 @@ bool DatabaseConfiguration::setInternal(KeyRef key, ValueRef value) {
else if (ck == LiteralStringRef("logs")) parse(&desiredTLogCount, value);
else if (ck == LiteralStringRef("log_replicas")) parse(&tLogReplicationFactor, value);
else if (ck == LiteralStringRef("log_anti_quorum")) parse(&tLogWriteAntiQuorum, value);
else if (ck == LiteralStringRef("storage_quorum")) parse(&durableStorageQuorum, value);
else if (ck == LiteralStringRef("storage_replicas")) parse(&storageTeamSize, value);
else if (ck == LiteralStringRef("log_engine")) { parse((&type), value); tLogDataStoreType = (KeyValueStoreType::StoreType)type;
// TODO: Remove this once Redwood works as a log engine
@@ -350,6 +347,7 @@ bool DatabaseConfiguration::setInternal(KeyRef key, ValueRef value) {
else if (ck == LiteralStringRef("remote_logs")) parse(&remoteDesiredTLogCount, value);
else if (ck == LiteralStringRef("remote_log_replicas")) parse(&remoteTLogReplicationFactor, value);
else if (ck == LiteralStringRef("remote_log_policy")) parseReplicationPolicy(&remoteTLogPolicy, value);
else if (ck == LiteralStringRef("usable_regions")) parse(&usableRegions, value);
else if (ck == LiteralStringRef("regions")) parse(&regions, value);
else return false;
return true; // All of the above options currently require recovery to take effect

View File

@@ -102,13 +102,12 @@ struct DatabaseConfiguration {
result++;
}
if(remoteTLogReplicationFactor > 0) {
if(usableRegions > 1) {
result++;
}
return result;
}
// SOMEDAY: think about changing storageTeamSize to durableStorageQuorum
int32_t minDatacentersRequired() const {
int minRequired = 0;
for(auto& r : regions) {
@@ -130,12 +129,12 @@ struct DatabaseConfiguration {
for(auto& r : regions) {
worstSatellite = std::min(worstSatellite, r.satelliteTLogReplicationFactor - r.satelliteTLogWriteAntiQuorum);
}
if(remoteTLogReplicationFactor > 0 && worstSatellite > 0) {
return 1 + std::min(std::max(tLogReplicationFactor - 1 - tLogWriteAntiQuorum, worstSatellite - 1), durableStorageQuorum - 1);
if(usableRegions > 1 && worstSatellite > 0) {
return 1 + std::min(std::max(tLogReplicationFactor - 1 - tLogWriteAntiQuorum, worstSatellite - 1), storageTeamSize - 1);
} else if(worstSatellite > 0) {
return std::min(tLogReplicationFactor + worstSatellite - 2 - tLogWriteAntiQuorum, durableStorageQuorum - 1);
return std::min(tLogReplicationFactor + worstSatellite - 2 - tLogWriteAntiQuorum, storageTeamSize - 1);
}
return std::min(tLogReplicationFactor - 1 - tLogWriteAntiQuorum, durableStorageQuorum - 1);
return std::min(tLogReplicationFactor - 1 - tLogWriteAntiQuorum, storageTeamSize - 1);
}
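For intuition, these expressions compute how many machine failures the cluster survives: at least one tLog beyond the write anti-quorum and at least one storage replica must remain. A standalone sketch of the single-region formula with made-up triple-redundancy numbers:

#include <algorithm>
#include <cassert>

// Hypothetical mirror of the single-region case above: tolerate losing
// machines while one tLog (past the anti-quorum) and one storage replica survive.
int machineFailuresTolerated(int logReplicas, int antiQuorum, int storageReplicas) {
    return std::min(logReplicas - 1 - antiQuorum, storageReplicas - 1);
}

int main() {
    // "triple" redundancy: 3 log replicas, no anti-quorum, 3 storage replicas
    assert(machineFailuresTolerated(3, 0, 3) == 2);
}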
// MasterProxy Servers
@@ -156,7 +155,6 @@ struct DatabaseConfiguration {
// Storage Servers
IRepPolicyRef storagePolicy;
int32_t durableStorageQuorum;
int32_t storageTeamSize;
KeyValueStoreType storageServerStoreType;
@@ -166,6 +164,7 @@ struct DatabaseConfiguration {
IRepPolicyRef remoteTLogPolicy;
//Data centers
int32_t usableRegions;
std::vector<RegionInfo> regions;
// Excluded servers (no state should be here)
@@ -175,11 +174,13 @@ struct DatabaseConfiguration {
int32_t getDesiredProxies() const { if(masterProxyCount == -1) return autoMasterProxyCount; return masterProxyCount; }
int32_t getDesiredResolvers() const { if(resolverCount == -1) return autoResolverCount; return resolverCount; }
int32_t getDesiredLogs() const { if(desiredTLogCount == -1) return autoDesiredTLogCount; return desiredTLogCount; }
int32_t getDesiredRemoteLogs() const { if(remoteDesiredTLogCount == -1) return getDesiredLogs(); return remoteDesiredTLogCount; }
int32_t getDesiredRemoteLogs() const { if(remoteDesiredTLogCount == -1) return getDesiredLogs(); return remoteDesiredTLogCount; }
int32_t getDesiredSatelliteLogs( Optional<Key> dcId ) const {
auto desired = getRegion(dcId).satelliteDesiredTLogCount;
if(desired == -1) return autoDesiredTLogCount; return desired;
}
int32_t getRemoteTLogReplicationFactor() const { if(remoteTLogReplicationFactor == 0) return tLogReplicationFactor; return remoteTLogReplicationFactor; }
IRepPolicyRef getRemoteTLogPolicy() const { if(remoteTLogReplicationFactor == 0) return tLogPolicy; return remoteTLogPolicy; }
bool operator == ( DatabaseConfiguration const& rhs ) const {
const_cast<DatabaseConfiguration*>(this)->makeConfigurationImmutable();

View File

@@ -65,7 +65,7 @@ std::map<std::string, std::string> configForToken( std::string const& mode ) {
std::string key = mode.substr(0, pos);
std::string value = mode.substr(pos+1);
if( (key == "logs" || key == "proxies" || key == "resolvers" || key == "remote_logs" || key == "satellite_logs") && isInteger(value) ) {
if( (key == "logs" || key == "proxies" || key == "resolvers" || key == "remote_logs" || key == "satellite_logs" || key == "usable_regions") && isInteger(value) ) {
out[p+key] = value;
}
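With this change, usable_regions joins the integer-valued tokens accepted by configure (e.g. `configure usable_regions=2`, the form exercised elsewhere in this commit). A trimmed-down sketch of the split-and-validate step (the key list is abbreviated, isInteger is approximated, and the real code also prefixes the configuration keyspace):

#include <map>
#include <string>

// Hypothetical condensation of the token handling above: split "key=value"
// at '=', require a numeric value, and store it for recognized integer keys.
bool parseIntegerToken(const std::string& mode, std::map<std::string, std::string>& out) {
    auto pos = mode.find('=');
    if (pos == std::string::npos) return false;
    std::string key = mode.substr(0, pos), value = mode.substr(pos + 1);
    bool isInteger = !value.empty() &&
                     value.find_first_not_of("0123456789") == std::string::npos;
    if ((key == "logs" || key == "usable_regions") && isInteger) {
        out[key] = value;
        return true;
    }
    return false;
}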
@@ -135,8 +135,7 @@ std::map<std::string, std::string> configForToken( std::string const& mode ) {
} else
redundancySpecified = false;
if (redundancySpecified) {
out[p+"storage_replicas"] =
out[p+"storage_quorum"] = redundancy;
out[p+"storage_replicas"] = redundancy;
out[p+"log_replicas"] = log_replicas;
out[p+"log_anti_quorum"] = "0";
@@ -153,7 +152,7 @@ std::map<std::string, std::string> configForToken( std::string const& mode ) {
std::string remote_redundancy, remote_log_replicas;
IRepPolicyRef remoteTLogPolicy;
bool remoteRedundancySpecified = true;
if (mode == "remote_none") {
if (mode == "remote_default") {
remote_redundancy="0";
remote_log_replicas="0";
remoteTLogPolicy = IRepPolicyRef();
@@ -244,7 +243,6 @@ bool isCompleteConfiguration( std::map<std::string, std::string> const& options
return options.count( p+"log_replicas" ) == 1 &&
options.count( p+"log_anti_quorum" ) == 1 &&
options.count( p+"storage_quorum" ) == 1 &&
options.count( p+"storage_replicas" ) == 1 &&
options.count( p+"log_engine" ) == 1 &&
options.count( p+"storage_engine" ) == 1;
@@ -1212,7 +1210,7 @@ ACTOR Future<Void> waitForFullReplication( Database cx ) {
}
}
if( !watchFutures.size() || (config.remoteTLogReplicationFactor == 0 && watchFutures.size() < config.regions.size())) {
if( !watchFutures.size() || (config.usableRegions == 1 && watchFutures.size() < config.regions.size())) {
return Void();
}

View File

@@ -1092,7 +1092,7 @@ public:
bool tooManyDead = false;
bool notEnoughLeft = false;
bool primaryTLogsDead = tLogWriteAntiQuorum ? !validateAllCombinations(badCombo, primaryProcessesDead, tLogPolicy, primaryLocalitiesLeft, tLogWriteAntiQuorum, false) : primaryProcessesDead.validate(tLogPolicy);
if(remoteTLogPolicy && !primaryTLogsDead) {
if(usableRegions > 1 && remoteTLogPolicy && !primaryTLogsDead) {
primaryTLogsDead = primaryProcessesDead.validate(remoteTLogPolicy);
}
@@ -1101,13 +1101,13 @@ public:
notEnoughLeft = !primaryProcessesLeft.validate(tLogPolicy) || !primaryProcessesLeft.validate(storagePolicy);
} else {
bool remoteTLogsDead = tLogWriteAntiQuorum ? !validateAllCombinations(badCombo, remoteProcessesDead, tLogPolicy, remoteLocalitiesLeft, tLogWriteAntiQuorum, false) : remoteProcessesDead.validate(tLogPolicy);
if(remoteTLogPolicy && !remoteTLogsDead) {
if(usableRegions > 1 && remoteTLogPolicy && !remoteTLogsDead) {
remoteTLogsDead = remoteProcessesDead.validate(remoteTLogPolicy);
}
if(!hasSatelliteReplication) {
notEnoughLeft = ( !primaryProcessesLeft.validate(tLogPolicy) || !primaryProcessesLeft.validate(storagePolicy) ) && ( !remoteProcessesLeft.validate(tLogPolicy) || !remoteProcessesLeft.validate(storagePolicy) );
if(hasRemoteReplication) {
if(usableRegions > 1) {
tooManyDead = primaryTLogsDead || remoteTLogsDead || ( primaryProcessesDead.validate(storagePolicy) && remoteProcessesDead.validate(storagePolicy) );
} else {
tooManyDead = primaryTLogsDead || remoteTLogsDead || primaryProcessesDead.validate(storagePolicy) || remoteProcessesDead.validate(storagePolicy);
@@ -1117,7 +1117,7 @@ public:
bool remoteSatelliteTLogsDead = satelliteTLogWriteAntiQuorum ? !validateAllCombinations(badCombo, remoteSatelliteProcessesDead, satelliteTLogPolicy, remoteSatelliteLocalitiesLeft, satelliteTLogWriteAntiQuorum, false) : remoteSatelliteProcessesDead.validate(satelliteTLogPolicy);
notEnoughLeft = ( !primaryProcessesLeft.validate(tLogPolicy) || !primaryProcessesLeft.validate(storagePolicy) || !primarySatelliteProcessesLeft.validate(satelliteTLogPolicy) ) && ( !remoteProcessesLeft.validate(tLogPolicy) || !remoteProcessesLeft.validate(storagePolicy) || !remoteSatelliteProcessesLeft.validate(satelliteTLogPolicy) );
if(hasRemoteReplication) {
if(usableRegions > 1) {
tooManyDead = ( primaryTLogsDead && primarySatelliteTLogsDead ) || ( remoteTLogsDead && remoteSatelliteTLogsDead ) || ( primaryProcessesDead.validate(storagePolicy) && remoteProcessesDead.validate(storagePolicy) );
} else {
tooManyDead = ( primaryTLogsDead && primarySatelliteTLogsDead ) || ( remoteTLogsDead && remoteSatelliteTLogsDead ) || primaryProcessesDead.validate(storagePolicy) || remoteProcessesDead.validate(storagePolicy);

View File

@@ -280,8 +280,8 @@ public:
IRepPolicyRef tLogPolicy;
int32_t tLogWriteAntiQuorum;
Optional<Standalone<StringRef>> primaryDcId;
bool hasRemoteReplication;
IRepPolicyRef remoteTLogPolicy;
int32_t usableRegions;
Optional<Standalone<StringRef>> remoteDcId;
bool hasSatelliteReplication;
IRepPolicyRef satelliteTLogPolicy;

View File

@@ -459,8 +459,9 @@ public:
std::set<Optional<Key>> remoteDC;
remoteDC.insert(req.dcId);
auto remoteLogs = getWorkersForTlogs( req.configuration, req.configuration.remoteTLogReplicationFactor, req.configuration.getDesiredRemoteLogs(), req.configuration.remoteTLogPolicy, id_used, false, remoteDC );
auto remoteLogs = getWorkersForTlogs( req.configuration, req.configuration.getRemoteTLogReplicationFactor(), req.configuration.getDesiredRemoteLogs(), req.configuration.getRemoteTLogPolicy(), id_used, false, remoteDC );
for(int i = 0; i < remoteLogs.size(); i++) {
result.remoteTLogs.push_back(remoteLogs[i].first);
}
@@ -890,12 +891,12 @@ public:
if(oldSatelliteTLogFit < newSatelliteTLogFit) return false;
RoleFitness oldRemoteTLogFit(remote_tlogs, ProcessClass::TLog);
RoleFitness newRemoteTLogFit((db.config.remoteTLogReplicationFactor > 0 && dbi.recoveryState == RecoveryState::REMOTE_RECOVERED) ? getWorkersForTlogs(db.config, db.config.remoteTLogReplicationFactor, db.config.getDesiredRemoteLogs(), db.config.remoteTLogPolicy, id_used, true, remoteDC) : remote_tlogs, ProcessClass::TLog);
RoleFitness newRemoteTLogFit((db.config.usableRegions > 1 && dbi.recoveryState == RecoveryState::REMOTE_RECOVERED) ? getWorkersForTlogs(db.config, db.config.getRemoteTLogReplicationFactor(), db.config.getDesiredRemoteLogs(), db.config.getRemoteTLogPolicy(), id_used, true, remoteDC) : remote_tlogs, ProcessClass::TLog);
if(oldRemoteTLogFit < newRemoteTLogFit) return false;
RoleFitness oldLogRoutersFit(log_routers, ProcessClass::LogRouter);
RoleFitness newLogRoutersFit((db.config.remoteTLogReplicationFactor > 0 && dbi.recoveryState == RecoveryState::REMOTE_RECOVERED) ? getWorkersForRoleInDatacenter( *remoteDC.begin(), ProcessClass::LogRouter, newTLogFit.count, db.config, id_used, Optional<WorkerFitnessInfo>(), true ) : log_routers, ProcessClass::LogRouter);
RoleFitness newLogRoutersFit((db.config.usableRegions > 1 && dbi.recoveryState == RecoveryState::REMOTE_RECOVERED) ? getWorkersForRoleInDatacenter( *remoteDC.begin(), ProcessClass::LogRouter, newTLogFit.count, db.config, id_used, Optional<WorkerFitnessInfo>(), true ) : log_routers, ProcessClass::LogRouter);
if(oldLogRoutersFit.count < oldTLogFit.count) {
oldLogRoutersFit.worstFit = ProcessClass::NeverAssign;
@@ -1942,7 +1943,7 @@ ACTOR Future<Void> updatedChangedDatacenters(ClusterControllerData *self) {
ACTOR Future<Void> updateDatacenterVersionDifference( ClusterControllerData *self ) {
loop {
self->versionDifferenceUpdated = false;
if(self->db.serverInfo->get().recoveryState >= RecoveryState::FULLY_RECOVERED && self->db.config.remoteTLogReplicationFactor == 0) {
if(self->db.serverInfo->get().recoveryState >= RecoveryState::FULLY_RECOVERED && self->db.config.usableRegions == 1) {
self->versionDifferenceUpdated = true;
self->datacenterVersionDifference = 0;
Void _ = wait(self->db.serverInfo->onChange());

View File

@@ -43,17 +43,18 @@ struct CoreTLogSet {
bool isLocal;
int8_t locality;
Version startVersion;
std::vector<std::vector<int>> satelliteTagLocations;
CoreTLogSet() : tLogWriteAntiQuorum(0), tLogReplicationFactor(0), isLocal(true), locality(tagLocalityUpgraded), startVersion(invalidVersion) {}
bool operator == (CoreTLogSet const& rhs) const {
return tLogs == rhs.tLogs && tLogWriteAntiQuorum == rhs.tLogWriteAntiQuorum && tLogReplicationFactor == rhs.tLogReplicationFactor && isLocal == rhs.isLocal &&
return tLogs == rhs.tLogs && tLogWriteAntiQuorum == rhs.tLogWriteAntiQuorum && tLogReplicationFactor == rhs.tLogReplicationFactor && isLocal == rhs.isLocal && satelliteTagLocations == rhs.satelliteTagLocations &&
locality == rhs.locality && startVersion == rhs.startVersion && ((!tLogPolicy && !rhs.tLogPolicy) || (tLogPolicy && rhs.tLogPolicy && (tLogPolicy->info() == rhs.tLogPolicy->info())));
}
template <class Archive>
void serialize(Archive& ar) {
ar & tLogs & tLogWriteAntiQuorum & tLogReplicationFactor & tLogPolicy & tLogLocalities & isLocal & locality & startVersion;
ar & tLogs & tLogWriteAntiQuorum & tLogReplicationFactor & tLogPolicy & tLogLocalities & isLocal & locality & startVersion & satelliteTagLocations;
}
};

View File

@@ -484,7 +484,7 @@ Future<Void> storageServerTracker(
MoveKeysLock const& lock,
UID const& masterId,
std::map<UID, Reference<TCServerInfo>>* const& other_servers,
PromiseStream< std::pair<UID, Optional<StorageServerInterface>> > const& changes,
Optional<PromiseStream< std::pair<UID, Optional<StorageServerInterface>> >> const& changes,
Promise<Void> const& errorOut,
Version const& addedVersion);
@@ -513,7 +513,7 @@ struct DDTeamCollection {
PromiseStream<UID> removedServers;
std::set<UID> recruitingIds; // The IDs of the SS which are being recruited
std::set<NetworkAddress> recruitingLocalities;
PromiseStream< std::pair<UID, Optional<StorageServerInterface>> > serverChanges;
Optional<PromiseStream< std::pair<UID, Optional<StorageServerInterface>> >> serverChanges;
Future<Void> initialFailureReactionDelay;
Future<Void> initializationDoneActor;
Promise<Void> serverTrackerErrorOut;
@@ -544,7 +544,7 @@ struct DDTeamCollection {
DatabaseConfiguration configuration,
std::vector<Optional<Key>> includedDCs,
Optional<std::vector<Optional<Key>>> otherTrackedDCs,
PromiseStream< std::pair<UID, Optional<StorageServerInterface>> > const& serverChanges,
Optional<PromiseStream< std::pair<UID, Optional<StorageServerInterface>> >> const& serverChanges,
Future<Void> readyToStart, Reference<AsyncVar<bool>> zeroHealthyTeams, bool primary,
Reference<AsyncVar<bool>> processingUnhealthy)
:cx(cx), masterId(masterId), lock(lock), output(output), shardsAffectedByTeamFailure(shardsAffectedByTeamFailure), doBuildTeams( true ), teamBuilder( Void() ),
@@ -553,7 +553,7 @@ struct DDTeamCollection {
initializationDoneActor(logOnCompletion(readyToStart && initialFailureReactionDelay, this)), optimalTeamCount( 0 ), recruitingStream(0), restartRecruiting( SERVER_KNOBS->DEBOUNCE_RECRUITING_DELAY ),
unhealthyServers(0), includedDCs(includedDCs), otherTrackedDCs(otherTrackedDCs), zeroHealthyTeams(zeroHealthyTeams), zeroOptimalTeams(true), primary(primary), processingUnhealthy(processingUnhealthy)
{
if(!primary || configuration.remoteTLogReplicationFactor <= 0) {
if(!primary || configuration.usableRegions == 1) {
TraceEvent("DDTrackerStarting", masterId)
.detail( "State", "Inactive" )
.trackLatest( format("%s/DDTrackerStarting", printable(cx->dbName).c_str() ).c_str() );
@@ -579,7 +579,7 @@ struct DDTeamCollection {
Void _ = wait(signal);
Void _ = wait(delay(SERVER_KNOBS->LOG_ON_COMPLETION_DELAY, TaskDataDistribution));
if(!self->primary || self->configuration.remoteTLogReplicationFactor <= 0) {
if(!self->primary || self->configuration.usableRegions == 1) {
TraceEvent("DDTrackerStarting", self->masterId)
.detail( "State", "Active" )
.trackLatest( format("%s/DDTrackerStarting", printable(self->cx->dbName).c_str() ).c_str() );
@@ -1578,7 +1578,7 @@ ACTOR Future<Void> storageServerTracker(
MoveKeysLock lock,
UID masterId,
std::map<UID, Reference<TCServerInfo>>* other_servers,
PromiseStream< std::pair<UID, Optional<StorageServerInterface>> > changes,
Optional<PromiseStream< std::pair<UID, Optional<StorageServerInterface>> >> changes,
Promise<Void> errorOut,
Version addedVersion)
{
@@ -1593,7 +1593,9 @@ ACTOR Future<Void> storageServerTracker(
state Future<KeyValueStoreType> storeTracker = keyValueStoreTypeTracker( self, server );
state bool hasWrongStoreTypeOrDC = false;
changes.send( std::make_pair(server->id, server->lastKnownInterface) );
if(changes.present()) {
changes.get().send( std::make_pair(server->id, server->lastKnownInterface) );
}
try {
loop {
@@ -1680,7 +1682,9 @@ ACTOR Future<Void> storageServerTracker(
when( Void _ = wait( failureTracker ) ) {
// The server is failed AND all data has been removed from it, so permanently remove it.
TraceEvent("StatusMapChange", masterId).detail("ServerID", server->id).detail("Status", "Removing");
changes.send( std::make_pair(server->id, Optional<StorageServerInterface>()) );
if(changes.present()) {
changes.get().send( std::make_pair(server->id, Optional<StorageServerInterface>()) );
}
// Remove server from FF/serverList
Void _ = wait( removeStorageServer( cx, server->id, lock ) );
@@ -1699,7 +1703,9 @@ ACTOR Future<Void> storageServerTracker(
server->lastKnownInterface = newInterface.first;
server->lastKnownClass = newInterface.second;
interfaceChanged = server->onInterfaceChanged;
changes.send( std::make_pair(server->id, server->lastKnownInterface) );
if(changes.present()) {
changes.get().send( std::make_pair(server->id, server->lastKnownInterface) );
}
// We rely on the old failureTracker being actorCancelled since the old actor now has a pointer to an invalid location
status = ServerStatus( status.isFailed, status.isUndesired, server->lastKnownInterface.locality );
@@ -1918,7 +1924,7 @@ ACTOR Future<Void> dataDistributionTeamCollection(
DatabaseConfiguration configuration,
std::vector<Optional<Key>> includedDCs,
Optional<std::vector<Optional<Key>>> otherTrackedDCs,
PromiseStream< std::pair<UID, Optional<StorageServerInterface>> > serverChanges,
Optional<PromiseStream< std::pair<UID, Optional<StorageServerInterface>> >> serverChanges,
Future<Void> readyToStart,
Reference<AsyncVar<bool>> zeroHealthyTeams,
bool primary,
@@ -2131,7 +2137,7 @@ ACTOR Future<Void> dataDistribution(
TraceEvent("DDInitTakingMoveKeysLock", mi.id());
state MoveKeysLock lock = wait( takeMoveKeysLock( cx, mi.id() ) );
TraceEvent("DDInitTookMoveKeysLock", mi.id());
state Reference<InitialDataDistribution> initData = wait( getInitialDataDistribution(cx, mi.id(), lock, configuration.remoteTLogReplicationFactor > 0 ? remoteDcIds : std::vector<Optional<Key>>() ) );
state Reference<InitialDataDistribution> initData = wait( getInitialDataDistribution(cx, mi.id(), lock, configuration.usableRegions > 1 ? remoteDcIds : std::vector<Optional<Key>>() ) );
if(initData->shards.size() > 1) {
TraceEvent("DDInitGotInitialDD", mi.id()).detail("B", printable(initData->shards.end()[-2].key)).detail("E", printable(initData->shards.end()[-1].key)).detail("Src", describe(initData->shards.end()[-2].primarySrc)).detail("Dest", describe(initData->shards.end()[-2].primaryDest)).trackLatest("InitialDD");
} else {
@@ -2174,7 +2180,7 @@ ACTOR Future<Void> dataDistribution(
int storageTeamSize = configuration.storageTeamSize;
vector<Future<Void>> actors;
if (configuration.remoteTLogReplicationFactor > 0) {
if (configuration.usableRegions > 1) {
tcis.push_back(TeamCollectionInterface());
storageTeamSize = 2*configuration.storageTeamSize;
@@ -2192,7 +2198,7 @@ ACTOR Future<Void> dataDistribution(
shardsAffectedByTeamFailure->defineShard(keys);
std::vector<ShardsAffectedByTeamFailure::Team> teams;
teams.push_back(ShardsAffectedByTeamFailure::Team(initData->shards[s].primarySrc, true));
if(configuration.remoteTLogReplicationFactor > 0) {
if(configuration.usableRegions > 1) {
teams.push_back(ShardsAffectedByTeamFailure::Team(initData->shards[s].remoteSrc, false));
}
shardsAffectedByTeamFailure->moveShard(keys, teams);
@@ -2205,10 +2211,10 @@ ACTOR Future<Void> dataDistribution(
actors.push_back( pollMoveKeysLock(cx, lock) );
actors.push_back( reportErrorsExcept( dataDistributionTracker( initData, cx, output, getShardMetrics, getAverageShardBytes.getFuture(), readyToStart, anyZeroHealthyTeams, mi.id() ), "DDTracker", mi.id(), &normalDDQueueErrors() ) );
actors.push_back( reportErrorsExcept( dataDistributionQueue( cx, output, getShardMetrics, processingUnhealthy, tcis, shardsAffectedByTeamFailure, lock, getAverageShardBytes, mi, storageTeamSize, configuration.durableStorageQuorum, lastLimited, recoveryCommitVersion ), "DDQueue", mi.id(), &normalDDQueueErrors() ) );
actors.push_back( reportErrorsExcept( dataDistributionTeamCollection( initData, tcis[0], cx, db, shardsAffectedByTeamFailure, lock, output, mi.id(), configuration, primaryDcId, configuration.remoteTLogReplicationFactor > 0 ? remoteDcIds : std::vector<Optional<Key>>(), serverChanges, readyToStart.getFuture(), zeroHealthyTeams[0], true, processingUnhealthy ), "DDTeamCollectionPrimary", mi.id(), &normalDDQueueErrors() ) );
if (configuration.remoteTLogReplicationFactor > 0) {
actors.push_back( reportErrorsExcept( dataDistributionTeamCollection( initData, tcis[1], cx, db, shardsAffectedByTeamFailure, lock, output, mi.id(), configuration, remoteDcIds, Optional<std::vector<Optional<Key>>>(), serverChanges, readyToStart.getFuture() && remoteRecovered, zeroHealthyTeams[1], false, processingUnhealthy ), "DDTeamCollectionSecondary", mi.id(), &normalDDQueueErrors() ) );
actors.push_back( reportErrorsExcept( dataDistributionQueue( cx, output, getShardMetrics, processingUnhealthy, tcis, shardsAffectedByTeamFailure, lock, getAverageShardBytes, mi, storageTeamSize, lastLimited, recoveryCommitVersion ), "DDQueue", mi.id(), &normalDDQueueErrors() ) );
actors.push_back( reportErrorsExcept( dataDistributionTeamCollection( initData, tcis[0], cx, db, shardsAffectedByTeamFailure, lock, output, mi.id(), configuration, primaryDcId, configuration.usableRegions > 1 ? remoteDcIds : std::vector<Optional<Key>>(), serverChanges, readyToStart.getFuture(), zeroHealthyTeams[0], true, processingUnhealthy ), "DDTeamCollectionPrimary", mi.id(), &normalDDQueueErrors() ) );
if (configuration.usableRegions > 1) {
actors.push_back( reportErrorsExcept( dataDistributionTeamCollection( initData, tcis[1], cx, db, shardsAffectedByTeamFailure, lock, output, mi.id(), configuration, remoteDcIds, Optional<std::vector<Optional<Key>>>(), Optional<PromiseStream< std::pair<UID, Optional<StorageServerInterface>> >>(), readyToStart.getFuture() && remoteRecovered, zeroHealthyTeams[1], false, processingUnhealthy ), "DDTeamCollectionSecondary", mi.id(), &normalDDQueueErrors() ) );
}
Void _ = wait( waitForAll( actors ) );

View File

@@ -226,7 +226,6 @@ Future<Void> dataDistributionQueue(
PromiseStream<Promise<int64_t>> const& getAverageShardBytes,
MasterInterface const& mi,
int const& teamSize,
int const& durableStorageQuorum,
double* const& lastLimited,
Version const& recoveryVersion);

View File

@@ -345,7 +345,6 @@ struct DDQueueData {
int queuedRelocations;
int bytesWritten;
int teamSize;
int durableStorageQuorumPerTeam;
std::map<UID, Busyness> busymap;
@@ -394,12 +393,10 @@ struct DDQueueData {
DDQueueData( MasterInterface mi, MoveKeysLock lock, Database cx, std::vector<TeamCollectionInterface> teamCollections,
Reference<ShardsAffectedByTeamFailure> sABTF, PromiseStream<Promise<int64_t>> getAverageShardBytes,
int teamSize, int durableStorageQuorumPerTeam, PromiseStream<RelocateShard> input,
PromiseStream<GetMetricsRequest> getShardMetrics, double* lastLimited, Version recoveryVersion ) :
int teamSize, PromiseStream<RelocateShard> input, PromiseStream<GetMetricsRequest> getShardMetrics, double* lastLimited, Version recoveryVersion ) :
activeRelocations( 0 ), queuedRelocations( 0 ), bytesWritten ( 0 ), teamCollections( teamCollections ),
shardsAffectedByTeamFailure( sABTF ), getAverageShardBytes( getAverageShardBytes ), mi( mi ), lock( lock ),
cx( cx ), teamSize( teamSize ), durableStorageQuorumPerTeam( durableStorageQuorumPerTeam ), input( input ),
getShardMetrics( getShardMetrics ), startMoveKeysParallelismLock( SERVER_KNOBS->DD_MOVE_KEYS_PARALLELISM ),
cx( cx ), teamSize( teamSize ), input( input ), getShardMetrics( getShardMetrics ), startMoveKeysParallelismLock( SERVER_KNOBS->DD_MOVE_KEYS_PARALLELISM ),
finishMoveKeysParallelismLock( SERVER_KNOBS->DD_MOVE_KEYS_PARALLELISM ), lastLimited(lastLimited), recoveryVersion(recoveryVersion),
suppressIntervals(0), lastInterval(0), unhealthyRelocations(0), rawProcessingUnhealthy( new AsyncVar<bool>(false) ) {}
@@ -863,7 +860,6 @@ ACTOR Future<Void> dataDistributionRelocator( DDQueueData *self, RelocateData rd
state std::vector<ShardsAffectedByTeamFailure::Team> destinationTeams;
state ParallelTCInfo healthyDestinations;
state bool anyHealthy = false;
state int durableStorageQuorum = 0;
try {
if(now() - self->lastInterval < 1.0) {
@@ -892,7 +888,6 @@ ACTOR Future<Void> dataDistributionRelocator( DDQueueData *self, RelocateData rd
destinationTeams.clear();
healthyDestinations.clear();
anyHealthy = false;
durableStorageQuorum = 0;
loop{
if (tciIndex == self->teamCollections.size()) {
break;
@@ -912,9 +907,6 @@ ACTOR Future<Void> dataDistributionRelocator( DDQueueData *self, RelocateData rd
if(bestTeam.get()->isHealthy()) {
healthyDestinations.addTeam(bestTeam.get());
anyHealthy = true;
durableStorageQuorum += self->durableStorageQuorumPerTeam;
} else {
durableStorageQuorum += bestTeam.get()->size();
}
}
else {
@@ -951,12 +943,8 @@ ACTOR Future<Void> dataDistributionRelocator( DDQueueData *self, RelocateData rd
state Promise<Void> dataMovementComplete;
state Future<Void> doMoveKeys = moveKeys(
self->cx, rd.keys, destination.getServerIDs(), healthyDestinations.getServerIDs(), self->lock,
durableStorageQuorum, dataMovementComplete,
&self->startMoveKeysParallelismLock,
&self->finishMoveKeysParallelismLock,
self->recoveryVersion,
self->teamCollections.size() > 1,
relocateShardInterval.pairID );
dataMovementComplete, &self->startMoveKeysParallelismLock, &self->finishMoveKeysParallelismLock,
self->recoveryVersion,self->teamCollections.size() > 1, relocateShardInterval.pairID );
state Future<Void> pollHealth = (!anyHealthy || signalledTransferComplete) ? Never() : delay( SERVER_KNOBS->HEALTH_POLL_TIME, TaskDataDistributionLaunch );
try {
loop {
@@ -1155,11 +1143,10 @@ ACTOR Future<Void> dataDistributionQueue(
PromiseStream<Promise<int64_t>> getAverageShardBytes,
MasterInterface mi,
int teamSize,
int durableStorageQuorum,
double* lastLimited,
Version recoveryVersion)
{
state DDQueueData self( mi, lock, cx, teamCollections, shardsAffectedByTeamFailure, getAverageShardBytes, teamSize, durableStorageQuorum, input, getShardMetrics, lastLimited, recoveryVersion );
state DDQueueData self( mi, lock, cx, teamCollections, shardsAffectedByTeamFailure, getAverageShardBytes, teamSize, input, getShardMetrics, lastLimited, recoveryVersion );
state std::set<UID> serversToLaunchFrom;
state KeyRange keysToLaunchFrom;
state RelocateData launchData;

View File

@@ -323,6 +323,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
init( TARGET_BYTES_PER_TLOG, 2000e6 ); if( smallTlogTarget ) TARGET_BYTES_PER_TLOG = 2000e3;
init( SPRING_BYTES_TLOG, 400e6 ); if( smallTlogTarget ) SPRING_BYTES_TLOG = 200e3;
init( TLOG_SPILL_THRESHOLD, 1500e6 ); if( smallTlogTarget ) TLOG_SPILL_THRESHOLD = 1500e3; if( randomize && BUGGIFY ) TLOG_SPILL_THRESHOLD = 0;
init( TLOG_HARD_LIMIT_BYTES, 3000e6 ); if( smallTlogTarget ) TLOG_HARD_LIMIT_BYTES = 3000e3;
init( MAX_TRANSACTIONS_PER_BYTE, 1000 );

View File

@@ -261,6 +261,7 @@ public:
int64_t TARGET_BYTES_PER_TLOG;
double SPRING_BYTES_TLOG;
int64_t TLOG_SPILL_THRESHOLD;
int64_t TLOG_HARD_LIMIT_BYTES;
double MAX_TRANSACTIONS_PER_BYTE;

View File

@@ -179,7 +179,7 @@ ACTOR Future<Void> pullAsyncData( LogRouterData *self ) {
loop {
loop {
choose {
when(Void _ = wait( r ? r->getMore() : Never() ) ) {
when(Void _ = wait( r ? r->getMore(TaskTLogCommit) : Never() ) ) {
break;
}
when( Void _ = wait( dbInfoChange ) ) { //FIXME: does this actually happen?
@@ -336,7 +336,7 @@ ACTOR Future<Void> logRouterPop( LogRouterData* self, TLogPopRequest req ) {
while(!self->messageBlocks.empty() && self->messageBlocks.front().first < minPopped) {
self->messageBlocks.pop_front();
Void _ = wait(yield(TaskUpdateStorage));
Void _ = wait(yield(TaskTLogPop));
}
if(self->logSystem->get() && self->allowPops) {

View File

@@ -47,6 +47,7 @@ public:
int8_t locality;
Version startVersion;
std::vector<Future<TLogLockResult>> replies;
std::vector<std::vector<int>> satelliteTagLocations;
LogSet() : tLogWriteAntiQuorum(0), tLogReplicationFactor(0), isLocal(true), locality(tagLocalityInvalid), startVersion(invalidVersion) {}
@@ -72,7 +73,81 @@ public:
return result;
}
void populateSatelliteTagLocations(int logRouterTags, int oldLogRouterTags) {
satelliteTagLocations.clear();
satelliteTagLocations.resize(std::max(logRouterTags,oldLogRouterTags) + 1);
std::set<std::pair<int,int>> used_servers;
for(int i = 0; i < tLogLocalities.size(); i++) {
used_servers.insert(std::make_pair(0,i));
}
LocalitySetRef serverSet = Reference<LocalitySet>(new LocalityMap<std::pair<int,int>>());
LocalityMap<std::pair<int,int>>* serverMap = (LocalityMap<std::pair<int,int>>*) serverSet.getPtr();
std::vector<std::pair<int,int>> resultPairs;
for(int loc = 0; loc < satelliteTagLocations.size(); loc++) {
int team = loc;
if(loc < logRouterTags) {
team = loc + 1;
} else if(loc == logRouterTags) {
team = 0;
}
bool teamComplete = false;
alsoServers.resize(1);
serverMap->clear();
resultPairs.clear();
for(auto& used_idx : used_servers) {
auto entry = serverMap->add(tLogLocalities[used_idx.second], &used_idx);
if(!resultPairs.size()) {
resultPairs.push_back(used_idx);
alsoServers[0] = entry;
}
resultEntries.clear();
if( serverSet->selectReplicas(tLogPolicy, alsoServers, resultEntries) ) {
for(auto& entry : resultEntries) {
resultPairs.push_back(*serverMap->getObject(entry));
}
for(auto& res : resultPairs) {
satelliteTagLocations[team].push_back(res.second);
used_servers.erase(res);
res.first++;
used_servers.insert(res);
}
teamComplete = true;
break;
}
}
ASSERT(teamComplete);
}
checkSatelliteTagLocations();
}
void checkSatelliteTagLocations() {
std::vector<int> used;
used.resize(tLogLocalities.size());
for(auto team : satelliteTagLocations) {
for(auto loc : team) {
used[loc]++;
}
}
int minUsed = satelliteTagLocations.size();
int maxUsed = 0;
for(auto i : used) {
minUsed = std::min(minUsed, i);
maxUsed = std::max(maxUsed, i);
}
TraceEvent(maxUsed - minUsed > 1 ? (g_network->isSimulated() ? SevError : SevWarnAlways) : SevInfo, "CheckSatelliteTagLocations").detail("MinUsed", minUsed).detail("MaxUsed", maxUsed);
}
int bestLocationFor( Tag tag ) {
if(locality == tagLocalitySatellite) {
return satelliteTagLocations[tag == txsTag ? 0 : tag.id + 1][0];
}
//the following logic supports upgrades from 5.X
if(tag == txsTag) return txsTagOld % logServers.size();
return tag.id % logServers.size();
}
@@ -113,6 +188,18 @@ public:
}
void getPushLocations( std::vector<Tag> const& tags, std::vector<int>& locations, int locationOffset ) {
if(locality == tagLocalitySatellite) {
for(auto& t : tags) {
if(t == txsTag || t.locality == tagLocalityLogRouter) {
for(int loc : satelliteTagLocations[t == txsTag ? 0 : t.id + 1]) {
locations.push_back(locationOffset + loc);
}
}
}
uniquify(locations);
return;
}
newLocations.clear();
alsoServers.clear();
resultEntries.clear();
@@ -461,8 +548,8 @@ struct ILogSystem {
virtual Future<Void> endEpoch() = 0;
// Ends the current epoch without starting a new one
static Reference<ILogSystem> fromServerDBInfo( UID const& dbgid, struct ServerDBInfo const& db, bool usePreviousEpochEnd = false );
static Reference<ILogSystem> fromLogSystemConfig( UID const& dbgid, struct LocalityData const&, struct LogSystemConfig const&, bool excludeRemote = false, bool usePreviousEpochEnd = false );
static Reference<ILogSystem> fromServerDBInfo( UID const& dbgid, struct ServerDBInfo const& db, bool usePreviousEpochEnd = false, Optional<PromiseStream<Future<Void>>> addActor = Optional<PromiseStream<Future<Void>>>() );
static Reference<ILogSystem> fromLogSystemConfig( UID const& dbgid, struct LocalityData const&, struct LogSystemConfig const&, bool excludeRemote = false, bool usePreviousEpochEnd = false, Optional<PromiseStream<Future<Void>>> addActor = Optional<PromiseStream<Future<Void>>>() );
// Constructs a new ILogSystem implementation from the given ServerDBInfo/LogSystemConfig. Might return a null reference if there isn't a fully recovered log system available.
// The caller can peek() the returned log system and can push() if it has version numbers reserved for it and prevVersions

View File

@@ -64,6 +64,7 @@ struct TLogSet {
bool isLocal;
int8_t locality;
Version startVersion;
std::vector<std::vector<int>> satelliteTagLocations;
TLogSet() : tLogWriteAntiQuorum(0), tLogReplicationFactor(0), isLocal(true), locality(tagLocalityInvalid), startVersion(invalidVersion) {}
@@ -72,7 +73,7 @@ struct TLogSet {
}
bool operator == ( const TLogSet& rhs ) const {
if (tLogWriteAntiQuorum != rhs.tLogWriteAntiQuorum || tLogReplicationFactor != rhs.tLogReplicationFactor || isLocal != rhs.isLocal ||
if (tLogWriteAntiQuorum != rhs.tLogWriteAntiQuorum || tLogReplicationFactor != rhs.tLogReplicationFactor || isLocal != rhs.isLocal || satelliteTagLocations != rhs.satelliteTagLocations ||
startVersion != rhs.startVersion || tLogs.size() != rhs.tLogs.size() || locality != rhs.locality || logRouters.size() != rhs.logRouters.size()) {
return false;
}
@@ -93,7 +94,8 @@ struct TLogSet {
}
bool isEqualIds(TLogSet const& r) const {
if (tLogWriteAntiQuorum != r.tLogWriteAntiQuorum || tLogReplicationFactor != r.tLogReplicationFactor || isLocal != r.isLocal || startVersion != r.startVersion || tLogs.size() != r.tLogs.size() || locality != r.locality) {
if (tLogWriteAntiQuorum != r.tLogWriteAntiQuorum || tLogReplicationFactor != r.tLogReplicationFactor || isLocal != r.isLocal || satelliteTagLocations != r.satelliteTagLocations ||
startVersion != r.startVersion || tLogs.size() != r.tLogs.size() || locality != r.locality) {
return false;
}
if ((tLogPolicy && !r.tLogPolicy) || (!tLogPolicy && r.tLogPolicy) || (tLogPolicy && (tLogPolicy->info() != r.tLogPolicy->info()))) {
@@ -109,7 +111,7 @@ struct TLogSet {
template <class Ar>
void serialize( Ar& ar ) {
ar & tLogs & logRouters & tLogWriteAntiQuorum & tLogReplicationFactor & tLogPolicy & tLogLocalities & isLocal & locality & startVersion;
ar & tLogs & logRouters & tLogWriteAntiQuorum & tLogReplicationFactor & tLogPolicy & tLogLocalities & isLocal & locality & startVersion & satelliteTagLocations;
}
};

View File

@@ -1196,7 +1196,7 @@ ACTOR Future<Void> masterProxyServerCore(
for(auto r = rs.begin(); r != rs.end(); ++r)
r->value().push_back(std::make_pair<Version,int>(0,0));
commitData.logSystem = ILogSystem::fromServerDBInfo(proxy.id(), db->get());
commitData.logSystem = ILogSystem::fromServerDBInfo(proxy.id(), db->get(), false, addActor);
commitData.logAdapter = new LogSystemDiskQueueAdapter(commitData.logSystem, txsTag, false);
commitData.txnStateStore = keyValueStoreLogSystem(commitData.logAdapter, proxy.id(), 2e9, true, true);
@@ -1220,7 +1220,7 @@ ACTOR Future<Void> masterProxyServerCore(
when( Void _ = wait( dbInfoChange ) ) {
dbInfoChange = db->onChange();
if(db->get().master.id() == master.id() && db->get().recoveryState >= RecoveryState::RECOVERY_TRANSACTION) {
commitData.logSystem = ILogSystem::fromServerDBInfo(proxy.id(), db->get());
commitData.logSystem = ILogSystem::fromServerDBInfo(proxy.id(), db->get(), false, addActor);
for(auto it : commitData.tag_popped) {
commitData.logSystem->pop(it.second, it.first);
}

View File

@@ -168,9 +168,7 @@ ACTOR Future<vector<vector<Optional<UID>>>> findReadWriteDestinations(Standalone
// Set keyServers[keys].dest = servers
// Set serverKeys[servers][keys] = active for each subrange of keys that the server did not already have, complete for each subrange that it already has
// Set serverKeys[dest][keys] = "" for the dest servers of each existing shard in keys (unless that destination is a member of servers OR if the source list is sufficiently degraded)
ACTOR Future<Void> startMoveKeys( Database occ, KeyRange keys, vector<UID> servers,
MoveKeysLock lock, int durableStorageQuorum,
FlowLock *startMoveKeysLock, UID relocationIntervalId ) {
ACTOR Future<Void> startMoveKeys( Database occ, KeyRange keys, vector<UID> servers, MoveKeysLock lock, FlowLock *startMoveKeysLock, UID relocationIntervalId ) {
state TraceInterval interval("RelocateShard_StartMoveKeys");
//state TraceInterval waitInterval("");
@@ -407,7 +405,7 @@ ACTOR Future<Void> checkFetchingState( Database cx, vector<UID> dest, KeyRange k
// keyServers[k].dest must be the same for all k in keys
// Set serverKeys[dest][keys] = true; serverKeys[src][keys] = false for all src not in dest
// Should be cancelled and restarted if keyServers[keys].dest changes (?so this is no longer true?)
ACTOR Future<Void> finishMoveKeys( Database occ, KeyRange keys, vector<UID> destinationTeam, MoveKeysLock lock, int durableStorageQuorum, FlowLock *finishMoveKeysParallelismLock, Version recoveryVersion, bool hasRemote, UID relocationIntervalId )
ACTOR Future<Void> finishMoveKeys( Database occ, KeyRange keys, vector<UID> destinationTeam, MoveKeysLock lock, FlowLock *finishMoveKeysParallelismLock, Version recoveryVersion, bool hasRemote, UID relocationIntervalId )
{
state TraceInterval interval("RelocateShard_FinishMoveKeys");
state TraceInterval waitInterval("");
@@ -549,12 +547,6 @@ ACTOR Future<Void> finishMoveKeys( Database occ, KeyRange keys, vector<UID> dest
break;
}
if (dest.size() < durableStorageQuorum) {
TraceEvent(SevError,"FinishMoveKeysError", relocationIntervalId)
.detailf("Reason", "dest size too small (%d)", dest.size());
ASSERT(false);
}
waitInterval = TraceInterval("RelocateShard_FinishMoveKeysWaitDurable");
TraceEvent(SevDebug, waitInterval.begin(), relocationIntervalId)
.detail("KeyBegin", printable(keys.begin))
@@ -590,9 +582,7 @@ ACTOR Future<Void> finishMoveKeys( Database occ, KeyRange keys, vector<UID> dest
for(int s=0; s<storageServerInterfaces.size(); s++)
serverReady.push_back( waitForShardReady( storageServerInterfaces[s], keys, tr.getReadVersion().get(), recoveryVersion, GetShardStateRequest::READABLE) );
Void _ = wait( timeout(
smartQuorum( serverReady, std::max<int>(0, durableStorageQuorum - (dest.size() - newDestinations.size())), SERVER_KNOBS->SERVER_READY_QUORUM_INTERVAL, TaskMoveKeys ),
SERVER_KNOBS->SERVER_READY_QUORUM_TIMEOUT, Void(), TaskMoveKeys ) );
Void _ = wait( timeout( waitForAll( serverReady ), SERVER_KNOBS->SERVER_READY_QUORUM_TIMEOUT, Void(), TaskMoveKeys ) );
int count = dest.size() - newDestinations.size();
for(int s=0; s<serverReady.size(); s++)
count += serverReady[s].isReady() && !serverReady[s].isError();
@@ -600,7 +590,7 @@ ACTOR Future<Void> finishMoveKeys( Database occ, KeyRange keys, vector<UID> dest
//printf(" fMK: moved data to %d/%d servers\n", count, serverReady.size());
TraceEvent(SevDebug, waitInterval.end(), relocationIntervalId).detail("ReadyServers", count);
if( count >= durableStorageQuorum ) {
if( count == dest.size() ) {
// update keyServers, serverKeys
// SOMEDAY: Doing these in parallel is safe because none of them overlap or touch (one per server)
Void _ = wait( krmSetRangeCoalescing( &tr, keyServersPrefix, currentKeys, keys, keyServersValue( dest ) ) );
@@ -834,7 +824,6 @@ ACTOR Future<Void> moveKeys(
vector<UID> destinationTeam,
vector<UID> healthyDestinations,
MoveKeysLock lock,
int durableStorageQuorum,
Promise<Void> dataMovementComplete,
FlowLock *startMoveKeysParallelismLock,
FlowLock *finishMoveKeysParallelismLock,
@@ -844,11 +833,11 @@ ACTOR Future<Void> moveKeys(
{
ASSERT( destinationTeam.size() );
std::sort( destinationTeam.begin(), destinationTeam.end() );
Void _ = wait( startMoveKeys( cx, keys, destinationTeam, lock, durableStorageQuorum, startMoveKeysParallelismLock, relocationIntervalId ) );
Void _ = wait( startMoveKeys( cx, keys, destinationTeam, lock, startMoveKeysParallelismLock, relocationIntervalId ) );
state Future<Void> completionSignaller = checkFetchingState( cx, healthyDestinations, keys, dataMovementComplete, relocationIntervalId );
Void _ = wait( finishMoveKeys( cx, keys, destinationTeam, lock, durableStorageQuorum, finishMoveKeysParallelismLock, recoveryVersion, hasRemote, relocationIntervalId ) );
Void _ = wait( finishMoveKeys( cx, keys, destinationTeam, lock, finishMoveKeysParallelismLock, recoveryVersion, hasRemote, relocationIntervalId ) );
//This is defensive, but make sure that we always say that the movement is complete before moveKeys completes
completionSignaller.cancel();

View File

@@ -56,7 +56,6 @@ Future<Void> moveKeys(
vector<UID> const& destinationTeam,
vector<UID> const& healthyDestinations,
MoveKeysLock const& lock,
int const& durableStorageQuorum,
Promise<Void> const& dataMovementComplete,
FlowLock* const& startMoveKeysParallelismLock,
FlowLock* const& finishMoveKeysParallelismLock,

View File

@@ -288,13 +288,13 @@ ACTOR Future<Void> reconfigureAfter(Database cx, double time) {
if(g_network->isSimulated()) {
TraceEvent(SevWarnAlways, "DisablingFearlessConfiguration");
g_simulator.hasRemoteReplication = false;
ConfigurationResult::Type _ = wait( changeConfig( cx, "remote_none" ) );
g_simulator.usableRegions = 1;
ConfigurationResult::Type _ = wait( changeConfig( cx, "usable_regions=1" ) );
if (g_network->isSimulated() && g_simulator.extraDB) {
Reference<ClusterConnectionFile> extraFile(new ClusterConnectionFile(*g_simulator.extraDB));
Reference<Cluster> cluster = Cluster::createCluster(extraFile, -1);
Database extraDB = cluster->createDatabase(LiteralStringRef("DB")).get();
ConfigurationResult::Type _ = wait(changeConfig(extraDB, "remote_none"));
ConfigurationResult::Type _ = wait(changeConfig(extraDB, "usable_regions=1"));
}
}

View File

@@ -725,11 +725,9 @@ void SimulationConfig::generateNormalConfig(int minimumReplication) {
int replication_factor = g_random->randomInt(storage_servers, generateFearless ? 4 : 5);
int anti_quorum = g_random->randomInt(0, replication_factor);
// Go through buildConfiguration, as it sets tLogPolicy/storagePolicy.
set_config(format("storage_replicas:=%d storage_quorum:=%d "
"log_replicas:=%d log_anti_quorum:=%1 "
set_config(format("storage_replicas:=%d log_replicas:=%d log_anti_quorum:=%d "
"replica_datacenters:=1 min_replica_datacenters:=1",
storage_servers, storage_servers,
replication_factor, anti_quorum));
storage_servers, replication_factor, anti_quorum));
break;
}
case 1: {
@@ -828,6 +826,14 @@ void SimulationConfig::generateNormalConfig(int minimumReplication) {
remoteObj["satellite_logs"] = logs;
}
if (g_random->random01() < 0.5) {
TEST( true ); // Simulated cluster using one region
needsRemote = false;
} else {
TEST( true ); // Simulated cluster using two regions
db.usableRegions = 2;
}
int remote_replication_type = g_random->randomInt(0,5);
switch (remote_replication_type) {
case 0: {
@ -836,8 +842,7 @@ void SimulationConfig::generateNormalConfig(int minimumReplication) {
break;
}
case 1: {
needsRemote = false;
TEST( true ); // Simulated cluster using no remote redundancy mode
TEST( true ); // Simulated cluster using default remote redundancy mode
break;
}
case 2: {
@@ -927,8 +932,8 @@ void setupSimulatedSystem( vector<Future<Void>> *systemActors, std::string baseF
g_simulator.storagePolicy = simconfig.db.storagePolicy;
g_simulator.tLogPolicy = simconfig.db.tLogPolicy;
g_simulator.tLogWriteAntiQuorum = simconfig.db.tLogWriteAntiQuorum;
g_simulator.hasRemoteReplication = simconfig.db.remoteTLogReplicationFactor > 0;
g_simulator.remoteTLogPolicy = simconfig.db.remoteTLogPolicy;
g_simulator.remoteTLogPolicy = simconfig.db.getRemoteTLogPolicy();
g_simulator.usableRegions = simconfig.db.usableRegions;
if(simconfig.db.regions.size() == 2) {
g_simulator.primaryDcId = simconfig.db.regions[0].dcId;
@@ -959,7 +964,6 @@ void setupSimulatedSystem( vector<Future<Void>> *systemActors, std::string baseF
}
ASSERT(g_simulator.storagePolicy && g_simulator.tLogPolicy);
ASSERT(!g_simulator.hasRemoteReplication || g_simulator.remoteTLogPolicy);
ASSERT(!g_simulator.hasSatelliteReplication || g_simulator.satelliteTLogPolicy);
TraceEvent("SimulatorConfig").detail("ConfigString", printable(StringRef(startingConfigString)));

View File

@@ -1115,7 +1115,7 @@ ACTOR static Future<std::pair<Optional<DatabaseConfiguration>,Optional<bool>>> l
}
}
fullReplication = (!unreplicated || (result.get().remoteTLogReplicationFactor == 0 && unreplicated < result.get().regions.size()));
fullReplication = (!unreplicated || (result.get().usableRegions == 1 && unreplicated < result.get().regions.size()));
}
when(Void _ = wait(getConfTimeout)) {
messages->push_back(makeMessage("full_replication_timeout", "Unable to read datacenter replicas."));
@@ -1326,11 +1326,11 @@ static int getExtraTLogEligibleMachines(vector<std::pair<WorkerInterface, Proces
}
if(configuration.regions.size() == 0) {
return allMachines.size() - std::max( configuration.remoteTLogReplicationFactor, std::max(configuration.tLogReplicationFactor, configuration.storageTeamSize) );
return allMachines.size() - std::max(configuration.tLogReplicationFactor, configuration.storageTeamSize);
}
int extraTlogEligibleMachines = std::numeric_limits<int>::max();
for(auto& region : configuration.regions) {
extraTlogEligibleMachines = std::min<int>( extraTlogEligibleMachines, dcId_machine[region.dcId].size() - std::max( configuration.remoteTLogReplicationFactor, std::max(configuration.tLogReplicationFactor, configuration.storageTeamSize) ) );
extraTlogEligibleMachines = std::min<int>( extraTlogEligibleMachines, dcId_machine[region.dcId].size() - std::max(configuration.remoteTLogReplicationFactor, std::max(configuration.tLogReplicationFactor, configuration.storageTeamSize) ) );
if(region.satelliteTLogReplicationFactor > 0) {
int totalSatelliteEligible = 0;
for(auto& sat : region.satellites) {
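A toy check of the regions.size() == 0 branch above (numbers invented): with 10 machines and triple-redundant logs and storage, 10 - max(3, 3) = 7 machines remain as spare tLog candidates. Note that the remote replication factor has dropped out of this single-region formula:

#include <algorithm>
#include <cassert>

int main() {
    // Hypothetical numbers: 10 machines, triple logs, triple storage.
    int allMachines = 10, tLogReplicationFactor = 3, storageTeamSize = 3;
    int extraEligible = allMachines - std::max(tLogReplicationFactor, storageTeamSize);
    assert(extraEligible == 7);
}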

View File

@@ -188,7 +188,7 @@ static const KeyRangeRef persistRecoveryCountKeys = KeyRangeRef( LiteralStringRe
// Updated on updatePersistentData()
static const KeyRangeRef persistCurrentVersionKeys = KeyRangeRef( LiteralStringRef( "version/" ), LiteralStringRef( "version0" ) );
static const KeyRangeRef persistKnownCommittedVersionKeys = KeyRangeRef( LiteralStringRef( "knownCommitted/" ), LiteralStringRef( "knownCommitted0" ) );
static const KeyRangeRef persistUnrecoveredBeforeVersionKeys = KeyRangeRef( LiteralStringRef( "UnrecoveredBefore/" ), LiteralStringRef( "UnrecoveredBefore0" ) );
static const KeyRangeRef persistLocalityKeys = KeyRangeRef( LiteralStringRef( "Locality/" ), LiteralStringRef( "Locality0" ) );
static const KeyRangeRef persistLogRouterTagsKeys = KeyRangeRef( LiteralStringRef( "LogRouterTags/" ), LiteralStringRef( "LogRouterTags0" ) );
static const KeyRange persistTagMessagesKeys = prefixRange(LiteralStringRef("TagMsg/"));
static const KeyRange persistTagPoppedKeys = prefixRange(LiteralStringRef("TagPop/"));
@@ -445,7 +445,7 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
Key logIdKey = BinaryWriter::toValue(logId,Unversioned());
tLogData->persistentData->clear( singleKeyRange(logIdKey.withPrefix(persistCurrentVersionKeys.begin)) );
tLogData->persistentData->clear( singleKeyRange(logIdKey.withPrefix(persistKnownCommittedVersionKeys.begin)) );
tLogData->persistentData->clear( singleKeyRange(logIdKey.withPrefix(persistUnrecoveredBeforeVersionKeys.begin)) );
tLogData->persistentData->clear( singleKeyRange(logIdKey.withPrefix(persistLocalityKeys.begin)) );
tLogData->persistentData->clear( singleKeyRange(logIdKey.withPrefix(persistLogRouterTagsKeys.begin)) );
tLogData->persistentData->clear( singleKeyRange(logIdKey.withPrefix(persistRecoveryCountKeys.begin)) );
Key msgKey = logIdKey.withPrefix(persistTagMessagesKeys.begin);
@ -583,7 +583,7 @@ ACTOR Future<Void> updatePersistentData( TLogData* self, Reference<LogData> logD
}
}
self->persistentData->set( KeyValueRef( BinaryWriter::toValue(logData->logId,Unversioned()).withPrefix(persistCurrentVersionKeys.begin), BinaryWriter::toValue(newPersistentDataVersion, Unversioned()) ) );
self->persistentData->set( KeyValueRef( BinaryWriter::toValue(logData->logId,Unversioned()).withPrefix(persistKnownCommittedVersionKeys.begin), BinaryWriter::toValue(logData->knownCommittedVersion, Unversioned()) ) );
logData->persistentDataVersion = newPersistentDataVersion;
@ -808,7 +808,11 @@ void commitMessages( Reference<LogData> self, Version version, const std::vector
block.append(block.arena(), msg.message.begin(), msg.message.size());
for(auto tag : msg.tags) {
if(!(self->locality == tagLocalitySpecial || self->locality == tag.locality || tag.locality < 0)) {
if(self->locality == tagLocalitySatellite) {
if(!(tag == txsTag || tag.locality == tagLocalityLogRouter)) {
continue;
}
} else if(!(self->locality == tagLocalitySpecial || self->locality == tag.locality || tag.locality < 0)) {
continue;
}
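The new branch above restricts satellite logs to the transaction-state tag and log-router tags. A hypothetical predicate distilling the filter; the enum values mirror fdbclient's tag localities but are written out here as assumptions, not authoritative definitions:

enum TagLocality : int {
    tagLocalitySpecial   = -1,
    tagLocalityLogRouter = -2,
    tagLocalitySatellite = -5,
};

struct Tag {
    int locality;
    int id;
    bool operator==(const Tag& o) const { return locality == o.locality && id == o.id; }
};
static const Tag txsTag{ tagLocalitySpecial, 1 }; // assumed id, for illustration

// Should a log server with logLocality keep an index entry for tag?
bool shouldIndex(int logLocality, const Tag& tag) {
    if (logLocality == tagLocalitySatellite)
        // Satellites only serve the transaction-state tag and log routers.
        return tag == txsTag || tag.locality == tagLocalityLogRouter;
    // Everyone else: special logs index everything, otherwise locality must
    // match or the tag must be locality-less (negative).
    return logLocality == tagLocalitySpecial || logLocality == tag.locality || tag.locality < 0;
}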
@ -839,7 +843,7 @@ void commitMessages( Reference<LogData> self, Version version, const std::vector
addedBytes += SERVER_KNOBS->VERSION_MESSAGES_ENTRY_BYTES_WITH_OVERHEAD;
}
}
msgSize -= msg.message.size();
}
self->messageBlocks.push_back( std::make_pair(version, block) );
@ -1171,6 +1175,18 @@ ACTOR Future<Void> tLogCommit(
Void _ = wait(delay(0, g_network->getCurrentTask()));
}
state double waitStartT = 0;
while( self->bytesInput - self->bytesDurable >= SERVER_KNOBS->TLOG_HARD_LIMIT_BYTES && !logData->stopped ) {
if (now() - waitStartT >= 1) {
TraceEvent(SevWarn, "TLogUpdateLag", logData->logId)
.detail("Version", logData->version.get())
.detail("PersistentDataVersion", logData->persistentDataVersion)
.detail("PersistentDataDurableVersion", logData->persistentDataDurableVersion);
waitStartT = now();
}
Void _ = wait( delayJittered(.005, TaskTLogCommit) );
}
if(logData->stopped) {
req.reply.sendError( tlog_stopped() );
return Void();
@ -1225,9 +1241,9 @@ ACTOR Future<Void> initPersistentState( TLogData* self, Reference<LogData> logDa
state IKeyValueStore *storage = self->persistentData;
Void _ = wait(storage->init());
storage->set( persistFormat );
storage->set( KeyValueRef( BinaryWriter::toValue(logData->logId,Unversioned()).withPrefix(persistCurrentVersionKeys.begin), BinaryWriter::toValue(logData->version.get(), Unversioned()) ) );
storage->set( KeyValueRef( BinaryWriter::toValue(logData->logId,Unversioned()).withPrefix(persistKnownCommittedVersionKeys.begin), BinaryWriter::toValue(logData->knownCommittedVersion, Unversioned()) ) );
storage->set( KeyValueRef( BinaryWriter::toValue(logData->logId,Unversioned()).withPrefix(persistUnrecoveredBeforeVersionKeys.begin), BinaryWriter::toValue(logData->unrecoveredBefore, Unversioned()) ) );
storage->set( KeyValueRef( BinaryWriter::toValue(logData->logId,Unversioned()).withPrefix(persistLocalityKeys.begin), BinaryWriter::toValue(logData->locality, Unversioned()) ) );
storage->set( KeyValueRef( BinaryWriter::toValue(logData->logId,Unversioned()).withPrefix(persistLogRouterTagsKeys.begin), BinaryWriter::toValue(logData->logRouterTags, Unversioned()) ) );
storage->set( KeyValueRef( BinaryWriter::toValue(logData->logId,Unversioned()).withPrefix(persistRecoveryCountKeys.begin), BinaryWriter::toValue(logData->recoveryCount, Unversioned()) ) );
@ -1445,7 +1461,7 @@ ACTOR Future<Void> pullAsyncData( TLogData* self, Reference<LogData> logData, st
while (!endVersion.present() || logData->version.get() < endVersion.get()) {
loop {
choose {
when(Void _ = wait( r ? r->getMore() : Never() ) ) {
when(Void _ = wait( r ? r->getMore(TaskTLogCommit) : Never() ) ) {
if(poppedIsKnownCommitted) {
logData->knownCommittedVersion = std::max(logData->knownCommittedVersion, r->popped());
}
@ -1462,6 +1478,18 @@ ACTOR Future<Void> pullAsyncData( TLogData* self, Reference<LogData> logData, st
}
}
state double waitStartT = 0;
while( self->bytesInput - self->bytesDurable >= SERVER_KNOBS->TLOG_HARD_LIMIT_BYTES && !logData->stopped ) {
if (now() - waitStartT >= 1) {
TraceEvent(SevWarn, "TLogUpdateLag", logData->logId)
.detail("Version", logData->version.get())
.detail("PersistentDataVersion", logData->persistentDataVersion)
.detail("PersistentDataDurableVersion", logData->persistentDataDurableVersion);
waitStartT = now();
}
Void _ = wait( delayJittered(.005, TaskTLogCommit) );
}
if(logData->stopped) {
return Void();
}
@ -1610,14 +1638,14 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
state Future<Optional<Value>> fFormat = storage->readValue(persistFormat.key);
state Future<Standalone<VectorRef<KeyValueRef>>> fVers = storage->readRange(persistCurrentVersionKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fKnownCommitted = storage->readRange(persistKnownCommittedVersionKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fUnrecoveredBefore = storage->readRange(persistUnrecoveredBeforeVersionKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fLocality = storage->readRange(persistLocalityKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fLogRouterTags = storage->readRange(persistLogRouterTagsKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fRecoverCounts = storage->readRange(persistRecoveryCountKeys);
// FIXME: metadata in queue?
Void _ = wait( waitForAll( (vector<Future<Optional<Value>>>(), fFormat ) ) );
Void _ = wait( waitForAll( (vector<Future<Standalone<VectorRef<KeyValueRef>>>>(), fVers, fKnownCommitted, fUnrecoveredBefore, fLogRouterTags, fRecoverCounts) ) );
Void _ = wait( waitForAll( (vector<Future<Standalone<VectorRef<KeyValueRef>>>>(), fVers, fKnownCommitted, fLocality, fLogRouterTags, fRecoverCounts) ) );
if (fFormat.get().present() && !persistFormatReadableRange.contains( fFormat.get().get() )) {
//FIXME: remove when we no longer need to test upgrades from 4.X releases
@ -1659,9 +1687,9 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
ASSERT(fVers.get().size() == fRecoverCounts.get().size());
state std::map<UID, Version> id_unrecoveredBefore;
for(auto it : fUnrecoveredBefore.get()) {
id_unrecoveredBefore[ BinaryReader::fromStringRef<UID>(it.key.removePrefix(persistUnrecoveredBeforeVersionKeys.begin), Unversioned())] = BinaryReader::fromStringRef<Version>( it.value, Unversioned() );
state std::map<UID, int8_t> id_locality;
for(auto it : fLocality.get()) {
id_locality[ BinaryReader::fromStringRef<UID>(it.key.removePrefix(persistLocalityKeys.begin), Unversioned())] = BinaryReader::fromStringRef<int8_t>( it.value, Unversioned() );
}
state std::map<UID, int> id_logRouterTags;
@ -1695,13 +1723,11 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
//We do not need the remoteTag, because we will not be loading any additional data
logData = Reference<LogData>( new LogData(self, recruited, Tag(), true, id_logRouterTags[id1], UID()) );
logData->locality = tagLocalitySpecial;
logData->locality = id_locality[id1];
logData->stopped = true;
self->id_data[id1] = logData;
id_interf[id1] = recruited;
logData->unrecoveredBefore = id_unrecoveredBefore[id1];
logData->recoveredAt = logData->unrecoveredBefore;
logData->knownCommittedVersion = id_knownCommitted[id1];
Version ver = BinaryReader::fromStringRef<Version>( fVers.get()[idx].value, Unversioned() );
logData->persistentDataVersion = ver;

View File

@ -84,11 +84,12 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
Version knownCommittedVersion;
LocalityData locality;
std::map< std::pair<UID, Tag>, std::pair<Version, Version> > outstandingPops; // For each currently running popFromLog actor, (log server #, tag)->popped version
ActorCollection actors;
Optional<PromiseStream<Future<Void>>> addActor;
ActorCollection popActors;
std::vector<OldLogData> oldLogData;
AsyncTrigger logSystemConfigChanged;
TagPartitionedLogSystem( UID dbgid, LocalityData locality ) : dbgid(dbgid), locality(locality), actors(false), recoveryCompleteWrittenToCoreState(false), remoteLogsWrittenToCoreState(false), logSystemType(0), logRouterTags(0), expectedLogSets(0), hasRemoteServers(false), stopped(false) {}
TagPartitionedLogSystem( UID dbgid, LocalityData locality, Optional<PromiseStream<Future<Void>>> addActor = Optional<PromiseStream<Future<Void>>>() ) : dbgid(dbgid), locality(locality), addActor(addActor), popActors(false), recoveryCompleteWrittenToCoreState(false), remoteLogsWrittenToCoreState(false), logSystemType(0), logRouterTags(0), expectedLogSets(0), hasRemoteServers(false), stopped(false) {}
virtual void stopRejoins() {
rejoins = Future<Void>();
@ -121,10 +122,10 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
return epochEnd( outLogSystem, dbgid, oldState, rejoins, locality );
}
static Reference<ILogSystem> fromLogSystemConfig( UID const& dbgid, LocalityData const& locality, LogSystemConfig const& lsConf, bool excludeRemote, bool usePreviousEpochEnd ) {
static Reference<ILogSystem> fromLogSystemConfig( UID const& dbgid, LocalityData const& locality, LogSystemConfig const& lsConf, bool excludeRemote, bool usePreviousEpochEnd, Optional<PromiseStream<Future<Void>>> addActor ) {
ASSERT( lsConf.logSystemType == 2 || (lsConf.logSystemType == 0 && !lsConf.tLogs.size()) );
//ASSERT(lsConf.epoch == epoch); //< FIXME
Reference<TagPartitionedLogSystem> logSystem( new TagPartitionedLogSystem(dbgid, locality) );
Reference<TagPartitionedLogSystem> logSystem( new TagPartitionedLogSystem(dbgid, locality, addActor) );
logSystem->tLogs.reserve(lsConf.tLogs.size());
logSystem->expectedLogSets = lsConf.expectedLogSets;
@ -152,6 +153,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
logSet->isLocal = tLogSet.isLocal;
logSet->locality = tLogSet.locality;
logSet->startVersion = tLogSet.startVersion;
logSet->satelliteTagLocations = tLogSet.satelliteTagLocations;
logSet->updateLocalitySet();
filterLocalityDataForPolicy(logSet->tLogPolicy, &logSet->tLogLocalities);
}
@ -177,6 +179,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
logSet->isLocal = tLogData.isLocal;
logSet->locality = tLogData.locality;
logSet->startVersion = tLogData.startVersion;
logSet->satelliteTagLocations = tLogData.satelliteTagLocations;
//logSet.UpdateLocalitySet(); we do not update the locality set, since we never push to old logs
}
logSystem->oldLogData[i].logRouterTags = lsConf.oldTLogs[i].logRouterTags;
@ -211,6 +214,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
logSet->isLocal = tLogSet.isLocal;
logSet->locality = tLogSet.locality;
logSet->startVersion = tLogSet.startVersion;
logSet->satelliteTagLocations = tLogSet.satelliteTagLocations;
//logSet->updateLocalitySet(); we do not update the locality set, since we never push to old logs
}
logSystem->logRouterTags = lsConf.oldTLogs[0].logRouterTags;
@ -236,6 +240,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
logSet->isLocal = tLogSet.isLocal;
logSet->locality = tLogSet.locality;
logSet->startVersion = tLogSet.startVersion;
logSet->satelliteTagLocations = tLogSet.satelliteTagLocations;
//logSet->updateLocalitySet(); we do not update the locality set, since we never push to old logs
}
logSystem->oldLogData[i-1].logRouterTags = lsConf.oldTLogs[i].logRouterTags;
@ -270,6 +275,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
coreSet.isLocal = t->isLocal;
coreSet.locality = t->locality;
coreSet.startVersion = t->startVersion;
coreSet.satelliteTagLocations = t->satelliteTagLocations;
newState.tLogs.push_back(coreSet);
}
}
@ -291,6 +297,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
coreSet.isLocal = t->isLocal;
coreSet.locality = t->locality;
coreSet.startVersion = t->startVersion;
coreSet.satelliteTagLocations = t->satelliteTagLocations;
newState.oldTLogData[i].tLogs.push_back(coreSet);
}
}
@ -380,7 +387,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
changes.push_back(self->recoveryCompleteWrittenToCoreState.onChange());
ASSERT( failed.size() >= 1 );
Void _ = wait( quorum(changes, 1) || tagError<Void>( quorum( failed, 1 ), master_tlog_failed() ) || self->actors.getResult() );
Void _ = wait( quorum(changes, 1) || tagError<Void>( quorum( failed, 1 ), master_tlog_failed() ) );
}
}
@ -396,7 +403,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
it->logServers[loc]->get().interf().commit.getReply(
TLogCommitRequest( data.getArena(), prevVersion, version, knownCommittedVersion, data.getMessages(location), debugID ), TaskTLogCommitReply ),
getDebugID());
actors.add(commitMessage);
addActor.get().send(commitMessage);
tLogCommitResults.push_back(commitMessage);
location++;
}
@ -412,7 +419,8 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
std::vector<Reference<LogSet>> localSets;
Version lastBegin = 0;
for(auto& log : tLogs) {
if(log->isLocal && log->logServers.size() && (log->locality == tagLocalitySpecial || log->locality == tagLocalityUpgraded || log->locality == tag.locality || tag.locality < 0)) {
if(log->isLocal && log->logServers.size() && (log->locality == tagLocalitySpecial || log->locality == tagLocalityUpgraded || log->locality == tag.locality ||
tag == txsTag || tag.locality == tagLocalityLogRouter || (tag.locality == tagLocalityUpgraded && log->locality != tagLocalitySatellite))) {
lastBegin = std::max(lastBegin, log->startVersion);
localSets.push_back(log);
if(log->locality != tagLocalitySatellite) {
@ -454,7 +462,8 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
std::vector<Reference<LogSet>> localOldSets;
Version thisBegin = begin;
for(auto& log : oldLogData[i].tLogs) {
if(log->isLocal && log->logServers.size() && (log->locality == tagLocalitySpecial || log->locality == tagLocalityUpgraded || log->locality == tag.locality || tag.locality < 0)) {
if(log->isLocal && log->logServers.size() && (log->locality == tagLocalitySpecial || log->locality == tagLocalityUpgraded || log->locality == tag.locality ||
tag == txsTag || tag.locality == tagLocalityLogRouter || (tag.locality == tagLocalityUpgraded && log->locality != tagLocalitySatellite))) {
thisBegin = std::max(thisBegin, log->startVersion);
localOldSets.push_back(log);
if(log->locality != tagLocalitySatellite) {
@ -587,7 +596,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
int bestSet = -1;
bool foundSpecial = false;
for(int t = 0; t < tLogs.size(); t++) {
if(tLogs[t]->logServers.size() && (tLogs[t]->locality == tagLocalitySpecial || tLogs[t]->locality == tagLocalityUpgraded || tLogs[t]->locality == tag.locality)) {
if(tLogs[t]->logServers.size() && (tLogs[t]->locality == tagLocalitySpecial || tLogs[t]->locality == tagLocalityUpgraded || tLogs[t]->locality == tag.locality || tag.locality == tagLocalityUpgraded)) {
if( tLogs[t]->locality == tagLocalitySpecial || tLogs[t]->locality == tagLocalityUpgraded ) {
foundSpecial = true;
}
@ -621,7 +630,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
int bestOldSet = -1;
for(int t = 0; t < oldLogData[i].tLogs.size(); t++) {
if(oldLogData[i].tLogs[t]->logServers.size() && (oldLogData[i].tLogs[t]->locality == tagLocalitySpecial || oldLogData[i].tLogs[t]->locality == tagLocalityUpgraded || oldLogData[i].tLogs[t]->locality == tag.locality)) {
if(oldLogData[i].tLogs[t]->logServers.size() && (oldLogData[i].tLogs[t]->locality == tagLocalitySpecial || oldLogData[i].tLogs[t]->locality == tagLocalityUpgraded || oldLogData[i].tLogs[t]->locality == tag.locality || tag.locality == tagLocalityUpgraded)) {
if( oldLogData[i].tLogs[t]->locality == tagLocalitySpecial || oldLogData[i].tLogs[t]->locality == tagLocalityUpgraded ) {
foundSpecial = true;
}
@ -765,8 +774,9 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
Version prev = outstandingPops[std::make_pair(log->get().id(),tag)].first;
if (prev < upTo)
outstandingPops[std::make_pair(log->get().id(),tag)] = std::make_pair(upTo, knownCommittedVersion);
if (prev == 0)
actors.add( popFromLog( this, log, tag, 0.0 ) ); //Fast pop time because log routers can only hold 5 seconds of data.
if (prev == 0) {
popActors.add( popFromLog( this, log, tag, 0.0 ) ); //Fast pop time because log routers can only hold 5 seconds of data.
}
}
}
}
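This is the pop-coalescing pattern described by the outstandingPops comment earlier in this file: record the highest requested pop version per (log, tag) and spawn the worker only on the first request. A simplified non-actor sketch, tracking just the pop version (the real map also carries knownCommittedVersion) with stand-in names:

#include <cstdint>
#include <cstdio>
#include <map>
#include <utility>

using Version = int64_t;

// (log id, tag) -> highest pop version requested so far.
static std::map<std::pair<int, int>, Version> outstandingPops;

// Stand-in for the popFromLog actor: in the real code it keeps draining the
// latest requested version for its (log, tag) key until there is no more work.
static void startPopWorker(int logId, int tag) {
    printf("popFromLog started for log %d, tag %d\n", logId, tag);
}

void requestPop(int logId, int tag, Version upTo) {
    Version& requested = outstandingPops[{logId, tag}];
    if (requested >= upTo)
        return;                     // an equal-or-later pop is already queued
    Version prev = requested;
    requested = upTo;               // coalesce into the running worker's target
    if (prev == 0)
        startPopWorker(logId, tag); // first request for this key: spawn the worker
}

int main() {
    requestPop(1, 7, 100); // spawns the worker
    requestPop(1, 7, 250); // coalesced; no second worker
    return 0;
}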
@ -779,7 +789,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
if (prev < upTo)
outstandingPops[std::make_pair(log->get().id(),tag)] = std::make_pair(upTo, knownCommittedVersion);
if (prev == 0)
actors.add( popFromLog( this, log, tag, 0.0 ) );
popActors.add( popFromLog( this, log, tag, 0.0 ) );
}
}
}
@ -800,7 +810,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
if (prev < upTo)
outstandingPops[std::make_pair(log->get().id(),tag)] = std::make_pair(upTo, knownCommittedVersion);
if (prev == 0)
actors.add( popFromLog( this, log, tag, 1.0 ) ); //< FIXME: knob
popActors.add( popFromLog( this, log, tag, 1.0 ) ); //< FIXME: knob
}
}
}
@ -934,6 +944,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
log.isLocal = logSet->isLocal;
log.locality = logSet->locality;
log.startVersion = logSet->startVersion;
log.satelliteTagLocations = logSet->satelliteTagLocations;
for( int i = 0; i < logSet->logServers.size(); i++ ) {
log.tLogs.push_back(logSet->logServers[i]->get());
@ -960,6 +971,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
log.isLocal = logSet->isLocal;
log.locality = logSet->locality;
log.startVersion = logSet->startVersion;
log.satelliteTagLocations = logSet->satelliteTagLocations;
for( int i = 0; i < logSet->logServers.size(); i++ ) {
log.tLogs.push_back(logSet->logServers[i]->get());
@ -1195,6 +1207,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
logSet->isLocal = coreSet.isLocal;
logSet->locality = coreSet.locality;
logSet->startVersion = coreSet.startVersion;
logSet->satelliteTagLocations = coreSet.satelliteTagLocations;
logFailed.push_back(failed);
}
oldLogData.resize(prevState.oldTLogData.size());
@ -1218,6 +1231,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
logSet->isLocal = log.isLocal;
logSet->locality = log.locality;
logSet->startVersion = log.startVersion;
logSet->satelliteTagLocations = log.satelliteTagLocations;
}
oldData.epochEnd = old.epochEnd;
oldData.logRouterTags = old.logRouterTags;
@ -1479,8 +1493,8 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
state RecruitRemoteFromConfigurationReply remoteWorkers = wait( fRemoteWorkers );
state Reference<LogSet> logSet = Reference<LogSet>( new LogSet() );
logSet->tLogReplicationFactor = configuration.remoteTLogReplicationFactor;
logSet->tLogPolicy = configuration.remoteTLogPolicy;
logSet->tLogReplicationFactor = configuration.getRemoteTLogReplicationFactor();
logSet->tLogPolicy = configuration.getRemoteTLogPolicy();
logSet->isLocal = false;
logSet->locality = remoteLocality;
@ -1587,6 +1601,13 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
logSystem->previousEpochEndVersion = oldLogSystem->epochEndVersion;
logSystem->recruitmentID = g_random->randomUniqueID();
oldLogSystem->recruitmentID = logSystem->recruitmentID;
if(configuration.usableRegions > 1) {
logSystem->logRouterTags = recr.tLogs.size();
logSystem->expectedLogSets++;
} else {
logSystem->logRouterTags = 0;
}
logSystem->tLogs.push_back( Reference<LogSet>( new LogSet() ) );
logSystem->tLogs[0]->tLogWriteAntiQuorum = configuration.tLogWriteAntiQuorum;
@ -1605,14 +1626,17 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
logSystem->tLogs[1]->isLocal = true;
logSystem->tLogs[1]->locality = tagLocalitySatellite;
logSystem->tLogs[1]->startVersion = oldLogSystem->knownCommittedVersion + 1;
logSystem->expectedLogSets++;
}
if(configuration.remoteTLogReplicationFactor > 0) {
logSystem->logRouterTags = recr.tLogs.size();
logSystem->tLogs[1]->tLogLocalities.resize( recr.satelliteTLogs.size() );
for(int i = 0; i < recr.satelliteTLogs.size(); i++) {
logSystem->tLogs[1]->tLogLocalities[i] = recr.satelliteTLogs[i].locality;
}
filterLocalityDataForPolicy(logSystem->tLogs[1]->tLogPolicy, &logSystem->tLogs[1]->tLogLocalities);
logSystem->tLogs[1]->logServers.resize( recr.satelliteTLogs.size() ); // Dummy interfaces, so that logSystem->getPushLocations() below uses the correct size
logSystem->tLogs[1]->updateLocalitySet(logSystem->tLogs[1]->tLogLocalities);
logSystem->tLogs[1]->populateSatelliteTagLocations(logSystem->logRouterTags,oldLogSystem->logRouterTags);
logSystem->expectedLogSets++;
} else {
logSystem->logRouterTags = 0;
}
if(oldLogSystem->tLogs.size()) {
@ -1740,16 +1764,6 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
req.logRouterTags = logSystem->logRouterTags;
}
vector<LocalityData> satelliteLocalities;
satelliteLocalities.resize(recr.satelliteTLogs.size());
for(int i = 0; i < recr.satelliteTLogs.size(); i++) {
satelliteLocalities[i] = recr.satelliteTLogs[i].locality;
}
logSystem->tLogs[1]->tLogLocalities.resize( recr.satelliteTLogs.size() );
logSystem->tLogs[1]->logServers.resize( recr.satelliteTLogs.size() ); // Dummy interfaces, so that logSystem->getPushLocations() below uses the correct size
logSystem->tLogs[1]->updateLocalitySet(satelliteLocalities);
for(int i = -1; i < oldLogSystem->logRouterTags; i++) {
Tag tag = i == -1 ? txsTag : Tag(tagLocalityLogRouter, i);
locations.clear();
@ -1765,9 +1779,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
for( int i = 0; i < satelliteInitializationReplies.size(); i++ ) {
logSystem->tLogs[1]->logServers[i] = Reference<AsyncVar<OptionalInterface<TLogInterface>>>( new AsyncVar<OptionalInterface<TLogInterface>>( OptionalInterface<TLogInterface>(satelliteInitializationReplies[i].get()) ) );
logSystem->tLogs[1]->tLogLocalities[i] = recr.satelliteTLogs[i].locality;
}
filterLocalityDataForPolicy(logSystem->tLogs[1]->tLogPolicy, &logSystem->tLogs[1]->tLogLocalities);
for( int i = 0; i < logSystem->tLogs[1]->logServers.size(); i++)
recoveryComplete.push_back( transformErrors( throwErrorOr( logSystem->tLogs[1]->logServers[i]->get().interf().recoveryFinished.getReplyUnlessFailedFor( TLogRecoveryFinishedRequest(), SERVER_KNOBS->TLOG_TIMEOUT, SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY ) ), master_recovery_failed() ) );
@ -1788,7 +1800,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
recoveryComplete.push_back( transformErrors( throwErrorOr( logSystem->tLogs[0]->logServers[i]->get().interf().recoveryFinished.getReplyUnlessFailedFor( TLogRecoveryFinishedRequest(), SERVER_KNOBS->TLOG_TIMEOUT, SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY ) ), master_recovery_failed() ) );
logSystem->recoveryComplete = waitForAll(recoveryComplete);
if(configuration.remoteTLogReplicationFactor > 0) {
if(configuration.usableRegions > 1) {
logSystem->hasRemoteServers = true;
logSystem->remoteRecovery = TagPartitionedLogSystem::newRemoteEpoch(logSystem.getPtr(), oldLogSystem, fRemoteWorkers, configuration, recoveryCount, remoteLocality, allTags);
} else {
@ -2033,11 +2045,11 @@ Future<Void> ILogSystem::recoverAndEndEpoch(Reference<AsyncVar<Reference<ILogSys
return TagPartitionedLogSystem::recoverAndEndEpoch( outLogSystem, dbgid, oldState, rejoins, locality );
}
Reference<ILogSystem> ILogSystem::fromLogSystemConfig( UID const& dbgid, struct LocalityData const& locality, struct LogSystemConfig const& conf, bool excludeRemote, bool usePreviousEpochEnd ) {
Reference<ILogSystem> ILogSystem::fromLogSystemConfig( UID const& dbgid, struct LocalityData const& locality, struct LogSystemConfig const& conf, bool excludeRemote, bool usePreviousEpochEnd, Optional<PromiseStream<Future<Void>>> addActor ) {
if (conf.logSystemType == 0)
return Reference<ILogSystem>();
else if (conf.logSystemType == 2)
return TagPartitionedLogSystem::fromLogSystemConfig( dbgid, locality, conf, excludeRemote, usePreviousEpochEnd );
return TagPartitionedLogSystem::fromLogSystemConfig( dbgid, locality, conf, excludeRemote, usePreviousEpochEnd, addActor );
else
throw internal_error();
}
@ -2051,6 +2063,6 @@ Reference<ILogSystem> ILogSystem::fromOldLogSystemConfig( UID const& dbgid, stru
throw internal_error();
}
Reference<ILogSystem> ILogSystem::fromServerDBInfo( UID const& dbgid, ServerDBInfo const& dbInfo, bool usePreviousEpochEnd ) {
return fromLogSystemConfig( dbgid, dbInfo.myLocality, dbInfo.logSystemConfig, false, usePreviousEpochEnd );
Reference<ILogSystem> ILogSystem::fromServerDBInfo( UID const& dbgid, ServerDBInfo const& dbInfo, bool usePreviousEpochEnd, Optional<PromiseStream<Future<Void>>> addActor ) {
return fromLogSystemConfig( dbgid, dbInfo.myLocality, dbInfo.logSystemConfig, false, usePreviousEpochEnd, addActor );
}
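The addActor parameter threaded through above replaces the log system's own ActorCollection: commit futures are sent to an external owner so they can outlive the log system reference. A rough plain-C++ analogue of the pattern, with a queue and std::async standing in for PromiseStream<Future<Void>> and actorCollection():

#include <cstdio>
#include <future>
#include <mutex>
#include <utility>
#include <vector>

// Stand-in for PromiseStream<Future<Void>>: a thread-safe channel of tasks.
struct ActorChannel {
    std::mutex m;
    std::vector<std::future<void>> pending;
    void send(std::future<void> f) {
        std::lock_guard<std::mutex> g(m);
        pending.push_back(std::move(f));
    }
};

// Stand-in for actorCollection(): the single owner that joins every task sent
// through the channel, so errors surface here instead of in the sender.
void collect(ActorChannel& ch) {
    std::lock_guard<std::mutex> g(ch.m);
    for (auto& f : ch.pending) f.get();
}

int main() {
    ActorChannel ch;
    // A "log system" hands off a commit future it no longer wants to own:
    ch.send(std::async(std::launch::async, [] { std::puts("commit message sent"); }));
    collect(ch);
    return 0;
}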

View File

@ -20,7 +20,7 @@
# -*- mode: makefile; -*-
fdbserver_CFLAGS := $(fdbclient_CFLAGS)
fdbserver_CFLAGS := $(fdbclient_CFLAGS) -I fdbserver/workloads
fdbserver_LDFLAGS := $(fdbrpc_LDFLAGS)
fdbserver_LIBS := lib/libfdbclient.a lib/libfdbrpc.a lib/libflow.a

View File

@ -291,7 +291,7 @@ ACTOR Future<Void> newResolvers( Reference<MasterData> self, RecruitFromConfigur
}
ACTOR Future<Void> newTLogServers( Reference<MasterData> self, RecruitFromConfigurationReply recr, Reference<ILogSystem> oldLogSystem, vector<Standalone<CommitTransactionRef>>* initialConfChanges ) {
if(self->configuration.remoteTLogReplicationFactor > 0) {
if(self->configuration.usableRegions > 1) {
state Optional<Key> remoteDcId = self->remoteDcIds.size() ? self->remoteDcIds[0] : Optional<Key>();
if( !self->dcId_locality.count(recr.dcId) ) {
TraceEvent(SevWarn, "UnknownPrimaryDCID", self->dbgid).detail("PrimaryId", printable(recr.dcId));

View File

@ -294,7 +294,7 @@ public:
allHistoryCopy = allHistory;
hist = &allHistoryCopy;
}
while(hist->size() && v > hist->back().first ) {
logSystem->pop( v, hist->back().second );
hist->pop_back();
@ -453,7 +453,7 @@ public:
shuttingDown(false), debug_inApplyUpdate(false), debug_lastValidateTime(0), watchBytes(0),
logProtocol(0), counters(this), tag(invalidTag), maxQueryQueue(0), thisServerID(ssi.id()),
readQueueSizeMetric(LiteralStringRef("StorageServer.ReadQueueSize")),
behind(false), byteSampleClears(false, LiteralStringRef("\xff\xff\xff")), noRecentUpdates(false),
lastUpdate(now()), poppedAllAfter(std::numeric_limits<Version>::max())
{
version.initMetric(LiteralStringRef("StorageServer.Version"), counters.cc.id);
@ -815,7 +815,7 @@ ACTOR Future<Void> watchValue_impl( StorageServer* data, WatchValueRequest req )
ACTOR Future<Void> watchValueQ( StorageServer* data, WatchValueRequest req ) {
state Future<Void> watch = watchValue_impl( data, req );
state double startTime = now();
loop {
double timeoutDelay = -1;
if(data->noRecentUpdates.get()) {
@ -2324,12 +2324,11 @@ ACTOR Future<Void> update( StorageServer* data, bool* pReceivedUpdate )
// If we are disk bound and durableVersion is very old, we need to block updates or we could run out of memory
// This is often referred to as the storage server e-brake (emergency brake)
state double waitStartT = 0;
while ( data->queueSize() >= SERVER_KNOBS->STORAGE_HARD_LIMIT_BYTES && data->durableVersion.get() < data->desiredOldestVersion.get() )
{
if (now() - waitStartT >= .1) {
while ( data->queueSize() >= SERVER_KNOBS->STORAGE_HARD_LIMIT_BYTES && data->durableVersion.get() < data->desiredOldestVersion.get() ) {
if (now() - waitStartT >= 1) {
TraceEvent(SevWarn, "StorageServerUpdateLag", data->thisServerID)
.detail("Version", data->version.get())
.detail("DurableVersion", data->durableVersion.get()).suppressFor(1.0);
.detail("DurableVersion", data->durableVersion.get());
waitStartT = now();
}
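The e-brake loop above stalls ingestion while the queue is over the hard limit and emits the lag warning at most once per second (the first pass fires immediately because waitStartT starts at 0). A minimal standalone sketch, with now(), the sleep, and the limit as stand-ins for the flow runtime and server knobs:

#include <chrono>
#include <cstdio>
#include <thread>

static double now() {
    using namespace std::chrono;
    return duration<double>(steady_clock::now().time_since_epoch()).count();
}

// queueSize and hardLimitBytes stand in for data->queueSize() and
// SERVER_KNOBS->STORAGE_HARD_LIMIT_BYTES.
void eBrake(long long (*queueSize)(), long long hardLimitBytes) {
    double waitStartT = 0;
    while (queueSize() >= hardLimitBytes) {
        if (now() - waitStartT >= 1) { // warn at most once per second
            fprintf(stderr, "StorageServerUpdateLag: queue=%lld\n", queueSize());
            waitStartT = now();
        }
        // Let durability catch up before rechecking (delayJittered in flow).
        std::this_thread::sleep_for(std::chrono::milliseconds(5));
    }
}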
@ -2343,7 +2342,7 @@ ACTOR Future<Void> update( StorageServer* data, bool* pReceivedUpdate )
state Reference<ILogSystem::IPeekCursor> cursor = data->logCursor;
//TraceEvent("SSUpdatePeeking", data->thisServerID).detail("MyVer", data->version.get()).detail("Epoch", data->updateEpoch).detail("Seq", data->updateSequence);
loop {
Void _ = wait( cursor->getMore() );
if(!cursor->isExhausted()) {
@ -3291,7 +3290,7 @@ ACTOR Future<Void> replaceInterface( StorageServer* self, StorageServerInterface
tr.addReadConflictRange(singleKeyRange(tagLocalityListKeyFor(ssi.locality.dcId())));
tr.set(serverListKeyFor(ssi.id()), serverListValue(ssi));
if(rep.newLocality) {
tr.addReadConflictRange(tagLocalityListKeys);
tr.set( tagLocalityListKeyFor(ssi.locality.dcId()), tagLocalityListValue(rep.newTag.get().locality) );

View File

@ -1076,8 +1076,8 @@ struct ConsistencyCheckWorkload : TestWorkload
if(( configuration.regions.size() == 0 && missingStorage.size()) ||
(configuration.regions.size() == 1 && missingStorage.count(configuration.regions[0].dcId)) ||
(configuration.regions.size() == 2 && configuration.remoteTLogReplicationFactor == 0 && missingStorage.count(configuration.regions[0].dcId) && missingStorage.count(configuration.regions[1].dcId)) ||
(configuration.regions.size() == 2 && configuration.remoteTLogReplicationFactor > 0 && (missingStorage.count(configuration.regions[0].dcId) || missingStorage.count(configuration.regions[1].dcId)))) {
(configuration.regions.size() == 2 && configuration.usableRegions == 1 && missingStorage.count(configuration.regions[0].dcId) && missingStorage.count(configuration.regions[1].dcId)) ||
(configuration.regions.size() == 2 && configuration.usableRegions > 1 && (missingStorage.count(configuration.regions[0].dcId) || missingStorage.count(configuration.regions[1].dcId)))) {
self->testFailure("No storage server on worker");
return false;
}

View File

@ -134,9 +134,7 @@ struct MoveKeysWorkload : TestWorkload {
try {
state Promise<Void> signal;
Void _ = wait( moveKeys( cx, keys, destinationTeamIDs, destinationTeamIDs, lock,
self->configuration.durableStorageQuorum,
signal, &fl1, &fl2, invalidVersion, false, relocateShardInterval.pairID ) );
Void _ = wait( moveKeys( cx, keys, destinationTeamIDs, destinationTeamIDs, lock, signal, &fl1, &fl2, invalidVersion, false, relocateShardInterval.pairID ) );
TraceEvent(relocateShardInterval.end()).detail("Result","Success");
return Void();
} catch (Error& e) {
@ -175,7 +173,7 @@ struct MoveKeysWorkload : TestWorkload {
ASSERT( self->configuration.storageTeamSize > 0 );
if(self->configuration.remoteTLogReplicationFactor > 0) { //FIXME: add support for generating random teams across DCs
if(self->configuration.usableRegions > 1) { //FIXME: add support for generating random teams across DCs
return Void();
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long