Merge pull request #3273 from apple/release-6.3

Merge release-6.3 into master
This commit is contained in:
A.J. Beamon 2020-05-29 16:58:36 -07:00 committed by GitHub
commit 414206015d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 50 additions and 13 deletions

View File

@ -11,6 +11,7 @@ Fixes
* HTTPS requests made by backup could hang indefinitely. `(PR #3027) <https://github.com/apple/foundationdb/pull/3027>`_
* ``fdbrestore`` prefix options required exactly a single hyphen instead of the standard two. `(PR #3056) <https://github.com/apple/foundationdb/pull/3056>`_
* Commits could stall on a newly elected proxy because of inaccurate compute estimates. `(PR #3123) <https://github.com/apple/foundationdb/pull/3123>`_
* A transaction class process with a bad disk could be repeatedly recruited as a transaction log. `(PR #3268) <https://github.com/apple/foundationdb/pull/3268>`_
Features
--------

View File

@ -167,7 +167,11 @@ class DLDatabase : public IDatabase, ThreadSafeReferenceCounted<DLDatabase> {
public:
DLDatabase(Reference<FdbCApi> api, FdbCApi::FDBDatabase *db) : api(api), db(db), ready(Void()) {}
DLDatabase(Reference<FdbCApi> api, ThreadFuture<FdbCApi::FDBDatabase*> dbFuture);
~DLDatabase() { api->databaseDestroy(db); }
~DLDatabase() {
if (db) {
api->databaseDestroy(db);
}
}
ThreadFuture<Void> onReady();

View File

@ -99,7 +99,6 @@ int mostUsedZoneCount(Reference<LocalitySet>& logServerSet, std::vector<Locality
bool findBestPolicySetSimple(int targetUniqueValueCount, Reference<LocalitySet>& logServerSet, std::vector<LocalityEntry>& bestSet,
int desired) {
auto& mutableEntries = logServerSet->getMutableEntries();
deterministicRandom()->randomShuffle(mutableEntries);
// First make sure the current localitySet is able to fulfill the policy
AttribKey indexKey = logServerSet->keyIndex("zoneid");
int uniqueValueCount = logServerSet->getKeyValueArray()[indexKey._id].size();
@ -118,18 +117,24 @@ bool findBestPolicySetSimple(int targetUniqueValueCount, Reference<LocalitySet>&
}
ASSERT_WE_THINK(uniqueValueCount == entries.size());
// Copy the per-key index lists out of `entries` into a vector so they can be
// visited in a shuffled order instead of the container's iteration order.
// reserve() is used rather than resize(): resize() would create entries.size()
// empty placeholder vectors ahead of the pushed ones, doubling the vector and
// adding empty slots that the selection loop would have to step over.
std::vector<std::vector<int>> randomizedEntries;
randomizedEntries.reserve(entries.size());
for(const auto& keyedIndices : entries) {
	// Bind by const reference so the key/value pair is not copied per iteration;
	// only the index list itself is copied into the shuffled vector.
	randomizedEntries.push_back(keyedIndices.second);
}
deterministicRandom()->randomShuffle(randomizedEntries);
desired = std::max(desired, targetUniqueValueCount);
auto it = entries.begin();
auto it = randomizedEntries.begin();
while (bestSet.size() < desired) {
if(it->second.size()) {
bestSet.push_back(mutableEntries[it->second.back()]);
it->second.pop_back();
if(it->size()) {
bestSet.push_back(mutableEntries[it->back()]);
it->pop_back();
}
++it;
if(it == entries.end()) {
it = entries.begin();
if(it == randomizedEntries.end()) {
it = randomizedEntries.begin();
}
}

View File

@ -85,6 +85,10 @@ struct LogRouterData {
bool allowPops;
LogSet logSet;
bool foundEpochEnd;
double waitForVersionTime = 0;
double maxWaitForVersionTime = 0;
double getMoreTime = 0;
double maxGetMoreTime = 0;
struct PeekTrackerData {
std::map<int, Promise<std::pair<Version, bool>>> sequence_version;
@ -94,6 +98,7 @@ struct LogRouterData {
std::map<UID, PeekTrackerData> peekTracker;
CounterCollection cc;
Counter getMoreCount, getMoreBlockedCount;
Future<Void> logger;
Reference<EventCacheHolder> eventCacheHolder;
@ -116,7 +121,7 @@ struct LogRouterData {
LogRouterData(UID dbgid, const InitializeLogRouterRequest& req) : dbgid(dbgid), routerTag(req.routerTag), logSystem(new AsyncVar<Reference<ILogSystem>>()),
version(req.startVersion-1), minPopped(0), startVersion(req.startVersion), allowPops(false), minKnownCommittedVersion(0), poppedVersion(0), foundEpochEnd(false),
cc("LogRouter", dbgid.toString()) {
cc("LogRouter", dbgid.toString()), getMoreCount("GetMoreCount", cc), getMoreBlockedCount("GetMoreBlockedCount", cc) {
//setup just enough of a logSet to be able to call getPushLocations
logSet.logServers.resize(req.tLogLocalities.size());
logSet.tLogPolicy = req.tLogPolicy;
@ -133,11 +138,16 @@ struct LogRouterData {
eventCacheHolder = Reference<EventCacheHolder>( new EventCacheHolder(dbgid.shortString() + ".PeekLocation") );
specialCounter(cc, "Version", [this](){return this->version.get(); });
specialCounter(cc, "MinPopped", [this](){return this->minPopped.get(); });
specialCounter(cc, "Version", [this](){ return this->version.get(); });
specialCounter(cc, "MinPopped", [this](){ return this->minPopped.get(); });
specialCounter(cc, "FetchedVersions", [this](){ return std::max<Version>(0, std::min<Version>(SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS, this->version.get() - this->minPopped.get())); });
specialCounter(cc, "MinKnownCommittedVersion", [this](){ return this->minKnownCommittedVersion; });
specialCounter(cc, "PoppedVersion", [this](){ return this->poppedVersion; });
specialCounter(cc, "FoundEpochEnd", [this](){ return this->foundEpochEnd; });
specialCounter(cc, "WaitForVersionMS", [this](){ double val = this->waitForVersionTime; this->waitForVersionTime = 0; return 1000*val; });
specialCounter(cc, "WaitForVersionMaxMS", [this](){ double val = this->maxWaitForVersionTime; this->maxWaitForVersionTime = 0; return 1000*val; });
specialCounter(cc, "GetMoreMS", [this](){ double val = this->getMoreTime; this->getMoreTime = 0; return 1000*val; });
specialCounter(cc, "GetMoreMaxMS", [this](){ double val = this->maxGetMoreTime; this->maxGetMoreTime = 0; return 1000*val; });
logger = traceCounters("LogRouterMetrics", dbgid, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, &cc, "LogRouterMetrics");
}
};
@ -195,11 +205,14 @@ void commitMessages( LogRouterData* self, Version version, const std::vector<Tag
ACTOR Future<Void> waitForVersion( LogRouterData *self, Version ver ) {
// The only time the log router should allow a gap in versions larger than MAX_READ_TRANSACTION_LIFE_VERSIONS is when processing epoch end.
// Since one set of log routers is created per generation of transaction logs, the gap caused by epoch end will be within MAX_VERSIONS_IN_FLIGHT of the log routers' start version.
state double startTime = now();
if(self->version.get() < self->startVersion) {
if(ver > self->startVersion) {
self->version.set(self->startVersion);
wait(self->minPopped.whenAtLeast(self->version.get()));
}
self->waitForVersionTime += now() - startTime;
self->maxWaitForVersionTime = std::max(self->maxWaitForVersionTime, now() - startTime);
return Void();
}
if(!self->foundEpochEnd) {
@ -217,6 +230,8 @@ ACTOR Future<Void> waitForVersion( LogRouterData *self, Version ver ) {
if(ver >= self->startVersion + SERVER_KNOBS->MAX_VERSIONS_IN_FLIGHT) {
self->foundEpochEnd = true;
}
self->waitForVersionTime += now() - startTime;
self->maxWaitForVersionTime = std::max(self->maxWaitForVersionTime, now() - startTime);
return Void();
}
@ -229,8 +244,19 @@ ACTOR Future<Void> pullAsyncData( LogRouterData *self ) {
loop {
loop {
Future<Void> getMoreF = Never();
if(r) {
getMoreF = r->getMore(TaskPriority::TLogCommit);
++self->getMoreCount;
if(!getMoreF.isReady()) {
++self->getMoreBlockedCount;
}
}
state double startTime = now();
choose {
when(wait( r ? r->getMore(TaskPriority::TLogCommit) : Never() ) ) {
when(wait( getMoreF ) ) {
self->getMoreTime += now() - startTime;
self->maxGetMoreTime = std::max(self->maxGetMoreTime, now() - startTime);
break;
}
when( wait( dbInfoChange ) ) { //FIXME: does this actually happen?

View File

@ -212,7 +212,8 @@ ACTOR Future<Void> workerHandleErrors(FutureStream<ErrorInfo> errors) {
endRole(err.role, err.id, "Error", ok, err.error);
if (err.error.code() == error_code_please_reboot || err.error.code() == error_code_io_timeout) throw err.error;
if (err.error.code() == error_code_please_reboot || err.error.code() == error_code_io_timeout || (err.role == Role::SHARED_TRANSACTION_LOG && err.error.code() == error_code_io_error )) throw err.error;
}
}
}