Merge branch 'release-5.1' of github.com:apple/foundationdb into release-5.1

This commit is contained in:
Evan Tschannen 2018-03-09 09:56:09 -08:00
commit ae7d8e90b2
6 changed files with 112 additions and 28 deletions

View File

@ -735,9 +735,8 @@ public:
// These should not happen // These should not happen
if(e.code() == error_code_key_not_found) if(e.code() == error_code_key_not_found)
t.backtrace(); t.backtrace();
std::string msg = format("ERROR: %s %s", e.what(), details.c_str());
return updateErrorInfo(cx, e, msg); return updateErrorInfo(cx, e, details);
} }
}; };
#endif #endif

View File

@ -35,6 +35,49 @@
#include <boost/algorithm/string/classification.hpp> #include <boost/algorithm/string/classification.hpp>
#include <algorithm> #include <algorithm>
// Renders a boolean as the human-readable string "Yes" (true) or "No" (false).
static std::string boolToYesOrNo(bool val) {
	if (val) {
		return "Yes";
	}
	return "No";
}
// Formats an optional version as its decimal string, or "N/A" when no version is present.
static std::string versionToString(Optional<Version> version) {
	return version.present() ? std::to_string(version.get()) : std::string("N/A");
}
// Formats an optional timestamp as a local-time string using the strftime pattern "%D %T".
// The stored value is handed to localtime() as a time_t.
// Returns "N/A" when no timestamp is present, or when the value cannot be converted to a
// calendar time or formatted.
static std::string timeStampToString(Optional<int64_t> ts) {
	if (!ts.present())
		return "N/A";

	time_t curTs = ts.get();

	// localtime() returns a null pointer when the value is not representable as a calendar
	// time; handing that to strftime() would be undefined behavior.
	struct tm* timeinfo = localtime(&curTs);
	if (timeinfo == nullptr)
		return "N/A";

	char buffer[128];
	// strftime() returns 0 on failure and leaves the buffer contents unspecified, so build the
	// result only from the number of characters it reports having written.
	size_t written = strftime(buffer, sizeof(buffer), "%D %T", timeinfo);
	if (written == 0)
		return "N/A";

	return std::string(buffer, written);
}
// Resolves the timestamp for a version through timeKeeperEpochsFromVersion().
// When no version is present, an empty Optional is returned directly and the lookup is skipped.
static Future<Optional<int64_t>> getTimestampFromVersion(Optional<Version> ver, Reference<ReadYourWritesTransaction> tr) {
	if (ver.present()) {
		return timeKeeperEpochsFromVersion(ver.get(), tr);
	}
	return Optional<int64_t>();
}
// Renders a duration given in seconds using the largest unit that fits:
//   under 60 seconds    -> "N second(s)"
//   under 3600 seconds  -> "N.NN minute(s)"
//   under 86400 seconds -> "N.NN hour(s)"
//   otherwise           -> "N.NN day(s)"
// Negative values match none of the thresholds and are printed in the plain seconds form.
std::string secondsToTimeFormat(int64_t seconds) {
	if (seconds >= 86400)
		return format("%.2f day(s)", seconds / 86400.0);
	else if (seconds >= 3600)
		return format("%.2f hour(s)", seconds / 3600.0);
	else if (seconds >= 60)
		return format("%.2f minute(s)", seconds / 60.0);
	else
		// int64_t is not `long` on every platform (e.g. LLP64 Windows, macOS), so "%ld" is a
		// format/argument mismatch there. Cast explicitly and use "%lld".
		return format("%lld second(s)", static_cast<long long>(seconds));
}
const Key FileBackupAgent::keyLastRestorable = LiteralStringRef("last_restorable"); const Key FileBackupAgent::keyLastRestorable = LiteralStringRef("last_restorable");
// For convenience // For convenience
@ -177,9 +220,8 @@ public:
// These should not happen // These should not happen
if(e.code() == error_code_key_not_found) if(e.code() == error_code_key_not_found)
t.backtrace(); t.backtrace();
std::string msg = format("ERROR: %s (%s)", details.c_str(), e.what());
return updateErrorInfo(cx, e, msg); return updateErrorInfo(cx, e, details);
} }
Key mutationLogPrefix() { Key mutationLogPrefix() {
@ -790,7 +832,7 @@ namespace fileBackup {
// servers to catch and log to the appropriate config any error that execute/finish didn't catch and log. // servers to catch and log to the appropriate config any error that execute/finish didn't catch and log.
struct RestoreTaskFuncBase : TaskFuncBase { struct RestoreTaskFuncBase : TaskFuncBase {
virtual Future<Void> handleError(Database cx, Reference<Task> task, Error const &error) { virtual Future<Void> handleError(Database cx, Reference<Task> task, Error const &error) {
return RestoreConfig(task).logError(cx, error, format("Task '%s' UID '%s' %s failed", task->params[Task::reservedTaskParamKeyType].printable().c_str(), task->key.printable().c_str(), toString(task).c_str())); return RestoreConfig(task).logError(cx, error, format("'%s' on '%s'", error.what(), task->params[Task::reservedTaskParamKeyType].printable().c_str()));
} }
virtual std::string toString(Reference<Task> task) virtual std::string toString(Reference<Task> task)
{ {
@ -800,7 +842,7 @@ namespace fileBackup {
struct BackupTaskFuncBase : TaskFuncBase { struct BackupTaskFuncBase : TaskFuncBase {
virtual Future<Void> handleError(Database cx, Reference<Task> task, Error const &error) { virtual Future<Void> handleError(Database cx, Reference<Task> task, Error const &error) {
return BackupConfig(task).logError(cx, error, format("Task '%s' UID '%s' %s failed", task->params[Task::reservedTaskParamKeyType].printable().c_str(), task->key.printable().c_str(), toString(task).c_str())); return BackupConfig(task).logError(cx, error, format("'%s' on '%s'", error.what(), task->params[Task::reservedTaskParamKeyType].printable().c_str()));
} }
virtual std::string toString(Reference<Task> task) virtual std::string toString(Reference<Task> task)
{ {
@ -3538,17 +3580,50 @@ public:
state int64_t snapshotInterval; state int64_t snapshotInterval;
state Version snapshotBeginVersion; state Version snapshotBeginVersion;
state Version snapshotTargetEndVersion; state Version snapshotTargetEndVersion;
state Optional<Version> latestSnapshotEndVersion;
state Optional<Version> latestLogEndVersion;
state Optional<int64_t> logBytesWritten;
state Optional<int64_t> rangeBytesWritten;
state Optional<int64_t> latestSnapshotEndVersionTimestamp;
state Optional<int64_t> latestLogEndVersionTimestamp;
state Optional<int64_t> snapshotBeginVersionTimestamp;
state Optional<int64_t> snapshotTargetEndVersionTimestamp;
state bool stopWhenDone;
Void _ = wait( store(config.snapshotBeginVersion().getOrThrow(tr), snapshotBeginVersion) Void _ = wait( store(config.snapshotBeginVersion().getOrThrow(tr), snapshotBeginVersion)
&& store(config.snapshotTargetEndVersion().getOrThrow(tr), snapshotTargetEndVersion) && store(config.snapshotTargetEndVersion().getOrThrow(tr), snapshotTargetEndVersion)
&& store(config.snapshotIntervalSeconds().getOrThrow(tr), snapshotInterval) && store(config.snapshotIntervalSeconds().getOrThrow(tr), snapshotInterval)
); && store(config.logBytesWritten().get(tr), logBytesWritten)
&& store(config.rangeBytesWritten().get(tr), rangeBytesWritten)
&& store(config.latestLogEndVersion().get(tr), latestLogEndVersion)
&& store(config.latestSnapshotEndVersion().get(tr), latestSnapshotEndVersion)
&& store(config.stopWhenDone().getOrThrow(tr), stopWhenDone)
);
Void _ = wait( store(getTimestampFromVersion(latestSnapshotEndVersion, tr), latestSnapshotEndVersionTimestamp)
&& store(getTimestampFromVersion(latestLogEndVersion, tr), latestLogEndVersionTimestamp)
&& store(timeKeeperEpochsFromVersion(snapshotBeginVersion, tr), snapshotBeginVersionTimestamp)
&& store(timeKeeperEpochsFromVersion(snapshotTargetEndVersion, tr), snapshotTargetEndVersionTimestamp)
);
statusText += format("Snapshot interval is %lld seconds. ", snapshotInterval); statusText += format("Snapshot interval is %lld seconds. ", snapshotInterval);
if(backupState == BackupAgentBase::STATE_DIFFERENTIAL) if(backupState == BackupAgentBase::STATE_DIFFERENTIAL)
statusText += format("Current snapshot progress target is %3.2f%% (>100%% means the snapshot is supposed to be done)\n", 100.0 * (recentReadVersion - snapshotBeginVersion) / (snapshotTargetEndVersion - snapshotBeginVersion)) ; statusText += format("Current snapshot progress target is %3.2f%% (>100%% means the snapshot is supposed to be done)\n", 100.0 * (recentReadVersion - snapshotBeginVersion) / (snapshotTargetEndVersion - snapshotBeginVersion)) ;
else else
statusText += "The initial snapshot is still running.\n"; statusText += "The initial snapshot is still running.\n";
statusText += format("\nDetails:\n LogBytes written - %ld\n RangeBytes written - %ld\n "
"Last complete log version and timestamp - %s, %s\n "
"Last complete snapshot version and timestamp - %s, %s\n "
"Current Snapshot start version and timestamp - %s, %s\n "
"Expected snapshot end version and timestamp - %s, %s\n "
"Backup supposed to stop at next snapshot completion - %s\n",
logBytesWritten.orDefault(0), rangeBytesWritten.orDefault(0),
versionToString(latestLogEndVersion).c_str(), timeStampToString(latestLogEndVersionTimestamp).c_str(),
versionToString(latestSnapshotEndVersion).c_str(), timeStampToString(latestSnapshotEndVersionTimestamp).c_str(),
versionToString(snapshotBeginVersion).c_str(), timeStampToString(snapshotBeginVersionTimestamp).c_str(),
versionToString(snapshotTargetEndVersion).c_str(), timeStampToString(snapshotTargetEndVersionTimestamp).c_str(),
boolToYesOrNo(stopWhenDone).c_str());
} }
// Append the errors, if requested // Append the errors, if requested
@ -3559,8 +3634,7 @@ public:
for(auto &e : errors) { for(auto &e : errors) {
Version v = e.second.second; Version v = e.second.second;
std::string msg = format("%s (%lld seconds ago)\n", e.second.first.c_str(), std::string msg = format("%s ago : %s\n", secondsToTimeFormat((recentReadVersion - v) / CLIENT_KNOBS->CORE_VERSIONSPERSECOND).c_str(), e.second.first.c_str());
(recentReadVersion - v) / CLIENT_KNOBS->CORE_VERSIONSPERSECOND);
// If error version is at or more recent than the latest restorable version then it could be inhibiting progress // If error version is at or more recent than the latest restorable version then it could be inhibiting progress
if(v >= latestRestorableVersion.orDefault(0)) { if(v >= latestRestorableVersion.orDefault(0)) {
@ -3571,10 +3645,16 @@ public:
} }
} }
if(!recentErrors.empty()) if (!recentErrors.empty()) {
statusText += "Errors possibly preventing progress:\n" + recentErrors; if (latestRestorableVersion.present())
statusText += format("Recent Errors (since latest restorable point %s ago)\n",
secondsToTimeFormat((recentReadVersion - latestRestorableVersion.get()) / CLIENT_KNOBS->CORE_VERSIONSPERSECOND).c_str())
+ recentErrors;
else
statusText += "Recent Errors (since initialization)\n" + recentErrors;
}
if(!pastErrors.empty()) if(!pastErrors.empty())
statusText += "Older errors:\n" + pastErrors; statusText += "Older Errors\n" + pastErrors;
} }
} }

View File

@ -885,12 +885,12 @@ Reference<ProxyInfo> DatabaseContext::getMasterProxies() {
} }
//Actor which will wait until the ProxyInfo returned by the DatabaseContext cx is not NULL //Actor which will wait until the ProxyInfo returned by the DatabaseContext cx is not NULL
ACTOR Future<Reference<ProxyInfo>> getMasterProxiesFuture(DatabaseContext *cx) { ACTOR Future<Reference<ProxyInfo>> getMasterProxiesFuture(DatabaseContext *cx) {
loop{ loop{
Reference<ProxyInfo> proxies = cx->getMasterProxies(); Reference<ProxyInfo> proxies = cx->getMasterProxies();
if (proxies) if (proxies)
return proxies; return proxies;
Void _ = wait( cx->onMasterProxiesChanged() ); Void _ = wait( cx->onMasterProxiesChanged() );
} }
} }

View File

@ -343,27 +343,26 @@ ACTOR Future<Void> runProfiler(ProfilerRequest req) {
} }
} }
ACTOR Future<Void> storageServerRollbackRebooter( Future<Void> prevStorageServer, KeyValueStoreType storeType, std::string filename, StorageServerInterface ssi, Reference<AsyncVar<ServerDBInfo>> db, std::string folder, ActorCollection* filesClosed, int64_t memoryLimit ) { ACTOR Future<Void> storageServerRollbackRebooter( Future<Void> prevStorageServer, KeyValueStoreType storeType, std::string filename, UID id, LocalityData locality, Reference<AsyncVar<ServerDBInfo>> db, std::string folder, ActorCollection* filesClosed, int64_t memoryLimit ) {
loop { loop {
ErrorOr<Void> e = wait( errorOr( prevStorageServer) ); ErrorOr<Void> e = wait( errorOr( prevStorageServer) );
if (!e.isError()) return Void(); if (!e.isError()) return Void();
else if (e.getError().code() != error_code_please_reboot) throw e.getError(); else if (e.getError().code() != error_code_please_reboot) throw e.getError();
TraceEvent("StorageServerRequestedReboot", ssi.id()); TraceEvent("StorageServerRequestedReboot", id);
//if (BUGGIFY) Void _ = wait(delay(1.0)); // This does the same thing as zombie() //if (BUGGIFY) Void _ = wait(delay(1.0)); // This does the same thing as zombie()
// We need a new interface, since the new storageServer will do replaceInterface(). And we need to destroy // We need a new interface, since the new storageServer will do replaceInterface(). And we need to destroy
// the old one so the failure detector will know it is gone. // the old one so the failure detector will know it is gone.
StorageServerInterface ssi_new; StorageServerInterface ssi;
ssi_new.uniqueID = ssi.uniqueID; ssi.uniqueID = id;
ssi_new.locality = ssi.locality; ssi.locality = locality;
ssi = ssi_new;
ssi.initEndpoints(); ssi.initEndpoints();
auto* kv = openKVStore( storeType, filename, ssi.uniqueID, memoryLimit ); auto* kv = openKVStore( storeType, filename, ssi.uniqueID, memoryLimit );
Future<Void> kvClosed = kv->onClosed(); Future<Void> kvClosed = kv->onClosed();
filesClosed->add( kvClosed ); filesClosed->add( kvClosed );
prevStorageServer = storageServer( kv, ssi, db, folder, Promise<Void>() ); prevStorageServer = storageServer( kv, ssi, db, folder, Promise<Void>() );
prevStorageServer = handleIOErrors( prevStorageServer, kv, ssi.id(), kvClosed ); prevStorageServer = handleIOErrors( prevStorageServer, kv, id, kvClosed );
} }
} }
@ -610,7 +609,7 @@ ACTOR Future<Void> workerServer( Reference<ClusterConnectionFile> connFile, Refe
Future<Void> f = storageServer( kv, recruited, dbInfo, folder, recovery ); Future<Void> f = storageServer( kv, recruited, dbInfo, folder, recovery );
recoveries.push_back(recovery.getFuture()); recoveries.push_back(recovery.getFuture());
f = handleIOErrors( f, kv, s.storeID, kvClosed ); f = handleIOErrors( f, kv, s.storeID, kvClosed );
f = storageServerRollbackRebooter( f, s.storeType, s.filename, recruited, dbInfo, folder, &filesClosed, memoryLimit ); f = storageServerRollbackRebooter( f, s.storeType, s.filename, recruited.id(), recruited.locality, dbInfo, folder, &filesClosed, memoryLimit );
errorForwarders.add( forwardError( errors, "StorageServer", recruited.id(), f ) ); errorForwarders.add( forwardError( errors, "StorageServer", recruited.id(), f ) );
} else if( s.storedComponent == DiskStore::TLogData ) { } else if( s.storedComponent == DiskStore::TLogData ) {
IKeyValueStore* kv = openKVStore( s.storeType, s.filename, s.storeID, memoryLimit, validateDataFiles ); IKeyValueStore* kv = openKVStore( s.storeType, s.filename, s.storeID, memoryLimit, validateDataFiles );
@ -763,7 +762,7 @@ ACTOR Future<Void> workerServer( Reference<ClusterConnectionFile> connFile, Refe
Future<Void> s = storageServer( data, recruited, req.seedTag, storageReady, dbInfo, folder ); Future<Void> s = storageServer( data, recruited, req.seedTag, storageReady, dbInfo, folder );
s = handleIOErrors(s, data, recruited.id(), kvClosed); s = handleIOErrors(s, data, recruited.id(), kvClosed);
s = storageCache.removeOnReady( req.reqId, s ); s = storageCache.removeOnReady( req.reqId, s );
s = storageServerRollbackRebooter( s, req.storeType, filename, recruited, dbInfo, folder, &filesClosed, memoryLimit ); s = storageServerRollbackRebooter( s, req.storeType, filename, recruited.id(), recruited.locality, dbInfo, folder, &filesClosed, memoryLimit );
errorForwarders.add( forwardError( errors, "StorageServer", recruited.id(), s ) ); errorForwarders.add( forwardError( errors, "StorageServer", recruited.id(), s ) );
} else } else
forwardPromise( req.reply, storageCache.get( req.reqId ) ); forwardPromise( req.reply, storageCache.get( req.reqId ) );

View File

@ -682,6 +682,7 @@ struct ConsistencyCheckWorkload : TestWorkload
state Key lastSampleKey; state Key lastSampleKey;
state Key lastStartSampleKey; state Key lastStartSampleKey;
state int64_t totalReadAmount = 0; state int64_t totalReadAmount = 0;
state int64_t rangeBytes = 0;
state KeySelector begin = firstGreaterOrEqual(range.begin); state KeySelector begin = firstGreaterOrEqual(range.begin);
@ -725,6 +726,9 @@ struct ConsistencyCheckWorkload : TestWorkload
{ {
state GetKeyValuesReply current = rangeResult.get(); state GetKeyValuesReply current = rangeResult.get();
totalReadAmount += current.data.expectedSize(); totalReadAmount += current.data.expectedSize();
if (j == 0) {
rangeBytes += current.data.expectedSize();
}
//If we haven't encountered a valid storage server yet, then mark this as the baseline to compare against //If we haven't encountered a valid storage server yet, then mark this as the baseline to compare against
if(firstValidServer == -1) if(firstValidServer == -1)
firstValidServer = j; firstValidServer = j;
@ -911,6 +915,8 @@ struct ConsistencyCheckWorkload : TestWorkload
} }
} }
TraceEvent("ConsistencyCheck_CheckedRange").detail("Range", printable(range)).detail("Bytes", rangeBytes);
canSplit = canSplit && sampledBytes - splitBytes >= shardBounds.min.bytes && sampledBytes > splitBytes; canSplit = canSplit && sampledBytes - splitBytes >= shardBounds.min.bytes && sampledBytes > splitBytes;
//Update the size of all storage servers containing this shard //Update the size of all storage servers containing this shard

View File

@ -32,7 +32,7 @@
<Wix xmlns='http://schemas.microsoft.com/wix/2006/wi'> <Wix xmlns='http://schemas.microsoft.com/wix/2006/wi'>
<Product Name='$(var.Title)' <Product Name='$(var.Title)'
Id='{61C46988-7589-4B8A-9BB9-D850FD5B8B05}' Id='{3A9AB74C-F787-4A85-9D50-C0E18E2A1CA7}'
UpgradeCode='{A95EA002-686E-4164-8356-C715B7F8B1C8}' UpgradeCode='{A95EA002-686E-4164-8356-C715B7F8B1C8}'
Version='$(var.Version)' Version='$(var.Version)'
Manufacturer='$(var.Manufacturer)' Manufacturer='$(var.Manufacturer)'