Bug fix in blob store listing when it requires multiple serial requests. Added more trace events to FileBackup and BlobStoreEndpoint with suppression, and added suppression to existing trace events.

Stephen Atherton 2017-12-22 17:08:25 -08:00
parent f2524ffd33
commit f324afc13f
2 changed files with 65 additions and 19 deletions
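For context before the diffs: the pattern applied throughout both files is TraceEvent suppression, which rate-limits a repeated trace event so it fires at most once per time window. A minimal sketch of the call shape, assuming the second argument of suppressFor() controls whether the first occurrence in a window is always logged (only the .suppressFor(60, true) call shape itself is taken from this commit):

#include "flow/Trace.h"  // FDB trace framework (assumed include path)

void traceBackupProgress(int shardsDone) {
    // Emit at most one of these events per 60 seconds; the `true` flag is
    // assumed to mean "always log the first occurrence in the window".
    TraceEvent("FileBackupProgressExample")  // hypothetical event name
        .detail("ShardsDone", shardsDone)
        .suppressFor(60, true);
}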

View File

@@ -851,6 +851,13 @@ namespace fileBackup {
state Key beginKey = Params.beginKey().get(task);
state Key endKey = Params.endKey().get(task);
TraceEvent("FileBackupRangeStart")
.detail("BackupUID", BackupConfig(task).getUid())
.detail("BeginKey", Params.beginKey().get(task).printable())
.detail("EndKey", Params.endKey().get(task).printable())
.detail("TaskKey", task->key.printable())
.suppressFor(60, true);
// When a key range task saves the last chunk of progress and then the executor dies, the continued task's
// beginKey and endKey will be equal, so there is no work to be done.
if(beginKey == endKey)
@@ -965,7 +972,8 @@ namespace fileBackup {
.detail("BeginKey", Params.beginKey().get(task).printable())
.detail("EndKey", Params.endKey().get(task).printable())
.detail("SliceBeginKey", nextKey.printable())
.detail("SliceEndKey", keys[idx].printable());
.detail("SliceEndKey", keys[idx].printable())
.suppressFor(60, true);
}
nextKey = keys[idx];
}
@@ -991,6 +999,14 @@ namespace fileBackup {
}
Void _ = wait(taskBucket->finish(tr, task));
TraceEvent("FileBackupRangeFinish")
.detail("BackupUID", BackupConfig(task).getUid())
.detail("BeginKey", Params.beginKey().get(task).printable())
.detail("EndKey", Params.endKey().get(task).printable())
.detail("TaskKey", task->key.printable())
.suppressFor(60, true);
return Void();
}
@@ -1202,7 +1218,7 @@ namespace fileBackup {
state int countAllShards = countShardsDone + countShardsNotDone;
if(countShardsNotDone == 0) {
TraceEvent("FileBackupSnapshotDispatchTaskFinished")
TraceEvent("FileBackupSnapshotDispatchFinished")
.detail("BackupUID", config.getUid())
.detail("AllShards", countAllShards)
.detail("ShardsDone", countShardsDone)
@@ -1228,7 +1244,7 @@ namespace fileBackup {
state int countExpectedShardsDone = std::min<int>(countAllShards, countAllShards * timeElapsed);
state int countShardsToDispatch = std::max<int>(0, countExpectedShardsDone - countShardsDone);
TraceEvent("FileBackupSnapshotDispatchTask1")
TraceEvent("FileBackupSnapshotDispatchStats")
.detail("BackupUID", config.getUid())
.detail("AllShards", countAllShards)
.detail("ShardsDone", countShardsDone)
@@ -1362,7 +1378,7 @@ namespace fileBackup {
}
if(countShardsNotDone == 0) {
TraceEvent("FileBackupSnapshotDispatchTaskFinished")
TraceEvent("FileBackupSnapshotDispatchFinished")
.detail("BackupUID", config.getUid())
.detail("AllShards", countAllShards)
.detail("ShardsDone", countShardsDone)
@@ -1542,7 +1558,8 @@ namespace fileBackup {
.detail("BackupUID", config.getUid())
.detail("Size", outFile->size())
.detail("BeginVersion", beginVersion)
.detail("EndVersion", endVersion);
.detail("EndVersion", endVersion)
.suppressFor(60, true);
Params.fileSize().set(task, outFile->size());
@@ -1654,6 +1671,12 @@ namespace fileBackup {
// If stopWhenDone is set and there is a restorable version, set the done future and do not create further tasks.
if(stopWhenDone && restorableVersion.present()) {
Void _ = wait(onDone->set(tr, taskBucket) && taskBucket->finish(tr, task));
TraceEvent("FileBackupLogsDispatchDone")
.detail("BackupUID", config.getUid())
.detail("BeginVersion", beginVersion)
.detail("RestorableVersion", restorableVersion.orDefault(-1));
return Void();
}
@@ -1670,6 +1693,12 @@ namespace fileBackup {
Key _ = wait(BackupLogsDispatchTask::addTask(tr, taskBucket, task, endVersion, TaskCompletionKey::signal(onDone), logDispatchBatchFuture));
Void _ = wait(taskBucket->finish(tr, task));
TraceEvent("FileBackupLogsDispatchContinuing")
.detail("BackupUID", config.getUid())
.detail("BeginVersion", beginVersion)
.detail("EndVersion", endVersion).suppressFor(60, true);
return Void();
}
@@ -1715,6 +1744,9 @@ namespace fileBackup {
backup.stateEnum().set(tr, EBackupState::STATE_COMPLETED);
Void _ = wait(taskBucket->finish(tr, task));
TraceEvent("FileBackupFinished").detail("BackupUID", uid);
return Void();
}
@@ -2056,7 +2088,8 @@ namespace fileBackup {
.detail("FileSize", rangeFile.fileSize)
.detail("ReadOffset", readOffset)
.detail("ReadLen", readLen)
.detail("TaskInstance", (uint64_t)this);
.detail("TaskInstance", (uint64_t)this)
.suppressFor(60, true);
state Reference<ReadYourWritesTransaction> tr( new ReadYourWritesTransaction(cx) );
state Future<Reference<IBackupContainer>> bc;
@@ -2180,7 +2213,8 @@ namespace fileBackup {
.detail("DataSize", data.size())
.detail("Bytes", txBytes)
.detail("OriginalFileRange", printable(originalFileRange))
.detail("TaskInstance", (uint64_t)this);
.detail("TaskInstance", (uint64_t)this)
.suppressFor(60, true);
// Commit succeeded, so advance starting point
start = i;
@@ -2284,8 +2318,9 @@ namespace fileBackup {
.detail("FileSize", logFile.fileSize)
.detail("ReadOffset", readOffset)
.detail("ReadLen", readLen)
.detail("TaskInstance", (uint64_t)this);
.detail("TaskInstance", (uint64_t)this)
.suppressFor(60, true);
state Reference<ReadYourWritesTransaction> tr( new ReadYourWritesTransaction(cx) );
state Reference<IBackupContainer> bc;
@@ -2356,7 +2391,8 @@ namespace fileBackup {
.detail("EndIndex", i)
.detail("DataSize", data.size())
.detail("Bytes", txBytes)
.detail("TaskInstance", (uint64_t)this);
.detail("TaskInstance", (uint64_t)this)
.suppressFor(60, true);
// Commit succeeded, so advance starting point
start = i;
@@ -2619,7 +2655,11 @@ namespace fileBackup {
beginFile = beginFile + '\x00';
beginBlock = 0;
//TraceEvent("FileRestoreDispatchedFile").detail("RestoreUID", restore.getUid()).detail("FileName", fi.filename).detail("TaskInstance", (uint64_t)this);
TraceEvent("FileRestoreDispatchedFile")
.detail("RestoreUID", restore.getUid())
.detail("FileName", f.fileName)
.detail("TaskInstance", (uint64_t)this)
.suppressFor(60, true);
}
// If no blocks were dispatched then the next dispatch task should run now and be joined with the allPartsDone future
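That ends the FileBackup changes. One piece worth isolating: the FileBackupSnapshotDispatchStats hunk above paces the snapshot by keeping the number of completed shards proportional to elapsed time. As a standalone helper (a sketch; the wrapper function is hypothetical, and only the two formulas come from the diff):

#include <algorithm>

// timeElapsed is the fraction (0.0 to 1.0) of the snapshot interval that has
// passed. Dispatch just enough shard range tasks to keep completed shards
// proportional to it, and never a negative count.
int shardsToDispatch(int countAllShards, int countShardsDone, double timeElapsed) {
    int countExpectedShardsDone = std::min<int>(countAllShards, countAllShards * timeElapsed);
    return std::max<int>(0, countExpectedShardsDone - countShardsDone);
}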

View File

@@ -164,7 +164,7 @@ Reference<BlobStoreEndpoint> BlobStoreEndpoint::fromString(std::string const &ur
} catch(std::string &err) {
if(error != nullptr)
*error = err;
TraceEvent(SevWarnAlways, "BlobStoreEndpointBadURL").detail("Description", err).detail("Format", getURLFormat()).detail("URL", url);
TraceEvent(SevWarnAlways, "BlobStoreEndpointBadURL").detail("Description", err).detail("Format", getURLFormat()).detail("URL", url).suppressFor(60, true);
throw backup_invalid_url();
}
}
@@ -280,7 +280,7 @@ ACTOR Future<json_spirit::mObject> tryReadJSONFile(std::string path) {
ASSERT(r == size);
content = buf.toString();
} catch(Error &e) {
TraceEvent(SevWarn, "BlobCredentialFileError").detail("File", path).error(e);
TraceEvent(SevWarn, "BlobCredentialFileError").detail("File", path).error(e).suppressFor(60, true);
return json_spirit::mObject();
}
@@ -290,9 +290,9 @@ ACTOR Future<json_spirit::mObject> tryReadJSONFile(std::string path) {
if(json.type() == json_spirit::obj_type)
return json.get_obj();
else
TraceEvent(SevWarn, "BlobCredentialFileNotJSONObject").detail("File", path);
TraceEvent(SevWarn, "BlobCredentialFileNotJSONObject").detail("File", path).suppressFor(60, true);
} catch(Error &e) {
TraceEvent(SevWarn, "BlobCredentialFileParseFailed").detail("File", path).error(e);
TraceEvent(SevWarn, "BlobCredentialFileParseFailed").detail("File", path).error(e).suppressFor(60, true);
}
return json_spirit::mObject();
@@ -344,7 +344,7 @@ ACTOR Future<BlobStoreEndpoint::ReusableConnection> connect_impl(Reference<BlobS
TraceEvent("BlobStoreEndpointReusingConnected")
.detail("RemoteEndpoint", rconn.conn->getPeerAddress())
.detail("ExpiresIn", rconn.expirationTime - now())
.suppressFor(5, true);
.suppressFor(60, true);
return rconn;
}
}
@@ -354,7 +354,7 @@ ACTOR Future<BlobStoreEndpoint::ReusableConnection> connect_impl(Reference<BlobS
TraceEvent("BlobStoreEndpointNewConnection")
.detail("RemoteEndpoint", conn->getPeerAddress())
.detail("ExpiresIn", b->knobs.max_connection_life)
.suppressFor(5, true);
.suppressFor(60, true);
if(b->lookupSecret)
Void _ = wait(b->updateSecret());
@@ -463,7 +463,7 @@ ACTOR Future<Reference<HTTP::Response>> doRequest_impl(Reference<BlobStoreEndpoi
event.detail("Verb", verb)
.detail("Resource", resource)
.detail("ThisTry", thisTry)
.suppressFor(5, true);
.suppressFor(15, true);
// We will wait delay seconds before the next retry, starting with nextRetryDelay.
double delay = nextRetryDelay;
@@ -567,7 +567,6 @@ ACTOR Future<Void> listBucketStream_impl(Reference<BlobStoreEndpoint> bstore, st
object.size = strtoll(sizeVal.c_str(), NULL, 10);
objectDoc.get("Key", object.name);
result.objects.push_back(std::move(object));
}
}
if(doc.has("CommonPrefixes")) {
@@ -578,8 +577,15 @@ ACTOR Future<Void> listBucketStream_impl(Reference<BlobStoreEndpoint> bstore, st
result.commonPrefixes.push_back(std::move(prefix));
}
}
if(!result.objects.empty())
lastFile = result.objects.back().name;
if(!result.commonPrefixes.empty() && lastFile < result.commonPrefixes.back())
lastFile = result.commonPrefixes.back();
results.send(result);
} catch(Error &e) {
TraceEvent(SevWarn, "BlobStoreEndpointListResultParseError").detail("Resource", resource + HTTP::urlEncode(lastFile)).suppressFor(60, true);
throw http_bad_response();
}
}
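This last hunk is the listing bug fix named in the commit message: when a bucket listing requires multiple serial requests, the continuation marker has to advance past both plain objects and CommonPrefixes entries; a page whose greatest key is a common prefix would otherwise resume from a stale marker and fetch the same results again. The marker logic in isolation (a sketch; the ListPage type and nextMarker wrapper are hypothetical stand-ins, and only the two comparisons come from the diff):

#include <string>
#include <vector>

// Stand-in for one listing response page; the real code reads these fields
// from the parsed JSON document ("Contents" keys and "CommonPrefixes").
struct ListPage {
    std::vector<std::string> objectNames;
    std::vector<std::string> commonPrefixes;
};

// Advance the continuation marker the way the fix does: take the last object
// name, then let the last common prefix win if it sorts later. S3-style
// listings return results in ascending key order, so back() is the maximum.
std::string nextMarker(const ListPage &page, std::string lastFile) {
    if (!page.objectNames.empty())
        lastFile = page.objectNames.back();
    if (!page.commonPrefixes.empty() && lastFile < page.commonPrefixes.back())
        lastFile = page.commonPrefixes.back();
    return lastFile;
}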