Disambiguate between S3BlobStore and other blob stores

This commit is contained in:
sfc-gh-tclinkenbeard 2020-10-29 20:42:23 -07:00
parent 731073bd52
commit 55e86c1336
13 changed files with 1457 additions and 1400 deletions

View File

@ -38,7 +38,7 @@
#include "fdbclient/BackupContainer.h"
#include "fdbclient/KeyBackedTypes.h"
#include "fdbclient/RunTransaction.actor.h"
#include "fdbclient/BlobStore.h"
#include "fdbclient/S3BlobStore.h"
#include "fdbclient/json_spirit/json_spirit_writer_template.h"
#include "flow/Platform.h"
@ -1460,12 +1460,12 @@ ACTOR Future<std::string> getLayerStatus(Reference<ReadYourWritesTransaction> tr
o.create("configured_workers") = CLIENT_KNOBS->BACKUP_TASKS_PER_AGENT;
if(exe == EXE_AGENT) {
static BlobStoreEndpoint::Stats last_stats;
static S3BlobStoreEndpoint::Stats last_stats;
static double last_ts = 0;
BlobStoreEndpoint::Stats current_stats = BlobStoreEndpoint::s_stats;
S3BlobStoreEndpoint::Stats current_stats = S3BlobStoreEndpoint::s_stats;
JSONDoc blobstats = o.create("blob_stats");
blobstats.create("total") = current_stats.getJSON();
BlobStoreEndpoint::Stats diff = current_stats - last_stats;
S3BlobStoreEndpoint::Stats diff = current_stats - last_stats;
json_spirit::mObject diffObj = diff.getJSON();
if(last_ts > 0)
diffObj["bytes_per_second"] = double(current_stats.bytes_sent - last_stats.bytes_sent) / (now() - last_ts);

View File

@ -1,5 +1,5 @@
/*
* AsyncFileBlobStore.actor.cpp
* AsyncFileS3BlobStore.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
@ -18,40 +18,37 @@
* limitations under the License.
*/
#include "fdbclient/AsyncFileBlobStore.actor.h"
#include "fdbclient/AsyncFileS3BlobStore.actor.h"
#include "fdbrpc/AsyncFileReadAhead.actor.h"
#include "flow/UnitTest.h"
#include "flow/actorcompiler.h" // has to be last include
Future<int64_t> AsyncFileBlobStoreRead::size() const {
if(!m_size.isValid())
m_size = m_bstore->objectSize(m_bucket, m_object);
Future<int64_t> AsyncFileS3BlobStoreRead::size() const {
if (!m_size.isValid()) m_size = m_bstore->objectSize(m_bucket, m_object);
return m_size;
}
Future<int> AsyncFileBlobStoreRead::read( void *data, int length, int64_t offset ) {
Future<int> AsyncFileS3BlobStoreRead::read(void* data, int length, int64_t offset) {
return m_bstore->readObject(m_bucket, m_object, data, length, offset);
}
ACTOR Future<Void> sendStuff(int id, Reference<IRateControl> t, int bytes) {
printf("Starting fake sender %d which will send send %d bytes.\n", id, bytes);
state double ts = timer();
state int total = 0;
while(total < bytes) {
state int r = std::min<int>(deterministicRandom()->randomInt(0,1000), bytes - total);
while (total < bytes) {
state int r = std::min<int>(deterministicRandom()->randomInt(0, 1000), bytes - total);
wait(t->getAllowance(r));
total += r;
}
double dur = timer() - ts;
printf("Sender %d: Sent %d in %fs, %f/s\n", id, total, dur, total/dur);
printf("Sender %d: Sent %d in %fs, %f/s\n", id, total, dur, total / dur);
return Void();
}
TEST_CASE("/backup/throttling") {
// Test will not work in simulation.
if(g_network->isSimulated())
return Void();
if (g_network->isSimulated()) return Void();
state int limit = 100000;
state Reference<IRateControl> t(new SpeedLimit(limit, 1));
@ -62,13 +59,18 @@ TEST_CASE("/backup/throttling") {
state int total = 0;
int s;
s = 500000;
f.push_back(sendStuff(id++, t, s)); total += s;
f.push_back(sendStuff(id++, t, s)); total += s;
f.push_back(sendStuff(id++, t, s));
total += s;
f.push_back(sendStuff(id++, t, s));
total += s;
s = 50000;
f.push_back(sendStuff(id++, t, s)); total += s;
f.push_back(sendStuff(id++, t, s)); total += s;
f.push_back(sendStuff(id++, t, s));
total += s;
f.push_back(sendStuff(id++, t, s));
total += s;
s = 5000;
f.push_back(sendStuff(id++, t, s)); total += s;
f.push_back(sendStuff(id++, t, s));
total += s;
wait(waitForAll(f));
double dur = timer() - ts;
@ -78,5 +80,3 @@ TEST_CASE("/backup/throttling") {
return Void();
}

View File

@ -1,5 +1,5 @@
/*
* AsyncFileBlobStore.actor.h
* AsyncFileS3BlobStore.actor.h
*
* This source file is part of the FoundationDB open source project
*
@ -20,12 +20,13 @@
#pragma once
// When actually compiled (NO_INTELLISENSE), include the generated version of this file. In intellisense use the source version.
// When actually compiled (NO_INTELLISENSE), include the generated version of this file. In intellisense use the source
// version.
#if defined(NO_INTELLISENSE) && !defined(FDBRPC_ASYNCFILEBLOBSTORE_ACTOR_G_H)
#define FDBRPC_ASYNCFILEBLOBSTORE_ACTOR_G_H
#include "fdbclient/AsyncFileBlobStore.actor.g.h"
#elif !defined(FDBRPC_ASYNCFILEBLOBSTORE_ACTOR_H)
#define FDBRPC_ASYNCFILEBLOBSTORE_ACTOR_H
#define FDBRPC_ASYNCFILEBLOBSTORE_ACTOR_G_H
#include "fdbclient/AsyncFileS3BlobStore.actor.g.h"
#elif !defined(FDBRPC_ASYNCFILES3BLOBSTORE_ACTOR_H)
#define FDBRPC_ASYNCFILES3BLOBSTORE_ACTOR_H
#include <sstream>
#include <time.h>
@ -34,55 +35,54 @@
#include "flow/serialize.h"
#include "flow/Net2Packet.h"
#include "fdbrpc/IRateControl.h"
#include "fdbclient/BlobStore.h"
#include "fdbclient/S3BlobStore.h"
#include "fdbclient/md5/md5.h"
#include "fdbclient/libb64/encode.h"
#include "flow/actorcompiler.h" // This must be the last #include.
#include "flow/actorcompiler.h" // This must be the last #include.
ACTOR template<typename T> static Future<T> joinErrorGroup(Future<T> f, Promise<Void> p) {
ACTOR template <typename T>
static Future<T> joinErrorGroup(Future<T> f, Promise<Void> p) {
try {
wait(success(f) || p.getFuture());
return f.get();
} catch(Error &e) {
if(p.canBeSet())
p.sendError(e);
} catch (Error& e) {
if (p.canBeSet()) p.sendError(e);
throw;
}
}
// This class represents a write-only file that lives in an S3-style blob store. It writes using the REST API,
// using multi-part upload and beginning to transfer each part as soon as it is large enough.
// All write operations must be sequential and contiguous.
// Limits on part sizes, upload speed, and concurrent uploads are taken from the BlobStoreEndpoint being used.
class AsyncFileBlobStoreWrite : public IAsyncFile, public ReferenceCounted<AsyncFileBlobStoreWrite> {
// Limits on part sizes, upload speed, and concurrent uploads are taken from the S3BlobStoreEndpoint being used.
class AsyncFileS3BlobStoreWrite : public IAsyncFile, public ReferenceCounted<AsyncFileS3BlobStoreWrite> {
public:
virtual void addref() { ReferenceCounted<AsyncFileBlobStoreWrite>::addref(); }
virtual void delref() { ReferenceCounted<AsyncFileBlobStoreWrite>::delref(); }
virtual void addref() { ReferenceCounted<AsyncFileS3BlobStoreWrite>::addref(); }
virtual void delref() { ReferenceCounted<AsyncFileS3BlobStoreWrite>::delref(); }
struct Part : ReferenceCounted<Part> {
Part(int n, int minSize) : number(n), writer(content.getWriteBuffer(minSize), nullptr, Unversioned()), length(0) {
Part(int n, int minSize)
: number(n), writer(content.getWriteBuffer(minSize), nullptr, Unversioned()), length(0) {
etag = std::string();
::MD5_Init(&content_md5_buf);
}
virtual ~Part() {
etag.cancel();
}
virtual ~Part() { etag.cancel(); }
Future<std::string> etag;
int number;
UnsentPacketQueue content;
std::string md5string;
PacketWriter writer;
int length;
void write(const uint8_t *buf, int len) {
void write(const uint8_t* buf, int len) {
writer.serializeBytes(buf, len);
::MD5_Update(&content_md5_buf, buf, len);
length += len;
}
// MD5 sum can only be finalized once; further calls will do nothing, so writes made after finalization will not be
// reflected in the sum.
void finalizeMD5() {
if(md5string.empty()) {
if (md5string.empty()) {
std::string sumBytes;
sumBytes.resize(16);
::MD5_Final((unsigned char *)sumBytes.data(), &content_md5_buf);
::MD5_Final((unsigned char*)sumBytes.data(), &content_md5_buf);
md5string = base64::encoder::from_string(sumBytes);
md5string.resize(md5string.size() - 1);
}
@ -94,71 +94,75 @@ public:
// Reading is not supported on this write-only file: every call throws file_not_readable().
Future<int> read(void* data, int length, int64_t offset) override { throw file_not_readable(); }
ACTOR static Future<Void> write_impl(Reference<AsyncFileBlobStoreWrite> f, const uint8_t *data, int length) {
state Part *p = f->m_parts.back().getPtr();
// If this write will cause the part to cross the min part size boundary then write to the boundary and start a new part.
while(p->length + length >= f->m_bstore->knobs.multipart_min_part_size) {
ACTOR static Future<Void> write_impl(Reference<AsyncFileS3BlobStoreWrite> f, const uint8_t* data, int length) {
state Part* p = f->m_parts.back().getPtr();
// If this write will cause the part to cross the min part size boundary then write to the boundary and start a
// new part.
while (p->length + length >= f->m_bstore->knobs.multipart_min_part_size) {
// Finish off this part
int finishlen = f->m_bstore->knobs.multipart_min_part_size - p->length;
p->write((const uint8_t *)data, finishlen);
p->write((const uint8_t*)data, finishlen);
// Adjust source buffer args
length -= finishlen;
data = (const uint8_t *)data + finishlen;
data = (const uint8_t*)data + finishlen;
// End current part (and start new one)
wait(f->endCurrentPart(f.getPtr(), true));
p = f->m_parts.back().getPtr();
}
p->write((const uint8_t *)data, length);
p->write((const uint8_t*)data, length);
return Void();
}
Future<Void> write(void const* data, int length, int64_t offset) override {
if(offset != m_cursor)
throw non_sequential_op();
if (offset != m_cursor) throw non_sequential_op();
m_cursor += length;
return m_error.getFuture() || write_impl(Reference<AsyncFileBlobStoreWrite>::addRef(this), (const uint8_t *)data, length);
return m_error.getFuture() ||
write_impl(Reference<AsyncFileS3BlobStoreWrite>::addRef(this), (const uint8_t*)data, length);
}
Future<Void> truncate(int64_t size) override {
if(size != m_cursor)
return non_sequential_op();
if (size != m_cursor) return non_sequential_op();
return Void();
}
ACTOR static Future<std::string> doPartUpload(AsyncFileBlobStoreWrite *f, Part *p) {
ACTOR static Future<std::string> doPartUpload(AsyncFileS3BlobStoreWrite* f, Part* p) {
p->finalizeMD5();
std::string upload_id = wait(f->getUploadID());
std::string etag = wait(f->m_bstore->uploadPart(f->m_bucket, f->m_object, upload_id, p->number, &p->content, p->length, p->md5string));
std::string etag = wait(f->m_bstore->uploadPart(f->m_bucket, f->m_object, upload_id, p->number, &p->content,
p->length, p->md5string));
return etag;
}
ACTOR static Future<Void> doFinishUpload(AsyncFileBlobStoreWrite* f) {
ACTOR static Future<Void> doFinishUpload(AsyncFileS3BlobStoreWrite* f) {
// If there is only 1 part then it has not yet been uploaded so just write the whole file at once.
if(f->m_parts.size() == 1) {
if (f->m_parts.size() == 1) {
Reference<Part> part = f->m_parts.back();
part->finalizeMD5();
wait(f->m_bstore->writeEntireFileFromBuffer(f->m_bucket, f->m_object, &part->content, part->length, part->md5string));
wait(f->m_bstore->writeEntireFileFromBuffer(f->m_bucket, f->m_object, &part->content, part->length,
part->md5string));
return Void();
}
// There are at least 2 parts. End the last part (which could be empty)
wait(f->endCurrentPart(f));
state BlobStoreEndpoint::MultiPartSetT partSet;
state S3BlobStoreEndpoint::MultiPartSetT partSet;
state std::vector<Reference<Part>>::iterator p;
// Wait for all the parts to be done to get their ETags, populate the partSet required to finish the object upload.
for(p = f->m_parts.begin(); p != f->m_parts.end(); ++p) {
// Wait for all the parts to be done to get their ETags, populate the partSet required to finish the object
// upload.
for (p = f->m_parts.begin(); p != f->m_parts.end(); ++p) {
std::string tag = wait((*p)->etag);
if((*p)->length > 0) // The last part might be empty and has to be omitted.
if ((*p)->length > 0) // The last part might be empty and has to be omitted.
partSet[(*p)->number] = tag;
}
// No need to wait for the upload ID here because the above loop waited for all the parts and each part required the upload ID so it is ready
// No need to wait for the upload ID here because the above loop waited for all the parts and each part required
// the upload ID so it is ready
wait(f->m_bstore->finishMultiPartUpload(f->m_bucket, f->m_object, f->m_upload_id.get(), partSet));
return Void();
@ -167,43 +171,43 @@ public:
// Ready once all data has been sent AND acknowledged from the remote side
Future<Void> sync() override {
// Only initiate the finish operation once, and also prevent further writing.
if(!m_finished.isValid()) {
if (!m_finished.isValid()) {
m_finished = doFinishUpload(this);
m_cursor = -1; // Cause future write attempts to fail
m_cursor = -1; // Cause future write attempts to fail
}
return m_finished;
}
//
// Flush can't really do what the caller would "want" for a blob store file. The caller would probably notionally want
// all bytes written to be at least in transit to the blob store, but that is not very feasible. The blob store
// has a minimum size requirement for all but the final part, and parts must be sent with a header that specifies
// their size. So in the case of a write buffer that does not meet the part minimum size the part could be sent
// but then if there is any more data written then that part needs to be sent again in its entirety. So a client
// that calls flush often could generate far more blob store write traffic than they intend to.
// Flush can't really do what the caller would "want" for a blob store file. The caller would probably notionally
// want all bytes written to be at least in transit to the blob store, but that is not very feasible. The blob
// store has a minimum size requirement for all but the final part, and parts must be sent with a header that
// specifies their size. So in the case of a write buffer that does not meet the part minimum size the part could
// be sent but then if there is any more data written then that part needs to be sent again in its entirety. So a
// client that calls flush often could generate far more blob store write traffic than they intend to.
Future<Void> flush() override { return Void(); }
// Reports the current write cursor as the file size. Note that sync() sets m_cursor to -1, so this returns -1 once
// sync() has been called.
Future<int64_t> size() const override { return m_cursor; }
Future<Void> readZeroCopy(void** data, int* length, int64_t offset) override {
TraceEvent(SevError, "ReadZeroCopyNotSupported").detail("FileType", "BlobStoreWrite");
TraceEvent(SevError, "ReadZeroCopyNotSupported").detail("FileType", "S3BlobStoreWrite");
return platform_error();
}
void releaseZeroCopy(void* data, int length, int64_t offset) override {}
int64_t debugFD() const override { return -1; }
~AsyncFileBlobStoreWrite() override {
~AsyncFileS3BlobStoreWrite() override {
m_upload_id.cancel();
m_finished.cancel();
m_parts.clear(); // Contains futures
m_parts.clear(); // Contains futures
}
std::string getFilename() const override { return m_object; }
private:
Reference<BlobStoreEndpoint> m_bstore;
Reference<S3BlobStoreEndpoint> m_bstore;
std::string m_bucket;
std::string m_object;
@ -216,48 +220,46 @@ private:
FlowLock m_concurrentUploads;
// End the current part and start uploading it, but also wait for a part to finish if too many are in transit.
ACTOR static Future<Void> endCurrentPart(AsyncFileBlobStoreWrite *f, bool startNew = false) {
if(f->m_parts.back()->length == 0)
return Void();
ACTOR static Future<Void> endCurrentPart(AsyncFileS3BlobStoreWrite* f, bool startNew = false) {
if (f->m_parts.back()->length == 0) return Void();
// Wait for an upload slot to be available
wait(f->m_concurrentUploads.take());
// Do the upload, and if it fails forward errors to m_error and also stop if anything else sends an error to m_error
// Also, hold a releaser for the concurrent upload slot while all that is going on.
// Do the upload, and if it fails forward errors to m_error and also stop if anything else sends an error to
// m_error. Also, hold a releaser for the concurrent upload slot while all that is going on.
auto releaser = std::make_shared<FlowLock::Releaser>(f->m_concurrentUploads, 1);
f->m_parts.back()->etag =
holdWhile(std::move(releaser), joinErrorGroup(doPartUpload(f, f->m_parts.back().getPtr()), f->m_error));
// Make a new part to write to
if(startNew)
f->m_parts.push_back(Reference<Part>(new Part(f->m_parts.size() + 1, f->m_bstore->knobs.multipart_min_part_size)));
if (startNew)
f->m_parts.push_back(
Reference<Part>(new Part(f->m_parts.size() + 1, f->m_bstore->knobs.multipart_min_part_size)));
return Void();
}
Future<std::string> getUploadID() {
if(!m_upload_id.isValid())
m_upload_id = m_bstore->beginMultiPartUpload(m_bucket, m_object);
if (!m_upload_id.isValid()) m_upload_id = m_bstore->beginMultiPartUpload(m_bucket, m_object);
return m_upload_id;
}
public:
AsyncFileBlobStoreWrite(Reference<BlobStoreEndpoint> bstore, std::string bucket, std::string object)
: m_bstore(bstore), m_bucket(bucket), m_object(object), m_cursor(0), m_concurrentUploads(bstore->knobs.concurrent_writes_per_file) {
AsyncFileS3BlobStoreWrite(Reference<S3BlobStoreEndpoint> bstore, std::string bucket, std::string object)
: m_bstore(bstore), m_bucket(bucket), m_object(object), m_cursor(0),
m_concurrentUploads(bstore->knobs.concurrent_writes_per_file) {
// Add first part
m_parts.push_back(Reference<Part>(new Part(1, m_bstore->knobs.multipart_min_part_size)));
}
};
// This class represents a read-only file that lives in an S3-style blob store. It reads using the REST API.
class AsyncFileBlobStoreRead : public IAsyncFile, public ReferenceCounted<AsyncFileBlobStoreRead> {
class AsyncFileS3BlobStoreRead : public IAsyncFile, public ReferenceCounted<AsyncFileS3BlobStoreRead> {
public:
virtual void addref() { ReferenceCounted<AsyncFileBlobStoreRead>::addref(); }
virtual void delref() { ReferenceCounted<AsyncFileBlobStoreRead>::delref(); }
virtual void addref() { ReferenceCounted<AsyncFileS3BlobStoreRead>::addref(); }
virtual void delref() { ReferenceCounted<AsyncFileS3BlobStoreRead>::delref(); }
Future<int> read(void* data, int length, int64_t offset) override;
@ -270,7 +272,7 @@ public:
Future<int64_t> size() const override;
Future<Void> readZeroCopy(void** data, int* length, int64_t offset) override {
TraceEvent(SevError, "ReadZeroCopyNotSupported").detail("FileType", "BlobStoreRead");
TraceEvent(SevError, "ReadZeroCopyNotSupported").detail("FileType", "S3BlobStoreRead");
return platform_error();
}
void releaseZeroCopy(void* data, int length, int64_t offset) override {}
@ -279,17 +281,15 @@ public:
std::string getFilename() const override { return m_object; }
virtual ~AsyncFileBlobStoreRead() {}
virtual ~AsyncFileS3BlobStoreRead() {}
Reference<BlobStoreEndpoint> m_bstore;
Reference<S3BlobStoreEndpoint> m_bstore;
std::string m_bucket;
std::string m_object;
mutable Future<int64_t> m_size;
AsyncFileBlobStoreRead(Reference<BlobStoreEndpoint> bstore, std::string bucket, std::string object)
: m_bstore(bstore), m_bucket(bucket), m_object(object) {
}
AsyncFileS3BlobStoreRead(Reference<S3BlobStoreEndpoint> bstore, std::string bucket, std::string object)
: m_bstore(bstore), m_bucket(bucket), m_object(object) {}
};
#include "flow/unactorcompiler.h"

View File

@ -35,7 +35,7 @@
#include "fdbrpc/AsyncFileReadAhead.actor.h"
#include "fdbrpc/simulator.h"
#include "flow/Platform.h"
#include "fdbclient/AsyncFileBlobStore.actor.h"
#include "fdbclient/AsyncFileS3BlobStore.actor.h"
#include "fdbclient/BackupContainerAzureBlobStore.h"
#include "fdbclient/BackupContainerFileSystem.h"
#include "fdbclient/BackupContainerLocalDirectory.h"
@ -265,9 +265,9 @@ Reference<IBackupContainer> IBackupContainer::openContainer(const std::string& u
std::string resource;
// The URL parameters contain blobstore endpoint tunables as well as possible backup-specific options.
BlobStoreEndpoint::ParametersT backupParams;
Reference<BlobStoreEndpoint> bstore =
BlobStoreEndpoint::fromString(url, &resource, &lastOpenError, &backupParams);
S3BlobStoreEndpoint::ParametersT backupParams;
Reference<S3BlobStoreEndpoint> bstore =
S3BlobStoreEndpoint::fromString(url, &resource, &lastOpenError, &backupParams);
if (resource.empty()) throw backup_invalid_url();
for (auto c : resource)
@ -314,9 +314,9 @@ ACTOR Future<std::vector<std::string>> listContainers_impl(std::string baseURL)
} else if (u.startsWith(LiteralStringRef("blobstore://"))) {
std::string resource;
BlobStoreEndpoint::ParametersT backupParams;
Reference<BlobStoreEndpoint> bstore =
BlobStoreEndpoint::fromString(baseURL, &resource, &IBackupContainer::lastOpenError, &backupParams);
S3BlobStoreEndpoint::ParametersT backupParams;
Reference<S3BlobStoreEndpoint> bstore =
S3BlobStoreEndpoint::fromString(baseURL, &resource, &IBackupContainer::lastOpenError, &backupParams);
if (!resource.empty()) {
TraceEvent(SevWarn, "BackupContainer")

View File

@ -18,7 +18,7 @@
* limitations under the License.
*/
#include "fdbclient/AsyncFileBlobStore.actor.h"
#include "fdbclient/AsyncFileS3BlobStore.actor.h"
#include "fdbclient/BackupContainerS3BlobStore.h"
#include "fdbrpc/AsyncFileReadAhead.actor.h"
#include "flow/actorcompiler.h" // This must be the last #include.
@ -32,9 +32,9 @@ public:
// number of slashes so the backup names are kept in a separate folder tree from their actual data.
static const std::string INDEXFOLDER;
ACTOR static Future<std::vector<std::string>> listURLs(Reference<BlobStoreEndpoint> bstore, std::string bucket) {
ACTOR static Future<std::vector<std::string>> listURLs(Reference<S3BlobStoreEndpoint> bstore, std::string bucket) {
state std::string basePath = INDEXFOLDER + '/';
BlobStoreEndpoint::ListResult contents = wait(bstore->listObjects(bucket, basePath));
S3BlobStoreEndpoint::ListResult contents = wait(bstore->listObjects(bucket, basePath));
std::vector<std::string> results;
for (auto& f : contents.objects) {
results.push_back(
@ -79,7 +79,7 @@ public:
return pathFilter(folderPath.substr(prefixTrim));
};
state BlobStoreEndpoint::ListResult result = wait(bc->m_bstore->listObjects(
state S3BlobStoreEndpoint::ListResult result = wait(bc->m_bstore->listObjects(
bc->m_bucket, bc->dataPath(path), '/', std::numeric_limits<int>::max(), rawPathFilter));
BackupContainerFileSystem::FilesAndSizesT files;
for (auto& o : result.objects) {
@ -130,8 +130,8 @@ std::string BackupContainerS3BlobStore::indexEntry() {
return BackupContainerS3BlobStoreImpl::INDEXFOLDER + "/" + m_name;
}
BackupContainerS3BlobStore::BackupContainerS3BlobStore(Reference<BlobStoreEndpoint> bstore, const std::string& name,
const BlobStoreEndpoint::ParametersT& params)
BackupContainerS3BlobStore::BackupContainerS3BlobStore(Reference<S3BlobStoreEndpoint> bstore, const std::string& name,
const S3BlobStoreEndpoint::ParametersT& params)
: m_bstore(bstore), m_name(name), m_bucket("FDB_BACKUPS_V2") {
// Currently only one parameter is supported, "bucket"
@ -156,24 +156,24 @@ void BackupContainerS3BlobStore::delref() {
}
std::string BackupContainerS3BlobStore::getURLFormat() {
return BlobStoreEndpoint::getURLFormat(true) + " (Note: The 'bucket' parameter is required.)";
return S3BlobStoreEndpoint::getURLFormat(true) + " (Note: The 'bucket' parameter is required.)";
}
Future<Reference<IAsyncFile>> BackupContainerS3BlobStore::readFile(const std::string& path) {
return Reference<IAsyncFile>(new AsyncFileReadAheadCache(
Reference<IAsyncFile>(new AsyncFileBlobStoreRead(m_bstore, m_bucket, dataPath(path))),
Reference<IAsyncFile>(new AsyncFileS3BlobStoreRead(m_bstore, m_bucket, dataPath(path))),
m_bstore->knobs.read_block_size, m_bstore->knobs.read_ahead_blocks, m_bstore->knobs.concurrent_reads_per_file,
m_bstore->knobs.read_cache_blocks_per_file));
}
Future<std::vector<std::string>> BackupContainerS3BlobStore::listURLs(Reference<BlobStoreEndpoint> bstore,
Future<std::vector<std::string>> BackupContainerS3BlobStore::listURLs(Reference<S3BlobStoreEndpoint> bstore,
const std::string& bucket) {
return BackupContainerS3BlobStoreImpl::listURLs(bstore, bucket);
}
Future<Reference<IBackupFile>> BackupContainerS3BlobStore::writeFile(const std::string& path) {
return Reference<IBackupFile>(new BackupContainerS3BlobStoreImpl::BackupFile(
path, Reference<IAsyncFile>(new AsyncFileBlobStoreWrite(m_bstore, m_bucket, dataPath(path)))));
path, Reference<IAsyncFile>(new AsyncFileS3BlobStoreWrite(m_bstore, m_bucket, dataPath(path)))));
}
Future<Void> BackupContainerS3BlobStore::deleteFile(const std::string& path) {

View File

@ -22,12 +22,12 @@
#define FDBCLIENT_BACKUP_CONTAINER_S3_BLOBSTORE_H
#pragma once
#include "fdbclient/AsyncFileBlobStore.actor.h"
#include "fdbclient/AsyncFileS3BlobStore.actor.h"
#include "fdbclient/BackupContainerFileSystem.h"
class BackupContainerS3BlobStore final : public BackupContainerFileSystem,
ReferenceCounted<BackupContainerS3BlobStore> {
Reference<BlobStoreEndpoint> m_bstore;
Reference<S3BlobStoreEndpoint> m_bstore;
std::string m_name;
// All backup data goes into a single bucket
@ -41,8 +41,8 @@ class BackupContainerS3BlobStore final : public BackupContainerFileSystem,
friend class BackupContainerS3BlobStoreImpl;
public:
BackupContainerS3BlobStore(Reference<BlobStoreEndpoint> bstore, const std::string& name,
const BlobStoreEndpoint::ParametersT& params);
BackupContainerS3BlobStore(Reference<S3BlobStoreEndpoint> bstore, const std::string& name,
const S3BlobStoreEndpoint::ParametersT& params);
void addref() override;
void delref() override;
@ -51,7 +51,7 @@ public:
Future<Reference<IAsyncFile>> readFile(const std::string& path) final;
static Future<std::vector<std::string>> listURLs(Reference<BlobStoreEndpoint> bstore, const std::string& bucket);
static Future<std::vector<std::string>> listURLs(Reference<S3BlobStoreEndpoint> bstore, const std::string& bucket);
Future<Reference<IBackupFile>> writeFile(const std::string& path) final;

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
set(FDBCLIENT_SRCS
AsyncFileBlobStore.actor.cpp
AsyncFileBlobStore.actor.h
AsyncFileS3BlobStore.actor.cpp
AsyncFileS3BlobStore.actor.h
AsyncTaskThread.actor.cpp
AsyncTaskThread.h
Atomic.h
@ -15,7 +15,6 @@ set(FDBCLIENT_SRCS
BackupContainerLocalDirectory.h
BackupContainerS3BlobStore.actor.cpp
BackupContainerS3BlobStore.h
BlobStore.actor.cpp
ClientLogEvents.h
ClientWorkerInterface.h
ClusterInterface.h
@ -61,6 +60,7 @@ set(FDBCLIENT_SRCS
RunTransaction.actor.h
RYWIterator.cpp
RYWIterator.h
S3BlobStore.actor.cpp
Schemas.cpp
Schemas.h
SnapshotCache.h

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,5 @@
/*
* BlobStore.h
* S3BlobStore.h
*
* This source file is part of the FoundationDB open source project
*
@ -31,11 +31,11 @@
// Representation of all the things you need to connect to a blob store instance with some credentials.
// Reference counted because a very large number of them could be needed.
class BlobStoreEndpoint : public ReferenceCounted<BlobStoreEndpoint> {
class S3BlobStoreEndpoint : public ReferenceCounted<S3BlobStoreEndpoint> {
public:
struct Stats {
Stats() : requests_successful(0), requests_failed(0), bytes_sent(0) {}
Stats operator-(const Stats &rhs);
Stats operator-(const Stats& rhs);
// Resets the stats by zero-filling the entire object with memset.
void clear() { memset(this, 0, sizeof(*this)); }
json_spirit::mObject getJSON();
@ -48,29 +48,12 @@ public:
struct BlobKnobs {
BlobKnobs();
int secure_connection,
connect_tries,
connect_timeout,
max_connection_life,
request_tries,
request_timeout_min,
requests_per_second,
list_requests_per_second,
write_requests_per_second,
read_requests_per_second,
delete_requests_per_second,
multipart_max_part_size,
multipart_min_part_size,
concurrent_requests,
concurrent_uploads,
concurrent_lists,
concurrent_reads_per_file,
concurrent_writes_per_file,
read_block_size,
read_ahead_blocks,
read_cache_blocks_per_file,
max_send_bytes_per_second,
max_recv_bytes_per_second;
int secure_connection, connect_tries, connect_timeout, max_connection_life, request_tries, request_timeout_min,
requests_per_second, list_requests_per_second, write_requests_per_second, read_requests_per_second,
delete_requests_per_second, multipart_max_part_size, multipart_min_part_size, concurrent_requests,
concurrent_uploads, concurrent_lists, concurrent_reads_per_file, concurrent_writes_per_file,
read_block_size, read_ahead_blocks, read_cache_blocks_per_file, max_send_bytes_per_second,
max_recv_bytes_per_second;
bool set(StringRef name, int value);
std::string getURLParameters() const;
static std::vector<std::string> getKnobDescriptions() {
@ -79,8 +62,10 @@ public:
"connect_tries (or ct) Number of times to try to connect for each request.",
"connect_timeout (or cto) Number of seconds to wait for a connect request to succeed.",
"max_connection_life (or mcl) Maximum number of seconds to use a single TCP connection.",
"request_tries (or rt) Number of times to try each request until a parseable HTTP response other than 429 is received.",
"request_timeout_min (or rtom) Number of seconds to wait for a request to succeed after a connection is established.",
"request_tries (or rt) Number of times to try each request until a parseable HTTP "
"response other than 429 is received.",
"request_timeout_min (or rtom) Number of seconds to wait for a request to succeed after a "
"connection is established.",
"requests_per_second (or rps) Max number of requests to start per second.",
"list_requests_per_second (or lrps) Max number of list requests to start per second.",
"write_requests_per_second (or wrps) Max number of write requests to start per second.",
@ -88,8 +73,10 @@ public:
"delete_requests_per_second (or drps) Max number of delete requests to start per second.",
"multipart_max_part_size (or maxps) Max part size for multipart uploads.",
"multipart_min_part_size (or minps) Min part size for multipart uploads.",
"concurrent_requests (or cr) Max number of total requests in progress at once, regardless of operation-specific concurrency limits.",
"concurrent_uploads (or cu) Max concurrent uploads (part or whole) that can be in progress at once.",
"concurrent_requests (or cr) Max number of total requests in progress at once, regardless of "
"operation-specific concurrency limits.",
"concurrent_uploads (or cu) Max concurrent uploads (part or whole) that can be in progress "
"at once.",
"concurrent_lists (or cl) Max concurrent list operations that can be in progress at once.",
"concurrent_reads_per_file (or crps) Max concurrent reads in progress for any one file.",
"concurrent_writes_per_file (or cwps) Max concurrent uploads in progress for any one file.",
@ -97,43 +84,45 @@ public:
"read_ahead_blocks (or rab) Number of blocks to read ahead of requested offset.",
"read_cache_blocks_per_file (or rcb) Size of the read cache for a file in blocks.",
"max_send_bytes_per_second (or sbps) Max send bytes per second for all requests combined.",
"max_recv_bytes_per_second (or rbps) Max receive bytes per second for all requests combined (NOT YET USED)."
"max_recv_bytes_per_second (or rbps) Max receive bytes per second for all requests combined (NOT YET "
"USED)."
};
}
};
BlobStoreEndpoint(std::string const &host, std::string service, std::string const &key, std::string const &secret, BlobKnobs const &knobs = BlobKnobs(), HTTP::Headers extraHeaders = HTTP::Headers())
: host(host), service(service), key(key), secret(secret), lookupSecret(secret.empty()), knobs(knobs), extraHeaders(extraHeaders),
requestRate(new SpeedLimit(knobs.requests_per_second, 1)),
requestRateList(new SpeedLimit(knobs.list_requests_per_second, 1)),
requestRateWrite(new SpeedLimit(knobs.write_requests_per_second, 1)),
requestRateRead(new SpeedLimit(knobs.read_requests_per_second, 1)),
requestRateDelete(new SpeedLimit(knobs.delete_requests_per_second, 1)),
sendRate(new SpeedLimit(knobs.max_send_bytes_per_second, 1)),
recvRate(new SpeedLimit(knobs.max_recv_bytes_per_second, 1)),
concurrentRequests(knobs.concurrent_requests),
concurrentUploads(knobs.concurrent_uploads),
concurrentLists(knobs.concurrent_lists) {
S3BlobStoreEndpoint(std::string const& host, std::string service, std::string const& key, std::string const& secret,
BlobKnobs const& knobs = BlobKnobs(), HTTP::Headers extraHeaders = HTTP::Headers())
: host(host), service(service), key(key), secret(secret), lookupSecret(secret.empty()), knobs(knobs),
extraHeaders(extraHeaders), requestRate(new SpeedLimit(knobs.requests_per_second, 1)),
requestRateList(new SpeedLimit(knobs.list_requests_per_second, 1)),
requestRateWrite(new SpeedLimit(knobs.write_requests_per_second, 1)),
requestRateRead(new SpeedLimit(knobs.read_requests_per_second, 1)),
requestRateDelete(new SpeedLimit(knobs.delete_requests_per_second, 1)),
sendRate(new SpeedLimit(knobs.max_send_bytes_per_second, 1)),
recvRate(new SpeedLimit(knobs.max_recv_bytes_per_second, 1)), concurrentRequests(knobs.concurrent_requests),
concurrentUploads(knobs.concurrent_uploads), concurrentLists(knobs.concurrent_lists) {
if(host.empty())
throw connection_string_invalid();
if (host.empty()) throw connection_string_invalid();
}
static std::string getURLFormat(bool withResource = false) {
const char *resource = "";
if(withResource)
resource = "<name>";
return format("blobstore://<api_key>:<secret>@<host>[:<port>]/%s[?<param>=<value>[&<param>=<value>]...]", resource);
const char* resource = "";
if (withResource) resource = "<name>";
return format("blobstore://<api_key>:<secret>@<host>[:<port>]/%s[?<param>=<value>[&<param>=<value>]...]",
resource);
}
typedef std::map<std::string, std::string> ParametersT;
// Parse url and return a BlobStoreEndpoint
// If the url has parameters that BlobStoreEndpoint can't consume then an error will be thrown unless ignored_parameters is given in which case
// the unconsumed parameters will be added to it.
static Reference<BlobStoreEndpoint> fromString(std::string const &url, std::string *resourceFromURL = nullptr, std::string *error = nullptr, ParametersT *ignored_parameters = nullptr);
// Parse url and return a S3BlobStoreEndpoint
// If the url has parameters that S3BlobStoreEndpoint can't consume then an error will be thrown unless
// ignored_parameters is given in which case the unconsumed parameters will be added to it.
static Reference<S3BlobStoreEndpoint> fromString(std::string const& url, std::string* resourceFromURL = nullptr,
std::string* error = nullptr,
ParametersT* ignored_parameters = nullptr);
// Get a normalized version of this URL with the given resource and any non-default BlobKnob values as URL parameters in addition to the passed params string
// Get a normalized version of this URL with the given resource and any non-default BlobKnob values as URL
// parameters in addition to the passed params string
std::string getResourceURL(std::string resource, std::string params);
struct ReusableConnection {
@ -142,7 +131,7 @@ public:
};
std::queue<ReusableConnection> connectionPool;
Future<ReusableConnection> connect();
void returnConnection(ReusableConnection &conn);
void returnConnection(ReusableConnection& conn);
std::string host;
std::string service;
@ -167,18 +156,21 @@ public:
Future<Void> updateSecret();
// Calculates the authentication string from the secret key
std::string hmac_sha1(std::string const &msg);
std::string hmac_sha1(std::string const& msg);
// Sets headers needed for Authorization (including Date which will be overwritten if present)
void setAuthHeaders(std::string const &verb, std::string const &resource, HTTP::Headers &headers);
void setAuthHeaders(std::string const& verb, std::string const& resource, HTTP::Headers& headers);
// Prepend the HTTP request header to the given PacketBuffer, returning the new head of the buffer chain
static PacketBuffer * writeRequestHeader(std::string const &request, HTTP::Headers const &headers, PacketBuffer *dest);
static PacketBuffer* writeRequestHeader(std::string const& request, HTTP::Headers const& headers,
PacketBuffer* dest);
// Do an HTTP request to the Blob Store, read the response. Handles authentication.
// Every blob store interaction should ultimately go through this function
Future<Reference<HTTP::Response>> doRequest(std::string const &verb, std::string const &resource, const HTTP::Headers &headers, UnsentPacketQueue *pContent, int contentLen, std::set<unsigned int> successCodes);
Future<Reference<HTTP::Response>> doRequest(std::string const& verb, std::string const& resource,
const HTTP::Headers& headers, UnsentPacketQueue* pContent,
int contentLen, std::set<unsigned int> successCodes);
struct ObjectInfo {
std::string name;
@ -192,51 +184,61 @@ public:
// Get bucket contents via a stream, since listing large buckets will take many serial blob requests
// If a delimiter is passed then common prefixes will be read in parallel, recursively, depending on recurseFilter.
// recurseFilter must be a function that takes a string and returns true if it passes. The default behavior is to assume true.
Future<Void> listObjectsStream(std::string const &bucket, PromiseStream<ListResult> results, Optional<std::string> prefix = {}, Optional<char> delimiter = {}, int maxDepth = 0, std::function<bool(std::string const &)> recurseFilter = nullptr);
// recurseFilter must be a function that takes a string and returns true if it passes. The default behavior is
// to assume true.
Future<Void> listObjectsStream(std::string const& bucket, PromiseStream<ListResult> results,
Optional<std::string> prefix = {}, Optional<char> delimiter = {}, int maxDepth = 0,
std::function<bool(std::string const&)> recurseFilter = nullptr);
// Get a list of the files in a bucket, see listObjectsStream for more argument detail.
Future<ListResult> listObjects(std::string const &bucket, Optional<std::string> prefix = {}, Optional<char> delimiter = {}, int maxDepth = 0, std::function<bool(std::string const &)> recurseFilter = nullptr);
Future<ListResult> listObjects(std::string const& bucket, Optional<std::string> prefix = {},
Optional<char> delimiter = {}, int maxDepth = 0,
std::function<bool(std::string const&)> recurseFilter = nullptr);
// Get a list of all buckets
Future<std::vector<std::string>> listBuckets();
// Check if a bucket exists
Future<bool> bucketExists(std::string const &bucket);
Future<bool> bucketExists(std::string const& bucket);
// Check if an object exists in a bucket
Future<bool> objectExists(std::string const &bucket, std::string const &object);
Future<bool> objectExists(std::string const& bucket, std::string const& object);
// Get the size of an object in a bucket
Future<int64_t> objectSize(std::string const &bucket, std::string const &object);
Future<int64_t> objectSize(std::string const& bucket, std::string const& object);
// Read an arbitrary segment of an object
Future<int> readObject(std::string const &bucket, std::string const &object, void *data, int length, int64_t offset);
Future<int> readObject(std::string const& bucket, std::string const& object, void* data, int length,
int64_t offset);
// Delete an object in a bucket
Future<Void> deleteObject(std::string const &bucket, std::string const &object);
Future<Void> deleteObject(std::string const& bucket, std::string const& object);
// Delete all objects in a bucket under a prefix. Note this is not atomic as blob store does not
// support this operation directly. This method is just a convenience method that lists and deletes
// all of the objects in the bucket under the given prefix.
// Since it can take a while, if pNumDeleted and/or pBytesDeleted are provided they will be incremented every time
// a deletion of an object completes.
Future<Void> deleteRecursively(std::string const &bucket, std::string prefix = "", int *pNumDeleted = nullptr, int64_t *pBytesDeleted = nullptr);
Future<Void> deleteRecursively(std::string const& bucket, std::string prefix = "", int* pNumDeleted = nullptr,
int64_t* pBytesDeleted = nullptr);
// Create a bucket if it does not already exist.
Future<Void> createBucket(std::string const &bucket);
Future<Void> createBucket(std::string const& bucket);
// Useful methods for working with tiny files
Future<std::string> readEntireFile(std::string const &bucket, std::string const &object);
Future<Void> writeEntireFile(std::string const &bucket, std::string const &object, std::string const &content);
Future<Void> writeEntireFileFromBuffer(std::string const &bucket, std::string const &object, UnsentPacketQueue *pContent, int contentLen, std::string const &contentMD5);
Future<std::string> readEntireFile(std::string const& bucket, std::string const& object);
Future<Void> writeEntireFile(std::string const& bucket, std::string const& object, std::string const& content);
Future<Void> writeEntireFileFromBuffer(std::string const& bucket, std::string const& object,
UnsentPacketQueue* pContent, int contentLen, std::string const& contentMD5);
// MultiPart upload methods
// Returns UploadID
Future<std::string> beginMultiPartUpload(std::string const &bucket, std::string const &object);
Future<std::string> beginMultiPartUpload(std::string const& bucket, std::string const& object);
// Returns eTag
Future<std::string> uploadPart(std::string const &bucket, std::string const &object, std::string const &uploadID, unsigned int partNumber, UnsentPacketQueue *pContent, int contentLen, std::string const &contentMD5);
Future<std::string> uploadPart(std::string const& bucket, std::string const& object, std::string const& uploadID,
unsigned int partNumber, UnsentPacketQueue* pContent, int contentLen,
std::string const& contentMD5);
typedef std::map<int, std::string> MultiPartSetT;
Future<Void> finishMultiPartUpload(std::string const &bucket, std::string const &object, std::string const &uploadID, MultiPartSetT const &parts);
Future<Void> finishMultiPartUpload(std::string const& bucket, std::string const& object,
std::string const& uploadID, MultiPartSetT const& parts);
};

View File

@ -26,7 +26,7 @@
#include "flow/flow.h"
// All outstanding operations must be cancelled before the destructor of IAsyncFile is called.
// The desirability of the above semantic is disputed. Some classes (AsyncFileBlobStore,
// The desirability of the above semantic is disputed. Some classes (AsyncFileS3BlobStore,
// AsyncFileCached) maintain references, while others (AsyncFileNonDurable) don't, and the comment
// is inapplicable to some others as well (AsyncFileKAIO). It's safest to assume that all operations
// must complete or cancel, but you should probably look at the file implementations you'll be using.

View File

@ -43,7 +43,6 @@ if(WITH_PYTHON)
add_fdb_test(TEST_FILES BackupContainers.txt IGNORE)
add_fdb_test(TEST_FILES BandwidthThrottle.txt IGNORE)
add_fdb_test(TEST_FILES BigInsert.txt IGNORE)
add_fdb_test(TEST_FILES BlobStore.txt IGNORE)
add_fdb_test(TEST_FILES ConsistencyCheck.txt IGNORE)
add_fdb_test(TEST_FILES DDMetricsExclude.txt IGNORE)
add_fdb_test(TEST_FILES DataDistributionMetrics.txt IGNORE)
@ -76,6 +75,7 @@ if(WITH_PYTHON)
add_fdb_test(TEST_FILES RedwoodPerfPrefixCompression.txt IGNORE)
add_fdb_test(TEST_FILES RedwoodPerfSequentialInsert.txt IGNORE)
add_fdb_test(TEST_FILES RocksDBTest.txt IGNORE)
add_fdb_test(TEST_FILES S3BlobStore.txt IGNORE)
add_fdb_test(TEST_FILES SampleNoSimAttrition.txt IGNORE)
if (NOT USE_UBSAN) # TODO re-enable in UBSAN after https://github.com/apple/foundationdb/issues/2410 is resolved
add_fdb_test(TEST_FILES SimpleExternalTest.txt)