foundationdb/fdbclient/AsyncFileS3BlobStore.actor.h

302 lines
11 KiB
C
Raw Normal View History

2017-05-26 04:48:44 +08:00
/*
* AsyncFileS3BlobStore.actor.h
2017-05-26 04:48:44 +08:00
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
*
2017-05-26 04:48:44 +08:00
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
2017-05-26 04:48:44 +08:00
* http://www.apache.org/licenses/LICENSE-2.0
*
2017-05-26 04:48:44 +08:00
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
// When actually compiled (NO_INTELLISENSE), include the generated version of this file. In intellisense use the source
// version.
2017-05-26 04:48:44 +08:00
#if defined(NO_INTELLISENSE) && !defined(FDBRPC_ASYNCFILEBLOBSTORE_ACTOR_G_H)
#define FDBRPC_ASYNCFILEBLOBSTORE_ACTOR_G_H
#include "fdbclient/AsyncFileS3BlobStore.actor.g.h"
#elif !defined(FDBRPC_ASYNCFILES3BLOBSTORE_ACTOR_H)
#define FDBRPC_ASYNCFILES3BLOBSTORE_ACTOR_H
2017-05-26 04:48:44 +08:00
#include <sstream>
#include <time.h>
#include "fdbrpc/IAsyncFile.h"
2017-05-26 04:48:44 +08:00
#include "flow/serialize.h"
#include "flow/Net2Packet.h"
#include "fdbrpc/IRateControl.h"
#include "fdbclient/S3BlobStore.h"
#include "fdbclient/md5/md5.h"
#include "fdbclient/libb64/encode.h"
#include "flow/actorcompiler.h" // This must be the last #include.
2017-05-26 04:48:44 +08:00
// Ties the future `f` to a shared "error group" represented by the promise `p`.
//
// Waits until either `f` is ready or `p`'s future fires, then returns the value of `f`.
// If anything in the try block throws (for example `f` failed, or `p` was already sent an
// error by another member of the group), the error is forwarded to `p` when `p` can still
// be set, and is then rethrown to the caller.
//
// NOTE(review): the `return f.get()` path assumes `p` is only ever sent errors, never a
// value; if `p` were set with a value first, `f` might not be ready here -- confirm with callers.
ACTOR template <typename T>
static Future<T> joinErrorGroup(Future<T> f, Promise<Void> p) {
try {
// success(f) discards f's value so it can be combined with the Void future of p.
wait(success(f) || p.getFuture());
return f.get();
} catch (Error& e) {
// Only the first failure is recorded in the group; later ones find p already set.
if (p.canBeSet())
p.sendError(e);
throw;
}
}
2017-05-26 04:48:44 +08:00
// This class represents a write-only file that lives in an S3-style blob store. It writes using the REST API,
// using multi-part upload and beginning to transfer each part as soon as it is large enough.
// All write operations file operations must be sequential and contiguous.
// Limits on part sizes, upload speed, and concurrent uploads are taken from the S3BlobStoreEndpoint being used.
2020-11-28 02:10:41 +08:00
class AsyncFileS3BlobStoreWrite final : public IAsyncFile, public ReferenceCounted<AsyncFileS3BlobStoreWrite> {
2017-05-26 04:48:44 +08:00
public:
2020-11-28 02:10:41 +08:00
void addref() override { ReferenceCounted<AsyncFileS3BlobStoreWrite>::addref(); }
void delref() override { ReferenceCounted<AsyncFileS3BlobStoreWrite>::delref(); }
2017-05-26 04:48:44 +08:00
struct Part : ReferenceCounted<Part> {
Part(int n, int minSize)
: number(n), writer(content.getWriteBuffer(minSize), nullptr, Unversioned()), length(0) {
2017-05-26 04:48:44 +08:00
etag = std::string();
::MD5_Init(&content_md5_buf);
}
virtual ~Part() { etag.cancel(); }
2017-05-26 04:48:44 +08:00
Future<std::string> etag;
int number;
UnsentPacketQueue content;
std::string md5string;
PacketWriter writer;
int length;
void write(const uint8_t* buf, int len) {
2017-05-26 04:48:44 +08:00
writer.serializeBytes(buf, len);
::MD5_Update(&content_md5_buf, buf, len);
length += len;
}
// MD5 sum can only be finalized once, further calls will do nothing so new writes will be reflected in the sum.
void finalizeMD5() {
if (md5string.empty()) {
2017-05-26 04:48:44 +08:00
std::string sumBytes;
sumBytes.resize(16);
::MD5_Final((unsigned char*)sumBytes.data(), &content_md5_buf);
2017-05-26 04:48:44 +08:00
md5string = base64::encoder::from_string(sumBytes);
md5string.resize(md5string.size() - 1);
}
}
private:
MD5_CTX content_md5_buf;
};
2020-08-20 08:32:11 +08:00
Future<int> read(void* data, int length, int64_t offset) override { throw file_not_readable(); }
2017-05-26 04:48:44 +08:00
ACTOR static Future<Void> write_impl(Reference<AsyncFileS3BlobStoreWrite> f, const uint8_t* data, int length) {
state Part* p = f->m_parts.back().getPtr();
// If this write will cause the part to cross the min part size boundary then write to the boundary and start a
// new part.
while (p->length + length >= f->m_bstore->knobs.multipart_min_part_size) {
2017-05-26 04:48:44 +08:00
// Finish off this part
int finishlen = f->m_bstore->knobs.multipart_min_part_size - p->length;
p->write((const uint8_t*)data, finishlen);
2017-05-26 04:48:44 +08:00
// Adjust source buffer args
length -= finishlen;
data = (const uint8_t*)data + finishlen;
2017-05-26 04:48:44 +08:00
// End current part (and start new one)
wait(f->endCurrentPart(f.getPtr(), true));
2017-05-26 04:48:44 +08:00
p = f->m_parts.back().getPtr();
}
p->write((const uint8_t*)data, length);
2017-05-26 04:48:44 +08:00
return Void();
}
2020-08-20 08:32:11 +08:00
Future<Void> write(void const* data, int length, int64_t offset) override {
if (offset != m_cursor)
throw non_sequential_op();
2017-05-26 04:48:44 +08:00
m_cursor += length;
return m_error.getFuture() ||
write_impl(Reference<AsyncFileS3BlobStoreWrite>::addRef(this), (const uint8_t*)data, length);
2017-05-26 04:48:44 +08:00
}
2020-08-20 08:32:11 +08:00
Future<Void> truncate(int64_t size) override {
if (size != m_cursor)
return non_sequential_op();
2017-05-26 04:48:44 +08:00
return Void();
}
ACTOR static Future<std::string> doPartUpload(AsyncFileS3BlobStoreWrite* f, Part* p) {
p->finalizeMD5();
std::string upload_id = wait(f->getUploadID());
std::string etag = wait(f->m_bstore->uploadPart(
f->m_bucket, f->m_object, upload_id, p->number, &p->content, p->length, p->md5string));
return etag;
2017-05-26 04:48:44 +08:00
}
ACTOR static Future<Void> doFinishUpload(AsyncFileS3BlobStoreWrite* f) {
2017-05-26 04:48:44 +08:00
// If there is only 1 part then it has not yet been uploaded so just write the whole file at once.
if (f->m_parts.size() == 1) {
2017-05-26 04:48:44 +08:00
Reference<Part> part = f->m_parts.back();
part->finalizeMD5();
wait(f->m_bstore->writeEntireFileFromBuffer(
f->m_bucket, f->m_object, &part->content, part->length, part->md5string));
2017-05-26 04:48:44 +08:00
return Void();
}
// There are at least 2 parts. End the last part (which could be empty)
wait(f->endCurrentPart(f));
2017-05-26 04:48:44 +08:00
state S3BlobStoreEndpoint::MultiPartSetT partSet;
2017-05-26 04:48:44 +08:00
state std::vector<Reference<Part>>::iterator p;
// Wait for all the parts to be done to get their ETags, populate the partSet required to finish the object
// upload.
for (p = f->m_parts.begin(); p != f->m_parts.end(); ++p) {
2017-05-26 04:48:44 +08:00
std::string tag = wait((*p)->etag);
if ((*p)->length > 0) // The last part might be empty and has to be omitted.
2017-05-26 04:48:44 +08:00
partSet[(*p)->number] = tag;
}
// No need to wait for the upload ID here because the above loop waited for all the parts and each part required
// the upload ID so it is ready
wait(f->m_bstore->finishMultiPartUpload(f->m_bucket, f->m_object, f->m_upload_id.get(), partSet));
2017-05-26 04:48:44 +08:00
return Void();
}
// Ready once all data has been sent AND acknowledged from the remote side
Future<Void> sync() override {
2017-05-26 04:48:44 +08:00
// Only initiate the finish operation once, and also prevent further writing.
if (!m_finished.isValid()) {
m_finished = doFinishUpload(this);
m_cursor = -1; // Cause future write attempts to fail
2017-05-26 04:48:44 +08:00
}
return m_finished;
}
//
// Flush can't really do what the caller would "want" for a blob store file. The caller would probably notionally
// want all bytes written to be at least in transit to the blob store, but that is not very feasible. The blob
// store has a minimum size requirement for all but the final part, and parts must be sent with a header that
// specifies their size. So in the case of a write buffer that does not meet the part minimum size the part could
// be sent but then if there is any more data written then that part needs to be sent again in its entirety. So a
// client that calls flush often could generate far more blob store write traffic than they intend to.
2020-08-20 08:32:11 +08:00
Future<Void> flush() override { return Void(); }
2017-05-26 04:48:44 +08:00
2020-08-20 08:32:11 +08:00
Future<int64_t> size() const override { return m_cursor; }
2017-05-26 04:48:44 +08:00
2020-08-20 08:32:11 +08:00
Future<Void> readZeroCopy(void** data, int* length, int64_t offset) override {
TraceEvent(SevError, "ReadZeroCopyNotSupported").detail("FileType", "S3BlobStoreWrite");
2017-05-26 04:48:44 +08:00
return platform_error();
}
2020-08-20 08:32:11 +08:00
void releaseZeroCopy(void* data, int length, int64_t offset) override {}
2017-05-26 04:48:44 +08:00
2020-08-20 08:32:11 +08:00
int64_t debugFD() const override { return -1; }
2017-05-26 04:48:44 +08:00
~AsyncFileS3BlobStoreWrite() override {
2017-05-26 04:48:44 +08:00
m_upload_id.cancel();
m_finished.cancel();
m_parts.clear(); // Contains futures
2017-05-26 04:48:44 +08:00
}
2020-08-20 08:32:11 +08:00
std::string getFilename() const override { return m_object; }
2017-05-26 04:48:44 +08:00
private:
Reference<S3BlobStoreEndpoint> m_bstore;
2017-05-26 04:48:44 +08:00
std::string m_bucket;
std::string m_object;
int64_t m_cursor;
Future<std::string> m_upload_id;
Future<Void> m_finished;
std::vector<Reference<Part>> m_parts;
Promise<Void> m_error;
FlowLock m_concurrentUploads;
2017-05-26 04:48:44 +08:00
// End the current part and start uploading it, but also wait for a part to finish if too many are in transit.
ACTOR static Future<Void> endCurrentPart(AsyncFileS3BlobStoreWrite* f, bool startNew = false) {
if (f->m_parts.back()->length == 0)
return Void();
2017-05-26 04:48:44 +08:00
// Wait for an upload slot to be available
wait(f->m_concurrentUploads.take());
// Do the upload, and if it fails forward errors to m_error and also stop if anything else sends an error to
// m_error Also, hold a releaser for the concurrent upload slot while all that is going on.
auto releaser = std::make_shared<FlowLock::Releaser>(f->m_concurrentUploads, 1);
f->m_parts.back()->etag =
holdWhile(std::move(releaser), joinErrorGroup(doPartUpload(f, f->m_parts.back().getPtr()), f->m_error));
2017-05-26 04:48:44 +08:00
// Make a new part to write to
if (startNew)
f->m_parts.push_back(
Reference<Part>(new Part(f->m_parts.size() + 1, f->m_bstore->knobs.multipart_min_part_size)));
2017-05-26 04:48:44 +08:00
return Void();
}
Future<std::string> getUploadID() {
if (!m_upload_id.isValid())
m_upload_id = m_bstore->beginMultiPartUpload(m_bucket, m_object);
2017-05-26 04:48:44 +08:00
return m_upload_id;
}
public:
AsyncFileS3BlobStoreWrite(Reference<S3BlobStoreEndpoint> bstore, std::string bucket, std::string object)
: m_bstore(bstore), m_bucket(bucket), m_object(object), m_cursor(0),
m_concurrentUploads(bstore->knobs.concurrent_writes_per_file) {
2017-05-26 04:48:44 +08:00
// Add first part
m_parts.push_back(makeReference<Part>(1, m_bstore->knobs.multipart_min_part_size));
2017-05-26 04:48:44 +08:00
}
};
// This class represents a read-only file that lives in an S3-style blob store. It reads using the REST API.
2020-11-28 02:10:41 +08:00
class AsyncFileS3BlobStoreRead final : public IAsyncFile, public ReferenceCounted<AsyncFileS3BlobStoreRead> {
2017-05-26 04:48:44 +08:00
public:
2020-11-28 02:10:41 +08:00
void addref() override { ReferenceCounted<AsyncFileS3BlobStoreRead>::addref(); }
void delref() override { ReferenceCounted<AsyncFileS3BlobStoreRead>::delref(); }
2017-05-26 04:48:44 +08:00
Future<int> read(void* data, int length, int64_t offset) override;
2017-05-26 04:48:44 +08:00
Future<Void> write(void const* data, int length, int64_t offset) override { throw file_not_writable(); }
Future<Void> truncate(int64_t size) override { throw file_not_writable(); }
2017-05-26 04:48:44 +08:00
Future<Void> sync() override { return Void(); }
Future<Void> flush() override { return Void(); }
2017-05-26 04:48:44 +08:00
2020-08-20 08:32:11 +08:00
Future<int64_t> size() const override;
2017-05-26 04:48:44 +08:00
Future<Void> readZeroCopy(void** data, int* length, int64_t offset) override {
TraceEvent(SevError, "ReadZeroCopyNotSupported").detail("FileType", "S3BlobStoreRead");
2017-05-26 04:48:44 +08:00
return platform_error();
}
void releaseZeroCopy(void* data, int length, int64_t offset) override {}
2017-05-26 04:48:44 +08:00
int64_t debugFD() const override { return -1; }
2017-05-26 04:48:44 +08:00
2020-08-20 08:32:11 +08:00
std::string getFilename() const override { return m_object; }
2017-05-26 04:48:44 +08:00
~AsyncFileS3BlobStoreRead() override {}
2017-05-26 04:48:44 +08:00
Reference<S3BlobStoreEndpoint> m_bstore;
2017-05-26 04:48:44 +08:00
std::string m_bucket;
std::string m_object;
2020-08-20 08:32:11 +08:00
mutable Future<int64_t> m_size;
2017-05-26 04:48:44 +08:00
AsyncFileS3BlobStoreRead(Reference<S3BlobStoreEndpoint> bstore, std::string bucket, std::string object)
: m_bstore(bstore), m_bucket(bucket), m_object(object) {}
2017-05-26 04:48:44 +08:00
};
#include "flow/unactorcompiler.h"
2017-05-26 04:48:44 +08:00
#endif