/*
 * AsyncFileReadAhead.actor.h
 *
 * This source file is part of the FoundationDB open source project
 *
 * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// When actually compiled (NO_INTELLISENSE), include the generated version of this file. In intellisense use the source
|
|
|
|
// version.
|
2017-05-26 04:48:44 +08:00
|
|
|
#if defined(NO_INTELLISENSE) && !defined(FDBRPC_ASYNCFILEREADAHEAD_ACTOR_G_H)
|
2021-03-11 02:06:03 +08:00
|
|
|
#define FDBRPC_ASYNCFILEREADAHEAD_ACTOR_G_H
|
|
|
|
#include "fdbrpc/AsyncFileReadAhead.actor.g.h"
|
2017-05-26 04:48:44 +08:00
|
|
|
#elif !defined(FDBRPC_ASYNCFILEREADAHEAD_ACTOR_H)
|
2021-03-11 02:06:03 +08:00
|
|
|
#define FDBRPC_ASYNCFILEREADAHEAD_ACTOR_H
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
#include "flow/flow.h"
|
2018-10-20 01:30:13 +08:00
|
|
|
#include "fdbrpc/IAsyncFile.h"
|
2021-03-11 02:06:03 +08:00
|
|
|
#include "flow/actorcompiler.h" // This must be the last #include.
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Read-only file type that wraps another file instance, reads in large blocks, and reads ahead of the actual range
|
|
|
|
// requested
|
2020-10-08 10:55:05 +08:00
|
|
|
class AsyncFileReadAheadCache final : public IAsyncFile, public ReferenceCounted<AsyncFileReadAheadCache> {
|
2017-05-26 04:48:44 +08:00
|
|
|
public:
|
2020-10-08 10:55:05 +08:00
|
|
|
void addref() override { ReferenceCounted<AsyncFileReadAheadCache>::addref(); }
|
|
|
|
void delref() override { ReferenceCounted<AsyncFileReadAheadCache>::delref(); }
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
struct CacheBlock : ReferenceCounted<CacheBlock> {
|
|
|
|
CacheBlock(int size = 0) : data(new uint8_t[size]), len(size) {}
|
2021-03-11 02:06:03 +08:00
|
|
|
~CacheBlock() { delete[] data; }
|
|
|
|
uint8_t* data;
|
2017-05-26 04:48:44 +08:00
|
|
|
int len;
|
|
|
|
};
|
|
|
|
|
|
|
|
// Read from the underlying file to a CacheBlock
|
2021-03-11 02:06:03 +08:00
|
|
|
ACTOR static Future<Reference<CacheBlock>> readBlock(AsyncFileReadAheadCache* f, int length, int64_t offset) {
|
2018-08-11 04:57:10 +08:00
|
|
|
wait(f->m_max_concurrent_reads.take());
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
state Reference<CacheBlock> block(new CacheBlock(length));
|
|
|
|
try {
|
|
|
|
int len = wait(f->m_f->read(block->data, length, offset));
|
|
|
|
block->len = len;
|
2021-03-11 02:06:03 +08:00
|
|
|
} catch (Error& e) {
|
2017-05-26 04:48:44 +08:00
|
|
|
f->m_max_concurrent_reads.release(1);
|
|
|
|
throw e;
|
|
|
|
}
|
|
|
|
|
|
|
|
f->m_max_concurrent_reads.release(1);
|
|
|
|
return block;
|
|
|
|
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
ACTOR static Future<int> read_impl(Reference<AsyncFileReadAheadCache> f, void* data, int length, int64_t offset) {
|
2017-05-26 04:48:44 +08:00
|
|
|
// Make sure range is valid for the file
|
|
|
|
int64_t fileSize = wait(f->size());
|
2021-03-11 02:06:03 +08:00
|
|
|
if (offset >= fileSize)
|
|
|
|
return 0; // TODO: Should this throw since the input isn't really valid?
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
if (length == 0) {
|
2019-02-28 16:22:38 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
// If reading past the end then clip length to just read to the end
|
2021-03-11 02:06:03 +08:00
|
|
|
if (offset + length > fileSize)
|
|
|
|
length = fileSize - offset; // Length is at least 1 since offset < fileSize
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
// Calculate block range for the blocks that contain this data
|
|
|
|
state int firstBlockNum = offset / f->m_block_size;
|
2019-02-28 16:22:38 +08:00
|
|
|
ASSERT(f->m_block_size > 0);
|
|
|
|
state int lastBlockNum = (offset + length - 1) / f->m_block_size;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
// Start reads (if needed) of the block range required for this read, plus the read ahead blocks
|
|
|
|
// The futures for the read started will be stored in the cache but since things can be evicted from
|
|
|
|
// the cache while we're wait()ing we also will keep a local cache of futures for the blocks
|
|
|
|
// we need (not the read ahead blocks).
|
|
|
|
state std::map<int, Future<Reference<CacheBlock>>> localCache;
|
|
|
|
|
|
|
|
// Start blocks up to the read ahead size beyond the last needed block but don't go past the end of the file
|
|
|
|
state int lastBlockNumInFile = ((fileSize + f->m_block_size - 1) / f->m_block_size) - 1;
|
2019-02-28 16:22:38 +08:00
|
|
|
ASSERT(lastBlockNum <= lastBlockNumInFile);
|
2017-05-26 04:48:44 +08:00
|
|
|
int lastBlockToStart = std::min<int>(lastBlockNum + f->m_read_ahead_blocks, lastBlockNumInFile);
|
|
|
|
|
2019-02-28 16:22:38 +08:00
|
|
|
state int blockNum;
|
2021-03-11 02:06:03 +08:00
|
|
|
for (blockNum = firstBlockNum; blockNum <= lastBlockToStart; ++blockNum) {
|
2017-05-26 04:48:44 +08:00
|
|
|
Future<Reference<CacheBlock>> fblock;
|
|
|
|
|
|
|
|
// Look in the per-file cache for the block's future
|
|
|
|
auto i = f->m_blocks.find(blockNum);
|
|
|
|
// If not found, start the read.
|
2021-03-11 02:06:03 +08:00
|
|
|
if (i == f->m_blocks.end() || (i->second.isValid() && i->second.isError())) {
|
|
|
|
// printf("starting read of %s block %d\n", f->getFilename().c_str(), blockNum);
|
2017-09-30 10:13:08 +08:00
|
|
|
fblock = readBlock(f.getPtr(), f->m_block_size, f->m_block_size * blockNum);
|
2017-05-26 04:48:44 +08:00
|
|
|
f->m_blocks[blockNum] = fblock;
|
2021-03-11 02:06:03 +08:00
|
|
|
} else
|
2017-05-26 04:48:44 +08:00
|
|
|
fblock = i->second;
|
|
|
|
|
|
|
|
// Only put blocks we actually need into our local cache
|
2021-03-11 02:06:03 +08:00
|
|
|
if (blockNum <= lastBlockNum)
|
2017-05-26 04:48:44 +08:00
|
|
|
localCache[blockNum] = fblock;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Read block(s) and copy data
|
|
|
|
state int wpos = 0;
|
2021-03-11 02:06:03 +08:00
|
|
|
for (blockNum = firstBlockNum; blockNum <= lastBlockNum; ++blockNum) {
|
2017-05-26 04:48:44 +08:00
|
|
|
// Wait for block to be ready
|
|
|
|
Reference<CacheBlock> block = wait(localCache[blockNum]);
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Calculate the block-relative read range. It's a given that the offset / length range touches this block
|
|
|
|
// so readStart will never be greater than blocksize (though it could be past the actual end of a short
|
|
|
|
// block).
|
2017-05-26 04:48:44 +08:00
|
|
|
int64_t blockStart = blockNum * f->m_block_size;
|
|
|
|
int64_t readStart = std::max<int64_t>(0, offset - blockStart);
|
|
|
|
int64_t readEnd = std::min<int64_t>(f->m_block_size, offset + length - blockStart);
|
|
|
|
int rlen = readEnd - readStart;
|
2021-03-11 02:06:03 +08:00
|
|
|
memcpy((uint8_t*)data + wpos, block->data + readStart, rlen);
|
2017-05-26 04:48:44 +08:00
|
|
|
wpos += rlen;
|
|
|
|
}
|
|
|
|
|
|
|
|
ASSERT(wpos == length);
|
|
|
|
localCache.clear();
|
|
|
|
|
|
|
|
// If the cache is too large then go through the cache in block number order and remove any entries whose future
|
|
|
|
// has a reference count of 1, stopping once the cache is no longer too big. There is no point in removing
|
|
|
|
// an entry from the cache if it has a reference count of > 1 because it will continue to exist and use memory
|
|
|
|
// anyway so it should be left in the cache so that other readers may benefit from it.
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// printf("cache block limit: %d Cache contents:\n", f->m_cache_block_limit);
|
|
|
|
// for(auto &m : f->m_blocks) printf("\tblock %d refcount %d\n", m.first, m.second.getFutureReferenceCount());
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
if (f->m_blocks.size() > f->m_cache_block_limit) {
|
2017-05-26 04:48:44 +08:00
|
|
|
auto i = f->m_blocks.begin();
|
2021-03-11 02:06:03 +08:00
|
|
|
while (i != f->m_blocks.end()) {
|
|
|
|
if (i->second.getFutureReferenceCount() == 1) {
|
|
|
|
// printf("evicting block %d\n", i->first);
|
2017-05-26 04:48:44 +08:00
|
|
|
i = f->m_blocks.erase(i);
|
2021-03-11 02:06:03 +08:00
|
|
|
if (f->m_blocks.size() <= f->m_cache_block_limit)
|
2017-05-26 04:48:44 +08:00
|
|
|
break;
|
2021-03-11 02:06:03 +08:00
|
|
|
} else
|
2017-05-26 04:48:44 +08:00
|
|
|
++i;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return wpos;
|
|
|
|
}
|
|
|
|
|
2020-08-20 09:17:05 +08:00
|
|
|
Future<int> read(void* data, int length, int64_t offset) override {
|
2017-05-26 04:48:44 +08:00
|
|
|
return read_impl(Reference<AsyncFileReadAheadCache>::addRef(this), data, length, offset);
|
|
|
|
}
|
|
|
|
|
2020-08-20 09:17:05 +08:00
|
|
|
Future<Void> write(void const* data, int length, int64_t offset) override { throw file_not_writable(); }
|
|
|
|
Future<Void> truncate(int64_t size) override { throw file_not_writable(); }
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2020-08-20 09:17:05 +08:00
|
|
|
Future<Void> sync() override { return Void(); }
|
|
|
|
Future<Void> flush() override { return Void(); }
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2020-08-20 08:32:11 +08:00
|
|
|
Future<int64_t> size() const override { return m_f->size(); }
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2020-08-20 09:17:05 +08:00
|
|
|
Future<Void> readZeroCopy(void** data, int* length, int64_t offset) override {
|
2017-05-26 04:48:44 +08:00
|
|
|
TraceEvent(SevError, "ReadZeroCopyNotSupported").detail("FileType", "ReadAheadCache");
|
|
|
|
return platform_error();
|
|
|
|
}
|
2020-08-20 09:17:05 +08:00
|
|
|
void releaseZeroCopy(void* data, int length, int64_t offset) override {}
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2020-08-20 08:32:11 +08:00
|
|
|
int64_t debugFD() const override { return -1; }
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2020-08-20 09:17:05 +08:00
|
|
|
std::string getFilename() const override { return m_f->getFilename(); }
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2021-01-26 09:55:43 +08:00
|
|
|
~AsyncFileReadAheadCache() override {
|
2021-03-11 02:06:03 +08:00
|
|
|
for (auto& it : m_blocks) {
|
2017-09-30 10:13:08 +08:00
|
|
|
it.second.cancel();
|
|
|
|
}
|
|
|
|
}
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
Reference<IAsyncFile> m_f;
|
|
|
|
int m_block_size;
|
|
|
|
int m_read_ahead_blocks;
|
|
|
|
int m_cache_block_limit;
|
|
|
|
FlowLock m_max_concurrent_reads;
|
|
|
|
|
|
|
|
// Map block numbers to future
|
|
|
|
std::map<int, Future<Reference<CacheBlock>>> m_blocks;
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
AsyncFileReadAheadCache(Reference<IAsyncFile> f,
|
|
|
|
int blockSize,
|
|
|
|
int readAheadBlocks,
|
|
|
|
int maxConcurrentReads,
|
|
|
|
int cacheSizeBlocks)
|
|
|
|
: m_f(f), m_block_size(blockSize), m_read_ahead_blocks(readAheadBlocks),
|
|
|
|
m_max_concurrent_reads(maxConcurrentReads), m_cache_block_limit(std::max<int>(1, cacheSizeBlocks)) {}
|
2017-05-26 04:48:44 +08:00
|
|
|
};
|
|
|
|
|
2018-08-11 06:47:41 +08:00
|
|
|
#include "flow/unactorcompiler.h"
|
|
|
|
#endif
|