foundationdb/fdbserver/VFSAsync.cpp

827 lines
27 KiB
C++
Raw Normal View History

/*
 * VFSAsync.cpp
 *
 * This source file is part of the FoundationDB open source project
 *
 * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "sqlite/sqlite3.h"
#include <stdio.h>
#include <string>
#include <vector>
#include "fdbrpc/fdbrpc.h"
#include "fdbrpc/IAsyncFile.h"
#include "fdbserver/CoroFlow.h"
2017-05-26 04:48:44 +08:00
#include "fdbrpc/simulator.h"
#include "fdbrpc/AsyncFileReadAhead.actor.h"
#include <assert.h>
#include <string.h>
#ifdef WIN32
#include <Windows.h>
2017-05-26 04:48:44 +08:00
#endif
#ifdef __unixish__
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/param.h>
#include <sys/time.h>
#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
#endif
#include "fdbserver/VFSAsync.h"
2017-05-26 04:48:44 +08:00
/*
** The maximum pathname length supported by this VFS.
*/
#define MAXPATHNAME 512
// File lock levels, as received in the eLock argument of asyncLock()/asyncUnlock().
#define NO_LOCK 0
#define SHARED_LOCK 1
#define RESERVED_LOCK 2
#define PENDING_LOCK 3
#define EXCLUSIVE_LOCK 4
// Threshold used by asyncCheckReservedLock(): a per-file lock count at or above
// this value is reported as "a RESERVED lock is held".
const uint32_t RESERVED_COUNT = 1U<<29;
// Constructs the per-handle state for `filename`: caches a pointer to the file's
// shared lock counter and bumps the file's open-handle count in the static
// filename_lockCount_openCount table (creating the entry if needed).
VFSAsyncFile::VFSAsyncFile(std::string const& filename, int flags)
  : filename(filename), flags(flags), pLockCount(&filename_lockCount_openCount[filename].first), debug_zcrefs(0), debug_zcreads(0), debug_reads(0), chunkSize(0) {
    // .first is the lock count, .second is the number of open handles.
    auto& counters = filename_lockCount_openCount[filename];
    counters.second++;

    TraceEvent(SevDebug, "VFSAsyncFileConstruct")
        .detail("Filename", filename)
        .detail("OpenCount", counters.second)
        .detail("LockCount", counters.first)
        .backtrace();
}
2017-05-26 04:48:44 +08:00
// Per-filename bookkeeping shared by all handles: pair.first is the lock count
// (pointed to by each handle's pLockCount), pair.second is the number of open
// VFSAsyncFile objects for that filename.
std::map<std::string, std::pair<uint32_t,int>> VFSAsyncFile::filename_lockCount_openCount;
// xClose: tears down the VFSAsyncFile that asyncOpen() constructed in place in
// the caller-provided sqlite3_file storage. The destructor is invoked explicitly
// because the storage itself is not owned by this object.
static int asyncClose(sqlite3_file *pFile){
    VFSAsyncFile *p = (VFSAsyncFile*)pFile;

    TraceEvent(SevDebug, "VFSAsyncFileDestroy")
        .detail("Filename", p->filename)
        .backtrace();

    // Every zero-copy read must have been released before the file is closed.
    ASSERT( !p->debug_zcrefs );

    p->~VFSAsyncFile();
    return SQLITE_OK;
}
// xRead: reads iAmt bytes at offset iOfst into zBuf through the underlying
// IAsyncFile and waits for the result.
// Returns SQLITE_OK on a full read, SQLITE_IOERR_SHORT_READ (with the unread
// tail zero-filled) on a partial read, and SQLITE_IOERR_READ on error.
static int asyncRead(sqlite3_file *pFile, void *zBuf, int iAmt, sqlite_int64 iOfst) {
    VFSAsyncFile *vfsFile = (VFSAsyncFile*)pFile;
    try {
        ++vfsFile->debug_reads;
        int got = waitForAndGet( vfsFile->file->read( zBuf, iAmt, iOfst ) );
        if (got >= iAmt) {
            return SQLITE_OK;
        }
        // When reading past the EOF, sqlite expects the extra portion of the buffer to be zeroed
        uint8_t* tail = (uint8_t*)zBuf + got;
        memset(tail, 0, iAmt - got);
        return SQLITE_IOERR_SHORT_READ;
    } catch (Error &e) {
        // Remember the SQLite code when the failure was an injected fault.
        if (e.isInjectedFault())
            VFSAsyncFile::setInjectedError(SQLITE_IOERR_READ);
        return SQLITE_IOERR_READ;
    }
}
#if 1
// Releases a buffer previously handed out by asyncReadZeroCopy() and decrements
// the handle's outstanding zero-copy reference counter.
// Returns SQLITE_OK, or SQLITE_IOERR if the underlying release throws.
static int asyncReleaseZeroCopy(sqlite3_file* pFile, void* data, int iAmt, sqlite_int64 iOfst) {
    VFSAsyncFile *vfsFile = (VFSAsyncFile*)pFile;
    try {
        --vfsFile->debug_zcrefs;
        vfsFile->file->releaseZeroCopy( data, iAmt, iOfst );
        return SQLITE_OK;
    } catch (Error &e) {
        // Remember the SQLite code when the failure was an injected fault.
        if (e.isInjectedFault())
            VFSAsyncFile::setInjectedError(SQLITE_IOERR);
        return SQLITE_IOERR;
    }
}
// Zero-copy read: asks the underlying IAsyncFile for a pointer (*data) to iAmt
// bytes at offset iOfst instead of copying into a caller buffer.
//   pDataWasCached - optional; set to 1 if the read future was already ready
//                    when it was returned, 0 otherwise.
// Returns SQLITE_OK on a full read (the caller must later call
// asyncReleaseZeroCopy), SQLITE_IOERR_SHORT_READ on a partial read (the buffer
// has already been released here), and SQLITE_IOERR_READ on error.
static int asyncReadZeroCopy(sqlite3_file *pFile, void **data, int iAmt, sqlite_int64 iOfst, int *pDataWasCached) {
    VFSAsyncFile *vfsFile = (VFSAsyncFile*)pFile;
    try {
        int got = iAmt;
        Future<Void> pending = vfsFile->file->readZeroCopy( data, &got, iOfst );
        if (pDataWasCached) {
            if (pending.isReady())
                *pDataWasCached = 1;
            else
                *pDataWasCached = 0;
        }
        waitFor(pending);
        ++vfsFile->debug_zcrefs;

        if (got >= iAmt) {
            ++vfsFile->debug_zcreads;
            return SQLITE_OK;
        }

        // When reading past the EOF, sqlite expects the extra portion of the buffer to be zeroed.
        // We can't do that, so return and sqlite will use the slow path.
        asyncReleaseZeroCopy(pFile, *data, got, iOfst);
        return SQLITE_IOERR_SHORT_READ;
    } catch (Error &e) {
        // Remember the SQLite code when the failure was an injected fault.
        if (e.isInjectedFault())
            VFSAsyncFile::setInjectedError(SQLITE_IOERR_READ);
        return SQLITE_IOERR_READ;
    }
}
#else
// Alternate (currently compiled out by the enclosing `#if 1`) implementation
// that emulates zero-copy by allocating a fresh buffer with new[] and doing a
// regular read into it. The buffer is freed by the matching
// asyncReleaseZeroCopy() in this same branch.
static int asyncReadZeroCopy(sqlite3_file *pFile, void **data, int iAmt, sqlite_int64 iOfst) {
    VFSAsyncFile *vfsFile = (VFSAsyncFile*)pFile;
    try {
        *data = new char[iAmt];
        int got = waitForAndGet( vfsFile->file->read( *data, iAmt, iOfst ) );
        //printf("+asyncReadRef %p +%lld %d/%d = %p\n", pFile, iOfst, readBytes, iAmt, *data);
        if (got >= iAmt) {
            return SQLITE_OK;
        }
        // When reading past the EOF, sqlite expects the extra portion of the buffer to be zeroed
        uint8_t* tail = (uint8_t*)*data + got;
        memset(tail, 0, iAmt - got);
        return SQLITE_IOERR_SHORT_READ;
    } catch (Error &e) {
        // Remember the SQLite code when the failure was an injected fault.
        if (e.isInjectedFault())
            VFSAsyncFile::setInjectedError(SQLITE_IOERR_READ);
        return SQLITE_IOERR_READ;
    }
}
// Alternate (currently compiled out) release: frees the new[]-allocated buffer
// produced by the asyncReadZeroCopy() of this same `#else` branch.
static int asyncReleaseZeroCopy(sqlite3_file* pFile, void* data, int iAmt, sqlite_int64 iOfst) {
    //printf("-asyncReleaseRef %p +%lld %d <= %p\n", pFile, iOfst, iAmt, data);
    char* buffer = (char*)data;
    delete[] buffer;
    return SQLITE_OK;
}
#endif
// xWrite: writes iAmt bytes from zBuf at offset iOfst through the underlying
// IAsyncFile and waits for completion.
// Returns SQLITE_OK, or SQLITE_IOERR_WRITE if the write throws.
static int asyncWrite(sqlite3_file *pFile, const void *zBuf, int iAmt, sqlite_int64 iOfst) {
    VFSAsyncFile *vfsFile = (VFSAsyncFile*)pFile;
    try {
        waitFor( vfsFile->file->write( zBuf, iAmt, iOfst ) );
    } catch(Error &e) {
        // Remember the SQLite code when the failure was an injected fault.
        if (e.isInjectedFault())
            VFSAsyncFile::setInjectedError(SQLITE_IOERR_WRITE);
        return SQLITE_IOERR_WRITE;
    }
    return SQLITE_OK;
}
// xTruncate: sets the file length to `size`. If a chunk size has been configured
// (see SQLITE_FCNTL_CHUNK_SIZE in VFSAsyncFileControl), the requested size is
// first rounded up to the next multiple of that chunk size.
// Returns SQLITE_OK, or SQLITE_IOERR_TRUNCATE if the truncate throws.
static int asyncTruncate(sqlite3_file *pFile, sqlite_int64 size){
    VFSAsyncFile *vfsFile = (VFSAsyncFile*)pFile;

    const sqlite_int64 chunk = vfsFile->chunkSize;
    if (chunk != 0) {
        const sqlite_int64 chunkCount = (size + chunk - 1) / chunk;
        size = chunkCount * chunk;
    }

    try {
        waitFor( vfsFile->file->truncate( size ) );
    } catch(Error &e) {
        // Remember the SQLite code when the failure was an injected fault.
        if (e.isInjectedFault())
            VFSAsyncFile::setInjectedError(SQLITE_IOERR_TRUNCATE);
        return SQLITE_IOERR_TRUNCATE;
    }
    return SQLITE_OK;
}
// xSync: syncs the underlying IAsyncFile and waits for completion. The `flags`
// argument is not consulted.
// Returns SQLITE_OK, or SQLITE_IOERR_FSYNC (after logging a trace event) if the
// sync throws.
static int asyncSync(sqlite3_file *pFile, int flags){
    VFSAsyncFile *vfsFile = (VFSAsyncFile*)pFile;
    try {
        waitFor( vfsFile->file->sync() );
    } catch (Error &e) {
        // Remember the SQLite code when the failure was an injected fault.
        if (e.isInjectedFault())
            VFSAsyncFile::setInjectedError(SQLITE_IOERR_FSYNC);

        TraceEvent("VFSAsyncFileSyncError")
            .error(e)
            .detail("Filename", vfsFile->filename)
            .detail("Sqlite3File", (int64_t)pFile)
            .detail("IAsyncFile", (int64_t)vfsFile->file.getPtr());

        return SQLITE_IOERR_FSYNC;
    }
    return SQLITE_OK;
}
/*
** xFileSize: store the current size of the file, in bytes, in *pSize.
** Returns SQLITE_OK, or SQLITE_IOERR_FSTAT if the size query throws.
*/
static int VFSAsyncFileSize(sqlite3_file *pFile, sqlite_int64 *pSize){
    VFSAsyncFile *vfsFile = (VFSAsyncFile*)pFile;
    try {
        *pSize = waitForAndGet( vfsFile->file->size() );
    } catch (Error &e) {
        // Remember the SQLite code when the failure was an injected fault.
        if (e.isInjectedFault())
            VFSAsyncFile::setInjectedError(SQLITE_IOERR_FSTAT);
        return SQLITE_IOERR_FSTAT;
    }
    return SQLITE_OK;
}
// xLock: no lock state is recorded here. A request for EXCLUSIVE_LOCK is always
// refused with SQLITE_BUSY; every other level is granted.
static int asyncLock(sqlite3_file *pFile, int eLock){
    //VFSAsyncFile *p = (VFSAsyncFile*)pFile;
    //TraceEvent("FileLock").detail("File", p->filename).detail("Fd", p->file->debugFD()).detail("PrevLockLevel", p->lockLevel).detail("Op", eLock).detail("LockCount", *p->pLockCount);
    if (eLock == EXCLUSIVE_LOCK) {
        return SQLITE_BUSY;
    }
    return SQLITE_OK;
}
// xUnlock: nothing to release because asyncLock() keeps no state. Only checks
// (in builds with assertions enabled) that the target level is at most SHARED_LOCK.
static int asyncUnlock(sqlite3_file *pFile, int eLock) {
    assert( eLock <= SHARED_LOCK );

    return SQLITE_OK;
}
// xCheckReservedLock: sets *pResOut to 1 when the file's shared lock count has
// reached RESERVED_COUNT, and to 0 otherwise. Always returns SQLITE_OK.
static int asyncCheckReservedLock(sqlite3_file *pFile, int *pResOut){
    VFSAsyncFile *vfsFile = (VFSAsyncFile*)pFile;
    const bool reserved = *vfsFile->pLockCount >= RESERVED_COUNT;
    *pResOut = reserved;
    return SQLITE_OK;
}
/*
** xFileControl: two verbs are handled by this VFS.
**
**   SQLITE_FCNTL_CHUNK_SIZE - pArg points to an int; it is stored as the
**                             handle's chunkSize (used by asyncTruncate).
**   SQLITE_FCNTL_SIZE_HINT  - pArg points to a 64-bit size; it is forwarded
**                             to asyncTruncate() and that result is returned.
**
** Any other verb returns SQLITE_NOTFOUND.
**
** NOTE(review): because SIZE_HINT goes straight to truncate, a hint smaller
** than the current file size would presumably shrink the file - confirm that
** callers never send such a hint.
*/
static int VFSAsyncFileControl(sqlite3_file *pFile, int op, void *pArg){
    VFSAsyncFile *vfsFile = (VFSAsyncFile*)pFile;

    if (op == SQLITE_FCNTL_CHUNK_SIZE) {
        vfsFile->chunkSize = *(int *)pArg;
        return SQLITE_OK;
    }
    if (op == SQLITE_FCNTL_SIZE_HINT) {
        return asyncTruncate(pFile, *(int64_t *)pArg);
    }
    return SQLITE_NOTFOUND;
}
// xSectorSize: reports a fixed sector size of 512 bytes for every file.
// SOMEDAY: Would 4K be better?
static int asyncSectorSize(sqlite3_file *pFile){
    return 512;
}
// xDeviceCharacteristics: advertises no special device characteristics (0).
static int asyncDeviceCharacteristics(sqlite3_file *pFile){
    return 0;
}
#if 1
// In-process "shared memory" state for one database file, stored by filename in
// the static `table`. Used by the asyncShm* callbacks.
struct SharedMemoryInfo { // for a file
    std::string filename;
    std::vector<void*> regions; // each entry is a new uint8_t[regionSize] block
    int regionSize;
    int refcount; // Number of connections with this open
    int sharedLocks[SQLITE_SHM_NLOCK];    // per-slot count of shared holders
    int exclusiveLocks[SQLITE_SHM_NLOCK]; // per-slot count of exclusive holders

    SharedMemoryInfo() : regionSize(0), refcount(0) {
        for (int slot = 0; slot < SQLITE_SHM_NLOCK; slot++) {
            sharedLocks[slot] = 0;
            exclusiveLocks[slot] = 0;
        }
    }

    // Frees every region and removes this entry from `table`.
    // The erase destroys *this, so nothing may touch the object afterwards.
    void cleanup(){
        for (void* region : regions) {
            delete[] (uint8_t*)region;
        }
        table.erase(filename);
    }

    static Mutex mutex;
    static std::map< std::string, SharedMemoryInfo > table;
};
// Held (via MutexHolder) by asyncShmMap, asyncShmLock and asyncShmUnmap while they run.
Mutex SharedMemoryInfo::mutex;
// Shared-memory state for each file, keyed by the filename stored in VFSAsyncFile.
std::map< std::string, SharedMemoryInfo > SharedMemoryInfo::table;
/*
** xShmMap: obtain a pointer to region iRegion of the shared memory associated
** with the database file fd. Regions are numbered from zero and each one is
** szRegion bytes.
**
** On the first call for a connection, the connection is attached to the
** SharedMemoryInfo entry for its filename (created on demand) and that entry's
** refcount is incremented.
**
** If the requested region has not been allocated yet:
**   - with bExtend == 0, *pp is set to nullptr and SQLITE_OK is returned;
**   - otherwise zero-filled regions are allocated up to and including iRegion.
**
** On success *pp points at the region and SQLITE_OK is returned.
*/
static int asyncShmMap(
    sqlite3_file *fd,               /* Handle open on database file */
    int iRegion,                    /* Region to retrieve */
    int szRegion,                   /* Size of regions */
    int bExtend,                    /* True to extend file if necessary */
    void volatile **pp              /* OUT: Mapped memory */
)
{
    MutexHolder hold( SharedMemoryInfo::mutex );

    VFSAsyncFile *conn = (VFSAsyncFile*)fd;
    SharedMemoryInfo* memInfo = conn->sharedMemory;
    if (memInfo) {
        assert( memInfo->regionSize == szRegion );
    } else {
        // First mapping through this connection: attach to the per-file entry.
        std::string filename = conn->filename;
        memInfo = &SharedMemoryInfo::table[ filename ];
        conn->sharedMemory = memInfo;
        memInfo->filename = filename;
        memInfo->regionSize = szRegion;
        ++memInfo->refcount;
        //printf("Shared memory for: '%s' (%d refs)\n", filename.c_str(), memInfo->refcount);
    }

    std::vector<void*>& regions = memInfo->regions;
    if (iRegion >= regions.size()) {
        if (!bExtend) {
            *pp = nullptr;
            return SQLITE_OK;
        }
        // Grow the region list with zeroed blocks until iRegion exists.
        while (regions.size() <= iRegion) {
            void *block = new uint8_t[ szRegion ];
            memset( block, 0, szRegion );
            regions.push_back( block );
        }
    }

    *pp = regions[ iRegion ];
    return SQLITE_OK;
}
/*
** xShmLock: change the lock state of slots [ofst, ofst+n) of a shared-memory
** segment.
**
** Note that the relationship between SHAREd and EXCLUSIVE locks is a little
** different here than in posix. In xShmLock(), one can go from unlocked
** to shared and back or from unlocked to exclusive and back. But one may
** not go from shared to exclusive or from exclusive to shared.
**
** Each connection remembers which slots it holds in two bitmasks
** (sharedMemorySharedLocks / sharedMemoryExclusiveLocks); the per-file
** SharedMemoryInfo holds the holder counts per slot.
**
** Returns SQLITE_BUSY if a lock request conflicts with locks held by another
** connection (nothing is modified in that case), otherwise SQLITE_OK.
*/
// sqlite doesn't seem to match these up correctly - it happily calls unlock on locks it doesn't hold.
// So we have to keep track of which locks are held by a given sqlite3_file
static int asyncShmLock(
    sqlite3_file *fd,          /* Database file holding the shared memory */
    int ofst,                  /* First lock to acquire or release */
    int n,                     /* Number of locks to acquire or release */
    int flags                  /* What to do with the lock */
){
    assert( ofst>=0 && ofst+n<=SQLITE_SHM_NLOCK );
    assert( n>=1 );
    assert( flags==(SQLITE_SHM_LOCK | SQLITE_SHM_SHARED)
        || flags==(SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE)
        || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED)
        || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE) );
    assert( n==1 || (flags & SQLITE_SHM_EXCLUSIVE)!=0 );

    MutexHolder hold( SharedMemoryInfo::mutex );

    VFSAsyncFile *conn = (VFSAsyncFile*)fd;
    SharedMemoryInfo* shm = conn->sharedMemory;
    const int last = ofst + n;

    if (flags & SQLITE_SHM_UNLOCK) {
        // Release whatever this connection actually holds in the range; slots it
        // does not hold are silently skipped.
        for (int slot = ofst; slot < last; slot++) {
            if ( conn->sharedMemorySharedLocks & (1<<slot) ) {
                conn->sharedMemorySharedLocks &= ~(1<<slot);
                --shm->sharedLocks[slot];
            }
            if ( conn->sharedMemoryExclusiveLocks & (1<<slot) ) {
                conn->sharedMemoryExclusiveLocks &= ~(1<<slot);
                --shm->exclusiveLocks[slot];
            }
        }
        return SQLITE_OK;
    }

    if (flags & SQLITE_SHM_SHARED) {
        // Busy if any exclusive holder on a slot is someone other than us.
        for (int slot = ofst; slot < last; slot++) {
            if ( shm->exclusiveLocks[slot] != ((conn->sharedMemoryExclusiveLocks>>slot)&1) ) {
                //TraceEvent("ShmLocked").detail("File", DEBUG_DETERMINISM ? 0 : (int64_t)pDbFd).detail("Acquiring", "Shared").detail("I", i).detail("Exclusive", memInfo->exclusiveLocks[i]).detail("MyExclusive", pDbFd->sharedMemoryExclusiveLocks);
                return SQLITE_BUSY;
            }
        }
        // No conflict: take the shared lock on every slot not already held.
        for (int slot = ofst; slot < last; slot++) {
            if ( !(conn->sharedMemorySharedLocks & (1<<slot)) ) {
                conn->sharedMemorySharedLocks |= 1<<slot;
                shm->sharedLocks[slot]++;
            }
        }
        return SQLITE_OK;
    }

    // Exclusive request: busy if any shared or exclusive holder on a slot is
    // someone other than us.
    for (int slot = ofst; slot < last; slot++) {
        if ( shm->exclusiveLocks[slot] != ((conn->sharedMemoryExclusiveLocks>>slot)&1) ||
             shm->sharedLocks[slot] != ((conn->sharedMemorySharedLocks>>slot)&1) )
        {
            //TraceEvent("ShmLocked").detail("File", DEBUG_DETERMINISM ? 0 : (int64_t)pDbFd).detail("Acquiring", "Exclusive").detail("I", i).detail("Exclusive", memInfo->exclusiveLocks[i]).detail("MyExclusive", pDbFd->sharedMemoryExclusiveLocks).detail("Shared", memInfo->sharedLocks[i]).detail("MyShared", pDbFd->sharedMemorySharedLocks);
            return SQLITE_BUSY;
        }
    }
    for (int slot = ofst; slot < last; slot++) {
        if ( !(conn->sharedMemoryExclusiveLocks & (1<<slot)) ) {
            conn->sharedMemoryExclusiveLocks |= 1<<slot;
            shm->exclusiveLocks[slot]++;
        }
    }
    return SQLITE_OK;
}
/*
** Implement a memory barrier or memory fence on shared memory.
**
** All loads and stores begun before the barrier must complete before
** any load or store begun after the barrier.
**
** The platform test uses #ifdef, matching the WIN32 check that guards the
** <Windows.h> include at the top of this file.
**
** NOTE(review): _ReadWriteBarrier() is documented by Microsoft as a
** compiler-only barrier - confirm that is sufficient for the Windows build.
*/
static void asyncShmBarrier(sqlite3_file*){
#ifdef WIN32
    _ReadWriteBarrier();
#else
    __sync_synchronize();
#endif
}
/*
** xShmUnmap: detach this connection from its shared-memory entry and drop one
** reference from that entry.
**
** If the connection has no shared memory attached, this is a harmless no-op.
**
** The regions themselves are NOT freed here, even when the refcount reaches
** zero; that happens in ~VFSAsyncFile(). deleteFlag is expected to always be
** false and this is asserted when the last reference goes away.
*/
static int asyncShmUnmap(
    sqlite3_file *fd,               /* The underlying database file */
    int deleteFlag                  /* Delete shared-memory if true */
){
    MutexHolder hold( SharedMemoryInfo::mutex );

    VFSAsyncFile *conn = (VFSAsyncFile*)fd;
    SharedMemoryInfo* shm = conn->sharedMemory;
    if (shm == nullptr) {
        return SQLITE_OK;
    }

    conn->sharedMemory = nullptr;
    //printf("Connection %p closed shared memory\n", fd);

    --shm->refcount;
    if (shm->refcount == 0) {
        //printf("Cleanup shared memory for: '%s' (%d refs; deleteFlag=%d)\n", memInfo->filename.c_str(), memInfo->refcount, deleteFlag);
        //TraceEvent("CleanupSharedMemory").detail("Filename", memInfo->filename.c_str()).detail("RefCount", memInfo->refcount).detail("DeleteFlag", deleteFlag);

        //We don't think deleteFlag will ever be set
        ASSERT(!deleteFlag);
    }
    return SQLITE_OK;
}
// Drops this handle's contribution to the per-file open count. When the last
// handle for the filename goes away, the bookkeeping entry is removed and any
// shared-memory state for that file is freed.
VFSAsyncFile::~VFSAsyncFile() {
    // .first is the lock count, .second is the number of open handles.
    auto& counters = filename_lockCount_openCount[filename];

    TraceEvent(SevDebug, "VFSAsyncFileDestroyStart")
        .detail("Filename", filename)
        .detail("OpenCount", counters.second)
        .detail("LockCount", counters.first)
        .backtrace();

    --counters.second;
    if (counters.second != 0) {
        return;
    }

    // `counters` dangles after this erase and must not be used again.
    filename_lockCount_openCount.erase(filename);

    TraceEvent(SevDebug, "VFSAsyncFileDestroy")
        .detail("Filename", filename)
        .backtrace();

    // Always delete the shared memory when the last copy of the file is deleted. In simulation,
    // this is helpful because "killing" a file without properly closing it can result in a shared
    // memory state that causes corruption when reopening the killed file. The only expected
    // penalty from doing this is a potentially slower open operation on a database, but that
    // should happen infrequently.
    //
    // We can't do this in ShmUnmap when refcount is 0 because it seems that SQLite sometimes
    // subsequently tries to reopen the WAL from multiple locations simultaneously, resulting in
    // a locking error
    auto itr = SharedMemoryInfo::table.find(filename);
    if (itr != SharedMemoryInfo::table.end()) {
        ASSERT_ABORT(itr->second.refcount == 0);
        itr->second.cleanup();
    }
}
#endif
/*
** xOpen: open a file handle.
**
** Constructs a VFSAsyncFile in place in the storage pointed to by pFile and
** opens the named file through IAsyncFileSystem. Temp files (zName == 0) are
** not supported and yield SQLITE_IOERR.
**
** On failure the in-place object is destroyed again and SQLITE_CANTOPEN is
** returned; pMethods is left zeroed in that case. On success pMethods is set,
** *pOutFlags (if non-null) receives `flags`, and SQLITE_OK is returned.
*/
static int asyncOpen(
    sqlite3_vfs *pVfs,              /* VFS */
    const char *zName,              /* File to open, or 0 for a temp file */
    sqlite3_file *pFile,            /* Pointer to VFSAsyncFile struct to populate */
    int flags,                      /* Input SQLITE_OPEN_XXX flags */
    int *pOutFlags                  /* Output SQLITE_OPEN_XXX flags (or nullptr) */
){
    // Method table shared by every file opened through this VFS.
    static const sqlite3_io_methods asyncio = {
        3,                              /* iVersion */
        asyncClose,                     /* xClose */
        asyncRead,                      /* xRead */
        asyncWrite,                     /* xWrite */
        asyncTruncate,                  /* xTruncate */
        asyncSync,                      /* xSync */
        VFSAsyncFileSize,               /* xFileSize */
        asyncLock,                      /* xLock */
        asyncUnlock,                    /* xUnlock */
        asyncCheckReservedLock,         /* xCheckReservedLock */
        VFSAsyncFileControl,            /* xFileControl */
        asyncSectorSize,                /* xSectorSize */
        asyncDeviceCharacteristics,     /* xDeviceCharacteristics */
        asyncShmMap,
        asyncShmLock,
        asyncShmBarrier,
        asyncShmUnmap,
        asyncReadZeroCopy,
        asyncReleaseZeroCopy
    };

    // The open flags are passed through to IAsyncFile unchanged, so the numeric values must agree.
    static_assert( SQLITE_OPEN_EXCLUSIVE == IAsyncFile::OPEN_EXCLUSIVE &&
                   SQLITE_OPEN_CREATE == IAsyncFile::OPEN_CREATE &&
                   SQLITE_OPEN_READONLY == IAsyncFile::OPEN_READONLY &&
                   SQLITE_OPEN_READWRITE == IAsyncFile::OPEN_READWRITE, "SQLite flag values don't match IAsyncFile flag values" );

    if (zName == nullptr) {
        return SQLITE_IOERR;
    }

    // File creation here is disabled because we always create the files first in KeyValueStoreSQLite, using atomic creation
    int oflags = flags & (/*SQLITE_OPEN_EXCLUSIVE | SQLITE_OPEN_CREATE |*/ SQLITE_OPEN_READONLY | SQLITE_OPEN_READWRITE);
    if (flags & SQLITE_OPEN_WAL) {
        oflags |= IAsyncFile::OPEN_LARGE_PAGES;
    }
    oflags |= IAsyncFile::OPEN_LOCK;

    // Zero the storage first, then construct the object in place.
    VFSAsyncFile *p = (VFSAsyncFile*)pFile;     /* Populate this structure */
    memset(static_cast<void*>(p), 0, sizeof(VFSAsyncFile));
    new (p) VFSAsyncFile(zName, flags);

    try {
        // Note that SQLiteDB::open also opens the db file, so its flags and modes are important, too
        p->file = waitForAndGet( IAsyncFileSystem::filesystem()->open( p->filename, oflags, 0600 ) );
        TraceEvent(SevDebug, "VFSAsyncFileOpened")
            .detail("Filename", p->filename)
            .backtrace();
    } catch (Error& e) {
        // Remember the SQLite code when the failure was an injected fault.
        if (e.isInjectedFault())
            VFSAsyncFile::setInjectedError(SQLITE_CANTOPEN);
        TraceEvent("VFSAsyncFileOpenError").error(e).detail("Filename", p->filename);
        p->~VFSAsyncFile();
        return SQLITE_CANTOPEN;
    }

    if (pOutFlags != nullptr) {
        *pOutFlags = flags;
    }
    p->base.pMethods = &asyncio;
    return SQLITE_OK;
}
// The next few functions, which perform filesystem operations by path rather than by file, have
// OS-specific implementations.
/*
** xDelete: not supported. Deleting by path is never expected through this VFS,
** so the call trips an ASSERT; SQLITE_IOERR_DELETE is the nominal return value.
** The zPath and dirSync arguments are ignored.
*/
static int asyncDelete(sqlite3_vfs *pVfs, const char *zPath, int dirSync){
    // At the moment this isn't used; hence isn't under test.
    // Could easily use IAsyncFileSystem::filesystem()->deleteFile().
    ASSERT( false );

    return SQLITE_IOERR_DELETE;
}
/*
** xAccess: query the file-system to see if the named file exists, is readable
** or is both readable and writable, writing 1 or 0 to *pResOut. For an exists
** query, a zero-length file is treated as if it does not exist.
**
** The unix-like build uses access()/stat(); otherwise GetFileAttributesEx() is used.
*/
static int asyncAccess(
    sqlite3_vfs *pVfs,
    const char *zPath,
    int flags,
    int *pResOut
){
#ifdef __unixish__
#ifndef F_OK
# define F_OK 0
#endif
#ifndef R_OK
# define R_OK 4
#endif
#ifndef W_OK
# define W_OK 2
#endif
    assert(flags==SQLITE_ACCESS_EXISTS
        || flags==SQLITE_ACCESS_READ
        || flags==SQLITE_ACCESS_READWRITE
    );

    // Translate the SQLite query into the mode argument of access().
    int mode = F_OK;
    if (flags == SQLITE_ACCESS_READWRITE) {
        mode = R_OK|W_OK;
    } else if (flags == SQLITE_ACCESS_READ) {
        mode = R_OK;
    }

    const int rc = access(zPath, mode);
    *pResOut = (rc==0);

    // An existing but empty file counts as missing for the EXISTS query.
    if (flags == SQLITE_ACCESS_EXISTS && *pResOut) {
        struct stat info;
        if (stat(zPath, &info) == 0 && info.st_size == 0) {
            *pResOut = 0;
        }
    }
    return SQLITE_OK;
#else
    WIN32_FILE_ATTRIBUTE_DATA info;
    memset(&info, 0, sizeof(info));

    // Stays INVALID_FILE_ATTRIBUTES when the file is missing, or empty for an EXISTS query.
    DWORD attrs = INVALID_FILE_ATTRIBUTES;
    if (GetFileAttributesEx(zPath, GetFileExInfoStandard, &info)) {
        const bool emptyFile = info.nFileSizeHigh==0 && info.nFileSizeLow==0;
        if (!(flags == SQLITE_ACCESS_EXISTS && emptyFile)) {
            attrs = info.dwFileAttributes;
        }
    } else if (GetLastError() != ERROR_FILE_NOT_FOUND) {
        return SQLITE_IOERR_ACCESS;
    }

    if (flags == SQLITE_ACCESS_READWRITE) {
        *pResOut = (attrs & FILE_ATTRIBUTE_READONLY)==0;
    } else {
        *pResOut = attrs != INVALID_FILE_ATTRIBUTES;
    }
    return SQLITE_OK;
#endif
}
/*
** xFullPathname: argument zPath points to a nul-terminated string containing a
** file path. The absolute form of that path, as computed by abspath(), is
** copied (including its terminating nul) into zPathOut.
**
** Returns SQLITE_OK on success. Returns SQLITE_IOERR if the result does not
** fit in nPathOut bytes, if nPathOut is not positive, or if abspath() throws.
*/
static int asyncFullPathname(
    sqlite3_vfs *pVfs,              /* VFS */
    const char *zPath,              /* Input path (possibly a relative path) */
    int nPathOut,                   /* Size of output buffer in bytes */
    char *zPathOut                  /* Pointer to output buffer */
){
    try {
        auto s = abspath( zPath );
        // nPathOut is signed: reject non-positive sizes explicitly so that a
        // negative value cannot be converted to a huge unsigned bound and let
        // the memcpy below overrun the buffer.
        if (nPathOut <= 0 || s.size() >= static_cast<size_t>(nPathOut))
            return SQLITE_IOERR;
        memcpy(zPathOut, s.c_str(), s.size()+1);
        return SQLITE_OK;
    } catch (Error& e) {
        // Remember the SQLite code when the failure was an injected fault.
        if(e.isInjectedFault()) {
            VFSAsyncFile::setInjectedError(SQLITE_IOERR);
        }
        TraceEvent(SevError,"VFSAsyncFullPathnameError").error(e).detail("PathIn", (std::string)zPath);
        return SQLITE_IOERR;
    } catch(...) {
        TraceEvent(SevError,"VFSAsyncFullPathnameError").error(unknown_error()).detail("PathIn", (std::string)zPath);
        return SQLITE_IOERR;
    }
}
/*
** Returns true if there is a shared memory entry for the specified filename
** (after converting it with abspath()), and false otherwise.
*/
bool vfsAsyncIsOpen( std::string filename ) {
    const auto& openFiles = SharedMemoryInfo::table;
    return openFiles.find( abspath(filename) ) != openFiles.end();
}
/*
** The following four VFS methods:
**
**   xDlOpen
**   xDlError
**   xDlSym
**   xDlClose
**
** are supposed to implement the functionality needed by SQLite to load
** extensions compiled as shared objects. This simple VFS does not support
** this functionality, so the following functions are no-ops.
*/
// xDlOpen: never opens anything; always returns a null handle.
static void *asyncDlOpen(sqlite3_vfs *pVfs, const char *zPath){
    return nullptr;
}
// xDlError: writes a fixed "not supported" message into the nByte-byte buffer
// zErrMsg and makes sure it is nul-terminated.
// Nothing is written when nByte is not positive; without that guard the
// terminator store below would land at zErrMsg[-1] (or earlier).
static void asyncDlError(sqlite3_vfs *pVfs, int nByte, char *zErrMsg){
    if (nByte <= 0) {
        return;
    }
    sqlite3_snprintf(nByte, zErrMsg, "Loadable extensions are not supported");
    zErrMsg[nByte-1] = '\0';
}
// xDlSym: extension loading is unsupported, so no symbol is ever resolved;
// always returns a null function pointer.
static void (*asyncDlSym(sqlite3_vfs *pVfs, void *pH, const char *z))(void){
    return nullptr;
}
// xDlClose: nothing to close because asyncDlOpen() never returns a handle.
static void asyncDlClose(sqlite3_vfs *pVfs, void *pHandle){
}
/*
** xRandomness: parameter zByte points to a buffer nByte bytes in size. Each
** byte, in ascending order, is filled from deterministicRandom()->randomInt(0,256).
** Always returns SQLITE_OK.
*/
static int asyncRandomness(sqlite3_vfs *pVfs, int nByte, char *zByte){
    int filled = 0;
    while (filled < nByte) {
        zByte[filled] = deterministicRandom()->randomInt(0,256);
        ++filled;
    }
    return SQLITE_OK;
}
/*
** xSleep: wait for `microseconds` microseconds using the network's delay and
** return the requested duration.
**
** In simulation the wait is also ended by the current process's shutdown
** signal. If that signal has already fired on entry, the function instead
** waits FLOW_KNOBS->MAX_BUGGIFIED_DELAY and returns 0. Any Error thrown is
** logged and 0 is returned.
*/
static int asyncSleep(sqlite3_vfs *pVfs, int microseconds){
    try {
        // Never fires outside simulation.
        Future<Void> simCancel = Never();
        if (g_network->isSimulated()) {
            simCancel = success( g_simulator.getCurrentProcess()->shutdownSignal.getFuture() );
        }

        if (!simCancel.isReady()) {
            waitFor( g_network->delay( microseconds*1e-6, TaskPriority::DefaultDelay ) || simCancel );
            return microseconds;
        }

        // Shutdown was already signalled before we started sleeping.
        waitFor( delay(FLOW_KNOBS->MAX_BUGGIFIED_DELAY) );
        return 0;
    } catch( Error &e ) {
        // Remember the SQLite code when the failure was an injected fault.
        if (e.isInjectedFault())
            VFSAsyncFile::setInjectedError(SQLITE_ERROR);
        TraceEvent(SevError, "VFSAsyncSleepError").error(e,true);
        return 0;
    }
}
/*
** Find the current time (in Universal Coordinated Time). Write into *piNow
** the current time and date as a Julian Day number times 86_400_000. In
** other words, write into *piNow the number of milliseconds since the Julian
** epoch of noon in Greenwich on November 24, 4714 B.C according to the
** proleptic Gregorian calendar.
**
** Always returns 0. The platform test uses #ifdef __unixish__, matching the
** other __unixish__ checks in this file.
*/
static int asyncCurrentTimeInt64(sqlite3_vfs *NotUsed, sqlite3_int64 *piNow){
#ifdef __unixish__
    // Julian-day milliseconds at the unix epoch (1970-01-01T00:00:00Z).
    static const sqlite3_int64 unixEpoch = 24405875*(sqlite3_int64)8640000;
    struct timeval sNow;
    gettimeofday(&sNow, nullptr);
    *piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_usec/1000;
#elif defined(_WIN32)
    // Julian-day milliseconds at the Windows FILETIME epoch.
    static const sqlite3_int64 winFiletimeEpoch = 23058135*(sqlite3_int64)8640000;
    int64_t ft = 0;
    GetSystemTimeAsFileTime( (FILETIME*)&ft );
    // FILETIME counts 100ns ticks; 10000 ticks per millisecond.
    *piNow = winFiletimeEpoch + ft / 10000;
#else
    #error Port me!
#endif
    return 0;
}
/*
** xCurrentTime: set *pTime to the current UTC time expressed as a (fractional)
** Julian day, derived from asyncCurrentTimeInt64(). Returns SQLITE_OK if
** successful, or the non-zero code from asyncCurrentTimeInt64() otherwise.
**
** http://en.wikipedia.org/wiki/Julian_day
*/
static int asyncCurrentTime(sqlite3_vfs *pVfs, double *pTime){
    sqlite3_int64 julianMillis = 0;
    const int rc = asyncCurrentTimeInt64(pVfs, &julianMillis);
    if (rc != 0) {
        return rc;
    }
    // 86,400,000 milliseconds per day.
    *pTime = julianMillis / 86400000.0;
    return SQLITE_OK;
}
// xGetLastError: no error text is tracked by this VFS; always returns 0 and
// leaves the output buffer untouched.
static int asyncGetLastError(sqlite3_vfs *NotUsed, int NotUsed2, char *NotUsed3){
    return 0;
}
/*
** This function returns a pointer to the VFS implemented in this file.
** The sqlite3_vfs object is a function-local static, so every call returns
** the same pointer. To make the VFS available to SQLite:
**
**   sqlite3_vfs_register(vfsAsync(), 0);
*/
sqlite3_vfs *vfsAsync(){
    static sqlite3_vfs asyncvfs = {
        3,                              /* iVersion */
        sizeof(VFSAsyncFile),           /* szOsFile */
        MAXPATHNAME,                    /* mxPathname */
        nullptr,                        /* pNext */
        "fdb_async",                    /* zName */
        nullptr,                        /* pAppData */
        asyncOpen,                      /* xOpen */
        asyncDelete,                    /* xDelete */
        asyncAccess,                    /* xAccess */
        asyncFullPathname,              /* xFullPathname */
        asyncDlOpen,                    /* xDlOpen */
        asyncDlError,                   /* xDlError */
        asyncDlSym,                     /* xDlSym */
        asyncDlClose,                   /* xDlClose */
        asyncRandomness,                /* xRandomness */
        asyncSleep,                     /* xSleep */
        asyncCurrentTime,               /* xCurrentTime */
        asyncGetLastError,              /* xGetLastError */
        asyncCurrentTimeInt64,          /* xCurrentTimeInt64 */
        nullptr,                        /* xSetSystemCall */
        nullptr,                        /* xGetSystemCall */
        nullptr,                        /* xNextSystemCall */
    };
    return &asyncvfs;
}