/*
 * VFSAsync.cpp
 *
 * This source file is part of the FoundationDB open source project
 *
 * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
# include "sqlite/sqlite3.h"
# include <stdio.h>
# include <string>
# include <vector>
# include "fdbrpc/fdbrpc.h"
# include "fdbrpc/IAsyncFile.h"
2018-10-20 01:30:13 +08:00
# include "fdbserver/CoroFlow.h"
2017-05-26 04:48:44 +08:00
# include "fdbrpc/simulator.h"
# include "fdbrpc/AsyncFileReadAhead.actor.h"
# include <assert.h>
# include <string.h>
# ifdef WIN32
2018-10-24 09:31:17 +08:00
# include <Windows.h>
2017-05-26 04:48:44 +08:00
# endif
# ifdef __unixish__
# include <sys/types.h>
# include <sys/stat.h>
# include <sys/file.h>
# include <sys/param.h>
# include <sys/time.h>
# include <unistd.h>
# include <errno.h>
# include <fcntl.h>
# endif
2021-01-16 11:29:14 +08:00
# include "fdbserver/VFSAsync.h"
2017-05-26 04:48:44 +08:00
/*
* * The maximum pathname length supported by this VFS .
*/
#define MAXPATHNAME 512

// File lock levels compared against the eLock argument of asyncLock()/asyncUnlock(),
// listed in ascending numeric order.
#define NO_LOCK 0
#define SHARED_LOCK 1
#define RESERVED_LOCK 2
#define PENDING_LOCK 3
#define EXCLUSIVE_LOCK 4

// asyncCheckReservedLock() reports a reserved lock once a file's lock count
// reaches this value.
const uint32_t RESERVED_COUNT = 1U << 29;
2021-01-16 11:29:14 +08:00
// Registers a new handle on `filename`: points pLockCount at the per-file lock
// counter kept in filename_lockCount_openCount and bumps the per-file open count.
VFSAsyncFile::VFSAsyncFile(std::string const& filename, int flags)
  : filename(filename),
    flags(flags),
    pLockCount(&filename_lockCount_openCount[filename].first),
    debug_zcrefs(0),
    debug_zcreads(0),
    debug_reads(0),
    chunkSize(0) {
    // pair.first is the lock count, pair.second the number of open handles.
    auto& counts = filename_lockCount_openCount[filename];
    counts.second++;

    TraceEvent(SevDebug, " VFSAsyncFileConstruct ")
        .detail(" Filename ", filename)
        .detail(" OpenCount ", counts.second)
        .detail(" LockCount ", counts.first)
        .backtrace();
}
2017-05-26 04:48:44 +08:00
std : : map < std : : string , std : : pair < uint32_t , int > > VFSAsyncFile : : filename_lockCount_openCount ;
/*
** xClose: tear down the VFSAsyncFile that lives in SQLite-owned storage.
** The object was placement-constructed by asyncOpen(), so it is destroyed
** with an explicit destructor call rather than delete. Always returns SQLITE_OK.
*/
static int asyncClose(sqlite3_file* pFile) {
    VFSAsyncFile* self = (VFSAsyncFile*)pFile;

    TraceEvent(SevDebug, " VFSAsyncFileDestroy ").detail(" Filename ", self->filename).backtrace();

    // Every zero-copy read must have been released before the file is closed.
    ASSERT(!self->debug_zcrefs);

    self->~VFSAsyncFile();
    return SQLITE_OK;
}
/*
** xRead: read iAmt bytes at offset iOfst into zBuf.
** Returns SQLITE_OK on a full read, SQLITE_IOERR_SHORT_READ (with the unread
** tail of zBuf zero-filled) on a partial read, and SQLITE_IOERR_READ if the
** underlying read throws. Injected faults are additionally recorded through
** VFSAsyncFile::setInjectedError().
*/
static int asyncRead(sqlite3_file* pFile, void* zBuf, int iAmt, sqlite_int64 iOfst) {
    VFSAsyncFile* self = (VFSAsyncFile*)pFile;
    try {
        ++self->debug_reads;
        const int got = waitForAndGet(self->file->read(zBuf, iAmt, iOfst));
        if (got >= iAmt) {
            return SQLITE_OK;
        }
        // When reading past the EOF, sqlite expects the extra portion of the buffer to be zeroed.
        uint8_t* tail = (uint8_t*)zBuf + got;
        memset(tail, 0, iAmt - got);
        return SQLITE_IOERR_SHORT_READ;
    } catch (Error& e) {
        if (e.isInjectedFault()) {
            VFSAsyncFile::setInjectedError(SQLITE_IOERR_READ);
        }
        return SQLITE_IOERR_READ;
    }
}
# if 1
/*
** Give back a buffer previously handed out by asyncReadZeroCopy().
** Decrements the outstanding zero-copy reference counter and forwards the
** release to the underlying file. Returns SQLITE_OK, or SQLITE_IOERR if the
** release throws.
*/
static int asyncReleaseZeroCopy(sqlite3_file* pFile, void* data, int iAmt, sqlite_int64 iOfst) {
    VFSAsyncFile* self = (VFSAsyncFile*)pFile;
    try {
        --self->debug_zcrefs;
        self->file->releaseZeroCopy(data, iAmt, iOfst);
        return SQLITE_OK;
    } catch (Error& e) {
        if (e.isInjectedFault()) {
            VFSAsyncFile::setInjectedError(SQLITE_IOERR);
        }
        return SQLITE_IOERR;
    }
}
/*
** Zero-copy read of iAmt bytes at iOfst; on success *data points at the bytes.
** If pDataWasCached is non-null it is set to 1 when the read future was already
** ready at issue time and 0 otherwise.
**
** A short read cannot be zero-padded in place, so the buffer is released again
** and SQLITE_IOERR_SHORT_READ is returned so that sqlite falls back to the slow
** path. Exceptions map to SQLITE_IOERR_READ.
*/
static int asyncReadZeroCopy(sqlite3_file* pFile, void** data, int iAmt, sqlite_int64 iOfst, int* pDataWasCached) {
    VFSAsyncFile* self = (VFSAsyncFile*)pFile;
    try {
        int got = iAmt;
        Future<Void> pending = self->file->readZeroCopy(data, &got, iOfst);
        if (pDataWasCached) {
            // Must be sampled before waiting: a ready future means no I/O wait was needed.
            *pDataWasCached = pending.isReady() ? 1 : 0;
        }
        waitFor(pending);
        ++self->debug_zcrefs;

        if (got >= iAmt) {
            ++self->debug_zcreads;
            return SQLITE_OK;
        }

        // When reading past the EOF, sqlite expects the extra portion of the buffer to be zeroed.
        // We can't do that, so return and sqlite will use the slow path.
        asyncReleaseZeroCopy(pFile, *data, got, iOfst);
        return SQLITE_IOERR_SHORT_READ;
    } catch (Error& e) {
        if (e.isInjectedFault()) {
            VFSAsyncFile::setInjectedError(SQLITE_IOERR_READ);
        }
        return SQLITE_IOERR_READ;
    }
}
# else
/*
** Fallback (currently compiled out) "zero-copy" read that simply allocates a
** fresh buffer with new[] and performs an ordinary read into it. The buffer is
** handed to the caller through *data and is freed by the matching
** asyncReleaseZeroCopy() in this branch.
*/
static int asyncReadZeroCopy(sqlite3_file* pFile, void** data, int iAmt, sqlite_int64 iOfst) {
    VFSAsyncFile* self = (VFSAsyncFile*)pFile;
    try {
        *data = new char[iAmt];
        const int got = waitForAndGet(self->file->read(*data, iAmt, iOfst));
        if (got >= iAmt) {
            return SQLITE_OK;
        }
        // When reading past the EOF, sqlite expects the extra portion of the buffer to be zeroed.
        uint8_t* tail = (uint8_t*)*data + got;
        memset(tail, 0, iAmt - got);
        return SQLITE_IOERR_SHORT_READ;
    } catch (Error& e) {
        if (e.isInjectedFault()) {
            VFSAsyncFile::setInjectedError(SQLITE_IOERR_READ);
        }
        return SQLITE_IOERR_READ;
    }
}
/*
** Fallback (currently compiled out) release: frees the char[] buffer that the
** fallback asyncReadZeroCopy() allocated. iAmt and iOfst are unused.
*/
static int asyncReleaseZeroCopy(sqlite3_file* pFile, void* data, int iAmt, sqlite_int64 iOfst) {
    char* buffer = (char*)data;
    delete[] buffer;
    return SQLITE_OK;
}
# endif
/*
** xWrite: write iAmt bytes from zBuf at offset iOfst and wait for completion.
** Returns SQLITE_OK, or SQLITE_IOERR_WRITE if the write throws (injected
** faults are also recorded via VFSAsyncFile::setInjectedError()).
*/
static int asyncWrite(sqlite3_file* pFile, const void* zBuf, int iAmt, sqlite_int64 iOfst) {
    VFSAsyncFile* self = (VFSAsyncFile*)pFile;
    try {
        waitFor(self->file->write(zBuf, iAmt, iOfst));
    } catch (Error& e) {
        if (e.isInjectedFault()) {
            VFSAsyncFile::setInjectedError(SQLITE_IOERR_WRITE);
        }
        return SQLITE_IOERR_WRITE;
    }
    return SQLITE_OK;
}
/*
** xTruncate: set the file length to `size`. If a chunk size has been set
** (see SQLITE_FCNTL_CHUNK_SIZE in VFSAsyncFileControl), the requested size is
** first rounded up to the next multiple of that chunk size.
** Returns SQLITE_OK, or SQLITE_IOERR_TRUNCATE if the truncate throws.
*/
static int asyncTruncate(sqlite3_file* pFile, sqlite_int64 size) {
    VFSAsyncFile* self = (VFSAsyncFile*)pFile;

    const auto chunk = self->chunkSize;
    if (chunk != 0) {
        // Round up: number of (possibly partial) chunks times the chunk size.
        const auto chunks = (size + chunk - 1) / chunk;
        size = chunks * chunk;
    }

    try {
        waitFor(self->file->truncate(size));
    } catch (Error& e) {
        if (e.isInjectedFault()) {
            VFSAsyncFile::setInjectedError(SQLITE_IOERR_TRUNCATE);
        }
        return SQLITE_IOERR_TRUNCATE;
    }
    return SQLITE_OK;
}
/*
** xSync: flush the file and wait for completion. The SQLite sync `flags` are
** not consulted. On failure a VFSAsyncFileSyncError trace event is emitted and
** SQLITE_IOERR_FSYNC is returned.
*/
static int asyncSync(sqlite3_file* pFile, int flags) {
    VFSAsyncFile* self = (VFSAsyncFile*)pFile;
    try {
        waitFor(self->file->sync());
    } catch (Error& e) {
        if (e.isInjectedFault()) {
            VFSAsyncFile::setInjectedError(SQLITE_IOERR_FSYNC);
        }

        TraceEvent(" VFSAsyncFileSyncError ")
            .error(e)
            .detail(" Filename ", self->filename)
            .detail(" Sqlite3File ", (int64_t)pFile)
            .detail(" IAsyncFile ", (int64_t)self->file.getPtr());

        return SQLITE_IOERR_FSYNC;
    }
    return SQLITE_OK;
}
/*
* * Write the size of the file in bytes to * pSize .
*/
/* xFileSize: store the current file length in *pSize. Returns SQLITE_OK, or
** SQLITE_IOERR_FSTAT (leaving *pSize untouched) if the size query throws. */
static int VFSAsyncFileSize(sqlite3_file* pFile, sqlite_int64* pSize) {
    VFSAsyncFile* self = (VFSAsyncFile*)pFile;
    try {
        *pSize = waitForAndGet(self->file->size());
    } catch (Error& e) {
        if (e.isInjectedFault()) {
            VFSAsyncFile::setInjectedError(SQLITE_IOERR_FSTAT);
        }
        return SQLITE_IOERR_FSTAT;
    }
    return SQLITE_OK;
}
/*
** xLock: no real locking is performed. Every level below EXCLUSIVE_LOCK is
** granted unconditionally; a request for EXCLUSIVE_LOCK is always refused
** with SQLITE_BUSY.
*/
static int asyncLock(sqlite3_file* pFile, int eLock) {
    if (eLock == EXCLUSIVE_LOCK) {
        return SQLITE_BUSY;
    }
    return SQLITE_OK;
}
/*
** xUnlock: nothing to release because asyncLock() keeps no state.
** Debug builds check that the target level is no higher than SHARED_LOCK.
*/
static int asyncUnlock(sqlite3_file* pFile, int eLock) {
    assert(eLock <= SHARED_LOCK);
    return SQLITE_OK;
}
/*
** xCheckReservedLock: sets *pResOut to 1 when the per-file lock count has
** reached RESERVED_COUNT and to 0 otherwise. Always returns SQLITE_OK.
*/
static int asyncCheckReservedLock(sqlite3_file* pFile, int* pResOut) {
    VFSAsyncFile* self = (VFSAsyncFile*)pFile;
    const bool reserved = *self->pLockCount >= RESERVED_COUNT;
    *pResOut = reserved;
    return SQLITE_OK;
}
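/*
** xFileControl() handler. Two verbs are implemented by this VFS:
** SQLITE_FCNTL_CHUNK_SIZE (records the chunk size used when truncating) and
** SQLITE_FCNTL_SIZE_HINT (forwarded to asyncTruncate()). Every other verb
** returns SQLITE_NOTFOUND.
*/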
/*
** Dispatch an xFileControl() verb.
**   SQLITE_FCNTL_CHUNK_SIZE: pArg is read as int*; the value is stored in chunkSize.
**   SQLITE_FCNTL_SIZE_HINT:  pArg is read as int64_t*; the value is passed to asyncTruncate().
** Any other op returns SQLITE_NOTFOUND.
*/
static int VFSAsyncFileControl(sqlite3_file* pFile, int op, void* pArg) {
    VFSAsyncFile* self = (VFSAsyncFile*)pFile;

    if (op == SQLITE_FCNTL_CHUNK_SIZE) {
        self->chunkSize = *(int*)pArg;
        return SQLITE_OK;
    }
    if (op == SQLITE_FCNTL_SIZE_HINT) {
        return asyncTruncate(pFile, *(int64_t*)pArg);
    }
    return SQLITE_NOTFOUND;
}
// xSectorSize: reports a fixed 512-byte sector. SOMEDAY: Would 4K be better?
static int asyncSectorSize(sqlite3_file* pFile) {
    return 512;
}
// xDeviceCharacteristics: no SQLITE_IOCAP_* capability bits are advertised.
static int asyncDeviceCharacteristics(sqlite3_file* pFile) {
    return 0;
}
# if 1
struct SharedMemoryInfo { // for a file
std : : string filename ;
std : : vector < void * > regions ;
int regionSize ;
int refcount ; // Number of connections with this open
int sharedLocks [ SQLITE_SHM_NLOCK ] ;
int exclusiveLocks [ SQLITE_SHM_NLOCK ] ;
SharedMemoryInfo ( ) : regionSize ( 0 ) , refcount ( 0 ) {
memset ( sharedLocks , 0 , sizeof ( sharedLocks ) ) ;
memset ( exclusiveLocks , 0 , sizeof ( exclusiveLocks ) ) ;
}
void cleanup ( ) {
for ( int i = 0 ; i < regions . size ( ) ; i + + )
delete [ ] ( uint8_t * ) regions [ i ] ;
table . erase ( filename ) ;
}
static Mutex mutex ;
static std : : map < std : : string , SharedMemoryInfo > table ;
} ;
Mutex SharedMemoryInfo : : mutex ;
std : : map < std : : string , SharedMemoryInfo > SharedMemoryInfo : : table ;
/*
* * This function is called to obtain a pointer to region iRegion of the
* * shared - memory associated with the database file fd . Shared - memory regions
* * are numbered starting from zero . Each shared - memory region is szRegion
* * bytes in size .
* *
2020-08-19 05:18:50 +08:00
* * If an error occurs , an error code is returned and * pp is set to nullptr .
2017-05-26 04:48:44 +08:00
* *
* * Otherwise , if the bExtend parameter is 0 and the requested shared - memory
* * region has not been allocated ( by any client , including one running in a
2020-08-19 05:18:50 +08:00
* * separate process ) , then * pp is set to nullptr and SQLITE_OK returned . If
2017-05-26 04:48:44 +08:00
* * bExtend is non - zero and the requested shared - memory region has not yet
* * been allocated , it is allocated by this function .
* *
* * If the shared - memory region has already been allocated or is allocated by
* * this call as described above , then it is mapped into this processes
* * address space ( if it is not already ) , * pp is set to point to the mapped
* * memory and SQLITE_OK returned .
*/
static int asyncShmMap(sqlite3_file* fd, /* Handle open on database file */
                       int iRegion, /* Region to retrieve */
                       int szRegion, /* Size of regions */
                       int bExtend, /* True to extend file if necessary */
                       void volatile** pp /* OUT: Mapped memory */
) {
    MutexHolder hold(SharedMemoryInfo::mutex);
    VFSAsyncFile* pDbFd = (VFSAsyncFile*)fd;

    SharedMemoryInfo* shm = pDbFd->sharedMemory;
    if (shm) {
        // Every mapping of the same file must agree on the region size.
        assert(shm->regionSize == szRegion);
    } else {
        // First map through this handle: attach to (or create) the per-file entry
        // and count this connection as a user of it.
        std::string key = pDbFd->filename;
        shm = pDbFd->sharedMemory = &SharedMemoryInfo::table[key];
        shm->filename = key;
        shm->regionSize = szRegion;
        ++shm->refcount;
    }

    // Same conversion the int/size_t comparison performs implicitly.
    const size_t wanted = (size_t)iRegion;
    if (wanted >= shm->regions.size()) {
        if (!bExtend) {
            // Region does not exist yet and the caller did not ask for it to be created.
            *pp = nullptr;
            return SQLITE_OK;
        }
        // Allocate zero-filled regions up to and including the requested index.
        while (shm->regions.size() <= wanted) {
            void* region = new uint8_t[szRegion]();
            shm->regions.push_back(region);
        }
    }

    *pp = shm->regions[wanted];
    return SQLITE_OK;
}
/*
* * Change the lock state for a shared - memory segment .
* *
* * Note that the relationship between SHAREd and EXCLUSIVE locks is a little
* * different here than in posix . In xShmLock ( ) , one can go from unlocked
* * to shared and back or from unlocked to exclusive and back . But one may
* * not go from shared to exclusive or from exclusive to shared .
*/
// sqlite doesn't seem to match these up correctly - it happily calls unlock on locks it doesn't hold.
// So we have to keep track of which locks are held by a given sqlite3_file
static int asyncShmLock(sqlite3_file* fd, /* Database file holding the shared memory */
                        int ofst, /* First lock to acquire or release */
                        int n, /* Number of locks to acquire or release */
                        int flags /* What to do with the lock */
) {
    assert(ofst >= 0 && ofst + n <= SQLITE_SHM_NLOCK);
    assert(n >= 1);
    assert(flags == (SQLITE_SHM_LOCK | SQLITE_SHM_SHARED) || flags == (SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE) ||
           flags == (SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED) || flags == (SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE));
    assert(n == 1 || (flags & SQLITE_SHM_EXCLUSIVE) != 0);

    MutexHolder hold(SharedMemoryInfo::mutex);
    VFSAsyncFile* pDbFd = (VFSAsyncFile*)fd;
    SharedMemoryInfo* memInfo = pDbFd->sharedMemory;
    const int end = ofst + n;

    // Release: drop only the locks this handle actually holds (tracked in its
    // per-handle bitmasks), whichever kind the caller named.
    if (flags & SQLITE_SHM_UNLOCK) {
        for (int slot = ofst; slot < end; slot++) {
            const int bit = 1 << slot;
            if (pDbFd->sharedMemorySharedLocks & bit) {
                pDbFd->sharedMemorySharedLocks &= ~bit;
                --memInfo->sharedLocks[slot];
            }
            if (pDbFd->sharedMemoryExclusiveLocks & bit) {
                pDbFd->sharedMemoryExclusiveLocks &= ~bit;
                --memInfo->exclusiveLocks[slot];
            }
        }
        return SQLITE_OK;
    }

    // Acquire shared: refuse if any slot has an exclusive holder other than this handle.
    if (flags & SQLITE_SHM_SHARED) {
        for (int slot = ofst; slot < end; slot++) {
            const auto mineExclusive = (pDbFd->sharedMemoryExclusiveLocks >> slot) & 1;
            if (memInfo->exclusiveLocks[slot] != mineExclusive) {
                return SQLITE_BUSY;
            }
        }
        for (int slot = ofst; slot < end; slot++) {
            const int bit = 1 << slot;
            if (!(pDbFd->sharedMemorySharedLocks & bit)) {
                pDbFd->sharedMemorySharedLocks |= bit;
                memInfo->sharedLocks[slot]++;
            }
        }
        return SQLITE_OK;
    }

    // Acquire exclusive: refuse if any slot has a shared or exclusive holder
    // other than this handle. All slots are checked before any is taken.
    for (int slot = ofst; slot < end; slot++) {
        const auto mineExclusive = (pDbFd->sharedMemoryExclusiveLocks >> slot) & 1;
        const auto mineShared = (pDbFd->sharedMemorySharedLocks >> slot) & 1;
        if (memInfo->exclusiveLocks[slot] != mineExclusive || memInfo->sharedLocks[slot] != mineShared) {
            return SQLITE_BUSY;
        }
    }
    for (int slot = ofst; slot < end; slot++) {
        const int bit = 1 << slot;
        if (!(pDbFd->sharedMemoryExclusiveLocks & bit)) {
            pDbFd->sharedMemoryExclusiveLocks |= bit;
            memInfo->exclusiveLocks[slot]++;
        }
    }
    return SQLITE_OK;
}
/*
* * Implement a memory barrier or memory fence on shared memory .
* *
* * All loads and stores begun before the barrier must complete before
* * any load or store begun after the barrier .
*/
// xShmBarrier: issues the platform's barrier intrinsic; the file handle is unused.
static void asyncShmBarrier(sqlite3_file*) {
#if WIN32
    _ReadWriteBarrier();
#else
    __sync_synchronize();
#endif
}
/*
* * Close a connection to shared - memory . Delete the underlying
* * storage if deleteFlag is true .
* *
* * If there is no shared memory associated with the connection then this
* * routine is a harmless no - op .
*/
static int asyncShmUnmap(sqlite3_file* fd, /* The underlying database file */
                         int deleteFlag /* Delete shared-memory if true */
) {
    MutexHolder hold(SharedMemoryInfo::mutex);
    VFSAsyncFile* pDbFd = (VFSAsyncFile*)fd;

    SharedMemoryInfo* shm = pDbFd->sharedMemory;
    if (shm == nullptr) {
        // This handle never mapped shared memory: nothing to do.
        return SQLITE_OK;
    }

    // Detach the handle and drop its reference. The storage itself is NOT freed
    // here; that happens in ~VFSAsyncFile when the last handle on the file goes away.
    pDbFd->sharedMemory = 0;
    --shm->refcount;
    if (shm->refcount == 0) {
        // We don't think deleteFlag will ever be set
        ASSERT(!deleteFlag);
    }
    return SQLITE_OK;
}
// Drops this handle's contribution to the per-file open count. When the last
// handle on a file name goes away, its bookkeeping entry and any shared memory
// registered for that name are discarded.
VFSAsyncFile::~VFSAsyncFile() {
    auto& counts = filename_lockCount_openCount[filename];

    TraceEvent(SevDebug, " VFSAsyncFileDestroyStart ")
        .detail(" Filename ", filename)
        .detail(" OpenCount ", counts.second)
        .detail(" LockCount ", counts.first)
        .backtrace();

    if (--counts.second != 0) {
        return; // other handles on this file are still open
    }

    // `counts` dangles after this erase and must not be used again.
    filename_lockCount_openCount.erase(filename);
    TraceEvent(SevDebug, " VFSAsyncFileDestroy ").detail(" Filename ", filename).backtrace();

    // Always delete the shared memory when the last copy of the file is deleted. In simulation, this is
    // helpful because "killing" a file without properly closing it can result in a shared memory state
    // that causes corruption when reopening the killed file. The only expected penalty from doing this
    // is a potentially slower open operation on a database, but that should happen infrequently.
    //
    // We can't do this in ShmUnmap when refcount is 0 because it seems that SQLite sometimes subsequently
    // tries to reopen the WAL from multiple locations simultaneously, resulting in a locking error.
    auto entry = SharedMemoryInfo::table.find(filename);
    if (entry == SharedMemoryInfo::table.end()) {
        return;
    }
    ASSERT_ABORT(entry->second.refcount == 0);
    entry->second.cleanup();
}
# endif
/*
* * Open a file handle .
*/
static int asyncOpen(sqlite3_vfs* pVfs, /* VFS */
                     const char* zName, /* File to open; 0 (temp file) is rejected */
                     sqlite3_file* pFile, /* Pointer to VFSAsyncFile struct to populate */
                     int flags, /* Input SQLITE_OPEN_XXX flags */
                     int* pOutFlags /* Output SQLITE_OPEN_XXX flags (or nullptr) */
) {
    // Method table shared by every file opened through this VFS.
    static const sqlite3_io_methods asyncio = {
        3, /* iVersion */
        asyncClose, /* xClose */
        asyncRead, /* xRead */
        asyncWrite, /* xWrite */
        asyncTruncate, /* xTruncate */
        asyncSync, /* xSync */
        VFSAsyncFileSize, /* xFileSize */
        asyncLock, /* xLock */
        asyncUnlock, /* xUnlock */
        asyncCheckReservedLock, /* xCheckReservedLock */
        VFSAsyncFileControl, /* xFileControl */
        asyncSectorSize, /* xSectorSize */
        asyncDeviceCharacteristics, /* xDeviceCharacteristics */
        asyncShmMap, /* shared-memory map */
        asyncShmLock, /* shared-memory lock */
        asyncShmBarrier, /* shared-memory barrier */
        asyncShmUnmap, /* shared-memory unmap */
        asyncReadZeroCopy, /* zero-copy read */
        asyncReleaseZeroCopy /* zero-copy release */
    };

    // Temporary (unnamed) files are not supported by this VFS.
    if (zName == 0) {
        return SQLITE_IOERR;
    }

    // The SQLite open flags are passed straight through to IAsyncFile below,
    // which is only valid while the numeric values coincide.
    static_assert(SQLITE_OPEN_EXCLUSIVE == IAsyncFile::OPEN_EXCLUSIVE &&
                      SQLITE_OPEN_CREATE == IAsyncFile::OPEN_CREATE &&
                      SQLITE_OPEN_READONLY == IAsyncFile::OPEN_READONLY &&
                      SQLITE_OPEN_READWRITE == IAsyncFile::OPEN_READWRITE,
                  " SQLite flag values don't match IAsyncFile flag values ");

    // File creation here is disabled because we always create the files first in KeyValueStoreSQLite,
    // using atomic creation. Hence only the read-only / read-write bits are forwarded.
    int oflags = (flags & (/*SQLITE_OPEN_EXCLUSIVE | SQLITE_OPEN_CREATE |*/ SQLITE_OPEN_READONLY |
                           SQLITE_OPEN_READWRITE)) |
                 IAsyncFile::OPEN_LOCK;
    if (flags & SQLITE_OPEN_WAL) {
        oflags |= IAsyncFile::OPEN_LARGE_PAGES;
    }

    // SQLite hands us raw storage of szOsFile bytes: zero it, then construct in place.
    VFSAsyncFile* p = (VFSAsyncFile*)pFile; /* Populate this structure */
    memset(static_cast<void*>(p), 0, sizeof(VFSAsyncFile));
    new (p) VFSAsyncFile(zName, flags);

    try {
        // Note that SQLiteDB::open also opens the db file, so its flags and modes are important, too
        p->file = waitForAndGet(IAsyncFileSystem::filesystem()->open(p->filename, oflags, 0600));
        TraceEvent(SevDebug, " VFSAsyncFileOpened ").detail(" Filename ", p->filename).backtrace();
    } catch (Error& e) {
        if (e.isInjectedFault()) {
            VFSAsyncFile::setInjectedError(SQLITE_CANTOPEN);
        }
        TraceEvent(" VFSAsyncFileOpenError ").error(e).detail(" Filename ", p->filename);
        // Undo the placement construction; pMethods is only assigned on the success path below.
        p->~VFSAsyncFile();
        return SQLITE_CANTOPEN;
    }

    if (pOutFlags) {
        *pOutFlags = flags;
    }
    p->base.pMethods = &asyncio;
    return SQLITE_OK;
}
// The next few functions, which perform filesystem operations by path rather than by file, have
// OS-specific implementations.
/*
* * Delete the file identified by argument zPath . If the dirSync parameter
* * is non - zero , then ensure the file - system modification to delete the
* * file has been synced to disk before returning .
*/
static int asyncDelete(sqlite3_vfs* pVfs, const char* zPath, int dirSync) {
    // At the moment this isn't used; hence isn't under test.
    // Could easily use IAsyncFileSystem::filesystem()->deleteFile().
    ASSERT(false);
    return SQLITE_IOERR_DELETE;
}
/*
* * Query the file - system to see if the named file exists , is readable or
* * is both readable and writable . For an exists query , treat a zero - length file
* * as if it does not exist .
*/
static int asyncAccess(sqlite3_vfs* pVfs, const char* zPath, int flags, int* pResOut) {
#ifdef __unixish__
#ifndef F_OK
#define F_OK 0
#endif
#ifndef R_OK
#define R_OK 4
#endif
#ifndef W_OK
#define W_OK 2
#endif
    assert(flags == SQLITE_ACCESS_EXISTS /* access(zPath, F_OK) */
           || flags == SQLITE_ACCESS_READ /* access(zPath, R_OK) */
           || flags == SQLITE_ACCESS_READWRITE /* access(zPath, R_OK|W_OK) */
    );

    // Translate the SQLite query into the mode argument of access().
    int mode;
    switch (flags) {
    case SQLITE_ACCESS_READ:
        mode = R_OK;
        break;
    case SQLITE_ACCESS_READWRITE:
        mode = R_OK | W_OK;
        break;
    default:
        mode = F_OK;
        break;
    }

    const int rc = access(zPath, mode);
    *pResOut = (rc == 0);

    // An existing but zero-length file is reported as not existing.
    if (flags == SQLITE_ACCESS_EXISTS && *pResOut) {
        struct stat info;
        if (stat(zPath, &info) == 0 && info.st_size == 0) {
            *pResOut = 0;
        }
    }
    return SQLITE_OK;
#else
    WIN32_FILE_ATTRIBUTE_DATA info;
    memset(&info, 0, sizeof(info));

    // Stays INVALID_FILE_ATTRIBUTES when the file is missing, or when it is
    // empty and the query is an "exists" check.
    DWORD attr = INVALID_FILE_ATTRIBUTES;
    if (GetFileAttributesEx(zPath, GetFileExInfoStandard, &info)) {
        const bool emptyForExists =
            flags == SQLITE_ACCESS_EXISTS && info.nFileSizeHigh == 0 && info.nFileSizeLow == 0;
        if (!emptyForExists) {
            attr = info.dwFileAttributes;
        }
    } else if (GetLastError() != ERROR_FILE_NOT_FOUND) {
        return SQLITE_IOERR_ACCESS;
    }

    if (flags == SQLITE_ACCESS_READWRITE) {
        *pResOut = (attr & FILE_ATTRIBUTE_READONLY) == 0;
    } else {
        *pResOut = attr != INVALID_FILE_ATTRIBUTES;
    }
    return SQLITE_OK;
#endif
}
/*
* * Argument zPath points to a nul - terminated string containing a file path .
* * If zPath is an absolute path , then it is copied as is into the output
* * buffer . Otherwise , if it is a relative path , then the equivalent full
* * path is written to the output buffer .
*/
static int asyncFullPathname(sqlite3_vfs* pVfs, /* VFS */
                             const char* zPath, /* Input path (possibly a relative path) */
                             int nPathOut, /* Size of output buffer in bytes */
                             char* zPathOut /* Pointer to output buffer */
) {
    try {
        const auto resolved = abspath(zPath);
        // The result plus its terminating NUL must fit in the caller's buffer.
        if (resolved.size() >= nPathOut) {
            return SQLITE_IOERR;
        }
        memcpy(zPathOut, resolved.c_str(), resolved.size() + 1);
        return SQLITE_OK;
    } catch (Error& e) {
        if (e.isInjectedFault()) {
            VFSAsyncFile::setInjectedError(SQLITE_IOERR);
        }
        TraceEvent(SevError, " VFSAsyncFullPathnameError ").error(e).detail(" PathIn ", std::string(zPath));
        return SQLITE_IOERR;
    } catch (...) {
        // Anything that is not a flow Error is logged as unknown_error().
        TraceEvent(SevError, " VFSAsyncFullPathnameError ")
            .error(unknown_error())
            .detail(" PathIn ", std::string(zPath));
        return SQLITE_IOERR;
    }
}
/*
* * Returns true if there is a shared memory entry for the specified filename ,
* * and false otherwise .
*/
bool vfsAsyncIsOpen ( std : : string filename ) {
2019-03-27 00:58:54 +08:00
return SharedMemoryInfo : : table . count ( abspath ( filename ) ) > 0 ;
2017-05-26 04:48:44 +08:00
}
/*
* * The following four VFS methods :
* *
* * xDlOpen
* * xDlError
* * xDlSym
* * xDlClose
* *
* * are supposed to implement the functionality needed by SQLite to load
* * extensions compiled as shared objects . This simple VFS does not support
* * this functionality , so the following functions are no - ops .
*/
// xDlOpen: extension loading is unsupported, so no handle is ever returned.
static void* asyncDlOpen(sqlite3_vfs* pVfs, const char* zPath) {
    return nullptr;
}
/*
** xDlError: write a fixed "not supported" message into zErrMsg, a buffer of
** nByte bytes, and make sure it is NUL-terminated.
**
** Does nothing when there is no buffer to write into (nByte <= 0 or a null
** zErrMsg); without that guard the terminator store would land at
** zErrMsg[-1] for nByte == 0.
*/
static void asyncDlError(sqlite3_vfs* pVfs, int nByte, char* zErrMsg) {
    if (nByte <= 0 || zErrMsg == nullptr) {
        return;
    }
    sqlite3_snprintf(nByte, zErrMsg, " Loadable extensions are not supported ");
    // Belt and braces: force a real NUL terminator in the last byte of the buffer.
    zErrMsg[nByte - 1] = '\0';
}
// xDlSym: extension loading is unsupported, so no symbol is ever resolved.
static void (*asyncDlSym(sqlite3_vfs* pVfs, void* pH, const char* z))(void) {
    return nullptr;
}
// xDlClose: nothing to close because asyncDlOpen() never returns a handle.
static void asyncDlClose(sqlite3_vfs* pVfs, void* pHandle) {}
/*
* * Parameter zByte points to a buffer nByte bytes in size . Populate this
* * buffer with pseudo - random data .
*/
static int asyncRandomness(sqlite3_vfs* pVfs, int nByte, char* zByte) {
    // One draw in [0, 256) from deterministicRandom() per output byte, in buffer order.
    int filled = 0;
    while (filled < nByte) {
        zByte[filled] = deterministicRandom()->randomInt(0, 256);
        ++filled;
    }
    return SQLITE_OK;
}
/*
* * Sleep for at least nMicro microseconds . Return the ( approximate ) number
* * of microseconds slept for .
*/
static int asyncSleep(sqlite3_vfs* pVfs, int microseconds) {
    try {
        // Under simulation the sleep is also ended by the current process's shutdown signal.
        Future<Void> simCancel = Never();
        if (g_network->isSimulated()) {
            simCancel = success(g_simulator.getCurrentProcess()->shutdownSignal.getFuture());
        }

        if (!simCancel.isReady()) {
            waitFor(g_network->delay(microseconds * 1e-6, TaskPriority::DefaultDelay) || simCancel);
            return microseconds;
        }

        // Shutdown already signalled: wait MAX_BUGGIFIED_DELAY instead of the
        // requested time and report zero microseconds slept.
        waitFor(delay(FLOW_KNOBS->MAX_BUGGIFIED_DELAY));
        return 0;
    } catch (Error& e) {
        if (e.isInjectedFault()) {
            VFSAsyncFile::setInjectedError(SQLITE_ERROR);
        }
        TraceEvent(SevError, " VFSAsyncSleepError ").error(e, true);
        return 0;
    }
}
/*
** Find the current time (in Universal Coordinated Time). Write into *piNow
** the current time and date as a Julian Day number times 86_400_000. In
** other words, write into *piNow the number of milliseconds since the Julian
** epoch of noon in Greenwich on November 24, 4714 B.C. according to the
** proleptic Gregorian calendar.
**
** On success, return 0. Return 1 if the time and date cannot be found.
** (This implementation always returns 0.)
*/
static int asyncCurrentTimeInt64 ( sqlite3_vfs * NotUsed , sqlite3_int64 * piNow ) {
# if __unixish__
static const sqlite3_int64 unixEpoch = 24405875 * ( sqlite3_int64 ) 8640000 ;
struct timeval sNow ;
2020-08-19 05:18:50 +08:00
gettimeofday ( & sNow , nullptr ) ;
2017-05-26 04:48:44 +08:00
* piNow = unixEpoch + 1000 * ( sqlite3_int64 ) sNow . tv_sec + sNow . tv_usec / 1000 ;
# elif defined(_WIN32)
static const sqlite3_int64 winFiletimeEpoch = 23058135 * ( sqlite3_int64 ) 8640000 ;
int64_t ft = 0 ;
GetSystemTimeAsFileTime ( ( FILETIME * ) & ft ) ;
* piNow = winFiletimeEpoch + ft / 10000 ;
# else
# error Port me!
# endif
return 0 ;
}
/*
* * Set * pTime to the current UTC time expressed as a Julian day . Return
* * SQLITE_OK if successful , or an error code otherwise .
* *
* * http : //en.wikipedia.org/wiki/Julian_day
*/
static int asyncCurrentTime(sqlite3_vfs* pVfs, double* pTime) {
    sqlite3_int64 julianMs = 0;
    const int rc = asyncCurrentTimeInt64(pVfs, &julianMs);
    if (rc != 0) {
        // Propagate the failure; *pTime is left untouched.
        return rc;
    }
    // Convert milliseconds to days (86,400,000 ms per day).
    *pTime = julianMs / 86400000.0;
    return SQLITE_OK;
}
// xGetLastError: no error text is produced; the buffer is left untouched and 0 is returned.
static int asyncGetLastError(sqlite3_vfs* NotUsed, int NotUsed2, char* NotUsed3) {
    return 0;
}
/*
** This function returns a pointer to the VFS implemented in this file.
** To make the VFS available to SQLite:
**
**   sqlite3_vfs_register(vfsAsync(), 0);
*/
sqlite3_vfs* vfsAsync() {
    // Single function-local instance: every call returns the same pointer.
    static sqlite3_vfs asyncvfs = {
        3, /* iVersion */
        sizeof(VFSAsyncFile), /* szOsFile: storage SQLite reserves for asyncOpen() */
        MAXPATHNAME, /* mxPathname */
        0, /* pNext */
        " fdb_async ", /* zName */
        0, /* pAppData */
        asyncOpen, /* xOpen */
        asyncDelete, /* xDelete */
        asyncAccess, /* xAccess */
        asyncFullPathname, /* xFullPathname */
        asyncDlOpen, /* xDlOpen */
        asyncDlError, /* xDlError */
        asyncDlSym, /* xDlSym */
        asyncDlClose, /* xDlClose */
        asyncRandomness, /* xRandomness */
        asyncSleep, /* xSleep */
        asyncCurrentTime, /* xCurrentTime */
        asyncGetLastError, /* xGetLastError */
        asyncCurrentTimeInt64, /* xCurrentTimeInt64 */
        0, /* xSetSystemCall */
        0, /* xGetSystemCall */
        0, /* xNextSystemCall */
    };
    return &asyncvfs;
}