From e72d486215d2cc1d2598132bca319d09eb9fa74b Mon Sep 17 00:00:00 2001 From: Steve Atherton Date: Sat, 16 Jan 2021 05:04:30 -0800 Subject: [PATCH] Refactored how injected fault state is managed to make debugging injected fault handling easier. Moved injected error tracking for open() into an INetwork global since it should be unique per simulated process. Fixed bug where injected error in WAL file was not recognized as injected because it occurred before SQLiteDB's vfsWAL pointer was initialized. Simplified logic in SQLiteDB. Added comments. --- fdbserver/KeyValueStoreSQLite.actor.cpp | 31 +++++++++++----------- fdbserver/VFSAsync.cpp | 23 +++++++++-------- fdbserver/VFSAsync.h | 34 ++++++++++++++++++++++--- flow/network.h | 3 ++- 4 files changed, 62 insertions(+), 29 deletions(-) diff --git a/fdbserver/KeyValueStoreSQLite.actor.cpp b/fdbserver/KeyValueStoreSQLite.actor.cpp index 3e4803c482..b419b8a5cb 100644 --- a/fdbserver/KeyValueStoreSQLite.actor.cpp +++ b/fdbserver/KeyValueStoreSQLite.actor.cpp @@ -198,7 +198,6 @@ struct SQLiteDB : NonCopyable { bool page_checksums; bool fragment_values; PageChecksumCodec *pPagerCodec; // we do NOT own this pointer, db does. - VFSAsyncFile *vfsDB, *vfsWAL; void beginTransaction(bool write) { checkError("BtreeBeginTrans", sqlite3BtreeBeginTrans(btree, write)); @@ -213,7 +212,7 @@ struct SQLiteDB : NonCopyable { void open(bool writable); void createFromScratch(); - SQLiteDB( std::string filename, bool page_checksums, bool fragment_values): filename(filename), db(NULL), btree(NULL), table(-1), freetable(-1), haveMutex(false), page_checksums(page_checksums), fragment_values(fragment_values), vfsDB(nullptr), vfsWAL(nullptr) {} + SQLiteDB( std::string filename, bool page_checksums, bool fragment_values): filename(filename), db(NULL), btree(NULL), table(-1), freetable(-1), haveMutex(false), page_checksums(page_checksums), fragment_values(fragment_values) {} ~SQLiteDB() { if (db) { @@ -237,12 +236,20 @@ struct SQLiteDB : NonCopyable { } } - bool consumeInjectedErrors() { - // Both of these consumeInjectedError() calls must be made, If this was written as one expression - // then if the first one returned true the second call would be skipped. - bool dbErr = (vfsDB != nullptr && vfsDB->consumeInjectedError()); - bool walErr = (vfsWAL != nullptr && vfsWAL->consumeInjectedError()); - return dbErr || walErr; + // Consume and return the first of any injected errors detected by the VFSAsyncFile class. + bool consumeInjectedErrors(bool isOpen) { + // Get pointers to the VFSAsyncFile instances that SQLite is using to access its DB and WAL files + // in order to check if either of them have encountered an injected fault. + // + // These could be made into members, but this code is only run when an error occurs and only in simulation. + VFSAsyncFile *vfsDB = (VFSAsyncFile *)sqlite3_get_vfs_db(db); + VFSAsyncFile *vfsWAL = (VFSAsyncFile *)sqlite3_get_vfs_wal(db); + + // Return the first error found, leave the others unconsumed. + // Consume the open error last to prevent an injected open error from hiding a non-injected IO op error. + return (vfsDB != nullptr && vfsDB->consumeInjectedError()) + || (vfsWAL != nullptr && vfsWAL->consumeInjectedError()) + || (isOpen && VFSAsyncFile::consumeInjectedOpenError()); } void checkError( const char* context, int rc ) { @@ -252,7 +259,7 @@ struct SQLiteDB : NonCopyable { // an injected fault. Assume that if fault injection is happening, this is an injected fault. Error err = io_error(); - if (g_network->isSimulated() && (consumeInjectedErrors())) { + if (g_network->isSimulated() && consumeInjectedErrors(strcmp(context, "open") == 0)) { err = err.asInjectedFault(); } @@ -1365,9 +1372,6 @@ void SQLiteDB::open(bool writable) { int result = sqlite3_open_v2(apath.c_str(), &db, (writable ? SQLITE_OPEN_READWRITE : SQLITE_OPEN_READONLY), NULL); checkError("open", result); - vfsDB = (VFSAsyncFile *)sqlite3_get_vfs_db(db); - ASSERT(vfsDB != nullptr); - int chunkSize; if( !g_network->isSimulated() ) { chunkSize = 4096 * SERVER_KNOBS->SQLITE_CHUNK_SIZE_PAGES; @@ -1394,9 +1398,6 @@ void SQLiteDB::open(bool writable) { ASSERT( false ); } - vfsWAL = (VFSAsyncFile *)sqlite3_get_vfs_wal(db); - ASSERT(vfsWAL != nullptr); - if (writable) { Statement(*this, "PRAGMA synchronous = NORMAL").execute(); // OFF, NORMAL, FULL Statement(*this, "PRAGMA wal_autocheckpoint = -1").nextRow(); diff --git a/fdbserver/VFSAsync.cpp b/fdbserver/VFSAsync.cpp index b50ced0e97..9bde2dbc01 100644 --- a/fdbserver/VFSAsync.cpp +++ b/fdbserver/VFSAsync.cpp @@ -61,7 +61,7 @@ const uint32_t RESERVED_COUNT = 1U<<29; VFSAsyncFile::VFSAsyncFile(std::string const& filename, int flags) -: filename(filename), flags(flags), pLockCount(&filename_lockCount_openCount[filename].first), debug_zcrefs(0), debug_zcreads(0), debug_reads(0), chunkSize(0), errorInjected(false) { +: filename(filename), flags(flags), pLockCount(&filename_lockCount_openCount[filename].first), debug_zcrefs(0), debug_zcreads(0), debug_reads(0), chunkSize(0), injectedError(false) { filename_lockCount_openCount[filename].second++; } @@ -92,7 +92,7 @@ static int asyncRead(sqlite3_file *pFile, void *zBuf, int iAmt, sqlite_int64 iOf return SQLITE_OK; } catch (Error &e) { if(e.isInjectedFault()) { - ((VFSAsyncFile *)pFile)->errorInjected = true; + ((VFSAsyncFile *)pFile)->setInjectedError(); } return SQLITE_IOERR_READ; } @@ -106,7 +106,7 @@ static int asyncReleaseZeroCopy(sqlite3_file* pFile, void* data, int iAmt, sqlit p->file->releaseZeroCopy( data, iAmt, iOfst ); } catch (Error &e) { if(e.isInjectedFault()) { - ((VFSAsyncFile *)pFile)->errorInjected = true; + ((VFSAsyncFile *)pFile)->setInjectedError(); } return SQLITE_IOERR; } @@ -131,7 +131,7 @@ static int asyncReadZeroCopy(sqlite3_file *pFile, void **data, int iAmt, sqlite_ return SQLITE_OK; } catch (Error &e) { if(e.isInjectedFault()) { - ((VFSAsyncFile *)pFile)->errorInjected = true; + ((VFSAsyncFile *)pFile)->setInjectedError(); } return SQLITE_IOERR_READ; } @@ -170,7 +170,7 @@ static int asyncWrite(sqlite3_file *pFile, const void *zBuf, int iAmt, sqlite_in return SQLITE_OK; } catch(Error &e) { if(e.isInjectedFault()) { - ((VFSAsyncFile *)pFile)->errorInjected = true; + ((VFSAsyncFile *)pFile)->setInjectedError(); } return SQLITE_IOERR_WRITE; } @@ -189,7 +189,7 @@ static int asyncTruncate(sqlite3_file *pFile, sqlite_int64 size){ return SQLITE_OK; } catch(Error &e) { if(e.isInjectedFault()) { - ((VFSAsyncFile *)pFile)->errorInjected = true; + ((VFSAsyncFile *)pFile)->setInjectedError(); } return SQLITE_IOERR_TRUNCATE; } @@ -202,7 +202,7 @@ static int asyncSync(sqlite3_file *pFile, int flags){ return SQLITE_OK; } catch (Error &e) { if(e.isInjectedFault()) { - ((VFSAsyncFile *)pFile)->errorInjected = true; + ((VFSAsyncFile *)pFile)->setInjectedError(); } TraceEvent("VFSSyncError") @@ -225,7 +225,7 @@ static int VFSAsyncFileSize(sqlite3_file *pFile, sqlite_int64 *pSize){ return SQLITE_OK; } catch (Error &e) { if(e.isInjectedFault()) { - ((VFSAsyncFile *)pFile)->errorInjected = true; + ((VFSAsyncFile *)pFile)->setInjectedError(); } return SQLITE_IOERR_FSTAT; } @@ -547,6 +547,9 @@ static int asyncOpen( .detail("Sqlite3File", DEBUG_DETERMINISM ? 0 : (int64_t)pFile) .detail("IAsyncFile", DEBUG_DETERMINISM ? 0 : (int64_t)p->file.getPtr());*/ } catch (Error& e) { + if(e.isInjectedFault()) { + VFSAsyncFile::setOpenError(); + } TraceEvent("SQLiteOpenFail").error(e).detail("Filename", p->filename); p->~VFSAsyncFile(); return SQLITE_CANTOPEN; @@ -652,7 +655,7 @@ static int asyncFullPathname( return SQLITE_OK; } catch (Error& e) { if(e.isInjectedFault()) { - ((VFSAsyncFile *)pVfs)->errorInjected = true; + ((VFSAsyncFile *)pVfs)->setInjectedError(); } TraceEvent(SevError,"VFSAsyncFullPathnameError").error(e).detail("PathIn", (std::string)zPath); return SQLITE_IOERR; @@ -723,7 +726,7 @@ static int asyncSleep(sqlite3_vfs *pVfs, int microseconds){ return microseconds; } catch( Error &e ) { if(e.isInjectedFault()) { - ((VFSAsyncFile *)pVfs)->errorInjected = true; + ((VFSAsyncFile *)pVfs)->setInjectedError(); } TraceEvent(SevError, "AsyncSleepError").error(e,true); return 0; diff --git a/fdbserver/VFSAsync.h b/fdbserver/VFSAsync.h index debf317eef..b67292b9f2 100644 --- a/fdbserver/VFSAsync.h +++ b/fdbserver/VFSAsync.h @@ -52,11 +52,39 @@ struct VFSAsyncFile { int flags; std::string filename; Reference file; - bool errorInjected; + bool injectedError; + + // Method to set the injectedError flag to indicate that an injected error has been caught. + // Mostly useful for debugging injected fault handling. + void setInjectedError() { + //TraceEvent(SevInfo, "VFSSetInjectedFault").backtrace().detail("This", (void *)this); + injectedError = true; + } bool consumeInjectedError() { - bool e = errorInjected; - errorInjected = false; + bool e = injectedError; + injectedError = false; + return e; + } + + // Static methods for capturing an injected open error and consuming it to convert SQLite + // errors into Error instances with isInjectedFault() set. + // + // Uses a g_network global so that injected errors on one simulated process do not mask + // non-injected errors on another. + static void setOpenError() { + g_network->setGlobal(INetwork::enSQLiteOpenError, (flowGlobalType)true); + } + + static void clearOpenError() { + g_network->setGlobal(INetwork::enSQLiteOpenError, (flowGlobalType)false); + } + + static bool consumeInjectedOpenError() { + bool e = (bool)g_network->global(INetwork::enSQLiteOpenError); + if(e) { + clearOpenError(); + } return e; } diff --git a/flow/network.h b/flow/network.h index eb2105c468..e047f2e813 100644 --- a/flow/network.h +++ b/flow/network.h @@ -436,7 +436,8 @@ public: enASIOTimedOut = 9, enBlobCredentialFiles = 10, enNetworkAddressesFunc = 11, - enClientFailureMonitor = 12 + enClientFailureMonitor = 12, + enSQLiteOpenError = 13 }; virtual void longTaskCheck( const char* name ) {}