/*
 * AsyncFileKAIO.actor.h
 *
 * This source file is part of the FoundationDB open source project
 *
 * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
# pragma once
# ifdef __linux__
// When actually compiled (NO_INTELLISENSE), include the generated version of this file. In intellisense use the source version.
# if defined(NO_INTELLISENSE) && !defined(FLOW_ASYNCFILEKAIO_ACTOR_G_H)
# define FLOW_ASYNCFILEKAIO_ACTOR_G_H
2018-10-20 01:30:13 +08:00
# include "fdbrpc/AsyncFileKAIO.actor.g.h"
2017-05-26 04:48:44 +08:00
# elif !defined(FLOW_ASYNCFILEKAIO_ACTOR_H)
# define FLOW_ASYNCFILEKAIO_ACTOR_H
2018-10-20 01:30:13 +08:00
# include "fdbrpc/IAsyncFile.h"
2017-05-26 04:48:44 +08:00
# include <fcntl.h>
# include <sys/stat.h>
# include <sys/eventfd.h>
# include <sys/syscall.h>
# include "fdbrpc/linux_kaio.h"
# include "flow/Knobs.h"
# include "flow/UnitTest.h"
2017-05-27 08:43:28 +08:00
# include <stdio.h>
2020-01-14 10:40:35 +08:00
# include "flow/crc32c.h"
2017-05-27 08:43:28 +08:00
# include "flow/genericactors.actor.h"
2018-08-11 06:47:41 +08:00
# include "flow/actorcompiler.h" // This must be the last #include.
2017-05-27 08:43:28 +08:00
// Set this to true to enable detailed KAIO request logging, which currently is written to a hardcoded location /data/v7/fdb/
# define KAIO_LOGGING 0
2017-05-26 04:48:44 +08:00
// Payload of the slow-submit event metric that AsyncFileKAIO::launch() fills in and logs when one
// launch pass takes longer than FLOW_KNOBS->SLOW_LOOP_CUTOFF.
// NOTE(review): the two duration fields are annotated "ns", but launch() stores plain differences of
// timer_monotonic() readings (doubles) into them -- confirm the intended unit.
DESCR struct SlowAioSubmit {
int64_t submitDuration ; // ns
int64_t truncateDuration ; // ns
int64_t numTruncates ; // number of pre-submit truncates performed during the launch pass
int64_t truncateBytes ; // sum of (nextFileSize - lastFileSize) over those truncates
int64_t largestTruncate ; // largest single (nextFileSize - lastFileSize) seen in the pass
} ;
class AsyncFileKAIO : public IAsyncFile , public ReferenceCounted < AsyncFileKAIO > {
public :
2017-05-27 08:43:28 +08:00
# if KAIO_LOGGING
private :
#pragma pack(push, 1)
	// One fixed-layout binary record of the KAIO operation log (only built when KAIO_LOGGING is enabled).
	// The struct is byte-packed because log() writes its raw bytes straight to the log file.
	struct OpLogEntry {
		// Only `result` gets a default; callers fill in every other field before calling log().
		OpLogEntry() : result(0) {}

		enum EOperation { READ = 1, WRITE = 2, SYNC = 3, TRUNCATE = 4 };
		enum EStage { START = 1, LAUNCH = 2, REQUEUE = 3, COMPLETE = 4, READY = 5 };

		int64_t timestamp;
		uint32_t id;
		uint32_t checksum;
		uint32_t pageOffset;
		uint8_t pageCount;
		uint8_t op; // an EOperation value
		uint8_t stage; // an EStage value
		uint32_t result;

		// Returns the next operation id; ids start at 1, so 0 can mean "no id assigned yet".
		static uint32_t nextID() {
			static uint32_t last = 0;
			return ++last;
		}

		// Appends this record to `file`, rewinding to the start once the file position passes 50e9 bytes.
		// Throws io_error() if the record cannot be written.
		void log(FILE* file) {
			if (ftell(file) > (int64_t)50 * 1e9)
				fseek(file, 0, SEEK_SET);
			if (!fwrite(this, sizeof(OpLogEntry), 1, file))
				throw io_error();
		}
	};
// Restore the packing that was in effect before OpLogEntry. The previous "#pragma pop" is not a pack
// directive, so the 1-byte packing pushed above was never undone for the declarations that follow.
#pragma pack(pop)

	FILE* logFile; // per-file operation log, or nullptr when logging is not active for this file
	struct IOBlock;
	static void KAIOLogBlockEvent(IOBlock* ioblock, OpLogEntry::EStage stage, uint32_t result = 0);
	static void KAIOLogBlockEvent(FILE* logFile, IOBlock* ioblock, OpLogEntry::EStage stage, uint32_t result = 0);
	static void KAIOLogEvent(FILE* logFile, uint32_t id, OpLogEntry::EOperation op, OpLogEntry::EStage stage, uint32_t pageOffset = 0, uint32_t result = 0);
public :
# else
# define KAIOLogBlockEvent(...)
# define KAIOLogEvent(...)
# endif
2017-05-26 04:48:44 +08:00
// Opens `filename` for kernel AIO and returns it as an IAsyncFile, or an error Future on failure.
//
// flags must include OPEN_UNBUFFERED. With OPEN_ATOMIC_WRITE_AND_CREATE the file actually opened is
// `filename` plus the " .part " suffix (sync() later renames it onto `filename`). With OPEN_LOCK a
// write lock over the whole file is requested with fcntl(F_SETLK).
// `ignore` is unused.
// Fails with file_not_found() when ::open() reports ENOENT, and with io_error() for any other open,
// lock or fstat failure.
static Future<Reference<IAsyncFile>> open(std::string filename, int flags, int mode, void* ignore) {
	ASSERT(!FLOW_KNOBS->DISABLE_POSIX_KERNEL_AIO);
	ASSERT(flags & OPEN_UNBUFFERED);

	if (flags & OPEN_LOCK)
		mode |= 02000; // Enable mandatory locking for this file if it is supported by the filesystem

	std::string open_filename = filename;
	if (flags & OPEN_ATOMIC_WRITE_AND_CREATE) {
		ASSERT((flags & OPEN_CREATE) && (flags & OPEN_READWRITE) && !(flags & OPEN_EXCLUSIVE));
		open_filename = filename + " .part ";
	}

	int fd = ::open(open_filename.c_str(), openFlags(flags), mode);
	if (fd < 0) {
		// Capture errno before anything else runs: constructing errors and trace events may overwrite it.
		int ecode = errno;
		Error e = ecode == ENOENT ? file_not_found() : io_error();
		TraceEvent ev(" AsyncFileKAIOOpenFailed ");
		// Put the saved code back for GetLastError() (presumably it reports errno -- TODO confirm).
		errno = ecode;
		ev.error(e).detail(" Filename ", filename).detailf(" Flags ", " %x ", flags)
		    .detailf(" OSFlags ", " %x ", openFlags(flags)).detailf(" Mode ", " 0%o ", mode).GetLastError();
		if (ecode == EINVAL)
			ev.detail(" Description ", " Invalid argument - Does the target filesystem support KAIO? ");
		return e;
	} else {
		TraceEvent(" AsyncFileKAIOOpen ")
		    .detail(" Filename ", filename)
		    .detail(" Flags ", flags)
		    .detail(" Mode ", mode)
		    .detail(" Fd ", fd);
	}

	// From here on `r` owns the descriptor: the early returns below destroy it, which closes fd.
	Reference<AsyncFileKAIO> r(new AsyncFileKAIO(fd, flags, filename));

	if (flags & OPEN_LOCK) {
		// Acquire a "write" lock for the entire file
		flock lockDesc;
		lockDesc.l_type = F_WRLCK;
		lockDesc.l_whence = SEEK_SET;
		lockDesc.l_start = 0;
		lockDesc.l_len = 0; // "Specifying 0 for l_len has the special meaning: lock all bytes starting at the location specified by l_whence and l_start through to the end of file, no matter how large the file grows."
		lockDesc.l_pid = 0;
		if (fcntl(fd, F_SETLK, &lockDesc) == -1) {
			TraceEvent(SevError, " UnableToLockFile ").detail(" Filename ", filename).GetLastError();
			return io_error();
		}
	}

	struct stat buf;
	if (fstat(fd, &buf)) {
		TraceEvent(" AsyncFileKAIOFStatError ").detail(" Fd ", fd).detail(" Filename ", filename).GetLastError();
		return io_error();
	}

	// Both size trackers start at the on-disk size.
	r->lastFileSize = r->nextFileSize = buf.st_size;
	return Reference<IAsyncFile>(std::move(r));
}
// Process-wide setup of the shared AIO context.
// Registers the metrics (outside simulation), creates the kernel AIO context sized by
// FLOW_KNOBS->MAX_OUTSTANDING, applies `ioTimeout`, remembers the event fd of `ev`, starts the poll()
// actor on it and installs launch() as the network's run-cycle function.
// Throws io_error() if io_setup() fails.
static void init(Reference<IEventFD> ev, double ioTimeout) {
	ASSERT(!FLOW_KNOBS->DISABLE_POSIX_KERNEL_AIO);

	const bool simulated = g_network->isSimulated();
	if (!simulated) {
		ctx.countAIOSubmit.init(LiteralStringRef(" AsyncFile.CountAIOSubmit "));
		ctx.countAIOCollect.init(LiteralStringRef(" AsyncFile.CountAIOCollect "));
		ctx.submitMetric.init(LiteralStringRef(" AsyncFile.Submit "));
		ctx.countPreSubmitTruncate.init(LiteralStringRef(" AsyncFile.CountPreAIOSubmitTruncate "));
		ctx.preSubmitTruncateBytes.init(LiteralStringRef(" AsyncFile.PreAIOSubmitTruncateBytes "));
		ctx.slowAioSubmitMetric.init(LiteralStringRef(" AsyncFile.SlowAIOSubmit "));
	}

	const int setupResult = io_setup(FLOW_KNOBS->MAX_OUTSTANDING, &ctx.iocx);
	if (setupResult < 0) {
		TraceEvent(" IOSetupError ").GetLastError();
		throw io_error();
	}

	setTimeout(ioTimeout);
	ctx.evfd = ev->getFD();
	poll(ev);

	g_network->setGlobal(INetwork::enRunCycleFunc, (flowGlobalType)&AsyncFileKAIO::launch);
}
// Returns the event file descriptor stored in the shared context by init() (-1 until then).
static int get_eventfd ( ) { return ctx . evfd ; }
// Sets the shared I/O timeout; see Context::setIOTimeout for how a negative value is interpreted.
static void setTimeout ( double ioTimeout ) { ctx . setIOTimeout ( ioTimeout ) ; }
// IAsyncFile reference counting is forwarded to the ReferenceCounted<AsyncFileKAIO> base.
virtual void addref ( ) { ReferenceCounted < AsyncFileKAIO > : : addref ( ) ; }
virtual void delref ( ) { ReferenceCounted < AsyncFileKAIO > : : delref ( ) ; }
// Queues an asynchronous read of `length` bytes at `offset` into `data`.
// The returned future is the one attached to the queued request; if this file has already been
// marked failed, an io_timeout() error is returned instead and nothing is queued.
// enqueue() asserts that the buffer address, offset and length are all multiples of 4096.
virtual Future<int> read(void* data, int length, int64_t offset) {
	++countFileLogicalReads;
	++countLogicalReads;

	if (failed) {
		return io_timeout();
	}

	// Ownership of the request passes to the submission queue; the request deletes itself in setResult().
	IOBlock* request = new IOBlock(IO_CMD_PREAD, fd);
	request->buf = data;
	request->nbytes = length;
	request->offset = offset;
	enqueue(request, " read ", this);

	Future<int> result = request->result.getFuture();
#if KAIO_LOGGING
	//result = map(result, [=](int r) mutable { KAIOLogBlockEvent(request, OpLogEntry::READY, r); return r; });
#endif
	return result;
}
// Queues an asynchronous write of `length` bytes from `data` at `offset`.
// nextFileSize is raised to cover the written range before the request is queued, so launch() can
// extend the file ahead of submission. Returns an io_timeout() error without queuing anything if this
// file has already been marked failed.
// enqueue() asserts that the buffer address, offset and length are all multiples of 4096.
virtual Future<Void> write(void const* data, int length, int64_t offset) {
	++countFileLogicalWrites;
	++countLogicalWrites;

	if (failed) {
		return io_timeout();
	}

	// Ownership of the request passes to the submission queue; the request deletes itself in setResult().
	IOBlock* request = new IOBlock(IO_CMD_PWRITE, fd);
	request->buf = (void*)data;
	request->nbytes = length;
	request->offset = offset;

	nextFileSize = std::max(nextFileSize, offset + length);
	enqueue(request, " write ", this);

	Future<int> result = request->result.getFuture();
#if KAIO_LOGGING
	//result = map(result, [=](int r) mutable { KAIOLogBlockEvent(request, OpLogEntry::READY, r); return r; });
#endif
	return success(result);
}
2017-12-01 08:55:39 +08:00
// TODO(alexmiller): Remove when we upgrade the dev docker image to >14.10
# ifndef FALLOC_FL_ZERO_RANGE
# define FALLOC_FL_ZERO_RANGE 0x10
# endif
// Zeroes `length` bytes starting at `offset`.
// Tries fallocate(FALLOC_FL_ZERO_RANGE) first and falls back to IAsyncFile::zeroRange() when that
// fails. Once the kernel/filesystem reports the operation as unsupported, the shared
// ctx.fallocateZeroSupported flag is cleared so later calls go straight to the fallback.
virtual Future<Void> zeroRange(int64_t offset, int64_t length) override {
	if (ctx.fallocateZeroSupported) {
		if (fallocate(fd, FALLOC_FL_ZERO_RANGE, offset, length) == 0) {
			return Void();
		}
		// fallocate() signals failure by returning -1 with the cause in errno; the return value itself
		// is never EOPNOTSUPP, so the errno is what has to be inspected here.
		if (errno == EOPNOTSUPP) {
			ctx.fallocateZeroSupported = false;
		}
	}
	return IAsyncFile::zeroRange(offset, length);
}
2017-05-26 04:48:44 +08:00
// Synchronously resizes the file to `size` bytes.
// When growing (size >= lastFileSize) and fallocate is believed to be supported, the space is
// allocated with fallocate(); if that reports EOPNOTSUPP the shared flag is cleared and ftruncate()
// is used instead, as it is for every shrink. Any other fallocate failure, or an ftruncate failure,
// yields io_error(). On success both lastFileSize and nextFileSize become `size`.
// Returns io_timeout() immediately if this file has already been marked failed.
virtual Future<Void> truncate(int64_t size) {
	++countFileLogicalWrites;
	++countLogicalWrites;

	if (failed) {
		return io_timeout();
	}

#if KAIO_LOGGING
	uint32_t id = OpLogEntry::nextID();
#endif
	int result = -1;
	KAIOLogEvent(logFile, id, OpLogEntry::TRUNCATE, OpLogEntry::START, size / 4096);

	bool needFtruncate = true;
	double begin = timer_monotonic();

	if (ctx.fallocateSupported && size >= lastFileSize) {
		result = fallocate(fd, 0, 0, size);
		if (result == 0) {
			needFtruncate = false;
		} else {
			int fallocateErrCode = errno;
			TraceEvent(" AsyncFileKAIOAllocateError ").detail(" Fd ", fd).detail(" Filename ", filename).detail(" Size ", size).GetLastError();
			if (fallocateErrCode != EOPNOTSUPP) {
				KAIOLogEvent(logFile, id, OpLogEntry::TRUNCATE, OpLogEntry::COMPLETE, size / 4096, result);
				return io_error();
			}
			// Mark fallocate as unsupported. Try again with truncate.
			ctx.fallocateSupported = false;
		}
	}

	if (needFtruncate) {
		result = ftruncate(fd, size);
	}

	double end = timer_monotonic();
	double elapsed = end - begin;
	// Sampled tracing: the longer the call took, the more likely it is to be reported.
	if (nondeterministicRandom()->random01() < elapsed) {
		TraceEvent(" SlowKAIOTruncate ")
		    .detail(" TruncateTime ", elapsed)
		    .detail(" TruncateBytes ", size - lastFileSize);
	}

	KAIOLogEvent(logFile, id, OpLogEntry::TRUNCATE, OpLogEntry::COMPLETE, size / 4096, result);

	if (result != 0) {
		TraceEvent(" AsyncFileKAIOTruncateError ").detail(" Fd ", fd).detail(" Filename ", filename).GetLastError();
		return io_error();
	}

	lastFileSize = nextFileSize = size;
	return Void();
}
// Waits for `sync` and then throws io_timeout() if `self` has been marked failed in the meantime;
// otherwise completes normally. An error from `sync` itself is not caught here.
// The actor holds `self` for as long as it runs, which is what sync() relies on to keep the file open.
ACTOR static Future < Void > throwErrorIfFailed ( Reference < AsyncFileKAIO > self , Future < Void > sync ) {
2018-08-11 04:57:10 +08:00
wait ( sync ) ;
2017-05-26 04:48:44 +08:00
// `failed` is re-checked after the wait because it can be set while the sync is in flight.
if ( self - > failed ) {
throw io_timeout ( ) ;
}
return Void ( ) ;
}
// Flushes the file's data via AsyncFileEIO::async_fdatasync().
// The returned future fails with io_timeout() if the file is (or becomes) marked failed. The first
// sync() of a file opened with OPEN_ATOMIC_WRITE_AND_CREATE additionally renames the " .part " file
// onto the final filename once the flush is done, and clears that flag so it happens only once.
virtual Future<Void> sync() {
	++countFileLogicalWrites;
	++countLogicalWrites;

	if (failed) {
		return io_timeout();
	}

#if KAIO_LOGGING
	uint32_t id = OpLogEntry::nextID();
#endif
	KAIOLogEvent(logFile, id, OpLogEntry::SYNC, OpLogEntry::START);

	// Alas, AIO f(data)sync doesn't seem to actually be implemented by the kernel, so the flush goes
	// through AsyncFileEIO. The extra reference keeps the file from being closed until it is done.
	Future<Void> flushed =
	    throwErrorIfFailed(Reference<AsyncFileKAIO>::addRef(this), AsyncFileEIO::async_fdatasync(fd));

#if KAIO_LOGGING
	flushed = map(flushed, [=](Void r) mutable { KAIOLogEvent(logFile, id, OpLogEntry::SYNC, OpLogEntry::COMPLETE); return r; });
#endif

	if (!(flags & OPEN_ATOMIC_WRITE_AND_CREATE)) {
		return flushed;
	}

	flags &= ~OPEN_ATOMIC_WRITE_AND_CREATE;
	return AsyncFileEIO::waitAndAtomicRename(flushed, filename + " .part ", filename);
}
// Reports nextFileSize, i.e. the size including writes that have been queued but whose file
// extension may not have been applied yet.
virtual Future < int64_t > size ( ) { return nextFileSize ; }
// Returns the underlying file descriptor.
virtual int64_t debugFD ( ) {
return fd ;
}
// Returns the filename given at construction (without any " .part " suffix added by open()).
virtual std : : string getFilename ( ) {
return filename ;
}
2017-05-27 08:43:28 +08:00
// Closes the file descriptor and, in KAIO_LOGGING builds, the per-file operation log if one was opened.
// The return values of close() and fclose() are not checked.
~ AsyncFileKAIO ( ) {
close ( fd ) ;
# if KAIO_LOGGING
if ( logFile ! = nullptr )
fclose ( logFile ) ;
# endif
}
2017-05-26 04:48:44 +08:00
static void launch ( ) {
if ( ctx . queue . size ( ) & & ctx . outstanding < FLOW_KNOBS - > MAX_OUTSTANDING - FLOW_KNOBS - > MIN_SUBMIT ) {
ctx . submitMetric = true ;
double begin = timer_monotonic ( ) ;
if ( ! ctx . outstanding ) ctx . ioStallBegin = begin ;
IOBlock * toStart [ FLOW_KNOBS - > MAX_OUTSTANDING ] ;
int n = std : : min < size_t > ( FLOW_KNOBS - > MAX_OUTSTANDING - ctx . outstanding , ctx . queue . size ( ) ) ;
int64_t previousTruncateCount = ctx . countPreSubmitTruncate ;
int64_t previousTruncateBytes = ctx . preSubmitTruncateBytes ;
int64_t largestTruncate = 0 ;
for ( int i = 0 ; i < n ; i + + ) {
auto io = ctx . queue . top ( ) ;
2017-05-27 08:43:28 +08:00
KAIOLogBlockEvent ( io , OpLogEntry : : LAUNCH ) ;
2017-05-26 04:48:44 +08:00
ctx . queue . pop ( ) ;
toStart [ i ] = io ;
io - > startTime = now ( ) ;
if ( ctx . ioTimeout > 0 ) {
ctx . appendToRequestList ( io ) ;
}
if ( io - > owner - > lastFileSize ! = io - > owner - > nextFileSize ) {
+ + ctx . countPreSubmitTruncate ;
int64_t truncateSize = io - > owner - > nextFileSize - io - > owner - > lastFileSize ;
ASSERT ( truncateSize > 0 ) ;
ctx . preSubmitTruncateBytes + = truncateSize ;
largestTruncate = std : : max ( largestTruncate , truncateSize ) ;
io - > owner - > truncate ( io - > owner - > nextFileSize ) ;
}
}
double truncateComplete = timer_monotonic ( ) ;
int rc = io_submit ( ctx . iocx , n , ( linux_iocb * * ) toStart ) ;
double end = timer_monotonic ( ) ;
if ( end - begin > FLOW_KNOBS - > SLOW_LOOP_CUTOFF ) {
ctx . slowAioSubmitMetric - > submitDuration = end - truncateComplete ;
ctx . slowAioSubmitMetric - > truncateDuration = truncateComplete - begin ;
ctx . slowAioSubmitMetric - > numTruncates = ctx . countPreSubmitTruncate - previousTruncateCount ;
ctx . slowAioSubmitMetric - > truncateBytes = ctx . preSubmitTruncateBytes - previousTruncateBytes ;
ctx . slowAioSubmitMetric - > largestTruncate = largestTruncate ;
ctx . slowAioSubmitMetric - > log ( ) ;
2019-05-11 05:01:52 +08:00
if ( nondeterministicRandom ( ) - > random01 ( ) < end - begin ) {
2017-05-26 04:48:44 +08:00
TraceEvent ( " SlowKAIOLaunch " )
. detail ( " IOSubmitTime " , end - truncateComplete )
. detail ( " TruncateTime " , truncateComplete - begin )
. detail ( " TruncateCount " , ctx . countPreSubmitTruncate - previousTruncateCount )
. detail ( " TruncateBytes " , ctx . preSubmitTruncateBytes - previousTruncateBytes )
. detail ( " LargestTruncate " , largestTruncate ) ;
}
}
ctx . submitMetric = false ;
+ + ctx . countAIOSubmit ;
double elapsed = timer_monotonic ( ) - begin ;
2020-01-13 08:44:30 +08:00
g_network - > networkInfo . metrics . secSquaredSubmit + = elapsed * elapsed / 2 ;
2017-05-26 04:48:44 +08:00
2018-06-09 02:11:08 +08:00
//TraceEvent("Launched").detail("N", rc).detail("Queued", ctx.queue.size()).detail("Elapsed", elapsed).detail("Outstanding", ctx.outstanding+rc);
2017-05-26 04:48:44 +08:00
//printf("launched: %d/%d in %f us (%d outstanding; lowest prio %d)\n", rc, ctx.queue.size(), elapsed*1e6, ctx.outstanding + rc, toStart[n-1]->getTask());
if ( rc < 0 ) {
if ( errno = = EAGAIN ) {
rc = 0 ;
} else {
2017-05-27 08:43:28 +08:00
KAIOLogBlockEvent ( toStart [ 0 ] , OpLogEntry : : COMPLETE , errno ? - errno : - 1000000 ) ;
2017-05-26 04:48:44 +08:00
// Other errors are assumed to represent failure to issue the first I/O in the list
toStart [ 0 ] - > setResult ( errno ? - errno : - 1000000 ) ;
rc = 1 ;
}
} else
ctx . outstanding + = rc ;
// Any unsubmitted I/Os need to be requeued
2017-05-27 08:43:28 +08:00
for ( int i = rc ; i < n ; i + + ) {
KAIOLogBlockEvent ( toStart [ i ] , OpLogEntry : : REQUEUE ) ;
2017-05-26 04:48:44 +08:00
ctx . queue . push ( toStart [ i ] ) ;
2017-05-27 08:43:28 +08:00
}
2017-05-26 04:48:44 +08:00
}
}
// Set by IOBlock::timeout() when a request on this file exceeds the I/O timeout (and the timeout is
// not warn-only). Once set, read/write/truncate/sync return io_timeout() and results are delivered
// as io_timeout errors.
bool failed ;
private :
int fd , flags ; // descriptor from ::open() and the OPEN_* flags the file was opened with
// lastFileSize: size last established by open()/truncate(). nextFileSize: size after all queued
// writes; launch() extends the file when the two differ.
int64_t lastFileSize , nextFileSize ;
std : : string filename ;
// Operation counters; initialised in the constructor only when not running in simulation.
Int64MetricHandle countFileLogicalWrites ;
Int64MetricHandle countFileLogicalReads ;
Int64MetricHandle countLogicalWrites ;
Int64MetricHandle countLogicalReads ;
// One in-flight read or write request. The linux_iocb base is what io_submit() receives; the
// remaining members carry the completion promise, the owning file, queue priority and the links of
// the circular list used for timeout tracking. A block deletes itself in setResult().
struct IOBlock : linux_iocb, FastAllocated<IOBlock> {
	Promise<int> result; // fulfilled (or failed) by deliver()
	Reference<AsyncFileKAIO> owner; // keeps the file alive while the request exists; set by enqueue()
	int64_t prio; // assigned by enqueue(): task priority in the high 32 bits, minus an issue counter
	IOBlock* prev; // links in Context::submittedRequestList; both null when not in the list
	IOBlock* next;
	double startTime; // now() at the moment launch() picked the request up
#if KAIO_LOGGING
	int32_t iolog_id; // 0 until the first log record for this request assigns an id
#endif

	// Ordering for the submission priority queue: the block with the largest prio is on top.
	struct indirect_order_by_priority {
		bool operator()(IOBlock* a, IOBlock* b) { return a->prio < b->prio; }
	};

	// Creates a request for opcode `op` on descriptor `fd`, with all other iocb fields zeroed.
	IOBlock(int op, int fd) : prio(0), prev(nullptr), next(nullptr), startTime(0) {
		memset((linux_iocb*)this, 0, sizeof(linux_iocb));
		aio_lio_opcode = op;
		aio_fildes = fd;
#if KAIO_LOGGING
		iolog_id = 0;
#endif
	}

	// Task priority at which the result is delivered, derived from the high 32 bits of prio.
	TaskPriority getTask() const { return static_cast<TaskPriority>((prio >> 32) + 1); }

	// Delivers the outcome on `result` after yielding at priority `task`: io_timeout if the file had
	// failed, io_error if r is negative, otherwise r itself.
	ACTOR static void deliver(Promise<int> result, bool failed, int r, TaskPriority task) {
		wait(delay(0, task));
		if (failed) result.sendError(io_timeout());
		else if (r < 0) result.sendError(io_error());
		else result.send(r);
	}

	// Completes the request with result `r` (negative values are negated errno codes), schedules
	// delivery to the waiter and destroys this block. The block must not be used afterwards.
	void setResult(int r) {
		if (r < 0) {
			// The size is only diagnostic; report -1 rather than an indeterminate value if fstat() fails.
			struct stat fst;
			int64_t fileSize = fstat(aio_fildes, &fst) == 0 ? int64_t(fst.st_size) : -1;
			errno = -r; // so that GetLastError() reports the I/O failure rather than a stale code
			TraceEvent(" AsyncFileKAIOIOError ").GetLastError().detail(" Fd ", aio_fildes).detail(" Op ", aio_lio_opcode).detail(" Nbytes ", nbytes).detail(" Offset ", offset).detail(" Ptr ", int64_t(buf))
			    .detail(" Size ", fileSize).detail(" Filename ", owner->filename);
		}
		deliver(result, owner->failed, r, getTask());
		delete this;
	}

	// Reports that this request exceeded the I/O timeout. Unless `warnOnly` is set, the owning file is
	// marked failed.
	void timeout(bool warnOnly) {
		TraceEvent(SevWarnAlways, " AsyncFileKAIOTimeout ").detail(" Fd ", aio_fildes).detail(" Op ", aio_lio_opcode).detail(" Nbytes ", nbytes).detail(" Offset ", offset).detail(" Ptr ", int64_t(buf))
		    .detail(" Filename ", owner->filename);
		g_network->setGlobal(INetwork::enASIOTimedOut, (flowGlobalType) true);
		if (!warnOnly)
			owner->failed = true;
	}
};
2017-05-27 08:43:28 +08:00
2017-05-26 04:48:44 +08:00
struct Context {
io_context_t iocx ;
int evfd ;
int outstanding ;
double ioStallBegin ;
bool fallocateSupported ;
2017-12-01 08:55:39 +08:00
bool fallocateZeroSupported ;
2017-05-26 04:48:44 +08:00
std : : priority_queue < IOBlock * , std : : vector < IOBlock * > , IOBlock : : indirect_order_by_priority > queue ;
Int64MetricHandle countAIOSubmit ;
Int64MetricHandle countAIOCollect ;
Int64MetricHandle submitMetric ;
double ioTimeout ;
bool timeoutWarnOnly ;
IOBlock * submittedRequestList ;
Int64MetricHandle countPreSubmitTruncate ;
Int64MetricHandle preSubmitTruncateBytes ;
EventMetricHandle < SlowAioSubmit > slowAioSubmitMetric ;
uint32_t opsIssued ;
2017-12-01 08:55:39 +08:00
Context ( ) : iocx ( 0 ) , evfd ( - 1 ) , outstanding ( 0 ) , opsIssued ( 0 ) , ioStallBegin ( 0 ) , fallocateSupported ( true ) , fallocateZeroSupported ( true ) , submittedRequestList ( nullptr ) {
2017-05-26 04:48:44 +08:00
setIOTimeout ( 0 ) ;
}
void setIOTimeout ( double timeout ) {
2017-07-06 05:43:10 +08:00
ioTimeout = fabs ( timeout ) ;
2017-05-26 04:48:44 +08:00
timeoutWarnOnly = timeout < 0 ;
}
void appendToRequestList ( IOBlock * io ) {
ASSERT ( ! io - > next & & ! io - > prev ) ;
if ( submittedRequestList ) {
io - > prev = submittedRequestList - > prev ;
io - > prev - > next = io ;
submittedRequestList - > prev = io ;
io - > next = submittedRequestList ;
}
else {
submittedRequestList = io ;
io - > next = io - > prev = io ;
}
}
void removeFromRequestList ( IOBlock * io ) {
if ( io - > next = = nullptr ) {
ASSERT ( io - > prev = = nullptr ) ;
return ;
}
ASSERT ( io - > prev ! = nullptr ) ;
if ( io = = io - > next ) {
ASSERT ( io = = submittedRequestList & & io = = io - > prev ) ;
submittedRequestList = nullptr ;
}
else {
io - > next - > prev = io - > prev ;
io - > prev - > next = io - > next ;
if ( submittedRequestList = = io ) {
submittedRequestList = io - > next ;
}
}
io - > next = io - > prev = nullptr ;
}
} ;
static Context ctx ;
explicit AsyncFileKAIO ( int fd , int flags , std : : string const & filename ) : fd ( fd ) , flags ( flags ) , filename ( filename ) , failed ( false ) {
2020-03-27 16:49:35 +08:00
ASSERT ( ! FLOW_KNOBS - > DISABLE_POSIX_KERNEL_AIO ) ;
2017-05-26 04:48:44 +08:00
if ( ! g_network - > isSimulated ( ) ) {
countFileLogicalWrites . init ( LiteralStringRef ( " AsyncFile.CountFileLogicalWrites " ) , filename ) ;
countFileLogicalReads . init ( LiteralStringRef ( " AsyncFile.CountFileLogicalReads " ) , filename ) ;
countLogicalWrites . init ( LiteralStringRef ( " AsyncFile.CountLogicalWrites " ) ) ;
countLogicalReads . init ( LiteralStringRef ( " AsyncFile.CountLogicalReads " ) ) ;
}
2017-05-27 08:43:28 +08:00
# if KAIO_LOGGING
logFile = nullptr ;
// TODO: Don't do this hacky investigation-specific thing
StringRef fname ( filename ) ;
if ( fname . endsWith ( LiteralStringRef ( " .sqlite " ) ) | | fname . endsWith ( LiteralStringRef ( " .sqlite-wal " ) ) ) {
2017-06-01 08:03:15 +08:00
std : : string logFileName = basename ( filename ) ;
2017-05-27 08:43:28 +08:00
while ( logFileName . find ( " / " ) ! = std : : string : : npos )
logFileName = logFileName . substr ( logFileName . find ( " / " ) + 1 ) ;
if ( ! logFileName . empty ( ) ) {
// TODO: don't hardcode this path
std : : string logPath ( " /data/v7/fdb/ " ) ;
try {
platform : : createDirectory ( logPath ) ;
logFileName = logPath + format ( " %s.iolog " , logFileName . c_str ( ) ) ;
logFile = fopen ( logFileName . c_str ( ) , " r+ " ) ;
if ( logFile = = nullptr )
logFile = fopen ( logFileName . c_str ( ) , " w " ) ;
if ( logFile ! = nullptr )
TraceEvent ( " KAIOLogOpened " ) . detail ( " File " , filename ) . detail ( " LogFile " , logFileName ) ;
2017-06-01 08:03:15 +08:00
else {
2017-05-27 08:43:28 +08:00
TraceEvent ( SevWarn , " KAIOLogOpenFailure " )
. detail ( " File " , filename )
. detail ( " LogFile " , logFileName )
. detail ( " ErrorCode " , errno )
. detail ( " ErrorDesc " , strerror ( errno ) ) ;
2017-06-01 08:03:15 +08:00
}
2017-05-27 08:43:28 +08:00
} catch ( Error & e ) {
TraceEvent ( SevError , " KAIOLogOpenFailure " ) . error ( e ) ;
}
}
}
# endif
2017-05-26 04:48:44 +08:00
}
// Finishes preparing `io` and puts it on the shared submission queue; launch() submits it later.
// Asserts that the buffer address, offset and byte count are all multiples of 4096.
// `op` is not used by this function. `owner` is retained by the request so the file outlives it.
void enqueue(IOBlock* io, const char* op, AsyncFileKAIO* owner) {
	ASSERT(int64_t(io->buf) % 4096 == 0 && io->offset % 4096 == 0 && io->nbytes % 4096 == 0);
	KAIOLogBlockEvent(owner->logFile, io, OpLogEntry::START);

	// Ask for completion to be signalled on the shared event fd.
	// NOTE(review): flag value 1 is presumably IOCB_FLAG_RESFD -- confirm against linux_kaio.h.
	io->flags |= 1;
	io->eventfd = ctx.evfd;

	// Priority: current task priority in the high 32 bits; within one task priority, requests issued
	// earlier get the larger value because the growing issue counter is subtracted.
	const int64_t taskBits = int64_t(g_network->getCurrentTask()) << 32;
	const uint32_t issueNumber = ++ctx.opsIssued;
	io->prio = taskBits - issueNumber;

	io->owner = Reference<AsyncFileKAIO>::addRef(owner);
	ctx.queue.push(io);
}
static int openFlags ( int flags ) {
2019-05-14 10:05:09 +08:00
int oflags = O_DIRECT | O_CLOEXEC ;
2017-05-26 04:48:44 +08:00
ASSERT ( bool ( flags & OPEN_READONLY ) ! = bool ( flags & OPEN_READWRITE ) ) ; // readonly xor readwrite
if ( flags & OPEN_EXCLUSIVE ) oflags | = O_EXCL ;
if ( flags & OPEN_CREATE ) oflags | = O_CREAT ;
if ( flags & OPEN_READONLY ) oflags | = O_RDONLY ;
if ( flags & OPEN_READWRITE ) oflags | = O_RDWR ;
if ( flags & OPEN_ATOMIC_WRITE_AND_CREATE ) oflags | = O_TRUNC ;
return oflags ;
}
// Completion loop, started once by init(). Each round waits for the event fd to become readable,
// collects finished requests from the kernel AIO context, times out requests that have been
// submitted for longer than ctx.ioTimeout, and completes the collected requests via setResult().
// Throws io_error() if io_getevents() fails with anything other than EINTR.
ACTOR static void poll ( Reference < IEventFD > ev ) {
loop {
2019-04-21 01:39:20 +08:00
wait ( success ( ev - > read ( ) ) ) ;
2017-05-26 04:48:44 +08:00
2019-06-25 17:47:35 +08:00
wait ( delay ( 0 , TaskPriority : : DiskIOComplete ) ) ;
2017-05-26 04:48:44 +08:00
// Note: this array shadows the `ev` parameter for the rest of the loop body.
linux_ioresult ev [ FLOW_KNOBS - > MAX_OUTSTANDING ] ;
// Zero timeout and a minimum of 0 events are passed to io_getevents() below.
timespec tm ; tm . tv_sec = 0 ; tm . tv_nsec = 0 ;
int n ;
// Retry only when the call was interrupted (EINTR).
loop {
n = io_getevents ( ctx . iocx , 0 , FLOW_KNOBS - > MAX_OUTSTANDING , ev , & tm ) ;
if ( n > = 0 | | errno ! = EINTR ) break ;
}
+ + ctx . countAIOCollect ;
// printf("io_getevents: collected %d/%d in %f us (%d queued)\n", n, ctx.outstanding, (timer()-before)*1e6, ctx.queue.size());
if ( n < 0 ) {
// printf("io_getevents failed: %d\n", errno);
TraceEvent ( " IOGetEventsError " ) . GetLastError ( ) ;
throw io_error ( ) ;
}
// Account the time since the previous stall marker whenever at least one request completed.
if ( n ) {
double t = timer_monotonic ( ) ;
double elapsed = t - ctx . ioStallBegin ;
ctx . ioStallBegin = t ;
2020-01-13 08:44:30 +08:00
g_network - > networkInfo . metrics . secSquaredDiskStall + = elapsed * elapsed / 2 ;
2017-05-26 04:48:44 +08:00
}
ctx . outstanding - = n ;
// Time out requests from the head of the submitted list until the head is within the timeout.
// A timed-out request is only unlinked here; it is completed when the kernel reports it.
if ( ctx . ioTimeout > 0 ) {
double currentTime = now ( ) ;
while ( ctx . submittedRequestList & & currentTime - ctx . submittedRequestList - > startTime > ctx . ioTimeout ) {
ctx . submittedRequestList - > timeout ( ctx . timeoutWarnOnly ) ;
ctx . removeFromRequestList ( ctx . submittedRequestList ) ;
}
}
for ( int i = 0 ; i < n ; i + + ) {
IOBlock * iob = static_cast < IOBlock * > ( ev [ i ] . iocb ) ;
2017-05-27 08:43:28 +08:00
KAIOLogBlockEvent ( iob , OpLogEntry : : COMPLETE , ev [ i ] . result ) ;
2017-05-26 04:48:44 +08:00
// Unlink before setResult(), which deletes the block.
if ( ctx . ioTimeout > 0 ) {
ctx . removeFromRequestList ( iob ) ;
}
iob - > setResult ( ev [ i ] . result ) ;
}
}
}
} ;
2017-05-27 08:43:28 +08:00
# if KAIO_LOGGING
// Call from contexts where only an ioblock is available, log if its owner is set
void AsyncFileKAIO : : KAIOLogBlockEvent ( IOBlock * ioblock , OpLogEntry : : EStage stage , uint32_t result ) {
if ( ioblock - > owner )
return KAIOLogBlockEvent ( ioblock - > owner - > logFile , ioblock , stage , result ) ;
}
void AsyncFileKAIO : : KAIOLogBlockEvent ( FILE * logFile , IOBlock * ioblock , OpLogEntry : : EStage stage , uint32_t result ) {
if ( logFile ! = nullptr ) {
// Figure out what type of operation this is
OpLogEntry : : EOperation op ;
if ( ioblock - > aio_lio_opcode = = IO_CMD_PREAD )
op = OpLogEntry : : READ ;
else if ( ioblock - > aio_lio_opcode = = IO_CMD_PWRITE )
op = OpLogEntry : : WRITE ;
else
return ;
// Assign this IO operation an io log id number if it doesn't already have one
if ( ioblock - > iolog_id = = 0 )
ioblock - > iolog_id = OpLogEntry : : nextID ( ) ;
OpLogEntry e ;
e . timestamp = timer_int ( ) ;
e . op = ( uint8_t ) op ;
e . id = ioblock - > iolog_id ;
e . stage = ( uint8_t ) stage ;
e . pageOffset = ( uint32_t ) ( ioblock - > offset / 4096 ) ;
e . pageCount = ( uint8_t ) ( ioblock - > nbytes / 4096 ) ;
e . result = result ;
// Log a checksum for Writes up to the Complete stage or Reads starting from the Complete stage
if ( ( op = = OpLogEntry : : WRITE & & stage < = OpLogEntry : : COMPLETE ) | | ( op = = OpLogEntry : : READ & & stage > = OpLogEntry : : COMPLETE ) )
2020-01-14 10:40:35 +08:00
e . checksum = crc32c_append ( 0xab12fd93 , ioblock - > buf , ioblock - > nbytes ) ;
2017-05-27 08:43:28 +08:00
else
e . checksum = 0 ;
e . log ( logFile ) ;
}
}
void AsyncFileKAIO : : KAIOLogEvent ( FILE * logFile , uint32_t id , OpLogEntry : : EOperation op , OpLogEntry : : EStage stage , uint32_t pageOffset , uint32_t result ) {
if ( logFile ! = nullptr ) {
OpLogEntry e ;
e . timestamp = timer_int ( ) ;
e . id = id ;
e . op = ( uint8_t ) op ;
e . stage = ( uint8_t ) stage ;
e . pageOffset = pageOffset ;
e . pageCount = 0 ;
e . checksum = 0 ;
e . result = result ;
e . log ( logFile ) ;
}
}
# endif
2017-05-26 04:48:44 +08:00
// Test helper: for `numIterations` rounds, issues between 1 and 19 random 4096-byte reads or writes
// at 4096-aligned offsets below `fileSize`, waits for all of them, then syncs the file.
// With expectedToSucceed == true every operation and sync must succeed. With false, any error must be
// io_timeout, every sync must fail that way, and at least one read/write must have timed out.
ACTOR Future < Void > runTestOps ( Reference < IAsyncFile > f , int numIterations , int fileSize , bool expectedToSucceed ) {
state void * buf = FastAllocator < 4096 > : : allocate ( ) ; // we leak this if there is an error, but that shouldn't be a big deal
state int iteration = 0 ;
state bool opTimedOut = false ;
for ( ; iteration < numIterations ; + + iteration ) {
state std : : vector < Future < Void > > futures ;
2019-05-11 05:01:52 +08:00
state int numOps = deterministicRandom ( ) - > randomInt ( 1 , 20 ) ;
2017-05-26 04:48:44 +08:00
// Start all operations of this round before waiting on any of them. Every operation uses the same
// buffer, so the data read or written is not meaningful.
for ( ; numOps > 0 ; - - numOps ) {
2019-05-11 05:01:52 +08:00
if ( deterministicRandom ( ) - > coinflip ( ) ) {
futures . push_back ( success ( f - > read ( buf , 4096 , deterministicRandom ( ) - > randomInt ( 0 , fileSize ) / 4096 * 4096 ) ) ) ;
2017-05-26 04:48:44 +08:00
}
else {
2019-05-11 05:01:52 +08:00
futures . push_back ( f - > write ( buf , 4096 , deterministicRandom ( ) - > randomInt ( 0 , fileSize ) / 4096 * 4096 ) ) ;
2017-05-26 04:48:44 +08:00
}
}
state int fIndex = 0 ;
for ( ; fIndex < futures . size ( ) ; + + fIndex ) {
try {
2018-08-11 04:57:10 +08:00
wait ( futures [ fIndex ] ) ;
2017-05-26 04:48:44 +08:00
}
catch ( Error & e ) {
ASSERT ( ! expectedToSucceed ) ;
ASSERT ( e . code ( ) = = error_code_io_timeout ) ;
opTimedOut = true ;
}
}
// Reaching the ASSERT after the wait means the sync succeeded, which is only valid when success is expected.
try {
2018-08-11 04:57:10 +08:00
wait ( f - > sync ( ) & & delay ( 0.1 ) ) ;
2017-05-26 04:48:44 +08:00
ASSERT ( expectedToSucceed ) ;
}
catch ( Error & e ) {
ASSERT ( ! expectedToSucceed & & e . code ( ) = = error_code_io_timeout ) ;
}
}
FastAllocator < 4096 > : : release ( buf ) ;
ASSERT ( expectedToSucceed | | opTimedOut ) ;
return Void ( ) ;
}
2018-10-06 13:09:58 +08:00
// Unit test for the submitted-request (timeout) list: runs random I/O against a scratch file with
// the timeout disabled, with a long timeout, and with a tiny timeout that is expected to trip.
// The scratch file is deleted whether the test passes or throws.
TEST_CASE ( " /fdbrpc/AsyncFileKAIO/RequestList " ) {
2019-04-06 07:29:30 +08:00
// This test does nothing in simulation because simulation doesn't support AsyncFileKAIO
if ( ! g_network - > isSimulated ( ) ) {
2019-04-06 01:36:38 +08:00
state Reference < IAsyncFile > f ;
2017-05-26 04:48:44 +08:00
try {
2019-04-06 02:28:49 +08:00
Reference < IAsyncFile > f_ = wait ( AsyncFileKAIO : : open (
2019-04-06 01:36:38 +08:00
" /tmp/__KAIO_TEST_FILE__ " ,
IAsyncFile : : OPEN_UNBUFFERED | IAsyncFile : : OPEN_READWRITE | IAsyncFile : : OPEN_CREATE , 0666 , nullptr ) ) ;
2019-04-06 02:28:49 +08:00
f = f_ ;
state int fileSize = 2 < < 27 ; // 2^28 bytes = 256 MiB
2018-08-11 04:57:10 +08:00
wait ( f - > truncate ( fileSize ) ) ;
2017-05-26 04:48:44 +08:00
// Test that the request list works as intended with default timeout
AsyncFileKAIO : : setTimeout ( 0.0 ) ;
2018-08-11 04:57:10 +08:00
wait ( runTestOps ( f , 100 , fileSize , true ) ) ;
2017-05-26 04:48:44 +08:00
ASSERT ( ! ( ( AsyncFileKAIO * ) f . getPtr ( ) ) - > failed ) ;
// Test that the request list works as intended with long timeout
AsyncFileKAIO : : setTimeout ( 20.0 ) ;
2018-08-11 04:57:10 +08:00
wait ( runTestOps ( f , 100 , fileSize , true ) ) ;
2017-05-26 04:48:44 +08:00
ASSERT ( ! ( ( AsyncFileKAIO * ) f . getPtr ( ) ) - > failed ) ;
// Test that requests timeout correctly
AsyncFileKAIO : : setTimeout ( 0.0001 ) ;
2018-08-11 04:57:10 +08:00
wait ( runTestOps ( f , 10 , fileSize , false ) ) ;
2017-05-26 04:48:44 +08:00
ASSERT ( ( ( AsyncFileKAIO * ) f . getPtr ( ) ) - > failed ) ;
2019-04-06 02:28:49 +08:00
} catch ( Error & e ) {
2017-05-26 04:48:44 +08:00
// Keep a copy of the error across the wait below, then rethrow it after cleaning up.
state Error err = e ;
if ( f ) {
2018-08-11 04:57:10 +08:00
wait ( AsyncFileEIO : : deleteFile ( f - > getFilename ( ) , true ) ) ;
2017-05-26 04:48:44 +08:00
}
throw err ;
}
2018-08-11 04:57:10 +08:00
wait ( AsyncFileEIO : : deleteFile ( f - > getFilename ( ) , true ) ) ;
2017-05-26 04:48:44 +08:00
}
return Void ( ) ;
}
// Definition of the process-wide AIO context shared by all AsyncFileKAIO instances.
// NOTE(review): this is a non-inline definition inside a header, so the header presumably must end up
// in exactly one translation unit -- confirm.
AsyncFileKAIO : : Context AsyncFileKAIO : : ctx ;
2018-08-11 06:47:41 +08:00
# include "flow/unactorcompiler.h"
2017-05-26 04:48:44 +08:00
# endif
# endif