ocfs2: implement directory read-ahead
Uptodate.c now knows about read-ahead buffers. Use some more aggressive logic in ocfs2_readdir(). The two functions which currently use directory read-ahead are ocfs2_find_entry() and ocfs2_readdir(). Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
This commit is contained in:
parent
e0b4096d34
commit
aa9588741d
|
@ -100,6 +100,9 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
|
||||||
mlog_entry("(block=(%llu), nr=(%d), flags=%d, inode=%p)\n",
|
mlog_entry("(block=(%llu), nr=(%d), flags=%d, inode=%p)\n",
|
||||||
(unsigned long long)block, nr, flags, inode);
|
(unsigned long long)block, nr, flags, inode);
|
||||||
|
|
||||||
|
BUG_ON((flags & OCFS2_BH_READAHEAD) &&
|
||||||
|
(!inode || !(flags & OCFS2_BH_CACHED)));
|
||||||
|
|
||||||
if (osb == NULL || osb->sb == NULL || bhs == NULL) {
|
if (osb == NULL || osb->sb == NULL || bhs == NULL) {
|
||||||
status = -EINVAL;
|
status = -EINVAL;
|
||||||
mlog_errno(status);
|
mlog_errno(status);
|
||||||
|
@ -140,6 +143,30 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
|
||||||
bh = bhs[i];
|
bh = bhs[i];
|
||||||
ignore_cache = 0;
|
ignore_cache = 0;
|
||||||
|
|
||||||
|
/* There are three read-ahead cases here which we need to
|
||||||
|
* be concerned with. All three assume a buffer has
|
||||||
|
* previously been submitted with OCFS2_BH_READAHEAD
|
||||||
|
* and it hasn't yet completed I/O.
|
||||||
|
*
|
||||||
|
* 1) The current request is sync to disk. This rarely
|
||||||
|
* happens these days, and never when performance
|
||||||
|
* matters - the code can just wait on the buffer
|
||||||
|
* lock and re-submit.
|
||||||
|
*
|
||||||
|
* 2) The current request is cached, but not
|
||||||
|
* readahead. ocfs2_buffer_uptodate() will return
|
||||||
|
* false anyway, so we'll wind up waiting on the
|
||||||
|
* buffer lock to do I/O. We re-check the request
|
||||||
|
* after getting the lock to avoid a re-submit.
|
||||||
|
*
|
||||||
|
* 3) The current request is readahead (and so must
|
||||||
|
* also be a caching one). We short circuit if the
|
||||||
|
* buffer is locked (under I/O) and if it's in the
|
||||||
|
* uptodate cache. The re-check from #2 catches the
|
||||||
|
* case that the previous read-ahead completes just
|
||||||
|
* before our is-it-in-flight check.
|
||||||
|
*/
|
||||||
|
|
||||||
if (flags & OCFS2_BH_CACHED &&
|
if (flags & OCFS2_BH_CACHED &&
|
||||||
!ocfs2_buffer_uptodate(inode, bh)) {
|
!ocfs2_buffer_uptodate(inode, bh)) {
|
||||||
mlog(ML_UPTODATE,
|
mlog(ML_UPTODATE,
|
||||||
|
@ -169,6 +196,14 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* A read-ahead request was made - if the
|
||||||
|
* buffer is already under read-ahead from a
|
||||||
|
* previously submitted request then we are
|
||||||
|
* done here. */
|
||||||
|
if ((flags & OCFS2_BH_READAHEAD)
|
||||||
|
&& ocfs2_buffer_read_ahead(inode, bh))
|
||||||
|
continue;
|
||||||
|
|
||||||
lock_buffer(bh);
|
lock_buffer(bh);
|
||||||
if (buffer_jbd(bh)) {
|
if (buffer_jbd(bh)) {
|
||||||
#ifdef CATCH_BH_JBD_RACES
|
#ifdef CATCH_BH_JBD_RACES
|
||||||
|
@ -181,13 +216,22 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
|
||||||
continue;
|
continue;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Re-check ocfs2_buffer_uptodate() as a
|
||||||
|
* previously read-ahead buffer may have
|
||||||
|
* completed I/O while we were waiting for the
|
||||||
|
* buffer lock. */
|
||||||
|
if ((flags & OCFS2_BH_CACHED)
|
||||||
|
&& !(flags & OCFS2_BH_READAHEAD)
|
||||||
|
&& ocfs2_buffer_uptodate(inode, bh)) {
|
||||||
|
unlock_buffer(bh);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
clear_buffer_uptodate(bh);
|
clear_buffer_uptodate(bh);
|
||||||
get_bh(bh); /* for end_buffer_read_sync() */
|
get_bh(bh); /* for end_buffer_read_sync() */
|
||||||
bh->b_end_io = end_buffer_read_sync;
|
bh->b_end_io = end_buffer_read_sync;
|
||||||
if (flags & OCFS2_BH_READAHEAD)
|
submit_bh(READ, bh);
|
||||||
submit_bh(READA, bh);
|
|
||||||
else
|
|
||||||
submit_bh(READ, bh);
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -197,34 +241,39 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
|
||||||
for (i = (nr - 1); i >= 0; i--) {
|
for (i = (nr - 1); i >= 0; i--) {
|
||||||
bh = bhs[i];
|
bh = bhs[i];
|
||||||
|
|
||||||
/* We know this can't have changed as we hold the
|
if (!(flags & OCFS2_BH_READAHEAD)) {
|
||||||
* inode sem. Avoid doing any work on the bh if the
|
/* We know this can't have changed as we hold the
|
||||||
* journal has it. */
|
* inode sem. Avoid doing any work on the bh if the
|
||||||
if (!buffer_jbd(bh))
|
* journal has it. */
|
||||||
wait_on_buffer(bh);
|
if (!buffer_jbd(bh))
|
||||||
|
wait_on_buffer(bh);
|
||||||
|
|
||||||
if (!buffer_uptodate(bh)) {
|
if (!buffer_uptodate(bh)) {
|
||||||
/* Status won't be cleared from here on out,
|
/* Status won't be cleared from here on out,
|
||||||
* so we can safely record this and loop back
|
* so we can safely record this and loop back
|
||||||
* to cleanup the other buffers. Don't need to
|
* to cleanup the other buffers. Don't need to
|
||||||
* remove the clustered uptodate information
|
* remove the clustered uptodate information
|
||||||
* for this bh as it's not marked locally
|
* for this bh as it's not marked locally
|
||||||
* uptodate. */
|
* uptodate. */
|
||||||
status = -EIO;
|
status = -EIO;
|
||||||
brelse(bh);
|
brelse(bh);
|
||||||
bhs[i] = NULL;
|
bhs[i] = NULL;
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Always set the buffer in the cache, even if it was
|
||||||
|
* a forced read, or read-ahead which hasn't yet
|
||||||
|
* completed. */
|
||||||
if (inode)
|
if (inode)
|
||||||
ocfs2_set_buffer_uptodate(inode, bh);
|
ocfs2_set_buffer_uptodate(inode, bh);
|
||||||
}
|
}
|
||||||
if (inode)
|
if (inode)
|
||||||
mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
|
mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
|
||||||
|
|
||||||
mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s\n",
|
mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n",
|
||||||
(unsigned long long)block, nr,
|
(unsigned long long)block, nr,
|
||||||
(!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes");
|
(!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes", flags);
|
||||||
|
|
||||||
bail:
|
bail:
|
||||||
|
|
||||||
|
|
|
@ -49,7 +49,7 @@ int ocfs2_read_blocks(struct ocfs2_super *osb,
|
||||||
|
|
||||||
|
|
||||||
#define OCFS2_BH_CACHED 1
|
#define OCFS2_BH_CACHED 1
|
||||||
#define OCFS2_BH_READAHEAD 8 /* use this to pass READA down to submit_bh */
|
#define OCFS2_BH_READAHEAD 8
|
||||||
|
|
||||||
static inline int ocfs2_read_block(struct ocfs2_super * osb, u64 off,
|
static inline int ocfs2_read_block(struct ocfs2_super * osb, u64 off,
|
||||||
struct buffer_head **bh, int flags,
|
struct buffer_head **bh, int flags,
|
||||||
|
|
|
@ -74,14 +74,14 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
|
||||||
int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
|
int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
|
||||||
{
|
{
|
||||||
int error = 0;
|
int error = 0;
|
||||||
unsigned long offset, blk;
|
unsigned long offset, blk, last_ra_blk = 0;
|
||||||
int i, num, stored;
|
int i, stored;
|
||||||
struct buffer_head * bh, * tmp;
|
struct buffer_head * bh, * tmp;
|
||||||
struct ocfs2_dir_entry * de;
|
struct ocfs2_dir_entry * de;
|
||||||
int err;
|
int err;
|
||||||
struct inode *inode = filp->f_dentry->d_inode;
|
struct inode *inode = filp->f_dentry->d_inode;
|
||||||
struct super_block * sb = inode->i_sb;
|
struct super_block * sb = inode->i_sb;
|
||||||
int have_disk_lock = 0;
|
unsigned int ra_sectors = 16;
|
||||||
|
|
||||||
mlog_entry("dirino=%llu\n",
|
mlog_entry("dirino=%llu\n",
|
||||||
(unsigned long long)OCFS2_I(inode)->ip_blkno);
|
(unsigned long long)OCFS2_I(inode)->ip_blkno);
|
||||||
|
@ -95,9 +95,8 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
|
||||||
mlog_errno(error);
|
mlog_errno(error);
|
||||||
/* we haven't got any yet, so propagate the error. */
|
/* we haven't got any yet, so propagate the error. */
|
||||||
stored = error;
|
stored = error;
|
||||||
goto bail;
|
goto bail_nolock;
|
||||||
}
|
}
|
||||||
have_disk_lock = 1;
|
|
||||||
|
|
||||||
offset = filp->f_pos & (sb->s_blocksize - 1);
|
offset = filp->f_pos & (sb->s_blocksize - 1);
|
||||||
|
|
||||||
|
@ -113,16 +112,21 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/* The idea here is to begin with 8k read-ahead and to stay
|
||||||
* Do the readahead (8k)
|
* 4k ahead of our current position.
|
||||||
*/
|
*
|
||||||
if (!offset) {
|
* TODO: Use the pagecache for this. We just need to
|
||||||
for (i = 16 >> (sb->s_blocksize_bits - 9), num = 0;
|
* make sure it's cluster-safe... */
|
||||||
|
if (!last_ra_blk
|
||||||
|
|| (((last_ra_blk - blk) << 9) <= (ra_sectors / 2))) {
|
||||||
|
for (i = ra_sectors >> (sb->s_blocksize_bits - 9);
|
||||||
i > 0; i--) {
|
i > 0; i--) {
|
||||||
tmp = ocfs2_bread(inode, ++blk, &err, 1);
|
tmp = ocfs2_bread(inode, ++blk, &err, 1);
|
||||||
if (tmp)
|
if (tmp)
|
||||||
brelse(tmp);
|
brelse(tmp);
|
||||||
}
|
}
|
||||||
|
last_ra_blk = blk;
|
||||||
|
ra_sectors = 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
revalidate:
|
revalidate:
|
||||||
|
@ -194,9 +198,9 @@ revalidate:
|
||||||
|
|
||||||
stored = 0;
|
stored = 0;
|
||||||
bail:
|
bail:
|
||||||
if (have_disk_lock)
|
ocfs2_meta_unlock(inode, 0);
|
||||||
ocfs2_meta_unlock(inode, 0);
|
|
||||||
|
|
||||||
|
bail_nolock:
|
||||||
mlog_exit(stored);
|
mlog_exit(stored);
|
||||||
|
|
||||||
return stored;
|
return stored;
|
||||||
|
|
|
@ -1050,12 +1050,8 @@ struct buffer_head *ocfs2_bread(struct inode *inode,
|
||||||
u64 p_blkno;
|
u64 p_blkno;
|
||||||
int readflags = OCFS2_BH_CACHED;
|
int readflags = OCFS2_BH_CACHED;
|
||||||
|
|
||||||
#if 0
|
|
||||||
/* only turn this on if we know we can deal with read_block
|
|
||||||
* returning nothing */
|
|
||||||
if (reada)
|
if (reada)
|
||||||
readflags |= OCFS2_BH_READAHEAD;
|
readflags |= OCFS2_BH_READAHEAD;
|
||||||
#endif
|
|
||||||
|
|
||||||
if (((u64)block << inode->i_sb->s_blocksize_bits) >=
|
if (((u64)block << inode->i_sb->s_blocksize_bits) >=
|
||||||
i_size_read(inode)) {
|
i_size_read(inode)) {
|
||||||
|
|
|
@ -56,6 +56,7 @@
|
||||||
#include "journal.h"
|
#include "journal.h"
|
||||||
#include "namei.h"
|
#include "namei.h"
|
||||||
#include "suballoc.h"
|
#include "suballoc.h"
|
||||||
|
#include "super.h"
|
||||||
#include "symlink.h"
|
#include "symlink.h"
|
||||||
#include "sysfile.h"
|
#include "sysfile.h"
|
||||||
#include "uptodate.h"
|
#include "uptodate.h"
|
||||||
|
@ -1962,13 +1963,8 @@ restart:
|
||||||
}
|
}
|
||||||
num++;
|
num++;
|
||||||
|
|
||||||
/* XXX: questionable readahead stuff here */
|
|
||||||
bh = ocfs2_bread(dir, b++, &err, 1);
|
bh = ocfs2_bread(dir, b++, &err, 1);
|
||||||
bh_use[ra_max] = bh;
|
bh_use[ra_max] = bh;
|
||||||
#if 0 // ???
|
|
||||||
if (bh)
|
|
||||||
ll_rw_block(READ, 1, &bh);
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if ((bh = bh_use[ra_ptr++]) == NULL)
|
if ((bh = bh_use[ra_ptr++]) == NULL)
|
||||||
|
@ -1976,6 +1972,10 @@ restart:
|
||||||
wait_on_buffer(bh);
|
wait_on_buffer(bh);
|
||||||
if (!buffer_uptodate(bh)) {
|
if (!buffer_uptodate(bh)) {
|
||||||
/* read error, skip block & hope for the best */
|
/* read error, skip block & hope for the best */
|
||||||
|
ocfs2_error(dir->i_sb, "reading directory %llu, "
|
||||||
|
"offset %lu\n",
|
||||||
|
(unsigned long long)OCFS2_I(dir)->ip_blkno,
|
||||||
|
block);
|
||||||
brelse(bh);
|
brelse(bh);
|
||||||
goto next;
|
goto next;
|
||||||
}
|
}
|
||||||
|
|
|
@ -206,7 +206,10 @@ static int ocfs2_buffer_cached(struct ocfs2_inode_info *oi,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Warning: even if it returns true, this does *not* guarantee that
|
/* Warning: even if it returns true, this does *not* guarantee that
|
||||||
* the block is stored in our inode metadata cache. */
|
* the block is stored in our inode metadata cache.
|
||||||
|
*
|
||||||
|
* This can be called under lock_buffer()
|
||||||
|
*/
|
||||||
int ocfs2_buffer_uptodate(struct inode *inode,
|
int ocfs2_buffer_uptodate(struct inode *inode,
|
||||||
struct buffer_head *bh)
|
struct buffer_head *bh)
|
||||||
{
|
{
|
||||||
|
@ -226,6 +229,16 @@ int ocfs2_buffer_uptodate(struct inode *inode,
|
||||||
return ocfs2_buffer_cached(OCFS2_I(inode), bh);
|
return ocfs2_buffer_cached(OCFS2_I(inode), bh);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Determine whether a buffer is currently out on a read-ahead request.
|
||||||
|
* ip_io_mutex should be held to serialize submitters with the logic here.
|
||||||
|
*/
|
||||||
|
int ocfs2_buffer_read_ahead(struct inode *inode,
|
||||||
|
struct buffer_head *bh)
|
||||||
|
{
|
||||||
|
return buffer_locked(bh) && ocfs2_buffer_cached(OCFS2_I(inode), bh);
|
||||||
|
}
|
||||||
|
|
||||||
/* Requires ip_lock */
|
/* Requires ip_lock */
|
||||||
static void ocfs2_append_cache_array(struct ocfs2_caching_info *ci,
|
static void ocfs2_append_cache_array(struct ocfs2_caching_info *ci,
|
||||||
sector_t block)
|
sector_t block)
|
||||||
|
@ -403,7 +416,11 @@ out_free:
|
||||||
*
|
*
|
||||||
* Note that this function may actually fail to insert the block if
|
* Note that this function may actually fail to insert the block if
|
||||||
* memory cannot be allocated. This is not fatal however (but may
|
* memory cannot be allocated. This is not fatal however (but may
|
||||||
* result in a performance penalty) */
|
* result in a performance penalty)
|
||||||
|
*
|
||||||
|
* Readahead buffers can be passed in here before the I/O request is
|
||||||
|
* completed.
|
||||||
|
*/
|
||||||
void ocfs2_set_buffer_uptodate(struct inode *inode,
|
void ocfs2_set_buffer_uptodate(struct inode *inode,
|
||||||
struct buffer_head *bh)
|
struct buffer_head *bh)
|
||||||
{
|
{
|
||||||
|
|
|
@ -40,5 +40,7 @@ void ocfs2_set_new_buffer_uptodate(struct inode *inode,
|
||||||
struct buffer_head *bh);
|
struct buffer_head *bh);
|
||||||
void ocfs2_remove_from_cache(struct inode *inode,
|
void ocfs2_remove_from_cache(struct inode *inode,
|
||||||
struct buffer_head *bh);
|
struct buffer_head *bh);
|
||||||
|
int ocfs2_buffer_read_ahead(struct inode *inode,
|
||||||
|
struct buffer_head *bh);
|
||||||
|
|
||||||
#endif /* OCFS2_UPTODATE_H */
|
#endif /* OCFS2_UPTODATE_H */
|
||||||
|
|
Loading…
Reference in New Issue