fuse: implement NFS-like readdirplus support
This patch implements readdirplus support in FUSE, similar to NFS. The payload returned in the readdirplus call contains 'fuse_entry_out' structure thereby providing all the necessary inputs for 'faking' a lookup() operation on the spot. If the dentry and inode already existed (for e.g. in a re-run of ls -l) then just the inode attributes timeout and dentry timeout are refreshed. With a simple client->network->server implementation of a FUSE based filesystem, the following performance observations were made: Test: Performing a filesystem crawl over 20,000 files with sh# time ls -lR /mnt Without readdirplus: Run 1: 18.1s Run 2: 16.0s Run 3: 16.2s With readdirplus: Run 1: 4.1s Run 2: 3.8s Run 3: 3.8s The performance improvement is significant as it avoided 20,000 upcalls calls (lookup). Cache consistency is no worse than what already is. Signed-off-by: Anand V. Avati <avati@redhat.com> Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
This commit is contained in:
parent
ff7532ca2c
commit
0b05b18381
|
@ -491,6 +491,25 @@ void fuse_request_send_background_locked(struct fuse_conn *fc,
|
||||||
fuse_request_send_nowait_locked(fc, req);
|
fuse_request_send_nowait_locked(fc, req);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void fuse_force_forget(struct file *file, u64 nodeid)
|
||||||
|
{
|
||||||
|
struct inode *inode = file->f_path.dentry->d_inode;
|
||||||
|
struct fuse_conn *fc = get_fuse_conn(inode);
|
||||||
|
struct fuse_req *req;
|
||||||
|
struct fuse_forget_in inarg;
|
||||||
|
|
||||||
|
memset(&inarg, 0, sizeof(inarg));
|
||||||
|
inarg.nlookup = 1;
|
||||||
|
req = fuse_get_req_nofail(fc, file);
|
||||||
|
req->in.h.opcode = FUSE_FORGET;
|
||||||
|
req->in.h.nodeid = nodeid;
|
||||||
|
req->in.numargs = 1;
|
||||||
|
req->in.args[0].size = sizeof(inarg);
|
||||||
|
req->in.args[0].value = &inarg;
|
||||||
|
req->isreply = 0;
|
||||||
|
fuse_request_send_nowait(fc, req);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Lock the request. Up to the next unlock_request() there mustn't be
|
* Lock the request. Up to the next unlock_request() there mustn't be
|
||||||
* anything that could cause a page-fault. If the request was already
|
* anything that could cause a page-fault. If the request was already
|
||||||
|
|
160
fs/fuse/dir.c
160
fs/fuse/dir.c
|
@ -1155,6 +1155,143 @@ static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int fuse_direntplus_link(struct file *file,
|
||||||
|
struct fuse_direntplus *direntplus,
|
||||||
|
u64 attr_version)
|
||||||
|
{
|
||||||
|
int err;
|
||||||
|
struct fuse_entry_out *o = &direntplus->entry_out;
|
||||||
|
struct fuse_dirent *dirent = &direntplus->dirent;
|
||||||
|
struct dentry *parent = file->f_path.dentry;
|
||||||
|
struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
|
||||||
|
struct dentry *dentry;
|
||||||
|
struct dentry *alias;
|
||||||
|
struct inode *dir = parent->d_inode;
|
||||||
|
struct fuse_conn *fc;
|
||||||
|
struct inode *inode;
|
||||||
|
|
||||||
|
if (!o->nodeid) {
|
||||||
|
/*
|
||||||
|
* Unlike in the case of fuse_lookup, zero nodeid does not mean
|
||||||
|
* ENOENT. Instead, it only means the userspace filesystem did
|
||||||
|
* not want to return attributes/handle for this entry.
|
||||||
|
*
|
||||||
|
* So do nothing.
|
||||||
|
*/
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (name.name[0] == '.') {
|
||||||
|
/*
|
||||||
|
* We could potentially refresh the attributes of the directory
|
||||||
|
* and its parent?
|
||||||
|
*/
|
||||||
|
if (name.len == 1)
|
||||||
|
return 0;
|
||||||
|
if (name.name[1] == '.' && name.len == 2)
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
fc = get_fuse_conn(dir);
|
||||||
|
|
||||||
|
name.hash = full_name_hash(name.name, name.len);
|
||||||
|
dentry = d_lookup(parent, &name);
|
||||||
|
if (dentry && dentry->d_inode) {
|
||||||
|
inode = dentry->d_inode;
|
||||||
|
if (get_node_id(inode) == o->nodeid) {
|
||||||
|
struct fuse_inode *fi;
|
||||||
|
fi = get_fuse_inode(inode);
|
||||||
|
spin_lock(&fc->lock);
|
||||||
|
fi->nlookup++;
|
||||||
|
spin_unlock(&fc->lock);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The other branch to 'found' comes via fuse_iget()
|
||||||
|
* which bumps nlookup inside
|
||||||
|
*/
|
||||||
|
goto found;
|
||||||
|
}
|
||||||
|
err = d_invalidate(dentry);
|
||||||
|
if (err)
|
||||||
|
goto out;
|
||||||
|
dput(dentry);
|
||||||
|
dentry = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
dentry = d_alloc(parent, &name);
|
||||||
|
err = -ENOMEM;
|
||||||
|
if (!dentry)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
|
||||||
|
&o->attr, entry_attr_timeout(o), attr_version);
|
||||||
|
if (!inode)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
alias = d_materialise_unique(dentry, inode);
|
||||||
|
err = PTR_ERR(alias);
|
||||||
|
if (IS_ERR(alias))
|
||||||
|
goto out;
|
||||||
|
if (alias) {
|
||||||
|
dput(dentry);
|
||||||
|
dentry = alias;
|
||||||
|
}
|
||||||
|
|
||||||
|
found:
|
||||||
|
fuse_change_attributes(inode, &o->attr, entry_attr_timeout(o),
|
||||||
|
attr_version);
|
||||||
|
|
||||||
|
fuse_change_entry_timeout(dentry, o);
|
||||||
|
|
||||||
|
err = 0;
|
||||||
|
out:
|
||||||
|
if (dentry)
|
||||||
|
dput(dentry);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
|
||||||
|
void *dstbuf, filldir_t filldir, u64 attr_version)
|
||||||
|
{
|
||||||
|
struct fuse_direntplus *direntplus;
|
||||||
|
struct fuse_dirent *dirent;
|
||||||
|
size_t reclen;
|
||||||
|
int over = 0;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
|
||||||
|
direntplus = (struct fuse_direntplus *) buf;
|
||||||
|
dirent = &direntplus->dirent;
|
||||||
|
reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
|
||||||
|
|
||||||
|
if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
|
||||||
|
return -EIO;
|
||||||
|
if (reclen > nbytes)
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (!over) {
|
||||||
|
/* We fill entries into dstbuf only as much as
|
||||||
|
it can hold. But we still continue iterating
|
||||||
|
over remaining entries to link them. If not,
|
||||||
|
we need to send a FORGET for each of those
|
||||||
|
which we did not link.
|
||||||
|
*/
|
||||||
|
over = filldir(dstbuf, dirent->name, dirent->namelen,
|
||||||
|
file->f_pos, dirent->ino,
|
||||||
|
dirent->type);
|
||||||
|
file->f_pos = dirent->off;
|
||||||
|
}
|
||||||
|
|
||||||
|
buf += reclen;
|
||||||
|
nbytes -= reclen;
|
||||||
|
|
||||||
|
ret = fuse_direntplus_link(file, direntplus, attr_version);
|
||||||
|
if (ret)
|
||||||
|
fuse_force_forget(file, direntplus->entry_out.nodeid);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
|
static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
|
||||||
{
|
{
|
||||||
int err;
|
int err;
|
||||||
|
@ -1163,6 +1300,7 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
|
||||||
struct inode *inode = file->f_path.dentry->d_inode;
|
struct inode *inode = file->f_path.dentry->d_inode;
|
||||||
struct fuse_conn *fc = get_fuse_conn(inode);
|
struct fuse_conn *fc = get_fuse_conn(inode);
|
||||||
struct fuse_req *req;
|
struct fuse_req *req;
|
||||||
|
u64 attr_version = 0;
|
||||||
|
|
||||||
if (is_bad_inode(inode))
|
if (is_bad_inode(inode))
|
||||||
return -EIO;
|
return -EIO;
|
||||||
|
@ -1179,14 +1317,28 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
|
||||||
req->out.argpages = 1;
|
req->out.argpages = 1;
|
||||||
req->num_pages = 1;
|
req->num_pages = 1;
|
||||||
req->pages[0] = page;
|
req->pages[0] = page;
|
||||||
fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, FUSE_READDIR);
|
if (fc->do_readdirplus) {
|
||||||
|
attr_version = fuse_get_attr_version(fc);
|
||||||
|
fuse_read_fill(req, file, file->f_pos, PAGE_SIZE,
|
||||||
|
FUSE_READDIRPLUS);
|
||||||
|
} else {
|
||||||
|
fuse_read_fill(req, file, file->f_pos, PAGE_SIZE,
|
||||||
|
FUSE_READDIR);
|
||||||
|
}
|
||||||
fuse_request_send(fc, req);
|
fuse_request_send(fc, req);
|
||||||
nbytes = req->out.args[0].size;
|
nbytes = req->out.args[0].size;
|
||||||
err = req->out.h.error;
|
err = req->out.h.error;
|
||||||
fuse_put_request(fc, req);
|
fuse_put_request(fc, req);
|
||||||
if (!err)
|
if (!err) {
|
||||||
err = parse_dirfile(page_address(page), nbytes, file, dstbuf,
|
if (fc->do_readdirplus) {
|
||||||
filldir);
|
err = parse_dirplusfile(page_address(page), nbytes,
|
||||||
|
file, dstbuf, filldir,
|
||||||
|
attr_version);
|
||||||
|
} else {
|
||||||
|
err = parse_dirfile(page_address(page), nbytes, file,
|
||||||
|
dstbuf, filldir);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
__free_page(page);
|
__free_page(page);
|
||||||
fuse_invalidate_attr(inode); /* atime changed */
|
fuse_invalidate_attr(inode); /* atime changed */
|
||||||
|
|
|
@ -487,6 +487,9 @@ struct fuse_conn {
|
||||||
/** Use enhanced/automatic page cache invalidation. */
|
/** Use enhanced/automatic page cache invalidation. */
|
||||||
unsigned auto_inval_data:1;
|
unsigned auto_inval_data:1;
|
||||||
|
|
||||||
|
/** Does the filesystem support readdir-plus? */
|
||||||
|
unsigned do_readdirplus:1;
|
||||||
|
|
||||||
/** The number of requests waiting for completion */
|
/** The number of requests waiting for completion */
|
||||||
atomic_t num_waiting;
|
atomic_t num_waiting;
|
||||||
|
|
||||||
|
@ -578,6 +581,9 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
|
||||||
|
|
||||||
struct fuse_forget_link *fuse_alloc_forget(void);
|
struct fuse_forget_link *fuse_alloc_forget(void);
|
||||||
|
|
||||||
|
/* Used by READDIRPLUS */
|
||||||
|
void fuse_force_forget(struct file *file, u64 nodeid);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initialize READ or READDIR request
|
* Initialize READ or READDIR request
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -863,6 +863,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
|
||||||
fc->dont_mask = 1;
|
fc->dont_mask = 1;
|
||||||
if (arg->flags & FUSE_AUTO_INVAL_DATA)
|
if (arg->flags & FUSE_AUTO_INVAL_DATA)
|
||||||
fc->auto_inval_data = 1;
|
fc->auto_inval_data = 1;
|
||||||
|
if (arg->flags & FUSE_DO_READDIRPLUS)
|
||||||
|
fc->do_readdirplus = 1;
|
||||||
} else {
|
} else {
|
||||||
ra_pages = fc->max_read / PAGE_CACHE_SIZE;
|
ra_pages = fc->max_read / PAGE_CACHE_SIZE;
|
||||||
fc->no_lock = 1;
|
fc->no_lock = 1;
|
||||||
|
@ -889,7 +891,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
|
||||||
arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
|
arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
|
||||||
FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
|
FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
|
||||||
FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
|
FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
|
||||||
FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA;
|
FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
|
||||||
|
FUSE_DO_READDIRPLUS;
|
||||||
req->in.h.opcode = FUSE_INIT;
|
req->in.h.opcode = FUSE_INIT;
|
||||||
req->in.numargs = 1;
|
req->in.numargs = 1;
|
||||||
req->in.args[0].size = sizeof(*arg);
|
req->in.args[0].size = sizeof(*arg);
|
||||||
|
|
|
@ -193,6 +193,7 @@ struct fuse_file_lock {
|
||||||
#define FUSE_FLOCK_LOCKS (1 << 10)
|
#define FUSE_FLOCK_LOCKS (1 << 10)
|
||||||
#define FUSE_HAS_IOCTL_DIR (1 << 11)
|
#define FUSE_HAS_IOCTL_DIR (1 << 11)
|
||||||
#define FUSE_AUTO_INVAL_DATA (1 << 12)
|
#define FUSE_AUTO_INVAL_DATA (1 << 12)
|
||||||
|
#define FUSE_DO_READDIRPLUS (1 << 13)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* CUSE INIT request/reply flags
|
* CUSE INIT request/reply flags
|
||||||
|
@ -299,6 +300,7 @@ enum fuse_opcode {
|
||||||
FUSE_NOTIFY_REPLY = 41,
|
FUSE_NOTIFY_REPLY = 41,
|
||||||
FUSE_BATCH_FORGET = 42,
|
FUSE_BATCH_FORGET = 42,
|
||||||
FUSE_FALLOCATE = 43,
|
FUSE_FALLOCATE = 43,
|
||||||
|
FUSE_READDIRPLUS = 44,
|
||||||
|
|
||||||
/* CUSE specific operations */
|
/* CUSE specific operations */
|
||||||
CUSE_INIT = 4096,
|
CUSE_INIT = 4096,
|
||||||
|
@ -630,6 +632,16 @@ struct fuse_dirent {
|
||||||
#define FUSE_DIRENT_SIZE(d) \
|
#define FUSE_DIRENT_SIZE(d) \
|
||||||
FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen)
|
FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen)
|
||||||
|
|
||||||
|
struct fuse_direntplus {
|
||||||
|
struct fuse_entry_out entry_out;
|
||||||
|
struct fuse_dirent dirent;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define FUSE_NAME_OFFSET_DIRENTPLUS \
|
||||||
|
offsetof(struct fuse_direntplus, dirent.name)
|
||||||
|
#define FUSE_DIRENTPLUS_SIZE(d) \
|
||||||
|
FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET_DIRENTPLUS + (d)->dirent.namelen)
|
||||||
|
|
||||||
struct fuse_notify_inval_inode_out {
|
struct fuse_notify_inval_inode_out {
|
||||||
__u64 ino;
|
__u64 ino;
|
||||||
__s64 off;
|
__s64 off;
|
||||||
|
|
Loading…
Reference in New Issue