Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse

Pull fuse update from Miklos Szeredi:
 "This series adds cached writeback support to fuse, improving write
  throughput"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse:
  fuse: fix "uninitialized variable" warning
  fuse: Turn writeback cache on
  fuse: Fix O_DIRECT operations vs cached writeback misorder
  fuse: fuse_flush() should wait on writeback
  fuse: Implement write_begin/write_end callbacks
  fuse: restructure fuse_readpage()
  fuse: Flush files on wb close
  fuse: Trust kernel i_mtime only
  fuse: Trust kernel i_size only
  fuse: Connection bit for enabling writeback
  fuse: Prepare to handle short reads
  fuse: Linking file to inode helper
This commit is contained in:
Linus Torvalds 2014-04-04 15:34:27 -07:00
commit d15fee814d
6 changed files with 384 additions and 84 deletions

View File

@ -95,7 +95,7 @@ static ssize_t cuse_read(struct file *file, char __user *buf, size_t count,
struct iovec iov = { .iov_base = buf, .iov_len = count }; struct iovec iov = { .iov_base = buf, .iov_len = count };
struct fuse_io_priv io = { .async = 0, .file = file }; struct fuse_io_priv io = { .async = 0, .file = file };
return fuse_direct_io(&io, &iov, 1, count, &pos, 0); return fuse_direct_io(&io, &iov, 1, count, &pos, FUSE_DIO_CUSE);
} }
static ssize_t cuse_write(struct file *file, const char __user *buf, static ssize_t cuse_write(struct file *file, const char __user *buf,
@ -109,7 +109,8 @@ static ssize_t cuse_write(struct file *file, const char __user *buf,
* No locking or generic_write_checks(), the server is * No locking or generic_write_checks(), the server is
* responsible for locking and sanity checks. * responsible for locking and sanity checks.
*/ */
return fuse_direct_io(&io, &iov, 1, count, &pos, 1); return fuse_direct_io(&io, &iov, 1, count, &pos,
FUSE_DIO_WRITE | FUSE_DIO_CUSE);
} }
static int cuse_open(struct inode *inode, struct file *file) static int cuse_open(struct inode *inode, struct file *file)

View File

@ -839,6 +839,14 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
struct kstat *stat) struct kstat *stat)
{ {
unsigned int blkbits; unsigned int blkbits;
struct fuse_conn *fc = get_fuse_conn(inode);
/* see the comment in fuse_change_attributes() */
if (fc->writeback_cache && S_ISREG(inode->i_mode)) {
attr->size = i_size_read(inode);
attr->mtime = inode->i_mtime.tv_sec;
attr->mtimensec = inode->i_mtime.tv_nsec;
}
stat->dev = inode->i_sb->s_dev; stat->dev = inode->i_sb->s_dev;
stat->ino = attr->ino; stat->ino = attr->ino;
@ -1477,12 +1485,16 @@ static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR); FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
} }
static bool update_mtime(unsigned ivalid) static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
{ {
/* Always update if mtime is explicitly set */ /* Always update if mtime is explicitly set */
if (ivalid & ATTR_MTIME_SET) if (ivalid & ATTR_MTIME_SET)
return true; return true;
/* Or if kernel i_mtime is the official one */
if (trust_local_mtime)
return true;
/* If it's an open(O_TRUNC) or an ftruncate(), don't update */ /* If it's an open(O_TRUNC) or an ftruncate(), don't update */
if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE))) if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
return false; return false;
@ -1491,7 +1503,8 @@ static bool update_mtime(unsigned ivalid)
return true; return true;
} }
static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg) static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg,
bool trust_local_mtime)
{ {
unsigned ivalid = iattr->ia_valid; unsigned ivalid = iattr->ia_valid;
@ -1510,11 +1523,11 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg)
if (!(ivalid & ATTR_ATIME_SET)) if (!(ivalid & ATTR_ATIME_SET))
arg->valid |= FATTR_ATIME_NOW; arg->valid |= FATTR_ATIME_NOW;
} }
if ((ivalid & ATTR_MTIME) && update_mtime(ivalid)) { if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_mtime)) {
arg->valid |= FATTR_MTIME; arg->valid |= FATTR_MTIME;
arg->mtime = iattr->ia_mtime.tv_sec; arg->mtime = iattr->ia_mtime.tv_sec;
arg->mtimensec = iattr->ia_mtime.tv_nsec; arg->mtimensec = iattr->ia_mtime.tv_nsec;
if (!(ivalid & ATTR_MTIME_SET)) if (!(ivalid & ATTR_MTIME_SET) && !trust_local_mtime)
arg->valid |= FATTR_MTIME_NOW; arg->valid |= FATTR_MTIME_NOW;
} }
} }
@ -1563,6 +1576,63 @@ void fuse_release_nowrite(struct inode *inode)
spin_unlock(&fc->lock); spin_unlock(&fc->lock);
} }
/*
 * Set up @req as a FUSE_SETATTR request for @inode.
 *
 * @inarg_p becomes the request's single input argument and @outarg_p the
 * buffer for its single output argument.  Nothing is sent here; the caller
 * submits the request.
 */
static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_req *req,
			      struct inode *inode,
			      struct fuse_setattr_in *inarg_p,
			      struct fuse_attr_out *outarg_p)
{
	/* Connections with a protocol minor below 9 use the compat reply size */
	size_t outsize = sizeof(*outarg_p);

	if (fc->minor < 9)
		outsize = FUSE_COMPAT_ATTR_OUT_SIZE;

	/* request header */
	req->in.h.nodeid = get_node_id(inode);
	req->in.h.opcode = FUSE_SETATTR;

	/* one input argument */
	req->in.numargs = 1;
	req->in.args[0].value = inarg_p;
	req->in.args[0].size = sizeof(*inarg_p);

	/* one output argument */
	req->out.numargs = 1;
	req->out.args[0].value = outarg_p;
	req->out.args[0].size = outsize;
}
/*
* Flush inode->i_mtime to the server
*/
int fuse_flush_mtime(struct file *file, bool nofail)
{
struct inode *inode = file->f_mapping->host;
struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_req *req = NULL;
struct fuse_setattr_in inarg;
struct fuse_attr_out outarg;
int err;
if (nofail) {
req = fuse_get_req_nofail_nopages(fc, file);
} else {
req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
}
memset(&inarg, 0, sizeof(inarg));
memset(&outarg, 0, sizeof(outarg));
inarg.valid |= FATTR_MTIME;
inarg.mtime = inode->i_mtime.tv_sec;
inarg.mtimensec = inode->i_mtime.tv_nsec;
fuse_setattr_fill(fc, req, inode, &inarg, &outarg);
fuse_request_send(fc, req);
err = req->out.h.error;
fuse_put_request(fc, req);
if (!err)
clear_bit(FUSE_I_MTIME_DIRTY, &fi->state);
return err;
}
/* /*
* Set attributes, and at the same time refresh them. * Set attributes, and at the same time refresh them.
* *
@ -1580,8 +1650,10 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
struct fuse_setattr_in inarg; struct fuse_setattr_in inarg;
struct fuse_attr_out outarg; struct fuse_attr_out outarg;
bool is_truncate = false; bool is_truncate = false;
bool is_wb = fc->writeback_cache;
loff_t oldsize; loff_t oldsize;
int err; int err;
bool trust_local_mtime = is_wb && S_ISREG(inode->i_mode);
if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
attr->ia_valid |= ATTR_FORCE; attr->ia_valid |= ATTR_FORCE;
@ -1610,7 +1682,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
memset(&inarg, 0, sizeof(inarg)); memset(&inarg, 0, sizeof(inarg));
memset(&outarg, 0, sizeof(outarg)); memset(&outarg, 0, sizeof(outarg));
iattr_to_fattr(attr, &inarg); iattr_to_fattr(attr, &inarg, trust_local_mtime);
if (file) { if (file) {
struct fuse_file *ff = file->private_data; struct fuse_file *ff = file->private_data;
inarg.valid |= FATTR_FH; inarg.valid |= FATTR_FH;
@ -1621,17 +1693,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
inarg.valid |= FATTR_LOCKOWNER; inarg.valid |= FATTR_LOCKOWNER;
inarg.lock_owner = fuse_lock_owner_id(fc, current->files); inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
} }
req->in.h.opcode = FUSE_SETATTR; fuse_setattr_fill(fc, req, inode, &inarg, &outarg);
req->in.h.nodeid = get_node_id(inode);
req->in.numargs = 1;
req->in.args[0].size = sizeof(inarg);
req->in.args[0].value = &inarg;
req->out.numargs = 1;
if (fc->minor < 9)
req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
else
req->out.args[0].size = sizeof(outarg);
req->out.args[0].value = &outarg;
fuse_request_send(fc, req); fuse_request_send(fc, req);
err = req->out.h.error; err = req->out.h.error;
fuse_put_request(fc, req); fuse_put_request(fc, req);
@ -1648,10 +1710,18 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
} }
spin_lock(&fc->lock); spin_lock(&fc->lock);
/* the kernel maintains i_mtime locally */
if (trust_local_mtime && (attr->ia_valid & ATTR_MTIME)) {
inode->i_mtime = attr->ia_mtime;
clear_bit(FUSE_I_MTIME_DIRTY, &fi->state);
}
fuse_change_attributes_common(inode, &outarg.attr, fuse_change_attributes_common(inode, &outarg.attr,
attr_timeout(&outarg)); attr_timeout(&outarg));
oldsize = inode->i_size; oldsize = inode->i_size;
i_size_write(inode, outarg.attr.size); /* see the comment in fuse_change_attributes() */
if (!is_wb || is_truncate || !S_ISREG(inode->i_mode))
i_size_write(inode, outarg.attr.size);
if (is_truncate) { if (is_truncate) {
/* NOTE: this may release/reacquire fc->lock */ /* NOTE: this may release/reacquire fc->lock */
@ -1663,7 +1733,8 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
* Only call invalidate_inode_pages2() after removing * Only call invalidate_inode_pages2() after removing
* FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock. * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
*/ */
if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) { if ((is_truncate || !is_wb) &&
S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
truncate_pagecache(inode, outarg.attr.size); truncate_pagecache(inode, outarg.attr.size);
invalidate_inode_pages2(inode->i_mapping); invalidate_inode_pages2(inode->i_mapping);
} }
@ -1875,6 +1946,17 @@ static int fuse_removexattr(struct dentry *entry, const char *name)
return err; return err;
} }
static int fuse_update_time(struct inode *inode, struct timespec *now,
int flags)
{
if (flags & S_MTIME) {
inode->i_mtime = *now;
set_bit(FUSE_I_MTIME_DIRTY, &get_fuse_inode(inode)->state);
BUG_ON(!S_ISREG(inode->i_mode));
}
return 0;
}
static const struct inode_operations fuse_dir_inode_operations = { static const struct inode_operations fuse_dir_inode_operations = {
.lookup = fuse_lookup, .lookup = fuse_lookup,
.mkdir = fuse_mkdir, .mkdir = fuse_mkdir,
@ -1914,6 +1996,7 @@ static const struct inode_operations fuse_common_inode_operations = {
.getxattr = fuse_getxattr, .getxattr = fuse_getxattr,
.listxattr = fuse_listxattr, .listxattr = fuse_listxattr,
.removexattr = fuse_removexattr, .removexattr = fuse_removexattr,
.update_time = fuse_update_time,
}; };
static const struct inode_operations fuse_symlink_inode_operations = { static const struct inode_operations fuse_symlink_inode_operations = {

View File

@ -188,6 +188,22 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
} }
EXPORT_SYMBOL_GPL(fuse_do_open); EXPORT_SYMBOL_GPL(fuse_do_open);
/*
 * Put @file's fuse_file on the write_files list of its inode.
 *
 * The file may be written through mmap, so it has to be reachable from the
 * inode's write_files list.  The entry is added only if ff->write_entry is
 * currently an empty list head; the check and the insertion both happen
 * under fc->lock.
 */
static void fuse_link_write_file(struct file *file)
{
	struct fuse_file *ff = file->private_data;
	struct inode *inode = file_inode(file);
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_conn *fc = get_fuse_conn(inode);
	bool unlinked;

	spin_lock(&fc->lock);
	unlinked = list_empty(&ff->write_entry);
	if (unlinked)
		list_add(&ff->write_entry, &fi->write_files);
	spin_unlock(&fc->lock);
}
void fuse_finish_open(struct inode *inode, struct file *file) void fuse_finish_open(struct inode *inode, struct file *file)
{ {
struct fuse_file *ff = file->private_data; struct fuse_file *ff = file->private_data;
@ -208,6 +224,8 @@ void fuse_finish_open(struct inode *inode, struct file *file)
spin_unlock(&fc->lock); spin_unlock(&fc->lock);
fuse_invalidate_attr(inode); fuse_invalidate_attr(inode);
} }
if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache)
fuse_link_write_file(file);
} }
int fuse_open_common(struct inode *inode, struct file *file, bool isdir) int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
@ -292,6 +310,15 @@ static int fuse_open(struct inode *inode, struct file *file)
static int fuse_release(struct inode *inode, struct file *file) static int fuse_release(struct inode *inode, struct file *file)
{ {
struct fuse_conn *fc = get_fuse_conn(inode);
/* see fuse_vma_close() for !writeback_cache case */
if (fc->writeback_cache)
filemap_write_and_wait(file->f_mapping);
if (test_bit(FUSE_I_MTIME_DIRTY, &get_fuse_inode(inode)->state))
fuse_flush_mtime(file, true);
fuse_release_common(file, FUSE_RELEASE); fuse_release_common(file, FUSE_RELEASE);
/* return value is ignored by VFS */ /* return value is ignored by VFS */
@ -333,12 +360,13 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
} }
/* /*
* Check if page is under writeback * Check if any page in a range is under writeback
* *
* This is currently done by walking the list of writepage requests * This is currently done by walking the list of writepage requests
* for the inode, which can be pretty inefficient. * for the inode, which can be pretty inefficient.
*/ */
static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index) static bool fuse_range_is_writeback(struct inode *inode, pgoff_t idx_from,
pgoff_t idx_to)
{ {
struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_inode *fi = get_fuse_inode(inode);
@ -351,8 +379,8 @@ static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
BUG_ON(req->inode != inode); BUG_ON(req->inode != inode);
curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT; curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
if (curr_index <= index && if (idx_from < curr_index + req->num_pages &&
index < curr_index + req->num_pages) { curr_index <= idx_to) {
found = true; found = true;
break; break;
} }
@ -362,6 +390,11 @@ static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
return found; return found;
} }
/*
 * Single-page form of fuse_range_is_writeback(): queries the range whose
 * first and last page index are both @index and returns that result.
 */
static inline bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
{
return fuse_range_is_writeback(inode, index, index);
}
/* /*
* Wait for page writeback to be completed. * Wait for page writeback to be completed.
* *
@ -376,6 +409,21 @@ static int fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index)
return 0; return 0;
} }
/*
* Wait for all pending writepages on the inode to finish.
*
* This is currently done by blocking further writes with FUSE_NOWRITE
* and waiting for all sent writes to complete.
*
* This must be called under i_mutex, otherwise the FUSE_NOWRITE usage
* could conflict with truncation.
*/
static void fuse_sync_writes(struct inode *inode)
{
/*
 * Enter the no-write state and leave it again immediately; no work is
 * done in between.  NOTE(review): the waiting described above presumably
 * happens inside fuse_set_nowrite() -- confirm against its definition.
 */
fuse_set_nowrite(inode);
fuse_release_nowrite(inode);
}
static int fuse_flush(struct file *file, fl_owner_t id) static int fuse_flush(struct file *file, fl_owner_t id)
{ {
struct inode *inode = file_inode(file); struct inode *inode = file_inode(file);
@ -391,6 +439,14 @@ static int fuse_flush(struct file *file, fl_owner_t id)
if (fc->no_flush) if (fc->no_flush)
return 0; return 0;
err = filemap_write_and_wait(file->f_mapping);
if (err)
return err;
mutex_lock(&inode->i_mutex);
fuse_sync_writes(inode);
mutex_unlock(&inode->i_mutex);
req = fuse_get_req_nofail_nopages(fc, file); req = fuse_get_req_nofail_nopages(fc, file);
memset(&inarg, 0, sizeof(inarg)); memset(&inarg, 0, sizeof(inarg));
inarg.fh = ff->fh; inarg.fh = ff->fh;
@ -411,21 +467,6 @@ static int fuse_flush(struct file *file, fl_owner_t id)
return err; return err;
} }
/*
* Wait for all pending writepages on the inode to finish.
*
* This is currently done by blocking further writes with FUSE_NOWRITE
* and waiting for all sent writes to complete.
*
* This must be called under i_mutex, otherwise the FUSE_NOWRITE usage
* could conflict with truncation.
*/
static void fuse_sync_writes(struct inode *inode)
{
fuse_set_nowrite(inode);
fuse_release_nowrite(inode);
}
int fuse_fsync_common(struct file *file, loff_t start, loff_t end, int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
int datasync, int isdir) int datasync, int isdir)
{ {
@ -459,6 +500,12 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
fuse_sync_writes(inode); fuse_sync_writes(inode);
if (test_bit(FUSE_I_MTIME_DIRTY, &get_fuse_inode(inode)->state)) {
	/*
	 * Flush the locally updated mtime first.  The result must go into
	 * the function-level err: declaring a new "int err" in this scope
	 * would shadow it, and a failure would be lost on the jump to out.
	 */
	err = fuse_flush_mtime(file, false);
	if (err)
		goto out;
}
req = fuse_get_req_nopages(fc); req = fuse_get_req_nopages(fc);
if (IS_ERR(req)) { if (IS_ERR(req)) {
err = PTR_ERR(req); err = PTR_ERR(req);
@ -655,7 +702,33 @@ static void fuse_read_update_size(struct inode *inode, loff_t size,
spin_unlock(&fc->lock); spin_unlock(&fc->lock);
} }
static int fuse_readpage(struct file *file, struct page *page) static void fuse_short_read(struct fuse_req *req, struct inode *inode,
u64 attr_ver)
{
size_t num_read = req->out.args[0].size;
struct fuse_conn *fc = get_fuse_conn(inode);
if (fc->writeback_cache) {
/*
* A hole in a file. Some data after the hole are in page cache,
* but have not reached the client fs yet. So, the hole is not
* present there.
*/
int i;
int start_idx = num_read >> PAGE_CACHE_SHIFT;
size_t off = num_read & (PAGE_CACHE_SIZE - 1);
for (i = start_idx; i < req->num_pages; i++) {
zero_user_segment(req->pages[i], off, PAGE_CACHE_SIZE);
off = 0;
}
} else {
loff_t pos = page_offset(req->pages[0]) + num_read;
fuse_read_update_size(inode, pos, attr_ver);
}
}
static int fuse_do_readpage(struct file *file, struct page *page)
{ {
struct fuse_io_priv io = { .async = 0, .file = file }; struct fuse_io_priv io = { .async = 0, .file = file };
struct inode *inode = page->mapping->host; struct inode *inode = page->mapping->host;
@ -667,10 +740,6 @@ static int fuse_readpage(struct file *file, struct page *page)
u64 attr_ver; u64 attr_ver;
int err; int err;
err = -EIO;
if (is_bad_inode(inode))
goto out;
/* /*
* Page writeback can extend beyond the lifetime of the * Page writeback can extend beyond the lifetime of the
* page-cache page, so make sure we read a properly synced * page-cache page, so make sure we read a properly synced
@ -679,9 +748,8 @@ static int fuse_readpage(struct file *file, struct page *page)
fuse_wait_on_page_writeback(inode, page->index); fuse_wait_on_page_writeback(inode, page->index);
req = fuse_get_req(fc, 1); req = fuse_get_req(fc, 1);
err = PTR_ERR(req);
if (IS_ERR(req)) if (IS_ERR(req))
goto out; return PTR_ERR(req);
attr_ver = fuse_get_attr_version(fc); attr_ver = fuse_get_attr_version(fc);
@ -692,18 +760,32 @@ static int fuse_readpage(struct file *file, struct page *page)
req->page_descs[0].length = count; req->page_descs[0].length = count;
num_read = fuse_send_read(req, &io, pos, count, NULL); num_read = fuse_send_read(req, &io, pos, count, NULL);
err = req->out.h.error; err = req->out.h.error;
fuse_put_request(fc, req);
if (!err) { if (!err) {
/* /*
* Short read means EOF. If file size is larger, truncate it * Short read means EOF. If file size is larger, truncate it
*/ */
if (num_read < count) if (num_read < count)
fuse_read_update_size(inode, pos + num_read, attr_ver); fuse_short_read(req, inode, attr_ver);
SetPageUptodate(page); SetPageUptodate(page);
} }
fuse_put_request(fc, req);
return err;
}
static int fuse_readpage(struct file *file, struct page *page)
{
struct inode *inode = page->mapping->host;
int err;
err = -EIO;
if (is_bad_inode(inode))
goto out;
err = fuse_do_readpage(file, page);
fuse_invalidate_atime(inode); fuse_invalidate_atime(inode);
out: out:
unlock_page(page); unlock_page(page);
@ -726,13 +808,9 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
/* /*
* Short read means EOF. If file size is larger, truncate it * Short read means EOF. If file size is larger, truncate it
*/ */
if (!req->out.h.error && num_read < count) { if (!req->out.h.error && num_read < count)
loff_t pos; fuse_short_read(req, inode, req->misc.read.attr_ver);
pos = page_offset(req->pages[0]) + num_read;
fuse_read_update_size(inode, pos,
req->misc.read.attr_ver);
}
fuse_invalidate_atime(inode); fuse_invalidate_atime(inode);
} }
@ -922,16 +1000,21 @@ static size_t fuse_send_write(struct fuse_req *req, struct fuse_io_priv *io,
return req->misc.write.out.size; return req->misc.write.out.size;
} }
void fuse_write_update_size(struct inode *inode, loff_t pos) bool fuse_write_update_size(struct inode *inode, loff_t pos)
{ {
struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_inode *fi = get_fuse_inode(inode);
bool ret = false;
spin_lock(&fc->lock); spin_lock(&fc->lock);
fi->attr_version = ++fc->attr_version; fi->attr_version = ++fc->attr_version;
if (pos > inode->i_size) if (pos > inode->i_size) {
i_size_write(inode, pos); i_size_write(inode, pos);
ret = true;
}
spin_unlock(&fc->lock); spin_unlock(&fc->lock);
return ret;
} }
static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file, static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
@ -1116,6 +1199,15 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
struct iov_iter i; struct iov_iter i;
loff_t endbyte = 0; loff_t endbyte = 0;
if (get_fuse_conn(inode)->writeback_cache) {
/* Update size (EOF optimization) and mode (SUID clearing) */
err = fuse_update_attributes(mapping->host, NULL, file, NULL);
if (err)
return err;
return generic_file_aio_write(iocb, iov, nr_segs, pos);
}
WARN_ON(iocb->ki_pos != pos); WARN_ON(iocb->ki_pos != pos);
ocount = 0; ocount = 0;
@ -1289,13 +1381,18 @@ static inline int fuse_iter_npages(const struct iov_iter *ii_p)
ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
unsigned long nr_segs, size_t count, loff_t *ppos, unsigned long nr_segs, size_t count, loff_t *ppos,
int write) int flags)
{ {
int write = flags & FUSE_DIO_WRITE;
int cuse = flags & FUSE_DIO_CUSE;
struct file *file = io->file; struct file *file = io->file;
struct inode *inode = file->f_mapping->host;
struct fuse_file *ff = file->private_data; struct fuse_file *ff = file->private_data;
struct fuse_conn *fc = ff->fc; struct fuse_conn *fc = ff->fc;
size_t nmax = write ? fc->max_write : fc->max_read; size_t nmax = write ? fc->max_write : fc->max_read;
loff_t pos = *ppos; loff_t pos = *ppos;
pgoff_t idx_from = pos >> PAGE_CACHE_SHIFT;
pgoff_t idx_to = (pos + count - 1) >> PAGE_CACHE_SHIFT;
ssize_t res = 0; ssize_t res = 0;
struct fuse_req *req; struct fuse_req *req;
struct iov_iter ii; struct iov_iter ii;
@ -1309,6 +1406,14 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
if (IS_ERR(req)) if (IS_ERR(req))
return PTR_ERR(req); return PTR_ERR(req);
if (!cuse && fuse_range_is_writeback(inode, idx_from, idx_to)) {
if (!write)
mutex_lock(&inode->i_mutex);
fuse_sync_writes(inode);
if (!write)
mutex_unlock(&inode->i_mutex);
}
while (count) { while (count) {
size_t nres; size_t nres;
fl_owner_t owner = current->files; fl_owner_t owner = current->files;
@ -1397,7 +1502,8 @@ static ssize_t __fuse_direct_write(struct fuse_io_priv *io,
res = generic_write_checks(file, ppos, &count, 0); res = generic_write_checks(file, ppos, &count, 0);
if (!res) if (!res)
res = fuse_direct_io(io, iov, nr_segs, count, ppos, 1); res = fuse_direct_io(io, iov, nr_segs, count, ppos,
FUSE_DIO_WRITE);
fuse_invalidate_attr(inode); fuse_invalidate_attr(inode);
@ -1885,6 +1991,77 @@ out:
return err; return err;
} }
/*
 * ->write_begin for the writeback cache: hand back a locked page-cache page
 * that is ready to receive @len bytes at @pos.
 *
 * The page is read from the server only when it is not already uptodate,
 * the write does not cover the whole page, and the page does not start at
 * or beyond the current end of file.
 *
 * Returns 0 with *pagep set, -ENOMEM if no page could be grabbed, or the
 * error from fuse_do_readpage() (the page is then unlocked and released).
 *
 * Open question: it would be worthwhile to make sure that space is reserved
 * on disk for the write, but how to do that without killing performance
 * needs more thought.
 */
static int fuse_write_begin(struct file *file, struct address_space *mapping,
			    loff_t pos, unsigned len, unsigned flags,
			    struct page **pagep, void **fsdata)
{
	struct fuse_conn *fc = get_fuse_conn(file->f_dentry->d_inode);
	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
	struct page *page;

	WARN_ON(!fc->writeback_cache);

	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page)
		return -ENOMEM;

	fuse_wait_on_page_writeback(mapping->host, page->index);

	if (!PageUptodate(page) && len != PAGE_CACHE_SIZE) {
		loff_t fsize = i_size_read(mapping->host);

		if (fsize <= (pos & PAGE_CACHE_MASK)) {
			/*
			 * The page starts at or after the end of file, so
			 * there is nothing to read: just zero the part in
			 * front of the write position.
			 */
			size_t off = pos & ~PAGE_CACHE_MASK;

			if (off)
				zero_user_segment(page, 0, off);
		} else {
			int err = fuse_do_readpage(file, page);

			if (err) {
				unlock_page(page);
				page_cache_release(page);
				return err;
			}
		}
	}

	*pagep = page;
	return 0;
}
/*
 * ->write_end for the writeback cache: finish a buffered write of @copied
 * bytes at @pos into @page.
 *
 * If the page is not uptodate yet, the bytes after the copied data are
 * zeroed and the page is marked uptodate.  The cached file size is then
 * extended if needed and the page is dirtied.
 *
 * The page is always unlocked and released.  Returns @copied.
 */
static int fuse_write_end(struct file *file, struct address_space *mapping,
			  loff_t pos, unsigned len, unsigned copied,
			  struct page *page, void *fsdata)
{
	struct inode *inode = page->mapping->host;

	/*
	 * Nothing was copied: the page holds no new data, so do not zero
	 * it, mark it uptodate, extend the size or dirty it.  Otherwise a
	 * page that was never read in could be written back as zeroes.
	 */
	if (!copied)
		goto unlock;

	if (!PageUptodate(page)) {
		/* Zero any unwritten bytes at the end of the page */
		size_t endoff = (pos + copied) & ~PAGE_CACHE_MASK;

		if (endoff)
			zero_user_segment(page, endoff, PAGE_CACHE_SIZE);
		SetPageUptodate(page);
	}

	fuse_write_update_size(inode, pos + copied);
	set_page_dirty(page);

unlock:
	unlock_page(page);
	page_cache_release(page);

	return copied;
}
static int fuse_launder_page(struct page *page) static int fuse_launder_page(struct page *page)
{ {
int err = 0; int err = 0;
@ -1946,20 +2123,9 @@ static const struct vm_operations_struct fuse_file_vm_ops = {
static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
{ {
if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) { if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
struct inode *inode = file_inode(file); fuse_link_write_file(file);
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_file *ff = file->private_data;
/*
* file may be written through mmap, so chain it onto the
* inodes's write_file list
*/
spin_lock(&fc->lock);
if (list_empty(&ff->write_entry))
list_add(&ff->write_entry, &fi->write_files);
spin_unlock(&fc->lock);
}
file_accessed(file); file_accessed(file);
vma->vm_ops = &fuse_file_vm_ops; vma->vm_ops = &fuse_file_vm_ops;
return 0; return 0;
@ -2606,7 +2772,7 @@ static void fuse_register_polled_file(struct fuse_conn *fc,
{ {
spin_lock(&fc->lock); spin_lock(&fc->lock);
if (RB_EMPTY_NODE(&ff->polled_node)) { if (RB_EMPTY_NODE(&ff->polled_node)) {
struct rb_node **link, *parent; struct rb_node **link, *uninitialized_var(parent);
link = fuse_find_polled_node(fc, ff->kh, &parent); link = fuse_find_polled_node(fc, ff->kh, &parent);
BUG_ON(*link); BUG_ON(*link);
@ -2850,8 +3016,16 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
goto out; goto out;
/* we could have extended the file */ /* we could have extended the file */
if (!(mode & FALLOC_FL_KEEP_SIZE)) if (!(mode & FALLOC_FL_KEEP_SIZE)) {
fuse_write_update_size(inode, offset + length); bool changed = fuse_write_update_size(inode, offset + length);
if (changed && fc->writeback_cache) {
struct fuse_inode *fi = get_fuse_inode(inode);
inode->i_mtime = current_fs_time(inode->i_sb);
set_bit(FUSE_I_MTIME_DIRTY, &fi->state);
}
}
if (mode & FALLOC_FL_PUNCH_HOLE) if (mode & FALLOC_FL_PUNCH_HOLE)
truncate_pagecache_range(inode, offset, offset + length - 1); truncate_pagecache_range(inode, offset, offset + length - 1);
@ -2915,6 +3089,8 @@ static const struct address_space_operations fuse_file_aops = {
.set_page_dirty = __set_page_dirty_nobuffers, .set_page_dirty = __set_page_dirty_nobuffers,
.bmap = fuse_bmap, .bmap = fuse_bmap,
.direct_IO = fuse_direct_IO, .direct_IO = fuse_direct_IO,
.write_begin = fuse_write_begin,
.write_end = fuse_write_end,
}; };
void fuse_init_file_inode(struct inode *inode) void fuse_init_file_inode(struct inode *inode)

View File

@ -119,6 +119,8 @@ enum {
FUSE_I_INIT_RDPLUS, FUSE_I_INIT_RDPLUS,
/** An operation changing file size is in progress */ /** An operation changing file size is in progress */
FUSE_I_SIZE_UNSTABLE, FUSE_I_SIZE_UNSTABLE,
/** i_mtime has been updated locally; a flush to userspace needed */
FUSE_I_MTIME_DIRTY,
}; };
struct fuse_conn; struct fuse_conn;
@ -480,6 +482,9 @@ struct fuse_conn {
/** Set if bdi is valid */ /** Set if bdi is valid */
unsigned bdi_initialized:1; unsigned bdi_initialized:1;
/** write-back cache policy (default is write-through) */
unsigned writeback_cache:1;
/* /*
* The following bitfields are only for optimization purposes * The following bitfields are only for optimization purposes
* and hence races in setting them will not cause malfunction * and hence races in setting them will not cause malfunction
@ -863,9 +868,20 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
bool isdir); bool isdir);
/**
* fuse_direct_io() flags
*/
/** If set, it is WRITE; otherwise - READ */
#define FUSE_DIO_WRITE (1 << 0)
/** CUSE passes fuse_direct_io() a file whose f_mapping->host is not a FUSE inode */
#define FUSE_DIO_CUSE (1 << 1)
ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
unsigned long nr_segs, size_t count, loff_t *ppos, unsigned long nr_segs, size_t count, loff_t *ppos,
int write); int flags);
long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
unsigned int flags); unsigned int flags);
long fuse_ioctl_common(struct file *file, unsigned int cmd, long fuse_ioctl_common(struct file *file, unsigned int cmd,
@ -873,7 +889,9 @@ long fuse_ioctl_common(struct file *file, unsigned int cmd,
unsigned fuse_file_poll(struct file *file, poll_table *wait); unsigned fuse_file_poll(struct file *file, poll_table *wait);
int fuse_dev_release(struct inode *inode, struct file *file); int fuse_dev_release(struct inode *inode, struct file *file);
void fuse_write_update_size(struct inode *inode, loff_t pos); bool fuse_write_update_size(struct inode *inode, loff_t pos);
int fuse_flush_mtime(struct file *file, bool nofail);
int fuse_do_setattr(struct inode *inode, struct iattr *attr, int fuse_do_setattr(struct inode *inode, struct iattr *attr,
struct file *file); struct file *file);

View File

@ -170,8 +170,11 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
inode->i_blocks = attr->blocks; inode->i_blocks = attr->blocks;
inode->i_atime.tv_sec = attr->atime; inode->i_atime.tv_sec = attr->atime;
inode->i_atime.tv_nsec = attr->atimensec; inode->i_atime.tv_nsec = attr->atimensec;
inode->i_mtime.tv_sec = attr->mtime; /* mtime from server may be stale due to local buffered write */
inode->i_mtime.tv_nsec = attr->mtimensec; if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) {
inode->i_mtime.tv_sec = attr->mtime;
inode->i_mtime.tv_nsec = attr->mtimensec;
}
inode->i_ctime.tv_sec = attr->ctime; inode->i_ctime.tv_sec = attr->ctime;
inode->i_ctime.tv_nsec = attr->ctimensec; inode->i_ctime.tv_nsec = attr->ctimensec;
@ -197,6 +200,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
{ {
struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_inode *fi = get_fuse_inode(inode);
bool is_wb = fc->writeback_cache;
loff_t oldsize; loff_t oldsize;
struct timespec old_mtime; struct timespec old_mtime;
@ -211,10 +215,16 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
fuse_change_attributes_common(inode, attr, attr_valid); fuse_change_attributes_common(inode, attr, attr_valid);
oldsize = inode->i_size; oldsize = inode->i_size;
i_size_write(inode, attr->size); /*
* In case of writeback_cache enabled, the cached writes beyond EOF
* extend local i_size without keeping userspace server in sync. So,
* attr->size coming from server can be stale. We cannot trust it.
*/
if (!is_wb || !S_ISREG(inode->i_mode))
i_size_write(inode, attr->size);
spin_unlock(&fc->lock); spin_unlock(&fc->lock);
if (S_ISREG(inode->i_mode)) { if (!is_wb && S_ISREG(inode->i_mode)) {
bool inval = false; bool inval = false;
if (oldsize != attr->size) { if (oldsize != attr->size) {
@ -243,6 +253,8 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
{ {
inode->i_mode = attr->mode & S_IFMT; inode->i_mode = attr->mode & S_IFMT;
inode->i_size = attr->size; inode->i_size = attr->size;
inode->i_mtime.tv_sec = attr->mtime;
inode->i_mtime.tv_nsec = attr->mtimensec;
if (S_ISREG(inode->i_mode)) { if (S_ISREG(inode->i_mode)) {
fuse_init_common(inode); fuse_init_common(inode);
fuse_init_file_inode(inode); fuse_init_file_inode(inode);
@ -289,7 +301,9 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
return NULL; return NULL;
if ((inode->i_state & I_NEW)) { if ((inode->i_state & I_NEW)) {
inode->i_flags |= S_NOATIME|S_NOCMTIME; inode->i_flags |= S_NOATIME;
if (!fc->writeback_cache || !S_ISREG(inode->i_mode))
inode->i_flags |= S_NOCMTIME;
inode->i_generation = generation; inode->i_generation = generation;
inode->i_data.backing_dev_info = &fc->bdi; inode->i_data.backing_dev_info = &fc->bdi;
fuse_init_inode(inode, attr); fuse_init_inode(inode, attr);
@ -873,6 +887,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
} }
if (arg->flags & FUSE_ASYNC_DIO) if (arg->flags & FUSE_ASYNC_DIO)
fc->async_dio = 1; fc->async_dio = 1;
if (arg->flags & FUSE_WRITEBACK_CACHE)
fc->writeback_cache = 1;
} else { } else {
ra_pages = fc->max_read / PAGE_CACHE_SIZE; ra_pages = fc->max_read / PAGE_CACHE_SIZE;
fc->no_lock = 1; fc->no_lock = 1;
@ -900,7 +916,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO; FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
FUSE_WRITEBACK_CACHE;
req->in.h.opcode = FUSE_INIT; req->in.h.opcode = FUSE_INIT;
req->in.numargs = 1; req->in.numargs = 1;
req->in.args[0].size = sizeof(*arg); req->in.args[0].size = sizeof(*arg);

View File

@ -93,6 +93,9 @@
* *
* 7.22 * 7.22
* - add FUSE_ASYNC_DIO * - add FUSE_ASYNC_DIO
*
* 7.23
* - add FUSE_WRITEBACK_CACHE
*/ */
#ifndef _LINUX_FUSE_H #ifndef _LINUX_FUSE_H
@ -128,7 +131,7 @@
#define FUSE_KERNEL_VERSION 7 #define FUSE_KERNEL_VERSION 7
/** Minor version number of this interface */ /** Minor version number of this interface */
#define FUSE_KERNEL_MINOR_VERSION 22 #define FUSE_KERNEL_MINOR_VERSION 23
/** The node ID of the root inode */ /** The node ID of the root inode */
#define FUSE_ROOT_ID 1 #define FUSE_ROOT_ID 1
@ -219,6 +222,7 @@ struct fuse_file_lock {
* FUSE_DO_READDIRPLUS: do READDIRPLUS (READDIR+LOOKUP in one) * FUSE_DO_READDIRPLUS: do READDIRPLUS (READDIR+LOOKUP in one)
* FUSE_READDIRPLUS_AUTO: adaptive readdirplus * FUSE_READDIRPLUS_AUTO: adaptive readdirplus
* FUSE_ASYNC_DIO: asynchronous direct I/O submission * FUSE_ASYNC_DIO: asynchronous direct I/O submission
* FUSE_WRITEBACK_CACHE: use writeback cache for buffered writes
*/ */
#define FUSE_ASYNC_READ (1 << 0) #define FUSE_ASYNC_READ (1 << 0)
#define FUSE_POSIX_LOCKS (1 << 1) #define FUSE_POSIX_LOCKS (1 << 1)
@ -236,6 +240,7 @@ struct fuse_file_lock {
#define FUSE_DO_READDIRPLUS (1 << 13) #define FUSE_DO_READDIRPLUS (1 << 13)
#define FUSE_READDIRPLUS_AUTO (1 << 14) #define FUSE_READDIRPLUS_AUTO (1 << 14)
#define FUSE_ASYNC_DIO (1 << 15) #define FUSE_ASYNC_DIO (1 << 15)
#define FUSE_WRITEBACK_CACHE (1 << 16)
/** /**
* CUSE INIT request/reply flags * CUSE INIT request/reply flags