Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse
Pull fuse updates from Miklos Szeredi:
"This adds a ->writepage() implementation to fuse, improving mmapped writeout and paving the way for buffered writeback. And there's a patch to add a fixed minor number for /dev/cuse, similarly to /dev/fuse."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse:
  fuse: writepages: protect secondary requests from fuse file release
  fuse: writepages: update bdi writeout when deleting secondary request
  fuse: writepages: crop secondary requests
  fuse: writepages: roll back changes if request not found
  cuse: add fix minor number to /dev/cuse
  fuse: writepage: skip already in flight
  fuse: writepages: handle same page rewrites
  fuse: writepages: fix aggregation
  fuse: fix race in fuse_writepages()
  fuse: Implement writepages callback
  fuse: don't BUG on no write file
  fuse: lock page in mkwrite
  fuse: Prepare to handle multiple pages in writeback
  fuse: Getting file for writeback helper
This commit is contained in:
commit
a7fa20a594
|
@ -414,6 +414,7 @@ Your cooperation is appreciated.
|
|||
200 = /dev/net/tun TAP/TUN network device
|
||||
201 = /dev/button/gulpb Transmeta GULP-B buttons
|
||||
202 = /dev/emd/ctl Enhanced Metadisk RAID (EMD) control
|
||||
203 = /dev/cuse Cuse (character device in user-space)
|
||||
204 = /dev/video/em8300 EM8300 DVD decoder control
|
||||
205 = /dev/video/em8300_mv EM8300 DVD decoder video
|
||||
206 = /dev/video/em8300_ma EM8300 DVD decoder audio
|
||||
|
|
|
@ -589,11 +589,14 @@ static struct attribute *cuse_class_dev_attrs[] = {
|
|||
ATTRIBUTE_GROUPS(cuse_class_dev);
|
||||
|
||||
static struct miscdevice cuse_miscdev = {
|
||||
.minor = MISC_DYNAMIC_MINOR,
|
||||
.minor = CUSE_MINOR,
|
||||
.name = "cuse",
|
||||
.fops = &cuse_channel_fops,
|
||||
};
|
||||
|
||||
MODULE_ALIAS_MISCDEV(CUSE_MINOR);
|
||||
MODULE_ALIAS("devname:cuse");
|
||||
|
||||
static int __init cuse_init(void)
|
||||
{
|
||||
int i, rc;
|
||||
|
|
361
fs/fuse/file.c
361
fs/fuse/file.c
|
@ -334,7 +334,8 @@ static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
|
|||
|
||||
BUG_ON(req->inode != inode);
|
||||
curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
|
||||
if (curr_index == index) {
|
||||
if (curr_index <= index &&
|
||||
index < curr_index + req->num_pages) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
|
@ -1409,8 +1410,13 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
|
|||
|
||||
static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
|
||||
{
|
||||
__free_page(req->pages[0]);
|
||||
fuse_file_put(req->ff, false);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < req->num_pages; i++)
|
||||
__free_page(req->pages[i]);
|
||||
|
||||
if (req->ff)
|
||||
fuse_file_put(req->ff, false);
|
||||
}
|
||||
|
||||
static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
|
||||
|
@ -1418,30 +1424,34 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
|
|||
struct inode *inode = req->inode;
|
||||
struct fuse_inode *fi = get_fuse_inode(inode);
|
||||
struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info;
|
||||
int i;
|
||||
|
||||
list_del(&req->writepages_entry);
|
||||
dec_bdi_stat(bdi, BDI_WRITEBACK);
|
||||
dec_zone_page_state(req->pages[0], NR_WRITEBACK_TEMP);
|
||||
bdi_writeout_inc(bdi);
|
||||
for (i = 0; i < req->num_pages; i++) {
|
||||
dec_bdi_stat(bdi, BDI_WRITEBACK);
|
||||
dec_zone_page_state(req->pages[i], NR_WRITEBACK_TEMP);
|
||||
bdi_writeout_inc(bdi);
|
||||
}
|
||||
wake_up(&fi->page_waitq);
|
||||
}
|
||||
|
||||
/* Called under fc->lock, may release and reacquire it */
|
||||
static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req)
|
||||
static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req,
|
||||
loff_t size)
|
||||
__releases(fc->lock)
|
||||
__acquires(fc->lock)
|
||||
{
|
||||
struct fuse_inode *fi = get_fuse_inode(req->inode);
|
||||
loff_t size = i_size_read(req->inode);
|
||||
struct fuse_write_in *inarg = &req->misc.write.in;
|
||||
__u64 data_size = req->num_pages * PAGE_CACHE_SIZE;
|
||||
|
||||
if (!fc->connected)
|
||||
goto out_free;
|
||||
|
||||
if (inarg->offset + PAGE_CACHE_SIZE <= size) {
|
||||
inarg->size = PAGE_CACHE_SIZE;
|
||||
if (inarg->offset + data_size <= size) {
|
||||
inarg->size = data_size;
|
||||
} else if (inarg->offset < size) {
|
||||
inarg->size = size & (PAGE_CACHE_SIZE - 1);
|
||||
inarg->size = size - inarg->offset;
|
||||
} else {
|
||||
/* Got truncated off completely */
|
||||
goto out_free;
|
||||
|
@ -1472,12 +1482,13 @@ __acquires(fc->lock)
|
|||
{
|
||||
struct fuse_conn *fc = get_fuse_conn(inode);
|
||||
struct fuse_inode *fi = get_fuse_inode(inode);
|
||||
size_t crop = i_size_read(inode);
|
||||
struct fuse_req *req;
|
||||
|
||||
while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) {
|
||||
req = list_entry(fi->queued_writes.next, struct fuse_req, list);
|
||||
list_del_init(&req->list);
|
||||
fuse_send_writepage(fc, req);
|
||||
fuse_send_writepage(fc, req, crop);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1488,12 +1499,62 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req)
|
|||
|
||||
mapping_set_error(inode->i_mapping, req->out.h.error);
|
||||
spin_lock(&fc->lock);
|
||||
while (req->misc.write.next) {
|
||||
struct fuse_conn *fc = get_fuse_conn(inode);
|
||||
struct fuse_write_in *inarg = &req->misc.write.in;
|
||||
struct fuse_req *next = req->misc.write.next;
|
||||
req->misc.write.next = next->misc.write.next;
|
||||
next->misc.write.next = NULL;
|
||||
next->ff = fuse_file_get(req->ff);
|
||||
list_add(&next->writepages_entry, &fi->writepages);
|
||||
|
||||
/*
|
||||
* Skip fuse_flush_writepages() to make it easy to crop requests
|
||||
* based on primary request size.
|
||||
*
|
||||
* 1st case (trivial): there are no concurrent activities using
|
||||
* fuse_set/release_nowrite. Then we're on safe side because
|
||||
* fuse_flush_writepages() would call fuse_send_writepage()
|
||||
* anyway.
|
||||
*
|
||||
* 2nd case: someone called fuse_set_nowrite and it is waiting
|
||||
* now for completion of all in-flight requests. This happens
|
||||
* rarely and no more than once per page, so this should be
|
||||
* okay.
|
||||
*
|
||||
* 3rd case: someone (e.g. fuse_do_setattr()) is in the middle
|
||||
* of fuse_set_nowrite..fuse_release_nowrite section. The fact
|
||||
* that fuse_set_nowrite returned implies that all in-flight
|
||||
* requests were completed along with all of their secondary
|
||||
* requests. Further primary requests are blocked by negative
|
||||
* writectr. Hence there cannot be any in-flight requests and
|
||||
* no invocations of fuse_writepage_end() while we're in
|
||||
* fuse_set_nowrite..fuse_release_nowrite section.
|
||||
*/
|
||||
fuse_send_writepage(fc, next, inarg->offset + inarg->size);
|
||||
}
|
||||
fi->writectr--;
|
||||
fuse_writepage_finish(fc, req);
|
||||
spin_unlock(&fc->lock);
|
||||
fuse_writepage_free(fc, req);
|
||||
}
|
||||
|
||||
static struct fuse_file *fuse_write_file_get(struct fuse_conn *fc,
|
||||
struct fuse_inode *fi)
|
||||
{
|
||||
struct fuse_file *ff = NULL;
|
||||
|
||||
spin_lock(&fc->lock);
|
||||
if (!WARN_ON(list_empty(&fi->write_files))) {
|
||||
ff = list_entry(fi->write_files.next, struct fuse_file,
|
||||
write_entry);
|
||||
fuse_file_get(ff);
|
||||
}
|
||||
spin_unlock(&fc->lock);
|
||||
|
||||
return ff;
|
||||
}
|
||||
|
||||
static int fuse_writepage_locked(struct page *page)
|
||||
{
|
||||
struct address_space *mapping = page->mapping;
|
||||
|
@ -1501,8 +1562,8 @@ static int fuse_writepage_locked(struct page *page)
|
|||
struct fuse_conn *fc = get_fuse_conn(inode);
|
||||
struct fuse_inode *fi = get_fuse_inode(inode);
|
||||
struct fuse_req *req;
|
||||
struct fuse_file *ff;
|
||||
struct page *tmp_page;
|
||||
int error = -ENOMEM;
|
||||
|
||||
set_page_writeback(page);
|
||||
|
||||
|
@ -1515,16 +1576,16 @@ static int fuse_writepage_locked(struct page *page)
|
|||
if (!tmp_page)
|
||||
goto err_free;
|
||||
|
||||
spin_lock(&fc->lock);
|
||||
BUG_ON(list_empty(&fi->write_files));
|
||||
ff = list_entry(fi->write_files.next, struct fuse_file, write_entry);
|
||||
req->ff = fuse_file_get(ff);
|
||||
spin_unlock(&fc->lock);
|
||||
error = -EIO;
|
||||
req->ff = fuse_write_file_get(fc, fi);
|
||||
if (!req->ff)
|
||||
goto err_free;
|
||||
|
||||
fuse_write_fill(req, ff, page_offset(page), 0);
|
||||
fuse_write_fill(req, req->ff, page_offset(page), 0);
|
||||
|
||||
copy_highpage(tmp_page, page);
|
||||
req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
|
||||
req->misc.write.next = NULL;
|
||||
req->in.argpages = 1;
|
||||
req->num_pages = 1;
|
||||
req->pages[0] = tmp_page;
|
||||
|
@ -1550,19 +1611,263 @@ err_free:
|
|||
fuse_request_free(req);
|
||||
err:
|
||||
end_page_writeback(page);
|
||||
return -ENOMEM;
|
||||
return error;
|
||||
}
|
||||
|
||||
static int fuse_writepage(struct page *page, struct writeback_control *wbc)
|
||||
{
|
||||
int err;
|
||||
|
||||
if (fuse_page_is_writeback(page->mapping->host, page->index)) {
|
||||
/*
|
||||
* ->writepages() should be called for sync() and friends. We
|
||||
* should only get here on direct reclaim and then we are
|
||||
* allowed to skip a page which is already in flight
|
||||
*/
|
||||
WARN_ON(wbc->sync_mode == WB_SYNC_ALL);
|
||||
|
||||
redirty_page_for_writepage(wbc, page);
|
||||
return 0;
|
||||
}
|
||||
|
||||
err = fuse_writepage_locked(page);
|
||||
unlock_page(page);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
 * State shared between fuse_writepages() and its per-page callback
 * fuse_writepages_fill() while one writeback pass is in progress.
 */
struct fuse_fill_wb_data {
|
||||
struct fuse_req *req; /* request currently being filled; NULL when none is open */
|
||||
struct fuse_file *ff; /* file from fuse_write_file_get(); put at the end of fuse_writepages() */
|
||||
struct inode *inode; /* inode whose mapping is being written back */
|
||||
struct page **orig_pages; /* page-cache pages behind req's temporary copies (FUSE_MAX_PAGES_PER_REQ slots) */
|
||||
};
|
||||
|
||||
static void fuse_writepages_send(struct fuse_fill_wb_data *data)
|
||||
{
|
||||
struct fuse_req *req = data->req;
|
||||
struct inode *inode = data->inode;
|
||||
struct fuse_conn *fc = get_fuse_conn(inode);
|
||||
struct fuse_inode *fi = get_fuse_inode(inode);
|
||||
int num_pages = req->num_pages;
|
||||
int i;
|
||||
|
||||
req->ff = fuse_file_get(data->ff);
|
||||
spin_lock(&fc->lock);
|
||||
list_add_tail(&req->list, &fi->queued_writes);
|
||||
fuse_flush_writepages(inode);
|
||||
spin_unlock(&fc->lock);
|
||||
|
||||
for (i = 0; i < num_pages; i++)
|
||||
end_page_writeback(data->orig_pages[i]);
|
||||
}
|
||||
|
||||
static bool fuse_writepage_in_flight(struct fuse_req *new_req,
|
||||
struct page *page)
|
||||
{
|
||||
struct fuse_conn *fc = get_fuse_conn(new_req->inode);
|
||||
struct fuse_inode *fi = get_fuse_inode(new_req->inode);
|
||||
struct fuse_req *tmp;
|
||||
struct fuse_req *old_req;
|
||||
bool found = false;
|
||||
pgoff_t curr_index;
|
||||
|
||||
BUG_ON(new_req->num_pages != 0);
|
||||
|
||||
spin_lock(&fc->lock);
|
||||
list_del(&new_req->writepages_entry);
|
||||
list_for_each_entry(old_req, &fi->writepages, writepages_entry) {
|
||||
BUG_ON(old_req->inode != new_req->inode);
|
||||
curr_index = old_req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
|
||||
if (curr_index <= page->index &&
|
||||
page->index < curr_index + old_req->num_pages) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
list_add(&new_req->writepages_entry, &fi->writepages);
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
new_req->num_pages = 1;
|
||||
for (tmp = old_req; tmp != NULL; tmp = tmp->misc.write.next) {
|
||||
BUG_ON(tmp->inode != new_req->inode);
|
||||
curr_index = tmp->misc.write.in.offset >> PAGE_CACHE_SHIFT;
|
||||
if (tmp->num_pages == 1 &&
|
||||
curr_index == page->index) {
|
||||
old_req = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
if (old_req->num_pages == 1 && (old_req->state == FUSE_REQ_INIT ||
|
||||
old_req->state == FUSE_REQ_PENDING)) {
|
||||
struct backing_dev_info *bdi = page->mapping->backing_dev_info;
|
||||
|
||||
copy_highpage(old_req->pages[0], page);
|
||||
spin_unlock(&fc->lock);
|
||||
|
||||
dec_bdi_stat(bdi, BDI_WRITEBACK);
|
||||
dec_zone_page_state(page, NR_WRITEBACK_TEMP);
|
||||
bdi_writeout_inc(bdi);
|
||||
fuse_writepage_free(fc, new_req);
|
||||
fuse_request_free(new_req);
|
||||
goto out;
|
||||
} else {
|
||||
new_req->misc.write.next = old_req->misc.write.next;
|
||||
old_req->misc.write.next = new_req;
|
||||
}
|
||||
out_unlock:
|
||||
spin_unlock(&fc->lock);
|
||||
out:
|
||||
return found;
|
||||
}
|
||||
|
||||
static int fuse_writepages_fill(struct page *page,
|
||||
struct writeback_control *wbc, void *_data)
|
||||
{
|
||||
struct fuse_fill_wb_data *data = _data;
|
||||
struct fuse_req *req = data->req;
|
||||
struct inode *inode = data->inode;
|
||||
struct fuse_conn *fc = get_fuse_conn(inode);
|
||||
struct page *tmp_page;
|
||||
bool is_writeback;
|
||||
int err;
|
||||
|
||||
if (!data->ff) {
|
||||
err = -EIO;
|
||||
data->ff = fuse_write_file_get(fc, get_fuse_inode(inode));
|
||||
if (!data->ff)
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
/*
|
||||
* Being under writeback is unlikely but possible. For example direct
|
||||
* read to an mmaped fuse file will set the page dirty twice; once when
|
||||
* the pages are faulted with get_user_pages(), and then after the read
|
||||
* completed.
|
||||
*/
|
||||
is_writeback = fuse_page_is_writeback(inode, page->index);
|
||||
|
||||
if (req && req->num_pages &&
|
||||
(is_writeback || req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
|
||||
(req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_write ||
|
||||
data->orig_pages[req->num_pages - 1]->index + 1 != page->index)) {
|
||||
fuse_writepages_send(data);
|
||||
data->req = NULL;
|
||||
}
|
||||
err = -ENOMEM;
|
||||
tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
|
||||
if (!tmp_page)
|
||||
goto out_unlock;
|
||||
|
||||
/*
|
||||
* The page must not be redirtied until the writeout is completed
|
||||
* (i.e. userspace has sent a reply to the write request). Otherwise
|
||||
* there could be more than one temporary page instance for each real
|
||||
* page.
|
||||
*
|
||||
* This is ensured by holding the page lock in page_mkwrite() while
|
||||
* checking fuse_page_is_writeback(). We already hold the page lock
|
||||
* since clear_page_dirty_for_io() and keep it held until we add the
|
||||
* request to the fi->writepages list and increment req->num_pages.
|
||||
* After this fuse_page_is_writeback() will indicate that the page is
|
||||
* under writeback, so we can release the page lock.
|
||||
*/
|
||||
if (data->req == NULL) {
|
||||
struct fuse_inode *fi = get_fuse_inode(inode);
|
||||
|
||||
err = -ENOMEM;
|
||||
req = fuse_request_alloc_nofs(FUSE_MAX_PAGES_PER_REQ);
|
||||
if (!req) {
|
||||
__free_page(tmp_page);
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
fuse_write_fill(req, data->ff, page_offset(page), 0);
|
||||
req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
|
||||
req->misc.write.next = NULL;
|
||||
req->in.argpages = 1;
|
||||
req->background = 1;
|
||||
req->num_pages = 0;
|
||||
req->end = fuse_writepage_end;
|
||||
req->inode = inode;
|
||||
|
||||
spin_lock(&fc->lock);
|
||||
list_add(&req->writepages_entry, &fi->writepages);
|
||||
spin_unlock(&fc->lock);
|
||||
|
||||
data->req = req;
|
||||
}
|
||||
set_page_writeback(page);
|
||||
|
||||
copy_highpage(tmp_page, page);
|
||||
req->pages[req->num_pages] = tmp_page;
|
||||
req->page_descs[req->num_pages].offset = 0;
|
||||
req->page_descs[req->num_pages].length = PAGE_SIZE;
|
||||
|
||||
inc_bdi_stat(page->mapping->backing_dev_info, BDI_WRITEBACK);
|
||||
inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
|
||||
|
||||
err = 0;
|
||||
if (is_writeback && fuse_writepage_in_flight(req, page)) {
|
||||
end_page_writeback(page);
|
||||
data->req = NULL;
|
||||
goto out_unlock;
|
||||
}
|
||||
data->orig_pages[req->num_pages] = page;
|
||||
|
||||
/*
|
||||
* Protected by fc->lock against concurrent access by
|
||||
* fuse_page_is_writeback().
|
||||
*/
|
||||
spin_lock(&fc->lock);
|
||||
req->num_pages++;
|
||||
spin_unlock(&fc->lock);
|
||||
|
||||
out_unlock:
|
||||
unlock_page(page);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int fuse_writepages(struct address_space *mapping,
|
||||
struct writeback_control *wbc)
|
||||
{
|
||||
struct inode *inode = mapping->host;
|
||||
struct fuse_fill_wb_data data;
|
||||
int err;
|
||||
|
||||
err = -EIO;
|
||||
if (is_bad_inode(inode))
|
||||
goto out;
|
||||
|
||||
data.inode = inode;
|
||||
data.req = NULL;
|
||||
data.ff = NULL;
|
||||
|
||||
err = -ENOMEM;
|
||||
data.orig_pages = kzalloc(sizeof(struct page *) *
|
||||
FUSE_MAX_PAGES_PER_REQ,
|
||||
GFP_NOFS);
|
||||
if (!data.orig_pages)
|
||||
goto out;
|
||||
|
||||
err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data);
|
||||
if (data.req) {
|
||||
/* Ignore errors if we can write at least one page */
|
||||
BUG_ON(!data.req->num_pages);
|
||||
fuse_writepages_send(&data);
|
||||
err = 0;
|
||||
}
|
||||
if (data.ff)
|
||||
fuse_file_put(data.ff, false);
|
||||
|
||||
kfree(data.orig_pages);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int fuse_launder_page(struct page *page)
|
||||
{
|
||||
int err = 0;
|
||||
|
@ -1602,14 +1907,17 @@ static void fuse_vma_close(struct vm_area_struct *vma)
|
|||
static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||
{
|
||||
struct page *page = vmf->page;
|
||||
/*
|
||||
* Don't use page->mapping as it may become NULL from a
|
||||
* concurrent truncate.
|
||||
*/
|
||||
struct inode *inode = vma->vm_file->f_mapping->host;
|
||||
struct inode *inode = file_inode(vma->vm_file);
|
||||
|
||||
file_update_time(vma->vm_file);
|
||||
lock_page(page);
|
||||
if (page->mapping != inode->i_mapping) {
|
||||
unlock_page(page);
|
||||
return VM_FAULT_NOPAGE;
|
||||
}
|
||||
|
||||
fuse_wait_on_page_writeback(inode, page->index);
|
||||
return 0;
|
||||
return VM_FAULT_LOCKED;
|
||||
}
|
||||
|
||||
static const struct vm_operations_struct fuse_file_vm_ops = {
|
||||
|
@ -2581,6 +2889,7 @@ static const struct file_operations fuse_direct_io_file_operations = {
|
|||
static const struct address_space_operations fuse_file_aops = {
|
||||
.readpage = fuse_readpage,
|
||||
.writepage = fuse_writepage,
|
||||
.writepages = fuse_writepages,
|
||||
.launder_page = fuse_launder_page,
|
||||
.readpages = fuse_readpages,
|
||||
.set_page_dirty = __set_page_dirty_nobuffers,
|
||||
|
|
|
@ -321,6 +321,7 @@ struct fuse_req {
|
|||
struct {
|
||||
struct fuse_write_in in;
|
||||
struct fuse_write_out out;
|
||||
struct fuse_req *next;
|
||||
} write;
|
||||
struct fuse_notify_retrieve_in retrieve_in;
|
||||
struct fuse_lk_in lk_in;
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
#define I2O_MINOR 166
|
||||
#define MICROCODE_MINOR 184
|
||||
#define TUN_MINOR 200
|
||||
#define CUSE_MINOR 203
|
||||
#define MWAVE_MINOR 219 /* ACP/Mwave Modem */
|
||||
#define MPT_MINOR 220
|
||||
#define MPT2SAS_MINOR 221
|
||||
|
|
Loading…
Reference in New Issue