From 8ac835056ca39b242d98332f46e4d65428a8b7db Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 7 Dec 2010 20:16:56 +0100 Subject: [PATCH 1/4] fuse: ioctl cleanup Get rid of unnecessary page_address()-es. Signed-off-by: Miklos Szeredi CC: Tejun Heo --- fs/fuse/file.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 8b984a2cebbd..ca3b6bbb3790 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1740,7 +1740,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, struct fuse_ioctl_out outarg; struct fuse_req *req = NULL; struct page **pages = NULL; - struct page *iov_page = NULL; + struct iovec *iov_page = NULL; struct iovec *in_iov = NULL, *out_iov = NULL; unsigned int in_iovs = 0, out_iovs = 0, num_pages = 0, max_pages; size_t in_size, out_size, transferred; @@ -1751,7 +1751,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, err = -ENOMEM; pages = kzalloc(sizeof(pages[0]) * FUSE_MAX_PAGES_PER_REQ, GFP_KERNEL); - iov_page = alloc_page(GFP_KERNEL); + iov_page = (struct iovec *) __get_free_page(GFP_KERNEL); if (!pages || !iov_page) goto out; @@ -1760,7 +1760,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, * RETRY from server is not allowed. */ if (!(flags & FUSE_IOCTL_UNRESTRICTED)) { - struct iovec *iov = page_address(iov_page); + struct iovec *iov = iov_page; iov->iov_base = (void __user *)arg; iov->iov_len = _IOC_SIZE(cmd); @@ -1841,7 +1841,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, /* did it ask for retry? */ if (outarg.flags & FUSE_IOCTL_RETRY) { - char *vaddr; + void *vaddr; /* no retry if in restricted mode */ err = -EIO; @@ -1862,14 +1862,14 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, goto out; vaddr = kmap_atomic(pages[0], KM_USER0); - err = fuse_copy_ioctl_iovec(page_address(iov_page), vaddr, + err = fuse_copy_ioctl_iovec(iov_page, vaddr, transferred, in_iovs + out_iovs, (flags & FUSE_IOCTL_COMPAT) != 0); kunmap_atomic(vaddr, KM_USER0); if (err) goto out; - in_iov = page_address(iov_page); + in_iov = iov_page; out_iov = in_iov + in_iovs; err = fuse_verify_ioctl_iov(in_iov, in_iovs); @@ -1891,8 +1891,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, out: if (req) fuse_put_request(fc, req); - if (iov_page) - __free_page(iov_page); + free_page((unsigned long) iov_page); while (num_pages) __free_page(pages[--num_pages]); kfree(pages); From 07e77dca8a1f17a724a9b7449f0ca02e70e9d057 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 7 Dec 2010 20:16:56 +0100 Subject: [PATCH 2/4] fuse: separate queue for FORGET requests Terje Malmedal reports that a fuse filesystem with 32 million inodes on a machine with lots of memory can go unresponsive for up to 30 minutes when all those inodes are evicted from the icache. The reason is that FORGET messages, sent when the inode is evicted, are queued up together with regular filesystem requests, and while the huge queue of FORGET messages are processed no other filesystem operation can proceed. Since a full fuse request structure is allocated for each inode, these take up quite a bit of memory as well. To solve these issues, create a slim 'fuse_forget_link' structure containing just the minimum of information required to send the FORGET request and chain these on a separate queue. When userspace is asking for a request make sure that FORGET and non-FORGET requests are selected fairly: for each 8 non-FORGET allow 16 FORGET requests. This will make sure FORGETs do not pile up, yet other requests are also allowed to proceed while the queued FORGETs are processed. Reported-by: Terje Malmedal Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 86 +++++++++++++++++++++++++++++++++++++++++++----- fs/fuse/dir.c | 53 +++++++++++++++-------------- fs/fuse/fuse_i.h | 28 +++++++++++----- fs/fuse/inode.c | 30 +++++++---------- 4 files changed, 133 insertions(+), 64 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 6e07696308dc..fed65303eeeb 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -251,6 +251,20 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req) kill_fasync(&fc->fasync, SIGIO, POLL_IN); } +void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, + u64 nodeid, u64 nlookup) +{ + forget->nodeid = nodeid; + forget->nlookup = nlookup; + + spin_lock(&fc->lock); + fc->forget_list_tail->next = forget; + fc->forget_list_tail = forget; + wake_up(&fc->waitq); + kill_fasync(&fc->fasync, SIGIO, POLL_IN); + spin_unlock(&fc->lock); +} + static void flush_bg_queue(struct fuse_conn *fc) { while (fc->active_background < fc->max_background && @@ -438,12 +452,6 @@ static void fuse_request_send_nowait(struct fuse_conn *fc, struct fuse_req *req) } } -void fuse_request_send_noreply(struct fuse_conn *fc, struct fuse_req *req) -{ - req->isreply = 0; - fuse_request_send_nowait(fc, req); -} - void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req) { req->isreply = 1; @@ -896,9 +904,15 @@ static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs, return err; } +static int forget_pending(struct fuse_conn *fc) +{ + return fc->forget_list_head.next != NULL; +} + static int request_pending(struct fuse_conn *fc) { - return !list_empty(&fc->pending) || !list_empty(&fc->interrupts); + return !list_empty(&fc->pending) || !list_empty(&fc->interrupts) || + forget_pending(fc); } /* Wait until a request is available on the pending list */ @@ -960,6 +974,50 @@ __releases(fc->lock) return err ? err : reqsize; } +static struct fuse_forget_link *dequeue_forget(struct fuse_conn *fc) +{ + struct fuse_forget_link *forget = fc->forget_list_head.next; + + fc->forget_list_head.next = forget->next; + if (fc->forget_list_head.next == NULL) + fc->forget_list_tail = &fc->forget_list_head; + + return forget; +} + +static int fuse_read_single_forget(struct fuse_conn *fc, + struct fuse_copy_state *cs, + size_t nbytes) +__releases(fc->lock) +{ + int err; + struct fuse_forget_link *forget = dequeue_forget(fc); + struct fuse_forget_in arg = { + .nlookup = forget->nlookup, + }; + struct fuse_in_header ih = { + .opcode = FUSE_FORGET, + .nodeid = forget->nodeid, + .unique = fuse_get_unique(fc), + .len = sizeof(ih) + sizeof(arg), + }; + + spin_unlock(&fc->lock); + kfree(forget); + if (nbytes < ih.len) + return -EINVAL; + + err = fuse_copy_one(cs, &ih, sizeof(ih)); + if (!err) + err = fuse_copy_one(cs, &arg, sizeof(arg)); + fuse_copy_finish(cs); + + if (err) + return err; + + return ih.len; +} + /* * Read a single request into the userspace filesystem's buffer. This * function waits until a request is available, then removes it from @@ -998,6 +1056,14 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, return fuse_read_interrupt(fc, cs, nbytes, req); } + if (forget_pending(fc)) { + if (list_empty(&fc->pending) || fc->forget_batch-- > 0) + return fuse_read_single_forget(fc, cs, nbytes); + + if (fc->forget_batch <= -8) + fc->forget_batch = 16; + } + req = list_entry(fc->pending.next, struct fuse_req, list); req->state = FUSE_REQ_READING; list_move(&req->list, &fc->io); @@ -1090,7 +1156,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, if (!fc) return -EPERM; - bufs = kmalloc(pipe->buffers * sizeof (struct pipe_buffer), GFP_KERNEL); + bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL); if (!bufs) return -ENOMEM; @@ -1626,7 +1692,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, if (!fc) return -EPERM; - bufs = kmalloc(pipe->buffers * sizeof (struct pipe_buffer), GFP_KERNEL); + bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL); if (!bufs) return -ENOMEM; @@ -1770,6 +1836,8 @@ __acquires(fc->lock) flush_bg_queue(fc); end_requests(fc, &fc->pending); end_requests(fc, &fc->processing); + while (forget_pending(fc)) + kfree(dequeue_forget(fc)); } /* diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index c9627c95482d..6ea42e98cb17 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -10,9 +10,9 @@ #include #include -#include #include #include +#include #if BITS_PER_LONG >= 64 static inline void fuse_dentry_settime(struct dentry *entry, u64 time) @@ -165,7 +165,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) struct fuse_entry_out outarg; struct fuse_conn *fc; struct fuse_req *req; - struct fuse_req *forget_req; + struct fuse_forget_link *forget; struct dentry *parent; u64 attr_version; @@ -178,8 +178,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) if (IS_ERR(req)) return 0; - forget_req = fuse_get_req(fc); - if (IS_ERR(forget_req)) { + forget = fuse_alloc_forget(); + if (!forget) { fuse_put_request(fc, req); return 0; } @@ -199,15 +199,14 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) if (!err) { struct fuse_inode *fi = get_fuse_inode(inode); if (outarg.nodeid != get_node_id(inode)) { - fuse_send_forget(fc, forget_req, - outarg.nodeid, 1); + fuse_queue_forget(fc, forget, outarg.nodeid, 1); return 0; } spin_lock(&fc->lock); fi->nlookup++; spin_unlock(&fc->lock); } - fuse_put_request(fc, forget_req); + kfree(forget); if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT) return 0; @@ -259,7 +258,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name, { struct fuse_conn *fc = get_fuse_conn_super(sb); struct fuse_req *req; - struct fuse_req *forget_req; + struct fuse_forget_link *forget; u64 attr_version; int err; @@ -273,9 +272,9 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name, if (IS_ERR(req)) goto out; - forget_req = fuse_get_req(fc); - err = PTR_ERR(forget_req); - if (IS_ERR(forget_req)) { + forget = fuse_alloc_forget(); + err = -ENOMEM; + if (!forget) { fuse_put_request(fc, req); goto out; } @@ -301,13 +300,13 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name, attr_version); err = -ENOMEM; if (!*inode) { - fuse_send_forget(fc, forget_req, outarg->nodeid, 1); + fuse_queue_forget(fc, forget, outarg->nodeid, 1); goto out; } err = 0; out_put_forget: - fuse_put_request(fc, forget_req); + kfree(forget); out: return err; } @@ -374,7 +373,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, struct inode *inode; struct fuse_conn *fc = get_fuse_conn(dir); struct fuse_req *req; - struct fuse_req *forget_req; + struct fuse_forget_link *forget; struct fuse_create_in inarg; struct fuse_open_out outopen; struct fuse_entry_out outentry; @@ -388,9 +387,9 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, if (flags & O_DIRECT) return -EINVAL; - forget_req = fuse_get_req(fc); - if (IS_ERR(forget_req)) - return PTR_ERR(forget_req); + forget = fuse_alloc_forget(); + if (!forget) + return -ENOMEM; req = fuse_get_req(fc); err = PTR_ERR(req); @@ -448,10 +447,10 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, if (!inode) { flags &= ~(O_CREAT | O_EXCL | O_TRUNC); fuse_sync_release(ff, flags); - fuse_send_forget(fc, forget_req, outentry.nodeid, 1); + fuse_queue_forget(fc, forget, outentry.nodeid, 1); return -ENOMEM; } - fuse_put_request(fc, forget_req); + kfree(forget); d_instantiate(entry, inode); fuse_change_entry_timeout(entry, &outentry); fuse_invalidate_attr(dir); @@ -469,7 +468,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, out_put_request: fuse_put_request(fc, req); out_put_forget_req: - fuse_put_request(fc, forget_req); + kfree(forget); return err; } @@ -483,12 +482,12 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, struct fuse_entry_out outarg; struct inode *inode; int err; - struct fuse_req *forget_req; + struct fuse_forget_link *forget; - forget_req = fuse_get_req(fc); - if (IS_ERR(forget_req)) { + forget = fuse_alloc_forget(); + if (!forget) { fuse_put_request(fc, req); - return PTR_ERR(forget_req); + return -ENOMEM; } memset(&outarg, 0, sizeof(outarg)); @@ -515,10 +514,10 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, &outarg.attr, entry_attr_timeout(&outarg), 0); if (!inode) { - fuse_send_forget(fc, forget_req, outarg.nodeid, 1); + fuse_queue_forget(fc, forget, outarg.nodeid, 1); return -ENOMEM; } - fuse_put_request(fc, forget_req); + kfree(forget); if (S_ISDIR(inode->i_mode)) { struct dentry *alias; @@ -541,7 +540,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, return 0; out_put_forget_req: - fuse_put_request(fc, forget_req); + kfree(forget); return err; } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 57d4a3a0f102..33369c63a522 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -53,6 +53,13 @@ extern struct mutex fuse_mutex; extern unsigned max_user_bgreq; extern unsigned max_user_congthresh; +/* One forget request */ +struct fuse_forget_link { + u64 nodeid; + u64 nlookup; + struct fuse_forget_link *next; +}; + /** FUSE inode */ struct fuse_inode { /** Inode data */ @@ -66,7 +73,7 @@ struct fuse_inode { u64 nlookup; /** The request used for sending the FORGET message */ - struct fuse_req *forget_req; + struct fuse_forget_link *forget; /** Time in jiffies until the file attributes are valid */ u64 i_time; @@ -255,7 +262,6 @@ struct fuse_req { /** Data for asynchronous requests */ union { - struct fuse_forget_in forget_in; struct { struct fuse_release_in in; struct path path; @@ -369,6 +375,13 @@ struct fuse_conn { /** Pending interrupts */ struct list_head interrupts; + /** Queue of pending forgets */ + struct fuse_forget_link forget_list_head; + struct fuse_forget_link *forget_list_tail; + + /** Batching of FORGET requests (positive indicates FORGET batch) */ + int forget_batch; + /** Flag indicating if connection is blocked. This will be the case before the INIT reply is received, and if there are too many outstading backgrounds requests */ @@ -543,8 +556,10 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name, /** * Send FORGET command */ -void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, - u64 nodeid, u64 nlookup); +void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, + u64 nodeid, u64 nlookup); + +struct fuse_forget_link *fuse_alloc_forget(void); /** * Initialize READ or READDIR request @@ -655,11 +670,6 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req); */ void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req); -/** - * Send a request with no reply - */ -void fuse_request_send_noreply(struct fuse_conn *fc, struct fuse_req *req); - /** * Send a request in the background */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index cfce3ad86a92..7ba4d351da65 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -71,6 +71,11 @@ struct fuse_mount_data { unsigned blksize; }; +struct fuse_forget_link *fuse_alloc_forget() +{ + return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL); +} + static struct inode *fuse_alloc_inode(struct super_block *sb) { struct inode *inode; @@ -90,8 +95,8 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) INIT_LIST_HEAD(&fi->queued_writes); INIT_LIST_HEAD(&fi->writepages); init_waitqueue_head(&fi->page_waitq); - fi->forget_req = fuse_request_alloc(); - if (!fi->forget_req) { + fi->forget = fuse_alloc_forget(); + if (!fi->forget) { kmem_cache_free(fuse_inode_cachep, inode); return NULL; } @@ -104,24 +109,10 @@ static void fuse_destroy_inode(struct inode *inode) struct fuse_inode *fi = get_fuse_inode(inode); BUG_ON(!list_empty(&fi->write_files)); BUG_ON(!list_empty(&fi->queued_writes)); - if (fi->forget_req) - fuse_request_free(fi->forget_req); + kfree(fi->forget); kmem_cache_free(fuse_inode_cachep, inode); } -void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, - u64 nodeid, u64 nlookup) -{ - struct fuse_forget_in *inarg = &req->misc.forget_in; - inarg->nlookup = nlookup; - req->in.h.opcode = FUSE_FORGET; - req->in.h.nodeid = nodeid; - req->in.numargs = 1; - req->in.args[0].size = sizeof(struct fuse_forget_in); - req->in.args[0].value = inarg; - fuse_request_send_noreply(fc, req); -} - static void fuse_evict_inode(struct inode *inode) { truncate_inode_pages(&inode->i_data, 0); @@ -129,8 +120,8 @@ static void fuse_evict_inode(struct inode *inode) if (inode->i_sb->s_flags & MS_ACTIVE) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); - fuse_send_forget(fc, fi->forget_req, fi->nodeid, fi->nlookup); - fi->forget_req = NULL; + fuse_queue_forget(fc, fi->forget, fi->nodeid, fi->nlookup); + fi->forget = NULL; } } @@ -534,6 +525,7 @@ void fuse_conn_init(struct fuse_conn *fc) INIT_LIST_HEAD(&fc->interrupts); INIT_LIST_HEAD(&fc->bg_queue); INIT_LIST_HEAD(&fc->entry); + fc->forget_list_tail = &fc->forget_list_head; atomic_set(&fc->num_waiting, 0); fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND; fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD; From 02c048b919455aaa38628563cdcc2e691c8a9f53 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 7 Dec 2010 20:16:56 +0100 Subject: [PATCH 3/4] fuse: allow batching of FORGET requests Terje Malmedal reports that a fuse filesystem with 32 million inodes on a machine with lots of memory can take up to 30 minutes to process FORGET requests when all those inodes are evicted from the icache. To solve this, create a BATCH_FORGET request that allows up to about 8000 FORGET requests to be sent in a single message. This request is only sent if userspace supports interface version 7.16 or later, otherwise fall back to sending individual FORGET messages. Reported-by: Terje Malmedal Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 92 ++++++++++++++++++++++++++++++++++++++------ fs/fuse/fuse_i.h | 3 +- include/linux/fuse.h | 16 +++++++- 3 files changed, 97 insertions(+), 14 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index fed65303eeeb..cf8d28d1fbad 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -254,8 +254,8 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req) void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, u64 nodeid, u64 nlookup) { - forget->nodeid = nodeid; - forget->nlookup = nlookup; + forget->forget_one.nodeid = nodeid; + forget->forget_one.nlookup = nlookup; spin_lock(&fc->lock); fc->forget_list_tail->next = forget; @@ -974,15 +974,26 @@ __releases(fc->lock) return err ? err : reqsize; } -static struct fuse_forget_link *dequeue_forget(struct fuse_conn *fc) +static struct fuse_forget_link *dequeue_forget(struct fuse_conn *fc, + unsigned max, + unsigned *countp) { - struct fuse_forget_link *forget = fc->forget_list_head.next; + struct fuse_forget_link *head = fc->forget_list_head.next; + struct fuse_forget_link **newhead = &head; + unsigned count; - fc->forget_list_head.next = forget->next; + for (count = 0; *newhead != NULL && count < max; count++) + newhead = &(*newhead)->next; + + fc->forget_list_head.next = *newhead; + *newhead = NULL; if (fc->forget_list_head.next == NULL) fc->forget_list_tail = &fc->forget_list_head; - return forget; + if (countp != NULL) + *countp = count; + + return head; } static int fuse_read_single_forget(struct fuse_conn *fc, @@ -991,13 +1002,13 @@ static int fuse_read_single_forget(struct fuse_conn *fc, __releases(fc->lock) { int err; - struct fuse_forget_link *forget = dequeue_forget(fc); + struct fuse_forget_link *forget = dequeue_forget(fc, 1, NULL); struct fuse_forget_in arg = { - .nlookup = forget->nlookup, + .nlookup = forget->forget_one.nlookup, }; struct fuse_in_header ih = { .opcode = FUSE_FORGET, - .nodeid = forget->nodeid, + .nodeid = forget->forget_one.nodeid, .unique = fuse_get_unique(fc), .len = sizeof(ih) + sizeof(arg), }; @@ -1018,6 +1029,65 @@ __releases(fc->lock) return ih.len; } +static int fuse_read_batch_forget(struct fuse_conn *fc, + struct fuse_copy_state *cs, size_t nbytes) +__releases(fc->lock) +{ + int err; + unsigned max_forgets; + unsigned count; + struct fuse_forget_link *head; + struct fuse_batch_forget_in arg = { .count = 0 }; + struct fuse_in_header ih = { + .opcode = FUSE_BATCH_FORGET, + .unique = fuse_get_unique(fc), + .len = sizeof(ih) + sizeof(arg), + }; + + if (nbytes < ih.len) { + spin_unlock(&fc->lock); + return -EINVAL; + } + + max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one); + head = dequeue_forget(fc, max_forgets, &count); + spin_unlock(&fc->lock); + + arg.count = count; + ih.len += count * sizeof(struct fuse_forget_one); + err = fuse_copy_one(cs, &ih, sizeof(ih)); + if (!err) + err = fuse_copy_one(cs, &arg, sizeof(arg)); + + while (head) { + struct fuse_forget_link *forget = head; + + if (!err) { + err = fuse_copy_one(cs, &forget->forget_one, + sizeof(forget->forget_one)); + } + head = forget->next; + kfree(forget); + } + + fuse_copy_finish(cs); + + if (err) + return err; + + return ih.len; +} + +static int fuse_read_forget(struct fuse_conn *fc, struct fuse_copy_state *cs, + size_t nbytes) +__releases(fc->lock) +{ + if (fc->minor < 16 || fc->forget_list_head.next->next == NULL) + return fuse_read_single_forget(fc, cs, nbytes); + else + return fuse_read_batch_forget(fc, cs, nbytes); +} + /* * Read a single request into the userspace filesystem's buffer. This * function waits until a request is available, then removes it from @@ -1058,7 +1128,7 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, if (forget_pending(fc)) { if (list_empty(&fc->pending) || fc->forget_batch-- > 0) - return fuse_read_single_forget(fc, cs, nbytes); + return fuse_read_forget(fc, cs, nbytes); if (fc->forget_batch <= -8) fc->forget_batch = 16; @@ -1837,7 +1907,7 @@ __acquires(fc->lock) end_requests(fc, &fc->pending); end_requests(fc, &fc->processing); while (forget_pending(fc)) - kfree(dequeue_forget(fc)); + kfree(dequeue_forget(fc, 1, NULL)); } /* diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 33369c63a522..ae5744a2f9e9 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -55,8 +55,7 @@ extern unsigned max_user_congthresh; /* One forget request */ struct fuse_forget_link { - u64 nodeid; - u64 nlookup; + struct fuse_forget_one forget_one; struct fuse_forget_link *next; }; diff --git a/include/linux/fuse.h b/include/linux/fuse.h index c3c578e09833..cf11881f4938 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -41,6 +41,9 @@ * 7.15 * - add store notify * - add retrieve notify + * + * 7.16 + * - add BATCH_FORGET request */ #ifndef _LINUX_FUSE_H @@ -72,7 +75,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 15 +#define FUSE_KERNEL_MINOR_VERSION 16 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -256,6 +259,7 @@ enum fuse_opcode { FUSE_IOCTL = 39, FUSE_POLL = 40, FUSE_NOTIFY_REPLY = 41, + FUSE_BATCH_FORGET = 42, /* CUSE specific operations */ CUSE_INIT = 4096, @@ -290,6 +294,16 @@ struct fuse_forget_in { __u64 nlookup; }; +struct fuse_forget_one { + __u64 nodeid; + __u64 nlookup; +}; + +struct fuse_batch_forget_in { + __u32 count; + __u32 dummy; +}; + struct fuse_getattr_in { __u32 getattr_flags; __u32 dummy; From 1baa26b2be92fe9917e2f7ef46d423b5dfa4da71 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 7 Dec 2010 20:16:56 +0100 Subject: [PATCH 4/4] fuse: fix ioctl ABI In kernel ABI version 7.16 and later FUSE_IOCTL_RETRY reply from a unrestricted IOCTL request shall return with an array of 'struct fuse_ioctl_iovec' instead of 'struct iovec'. This fixes the ABI ambiguity of 32bit vs. 64bit. Reported-by: "ccmail111" Signed-off-by: Miklos Szeredi CC: Tejun Heo --- fs/fuse/file.c | 53 +++++++++++++++++++++++++++++++++++++++----- include/linux/fuse.h | 10 +++++++++ 2 files changed, 58 insertions(+), 5 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index ca3b6bbb3790..95da1bc1c826 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1634,9 +1634,9 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov, * and 64bit. Fortunately we can determine which structure the server * used from the size of the reply. */ -static int fuse_copy_ioctl_iovec(struct iovec *dst, void *src, - size_t transferred, unsigned count, - bool is_compat) +static int fuse_copy_ioctl_iovec_old(struct iovec *dst, void *src, + size_t transferred, unsigned count, + bool is_compat) { #ifdef CONFIG_COMPAT if (count * sizeof(struct compat_iovec) == transferred) { @@ -1680,6 +1680,42 @@ static int fuse_verify_ioctl_iov(struct iovec *iov, size_t count) return 0; } +static int fuse_copy_ioctl_iovec(struct fuse_conn *fc, struct iovec *dst, + void *src, size_t transferred, unsigned count, + bool is_compat) +{ + unsigned i; + struct fuse_ioctl_iovec *fiov = src; + + if (fc->minor < 16) { + return fuse_copy_ioctl_iovec_old(dst, src, transferred, + count, is_compat); + } + + if (count * sizeof(struct fuse_ioctl_iovec) != transferred) + return -EIO; + + for (i = 0; i < count; i++) { + /* Did the server supply an inappropriate value? */ + if (fiov[i].base != (unsigned long) fiov[i].base || + fiov[i].len != (unsigned long) fiov[i].len) + return -EIO; + + dst[i].iov_base = (void __user *) (unsigned long) fiov[i].base; + dst[i].iov_len = (size_t) fiov[i].len; + +#ifdef CONFIG_COMPAT + if (is_compat && + (ptr_to_compat(dst[i].iov_base) != fiov[i].base || + (compat_size_t) dst[i].iov_len != fiov[i].len)) + return -EIO; +#endif + } + + return 0; +} + + /* * For ioctls, there is no generic way to determine how much memory * needs to be read and/or written. Furthermore, ioctls are allowed @@ -1746,8 +1782,15 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, size_t in_size, out_size, transferred; int err; +#if BITS_PER_LONG == 32 + inarg.flags |= FUSE_IOCTL_32BIT; +#else + if (flags & FUSE_IOCTL_COMPAT) + inarg.flags |= FUSE_IOCTL_32BIT; +#endif + /* assume all the iovs returned by client always fits in a page */ - BUILD_BUG_ON(sizeof(struct iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE); + BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE); err = -ENOMEM; pages = kzalloc(sizeof(pages[0]) * FUSE_MAX_PAGES_PER_REQ, GFP_KERNEL); @@ -1862,7 +1905,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, goto out; vaddr = kmap_atomic(pages[0], KM_USER0); - err = fuse_copy_ioctl_iovec(iov_page, vaddr, + err = fuse_copy_ioctl_iovec(fc, iov_page, vaddr, transferred, in_iovs + out_iovs, (flags & FUSE_IOCTL_COMPAT) != 0); kunmap_atomic(vaddr, KM_USER0); diff --git a/include/linux/fuse.h b/include/linux/fuse.h index cf11881f4938..d464de53db43 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -44,6 +44,9 @@ * * 7.16 * - add BATCH_FORGET request + * - FUSE_IOCTL_UNRESTRICTED shall now return with array of 'struct + * fuse_ioctl_iovec' instead of ambiguous 'struct iovec' + * - add FUSE_IOCTL_32BIT flag */ #ifndef _LINUX_FUSE_H @@ -203,12 +206,14 @@ struct fuse_file_lock { * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed * FUSE_IOCTL_RETRY: retry with new iovecs + * FUSE_IOCTL_32BIT: 32bit ioctl * * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs */ #define FUSE_IOCTL_COMPAT (1 << 0) #define FUSE_IOCTL_UNRESTRICTED (1 << 1) #define FUSE_IOCTL_RETRY (1 << 2) +#define FUSE_IOCTL_32BIT (1 << 3) #define FUSE_IOCTL_MAX_IOV 256 @@ -524,6 +529,11 @@ struct fuse_ioctl_in { __u32 out_size; }; +struct fuse_ioctl_iovec { + __u64 base; + __u64 len; +}; + struct fuse_ioctl_out { __s32 result; __u32 flags;