From 2067b2b3f4846402a040286135f98f46f8919939 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Jul 2019 14:38:51 +0100 Subject: [PATCH 1/6] afs: Fix the CB.ProbeUuid service handler to reply correctly Fix the service handler function for the CB.ProbeUuid RPC call so that it replies in the correct manner - that is an empty reply for success and an abort of 1 for failure. Putting 0 or 1 in an integer in the body of the reply should result in the fileserver throwing an RX_PROTOCOL_ERROR abort and discarding its record of the client; older servers, however, don't necessarily check that all the data got consumed, and so might incorrectly think that they got a positive response and associate the client with the wrong host record. If the client is incorrectly associated, this will result in callbacks intended for a different client being delivered to this one and then, when the other client connects and responds positively, all of the callback promises meant for the client that issued the improper response will be lost and it won't receive any further change notifications. Fixes: 9396d496d745 ("afs: support the CB.ProbeUuid RPC op") Signed-off-by: David Howells Reviewed-by: Jeffrey Altman --- fs/afs/cmservice.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 4f1b6f466ff5..b86195e4dc6c 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -505,18 +505,14 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work) struct afs_call *call = container_of(work, struct afs_call, work); struct afs_uuid *r = call->request; - struct { - __be32 match; - } reply; - _enter(""); if (memcmp(r, &call->net->uuid, sizeof(call->net->uuid)) == 0) - reply.match = htonl(0); + afs_send_empty_reply(call); else - reply.match = htonl(1); + rxrpc_kernel_abort_call(call->net->socket, call->rxcall, + 1, 1, "K-1"); - afs_send_simple_reply(call, &reply, sizeof(reply)); afs_put_call(call); _leave(""); } From 4a46fdba449a5cd890271df5a9e23927d519ed00 Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Tue, 30 Jul 2019 14:38:51 +0100 Subject: [PATCH 2/6] afs: Fix loop index mixup in afs_deliver_vl_get_entry_by_name_u() afs_deliver_vl_get_entry_by_name_u() scans through the vl entry received from the volume location server and builds a return list containing the sites that are currently valid. When assigning values for the return list, the index into the vl entry (i) is used rather than the one for the new list (entry->nr_server). If all sites are usable, this works out fine as the indices will match. If some sites are not valid, for example if AFS_VLSF_DONTUSE is set, fs_mask and the uuid will be set for the wrong return site. Fix this by using entry->nr_server as the index into the arrays being filled in rather than i. This can lead to EDESTADDRREQ errors if none of the returned sites have a valid fs_mask. Fixes: d2ddc776a458 ("afs: Overhaul volume and server record caching and fileserver rotation") Signed-off-by: Marc Dionne Signed-off-by: David Howells Reviewed-by: Jeffrey Altman --- fs/afs/vlclient.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index d7e0fd3c00df..cfb0ac4bd039 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -56,23 +56,24 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call) struct afs_uuid__xdr *xdr; struct afs_uuid *uuid; int j; + int n = entry->nr_servers; tmp = ntohl(uvldb->serverFlags[i]); if (tmp & AFS_VLSF_DONTUSE || (new_only && !(tmp & AFS_VLSF_NEWREPSITE))) continue; if (tmp & AFS_VLSF_RWVOL) { - entry->fs_mask[i] |= AFS_VOL_VTM_RW; + entry->fs_mask[n] |= AFS_VOL_VTM_RW; if (vlflags & AFS_VLF_BACKEXISTS) - entry->fs_mask[i] |= AFS_VOL_VTM_BAK; + entry->fs_mask[n] |= AFS_VOL_VTM_BAK; } if (tmp & AFS_VLSF_ROVOL) - entry->fs_mask[i] |= AFS_VOL_VTM_RO; - if (!entry->fs_mask[i]) + entry->fs_mask[n] |= AFS_VOL_VTM_RO; + if (!entry->fs_mask[n]) continue; xdr = &uvldb->serverNumber[i]; - uuid = (struct afs_uuid *)&entry->fs_server[i]; + uuid = (struct afs_uuid *)&entry->fs_server[n]; uuid->time_low = xdr->time_low; uuid->time_mid = htons(ntohl(xdr->time_mid)); uuid->time_hi_and_version = htons(ntohl(xdr->time_hi_and_version)); From a6eed4ab5dd4bfb696c1a3f49742b8d1846a66a0 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Tue, 30 Jul 2019 14:38:51 +0100 Subject: [PATCH 3/6] fs: afs: Fix a possible null-pointer dereference in afs_put_read() In afs_read_dir(), there is an if statement on line 255 to check whether req->pages is NULL: if (!req->pages) goto error; If req->pages is NULL, afs_put_read() on line 337 is executed. In afs_put_read(), req->pages[i] is used on line 195. Thus, a possible null-pointer dereference may occur in this case. To fix this possible bug, an if statement is added in afs_put_read() to check req->pages. This bug is found by a static analysis tool STCheck written by us. Fixes: f3ddee8dc4e2 ("afs: Fix directory handling") Signed-off-by: Jia-Ju Bai Signed-off-by: David Howells --- fs/afs/file.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/afs/file.c b/fs/afs/file.c index 56b69576274d..dd3c55c9101c 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -191,11 +191,13 @@ void afs_put_read(struct afs_read *req) int i; if (refcount_dec_and_test(&req->usage)) { - for (i = 0; i < req->nr_pages; i++) - if (req->pages[i]) - put_page(req->pages[i]); - if (req->pages != req->array) - kfree(req->pages); + if (req->pages) { + for (i = 0; i < req->nr_pages; i++) + if (req->pages[i]) + put_page(req->pages[i]); + if (req->pages != req->array) + kfree(req->pages); + } kfree(req); } } From 37c0bbb3326674940e657118306ac52364314523 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Jul 2019 14:38:51 +0100 Subject: [PATCH 4/6] afs: Fix off-by-one in afs_rename() expected data version calculation When afs_rename() calculates the expected data version of the target directory in a cross-directory rename, it doesn't increment it as it should, so it always thinks that the target inode is unexpectedly modified on the server. Fixes: a58823ac4589 ("afs: Fix application of status and callback to be under same lock") Signed-off-by: David Howells --- fs/afs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index e640d67274be..20aa18b38a49 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -1804,7 +1804,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, afs_end_vnode_operation(&fc); goto error_rehash; } - new_data_version = new_dvnode->status.data_version; + new_data_version = new_dvnode->status.data_version + 1; } else { new_data_version = orig_data_version; new_scb = &scb[0]; From 5dc84855b0fc7e1db182b55c5564fd539d6eff92 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Jul 2019 14:38:51 +0100 Subject: [PATCH 5/6] afs: Only update d_fsdata if different in afs_d_revalidate() In the in-kernel afs filesystem, d_fsdata is set with the data version of the parent directory. afs_d_revalidate() will update this to the current directory version, but it shouldn't do this if it the value it read from d_fsdata is the same as no lock is held and cmpxchg() is not used. Fix the code to only change the value if it is different from the current directory version. Fixes: 260a980317da ("[AFS]: Add "directory write" support.") Signed-off-by: David Howells --- fs/afs/dir.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 20aa18b38a49..618e26cea887 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -1017,7 +1017,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) dir_version = (long)dir->status.data_version; de_version = (long)dentry->d_fsdata; if (de_version == dir_version) - goto out_valid; + goto out_valid_noupdate; dir_version = (long)dir->invalid_before; if (de_version - dir_version >= 0) @@ -1081,6 +1081,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) out_valid: dentry->d_fsdata = (void *)dir_version; +out_valid_noupdate: dput(parent); key_put(key); _leave(" = 1 [valid]"); From 9dd0b82ef530cdfe805c9f7079c99e104be59a14 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Jul 2019 14:38:52 +0100 Subject: [PATCH 6/6] afs: Fix missing dentry data version updating In the in-kernel afs filesystem, the d_fsdata dentry field is used to hold the data version of the parent directory when it was created or when d_revalidate() last caused it to be updated. This is compared to the ->invalid_before field in the directory inode, rather than the actual data version number, thereby allowing changes due to local edits to be ignored. Only if the server data version gets bumped unexpectedly (eg. by a competing client), do we need to revalidate stuff. However, the d_fsdata field should also be updated if an rpc op is performed that modifies that particular dentry. Such ops return the revised data version of the directory(ies) involved, so we should use that. This is particularly problematic for rename, since a dentry from one directory may be moved directly into another directory (ie. mv a/x b/x). It would then be sporting the wrong data version - and if this is in the future, for the destination directory, revalidations would be missed, leading to foreign renames and hard-link deletion being missed. Fix this by the following means: (1) Return the data version number from operations that read the directory contents - if they issue the read. This starts in afs_dir_iterate() and is used, ignored or passed back by its callers. (2) In afs_lookup*(), set the dentry version to the version returned by (1) before d_splice_alias() is called and the dentry published. (3) In afs_d_revalidate(), set the dentry version to that returned from (1) if an rpc call was issued. This means that if a parallel procedure, such as mkdir(), modifies the directory, we won't accidentally use the data version from that. (4) In afs_{mkdir,create,link,symlink}(), set the new dentry's version to the directory data version before d_instantiate() is called. (5) In afs_{rmdir,unlink}, update the target dentry's version to the directory data version as soon as we've updated the directory inode. (6) In afs_rename(), we need to unhash the old dentry before we start so that we don't get afs_d_revalidate() reverting the version change in cross-directory renames. We then need to set both the old and the new dentry versions the data version of the new directory before we call d_move() as d_move() will rehash them. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: David Howells --- fs/afs/dir.c | 84 +++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 70 insertions(+), 14 deletions(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 618e26cea887..81207dc3c997 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -440,7 +440,7 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode, * iterate through the data blob that lists the contents of an AFS directory */ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx, - struct key *key) + struct key *key, afs_dataversion_t *_dir_version) { struct afs_vnode *dvnode = AFS_FS_I(dir); struct afs_xdr_dir_page *dbuf; @@ -460,6 +460,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx, req = afs_read_dir(dvnode, key); if (IS_ERR(req)) return PTR_ERR(req); + *_dir_version = req->data_version; /* round the file position up to the next entry boundary */ ctx->pos += sizeof(union afs_xdr_dirent) - 1; @@ -514,7 +515,10 @@ out: */ static int afs_readdir(struct file *file, struct dir_context *ctx) { - return afs_dir_iterate(file_inode(file), ctx, afs_file_key(file)); + afs_dataversion_t dir_version; + + return afs_dir_iterate(file_inode(file), ctx, afs_file_key(file), + &dir_version); } /* @@ -555,7 +559,8 @@ static int afs_lookup_one_filldir(struct dir_context *ctx, const char *name, * - just returns the FID the dentry name maps to if found */ static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry, - struct afs_fid *fid, struct key *key) + struct afs_fid *fid, struct key *key, + afs_dataversion_t *_dir_version) { struct afs_super_info *as = dir->i_sb->s_fs_info; struct afs_lookup_one_cookie cookie = { @@ -568,7 +573,7 @@ static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry, _enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry); /* search the directory */ - ret = afs_dir_iterate(dir, &cookie.ctx, key); + ret = afs_dir_iterate(dir, &cookie.ctx, key, _dir_version); if (ret < 0) { _leave(" = %d [iter]", ret); return ret; @@ -642,6 +647,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry, struct afs_server *server; struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode; struct inode *inode = NULL, *ti; + afs_dataversion_t data_version = READ_ONCE(dvnode->status.data_version); int ret, i; _enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry); @@ -669,12 +675,14 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry, cookie->fids[i].vid = as->volume->vid; /* search the directory */ - ret = afs_dir_iterate(dir, &cookie->ctx, key); + ret = afs_dir_iterate(dir, &cookie->ctx, key, &data_version); if (ret < 0) { inode = ERR_PTR(ret); goto out; } + dentry->d_fsdata = (void *)(unsigned long)data_version; + inode = ERR_PTR(-ENOENT); if (!cookie->found) goto out; @@ -968,7 +976,8 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) struct dentry *parent; struct inode *inode; struct key *key; - long dir_version, de_version; + afs_dataversion_t dir_version; + long de_version; int ret; if (flags & LOOKUP_RCU) @@ -1014,20 +1023,20 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) * on a 32-bit system, we only have 32 bits in the dentry to store the * version. */ - dir_version = (long)dir->status.data_version; + dir_version = dir->status.data_version; de_version = (long)dentry->d_fsdata; - if (de_version == dir_version) + if (de_version == (long)dir_version) goto out_valid_noupdate; - dir_version = (long)dir->invalid_before; - if (de_version - dir_version >= 0) + dir_version = dir->invalid_before; + if (de_version - (long)dir_version >= 0) goto out_valid; _debug("dir modified"); afs_stat_v(dir, n_reval); /* search the directory for this vnode */ - ret = afs_do_lookup_one(&dir->vfs_inode, dentry, &fid, key); + ret = afs_do_lookup_one(&dir->vfs_inode, dentry, &fid, key, &dir_version); switch (ret) { case 0: /* the filename maps to something */ @@ -1080,7 +1089,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) } out_valid: - dentry->d_fsdata = (void *)dir_version; + dentry->d_fsdata = (void *)(unsigned long)dir_version; out_valid_noupdate: dput(parent); key_put(key); @@ -1186,6 +1195,20 @@ static void afs_prep_for_new_inode(struct afs_fs_cursor *fc, iget_data->cb_s_break = fc->cbi->server->cb_s_break; } +/* + * Note that a dentry got changed. We need to set d_fsdata to the data version + * number derived from the result of the operation. It doesn't matter if + * d_fsdata goes backwards as we'll just revalidate. + */ +static void afs_update_dentry_version(struct afs_fs_cursor *fc, + struct dentry *dentry, + struct afs_status_cb *scb) +{ + if (fc->ac.error == 0) + dentry->d_fsdata = + (void *)(unsigned long)scb->status.data_version; +} + /* * create a directory on an AFS filesystem */ @@ -1228,6 +1251,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) afs_check_for_remote_deletion(&fc, dvnode); afs_vnode_commit_status(&fc, dvnode, fc.cb_break, &data_version, &scb[0]); + afs_update_dentry_version(&fc, dentry, &scb[0]); afs_vnode_new_inode(&fc, dentry, &iget_data, &scb[1]); ret = afs_end_vnode_operation(&fc); if (ret < 0) @@ -1320,6 +1344,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) afs_vnode_commit_status(&fc, dvnode, fc.cb_break, &data_version, scb); + afs_update_dentry_version(&fc, dentry, scb); ret = afs_end_vnode_operation(&fc); if (ret == 0) { afs_dir_remove_subdir(dentry); @@ -1459,6 +1484,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) &data_version, &scb[0]); afs_vnode_commit_status(&fc, vnode, fc.cb_break_2, &data_version_2, &scb[1]); + afs_update_dentry_version(&fc, dentry, &scb[0]); ret = afs_end_vnode_operation(&fc); if (ret == 0 && !(scb[1].have_status || scb[1].have_error)) ret = afs_dir_remove_link(dvnode, dentry, key); @@ -1527,6 +1553,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, afs_check_for_remote_deletion(&fc, dvnode); afs_vnode_commit_status(&fc, dvnode, fc.cb_break, &data_version, &scb[0]); + afs_update_dentry_version(&fc, dentry, &scb[0]); afs_vnode_new_inode(&fc, dentry, &iget_data, &scb[1]); ret = afs_end_vnode_operation(&fc); if (ret < 0) @@ -1608,6 +1635,7 @@ static int afs_link(struct dentry *from, struct inode *dir, afs_vnode_commit_status(&fc, vnode, fc.cb_break_2, NULL, &scb[1]); ihold(&vnode->vfs_inode); + afs_update_dentry_version(&fc, dentry, &scb[0]); d_instantiate(dentry, &vnode->vfs_inode); mutex_unlock(&vnode->io_lock); @@ -1687,6 +1715,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, afs_check_for_remote_deletion(&fc, dvnode); afs_vnode_commit_status(&fc, dvnode, fc.cb_break, &data_version, &scb[0]); + afs_update_dentry_version(&fc, dentry, &scb[0]); afs_vnode_new_inode(&fc, dentry, &iget_data, &scb[1]); ret = afs_end_vnode_operation(&fc); if (ret < 0) @@ -1792,6 +1821,17 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, } } + /* This bit is potentially nasty as there's a potential race with + * afs_d_revalidate{,_rcu}(). We have to change d_fsdata on the dentry + * to reflect it's new parent's new data_version after the op, but + * d_revalidate may see old_dentry between the op having taken place + * and the version being updated. + * + * So drop the old_dentry for now to make other threads go through + * lookup instead - which we hold a lock against. + */ + d_drop(old_dentry); + ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, orig_dvnode, key, true)) { afs_dataversion_t orig_data_version; @@ -1803,7 +1843,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, if (orig_dvnode != new_dvnode) { if (mutex_lock_interruptible_nested(&new_dvnode->io_lock, 1) < 0) { afs_end_vnode_operation(&fc); - goto error_rehash; + goto error_rehash_old; } new_data_version = new_dvnode->status.data_version + 1; } else { @@ -1828,7 +1868,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, } ret = afs_end_vnode_operation(&fc); if (ret < 0) - goto error_rehash; + goto error_rehash_old; } if (ret == 0) { @@ -1854,10 +1894,26 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, drop_nlink(new_inode); spin_unlock(&new_inode->i_lock); } + + /* Now we can update d_fsdata on the dentries to reflect their + * new parent's data_version. + * + * Note that if we ever implement RENAME_EXCHANGE, we'll have + * to update both dentries with opposing dir versions. + */ + if (new_dvnode != orig_dvnode) { + afs_update_dentry_version(&fc, old_dentry, &scb[1]); + afs_update_dentry_version(&fc, new_dentry, &scb[1]); + } else { + afs_update_dentry_version(&fc, old_dentry, &scb[0]); + afs_update_dentry_version(&fc, new_dentry, &scb[0]); + } d_move(old_dentry, new_dentry); goto error_tmp; } +error_rehash_old: + d_rehash(new_dentry); error_rehash: if (rehash) d_rehash(rehash);