posix_acl: Inode acl caching fixes

When get_acl() is called for an inode whose ACL is not cached yet, the
get_acl inode operation is called to fetch the ACL from the filesystem.
The inode operation is responsible for updating the cached acl with
set_cached_acl().  This is done without locking at the VFS level, so
another task can call set_cached_acl() or forget_cached_acl() before the
get_acl inode operation gets to calling set_cached_acl(), and then
get_acl's call to set_cached_acl() results in caching an outdated ACL.

Prevent this from happening by setting the cached ACL pointer to a
task-specific sentinel value before calling the get_acl inode operation.
Move the responsibility for updating the cached ACL from the get_acl
inode operations to get_acl().  There, only set the cached ACL if the
sentinel value hasn't changed.

The sentinel values are chosen to have odd values.  Likewise, the value
of ACL_NOT_CACHED is odd.  In contrast, ACL object pointers always have
an even value (ACLs are aligned in memory).  This makes it possible to
distinguish uncached ACL values from ACL objects.

In addition, switch from guarding inode->i_acl and inode->i_default_acl
updates by the inode->i_lock spinlock to using xchg() and cmpxchg().

Filesystems that do not want ACLs returned from their get_acl inode
operations to be cached must call forget_cached_acl() to prevent the VFS
from doing so.

(Patch written by Al Viro and Andreas Gruenbacher.)

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
This commit is contained in:
Andreas Gruenbacher 2016-03-24 14:38:37 +01:00 committed by Al Viro
parent 8861964f4c
commit b8a7a3a667
17 changed files with 139 additions and 79 deletions

View File

@ -93,7 +93,7 @@ static struct posix_acl *v9fs_get_cached_acl(struct inode *inode, int type)
* instantiating the inode (v9fs_inode_from_fid) * instantiating the inode (v9fs_inode_from_fid)
*/ */
acl = get_cached_acl(inode, type); acl = get_cached_acl(inode, type);
BUG_ON(acl == ACL_NOT_CACHED); BUG_ON(is_uncached_acl(acl));
return acl; return acl;
} }

View File

@ -63,9 +63,6 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
} }
kfree(value); kfree(value);
if (!IS_ERR(acl))
set_cached_acl(inode, type, acl);
return acl; return acl;
} }

View File

@ -37,6 +37,8 @@ static inline void ceph_set_cached_acl(struct inode *inode,
spin_lock(&ci->i_ceph_lock); spin_lock(&ci->i_ceph_lock);
if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0)) if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0))
set_cached_acl(inode, type, acl); set_cached_acl(inode, type, acl);
else
forget_cached_acl(inode, type);
spin_unlock(&ci->i_ceph_lock); spin_unlock(&ci->i_ceph_lock);
} }

View File

@ -172,9 +172,6 @@ ext2_get_acl(struct inode *inode, int type)
acl = ERR_PTR(retval); acl = ERR_PTR(retval);
kfree(value); kfree(value);
if (!IS_ERR(acl))
set_cached_acl(inode, type, acl);
return acl; return acl;
} }

View File

@ -172,9 +172,6 @@ ext4_get_acl(struct inode *inode, int type)
acl = ERR_PTR(retval); acl = ERR_PTR(retval);
kfree(value); kfree(value);
if (!IS_ERR(acl))
set_cached_acl(inode, type, acl);
return acl; return acl;
} }

View File

@ -190,9 +190,6 @@ static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type,
acl = ERR_PTR(retval); acl = ERR_PTR(retval);
kfree(value); kfree(value);
if (!IS_ERR(acl))
set_cached_acl(inode, type, acl);
return acl; return acl;
} }

View File

@ -48,9 +48,6 @@ struct posix_acl *hfsplus_get_posix_acl(struct inode *inode, int type)
hfsplus_destroy_attr_entry((hfsplus_attr_entry *)value); hfsplus_destroy_attr_entry((hfsplus_attr_entry *)value);
if (!IS_ERR(acl))
set_cached_acl(inode, type, acl);
return acl; return acl;
} }

View File

@ -238,9 +238,9 @@ void __destroy_inode(struct inode *inode)
} }
#ifdef CONFIG_FS_POSIX_ACL #ifdef CONFIG_FS_POSIX_ACL
if (inode->i_acl && inode->i_acl != ACL_NOT_CACHED) if (inode->i_acl && !is_uncached_acl(inode->i_acl))
posix_acl_release(inode->i_acl); posix_acl_release(inode->i_acl);
if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED) if (inode->i_default_acl && !is_uncached_acl(inode->i_default_acl))
posix_acl_release(inode->i_default_acl); posix_acl_release(inode->i_default_acl);
#endif #endif
this_cpu_dec(nr_inodes); this_cpu_dec(nr_inodes);

View File

@ -203,8 +203,6 @@ struct posix_acl *jffs2_get_acl(struct inode *inode, int type)
acl = ERR_PTR(rc); acl = ERR_PTR(rc);
} }
kfree(value); kfree(value);
if (!IS_ERR(acl))
set_cached_acl(inode, type, acl);
return acl; return acl;
} }

View File

@ -63,8 +63,6 @@ struct posix_acl *jfs_get_acl(struct inode *inode, int type)
acl = posix_acl_from_xattr(&init_user_ns, value, size); acl = posix_acl_from_xattr(&init_user_ns, value, size);
} }
kfree(value); kfree(value);
if (!IS_ERR(acl))
set_cached_acl(inode, type, acl);
return acl; return acl;
} }

View File

@ -265,7 +265,7 @@ static int check_acl(struct inode *inode, int mask)
if (!acl) if (!acl)
return -EAGAIN; return -EAGAIN;
/* no ->get_acl() calls in RCU mode... */ /* no ->get_acl() calls in RCU mode... */
if (acl == ACL_NOT_CACHED) if (is_uncached_acl(acl))
return -ECHILD; return -ECHILD;
return posix_acl_permission(inode, acl, mask & ~MAY_NOT_BLOCK); return posix_acl_permission(inode, acl, mask & ~MAY_NOT_BLOCK);
} }

View File

@ -11,6 +11,38 @@
#define NFSDBG_FACILITY NFSDBG_PROC #define NFSDBG_FACILITY NFSDBG_PROC
/*
* nfs3_prepare_get_acl, nfs3_complete_get_acl, nfs3_abort_get_acl: Helpers for
* caching get_acl results in a race-free way. See fs/posix_acl.c:get_acl()
* for explanations.
*/
/*
 * Mark the ACL slot *p as "being read by the current task": if the slot
 * still holds ACL_NOT_CACHED, atomically replace it with this task's
 * sentinel.  Any other value is left untouched.
 */
static void nfs3_prepare_get_acl(struct posix_acl **p)
{
struct posix_acl *sentinel = uncached_acl_sentinel(current);
/*
 * The cmpxchg() result is intentionally not acted upon: when the swap
 * does not happen, the slot simply keeps whatever value it had.
 */
if (cmpxchg(p, ACL_NOT_CACHED, sentinel) != ACL_NOT_CACHED) {
/* Not the first reader or sentinel already in place. */
}
}
/*
 * Store @acl in the slot *p, but only if the slot still contains the
 * current task's sentinel; otherwise the slot is left as it is.
 */
static void nfs3_complete_get_acl(struct posix_acl **p, struct posix_acl *acl)
{
struct posix_acl *sentinel = uncached_acl_sentinel(current);
/* Only cache the ACL if our sentinel is still in place. */
/*
 * NOTE(review): posix_acl_dup() presumably takes the reference that the
 * cache slot will own; it is taken before the swap and dropped again
 * below if the swap did not happen -- confirm against posix_acl.h.
 */
posix_acl_dup(acl);
if (cmpxchg(p, sentinel, acl) != sentinel)
posix_acl_release(acl);
}
/*
 * Undo nfs3_prepare_get_acl(): if the slot *p still holds the current
 * task's sentinel, reset it to ACL_NOT_CACHED.  A slot holding any other
 * value is not modified.
 */
static void nfs3_abort_get_acl(struct posix_acl **p)
{
struct posix_acl *sentinel = uncached_acl_sentinel(current);
/* Remove our sentinel upon failure. */
cmpxchg(p, sentinel, ACL_NOT_CACHED);
}
struct posix_acl *nfs3_get_acl(struct inode *inode, int type) struct posix_acl *nfs3_get_acl(struct inode *inode, int type)
{ {
struct nfs_server *server = NFS_SERVER(inode); struct nfs_server *server = NFS_SERVER(inode);
@ -55,6 +87,11 @@ struct posix_acl *nfs3_get_acl(struct inode *inode, int type)
if (res.fattr == NULL) if (res.fattr == NULL)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
if (args.mask & NFS_ACL)
nfs3_prepare_get_acl(&inode->i_acl);
if (args.mask & NFS_DFACL)
nfs3_prepare_get_acl(&inode->i_default_acl);
status = rpc_call_sync(server->client_acl, &msg, 0); status = rpc_call_sync(server->client_acl, &msg, 0);
dprintk("NFS reply getacl: %d\n", status); dprintk("NFS reply getacl: %d\n", status);
@ -89,12 +126,12 @@ struct posix_acl *nfs3_get_acl(struct inode *inode, int type)
} }
if (res.mask & NFS_ACL) if (res.mask & NFS_ACL)
set_cached_acl(inode, ACL_TYPE_ACCESS, res.acl_access); nfs3_complete_get_acl(&inode->i_acl, res.acl_access);
else else
forget_cached_acl(inode, ACL_TYPE_ACCESS); forget_cached_acl(inode, ACL_TYPE_ACCESS);
if (res.mask & NFS_DFACL) if (res.mask & NFS_DFACL)
set_cached_acl(inode, ACL_TYPE_DEFAULT, res.acl_default); nfs3_complete_get_acl(&inode->i_default_acl, res.acl_default);
else else
forget_cached_acl(inode, ACL_TYPE_DEFAULT); forget_cached_acl(inode, ACL_TYPE_DEFAULT);
@ -108,6 +145,8 @@ struct posix_acl *nfs3_get_acl(struct inode *inode, int type)
} }
getout: getout:
nfs3_abort_get_acl(&inode->i_acl);
nfs3_abort_get_acl(&inode->i_default_acl);
posix_acl_release(res.acl_access); posix_acl_release(res.acl_access);
posix_acl_release(res.acl_default); posix_acl_release(res.acl_default);
nfs_free_fattr(res.fattr); nfs_free_fattr(res.fattr);

View File

@ -54,6 +54,7 @@
#include "uptodate.h" #include "uptodate.h"
#include "quota.h" #include "quota.h"
#include "refcounttree.h" #include "refcounttree.h"
#include "acl.h"
#include "buffer_head_io.h" #include "buffer_head_io.h"
@ -3623,6 +3624,8 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
filemap_fdatawait(mapping); filemap_fdatawait(mapping);
} }
forget_all_cached_acls(inode);
out: out:
return UNBLOCK_CONTINUE; return UNBLOCK_CONTINUE;
} }

View File

@ -37,14 +37,18 @@ EXPORT_SYMBOL(acl_by_type);
struct posix_acl *get_cached_acl(struct inode *inode, int type) struct posix_acl *get_cached_acl(struct inode *inode, int type)
{ {
struct posix_acl **p = acl_by_type(inode, type); struct posix_acl **p = acl_by_type(inode, type);
struct posix_acl *acl = ACCESS_ONCE(*p); struct posix_acl *acl;
if (acl) {
spin_lock(&inode->i_lock); for (;;) {
acl = *p; rcu_read_lock();
if (acl != ACL_NOT_CACHED) acl = rcu_dereference(*p);
acl = posix_acl_dup(acl); if (!acl || is_uncached_acl(acl) ||
spin_unlock(&inode->i_lock); atomic_inc_not_zero(&acl->a_refcount))
break;
rcu_read_unlock();
cpu_relax();
} }
rcu_read_unlock();
return acl; return acl;
} }
EXPORT_SYMBOL(get_cached_acl); EXPORT_SYMBOL(get_cached_acl);
@ -59,58 +63,72 @@ void set_cached_acl(struct inode *inode, int type, struct posix_acl *acl)
{ {
struct posix_acl **p = acl_by_type(inode, type); struct posix_acl **p = acl_by_type(inode, type);
struct posix_acl *old; struct posix_acl *old;
spin_lock(&inode->i_lock);
old = *p; old = xchg(p, posix_acl_dup(acl));
rcu_assign_pointer(*p, posix_acl_dup(acl)); if (!is_uncached_acl(old))
spin_unlock(&inode->i_lock);
if (old != ACL_NOT_CACHED)
posix_acl_release(old); posix_acl_release(old);
} }
EXPORT_SYMBOL(set_cached_acl); EXPORT_SYMBOL(set_cached_acl);
/*
 * Atomically reset the ACL slot *p to ACL_NOT_CACHED and release the
 * previous value if it was not an "uncached" marker.
 */
static void __forget_cached_acl(struct posix_acl **p)
{
struct posix_acl *old;
/* xchg() swaps in ACL_NOT_CACHED and hands back what was there before. */
old = xchg(p, ACL_NOT_CACHED);
/* Values flagged by is_uncached_acl() are skipped; only other values are released. */
if (!is_uncached_acl(old))
posix_acl_release(old);
}
void forget_cached_acl(struct inode *inode, int type) void forget_cached_acl(struct inode *inode, int type)
{ {
struct posix_acl **p = acl_by_type(inode, type); __forget_cached_acl(acl_by_type(inode, type));
struct posix_acl *old;
spin_lock(&inode->i_lock);
old = *p;
*p = ACL_NOT_CACHED;
spin_unlock(&inode->i_lock);
if (old != ACL_NOT_CACHED)
posix_acl_release(old);
} }
EXPORT_SYMBOL(forget_cached_acl); EXPORT_SYMBOL(forget_cached_acl);
void forget_all_cached_acls(struct inode *inode) void forget_all_cached_acls(struct inode *inode)
{ {
struct posix_acl *old_access, *old_default; __forget_cached_acl(&inode->i_acl);
spin_lock(&inode->i_lock); __forget_cached_acl(&inode->i_default_acl);
old_access = inode->i_acl;
old_default = inode->i_default_acl;
inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
spin_unlock(&inode->i_lock);
if (old_access != ACL_NOT_CACHED)
posix_acl_release(old_access);
if (old_default != ACL_NOT_CACHED)
posix_acl_release(old_default);
} }
EXPORT_SYMBOL(forget_all_cached_acls); EXPORT_SYMBOL(forget_all_cached_acls);
struct posix_acl *get_acl(struct inode *inode, int type) struct posix_acl *get_acl(struct inode *inode, int type)
{ {
void *sentinel;
struct posix_acl **p;
struct posix_acl *acl; struct posix_acl *acl;
/*
* The sentinel is used to detect when another operation like
* set_cached_acl() or forget_cached_acl() races with get_acl().
* It is guaranteed that is_uncached_acl(sentinel) is true.
*/
acl = get_cached_acl(inode, type); acl = get_cached_acl(inode, type);
if (acl != ACL_NOT_CACHED) if (!is_uncached_acl(acl))
return acl; return acl;
if (!IS_POSIXACL(inode)) if (!IS_POSIXACL(inode))
return NULL; return NULL;
sentinel = uncached_acl_sentinel(current);
p = acl_by_type(inode, type);
/* /*
* A filesystem can force a ACL callback by just never filling the * If the ACL isn't being read yet, set our sentinel. Otherwise, the
* ACL cache. But normally you'd fill the cache either at inode * current value of the ACL will not be ACL_NOT_CACHED and so our own
* instantiation time, or on the first ->get_acl call. * sentinel will not be set; another task will update the cache. We
* could wait for that other task to complete its job, but it's easier
* to just call ->get_acl to fetch the ACL ourself. (This is going to
* be an unlikely race.)
*/
if (cmpxchg(p, ACL_NOT_CACHED, sentinel) != ACL_NOT_CACHED)
/* fall through */ ;
/*
* Normally, the ACL returned by ->get_acl will be cached.
* A filesystem can prevent that by calling
* forget_cached_acl(inode, type) in ->get_acl.
* *
* If the filesystem doesn't have a get_acl() function at all, we'll * If the filesystem doesn't have a get_acl() function at all, we'll
* just create the negative cache entry. * just create the negative cache entry.
@ -119,7 +137,24 @@ struct posix_acl *get_acl(struct inode *inode, int type)
set_cached_acl(inode, type, NULL); set_cached_acl(inode, type, NULL);
return NULL; return NULL;
} }
return inode->i_op->get_acl(inode, type); acl = inode->i_op->get_acl(inode, type);
if (IS_ERR(acl)) {
/*
* Remove our sentinel so that we don't block future attempts
* to cache the ACL.
*/
cmpxchg(p, sentinel, ACL_NOT_CACHED);
return acl;
}
/*
* Cache the result, but only if our sentinel is still in place.
*/
posix_acl_dup(acl);
if (unlikely(cmpxchg(p, sentinel, acl) != sentinel))
posix_acl_release(acl);
return acl;
} }
EXPORT_SYMBOL(get_acl); EXPORT_SYMBOL(get_acl);

View File

@ -197,10 +197,8 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
size = reiserfs_xattr_get(inode, name, NULL, 0); size = reiserfs_xattr_get(inode, name, NULL, 0);
if (size < 0) { if (size < 0) {
if (size == -ENODATA || size == -ENOSYS) { if (size == -ENODATA || size == -ENOSYS)
set_cached_acl(inode, type, NULL);
return NULL; return NULL;
}
return ERR_PTR(size); return ERR_PTR(size);
} }
@ -220,8 +218,6 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
} else { } else {
acl = reiserfs_posix_acl_from_disk(value, retval); acl = reiserfs_posix_acl_from_disk(value, retval);
} }
if (!IS_ERR(acl))
set_cached_acl(inode, type, acl);
kfree(value); kfree(value);
return acl; return acl;

View File

@ -158,22 +158,14 @@ xfs_get_acl(struct inode *inode, int type)
if (error) { if (error) {
/* /*
* If the attribute doesn't exist make sure we have a negative * If the attribute doesn't exist make sure we have a negative
* cache entry, for any other error assume it is transient and * cache entry, for any other error assume it is transient.
* leave the cache entry as ACL_NOT_CACHED.
*/ */
if (error == -ENOATTR) if (error != -ENOATTR)
goto out_update_cache;
acl = ERR_PTR(error); acl = ERR_PTR(error);
goto out; } else {
acl = xfs_acl_from_disk(xfs_acl, len,
XFS_ACL_MAX_ENTRIES(ip->i_mount));
} }
acl = xfs_acl_from_disk(xfs_acl, len, XFS_ACL_MAX_ENTRIES(ip->i_mount));
if (IS_ERR(acl))
goto out;
out_update_cache:
set_cached_acl(inode, type, acl);
out:
kmem_free(xfs_acl); kmem_free(xfs_acl);
return acl; return acl;
} }

View File

@ -577,6 +577,18 @@ static inline void mapping_allow_writable(struct address_space *mapping)
struct posix_acl; struct posix_acl;
#define ACL_NOT_CACHED ((void *)(-1)) #define ACL_NOT_CACHED ((void *)(-1))
/*
 * Build the per-task sentinel value used to mark an ACL slot as "being
 * read by @task": the task pointer advanced by one byte.
 * NOTE(review): this relies on task_struct pointers being even so that the
 * result is odd and is_uncached_acl() recognises it -- confirm alignment.
 */
static inline struct posix_acl *
uncached_acl_sentinel(struct task_struct *task)
{
return (void *)task + 1;
}
/*
 * Return true if @acl has its lowest bit set, i.e. its value is odd.
 * ACL_NOT_CACHED, defined as (void *)(-1), is odd and therefore matches;
 * a NULL pointer does not.
 */
static inline bool
is_uncached_acl(struct posix_acl *acl)
{
return (long)acl & 1;
}
#define IOP_FASTPERM 0x0001 #define IOP_FASTPERM 0x0001
#define IOP_LOOKUP 0x0002 #define IOP_LOOKUP 0x0002
#define IOP_NOFOLLOW 0x0004 #define IOP_NOFOLLOW 0x0004