ceph: fix security xattr deadlock

When security is enabled, security module can call filesystem's
getxattr/setxattr callbacks during d_instantiate(). For cephfs,
d_instantiate() is usually called by MDS' dispatch thread, while
handling MDS reply. If the MDS reply does not include xattrs and
corresponding caps, getxattr/setxattr need to send a new request
to MDS and waits for the reply. This makes MDS' dispatch sleep,
nobody handles later MDS replies.

The fix is make sure lookup/atomic_open reply include xattrs and
corresponding caps. So getxattr can be handled by cached xattrs.
This requires some modification to both MDS and request message.
(Client tells MDS what caps it wants; MDS encodes proper caps in
the reply)

Smack security module may call setxattr during d_instantiate().
Unlike getxattr, we can't force MDS to issue CEPH_CAP_XATTR_EXCL
to us. So just make setxattr return error when called by MDS'
dispatch thread.

Signed-off-by: Yan, Zheng <zyan@redhat.com>
This commit is contained in:
Yan, Zheng 2016-03-07 10:34:50 +08:00 committed by Ilya Dryomov
parent 29dccfa5af
commit 315f240880
8 changed files with 125 additions and 11 deletions

View File

@ -624,6 +624,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_mds_request *req; struct ceph_mds_request *req;
int op; int op;
int mask;
int err; int err;
dout("lookup %p dentry %p '%pd'\n", dout("lookup %p dentry %p '%pd'\n",
@ -666,8 +667,12 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
return ERR_CAST(req); return ERR_CAST(req);
req->r_dentry = dget(dentry); req->r_dentry = dget(dentry);
req->r_num_caps = 2; req->r_num_caps = 2;
/* we only need inode linkage */
req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED;
if (ceph_security_xattr_wanted(dir))
mask |= CEPH_CAP_XATTR_SHARED;
req->r_args.getattr.mask = cpu_to_le32(mask);
req->r_locked_dir = dir; req->r_locked_dir = dir;
err = ceph_mdsc_do_request(mdsc, NULL, req); err = ceph_mdsc_do_request(mdsc, NULL, req);
err = ceph_handle_snapdir(req, dentry, err); err = ceph_handle_snapdir(req, dentry, err);

View File

@ -71,12 +71,18 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
inode = ceph_find_inode(sb, vino); inode = ceph_find_inode(sb, vino);
if (!inode) { if (!inode) {
struct ceph_mds_request *req; struct ceph_mds_request *req;
int mask;
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO, req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
USE_ANY_MDS); USE_ANY_MDS);
if (IS_ERR(req)) if (IS_ERR(req))
return ERR_CAST(req); return ERR_CAST(req);
mask = CEPH_STAT_CAP_INODE;
if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
mask |= CEPH_CAP_XATTR_SHARED;
req->r_args.getattr.mask = cpu_to_le32(mask);
req->r_ino1 = vino; req->r_ino1 = vino;
req->r_num_caps = 1; req->r_num_caps = 1;
err = ceph_mdsc_do_request(mdsc, NULL, req); err = ceph_mdsc_do_request(mdsc, NULL, req);
@ -128,6 +134,7 @@ static struct dentry *__get_parent(struct super_block *sb,
struct ceph_mds_request *req; struct ceph_mds_request *req;
struct inode *inode; struct inode *inode;
struct dentry *dentry; struct dentry *dentry;
int mask;
int err; int err;
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPPARENT, req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPPARENT,
@ -144,6 +151,12 @@ static struct dentry *__get_parent(struct super_block *sb,
.snap = CEPH_NOSNAP, .snap = CEPH_NOSNAP,
}; };
} }
mask = CEPH_STAT_CAP_INODE;
if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
mask |= CEPH_CAP_XATTR_SHARED;
req->r_args.getattr.mask = cpu_to_le32(mask);
req->r_num_caps = 1; req->r_num_caps = 1;
err = ceph_mdsc_do_request(mdsc, NULL, req); err = ceph_mdsc_do_request(mdsc, NULL, req);
inode = req->r_target_inode; inode = req->r_target_inode;

View File

@ -300,6 +300,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
struct ceph_mds_request *req; struct ceph_mds_request *req;
struct dentry *dn; struct dentry *dn;
struct ceph_acls_info acls = {}; struct ceph_acls_info acls = {};
int mask;
int err; int err;
dout("atomic_open %p dentry %p '%pd' %s flags %d mode 0%o\n", dout("atomic_open %p dentry %p '%pd' %s flags %d mode 0%o\n",
@ -335,6 +336,12 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
acls.pagelist = NULL; acls.pagelist = NULL;
} }
} }
mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED;
if (ceph_security_xattr_wanted(dir))
mask |= CEPH_CAP_XATTR_SHARED;
req->r_args.open.mask = cpu_to_le32(mask);
req->r_locked_dir = dir; /* caller holds dir->i_mutex */ req->r_locked_dir = dir; /* caller holds dir->i_mutex */
err = ceph_mdsc_do_request(mdsc, err = ceph_mdsc_do_request(mdsc,
(flags & (O_CREAT|O_TRUNC)) ? dir : NULL, (flags & (O_CREAT|O_TRUNC)) ? dir : NULL,

View File

@ -1389,7 +1389,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
struct qstr dname; struct qstr dname;
struct dentry *dn; struct dentry *dn;
struct inode *in; struct inode *in;
int err = 0, ret, i; int err = 0, skipped = 0, ret, i;
struct inode *snapdir = NULL; struct inode *snapdir = NULL;
struct ceph_mds_request_head *rhead = req->r_request->front.iov_base; struct ceph_mds_request_head *rhead = req->r_request->front.iov_base;
struct ceph_dentry_info *di; struct ceph_dentry_info *di;
@ -1501,7 +1501,17 @@ retry_lookup:
} }
if (d_really_is_negative(dn)) { if (d_really_is_negative(dn)) {
struct dentry *realdn = splice_dentry(dn, in); struct dentry *realdn;
if (ceph_security_xattr_deadlock(in)) {
dout(" skip splicing dn %p to inode %p"
" (security xattr deadlock)\n", dn, in);
iput(in);
skipped++;
goto next_item;
}
realdn = splice_dentry(dn, in);
if (IS_ERR(realdn)) { if (IS_ERR(realdn)) {
err = PTR_ERR(realdn); err = PTR_ERR(realdn);
d_drop(dn); d_drop(dn);
@ -1518,7 +1528,7 @@ retry_lookup:
req->r_session, req->r_session,
req->r_request_started); req->r_request_started);
if (err == 0 && cache_ctl.index >= 0) { if (err == 0 && skipped == 0 && cache_ctl.index >= 0) {
ret = fill_readdir_cache(d_inode(parent), dn, ret = fill_readdir_cache(d_inode(parent), dn,
&cache_ctl, req); &cache_ctl, req);
if (ret < 0) if (ret < 0)
@ -1529,7 +1539,7 @@ next_item:
dput(dn); dput(dn);
} }
out: out:
if (err == 0) { if (err == 0 && skipped == 0) {
req->r_did_prepopulate = true; req->r_did_prepopulate = true;
req->r_readdir_cache_idx = cache_ctl.index; req->r_readdir_cache_idx = cache_ctl.index;
} }

View File

@ -2540,6 +2540,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
/* insert trace into our cache */ /* insert trace into our cache */
mutex_lock(&req->r_fill_mutex); mutex_lock(&req->r_fill_mutex);
current->journal_info = req;
err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session); err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session);
if (err == 0) { if (err == 0) {
if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR || if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR ||
@ -2547,6 +2548,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
ceph_readdir_prepopulate(req, req->r_session); ceph_readdir_prepopulate(req, req->r_session);
ceph_unreserve_caps(mdsc, &req->r_caps_reservation); ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
} }
current->journal_info = NULL;
mutex_unlock(&req->r_fill_mutex); mutex_unlock(&req->r_fill_mutex);
up_read(&mdsc->snap_rwsem); up_read(&mdsc->snap_rwsem);

View File

@ -468,7 +468,7 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
#define CEPH_I_POOL_PERM (1 << 4) /* pool rd/wr bits are valid */ #define CEPH_I_POOL_PERM (1 << 4) /* pool rd/wr bits are valid */
#define CEPH_I_POOL_RD (1 << 5) /* can read from pool */ #define CEPH_I_POOL_RD (1 << 5) /* can read from pool */
#define CEPH_I_POOL_WR (1 << 6) /* can write to pool */ #define CEPH_I_POOL_WR (1 << 6) /* can write to pool */
#define CEPH_I_SEC_INITED (1 << 7) /* security initialized */
static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci, static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci,
long long release_count, long long release_count,
@ -804,6 +804,20 @@ extern void __init ceph_xattr_init(void);
extern void ceph_xattr_exit(void); extern void ceph_xattr_exit(void);
extern const struct xattr_handler *ceph_xattr_handlers[]; extern const struct xattr_handler *ceph_xattr_handlers[];
#ifdef CONFIG_SECURITY
extern bool ceph_security_xattr_deadlock(struct inode *in);
extern bool ceph_security_xattr_wanted(struct inode *in);
#else
static inline bool ceph_security_xattr_deadlock(struct inode *in)
{
return false;
}
static inline bool ceph_security_xattr_wanted(struct inode *in)
{
return false;
}
#endif
/* acl.c */ /* acl.c */
struct ceph_acls_info { struct ceph_acls_info {
void *default_acl; void *default_acl;

View File

@ -714,13 +714,31 @@ void __ceph_build_xattrs_blob(struct ceph_inode_info *ci)
} }
} }
static inline int __get_request_mask(struct inode *in) {
struct ceph_mds_request *req = current->journal_info;
int mask = 0;
if (req && req->r_target_inode == in) {
if (req->r_op == CEPH_MDS_OP_LOOKUP ||
req->r_op == CEPH_MDS_OP_LOOKUPINO ||
req->r_op == CEPH_MDS_OP_LOOKUPPARENT ||
req->r_op == CEPH_MDS_OP_GETATTR) {
mask = le32_to_cpu(req->r_args.getattr.mask);
} else if (req->r_op == CEPH_MDS_OP_OPEN ||
req->r_op == CEPH_MDS_OP_CREATE) {
mask = le32_to_cpu(req->r_args.open.mask);
}
}
return mask;
}
ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value, ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
size_t size) size_t size)
{ {
struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_info *ci = ceph_inode(inode);
int err;
struct ceph_inode_xattr *xattr; struct ceph_inode_xattr *xattr;
struct ceph_vxattr *vxattr = NULL; struct ceph_vxattr *vxattr = NULL;
int req_mask;
int err;
if (!ceph_is_valid_xattr(name)) if (!ceph_is_valid_xattr(name))
return -ENODATA; return -ENODATA;
@ -734,13 +752,24 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
return err; return err;
} }
req_mask = __get_request_mask(inode);
spin_lock(&ci->i_ceph_lock); spin_lock(&ci->i_ceph_lock);
dout("getxattr %p ver=%lld index_ver=%lld\n", inode, dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
ci->i_xattrs.version, ci->i_xattrs.index_version); ci->i_xattrs.version, ci->i_xattrs.index_version);
if (ci->i_xattrs.version == 0 || if (ci->i_xattrs.version == 0 ||
!__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1)) { !((req_mask & CEPH_CAP_XATTR_SHARED) ||
__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1))) {
spin_unlock(&ci->i_ceph_lock); spin_unlock(&ci->i_ceph_lock);
/* security module gets xattr while filling trace */
if (current->journal_info != NULL) {
pr_warn_ratelimited("sync getxattr %p "
"during filling trace\n", inode);
return -EBUSY;
}
/* get xattrs from mds (if we don't already have them) */ /* get xattrs from mds (if we don't already have them) */
err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true); err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true);
if (err) if (err)
@ -767,6 +796,9 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
memcpy(value, xattr->val, xattr->val_len); memcpy(value, xattr->val, xattr->val_len);
if (current->journal_info != NULL &&
!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN))
ci->i_ceph_flags |= CEPH_I_SEC_INITED;
out: out:
spin_unlock(&ci->i_ceph_lock); spin_unlock(&ci->i_ceph_lock);
return err; return err;
@ -1017,7 +1049,15 @@ do_sync:
do_sync_unlocked: do_sync_unlocked:
if (lock_snap_rwsem) if (lock_snap_rwsem)
up_read(&mdsc->snap_rwsem); up_read(&mdsc->snap_rwsem);
err = ceph_sync_setxattr(dentry, name, value, size, flags);
/* security module set xattr while filling trace */
if (current->journal_info != NULL) {
pr_warn_ratelimited("sync setxattr %p "
"during filling trace\n", inode);
err = -EBUSY;
} else {
err = ceph_sync_setxattr(dentry, name, value, size, flags);
}
out: out:
ceph_free_cap_flush(prealloc_cf); ceph_free_cap_flush(prealloc_cf);
kfree(newname); kfree(newname);
@ -1166,3 +1206,25 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
return __ceph_removexattr(dentry, name); return __ceph_removexattr(dentry, name);
} }
#ifdef CONFIG_SECURITY
bool ceph_security_xattr_wanted(struct inode *in)
{
return in->i_security != NULL;
}
bool ceph_security_xattr_deadlock(struct inode *in)
{
struct ceph_inode_info *ci;
bool ret;
if (in->i_security == NULL)
return false;
ci = ceph_inode(in);
spin_lock(&ci->i_ceph_lock);
ret = !(ci->i_ceph_flags & CEPH_I_SEC_INITED) &&
!(ci->i_xattrs.version > 0 &&
__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0));
spin_unlock(&ci->i_ceph_lock);
return ret;
}
#endif

View File

@ -376,7 +376,8 @@ union ceph_mds_request_args {
__le32 stripe_count; /* ... */ __le32 stripe_count; /* ... */
__le32 object_size; __le32 object_size;
__le32 file_replication; __le32 file_replication;
__le32 unused; /* used to be preferred osd */ __le32 mask; /* CEPH_CAP_* */
__le32 old_size;
} __attribute__ ((packed)) open; } __attribute__ ((packed)) open;
struct { struct {
__le32 flags; __le32 flags;