Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull Ceph fixes from Sage Weil:
 "First, there is a critical fix for the new primary-affinity function
  that went into -rc1.

  The second batch of patches from Zheng fixes a range of problems with
  directory fragmentation, readdir, and a few odds and ends for cephfs."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
  ceph: reserve caps for file layout/lock MDS requests
  ceph: avoid releasing caps that are being used
  ceph: clear directory's completeness when creating file
  libceph: fix non-default values check in apply_primary_affinity()
  ceph: use fpos_cmp() to compare dentry positions
  ceph: check directory's completeness before emitting directory entry
This commit is contained in:
commit
5575eeb7b9
|
@ -3261,7 +3261,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
|
|||
rel->seq = cpu_to_le32(cap->seq);
|
||||
rel->issue_seq = cpu_to_le32(cap->issue_seq),
|
||||
rel->mseq = cpu_to_le32(cap->mseq);
|
||||
rel->caps = cpu_to_le32(cap->issued);
|
||||
rel->caps = cpu_to_le32(cap->implemented);
|
||||
rel->wanted = cpu_to_le32(cap->mds_wanted);
|
||||
rel->dname_len = 0;
|
||||
rel->dname_seq = 0;
|
||||
|
|
|
@ -141,7 +141,7 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx,
|
|||
|
||||
/* start at beginning? */
|
||||
if (ctx->pos == 2 || last == NULL ||
|
||||
ctx->pos < ceph_dentry(last)->offset) {
|
||||
fpos_cmp(ctx->pos, ceph_dentry(last)->offset) < 0) {
|
||||
if (list_empty(&parent->d_subdirs))
|
||||
goto out_unlock;
|
||||
p = parent->d_subdirs.prev;
|
||||
|
@ -182,9 +182,16 @@ more:
|
|||
spin_unlock(&dentry->d_lock);
|
||||
spin_unlock(&parent->d_lock);
|
||||
|
||||
/* make sure a dentry wasn't dropped while we didn't have parent lock */
|
||||
if (!ceph_dir_is_complete(dir)) {
|
||||
dout(" lost dir complete on %p; falling back to mds\n", dir);
|
||||
dput(dentry);
|
||||
err = -EAGAIN;
|
||||
goto out;
|
||||
}
|
||||
|
||||
dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, ctx->pos,
|
||||
dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
|
||||
ctx->pos = di->offset;
|
||||
if (!dir_emit(ctx, dentry->d_name.name,
|
||||
dentry->d_name.len,
|
||||
ceph_translate_ino(dentry->d_sb, dentry->d_inode->i_ino),
|
||||
|
@ -198,19 +205,12 @@ more:
|
|||
return 0;
|
||||
}
|
||||
|
||||
ctx->pos = di->offset + 1;
|
||||
|
||||
if (last)
|
||||
dput(last);
|
||||
last = dentry;
|
||||
|
||||
ctx->pos++;
|
||||
|
||||
/* make sure a dentry wasn't dropped while we didn't have parent lock */
|
||||
if (!ceph_dir_is_complete(dir)) {
|
||||
dout(" lost dir complete on %p; falling back to mds\n", dir);
|
||||
err = -EAGAIN;
|
||||
goto out;
|
||||
}
|
||||
|
||||
spin_lock(&parent->d_lock);
|
||||
p = p->prev; /* advance to next dentry */
|
||||
goto more;
|
||||
|
@ -296,6 +296,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
|
|||
err = __dcache_readdir(file, ctx, shared_gen);
|
||||
if (err != -EAGAIN)
|
||||
return err;
|
||||
frag = fpos_frag(ctx->pos);
|
||||
off = fpos_off(ctx->pos);
|
||||
} else {
|
||||
spin_unlock(&ci->i_ceph_lock);
|
||||
}
|
||||
|
@ -446,7 +448,6 @@ more:
|
|||
if (atomic_read(&ci->i_release_count) == fi->dir_release_count) {
|
||||
dout(" marking %p complete\n", inode);
|
||||
__ceph_dir_set_complete(ci, fi->dir_release_count);
|
||||
ci->i_max_offset = ctx->pos;
|
||||
}
|
||||
spin_unlock(&ci->i_ceph_lock);
|
||||
|
||||
|
@ -935,14 +936,16 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
|
|||
* to do it here.
|
||||
*/
|
||||
|
||||
/* d_move screws up d_subdirs order */
|
||||
ceph_dir_clear_complete(new_dir);
|
||||
|
||||
d_move(old_dentry, new_dentry);
|
||||
|
||||
/* ensure target dentry is invalidated, despite
|
||||
rehashing bug in vfs_rename_dir */
|
||||
ceph_invalidate_dentry_lease(new_dentry);
|
||||
|
||||
/* d_move screws up sibling dentries' offsets */
|
||||
ceph_dir_clear_complete(old_dir);
|
||||
ceph_dir_clear_complete(new_dir);
|
||||
|
||||
}
|
||||
ceph_mdsc_put_request(req);
|
||||
return err;
|
||||
|
|
|
@ -744,7 +744,6 @@ static int fill_inode(struct inode *inode,
|
|||
!__ceph_dir_is_complete(ci)) {
|
||||
dout(" marking %p complete (empty)\n", inode);
|
||||
__ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count));
|
||||
ci->i_max_offset = 2;
|
||||
}
|
||||
no_change:
|
||||
/* only update max_size on auth cap */
|
||||
|
@ -889,41 +888,6 @@ out_unlock:
|
|||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Set dentry's directory position based on the current dir's max, and
|
||||
* order it in d_subdirs, so that dcache_readdir behaves.
|
||||
*
|
||||
* Always called under directory's i_mutex.
|
||||
*/
|
||||
static void ceph_set_dentry_offset(struct dentry *dn)
|
||||
{
|
||||
struct dentry *dir = dn->d_parent;
|
||||
struct inode *inode = dir->d_inode;
|
||||
struct ceph_inode_info *ci;
|
||||
struct ceph_dentry_info *di;
|
||||
|
||||
BUG_ON(!inode);
|
||||
|
||||
ci = ceph_inode(inode);
|
||||
di = ceph_dentry(dn);
|
||||
|
||||
spin_lock(&ci->i_ceph_lock);
|
||||
if (!__ceph_dir_is_complete(ci)) {
|
||||
spin_unlock(&ci->i_ceph_lock);
|
||||
return;
|
||||
}
|
||||
di->offset = ceph_inode(inode)->i_max_offset++;
|
||||
spin_unlock(&ci->i_ceph_lock);
|
||||
|
||||
spin_lock(&dir->d_lock);
|
||||
spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
|
||||
list_move(&dn->d_u.d_child, &dir->d_subdirs);
|
||||
dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset,
|
||||
dn->d_u.d_child.prev, dn->d_u.d_child.next);
|
||||
spin_unlock(&dn->d_lock);
|
||||
spin_unlock(&dir->d_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* splice a dentry to an inode.
|
||||
* caller must hold directory i_mutex for this to be safe.
|
||||
|
@ -933,7 +897,7 @@ static void ceph_set_dentry_offset(struct dentry *dn)
|
|||
* the caller) if we fail.
|
||||
*/
|
||||
static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
|
||||
bool *prehash, bool set_offset)
|
||||
bool *prehash)
|
||||
{
|
||||
struct dentry *realdn;
|
||||
|
||||
|
@ -965,8 +929,6 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
|
|||
}
|
||||
if ((!prehash || *prehash) && d_unhashed(dn))
|
||||
d_rehash(dn);
|
||||
if (set_offset)
|
||||
ceph_set_dentry_offset(dn);
|
||||
out:
|
||||
return dn;
|
||||
}
|
||||
|
@ -987,7 +949,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
|
|||
{
|
||||
struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
|
||||
struct inode *in = NULL;
|
||||
struct ceph_mds_reply_inode *ininfo;
|
||||
struct ceph_vino vino;
|
||||
struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
|
||||
int err = 0;
|
||||
|
@ -1161,6 +1122,9 @@ retry_lookup:
|
|||
|
||||
/* rename? */
|
||||
if (req->r_old_dentry && req->r_op == CEPH_MDS_OP_RENAME) {
|
||||
struct inode *olddir = req->r_old_dentry_dir;
|
||||
BUG_ON(!olddir);
|
||||
|
||||
dout(" src %p '%.*s' dst %p '%.*s'\n",
|
||||
req->r_old_dentry,
|
||||
req->r_old_dentry->d_name.len,
|
||||
|
@ -1180,13 +1144,10 @@ retry_lookup:
|
|||
rehashing bug in vfs_rename_dir */
|
||||
ceph_invalidate_dentry_lease(dn);
|
||||
|
||||
/*
|
||||
* d_move() puts the renamed dentry at the end of
|
||||
* d_subdirs. We need to assign it an appropriate
|
||||
* directory offset so we can behave when dir is
|
||||
* complete.
|
||||
*/
|
||||
ceph_set_dentry_offset(req->r_old_dentry);
|
||||
/* d_move screws up sibling dentries' offsets */
|
||||
ceph_dir_clear_complete(dir);
|
||||
ceph_dir_clear_complete(olddir);
|
||||
|
||||
dout("dn %p gets new offset %lld\n", req->r_old_dentry,
|
||||
ceph_dentry(req->r_old_dentry)->offset);
|
||||
|
||||
|
@ -1213,8 +1174,9 @@ retry_lookup:
|
|||
|
||||
/* attach proper inode */
|
||||
if (!dn->d_inode) {
|
||||
ceph_dir_clear_complete(dir);
|
||||
ihold(in);
|
||||
dn = splice_dentry(dn, in, &have_lease, true);
|
||||
dn = splice_dentry(dn, in, &have_lease);
|
||||
if (IS_ERR(dn)) {
|
||||
err = PTR_ERR(dn);
|
||||
goto done;
|
||||
|
@ -1235,17 +1197,16 @@ retry_lookup:
|
|||
(req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
|
||||
req->r_op == CEPH_MDS_OP_MKSNAP)) {
|
||||
struct dentry *dn = req->r_dentry;
|
||||
struct inode *dir = req->r_locked_dir;
|
||||
|
||||
/* fill out a snapdir LOOKUPSNAP dentry */
|
||||
BUG_ON(!dn);
|
||||
BUG_ON(!req->r_locked_dir);
|
||||
BUG_ON(ceph_snap(req->r_locked_dir) != CEPH_SNAPDIR);
|
||||
ininfo = rinfo->targeti.in;
|
||||
vino.ino = le64_to_cpu(ininfo->ino);
|
||||
vino.snap = le64_to_cpu(ininfo->snapid);
|
||||
BUG_ON(!dir);
|
||||
BUG_ON(ceph_snap(dir) != CEPH_SNAPDIR);
|
||||
dout(" linking snapped dir %p to dn %p\n", in, dn);
|
||||
ceph_dir_clear_complete(dir);
|
||||
ihold(in);
|
||||
dn = splice_dentry(dn, in, NULL, true);
|
||||
dn = splice_dentry(dn, in, NULL);
|
||||
if (IS_ERR(dn)) {
|
||||
err = PTR_ERR(dn);
|
||||
goto done;
|
||||
|
@ -1407,7 +1368,7 @@ retry_lookup:
|
|||
}
|
||||
|
||||
if (!dn->d_inode) {
|
||||
dn = splice_dentry(dn, in, NULL, false);
|
||||
dn = splice_dentry(dn, in, NULL);
|
||||
if (IS_ERR(dn)) {
|
||||
err = PTR_ERR(dn);
|
||||
dn = NULL;
|
||||
|
|
|
@ -109,6 +109,8 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
|
|||
return PTR_ERR(req);
|
||||
req->r_inode = inode;
|
||||
ihold(inode);
|
||||
req->r_num_caps = 1;
|
||||
|
||||
req->r_inode_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL;
|
||||
|
||||
req->r_args.setlayout.layout.fl_stripe_unit =
|
||||
|
@ -153,6 +155,7 @@ static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg)
|
|||
return PTR_ERR(req);
|
||||
req->r_inode = inode;
|
||||
ihold(inode);
|
||||
req->r_num_caps = 1;
|
||||
|
||||
req->r_args.setlayout.layout.fl_stripe_unit =
|
||||
cpu_to_le32(l.stripe_unit);
|
||||
|
|
|
@ -45,6 +45,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
|
|||
return PTR_ERR(req);
|
||||
req->r_inode = inode;
|
||||
ihold(inode);
|
||||
req->r_num_caps = 1;
|
||||
|
||||
/* mds requires start and length rather than start and end */
|
||||
if (LLONG_MAX == fl->fl_end)
|
||||
|
|
|
@ -266,7 +266,6 @@ struct ceph_inode_info {
|
|||
struct timespec i_rctime;
|
||||
u64 i_rbytes, i_rfiles, i_rsubdirs;
|
||||
u64 i_files, i_subdirs;
|
||||
u64 i_max_offset; /* largest readdir offset, set with complete dir */
|
||||
|
||||
struct rb_root i_fragtree;
|
||||
struct mutex i_fragtree_mutex;
|
||||
|
|
|
@ -1548,8 +1548,10 @@ static void apply_primary_affinity(struct ceph_osdmap *osdmap, u32 pps,
|
|||
return;
|
||||
|
||||
for (i = 0; i < len; i++) {
|
||||
if (osds[i] != CRUSH_ITEM_NONE &&
|
||||
osdmap->osd_primary_affinity[i] !=
|
||||
int osd = osds[i];
|
||||
|
||||
if (osd != CRUSH_ITEM_NONE &&
|
||||
osdmap->osd_primary_affinity[osd] !=
|
||||
CEPH_OSD_DEFAULT_PRIMARY_AFFINITY) {
|
||||
break;
|
||||
}
|
||||
|
@ -1563,10 +1565,9 @@ static void apply_primary_affinity(struct ceph_osdmap *osdmap, u32 pps,
|
|||
* osd's pgs get rejected as primary.
|
||||
*/
|
||||
for (i = 0; i < len; i++) {
|
||||
int osd;
|
||||
int osd = osds[i];
|
||||
u32 aff;
|
||||
|
||||
osd = osds[i];
|
||||
if (osd == CRUSH_ITEM_NONE)
|
||||
continue;
|
||||
|
||||
|
|
Loading…
Reference in New Issue