Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (23 commits)
  ceph: document unlocked d_parent accesses
  ceph: explicitly reference rename old_dentry parent dir in request
  ceph: document locking for ceph_set_dentry_offset
  ceph: avoid d_parent in ceph_dentry_hash; fix ceph_encode_fh() hashing bug
  ceph: protect d_parent access in ceph_d_revalidate
  ceph: protect access to d_parent
  ceph: handle racing calls to ceph_init_dentry
  ceph: set dir complete frag after adding capability
  rbd: set blk_queue request sizes to object size
  ceph: set up readahead size when rsize is not passed
  rbd: cancel watch request when releasing the device
  ceph: ignore lease mask
  ceph: fix ceph_lookup_open intent usage
  ceph: only link open operations to directory unsafe list if O_CREAT|O_TRUNC
  ceph: fix bad parent_inode calc in ceph_lookup_open
  ceph: avoid carrying Fw cap during write into page cache
  libceph: don't time out osd requests that haven't been received
  ceph: report f_bfree based on kb_avail rather than diffing.
  ceph: only queue capsnap if caps are dirty
  ceph: fix snap writeback when racing with writes
  ...
This commit is contained in:
Linus Torvalds 2011-07-26 13:38:50 -07:00
commit ba5b56cb3e
17 changed files with 306 additions and 145 deletions

View File

@ -629,6 +629,14 @@ static int rbd_get_num_segments(struct rbd_image_header *header,
return end_seg - start_seg + 1; return end_seg - start_seg + 1;
} }
/*
 * Returns the size, in bytes, of a single object in the image.
 *
 * The size is 2^obj_order.  The shift is performed on an unsigned
 * 64-bit operand so that the result matches the u64 return type; with
 * a plain int constant the shift would be evaluated in int width and
 * overflow (undefined behaviour) once obj_order reaches 31, before the
 * value is ever widened.
 */
static u64 rbd_obj_bytes(struct rbd_image_header *header)
{
	return 1ULL << header->obj_order;
}
/* /*
* bio helpers * bio helpers
*/ */
@ -1253,6 +1261,35 @@ fail:
return ret; return ret;
} }
/*
 * Synchronously send an osd "unwatch" for @obj, then tear down the
 * local watch event.
 *
 * dev->watch_event must be non-NULL on entry: its cookie is read to
 * fill in the request.  Once the op array has been created, the event
 * is cancelled and dev->watch_event is reset to NULL regardless of the
 * result of the osd request.
 *
 * Returns the (negative) error from rbd_create_rw_ops() if the op array
 * cannot be set up, otherwise the result of rbd_req_sync_op().
 */
static int rbd_req_sync_unwatch(struct rbd_device *dev,
				const char *obj)
{
	struct ceph_osd_req_op *unwatch_op;
	int rc;

	rc = rbd_create_rw_ops(&unwatch_op, 1, CEPH_OSD_OP_WATCH, 0);
	if (rc < 0)
		return rc;

	/*
	 * Same WATCH opcode, identified by the existing event's cookie;
	 * flag 0 presumably selects "unwatch" (the registering request is
	 * not visible here -- confirm against it).
	 */
	unwatch_op->watch.ver = 0;
	unwatch_op->watch.cookie = cpu_to_le64(dev->watch_event->cookie);
	unwatch_op->watch.flag = 0;

	rc = rbd_req_sync_op(dev, NULL,
			     CEPH_NOSNAP,
			     0,
			     CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
			     unwatch_op,
			     1, obj, 0, 0, NULL, NULL, NULL);

	rbd_destroy_ops(unwatch_op);

	/* drop the local event whether or not the osd request succeeded */
	ceph_osdc_cancel_event(dev->watch_event);
	dev->watch_event = NULL;

	return rc;
}
struct rbd_notify_info { struct rbd_notify_info {
struct rbd_device *dev; struct rbd_device *dev;
}; };
@ -1736,6 +1773,13 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
q = blk_init_queue(rbd_rq_fn, &rbd_dev->lock); q = blk_init_queue(rbd_rq_fn, &rbd_dev->lock);
if (!q) if (!q)
goto out_disk; goto out_disk;
/* set io sizes to object size */
blk_queue_max_hw_sectors(q, rbd_obj_bytes(&rbd_dev->header) / 512ULL);
blk_queue_max_segment_size(q, rbd_obj_bytes(&rbd_dev->header));
blk_queue_io_min(q, rbd_obj_bytes(&rbd_dev->header));
blk_queue_io_opt(q, rbd_obj_bytes(&rbd_dev->header));
blk_queue_merge_bvec(q, rbd_merge_bvec); blk_queue_merge_bvec(q, rbd_merge_bvec);
disk->queue = q; disk->queue = q;
@ -2290,7 +2334,7 @@ static void rbd_dev_release(struct device *dev)
ceph_osdc_unregister_linger_request(&rbd_dev->client->osdc, ceph_osdc_unregister_linger_request(&rbd_dev->client->osdc,
rbd_dev->watch_request); rbd_dev->watch_request);
if (rbd_dev->watch_event) if (rbd_dev->watch_event)
ceph_osdc_cancel_event(rbd_dev->watch_event); rbd_req_sync_unwatch(rbd_dev, rbd_dev->obj_md_name);
rbd_put_client(rbd_dev); rbd_put_client(rbd_dev);

View File

@ -102,7 +102,7 @@ static int mdsc_show(struct seq_file *s, void *p)
path = NULL; path = NULL;
spin_lock(&req->r_old_dentry->d_lock); spin_lock(&req->r_old_dentry->d_lock);
seq_printf(s, " #%llx/%.*s (%s)", seq_printf(s, " #%llx/%.*s (%s)",
ceph_ino(req->r_old_dentry->d_parent->d_inode), ceph_ino(req->r_old_dentry_dir),
req->r_old_dentry->d_name.len, req->r_old_dentry->d_name.len,
req->r_old_dentry->d_name.name, req->r_old_dentry->d_name.name,
path ? path : ""); path ? path : "");

View File

@ -40,14 +40,6 @@ int ceph_init_dentry(struct dentry *dentry)
if (dentry->d_fsdata) if (dentry->d_fsdata)
return 0; return 0;
if (dentry->d_parent == NULL || /* nfs fh_to_dentry */
ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
d_set_d_op(dentry, &ceph_dentry_ops);
else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR)
d_set_d_op(dentry, &ceph_snapdir_dentry_ops);
else
d_set_d_op(dentry, &ceph_snap_dentry_ops);
di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO); di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO);
if (!di) if (!di)
return -ENOMEM; /* oh well */ return -ENOMEM; /* oh well */
@ -58,16 +50,42 @@ int ceph_init_dentry(struct dentry *dentry)
kmem_cache_free(ceph_dentry_cachep, di); kmem_cache_free(ceph_dentry_cachep, di);
goto out_unlock; goto out_unlock;
} }
if (dentry->d_parent == NULL || /* nfs fh_to_dentry */
ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
d_set_d_op(dentry, &ceph_dentry_ops);
else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR)
d_set_d_op(dentry, &ceph_snapdir_dentry_ops);
else
d_set_d_op(dentry, &ceph_snap_dentry_ops);
di->dentry = dentry; di->dentry = dentry;
di->lease_session = NULL; di->lease_session = NULL;
dentry->d_fsdata = di;
dentry->d_time = jiffies; dentry->d_time = jiffies;
/* avoid reordering d_fsdata setup so that the check above is safe */
smp_mb();
dentry->d_fsdata = di;
ceph_dentry_lru_add(dentry); ceph_dentry_lru_add(dentry);
out_unlock: out_unlock:
spin_unlock(&dentry->d_lock); spin_unlock(&dentry->d_lock);
return 0; return 0;
} }
/*
 * Return the inode of @dentry's parent with an extra reference taken
 * via ihold(), or NULL if @dentry is NULL or has no d_parent.
 *
 * d_parent is sampled under dentry->d_lock.  A non-NULL result must be
 * released by the caller with iput().
 */
struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry)
{
	struct inode *dir = NULL;
	struct dentry *parent;

	if (dentry) {
		spin_lock(&dentry->d_lock);
		parent = dentry->d_parent;
		if (parent) {
			dir = parent->d_inode;
			ihold(dir);
		}
		spin_unlock(&dentry->d_lock);
	}

	return dir;
}
/* /*
@ -133,7 +151,7 @@ more:
d_unhashed(dentry) ? "!hashed" : "hashed", d_unhashed(dentry) ? "!hashed" : "hashed",
parent->d_subdirs.prev, parent->d_subdirs.next); parent->d_subdirs.prev, parent->d_subdirs.next);
if (p == &parent->d_subdirs) { if (p == &parent->d_subdirs) {
fi->at_end = 1; fi->flags |= CEPH_F_ATEND;
goto out_unlock; goto out_unlock;
} }
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
@ -234,7 +252,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
const int max_bytes = fsc->mount_options->max_readdir_bytes; const int max_bytes = fsc->mount_options->max_readdir_bytes;
dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off); dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off);
if (fi->at_end) if (fi->flags & CEPH_F_ATEND)
return 0; return 0;
/* always start with . and .. */ /* always start with . and .. */
@ -403,7 +421,7 @@ more:
dout("readdir next frag is %x\n", frag); dout("readdir next frag is %x\n", frag);
goto more; goto more;
} }
fi->at_end = 1; fi->flags |= CEPH_F_ATEND;
/* /*
* if dir_release_count still matches the dir, no dentries * if dir_release_count still matches the dir, no dentries
@ -435,7 +453,7 @@ static void reset_readdir(struct ceph_file_info *fi)
dput(fi->dentry); dput(fi->dentry);
fi->dentry = NULL; fi->dentry = NULL;
} }
fi->at_end = 0; fi->flags &= ~CEPH_F_ATEND;
} }
static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin) static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin)
@ -463,7 +481,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin)
if (offset != file->f_pos) { if (offset != file->f_pos) {
file->f_pos = offset; file->f_pos = offset;
file->f_version = 0; file->f_version = 0;
fi->at_end = 0; fi->flags &= ~CEPH_F_ATEND;
} }
retval = offset; retval = offset;
@ -488,21 +506,13 @@ out:
} }
/* /*
* Process result of a lookup/open request. * Handle lookups for the hidden .snap directory.
*
* Mainly, make sure we return the final req->r_dentry (if it already
* existed) in place of the original VFS-provided dentry when they
* differ.
*
* Gracefully handle the case where the MDS replies with -ENOENT and
* no trace (which it may do, at its discretion, e.g., if it doesn't
* care to issue a lease on the negative dentry).
*/ */
struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, int ceph_handle_snapdir(struct ceph_mds_request *req,
struct dentry *dentry, int err) struct dentry *dentry, int err)
{ {
struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
struct inode *parent = dentry->d_parent->d_inode; struct inode *parent = dentry->d_parent->d_inode; /* we hold i_mutex */
/* .snap dir? */ /* .snap dir? */
if (err == -ENOENT && if (err == -ENOENT &&
@ -516,7 +526,23 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
d_add(dentry, inode); d_add(dentry, inode);
err = 0; err = 0;
} }
return err;
}
/*
* Figure out final result of a lookup/open request.
*
* Mainly, make sure we return the final req->r_dentry (if it already
* existed) in place of the original VFS-provided dentry when they
* differ.
*
* Gracefully handle the case where the MDS replies with -ENOENT and
* no trace (which it may do, at its discretion, e.g., if it doesn't
* care to issue a lease on the negative dentry).
*/
struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
struct dentry *dentry, int err)
{
if (err == -ENOENT) { if (err == -ENOENT) {
/* no trace? */ /* no trace? */
err = 0; err = 0;
@ -610,6 +636,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
req->r_locked_dir = dir; req->r_locked_dir = dir;
err = ceph_mdsc_do_request(mdsc, NULL, req); err = ceph_mdsc_do_request(mdsc, NULL, req);
err = ceph_handle_snapdir(req, dentry, err);
dentry = ceph_finish_lookup(req, dentry, err); dentry = ceph_finish_lookup(req, dentry, err);
ceph_mdsc_put_request(req); /* will dput(dentry) */ ceph_mdsc_put_request(req); /* will dput(dentry) */
dout("lookup result=%p\n", dentry); dout("lookup result=%p\n", dentry);
@ -789,6 +816,7 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
req->r_dentry = dget(dentry); req->r_dentry = dget(dentry);
req->r_num_caps = 2; req->r_num_caps = 2;
req->r_old_dentry = dget(old_dentry); /* or inode? hrm. */ req->r_old_dentry = dget(old_dentry); /* or inode? hrm. */
req->r_old_dentry_dir = ceph_get_dentry_parent_inode(old_dentry);
req->r_locked_dir = dir; req->r_locked_dir = dir;
req->r_dentry_drop = CEPH_CAP_FILE_SHARED; req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL; req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
@ -887,6 +915,7 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
req->r_dentry = dget(new_dentry); req->r_dentry = dget(new_dentry);
req->r_num_caps = 2; req->r_num_caps = 2;
req->r_old_dentry = dget(old_dentry); req->r_old_dentry = dget(old_dentry);
req->r_old_dentry_dir = ceph_get_dentry_parent_inode(old_dentry);
req->r_locked_dir = new_dir; req->r_locked_dir = new_dir;
req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED; req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED;
req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL; req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL;
@ -1002,36 +1031,38 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
*/ */
static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd) static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd)
{ {
int valid = 0;
struct inode *dir; struct inode *dir;
if (nd && nd->flags & LOOKUP_RCU) if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD; return -ECHILD;
dir = dentry->d_parent->d_inode;
dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry, dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry,
dentry->d_name.len, dentry->d_name.name, dentry->d_inode, dentry->d_name.len, dentry->d_name.name, dentry->d_inode,
ceph_dentry(dentry)->offset); ceph_dentry(dentry)->offset);
dir = ceph_get_dentry_parent_inode(dentry);
/* always trust cached snapped dentries, snapdir dentry */ /* always trust cached snapped dentries, snapdir dentry */
if (ceph_snap(dir) != CEPH_NOSNAP) { if (ceph_snap(dir) != CEPH_NOSNAP) {
dout("d_revalidate %p '%.*s' inode %p is SNAPPED\n", dentry, dout("d_revalidate %p '%.*s' inode %p is SNAPPED\n", dentry,
dentry->d_name.len, dentry->d_name.name, dentry->d_inode); dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
goto out_touch; valid = 1;
} else if (dentry->d_inode &&
ceph_snap(dentry->d_inode) == CEPH_SNAPDIR) {
valid = 1;
} else if (dentry_lease_is_valid(dentry) ||
dir_lease_is_valid(dir, dentry)) {
valid = 1;
} }
if (dentry->d_inode && ceph_snap(dentry->d_inode) == CEPH_SNAPDIR)
goto out_touch;
if (dentry_lease_is_valid(dentry) || dout("d_revalidate %p %s\n", dentry, valid ? "valid" : "invalid");
dir_lease_is_valid(dir, dentry)) if (valid)
goto out_touch; ceph_dentry_lru_touch(dentry);
else
dout("d_revalidate %p invalid\n", dentry); d_drop(dentry);
d_drop(dentry); iput(dir);
return 0; return valid;
out_touch:
ceph_dentry_lru_touch(dentry);
return 1;
} }
/* /*
@ -1228,9 +1259,8 @@ void ceph_dentry_lru_del(struct dentry *dn)
* Return name hash for a given dentry. This is dependent on * Return name hash for a given dentry. This is dependent on
* the parent directory's hash function. * the parent directory's hash function.
*/ */
unsigned ceph_dentry_hash(struct dentry *dn) unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn)
{ {
struct inode *dir = dn->d_parent->d_inode;
struct ceph_inode_info *dci = ceph_inode(dir); struct ceph_inode_info *dci = ceph_inode(dir);
switch (dci->i_dir_layout.dl_dir_hash) { switch (dci->i_dir_layout.dl_dir_hash) {

View File

@ -46,7 +46,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len,
int type; int type;
struct ceph_nfs_fh *fh = (void *)rawfh; struct ceph_nfs_fh *fh = (void *)rawfh;
struct ceph_nfs_confh *cfh = (void *)rawfh; struct ceph_nfs_confh *cfh = (void *)rawfh;
struct dentry *parent = dentry->d_parent; struct dentry *parent;
struct inode *inode = dentry->d_inode; struct inode *inode = dentry->d_inode;
int connected_handle_length = sizeof(*cfh)/4; int connected_handle_length = sizeof(*cfh)/4;
int handle_length = sizeof(*fh)/4; int handle_length = sizeof(*fh)/4;
@ -55,26 +55,33 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len,
if (ceph_snap(inode) != CEPH_NOSNAP) if (ceph_snap(inode) != CEPH_NOSNAP)
return -EINVAL; return -EINVAL;
spin_lock(&dentry->d_lock);
parent = dget(dentry->d_parent);
spin_unlock(&dentry->d_lock);
if (*max_len >= connected_handle_length) { if (*max_len >= connected_handle_length) {
dout("encode_fh %p connectable\n", dentry); dout("encode_fh %p connectable\n", dentry);
cfh->ino = ceph_ino(dentry->d_inode); cfh->ino = ceph_ino(dentry->d_inode);
cfh->parent_ino = ceph_ino(parent->d_inode); cfh->parent_ino = ceph_ino(parent->d_inode);
cfh->parent_name_hash = ceph_dentry_hash(parent); cfh->parent_name_hash = ceph_dentry_hash(parent->d_inode,
dentry);
*max_len = connected_handle_length; *max_len = connected_handle_length;
type = 2; type = 2;
} else if (*max_len >= handle_length) { } else if (*max_len >= handle_length) {
if (connectable) { if (connectable) {
*max_len = connected_handle_length; *max_len = connected_handle_length;
return 255; type = 255;
} else {
dout("encode_fh %p\n", dentry);
fh->ino = ceph_ino(dentry->d_inode);
*max_len = handle_length;
type = 1;
} }
dout("encode_fh %p\n", dentry);
fh->ino = ceph_ino(dentry->d_inode);
*max_len = handle_length;
type = 1;
} else { } else {
*max_len = handle_length; *max_len = handle_length;
return 255; type = 255;
} }
dput(parent);
return type; return type;
} }
@ -123,7 +130,6 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
return dentry; return dentry;
} }
err = ceph_init_dentry(dentry); err = ceph_init_dentry(dentry);
if (err < 0) { if (err < 0) {
iput(inode); iput(inode);
return ERR_PTR(err); return ERR_PTR(err);

View File

@ -122,7 +122,7 @@ int ceph_open(struct inode *inode, struct file *file)
struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_mds_request *req; struct ceph_mds_request *req;
struct ceph_file_info *cf = file->private_data; struct ceph_file_info *cf = file->private_data;
struct inode *parent_inode = file->f_dentry->d_parent->d_inode; struct inode *parent_inode = NULL;
int err; int err;
int flags, fmode, wanted; int flags, fmode, wanted;
@ -194,7 +194,10 @@ int ceph_open(struct inode *inode, struct file *file)
req->r_inode = inode; req->r_inode = inode;
ihold(inode); ihold(inode);
req->r_num_caps = 1; req->r_num_caps = 1;
if (flags & (O_CREAT|O_TRUNC))
parent_inode = ceph_get_dentry_parent_inode(file->f_dentry);
err = ceph_mdsc_do_request(mdsc, parent_inode, req); err = ceph_mdsc_do_request(mdsc, parent_inode, req);
iput(parent_inode);
if (!err) if (!err)
err = ceph_init_file(inode, file, req->r_fmode); err = ceph_init_file(inode, file, req->r_fmode);
ceph_mdsc_put_request(req); ceph_mdsc_put_request(req);
@ -222,9 +225,9 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
{ {
struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_client *mdsc = fsc->mdsc;
struct file *file = nd->intent.open.file; struct file *file;
struct inode *parent_inode = get_dentry_parent_inode(file->f_dentry);
struct ceph_mds_request *req; struct ceph_mds_request *req;
struct dentry *ret;
int err; int err;
int flags = nd->intent.open.flags; int flags = nd->intent.open.flags;
@ -242,16 +245,24 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
req->r_dentry_unless = CEPH_CAP_FILE_EXCL; req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
} }
req->r_locked_dir = dir; /* caller holds dir->i_mutex */ req->r_locked_dir = dir; /* caller holds dir->i_mutex */
err = ceph_mdsc_do_request(mdsc, parent_inode, req); err = ceph_mdsc_do_request(mdsc,
dentry = ceph_finish_lookup(req, dentry, err); (flags & (O_CREAT|O_TRUNC)) ? dir : NULL,
if (!err && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry) req);
err = ceph_handle_snapdir(req, dentry, err);
if (err)
goto out;
if ((flags & O_CREAT) && !req->r_reply_info.head->is_dentry)
err = ceph_handle_notrace_create(dir, dentry); err = ceph_handle_notrace_create(dir, dentry);
if (!err) if (err)
err = ceph_init_file(req->r_dentry->d_inode, file, goto out;
req->r_fmode); file = lookup_instantiate_filp(nd, req->r_dentry, ceph_open);
if (IS_ERR(file))
err = PTR_ERR(file);
out:
ret = ceph_finish_lookup(req, dentry, err);
ceph_mdsc_put_request(req); ceph_mdsc_put_request(req);
dout("ceph_lookup_open result=%p\n", dentry); dout("ceph_lookup_open result=%p\n", ret);
return dentry; return ret;
} }
int ceph_release(struct inode *inode, struct file *file) int ceph_release(struct inode *inode, struct file *file)
@ -643,7 +654,8 @@ again:
if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 || if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 ||
(iocb->ki_filp->f_flags & O_DIRECT) || (iocb->ki_filp->f_flags & O_DIRECT) ||
(inode->i_sb->s_flags & MS_SYNCHRONOUS)) (inode->i_sb->s_flags & MS_SYNCHRONOUS) ||
(fi->flags & CEPH_F_SYNC))
/* hmm, this isn't really async... */ /* hmm, this isn't really async... */
ret = ceph_sync_read(filp, base, len, ppos, &checkeof); ret = ceph_sync_read(filp, base, len, ppos, &checkeof);
else else
@ -712,7 +724,7 @@ retry_snap:
want = CEPH_CAP_FILE_BUFFER; want = CEPH_CAP_FILE_BUFFER;
ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff); ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff);
if (ret < 0) if (ret < 0)
goto out; goto out_put;
dout("aio_write %p %llx.%llx %llu~%u got cap refs on %s\n", dout("aio_write %p %llx.%llx %llu~%u got cap refs on %s\n",
inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
@ -720,12 +732,23 @@ retry_snap:
if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 || if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 ||
(iocb->ki_filp->f_flags & O_DIRECT) || (iocb->ki_filp->f_flags & O_DIRECT) ||
(inode->i_sb->s_flags & MS_SYNCHRONOUS)) { (inode->i_sb->s_flags & MS_SYNCHRONOUS) ||
(fi->flags & CEPH_F_SYNC)) {
ret = ceph_sync_write(file, iov->iov_base, iov->iov_len, ret = ceph_sync_write(file, iov->iov_base, iov->iov_len,
&iocb->ki_pos); &iocb->ki_pos);
} else { } else {
ret = generic_file_aio_write(iocb, iov, nr_segs, pos); /*
* buffered write; drop Fw early to avoid slow
* revocation if we get stuck on balance_dirty_pages
*/
int dirty;
spin_lock(&inode->i_lock);
dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
spin_unlock(&inode->i_lock);
ceph_put_cap_refs(ci, got);
ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
if ((ret >= 0 || ret == -EIOCBQUEUED) && if ((ret >= 0 || ret == -EIOCBQUEUED) &&
((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host) ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host)
|| ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) { || ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) {
@ -733,7 +756,12 @@ retry_snap:
if (err < 0) if (err < 0)
ret = err; ret = err;
} }
if (dirty)
__mark_inode_dirty(inode, dirty);
goto out;
} }
if (ret >= 0) { if (ret >= 0) {
int dirty; int dirty;
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
@ -743,12 +771,13 @@ retry_snap:
__mark_inode_dirty(inode, dirty); __mark_inode_dirty(inode, dirty);
} }
out: out_put:
dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n", dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n",
inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
ceph_cap_string(got)); ceph_cap_string(got));
ceph_put_cap_refs(ci, got); ceph_put_cap_refs(ci, got);
out:
if (ret == -EOLDSNAPC) { if (ret == -EOLDSNAPC) {
dout("aio_write %p %llx.%llx %llu~%u got EOLDSNAPC, retrying\n", dout("aio_write %p %llx.%llx %llu~%u got EOLDSNAPC, retrying\n",
inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len); inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len);

View File

@ -560,7 +560,8 @@ static int fill_inode(struct inode *inode,
struct ceph_mds_reply_inode *info = iinfo->in; struct ceph_mds_reply_inode *info = iinfo->in;
struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_info *ci = ceph_inode(inode);
int i; int i;
int issued, implemented; int issued = 0, implemented;
int updating_inode = 0;
struct timespec mtime, atime, ctime; struct timespec mtime, atime, ctime;
u32 nsplits; u32 nsplits;
struct ceph_buffer *xattr_blob = NULL; struct ceph_buffer *xattr_blob = NULL;
@ -599,7 +600,8 @@ static int fill_inode(struct inode *inode,
if (le64_to_cpu(info->version) > 0 && if (le64_to_cpu(info->version) > 0 &&
(ci->i_version & ~1) >= le64_to_cpu(info->version)) (ci->i_version & ~1) >= le64_to_cpu(info->version))
goto no_change; goto no_change;
updating_inode = 1;
issued = __ceph_caps_issued(ci, &implemented); issued = __ceph_caps_issued(ci, &implemented);
issued |= implemented | __ceph_caps_dirty(ci); issued |= implemented | __ceph_caps_dirty(ci);
@ -707,17 +709,6 @@ static int fill_inode(struct inode *inode,
ci->i_rfiles = le64_to_cpu(info->rfiles); ci->i_rfiles = le64_to_cpu(info->rfiles);
ci->i_rsubdirs = le64_to_cpu(info->rsubdirs); ci->i_rsubdirs = le64_to_cpu(info->rsubdirs);
ceph_decode_timespec(&ci->i_rctime, &info->rctime); ceph_decode_timespec(&ci->i_rctime, &info->rctime);
/* set dir completion flag? */
if (ci->i_files == 0 && ci->i_subdirs == 0 &&
ceph_snap(inode) == CEPH_NOSNAP &&
(le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) &&
(issued & CEPH_CAP_FILE_EXCL) == 0 &&
(ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
dout(" marking %p complete (empty)\n", inode);
/* ci->i_ceph_flags |= CEPH_I_COMPLETE; */
ci->i_max_offset = 2;
}
break; break;
default: default:
pr_err("fill_inode %llx.%llx BAD mode 0%o\n", pr_err("fill_inode %llx.%llx BAD mode 0%o\n",
@ -774,6 +765,19 @@ no_change:
__ceph_get_fmode(ci, cap_fmode); __ceph_get_fmode(ci, cap_fmode);
} }
/* set dir completion flag? */
if (S_ISDIR(inode->i_mode) &&
updating_inode && /* didn't jump to no_change */
ci->i_files == 0 && ci->i_subdirs == 0 &&
ceph_snap(inode) == CEPH_NOSNAP &&
(le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) &&
(issued & CEPH_CAP_FILE_EXCL) == 0 &&
(ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
dout(" marking %p complete (empty)\n", inode);
/* ci->i_ceph_flags |= CEPH_I_COMPLETE; */
ci->i_max_offset = 2;
}
/* update delegation info? */ /* update delegation info? */
if (dirinfo) if (dirinfo)
ceph_fill_dirfrag(inode, dirinfo); ceph_fill_dirfrag(inode, dirinfo);
@ -805,14 +809,14 @@ static void update_dentry_lease(struct dentry *dentry,
return; return;
spin_lock(&dentry->d_lock); spin_lock(&dentry->d_lock);
dout("update_dentry_lease %p mask %d duration %lu ms ttl %lu\n", dout("update_dentry_lease %p duration %lu ms ttl %lu\n",
dentry, le16_to_cpu(lease->mask), duration, ttl); dentry, duration, ttl);
/* make lease_rdcache_gen match directory */ /* make lease_rdcache_gen match directory */
dir = dentry->d_parent->d_inode; dir = dentry->d_parent->d_inode;
di->lease_shared_gen = ceph_inode(dir)->i_shared_gen; di->lease_shared_gen = ceph_inode(dir)->i_shared_gen;
if (lease->mask == 0) if (duration == 0)
goto out_unlock; goto out_unlock;
if (di->lease_gen == session->s_cap_gen && if (di->lease_gen == session->s_cap_gen &&
@ -839,11 +843,13 @@ out_unlock:
/* /*
* Set dentry's directory position based on the current dir's max, and * Set dentry's directory position based on the current dir's max, and
* order it in d_subdirs, so that dcache_readdir behaves. * order it in d_subdirs, so that dcache_readdir behaves.
*
* Always called under directory's i_mutex.
*/ */
static void ceph_set_dentry_offset(struct dentry *dn) static void ceph_set_dentry_offset(struct dentry *dn)
{ {
struct dentry *dir = dn->d_parent; struct dentry *dir = dn->d_parent;
struct inode *inode = dn->d_parent->d_inode; struct inode *inode = dir->d_inode;
struct ceph_dentry_info *di; struct ceph_dentry_info *di;
BUG_ON(!inode); BUG_ON(!inode);
@ -1022,9 +1028,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
/* do we have a dn lease? */ /* do we have a dn lease? */
have_lease = have_dir_cap || have_lease = have_dir_cap ||
(le16_to_cpu(rinfo->dlease->mask) & le32_to_cpu(rinfo->dlease->duration_ms);
CEPH_LOCK_DN);
if (!have_lease) if (!have_lease)
dout("fill_trace no dentry lease or dir cap\n"); dout("fill_trace no dentry lease or dir cap\n");
@ -1560,7 +1564,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
{ {
struct inode *inode = dentry->d_inode; struct inode *inode = dentry->d_inode;
struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_info *ci = ceph_inode(inode);
struct inode *parent_inode = dentry->d_parent->d_inode; struct inode *parent_inode;
const unsigned int ia_valid = attr->ia_valid; const unsigned int ia_valid = attr->ia_valid;
struct ceph_mds_request *req; struct ceph_mds_request *req;
struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc;
@ -1743,7 +1747,9 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
req->r_inode_drop = release; req->r_inode_drop = release;
req->r_args.setattr.mask = cpu_to_le32(mask); req->r_args.setattr.mask = cpu_to_le32(mask);
req->r_num_caps = 1; req->r_num_caps = 1;
parent_inode = ceph_get_dentry_parent_inode(dentry);
err = ceph_mdsc_do_request(mdsc, parent_inode, req); err = ceph_mdsc_do_request(mdsc, parent_inode, req);
iput(parent_inode);
} }
dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err, dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err,
ceph_cap_string(dirtied), mask); ceph_cap_string(dirtied), mask);

View File

@ -38,7 +38,7 @@ static long ceph_ioctl_get_layout(struct file *file, void __user *arg)
static long ceph_ioctl_set_layout(struct file *file, void __user *arg) static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
{ {
struct inode *inode = file->f_dentry->d_inode; struct inode *inode = file->f_dentry->d_inode;
struct inode *parent_inode = file->f_dentry->d_parent->d_inode; struct inode *parent_inode;
struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_mds_request *req; struct ceph_mds_request *req;
struct ceph_ioctl_layout l; struct ceph_ioctl_layout l;
@ -87,7 +87,9 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
req->r_args.setlayout.layout.fl_pg_preferred = req->r_args.setlayout.layout.fl_pg_preferred =
cpu_to_le32(l.preferred_osd); cpu_to_le32(l.preferred_osd);
parent_inode = ceph_get_dentry_parent_inode(file->f_dentry);
err = ceph_mdsc_do_request(mdsc, parent_inode, req); err = ceph_mdsc_do_request(mdsc, parent_inode, req);
iput(parent_inode);
ceph_mdsc_put_request(req); ceph_mdsc_put_request(req);
return err; return err;
} }
@ -231,6 +233,14 @@ static long ceph_ioctl_lazyio(struct file *file)
return 0; return 0;
} }
/*
 * CEPH_IOC_SYNCIO handler: set CEPH_F_SYNC in the per-open-file flags.
 * The aio read/write paths test this flag and, when it is set, use the
 * synchronous I/O routines.  Always returns 0.
 */
static long ceph_ioctl_syncio(struct file *file)
{
	struct ceph_file_info *info = file->private_data;

	info->flags = info->flags | CEPH_F_SYNC;

	return 0;
}
long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg) long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{ {
dout("ioctl file %p cmd %u arg %lu\n", file, cmd, arg); dout("ioctl file %p cmd %u arg %lu\n", file, cmd, arg);
@ -249,6 +259,9 @@ long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case CEPH_IOC_LAZYIO: case CEPH_IOC_LAZYIO:
return ceph_ioctl_lazyio(file); return ceph_ioctl_lazyio(file);
case CEPH_IOC_SYNCIO:
return ceph_ioctl_syncio(file);
} }
return -ENOTTY; return -ENOTTY;

View File

@ -40,5 +40,6 @@ struct ceph_ioctl_dataloc {
struct ceph_ioctl_dataloc) struct ceph_ioctl_dataloc)
#define CEPH_IOC_LAZYIO _IO(CEPH_IOCTL_MAGIC, 4) #define CEPH_IOC_LAZYIO _IO(CEPH_IOCTL_MAGIC, 4)
#define CEPH_IOC_SYNCIO _IO(CEPH_IOCTL_MAGIC, 5)
#endif #endif

View File

@ -483,22 +483,26 @@ void ceph_mdsc_release_request(struct kref *kref)
destroy_reply_info(&req->r_reply_info); destroy_reply_info(&req->r_reply_info);
} }
if (req->r_inode) { if (req->r_inode) {
ceph_put_cap_refs(ceph_inode(req->r_inode), ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
CEPH_CAP_PIN);
iput(req->r_inode); iput(req->r_inode);
} }
if (req->r_locked_dir) if (req->r_locked_dir)
ceph_put_cap_refs(ceph_inode(req->r_locked_dir), ceph_put_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN);
CEPH_CAP_PIN);
if (req->r_target_inode) if (req->r_target_inode)
iput(req->r_target_inode); iput(req->r_target_inode);
if (req->r_dentry) if (req->r_dentry)
dput(req->r_dentry); dput(req->r_dentry);
if (req->r_old_dentry) { if (req->r_old_dentry) {
ceph_put_cap_refs( /*
ceph_inode(req->r_old_dentry->d_parent->d_inode), * track (and drop pins for) r_old_dentry_dir
CEPH_CAP_PIN); * separately, since r_old_dentry's d_parent may have
* changed between the dir mutex being dropped and
* this request being freed.
*/
ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir),
CEPH_CAP_PIN);
dput(req->r_old_dentry); dput(req->r_old_dentry);
iput(req->r_old_dentry_dir);
} }
kfree(req->r_path1); kfree(req->r_path1);
kfree(req->r_path2); kfree(req->r_path2);
@ -617,6 +621,12 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
*/ */
struct dentry *get_nonsnap_parent(struct dentry *dentry) struct dentry *get_nonsnap_parent(struct dentry *dentry)
{ {
/*
* we don't need to worry about protecting the d_parent access
* here because we never rename inside the snapped namespace
* except to resplice to another snapdir, and either the old or new
* result is a valid result.
*/
while (!IS_ROOT(dentry) && ceph_snap(dentry->d_inode) != CEPH_NOSNAP) while (!IS_ROOT(dentry) && ceph_snap(dentry->d_inode) != CEPH_NOSNAP)
dentry = dentry->d_parent; dentry = dentry->d_parent;
return dentry; return dentry;
@ -652,7 +662,9 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
if (req->r_inode) { if (req->r_inode) {
inode = req->r_inode; inode = req->r_inode;
} else if (req->r_dentry) { } else if (req->r_dentry) {
struct inode *dir = req->r_dentry->d_parent->d_inode; /* ignore race with rename; old or new d_parent is okay */
struct dentry *parent = req->r_dentry->d_parent;
struct inode *dir = parent->d_inode;
if (dir->i_sb != mdsc->fsc->sb) { if (dir->i_sb != mdsc->fsc->sb) {
/* not this fs! */ /* not this fs! */
@ -660,8 +672,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
} else if (ceph_snap(dir) != CEPH_NOSNAP) { } else if (ceph_snap(dir) != CEPH_NOSNAP) {
/* direct snapped/virtual snapdir requests /* direct snapped/virtual snapdir requests
* based on parent dir inode */ * based on parent dir inode */
struct dentry *dn = struct dentry *dn = get_nonsnap_parent(parent);
get_nonsnap_parent(req->r_dentry->d_parent);
inode = dn->d_inode; inode = dn->d_inode;
dout("__choose_mds using nonsnap parent %p\n", inode); dout("__choose_mds using nonsnap parent %p\n", inode);
} else if (req->r_dentry->d_inode) { } else if (req->r_dentry->d_inode) {
@ -670,7 +681,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
} else { } else {
/* dir + name */ /* dir + name */
inode = dir; inode = dir;
hash = ceph_dentry_hash(req->r_dentry); hash = ceph_dentry_hash(dir, req->r_dentry);
is_hash = true; is_hash = true;
} }
} }
@ -1931,9 +1942,8 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
if (req->r_locked_dir) if (req->r_locked_dir)
ceph_get_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN); ceph_get_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN);
if (req->r_old_dentry) if (req->r_old_dentry)
ceph_get_cap_refs( ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir),
ceph_inode(req->r_old_dentry->d_parent->d_inode), CEPH_CAP_PIN);
CEPH_CAP_PIN);
/* issue */ /* issue */
mutex_lock(&mdsc->mutex); mutex_lock(&mdsc->mutex);
@ -2714,7 +2724,6 @@ static void handle_lease(struct ceph_mds_client *mdsc,
struct ceph_mds_lease *h = msg->front.iov_base; struct ceph_mds_lease *h = msg->front.iov_base;
u32 seq; u32 seq;
struct ceph_vino vino; struct ceph_vino vino;
int mask;
struct qstr dname; struct qstr dname;
int release = 0; int release = 0;
@ -2725,7 +2734,6 @@ static void handle_lease(struct ceph_mds_client *mdsc,
goto bad; goto bad;
vino.ino = le64_to_cpu(h->ino); vino.ino = le64_to_cpu(h->ino);
vino.snap = CEPH_NOSNAP; vino.snap = CEPH_NOSNAP;
mask = le16_to_cpu(h->mask);
seq = le32_to_cpu(h->seq); seq = le32_to_cpu(h->seq);
dname.name = (void *)h + sizeof(*h) + sizeof(u32); dname.name = (void *)h + sizeof(*h) + sizeof(u32);
dname.len = msg->front.iov_len - sizeof(*h) - sizeof(u32); dname.len = msg->front.iov_len - sizeof(*h) - sizeof(u32);
@ -2737,8 +2745,8 @@ static void handle_lease(struct ceph_mds_client *mdsc,
/* lookup inode */ /* lookup inode */
inode = ceph_find_inode(sb, vino); inode = ceph_find_inode(sb, vino);
dout("handle_lease %s, mask %d, ino %llx %p %.*s\n", dout("handle_lease %s, ino %llx %p %.*s\n",
ceph_lease_op_name(h->action), mask, vino.ino, inode, ceph_lease_op_name(h->action), vino.ino, inode,
dname.len, dname.name); dname.len, dname.name);
if (inode == NULL) { if (inode == NULL) {
dout("handle_lease no inode %llx\n", vino.ino); dout("handle_lease no inode %llx\n", vino.ino);
@ -2828,7 +2836,6 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session,
return; return;
lease = msg->front.iov_base; lease = msg->front.iov_base;
lease->action = action; lease->action = action;
lease->mask = cpu_to_le16(1);
lease->ino = cpu_to_le64(ceph_vino(inode).ino); lease->ino = cpu_to_le64(ceph_vino(inode).ino);
lease->first = lease->last = cpu_to_le64(ceph_vino(inode).snap); lease->first = lease->last = cpu_to_le64(ceph_vino(inode).snap);
lease->seq = cpu_to_le32(seq); lease->seq = cpu_to_le32(seq);
@ -2850,7 +2857,7 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session,
* Pass @inode always, @dentry is optional. * Pass @inode always, @dentry is optional.
*/ */
void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode, void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode,
struct dentry *dentry, int mask) struct dentry *dentry)
{ {
struct ceph_dentry_info *di; struct ceph_dentry_info *di;
struct ceph_mds_session *session; struct ceph_mds_session *session;
@ -2858,7 +2865,6 @@ void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode,
BUG_ON(inode == NULL); BUG_ON(inode == NULL);
BUG_ON(dentry == NULL); BUG_ON(dentry == NULL);
BUG_ON(mask == 0);
/* is dentry lease valid? */ /* is dentry lease valid? */
spin_lock(&dentry->d_lock); spin_lock(&dentry->d_lock);
@ -2868,8 +2874,8 @@ void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode,
di->lease_gen != di->lease_session->s_cap_gen || di->lease_gen != di->lease_session->s_cap_gen ||
!time_before(jiffies, dentry->d_time)) { !time_before(jiffies, dentry->d_time)) {
dout("lease_release inode %p dentry %p -- " dout("lease_release inode %p dentry %p -- "
"no lease on %d\n", "no lease\n",
inode, dentry, mask); inode, dentry);
spin_unlock(&dentry->d_lock); spin_unlock(&dentry->d_lock);
return; return;
} }
@ -2880,8 +2886,8 @@ void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode,
__ceph_mdsc_drop_dentry_lease(dentry); __ceph_mdsc_drop_dentry_lease(dentry);
spin_unlock(&dentry->d_lock); spin_unlock(&dentry->d_lock);
dout("lease_release inode %p dentry %p mask %d to mds%d\n", dout("lease_release inode %p dentry %p to mds%d\n",
inode, dentry, mask, session->s_mds); inode, dentry, session->s_mds);
ceph_mdsc_lease_send_msg(session, inode, dentry, ceph_mdsc_lease_send_msg(session, inode, dentry,
CEPH_MDS_LEASE_RELEASE, seq); CEPH_MDS_LEASE_RELEASE, seq);
ceph_put_mds_session(session); ceph_put_mds_session(session);

View File

@ -171,6 +171,7 @@ struct ceph_mds_request {
struct inode *r_inode; /* arg1 */ struct inode *r_inode; /* arg1 */
struct dentry *r_dentry; /* arg1 */ struct dentry *r_dentry; /* arg1 */
struct dentry *r_old_dentry; /* arg2: rename from or link from */ struct dentry *r_old_dentry; /* arg2: rename from or link from */
struct inode *r_old_dentry_dir; /* arg2: old dentry's parent dir */
char *r_path1, *r_path2; char *r_path1, *r_path2;
struct ceph_vino r_ino1, r_ino2; struct ceph_vino r_ino1, r_ino2;
@ -333,7 +334,7 @@ extern void ceph_mdsc_sync(struct ceph_mds_client *mdsc);
extern void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, extern void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc,
struct inode *inode, struct inode *inode,
struct dentry *dn, int mask); struct dentry *dn);
extern void ceph_invalidate_dir_request(struct ceph_mds_request *req); extern void ceph_invalidate_dir_request(struct ceph_mds_request *req);

View File

@ -449,6 +449,15 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
used = __ceph_caps_used(ci); used = __ceph_caps_used(ci);
dirty = __ceph_caps_dirty(ci); dirty = __ceph_caps_dirty(ci);
/*
* If there is a write in progress, treat that as a dirty Fw,
* even though it hasn't completed yet; by the time we finish
* up this capsnap it will be.
*/
if (used & CEPH_CAP_FILE_WR)
dirty |= CEPH_CAP_FILE_WR;
if (__ceph_have_pending_cap_snap(ci)) { if (__ceph_have_pending_cap_snap(ci)) {
/* there is no point in queuing multiple "pending" cap_snaps, /* there is no point in queuing multiple "pending" cap_snaps,
as no new writes are allowed to start when pending, so any as no new writes are allowed to start when pending, so any
@ -456,13 +465,19 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
cap_snap. lucky us. */ cap_snap. lucky us. */
dout("queue_cap_snap %p already pending\n", inode); dout("queue_cap_snap %p already pending\n", inode);
kfree(capsnap); kfree(capsnap);
} else if (ci->i_wrbuffer_ref_head || (used & CEPH_CAP_FILE_WR) || } else if (dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL|
(dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL| CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR)) {
CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR))) {
struct ceph_snap_context *snapc = ci->i_head_snapc; struct ceph_snap_context *snapc = ci->i_head_snapc;
dout("queue_cap_snap %p cap_snap %p queuing under %p\n", inode, /*
capsnap, snapc); * if we are a sync write, we may need to go to the snaprealm
* to get the current snapc.
*/
if (!snapc)
snapc = ci->i_snap_realm->cached_context;
dout("queue_cap_snap %p cap_snap %p queuing under %p %s\n",
inode, capsnap, snapc, ceph_cap_string(dirty));
ihold(inode); ihold(inode);
atomic_set(&capsnap->nref, 1); atomic_set(&capsnap->nref, 1);

View File

@ -73,8 +73,7 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
*/ */
buf->f_bsize = 1 << CEPH_BLOCK_SHIFT; buf->f_bsize = 1 << CEPH_BLOCK_SHIFT;
buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10);
buf->f_bfree = (le64_to_cpu(st.kb) - le64_to_cpu(st.kb_used)) >> buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
(CEPH_BLOCK_SHIFT-10);
buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
buf->f_files = le64_to_cpu(st.num_objects); buf->f_files = le64_to_cpu(st.num_objects);
@ -780,6 +779,10 @@ static int ceph_register_bdi(struct super_block *sb,
fsc->backing_dev_info.ra_pages = fsc->backing_dev_info.ra_pages =
(fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1) (fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1)
>> PAGE_SHIFT; >> PAGE_SHIFT;
else
fsc->backing_dev_info.ra_pages =
default_backing_dev_info.ra_pages;
err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%d", err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%d",
atomic_long_inc_return(&bdi_seq)); atomic_long_inc_return(&bdi_seq));
if (!err) if (!err)

View File

@ -543,13 +543,16 @@ extern void ceph_reservation_status(struct ceph_fs_client *client,
/* /*
* we keep buffered readdir results attached to file->private_data * we keep buffered readdir results attached to file->private_data
*/ */
#define CEPH_F_SYNC 1
#define CEPH_F_ATEND 2
struct ceph_file_info { struct ceph_file_info {
int fmode; /* initialized on open */ short fmode; /* initialized on open */
short flags; /* CEPH_F_* */
/* readdir: position within the dir */ /* readdir: position within the dir */
u32 frag; u32 frag;
struct ceph_mds_request *last_readdir; struct ceph_mds_request *last_readdir;
int at_end;
/* readdir: position within a frag */ /* readdir: position within a frag */
unsigned offset; /* offset of last chunk, adjusted for . and .. */ unsigned offset; /* offset of last chunk, adjusted for . and .. */
@ -789,6 +792,8 @@ extern const struct dentry_operations ceph_dentry_ops, ceph_snap_dentry_ops,
ceph_snapdir_dentry_ops; ceph_snapdir_dentry_ops;
extern int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry); extern int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry);
extern int ceph_handle_snapdir(struct ceph_mds_request *req,
struct dentry *dentry, int err);
extern struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, extern struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
struct dentry *dentry, int err); struct dentry *dentry, int err);
@ -796,7 +801,8 @@ extern void ceph_dentry_lru_add(struct dentry *dn);
extern void ceph_dentry_lru_touch(struct dentry *dn); extern void ceph_dentry_lru_touch(struct dentry *dn);
extern void ceph_dentry_lru_del(struct dentry *dn); extern void ceph_dentry_lru_del(struct dentry *dn);
extern void ceph_invalidate_dentry_lease(struct dentry *dentry); extern void ceph_invalidate_dentry_lease(struct dentry *dentry);
extern unsigned ceph_dentry_hash(struct dentry *dn); extern unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn);
extern struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry);
/* /*
* our d_ops vary depending on whether the inode is live, * our d_ops vary depending on whether the inode is live,
@ -819,14 +825,6 @@ extern int ceph_encode_locks(struct inode *i, struct ceph_pagelist *p,
int p_locks, int f_locks); int p_locks, int f_locks);
extern int lock_to_ceph_filelock(struct file_lock *fl, struct ceph_filelock *c); extern int lock_to_ceph_filelock(struct file_lock *fl, struct ceph_filelock *c);
static inline struct inode *get_dentry_parent_inode(struct dentry *dentry)
{
if (dentry && dentry->d_parent)
return dentry->d_parent->d_inode;
return NULL;
}
/* debugfs.c */ /* debugfs.c */
extern int ceph_fs_debugfs_init(struct ceph_fs_client *client); extern int ceph_fs_debugfs_init(struct ceph_fs_client *client);
extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client); extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client);

View File

@ -629,7 +629,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
struct inode *inode = dentry->d_inode; struct inode *inode = dentry->d_inode;
struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_info *ci = ceph_inode(inode);
struct inode *parent_inode = dentry->d_parent->d_inode; struct inode *parent_inode;
struct ceph_mds_request *req; struct ceph_mds_request *req;
struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_client *mdsc = fsc->mdsc;
int err; int err;
@ -677,7 +677,9 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
req->r_data_len = size; req->r_data_len = size;
dout("xattr.ver (before): %lld\n", ci->i_xattrs.version); dout("xattr.ver (before): %lld\n", ci->i_xattrs.version);
parent_inode = ceph_get_dentry_parent_inode(dentry);
err = ceph_mdsc_do_request(mdsc, parent_inode, req); err = ceph_mdsc_do_request(mdsc, parent_inode, req);
iput(parent_inode);
ceph_mdsc_put_request(req); ceph_mdsc_put_request(req);
dout("xattr.ver (after): %lld\n", ci->i_xattrs.version); dout("xattr.ver (after): %lld\n", ci->i_xattrs.version);
@ -788,7 +790,7 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name)
struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_client *mdsc = fsc->mdsc;
struct inode *inode = dentry->d_inode; struct inode *inode = dentry->d_inode;
struct inode *parent_inode = dentry->d_parent->d_inode; struct inode *parent_inode;
struct ceph_mds_request *req; struct ceph_mds_request *req;
int err; int err;
@ -802,7 +804,9 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name)
req->r_num_caps = 1; req->r_num_caps = 1;
req->r_path2 = kstrdup(name, GFP_NOFS); req->r_path2 = kstrdup(name, GFP_NOFS);
parent_inode = ceph_get_dentry_parent_inode(dentry);
err = ceph_mdsc_do_request(mdsc, parent_inode, req); err = ceph_mdsc_do_request(mdsc, parent_inode, req);
iput(parent_inode);
ceph_mdsc_put_request(req); ceph_mdsc_put_request(req);
return err; return err;
} }

View File

@ -94,6 +94,7 @@ struct ceph_msg {
bool more_to_follow; bool more_to_follow;
bool needs_out_seq; bool needs_out_seq;
int front_max; int front_max;
unsigned long ack_stamp; /* tx: when we were acked */
struct ceph_msgpool *pool; struct ceph_msgpool *pool;
}; };

View File

@ -486,13 +486,10 @@ static void prepare_write_message(struct ceph_connection *con)
m = list_first_entry(&con->out_queue, m = list_first_entry(&con->out_queue,
struct ceph_msg, list_head); struct ceph_msg, list_head);
con->out_msg = m; con->out_msg = m;
if (test_bit(LOSSYTX, &con->state)) {
list_del_init(&m->list_head); /* put message on sent list */
} else { ceph_msg_get(m);
/* put message on sent list */ list_move_tail(&m->list_head, &con->out_sent);
ceph_msg_get(m);
list_move_tail(&m->list_head, &con->out_sent);
}
/* /*
* only assign outgoing seq # if we haven't sent this message * only assign outgoing seq # if we haven't sent this message
@ -1399,6 +1396,7 @@ static void process_ack(struct ceph_connection *con)
break; break;
dout("got ack for seq %llu type %d at %p\n", seq, dout("got ack for seq %llu type %d at %p\n", seq,
le16_to_cpu(m->hdr.type), m); le16_to_cpu(m->hdr.type), m);
m->ack_stamp = jiffies;
ceph_msg_remove(m); ceph_msg_remove(m);
} }
prepare_read_tag(con); prepare_read_tag(con);

View File

@ -1085,9 +1085,15 @@ static void handle_timeout(struct work_struct *work)
req = list_entry(osdc->req_lru.next, struct ceph_osd_request, req = list_entry(osdc->req_lru.next, struct ceph_osd_request,
r_req_lru_item); r_req_lru_item);
/* hasn't been long enough since we sent it? */
if (time_before(jiffies, req->r_stamp + timeout)) if (time_before(jiffies, req->r_stamp + timeout))
break; break;
/* hasn't been long enough since it was acked? */
if (req->r_request->ack_stamp == 0 ||
time_before(jiffies, req->r_request->ack_stamp + timeout))
break;
BUG_ON(req == last_req && req->r_stamp == last_stamp); BUG_ON(req == last_req && req->r_stamp == last_stamp);
last_req = req; last_req = req;
last_stamp = req->r_stamp; last_stamp = req->r_stamp;