Merge branch 'upstream-linus' of git://oss.oracle.com/home/sourcebo/git/ocfs2
* 'upstream-linus' of git://oss.oracle.com/home/sourcebo/git/ocfs2:
  [PATCH] CONFIGFS_FS must depend on SYSFS
  [PATCH] Bogus NULL pointer check in fs/configfs/dir.c
  ocfs2: Better I/O error handling in heartbeat
  ocfs2: test and set teardown flag early in user_dlm_destroy_lock()
  ocfs2: Handle the DLM_CANCELGRANT case in user_unlock_ast()
  ocfs2: catch an invalid ast case in dlmfs
  ocfs2: remove an overly aggressive BUG() in dlmfs
  ocfs2: multi node truncate fix
This commit is contained in:
commit
e38d557896
|
@ -861,7 +861,7 @@ config RAMFS
|
||||||
|
|
||||||
config CONFIGFS_FS
|
config CONFIGFS_FS
|
||||||
tristate "Userspace-driven configuration filesystem (EXPERIMENTAL)"
|
tristate "Userspace-driven configuration filesystem (EXPERIMENTAL)"
|
||||||
depends on EXPERIMENTAL
|
depends on SYSFS && EXPERIMENTAL
|
||||||
help
|
help
|
||||||
configfs is a ram-based filesystem that provides the converse
|
configfs is a ram-based filesystem that provides the converse
|
||||||
of sysfs's functionality. Where sysfs is a filesystem-based
|
of sysfs's functionality. Where sysfs is a filesystem-based
|
||||||
|
|
|
@ -504,7 +504,7 @@ static int populate_groups(struct config_group *group)
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
if (group && group->default_groups) {
|
if (group->default_groups) {
|
||||||
/* FYI, we're faking mkdir here
|
/* FYI, we're faking mkdir here
|
||||||
* I'm not sure we need this semaphore, as we're called
|
* I'm not sure we need this semaphore, as we're called
|
||||||
* from our parent's mkdir. That holds our parent's
|
* from our parent's mkdir. That holds our parent's
|
||||||
|
|
|
@ -153,6 +153,7 @@ struct o2hb_region {
|
||||||
struct o2hb_bio_wait_ctxt {
|
struct o2hb_bio_wait_ctxt {
|
||||||
atomic_t wc_num_reqs;
|
atomic_t wc_num_reqs;
|
||||||
struct completion wc_io_complete;
|
struct completion wc_io_complete;
|
||||||
|
int wc_error;
|
||||||
};
|
};
|
||||||
|
|
||||||
static void o2hb_write_timeout(void *arg)
|
static void o2hb_write_timeout(void *arg)
|
||||||
|
@ -186,6 +187,7 @@ static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc,
|
||||||
{
|
{
|
||||||
atomic_set(&wc->wc_num_reqs, num_ios);
|
atomic_set(&wc->wc_num_reqs, num_ios);
|
||||||
init_completion(&wc->wc_io_complete);
|
init_completion(&wc->wc_io_complete);
|
||||||
|
wc->wc_error = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Used in error paths too */
|
/* Used in error paths too */
|
||||||
|
@ -218,8 +220,10 @@ static int o2hb_bio_end_io(struct bio *bio,
|
||||||
{
|
{
|
||||||
struct o2hb_bio_wait_ctxt *wc = bio->bi_private;
|
struct o2hb_bio_wait_ctxt *wc = bio->bi_private;
|
||||||
|
|
||||||
if (error)
|
if (error) {
|
||||||
mlog(ML_ERROR, "IO Error %d\n", error);
|
mlog(ML_ERROR, "IO Error %d\n", error);
|
||||||
|
wc->wc_error = error;
|
||||||
|
}
|
||||||
|
|
||||||
if (bio->bi_size)
|
if (bio->bi_size)
|
||||||
return 1;
|
return 1;
|
||||||
|
@ -390,6 +394,8 @@ static int o2hb_read_slots(struct o2hb_region *reg,
|
||||||
|
|
||||||
bail_and_wait:
|
bail_and_wait:
|
||||||
o2hb_wait_on_io(reg, &wc);
|
o2hb_wait_on_io(reg, &wc);
|
||||||
|
if (wc.wc_error && !status)
|
||||||
|
status = wc.wc_error;
|
||||||
|
|
||||||
if (bios) {
|
if (bios) {
|
||||||
for(i = 0; i < num_bios; i++)
|
for(i = 0; i < num_bios; i++)
|
||||||
|
@ -790,20 +796,24 @@ static int o2hb_highest_node(unsigned long *nodes,
|
||||||
return highest;
|
return highest;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
|
static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
|
||||||
{
|
{
|
||||||
int i, ret, highest_node, change = 0;
|
int i, ret, highest_node, change = 0;
|
||||||
unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)];
|
unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)];
|
||||||
struct bio *write_bio;
|
struct bio *write_bio;
|
||||||
struct o2hb_bio_wait_ctxt write_wc;
|
struct o2hb_bio_wait_ctxt write_wc;
|
||||||
|
|
||||||
if (o2nm_configured_node_map(configured_nodes, sizeof(configured_nodes)))
|
ret = o2nm_configured_node_map(configured_nodes,
|
||||||
return;
|
sizeof(configured_nodes));
|
||||||
|
if (ret) {
|
||||||
|
mlog_errno(ret);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES);
|
highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES);
|
||||||
if (highest_node >= O2NM_MAX_NODES) {
|
if (highest_node >= O2NM_MAX_NODES) {
|
||||||
mlog(ML_NOTICE, "ocfs2_heartbeat: no configured nodes found!\n");
|
mlog(ML_NOTICE, "ocfs2_heartbeat: no configured nodes found!\n");
|
||||||
return;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* No sense in reading the slots of nodes that don't exist
|
/* No sense in reading the slots of nodes that don't exist
|
||||||
|
@ -813,7 +823,7 @@ static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
|
||||||
ret = o2hb_read_slots(reg, highest_node + 1);
|
ret = o2hb_read_slots(reg, highest_node + 1);
|
||||||
if (ret < 0) {
|
if (ret < 0) {
|
||||||
mlog_errno(ret);
|
mlog_errno(ret);
|
||||||
return;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* With an up to date view of the slots, we can check that no
|
/* With an up to date view of the slots, we can check that no
|
||||||
|
@ -831,7 +841,7 @@ static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
|
||||||
ret = o2hb_issue_node_write(reg, &write_bio, &write_wc);
|
ret = o2hb_issue_node_write(reg, &write_bio, &write_wc);
|
||||||
if (ret < 0) {
|
if (ret < 0) {
|
||||||
mlog_errno(ret);
|
mlog_errno(ret);
|
||||||
return;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
i = -1;
|
i = -1;
|
||||||
|
@ -847,6 +857,15 @@ static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
|
||||||
*/
|
*/
|
||||||
o2hb_wait_on_io(reg, &write_wc);
|
o2hb_wait_on_io(reg, &write_wc);
|
||||||
bio_put(write_bio);
|
bio_put(write_bio);
|
||||||
|
if (write_wc.wc_error) {
|
||||||
|
/* Do not re-arm the write timeout on I/O error - we
|
||||||
|
* can't be sure that the new block ever made it to
|
||||||
|
* disk */
|
||||||
|
mlog(ML_ERROR, "Write error %d on device \"%s\"\n",
|
||||||
|
write_wc.wc_error, reg->hr_dev_name);
|
||||||
|
return write_wc.wc_error;
|
||||||
|
}
|
||||||
|
|
||||||
o2hb_arm_write_timeout(reg);
|
o2hb_arm_write_timeout(reg);
|
||||||
|
|
||||||
/* let the person who launched us know when things are steady */
|
/* let the person who launched us know when things are steady */
|
||||||
|
@ -854,6 +873,8 @@ static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
|
||||||
if (atomic_dec_and_test(&reg->hr_steady_iterations))
|
if (atomic_dec_and_test(&reg->hr_steady_iterations))
|
||||||
wake_up(&o2hb_steady_queue);
|
wake_up(&o2hb_steady_queue);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Subtract b from a, storing the result in a. a *must* have a larger
|
/* Subtract b from a, storing the result in a. a *must* have a larger
|
||||||
|
@ -913,7 +934,10 @@ static int o2hb_thread(void *data)
|
||||||
* likely to time itself out. */
|
* likely to time itself out. */
|
||||||
do_gettimeofday(&before_hb);
|
do_gettimeofday(&before_hb);
|
||||||
|
|
||||||
o2hb_do_disk_heartbeat(reg);
|
i = 0;
|
||||||
|
do {
|
||||||
|
ret = o2hb_do_disk_heartbeat(reg);
|
||||||
|
} while (ret && ++i < 2);
|
||||||
|
|
||||||
do_gettimeofday(&after_hb);
|
do_gettimeofday(&after_hb);
|
||||||
elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb);
|
elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb);
|
||||||
|
|
|
@ -139,6 +139,10 @@ static void user_ast(void *opaque)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
mlog_bug_on_msg(lockres->l_requested == LKM_IVMODE,
|
||||||
|
"Lockres %s, requested ivmode. flags 0x%x\n",
|
||||||
|
lockres->l_name, lockres->l_flags);
|
||||||
|
|
||||||
/* we're downconverting. */
|
/* we're downconverting. */
|
||||||
if (lockres->l_requested < lockres->l_level) {
|
if (lockres->l_requested < lockres->l_level) {
|
||||||
if (lockres->l_requested <=
|
if (lockres->l_requested <=
|
||||||
|
@ -229,23 +233,42 @@ static void user_unlock_ast(void *opaque, enum dlm_status status)
|
||||||
|
|
||||||
mlog(0, "UNLOCK AST called on lock %s\n", lockres->l_name);
|
mlog(0, "UNLOCK AST called on lock %s\n", lockres->l_name);
|
||||||
|
|
||||||
if (status != DLM_NORMAL)
|
if (status != DLM_NORMAL && status != DLM_CANCELGRANT)
|
||||||
mlog(ML_ERROR, "Dlm returns status %d\n", status);
|
mlog(ML_ERROR, "Dlm returns status %d\n", status);
|
||||||
|
|
||||||
spin_lock(&lockres->l_lock);
|
spin_lock(&lockres->l_lock);
|
||||||
if (lockres->l_flags & USER_LOCK_IN_TEARDOWN)
|
/* The teardown flag gets set early during the unlock process,
|
||||||
|
* so test the cancel flag to make sure that this ast isn't
|
||||||
|
* for a concurrent cancel. */
|
||||||
|
if (lockres->l_flags & USER_LOCK_IN_TEARDOWN
|
||||||
|
&& !(lockres->l_flags & USER_LOCK_IN_CANCEL)) {
|
||||||
lockres->l_level = LKM_IVMODE;
|
lockres->l_level = LKM_IVMODE;
|
||||||
else {
|
} else if (status == DLM_CANCELGRANT) {
|
||||||
|
mlog(0, "Lock %s, cancel fails, flags 0x%x\n",
|
||||||
|
lockres->l_name, lockres->l_flags);
|
||||||
|
/* We tried to cancel a convert request, but it was
|
||||||
|
* already granted. Don't clear the busy flag - the
|
||||||
|
* ast should've done this already. */
|
||||||
|
BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL));
|
||||||
|
lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
|
||||||
|
goto out_noclear;
|
||||||
|
} else {
|
||||||
|
BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL));
|
||||||
|
/* Cancel succeeded, we want to re-queue */
|
||||||
|
mlog(0, "Lock %s, cancel succeeds, flags 0x%x\n",
|
||||||
|
lockres->l_name, lockres->l_flags);
|
||||||
lockres->l_requested = LKM_IVMODE; /* cancel an
|
lockres->l_requested = LKM_IVMODE; /* cancel an
|
||||||
* upconvert
|
* upconvert
|
||||||
* request. */
|
* request. */
|
||||||
lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
|
lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
|
||||||
/* we want the unblock thread to look at it again
|
/* we want the unblock thread to look at it again
|
||||||
* now. */
|
* now. */
|
||||||
__user_dlm_queue_lockres(lockres);
|
if (lockres->l_flags & USER_LOCK_BLOCKED)
|
||||||
|
__user_dlm_queue_lockres(lockres);
|
||||||
}
|
}
|
||||||
|
|
||||||
lockres->l_flags &= ~USER_LOCK_BUSY;
|
lockres->l_flags &= ~USER_LOCK_BUSY;
|
||||||
|
out_noclear:
|
||||||
spin_unlock(&lockres->l_lock);
|
spin_unlock(&lockres->l_lock);
|
||||||
|
|
||||||
wake_up(&lockres->l_event);
|
wake_up(&lockres->l_event);
|
||||||
|
@ -268,13 +291,26 @@ static void user_dlm_unblock_lock(void *opaque)
|
||||||
|
|
||||||
spin_lock(&lockres->l_lock);
|
spin_lock(&lockres->l_lock);
|
||||||
|
|
||||||
BUG_ON(!(lockres->l_flags & USER_LOCK_BLOCKED));
|
mlog_bug_on_msg(!(lockres->l_flags & USER_LOCK_QUEUED),
|
||||||
BUG_ON(!(lockres->l_flags & USER_LOCK_QUEUED));
|
"Lockres %s, flags 0x%x\n",
|
||||||
|
lockres->l_name, lockres->l_flags);
|
||||||
|
|
||||||
/* notice that we don't clear USER_LOCK_BLOCKED here. That's
|
/* notice that we don't clear USER_LOCK_BLOCKED here. If it's
|
||||||
* for user_ast to do. */
|
* set, we want user_ast clear it. */
|
||||||
lockres->l_flags &= ~USER_LOCK_QUEUED;
|
lockres->l_flags &= ~USER_LOCK_QUEUED;
|
||||||
|
|
||||||
|
/* It's valid to get here and no longer be blocked - if we get
|
||||||
|
* several basts in a row, we might be queued by the first
|
||||||
|
* one, the unblock thread might run and clear the queued
|
||||||
|
* flag, and finally we might get another bast which re-queues
|
||||||
|
* us before our ast for the downconvert is called. */
|
||||||
|
if (!(lockres->l_flags & USER_LOCK_BLOCKED)) {
|
||||||
|
mlog(0, "Lockres %s, flags 0x%x: queued but not blocking\n",
|
||||||
|
lockres->l_name, lockres->l_flags);
|
||||||
|
spin_unlock(&lockres->l_lock);
|
||||||
|
goto drop_ref;
|
||||||
|
}
|
||||||
|
|
||||||
if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
|
if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
|
||||||
mlog(0, "lock is in teardown so we do nothing\n");
|
mlog(0, "lock is in teardown so we do nothing\n");
|
||||||
spin_unlock(&lockres->l_lock);
|
spin_unlock(&lockres->l_lock);
|
||||||
|
@ -282,7 +318,9 @@ static void user_dlm_unblock_lock(void *opaque)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (lockres->l_flags & USER_LOCK_BUSY) {
|
if (lockres->l_flags & USER_LOCK_BUSY) {
|
||||||
mlog(0, "BUSY flag detected...\n");
|
mlog(0, "Cancel lock %s, flags 0x%x\n",
|
||||||
|
lockres->l_name, lockres->l_flags);
|
||||||
|
|
||||||
if (lockres->l_flags & USER_LOCK_IN_CANCEL) {
|
if (lockres->l_flags & USER_LOCK_IN_CANCEL) {
|
||||||
spin_unlock(&lockres->l_lock);
|
spin_unlock(&lockres->l_lock);
|
||||||
goto drop_ref;
|
goto drop_ref;
|
||||||
|
@ -296,14 +334,7 @@ static void user_dlm_unblock_lock(void *opaque)
|
||||||
LKM_CANCEL,
|
LKM_CANCEL,
|
||||||
user_unlock_ast,
|
user_unlock_ast,
|
||||||
lockres);
|
lockres);
|
||||||
if (status == DLM_CANCELGRANT) {
|
if (status != DLM_NORMAL)
|
||||||
/* If we got this, then the ast was fired
|
|
||||||
* before we could cancel. We cleanup our
|
|
||||||
* state, and restart the function. */
|
|
||||||
spin_lock(&lockres->l_lock);
|
|
||||||
lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
|
|
||||||
spin_unlock(&lockres->l_lock);
|
|
||||||
} else if (status != DLM_NORMAL)
|
|
||||||
user_log_dlm_error("dlmunlock", status, lockres);
|
user_log_dlm_error("dlmunlock", status, lockres);
|
||||||
goto drop_ref;
|
goto drop_ref;
|
||||||
}
|
}
|
||||||
|
@ -581,6 +612,14 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
|
||||||
mlog(0, "asked to destroy %s\n", lockres->l_name);
|
mlog(0, "asked to destroy %s\n", lockres->l_name);
|
||||||
|
|
||||||
spin_lock(&lockres->l_lock);
|
spin_lock(&lockres->l_lock);
|
||||||
|
if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
|
||||||
|
mlog(0, "Lock is already torn down\n");
|
||||||
|
spin_unlock(&lockres->l_lock);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
lockres->l_flags |= USER_LOCK_IN_TEARDOWN;
|
||||||
|
|
||||||
while (lockres->l_flags & USER_LOCK_BUSY) {
|
while (lockres->l_flags & USER_LOCK_BUSY) {
|
||||||
spin_unlock(&lockres->l_lock);
|
spin_unlock(&lockres->l_lock);
|
||||||
|
|
||||||
|
@ -606,7 +645,6 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
|
||||||
|
|
||||||
lockres->l_flags &= ~USER_LOCK_ATTACHED;
|
lockres->l_flags &= ~USER_LOCK_ATTACHED;
|
||||||
lockres->l_flags |= USER_LOCK_BUSY;
|
lockres->l_flags |= USER_LOCK_BUSY;
|
||||||
lockres->l_flags |= USER_LOCK_IN_TEARDOWN;
|
|
||||||
spin_unlock(&lockres->l_lock);
|
spin_unlock(&lockres->l_lock);
|
||||||
|
|
||||||
mlog(0, "unlocking lockres %s\n", lockres->l_name);
|
mlog(0, "unlocking lockres %s\n", lockres->l_name);
|
||||||
|
|
|
@ -260,6 +260,17 @@ static int ocfs2_truncate_file(struct inode *inode,
|
||||||
if (new_i_size == le64_to_cpu(fe->i_size))
|
if (new_i_size == le64_to_cpu(fe->i_size))
|
||||||
goto bail;
|
goto bail;
|
||||||
|
|
||||||
|
/* This forces other nodes to sync and drop their pages. Do
|
||||||
|
* this even if we have a truncate without allocation change -
|
||||||
|
* ocfs2 cluster sizes can be much greater than page size, so
|
||||||
|
* we have to truncate them anyway. */
|
||||||
|
status = ocfs2_data_lock(inode, 1);
|
||||||
|
if (status < 0) {
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
ocfs2_data_unlock(inode, 1);
|
||||||
|
|
||||||
if (le32_to_cpu(fe->i_clusters) ==
|
if (le32_to_cpu(fe->i_clusters) ==
|
||||||
ocfs2_clusters_for_bytes(osb->sb, new_i_size)) {
|
ocfs2_clusters_for_bytes(osb->sb, new_i_size)) {
|
||||||
mlog(0, "fe->i_clusters = %u, so we do a simple truncate\n",
|
mlog(0, "fe->i_clusters = %u, so we do a simple truncate\n",
|
||||||
|
@ -272,14 +283,6 @@ static int ocfs2_truncate_file(struct inode *inode,
|
||||||
goto bail;
|
goto bail;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* This forces other nodes to sync and drop their pages */
|
|
||||||
status = ocfs2_data_lock(inode, 1);
|
|
||||||
if (status < 0) {
|
|
||||||
mlog_errno(status);
|
|
||||||
goto bail;
|
|
||||||
}
|
|
||||||
ocfs2_data_unlock(inode, 1);
|
|
||||||
|
|
||||||
/* alright, we're going to need to do a full blown alloc size
|
/* alright, we're going to need to do a full blown alloc size
|
||||||
* change. Orphan the inode so that recovery can complete the
|
* change. Orphan the inode so that recovery can complete the
|
||||||
* truncate if necessary. This does the task of marking
|
* truncate if necessary. This does the task of marking
|
||||||
|
|
Loading…
Reference in New Issue