Initiate recovery on node failure
The DLM informs us of a node failure, passing the DLM slot number of the failed node. The bit corresponding to that slot number is set in cluster_info->recovery_map, and the recovery thread is woken up. The recovery thread: 1. Derives the slot number from the recovery_map; 2. Locks the bitmap corresponding to the slot; 3. Copies the set bits to the node-local bitmap. Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
This commit is contained in:
parent
11dd35daaa
commit
e94987db2e
|
@ -13,6 +13,7 @@
|
|||
#include <linux/dlm.h>
|
||||
#include <linux/sched.h>
|
||||
#include "md.h"
|
||||
#include "bitmap.h"
|
||||
#include "md-cluster.h"
|
||||
|
||||
#define LVB_SIZE 64
|
||||
|
@ -49,6 +50,8 @@ struct md_cluster_info {
|
|||
struct dlm_lock_resource *bitmap_lockres;
|
||||
struct list_head suspend_list;
|
||||
spinlock_t suspend_lock;
|
||||
struct md_thread *recovery_thread;
|
||||
unsigned long recovery_map;
|
||||
};
|
||||
|
||||
static void sync_ast(void *arg)
|
||||
|
@ -184,6 +187,50 @@ out:
|
|||
return s;
|
||||
}
|
||||
|
||||
void recover_bitmaps(struct md_thread *thread)
|
||||
{
|
||||
struct mddev *mddev = thread->mddev;
|
||||
struct md_cluster_info *cinfo = mddev->cluster_info;
|
||||
struct dlm_lock_resource *bm_lockres;
|
||||
char str[64];
|
||||
int slot, ret;
|
||||
struct suspend_info *s, *tmp;
|
||||
sector_t lo, hi;
|
||||
|
||||
while (cinfo->recovery_map) {
|
||||
slot = fls64((u64)cinfo->recovery_map) - 1;
|
||||
|
||||
/* Clear suspend_area associated with the bitmap */
|
||||
spin_lock_irq(&cinfo->suspend_lock);
|
||||
list_for_each_entry_safe(s, tmp, &cinfo->suspend_list, list)
|
||||
if (slot == s->slot) {
|
||||
list_del(&s->list);
|
||||
kfree(s);
|
||||
}
|
||||
spin_unlock_irq(&cinfo->suspend_lock);
|
||||
|
||||
snprintf(str, 64, "bitmap%04d", slot);
|
||||
bm_lockres = lockres_init(mddev, str, NULL, 1);
|
||||
if (!bm_lockres) {
|
||||
pr_err("md-cluster: Cannot initialize bitmaps\n");
|
||||
goto clear_bit;
|
||||
}
|
||||
|
||||
ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
|
||||
if (ret) {
|
||||
pr_err("md-cluster: Could not DLM lock %s: %d\n",
|
||||
str, ret);
|
||||
goto clear_bit;
|
||||
}
|
||||
ret = bitmap_copy_from_slot(mddev, slot, &lo, &hi);
|
||||
if (ret)
|
||||
pr_err("md-cluster: Could not copy data from bitmap %d\n", slot);
|
||||
dlm_unlock_sync(bm_lockres);
|
||||
clear_bit:
|
||||
clear_bit(slot, &cinfo->recovery_map);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * recover_prep() - intentionally empty; nothing is done with @arg.
 * NOTE(review): presumably the DLM "recovery is about to start" callback,
 * alongside recover_slot()/recover_done() -- confirm against the ops table
 * where it is registered.
 */
static void recover_prep(void *arg)
|
||||
{
|
||||
}
|
||||
|
@ -197,6 +244,16 @@ static void recover_slot(void *arg, struct dlm_slot *slot)
|
|||
mddev->bitmap_info.cluster_name,
|
||||
slot->nodeid, slot->slot,
|
||||
cinfo->slot_number);
|
||||
set_bit(slot->slot - 1, &cinfo->recovery_map);
|
||||
if (!cinfo->recovery_thread) {
|
||||
cinfo->recovery_thread = md_register_thread(recover_bitmaps,
|
||||
mddev, "recover");
|
||||
if (!cinfo->recovery_thread) {
|
||||
pr_warn("md-cluster: Could not create recovery thread\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
md_wakeup_thread(cinfo->recovery_thread);
|
||||
}
|
||||
|
||||
static void recover_done(void *arg, struct dlm_slot *slots,
|
||||
|
@ -338,6 +395,7 @@ static int leave(struct mddev *mddev)
|
|||
|
||||
if (!cinfo)
|
||||
return 0;
|
||||
md_unregister_thread(&cinfo->recovery_thread);
|
||||
lockres_free(cinfo->sb_lock);
|
||||
lockres_free(cinfo->bitmap_lockres);
|
||||
dlm_release_lockspace(cinfo->lockspace, 2);
|
||||
|
|
Loading…
Reference in New Issue