drbd: add missing spinlock to bitmap receive

During bitmap exchange, when using the RLE bitmap compression scheme,
we have a code path that can set the whole bitmap at once.

To avoid holding spin_lock_irq() for too long, we used to lock out other
bitmap modifications during bitmap exchange by other means, and then,
knowing we have exclusive access to the bitmap, modify it without
the spinlock, and with IRQs enabled.

Since we now allow local IO to continue, potentially setting additional
bits during the bitmap receive phase, this is no longer true, and we get
uncoordinated updates of bitmap members, causing bm_set to no longer
accurately reflect the total number of set bits.

To actually see this, you'd need to have a large bitmap, use RLE bitmap
compression, and have busy IO during sync handshake and bitmap exchange.

Fix this by taking the spin_lock_irq() in this code path as well, but
calling cond_resched_lock() after each page worth of bits processed.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
This commit is contained in:
Lars Ellenberg 2011-06-03 21:18:13 +02:00 committed by Philipp Reisner
parent 0cfdd247d1
commit 829c608786
1 changed files with 19 additions and 15 deletions

View File

@ -112,9 +112,6 @@ struct drbd_bitmap {
struct task_struct *bm_task; struct task_struct *bm_task;
}; };
static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
unsigned long e, int val, const enum km_type km);
#define bm_print_lock_info(m) __bm_print_lock_info(m, __func__) #define bm_print_lock_info(m) __bm_print_lock_info(m, __func__)
static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func) static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func)
{ {
@ -1256,7 +1253,7 @@ unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_f
* expected to be called for only a few bits (e - s about BITS_PER_LONG). * expected to be called for only a few bits (e - s about BITS_PER_LONG).
* Must hold bitmap lock already. */ * Must hold bitmap lock already. */
static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
unsigned long e, int val, const enum km_type km) unsigned long e, int val)
{ {
struct drbd_bitmap *b = mdev->bitmap; struct drbd_bitmap *b = mdev->bitmap;
unsigned long *p_addr = NULL; unsigned long *p_addr = NULL;
@ -1274,14 +1271,14 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
unsigned int page_nr = bm_bit_to_page_idx(b, bitnr); unsigned int page_nr = bm_bit_to_page_idx(b, bitnr);
if (page_nr != last_page_nr) { if (page_nr != last_page_nr) {
if (p_addr) if (p_addr)
__bm_unmap(p_addr, km); __bm_unmap(p_addr, KM_IRQ1);
if (c < 0) if (c < 0)
bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]); bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]);
else if (c > 0) else if (c > 0)
bm_set_page_need_writeout(b->bm_pages[last_page_nr]); bm_set_page_need_writeout(b->bm_pages[last_page_nr]);
changed_total += c; changed_total += c;
c = 0; c = 0;
p_addr = __bm_map_pidx(b, page_nr, km); p_addr = __bm_map_pidx(b, page_nr, KM_IRQ1);
last_page_nr = page_nr; last_page_nr = page_nr;
} }
if (val) if (val)
@ -1290,7 +1287,7 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
c -= (0 != __test_and_clear_bit_le(bitnr & BITS_PER_PAGE_MASK, p_addr)); c -= (0 != __test_and_clear_bit_le(bitnr & BITS_PER_PAGE_MASK, p_addr));
} }
if (p_addr) if (p_addr)
__bm_unmap(p_addr, km); __bm_unmap(p_addr, KM_IRQ1);
if (c < 0) if (c < 0)
bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]); bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]);
else if (c > 0) else if (c > 0)
@ -1318,7 +1315,7 @@ static int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
if ((val ? BM_DONT_SET : BM_DONT_CLEAR) & b->bm_flags) if ((val ? BM_DONT_SET : BM_DONT_CLEAR) & b->bm_flags)
bm_print_lock_info(mdev); bm_print_lock_info(mdev);
c = __bm_change_bits_to(mdev, s, e, val, KM_IRQ1); c = __bm_change_bits_to(mdev, s, e, val);
spin_unlock_irqrestore(&b->bm_lock, flags); spin_unlock_irqrestore(&b->bm_lock, flags);
return c; return c;
@ -1343,16 +1340,17 @@ static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b,
{ {
int i; int i;
int bits; int bits;
unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr], KM_USER0); unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr], KM_IRQ1);
for (i = first_word; i < last_word; i++) { for (i = first_word; i < last_word; i++) {
bits = hweight_long(paddr[i]); bits = hweight_long(paddr[i]);
paddr[i] = ~0UL; paddr[i] = ~0UL;
b->bm_set += BITS_PER_LONG - bits; b->bm_set += BITS_PER_LONG - bits;
} }
kunmap_atomic(paddr, KM_USER0); kunmap_atomic(paddr, KM_IRQ1);
} }
/* Same thing as drbd_bm_set_bits, but without taking the spin_lock_irqsave. /* Same thing as drbd_bm_set_bits,
* but more efficient for a large bit range.
* You must first drbd_bm_lock(). * You must first drbd_bm_lock().
* Can be called to set the whole bitmap in one go. * Can be called to set the whole bitmap in one go.
* Sets bits from s to e _inclusive_. */ * Sets bits from s to e _inclusive_. */
@ -1366,6 +1364,7 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
* Do not use memset, because we must account for changes, * Do not use memset, because we must account for changes,
* so we need to loop over the words with hweight() anyways. * so we need to loop over the words with hweight() anyways.
*/ */
struct drbd_bitmap *b = mdev->bitmap;
unsigned long sl = ALIGN(s,BITS_PER_LONG); unsigned long sl = ALIGN(s,BITS_PER_LONG);
unsigned long el = (e+1) & ~((unsigned long)BITS_PER_LONG-1); unsigned long el = (e+1) & ~((unsigned long)BITS_PER_LONG-1);
int first_page; int first_page;
@ -1376,15 +1375,19 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
if (e - s <= 3*BITS_PER_LONG) { if (e - s <= 3*BITS_PER_LONG) {
/* don't bother; el and sl may even be wrong. */ /* don't bother; el and sl may even be wrong. */
__bm_change_bits_to(mdev, s, e, 1, KM_USER0); spin_lock_irq(&b->bm_lock);
__bm_change_bits_to(mdev, s, e, 1);
spin_unlock_irq(&b->bm_lock);
return; return;
} }
/* difference is large enough that we can trust sl and el */ /* difference is large enough that we can trust sl and el */
spin_lock_irq(&b->bm_lock);
/* bits filling the current long */ /* bits filling the current long */
if (sl) if (sl)
__bm_change_bits_to(mdev, s, sl-1, 1, KM_USER0); __bm_change_bits_to(mdev, s, sl-1, 1);
first_page = sl >> (3 + PAGE_SHIFT); first_page = sl >> (3 + PAGE_SHIFT);
last_page = el >> (3 + PAGE_SHIFT); last_page = el >> (3 + PAGE_SHIFT);
@ -1397,7 +1400,7 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
/* first and full pages, unless first page == last page */ /* first and full pages, unless first page == last page */
for (page_nr = first_page; page_nr < last_page; page_nr++) { for (page_nr = first_page; page_nr < last_page; page_nr++) {
bm_set_full_words_within_one_page(mdev->bitmap, page_nr, first_word, last_word); bm_set_full_words_within_one_page(mdev->bitmap, page_nr, first_word, last_word);
cond_resched(); cond_resched_lock(&b->bm_lock);
first_word = 0; first_word = 0;
} }
@ -1411,7 +1414,8 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
* it would trigger an assert in __bm_change_bits_to() * it would trigger an assert in __bm_change_bits_to()
*/ */
if (el <= e) if (el <= e)
__bm_change_bits_to(mdev, el, e, 1, KM_USER0); __bm_change_bits_to(mdev, el, e, 1);
spin_unlock_irq(&b->bm_lock);
} }
/* returns bit state /* returns bit state