Merge branch 'drbd-8.4_ed6' into for-3.8-drivers-drbd-8.4_ed6
This commit is contained in:
commit
986836503e
|
@ -1,5 +1,7 @@
|
|||
drbd-y := drbd_bitmap.o drbd_proc.o
|
||||
drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o
|
||||
drbd-y += drbd_main.o drbd_strings.o drbd_nl.o
|
||||
drbd-y += drbd_interval.o drbd_state.o
|
||||
drbd-y += drbd_nla.o
|
||||
|
||||
obj-$(CONFIG_BLK_DEV_DRBD) += drbd.o
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -119,13 +119,9 @@ static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func)
|
|||
if (!__ratelimit(&drbd_ratelimit_state))
|
||||
return;
|
||||
dev_err(DEV, "FIXME %s in %s, bitmap locked for '%s' by %s\n",
|
||||
current == mdev->receiver.task ? "receiver" :
|
||||
current == mdev->asender.task ? "asender" :
|
||||
current == mdev->worker.task ? "worker" : current->comm,
|
||||
func, b->bm_why ?: "?",
|
||||
b->bm_task == mdev->receiver.task ? "receiver" :
|
||||
b->bm_task == mdev->asender.task ? "asender" :
|
||||
b->bm_task == mdev->worker.task ? "worker" : "?");
|
||||
drbd_task_to_thread_name(mdev->tconn, current),
|
||||
func, b->bm_why ?: "?",
|
||||
drbd_task_to_thread_name(mdev->tconn, b->bm_task));
|
||||
}
|
||||
|
||||
void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags)
|
||||
|
@ -142,13 +138,9 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags)
|
|||
|
||||
if (trylock_failed) {
|
||||
dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n",
|
||||
current == mdev->receiver.task ? "receiver" :
|
||||
current == mdev->asender.task ? "asender" :
|
||||
current == mdev->worker.task ? "worker" : current->comm,
|
||||
why, b->bm_why ?: "?",
|
||||
b->bm_task == mdev->receiver.task ? "receiver" :
|
||||
b->bm_task == mdev->asender.task ? "asender" :
|
||||
b->bm_task == mdev->worker.task ? "worker" : "?");
|
||||
drbd_task_to_thread_name(mdev->tconn, current),
|
||||
why, b->bm_why ?: "?",
|
||||
drbd_task_to_thread_name(mdev->tconn, b->bm_task));
|
||||
mutex_lock(&b->bm_change);
|
||||
}
|
||||
if (BM_LOCKED_MASK & b->bm_flags)
|
||||
|
@ -196,6 +188,9 @@ void drbd_bm_unlock(struct drbd_conf *mdev)
|
|||
/* to mark for lazy writeout once syncer cleared all clearable bits,
|
||||
* we flag the page if bits have been cleared since last IO. */
|
||||
#define BM_PAGE_LAZY_WRITEOUT 28
|
||||
/* pages marked with this "HINT" will be considered for writeout
|
||||
* on activity log transactions */
|
||||
#define BM_PAGE_HINT_WRITEOUT 27
|
||||
|
||||
/* store_page_idx uses non-atomic assignment. It is only used directly after
|
||||
* allocating the page. All other bm_set_page_* and bm_clear_page_* need to
|
||||
|
@ -227,8 +222,7 @@ static void bm_page_unlock_io(struct drbd_conf *mdev, int page_nr)
|
|||
{
|
||||
struct drbd_bitmap *b = mdev->bitmap;
|
||||
void *addr = &page_private(b->bm_pages[page_nr]);
|
||||
clear_bit(BM_PAGE_IO_LOCK, addr);
|
||||
smp_mb__after_clear_bit();
|
||||
clear_bit_unlock(BM_PAGE_IO_LOCK, addr);
|
||||
wake_up(&mdev->bitmap->bm_io_wait);
|
||||
}
|
||||
|
||||
|
@ -246,6 +240,27 @@ static void bm_set_page_need_writeout(struct page *page)
|
|||
set_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page));
|
||||
}
|
||||
|
||||
/**
|
||||
* drbd_bm_mark_for_writeout() - mark a page with a "hint" to be considered for writeout
|
||||
* @mdev: DRBD device.
|
||||
* @page_nr: the bitmap page to mark with the "hint" flag
|
||||
*
|
||||
* From within an activity log transaction, we mark a few pages with these
|
||||
* hints, then call drbd_bm_write_hinted(), which will only write out changed
|
||||
* pages which are flagged with this mark.
|
||||
*/
|
||||
void drbd_bm_mark_for_writeout(struct drbd_conf *mdev, int page_nr)
|
||||
{
|
||||
struct page *page;
|
||||
if (page_nr >= mdev->bitmap->bm_number_of_pages) {
|
||||
dev_warn(DEV, "BAD: page_nr: %u, number_of_pages: %u\n",
|
||||
page_nr, (int)mdev->bitmap->bm_number_of_pages);
|
||||
return;
|
||||
}
|
||||
page = mdev->bitmap->bm_pages[page_nr];
|
||||
set_bit(BM_PAGE_HINT_WRITEOUT, &page_private(page));
|
||||
}
|
||||
|
||||
static int bm_test_page_unchanged(struct page *page)
|
||||
{
|
||||
volatile const unsigned long *addr = &page_private(page);
|
||||
|
@ -376,7 +391,7 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
|
|||
* GFP_NOIO, as this is called while drbd IO is "suspended",
|
||||
* and during resize or attach on diskless Primary,
|
||||
* we must not block on IO to ourselves.
|
||||
* Context is receiver thread or cqueue thread/dmsetup. */
|
||||
* Context is receiver thread or dmsetup. */
|
||||
bytes = sizeof(struct page *)*want;
|
||||
new_pages = kzalloc(bytes, GFP_NOIO);
|
||||
if (!new_pages) {
|
||||
|
@ -441,7 +456,8 @@ int drbd_bm_init(struct drbd_conf *mdev)
|
|||
|
||||
sector_t drbd_bm_capacity(struct drbd_conf *mdev)
|
||||
{
|
||||
ERR_IF(!mdev->bitmap) return 0;
|
||||
if (!expect(mdev->bitmap))
|
||||
return 0;
|
||||
return mdev->bitmap->bm_dev_capacity;
|
||||
}
|
||||
|
||||
|
@ -449,7 +465,8 @@ sector_t drbd_bm_capacity(struct drbd_conf *mdev)
|
|||
*/
|
||||
void drbd_bm_cleanup(struct drbd_conf *mdev)
|
||||
{
|
||||
ERR_IF (!mdev->bitmap) return;
|
||||
if (!expect(mdev->bitmap))
|
||||
return;
|
||||
bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages);
|
||||
bm_vk_free(mdev->bitmap->bm_pages, (BM_P_VMALLOCED & mdev->bitmap->bm_flags));
|
||||
kfree(mdev->bitmap);
|
||||
|
@ -612,7 +629,8 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits)
|
|||
int err = 0, growing;
|
||||
int opages_vmalloced;
|
||||
|
||||
ERR_IF(!b) return -ENOMEM;
|
||||
if (!expect(b))
|
||||
return -ENOMEM;
|
||||
|
||||
drbd_bm_lock(mdev, "resize", BM_LOCKED_MASK);
|
||||
|
||||
|
@ -734,8 +752,10 @@ unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev)
|
|||
unsigned long s;
|
||||
unsigned long flags;
|
||||
|
||||
ERR_IF(!b) return 0;
|
||||
ERR_IF(!b->bm_pages) return 0;
|
||||
if (!expect(b))
|
||||
return 0;
|
||||
if (!expect(b->bm_pages))
|
||||
return 0;
|
||||
|
||||
spin_lock_irqsave(&b->bm_lock, flags);
|
||||
s = b->bm_set;
|
||||
|
@ -758,8 +778,10 @@ unsigned long drbd_bm_total_weight(struct drbd_conf *mdev)
|
|||
size_t drbd_bm_words(struct drbd_conf *mdev)
|
||||
{
|
||||
struct drbd_bitmap *b = mdev->bitmap;
|
||||
ERR_IF(!b) return 0;
|
||||
ERR_IF(!b->bm_pages) return 0;
|
||||
if (!expect(b))
|
||||
return 0;
|
||||
if (!expect(b->bm_pages))
|
||||
return 0;
|
||||
|
||||
return b->bm_words;
|
||||
}
|
||||
|
@ -767,7 +789,8 @@ size_t drbd_bm_words(struct drbd_conf *mdev)
|
|||
unsigned long drbd_bm_bits(struct drbd_conf *mdev)
|
||||
{
|
||||
struct drbd_bitmap *b = mdev->bitmap;
|
||||
ERR_IF(!b) return 0;
|
||||
if (!expect(b))
|
||||
return 0;
|
||||
|
||||
return b->bm_bits;
|
||||
}
|
||||
|
@ -788,8 +811,10 @@ void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number,
|
|||
|
||||
end = offset + number;
|
||||
|
||||
ERR_IF(!b) return;
|
||||
ERR_IF(!b->bm_pages) return;
|
||||
if (!expect(b))
|
||||
return;
|
||||
if (!expect(b->bm_pages))
|
||||
return;
|
||||
if (number == 0)
|
||||
return;
|
||||
WARN_ON(offset >= b->bm_words);
|
||||
|
@ -833,8 +858,10 @@ void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number,
|
|||
|
||||
end = offset + number;
|
||||
|
||||
ERR_IF(!b) return;
|
||||
ERR_IF(!b->bm_pages) return;
|
||||
if (!expect(b))
|
||||
return;
|
||||
if (!expect(b->bm_pages))
|
||||
return;
|
||||
|
||||
spin_lock_irq(&b->bm_lock);
|
||||
if ((offset >= b->bm_words) ||
|
||||
|
@ -862,8 +889,10 @@ void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number,
|
|||
void drbd_bm_set_all(struct drbd_conf *mdev)
|
||||
{
|
||||
struct drbd_bitmap *b = mdev->bitmap;
|
||||
ERR_IF(!b) return;
|
||||
ERR_IF(!b->bm_pages) return;
|
||||
if (!expect(b))
|
||||
return;
|
||||
if (!expect(b->bm_pages))
|
||||
return;
|
||||
|
||||
spin_lock_irq(&b->bm_lock);
|
||||
bm_memset(b, 0, 0xff, b->bm_words);
|
||||
|
@ -876,8 +905,10 @@ void drbd_bm_set_all(struct drbd_conf *mdev)
|
|||
void drbd_bm_clear_all(struct drbd_conf *mdev)
|
||||
{
|
||||
struct drbd_bitmap *b = mdev->bitmap;
|
||||
ERR_IF(!b) return;
|
||||
ERR_IF(!b->bm_pages) return;
|
||||
if (!expect(b))
|
||||
return;
|
||||
if (!expect(b->bm_pages))
|
||||
return;
|
||||
|
||||
spin_lock_irq(&b->bm_lock);
|
||||
bm_memset(b, 0, 0, b->bm_words);
|
||||
|
@ -891,7 +922,8 @@ struct bm_aio_ctx {
|
|||
unsigned int done;
|
||||
unsigned flags;
|
||||
#define BM_AIO_COPY_PAGES 1
|
||||
#define BM_WRITE_ALL_PAGES 2
|
||||
#define BM_AIO_WRITE_HINTED 2
|
||||
#define BM_WRITE_ALL_PAGES 4
|
||||
int error;
|
||||
struct kref kref;
|
||||
};
|
||||
|
@ -1062,6 +1094,11 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
|
|||
if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx)
|
||||
break;
|
||||
if (rw & WRITE) {
|
||||
if ((flags & BM_AIO_WRITE_HINTED) &&
|
||||
!test_and_clear_bit(BM_PAGE_HINT_WRITEOUT,
|
||||
&page_private(b->bm_pages[i])))
|
||||
continue;
|
||||
|
||||
if (!(flags & BM_WRITE_ALL_PAGES) &&
|
||||
bm_test_page_unchanged(b->bm_pages[i])) {
|
||||
dynamic_dev_dbg(DEV, "skipped bm write for idx %u\n", i);
|
||||
|
@ -1094,9 +1131,11 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
|
|||
else
|
||||
kref_put(&ctx->kref, &bm_aio_ctx_destroy);
|
||||
|
||||
dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n",
|
||||
rw == WRITE ? "WRITE" : "READ",
|
||||
count, jiffies - now);
|
||||
/* summary for global bitmap IO */
|
||||
if (flags == 0)
|
||||
dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n",
|
||||
rw == WRITE ? "WRITE" : "READ",
|
||||
count, jiffies - now);
|
||||
|
||||
if (ctx->error) {
|
||||
dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n");
|
||||
|
@ -1117,8 +1156,9 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
|
|||
}
|
||||
now = b->bm_set;
|
||||
|
||||
dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
|
||||
ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);
|
||||
if (flags == 0)
|
||||
dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
|
||||
ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);
|
||||
|
||||
kref_put(&ctx->kref, &bm_aio_ctx_destroy);
|
||||
return err;
|
||||
|
@ -1181,9 +1221,17 @@ int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local)
|
|||
return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* drbd_bm_write_hinted() - Write bitmap pages with "hint" marks, if they have changed.
|
||||
* @mdev: DRBD device.
|
||||
*/
|
||||
int drbd_bm_write_hinted(struct drbd_conf *mdev) __must_hold(local)
|
||||
{
|
||||
return bm_rw(mdev, WRITE, BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* drbd_bm_write_page: Writes a PAGE_SIZE aligned piece of bitmap
|
||||
* drbd_bm_write_page() - Writes a PAGE_SIZE aligned piece of bitmap
|
||||
* @mdev: DRBD device.
|
||||
* @idx: bitmap page index
|
||||
*
|
||||
|
@ -1291,8 +1339,10 @@ static unsigned long bm_find_next(struct drbd_conf *mdev,
|
|||
struct drbd_bitmap *b = mdev->bitmap;
|
||||
unsigned long i = DRBD_END_OF_BITMAP;
|
||||
|
||||
ERR_IF(!b) return i;
|
||||
ERR_IF(!b->bm_pages) return i;
|
||||
if (!expect(b))
|
||||
return i;
|
||||
if (!expect(b->bm_pages))
|
||||
return i;
|
||||
|
||||
spin_lock_irq(&b->bm_lock);
|
||||
if (BM_DONT_TEST & b->bm_flags)
|
||||
|
@ -1393,8 +1443,10 @@ static int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
|
|||
struct drbd_bitmap *b = mdev->bitmap;
|
||||
int c = 0;
|
||||
|
||||
ERR_IF(!b) return 1;
|
||||
ERR_IF(!b->bm_pages) return 0;
|
||||
if (!expect(b))
|
||||
return 1;
|
||||
if (!expect(b->bm_pages))
|
||||
return 0;
|
||||
|
||||
spin_lock_irqsave(&b->bm_lock, flags);
|
||||
if ((val ? BM_DONT_SET : BM_DONT_CLEAR) & b->bm_flags)
|
||||
|
@ -1425,13 +1477,21 @@ static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b,
|
|||
{
|
||||
int i;
|
||||
int bits;
|
||||
int changed = 0;
|
||||
unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr]);
|
||||
for (i = first_word; i < last_word; i++) {
|
||||
bits = hweight_long(paddr[i]);
|
||||
paddr[i] = ~0UL;
|
||||
b->bm_set += BITS_PER_LONG - bits;
|
||||
changed += BITS_PER_LONG - bits;
|
||||
}
|
||||
kunmap_atomic(paddr);
|
||||
if (changed) {
|
||||
/* We only need lazy writeout, the information is still in the
|
||||
* remote bitmap as well, and is reconstructed during the next
|
||||
* bitmap exchange, if lost locally due to a crash. */
|
||||
bm_set_page_lazy_writeout(b->bm_pages[page_nr]);
|
||||
b->bm_set += changed;
|
||||
}
|
||||
}
|
||||
|
||||
/* Same thing as drbd_bm_set_bits,
|
||||
|
@ -1526,8 +1586,10 @@ int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr)
|
|||
unsigned long *p_addr;
|
||||
int i;
|
||||
|
||||
ERR_IF(!b) return 0;
|
||||
ERR_IF(!b->bm_pages) return 0;
|
||||
if (!expect(b))
|
||||
return 0;
|
||||
if (!expect(b->bm_pages))
|
||||
return 0;
|
||||
|
||||
spin_lock_irqsave(&b->bm_lock, flags);
|
||||
if (BM_DONT_TEST & b->bm_flags)
|
||||
|
@ -1561,8 +1623,10 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
|
|||
* robust in case we screwed up elsewhere, in that case pretend there
|
||||
* was one dirty bit in the requested area, so we won't try to do a
|
||||
* local read there (no bitmap probably implies no disk) */
|
||||
ERR_IF(!b) return 1;
|
||||
ERR_IF(!b->bm_pages) return 1;
|
||||
if (!expect(b))
|
||||
return 1;
|
||||
if (!expect(b->bm_pages))
|
||||
return 1;
|
||||
|
||||
spin_lock_irqsave(&b->bm_lock, flags);
|
||||
if (BM_DONT_TEST & b->bm_flags)
|
||||
|
@ -1575,11 +1639,10 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
|
|||
bm_unmap(p_addr);
|
||||
p_addr = bm_map_pidx(b, idx);
|
||||
}
|
||||
ERR_IF (bitnr >= b->bm_bits) {
|
||||
dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits);
|
||||
} else {
|
||||
if (expect(bitnr < b->bm_bits))
|
||||
c += (0 != test_bit_le(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr));
|
||||
}
|
||||
else
|
||||
dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits);
|
||||
}
|
||||
if (p_addr)
|
||||
bm_unmap(p_addr);
|
||||
|
@ -1609,8 +1672,10 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr)
|
|||
unsigned long flags;
|
||||
unsigned long *p_addr, *bm;
|
||||
|
||||
ERR_IF(!b) return 0;
|
||||
ERR_IF(!b->bm_pages) return 0;
|
||||
if (!expect(b))
|
||||
return 0;
|
||||
if (!expect(b->bm_pages))
|
||||
return 0;
|
||||
|
||||
spin_lock_irqsave(&b->bm_lock, flags);
|
||||
if (BM_DONT_TEST & b->bm_flags)
|
||||
|
@ -1632,47 +1697,3 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr)
|
|||
spin_unlock_irqrestore(&b->bm_lock, flags);
|
||||
return count;
|
||||
}
|
||||
|
||||
/* Set all bits covered by the AL-extent al_enr.
|
||||
* Returns number of bits changed. */
|
||||
unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr)
|
||||
{
|
||||
struct drbd_bitmap *b = mdev->bitmap;
|
||||
unsigned long *p_addr, *bm;
|
||||
unsigned long weight;
|
||||
unsigned long s, e;
|
||||
int count, i, do_now;
|
||||
ERR_IF(!b) return 0;
|
||||
ERR_IF(!b->bm_pages) return 0;
|
||||
|
||||
spin_lock_irq(&b->bm_lock);
|
||||
if (BM_DONT_SET & b->bm_flags)
|
||||
bm_print_lock_info(mdev);
|
||||
weight = b->bm_set;
|
||||
|
||||
s = al_enr * BM_WORDS_PER_AL_EXT;
|
||||
e = min_t(size_t, s + BM_WORDS_PER_AL_EXT, b->bm_words);
|
||||
/* assert that s and e are on the same page */
|
||||
D_ASSERT((e-1) >> (PAGE_SHIFT - LN2_BPL + 3)
|
||||
== s >> (PAGE_SHIFT - LN2_BPL + 3));
|
||||
count = 0;
|
||||
if (s < b->bm_words) {
|
||||
i = do_now = e-s;
|
||||
p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s));
|
||||
bm = p_addr + MLPP(s);
|
||||
while (i--) {
|
||||
count += hweight_long(*bm);
|
||||
*bm = -1UL;
|
||||
bm++;
|
||||
}
|
||||
bm_unmap(p_addr);
|
||||
b->bm_set += do_now*BITS_PER_LONG - count;
|
||||
if (e == b->bm_words)
|
||||
b->bm_set -= bm_clear_surplus(b);
|
||||
} else {
|
||||
dev_err(DEV, "start offset (%lu) too large in drbd_bm_ALe_set_all\n", s);
|
||||
}
|
||||
weight = b->bm_set - weight;
|
||||
spin_unlock_irq(&b->bm_lock);
|
||||
return weight;
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,207 @@
|
|||
#include <asm/bug.h>
|
||||
#include <linux/rbtree_augmented.h>
|
||||
#include "drbd_interval.h"
|
||||
|
||||
/**
|
||||
* interval_end - return end of @node
|
||||
*/
|
||||
static inline
|
||||
sector_t interval_end(struct rb_node *node)
|
||||
{
|
||||
struct drbd_interval *this = rb_entry(node, struct drbd_interval, rb);
|
||||
return this->end;
|
||||
}
|
||||
|
||||
/**
|
||||
* compute_subtree_last - compute end of @node
|
||||
*
|
||||
* The end of an interval is the highest (start + (size >> 9)) value of this
|
||||
* node and of its children. Called for @node and its parents whenever the end
|
||||
* may have changed.
|
||||
*/
|
||||
static inline sector_t
|
||||
compute_subtree_last(struct drbd_interval *node)
|
||||
{
|
||||
sector_t max = node->sector + (node->size >> 9);
|
||||
|
||||
if (node->rb.rb_left) {
|
||||
sector_t left = interval_end(node->rb.rb_left);
|
||||
if (left > max)
|
||||
max = left;
|
||||
}
|
||||
if (node->rb.rb_right) {
|
||||
sector_t right = interval_end(node->rb.rb_right);
|
||||
if (right > max)
|
||||
max = right;
|
||||
}
|
||||
return max;
|
||||
}
|
||||
|
||||
static void augment_propagate(struct rb_node *rb, struct rb_node *stop)
|
||||
{
|
||||
while (rb != stop) {
|
||||
struct drbd_interval *node = rb_entry(rb, struct drbd_interval, rb);
|
||||
sector_t subtree_last = compute_subtree_last(node);
|
||||
if (node->end == subtree_last)
|
||||
break;
|
||||
node->end = subtree_last;
|
||||
rb = rb_parent(&node->rb);
|
||||
}
|
||||
}
|
||||
|
||||
static void augment_copy(struct rb_node *rb_old, struct rb_node *rb_new)
|
||||
{
|
||||
struct drbd_interval *old = rb_entry(rb_old, struct drbd_interval, rb);
|
||||
struct drbd_interval *new = rb_entry(rb_new, struct drbd_interval, rb);
|
||||
|
||||
new->end = old->end;
|
||||
}
|
||||
|
||||
static void augment_rotate(struct rb_node *rb_old, struct rb_node *rb_new)
|
||||
{
|
||||
struct drbd_interval *old = rb_entry(rb_old, struct drbd_interval, rb);
|
||||
struct drbd_interval *new = rb_entry(rb_new, struct drbd_interval, rb);
|
||||
|
||||
new->end = old->end;
|
||||
old->end = compute_subtree_last(old);
|
||||
}
|
||||
|
||||
static const struct rb_augment_callbacks augment_callbacks = {
|
||||
augment_propagate,
|
||||
augment_copy,
|
||||
augment_rotate,
|
||||
};
|
||||
|
||||
/**
|
||||
* drbd_insert_interval - insert a new interval into a tree
|
||||
*/
|
||||
bool
|
||||
drbd_insert_interval(struct rb_root *root, struct drbd_interval *this)
|
||||
{
|
||||
struct rb_node **new = &root->rb_node, *parent = NULL;
|
||||
|
||||
BUG_ON(!IS_ALIGNED(this->size, 512));
|
||||
|
||||
while (*new) {
|
||||
struct drbd_interval *here =
|
||||
rb_entry(*new, struct drbd_interval, rb);
|
||||
|
||||
parent = *new;
|
||||
if (this->sector < here->sector)
|
||||
new = &(*new)->rb_left;
|
||||
else if (this->sector > here->sector)
|
||||
new = &(*new)->rb_right;
|
||||
else if (this < here)
|
||||
new = &(*new)->rb_left;
|
||||
else if (this > here)
|
||||
new = &(*new)->rb_right;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
rb_link_node(&this->rb, parent, new);
|
||||
rb_insert_augmented(&this->rb, root, &augment_callbacks);
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* drbd_contains_interval - check if a tree contains a given interval
|
||||
* @sector: start sector of @interval
|
||||
* @interval: may not be a valid pointer
|
||||
*
|
||||
* Returns if the tree contains the node @interval with start sector @start.
|
||||
* Does not dereference @interval until @interval is known to be a valid object
|
||||
* in @tree. Returns %false if @interval is in the tree but with a different
|
||||
* sector number.
|
||||
*/
|
||||
bool
|
||||
drbd_contains_interval(struct rb_root *root, sector_t sector,
|
||||
struct drbd_interval *interval)
|
||||
{
|
||||
struct rb_node *node = root->rb_node;
|
||||
|
||||
while (node) {
|
||||
struct drbd_interval *here =
|
||||
rb_entry(node, struct drbd_interval, rb);
|
||||
|
||||
if (sector < here->sector)
|
||||
node = node->rb_left;
|
||||
else if (sector > here->sector)
|
||||
node = node->rb_right;
|
||||
else if (interval < here)
|
||||
node = node->rb_left;
|
||||
else if (interval > here)
|
||||
node = node->rb_right;
|
||||
else
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* drbd_remove_interval - remove an interval from a tree
|
||||
*/
|
||||
void
|
||||
drbd_remove_interval(struct rb_root *root, struct drbd_interval *this)
|
||||
{
|
||||
rb_erase_augmented(&this->rb, root, &augment_callbacks);
|
||||
}
|
||||
|
||||
/**
|
||||
* drbd_find_overlap - search for an interval overlapping with [sector, sector + size)
|
||||
* @sector: start sector
|
||||
* @size: size, aligned to 512 bytes
|
||||
*
|
||||
* Returns an interval overlapping with [sector, sector + size), or NULL if
|
||||
* there is none. When there is more than one overlapping interval in the
|
||||
* tree, the interval with the lowest start sector is returned, and all other
|
||||
* overlapping intervals will be on the right side of the tree, reachable with
|
||||
* rb_next().
|
||||
*/
|
||||
struct drbd_interval *
|
||||
drbd_find_overlap(struct rb_root *root, sector_t sector, unsigned int size)
|
||||
{
|
||||
struct rb_node *node = root->rb_node;
|
||||
struct drbd_interval *overlap = NULL;
|
||||
sector_t end = sector + (size >> 9);
|
||||
|
||||
BUG_ON(!IS_ALIGNED(size, 512));
|
||||
|
||||
while (node) {
|
||||
struct drbd_interval *here =
|
||||
rb_entry(node, struct drbd_interval, rb);
|
||||
|
||||
if (node->rb_left &&
|
||||
sector < interval_end(node->rb_left)) {
|
||||
/* Overlap if any must be on left side */
|
||||
node = node->rb_left;
|
||||
} else if (here->sector < end &&
|
||||
sector < here->sector + (here->size >> 9)) {
|
||||
overlap = here;
|
||||
break;
|
||||
} else if (sector >= here->sector) {
|
||||
/* Overlap if any must be on right side */
|
||||
node = node->rb_right;
|
||||
} else
|
||||
break;
|
||||
}
|
||||
return overlap;
|
||||
}
|
||||
|
||||
struct drbd_interval *
|
||||
drbd_next_overlap(struct drbd_interval *i, sector_t sector, unsigned int size)
|
||||
{
|
||||
sector_t end = sector + (size >> 9);
|
||||
struct rb_node *node;
|
||||
|
||||
for (;;) {
|
||||
node = rb_next(&i->rb);
|
||||
if (!node)
|
||||
return NULL;
|
||||
i = rb_entry(node, struct drbd_interval, rb);
|
||||
if (i->sector >= end)
|
||||
return NULL;
|
||||
if (sector < i->sector + (i->size >> 9))
|
||||
return i;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,40 @@
|
|||
#ifndef __DRBD_INTERVAL_H
|
||||
#define __DRBD_INTERVAL_H
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/rbtree.h>
|
||||
|
||||
struct drbd_interval {
|
||||
struct rb_node rb;
|
||||
sector_t sector; /* start sector of the interval */
|
||||
unsigned int size; /* size in bytes */
|
||||
sector_t end; /* highest interval end in subtree */
|
||||
int local:1 /* local or remote request? */;
|
||||
int waiting:1;
|
||||
};
|
||||
|
||||
static inline void drbd_clear_interval(struct drbd_interval *i)
|
||||
{
|
||||
RB_CLEAR_NODE(&i->rb);
|
||||
}
|
||||
|
||||
static inline bool drbd_interval_empty(struct drbd_interval *i)
|
||||
{
|
||||
return RB_EMPTY_NODE(&i->rb);
|
||||
}
|
||||
|
||||
extern bool drbd_insert_interval(struct rb_root *, struct drbd_interval *);
|
||||
extern bool drbd_contains_interval(struct rb_root *, sector_t,
|
||||
struct drbd_interval *);
|
||||
extern void drbd_remove_interval(struct rb_root *, struct drbd_interval *);
|
||||
extern struct drbd_interval *drbd_find_overlap(struct rb_root *, sector_t,
|
||||
unsigned int);
|
||||
extern struct drbd_interval *drbd_next_overlap(struct drbd_interval *, sector_t,
|
||||
unsigned int);
|
||||
|
||||
/*
 * drbd_for_each_overlap - iterate over the intervals overlapping a range
 * @i:		struct drbd_interval * used as the loop cursor
 * @root:	root of the interval tree
 * @sector:	start sector of the range
 * @size:	size of the range in bytes
 *
 * Starts with drbd_find_overlap() and advances with drbd_next_overlap()
 * until one of them returns NULL.  @sector and @size are evaluated on every
 * iteration.
 */
#define drbd_for_each_overlap(i, root, sector, size)			\
	for ((i) = drbd_find_overlap((root), (sector), (size));		\
	     (i);							\
	     (i) = drbd_next_overlap((i), (sector), (size)))
|
||||
|
||||
#endif /* __DRBD_INTERVAL_H */
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,55 @@
|
|||
#include "drbd_wrappers.h"
|
||||
#include <linux/kernel.h>
|
||||
#include <net/netlink.h>
|
||||
#include <linux/drbd_genl_api.h>
|
||||
#include "drbd_nla.h"
|
||||
|
||||
static int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla)
|
||||
{
|
||||
struct nlattr *head = nla_data(nla);
|
||||
int len = nla_len(nla);
|
||||
int rem;
|
||||
|
||||
/*
|
||||
* validate_nla (called from nla_parse_nested) ignores attributes
|
||||
* beyond maxtype, and does not understand the DRBD_GENLA_F_MANDATORY flag.
|
||||
* In order to have it validate attributes with the DRBD_GENLA_F_MANDATORY
|
||||
* flag set also, check and remove that flag before calling
|
||||
* nla_parse_nested.
|
||||
*/
|
||||
|
||||
nla_for_each_attr(nla, head, len, rem) {
|
||||
if (nla->nla_type & DRBD_GENLA_F_MANDATORY) {
|
||||
nla->nla_type &= ~DRBD_GENLA_F_MANDATORY;
|
||||
if (nla_type(nla) > maxtype)
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * drbd_nla_parse_nested - nla_parse_nested() with DRBD mandatory-flag handling
 * @tb:		destination array, passed through to nla_parse_nested()
 * @maxtype:	highest attribute type expected
 * @nla:	attribute containing the nested attributes
 * @policy:	validation policy, passed through to nla_parse_nested()
 *
 * Runs drbd_nla_check_mandatory() first; only if that succeeds are the
 * nested attributes parsed.  Returns the first error encountered, or the
 * result of nla_parse_nested().
 */
int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla,
			  const struct nla_policy *policy)
{
	int err = drbd_nla_check_mandatory(maxtype, nla);

	if (err)
		return err;
	return nla_parse_nested(tb, maxtype, nla, policy);
}
|
||||
|
||||
/*
 * drbd_nla_find_nested - nla_find_nested() with DRBD mandatory-flag handling
 * @maxtype:	highest attribute type the caller knows about
 * @nla:	attribute containing the nested attributes
 * @attrtype:	type of the nested attribute to look for
 *
 * If any nested attribute has the DRBD_GENLA_F_MANDATORY flag set and
 * we don't know about that attribute, reject all the nested
 * attributes: the error from drbd_nla_check_mandatory() is then returned
 * wrapped in ERR_PTR().  Otherwise the result of nla_find_nested() is
 * returned.
 */
struct nlattr *drbd_nla_find_nested(int maxtype, struct nlattr *nla, int attrtype)
{
	int err = drbd_nla_check_mandatory(maxtype, nla);

	if (!err)
		return nla_find_nested(nla, attrtype);
	return ERR_PTR(err);
}
|
|
@ -0,0 +1,8 @@
|
|||
#ifndef __DRBD_NLA_H
|
||||
#define __DRBD_NLA_H
|
||||
|
||||
extern int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla,
|
||||
const struct nla_policy *policy);
|
||||
extern struct nlattr *drbd_nla_find_nested(int maxtype, struct nlattr *nla, int attrtype);
|
||||
|
||||
#endif /* __DRBD_NLA_H */
|
|
@ -171,7 +171,7 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
|
|||
if (mdev->state.conn == C_VERIFY_S ||
|
||||
mdev->state.conn == C_VERIFY_T) {
|
||||
bit_pos = bm_bits - mdev->ov_left;
|
||||
if (mdev->agreed_pro_version >= 97)
|
||||
if (verify_can_do_stop_sector(mdev))
|
||||
stop_sector = mdev->ov_stop_sector;
|
||||
} else
|
||||
bit_pos = mdev->bm_resync_fo;
|
||||
|
@ -200,9 +200,11 @@ static void resync_dump_detail(struct seq_file *seq, struct lc_element *e)
|
|||
|
||||
static int drbd_seq_show(struct seq_file *seq, void *v)
|
||||
{
|
||||
int i, hole = 0;
|
||||
int i, prev_i = -1;
|
||||
const char *sn;
|
||||
struct drbd_conf *mdev;
|
||||
struct net_conf *nc;
|
||||
char wp;
|
||||
|
||||
static char write_ordering_chars[] = {
|
||||
[WO_none] = 'n',
|
||||
|
@ -233,16 +235,11 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
|
|||
oos .. known out-of-sync kB
|
||||
*/
|
||||
|
||||
for (i = 0; i < minor_count; i++) {
|
||||
mdev = minor_to_mdev(i);
|
||||
if (!mdev) {
|
||||
hole = 1;
|
||||
continue;
|
||||
}
|
||||
if (hole) {
|
||||
hole = 0;
|
||||
rcu_read_lock();
|
||||
idr_for_each_entry(&minors, mdev, i) {
|
||||
if (prev_i != i - 1)
|
||||
seq_printf(seq, "\n");
|
||||
}
|
||||
prev_i = i;
|
||||
|
||||
sn = drbd_conn_str(mdev->state.conn);
|
||||
|
||||
|
@ -254,6 +251,8 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
|
|||
/* reset mdev->congestion_reason */
|
||||
bdi_rw_congested(&mdev->rq_queue->backing_dev_info);
|
||||
|
||||
nc = rcu_dereference(mdev->tconn->net_conf);
|
||||
wp = nc ? nc->wire_protocol - DRBD_PROT_A + 'A' : ' ';
|
||||
seq_printf(seq,
|
||||
"%2d: cs:%s ro:%s/%s ds:%s/%s %c %c%c%c%c%c%c\n"
|
||||
" ns:%u nr:%u dw:%u dr:%u al:%u bm:%u "
|
||||
|
@ -263,14 +262,13 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
|
|||
drbd_role_str(mdev->state.peer),
|
||||
drbd_disk_str(mdev->state.disk),
|
||||
drbd_disk_str(mdev->state.pdsk),
|
||||
(mdev->net_conf == NULL ? ' ' :
|
||||
(mdev->net_conf->wire_protocol - DRBD_PROT_A+'A')),
|
||||
is_susp(mdev->state) ? 's' : 'r',
|
||||
wp,
|
||||
drbd_suspended(mdev) ? 's' : 'r',
|
||||
mdev->state.aftr_isp ? 'a' : '-',
|
||||
mdev->state.peer_isp ? 'p' : '-',
|
||||
mdev->state.user_isp ? 'u' : '-',
|
||||
mdev->congestion_reason ?: '-',
|
||||
drbd_test_flag(mdev, AL_SUSPENDED) ? 's' : '-',
|
||||
test_bit(AL_SUSPENDED, &mdev->flags) ? 's' : '-',
|
||||
mdev->send_cnt/2,
|
||||
mdev->recv_cnt/2,
|
||||
mdev->writ_cnt/2,
|
||||
|
@ -282,8 +280,8 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
|
|||
atomic_read(&mdev->rs_pending_cnt),
|
||||
atomic_read(&mdev->unacked_cnt),
|
||||
atomic_read(&mdev->ap_bio_cnt),
|
||||
mdev->epochs,
|
||||
write_ordering_chars[mdev->write_ordering]
|
||||
mdev->tconn->epochs,
|
||||
write_ordering_chars[mdev->tconn->write_ordering]
|
||||
);
|
||||
seq_printf(seq, " oos:%llu\n",
|
||||
Bit2KB((unsigned long long)
|
||||
|
@ -308,6 +306,7 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
|
|||
}
|
||||
}
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -77,40 +77,41 @@
|
|||
*/
|
||||
|
||||
enum drbd_req_event {
|
||||
created,
|
||||
to_be_send,
|
||||
to_be_submitted,
|
||||
CREATED,
|
||||
TO_BE_SENT,
|
||||
TO_BE_SUBMITTED,
|
||||
|
||||
/* XXX yes, now I am inconsistent...
|
||||
* these are not "events" but "actions"
|
||||
* oh, well... */
|
||||
queue_for_net_write,
|
||||
queue_for_net_read,
|
||||
queue_for_send_oos,
|
||||
QUEUE_FOR_NET_WRITE,
|
||||
QUEUE_FOR_NET_READ,
|
||||
QUEUE_FOR_SEND_OOS,
|
||||
|
||||
send_canceled,
|
||||
send_failed,
|
||||
handed_over_to_network,
|
||||
oos_handed_to_network,
|
||||
connection_lost_while_pending,
|
||||
read_retry_remote_canceled,
|
||||
recv_acked_by_peer,
|
||||
write_acked_by_peer,
|
||||
write_acked_by_peer_and_sis, /* and set_in_sync */
|
||||
conflict_discarded_by_peer,
|
||||
neg_acked,
|
||||
barrier_acked, /* in protocol A and B */
|
||||
data_received, /* (remote read) */
|
||||
SEND_CANCELED,
|
||||
SEND_FAILED,
|
||||
HANDED_OVER_TO_NETWORK,
|
||||
OOS_HANDED_TO_NETWORK,
|
||||
CONNECTION_LOST_WHILE_PENDING,
|
||||
READ_RETRY_REMOTE_CANCELED,
|
||||
RECV_ACKED_BY_PEER,
|
||||
WRITE_ACKED_BY_PEER,
|
||||
WRITE_ACKED_BY_PEER_AND_SIS, /* and set_in_sync */
|
||||
CONFLICT_RESOLVED,
|
||||
POSTPONE_WRITE,
|
||||
NEG_ACKED,
|
||||
BARRIER_ACKED, /* in protocol A and B */
|
||||
DATA_RECEIVED, /* (remote read) */
|
||||
|
||||
read_completed_with_error,
|
||||
read_ahead_completed_with_error,
|
||||
write_completed_with_error,
|
||||
abort_disk_io,
|
||||
completed_ok,
|
||||
resend,
|
||||
fail_frozen_disk_io,
|
||||
restart_frozen_disk_io,
|
||||
nothing, /* for tracing only */
|
||||
READ_COMPLETED_WITH_ERROR,
|
||||
READ_AHEAD_COMPLETED_WITH_ERROR,
|
||||
WRITE_COMPLETED_WITH_ERROR,
|
||||
ABORT_DISK_IO,
|
||||
COMPLETED_OK,
|
||||
RESEND,
|
||||
FAIL_FROZEN_DISK_IO,
|
||||
RESTART_FROZEN_DISK_IO,
|
||||
NOTHING,
|
||||
};
|
||||
|
||||
/* encoding of request states for now. we don't actually need that many bits.
|
||||
|
@ -142,8 +143,8 @@ enum drbd_req_state_bits {
|
|||
* recv_ack (B) or implicit "ack" (A),
|
||||
* still waiting for the barrier ack.
|
||||
* master_bio may already be completed and invalidated.
|
||||
* 11100: write_acked (C),
|
||||
* data_received (for remote read, any protocol)
|
||||
* 11100: write acked (C),
|
||||
* data received (for remote read, any protocol)
|
||||
* or finally the barrier ack has arrived (B,A)...
|
||||
* request can be freed
|
||||
* 01100: neg-acked (write, protocol C)
|
||||
|
@ -198,6 +199,22 @@ enum drbd_req_state_bits {
|
|||
|
||||
/* Should call drbd_al_complete_io() for this request... */
|
||||
__RQ_IN_ACT_LOG,
|
||||
|
||||
/* The peer has sent a retry ACK */
|
||||
__RQ_POSTPONED,
|
||||
|
||||
/* would have been completed,
|
||||
* but was not, because of drbd_suspended() */
|
||||
__RQ_COMPLETION_SUSP,
|
||||
|
||||
/* We expect a receive ACK (wire proto B) */
|
||||
__RQ_EXP_RECEIVE_ACK,
|
||||
|
||||
/* We expect a write ACK (wire proto C) */
|
||||
__RQ_EXP_WRITE_ACK,
|
||||
|
||||
/* waiting for a barrier ack, did an extra kref_get */
|
||||
__RQ_EXP_BARR_ACK,
|
||||
};
|
||||
|
||||
#define RQ_LOCAL_PENDING (1UL << __RQ_LOCAL_PENDING)
|
||||
|
@ -219,56 +236,16 @@ enum drbd_req_state_bits {
|
|||
|
||||
#define RQ_WRITE (1UL << __RQ_WRITE)
|
||||
#define RQ_IN_ACT_LOG (1UL << __RQ_IN_ACT_LOG)
|
||||
#define RQ_POSTPONED (1UL << __RQ_POSTPONED)
|
||||
#define RQ_COMPLETION_SUSP (1UL << __RQ_COMPLETION_SUSP)
|
||||
#define RQ_EXP_RECEIVE_ACK (1UL << __RQ_EXP_RECEIVE_ACK)
|
||||
#define RQ_EXP_WRITE_ACK (1UL << __RQ_EXP_WRITE_ACK)
|
||||
#define RQ_EXP_BARR_ACK (1UL << __RQ_EXP_BARR_ACK)
|
||||
|
||||
/* For waking up the frozen transfer log mod_req() has to return if the request
|
||||
should be counted in the epoch object*/
|
||||
#define MR_WRITE_SHIFT 0
|
||||
#define MR_WRITE (1 << MR_WRITE_SHIFT)
|
||||
#define MR_READ_SHIFT 1
|
||||
#define MR_READ (1 << MR_READ_SHIFT)
|
||||
|
||||
/* epoch entries */
|
||||
static inline
|
||||
struct hlist_head *ee_hash_slot(struct drbd_conf *mdev, sector_t sector)
|
||||
{
|
||||
BUG_ON(mdev->ee_hash_s == 0);
|
||||
return mdev->ee_hash +
|
||||
((unsigned int)(sector>>HT_SHIFT) % mdev->ee_hash_s);
|
||||
}
|
||||
|
||||
/* transfer log (drbd_request objects) */
|
||||
static inline
|
||||
struct hlist_head *tl_hash_slot(struct drbd_conf *mdev, sector_t sector)
|
||||
{
|
||||
BUG_ON(mdev->tl_hash_s == 0);
|
||||
return mdev->tl_hash +
|
||||
((unsigned int)(sector>>HT_SHIFT) % mdev->tl_hash_s);
|
||||
}
|
||||
|
||||
/* application reads (drbd_request objects) */
|
||||
static struct hlist_head *ar_hash_slot(struct drbd_conf *mdev, sector_t sector)
|
||||
{
|
||||
return mdev->app_reads_hash
|
||||
+ ((unsigned int)(sector) % APP_R_HSIZE);
|
||||
}
|
||||
|
||||
/* when we receive the answer for a read request,
|
||||
* verify that we actually know about it */
|
||||
static inline struct drbd_request *_ar_id_to_req(struct drbd_conf *mdev,
|
||||
u64 id, sector_t sector)
|
||||
{
|
||||
struct hlist_head *slot = ar_hash_slot(mdev, sector);
|
||||
struct hlist_node *n;
|
||||
struct drbd_request *req;
|
||||
|
||||
hlist_for_each_entry(req, n, slot, collision) {
|
||||
if ((unsigned long)req == (unsigned long)id) {
|
||||
D_ASSERT(req->sector == sector);
|
||||
return req;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
#define MR_WRITE 1
|
||||
#define MR_READ 2
|
||||
|
||||
static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bio *bio_src)
|
||||
{
|
||||
|
@ -278,41 +255,10 @@ static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bi
|
|||
req->private_bio = bio;
|
||||
|
||||
bio->bi_private = req;
|
||||
bio->bi_end_io = drbd_endio_pri;
|
||||
bio->bi_end_io = drbd_request_endio;
|
||||
bio->bi_next = NULL;
|
||||
}
|
||||
|
||||
static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev,
|
||||
struct bio *bio_src)
|
||||
{
|
||||
struct drbd_request *req =
|
||||
mempool_alloc(drbd_request_mempool, GFP_NOIO);
|
||||
if (likely(req)) {
|
||||
drbd_req_make_private_bio(req, bio_src);
|
||||
|
||||
req->rq_state = bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0;
|
||||
req->mdev = mdev;
|
||||
req->master_bio = bio_src;
|
||||
req->epoch = 0;
|
||||
req->sector = bio_src->bi_sector;
|
||||
req->size = bio_src->bi_size;
|
||||
INIT_HLIST_NODE(&req->collision);
|
||||
INIT_LIST_HEAD(&req->tl_requests);
|
||||
INIT_LIST_HEAD(&req->w.list);
|
||||
}
|
||||
return req;
|
||||
}
|
||||
|
||||
static inline void drbd_req_free(struct drbd_request *req)
|
||||
{
|
||||
mempool_free(req, drbd_request_mempool);
|
||||
}
|
||||
|
||||
static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
|
||||
{
|
||||
return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
|
||||
}
|
||||
|
||||
/* Short lived temporary struct on the stack.
|
||||
* We could squirrel the error to be returned into
|
||||
* bio->bi_size, or similar. But that would be too ugly. */
|
||||
|
@ -321,6 +267,7 @@ struct bio_and_error {
|
|||
int error;
|
||||
};
|
||||
|
||||
extern void drbd_req_destroy(struct kref *kref);
|
||||
extern void _req_may_be_done(struct drbd_request *req,
|
||||
struct bio_and_error *m);
|
||||
extern int __req_mod(struct drbd_request *req, enum drbd_req_event what,
|
||||
|
@ -328,13 +275,17 @@ extern int __req_mod(struct drbd_request *req, enum drbd_req_event what,
|
|||
extern void complete_master_bio(struct drbd_conf *mdev,
|
||||
struct bio_and_error *m);
|
||||
extern void request_timer_fn(unsigned long data);
|
||||
extern void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what);
|
||||
extern void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what);
|
||||
extern void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what);
|
||||
|
||||
/* this is in drbd_main.c */
|
||||
extern void drbd_restart_request(struct drbd_request *req);
|
||||
|
||||
/* use this if you don't want to deal with calling complete_master_bio()
|
||||
* outside the spinlock, e.g. when walking some list on cleanup. */
|
||||
static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what)
|
||||
{
|
||||
struct drbd_conf *mdev = req->mdev;
|
||||
struct drbd_conf *mdev = req->w.mdev;
|
||||
struct bio_and_error m;
|
||||
int rv;
|
||||
|
||||
|
@ -354,13 +305,13 @@ static inline int req_mod(struct drbd_request *req,
|
|||
enum drbd_req_event what)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct drbd_conf *mdev = req->mdev;
|
||||
struct drbd_conf *mdev = req->w.mdev;
|
||||
struct bio_and_error m;
|
||||
int rv;
|
||||
|
||||
spin_lock_irqsave(&mdev->req_lock, flags);
|
||||
spin_lock_irqsave(&mdev->tconn->req_lock, flags);
|
||||
rv = __req_mod(req, what, &m);
|
||||
spin_unlock_irqrestore(&mdev->req_lock, flags);
|
||||
spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
|
||||
|
||||
if (m.bio)
|
||||
complete_master_bio(mdev, &m);
|
||||
|
@ -368,7 +319,7 @@ static inline int req_mod(struct drbd_request *req,
|
|||
return rv;
|
||||
}
|
||||
|
||||
static inline bool drbd_should_do_remote(union drbd_state s)
|
||||
static inline bool drbd_should_do_remote(union drbd_dev_state s)
|
||||
{
|
||||
return s.pdsk == D_UP_TO_DATE ||
|
||||
(s.pdsk >= D_INCONSISTENT &&
|
||||
|
@ -378,7 +329,7 @@ static inline bool drbd_should_do_remote(union drbd_state s)
|
|||
That is equivalent since before 96 IO was frozen in the C_WF_BITMAP*
|
||||
states. */
|
||||
}
|
||||
static inline bool drbd_should_send_oos(union drbd_state s)
|
||||
static inline bool drbd_should_send_out_of_sync(union drbd_dev_state s)
|
||||
{
|
||||
return s.conn == C_AHEAD || s.conn == C_WF_BITMAP_S;
|
||||
/* pdsk = D_INCONSISTENT as a consequence. Protocol 96 check not necessary
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,161 @@
|
|||
#ifndef DRBD_STATE_H
|
||||
#define DRBD_STATE_H
|
||||
|
||||
struct drbd_conf;
|
||||
struct drbd_tconn;
|
||||
|
||||
/**
|
||||
* DOC: DRBD State macros
|
||||
*
|
||||
* These macros are used to express state changes in easily readable form.
|
||||
*
|
||||
* The NS macros expand to a mask and a value, that can be bit ored onto the
|
||||
* current state as soon as the spinlock (req_lock) was taken.
|
||||
*
|
||||
* The _NS macros are used for state functions that get called with the
|
||||
* spinlock. These macros expand directly to the new state value.
|
||||
*
|
||||
* Besides the basic forms NS() and _NS() additional _?NS[23] are defined
|
||||
* to express state changes that affect more than one aspect of the state.
|
||||
*
|
||||
* E.g. NS2(conn, C_CONNECTED, peer, R_SECONDARY)
|
||||
* Means that the network connection was established and that the peer
|
||||
* is in secondary role.
|
||||
*/
|
||||
#define role_MASK R_MASK
|
||||
#define peer_MASK R_MASK
|
||||
#define disk_MASK D_MASK
|
||||
#define pdsk_MASK D_MASK
|
||||
#define conn_MASK C_MASK
|
||||
#define susp_MASK 1
|
||||
#define user_isp_MASK 1
|
||||
#define aftr_isp_MASK 1
|
||||
#define susp_nod_MASK 1
|
||||
#define susp_fen_MASK 1
|
||||
|
||||
#define NS(T, S) \
|
||||
({ union drbd_state mask; mask.i = 0; mask.T = T##_MASK; mask; }), \
|
||||
({ union drbd_state val; val.i = 0; val.T = (S); val; })
|
||||
#define NS2(T1, S1, T2, S2) \
|
||||
({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \
|
||||
mask.T2 = T2##_MASK; mask; }), \
|
||||
({ union drbd_state val; val.i = 0; val.T1 = (S1); \
|
||||
val.T2 = (S2); val; })
|
||||
#define NS3(T1, S1, T2, S2, T3, S3) \
|
||||
({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \
|
||||
mask.T2 = T2##_MASK; mask.T3 = T3##_MASK; mask; }), \
|
||||
({ union drbd_state val; val.i = 0; val.T1 = (S1); \
|
||||
val.T2 = (S2); val.T3 = (S3); val; })
|
||||
|
||||
#define _NS(D, T, S) \
|
||||
D, ({ union drbd_state __ns; __ns = drbd_read_state(D); __ns.T = (S); __ns; })
|
||||
#define _NS2(D, T1, S1, T2, S2) \
|
||||
D, ({ union drbd_state __ns; __ns = drbd_read_state(D); __ns.T1 = (S1); \
|
||||
__ns.T2 = (S2); __ns; })
|
||||
#define _NS3(D, T1, S1, T2, S2, T3, S3) \
|
||||
D, ({ union drbd_state __ns; __ns = drbd_read_state(D); __ns.T1 = (S1); \
|
||||
__ns.T2 = (S2); __ns.T3 = (S3); __ns; })
|
||||
|
||||
enum chg_state_flags {
|
||||
CS_HARD = 1 << 0,
|
||||
CS_VERBOSE = 1 << 1,
|
||||
CS_WAIT_COMPLETE = 1 << 2,
|
||||
CS_SERIALIZE = 1 << 3,
|
||||
CS_ORDERED = CS_WAIT_COMPLETE + CS_SERIALIZE,
|
||||
CS_LOCAL_ONLY = 1 << 4, /* Do not consider a device pair wide state change */
|
||||
CS_DC_ROLE = 1 << 5, /* DC = display as connection state change */
|
||||
CS_DC_PEER = 1 << 6,
|
||||
CS_DC_CONN = 1 << 7,
|
||||
CS_DC_DISK = 1 << 8,
|
||||
CS_DC_PDSK = 1 << 9,
|
||||
CS_DC_SUSP = 1 << 10,
|
||||
CS_DC_MASK = CS_DC_ROLE + CS_DC_PEER + CS_DC_CONN + CS_DC_DISK + CS_DC_PDSK,
|
||||
CS_IGN_OUTD_FAIL = 1 << 11,
|
||||
};
|
||||
|
||||
/* drbd_dev_state and drbd_state are different types. This is to stress the
|
||||
small difference. There is no suspended flag (.susp), and no suspended
|
||||
while fence handler runs flag (susp_fen). */
|
||||
union drbd_dev_state {
|
||||
struct {
|
||||
#if defined(__LITTLE_ENDIAN_BITFIELD)
|
||||
unsigned role:2 ; /* 3/4 primary/secondary/unknown */
|
||||
unsigned peer:2 ; /* 3/4 primary/secondary/unknown */
|
||||
unsigned conn:5 ; /* 17/32 cstates */
|
||||
unsigned disk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */
|
||||
unsigned pdsk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */
|
||||
unsigned _unused:1 ;
|
||||
unsigned aftr_isp:1 ; /* isp .. imposed sync pause */
|
||||
unsigned peer_isp:1 ;
|
||||
unsigned user_isp:1 ;
|
||||
unsigned _pad:11; /* 0 unused */
|
||||
#elif defined(__BIG_ENDIAN_BITFIELD)
|
||||
unsigned _pad:11;
|
||||
unsigned user_isp:1 ;
|
||||
unsigned peer_isp:1 ;
|
||||
unsigned aftr_isp:1 ; /* isp .. imposed sync pause */
|
||||
unsigned _unused:1 ;
|
||||
unsigned pdsk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */
|
||||
unsigned disk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */
|
||||
unsigned conn:5 ; /* 17/32 cstates */
|
||||
unsigned peer:2 ; /* 3/4 primary/secondary/unknown */
|
||||
unsigned role:2 ; /* 3/4 primary/secondary/unknown */
|
||||
#else
|
||||
# error "this endianess is not supported"
|
||||
#endif
|
||||
};
|
||||
unsigned int i;
|
||||
};
|
||||
|
||||
extern enum drbd_state_rv drbd_change_state(struct drbd_conf *mdev,
|
||||
enum chg_state_flags f,
|
||||
union drbd_state mask,
|
||||
union drbd_state val);
|
||||
extern void drbd_force_state(struct drbd_conf *, union drbd_state,
|
||||
union drbd_state);
|
||||
extern enum drbd_state_rv _drbd_request_state(struct drbd_conf *,
|
||||
union drbd_state,
|
||||
union drbd_state,
|
||||
enum chg_state_flags);
|
||||
extern enum drbd_state_rv __drbd_set_state(struct drbd_conf *, union drbd_state,
|
||||
enum chg_state_flags,
|
||||
struct completion *done);
|
||||
extern void print_st_err(struct drbd_conf *, union drbd_state,
|
||||
union drbd_state, int);
|
||||
|
||||
enum drbd_state_rv
|
||||
_conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val,
|
||||
enum chg_state_flags flags);
|
||||
|
||||
enum drbd_state_rv
|
||||
conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val,
|
||||
enum chg_state_flags flags);
|
||||
|
||||
extern void drbd_resume_al(struct drbd_conf *mdev);
|
||||
extern bool conn_all_vols_unconf(struct drbd_tconn *tconn);
|
||||
|
||||
/**
|
||||
* drbd_request_state() - Request a state change
|
||||
* @mdev: DRBD device.
|
||||
* @mask: mask of state bits to change.
|
||||
* @val: value of new state bits.
|
||||
*
|
||||
* This is the most graceful way of requesting a state change. It is
|
||||
* quite verbose in case the state change is not possible, and all those
|
||||
* state changes are globally serialized.
|
||||
*/
|
||||
static inline int drbd_request_state(struct drbd_conf *mdev,
|
||||
union drbd_state mask,
|
||||
union drbd_state val)
|
||||
{
|
||||
return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED);
|
||||
}
|
||||
|
||||
enum drbd_role conn_highest_role(struct drbd_tconn *tconn);
|
||||
enum drbd_role conn_highest_peer(struct drbd_tconn *tconn);
|
||||
enum drbd_disk_state conn_highest_disk(struct drbd_tconn *tconn);
|
||||
enum drbd_disk_state conn_lowest_disk(struct drbd_tconn *tconn);
|
||||
enum drbd_disk_state conn_highest_pdsk(struct drbd_tconn *tconn);
|
||||
enum drbd_conns conn_lowest_conn(struct drbd_tconn *tconn);
|
||||
|
||||
#endif
|
|
@ -89,6 +89,7 @@ static const char *drbd_state_sw_errors[] = {
|
|||
[-SS_LOWER_THAN_OUTDATED] = "Disk state is lower than outdated",
|
||||
[-SS_IN_TRANSIENT_STATE] = "In transient state, retry after next state change",
|
||||
[-SS_CONCURRENT_ST_CHG] = "Concurrent state changes detected and aborted",
|
||||
[-SS_O_VOL_PEER_PRI] = "Other vol primary on peer not allowed by config",
|
||||
};
|
||||
|
||||
const char *drbd_conn_str(enum drbd_conns s)
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -3,6 +3,7 @@
|
|||
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/mm.h>
|
||||
#include "drbd_int.h"
|
||||
|
||||
/* see get_sb_bdev and bd_claim */
|
||||
extern char *drbd_sec_holder;
|
||||
|
@ -20,8 +21,8 @@ static inline void drbd_set_my_capacity(struct drbd_conf *mdev,
|
|||
|
||||
/* bi_end_io handlers */
|
||||
extern void drbd_md_io_complete(struct bio *bio, int error);
|
||||
extern void drbd_endio_sec(struct bio *bio, int error);
|
||||
extern void drbd_endio_pri(struct bio *bio, int error);
|
||||
extern void drbd_peer_request_endio(struct bio *bio, int error);
|
||||
extern void drbd_request_endio(struct bio *bio, int error);
|
||||
|
||||
/*
|
||||
* used to submit our private bio
|
||||
|
@ -45,12 +46,6 @@ static inline void drbd_generic_make_request(struct drbd_conf *mdev,
|
|||
generic_make_request(bio);
|
||||
}
|
||||
|
||||
static inline int drbd_crypto_is_hash(struct crypto_tfm *tfm)
|
||||
{
|
||||
return (crypto_tfm_alg_type(tfm) & CRYPTO_ALG_TYPE_HASH_MASK)
|
||||
== CRYPTO_ALG_TYPE_HASH;
|
||||
}
|
||||
|
||||
#ifndef __CHECKER__
|
||||
# undef __cond_lock
|
||||
# define __cond_lock(x,c) (c)
|
||||
|
|
|
@ -51,12 +51,11 @@
|
|||
|
||||
#endif
|
||||
|
||||
|
||||
extern const char *drbd_buildtag(void);
|
||||
#define REL_VERSION "8.3.14"
|
||||
#define API_VERSION 88
|
||||
#define REL_VERSION "8.4.2"
|
||||
#define API_VERSION 1
|
||||
#define PRO_VERSION_MIN 86
|
||||
#define PRO_VERSION_MAX 97
|
||||
#define PRO_VERSION_MAX 101
|
||||
|
||||
|
||||
enum drbd_io_error_p {
|
||||
|
@ -66,7 +65,8 @@ enum drbd_io_error_p {
|
|||
};
|
||||
|
||||
enum drbd_fencing_p {
|
||||
FP_DONT_CARE,
|
||||
FP_NOT_AVAIL = -1, /* Not a policy */
|
||||
FP_DONT_CARE = 0,
|
||||
FP_RESOURCE,
|
||||
FP_STONITH
|
||||
};
|
||||
|
@ -102,6 +102,20 @@ enum drbd_on_congestion {
|
|||
OC_DISCONNECT,
|
||||
};
|
||||
|
||||
enum drbd_read_balancing {
|
||||
RB_PREFER_LOCAL,
|
||||
RB_PREFER_REMOTE,
|
||||
RB_ROUND_ROBIN,
|
||||
RB_LEAST_PENDING,
|
||||
RB_CONGESTED_REMOTE,
|
||||
RB_32K_STRIPING,
|
||||
RB_64K_STRIPING,
|
||||
RB_128K_STRIPING,
|
||||
RB_256K_STRIPING,
|
||||
RB_512K_STRIPING,
|
||||
RB_1M_STRIPING,
|
||||
};
|
||||
|
||||
/* KEEP the order, do not delete or insert. Only append. */
|
||||
enum drbd_ret_code {
|
||||
ERR_CODE_BASE = 100,
|
||||
|
@ -122,7 +136,7 @@ enum drbd_ret_code {
|
|||
ERR_AUTH_ALG = 120,
|
||||
ERR_AUTH_ALG_ND = 121,
|
||||
ERR_NOMEM = 122,
|
||||
ERR_DISCARD = 123,
|
||||
ERR_DISCARD_IMPOSSIBLE = 123,
|
||||
ERR_DISK_CONFIGURED = 124,
|
||||
ERR_NET_CONFIGURED = 125,
|
||||
ERR_MANDATORY_TAG = 126,
|
||||
|
@ -130,8 +144,8 @@ enum drbd_ret_code {
|
|||
ERR_INTR = 129, /* EINTR */
|
||||
ERR_RESIZE_RESYNC = 130,
|
||||
ERR_NO_PRIMARY = 131,
|
||||
ERR_SYNC_AFTER = 132,
|
||||
ERR_SYNC_AFTER_CYCLE = 133,
|
||||
ERR_RESYNC_AFTER = 132,
|
||||
ERR_RESYNC_AFTER_CYCLE = 133,
|
||||
ERR_PAUSE_IS_SET = 134,
|
||||
ERR_PAUSE_IS_CLEAR = 135,
|
||||
ERR_PACKET_NR = 137,
|
||||
|
@ -155,6 +169,14 @@ enum drbd_ret_code {
|
|||
ERR_CONG_NOT_PROTO_A = 155,
|
||||
ERR_PIC_AFTER_DEP = 156,
|
||||
ERR_PIC_PEER_DEP = 157,
|
||||
ERR_RES_NOT_KNOWN = 158,
|
||||
ERR_RES_IN_USE = 159,
|
||||
ERR_MINOR_CONFIGURED = 160,
|
||||
ERR_MINOR_EXISTS = 161,
|
||||
ERR_INVALID_REQUEST = 162,
|
||||
ERR_NEED_APV_100 = 163,
|
||||
ERR_NEED_ALLOW_TWO_PRI = 164,
|
||||
ERR_MD_UNCLEAN = 165,
|
||||
|
||||
/* insert new ones above this line */
|
||||
AFTER_LAST_ERR_CODE
|
||||
|
@ -296,7 +318,8 @@ enum drbd_state_rv {
|
|||
SS_NOT_SUPPORTED = -17, /* drbd-8.2 only */
|
||||
SS_IN_TRANSIENT_STATE = -18, /* Retry after the next state change */
|
||||
SS_CONCURRENT_ST_CHG = -19, /* Concurrent cluster side state change! */
|
||||
SS_AFTER_LAST_ERROR = -20, /* Keep this at bottom */
|
||||
SS_O_VOL_PEER_PRI = -20,
|
||||
SS_AFTER_LAST_ERROR = -21, /* Keep this at bottom */
|
||||
};
|
||||
|
||||
/* from drbd_strings.c */
|
||||
|
@ -313,7 +336,9 @@ extern const char *drbd_set_st_err_str(enum drbd_state_rv);
|
|||
#define MDF_FULL_SYNC (1 << 3)
|
||||
#define MDF_WAS_UP_TO_DATE (1 << 4)
|
||||
#define MDF_PEER_OUT_DATED (1 << 5)
|
||||
#define MDF_CRASHED_PRIMARY (1 << 6)
|
||||
#define MDF_CRASHED_PRIMARY (1 << 6)
|
||||
#define MDF_AL_CLEAN (1 << 7)
|
||||
#define MDF_AL_DISABLED (1 << 8)
|
||||
|
||||
enum drbd_uuid_index {
|
||||
UI_CURRENT,
|
||||
|
@ -333,37 +358,23 @@ enum drbd_timeout_flag {
|
|||
|
||||
#define UUID_JUST_CREATED ((__u64)4)
|
||||
|
||||
/* magic numbers used in meta data and network packets */
|
||||
#define DRBD_MAGIC 0x83740267
|
||||
#define BE_DRBD_MAGIC __constant_cpu_to_be32(DRBD_MAGIC)
|
||||
#define DRBD_MAGIC_BIG 0x835a
|
||||
#define BE_DRBD_MAGIC_BIG __constant_cpu_to_be16(DRBD_MAGIC_BIG)
|
||||
#define DRBD_MAGIC_100 0x8620ec20
|
||||
|
||||
#define DRBD_MD_MAGIC_07 (DRBD_MAGIC+3)
|
||||
#define DRBD_MD_MAGIC_08 (DRBD_MAGIC+4)
|
||||
#define DRBD_MD_MAGIC_84_UNCLEAN (DRBD_MAGIC+5)
|
||||
|
||||
|
||||
/* how I came up with this magic?
|
||||
* base64 decode "actlog==" ;) */
|
||||
#define DRBD_AL_MAGIC 0x69cb65a2
|
||||
|
||||
/* these are of type "int" */
|
||||
#define DRBD_MD_INDEX_INTERNAL -1
|
||||
#define DRBD_MD_INDEX_FLEX_EXT -2
|
||||
#define DRBD_MD_INDEX_FLEX_INT -3
|
||||
|
||||
/* Start of the new netlink/connector stuff */
|
||||
|
||||
#define DRBD_NL_CREATE_DEVICE 0x01
|
||||
#define DRBD_NL_SET_DEFAULTS 0x02
|
||||
|
||||
|
||||
/* For searching a vacant cn_idx value */
|
||||
#define CN_IDX_STEP 6977
|
||||
|
||||
struct drbd_nl_cfg_req {
|
||||
int packet_type;
|
||||
unsigned int drbd_minor;
|
||||
int flags;
|
||||
unsigned short tag_list[];
|
||||
};
|
||||
|
||||
struct drbd_nl_cfg_reply {
|
||||
int packet_type;
|
||||
unsigned int minor;
|
||||
int ret_code; /* enum ret_code or set_st_err_t */
|
||||
unsigned short tag_list[]; /* only used with get_* calls */
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,378 @@
|
|||
/*
|
||||
* General overview:
|
||||
* full generic netlink message:
|
||||
* |nlmsghdr|genlmsghdr|<payload>
|
||||
*
|
||||
* payload:
|
||||
* |optional fixed size family header|<sequence of netlink attributes>
|
||||
*
|
||||
* sequence of netlink attributes:
|
||||
* I chose to have all "top level" attributes NLA_NESTED,
|
||||
* corresponding to some real struct.
|
||||
* So we have a sequence of |tla, len|<nested nla sequence>
|
||||
*
|
||||
* nested nla sequence:
|
||||
* may be empty, or contain a sequence of netlink attributes
|
||||
* representing the struct fields.
|
||||
*
|
||||
* The tag number of any field (regardless of containing struct)
|
||||
* will be available as T_ ## field_name,
|
||||
* so you cannot have the same field name in two different structs.
|
||||
*
|
||||
* The tag numbers themselves are per struct, though,
|
||||
* so should always begin at 1 (not 0, that is the special "NLA_UNSPEC" type,
|
||||
* which we won't use here).
|
||||
* The tag numbers are used as index in the respective nla_policy array.
|
||||
*
|
||||
* GENL_struct(tag_name, tag_number, struct name, struct fields) - struct and policy
|
||||
* genl_magic_struct.h
|
||||
* generates the struct declaration,
|
||||
* generates an entry in the tla enum,
|
||||
* genl_magic_func.h
|
||||
* generates an entry in the static tla policy
|
||||
* with .type = NLA_NESTED
|
||||
* generates the static <struct_name>_nl_policy definition,
|
||||
* and static conversion functions
|
||||
*
|
||||
* genl_magic_func.h
|
||||
*
|
||||
* GENL_mc_group(group)
|
||||
* genl_magic_struct.h
|
||||
* does nothing
|
||||
* genl_magic_func.h
|
||||
* defines and registers the mcast group,
|
||||
* and provides a send helper
|
||||
*
|
||||
* GENL_notification(op_name, op_num, mcast_group, tla list)
|
||||
* These are notifications to userspace.
|
||||
*
|
||||
* genl_magic_struct.h
|
||||
* generates an entry in the genl_ops enum,
|
||||
* genl_magic_func.h
|
||||
* does nothing
|
||||
*
|
||||
* mcast group: the name of the mcast group this notification should be
|
||||
* expected on
|
||||
* tla list: the list of expected top level attributes,
|
||||
* for documentation and sanity checking.
|
||||
*
|
||||
* GENL_op(op_name, op_num, flags and handler, tla list) - "genl operations"
|
||||
* These are requests from userspace.
|
||||
*
|
||||
* _op and _notification share the same "number space",
|
||||
* op_nr will be assigned to "genlmsghdr->cmd"
|
||||
*
|
||||
* genl_magic_struct.h
|
||||
* generates an entry in the genl_ops enum,
|
||||
* genl_magic_func.h
|
||||
* generates an entry in the static genl_ops array,
|
||||
* and static register/unregister functions to
|
||||
* genl_register_family_with_ops().
|
||||
*
|
||||
* flags and handler:
|
||||
* GENL_op_init( .doit = x, .dumpit = y, .flags = something)
|
||||
* GENL_doit(x) => .dumpit = NULL, .flags = GENL_ADMIN_PERM
|
||||
* tla list: the list of expected top level attributes,
|
||||
* for documentation and sanity checking.
|
||||
*/
|
||||
|
||||
/*
|
||||
* STRUCTS
|
||||
*/
|
||||
|
||||
/* this is sent kernel -> userland on various error conditions, and contains
|
||||
* informational textual info, which is supposedly human readable.
|
||||
* The computer relevant return code is in the drbd_genlmsghdr.
|
||||
*/
|
||||
GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply,
|
||||
/* "arbitrary" size strings, nla_policy.len = 0 */
|
||||
__str_field(1, DRBD_GENLA_F_MANDATORY, info_text, 0)
|
||||
)
|
||||
|
||||
/* Configuration requests typically need a context to operate on.
|
||||
* Possible keys are device minor (fits in the drbd_genlmsghdr),
|
||||
* the replication link (aka connection) name,
|
||||
* and/or the replication group (aka resource) name,
|
||||
* and the volume id within the resource. */
|
||||
GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context,
|
||||
__u32_field(1, DRBD_GENLA_F_MANDATORY, ctx_volume)
|
||||
__str_field(2, DRBD_GENLA_F_MANDATORY, ctx_resource_name, 128)
|
||||
__bin_field(3, DRBD_GENLA_F_MANDATORY, ctx_my_addr, 128)
|
||||
__bin_field(4, DRBD_GENLA_F_MANDATORY, ctx_peer_addr, 128)
|
||||
)
|
||||
|
||||
GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
|
||||
__str_field(1, DRBD_F_REQUIRED | DRBD_F_INVARIANT, backing_dev, 128)
|
||||
__str_field(2, DRBD_F_REQUIRED | DRBD_F_INVARIANT, meta_dev, 128)
|
||||
__s32_field(3, DRBD_F_REQUIRED | DRBD_F_INVARIANT, meta_dev_idx)
|
||||
|
||||
/* use the resize command to try and change the disk_size */
|
||||
__u64_field(4, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, disk_size)
|
||||
/* we could change the max_bio_bvecs,
|
||||
* but it won't propagate through the stack */
|
||||
__u32_field(5, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, max_bio_bvecs)
|
||||
|
||||
__u32_field_def(6, DRBD_GENLA_F_MANDATORY, on_io_error, DRBD_ON_IO_ERROR_DEF)
|
||||
__u32_field_def(7, DRBD_GENLA_F_MANDATORY, fencing, DRBD_FENCING_DEF)
|
||||
|
||||
__u32_field_def(8, DRBD_GENLA_F_MANDATORY, resync_rate, DRBD_RESYNC_RATE_DEF)
|
||||
__s32_field_def(9, DRBD_GENLA_F_MANDATORY, resync_after, DRBD_MINOR_NUMBER_DEF)
|
||||
__u32_field_def(10, DRBD_GENLA_F_MANDATORY, al_extents, DRBD_AL_EXTENTS_DEF)
|
||||
__u32_field_def(11, DRBD_GENLA_F_MANDATORY, c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF)
|
||||
__u32_field_def(12, DRBD_GENLA_F_MANDATORY, c_delay_target, DRBD_C_DELAY_TARGET_DEF)
|
||||
__u32_field_def(13, DRBD_GENLA_F_MANDATORY, c_fill_target, DRBD_C_FILL_TARGET_DEF)
|
||||
__u32_field_def(14, DRBD_GENLA_F_MANDATORY, c_max_rate, DRBD_C_MAX_RATE_DEF)
|
||||
__u32_field_def(15, DRBD_GENLA_F_MANDATORY, c_min_rate, DRBD_C_MIN_RATE_DEF)
|
||||
|
||||
__flg_field_def(16, DRBD_GENLA_F_MANDATORY, disk_barrier, DRBD_DISK_BARRIER_DEF)
|
||||
__flg_field_def(17, DRBD_GENLA_F_MANDATORY, disk_flushes, DRBD_DISK_FLUSHES_DEF)
|
||||
__flg_field_def(18, DRBD_GENLA_F_MANDATORY, disk_drain, DRBD_DISK_DRAIN_DEF)
|
||||
__flg_field_def(19, DRBD_GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF)
|
||||
__u32_field_def(20, DRBD_GENLA_F_MANDATORY, disk_timeout, DRBD_DISK_TIMEOUT_DEF)
|
||||
__u32_field_def(21, 0 /* OPTIONAL */, read_balancing, DRBD_READ_BALANCING_DEF)
|
||||
/* 9: __u32_field_def(22, DRBD_GENLA_F_MANDATORY, unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF) */
|
||||
__flg_field_def(23, 0 /* OPTIONAL */, al_updates, DRBD_AL_UPDATES_DEF)
|
||||
)
|
||||
|
||||
GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts,
|
||||
__str_field_def(1, DRBD_GENLA_F_MANDATORY, cpu_mask, 32)
|
||||
__u32_field_def(2, DRBD_GENLA_F_MANDATORY, on_no_data, DRBD_ON_NO_DATA_DEF)
|
||||
)
|
||||
|
||||
GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf,
|
||||
__str_field_def(1, DRBD_GENLA_F_MANDATORY | DRBD_F_SENSITIVE,
|
||||
shared_secret, SHARED_SECRET_MAX)
|
||||
__str_field_def(2, DRBD_GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX)
|
||||
__str_field_def(3, DRBD_GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX)
|
||||
__str_field_def(4, DRBD_GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX)
|
||||
__str_field_def(5, DRBD_GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX)
|
||||
__u32_field_def(6, DRBD_GENLA_F_MANDATORY, wire_protocol, DRBD_PROTOCOL_DEF)
|
||||
__u32_field_def(7, DRBD_GENLA_F_MANDATORY, connect_int, DRBD_CONNECT_INT_DEF)
|
||||
__u32_field_def(8, DRBD_GENLA_F_MANDATORY, timeout, DRBD_TIMEOUT_DEF)
|
||||
__u32_field_def(9, DRBD_GENLA_F_MANDATORY, ping_int, DRBD_PING_INT_DEF)
|
||||
__u32_field_def(10, DRBD_GENLA_F_MANDATORY, ping_timeo, DRBD_PING_TIMEO_DEF)
|
||||
__u32_field_def(11, DRBD_GENLA_F_MANDATORY, sndbuf_size, DRBD_SNDBUF_SIZE_DEF)
|
||||
__u32_field_def(12, DRBD_GENLA_F_MANDATORY, rcvbuf_size, DRBD_RCVBUF_SIZE_DEF)
|
||||
__u32_field_def(13, DRBD_GENLA_F_MANDATORY, ko_count, DRBD_KO_COUNT_DEF)
|
||||
__u32_field_def(14, DRBD_GENLA_F_MANDATORY, max_buffers, DRBD_MAX_BUFFERS_DEF)
|
||||
__u32_field_def(15, DRBD_GENLA_F_MANDATORY, max_epoch_size, DRBD_MAX_EPOCH_SIZE_DEF)
|
||||
__u32_field_def(16, DRBD_GENLA_F_MANDATORY, unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF)
|
||||
__u32_field_def(17, DRBD_GENLA_F_MANDATORY, after_sb_0p, DRBD_AFTER_SB_0P_DEF)
|
||||
__u32_field_def(18, DRBD_GENLA_F_MANDATORY, after_sb_1p, DRBD_AFTER_SB_1P_DEF)
|
||||
__u32_field_def(19, DRBD_GENLA_F_MANDATORY, after_sb_2p, DRBD_AFTER_SB_2P_DEF)
|
||||
__u32_field_def(20, DRBD_GENLA_F_MANDATORY, rr_conflict, DRBD_RR_CONFLICT_DEF)
|
||||
__u32_field_def(21, DRBD_GENLA_F_MANDATORY, on_congestion, DRBD_ON_CONGESTION_DEF)
|
||||
__u32_field_def(22, DRBD_GENLA_F_MANDATORY, cong_fill, DRBD_CONG_FILL_DEF)
|
||||
__u32_field_def(23, DRBD_GENLA_F_MANDATORY, cong_extents, DRBD_CONG_EXTENTS_DEF)
|
||||
__flg_field_def(24, DRBD_GENLA_F_MANDATORY, two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF)
|
||||
__flg_field(25, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, discard_my_data)
|
||||
__flg_field_def(26, DRBD_GENLA_F_MANDATORY, tcp_cork, DRBD_TCP_CORK_DEF)
|
||||
__flg_field_def(27, DRBD_GENLA_F_MANDATORY, always_asbp, DRBD_ALWAYS_ASBP_DEF)
|
||||
__flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, tentative)
|
||||
__flg_field_def(29, DRBD_GENLA_F_MANDATORY, use_rle, DRBD_USE_RLE_DEF)
|
||||
/* 9: __u32_field_def(30, DRBD_GENLA_F_MANDATORY, fencing_policy, DRBD_FENCING_DEF) */
|
||||
)
|
||||
|
||||
GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms,
|
||||
__flg_field(1, DRBD_GENLA_F_MANDATORY, assume_uptodate)
|
||||
)
|
||||
|
||||
GENL_struct(DRBD_NLA_RESIZE_PARMS, 7, resize_parms,
|
||||
__u64_field(1, DRBD_GENLA_F_MANDATORY, resize_size)
|
||||
__flg_field(2, DRBD_GENLA_F_MANDATORY, resize_force)
|
||||
__flg_field(3, DRBD_GENLA_F_MANDATORY, no_resync)
|
||||
)
|
||||
|
||||
GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info,
|
||||
/* the reason of the broadcast,
|
||||
* if this is an event triggered broadcast. */
|
||||
__u32_field(1, DRBD_GENLA_F_MANDATORY, sib_reason)
|
||||
__u32_field(2, DRBD_F_REQUIRED, current_state)
|
||||
__u64_field(3, DRBD_GENLA_F_MANDATORY, capacity)
|
||||
__u64_field(4, DRBD_GENLA_F_MANDATORY, ed_uuid)
|
||||
|
||||
/* These are for broadcast from after state change work.
|
||||
* prev_state and new_state are from the moment the state change took
|
||||
* place, new_state is not necessarily the same as current_state,
|
||||
* there may have been more state changes since. Which will be
|
||||
* broadcasted soon, in their respective after state change work. */
|
||||
__u32_field(5, DRBD_GENLA_F_MANDATORY, prev_state)
|
||||
__u32_field(6, DRBD_GENLA_F_MANDATORY, new_state)
|
||||
|
||||
/* if we have a local disk: */
|
||||
__bin_field(7, DRBD_GENLA_F_MANDATORY, uuids, (UI_SIZE*sizeof(__u64)))
|
||||
__u32_field(8, DRBD_GENLA_F_MANDATORY, disk_flags)
|
||||
__u64_field(9, DRBD_GENLA_F_MANDATORY, bits_total)
|
||||
__u64_field(10, DRBD_GENLA_F_MANDATORY, bits_oos)
|
||||
/* and in case resync or online verify is active */
|
||||
__u64_field(11, DRBD_GENLA_F_MANDATORY, bits_rs_total)
|
||||
__u64_field(12, DRBD_GENLA_F_MANDATORY, bits_rs_failed)
|
||||
|
||||
/* for pre and post notifications of helper execution */
|
||||
__str_field(13, DRBD_GENLA_F_MANDATORY, helper, 32)
|
||||
__u32_field(14, DRBD_GENLA_F_MANDATORY, helper_exit_code)
|
||||
|
||||
__u64_field(15, 0, send_cnt)
|
||||
__u64_field(16, 0, recv_cnt)
|
||||
__u64_field(17, 0, read_cnt)
|
||||
__u64_field(18, 0, writ_cnt)
|
||||
__u64_field(19, 0, al_writ_cnt)
|
||||
__u64_field(20, 0, bm_writ_cnt)
|
||||
__u32_field(21, 0, ap_bio_cnt)
|
||||
__u32_field(22, 0, ap_pending_cnt)
|
||||
__u32_field(23, 0, rs_pending_cnt)
|
||||
)
|
||||
|
||||
GENL_struct(DRBD_NLA_START_OV_PARMS, 9, start_ov_parms,
|
||||
__u64_field(1, DRBD_GENLA_F_MANDATORY, ov_start_sector)
|
||||
__u64_field(2, DRBD_GENLA_F_MANDATORY, ov_stop_sector)
|
||||
)
|
||||
|
||||
GENL_struct(DRBD_NLA_NEW_C_UUID_PARMS, 10, new_c_uuid_parms,
|
||||
__flg_field(1, DRBD_GENLA_F_MANDATORY, clear_bm)
|
||||
)
|
||||
|
||||
GENL_struct(DRBD_NLA_TIMEOUT_PARMS, 11, timeout_parms,
|
||||
__u32_field(1, DRBD_F_REQUIRED, timeout_type)
|
||||
)
|
||||
|
||||
GENL_struct(DRBD_NLA_DISCONNECT_PARMS, 12, disconnect_parms,
|
||||
__flg_field(1, DRBD_GENLA_F_MANDATORY, force_disconnect)
|
||||
)
|
||||
|
||||
GENL_struct(DRBD_NLA_DETACH_PARMS, 13, detach_parms,
|
||||
__flg_field(1, DRBD_GENLA_F_MANDATORY, force_detach)
|
||||
)
|
||||
|
||||
/*
|
||||
* Notifications and commands (genlmsghdr->cmd)
|
||||
*/
|
||||
GENL_mc_group(events)
|
||||
|
||||
/* kernel -> userspace announcement of changes */
|
||||
GENL_notification(
|
||||
DRBD_EVENT, 1, events,
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
|
||||
GENL_tla_expected(DRBD_NLA_STATE_INFO, DRBD_F_REQUIRED)
|
||||
GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_GENLA_F_MANDATORY)
|
||||
GENL_tla_expected(DRBD_NLA_DISK_CONF, DRBD_GENLA_F_MANDATORY)
|
||||
GENL_tla_expected(DRBD_NLA_SYNCER_CONF, DRBD_GENLA_F_MANDATORY)
|
||||
)
|
||||
|
||||
/* query kernel for specific or all info */
|
||||
GENL_op(
|
||||
DRBD_ADM_GET_STATUS, 2,
|
||||
GENL_op_init(
|
||||
.doit = drbd_adm_get_status,
|
||||
.dumpit = drbd_adm_get_status_all,
|
||||
/* anyone may ask for the status,
|
||||
* it is broadcast anyway */
|
||||
),
|
||||
/* To select the object .doit.
|
||||
* Or a subset of objects in .dumpit. */
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY)
|
||||
)
|
||||
|
||||
/* add DRBD minor devices as volumes to resources */
|
||||
GENL_op(DRBD_ADM_NEW_MINOR, 5, GENL_doit(drbd_adm_add_minor),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
|
||||
GENL_op(DRBD_ADM_DEL_MINOR, 6, GENL_doit(drbd_adm_delete_minor),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
|
||||
|
||||
/* add or delete resources */
|
||||
GENL_op(DRBD_ADM_NEW_RESOURCE, 7, GENL_doit(drbd_adm_new_resource),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
|
||||
GENL_op(DRBD_ADM_DEL_RESOURCE, 8, GENL_doit(drbd_adm_del_resource),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
|
||||
|
||||
GENL_op(DRBD_ADM_RESOURCE_OPTS, 9,
|
||||
GENL_doit(drbd_adm_resource_opts),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
|
||||
GENL_tla_expected(DRBD_NLA_RESOURCE_OPTS, DRBD_GENLA_F_MANDATORY)
|
||||
)
|
||||
|
||||
GENL_op(
|
||||
DRBD_ADM_CONNECT, 10,
|
||||
GENL_doit(drbd_adm_connect),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
|
||||
GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_F_REQUIRED)
|
||||
)
|
||||
|
||||
GENL_op(
|
||||
DRBD_ADM_CHG_NET_OPTS, 29,
|
||||
GENL_doit(drbd_adm_net_opts),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
|
||||
GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_F_REQUIRED)
|
||||
)
|
||||
|
||||
GENL_op(DRBD_ADM_DISCONNECT, 11, GENL_doit(drbd_adm_disconnect),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
|
||||
|
||||
GENL_op(DRBD_ADM_ATTACH, 12,
|
||||
GENL_doit(drbd_adm_attach),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
|
||||
GENL_tla_expected(DRBD_NLA_DISK_CONF, DRBD_F_REQUIRED)
|
||||
)
|
||||
|
||||
GENL_op(DRBD_ADM_CHG_DISK_OPTS, 28,
|
||||
GENL_doit(drbd_adm_disk_opts),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
|
||||
GENL_tla_expected(DRBD_NLA_DISK_OPTS, DRBD_F_REQUIRED)
|
||||
)
|
||||
|
||||
GENL_op(
|
||||
DRBD_ADM_RESIZE, 13,
|
||||
GENL_doit(drbd_adm_resize),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
|
||||
GENL_tla_expected(DRBD_NLA_RESIZE_PARMS, DRBD_GENLA_F_MANDATORY)
|
||||
)
|
||||
|
||||
GENL_op(
|
||||
DRBD_ADM_PRIMARY, 14,
|
||||
GENL_doit(drbd_adm_set_role),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
|
||||
GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, DRBD_F_REQUIRED)
|
||||
)
|
||||
|
||||
GENL_op(
|
||||
DRBD_ADM_SECONDARY, 15,
|
||||
GENL_doit(drbd_adm_set_role),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
|
||||
GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, DRBD_F_REQUIRED)
|
||||
)
|
||||
|
||||
GENL_op(
|
||||
DRBD_ADM_NEW_C_UUID, 16,
|
||||
GENL_doit(drbd_adm_new_c_uuid),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
|
||||
GENL_tla_expected(DRBD_NLA_NEW_C_UUID_PARMS, DRBD_GENLA_F_MANDATORY)
|
||||
)
|
||||
|
||||
GENL_op(
|
||||
DRBD_ADM_START_OV, 17,
|
||||
GENL_doit(drbd_adm_start_ov),
|
||||
GENL_tla_expected(DRBD_NLA_START_OV_PARMS, DRBD_GENLA_F_MANDATORY)
|
||||
)
|
||||
|
||||
GENL_op(DRBD_ADM_DETACH, 18, GENL_doit(drbd_adm_detach),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
|
||||
GENL_tla_expected(DRBD_NLA_DETACH_PARMS, DRBD_GENLA_F_MANDATORY))
|
||||
|
||||
GENL_op(DRBD_ADM_INVALIDATE, 19, GENL_doit(drbd_adm_invalidate),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
|
||||
GENL_op(DRBD_ADM_INVAL_PEER, 20, GENL_doit(drbd_adm_invalidate_peer),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
|
||||
GENL_op(DRBD_ADM_PAUSE_SYNC, 21, GENL_doit(drbd_adm_pause_sync),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
|
||||
GENL_op(DRBD_ADM_RESUME_SYNC, 22, GENL_doit(drbd_adm_resume_sync),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
|
||||
GENL_op(DRBD_ADM_SUSPEND_IO, 23, GENL_doit(drbd_adm_suspend_io),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
|
||||
GENL_op(DRBD_ADM_RESUME_IO, 24, GENL_doit(drbd_adm_resume_io),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
|
||||
GENL_op(DRBD_ADM_OUTDATE, 25, GENL_doit(drbd_adm_outdate),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
|
||||
GENL_op(DRBD_ADM_GET_TIMEOUT_TYPE, 26, GENL_doit(drbd_adm_get_timeout_type),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
|
||||
GENL_op(DRBD_ADM_DOWN, 27, GENL_doit(drbd_adm_down),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
|
|
@ -0,0 +1,55 @@
|
|||
#ifndef DRBD_GENL_STRUCT_H
|
||||
#define DRBD_GENL_STRUCT_H
|
||||
|
||||
/**
|
||||
* struct drbd_genlmsghdr - DRBD specific header used in NETLINK_GENERIC requests
|
||||
* @minor:
|
||||
* For admin requests (user -> kernel): which minor device to operate on.
|
||||
* For (unicast) replies or informational (broadcast) messages
|
||||
* (kernel -> user): which minor device the information is about.
|
||||
* If we do not operate on minors, but on connections or resources,
|
||||
* the minor value shall be (~0), and the attribute DRBD_NLA_CFG_CONTEXT
|
||||
* is used instead.
|
||||
* @flags: possible operation modifiers (relevant only for user->kernel):
|
||||
* DRBD_GENL_F_SET_DEFAULTS
|
||||
* @volume:
|
||||
* When creating a new minor (adding it to a resource), the resource needs
|
||||
* to know which volume number within the resource this is supposed to be.
|
||||
* The volume number corresponds to the same volume number on the remote side,
|
||||
* whereas the minor number on the remote side may be different
|
||||
* (union with flags).
|
||||
* @ret_code: kernel->userland unicast cfg reply return code (union with flags);
|
||||
*/
|
||||
struct drbd_genlmsghdr {
|
||||
__u32 minor; /* minor device addressed by a request / described by a reply */
|
||||
union { /* anonymous: flags and ret_code share the same 32 bits */
|
||||
__u32 flags; /* user -> kernel: operation modifiers, e.g. DRBD_GENL_F_SET_DEFAULTS */
|
||||
__s32 ret_code; /* kernel -> user: return code of a unicast cfg reply */
|
||||
};
|
||||
};
|
||||
|
||||
/* To be used in drbd_genlmsghdr.flags */
|
||||
enum {
|
||||
DRBD_GENL_F_SET_DEFAULTS = 1,
|
||||
};
|
||||
|
||||
/* Reason codes for a state_info message.
 * NOTE(review): presumably these are the values carried in the sib_reason
 * attribute of DRBD_NLA_STATE_INFO -- confirm against the sender. */
enum drbd_state_info_bcast_reason {
|
||||
SIB_GET_STATUS_REPLY = 1,
|
||||
SIB_STATE_CHANGE = 2,
|
||||
SIB_HELPER_PRE = 3,
|
||||
SIB_HELPER_POST = 4,
|
||||
SIB_SYNC_PROGRESS = 5,
|
||||
};
|
||||
|
||||
/* hack around predefined gcc/cpp "linux=1",
|
||||
* we cannot possibly include <1/drbd_genl.h> */
|
||||
#undef linux
|
||||
|
||||
#include <linux/drbd.h>
|
||||
#define GENL_MAGIC_VERSION API_VERSION
|
||||
#define GENL_MAGIC_FAMILY drbd
|
||||
#define GENL_MAGIC_FAMILY_HDRSZ sizeof(struct drbd_genlmsghdr)
|
||||
#define GENL_MAGIC_INCLUDE_FILE <linux/drbd_genl.h>
|
||||
#include <linux/genl_magic_struct.h>
|
||||
|
||||
#endif
|
|
@ -16,29 +16,37 @@
|
|||
#define DEBUG_RANGE_CHECK 0
|
||||
|
||||
#define DRBD_MINOR_COUNT_MIN 1
|
||||
#define DRBD_MINOR_COUNT_MAX 256
|
||||
#define DRBD_MINOR_COUNT_MAX 255
|
||||
#define DRBD_MINOR_COUNT_DEF 32
|
||||
#define DRBD_MINOR_COUNT_SCALE '1'
|
||||
|
||||
#define DRBD_VOLUME_MAX 65535
|
||||
|
||||
#define DRBD_DIALOG_REFRESH_MIN 0
|
||||
#define DRBD_DIALOG_REFRESH_MAX 600
|
||||
#define DRBD_DIALOG_REFRESH_SCALE '1'
|
||||
|
||||
/* valid port number */
|
||||
#define DRBD_PORT_MIN 1
|
||||
#define DRBD_PORT_MAX 0xffff
|
||||
#define DRBD_PORT_SCALE '1'
|
||||
|
||||
/* startup { */
|
||||
/* if you want more than 3.4 days, disable */
|
||||
#define DRBD_WFC_TIMEOUT_MIN 0
|
||||
#define DRBD_WFC_TIMEOUT_MAX 300000
|
||||
#define DRBD_WFC_TIMEOUT_DEF 0
|
||||
#define DRBD_WFC_TIMEOUT_SCALE '1'
|
||||
|
||||
#define DRBD_DEGR_WFC_TIMEOUT_MIN 0
|
||||
#define DRBD_DEGR_WFC_TIMEOUT_MAX 300000
|
||||
#define DRBD_DEGR_WFC_TIMEOUT_DEF 0
|
||||
#define DRBD_DEGR_WFC_TIMEOUT_SCALE '1'
|
||||
|
||||
#define DRBD_OUTDATED_WFC_TIMEOUT_MIN 0
|
||||
#define DRBD_OUTDATED_WFC_TIMEOUT_MAX 300000
|
||||
#define DRBD_OUTDATED_WFC_TIMEOUT_DEF 0
|
||||
#define DRBD_OUTDATED_WFC_TIMEOUT_SCALE '1'
|
||||
/* }*/
|
||||
|
||||
/* net { */
|
||||
|
@ -47,75 +55,91 @@
|
|||
#define DRBD_TIMEOUT_MIN 1
|
||||
#define DRBD_TIMEOUT_MAX 600
|
||||
#define DRBD_TIMEOUT_DEF 60 /* 6 seconds */
|
||||
#define DRBD_TIMEOUT_SCALE '1'
|
||||
|
||||
/* If backing disk takes longer than disk_timeout, mark the disk as failed */
|
||||
#define DRBD_DISK_TIMEOUT_MIN 0 /* 0 = disabled */
|
||||
#define DRBD_DISK_TIMEOUT_MAX 6000 /* 10 Minutes */
|
||||
#define DRBD_DISK_TIMEOUT_DEF 0 /* disabled */
|
||||
#define DRBD_DISK_TIMEOUT_SCALE '1'
|
||||
|
||||
/* active connection retries when C_WF_CONNECTION */
|
||||
#define DRBD_CONNECT_INT_MIN 1
|
||||
#define DRBD_CONNECT_INT_MAX 120
|
||||
#define DRBD_CONNECT_INT_DEF 10 /* seconds */
|
||||
#define DRBD_CONNECT_INT_SCALE '1'
|
||||
|
||||
/* keep-alive probes when idle */
|
||||
#define DRBD_PING_INT_MIN 1
|
||||
#define DRBD_PING_INT_MAX 120
|
||||
#define DRBD_PING_INT_DEF 10
|
||||
#define DRBD_PING_INT_SCALE '1'
|
||||
|
||||
/* timeout for the ping packets.*/
|
||||
#define DRBD_PING_TIMEO_MIN 1
|
||||
#define DRBD_PING_TIMEO_MAX 300
|
||||
#define DRBD_PING_TIMEO_DEF 5
|
||||
#define DRBD_PING_TIMEO_SCALE '1'
|
||||
|
||||
/* max number of write requests between write barriers */
|
||||
#define DRBD_MAX_EPOCH_SIZE_MIN 1
|
||||
#define DRBD_MAX_EPOCH_SIZE_MAX 20000
|
||||
#define DRBD_MAX_EPOCH_SIZE_DEF 2048
|
||||
#define DRBD_MAX_EPOCH_SIZE_SCALE '1'
|
||||
|
||||
/* I don't think that a tcp send buffer of more than 10M is useful */
|
||||
#define DRBD_SNDBUF_SIZE_MIN 0
|
||||
#define DRBD_SNDBUF_SIZE_MAX (10<<20)
|
||||
#define DRBD_SNDBUF_SIZE_DEF 0
|
||||
#define DRBD_SNDBUF_SIZE_SCALE '1'
|
||||
|
||||
#define DRBD_RCVBUF_SIZE_MIN 0
|
||||
#define DRBD_RCVBUF_SIZE_MAX (10<<20)
|
||||
#define DRBD_RCVBUF_SIZE_DEF 0
|
||||
#define DRBD_RCVBUF_SIZE_SCALE '1'
|
||||
|
||||
/* @4k PageSize -> 128kB - 512MB */
|
||||
#define DRBD_MAX_BUFFERS_MIN 32
|
||||
#define DRBD_MAX_BUFFERS_MAX 131072
|
||||
#define DRBD_MAX_BUFFERS_DEF 2048
|
||||
#define DRBD_MAX_BUFFERS_SCALE '1'
|
||||
|
||||
/* @4k PageSize -> 4kB - 512MB */
|
||||
#define DRBD_UNPLUG_WATERMARK_MIN 1
|
||||
#define DRBD_UNPLUG_WATERMARK_MAX 131072
|
||||
#define DRBD_UNPLUG_WATERMARK_DEF (DRBD_MAX_BUFFERS_DEF/16)
|
||||
#define DRBD_UNPLUG_WATERMARK_SCALE '1'
|
||||
|
||||
/* 0 is disabled.
|
||||
* 200 should be more than enough even for very short timeouts */
|
||||
#define DRBD_KO_COUNT_MIN 0
|
||||
#define DRBD_KO_COUNT_MAX 200
|
||||
#define DRBD_KO_COUNT_DEF 0
|
||||
#define DRBD_KO_COUNT_DEF 7
|
||||
#define DRBD_KO_COUNT_SCALE '1'
|
||||
/* } */
|
||||
|
||||
/* syncer { */
|
||||
/* FIXME allow rate to be zero? */
|
||||
#define DRBD_RATE_MIN 1
|
||||
#define DRBD_RESYNC_RATE_MIN 1
|
||||
/* channel bonding 10 GbE, or other hardware */
|
||||
#define DRBD_RATE_MAX (4 << 20)
|
||||
#define DRBD_RATE_DEF 250 /* kb/second */
|
||||
#define DRBD_RESYNC_RATE_MAX (4 << 20)
|
||||
#define DRBD_RESYNC_RATE_DEF 250
|
||||
#define DRBD_RESYNC_RATE_SCALE 'k' /* kilobytes */
|
||||
|
||||
/* less than 7 would hit performance unnecessarily.
|
||||
* 3833 is the largest prime that still does fit
|
||||
* into 64 sectors of activity log */
|
||||
* 919 slots context information per transaction,
|
||||
* 32k activity log, 4k transaction size,
|
||||
* one transaction in flight:
|
||||
* 919 * 7 = 6433 */
|
||||
#define DRBD_AL_EXTENTS_MIN 7
|
||||
#define DRBD_AL_EXTENTS_MAX 3833
|
||||
#define DRBD_AL_EXTENTS_DEF 127
|
||||
#define DRBD_AL_EXTENTS_MAX 6433
|
||||
#define DRBD_AL_EXTENTS_DEF 1237
|
||||
#define DRBD_AL_EXTENTS_SCALE '1'
|
||||
|
||||
#define DRBD_AFTER_MIN -1
|
||||
#define DRBD_AFTER_MAX 255
|
||||
#define DRBD_AFTER_DEF -1
|
||||
#define DRBD_MINOR_NUMBER_MIN -1
|
||||
#define DRBD_MINOR_NUMBER_MAX ((1 << 20) - 1)
|
||||
#define DRBD_MINOR_NUMBER_DEF -1
|
||||
#define DRBD_MINOR_NUMBER_SCALE '1'
|
||||
|
||||
/* } */
|
||||
|
||||
|
@ -124,11 +148,12 @@
|
|||
* the upper limit with 64bit kernel, enough ram and flexible meta data
|
||||
* is 1 PiB, currently. */
|
||||
/* DRBD_MAX_SECTORS */
|
||||
#define DRBD_DISK_SIZE_SECT_MIN 0
|
||||
#define DRBD_DISK_SIZE_SECT_MAX (1 * (2LLU << 40))
|
||||
#define DRBD_DISK_SIZE_SECT_DEF 0 /* = disabled = no user size... */
|
||||
#define DRBD_DISK_SIZE_MIN 0
|
||||
#define DRBD_DISK_SIZE_MAX (1 * (2LLU << 40))
|
||||
#define DRBD_DISK_SIZE_DEF 0 /* = disabled = no user size... */
|
||||
#define DRBD_DISK_SIZE_SCALE 's' /* sectors */
|
||||
|
||||
#define DRBD_ON_IO_ERROR_DEF EP_PASS_ON
|
||||
#define DRBD_ON_IO_ERROR_DEF EP_DETACH
|
||||
#define DRBD_FENCING_DEF FP_DONT_CARE
|
||||
#define DRBD_AFTER_SB_0P_DEF ASB_DISCONNECT
|
||||
#define DRBD_AFTER_SB_1P_DEF ASB_DISCONNECT
|
||||
|
@ -136,38 +161,59 @@
|
|||
#define DRBD_RR_CONFLICT_DEF ASB_DISCONNECT
|
||||
#define DRBD_ON_NO_DATA_DEF OND_IO_ERROR
|
||||
#define DRBD_ON_CONGESTION_DEF OC_BLOCK
|
||||
#define DRBD_READ_BALANCING_DEF RB_PREFER_LOCAL
|
||||
|
||||
#define DRBD_MAX_BIO_BVECS_MIN 0
|
||||
#define DRBD_MAX_BIO_BVECS_MAX 128
|
||||
#define DRBD_MAX_BIO_BVECS_DEF 0
|
||||
#define DRBD_MAX_BIO_BVECS_SCALE '1'
|
||||
|
||||
#define DRBD_C_PLAN_AHEAD_MIN 0
|
||||
#define DRBD_C_PLAN_AHEAD_MAX 300
|
||||
#define DRBD_C_PLAN_AHEAD_DEF 0 /* RS rate controller disabled by default */
|
||||
#define DRBD_C_PLAN_AHEAD_DEF 20
|
||||
#define DRBD_C_PLAN_AHEAD_SCALE '1'
|
||||
|
||||
#define DRBD_C_DELAY_TARGET_MIN 1
|
||||
#define DRBD_C_DELAY_TARGET_MAX 100
|
||||
#define DRBD_C_DELAY_TARGET_DEF 10
|
||||
#define DRBD_C_DELAY_TARGET_SCALE '1'
|
||||
|
||||
#define DRBD_C_FILL_TARGET_MIN 0
|
||||
#define DRBD_C_FILL_TARGET_MAX (1<<20) /* 500MByte in sec */
|
||||
#define DRBD_C_FILL_TARGET_DEF 0 /* By default disabled -> controlled by delay_target */
|
||||
#define DRBD_C_FILL_TARGET_DEF 100 /* Try to place 50KiB in socket send buffer during resync */
|
||||
#define DRBD_C_FILL_TARGET_SCALE 's' /* sectors */
|
||||
|
||||
#define DRBD_C_MAX_RATE_MIN 250 /* kByte/sec */
|
||||
#define DRBD_C_MAX_RATE_MIN 250
|
||||
#define DRBD_C_MAX_RATE_MAX (4 << 20)
|
||||
#define DRBD_C_MAX_RATE_DEF 102400
|
||||
#define DRBD_C_MAX_RATE_SCALE 'k' /* kilobytes */
|
||||
|
||||
#define DRBD_C_MIN_RATE_MIN 0 /* kByte/sec */
|
||||
#define DRBD_C_MIN_RATE_MIN 0
|
||||
#define DRBD_C_MIN_RATE_MAX (4 << 20)
|
||||
#define DRBD_C_MIN_RATE_DEF 4096
|
||||
#define DRBD_C_MIN_RATE_DEF 250
|
||||
#define DRBD_C_MIN_RATE_SCALE 'k' /* kilobytes */
|
||||
|
||||
#define DRBD_CONG_FILL_MIN 0
|
||||
#define DRBD_CONG_FILL_MAX (10<<21) /* 10GByte in sectors */
|
||||
#define DRBD_CONG_FILL_DEF 0
|
||||
#define DRBD_CONG_FILL_SCALE 's' /* sectors */
|
||||
|
||||
#define DRBD_CONG_EXTENTS_MIN DRBD_AL_EXTENTS_MIN
|
||||
#define DRBD_CONG_EXTENTS_MAX DRBD_AL_EXTENTS_MAX
|
||||
#define DRBD_CONG_EXTENTS_DEF DRBD_AL_EXTENTS_DEF
|
||||
#define DRBD_CONG_EXTENTS_SCALE DRBD_AL_EXTENTS_SCALE
|
||||
|
||||
#define DRBD_PROTOCOL_DEF DRBD_PROT_C
|
||||
|
||||
#define DRBD_DISK_BARRIER_DEF 0
|
||||
#define DRBD_DISK_FLUSHES_DEF 1
|
||||
#define DRBD_DISK_DRAIN_DEF 1
|
||||
#define DRBD_MD_FLUSHES_DEF 1
|
||||
#define DRBD_TCP_CORK_DEF 1
|
||||
#define DRBD_AL_UPDATES_DEF 1
|
||||
|
||||
#define DRBD_ALLOW_TWO_PRIMARIES_DEF 0
|
||||
#define DRBD_ALWAYS_ASBP_DEF 0
|
||||
#define DRBD_USE_RLE_DEF 1
|
||||
|
||||
#undef RANGE
|
||||
#endif
|
||||
|
|
|
@ -1,164 +0,0 @@
|
|||
/*
|
||||
PAKET( name,
|
||||
TYPE ( pn, pr, member )
|
||||
...
|
||||
)
|
||||
|
||||
You may never reissue one of the pn arguments
|
||||
*/
|
||||
|
||||
#if !defined(NL_PACKET) || !defined(NL_STRING) || !defined(NL_INTEGER) || !defined(NL_BIT) || !defined(NL_INT64)
|
||||
#error "The macros NL_PACKET, NL_STRING, NL_INTEGER, NL_INT64 and NL_BIT needs to be defined"
|
||||
#endif
|
||||
|
||||
NL_PACKET(primary, 1,
|
||||
NL_BIT( 1, T_MAY_IGNORE, primary_force)
|
||||
)
|
||||
|
||||
NL_PACKET(secondary, 2, )
|
||||
|
||||
NL_PACKET(disk_conf, 3,
|
||||
NL_INT64( 2, T_MAY_IGNORE, disk_size)
|
||||
NL_STRING( 3, T_MANDATORY, backing_dev, 128)
|
||||
NL_STRING( 4, T_MANDATORY, meta_dev, 128)
|
||||
NL_INTEGER( 5, T_MANDATORY, meta_dev_idx)
|
||||
NL_INTEGER( 6, T_MAY_IGNORE, on_io_error)
|
||||
NL_INTEGER( 7, T_MAY_IGNORE, fencing)
|
||||
NL_BIT( 37, T_MAY_IGNORE, use_bmbv)
|
||||
NL_BIT( 53, T_MAY_IGNORE, no_disk_flush)
|
||||
NL_BIT( 54, T_MAY_IGNORE, no_md_flush)
|
||||
/* 55 max_bio_size was available in 8.2.6rc2 */
|
||||
NL_INTEGER( 56, T_MAY_IGNORE, max_bio_bvecs)
|
||||
NL_BIT( 57, T_MAY_IGNORE, no_disk_barrier)
|
||||
NL_BIT( 58, T_MAY_IGNORE, no_disk_drain)
|
||||
NL_INTEGER( 89, T_MAY_IGNORE, disk_timeout)
|
||||
)
|
||||
|
||||
NL_PACKET(detach, 4,
|
||||
NL_BIT( 88, T_MANDATORY, detach_force)
|
||||
)
|
||||
|
||||
NL_PACKET(net_conf, 5,
|
||||
NL_STRING( 8, T_MANDATORY, my_addr, 128)
|
||||
NL_STRING( 9, T_MANDATORY, peer_addr, 128)
|
||||
NL_STRING( 10, T_MAY_IGNORE, shared_secret, SHARED_SECRET_MAX)
|
||||
NL_STRING( 11, T_MAY_IGNORE, cram_hmac_alg, SHARED_SECRET_MAX)
|
||||
NL_STRING( 44, T_MAY_IGNORE, integrity_alg, SHARED_SECRET_MAX)
|
||||
NL_INTEGER( 14, T_MAY_IGNORE, timeout)
|
||||
NL_INTEGER( 15, T_MANDATORY, wire_protocol)
|
||||
NL_INTEGER( 16, T_MAY_IGNORE, try_connect_int)
|
||||
NL_INTEGER( 17, T_MAY_IGNORE, ping_int)
|
||||
NL_INTEGER( 18, T_MAY_IGNORE, max_epoch_size)
|
||||
NL_INTEGER( 19, T_MAY_IGNORE, max_buffers)
|
||||
NL_INTEGER( 20, T_MAY_IGNORE, unplug_watermark)
|
||||
NL_INTEGER( 21, T_MAY_IGNORE, sndbuf_size)
|
||||
NL_INTEGER( 22, T_MAY_IGNORE, ko_count)
|
||||
NL_INTEGER( 24, T_MAY_IGNORE, after_sb_0p)
|
||||
NL_INTEGER( 25, T_MAY_IGNORE, after_sb_1p)
|
||||
NL_INTEGER( 26, T_MAY_IGNORE, after_sb_2p)
|
||||
NL_INTEGER( 39, T_MAY_IGNORE, rr_conflict)
|
||||
NL_INTEGER( 40, T_MAY_IGNORE, ping_timeo)
|
||||
NL_INTEGER( 67, T_MAY_IGNORE, rcvbuf_size)
|
||||
NL_INTEGER( 81, T_MAY_IGNORE, on_congestion)
|
||||
NL_INTEGER( 82, T_MAY_IGNORE, cong_fill)
|
||||
NL_INTEGER( 83, T_MAY_IGNORE, cong_extents)
|
||||
/* 59 addr_family was available in GIT, never released */
|
||||
NL_BIT( 60, T_MANDATORY, mind_af)
|
||||
NL_BIT( 27, T_MAY_IGNORE, want_lose)
|
||||
NL_BIT( 28, T_MAY_IGNORE, two_primaries)
|
||||
NL_BIT( 41, T_MAY_IGNORE, always_asbp)
|
||||
NL_BIT( 61, T_MAY_IGNORE, no_cork)
|
||||
NL_BIT( 62, T_MANDATORY, auto_sndbuf_size)
|
||||
NL_BIT( 70, T_MANDATORY, dry_run)
|
||||
)
|
||||
|
||||
NL_PACKET(disconnect, 6,
|
||||
NL_BIT( 84, T_MAY_IGNORE, force)
|
||||
)
|
||||
|
||||
NL_PACKET(resize, 7,
|
||||
NL_INT64( 29, T_MAY_IGNORE, resize_size)
|
||||
NL_BIT( 68, T_MAY_IGNORE, resize_force)
|
||||
NL_BIT( 69, T_MANDATORY, no_resync)
|
||||
)
|
||||
|
||||
NL_PACKET(syncer_conf, 8,
|
||||
NL_INTEGER( 30, T_MAY_IGNORE, rate)
|
||||
NL_INTEGER( 31, T_MAY_IGNORE, after)
|
||||
NL_INTEGER( 32, T_MAY_IGNORE, al_extents)
|
||||
/* NL_INTEGER( 71, T_MAY_IGNORE, dp_volume)
|
||||
* NL_INTEGER( 72, T_MAY_IGNORE, dp_interval)
|
||||
* NL_INTEGER( 73, T_MAY_IGNORE, throttle_th)
|
||||
* NL_INTEGER( 74, T_MAY_IGNORE, hold_off_th)
|
||||
* feature will be reimplemented differently with 8.3.9 */
|
||||
NL_STRING( 52, T_MAY_IGNORE, verify_alg, SHARED_SECRET_MAX)
|
||||
NL_STRING( 51, T_MAY_IGNORE, cpu_mask, 32)
|
||||
NL_STRING( 64, T_MAY_IGNORE, csums_alg, SHARED_SECRET_MAX)
|
||||
NL_BIT( 65, T_MAY_IGNORE, use_rle)
|
||||
NL_INTEGER( 75, T_MAY_IGNORE, on_no_data)
|
||||
NL_INTEGER( 76, T_MAY_IGNORE, c_plan_ahead)
|
||||
NL_INTEGER( 77, T_MAY_IGNORE, c_delay_target)
|
||||
NL_INTEGER( 78, T_MAY_IGNORE, c_fill_target)
|
||||
NL_INTEGER( 79, T_MAY_IGNORE, c_max_rate)
|
||||
NL_INTEGER( 80, T_MAY_IGNORE, c_min_rate)
|
||||
)
|
||||
|
||||
NL_PACKET(invalidate, 9, )
|
||||
NL_PACKET(invalidate_peer, 10, )
|
||||
NL_PACKET(pause_sync, 11, )
|
||||
NL_PACKET(resume_sync, 12, )
|
||||
NL_PACKET(suspend_io, 13, )
|
||||
NL_PACKET(resume_io, 14, )
|
||||
NL_PACKET(outdate, 15, )
|
||||
NL_PACKET(get_config, 16, )
|
||||
NL_PACKET(get_state, 17,
|
||||
NL_INTEGER( 33, T_MAY_IGNORE, state_i)
|
||||
)
|
||||
|
||||
NL_PACKET(get_uuids, 18,
|
||||
NL_STRING( 34, T_MAY_IGNORE, uuids, (UI_SIZE*sizeof(__u64)))
|
||||
NL_INTEGER( 35, T_MAY_IGNORE, uuids_flags)
|
||||
)
|
||||
|
||||
NL_PACKET(get_timeout_flag, 19,
|
||||
NL_BIT( 36, T_MAY_IGNORE, use_degraded)
|
||||
)
|
||||
|
||||
NL_PACKET(call_helper, 20,
|
||||
NL_STRING( 38, T_MAY_IGNORE, helper, 32)
|
||||
)
|
||||
|
||||
/* Tag nr 42 already allocated in drbd-8.1 development. */
|
||||
|
||||
NL_PACKET(sync_progress, 23,
|
||||
NL_INTEGER( 43, T_MAY_IGNORE, sync_progress)
|
||||
)
|
||||
|
||||
NL_PACKET(dump_ee, 24,
|
||||
NL_STRING( 45, T_MAY_IGNORE, dump_ee_reason, 32)
|
||||
NL_STRING( 46, T_MAY_IGNORE, seen_digest, SHARED_SECRET_MAX)
|
||||
NL_STRING( 47, T_MAY_IGNORE, calc_digest, SHARED_SECRET_MAX)
|
||||
NL_INT64( 48, T_MAY_IGNORE, ee_sector)
|
||||
NL_INT64( 49, T_MAY_IGNORE, ee_block_id)
|
||||
NL_STRING( 50, T_MAY_IGNORE, ee_data, 32 << 10)
|
||||
)
|
||||
|
||||
NL_PACKET(start_ov, 25,
|
||||
NL_INT64( 66, T_MAY_IGNORE, start_sector)
|
||||
NL_INT64( 90, T_MANDATORY, stop_sector)
|
||||
)
|
||||
|
||||
NL_PACKET(new_c_uuid, 26,
|
||||
NL_BIT( 63, T_MANDATORY, clear_bm)
|
||||
)
|
||||
|
||||
#ifdef NL_RESPONSE
|
||||
NL_RESPONSE(return_code_only, 27)
|
||||
#endif
|
||||
|
||||
#undef NL_PACKET
|
||||
#undef NL_INTEGER
|
||||
#undef NL_INT64
|
||||
#undef NL_BIT
|
||||
#undef NL_STRING
|
||||
#undef NL_RESPONSE
|
|
@ -1,84 +0,0 @@
|
|||
#ifndef DRBD_TAG_MAGIC_H
|
||||
#define DRBD_TAG_MAGIC_H
|
||||
|
||||
#define TT_END 0
|
||||
#define TT_REMOVED 0xE000
|
||||
|
||||
/* declare packet_type enums */
|
||||
enum packet_types {
|
||||
#define NL_PACKET(name, number, fields) P_ ## name = number,
|
||||
#define NL_RESPONSE(name, number) P_ ## name = number,
|
||||
#define NL_INTEGER(pn, pr, member)
|
||||
#define NL_INT64(pn, pr, member)
|
||||
#define NL_BIT(pn, pr, member)
|
||||
#define NL_STRING(pn, pr, member, len)
|
||||
#include <linux/drbd_nl.h>
|
||||
P_nl_after_last_packet,
|
||||
};
|
||||
|
||||
/* These struct are used to deduce the size of the tag lists: */
|
||||
#define NL_PACKET(name, number, fields) \
|
||||
struct name ## _tag_len_struct { fields };
|
||||
#define NL_INTEGER(pn, pr, member) \
|
||||
int member; int tag_and_len ## member;
|
||||
#define NL_INT64(pn, pr, member) \
|
||||
__u64 member; int tag_and_len ## member;
|
||||
#define NL_BIT(pn, pr, member) \
|
||||
unsigned char member:1; int tag_and_len ## member;
|
||||
#define NL_STRING(pn, pr, member, len) \
|
||||
unsigned char member[len]; int member ## _len; \
|
||||
int tag_and_len ## member;
|
||||
#include <linux/drbd_nl.h>
|
||||
|
||||
/* declare tag-list-sizes */
|
||||
static const int tag_list_sizes[] = {
|
||||
#define NL_PACKET(name, number, fields) 2 fields ,
|
||||
#define NL_INTEGER(pn, pr, member) + 4 + 4
|
||||
#define NL_INT64(pn, pr, member) + 4 + 8
|
||||
#define NL_BIT(pn, pr, member) + 4 + 1
|
||||
#define NL_STRING(pn, pr, member, len) + 4 + (len)
|
||||
#include <linux/drbd_nl.h>
|
||||
};
|
||||
|
||||
/* The two highest bits are used for the tag type */
|
||||
#define TT_MASK 0xC000
|
||||
#define TT_INTEGER 0x0000
|
||||
#define TT_INT64 0x4000
|
||||
#define TT_BIT 0x8000
|
||||
#define TT_STRING 0xC000
|
||||
/* The next bit indicates if processing of the tag is mandatory */
|
||||
#define T_MANDATORY 0x2000
|
||||
#define T_MAY_IGNORE 0x0000
|
||||
#define TN_MASK 0x1fff
|
||||
/* The remaining 13 bits are used to enumerate the tags */
|
||||
|
||||
#define tag_type(T) ((T) & TT_MASK)
|
||||
#define tag_number(T) ((T) & TN_MASK)
|
||||
|
||||
/* declare tag enums */
|
||||
#define NL_PACKET(name, number, fields) fields
|
||||
enum drbd_tags {
|
||||
#define NL_INTEGER(pn, pr, member) T_ ## member = pn | TT_INTEGER | pr ,
|
||||
#define NL_INT64(pn, pr, member) T_ ## member = pn | TT_INT64 | pr ,
|
||||
#define NL_BIT(pn, pr, member) T_ ## member = pn | TT_BIT | pr ,
|
||||
#define NL_STRING(pn, pr, member, len) T_ ## member = pn | TT_STRING | pr ,
|
||||
#include <linux/drbd_nl.h>
|
||||
};
|
||||
|
||||
struct tag {
|
||||
const char *name;
|
||||
int type_n_flags;
|
||||
int max_len;
|
||||
};
|
||||
|
||||
/* declare tag names */
|
||||
#define NL_PACKET(name, number, fields) fields
|
||||
static const struct tag tag_descriptions[] = {
|
||||
#define NL_INTEGER(pn, pr, member) [ pn ] = { #member, TT_INTEGER | pr, sizeof(int) },
|
||||
#define NL_INT64(pn, pr, member) [ pn ] = { #member, TT_INT64 | pr, sizeof(__u64) },
|
||||
#define NL_BIT(pn, pr, member) [ pn ] = { #member, TT_BIT | pr, sizeof(int) },
|
||||
#define NL_STRING(pn, pr, member, len) [ pn ] = { #member, TT_STRING | pr, (len) },
|
||||
#include <linux/drbd_nl.h>
|
||||
};
|
||||
|
||||
#endif
|
|
@ -0,0 +1,422 @@
|
|||
#ifndef GENL_MAGIC_FUNC_H
|
||||
#define GENL_MAGIC_FUNC_H
|
||||
|
||||
#include <linux/genl_magic_struct.h>
|
||||
|
||||
/*
|
||||
* Magic: declare tla policy {{{1
|
||||
* Magic: declare nested policies
|
||||
* {{{2
|
||||
*/
|
||||
#undef GENL_mc_group
|
||||
#define GENL_mc_group(group)
|
||||
|
||||
#undef GENL_notification
|
||||
#define GENL_notification(op_name, op_num, mcast_group, tla_list)
|
||||
|
||||
#undef GENL_op
|
||||
#define GENL_op(op_name, op_num, handler, tla_list)
|
||||
|
||||
#undef GENL_struct
|
||||
#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
|
||||
[tag_name] = { .type = NLA_NESTED },
|
||||
|
||||
static struct nla_policy CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy)[] = {
|
||||
#include GENL_MAGIC_INCLUDE_FILE
|
||||
};
|
||||
|
||||
#undef GENL_struct
|
||||
#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
|
||||
static struct nla_policy s_name ## _nl_policy[] __read_mostly = \
|
||||
{ s_fields };
|
||||
|
||||
#undef __field
|
||||
#define __field(attr_nr, attr_flag, name, nla_type, _type, __get, \
|
||||
__put, __is_signed) \
|
||||
[attr_nr] = { .type = nla_type },
|
||||
|
||||
#undef __array
|
||||
#define __array(attr_nr, attr_flag, name, nla_type, _type, maxlen, \
|
||||
__get, __put, __is_signed) \
|
||||
[attr_nr] = { .type = nla_type, \
|
||||
.len = maxlen - (nla_type == NLA_NUL_STRING) },
|
||||
|
||||
#include GENL_MAGIC_INCLUDE_FILE
|
||||
|
||||
#ifndef __KERNEL__
#ifndef pr_info
/*
 * Userspace fallback for pr_info(): print to stderr.
 *
 * Deliberately no trailing semicolon: the call site supplies it, so that
 * "pr_info(...);" expands to exactly one statement and stays usable as the
 * unbraced body of an if/else.
 */
#define pr_info(args...)	fprintf(stderr, args)
#endif
#endif
|
||||
|
||||
#ifdef GENL_MAGIC_DEBUG
|
||||
static void dprint_field(const char *dir, int nla_type,
|
||||
const char *name, void *valp)
|
||||
{
|
||||
__u64 val = valp ? *(__u32 *)valp : 1;
|
||||
switch (nla_type) {
|
||||
case NLA_U8: val = (__u8)val;
|
||||
case NLA_U16: val = (__u16)val;
|
||||
case NLA_U32: val = (__u32)val;
|
||||
pr_info("%s attr %s: %d 0x%08x\n", dir,
|
||||
name, (int)val, (unsigned)val);
|
||||
break;
|
||||
case NLA_U64:
|
||||
val = *(__u64*)valp;
|
||||
pr_info("%s attr %s: %lld 0x%08llx\n", dir,
|
||||
name, (long long)val, (unsigned long long)val);
|
||||
break;
|
||||
case NLA_FLAG:
|
||||
if (val)
|
||||
pr_info("%s attr %s: set\n", dir, name);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void dprint_array(const char *dir, int nla_type,
|
||||
const char *name, const char *val, unsigned len)
|
||||
{
|
||||
switch (nla_type) {
|
||||
case NLA_NUL_STRING:
|
||||
if (len && val[len-1] == '\0')
|
||||
len--;
|
||||
pr_info("%s attr %s: [len:%u] '%s'\n", dir, name, len, val);
|
||||
break;
|
||||
default:
|
||||
/* we can always show 4 byte,
|
||||
* thats what nlattr are aligned to. */
|
||||
pr_info("%s attr %s: [len:%u] %02x%02x%02x%02x ...\n",
|
||||
dir, name, len, val[0], val[1], val[2], val[3]);
|
||||
}
|
||||
}
|
||||
|
||||
/* Debug trace of a top level attribute: prints "a op b".
 * No trailing semicolon, the call site supplies it; this keeps the macro a
 * single statement, like the do {} while (0) variant used without
 * GENL_MAGIC_DEBUG. */
#define DPRINT_TLA(a, op, b) pr_info("%s %s %s\n", a, op, b)
|
||||
|
||||
/* Name is a member field name of the struct s.
|
||||
* If s is NULL (only parsing, no copy requested in *_from_attrs()),
|
||||
* nla is supposed to point to the attribute containing the information
|
||||
* corresponding to that struct member. */
|
||||
#define DPRINT_FIELD(dir, nla_type, name, s, nla) \
|
||||
do { \
|
||||
if (s) \
|
||||
dprint_field(dir, nla_type, #name, &s->name); \
|
||||
else if (nla) \
|
||||
dprint_field(dir, nla_type, #name, \
|
||||
(nla_type == NLA_FLAG) ? NULL \
|
||||
: nla_data(nla)); \
|
||||
} while (0)
|
||||
|
||||
#define DPRINT_ARRAY(dir, nla_type, name, s, nla) \
|
||||
do { \
|
||||
if (s) \
|
||||
dprint_array(dir, nla_type, #name, \
|
||||
s->name, s->name ## _len); \
|
||||
else if (nla) \
|
||||
dprint_array(dir, nla_type, #name, \
|
||||
nla_data(nla), nla_len(nla)); \
|
||||
} while (0)
|
||||
#else
|
||||
#define DPRINT_TLA(a, op, b) do {} while (0)
|
||||
#define DPRINT_FIELD(dir, nla_type, name, s, nla) do {} while (0)
|
||||
#define DPRINT_ARRAY(dir, nla_type, name, s, nla) do {} while (0)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Magic: provide conversion functions {{{1
|
||||
* populate struct from attribute table:
|
||||
* {{{2
|
||||
*/
|
||||
|
||||
/* processing of generic netlink messages is serialized.
|
||||
* use one static buffer for parsing of nested attributes */
|
||||
static struct nlattr *nested_attr_tb[128];
|
||||
|
||||
#ifndef BUILD_BUG_ON
|
||||
/* Force a compilation error if condition is true */
|
||||
#define BUILD_BUG_ON(condition) ((void)BUILD_BUG_ON_ZERO(condition))
|
||||
/* Force a compilation error if condition is true, but also produce a
|
||||
result (of value 0 and type size_t), so the expression can be used
|
||||
e.g. in a structure initializer (or where-ever else comma expressions
|
||||
aren't permitted). */
|
||||
#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
|
||||
#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); }))
|
||||
#endif
|
||||
|
||||
/*
 * For every GENL_struct() in the included file, generate three static
 * functions:
 *
 *   __<s_name>_from_attrs(s, info, exclude_invariants)
 *	Looks up the nested attribute info->attrs[tag_number]; returns
 *	-ENOMSG if it is absent.  Otherwise parses it into the shared
 *	nested_attr_tb using <s_name>_nl_policy, returns the parse error if
 *	any, then runs the per-field code expanded from s_fields (which may
 *	return early with an error) and finally returns 0.
 *   <s_name>_from_attrs(s, info)
 *	Same, with exclude_invariants == false.
 *   <s_name>_from_attrs_for_change(s, info)
 *	Same, with exclude_invariants == true.
 *
 * Not every generated function is called by every user of this header, so
 * each one carries __attribute__((unused)).  The attribute is part of the
 * function's own declaration; placing it after the closing brace would
 * attach it to whatever declaration happens to follow instead.
 */
#undef GENL_struct
#define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
/* *_from_attrs functions are static, but potentially unused */		\
static int __attribute__((unused))					\
__ ## s_name ## _from_attrs(struct s_name *s,				\
		struct genl_info *info, bool exclude_invariants)	\
{									\
	const int maxtype = ARRAY_SIZE(s_name ## _nl_policy)-1;		\
	struct nlattr *tla = info->attrs[tag_number];			\
	struct nlattr **ntb = nested_attr_tb;				\
	struct nlattr *nla;						\
	int err;							\
	BUILD_BUG_ON(ARRAY_SIZE(s_name ## _nl_policy) > ARRAY_SIZE(nested_attr_tb));	\
	if (!tla)							\
		return -ENOMSG;						\
	DPRINT_TLA(#s_name, "<=-", #tag_name);				\
	err = drbd_nla_parse_nested(ntb, maxtype, tla, s_name ## _nl_policy);	\
	if (err)							\
		return err;						\
									\
	s_fields							\
	return 0;							\
}									\
static int __attribute__((unused))					\
s_name ## _from_attrs(struct s_name *s,					\
		struct genl_info *info)					\
{									\
	return __ ## s_name ## _from_attrs(s, info, false);		\
}									\
static int __attribute__((unused))					\
s_name ## _from_attrs_for_change(struct s_name *s,			\
		struct genl_info *info)					\
{									\
	return __ ## s_name ## _from_attrs(s, info, true);		\
}
|
||||
|
||||
#define __assign(attr_nr, attr_flag, name, nla_type, type, assignment...) \
|
||||
nla = ntb[attr_nr]; \
|
||||
if (nla) { \
|
||||
if (exclude_invariants && ((attr_flag) & DRBD_F_INVARIANT)) { \
|
||||
pr_info("<< must not change invariant attr: %s\n", #name); \
|
||||
return -EEXIST; \
|
||||
} \
|
||||
assignment; \
|
||||
} else if (exclude_invariants && ((attr_flag) & DRBD_F_INVARIANT)) { \
|
||||
/* attribute missing from payload, */ \
|
||||
/* which was expected */ \
|
||||
} else if ((attr_flag) & DRBD_F_REQUIRED) { \
|
||||
pr_info("<< missing attr: %s\n", #name); \
|
||||
return -ENOMSG; \
|
||||
}
|
||||
|
||||
#undef __field
|
||||
#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \
|
||||
__is_signed) \
|
||||
__assign(attr_nr, attr_flag, name, nla_type, type, \
|
||||
if (s) \
|
||||
s->name = __get(nla); \
|
||||
DPRINT_FIELD("<<", nla_type, name, s, nla))
|
||||
|
||||
/* validate_nla() already checked nla_len <= maxlen appropriately. */
|
||||
#undef __array
|
||||
#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \
|
||||
__get, __put, __is_signed) \
|
||||
__assign(attr_nr, attr_flag, name, nla_type, type, \
|
||||
if (s) \
|
||||
s->name ## _len = \
|
||||
__get(s->name, nla, maxlen); \
|
||||
DPRINT_ARRAY("<<", nla_type, name, s, nla))
|
||||
|
||||
#include GENL_MAGIC_INCLUDE_FILE
|
||||
|
||||
#undef GENL_struct
|
||||
#define GENL_struct(tag_name, tag_number, s_name, s_fields)
|
||||
|
||||
/*
|
||||
* Magic: define op number to op name mapping {{{1
|
||||
* {{{2
|
||||
*/
|
||||
/*
 * Map a generic netlink command number to its symbolic name.
 *
 * GENL_op() is redefined so that every operation listed in
 * GENL_MAGIC_INCLUDE_FILE expands to "case <op_num>: return "<op_name>";",
 * which turns the include below into the body of the switch.
 * Any command number without such an entry yields the string "unknown".
 */
const char *CONCAT_(GENL_MAGIC_FAMILY, _genl_cmd_to_str)(__u8 cmd)
|
||||
{
|
||||
switch (cmd) {
|
||||
#undef GENL_op
|
||||
#define GENL_op(op_name, op_num, handler, tla_list) \
|
||||
case op_num: return #op_name;
|
||||
#include GENL_MAGIC_INCLUDE_FILE
|
||||
default:
|
||||
return "unknown";
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __KERNEL__
|
||||
#include <linux/stringify.h>
|
||||
/*
|
||||
* Magic: define genl_ops {{{1
|
||||
* {{{2
|
||||
*/
|
||||
|
||||
#undef GENL_op
|
||||
#define GENL_op(op_name, op_num, handler, tla_list) \
|
||||
{ \
|
||||
handler \
|
||||
.cmd = op_name, \
|
||||
.policy = CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy), \
|
||||
},
|
||||
|
||||
#define ZZZ_genl_ops CONCAT_(GENL_MAGIC_FAMILY, _genl_ops)
|
||||
static struct genl_ops ZZZ_genl_ops[] __read_mostly = {
|
||||
#include GENL_MAGIC_INCLUDE_FILE
|
||||
};
|
||||
|
||||
#undef GENL_op
|
||||
#define GENL_op(op_name, op_num, handler, tla_list)
|
||||
|
||||
/*
|
||||
* Define the genl_family, multicast groups, {{{1
|
||||
* and provide register/unregister functions.
|
||||
* {{{2
|
||||
*/
|
||||
#define ZZZ_genl_family CONCAT_(GENL_MAGIC_FAMILY, _genl_family)
|
||||
/*
 * The generic netlink family descriptor for GENL_MAGIC_FAMILY.
 *
 * The family id is allocated dynamically (GENL_ID_GENERATE); name and
 * version come from the GENL_MAGIC_* configuration macros.  An optional
 * family specific header size is honoured if GENL_MAGIC_FAMILY_HDRSZ is
 * defined by the includer.
 */
static struct genl_family ZZZ_genl_family __read_mostly = {
	.id = GENL_ID_GENERATE,
	.name = __stringify(GENL_MAGIC_FAMILY),
	.version = GENL_MAGIC_VERSION,
#ifdef GENL_MAGIC_FAMILY_HDRSZ
	.hdrsize = NLA_ALIGN(GENL_MAGIC_FAMILY_HDRSZ),
#endif
	/* Derive the top level policy table from the family name, the same
	 * way the ops table does, rather than hard-coding the "drbd" family;
	 * for GENL_MAGIC_FAMILY == drbd this expands to the same symbol. */
	.maxattr = ARRAY_SIZE(CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy))-1,
};
|
||||
|
||||
/*
|
||||
* Magic: define multicast groups
|
||||
* Magic: define multicast group registration helper
|
||||
*/
|
||||
#undef GENL_mc_group
|
||||
#define GENL_mc_group(group) \
|
||||
static struct genl_multicast_group \
|
||||
CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group) __read_mostly = { \
|
||||
.name = #group, \
|
||||
}; \
|
||||
static int CONCAT_(GENL_MAGIC_FAMILY, _genl_multicast_ ## group)( \
|
||||
struct sk_buff *skb, gfp_t flags) \
|
||||
{ \
|
||||
unsigned int group_id = \
|
||||
CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group).id; \
|
||||
if (!group_id) \
|
||||
return -EINVAL; \
|
||||
return genlmsg_multicast(skb, 0, group_id, flags); \
|
||||
}
|
||||
|
||||
#include GENL_MAGIC_INCLUDE_FILE
|
||||
|
||||
/*
 * Register the generic netlink family together with its ops table, then
 * register every multicast group declared through GENL_mc_group() in
 * GENL_MAGIC_INCLUDE_FILE (the macro is redefined below so that the include
 * expands to one registration call per group).
 *
 * Returns 0 on success.  If the family registration fails, its error is
 * returned directly; if a multicast group fails to register, the family is
 * unregistered again before the error is returned.
 *
 * NOTE(review): the pr_info() passes #group for the first "%s" and the
 * family name for the second, so the line reads
 * "<group>: mcg <family>: <id>" -- the two arguments look swapped;
 * confirm the intended order before relying on the log format.
 */
int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void)
|
||||
{
|
||||
int err = genl_register_family_with_ops(&ZZZ_genl_family,
|
||||
ZZZ_genl_ops, ARRAY_SIZE(ZZZ_genl_ops));
|
||||
if (err)
|
||||
return err;
|
||||
#undef GENL_mc_group
|
||||
#define GENL_mc_group(group) \
|
||||
err = genl_register_mc_group(&ZZZ_genl_family, \
|
||||
&CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group)); \
|
||||
if (err) \
|
||||
goto fail; \
|
||||
else \
|
||||
pr_info("%s: mcg %s: %u\n", #group, \
|
||||
__stringify(GENL_MAGIC_FAMILY), \
|
||||
CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group).id);
|
||||
|
||||
#include GENL_MAGIC_INCLUDE_FILE
|
||||
|
||||
#undef GENL_mc_group
|
||||
#define GENL_mc_group(group)
|
||||
return 0;
|
||||
fail:
|
||||
genl_unregister_family(&ZZZ_genl_family);
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
 * Counterpart of <family>_genl_register(): unregister the generic netlink
 * family described by ZZZ_genl_family.
 */
void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void)
|
||||
{
|
||||
genl_unregister_family(&ZZZ_genl_family);
|
||||
}
|
||||
|
||||
/*
|
||||
* Magic: provide conversion functions {{{1
|
||||
* populate skb from struct.
|
||||
* {{{2
|
||||
*/
|
||||
|
||||
#undef GENL_op
|
||||
#define GENL_op(op_name, op_num, handler, tla_list)
|
||||
|
||||
#undef GENL_struct
|
||||
#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
|
||||
static int s_name ## _to_skb(struct sk_buff *skb, struct s_name *s, \
|
||||
const bool exclude_sensitive) \
|
||||
{ \
|
||||
struct nlattr *tla = nla_nest_start(skb, tag_number); \
|
||||
if (!tla) \
|
||||
goto nla_put_failure; \
|
||||
DPRINT_TLA(#s_name, "-=>", #tag_name); \
|
||||
s_fields \
|
||||
nla_nest_end(skb, tla); \
|
||||
return 0; \
|
||||
\
|
||||
nla_put_failure: \
|
||||
if (tla) \
|
||||
nla_nest_cancel(skb, tla); \
|
||||
return -EMSGSIZE; \
|
||||
} \
|
||||
static inline int s_name ## _to_priv_skb(struct sk_buff *skb, \
|
||||
struct s_name *s) \
|
||||
{ \
|
||||
return s_name ## _to_skb(skb, s, 0); \
|
||||
} \
|
||||
static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb, \
|
||||
struct s_name *s) \
|
||||
{ \
|
||||
return s_name ## _to_skb(skb, s, 1); \
|
||||
}
|
||||
|
||||
|
||||
#undef __field
|
||||
#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \
|
||||
__is_signed) \
|
||||
if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) { \
|
||||
DPRINT_FIELD(">>", nla_type, name, s, NULL); \
|
||||
if (__put(skb, attr_nr, s->name)) \
|
||||
goto nla_put_failure; \
|
||||
}
|
||||
|
||||
#undef __array
|
||||
#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \
|
||||
__get, __put, __is_signed) \
|
||||
if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) { \
|
||||
DPRINT_ARRAY(">>",nla_type, name, s, NULL); \
|
||||
if (__put(skb, attr_nr, min_t(int, maxlen, \
|
||||
s->name ## _len + (nla_type == NLA_NUL_STRING)),\
|
||||
s->name)) \
|
||||
goto nla_put_failure; \
|
||||
}
|
||||
|
||||
#include GENL_MAGIC_INCLUDE_FILE
|
||||
|
||||
|
||||
/* Functions for initializing structs to default values. */
|
||||
|
||||
#undef __field
|
||||
#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \
|
||||
__is_signed)
|
||||
#undef __array
|
||||
#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \
|
||||
__get, __put, __is_signed)
|
||||
#undef __u32_field_def
|
||||
#define __u32_field_def(attr_nr, attr_flag, name, default) \
|
||||
x->name = default;
|
||||
#undef __s32_field_def
|
||||
#define __s32_field_def(attr_nr, attr_flag, name, default) \
|
||||
x->name = default;
|
||||
#undef __flg_field_def
|
||||
#define __flg_field_def(attr_nr, attr_flag, name, default) \
|
||||
x->name = default;
|
||||
#undef __str_field_def
|
||||
#define __str_field_def(attr_nr, attr_flag, name, maxlen) \
|
||||
memset(x->name, 0, sizeof(x->name)); \
|
||||
x->name ## _len = 0;
|
||||
#undef GENL_struct
|
||||
#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
|
||||
static void set_ ## s_name ## _defaults(struct s_name *x) __attribute__((unused)); \
|
||||
static void set_ ## s_name ## _defaults(struct s_name *x) { \
|
||||
s_fields \
|
||||
}
|
||||
|
||||
#include GENL_MAGIC_INCLUDE_FILE
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
|
||||
/* }}}1 */
|
||||
#endif /* GENL_MAGIC_FUNC_H */
|
||||
/* vim: set foldmethod=marker foldlevel=1 nofoldenable : */
|
|
@ -0,0 +1,277 @@
|
|||
#ifndef GENL_MAGIC_STRUCT_H
|
||||
#define GENL_MAGIC_STRUCT_H
|
||||
|
||||
#ifndef GENL_MAGIC_FAMILY
|
||||
# error "you need to define GENL_MAGIC_FAMILY before inclusion"
|
||||
#endif
|
||||
|
||||
#ifndef GENL_MAGIC_VERSION
|
||||
# error "you need to define GENL_MAGIC_VERSION before inclusion"
|
||||
#endif
|
||||
|
||||
#ifndef GENL_MAGIC_INCLUDE_FILE
|
||||
# error "you need to define GENL_MAGIC_INCLUDE_FILE before inclusion"
|
||||
#endif
|
||||
|
||||
#include <linux/genetlink.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#define CONCAT__(a,b) a ## b
|
||||
#define CONCAT_(a,b) CONCAT__(a,b)
|
||||
|
||||
extern int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void);
|
||||
extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void);
|
||||
|
||||
/*
|
||||
* Extension of genl attribute validation policies {{{2
|
||||
*/
|
||||
|
||||
/*
|
||||
* @DRBD_GENLA_F_MANDATORY: By default, netlink ignores attributes it does not
|
||||
* know about. This flag can be set in nlattr->nla_type to indicate that this
|
||||
* attribute must not be ignored.
|
||||
*
|
||||
* We check and remove this flag in drbd_nla_check_mandatory() before
|
||||
* validating the attribute types and lengths via nla_parse_nested().
|
||||
*/
|
||||
#define DRBD_GENLA_F_MANDATORY (1 << 14)
|
||||
|
||||
/*
|
||||
* Flags specific to drbd and not visible at the netlink layer, used in
|
||||
* <struct>_from_attrs and <struct>_to_skb:
|
||||
*
|
||||
* @DRBD_F_REQUIRED: Attribute is required; a request without this attribute is
|
||||
* invalid.
|
||||
*
|
||||
* @DRBD_F_SENSITIVE: Attribute includes sensitive information and must not be
|
||||
* included in unprivileged get requests or broadcasts.
|
||||
*
|
||||
* @DRBD_F_INVARIANT: Attribute is set when an object is initially created, but
|
||||
* cannot subsequently be changed.
|
||||
*/
|
||||
#define DRBD_F_REQUIRED (1 << 0)
|
||||
#define DRBD_F_SENSITIVE (1 << 1)
|
||||
#define DRBD_F_INVARIANT (1 << 2)
|
||||
|
||||
#define __nla_type(x) ((__u16)((x) & NLA_TYPE_MASK & ~DRBD_GENLA_F_MANDATORY))
|
||||
|
||||
/* }}}1
|
||||
* MAGIC
|
||||
* multi-include macro expansion magic starts here
|
||||
*/
|
||||
|
||||
/* MAGIC helpers {{{2 */
|
||||
|
||||
/* possible field types */
|
||||
#define __flg_field(attr_nr, attr_flag, name) \
|
||||
__field(attr_nr, attr_flag, name, NLA_U8, char, \
|
||||
nla_get_u8, nla_put_u8, false)
|
||||
#define __u8_field(attr_nr, attr_flag, name) \
|
||||
__field(attr_nr, attr_flag, name, NLA_U8, unsigned char, \
|
||||
nla_get_u8, nla_put_u8, false)
|
||||
#define __u16_field(attr_nr, attr_flag, name) \
|
||||
__field(attr_nr, attr_flag, name, NLA_U16, __u16, \
|
||||
nla_get_u16, nla_put_u16, false)
|
||||
#define __u32_field(attr_nr, attr_flag, name) \
|
||||
__field(attr_nr, attr_flag, name, NLA_U32, __u32, \
|
||||
nla_get_u32, nla_put_u32, false)
|
||||
#define __s32_field(attr_nr, attr_flag, name) \
|
||||
__field(attr_nr, attr_flag, name, NLA_U32, __s32, \
|
||||
nla_get_u32, nla_put_u32, true)
|
||||
#define __u64_field(attr_nr, attr_flag, name) \
|
||||
__field(attr_nr, attr_flag, name, NLA_U64, __u64, \
|
||||
nla_get_u64, nla_put_u64, false)
|
||||
#define __str_field(attr_nr, attr_flag, name, maxlen) \
|
||||
__array(attr_nr, attr_flag, name, NLA_NUL_STRING, char, maxlen, \
|
||||
nla_strlcpy, nla_put, false)
|
||||
#define __bin_field(attr_nr, attr_flag, name, maxlen) \
|
||||
__array(attr_nr, attr_flag, name, NLA_BINARY, char, maxlen, \
|
||||
nla_memcpy, nla_put, false)
|
||||
|
||||
/* fields with default values */
|
||||
#define __flg_field_def(attr_nr, attr_flag, name, default) \
|
||||
__flg_field(attr_nr, attr_flag, name)
|
||||
#define __u32_field_def(attr_nr, attr_flag, name, default) \
|
||||
__u32_field(attr_nr, attr_flag, name)
|
||||
#define __s32_field_def(attr_nr, attr_flag, name, default) \
|
||||
__s32_field(attr_nr, attr_flag, name)
|
||||
#define __str_field_def(attr_nr, attr_flag, name, maxlen) \
|
||||
__str_field(attr_nr, attr_flag, name, maxlen)
|
||||
|
||||
#define GENL_op_init(args...) args
|
||||
#define GENL_doit(handler) \
|
||||
.doit = handler, \
|
||||
.flags = GENL_ADMIN_PERM,
|
||||
#define GENL_dumpit(handler) \
|
||||
.dumpit = handler, \
|
||||
.flags = GENL_ADMIN_PERM,
|
||||
|
||||
/* }}}1
|
||||
* Magic: define the enum symbols for genl_ops
|
||||
* Magic: define the enum symbols for top level attributes
|
||||
* Magic: define the enum symbols for nested attributes
|
||||
* {{{2
|
||||
*/
|
||||
|
||||
#undef GENL_struct
|
||||
#define GENL_struct(tag_name, tag_number, s_name, s_fields)
|
||||
|
||||
#undef GENL_mc_group
|
||||
#define GENL_mc_group(group)
|
||||
|
||||
#undef GENL_notification
|
||||
#define GENL_notification(op_name, op_num, mcast_group, tla_list) \
|
||||
op_name = op_num,
|
||||
|
||||
#undef GENL_op
|
||||
#define GENL_op(op_name, op_num, handler, tla_list) \
|
||||
op_name = op_num,
|
||||
|
||||
enum {
|
||||
#include GENL_MAGIC_INCLUDE_FILE
|
||||
};
|
||||
|
||||
#undef GENL_notification
|
||||
#define GENL_notification(op_name, op_num, mcast_group, tla_list)
|
||||
|
||||
#undef GENL_op
|
||||
#define GENL_op(op_name, op_num, handler, attr_list)
|
||||
|
||||
#undef GENL_struct
|
||||
#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
|
||||
tag_name = tag_number,
|
||||
|
||||
enum {
|
||||
#include GENL_MAGIC_INCLUDE_FILE
|
||||
};
|
||||
|
||||
#undef GENL_struct
|
||||
#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
|
||||
enum { \
|
||||
s_fields \
|
||||
};
|
||||
|
||||
#undef __field
|
||||
#define __field(attr_nr, attr_flag, name, nla_type, type, \
|
||||
__get, __put, __is_signed) \
|
||||
T_ ## name = (__u16)(attr_nr | ((attr_flag) & DRBD_GENLA_F_MANDATORY)),
|
||||
|
||||
#undef __array
|
||||
#define __array(attr_nr, attr_flag, name, nla_type, type, \
|
||||
maxlen, __get, __put, __is_signed) \
|
||||
T_ ## name = (__u16)(attr_nr | ((attr_flag) & DRBD_GENLA_F_MANDATORY)),
|
||||
|
||||
#include GENL_MAGIC_INCLUDE_FILE
|
||||
|
||||
/* }}}1
|
||||
* Magic: compile time assert unique numbers for operations
|
||||
* Magic: -"- unique numbers for top level attributes
|
||||
* Magic: -"- unique numbers for nested attributes
|
||||
* {{{2
|
||||
*/
|
||||
|
||||
#undef GENL_struct
|
||||
#define GENL_struct(tag_name, tag_number, s_name, s_fields)
|
||||
|
||||
#undef GENL_op
|
||||
#define GENL_op(op_name, op_num, handler, attr_list) \
|
||||
case op_name:
|
||||
|
||||
#undef GENL_notification
|
||||
#define GENL_notification(op_name, op_num, mcast_group, tla_list) \
|
||||
case op_name:
|
||||
|
||||
static inline void ct_assert_unique_operations(void)
|
||||
{
|
||||
switch (0) {
|
||||
#include GENL_MAGIC_INCLUDE_FILE
|
||||
;
|
||||
}
|
||||
}
|
||||
|
||||
#undef GENL_op
|
||||
#define GENL_op(op_name, op_num, handler, attr_list)
|
||||
|
||||
#undef GENL_notification
|
||||
#define GENL_notification(op_name, op_num, mcast_group, tla_list)
|
||||
|
||||
#undef GENL_struct
|
||||
#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
|
||||
case tag_number:
|
||||
|
||||
static inline void ct_assert_unique_top_level_attributes(void)
|
||||
{
|
||||
switch (0) {
|
||||
#include GENL_MAGIC_INCLUDE_FILE
|
||||
;
|
||||
}
|
||||
}
|
||||
|
||||
#undef GENL_struct
|
||||
#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
|
||||
static inline void ct_assert_unique_ ## s_name ## _attributes(void) \
|
||||
{ \
|
||||
switch (0) { \
|
||||
s_fields \
|
||||
; \
|
||||
} \
|
||||
}
|
||||
|
||||
#undef __field
|
||||
#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \
|
||||
__is_signed) \
|
||||
case attr_nr:
|
||||
|
||||
#undef __array
|
||||
#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \
|
||||
__get, __put, __is_signed) \
|
||||
case attr_nr:
|
||||
|
||||
#include GENL_MAGIC_INCLUDE_FILE
|
||||
|
||||
/* }}}1
|
||||
* Magic: declare structs
|
||||
* struct <name> {
|
||||
* fields
|
||||
* };
|
||||
* {{{2
|
||||
*/
|
||||
|
||||
#undef GENL_struct
|
||||
#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
|
||||
struct s_name { s_fields };
|
||||
|
||||
#undef __field
|
||||
#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \
|
||||
__is_signed) \
|
||||
type name;
|
||||
|
||||
#undef __array
|
||||
#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \
|
||||
__get, __put, __is_signed) \
|
||||
type name[maxlen]; \
|
||||
__u32 name ## _len;
|
||||
|
||||
#include GENL_MAGIC_INCLUDE_FILE
|
||||
|
||||
#undef GENL_struct
|
||||
#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
|
||||
enum { \
|
||||
s_fields \
|
||||
};
|
||||
|
||||
#undef __field
|
||||
#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \
|
||||
is_signed) \
|
||||
F_ ## name ## _IS_SIGNED = is_signed,
|
||||
|
||||
#undef __array
|
||||
#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \
|
||||
__get, __put, is_signed) \
|
||||
F_ ## name ## _IS_SIGNED = is_signed,
|
||||
|
||||
#include GENL_MAGIC_INCLUDE_FILE
|
||||
|
||||
/* }}}1 */
|
||||
#endif /* GENL_MAGIC_STRUCT_H */
|
||||
/* vim: set foldmethod=marker nofoldenable : */
|
|
@ -152,4 +152,15 @@ void ida_simple_remove(struct ida *ida, unsigned int id);
|
|||
|
||||
void __init idr_init_cache(void);
|
||||
|
||||
/**
|
||||
* idr_for_each_entry - iterate over an idr's elements of a given type
|
||||
* @idp: idr handle
|
||||
* @entry: the type * to use as cursor
|
||||
* @id: id entry's key
|
||||
*/
|
||||
#define idr_for_each_entry(idp, entry, id) \
|
||||
for (id = 0, entry = (typeof(entry))idr_get_next((idp), &(id)); \
|
||||
entry != NULL; \
|
||||
++id, entry = (typeof(entry))idr_get_next((idp), &(id)))
|
||||
|
||||
#endif /* __IDR_H__ */
|
||||
|
|
|
@ -166,9 +166,11 @@ struct lc_element {
|
|||
/* if we want to track a larger set of objects,
|
||||
* it needs to become arch independent u64 */
|
||||
unsigned lc_number;
|
||||
|
||||
/* special label when on free list */
|
||||
#define LC_FREE (~0U)
|
||||
|
||||
/* for pending changes */
|
||||
unsigned lc_new_number;
|
||||
};
|
||||
|
||||
struct lru_cache {
|
||||
|
@ -176,6 +178,7 @@ struct lru_cache {
|
|||
struct list_head lru;
|
||||
struct list_head free;
|
||||
struct list_head in_use;
|
||||
struct list_head to_be_changed;
|
||||
|
||||
/* the pre-created kmem cache to allocate the objects from */
|
||||
struct kmem_cache *lc_cache;
|
||||
|
@ -186,7 +189,7 @@ struct lru_cache {
|
|||
size_t element_off;
|
||||
|
||||
/* number of elements (indices) */
|
||||
unsigned int nr_elements;
|
||||
unsigned int nr_elements;
|
||||
/* Arbitrary limit on maximum tracked objects. Practical limit is much
|
||||
* lower due to allocation failures, probably. For typical use cases,
|
||||
* nr_elements should be a few thousand at most.
|
||||
|
@ -194,18 +197,19 @@ struct lru_cache {
|
|||
* 8 high bits of .lc_index to be overloaded with flags in the future. */
|
||||
#define LC_MAX_ACTIVE (1<<24)
|
||||
|
||||
/* allow to accumulate a few (index:label) changes,
|
||||
* but no more than max_pending_changes */
|
||||
unsigned int max_pending_changes;
|
||||
/* number of elements currently on to_be_changed list */
|
||||
unsigned int pending_changes;
|
||||
|
||||
/* statistics */
|
||||
unsigned used; /* number of lelements currently on in_use list */
|
||||
unsigned long hits, misses, starving, dirty, changed;
|
||||
unsigned used; /* number of elements currently on in_use list */
|
||||
unsigned long hits, misses, starving, locked, changed;
|
||||
|
||||
/* see below: flag-bits for lru_cache */
|
||||
unsigned long flags;
|
||||
|
||||
/* when changing the label of an index element */
|
||||
unsigned int new_number;
|
||||
|
||||
/* for paranoia when changing the label of an index element */
|
||||
struct lc_element *changing_element;
|
||||
|
||||
void *lc_private;
|
||||
const char *name;
|
||||
|
@ -221,10 +225,15 @@ enum {
|
|||
/* debugging aid, to catch concurrent access early.
|
||||
* user needs to guarantee exclusive access by proper locking! */
|
||||
__LC_PARANOIA,
|
||||
/* if we need to change the set, but currently there is a changing
|
||||
* transaction pending, we are "dirty", and must deferr further
|
||||
* changing requests */
|
||||
|
||||
/* annotate that the set is "dirty", possibly accumulating further
|
||||
* changes, until a transaction is finally triggered */
|
||||
__LC_DIRTY,
|
||||
|
||||
/* Locked, no further changes allowed.
|
||||
* Also used to serialize changing transactions. */
|
||||
__LC_LOCKED,
|
||||
|
||||
/* if we need to change the set, but currently there is no free nor
|
||||
* unused element available, we are "starving", and must not give out
|
||||
* further references, to guarantee that eventually some refcnt will
|
||||
|
@ -236,9 +245,11 @@ enum {
|
|||
};
|
||||
#define LC_PARANOIA (1<<__LC_PARANOIA)
|
||||
#define LC_DIRTY (1<<__LC_DIRTY)
|
||||
#define LC_LOCKED (1<<__LC_LOCKED)
|
||||
#define LC_STARVING (1<<__LC_STARVING)
|
||||
|
||||
extern struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
|
||||
unsigned max_pending_changes,
|
||||
unsigned e_count, size_t e_size, size_t e_off);
|
||||
extern void lc_reset(struct lru_cache *lc);
|
||||
extern void lc_destroy(struct lru_cache *lc);
|
||||
|
@ -249,7 +260,7 @@ extern struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr);
|
|||
extern struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr);
|
||||
extern struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr);
|
||||
extern unsigned int lc_put(struct lru_cache *lc, struct lc_element *e);
|
||||
extern void lc_changed(struct lru_cache *lc, struct lc_element *e);
|
||||
extern void lc_committed(struct lru_cache *lc);
|
||||
|
||||
struct seq_file;
|
||||
extern size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc);
|
||||
|
@ -258,16 +269,28 @@ extern void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char
|
|||
void (*detail) (struct seq_file *, struct lc_element *));
|
||||
|
||||
/**
|
||||
* lc_try_lock - can be used to stop lc_get() from changing the tracked set
|
||||
* lc_try_lock_for_transaction - can be used to stop lc_get() from changing the tracked set
|
||||
* @lc: the lru cache to operate on
|
||||
*
|
||||
* Allows (expects) the set to be "dirty". Note that the reference counts and
|
||||
* order on the active and lru lists may still change. Used to serialize
|
||||
* changing transactions. Returns true if we acquired the lock.
|
||||
*/
|
||||
static inline int lc_try_lock_for_transaction(struct lru_cache *lc)
|
||||
{
|
||||
return !test_and_set_bit(__LC_LOCKED, &lc->flags);
|
||||
}
|
||||
|
||||
/**
|
||||
* lc_try_lock - variant to stop lc_get() from changing the tracked set
|
||||
* @lc: the lru cache to operate on
|
||||
*
|
||||
* Note that the reference counts and order on the active and lru lists may
|
||||
* still change. Returns true if we acquired the lock.
|
||||
* still change. Only works on a "clean" set. Returns true if we acquired the
|
||||
* lock, which means there are no pending changes, and any further attempt to
|
||||
* change the set will not succeed until the next lc_unlock().
|
||||
*/
|
||||
static inline int lc_try_lock(struct lru_cache *lc)
|
||||
{
|
||||
return !test_and_set_bit(__LC_DIRTY, &lc->flags);
|
||||
}
|
||||
extern int lc_try_lock(struct lru_cache *lc);
|
||||
|
||||
/**
|
||||
* lc_unlock - unlock @lc, allow lc_get() to change the set again
|
||||
|
@ -276,14 +299,10 @@ static inline int lc_try_lock(struct lru_cache *lc)
|
|||
static inline void lc_unlock(struct lru_cache *lc)
|
||||
{
|
||||
clear_bit(__LC_DIRTY, &lc->flags);
|
||||
smp_mb__after_clear_bit();
|
||||
clear_bit_unlock(__LC_LOCKED, &lc->flags);
|
||||
}
|
||||
|
||||
static inline int lc_is_used(struct lru_cache *lc, unsigned int enr)
|
||||
{
|
||||
struct lc_element *e = lc_find(lc, enr);
|
||||
return e && e->refcnt;
|
||||
}
|
||||
extern bool lc_is_used(struct lru_cache *lc, unsigned int enr);
|
||||
|
||||
#define lc_entry(ptr, type, member) \
|
||||
container_of(ptr, type, member)
|
||||
|
|
371
lib/lru_cache.c
371
lib/lru_cache.c
|
@ -44,8 +44,8 @@ MODULE_LICENSE("GPL");
|
|||
} while (0)
|
||||
|
||||
#define RETURN(x...) do { \
|
||||
clear_bit(__LC_PARANOIA, &lc->flags); \
|
||||
smp_mb__after_clear_bit(); return x ; } while (0)
|
||||
clear_bit_unlock(__LC_PARANOIA, &lc->flags); \
|
||||
return x ; } while (0)
|
||||
|
||||
/* BUG() if e is not one of the elements tracked by lc */
|
||||
#define PARANOIA_LC_ELEMENT(lc, e) do { \
|
||||
|
@ -55,9 +55,40 @@ MODULE_LICENSE("GPL");
|
|||
BUG_ON(i >= lc_->nr_elements); \
|
||||
BUG_ON(lc_->lc_element[i] != e_); } while (0)
|
||||
|
||||
|
||||
/* We need to atomically
|
||||
* - try to grab the lock (set LC_LOCKED)
|
||||
* - only if there is no pending transaction
|
||||
* (neither LC_DIRTY nor LC_STARVING is set)
|
||||
* Because of PARANOIA_ENTRY() above abusing lc->flags as well,
|
||||
* it is not sufficient to just say
|
||||
* return 0 == cmpxchg(&lc->flags, 0, LC_LOCKED);
|
||||
*/
|
||||
/*
 * Try to take the lock on a "clean" set.
 *
 * Attempts to switch lc->flags from 0 to LC_LOCKED with cmpxchg().  The
 * attempt is repeated for as long as the value observed is exactly
 * LC_PARANOIA.  Returns non-zero if the observed value was 0 (the exchange
 * took place), and 0 for any other observed flag value.
 */
int lc_try_lock(struct lru_cache *lc)
|
||||
{
|
||||
unsigned long val;
|
||||
do {
|
||||
val = cmpxchg(&lc->flags, 0, LC_LOCKED);
|
||||
} while (unlikely (val == LC_PARANOIA));
|
||||
/* Spin until no-one is inside a PARANOIA_ENTRY()/RETURN() section. */
|
||||
return 0 == val;
|
||||
/* Everything below is compiled out (#if 0) and follows the return above;
 * it is kept only as a sketch of a different strategy. */
#if 0
|
||||
/* Alternative approach, spin in case someone enters or leaves a
|
||||
* PARANOIA_ENTRY()/RETURN() section. */
|
||||
unsigned long old, new, val;
|
||||
do {
|
||||
old = lc->flags & LC_PARANOIA;
|
||||
new = old | LC_LOCKED;
|
||||
val = cmpxchg(&lc->flags, old, new);
|
||||
} while (unlikely (val == (old ^ LC_PARANOIA)));
|
||||
return old == val;
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* lc_create - prepares to track objects in an active set
|
||||
* @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details
|
||||
* @max_pending_changes: maximum changes to accumulate until a transaction is required
|
||||
* @e_count: number of elements allowed to be active simultaneously
|
||||
* @e_size: size of the tracked objects
|
||||
* @e_off: offset to the &struct lc_element member in a tracked object
|
||||
|
@ -66,6 +97,7 @@ MODULE_LICENSE("GPL");
|
|||
* or NULL on (allocation) failure.
|
||||
*/
|
||||
struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
|
||||
unsigned max_pending_changes,
|
||||
unsigned e_count, size_t e_size, size_t e_off)
|
||||
{
|
||||
struct hlist_head *slot = NULL;
|
||||
|
@ -98,12 +130,13 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
|
|||
INIT_LIST_HEAD(&lc->in_use);
|
||||
INIT_LIST_HEAD(&lc->lru);
|
||||
INIT_LIST_HEAD(&lc->free);
|
||||
INIT_LIST_HEAD(&lc->to_be_changed);
|
||||
|
||||
lc->name = name;
|
||||
lc->element_size = e_size;
|
||||
lc->element_off = e_off;
|
||||
lc->nr_elements = e_count;
|
||||
lc->new_number = LC_FREE;
|
||||
lc->max_pending_changes = max_pending_changes;
|
||||
lc->lc_cache = cache;
|
||||
lc->lc_element = element;
|
||||
lc->lc_slot = slot;
|
||||
|
@ -117,6 +150,7 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
|
|||
e = p + e_off;
|
||||
e->lc_index = i;
|
||||
e->lc_number = LC_FREE;
|
||||
e->lc_new_number = LC_FREE;
|
||||
list_add(&e->list, &lc->free);
|
||||
element[i] = e;
|
||||
}
|
||||
|
@ -175,15 +209,15 @@ void lc_reset(struct lru_cache *lc)
|
|||
INIT_LIST_HEAD(&lc->in_use);
|
||||
INIT_LIST_HEAD(&lc->lru);
|
||||
INIT_LIST_HEAD(&lc->free);
|
||||
INIT_LIST_HEAD(&lc->to_be_changed);
|
||||
lc->used = 0;
|
||||
lc->hits = 0;
|
||||
lc->misses = 0;
|
||||
lc->starving = 0;
|
||||
lc->dirty = 0;
|
||||
lc->locked = 0;
|
||||
lc->changed = 0;
|
||||
lc->pending_changes = 0;
|
||||
lc->flags = 0;
|
||||
lc->changing_element = NULL;
|
||||
lc->new_number = LC_FREE;
|
||||
memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements);
|
||||
|
||||
for (i = 0; i < lc->nr_elements; i++) {
|
||||
|
@ -194,6 +228,7 @@ void lc_reset(struct lru_cache *lc)
|
|||
/* re-init it */
|
||||
e->lc_index = i;
|
||||
e->lc_number = LC_FREE;
|
||||
e->lc_new_number = LC_FREE;
|
||||
list_add(&e->list, &lc->free);
|
||||
}
|
||||
}
|
||||
|
@ -208,14 +243,14 @@ size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc)
|
|||
/* NOTE:
|
||||
* total calls to lc_get are
|
||||
* (starving + hits + misses)
|
||||
* misses include "dirty" count (update from an other thread in
|
||||
* misses include "locked" count (update from another thread in
|
||||
* progress) and "changed", when this in fact led to a successful
|
||||
* update of the cache.
|
||||
*/
|
||||
return seq_printf(seq, "\t%s: used:%u/%u "
|
||||
"hits:%lu misses:%lu starving:%lu dirty:%lu changed:%lu\n",
|
||||
"hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n",
|
||||
lc->name, lc->used, lc->nr_elements,
|
||||
lc->hits, lc->misses, lc->starving, lc->dirty, lc->changed);
|
||||
lc->hits, lc->misses, lc->starving, lc->locked, lc->changed);
|
||||
}
|
||||
|
||||
static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
|
||||
|
@ -224,6 +259,27 @@ static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
|
|||
}
|
||||
|
||||
|
||||
static struct lc_element *__lc_find(struct lru_cache *lc, unsigned int enr,
|
||||
bool include_changing)
|
||||
{
|
||||
struct hlist_node *n;
|
||||
struct lc_element *e;
|
||||
|
||||
BUG_ON(!lc);
|
||||
BUG_ON(!lc->nr_elements);
|
||||
hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) {
|
||||
/* "about to be changed" elements, pending transaction commit,
|
||||
* are hashed by their "new number". "Normal" elements have
|
||||
* lc_number == lc_new_number. */
|
||||
if (e->lc_new_number != enr)
|
||||
continue;
|
||||
if (e->lc_new_number == e->lc_number || include_changing)
|
||||
return e;
|
||||
break;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* lc_find - find element by label, if present in the hash table
|
||||
* @lc: The lru_cache object
|
||||
|
@ -232,38 +288,28 @@ static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
|
|||
* Returns the pointer to an element, if the element with the requested
|
||||
* "label" or element number is present in the hash table,
|
||||
* or NULL if not found. Does not change the refcnt.
|
||||
* Ignores elements that are "about to be used", i.e. not yet in the active
|
||||
* set, but still pending transaction commit.
|
||||
*/
|
||||
struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr)
|
||||
{
|
||||
struct hlist_node *n;
|
||||
struct lc_element *e;
|
||||
|
||||
BUG_ON(!lc);
|
||||
BUG_ON(!lc->nr_elements);
|
||||
hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) {
|
||||
if (e->lc_number == enr)
|
||||
return e;
|
||||
}
|
||||
return NULL;
|
||||
return __lc_find(lc, enr, 0);
|
||||
}
|
||||
|
||||
/* returned element will be "recycled" immediately */
|
||||
static struct lc_element *lc_evict(struct lru_cache *lc)
|
||||
/**
|
||||
* lc_is_used - find element by label
|
||||
* @lc: The lru_cache object
|
||||
* @enr: element number
|
||||
*
|
||||
* Returns true, if the element with the requested "label" or element number is
|
||||
* present in the hash table, and is used (refcnt > 0).
|
||||
* Also finds elements that are not _currently_ used but only "about to be
|
||||
* used", i.e. on the "to_be_changed" list, pending transaction commit.
|
||||
*/
|
||||
bool lc_is_used(struct lru_cache *lc, unsigned int enr)
|
||||
{
|
||||
struct list_head *n;
|
||||
struct lc_element *e;
|
||||
|
||||
if (list_empty(&lc->lru))
|
||||
return NULL;
|
||||
|
||||
n = lc->lru.prev;
|
||||
e = list_entry(n, struct lc_element, list);
|
||||
|
||||
PARANOIA_LC_ELEMENT(lc, e);
|
||||
|
||||
list_del(&e->list);
|
||||
hlist_del(&e->colision);
|
||||
return e;
|
||||
struct lc_element *e = __lc_find(lc, enr, 1);
|
||||
return e && e->refcnt;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -280,22 +326,34 @@ void lc_del(struct lru_cache *lc, struct lc_element *e)
|
|||
PARANOIA_LC_ELEMENT(lc, e);
|
||||
BUG_ON(e->refcnt);
|
||||
|
||||
e->lc_number = LC_FREE;
|
||||
e->lc_number = e->lc_new_number = LC_FREE;
|
||||
hlist_del_init(&e->colision);
|
||||
list_move(&e->list, &lc->free);
|
||||
RETURN();
|
||||
}
|
||||
|
||||
static struct lc_element *lc_get_unused_element(struct lru_cache *lc)
|
||||
static struct lc_element *lc_prepare_for_change(struct lru_cache *lc, unsigned new_number)
|
||||
{
|
||||
struct list_head *n;
|
||||
struct lc_element *e;
|
||||
|
||||
if (list_empty(&lc->free))
|
||||
return lc_evict(lc);
|
||||
if (!list_empty(&lc->free))
|
||||
n = lc->free.next;
|
||||
else if (!list_empty(&lc->lru))
|
||||
n = lc->lru.prev;
|
||||
else
|
||||
return NULL;
|
||||
|
||||
n = lc->free.next;
|
||||
list_del(n);
|
||||
return list_entry(n, struct lc_element, list);
|
||||
e = list_entry(n, struct lc_element, list);
|
||||
PARANOIA_LC_ELEMENT(lc, e);
|
||||
|
||||
e->lc_new_number = new_number;
|
||||
if (!hlist_unhashed(&e->colision))
|
||||
__hlist_del(&e->colision);
|
||||
hlist_add_head(&e->colision, lc_hash_slot(lc, new_number));
|
||||
list_move(&e->list, &lc->to_be_changed);
|
||||
|
||||
return e;
|
||||
}
|
||||
|
||||
static int lc_unused_element_available(struct lru_cache *lc)
|
||||
|
@ -308,6 +366,75 @@ static int lc_unused_element_available(struct lru_cache *lc)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool may_change)
|
||||
{
|
||||
struct lc_element *e;
|
||||
|
||||
PARANOIA_ENTRY();
|
||||
if (lc->flags & LC_STARVING) {
|
||||
++lc->starving;
|
||||
RETURN(NULL);
|
||||
}
|
||||
|
||||
e = __lc_find(lc, enr, 1);
|
||||
/* if lc_new_number != lc_number,
|
||||
* this enr is currently being pulled in already,
|
||||
* and will be available once the pending transaction
|
||||
* has been committed. */
|
||||
if (e && e->lc_new_number == e->lc_number) {
|
||||
++lc->hits;
|
||||
if (e->refcnt++ == 0)
|
||||
lc->used++;
|
||||
list_move(&e->list, &lc->in_use); /* Not evictable... */
|
||||
RETURN(e);
|
||||
}
|
||||
|
||||
++lc->misses;
|
||||
if (!may_change)
|
||||
RETURN(NULL);
|
||||
|
||||
/* It has been found above, but on the "to_be_changed" list, not yet
|
||||
* committed. Don't pull it in twice, wait for the transaction, then
|
||||
* try again */
|
||||
if (e)
|
||||
RETURN(NULL);
|
||||
|
||||
/* To avoid races with lc_try_lock(), first, mark us dirty
|
||||
* (using test_and_set_bit, as it implies memory barriers), ... */
|
||||
test_and_set_bit(__LC_DIRTY, &lc->flags);
|
||||
|
||||
/* ... only then check if it is locked anyways. If lc_unlock clears
|
||||
* the dirty bit again, that's not a problem, we will come here again.
|
||||
*/
|
||||
if (test_bit(__LC_LOCKED, &lc->flags)) {
|
||||
++lc->locked;
|
||||
RETURN(NULL);
|
||||
}
|
||||
|
||||
/* In case there is nothing available and we can not kick out
|
||||
* the LRU element, we have to wait ...
|
||||
*/
|
||||
if (!lc_unused_element_available(lc)) {
|
||||
__set_bit(__LC_STARVING, &lc->flags);
|
||||
RETURN(NULL);
|
||||
}
|
||||
|
||||
/* It was not present in the active set. We are going to recycle an
|
||||
* unused (or even "free") element, but we won't accumulate more than
|
||||
* max_pending_changes changes. */
|
||||
if (lc->pending_changes >= lc->max_pending_changes)
|
||||
RETURN(NULL);
|
||||
|
||||
e = lc_prepare_for_change(lc, enr);
|
||||
BUG_ON(!e);
|
||||
|
||||
clear_bit(__LC_STARVING, &lc->flags);
|
||||
BUG_ON(++e->refcnt != 1);
|
||||
lc->used++;
|
||||
lc->pending_changes++;
|
||||
|
||||
RETURN(e);
|
||||
}
|
||||
|
||||
/**
|
||||
* lc_get - get element by label, maybe change the active set
|
||||
|
@ -336,110 +463,65 @@ static int lc_unused_element_available(struct lru_cache *lc)
|
|||
* pointer to an UNUSED element with some different element number,
|
||||
* where that different number may also be %LC_FREE.
|
||||
*
|
||||
* In this case, the cache is marked %LC_DIRTY (blocking further changes),
|
||||
* and the returned element pointer is removed from the lru list and
|
||||
* hash collision chains. The user now should do whatever housekeeping
|
||||
* is necessary.
|
||||
* Then he must call lc_changed(lc,element_pointer), to finish
|
||||
* the change.
|
||||
* In this case, the cache is marked %LC_DIRTY,
|
||||
* so lc_try_lock() will no longer succeed.
|
||||
* The returned element pointer is moved to the "to_be_changed" list,
|
||||
* and registered with the new element number on the hash collision chains,
|
||||
* so it is possible to pick it up from lc_is_used().
|
||||
* Up to "max_pending_changes" (see lc_create()) can be accumulated.
|
||||
* The user now should do whatever housekeeping is necessary,
|
||||
* typically serialize on lc_try_lock_for_transaction(), then call
|
||||
* lc_committed(lc) and lc_unlock(), to finish the change.
|
||||
*
|
||||
* NOTE: The user needs to check the lc_number on EACH use, so he recognizes
|
||||
* any cache set change.
|
||||
*/
|
||||
struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr)
|
||||
{
|
||||
struct lc_element *e;
|
||||
|
||||
PARANOIA_ENTRY();
|
||||
if (lc->flags & LC_STARVING) {
|
||||
++lc->starving;
|
||||
RETURN(NULL);
|
||||
}
|
||||
|
||||
e = lc_find(lc, enr);
|
||||
if (e) {
|
||||
++lc->hits;
|
||||
if (e->refcnt++ == 0)
|
||||
lc->used++;
|
||||
list_move(&e->list, &lc->in_use); /* Not evictable... */
|
||||
RETURN(e);
|
||||
}
|
||||
|
||||
++lc->misses;
|
||||
|
||||
/* In case there is nothing available and we can not kick out
|
||||
* the LRU element, we have to wait ...
|
||||
*/
|
||||
if (!lc_unused_element_available(lc)) {
|
||||
__set_bit(__LC_STARVING, &lc->flags);
|
||||
RETURN(NULL);
|
||||
}
|
||||
|
||||
/* it was not present in the active set.
|
||||
* we are going to recycle an unused (or even "free") element.
|
||||
* user may need to commit a transaction to record that change.
|
||||
* we serialize on flags & TF_DIRTY */
|
||||
if (test_and_set_bit(__LC_DIRTY, &lc->flags)) {
|
||||
++lc->dirty;
|
||||
RETURN(NULL);
|
||||
}
|
||||
|
||||
e = lc_get_unused_element(lc);
|
||||
BUG_ON(!e);
|
||||
|
||||
clear_bit(__LC_STARVING, &lc->flags);
|
||||
BUG_ON(++e->refcnt != 1);
|
||||
lc->used++;
|
||||
|
||||
lc->changing_element = e;
|
||||
lc->new_number = enr;
|
||||
|
||||
RETURN(e);
|
||||
}
|
||||
|
||||
/* similar to lc_get,
|
||||
* but only gets a new reference on an existing element.
|
||||
* you either get the requested element, or NULL.
|
||||
* will be consolidated into one function.
|
||||
*/
|
||||
struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr)
|
||||
{
|
||||
struct lc_element *e;
|
||||
|
||||
PARANOIA_ENTRY();
|
||||
if (lc->flags & LC_STARVING) {
|
||||
++lc->starving;
|
||||
RETURN(NULL);
|
||||
}
|
||||
|
||||
e = lc_find(lc, enr);
|
||||
if (e) {
|
||||
++lc->hits;
|
||||
if (e->refcnt++ == 0)
|
||||
lc->used++;
|
||||
list_move(&e->list, &lc->in_use); /* Not evictable... */
|
||||
}
|
||||
RETURN(e);
|
||||
return __lc_get(lc, enr, 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* lc_changed - tell @lc that the change has been recorded
|
||||
* lc_try_get - get element by label, if present; do not change the active set
|
||||
* @lc: the lru cache to operate on
|
||||
* @e: the element pending label change
|
||||
* @enr: the label to look up
|
||||
*
|
||||
* Finds an element in the cache, increases its usage count,
|
||||
* "touches" and returns it.
|
||||
*
|
||||
* Return values:
|
||||
* NULL
|
||||
* The cache was marked %LC_STARVING,
|
||||
* or the requested label was not in the active set
|
||||
*
|
||||
* pointer to the element with the REQUESTED element number.
|
||||
* In this case, it can be used right away
|
||||
*/
|
||||
void lc_changed(struct lru_cache *lc, struct lc_element *e)
|
||||
struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr)
|
||||
{
|
||||
return __lc_get(lc, enr, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* lc_committed - tell @lc that pending changes have been recorded
|
||||
* @lc: the lru cache to operate on
|
||||
*
|
||||
* User is expected to serialize on explicit lc_try_lock_for_transaction()
|
||||
* before the transaction is started, and later needs to lc_unlock() explicitly
|
||||
* as well.
|
||||
*/
|
||||
void lc_committed(struct lru_cache *lc)
|
||||
{
|
||||
struct lc_element *e, *tmp;
|
||||
|
||||
PARANOIA_ENTRY();
|
||||
BUG_ON(e != lc->changing_element);
|
||||
PARANOIA_LC_ELEMENT(lc, e);
|
||||
++lc->changed;
|
||||
e->lc_number = lc->new_number;
|
||||
list_add(&e->list, &lc->in_use);
|
||||
hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number));
|
||||
lc->changing_element = NULL;
|
||||
lc->new_number = LC_FREE;
|
||||
clear_bit(__LC_DIRTY, &lc->flags);
|
||||
smp_mb__after_clear_bit();
|
||||
list_for_each_entry_safe(e, tmp, &lc->to_be_changed, list) {
|
||||
/* count number of changes, not number of transactions */
|
||||
++lc->changed;
|
||||
e->lc_number = e->lc_new_number;
|
||||
list_move(&e->list, &lc->in_use);
|
||||
}
|
||||
lc->pending_changes = 0;
|
||||
RETURN();
|
||||
}
|
||||
|
||||
|
@ -458,13 +540,12 @@ unsigned int lc_put(struct lru_cache *lc, struct lc_element *e)
|
|||
PARANOIA_ENTRY();
|
||||
PARANOIA_LC_ELEMENT(lc, e);
|
||||
BUG_ON(e->refcnt == 0);
|
||||
BUG_ON(e == lc->changing_element);
|
||||
BUG_ON(e->lc_number != e->lc_new_number);
|
||||
if (--e->refcnt == 0) {
|
||||
/* move it to the front of LRU. */
|
||||
list_move(&e->list, &lc->lru);
|
||||
lc->used--;
|
||||
clear_bit(__LC_STARVING, &lc->flags);
|
||||
smp_mb__after_clear_bit();
|
||||
clear_bit_unlock(__LC_STARVING, &lc->flags);
|
||||
}
|
||||
RETURN(e->refcnt);
|
||||
}
|
||||
|
@ -504,16 +585,24 @@ unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e)
|
|||
void lc_set(struct lru_cache *lc, unsigned int enr, int index)
|
||||
{
|
||||
struct lc_element *e;
|
||||
struct list_head *lh;
|
||||
|
||||
if (index < 0 || index >= lc->nr_elements)
|
||||
return;
|
||||
|
||||
e = lc_element_by_index(lc, index);
|
||||
e->lc_number = enr;
|
||||
BUG_ON(e->lc_number != e->lc_new_number);
|
||||
BUG_ON(e->refcnt != 0);
|
||||
|
||||
e->lc_number = e->lc_new_number = enr;
|
||||
hlist_del_init(&e->colision);
|
||||
hlist_add_head(&e->colision, lc_hash_slot(lc, enr));
|
||||
list_move(&e->list, e->refcnt ? &lc->in_use : &lc->lru);
|
||||
if (enr == LC_FREE)
|
||||
lh = &lc->free;
|
||||
else {
|
||||
hlist_add_head(&e->colision, lc_hash_slot(lc, enr));
|
||||
lh = &lc->lru;
|
||||
}
|
||||
list_move(&e->list, lh);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -553,8 +642,10 @@ EXPORT_SYMBOL(lc_try_get);
|
|||
EXPORT_SYMBOL(lc_find);
|
||||
EXPORT_SYMBOL(lc_get);
|
||||
EXPORT_SYMBOL(lc_put);
|
||||
EXPORT_SYMBOL(lc_changed);
|
||||
EXPORT_SYMBOL(lc_committed);
|
||||
EXPORT_SYMBOL(lc_element_by_index);
|
||||
EXPORT_SYMBOL(lc_index_of);
|
||||
EXPORT_SYMBOL(lc_seq_printf_stats);
|
||||
EXPORT_SYMBOL(lc_seq_dump_details);
|
||||
EXPORT_SYMBOL(lc_try_lock);
|
||||
EXPORT_SYMBOL(lc_is_used);
|
||||
|
|
Loading…
Reference in New Issue