Merge branch 'for-3.8/drivers' of git://git.kernel.dk/linux-block
Pull block driver update from Jens Axboe: "Now that the core bits are in, here are the driver bits for 3.8. The branch contains: - A huge pile of drbd bits that were dumped from the 3.7 merge window. Following that, it was both made perfectly clear that there is going to be no more over-the-wall pulls and how the situation on individual pulls can be improved. - A few cleanups from Akinobu Mita for drbd and cciss. - Queue improvement for loop from Lukas. This grew into adding a generic interface for waiting/checking an event with a specific lock, allowing this to be pulled out of md and now loop and drbd are also using it. - A few fixes for xen back/front block driver from Roger Pau Monne. - Partition improvements from Stephen Warren, allowing partition UUID to be used as an identifier." * 'for-3.8/drivers' of git://git.kernel.dk/linux-block: (609 commits) drbd: update Kconfig to match current dependencies drbd: Fix drbdsetup wait-connect, wait-sync etc... commands drbd: close race between drbd_set_role and drbd_connect drbd: respect no-md-barriers setting also when changed online via disk-options drbd: Remove obsolete check drbd: fixup after wait_even_lock_irq() addition to generic code loop: Limit the number of requests in the bio list wait: add wait_event_lock_irq() interface xen-blkfront: free allocated page xen-blkback: move free persistent grants code block: partition: msdos: provide UUIDs for partitions init: reduce PARTUUID min length to 1 from 36 block: store partition_meta_info.uuid as a string cciss: use check_signature() cciss: cleanup bitops usage drbd: use copy_highpage drbd: if the replication link breaks during handshake, keep retrying drbd: check return of kmalloc in receive_uuids drbd: Broadcast sync progress no more often than once per second drbd: don't try to clear bits once the disk has failed ...
This commit is contained in:
commit
9228ff9038
|
@ -743,7 +743,6 @@ void __init printk_all_partitions(void)
|
|||
struct hd_struct *part;
|
||||
char name_buf[BDEVNAME_SIZE];
|
||||
char devt_buf[BDEVT_SIZE];
|
||||
char uuid_buf[PARTITION_META_INFO_UUIDLTH * 2 + 5];
|
||||
|
||||
/*
|
||||
* Don't show empty devices or things that have been
|
||||
|
@ -762,16 +761,11 @@ void __init printk_all_partitions(void)
|
|||
while ((part = disk_part_iter_next(&piter))) {
|
||||
bool is_part0 = part == &disk->part0;
|
||||
|
||||
uuid_buf[0] = '\0';
|
||||
if (part->info)
|
||||
snprintf(uuid_buf, sizeof(uuid_buf), "%pU",
|
||||
part->info->uuid);
|
||||
|
||||
printk("%s%s %10llu %s %s", is_part0 ? "" : " ",
|
||||
bdevt_str(part_devt(part), devt_buf),
|
||||
(unsigned long long)part_nr_sects_read(part) >> 1
|
||||
, disk_name(disk, part->partno, name_buf),
|
||||
uuid_buf);
|
||||
part->info ? part->info->uuid : "");
|
||||
if (is_part0) {
|
||||
if (disk->driverfs_dev != NULL &&
|
||||
disk->driverfs_dev->driver != NULL)
|
||||
|
|
|
@ -620,7 +620,6 @@ int efi_partition(struct parsed_partitions *state)
|
|||
gpt_entry *ptes = NULL;
|
||||
u32 i;
|
||||
unsigned ssz = bdev_logical_block_size(state->bdev) / 512;
|
||||
u8 unparsed_guid[37];
|
||||
|
||||
if (!find_valid_gpt(state, &gpt, &ptes) || !gpt || !ptes) {
|
||||
kfree(gpt);
|
||||
|
@ -649,11 +648,7 @@ int efi_partition(struct parsed_partitions *state)
|
|||
state->parts[i + 1].flags = ADDPART_FLAG_RAID;
|
||||
|
||||
info = &state->parts[i + 1].info;
|
||||
/* Instead of doing a manual swap to big endian, reuse the
|
||||
* common ASCII hex format as the interim.
|
||||
*/
|
||||
efi_guid_unparse(&ptes[i].unique_partition_guid, unparsed_guid);
|
||||
part_pack_uuid(unparsed_guid, info->uuid);
|
||||
efi_guid_unparse(&ptes[i].unique_partition_guid, info->uuid);
|
||||
|
||||
/* Naively convert UTF16-LE to 7 bits. */
|
||||
label_max = min(sizeof(info->volname) - 1,
|
||||
|
|
|
@ -94,6 +94,17 @@ static int aix_magic_present(struct parsed_partitions *state, unsigned char *p)
|
|||
return ret;
|
||||
}
|
||||
|
||||
static void set_info(struct parsed_partitions *state, int slot,
|
||||
u32 disksig)
|
||||
{
|
||||
struct partition_meta_info *info = &state->parts[slot].info;
|
||||
|
||||
snprintf(info->uuid, sizeof(info->uuid), "%08x-%02x", disksig,
|
||||
slot);
|
||||
info->volname[0] = 0;
|
||||
state->parts[slot].has_info = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Create devices for each logical partition in an extended partition.
|
||||
* The logical partitions form a linked list, with each entry being
|
||||
|
@ -106,7 +117,8 @@ static int aix_magic_present(struct parsed_partitions *state, unsigned char *p)
|
|||
*/
|
||||
|
||||
static void parse_extended(struct parsed_partitions *state,
|
||||
sector_t first_sector, sector_t first_size)
|
||||
sector_t first_sector, sector_t first_size,
|
||||
u32 disksig)
|
||||
{
|
||||
struct partition *p;
|
||||
Sector sect;
|
||||
|
@ -166,6 +178,7 @@ static void parse_extended(struct parsed_partitions *state,
|
|||
}
|
||||
|
||||
put_partition(state, state->next, next, size);
|
||||
set_info(state, state->next, disksig);
|
||||
if (SYS_IND(p) == LINUX_RAID_PARTITION)
|
||||
state->parts[state->next].flags = ADDPART_FLAG_RAID;
|
||||
loopct = 0;
|
||||
|
@ -437,6 +450,7 @@ int msdos_partition(struct parsed_partitions *state)
|
|||
struct partition *p;
|
||||
struct fat_boot_sector *fb;
|
||||
int slot;
|
||||
u32 disksig;
|
||||
|
||||
data = read_part_sector(state, 0, §);
|
||||
if (!data)
|
||||
|
@ -491,6 +505,8 @@ int msdos_partition(struct parsed_partitions *state)
|
|||
#endif
|
||||
p = (struct partition *) (data + 0x1be);
|
||||
|
||||
disksig = le32_to_cpup((__le32 *)(data + 0x1b8));
|
||||
|
||||
/*
|
||||
* Look for partitions in two passes:
|
||||
* First find the primary and DOS-type extended partitions.
|
||||
|
@ -515,11 +531,12 @@ int msdos_partition(struct parsed_partitions *state)
|
|||
put_partition(state, slot, start, n);
|
||||
|
||||
strlcat(state->pp_buf, " <", PAGE_SIZE);
|
||||
parse_extended(state, start, size);
|
||||
parse_extended(state, start, size, disksig);
|
||||
strlcat(state->pp_buf, " >", PAGE_SIZE);
|
||||
continue;
|
||||
}
|
||||
put_partition(state, slot, start, size);
|
||||
set_info(state, slot, disksig);
|
||||
if (SYS_IND(p) == LINUX_RAID_PARTITION)
|
||||
state->parts[slot].flags = ADDPART_FLAG_RAID;
|
||||
if (SYS_IND(p) == DM6_PARTITION)
|
||||
|
|
|
@ -41,8 +41,9 @@
|
|||
#include <linux/spinlock.h>
|
||||
#include <linux/compat.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/bitmap.h>
|
||||
#include <linux/io.h>
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/io.h>
|
||||
|
||||
#include <linux/dma-mapping.h>
|
||||
#include <linux/blkdev.h>
|
||||
|
@ -978,8 +979,7 @@ static CommandList_struct *cmd_alloc(ctlr_info_t *h)
|
|||
i = find_first_zero_bit(h->cmd_pool_bits, h->nr_cmds);
|
||||
if (i == h->nr_cmds)
|
||||
return NULL;
|
||||
} while (test_and_set_bit(i & (BITS_PER_LONG - 1),
|
||||
h->cmd_pool_bits + (i / BITS_PER_LONG)) != 0);
|
||||
} while (test_and_set_bit(i, h->cmd_pool_bits) != 0);
|
||||
c = h->cmd_pool + i;
|
||||
memset(c, 0, sizeof(CommandList_struct));
|
||||
cmd_dma_handle = h->cmd_pool_dhandle + i * sizeof(CommandList_struct);
|
||||
|
@ -1046,8 +1046,7 @@ static void cmd_free(ctlr_info_t *h, CommandList_struct *c)
|
|||
int i;
|
||||
|
||||
i = c - h->cmd_pool;
|
||||
clear_bit(i & (BITS_PER_LONG - 1),
|
||||
h->cmd_pool_bits + (i / BITS_PER_LONG));
|
||||
clear_bit(i, h->cmd_pool_bits);
|
||||
h->nr_frees++;
|
||||
}
|
||||
|
||||
|
@ -4268,10 +4267,7 @@ static void __devinit cciss_find_board_params(ctlr_info_t *h)
|
|||
|
||||
static inline bool CISS_signature_present(ctlr_info_t *h)
|
||||
{
|
||||
if ((readb(&h->cfgtable->Signature[0]) != 'C') ||
|
||||
(readb(&h->cfgtable->Signature[1]) != 'I') ||
|
||||
(readb(&h->cfgtable->Signature[2]) != 'S') ||
|
||||
(readb(&h->cfgtable->Signature[3]) != 'S')) {
|
||||
if (!check_signature(h->cfgtable->Signature, "CISS", 4)) {
|
||||
dev_warn(&h->pdev->dev, "not a valid CISS config table\n");
|
||||
return false;
|
||||
}
|
||||
|
@ -4812,8 +4808,7 @@ static __devinit int cciss_init_reset_devices(struct pci_dev *pdev)
|
|||
|
||||
static __devinit int cciss_allocate_cmd_pool(ctlr_info_t *h)
|
||||
{
|
||||
h->cmd_pool_bits = kmalloc(
|
||||
DIV_ROUND_UP(h->nr_cmds, BITS_PER_LONG) *
|
||||
h->cmd_pool_bits = kmalloc(BITS_TO_LONGS(h->nr_cmds) *
|
||||
sizeof(unsigned long), GFP_KERNEL);
|
||||
h->cmd_pool = pci_alloc_consistent(h->pdev,
|
||||
h->nr_cmds * sizeof(CommandList_struct),
|
||||
|
@ -5068,9 +5063,7 @@ reinit_after_soft_reset:
|
|||
pci_set_drvdata(pdev, h);
|
||||
/* command and error info recs zeroed out before
|
||||
they are used */
|
||||
memset(h->cmd_pool_bits, 0,
|
||||
DIV_ROUND_UP(h->nr_cmds, BITS_PER_LONG)
|
||||
* sizeof(unsigned long));
|
||||
bitmap_zero(h->cmd_pool_bits, h->nr_cmds);
|
||||
|
||||
h->num_luns = 0;
|
||||
h->highest_lun = -1;
|
||||
|
|
|
@ -2,13 +2,14 @@
|
|||
# DRBD device driver configuration
|
||||
#
|
||||
|
||||
comment "DRBD disabled because PROC_FS, INET or CONNECTOR not selected"
|
||||
depends on PROC_FS='n' || INET='n' || CONNECTOR='n'
|
||||
comment "DRBD disabled because PROC_FS or INET not selected"
|
||||
depends on PROC_FS='n' || INET='n'
|
||||
|
||||
config BLK_DEV_DRBD
|
||||
tristate "DRBD Distributed Replicated Block Device support"
|
||||
depends on PROC_FS && INET && CONNECTOR
|
||||
depends on PROC_FS && INET
|
||||
select LRU_CACHE
|
||||
select LIBCRC32C
|
||||
default n
|
||||
help
|
||||
|
||||
|
@ -58,7 +59,8 @@ config DRBD_FAULT_INJECTION
|
|||
32 data read
|
||||
64 read ahead
|
||||
128 kmalloc of bitmap
|
||||
256 allocation of EE (epoch_entries)
|
||||
256 allocation of peer_requests
|
||||
512 insert data corruption on receiving side
|
||||
|
||||
fault_devs: bitmask of minor numbers
|
||||
fault_rate: frequency in percent
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
drbd-y := drbd_bitmap.o drbd_proc.o
|
||||
drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o
|
||||
drbd-y += drbd_main.o drbd_strings.o drbd_nl.o
|
||||
drbd-y += drbd_interval.o drbd_state.o
|
||||
drbd-y += drbd_nla.o
|
||||
|
||||
obj-$(CONFIG_BLK_DEV_DRBD) += drbd.o
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -119,13 +119,9 @@ static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func)
|
|||
if (!__ratelimit(&drbd_ratelimit_state))
|
||||
return;
|
||||
dev_err(DEV, "FIXME %s in %s, bitmap locked for '%s' by %s\n",
|
||||
current == mdev->receiver.task ? "receiver" :
|
||||
current == mdev->asender.task ? "asender" :
|
||||
current == mdev->worker.task ? "worker" : current->comm,
|
||||
func, b->bm_why ?: "?",
|
||||
b->bm_task == mdev->receiver.task ? "receiver" :
|
||||
b->bm_task == mdev->asender.task ? "asender" :
|
||||
b->bm_task == mdev->worker.task ? "worker" : "?");
|
||||
drbd_task_to_thread_name(mdev->tconn, current),
|
||||
func, b->bm_why ?: "?",
|
||||
drbd_task_to_thread_name(mdev->tconn, b->bm_task));
|
||||
}
|
||||
|
||||
void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags)
|
||||
|
@ -142,13 +138,9 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags)
|
|||
|
||||
if (trylock_failed) {
|
||||
dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n",
|
||||
current == mdev->receiver.task ? "receiver" :
|
||||
current == mdev->asender.task ? "asender" :
|
||||
current == mdev->worker.task ? "worker" : current->comm,
|
||||
why, b->bm_why ?: "?",
|
||||
b->bm_task == mdev->receiver.task ? "receiver" :
|
||||
b->bm_task == mdev->asender.task ? "asender" :
|
||||
b->bm_task == mdev->worker.task ? "worker" : "?");
|
||||
drbd_task_to_thread_name(mdev->tconn, current),
|
||||
why, b->bm_why ?: "?",
|
||||
drbd_task_to_thread_name(mdev->tconn, b->bm_task));
|
||||
mutex_lock(&b->bm_change);
|
||||
}
|
||||
if (BM_LOCKED_MASK & b->bm_flags)
|
||||
|
@ -196,6 +188,9 @@ void drbd_bm_unlock(struct drbd_conf *mdev)
|
|||
/* to mark for lazy writeout once syncer cleared all clearable bits,
|
||||
* we if bits have been cleared since last IO. */
|
||||
#define BM_PAGE_LAZY_WRITEOUT 28
|
||||
/* pages marked with this "HINT" will be considered for writeout
|
||||
* on activity log transactions */
|
||||
#define BM_PAGE_HINT_WRITEOUT 27
|
||||
|
||||
/* store_page_idx uses non-atomic assignment. It is only used directly after
|
||||
* allocating the page. All other bm_set_page_* and bm_clear_page_* need to
|
||||
|
@ -227,8 +222,7 @@ static void bm_page_unlock_io(struct drbd_conf *mdev, int page_nr)
|
|||
{
|
||||
struct drbd_bitmap *b = mdev->bitmap;
|
||||
void *addr = &page_private(b->bm_pages[page_nr]);
|
||||
clear_bit(BM_PAGE_IO_LOCK, addr);
|
||||
smp_mb__after_clear_bit();
|
||||
clear_bit_unlock(BM_PAGE_IO_LOCK, addr);
|
||||
wake_up(&mdev->bitmap->bm_io_wait);
|
||||
}
|
||||
|
||||
|
@ -246,6 +240,27 @@ static void bm_set_page_need_writeout(struct page *page)
|
|||
set_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page));
|
||||
}
|
||||
|
||||
/**
|
||||
* drbd_bm_mark_for_writeout() - mark a page with a "hint" to be considered for writeout
|
||||
* @mdev: DRBD device.
|
||||
* @page_nr: the bitmap page to mark with the "hint" flag
|
||||
*
|
||||
* From within an activity log transaction, we mark a few pages with these
|
||||
* hints, then call drbd_bm_write_hinted(), which will only write out changed
|
||||
* pages which are flagged with this mark.
|
||||
*/
|
||||
void drbd_bm_mark_for_writeout(struct drbd_conf *mdev, int page_nr)
|
||||
{
|
||||
struct page *page;
|
||||
if (page_nr >= mdev->bitmap->bm_number_of_pages) {
|
||||
dev_warn(DEV, "BAD: page_nr: %u, number_of_pages: %u\n",
|
||||
page_nr, (int)mdev->bitmap->bm_number_of_pages);
|
||||
return;
|
||||
}
|
||||
page = mdev->bitmap->bm_pages[page_nr];
|
||||
set_bit(BM_PAGE_HINT_WRITEOUT, &page_private(page));
|
||||
}
|
||||
|
||||
static int bm_test_page_unchanged(struct page *page)
|
||||
{
|
||||
volatile const unsigned long *addr = &page_private(page);
|
||||
|
@ -373,14 +388,16 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
|
|||
return old_pages;
|
||||
|
||||
/* Trying kmalloc first, falling back to vmalloc.
|
||||
* GFP_KERNEL is ok, as this is done when a lower level disk is
|
||||
* "attached" to the drbd. Context is receiver thread or cqueue
|
||||
* thread. As we have no disk yet, we are not in the IO path,
|
||||
* not even the IO path of the peer. */
|
||||
* GFP_NOIO, as this is called while drbd IO is "suspended",
|
||||
* and during resize or attach on diskless Primary,
|
||||
* we must not block on IO to ourselves.
|
||||
* Context is receiver thread or dmsetup. */
|
||||
bytes = sizeof(struct page *)*want;
|
||||
new_pages = kzalloc(bytes, GFP_KERNEL);
|
||||
new_pages = kzalloc(bytes, GFP_NOIO);
|
||||
if (!new_pages) {
|
||||
new_pages = vzalloc(bytes);
|
||||
new_pages = __vmalloc(bytes,
|
||||
GFP_NOIO | __GFP_HIGHMEM | __GFP_ZERO,
|
||||
PAGE_KERNEL);
|
||||
if (!new_pages)
|
||||
return NULL;
|
||||
vmalloced = 1;
|
||||
|
@ -390,7 +407,7 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
|
|||
for (i = 0; i < have; i++)
|
||||
new_pages[i] = old_pages[i];
|
||||
for (; i < want; i++) {
|
||||
page = alloc_page(GFP_HIGHUSER);
|
||||
page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
|
||||
if (!page) {
|
||||
bm_free_pages(new_pages + have, i - have);
|
||||
bm_vk_free(new_pages, vmalloced);
|
||||
|
@ -439,7 +456,8 @@ int drbd_bm_init(struct drbd_conf *mdev)
|
|||
|
||||
sector_t drbd_bm_capacity(struct drbd_conf *mdev)
|
||||
{
|
||||
ERR_IF(!mdev->bitmap) return 0;
|
||||
if (!expect(mdev->bitmap))
|
||||
return 0;
|
||||
return mdev->bitmap->bm_dev_capacity;
|
||||
}
|
||||
|
||||
|
@ -447,7 +465,8 @@ sector_t drbd_bm_capacity(struct drbd_conf *mdev)
|
|||
*/
|
||||
void drbd_bm_cleanup(struct drbd_conf *mdev)
|
||||
{
|
||||
ERR_IF (!mdev->bitmap) return;
|
||||
if (!expect(mdev->bitmap))
|
||||
return;
|
||||
bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages);
|
||||
bm_vk_free(mdev->bitmap->bm_pages, (BM_P_VMALLOCED & mdev->bitmap->bm_flags));
|
||||
kfree(mdev->bitmap);
|
||||
|
@ -610,7 +629,8 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits)
|
|||
int err = 0, growing;
|
||||
int opages_vmalloced;
|
||||
|
||||
ERR_IF(!b) return -ENOMEM;
|
||||
if (!expect(b))
|
||||
return -ENOMEM;
|
||||
|
||||
drbd_bm_lock(mdev, "resize", BM_LOCKED_MASK);
|
||||
|
||||
|
@ -732,8 +752,10 @@ unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev)
|
|||
unsigned long s;
|
||||
unsigned long flags;
|
||||
|
||||
ERR_IF(!b) return 0;
|
||||
ERR_IF(!b->bm_pages) return 0;
|
||||
if (!expect(b))
|
||||
return 0;
|
||||
if (!expect(b->bm_pages))
|
||||
return 0;
|
||||
|
||||
spin_lock_irqsave(&b->bm_lock, flags);
|
||||
s = b->bm_set;
|
||||
|
@ -756,8 +778,10 @@ unsigned long drbd_bm_total_weight(struct drbd_conf *mdev)
|
|||
size_t drbd_bm_words(struct drbd_conf *mdev)
|
||||
{
|
||||
struct drbd_bitmap *b = mdev->bitmap;
|
||||
ERR_IF(!b) return 0;
|
||||
ERR_IF(!b->bm_pages) return 0;
|
||||
if (!expect(b))
|
||||
return 0;
|
||||
if (!expect(b->bm_pages))
|
||||
return 0;
|
||||
|
||||
return b->bm_words;
|
||||
}
|
||||
|
@ -765,7 +789,8 @@ size_t drbd_bm_words(struct drbd_conf *mdev)
|
|||
unsigned long drbd_bm_bits(struct drbd_conf *mdev)
|
||||
{
|
||||
struct drbd_bitmap *b = mdev->bitmap;
|
||||
ERR_IF(!b) return 0;
|
||||
if (!expect(b))
|
||||
return 0;
|
||||
|
||||
return b->bm_bits;
|
||||
}
|
||||
|
@ -786,8 +811,10 @@ void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number,
|
|||
|
||||
end = offset + number;
|
||||
|
||||
ERR_IF(!b) return;
|
||||
ERR_IF(!b->bm_pages) return;
|
||||
if (!expect(b))
|
||||
return;
|
||||
if (!expect(b->bm_pages))
|
||||
return;
|
||||
if (number == 0)
|
||||
return;
|
||||
WARN_ON(offset >= b->bm_words);
|
||||
|
@ -831,8 +858,10 @@ void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number,
|
|||
|
||||
end = offset + number;
|
||||
|
||||
ERR_IF(!b) return;
|
||||
ERR_IF(!b->bm_pages) return;
|
||||
if (!expect(b))
|
||||
return;
|
||||
if (!expect(b->bm_pages))
|
||||
return;
|
||||
|
||||
spin_lock_irq(&b->bm_lock);
|
||||
if ((offset >= b->bm_words) ||
|
||||
|
@ -860,8 +889,10 @@ void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number,
|
|||
void drbd_bm_set_all(struct drbd_conf *mdev)
|
||||
{
|
||||
struct drbd_bitmap *b = mdev->bitmap;
|
||||
ERR_IF(!b) return;
|
||||
ERR_IF(!b->bm_pages) return;
|
||||
if (!expect(b))
|
||||
return;
|
||||
if (!expect(b->bm_pages))
|
||||
return;
|
||||
|
||||
spin_lock_irq(&b->bm_lock);
|
||||
bm_memset(b, 0, 0xff, b->bm_words);
|
||||
|
@ -874,8 +905,10 @@ void drbd_bm_set_all(struct drbd_conf *mdev)
|
|||
void drbd_bm_clear_all(struct drbd_conf *mdev)
|
||||
{
|
||||
struct drbd_bitmap *b = mdev->bitmap;
|
||||
ERR_IF(!b) return;
|
||||
ERR_IF(!b->bm_pages) return;
|
||||
if (!expect(b))
|
||||
return;
|
||||
if (!expect(b->bm_pages))
|
||||
return;
|
||||
|
||||
spin_lock_irq(&b->bm_lock);
|
||||
bm_memset(b, 0, 0, b->bm_words);
|
||||
|
@ -889,7 +922,8 @@ struct bm_aio_ctx {
|
|||
unsigned int done;
|
||||
unsigned flags;
|
||||
#define BM_AIO_COPY_PAGES 1
|
||||
#define BM_WRITE_ALL_PAGES 2
|
||||
#define BM_AIO_WRITE_HINTED 2
|
||||
#define BM_WRITE_ALL_PAGES 4
|
||||
int error;
|
||||
struct kref kref;
|
||||
};
|
||||
|
@ -977,17 +1011,11 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must
|
|||
bm_set_page_unchanged(b->bm_pages[page_nr]);
|
||||
|
||||
if (ctx->flags & BM_AIO_COPY_PAGES) {
|
||||
void *src, *dest;
|
||||
page = mempool_alloc(drbd_md_io_page_pool, __GFP_HIGHMEM|__GFP_WAIT);
|
||||
dest = kmap_atomic(page);
|
||||
src = kmap_atomic(b->bm_pages[page_nr]);
|
||||
memcpy(dest, src, PAGE_SIZE);
|
||||
kunmap_atomic(src);
|
||||
kunmap_atomic(dest);
|
||||
copy_highpage(page, b->bm_pages[page_nr]);
|
||||
bm_store_page_idx(page, page_nr);
|
||||
} else
|
||||
page = b->bm_pages[page_nr];
|
||||
|
||||
bio->bi_bdev = mdev->ldev->md_bdev;
|
||||
bio->bi_sector = on_disk_sector;
|
||||
/* bio_add_page of a single page to an empty bio will always succeed,
|
||||
|
@ -1060,6 +1088,11 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
|
|||
if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx)
|
||||
break;
|
||||
if (rw & WRITE) {
|
||||
if ((flags & BM_AIO_WRITE_HINTED) &&
|
||||
!test_and_clear_bit(BM_PAGE_HINT_WRITEOUT,
|
||||
&page_private(b->bm_pages[i])))
|
||||
continue;
|
||||
|
||||
if (!(flags & BM_WRITE_ALL_PAGES) &&
|
||||
bm_test_page_unchanged(b->bm_pages[i])) {
|
||||
dynamic_dev_dbg(DEV, "skipped bm write for idx %u\n", i);
|
||||
|
@ -1088,13 +1121,15 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
|
|||
* "in_flight reached zero, all done" event.
|
||||
*/
|
||||
if (!atomic_dec_and_test(&ctx->in_flight))
|
||||
wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done);
|
||||
wait_until_done_or_force_detached(mdev, mdev->ldev, &ctx->done);
|
||||
else
|
||||
kref_put(&ctx->kref, &bm_aio_ctx_destroy);
|
||||
|
||||
dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n",
|
||||
rw == WRITE ? "WRITE" : "READ",
|
||||
count, jiffies - now);
|
||||
/* summary for global bitmap IO */
|
||||
if (flags == 0)
|
||||
dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n",
|
||||
rw == WRITE ? "WRITE" : "READ",
|
||||
count, jiffies - now);
|
||||
|
||||
if (ctx->error) {
|
||||
dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n");
|
||||
|
@ -1103,7 +1138,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
|
|||
}
|
||||
|
||||
if (atomic_read(&ctx->in_flight))
|
||||
err = -EIO; /* Disk failed during IO... */
|
||||
err = -EIO; /* Disk timeout/force-detach during IO... */
|
||||
|
||||
now = jiffies;
|
||||
if (rw == WRITE) {
|
||||
|
@ -1115,8 +1150,9 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
|
|||
}
|
||||
now = b->bm_set;
|
||||
|
||||
dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
|
||||
ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);
|
||||
if (flags == 0)
|
||||
dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
|
||||
ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);
|
||||
|
||||
kref_put(&ctx->kref, &bm_aio_ctx_destroy);
|
||||
return err;
|
||||
|
@ -1179,9 +1215,17 @@ int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local)
|
|||
return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* drbd_bm_write_hinted() - Write bitmap pages with "hint" marks, if they have changed.
|
||||
* @mdev: DRBD device.
|
||||
*/
|
||||
int drbd_bm_write_hinted(struct drbd_conf *mdev) __must_hold(local)
|
||||
{
|
||||
return bm_rw(mdev, WRITE, BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* drbd_bm_write_page: Writes a PAGE_SIZE aligned piece of bitmap
|
||||
* drbd_bm_write_page() - Writes a PAGE_SIZE aligned piece of bitmap
|
||||
* @mdev: DRBD device.
|
||||
* @idx: bitmap page index
|
||||
*
|
||||
|
@ -1222,11 +1266,11 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc
|
|||
}
|
||||
|
||||
bm_page_io_async(ctx, idx, WRITE_SYNC);
|
||||
wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done);
|
||||
wait_until_done_or_force_detached(mdev, mdev->ldev, &ctx->done);
|
||||
|
||||
if (ctx->error)
|
||||
drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
|
||||
/* that should force detach, so the in memory bitmap will be
|
||||
/* that causes us to detach, so the in memory bitmap will be
|
||||
* gone in a moment as well. */
|
||||
|
||||
mdev->bm_writ_cnt++;
|
||||
|
@ -1289,8 +1333,10 @@ static unsigned long bm_find_next(struct drbd_conf *mdev,
|
|||
struct drbd_bitmap *b = mdev->bitmap;
|
||||
unsigned long i = DRBD_END_OF_BITMAP;
|
||||
|
||||
ERR_IF(!b) return i;
|
||||
ERR_IF(!b->bm_pages) return i;
|
||||
if (!expect(b))
|
||||
return i;
|
||||
if (!expect(b->bm_pages))
|
||||
return i;
|
||||
|
||||
spin_lock_irq(&b->bm_lock);
|
||||
if (BM_DONT_TEST & b->bm_flags)
|
||||
|
@ -1391,8 +1437,10 @@ static int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
|
|||
struct drbd_bitmap *b = mdev->bitmap;
|
||||
int c = 0;
|
||||
|
||||
ERR_IF(!b) return 1;
|
||||
ERR_IF(!b->bm_pages) return 0;
|
||||
if (!expect(b))
|
||||
return 1;
|
||||
if (!expect(b->bm_pages))
|
||||
return 0;
|
||||
|
||||
spin_lock_irqsave(&b->bm_lock, flags);
|
||||
if ((val ? BM_DONT_SET : BM_DONT_CLEAR) & b->bm_flags)
|
||||
|
@ -1423,13 +1471,21 @@ static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b,
|
|||
{
|
||||
int i;
|
||||
int bits;
|
||||
int changed = 0;
|
||||
unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr]);
|
||||
for (i = first_word; i < last_word; i++) {
|
||||
bits = hweight_long(paddr[i]);
|
||||
paddr[i] = ~0UL;
|
||||
b->bm_set += BITS_PER_LONG - bits;
|
||||
changed += BITS_PER_LONG - bits;
|
||||
}
|
||||
kunmap_atomic(paddr);
|
||||
if (changed) {
|
||||
/* We only need lazy writeout, the information is still in the
|
||||
* remote bitmap as well, and is reconstructed during the next
|
||||
* bitmap exchange, if lost locally due to a crash. */
|
||||
bm_set_page_lazy_writeout(b->bm_pages[page_nr]);
|
||||
b->bm_set += changed;
|
||||
}
|
||||
}
|
||||
|
||||
/* Same thing as drbd_bm_set_bits,
|
||||
|
@ -1524,8 +1580,10 @@ int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr)
|
|||
unsigned long *p_addr;
|
||||
int i;
|
||||
|
||||
ERR_IF(!b) return 0;
|
||||
ERR_IF(!b->bm_pages) return 0;
|
||||
if (!expect(b))
|
||||
return 0;
|
||||
if (!expect(b->bm_pages))
|
||||
return 0;
|
||||
|
||||
spin_lock_irqsave(&b->bm_lock, flags);
|
||||
if (BM_DONT_TEST & b->bm_flags)
|
||||
|
@ -1559,8 +1617,10 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
|
|||
* robust in case we screwed up elsewhere, in that case pretend there
|
||||
* was one dirty bit in the requested area, so we won't try to do a
|
||||
* local read there (no bitmap probably implies no disk) */
|
||||
ERR_IF(!b) return 1;
|
||||
ERR_IF(!b->bm_pages) return 1;
|
||||
if (!expect(b))
|
||||
return 1;
|
||||
if (!expect(b->bm_pages))
|
||||
return 1;
|
||||
|
||||
spin_lock_irqsave(&b->bm_lock, flags);
|
||||
if (BM_DONT_TEST & b->bm_flags)
|
||||
|
@ -1573,11 +1633,10 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
|
|||
bm_unmap(p_addr);
|
||||
p_addr = bm_map_pidx(b, idx);
|
||||
}
|
||||
ERR_IF (bitnr >= b->bm_bits) {
|
||||
dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits);
|
||||
} else {
|
||||
if (expect(bitnr < b->bm_bits))
|
||||
c += (0 != test_bit_le(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr));
|
||||
}
|
||||
else
|
||||
dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits);
|
||||
}
|
||||
if (p_addr)
|
||||
bm_unmap(p_addr);
|
||||
|
@ -1607,8 +1666,10 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr)
|
|||
unsigned long flags;
|
||||
unsigned long *p_addr, *bm;
|
||||
|
||||
ERR_IF(!b) return 0;
|
||||
ERR_IF(!b->bm_pages) return 0;
|
||||
if (!expect(b))
|
||||
return 0;
|
||||
if (!expect(b->bm_pages))
|
||||
return 0;
|
||||
|
||||
spin_lock_irqsave(&b->bm_lock, flags);
|
||||
if (BM_DONT_TEST & b->bm_flags)
|
||||
|
@ -1630,47 +1691,3 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr)
|
|||
spin_unlock_irqrestore(&b->bm_lock, flags);
|
||||
return count;
|
||||
}
|
||||
|
||||
/* Set all bits covered by the AL-extent al_enr.
|
||||
* Returns number of bits changed. */
|
||||
unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr)
|
||||
{
|
||||
struct drbd_bitmap *b = mdev->bitmap;
|
||||
unsigned long *p_addr, *bm;
|
||||
unsigned long weight;
|
||||
unsigned long s, e;
|
||||
int count, i, do_now;
|
||||
ERR_IF(!b) return 0;
|
||||
ERR_IF(!b->bm_pages) return 0;
|
||||
|
||||
spin_lock_irq(&b->bm_lock);
|
||||
if (BM_DONT_SET & b->bm_flags)
|
||||
bm_print_lock_info(mdev);
|
||||
weight = b->bm_set;
|
||||
|
||||
s = al_enr * BM_WORDS_PER_AL_EXT;
|
||||
e = min_t(size_t, s + BM_WORDS_PER_AL_EXT, b->bm_words);
|
||||
/* assert that s and e are on the same page */
|
||||
D_ASSERT((e-1) >> (PAGE_SHIFT - LN2_BPL + 3)
|
||||
== s >> (PAGE_SHIFT - LN2_BPL + 3));
|
||||
count = 0;
|
||||
if (s < b->bm_words) {
|
||||
i = do_now = e-s;
|
||||
p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s));
|
||||
bm = p_addr + MLPP(s);
|
||||
while (i--) {
|
||||
count += hweight_long(*bm);
|
||||
*bm = -1UL;
|
||||
bm++;
|
||||
}
|
||||
bm_unmap(p_addr);
|
||||
b->bm_set += do_now*BITS_PER_LONG - count;
|
||||
if (e == b->bm_words)
|
||||
b->bm_set -= bm_clear_surplus(b);
|
||||
} else {
|
||||
dev_err(DEV, "start offset (%lu) too large in drbd_bm_ALe_set_all\n", s);
|
||||
}
|
||||
weight = b->bm_set - weight;
|
||||
spin_unlock_irq(&b->bm_lock);
|
||||
return weight;
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,207 @@
|
|||
#include <asm/bug.h>
|
||||
#include <linux/rbtree_augmented.h>
|
||||
#include "drbd_interval.h"
|
||||
|
||||
/**
|
||||
* interval_end - return end of @node
|
||||
*/
|
||||
static inline
|
||||
sector_t interval_end(struct rb_node *node)
|
||||
{
|
||||
struct drbd_interval *this = rb_entry(node, struct drbd_interval, rb);
|
||||
return this->end;
|
||||
}
|
||||
|
||||
/**
|
||||
* compute_subtree_last - compute end of @node
|
||||
*
|
||||
* The end of an interval is the highest (start + (size >> 9)) value of this
|
||||
* node and of its children. Called for @node and its parents whenever the end
|
||||
* may have changed.
|
||||
*/
|
||||
static inline sector_t
|
||||
compute_subtree_last(struct drbd_interval *node)
|
||||
{
|
||||
sector_t max = node->sector + (node->size >> 9);
|
||||
|
||||
if (node->rb.rb_left) {
|
||||
sector_t left = interval_end(node->rb.rb_left);
|
||||
if (left > max)
|
||||
max = left;
|
||||
}
|
||||
if (node->rb.rb_right) {
|
||||
sector_t right = interval_end(node->rb.rb_right);
|
||||
if (right > max)
|
||||
max = right;
|
||||
}
|
||||
return max;
|
||||
}
|
||||
|
||||
static void augment_propagate(struct rb_node *rb, struct rb_node *stop)
|
||||
{
|
||||
while (rb != stop) {
|
||||
struct drbd_interval *node = rb_entry(rb, struct drbd_interval, rb);
|
||||
sector_t subtree_last = compute_subtree_last(node);
|
||||
if (node->end == subtree_last)
|
||||
break;
|
||||
node->end = subtree_last;
|
||||
rb = rb_parent(&node->rb);
|
||||
}
|
||||
}
|
||||
|
||||
static void augment_copy(struct rb_node *rb_old, struct rb_node *rb_new)
|
||||
{
|
||||
struct drbd_interval *old = rb_entry(rb_old, struct drbd_interval, rb);
|
||||
struct drbd_interval *new = rb_entry(rb_new, struct drbd_interval, rb);
|
||||
|
||||
new->end = old->end;
|
||||
}
|
||||
|
||||
static void augment_rotate(struct rb_node *rb_old, struct rb_node *rb_new)
|
||||
{
|
||||
struct drbd_interval *old = rb_entry(rb_old, struct drbd_interval, rb);
|
||||
struct drbd_interval *new = rb_entry(rb_new, struct drbd_interval, rb);
|
||||
|
||||
new->end = old->end;
|
||||
old->end = compute_subtree_last(old);
|
||||
}
|
||||
|
||||
static const struct rb_augment_callbacks augment_callbacks = {
|
||||
augment_propagate,
|
||||
augment_copy,
|
||||
augment_rotate,
|
||||
};
|
||||
|
||||
/**
|
||||
* drbd_insert_interval - insert a new interval into a tree
|
||||
*/
|
||||
bool
|
||||
drbd_insert_interval(struct rb_root *root, struct drbd_interval *this)
|
||||
{
|
||||
struct rb_node **new = &root->rb_node, *parent = NULL;
|
||||
|
||||
BUG_ON(!IS_ALIGNED(this->size, 512));
|
||||
|
||||
while (*new) {
|
||||
struct drbd_interval *here =
|
||||
rb_entry(*new, struct drbd_interval, rb);
|
||||
|
||||
parent = *new;
|
||||
if (this->sector < here->sector)
|
||||
new = &(*new)->rb_left;
|
||||
else if (this->sector > here->sector)
|
||||
new = &(*new)->rb_right;
|
||||
else if (this < here)
|
||||
new = &(*new)->rb_left;
|
||||
else if (this > here)
|
||||
new = &(*new)->rb_right;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
rb_link_node(&this->rb, parent, new);
|
||||
rb_insert_augmented(&this->rb, root, &augment_callbacks);
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* drbd_contains_interval - check if a tree contains a given interval
|
||||
* @sector: start sector of @interval
|
||||
* @interval: may not be a valid pointer
|
||||
*
|
||||
* Returns if the tree contains the node @interval with start sector @start.
|
||||
* Does not dereference @interval until @interval is known to be a valid object
|
||||
* in @tree. Returns %false if @interval is in the tree but with a different
|
||||
* sector number.
|
||||
*/
|
||||
bool
|
||||
drbd_contains_interval(struct rb_root *root, sector_t sector,
|
||||
struct drbd_interval *interval)
|
||||
{
|
||||
struct rb_node *node = root->rb_node;
|
||||
|
||||
while (node) {
|
||||
struct drbd_interval *here =
|
||||
rb_entry(node, struct drbd_interval, rb);
|
||||
|
||||
if (sector < here->sector)
|
||||
node = node->rb_left;
|
||||
else if (sector > here->sector)
|
||||
node = node->rb_right;
|
||||
else if (interval < here)
|
||||
node = node->rb_left;
|
||||
else if (interval > here)
|
||||
node = node->rb_right;
|
||||
else
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* drbd_remove_interval - remove an interval from a tree
|
||||
*/
|
||||
void
|
||||
drbd_remove_interval(struct rb_root *root, struct drbd_interval *this)
|
||||
{
|
||||
rb_erase_augmented(&this->rb, root, &augment_callbacks);
|
||||
}
|
||||
|
||||
/**
|
||||
* drbd_find_overlap - search for an interval overlapping with [sector, sector + size)
|
||||
* @sector: start sector
|
||||
* @size: size, aligned to 512 bytes
|
||||
*
|
||||
* Returns an interval overlapping with [sector, sector + size), or NULL if
|
||||
* there is none. When there is more than one overlapping interval in the
|
||||
* tree, the interval with the lowest start sector is returned, and all other
|
||||
* overlapping intervals will be on the right side of the tree, reachable with
|
||||
* rb_next().
|
||||
*/
|
||||
struct drbd_interval *
|
||||
drbd_find_overlap(struct rb_root *root, sector_t sector, unsigned int size)
|
||||
{
|
||||
struct rb_node *node = root->rb_node;
|
||||
struct drbd_interval *overlap = NULL;
|
||||
sector_t end = sector + (size >> 9);
|
||||
|
||||
BUG_ON(!IS_ALIGNED(size, 512));
|
||||
|
||||
while (node) {
|
||||
struct drbd_interval *here =
|
||||
rb_entry(node, struct drbd_interval, rb);
|
||||
|
||||
if (node->rb_left &&
|
||||
sector < interval_end(node->rb_left)) {
|
||||
/* Overlap if any must be on left side */
|
||||
node = node->rb_left;
|
||||
} else if (here->sector < end &&
|
||||
sector < here->sector + (here->size >> 9)) {
|
||||
overlap = here;
|
||||
break;
|
||||
} else if (sector >= here->sector) {
|
||||
/* Overlap if any must be on right side */
|
||||
node = node->rb_right;
|
||||
} else
|
||||
break;
|
||||
}
|
||||
return overlap;
|
||||
}
|
||||
|
||||
struct drbd_interval *
|
||||
drbd_next_overlap(struct drbd_interval *i, sector_t sector, unsigned int size)
|
||||
{
|
||||
sector_t end = sector + (size >> 9);
|
||||
struct rb_node *node;
|
||||
|
||||
for (;;) {
|
||||
node = rb_next(&i->rb);
|
||||
if (!node)
|
||||
return NULL;
|
||||
i = rb_entry(node, struct drbd_interval, rb);
|
||||
if (i->sector >= end)
|
||||
return NULL;
|
||||
if (sector < i->sector + (i->size >> 9))
|
||||
return i;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,40 @@
|
|||
#ifndef __DRBD_INTERVAL_H
|
||||
#define __DRBD_INTERVAL_H
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/rbtree.h>
|
||||
|
||||
struct drbd_interval {
|
||||
struct rb_node rb;
|
||||
sector_t sector; /* start sector of the interval */
|
||||
unsigned int size; /* size in bytes */
|
||||
sector_t end; /* highest interval end in subtree */
|
||||
int local:1 /* local or remote request? */;
|
||||
int waiting:1;
|
||||
};
|
||||
|
||||
static inline void drbd_clear_interval(struct drbd_interval *i)
|
||||
{
|
||||
RB_CLEAR_NODE(&i->rb);
|
||||
}
|
||||
|
||||
static inline bool drbd_interval_empty(struct drbd_interval *i)
|
||||
{
|
||||
return RB_EMPTY_NODE(&i->rb);
|
||||
}
|
||||
|
||||
extern bool drbd_insert_interval(struct rb_root *, struct drbd_interval *);
|
||||
extern bool drbd_contains_interval(struct rb_root *, sector_t,
|
||||
struct drbd_interval *);
|
||||
extern void drbd_remove_interval(struct rb_root *, struct drbd_interval *);
|
||||
extern struct drbd_interval *drbd_find_overlap(struct rb_root *, sector_t,
|
||||
unsigned int);
|
||||
extern struct drbd_interval *drbd_next_overlap(struct drbd_interval *, sector_t,
|
||||
unsigned int);
|
||||
|
||||
/*
 * Iterate over all intervals in @root overlapping [sector, sector + size).
 * @sector and @size are evaluated once per iteration, so pass expressions
 * without side effects.
 */
#define drbd_for_each_overlap(i, root, sector, size)		\
	for ((i) = drbd_find_overlap(root, sector, size);	\
	     (i);						\
	     (i) = drbd_next_overlap((i), sector, size))
|
||||
|
||||
#endif /* __DRBD_INTERVAL_H */
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,55 @@
|
|||
#include "drbd_wrappers.h"
|
||||
#include <linux/kernel.h>
|
||||
#include <net/netlink.h>
|
||||
#include <linux/drbd_genl_api.h>
|
||||
#include "drbd_nla.h"
|
||||
|
||||
static int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla)
|
||||
{
|
||||
struct nlattr *head = nla_data(nla);
|
||||
int len = nla_len(nla);
|
||||
int rem;
|
||||
|
||||
/*
|
||||
* validate_nla (called from nla_parse_nested) ignores attributes
|
||||
* beyond maxtype, and does not understand the DRBD_GENLA_F_MANDATORY flag.
|
||||
* In order to have it validate attributes with the DRBD_GENLA_F_MANDATORY
|
||||
* flag set also, check and remove that flag before calling
|
||||
* nla_parse_nested.
|
||||
*/
|
||||
|
||||
nla_for_each_attr(nla, head, len, rem) {
|
||||
if (nla->nla_type & DRBD_GENLA_F_MANDATORY) {
|
||||
nla->nla_type &= ~DRBD_GENLA_F_MANDATORY;
|
||||
if (nla_type(nla) > maxtype)
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * nla_parse_nested() preceded by the DRBD mandatory-flag check; the parse
 * is skipped and the error returned if that check fails.
 */
int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla,
			  const struct nla_policy *policy)
{
	int err = drbd_nla_check_mandatory(maxtype, nla);

	if (err)
		return err;

	return nla_parse_nested(tb, maxtype, nla, policy);
}
|
||||
|
||||
/*
 * nla_find_nested() preceded by the DRBD mandatory-flag check.
 *
 * If any nested attribute has the DRBD_GENLA_F_MANDATORY flag set and we
 * don't know about that attribute, all the nested attributes are rejected
 * and the error is returned as an ERR_PTR().
 */
struct nlattr *drbd_nla_find_nested(int maxtype, struct nlattr *nla, int attrtype)
{
	int err = drbd_nla_check_mandatory(maxtype, nla);

	return err ? ERR_PTR(err) : nla_find_nested(nla, attrtype);
}
|
|
@ -0,0 +1,8 @@
|
|||
#ifndef __DRBD_NLA_H
|
||||
#define __DRBD_NLA_H
|
||||
|
||||
extern int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla,
|
||||
const struct nla_policy *policy);
|
||||
extern struct nlattr *drbd_nla_find_nested(int maxtype, struct nlattr *nla, int attrtype);
|
||||
|
||||
#endif /* __DRBD_NLA_H */
|
|
@ -167,18 +167,24 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
|
|||
* we convert to sectors in the display below. */
|
||||
unsigned long bm_bits = drbd_bm_bits(mdev);
|
||||
unsigned long bit_pos;
|
||||
unsigned long long stop_sector = 0;
|
||||
if (mdev->state.conn == C_VERIFY_S ||
|
||||
mdev->state.conn == C_VERIFY_T)
|
||||
mdev->state.conn == C_VERIFY_T) {
|
||||
bit_pos = bm_bits - mdev->ov_left;
|
||||
else
|
||||
if (verify_can_do_stop_sector(mdev))
|
||||
stop_sector = mdev->ov_stop_sector;
|
||||
} else
|
||||
bit_pos = mdev->bm_resync_fo;
|
||||
/* Total sectors may be slightly off for oddly
|
||||
* sized devices. So what. */
|
||||
seq_printf(seq,
|
||||
"\t%3d%% sector pos: %llu/%llu\n",
|
||||
"\t%3d%% sector pos: %llu/%llu",
|
||||
(int)(bit_pos / (bm_bits/100+1)),
|
||||
(unsigned long long)bit_pos * BM_SECT_PER_BIT,
|
||||
(unsigned long long)bm_bits * BM_SECT_PER_BIT);
|
||||
if (stop_sector != 0 && stop_sector != ULLONG_MAX)
|
||||
seq_printf(seq, " stop sector: %llu", stop_sector);
|
||||
seq_printf(seq, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -194,9 +200,11 @@ static void resync_dump_detail(struct seq_file *seq, struct lc_element *e)
|
|||
|
||||
static int drbd_seq_show(struct seq_file *seq, void *v)
|
||||
{
|
||||
int i, hole = 0;
|
||||
int i, prev_i = -1;
|
||||
const char *sn;
|
||||
struct drbd_conf *mdev;
|
||||
struct net_conf *nc;
|
||||
char wp;
|
||||
|
||||
static char write_ordering_chars[] = {
|
||||
[WO_none] = 'n',
|
||||
|
@ -227,16 +235,11 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
|
|||
oos .. known out-of-sync kB
|
||||
*/
|
||||
|
||||
for (i = 0; i < minor_count; i++) {
|
||||
mdev = minor_to_mdev(i);
|
||||
if (!mdev) {
|
||||
hole = 1;
|
||||
continue;
|
||||
}
|
||||
if (hole) {
|
||||
hole = 0;
|
||||
rcu_read_lock();
|
||||
idr_for_each_entry(&minors, mdev, i) {
|
||||
if (prev_i != i - 1)
|
||||
seq_printf(seq, "\n");
|
||||
}
|
||||
prev_i = i;
|
||||
|
||||
sn = drbd_conn_str(mdev->state.conn);
|
||||
|
||||
|
@ -248,6 +251,8 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
|
|||
/* reset mdev->congestion_reason */
|
||||
bdi_rw_congested(&mdev->rq_queue->backing_dev_info);
|
||||
|
||||
nc = rcu_dereference(mdev->tconn->net_conf);
|
||||
wp = nc ? nc->wire_protocol - DRBD_PROT_A + 'A' : ' ';
|
||||
seq_printf(seq,
|
||||
"%2d: cs:%s ro:%s/%s ds:%s/%s %c %c%c%c%c%c%c\n"
|
||||
" ns:%u nr:%u dw:%u dr:%u al:%u bm:%u "
|
||||
|
@ -257,9 +262,8 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
|
|||
drbd_role_str(mdev->state.peer),
|
||||
drbd_disk_str(mdev->state.disk),
|
||||
drbd_disk_str(mdev->state.pdsk),
|
||||
(mdev->net_conf == NULL ? ' ' :
|
||||
(mdev->net_conf->wire_protocol - DRBD_PROT_A+'A')),
|
||||
is_susp(mdev->state) ? 's' : 'r',
|
||||
wp,
|
||||
drbd_suspended(mdev) ? 's' : 'r',
|
||||
mdev->state.aftr_isp ? 'a' : '-',
|
||||
mdev->state.peer_isp ? 'p' : '-',
|
||||
mdev->state.user_isp ? 'u' : '-',
|
||||
|
@ -276,8 +280,8 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
|
|||
atomic_read(&mdev->rs_pending_cnt),
|
||||
atomic_read(&mdev->unacked_cnt),
|
||||
atomic_read(&mdev->ap_bio_cnt),
|
||||
mdev->epochs,
|
||||
write_ordering_chars[mdev->write_ordering]
|
||||
mdev->tconn->epochs,
|
||||
write_ordering_chars[mdev->tconn->write_ordering]
|
||||
);
|
||||
seq_printf(seq, " oos:%llu\n",
|
||||
Bit2KB((unsigned long long)
|
||||
|
@ -302,6 +306,7 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
|
|||
}
|
||||
}
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -77,40 +77,41 @@
|
|||
*/
|
||||
|
||||
enum drbd_req_event {
|
||||
created,
|
||||
to_be_send,
|
||||
to_be_submitted,
|
||||
CREATED,
|
||||
TO_BE_SENT,
|
||||
TO_BE_SUBMITTED,
|
||||
|
||||
/* XXX yes, now I am inconsistent...
|
||||
* these are not "events" but "actions"
|
||||
* oh, well... */
|
||||
queue_for_net_write,
|
||||
queue_for_net_read,
|
||||
queue_for_send_oos,
|
||||
QUEUE_FOR_NET_WRITE,
|
||||
QUEUE_FOR_NET_READ,
|
||||
QUEUE_FOR_SEND_OOS,
|
||||
|
||||
send_canceled,
|
||||
send_failed,
|
||||
handed_over_to_network,
|
||||
oos_handed_to_network,
|
||||
connection_lost_while_pending,
|
||||
read_retry_remote_canceled,
|
||||
recv_acked_by_peer,
|
||||
write_acked_by_peer,
|
||||
write_acked_by_peer_and_sis, /* and set_in_sync */
|
||||
conflict_discarded_by_peer,
|
||||
neg_acked,
|
||||
barrier_acked, /* in protocol A and B */
|
||||
data_received, /* (remote read) */
|
||||
SEND_CANCELED,
|
||||
SEND_FAILED,
|
||||
HANDED_OVER_TO_NETWORK,
|
||||
OOS_HANDED_TO_NETWORK,
|
||||
CONNECTION_LOST_WHILE_PENDING,
|
||||
READ_RETRY_REMOTE_CANCELED,
|
||||
RECV_ACKED_BY_PEER,
|
||||
WRITE_ACKED_BY_PEER,
|
||||
WRITE_ACKED_BY_PEER_AND_SIS, /* and set_in_sync */
|
||||
CONFLICT_RESOLVED,
|
||||
POSTPONE_WRITE,
|
||||
NEG_ACKED,
|
||||
BARRIER_ACKED, /* in protocol A and B */
|
||||
DATA_RECEIVED, /* (remote read) */
|
||||
|
||||
read_completed_with_error,
|
||||
read_ahead_completed_with_error,
|
||||
write_completed_with_error,
|
||||
abort_disk_io,
|
||||
completed_ok,
|
||||
resend,
|
||||
fail_frozen_disk_io,
|
||||
restart_frozen_disk_io,
|
||||
nothing, /* for tracing only */
|
||||
READ_COMPLETED_WITH_ERROR,
|
||||
READ_AHEAD_COMPLETED_WITH_ERROR,
|
||||
WRITE_COMPLETED_WITH_ERROR,
|
||||
ABORT_DISK_IO,
|
||||
COMPLETED_OK,
|
||||
RESEND,
|
||||
FAIL_FROZEN_DISK_IO,
|
||||
RESTART_FROZEN_DISK_IO,
|
||||
NOTHING,
|
||||
};
|
||||
|
||||
/* encoding of request states for now. we don't actually need that many bits.
|
||||
|
@ -142,8 +143,8 @@ enum drbd_req_state_bits {
|
|||
* recv_ack (B) or implicit "ack" (A),
|
||||
* still waiting for the barrier ack.
|
||||
* master_bio may already be completed and invalidated.
|
||||
* 11100: write_acked (C),
|
||||
* data_received (for remote read, any protocol)
|
||||
* 11100: write acked (C),
|
||||
* data received (for remote read, any protocol)
|
||||
* or finally the barrier ack has arrived (B,A)...
|
||||
* request can be freed
|
||||
* 01100: neg-acked (write, protocol C)
|
||||
|
@ -198,6 +199,22 @@ enum drbd_req_state_bits {
|
|||
|
||||
/* Should call drbd_al_complete_io() for this request... */
|
||||
__RQ_IN_ACT_LOG,
|
||||
|
||||
/* The peer has sent a retry ACK */
|
||||
__RQ_POSTPONED,
|
||||
|
||||
/* would have been completed,
|
||||
* but was not, because of drbd_suspended() */
|
||||
__RQ_COMPLETION_SUSP,
|
||||
|
||||
/* We expect a receive ACK (wire proto B) */
|
||||
__RQ_EXP_RECEIVE_ACK,
|
||||
|
||||
/* We expect a write ACK (wire proto C) */
|
||||
__RQ_EXP_WRITE_ACK,
|
||||
|
||||
/* waiting for a barrier ack, did an extra kref_get */
|
||||
__RQ_EXP_BARR_ACK,
|
||||
};
|
||||
|
||||
#define RQ_LOCAL_PENDING (1UL << __RQ_LOCAL_PENDING)
|
||||
|
@ -219,56 +236,16 @@ enum drbd_req_state_bits {
|
|||
|
||||
#define RQ_WRITE (1UL << __RQ_WRITE)
|
||||
#define RQ_IN_ACT_LOG (1UL << __RQ_IN_ACT_LOG)
|
||||
#define RQ_POSTPONED (1UL << __RQ_POSTPONED)
|
||||
#define RQ_COMPLETION_SUSP (1UL << __RQ_COMPLETION_SUSP)
|
||||
#define RQ_EXP_RECEIVE_ACK (1UL << __RQ_EXP_RECEIVE_ACK)
|
||||
#define RQ_EXP_WRITE_ACK (1UL << __RQ_EXP_WRITE_ACK)
|
||||
#define RQ_EXP_BARR_ACK (1UL << __RQ_EXP_BARR_ACK)
|
||||
|
||||
/* For waking up the frozen transfer log mod_req() has to return if the request
|
||||
should be counted in the epoch object*/
|
||||
#define MR_WRITE_SHIFT 0
|
||||
#define MR_WRITE (1 << MR_WRITE_SHIFT)
|
||||
#define MR_READ_SHIFT 1
|
||||
#define MR_READ (1 << MR_READ_SHIFT)
|
||||
|
||||
/* epoch entries */
|
||||
static inline
|
||||
struct hlist_head *ee_hash_slot(struct drbd_conf *mdev, sector_t sector)
|
||||
{
|
||||
BUG_ON(mdev->ee_hash_s == 0);
|
||||
return mdev->ee_hash +
|
||||
((unsigned int)(sector>>HT_SHIFT) % mdev->ee_hash_s);
|
||||
}
|
||||
|
||||
/* transfer log (drbd_request objects) */
|
||||
static inline
|
||||
struct hlist_head *tl_hash_slot(struct drbd_conf *mdev, sector_t sector)
|
||||
{
|
||||
BUG_ON(mdev->tl_hash_s == 0);
|
||||
return mdev->tl_hash +
|
||||
((unsigned int)(sector>>HT_SHIFT) % mdev->tl_hash_s);
|
||||
}
|
||||
|
||||
/* application reads (drbd_request objects) */
|
||||
static struct hlist_head *ar_hash_slot(struct drbd_conf *mdev, sector_t sector)
|
||||
{
|
||||
return mdev->app_reads_hash
|
||||
+ ((unsigned int)(sector) % APP_R_HSIZE);
|
||||
}
|
||||
|
||||
/* when we receive the answer for a read request,
|
||||
* verify that we actually know about it */
|
||||
static inline struct drbd_request *_ar_id_to_req(struct drbd_conf *mdev,
|
||||
u64 id, sector_t sector)
|
||||
{
|
||||
struct hlist_head *slot = ar_hash_slot(mdev, sector);
|
||||
struct hlist_node *n;
|
||||
struct drbd_request *req;
|
||||
|
||||
hlist_for_each_entry(req, n, slot, collision) {
|
||||
if ((unsigned long)req == (unsigned long)id) {
|
||||
D_ASSERT(req->sector == sector);
|
||||
return req;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
#define MR_WRITE 1
|
||||
#define MR_READ 2
|
||||
|
||||
static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bio *bio_src)
|
||||
{
|
||||
|
@ -278,41 +255,10 @@ static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bi
|
|||
req->private_bio = bio;
|
||||
|
||||
bio->bi_private = req;
|
||||
bio->bi_end_io = drbd_endio_pri;
|
||||
bio->bi_end_io = drbd_request_endio;
|
||||
bio->bi_next = NULL;
|
||||
}
|
||||
|
||||
static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev,
|
||||
struct bio *bio_src)
|
||||
{
|
||||
struct drbd_request *req =
|
||||
mempool_alloc(drbd_request_mempool, GFP_NOIO);
|
||||
if (likely(req)) {
|
||||
drbd_req_make_private_bio(req, bio_src);
|
||||
|
||||
req->rq_state = bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0;
|
||||
req->mdev = mdev;
|
||||
req->master_bio = bio_src;
|
||||
req->epoch = 0;
|
||||
req->sector = bio_src->bi_sector;
|
||||
req->size = bio_src->bi_size;
|
||||
INIT_HLIST_NODE(&req->collision);
|
||||
INIT_LIST_HEAD(&req->tl_requests);
|
||||
INIT_LIST_HEAD(&req->w.list);
|
||||
}
|
||||
return req;
|
||||
}
|
||||
|
||||
static inline void drbd_req_free(struct drbd_request *req)
|
||||
{
|
||||
mempool_free(req, drbd_request_mempool);
|
||||
}
|
||||
|
||||
static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
|
||||
{
|
||||
return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
|
||||
}
|
||||
|
||||
/* Short lived temporary struct on the stack.
|
||||
* We could squirrel the error to be returned into
|
||||
* bio->bi_size, or similar. But that would be too ugly. */
|
||||
|
@ -321,6 +267,7 @@ struct bio_and_error {
|
|||
int error;
|
||||
};
|
||||
|
||||
extern void drbd_req_destroy(struct kref *kref);
|
||||
extern void _req_may_be_done(struct drbd_request *req,
|
||||
struct bio_and_error *m);
|
||||
extern int __req_mod(struct drbd_request *req, enum drbd_req_event what,
|
||||
|
@ -328,13 +275,17 @@ extern int __req_mod(struct drbd_request *req, enum drbd_req_event what,
|
|||
extern void complete_master_bio(struct drbd_conf *mdev,
|
||||
struct bio_and_error *m);
|
||||
extern void request_timer_fn(unsigned long data);
|
||||
extern void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what);
|
||||
extern void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what);
|
||||
extern void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what);
|
||||
|
||||
/* this is in drbd_main.c */
|
||||
extern void drbd_restart_request(struct drbd_request *req);
|
||||
|
||||
/* use this if you don't want to deal with calling complete_master_bio()
|
||||
* outside the spinlock, e.g. when walking some list on cleanup. */
|
||||
static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what)
|
||||
{
|
||||
struct drbd_conf *mdev = req->mdev;
|
||||
struct drbd_conf *mdev = req->w.mdev;
|
||||
struct bio_and_error m;
|
||||
int rv;
|
||||
|
||||
|
@ -354,13 +305,13 @@ static inline int req_mod(struct drbd_request *req,
|
|||
enum drbd_req_event what)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct drbd_conf *mdev = req->mdev;
|
||||
struct drbd_conf *mdev = req->w.mdev;
|
||||
struct bio_and_error m;
|
||||
int rv;
|
||||
|
||||
spin_lock_irqsave(&mdev->req_lock, flags);
|
||||
spin_lock_irqsave(&mdev->tconn->req_lock, flags);
|
||||
rv = __req_mod(req, what, &m);
|
||||
spin_unlock_irqrestore(&mdev->req_lock, flags);
|
||||
spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
|
||||
|
||||
if (m.bio)
|
||||
complete_master_bio(mdev, &m);
|
||||
|
@ -368,7 +319,7 @@ static inline int req_mod(struct drbd_request *req,
|
|||
return rv;
|
||||
}
|
||||
|
||||
static inline bool drbd_should_do_remote(union drbd_state s)
|
||||
static inline bool drbd_should_do_remote(union drbd_dev_state s)
|
||||
{
|
||||
return s.pdsk == D_UP_TO_DATE ||
|
||||
(s.pdsk >= D_INCONSISTENT &&
|
||||
|
@ -378,7 +329,7 @@ static inline bool drbd_should_do_remote(union drbd_state s)
|
|||
That is equivalent since before 96 IO was frozen in the C_WF_BITMAP*
|
||||
states. */
|
||||
}
|
||||
static inline bool drbd_should_send_oos(union drbd_state s)
|
||||
static inline bool drbd_should_send_out_of_sync(union drbd_dev_state s)
|
||||
{
|
||||
return s.conn == C_AHEAD || s.conn == C_WF_BITMAP_S;
|
||||
/* pdsk = D_INCONSISTENT as a consequence. Protocol 96 check not necessary
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,161 @@
|
|||
#ifndef DRBD_STATE_H
|
||||
#define DRBD_STATE_H
|
||||
|
||||
struct drbd_conf;
|
||||
struct drbd_tconn;
|
||||
|
||||
/**
|
||||
* DOC: DRBD State macros
|
||||
*
|
||||
* These macros are used to express state changes in easily readable form.
|
||||
*
|
||||
* The NS macros expand to a mask and a value, that can be bit ored onto the
|
||||
* current state as soon as the spinlock (req_lock) was taken.
|
||||
*
|
||||
* The _NS macros are used for state functions that get called with the
|
||||
* spinlock. These macros expand directly to the new state value.
|
||||
*
|
||||
* Besides the basic forms NS() and _NS() additional _?NS[23] are defined
|
||||
* to express state changes that affect more than one aspect of the state.
|
||||
*
|
||||
* E.g. NS2(conn, C_CONNECTED, peer, R_SECONDARY)
|
||||
* Means that the network connection was established and that the peer
|
||||
* is in secondary role.
|
||||
*/
|
||||
#define role_MASK R_MASK
|
||||
#define peer_MASK R_MASK
|
||||
#define disk_MASK D_MASK
|
||||
#define pdsk_MASK D_MASK
|
||||
#define conn_MASK C_MASK
|
||||
#define susp_MASK 1
|
||||
#define user_isp_MASK 1
|
||||
#define aftr_isp_MASK 1
|
||||
#define susp_nod_MASK 1
|
||||
#define susp_fen_MASK 1
|
||||
|
||||
#define NS(T, S) \
|
||||
({ union drbd_state mask; mask.i = 0; mask.T = T##_MASK; mask; }), \
|
||||
({ union drbd_state val; val.i = 0; val.T = (S); val; })
|
||||
#define NS2(T1, S1, T2, S2) \
|
||||
({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \
|
||||
mask.T2 = T2##_MASK; mask; }), \
|
||||
({ union drbd_state val; val.i = 0; val.T1 = (S1); \
|
||||
val.T2 = (S2); val; })
|
||||
#define NS3(T1, S1, T2, S2, T3, S3) \
|
||||
({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \
|
||||
mask.T2 = T2##_MASK; mask.T3 = T3##_MASK; mask; }), \
|
||||
({ union drbd_state val; val.i = 0; val.T1 = (S1); \
|
||||
val.T2 = (S2); val.T3 = (S3); val; })
|
||||
|
||||
#define _NS(D, T, S) \
|
||||
D, ({ union drbd_state __ns; __ns = drbd_read_state(D); __ns.T = (S); __ns; })
|
||||
#define _NS2(D, T1, S1, T2, S2) \
|
||||
D, ({ union drbd_state __ns; __ns = drbd_read_state(D); __ns.T1 = (S1); \
|
||||
__ns.T2 = (S2); __ns; })
|
||||
#define _NS3(D, T1, S1, T2, S2, T3, S3) \
|
||||
D, ({ union drbd_state __ns; __ns = drbd_read_state(D); __ns.T1 = (S1); \
|
||||
__ns.T2 = (S2); __ns.T3 = (S3); __ns; })
|
||||
|
||||
/*
 * Flags controlling how a state change is carried out.  Each flag is a
 * single bit; CS_ORDERED and CS_DC_MASK are combinations of other flags.
 * Note that CS_DC_SUSP is not part of CS_DC_MASK.
 */
enum chg_state_flags {
	CS_HARD			= 1 << 0,
	CS_VERBOSE		= 1 << 1,
	CS_WAIT_COMPLETE	= 1 << 2,
	CS_SERIALIZE		= 1 << 3,
	CS_ORDERED		= CS_WAIT_COMPLETE | CS_SERIALIZE,
	CS_LOCAL_ONLY		= 1 << 4, /* Do not consider a device pair wide state change */
	CS_DC_ROLE		= 1 << 5, /* DC = display as connection state change */
	CS_DC_PEER		= 1 << 6,
	CS_DC_CONN		= 1 << 7,
	CS_DC_DISK		= 1 << 8,
	CS_DC_PDSK		= 1 << 9,
	CS_DC_SUSP		= 1 << 10,
	CS_DC_MASK		= CS_DC_ROLE | CS_DC_PEER | CS_DC_CONN |
				  CS_DC_DISK | CS_DC_PDSK,
	CS_IGN_OUTD_FAIL	= 1 << 11,
};
|
||||
|
||||
/*
 * drbd_dev_state and drbd_state are different types, to stress the small
 * difference between them: drbd_dev_state has no suspended flag (.susp) and
 * no "suspended while fence handler runs" flag (.susp_fen).
 *
 * The big-endian branch lists the same fields as the little-endian branch
 * in reverse order; .i gives access to the whole state as one integer.
 */
union drbd_dev_state {
	struct {
#if defined(__LITTLE_ENDIAN_BITFIELD)
		unsigned role:2;	/* 3/4	 primary/secondary/unknown */
		unsigned peer:2;	/* 3/4	 primary/secondary/unknown */
		unsigned conn:5;	/* 17/32 cstates */
		unsigned disk:4;	/* 8/16	 from D_DISKLESS to D_UP_TO_DATE */
		unsigned pdsk:4;	/* 8/16	 from D_DISKLESS to D_UP_TO_DATE */
		unsigned _unused:1;
		unsigned aftr_isp:1;	/* isp .. imposed sync pause */
		unsigned peer_isp:1;
		unsigned user_isp:1;
		unsigned _pad:11;	/* 0	 unused */
#elif defined(__BIG_ENDIAN_BITFIELD)
		unsigned _pad:11;
		unsigned user_isp:1;
		unsigned peer_isp:1;
		unsigned aftr_isp:1;	/* isp .. imposed sync pause */
		unsigned _unused:1;
		unsigned pdsk:4;	/* 8/16	 from D_DISKLESS to D_UP_TO_DATE */
		unsigned disk:4;	/* 8/16	 from D_DISKLESS to D_UP_TO_DATE */
		unsigned conn:5;	/* 17/32 cstates */
		unsigned peer:2;	/* 3/4	 primary/secondary/unknown */
		unsigned role:2;	/* 3/4	 primary/secondary/unknown */
#else
# error "this endianess is not supported"
#endif
	};
	unsigned int i;
};
|
||||
|
||||
extern enum drbd_state_rv drbd_change_state(struct drbd_conf *mdev,
|
||||
enum chg_state_flags f,
|
||||
union drbd_state mask,
|
||||
union drbd_state val);
|
||||
extern void drbd_force_state(struct drbd_conf *, union drbd_state,
|
||||
union drbd_state);
|
||||
extern enum drbd_state_rv _drbd_request_state(struct drbd_conf *,
|
||||
union drbd_state,
|
||||
union drbd_state,
|
||||
enum chg_state_flags);
|
||||
extern enum drbd_state_rv __drbd_set_state(struct drbd_conf *, union drbd_state,
|
||||
enum chg_state_flags,
|
||||
struct completion *done);
|
||||
extern void print_st_err(struct drbd_conf *, union drbd_state,
|
||||
union drbd_state, int);
|
||||
|
||||
enum drbd_state_rv
|
||||
_conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val,
|
||||
enum chg_state_flags flags);
|
||||
|
||||
enum drbd_state_rv
|
||||
conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val,
|
||||
enum chg_state_flags flags);
|
||||
|
||||
extern void drbd_resume_al(struct drbd_conf *mdev);
|
||||
extern bool conn_all_vols_unconf(struct drbd_tconn *tconn);
|
||||
|
||||
/**
|
||||
* drbd_request_state() - Reqest a state change
|
||||
* @mdev: DRBD device.
|
||||
* @mask: mask of state bits to change.
|
||||
* @val: value of new state bits.
|
||||
*
|
||||
* This is the most graceful way of requesting a state change. It is verbose
|
||||
* quite verbose in case the state change is not possible, and all those
|
||||
* state changes are globally serialized.
|
||||
*/
|
||||
static inline int drbd_request_state(struct drbd_conf *mdev,
|
||||
union drbd_state mask,
|
||||
union drbd_state val)
|
||||
{
|
||||
return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED);
|
||||
}
|
||||
|
||||
enum drbd_role conn_highest_role(struct drbd_tconn *tconn);
|
||||
enum drbd_role conn_highest_peer(struct drbd_tconn *tconn);
|
||||
enum drbd_disk_state conn_highest_disk(struct drbd_tconn *tconn);
|
||||
enum drbd_disk_state conn_lowest_disk(struct drbd_tconn *tconn);
|
||||
enum drbd_disk_state conn_highest_pdsk(struct drbd_tconn *tconn);
|
||||
enum drbd_conns conn_lowest_conn(struct drbd_tconn *tconn);
|
||||
|
||||
#endif
|
|
@ -89,6 +89,7 @@ static const char *drbd_state_sw_errors[] = {
|
|||
[-SS_LOWER_THAN_OUTDATED] = "Disk state is lower than outdated",
|
||||
[-SS_IN_TRANSIENT_STATE] = "In transient state, retry after next state change",
|
||||
[-SS_CONCURRENT_ST_CHG] = "Concurrent state changes detected and aborted",
|
||||
[-SS_O_VOL_PEER_PRI] = "Other vol primary on peer not allowed by config",
|
||||
};
|
||||
|
||||
const char *drbd_conn_str(enum drbd_conns s)
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -3,6 +3,7 @@
|
|||
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/mm.h>
|
||||
#include "drbd_int.h"
|
||||
|
||||
/* see get_sb_bdev and bd_claim */
|
||||
extern char *drbd_sec_holder;
|
||||
|
@ -20,8 +21,8 @@ static inline void drbd_set_my_capacity(struct drbd_conf *mdev,
|
|||
|
||||
/* bi_end_io handlers */
|
||||
extern void drbd_md_io_complete(struct bio *bio, int error);
|
||||
extern void drbd_endio_sec(struct bio *bio, int error);
|
||||
extern void drbd_endio_pri(struct bio *bio, int error);
|
||||
extern void drbd_peer_request_endio(struct bio *bio, int error);
|
||||
extern void drbd_request_endio(struct bio *bio, int error);
|
||||
|
||||
/*
|
||||
* used to submit our private bio
|
||||
|
@ -45,12 +46,6 @@ static inline void drbd_generic_make_request(struct drbd_conf *mdev,
|
|||
generic_make_request(bio);
|
||||
}
|
||||
|
||||
static inline int drbd_crypto_is_hash(struct crypto_tfm *tfm)
|
||||
{
|
||||
return (crypto_tfm_alg_type(tfm) & CRYPTO_ALG_TYPE_HASH_MASK)
|
||||
== CRYPTO_ALG_TYPE_HASH;
|
||||
}
|
||||
|
||||
#ifndef __CHECKER__
|
||||
# undef __cond_lock
|
||||
# define __cond_lock(x,c) (c)
|
||||
|
|
|
@ -463,6 +463,7 @@ out:
|
|||
*/
|
||||
static void loop_add_bio(struct loop_device *lo, struct bio *bio)
|
||||
{
|
||||
lo->lo_bio_count++;
|
||||
bio_list_add(&lo->lo_bio_list, bio);
|
||||
}
|
||||
|
||||
|
@ -471,6 +472,7 @@ static void loop_add_bio(struct loop_device *lo, struct bio *bio)
|
|||
*/
|
||||
static struct bio *loop_get_bio(struct loop_device *lo)
|
||||
{
|
||||
lo->lo_bio_count--;
|
||||
return bio_list_pop(&lo->lo_bio_list);
|
||||
}
|
||||
|
||||
|
@ -489,6 +491,10 @@ static void loop_make_request(struct request_queue *q, struct bio *old_bio)
|
|||
goto out;
|
||||
if (unlikely(rw == WRITE && (lo->lo_flags & LO_FLAGS_READ_ONLY)))
|
||||
goto out;
|
||||
if (lo->lo_bio_count >= q->nr_congestion_on)
|
||||
wait_event_lock_irq(lo->lo_req_wait,
|
||||
lo->lo_bio_count < q->nr_congestion_off,
|
||||
lo->lo_lock);
|
||||
loop_add_bio(lo, old_bio);
|
||||
wake_up(&lo->lo_event);
|
||||
spin_unlock_irq(&lo->lo_lock);
|
||||
|
@ -546,6 +552,8 @@ static int loop_thread(void *data)
|
|||
continue;
|
||||
spin_lock_irq(&lo->lo_lock);
|
||||
bio = loop_get_bio(lo);
|
||||
if (lo->lo_bio_count < lo->lo_queue->nr_congestion_off)
|
||||
wake_up(&lo->lo_req_wait);
|
||||
spin_unlock_irq(&lo->lo_lock);
|
||||
|
||||
BUG_ON(!bio);
|
||||
|
@ -873,6 +881,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
|
|||
lo->transfer = transfer_none;
|
||||
lo->ioctl = NULL;
|
||||
lo->lo_sizelimit = 0;
|
||||
lo->lo_bio_count = 0;
|
||||
lo->old_gfp_mask = mapping_gfp_mask(mapping);
|
||||
mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
|
||||
|
||||
|
@ -1673,6 +1682,7 @@ static int loop_add(struct loop_device **l, int i)
|
|||
lo->lo_number = i;
|
||||
lo->lo_thread = NULL;
|
||||
init_waitqueue_head(&lo->lo_event);
|
||||
init_waitqueue_head(&lo->lo_req_wait);
|
||||
spin_lock_init(&lo->lo_lock);
|
||||
disk->major = LOOP_MAJOR;
|
||||
disk->first_minor = i << part_shift;
|
||||
|
|
|
@ -39,6 +39,7 @@
|
|||
#include <linux/list.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/freezer.h>
|
||||
#include <linux/bitmap.h>
|
||||
|
||||
#include <xen/events.h>
|
||||
#include <xen/page.h>
|
||||
|
@ -79,6 +80,7 @@ struct pending_req {
|
|||
unsigned short operation;
|
||||
int status;
|
||||
struct list_head free_list;
|
||||
DECLARE_BITMAP(unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
|
||||
};
|
||||
|
||||
#define BLKBACK_INVALID_HANDLE (~0)
|
||||
|
@ -98,6 +100,36 @@ struct xen_blkbk {
|
|||
|
||||
static struct xen_blkbk *blkbk;
|
||||
|
||||
/*
|
||||
* Maximum number of grant pages that can be mapped in blkback.
|
||||
* BLKIF_MAX_SEGMENTS_PER_REQUEST * RING_SIZE is the maximum number of
|
||||
* pages that blkback will persistently map.
|
||||
* Currently, this is:
|
||||
* RING_SIZE = 32 (for all known ring types)
|
||||
* BLKIF_MAX_SEGMENTS_PER_REQUEST = 11
|
||||
* sizeof(struct persistent_gnt) = 48
|
||||
* So the maximum memory used to store the grants is:
|
||||
* 32 * 11 * 48 = 16896 bytes
|
||||
*/
|
||||
static inline unsigned int max_mapped_grant_pages(enum blkif_protocol protocol)
|
||||
{
|
||||
switch (protocol) {
|
||||
case BLKIF_PROTOCOL_NATIVE:
|
||||
return __CONST_RING_SIZE(blkif, PAGE_SIZE) *
|
||||
BLKIF_MAX_SEGMENTS_PER_REQUEST;
|
||||
case BLKIF_PROTOCOL_X86_32:
|
||||
return __CONST_RING_SIZE(blkif_x86_32, PAGE_SIZE) *
|
||||
BLKIF_MAX_SEGMENTS_PER_REQUEST;
|
||||
case BLKIF_PROTOCOL_X86_64:
|
||||
return __CONST_RING_SIZE(blkif_x86_64, PAGE_SIZE) *
|
||||
BLKIF_MAX_SEGMENTS_PER_REQUEST;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Little helpful macro to figure out the index and virtual address of the
|
||||
* pending_pages[..]. For each 'pending_req' we have up to
|
||||
|
@ -129,6 +161,90 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
|
|||
static void make_response(struct xen_blkif *blkif, u64 id,
|
||||
unsigned short op, int st);
|
||||
|
||||
/*
 * Walk an rb-tree of grants from rb_first() onwards via rb_next().
 *
 * @gnt:    cursor, a pointer to the structure embedding the rb_node
 * @root:   the struct rb_root to walk
 * @member: name of the rb_node member inside *@gnt
 *
 * The walk stops once the address of the embedded node is NULL, i.e. when
 * rb_first()/rb_next() returned NULL. The step expression reads the current
 * entry's rb_node after the loop body has run, so the body must not free the
 * entry it is visiting.
 */
#define foreach_grant(gnt, root, member) \
	for ((gnt) = container_of(rb_first((root)), typeof(*(gnt)), member); \
	     &(gnt)->member != NULL; \
	     (gnt) = container_of(rb_next(&(gnt)->member), typeof(*(gnt)), member))
|
||||
|
||||
|
||||
static void add_persistent_gnt(struct rb_root *root,
|
||||
struct persistent_gnt *persistent_gnt)
|
||||
{
|
||||
struct rb_node **new = &(root->rb_node), *parent = NULL;
|
||||
struct persistent_gnt *this;
|
||||
|
||||
/* Figure out where to put new node */
|
||||
while (*new) {
|
||||
this = container_of(*new, struct persistent_gnt, node);
|
||||
|
||||
parent = *new;
|
||||
if (persistent_gnt->gnt < this->gnt)
|
||||
new = &((*new)->rb_left);
|
||||
else if (persistent_gnt->gnt > this->gnt)
|
||||
new = &((*new)->rb_right);
|
||||
else {
|
||||
pr_alert(DRV_PFX " trying to add a gref that's already in the tree\n");
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
||||
/* Add new node and rebalance tree. */
|
||||
rb_link_node(&(persistent_gnt->node), parent, new);
|
||||
rb_insert_color(&(persistent_gnt->node), root);
|
||||
}
|
||||
|
||||
static struct persistent_gnt *get_persistent_gnt(struct rb_root *root,
|
||||
grant_ref_t gref)
|
||||
{
|
||||
struct persistent_gnt *data;
|
||||
struct rb_node *node = root->rb_node;
|
||||
|
||||
while (node) {
|
||||
data = container_of(node, struct persistent_gnt, node);
|
||||
|
||||
if (gref < data->gnt)
|
||||
node = node->rb_left;
|
||||
else if (gref > data->gnt)
|
||||
node = node->rb_right;
|
||||
else
|
||||
return data;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void free_persistent_gnts(struct rb_root *root, unsigned int num)
|
||||
{
|
||||
struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
|
||||
struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
|
||||
struct persistent_gnt *persistent_gnt;
|
||||
int ret = 0;
|
||||
int segs_to_unmap = 0;
|
||||
|
||||
foreach_grant(persistent_gnt, root, node) {
|
||||
BUG_ON(persistent_gnt->handle ==
|
||||
BLKBACK_INVALID_HANDLE);
|
||||
gnttab_set_unmap_op(&unmap[segs_to_unmap],
|
||||
(unsigned long) pfn_to_kaddr(page_to_pfn(
|
||||
persistent_gnt->page)),
|
||||
GNTMAP_host_map,
|
||||
persistent_gnt->handle);
|
||||
|
||||
pages[segs_to_unmap] = persistent_gnt->page;
|
||||
rb_erase(&persistent_gnt->node, root);
|
||||
kfree(persistent_gnt);
|
||||
num--;
|
||||
|
||||
if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST ||
|
||||
!rb_next(&persistent_gnt->node)) {
|
||||
ret = gnttab_unmap_refs(unmap, NULL, pages,
|
||||
segs_to_unmap);
|
||||
BUG_ON(ret);
|
||||
segs_to_unmap = 0;
|
||||
}
|
||||
}
|
||||
BUG_ON(num != 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Retrieve from the 'pending_reqs' a free pending_req structure to be used.
|
||||
*/
|
||||
|
@ -302,6 +418,14 @@ int xen_blkif_schedule(void *arg)
|
|||
print_stats(blkif);
|
||||
}
|
||||
|
||||
/* Free all persistent grant pages */
|
||||
if (!RB_EMPTY_ROOT(&blkif->persistent_gnts))
|
||||
free_persistent_gnts(&blkif->persistent_gnts,
|
||||
blkif->persistent_gnt_c);
|
||||
|
||||
BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
|
||||
blkif->persistent_gnt_c = 0;
|
||||
|
||||
if (log_stats)
|
||||
print_stats(blkif);
|
||||
|
||||
|
@ -328,6 +452,8 @@ static void xen_blkbk_unmap(struct pending_req *req)
|
|||
int ret;
|
||||
|
||||
for (i = 0; i < req->nr_pages; i++) {
|
||||
if (!test_bit(i, req->unmap_seg))
|
||||
continue;
|
||||
handle = pending_handle(req, i);
|
||||
if (handle == BLKBACK_INVALID_HANDLE)
|
||||
continue;
|
||||
|
@ -344,12 +470,26 @@ static void xen_blkbk_unmap(struct pending_req *req)
|
|||
|
||||
static int xen_blkbk_map(struct blkif_request *req,
|
||||
struct pending_req *pending_req,
|
||||
struct seg_buf seg[])
|
||||
struct seg_buf seg[],
|
||||
struct page *pages[])
|
||||
{
|
||||
struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
|
||||
int i;
|
||||
struct persistent_gnt *persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST];
|
||||
struct page *pages_to_gnt[BLKIF_MAX_SEGMENTS_PER_REQUEST];
|
||||
struct persistent_gnt *persistent_gnt = NULL;
|
||||
struct xen_blkif *blkif = pending_req->blkif;
|
||||
phys_addr_t addr = 0;
|
||||
int i, j;
|
||||
bool new_map;
|
||||
int nseg = req->u.rw.nr_segments;
|
||||
int segs_to_map = 0;
|
||||
int ret = 0;
|
||||
int use_persistent_gnts;
|
||||
|
||||
use_persistent_gnts = (blkif->vbd.feature_gnt_persistent);
|
||||
|
||||
BUG_ON(blkif->persistent_gnt_c >
|
||||
max_mapped_grant_pages(pending_req->blkif->blk_protocol));
|
||||
|
||||
/*
|
||||
* Fill out preq.nr_sects with proper amount of sectors, and setup
|
||||
|
@ -359,36 +499,146 @@ static int xen_blkbk_map(struct blkif_request *req,
|
|||
for (i = 0; i < nseg; i++) {
|
||||
uint32_t flags;
|
||||
|
||||
flags = GNTMAP_host_map;
|
||||
if (pending_req->operation != BLKIF_OP_READ)
|
||||
flags |= GNTMAP_readonly;
|
||||
gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
|
||||
req->u.rw.seg[i].gref,
|
||||
pending_req->blkif->domid);
|
||||
if (use_persistent_gnts)
|
||||
persistent_gnt = get_persistent_gnt(
|
||||
&blkif->persistent_gnts,
|
||||
req->u.rw.seg[i].gref);
|
||||
|
||||
if (persistent_gnt) {
|
||||
/*
|
||||
* We are using persistent grants and
|
||||
* the grant is already mapped
|
||||
*/
|
||||
new_map = false;
|
||||
} else if (use_persistent_gnts &&
|
||||
blkif->persistent_gnt_c <
|
||||
max_mapped_grant_pages(blkif->blk_protocol)) {
|
||||
/*
|
||||
* We are using persistent grants, the grant is
|
||||
* not mapped but we have room for it
|
||||
*/
|
||||
new_map = true;
|
||||
persistent_gnt = kmalloc(
|
||||
sizeof(struct persistent_gnt),
|
||||
GFP_KERNEL);
|
||||
if (!persistent_gnt)
|
||||
return -ENOMEM;
|
||||
persistent_gnt->page = alloc_page(GFP_KERNEL);
|
||||
if (!persistent_gnt->page) {
|
||||
kfree(persistent_gnt);
|
||||
return -ENOMEM;
|
||||
}
|
||||
persistent_gnt->gnt = req->u.rw.seg[i].gref;
|
||||
persistent_gnt->handle = BLKBACK_INVALID_HANDLE;
|
||||
|
||||
pages_to_gnt[segs_to_map] =
|
||||
persistent_gnt->page;
|
||||
addr = (unsigned long) pfn_to_kaddr(
|
||||
page_to_pfn(persistent_gnt->page));
|
||||
|
||||
add_persistent_gnt(&blkif->persistent_gnts,
|
||||
persistent_gnt);
|
||||
blkif->persistent_gnt_c++;
|
||||
pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n",
|
||||
persistent_gnt->gnt, blkif->persistent_gnt_c,
|
||||
max_mapped_grant_pages(blkif->blk_protocol));
|
||||
} else {
|
||||
/*
|
||||
* We are either using persistent grants and
|
||||
* hit the maximum limit of grants mapped,
|
||||
* or we are not using persistent grants.
|
||||
*/
|
||||
if (use_persistent_gnts &&
|
||||
!blkif->vbd.overflow_max_grants) {
|
||||
blkif->vbd.overflow_max_grants = 1;
|
||||
pr_alert(DRV_PFX " domain %u, device %#x is using maximum number of persistent grants\n",
|
||||
blkif->domid, blkif->vbd.handle);
|
||||
}
|
||||
new_map = true;
|
||||
pages[i] = blkbk->pending_page(pending_req, i);
|
||||
addr = vaddr(pending_req, i);
|
||||
pages_to_gnt[segs_to_map] =
|
||||
blkbk->pending_page(pending_req, i);
|
||||
}
|
||||
|
||||
if (persistent_gnt) {
|
||||
pages[i] = persistent_gnt->page;
|
||||
persistent_gnts[i] = persistent_gnt;
|
||||
} else {
|
||||
persistent_gnts[i] = NULL;
|
||||
}
|
||||
|
||||
if (new_map) {
|
||||
flags = GNTMAP_host_map;
|
||||
if (!persistent_gnt &&
|
||||
(pending_req->operation != BLKIF_OP_READ))
|
||||
flags |= GNTMAP_readonly;
|
||||
gnttab_set_map_op(&map[segs_to_map++], addr,
|
||||
flags, req->u.rw.seg[i].gref,
|
||||
blkif->domid);
|
||||
}
|
||||
}
|
||||
|
||||
ret = gnttab_map_refs(map, NULL, &blkbk->pending_page(pending_req, 0), nseg);
|
||||
BUG_ON(ret);
|
||||
if (segs_to_map) {
|
||||
ret = gnttab_map_refs(map, NULL, pages_to_gnt, segs_to_map);
|
||||
BUG_ON(ret);
|
||||
}
|
||||
|
||||
/*
|
||||
* Now swizzle the MFN in our domain with the MFN from the other domain
|
||||
* so that when we access vaddr(pending_req,i) it has the contents of
|
||||
* the page from the other domain.
|
||||
*/
|
||||
for (i = 0; i < nseg; i++) {
|
||||
if (unlikely(map[i].status != 0)) {
|
||||
pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
|
||||
map[i].handle = BLKBACK_INVALID_HANDLE;
|
||||
ret |= 1;
|
||||
bitmap_zero(pending_req->unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
|
||||
for (i = 0, j = 0; i < nseg; i++) {
|
||||
if (!persistent_gnts[i] ||
|
||||
persistent_gnts[i]->handle == BLKBACK_INVALID_HANDLE) {
|
||||
/* This is a newly mapped grant */
|
||||
BUG_ON(j >= segs_to_map);
|
||||
if (unlikely(map[j].status != 0)) {
|
||||
pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
|
||||
map[j].handle = BLKBACK_INVALID_HANDLE;
|
||||
ret |= 1;
|
||||
if (persistent_gnts[i]) {
|
||||
rb_erase(&persistent_gnts[i]->node,
|
||||
&blkif->persistent_gnts);
|
||||
blkif->persistent_gnt_c--;
|
||||
kfree(persistent_gnts[i]);
|
||||
persistent_gnts[i] = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (persistent_gnts[i]) {
|
||||
if (persistent_gnts[i]->handle ==
|
||||
BLKBACK_INVALID_HANDLE) {
|
||||
/*
|
||||
* If this is a new persistent grant
|
||||
* save the handler
|
||||
*/
|
||||
persistent_gnts[i]->handle = map[j].handle;
|
||||
persistent_gnts[i]->dev_bus_addr =
|
||||
map[j++].dev_bus_addr;
|
||||
}
|
||||
pending_handle(pending_req, i) =
|
||||
persistent_gnts[i]->handle;
|
||||
|
||||
pending_handle(pending_req, i) = map[i].handle;
|
||||
if (ret)
|
||||
continue;
|
||||
|
||||
if (ret)
|
||||
continue;
|
||||
seg[i].buf = persistent_gnts[i]->dev_bus_addr |
|
||||
(req->u.rw.seg[i].first_sect << 9);
|
||||
} else {
|
||||
pending_handle(pending_req, i) = map[j].handle;
|
||||
bitmap_set(pending_req->unmap_seg, i, 1);
|
||||
|
||||
seg[i].buf = map[i].dev_bus_addr |
|
||||
(req->u.rw.seg[i].first_sect << 9);
|
||||
if (ret) {
|
||||
j++;
|
||||
continue;
|
||||
}
|
||||
|
||||
seg[i].buf = map[j++].dev_bus_addr |
|
||||
(req->u.rw.seg[i].first_sect << 9);
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
@ -591,6 +841,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
|
|||
int operation;
|
||||
struct blk_plug plug;
|
||||
bool drain = false;
|
||||
struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
|
||||
|
||||
switch (req->operation) {
|
||||
case BLKIF_OP_READ:
|
||||
|
@ -677,7 +928,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
|
|||
* the hypercall to unmap the grants - that is all done in
|
||||
* xen_blkbk_unmap.
|
||||
*/
|
||||
if (xen_blkbk_map(req, pending_req, seg))
|
||||
if (xen_blkbk_map(req, pending_req, seg, pages))
|
||||
goto fail_flush;
|
||||
|
||||
/*
|
||||
|
@ -689,7 +940,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
|
|||
for (i = 0; i < nseg; i++) {
|
||||
while ((bio == NULL) ||
|
||||
(bio_add_page(bio,
|
||||
blkbk->pending_page(pending_req, i),
|
||||
pages[i],
|
||||
seg[i].nsec << 9,
|
||||
seg[i].buf & ~PAGE_MASK) == 0)) {
|
||||
|
||||
|
|
|
@ -34,6 +34,7 @@
|
|||
#include <linux/vmalloc.h>
|
||||
#include <linux/wait.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/rbtree.h>
|
||||
#include <asm/setup.h>
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/hypervisor.h>
|
||||
|
@ -160,10 +161,21 @@ struct xen_vbd {
|
|||
sector_t size;
|
||||
unsigned int flush_support:1;
|
||||
unsigned int discard_secure:1;
|
||||
unsigned int feature_gnt_persistent:1;
|
||||
unsigned int overflow_max_grants:1;
|
||||
};
|
||||
|
||||
struct backend_info;
|
||||
|
||||
|
||||
struct persistent_gnt {
|
||||
struct page *page;
|
||||
grant_ref_t gnt;
|
||||
grant_handle_t handle;
|
||||
uint64_t dev_bus_addr;
|
||||
struct rb_node node;
|
||||
};
|
||||
|
||||
struct xen_blkif {
|
||||
/* Unique identifier for this interface. */
|
||||
domid_t domid;
|
||||
|
@ -190,6 +202,10 @@ struct xen_blkif {
|
|||
struct task_struct *xenblkd;
|
||||
unsigned int waiting_reqs;
|
||||
|
||||
/* tree to store persistent grants */
|
||||
struct rb_root persistent_gnts;
|
||||
unsigned int persistent_gnt_c;
|
||||
|
||||
/* statistics */
|
||||
unsigned long st_print;
|
||||
int st_rd_req;
|
||||
|
|
|
@ -117,6 +117,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
|
|||
atomic_set(&blkif->drain, 0);
|
||||
blkif->st_print = jiffies;
|
||||
init_waitqueue_head(&blkif->waiting_to_free);
|
||||
blkif->persistent_gnts.rb_node = NULL;
|
||||
|
||||
return blkif;
|
||||
}
|
||||
|
@ -672,6 +673,13 @@ again:
|
|||
|
||||
xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);
|
||||
|
||||
err = xenbus_printf(xbt, dev->nodename, "feature-persistent", "%u", 1);
|
||||
if (err) {
|
||||
xenbus_dev_fatal(dev, err, "writing %s/feature-persistent",
|
||||
dev->nodename);
|
||||
goto abort;
|
||||
}
|
||||
|
||||
err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
|
||||
(unsigned long long)vbd_sz(&be->blkif->vbd));
|
||||
if (err) {
|
||||
|
@ -720,6 +728,7 @@ static int connect_ring(struct backend_info *be)
|
|||
struct xenbus_device *dev = be->dev;
|
||||
unsigned long ring_ref;
|
||||
unsigned int evtchn;
|
||||
unsigned int pers_grants;
|
||||
char protocol[64] = "";
|
||||
int err;
|
||||
|
||||
|
@ -749,8 +758,18 @@ static int connect_ring(struct backend_info *be)
|
|||
xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
|
||||
return -1;
|
||||
}
|
||||
pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n",
|
||||
ring_ref, evtchn, be->blkif->blk_protocol, protocol);
|
||||
err = xenbus_gather(XBT_NIL, dev->otherend,
|
||||
"feature-persistent", "%u",
|
||||
&pers_grants, NULL);
|
||||
if (err)
|
||||
pers_grants = 0;
|
||||
|
||||
be->blkif->vbd.feature_gnt_persistent = pers_grants;
|
||||
be->blkif->vbd.overflow_max_grants = 0;
|
||||
|
||||
pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s) %s\n",
|
||||
ring_ref, evtchn, be->blkif->blk_protocol, protocol,
|
||||
pers_grants ? "persistent grants" : "");
|
||||
|
||||
/* Map the shared frame, irq etc. */
|
||||
err = xen_blkif_map(be->blkif, ring_ref, evtchn);
|
||||
|
|
|
@ -44,6 +44,7 @@
|
|||
#include <linux/mutex.h>
|
||||
#include <linux/scatterlist.h>
|
||||
#include <linux/bitmap.h>
|
||||
#include <linux/llist.h>
|
||||
|
||||
#include <xen/xen.h>
|
||||
#include <xen/xenbus.h>
|
||||
|
@ -64,10 +65,17 @@ enum blkif_state {
|
|||
BLKIF_STATE_SUSPENDED,
|
||||
};
|
||||
|
||||
struct grant {
|
||||
grant_ref_t gref;
|
||||
unsigned long pfn;
|
||||
struct llist_node node;
|
||||
};
|
||||
|
||||
struct blk_shadow {
|
||||
struct blkif_request req;
|
||||
struct request *request;
|
||||
unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
|
||||
struct grant *grants_used[BLKIF_MAX_SEGMENTS_PER_REQUEST];
|
||||
};
|
||||
|
||||
static DEFINE_MUTEX(blkfront_mutex);
|
||||
|
@ -97,6 +105,8 @@ struct blkfront_info
|
|||
struct work_struct work;
|
||||
struct gnttab_free_callback callback;
|
||||
struct blk_shadow shadow[BLK_RING_SIZE];
|
||||
struct llist_head persistent_gnts;
|
||||
unsigned int persistent_gnts_c;
|
||||
unsigned long shadow_free;
|
||||
unsigned int feature_flush;
|
||||
unsigned int flush_op;
|
||||
|
@ -104,6 +114,7 @@ struct blkfront_info
|
|||
unsigned int feature_secdiscard:1;
|
||||
unsigned int discard_granularity;
|
||||
unsigned int discard_alignment;
|
||||
unsigned int feature_persistent:1;
|
||||
int is_ready;
|
||||
};
|
||||
|
||||
|
@ -287,21 +298,36 @@ static int blkif_queue_request(struct request *req)
|
|||
unsigned long id;
|
||||
unsigned int fsect, lsect;
|
||||
int i, ref;
|
||||
|
||||
/*
|
||||
* Used to store if we are able to queue the request by just using
|
||||
* existing persistent grants, or if we have to get new grants,
|
||||
* as there are not sufficiently many free.
|
||||
*/
|
||||
bool new_persistent_gnts;
|
||||
grant_ref_t gref_head;
|
||||
struct page *granted_page;
|
||||
struct grant *gnt_list_entry = NULL;
|
||||
struct scatterlist *sg;
|
||||
|
||||
if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
|
||||
return 1;
|
||||
|
||||
if (gnttab_alloc_grant_references(
|
||||
BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
|
||||
gnttab_request_free_callback(
|
||||
&info->callback,
|
||||
blkif_restart_queue_callback,
|
||||
info,
|
||||
BLKIF_MAX_SEGMENTS_PER_REQUEST);
|
||||
return 1;
|
||||
}
|
||||
/* Check if we have enough grants to allocate a request */
|
||||
if (info->persistent_gnts_c < BLKIF_MAX_SEGMENTS_PER_REQUEST) {
|
||||
new_persistent_gnts = 1;
|
||||
if (gnttab_alloc_grant_references(
|
||||
BLKIF_MAX_SEGMENTS_PER_REQUEST - info->persistent_gnts_c,
|
||||
&gref_head) < 0) {
|
||||
gnttab_request_free_callback(
|
||||
&info->callback,
|
||||
blkif_restart_queue_callback,
|
||||
info,
|
||||
BLKIF_MAX_SEGMENTS_PER_REQUEST);
|
||||
return 1;
|
||||
}
|
||||
} else
|
||||
new_persistent_gnts = 0;
|
||||
|
||||
/* Fill out a communications ring structure. */
|
||||
ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
|
||||
|
@ -341,18 +367,73 @@ static int blkif_queue_request(struct request *req)
|
|||
BLKIF_MAX_SEGMENTS_PER_REQUEST);
|
||||
|
||||
for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) {
|
||||
buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
|
||||
fsect = sg->offset >> 9;
|
||||
lsect = fsect + (sg->length >> 9) - 1;
|
||||
/* install a grant reference. */
|
||||
ref = gnttab_claim_grant_reference(&gref_head);
|
||||
BUG_ON(ref == -ENOSPC);
|
||||
|
||||
gnttab_grant_foreign_access_ref(
|
||||
ref,
|
||||
if (info->persistent_gnts_c) {
|
||||
BUG_ON(llist_empty(&info->persistent_gnts));
|
||||
gnt_list_entry = llist_entry(
|
||||
llist_del_first(&info->persistent_gnts),
|
||||
struct grant, node);
|
||||
|
||||
ref = gnt_list_entry->gref;
|
||||
buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn);
|
||||
info->persistent_gnts_c--;
|
||||
} else {
|
||||
ref = gnttab_claim_grant_reference(&gref_head);
|
||||
BUG_ON(ref == -ENOSPC);
|
||||
|
||||
gnt_list_entry =
|
||||
kmalloc(sizeof(struct grant),
|
||||
GFP_ATOMIC);
|
||||
if (!gnt_list_entry)
|
||||
return -ENOMEM;
|
||||
|
||||
granted_page = alloc_page(GFP_ATOMIC);
|
||||
if (!granted_page) {
|
||||
kfree(gnt_list_entry);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
gnt_list_entry->pfn =
|
||||
page_to_pfn(granted_page);
|
||||
gnt_list_entry->gref = ref;
|
||||
|
||||
buffer_mfn = pfn_to_mfn(page_to_pfn(
|
||||
granted_page));
|
||||
gnttab_grant_foreign_access_ref(ref,
|
||||
info->xbdev->otherend_id,
|
||||
buffer_mfn,
|
||||
rq_data_dir(req));
|
||||
buffer_mfn, 0);
|
||||
}
|
||||
|
||||
info->shadow[id].grants_used[i] = gnt_list_entry;
|
||||
|
||||
if (rq_data_dir(req)) {
|
||||
char *bvec_data;
|
||||
void *shared_data;
|
||||
|
||||
BUG_ON(sg->offset + sg->length > PAGE_SIZE);
|
||||
|
||||
shared_data = kmap_atomic(
|
||||
pfn_to_page(gnt_list_entry->pfn));
|
||||
bvec_data = kmap_atomic(sg_page(sg));
|
||||
|
||||
/*
|
||||
* this does not wipe data stored outside the
|
||||
* range sg->offset..sg->offset+sg->length.
|
||||
* Therefore, blkback *could* see data from
|
||||
* previous requests. This is OK as long as
|
||||
* persistent grants are shared with just one
|
||||
* domain. It may need refactoring if this
|
||||
* changes
|
||||
*/
|
||||
memcpy(shared_data + sg->offset,
|
||||
bvec_data + sg->offset,
|
||||
sg->length);
|
||||
|
||||
kunmap_atomic(bvec_data);
|
||||
kunmap_atomic(shared_data);
|
||||
}
|
||||
|
||||
info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
|
||||
ring_req->u.rw.seg[i] =
|
||||
|
@ -368,7 +449,8 @@ static int blkif_queue_request(struct request *req)
|
|||
/* Keep a private copy so we can reissue requests when recovering. */
|
||||
info->shadow[id].req = *ring_req;
|
||||
|
||||
gnttab_free_grant_references(gref_head);
|
||||
if (new_persistent_gnts)
|
||||
gnttab_free_grant_references(gref_head);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -480,12 +562,13 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
|
|||
static void xlvbd_flush(struct blkfront_info *info)
|
||||
{
|
||||
blk_queue_flush(info->rq, info->feature_flush);
|
||||
printk(KERN_INFO "blkfront: %s: %s: %s\n",
|
||||
printk(KERN_INFO "blkfront: %s: %s: %s %s\n",
|
||||
info->gd->disk_name,
|
||||
info->flush_op == BLKIF_OP_WRITE_BARRIER ?
|
||||
"barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ?
|
||||
"flush diskcache" : "barrier or flush"),
|
||||
info->feature_flush ? "enabled" : "disabled");
|
||||
info->feature_flush ? "enabled" : "disabled",
|
||||
info->feature_persistent ? "using persistent grants" : "");
|
||||
}
|
||||
|
||||
static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
|
||||
|
@ -707,6 +790,9 @@ static void blkif_restart_queue(struct work_struct *work)
|
|||
|
||||
static void blkif_free(struct blkfront_info *info, int suspend)
|
||||
{
|
||||
struct llist_node *all_gnts;
|
||||
struct grant *persistent_gnt;
|
||||
|
||||
/* Prevent new requests being issued until we fix things up. */
|
||||
spin_lock_irq(&info->io_lock);
|
||||
info->connected = suspend ?
|
||||
|
@ -714,6 +800,18 @@ static void blkif_free(struct blkfront_info *info, int suspend)
|
|||
/* No more blkif_request(). */
|
||||
if (info->rq)
|
||||
blk_stop_queue(info->rq);
|
||||
|
||||
/* Remove all persistent grants */
|
||||
if (info->persistent_gnts_c) {
|
||||
all_gnts = llist_del_all(&info->persistent_gnts);
|
||||
llist_for_each_entry(persistent_gnt, all_gnts, node) {
|
||||
gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
|
||||
__free_page(pfn_to_page(persistent_gnt->pfn));
|
||||
kfree(persistent_gnt);
|
||||
}
|
||||
info->persistent_gnts_c = 0;
|
||||
}
|
||||
|
||||
/* No more gnttab callback work. */
|
||||
gnttab_cancel_free_callback(&info->callback);
|
||||
spin_unlock_irq(&info->io_lock);
|
||||
|
@ -734,13 +832,43 @@ static void blkif_free(struct blkfront_info *info, int suspend)
|
|||
|
||||
}
|
||||
|
||||
static void blkif_completion(struct blk_shadow *s)
|
||||
static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
|
||||
struct blkif_response *bret)
|
||||
{
|
||||
int i;
|
||||
/* Do not let BLKIF_OP_DISCARD as nr_segment is in the same place
|
||||
* flag. */
|
||||
for (i = 0; i < s->req.u.rw.nr_segments; i++)
|
||||
gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL);
|
||||
struct bio_vec *bvec;
|
||||
struct req_iterator iter;
|
||||
unsigned long flags;
|
||||
char *bvec_data;
|
||||
void *shared_data;
|
||||
unsigned int offset = 0;
|
||||
|
||||
if (bret->operation == BLKIF_OP_READ) {
|
||||
/*
|
||||
* Copy the data received from the backend into the bvec.
|
||||
* Since bv_offset can be different than 0, and bv_len different
|
||||
* than PAGE_SIZE, we have to keep track of the current offset,
|
||||
* to be sure we are copying the data from the right shared page.
|
||||
*/
|
||||
rq_for_each_segment(bvec, s->request, iter) {
|
||||
BUG_ON((bvec->bv_offset + bvec->bv_len) > PAGE_SIZE);
|
||||
i = offset >> PAGE_SHIFT;
|
||||
BUG_ON(i >= s->req.u.rw.nr_segments);
|
||||
shared_data = kmap_atomic(
|
||||
pfn_to_page(s->grants_used[i]->pfn));
|
||||
bvec_data = bvec_kmap_irq(bvec, &flags);
|
||||
memcpy(bvec_data, shared_data + bvec->bv_offset,
|
||||
bvec->bv_len);
|
||||
bvec_kunmap_irq(bvec_data, &flags);
|
||||
kunmap_atomic(shared_data);
|
||||
offset += bvec->bv_len;
|
||||
}
|
||||
}
|
||||
/* Add the persistent grant into the list of free grants */
|
||||
for (i = 0; i < s->req.u.rw.nr_segments; i++) {
|
||||
llist_add(&s->grants_used[i]->node, &info->persistent_gnts);
|
||||
info->persistent_gnts_c++;
|
||||
}
|
||||
}
|
||||
|
||||
static irqreturn_t blkif_interrupt(int irq, void *dev_id)
|
||||
|
@ -783,7 +911,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
|
|||
req = info->shadow[id].request;
|
||||
|
||||
if (bret->operation != BLKIF_OP_DISCARD)
|
||||
blkif_completion(&info->shadow[id]);
|
||||
blkif_completion(&info->shadow[id], info, bret);
|
||||
|
||||
if (add_id_to_freelist(info, id)) {
|
||||
WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n",
|
||||
|
@ -942,6 +1070,11 @@ again:
|
|||
message = "writing protocol";
|
||||
goto abort_transaction;
|
||||
}
|
||||
err = xenbus_printf(xbt, dev->nodename,
|
||||
"feature-persistent", "%u", 1);
|
||||
if (err)
|
||||
dev_warn(&dev->dev,
|
||||
"writing persistent grants feature to xenbus");
|
||||
|
||||
err = xenbus_transaction_end(xbt, 0);
|
||||
if (err) {
|
||||
|
@ -1029,6 +1162,8 @@ static int blkfront_probe(struct xenbus_device *dev,
|
|||
spin_lock_init(&info->io_lock);
|
||||
info->xbdev = dev;
|
||||
info->vdevice = vdevice;
|
||||
init_llist_head(&info->persistent_gnts);
|
||||
info->persistent_gnts_c = 0;
|
||||
info->connected = BLKIF_STATE_DISCONNECTED;
|
||||
INIT_WORK(&info->work, blkif_restart_queue);
|
||||
|
||||
|
@ -1093,7 +1228,7 @@ static int blkif_recover(struct blkfront_info *info)
|
|||
req->u.rw.seg[j].gref,
|
||||
info->xbdev->otherend_id,
|
||||
pfn_to_mfn(info->shadow[req->u.rw.id].frame[j]),
|
||||
rq_data_dir(info->shadow[req->u.rw.id].request));
|
||||
0);
|
||||
}
|
||||
info->shadow[req->u.rw.id].req = *req;
|
||||
|
||||
|
@ -1225,7 +1360,7 @@ static void blkfront_connect(struct blkfront_info *info)
|
|||
unsigned long sector_size;
|
||||
unsigned int binfo;
|
||||
int err;
|
||||
int barrier, flush, discard;
|
||||
int barrier, flush, discard, persistent;
|
||||
|
||||
switch (info->connected) {
|
||||
case BLKIF_STATE_CONNECTED:
|
||||
|
@ -1303,6 +1438,14 @@ static void blkfront_connect(struct blkfront_info *info)
|
|||
if (!err && discard)
|
||||
blkfront_setup_discard(info);
|
||||
|
||||
err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
|
||||
"feature-persistent", "%u", &persistent,
|
||||
NULL);
|
||||
if (err)
|
||||
info->feature_persistent = 0;
|
||||
else
|
||||
info->feature_persistent = persistent;
|
||||
|
||||
err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
|
||||
if (err) {
|
||||
xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
|
||||
|
|
|
@ -452,7 +452,7 @@ void md_flush_request(struct mddev *mddev, struct bio *bio)
|
|||
spin_lock_irq(&mddev->write_lock);
|
||||
wait_event_lock_irq(mddev->sb_wait,
|
||||
!mddev->flush_bio,
|
||||
mddev->write_lock, /*nothing*/);
|
||||
mddev->write_lock);
|
||||
mddev->flush_bio = bio;
|
||||
spin_unlock_irq(&mddev->write_lock);
|
||||
|
||||
|
|
|
@ -551,32 +551,6 @@ struct md_thread {
|
|||
|
||||
#define THREAD_WAKEUP 0
|
||||
|
||||
#define __wait_event_lock_irq(wq, condition, lock, cmd) \
|
||||
do { \
|
||||
wait_queue_t __wait; \
|
||||
init_waitqueue_entry(&__wait, current); \
|
||||
\
|
||||
add_wait_queue(&wq, &__wait); \
|
||||
for (;;) { \
|
||||
set_current_state(TASK_UNINTERRUPTIBLE); \
|
||||
if (condition) \
|
||||
break; \
|
||||
spin_unlock_irq(&lock); \
|
||||
cmd; \
|
||||
schedule(); \
|
||||
spin_lock_irq(&lock); \
|
||||
} \
|
||||
current->state = TASK_RUNNING; \
|
||||
remove_wait_queue(&wq, &__wait); \
|
||||
} while (0)
|
||||
|
||||
#define wait_event_lock_irq(wq, condition, lock, cmd) \
|
||||
do { \
|
||||
if (condition) \
|
||||
break; \
|
||||
__wait_event_lock_irq(wq, condition, lock, cmd); \
|
||||
} while (0)
|
||||
|
||||
static inline void safe_put_page(struct page *p)
|
||||
{
|
||||
if (p) put_page(p);
|
||||
|
|
|
@ -822,7 +822,7 @@ static void raise_barrier(struct r1conf *conf)
|
|||
|
||||
/* Wait until no block IO is waiting */
|
||||
wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting,
|
||||
conf->resync_lock, );
|
||||
conf->resync_lock);
|
||||
|
||||
/* block any new IO from starting */
|
||||
conf->barrier++;
|
||||
|
@ -830,7 +830,7 @@ static void raise_barrier(struct r1conf *conf)
|
|||
/* Now wait for all pending IO to complete */
|
||||
wait_event_lock_irq(conf->wait_barrier,
|
||||
!conf->nr_pending && conf->barrier < RESYNC_DEPTH,
|
||||
conf->resync_lock, );
|
||||
conf->resync_lock);
|
||||
|
||||
spin_unlock_irq(&conf->resync_lock);
|
||||
}
|
||||
|
@ -864,8 +864,7 @@ static void wait_barrier(struct r1conf *conf)
|
|||
(conf->nr_pending &&
|
||||
current->bio_list &&
|
||||
!bio_list_empty(current->bio_list)),
|
||||
conf->resync_lock,
|
||||
);
|
||||
conf->resync_lock);
|
||||
conf->nr_waiting--;
|
||||
}
|
||||
conf->nr_pending++;
|
||||
|
@ -898,10 +897,10 @@ static void freeze_array(struct r1conf *conf)
|
|||
spin_lock_irq(&conf->resync_lock);
|
||||
conf->barrier++;
|
||||
conf->nr_waiting++;
|
||||
wait_event_lock_irq(conf->wait_barrier,
|
||||
conf->nr_pending == conf->nr_queued+1,
|
||||
conf->resync_lock,
|
||||
flush_pending_writes(conf));
|
||||
wait_event_lock_irq_cmd(conf->wait_barrier,
|
||||
conf->nr_pending == conf->nr_queued+1,
|
||||
conf->resync_lock,
|
||||
flush_pending_writes(conf));
|
||||
spin_unlock_irq(&conf->resync_lock);
|
||||
}
|
||||
static void unfreeze_array(struct r1conf *conf)
|
||||
|
|
|
@ -952,7 +952,7 @@ static void raise_barrier(struct r10conf *conf, int force)
|
|||
|
||||
/* Wait until no block IO is waiting (unless 'force') */
|
||||
wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
|
||||
conf->resync_lock, );
|
||||
conf->resync_lock);
|
||||
|
||||
/* block any new IO from starting */
|
||||
conf->barrier++;
|
||||
|
@ -960,7 +960,7 @@ static void raise_barrier(struct r10conf *conf, int force)
|
|||
/* Now wait for all pending IO to complete */
|
||||
wait_event_lock_irq(conf->wait_barrier,
|
||||
!conf->nr_pending && conf->barrier < RESYNC_DEPTH,
|
||||
conf->resync_lock, );
|
||||
conf->resync_lock);
|
||||
|
||||
spin_unlock_irq(&conf->resync_lock);
|
||||
}
|
||||
|
@ -993,8 +993,7 @@ static void wait_barrier(struct r10conf *conf)
|
|||
(conf->nr_pending &&
|
||||
current->bio_list &&
|
||||
!bio_list_empty(current->bio_list)),
|
||||
conf->resync_lock,
|
||||
);
|
||||
conf->resync_lock);
|
||||
conf->nr_waiting--;
|
||||
}
|
||||
conf->nr_pending++;
|
||||
|
@ -1027,10 +1026,10 @@ static void freeze_array(struct r10conf *conf)
|
|||
spin_lock_irq(&conf->resync_lock);
|
||||
conf->barrier++;
|
||||
conf->nr_waiting++;
|
||||
wait_event_lock_irq(conf->wait_barrier,
|
||||
conf->nr_pending == conf->nr_queued+1,
|
||||
conf->resync_lock,
|
||||
flush_pending_writes(conf));
|
||||
wait_event_lock_irq_cmd(conf->wait_barrier,
|
||||
conf->nr_pending == conf->nr_queued+1,
|
||||
conf->resync_lock,
|
||||
flush_pending_writes(conf));
|
||||
|
||||
spin_unlock_irq(&conf->resync_lock);
|
||||
}
|
||||
|
|
|
@ -466,7 +466,7 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
|
|||
do {
|
||||
wait_event_lock_irq(conf->wait_for_stripe,
|
||||
conf->quiesce == 0 || noquiesce,
|
||||
conf->device_lock, /* nothing */);
|
||||
conf->device_lock);
|
||||
sh = __find_stripe(conf, sector, conf->generation - previous);
|
||||
if (!sh) {
|
||||
if (!conf->inactive_blocked)
|
||||
|
@ -480,8 +480,7 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
|
|||
(atomic_read(&conf->active_stripes)
|
||||
< (conf->max_nr_stripes *3/4)
|
||||
|| !conf->inactive_blocked),
|
||||
conf->device_lock,
|
||||
);
|
||||
conf->device_lock);
|
||||
conf->inactive_blocked = 0;
|
||||
} else
|
||||
init_stripe(sh, sector, previous);
|
||||
|
@ -1646,8 +1645,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
|
|||
spin_lock_irq(&conf->device_lock);
|
||||
wait_event_lock_irq(conf->wait_for_stripe,
|
||||
!list_empty(&conf->inactive_list),
|
||||
conf->device_lock,
|
||||
);
|
||||
conf->device_lock);
|
||||
osh = get_free_stripe(conf);
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
atomic_set(&nsh->count, 1);
|
||||
|
@ -4003,7 +4001,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
|
|||
spin_lock_irq(&conf->device_lock);
|
||||
wait_event_lock_irq(conf->wait_for_stripe,
|
||||
conf->quiesce == 0,
|
||||
conf->device_lock, /* nothing */);
|
||||
conf->device_lock);
|
||||
atomic_inc(&conf->active_aligned_reads);
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
|
||||
|
@ -6095,7 +6093,7 @@ static void raid5_quiesce(struct mddev *mddev, int state)
|
|||
wait_event_lock_irq(conf->wait_for_stripe,
|
||||
atomic_read(&conf->active_stripes) == 0 &&
|
||||
atomic_read(&conf->active_aligned_reads) == 0,
|
||||
conf->device_lock, /* nothing */);
|
||||
conf->device_lock);
|
||||
conf->quiesce = 1;
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
/* allow reshape to continue */
|
||||
|
|
|
@ -51,12 +51,11 @@
|
|||
|
||||
#endif
|
||||
|
||||
|
||||
extern const char *drbd_buildtag(void);
|
||||
#define REL_VERSION "8.3.13"
|
||||
#define API_VERSION 88
|
||||
#define REL_VERSION "8.4.2"
|
||||
#define API_VERSION 1
|
||||
#define PRO_VERSION_MIN 86
|
||||
#define PRO_VERSION_MAX 96
|
||||
#define PRO_VERSION_MAX 101
|
||||
|
||||
|
||||
enum drbd_io_error_p {
|
||||
|
@ -66,7 +65,8 @@ enum drbd_io_error_p {
|
|||
};
|
||||
|
||||
enum drbd_fencing_p {
|
||||
FP_DONT_CARE,
|
||||
FP_NOT_AVAIL = -1, /* Not a policy */
|
||||
FP_DONT_CARE = 0,
|
||||
FP_RESOURCE,
|
||||
FP_STONITH
|
||||
};
|
||||
|
@ -102,6 +102,20 @@ enum drbd_on_congestion {
|
|||
OC_DISCONNECT,
|
||||
};
|
||||
|
||||
enum drbd_read_balancing {
|
||||
RB_PREFER_LOCAL,
|
||||
RB_PREFER_REMOTE,
|
||||
RB_ROUND_ROBIN,
|
||||
RB_LEAST_PENDING,
|
||||
RB_CONGESTED_REMOTE,
|
||||
RB_32K_STRIPING,
|
||||
RB_64K_STRIPING,
|
||||
RB_128K_STRIPING,
|
||||
RB_256K_STRIPING,
|
||||
RB_512K_STRIPING,
|
||||
RB_1M_STRIPING,
|
||||
};
|
||||
|
||||
/* KEEP the order, do not delete or insert. Only append. */
|
||||
enum drbd_ret_code {
|
||||
ERR_CODE_BASE = 100,
|
||||
|
@ -122,7 +136,7 @@ enum drbd_ret_code {
|
|||
ERR_AUTH_ALG = 120,
|
||||
ERR_AUTH_ALG_ND = 121,
|
||||
ERR_NOMEM = 122,
|
||||
ERR_DISCARD = 123,
|
||||
ERR_DISCARD_IMPOSSIBLE = 123,
|
||||
ERR_DISK_CONFIGURED = 124,
|
||||
ERR_NET_CONFIGURED = 125,
|
||||
ERR_MANDATORY_TAG = 126,
|
||||
|
@ -130,8 +144,8 @@ enum drbd_ret_code {
|
|||
ERR_INTR = 129, /* EINTR */
|
||||
ERR_RESIZE_RESYNC = 130,
|
||||
ERR_NO_PRIMARY = 131,
|
||||
ERR_SYNC_AFTER = 132,
|
||||
ERR_SYNC_AFTER_CYCLE = 133,
|
||||
ERR_RESYNC_AFTER = 132,
|
||||
ERR_RESYNC_AFTER_CYCLE = 133,
|
||||
ERR_PAUSE_IS_SET = 134,
|
||||
ERR_PAUSE_IS_CLEAR = 135,
|
||||
ERR_PACKET_NR = 137,
|
||||
|
@ -155,6 +169,14 @@ enum drbd_ret_code {
|
|||
ERR_CONG_NOT_PROTO_A = 155,
|
||||
ERR_PIC_AFTER_DEP = 156,
|
||||
ERR_PIC_PEER_DEP = 157,
|
||||
ERR_RES_NOT_KNOWN = 158,
|
||||
ERR_RES_IN_USE = 159,
|
||||
ERR_MINOR_CONFIGURED = 160,
|
||||
ERR_MINOR_EXISTS = 161,
|
||||
ERR_INVALID_REQUEST = 162,
|
||||
ERR_NEED_APV_100 = 163,
|
||||
ERR_NEED_ALLOW_TWO_PRI = 164,
|
||||
ERR_MD_UNCLEAN = 165,
|
||||
|
||||
/* insert new ones above this line */
|
||||
AFTER_LAST_ERR_CODE
|
||||
|
@ -296,7 +318,8 @@ enum drbd_state_rv {
|
|||
SS_NOT_SUPPORTED = -17, /* drbd-8.2 only */
|
||||
SS_IN_TRANSIENT_STATE = -18, /* Retry after the next state change */
|
||||
SS_CONCURRENT_ST_CHG = -19, /* Concurrent cluster side state change! */
|
||||
SS_AFTER_LAST_ERROR = -20, /* Keep this at bottom */
|
||||
SS_O_VOL_PEER_PRI = -20,
|
||||
SS_AFTER_LAST_ERROR = -21, /* Keep this at bottom */
|
||||
};
|
||||
|
||||
/* from drbd_strings.c */
|
||||
|
@ -313,7 +336,9 @@ extern const char *drbd_set_st_err_str(enum drbd_state_rv);
|
|||
#define MDF_FULL_SYNC (1 << 3)
|
||||
#define MDF_WAS_UP_TO_DATE (1 << 4)
|
||||
#define MDF_PEER_OUT_DATED (1 << 5)
|
||||
#define MDF_CRASHED_PRIMARY (1 << 6)
|
||||
#define MDF_CRASHED_PRIMARY (1 << 6)
|
||||
#define MDF_AL_CLEAN (1 << 7)
|
||||
#define MDF_AL_DISABLED (1 << 8)
|
||||
|
||||
enum drbd_uuid_index {
|
||||
UI_CURRENT,
|
||||
|
@ -333,37 +358,23 @@ enum drbd_timeout_flag {
|
|||
|
||||
#define UUID_JUST_CREATED ((__u64)4)
|
||||
|
||||
/* magic numbers used in meta data and network packets */
|
||||
#define DRBD_MAGIC 0x83740267
|
||||
#define BE_DRBD_MAGIC __constant_cpu_to_be32(DRBD_MAGIC)
|
||||
#define DRBD_MAGIC_BIG 0x835a
|
||||
#define BE_DRBD_MAGIC_BIG __constant_cpu_to_be16(DRBD_MAGIC_BIG)
|
||||
#define DRBD_MAGIC_100 0x8620ec20
|
||||
|
||||
#define DRBD_MD_MAGIC_07 (DRBD_MAGIC+3)
|
||||
#define DRBD_MD_MAGIC_08 (DRBD_MAGIC+4)
|
||||
#define DRBD_MD_MAGIC_84_UNCLEAN (DRBD_MAGIC+5)
|
||||
|
||||
|
||||
/* how I came up with this magic?
|
||||
* base64 decode "actlog==" ;) */
|
||||
#define DRBD_AL_MAGIC 0x69cb65a2
|
||||
|
||||
/* these are of type "int" */
|
||||
#define DRBD_MD_INDEX_INTERNAL -1
|
||||
#define DRBD_MD_INDEX_FLEX_EXT -2
|
||||
#define DRBD_MD_INDEX_FLEX_INT -3
|
||||
|
||||
/* Start of the new netlink/connector stuff */
|
||||
|
||||
#define DRBD_NL_CREATE_DEVICE 0x01
|
||||
#define DRBD_NL_SET_DEFAULTS 0x02
|
||||
|
||||
|
||||
/* For searching a vacant cn_idx value */
|
||||
#define CN_IDX_STEP 6977
|
||||
|
||||
struct drbd_nl_cfg_req {
|
||||
int packet_type;
|
||||
unsigned int drbd_minor;
|
||||
int flags;
|
||||
unsigned short tag_list[];
|
||||
};
|
||||
|
||||
struct drbd_nl_cfg_reply {
|
||||
int packet_type;
|
||||
unsigned int minor;
|
||||
int ret_code; /* enum ret_code or set_st_err_t */
|
||||
unsigned short tag_list[]; /* only used with get_* calls */
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,378 @@
|
|||
/*
|
||||
* General overview:
|
||||
* full generic netlink message:
|
||||
* |nlmsghdr|genlmsghdr|<payload>
|
||||
*
|
||||
* payload:
|
||||
* |optional fixed size family header|<sequence of netlink attributes>
|
||||
*
|
||||
* sequence of netlink attributes:
|
||||
* I chose to have all "top level" attributes NLA_NESTED,
|
||||
* corresponding to some real struct.
|
||||
* So we have a sequence of |tla, len|<nested nla sequence>
|
||||
*
|
||||
* nested nla sequence:
|
||||
* may be empty, or contain a sequence of netlink attributes
|
||||
* representing the struct fields.
|
||||
*
|
||||
* The tag number of any field (regardless of containing struct)
|
||||
* will be available as T_ ## field_name,
|
||||
* so you cannot have the same field name in two different structs.
|
||||
*
|
||||
* The tag numbers themselves are per struct, though,
|
||||
* so should always begin at 1 (not 0, that is the special "NLA_UNSPEC" type,
|
||||
* which we won't use here).
|
||||
* The tag numbers are used as index in the respective nla_policy array.
|
||||
*
|
||||
* GENL_struct(tag_name, tag_number, struct name, struct fields) - struct and policy
|
||||
* genl_magic_struct.h
|
||||
* generates the struct declaration,
|
||||
* generates an entry in the tla enum,
|
||||
* genl_magic_func.h
|
||||
* generates an entry in the static tla policy
|
||||
* with .type = NLA_NESTED
|
||||
* generates the static <struct_name>_nl_policy definition,
|
||||
* and static conversion functions
|
||||
*
|
||||
* genl_magic_func.h
|
||||
*
|
||||
* GENL_mc_group(group)
|
||||
* genl_magic_struct.h
|
||||
* does nothing
|
||||
* genl_magic_func.h
|
||||
* defines and registers the mcast group,
|
||||
* and provides a send helper
|
||||
*
|
||||
* GENL_notification(op_name, op_num, mcast_group, tla list)
|
||||
* These are notifications to userspace.
|
||||
*
|
||||
* genl_magic_struct.h
|
||||
* generates an entry in the genl_ops enum,
|
||||
* genl_magic_func.h
|
||||
* does nothing
|
||||
*
|
||||
* mcast group: the name of the mcast group this notification should be
|
||||
* expected on
|
||||
* tla list: the list of expected top level attributes,
|
||||
* for documentation and sanity checking.
|
||||
*
|
||||
* GENL_op(op_name, op_num, flags and handler, tla list) - "genl operations"
|
||||
* These are requests from userspace.
|
||||
*
|
||||
* _op and _notification share the same "number space",
|
||||
* op_nr will be assigned to "genlmsghdr->cmd"
|
||||
*
|
||||
* genl_magic_struct.h
|
||||
* generates an entry in the genl_ops enum,
|
||||
* genl_magic_func.h
|
||||
* generates an entry in the static genl_ops array,
|
||||
* and static register/unregister functions to
|
||||
* genl_register_family_with_ops().
|
||||
*
|
||||
* flags and handler:
|
||||
* GENL_op_init( .doit = x, .dumpit = y, .flags = something)
|
||||
* GENL_doit(x) => .dumpit = NULL, .flags = GENL_ADMIN_PERM
|
||||
* tla list: the list of expected top level attributes,
|
||||
* for documentation and sanity checking.
|
||||
*/
|
||||
|
||||
/*
|
||||
* STRUCTS
|
||||
*/
|
||||
|
||||
/* this is sent kernel -> userland on various error conditions, and contains
|
||||
* informational textual info, which is supposedly human readable.
|
||||
* The computer relevant return code is in the drbd_genlmsghdr.
|
||||
*/
|
||||
GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply,
|
||||
/* "arbitrary" size strings, nla_policy.len = 0 */
|
||||
__str_field(1, DRBD_GENLA_F_MANDATORY, info_text, 0)
|
||||
)
|
||||
|
||||
/* Configuration requests typically need a context to operate on.
|
||||
* Possible keys are device minor (fits in the drbd_genlmsghdr),
|
||||
* the replication link (aka connection) name,
|
||||
* and/or the replication group (aka resource) name,
|
||||
* and the volume id within the resource. */
|
||||
GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context,
|
||||
__u32_field(1, DRBD_GENLA_F_MANDATORY, ctx_volume)
|
||||
__str_field(2, DRBD_GENLA_F_MANDATORY, ctx_resource_name, 128)
|
||||
__bin_field(3, DRBD_GENLA_F_MANDATORY, ctx_my_addr, 128)
|
||||
__bin_field(4, DRBD_GENLA_F_MANDATORY, ctx_peer_addr, 128)
|
||||
)
|
||||
|
||||
GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
|
||||
__str_field(1, DRBD_F_REQUIRED | DRBD_F_INVARIANT, backing_dev, 128)
|
||||
__str_field(2, DRBD_F_REQUIRED | DRBD_F_INVARIANT, meta_dev, 128)
|
||||
__s32_field(3, DRBD_F_REQUIRED | DRBD_F_INVARIANT, meta_dev_idx)
|
||||
|
||||
/* use the resize command to try and change the disk_size */
|
||||
__u64_field(4, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, disk_size)
|
||||
/* we could change the max_bio_bvecs,
|
||||
* but it won't propagate through the stack */
|
||||
__u32_field(5, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, max_bio_bvecs)
|
||||
|
||||
__u32_field_def(6, DRBD_GENLA_F_MANDATORY, on_io_error, DRBD_ON_IO_ERROR_DEF)
|
||||
__u32_field_def(7, DRBD_GENLA_F_MANDATORY, fencing, DRBD_FENCING_DEF)
|
||||
|
||||
__u32_field_def(8, DRBD_GENLA_F_MANDATORY, resync_rate, DRBD_RESYNC_RATE_DEF)
|
||||
__s32_field_def(9, DRBD_GENLA_F_MANDATORY, resync_after, DRBD_MINOR_NUMBER_DEF)
|
||||
__u32_field_def(10, DRBD_GENLA_F_MANDATORY, al_extents, DRBD_AL_EXTENTS_DEF)
|
||||
__u32_field_def(11, DRBD_GENLA_F_MANDATORY, c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF)
|
||||
__u32_field_def(12, DRBD_GENLA_F_MANDATORY, c_delay_target, DRBD_C_DELAY_TARGET_DEF)
|
||||
__u32_field_def(13, DRBD_GENLA_F_MANDATORY, c_fill_target, DRBD_C_FILL_TARGET_DEF)
|
||||
__u32_field_def(14, DRBD_GENLA_F_MANDATORY, c_max_rate, DRBD_C_MAX_RATE_DEF)
|
||||
__u32_field_def(15, DRBD_GENLA_F_MANDATORY, c_min_rate, DRBD_C_MIN_RATE_DEF)
|
||||
|
||||
__flg_field_def(16, DRBD_GENLA_F_MANDATORY, disk_barrier, DRBD_DISK_BARRIER_DEF)
|
||||
__flg_field_def(17, DRBD_GENLA_F_MANDATORY, disk_flushes, DRBD_DISK_FLUSHES_DEF)
|
||||
__flg_field_def(18, DRBD_GENLA_F_MANDATORY, disk_drain, DRBD_DISK_DRAIN_DEF)
|
||||
__flg_field_def(19, DRBD_GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF)
|
||||
__u32_field_def(20, DRBD_GENLA_F_MANDATORY, disk_timeout, DRBD_DISK_TIMEOUT_DEF)
|
||||
__u32_field_def(21, 0 /* OPTIONAL */, read_balancing, DRBD_READ_BALANCING_DEF)
|
||||
/* 9: __u32_field_def(22, DRBD_GENLA_F_MANDATORY, unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF) */
|
||||
__flg_field_def(23, 0 /* OPTIONAL */, al_updates, DRBD_AL_UPDATES_DEF)
|
||||
)
|
||||
|
||||
GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts,
|
||||
__str_field_def(1, DRBD_GENLA_F_MANDATORY, cpu_mask, 32)
|
||||
__u32_field_def(2, DRBD_GENLA_F_MANDATORY, on_no_data, DRBD_ON_NO_DATA_DEF)
|
||||
)
|
||||
|
||||
GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf,
|
||||
__str_field_def(1, DRBD_GENLA_F_MANDATORY | DRBD_F_SENSITIVE,
|
||||
shared_secret, SHARED_SECRET_MAX)
|
||||
__str_field_def(2, DRBD_GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX)
|
||||
__str_field_def(3, DRBD_GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX)
|
||||
__str_field_def(4, DRBD_GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX)
|
||||
__str_field_def(5, DRBD_GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX)
|
||||
__u32_field_def(6, DRBD_GENLA_F_MANDATORY, wire_protocol, DRBD_PROTOCOL_DEF)
|
||||
__u32_field_def(7, DRBD_GENLA_F_MANDATORY, connect_int, DRBD_CONNECT_INT_DEF)
|
||||
__u32_field_def(8, DRBD_GENLA_F_MANDATORY, timeout, DRBD_TIMEOUT_DEF)
|
||||
__u32_field_def(9, DRBD_GENLA_F_MANDATORY, ping_int, DRBD_PING_INT_DEF)
|
||||
__u32_field_def(10, DRBD_GENLA_F_MANDATORY, ping_timeo, DRBD_PING_TIMEO_DEF)
|
||||
__u32_field_def(11, DRBD_GENLA_F_MANDATORY, sndbuf_size, DRBD_SNDBUF_SIZE_DEF)
|
||||
__u32_field_def(12, DRBD_GENLA_F_MANDATORY, rcvbuf_size, DRBD_RCVBUF_SIZE_DEF)
|
||||
__u32_field_def(13, DRBD_GENLA_F_MANDATORY, ko_count, DRBD_KO_COUNT_DEF)
|
||||
__u32_field_def(14, DRBD_GENLA_F_MANDATORY, max_buffers, DRBD_MAX_BUFFERS_DEF)
|
||||
__u32_field_def(15, DRBD_GENLA_F_MANDATORY, max_epoch_size, DRBD_MAX_EPOCH_SIZE_DEF)
|
||||
__u32_field_def(16, DRBD_GENLA_F_MANDATORY, unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF)
|
||||
__u32_field_def(17, DRBD_GENLA_F_MANDATORY, after_sb_0p, DRBD_AFTER_SB_0P_DEF)
|
||||
__u32_field_def(18, DRBD_GENLA_F_MANDATORY, after_sb_1p, DRBD_AFTER_SB_1P_DEF)
|
||||
__u32_field_def(19, DRBD_GENLA_F_MANDATORY, after_sb_2p, DRBD_AFTER_SB_2P_DEF)
|
||||
__u32_field_def(20, DRBD_GENLA_F_MANDATORY, rr_conflict, DRBD_RR_CONFLICT_DEF)
|
||||
__u32_field_def(21, DRBD_GENLA_F_MANDATORY, on_congestion, DRBD_ON_CONGESTION_DEF)
|
||||
__u32_field_def(22, DRBD_GENLA_F_MANDATORY, cong_fill, DRBD_CONG_FILL_DEF)
|
||||
__u32_field_def(23, DRBD_GENLA_F_MANDATORY, cong_extents, DRBD_CONG_EXTENTS_DEF)
|
||||
__flg_field_def(24, DRBD_GENLA_F_MANDATORY, two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF)
|
||||
__flg_field(25, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, discard_my_data)
|
||||
__flg_field_def(26, DRBD_GENLA_F_MANDATORY, tcp_cork, DRBD_TCP_CORK_DEF)
|
||||
__flg_field_def(27, DRBD_GENLA_F_MANDATORY, always_asbp, DRBD_ALWAYS_ASBP_DEF)
|
||||
__flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, tentative)
|
||||
__flg_field_def(29, DRBD_GENLA_F_MANDATORY, use_rle, DRBD_USE_RLE_DEF)
|
||||
/* 9: __u32_field_def(30, DRBD_GENLA_F_MANDATORY, fencing_policy, DRBD_FENCING_DEF) */
|
||||
)
|
||||
|
||||
GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms,
|
||||
__flg_field(1, DRBD_GENLA_F_MANDATORY, assume_uptodate)
|
||||
)
|
||||
|
||||
GENL_struct(DRBD_NLA_RESIZE_PARMS, 7, resize_parms,
|
||||
__u64_field(1, DRBD_GENLA_F_MANDATORY, resize_size)
|
||||
__flg_field(2, DRBD_GENLA_F_MANDATORY, resize_force)
|
||||
__flg_field(3, DRBD_GENLA_F_MANDATORY, no_resync)
|
||||
)
|
||||
|
||||
GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info,
|
||||
/* the reason of the broadcast,
|
||||
* if this is an event triggered broadcast. */
|
||||
__u32_field(1, DRBD_GENLA_F_MANDATORY, sib_reason)
|
||||
__u32_field(2, DRBD_F_REQUIRED, current_state)
|
||||
__u64_field(3, DRBD_GENLA_F_MANDATORY, capacity)
|
||||
__u64_field(4, DRBD_GENLA_F_MANDATORY, ed_uuid)
|
||||
|
||||
/* These are for broadcast from after state change work.
|
||||
* prev_state and new_state are from the moment the state change took
|
||||
* place, new_state is not necessarily the same as current_state,
|
||||
* there may have been more state changes since. Which will be
|
||||
* broadcasted soon, in their respective after state change work. */
|
||||
__u32_field(5, DRBD_GENLA_F_MANDATORY, prev_state)
|
||||
__u32_field(6, DRBD_GENLA_F_MANDATORY, new_state)
|
||||
|
||||
/* if we have a local disk: */
|
||||
__bin_field(7, DRBD_GENLA_F_MANDATORY, uuids, (UI_SIZE*sizeof(__u64)))
|
||||
__u32_field(8, DRBD_GENLA_F_MANDATORY, disk_flags)
|
||||
__u64_field(9, DRBD_GENLA_F_MANDATORY, bits_total)
|
||||
__u64_field(10, DRBD_GENLA_F_MANDATORY, bits_oos)
|
||||
/* and in case resync or online verify is active */
|
||||
__u64_field(11, DRBD_GENLA_F_MANDATORY, bits_rs_total)
|
||||
__u64_field(12, DRBD_GENLA_F_MANDATORY, bits_rs_failed)
|
||||
|
||||
/* for pre and post notifications of helper execution */
|
||||
__str_field(13, DRBD_GENLA_F_MANDATORY, helper, 32)
|
||||
__u32_field(14, DRBD_GENLA_F_MANDATORY, helper_exit_code)
|
||||
|
||||
__u64_field(15, 0, send_cnt)
|
||||
__u64_field(16, 0, recv_cnt)
|
||||
__u64_field(17, 0, read_cnt)
|
||||
__u64_field(18, 0, writ_cnt)
|
||||
__u64_field(19, 0, al_writ_cnt)
|
||||
__u64_field(20, 0, bm_writ_cnt)
|
||||
__u32_field(21, 0, ap_bio_cnt)
|
||||
__u32_field(22, 0, ap_pending_cnt)
|
||||
__u32_field(23, 0, rs_pending_cnt)
|
||||
)
|
||||
|
||||
GENL_struct(DRBD_NLA_START_OV_PARMS, 9, start_ov_parms,
|
||||
__u64_field(1, DRBD_GENLA_F_MANDATORY, ov_start_sector)
|
||||
__u64_field(2, DRBD_GENLA_F_MANDATORY, ov_stop_sector)
|
||||
)
|
||||
|
||||
GENL_struct(DRBD_NLA_NEW_C_UUID_PARMS, 10, new_c_uuid_parms,
|
||||
__flg_field(1, DRBD_GENLA_F_MANDATORY, clear_bm)
|
||||
)
|
||||
|
||||
GENL_struct(DRBD_NLA_TIMEOUT_PARMS, 11, timeout_parms,
|
||||
__u32_field(1, DRBD_F_REQUIRED, timeout_type)
|
||||
)
|
||||
|
||||
GENL_struct(DRBD_NLA_DISCONNECT_PARMS, 12, disconnect_parms,
|
||||
__flg_field(1, DRBD_GENLA_F_MANDATORY, force_disconnect)
|
||||
)
|
||||
|
||||
GENL_struct(DRBD_NLA_DETACH_PARMS, 13, detach_parms,
|
||||
__flg_field(1, DRBD_GENLA_F_MANDATORY, force_detach)
|
||||
)
|
||||
|
||||
/*
|
||||
* Notifications and commands (genlmsghdr->cmd)
|
||||
*/
|
||||
GENL_mc_group(events)
|
||||
|
||||
/* kernel -> userspace announcement of changes */
|
||||
GENL_notification(
|
||||
DRBD_EVENT, 1, events,
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
|
||||
GENL_tla_expected(DRBD_NLA_STATE_INFO, DRBD_F_REQUIRED)
|
||||
GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_GENLA_F_MANDATORY)
|
||||
GENL_tla_expected(DRBD_NLA_DISK_CONF, DRBD_GENLA_F_MANDATORY)
|
||||
GENL_tla_expected(DRBD_NLA_SYNCER_CONF, DRBD_GENLA_F_MANDATORY)
|
||||
)
|
||||
|
||||
/* query kernel for specific or all info */
|
||||
GENL_op(
|
||||
DRBD_ADM_GET_STATUS, 2,
|
||||
GENL_op_init(
|
||||
.doit = drbd_adm_get_status,
|
||||
.dumpit = drbd_adm_get_status_all,
|
||||
/* anyone may ask for the status,
|
||||
* it is broadcasted anyways */
|
||||
),
|
||||
/* To select the object .doit.
|
||||
* Or a subset of objects in .dumpit. */
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY)
|
||||
)
|
||||
|
||||
/* add DRBD minor devices as volumes to resources */
|
||||
GENL_op(DRBD_ADM_NEW_MINOR, 5, GENL_doit(drbd_adm_add_minor),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
|
||||
GENL_op(DRBD_ADM_DEL_MINOR, 6, GENL_doit(drbd_adm_delete_minor),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
|
||||
|
||||
/* add or delete resources */
|
||||
GENL_op(DRBD_ADM_NEW_RESOURCE, 7, GENL_doit(drbd_adm_new_resource),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
|
||||
GENL_op(DRBD_ADM_DEL_RESOURCE, 8, GENL_doit(drbd_adm_del_resource),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
|
||||
|
||||
GENL_op(DRBD_ADM_RESOURCE_OPTS, 9,
|
||||
GENL_doit(drbd_adm_resource_opts),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
|
||||
GENL_tla_expected(DRBD_NLA_RESOURCE_OPTS, DRBD_GENLA_F_MANDATORY)
|
||||
)
|
||||
|
||||
GENL_op(
|
||||
DRBD_ADM_CONNECT, 10,
|
||||
GENL_doit(drbd_adm_connect),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
|
||||
GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_F_REQUIRED)
|
||||
)
|
||||
|
||||
GENL_op(
|
||||
DRBD_ADM_CHG_NET_OPTS, 29,
|
||||
GENL_doit(drbd_adm_net_opts),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
|
||||
GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_F_REQUIRED)
|
||||
)
|
||||
|
||||
GENL_op(DRBD_ADM_DISCONNECT, 11, GENL_doit(drbd_adm_disconnect),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
|
||||
|
||||
GENL_op(DRBD_ADM_ATTACH, 12,
|
||||
GENL_doit(drbd_adm_attach),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
|
||||
GENL_tla_expected(DRBD_NLA_DISK_CONF, DRBD_F_REQUIRED)
|
||||
)
|
||||
|
||||
GENL_op(DRBD_ADM_CHG_DISK_OPTS, 28,
|
||||
GENL_doit(drbd_adm_disk_opts),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
|
||||
GENL_tla_expected(DRBD_NLA_DISK_OPTS, DRBD_F_REQUIRED)
|
||||
)
|
||||
|
||||
GENL_op(
|
||||
DRBD_ADM_RESIZE, 13,
|
||||
GENL_doit(drbd_adm_resize),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
|
||||
GENL_tla_expected(DRBD_NLA_RESIZE_PARMS, DRBD_GENLA_F_MANDATORY)
|
||||
)
|
||||
|
||||
GENL_op(
|
||||
DRBD_ADM_PRIMARY, 14,
|
||||
GENL_doit(drbd_adm_set_role),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
|
||||
GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, DRBD_F_REQUIRED)
|
||||
)
|
||||
|
||||
GENL_op(
|
||||
DRBD_ADM_SECONDARY, 15,
|
||||
GENL_doit(drbd_adm_set_role),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
|
||||
GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, DRBD_F_REQUIRED)
|
||||
)
|
||||
|
||||
GENL_op(
|
||||
DRBD_ADM_NEW_C_UUID, 16,
|
||||
GENL_doit(drbd_adm_new_c_uuid),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
|
||||
GENL_tla_expected(DRBD_NLA_NEW_C_UUID_PARMS, DRBD_GENLA_F_MANDATORY)
|
||||
)
|
||||
|
||||
GENL_op(
|
||||
DRBD_ADM_START_OV, 17,
|
||||
GENL_doit(drbd_adm_start_ov),
|
||||
GENL_tla_expected(DRBD_NLA_START_OV_PARMS, DRBD_GENLA_F_MANDATORY)
|
||||
)
|
||||
|
||||
GENL_op(DRBD_ADM_DETACH, 18, GENL_doit(drbd_adm_detach),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
|
||||
GENL_tla_expected(DRBD_NLA_DETACH_PARMS, DRBD_GENLA_F_MANDATORY))
|
||||
|
||||
GENL_op(DRBD_ADM_INVALIDATE, 19, GENL_doit(drbd_adm_invalidate),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
|
||||
GENL_op(DRBD_ADM_INVAL_PEER, 20, GENL_doit(drbd_adm_invalidate_peer),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
|
||||
GENL_op(DRBD_ADM_PAUSE_SYNC, 21, GENL_doit(drbd_adm_pause_sync),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
|
||||
GENL_op(DRBD_ADM_RESUME_SYNC, 22, GENL_doit(drbd_adm_resume_sync),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
|
||||
GENL_op(DRBD_ADM_SUSPEND_IO, 23, GENL_doit(drbd_adm_suspend_io),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
|
||||
GENL_op(DRBD_ADM_RESUME_IO, 24, GENL_doit(drbd_adm_resume_io),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
|
||||
GENL_op(DRBD_ADM_OUTDATE, 25, GENL_doit(drbd_adm_outdate),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
|
||||
GENL_op(DRBD_ADM_GET_TIMEOUT_TYPE, 26, GENL_doit(drbd_adm_get_timeout_type),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
|
||||
GENL_op(DRBD_ADM_DOWN, 27, GENL_doit(drbd_adm_down),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
|
|
@ -0,0 +1,55 @@
|
|||
#ifndef DRBD_GENL_STRUCT_H
|
||||
#define DRBD_GENL_STRUCT_H
|
||||
|
||||
/**
|
||||
* struct drbd_genlmsghdr - DRBD specific header used in NETLINK_GENERIC requests
|
||||
* @minor:
|
||||
* For admin requests (user -> kernel): which minor device to operate on.
|
||||
* For (unicast) replies or informational (broadcast) messages
|
||||
* (kernel -> user): which minor device the information is about.
|
||||
* If we do not operate on minors, but on connections or resources,
|
||||
* the minor value shall be (~0), and the attribute DRBD_NLA_CFG_CONTEXT
|
||||
* is used instead.
|
||||
* @flags: possible operation modifiers (relevant only for user->kernel):
|
||||
* DRBD_GENL_F_SET_DEFAULTS
|
||||
* @volume:
|
||||
* When creating a new minor (adding it to a resource), the resource needs
|
||||
* to know which volume number within the resource this is supposed to be.
|
||||
* The volume number corresponds to the same volume number on the remote side,
|
||||
* whereas the minor number on the remote side may be different
|
||||
* (union with flags).
|
||||
* @ret_code: kernel->userland unicast cfg reply return code (union with flags);
|
||||
*/
|
||||
struct drbd_genlmsghdr {
|
||||
__u32 minor;
|
||||
union {
|
||||
__u32 flags;
|
||||
__s32 ret_code;
|
||||
};
|
||||
};
|
||||
|
||||
/* To be used in drbd_genlmsghdr.flags */
|
||||
enum {
|
||||
DRBD_GENL_F_SET_DEFAULTS = 1,
|
||||
};
|
||||
|
||||
enum drbd_state_info_bcast_reason {
|
||||
SIB_GET_STATUS_REPLY = 1,
|
||||
SIB_STATE_CHANGE = 2,
|
||||
SIB_HELPER_PRE = 3,
|
||||
SIB_HELPER_POST = 4,
|
||||
SIB_SYNC_PROGRESS = 5,
|
||||
};
|
||||
|
||||
/* hack around predefined gcc/cpp "linux=1",
|
||||
* we cannot possibly include <1/drbd_genl.h> */
|
||||
#undef linux
|
||||
|
||||
#include <linux/drbd.h>
|
||||
#define GENL_MAGIC_VERSION API_VERSION
|
||||
#define GENL_MAGIC_FAMILY drbd
|
||||
#define GENL_MAGIC_FAMILY_HDRSZ sizeof(struct drbd_genlmsghdr)
|
||||
#define GENL_MAGIC_INCLUDE_FILE <linux/drbd_genl.h>
|
||||
#include <linux/genl_magic_struct.h>
|
||||
|
||||
#endif
|
|
@ -16,29 +16,37 @@
|
|||
#define DEBUG_RANGE_CHECK 0
|
||||
|
||||
#define DRBD_MINOR_COUNT_MIN 1
|
||||
#define DRBD_MINOR_COUNT_MAX 256
|
||||
#define DRBD_MINOR_COUNT_MAX 255
|
||||
#define DRBD_MINOR_COUNT_DEF 32
|
||||
#define DRBD_MINOR_COUNT_SCALE '1'
|
||||
|
||||
#define DRBD_VOLUME_MAX 65535
|
||||
|
||||
#define DRBD_DIALOG_REFRESH_MIN 0
|
||||
#define DRBD_DIALOG_REFRESH_MAX 600
|
||||
#define DRBD_DIALOG_REFRESH_SCALE '1'
|
||||
|
||||
/* valid port number */
|
||||
#define DRBD_PORT_MIN 1
|
||||
#define DRBD_PORT_MAX 0xffff
|
||||
#define DRBD_PORT_SCALE '1'
|
||||
|
||||
/* startup { */
|
||||
/* if you want more than 3.4 days, disable */
|
||||
#define DRBD_WFC_TIMEOUT_MIN 0
|
||||
#define DRBD_WFC_TIMEOUT_MAX 300000
|
||||
#define DRBD_WFC_TIMEOUT_DEF 0
|
||||
#define DRBD_WFC_TIMEOUT_SCALE '1'
|
||||
|
||||
#define DRBD_DEGR_WFC_TIMEOUT_MIN 0
|
||||
#define DRBD_DEGR_WFC_TIMEOUT_MAX 300000
|
||||
#define DRBD_DEGR_WFC_TIMEOUT_DEF 0
|
||||
#define DRBD_DEGR_WFC_TIMEOUT_SCALE '1'
|
||||
|
||||
#define DRBD_OUTDATED_WFC_TIMEOUT_MIN 0
|
||||
#define DRBD_OUTDATED_WFC_TIMEOUT_MAX 300000
|
||||
#define DRBD_OUTDATED_WFC_TIMEOUT_DEF 0
|
||||
#define DRBD_OUTDATED_WFC_TIMEOUT_SCALE '1'
|
||||
/* }*/
|
||||
|
||||
/* net { */
|
||||
|
@ -47,75 +55,91 @@
|
|||
#define DRBD_TIMEOUT_MIN 1
|
||||
#define DRBD_TIMEOUT_MAX 600
|
||||
#define DRBD_TIMEOUT_DEF 60 /* 6 seconds */
|
||||
#define DRBD_TIMEOUT_SCALE '1'
|
||||
|
||||
/* If backing disk takes longer than disk_timeout, mark the disk as failed */
|
||||
#define DRBD_DISK_TIMEOUT_MIN 0 /* 0 = disabled */
|
||||
#define DRBD_DISK_TIMEOUT_MAX 6000 /* 10 Minutes */
|
||||
#define DRBD_DISK_TIMEOUT_DEF 0 /* disabled */
|
||||
#define DRBD_DISK_TIMEOUT_SCALE '1'
|
||||
|
||||
/* active connection retries when C_WF_CONNECTION */
|
||||
#define DRBD_CONNECT_INT_MIN 1
|
||||
#define DRBD_CONNECT_INT_MAX 120
|
||||
#define DRBD_CONNECT_INT_DEF 10 /* seconds */
|
||||
#define DRBD_CONNECT_INT_SCALE '1'
|
||||
|
||||
/* keep-alive probes when idle */
|
||||
#define DRBD_PING_INT_MIN 1
|
||||
#define DRBD_PING_INT_MAX 120
|
||||
#define DRBD_PING_INT_DEF 10
|
||||
#define DRBD_PING_INT_SCALE '1'
|
||||
|
||||
/* timeout for the ping packets.*/
|
||||
#define DRBD_PING_TIMEO_MIN 1
|
||||
#define DRBD_PING_TIMEO_MAX 300
|
||||
#define DRBD_PING_TIMEO_DEF 5
|
||||
#define DRBD_PING_TIMEO_SCALE '1'
|
||||
|
||||
/* max number of write requests between write barriers */
|
||||
#define DRBD_MAX_EPOCH_SIZE_MIN 1
|
||||
#define DRBD_MAX_EPOCH_SIZE_MAX 20000
|
||||
#define DRBD_MAX_EPOCH_SIZE_DEF 2048
|
||||
#define DRBD_MAX_EPOCH_SIZE_SCALE '1'
|
||||
|
||||
/* I don't think that a tcp send buffer of more than 10M is useful */
|
||||
#define DRBD_SNDBUF_SIZE_MIN 0
|
||||
#define DRBD_SNDBUF_SIZE_MAX (10<<20)
|
||||
#define DRBD_SNDBUF_SIZE_DEF 0
|
||||
#define DRBD_SNDBUF_SIZE_SCALE '1'
|
||||
|
||||
#define DRBD_RCVBUF_SIZE_MIN 0
|
||||
#define DRBD_RCVBUF_SIZE_MAX (10<<20)
|
||||
#define DRBD_RCVBUF_SIZE_DEF 0
|
||||
#define DRBD_RCVBUF_SIZE_SCALE '1'
|
||||
|
||||
/* @4k PageSize -> 128kB - 512MB */
|
||||
#define DRBD_MAX_BUFFERS_MIN 32
|
||||
#define DRBD_MAX_BUFFERS_MAX 131072
|
||||
#define DRBD_MAX_BUFFERS_DEF 2048
|
||||
#define DRBD_MAX_BUFFERS_SCALE '1'
|
||||
|
||||
/* @4k PageSize -> 4kB - 512MB */
|
||||
#define DRBD_UNPLUG_WATERMARK_MIN 1
|
||||
#define DRBD_UNPLUG_WATERMARK_MAX 131072
|
||||
#define DRBD_UNPLUG_WATERMARK_DEF (DRBD_MAX_BUFFERS_DEF/16)
|
||||
#define DRBD_UNPLUG_WATERMARK_SCALE '1'
|
||||
|
||||
/* 0 is disabled.
|
||||
* 200 should be more than enough even for very short timeouts */
|
||||
#define DRBD_KO_COUNT_MIN 0
|
||||
#define DRBD_KO_COUNT_MAX 200
|
||||
#define DRBD_KO_COUNT_DEF 0
|
||||
#define DRBD_KO_COUNT_DEF 7
|
||||
#define DRBD_KO_COUNT_SCALE '1'
|
||||
/* } */
|
||||
|
||||
/* syncer { */
|
||||
/* FIXME allow rate to be zero? */
|
||||
#define DRBD_RATE_MIN 1
|
||||
#define DRBD_RESYNC_RATE_MIN 1
|
||||
/* channel bonding 10 GbE, or other hardware */
|
||||
#define DRBD_RATE_MAX (4 << 20)
|
||||
#define DRBD_RATE_DEF 250 /* kb/second */
|
||||
#define DRBD_RESYNC_RATE_MAX (4 << 20)
|
||||
#define DRBD_RESYNC_RATE_DEF 250
|
||||
#define DRBD_RESYNC_RATE_SCALE 'k' /* kilobytes */
|
||||
|
||||
/* less than 7 would hit performance unnecessarily.
|
||||
* 3833 is the largest prime that still does fit
|
||||
* into 64 sectors of activity log */
|
||||
* 919 slots context information per transaction,
|
||||
* 32k activity log, 4k transaction size,
|
||||
* one transaction in flight:
|
||||
* 919 * 7 = 6433 */
|
||||
#define DRBD_AL_EXTENTS_MIN 7
|
||||
#define DRBD_AL_EXTENTS_MAX 3833
|
||||
#define DRBD_AL_EXTENTS_DEF 127
|
||||
#define DRBD_AL_EXTENTS_MAX 6433
|
||||
#define DRBD_AL_EXTENTS_DEF 1237
|
||||
#define DRBD_AL_EXTENTS_SCALE '1'
|
||||
|
||||
#define DRBD_AFTER_MIN -1
|
||||
#define DRBD_AFTER_MAX 255
|
||||
#define DRBD_AFTER_DEF -1
|
||||
#define DRBD_MINOR_NUMBER_MIN -1
|
||||
#define DRBD_MINOR_NUMBER_MAX ((1 << 20) - 1)
|
||||
#define DRBD_MINOR_NUMBER_DEF -1
|
||||
#define DRBD_MINOR_NUMBER_SCALE '1'
|
||||
|
||||
/* } */
|
||||
|
||||
|
@ -124,11 +148,12 @@
|
|||
* the upper limit with 64bit kernel, enough ram and flexible meta data
|
||||
* is 1 PiB, currently. */
|
||||
/* DRBD_MAX_SECTORS */
|
||||
#define DRBD_DISK_SIZE_SECT_MIN 0
|
||||
#define DRBD_DISK_SIZE_SECT_MAX (1 * (2LLU << 40))
|
||||
#define DRBD_DISK_SIZE_SECT_DEF 0 /* = disabled = no user size... */
|
||||
#define DRBD_DISK_SIZE_MIN 0
|
||||
#define DRBD_DISK_SIZE_MAX (1 * (2LLU << 40))
|
||||
#define DRBD_DISK_SIZE_DEF 0 /* = disabled = no user size... */
|
||||
#define DRBD_DISK_SIZE_SCALE 's' /* sectors */
|
||||
|
||||
#define DRBD_ON_IO_ERROR_DEF EP_PASS_ON
|
||||
#define DRBD_ON_IO_ERROR_DEF EP_DETACH
|
||||
#define DRBD_FENCING_DEF FP_DONT_CARE
|
||||
#define DRBD_AFTER_SB_0P_DEF ASB_DISCONNECT
|
||||
#define DRBD_AFTER_SB_1P_DEF ASB_DISCONNECT
|
||||
|
@ -136,38 +161,59 @@
|
|||
#define DRBD_RR_CONFLICT_DEF ASB_DISCONNECT
|
||||
#define DRBD_ON_NO_DATA_DEF OND_IO_ERROR
|
||||
#define DRBD_ON_CONGESTION_DEF OC_BLOCK
|
||||
#define DRBD_READ_BALANCING_DEF RB_PREFER_LOCAL
|
||||
|
||||
#define DRBD_MAX_BIO_BVECS_MIN 0
|
||||
#define DRBD_MAX_BIO_BVECS_MAX 128
|
||||
#define DRBD_MAX_BIO_BVECS_DEF 0
|
||||
#define DRBD_MAX_BIO_BVECS_SCALE '1'
|
||||
|
||||
#define DRBD_C_PLAN_AHEAD_MIN 0
|
||||
#define DRBD_C_PLAN_AHEAD_MAX 300
|
||||
#define DRBD_C_PLAN_AHEAD_DEF 0 /* RS rate controller disabled by default */
|
||||
#define DRBD_C_PLAN_AHEAD_DEF 20
|
||||
#define DRBD_C_PLAN_AHEAD_SCALE '1'
|
||||
|
||||
#define DRBD_C_DELAY_TARGET_MIN 1
|
||||
#define DRBD_C_DELAY_TARGET_MAX 100
|
||||
#define DRBD_C_DELAY_TARGET_DEF 10
|
||||
#define DRBD_C_DELAY_TARGET_SCALE '1'
|
||||
|
||||
#define DRBD_C_FILL_TARGET_MIN 0
|
||||
#define DRBD_C_FILL_TARGET_MAX (1<<20) /* 500MByte in sec */
|
||||
#define DRBD_C_FILL_TARGET_DEF 0 /* By default disabled -> controlled by delay_target */
|
||||
#define DRBD_C_FILL_TARGET_DEF 100 /* Try to place 50KiB in socket send buffer during resync */
|
||||
#define DRBD_C_FILL_TARGET_SCALE 's' /* sectors */
|
||||
|
||||
#define DRBD_C_MAX_RATE_MIN 250 /* kByte/sec */
|
||||
#define DRBD_C_MAX_RATE_MIN 250
|
||||
#define DRBD_C_MAX_RATE_MAX (4 << 20)
|
||||
#define DRBD_C_MAX_RATE_DEF 102400
|
||||
#define DRBD_C_MAX_RATE_SCALE 'k' /* kilobytes */
|
||||
|
||||
#define DRBD_C_MIN_RATE_MIN 0 /* kByte/sec */
|
||||
#define DRBD_C_MIN_RATE_MIN 0
|
||||
#define DRBD_C_MIN_RATE_MAX (4 << 20)
|
||||
#define DRBD_C_MIN_RATE_DEF 4096
|
||||
#define DRBD_C_MIN_RATE_DEF 250
|
||||
#define DRBD_C_MIN_RATE_SCALE 'k' /* kilobytes */
|
||||
|
||||
#define DRBD_CONG_FILL_MIN 0
|
||||
#define DRBD_CONG_FILL_MAX (10<<21) /* 10GByte in sectors */
|
||||
#define DRBD_CONG_FILL_DEF 0
|
||||
#define DRBD_CONG_FILL_SCALE 's' /* sectors */
|
||||
|
||||
#define DRBD_CONG_EXTENTS_MIN DRBD_AL_EXTENTS_MIN
|
||||
#define DRBD_CONG_EXTENTS_MAX DRBD_AL_EXTENTS_MAX
|
||||
#define DRBD_CONG_EXTENTS_DEF DRBD_AL_EXTENTS_DEF
|
||||
#define DRBD_CONG_EXTENTS_SCALE DRBD_AL_EXTENTS_SCALE
|
||||
|
||||
#define DRBD_PROTOCOL_DEF DRBD_PROT_C
|
||||
|
||||
#define DRBD_DISK_BARRIER_DEF 0
|
||||
#define DRBD_DISK_FLUSHES_DEF 1
|
||||
#define DRBD_DISK_DRAIN_DEF 1
|
||||
#define DRBD_MD_FLUSHES_DEF 1
|
||||
#define DRBD_TCP_CORK_DEF 1
|
||||
#define DRBD_AL_UPDATES_DEF 1
|
||||
|
||||
#define DRBD_ALLOW_TWO_PRIMARIES_DEF 0
|
||||
#define DRBD_ALWAYS_ASBP_DEF 0
|
||||
#define DRBD_USE_RLE_DEF 1
|
||||
|
||||
#undef RANGE
|
||||
#endif
|
||||
|
|
|
@ -1,163 +0,0 @@
|
|||
/*
|
||||
PAKET( name,
|
||||
TYPE ( pn, pr, member )
|
||||
...
|
||||
)
|
||||
|
||||
You may never reissue one of the pn arguments
|
||||
*/
|
||||
|
||||
#if !defined(NL_PACKET) || !defined(NL_STRING) || !defined(NL_INTEGER) || !defined(NL_BIT) || !defined(NL_INT64)
|
||||
#error "The macros NL_PACKET, NL_STRING, NL_INTEGER, NL_INT64 and NL_BIT needs to be defined"
|
||||
#endif
|
||||
|
||||
NL_PACKET(primary, 1,
|
||||
NL_BIT( 1, T_MAY_IGNORE, primary_force)
|
||||
)
|
||||
|
||||
NL_PACKET(secondary, 2, )
|
||||
|
||||
NL_PACKET(disk_conf, 3,
|
||||
NL_INT64( 2, T_MAY_IGNORE, disk_size)
|
||||
NL_STRING( 3, T_MANDATORY, backing_dev, 128)
|
||||
NL_STRING( 4, T_MANDATORY, meta_dev, 128)
|
||||
NL_INTEGER( 5, T_MANDATORY, meta_dev_idx)
|
||||
NL_INTEGER( 6, T_MAY_IGNORE, on_io_error)
|
||||
NL_INTEGER( 7, T_MAY_IGNORE, fencing)
|
||||
NL_BIT( 37, T_MAY_IGNORE, use_bmbv)
|
||||
NL_BIT( 53, T_MAY_IGNORE, no_disk_flush)
|
||||
NL_BIT( 54, T_MAY_IGNORE, no_md_flush)
|
||||
/* 55 max_bio_size was available in 8.2.6rc2 */
|
||||
NL_INTEGER( 56, T_MAY_IGNORE, max_bio_bvecs)
|
||||
NL_BIT( 57, T_MAY_IGNORE, no_disk_barrier)
|
||||
NL_BIT( 58, T_MAY_IGNORE, no_disk_drain)
|
||||
NL_INTEGER( 89, T_MAY_IGNORE, disk_timeout)
|
||||
)
|
||||
|
||||
NL_PACKET(detach, 4,
|
||||
NL_BIT( 88, T_MANDATORY, detach_force)
|
||||
)
|
||||
|
||||
NL_PACKET(net_conf, 5,
|
||||
NL_STRING( 8, T_MANDATORY, my_addr, 128)
|
||||
NL_STRING( 9, T_MANDATORY, peer_addr, 128)
|
||||
NL_STRING( 10, T_MAY_IGNORE, shared_secret, SHARED_SECRET_MAX)
|
||||
NL_STRING( 11, T_MAY_IGNORE, cram_hmac_alg, SHARED_SECRET_MAX)
|
||||
NL_STRING( 44, T_MAY_IGNORE, integrity_alg, SHARED_SECRET_MAX)
|
||||
NL_INTEGER( 14, T_MAY_IGNORE, timeout)
|
||||
NL_INTEGER( 15, T_MANDATORY, wire_protocol)
|
||||
NL_INTEGER( 16, T_MAY_IGNORE, try_connect_int)
|
||||
NL_INTEGER( 17, T_MAY_IGNORE, ping_int)
|
||||
NL_INTEGER( 18, T_MAY_IGNORE, max_epoch_size)
|
||||
NL_INTEGER( 19, T_MAY_IGNORE, max_buffers)
|
||||
NL_INTEGER( 20, T_MAY_IGNORE, unplug_watermark)
|
||||
NL_INTEGER( 21, T_MAY_IGNORE, sndbuf_size)
|
||||
NL_INTEGER( 22, T_MAY_IGNORE, ko_count)
|
||||
NL_INTEGER( 24, T_MAY_IGNORE, after_sb_0p)
|
||||
NL_INTEGER( 25, T_MAY_IGNORE, after_sb_1p)
|
||||
NL_INTEGER( 26, T_MAY_IGNORE, after_sb_2p)
|
||||
NL_INTEGER( 39, T_MAY_IGNORE, rr_conflict)
|
||||
NL_INTEGER( 40, T_MAY_IGNORE, ping_timeo)
|
||||
NL_INTEGER( 67, T_MAY_IGNORE, rcvbuf_size)
|
||||
NL_INTEGER( 81, T_MAY_IGNORE, on_congestion)
|
||||
NL_INTEGER( 82, T_MAY_IGNORE, cong_fill)
|
||||
NL_INTEGER( 83, T_MAY_IGNORE, cong_extents)
|
||||
/* 59 addr_family was available in GIT, never released */
|
||||
NL_BIT( 60, T_MANDATORY, mind_af)
|
||||
NL_BIT( 27, T_MAY_IGNORE, want_lose)
|
||||
NL_BIT( 28, T_MAY_IGNORE, two_primaries)
|
||||
NL_BIT( 41, T_MAY_IGNORE, always_asbp)
|
||||
NL_BIT( 61, T_MAY_IGNORE, no_cork)
|
||||
NL_BIT( 62, T_MANDATORY, auto_sndbuf_size)
|
||||
NL_BIT( 70, T_MANDATORY, dry_run)
|
||||
)
|
||||
|
||||
NL_PACKET(disconnect, 6,
|
||||
NL_BIT( 84, T_MAY_IGNORE, force)
|
||||
)
|
||||
|
||||
NL_PACKET(resize, 7,
|
||||
NL_INT64( 29, T_MAY_IGNORE, resize_size)
|
||||
NL_BIT( 68, T_MAY_IGNORE, resize_force)
|
||||
NL_BIT( 69, T_MANDATORY, no_resync)
|
||||
)
|
||||
|
||||
NL_PACKET(syncer_conf, 8,
|
||||
NL_INTEGER( 30, T_MAY_IGNORE, rate)
|
||||
NL_INTEGER( 31, T_MAY_IGNORE, after)
|
||||
NL_INTEGER( 32, T_MAY_IGNORE, al_extents)
|
||||
/* NL_INTEGER( 71, T_MAY_IGNORE, dp_volume)
|
||||
* NL_INTEGER( 72, T_MAY_IGNORE, dp_interval)
|
||||
* NL_INTEGER( 73, T_MAY_IGNORE, throttle_th)
|
||||
* NL_INTEGER( 74, T_MAY_IGNORE, hold_off_th)
|
||||
* feature will be reimplemented differently with 8.3.9 */
|
||||
NL_STRING( 52, T_MAY_IGNORE, verify_alg, SHARED_SECRET_MAX)
|
||||
NL_STRING( 51, T_MAY_IGNORE, cpu_mask, 32)
|
||||
NL_STRING( 64, T_MAY_IGNORE, csums_alg, SHARED_SECRET_MAX)
|
||||
NL_BIT( 65, T_MAY_IGNORE, use_rle)
|
||||
NL_INTEGER( 75, T_MAY_IGNORE, on_no_data)
|
||||
NL_INTEGER( 76, T_MAY_IGNORE, c_plan_ahead)
|
||||
NL_INTEGER( 77, T_MAY_IGNORE, c_delay_target)
|
||||
NL_INTEGER( 78, T_MAY_IGNORE, c_fill_target)
|
||||
NL_INTEGER( 79, T_MAY_IGNORE, c_max_rate)
|
||||
NL_INTEGER( 80, T_MAY_IGNORE, c_min_rate)
|
||||
)
|
||||
|
||||
NL_PACKET(invalidate, 9, )
|
||||
NL_PACKET(invalidate_peer, 10, )
|
||||
NL_PACKET(pause_sync, 11, )
|
||||
NL_PACKET(resume_sync, 12, )
|
||||
NL_PACKET(suspend_io, 13, )
|
||||
NL_PACKET(resume_io, 14, )
|
||||
NL_PACKET(outdate, 15, )
|
||||
NL_PACKET(get_config, 16, )
|
||||
NL_PACKET(get_state, 17,
|
||||
NL_INTEGER( 33, T_MAY_IGNORE, state_i)
|
||||
)
|
||||
|
||||
NL_PACKET(get_uuids, 18,
|
||||
NL_STRING( 34, T_MAY_IGNORE, uuids, (UI_SIZE*sizeof(__u64)))
|
||||
NL_INTEGER( 35, T_MAY_IGNORE, uuids_flags)
|
||||
)
|
||||
|
||||
NL_PACKET(get_timeout_flag, 19,
|
||||
NL_BIT( 36, T_MAY_IGNORE, use_degraded)
|
||||
)
|
||||
|
||||
NL_PACKET(call_helper, 20,
|
||||
NL_STRING( 38, T_MAY_IGNORE, helper, 32)
|
||||
)
|
||||
|
||||
/* Tag nr 42 already allocated in drbd-8.1 development. */
|
||||
|
||||
NL_PACKET(sync_progress, 23,
|
||||
NL_INTEGER( 43, T_MAY_IGNORE, sync_progress)
|
||||
)
|
||||
|
||||
NL_PACKET(dump_ee, 24,
|
||||
NL_STRING( 45, T_MAY_IGNORE, dump_ee_reason, 32)
|
||||
NL_STRING( 46, T_MAY_IGNORE, seen_digest, SHARED_SECRET_MAX)
|
||||
NL_STRING( 47, T_MAY_IGNORE, calc_digest, SHARED_SECRET_MAX)
|
||||
NL_INT64( 48, T_MAY_IGNORE, ee_sector)
|
||||
NL_INT64( 49, T_MAY_IGNORE, ee_block_id)
|
||||
NL_STRING( 50, T_MAY_IGNORE, ee_data, 32 << 10)
|
||||
)
|
||||
|
||||
NL_PACKET(start_ov, 25,
|
||||
NL_INT64( 66, T_MAY_IGNORE, start_sector)
|
||||
)
|
||||
|
||||
NL_PACKET(new_c_uuid, 26,
|
||||
NL_BIT( 63, T_MANDATORY, clear_bm)
|
||||
)
|
||||
|
||||
#ifdef NL_RESPONSE
|
||||
NL_RESPONSE(return_code_only, 27)
|
||||
#endif
|
||||
|
||||
#undef NL_PACKET
|
||||
#undef NL_INTEGER
|
||||
#undef NL_INT64
|
||||
#undef NL_BIT
|
||||
#undef NL_STRING
|
||||
#undef NL_RESPONSE
|
|
@ -1,84 +0,0 @@
|
|||
#ifndef DRBD_TAG_MAGIC_H
|
||||
#define DRBD_TAG_MAGIC_H
|
||||
|
||||
#define TT_END 0
|
||||
#define TT_REMOVED 0xE000
|
||||
|
||||
/* declare packet_type enums */
|
||||
enum packet_types {
|
||||
#define NL_PACKET(name, number, fields) P_ ## name = number,
|
||||
#define NL_RESPONSE(name, number) P_ ## name = number,
|
||||
#define NL_INTEGER(pn, pr, member)
|
||||
#define NL_INT64(pn, pr, member)
|
||||
#define NL_BIT(pn, pr, member)
|
||||
#define NL_STRING(pn, pr, member, len)
|
||||
#include <linux/drbd_nl.h>
|
||||
P_nl_after_last_packet,
|
||||
};
|
||||
|
||||
/* These struct are used to deduce the size of the tag lists: */
|
||||
#define NL_PACKET(name, number, fields) \
|
||||
struct name ## _tag_len_struct { fields };
|
||||
#define NL_INTEGER(pn, pr, member) \
|
||||
int member; int tag_and_len ## member;
|
||||
#define NL_INT64(pn, pr, member) \
|
||||
__u64 member; int tag_and_len ## member;
|
||||
#define NL_BIT(pn, pr, member) \
|
||||
unsigned char member:1; int tag_and_len ## member;
|
||||
#define NL_STRING(pn, pr, member, len) \
|
||||
unsigned char member[len]; int member ## _len; \
|
||||
int tag_and_len ## member;
|
||||
#include <linux/drbd_nl.h>
|
||||
|
||||
/* declare tag-list-sizes */
|
||||
static const int tag_list_sizes[] = {
|
||||
#define NL_PACKET(name, number, fields) 2 fields ,
|
||||
#define NL_INTEGER(pn, pr, member) + 4 + 4
|
||||
#define NL_INT64(pn, pr, member) + 4 + 8
|
||||
#define NL_BIT(pn, pr, member) + 4 + 1
|
||||
#define NL_STRING(pn, pr, member, len) + 4 + (len)
|
||||
#include <linux/drbd_nl.h>
|
||||
};
|
||||
|
||||
/* The two highest bits are used for the tag type */
|
||||
#define TT_MASK 0xC000
|
||||
#define TT_INTEGER 0x0000
|
||||
#define TT_INT64 0x4000
|
||||
#define TT_BIT 0x8000
|
||||
#define TT_STRING 0xC000
|
||||
/* The next bit indicates if processing of the tag is mandatory */
|
||||
#define T_MANDATORY 0x2000
|
||||
#define T_MAY_IGNORE 0x0000
|
||||
#define TN_MASK 0x1fff
|
||||
/* The remaining 13 bits are used to enumerate the tags */
|
||||
|
||||
#define tag_type(T) ((T) & TT_MASK)
|
||||
#define tag_number(T) ((T) & TN_MASK)
|
||||
|
||||
/* declare tag enums */
|
||||
#define NL_PACKET(name, number, fields) fields
|
||||
enum drbd_tags {
|
||||
#define NL_INTEGER(pn, pr, member) T_ ## member = pn | TT_INTEGER | pr ,
|
||||
#define NL_INT64(pn, pr, member) T_ ## member = pn | TT_INT64 | pr ,
|
||||
#define NL_BIT(pn, pr, member) T_ ## member = pn | TT_BIT | pr ,
|
||||
#define NL_STRING(pn, pr, member, len) T_ ## member = pn | TT_STRING | pr ,
|
||||
#include <linux/drbd_nl.h>
|
||||
};
|
||||
|
||||
struct tag {
|
||||
const char *name;
|
||||
int type_n_flags;
|
||||
int max_len;
|
||||
};
|
||||
|
||||
/* declare tag names */
|
||||
#define NL_PACKET(name, number, fields) fields
|
||||
static const struct tag tag_descriptions[] = {
|
||||
#define NL_INTEGER(pn, pr, member) [ pn ] = { #member, TT_INTEGER | pr, sizeof(int) },
|
||||
#define NL_INT64(pn, pr, member) [ pn ] = { #member, TT_INT64 | pr, sizeof(__u64) },
|
||||
#define NL_BIT(pn, pr, member) [ pn ] = { #member, TT_BIT | pr, sizeof(int) },
|
||||
#define NL_STRING(pn, pr, member, len) [ pn ] = { #member, TT_STRING | pr, (len) },
|
||||
#include <linux/drbd_nl.h>
|
||||
};
|
||||
|
||||
#endif
|
|
@ -88,10 +88,14 @@ struct disk_stats {
|
|||
};
|
||||
|
||||
#define PARTITION_META_INFO_VOLNAMELTH 64
|
||||
#define PARTITION_META_INFO_UUIDLTH 16
|
||||
/*
|
||||
 * Enough for the string representation of any kind of UUID plus the terminating NUL.
|
||||
* EFI UUID is 36 characters. MSDOS UUID is 11 characters.
|
||||
*/
|
||||
#define PARTITION_META_INFO_UUIDLTH 37
|
||||
|
||||
struct partition_meta_info {
|
||||
u8 uuid[PARTITION_META_INFO_UUIDLTH]; /* always big endian */
|
||||
char uuid[PARTITION_META_INFO_UUIDLTH];
|
||||
u8 volname[PARTITION_META_INFO_VOLNAMELTH];
|
||||
};
|
||||
|
||||
|
|
|
@ -0,0 +1,422 @@
|
|||
#ifndef GENL_MAGIC_FUNC_H
|
||||
#define GENL_MAGIC_FUNC_H
|
||||
|
||||
#include <linux/genl_magic_struct.h>
|
||||
|
||||
/*
|
||||
* Magic: declare tla policy {{{1
|
||||
* Magic: declare nested policies
|
||||
* {{{2
|
||||
*/
|
||||
#undef GENL_mc_group
|
||||
#define GENL_mc_group(group)
|
||||
|
||||
#undef GENL_notification
|
||||
#define GENL_notification(op_name, op_num, mcast_group, tla_list)
|
||||
|
||||
#undef GENL_op
|
||||
#define GENL_op(op_name, op_num, handler, tla_list)
|
||||
|
||||
#undef GENL_struct
|
||||
#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
|
||||
[tag_name] = { .type = NLA_NESTED },
|
||||
|
||||
static struct nla_policy CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy)[] = {
|
||||
#include GENL_MAGIC_INCLUDE_FILE
|
||||
};
|
||||
|
||||
#undef GENL_struct
|
||||
#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
|
||||
static struct nla_policy s_name ## _nl_policy[] __read_mostly = \
|
||||
{ s_fields };
|
||||
|
||||
#undef __field
|
||||
#define __field(attr_nr, attr_flag, name, nla_type, _type, __get, \
|
||||
__put, __is_signed) \
|
||||
[attr_nr] = { .type = nla_type },
|
||||
|
||||
#undef __array
/*
 * Policy-table expansion of an array-typed attribute: emits one designated
 * initializer for slot attr_nr of the enclosing nla_policy array.
 *
 * For NLA_NUL_STRING the permitted length is maxlen minus one; for every
 * other nla_type it is maxlen unchanged.
 *
 * maxlen and nla_type are parenthesized because callers may pass an
 * expression: without the parentheses "32 << 10" would expand to
 * "32 << 10 - 1", i.e. 32 << 9, since '-' binds tighter than '<<'.
 */
#define __array(attr_nr, attr_flag, name, nla_type, _type, maxlen,	\
		__get, __put, __is_signed)				\
	[attr_nr] = { .type = nla_type,					\
		      .len = (maxlen) - ((nla_type) == NLA_NUL_STRING) },
|
||||
|
||||
#include GENL_MAGIC_INCLUDE_FILE
|
||||
|
||||
#ifndef __KERNEL__
#ifndef pr_info
/*
 * Fallback for builds without __KERNEL__ that have no pr_info(): route
 * the message to stderr.
 *
 * The expansion deliberately carries no trailing semicolon, so that
 * "if (x) pr_info(...); else ..." parses as a single if/else statement.
 */
#define pr_info(args...)	fprintf(stderr, args)
#endif
#endif
|
||||
|
||||
#ifdef GENL_MAGIC_DEBUG
|
||||
static void dprint_field(const char *dir, int nla_type,
|
||||
const char *name, void *valp)
|
||||
{
|
||||
__u64 val = valp ? *(__u32 *)valp : 1;
|
||||
switch (nla_type) {
|
||||
case NLA_U8: val = (__u8)val;
|
||||
case NLA_U16: val = (__u16)val;
|
||||
case NLA_U32: val = (__u32)val;
|
||||
pr_info("%s attr %s: %d 0x%08x\n", dir,
|
||||
name, (int)val, (unsigned)val);
|
||||
break;
|
||||
case NLA_U64:
|
||||
val = *(__u64*)valp;
|
||||
pr_info("%s attr %s: %lld 0x%08llx\n", dir,
|
||||
name, (long long)val, (unsigned long long)val);
|
||||
break;
|
||||
case NLA_FLAG:
|
||||
if (val)
|
||||
pr_info("%s attr %s: set\n", dir, name);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void dprint_array(const char *dir, int nla_type,
|
||||
const char *name, const char *val, unsigned len)
|
||||
{
|
||||
switch (nla_type) {
|
||||
case NLA_NUL_STRING:
|
||||
if (len && val[len-1] == '\0')
|
||||
len--;
|
||||
pr_info("%s attr %s: [len:%u] '%s'\n", dir, name, len, val);
|
||||
break;
|
||||
default:
|
||||
/* we can always show 4 byte,
|
||||
* thats what nlattr are aligned to. */
|
||||
pr_info("%s attr %s: [len:%u] %02x%02x%02x%02x ...\n",
|
||||
dir, name, len, val[0], val[1], val[2], val[3]);
|
||||
}
|
||||
}
|
||||
|
||||
/* Log "<a> <op> <b>"; no trailing ';' in the expansion so the macro
 * behaves like a single statement, matching the non-debug variant. */
#define DPRINT_TLA(a, op, b) pr_info("%s %s %s\n", a, op, b)
|
||||
|
||||
/* Name is a member field name of the struct s.
|
||||
* If s is NULL (only parsing, no copy requested in *_from_attrs()),
|
||||
* nla is supposed to point to the attribute containing the information
|
||||
* corresponding to that struct member. */
|
||||
#define DPRINT_FIELD(dir, nla_type, name, s, nla) \
|
||||
do { \
|
||||
if (s) \
|
||||
dprint_field(dir, nla_type, #name, &s->name); \
|
||||
else if (nla) \
|
||||
dprint_field(dir, nla_type, #name, \
|
||||
(nla_type == NLA_FLAG) ? NULL \
|
||||
: nla_data(nla)); \
|
||||
} while (0)
|
||||
|
||||
#define DPRINT_ARRAY(dir, nla_type, name, s, nla) \
|
||||
do { \
|
||||
if (s) \
|
||||
dprint_array(dir, nla_type, #name, \
|
||||
s->name, s->name ## _len); \
|
||||
else if (nla) \
|
||||
dprint_array(dir, nla_type, #name, \
|
||||
nla_data(nla), nla_len(nla)); \
|
||||
} while (0)
|
||||
#else
|
||||
#define DPRINT_TLA(a, op, b) do {} while (0)
|
||||
#define DPRINT_FIELD(dir, nla_type, name, s, nla) do {} while (0)
|
||||
#define DPRINT_ARRAY(dir, nla_type, name, s, nla) do {} while (0)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Magic: provide conversion functions {{{1
|
||||
* populate struct from attribute table:
|
||||
* {{{2
|
||||
*/
|
||||
|
||||
/* processing of generic netlink messages is serialized.
|
||||
* use one static buffer for parsing of nested attributes */
|
||||
static struct nlattr *nested_attr_tb[128];
|
||||
|
||||
#ifndef BUILD_BUG_ON
|
||||
/* Force a compilation error if condition is true */
|
||||
#define BUILD_BUG_ON(condition) ((void)BUILD_BUG_ON_ZERO(condition))
|
||||
/* Force a compilation error if condition is true, but also produce a
|
||||
result (of value 0 and type size_t), so the expression can be used
|
||||
e.g. in a structure initializer (or where-ever else comma expressions
|
||||
aren't permitted). */
|
||||
#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
|
||||
#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); }))
|
||||
#endif
|
||||
|
||||
#undef GENL_struct
|
||||
#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
|
||||
/* *_from_attrs functions are static, but potentially unused */ \
|
||||
static int __ ## s_name ## _from_attrs(struct s_name *s, \
|
||||
struct genl_info *info, bool exclude_invariants) \
|
||||
{ \
|
||||
const int maxtype = ARRAY_SIZE(s_name ## _nl_policy)-1; \
|
||||
struct nlattr *tla = info->attrs[tag_number]; \
|
||||
struct nlattr **ntb = nested_attr_tb; \
|
||||
struct nlattr *nla; \
|
||||
int err; \
|
||||
BUILD_BUG_ON(ARRAY_SIZE(s_name ## _nl_policy) > ARRAY_SIZE(nested_attr_tb)); \
|
||||
if (!tla) \
|
||||
return -ENOMSG; \
|
||||
DPRINT_TLA(#s_name, "<=-", #tag_name); \
|
||||
err = drbd_nla_parse_nested(ntb, maxtype, tla, s_name ## _nl_policy); \
|
||||
if (err) \
|
||||
return err; \
|
||||
\
|
||||
s_fields \
|
||||
return 0; \
|
||||
} __attribute__((unused)) \
|
||||
static int s_name ## _from_attrs(struct s_name *s, \
|
||||
struct genl_info *info) \
|
||||
{ \
|
||||
return __ ## s_name ## _from_attrs(s, info, false); \
|
||||
} __attribute__((unused)) \
|
||||
static int s_name ## _from_attrs_for_change(struct s_name *s, \
|
||||
struct genl_info *info) \
|
||||
{ \
|
||||
return __ ## s_name ## _from_attrs(s, info, true); \
|
||||
} __attribute__((unused)) \
|
||||
|
||||
#define __assign(attr_nr, attr_flag, name, nla_type, type, assignment...) \
|
||||
nla = ntb[attr_nr]; \
|
||||
if (nla) { \
|
||||
if (exclude_invariants && ((attr_flag) & DRBD_F_INVARIANT)) { \
|
||||
pr_info("<< must not change invariant attr: %s\n", #name); \
|
||||
return -EEXIST; \
|
||||
} \
|
||||
assignment; \
|
||||
} else if (exclude_invariants && ((attr_flag) & DRBD_F_INVARIANT)) { \
|
||||
/* attribute missing from payload, */ \
|
||||
/* which was expected */ \
|
||||
} else if ((attr_flag) & DRBD_F_REQUIRED) { \
|
||||
pr_info("<< missing attr: %s\n", #name); \
|
||||
return -ENOMSG; \
|
||||
}
|
||||
|
||||
#undef __field
|
||||
#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \
|
||||
__is_signed) \
|
||||
__assign(attr_nr, attr_flag, name, nla_type, type, \
|
||||
if (s) \
|
||||
s->name = __get(nla); \
|
||||
DPRINT_FIELD("<<", nla_type, name, s, nla))
|
||||
|
||||
/* validate_nla() already checked nla_len <= maxlen appropriately. */
|
||||
#undef __array
|
||||
#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \
|
||||
__get, __put, __is_signed) \
|
||||
__assign(attr_nr, attr_flag, name, nla_type, type, \
|
||||
if (s) \
|
||||
s->name ## _len = \
|
||||
__get(s->name, nla, maxlen); \
|
||||
DPRINT_ARRAY("<<", nla_type, name, s, nla))
|
||||
|
||||
#include GENL_MAGIC_INCLUDE_FILE
|
||||
|
||||
#undef GENL_struct
|
||||
#define GENL_struct(tag_name, tag_number, s_name, s_fields)
|
||||
|
||||
/*
|
||||
* Magic: define op number to op name mapping {{{1
|
||||
* {{{2
|
||||
*/
|
||||
const char *CONCAT_(GENL_MAGIC_FAMILY, _genl_cmd_to_str)(__u8 cmd)
|
||||
{
|
||||
switch (cmd) {
|
||||
#undef GENL_op
|
||||
#define GENL_op(op_name, op_num, handler, tla_list) \
|
||||
case op_num: return #op_name;
|
||||
#include GENL_MAGIC_INCLUDE_FILE
|
||||
default:
|
||||
return "unknown";
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __KERNEL__
|
||||
#include <linux/stringify.h>
|
||||
/*
|
||||
* Magic: define genl_ops {{{1
|
||||
* {{{2
|
||||
*/
|
||||
|
||||
#undef GENL_op
|
||||
#define GENL_op(op_name, op_num, handler, tla_list) \
|
||||
{ \
|
||||
handler \
|
||||
.cmd = op_name, \
|
||||
.policy = CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy), \
|
||||
},
|
||||
|
||||
#define ZZZ_genl_ops CONCAT_(GENL_MAGIC_FAMILY, _genl_ops)
|
||||
static struct genl_ops ZZZ_genl_ops[] __read_mostly = {
|
||||
#include GENL_MAGIC_INCLUDE_FILE
|
||||
};
|
||||
|
||||
#undef GENL_op
|
||||
#define GENL_op(op_name, op_num, handler, tla_list)
|
||||
|
||||
/*
|
||||
* Define the genl_family, multicast groups, {{{1
|
||||
* and provide register/unregister functions.
|
||||
* {{{2
|
||||
*/
|
||||
#define ZZZ_genl_family CONCAT_(GENL_MAGIC_FAMILY, _genl_family)
|
||||
static struct genl_family ZZZ_genl_family __read_mostly = {
|
||||
.id = GENL_ID_GENERATE,
|
||||
.name = __stringify(GENL_MAGIC_FAMILY),
|
||||
.version = GENL_MAGIC_VERSION,
|
||||
#ifdef GENL_MAGIC_FAMILY_HDRSZ
|
||||
.hdrsize = NLA_ALIGN(GENL_MAGIC_FAMILY_HDRSZ),
|
||||
#endif
|
||||
.maxattr = ARRAY_SIZE(drbd_tla_nl_policy)-1,
|
||||
};
|
||||
|
||||
/*
|
||||
* Magic: define multicast groups
|
||||
* Magic: define multicast group registration helper
|
||||
*/
|
||||
#undef GENL_mc_group
|
||||
#define GENL_mc_group(group) \
|
||||
static struct genl_multicast_group \
|
||||
CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group) __read_mostly = { \
|
||||
.name = #group, \
|
||||
}; \
|
||||
static int CONCAT_(GENL_MAGIC_FAMILY, _genl_multicast_ ## group)( \
|
||||
struct sk_buff *skb, gfp_t flags) \
|
||||
{ \
|
||||
unsigned int group_id = \
|
||||
CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group).id; \
|
||||
if (!group_id) \
|
||||
return -EINVAL; \
|
||||
return genlmsg_multicast(skb, 0, group_id, flags); \
|
||||
}
|
||||
|
||||
#include GENL_MAGIC_INCLUDE_FILE
|
||||
|
||||
int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void)
|
||||
{
|
||||
int err = genl_register_family_with_ops(&ZZZ_genl_family,
|
||||
ZZZ_genl_ops, ARRAY_SIZE(ZZZ_genl_ops));
|
||||
if (err)
|
||||
return err;
|
||||
#undef GENL_mc_group
|
||||
#define GENL_mc_group(group) \
|
||||
err = genl_register_mc_group(&ZZZ_genl_family, \
|
||||
&CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group)); \
|
||||
if (err) \
|
||||
goto fail; \
|
||||
else \
|
||||
pr_info("%s: mcg %s: %u\n", #group, \
|
||||
__stringify(GENL_MAGIC_FAMILY), \
|
||||
CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group).id);
|
||||
|
||||
#include GENL_MAGIC_INCLUDE_FILE
|
||||
|
||||
#undef GENL_mc_group
|
||||
#define GENL_mc_group(group)
|
||||
return 0;
|
||||
fail:
|
||||
genl_unregister_family(&ZZZ_genl_family);
|
||||
return err;
|
||||
}
|
||||
|
||||
void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void)
|
||||
{
|
||||
genl_unregister_family(&ZZZ_genl_family);
|
||||
}
|
||||
|
||||
/*
|
||||
* Magic: provide conversion functions {{{1
|
||||
* populate skb from struct.
|
||||
* {{{2
|
||||
*/
|
||||
|
||||
#undef GENL_op
|
||||
#define GENL_op(op_name, op_num, handler, tla_list)
|
||||
|
||||
#undef GENL_struct
|
||||
#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
|
||||
static int s_name ## _to_skb(struct sk_buff *skb, struct s_name *s, \
|
||||
const bool exclude_sensitive) \
|
||||
{ \
|
||||
struct nlattr *tla = nla_nest_start(skb, tag_number); \
|
||||
if (!tla) \
|
||||
goto nla_put_failure; \
|
||||
DPRINT_TLA(#s_name, "-=>", #tag_name); \
|
||||
s_fields \
|
||||
nla_nest_end(skb, tla); \
|
||||
return 0; \
|
||||
\
|
||||
nla_put_failure: \
|
||||
if (tla) \
|
||||
nla_nest_cancel(skb, tla); \
|
||||
return -EMSGSIZE; \
|
||||
} \
|
||||
static inline int s_name ## _to_priv_skb(struct sk_buff *skb, \
|
||||
struct s_name *s) \
|
||||
{ \
|
||||
return s_name ## _to_skb(skb, s, 0); \
|
||||
} \
|
||||
static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb, \
|
||||
struct s_name *s) \
|
||||
{ \
|
||||
return s_name ## _to_skb(skb, s, 1); \
|
||||
}
|
||||
|
||||
|
||||
#undef __field
|
||||
#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \
|
||||
__is_signed) \
|
||||
if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) { \
|
||||
DPRINT_FIELD(">>", nla_type, name, s, NULL); \
|
||||
if (__put(skb, attr_nr, s->name)) \
|
||||
goto nla_put_failure; \
|
||||
}
|
||||
|
||||
#undef __array
|
||||
#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \
|
||||
__get, __put, __is_signed) \
|
||||
if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) { \
|
||||
DPRINT_ARRAY(">>",nla_type, name, s, NULL); \
|
||||
if (__put(skb, attr_nr, min_t(int, maxlen, \
|
||||
s->name ## _len + (nla_type == NLA_NUL_STRING)),\
|
||||
s->name)) \
|
||||
goto nla_put_failure; \
|
||||
}
|
||||
|
||||
#include GENL_MAGIC_INCLUDE_FILE
|
||||
|
||||
|
||||
/* Functions for initializing structs to default values. */
|
||||
|
||||
#undef __field
|
||||
#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \
|
||||
__is_signed)
|
||||
#undef __array
|
||||
#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \
|
||||
__get, __put, __is_signed)
|
||||
#undef __u32_field_def
|
||||
#define __u32_field_def(attr_nr, attr_flag, name, default) \
|
||||
x->name = default;
|
||||
#undef __s32_field_def
|
||||
#define __s32_field_def(attr_nr, attr_flag, name, default) \
|
||||
x->name = default;
|
||||
#undef __flg_field_def
|
||||
#define __flg_field_def(attr_nr, attr_flag, name, default) \
|
||||
x->name = default;
|
||||
#undef __str_field_def
|
||||
#define __str_field_def(attr_nr, attr_flag, name, maxlen) \
|
||||
memset(x->name, 0, sizeof(x->name)); \
|
||||
x->name ## _len = 0;
|
||||
#undef GENL_struct
|
||||
#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
|
||||
static void set_ ## s_name ## _defaults(struct s_name *x) __attribute__((unused)); \
|
||||
static void set_ ## s_name ## _defaults(struct s_name *x) { \
|
||||
s_fields \
|
||||
}
|
||||
|
||||
#include GENL_MAGIC_INCLUDE_FILE
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
|
||||
/* }}}1 */
|
||||
#endif /* GENL_MAGIC_FUNC_H */
|
||||
/* vim: set foldmethod=marker foldlevel=1 nofoldenable : */
|
|
@ -0,0 +1,277 @@
|
|||
#ifndef GENL_MAGIC_STRUCT_H
|
||||
#define GENL_MAGIC_STRUCT_H
|
||||
|
||||
#ifndef GENL_MAGIC_FAMILY
|
||||
# error "you need to define GENL_MAGIC_FAMILY before inclusion"
|
||||
#endif
|
||||
|
||||
#ifndef GENL_MAGIC_VERSION
|
||||
# error "you need to define GENL_MAGIC_VERSION before inclusion"
|
||||
#endif
|
||||
|
||||
#ifndef GENL_MAGIC_INCLUDE_FILE
|
||||
# error "you need to define GENL_MAGIC_INCLUDE_FILE before inclusion"
|
||||
#endif
|
||||
|
||||
#include <linux/genetlink.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#define CONCAT__(a,b) a ## b
|
||||
#define CONCAT_(a,b) CONCAT__(a,b)
|
||||
|
||||
extern int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void);
|
||||
extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void);
|
||||
|
||||
/*
|
||||
* Extension of genl attribute validation policies {{{2
|
||||
*/
|
||||
|
||||
/*
|
||||
* @DRBD_GENLA_F_MANDATORY: By default, netlink ignores attributes it does not
|
||||
* know about. This flag can be set in nlattr->nla_type to indicate that this
|
||||
* attribute must not be ignored.
|
||||
*
|
||||
* We check and remove this flag in drbd_nla_check_mandatory() before
|
||||
* validating the attribute types and lengths via nla_parse_nested().
|
||||
*/
|
||||
#define DRBD_GENLA_F_MANDATORY (1 << 14)
|
||||
|
||||
/*
|
||||
* Flags specific to drbd and not visible at the netlink layer, used in
|
||||
* <struct>_from_attrs and <struct>_to_skb:
|
||||
*
|
||||
* @DRBD_F_REQUIRED: Attribute is required; a request without this attribute is
|
||||
* invalid.
|
||||
*
|
||||
* @DRBD_F_SENSITIVE: Attribute includes sensitive information and must not be
|
||||
 * included in unprivileged get requests or broadcasts.
|
||||
*
|
||||
* @DRBD_F_INVARIANT: Attribute is set when an object is initially created, but
|
||||
* cannot subsequently be changed.
|
||||
*/
|
||||
#define DRBD_F_REQUIRED (1 << 0)
|
||||
#define DRBD_F_SENSITIVE (1 << 1)
|
||||
#define DRBD_F_INVARIANT (1 << 2)
|
||||
|
||||
#define __nla_type(x) ((__u16)((x) & NLA_TYPE_MASK & ~DRBD_GENLA_F_MANDATORY))
|
||||
|
||||
/* }}}1
|
||||
* MAGIC
|
||||
* multi-include macro expansion magic starts here
|
||||
*/
|
||||
|
||||
/* MAGIC helpers {{{2 */
|
||||
|
||||
/* possible field types */
|
||||
#define __flg_field(attr_nr, attr_flag, name) \
|
||||
__field(attr_nr, attr_flag, name, NLA_U8, char, \
|
||||
nla_get_u8, nla_put_u8, false)
|
||||
#define __u8_field(attr_nr, attr_flag, name) \
|
||||
__field(attr_nr, attr_flag, name, NLA_U8, unsigned char, \
|
||||
nla_get_u8, nla_put_u8, false)
|
||||
#define __u16_field(attr_nr, attr_flag, name) \
|
||||
__field(attr_nr, attr_flag, name, NLA_U16, __u16, \
|
||||
nla_get_u16, nla_put_u16, false)
|
||||
#define __u32_field(attr_nr, attr_flag, name) \
|
||||
__field(attr_nr, attr_flag, name, NLA_U32, __u32, \
|
||||
nla_get_u32, nla_put_u32, false)
|
||||
#define __s32_field(attr_nr, attr_flag, name) \
|
||||
__field(attr_nr, attr_flag, name, NLA_U32, __s32, \
|
||||
nla_get_u32, nla_put_u32, true)
|
||||
#define __u64_field(attr_nr, attr_flag, name) \
|
||||
__field(attr_nr, attr_flag, name, NLA_U64, __u64, \
|
||||
nla_get_u64, nla_put_u64, false)
|
||||
#define __str_field(attr_nr, attr_flag, name, maxlen) \
|
||||
__array(attr_nr, attr_flag, name, NLA_NUL_STRING, char, maxlen, \
|
||||
nla_strlcpy, nla_put, false)
|
||||
#define __bin_field(attr_nr, attr_flag, name, maxlen) \
|
||||
__array(attr_nr, attr_flag, name, NLA_BINARY, char, maxlen, \
|
||||
nla_memcpy, nla_put, false)
|
||||
|
||||
/* fields with default values */
|
||||
#define __flg_field_def(attr_nr, attr_flag, name, default) \
|
||||
__flg_field(attr_nr, attr_flag, name)
|
||||
#define __u32_field_def(attr_nr, attr_flag, name, default) \
|
||||
__u32_field(attr_nr, attr_flag, name)
|
||||
#define __s32_field_def(attr_nr, attr_flag, name, default) \
|
||||
__s32_field(attr_nr, attr_flag, name)
|
||||
#define __str_field_def(attr_nr, attr_flag, name, maxlen) \
|
||||
__str_field(attr_nr, attr_flag, name, maxlen)
|
||||
|
||||
#define GENL_op_init(args...) args
|
||||
#define GENL_doit(handler) \
|
||||
.doit = handler, \
|
||||
.flags = GENL_ADMIN_PERM,
|
||||
#define GENL_dumpit(handler) \
|
||||
.dumpit = handler, \
|
||||
.flags = GENL_ADMIN_PERM,
|
||||
|
||||
/* }}}1
|
||||
* Magic: define the enum symbols for genl_ops
|
||||
* Magic: define the enum symbols for top level attributes
|
||||
* Magic: define the enum symbols for nested attributes
|
||||
* {{{2
|
||||
*/
|
||||
|
||||
#undef GENL_struct
|
||||
#define GENL_struct(tag_name, tag_number, s_name, s_fields)
|
||||
|
||||
#undef GENL_mc_group
|
||||
#define GENL_mc_group(group)
|
||||
|
||||
#undef GENL_notification
|
||||
#define GENL_notification(op_name, op_num, mcast_group, tla_list) \
|
||||
op_name = op_num,
|
||||
|
||||
#undef GENL_op
|
||||
#define GENL_op(op_name, op_num, handler, tla_list) \
|
||||
op_name = op_num,
|
||||
|
||||
enum {
|
||||
#include GENL_MAGIC_INCLUDE_FILE
|
||||
};
|
||||
|
||||
#undef GENL_notification
|
||||
#define GENL_notification(op_name, op_num, mcast_group, tla_list)
|
||||
|
||||
#undef GENL_op
|
||||
#define GENL_op(op_name, op_num, handler, attr_list)
|
||||
|
||||
#undef GENL_struct
|
||||
#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
|
||||
tag_name = tag_number,
|
||||
|
||||
enum {
|
||||
#include GENL_MAGIC_INCLUDE_FILE
|
||||
};
|
||||
|
||||
#undef GENL_struct
|
||||
#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
|
||||
enum { \
|
||||
s_fields \
|
||||
};
|
||||
|
||||
#undef __field
|
||||
#define __field(attr_nr, attr_flag, name, nla_type, type, \
|
||||
__get, __put, __is_signed) \
|
||||
T_ ## name = (__u16)(attr_nr | ((attr_flag) & DRBD_GENLA_F_MANDATORY)),
|
||||
|
||||
#undef __array
|
||||
#define __array(attr_nr, attr_flag, name, nla_type, type, \
|
||||
maxlen, __get, __put, __is_signed) \
|
||||
T_ ## name = (__u16)(attr_nr | ((attr_flag) & DRBD_GENLA_F_MANDATORY)),
|
||||
|
||||
#include GENL_MAGIC_INCLUDE_FILE
|
||||
|
||||
/* }}}1
|
||||
* Magic: compile time assert unique numbers for operations
|
||||
* Magic: -"- unique numbers for top level attributes
|
||||
* Magic: -"- unique numbers for nested attributes
|
||||
* {{{2
|
||||
*/
|
||||
|
||||
#undef GENL_struct
|
||||
#define GENL_struct(tag_name, tag_number, s_name, s_fields)
|
||||
|
||||
#undef GENL_op
|
||||
#define GENL_op(op_name, op_num, handler, attr_list) \
|
||||
case op_name:
|
||||
|
||||
#undef GENL_notification
|
||||
#define GENL_notification(op_name, op_num, mcast_group, tla_list) \
|
||||
case op_name:
|
||||
|
||||
/*
 * Compile-time uniqueness check for operation numbers.
 *
 * At this point GENL_op() and GENL_notification() are defined to expand to
 * "case op_name:", so including GENL_MAGIC_INCLUDE_FILE inside the switch
 * produces one case label per operation and notification. C does not allow
 * two case labels with the same value in one switch, so a duplicated number
 * fails the build. The lone ';' gives the last label a statement to attach
 * to. NOTE(review): presumably this function is never called and exists only
 * for the compiler diagnostics -- confirm.
 */
static inline void ct_assert_unique_operations(void)
|
||||
{
|
||||
switch (0) {
|
||||
#include GENL_MAGIC_INCLUDE_FILE
|
||||
;
|
||||
}
|
||||
}
|
||||
|
||||
#undef GENL_op
|
||||
#define GENL_op(op_name, op_num, handler, attr_list)
|
||||
|
||||
#undef GENL_notification
|
||||
#define GENL_notification(op_name, op_num, mcast_group, tla_list)
|
||||
|
||||
#undef GENL_struct
|
||||
#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
|
||||
case tag_number:
|
||||
|
||||
/*
 * Compile-time uniqueness check for top level attribute numbers.
 *
 * At this point GENL_struct() is defined to expand to "case tag_number:",
 * while GENL_op() and GENL_notification() expand to nothing, so including
 * GENL_MAGIC_INCLUDE_FILE inside the switch produces one case label per
 * GENL_struct() entry. Two entries sharing a tag number yield duplicate case
 * labels, which the compiler rejects. The lone ';' gives the last label a
 * statement to attach to.
 */
static inline void ct_assert_unique_top_level_attributes(void)
|
||||
{
|
||||
switch (0) {
|
||||
#include GENL_MAGIC_INCLUDE_FILE
|
||||
;
|
||||
}
|
||||
}
|
||||
|
||||
#undef GENL_struct
|
||||
#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
|
||||
static inline void ct_assert_unique_ ## s_name ## _attributes(void) \
|
||||
{ \
|
||||
switch (0) { \
|
||||
s_fields \
|
||||
; \
|
||||
} \
|
||||
}
|
||||
|
||||
#undef __field
|
||||
#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \
|
||||
__is_signed) \
|
||||
case attr_nr:
|
||||
|
||||
#undef __array
|
||||
#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \
|
||||
__get, __put, __is_signed) \
|
||||
case attr_nr:
|
||||
|
||||
#include GENL_MAGIC_INCLUDE_FILE
|
||||
|
||||
/* }}}1
|
||||
* Magic: declare structs
|
||||
* struct <name> {
|
||||
* fields
|
||||
* };
|
||||
* {{{2
|
||||
*/
|
||||
|
||||
#undef GENL_struct
|
||||
#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
|
||||
struct s_name { s_fields };
|
||||
|
||||
#undef __field
|
||||
#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \
|
||||
__is_signed) \
|
||||
type name;
|
||||
|
||||
#undef __array
|
||||
#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \
|
||||
__get, __put, __is_signed) \
|
||||
type name[maxlen]; \
|
||||
__u32 name ## _len;
|
||||
|
||||
#include GENL_MAGIC_INCLUDE_FILE
|
||||
|
||||
#undef GENL_struct
|
||||
#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
|
||||
enum { \
|
||||
s_fields \
|
||||
};
|
||||
|
||||
#undef __field
|
||||
#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \
|
||||
is_signed) \
|
||||
F_ ## name ## _IS_SIGNED = is_signed,
|
||||
|
||||
#undef __array
|
||||
#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \
|
||||
__get, __put, is_signed) \
|
||||
F_ ## name ## _IS_SIGNED = is_signed,
|
||||
|
||||
#include GENL_MAGIC_INCLUDE_FILE
|
||||
|
||||
/* }}}1 */
|
||||
#endif /* GENL_MAGIC_STRUCT_H */
|
||||
/* vim: set foldmethod=marker nofoldenable : */
|
|
@ -152,4 +152,15 @@ void ida_simple_remove(struct ida *ida, unsigned int id);
|
|||
|
||||
void __init idr_init_cache(void);
|
||||
|
||||
/**
 * idr_for_each_entry - iterate over an idr's elements of a given type
 * @idp:     idr handle
 * @entry:   the type * to use as cursor
 * @id:      id entry's key
 *
 * Expands to a for-loop header and must be followed by the loop body.
 * @entry and @id have to be assignable lvalues; the address of @id is handed
 * to idr_get_next() on every step. All three arguments are evaluated more
 * than once, so pass plain variables rather than expressions with side
 * effects. The loop ends when idr_get_next() returns NULL, which leaves
 * @entry == NULL after a complete walk.
 */
#define idr_for_each_entry(idp, entry, id)				\
	for ((id) = 0,							\
	     (entry) = (__typeof__(entry))idr_get_next((idp), &(id));	\
	     (entry) != NULL;						\
	     ++(id),							\
	     (entry) = (__typeof__(entry))idr_get_next((idp), &(id)))
|
||||
|
||||
#endif /* __IDR_H__ */
|
||||
|
|
|
@ -53,10 +53,13 @@ struct loop_device {
|
|||
|
||||
spinlock_t lo_lock;
|
||||
struct bio_list lo_bio_list;
|
||||
unsigned int lo_bio_count;
|
||||
int lo_state;
|
||||
struct mutex lo_ctl_mutex;
|
||||
struct task_struct *lo_thread;
|
||||
wait_queue_head_t lo_event;
|
||||
/* wait queue for incoming requests */
|
||||
wait_queue_head_t lo_req_wait;
|
||||
|
||||
struct request_queue *lo_queue;
|
||||
struct gendisk *lo_disk;
|
||||
|
|
|
@ -166,9 +166,11 @@ struct lc_element {
|
|||
/* if we want to track a larger set of objects,
|
||||
* it needs to become arch independent u64 */
|
||||
unsigned lc_number;
|
||||
|
||||
/* special label when on free list */
|
||||
#define LC_FREE (~0U)
|
||||
|
||||
/* for pending changes */
|
||||
unsigned lc_new_number;
|
||||
};
|
||||
|
||||
struct lru_cache {
|
||||
|
@ -176,6 +178,7 @@ struct lru_cache {
|
|||
struct list_head lru;
|
||||
struct list_head free;
|
||||
struct list_head in_use;
|
||||
struct list_head to_be_changed;
|
||||
|
||||
/* the pre-created kmem cache to allocate the objects from */
|
||||
struct kmem_cache *lc_cache;
|
||||
|
@ -186,7 +189,7 @@ struct lru_cache {
|
|||
size_t element_off;
|
||||
|
||||
/* number of elements (indices) */
|
||||
unsigned int nr_elements;
|
||||
unsigned int nr_elements;
|
||||
/* Arbitrary limit on maximum tracked objects. Practical limit is much
|
||||
* lower due to allocation failures, probably. For typical use cases,
|
||||
* nr_elements should be a few thousand at most.
|
||||
|
@ -194,18 +197,19 @@ struct lru_cache {
|
|||
* 8 high bits of .lc_index to be overloaded with flags in the future. */
|
||||
#define LC_MAX_ACTIVE (1<<24)
|
||||
|
||||
/* allow to accumulate a few (index:label) changes,
|
||||
* but no more than max_pending_changes */
|
||||
unsigned int max_pending_changes;
|
||||
/* number of elements currently on to_be_changed list */
|
||||
unsigned int pending_changes;
|
||||
|
||||
/* statistics */
|
||||
unsigned used; /* number of lelements currently on in_use list */
|
||||
unsigned long hits, misses, starving, dirty, changed;
|
||||
unsigned used; /* number of elements currently on in_use list */
|
||||
unsigned long hits, misses, starving, locked, changed;
|
||||
|
||||
/* see below: flag-bits for lru_cache */
|
||||
unsigned long flags;
|
||||
|
||||
/* when changing the label of an index element */
|
||||
unsigned int new_number;
|
||||
|
||||
/* for paranoia when changing the label of an index element */
|
||||
struct lc_element *changing_element;
|
||||
|
||||
void *lc_private;
|
||||
const char *name;
|
||||
|
@ -221,10 +225,15 @@ enum {
|
|||
/* debugging aid, to catch concurrent access early.
|
||||
* user needs to guarantee exclusive access by proper locking! */
|
||||
__LC_PARANOIA,
|
||||
/* if we need to change the set, but currently there is a changing
|
||||
* transaction pending, we are "dirty", and must deferr further
|
||||
* changing requests */
|
||||
|
||||
/* annotate that the set is "dirty", possibly accumulating further
|
||||
* changes, until a transaction is finally triggered */
|
||||
__LC_DIRTY,
|
||||
|
||||
/* Locked, no further changes allowed.
|
||||
* Also used to serialize changing transactions. */
|
||||
__LC_LOCKED,
|
||||
|
||||
/* if we need to change the set, but currently there is no free nor
|
||||
* unused element available, we are "starving", and must not give out
|
||||
* further references, to guarantee that eventually some refcnt will
|
||||
|
@ -236,9 +245,11 @@ enum {
|
|||
};
|
||||
#define LC_PARANOIA (1<<__LC_PARANOIA)
|
||||
#define LC_DIRTY (1<<__LC_DIRTY)
|
||||
#define LC_LOCKED (1<<__LC_LOCKED)
|
||||
#define LC_STARVING (1<<__LC_STARVING)
|
||||
|
||||
extern struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
|
||||
unsigned max_pending_changes,
|
||||
unsigned e_count, size_t e_size, size_t e_off);
|
||||
extern void lc_reset(struct lru_cache *lc);
|
||||
extern void lc_destroy(struct lru_cache *lc);
|
||||
|
@ -249,7 +260,7 @@ extern struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr);
|
|||
extern struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr);
|
||||
extern struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr);
|
||||
extern unsigned int lc_put(struct lru_cache *lc, struct lc_element *e);
|
||||
extern void lc_changed(struct lru_cache *lc, struct lc_element *e);
|
||||
extern void lc_committed(struct lru_cache *lc);
|
||||
|
||||
struct seq_file;
|
||||
extern size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc);
|
||||
|
@ -258,16 +269,28 @@ extern void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char
|
|||
void (*detail) (struct seq_file *, struct lc_element *));
|
||||
|
||||
/**
|
||||
* lc_try_lock - can be used to stop lc_get() from changing the tracked set
|
||||
* lc_try_lock_for_transaction - can be used to stop lc_get() from changing the tracked set
|
||||
* @lc: the lru cache to operate on
|
||||
*
|
||||
* Allows (expects) the set to be "dirty". Note that the reference counts and
|
||||
* order on the active and lru lists may still change. Used to serialize
|
||||
* changing transactions. Returns true if we acquired the lock.
|
||||
*/
|
||||
static inline int lc_try_lock_for_transaction(struct lru_cache *lc)
|
||||
{
|
||||
return !test_and_set_bit(__LC_LOCKED, &lc->flags);
|
||||
}
|
||||
|
||||
/**
|
||||
* lc_try_lock - variant to stop lc_get() from changing the tracked set
|
||||
* @lc: the lru cache to operate on
|
||||
*
|
||||
* Note that the reference counts and order on the active and lru lists may
|
||||
* still change. Returns true if we acquired the lock.
|
||||
* still change. Only works on a "clean" set. Returns true if we acquired the
|
||||
* lock, which means there are no pending changes, and any further attempt to
|
||||
* change the set will not succeed until the next lc_unlock().
|
||||
*/
|
||||
static inline int lc_try_lock(struct lru_cache *lc)
|
||||
{
|
||||
return !test_and_set_bit(__LC_DIRTY, &lc->flags);
|
||||
}
|
||||
extern int lc_try_lock(struct lru_cache *lc);
|
||||
|
||||
/**
|
||||
* lc_unlock - unlock @lc, allow lc_get() to change the set again
|
||||
|
@ -276,14 +299,10 @@ static inline int lc_try_lock(struct lru_cache *lc)
|
|||
static inline void lc_unlock(struct lru_cache *lc)
|
||||
{
|
||||
clear_bit(__LC_DIRTY, &lc->flags);
|
||||
smp_mb__after_clear_bit();
|
||||
clear_bit_unlock(__LC_LOCKED, &lc->flags);
|
||||
}
|
||||
|
||||
static inline int lc_is_used(struct lru_cache *lc, unsigned int enr)
|
||||
{
|
||||
struct lc_element *e = lc_find(lc, enr);
|
||||
return e && e->refcnt;
|
||||
}
|
||||
extern bool lc_is_used(struct lru_cache *lc, unsigned int enr);
|
||||
|
||||
#define lc_entry(ptr, type, member) \
|
||||
container_of(ptr, type, member)
|
||||
|
|
|
@ -550,6 +550,170 @@ do { \
|
|||
__ret; \
|
||||
})
|
||||
|
||||
|
||||
/*
 * __wait_event_lock_irq - wait loop behind wait_event_lock_irq{,_cmd}()
 *
 * Entered with @lock held. On every pass the task is first queued on @wq in
 * state TASK_UNINTERRUPTIBLE and only then is @condition tested, so the test
 * always runs with @lock held. While @condition is false, @lock is dropped
 * with spin_unlock_irq(), @cmd is executed, schedule() is called, and @lock
 * is taken again with spin_lock_irq() before the next pass. The loop is left,
 * with @lock still held, as soon as @condition is true; finish_wait() then
 * undoes the prepare_to_wait().
 */
#define __wait_event_lock_irq(wq, condition, lock, cmd) \
|
||||
do { \
|
||||
DEFINE_WAIT(__wait); \
|
||||
\
|
||||
for (;;) { \
|
||||
prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE); \
|
||||
if (condition) \
|
||||
break; \
|
||||
spin_unlock_irq(&lock); \
|
||||
cmd; \
|
||||
schedule(); \
|
||||
spin_lock_irq(&lock); \
|
||||
} \
|
||||
finish_wait(&wq, &__wait); \
|
||||
} while (0)
|
||||
|
||||
/**
|
||||
* wait_event_lock_irq_cmd - sleep until a condition gets true. The
|
||||
* condition is checked under the lock. This
|
||||
* is expected to be called with the lock
|
||||
* taken.
|
||||
* @wq: the waitqueue to wait on
|
||||
* @condition: a C expression for the event to wait for
|
||||
* @lock: a locked spinlock_t, which will be released before cmd
|
||||
* and schedule() and reacquired afterwards.
|
||||
* @cmd: a command which is invoked outside the critical section before
|
||||
* sleep
|
||||
*
|
||||
* The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
|
||||
* @condition evaluates to true. The @condition is checked each time
|
||||
* the waitqueue @wq is woken up.
|
||||
*
|
||||
* wake_up() has to be called after changing any variable that could
|
||||
* change the result of the wait condition.
|
||||
*
|
||||
* This is supposed to be called while holding the lock. The lock is
|
||||
* dropped before invoking the cmd and going to sleep and is reacquired
|
||||
* afterwards.
|
||||
*/
|
||||
#define wait_event_lock_irq_cmd(wq, condition, lock, cmd)		\
do {									\
	/* Fast path: no wait loop when the condition already holds. */	\
	if (!(condition))						\
		__wait_event_lock_irq(wq, condition, lock, cmd);	\
} while (0)
|
||||
|
||||
/**
|
||||
* wait_event_lock_irq - sleep until a condition gets true. The
|
||||
* condition is checked under the lock. This
|
||||
* is expected to be called with the lock
|
||||
* taken.
|
||||
* @wq: the waitqueue to wait on
|
||||
* @condition: a C expression for the event to wait for
|
||||
* @lock: a locked spinlock_t, which will be released before schedule()
|
||||
* and reacquired afterwards.
|
||||
*
|
||||
* The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
|
||||
* @condition evaluates to true. The @condition is checked each time
|
||||
* the waitqueue @wq is woken up.
|
||||
*
|
||||
* wake_up() has to be called after changing any variable that could
|
||||
* change the result of the wait condition.
|
||||
*
|
||||
* This is supposed to be called while holding the lock. The lock is
|
||||
* dropped before going to sleep and is reacquired afterwards.
|
||||
*/
|
||||
#define wait_event_lock_irq(wq, condition, lock)			\
do {									\
	/* Fast path: no wait loop when the condition already holds. */	\
	if (!(condition))						\
		__wait_event_lock_irq(wq, condition, lock, );		\
} while (0)
|
||||
|
||||
|
||||
/*
 * __wait_event_interruptible_lock_irq - wait loop behind the
 * wait_event_interruptible_lock_irq{,_cmd}() macros
 *
 * Entered with @lock held. On every pass the task is queued on @wq in state
 * TASK_INTERRUPTIBLE, then @condition is tested with @lock held. If it is
 * true the loop ends and @ret is left untouched, so the caller has to
 * initialise @ret beforehand. If it is false and a signal is pending for the
 * current task, -ERESTARTSYS is stored in @ret and the loop ends. Otherwise
 * @lock is dropped with spin_unlock_irq(), @cmd is executed, schedule() is
 * called, and @lock is taken again with spin_lock_irq() before the next pass.
 * In every case the macro finishes with @lock held.
 */
#define __wait_event_interruptible_lock_irq(wq, condition, \
|
||||
lock, ret, cmd) \
|
||||
do { \
|
||||
DEFINE_WAIT(__wait); \
|
||||
\
|
||||
for (;;) { \
|
||||
prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE); \
|
||||
if (condition) \
|
||||
break; \
|
||||
if (signal_pending(current)) { \
|
||||
ret = -ERESTARTSYS; \
|
||||
break; \
|
||||
} \
|
||||
spin_unlock_irq(&lock); \
|
||||
cmd; \
|
||||
schedule(); \
|
||||
spin_lock_irq(&lock); \
|
||||
} \
|
||||
finish_wait(&wq, &__wait); \
|
||||
} while (0)
|
||||
|
||||
/**
|
||||
* wait_event_interruptible_lock_irq_cmd - sleep until a condition gets true.
|
||||
* The condition is checked under the lock. This is expected to
|
||||
* be called with the lock taken.
|
||||
* @wq: the waitqueue to wait on
|
||||
* @condition: a C expression for the event to wait for
|
||||
* @lock: a locked spinlock_t, which will be released before cmd and
|
||||
* schedule() and reacquired afterwards.
|
||||
* @cmd: a command which is invoked outside the critical section before
|
||||
* sleep
|
||||
*
|
||||
* The process is put to sleep (TASK_INTERRUPTIBLE) until the
|
||||
* @condition evaluates to true or a signal is received. The @condition is
|
||||
* checked each time the waitqueue @wq is woken up.
|
||||
*
|
||||
* wake_up() has to be called after changing any variable that could
|
||||
* change the result of the wait condition.
|
||||
*
|
||||
* This is supposed to be called while holding the lock. The lock is
|
||||
* dropped before invoking the cmd and going to sleep and is reacquired
|
||||
* afterwards.
|
||||
*
|
||||
* The macro will return -ERESTARTSYS if it was interrupted by a signal
|
||||
* and 0 if @condition evaluated to true.
|
||||
*/
|
||||
#define wait_event_interruptible_lock_irq_cmd(wq, condition, lock, cmd)	\
({									\
	int __ret = 0;							\
									\
	/* Enter the wait loop only while the condition is false; */	\
	/* the loop stores an error in __ret if a signal ends it. */	\
	if (!(condition)) {						\
		__wait_event_interruptible_lock_irq(wq, condition,	\
						    lock, __ret, cmd);	\
	}								\
	__ret;								\
})
|
||||
|
||||
/**
|
||||
* wait_event_interruptible_lock_irq - sleep until a condition gets true.
|
||||
* The condition is checked under the lock. This is expected
|
||||
* to be called with the lock taken.
|
||||
* @wq: the waitqueue to wait on
|
||||
* @condition: a C expression for the event to wait for
|
||||
* @lock: a locked spinlock_t, which will be released before schedule()
|
||||
* and reacquired afterwards.
|
||||
*
|
||||
* The process is put to sleep (TASK_INTERRUPTIBLE) until the
|
||||
* @condition evaluates to true or signal is received. The @condition is
|
||||
* checked each time the waitqueue @wq is woken up.
|
||||
*
|
||||
* wake_up() has to be called after changing any variable that could
|
||||
* change the result of the wait condition.
|
||||
*
|
||||
* This is supposed to be called while holding the lock. The lock is
|
||||
* dropped before going to sleep and is reacquired afterwards.
|
||||
*
|
||||
* The macro will return -ERESTARTSYS if it was interrupted by a signal
|
||||
* and 0 if @condition evaluated to true.
|
||||
*/
|
||||
#define wait_event_interruptible_lock_irq(wq, condition, lock)		\
({									\
	int __ret = 0;							\
									\
	/* Enter the wait loop only while the condition is false; */	\
	/* the loop stores an error in __ret if a signal ends it. */	\
	if (!(condition)) {						\
		__wait_event_interruptible_lock_irq(wq, condition,	\
						    lock, __ret, );	\
	}								\
	__ret;								\
})
|
||||
|
||||
|
||||
/*
|
||||
* These are the old interfaces to sleep waiting for an event.
|
||||
* They are racy. DO NOT use them, use the wait_event* interfaces above.
|
||||
|
|
|
@ -69,23 +69,28 @@ __setup("ro", readonly);
|
|||
__setup("rw", readwrite);
|
||||
|
||||
#ifdef CONFIG_BLOCK
|
||||
/*
 * Lookup key handed to match_dev_by_uuid() through its opaque data pointer:
 * the text to look for and how many of its leading characters take part in
 * the comparison.
 */
struct uuidcmp {
|
||||
/* text compared (case-insensitively) against a partition's info->uuid;
 * may continue past @len, e.g. with a "/PARTNROFF=..." suffix */
const char *uuid;
|
||||
/* number of characters of @uuid given to strncasecmp() */
int len;
|
||||
};
|
||||
|
||||
/**
|
||||
* match_dev_by_uuid - callback for finding a partition using its uuid
|
||||
* @dev: device passed in by the caller
|
||||
* @data: opaque pointer to a 36 byte char array with a UUID
|
||||
* @data: opaque pointer to the desired struct uuidcmp to match
|
||||
*
|
||||
* Returns 1 if the device matches, and 0 otherwise.
|
||||
*/
|
||||
static int match_dev_by_uuid(struct device *dev, void *data)
|
||||
{
|
||||
u8 *uuid = data;
|
||||
struct uuidcmp *cmp = data;
|
||||
struct hd_struct *part = dev_to_part(dev);
|
||||
|
||||
if (!part->info)
|
||||
goto no_match;
|
||||
|
||||
if (memcmp(uuid, part->info->uuid, sizeof(part->info->uuid)))
|
||||
goto no_match;
|
||||
if (strncasecmp(cmp->uuid, part->info->uuid, cmp->len))
|
||||
goto no_match;
|
||||
|
||||
return 1;
|
||||
no_match:
|
||||
|
@ -95,7 +100,7 @@ no_match:
|
|||
|
||||
/**
|
||||
* devt_from_partuuid - looks up the dev_t of a partition by its UUID
|
||||
* @uuid: min 36 byte char array containing a hex ascii UUID
|
||||
* @uuid: char array containing ascii UUID
|
||||
*
|
||||
* The function will return the first partition which contains a matching
|
||||
* UUID value in its partition_meta_info struct. This does not search
|
||||
|
@ -106,38 +111,41 @@ no_match:
|
|||
*
|
||||
* Returns the matching dev_t on success or 0 on failure.
|
||||
*/
|
||||
static dev_t devt_from_partuuid(char *uuid_str)
|
||||
static dev_t devt_from_partuuid(const char *uuid_str)
|
||||
{
|
||||
dev_t res = 0;
|
||||
struct uuidcmp cmp;
|
||||
struct device *dev = NULL;
|
||||
u8 uuid[16];
|
||||
struct gendisk *disk;
|
||||
struct hd_struct *part;
|
||||
int offset = 0;
|
||||
bool clear_root_wait = false;
|
||||
char *slash;
|
||||
|
||||
if (strlen(uuid_str) < 36)
|
||||
goto done;
|
||||
cmp.uuid = uuid_str;
|
||||
|
||||
slash = strchr(uuid_str, '/');
|
||||
/* Check for optional partition number offset attributes. */
|
||||
if (uuid_str[36]) {
|
||||
if (slash) {
|
||||
char c = 0;
|
||||
/* Explicitly fail on poor PARTUUID syntax. */
|
||||
if (sscanf(&uuid_str[36],
|
||||
"/PARTNROFF=%d%c", &offset, &c) != 1) {
|
||||
printk(KERN_ERR "VFS: PARTUUID= is invalid.\n"
|
||||
"Expected PARTUUID=<valid-uuid-id>[/PARTNROFF=%%d]\n");
|
||||
if (root_wait)
|
||||
printk(KERN_ERR
|
||||
"Disabling rootwait; root= is invalid.\n");
|
||||
root_wait = 0;
|
||||
if (sscanf(slash + 1,
|
||||
"PARTNROFF=%d%c", &offset, &c) != 1) {
|
||||
clear_root_wait = true;
|
||||
goto done;
|
||||
}
|
||||
cmp.len = slash - uuid_str;
|
||||
} else {
|
||||
cmp.len = strlen(uuid_str);
|
||||
}
|
||||
|
||||
/* Pack the requested UUID in the expected format. */
|
||||
part_pack_uuid(uuid_str, uuid);
|
||||
if (!cmp.len) {
|
||||
clear_root_wait = true;
|
||||
goto done;
|
||||
}
|
||||
|
||||
dev = class_find_device(&block_class, NULL, uuid, &match_dev_by_uuid);
|
||||
dev = class_find_device(&block_class, NULL, &cmp,
|
||||
&match_dev_by_uuid);
|
||||
if (!dev)
|
||||
goto done;
|
||||
|
||||
|
@ -158,6 +166,13 @@ static dev_t devt_from_partuuid(char *uuid_str)
|
|||
no_offset:
|
||||
put_device(dev);
|
||||
done:
|
||||
if (clear_root_wait) {
|
||||
pr_err("VFS: PARTUUID= is invalid.\n"
|
||||
"Expected PARTUUID=<valid-uuid-id>[/PARTNROFF=%%d]\n");
|
||||
if (root_wait)
|
||||
pr_err("Disabling rootwait; root= is invalid.\n");
|
||||
root_wait = 0;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
#endif
|
||||
|
@ -174,6 +189,10 @@ done:
|
|||
* used when disk name of partitioned disk ends on a digit.
|
||||
* 6) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the
|
||||
* unique id of a partition if the partition table provides it.
|
||||
* The UUID may be either an EFI/GPT UUID, or refer to an MSDOS
|
||||
* partition using the format SSSSSSSS-PP, where SSSSSSSS is a zero-
|
||||
* filled hex representation of the 32-bit "NT disk signature", and PP
|
||||
* is a zero-filled hex representation of the 1-based partition number.
|
||||
* 7) PARTUUID=<UUID>/PARTNROFF=<int> to select a partition in relation to
|
||||
* a partition with a known unique id.
|
||||
*
|
||||
|
|
371
lib/lru_cache.c
371
lib/lru_cache.c
|
@ -44,8 +44,8 @@ MODULE_LICENSE("GPL");
|
|||
} while (0)
|
||||
|
||||
#define RETURN(x...) do { \
|
||||
clear_bit(__LC_PARANOIA, &lc->flags); \
|
||||
smp_mb__after_clear_bit(); return x ; } while (0)
|
||||
clear_bit_unlock(__LC_PARANOIA, &lc->flags); \
|
||||
return x ; } while (0)
|
||||
|
||||
/* BUG() if e is not one of the elements tracked by lc */
|
||||
#define PARANOIA_LC_ELEMENT(lc, e) do { \
|
||||
|
@ -55,9 +55,40 @@ MODULE_LICENSE("GPL");
|
|||
BUG_ON(i >= lc_->nr_elements); \
|
||||
BUG_ON(lc_->lc_element[i] != e_); } while (0)
|
||||
|
||||
|
||||
/* We need to atomically
|
||||
* - try to grab the lock (set LC_LOCKED)
|
||||
* - only if there is no pending transaction
|
||||
* (neither LC_DIRTY nor LC_STARVING is set)
|
||||
* Because of PARANOIA_ENTRY() above abusing lc->flags as well,
|
||||
* it is not sufficient to just say
|
||||
* return 0 == cmpxchg(&lc->flags, 0, LC_LOCKED);
|
||||
*/
|
||||
int lc_try_lock(struct lru_cache *lc)
|
||||
{
|
||||
unsigned long val;
|
||||
do {
|
||||
val = cmpxchg(&lc->flags, 0, LC_LOCKED);
|
||||
} while (unlikely (val == LC_PARANOIA));
|
||||
/* Spin until no-one is inside a PARANOIA_ENTRY()/RETURN() section. */
|
||||
return 0 == val;
|
||||
#if 0
|
||||
/* Alternative approach, spin in case someone enters or leaves a
|
||||
* PARANOIA_ENTRY()/RETURN() section. */
|
||||
unsigned long old, new, val;
|
||||
do {
|
||||
old = lc->flags & LC_PARANOIA;
|
||||
new = old | LC_LOCKED;
|
||||
val = cmpxchg(&lc->flags, old, new);
|
||||
} while (unlikely (val == (old ^ LC_PARANOIA)));
|
||||
return old == val;
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* lc_create - prepares to track objects in an active set
|
||||
* @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details
|
||||
* @max_pending_changes: maximum changes to accumulate until a transaction is required
|
||||
* @e_count: number of elements allowed to be active simultaneously
|
||||
* @e_size: size of the tracked objects
|
||||
* @e_off: offset to the &struct lc_element member in a tracked object
|
||||
|
@ -66,6 +97,7 @@ MODULE_LICENSE("GPL");
|
|||
* or NULL on (allocation) failure.
|
||||
*/
|
||||
struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
|
||||
unsigned max_pending_changes,
|
||||
unsigned e_count, size_t e_size, size_t e_off)
|
||||
{
|
||||
struct hlist_head *slot = NULL;
|
||||
|
@ -98,12 +130,13 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
|
|||
INIT_LIST_HEAD(&lc->in_use);
|
||||
INIT_LIST_HEAD(&lc->lru);
|
||||
INIT_LIST_HEAD(&lc->free);
|
||||
INIT_LIST_HEAD(&lc->to_be_changed);
|
||||
|
||||
lc->name = name;
|
||||
lc->element_size = e_size;
|
||||
lc->element_off = e_off;
|
||||
lc->nr_elements = e_count;
|
||||
lc->new_number = LC_FREE;
|
||||
lc->max_pending_changes = max_pending_changes;
|
||||
lc->lc_cache = cache;
|
||||
lc->lc_element = element;
|
||||
lc->lc_slot = slot;
|
||||
|
@ -117,6 +150,7 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
|
|||
e = p + e_off;
|
||||
e->lc_index = i;
|
||||
e->lc_number = LC_FREE;
|
||||
e->lc_new_number = LC_FREE;
|
||||
list_add(&e->list, &lc->free);
|
||||
element[i] = e;
|
||||
}
|
||||
|
@ -175,15 +209,15 @@ void lc_reset(struct lru_cache *lc)
|
|||
INIT_LIST_HEAD(&lc->in_use);
|
||||
INIT_LIST_HEAD(&lc->lru);
|
||||
INIT_LIST_HEAD(&lc->free);
|
||||
INIT_LIST_HEAD(&lc->to_be_changed);
|
||||
lc->used = 0;
|
||||
lc->hits = 0;
|
||||
lc->misses = 0;
|
||||
lc->starving = 0;
|
||||
lc->dirty = 0;
|
||||
lc->locked = 0;
|
||||
lc->changed = 0;
|
||||
lc->pending_changes = 0;
|
||||
lc->flags = 0;
|
||||
lc->changing_element = NULL;
|
||||
lc->new_number = LC_FREE;
|
||||
memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements);
|
||||
|
||||
for (i = 0; i < lc->nr_elements; i++) {
|
||||
|
@ -194,6 +228,7 @@ void lc_reset(struct lru_cache *lc)
|
|||
/* re-init it */
|
||||
e->lc_index = i;
|
||||
e->lc_number = LC_FREE;
|
||||
e->lc_new_number = LC_FREE;
|
||||
list_add(&e->list, &lc->free);
|
||||
}
|
||||
}
|
||||
|
@ -208,14 +243,14 @@ size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc)
|
|||
/* NOTE:
|
||||
* total calls to lc_get are
|
||||
* (starving + hits + misses)
|
||||
* misses include "dirty" count (update from an other thread in
|
||||
* misses include "locked" count (update from another thread in
|
||||
* progress) and "changed", when this in fact led to a successful
|
||||
* update of the cache.
|
||||
*/
|
||||
return seq_printf(seq, "\t%s: used:%u/%u "
|
||||
"hits:%lu misses:%lu starving:%lu dirty:%lu changed:%lu\n",
|
||||
"hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n",
|
||||
lc->name, lc->used, lc->nr_elements,
|
||||
lc->hits, lc->misses, lc->starving, lc->dirty, lc->changed);
|
||||
lc->hits, lc->misses, lc->starving, lc->locked, lc->changed);
|
||||
}
|
||||
|
||||
static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
|
||||
|
@ -224,6 +259,27 @@ static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
|
|||
}
|
||||
|
||||
|
||||
static struct lc_element *__lc_find(struct lru_cache *lc, unsigned int enr,
|
||||
bool include_changing)
|
||||
{
|
||||
struct hlist_node *n;
|
||||
struct lc_element *e;
|
||||
|
||||
BUG_ON(!lc);
|
||||
BUG_ON(!lc->nr_elements);
|
||||
hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) {
|
||||
/* "about to be changed" elements, pending transaction commit,
|
||||
* are hashed by their "new number". "Normal" elements have
|
||||
* lc_number == lc_new_number. */
|
||||
if (e->lc_new_number != enr)
|
||||
continue;
|
||||
if (e->lc_new_number == e->lc_number || include_changing)
|
||||
return e;
|
||||
break;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* lc_find - find element by label, if present in the hash table
|
||||
* @lc: The lru_cache object
|
||||
|
@ -232,38 +288,28 @@ static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
|
|||
* Returns the pointer to an element, if the element with the requested
|
||||
* "label" or element number is present in the hash table,
|
||||
* or NULL if not found. Does not change the refcnt.
|
||||
* Ignores elements that are "about to be used", i.e. not yet in the active
|
||||
* set, but still pending transaction commit.
|
||||
*/
|
||||
struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr)
|
||||
{
|
||||
struct hlist_node *n;
|
||||
struct lc_element *e;
|
||||
|
||||
BUG_ON(!lc);
|
||||
BUG_ON(!lc->nr_elements);
|
||||
hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) {
|
||||
if (e->lc_number == enr)
|
||||
return e;
|
||||
}
|
||||
return NULL;
|
||||
return __lc_find(lc, enr, 0);
|
||||
}
|
||||
|
||||
/* returned element will be "recycled" immediately */
|
||||
static struct lc_element *lc_evict(struct lru_cache *lc)
|
||||
/**
|
||||
* lc_is_used - find element by label
|
||||
* @lc: The lru_cache object
|
||||
* @enr: element number
|
||||
*
|
||||
* Returns true, if the element with the requested "label" or element number is
|
||||
* present in the hash table, and is used (refcnt > 0).
|
||||
* Also finds elements that are not _currently_ used but only "about to be
|
||||
* used", i.e. on the "to_be_changed" list, pending transaction commit.
|
||||
*/
|
||||
bool lc_is_used(struct lru_cache *lc, unsigned int enr)
|
||||
{
|
||||
struct list_head *n;
|
||||
struct lc_element *e;
|
||||
|
||||
if (list_empty(&lc->lru))
|
||||
return NULL;
|
||||
|
||||
n = lc->lru.prev;
|
||||
e = list_entry(n, struct lc_element, list);
|
||||
|
||||
PARANOIA_LC_ELEMENT(lc, e);
|
||||
|
||||
list_del(&e->list);
|
||||
hlist_del(&e->colision);
|
||||
return e;
|
||||
struct lc_element *e = __lc_find(lc, enr, 1);
|
||||
return e && e->refcnt;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -280,22 +326,34 @@ void lc_del(struct lru_cache *lc, struct lc_element *e)
|
|||
PARANOIA_LC_ELEMENT(lc, e);
|
||||
BUG_ON(e->refcnt);
|
||||
|
||||
e->lc_number = LC_FREE;
|
||||
e->lc_number = e->lc_new_number = LC_FREE;
|
||||
hlist_del_init(&e->colision);
|
||||
list_move(&e->list, &lc->free);
|
||||
RETURN();
|
||||
}
|
||||
|
||||
static struct lc_element *lc_get_unused_element(struct lru_cache *lc)
|
||||
static struct lc_element *lc_prepare_for_change(struct lru_cache *lc, unsigned new_number)
|
||||
{
|
||||
struct list_head *n;
|
||||
struct lc_element *e;
|
||||
|
||||
if (list_empty(&lc->free))
|
||||
return lc_evict(lc);
|
||||
if (!list_empty(&lc->free))
|
||||
n = lc->free.next;
|
||||
else if (!list_empty(&lc->lru))
|
||||
n = lc->lru.prev;
|
||||
else
|
||||
return NULL;
|
||||
|
||||
n = lc->free.next;
|
||||
list_del(n);
|
||||
return list_entry(n, struct lc_element, list);
|
||||
e = list_entry(n, struct lc_element, list);
|
||||
PARANOIA_LC_ELEMENT(lc, e);
|
||||
|
||||
e->lc_new_number = new_number;
|
||||
if (!hlist_unhashed(&e->colision))
|
||||
__hlist_del(&e->colision);
|
||||
hlist_add_head(&e->colision, lc_hash_slot(lc, new_number));
|
||||
list_move(&e->list, &lc->to_be_changed);
|
||||
|
||||
return e;
|
||||
}
|
||||
|
||||
static int lc_unused_element_available(struct lru_cache *lc)
|
||||
|
@ -308,6 +366,75 @@ static int lc_unused_element_available(struct lru_cache *lc)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
 * __lc_get - shared lookup/acquire path behind lc_get() (may_change = 1)
 * and lc_try_get() (may_change = 0).
 *
 * @lc:         the lru cache to operate on
 * @enr:        the label to look up
 * @may_change: if false, only an element already carrying @enr is returned;
 *              if true, a miss may recycle an element for @enr.
 *
 * Returns the element with a reference taken, or NULL when:
 *  - LC_STARVING is set in lc->flags (lc->starving is incremented),
 *  - the label is not committed in the cache and @may_change is false,
 *  - the label was found but lc_new_number != lc_number (change pending),
 *  - __LC_LOCKED is set (lc->locked is incremented),
 *  - lc_unused_element_available() reports nothing (sets __LC_STARVING),
 *  - lc->pending_changes has reached lc->max_pending_changes.
 *
 * NOTE(review): PARANOIA_ENTRY()/RETURN() are macros defined outside this
 * view; presumably entry/exit sanity checks -- confirm.
 * NOTE(review): the third argument to __lc_find() presumably asks it to
 * also return elements with a pending label change -- confirm against
 * its definition.
 */
static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool may_change)
|
||||
{
|
||||
struct lc_element *e;
|
||||
|
||||
PARANOIA_ENTRY();
|
||||
if (lc->flags & LC_STARVING) {
|
||||
++lc->starving;
|
||||
RETURN(NULL);
|
||||
}
|
||||
|
||||
e = __lc_find(lc, enr, 1);
|
||||
/* if lc_new_number != lc_number,
|
||||
* this enr is currently being pulled in already,
|
||||
* and will be available once the pending transaction
|
||||
* has been committed. */
|
||||
if (e && e->lc_new_number == e->lc_number) {
|
||||
++lc->hits;
|
||||
/* lc->used counts elements with refcnt > 0, so only bump it on the
 * 0 -> 1 transition. */
|
||||
if (e->refcnt++ == 0)
|
||||
lc->used++;
|
||||
list_move(&e->list, &lc->in_use); /* Not evictable... */
|
||||
RETURN(e);
|
||||
}
|
||||
|
||||
++lc->misses;
|
||||
if (!may_change)
|
||||
RETURN(NULL);
|
||||
|
||||
/* It has been found above, but on the "to_be_changed" list, not yet
|
||||
* committed. Don't pull it in twice, wait for the transaction, then
|
||||
* try again */
|
||||
if (e)
|
||||
RETURN(NULL);
|
||||
|
||||
/* To avoid races with lc_try_lock(), first, mark us dirty
|
||||
* (using test_and_set_bit, as it implies memory barriers), ... */
|
||||
test_and_set_bit(__LC_DIRTY, &lc->flags);
|
||||
|
||||
/* ... only then check if it is locked anyway. If lc_unlock clears
|
||||
* the dirty bit again, that's not a problem, we will come here again.
|
||||
*/
|
||||
if (test_bit(__LC_LOCKED, &lc->flags)) {
|
||||
++lc->locked;
|
||||
RETURN(NULL);
|
||||
}
|
||||
|
||||
/* In case there is nothing available and we can not kick out
|
||||
* the LRU element, we have to wait ...
|
||||
*/
|
||||
if (!lc_unused_element_available(lc)) {
|
||||
__set_bit(__LC_STARVING, &lc->flags);
|
||||
RETURN(NULL);
|
||||
}
|
||||
|
||||
/* It was not present in the active set. We are going to recycle an
|
||||
* unused (or even "free") element, but we won't accumulate more than
|
||||
* max_pending_changes changes. */
|
||||
if (lc->pending_changes >= lc->max_pending_changes)
|
||||
RETURN(NULL);
|
||||
|
||||
e = lc_prepare_for_change(lc, enr);
|
||||
BUG_ON(!e);
|
||||
|
||||
clear_bit(__LC_STARVING, &lc->flags);
|
||||
/* The recycled element must have been unreferenced; this takes the
 * first reference on it. */
BUG_ON(++e->refcnt != 1);
|
||||
lc->used++;
|
||||
lc->pending_changes++;
|
||||
|
||||
RETURN(e);
|
||||
}
|
||||
|
||||
/**
|
||||
* lc_get - get element by label, maybe change the active set
|
||||
|
@ -336,110 +463,65 @@ static int lc_unused_element_available(struct lru_cache *lc)
|
|||
* pointer to an UNUSED element with some different element number,
|
||||
* where that different number may also be %LC_FREE.
|
||||
*
|
||||
* In this case, the cache is marked %LC_DIRTY (blocking further changes),
|
||||
* and the returned element pointer is removed from the lru list and
|
||||
* hash collision chains. The user now should do whatever housekeeping
|
||||
* is necessary.
|
||||
* Then he must call lc_changed(lc,element_pointer), to finish
|
||||
* the change.
|
||||
* In this case, the cache is marked %LC_DIRTY,
|
||||
* so lc_try_lock() will no longer succeed.
|
||||
* The returned element pointer is moved to the "to_be_changed" list,
|
||||
* and registered with the new element number on the hash collision chains,
|
||||
* so it is possible to pick it up from lc_is_used().
|
||||
* Up to "max_pending_changes" changes (see lc_create()) can be accumulated.
|
||||
* The user now should do whatever housekeeping is necessary,
|
||||
* typically serialize on lc_try_lock_for_transaction(), then call
|
||||
* lc_committed(lc) and lc_unlock(), to finish the change.
|
||||
*
|
||||
* NOTE: The user needs to check the lc_number on EACH use, so he recognizes
|
||||
* any cache set change.
|
||||
*/
|
||||
struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr)
|
||||
{
|
||||
struct lc_element *e;
|
||||
|
||||
PARANOIA_ENTRY();
|
||||
if (lc->flags & LC_STARVING) {
|
||||
++lc->starving;
|
||||
RETURN(NULL);
|
||||
}
|
||||
|
||||
e = lc_find(lc, enr);
|
||||
if (e) {
|
||||
++lc->hits;
|
||||
if (e->refcnt++ == 0)
|
||||
lc->used++;
|
||||
list_move(&e->list, &lc->in_use); /* Not evictable... */
|
||||
RETURN(e);
|
||||
}
|
||||
|
||||
++lc->misses;
|
||||
|
||||
/* In case there is nothing available and we can not kick out
|
||||
* the LRU element, we have to wait ...
|
||||
*/
|
||||
if (!lc_unused_element_available(lc)) {
|
||||
__set_bit(__LC_STARVING, &lc->flags);
|
||||
RETURN(NULL);
|
||||
}
|
||||
|
||||
/* it was not present in the active set.
|
||||
* we are going to recycle an unused (or even "free") element.
|
||||
* user may need to commit a transaction to record that change.
|
||||
* we serialize on flags & TF_DIRTY */
|
||||
if (test_and_set_bit(__LC_DIRTY, &lc->flags)) {
|
||||
++lc->dirty;
|
||||
RETURN(NULL);
|
||||
}
|
||||
|
||||
e = lc_get_unused_element(lc);
|
||||
BUG_ON(!e);
|
||||
|
||||
clear_bit(__LC_STARVING, &lc->flags);
|
||||
BUG_ON(++e->refcnt != 1);
|
||||
lc->used++;
|
||||
|
||||
lc->changing_element = e;
|
||||
lc->new_number = enr;
|
||||
|
||||
RETURN(e);
|
||||
}
|
||||
|
||||
/* similar to lc_get,
|
||||
* but only gets a new reference on an existing element.
|
||||
* you either get the requested element, or NULL.
|
||||
* will be consolidated into one function.
|
||||
*/
|
||||
struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr)
|
||||
{
|
||||
struct lc_element *e;
|
||||
|
||||
PARANOIA_ENTRY();
|
||||
if (lc->flags & LC_STARVING) {
|
||||
++lc->starving;
|
||||
RETURN(NULL);
|
||||
}
|
||||
|
||||
e = lc_find(lc, enr);
|
||||
if (e) {
|
||||
++lc->hits;
|
||||
if (e->refcnt++ == 0)
|
||||
lc->used++;
|
||||
list_move(&e->list, &lc->in_use); /* Not evictable... */
|
||||
}
|
||||
RETURN(e);
|
||||
return __lc_get(lc, enr, 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* lc_changed - tell @lc that the change has been recorded
|
||||
* lc_try_get - get element by label, if present; do not change the active set
|
||||
* @lc: the lru cache to operate on
|
||||
* @e: the element pending label change
|
||||
* @enr: the label to look up
|
||||
*
|
||||
* Finds an element in the cache, increases its usage count,
|
||||
* "touches" and returns it.
|
||||
*
|
||||
* Return values:
|
||||
* NULL
|
||||
* The cache was marked %LC_STARVING,
|
||||
* or the requested label was not in the active set
|
||||
*
|
||||
* pointer to the element with the REQUESTED element number.
|
||||
* In this case, it can be used right away
|
||||
*/
|
||||
void lc_changed(struct lru_cache *lc, struct lc_element *e)
|
||||
struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr)
|
||||
{
|
||||
/* may_change = 0: __lc_get() returns NULL on a miss instead of
 * recycling an element for @enr. */
return __lc_get(lc, enr, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* lc_committed - tell @lc that pending changes have been recorded
|
||||
* @lc: the lru cache to operate on
|
||||
*
|
||||
* User is expected to serialize on explicit lc_try_lock_for_transaction()
|
||||
* before the transaction is started, and later needs to lc_unlock() explicitly
|
||||
* as well.
|
||||
*/
|
||||
void lc_committed(struct lru_cache *lc)
|
||||
{
|
||||
struct lc_element *e, *tmp;
|
||||
|
||||
PARANOIA_ENTRY();
|
||||
BUG_ON(e != lc->changing_element);
|
||||
PARANOIA_LC_ELEMENT(lc, e);
|
||||
++lc->changed;
|
||||
e->lc_number = lc->new_number;
|
||||
list_add(&e->list, &lc->in_use);
|
||||
hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number));
|
||||
lc->changing_element = NULL;
|
||||
lc->new_number = LC_FREE;
|
||||
clear_bit(__LC_DIRTY, &lc->flags);
|
||||
smp_mb__after_clear_bit();
|
||||
list_for_each_entry_safe(e, tmp, &lc->to_be_changed, list) {
|
||||
/* count number of changes, not number of transactions */
|
||||
++lc->changed;
|
||||
e->lc_number = e->lc_new_number;
|
||||
list_move(&e->list, &lc->in_use);
|
||||
}
|
||||
lc->pending_changes = 0;
|
||||
RETURN();
|
||||
}
|
||||
|
||||
|
@ -458,13 +540,12 @@ unsigned int lc_put(struct lru_cache *lc, struct lc_element *e)
|
|||
PARANOIA_ENTRY();
|
||||
PARANOIA_LC_ELEMENT(lc, e);
|
||||
BUG_ON(e->refcnt == 0);
|
||||
BUG_ON(e == lc->changing_element);
|
||||
BUG_ON(e->lc_number != e->lc_new_number);
|
||||
if (--e->refcnt == 0) {
|
||||
/* move it to the front of LRU. */
|
||||
list_move(&e->list, &lc->lru);
|
||||
lc->used--;
|
||||
clear_bit(__LC_STARVING, &lc->flags);
|
||||
smp_mb__after_clear_bit();
|
||||
clear_bit_unlock(__LC_STARVING, &lc->flags);
|
||||
}
|
||||
RETURN(e->refcnt);
|
||||
}
|
||||
|
@ -504,16 +585,24 @@ unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e)
|
|||
void lc_set(struct lru_cache *lc, unsigned int enr, int index)
|
||||
{
|
||||
struct lc_element *e;
|
||||
struct list_head *lh;
|
||||
|
||||
if (index < 0 || index >= lc->nr_elements)
|
||||
return;
|
||||
|
||||
e = lc_element_by_index(lc, index);
|
||||
e->lc_number = enr;
|
||||
BUG_ON(e->lc_number != e->lc_new_number);
|
||||
BUG_ON(e->refcnt != 0);
|
||||
|
||||
e->lc_number = e->lc_new_number = enr;
|
||||
hlist_del_init(&e->colision);
|
||||
hlist_add_head(&e->colision, lc_hash_slot(lc, enr));
|
||||
list_move(&e->list, e->refcnt ? &lc->in_use : &lc->lru);
|
||||
if (enr == LC_FREE)
|
||||
lh = &lc->free;
|
||||
else {
|
||||
hlist_add_head(&e->colision, lc_hash_slot(lc, enr));
|
||||
lh = &lc->lru;
|
||||
}
|
||||
list_move(&e->list, lh);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -553,8 +642,10 @@ EXPORT_SYMBOL(lc_try_get);
|
|||
EXPORT_SYMBOL(lc_find);
|
||||
EXPORT_SYMBOL(lc_get);
|
||||
EXPORT_SYMBOL(lc_put);
|
||||
EXPORT_SYMBOL(lc_changed);
|
||||
EXPORT_SYMBOL(lc_committed);
|
||||
EXPORT_SYMBOL(lc_element_by_index);
|
||||
EXPORT_SYMBOL(lc_index_of);
|
||||
EXPORT_SYMBOL(lc_seq_printf_stats);
|
||||
EXPORT_SYMBOL(lc_seq_dump_details);
|
||||
EXPORT_SYMBOL(lc_try_lock);
|
||||
EXPORT_SYMBOL(lc_is_used);
|
||||
|
|
Loading…
Reference in New Issue