xen/blkback: make pool of persistent grants and free pages per-queue
Make the pool of persistent grants and free pages per-queue/ring instead of
per-device, to get better scalability.

Test was done based on the null_blk driver:
dom0: v4.2-rc8 16vcpus 10GB "modprobe null_blk"
domu: v4.2-rc8 16vcpus 10GB

fio job file:
[test]
rw=read
direct=1
ioengine=libaio
bs=4k
time_based
runtime=30
filename=/dev/xvdb
numjobs=16
iodepth=64
iodepth_batch=64
iodepth_batch_complete=64
group_reporting

Results:
iops1: after the patch "xen/blkfront: make persistent grants per-queue".
iops2: after this patch.

Queues:        1      4           8           16
Iops orig(k):  810    1064        780         700
Iops1(k):      810    1230(~20%)  1024(~20%)  850(~20%)
Iops2(k):      810    1410(~35%)  1354(~75%)  1440(~100%)

With 4 queues, after this commit we get a ~75% increase in IOPS, and
performance no longer drops as the number of queues grows. The respective
chart is available at:
https://www.dropbox.com/s/agrcy2pbzbsvmwv/iops.png?dl=0

Signed-off-by: Bob Liu <bob.liu@oracle.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
commit d4bf0065b7
parent d62d860003
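The heart of the change is visible in the hunks below: the free-page pool and the persistent-grant tree move from struct xen_blkif (shared by the whole device) into struct xen_blkif_ring (one per queue), so each queue's kthread only ever touches state and locks that it owns. As an illustration only, here is a minimal userspace sketch of the free-page side of that idea; the field and helper names echo the driver (free_pages, free_pages_lock, get_free_page/put_free_pages), but the types, the pthread mutex, and the function bodies are stand-ins rather than the kernel implementation.

/*
 * Userspace sketch (not the kernel code): each ring/queue owns its own
 * free-page pool and its own lock, so the queues of one device never
 * contend on a shared per-device lock.  pthread_mutex_t stands in for the
 * kernel spinlock, struct page_node for struct page on a lru list.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct page_node {                      /* stand-in for struct page + lru list */
        struct page_node *next;
};

struct blkif_ring {                     /* per-queue state, as in xen_blkif_ring */
        pthread_mutex_t free_pages_lock;
        struct page_node *free_pages;   /* this ring's pool of reusable pages */
        int free_pages_num;
};

/* Take one page from this ring's pool, or fall back to a fresh allocation. */
static struct page_node *ring_get_free_page(struct blkif_ring *ring)
{
        struct page_node *pg;

        pthread_mutex_lock(&ring->free_pages_lock);
        if (!ring->free_pages) {
                pthread_mutex_unlock(&ring->free_pages_lock);
                return calloc(1, sizeof(*pg));  /* ~ gnttab_alloc_pages(1, page) */
        }
        pg = ring->free_pages;
        ring->free_pages = pg->next;
        ring->free_pages_num--;
        pthread_mutex_unlock(&ring->free_pages_lock);
        return pg;
}

/* Return a page to this ring's pool; no other queue's lock is ever taken. */
static void ring_put_free_page(struct blkif_ring *ring, struct page_node *pg)
{
        pthread_mutex_lock(&ring->free_pages_lock);
        pg->next = ring->free_pages;
        ring->free_pages = pg;
        ring->free_pages_num++;
        pthread_mutex_unlock(&ring->free_pages_lock);
}

int main(void)
{
        struct blkif_ring rings[4];     /* one pool per queue, as after this patch */
        int i;

        for (i = 0; i < 4; i++) {
                pthread_mutex_init(&rings[i].free_pages_lock, NULL);
                rings[i].free_pages = NULL;
                rings[i].free_pages_num = 0;
        }

        /* Each queue's kthread would do this independently of the others. */
        struct page_node *pg = ring_get_free_page(&rings[0]);
        ring_put_free_page(&rings[0], pg);
        printf("ring 0 pool size: %d\n", rings[0].free_pages_num);
        return 0;
}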
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -123,60 +123,60 @@ module_param(log_stats, int, 0644);
 /* Number of free pages to remove on each call to gnttab_free_pages */
 #define NUM_BATCH_FREE_PAGES 10
 
-static inline int get_free_page(struct xen_blkif *blkif, struct page **page)
+static inline int get_free_page(struct xen_blkif_ring *ring, struct page **page)
 {
         unsigned long flags;
 
-        spin_lock_irqsave(&blkif->free_pages_lock, flags);
-        if (list_empty(&blkif->free_pages)) {
-                BUG_ON(blkif->free_pages_num != 0);
-                spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+        spin_lock_irqsave(&ring->free_pages_lock, flags);
+        if (list_empty(&ring->free_pages)) {
+                BUG_ON(ring->free_pages_num != 0);
+                spin_unlock_irqrestore(&ring->free_pages_lock, flags);
                 return gnttab_alloc_pages(1, page);
         }
-        BUG_ON(blkif->free_pages_num == 0);
-        page[0] = list_first_entry(&blkif->free_pages, struct page, lru);
+        BUG_ON(ring->free_pages_num == 0);
+        page[0] = list_first_entry(&ring->free_pages, struct page, lru);
         list_del(&page[0]->lru);
-        blkif->free_pages_num--;
-        spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+        ring->free_pages_num--;
+        spin_unlock_irqrestore(&ring->free_pages_lock, flags);
 
         return 0;
 }
 
-static inline void put_free_pages(struct xen_blkif *blkif, struct page **page,
+static inline void put_free_pages(struct xen_blkif_ring *ring, struct page **page,
                                   int num)
 {
         unsigned long flags;
         int i;
 
-        spin_lock_irqsave(&blkif->free_pages_lock, flags);
+        spin_lock_irqsave(&ring->free_pages_lock, flags);
         for (i = 0; i < num; i++)
-                list_add(&page[i]->lru, &blkif->free_pages);
-        blkif->free_pages_num += num;
-        spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+                list_add(&page[i]->lru, &ring->free_pages);
+        ring->free_pages_num += num;
+        spin_unlock_irqrestore(&ring->free_pages_lock, flags);
 }
 
-static inline void shrink_free_pagepool(struct xen_blkif *blkif, int num)
+static inline void shrink_free_pagepool(struct xen_blkif_ring *ring, int num)
 {
         /* Remove requested pages in batches of NUM_BATCH_FREE_PAGES */
         struct page *page[NUM_BATCH_FREE_PAGES];
         unsigned int num_pages = 0;
         unsigned long flags;
 
-        spin_lock_irqsave(&blkif->free_pages_lock, flags);
-        while (blkif->free_pages_num > num) {
-                BUG_ON(list_empty(&blkif->free_pages));
-                page[num_pages] = list_first_entry(&blkif->free_pages,
+        spin_lock_irqsave(&ring->free_pages_lock, flags);
+        while (ring->free_pages_num > num) {
+                BUG_ON(list_empty(&ring->free_pages));
+                page[num_pages] = list_first_entry(&ring->free_pages,
                                                    struct page, lru);
                 list_del(&page[num_pages]->lru);
-                blkif->free_pages_num--;
+                ring->free_pages_num--;
                 if (++num_pages == NUM_BATCH_FREE_PAGES) {
-                        spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+                        spin_unlock_irqrestore(&ring->free_pages_lock, flags);
                         gnttab_free_pages(num_pages, page);
-                        spin_lock_irqsave(&blkif->free_pages_lock, flags);
+                        spin_lock_irqsave(&ring->free_pages_lock, flags);
                         num_pages = 0;
                 }
         }
-        spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+        spin_unlock_irqrestore(&ring->free_pages_lock, flags);
         if (num_pages != 0)
                 gnttab_free_pages(num_pages, page);
 }
@@ -199,23 +199,29 @@ static void make_response(struct xen_blkif_ring *ring, u64 id,
 
 /*
- * pers_gnts_lock must be used around all the persistent grant helpers
- * because blkback may use multi-thread/queue for each backend.
+ * We don't need locking around the persistent grant helpers
+ * because blkback uses a single-thread for each backend, so we
+ * can be sure that this functions will never be called recursively.
  *
  * The only exception to that is put_persistent_grant, that can be called
  * from interrupt context (by xen_blkbk_unmap), so we have to use atomic
  * bit operations to modify the flags of a persistent grant and to count
  * the number of used grants.
  */
-static int add_persistent_gnt(struct xen_blkif *blkif,
+static int add_persistent_gnt(struct xen_blkif_ring *ring,
                                struct persistent_gnt *persistent_gnt)
 {
         struct rb_node **new = NULL, *parent = NULL;
         struct persistent_gnt *this;
+        struct xen_blkif *blkif = ring->blkif;
 
-        BUG_ON(!spin_is_locked(&blkif->pers_gnts_lock));
-        if (blkif->persistent_gnt_c >= xen_blkif_max_pgrants) {
+        if (ring->persistent_gnt_c >= xen_blkif_max_pgrants) {
                 if (!blkif->vbd.overflow_max_grants)
                         blkif->vbd.overflow_max_grants = 1;
                 return -EBUSY;
         }
         /* Figure out where to put new node */
-        new = &blkif->persistent_gnts.rb_node;
+        new = &ring->persistent_gnts.rb_node;
         while (*new) {
                 this = container_of(*new, struct persistent_gnt, node);
 
@@ -234,20 +240,19 @@ static int add_persistent_gnt(struct xen_blkif *blkif,
         set_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
         /* Add new node and rebalance tree. */
         rb_link_node(&(persistent_gnt->node), parent, new);
-        rb_insert_color(&(persistent_gnt->node), &blkif->persistent_gnts);
-        blkif->persistent_gnt_c++;
-        atomic_inc(&blkif->persistent_gnt_in_use);
+        rb_insert_color(&(persistent_gnt->node), &ring->persistent_gnts);
+        ring->persistent_gnt_c++;
+        atomic_inc(&ring->persistent_gnt_in_use);
         return 0;
 }
 
-static struct persistent_gnt *get_persistent_gnt(struct xen_blkif *blkif,
+static struct persistent_gnt *get_persistent_gnt(struct xen_blkif_ring *ring,
                                                  grant_ref_t gref)
 {
         struct persistent_gnt *data;
         struct rb_node *node = NULL;
 
-        BUG_ON(!spin_is_locked(&blkif->pers_gnts_lock));
-        node = blkif->persistent_gnts.rb_node;
+        node = ring->persistent_gnts.rb_node;
         while (node) {
                 data = container_of(node, struct persistent_gnt, node);
 
@@ -261,25 +266,24 @@ static struct persistent_gnt *get_persistent_gnt(struct xen_blkif *blkif,
                                 return NULL;
                         }
                         set_bit(PERSISTENT_GNT_ACTIVE, data->flags);
-                        atomic_inc(&blkif->persistent_gnt_in_use);
+                        atomic_inc(&ring->persistent_gnt_in_use);
                         return data;
                 }
         }
         return NULL;
 }
 
-static void put_persistent_gnt(struct xen_blkif *blkif,
+static void put_persistent_gnt(struct xen_blkif_ring *ring,
                                struct persistent_gnt *persistent_gnt)
 {
-        BUG_ON(!spin_is_locked(&blkif->pers_gnts_lock));
         if(!test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags))
                 pr_alert_ratelimited("freeing a grant already unused\n");
         set_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags);
         clear_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
-        atomic_dec(&blkif->persistent_gnt_in_use);
+        atomic_dec(&ring->persistent_gnt_in_use);
 }
 
-static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
+static void free_persistent_gnts(struct xen_blkif_ring *ring, struct rb_root *root,
                                  unsigned int num)
 {
         struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 
@@ -293,7 +297,6 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
         unmap_data.unmap_ops = unmap;
         unmap_data.kunmap_ops = NULL;
 
-        BUG_ON(!spin_is_locked(&blkif->pers_gnts_lock));
         foreach_grant_safe(persistent_gnt, n, root, node) {
                 BUG_ON(persistent_gnt->handle ==
                         BLKBACK_INVALID_HANDLE);
 
@@ -311,7 +314,7 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
                         unmap_data.count = segs_to_unmap;
                         BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
 
-                        put_free_pages(blkif, pages, segs_to_unmap);
+                        put_free_pages(ring, pages, segs_to_unmap);
                         segs_to_unmap = 0;
                 }
 
@@ -328,17 +331,15 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work)
         struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
         struct persistent_gnt *persistent_gnt;
         int segs_to_unmap = 0;
-        struct xen_blkif *blkif = container_of(work, typeof(*blkif), persistent_purge_work);
+        struct xen_blkif_ring *ring = container_of(work, typeof(*ring), persistent_purge_work);
         struct gntab_unmap_queue_data unmap_data;
-        unsigned long flags;
 
         unmap_data.pages = pages;
         unmap_data.unmap_ops = unmap;
         unmap_data.kunmap_ops = NULL;
 
-        spin_lock_irqsave(&blkif->pers_gnts_lock, flags);
-        while(!list_empty(&blkif->persistent_purge_list)) {
-                persistent_gnt = list_first_entry(&blkif->persistent_purge_list,
+        while(!list_empty(&ring->persistent_purge_list)) {
+                persistent_gnt = list_first_entry(&ring->persistent_purge_list,
                                                   struct persistent_gnt,
                                                   remove_node);
                 list_del(&persistent_gnt->remove_node);
 
@@ -353,45 +354,42 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work)
                 if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
                         unmap_data.count = segs_to_unmap;
                         BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
-                        put_free_pages(blkif, pages, segs_to_unmap);
+                        put_free_pages(ring, pages, segs_to_unmap);
                         segs_to_unmap = 0;
                 }
                 kfree(persistent_gnt);
         }
-        spin_unlock_irqrestore(&blkif->pers_gnts_lock, flags);
         if (segs_to_unmap > 0) {
                 unmap_data.count = segs_to_unmap;
                 BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
-                put_free_pages(blkif, pages, segs_to_unmap);
+                put_free_pages(ring, pages, segs_to_unmap);
         }
 }
 
-static void purge_persistent_gnt(struct xen_blkif *blkif)
+static void purge_persistent_gnt(struct xen_blkif_ring *ring)
 {
         struct persistent_gnt *persistent_gnt;
         struct rb_node *n;
         unsigned int num_clean, total;
         bool scan_used = false, clean_used = false;
         struct rb_root *root;
-        unsigned long flags;
 
-        spin_lock_irqsave(&blkif->pers_gnts_lock, flags);
-        if (blkif->persistent_gnt_c < xen_blkif_max_pgrants ||
-            (blkif->persistent_gnt_c == xen_blkif_max_pgrants &&
-            !blkif->vbd.overflow_max_grants)) {
+        if (ring->persistent_gnt_c < xen_blkif_max_pgrants ||
+            (ring->persistent_gnt_c == xen_blkif_max_pgrants &&
+            !ring->blkif->vbd.overflow_max_grants)) {
                 goto out;
         }
 
-        if (work_busy(&blkif->persistent_purge_work)) {
+        if (work_busy(&ring->persistent_purge_work)) {
                 pr_alert_ratelimited("Scheduled work from previous purge is still busy, cannot purge list\n");
                 goto out;
         }
 
         num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN;
-        num_clean = blkif->persistent_gnt_c - xen_blkif_max_pgrants + num_clean;
-        num_clean = min(blkif->persistent_gnt_c, num_clean);
+        num_clean = ring->persistent_gnt_c - xen_blkif_max_pgrants + num_clean;
+        num_clean = min(ring->persistent_gnt_c, num_clean);
         if ((num_clean == 0) ||
-            (num_clean > (blkif->persistent_gnt_c - atomic_read(&blkif->persistent_gnt_in_use))))
+            (num_clean > (ring->persistent_gnt_c - atomic_read(&ring->persistent_gnt_in_use))))
                 goto out;
 
         /*
@@ -407,8 +405,8 @@ static void purge_persistent_gnt(struct xen_blkif *blkif)
 
         pr_debug("Going to purge %u persistent grants\n", num_clean);
 
-        BUG_ON(!list_empty(&blkif->persistent_purge_list));
-        root = &blkif->persistent_gnts;
+        BUG_ON(!list_empty(&ring->persistent_purge_list));
+        root = &ring->persistent_gnts;
 purge_list:
         foreach_grant_safe(persistent_gnt, n, root, node) {
                 BUG_ON(persistent_gnt->handle ==
@@ -427,7 +425,7 @@ purge_list:
 
                 rb_erase(&persistent_gnt->node, root);
                 list_add(&persistent_gnt->remove_node,
-                         &blkif->persistent_purge_list);
+                         &ring->persistent_purge_list);
                 if (--num_clean == 0)
                         goto finished;
         }
@@ -448,18 +446,14 @@ finished:
                 goto purge_list;
         }
 
-        blkif->persistent_gnt_c -= (total - num_clean);
-        spin_unlock_irqrestore(&blkif->pers_gnts_lock, flags);
-        blkif->vbd.overflow_max_grants = 0;
+        ring->persistent_gnt_c -= (total - num_clean);
+        ring->blkif->vbd.overflow_max_grants = 0;
 
         /* We can defer this work */
-        schedule_work(&blkif->persistent_purge_work);
+        schedule_work(&ring->persistent_purge_work);
         pr_debug("Purged %u/%u\n", (total - num_clean), total);
         return;
 
 out:
-        spin_unlock_irqrestore(&blkif->pers_gnts_lock, flags);
-
         return;
 }
 
@@ -591,14 +585,16 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
  * SCHEDULER FUNCTIONS
  */
 
-static void print_stats(struct xen_blkif *blkif)
+static void print_stats(struct xen_blkif_ring *ring)
 {
+        struct xen_blkif *blkif = ring->blkif;
+
         pr_info("(%s): oo %3llu | rd %4llu | wr %4llu | f %4llu"
                  " | ds %4llu | pg: %4u/%4d\n",
                  current->comm, blkif->st_oo_req,
                  blkif->st_rd_req, blkif->st_wr_req,
                  blkif->st_f_req, blkif->st_ds_req,
-                 blkif->persistent_gnt_c,
+                 ring->persistent_gnt_c,
                  xen_blkif_max_pgrants);
         blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
         blkif->st_rd_req = 0;
@@ -651,23 +647,23 @@ int xen_blkif_schedule(void *arg)
 
 purge_gnt_list:
                 if (blkif->vbd.feature_gnt_persistent &&
-                    time_after(jiffies, blkif->next_lru)) {
-                        purge_persistent_gnt(blkif);
-                        blkif->next_lru = jiffies + msecs_to_jiffies(LRU_INTERVAL);
+                    time_after(jiffies, ring->next_lru)) {
+                        purge_persistent_gnt(ring);
+                        ring->next_lru = jiffies + msecs_to_jiffies(LRU_INTERVAL);
                 }
 
                 /* Shrink if we have more than xen_blkif_max_buffer_pages */
-                shrink_free_pagepool(blkif, xen_blkif_max_buffer_pages);
+                shrink_free_pagepool(ring, xen_blkif_max_buffer_pages);
 
-                if (log_stats && time_after(jiffies, blkif->st_print))
-                        print_stats(blkif);
+                if (log_stats && time_after(jiffies, ring->blkif->st_print))
+                        print_stats(ring);
         }
 
         /* Drain pending purge work */
-        flush_work(&blkif->persistent_purge_work);
+        flush_work(&ring->persistent_purge_work);
 
         if (log_stats)
-                print_stats(blkif);
+                print_stats(ring);
 
         ring->xenblkd = NULL;
         xen_blkif_put(blkif);
@@ -680,21 +676,16 @@ purge_gnt_list:
  */
 void xen_blkbk_free_caches(struct xen_blkif_ring *ring)
 {
-        struct xen_blkif *blkif = ring->blkif;
-        unsigned long flags;
-
         /* Free all persistent grant pages */
-        spin_lock_irqsave(&blkif->pers_gnts_lock, flags);
-        if (!RB_EMPTY_ROOT(&blkif->persistent_gnts))
-                free_persistent_gnts(blkif, &blkif->persistent_gnts,
-                        blkif->persistent_gnt_c);
+        if (!RB_EMPTY_ROOT(&ring->persistent_gnts))
+                free_persistent_gnts(ring, &ring->persistent_gnts,
+                        ring->persistent_gnt_c);
 
-        BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
-        blkif->persistent_gnt_c = 0;
-        spin_unlock_irqrestore(&blkif->pers_gnts_lock, flags);
+        BUG_ON(!RB_EMPTY_ROOT(&ring->persistent_gnts));
+        ring->persistent_gnt_c = 0;
 
         /* Since we are shutting down remove all pages from the buffer */
-        shrink_free_pagepool(blkif, 0 /* All */);
+        shrink_free_pagepool(ring, 0 /* All */);
 }
 
 static unsigned int xen_blkbk_unmap_prepare(
@@ -705,13 +696,10 @@ static unsigned int xen_blkbk_unmap_prepare(
                                             struct page **unmap_pages)
 {
         unsigned int i, invcount = 0;
-        unsigned long flags;
 
         for (i = 0; i < num; i++) {
                 if (pages[i]->persistent_gnt != NULL) {
-                        spin_lock_irqsave(&ring->blkif->pers_gnts_lock, flags);
-                        put_persistent_gnt(ring->blkif, pages[i]->persistent_gnt);
-                        spin_unlock_irqrestore(&ring->blkif->pers_gnts_lock, flags);
+                        put_persistent_gnt(ring, pages[i]->persistent_gnt);
                         continue;
                 }
                 if (pages[i]->handle == BLKBACK_INVALID_HANDLE)
@@ -736,7 +724,7 @@ static void xen_blkbk_unmap_and_respond_callback(int result, struct gntab_unmap_
            but is this the best way to deal with this? */
         BUG_ON(result);
 
-        put_free_pages(blkif, data->pages, data->count);
+        put_free_pages(ring, data->pages, data->count);
         make_response(ring, pending_req->id,
                       pending_req->operation, pending_req->status);
         free_req(ring, pending_req);
@@ -803,7 +791,7 @@ static void xen_blkbk_unmap(struct xen_blkif_ring *ring,
                 if (invcount) {
                         ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount);
                         BUG_ON(ret);
-                        put_free_pages(ring->blkif, unmap_pages, invcount);
+                        put_free_pages(ring, unmap_pages, invcount);
                 }
                 pages += batch;
                 num -= batch;
@@ -824,7 +812,6 @@ static int xen_blkbk_map(struct xen_blkif_ring *ring,
         int last_map = 0, map_until = 0;
         int use_persistent_gnts;
         struct xen_blkif *blkif = ring->blkif;
-        unsigned long irq_flags;
 
         use_persistent_gnts = (blkif->vbd.feature_gnt_persistent);
 
@@ -838,11 +825,9 @@ again:
                 uint32_t flags;
 
                 if (use_persistent_gnts) {
-                        spin_lock_irqsave(&blkif->pers_gnts_lock, irq_flags);
                         persistent_gnt = get_persistent_gnt(
-                                blkif,
+                                ring,
                                 pages[i]->gref);
-                        spin_unlock_irqrestore(&blkif->pers_gnts_lock, irq_flags);
                 }
 
                 if (persistent_gnt) {
@@ -853,7 +838,7 @@ again:
                         pages[i]->page = persistent_gnt->page;
                         pages[i]->persistent_gnt = persistent_gnt;
                 } else {
-                        if (get_free_page(blkif, &pages[i]->page))
+                        if (get_free_page(ring, &pages[i]->page))
                                 goto out_of_memory;
                         addr = vaddr(pages[i]->page);
                         pages_to_gnt[segs_to_map] = pages[i]->page;
@@ -886,7 +871,7 @@ again:
                 BUG_ON(new_map_idx >= segs_to_map);
                 if (unlikely(map[new_map_idx].status != 0)) {
                         pr_debug("invalid buffer -- could not remap it\n");
-                        put_free_pages(blkif, &pages[seg_idx]->page, 1);
+                        put_free_pages(ring, &pages[seg_idx]->page, 1);
                         pages[seg_idx]->handle = BLKBACK_INVALID_HANDLE;
                         ret |= 1;
                         goto next;
@@ -896,7 +881,7 @@ again:
                         continue;
                 }
                 if (use_persistent_gnts &&
-                    blkif->persistent_gnt_c < xen_blkif_max_pgrants) {
+                    ring->persistent_gnt_c < xen_blkif_max_pgrants) {
                         /*
                          * We are using persistent grants, the grant is
                          * not mapped but we might have room for it.
@@ -914,19 +899,16 @@ again:
                         persistent_gnt->gnt = map[new_map_idx].ref;
                         persistent_gnt->handle = map[new_map_idx].handle;
                         persistent_gnt->page = pages[seg_idx]->page;
-                        spin_lock_irqsave(&blkif->pers_gnts_lock, irq_flags);
-                        if (add_persistent_gnt(blkif,
+                        if (add_persistent_gnt(ring,
                                                persistent_gnt)) {
-                                spin_unlock_irqrestore(&blkif->pers_gnts_lock, irq_flags);
                                 kfree(persistent_gnt);
                                 persistent_gnt = NULL;
                                 goto next;
                         }
                         pages[seg_idx]->persistent_gnt = persistent_gnt;
                         pr_debug("grant %u added to the tree of persistent grants, using %u/%u\n",
                                  persistent_gnt->gnt, blkif->persistent_gnt_c,
+                                 persistent_gnt->gnt, ring->persistent_gnt_c,
                                  xen_blkif_max_pgrants);
-                        spin_unlock_irqrestore(&blkif->pers_gnts_lock, irq_flags);
                         goto next;
                 }
                 if (use_persistent_gnts && !blkif->vbd.overflow_max_grants) {
@@ -950,7 +932,7 @@ next:
 
 out_of_memory:
         pr_alert("%s: out of memory\n", __func__);
-        put_free_pages(blkif, pages_to_gnt, segs_to_map);
+        put_free_pages(ring, pages_to_gnt, segs_to_map);
         return -ENOMEM;
 }
 
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -291,6 +291,22 @@ struct xen_blkif_ring {
         spinlock_t              pending_free_lock;
         wait_queue_head_t       pending_free_wq;
 
+        /* Tree to store persistent grants. */
+        spinlock_t              pers_gnts_lock;
+        struct rb_root          persistent_gnts;
+        unsigned int            persistent_gnt_c;
+        atomic_t                persistent_gnt_in_use;
+        unsigned long           next_lru;
+
+        /* Used by the kworker that offload work from the persistent purge. */
+        struct list_head        persistent_purge_list;
+        struct work_struct      persistent_purge_work;
+
+        /* Buffer of free pages to map grant refs. */
+        spinlock_t              free_pages_lock;
+        int                     free_pages_num;
+        struct list_head        free_pages;
+
         struct work_struct      free_work;
         /* Thread shutdown wait queue. */
         wait_queue_head_t       shutdown_wq;
@@ -312,22 +328,6 @@ struct xen_blkif {
         struct completion       drain_complete;
         atomic_t                drain;
 
-        /* tree to store persistent grants */
-        spinlock_t              pers_gnts_lock;
-        struct rb_root          persistent_gnts;
-        unsigned int            persistent_gnt_c;
-        atomic_t                persistent_gnt_in_use;
-        unsigned long           next_lru;
-
-        /* used by the kworker that offload work from the persistent purge */
-        struct list_head        persistent_purge_list;
-        struct work_struct      persistent_purge_work;
-
-        /* buffer of free pages to map grant refs */
-        spinlock_t              free_pages_lock;
-        int                     free_pages_num;
-        struct list_head        free_pages;
-
         /* statistics */
         unsigned long           st_print;
         unsigned long long      st_rd_req;
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -150,6 +150,10 @@ static int xen_blkif_alloc_rings(struct xen_blkif *blkif)
                 spin_lock_init(&ring->blk_ring_lock);
                 init_waitqueue_head(&ring->wq);
                 INIT_LIST_HEAD(&ring->pending_free);
+                INIT_LIST_HEAD(&ring->persistent_purge_list);
+                INIT_WORK(&ring->persistent_purge_work, xen_blkbk_unmap_purged_grants);
+                spin_lock_init(&ring->free_pages_lock);
+                INIT_LIST_HEAD(&ring->free_pages);
 
                 spin_lock_init(&ring->pending_free_lock);
                 init_waitqueue_head(&ring->pending_free_wq);
@@ -175,11 +179,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
         atomic_set(&blkif->refcnt, 1);
         init_completion(&blkif->drain_complete);
         INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);
-        spin_lock_init(&blkif->free_pages_lock);
-        INIT_LIST_HEAD(&blkif->free_pages);
-        INIT_LIST_HEAD(&blkif->persistent_purge_list);
         blkif->st_print = jiffies;
-        INIT_WORK(&blkif->persistent_purge_work, xen_blkbk_unmap_purged_grants);
 
         return blkif;
 }
@@ -290,6 +290,12 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
                         i++;
                 }
 
+                BUG_ON(atomic_read(&ring->persistent_gnt_in_use) != 0);
+                BUG_ON(!list_empty(&ring->persistent_purge_list));
+                BUG_ON(!RB_EMPTY_ROOT(&ring->persistent_gnts));
+                BUG_ON(!list_empty(&ring->free_pages));
+                BUG_ON(ring->free_pages_num != 0);
+                BUG_ON(ring->persistent_gnt_c != 0);
                 WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
         }
         blkif->nr_ring_pages = 0;
@@ -304,13 +310,6 @@ static void xen_blkif_free(struct xen_blkif *blkif)
         xen_vbd_free(&blkif->vbd);
 
         /* Make sure everything is drained before shutting down */
-        BUG_ON(blkif->persistent_gnt_c != 0);
-        BUG_ON(atomic_read(&blkif->persistent_gnt_in_use) != 0);
-        BUG_ON(blkif->free_pages_num != 0);
-        BUG_ON(!list_empty(&blkif->persistent_purge_list));
-        BUG_ON(!list_empty(&blkif->free_pages));
-        BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
-
         kfree(blkif->rings);
         kmem_cache_free(xen_blkif_cachep, blkif);
 }