diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index bd2b3bbbb22c..9121a2c3e26f 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -83,6 +83,13 @@ module_param_named(max_persistent_grants, xen_blkif_max_pgrants, int, 0644); MODULE_PARM_DESC(max_persistent_grants, "Maximum number of grants to map persistently"); +/* + * Maximum order of pages to be used for the shared ring between front and + * backend, 4KB page granularity is used. + */ +unsigned int xen_blkif_max_ring_order = XENBUS_MAX_RING_PAGE_ORDER; +module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO); +MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring"); /* * The LRU mechanism to clean the lists of persistent grants needs to * be executed periodically. The time interval between consecutive executions @@ -1451,6 +1458,12 @@ static int __init xen_blkif_init(void) if (!xen_domain()) return -ENODEV; + if (xen_blkif_max_ring_order > XENBUS_MAX_RING_PAGE_ORDER) { + pr_info("Invalid max_ring_order (%d), will use default max: %d.\n", + xen_blkif_max_ring_order, XENBUS_MAX_RING_PAGE_ORDER); + xen_blkif_max_ring_order = XENBUS_MAX_RING_PAGE_ORDER; + } + rc = xen_blkif_interface_init(); if (rc) goto failed_init; diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 043f13b7b7b0..8ccc49d01c8e 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -44,6 +44,7 @@ #include #include +extern unsigned int xen_blkif_max_ring_order; /* * This is the maximum number of segments that would be allowed in indirect * requests. This value will also be passed to the frontend. @@ -320,6 +321,7 @@ struct xen_blkif { struct work_struct free_work; /* Thread shutdown wait queue. */ wait_queue_head_t shutdown_wq; + unsigned int nr_ring_pages; }; struct seg_buf { diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index c212d41fd5bd..deb3f001791f 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -25,6 +25,7 @@ /* Enlarge the array size in order to fully show blkback name. */ #define BLKBACK_NAME_LEN (20) +#define RINGREF_NAME_LEN (20) struct backend_info { struct xenbus_device *dev; @@ -156,8 +157,8 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) return blkif; } -static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref, - unsigned int evtchn) +static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref, + unsigned int nr_grefs, unsigned int evtchn) { int err; @@ -165,7 +166,7 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref, if (blkif->irq) return 0; - err = xenbus_map_ring_valloc(blkif->be->dev, &gref, 1, + err = xenbus_map_ring_valloc(blkif->be->dev, gref, nr_grefs, &blkif->blk_ring); if (err < 0) return err; @@ -175,21 +176,21 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref, { struct blkif_sring *sring; sring = (struct blkif_sring *)blkif->blk_ring; - BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE); + BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE * nr_grefs); break; } case BLKIF_PROTOCOL_X86_32: { struct blkif_x86_32_sring *sring_x86_32; sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring; - BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE); + BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE * nr_grefs); break; } case BLKIF_PROTOCOL_X86_64: { struct blkif_x86_64_sring *sring_x86_64; sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring; - BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE); + BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE * nr_grefs); break; } default: @@ -270,7 +271,7 @@ static void xen_blkif_free(struct xen_blkif *blkif) i++; } - WARN_ON(i != XEN_BLKIF_REQS_PER_PAGE); + WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages)); kmem_cache_free(xen_blkif_cachep, blkif); } @@ -555,6 +556,11 @@ static int xen_blkbk_probe(struct xenbus_device *dev, if (err) goto fail; + err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order", "%u", + xen_blkif_max_ring_order); + if (err) + pr_warn("%s write out 'max-ring-page-order' failed\n", __func__); + err = xenbus_switch_state(dev, XenbusStateInitWait); if (err) goto fail; @@ -818,8 +824,8 @@ again: static int connect_ring(struct backend_info *be) { struct xenbus_device *dev = be->dev; - unsigned long ring_ref; - unsigned int evtchn; + unsigned int ring_ref[XENBUS_MAX_RING_PAGES]; + unsigned int evtchn, nr_grefs, ring_page_order; unsigned int pers_grants; char protocol[64] = ""; struct pending_req *req, *n; @@ -827,14 +833,57 @@ static int connect_ring(struct backend_info *be) pr_debug("%s %s\n", __func__, dev->otherend); - err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", - &ring_ref, "event-channel", "%u", &evtchn, NULL); - if (err) { - xenbus_dev_fatal(dev, err, - "reading %s/ring-ref and event-channel", + err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u", + &evtchn); + if (err != 1) { + err = -EINVAL; + xenbus_dev_fatal(dev, err, "reading %s/event-channel", dev->otherend); return err; } + pr_info("event-channel %u\n", evtchn); + + err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u", + &ring_page_order); + if (err != 1) { + err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref", + "%u", &ring_ref[0]); + if (err != 1) { + err = -EINVAL; + xenbus_dev_fatal(dev, err, "reading %s/ring-ref", + dev->otherend); + return err; + } + nr_grefs = 1; + pr_info("%s:using single page: ring-ref %d\n", dev->otherend, + ring_ref[0]); + } else { + unsigned int i; + + if (ring_page_order > xen_blkif_max_ring_order) { + err = -EINVAL; + xenbus_dev_fatal(dev, err, "%s/request %d ring page order exceed max:%d", + dev->otherend, ring_page_order, + xen_blkif_max_ring_order); + return err; + } + + nr_grefs = 1 << ring_page_order; + for (i = 0; i < nr_grefs; i++) { + char ring_ref_name[RINGREF_NAME_LEN]; + + snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i); + err = xenbus_scanf(XBT_NIL, dev->otherend, ring_ref_name, + "%u", &ring_ref[i]); + if (err != 1) { + err = -EINVAL; + xenbus_dev_fatal(dev, err, "reading %s/%s", + dev->otherend, ring_ref_name); + return err; + } + pr_info("ring-ref%u: %u\n", i, ring_ref[i]); + } + } be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT; err = xenbus_gather(XBT_NIL, dev->otherend, "protocol", @@ -859,12 +908,13 @@ static int connect_ring(struct backend_info *be) be->blkif->vbd.feature_gnt_persistent = pers_grants; be->blkif->vbd.overflow_max_grants = 0; + be->blkif->nr_ring_pages = nr_grefs; - pr_info("ring-ref %ld, event-channel %d, protocol %d (%s) %s\n", - ring_ref, evtchn, be->blkif->blk_protocol, protocol, + pr_info("ring-pages:%d, event-channel %d, protocol %d (%s) %s\n", + nr_grefs, evtchn, be->blkif->blk_protocol, protocol, pers_grants ? "persistent grants" : ""); - for (i = 0; i < XEN_BLKIF_REQS_PER_PAGE; i++) { + for (i = 0; i < nr_grefs * XEN_BLKIF_REQS_PER_PAGE; i++) { req = kzalloc(sizeof(*req), GFP_KERNEL); if (!req) goto fail; @@ -883,10 +933,9 @@ static int connect_ring(struct backend_info *be) } /* Map the shared frame, irq etc. */ - err = xen_blkif_map(be->blkif, ring_ref, evtchn); + err = xen_blkif_map(be->blkif, ring_ref, nr_grefs, evtchn); if (err) { - xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u", - ring_ref, evtchn); + xenbus_dev_fatal(dev, err, "mapping ring-ref port %u", evtchn); return err; } diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 88e23fd8c7f3..d3c1a9523d1f 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -98,7 +98,21 @@ static unsigned int xen_blkif_max_segments = 32; module_param_named(max, xen_blkif_max_segments, int, S_IRUGO); MODULE_PARM_DESC(max, "Maximum amount of segments in indirect requests (default is 32)"); -#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE) +/* + * Maximum order of pages to be used for the shared ring between front and + * backend, 4KB page granularity is used. + */ +static unsigned int xen_blkif_max_ring_order; +module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO); +MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring"); + +#define BLK_RING_SIZE(info) __CONST_RING_SIZE(blkif, PAGE_SIZE * (info)->nr_ring_pages) +#define BLK_MAX_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE * XENBUS_MAX_RING_PAGES) +/* + * ring-ref%i i=(-1UL) would take 11 characters + 'ring-ref' is 8, so 19 + * characters are enough. Define to 20 to keep consist with backend. + */ +#define RINGREF_NAME_LEN (20) /* * We have one of these per vbd, whether ide, scsi or 'other'. They @@ -114,13 +128,14 @@ struct blkfront_info int vdevice; blkif_vdev_t handle; enum blkif_state connected; - int ring_ref; + int ring_ref[XENBUS_MAX_RING_PAGES]; + unsigned int nr_ring_pages; struct blkif_front_ring ring; unsigned int evtchn, irq; struct request_queue *rq; struct work_struct work; struct gnttab_free_callback callback; - struct blk_shadow shadow[BLK_RING_SIZE]; + struct blk_shadow shadow[BLK_MAX_RING_SIZE]; struct list_head grants; struct list_head indirect_pages; unsigned int persistent_gnts_c; @@ -139,8 +154,6 @@ static unsigned int nr_minors; static unsigned long *minors; static DEFINE_SPINLOCK(minor_lock); -#define MAXIMUM_OUTSTANDING_BLOCK_REQS \ - (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE) #define GRANT_INVALID_REF 0 #define PARTS_PER_DISK 16 @@ -170,7 +183,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info); static int get_id_from_freelist(struct blkfront_info *info) { unsigned long free = info->shadow_free; - BUG_ON(free >= BLK_RING_SIZE); + BUG_ON(free >= BLK_RING_SIZE(info)); info->shadow_free = info->shadow[free].req.u.rw.id; info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */ return free; @@ -983,7 +996,7 @@ static void blkif_free(struct blkfront_info *info, int suspend) } } - for (i = 0; i < BLK_RING_SIZE; i++) { + for (i = 0; i < BLK_RING_SIZE(info); i++) { /* * Clear persistent grants present in requests already * on the shared ring @@ -1033,12 +1046,15 @@ free_shadow: flush_work(&info->work); /* Free resources associated with old device channel. */ - if (info->ring_ref != GRANT_INVALID_REF) { - gnttab_end_foreign_access(info->ring_ref, 0, - (unsigned long)info->ring.sring); - info->ring_ref = GRANT_INVALID_REF; - info->ring.sring = NULL; + for (i = 0; i < info->nr_ring_pages; i++) { + if (info->ring_ref[i] != GRANT_INVALID_REF) { + gnttab_end_foreign_access(info->ring_ref[i], 0, 0); + info->ring_ref[i] = GRANT_INVALID_REF; + } } + free_pages((unsigned long)info->ring.sring, get_order(info->nr_ring_pages * PAGE_SIZE)); + info->ring.sring = NULL; + if (info->irq) unbind_from_irqhandler(info->irq, info); info->evtchn = info->irq = 0; @@ -1157,7 +1173,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) * never have given to it (we stamp it up to BLK_RING_SIZE - * look in get_id_from_freelist. */ - if (id >= BLK_RING_SIZE) { + if (id >= BLK_RING_SIZE(info)) { WARN(1, "%s: response to %s has incorrect id (%ld)\n", info->gd->disk_name, op_name(bret->operation), id); /* We can't safely get the 'struct request' as @@ -1245,26 +1261,30 @@ static int setup_blkring(struct xenbus_device *dev, struct blkfront_info *info) { struct blkif_sring *sring; - grant_ref_t gref; - int err; + int err, i; + unsigned long ring_size = info->nr_ring_pages * PAGE_SIZE; + grant_ref_t gref[XENBUS_MAX_RING_PAGES]; - info->ring_ref = GRANT_INVALID_REF; + for (i = 0; i < info->nr_ring_pages; i++) + info->ring_ref[i] = GRANT_INVALID_REF; - sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH); + sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH, + get_order(ring_size)); if (!sring) { xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); return -ENOMEM; } SHARED_RING_INIT(sring); - FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); + FRONT_RING_INIT(&info->ring, sring, ring_size); - err = xenbus_grant_ring(dev, info->ring.sring, 1, &gref); + err = xenbus_grant_ring(dev, info->ring.sring, info->nr_ring_pages, gref); if (err < 0) { - free_page((unsigned long)sring); + free_pages((unsigned long)sring, get_order(ring_size)); info->ring.sring = NULL; goto fail; } - info->ring_ref = gref; + for (i = 0; i < info->nr_ring_pages; i++) + info->ring_ref[i] = gref[i]; err = xenbus_alloc_evtchn(dev, &info->evtchn); if (err) @@ -1292,7 +1312,18 @@ static int talk_to_blkback(struct xenbus_device *dev, { const char *message = NULL; struct xenbus_transaction xbt; - int err; + int err, i; + unsigned int max_page_order = 0; + unsigned int ring_page_order = 0; + + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "max-ring-page-order", "%u", &max_page_order); + if (err != 1) + info->nr_ring_pages = 1; + else { + ring_page_order = min(xen_blkif_max_ring_order, max_page_order); + info->nr_ring_pages = 1 << ring_page_order; + } /* Create shared ring, alloc event channel. */ err = setup_blkring(dev, info); @@ -1306,11 +1337,32 @@ again: goto destroy_blkring; } - err = xenbus_printf(xbt, dev->nodename, - "ring-ref", "%u", info->ring_ref); - if (err) { - message = "writing ring-ref"; - goto abort_transaction; + if (info->nr_ring_pages == 1) { + err = xenbus_printf(xbt, dev->nodename, + "ring-ref", "%u", info->ring_ref[0]); + if (err) { + message = "writing ring-ref"; + goto abort_transaction; + } + } else { + err = xenbus_printf(xbt, dev->nodename, + "ring-page-order", "%u", ring_page_order); + if (err) { + message = "writing ring-page-order"; + goto abort_transaction; + } + + for (i = 0; i < info->nr_ring_pages; i++) { + char ring_ref_name[RINGREF_NAME_LEN]; + + snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i); + err = xenbus_printf(xbt, dev->nodename, ring_ref_name, + "%u", info->ring_ref[i]); + if (err) { + message = "writing ring-ref"; + goto abort_transaction; + } + } } err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u", info->evtchn); @@ -1338,6 +1390,9 @@ again: goto destroy_blkring; } + for (i = 0; i < BLK_RING_SIZE(info); i++) + info->shadow[i].req.u.rw.id = i+1; + info->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff; xenbus_switch_state(dev, XenbusStateInitialised); return 0; @@ -1361,7 +1416,7 @@ again: static int blkfront_probe(struct xenbus_device *dev, const struct xenbus_device_id *id) { - int err, vdevice, i; + int err, vdevice; struct blkfront_info *info; /* FIXME: Use dynamic device id if this is not set. */ @@ -1422,10 +1477,6 @@ static int blkfront_probe(struct xenbus_device *dev, info->connected = BLKIF_STATE_DISCONNECTED; INIT_WORK(&info->work, blkif_restart_queue); - for (i = 0; i < BLK_RING_SIZE; i++) - info->shadow[i].req.u.rw.id = i+1; - info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff; - /* Front end dir is a number, which is used as the id. */ info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0); dev_set_drvdata(&dev->dev, info); @@ -1469,10 +1520,10 @@ static int blkif_recover(struct blkfront_info *info) /* Stage 2: Set up free list. */ memset(&info->shadow, 0, sizeof(info->shadow)); - for (i = 0; i < BLK_RING_SIZE; i++) + for (i = 0; i < BLK_RING_SIZE(info); i++) info->shadow[i].req.u.rw.id = i+1; info->shadow_free = info->ring.req_prod_pvt; - info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff; + info->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff; rc = blkfront_setup_indirect(info); if (rc) { @@ -1484,7 +1535,7 @@ static int blkif_recover(struct blkfront_info *info) blk_queue_max_segments(info->rq, segs); bio_list_init(&bio_list); INIT_LIST_HEAD(&requests); - for (i = 0; i < BLK_RING_SIZE; i++) { + for (i = 0; i < BLK_RING_SIZE(info); i++) { /* Not in use? */ if (!copy[i].request) continue; @@ -1690,7 +1741,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info) segs = info->max_indirect_segments; } - err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE); + err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE(info)); if (err) goto out_of_memory; @@ -1700,7 +1751,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info) * grants, we need to allocate a set of pages that can be * used for mapping indirect grefs */ - int num = INDIRECT_GREFS(segs) * BLK_RING_SIZE; + int num = INDIRECT_GREFS(segs) * BLK_RING_SIZE(info); BUG_ON(!list_empty(&info->indirect_pages)); for (i = 0; i < num; i++) { @@ -1711,7 +1762,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info) } } - for (i = 0; i < BLK_RING_SIZE; i++) { + for (i = 0; i < BLK_RING_SIZE(info); i++) { info->shadow[i].grants_used = kzalloc( sizeof(info->shadow[i].grants_used[0]) * segs, GFP_NOIO); @@ -1733,7 +1784,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info) return 0; out_of_memory: - for (i = 0; i < BLK_RING_SIZE; i++) { + for (i = 0; i < BLK_RING_SIZE(info); i++) { kfree(info->shadow[i].grants_used); info->shadow[i].grants_used = NULL; kfree(info->shadow[i].sg); @@ -2089,6 +2140,12 @@ static int __init xlblk_init(void) if (!xen_domain()) return -ENODEV; + if (xen_blkif_max_ring_order > XENBUS_MAX_RING_PAGE_ORDER) { + pr_info("Invalid max_ring_order (%d), will use default max: %d.\n", + xen_blkif_max_ring_order, XENBUS_MAX_RING_PAGE_ORDER); + xen_blkif_max_ring_order = 0; + } + if (!xen_has_pv_disk_devices()) return -ENODEV;