net/xen-netback: Make it running on 64KB page granularity

The PV network protocol is using 4KB page granularity. The goal of this
patch is to allow a Linux using 64KB page granularity working as a
network backend on a non-modified Xen.

It's only necessary to adapt the ring size and break skb data in small
chunk of 4KB. The rest of the code is relying on the grant table code.

Signed-off-by: Julien Grall <julien.grall@citrix.com>
Reviewed-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
This commit is contained in:
Julien Grall 2015-05-05 13:15:29 +01:00 committed by David Vrabel
parent 30c5d7f0da
commit d0089e8a0e
2 changed files with 111 additions and 62 deletions

View File

@ -44,6 +44,7 @@
#include <xen/interface/grant_table.h> #include <xen/interface/grant_table.h>
#include <xen/grant_table.h> #include <xen/grant_table.h>
#include <xen/xenbus.h> #include <xen/xenbus.h>
#include <xen/page.h>
#include <linux/debugfs.h> #include <linux/debugfs.h>
typedef unsigned int pending_ring_idx_t; typedef unsigned int pending_ring_idx_t;
@ -64,8 +65,8 @@ struct pending_tx_info {
struct ubuf_info callback_struct; struct ubuf_info callback_struct;
}; };
#define XEN_NETIF_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE) #define XEN_NETIF_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, XEN_PAGE_SIZE)
#define XEN_NETIF_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE) #define XEN_NETIF_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, XEN_PAGE_SIZE)
struct xenvif_rx_meta { struct xenvif_rx_meta {
int id; int id;
@ -80,16 +81,21 @@ struct xenvif_rx_meta {
/* Discriminate from any valid pending_idx value. */ /* Discriminate from any valid pending_idx value. */
#define INVALID_PENDING_IDX 0xFFFF #define INVALID_PENDING_IDX 0xFFFF
#define MAX_BUFFER_OFFSET PAGE_SIZE #define MAX_BUFFER_OFFSET XEN_PAGE_SIZE
#define MAX_PENDING_REQS XEN_NETIF_TX_RING_SIZE #define MAX_PENDING_REQS XEN_NETIF_TX_RING_SIZE
/* The maximum number of frags is derived from the size of a grant (same
* as a Xen page size for now).
*/
#define MAX_XEN_SKB_FRAGS (65536 / XEN_PAGE_SIZE + 1)
/* It's possible for an skb to have a maximal number of frags /* It's possible for an skb to have a maximal number of frags
* but still be less than MAX_BUFFER_OFFSET in size. Thus the * but still be less than MAX_BUFFER_OFFSET in size. Thus the
* worst-case number of copy operations is MAX_SKB_FRAGS per * worst-case number of copy operations is MAX_XEN_SKB_FRAGS per
* ring slot. * ring slot.
*/ */
#define MAX_GRANT_COPY_OPS (MAX_SKB_FRAGS * XEN_NETIF_RX_RING_SIZE) #define MAX_GRANT_COPY_OPS (MAX_XEN_SKB_FRAGS * XEN_NETIF_RX_RING_SIZE)
#define NETBACK_INVALID_HANDLE -1 #define NETBACK_INVALID_HANDLE -1

View File

@ -152,9 +152,9 @@ static inline pending_ring_idx_t pending_index(unsigned i)
static int xenvif_rx_ring_slots_needed(struct xenvif *vif) static int xenvif_rx_ring_slots_needed(struct xenvif *vif)
{ {
if (vif->gso_mask) if (vif->gso_mask)
return DIV_ROUND_UP(vif->dev->gso_max_size, PAGE_SIZE) + 1; return DIV_ROUND_UP(vif->dev->gso_max_size, XEN_PAGE_SIZE) + 1;
else else
return DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE); return DIV_ROUND_UP(vif->dev->mtu, XEN_PAGE_SIZE);
} }
static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue) static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue)
@ -274,6 +274,80 @@ static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif_queue *queue,
return meta; return meta;
} }
struct gop_frag_copy {
struct xenvif_queue *queue;
struct netrx_pending_operations *npo;
struct xenvif_rx_meta *meta;
int head;
int gso_type;
struct page *page;
};
static void xenvif_setup_copy_gop(unsigned long gfn,
unsigned int offset,
unsigned int *len,
struct gop_frag_copy *info)
{
struct gnttab_copy *copy_gop;
struct xen_page_foreign *foreign;
/* Convenient aliases */
struct xenvif_queue *queue = info->queue;
struct netrx_pending_operations *npo = info->npo;
struct page *page = info->page;
BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
if (npo->copy_off == MAX_BUFFER_OFFSET)
info->meta = get_next_rx_buffer(queue, npo);
if (npo->copy_off + *len > MAX_BUFFER_OFFSET)
*len = MAX_BUFFER_OFFSET - npo->copy_off;
copy_gop = npo->copy + npo->copy_prod++;
copy_gop->flags = GNTCOPY_dest_gref;
copy_gop->len = *len;
foreign = xen_page_foreign(page);
if (foreign) {
copy_gop->source.domid = foreign->domid;
copy_gop->source.u.ref = foreign->gref;
copy_gop->flags |= GNTCOPY_source_gref;
} else {
copy_gop->source.domid = DOMID_SELF;
copy_gop->source.u.gmfn = gfn;
}
copy_gop->source.offset = offset;
copy_gop->dest.domid = queue->vif->domid;
copy_gop->dest.offset = npo->copy_off;
copy_gop->dest.u.ref = npo->copy_gref;
npo->copy_off += *len;
info->meta->size += *len;
/* Leave a gap for the GSO descriptor. */
if (info->head && ((1 << info->gso_type) & queue->vif->gso_mask))
queue->rx.req_cons++;
info->head = 0; /* There must be something in this buffer now */
}
static void xenvif_gop_frag_copy_grant(unsigned long gfn,
unsigned offset,
unsigned int len,
void *data)
{
unsigned int bytes;
while (len) {
bytes = len;
xenvif_setup_copy_gop(gfn, offset, &bytes, data);
offset += bytes;
len -= bytes;
}
}
/* /*
* Set up the grant operations for this fragment. If it's a flipping * Set up the grant operations for this fragment. If it's a flipping
* interface, we also set up the unmap request from here. * interface, we also set up the unmap request from here.
@ -283,83 +357,52 @@ static void xenvif_gop_frag_copy(struct xenvif_queue *queue, struct sk_buff *skb
struct page *page, unsigned long size, struct page *page, unsigned long size,
unsigned long offset, int *head) unsigned long offset, int *head)
{ {
struct gnttab_copy *copy_gop; struct gop_frag_copy info = {
struct xenvif_rx_meta *meta; .queue = queue,
.npo = npo,
.head = *head,
.gso_type = XEN_NETIF_GSO_TYPE_NONE,
};
unsigned long bytes; unsigned long bytes;
int gso_type = XEN_NETIF_GSO_TYPE_NONE;
if (skb_is_gso(skb)) { if (skb_is_gso(skb)) {
if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
gso_type = XEN_NETIF_GSO_TYPE_TCPV4; info.gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
gso_type = XEN_NETIF_GSO_TYPE_TCPV6; info.gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
} }
/* Data must not cross a page boundary. */ /* Data must not cross a page boundary. */
BUG_ON(size + offset > PAGE_SIZE<<compound_order(page)); BUG_ON(size + offset > PAGE_SIZE<<compound_order(page));
meta = npo->meta + npo->meta_prod - 1; info.meta = npo->meta + npo->meta_prod - 1;
/* Skip unused frames from start of page */ /* Skip unused frames from start of page */
page += offset >> PAGE_SHIFT; page += offset >> PAGE_SHIFT;
offset &= ~PAGE_MASK; offset &= ~PAGE_MASK;
while (size > 0) { while (size > 0) {
struct xen_page_foreign *foreign;
BUG_ON(offset >= PAGE_SIZE); BUG_ON(offset >= PAGE_SIZE);
BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
if (npo->copy_off == MAX_BUFFER_OFFSET)
meta = get_next_rx_buffer(queue, npo);
bytes = PAGE_SIZE - offset; bytes = PAGE_SIZE - offset;
if (bytes > size) if (bytes > size)
bytes = size; bytes = size;
if (npo->copy_off + bytes > MAX_BUFFER_OFFSET) info.page = page;
bytes = MAX_BUFFER_OFFSET - npo->copy_off; gnttab_foreach_grant_in_range(page, offset, bytes,
xenvif_gop_frag_copy_grant,
copy_gop = npo->copy + npo->copy_prod++; &info);
copy_gop->flags = GNTCOPY_dest_gref;
copy_gop->len = bytes;
foreign = xen_page_foreign(page);
if (foreign) {
copy_gop->source.domid = foreign->domid;
copy_gop->source.u.ref = foreign->gref;
copy_gop->flags |= GNTCOPY_source_gref;
} else {
copy_gop->source.domid = DOMID_SELF;
copy_gop->source.u.gmfn =
virt_to_gfn(page_address(page));
}
copy_gop->source.offset = offset;
copy_gop->dest.domid = queue->vif->domid;
copy_gop->dest.offset = npo->copy_off;
copy_gop->dest.u.ref = npo->copy_gref;
npo->copy_off += bytes;
meta->size += bytes;
offset += bytes;
size -= bytes; size -= bytes;
offset = 0;
/* Next frame */ /* Next page */
if (offset == PAGE_SIZE && size) { if (size) {
BUG_ON(!PageCompound(page)); BUG_ON(!PageCompound(page));
page++; page++;
offset = 0;
} }
/* Leave a gap for the GSO descriptor. */
if (*head && ((1 << gso_type) & queue->vif->gso_mask))
queue->rx.req_cons++;
*head = 0; /* There must be something in this buffer now. */
} }
*head = info.head;
} }
/* /*
@ -758,7 +801,7 @@ static int xenvif_count_requests(struct xenvif_queue *queue,
first->size -= txp->size; first->size -= txp->size;
slots++; slots++;
if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) { if (unlikely((txp->offset + txp->size) > XEN_PAGE_SIZE)) {
netdev_err(queue->vif->dev, "Cross page boundary, txp->offset: %u, size: %u\n", netdev_err(queue->vif->dev, "Cross page boundary, txp->offset: %u, size: %u\n",
txp->offset, txp->size); txp->offset, txp->size);
xenvif_fatal_tx_err(queue->vif); xenvif_fatal_tx_err(queue->vif);
@ -1339,11 +1382,11 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
} }
/* No crossing a page as the payload mustn't fragment. */ /* No crossing a page as the payload mustn't fragment. */
if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) { if (unlikely((txreq.offset + txreq.size) > XEN_PAGE_SIZE)) {
netdev_err(queue->vif->dev, netdev_err(queue->vif->dev,
"txreq.offset: %u, size: %u, end: %lu\n", "txreq.offset: %u, size: %u, end: %lu\n",
txreq.offset, txreq.size, txreq.offset, txreq.size,
(unsigned long)(txreq.offset&~PAGE_MASK) + txreq.size); (unsigned long)(txreq.offset&~XEN_PAGE_MASK) + txreq.size);
xenvif_fatal_tx_err(queue->vif); xenvif_fatal_tx_err(queue->vif);
break; break;
} }
@ -1409,7 +1452,7 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
virt_to_gfn(skb->data); virt_to_gfn(skb->data);
queue->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF; queue->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF;
queue->tx_copy_ops[*copy_ops].dest.offset = queue->tx_copy_ops[*copy_ops].dest.offset =
offset_in_page(skb->data); offset_in_page(skb->data) & ~XEN_PAGE_MASK;
queue->tx_copy_ops[*copy_ops].len = data_len; queue->tx_copy_ops[*copy_ops].len = data_len;
queue->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref; queue->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref;
@ -1894,7 +1937,7 @@ int xenvif_map_frontend_rings(struct xenvif_queue *queue,
goto err; goto err;
txs = (struct xen_netif_tx_sring *)addr; txs = (struct xen_netif_tx_sring *)addr;
BACK_RING_INIT(&queue->tx, txs, PAGE_SIZE); BACK_RING_INIT(&queue->tx, txs, XEN_PAGE_SIZE);
err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif), err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif),
&rx_ring_ref, 1, &addr); &rx_ring_ref, 1, &addr);
@ -1902,7 +1945,7 @@ int xenvif_map_frontend_rings(struct xenvif_queue *queue,
goto err; goto err;
rxs = (struct xen_netif_rx_sring *)addr; rxs = (struct xen_netif_rx_sring *)addr;
BACK_RING_INIT(&queue->rx, rxs, PAGE_SIZE); BACK_RING_INIT(&queue->rx, rxs, XEN_PAGE_SIZE);
return 0; return 0;