From b51ca34a262f5a3556eb6f1d05fd88503927d3ae Mon Sep 17 00:00:00 2001 From: Fengguang Wu Date: Wed, 3 Jul 2013 14:07:40 +0800 Subject: [PATCH 01/10] sfc: efx_ethtool_get_ts_info() can be static Signed-off-by: Fengguang Wu Signed-off-by: Ben Hutchings --- drivers/net/ethernet/sfc/ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index 5b471cf5c323..c8dc407513b0 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -1035,8 +1035,8 @@ static int efx_ethtool_set_rxfh_indir(struct net_device *net_dev, return 0; } -int efx_ethtool_get_ts_info(struct net_device *net_dev, - struct ethtool_ts_info *ts_info) +static int efx_ethtool_get_ts_info(struct net_device *net_dev, + struct ethtool_ts_info *ts_info) { struct efx_nic *efx = netdev_priv(net_dev); From 9fd8095dc1140c45bfc4b7132fb00815a354fb63 Mon Sep 17 00:00:00 2001 From: Fengguang Wu Date: Sat, 31 Aug 2013 06:54:05 +0800 Subject: [PATCH 02/10] sfc: efx_ef10_filter_update_rx_scatter() can be static Signed-off-by: Fengguang Wu Signed-off-by: Ben Hutchings --- drivers/net/ethernet/sfc/ef10.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 9f18ae984f9e..0a3a3b92a131 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -2137,7 +2137,7 @@ out_unlock: return rc; } -void efx_ef10_filter_update_rx_scatter(struct efx_nic *efx) +static void efx_ef10_filter_update_rx_scatter(struct efx_nic *efx) { /* no need to do anything here on EF10 */ } From 137b79220c5a066fe98d4cfb0bde578637534296 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 19 Sep 2012 02:16:05 +0100 Subject: [PATCH 03/10] sfc: Add EF10 registers to register dump There are very few readable registers, but we may as well report them. Signed-off-by: Ben Hutchings --- drivers/net/ethernet/sfc/nic.c | 73 +++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/sfc/nic.c b/drivers/net/ethernet/sfc/nic.c index e7dbd2dd202e..c75009b8c0d9 100644 --- a/drivers/net/ethernet/sfc/nic.c +++ b/drivers/net/ethernet/sfc/nic.c @@ -19,6 +19,7 @@ #include "bitfield.h" #include "efx.h" #include "nic.h" +#include "ef10_regs.h" #include "farch_regs.h" #include "io.h" #include "workarounds.h" @@ -166,26 +167,30 @@ void efx_nic_fini_interrupt(struct efx_nic *efx) /* Register dump */ -#define REGISTER_REVISION_A 1 -#define REGISTER_REVISION_B 2 -#define REGISTER_REVISION_C 3 -#define REGISTER_REVISION_Z 3 /* latest revision */ +#define REGISTER_REVISION_FA 1 +#define REGISTER_REVISION_FB 2 +#define REGISTER_REVISION_FC 3 +#define REGISTER_REVISION_FZ 3 /* last Falcon arch revision */ +#define REGISTER_REVISION_ED 4 +#define REGISTER_REVISION_EZ 4 /* latest EF10 revision */ struct efx_nic_reg { u32 offset:24; - u32 min_revision:2, max_revision:2; + u32 min_revision:3, max_revision:3; }; -#define REGISTER(name, min_rev, max_rev) { \ - FR_ ## min_rev ## max_rev ## _ ## name, \ - REGISTER_REVISION_ ## min_rev, REGISTER_REVISION_ ## max_rev \ +#define REGISTER(name, arch, min_rev, max_rev) { \ + arch ## R_ ## min_rev ## max_rev ## _ ## name, \ + REGISTER_REVISION_ ## arch ## min_rev, \ + REGISTER_REVISION_ ## arch ## max_rev \ } -#define REGISTER_AA(name) REGISTER(name, A, A) -#define REGISTER_AB(name) REGISTER(name, A, B) -#define REGISTER_AZ(name) REGISTER(name, A, Z) -#define REGISTER_BB(name) REGISTER(name, B, B) -#define REGISTER_BZ(name) REGISTER(name, B, Z) -#define REGISTER_CZ(name) REGISTER(name, C, Z) +#define REGISTER_AA(name) REGISTER(name, F, A, A) +#define REGISTER_AB(name) REGISTER(name, F, A, B) +#define REGISTER_AZ(name) REGISTER(name, F, A, Z) +#define REGISTER_BB(name) REGISTER(name, F, B, B) +#define REGISTER_BZ(name) REGISTER(name, F, B, Z) +#define REGISTER_CZ(name) REGISTER(name, F, C, Z) +#define REGISTER_DZ(name) REGISTER(name, E, D, Z) static const struct efx_nic_reg efx_nic_regs[] = { REGISTER_AZ(ADR_REGION), @@ -292,37 +297,42 @@ static const struct efx_nic_reg efx_nic_regs[] = { REGISTER_AB(XX_TXDRV_CTL), /* XX_PRBS_CTL, XX_PRBS_CHK and XX_PRBS_ERR are not used */ /* XX_CORE_STAT is partly RC */ + REGISTER_DZ(BIU_HW_REV_ID), + REGISTER_DZ(MC_DB_LWRD), + REGISTER_DZ(MC_DB_HWRD), }; struct efx_nic_reg_table { u32 offset:24; - u32 min_revision:2, max_revision:2; + u32 min_revision:3, max_revision:3; u32 step:6, rows:21; }; -#define REGISTER_TABLE_DIMENSIONS(_, offset, min_rev, max_rev, step, rows) { \ +#define REGISTER_TABLE_DIMENSIONS(_, offset, arch, min_rev, max_rev, step, rows) { \ offset, \ - REGISTER_REVISION_ ## min_rev, REGISTER_REVISION_ ## max_rev, \ + REGISTER_REVISION_ ## arch ## min_rev, \ + REGISTER_REVISION_ ## arch ## max_rev, \ step, rows \ } -#define REGISTER_TABLE(name, min_rev, max_rev) \ +#define REGISTER_TABLE(name, arch, min_rev, max_rev) \ REGISTER_TABLE_DIMENSIONS( \ - name, FR_ ## min_rev ## max_rev ## _ ## name, \ - min_rev, max_rev, \ - FR_ ## min_rev ## max_rev ## _ ## name ## _STEP, \ - FR_ ## min_rev ## max_rev ## _ ## name ## _ROWS) -#define REGISTER_TABLE_AA(name) REGISTER_TABLE(name, A, A) -#define REGISTER_TABLE_AZ(name) REGISTER_TABLE(name, A, Z) -#define REGISTER_TABLE_BB(name) REGISTER_TABLE(name, B, B) -#define REGISTER_TABLE_BZ(name) REGISTER_TABLE(name, B, Z) + name, arch ## R_ ## min_rev ## max_rev ## _ ## name, \ + arch, min_rev, max_rev, \ + arch ## R_ ## min_rev ## max_rev ## _ ## name ## _STEP, \ + arch ## R_ ## min_rev ## max_rev ## _ ## name ## _ROWS) +#define REGISTER_TABLE_AA(name) REGISTER_TABLE(name, F, A, A) +#define REGISTER_TABLE_AZ(name) REGISTER_TABLE(name, F, A, Z) +#define REGISTER_TABLE_BB(name) REGISTER_TABLE(name, F, B, B) +#define REGISTER_TABLE_BZ(name) REGISTER_TABLE(name, F, B, Z) #define REGISTER_TABLE_BB_CZ(name) \ - REGISTER_TABLE_DIMENSIONS(name, FR_BZ_ ## name, B, B, \ + REGISTER_TABLE_DIMENSIONS(name, FR_BZ_ ## name, F, B, B, \ FR_BZ_ ## name ## _STEP, \ FR_BB_ ## name ## _ROWS), \ - REGISTER_TABLE_DIMENSIONS(name, FR_BZ_ ## name, C, Z, \ + REGISTER_TABLE_DIMENSIONS(name, FR_BZ_ ## name, F, C, Z, \ FR_BZ_ ## name ## _STEP, \ FR_CZ_ ## name ## _ROWS) -#define REGISTER_TABLE_CZ(name) REGISTER_TABLE(name, C, Z) +#define REGISTER_TABLE_CZ(name) REGISTER_TABLE(name, F, C, Z) +#define REGISTER_TABLE_DZ(name) REGISTER_TABLE(name, E, D, Z) static const struct efx_nic_reg_table efx_nic_reg_tables[] = { /* DRIVER is not used */ @@ -340,9 +350,9 @@ static const struct efx_nic_reg_table efx_nic_reg_tables[] = { * 1K entries allows for some expansion of queue count and * size before we need to change the version. */ REGISTER_TABLE_DIMENSIONS(BUF_FULL_TBL_KER, FR_AA_BUF_FULL_TBL_KER, - A, A, 8, 1024), + F, A, A, 8, 1024), REGISTER_TABLE_DIMENSIONS(BUF_FULL_TBL, FR_BZ_BUF_FULL_TBL, - B, Z, 8, 1024), + F, B, Z, 8, 1024), REGISTER_TABLE_CZ(RX_MAC_FILTER_TBL0), REGISTER_TABLE_BB_CZ(TIMER_TBL), REGISTER_TABLE_BB_CZ(TX_PACE_TBL), @@ -353,6 +363,7 @@ static const struct efx_nic_reg_table efx_nic_reg_tables[] = { /* MSIX_PBA_TABLE is not mapped */ /* SRM_DBG is not mapped (and is redundant with BUF_FLL_TBL) */ REGISTER_TABLE_BZ(RX_FILTER_TBL0), + REGISTER_TABLE_DZ(BIU_MC_SFT_STATUS), }; size_t efx_nic_get_regs_len(struct efx_nic *efx) From c78c39e67cf65d147fae72c3c994b1ddc1a3dd04 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 8 Mar 2013 20:03:17 +0000 Subject: [PATCH 04/10] sfc: Fold tso_get_head_fragment() into tso_start() Signed-off-by: Ben Hutchings --- drivers/net/ethernet/sfc/tx.c | 62 +++++++++++++++++------------------ 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 2ac91c5b5eea..902535696928 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -822,11 +822,17 @@ static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue) /* Parse the SKB header and initialise state. */ -static void tso_start(struct tso_state *st, const struct sk_buff *skb) +static int tso_start(struct tso_state *st, struct efx_nic *efx, + const struct sk_buff *skb) { + unsigned int header_len, in_len; + st->ip_off = skb_network_header(skb) - skb->data; st->tcp_off = skb_transport_header(skb) - skb->data; - st->header_len = st->tcp_off + (tcp_hdr(skb)->doff << 2u); + header_len = st->tcp_off + (tcp_hdr(skb)->doff << 2u); + in_len = skb_headlen(skb) - header_len; + st->header_len = header_len; + st->in_len = in_len; if (st->protocol == htons(ETH_P_IP)) { st->ip_base_len = st->header_len - st->ip_off; st->ipv4_id = ntohs(ip_hdr(skb)->id); @@ -840,9 +846,24 @@ static void tso_start(struct tso_state *st, const struct sk_buff *skb) EFX_BUG_ON_PARANOID(tcp_hdr(skb)->syn); EFX_BUG_ON_PARANOID(tcp_hdr(skb)->rst); - st->out_len = skb->len - st->header_len; - st->unmap_len = 0; - st->dma_flags = 0; + st->out_len = skb->len - header_len; + + if (likely(in_len == 0)) { + st->unmap_len = 0; + st->dma_flags = 0; + return 0; + } + + st->unmap_addr = dma_map_single(&efx->pci_dev->dev, + skb->data + header_len, in_len, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(&efx->pci_dev->dev, st->unmap_addr))) + return -ENOMEM; + + st->dma_flags = EFX_TX_BUF_MAP_SINGLE; + st->unmap_len = in_len; + st->dma_addr = st->unmap_addr; + return 0; } static int tso_get_fragment(struct tso_state *st, struct efx_nic *efx, @@ -860,24 +881,6 @@ static int tso_get_fragment(struct tso_state *st, struct efx_nic *efx, return -ENOMEM; } -static int tso_get_head_fragment(struct tso_state *st, struct efx_nic *efx, - const struct sk_buff *skb) -{ - int hl = st->header_len; - int len = skb_headlen(skb) - hl; - - st->unmap_addr = dma_map_single(&efx->pci_dev->dev, skb->data + hl, - len, DMA_TO_DEVICE); - if (likely(!dma_mapping_error(&efx->pci_dev->dev, st->unmap_addr))) { - st->dma_flags = EFX_TX_BUF_MAP_SINGLE; - st->unmap_len = len; - st->in_len = len; - st->dma_addr = st->unmap_addr; - return 0; - } - return -ENOMEM; -} - /** * tso_fill_packet_with_fragment - form descriptors for the current fragment @@ -1023,12 +1026,11 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count); - tso_start(&state, skb); + rc = tso_start(&state, efx, skb); + if (rc) + goto mem_err; - /* Assume that skb header area contains exactly the headers, and - * all payload is in the frag list. - */ - if (skb_headlen(skb) == state.header_len) { + if (likely(state.in_len == 0)) { /* Grab the first payload fragment. */ EFX_BUG_ON_PARANOID(skb_shinfo(skb)->nr_frags < 1); frag_i = 0; @@ -1037,9 +1039,7 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, if (rc) goto mem_err; } else { - rc = tso_get_head_fragment(&state, efx, skb); - if (rc) - goto mem_err; + /* Payload starts in the header area. */ frag_i = -1; } From dfa50be95cc2c42d243c33433a5ff977177a3236 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 8 Mar 2013 21:20:09 +0000 Subject: [PATCH 05/10] sfc: Implement firmware-assisted TSO for EF10 Segmentation remains in the driver, but we generate option descriptors describing the required packet editing rather than making our own copies. Reduce tso_state::ipv4_id to 16 bits, so it doesn't overflow into the TCP_FLAGS field of the option descriptor. Signed-off-by: Ben Hutchings --- drivers/net/ethernet/sfc/tx.c | 173 ++++++++++++++++++++++++---------- 1 file changed, 121 insertions(+), 52 deletions(-) diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 902535696928..a23ba0dbdd1d 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -21,6 +21,7 @@ #include "efx.h" #include "nic.h" #include "workarounds.h" +#include "ef10_regs.h" static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, struct efx_tx_buffer *buffer, @@ -83,8 +84,10 @@ unsigned int efx_tx_max_skb_descs(struct efx_nic *efx) */ unsigned int max_descs = EFX_TSO_MAX_SEGS * 2 + MAX_SKB_FRAGS; - /* Possibly one more per segment for the alignment workaround */ - if (EFX_WORKAROUND_5391(efx)) + /* Possibly one more per segment for the alignment workaround, + * or for option descriptors + */ + if (EFX_WORKAROUND_5391(efx) || efx_nic_rev(efx) >= EFX_REV_HUNT_A0) max_descs += EFX_TSO_MAX_SEGS; /* Possibly more for PCIe page boundaries within input fragments */ @@ -628,6 +631,9 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue) * @tcp_off: Offset of TCP header * @header_len: Number of bytes of header * @ip_base_len: IPv4 tot_len or IPv6 payload_len, before TCP payload + * @header_dma_addr: Header DMA address, when using option descriptors + * @header_unmap_len: Header DMA mapped length, or 0 if not using option + * descriptors * * The state used during segmentation. It is put into this data structure * just to make it easy to pass into inline functions. @@ -636,7 +642,7 @@ struct tso_state { /* Output position */ unsigned out_len; unsigned seqnum; - unsigned ipv4_id; + u16 ipv4_id; unsigned packet_space; /* Input position */ @@ -651,6 +657,8 @@ struct tso_state { unsigned int tcp_off; unsigned header_len; unsigned int ip_base_len; + dma_addr_t header_dma_addr; + unsigned int header_unmap_len; }; @@ -825,7 +833,10 @@ static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue) static int tso_start(struct tso_state *st, struct efx_nic *efx, const struct sk_buff *skb) { + bool use_options = efx_nic_rev(efx) >= EFX_REV_HUNT_A0; + struct device *dma_dev = &efx->pci_dev->dev; unsigned int header_len, in_len; + dma_addr_t dma_addr; st->ip_off = skb_network_header(skb) - skb->data; st->tcp_off = skb_transport_header(skb) - skb->data; @@ -848,22 +859,32 @@ static int tso_start(struct tso_state *st, struct efx_nic *efx, st->out_len = skb->len - header_len; - if (likely(in_len == 0)) { - st->unmap_len = 0; + if (!use_options) { + st->header_unmap_len = 0; + + if (likely(in_len == 0)) { + st->dma_flags = 0; + st->unmap_len = 0; + return 0; + } + + dma_addr = dma_map_single(dma_dev, skb->data + header_len, + in_len, DMA_TO_DEVICE); + st->dma_flags = EFX_TX_BUF_MAP_SINGLE; + st->dma_addr = dma_addr; + st->unmap_addr = dma_addr; + st->unmap_len = in_len; + } else { + dma_addr = dma_map_single(dma_dev, skb->data, + skb_headlen(skb), DMA_TO_DEVICE); + st->header_dma_addr = dma_addr; + st->header_unmap_len = skb_headlen(skb); st->dma_flags = 0; - return 0; + st->dma_addr = dma_addr + header_len; + st->unmap_len = 0; } - st->unmap_addr = dma_map_single(&efx->pci_dev->dev, - skb->data + header_len, in_len, - DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(&efx->pci_dev->dev, st->unmap_addr))) - return -ENOMEM; - - st->dma_flags = EFX_TX_BUF_MAP_SINGLE; - st->unmap_len = in_len; - st->dma_addr = st->unmap_addr; - return 0; + return unlikely(dma_mapping_error(dma_dev, dma_addr)) ? -ENOMEM : 0; } static int tso_get_fragment(struct tso_state *st, struct efx_nic *efx, @@ -948,54 +969,97 @@ static int tso_start_new_packet(struct efx_tx_queue *tx_queue, { struct efx_tx_buffer *buffer = &tx_queue->buffer[tx_queue->insert_count & tx_queue->ptr_mask]; - struct tcphdr *tsoh_th; - unsigned ip_length; - u8 *header; - int rc; + bool is_last = st->out_len <= skb_shinfo(skb)->gso_size; + u8 tcp_flags_clear; - /* Allocate and insert a DMA-mapped header buffer. */ - header = efx_tsoh_get_buffer(tx_queue, buffer, st->header_len); - if (!header) - return -ENOMEM; - - tsoh_th = (struct tcphdr *)(header + st->tcp_off); - - /* Copy and update the headers. */ - memcpy(header, skb->data, st->header_len); - - tsoh_th->seq = htonl(st->seqnum); - st->seqnum += skb_shinfo(skb)->gso_size; - if (st->out_len > skb_shinfo(skb)->gso_size) { - /* This packet will not finish the TSO burst. */ + if (!is_last) { st->packet_space = skb_shinfo(skb)->gso_size; - tsoh_th->fin = 0; - tsoh_th->psh = 0; + tcp_flags_clear = 0x09; /* mask out FIN and PSH */ } else { - /* This packet will be the last in the TSO burst. */ st->packet_space = st->out_len; - tsoh_th->fin = tcp_hdr(skb)->fin; - tsoh_th->psh = tcp_hdr(skb)->psh; + tcp_flags_clear = 0x00; } - ip_length = st->ip_base_len + st->packet_space; - if (st->protocol == htons(ETH_P_IP)) { - struct iphdr *tsoh_iph = (struct iphdr *)(header + st->ip_off); + if (!st->header_unmap_len) { + /* Allocate and insert a DMA-mapped header buffer. */ + struct tcphdr *tsoh_th; + unsigned ip_length; + u8 *header; + int rc; - tsoh_iph->tot_len = htons(ip_length); + header = efx_tsoh_get_buffer(tx_queue, buffer, st->header_len); + if (!header) + return -ENOMEM; - /* Linux leaves suitable gaps in the IP ID space for us to fill. */ - tsoh_iph->id = htons(st->ipv4_id); - st->ipv4_id++; + tsoh_th = (struct tcphdr *)(header + st->tcp_off); + + /* Copy and update the headers. */ + memcpy(header, skb->data, st->header_len); + + tsoh_th->seq = htonl(st->seqnum); + ((u8 *)tsoh_th)[13] &= ~tcp_flags_clear; + + ip_length = st->ip_base_len + st->packet_space; + + if (st->protocol == htons(ETH_P_IP)) { + struct iphdr *tsoh_iph = + (struct iphdr *)(header + st->ip_off); + + tsoh_iph->tot_len = htons(ip_length); + tsoh_iph->id = htons(st->ipv4_id); + } else { + struct ipv6hdr *tsoh_iph = + (struct ipv6hdr *)(header + st->ip_off); + + tsoh_iph->payload_len = htons(ip_length); + } + + rc = efx_tso_put_header(tx_queue, buffer, header); + if (unlikely(rc)) + return rc; } else { - struct ipv6hdr *tsoh_iph = - (struct ipv6hdr *)(header + st->ip_off); + /* Send the original headers with a TSO option descriptor + * in front + */ + u8 tcp_flags = ((u8 *)tcp_hdr(skb))[13] & ~tcp_flags_clear; - tsoh_iph->payload_len = htons(ip_length); + buffer->flags = EFX_TX_BUF_OPTION; + buffer->len = 0; + buffer->unmap_len = 0; + EFX_POPULATE_QWORD_5(buffer->option, + ESF_DZ_TX_DESC_IS_OPT, 1, + ESF_DZ_TX_OPTION_TYPE, + ESE_DZ_TX_OPTION_DESC_TSO, + ESF_DZ_TX_TSO_TCP_FLAGS, tcp_flags, + ESF_DZ_TX_TSO_IP_ID, st->ipv4_id, + ESF_DZ_TX_TSO_TCP_SEQNO, st->seqnum); + ++tx_queue->insert_count; + + /* We mapped the headers in tso_start(). Unmap them + * when the last segment is completed. + */ + buffer = &tx_queue->buffer[tx_queue->insert_count & + tx_queue->ptr_mask]; + buffer->dma_addr = st->header_dma_addr; + buffer->len = st->header_len; + if (is_last) { + buffer->flags = EFX_TX_BUF_CONT | EFX_TX_BUF_MAP_SINGLE; + buffer->unmap_len = st->header_unmap_len; + /* Ensure we only unmap them once in case of a + * later DMA mapping error and rollback + */ + st->header_unmap_len = 0; + } else { + buffer->flags = EFX_TX_BUF_CONT; + buffer->unmap_len = 0; + } + ++tx_queue->insert_count; } - rc = efx_tso_put_header(tx_queue, buffer, header); - if (unlikely(rc)) - return rc; + st->seqnum += skb_shinfo(skb)->gso_size; + + /* Linux leaves suitable gaps in the IP ID space for us to fill. */ + ++st->ipv4_id; ++tx_queue->tso_packets; @@ -1091,6 +1155,11 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, state.unmap_len, DMA_TO_DEVICE); } + /* Free the header DMA mapping, if using option descriptors */ + if (state.header_unmap_len) + dma_unmap_single(&efx->pci_dev->dev, state.header_dma_addr, + state.header_unmap_len, DMA_TO_DEVICE); + efx_enqueue_unwind(tx_queue); return NETDEV_TX_OK; } From 183233bec8105c06341d8cd380db1d830d0efc00 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 28 Jun 2013 21:47:12 +0100 Subject: [PATCH 06/10] sfc: Allocate and link PIO buffers; map them with write-combining Try to allocate a segment of PIO buffer to each TX channel. If allocation fails, log an error but continue. PIO buffers must be mapped separately from the NIC registers, with write-combining enabled. Where the host page size is 4K, we could potentially map each VI's registers and PIO buffer separately. However, this would add significant complexity, and we also need to support architectures such as POWER which have a greater page size. So make a single contiguous write-combining mapping after the uncacheable mapping, aligned to the host page size, and link PIO buffers there. Where necessary, allocate additional VIs within the write-combining mapping purely for access to PIO buffers. Link all TX buffers to TX queues and the additional VIs in efx_ef10_dimension_resources() and in efx_ef10_init_nic() after an MC reboot. Signed-off-by: Ben Hutchings --- drivers/net/ethernet/sfc/ef10.c | 317 +++++++++++++++++++++++++- drivers/net/ethernet/sfc/efx.h | 1 + drivers/net/ethernet/sfc/io.h | 5 + drivers/net/ethernet/sfc/net_driver.h | 5 + drivers/net/ethernet/sfc/nic.h | 18 ++ drivers/net/ethernet/sfc/tx.c | 10 + 6 files changed, 353 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 0a3a3b92a131..844ee7539d33 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -285,6 +285,181 @@ static int efx_ef10_free_vis(struct efx_nic *efx) return rc; } +#ifdef EFX_USE_PIO + +static void efx_ef10_free_piobufs(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + MCDI_DECLARE_BUF(inbuf, MC_CMD_FREE_PIOBUF_IN_LEN); + unsigned int i; + int rc; + + BUILD_BUG_ON(MC_CMD_FREE_PIOBUF_OUT_LEN != 0); + + for (i = 0; i < nic_data->n_piobufs; i++) { + MCDI_SET_DWORD(inbuf, FREE_PIOBUF_IN_PIOBUF_HANDLE, + nic_data->piobuf_handle[i]); + rc = efx_mcdi_rpc(efx, MC_CMD_FREE_PIOBUF, inbuf, sizeof(inbuf), + NULL, 0, NULL); + WARN_ON(rc); + } + + nic_data->n_piobufs = 0; +} + +static int efx_ef10_alloc_piobufs(struct efx_nic *efx, unsigned int n) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + MCDI_DECLARE_BUF(outbuf, MC_CMD_ALLOC_PIOBUF_OUT_LEN); + unsigned int i; + size_t outlen; + int rc = 0; + + BUILD_BUG_ON(MC_CMD_ALLOC_PIOBUF_IN_LEN != 0); + + for (i = 0; i < n; i++) { + rc = efx_mcdi_rpc(efx, MC_CMD_ALLOC_PIOBUF, NULL, 0, + outbuf, sizeof(outbuf), &outlen); + if (rc) + break; + if (outlen < MC_CMD_ALLOC_PIOBUF_OUT_LEN) { + rc = -EIO; + break; + } + nic_data->piobuf_handle[i] = + MCDI_DWORD(outbuf, ALLOC_PIOBUF_OUT_PIOBUF_HANDLE); + netif_dbg(efx, probe, efx->net_dev, + "allocated PIO buffer %u handle %x\n", i, + nic_data->piobuf_handle[i]); + } + + nic_data->n_piobufs = i; + if (rc) + efx_ef10_free_piobufs(efx); + return rc; +} + +static int efx_ef10_link_piobufs(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + MCDI_DECLARE_BUF(inbuf, + max(MC_CMD_LINK_PIOBUF_IN_LEN, + MC_CMD_UNLINK_PIOBUF_IN_LEN)); + struct efx_channel *channel; + struct efx_tx_queue *tx_queue; + unsigned int offset, index; + int rc; + + BUILD_BUG_ON(MC_CMD_LINK_PIOBUF_OUT_LEN != 0); + BUILD_BUG_ON(MC_CMD_UNLINK_PIOBUF_OUT_LEN != 0); + + /* Link a buffer to each VI in the write-combining mapping */ + for (index = 0; index < nic_data->n_piobufs; ++index) { + MCDI_SET_DWORD(inbuf, LINK_PIOBUF_IN_PIOBUF_HANDLE, + nic_data->piobuf_handle[index]); + MCDI_SET_DWORD(inbuf, LINK_PIOBUF_IN_TXQ_INSTANCE, + nic_data->pio_write_vi_base + index); + rc = efx_mcdi_rpc(efx, MC_CMD_LINK_PIOBUF, + inbuf, MC_CMD_LINK_PIOBUF_IN_LEN, + NULL, 0, NULL); + if (rc) { + netif_err(efx, drv, efx->net_dev, + "failed to link VI %u to PIO buffer %u (%d)\n", + nic_data->pio_write_vi_base + index, index, + rc); + goto fail; + } + netif_dbg(efx, probe, efx->net_dev, + "linked VI %u to PIO buffer %u\n", + nic_data->pio_write_vi_base + index, index); + } + + /* Link a buffer to each TX queue */ + efx_for_each_channel(channel, efx) { + efx_for_each_channel_tx_queue(tx_queue, channel) { + /* We assign the PIO buffers to queues in + * reverse order to allow for the following + * special case. + */ + offset = ((efx->tx_channel_offset + efx->n_tx_channels - + tx_queue->channel->channel - 1) * + efx_piobuf_size); + index = offset / ER_DZ_TX_PIOBUF_SIZE; + offset = offset % ER_DZ_TX_PIOBUF_SIZE; + + /* When the host page size is 4K, the first + * host page in the WC mapping may be within + * the same VI page as the last TX queue. We + * can only link one buffer to each VI. + */ + if (tx_queue->queue == nic_data->pio_write_vi_base) { + BUG_ON(index != 0); + rc = 0; + } else { + MCDI_SET_DWORD(inbuf, + LINK_PIOBUF_IN_PIOBUF_HANDLE, + nic_data->piobuf_handle[index]); + MCDI_SET_DWORD(inbuf, + LINK_PIOBUF_IN_TXQ_INSTANCE, + tx_queue->queue); + rc = efx_mcdi_rpc(efx, MC_CMD_LINK_PIOBUF, + inbuf, MC_CMD_LINK_PIOBUF_IN_LEN, + NULL, 0, NULL); + } + + if (rc) { + /* This is non-fatal; the TX path just + * won't use PIO for this queue + */ + netif_err(efx, drv, efx->net_dev, + "failed to link VI %u to PIO buffer %u (%d)\n", + tx_queue->queue, index, rc); + tx_queue->piobuf = NULL; + } else { + tx_queue->piobuf = + nic_data->pio_write_base + + index * EFX_VI_PAGE_SIZE + offset; + tx_queue->piobuf_offset = offset; + netif_dbg(efx, probe, efx->net_dev, + "linked VI %u to PIO buffer %u offset %x addr %p\n", + tx_queue->queue, index, + tx_queue->piobuf_offset, + tx_queue->piobuf); + } + } + } + + return 0; + +fail: + while (index--) { + MCDI_SET_DWORD(inbuf, UNLINK_PIOBUF_IN_TXQ_INSTANCE, + nic_data->pio_write_vi_base + index); + efx_mcdi_rpc(efx, MC_CMD_UNLINK_PIOBUF, + inbuf, MC_CMD_UNLINK_PIOBUF_IN_LEN, + NULL, 0, NULL); + } + return rc; +} + +#else /* !EFX_USE_PIO */ + +static int efx_ef10_alloc_piobufs(struct efx_nic *efx, unsigned int n) +{ + return n == 0 ? 0 : -ENOBUFS; +} + +static int efx_ef10_link_piobufs(struct efx_nic *efx) +{ + return 0; +} + +static void efx_ef10_free_piobufs(struct efx_nic *efx) +{ +} + +#endif /* EFX_USE_PIO */ + static void efx_ef10_remove(struct efx_nic *efx) { struct efx_ef10_nic_data *nic_data = efx->nic_data; @@ -295,9 +470,15 @@ static void efx_ef10_remove(struct efx_nic *efx) /* This needs to be after efx_ptp_remove_channel() with no filters */ efx_ef10_rx_free_indir_table(efx); + if (nic_data->wc_membase) + iounmap(nic_data->wc_membase); + rc = efx_ef10_free_vis(efx); WARN_ON(rc != 0); + if (!nic_data->must_restore_piobufs) + efx_ef10_free_piobufs(efx); + efx_mcdi_fini(efx); efx_nic_free_buffer(efx, &nic_data->mcdi_buf); kfree(nic_data); @@ -330,12 +511,126 @@ static int efx_ef10_alloc_vis(struct efx_nic *efx, return 0; } +/* Note that the failure path of this function does not free + * resources, as this will be done by efx_ef10_remove(). + */ static int efx_ef10_dimension_resources(struct efx_nic *efx) { - unsigned int n_vis = - max(efx->n_channels, efx->n_tx_channels * EFX_TXQ_TYPES); + struct efx_ef10_nic_data *nic_data = efx->nic_data; + unsigned int uc_mem_map_size, wc_mem_map_size; + unsigned int min_vis, pio_write_vi_base, max_vis; + void __iomem *membase; + int rc; - return efx_ef10_alloc_vis(efx, n_vis, n_vis); + min_vis = max(efx->n_channels, efx->n_tx_channels * EFX_TXQ_TYPES); + +#ifdef EFX_USE_PIO + /* Try to allocate PIO buffers if wanted and if the full + * number of PIO buffers would be sufficient to allocate one + * copy-buffer per TX channel. Failure is non-fatal, as there + * are only a small number of PIO buffers shared between all + * functions of the controller. + */ + if (efx_piobuf_size != 0 && + ER_DZ_TX_PIOBUF_SIZE / efx_piobuf_size * EF10_TX_PIOBUF_COUNT >= + efx->n_tx_channels) { + unsigned int n_piobufs = + DIV_ROUND_UP(efx->n_tx_channels, + ER_DZ_TX_PIOBUF_SIZE / efx_piobuf_size); + + rc = efx_ef10_alloc_piobufs(efx, n_piobufs); + if (rc) + netif_err(efx, probe, efx->net_dev, + "failed to allocate PIO buffers (%d)\n", rc); + else + netif_dbg(efx, probe, efx->net_dev, + "allocated %u PIO buffers\n", n_piobufs); + } +#else + nic_data->n_piobufs = 0; +#endif + + /* PIO buffers should be mapped with write-combining enabled, + * and we want to make single UC and WC mappings rather than + * several of each (in fact that's the only option if host + * page size is >4K). So we may allocate some extra VIs just + * for writing PIO buffers through. + */ + uc_mem_map_size = PAGE_ALIGN((min_vis - 1) * EFX_VI_PAGE_SIZE + + ER_DZ_TX_PIOBUF); + if (nic_data->n_piobufs) { + pio_write_vi_base = uc_mem_map_size / EFX_VI_PAGE_SIZE; + wc_mem_map_size = (PAGE_ALIGN((pio_write_vi_base + + nic_data->n_piobufs) * + EFX_VI_PAGE_SIZE) - + uc_mem_map_size); + max_vis = pio_write_vi_base + nic_data->n_piobufs; + } else { + pio_write_vi_base = 0; + wc_mem_map_size = 0; + max_vis = min_vis; + } + + /* In case the last attached driver failed to free VIs, do it now */ + rc = efx_ef10_free_vis(efx); + if (rc != 0) + return rc; + + rc = efx_ef10_alloc_vis(efx, min_vis, max_vis); + if (rc != 0) + return rc; + + /* If we didn't get enough VIs to map all the PIO buffers, free the + * PIO buffers + */ + if (nic_data->n_piobufs && + nic_data->n_allocated_vis < + pio_write_vi_base + nic_data->n_piobufs) { + netif_dbg(efx, probe, efx->net_dev, + "%u VIs are not sufficient to map %u PIO buffers\n", + nic_data->n_allocated_vis, nic_data->n_piobufs); + efx_ef10_free_piobufs(efx); + } + + /* Shrink the original UC mapping of the memory BAR */ + membase = ioremap_nocache(efx->membase_phys, uc_mem_map_size); + if (!membase) { + netif_err(efx, probe, efx->net_dev, + "could not shrink memory BAR to %x\n", + uc_mem_map_size); + return -ENOMEM; + } + iounmap(efx->membase); + efx->membase = membase; + + /* Set up the WC mapping if needed */ + if (wc_mem_map_size) { + nic_data->wc_membase = ioremap_wc(efx->membase_phys + + uc_mem_map_size, + wc_mem_map_size); + if (!nic_data->wc_membase) { + netif_err(efx, probe, efx->net_dev, + "could not allocate WC mapping of size %x\n", + wc_mem_map_size); + return -ENOMEM; + } + nic_data->pio_write_vi_base = pio_write_vi_base; + nic_data->pio_write_base = + nic_data->wc_membase + + (pio_write_vi_base * EFX_VI_PAGE_SIZE + ER_DZ_TX_PIOBUF - + uc_mem_map_size); + + rc = efx_ef10_link_piobufs(efx); + if (rc) + efx_ef10_free_piobufs(efx); + } + + netif_dbg(efx, probe, efx->net_dev, + "memory BAR at %pa (virtual %p+%x UC, %p+%x WC)\n", + &efx->membase_phys, efx->membase, uc_mem_map_size, + nic_data->wc_membase, wc_mem_map_size); + + return 0; } static int efx_ef10_init_nic(struct efx_nic *efx) @@ -359,6 +654,21 @@ static int efx_ef10_init_nic(struct efx_nic *efx) nic_data->must_realloc_vis = false; } + if (nic_data->must_restore_piobufs && nic_data->n_piobufs) { + rc = efx_ef10_alloc_piobufs(efx, nic_data->n_piobufs); + if (rc == 0) { + rc = efx_ef10_link_piobufs(efx); + if (rc) + efx_ef10_free_piobufs(efx); + } + + /* Log an error on failure, but this is non-fatal */ + if (rc) + netif_err(efx, drv, efx->net_dev, + "failed to restore PIO buffers (%d)\n", rc); + nic_data->must_restore_piobufs = false; + } + efx_ef10_rx_push_indir_table(efx); return 0; } @@ -716,6 +1026,7 @@ static int efx_ef10_mcdi_poll_reboot(struct efx_nic *efx) /* All our allocations have been reset */ nic_data->must_realloc_vis = true; nic_data->must_restore_filters = true; + nic_data->must_restore_piobufs = true; nic_data->rx_rss_context = EFX_EF10_RSS_CONTEXT_INVALID; /* The datapath firmware might have been changed */ diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h index 34d00f5771fe..31d01284e333 100644 --- a/drivers/net/ethernet/sfc/efx.h +++ b/drivers/net/ethernet/sfc/efx.h @@ -30,6 +30,7 @@ efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb); extern void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index); extern int efx_setup_tc(struct net_device *net_dev, u8 num_tc); extern unsigned int efx_tx_max_skb_descs(struct efx_nic *efx); +extern unsigned int efx_piobuf_size; /* RX */ extern void efx_rx_config_page_split(struct efx_nic *efx); diff --git a/drivers/net/ethernet/sfc/io.h b/drivers/net/ethernet/sfc/io.h index 96ce507d8602..4d3f119b67b3 100644 --- a/drivers/net/ethernet/sfc/io.h +++ b/drivers/net/ethernet/sfc/io.h @@ -66,6 +66,11 @@ #define EFX_USE_QWORD_IO 1 #endif +/* PIO is a win only if write-combining is possible */ +#ifdef ARCH_HAS_IOREMAP_WC +#define EFX_USE_PIO 1 +#endif + #ifdef EFX_USE_QWORD_IO static inline void _efx_writeq(struct efx_nic *efx, __le64 value, unsigned int reg) diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index b172ed133055..6febecc46e86 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -182,6 +182,9 @@ struct efx_tx_buffer { * @tsoh_page: Array of pages of TSO header buffers * @txd: The hardware descriptor ring * @ptr_mask: The size of the ring minus 1. + * @piobuf: PIO buffer region for this TX queue (shared with its partner). + * Size of the region is efx_piobuf_size. + * @piobuf_offset: Buffer offset to be specified in PIO descriptors * @initialised: Has hardware queue been initialised? * @read_count: Current read pointer. * This is the number of buffers that have been removed from both rings. @@ -223,6 +226,8 @@ struct efx_tx_queue { struct efx_buffer *tsoh_page; struct efx_special_buffer txd; unsigned int ptr_mask; + void __iomem *piobuf; + unsigned int piobuf_offset; bool initialised; /* Members used mainly on the completion path */ diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h index fda29d39032f..db70005999c3 100644 --- a/drivers/net/ethernet/sfc/nic.h +++ b/drivers/net/ethernet/sfc/nic.h @@ -389,6 +389,12 @@ enum { EF10_STAT_COUNT }; +/* Maximum number of TX PIO buffers we may allocate to a function. + * This matches the total number of buffers on each SFC9100-family + * controller. + */ +#define EF10_TX_PIOBUF_COUNT 16 + /** * struct efx_ef10_nic_data - EF10 architecture NIC state * @mcdi_buf: DMA buffer for MCDI @@ -397,6 +403,13 @@ enum { * @n_allocated_vis: Number of VIs allocated to this function * @must_realloc_vis: Flag: VIs have yet to be reallocated after MC reboot * @must_restore_filters: Flag: filters have yet to be restored after MC reboot + * @n_piobufs: Number of PIO buffers allocated to this function + * @wc_membase: Base address of write-combining mapping of the memory BAR + * @pio_write_base: Base address for writing PIO buffers + * @pio_write_vi_base: Relative VI number for @pio_write_base + * @piobuf_handle: Handle of each PIO buffer allocated + * @must_restore_piobufs: Flag: PIO buffers have yet to be restored after MC + * reboot * @rx_rss_context: Firmware handle for our RSS context * @stats: Hardware statistics * @workaround_35388: Flag: firmware supports workaround for bug 35388 @@ -412,6 +425,11 @@ struct efx_ef10_nic_data { unsigned int n_allocated_vis; bool must_realloc_vis; bool must_restore_filters; + unsigned int n_piobufs; + void __iomem *wc_membase, *pio_write_base; + unsigned int pio_write_vi_base; + unsigned int piobuf_handle[EF10_TX_PIOBUF_COUNT]; + bool must_restore_piobufs; u32 rx_rss_context; u64 stats[EF10_STAT_COUNT]; bool workaround_35388; diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index a23ba0dbdd1d..87543476382d 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -17,12 +17,22 @@ #include #include #include +#include #include "net_driver.h" #include "efx.h" +#include "io.h" #include "nic.h" #include "workarounds.h" #include "ef10_regs.h" +#ifdef EFX_USE_PIO + +#define EFX_PIOBUF_SIZE_MAX ER_DZ_TX_PIOBUF_SIZE +#define EFX_PIOBUF_SIZE_DEF ALIGN(256, L1_CACHE_BYTES) +unsigned int efx_piobuf_size __read_mostly = EFX_PIOBUF_SIZE_DEF; + +#endif /* EFX_USE_PIO */ + static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, struct efx_tx_buffer *buffer, unsigned int *pkts_compl, From 306a27825c54e840bc33db924878fe6ef2a2be4f Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 28 Jun 2013 21:47:15 +0100 Subject: [PATCH 07/10] sfc: Separate out queue-empty check from efx_nic_may_push_tx_desc() Signed-off-by: Ben Hutchings --- drivers/net/ethernet/sfc/nic.h | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h index db70005999c3..609f06769245 100644 --- a/drivers/net/ethernet/sfc/nic.h +++ b/drivers/net/ethernet/sfc/nic.h @@ -71,6 +71,26 @@ efx_tx_desc(struct efx_tx_queue *tx_queue, unsigned int index) return ((efx_qword_t *) (tx_queue->txd.buf.addr)) + index; } +/* Report whether the NIC considers this TX queue empty, given the + * write_count used for the last doorbell push. May return false + * negative. + */ +static inline bool __efx_nic_tx_is_empty(struct efx_tx_queue *tx_queue, + unsigned int write_count) +{ + unsigned int empty_read_count = ACCESS_ONCE(tx_queue->empty_read_count); + + if (empty_read_count == 0) + return false; + + return ((empty_read_count ^ write_count) & ~EFX_EMPTY_COUNT_VALID) == 0; +} + +static inline bool efx_nic_tx_is_empty(struct efx_tx_queue *tx_queue) +{ + return __efx_nic_tx_is_empty(tx_queue, tx_queue->write_count); +} + /* Decide whether to push a TX descriptor to the NIC vs merely writing * the doorbell. This can reduce latency when we are adding a single * descriptor to an empty queue, but is otherwise pointless. Further, @@ -80,14 +100,10 @@ efx_tx_desc(struct efx_tx_queue *tx_queue, unsigned int index) static inline bool efx_nic_may_push_tx_desc(struct efx_tx_queue *tx_queue, unsigned int write_count) { - unsigned empty_read_count = ACCESS_ONCE(tx_queue->empty_read_count); - - if (empty_read_count == 0) - return false; + bool was_empty = __efx_nic_tx_is_empty(tx_queue, write_count); tx_queue->empty_read_count = 0; - return ((empty_read_count ^ write_count) & ~EFX_EMPTY_COUNT_VALID) == 0 - && tx_queue->write_count - write_count == 1; + return was_empty && tx_queue->write_count - write_count == 1; } /* Returns a pointer to the specified descriptor in the RX descriptor queue */ From 0fe5565b64667677a405cf14df49a7af8de4bff4 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 28 Jun 2013 21:47:15 +0100 Subject: [PATCH 08/10] sfc: Introduce inline functions to simplify TX insertion Signed-off-by: Ben Hutchings --- drivers/net/ethernet/sfc/tx.c | 53 ++++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 87543476382d..6cecda3b3cbe 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -33,6 +33,31 @@ unsigned int efx_piobuf_size __read_mostly = EFX_PIOBUF_SIZE_DEF; #endif /* EFX_USE_PIO */ +static inline unsigned int +efx_tx_queue_get_insert_index(const struct efx_tx_queue *tx_queue) +{ + return tx_queue->insert_count & tx_queue->ptr_mask; +} + +static inline struct efx_tx_buffer * +__efx_tx_queue_get_insert_buffer(const struct efx_tx_queue *tx_queue) +{ + return &tx_queue->buffer[efx_tx_queue_get_insert_index(tx_queue)]; +} + +static inline struct efx_tx_buffer * +efx_tx_queue_get_insert_buffer(const struct efx_tx_queue *tx_queue) +{ + struct efx_tx_buffer *buffer = + __efx_tx_queue_get_insert_buffer(tx_queue); + + EFX_BUG_ON_PARANOID(buffer->len); + EFX_BUG_ON_PARANOID(buffer->flags); + EFX_BUG_ON_PARANOID(buffer->unmap_len); + + return buffer; +} + static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, struct efx_tx_buffer *buffer, unsigned int *pkts_compl, @@ -180,7 +205,7 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) struct device *dma_dev = &efx->pci_dev->dev; struct efx_tx_buffer *buffer; skb_frag_t *fragment; - unsigned int len, unmap_len = 0, insert_ptr; + unsigned int len, unmap_len = 0; dma_addr_t dma_addr, unmap_addr = 0; unsigned int dma_len; unsigned short dma_flags; @@ -221,11 +246,7 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) /* Add to TX queue, splitting across DMA boundaries */ do { - insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask; - buffer = &tx_queue->buffer[insert_ptr]; - EFX_BUG_ON_PARANOID(buffer->flags); - EFX_BUG_ON_PARANOID(buffer->len); - EFX_BUG_ON_PARANOID(buffer->unmap_len); + buffer = efx_tx_queue_get_insert_buffer(tx_queue); dma_len = efx_max_tx_len(efx, dma_addr); if (likely(dma_len >= len)) @@ -283,8 +304,7 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) while (tx_queue->insert_count != tx_queue->write_count) { unsigned int pkts_compl = 0, bytes_compl = 0; --tx_queue->insert_count; - insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask; - buffer = &tx_queue->buffer[insert_ptr]; + buffer = __efx_tx_queue_get_insert_buffer(tx_queue); efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl); } @@ -755,23 +775,18 @@ static void efx_tx_queue_insert(struct efx_tx_queue *tx_queue, { struct efx_tx_buffer *buffer; struct efx_nic *efx = tx_queue->efx; - unsigned dma_len, insert_ptr; + unsigned dma_len; EFX_BUG_ON_PARANOID(len <= 0); while (1) { - insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask; - buffer = &tx_queue->buffer[insert_ptr]; + buffer = efx_tx_queue_get_insert_buffer(tx_queue); ++tx_queue->insert_count; EFX_BUG_ON_PARANOID(tx_queue->insert_count - tx_queue->read_count >= efx->txq_entries); - EFX_BUG_ON_PARANOID(buffer->len); - EFX_BUG_ON_PARANOID(buffer->unmap_len); - EFX_BUG_ON_PARANOID(buffer->flags); - buffer->dma_addr = dma_addr; dma_len = efx_max_tx_len(efx, dma_addr); @@ -832,8 +847,7 @@ static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue) /* Work backwards until we hit the original insert pointer value */ while (tx_queue->insert_count != tx_queue->write_count) { --tx_queue->insert_count; - buffer = &tx_queue->buffer[tx_queue->insert_count & - tx_queue->ptr_mask]; + buffer = __efx_tx_queue_get_insert_buffer(tx_queue); efx_dequeue_buffer(tx_queue, buffer, NULL, NULL); } } @@ -978,7 +992,7 @@ static int tso_start_new_packet(struct efx_tx_queue *tx_queue, struct tso_state *st) { struct efx_tx_buffer *buffer = - &tx_queue->buffer[tx_queue->insert_count & tx_queue->ptr_mask]; + efx_tx_queue_get_insert_buffer(tx_queue); bool is_last = st->out_len <= skb_shinfo(skb)->gso_size; u8 tcp_flags_clear; @@ -1048,8 +1062,7 @@ static int tso_start_new_packet(struct efx_tx_queue *tx_queue, /* We mapped the headers in tso_start(). Unmap them * when the last segment is completed. */ - buffer = &tx_queue->buffer[tx_queue->insert_count & - tx_queue->ptr_mask]; + buffer = efx_tx_queue_get_insert_buffer(tx_queue); buffer->dma_addr = st->header_dma_addr; buffer->len = st->header_len; if (is_last) { From ee45fd92c739db5b7950163d91dfe5f016af6d24 Mon Sep 17 00:00:00 2001 From: Jon Cooper Date: Mon, 2 Sep 2013 18:24:29 +0100 Subject: [PATCH 09/10] sfc: Use TX PIO for sufficiently small packets Sufficiently small linear packets can be copied into the PIO buffer with a single call to memcpy_toio(). Non-linear packets require an intermediate cache-line-sized buffer. [bwh: I wrote the first version of this, but Jon did the hard work to handle non-linear packets.] Signed-off-by: Ben Hutchings --- drivers/net/ethernet/sfc/ef10_regs.h | 1 + drivers/net/ethernet/sfc/ethtool.c | 1 + drivers/net/ethernet/sfc/net_driver.h | 2 + drivers/net/ethernet/sfc/tx.c | 151 ++++++++++++++++++++++++++ 4 files changed, 155 insertions(+) diff --git a/drivers/net/ethernet/sfc/ef10_regs.h b/drivers/net/ethernet/sfc/ef10_regs.h index b3f4e3755fd9..207ac9a1e3de 100644 --- a/drivers/net/ethernet/sfc/ef10_regs.h +++ b/drivers/net/ethernet/sfc/ef10_regs.h @@ -315,6 +315,7 @@ #define ESF_DZ_TX_PIO_TYPE_WIDTH 1 #define ESF_DZ_TX_PIO_OPT_LBN 60 #define ESF_DZ_TX_PIO_OPT_WIDTH 3 +#define ESE_DZ_TX_OPTION_DESC_PIO 1 #define ESF_DZ_TX_PIO_CONT_LBN 59 #define ESF_DZ_TX_PIO_CONT_WIDTH 1 #define ESF_DZ_TX_PIO_BYTE_CNT_LBN 32 diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index c8dc407513b0..1f529fa2edb1 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -70,6 +70,7 @@ static const struct efx_sw_stat_desc efx_sw_stat_desc[] = { EFX_ETHTOOL_UINT_TXQ_STAT(tso_long_headers), EFX_ETHTOOL_UINT_TXQ_STAT(tso_packets), EFX_ETHTOOL_UINT_TXQ_STAT(pushes), + EFX_ETHTOOL_UINT_TXQ_STAT(pio_packets), EFX_ETHTOOL_ATOMIC_NIC_ERROR_STAT(rx_reset), EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tobe_disc), EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_ip_hdr_chksum_err), diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 6febecc46e86..aac22a1e85b8 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -212,6 +212,7 @@ struct efx_tx_buffer { * blocks * @tso_packets: Number of packets via the TSO xmit path * @pushes: Number of times the TX push feature has been used + * @pio_packets: Number of times the TX PIO feature has been used * @empty_read_count: If the completion path has seen the queue as empty * and the transmission path has not yet checked this, the value of * @read_count bitwise-added to %EFX_EMPTY_COUNT_VALID; otherwise 0. @@ -243,6 +244,7 @@ struct efx_tx_queue { unsigned int tso_long_headers; unsigned int tso_packets; unsigned int pushes; + unsigned int pio_packets; /* Members shared between paths and sometimes updated */ unsigned int empty_read_count ____cacheline_aligned_in_smp; diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 6cecda3b3cbe..282692c48e6b 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -183,6 +183,145 @@ static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1) } } +#ifdef EFX_USE_PIO + +struct efx_short_copy_buffer { + int used; + u8 buf[L1_CACHE_BYTES]; +}; + +/* Copy to PIO, respecting that writes to PIO buffers must be dword aligned. + * Advances piobuf pointer. Leaves additional data in the copy buffer. + */ +static void efx_memcpy_toio_aligned(struct efx_nic *efx, u8 __iomem **piobuf, + u8 *data, int len, + struct efx_short_copy_buffer *copy_buf) +{ + int block_len = len & ~(sizeof(copy_buf->buf) - 1); + + memcpy_toio(*piobuf, data, block_len); + *piobuf += block_len; + len -= block_len; + + if (len) { + data += block_len; + BUG_ON(copy_buf->used); + BUG_ON(len > sizeof(copy_buf->buf)); + memcpy(copy_buf->buf, data, len); + copy_buf->used = len; + } +} + +/* Copy to PIO, respecting dword alignment, popping data from copy buffer first. + * Advances piobuf pointer. Leaves additional data in the copy buffer. + */ +static void efx_memcpy_toio_aligned_cb(struct efx_nic *efx, u8 __iomem **piobuf, + u8 *data, int len, + struct efx_short_copy_buffer *copy_buf) +{ + if (copy_buf->used) { + /* if the copy buffer is partially full, fill it up and write */ + int copy_to_buf = + min_t(int, sizeof(copy_buf->buf) - copy_buf->used, len); + + memcpy(copy_buf->buf + copy_buf->used, data, copy_to_buf); + copy_buf->used += copy_to_buf; + + /* if we didn't fill it up then we're done for now */ + if (copy_buf->used < sizeof(copy_buf->buf)) + return; + + memcpy_toio(*piobuf, copy_buf->buf, sizeof(copy_buf->buf)); + *piobuf += sizeof(copy_buf->buf); + data += copy_to_buf; + len -= copy_to_buf; + copy_buf->used = 0; + } + + efx_memcpy_toio_aligned(efx, piobuf, data, len, copy_buf); +} + +static void efx_flush_copy_buffer(struct efx_nic *efx, u8 __iomem *piobuf, + struct efx_short_copy_buffer *copy_buf) +{ + /* if there's anything in it, write the whole buffer, including junk */ + if (copy_buf->used) + memcpy_toio(piobuf, copy_buf->buf, sizeof(copy_buf->buf)); +} + +/* Traverse skb structure and copy fragments in to PIO buffer. + * Advances piobuf pointer. + */ +static void efx_skb_copy_bits_to_pio(struct efx_nic *efx, struct sk_buff *skb, + u8 __iomem **piobuf, + struct efx_short_copy_buffer *copy_buf) +{ + int i; + + efx_memcpy_toio_aligned(efx, piobuf, skb->data, skb_headlen(skb), + copy_buf); + + for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) { + skb_frag_t *f = &skb_shinfo(skb)->frags[i]; + u8 *vaddr; + + vaddr = kmap_atomic(skb_frag_page(f)); + + efx_memcpy_toio_aligned_cb(efx, piobuf, vaddr + f->page_offset, + skb_frag_size(f), copy_buf); + kunmap_atomic(vaddr); + } + + EFX_BUG_ON_PARANOID(skb_shinfo(skb)->frag_list); +} + +static struct efx_tx_buffer * +efx_enqueue_skb_pio(struct efx_tx_queue *tx_queue, struct sk_buff *skb) +{ + struct efx_tx_buffer *buffer = + efx_tx_queue_get_insert_buffer(tx_queue); + u8 __iomem *piobuf = tx_queue->piobuf; + + /* Copy to PIO buffer. Ensure the writes are padded to the end + * of a cache line, as this is required for write-combining to be + * effective on at least x86. + */ + + if (skb_shinfo(skb)->nr_frags) { + /* The size of the copy buffer will ensure all writes + * are the size of a cache line. + */ + struct efx_short_copy_buffer copy_buf; + + copy_buf.used = 0; + + efx_skb_copy_bits_to_pio(tx_queue->efx, skb, + &piobuf, ©_buf); + efx_flush_copy_buffer(tx_queue->efx, piobuf, ©_buf); + } else { + /* Pad the write to the size of a cache line. + * We can do this because we know the skb_shared_info sruct is + * after the source, and the destination buffer is big enough. + */ + BUILD_BUG_ON(L1_CACHE_BYTES > + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))); + memcpy_toio(tx_queue->piobuf, skb->data, + ALIGN(skb->len, L1_CACHE_BYTES)); + } + + EFX_POPULATE_QWORD_5(buffer->option, + ESF_DZ_TX_DESC_IS_OPT, 1, + ESF_DZ_TX_OPTION_TYPE, ESE_DZ_TX_OPTION_DESC_PIO, + ESF_DZ_TX_PIO_CONT, 0, + ESF_DZ_TX_PIO_BYTE_CNT, skb->len, + ESF_DZ_TX_PIO_BUF_ADDR, + tx_queue->piobuf_offset); + ++tx_queue->pio_packets; + ++tx_queue->insert_count; + return buffer; +} +#endif /* EFX_USE_PIO */ + /* * Add a socket buffer to a TX queue * @@ -227,6 +366,17 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) return NETDEV_TX_OK; } + /* Consider using PIO for short packets */ +#ifdef EFX_USE_PIO + if (skb->len <= efx_piobuf_size && tx_queue->piobuf && + efx_nic_tx_is_empty(tx_queue) && + efx_nic_tx_is_empty(efx_tx_queue_partner(tx_queue))) { + buffer = efx_enqueue_skb_pio(tx_queue, skb); + dma_flags = EFX_TX_BUF_OPTION; + goto finish_packet; + } +#endif + /* Map for DMA. Use dma_map_single rather than dma_map_page * since this is more efficient on machines with sparse * memory. @@ -279,6 +429,7 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) } /* Transfer ownership of the skb to the final buffer */ +finish_packet: buffer->skb = skb; buffer->flags = EFX_TX_BUF_SKB | dma_flags; From c47b2d9d56832e7ff1a20bd598623de42701a3a3 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 3 Sep 2013 17:22:23 +0100 Subject: [PATCH 10/10] sfc: Support ARFS for IPv6 flows Extend efx_filter_rfs() to map TCP/IPv6 and UDP/IPv6 flows into efx_filter_spec. These are only supported on EF10; on Falcon and Siena they will be rejected by efx_farch_filter_from_gen_spec(). Signed-off-by: Ben Hutchings --- drivers/net/ethernet/sfc/rx.c | 90 ++++++++++++++++++++++++----------- 1 file changed, 62 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index 4a596725023f..8f09e686fc23 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -818,44 +819,70 @@ int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, struct efx_nic *efx = netdev_priv(net_dev); struct efx_channel *channel; struct efx_filter_spec spec; - const struct iphdr *ip; const __be16 *ports; + __be16 ether_type; int nhoff; int rc; - nhoff = skb_network_offset(skb); + /* The core RPS/RFS code has already parsed and validated + * VLAN, IP and transport headers. We assume they are in the + * header area. + */ if (skb->protocol == htons(ETH_P_8021Q)) { - EFX_BUG_ON_PARANOID(skb_headlen(skb) < - nhoff + sizeof(struct vlan_hdr)); - if (((const struct vlan_hdr *)skb->data + nhoff)-> - h_vlan_encapsulated_proto != htons(ETH_P_IP)) - return -EPROTONOSUPPORT; + const struct vlan_hdr *vh = + (const struct vlan_hdr *)skb->data; - /* This is IP over 802.1q VLAN. We can't filter on the - * IP 5-tuple and the vlan together, so just strip the - * vlan header and filter on the IP part. + /* We can't filter on the IP 5-tuple and the vlan + * together, so just strip the vlan header and filter + * on the IP part. */ - nhoff += sizeof(struct vlan_hdr); - } else if (skb->protocol != htons(ETH_P_IP)) { - return -EPROTONOSUPPORT; + EFX_BUG_ON_PARANOID(skb_headlen(skb) < sizeof(*vh)); + ether_type = vh->h_vlan_encapsulated_proto; + nhoff = sizeof(struct vlan_hdr); + } else { + ether_type = skb->protocol; + nhoff = 0; } - /* RFS must validate the IP header length before calling us */ - EFX_BUG_ON_PARANOID(skb_headlen(skb) < nhoff + sizeof(*ip)); - ip = (const struct iphdr *)(skb->data + nhoff); - if (ip_is_fragment(ip)) + if (ether_type != htons(ETH_P_IP) && ether_type != htons(ETH_P_IPV6)) return -EPROTONOSUPPORT; - EFX_BUG_ON_PARANOID(skb_headlen(skb) < nhoff + 4 * ip->ihl + 4); - ports = (const __be16 *)(skb->data + nhoff + 4 * ip->ihl); efx_filter_init_rx(&spec, EFX_FILTER_PRI_HINT, efx->rx_scatter ? EFX_FILTER_FLAG_RX_SCATTER : 0, rxq_index); - rc = efx_filter_set_ipv4_full(&spec, ip->protocol, - ip->daddr, ports[1], ip->saddr, ports[0]); - if (rc) - return rc; + spec.match_flags = + EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_IP_PROTO | + EFX_FILTER_MATCH_LOC_HOST | EFX_FILTER_MATCH_LOC_PORT | + EFX_FILTER_MATCH_REM_HOST | EFX_FILTER_MATCH_REM_PORT; + spec.ether_type = ether_type; + + if (ether_type == htons(ETH_P_IP)) { + const struct iphdr *ip = + (const struct iphdr *)(skb->data + nhoff); + + EFX_BUG_ON_PARANOID(skb_headlen(skb) < nhoff + sizeof(*ip)); + if (ip_is_fragment(ip)) + return -EPROTONOSUPPORT; + spec.ip_proto = ip->protocol; + spec.rem_host[0] = ip->saddr; + spec.loc_host[0] = ip->daddr; + EFX_BUG_ON_PARANOID(skb_headlen(skb) < nhoff + 4 * ip->ihl + 4); + ports = (const __be16 *)(skb->data + nhoff + 4 * ip->ihl); + } else { + const struct ipv6hdr *ip6 = + (const struct ipv6hdr *)(skb->data + nhoff); + + EFX_BUG_ON_PARANOID(skb_headlen(skb) < + nhoff + sizeof(*ip6) + 4); + spec.ip_proto = ip6->nexthdr; + memcpy(spec.rem_host, &ip6->saddr, sizeof(ip6->saddr)); + memcpy(spec.loc_host, &ip6->daddr, sizeof(ip6->daddr)); + ports = (const __be16 *)(ip6 + 1); + } + + spec.rem_port = ports[0]; + spec.loc_port = ports[1]; rc = efx->type->filter_rfs_insert(efx, &spec); if (rc < 0) @@ -866,11 +893,18 @@ int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, channel = efx_get_channel(efx, skb_get_rx_queue(skb)); ++channel->rfs_filters_added; - netif_info(efx, rx_status, efx->net_dev, - "steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d]\n", - (ip->protocol == IPPROTO_TCP) ? "TCP" : "UDP", - &ip->saddr, ntohs(ports[0]), &ip->daddr, ntohs(ports[1]), - rxq_index, flow_id, rc); + if (ether_type == htons(ETH_P_IP)) + netif_info(efx, rx_status, efx->net_dev, + "steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d]\n", + (spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP", + spec.rem_host, ntohs(ports[0]), spec.loc_host, + ntohs(ports[1]), rxq_index, flow_id, rc); + else + netif_info(efx, rx_status, efx->net_dev, + "steering %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u filter %d]\n", + (spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP", + spec.rem_host, ntohs(ports[0]), spec.loc_host, + ntohs(ports[1]), rxq_index, flow_id, rc); return rc; }