From 6b8f092284672c6504ed215052bfee6b7171411e Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Tue, 6 Mar 2012 09:41:53 +0000 Subject: [PATCH 01/12] igb: Support sending custom Ethernet FCS. Including bad FCS, used generate frames with bad FCS to test other system's handling of RX of bad packets. Signed-off-by: Ben Greear Tested-by: Jeff Pieper Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index e96cef89f121..99364edf0314 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -1964,6 +1964,8 @@ static int __devinit igb_probe(struct pci_dev *pdev, NETIF_F_IPV6_CSUM | NETIF_F_SG; + netdev->priv_flags |= IFF_SUPP_NOFCS; + if (pci_using_dac) { netdev->features |= NETIF_F_HIGHDMA; netdev->vlan_features |= NETIF_F_HIGHDMA; @@ -4293,6 +4295,8 @@ static void igb_tx_map(struct igb_ring *tx_ring, /* write last descriptor with RS and EOP bits */ cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD); + if (unlikely(skb->no_fcs)) + cmd_type &= ~(cpu_to_le32(E1000_ADVTXD_DCMD_IFCS)); tx_desc->read.cmd_type_len = cmd_type; /* set the timestamp */ From 89eaefb61dc9170237d95b844dd357338fc7225d Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Tue, 6 Mar 2012 09:41:58 +0000 Subject: [PATCH 02/12] igb: Support RX-ALL feature flag. This allows the NIC to receive all frames available, including those with bad FCS, un-matched vlans, ethernet control frames, and more. Tested by sending frames with bad FCS. Signed-off-by: Ben Greear Tested-by: Jeff Pieper Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/igb/e1000_defines.h | 2 ++ drivers/net/ethernet/intel/igb/igb_main.c | 33 +++++++++++++++++-- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h index aed217449f0d..89eb1f85b9fa 100644 --- a/drivers/net/ethernet/intel/igb/e1000_defines.h +++ b/drivers/net/ethernet/intel/igb/e1000_defines.h @@ -134,6 +134,8 @@ #define E1000_RCTL_SZ_256 0x00030000 /* rx buffer size 256 */ #define E1000_RCTL_VFE 0x00040000 /* vlan filter enable */ #define E1000_RCTL_CFIEN 0x00080000 /* canonical form enable */ +#define E1000_RCTL_DPF 0x00400000 /* Discard Pause Frames */ +#define E1000_RCTL_PMCF 0x00800000 /* pass MAC control frames */ #define E1000_RCTL_SECRC 0x04000000 /* Strip Ethernet CRC */ /* diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 99364edf0314..c4902411d749 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -1769,10 +1769,21 @@ static int igb_set_features(struct net_device *netdev, netdev_features_t features) { netdev_features_t changed = netdev->features ^ features; + struct igb_adapter *adapter = netdev_priv(netdev); if (changed & NETIF_F_HW_VLAN_RX) igb_vlan_mode(netdev, features); + if (!(changed & NETIF_F_RXALL)) + return 0; + + netdev->features = features; + + if (netif_running(netdev)) + igb_reinit_locked(adapter); + else + igb_reset(adapter); + return 0; } @@ -1954,6 +1965,7 @@ static int __devinit igb_probe(struct pci_dev *pdev, /* copy netdev features into list of user selectable features */ netdev->hw_features |= netdev->features; + netdev->hw_features |= NETIF_F_RXALL; /* set this bit last since it cannot be part of hw_features */ netdev->features |= NETIF_F_HW_VLAN_FILTER; @@ -3005,6 +3017,22 @@ void igb_setup_rctl(struct igb_adapter *adapter) wr32(E1000_QDE, ALL_QUEUES); } + /* This is useful for sniffing bad packets. */ + if (adapter->netdev->features & NETIF_F_RXALL) { + /* UPE and MPE will be handled by normal PROMISC logic + * in e1000e_set_rx_mode */ + rctl |= (E1000_RCTL_SBP | /* Receive bad packets */ + E1000_RCTL_BAM | /* RX All Bcast Pkts */ + E1000_RCTL_PMCF); /* RX All MAC Ctrl Pkts */ + + rctl &= ~(E1000_RCTL_VFE | /* Disable VLAN filter */ + E1000_RCTL_DPF | /* Allow filtered pause */ + E1000_RCTL_CFIEN); /* Dis VLAN CFIEN Filter */ + /* Do not mess with E1000_CTRL_VME, it affects transmit as well, + * and that breaks VLANs. + */ + } + wr32(E1000_RCTL, rctl); } @@ -6102,8 +6130,9 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget) goto next_desc; } - if (igb_test_staterr(rx_desc, - E1000_RXDEXT_ERR_FRAME_ERR_MASK)) { + if (unlikely((igb_test_staterr(rx_desc, + E1000_RXDEXT_ERR_FRAME_ERR_MASK)) + && !(rx_ring->netdev->features & NETIF_F_RXALL))) { dev_kfree_skb_any(skb); goto next_desc; } From f43f313eb7c5f189ff4e27da6cc96ad2e613e333 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Tue, 6 Mar 2012 09:42:04 +0000 Subject: [PATCH 03/12] ixgbe: Support sending custom Ethernet FCS. Including bad FCS, used generate frames with bad FCS to test other system's handling of RX of bad packets. Signed-off-by: Ben Greear Tested-by: Jeff Pieper Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 167e898fbba6..986897bf2b83 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6768,6 +6768,8 @@ static void ixgbe_tx_map(struct ixgbe_ring *tx_ring, tx_buffer_info->dma = dma; tx_desc->read.buffer_addr = cpu_to_le64(dma + offset); + if (unlikely(skb->no_fcs)) + cmd_type &= ~(cpu_to_le32(IXGBE_ADVTXD_DCMD_IFCS)); tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size); tx_desc->read.olinfo_status = olinfo_status; @@ -7778,6 +7780,7 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, netdev->vlan_features |= NETIF_F_SG; netdev->priv_flags |= IFF_UNICAST_FLT; + netdev->priv_flags |= IFF_SUPP_NOFCS; if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) adapter->flags &= ~(IXGBE_FLAG_RSS_ENABLED | From 3f2d1c0f57c6bfad7aacc249fca3514e98a94137 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Thu, 8 Mar 2012 08:28:41 +0000 Subject: [PATCH 04/12] ixgbe: Support RX-ALL feature flag. This allows the NIC to receive all frames available, including those with bad FCS, ethernet control frames, and more. Tested by sending frames with bad FCS. Signed-off-by: Ben Greear Tested-by: Jeff Pieper Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 25 +++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 986897bf2b83..cae763c57dc7 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1482,8 +1482,8 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, struct sk_buff *skb; unsigned int total_rx_bytes = 0, total_rx_packets = 0; const int current_node = numa_node_id(); -#ifdef IXGBE_FCOE struct ixgbe_adapter *adapter = q_vector->adapter; +#ifdef IXGBE_FCOE int ddp_bytes = 0; #endif /* IXGBE_FCOE */ u16 i; @@ -1612,7 +1612,8 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, /* ERR_MASK will only have valid bits if EOP set */ if (unlikely(ixgbe_test_staterr(rx_desc, - IXGBE_RXDADV_ERR_FRAME_ERR_MASK))) { + IXGBE_RXDADV_ERR_FRAME_ERR_MASK) && + !(adapter->netdev->features & NETIF_F_RXALL))) { dev_kfree_skb_any(skb); goto next_desc; } @@ -3342,6 +3343,7 @@ void ixgbe_set_rx_mode(struct net_device *netdev) fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL); /* set all bits that we expect to always be set */ + fctrl &= ~IXGBE_FCTRL_SBP; /* disable store-bad-packets */ fctrl |= IXGBE_FCTRL_BAM; fctrl |= IXGBE_FCTRL_DPF; /* discard pause frames when FC enabled */ fctrl |= IXGBE_FCTRL_PMCF; @@ -3390,6 +3392,18 @@ void ixgbe_set_rx_mode(struct net_device *netdev) IXGBE_WRITE_REG(hw, IXGBE_VMOLR(adapter->num_vfs), vmolr); } + /* This is useful for sniffing bad packets. */ + if (adapter->netdev->features & NETIF_F_RXALL) { + /* UPE and MPE will be handled by normal PROMISC logic + * in e1000e_set_rx_mode */ + fctrl |= (IXGBE_FCTRL_SBP | /* Receive bad packets */ + IXGBE_FCTRL_BAM | /* RX All Bcast Pkts */ + IXGBE_FCTRL_PMCF); /* RX All MAC Ctrl Pkts */ + + fctrl &= ~(IXGBE_FCTRL_DPF); + /* NOTE: VLAN filtering is disabled by setting PROMISC */ + } + IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl); if (netdev->features & NETIF_F_HW_VLAN_RX) @@ -7459,6 +7473,7 @@ static int ixgbe_set_features(struct net_device *netdev, netdev_features_t data) { struct ixgbe_adapter *adapter = netdev_priv(netdev); + netdev_features_t changed = netdev->features ^ data; bool need_reset = false; /* Make sure RSC matches LRO, reset if change */ @@ -7495,6 +7510,10 @@ static int ixgbe_set_features(struct net_device *netdev, need_reset = true; } + if (changed & NETIF_F_RXALL) + need_reset = true; + + netdev->features = data; if (need_reset) ixgbe_do_reset(netdev); @@ -7773,6 +7792,8 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, break; } + netdev->hw_features |= NETIF_F_RXALL; + netdev->vlan_features |= NETIF_F_TSO; netdev->vlan_features |= NETIF_F_TSO6; netdev->vlan_features |= NETIF_F_IP_CSUM; From f800326dca7bc158f4c886aa92f222de37993c80 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Sat, 3 Mar 2012 02:35:52 +0000 Subject: [PATCH 05/12] ixgbe: Replace standard receive path with a page based receive This patch replaces the existing Rx hot-path in the ixgbe driver with a new implementation that is based on performing a double buffered receive. The ixgbe driver already had something similar in place for its' packet split path, however in that case we were still receiving the header for the packet into the sk_buff. The big change here is the entire receive path will receive into pages only, and then pull the header out of the page and copy it into the sk_buff data. There are several motivations behind this approach. First, this allows us to avoid several cache misses as we were taking a set of cache misses for allocating the sk_buff and then another set for receiving data into the sk_buff. We are able to avoid these misses on receive now as we allocate the sk_buff when data is available. Second we are able to see a considerable performance gain when an IOMMU is enabled because we are no longer unmapping every buffer on receive. Instead we can delay the unmap until we are unable to use the page, and instead we can simply call sync_single_range on the half of the page that contains new data. Finally we are able to drop a considerable amount of code from the driver as we no longer have to support 2 different receive modes, packet split and one buffer. This allows us to optimize the Rx path further since less branching is required. Signed-off-by: Alexander Duyck Tested-by: Ross Brattain Tested-by: Stephen Ko Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 45 +- .../net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 23 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 817 +++++++++--------- 3 files changed, 456 insertions(+), 429 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index f069c1b10753..b1e3baf8d450 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -72,12 +72,6 @@ /* Supported Rx Buffer Sizes */ #define IXGBE_RXBUFFER_512 512 /* Used for packet split */ -#define IXGBE_RXBUFFER_2K 2048 -#define IXGBE_RXBUFFER_3K 3072 -#define IXGBE_RXBUFFER_4K 4096 -#define IXGBE_RXBUFFER_7K 7168 -#define IXGBE_RXBUFFER_8K 8192 -#define IXGBE_RXBUFFER_15K 15360 #define IXGBE_MAX_RXBUFFER 16384 /* largest size for a single descriptor */ /* @@ -168,7 +162,6 @@ struct ixgbe_rx_buffer { struct sk_buff *skb; dma_addr_t dma; struct page *page; - dma_addr_t page_dma; unsigned int page_offset; }; @@ -193,21 +186,15 @@ struct ixgbe_rx_queue_stats { u64 csum_err; }; -enum ixbge_ring_state_t { +enum ixgbe_ring_state_t { __IXGBE_TX_FDIR_INIT_DONE, __IXGBE_TX_DETECT_HANG, __IXGBE_HANG_CHECK_ARMED, - __IXGBE_RX_PS_ENABLED, __IXGBE_RX_RSC_ENABLED, __IXGBE_RX_CSUM_UDP_ZERO_ERR, + __IXGBE_RX_FCOE_BUFSZ, }; -#define ring_is_ps_enabled(ring) \ - test_bit(__IXGBE_RX_PS_ENABLED, &(ring)->state) -#define set_ring_ps_enabled(ring) \ - set_bit(__IXGBE_RX_PS_ENABLED, &(ring)->state) -#define clear_ring_ps_enabled(ring) \ - clear_bit(__IXGBE_RX_PS_ENABLED, &(ring)->state) #define check_for_tx_hang(ring) \ test_bit(__IXGBE_TX_DETECT_HANG, &(ring)->state) #define set_check_for_tx_hang(ring) \ @@ -233,7 +220,6 @@ struct ixgbe_ring { u8 __iomem *tail; u16 count; /* amount of descriptors */ - u16 rx_buf_len; u8 queue_index; /* needed for multiqueue queue management */ u8 reg_idx; /* holds the special value that gets @@ -241,8 +227,13 @@ struct ixgbe_ring { * associated with this ring, which is * different for DCB and RSS modes */ - u8 atr_sample_rate; - u8 atr_count; + union { + struct { + u8 atr_sample_rate; + u8 atr_count; + }; + u16 next_to_alloc; + }; u16 next_to_use; u16 next_to_clean; @@ -287,6 +278,22 @@ struct ixgbe_ring_feature { int mask; } ____cacheline_internodealigned_in_smp; +/* + * FCoE requires that all Rx buffers be over 2200 bytes in length. Since + * this is twice the size of a half page we need to double the page order + * for FCoE enabled Rx queues. + */ +#if defined(IXGBE_FCOE) && (PAGE_SIZE < 8192) +static inline unsigned int ixgbe_rx_pg_order(struct ixgbe_ring *ring) +{ + return test_bit(__IXGBE_RX_FCOE_BUFSZ, &ring->state) ? 1 : 0; +} +#else +#define ixgbe_rx_pg_order(_ring) 0 +#endif +#define ixgbe_rx_pg_size(_ring) (PAGE_SIZE << ixgbe_rx_pg_order(_ring)) +#define ixgbe_rx_bufsz(_ring) ((PAGE_SIZE / 2) << ixgbe_rx_pg_order(_ring)) + struct ixgbe_ring_container { struct ixgbe_ring *ring; /* pointer to linked list of rings */ unsigned int total_bytes; /* total bytes processed this int */ @@ -554,7 +561,7 @@ struct ixgbe_cb { }; dma_addr_t dma; u16 append_cnt; - bool delay_unmap; + bool page_released; }; #define IXGBE_CB(skb) ((struct ixgbe_cb *)(skb)->cb) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 24f7291dbe57..b09e67cc9d6e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include "ixgbe.h" @@ -1615,7 +1616,6 @@ static int ixgbe_setup_desc_rings(struct ixgbe_adapter *adapter) rx_ring->dev = &adapter->pdev->dev; rx_ring->netdev = adapter->netdev; rx_ring->reg_idx = adapter->rx_ring[0]->reg_idx; - rx_ring->rx_buf_len = IXGBE_RXBUFFER_2K; err = ixgbe_setup_rx_resources(rx_ring); if (err) { @@ -1718,13 +1718,15 @@ static bool ixgbe_check_lbtest_frame(struct ixgbe_rx_buffer *rx_buffer, frame_size >>= 1; - data = rx_buffer->skb->data; + data = kmap(rx_buffer->page) + rx_buffer->page_offset; if (data[3] != 0xFF || data[frame_size + 10] != 0xBE || data[frame_size + 12] != 0xAF) match = false; + kunmap(rx_buffer->page); + return match; } @@ -1746,17 +1748,22 @@ static u16 ixgbe_clean_test_rings(struct ixgbe_ring *rx_ring, /* check Rx buffer */ rx_buffer = &rx_ring->rx_buffer_info[rx_ntc]; - /* unmap Rx buffer, will be remapped by alloc_rx_buffers */ - dma_unmap_single(rx_ring->dev, - rx_buffer->dma, - rx_ring->rx_buf_len, - DMA_FROM_DEVICE); - rx_buffer->dma = 0; + /* sync Rx buffer for CPU read */ + dma_sync_single_for_cpu(rx_ring->dev, + rx_buffer->dma, + ixgbe_rx_bufsz(rx_ring), + DMA_FROM_DEVICE); /* verify contents of skb */ if (ixgbe_check_lbtest_frame(rx_buffer, size)) count++; + /* sync Rx buffer for device write */ + dma_sync_single_for_device(rx_ring->dev, + rx_buffer->dma, + ixgbe_rx_bufsz(rx_ring), + DMA_FROM_DEVICE); + /* unmap buffer on Tx side */ tx_buffer = &tx_ring->tx_buffer_info[tx_ntc]; ixgbe_unmap_and_free_tx_resource(tx_ring, tx_buffer); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index cae763c57dc7..e97ef4591ade 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -469,17 +469,7 @@ rx_ring_summary: print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 1, phys_to_virt(rx_buffer_info->dma), - rx_ring->rx_buf_len, true); - - if (rx_ring->rx_buf_len - < IXGBE_RXBUFFER_2K) - print_hex_dump(KERN_INFO, "", - DUMP_PREFIX_ADDRESS, 16, 1, - phys_to_virt( - rx_buffer_info->page_dma + - rx_buffer_info->page_offset - ), - PAGE_SIZE/2, true); + ixgbe_rx_bufsz(rx_ring), true); } } @@ -1006,6 +996,7 @@ static inline void ixgbe_rx_hash(struct ixgbe_ring *ring, skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss); } +#ifdef IXGBE_FCOE /** * ixgbe_rx_is_fcoe - check the rx desc for incoming pkt type * @adapter: address of board private structure @@ -1024,6 +1015,7 @@ static inline bool ixgbe_rx_is_fcoe(struct ixgbe_adapter *adapter, IXGBE_RXDADV_PKTTYPE_ETQF_SHIFT))); } +#endif /* IXGBE_FCOE */ /** * ixgbe_rx_checksum - indicate in skb if hw indicated a good cksum * @ring: structure containing ring specific data @@ -1051,7 +1043,7 @@ static inline void ixgbe_rx_checksum(struct ixgbe_ring *ring, return; if (ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_ERR_TCPE)) { - u16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info; + __le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info; /* * 82599 errata, UDP frames with a 0 checksum can be marked as @@ -1072,6 +1064,9 @@ static inline void ixgbe_rx_checksum(struct ixgbe_ring *ring, static inline void ixgbe_release_rx_desc(struct ixgbe_ring *rx_ring, u32 val) { rx_ring->next_to_use = val; + + /* update next to alloc since we have filled the ring */ + rx_ring->next_to_alloc = val; /* * Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only @@ -1082,67 +1077,46 @@ static inline void ixgbe_release_rx_desc(struct ixgbe_ring *rx_ring, u32 val) writel(val, rx_ring->tail); } -static bool ixgbe_alloc_mapped_skb(struct ixgbe_ring *rx_ring, - struct ixgbe_rx_buffer *bi) -{ - struct sk_buff *skb = bi->skb; - dma_addr_t dma = bi->dma; - - if (dma) - return true; - - if (likely(!skb)) { - skb = netdev_alloc_skb_ip_align(rx_ring->netdev, - rx_ring->rx_buf_len); - bi->skb = skb; - if (!skb) { - rx_ring->rx_stats.alloc_rx_buff_failed++; - return false; - } - } - - dma = dma_map_single(rx_ring->dev, skb->data, - rx_ring->rx_buf_len, DMA_FROM_DEVICE); - - if (dma_mapping_error(rx_ring->dev, dma)) { - rx_ring->rx_stats.alloc_rx_buff_failed++; - return false; - } - - bi->dma = dma; - return true; -} - static bool ixgbe_alloc_mapped_page(struct ixgbe_ring *rx_ring, struct ixgbe_rx_buffer *bi) { struct page *page = bi->page; - dma_addr_t page_dma = bi->page_dma; - unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2); + dma_addr_t dma = bi->dma; - if (page_dma) + /* since we are recycling buffers we should seldom need to alloc */ + if (likely(dma)) return true; - if (!page) { - page = alloc_page(GFP_ATOMIC | __GFP_COLD); - bi->page = page; + /* alloc new page for storage */ + if (likely(!page)) { + page = alloc_pages(GFP_ATOMIC | __GFP_COLD, + ixgbe_rx_pg_order(rx_ring)); if (unlikely(!page)) { rx_ring->rx_stats.alloc_rx_page_failed++; return false; } + bi->page = page; } - page_dma = dma_map_page(rx_ring->dev, page, - page_offset, PAGE_SIZE / 2, - DMA_FROM_DEVICE); + /* map page for use */ + dma = dma_map_page(rx_ring->dev, page, 0, + ixgbe_rx_pg_size(rx_ring), DMA_FROM_DEVICE); + + /* + * if mapping failed free memory back to system since + * there isn't much point in holding memory we can't use + */ + if (dma_mapping_error(rx_ring->dev, dma)) { + put_page(page); + bi->page = NULL; - if (dma_mapping_error(rx_ring->dev, page_dma)) { rx_ring->rx_stats.alloc_rx_page_failed++; return false; } - bi->page_dma = page_dma; - bi->page_offset = page_offset; + bi->dma = dma; + bi->page_offset ^= ixgbe_rx_bufsz(rx_ring); + return true; } @@ -1157,30 +1131,23 @@ void ixgbe_alloc_rx_buffers(struct ixgbe_ring *rx_ring, u16 cleaned_count) struct ixgbe_rx_buffer *bi; u16 i = rx_ring->next_to_use; - /* nothing to do or no valid netdev defined */ - if (!cleaned_count || !rx_ring->netdev) + /* nothing to do */ + if (!cleaned_count) return; rx_desc = IXGBE_RX_DESC(rx_ring, i); bi = &rx_ring->rx_buffer_info[i]; i -= rx_ring->count; - while (cleaned_count--) { - if (!ixgbe_alloc_mapped_skb(rx_ring, bi)) + do { + if (!ixgbe_alloc_mapped_page(rx_ring, bi)) break; - /* Refresh the desc even if buffer_addrs didn't change - * because each write-back erases this info. */ - if (ring_is_ps_enabled(rx_ring)) { - rx_desc->read.hdr_addr = cpu_to_le64(bi->dma); - - if (!ixgbe_alloc_mapped_page(rx_ring, bi)) - break; - - rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma); - } else { - rx_desc->read.pkt_addr = cpu_to_le64(bi->dma); - } + /* + * Refresh the desc even if buffer_addrs didn't change + * because each write-back erases this info. + */ + rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); rx_desc++; bi++; @@ -1193,7 +1160,9 @@ void ixgbe_alloc_rx_buffers(struct ixgbe_ring *rx_ring, u16 cleaned_count) /* clear the hdr_addr for the next_to_use descriptor */ rx_desc->read.hdr_addr = 0; - } + + cleaned_count--; + } while (cleaned_count); i += rx_ring->count; @@ -1201,90 +1170,6 @@ void ixgbe_alloc_rx_buffers(struct ixgbe_ring *rx_ring, u16 cleaned_count) ixgbe_release_rx_desc(rx_ring, i); } -static inline u16 ixgbe_get_hlen(union ixgbe_adv_rx_desc *rx_desc) -{ - /* HW will not DMA in data larger than the given buffer, even if it - * parses the (NFS, of course) header to be larger. In that case, it - * fills the header buffer and spills the rest into the page. - */ - u16 hdr_info = le16_to_cpu(rx_desc->wb.lower.lo_dword.hs_rss.hdr_info); - u16 hlen = (hdr_info & IXGBE_RXDADV_HDRBUFLEN_MASK) >> - IXGBE_RXDADV_HDRBUFLEN_SHIFT; - if (hlen > IXGBE_RX_HDR_SIZE) - hlen = IXGBE_RX_HDR_SIZE; - return hlen; -} - -/** - * ixgbe_merge_active_tail - merge active tail into lro skb - * @tail: pointer to active tail in frag_list - * - * This function merges the length and data of an active tail into the - * skb containing the frag_list. It resets the tail's pointer to the head, - * but it leaves the heads pointer to tail intact. - **/ -static inline struct sk_buff *ixgbe_merge_active_tail(struct sk_buff *tail) -{ - struct sk_buff *head = IXGBE_CB(tail)->head; - - if (!head) - return tail; - - head->len += tail->len; - head->data_len += tail->len; - head->truesize += tail->len; - - IXGBE_CB(tail)->head = NULL; - - return head; -} - -/** - * ixgbe_add_active_tail - adds an active tail into the skb frag_list - * @head: pointer to the start of the skb - * @tail: pointer to active tail to add to frag_list - * - * This function adds an active tail to the end of the frag list. This tail - * will still be receiving data so we cannot yet ad it's stats to the main - * skb. That is done via ixgbe_merge_active_tail. - **/ -static inline void ixgbe_add_active_tail(struct sk_buff *head, - struct sk_buff *tail) -{ - struct sk_buff *old_tail = IXGBE_CB(head)->tail; - - if (old_tail) { - ixgbe_merge_active_tail(old_tail); - old_tail->next = tail; - } else { - skb_shinfo(head)->frag_list = tail; - } - - IXGBE_CB(tail)->head = head; - IXGBE_CB(head)->tail = tail; -} - -/** - * ixgbe_close_active_frag_list - cleanup pointers on a frag_list skb - * @head: pointer to head of an active frag list - * - * This function will clear the frag_tail_tracker pointer on an active - * frag_list and returns true if the pointer was actually set - **/ -static inline bool ixgbe_close_active_frag_list(struct sk_buff *head) -{ - struct sk_buff *tail = IXGBE_CB(head)->tail; - - if (!tail) - return false; - - ixgbe_merge_active_tail(tail); - - IXGBE_CB(head)->tail = NULL; - - return true; -} - /** * ixgbe_get_headlen - determine size of header for RSC/LRO/GRO/FCOE * @data: pointer to the start of the headers @@ -1346,7 +1231,7 @@ static unsigned int ixgbe_get_headlen(unsigned char *data, /* record next protocol */ nexthdr = hdr.ipv4->protocol; hdr.network += hlen; -#ifdef CONFIG_FCOE +#ifdef IXGBE_FCOE } else if (protocol == __constant_htons(ETH_P_FCOE)) { if ((hdr.network - data) > (max_len - FCOE_HEADER_LEN)) return max_len; @@ -1409,7 +1294,7 @@ static void ixgbe_get_rsc_cnt(struct ixgbe_ring *rx_ring, static void ixgbe_set_rsc_gso_size(struct ixgbe_ring *ring, struct sk_buff *skb) { - u16 hdr_len = ixgbe_get_headlen(skb->data, skb_headlen(skb)); + u16 hdr_len = skb_headlen(skb); /* set gso_size to avoid messing up TCP MSS */ skb_shinfo(skb)->gso_size = DIV_ROUND_UP((skb->len - hdr_len), @@ -1473,150 +1358,346 @@ static void ixgbe_rx_skb(struct ixgbe_q_vector *q_vector, netif_rx(skb); } +/** + * ixgbe_is_non_eop - process handling of non-EOP buffers + * @rx_ring: Rx ring being processed + * @rx_desc: Rx descriptor for current buffer + * @skb: Current socket buffer containing buffer in progress + * + * This function updates next to clean. If the buffer is an EOP buffer + * this function exits returning false, otherwise it will place the + * sk_buff in the next buffer to be chained and return true indicating + * that this is in fact a non-EOP buffer. + **/ +static bool ixgbe_is_non_eop(struct ixgbe_ring *rx_ring, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + u32 ntc = rx_ring->next_to_clean + 1; + + /* fetch, update, and store next to clean */ + ntc = (ntc < rx_ring->count) ? ntc : 0; + rx_ring->next_to_clean = ntc; + + prefetch(IXGBE_RX_DESC(rx_ring, ntc)); + + if (likely(ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))) + return false; + + /* append_cnt indicates packet is RSC, if so fetch nextp */ + if (IXGBE_CB(skb)->append_cnt) { + ntc = le32_to_cpu(rx_desc->wb.upper.status_error); + ntc &= IXGBE_RXDADV_NEXTP_MASK; + ntc >>= IXGBE_RXDADV_NEXTP_SHIFT; + } + + /* place skb in next buffer to be received */ + rx_ring->rx_buffer_info[ntc].skb = skb; + rx_ring->rx_stats.non_eop_descs++; + + return true; +} + +/** + * ixgbe_cleanup_headers - Correct corrupted or empty headers + * @rx_ring: rx descriptor ring packet is being transacted on + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being fixed + * + * Check for corrupted packet headers caused by senders on the local L2 + * embedded NIC switch not setting up their Tx Descriptors right. These + * should be very rare. + * + * Also address the case where we are pulling data in on pages only + * and as such no data is present in the skb header. + * + * In addition if skb is not at least 60 bytes we need to pad it so that + * it is large enough to qualify as a valid Ethernet frame. + * + * Returns true if an error was encountered and skb was freed. + **/ +static bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; + struct net_device *netdev = rx_ring->netdev; + unsigned char *va; + unsigned int pull_len; + + /* if the page was released unmap it, else just sync our portion */ + if (unlikely(IXGBE_CB(skb)->page_released)) { + dma_unmap_page(rx_ring->dev, IXGBE_CB(skb)->dma, + ixgbe_rx_pg_size(rx_ring), DMA_FROM_DEVICE); + IXGBE_CB(skb)->page_released = false; + } else { + dma_sync_single_range_for_cpu(rx_ring->dev, + IXGBE_CB(skb)->dma, + frag->page_offset, + ixgbe_rx_bufsz(rx_ring), + DMA_FROM_DEVICE); + } + IXGBE_CB(skb)->dma = 0; + + /* verify that the packet does not have any known errors */ + if (unlikely(ixgbe_test_staterr(rx_desc, + IXGBE_RXDADV_ERR_FRAME_ERR_MASK) && + !(netdev->features & NETIF_F_RXALL))) { + dev_kfree_skb_any(skb); + return true; + } + + /* + * it is valid to use page_address instead of kmap since we are + * working with pages allocated out of the lomem pool per + * alloc_page(GFP_ATOMIC) + */ + va = skb_frag_address(frag); + + /* + * we need the header to contain the greater of either ETH_HLEN or + * 60 bytes if the skb->len is less than 60 for skb_pad. + */ + pull_len = skb_frag_size(frag); + if (pull_len > 256) + pull_len = ixgbe_get_headlen(va, pull_len); + + /* align pull length to size of long to optimize memcpy performance */ + skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long))); + + /* update all of the pointers */ + skb_frag_size_sub(frag, pull_len); + frag->page_offset += pull_len; + skb->data_len -= pull_len; + skb->tail += pull_len; + + /* + * if we sucked the frag empty then we should free it, + * if there are other frags here something is screwed up in hardware + */ + if (skb_frag_size(frag) == 0) { + BUG_ON(skb_shinfo(skb)->nr_frags != 1); + skb_shinfo(skb)->nr_frags = 0; + __skb_frag_unref(frag); + skb->truesize -= ixgbe_rx_bufsz(rx_ring); + } + + /* if skb_pad returns an error the skb was freed */ + if (unlikely(skb->len < 60)) { + int pad_len = 60 - skb->len; + + if (skb_pad(skb, pad_len)) + return true; + __skb_put(skb, pad_len); + } + + return false; +} + +/** + * ixgbe_can_reuse_page - determine if we can reuse a page + * @rx_buffer: pointer to rx_buffer containing the page we want to reuse + * + * Returns true if page can be reused in another Rx buffer + **/ +static inline bool ixgbe_can_reuse_page(struct ixgbe_rx_buffer *rx_buffer) +{ + struct page *page = rx_buffer->page; + + /* if we are only owner of page and it is local we can reuse it */ + return likely(page_count(page) == 1) && + likely(page_to_nid(page) == numa_node_id()); +} + +/** + * ixgbe_reuse_rx_page - page flip buffer and store it back on the ring + * @rx_ring: rx descriptor ring to store buffers on + * @old_buff: donor buffer to have page reused + * + * Syncronizes page for reuse by the adapter + **/ +static void ixgbe_reuse_rx_page(struct ixgbe_ring *rx_ring, + struct ixgbe_rx_buffer *old_buff) +{ + struct ixgbe_rx_buffer *new_buff; + u16 nta = rx_ring->next_to_alloc; + u16 bufsz = ixgbe_rx_bufsz(rx_ring); + + new_buff = &rx_ring->rx_buffer_info[nta]; + + /* update, and store next to alloc */ + nta++; + rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; + + /* transfer page from old buffer to new buffer */ + new_buff->page = old_buff->page; + new_buff->dma = old_buff->dma; + + /* flip page offset to other buffer and store to new_buff */ + new_buff->page_offset = old_buff->page_offset ^ bufsz; + + /* sync the buffer for use by the device */ + dma_sync_single_range_for_device(rx_ring->dev, new_buff->dma, + new_buff->page_offset, bufsz, + DMA_FROM_DEVICE); + + /* bump ref count on page before it is given to the stack */ + get_page(new_buff->page); +} + +/** + * ixgbe_add_rx_frag - Add contents of Rx buffer to sk_buff + * @rx_ring: rx descriptor ring to transact packets on + * @rx_buffer: buffer containing page to add + * @rx_desc: descriptor containing length of buffer written by hardware + * @skb: sk_buff to place the data into + * + * This function is based on skb_add_rx_frag. I would have used that + * function however it doesn't handle the truesize case correctly since we + * are allocating more memory than might be used for a single receive. + **/ +static void ixgbe_add_rx_frag(struct ixgbe_ring *rx_ring, + struct ixgbe_rx_buffer *rx_buffer, + struct sk_buff *skb, int size) +{ + skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, + rx_buffer->page, rx_buffer->page_offset, + size); + skb->len += size; + skb->data_len += size; + skb->truesize += ixgbe_rx_bufsz(rx_ring); +} + +/** + * ixgbe_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf + * @q_vector: structure containing interrupt and ring information + * @rx_ring: rx descriptor ring to transact packets on + * @budget: Total limit on number of packets to process + * + * This function provides a "bounce buffer" approach to Rx interrupt + * processing. The advantage to this is that on systems that have + * expensive overhead for IOMMU access this provides a means of avoiding + * it by maintaining the mapping of the page to the syste. + * + * Returns true if all work is completed without reaching budget + **/ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, struct ixgbe_ring *rx_ring, int budget) { - union ixgbe_adv_rx_desc *rx_desc, *next_rxd; - struct ixgbe_rx_buffer *rx_buffer_info; - struct sk_buff *skb; unsigned int total_rx_bytes = 0, total_rx_packets = 0; - const int current_node = numa_node_id(); - struct ixgbe_adapter *adapter = q_vector->adapter; #ifdef IXGBE_FCOE + struct ixgbe_adapter *adapter = q_vector->adapter; int ddp_bytes = 0; #endif /* IXGBE_FCOE */ - u16 i; - u16 cleaned_count = 0; + u16 cleaned_count = ixgbe_desc_unused(rx_ring); - i = rx_ring->next_to_clean; - rx_desc = IXGBE_RX_DESC(rx_ring, i); + do { + struct ixgbe_rx_buffer *rx_buffer; + union ixgbe_adv_rx_desc *rx_desc; + struct sk_buff *skb; + struct page *page; + u16 ntc; - while (ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_DD)) { - u32 upper_len = 0; + /* return some buffers to hardware, one at a time is too slow */ + if (cleaned_count >= IXGBE_RX_BUFFER_WRITE) { + ixgbe_alloc_rx_buffers(rx_ring, cleaned_count); + cleaned_count = 0; + } - rmb(); /* read descriptor and rx_buffer_info after status DD */ + ntc = rx_ring->next_to_clean; + rx_desc = IXGBE_RX_DESC(rx_ring, ntc); + rx_buffer = &rx_ring->rx_buffer_info[ntc]; - rx_buffer_info = &rx_ring->rx_buffer_info[i]; + if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_DD)) + break; - skb = rx_buffer_info->skb; - rx_buffer_info->skb = NULL; - prefetch(skb->data); + /* + * This memory barrier is needed to keep us from reading + * any other fields out of the rx_desc until we know the + * RXD_STAT_DD bit is set + */ + rmb(); - /* linear means we are building an skb from multiple pages */ - if (!skb_is_nonlinear(skb)) { - u16 hlen; - if (ring_is_ps_enabled(rx_ring)) { - hlen = ixgbe_get_hlen(rx_desc); - upper_len = le16_to_cpu(rx_desc->wb.upper.length); - } else { - hlen = le16_to_cpu(rx_desc->wb.upper.length); + page = rx_buffer->page; + prefetchw(page); + + skb = rx_buffer->skb; + + if (likely(!skb)) { + void *page_addr = page_address(page) + + rx_buffer->page_offset; + + /* prefetch first cache line of first page */ + prefetch(page_addr); +#if L1_CACHE_BYTES < 128 + prefetch(page_addr + L1_CACHE_BYTES); +#endif + + /* allocate a skb to store the frags */ + skb = netdev_alloc_skb_ip_align(rx_ring->netdev, + IXGBE_RX_HDR_SIZE); + if (unlikely(!skb)) { + rx_ring->rx_stats.alloc_rx_buff_failed++; + break; } - skb_put(skb, hlen); + /* + * we will be copying header into skb->data in + * pskb_may_pull so it is in our interest to prefetch + * it now to avoid a possible cache miss + */ + prefetchw(skb->data); /* * Delay unmapping of the first packet. It carries the * header information, HW may still access the header - * after writeback. Only unmap it when EOP is reached + * after the writeback. Only unmap it when EOP is + * reached */ - if (!IXGBE_CB(skb)->head) { - IXGBE_CB(skb)->delay_unmap = true; - IXGBE_CB(skb)->dma = rx_buffer_info->dma; - } else { - skb = ixgbe_merge_active_tail(skb); - dma_unmap_single(rx_ring->dev, - rx_buffer_info->dma, - rx_ring->rx_buf_len, - DMA_FROM_DEVICE); - } - rx_buffer_info->dma = 0; + IXGBE_CB(skb)->dma = rx_buffer->dma; } else { - /* assume packet split since header is unmapped */ - upper_len = le16_to_cpu(rx_desc->wb.upper.length); + /* we are reusing so sync this buffer for CPU use */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_buffer->dma, + rx_buffer->page_offset, + ixgbe_rx_bufsz(rx_ring), + DMA_FROM_DEVICE); } - if (upper_len) { - dma_unmap_page(rx_ring->dev, - rx_buffer_info->page_dma, - PAGE_SIZE / 2, + /* pull page into skb */ + ixgbe_add_rx_frag(rx_ring, rx_buffer, skb, + le16_to_cpu(rx_desc->wb.upper.length)); + + if (ixgbe_can_reuse_page(rx_buffer)) { + /* hand second half of page back to the ring */ + ixgbe_reuse_rx_page(rx_ring, rx_buffer); + } else if (IXGBE_CB(skb)->dma == rx_buffer->dma) { + /* the page has been released from the ring */ + IXGBE_CB(skb)->page_released = true; + } else { + /* we are not reusing the buffer so unmap it */ + dma_unmap_page(rx_ring->dev, rx_buffer->dma, + ixgbe_rx_pg_size(rx_ring), DMA_FROM_DEVICE); - rx_buffer_info->page_dma = 0; - skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, - rx_buffer_info->page, - rx_buffer_info->page_offset, - upper_len); - - if ((page_count(rx_buffer_info->page) == 1) && - (page_to_nid(rx_buffer_info->page) == current_node)) - get_page(rx_buffer_info->page); - else - rx_buffer_info->page = NULL; - - skb->len += upper_len; - skb->data_len += upper_len; - skb->truesize += PAGE_SIZE / 2; } + /* clear contents of buffer_info */ + rx_buffer->skb = NULL; + rx_buffer->dma = 0; + rx_buffer->page = NULL; + ixgbe_get_rsc_cnt(rx_ring, rx_desc, skb); - i++; - if (i == rx_ring->count) - i = 0; - - next_rxd = IXGBE_RX_DESC(rx_ring, i); - prefetch(next_rxd); cleaned_count++; - if ((!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))) { - struct ixgbe_rx_buffer *next_buffer; - u32 nextp; + /* place incomplete frames back on ring for completion */ + if (ixgbe_is_non_eop(rx_ring, rx_desc, skb)) + continue; - if (IXGBE_CB(skb)->append_cnt) { - nextp = le32_to_cpu( - rx_desc->wb.upper.status_error); - nextp >>= IXGBE_RXDADV_NEXTP_SHIFT; - } else { - nextp = i; - } - - next_buffer = &rx_ring->rx_buffer_info[nextp]; - - if (ring_is_ps_enabled(rx_ring)) { - rx_buffer_info->skb = next_buffer->skb; - rx_buffer_info->dma = next_buffer->dma; - next_buffer->skb = skb; - next_buffer->dma = 0; - } else { - struct sk_buff *next_skb = next_buffer->skb; - ixgbe_add_active_tail(skb, next_skb); - IXGBE_CB(next_skb)->head = skb; - } - rx_ring->rx_stats.non_eop_descs++; - goto next_desc; - } - - dma_unmap_single(rx_ring->dev, - IXGBE_CB(skb)->dma, - rx_ring->rx_buf_len, - DMA_FROM_DEVICE); - IXGBE_CB(skb)->dma = 0; - IXGBE_CB(skb)->delay_unmap = false; - - if (ixgbe_close_active_frag_list(skb) && - !IXGBE_CB(skb)->append_cnt) { - /* if we got here without RSC the packet is invalid */ - dev_kfree_skb_any(skb); - goto next_desc; - } - - /* ERR_MASK will only have valid bits if EOP set */ - if (unlikely(ixgbe_test_staterr(rx_desc, - IXGBE_RXDADV_ERR_FRAME_ERR_MASK) && - !(adapter->netdev->features & NETIF_F_RXALL))) { - dev_kfree_skb_any(skb); - goto next_desc; - } + /* verify the packet layout is correct */ + if (ixgbe_cleanup_headers(rx_ring, rx_desc, skb)) + continue; /* probably a little skewed due to removing CRC */ total_rx_bytes += skb->len; @@ -1631,32 +1712,16 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, ddp_bytes = ixgbe_fcoe_ddp(adapter, rx_desc, skb); if (!ddp_bytes) { dev_kfree_skb_any(skb); - goto next_desc; + continue; } } + #endif /* IXGBE_FCOE */ ixgbe_rx_skb(q_vector, skb); + /* update budget accounting */ budget--; -next_desc: - if (!budget) - break; - - /* return some buffers to hardware, one at a time is too slow */ - if (cleaned_count >= IXGBE_RX_BUFFER_WRITE) { - ixgbe_alloc_rx_buffers(rx_ring, cleaned_count); - cleaned_count = 0; - } - - /* use prefetched values */ - rx_desc = next_rxd; - } - - rx_ring->next_to_clean = i; - cleaned_count = ixgbe_desc_unused(rx_ring); - - if (cleaned_count) - ixgbe_alloc_rx_buffers(rx_ring, cleaned_count); + } while (likely(budget)); #ifdef IXGBE_FCOE /* include DDPed FCoE data */ @@ -1671,8 +1736,8 @@ next_desc: total_rx_bytes += ddp_bytes; total_rx_packets += DIV_ROUND_UP(ddp_bytes, mss); } -#endif /* IXGBE_FCOE */ +#endif /* IXGBE_FCOE */ u64_stats_update_begin(&rx_ring->syncp); rx_ring->stats.packets += total_rx_packets; rx_ring->stats.bytes += total_rx_bytes; @@ -1680,6 +1745,9 @@ next_desc: q_vector->rx.total_packets += total_rx_packets; q_vector->rx.total_bytes += total_rx_bytes; + if (cleaned_count) + ixgbe_alloc_rx_buffers(rx_ring, cleaned_count); + return !!budget; } @@ -2635,18 +2703,12 @@ static void ixgbe_configure_srrctl(struct ixgbe_adapter *adapter, srrctl |= (IXGBE_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) & IXGBE_SRRCTL_BSIZEHDR_MASK; - if (ring_is_ps_enabled(rx_ring)) { -#if (PAGE_SIZE / 2) > IXGBE_MAX_RXBUFFER - srrctl |= IXGBE_MAX_RXBUFFER >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; +#if PAGE_SIZE > IXGBE_MAX_RXBUFFER + srrctl |= IXGBE_MAX_RXBUFFER >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; #else - srrctl |= (PAGE_SIZE / 2) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; + srrctl |= ixgbe_rx_bufsz(rx_ring) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; #endif - srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS; - } else { - srrctl |= ALIGN(rx_ring->rx_buf_len, 1024) >> - IXGBE_SRRCTL_BSIZEPKT_SHIFT; - srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; - } + srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; IXGBE_WRITE_REG(&adapter->hw, IXGBE_SRRCTL(reg_idx), srrctl); } @@ -2729,13 +2791,11 @@ static void ixgbe_configure_rscctl(struct ixgbe_adapter *adapter, { struct ixgbe_hw *hw = &adapter->hw; u32 rscctrl; - int rx_buf_len; u8 reg_idx = ring->reg_idx; if (!ring_is_rsc_enabled(ring)) return; - rx_buf_len = ring->rx_buf_len; rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(reg_idx)); rscctrl |= IXGBE_RSCCTL_RSCEN; /* @@ -2743,24 +2803,13 @@ static void ixgbe_configure_rscctl(struct ixgbe_adapter *adapter, * total size of max desc * buf_len is not greater * than 65536 */ - if (ring_is_ps_enabled(ring)) { -#if (PAGE_SIZE < 8192) - rscctrl |= IXGBE_RSCCTL_MAXDESC_16; -#elif (PAGE_SIZE < 16384) - rscctrl |= IXGBE_RSCCTL_MAXDESC_8; -#elif (PAGE_SIZE < 32768) - rscctrl |= IXGBE_RSCCTL_MAXDESC_4; +#if (PAGE_SIZE <= 8192) + rscctrl |= IXGBE_RSCCTL_MAXDESC_16; +#elif (PAGE_SIZE <= 16384) + rscctrl |= IXGBE_RSCCTL_MAXDESC_8; #else - rscctrl |= IXGBE_RSCCTL_MAXDESC_1; + rscctrl |= IXGBE_RSCCTL_MAXDESC_4; #endif - } else { - if (rx_buf_len <= IXGBE_RXBUFFER_4K) - rscctrl |= IXGBE_RSCCTL_MAXDESC_16; - else if (rx_buf_len <= IXGBE_RXBUFFER_8K) - rscctrl |= IXGBE_RSCCTL_MAXDESC_8; - else - rscctrl |= IXGBE_RSCCTL_MAXDESC_4; - } IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(reg_idx), rscctrl); } @@ -2977,23 +3026,10 @@ static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter *adapter) struct ixgbe_hw *hw = &adapter->hw; struct net_device *netdev = adapter->netdev; int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; - int rx_buf_len; struct ixgbe_ring *rx_ring; int i; u32 mhadd, hlreg0; - /* Decide whether to use packet split mode or not */ - /* On by default */ - adapter->flags |= IXGBE_FLAG_RX_PS_ENABLED; - - /* Do not use packet split if we're in SR-IOV Mode */ - if (adapter->num_vfs) - adapter->flags &= ~IXGBE_FLAG_RX_PS_ENABLED; - - /* Disable packet split due to 82599 erratum #45 */ - if (hw->mac.type == ixgbe_mac_82599EB) - adapter->flags &= ~IXGBE_FLAG_RX_PS_ENABLED; - #ifdef IXGBE_FCOE /* adjust max frame to be able to do baby jumbo for FCoE */ if ((adapter->flags & IXGBE_FLAG_FCOE_ENABLED) && @@ -3012,27 +3048,6 @@ static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter *adapter) /* MHADD will allow an extra 4 bytes past for vlan tagged frames */ max_frame += VLAN_HLEN; - /* Set the RX buffer length according to the mode */ - if (adapter->flags & IXGBE_FLAG_RX_PS_ENABLED) { - rx_buf_len = IXGBE_RX_HDR_SIZE; - } else { - if (!(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) && - (netdev->mtu <= ETH_DATA_LEN)) - rx_buf_len = MAXIMUM_ETHERNET_VLAN_SIZE; - /* - * Make best use of allocation by using all but 1K of a - * power of 2 allocation that will be used for skb->head. - */ - else if (max_frame <= IXGBE_RXBUFFER_3K) - rx_buf_len = IXGBE_RXBUFFER_3K; - else if (max_frame <= IXGBE_RXBUFFER_7K) - rx_buf_len = IXGBE_RXBUFFER_7K; - else if (max_frame <= IXGBE_RXBUFFER_15K) - rx_buf_len = IXGBE_RXBUFFER_15K; - else - rx_buf_len = IXGBE_MAX_RXBUFFER; - } - hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0); /* set jumbo enable since MHADD.MFS is keeping size locked at max_frame */ hlreg0 |= IXGBE_HLREG0_JUMBOEN; @@ -3044,32 +3059,16 @@ static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter *adapter) */ for (i = 0; i < adapter->num_rx_queues; i++) { rx_ring = adapter->rx_ring[i]; - rx_ring->rx_buf_len = rx_buf_len; - - if (adapter->flags & IXGBE_FLAG_RX_PS_ENABLED) - set_ring_ps_enabled(rx_ring); - else - clear_ring_ps_enabled(rx_ring); - if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) set_ring_rsc_enabled(rx_ring); else clear_ring_rsc_enabled(rx_ring); - #ifdef IXGBE_FCOE if (netdev->features & NETIF_F_FCOE_MTU) { struct ixgbe_ring_feature *f; f = &adapter->ring_feature[RING_F_FCOE]; - if ((i >= f->mask) && (i < f->mask + f->indices)) { - clear_ring_ps_enabled(rx_ring); - if (rx_buf_len < IXGBE_FCOE_JUMBO_FRAME_SIZE) - rx_ring->rx_buf_len = - IXGBE_FCOE_JUMBO_FRAME_SIZE; - } else if (!ring_is_rsc_enabled(rx_ring) && - !ring_is_ps_enabled(rx_ring)) { - rx_ring->rx_buf_len = - IXGBE_FCOE_JUMBO_FRAME_SIZE; - } + if ((i >= f->mask) && (i < f->mask + f->indices)) + set_bit(__IXGBE_RX_FCOE_BUFSZ, &rx_ring->state); } #endif /* IXGBE_FCOE */ } @@ -3990,6 +3989,27 @@ void ixgbe_reset(struct ixgbe_adapter *adapter) IXGBE_RAH_AV); } +/** + * ixgbe_init_rx_page_offset - initialize page offset values for Rx buffers + * @rx_ring: ring to setup + * + * On many IA platforms the L1 cache has a critical stride of 4K, this + * results in each receive buffer starting in the same cache set. To help + * reduce the pressure on this cache set we can interleave the offsets so + * that only every other buffer will be in the same cache set. + **/ +static void ixgbe_init_rx_page_offset(struct ixgbe_ring *rx_ring) +{ + struct ixgbe_rx_buffer *rx_buffer = rx_ring->rx_buffer_info; + u16 i; + + for (i = 0; i < rx_ring->count; i += 2) { + rx_buffer[0].page_offset = 0; + rx_buffer[1].page_offset = ixgbe_rx_bufsz(rx_ring); + rx_buffer = &rx_buffer[2]; + } +} + /** * ixgbe_clean_rx_ring - Free Rx Buffers per Queue * @rx_ring: ring to free buffers from @@ -4006,49 +4026,40 @@ static void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring) /* Free all the Rx ring sk_buffs */ for (i = 0; i < rx_ring->count; i++) { - struct ixgbe_rx_buffer *rx_buffer_info; + struct ixgbe_rx_buffer *rx_buffer; - rx_buffer_info = &rx_ring->rx_buffer_info[i]; - if (rx_buffer_info->dma) { - dma_unmap_single(rx_ring->dev, rx_buffer_info->dma, - rx_ring->rx_buf_len, - DMA_FROM_DEVICE); - rx_buffer_info->dma = 0; - } - if (rx_buffer_info->skb) { - struct sk_buff *skb = rx_buffer_info->skb; - rx_buffer_info->skb = NULL; - /* We need to clean up RSC frag lists */ - skb = ixgbe_merge_active_tail(skb); - ixgbe_close_active_frag_list(skb); - if (IXGBE_CB(skb)->delay_unmap) { - dma_unmap_single(dev, - IXGBE_CB(skb)->dma, - rx_ring->rx_buf_len, - DMA_FROM_DEVICE); - IXGBE_CB(skb)->dma = 0; - IXGBE_CB(skb)->delay_unmap = false; + rx_buffer = &rx_ring->rx_buffer_info[i]; + if (rx_buffer->skb) { + struct sk_buff *skb = rx_buffer->skb; + if (IXGBE_CB(skb)->page_released) { + dma_unmap_page(dev, + IXGBE_CB(skb)->dma, + ixgbe_rx_bufsz(rx_ring), + DMA_FROM_DEVICE); + IXGBE_CB(skb)->page_released = false; } dev_kfree_skb(skb); } - if (!rx_buffer_info->page) - continue; - if (rx_buffer_info->page_dma) { - dma_unmap_page(dev, rx_buffer_info->page_dma, - PAGE_SIZE / 2, DMA_FROM_DEVICE); - rx_buffer_info->page_dma = 0; - } - put_page(rx_buffer_info->page); - rx_buffer_info->page = NULL; - rx_buffer_info->page_offset = 0; + rx_buffer->skb = NULL; + if (rx_buffer->dma) + dma_unmap_page(dev, rx_buffer->dma, + ixgbe_rx_pg_size(rx_ring), + DMA_FROM_DEVICE); + rx_buffer->dma = 0; + if (rx_buffer->page) + put_page(rx_buffer->page); + rx_buffer->page = NULL; } size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count; memset(rx_ring->rx_buffer_info, 0, size); + ixgbe_init_rx_page_offset(rx_ring); + /* Zero out the descriptor ring */ memset(rx_ring->desc, 0, rx_ring->size); + rx_ring->next_to_alloc = 0; rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; } @@ -5412,6 +5423,8 @@ int ixgbe_setup_rx_resources(struct ixgbe_ring *rx_ring) rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; + ixgbe_init_rx_page_offset(rx_ring); + return 0; err: vfree(rx_ring->rx_buffer_info); From 655309e944fd482e59850d55186571c1b2a91e55 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 8 Feb 2012 07:50:35 +0000 Subject: [PATCH 06/12] ixgbe: cleanup logic in ixgbe_change_mtu This change is meant to just cleanup the logic in ixgbe_change_mtu since we are making it unnecessarily complex due to a workaround required for 82599 when SR-IOV is enabled. Signed-off-by: Alexander Duyck Tested-by: Stephen Ko Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index e97ef4591ade..db1f17c3ed47 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -5544,20 +5544,24 @@ static void ixgbe_free_all_rx_resources(struct ixgbe_adapter *adapter) static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu) { struct ixgbe_adapter *adapter = netdev_priv(netdev); - struct ixgbe_hw *hw = &adapter->hw; int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN; /* MTU < 68 is an error and causes problems on some kernels */ - if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED && - hw->mac.type != ixgbe_mac_X540) { - if ((new_mtu < 68) || (max_frame > MAXIMUM_ETHERNET_VLAN_SIZE)) + if ((new_mtu < 68) || (max_frame > IXGBE_MAX_JUMBO_FRAME_SIZE)) + return -EINVAL; + + /* + * For 82599EB we cannot allow PF to change MTU greater than 1500 + * in SR-IOV mode as it may cause buffer overruns in guest VFs that + * don't allocate and chain buffers correctly. + */ + if ((adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) && + (adapter->hw.mac.type == ixgbe_mac_82599EB) && + (max_frame > MAXIMUM_ETHERNET_VLAN_SIZE)) return -EINVAL; - } else { - if ((new_mtu < 68) || (max_frame > IXGBE_MAX_JUMBO_FRAME_SIZE)) - return -EINVAL; - } e_info(probe, "changing MTU from %d to %d\n", netdev->mtu, new_mtu); + /* must set new MTU before calling down or up */ netdev->mtu = new_mtu; From a50c29dd09ed14f8489677813a4b1c8b62dcf19d Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 8 Feb 2012 07:50:40 +0000 Subject: [PATCH 07/12] ixgbe: Make certain that all frames fit minimum size requirements This change makes certain that any packet we attempt to transmit will meet minimum size requirements for the hardware. Signed-off-by: Alexander Duyck Tested-by: Stephen Ko Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index db1f17c3ed47..bf6d122e6f99 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7162,11 +7162,27 @@ out_drop: return NETDEV_TX_OK; } -static netdev_tx_t ixgbe_xmit_frame(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t ixgbe_xmit_frame(struct sk_buff *skb, + struct net_device *netdev) { struct ixgbe_adapter *adapter = netdev_priv(netdev); struct ixgbe_ring *tx_ring; + if (skb->len <= 0) { + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + + /* + * The minimum packet size for olinfo paylen is 17 so pad the skb + * in order to meet this minimum size requirement. + */ + if (skb->len < 17) { + if (skb_padto(skb, 17)) + return NETDEV_TX_OK; + skb->len = 17; + } + tx_ring = adapter->tx_ring[skb->queue_mapping]; return ixgbe_xmit_frame_ring(skb, adapter, tx_ring); } From 93f5b3c1f148f2cca247a2c5afdd3ba7a123a6f1 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 8 Feb 2012 07:50:45 +0000 Subject: [PATCH 08/12] ixgbe: Modify setup of descriptor flags to avoid conditional jumps This change makes it more likely that the descriptor flags setup will use cmov instructions instead of conditional jumps when setting up the flags. The advantage to this is that the code should just flow a bit more smoothly. To do this it is necessary to set the TX_FLAGS_CSUM bit in tx_flags when doing TSO so that we also do the checksum in addition to the segmentation offload. Signed-off-by: Alexander Duyck Tested-by: Stephen Ko Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index bf6d122e6f99..efce423586d4 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6689,7 +6689,7 @@ static __le32 ixgbe_tx_cmd_type(u32 tx_flags) /* set segmentation enable bits for TSO/FSO */ #ifdef IXGBE_FCOE - if ((tx_flags & IXGBE_TX_FLAGS_TSO) || (tx_flags & IXGBE_TX_FLAGS_FSO)) + if (tx_flags & (IXGBE_TX_FLAGS_TSO | IXGBE_TX_FLAGS_FSO)) #else if (tx_flags & IXGBE_TX_FLAGS_TSO) #endif @@ -6700,33 +6700,33 @@ static __le32 ixgbe_tx_cmd_type(u32 tx_flags) static __le32 ixgbe_tx_olinfo_status(u32 tx_flags, unsigned int paylen) { - __le32 olinfo_status = - cpu_to_le32(paylen << IXGBE_ADVTXD_PAYLEN_SHIFT); - - if (tx_flags & IXGBE_TX_FLAGS_TSO) { - olinfo_status |= cpu_to_le32(IXGBE_ADVTXD_POPTS_TXSM | - (1 << IXGBE_ADVTXD_IDX_SHIFT)); - /* enble IPv4 checksum for TSO */ - if (tx_flags & IXGBE_TX_FLAGS_IPV4) - olinfo_status |= cpu_to_le32(IXGBE_ADVTXD_POPTS_IXSM); - } + __le32 olinfo_status = cpu_to_le32(paylen << IXGBE_ADVTXD_PAYLEN_SHIFT); /* enable L4 checksum for TSO and TX checksum offload */ if (tx_flags & IXGBE_TX_FLAGS_CSUM) olinfo_status |= cpu_to_le32(IXGBE_ADVTXD_POPTS_TXSM); -#ifdef IXGBE_FCOE - /* use index 1 context for FCOE/FSO */ - if (tx_flags & IXGBE_TX_FLAGS_FCOE) - olinfo_status |= cpu_to_le32(IXGBE_ADVTXD_CC | - (1 << IXGBE_ADVTXD_IDX_SHIFT)); + /* enble IPv4 checksum for TSO */ + if (tx_flags & IXGBE_TX_FLAGS_IPV4) + olinfo_status |= cpu_to_le32(IXGBE_ADVTXD_POPTS_IXSM); + /* use index 1 context for TSO/FSO/FCOE */ +#ifdef IXGBE_FCOE + if (tx_flags & (IXGBE_TX_FLAGS_TSO | IXGBE_TX_FLAGS_FCOE)) +#else + if (tx_flags & IXGBE_TX_FLAGS_TSO) #endif + olinfo_status |= cpu_to_le32(1 << IXGBE_ADVTXD_IDX_SHIFT); + /* * Check Context must be set if Tx switch is enabled, which it * always is for case where virtual functions are running */ +#ifdef IXGBE_FCOE + if (tx_flags & (IXGBE_TX_FLAGS_TXSW | IXGBE_TX_FLAGS_FCOE)) +#else if (tx_flags & IXGBE_TX_FLAGS_TXSW) +#endif olinfo_status |= cpu_to_le32(IXGBE_ADVTXD_CC); return olinfo_status; @@ -7140,7 +7140,7 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, if (tso < 0) goto out_drop; else if (tso) - tx_flags |= IXGBE_TX_FLAGS_TSO; + tx_flags |= IXGBE_TX_FLAGS_TSO | IXGBE_TX_FLAGS_CSUM; else if (ixgbe_tx_csum(tx_ring, skb, tx_flags, protocol)) tx_flags |= IXGBE_TX_FLAGS_CSUM; From 7d7ce682f8437ff91c964c478b8845ed98a3207d Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 8 Feb 2012 07:50:51 +0000 Subject: [PATCH 09/12] ixgbe: Use packets to track Tx completions instead of a seperate value A separate value was added to track Tx completions in order to determine if the Tx unit was hung. However we can do the same thing using the number of packets completed without having to add another stat to the Tx ring. Signed-off-by: Alexander Duyck Tested-by: Stephen Ko Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 1 - drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 5 +---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index b1e3baf8d450..6d4ef1a30890 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -173,7 +173,6 @@ struct ixgbe_queue_stats { struct ixgbe_tx_queue_stats { u64 restart_queue; u64 tx_busy; - u64 completed; u64 tx_done_old; }; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index efce423586d4..832a9fc3ad57 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -661,7 +661,7 @@ static void ixgbe_update_xoff_received(struct ixgbe_adapter *adapter) static u64 ixgbe_get_tx_completed(struct ixgbe_ring *ring) { - return ring->tx_stats.completed; + return ring->stats.packets; } static u64 ixgbe_get_tx_pending(struct ixgbe_ring *ring) @@ -760,9 +760,6 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, if (!(eop_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD))) break; - /* count the packet as being completed */ - tx_ring->tx_stats.completed++; - /* clear next_to_watch to prevent false hangs */ tx_buffer->next_to_watch = NULL; From fd0db0ed02a6abce5427e90d1e8522322107d62b Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 8 Feb 2012 07:50:56 +0000 Subject: [PATCH 10/12] ixgbe: Place skb on first buffer_info structure to avoid using stack space Instead of keeping a local copy of the skb on the stack for as long as long as we do it makes sense to instead just place it on the first tx_buffer structure so that we can save space on the stack and avoid unnecessary read/write operations copying the pointer out of the stack and onto the ring later. Signed-off-by: Alexander Duyck Tested-by: Stephen Ko Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 13 ++--- drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c | 6 ++- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 52 +++++++++++-------- 3 files changed, 41 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 6d4ef1a30890..55f31fe58e41 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -150,12 +150,12 @@ struct vf_macvlans { struct ixgbe_tx_buffer { union ixgbe_adv_tx_desc *next_to_watch; unsigned long time_stamp; - dma_addr_t dma; - u32 length; - u32 tx_flags; struct sk_buff *skb; - u32 bytecount; - u16 gso_segs; + unsigned int bytecount; + unsigned short gso_segs; + dma_addr_t dma; + unsigned int length; + u32 tx_flags; }; struct ixgbe_rx_buffer { @@ -631,7 +631,8 @@ extern void ixgbe_tx_ctxtdesc(struct ixgbe_ring *, u32, u32, u32, u32); extern void ixgbe_do_reset(struct net_device *netdev); #ifdef IXGBE_FCOE extern void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter); -extern int ixgbe_fso(struct ixgbe_ring *tx_ring, struct sk_buff *skb, +extern int ixgbe_fso(struct ixgbe_ring *tx_ring, + struct ixgbe_tx_buffer *first, u32 tx_flags, u8 *hdr_len); extern void ixgbe_cleanup_fcoe(struct ixgbe_adapter *adapter); extern int ixgbe_fcoe_ddp(struct ixgbe_adapter *adapter, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c index da7da752b6b4..910847a901ef 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c @@ -447,7 +447,7 @@ ddp_out: /** * ixgbe_fso - ixgbe FCoE Sequence Offload (FSO) * @tx_ring: tx desc ring - * @skb: associated skb + * @first: first tx_buffer structure containing skb, tx_flags, and protocol * @tx_flags: tx flags * @hdr_len: hdr_len to be returned * @@ -455,9 +455,11 @@ ddp_out: * * Returns : 0 indicates no FSO, > 0 for FSO, < 0 for error */ -int ixgbe_fso(struct ixgbe_ring *tx_ring, struct sk_buff *skb, +int ixgbe_fso(struct ixgbe_ring *tx_ring, + struct ixgbe_tx_buffer *first, u32 tx_flags, u8 *hdr_len) { + struct sk_buff *skb = first->skb; struct fc_frame_header *fh; u32 vlan_macip_lens; u32 fcoe_sof_eof = 0; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 832a9fc3ad57..c05d560c004f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -763,12 +763,16 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, /* clear next_to_watch to prevent false hangs */ tx_buffer->next_to_watch = NULL; + /* free the skb */ + dev_kfree_skb_any(tx_buffer->skb); + + /* clear tx_buffer data */ + tx_buffer->skb = NULL; + do { ixgbe_unmap_tx_resource(tx_ring, tx_buffer); if (likely(tx_desc == eop_desc)) { eop_desc = NULL; - dev_kfree_skb_any(tx_buffer->skb); - tx_buffer->skb = NULL; total_bytes += tx_buffer->bytecount; total_packets += tx_buffer->gso_segs; @@ -6551,9 +6555,11 @@ void ixgbe_tx_ctxtdesc(struct ixgbe_ring *tx_ring, u32 vlan_macip_lens, context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx); } -static int ixgbe_tso(struct ixgbe_ring *tx_ring, struct sk_buff *skb, +static int ixgbe_tso(struct ixgbe_ring *tx_ring, + struct ixgbe_tx_buffer *first, u32 tx_flags, __be16 protocol, u8 *hdr_len) { + struct sk_buff *skb = first->skb; int err; u32 vlan_macip_lens, type_tucmd; u32 mss_l4len_idx, l4len; @@ -6607,9 +6613,10 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring, struct sk_buff *skb, } static bool ixgbe_tx_csum(struct ixgbe_ring *tx_ring, - struct sk_buff *skb, u32 tx_flags, - __be16 protocol) + struct ixgbe_tx_buffer *first, + u32 tx_flags, __be16 protocol) { + struct sk_buff *skb = first->skb; u32 vlan_macip_lens = 0; u32 mss_l4len_idx = 0; u32 type_tucmd = 0; @@ -6733,11 +6740,11 @@ static __le32 ixgbe_tx_olinfo_status(u32 tx_flags, unsigned int paylen) IXGBE_TXD_CMD_RS) static void ixgbe_tx_map(struct ixgbe_ring *tx_ring, - struct sk_buff *skb, struct ixgbe_tx_buffer *first, u32 tx_flags, const u8 hdr_len) { + struct sk_buff *skb = first->skb; struct device *dev = tx_ring->dev; struct ixgbe_tx_buffer *tx_buffer_info; union ixgbe_adv_tx_desc *tx_desc; @@ -6850,7 +6857,6 @@ static void ixgbe_tx_map(struct ixgbe_ring *tx_ring, /* multiply data chunks by size of headers */ tx_buffer_info->bytecount = paylen + (gso_segs * hdr_len); tx_buffer_info->gso_segs = gso_segs; - tx_buffer_info->skb = skb; netdev_tx_sent_queue(txring_txq(tx_ring), tx_buffer_info->bytecount); @@ -6878,7 +6884,7 @@ dma_error: /* clear dma mappings for failed tx_buffer_info map */ for (;;) { tx_buffer_info = &tx_ring->tx_buffer_info[i]; - ixgbe_unmap_tx_resource(tx_ring, tx_buffer_info); + ixgbe_unmap_and_free_tx_resource(tx_ring, tx_buffer_info); if (tx_buffer_info == first) break; if (i == 0) @@ -6886,12 +6892,11 @@ dma_error: i--; } - dev_kfree_skb_any(skb); - tx_ring->next_to_use = i; } -static void ixgbe_atr(struct ixgbe_ring *ring, struct sk_buff *skb, +static void ixgbe_atr(struct ixgbe_ring *ring, + struct ixgbe_tx_buffer *first, u32 tx_flags, __be16 protocol) { struct ixgbe_q_vector *q_vector = ring->q_vector; @@ -6916,7 +6921,7 @@ static void ixgbe_atr(struct ixgbe_ring *ring, struct sk_buff *skb, ring->atr_count++; /* snag network header to get L4 type and address */ - hdr.network = skb_network_header(skb); + hdr.network = skb_network_header(first->skb); /* Currently only IPv4/IPv6 with TCP is supported */ if ((protocol != __constant_htons(ETH_P_IPV6) || @@ -6925,7 +6930,7 @@ static void ixgbe_atr(struct ixgbe_ring *ring, struct sk_buff *skb, hdr.ipv4->protocol != IPPROTO_TCP)) return; - th = tcp_hdr(skb); + th = tcp_hdr(first->skb); /* skip this packet since it is invalid or the socket is closing */ if (!th || th->fin) @@ -7063,6 +7068,10 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, return NETDEV_TX_BUSY; } + /* record the location of the first descriptor for this packet */ + first = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; + first->skb = skb; + /* if we have a HW VLAN tag being added default to the HW one */ if (vlan_tx_tag_present(skb)) { tx_flags |= vlan_tx_tag_get(skb) << IXGBE_TX_FLAGS_VLAN_SHIFT; @@ -7109,14 +7118,11 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, } } - /* record the location of the first descriptor for this packet */ - first = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; - #ifdef IXGBE_FCOE /* setup tx offload for FCoE */ if ((protocol == __constant_htons(ETH_P_FCOE)) && (adapter->flags & IXGBE_FLAG_FCOE_ENABLED)) { - tso = ixgbe_fso(tx_ring, skb, tx_flags, &hdr_len); + tso = ixgbe_fso(tx_ring, first, tx_flags, &hdr_len); if (tso < 0) goto out_drop; else if (tso) @@ -7133,29 +7139,31 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, if (protocol == __constant_htons(ETH_P_IP)) tx_flags |= IXGBE_TX_FLAGS_IPV4; - tso = ixgbe_tso(tx_ring, skb, tx_flags, protocol, &hdr_len); + tso = ixgbe_tso(tx_ring, first, tx_flags, protocol, &hdr_len); if (tso < 0) goto out_drop; else if (tso) tx_flags |= IXGBE_TX_FLAGS_TSO | IXGBE_TX_FLAGS_CSUM; - else if (ixgbe_tx_csum(tx_ring, skb, tx_flags, protocol)) + else if (ixgbe_tx_csum(tx_ring, first, tx_flags, protocol)) tx_flags |= IXGBE_TX_FLAGS_CSUM; /* add the ATR filter if ATR is on */ if (test_bit(__IXGBE_TX_FDIR_INIT_DONE, &tx_ring->state)) - ixgbe_atr(tx_ring, skb, tx_flags, protocol); + ixgbe_atr(tx_ring, first, tx_flags, protocol); #ifdef IXGBE_FCOE xmit_fcoe: #endif /* IXGBE_FCOE */ - ixgbe_tx_map(tx_ring, skb, first, tx_flags, hdr_len); + ixgbe_tx_map(tx_ring, first, tx_flags, hdr_len); ixgbe_maybe_stop_tx(tx_ring, DESC_NEEDED); return NETDEV_TX_OK; out_drop: - dev_kfree_skb_any(skb); + dev_kfree_skb_any(first->skb); + first->skb = NULL; + return NETDEV_TX_OK; } From 091a6246869cec2ac66e897b436f7fd59ec4d316 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 8 Feb 2012 07:51:01 +0000 Subject: [PATCH 11/12] ixgbe: Write gso_segs and bytcount to the ring sooner This change makes it so that gso_segs and bytecount are written to the ring sooner. This helps to simplify the logic for the two since segmentation offloads can now update them within their own function. Signed-off-by: Alexander Duyck Tested-by: Stephen Ko Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c | 11 ++++-- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 35 +++++++------------ 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c index 910847a901ef..5f943d3f85c4 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c @@ -532,9 +532,14 @@ int ixgbe_fso(struct ixgbe_ring *tx_ring, *hdr_len = sizeof(struct fcoe_crc_eof); /* hdr_len includes fc_hdr if FCoE LSO is enabled */ - if (skb_is_gso(skb)) - *hdr_len += (skb_transport_offset(skb) + - sizeof(struct fc_frame_header)); + if (skb_is_gso(skb)) { + *hdr_len += skb_transport_offset(skb) + + sizeof(struct fc_frame_header); + /* update gso_segs and bytecount */ + first->gso_segs = DIV_ROUND_UP(skb->len - *hdr_len, + skb_shinfo(skb)->gso_size); + first->bytecount += (first->gso_segs - 1) * *hdr_len; + } /* mss_l4len_id: use 1 for FSO as TSO, no need for L4LEN */ mss_l4len_idx = skb_shinfo(skb)->gso_size << IXGBE_ADVTXD_MSS_SHIFT; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index c05d560c004f..40d729eb1443 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -763,6 +763,10 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, /* clear next_to_watch to prevent false hangs */ tx_buffer->next_to_watch = NULL; + /* update the statistics for this packet */ + total_bytes += tx_buffer->bytecount; + total_packets += tx_buffer->gso_segs; + /* free the skb */ dev_kfree_skb_any(tx_buffer->skb); @@ -771,13 +775,9 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, do { ixgbe_unmap_tx_resource(tx_ring, tx_buffer); - if (likely(tx_desc == eop_desc)) { + if (likely(tx_desc == eop_desc)) eop_desc = NULL; - total_bytes += tx_buffer->bytecount; - total_packets += tx_buffer->gso_segs; - } - tx_buffer++; tx_desc++; i++; @@ -6593,9 +6593,14 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring, 0, IPPROTO_TCP, 0); } + /* compute header lengths */ l4len = tcp_hdrlen(skb); *hdr_len = skb_transport_offset(skb) + l4len; + /* update gso size and bytecount with header size */ + first->gso_segs = skb_shinfo(skb)->gso_segs; + first->bytecount += (first->gso_segs - 1) * *hdr_len; + /* mss_l4len_id: use 1 as index for TSO */ mss_l4len_idx = l4len << IXGBE_ADVTXD_L4LEN_SHIFT; mss_l4len_idx |= skb_shinfo(skb)->gso_size << IXGBE_ADVTXD_MSS_SHIFT; @@ -6757,7 +6762,6 @@ static void ixgbe_tx_map(struct ixgbe_ring *tx_ring, u32 offset = 0; u32 paylen = skb->len - hdr_len; u16 i = tx_ring->next_to_use; - u16 gso_segs; #ifdef IXGBE_FCOE if (tx_flags & IXGBE_TX_FLAGS_FCOE) { @@ -6843,22 +6847,7 @@ static void ixgbe_tx_map(struct ixgbe_ring *tx_ring, tx_ring->next_to_use = i; - if (tx_flags & IXGBE_TX_FLAGS_TSO) - gso_segs = skb_shinfo(skb)->gso_segs; -#ifdef IXGBE_FCOE - /* adjust for FCoE Sequence Offload */ - else if (tx_flags & IXGBE_TX_FLAGS_FSO) - gso_segs = DIV_ROUND_UP(skb->len - hdr_len, - skb_shinfo(skb)->gso_size); -#endif /* IXGBE_FCOE */ - else - gso_segs = 1; - - /* multiply data chunks by size of headers */ - tx_buffer_info->bytecount = paylen + (gso_segs * hdr_len); - tx_buffer_info->gso_segs = gso_segs; - - netdev_tx_sent_queue(txring_txq(tx_ring), tx_buffer_info->bytecount); + netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); /* set the timestamp */ first->time_stamp = jiffies; @@ -7071,6 +7060,8 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, /* record the location of the first descriptor for this packet */ first = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; first->skb = skb; + first->bytecount = skb->len; + first->gso_segs = 1; /* if we have a HW VLAN tag being added default to the HW one */ if (vlan_tx_tag_present(skb)) { From 729739b754affa482e92fa7836e4066096089d11 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 8 Feb 2012 07:51:06 +0000 Subject: [PATCH 12/12] ixgbe: always write DMA for single_mapped value with skb This change makes it so that we always write the DMA address for the skb itself on the same tx_buffer struct that the skb is written on. This way we don't need the MAPPED_AS_PAGE flag and we always know it will be the first DMA value that we will have to unmap. In addition I have found an issue in which we were leaking a DMA mapping if the value happened to be 0 which is possible on some platforms. In order to resolve that I have updated the transmit path to use the length instead of the DMA mapping in order to determine if a mapping is actually present. One other tweak in this patch is that it only writes the olinfo information on the first descriptor. As it turns out it isn't necessary to write it for anything but the first descriptor so there is no need to carry it forward. Signed-off-by: Alexander Duyck Tested-by: Stephen Ko Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 5 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 270 ++++++++++-------- 2 files changed, 155 insertions(+), 120 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 55f31fe58e41..e0d809d0ed75 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -96,7 +96,6 @@ #define IXGBE_TX_FLAGS_FCOE (u32)(1 << 5) #define IXGBE_TX_FLAGS_FSO (u32)(1 << 6) #define IXGBE_TX_FLAGS_TXSW (u32)(1 << 7) -#define IXGBE_TX_FLAGS_MAPPED_AS_PAGE (u32)(1 << 8) #define IXGBE_TX_FLAGS_VLAN_MASK 0xffff0000 #define IXGBE_TX_FLAGS_VLAN_PRIO_MASK 0xe0000000 #define IXGBE_TX_FLAGS_VLAN_PRIO_SHIFT 29 @@ -153,8 +152,8 @@ struct ixgbe_tx_buffer { struct sk_buff *skb; unsigned int bytecount; unsigned short gso_segs; - dma_addr_t dma; - unsigned int length; + DEFINE_DMA_UNMAP_ADDR(dma); + DEFINE_DMA_UNMAP_LEN(len); u32 tx_flags; }; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 40d729eb1443..1d8f9f83f8ed 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -289,7 +289,7 @@ static void ixgbe_dump(struct ixgbe_adapter *adapter) struct ixgbe_reg_info *reginfo; int n = 0; struct ixgbe_ring *tx_ring; - struct ixgbe_tx_buffer *tx_buffer_info; + struct ixgbe_tx_buffer *tx_buffer; union ixgbe_adv_tx_desc *tx_desc; struct my_u0 { u64 a; u64 b; } *u0; struct ixgbe_ring *rx_ring; @@ -329,14 +329,13 @@ static void ixgbe_dump(struct ixgbe_adapter *adapter) pr_info("Queue [NTU] [NTC] [bi(ntc)->dma ] leng ntw timestamp\n"); for (n = 0; n < adapter->num_tx_queues; n++) { tx_ring = adapter->tx_ring[n]; - tx_buffer_info = - &tx_ring->tx_buffer_info[tx_ring->next_to_clean]; + tx_buffer = &tx_ring->tx_buffer_info[tx_ring->next_to_clean]; pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n", n, tx_ring->next_to_use, tx_ring->next_to_clean, - (u64)tx_buffer_info->dma, - tx_buffer_info->length, - tx_buffer_info->next_to_watch, - (u64)tx_buffer_info->time_stamp); + (u64)dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + tx_buffer->next_to_watch, + (u64)tx_buffer->time_stamp); } /* Print TX Rings */ @@ -367,17 +366,17 @@ static void ixgbe_dump(struct ixgbe_adapter *adapter) for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) { tx_desc = IXGBE_TX_DESC(tx_ring, i); - tx_buffer_info = &tx_ring->tx_buffer_info[i]; + tx_buffer = &tx_ring->tx_buffer_info[i]; u0 = (struct my_u0 *)tx_desc; pr_info("T [0x%03X] %016llX %016llX %016llX" " %04X %p %016llX %p", i, le64_to_cpu(u0->a), le64_to_cpu(u0->b), - (u64)tx_buffer_info->dma, - tx_buffer_info->length, - tx_buffer_info->next_to_watch, - (u64)tx_buffer_info->time_stamp, - tx_buffer_info->skb); + (u64)dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + tx_buffer->next_to_watch, + (u64)tx_buffer->time_stamp, + tx_buffer->skb); if (i == tx_ring->next_to_use && i == tx_ring->next_to_clean) pr_cont(" NTC/U\n"); @@ -389,11 +388,13 @@ static void ixgbe_dump(struct ixgbe_adapter *adapter) pr_cont("\n"); if (netif_msg_pktdata(adapter) && - tx_buffer_info->dma != 0) + dma_unmap_len(tx_buffer, len) != 0) print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 1, - phys_to_virt(tx_buffer_info->dma), - tx_buffer_info->length, true); + phys_to_virt(dma_unmap_addr(tx_buffer, + dma)), + dma_unmap_len(tx_buffer, len), + true); } } @@ -579,32 +580,26 @@ static inline void ixgbe_irq_rearm_queues(struct ixgbe_adapter *adapter, } } -static inline void ixgbe_unmap_tx_resource(struct ixgbe_ring *ring, - struct ixgbe_tx_buffer *tx_buffer) +void ixgbe_unmap_and_free_tx_resource(struct ixgbe_ring *ring, + struct ixgbe_tx_buffer *tx_buffer) { - if (tx_buffer->dma) { - if (tx_buffer->tx_flags & IXGBE_TX_FLAGS_MAPPED_AS_PAGE) - dma_unmap_page(ring->dev, - tx_buffer->dma, - tx_buffer->length, - DMA_TO_DEVICE); - else + if (tx_buffer->skb) { + dev_kfree_skb_any(tx_buffer->skb); + if (dma_unmap_len(tx_buffer, len)) dma_unmap_single(ring->dev, - tx_buffer->dma, - tx_buffer->length, - DMA_TO_DEVICE); + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + } else if (dma_unmap_len(tx_buffer, len)) { + dma_unmap_page(ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); } - tx_buffer->dma = 0; -} - -void ixgbe_unmap_and_free_tx_resource(struct ixgbe_ring *tx_ring, - struct ixgbe_tx_buffer *tx_buffer_info) -{ - ixgbe_unmap_tx_resource(tx_ring, tx_buffer_info); - if (tx_buffer_info->skb) - dev_kfree_skb_any(tx_buffer_info->skb); - tx_buffer_info->skb = NULL; - /* tx_buffer_info must be completely set up in the transmit path */ + tx_buffer->next_to_watch = NULL; + tx_buffer->skb = NULL; + dma_unmap_len_set(tx_buffer, len, 0); + /* tx_buffer must be completely set up in the transmit path */ } static void ixgbe_update_xoff_received(struct ixgbe_adapter *adapter) @@ -741,12 +736,16 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, union ixgbe_adv_tx_desc *tx_desc; unsigned int total_bytes = 0, total_packets = 0; unsigned int budget = q_vector->tx.work_limit; - u16 i = tx_ring->next_to_clean; + unsigned int i = tx_ring->next_to_clean; + + if (test_bit(__IXGBE_DOWN, &adapter->state)) + return true; tx_buffer = &tx_ring->tx_buffer_info[i]; tx_desc = IXGBE_TX_DESC(tx_ring, i); + i -= tx_ring->count; - for (; budget; budget--) { + do { union ixgbe_adv_tx_desc *eop_desc = tx_buffer->next_to_watch; /* if next_to_watch is not set then there is no work pending */ @@ -770,27 +769,55 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, /* free the skb */ dev_kfree_skb_any(tx_buffer->skb); + /* unmap skb header data */ + dma_unmap_single(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + /* clear tx_buffer data */ tx_buffer->skb = NULL; + dma_unmap_len_set(tx_buffer, len, 0); - do { - ixgbe_unmap_tx_resource(tx_ring, tx_buffer); - if (likely(tx_desc == eop_desc)) - eop_desc = NULL; - + /* unmap remaining buffers */ + while (tx_desc != eop_desc) { tx_buffer++; tx_desc++; i++; - if (unlikely(i == tx_ring->count)) { - i = 0; - + if (unlikely(!i)) { + i -= tx_ring->count; tx_buffer = tx_ring->tx_buffer_info; tx_desc = IXGBE_TX_DESC(tx_ring, 0); } - } while (eop_desc); - } + /* unmap any remaining paged data */ + if (dma_unmap_len(tx_buffer, len)) { + dma_unmap_page(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + dma_unmap_len_set(tx_buffer, len, 0); + } + } + /* move us one more past the eop_desc for start of next pkt */ + tx_buffer++; + tx_desc++; + i++; + if (unlikely(!i)) { + i -= tx_ring->count; + tx_buffer = tx_ring->tx_buffer_info; + tx_desc = IXGBE_TX_DESC(tx_ring, 0); + } + + /* issue prefetch for next Tx descriptor */ + prefetch(tx_desc); + + /* update budget accounting */ + budget--; + } while (likely(budget)); + + i += tx_ring->count; tx_ring->next_to_clean = i; u64_stats_update_begin(&tx_ring->syncp); tx_ring->stats.bytes += total_bytes; @@ -802,7 +829,6 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, if (check_for_tx_hang(tx_ring) && ixgbe_check_tx_hang(tx_ring)) { /* schedule immediate reset if we believe we hung */ struct ixgbe_hw *hw = &adapter->hw; - tx_desc = IXGBE_TX_DESC(tx_ring, i); e_err(drv, "Detected Tx Unit Hang\n" " Tx Queue <%d>\n" " TDH, TDT <%x>, <%x>\n" @@ -840,9 +866,11 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, * sees the new next_to_clean. */ smp_mb(); - if (__netif_subqueue_stopped(tx_ring->netdev, tx_ring->queue_index) && - !test_bit(__IXGBE_DOWN, &adapter->state)) { - netif_wake_subqueue(tx_ring->netdev, tx_ring->queue_index); + if (__netif_subqueue_stopped(tx_ring->netdev, + tx_ring->queue_index) + && !test_bit(__IXGBE_DOWN, &adapter->state)) { + netif_wake_subqueue(tx_ring->netdev, + tx_ring->queue_index); ++tx_ring->tx_stats.restart_queue; } } @@ -6707,7 +6735,8 @@ static __le32 ixgbe_tx_cmd_type(u32 tx_flags) return cmd_type; } -static __le32 ixgbe_tx_olinfo_status(u32 tx_flags, unsigned int paylen) +static void ixgbe_tx_olinfo_status(union ixgbe_adv_tx_desc *tx_desc, + u32 tx_flags, unsigned int paylen) { __le32 olinfo_status = cpu_to_le32(paylen << IXGBE_ADVTXD_PAYLEN_SHIFT); @@ -6738,7 +6767,7 @@ static __le32 ixgbe_tx_olinfo_status(u32 tx_flags, unsigned int paylen) #endif olinfo_status |= cpu_to_le32(IXGBE_ADVTXD_CC); - return olinfo_status; + tx_desc->read.olinfo_status = olinfo_status; } #define IXGBE_TXD_CMD (IXGBE_TXD_CMD_EOP | \ @@ -6749,103 +6778,102 @@ static void ixgbe_tx_map(struct ixgbe_ring *tx_ring, u32 tx_flags, const u8 hdr_len) { - struct sk_buff *skb = first->skb; - struct device *dev = tx_ring->dev; - struct ixgbe_tx_buffer *tx_buffer_info; - union ixgbe_adv_tx_desc *tx_desc; dma_addr_t dma; - __le32 cmd_type, olinfo_status; - struct skb_frag_struct *frag; - unsigned int f = 0; + struct sk_buff *skb = first->skb; + struct ixgbe_tx_buffer *tx_buffer; + union ixgbe_adv_tx_desc *tx_desc; + struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; unsigned int data_len = skb->data_len; unsigned int size = skb_headlen(skb); - u32 offset = 0; - u32 paylen = skb->len - hdr_len; + unsigned int paylen = skb->len - hdr_len; + __le32 cmd_type; u16 i = tx_ring->next_to_use; + tx_desc = IXGBE_TX_DESC(tx_ring, i); + + ixgbe_tx_olinfo_status(tx_desc, tx_flags, paylen); + cmd_type = ixgbe_tx_cmd_type(tx_flags); + #ifdef IXGBE_FCOE if (tx_flags & IXGBE_TX_FLAGS_FCOE) { - if (data_len >= sizeof(struct fcoe_crc_eof)) { - data_len -= sizeof(struct fcoe_crc_eof); - } else { + if (data_len < sizeof(struct fcoe_crc_eof)) { size -= sizeof(struct fcoe_crc_eof) - data_len; data_len = 0; + } else { + data_len -= sizeof(struct fcoe_crc_eof); } } #endif - dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE); - if (dma_mapping_error(dev, dma)) + dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); + if (dma_mapping_error(tx_ring->dev, dma)) goto dma_error; - cmd_type = ixgbe_tx_cmd_type(tx_flags); - olinfo_status = ixgbe_tx_olinfo_status(tx_flags, paylen); + /* record length, and DMA address */ + dma_unmap_len_set(first, len, size); + dma_unmap_addr_set(first, dma, dma); + first->tx_flags = tx_flags; - tx_desc = IXGBE_TX_DESC(tx_ring, i); + tx_desc->read.buffer_addr = cpu_to_le64(dma); for (;;) { - while (size > IXGBE_MAX_DATA_PER_TXD) { - tx_desc->read.buffer_addr = cpu_to_le64(dma + offset); + while (unlikely(size > IXGBE_MAX_DATA_PER_TXD)) { tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(IXGBE_MAX_DATA_PER_TXD); - tx_desc->read.olinfo_status = olinfo_status; - offset += IXGBE_MAX_DATA_PER_TXD; - size -= IXGBE_MAX_DATA_PER_TXD; - - tx_desc++; i++; + tx_desc++; if (i == tx_ring->count) { tx_desc = IXGBE_TX_DESC(tx_ring, 0); i = 0; } + + dma += IXGBE_MAX_DATA_PER_TXD; + size -= IXGBE_MAX_DATA_PER_TXD; + + tx_desc->read.buffer_addr = cpu_to_le64(dma); + tx_desc->read.olinfo_status = 0; } - tx_buffer_info = &tx_ring->tx_buffer_info[i]; - tx_buffer_info->length = offset + size; - tx_buffer_info->tx_flags = tx_flags; - tx_buffer_info->dma = dma; + if (likely(!data_len)) + break; - tx_desc->read.buffer_addr = cpu_to_le64(dma + offset); if (unlikely(skb->no_fcs)) cmd_type &= ~(cpu_to_le32(IXGBE_ADVTXD_DCMD_IFCS)); tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size); - tx_desc->read.olinfo_status = olinfo_status; - if (!data_len) - break; + i++; + tx_desc++; + if (i == tx_ring->count) { + tx_desc = IXGBE_TX_DESC(tx_ring, 0); + i = 0; + } - frag = &skb_shinfo(skb)->frags[f]; #ifdef IXGBE_FCOE size = min_t(unsigned int, data_len, skb_frag_size(frag)); #else size = skb_frag_size(frag); #endif data_len -= size; - f++; - offset = 0; - tx_flags |= IXGBE_TX_FLAGS_MAPPED_AS_PAGE; - - dma = skb_frag_dma_map(dev, frag, 0, size, DMA_TO_DEVICE); - if (dma_mapping_error(dev, dma)) + dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size, + DMA_TO_DEVICE); + if (dma_mapping_error(tx_ring->dev, dma)) goto dma_error; - tx_desc++; - i++; - if (i == tx_ring->count) { - tx_desc = IXGBE_TX_DESC(tx_ring, 0); - i = 0; - } + tx_buffer = &tx_ring->tx_buffer_info[i]; + dma_unmap_len_set(tx_buffer, len, size); + dma_unmap_addr_set(tx_buffer, dma, dma); + + tx_desc->read.buffer_addr = cpu_to_le64(dma); + tx_desc->read.olinfo_status = 0; + + frag++; } - tx_desc->read.cmd_type_len |= cpu_to_le32(IXGBE_TXD_CMD); - - i++; - if (i == tx_ring->count) - i = 0; - - tx_ring->next_to_use = i; + /* write last descriptor with RS and EOP bits */ + cmd_type |= cpu_to_le32(size) | cpu_to_le32(IXGBE_TXD_CMD); + tx_desc->read.cmd_type_len = cmd_type; netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); @@ -6853,28 +6881,36 @@ static void ixgbe_tx_map(struct ixgbe_ring *tx_ring, first->time_stamp = jiffies; /* - * Force memory writes to complete before letting h/w - * know there are new descriptors to fetch. (Only - * applicable for weak-ordered memory model archs, - * such as IA-64). + * Force memory writes to complete before letting h/w know there + * are new descriptors to fetch. (Only applicable for weak-ordered + * memory model archs, such as IA-64). + * + * We also need this memory barrier to make certain all of the + * status bits have been updated before next_to_watch is written. */ wmb(); /* set next_to_watch value indicating a packet is present */ first->next_to_watch = tx_desc; + i++; + if (i == tx_ring->count) + i = 0; + + tx_ring->next_to_use = i; + /* notify HW of packet */ writel(i, tx_ring->tail); return; dma_error: - dev_err(dev, "TX DMA map failed\n"); + dev_err(tx_ring->dev, "TX DMA map failed\n"); /* clear dma mappings for failed tx_buffer_info map */ for (;;) { - tx_buffer_info = &tx_ring->tx_buffer_info[i]; - ixgbe_unmap_and_free_tx_resource(tx_ring, tx_buffer_info); - if (tx_buffer_info == first) + tx_buffer = &tx_ring->tx_buffer_info[i]; + ixgbe_unmap_and_free_tx_resource(tx_ring, tx_buffer); + if (tx_buffer == first) break; if (i == 0) i = tx_ring->count;