ice: Reorganize tx_buf and ring structs

Use more efficient structure ordering by using the pahole tool
and a lot of code inspection to get hot cache lines to have
packed data (no holes if possible) and adjacent warm data.

ice_ring prior to this change:
  /* size: 192, cachelines: 3, members: 23 */
  /* sum members: 158, holes: 4, sum holes: 12 */
  /* padding: 22 */

ice_ring after this change:
  /* size: 192, cachelines: 3, members: 25 */
  /* sum members: 162, holes: 1, sum holes: 1 */
  /* padding: 29 */

ice_tx_buf prior to this change:
  /* size: 48, cachelines: 1, members: 7 */
  /* sum members: 38, holes: 2, sum holes: 6 */
  /* padding: 4 */
  /* last cacheline: 48 bytes */

ice_tx_buf after this change:
  /* size: 40, cachelines: 1, members: 7 */
  /* sum members: 38, holes: 1, sum holes: 2 */
  /* last cacheline: 40 bytes */

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
This commit is contained in:
Jesse Brandeburg 2019-04-16 10:24:34 -07:00 committed by Jeff Kirsher
parent 55e062ba77
commit 65124bbf98
1 changed files with 21 additions and 14 deletions

View File

@ -58,19 +58,19 @@ struct ice_tx_buf {
unsigned int bytecount;
unsigned short gso_segs;
u32 tx_flags;
DEFINE_DMA_UNMAP_ADDR(dma);
DEFINE_DMA_UNMAP_LEN(len);
DEFINE_DMA_UNMAP_ADDR(dma);
};
struct ice_tx_offload_params {
u8 header_len;
u64 cd_qw1;
struct ice_ring *tx_ring;
u32 td_cmd;
u32 td_offset;
u32 td_l2tag1;
u16 cd_l2tag2;
u32 cd_tunnel_params;
u64 cd_qw1;
struct ice_ring *tx_ring;
u16 cd_l2tag2;
u8 header_len;
};
struct ice_rx_buf {
@ -150,6 +150,7 @@ enum ice_rx_dtype {
/* descriptor ring, associated with a VSI */
struct ice_ring {
/* CL1 - 1st cacheline starts here */
struct ice_ring *next; /* pointer to next ring in q_vector */
void *desc; /* Descriptor ring memory */
struct device *dev; /* Used for DMA mapping */
@ -161,11 +162,11 @@ struct ice_ring {
struct ice_tx_buf *tx_buf;
struct ice_rx_buf *rx_buf;
};
/* CL2 - 2nd cacheline starts here */
u16 q_index; /* Queue number of ring */
u32 txq_teid; /* Added Tx queue TEID */
#ifdef CONFIG_DCB
u8 dcb_tc; /* Traffic class of ring */
#endif /* CONFIG_DCB */
u16 q_handle; /* Queue handle per TC */
u8 ring_active; /* is ring online or not */
u16 count; /* Number of descriptors */
u16 reg_idx; /* HW register index of the ring */
@ -173,8 +174,7 @@ struct ice_ring {
/* used in interrupt processing */
u16 next_to_use;
u16 next_to_clean;
u8 ring_active; /* is ring online or not */
u16 next_to_alloc;
/* stats structs */
struct ice_q_stats stats;
@ -184,10 +184,17 @@ struct ice_ring {
struct ice_rxq_stats rx_stats;
};
unsigned int size; /* length of descriptor ring in bytes */
dma_addr_t dma; /* physical address of ring */
struct rcu_head rcu; /* to avoid race on free */
u16 next_to_alloc;
/* CLX - the below items are only accessed infrequently and should be
* in their own cache line if possible
*/
dma_addr_t dma; /* physical address of ring */
unsigned int size; /* length of descriptor ring in bytes */
u32 txq_teid; /* Added Tx queue TEID */
u16 rx_buf_len;
#ifdef CONFIG_DCB
u8 dcb_tc; /* Traffic class of ring */
#endif /* CONFIG_DCB */
} ____cacheline_internodealigned_in_smp;
struct ice_ring_container {