ice: Reorganize tx_buf and ring structs
Use more efficient structure ordering by using the pahole tool and a lot
of code inspection to get hot cache lines to have packed data (no holes
if possible) and adjacent warm data.

ice_ring prior to this change:
	/* size: 192, cachelines: 3, members: 23 */
	/* sum members: 158, holes: 4, sum holes: 12 */
	/* padding: 22 */

ice_ring after this change:
	/* size: 192, cachelines: 3, members: 25 */
	/* sum members: 162, holes: 1, sum holes: 1 */
	/* padding: 29 */

ice_tx_buf prior to this change:
	/* size: 48, cachelines: 1, members: 7 */
	/* sum members: 38, holes: 2, sum holes: 6 */
	/* padding: 4 */
	/* last cacheline: 48 bytes */

ice_tx_buf after this change:
	/* size: 40, cachelines: 1, members: 7 */
	/* sum members: 38, holes: 1, sum holes: 2 */
	/* last cacheline: 40 bytes */

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
parent 55e062ba77
commit 65124bbf98
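The ice_tx_buf saving is pure field ordering. On a typical 64-bit build with CONFIG_NEED_DMA_MAP_STATE=y, DEFINE_DMA_UNMAP_ADDR() expands to an 8-byte dma_addr_t and DEFINE_DMA_UNMAP_LEN() to a 4-byte length, so declaring the length before the address lets it fill the alignment hole behind tx_flags and drops the tail padding. Below is a minimal stand-alone sketch of the two layouts, using plain stand-in types rather than the real ice_tx_buf or the DMA-unmap macros (LP64 sizes assumed):

/* Illustrative only: models the before/after ice_tx_buf layouts with plain
 * types so the hole/padding arithmetic from the pahole output is visible.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct tx_buf_before {
	void *next_to_watch;	/* 8 bytes @ 0 */
	void *skb;		/* 8 bytes @ 8 */
	unsigned int bytecount;	/* 4 bytes @ 16 */
	unsigned short gso_segs;/* 2 bytes @ 20, 2-byte hole follows */
	uint32_t tx_flags;	/* 4 bytes @ 24, 4-byte hole follows */
	uint64_t dma;		/* 8 bytes @ 32 (needs 8-byte alignment) */
	uint32_t len;		/* 4 bytes @ 40, 4 bytes of tail padding */
};				/* sizeof == 48 */

struct tx_buf_after {
	void *next_to_watch;	/* 8 bytes @ 0 */
	void *skb;		/* 8 bytes @ 8 */
	unsigned int bytecount;	/* 4 bytes @ 16 */
	unsigned short gso_segs;/* 2 bytes @ 20, 2-byte hole remains */
	uint32_t tx_flags;	/* 4 bytes @ 24 */
	uint32_t len;		/* 4 bytes @ 28, fills the old hole */
	uint64_t dma;		/* 8 bytes @ 32, no tail padding */
};				/* sizeof == 40 */

int main(void)
{
	printf("before: %zu bytes, after: %zu bytes\n",
	       sizeof(struct tx_buf_before), sizeof(struct tx_buf_after));
	printf("len moved from offset %zu to offset %zu\n",
	       offsetof(struct tx_buf_before, len),
	       offsetof(struct tx_buf_after, len));
	return 0;
}

The quoted pahole output can be reproduced against the built objects with pahole's -C option (e.g. pahole -C ice_tx_buf on the compiled ice module); the exact command the author used is not recorded in the commit.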
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -58,19 +58,19 @@ struct ice_tx_buf {
 	unsigned int bytecount;
 	unsigned short gso_segs;
 	u32 tx_flags;
-	DEFINE_DMA_UNMAP_ADDR(dma);
 	DEFINE_DMA_UNMAP_LEN(len);
+	DEFINE_DMA_UNMAP_ADDR(dma);
 };

 struct ice_tx_offload_params {
-	u8 header_len;
+	u64 cd_qw1;
+	struct ice_ring *tx_ring;
 	u32 td_cmd;
 	u32 td_offset;
 	u32 td_l2tag1;
-	u16 cd_l2tag2;
 	u32 cd_tunnel_params;
-	u64 cd_qw1;
-	struct ice_ring *tx_ring;
+	u16 cd_l2tag2;
+	u8 header_len;
 };

 struct ice_rx_buf {
@@ -150,6 +150,7 @@ enum ice_rx_dtype {
 /* descriptor ring, associated with a VSI */
 struct ice_ring {
+	/* CL1 - 1st cacheline starts here */
 	struct ice_ring *next;		/* pointer to next ring in q_vector */
 	void *desc;			/* Descriptor ring memory */
 	struct device *dev;		/* Used for DMA mapping */
 	struct net_device *netdev;	/* netdev ring maps to */
@@ -161,11 +162,11 @@ struct ice_ring {
 		struct ice_tx_buf *tx_buf;
 		struct ice_rx_buf *rx_buf;
 	};
+	/* CL2 - 2nd cacheline starts here */
 	u16 q_index;			/* Queue number of ring */
-	u32 txq_teid;			/* Added Tx queue TEID */
-#ifdef CONFIG_DCB
-	u8 dcb_tc;			/* Traffic class of ring */
-#endif /* CONFIG_DCB */
 	u16 q_handle;			/* Queue handle per TC */
+
+	u8 ring_active;			/* is ring online or not */
+
 	u16 count;			/* Number of descriptors */
 	u16 reg_idx;			/* HW register index of the ring */
@@ -173,8 +174,7 @@ struct ice_ring {
 	/* used in interrupt processing */
 	u16 next_to_use;
 	u16 next_to_clean;
-
-	u8 ring_active;			/* is ring online or not */
+	u16 next_to_alloc;

 	/* stats structs */
 	struct ice_q_stats	stats;
@@ -184,10 +184,17 @@ struct ice_ring {
 		struct ice_rxq_stats rx_stats;
 	};

-	unsigned int size;		/* length of descriptor ring in bytes */
-	dma_addr_t dma;			/* physical address of ring */
 	struct rcu_head rcu;		/* to avoid race on free */
-	u16 next_to_alloc;
+	/* CLX - the below items are only accessed infrequently and should be
+	 * in their own cache line if possible
+	 */
+	dma_addr_t dma;			/* physical address of ring */
+	unsigned int size;		/* length of descriptor ring in bytes */
+	u32 txq_teid;			/* Added Tx queue TEID */
+	u16 rx_buf_len;
+#ifdef CONFIG_DCB
+	u8 dcb_tc;			/* Traffic class of ring */
+#endif /* CONFIG_DCB */
 } ____cacheline_internodealigned_in_smp;

 struct ice_ring_container {
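The new CL1/CL2/CLX comments document the intended cacheline split, but nothing enforces it at build time; a later reshuffle could silently push a hot field onto a cold line. If a guard were wanted, offsetof() checks against the 64-byte cacheline size are enough. A generic sketch over a hypothetical ring-like struct (not the driver's ice_ring, whose full member list is not shown in this hunk), using the GCC/Clang aligned attribute:

#include <assert.h>
#include <stddef.h>

/* Hypothetical layout mirroring the idea: hot fields packed at the front,
 * cold setup-time fields forced onto their own 64-byte cacheline.
 */
struct example_ring {
	/* hot: touched for every descriptor */
	void *next;
	void *desc;
	void *bufs;
	unsigned short next_to_use;
	unsigned short next_to_clean;
	/* cold: written once at ring setup */
	unsigned long long dma __attribute__((aligned(64)));
	unsigned int size;
	unsigned int buf_len;
};

/* All hot members must fit in the first cacheline. */
static_assert(offsetof(struct example_ring, next_to_clean) < 64,
	      "hot index spilled past cacheline 0");
/* Cold data must start on its own cacheline. */
static_assert(offsetof(struct example_ring, dma) == 64,
	      "cold fields should begin a new cacheline");

Inside the kernel the same checks could be written with BUILD_BUG_ON() and L1_CACHE_BYTES rather than a hard-coded 64.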