IB/hfi1: Optimize pio_buf and send_context structs
Both the pio_buf and send_context structs have oversized fields and cacheline layouts that can be optimized. Reduce the oversized fields in both structs. Make sure the pio_buf struct fits within a single cacheline. Move the read-only fields of the send_context struct into their own cacheline. Together, these changes avoid cacheline trading as the ring progresses and pio buffers/send contexts are used.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Sebastian Sanchez <sebastian.sanchez@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
This commit is contained in:
parent
2474d775d9
commit
8af8d2970e
|
@ -765,6 +765,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
|
||||||
sc->hw_context = hw_context;
|
sc->hw_context = hw_context;
|
||||||
cr_group_addresses(sc, &dma);
|
cr_group_addresses(sc, &dma);
|
||||||
sc->credits = sci->credits;
|
sc->credits = sci->credits;
|
||||||
|
sc->size = sc->credits * PIO_BLOCK_SIZE;
|
||||||
|
|
||||||
/* PIO Send Memory Address details */
|
/* PIO Send Memory Address details */
|
||||||
#define PIO_ADDR_CONTEXT_MASK 0xfful
|
#define PIO_ADDR_CONTEXT_MASK 0xfful
|
||||||
|
@ -1470,9 +1471,7 @@ retry:
|
||||||
|
|
||||||
/* finish filling in the buffer outside the lock */
|
/* finish filling in the buffer outside the lock */
|
||||||
pbuf->start = sc->base_addr + fill_wrap * PIO_BLOCK_SIZE;
|
pbuf->start = sc->base_addr + fill_wrap * PIO_BLOCK_SIZE;
|
||||||
pbuf->size = sc->credits * PIO_BLOCK_SIZE;
|
pbuf->end = sc->base_addr + sc->size;
|
||||||
pbuf->end = sc->base_addr + pbuf->size;
|
|
||||||
pbuf->block_count = blocks;
|
|
||||||
pbuf->qw_written = 0;
|
pbuf->qw_written = 0;
|
||||||
pbuf->carry_bytes = 0;
|
pbuf->carry_bytes = 0;
|
||||||
pbuf->carry.val64 = 0;
|
pbuf->carry.val64 = 0;
|
||||||
|
|
|
@ -83,43 +83,43 @@ struct pio_buf {
|
||||||
void *arg; /* argument for cb */
|
void *arg; /* argument for cb */
|
||||||
void __iomem *start; /* buffer start address */
|
void __iomem *start; /* buffer start address */
|
||||||
void __iomem *end; /* context end address */
|
void __iomem *end; /* context end address */
|
||||||
unsigned long size; /* context size, in bytes */
|
|
||||||
unsigned long sent_at; /* buffer is sent when <= free */
|
unsigned long sent_at; /* buffer is sent when <= free */
|
||||||
u32 block_count; /* size of buffer, in blocks */
|
|
||||||
u32 qw_written; /* QW written so far */
|
|
||||||
u32 carry_bytes; /* number of valid bytes in carry */
|
|
||||||
union mix carry; /* pending unwritten bytes */
|
union mix carry; /* pending unwritten bytes */
|
||||||
|
u16 qw_written; /* QW written so far */
|
||||||
|
u8 carry_bytes; /* number of valid bytes in carry */
|
||||||
};
|
};
|
||||||
|
|
||||||
/* cache line aligned pio buffer array */
|
/* cache line aligned pio buffer array */
|
||||||
union pio_shadow_ring {
|
union pio_shadow_ring {
|
||||||
struct pio_buf pbuf;
|
struct pio_buf pbuf;
|
||||||
u64 unused[16]; /* cache line spacer */
|
|
||||||
} ____cacheline_aligned;
|
} ____cacheline_aligned;
|
||||||
|
|
||||||
/* per-NUMA send context */
|
/* per-NUMA send context */
|
||||||
struct send_context {
|
struct send_context {
|
||||||
/* read-only after init */
|
/* read-only after init */
|
||||||
struct hfi1_devdata *dd; /* device */
|
struct hfi1_devdata *dd; /* device */
|
||||||
void __iomem *base_addr; /* start of PIO memory */
|
|
||||||
union pio_shadow_ring *sr; /* shadow ring */
|
union pio_shadow_ring *sr; /* shadow ring */
|
||||||
|
void __iomem *base_addr; /* start of PIO memory */
|
||||||
|
u32 __percpu *buffers_allocated;/* count of buffers allocated */
|
||||||
|
u32 size; /* context size, in bytes */
|
||||||
|
|
||||||
struct work_struct halt_work; /* halted context work queue entry */
|
|
||||||
unsigned long flags; /* flags */
|
|
||||||
int node; /* context home node */
|
int node; /* context home node */
|
||||||
int type; /* context type */
|
|
||||||
u32 sw_index; /* software index number */
|
|
||||||
u32 hw_context; /* hardware context number */
|
|
||||||
u32 credits; /* number of blocks in context */
|
|
||||||
u32 sr_size; /* size of the shadow ring */
|
u32 sr_size; /* size of the shadow ring */
|
||||||
u32 group; /* credit return group */
|
u16 flags; /* flags */
|
||||||
|
u8 type; /* context type */
|
||||||
|
u8 sw_index; /* software index number */
|
||||||
|
u8 hw_context; /* hardware context number */
|
||||||
|
u8 group; /* credit return group */
|
||||||
|
|
||||||
/* allocator fields */
|
/* allocator fields */
|
||||||
spinlock_t alloc_lock ____cacheline_aligned_in_smp;
|
spinlock_t alloc_lock ____cacheline_aligned_in_smp;
|
||||||
u32 sr_head; /* shadow ring head */
|
u32 sr_head; /* shadow ring head */
|
||||||
unsigned long fill; /* official alloc count */
|
unsigned long fill; /* official alloc count */
|
||||||
unsigned long alloc_free; /* copy of free (less cache thrash) */
|
unsigned long alloc_free; /* copy of free (less cache thrash) */
|
||||||
u32 __percpu *buffers_allocated;/* count of buffers allocated */
|
|
||||||
u32 fill_wrap; /* tracks fill within ring */
|
u32 fill_wrap; /* tracks fill within ring */
|
||||||
|
u32 credits; /* number of blocks in context */
|
||||||
|
/* adding a new field here would make it part of this cacheline */
|
||||||
|
|
||||||
/* releaser fields */
|
/* releaser fields */
|
||||||
spinlock_t release_lock ____cacheline_aligned_in_smp;
|
spinlock_t release_lock ____cacheline_aligned_in_smp;
|
||||||
u32 sr_tail; /* shadow ring tail */
|
u32 sr_tail; /* shadow ring tail */
|
||||||
|
@ -131,6 +131,7 @@ struct send_context {
|
||||||
u32 credit_intr_count; /* count of credit intr users */
|
u32 credit_intr_count; /* count of credit intr users */
|
||||||
u64 credit_ctrl; /* cache for credit control */
|
u64 credit_ctrl; /* cache for credit control */
|
||||||
wait_queue_head_t halt_wait; /* wait until kernel sees interrupt */
|
wait_queue_head_t halt_wait; /* wait until kernel sees interrupt */
|
||||||
|
struct work_struct halt_work; /* halted context work queue entry */
|
||||||
};
|
};
|
||||||
|
|
||||||
/* send context flags */
|
/* send context flags */
|
||||||
|
|
|
@ -129,8 +129,8 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
|
||||||
dest += sizeof(u64);
|
dest += sizeof(u64);
|
||||||
}
|
}
|
||||||
|
|
||||||
dest -= pbuf->size;
|
dest -= pbuf->sc->size;
|
||||||
dend -= pbuf->size;
|
dend -= pbuf->sc->size;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* write 8-byte non-SOP, non-wrap chunk data */
|
/* write 8-byte non-SOP, non-wrap chunk data */
|
||||||
|
@ -361,8 +361,8 @@ void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc,
|
||||||
dest += sizeof(u64);
|
dest += sizeof(u64);
|
||||||
}
|
}
|
||||||
|
|
||||||
dest -= pbuf->size;
|
dest -= pbuf->sc->size;
|
||||||
dend -= pbuf->size;
|
dend -= pbuf->sc->size;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* write 8-byte non-SOP, non-wrap chunk data */
|
/* write 8-byte non-SOP, non-wrap chunk data */
|
||||||
|
@ -458,8 +458,8 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
|
||||||
dest += sizeof(u64);
|
dest += sizeof(u64);
|
||||||
}
|
}
|
||||||
|
|
||||||
dest -= pbuf->size;
|
dest -= pbuf->sc->size;
|
||||||
dend -= pbuf->size;
|
dend -= pbuf->sc->size;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* write 8-byte non-SOP, non-wrap chunk data */
|
/* write 8-byte non-SOP, non-wrap chunk data */
|
||||||
|
@ -492,7 +492,7 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
|
||||||
*/
|
*/
|
||||||
/* adjust if we have wrapped */
|
/* adjust if we have wrapped */
|
||||||
if (dest >= pbuf->end)
|
if (dest >= pbuf->end)
|
||||||
dest -= pbuf->size;
|
dest -= pbuf->sc->size;
|
||||||
/* jump to the SOP range if within the first block */
|
/* jump to the SOP range if within the first block */
|
||||||
else if (pbuf->qw_written < PIO_BLOCK_QWS)
|
else if (pbuf->qw_written < PIO_BLOCK_QWS)
|
||||||
dest += SOP_DISTANCE;
|
dest += SOP_DISTANCE;
|
||||||
|
@ -584,8 +584,8 @@ static void mid_copy_straight(struct pio_buf *pbuf,
|
||||||
dest += sizeof(u64);
|
dest += sizeof(u64);
|
||||||
}
|
}
|
||||||
|
|
||||||
dest -= pbuf->size;
|
dest -= pbuf->sc->size;
|
||||||
dend -= pbuf->size;
|
dend -= pbuf->sc->size;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* write 8-byte non-SOP, non-wrap chunk data */
|
/* write 8-byte non-SOP, non-wrap chunk data */
|
||||||
|
@ -666,7 +666,7 @@ void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes)
|
||||||
*/
|
*/
|
||||||
/* adjust if we've wrapped */
|
/* adjust if we've wrapped */
|
||||||
if (dest >= pbuf->end)
|
if (dest >= pbuf->end)
|
||||||
dest -= pbuf->size;
|
dest -= pbuf->sc->size;
|
||||||
/* jump to SOP range if within the first block */
|
/* jump to SOP range if within the first block */
|
||||||
else if (pbuf->qw_written < PIO_BLOCK_QWS)
|
else if (pbuf->qw_written < PIO_BLOCK_QWS)
|
||||||
dest += SOP_DISTANCE;
|
dest += SOP_DISTANCE;
|
||||||
|
@ -719,7 +719,7 @@ void seg_pio_copy_end(struct pio_buf *pbuf)
|
||||||
*/
|
*/
|
||||||
/* adjust if we have wrapped */
|
/* adjust if we have wrapped */
|
||||||
if (dest >= pbuf->end)
|
if (dest >= pbuf->end)
|
||||||
dest -= pbuf->size;
|
dest -= pbuf->sc->size;
|
||||||
/* jump to the SOP range if within the first block */
|
/* jump to the SOP range if within the first block */
|
||||||
else if (pbuf->qw_written < PIO_BLOCK_QWS)
|
else if (pbuf->qw_written < PIO_BLOCK_QWS)
|
||||||
dest += SOP_DISTANCE;
|
dest += SOP_DISTANCE;
|
||||||
|
|
Loading…
Reference in New Issue