IB/hfi1: Get rid of divide when setting the tx request header
Div instructions show up as costly in profiles when the tx request header is set. Using a right shift instead of a divide operation reduces the cycles spent in the function that sets the tx request header, as shown in the profiles below. Use a right shift operation instead.

Profile before change:
43.24% 009
|
|--23.41%-- user_sdma_send_pkts
|          |
|          |--99.90%-- hfi1_user_sdma_process_request

After:
Profile after change:
45.75% 009
|
|--14.81%-- user_sdma_send_pkts
|          |
|          |--99.95%-- hfi1_user_sdma_process_request

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Sebastian Sanchez <sebastian.sanchez@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
This commit is contained in:
parent
dd1ed10817
commit
ade6f8af52
|
@ -143,7 +143,9 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
|
||||||
|
|
||||||
/* KDETH OM multipliers and switch over point */
|
/* KDETH OM multipliers and switch over point */
|
||||||
#define KDETH_OM_SMALL 4
|
#define KDETH_OM_SMALL 4
|
||||||
|
#define KDETH_OM_SMALL_SHIFT 2
|
||||||
#define KDETH_OM_LARGE 64
|
#define KDETH_OM_LARGE 64
|
||||||
|
#define KDETH_OM_LARGE_SHIFT 6
|
||||||
#define KDETH_OM_MAX_SIZE (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1))
|
#define KDETH_OM_MAX_SIZE (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1))
|
||||||
|
|
||||||
/* Tx request flag bits */
|
/* Tx request flag bits */
|
||||||
|
@ -228,12 +230,6 @@ struct user_sdma_request {
|
||||||
* size of the TID entry.
|
* size of the TID entry.
|
||||||
*/
|
*/
|
||||||
u32 tidoffset;
|
u32 tidoffset;
|
||||||
/*
|
|
||||||
* KDETH.OM
|
|
||||||
* Remember this because the header template always sets it
|
|
||||||
* to 0.
|
|
||||||
*/
|
|
||||||
u8 omfactor;
|
|
||||||
/*
|
/*
|
||||||
* We copy the iovs for this request (based on
|
* We copy the iovs for this request (based on
|
||||||
* info.iovcnt). These are only the data vectors
|
* info.iovcnt). These are only the data vectors
|
||||||
|
@ -1323,6 +1319,7 @@ static int set_txreq_header(struct user_sdma_request *req,
|
||||||
{
|
{
|
||||||
struct hfi1_user_sdma_pkt_q *pq = req->pq;
|
struct hfi1_user_sdma_pkt_q *pq = req->pq;
|
||||||
struct hfi1_pkt_header *hdr = &tx->hdr;
|
struct hfi1_pkt_header *hdr = &tx->hdr;
|
||||||
|
u8 omfactor; /* KDETH.OM */
|
||||||
u16 pbclen;
|
u16 pbclen;
|
||||||
int ret;
|
int ret;
|
||||||
u32 tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen));
|
u32 tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen));
|
||||||
|
@ -1400,8 +1397,9 @@ static int set_txreq_header(struct user_sdma_request *req,
|
||||||
}
|
}
|
||||||
tidval = req->tids[req->tididx];
|
tidval = req->tids[req->tididx];
|
||||||
}
|
}
|
||||||
req->omfactor = EXP_TID_GET(tidval, LEN) * PAGE_SIZE >=
|
omfactor = EXP_TID_GET(tidval, LEN) * PAGE_SIZE >=
|
||||||
KDETH_OM_MAX_SIZE ? KDETH_OM_LARGE : KDETH_OM_SMALL;
|
KDETH_OM_MAX_SIZE ? KDETH_OM_LARGE_SHIFT :
|
||||||
|
KDETH_OM_SMALL_SHIFT;
|
||||||
/* Set KDETH.TIDCtrl based on value for this TID. */
|
/* Set KDETH.TIDCtrl based on value for this TID. */
|
||||||
KDETH_SET(hdr->kdeth.ver_tid_offset, TIDCTRL,
|
KDETH_SET(hdr->kdeth.ver_tid_offset, TIDCTRL,
|
||||||
EXP_TID_GET(tidval, CTRL));
|
EXP_TID_GET(tidval, CTRL));
|
||||||
|
@ -1416,12 +1414,12 @@ static int set_txreq_header(struct user_sdma_request *req,
|
||||||
* transfer.
|
* transfer.
|
||||||
*/
|
*/
|
||||||
SDMA_DBG(req, "TID offset %ubytes %uunits om%u",
|
SDMA_DBG(req, "TID offset %ubytes %uunits om%u",
|
||||||
req->tidoffset, req->tidoffset / req->omfactor,
|
req->tidoffset, req->tidoffset >> omfactor,
|
||||||
req->omfactor != KDETH_OM_SMALL);
|
omfactor != KDETH_OM_SMALL_SHIFT);
|
||||||
KDETH_SET(hdr->kdeth.ver_tid_offset, OFFSET,
|
KDETH_SET(hdr->kdeth.ver_tid_offset, OFFSET,
|
||||||
req->tidoffset / req->omfactor);
|
req->tidoffset >> omfactor);
|
||||||
KDETH_SET(hdr->kdeth.ver_tid_offset, OM,
|
KDETH_SET(hdr->kdeth.ver_tid_offset, OM,
|
||||||
req->omfactor != KDETH_OM_SMALL);
|
omfactor != KDETH_OM_SMALL_SHIFT);
|
||||||
}
|
}
|
||||||
done:
|
done:
|
||||||
trace_hfi1_sdma_user_header(pq->dd, pq->ctxt, pq->subctxt,
|
trace_hfi1_sdma_user_header(pq->dd, pq->ctxt, pq->subctxt,
|
||||||
|
@ -1433,6 +1431,7 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
|
||||||
struct user_sdma_txreq *tx, u32 len)
|
struct user_sdma_txreq *tx, u32 len)
|
||||||
{
|
{
|
||||||
int diff = 0;
|
int diff = 0;
|
||||||
|
u8 omfactor; /* KDETH.OM */
|
||||||
struct hfi1_user_sdma_pkt_q *pq = req->pq;
|
struct hfi1_user_sdma_pkt_q *pq = req->pq;
|
||||||
struct hfi1_pkt_header *hdr = &req->hdr;
|
struct hfi1_pkt_header *hdr = &req->hdr;
|
||||||
u16 pbclen = le16_to_cpu(hdr->pbc[0]);
|
u16 pbclen = le16_to_cpu(hdr->pbc[0]);
|
||||||
|
@ -1484,14 +1483,15 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
|
||||||
}
|
}
|
||||||
tidval = req->tids[req->tididx];
|
tidval = req->tids[req->tididx];
|
||||||
}
|
}
|
||||||
req->omfactor = ((EXP_TID_GET(tidval, LEN) *
|
omfactor = ((EXP_TID_GET(tidval, LEN) *
|
||||||
PAGE_SIZE) >=
|
PAGE_SIZE) >=
|
||||||
KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE :
|
KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE_SHIFT :
|
||||||
KDETH_OM_SMALL;
|
KDETH_OM_SMALL_SHIFT;
|
||||||
/* KDETH.OM and KDETH.OFFSET (TID) */
|
/* KDETH.OM and KDETH.OFFSET (TID) */
|
||||||
AHG_HEADER_SET(req->ahg, diff, 7, 0, 16,
|
AHG_HEADER_SET(req->ahg, diff, 7, 0, 16,
|
||||||
((!!(req->omfactor - KDETH_OM_SMALL)) << 15 |
|
((!!(omfactor - KDETH_OM_SMALL_SHIFT)) << 15 |
|
||||||
((req->tidoffset / req->omfactor) & 0x7fff)));
|
((req->tidoffset >> omfactor)
|
||||||
|
& 0x7fff)));
|
||||||
/* KDETH.TIDCtrl, KDETH.TID, KDETH.Intr, KDETH.SH */
|
/* KDETH.TIDCtrl, KDETH.TID, KDETH.Intr, KDETH.SH */
|
||||||
val = cpu_to_le16(((EXP_TID_GET(tidval, CTRL) & 0x3) << 10) |
|
val = cpu_to_le16(((EXP_TID_GET(tidval, CTRL) & 0x3) << 10) |
|
||||||
(EXP_TID_GET(tidval, IDX) & 0x3ff));
|
(EXP_TID_GET(tidval, IDX) & 0x3ff));
|
||||||
|
|
Loading…
Reference in New Issue