IB/hif1: Remove static tracing from SDMA hot path

The hfi1_cdbg() macro can be instantiated in the hot path even when it
is not in use.  This shows up on perf profiles.

Rework the macros (for SDMA and MMU), to use the trace interface directly
to eliminate this performance hit.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
This commit is contained in:
Michael J. Ruhl 2017-08-28 11:23:27 -07:00 committed by Doug Ledford
parent ba81a427c3
commit 34ab4de77f
7 changed files with 255 additions and 29 deletions

View File

@ -430,8 +430,7 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
if (!iter_is_iovec(from) || !dim)
return -EINVAL;
hfi1_cdbg(SDMA, "SDMA request from %u:%u (%lu)",
fd->uctxt->ctxt, fd->subctxt, dim);
trace_hfi1_sdma_request(fd->dd, fd->uctxt->ctxt, fd->subctxt, dim);
if (atomic_read(&pq->n_reqs) == pq->n_max_reqs)
return -ENOSPC;

View File

@ -1,5 +1,5 @@
/*
* Copyright(c) 2016 Intel Corporation.
* Copyright(c) 2016 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@ -172,9 +172,8 @@ int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
unsigned long flags;
int ret = 0;
trace_hfi1_mmu_rb_insert(mnode->addr, mnode->len);
spin_lock_irqsave(&handler->lock, flags);
hfi1_cdbg(MMU, "Inserting node addr 0x%llx, len %u", mnode->addr,
mnode->len);
node = __mmu_rb_search(handler, mnode->addr, mnode->len);
if (node) {
ret = -EINVAL;
@ -200,7 +199,7 @@ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
{
struct mmu_rb_node *node = NULL;
hfi1_cdbg(MMU, "Searching for addr 0x%llx, len %u", addr, len);
trace_hfi1_mmu_rb_search(addr, len);
if (!handler->ops->filter) {
node = __mmu_int_rb_iter_first(&handler->root, addr,
(addr + len) - 1);
@ -281,8 +280,7 @@ void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler,
unsigned long flags;
/* Validity of handler and node pointers has been checked by caller. */
hfi1_cdbg(MMU, "Removing node addr 0x%llx, len %u", node->addr,
node->len);
trace_hfi1_mmu_rb_remove(node->addr, node->len);
spin_lock_irqsave(&handler->lock, flags);
__mmu_int_rb_remove(node, &handler->root);
list_del(&node->list); /* remove from LRU list */
@ -321,8 +319,7 @@ static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn,
node; node = ptr) {
/* Guard against node removal. */
ptr = __mmu_int_rb_iter_next(node, start, end - 1);
hfi1_cdbg(MMU, "Invalidating node addr 0x%llx, len %u",
node->addr, node->len);
trace_hfi1_mmu_mem_invalidate(node->addr, node->len);
if (handler->ops->invalidate(handler->ops_arg, node)) {
__mmu_int_rb_remove(node, root);
/* move from LRU list to delete list */

View File

@ -1,5 +1,5 @@
/*
* Copyright(c) 2015, 2016 Intel Corporation.
* Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@ -51,3 +51,4 @@
#include "trace_rc.h"
#include "trace_rx.h"
#include "trace_tx.h"
#include "trace_mmu.h"

View File

@ -0,0 +1,95 @@
/*
* Copyright(c) 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#if !defined(__HFI1_TRACE_MMU_H) || defined(TRACE_HEADER_MULTI_READ)
#define __HFI1_TRACE_MMU_H
#include <linux/tracepoint.h>
#include <linux/trace_seq.h>
#include "hfi.h"
#undef TRACE_SYSTEM
#define TRACE_SYSTEM hfi1_mmu
DECLARE_EVENT_CLASS(hfi1_mmu_rb_template,
TP_PROTO(unsigned long addr, unsigned long len),
TP_ARGS(addr, len),
TP_STRUCT__entry(__field(unsigned long, addr)
__field(unsigned long, len)
),
TP_fast_assign(__entry->addr = addr;
__entry->len = len;
),
TP_printk("MMU node addr 0x%lx, len %lu",
__entry->addr,
__entry->len
)
);
DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_rb_insert,
TP_PROTO(unsigned long addr, unsigned long len),
TP_ARGS(addr, len));
DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_rb_search,
TP_PROTO(unsigned long addr, unsigned long len),
TP_ARGS(addr, len));
DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_rb_remove,
TP_PROTO(unsigned long addr, unsigned long len),
TP_ARGS(addr, len));
DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_mem_invalidate,
TP_PROTO(unsigned long addr, unsigned long len),
TP_ARGS(addr, len));
#endif /* __HFI1_TRACE_RC_H */
#undef TRACE_INCLUDE_PATH
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_FILE trace_mmu
#include <trace/define_trace.h>

View File

@ -1,5 +1,5 @@
/*
* Copyright(c) 2015, 2016 Intel Corporation.
* Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@ -198,6 +198,140 @@ TRACE_EVENT(hfi1_sdma_engine_select,
)
);
TRACE_EVENT(hfi1_sdma_user_free_queues,
TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt),
TP_ARGS(dd, ctxt, subctxt),
TP_STRUCT__entry(DD_DEV_ENTRY(dd)
__field(u16, ctxt)
__field(u16, subctxt)
),
TP_fast_assign(DD_DEV_ASSIGN(dd);
__entry->ctxt = ctxt;
__entry->subctxt = subctxt;
),
TP_printk("[%s] SDMA [%u:%u] Freeing user SDMA queues",
__get_str(dev),
__entry->ctxt,
__entry->subctxt
)
);
TRACE_EVENT(hfi1_sdma_user_process_request,
TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt,
u16 comp_idx),
TP_ARGS(dd, ctxt, subctxt, comp_idx),
TP_STRUCT__entry(DD_DEV_ENTRY(dd)
__field(u16, ctxt)
__field(u16, subctxt)
__field(u16, comp_idx)
),
TP_fast_assign(DD_DEV_ASSIGN(dd);
__entry->ctxt = ctxt;
__entry->subctxt = subctxt;
__entry->comp_idx = comp_idx;
),
TP_printk("[%s] SDMA [%u:%u] Using req/comp entry: %u",
__get_str(dev),
__entry->ctxt,
__entry->subctxt,
__entry->comp_idx
)
);
DECLARE_EVENT_CLASS(
hfi1_sdma_value_template,
TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt, u16 comp_idx,
u32 value),
TP_ARGS(dd, ctxt, subctxt, comp_idx, value),
TP_STRUCT__entry(DD_DEV_ENTRY(dd)
__field(u16, ctxt)
__field(u16, subctxt)
__field(u16, comp_idx)
__field(u32, value)
),
TP_fast_assign(DD_DEV_ASSIGN(dd);
__entry->ctxt = ctxt;
__entry->subctxt = subctxt;
__entry->comp_idx = comp_idx;
__entry->value = value;
),
TP_printk("[%s] SDMA [%u:%u:%u] value: %u",
__get_str(dev),
__entry->ctxt,
__entry->subctxt,
__entry->comp_idx,
__entry->value
)
);
DEFINE_EVENT(hfi1_sdma_value_template, hfi1_sdma_user_initial_tidoffset,
TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt,
u16 comp_idx, u32 tidoffset),
TP_ARGS(dd, ctxt, subctxt, comp_idx, tidoffset));
DEFINE_EVENT(hfi1_sdma_value_template, hfi1_sdma_user_data_length,
TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt,
u16 comp_idx, u32 data_len),
TP_ARGS(dd, ctxt, subctxt, comp_idx, data_len));
DEFINE_EVENT(hfi1_sdma_value_template, hfi1_sdma_user_compute_length,
TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt,
u16 comp_idx, u32 data_len),
TP_ARGS(dd, ctxt, subctxt, comp_idx, data_len));
TRACE_EVENT(hfi1_sdma_user_tid_info,
TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt,
u16 comp_idx, u32 tidoffset, u32 units, u8 shift),
TP_ARGS(dd, ctxt, subctxt, comp_idx, tidoffset, units, shift),
TP_STRUCT__entry(DD_DEV_ENTRY(dd)
__field(u16, ctxt)
__field(u16, subctxt)
__field(u16, comp_idx)
__field(u32, tidoffset)
__field(u32, units)
__field(u8, shift)
),
TP_fast_assign(DD_DEV_ASSIGN(dd);
__entry->ctxt = ctxt;
__entry->subctxt = subctxt;
__entry->comp_idx = comp_idx;
__entry->tidoffset = tidoffset;
__entry->units = units;
__entry->shift = shift;
),
TP_printk("[%s] SDMA [%u:%u:%u] TID offset %ubytes %uunits om %u",
__get_str(dev),
__entry->ctxt,
__entry->subctxt,
__entry->comp_idx,
__entry->tidoffset,
__entry->units,
__entry->shift
)
);
TRACE_EVENT(hfi1_sdma_request,
TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt,
unsigned long dim),
TP_ARGS(dd, ctxt, subctxt, dim),
TP_STRUCT__entry(DD_DEV_ENTRY(dd)
__field(u16, ctxt)
__field(u16, subctxt)
__field(unsigned long, dim)
),
TP_fast_assign(DD_DEV_ASSIGN(dd);
__entry->ctxt = ctxt;
__entry->subctxt = subctxt;
__entry->dim = dim;
),
TP_printk("[%s] SDMA from %u:%u (%lu)",
__get_str(dev),
__entry->ctxt,
__entry->subctxt,
__entry->dim
)
);
DECLARE_EVENT_CLASS(hfi1_sdma_engine_class,
TP_PROTO(struct sdma_engine *sde, u64 status),
TP_ARGS(sde, status),

View File

@ -266,8 +266,8 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
{
struct hfi1_user_sdma_pkt_q *pq;
hfi1_cdbg(SDMA, "[%u:%u:%u] Freeing user SDMA queues", uctxt->dd->unit,
uctxt->ctxt, fd->subctxt);
trace_hfi1_sdma_user_free_queues(uctxt->dd, uctxt->ctxt, fd->subctxt);
pq = fd->pq;
if (pq) {
if (pq->handler)
@ -349,7 +349,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
trace_hfi1_sdma_user_reqinfo(dd, uctxt->ctxt, fd->subctxt,
(u16 *)&info);
if (info.comp_idx >= hfi1_sdma_comp_ring_size) {
hfi1_cdbg(SDMA,
"[%u:%u:%u:%u] Invalid comp index",
@ -386,8 +385,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
/*
* All safety checks have been done and this request has been claimed.
*/
hfi1_cdbg(SDMA, "[%u:%u:%u] Using req/comp entry %u\n", dd->unit,
uctxt->ctxt, fd->subctxt, info.comp_idx);
trace_hfi1_sdma_user_process_request(dd, uctxt->ctxt, fd->subctxt,
info.comp_idx);
req = pq->reqs + info.comp_idx;
req->data_iovs = req_iovcnt(info.ctrl) - 1; /* subtract header vector */
req->data_len = 0;
@ -487,7 +486,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
req->tidoffset = KDETH_GET(req->hdr.kdeth.ver_tid_offset, OFFSET) *
(KDETH_GET(req->hdr.kdeth.ver_tid_offset, OM) ?
KDETH_OM_LARGE : KDETH_OM_SMALL);
SDMA_DBG(req, "Initial TID offset %u", req->tidoffset);
trace_hfi1_sdma_user_initial_tidoffset(dd, uctxt->ctxt, fd->subctxt,
info.comp_idx, req->tidoffset);
idx++;
/* Save all the IO vector structures */
@ -505,8 +505,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
}
req->data_len += req->iovs[i].iov.iov_len;
}
SDMA_DBG(req, "total data length %u", req->data_len);
trace_hfi1_sdma_user_data_length(dd, uctxt->ctxt, fd->subctxt,
info.comp_idx, req->data_len);
if (pcount > req->info.npkts)
pcount = req->info.npkts;
/*
@ -661,7 +661,11 @@ static inline u32 compute_data_length(struct user_sdma_request *req,
} else {
len = min(req->data_len - req->sent, (u32)req->info.fragsize);
}
SDMA_DBG(req, "Data Length = %u", len);
trace_hfi1_sdma_user_compute_length(req->pq->dd,
req->pq->ctxt,
req->pq->subctxt,
req->info.comp_idx,
len);
return len;
}
@ -1231,7 +1235,8 @@ static int set_txreq_header(struct user_sdma_request *req,
* Set the KDETH.OFFSET and KDETH.OM based on size of
* transfer.
*/
SDMA_DBG(req, "TID offset %ubytes %uunits om%u",
trace_hfi1_sdma_user_tid_info(
pq->dd, pq->ctxt, pq->subctxt, req->info.comp_idx,
req->tidoffset, req->tidoffset >> omfactor,
omfactor != KDETH_OM_SMALL_SHIFT);
KDETH_SET(hdr->kdeth.ver_tid_offset, OFFSET,
@ -1441,8 +1446,6 @@ static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
u16 idx, enum hfi1_sdma_comp_state state,
int ret)
{
hfi1_cdbg(SDMA, "[%u:%u:%u:%u] Setting completion status %u %d",
pq->dd->unit, pq->ctxt, pq->subctxt, idx, state, ret);
if (state == ERROR)
cq->comps[idx].errcode = -ret;
smp_wmb(); /* make sure errcode is visible first */

View File

@ -110,9 +110,6 @@
hfi1_cdbg(SDMA, "[%u:%u:%u:%u] " fmt, (req)->pq->dd->unit, \
(req)->pq->ctxt, (req)->pq->subctxt, (req)->info.comp_idx, \
##__VA_ARGS__)
#define SDMA_Q_DBG(pq, fmt, ...) \
hfi1_cdbg(SDMA, "[%u:%u:%u] " fmt, (pq)->dd->unit, (pq)->ctxt, \
(pq)->subctxt, ##__VA_ARGS__)
extern uint extended_psn;