IB/hfi1: Add receive fault injection feature

Add fault injection capability:
  - Drop packets unconditionally (fault_by_packet)
  - Drop packets based on opcode (fault_by_opcode)

This feature reacts to the global FAULT_INJECTION
config flag.

The faulting traces have been added:
  - misc/fault_opcode
  - misc/fault_packet

See 'Documentation/fault-injection/fault-injection.txt'
for details.

Examples:
  - Dropping packets by opcode:
    /sys/kernel/debug/hfi1/hfi1_X/fault_opcode
	# Enable fault
	echo Y > fault_by_opcode
	# Set probability of dropping (0-100%)
	echo 25 > probability
	# Set opcode
	echo 0x64 > opcode
	# Number of times to fault
	echo 3 > times
	# An optional mask allows you to fault
	# a range of opcodes
	echo 0xf0 > mask
    /sys/kernel/debug/hfi1/hfi1_X/fault_opcode/fault_stats
    contains a value in parentheses to indicate
    number of each opcode dropped.

  - Dropping packets unconditionally
    /sys/kernel/debug/hfi1/hfi1_X/fault_packet
	# Enable fault
	echo Y > fault_by_packet
    /sys/kernel/debug/hfi1/hfi1_X/fault_packet/fault_stats
    contains the number of packets dropped.

Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Don Hiatt <don.hiatt@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
This commit is contained in:
Don Hiatt 2017-03-20 17:26:14 -07:00 committed by Doug Ledford
parent f7b4263372
commit 0181ce31b2
6 changed files with 336 additions and 3 deletions

View File

@ -51,8 +51,12 @@
#include <linux/export.h> #include <linux/export.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/string.h> #include <linux/string.h>
#include <linux/types.h>
#include <linux/ratelimit.h>
#include <linux/fault-inject.h>
#include "hfi.h" #include "hfi.h"
#include "trace.h"
#include "debugfs.h" #include "debugfs.h"
#include "device.h" #include "device.h"
#include "qp.h" #include "qp.h"
@ -1063,6 +1067,217 @@ DEBUGFS_SEQ_FILE_OPS(sdma_cpu_list);
DEBUGFS_SEQ_FILE_OPEN(sdma_cpu_list) DEBUGFS_SEQ_FILE_OPEN(sdma_cpu_list)
DEBUGFS_FILE_OPS(sdma_cpu_list); DEBUGFS_FILE_OPS(sdma_cpu_list);
#ifdef CONFIG_FAULT_INJECTION
/*
 * seq_file start callback for fault_stats.
 *
 * The iteration cursor is the position pointer itself; it is handed back
 * as long as *pos still indexes an entry of the per-context opcode
 * statistics table, otherwise NULL ends the walk.
 */
static void *_fault_stats_seq_start(struct seq_file *s, loff_t *pos)
{
	/* Only declared so ARRAY_SIZE() can be applied to its stats member. */
	struct hfi1_opcode_stats_perctx *opstats;

	return (*pos < ARRAY_SIZE(opstats->stats)) ? pos : NULL;
}
/*
 * seq_file next callback for fault_stats.
 *
 * Advances *pos by one and returns the position pointer while it still
 * indexes an entry of the per-context opcode statistics table; returns
 * NULL once the end of the table is reached.
 */
static void *_fault_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
	/* Only declared so ARRAY_SIZE() can be applied to its stats member. */
	struct hfi1_opcode_stats_perctx *opstats;

	*pos += 1;
	return (*pos < ARRAY_SIZE(opstats->stats)) ? pos : NULL;
}
/*
 * seq_file stop callback for fault_stats.
 *
 * Intentionally empty: the start/next callbacks only hand back the
 * position pointer, so there is nothing to release here.
 */
static void _fault_stats_seq_stop(struct seq_file *s, void *v)
{
}
/*
 * seq_file show callback for fault_stats.
 *
 * @s: seq_file whose ->private is the struct hfi1_ibdev being reported
 * @v: pointer to the current position, used as the opcode index
 *
 * Sums packet and byte counts for the opcode across the receive contexts
 * below dd->first_user_ctxt, then prints one line with those totals and
 * the rx/tx fault counts.  Opcodes with no traffic, or with no recorded
 * faults, are skipped.
 *
 * Return: 0 when a line was emitted, SEQ_SKIP otherwise.
 */
static int _fault_stats_seq_show(struct seq_file *s, void *v)
{
	struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
	struct hfi1_devdata *dd = dd_from_dev(ibd);
	loff_t *spos = v;
	loff_t i = *spos, j;
	u64 n_packets = 0, n_bytes = 0;
	u64 rx_faults, tx_faults;

	/* Accumulate totals over every allocated context in range. */
	for (j = 0; j < dd->first_user_ctxt; j++) {
		if (dd->rcd[j]) {
			n_packets += dd->rcd[j]->opstats->stats[i].n_packets;
			n_bytes += dd->rcd[j]->opstats->stats[i].n_bytes;
		}
	}

	if (n_packets == 0 && n_bytes == 0)
		return SEQ_SKIP;

	rx_faults = ibd->fault_opcode->n_rxfaults[i];
	tx_faults = ibd->fault_opcode->n_txfaults[i];
	if (rx_faults == 0 && tx_faults == 0)
		return SEQ_SKIP;

	seq_printf(s, "%02llx %llu/%llu (faults rx:%llu faults: tx:%llu)\n", i,
		   (unsigned long long)n_packets,
		   (unsigned long long)n_bytes,
		   (unsigned long long)rx_faults,
		   (unsigned long long)tx_faults);
	return 0;
}
/*
 * Instantiate the debugfs file plumbing for "fault_stats" using the same
 * DEBUGFS_* helper macros the other seq_file entries in this file use.
 * The macro definitions are outside this view; presumably they bind the
 * _fault_stats_seq_{start,next,stop,show} callbacks defined above.
 */
DEBUGFS_SEQ_FILE_OPS(fault_stats);
DEBUGFS_SEQ_FILE_OPEN(fault_stats);
DEBUGFS_FILE_OPS(fault_stats);
/*
 * Tear down the opcode fault-injection state for @ibd.
 *
 * Removes the fault_opcode debugfs directory and everything below it,
 * frees the state and clears the pointer so the fault hook sees the
 * feature as disabled.
 *
 * Safe to call when the state was never set up or has already been torn
 * down: initialization failures leave ibd->fault_opcode NULL and the
 * device exit path calls this unconditionally.
 */
static void fault_exit_opcode_debugfs(struct hfi1_ibdev *ibd)
{
	if (!ibd->fault_opcode)
		return;

	debugfs_remove_recursive(ibd->fault_opcode->dir);
	kfree(ibd->fault_opcode);
	ibd->fault_opcode = NULL;
}
/*
 * Allocate the opcode fault-injection state for @ibd and expose it in
 * debugfs.
 *
 * Creates the "fault_opcode" directory (with the generic fault_attr
 * controls) under the device's debugfs directory and adds the
 * fault_stats, fault_by_opcode, opcode and mask files to it.
 *
 * Return: 0 on success, -ENOMEM if the allocation or one of the control
 * files fails, -ENOENT if the directory cannot be created.  On every
 * failure ibd->fault_opcode is left NULL.
 */
static int fault_init_opcode_debugfs(struct hfi1_ibdev *ibd)
{
	struct dentry *parent = ibd->hfi1_ibdev_dbg;

	ibd->fault_opcode = kzalloc(sizeof(*ibd->fault_opcode), GFP_KERNEL);
	if (!ibd->fault_opcode)
		return -ENOMEM;

	ibd->fault_opcode->attr.interval = 1;
	ibd->fault_opcode->attr.require_end = ULONG_MAX;
	ibd->fault_opcode->attr.stacktrace_depth = 32;
	ibd->fault_opcode->attr.dname = NULL;
	ibd->fault_opcode->attr.verbose = 0;
	ibd->fault_opcode->fault_by_opcode = false;
	ibd->fault_opcode->opcode = 0;
	ibd->fault_opcode->mask = 0xff;

	ibd->fault_opcode->dir =
		fault_create_debugfs_attr("fault_opcode",
					  parent,
					  &ibd->fault_opcode->attr);
	if (IS_ERR(ibd->fault_opcode->dir)) {
		kfree(ibd->fault_opcode);
		/*
		 * Clear the pointer: the fault hook and the exit path both
		 * test it, and must not see freed memory.
		 */
		ibd->fault_opcode = NULL;
		return -ENOENT;
	}

	DEBUGFS_SEQ_FILE_CREATE(fault_stats, ibd->fault_opcode->dir, ibd);
	if (!debugfs_create_bool("fault_by_opcode", 0600,
				 ibd->fault_opcode->dir,
				 &ibd->fault_opcode->fault_by_opcode))
		goto fail;
	if (!debugfs_create_x8("opcode", 0600, ibd->fault_opcode->dir,
			       &ibd->fault_opcode->opcode))
		goto fail;
	if (!debugfs_create_x8("mask", 0600, ibd->fault_opcode->dir,
			       &ibd->fault_opcode->mask))
		goto fail;

	return 0;
fail:
	fault_exit_opcode_debugfs(ibd);
	return -ENOMEM;
}
/*
 * Tear down the packet fault-injection state for @ibd.
 *
 * Removes the fault_packet debugfs directory and everything below it,
 * frees the state and clears the pointer so the fault hook sees the
 * feature as disabled.
 *
 * Safe to call when the state was never set up or has already been torn
 * down: initialization failures leave ibd->fault_packet NULL and the
 * device exit path calls this unconditionally.
 */
static void fault_exit_packet_debugfs(struct hfi1_ibdev *ibd)
{
	if (!ibd->fault_packet)
		return;

	debugfs_remove_recursive(ibd->fault_packet->dir);
	kfree(ibd->fault_packet);
	ibd->fault_packet = NULL;
}
/*
 * Allocate the packet fault-injection state for @ibd and expose it in
 * debugfs.
 *
 * Creates the "fault_packet" directory (with the generic fault_attr
 * controls) under the device's debugfs directory and adds the
 * fault_by_packet switch and the read-only fault_stats counter to it.
 *
 * Return: 0 on success, -ENOMEM if the allocation or one of the control
 * files fails, -ENOENT if the directory cannot be created.  On every
 * failure ibd->fault_packet is left NULL.
 */
static int fault_init_packet_debugfs(struct hfi1_ibdev *ibd)
{
	struct dentry *parent = ibd->hfi1_ibdev_dbg;

	ibd->fault_packet = kzalloc(sizeof(*ibd->fault_packet), GFP_KERNEL);
	if (!ibd->fault_packet)
		return -ENOMEM;

	ibd->fault_packet->attr.interval = 1;
	ibd->fault_packet->attr.require_end = ULONG_MAX;
	ibd->fault_packet->attr.stacktrace_depth = 32;
	ibd->fault_packet->attr.dname = NULL;
	ibd->fault_packet->attr.verbose = 0;
	ibd->fault_packet->fault_by_packet = false;

	/*
	 * Register the packet attr, i.e. the one hfi1_dbg_fault_packet()
	 * hands to should_fail(), not the opcode attr.
	 */
	ibd->fault_packet->dir =
		fault_create_debugfs_attr("fault_packet",
					  parent,
					  &ibd->fault_packet->attr);
	if (IS_ERR(ibd->fault_packet->dir)) {
		kfree(ibd->fault_packet);
		/*
		 * Clear the pointer: the fault hook and the exit path both
		 * test it, and must not see freed memory.
		 */
		ibd->fault_packet = NULL;
		return -ENOENT;
	}

	if (!debugfs_create_bool("fault_by_packet", 0600,
				 ibd->fault_packet->dir,
				 &ibd->fault_packet->fault_by_packet))
		goto fail;
	if (!debugfs_create_u64("fault_stats", 0400,
				ibd->fault_packet->dir,
				&ibd->fault_packet->n_faults))
		goto fail;

	return 0;
fail:
	fault_exit_packet_debugfs(ibd);
	return -ENOMEM;
}
/*
 * Tear down all fault-injection debugfs state for @ibd: first the opcode
 * based injector, then the per-packet injector.
 */
static void fault_exit_debugfs(struct hfi1_ibdev *ibd)
{
fault_exit_opcode_debugfs(ibd);
fault_exit_packet_debugfs(ibd);
}
/*
 * Set up both fault injectors (by opcode, then by packet) for @ibd.
 *
 * If the packet injector cannot be initialized, the already created
 * opcode injector is torn down again so that nothing is left half set up.
 *
 * Return: 0 on success, otherwise the error code of the step that failed.
 */
static int fault_init_debugfs(struct hfi1_ibdev *ibd)
{
	int err;

	err = fault_init_opcode_debugfs(ibd);
	if (err)
		return err;

	err = fault_init_packet_debugfs(ibd);
	if (!err)
		return 0;

	/* Second stage failed: unwind the first one. */
	fault_exit_opcode_debugfs(ibd);
	return err;
}
/*
 * Decide whether a packet with the given opcode should be dropped by the
 * opcode fault injector.
 *
 * @qp:     queue pair the packet belongs to (locates the device, traced)
 * @opcode: opcode of the packet
 * @rx:     true to account the fault as a receive fault, false as transmit
 *
 * A fault is only considered when the injector exists, fault_by_opcode is
 * set and the opcode matches the configured opcode under the configured
 * mask; should_fail() then makes the final decision.  Each injected fault
 * is traced and counted per opcode.
 *
 * Return: true if the caller should drop the packet, false otherwise.
 */
bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx)
{
	bool ret = false;
	struct hfi1_ibdev *ibd = to_idev(qp->ibqp.device);

	if (!ibd->fault_opcode || !ibd->fault_opcode->fault_by_opcode)
		return false;

	/* The fault counters are indexed by opcode: never index past them. */
	if (opcode >= ARRAY_SIZE(ibd->fault_opcode->n_rxfaults))
		return false;

	/*
	 * Compare only the bits selected by the mask, on both sides.  With
	 * the mask applied to the packet opcode alone, a configured opcode
	 * that has bits outside the mask (e.g. opcode 0x64 with mask 0xf0)
	 * could never match anything.
	 */
	if ((opcode ^ ibd->fault_opcode->opcode) & ibd->fault_opcode->mask)
		return false;

	ret = should_fail(&ibd->fault_opcode->attr, 1);
	if (ret) {
		trace_hfi1_fault_opcode(qp, opcode);
		if (rx)
			ibd->fault_opcode->n_rxfaults[opcode]++;
		else
			ibd->fault_opcode->n_txfaults[opcode]++;
	}
	return ret;
}
bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
{
struct rvt_dev_info *rdi = &packet->rcd->ppd->dd->verbs_dev.rdi;
struct hfi1_ibdev *ibd = dev_from_rdi(rdi);
bool ret = false;
if (!ibd->fault_packet || !ibd->fault_packet->fault_by_packet)
return false;
ret = should_fail(&ibd->fault_packet->attr, 1);
if (ret) {
++ibd->fault_packet->n_faults;
trace_hfi1_fault_packet(packet);
}
return ret;
}
#endif
void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
{ {
char name[sizeof("port0counters") + 1]; char name[sizeof("port0counters") + 1];
@ -1112,12 +1327,19 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
!port_cntr_ops[i].ops.write ? !port_cntr_ops[i].ops.write ?
S_IRUGO : S_IRUGO | S_IWUSR); S_IRUGO : S_IRUGO | S_IWUSR);
} }
#ifdef CONFIG_FAULT_INJECTION
fault_init_debugfs(ibd);
#endif
} }
void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd) void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
{ {
if (!hfi1_dbg_root) if (!hfi1_dbg_root)
goto out; goto out;
#ifdef CONFIG_FAULT_INJECTION
fault_exit_debugfs(ibd);
#endif
debugfs_remove(ibd->hfi1_ibdev_link); debugfs_remove(ibd->hfi1_ibdev_link);
debugfs_remove_recursive(ibd->hfi1_ibdev_dbg); debugfs_remove_recursive(ibd->hfi1_ibdev_dbg);
out: out:

View File

@ -53,23 +53,68 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd);
void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd); void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd);
void hfi1_dbg_init(void); void hfi1_dbg_init(void);
void hfi1_dbg_exit(void); void hfi1_dbg_exit(void);
#ifdef CONFIG_FAULT_INJECTION
#include <linux/fault-inject.h>
/* State of the opcode-based fault injector; one instance per hfi1_ibdev. */
struct fault_opcode {
/* generic fault-injection attributes, passed to should_fail() */
struct fault_attr attr;
/* debugfs directory returned by fault_create_debugfs_attr() */
struct dentry *dir;
/* injector on/off switch, exposed as the "fault_by_opcode" debugfs file */
bool fault_by_opcode;
/* faults injected per opcode on receive (indexed by opcode) */
u64 n_rxfaults[256];
/* faults injected per opcode on transmit (indexed by opcode) */
u64 n_txfaults[256];
/* opcode to match, exposed as the "opcode" debugfs file */
u8 opcode;
/* mask applied to the packet opcode when matching, "mask" debugfs file */
u8 mask;
};
/* State of the per-packet fault injector; one instance per hfi1_ibdev. */
struct fault_packet {
/* generic fault-injection attributes, passed to should_fail() */
struct fault_attr attr;
/* debugfs directory returned by fault_create_debugfs_attr() */
struct dentry *dir;
/* injector on/off switch, exposed as the "fault_by_packet" debugfs file */
bool fault_by_packet;
/* number of packets dropped, exposed read-only as "fault_stats" */
u64 n_faults;
};
/*
 * Fault-injection hooks for the packet paths.  Both return true when the
 * caller should drop the packet.
 */
bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx);
bool hfi1_dbg_fault_packet(struct hfi1_packet *packet);
#else
/* Stub used without CONFIG_FAULT_INJECTION: never drops a packet. */
static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
{
return false;
}
/* Stub used without CONFIG_FAULT_INJECTION: never faults on any opcode. */
static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp,
u32 opcode, bool rx)
{
return false;
}
#endif
#else #else
static inline void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) static inline void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
{ {
} }
void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd) static inline void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
{ {
} }
void hfi1_dbg_init(void) static inline void hfi1_dbg_init(void)
{ {
} }
void hfi1_dbg_exit(void) static inline void hfi1_dbg_exit(void)
{ {
} }
/*
 * Stub for the outer #else branch (presumably builds without debugfs
 * support): never drops a packet.
 */
static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
{
return false;
}
/*
 * Stub for the outer #else branch (presumably builds without debugfs
 * support): never faults on any opcode.
 */
static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp,
u32 opcode, bool rx)
{
return false;
}
#endif #endif
#endif /* _HFI1_DEBUGFS_H */ #endif /* _HFI1_DEBUGFS_H */

View File

@ -59,6 +59,7 @@
#include "trace.h" #include "trace.h"
#include "qp.h" #include "qp.h"
#include "sdma.h" #include "sdma.h"
#include "debugfs.h"
#undef pr_fmt #undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt #define pr_fmt(fmt) DRIVER_NAME ": " fmt
@ -1354,6 +1355,9 @@ void handle_eflags(struct hfi1_packet *packet)
*/ */
int process_receive_ib(struct hfi1_packet *packet) int process_receive_ib(struct hfi1_packet *packet)
{ {
if (unlikely(hfi1_dbg_fault_packet(packet)))
return RHF_RCV_CONTINUE;
trace_hfi1_rcvhdr(packet->rcd->ppd->dd, trace_hfi1_rcvhdr(packet->rcd->ppd->dd,
packet->rcd->ctxt, packet->rcd->ctxt,
rhf_err_flags(packet->rhf), rhf_err_flags(packet->rhf),
@ -1409,6 +1413,8 @@ int process_receive_error(struct hfi1_packet *packet)
int kdeth_process_expected(struct hfi1_packet *packet) int kdeth_process_expected(struct hfi1_packet *packet)
{ {
if (unlikely(hfi1_dbg_fault_packet(packet)))
return RHF_RCV_CONTINUE;
if (unlikely(rhf_err_flags(packet->rhf))) if (unlikely(rhf_err_flags(packet->rhf)))
handle_eflags(packet); handle_eflags(packet);
@ -1421,6 +1427,8 @@ int kdeth_process_eager(struct hfi1_packet *packet)
{ {
if (unlikely(rhf_err_flags(packet->rhf))) if (unlikely(rhf_err_flags(packet->rhf)))
handle_eflags(packet); handle_eflags(packet);
if (unlikely(hfi1_dbg_fault_packet(packet)))
return RHF_RCV_CONTINUE;
dd_dev_err(packet->rcd->dd, dd_dev_err(packet->rcd->dd,
"Unhandled eager packet received. Dropping.\n"); "Unhandled eager packet received. Dropping.\n");

View File

@ -72,6 +72,54 @@ TRACE_EVENT(hfi1_interrupt,
__entry->src) __entry->src)
); );
#ifdef CONFIG_FAULT_INJECTION
/*
 * Trace event emitted when the opcode fault injector drops a packet.
 * Records the device name, the QP number and the opcode that was faulted.
 */
TRACE_EVENT(hfi1_fault_opcode,
TP_PROTO(struct rvt_qp *qp, u8 opcode),
TP_ARGS(qp, opcode),
TP_STRUCT__entry(DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
__field(u32, qpn)
__field(u8, opcode)
),
TP_fast_assign(DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
__entry->qpn = qp->ibqp.qp_num;
__entry->opcode = opcode;
),
TP_printk("[%s] qpn 0x%x opcode 0x%x",
__get_str(dev), __entry->qpn, __entry->opcode)
);
/*
 * Trace event emitted when the per-packet fault injector drops a packet.
 * Records the device name, the receive context, the RHF error flags, the
 * header and total lengths, the updegr value and the eager buffer index
 * taken from the RHF.
 */
TRACE_EVENT(hfi1_fault_packet,
TP_PROTO(struct hfi1_packet *packet),
TP_ARGS(packet),
TP_STRUCT__entry(DD_DEV_ENTRY(packet->rcd->ppd->dd)
__field(u64, eflags)
__field(u32, ctxt)
__field(u32, hlen)
__field(u32, tlen)
__field(u32, updegr)
__field(u32, etail)
),
TP_fast_assign(DD_DEV_ASSIGN(packet->rcd->ppd->dd);
__entry->eflags = rhf_err_flags(packet->rhf);
__entry->ctxt = packet->rcd->ctxt;
__entry->hlen = packet->hlen;
__entry->tlen = packet->tlen;
__entry->updegr = packet->updegr;
__entry->etail = rhf_egr_index(packet->rhf);
),
TP_printk(
"[%s] ctxt %d eflags 0x%llx hlen %d tlen %d updegr %d etail %d",
__get_str(dev),
__entry->ctxt,
__entry->eflags,
__entry->hlen,
__entry->tlen,
__entry->updegr,
__entry->etail
)
);
#endif
#endif /* __HFI1_TRACE_MISC_H */ #endif /* __HFI1_TRACE_MISC_H */
#undef TRACE_INCLUDE_PATH #undef TRACE_INCLUDE_PATH

View File

@ -60,6 +60,7 @@
#include "trace.h" #include "trace.h"
#include "qp.h" #include "qp.h"
#include "verbs_txreq.h" #include "verbs_txreq.h"
#include "debugfs.h"
static unsigned int hfi1_lkey_table_size = 16; static unsigned int hfi1_lkey_table_size = 16;
module_param_named(lkey_table_size, hfi1_lkey_table_size, uint, module_param_named(lkey_table_size, hfi1_lkey_table_size, uint,
@ -599,6 +600,11 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
rcu_read_unlock(); rcu_read_unlock();
goto drop; goto drop;
} }
if (unlikely(hfi1_dbg_fault_opcode(packet->qp, opcode,
true))) {
rcu_read_unlock();
goto drop;
}
spin_lock_irqsave(&packet->qp->r_lock, flags); spin_lock_irqsave(&packet->qp->r_lock, flags);
packet_handler = qp_ok(opcode, packet); packet_handler = qp_ok(opcode, packet);
if (likely(packet_handler)) if (likely(packet_handler))

View File

@ -195,6 +195,10 @@ struct hfi1_ibdev {
struct dentry *hfi1_ibdev_dbg; struct dentry *hfi1_ibdev_dbg;
/* per HFI symlinks to above */ /* per HFI symlinks to above */
struct dentry *hfi1_ibdev_link; struct dentry *hfi1_ibdev_link;
#ifdef CONFIG_FAULT_INJECTION
struct fault_opcode *fault_opcode;
struct fault_packet *fault_packet;
#endif
#endif #endif
}; };