From 0c7f77afb7f6e74af899584ac672c8d9e63058b0 Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Thu, 12 May 2016 10:22:33 -0700 Subject: [PATCH 01/42] IB/hfi1: Ignore non-temperature warnings on a downed link QSFP modules can raise an interrupt to inform us of expected conditions while the link is down, such as RX power low. Actively ignore these conditions when the link is down as they only add reporting noise. Continue reporting conditions that are valid at all times, such as temperature alarms and warnings. Reviewed-by: Dean Luick Signed-off-by: Easwar Hariharan Signed-off-by: Doug Ledford --- drivers/staging/rdma/hfi1/chip.c | 17 +++++++++++------ drivers/staging/rdma/hfi1/diag.c | 2 +- drivers/staging/rdma/hfi1/hfi.h | 1 + 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index dcae8e723f98..f6ae0ba3a3b8 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -6105,7 +6105,7 @@ int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok) } /* this access is valid only when the link is up */ - if ((ppd->host_link_state & HLS_UP) == 0) { + if (ppd->host_link_state & HLS_DOWN) { dd_dev_info(dd, "%s: link state %s not up\n", __func__, link_state_name(ppd->host_link_state)); ret = -EBUSY; @@ -7429,7 +7429,7 @@ void apply_link_downgrade_policy(struct hfi1_pportdata *ppd, int refresh_widths) retry: mutex_lock(&ppd->hls_lock); /* only apply if the link is up */ - if (!(ppd->host_link_state & HLS_UP)) { + if (ppd->host_link_state & HLS_DOWN) { /* still going up..wait and retry */ if (ppd->host_link_state & HLS_GOING_UP) { if (++tries < 1000) { @@ -9252,6 +9252,12 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd, dd_dev_info(dd, "%s: QSFP cable temperature too low\n", __func__); + /* + * The remaining alarms/warnings don't matter if the link is down. + */ + if (ppd->host_link_state & HLS_DOWN) + return 0; + if ((qsfp_interrupt_status[1] & QSFP_HIGH_VCC_ALARM) || (qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING)) dd_dev_info(dd, "%s: QSFP supply voltage too high\n", @@ -9346,9 +9352,8 @@ void qsfp_event(struct work_struct *work) return; /* - * Turn DC back on after cables has been - * re-inserted. Up until now, the DC has been in - * reset to save power. + * Turn DC back on after cable has been re-inserted. Up until + * now, the DC has been in reset to save power. */ dc_start(dd); @@ -10074,7 +10079,7 @@ u32 driver_physical_state(struct hfi1_pportdata *ppd) */ u32 driver_logical_state(struct hfi1_pportdata *ppd) { - if (ppd->host_link_state && !(ppd->host_link_state & HLS_UP)) + if (ppd->host_link_state && (ppd->host_link_state & HLS_DOWN)) return IB_PORT_DOWN; switch (ppd->host_link_state & HLS_UP) { diff --git a/drivers/staging/rdma/hfi1/diag.c b/drivers/staging/rdma/hfi1/diag.c index bb2409ad891a..3a679e46fa7d 100644 --- a/drivers/staging/rdma/hfi1/diag.c +++ b/drivers/staging/rdma/hfi1/diag.c @@ -998,7 +998,7 @@ static long hfi1_assign_snoop_link_credits(struct hfi1_pportdata *ppd, u16 per_vl_credits; __be16 be_per_vl_credits; - if (!(ppd->host_link_state & HLS_UP)) + if (ppd->host_link_state & HLS_DOWN) goto err_exit; if (total_credits < vl15_credits) goto err_exit; diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 7b78d56de7f5..4aac75f7ce59 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -453,6 +453,7 @@ struct rvt_sge_state; #define HLS_LINK_COOLDOWN BIT(__HLS_LINK_COOLDOWN_BP) #define HLS_UP (HLS_UP_INIT | HLS_UP_ARMED | HLS_UP_ACTIVE) +#define HLS_DOWN ~(HLS_UP) /* use this MTU size if none other is given */ #define HFI1_DEFAULT_ACTIVE_MTU 10240 From 9775a991f9bdbdde3cc38e553326755af5b2b2a9 Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Thu, 12 May 2016 10:22:39 -0700 Subject: [PATCH 02/42] IB/hfi1: Wait for QSFP modules to initialize The function level reset in init_chip() and subsequent write of all 1s to the ASIC_QSFP registers effectively resets attached active and optical QSFP modules that pay attention to the RESET_N pin. We subsequently try to access the QSFP management interface to qualify and tune the channel and fabric SerDes before enough time (2 seconds per SFF 8679 spec for QSFP28 modules) has elapsed for the module to finish initialization. This fails and causes the failure of the channel tuning algorithm, preventing us from bringing the link up. This patch checks the port type prior to beginning channel and SerDes tuning, and if found to be QSFP, watches for the QSFP initialization complete interrupt, with a maximum timeout of 2 seconds, to allow the initialization to complete. Reviewed-by: Dean Luick Signed-off-by: Easwar Hariharan Signed-off-by: Doug Ledford --- drivers/staging/rdma/hfi1/chip.c | 10 +++++++++- drivers/staging/rdma/hfi1/platform.c | 17 +++++++++++------ drivers/staging/rdma/hfi1/platform.h | 1 + 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index f6ae0ba3a3b8..ce4c275b60f2 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -9485,7 +9485,15 @@ int bringup_serdes(struct hfi1_pportdata *ppd) return ret; } - /* tune the SERDES to a ballpark setting for + get_port_type(ppd); + if (ppd->port_type == PORT_TYPE_QSFP) { + set_qsfp_int_n(ppd, 0); + wait_for_qsfp_init(ppd); + set_qsfp_int_n(ppd, 1); + } + + /* + * Tune the SerDes to a ballpark setting for * optimal signal and bit error rate * Needs to be done before starting the link */ diff --git a/drivers/staging/rdma/hfi1/platform.c b/drivers/staging/rdma/hfi1/platform.c index 8fe8a205b5bb..dfa413cfa351 100644 --- a/drivers/staging/rdma/hfi1/platform.c +++ b/drivers/staging/rdma/hfi1/platform.c @@ -87,6 +87,17 @@ void free_platform_config(struct hfi1_devdata *dd) */ } +void get_port_type(struct hfi1_pportdata *ppd) +{ + int ret; + + ret = get_platform_config_field(ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0, + PORT_TABLE_PORT_TYPE, &ppd->port_type, + 4); + if (ret) + ppd->port_type = PORT_TYPE_UNKNOWN; +} + int set_qsfp_tx(struct hfi1_pportdata *ppd, int on) { u8 tx_ctrl_byte = on ? 0x0 : 0xF; @@ -784,12 +795,6 @@ void tune_serdes(struct hfi1_pportdata *ppd) return; } - ret = get_platform_config_field(ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0, - PORT_TABLE_PORT_TYPE, &ppd->port_type, - 4); - if (ret) - ppd->port_type = PORT_TYPE_UNKNOWN; - switch (ppd->port_type) { case PORT_TYPE_DISCONNECTED: ppd->offline_disabled_reason = diff --git a/drivers/staging/rdma/hfi1/platform.h b/drivers/staging/rdma/hfi1/platform.h index 19620cf546d5..e2c21613c326 100644 --- a/drivers/staging/rdma/hfi1/platform.h +++ b/drivers/staging/rdma/hfi1/platform.h @@ -298,6 +298,7 @@ enum link_tuning_encoding { /* platform.c */ void get_platform_config(struct hfi1_devdata *dd); void free_platform_config(struct hfi1_devdata *dd); +void get_port_type(struct hfi1_pportdata *ppd); int set_qsfp_tx(struct hfi1_pportdata *ppd, int on); void tune_serdes(struct hfi1_pportdata *ppd); From 27a340f6b206dd2a0208d3bc4605c0708b55d7b4 Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Thu, 12 May 2016 10:22:45 -0700 Subject: [PATCH 03/42] IB/hfi1: Correct external device configuration shift The external device configuration was incorrectly shifted to byte 3 of the 32 bit DC_HOST_COMM_SETTINGS instead of byte 0. This patch corrects the shift and provides the cable capability information in byte 0. Reviewed-by: Dean Luick Signed-off-by: Easwar Hariharan Signed-off-by: Doug Ledford --- drivers/staging/rdma/hfi1/platform.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/rdma/hfi1/platform.c b/drivers/staging/rdma/hfi1/platform.c index dfa413cfa351..ae0e4985cc7e 100644 --- a/drivers/staging/rdma/hfi1/platform.c +++ b/drivers/staging/rdma/hfi1/platform.c @@ -575,8 +575,8 @@ static void apply_tunings( ret = read_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS, GENERAL_CONFIG, &config_data); /* Clear, then set the external device config field */ - config_data &= ~(0xFF << 24); - config_data |= (external_device_config << 24); + config_data &= ~(u32)0xFF; + config_data |= external_device_config; ret = load_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS, GENERAL_CONFIG, config_data); if (ret != HCMD_SUCCESS) From 1ebe79c9484bb1b9bd51b3dd6e82b8ff87ebaeba Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Thu, 12 May 2016 10:22:51 -0700 Subject: [PATCH 04/42] IB/hfi1: Remove no-op QSFP reset code The RESET_N bit of the ASIC_QSFPn_OE register is not used by the hardware. Remove code that tries to use it - it does nothing. Reviewed-by: Easwar Hariharan Signed-off-by: Dean Luick Signed-off-by: Doug Ledford --- drivers/staging/rdma/hfi1/chip.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index ce4c275b60f2..ec0953ec7ee3 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -9212,9 +9212,6 @@ void reset_qsfp(struct hfi1_pportdata *ppd) /* Reset the QSFP */ mask = (u64)QSFP_HFI0_RESET_N; - qsfp_mask = read_csr(dd, dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE); - qsfp_mask |= mask; - write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE, qsfp_mask); qsfp_mask = read_csr(dd, dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT); From 17f15bf66884b9e33c5005ee0149b039af8f7af2 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 12 May 2016 10:22:57 -0700 Subject: [PATCH 05/42] IB/hfi1: Fix pio wait counter double increment The code unconditionlly increments the pio wait counter making the counter inacurate and unusable. Fixes: 14553ca11039 ("staging/rdma/hfi1: Adaptive PIO for short messages") Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford --- drivers/staging/rdma/hfi1/verbs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 9cdc85fa366f..f3531fdfef01 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -946,7 +946,6 @@ static int pio_wait(struct rvt_qp *qp, dev->n_piowait += !!(flag & RVT_S_WAIT_PIO); dev->n_piodrain += !!(flag & RVT_S_WAIT_PIO_DRAIN); - dev->n_piowait++; qp->s_flags |= flag; was_empty = list_empty(&sc->piowait); list_add_tail(&priv->s_iowait.list, &sc->piowait); From b96b040445f5d84fb8aa2ff986be71f5069c976e Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 12 May 2016 10:23:03 -0700 Subject: [PATCH 06/42] IB/hfi1: Fix potential panic with sdma drained mechanism The guard is backwards, potentially causing the SDMA client to panic if a wait structure was not specified. psm and verbs are not exposed to the issue, but fix the code just to be correct. Fixes: a545f5308b6c ("staging/rdma/hfi: fix CQ completion order issue") Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford --- drivers/staging/rdma/hfi1/sdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rdma/hfi1/sdma.c b/drivers/staging/rdma/hfi1/sdma.c index abb8ebc1fcac..e938017f78e3 100644 --- a/drivers/staging/rdma/hfi1/sdma.c +++ b/drivers/staging/rdma/hfi1/sdma.c @@ -376,7 +376,7 @@ static inline void complete_tx(struct sdma_engine *sde, sdma_txclean(sde->dd, tx); if (complete) (*complete)(tx, res); - if (iowait_sdma_dec(wait) && wait) + if (wait && iowait_sdma_dec(wait)) iowait_drain_wakeup(wait); } From 67caea1fec85266590dddfa431e1cb000bc942f4 Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Thu, 12 May 2016 10:23:09 -0700 Subject: [PATCH 07/42] IB/hfi1: Improve performance of interval RB trees The interval RB tree management functions use handlers to store user-specific callback for the various tree operations. These handlers are put on a doubly-linked list. When a RB tree function is called, the list is searched for the handler of the particular tree. The list which holds the handlers is modified very rarely - when a handler is created and when a handler is removed. On the other hand, it is searched very often. This a perfect usage scenario for RCU. The result is a much lower overhead of traversing the list as most of the time no locking will be required. Reviewed-by: Dean Luick Reviewed-by: Mike Marciniszyn Signed-off-by: Mitko Haralanov Signed-off-by: Doug Ledford --- drivers/staging/rdma/hfi1/mmu_rb.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/staging/rdma/hfi1/mmu_rb.c b/drivers/staging/rdma/hfi1/mmu_rb.c index 2b0e91d3093d..b7a80aa1ae30 100644 --- a/drivers/staging/rdma/hfi1/mmu_rb.c +++ b/drivers/staging/rdma/hfi1/mmu_rb.c @@ -45,6 +45,7 @@ * */ #include +#include #include #include @@ -97,7 +98,6 @@ static unsigned long mmu_node_last(struct mmu_rb_node *node) int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops) { struct mmu_rb_handler *handlr; - unsigned long flags; if (!ops->invalidate) return -EINVAL; @@ -111,9 +111,9 @@ int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops) INIT_HLIST_NODE(&handlr->mn.hlist); spin_lock_init(&handlr->lock); handlr->mn.ops = &mn_opts; - spin_lock_irqsave(&mmu_rb_lock, flags); - list_add_tail(&handlr->list, &mmu_rb_handlers); - spin_unlock_irqrestore(&mmu_rb_lock, flags); + spin_lock(&mmu_rb_lock); + list_add_tail_rcu(&handlr->list, &mmu_rb_handlers); + spin_unlock(&mmu_rb_lock); return mmu_notifier_register(&handlr->mn, current->mm); } @@ -130,9 +130,10 @@ void hfi1_mmu_rb_unregister(struct rb_root *root) if (current->mm) mmu_notifier_unregister(&handler->mn, current->mm); - spin_lock_irqsave(&mmu_rb_lock, flags); - list_del(&handler->list); - spin_unlock_irqrestore(&mmu_rb_lock, flags); + spin_lock(&mmu_rb_lock); + list_del_rcu(&handler->list); + spin_unlock(&mmu_rb_lock); + synchronize_rcu(); spin_lock_irqsave(&handler->lock, flags); if (!RB_EMPTY_ROOT(root)) { @@ -271,16 +272,15 @@ void hfi1_mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node) static struct mmu_rb_handler *find_mmu_handler(struct rb_root *root) { struct mmu_rb_handler *handler; - unsigned long flags; - spin_lock_irqsave(&mmu_rb_lock, flags); - list_for_each_entry(handler, &mmu_rb_handlers, list) { + rcu_read_lock(); + list_for_each_entry_rcu(handler, &mmu_rb_handlers, list) { if (handler->root == root) goto unlock; } handler = NULL; unlock: - spin_unlock_irqrestore(&mmu_rb_lock, flags); + rcu_read_unlock(); return handler; } From 63d0b4a5fc5f589c6e980fc5d43c220e74a6f06c Mon Sep 17 00:00:00 2001 From: Jubin John Date: Thu, 12 May 2016 10:23:16 -0700 Subject: [PATCH 08/42] IB/hfi1: Remove unnecessary header While running perftests, there is a significant utilization of the random number daemon. This is due to the linux/random.h header being included in qp.c and verbs.c. However, none of the functions from this header are being used in these files, so remove the unnecessary header. Reviewed-by: Mike Marciniszyn Signed-off-by: Jubin John Signed-off-by: Doug Ledford --- drivers/staging/rdma/hfi1/qp.c | 1 - drivers/staging/rdma/hfi1/verbs.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index 91eb42316df9..c62650892e00 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -49,7 +49,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index f3531fdfef01..ae92ac5ff232 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -52,7 +52,6 @@ #include #include #include -#include #include #include "hfi.h" From 02ba00c0bbfbbe8abd81abd38302d400b59e220f Mon Sep 17 00:00:00 2001 From: Jubin John Date: Thu, 12 May 2016 10:23:22 -0700 Subject: [PATCH 09/42] IB/hfi1: Fix hfi_rcvhdr tracepoint The hfi_rcvhdr tracepoint has the ctxt and eflags switched in the prototype of the trace event, compared to the args and usage of the trace function. Fix this by swapping these 2 fields in the trace event prototype. Reviewed-by: Dennis Dalessandro Reviewed-by: Mike Marciniszyn Signed-off-by: Jubin John Signed-off-by: Doug Ledford --- drivers/staging/rdma/hfi1/trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rdma/hfi1/trace.h b/drivers/staging/rdma/hfi1/trace.h index 963dc948c38a..42bcfc3e15d5 100644 --- a/drivers/staging/rdma/hfi1/trace.h +++ b/drivers/staging/rdma/hfi1/trace.h @@ -74,8 +74,8 @@ __print_symbolic(etype, \ TRACE_EVENT(hfi1_rcvhdr, TP_PROTO(struct hfi1_devdata *dd, - u64 eflags, u32 ctxt, + u64 eflags, u32 etype, u32 hlen, u32 tlen, From cdbff5042d69bbe3f9840bea03863c93f93c88fa Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 12 May 2016 10:23:28 -0700 Subject: [PATCH 10/42] IB/rdmavt: Increase CQ callback thread priority The priority of the send engines is higher than the CQ completion thread potentially causing completions to be starved for very fast interfaces. Change the CQ kthread to match the send engine threads to minimize this delay for ULP completion processing. Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/cq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index b1ffc8b4a6c0..6ca6fa80dd6e 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -525,6 +525,7 @@ int rvt_driver_cq_init(struct rvt_dev_info *rdi) return PTR_ERR(task); } + set_user_nice(task, MIN_NICE); cpu = cpumask_first(cpumask_of_node(rdi->dparms.node)); kthread_bind(task, cpu); wake_up_process(task); From cde10afa6c8d0db75a3dc038ec46ae901c51cfab Mon Sep 17 00:00:00 2001 From: Jakub Pawlak Date: Thu, 12 May 2016 10:23:35 -0700 Subject: [PATCH 11/42] IB/hfi1: Correct log message strings Remove "IB" keyword from log messages. Correct comment for thermal sensor init function. Reviewed-by: Dean Luick Signed-off-by: Jakub Pawlak Signed-off-by: Doug Ledford --- drivers/staging/rdma/hfi1/chip.c | 2 +- drivers/staging/rdma/hfi1/driver.c | 2 +- drivers/staging/rdma/hfi1/hfi.h | 5 ++--- drivers/staging/rdma/hfi1/sysfs.c | 4 ++-- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index ec0953ec7ee3..d8cc329901d0 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -14588,7 +14588,7 @@ u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl, (reason), (ret)) /* - * Initialize the Avago Thermal sensor. + * Initialize the thermal sensor. * * After initialization, enable polling of thermal sensor through * SBus interface. In order for this to work, the SBus Master diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/staging/rdma/hfi1/driver.c index 700c6fa3a633..c75b0ae688f8 100644 --- a/drivers/staging/rdma/hfi1/driver.c +++ b/drivers/staging/rdma/hfi1/driver.c @@ -1161,7 +1161,7 @@ int hfi1_set_lid(struct hfi1_pportdata *ppd, u32 lid, u8 lmc) ppd->lmc = lmc; hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LIDLMC, 0); - dd_dev_info(dd, "IB%u:%u got a lid: 0x%x\n", dd->unit, ppd->port, lid); + dd_dev_info(dd, "port %u: got a lid: 0x%x\n", ppd->port, lid); return 0; } diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 4aac75f7ce59..337bd2dc2dbf 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -1883,9 +1883,8 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd) get_unit_name((dd)->unit), ##__VA_ARGS__) #define hfi1_dev_porterr(dd, port, fmt, ...) \ - dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \ - get_unit_name((dd)->unit), (dd)->unit, (port), \ - ##__VA_ARGS__) + dev_err(&(dd)->pcidev->dev, "%s: port %u: " fmt, \ + get_unit_name((dd)->unit), (port), ##__VA_ARGS__) /* * this is used for formatting hw error messages... diff --git a/drivers/staging/rdma/hfi1/sysfs.c b/drivers/staging/rdma/hfi1/sysfs.c index 8cd6df8634ad..91fc2aed6aed 100644 --- a/drivers/staging/rdma/hfi1/sysfs.c +++ b/drivers/staging/rdma/hfi1/sysfs.c @@ -721,8 +721,8 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, } dd_dev_info(dd, - "IB%u: Congestion Control Agent enabled for port %d\n", - dd->unit, port_num); + "Congestion Control Agent enabled for port %d\n", + port_num); return 0; From f036780be8ac7abee912bd1eeb459230e6bcc878 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Thu, 12 May 2016 10:23:41 -0700 Subject: [PATCH 12/42] IB/hfi1: Immediately apply congestion setting MAD The handling of the congestion setting MAD packet only saved off the values, waiting for a congestion control table packet before going active. Instead, immediately apply the values. Reviewed-by: Dennis Dalessandro Signed-off-by: Dean Luick Signed-off-by: Doug Ledford --- drivers/staging/rdma/hfi1/mad.c | 93 +++++++++++++++++++++------------ 1 file changed, 60 insertions(+), 33 deletions(-) diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c index ed58cf21e790..17882dc8650e 100644 --- a/drivers/staging/rdma/hfi1/mad.c +++ b/drivers/staging/rdma/hfi1/mad.c @@ -3363,6 +3363,50 @@ static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am, return reply((struct ib_mad_hdr *)smp); } +/* + * Apply congestion control information stored in the ppd to the + * active structure. + */ +static void apply_cc_state(struct hfi1_pportdata *ppd) +{ + struct cc_state *old_cc_state, *new_cc_state; + + new_cc_state = kzalloc(sizeof(*new_cc_state), GFP_KERNEL); + if (!new_cc_state) + return; + + /* + * Hold the lock for updating *and* to prevent ppd information + * from changing during the update. + */ + spin_lock(&ppd->cc_state_lock); + + old_cc_state = get_cc_state(ppd); + if (!old_cc_state) { + /* never active, or shutting down */ + spin_unlock(&ppd->cc_state_lock); + kfree(new_cc_state); + return; + } + + *new_cc_state = *old_cc_state; + + new_cc_state->cct.ccti_limit = ppd->total_cct_entry - 1; + memcpy(new_cc_state->cct.entries, ppd->ccti_entries, + ppd->total_cct_entry * sizeof(struct ib_cc_table_entry)); + + new_cc_state->cong_setting.port_control = IB_CC_CCS_PC_SL_BASED; + new_cc_state->cong_setting.control_map = ppd->cc_sl_control_map; + memcpy(new_cc_state->cong_setting.entries, ppd->congestion_entries, + OPA_MAX_SLS * sizeof(struct opa_congestion_setting_entry)); + + rcu_assign_pointer(ppd->cc_state, new_cc_state); + + spin_unlock(&ppd->cc_state_lock); + + call_rcu(&old_cc_state->rcu, cc_state_reclaim); +} + static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, u32 *resp_len) @@ -3374,6 +3418,11 @@ static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data, struct opa_congestion_setting_entry_shadow *entries; int i; + /* + * Save details from packet into the ppd. Hold the cc_state_lock so + * our information is consistent with anyone trying to apply the state. + */ + spin_lock(&ppd->cc_state_lock); ppd->cc_sl_control_map = be32_to_cpu(p->control_map); entries = ppd->congestion_entries; @@ -3384,6 +3433,10 @@ static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data, p->entries[i].trigger_threshold; entries[i].ccti_min = p->entries[i].ccti_min; } + spin_unlock(&ppd->cc_state_lock); + + /* now apply the information */ + apply_cc_state(ppd); return __subn_get_opa_cong_setting(smp, am, data, ibdev, port, resp_len); @@ -3526,7 +3579,6 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, int i, j; u32 sentry, eentry; u16 ccti_limit; - struct cc_state *old_cc_state, *new_cc_state; /* sanity check n_blocks, start_block */ if (n_blocks == 0 || @@ -3546,45 +3598,20 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, return reply((struct ib_mad_hdr *)smp); } - new_cc_state = kzalloc(sizeof(*new_cc_state), GFP_KERNEL); - if (!new_cc_state) - goto getit; - + /* + * Save details from packet into the ppd. Hold the cc_state_lock so + * our information is consistent with anyone trying to apply the state. + */ spin_lock(&ppd->cc_state_lock); - - old_cc_state = get_cc_state(ppd); - - if (!old_cc_state) { - spin_unlock(&ppd->cc_state_lock); - kfree(new_cc_state); - return reply((struct ib_mad_hdr *)smp); - } - - *new_cc_state = *old_cc_state; - - new_cc_state->cct.ccti_limit = ccti_limit; - - entries = ppd->ccti_entries; ppd->total_cct_entry = ccti_limit + 1; - + entries = ppd->ccti_entries; for (j = 0, i = sentry; i < eentry; j++, i++) entries[i].entry = be16_to_cpu(p->ccti_entries[j].entry); - - memcpy(new_cc_state->cct.entries, entries, - eentry * sizeof(struct ib_cc_table_entry)); - - new_cc_state->cong_setting.port_control = IB_CC_CCS_PC_SL_BASED; - new_cc_state->cong_setting.control_map = ppd->cc_sl_control_map; - memcpy(new_cc_state->cong_setting.entries, ppd->congestion_entries, - OPA_MAX_SLS * sizeof(struct opa_congestion_setting_entry)); - - rcu_assign_pointer(ppd->cc_state, new_cc_state); - spin_unlock(&ppd->cc_state_lock); - call_rcu(&old_cc_state->rcu, cc_state_reclaim); + /* now apply the information */ + apply_cc_state(ppd); -getit: return __subn_get_opa_cc_table(smp, am, data, ibdev, port, resp_len); } From 859b527f00ad1364a4152c0365e2e5ef5a6bf20a Mon Sep 17 00:00:00 2001 From: Jianxin Xiong Date: Thu, 12 May 2016 10:23:47 -0700 Subject: [PATCH 13/42] IB/hfi1: Keep SC_USER as the last send context type SC_USER needs to be the last send context type to ensure other send context types get their allocation when num_user_contexts is set to a large number. This fixes a panic when the module parameter num_user_contexts is set to 141 and larger. Reviewed-by: Dean Luick Signed-off-by: Jianxin Xiong Signed-off-by: Doug Ledford --- drivers/staging/rdma/hfi1/pio.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/staging/rdma/hfi1/pio.h b/drivers/staging/rdma/hfi1/pio.h index 53a08edb7f64..464cbd27b975 100644 --- a/drivers/staging/rdma/hfi1/pio.h +++ b/drivers/staging/rdma/hfi1/pio.h @@ -49,10 +49,10 @@ /* send context types */ #define SC_KERNEL 0 -#define SC_ACK 1 -#define SC_USER 2 -#define SC_VL15 3 -#define SC_MAX 4 +#define SC_VL15 1 +#define SC_ACK 2 +#define SC_USER 3 /* must be the last one: it may take all left */ +#define SC_MAX 4 /* count of send context types */ /* invalid send context index */ #define INVALID_SCI 0xff From bf77cc34f3e6905bc8841489dd628bbffe37d743 Mon Sep 17 00:00:00 2001 From: Jianxin Xiong Date: Thu, 12 May 2016 10:23:53 -0700 Subject: [PATCH 14/42] ib_pack.h: Add opcode definition for send with invalidate The opcode for "SEND Last with Invalidate" and "SEND Only with Invalidate" have been defined for RC in IBA Specification Vol 1 since Release 1.2. Add the definition to the header file in preparation of supporting these opcodes in rdmavt based drivers. Reviewed-by: Mike Marciniszyn Signed-off-by: Jianxin Xiong Signed-off-by: Doug Ledford --- include/rdma/ib_pack.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h index 0f3daae44bf9..b13419ce99ff 100644 --- a/include/rdma/ib_pack.h +++ b/include/rdma/ib_pack.h @@ -103,6 +103,9 @@ enum { IB_OPCODE_ATOMIC_ACKNOWLEDGE = 0x12, IB_OPCODE_COMPARE_SWAP = 0x13, IB_OPCODE_FETCH_ADD = 0x14, + /* opcode 0x15 is reserved */ + IB_OPCODE_SEND_LAST_WITH_INVALIDATE = 0x16, + IB_OPCODE_SEND_ONLY_WITH_INVALIDATE = 0x17, /* real constants follow -- see comment about above IB_OPCODE() macro for more details */ @@ -129,6 +132,8 @@ enum { IB_OPCODE(RC, ATOMIC_ACKNOWLEDGE), IB_OPCODE(RC, COMPARE_SWAP), IB_OPCODE(RC, FETCH_ADD), + IB_OPCODE(RC, SEND_LAST_WITH_INVALIDATE), + IB_OPCODE(RC, SEND_ONLY_WITH_INVALIDATE), /* UC */ IB_OPCODE(UC, SEND_FIRST), From 3923979e9bc6aa6f168a644314d6729e9f98eccc Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Thu, 12 May 2016 10:24:00 -0700 Subject: [PATCH 15/42] IB/hfi1: Change hfi1_init loop to preserve error returns If one iteration of the loop causes an error return and a later iteration doesn't, the later iteration causes the earlier error condition to be lost. This could result in driver probe succeeding when it should have failed. Therefore save off the error return in the loop itself rather than outside the loop. Reviewed-by: Dennis Dalessandro Reviewed-by: Mitko Haralanov Signed-off-by: Ashutosh Dixit Signed-off-by: Doug Ledford --- drivers/staging/rdma/hfi1/init.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index 502b7cf4647d..9e71abf0f40f 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -732,12 +732,12 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) lastfail = hfi1_create_rcvhdrq(dd, rcd); if (!lastfail) lastfail = hfi1_setup_eagerbufs(rcd); - if (lastfail) + if (lastfail) { dd_dev_err(dd, "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n"); + ret = lastfail; + } } - if (lastfail) - ret = lastfail; /* Allocate enough memory for user event notification. */ len = PAGE_ALIGN(dd->chip_rcv_contexts * HFI1_MAX_SHARED_CTXTS * From eea570788ebeb817a5ecb173dafc316779120081 Mon Sep 17 00:00:00 2001 From: Muhammad Falak R Wani Date: Sun, 1 May 2016 18:05:31 +0530 Subject: [PATCH 16/42] staging/rdma/hfi1: use RCU_INIT_POINTER() when NULLing. It is safe to use RCU_INIT_POINTER() to NULL a pointer, instead of rcu_assign_pointer(). This results in slightly smaller/faster code. Signed-off-by: Muhammad Falak R Wani Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/staging/rdma/hfi1/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index 9e71abf0f40f..b9beaae5953d 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -1300,7 +1300,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd) spin_lock(&ppd->cc_state_lock); cc_state = get_cc_state(ppd); - rcu_assign_pointer(ppd->cc_state, NULL); + RCU_INIT_POINTER(ppd->cc_state, NULL); spin_unlock(&ppd->cc_state_lock); if (cc_state) From 9565c6a37a9d69f00e2a7dabbee2b4f6d20dc1ae Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Thu, 19 May 2016 05:21:18 -0700 Subject: [PATCH 17/42] IB/hfi1: Fix an interval RB node reference count leak Commit e88c9271d9f8 ("IB/hfi1: Fix buffer cache corner case which may cause corruption") introduced a bug which may cause a reference count of a interval RB node to be leaked in the case where an SDMA transfer from that node completes at the same time as the node is being extended. If a node is being extended, it is first removed from the RB tree in order to be processed without the risk of an invalidation event removing the node at the same time. If a SDMA completion happens during that time, the completion handler will fail to find the node in the RB tree and, therefore, fail to correctly decrement its refcount. This leaves the node in the tree and its pages pinned for the duration of the user process. To prevent this from happening the io vector adds a reference to the RB node, which is used during the SDMA completion instead of looking up the node in the RB tree. This change adds a performance improvement as a side effect by avoiding the RB tree lookup. Fixes: e88c9271d9f8 ("IB/hfi1: Fix buffer cache corner case which may cause corruption") Reviewed-by: Dean Luick Reviewed-by: Harish Chegondi Signed-off-by: Mitko Haralanov Signed-off-by: Doug Ledford --- drivers/staging/rdma/hfi1/user_sdma.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/staging/rdma/hfi1/user_sdma.c b/drivers/staging/rdma/hfi1/user_sdma.c index 0014c9c0e967..aed2878c97f1 100644 --- a/drivers/staging/rdma/hfi1/user_sdma.c +++ b/drivers/staging/rdma/hfi1/user_sdma.c @@ -166,6 +166,8 @@ static unsigned initial_pkt_count = 8; #define SDMA_IOWAIT_TIMEOUT 1000 /* in milliseconds */ +struct sdma_mmu_node; + struct user_sdma_iovec { struct list_head list; struct iovec iov; @@ -178,6 +180,7 @@ struct user_sdma_iovec { * which we last left off. */ u64 offset; + struct sdma_mmu_node *node; }; #define SDMA_CACHE_NODE_EVICT BIT(0) @@ -1153,6 +1156,7 @@ retry: } iovec->pages = node->pages; iovec->npages = npages; + iovec->node = node; ret = hfi1_mmu_rb_insert(&req->pq->sdma_rb_root, &node->rb); if (ret) { @@ -1519,18 +1523,13 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin) } if (req->data_iovs) { struct sdma_mmu_node *node; - struct mmu_rb_node *mnode; int i; for (i = 0; i < req->data_iovs; i++) { - mnode = hfi1_mmu_rb_search( - &req->pq->sdma_rb_root, - (unsigned long)req->iovs[i].iov.iov_base, - req->iovs[i].iov.iov_len); - if (!mnode || IS_ERR(mnode)) + node = req->iovs[i].node; + if (!node) continue; - node = container_of(mnode, struct sdma_mmu_node, rb); if (unpin) hfi1_mmu_rb_remove(&req->pq->sdma_rb_root, &node->rb); From 654b643670d82f14a62c888710e25248e03b0716 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 19 May 2016 05:21:25 -0700 Subject: [PATCH 18/42] IB/rdmavt: Insure QP vmalloc variants zero memory The usage of the various vmalloc APIs do not consistently zero memory when allocating the swqe. Insure zeroing variants are used. Reviewed-by: Mitko Haralanov Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 0f12c211c385..dd03b23a6a7c 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -653,9 +653,9 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, if (gfp == GFP_NOIO) swq = __vmalloc( (init_attr->cap.max_send_wr + 1) * sz, - gfp, PAGE_KERNEL); + gfp | __GFP_ZERO, PAGE_KERNEL); else - swq = vmalloc_node( + swq = vzalloc_node( (init_attr->cap.max_send_wr + 1) * sz, rdi->dparms.node); if (!swq) @@ -704,9 +704,9 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, qp->r_rq.wq = __vmalloc( sizeof(struct rvt_rwq) + qp->r_rq.size * sz, - gfp, PAGE_KERNEL); + gfp | __GFP_ZERO, PAGE_KERNEL); else - qp->r_rq.wq = vmalloc_node( + qp->r_rq.wq = vzalloc_node( sizeof(struct rvt_rwq) + qp->r_rq.size * sz, rdi->dparms.node); From 49961f8fe8264fa231fbdcc4ca52c6706fdb2577 Mon Sep 17 00:00:00 2001 From: Jubin John Date: Thu, 19 May 2016 05:21:31 -0700 Subject: [PATCH 19/42] IB/rdmavt: Use kzalloc_node Use kzalloc_node instead of kzalloc for rdmavt memory region segment allocation to optimize for performance on NUMA platforms. Reviewed-by: Dennis Dalessandro Signed-off-by: Jubin John Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/mr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 0ff765bfd619..0f4d4500f45e 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -124,11 +124,13 @@ static int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd, int count) { int m, i = 0; + struct rvt_dev_info *dev = ib_to_rvt(pd->device); mr->mapsz = 0; m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ; for (; i < m; i++) { - mr->map[i] = kzalloc(sizeof(*mr->map[0]), GFP_KERNEL); + mr->map[i] = kzalloc_node(sizeof(*mr->map[0]), GFP_KERNEL, + dev->dparms.node); if (!mr->map[i]) { rvt_deinit_mregion(mr); return -ENOMEM; From eac7193632e64383e35fa387e683dd317b9df047 Mon Sep 17 00:00:00 2001 From: Jubin John Date: Thu, 19 May 2016 05:21:37 -0700 Subject: [PATCH 20/42] IB/hfi1: Fix sdma_event_names[] build warning sdma_event_names[] is only used within CONFIG_SDMA_VERBOSITY ifdefs, so when CONFIG_SDMA_VERBOSITY is disabled, it results in the following 0-day build warning: >> drivers/infiniband/hw/hfi1/sdma.c:137:27: warning: 'sdma_event_names' >> defined but not used [-Wunused-const-variable=] static const char * const sdma_event_names[] = { ^~~~~~~~~~~~~~~~ This occurs on the following compiler: compiler: gcc-6 (Debian 6.1.1-1) 6.1.1 20160430 For more information check: https://lists.01.org/pipermail/kbuild-all/2016-May/020060.html Fix this warning by defining sdma_event_name[] only within the CONFIG_SDMA_VERBOSITY ifdefs. Reported-by: kbuild test robot Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Jubin John Signed-off-by: Doug Ledford --- drivers/staging/rdma/hfi1/sdma.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/staging/rdma/hfi1/sdma.c b/drivers/staging/rdma/hfi1/sdma.c index e938017f78e3..f9befc05b349 100644 --- a/drivers/staging/rdma/hfi1/sdma.c +++ b/drivers/staging/rdma/hfi1/sdma.c @@ -134,6 +134,7 @@ static const char * const sdma_state_names[] = { [sdma_state_s99_running] = "s99_Running", }; +#ifdef CONFIG_SDMA_VERBOSITY static const char * const sdma_event_names[] = { [sdma_event_e00_go_hw_down] = "e00_GoHwDown", [sdma_event_e10_go_hw_start] = "e10_GoHwStart", @@ -150,6 +151,7 @@ static const char * const sdma_event_names[] = { [sdma_event_e85_link_down] = "e85_LinkDown", [sdma_event_e90_sw_halted] = "e90_SwHalted", }; +#endif static const struct sdma_set_state_action sdma_action_table[] = { [sdma_state_s00_hw_down] = { From 46aa5baf96107bb35cc549ad1d19cf312dd5fb58 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 19 May 2016 05:21:44 -0700 Subject: [PATCH 21/42] IB/hfi1: Remove unnecessary comment This comment was old, the MTU enums have been defined. Reviewed-by: Mitko Haralanov Reviewed-by: Dennis Dalessandro Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford --- drivers/staging/rdma/hfi1/qp.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index c62650892e00..14f889e3655b 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -160,9 +160,6 @@ static inline int opa_mtu_enum_to_int(int mtu) * This function is what we would push to the core layer if we wanted to be a * "first class citizen". Instead we hide this here and rely on Verbs ULPs * to blindly pass the MTU enum value from the PathRecord to us. - * - * The actual flag used to determine "8k MTU" will change and is currently - * unknown. */ static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu) { From f70f5f6af36bce29fe2c4bc733a223b5746eb65f Mon Sep 17 00:00:00 2001 From: Jubin John Date: Thu, 19 May 2016 05:21:50 -0700 Subject: [PATCH 22/42] IB/qib: Remove unused qib_7322_intr_msgs[] Building the qib driver with gcc version 6.1.0 raises the following build warning: drivers/infiniband/hw/qib/qib_iba7322.c:1311:39: warning: 'qib_7322_intr_msgs' defined but not used [-Wunused-const-variable=] static const struct qib_hwerror_msgs qib_7322_intr_msgs[] = { ^~~~~~~~~~~~~~~~~~ Remove the unused qib_7322_intr_msgs[] Reviewed-by: Dennis Dalessandro Reviewed-by: Mike Marciniszyn Signed-off-by: Jubin John Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib_iba7322.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 82d7c4bf5970..ce4034071f9c 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -1308,21 +1308,6 @@ static const struct qib_hwerror_msgs qib_7322p_error_msgs[] = { SYM_LSB(IntMask, fldname##17IntMask)), \ .msg = #fldname "_C", .sz = sizeof(#fldname "_C") } -static const struct qib_hwerror_msgs qib_7322_intr_msgs[] = { - INTR_AUTO_P(SDmaInt), - INTR_AUTO_P(SDmaProgressInt), - INTR_AUTO_P(SDmaIdleInt), - INTR_AUTO_P(SDmaCleanupDone), - INTR_AUTO_C(RcvUrg), - INTR_AUTO_P(ErrInt), - INTR_AUTO(ErrInt), /* non-port-specific errs */ - INTR_AUTO(AssertGPIOInt), - INTR_AUTO_P(SendDoneInt), - INTR_AUTO(SendBufAvailInt), - INTR_AUTO_C(RcvAvail), - { .mask = 0, .sz = 0 } -}; - #define TXSYMPTOM_AUTO_P(fldname) \ { .mask = SYM_MASK(SendHdrErrSymptom_0, fldname), \ .msg = #fldname, .sz = sizeof(#fldname) } From b583faf4dc6eaa64895c37b81983e75a8c3c1e4e Mon Sep 17 00:00:00 2001 From: Jianxin Xiong Date: Thu, 19 May 2016 05:21:57 -0700 Subject: [PATCH 23/42] IB/hfi1: Fix bug that blocks process on exit after port bounce During the processing of a user SDMA request, if there was an error before the request counter was increased, the state of the packet queue could be updated incorrectly, causing the counter to underflow. As the result, the process could get stuck later since the counter could never get back to 0. This patch adds a condition to guard the packet queue update so that the counter is only decreased if it has been increased before the error happens. Reviewed-by: Mitko Haralanov Signed-off-by: Jianxin Xiong Signed-off-by: Doug Ledford --- drivers/staging/rdma/hfi1/user_sdma.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/staging/rdma/hfi1/user_sdma.c b/drivers/staging/rdma/hfi1/user_sdma.c index aed2878c97f1..29f4795f866c 100644 --- a/drivers/staging/rdma/hfi1/user_sdma.c +++ b/drivers/staging/rdma/hfi1/user_sdma.c @@ -510,6 +510,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, struct sdma_req_info info; struct user_sdma_request *req; u8 opcode, sc, vl; + int req_queued = 0; if (iovec[idx].iov_len < sizeof(info) + sizeof(req->hdr)) { hfi1_cdbg( @@ -706,6 +707,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, set_comp_state(pq, cq, info.comp_idx, QUEUED, 0); atomic_inc(&pq->n_reqs); + req_queued = 1; /* Send the first N packets in the request to buy us some time */ ret = user_sdma_send_pkts(req, pcount); if (unlikely(ret < 0 && ret != -EBUSY)) { @@ -750,7 +752,8 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, return 0; free_req: user_sdma_free_request(req, true); - pq_update(pq); + if (req_queued) + pq_update(pq); set_comp_state(pq, cq, info.comp_idx, ERROR, req->status); return ret; } From f3225c3f1107104f5e143797550476182b844cfb Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Thu, 19 May 2016 05:22:03 -0700 Subject: [PATCH 24/42] IB/hfi1: Remove anti-pattern in cdev init Remove the usage of an anti-pattern goto in hfi1_cdev_init to improve code readability. Suggested-by: Jason Gunthorpe Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/staging/rdma/hfi1/device.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/staging/rdma/hfi1/device.c b/drivers/staging/rdma/hfi1/device.c index c05c39da83b1..6ee800f0359f 100644 --- a/drivers/staging/rdma/hfi1/device.c +++ b/drivers/staging/rdma/hfi1/device.c @@ -82,13 +82,13 @@ int hfi1_cdev_init(int minor, const char *name, else device = device_create(class, NULL, dev, NULL, "%s", name); - if (!IS_ERR(device)) - goto done; - ret = PTR_ERR(device); - device = NULL; - pr_err("Could not create device for minor %d, %s (err %d)\n", - minor, name, -ret); - cdev_del(cdev); + if (IS_ERR(device)) { + ret = PTR_ERR(device); + device = NULL; + pr_err("Could not create device for minor %d, %s (err %d)\n", + minor, name, -ret); + cdev_del(cdev); + } done: *devp = device; return ret; From 0eb626590dcf1280c6d01a784e9d53a3de6d5e8e Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Thu, 19 May 2016 05:25:50 -0700 Subject: [PATCH 25/42] IB/hfi1: Remove multiple device cdev hfi1 current exports a cdev that can be used to target all of the hfi's in the system. However there is a problem with this approach in that the devices could be on different subnets. This is a problem that user space can figure out and explicitly tell the driver on which device to create a context. Remove the multi-purpose cdev leaving a dedicated cdev for each port. Also remove the striping capability that is dependent upon the user choosing the multi-purpose cdev. It is now up to user space to determine how to stripe contexts. Reviewed-by: Dean Luick Reviewed-by: Mitko Haralanov Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/staging/rdma/hfi1/file_ops.c | 98 +++++----------------------- include/uapi/rdma/hfi/hfi1_user.h | 19 +----- 2 files changed, 19 insertions(+), 98 deletions(-) diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c index c1c5bf82addb..518cd891b63d 100644 --- a/drivers/staging/rdma/hfi1/file_ops.c +++ b/drivers/staging/rdma/hfi1/file_ops.c @@ -86,8 +86,7 @@ static int get_ctxt_info(struct file *, void __user *, __u32); static int get_base_info(struct file *, void __user *, __u32); static int setup_ctxt(struct file *); static int setup_subctxt(struct hfi1_ctxtdata *); -static int get_user_context(struct file *, struct hfi1_user_info *, - int, unsigned); +static int get_user_context(struct file *, struct hfi1_user_info *, int); static int find_shared_ctxt(struct file *, const struct hfi1_user_info *); static int allocate_ctxt(struct file *, struct hfi1_devdata *, struct hfi1_user_info *); @@ -836,7 +835,7 @@ static u64 kvirt_to_phys(void *addr) static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo) { int i_minor, ret = 0; - unsigned swmajor, swminor, alg = HFI1_ALG_ACROSS; + unsigned int swmajor, swminor; swmajor = uinfo->userversion >> 16; if (swmajor != HFI1_USER_SWMAJOR) { @@ -846,9 +845,6 @@ static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo) swminor = uinfo->userversion & 0xffff; - if (uinfo->hfi1_alg < HFI1_ALG_COUNT) - alg = uinfo->hfi1_alg; - mutex_lock(&hfi1_mutex); /* First, lets check if we need to setup a shared context? */ if (uinfo->subctxt_cnt) { @@ -868,7 +864,7 @@ static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo) */ if (!ret) { i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE; - ret = get_user_context(fp, uinfo, i_minor - 1, alg); + ret = get_user_context(fp, uinfo, i_minor); } done_unlock: mutex_unlock(&hfi1_mutex); @@ -876,71 +872,26 @@ done: return ret; } -/* return true if the device available for general use */ -static int usable_device(struct hfi1_devdata *dd) -{ - struct hfi1_pportdata *ppd = dd->pport; - - return driver_lstate(ppd) == IB_PORT_ACTIVE; -} - static int get_user_context(struct file *fp, struct hfi1_user_info *uinfo, - int devno, unsigned alg) + int devno) { struct hfi1_devdata *dd = NULL; - int ret = 0, devmax, npresent, nup, dev; + int devmax, npresent, nup; devmax = hfi1_count_units(&npresent, &nup); - if (!npresent) { - ret = -ENXIO; - goto done; - } - if (!nup) { - ret = -ENETDOWN; - goto done; - } - if (devno >= 0) { - dd = hfi1_lookup(devno); - if (!dd) - ret = -ENODEV; - else if (!dd->freectxts) - ret = -EBUSY; - } else { - struct hfi1_devdata *pdd; + if (!npresent) + return -ENXIO; - if (alg == HFI1_ALG_ACROSS) { - unsigned free = 0U; + if (!nup) + return -ENETDOWN; - for (dev = 0; dev < devmax; dev++) { - pdd = hfi1_lookup(dev); - if (!pdd) - continue; - if (!usable_device(pdd)) - continue; - if (pdd->freectxts && - pdd->freectxts > free) { - dd = pdd; - free = pdd->freectxts; - } - } - } else { - for (dev = 0; dev < devmax; dev++) { - pdd = hfi1_lookup(dev); - if (!pdd) - continue; - if (!usable_device(pdd)) - continue; - if (pdd->freectxts) { - dd = pdd; - break; - } - } - } - if (!dd) - ret = -EBUSY; - } -done: - return ret ? ret : allocate_ctxt(fp, dd, uinfo); + dd = hfi1_lookup(devno); + if (!dd) + return -ENODEV; + else if (!dd->freectxts) + return -EBUSY; + + return allocate_ctxt(fp, dd, uinfo); } static int find_shared_ctxt(struct file *fp, @@ -1698,15 +1649,8 @@ static const struct file_operations ui_file_ops = { #define UI_OFFSET 192 /* device minor offset for UI devices */ static int create_ui = 1; -static struct cdev wildcard_cdev; -static struct device *wildcard_device; - -static atomic_t user_count = ATOMIC_INIT(0); - static void user_remove(struct hfi1_devdata *dd) { - if (atomic_dec_return(&user_count) == 0) - hfi1_cdev_cleanup(&wildcard_cdev, &wildcard_device); hfi1_cdev_cleanup(&dd->user_cdev, &dd->user_device); hfi1_cdev_cleanup(&dd->ui_cdev, &dd->ui_device); @@ -1717,16 +1661,8 @@ static int user_add(struct hfi1_devdata *dd) char name[10]; int ret; - if (atomic_inc_return(&user_count) == 1) { - ret = hfi1_cdev_init(0, class_name(), &hfi1_file_ops, - &wildcard_cdev, &wildcard_device, - true); - if (ret) - goto done; - } - snprintf(name, sizeof(name), "%s_%d", class_name(), dd->unit); - ret = hfi1_cdev_init(dd->unit + 1, name, &hfi1_file_ops, + ret = hfi1_cdev_init(dd->unit, name, &hfi1_file_ops, &dd->user_cdev, &dd->user_device, true); if (ret) diff --git a/include/uapi/rdma/hfi/hfi1_user.h b/include/uapi/rdma/hfi/hfi1_user.h index a533cecab14f..09558999ca1d 100644 --- a/include/uapi/rdma/hfi/hfi1_user.h +++ b/include/uapi/rdma/hfi/hfi1_user.h @@ -75,7 +75,7 @@ * may not be implemented; the user code must deal with this if it * cares, or it must abort after initialization reports the difference. */ -#define HFI1_USER_SWMINOR 0 +#define HFI1_USER_SWMINOR 1 /* * Set of HW and driver capability/feature bits. @@ -107,19 +107,6 @@ #define HFI1_RCVHDR_ENTSIZE_16 (1UL << 1) #define HFI1_RCVDHR_ENTSIZE_32 (1UL << 2) -/* - * If the unit is specified via open, HFI choice is fixed. If port is - * specified, it's also fixed. Otherwise we try to spread contexts - * across ports and HFIs, using different algorithms. WITHIN is - * the old default, prior to this mechanism. - */ -#define HFI1_ALG_ACROSS 0 /* round robin contexts across HFIs, then - * ports; this is the default */ -#define HFI1_ALG_WITHIN 1 /* use all contexts on an HFI (round robin - * active ports within), then next HFI */ -#define HFI1_ALG_COUNT 2 /* number of algorithm choices */ - - /* User commands. */ #define HFI1_CMD_ASSIGN_CTXT 1 /* allocate HFI and context */ #define HFI1_CMD_CTXT_INFO 2 /* find out what resources we got */ @@ -199,9 +186,7 @@ struct hfi1_user_info { * Should be set to HFI1_USER_SWVERSION. */ __u32 userversion; - __u16 pad; - /* HFI selection algorithm, if unit has not selected */ - __u16 hfi1_alg; + __u32 pad; /* * If two or more processes wish to share a context, each process * must set the subcontext_cnt and subcontext_id to the same From 7312f29d8ee5518c71572d7bfcbfcd5800b12d16 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Thu, 19 May 2016 05:25:57 -0700 Subject: [PATCH 26/42] IB/hfi1: Remove UI char device Remove UI char device which exposes direct access to registers for user space. This was put in to aid in debugging the hardware. We are looking into alternatives means of providing the same functionality. This removes another char device from HFI1's footprint. Reviewed-by: Dean Luick Reviewed-by: Mitko Haralanov Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/staging/rdma/hfi1/file_ops.c | 168 +-------------------------- 1 file changed, 1 insertion(+), 167 deletions(-) diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c index 518cd891b63d..eb66befd51fc 100644 --- a/drivers/staging/rdma/hfi1/file_ops.c +++ b/drivers/staging/rdma/hfi1/file_ops.c @@ -1497,163 +1497,10 @@ done: return ret; } -static int ui_open(struct inode *inode, struct file *filp) -{ - struct hfi1_devdata *dd; - - dd = container_of(inode->i_cdev, struct hfi1_devdata, ui_cdev); - filp->private_data = dd; /* for other methods */ - return 0; -} - -static int ui_release(struct inode *inode, struct file *filp) -{ - /* nothing to do */ - return 0; -} - -static loff_t ui_lseek(struct file *filp, loff_t offset, int whence) -{ - struct hfi1_devdata *dd = filp->private_data; - - return fixed_size_llseek(filp, offset, whence, - (dd->kregend - dd->kregbase) + DC8051_DATA_MEM_SIZE); -} - -/* NOTE: assumes unsigned long is 8 bytes */ -static ssize_t ui_read(struct file *filp, char __user *buf, size_t count, - loff_t *f_pos) -{ - struct hfi1_devdata *dd = filp->private_data; - void __iomem *base = dd->kregbase; - unsigned long total, csr_off, - barlen = (dd->kregend - dd->kregbase); - u64 data; - - /* only read 8 byte quantities */ - if ((count % 8) != 0) - return -EINVAL; - /* offset must be 8-byte aligned */ - if ((*f_pos % 8) != 0) - return -EINVAL; - /* destination buffer must be 8-byte aligned */ - if ((unsigned long)buf % 8 != 0) - return -EINVAL; - /* must be in range */ - if (*f_pos + count > (barlen + DC8051_DATA_MEM_SIZE)) - return -EINVAL; - /* only set the base if we are not starting past the BAR */ - if (*f_pos < barlen) - base += *f_pos; - csr_off = *f_pos; - for (total = 0; total < count; total += 8, csr_off += 8) { - /* accessing LCB CSRs requires more checks */ - if (is_lcb_offset(csr_off)) { - if (read_lcb_csr(dd, csr_off, (u64 *)&data)) - break; /* failed */ - } - /* - * Cannot read ASIC GPIO/QSFP* clear and force CSRs without a - * false parity error. Avoid the whole issue by not reading - * them. These registers are defined as having a read value - * of 0. - */ - else if (csr_off == ASIC_GPIO_CLEAR || - csr_off == ASIC_GPIO_FORCE || - csr_off == ASIC_QSFP1_CLEAR || - csr_off == ASIC_QSFP1_FORCE || - csr_off == ASIC_QSFP2_CLEAR || - csr_off == ASIC_QSFP2_FORCE) - data = 0; - else if (csr_off >= barlen) { - /* - * read_8051_data can read more than just 8 bytes at - * a time. However, folding this into the loop and - * handling the reads in 8 byte increments allows us - * to smoothly transition from chip memory to 8051 - * memory. - */ - if (read_8051_data(dd, - (u32)(csr_off - barlen), - sizeof(data), &data)) - break; /* failed */ - } else - data = readq(base + total); - if (put_user(data, (unsigned long __user *)(buf + total))) - break; - } - *f_pos += total; - return total; -} - -/* NOTE: assumes unsigned long is 8 bytes */ -static ssize_t ui_write(struct file *filp, const char __user *buf, - size_t count, loff_t *f_pos) -{ - struct hfi1_devdata *dd = filp->private_data; - void __iomem *base; - unsigned long total, data, csr_off; - int in_lcb; - - /* only write 8 byte quantities */ - if ((count % 8) != 0) - return -EINVAL; - /* offset must be 8-byte aligned */ - if ((*f_pos % 8) != 0) - return -EINVAL; - /* source buffer must be 8-byte aligned */ - if ((unsigned long)buf % 8 != 0) - return -EINVAL; - /* must be in range */ - if (*f_pos + count > dd->kregend - dd->kregbase) - return -EINVAL; - - base = (void __iomem *)dd->kregbase + *f_pos; - csr_off = *f_pos; - in_lcb = 0; - for (total = 0; total < count; total += 8, csr_off += 8) { - if (get_user(data, (unsigned long __user *)(buf + total))) - break; - /* accessing LCB CSRs requires a special procedure */ - if (is_lcb_offset(csr_off)) { - if (!in_lcb) { - int ret = acquire_lcb_access(dd, 1); - - if (ret) - break; - in_lcb = 1; - } - } else { - if (in_lcb) { - release_lcb_access(dd, 1); - in_lcb = 0; - } - } - writeq(data, base + total); - } - if (in_lcb) - release_lcb_access(dd, 1); - *f_pos += total; - return total; -} - -static const struct file_operations ui_file_ops = { - .owner = THIS_MODULE, - .llseek = ui_lseek, - .read = ui_read, - .write = ui_write, - .open = ui_open, - .release = ui_release, -}; - -#define UI_OFFSET 192 /* device minor offset for UI devices */ -static int create_ui = 1; - static void user_remove(struct hfi1_devdata *dd) { hfi1_cdev_cleanup(&dd->user_cdev, &dd->user_device); - hfi1_cdev_cleanup(&dd->ui_cdev, &dd->ui_device); } static int user_add(struct hfi1_devdata *dd) @@ -1666,21 +1513,8 @@ static int user_add(struct hfi1_devdata *dd) &dd->user_cdev, &dd->user_device, true); if (ret) - goto done; + user_remove(dd); - if (create_ui) { - snprintf(name, sizeof(name), - "%s_ui%d", class_name(), dd->unit); - ret = hfi1_cdev_init(dd->unit + UI_OFFSET, name, &ui_file_ops, - &dd->ui_cdev, &dd->ui_device, - false); - if (ret) - goto done; - } - - return 0; -done: - user_remove(dd); return ret; } From d079031742023a00e1deda0fa847d403b4b91c76 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Thu, 19 May 2016 05:26:04 -0700 Subject: [PATCH 27/42] IB/hfi1: Remove EPROM functionality from data device Remove EPROM handling from the cdev which is used for user application data traffic. Reviewed-by: Dean Luick Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/staging/rdma/hfi1/eprom.c | 369 --------------------------- drivers/staging/rdma/hfi1/file_ops.c | 23 -- include/uapi/rdma/hfi/hfi1_user.h | 7 - 3 files changed, 399 deletions(-) diff --git a/drivers/staging/rdma/hfi1/eprom.c b/drivers/staging/rdma/hfi1/eprom.c index bd8771570f81..36b77943cbfd 100644 --- a/drivers/staging/rdma/hfi1/eprom.c +++ b/drivers/staging/rdma/hfi1/eprom.c @@ -49,387 +49,18 @@ #include "common.h" #include "eprom.h" -/* - * The EPROM is logically divided into three partitions: - * partition 0: the first 128K, visible from PCI ROM BAR - * partition 1: 4K config file (sector size) - * partition 2: the rest - */ -#define P0_SIZE (128 * 1024) -#define P1_SIZE (4 * 1024) -#define P1_START P0_SIZE -#define P2_START (P0_SIZE + P1_SIZE) - -/* erase sizes supported by the controller */ -#define SIZE_4KB (4 * 1024) -#define MASK_4KB (SIZE_4KB - 1) - -#define SIZE_32KB (32 * 1024) -#define MASK_32KB (SIZE_32KB - 1) - -#define SIZE_64KB (64 * 1024) -#define MASK_64KB (SIZE_64KB - 1) - -/* controller page size, in bytes */ -#define EP_PAGE_SIZE 256 -#define EEP_PAGE_MASK (EP_PAGE_SIZE - 1) - -/* controller commands */ #define CMD_SHIFT 24 -#define CMD_NOP (0) -#define CMD_PAGE_PROGRAM(addr) ((0x02 << CMD_SHIFT) | addr) -#define CMD_READ_DATA(addr) ((0x03 << CMD_SHIFT) | addr) -#define CMD_READ_SR1 ((0x05 << CMD_SHIFT)) -#define CMD_WRITE_ENABLE ((0x06 << CMD_SHIFT)) -#define CMD_SECTOR_ERASE_4KB(addr) ((0x20 << CMD_SHIFT) | addr) -#define CMD_SECTOR_ERASE_32KB(addr) ((0x52 << CMD_SHIFT) | addr) -#define CMD_CHIP_ERASE ((0x60 << CMD_SHIFT)) -#define CMD_READ_MANUF_DEV_ID ((0x90 << CMD_SHIFT)) #define CMD_RELEASE_POWERDOWN_NOID ((0xab << CMD_SHIFT)) -#define CMD_SECTOR_ERASE_64KB(addr) ((0xd8 << CMD_SHIFT) | addr) /* controller interface speeds */ #define EP_SPEED_FULL 0x2 /* full speed */ -/* controller status register 1 bits */ -#define SR1_BUSY 0x1ull /* the BUSY bit in SR1 */ - -/* sleep length while waiting for controller */ -#define WAIT_SLEEP_US 100 /* must be larger than 5 (see usage) */ -#define COUNT_DELAY_SEC(n) ((n) * (1000000 / WAIT_SLEEP_US)) - -/* GPIO pins */ -#define EPROM_WP_N BIT_ULL(14) /* EPROM write line */ - /* * How long to wait for the EPROM to become available, in ms. * The spec 32 Mb EPROM takes around 40s to erase then write. * Double it for safety. */ #define EPROM_TIMEOUT 80000 /* ms */ - -/* - * Turn on external enable line that allows writing on the flash. - */ -static void write_enable(struct hfi1_devdata *dd) -{ - /* raise signal */ - write_csr(dd, ASIC_GPIO_OUT, read_csr(dd, ASIC_GPIO_OUT) | EPROM_WP_N); - /* raise enable */ - write_csr(dd, ASIC_GPIO_OE, read_csr(dd, ASIC_GPIO_OE) | EPROM_WP_N); -} - -/* - * Turn off external enable line that allows writing on the flash. - */ -static void write_disable(struct hfi1_devdata *dd) -{ - /* lower signal */ - write_csr(dd, ASIC_GPIO_OUT, read_csr(dd, ASIC_GPIO_OUT) & ~EPROM_WP_N); - /* lower enable */ - write_csr(dd, ASIC_GPIO_OE, read_csr(dd, ASIC_GPIO_OE) & ~EPROM_WP_N); -} - -/* - * Wait for the device to become not busy. Must be called after all - * write or erase operations. - */ -static int wait_for_not_busy(struct hfi1_devdata *dd) -{ - unsigned long count = 0; - u64 reg; - int ret = 0; - - /* starts page mode */ - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_SR1); - while (1) { - udelay(WAIT_SLEEP_US); - usleep_range(WAIT_SLEEP_US - 5, WAIT_SLEEP_US + 5); - count++; - reg = read_csr(dd, ASIC_EEP_DATA); - if ((reg & SR1_BUSY) == 0) - break; - /* 200s is the largest time for a 128Mb device */ - if (count > COUNT_DELAY_SEC(200)) { - dd_dev_err(dd, "waited too long for SPI FLASH busy to clear - failing\n"); - ret = -ETIMEDOUT; - break; /* break, not goto - must stop page mode */ - } - } - - /* stop page mode with a NOP */ - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_NOP); - - return ret; -} - -/* - * Read the device ID from the SPI controller. - */ -static u32 read_device_id(struct hfi1_devdata *dd) -{ - /* read the Manufacture Device ID */ - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_MANUF_DEV_ID); - return (u32)read_csr(dd, ASIC_EEP_DATA); -} - -/* - * Erase the whole flash. - */ -static int erase_chip(struct hfi1_devdata *dd) -{ - int ret; - - write_enable(dd); - - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_WRITE_ENABLE); - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_CHIP_ERASE); - ret = wait_for_not_busy(dd); - - write_disable(dd); - - return ret; -} - -/* - * Erase a range. - */ -static int erase_range(struct hfi1_devdata *dd, u32 start, u32 len) -{ - u32 end = start + len; - int ret = 0; - - if (end < start) - return -EINVAL; - - /* check the end points for the minimum erase */ - if ((start & MASK_4KB) || (end & MASK_4KB)) { - dd_dev_err(dd, - "%s: non-aligned range (0x%x,0x%x) for a 4KB erase\n", - __func__, start, end); - return -EINVAL; - } - - write_enable(dd); - - while (start < end) { - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_WRITE_ENABLE); - /* check in order of largest to smallest */ - if (((start & MASK_64KB) == 0) && (start + SIZE_64KB <= end)) { - write_csr(dd, ASIC_EEP_ADDR_CMD, - CMD_SECTOR_ERASE_64KB(start)); - start += SIZE_64KB; - } else if (((start & MASK_32KB) == 0) && - (start + SIZE_32KB <= end)) { - write_csr(dd, ASIC_EEP_ADDR_CMD, - CMD_SECTOR_ERASE_32KB(start)); - start += SIZE_32KB; - } else { /* 4KB will work */ - write_csr(dd, ASIC_EEP_ADDR_CMD, - CMD_SECTOR_ERASE_4KB(start)); - start += SIZE_4KB; - } - ret = wait_for_not_busy(dd); - if (ret) - goto done; - } - -done: - write_disable(dd); - - return ret; -} - -/* - * Read a 256 byte (64 dword) EPROM page. - * All callers have verified the offset is at a page boundary. - */ -static void read_page(struct hfi1_devdata *dd, u32 offset, u32 *result) -{ - int i; - - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_DATA(offset)); - for (i = 0; i < EP_PAGE_SIZE / sizeof(u32); i++) - result[i] = (u32)read_csr(dd, ASIC_EEP_DATA); - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_NOP); /* close open page */ -} - -/* - * Read length bytes starting at offset. Copy to user address addr. - */ -static int read_length(struct hfi1_devdata *dd, u32 start, u32 len, u64 addr) -{ - u32 offset; - u32 buffer[EP_PAGE_SIZE / sizeof(u32)]; - int ret = 0; - - /* reject anything not on an EPROM page boundary */ - if ((start & EEP_PAGE_MASK) || (len & EEP_PAGE_MASK)) - return -EINVAL; - - for (offset = 0; offset < len; offset += EP_PAGE_SIZE) { - read_page(dd, start + offset, buffer); - if (copy_to_user((void __user *)(addr + offset), - buffer, EP_PAGE_SIZE)) { - ret = -EFAULT; - goto done; - } - } - -done: - return ret; -} - -/* - * Write a 256 byte (64 dword) EPROM page. - * All callers have verified the offset is at a page boundary. - */ -static int write_page(struct hfi1_devdata *dd, u32 offset, u32 *data) -{ - int i; - - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_WRITE_ENABLE); - write_csr(dd, ASIC_EEP_DATA, data[0]); - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_PAGE_PROGRAM(offset)); - for (i = 1; i < EP_PAGE_SIZE / sizeof(u32); i++) - write_csr(dd, ASIC_EEP_DATA, data[i]); - /* will close the open page */ - return wait_for_not_busy(dd); -} - -/* - * Write length bytes starting at offset. Read from user address addr. - */ -static int write_length(struct hfi1_devdata *dd, u32 start, u32 len, u64 addr) -{ - u32 offset; - u32 buffer[EP_PAGE_SIZE / sizeof(u32)]; - int ret = 0; - - /* reject anything not on an EPROM page boundary */ - if ((start & EEP_PAGE_MASK) || (len & EEP_PAGE_MASK)) - return -EINVAL; - - write_enable(dd); - - for (offset = 0; offset < len; offset += EP_PAGE_SIZE) { - if (copy_from_user(buffer, (void __user *)(addr + offset), - EP_PAGE_SIZE)) { - ret = -EFAULT; - goto done; - } - ret = write_page(dd, start + offset, buffer); - if (ret) - goto done; - } - -done: - write_disable(dd); - return ret; -} - -/* convert an range composite to a length, in bytes */ -static inline u32 extract_rlen(u32 composite) -{ - return (composite & 0xffff) * EP_PAGE_SIZE; -} - -/* convert an range composite to a start, in bytes */ -static inline u32 extract_rstart(u32 composite) -{ - return (composite >> 16) * EP_PAGE_SIZE; -} - -/* - * Perform the given operation on the EPROM. Called from user space. The - * user credentials have already been checked. - * - * Return 0 on success, -ERRNO on error - */ -int handle_eprom_command(struct file *fp, const struct hfi1_cmd *cmd) -{ - struct hfi1_devdata *dd; - u32 dev_id; - u32 rlen; /* range length */ - u32 rstart; /* range start */ - int i_minor; - int ret = 0; - - /* - * Map the device file to device data using the relative minor. - * The device file minor number is the unit number + 1. 0 is - * the generic device file - reject it. - */ - i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE; - if (i_minor <= 0) - return -EINVAL; - dd = hfi1_lookup(i_minor - 1); - if (!dd) { - pr_err("%s: cannot find unit %d!\n", __func__, i_minor); - return -EINVAL; - } - - /* some devices do not have an EPROM */ - if (!dd->eprom_available) - return -EOPNOTSUPP; - - ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT); - if (ret) { - dd_dev_err(dd, "%s: unable to acquire EPROM resource\n", - __func__); - goto done_asic; - } - - dd_dev_info(dd, "%s: cmd: type %d, len 0x%x, addr 0x%016llx\n", - __func__, cmd->type, cmd->len, cmd->addr); - - switch (cmd->type) { - case HFI1_CMD_EP_INFO: - if (cmd->len != sizeof(u32)) { - ret = -ERANGE; - break; - } - dev_id = read_device_id(dd); - /* addr points to a u32 user buffer */ - if (copy_to_user((void __user *)cmd->addr, &dev_id, - sizeof(u32))) - ret = -EFAULT; - break; - - case HFI1_CMD_EP_ERASE_CHIP: - ret = erase_chip(dd); - break; - - case HFI1_CMD_EP_ERASE_RANGE: - rlen = extract_rlen(cmd->len); - rstart = extract_rstart(cmd->len); - ret = erase_range(dd, rstart, rlen); - break; - - case HFI1_CMD_EP_READ_RANGE: - rlen = extract_rlen(cmd->len); - rstart = extract_rstart(cmd->len); - ret = read_length(dd, rstart, rlen, cmd->addr); - break; - - case HFI1_CMD_EP_WRITE_RANGE: - rlen = extract_rlen(cmd->len); - rstart = extract_rstart(cmd->len); - ret = write_length(dd, rstart, rlen, cmd->addr); - break; - - default: - dd_dev_err(dd, "%s: unexpected command %d\n", - __func__, cmd->type); - ret = -EINVAL; - break; - } - - release_chip_resource(dd, CR_EPROM); -done_asic: - return ret; -} - /* * Initialize the EPROM handler. */ diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c index eb66befd51fc..2eddd3305c6a 100644 --- a/drivers/staging/rdma/hfi1/file_ops.c +++ b/drivers/staging/rdma/hfi1/file_ops.c @@ -189,7 +189,6 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data, void *dest = NULL; __u64 user_val = 0; int uctxt_required = 1; - int must_be_root = 0; /* FIXME: This interface cannot continue out of staging */ if (WARN_ON_ONCE(!ib_safe_file_access(fp))) @@ -234,15 +233,6 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data, copy = 0; user_val = cmd.addr; break; - case HFI1_CMD_EP_INFO: - case HFI1_CMD_EP_ERASE_CHIP: - case HFI1_CMD_EP_ERASE_RANGE: - case HFI1_CMD_EP_READ_RANGE: - case HFI1_CMD_EP_WRITE_RANGE: - uctxt_required = 0; /* assigned user context not required */ - must_be_root = 1; /* validate user */ - copy = 0; - break; default: ret = -EINVAL; goto bail; @@ -265,12 +255,6 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data, goto bail; } - /* only root can do these operations */ - if (must_be_root && !capable(CAP_SYS_ADMIN)) { - ret = -EPERM; - goto bail; - } - switch (cmd.type) { case HFI1_CMD_ASSIGN_CTXT: ret = assign_ctxt(fp, &uinfo); @@ -409,13 +393,6 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data, sc_return_credits(sc); break; } - case HFI1_CMD_EP_INFO: - case HFI1_CMD_EP_ERASE_CHIP: - case HFI1_CMD_EP_ERASE_RANGE: - case HFI1_CMD_EP_READ_RANGE: - case HFI1_CMD_EP_WRITE_RANGE: - ret = handle_eprom_command(fp, &cmd); - break; } if (ret >= 0) diff --git a/include/uapi/rdma/hfi/hfi1_user.h b/include/uapi/rdma/hfi/hfi1_user.h index 09558999ca1d..3e3680d11b25 100644 --- a/include/uapi/rdma/hfi/hfi1_user.h +++ b/include/uapi/rdma/hfi/hfi1_user.h @@ -122,13 +122,6 @@ #define HFI1_CMD_SET_PKEY 11 /* set context's pkey */ #define HFI1_CMD_CTXT_RESET 12 /* reset context's HW send context */ #define HFI1_CMD_TID_INVAL_READ 13 /* read TID cache invalidations */ -/* separate EPROM commands from normal PSM commands */ -#define HFI1_CMD_EP_INFO 64 /* read EPROM device ID */ -#define HFI1_CMD_EP_ERASE_CHIP 65 /* erase whole EPROM */ -/* range 66-74 no longer used */ -#define HFI1_CMD_EP_ERASE_RANGE 75 /* erase EPROM range */ -#define HFI1_CMD_EP_READ_RANGE 76 /* read EPROM range */ -#define HFI1_CMD_EP_WRITE_RANGE 77 /* write EPROM range */ #define _HFI1_EVENT_FROZEN_BIT 0 #define _HFI1_EVENT_LINKDOWN_BIT 1 From 0f7b1f917ca84493d40e4fce32db29e3f7afd5ad Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Thu, 19 May 2016 05:26:10 -0700 Subject: [PATCH 28/42] IB/hfi1: Remove snoop/diag interface The snoop/diag interface is better served by an implementation which is more general and usable by other drivers perhaps. Go ahead and remove the code now and get rid of the char dev. We can put the feature back when we have a more agreeable solution. Reviewed-by: Dean Luick Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/staging/rdma/hfi1/Makefile | 2 +- drivers/staging/rdma/hfi1/diag.c | 1925 -------------------------- drivers/staging/rdma/hfi1/file_ops.c | 9 +- 3 files changed, 2 insertions(+), 1934 deletions(-) delete mode 100644 drivers/staging/rdma/hfi1/diag.c diff --git a/drivers/staging/rdma/hfi1/Makefile b/drivers/staging/rdma/hfi1/Makefile index 8dc59382ee96..9b5382c94b0c 100644 --- a/drivers/staging/rdma/hfi1/Makefile +++ b/drivers/staging/rdma/hfi1/Makefile @@ -7,7 +7,7 @@ # obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o -hfi1-y := affinity.o chip.o device.o diag.o driver.o efivar.o \ +hfi1-y := affinity.o chip.o device.o driver.o efivar.o \ eprom.o file_ops.o firmware.o \ init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \ qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o twsi.o \ diff --git a/drivers/staging/rdma/hfi1/diag.c b/drivers/staging/rdma/hfi1/diag.c deleted file mode 100644 index 3a679e46fa7d..000000000000 --- a/drivers/staging/rdma/hfi1/diag.c +++ /dev/null @@ -1,1925 +0,0 @@ -/* - * Copyright(c) 2015, 2016 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -/* - * This file contains support for diagnostic functions. It is accessed by - * opening the hfi1_diag device, normally minor number 129. Diagnostic use - * of the chip may render the chip or board unusable until the driver - * is unloaded, or in some cases, until the system is rebooted. - * - * Accesses to the chip through this interface are not similar to going - * through the /sys/bus/pci resource mmap interface. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "hfi.h" -#include "device.h" -#include "common.h" -#include "verbs_txreq.h" -#include "trace.h" - -#undef pr_fmt -#define pr_fmt(fmt) DRIVER_NAME ": " fmt -#define snoop_dbg(fmt, ...) \ - hfi1_cdbg(SNOOP, fmt, ##__VA_ARGS__) - -/* Snoop option mask */ -#define SNOOP_DROP_SEND BIT(0) -#define SNOOP_USE_METADATA BIT(1) -#define SNOOP_SET_VL0TOVL15 BIT(2) - -static u8 snoop_flags; - -/* - * Extract packet length from LRH header. - * This is in Dwords so multiply by 4 to get size in bytes - */ -#define HFI1_GET_PKT_LEN(x) (((be16_to_cpu((x)->lrh[2]) & 0xFFF)) << 2) - -enum hfi1_filter_status { - HFI1_FILTER_HIT, - HFI1_FILTER_ERR, - HFI1_FILTER_MISS -}; - -/* snoop processing functions */ -rhf_rcv_function_ptr snoop_rhf_rcv_functions[8] = { - [RHF_RCV_TYPE_EXPECTED] = snoop_recv_handler, - [RHF_RCV_TYPE_EAGER] = snoop_recv_handler, - [RHF_RCV_TYPE_IB] = snoop_recv_handler, - [RHF_RCV_TYPE_ERROR] = snoop_recv_handler, - [RHF_RCV_TYPE_BYPASS] = snoop_recv_handler, - [RHF_RCV_TYPE_INVALID5] = process_receive_invalid, - [RHF_RCV_TYPE_INVALID6] = process_receive_invalid, - [RHF_RCV_TYPE_INVALID7] = process_receive_invalid -}; - -/* Snoop packet structure */ -struct snoop_packet { - struct list_head list; - u32 total_len; - u8 data[]; -}; - -/* Do not make these an enum or it will blow up the capture_md */ -#define PKT_DIR_EGRESS 0x0 -#define PKT_DIR_INGRESS 0x1 - -/* Packet capture metadata returned to the user with the packet. */ -struct capture_md { - u8 port; - u8 dir; - u8 reserved[6]; - union { - u64 pbc; - u64 rhf; - } u; -}; - -static atomic_t diagpkt_count = ATOMIC_INIT(0); -static struct cdev diagpkt_cdev; -static struct device *diagpkt_device; - -static ssize_t diagpkt_write(struct file *fp, const char __user *data, - size_t count, loff_t *off); - -static const struct file_operations diagpkt_file_ops = { - .owner = THIS_MODULE, - .write = diagpkt_write, - .llseek = noop_llseek, -}; - -/* - * This is used for communication with user space for snoop extended IOCTLs - */ -struct hfi1_link_info { - __be64 node_guid; - u8 port_mode; - u8 port_state; - u16 link_speed_active; - u16 link_width_active; - u16 vl15_init; - u8 port_number; - /* - * Add padding to make this a full IB SMP payload. Note: changing the - * size of this structure will make the IOCTLs created with _IOWR - * change. - * Be sure to run tests on all IOCTLs when making changes to this - * structure. - */ - u8 res[47]; -}; - -/* - * This starts our ioctl sequence numbers *way* off from the ones - * defined in ib_core. - */ -#define SNOOP_CAPTURE_VERSION 0x1 - -#define IB_IOCTL_MAGIC 0x1b /* See Documentation/ioctl-number.txt */ -#define HFI1_SNOOP_IOC_MAGIC IB_IOCTL_MAGIC -#define HFI1_SNOOP_IOC_BASE_SEQ 0x80 - -#define HFI1_SNOOP_IOCGETLINKSTATE \ - _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ) -#define HFI1_SNOOP_IOCSETLINKSTATE \ - _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 1) -#define HFI1_SNOOP_IOCCLEARQUEUE \ - _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 2) -#define HFI1_SNOOP_IOCCLEARFILTER \ - _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 3) -#define HFI1_SNOOP_IOCSETFILTER \ - _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 4) -#define HFI1_SNOOP_IOCGETVERSION \ - _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 5) -#define HFI1_SNOOP_IOCSET_OPTS \ - _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 6) - -/* - * These offsets +6/+7 could change, but these are already known and used - * IOCTL numbers so don't change them without a good reason. - */ -#define HFI1_SNOOP_IOCGETLINKSTATE_EXTRA \ - _IOWR(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 6, \ - struct hfi1_link_info) -#define HFI1_SNOOP_IOCSETLINKSTATE_EXTRA \ - _IOWR(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 7, \ - struct hfi1_link_info) - -static int hfi1_snoop_open(struct inode *in, struct file *fp); -static ssize_t hfi1_snoop_read(struct file *fp, char __user *data, - size_t pkt_len, loff_t *off); -static ssize_t hfi1_snoop_write(struct file *fp, const char __user *data, - size_t count, loff_t *off); -static long hfi1_ioctl(struct file *fp, unsigned int cmd, unsigned long arg); -static unsigned int hfi1_snoop_poll(struct file *fp, - struct poll_table_struct *wait); -static int hfi1_snoop_release(struct inode *in, struct file *fp); - -struct hfi1_packet_filter_command { - int opcode; - int length; - void *value_ptr; -}; - -/* Can't re-use PKT_DIR_*GRESS here because 0 means no packets for this */ -#define HFI1_SNOOP_INGRESS 0x1 -#define HFI1_SNOOP_EGRESS 0x2 - -enum hfi1_packet_filter_opcodes { - FILTER_BY_LID, - FILTER_BY_DLID, - FILTER_BY_MAD_MGMT_CLASS, - FILTER_BY_QP_NUMBER, - FILTER_BY_PKT_TYPE, - FILTER_BY_SERVICE_LEVEL, - FILTER_BY_PKEY, - FILTER_BY_DIRECTION, -}; - -static const struct file_operations snoop_file_ops = { - .owner = THIS_MODULE, - .open = hfi1_snoop_open, - .read = hfi1_snoop_read, - .unlocked_ioctl = hfi1_ioctl, - .poll = hfi1_snoop_poll, - .write = hfi1_snoop_write, - .release = hfi1_snoop_release -}; - -struct hfi1_filter_array { - int (*filter)(void *, void *, void *); -}; - -static int hfi1_filter_lid(void *ibhdr, void *packet_data, void *value); -static int hfi1_filter_dlid(void *ibhdr, void *packet_data, void *value); -static int hfi1_filter_mad_mgmt_class(void *ibhdr, void *packet_data, - void *value); -static int hfi1_filter_qp_number(void *ibhdr, void *packet_data, void *value); -static int hfi1_filter_ibpacket_type(void *ibhdr, void *packet_data, - void *value); -static int hfi1_filter_ib_service_level(void *ibhdr, void *packet_data, - void *value); -static int hfi1_filter_ib_pkey(void *ibhdr, void *packet_data, void *value); -static int hfi1_filter_direction(void *ibhdr, void *packet_data, void *value); - -static const struct hfi1_filter_array hfi1_filters[] = { - { hfi1_filter_lid }, - { hfi1_filter_dlid }, - { hfi1_filter_mad_mgmt_class }, - { hfi1_filter_qp_number }, - { hfi1_filter_ibpacket_type }, - { hfi1_filter_ib_service_level }, - { hfi1_filter_ib_pkey }, - { hfi1_filter_direction }, -}; - -#define HFI1_MAX_FILTERS ARRAY_SIZE(hfi1_filters) -#define HFI1_DIAG_MINOR_BASE 129 - -static int hfi1_snoop_add(struct hfi1_devdata *dd, const char *name); - -int hfi1_diag_add(struct hfi1_devdata *dd) -{ - char name[16]; - int ret = 0; - - snprintf(name, sizeof(name), "%s_diagpkt%d", class_name(), - dd->unit); - /* - * Do this for each device as opposed to the normal diagpkt - * interface which is one per host - */ - ret = hfi1_snoop_add(dd, name); - if (ret) - dd_dev_err(dd, "Unable to init snoop/capture device"); - - snprintf(name, sizeof(name), "%s_diagpkt", class_name()); - if (atomic_inc_return(&diagpkt_count) == 1) { - ret = hfi1_cdev_init(HFI1_DIAGPKT_MINOR, name, - &diagpkt_file_ops, &diagpkt_cdev, - &diagpkt_device, false); - } - - return ret; -} - -/* this must be called w/ dd->snoop_in_lock held */ -static void drain_snoop_list(struct list_head *queue) -{ - struct list_head *pos, *q; - struct snoop_packet *packet; - - list_for_each_safe(pos, q, queue) { - packet = list_entry(pos, struct snoop_packet, list); - list_del(pos); - kfree(packet); - } -} - -static void hfi1_snoop_remove(struct hfi1_devdata *dd) -{ - unsigned long flags = 0; - - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - drain_snoop_list(&dd->hfi1_snoop.queue); - hfi1_cdev_cleanup(&dd->hfi1_snoop.cdev, &dd->hfi1_snoop.class_dev); - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); -} - -void hfi1_diag_remove(struct hfi1_devdata *dd) -{ - hfi1_snoop_remove(dd); - if (atomic_dec_and_test(&diagpkt_count)) - hfi1_cdev_cleanup(&diagpkt_cdev, &diagpkt_device); - hfi1_cdev_cleanup(&dd->diag_cdev, &dd->diag_device); -} - -/* - * Allocated structure shared between the credit return mechanism and - * diagpkt_send(). - */ -struct diagpkt_wait { - struct completion credits_returned; - int code; - atomic_t count; -}; - -/* - * When each side is finished with the structure, they call this. - * The last user frees the structure. - */ -static void put_diagpkt_wait(struct diagpkt_wait *wait) -{ - if (atomic_dec_and_test(&wait->count)) - kfree(wait); -} - -/* - * Callback from the credit return code. Set the complete, which - * will let diapkt_send() continue. - */ -static void diagpkt_complete(void *arg, int code) -{ - struct diagpkt_wait *wait = (struct diagpkt_wait *)arg; - - wait->code = code; - complete(&wait->credits_returned); - put_diagpkt_wait(wait); /* finished with the structure */ -} - -/** - * diagpkt_send - send a packet - * @dp: diag packet descriptor - */ -static ssize_t diagpkt_send(struct diag_pkt *dp) -{ - struct hfi1_devdata *dd; - struct send_context *sc; - struct pio_buf *pbuf; - u32 *tmpbuf = NULL; - ssize_t ret = 0; - u32 pkt_len, total_len; - pio_release_cb credit_cb = NULL; - void *credit_arg = NULL; - struct diagpkt_wait *wait = NULL; - int trycount = 0; - - dd = hfi1_lookup(dp->unit); - if (!dd || !(dd->flags & HFI1_PRESENT) || !dd->kregbase) { - ret = -ENODEV; - goto bail; - } - if (!(dd->flags & HFI1_INITTED)) { - /* no hardware, freeze, etc. */ - ret = -ENODEV; - goto bail; - } - - if (dp->version != _DIAG_PKT_VERS) { - dd_dev_err(dd, "Invalid version %u for diagpkt_write\n", - dp->version); - ret = -EINVAL; - goto bail; - } - - /* send count must be an exact number of dwords */ - if (dp->len & 3) { - ret = -EINVAL; - goto bail; - } - - /* there is only port 1 */ - if (dp->port != 1) { - ret = -EINVAL; - goto bail; - } - - /* need a valid context */ - if (dp->sw_index >= dd->num_send_contexts) { - ret = -EINVAL; - goto bail; - } - /* can only use kernel contexts */ - if (dd->send_contexts[dp->sw_index].type != SC_KERNEL && - dd->send_contexts[dp->sw_index].type != SC_VL15) { - ret = -EINVAL; - goto bail; - } - /* must be allocated */ - sc = dd->send_contexts[dp->sw_index].sc; - if (!sc) { - ret = -EINVAL; - goto bail; - } - /* must be enabled */ - if (!(sc->flags & SCF_ENABLED)) { - ret = -EINVAL; - goto bail; - } - - /* allocate a buffer and copy the data in */ - tmpbuf = vmalloc(dp->len); - if (!tmpbuf) { - ret = -ENOMEM; - goto bail; - } - - if (copy_from_user(tmpbuf, - (const void __user *)(unsigned long)dp->data, - dp->len)) { - ret = -EFAULT; - goto bail; - } - - /* - * pkt_len is how much data we have to write, includes header and data. - * total_len is length of the packet in Dwords plus the PBC should not - * include the CRC. - */ - pkt_len = dp->len >> 2; - total_len = pkt_len + 2; /* PBC + packet */ - - /* if 0, fill in a default */ - if (dp->pbc == 0) { - struct hfi1_pportdata *ppd = dd->pport; - - hfi1_cdbg(PKT, "Generating PBC"); - dp->pbc = create_pbc(ppd, 0, 0, 0, total_len); - } else { - hfi1_cdbg(PKT, "Using passed in PBC"); - } - - hfi1_cdbg(PKT, "Egress PBC content is 0x%llx", dp->pbc); - - /* - * The caller wants to wait until the packet is sent and to - * check for errors. The best we can do is wait until - * the buffer credits are returned and check if any packet - * error has occurred. If there are any late errors, this - * could miss it. If there are other senders who generate - * an error, this may find it. However, in general, it - * should catch most. - */ - if (dp->flags & F_DIAGPKT_WAIT) { - /* always force a credit return */ - dp->pbc |= PBC_CREDIT_RETURN; - /* turn on credit return interrupts */ - sc_add_credit_return_intr(sc); - wait = kmalloc(sizeof(*wait), GFP_KERNEL); - if (!wait) { - ret = -ENOMEM; - goto bail; - } - init_completion(&wait->credits_returned); - atomic_set(&wait->count, 2); - wait->code = PRC_OK; - - credit_cb = diagpkt_complete; - credit_arg = wait; - } - -retry: - pbuf = sc_buffer_alloc(sc, total_len, credit_cb, credit_arg); - if (!pbuf) { - if (trycount == 0) { - /* force a credit return and try again */ - sc_return_credits(sc); - trycount = 1; - goto retry; - } - /* - * No send buffer means no credit callback. Undo - * the wait set-up that was done above. We free wait - * because the callback will never be called. - */ - if (dp->flags & F_DIAGPKT_WAIT) { - sc_del_credit_return_intr(sc); - kfree(wait); - wait = NULL; - } - ret = -ENOSPC; - goto bail; - } - - pio_copy(dd, pbuf, dp->pbc, tmpbuf, pkt_len); - /* no flush needed as the HW knows the packet size */ - - ret = sizeof(*dp); - - if (dp->flags & F_DIAGPKT_WAIT) { - /* wait for credit return */ - ret = wait_for_completion_interruptible( - &wait->credits_returned); - /* - * If the wait returns an error, the wait was interrupted, - * e.g. with a ^C in the user program. The callback is - * still pending. This is OK as the wait structure is - * kmalloc'ed and the structure will free itself when - * all users are done with it. - * - * A context disable occurs on a send context restart, so - * include that in the list of errors below to check for. - * NOTE: PRC_FILL_ERR is at best informational and cannot - * be depended on. - */ - if (!ret && (((wait->code & PRC_STATUS_ERR) || - (wait->code & PRC_FILL_ERR) || - (wait->code & PRC_SC_DISABLE)))) - ret = -EIO; - - put_diagpkt_wait(wait); /* finished with the structure */ - sc_del_credit_return_intr(sc); - } - -bail: - vfree(tmpbuf); - return ret; -} - -static ssize_t diagpkt_write(struct file *fp, const char __user *data, - size_t count, loff_t *off) -{ - struct hfi1_devdata *dd; - struct send_context *sc; - u8 vl; - - struct diag_pkt dp; - - if (count != sizeof(dp)) - return -EINVAL; - - if (copy_from_user(&dp, data, sizeof(dp))) - return -EFAULT; - - /* - * The Send Context is derived from the PbcVL value - * if PBC is populated - */ - if (dp.pbc) { - dd = hfi1_lookup(dp.unit); - if (!dd) - return -ENODEV; - vl = (dp.pbc >> PBC_VL_SHIFT) & PBC_VL_MASK; - sc = dd->vld[vl].sc; - if (sc) { - dp.sw_index = sc->sw_index; - hfi1_cdbg( - PKT, - "Packet sent over VL %d via Send Context %u(%u)", - vl, sc->sw_index, sc->hw_context); - } - } - - return diagpkt_send(&dp); -} - -static int hfi1_snoop_add(struct hfi1_devdata *dd, const char *name) -{ - int ret = 0; - - dd->hfi1_snoop.mode_flag = 0; - spin_lock_init(&dd->hfi1_snoop.snoop_lock); - INIT_LIST_HEAD(&dd->hfi1_snoop.queue); - init_waitqueue_head(&dd->hfi1_snoop.waitq); - - ret = hfi1_cdev_init(HFI1_SNOOP_CAPTURE_BASE + dd->unit, name, - &snoop_file_ops, - &dd->hfi1_snoop.cdev, &dd->hfi1_snoop.class_dev, - false); - - if (ret) { - dd_dev_err(dd, "Couldn't create %s device: %d", name, ret); - hfi1_cdev_cleanup(&dd->hfi1_snoop.cdev, - &dd->hfi1_snoop.class_dev); - } - - return ret; -} - -static struct hfi1_devdata *hfi1_dd_from_sc_inode(struct inode *in) -{ - int unit = iminor(in) - HFI1_SNOOP_CAPTURE_BASE; - struct hfi1_devdata *dd; - - dd = hfi1_lookup(unit); - return dd; -} - -/* clear or restore send context integrity checks */ -static void adjust_integrity_checks(struct hfi1_devdata *dd) -{ - struct send_context *sc; - unsigned long sc_flags; - int i; - - spin_lock_irqsave(&dd->sc_lock, sc_flags); - for (i = 0; i < dd->num_send_contexts; i++) { - int enable; - - sc = dd->send_contexts[i].sc; - - if (!sc) - continue; /* not allocated */ - - enable = likely(!HFI1_CAP_IS_KSET(NO_INTEGRITY)) && - dd->hfi1_snoop.mode_flag != HFI1_PORT_SNOOP_MODE; - - set_pio_integrity(sc); - - if (enable) /* take HFI_CAP_* flags into account */ - hfi1_init_ctxt(sc); - } - spin_unlock_irqrestore(&dd->sc_lock, sc_flags); -} - -static int hfi1_snoop_open(struct inode *in, struct file *fp) -{ - int ret; - int mode_flag = 0; - unsigned long flags = 0; - struct hfi1_devdata *dd; - struct list_head *queue; - - mutex_lock(&hfi1_mutex); - - dd = hfi1_dd_from_sc_inode(in); - if (!dd) { - ret = -ENODEV; - goto bail; - } - - /* - * File mode determines snoop or capture. Some existing user - * applications expect the capture device to be able to be opened RDWR - * because they expect a dedicated capture device. For this reason we - * support a module param to force capture mode even if the file open - * mode matches snoop. - */ - if ((fp->f_flags & O_ACCMODE) == O_RDONLY) { - snoop_dbg("Capture Enabled"); - mode_flag = HFI1_PORT_CAPTURE_MODE; - } else if ((fp->f_flags & O_ACCMODE) == O_RDWR) { - snoop_dbg("Snoop Enabled"); - mode_flag = HFI1_PORT_SNOOP_MODE; - } else { - snoop_dbg("Invalid"); - ret = -EINVAL; - goto bail; - } - queue = &dd->hfi1_snoop.queue; - - /* - * We are not supporting snoop and capture at the same time. - */ - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - if (dd->hfi1_snoop.mode_flag) { - ret = -EBUSY; - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - goto bail; - } - - dd->hfi1_snoop.mode_flag = mode_flag; - drain_snoop_list(queue); - - dd->hfi1_snoop.filter_callback = NULL; - dd->hfi1_snoop.filter_value = NULL; - - /* - * Send side packet integrity checks are not helpful when snooping so - * disable and re-enable when we stop snooping. - */ - if (mode_flag == HFI1_PORT_SNOOP_MODE) { - /* clear after snoop mode is on */ - adjust_integrity_checks(dd); /* clear */ - - /* - * We also do not want to be doing the DLID LMC check for - * ingressed packets. - */ - dd->hfi1_snoop.dcc_cfg = read_csr(dd, DCC_CFG_PORT_CONFIG1); - write_csr(dd, DCC_CFG_PORT_CONFIG1, - (dd->hfi1_snoop.dcc_cfg >> 32) << 32); - } - - /* - * As soon as we set these function pointers the recv and send handlers - * are active. This is a race condition so we must make sure to drain - * the queue and init filter values above. Technically we should add - * locking here but all that will happen is on recv a packet will get - * allocated and get stuck on the snoop_lock before getting added to the - * queue. Same goes for send. - */ - dd->rhf_rcv_function_map = snoop_rhf_rcv_functions; - dd->process_pio_send = snoop_send_pio_handler; - dd->process_dma_send = snoop_send_pio_handler; - dd->pio_inline_send = snoop_inline_pio_send; - - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - ret = 0; - -bail: - mutex_unlock(&hfi1_mutex); - - return ret; -} - -static int hfi1_snoop_release(struct inode *in, struct file *fp) -{ - unsigned long flags = 0; - struct hfi1_devdata *dd; - int mode_flag; - - dd = hfi1_dd_from_sc_inode(in); - if (!dd) - return -ENODEV; - - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - - /* clear the snoop mode before re-adjusting send context CSRs */ - mode_flag = dd->hfi1_snoop.mode_flag; - dd->hfi1_snoop.mode_flag = 0; - - /* - * Drain the queue and clear the filters we are done with it. Don't - * forget to restore the packet integrity checks - */ - drain_snoop_list(&dd->hfi1_snoop.queue); - if (mode_flag == HFI1_PORT_SNOOP_MODE) { - /* restore after snoop mode is clear */ - adjust_integrity_checks(dd); /* restore */ - - /* - * Also should probably reset the DCC_CONFIG1 register for DLID - * checking on incoming packets again. Use the value saved when - * opening the snoop device. - */ - write_csr(dd, DCC_CFG_PORT_CONFIG1, dd->hfi1_snoop.dcc_cfg); - } - - dd->hfi1_snoop.filter_callback = NULL; - kfree(dd->hfi1_snoop.filter_value); - dd->hfi1_snoop.filter_value = NULL; - - /* - * User is done snooping and capturing, return control to the normal - * handler. Re-enable SDMA handling. - */ - dd->rhf_rcv_function_map = dd->normal_rhf_rcv_functions; - dd->process_pio_send = hfi1_verbs_send_pio; - dd->process_dma_send = hfi1_verbs_send_dma; - dd->pio_inline_send = pio_copy; - - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - - snoop_dbg("snoop/capture device released"); - - return 0; -} - -static unsigned int hfi1_snoop_poll(struct file *fp, - struct poll_table_struct *wait) -{ - int ret = 0; - unsigned long flags = 0; - - struct hfi1_devdata *dd; - - dd = hfi1_dd_from_sc_inode(fp->f_inode); - if (!dd) - return -ENODEV; - - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - - poll_wait(fp, &dd->hfi1_snoop.waitq, wait); - if (!list_empty(&dd->hfi1_snoop.queue)) - ret |= POLLIN | POLLRDNORM; - - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - return ret; -} - -static ssize_t hfi1_snoop_write(struct file *fp, const char __user *data, - size_t count, loff_t *off) -{ - struct diag_pkt dpkt; - struct hfi1_devdata *dd; - size_t ret; - u8 byte_two, sl, sc5, sc4, vl, byte_one; - struct send_context *sc; - u32 len; - u64 pbc; - struct hfi1_ibport *ibp; - struct hfi1_pportdata *ppd; - - dd = hfi1_dd_from_sc_inode(fp->f_inode); - if (!dd) - return -ENODEV; - - ppd = dd->pport; - snoop_dbg("received %lu bytes from user", count); - - memset(&dpkt, 0, sizeof(struct diag_pkt)); - dpkt.version = _DIAG_PKT_VERS; - dpkt.unit = dd->unit; - dpkt.port = 1; - - if (likely(!(snoop_flags & SNOOP_USE_METADATA))) { - /* - * We need to generate the PBC and not let diagpkt_send do it, - * to do this we need the VL and the length in dwords. - * The VL can be determined by using the SL and looking up the - * SC. Then the SC can be converted into VL. The exception to - * this is those packets which are from an SMI queue pair. - * Since we can't detect anything about the QP here we have to - * rely on the SC. If its 0xF then we assume its SMI and - * do not look at the SL. - */ - if (copy_from_user(&byte_one, data, 1)) - return -EINVAL; - - if (copy_from_user(&byte_two, data + 1, 1)) - return -EINVAL; - - sc4 = (byte_one >> 4) & 0xf; - if (sc4 == 0xF) { - snoop_dbg("Detected VL15 packet ignoring SL in packet"); - vl = sc4; - } else { - sl = (byte_two >> 4) & 0xf; - ibp = to_iport(&dd->verbs_dev.rdi.ibdev, 1); - sc5 = ibp->sl_to_sc[sl]; - vl = sc_to_vlt(dd, sc5); - if (vl != sc4) { - snoop_dbg("VL %d does not match SC %d of packet", - vl, sc4); - return -EINVAL; - } - } - - sc = dd->vld[vl].sc; /* Look up the context based on VL */ - if (sc) { - dpkt.sw_index = sc->sw_index; - snoop_dbg("Sending on context %u(%u)", sc->sw_index, - sc->hw_context); - } else { - snoop_dbg("Could not find context for vl %d", vl); - return -EINVAL; - } - - len = (count >> 2) + 2; /* Add in PBC */ - pbc = create_pbc(ppd, 0, 0, vl, len); - } else { - if (copy_from_user(&pbc, data, sizeof(pbc))) - return -EINVAL; - vl = (pbc >> PBC_VL_SHIFT) & PBC_VL_MASK; - sc = dd->vld[vl].sc; /* Look up the context based on VL */ - if (sc) { - dpkt.sw_index = sc->sw_index; - } else { - snoop_dbg("Could not find context for vl %d", vl); - return -EINVAL; - } - data += sizeof(pbc); - count -= sizeof(pbc); - } - dpkt.len = count; - dpkt.data = (unsigned long)data; - - snoop_dbg("PBC: vl=0x%llx Length=0x%llx", - (pbc >> 12) & 0xf, - (pbc & 0xfff)); - - dpkt.pbc = pbc; - ret = diagpkt_send(&dpkt); - /* - * diagpkt_send only returns number of bytes in the diagpkt so patch - * that up here before returning. - */ - if (ret == sizeof(dpkt)) - return count; - - return ret; -} - -static ssize_t hfi1_snoop_read(struct file *fp, char __user *data, - size_t pkt_len, loff_t *off) -{ - ssize_t ret = 0; - unsigned long flags = 0; - struct snoop_packet *packet = NULL; - struct hfi1_devdata *dd; - - dd = hfi1_dd_from_sc_inode(fp->f_inode); - if (!dd) - return -ENODEV; - - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - - while (list_empty(&dd->hfi1_snoop.queue)) { - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - - if (fp->f_flags & O_NONBLOCK) - return -EAGAIN; - - if (wait_event_interruptible( - dd->hfi1_snoop.waitq, - !list_empty(&dd->hfi1_snoop.queue))) - return -EINTR; - - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - } - - if (!list_empty(&dd->hfi1_snoop.queue)) { - packet = list_entry(dd->hfi1_snoop.queue.next, - struct snoop_packet, list); - list_del(&packet->list); - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - if (pkt_len >= packet->total_len) { - if (copy_to_user(data, packet->data, - packet->total_len)) - ret = -EFAULT; - else - ret = packet->total_len; - } else { - ret = -EINVAL; - } - - kfree(packet); - } else { - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - } - - return ret; -} - -/** - * hfi1_assign_snoop_link_credits -- Set up credits for VL15 and others - * @ppd : ptr to hfi1 port data - * @value : options from user space - * - * Assumes the rest of the CM credit registers are zero from a - * previous global or credit reset. - * Leave shared count at zero for both global and all vls. - * In snoop mode ideally we don't use shared credits - * Reserve 8.5k for VL15 - * If total credits less than 8.5kbytes return error. - * Divide the rest of the credits across VL0 to VL7 and if - * each of these levels has less than 34 credits (at least 2048 + 128 bytes) - * return with an error. - * The credit registers will be reset to zero on link negotiation or link up - * so this function should be activated from user space only if the port has - * gone past link negotiation and link up. - * - * Return -- 0 if successful else error condition - * - */ -static long hfi1_assign_snoop_link_credits(struct hfi1_pportdata *ppd, - int value) -{ -#define OPA_MIN_PER_VL_CREDITS 34 /* 2048 + 128 bytes */ - struct buffer_control t; - int i; - struct hfi1_devdata *dd = ppd->dd; - u16 total_credits = (value >> 16) & 0xffff; - u16 vl15_credits = dd->vl15_init / 2; - u16 per_vl_credits; - __be16 be_per_vl_credits; - - if (ppd->host_link_state & HLS_DOWN) - goto err_exit; - if (total_credits < vl15_credits) - goto err_exit; - - per_vl_credits = (total_credits - vl15_credits) / TXE_NUM_DATA_VL; - - if (per_vl_credits < OPA_MIN_PER_VL_CREDITS) - goto err_exit; - - memset(&t, 0, sizeof(t)); - be_per_vl_credits = cpu_to_be16(per_vl_credits); - - for (i = 0; i < TXE_NUM_DATA_VL; i++) - t.vl[i].dedicated = be_per_vl_credits; - - t.vl[15].dedicated = cpu_to_be16(vl15_credits); - return set_buffer_control(ppd, &t); - -err_exit: - snoop_dbg("port_state = 0x%x, total_credits = %d, vl15_credits = %d", - ppd->host_link_state, total_credits, vl15_credits); - - return -EINVAL; -} - -static long hfi1_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) -{ - struct hfi1_devdata *dd; - void *filter_value = NULL; - long ret = 0; - int value = 0; - u8 phys_state = 0; - u8 link_state = 0; - u16 dev_state = 0; - unsigned long flags = 0; - unsigned long *argp = NULL; - struct hfi1_packet_filter_command filter_cmd = {0}; - int mode_flag = 0; - struct hfi1_pportdata *ppd = NULL; - unsigned int index; - struct hfi1_link_info link_info; - int read_cmd, write_cmd, read_ok, write_ok; - - dd = hfi1_dd_from_sc_inode(fp->f_inode); - if (!dd) - return -ENODEV; - - mode_flag = dd->hfi1_snoop.mode_flag; - read_cmd = _IOC_DIR(cmd) & _IOC_READ; - write_cmd = _IOC_DIR(cmd) & _IOC_WRITE; - write_ok = access_ok(VERIFY_WRITE, (void __user *)arg, _IOC_SIZE(cmd)); - read_ok = access_ok(VERIFY_READ, (void __user *)arg, _IOC_SIZE(cmd)); - - if ((read_cmd && !write_ok) || (write_cmd && !read_ok)) - return -EFAULT; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if ((mode_flag & HFI1_PORT_CAPTURE_MODE) && - (cmd != HFI1_SNOOP_IOCCLEARQUEUE) && - (cmd != HFI1_SNOOP_IOCCLEARFILTER) && - (cmd != HFI1_SNOOP_IOCSETFILTER)) - /* Capture devices are allowed only 3 operations - * 1.Clear capture queue - * 2.Clear capture filter - * 3.Set capture filter - * Other are invalid. - */ - return -EINVAL; - - switch (cmd) { - case HFI1_SNOOP_IOCSETLINKSTATE_EXTRA: - memset(&link_info, 0, sizeof(link_info)); - - if (copy_from_user(&link_info, - (struct hfi1_link_info __user *)arg, - sizeof(link_info))) - return -EFAULT; - - value = link_info.port_state; - index = link_info.port_number; - if (index > dd->num_pports - 1) - return -EINVAL; - - ppd = &dd->pport[index]; - if (!ppd) - return -EINVAL; - - /* What we want to transition to */ - phys_state = (value >> 4) & 0xF; - link_state = value & 0xF; - snoop_dbg("Setting link state 0x%x", value); - - switch (link_state) { - case IB_PORT_NOP: - if (phys_state == 0) - break; - /* fall through */ - case IB_PORT_DOWN: - switch (phys_state) { - case 0: - dev_state = HLS_DN_DOWNDEF; - break; - case 2: - dev_state = HLS_DN_POLL; - break; - case 3: - dev_state = HLS_DN_DISABLE; - break; - default: - return -EINVAL; - } - ret = set_link_state(ppd, dev_state); - break; - case IB_PORT_ARMED: - ret = set_link_state(ppd, HLS_UP_ARMED); - if (!ret) - send_idle_sma(dd, SMA_IDLE_ARM); - break; - case IB_PORT_ACTIVE: - ret = set_link_state(ppd, HLS_UP_ACTIVE); - if (!ret) - send_idle_sma(dd, SMA_IDLE_ACTIVE); - break; - default: - return -EINVAL; - } - - if (ret) - break; - /* fall through */ - case HFI1_SNOOP_IOCGETLINKSTATE: - case HFI1_SNOOP_IOCGETLINKSTATE_EXTRA: - if (cmd == HFI1_SNOOP_IOCGETLINKSTATE_EXTRA) { - memset(&link_info, 0, sizeof(link_info)); - if (copy_from_user(&link_info, - (struct hfi1_link_info __user *)arg, - sizeof(link_info))) - return -EFAULT; - index = link_info.port_number; - } else { - ret = __get_user(index, (int __user *)arg); - if (ret != 0) - break; - } - - if (index > dd->num_pports - 1) - return -EINVAL; - - ppd = &dd->pport[index]; - if (!ppd) - return -EINVAL; - - value = hfi1_ibphys_portstate(ppd); - value <<= 4; - value |= driver_lstate(ppd); - - snoop_dbg("Link port | Link State: %d", value); - - if ((cmd == HFI1_SNOOP_IOCGETLINKSTATE_EXTRA) || - (cmd == HFI1_SNOOP_IOCSETLINKSTATE_EXTRA)) { - link_info.port_state = value; - link_info.node_guid = cpu_to_be64(ppd->guid); - link_info.link_speed_active = - ppd->link_speed_active; - link_info.link_width_active = - ppd->link_width_active; - if (copy_to_user((struct hfi1_link_info __user *)arg, - &link_info, sizeof(link_info))) - return -EFAULT; - } else { - ret = __put_user(value, (int __user *)arg); - } - break; - - case HFI1_SNOOP_IOCCLEARQUEUE: - snoop_dbg("Clearing snoop queue"); - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - drain_snoop_list(&dd->hfi1_snoop.queue); - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - break; - - case HFI1_SNOOP_IOCCLEARFILTER: - snoop_dbg("Clearing filter"); - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - if (dd->hfi1_snoop.filter_callback) { - /* Drain packets first */ - drain_snoop_list(&dd->hfi1_snoop.queue); - dd->hfi1_snoop.filter_callback = NULL; - } - kfree(dd->hfi1_snoop.filter_value); - dd->hfi1_snoop.filter_value = NULL; - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - break; - - case HFI1_SNOOP_IOCSETFILTER: - snoop_dbg("Setting filter"); - /* just copy command structure */ - argp = (unsigned long *)arg; - if (copy_from_user(&filter_cmd, (void __user *)argp, - sizeof(filter_cmd))) - return -EFAULT; - - if (filter_cmd.opcode >= HFI1_MAX_FILTERS) { - pr_alert("Invalid opcode in request\n"); - return -EINVAL; - } - - snoop_dbg("Opcode %d Len %d Ptr %p", - filter_cmd.opcode, filter_cmd.length, - filter_cmd.value_ptr); - - filter_value = kcalloc(filter_cmd.length, sizeof(u8), - GFP_KERNEL); - if (!filter_value) - return -ENOMEM; - - /* copy remaining data from userspace */ - if (copy_from_user((u8 *)filter_value, - (void __user *)filter_cmd.value_ptr, - filter_cmd.length)) { - kfree(filter_value); - return -EFAULT; - } - /* Drain packets first */ - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - drain_snoop_list(&dd->hfi1_snoop.queue); - dd->hfi1_snoop.filter_callback = - hfi1_filters[filter_cmd.opcode].filter; - /* just in case we see back to back sets */ - kfree(dd->hfi1_snoop.filter_value); - dd->hfi1_snoop.filter_value = filter_value; - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - break; - case HFI1_SNOOP_IOCGETVERSION: - value = SNOOP_CAPTURE_VERSION; - snoop_dbg("Getting version: %d", value); - ret = __put_user(value, (int __user *)arg); - break; - case HFI1_SNOOP_IOCSET_OPTS: - snoop_flags = 0; - ret = __get_user(value, (int __user *)arg); - if (ret != 0) - break; - - snoop_dbg("Setting snoop option %d", value); - if (value & SNOOP_DROP_SEND) - snoop_flags |= SNOOP_DROP_SEND; - if (value & SNOOP_USE_METADATA) - snoop_flags |= SNOOP_USE_METADATA; - if (value & (SNOOP_SET_VL0TOVL15)) { - ppd = &dd->pport[0]; /* first port will do */ - ret = hfi1_assign_snoop_link_credits(ppd, value); - } - break; - default: - return -ENOTTY; - } - - return ret; -} - -static void snoop_list_add_tail(struct snoop_packet *packet, - struct hfi1_devdata *dd) -{ - unsigned long flags = 0; - - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - if (likely((dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE) || - (dd->hfi1_snoop.mode_flag & HFI1_PORT_CAPTURE_MODE))) { - list_add_tail(&packet->list, &dd->hfi1_snoop.queue); - snoop_dbg("Added packet to list"); - } - - /* - * Technically we can could have closed the snoop device while waiting - * on the above lock and it is gone now. The snoop mode_flag will - * prevent us from adding the packet to the queue though. - */ - - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - wake_up_interruptible(&dd->hfi1_snoop.waitq); -} - -static inline int hfi1_filter_check(void *val, const char *msg) -{ - if (!val) { - snoop_dbg("Error invalid %s value for filter", msg); - return HFI1_FILTER_ERR; - } - return 0; -} - -static int hfi1_filter_lid(void *ibhdr, void *packet_data, void *value) -{ - struct hfi1_ib_header *hdr; - int ret; - - ret = hfi1_filter_check(ibhdr, "header"); - if (ret) - return ret; - ret = hfi1_filter_check(value, "user"); - if (ret) - return ret; - hdr = (struct hfi1_ib_header *)ibhdr; - - if (*((u16 *)value) == be16_to_cpu(hdr->lrh[3])) /* matches slid */ - return HFI1_FILTER_HIT; /* matched */ - - return HFI1_FILTER_MISS; /* Not matched */ -} - -static int hfi1_filter_dlid(void *ibhdr, void *packet_data, void *value) -{ - struct hfi1_ib_header *hdr; - int ret; - - ret = hfi1_filter_check(ibhdr, "header"); - if (ret) - return ret; - ret = hfi1_filter_check(value, "user"); - if (ret) - return ret; - - hdr = (struct hfi1_ib_header *)ibhdr; - - if (*((u16 *)value) == be16_to_cpu(hdr->lrh[1])) - return HFI1_FILTER_HIT; - - return HFI1_FILTER_MISS; -} - -/* Not valid for outgoing packets, send handler passes null for data*/ -static int hfi1_filter_mad_mgmt_class(void *ibhdr, void *packet_data, - void *value) -{ - struct hfi1_ib_header *hdr; - struct hfi1_other_headers *ohdr = NULL; - struct ib_smp *smp = NULL; - u32 qpn = 0; - int ret; - - ret = hfi1_filter_check(ibhdr, "header"); - if (ret) - return ret; - ret = hfi1_filter_check(packet_data, "packet_data"); - if (ret) - return ret; - ret = hfi1_filter_check(value, "user"); - if (ret) - return ret; - - hdr = (struct hfi1_ib_header *)ibhdr; - - /* Check for GRH */ - if ((be16_to_cpu(hdr->lrh[0]) & 3) == HFI1_LRH_BTH) - ohdr = &hdr->u.oth; /* LRH + BTH + DETH */ - else - ohdr = &hdr->u.l.oth; /* LRH + GRH + BTH + DETH */ - - qpn = be32_to_cpu(ohdr->bth[1]) & 0x00FFFFFF; - if (qpn <= 1) { - smp = (struct ib_smp *)packet_data; - if (*((u8 *)value) == smp->mgmt_class) - return HFI1_FILTER_HIT; - else - return HFI1_FILTER_MISS; - } - return HFI1_FILTER_ERR; -} - -static int hfi1_filter_qp_number(void *ibhdr, void *packet_data, void *value) -{ - struct hfi1_ib_header *hdr; - struct hfi1_other_headers *ohdr = NULL; - int ret; - - ret = hfi1_filter_check(ibhdr, "header"); - if (ret) - return ret; - ret = hfi1_filter_check(value, "user"); - if (ret) - return ret; - - hdr = (struct hfi1_ib_header *)ibhdr; - - /* Check for GRH */ - if ((be16_to_cpu(hdr->lrh[0]) & 3) == HFI1_LRH_BTH) - ohdr = &hdr->u.oth; /* LRH + BTH + DETH */ - else - ohdr = &hdr->u.l.oth; /* LRH + GRH + BTH + DETH */ - if (*((u32 *)value) == (be32_to_cpu(ohdr->bth[1]) & 0x00FFFFFF)) - return HFI1_FILTER_HIT; - - return HFI1_FILTER_MISS; -} - -static int hfi1_filter_ibpacket_type(void *ibhdr, void *packet_data, - void *value) -{ - u32 lnh = 0; - u8 opcode = 0; - struct hfi1_ib_header *hdr; - struct hfi1_other_headers *ohdr = NULL; - int ret; - - ret = hfi1_filter_check(ibhdr, "header"); - if (ret) - return ret; - ret = hfi1_filter_check(value, "user"); - if (ret) - return ret; - - hdr = (struct hfi1_ib_header *)ibhdr; - - lnh = (be16_to_cpu(hdr->lrh[0]) & 3); - - if (lnh == HFI1_LRH_BTH) - ohdr = &hdr->u.oth; - else if (lnh == HFI1_LRH_GRH) - ohdr = &hdr->u.l.oth; - else - return HFI1_FILTER_ERR; - - opcode = be32_to_cpu(ohdr->bth[0]) >> 24; - - if (*((u8 *)value) == ((opcode >> 5) & 0x7)) - return HFI1_FILTER_HIT; - - return HFI1_FILTER_MISS; -} - -static int hfi1_filter_ib_service_level(void *ibhdr, void *packet_data, - void *value) -{ - struct hfi1_ib_header *hdr; - int ret; - - ret = hfi1_filter_check(ibhdr, "header"); - if (ret) - return ret; - ret = hfi1_filter_check(value, "user"); - if (ret) - return ret; - - hdr = (struct hfi1_ib_header *)ibhdr; - - if ((*((u8 *)value)) == ((be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF)) - return HFI1_FILTER_HIT; - - return HFI1_FILTER_MISS; -} - -static int hfi1_filter_ib_pkey(void *ibhdr, void *packet_data, void *value) -{ - u32 lnh = 0; - struct hfi1_ib_header *hdr; - struct hfi1_other_headers *ohdr = NULL; - int ret; - - ret = hfi1_filter_check(ibhdr, "header"); - if (ret) - return ret; - ret = hfi1_filter_check(value, "user"); - if (ret) - return ret; - - hdr = (struct hfi1_ib_header *)ibhdr; - - lnh = (be16_to_cpu(hdr->lrh[0]) & 3); - if (lnh == HFI1_LRH_BTH) - ohdr = &hdr->u.oth; - else if (lnh == HFI1_LRH_GRH) - ohdr = &hdr->u.l.oth; - else - return HFI1_FILTER_ERR; - - /* P_key is 16-bit entity, however top most bit indicates - * type of membership. 0 for limited and 1 for Full. - * Limited members cannot accept information from other - * Limited members, but communication is allowed between - * every other combination of membership. - * Hence we'll omit comparing top-most bit while filtering - */ - - if ((*(u16 *)value & 0x7FFF) == - ((be32_to_cpu(ohdr->bth[0])) & 0x7FFF)) - return HFI1_FILTER_HIT; - - return HFI1_FILTER_MISS; -} - -/* - * If packet_data is NULL then this is coming from one of the send functions. - * Thus we know if its an ingressed or egressed packet. - */ -static int hfi1_filter_direction(void *ibhdr, void *packet_data, void *value) -{ - u8 user_dir = *(u8 *)value; - int ret; - - ret = hfi1_filter_check(value, "user"); - if (ret) - return ret; - - if (packet_data) { - /* Incoming packet */ - if (user_dir & HFI1_SNOOP_INGRESS) - return HFI1_FILTER_HIT; - } else { - /* Outgoing packet */ - if (user_dir & HFI1_SNOOP_EGRESS) - return HFI1_FILTER_HIT; - } - - return HFI1_FILTER_MISS; -} - -/* - * Allocate a snoop packet. The structure that is stored in the ring buffer, not - * to be confused with an hfi packet type. - */ -static struct snoop_packet *allocate_snoop_packet(u32 hdr_len, - u32 data_len, - u32 md_len) -{ - struct snoop_packet *packet; - - packet = kzalloc(sizeof(*packet) + hdr_len + data_len - + md_len, - GFP_ATOMIC | __GFP_NOWARN); - if (likely(packet)) - INIT_LIST_HEAD(&packet->list); - - return packet; -} - -/* - * Instead of having snoop and capture code intermixed with the recv functions, - * both the interrupt handler and hfi1_ib_rcv() we are going to hijack the call - * and land in here for snoop/capture but if not enabled the call will go - * through as before. This gives us a single point to constrain all of the snoop - * snoop recv logic. There is nothing special that needs to happen for bypass - * packets. This routine should not try to look into the packet. It just copied - * it. There is no guarantee for filters when it comes to bypass packets as - * there is no specific support. Bottom line is this routine does now even know - * what a bypass packet is. - */ -int snoop_recv_handler(struct hfi1_packet *packet) -{ - struct hfi1_pportdata *ppd = packet->rcd->ppd; - struct hfi1_ib_header *hdr = packet->hdr; - int header_size = packet->hlen; - void *data = packet->ebuf; - u32 tlen = packet->tlen; - struct snoop_packet *s_packet = NULL; - int ret; - int snoop_mode = 0; - u32 md_len = 0; - struct capture_md md; - - snoop_dbg("PACKET IN: hdr size %d tlen %d data %p", header_size, tlen, - data); - - trace_snoop_capture(ppd->dd, header_size, hdr, tlen - header_size, - data); - - if (!ppd->dd->hfi1_snoop.filter_callback) { - snoop_dbg("filter not set"); - ret = HFI1_FILTER_HIT; - } else { - ret = ppd->dd->hfi1_snoop.filter_callback(hdr, data, - ppd->dd->hfi1_snoop.filter_value); - } - - switch (ret) { - case HFI1_FILTER_ERR: - snoop_dbg("Error in filter call"); - break; - case HFI1_FILTER_MISS: - snoop_dbg("Filter Miss"); - break; - case HFI1_FILTER_HIT: - - if (ppd->dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE) - snoop_mode = 1; - if ((snoop_mode == 0) || - unlikely(snoop_flags & SNOOP_USE_METADATA)) - md_len = sizeof(struct capture_md); - - s_packet = allocate_snoop_packet(header_size, - tlen - header_size, - md_len); - - if (unlikely(!s_packet)) { - dd_dev_warn_ratelimited(ppd->dd, "Unable to allocate snoop/capture packet\n"); - break; - } - - if (md_len > 0) { - memset(&md, 0, sizeof(struct capture_md)); - md.port = 1; - md.dir = PKT_DIR_INGRESS; - md.u.rhf = packet->rhf; - memcpy(s_packet->data, &md, md_len); - } - - /* We should always have a header */ - if (hdr) { - memcpy(s_packet->data + md_len, hdr, header_size); - } else { - dd_dev_err(ppd->dd, "Unable to copy header to snoop/capture packet\n"); - kfree(s_packet); - break; - } - - /* - * Packets with no data are possible. If there is no data needed - * to take care of the last 4 bytes which are normally included - * with data buffers and are included in tlen. Since we kzalloc - * the buffer we do not need to set any values but if we decide - * not to use kzalloc we should zero them. - */ - if (data) - memcpy(s_packet->data + header_size + md_len, data, - tlen - header_size); - - s_packet->total_len = tlen + md_len; - snoop_list_add_tail(s_packet, ppd->dd); - - /* - * If we are snooping the packet not capturing then throw away - * after adding to the list. - */ - snoop_dbg("Capturing packet"); - if (ppd->dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE) { - snoop_dbg("Throwing packet away"); - /* - * If we are dropping the packet we still may need to - * handle the case where error flags are set, this is - * normally done by the type specific handler but that - * won't be called in this case. - */ - if (unlikely(rhf_err_flags(packet->rhf))) - handle_eflags(packet); - - /* throw the packet on the floor */ - return RHF_RCV_CONTINUE; - } - break; - default: - break; - } - - /* - * We do not care what type of packet came in here - just pass it off - * to the normal handler. - */ - return ppd->dd->normal_rhf_rcv_functions[rhf_rcv_type(packet->rhf)] - (packet); -} - -/* - * Handle snooping and capturing packets when sdma is being used. - */ -int snoop_send_dma_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps, - u64 pbc) -{ - pr_alert("Snooping/Capture of Send DMA Packets Is Not Supported!\n"); - snoop_dbg("Unsupported Operation"); - return hfi1_verbs_send_dma(qp, ps, 0); -} - -/* - * Handle snooping and capturing packets when pio is being used. Does not handle - * bypass packets. The only way to send a bypass packet currently is to use the - * diagpkt interface. When that interface is enable snoop/capture is not. - */ -int snoop_send_pio_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps, - u64 pbc) -{ - u32 hdrwords = qp->s_hdrwords; - struct rvt_sge_state *ss = qp->s_cur_sge; - u32 len = qp->s_cur_size; - u32 dwords = (len + 3) >> 2; - u32 plen = hdrwords + dwords + 2; /* includes pbc */ - struct hfi1_pportdata *ppd = ps->ppd; - struct snoop_packet *s_packet = NULL; - u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr; - u32 length = 0; - struct rvt_sge_state temp_ss; - void *data = NULL; - void *data_start = NULL; - int ret; - int snoop_mode = 0; - int md_len = 0; - struct capture_md md; - u32 vl; - u32 hdr_len = hdrwords << 2; - u32 tlen = HFI1_GET_PKT_LEN(&ps->s_txreq->phdr.hdr); - - md.u.pbc = 0; - - snoop_dbg("PACKET OUT: hdrword %u len %u plen %u dwords %u tlen %u", - hdrwords, len, plen, dwords, tlen); - if (ppd->dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE) - snoop_mode = 1; - if ((snoop_mode == 0) || - unlikely(snoop_flags & SNOOP_USE_METADATA)) - md_len = sizeof(struct capture_md); - - /* not using ss->total_len as arg 2 b/c that does not count CRC */ - s_packet = allocate_snoop_packet(hdr_len, tlen - hdr_len, md_len); - - if (unlikely(!s_packet)) { - dd_dev_warn_ratelimited(ppd->dd, "Unable to allocate snoop/capture packet\n"); - goto out; - } - - s_packet->total_len = tlen + md_len; - - if (md_len > 0) { - memset(&md, 0, sizeof(struct capture_md)); - md.port = 1; - md.dir = PKT_DIR_EGRESS; - if (likely(pbc == 0)) { - vl = be16_to_cpu(ps->s_txreq->phdr.hdr.lrh[0]) >> 12; - md.u.pbc = create_pbc(ppd, 0, qp->s_srate, vl, plen); - } else { - md.u.pbc = 0; - } - memcpy(s_packet->data, &md, md_len); - } else { - md.u.pbc = pbc; - } - - /* Copy header */ - if (likely(hdr)) { - memcpy(s_packet->data + md_len, hdr, hdr_len); - } else { - dd_dev_err(ppd->dd, - "Unable to copy header to snoop/capture packet\n"); - kfree(s_packet); - goto out; - } - - if (ss) { - data = s_packet->data + hdr_len + md_len; - data_start = data; - - /* - * Copy SGE State - * The update_sge() function below will not modify the - * individual SGEs in the array. It will make a copy each time - * and operate on that. So we only need to copy this instance - * and it won't impact PIO. - */ - temp_ss = *ss; - length = len; - - snoop_dbg("Need to copy %d bytes", length); - while (length) { - void *addr = temp_ss.sge.vaddr; - u32 slen = temp_ss.sge.length; - - if (slen > length) { - slen = length; - snoop_dbg("slen %d > len %d", slen, length); - } - snoop_dbg("copy %d to %p", slen, addr); - memcpy(data, addr, slen); - update_sge(&temp_ss, slen); - length -= slen; - data += slen; - snoop_dbg("data is now %p bytes left %d", data, length); - } - snoop_dbg("Completed SGE copy"); - } - - /* - * Why do the filter check down here? Because the event tracing has its - * own filtering and we need to have the walked the SGE list. - */ - if (!ppd->dd->hfi1_snoop.filter_callback) { - snoop_dbg("filter not set\n"); - ret = HFI1_FILTER_HIT; - } else { - ret = ppd->dd->hfi1_snoop.filter_callback( - &ps->s_txreq->phdr.hdr, - NULL, - ppd->dd->hfi1_snoop.filter_value); - } - - switch (ret) { - case HFI1_FILTER_ERR: - snoop_dbg("Error in filter call"); - /* fall through */ - case HFI1_FILTER_MISS: - snoop_dbg("Filter Miss"); - kfree(s_packet); - break; - case HFI1_FILTER_HIT: - snoop_dbg("Capturing packet"); - snoop_list_add_tail(s_packet, ppd->dd); - - if (unlikely((snoop_flags & SNOOP_DROP_SEND) && - (ppd->dd->hfi1_snoop.mode_flag & - HFI1_PORT_SNOOP_MODE))) { - unsigned long flags; - - snoop_dbg("Dropping packet"); - if (qp->s_wqe) { - spin_lock_irqsave(&qp->s_lock, flags); - hfi1_send_complete( - qp, - qp->s_wqe, - IB_WC_SUCCESS); - spin_unlock_irqrestore(&qp->s_lock, flags); - } else if (qp->ibqp.qp_type == IB_QPT_RC) { - spin_lock_irqsave(&qp->s_lock, flags); - hfi1_rc_send_complete(qp, - &ps->s_txreq->phdr.hdr); - spin_unlock_irqrestore(&qp->s_lock, flags); - } - - /* - * If snoop is dropping the packet we need to put the - * txreq back because no one else will. - */ - hfi1_put_txreq(ps->s_txreq); - return 0; - } - break; - default: - kfree(s_packet); - break; - } -out: - return hfi1_verbs_send_pio(qp, ps, md.u.pbc); -} - -/* - * Callers of this must pass a hfi1_ib_header type for the from ptr. Currently - * this can be used anywhere, but the intention is for inline ACKs for RC and - * CCA packets. We don't restrict this usage though. - */ -void snoop_inline_pio_send(struct hfi1_devdata *dd, struct pio_buf *pbuf, - u64 pbc, const void *from, size_t count) -{ - int snoop_mode = 0; - int md_len = 0; - struct capture_md md; - struct snoop_packet *s_packet = NULL; - - /* - * count is in dwords so we need to convert to bytes. - * We also need to account for CRC which would be tacked on by hardware. - */ - int packet_len = (count << 2) + 4; - int ret; - - snoop_dbg("ACK OUT: len %d", packet_len); - - if (!dd->hfi1_snoop.filter_callback) { - snoop_dbg("filter not set"); - ret = HFI1_FILTER_HIT; - } else { - ret = dd->hfi1_snoop.filter_callback( - (struct hfi1_ib_header *)from, - NULL, - dd->hfi1_snoop.filter_value); - } - - switch (ret) { - case HFI1_FILTER_ERR: - snoop_dbg("Error in filter call"); - /* fall through */ - case HFI1_FILTER_MISS: - snoop_dbg("Filter Miss"); - break; - case HFI1_FILTER_HIT: - snoop_dbg("Capturing packet"); - if (dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE) - snoop_mode = 1; - if ((snoop_mode == 0) || - unlikely(snoop_flags & SNOOP_USE_METADATA)) - md_len = sizeof(struct capture_md); - - s_packet = allocate_snoop_packet(packet_len, 0, md_len); - - if (unlikely(!s_packet)) { - dd_dev_warn_ratelimited(dd, "Unable to allocate snoop/capture packet\n"); - goto inline_pio_out; - } - - s_packet->total_len = packet_len + md_len; - - /* Fill in the metadata for the packet */ - if (md_len > 0) { - memset(&md, 0, sizeof(struct capture_md)); - md.port = 1; - md.dir = PKT_DIR_EGRESS; - md.u.pbc = pbc; - memcpy(s_packet->data, &md, md_len); - } - - /* Add the packet data which is a single buffer */ - memcpy(s_packet->data + md_len, from, packet_len); - - snoop_list_add_tail(s_packet, dd); - - if (unlikely((snoop_flags & SNOOP_DROP_SEND) && snoop_mode)) { - snoop_dbg("Dropping packet"); - return; - } - break; - default: - break; - } - -inline_pio_out: - pio_copy(dd, pbuf, pbc, from, count); -} diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c index 2eddd3305c6a..4287c806642e 100644 --- a/drivers/staging/rdma/hfi1/file_ops.c +++ b/drivers/staging/rdma/hfi1/file_ops.c @@ -1500,13 +1500,7 @@ static int user_add(struct hfi1_devdata *dd) */ int hfi1_device_create(struct hfi1_devdata *dd) { - int r, ret; - - r = user_add(dd); - ret = hfi1_diag_add(dd); - if (r && !ret) - ret = r; - return ret; + return user_add(dd); } /* @@ -1516,5 +1510,4 @@ int hfi1_device_create(struct hfi1_devdata *dd) void hfi1_device_remove(struct hfi1_devdata *dd) { user_remove(dd); - hfi1_diag_remove(dd); } From ac56f162d4586885238da05131c4e6bbdb536d4e Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Thu, 19 May 2016 05:26:17 -0700 Subject: [PATCH 29/42] IB/hfi1: Remove unused user command The HFI1_CMD_SDMA_STATUS_UPD command was never implemented it has no reason to live in the driver. Remove it. Reviewed-by: Christoph Hellwig Reviewed-by: Mitko Haralanov Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/staging/rdma/hfi1/file_ops.c | 3 --- include/uapi/rdma/hfi/hfi1_user.h | 1 - 2 files changed, 4 deletions(-) diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c index 4287c806642e..322e55f7b4b4 100644 --- a/drivers/staging/rdma/hfi1/file_ops.c +++ b/drivers/staging/rdma/hfi1/file_ops.c @@ -213,7 +213,6 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data, copy = sizeof(uinfo); dest = &uinfo; break; - case HFI1_CMD_SDMA_STATUS_UPD: case HFI1_CMD_CREDIT_UPD: copy = 0; break; @@ -273,8 +272,6 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data, ret = get_base_info(fp, (void __user *)(unsigned long) user_val, cmd.len); break; - case HFI1_CMD_SDMA_STATUS_UPD: - break; case HFI1_CMD_CREDIT_UPD: if (uctxt && uctxt->sc) sc_return_credits(uctxt->sc); diff --git a/include/uapi/rdma/hfi/hfi1_user.h b/include/uapi/rdma/hfi/hfi1_user.h index 3e3680d11b25..aa48fbe016af 100644 --- a/include/uapi/rdma/hfi/hfi1_user.h +++ b/include/uapi/rdma/hfi/hfi1_user.h @@ -114,7 +114,6 @@ #define HFI1_CMD_TID_UPDATE 4 /* update expected TID entries */ #define HFI1_CMD_TID_FREE 5 /* free expected TID entries */ #define HFI1_CMD_CREDIT_UPD 6 /* force an update of PIO credit */ -#define HFI1_CMD_SDMA_STATUS_UPD 7 /* force update of SDMA status ring */ #define HFI1_CMD_RECV_CTRL 8 /* control receipt of packets */ #define HFI1_CMD_POLL_TYPE 9 /* set the kind of polling we want */ From 8d970cf991a6c38a5566572979487b906d643740 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Thu, 19 May 2016 05:26:24 -0700 Subject: [PATCH 30/42] IB/hfi1: Add ioctl() interface for user commands IOCTL is more suited to what user space commands need to do than the write() interface. Add IOCTL definitions for all existing write commands and the handling for those. The write() interface will be removed in a follow on patch. Reviewed-by: Mitko Haralanov Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/staging/rdma/hfi1/common.h | 5 +- drivers/staging/rdma/hfi1/file_ops.c | 204 +++++++++++++++++++++++++++ include/uapi/rdma/hfi/hfi1_user.h | 45 ++++++ 3 files changed, 253 insertions(+), 1 deletion(-) diff --git a/drivers/staging/rdma/hfi1/common.h b/drivers/staging/rdma/hfi1/common.h index e9b6bb322025..fcc9c217a97a 100644 --- a/drivers/staging/rdma/hfi1/common.h +++ b/drivers/staging/rdma/hfi1/common.h @@ -178,7 +178,8 @@ HFI1_CAP_PKEY_CHECK | \ HFI1_CAP_NO_INTEGRITY) -#define HFI1_USER_SWVERSION ((HFI1_USER_SWMAJOR << 16) | HFI1_USER_SWMINOR) +#define HFI1_USER_SWVERSION ((HFI1_USER_SWMAJOR << HFI1_SWMAJOR_SHIFT) | \ + HFI1_USER_SWMINOR) #ifndef HFI1_KERN_TYPE #define HFI1_KERN_TYPE 0 @@ -349,6 +350,8 @@ struct hfi1_message_header { #define HFI1_BECN_MASK 1 #define HFI1_BECN_SMASK BIT(HFI1_BECN_SHIFT) +#define HFI1_PSM_IOC_BASE_SEQ 0x0 + static inline __u64 rhf_to_cpu(const __le32 *rbuf) { return __le64_to_cpu(*((__le64 *)rbuf)); diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c index 322e55f7b4b4..a338238bef92 100644 --- a/drivers/staging/rdma/hfi1/file_ops.c +++ b/drivers/staging/rdma/hfi1/file_ops.c @@ -96,6 +96,8 @@ static int user_event_ack(struct hfi1_ctxtdata *, int, unsigned long); static int set_ctxt_pkey(struct hfi1_ctxtdata *, unsigned, u16); static int manage_rcvq(struct hfi1_ctxtdata *, unsigned, int); static int vma_fault(struct vm_area_struct *, struct vm_fault *); +static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, + unsigned long arg); static const struct file_operations hfi1_file_ops = { .owner = THIS_MODULE, @@ -103,6 +105,7 @@ static const struct file_operations hfi1_file_ops = { .write_iter = hfi1_write_iter, .open = hfi1_file_open, .release = hfi1_file_close, + .unlocked_ioctl = hfi1_file_ioctl, .poll = hfi1_poll, .mmap = hfi1_file_mmap, .llseek = noop_llseek, @@ -175,6 +178,207 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) return fp->private_data ? 0 : -ENOMEM; } +static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, + unsigned long arg) +{ + struct hfi1_filedata *fd = fp->private_data; + struct hfi1_ctxtdata *uctxt = fd->uctxt; + struct hfi1_user_info uinfo; + struct hfi1_tid_info tinfo; + int ret = 0; + unsigned long addr; + int uval = 0; + unsigned long ul_uval = 0; + u16 uval16 = 0; + + if (cmd != HFI1_IOCTL_ASSIGN_CTXT && + cmd != HFI1_IOCTL_GET_VERS && + !uctxt) + return -EINVAL; + + switch (cmd) { + case HFI1_IOCTL_ASSIGN_CTXT: + if (copy_from_user(&uinfo, + (struct hfi1_user_info __user *)arg, + sizeof(uinfo))) + return -EFAULT; + + ret = assign_ctxt(fp, &uinfo); + if (ret < 0) + return ret; + setup_ctxt(fp); + if (ret) + return ret; + ret = user_init(fp); + break; + case HFI1_IOCTL_CTXT_INFO: + ret = get_ctxt_info(fp, (void __user *)(unsigned long)arg, + sizeof(struct hfi1_ctxt_info)); + break; + case HFI1_IOCTL_USER_INFO: + ret = get_base_info(fp, (void __user *)(unsigned long)arg, + sizeof(struct hfi1_base_info)); + break; + case HFI1_IOCTL_CREDIT_UPD: + if (uctxt && uctxt->sc) + sc_return_credits(uctxt->sc); + break; + + case HFI1_IOCTL_TID_UPDATE: + if (copy_from_user(&tinfo, + (struct hfi11_tid_info __user *)arg, + sizeof(tinfo))) + return -EFAULT; + + ret = hfi1_user_exp_rcv_setup(fp, &tinfo); + if (!ret) { + /* + * Copy the number of tidlist entries we used + * and the length of the buffer we registered. + * These fields are adjacent in the structure so + * we can copy them at the same time. + */ + addr = arg + offsetof(struct hfi1_tid_info, tidcnt); + if (copy_to_user((void __user *)addr, &tinfo.tidcnt, + sizeof(tinfo.tidcnt) + + sizeof(tinfo.length))) + ret = -EFAULT; + } + break; + + case HFI1_IOCTL_TID_FREE: + if (copy_from_user(&tinfo, + (struct hfi11_tid_info __user *)arg, + sizeof(tinfo))) + return -EFAULT; + + ret = hfi1_user_exp_rcv_clear(fp, &tinfo); + if (ret) + break; + addr = arg + offsetof(struct hfi1_tid_info, tidcnt); + if (copy_to_user((void __user *)addr, &tinfo.tidcnt, + sizeof(tinfo.tidcnt))) + ret = -EFAULT; + break; + + case HFI1_IOCTL_TID_INVAL_READ: + if (copy_from_user(&tinfo, + (struct hfi11_tid_info __user *)arg, + sizeof(tinfo))) + return -EFAULT; + + ret = hfi1_user_exp_rcv_invalid(fp, &tinfo); + if (ret) + break; + addr = arg + offsetof(struct hfi1_tid_info, tidcnt); + if (copy_to_user((void __user *)addr, &tinfo.tidcnt, + sizeof(tinfo.tidcnt))) + ret = -EFAULT; + break; + + case HFI1_IOCTL_RECV_CTRL: + ret = get_user(uval, (int __user *)arg); + if (ret != 0) + return -EFAULT; + ret = manage_rcvq(uctxt, fd->subctxt, uval); + break; + + case HFI1_IOCTL_POLL_TYPE: + ret = get_user(uval, (int __user *)arg); + if (ret != 0) + return -EFAULT; + uctxt->poll_type = (typeof(uctxt->poll_type))uval; + break; + + case HFI1_IOCTL_ACK_EVENT: + ret = get_user(ul_uval, (unsigned long __user *)arg); + if (ret != 0) + return -EFAULT; + ret = user_event_ack(uctxt, fd->subctxt, ul_uval); + break; + + case HFI1_IOCTL_SET_PKEY: + ret = get_user(uval16, (u16 __user *)arg); + if (ret != 0) + return -EFAULT; + if (HFI1_CAP_IS_USET(PKEY_CHECK)) + ret = set_ctxt_pkey(uctxt, fd->subctxt, uval16); + else + return -EPERM; + break; + + case HFI1_IOCTL_CTXT_RESET: { + struct send_context *sc; + struct hfi1_devdata *dd; + + if (!uctxt || !uctxt->dd || !uctxt->sc) + return -EINVAL; + + /* + * There is no protection here. User level has to + * guarantee that no one will be writing to the send + * context while it is being re-initialized. + * If user level breaks that guarantee, it will break + * it's own context and no one else's. + */ + dd = uctxt->dd; + sc = uctxt->sc; + /* + * Wait until the interrupt handler has marked the + * context as halted or frozen. Report error if we time + * out. + */ + wait_event_interruptible_timeout( + sc->halt_wait, (sc->flags & SCF_HALTED), + msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT)); + if (!(sc->flags & SCF_HALTED)) + return -ENOLCK; + + /* + * If the send context was halted due to a Freeze, + * wait until the device has been "unfrozen" before + * resetting the context. + */ + if (sc->flags & SCF_FROZEN) { + wait_event_interruptible_timeout( + dd->event_queue, + !(ACCESS_ONCE(dd->flags) & HFI1_FROZEN), + msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT)); + if (dd->flags & HFI1_FROZEN) + return -ENOLCK; + + if (dd->flags & HFI1_FORCED_FREEZE) + /* + * Don't allow context reset if we are into + * forced freeze + */ + return -ENODEV; + + sc_disable(sc); + ret = sc_enable(sc); + hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, + uctxt->ctxt); + } else { + ret = sc_restart(sc); + } + if (!ret) + sc_return_credits(sc); + break; + } + + case HFI1_IOCTL_GET_VERS: + uval = HFI1_USER_SWVERSION; + if (put_user(uval, (int __user *)arg)) + return -EFAULT; + break; + + default: + return -EINVAL; + } + + return ret; +} + static ssize_t hfi1_file_write(struct file *fp, const char __user *data, size_t count, loff_t *offset) { diff --git a/include/uapi/rdma/hfi/hfi1_user.h b/include/uapi/rdma/hfi/hfi1_user.h index aa48fbe016af..9784159c9d52 100644 --- a/include/uapi/rdma/hfi/hfi1_user.h +++ b/include/uapi/rdma/hfi/hfi1_user.h @@ -77,6 +77,11 @@ */ #define HFI1_USER_SWMINOR 1 +/* + * We will encode the major/minor inside a single 32bit version number. + */ +#define HFI1_SWMAJOR_SHIFT 16 + /* * Set of HW and driver capability/feature bits. * These bit values are used to configure enabled/disabled HW and @@ -121,6 +126,46 @@ #define HFI1_CMD_SET_PKEY 11 /* set context's pkey */ #define HFI1_CMD_CTXT_RESET 12 /* reset context's HW send context */ #define HFI1_CMD_TID_INVAL_READ 13 /* read TID cache invalidations */ +#define HFI1_CMD_GET_VERS 14 /* get the version of the user cdev */ + +/* + * User IOCTLs can not go above 128 if they do then see common.h and change the + * base for the snoop ioctl + */ +#define IB_IOCTL_MAGIC 0x1b /* See Documentation/ioctl/ioctl-number.txt */ + +/* + * Make the ioctls occupy the last 0xf0-0xff portion of the IB range + */ +#define __NUM(cmd) (HFI1_CMD_##cmd + 0xe0) + +struct hfi1_cmd; +#define HFI1_IOCTL_ASSIGN_CTXT \ + _IOWR(IB_IOCTL_MAGIC, __NUM(ASSIGN_CTXT), struct hfi1_user_info) +#define HFI1_IOCTL_CTXT_INFO \ + _IOW(IB_IOCTL_MAGIC, __NUM(CTXT_INFO), struct hfi1_ctxt_info) +#define HFI1_IOCTL_USER_INFO \ + _IOW(IB_IOCTL_MAGIC, __NUM(USER_INFO), struct hfi1_base_info) +#define HFI1_IOCTL_TID_UPDATE \ + _IOWR(IB_IOCTL_MAGIC, __NUM(TID_UPDATE), struct hfi1_tid_info) +#define HFI1_IOCTL_TID_FREE \ + _IOWR(IB_IOCTL_MAGIC, __NUM(TID_FREE), struct hfi1_tid_info) +#define HFI1_IOCTL_CREDIT_UPD \ + _IO(IB_IOCTL_MAGIC, __NUM(CREDIT_UPD)) +#define HFI1_IOCTL_RECV_CTRL \ + _IOW(IB_IOCTL_MAGIC, __NUM(RECV_CTRL), int) +#define HFI1_IOCTL_POLL_TYPE \ + _IOW(IB_IOCTL_MAGIC, __NUM(POLL_TYPE), int) +#define HFI1_IOCTL_ACK_EVENT \ + _IOW(IB_IOCTL_MAGIC, __NUM(ACK_EVENT), unsigned long) +#define HFI1_IOCTL_SET_PKEY \ + _IOW(IB_IOCTL_MAGIC, __NUM(SET_PKEY), __u16) +#define HFI1_IOCTL_CTXT_RESET \ + _IO(IB_IOCTL_MAGIC, __NUM(CTXT_RESET)) +#define HFI1_IOCTL_TID_INVAL_READ \ + _IOWR(IB_IOCTL_MAGIC, __NUM(TID_INVAL_READ), struct hfi1_tid_info) +#define HFI1_IOCTL_GET_VERS \ + _IOR(IB_IOCTL_MAGIC, __NUM(GET_VERS), int) #define _HFI1_EVENT_FROZEN_BIT 0 #define _HFI1_EVENT_LINKDOWN_BIT 1 From 380fb942888e7afc3420ce195a5188ff73b5a782 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Thu, 19 May 2016 05:26:31 -0700 Subject: [PATCH 31/42] IB/hfi1: Remove write(), use ioctl() for user cmds Remove the write() handler for user space commands now that ioctl handling is available. User apps will need to change to use ioctl from this point forward. Reviewed-by: Mitko Haralanov Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/staging/rdma/hfi1/file_ops.c | 226 --------------------------- include/uapi/rdma/hfi/hfi1_user.h | 8 +- 2 files changed, 1 insertion(+), 233 deletions(-) diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c index a338238bef92..c46eadadf3bf 100644 --- a/drivers/staging/rdma/hfi1/file_ops.c +++ b/drivers/staging/rdma/hfi1/file_ops.c @@ -72,8 +72,6 @@ */ static int hfi1_file_open(struct inode *, struct file *); static int hfi1_file_close(struct inode *, struct file *); -static ssize_t hfi1_file_write(struct file *, const char __user *, - size_t, loff_t *); static ssize_t hfi1_write_iter(struct kiocb *, struct iov_iter *); static unsigned int hfi1_poll(struct file *, struct poll_table_struct *); static int hfi1_file_mmap(struct file *, struct vm_area_struct *); @@ -101,7 +99,6 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, static const struct file_operations hfi1_file_ops = { .owner = THIS_MODULE, - .write = hfi1_file_write, .write_iter = hfi1_write_iter, .open = hfi1_file_open, .release = hfi1_file_close, @@ -379,229 +376,6 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, return ret; } -static ssize_t hfi1_file_write(struct file *fp, const char __user *data, - size_t count, loff_t *offset) -{ - const struct hfi1_cmd __user *ucmd; - struct hfi1_filedata *fd = fp->private_data; - struct hfi1_ctxtdata *uctxt = fd->uctxt; - struct hfi1_cmd cmd; - struct hfi1_user_info uinfo; - struct hfi1_tid_info tinfo; - unsigned long addr; - ssize_t consumed = 0, copy = 0, ret = 0; - void *dest = NULL; - __u64 user_val = 0; - int uctxt_required = 1; - - /* FIXME: This interface cannot continue out of staging */ - if (WARN_ON_ONCE(!ib_safe_file_access(fp))) - return -EACCES; - - if (count < sizeof(cmd)) { - ret = -EINVAL; - goto bail; - } - - ucmd = (const struct hfi1_cmd __user *)data; - if (copy_from_user(&cmd, ucmd, sizeof(cmd))) { - ret = -EFAULT; - goto bail; - } - - consumed = sizeof(cmd); - - switch (cmd.type) { - case HFI1_CMD_ASSIGN_CTXT: - uctxt_required = 0; /* assigned user context not required */ - copy = sizeof(uinfo); - dest = &uinfo; - break; - case HFI1_CMD_CREDIT_UPD: - copy = 0; - break; - case HFI1_CMD_TID_UPDATE: - case HFI1_CMD_TID_FREE: - case HFI1_CMD_TID_INVAL_READ: - copy = sizeof(tinfo); - dest = &tinfo; - break; - case HFI1_CMD_USER_INFO: - case HFI1_CMD_RECV_CTRL: - case HFI1_CMD_POLL_TYPE: - case HFI1_CMD_ACK_EVENT: - case HFI1_CMD_CTXT_INFO: - case HFI1_CMD_SET_PKEY: - case HFI1_CMD_CTXT_RESET: - copy = 0; - user_val = cmd.addr; - break; - default: - ret = -EINVAL; - goto bail; - } - - /* If the command comes with user data, copy it. */ - if (copy) { - if (copy_from_user(dest, (void __user *)cmd.addr, copy)) { - ret = -EFAULT; - goto bail; - } - consumed += copy; - } - - /* - * Make sure there is a uctxt when needed. - */ - if (uctxt_required && !uctxt) { - ret = -EINVAL; - goto bail; - } - - switch (cmd.type) { - case HFI1_CMD_ASSIGN_CTXT: - ret = assign_ctxt(fp, &uinfo); - if (ret < 0) - goto bail; - ret = setup_ctxt(fp); - if (ret) - goto bail; - ret = user_init(fp); - break; - case HFI1_CMD_CTXT_INFO: - ret = get_ctxt_info(fp, (void __user *)(unsigned long) - user_val, cmd.len); - break; - case HFI1_CMD_USER_INFO: - ret = get_base_info(fp, (void __user *)(unsigned long) - user_val, cmd.len); - break; - case HFI1_CMD_CREDIT_UPD: - if (uctxt && uctxt->sc) - sc_return_credits(uctxt->sc); - break; - case HFI1_CMD_TID_UPDATE: - ret = hfi1_user_exp_rcv_setup(fp, &tinfo); - if (!ret) { - /* - * Copy the number of tidlist entries we used - * and the length of the buffer we registered. - * These fields are adjacent in the structure so - * we can copy them at the same time. - */ - addr = (unsigned long)cmd.addr + - offsetof(struct hfi1_tid_info, tidcnt); - if (copy_to_user((void __user *)addr, &tinfo.tidcnt, - sizeof(tinfo.tidcnt) + - sizeof(tinfo.length))) - ret = -EFAULT; - } - break; - case HFI1_CMD_TID_INVAL_READ: - ret = hfi1_user_exp_rcv_invalid(fp, &tinfo); - if (ret) - break; - addr = (unsigned long)cmd.addr + - offsetof(struct hfi1_tid_info, tidcnt); - if (copy_to_user((void __user *)addr, &tinfo.tidcnt, - sizeof(tinfo.tidcnt))) - ret = -EFAULT; - break; - case HFI1_CMD_TID_FREE: - ret = hfi1_user_exp_rcv_clear(fp, &tinfo); - if (ret) - break; - addr = (unsigned long)cmd.addr + - offsetof(struct hfi1_tid_info, tidcnt); - if (copy_to_user((void __user *)addr, &tinfo.tidcnt, - sizeof(tinfo.tidcnt))) - ret = -EFAULT; - break; - case HFI1_CMD_RECV_CTRL: - ret = manage_rcvq(uctxt, fd->subctxt, (int)user_val); - break; - case HFI1_CMD_POLL_TYPE: - uctxt->poll_type = (typeof(uctxt->poll_type))user_val; - break; - case HFI1_CMD_ACK_EVENT: - ret = user_event_ack(uctxt, fd->subctxt, user_val); - break; - case HFI1_CMD_SET_PKEY: - if (HFI1_CAP_IS_USET(PKEY_CHECK)) - ret = set_ctxt_pkey(uctxt, fd->subctxt, user_val); - else - ret = -EPERM; - break; - case HFI1_CMD_CTXT_RESET: { - struct send_context *sc; - struct hfi1_devdata *dd; - - if (!uctxt || !uctxt->dd || !uctxt->sc) { - ret = -EINVAL; - break; - } - /* - * There is no protection here. User level has to - * guarantee that no one will be writing to the send - * context while it is being re-initialized. - * If user level breaks that guarantee, it will break - * it's own context and no one else's. - */ - dd = uctxt->dd; - sc = uctxt->sc; - /* - * Wait until the interrupt handler has marked the - * context as halted or frozen. Report error if we time - * out. - */ - wait_event_interruptible_timeout( - sc->halt_wait, (sc->flags & SCF_HALTED), - msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT)); - if (!(sc->flags & SCF_HALTED)) { - ret = -ENOLCK; - break; - } - /* - * If the send context was halted due to a Freeze, - * wait until the device has been "unfrozen" before - * resetting the context. - */ - if (sc->flags & SCF_FROZEN) { - wait_event_interruptible_timeout( - dd->event_queue, - !(ACCESS_ONCE(dd->flags) & HFI1_FROZEN), - msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT)); - if (dd->flags & HFI1_FROZEN) { - ret = -ENOLCK; - break; - } - if (dd->flags & HFI1_FORCED_FREEZE) { - /* - * Don't allow context reset if we are into - * forced freeze - */ - ret = -ENODEV; - break; - } - sc_disable(sc); - ret = sc_enable(sc); - hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, - uctxt->ctxt); - } else { - ret = sc_restart(sc); - } - if (!ret) - sc_return_credits(sc); - break; - } - } - - if (ret >= 0) - ret = consumed; -bail: - return ret; -} - static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from) { struct hfi1_filedata *fd = kiocb->ki_filp->private_data; diff --git a/include/uapi/rdma/hfi/hfi1_user.h b/include/uapi/rdma/hfi/hfi1_user.h index 9784159c9d52..98bebf8bef55 100644 --- a/include/uapi/rdma/hfi/hfi1_user.h +++ b/include/uapi/rdma/hfi/hfi1_user.h @@ -66,7 +66,7 @@ * The major version changes when data structures change in an incompatible * way. The driver must be the same for initialization to succeed. */ -#define HFI1_USER_SWMAJOR 5 +#define HFI1_USER_SWMAJOR 6 /* * Minor version differences are always compatible @@ -265,12 +265,6 @@ struct hfi1_tid_info { __u32 length; }; -struct hfi1_cmd { - __u32 type; /* command type */ - __u32 len; /* length of struct pointed to by add */ - __u64 addr; /* pointer to user structure */ -}; - enum hfi1_sdma_comp_state { FREE = 0, QUEUED, From 8a1882ebd4b593df0e36ba0b72e4e2f632573274 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Thu, 19 May 2016 05:26:37 -0700 Subject: [PATCH 32/42] IB/hfi1: Add trace message in user IOCTL handling Add a trace message to HFI1s user IOCTL handling. This allows debugging of which IOCTLs are being handled by the driver. Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/staging/rdma/hfi1/file_ops.c | 1 + drivers/staging/rdma/hfi1/trace.c | 1 + drivers/staging/rdma/hfi1/trace.h | 1 + 3 files changed, 3 insertions(+) diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c index c46eadadf3bf..113917021af8 100644 --- a/drivers/staging/rdma/hfi1/file_ops.c +++ b/drivers/staging/rdma/hfi1/file_ops.c @@ -188,6 +188,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, unsigned long ul_uval = 0; u16 uval16 = 0; + hfi1_cdbg(IOCTL, "IOCTL recv: 0x%x", cmd); if (cmd != HFI1_IOCTL_ASSIGN_CTXT && cmd != HFI1_IOCTL_GET_VERS && !uctxt) diff --git a/drivers/staging/rdma/hfi1/trace.c b/drivers/staging/rdma/hfi1/trace.c index 8b62fefcf903..caddb2ac3cfc 100644 --- a/drivers/staging/rdma/hfi1/trace.c +++ b/drivers/staging/rdma/hfi1/trace.c @@ -233,3 +233,4 @@ __hfi1_trace_fn(FIRMWARE); __hfi1_trace_fn(RCVCTRL); __hfi1_trace_fn(TID); __hfi1_trace_fn(MMU); +__hfi1_trace_fn(IOCTL); diff --git a/drivers/staging/rdma/hfi1/trace.h b/drivers/staging/rdma/hfi1/trace.h index 42bcfc3e15d5..a6c1adf20dca 100644 --- a/drivers/staging/rdma/hfi1/trace.h +++ b/drivers/staging/rdma/hfi1/trace.h @@ -1341,6 +1341,7 @@ __hfi1_trace_def(FIRMWARE); __hfi1_trace_def(RCVCTRL); __hfi1_trace_def(TID); __hfi1_trace_def(MMU); +__hfi1_trace_def(IOCTL); #define hfi1_cdbg(which, fmt, ...) \ __hfi1_trace_##which(__func__, fmt, ##__VA_ARGS__) From e11ffbd57520c3832e05f2f5f19e9ff6adbb7cdc Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Thu, 19 May 2016 05:26:44 -0700 Subject: [PATCH 33/42] IB/hfi1: Do not free hfi1 cdev parent structure early The deletion of a cdev is not a fence for holding off references to the structure. The driver attempts to delete the cdev and then proceeds to free the parent structure, the hfi1_devdata, or dd. This can potentially lead to a kernel panic in situations where a user has an FD for the cdev open, and the pci device gets removed. If the user then closes the FD there will be a NULL dereference when trying to do put on the cdev's kobject. Fix this by pointing the cdev's kobject.parent at a new kobject embedded in its parent structure. Also take a reference when the device is opened and put it back when it is closed. Reviewed-by: Mitko Haralanov Signed-off-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/staging/rdma/hfi1/device.c | 4 +++- drivers/staging/rdma/hfi1/device.h | 3 ++- drivers/staging/rdma/hfi1/file_ops.c | 15 ++++++++++++--- drivers/staging/rdma/hfi1/hfi.h | 1 + drivers/staging/rdma/hfi1/init.c | 14 +++++++++++++- 5 files changed, 31 insertions(+), 6 deletions(-) diff --git a/drivers/staging/rdma/hfi1/device.c b/drivers/staging/rdma/hfi1/device.c index 6ee800f0359f..bf64b5a7bfd7 100644 --- a/drivers/staging/rdma/hfi1/device.c +++ b/drivers/staging/rdma/hfi1/device.c @@ -60,7 +60,8 @@ static dev_t hfi1_dev; int hfi1_cdev_init(int minor, const char *name, const struct file_operations *fops, struct cdev *cdev, struct device **devp, - bool user_accessible) + bool user_accessible, + struct kobject *parent) { const dev_t dev = MKDEV(MAJOR(hfi1_dev), minor); struct device *device = NULL; @@ -68,6 +69,7 @@ int hfi1_cdev_init(int minor, const char *name, cdev_init(cdev, fops); cdev->owner = THIS_MODULE; + cdev->kobj.parent = parent; kobject_set_name(&cdev->kobj, name); ret = cdev_add(cdev, dev, 1); diff --git a/drivers/staging/rdma/hfi1/device.h b/drivers/staging/rdma/hfi1/device.h index 5bb3e83cf2da..c3ec19cb0ac9 100644 --- a/drivers/staging/rdma/hfi1/device.h +++ b/drivers/staging/rdma/hfi1/device.h @@ -50,7 +50,8 @@ int hfi1_cdev_init(int minor, const char *name, const struct file_operations *fops, struct cdev *cdev, struct device **devp, - bool user_accessible); + bool user_accessible, + struct kobject *parent); void hfi1_cdev_cleanup(struct cdev *cdev, struct device **devp); const char *class_name(void); int __init dev_init(void); diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c index 113917021af8..7a5b0e676cc7 100644 --- a/drivers/staging/rdma/hfi1/file_ops.c +++ b/drivers/staging/rdma/hfi1/file_ops.c @@ -168,6 +168,13 @@ static inline int is_valid_mmap(u64 token) static int hfi1_file_open(struct inode *inode, struct file *fp) { + struct hfi1_devdata *dd = container_of(inode->i_cdev, + struct hfi1_devdata, + user_cdev); + + /* Just take a ref now. Not all opens result in a context assign */ + kobject_get(&dd->kobj); + /* The real work is performed later in assign_ctxt() */ fp->private_data = kzalloc(sizeof(struct hfi1_filedata), GFP_KERNEL); if (fp->private_data) /* no cpu affinity by default */ @@ -690,7 +697,9 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) { struct hfi1_filedata *fdata = fp->private_data; struct hfi1_ctxtdata *uctxt = fdata->uctxt; - struct hfi1_devdata *dd; + struct hfi1_devdata *dd = container_of(inode->i_cdev, + struct hfi1_devdata, + user_cdev); unsigned long flags, *ev; fp->private_data = NULL; @@ -699,7 +708,6 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) goto done; hfi1_cdbg(PROC, "freeing ctxt %u:%u", uctxt->ctxt, fdata->subctxt); - dd = uctxt->dd; mutex_lock(&hfi1_mutex); flush_wc(); @@ -765,6 +773,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) mutex_unlock(&hfi1_mutex); hfi1_free_ctxtdata(dd, uctxt); done: + kobject_put(&dd->kobj); kfree(fdata); return 0; } @@ -1464,7 +1473,7 @@ static int user_add(struct hfi1_devdata *dd) snprintf(name, sizeof(name), "%s_%d", class_name(), dd->unit); ret = hfi1_cdev_init(dd->unit, name, &hfi1_file_ops, &dd->user_cdev, &dd->user_device, - true); + true, &dd->kobj); if (ret) user_remove(dd); diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 337bd2dc2dbf..4417a0fd3ef9 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -1169,6 +1169,7 @@ struct hfi1_devdata { atomic_t aspm_disabled_cnt; struct hfi1_affinity *affinity; + struct kobject kobj; }; /* 8051 firmware version helper */ diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index b9beaae5953d..5cc492e5776d 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -989,8 +989,10 @@ static void release_asic_data(struct hfi1_devdata *dd) dd->asic_data = NULL; } -void hfi1_free_devdata(struct hfi1_devdata *dd) +static void __hfi1_free_devdata(struct kobject *kobj) { + struct hfi1_devdata *dd = + container_of(kobj, struct hfi1_devdata, kobj); unsigned long flags; spin_lock_irqsave(&hfi1_devs_lock, flags); @@ -1007,6 +1009,15 @@ void hfi1_free_devdata(struct hfi1_devdata *dd) rvt_dealloc_device(&dd->verbs_dev.rdi); } +static struct kobj_type hfi1_devdata_type = { + .release = __hfi1_free_devdata, +}; + +void hfi1_free_devdata(struct hfi1_devdata *dd) +{ + kobject_put(&dd->kobj); +} + /* * Allocate our primary per-unit data structure. Must be done via verbs * allocator, because the verbs cleanup process both does cleanup and @@ -1102,6 +1113,7 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) &pdev->dev, "Could not alloc cpulist info, cpu affinity might be wrong\n"); } + kobject_init(&dd->kobj, &hfi1_devdata_type); return dd; bail: From f48ad614c100783be1e7e777dc36328001b83999 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Thu, 19 May 2016 05:26:51 -0700 Subject: [PATCH 34/42] IB/hfi1: Move driver out of staging The TODO list for the hfi1 driver was completed during 4.6. In addition other objections raised (which are far beyond what was in the TODO list) have been addressed as well. It is now time to remove the driver from staging and into the drivers/infiniband sub-tree. Reviewed-by: Jubin John Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- MAINTAINERS | 13 +++++++------ drivers/infiniband/Kconfig | 2 ++ drivers/infiniband/hw/Makefile | 1 + .../{staging/rdma => infiniband/hw}/hfi1/Kconfig | 0 .../{staging/rdma => infiniband/hw}/hfi1/Makefile | 0 .../{staging/rdma => infiniband/hw}/hfi1/affinity.c | 0 .../{staging/rdma => infiniband/hw}/hfi1/affinity.h | 0 drivers/{staging/rdma => infiniband/hw}/hfi1/aspm.h | 0 drivers/{staging/rdma => infiniband/hw}/hfi1/chip.c | 0 drivers/{staging/rdma => infiniband/hw}/hfi1/chip.h | 0 .../rdma => infiniband/hw}/hfi1/chip_registers.h | 0 .../{staging/rdma => infiniband/hw}/hfi1/common.h | 0 .../{staging/rdma => infiniband/hw}/hfi1/debugfs.c | 0 .../{staging/rdma => infiniband/hw}/hfi1/debugfs.h | 0 .../{staging/rdma => infiniband/hw}/hfi1/device.c | 0 .../{staging/rdma => infiniband/hw}/hfi1/device.h | 0 drivers/{staging/rdma => infiniband/hw}/hfi1/dma.c | 0 .../{staging/rdma => infiniband/hw}/hfi1/driver.c | 0 .../{staging/rdma => infiniband/hw}/hfi1/efivar.c | 0 .../{staging/rdma => infiniband/hw}/hfi1/efivar.h | 0 .../{staging/rdma => infiniband/hw}/hfi1/eprom.c | 0 .../{staging/rdma => infiniband/hw}/hfi1/eprom.h | 0 .../{staging/rdma => infiniband/hw}/hfi1/file_ops.c | 0 .../{staging/rdma => infiniband/hw}/hfi1/firmware.c | 0 drivers/{staging/rdma => infiniband/hw}/hfi1/hfi.h | 0 drivers/{staging/rdma => infiniband/hw}/hfi1/init.c | 0 drivers/{staging/rdma => infiniband/hw}/hfi1/intr.c | 0 .../{staging/rdma => infiniband/hw}/hfi1/iowait.h | 0 drivers/{staging/rdma => infiniband/hw}/hfi1/mad.c | 0 drivers/{staging/rdma => infiniband/hw}/hfi1/mad.h | 0 .../{staging/rdma => infiniband/hw}/hfi1/mmu_rb.c | 0 .../{staging/rdma => infiniband/hw}/hfi1/mmu_rb.h | 0 .../rdma => infiniband/hw}/hfi1/opa_compat.h | 0 drivers/{staging/rdma => infiniband/hw}/hfi1/pcie.c | 0 drivers/{staging/rdma => infiniband/hw}/hfi1/pio.c | 0 drivers/{staging/rdma => infiniband/hw}/hfi1/pio.h | 0 .../{staging/rdma => infiniband/hw}/hfi1/pio_copy.c | 0 .../{staging/rdma => infiniband/hw}/hfi1/platform.c | 0 .../{staging/rdma => infiniband/hw}/hfi1/platform.h | 0 drivers/{staging/rdma => infiniband/hw}/hfi1/qp.c | 0 drivers/{staging/rdma => infiniband/hw}/hfi1/qp.h | 0 drivers/{staging/rdma => infiniband/hw}/hfi1/qsfp.c | 0 drivers/{staging/rdma => infiniband/hw}/hfi1/qsfp.h | 0 drivers/{staging/rdma => infiniband/hw}/hfi1/rc.c | 0 drivers/{staging/rdma => infiniband/hw}/hfi1/ruc.c | 0 drivers/{staging/rdma => infiniband/hw}/hfi1/sdma.c | 0 drivers/{staging/rdma => infiniband/hw}/hfi1/sdma.h | 0 .../rdma => infiniband/hw}/hfi1/sdma_txreq.h | 0 .../{staging/rdma => infiniband/hw}/hfi1/sysfs.c | 0 .../{staging/rdma => infiniband/hw}/hfi1/trace.c | 0 .../{staging/rdma => infiniband/hw}/hfi1/trace.h | 0 drivers/{staging/rdma => infiniband/hw}/hfi1/twsi.c | 0 drivers/{staging/rdma => infiniband/hw}/hfi1/twsi.h | 0 drivers/{staging/rdma => infiniband/hw}/hfi1/uc.c | 0 drivers/{staging/rdma => infiniband/hw}/hfi1/ud.c | 0 .../rdma => infiniband/hw}/hfi1/user_exp_rcv.c | 0 .../rdma => infiniband/hw}/hfi1/user_exp_rcv.h | 0 .../rdma => infiniband/hw}/hfi1/user_pages.c | 0 .../rdma => infiniband/hw}/hfi1/user_sdma.c | 0 .../rdma => infiniband/hw}/hfi1/user_sdma.h | 0 .../{staging/rdma => infiniband/hw}/hfi1/verbs.c | 0 .../{staging/rdma => infiniband/hw}/hfi1/verbs.h | 0 .../rdma => infiniband/hw}/hfi1/verbs_txreq.c | 0 .../rdma => infiniband/hw}/hfi1/verbs_txreq.h | 0 drivers/staging/rdma/Kconfig | 2 -- drivers/staging/rdma/Makefile | 1 - drivers/staging/rdma/hfi1/TODO | 6 ------ 67 files changed, 10 insertions(+), 15 deletions(-) rename drivers/{staging/rdma => infiniband/hw}/hfi1/Kconfig (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/Makefile (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/affinity.c (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/affinity.h (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/aspm.h (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/chip.c (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/chip.h (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/chip_registers.h (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/common.h (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/debugfs.c (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/debugfs.h (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/device.c (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/device.h (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/dma.c (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/driver.c (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/efivar.c (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/efivar.h (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/eprom.c (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/eprom.h (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/file_ops.c (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/firmware.c (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/hfi.h (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/init.c (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/intr.c (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/iowait.h (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/mad.c (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/mad.h (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/mmu_rb.c (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/mmu_rb.h (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/opa_compat.h (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/pcie.c (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/pio.c (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/pio.h (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/pio_copy.c (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/platform.c (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/platform.h (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/qp.c (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/qp.h (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/qsfp.c (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/qsfp.h (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/rc.c (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/ruc.c (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/sdma.c (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/sdma.h (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/sdma_txreq.h (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/sysfs.c (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/trace.c (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/trace.h (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/twsi.c (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/twsi.h (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/uc.c (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/ud.c (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/user_exp_rcv.c (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/user_exp_rcv.h (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/user_pages.c (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/user_sdma.c (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/user_sdma.h (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/verbs.c (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/verbs.h (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/verbs_txreq.c (100%) rename drivers/{staging/rdma => infiniband/hw}/hfi1/verbs_txreq.h (100%) delete mode 100644 drivers/staging/rdma/hfi1/TODO diff --git a/MAINTAINERS b/MAINTAINERS index c8025946eaae..98234560cfdc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5086,6 +5086,13 @@ F: drivers/block/cciss* F: include/linux/cciss_ioctl.h F: include/uapi/linux/cciss_ioctl.h +HFI1 DRIVER +M: Mike Marciniszyn +M: Dennis Dalessandro +L: linux-rdma@vger.kernel.org +S: Supported +F: drivers/infiniband/hw/hfi1 + HFS FILESYSTEM L: linux-fsdevel@vger.kernel.org S: Orphan @@ -10661,12 +10668,6 @@ M: Arnaud Patard S: Odd Fixes F: drivers/staging/xgifb/ -HFI1 DRIVER -M: Mike Marciniszyn -L: linux-rdma@vger.kernel.org -S: Supported -F: drivers/staging/rdma/hfi1 - STARFIRE/DURALAN NETWORK DRIVER M: Ion Badulescu S: Odd Fixes diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 6425c0e5d18a..2137adfbd8c3 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -85,4 +85,6 @@ source "drivers/infiniband/ulp/isert/Kconfig" source "drivers/infiniband/sw/rdmavt/Kconfig" +source "drivers/infiniband/hw/hfi1/Kconfig" + endif # INFINIBAND diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile index c7ad0a4c8b15..c0c7cf8af3f4 100644 --- a/drivers/infiniband/hw/Makefile +++ b/drivers/infiniband/hw/Makefile @@ -8,3 +8,4 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5/ obj-$(CONFIG_INFINIBAND_NES) += nes/ obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma/ obj-$(CONFIG_INFINIBAND_USNIC) += usnic/ +obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/ diff --git a/drivers/staging/rdma/hfi1/Kconfig b/drivers/infiniband/hw/hfi1/Kconfig similarity index 100% rename from drivers/staging/rdma/hfi1/Kconfig rename to drivers/infiniband/hw/hfi1/Kconfig diff --git a/drivers/staging/rdma/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile similarity index 100% rename from drivers/staging/rdma/hfi1/Makefile rename to drivers/infiniband/hw/hfi1/Makefile diff --git a/drivers/staging/rdma/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c similarity index 100% rename from drivers/staging/rdma/hfi1/affinity.c rename to drivers/infiniband/hw/hfi1/affinity.c diff --git a/drivers/staging/rdma/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h similarity index 100% rename from drivers/staging/rdma/hfi1/affinity.h rename to drivers/infiniband/hw/hfi1/affinity.h diff --git a/drivers/staging/rdma/hfi1/aspm.h b/drivers/infiniband/hw/hfi1/aspm.h similarity index 100% rename from drivers/staging/rdma/hfi1/aspm.h rename to drivers/infiniband/hw/hfi1/aspm.h diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c similarity index 100% rename from drivers/staging/rdma/hfi1/chip.c rename to drivers/infiniband/hw/hfi1/chip.c diff --git a/drivers/staging/rdma/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h similarity index 100% rename from drivers/staging/rdma/hfi1/chip.h rename to drivers/infiniband/hw/hfi1/chip.h diff --git a/drivers/staging/rdma/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h similarity index 100% rename from drivers/staging/rdma/hfi1/chip_registers.h rename to drivers/infiniband/hw/hfi1/chip_registers.h diff --git a/drivers/staging/rdma/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h similarity index 100% rename from drivers/staging/rdma/hfi1/common.h rename to drivers/infiniband/hw/hfi1/common.h diff --git a/drivers/staging/rdma/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c similarity index 100% rename from drivers/staging/rdma/hfi1/debugfs.c rename to drivers/infiniband/hw/hfi1/debugfs.c diff --git a/drivers/staging/rdma/hfi1/debugfs.h b/drivers/infiniband/hw/hfi1/debugfs.h similarity index 100% rename from drivers/staging/rdma/hfi1/debugfs.h rename to drivers/infiniband/hw/hfi1/debugfs.h diff --git a/drivers/staging/rdma/hfi1/device.c b/drivers/infiniband/hw/hfi1/device.c similarity index 100% rename from drivers/staging/rdma/hfi1/device.c rename to drivers/infiniband/hw/hfi1/device.c diff --git a/drivers/staging/rdma/hfi1/device.h b/drivers/infiniband/hw/hfi1/device.h similarity index 100% rename from drivers/staging/rdma/hfi1/device.h rename to drivers/infiniband/hw/hfi1/device.h diff --git a/drivers/staging/rdma/hfi1/dma.c b/drivers/infiniband/hw/hfi1/dma.c similarity index 100% rename from drivers/staging/rdma/hfi1/dma.c rename to drivers/infiniband/hw/hfi1/dma.c diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c similarity index 100% rename from drivers/staging/rdma/hfi1/driver.c rename to drivers/infiniband/hw/hfi1/driver.c diff --git a/drivers/staging/rdma/hfi1/efivar.c b/drivers/infiniband/hw/hfi1/efivar.c similarity index 100% rename from drivers/staging/rdma/hfi1/efivar.c rename to drivers/infiniband/hw/hfi1/efivar.c diff --git a/drivers/staging/rdma/hfi1/efivar.h b/drivers/infiniband/hw/hfi1/efivar.h similarity index 100% rename from drivers/staging/rdma/hfi1/efivar.h rename to drivers/infiniband/hw/hfi1/efivar.h diff --git a/drivers/staging/rdma/hfi1/eprom.c b/drivers/infiniband/hw/hfi1/eprom.c similarity index 100% rename from drivers/staging/rdma/hfi1/eprom.c rename to drivers/infiniband/hw/hfi1/eprom.c diff --git a/drivers/staging/rdma/hfi1/eprom.h b/drivers/infiniband/hw/hfi1/eprom.h similarity index 100% rename from drivers/staging/rdma/hfi1/eprom.h rename to drivers/infiniband/hw/hfi1/eprom.h diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c similarity index 100% rename from drivers/staging/rdma/hfi1/file_ops.c rename to drivers/infiniband/hw/hfi1/file_ops.c diff --git a/drivers/staging/rdma/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c similarity index 100% rename from drivers/staging/rdma/hfi1/firmware.c rename to drivers/infiniband/hw/hfi1/firmware.c diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h similarity index 100% rename from drivers/staging/rdma/hfi1/hfi.h rename to drivers/infiniband/hw/hfi1/hfi.h diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c similarity index 100% rename from drivers/staging/rdma/hfi1/init.c rename to drivers/infiniband/hw/hfi1/init.c diff --git a/drivers/staging/rdma/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c similarity index 100% rename from drivers/staging/rdma/hfi1/intr.c rename to drivers/infiniband/hw/hfi1/intr.c diff --git a/drivers/staging/rdma/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h similarity index 100% rename from drivers/staging/rdma/hfi1/iowait.h rename to drivers/infiniband/hw/hfi1/iowait.h diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c similarity index 100% rename from drivers/staging/rdma/hfi1/mad.c rename to drivers/infiniband/hw/hfi1/mad.c diff --git a/drivers/staging/rdma/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h similarity index 100% rename from drivers/staging/rdma/hfi1/mad.h rename to drivers/infiniband/hw/hfi1/mad.h diff --git a/drivers/staging/rdma/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c similarity index 100% rename from drivers/staging/rdma/hfi1/mmu_rb.c rename to drivers/infiniband/hw/hfi1/mmu_rb.c diff --git a/drivers/staging/rdma/hfi1/mmu_rb.h b/drivers/infiniband/hw/hfi1/mmu_rb.h similarity index 100% rename from drivers/staging/rdma/hfi1/mmu_rb.h rename to drivers/infiniband/hw/hfi1/mmu_rb.h diff --git a/drivers/staging/rdma/hfi1/opa_compat.h b/drivers/infiniband/hw/hfi1/opa_compat.h similarity index 100% rename from drivers/staging/rdma/hfi1/opa_compat.h rename to drivers/infiniband/hw/hfi1/opa_compat.h diff --git a/drivers/staging/rdma/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c similarity index 100% rename from drivers/staging/rdma/hfi1/pcie.c rename to drivers/infiniband/hw/hfi1/pcie.c diff --git a/drivers/staging/rdma/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c similarity index 100% rename from drivers/staging/rdma/hfi1/pio.c rename to drivers/infiniband/hw/hfi1/pio.c diff --git a/drivers/staging/rdma/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h similarity index 100% rename from drivers/staging/rdma/hfi1/pio.h rename to drivers/infiniband/hw/hfi1/pio.h diff --git a/drivers/staging/rdma/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c similarity index 100% rename from drivers/staging/rdma/hfi1/pio_copy.c rename to drivers/infiniband/hw/hfi1/pio_copy.c diff --git a/drivers/staging/rdma/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c similarity index 100% rename from drivers/staging/rdma/hfi1/platform.c rename to drivers/infiniband/hw/hfi1/platform.c diff --git a/drivers/staging/rdma/hfi1/platform.h b/drivers/infiniband/hw/hfi1/platform.h similarity index 100% rename from drivers/staging/rdma/hfi1/platform.h rename to drivers/infiniband/hw/hfi1/platform.h diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c similarity index 100% rename from drivers/staging/rdma/hfi1/qp.c rename to drivers/infiniband/hw/hfi1/qp.c diff --git a/drivers/staging/rdma/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h similarity index 100% rename from drivers/staging/rdma/hfi1/qp.h rename to drivers/infiniband/hw/hfi1/qp.h diff --git a/drivers/staging/rdma/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c similarity index 100% rename from drivers/staging/rdma/hfi1/qsfp.c rename to drivers/infiniband/hw/hfi1/qsfp.c diff --git a/drivers/staging/rdma/hfi1/qsfp.h b/drivers/infiniband/hw/hfi1/qsfp.h similarity index 100% rename from drivers/staging/rdma/hfi1/qsfp.h rename to drivers/infiniband/hw/hfi1/qsfp.h diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c similarity index 100% rename from drivers/staging/rdma/hfi1/rc.c rename to drivers/infiniband/hw/hfi1/rc.c diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c similarity index 100% rename from drivers/staging/rdma/hfi1/ruc.c rename to drivers/infiniband/hw/hfi1/ruc.c diff --git a/drivers/staging/rdma/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c similarity index 100% rename from drivers/staging/rdma/hfi1/sdma.c rename to drivers/infiniband/hw/hfi1/sdma.c diff --git a/drivers/staging/rdma/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h similarity index 100% rename from drivers/staging/rdma/hfi1/sdma.h rename to drivers/infiniband/hw/hfi1/sdma.h diff --git a/drivers/staging/rdma/hfi1/sdma_txreq.h b/drivers/infiniband/hw/hfi1/sdma_txreq.h similarity index 100% rename from drivers/staging/rdma/hfi1/sdma_txreq.h rename to drivers/infiniband/hw/hfi1/sdma_txreq.h diff --git a/drivers/staging/rdma/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c similarity index 100% rename from drivers/staging/rdma/hfi1/sysfs.c rename to drivers/infiniband/hw/hfi1/sysfs.c diff --git a/drivers/staging/rdma/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c similarity index 100% rename from drivers/staging/rdma/hfi1/trace.c rename to drivers/infiniband/hw/hfi1/trace.c diff --git a/drivers/staging/rdma/hfi1/trace.h b/drivers/infiniband/hw/hfi1/trace.h similarity index 100% rename from drivers/staging/rdma/hfi1/trace.h rename to drivers/infiniband/hw/hfi1/trace.h diff --git a/drivers/staging/rdma/hfi1/twsi.c b/drivers/infiniband/hw/hfi1/twsi.c similarity index 100% rename from drivers/staging/rdma/hfi1/twsi.c rename to drivers/infiniband/hw/hfi1/twsi.c diff --git a/drivers/staging/rdma/hfi1/twsi.h b/drivers/infiniband/hw/hfi1/twsi.h similarity index 100% rename from drivers/staging/rdma/hfi1/twsi.h rename to drivers/infiniband/hw/hfi1/twsi.h diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c similarity index 100% rename from drivers/staging/rdma/hfi1/uc.c rename to drivers/infiniband/hw/hfi1/uc.c diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c similarity index 100% rename from drivers/staging/rdma/hfi1/ud.c rename to drivers/infiniband/hw/hfi1/ud.c diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c similarity index 100% rename from drivers/staging/rdma/hfi1/user_exp_rcv.c rename to drivers/infiniband/hw/hfi1/user_exp_rcv.c diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h similarity index 100% rename from drivers/staging/rdma/hfi1/user_exp_rcv.h rename to drivers/infiniband/hw/hfi1/user_exp_rcv.h diff --git a/drivers/staging/rdma/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c similarity index 100% rename from drivers/staging/rdma/hfi1/user_pages.c rename to drivers/infiniband/hw/hfi1/user_pages.c diff --git a/drivers/staging/rdma/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c similarity index 100% rename from drivers/staging/rdma/hfi1/user_sdma.c rename to drivers/infiniband/hw/hfi1/user_sdma.c diff --git a/drivers/staging/rdma/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h similarity index 100% rename from drivers/staging/rdma/hfi1/user_sdma.h rename to drivers/infiniband/hw/hfi1/user_sdma.h diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c similarity index 100% rename from drivers/staging/rdma/hfi1/verbs.c rename to drivers/infiniband/hw/hfi1/verbs.c diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h similarity index 100% rename from drivers/staging/rdma/hfi1/verbs.h rename to drivers/infiniband/hw/hfi1/verbs.h diff --git a/drivers/staging/rdma/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c similarity index 100% rename from drivers/staging/rdma/hfi1/verbs_txreq.c rename to drivers/infiniband/hw/hfi1/verbs_txreq.c diff --git a/drivers/staging/rdma/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h similarity index 100% rename from drivers/staging/rdma/hfi1/verbs_txreq.h rename to drivers/infiniband/hw/hfi1/verbs_txreq.h diff --git a/drivers/staging/rdma/Kconfig b/drivers/staging/rdma/Kconfig index f1f3ecadf0fb..2c5b0188ebbf 100644 --- a/drivers/staging/rdma/Kconfig +++ b/drivers/staging/rdma/Kconfig @@ -22,6 +22,4 @@ menuconfig STAGING_RDMA # Please keep entries in alphabetic order if STAGING_RDMA -source "drivers/staging/rdma/hfi1/Kconfig" - endif diff --git a/drivers/staging/rdma/Makefile b/drivers/staging/rdma/Makefile index 8c7fc1de48a7..b5e94f169101 100644 --- a/drivers/staging/rdma/Makefile +++ b/drivers/staging/rdma/Makefile @@ -1,2 +1 @@ # Entries for RDMA_STAGING tree -obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/ diff --git a/drivers/staging/rdma/hfi1/TODO b/drivers/staging/rdma/hfi1/TODO deleted file mode 100644 index 4c6f1d7d2eaf..000000000000 --- a/drivers/staging/rdma/hfi1/TODO +++ /dev/null @@ -1,6 +0,0 @@ -July, 2015 - -- Remove unneeded file entries in sysfs -- Remove software processing of IB protocol and place in library for use - by qib, ipath (if still present), hfi1, and eventually soft-roce -- Replace incorrect uAPI From 23f7d0d29e842360cc20bf03460f52f14ced915d Mon Sep 17 00:00:00 2001 From: Jianxin Xiong Date: Tue, 24 May 2016 12:50:10 -0700 Subject: [PATCH 35/42] IB/hfi1, qib: Add ieth to the packet header definitions A new union member "ieth" (Invalidate Extended Transport Header) is added to the packet header definition in preparation of supporting the send with invalidate opcode. Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Jianxin Xiong Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/verbs.h | 1 + drivers/infiniband/hw/qib/qib_verbs.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 3ee223983b20..488356775627 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -152,6 +152,7 @@ union ib_ehdrs { } at; __be32 imm_data; __be32 aeth; + __be32 ieth; struct ib_atomic_eth atomic_eth; } __packed; diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 6888f03c6d61..4f878151f81f 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -159,6 +159,7 @@ struct qib_other_headers { } at; __be32 imm_data; __be32 aeth; + __be32 ieth; struct ib_atomic_eth atomic_eth; } u; } __packed; From bdd8a98ce465df31b07ff6314db9ed31a7c6bb0d Mon Sep 17 00:00:00 2001 From: Jianxin Xiong Date: Tue, 24 May 2016 12:50:17 -0700 Subject: [PATCH 36/42] IB/hfi1: Add tracing support for send with invalidate opcode Enable trace generation for packets with the "Send Last with Invalidate" and "Send Only with Invalidate" opcodes. Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Jianxin Xiong Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/trace.c | 7 +++++++ drivers/infiniband/hw/hfi1/trace.h | 2 ++ drivers/infiniband/hw/hfi1/verbs.c | 2 ++ 3 files changed, 11 insertions(+) diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c index caddb2ac3cfc..79b2952c0dfb 100644 --- a/drivers/infiniband/hw/hfi1/trace.c +++ b/drivers/infiniband/hw/hfi1/trace.c @@ -66,6 +66,7 @@ u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr) #define RETH_PRN "reth vaddr 0x%.16llx rkey 0x%.8x dlen 0x%.8x" #define AETH_PRN "aeth syn 0x%.2x %s msn 0x%.8x" #define DETH_PRN "deth qkey 0x%.8x sqpn 0x%.6x" +#define IETH_PRN "ieth rkey 0x%.8x" #define ATOMICACKETH_PRN "origdata %lld" #define ATOMICETH_PRN "vaddr 0x%llx rkey 0x%.8x sdata %lld cdata %lld" @@ -166,6 +167,12 @@ const char *parse_everbs_hdrs( be32_to_cpu(eh->ud.deth[0]), be32_to_cpu(eh->ud.deth[1]) & RVT_QPN_MASK); break; + /* ieth */ + case OP(RC, SEND_LAST_WITH_INVALIDATE): + case OP(RC, SEND_ONLY_WITH_INVALIDATE): + trace_seq_printf(p, IETH_PRN, + be32_to_cpu(eh->ieth)); + break; } trace_seq_putc(p, 0); return ret; diff --git a/drivers/infiniband/hw/hfi1/trace.h b/drivers/infiniband/hw/hfi1/trace.h index a6c1adf20dca..28c1d0832886 100644 --- a/drivers/infiniband/hw/hfi1/trace.h +++ b/drivers/infiniband/hw/hfi1/trace.h @@ -392,6 +392,8 @@ __print_symbolic(opcode, \ ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \ ib_opcode_name(RC_COMPARE_SWAP), \ ib_opcode_name(RC_FETCH_ADD), \ + ib_opcode_name(RC_SEND_LAST_WITH_INVALIDATE), \ + ib_opcode_name(RC_SEND_ONLY_WITH_INVALIDATE), \ ib_opcode_name(UC_SEND_FIRST), \ ib_opcode_name(UC_SEND_MIDDLE), \ ib_opcode_name(UC_SEND_LAST), \ diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index ae92ac5ff232..849c4b9399d4 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -335,6 +335,8 @@ const u8 hdr_len_by_opcode[256] = { [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = 12 + 8 + 4, [IB_OPCODE_RC_COMPARE_SWAP] = 12 + 8 + 28, [IB_OPCODE_RC_FETCH_ADD] = 12 + 8 + 28, + [IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = 12 + 8 + 4, + [IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = 12 + 8 + 4, /* UC */ [IB_OPCODE_UC_SEND_FIRST] = 12 + 8, [IB_OPCODE_UC_SEND_MIDDLE] = 12 + 8, From 7049de65c9e520886f06d6f9deceaaed5d93fb7c Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Tue, 24 May 2016 12:50:23 -0700 Subject: [PATCH 37/42] IB/hfi1: Fix hard lockup due to not using save/restore spin lock Commit b9b06cb6feda ("IB/hfi1: Fix missing lock/unlock in verbs drain callback") added a spin lock. Unfortunately, the new lock code can be called from a base level interrupt state, and an interrupt that can get stacked will attempt to get the same lock. Fix by using the flag save/restore spin lock variation. Cc: stable@vger.kernel.org # 4.6+ Reviewed-by: Sebastian Sanchez Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/qp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 14f889e3655b..1a942ffba4cb 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -512,6 +512,7 @@ static void iowait_wakeup(struct iowait *wait, int reason) static void iowait_sdma_drained(struct iowait *wait) { struct rvt_qp *qp = iowait_to_qp(wait); + unsigned long flags; /* * This happens when the send engine notes @@ -519,12 +520,12 @@ static void iowait_sdma_drained(struct iowait *wait) * do the flush work until that QP's * sdma work has finished. */ - spin_lock(&qp->s_lock); + spin_lock_irqsave(&qp->s_lock, flags); if (qp->s_flags & RVT_S_WAIT_DMA) { qp->s_flags &= ~RVT_S_WAIT_DMA; hfi1_schedule_send(qp); } - spin_unlock(&qp->s_lock); + spin_unlock_irqrestore(&qp->s_lock, flags); } /** From 4c0b653335bbdfe62a5f4483c98cb5d581432917 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Tue, 24 May 2016 12:50:34 -0700 Subject: [PATCH 38/42] IB/rdmavt: Max atomic value should be a u8 This matches the ib_qp_attr size and avoids a extremely large value when the lower level driver registers. As part of the patch, the u8 ordinals are moved to the end of the struct to reduce pahole noted excesses. Reviewed-by: Mitko Haralanov Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index d57ceee90d26..3f12da9384a1 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -149,15 +149,15 @@ struct rvt_driver_params { int qpn_res_end; int nports; int npkeys; - u8 qos_shift; char cq_name[RVT_CQN_MAX]; int node; - int max_rdma_atomic; int psn_mask; int psn_shift; int psn_modify_mask; u32 core_cap_flags; u32 max_mad_size; + u8 qos_shift; + u8 max_rdma_atomic; }; /* Protection domain */ From 8b103e9cdee5f381bc20a8a9f9bb5be11de8e68f Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Tue, 24 May 2016 12:50:40 -0700 Subject: [PATCH 39/42] IB/rdamvt: Fix rdmavt s_ack_queue sizing rdmavt allows the driver to specify the size of the ack queue, but only uses it for the modify QP limit testing for setting the atomic limit value. The driver dependent size is now used to size the s_ack_queue ring dynamicially. Since the driver knows its size, the driver will use its define for any ring size dependent code. Reviewed-by: Mitko Haralanov Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 22 ++++++++++++++++++++-- include/rdma/rdma_vt.h | 9 +++++++++ include/rdma/rdmavt_qp.h | 5 +---- 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index dd03b23a6a7c..5fa4d4d81ee0 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -397,6 +397,7 @@ static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn) static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends) { unsigned n; + struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) rvt_put_ss(&qp->s_rdma_read_sge); @@ -431,7 +432,7 @@ static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends) if (qp->ibqp.qp_type != IB_QPT_RC) return; - for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) { + for (n = 0; n < rvt_max_atomic(rdi); n++) { struct rvt_ack_entry *e = &qp->s_ack_queue[n]; if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST && @@ -569,7 +570,12 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, qp->s_ssn = 1; qp->s_lsn = 0; qp->s_mig_state = IB_MIG_MIGRATED; - memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue)); + if (qp->s_ack_queue) + memset( + qp->s_ack_queue, + 0, + rvt_max_atomic(rdi) * + sizeof(*qp->s_ack_queue)); qp->r_head_ack_queue = 0; qp->s_tail_ack_queue = 0; qp->s_num_rd_atomic = 0; @@ -677,6 +683,16 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, goto bail_swq; RCU_INIT_POINTER(qp->next, NULL); + if (init_attr->qp_type == IB_QPT_RC) { + qp->s_ack_queue = + kzalloc_node( + sizeof(*qp->s_ack_queue) * + rvt_max_atomic(rdi), + gfp, + rdi->dparms.node); + if (!qp->s_ack_queue) + goto bail_qp; + } /* * Driver needs to set up it's private QP structure and do any @@ -857,6 +873,7 @@ bail_driver_priv: rdi->driver_f.qp_priv_free(rdi, qp); bail_qp: + kfree(qp->s_ack_queue); kfree(qp); bail_swq: @@ -1284,6 +1301,7 @@ int rvt_destroy_qp(struct ib_qp *ibqp) vfree(qp->r_rq.wq); vfree(qp->s_wq); rdi->driver_f.qp_priv_free(rdi, qp); + kfree(qp->s_ack_queue); kfree(qp); return 0; } diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 3f12da9384a1..16274e2133cd 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -425,6 +425,15 @@ static inline unsigned rvt_get_npkeys(struct rvt_dev_info *rdi) return rdi->dparms.npkeys; } +/* + * Return the max atomic suitable for determining + * the size of the ack ring buffer in a QP. + */ +static inline unsigned int rvt_max_atomic(struct rvt_dev_info *rdi) +{ + return rdi->dparms.max_rdma_atomic + 1; +} + /* * Return the indexed PKEY from the port PKEY table. */ diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 0e1ff2abfe92..6d23b879416a 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -211,8 +211,6 @@ struct rvt_mmap_info { unsigned size; }; -#define RVT_MAX_RDMA_ATOMIC 16 - /* * This structure holds the information that the send tasklet needs * to send a RDMA read response or atomic operation. @@ -282,8 +280,7 @@ struct rvt_qp { atomic_t refcount ____cacheline_aligned_in_smp; wait_queue_head_t wait; - struct rvt_ack_entry s_ack_queue[RVT_MAX_RDMA_ATOMIC + 1] - ____cacheline_aligned_in_smp; + struct rvt_ack_entry *s_ack_queue; struct rvt_sge_state s_rdma_read_sge; spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */ From ce8b2fd0950ccc14440b02f05d2c7023608cfa76 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Tue, 24 May 2016 12:50:47 -0700 Subject: [PATCH 40/42] IB/hfi1: Update pkey table properly after link down or FM start When FM is disabled, and the HFI port on the switch is changed from MgmtAllowed=YES to MgmtAllowed=NO and the link is bounced, FULL_MGMT_P_KEY doesn't get cleared from the pkey table. This also occurs when the QSFP cable is moved from a switch port with MgmtAllowed=YES to a MgmtAllowed=NO port. Clear pkey entry properly. Also, when the driver is loaded and the switch port is set to MgmtAllowed=NO, FULL_MGMT_P_KEY shouldn't be added to pkey table after FM is started. Only set FULL_MGMT_P_KEY in the pkey table if switch port is configured to MgmtAllowed=YES. Reviewed-by: Dean Luick Signed-off-by: Sebastian Sanchez Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 9 +++++++++ drivers/infiniband/hw/hfi1/mad.c | 6 ++++++ 2 files changed, 15 insertions(+) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index d8cc329901d0..3b876da745a1 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1037,6 +1037,7 @@ static void dc_shutdown(struct hfi1_devdata *); static void dc_start(struct hfi1_devdata *); static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp, unsigned int *np); +static void remove_full_mgmt_pkey(struct hfi1_pportdata *ppd); /* * Error interrupt table entry. This is used as input to the interrupt @@ -6961,6 +6962,8 @@ void handle_link_down(struct work_struct *work) } reset_neighbor_info(ppd); + if (ppd->mgmt_allowed) + remove_full_mgmt_pkey(ppd); /* disable the port */ clear_rcvctrl(ppd->dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK); @@ -7069,6 +7072,12 @@ static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd) (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0); } +static void remove_full_mgmt_pkey(struct hfi1_pportdata *ppd) +{ + ppd->pkeys[2] = 0; + (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0); +} + /* * Convert the given link width to the OPA link width bitmask. */ diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 17882dc8650e..219029576ba0 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -1402,6 +1402,12 @@ static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys) if (key == okey) continue; + /* + * Don't update pkeys[2], if an HFI port without MgmtAllowed + * by neighbor is a switch. + */ + if (i == 2 && !ppd->mgmt_allowed && ppd->neighbor_type == 1) + continue; /* * The SM gives us the complete PKey table. We have * to ensure that we put the PKeys in the matching From f6de3d39cfc30387ce7dadb8542e4a635c706670 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Tue, 24 May 2016 12:51:00 -0700 Subject: [PATCH 41/42] IB/hfi1: Correct 8051 link parameter settings Two 8051 link settings, external device config and tuning method, were written in the wrong location and the previous settings were not cleared. For both, clear the old value and write the new value. Fixes: 8ebd4cf1852a ("staging/rdma/hfi1: Add active and optical cable support") Reviewed-by: Dennis Dalessandro Signed-off-by: Dean Luick Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.h | 6 ++++++ drivers/infiniband/hw/hfi1/platform.c | 6 ++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 1948706fff1a..66a327978739 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -398,6 +398,12 @@ /* Lane ID for general configuration registers */ #define GENERAL_CONFIG 4 +/* LINK_TUNING_PARAMETERS fields */ +#define TUNING_METHOD_SHIFT 24 + +/* LINK_OPTIMIZATION_SETTINGS fields */ +#define ENABLE_EXT_DEV_CONFIG_SHIFT 24 + /* LOAD_DATA 8051 command shifts and fields */ #define LOAD_DATA_FIELD_ID_SHIFT 40 #define LOAD_DATA_FIELD_ID_MASK 0xfull diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index ae0e4985cc7e..03df9322f862 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -540,7 +540,8 @@ static void apply_tunings( /* Enable external device config if channel is limiting active */ read_8051_config(ppd->dd, LINK_OPTIMIZATION_SETTINGS, GENERAL_CONFIG, &config_data); - config_data |= limiting_active; + config_data &= ~(0xff << ENABLE_EXT_DEV_CONFIG_SHIFT); + config_data |= ((u32)limiting_active << ENABLE_EXT_DEV_CONFIG_SHIFT); ret = load_8051_config(ppd->dd, LINK_OPTIMIZATION_SETTINGS, GENERAL_CONFIG, config_data); if (ret != HCMD_SUCCESS) @@ -553,7 +554,8 @@ static void apply_tunings( /* Pass tuning method to 8051 */ read_8051_config(ppd->dd, LINK_TUNING_PARAMETERS, GENERAL_CONFIG, &config_data); - config_data |= tuning_method; + config_data &= ~(0xff << TUNING_METHOD_SHIFT); + config_data |= ((u32)tuning_method << TUNING_METHOD_SHIFT); ret = load_8051_config(ppd->dd, LINK_TUNING_PARAMETERS, GENERAL_CONFIG, config_data); if (ret != HCMD_SUCCESS) From f158486527ebfb4c1fe4dcb69b12479090d66b72 Mon Sep 17 00:00:00 2001 From: Jubin John Date: Tue, 24 May 2016 12:51:06 -0700 Subject: [PATCH 42/42] IB/hfi1: Fix pio map initialization The pio map initialization function is off by 1 causing the last kernel send context that is allocated to not get mapped into the pio map which leads to the last kernel send context not being used by any of the qps. The send context reserved for VL15 is taken care of by setting the scontext variable that is used as the index into the kernel send context array to 1 and does not need to be accounted for in the kernel send context counting loop as it is currently done. Fix the kernel send context counting loop to account for all the allocated send contexts and map all of them to the different VLs. Reviewed-by: Dennis Dalessandro Reviewed-by: Mike Marciniszyn Reviewed-by: Jianxin Xiong Signed-off-by: Jubin John Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/pio.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index c67b9ad3fcf4..d5edb1afbb8f 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -1835,8 +1835,7 @@ int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts) struct pio_vl_map *oldmap, *newmap; if (!vl_scontexts) { - /* send context 0 reserved for VL15 */ - for (i = 1; i < dd->num_send_contexts; i++) + for (i = 0; i < dd->num_send_contexts; i++) if (dd->send_contexts[i].type == SC_KERNEL) num_kernel_send_contexts++; /* truncate divide */