From 7235aa79f683db0d908dcb0c2b7062dfdd765196 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 21 Nov 2012 15:47:40 +0000 Subject: [PATCH 01/10] UAPI: (Scripted) Disintegrate include/rdma Signed-off-by: David Howells Acked-by: Arnd Bergmann Acked-by: Thomas Gleixner Acked-by: Michael Kerrisk Acked-by: Paul E. McKenney Acked-by: Dave Jones Signed-off-by: Roland Dreier --- include/rdma/Kbuild | 6 ---- include/rdma/rdma_netlink.h | 36 +----------------------- include/uapi/rdma/Kbuild | 6 ++++ include/{ => uapi}/rdma/ib_user_cm.h | 0 include/{ => uapi}/rdma/ib_user_mad.h | 0 include/{ => uapi}/rdma/ib_user_sa.h | 0 include/{ => uapi}/rdma/ib_user_verbs.h | 0 include/uapi/rdma/rdma_netlink.h | 37 +++++++++++++++++++++++++ include/{ => uapi}/rdma/rdma_user_cm.h | 0 9 files changed, 44 insertions(+), 41 deletions(-) rename include/{ => uapi}/rdma/ib_user_cm.h (100%) rename include/{ => uapi}/rdma/ib_user_mad.h (100%) rename include/{ => uapi}/rdma/ib_user_sa.h (100%) rename include/{ => uapi}/rdma/ib_user_verbs.h (100%) create mode 100644 include/uapi/rdma/rdma_netlink.h rename include/{ => uapi}/rdma/rdma_user_cm.h (100%) diff --git a/include/rdma/Kbuild b/include/rdma/Kbuild index ea56f76c0c22..e69de29bb2d1 100644 --- a/include/rdma/Kbuild +++ b/include/rdma/Kbuild @@ -1,6 +0,0 @@ -header-y += ib_user_cm.h -header-y += ib_user_mad.h -header-y += ib_user_sa.h -header-y += ib_user_verbs.h -header-y += rdma_netlink.h -header-y += rdma_user_cm.h diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index bd3d8b24b420..e38de79eeb48 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -1,41 +1,9 @@ #ifndef _RDMA_NETLINK_H #define _RDMA_NETLINK_H -#include - -enum { - RDMA_NL_RDMA_CM = 1 -}; - -#define RDMA_NL_GET_CLIENT(type) ((type & (((1 << 6) - 1) << 10)) >> 10) -#define RDMA_NL_GET_OP(type) (type & ((1 << 10) - 1)) -#define RDMA_NL_GET_TYPE(client, op) ((client << 10) + op) - -enum { - RDMA_NL_RDMA_CM_ID_STATS = 0, - 
RDMA_NL_RDMA_CM_NUM_OPS -}; - -enum { - RDMA_NL_RDMA_CM_ATTR_SRC_ADDR = 1, - RDMA_NL_RDMA_CM_ATTR_DST_ADDR, - RDMA_NL_RDMA_CM_NUM_ATTR, -}; - -struct rdma_cm_id_stats { - __u32 qp_num; - __u32 bound_dev_if; - __u32 port_space; - __s32 pid; - __u8 cm_state; - __u8 node_type; - __u8 port_num; - __u8 qp_type; -}; - -#ifdef __KERNEL__ #include +#include struct ibnl_client_cbs { int (*dump)(struct sk_buff *skb, struct netlink_callback *nlcb); @@ -88,6 +56,4 @@ void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq, int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh, int len, void *data, int type); -#endif /* __KERNEL__ */ - #endif /* _RDMA_NETLINK_H */ diff --git a/include/uapi/rdma/Kbuild b/include/uapi/rdma/Kbuild index aafaa5aa54d4..687ae332200f 100644 --- a/include/uapi/rdma/Kbuild +++ b/include/uapi/rdma/Kbuild @@ -1 +1,7 @@ # UAPI Header export list +header-y += ib_user_cm.h +header-y += ib_user_mad.h +header-y += ib_user_sa.h +header-y += ib_user_verbs.h +header-y += rdma_netlink.h +header-y += rdma_user_cm.h diff --git a/include/rdma/ib_user_cm.h b/include/uapi/rdma/ib_user_cm.h similarity index 100% rename from include/rdma/ib_user_cm.h rename to include/uapi/rdma/ib_user_cm.h diff --git a/include/rdma/ib_user_mad.h b/include/uapi/rdma/ib_user_mad.h similarity index 100% rename from include/rdma/ib_user_mad.h rename to include/uapi/rdma/ib_user_mad.h diff --git a/include/rdma/ib_user_sa.h b/include/uapi/rdma/ib_user_sa.h similarity index 100% rename from include/rdma/ib_user_sa.h rename to include/uapi/rdma/ib_user_sa.h diff --git a/include/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h similarity index 100% rename from include/rdma/ib_user_verbs.h rename to include/uapi/rdma/ib_user_verbs.h diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h new file mode 100644 index 000000000000..8297285b6288 --- /dev/null +++ b/include/uapi/rdma/rdma_netlink.h @@ -0,0 +1,37 @@ +#ifndef _UAPI_RDMA_NETLINK_H 
+#define _UAPI_RDMA_NETLINK_H + +#include + +enum { + RDMA_NL_RDMA_CM = 1 +}; + +#define RDMA_NL_GET_CLIENT(type) ((type & (((1 << 6) - 1) << 10)) >> 10) +#define RDMA_NL_GET_OP(type) (type & ((1 << 10) - 1)) +#define RDMA_NL_GET_TYPE(client, op) ((client << 10) + op) + +enum { + RDMA_NL_RDMA_CM_ID_STATS = 0, + RDMA_NL_RDMA_CM_NUM_OPS +}; + +enum { + RDMA_NL_RDMA_CM_ATTR_SRC_ADDR = 1, + RDMA_NL_RDMA_CM_ATTR_DST_ADDR, + RDMA_NL_RDMA_CM_NUM_ATTR, +}; + +struct rdma_cm_id_stats { + __u32 qp_num; + __u32 bound_dev_if; + __u32 port_space; + __s32 pid; + __u8 cm_state; + __u8 node_type; + __u8 port_num; + __u8 qp_type; +}; + + +#endif /* _UAPI_RDMA_NETLINK_H */ diff --git a/include/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h similarity index 100% rename from include/rdma/rdma_user_cm.h rename to include/uapi/rdma/rdma_user_cm.h From 3127e4ea54fc023e35adb1d9af29d49c6d582d12 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Fri, 2 Nov 2012 23:17:34 +0000 Subject: [PATCH 02/10] RDMA/nes: Fix incorrect address of IP header Fix for incorrect ip header address when forwarding fpdus to hardware. 
Signed-off-by: Tatyana Nikolova Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_mgt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/nes/nes_mgt.c b/drivers/infiniband/hw/nes/nes_mgt.c index 3ba7be369452..8cf74fd0c44f 100644 --- a/drivers/infiniband/hw/nes/nes_mgt.c +++ b/drivers/infiniband/hw/nes/nes_mgt.c @@ -447,7 +447,7 @@ static int forward_fpdus(struct nes_vnic *nesvnic, struct nes_qp *nesqp) set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX, lower_32_bits(u64tmp)); set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_HIGH_IDX, - upper_32_bits(u64tmp >> 32)); + upper_32_bits(u64tmp)); set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_LOW_IDX, lower_32_bits(fpdu_info->frags[0].physaddr)); From bff3976bef4f917554292d48ce43636f3d040182 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Fri, 2 Nov 2012 23:17:45 +0000 Subject: [PATCH 03/10] RDMA/nes: Fix for unlinking skbs from empty list Signed-off-by: Tatyana Nikolova Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_mgt.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes_mgt.c b/drivers/infiniband/hw/nes/nes_mgt.c index 8cf74fd0c44f..1f5d69e7793a 100644 --- a/drivers/infiniband/hw/nes/nes_mgt.c +++ b/drivers/infiniband/hw/nes/nes_mgt.c @@ -210,6 +210,9 @@ static struct sk_buff *nes_get_next_skb(struct nes_device *nesdev, struct nes_qp } while (1) { + if (skb_queue_empty(&nesqp->pau_list)) + goto out; + seq = nes_get_seq(skb, ack, wnd, fin_rcvd, rst_rcvd); if (seq == nextseq) { if (skb->len || processacks) @@ -218,14 +221,13 @@ static struct sk_buff *nes_get_next_skb(struct nes_device *nesdev, struct nes_qp goto out; } - if (skb->next == (struct sk_buff *)&nesqp->pau_list) - goto out; - old_skb = skb; skb = skb->next; skb_unlink(old_skb, &nesqp->pau_list); nes_mgt_free_skb(nesdev, old_skb, PCI_DMA_TODEVICE); nes_rem_ref_cm_node(nesqp->cm_node); + 
if (skb == (struct sk_buff *)&nesqp->pau_list) + goto out; } return skb; @@ -384,7 +386,8 @@ static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp, if (frags[i].skb->len == 0) { /* Pull skb off the list - it will be freed in the callback */ spin_lock_irqsave(&nesqp->pau_lock, flags); - skb_unlink(frags[i].skb, &nesqp->pau_list); + if (!skb_queue_empty(&nesqp->pau_list)) + skb_unlink(frags[i].skb, &nesqp->pau_list); spin_unlock_irqrestore(&nesqp->pau_lock, flags); } else { /* Last skb still has data so update the seq */ From fc8d7547b1e19e1bb8f3206837ee0c7c6538e2e5 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Fri, 2 Nov 2012 23:17:54 +0000 Subject: [PATCH 04/10] RDMA/nes: Fix for sending fpdus in order to hardware Locking fix to prevent race conditions. Fpdus (per qp) need to be forwarded to hardware in the order of their sequence numbers. Signed-off-by: Tatyana Nikolova Signed-off-by: Donald Wood Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_mgt.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes_mgt.c b/drivers/infiniband/hw/nes/nes_mgt.c index 1f5d69e7793a..4d6d77fd18b6 100644 --- a/drivers/infiniband/hw/nes/nes_mgt.c +++ b/drivers/infiniband/hw/nes/nes_mgt.c @@ -247,7 +247,6 @@ static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp, struct nes_rskb_cb *cb; struct pau_fpdu_info *fpdu_info = NULL; struct pau_fpdu_frag frags[MAX_FPDU_FRAGS]; - unsigned long flags; u32 fpdu_len = 0; u32 tmp_len; int frag_cnt = 0; @@ -262,12 +261,10 @@ static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp, *pau_fpdu_info = NULL; - spin_lock_irqsave(&nesqp->pau_lock, flags); skb = nes_get_next_skb(nesdev, nesqp, NULL, nesqp->pau_rcv_nxt, &ack, &wnd, &fin_rcvd, &rst_rcvd); - if (!skb) { - spin_unlock_irqrestore(&nesqp->pau_lock, flags); + if (!skb) goto out; - } + cb = (struct nes_rskb_cb *)&skb->cb[0]; if (skb->len) { fpdu_len = 
be16_to_cpu(*(__be16 *) skb->data) + MPA_FRAMING; @@ -292,10 +289,9 @@ static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp, skb = nes_get_next_skb(nesdev, nesqp, skb, nesqp->pau_rcv_nxt + frag_tot, &ack, &wnd, &fin_rcvd, &rst_rcvd); - if (!skb) { - spin_unlock_irqrestore(&nesqp->pau_lock, flags); + if (!skb) goto out; - } else if (rst_rcvd) { + if (rst_rcvd) { /* rst received in the middle of fpdu */ for (; i >= 0; i--) { skb_unlink(frags[i].skb, &nesqp->pau_list); @@ -322,8 +318,6 @@ static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp, frag_cnt = 1; } - spin_unlock_irqrestore(&nesqp->pau_lock, flags); - /* Found one */ fpdu_info = kzalloc(sizeof(*fpdu_info), GFP_ATOMIC); if (fpdu_info == NULL) { @@ -385,10 +379,8 @@ static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp, if (frags[i].skb->len == 0) { /* Pull skb off the list - it will be freed in the callback */ - spin_lock_irqsave(&nesqp->pau_lock, flags); if (!skb_queue_empty(&nesqp->pau_list)) skb_unlink(frags[i].skb, &nesqp->pau_list); - spin_unlock_irqrestore(&nesqp->pau_lock, flags); } else { /* Last skb still has data so update the seq */ iph = (struct iphdr *)(cb->data_start + ETH_HLEN); @@ -417,14 +409,18 @@ static int forward_fpdus(struct nes_vnic *nesvnic, struct nes_qp *nesqp) struct pau_fpdu_info *fpdu_info; struct nes_hw_cqp_wqe *cqp_wqe; struct nes_cqp_request *cqp_request; + unsigned long flags; u64 u64tmp; u32 u32tmp; int rc; while (1) { + spin_lock_irqsave(&nesqp->pau_lock, flags); rc = get_fpdu_info(nesdev, nesqp, &fpdu_info); - if (fpdu_info == NULL) + if (rc || (fpdu_info == NULL)) { + spin_unlock_irqrestore(&nesqp->pau_lock, flags); return rc; + } cqp_request = fpdu_info->cqp_request; cqp_wqe = &cqp_request->cqp_wqe; @@ -478,6 +474,7 @@ static int forward_fpdus(struct nes_vnic *nesvnic, struct nes_qp *nesqp) atomic_set(&cqp_request->refcount, 1); nes_post_cqp_request(nesdev, cqp_request); + spin_unlock_irqrestore(&nesqp->pau_lock, 
flags); } return 0; From cecdcd5f24be8c532ad8dcbbd93c7b477cfd3413 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Tue, 13 Nov 2012 22:20:41 +0000 Subject: [PATCH 05/10] RDMA/nes: Fix for incorrect multicast address in the perfect filter table Signed-off-by: Tatyana Nikolova Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_nic.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 0564be757d82..9542e1644a5c 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -944,12 +944,13 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev) addr, perfect_filter_register_address+(mc_index * 8), mc_nic_index); - macaddr_high = ((u16) addr[0]) << 8; - macaddr_high += (u16) addr[1]; - macaddr_low = ((u32) addr[2]) << 24; - macaddr_low += ((u32) addr[3]) << 16; - macaddr_low += ((u32) addr[4]) << 8; - macaddr_low += (u32) addr[5]; + macaddr_high = ((u8) addr[0]) << 8; + macaddr_high += (u8) addr[1]; + macaddr_low = ((u8) addr[2]) << 24; + macaddr_low += ((u8) addr[3]) << 16; + macaddr_low += ((u8) addr[4]) << 8; + macaddr_low += (u8) addr[5]; + nes_write_indexed(nesdev, perfect_filter_register_address+(mc_index * 8), macaddr_low); From 079abea6a37fd3b4f7e1b7cf9e4d055463988753 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 3 Nov 2012 10:58:37 +0000 Subject: [PATCH 06/10] RDMA/nes: Use WARN() Use WARN() rather than printk() followed by WARN_ON(1), for conciseness. A simplified version of the semantic patch that makes this transformation is as follows: (http://coccinelle.lip6.fr/) // @@ expression list es; @@ -printk( +WARN(1, es); -WARN_ON(1); // Signed-off-by: Julia Lawall [ Remove extra KERN_ERR from WARN() format. 
- Roland ] Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_cm.c | 8 +++----- drivers/infiniband/hw/nes/nes_mgt.c | 6 ++---- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index cfaacaf6bf5f..feb41e74206b 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -629,11 +629,9 @@ static void build_rdma0_msg(struct nes_cm_node *cm_node, struct nes_qp **nesqp_a case SEND_RDMA_READ_ZERO: default: - if (cm_node->send_rdma0_op != SEND_RDMA_READ_ZERO) { - printk(KERN_ERR "%s[%u]: Unsupported RDMA0 len operation=%u\n", - __func__, __LINE__, cm_node->send_rdma0_op); - WARN_ON(1); - } + if (cm_node->send_rdma0_op != SEND_RDMA_READ_ZERO) + WARN(1, "Unsupported RDMA0 len operation=%u\n", + cm_node->send_rdma0_op); nes_debug(NES_DBG_CM, "Sending first rdma operation.\n"); wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = cpu_to_le32(NES_IWARP_SQ_OP_RDMAR); diff --git a/drivers/infiniband/hw/nes/nes_mgt.c b/drivers/infiniband/hw/nes/nes_mgt.c index 4d6d77fd18b6..416645259b0f 100644 --- a/drivers/infiniband/hw/nes/nes_mgt.c +++ b/drivers/infiniband/hw/nes/nes_mgt.c @@ -649,11 +649,9 @@ static void nes_chg_qh_handler(struct nes_device *nesdev, struct nes_cqp_request nesqp = qh_chg->nesqp; /* Should we handle the bad completion */ - if (cqp_request->major_code) { - printk(KERN_ERR PFX "Invalid cqp_request major_code=0x%x\n", + if (cqp_request->major_code) + WARN(1, PFX "Invalid cqp_request major_code=0x%x\n", cqp_request->major_code); - WARN_ON(1); - } switch (nesqp->pau_state) { case PAU_DEL_QH: From 5390f86796a1f444ca1a7ba7315951e26acd958d Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 25 Oct 2012 14:27:07 +0000 Subject: [PATCH 07/10] IB/ipath: Remove unreachable code Signed-off-by: Alan Cox Acked-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_init_chip.c | 10 ---------- 1 file changed, 10 deletions(-) diff 
--git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index 49b09c697c7c..be2a60e142b0 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -718,16 +718,6 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) if (ret) goto done; - /* - * we ignore most issues after reporting them, but have to specially - * handle hardware-disabled chips. - */ - if (ret == 2) { - /* unique error, known to ipath_init_one */ - ret = -EPERM; - goto done; - } - /* * We could bump this to allow for full rcvegrcnt + rcvtidcnt, * but then it no longer nicely fits power of two, and since From c9795bd708e37ed4154e838a1f4576192eeeacca Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 25 Oct 2012 14:36:51 +0000 Subject: [PATCH 08/10] RDMA/amsol1100: Fix missing break Signed-off-by: Alan Cox Signed-off-by: Roland Dreier --- drivers/infiniband/hw/amso1100/c2_ae.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/amso1100/c2_ae.c b/drivers/infiniband/hw/amso1100/c2_ae.c index 32d34e88d5cf..706cf97cbe8f 100644 --- a/drivers/infiniband/hw/amso1100/c2_ae.c +++ b/drivers/infiniband/hw/amso1100/c2_ae.c @@ -311,6 +311,7 @@ void c2_ae_event(struct c2_dev *c2dev, u32 mq_index) if (cq->ibcq.event_handler) cq->ibcq.event_handler(&ib_event, cq->ibcq.cq_context); + break; } default: From 08ff32352d6ff7083533dc1c25618d42f92ec28e Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 21 Oct 2012 14:59:24 +0000 Subject: [PATCH 09/10] mlx4: 64-byte CQE/EQE support ConnectX-3 devices can use either 64- or 32-byte completion queue entries (CQEs) and event queue entries (EQEs). Using 64-byte EQEs/CQEs performs better because each entry is aligned to a complete cacheline. This patch queries the HCA's capabilities, and if it supports 64-byte CQEs and EQES the driver will configure the HW to work in 64-byte mode. The 32-byte vs 64-byte mode is global per HCA and not per CQ or EQ. 
Since this mode is global, userspace (libmlx4) must be updated to work with the configured CQE size, and guests using SR-IOV virtual functions need to know both EQE and CQE size. In case one of the 64-byte CQE/EQE capabilities is activated, the patch makes sure that older guest drivers that use the QUERY_DEV_FUNC command (e.g. as done in mlx4_core of Linux 3.3..3.6) will notice that they need an update to be able to work with the PPF. This is done by changing the returned pf_context_behaviour not to be zero any more. In case none of these capabilities is activated that value remains zero and older guest drivers can run OK. The SR-IOV related flow is as follows: 1. the PPF does the detection of the new capabilities using QUERY_DEV_CAP command. 2. the PPF activates the new capabilities using INIT_HCA. 3. the VF detects if the PPF activated the capabilities using QUERY_HCA, and if this is the case activates them for itself too. Note that the VF detects that it must be aware of the new PF behaviour using QUERY_FUNC_CAP. Steps 1 and 2 also apply to native mode. User space notification is done through a new field introduced in struct mlx4_ib_ucontext which holds device capabilities for which user space must take action. This changes the binary interface so the ABI towards libmlx4 exposed through uverbs is bumped from 3 to 4 but only when **needed**, i.e. only when the driver does use 64-byte CQEs or future device capabilities with which user space must be in sync. This practice allows working with an unmodified libmlx4 on older devices (e.g. A0, B0) which don't support 64-byte CQEs. In order to keep existing systems functional when they update to a newer kernel that contains these changes in VF and userspace ABI, a module parameter enable_64b_cqe_eqe must be set to enable 64-byte mode; the default is currently false.
Signed-off-by: Eli Cohen Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/cq.c | 34 +++++++++++++---- drivers/infiniband/hw/mlx4/main.c | 27 ++++++++++--- drivers/infiniband/hw/mlx4/mlx4_ib.h | 1 + drivers/infiniband/hw/mlx4/user.h | 14 ++++++- drivers/net/ethernet/mellanox/mlx4/cmd.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_cq.c | 2 +- .../net/ethernet/mellanox/mlx4/en_netdev.c | 1 + drivers/net/ethernet/mellanox/mlx4/en_rx.c | 5 ++- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 5 ++- drivers/net/ethernet/mellanox/mlx4/eq.c | 26 ++++++++----- drivers/net/ethernet/mellanox/mlx4/fw.c | 30 ++++++++++++++- drivers/net/ethernet/mellanox/mlx4/fw.h | 1 + drivers/net/ethernet/mellanox/mlx4/main.c | 38 ++++++++++++++++++- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 1 + include/linux/mlx4/device.h | 21 ++++++++++ 15 files changed, 176 insertions(+), 32 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index c9eb6a6815ce..ae67df35dd4d 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -66,7 +66,7 @@ static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type) static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n) { - return mlx4_buf_offset(&buf->buf, n * sizeof (struct mlx4_cqe)); + return mlx4_buf_offset(&buf->buf, n * buf->entry_size); } static void *get_cqe(struct mlx4_ib_cq *cq, int n) @@ -77,8 +77,9 @@ static void *get_cqe(struct mlx4_ib_cq *cq, int n) static void *get_sw_cqe(struct mlx4_ib_cq *cq, int n) { struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibcq.cqe); + struct mlx4_cqe *tcqe = ((cq->buf.entry_size == 64) ? (cqe + 1) : cqe); - return (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^ + return (!!(tcqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1))) ? 
NULL : cqe; } @@ -99,12 +100,13 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf * { int err; - err = mlx4_buf_alloc(dev->dev, nent * sizeof(struct mlx4_cqe), + err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size, PAGE_SIZE * 2, &buf->buf); if (err) goto out; + buf->entry_size = dev->dev->caps.cqe_size; err = mlx4_mtt_init(dev->dev, buf->buf.npages, buf->buf.page_shift, &buf->mtt); if (err) @@ -120,8 +122,7 @@ err_mtt: mlx4_mtt_cleanup(dev->dev, &buf->mtt); err_buf: - mlx4_buf_free(dev->dev, nent * sizeof(struct mlx4_cqe), - &buf->buf); + mlx4_buf_free(dev->dev, nent * buf->entry_size, &buf->buf); out: return err; @@ -129,7 +130,7 @@ out: static void mlx4_ib_free_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int cqe) { - mlx4_buf_free(dev->dev, (cqe + 1) * sizeof(struct mlx4_cqe), &buf->buf); + mlx4_buf_free(dev->dev, (cqe + 1) * buf->entry_size, &buf->buf); } static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *context, @@ -137,8 +138,9 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *cont u64 buf_addr, int cqe) { int err; + int cqe_size = dev->dev->caps.cqe_size; - *umem = ib_umem_get(context, buf_addr, cqe * sizeof (struct mlx4_cqe), + *umem = ib_umem_get(context, buf_addr, cqe * cqe_size, IB_ACCESS_LOCAL_WRITE, 1); if (IS_ERR(*umem)) return PTR_ERR(*umem); @@ -331,16 +333,23 @@ static void mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq) { struct mlx4_cqe *cqe, *new_cqe; int i; + int cqe_size = cq->buf.entry_size; + int cqe_inc = cqe_size == 64 ? 
1 : 0; i = cq->mcq.cons_index; cqe = get_cqe(cq, i & cq->ibcq.cqe); + cqe += cqe_inc; + while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) { new_cqe = get_cqe_from_buf(&cq->resize_buf->buf, (i + 1) & cq->resize_buf->cqe); - memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), sizeof(struct mlx4_cqe)); + memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), cqe_size); + new_cqe += cqe_inc; + new_cqe->owner_sr_opcode = (cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK) | (((i + 1) & (cq->resize_buf->cqe + 1)) ? MLX4_CQE_OWNER_MASK : 0); cqe = get_cqe(cq, ++i & cq->ibcq.cqe); + cqe += cqe_inc; } ++cq->mcq.cons_index; } @@ -438,6 +447,7 @@ err_buf: out: mutex_unlock(&cq->resize_mutex); + return err; } @@ -586,6 +596,9 @@ repoll: if (!cqe) return -EAGAIN; + if (cq->buf.entry_size == 64) + cqe++; + ++cq->mcq.cons_index; /* @@ -807,6 +820,7 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq) int nfreed = 0; struct mlx4_cqe *cqe, *dest; u8 owner_bit; + int cqe_inc = cq->buf.entry_size == 64 ? 
1 : 0; /* * First we need to find the current producer index, so we @@ -825,12 +839,16 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq) */ while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) { cqe = get_cqe(cq, prod_index & cq->ibcq.cqe); + cqe += cqe_inc; + if ((be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) { if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK)) mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index)); ++nfreed; } else if (nfreed) { dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe); + dest += cqe_inc; + owner_bit = dest->owner_sr_opcode & MLX4_CQE_OWNER_MASK; memcpy(dest, cqe, sizeof *cqe); dest->owner_sr_opcode = owner_bit | diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 718ec6b2bad2..e7d81c0d1ac5 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -563,15 +563,24 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev, { struct mlx4_ib_dev *dev = to_mdev(ibdev); struct mlx4_ib_ucontext *context; + struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3; struct mlx4_ib_alloc_ucontext_resp resp; int err; if (!dev->ib_active) return ERR_PTR(-EAGAIN); - resp.qp_tab_size = dev->dev->caps.num_qps; - resp.bf_reg_size = dev->dev->caps.bf_reg_size; - resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page; + if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) { + resp_v3.qp_tab_size = dev->dev->caps.num_qps; + resp_v3.bf_reg_size = dev->dev->caps.bf_reg_size; + resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page; + } else { + resp.dev_caps = dev->dev->caps.userspace_caps; + resp.qp_tab_size = dev->dev->caps.num_qps; + resp.bf_reg_size = dev->dev->caps.bf_reg_size; + resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page; + resp.cqe_size = dev->dev->caps.cqe_size; + } context = kmalloc(sizeof *context, GFP_KERNEL); if (!context) @@ -586,7 +595,11 @@ static struct ib_ucontext 
*mlx4_ib_alloc_ucontext(struct ib_device *ibdev, INIT_LIST_HEAD(&context->db_page_list); mutex_init(&context->db_page_mutex); - err = ib_copy_to_udata(udata, &resp, sizeof resp); + if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) + err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3)); + else + err = ib_copy_to_udata(udata, &resp, sizeof(resp)); + if (err) { mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar); kfree(context); @@ -1342,7 +1355,11 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; ibdev->ib_dev.dma_device = &dev->pdev->dev; - ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION; + if (dev->caps.userspace_caps) + ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION; + else + ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION; + ibdev->ib_dev.uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index e04cbc9a54a5..dcd845bc30f0 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -90,6 +90,7 @@ struct mlx4_ib_xrcd { struct mlx4_ib_cq_buf { struct mlx4_buf buf; struct mlx4_mtt mtt; + int entry_size; }; struct mlx4_ib_cq_resize { diff --git a/drivers/infiniband/hw/mlx4/user.h b/drivers/infiniband/hw/mlx4/user.h index 13beedeeef9f..07e6769ef43b 100644 --- a/drivers/infiniband/hw/mlx4/user.h +++ b/drivers/infiniband/hw/mlx4/user.h @@ -40,7 +40,9 @@ * Increment this value if any changes that break userspace ABI * compatibility are made. */ -#define MLX4_IB_UVERBS_ABI_VERSION 3 + +#define MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION 3 +#define MLX4_IB_UVERBS_ABI_VERSION 4 /* * Make sure that all structs defined in this file remain laid out so @@ -50,12 +52,20 @@ * instead. 
*/ -struct mlx4_ib_alloc_ucontext_resp { +struct mlx4_ib_alloc_ucontext_resp_v3 { __u32 qp_tab_size; __u16 bf_reg_size; __u16 bf_regs_per_page; }; +struct mlx4_ib_alloc_ucontext_resp { + __u32 dev_caps; + __u32 qp_tab_size; + __u16 bf_reg_size; + __u16 bf_regs_per_page; + __u32 cqe_size; +}; + struct mlx4_ib_alloc_pd_resp { __u32 pdn; __u32 reserved; diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 3d1899ff1076..e791e705f7b1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1755,7 +1755,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) spin_lock_init(&s_state->lock); } - memset(&priv->mfunc.master.cmd_eqe, 0, sizeof(struct mlx4_eqe)); + memset(&priv->mfunc.master.cmd_eqe, 0, dev->caps.eqe_size); priv->mfunc.master.cmd_eqe.type = MLX4_EVENT_TYPE_CMD; INIT_WORK(&priv->mfunc.master.comm_work, mlx4_master_comm_channel); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index aa9c2f6cf3c0..b8d0854a7ad1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -51,7 +51,7 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv, int err; cq->size = entries; - cq->buf_size = cq->size * sizeof(struct mlx4_cqe); + cq->buf_size = cq->size * mdev->dev->caps.cqe_size; cq->ring = ring; cq->is_tx = mode; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index edd9cb8d3e1d..93a325669582 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1600,6 +1600,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, goto out; } priv->rx_ring_num = prof->rx_ring_num; + priv->cqe_factor = (mdev->dev->caps.cqe_size == 64) ? 
1 : 0; priv->mac_index = -1; priv->msg_enable = MLX4_EN_MSG_LEVEL; spin_lock_init(&priv->stats_lock); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 5aba5ecdf1e2..6fa106f6c0ec 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -566,6 +566,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud struct ethhdr *ethh; dma_addr_t dma; u64 s_mac; + int factor = priv->cqe_factor; if (!priv->port_up) return 0; @@ -574,7 +575,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud * descriptor offset can be deduced from the CQE index instead of * reading 'cqe->index' */ index = cq->mcq.cons_index & ring->size_mask; - cqe = &cq->buf[index]; + cqe = &cq->buf[(index << factor) + factor]; /* Process all completed CQEs */ while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, @@ -709,7 +710,7 @@ next: ++cq->mcq.cons_index; index = (cq->mcq.cons_index) & ring->size_mask; - cqe = &cq->buf[index]; + cqe = &cq->buf[(index << factor) + factor]; if (++polled == budget) { /* We are here because we reached the NAPI budget - * flush only pending LRO sessions */ diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index b35094c590ba..25c157abdd92 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -315,12 +315,13 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq) struct mlx4_cqe *buf = cq->buf; u32 packets = 0; u32 bytes = 0; + int factor = priv->cqe_factor; if (!priv->port_up) return; index = cons_index & size_mask; - cqe = &buf[index]; + cqe = &buf[(index << factor) + factor]; ring_index = ring->cons & size_mask; /* Process all completed CQEs */ @@ -349,7 +350,7 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq) ++cons_index; index = cons_index 
& size_mask; - cqe = &buf[index]; + cqe = &buf[(index << factor) + factor]; } diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index b84a88bc44dc..c509a86db610 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -101,15 +101,21 @@ static void eq_set_ci(struct mlx4_eq *eq, int req_not) mb(); } -static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry) +static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry, u8 eqe_factor) { - unsigned long off = (entry & (eq->nent - 1)) * MLX4_EQ_ENTRY_SIZE; - return eq->page_list[off / PAGE_SIZE].buf + off % PAGE_SIZE; + /* (entry & (eq->nent - 1)) gives us a cyclic array */ + unsigned long offset = (entry & (eq->nent - 1)) * (MLX4_EQ_ENTRY_SIZE << eqe_factor); + /* CX3 is capable of extending the EQE from 32 to 64 bytes. + * When this feature is enabled, the first (in the lower addresses) + * 32 bytes in the 64 byte EQE are reserved and the next 32 bytes + * contain the legacy EQE information. + */ + return eq->page_list[offset / PAGE_SIZE].buf + (offset + (eqe_factor ? MLX4_EQ_ENTRY_SIZE : 0)) % PAGE_SIZE; } -static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq) +static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq, u8 eqe_factor) { - struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index); + struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index, eqe_factor); return !!(eqe->owner & 0x80) ^ !!(eq->cons_index & eq->nent) ? 
NULL : eqe; } @@ -177,7 +183,7 @@ static void slave_event(struct mlx4_dev *dev, u8 slave, struct mlx4_eqe *eqe) return; } - memcpy(s_eqe, eqe, sizeof(struct mlx4_eqe) - 1); + memcpy(s_eqe, eqe, sizeof(struct mlx4_eqe) - 1); /* eqe points at the legacy part; copy one struct mlx4_eqe minus the owner byte, never caps.eqe_size */ s_eqe->slave_id = slave; /* ensure all information is written before setting the ownersip bit */ wmb(); @@ -441,7 +447,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) int i; enum slave_port_gen_event gen_event; - while ((eqe = next_eqe_sw(eq))) { + while ((eqe = next_eqe_sw(eq, dev->caps.eqe_factor))) { /* * Make sure we read EQ entry contents after we've * checked the ownership bit. @@ -864,7 +870,8 @@ static int mlx4_create_eq(struct mlx4_dev *dev, int nent, eq->dev = dev; eq->nent = roundup_pow_of_two(max(nent, 2)); - npages = PAGE_ALIGN(eq->nent * MLX4_EQ_ENTRY_SIZE) / PAGE_SIZE; + /* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes */ + npages = PAGE_ALIGN(eq->nent * (MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor)) / PAGE_SIZE; eq->page_list = kmalloc(npages * sizeof *eq->page_list, GFP_KERNEL); @@ -966,8 +973,9 @@ static void mlx4_free_eq(struct mlx4_dev *dev, struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_cmd_mailbox *mailbox; int err; - int npages = PAGE_ALIGN(MLX4_EQ_ENTRY_SIZE * eq->nent) / PAGE_SIZE; int i; + /* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes */ + int npages = PAGE_ALIGN((MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor) * eq->nent) / PAGE_SIZE; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 4f30b99324cf..9a9de51ecc91 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -110,6 +110,8 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u64 flags) [42] = "Multicast VEP steering support", [48] = "Counters support", [59] = "Port management change event support", + [61] = "64 byte EQE support", + [62] = "64 byte CQE
support", }; int i; @@ -235,7 +237,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, field = dev->caps.num_ports; MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_NUM_PORTS_OFFSET); - size = 0; /* no PF behaviour is set for now */ + size = dev->caps.function_caps; /* set PF behaviours */ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_PF_BHVR_OFFSET); field = 0; /* protected FMR support not available as yet */ @@ -1237,6 +1239,24 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS) *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 4); + /* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */ + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_EQE) { + *(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 29); + dev->caps.eqe_size = 64; + dev->caps.eqe_factor = 1; + } else { + dev->caps.eqe_size = 32; + dev->caps.eqe_factor = 0; + } + + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_CQE) { + *(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 30); + dev->caps.cqe_size = 64; + dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE; + } else { + dev->caps.cqe_size = 32; + } + /* QPC/EEC/CQC/EQC/RDMARC attributes */ MLX4_PUT(inbox, param->qpc_base, INIT_HCA_QPC_BASE_OFFSET); @@ -1319,6 +1339,7 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox; __be32 *outbox; int err; + u8 byte_field; #define QUERY_HCA_GLOBAL_CAPS_OFFSET 0x04 @@ -1370,6 +1391,13 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, INIT_HCA_LOG_MC_TABLE_SZ_OFFSET); } + /* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */ + MLX4_GET(byte_field, outbox, INIT_HCA_EQE_CQE_OFFSETS); + if (byte_field & 0x20) /* 64-bytes eqe enabled */ + param->dev_cap_enabled |= MLX4_DEV_CAP_64B_EQE_ENABLED; + if (byte_field & 0x40) /* 64-bytes cqe enabled */ + param->dev_cap_enabled |= MLX4_DEV_CAP_64B_CQE_ENABLED; + /* TPT attributes */ MLX4_GET(param->dmpt_base, outbox, 
INIT_HCA_DMPT_BASE_OFFSET); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 85abe9c11a22..2c2e7ade2a34 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -172,6 +172,7 @@ struct mlx4_init_hca_param { u8 log_uar_sz; u8 uar_page_sz; /* log pg sz in 4k chunks */ u8 fs_hash_enable_bits; + u64 dev_cap_enabled; }; struct mlx4_init_ib_param { diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 2aa80afd98d2..4337f685175d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -95,8 +95,14 @@ MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num" " Not in use with device managed" " flow steering"); +static bool enable_64b_cqe_eqe; +module_param(enable_64b_cqe_eqe, bool, 0444); +MODULE_PARM_DESC(enable_64b_cqe_eqe, + "Enable 64 byte CQEs/EQEs when the the FW supports this"); + #define HCA_GLOBAL_CAP_MASK 0 -#define PF_CONTEXT_BEHAVIOUR_MASK 0 + +#define PF_CONTEXT_BEHAVIOUR_MASK MLX4_FUNC_CAP_64B_EQE_CQE static char mlx4_version[] __devinitdata = DRV_NAME ": Mellanox ConnectX core driver v" @@ -386,6 +392,21 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH]; dev->caps.sqp_demux = (mlx4_is_master(dev)) ? 
MLX4_MAX_NUM_SLAVES : 0; + + if (!enable_64b_cqe_eqe) { + if (dev_cap->flags & + (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) { + mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n"); + dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE; + dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE; + } + } + /* Test dev->caps.flags (masked above), not the raw dev_cap->flags, so VFs are not told 64B EQE/CQE is on when it was disabled */ + if ((dev->caps.flags & + (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) && + mlx4_is_master(dev)) + dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE; + return 0; } /*The function checks if there are live vf, return the num of them*/ @@ -599,6 +620,21 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) goto err_mem; } + if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) { + dev->caps.eqe_size = 64; + dev->caps.eqe_factor = 1; + } else { + dev->caps.eqe_size = 32; + dev->caps.eqe_factor = 0; + } + + if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) { + dev->caps.cqe_size = 64; + dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE; + } else { + dev->caps.cqe_size = 32; + } + return 0; err_mem: diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 9d27e42264e2..73b5c2ac5bd5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -487,6 +487,7 @@ struct mlx4_en_priv { int mac_index; unsigned max_mtu; int base_qpn; + int cqe_factor; struct mlx4_en_rss_map rss_map; __be32 ctrl_flags; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 6d1acb04cd17..21821da2abfd 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -142,6 +142,8 @@ enum { MLX4_DEV_CAP_FLAG_COUNTERS = 1LL << 48, MLX4_DEV_CAP_FLAG_SENSE_SUPPORT = 1LL << 55, MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV = 1LL << 59, + MLX4_DEV_CAP_FLAG_64B_EQE = 1LL << 61, + MLX4_DEV_CAP_FLAG_64B_CQE = 1LL << 62 }; enum { @@ -151,6 +153,20 @@ enum { MLX4_DEV_CAP_FLAG2_FS_EN = 1LL << 3 }; +enum { +
MLX4_DEV_CAP_64B_EQE_ENABLED = 1LL << 0, + MLX4_DEV_CAP_64B_CQE_ENABLED = 1LL << 1 +}; + +enum { + MLX4_USER_DEV_CAP_64B_CQE = 1L << 0 +}; + +enum { + MLX4_FUNC_CAP_64B_EQE_CQE = 1L << 0 +}; + + #define MLX4_ATTR_EXTENDED_PORT_INFO cpu_to_be16(0xff90) enum { @@ -419,6 +435,11 @@ struct mlx4_caps { u32 max_counters; u8 port_ib_mtu[MLX4_MAX_PORTS + 1]; u16 sqp_demux; + u32 eqe_size; + u32 cqe_size; + u8 eqe_factor; + u32 userspace_caps; /* userspace must be aware of these */ + u32 function_caps; /* VFs must be aware of these */ }; struct mlx4_buf_list { From 5107c2a3d117de8219a53e622c0f1f1bc3f7d1ae Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 3 Nov 2012 10:58:29 +0000 Subject: [PATCH 10/10] RDMA/cxgb3: use WARN Use WARN rather than printk followed by WARN_ON(1), for conciseness. A simplified version of the semantic patch that makes this transformation is as follows: (http://coccinelle.lip6.fr/) // @@ expression list es; @@ -printk( +WARN(1, es); -WARN_ON(1); // Signed-off-by: Julia Lawall Acked-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/iwch_cm.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index aaf88ef9409c..3e094cd6a0e3 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -128,9 +128,8 @@ static void stop_ep_timer(struct iwch_ep *ep) { PDBG("%s ep %p\n", __func__, ep); if (!timer_pending(&ep->timer)) { - printk(KERN_ERR "%s timer stopped when its not running! ep %p state %u\n", + WARN(1, "%s timer stopped when its not running! 
ep %p state %u\n", __func__, ep, ep->com.state); - WARN_ON(1); return; } del_timer_sync(&ep->timer); @@ -1756,9 +1755,8 @@ static void ep_timeout(unsigned long arg) __state_set(&ep->com, ABORTING); break; default: - printk(KERN_ERR "%s unexpected state ep %p state %u\n", + WARN(1, "%s unexpected state ep %p state %u\n", __func__, ep, ep->com.state); - WARN_ON(1); abort = 0; } spin_unlock_irqrestore(&ep->com.lock, flags);