2013-07-07 22:25:49 +08:00
|
|
|
/*
|
2015-04-02 22:07:29 +08:00
|
|
|
* Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
|
2013-07-07 22:25:49 +08:00
|
|
|
*
|
|
|
|
* This software is available to you under a choice of one of two
|
|
|
|
* licenses. You may choose to be licensed under the terms of the GNU
|
|
|
|
* General Public License (GPL) Version 2, available from the file
|
|
|
|
* COPYING in the main directory of this source tree, or the
|
|
|
|
* OpenIB.org BSD license below:
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or
|
|
|
|
* without modification, are permitted provided that the following
|
|
|
|
* conditions are met:
|
|
|
|
*
|
|
|
|
* - Redistributions of source code must retain the above
|
|
|
|
* copyright notice, this list of conditions and the following
|
|
|
|
* disclaimer.
|
|
|
|
*
|
|
|
|
* - Redistributions in binary form must reproduce the above
|
|
|
|
* copyright notice, this list of conditions and the following
|
|
|
|
* disclaimer in the documentation and/or other materials
|
|
|
|
* provided with the distribution.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
|
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
|
|
|
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
|
|
|
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
|
|
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
|
|
* SOFTWARE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef MLX5_CORE_CQ_H
|
|
|
|
#define MLX5_CORE_CQ_H
|
|
|
|
|
|
|
|
#include <rdma/ib_verbs.h>
|
|
|
|
#include <linux/mlx5/driver.h>
|
2017-10-20 15:23:40 +08:00
|
|
|
#include <linux/refcount.h>
|
2013-07-07 22:25:49 +08:00
|
|
|
|
|
|
|
struct mlx5_core_cq {
|
|
|
|
u32 cqn;
|
|
|
|
int cqe_sz;
|
|
|
|
__be32 *set_ci_db;
|
|
|
|
__be32 *arm_db;
|
2017-01-04 05:55:27 +08:00
|
|
|
struct mlx5_uars_page *uar;
|
2017-10-20 15:23:40 +08:00
|
|
|
refcount_t refcount;
|
2013-07-07 22:25:49 +08:00
|
|
|
struct completion free;
|
|
|
|
unsigned vector;
|
2016-01-17 17:25:47 +08:00
|
|
|
unsigned int irqn;
|
2013-07-07 22:25:49 +08:00
|
|
|
void (*comp) (struct mlx5_core_cq *);
|
|
|
|
void (*event) (struct mlx5_core_cq *, enum mlx5_event);
|
|
|
|
u32 cons_index;
|
|
|
|
unsigned arm_sn;
|
|
|
|
struct mlx5_rsc_debug *dbg;
|
|
|
|
int pid;
|
net/mlx5_core: Use tasklet for user-space CQ completion events
Previously, we've fired all our completion callbacks straight from
our ISR.
Some of those callbacks were lightweight (for example, mlx5 Ethernet
napi callbacks), but some of them did more work (for example,
the user-space RDMA stack uverbs' completion handler). Besides that,
doing more than the minimal work in ISR is generally considered wrong,
it could even lead to a hard lockup of the system. Since when a lot
of completion events are generated by the hardware, the loop over
those events could be so long, that we'll get into a hard lockup by
the system watchdog.
In order to avoid that, add a new way of invoking completion events
callbacks. In the interrupt itself, we add the CQs which receive
completion event to a per-EQ list and schedule a tasklet. In the
tasklet context we loop over all the CQs in the list and invoke the
user callback.
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
2016-04-17 22:08:40 +08:00
|
|
|
struct {
|
|
|
|
struct list_head list;
|
|
|
|
void (*comp)(struct mlx5_core_cq *);
|
|
|
|
void *priv;
|
|
|
|
} tasklet_ctx;
|
2016-06-17 20:01:38 +08:00
|
|
|
int reset_notify_added;
|
|
|
|
struct list_head reset_notify;
|
net/mlx5: CQ Database per EQ
Before this patch the driver had one CQ database protected via one
spinlock, this spinlock is meant to synchronize between CQ
adding/removing and CQ IRQ interrupt handling.
On a system with large number of CPUs and on a work load that requires
lots of interrupts, this global spinlock becomes a very nasty hotspot
and introduces a contention between the active cores, which will
significantly hurt performance and becomes a bottleneck that prevents
seamless cpu scaling.
To solve this we simply move the CQ database and its spinlock to be per
EQ (IRQ), thus per core.
Tested with:
system: 2 sockets, 14 cores per socket, hyperthreading, 2x14x2=56 cores
netperf command: ./super_netperf 200 -P 0 -t TCP_RR -H <server> -l 30 -- -r 300,300 -o -s 1M,1M -S 1M,1M
WITHOUT THIS PATCH:
Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %gnice %idle
Average: all 4.32 0.00 36.15 0.09 0.00 34.02 0.00 0.00 0.00 25.41
Samples: 2M of event 'cycles:pp', Event count (approx.): 1554616897271
Overhead Command Shared Object Symbol
+ 14.28% swapper [kernel.vmlinux] [k] intel_idle
+ 12.25% swapper [kernel.vmlinux] [k] queued_spin_lock_slowpath
+ 10.29% netserver [kernel.vmlinux] [k] queued_spin_lock_slowpath
+ 1.32% netserver [kernel.vmlinux] [k] mlx5e_xmit
WITH THIS PATCH:
Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %gnice %idle
Average: all 4.27 0.00 34.31 0.01 0.00 18.71 0.00 0.00 0.00 42.69
Samples: 2M of event 'cycles:pp', Event count (approx.): 1498132937483
Overhead Command Shared Object Symbol
+ 23.33% swapper [kernel.vmlinux] [k] intel_idle
+ 1.69% netserver [kernel.vmlinux] [k] mlx5e_xmit
Tested-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Reviewed-by: Gal Pressman <galp@mellanox.com>
2018-01-20 08:13:01 +08:00
|
|
|
struct mlx5_eq *eq;
|
2013-07-07 22:25:49 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
/* MLX5_CQE_SYNDROME_*: syndrome codes (values unchanged). */
enum {
	MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR		= 0x01,
	MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR		= 0x02,
	MLX5_CQE_SYNDROME_LOCAL_PROT_ERR		= 0x04,
	MLX5_CQE_SYNDROME_WR_FLUSH_ERR			= 0x05,
	MLX5_CQE_SYNDROME_MW_BIND_ERR			= 0x06,
	MLX5_CQE_SYNDROME_BAD_RESP_ERR			= 0x10,
	MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR		= 0x11,
	MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR		= 0x12,
	MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR		= 0x13,
	MLX5_CQE_SYNDROME_REMOTE_OP_ERR			= 0x14,
	MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR	= 0x15,
	MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR		= 0x16,
	MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR		= 0x22,
};
|
|
|
|
|
|
|
|
/*
 * MLX5_CQE_* constants (values unchanged).
 *
 * Note that MLX5_CQE_OWNER_MASK shares this enum with the numbered
 * MLX5_CQE_REQ .. MLX5_CQE_INVALID values and has the same value as
 * MLX5_CQE_RESP_WR_IMM; it is a mask, not one of the numbered codes.
 */
enum {
	MLX5_CQE_OWNER_MASK	= 1,

	MLX5_CQE_REQ		= 0,
	MLX5_CQE_RESP_WR_IMM	= 1,
	MLX5_CQE_RESP_SEND	= 2,
	MLX5_CQE_RESP_SEND_IMM	= 3,
	MLX5_CQE_RESP_SEND_INV	= 4,
	MLX5_CQE_RESIZE_CQ	= 5,
	MLX5_CQE_SIG_ERR	= 12,
	MLX5_CQE_REQ_ERR	= 13,
	MLX5_CQE_RESP_ERR	= 14,
	MLX5_CQE_INVALID	= 15,
};
|
|
|
|
|
|
|
|
/* MLX5_CQ_MODIFY_*: single-bit flags (bits 0..2), combinable with OR. */
enum {
	MLX5_CQ_MODIFY_PERIOD	= 1 << 0,
	MLX5_CQ_MODIFY_COUNT	= 1 << 1,
	MLX5_CQ_MODIFY_OVERRUN	= 1 << 2,
};
|
|
|
|
|
2014-01-14 23:45:18 +08:00
|
|
|
/*
 * MLX5_CQ_OPMOD_RESIZE plus the MLX5_MODIFY_CQ_MASK_* single-bit flags
 * (bits 0..2).  Values unchanged.
 */
enum {
	MLX5_CQ_OPMOD_RESIZE		= 1,
	MLX5_MODIFY_CQ_MASK_LOG_SIZE	= 1 << 0,
	MLX5_MODIFY_CQ_MASK_PG_OFFSET	= 1 << 1,
	MLX5_MODIFY_CQ_MASK_PG_SIZE	= 1 << 2,
};
|
|
|
|
|
2013-07-07 22:25:49 +08:00
|
|
|
struct mlx5_cq_modify_params {
|
|
|
|
int type;
|
|
|
|
union {
|
|
|
|
struct {
|
|
|
|
u32 page_offset;
|
|
|
|
u8 log_cq_size;
|
|
|
|
} resize;
|
|
|
|
|
|
|
|
struct {
|
|
|
|
} moder;
|
|
|
|
|
|
|
|
struct {
|
|
|
|
} mapping;
|
|
|
|
} params;
|
|
|
|
};
|
|
|
|
|
|
|
|
/* CQE size encodings, as returned by cqe_sz_to_mlx_sz(). */
enum {
	CQE_SIZE_64		= 0,
	CQE_SIZE_128		= 1,
	CQE_SIZE_128_PAD	= 2,
};
|
|
|
|
|
2017-11-13 16:51:15 +08:00
|
|
|
/*
 * Upper bounds for CQ moderation values: BIT(width) - 1 for the cqc
 * fields cq_period and cq_max_count (assuming __mlx5_bit_sz() yields the
 * field width in bits -- it is defined outside this header).
 */
#define MLX5_MAX_CQ_PERIOD (BIT(__mlx5_bit_sz(cqc, cq_period)) - 1)
#define MLX5_MAX_CQ_COUNT (BIT(__mlx5_bit_sz(cqc, cq_max_count)) - 1)
|
|
|
|
|
2017-10-19 13:25:53 +08:00
|
|
|
/*
 * Map a CQE size in bytes to its CQE_SIZE_* encoding.
 *
 * @size:           CQE size in bytes; 64 selects CQE_SIZE_64, every other
 *                  value selects CQE_SIZE_128.
 * @padding_128_en: when non-zero, CQE_SIZE_128_PAD is returned regardless
 *                  of @size.
 *
 * Return: one of CQE_SIZE_64, CQE_SIZE_128 or CQE_SIZE_128_PAD.
 */
static inline int cqe_sz_to_mlx_sz(u8 size, int padding_128_en)
{
	if (padding_128_en)
		return CQE_SIZE_128_PAD;

	if (size == 64)
		return CQE_SIZE_64;

	return CQE_SIZE_128;
}
|
|
|
|
|
|
|
|
static inline void mlx5_cq_set_ci(struct mlx5_core_cq *cq)
|
|
|
|
{
|
|
|
|
*cq->set_ci_db = cpu_to_be32(cq->cons_index & 0xffffff);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Doorbell request values, encoded at bit 24 (presumably what callers
 * pass as @cmd to mlx5_cq_arm() -- confirm against callers).
 */
enum {
	MLX5_CQ_DB_REQ_NOT_SOL	= 1 << 24,
	MLX5_CQ_DB_REQ_NOT	= 0 << 24
};
|
|
|
|
|
|
|
|
static inline void mlx5_cq_arm(struct mlx5_core_cq *cq, u32 cmd,
|
|
|
|
void __iomem *uar_page,
|
2015-04-02 22:07:33 +08:00
|
|
|
u32 cons_index)
|
2013-07-07 22:25:49 +08:00
|
|
|
{
|
|
|
|
__be32 doorbell[2];
|
|
|
|
u32 sn;
|
|
|
|
u32 ci;
|
|
|
|
|
|
|
|
sn = cq->arm_sn & 3;
|
2015-04-02 22:07:33 +08:00
|
|
|
ci = cons_index & 0xffffff;
|
2013-07-07 22:25:49 +08:00
|
|
|
|
|
|
|
*cq->arm_db = cpu_to_be32(sn << 28 | cmd | ci);
|
|
|
|
|
|
|
|
/* Make sure that the doorbell record in host memory is
|
|
|
|
* written before ringing the doorbell via PCI MMIO.
|
|
|
|
*/
|
|
|
|
wmb();
|
|
|
|
|
|
|
|
doorbell[0] = cpu_to_be32(sn << 28 | cmd | ci);
|
|
|
|
doorbell[1] = cpu_to_be32(cq->cqn);
|
|
|
|
|
2017-01-04 05:55:25 +08:00
|
|
|
mlx5_write64(doorbell, uar_page + MLX5_CQ_DOORBELL, NULL);
|
2013-07-07 22:25:49 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
|
2016-07-16 07:33:22 +08:00
|
|
|
u32 *in, int inlen);
|
2013-07-07 22:25:49 +08:00
|
|
|
int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
|
|
|
|
int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
|
2016-07-16 07:33:22 +08:00
|
|
|
u32 *out, int outlen);
|
2013-07-07 22:25:49 +08:00
|
|
|
int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
|
2016-07-16 07:33:22 +08:00
|
|
|
u32 *in, int inlen);
|
2015-05-29 03:28:44 +08:00
|
|
|
int mlx5_core_modify_cq_moderation(struct mlx5_core_dev *dev,
|
|
|
|
struct mlx5_core_cq *cq, u16 cq_period,
|
|
|
|
u16 cq_max_count);
|
2013-07-07 22:25:49 +08:00
|
|
|
int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
|
|
|
|
void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
|
|
|
|
|
|
|
|
#endif /* MLX5_CORE_CQ_H */
|