2007-05-09 09:00:38 +08:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
|
|
|
|
*
|
|
|
|
* This software is available to you under a choice of one of two
|
|
|
|
* licenses. You may choose to be licensed under the terms of the GNU
|
|
|
|
* General Public License (GPL) Version 2, available from the file
|
|
|
|
* COPYING in the main directory of this source tree, or the
|
|
|
|
* OpenIB.org BSD license below:
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or
|
|
|
|
* without modification, are permitted provided that the following
|
|
|
|
* conditions are met:
|
|
|
|
*
|
|
|
|
* - Redistributions of source code must retain the above
|
|
|
|
* copyright notice, this list of conditions and the following
|
|
|
|
* disclaimer.
|
|
|
|
*
|
|
|
|
* - Redistributions in binary form must reproduce the above
|
|
|
|
* copyright notice, this list of conditions and the following
|
|
|
|
* disclaimer in the documentation and/or other materials
|
|
|
|
* provided with the distribution.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
|
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
|
|
|
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
|
|
|
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
|
|
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
|
|
* SOFTWARE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef MLX4_DEVICE_H
|
|
|
|
#define MLX4_DEVICE_H
|
|
|
|
|
2013-08-02 07:17:48 +08:00
|
|
|
#include <linux/if_ether.h>
|
2007-05-09 09:00:38 +08:00
|
|
|
#include <linux/pci.h>
|
|
|
|
#include <linux/completion.h>
|
|
|
|
#include <linux/radix-tree.h>
|
2012-07-19 06:33:51 +08:00
|
|
|
#include <linux/cpu_rmap.h>
|
2014-08-25 21:06:53 +08:00
|
|
|
#include <linux/crash_dump.h>
|
2007-05-09 09:00:38 +08:00
|
|
|
|
2017-10-20 15:23:37 +08:00
|
|
|
#include <linux/refcount.h>
|
2007-05-09 09:00:38 +08:00
|
|
|
|
2014-12-22 02:46:56 +08:00
|
|
|
#include <linux/timecounter.h>
|
2013-04-23 14:06:49 +08:00
|
|
|
|
net/mlx4_core: Set UAR page size to 4KB regardless of system page size
problem description:
The current code sets UAR page size equal to system page size.
The ConnectX-3 and ConnectX-3 Pro HWs require minimum 128 UAR pages.
The mlx4 kernel drivers are not loaded if there is less than 128 UAR pages.
solution:
Always set UAR page to 4KB. This allows more UAR pages if the OS
has PAGE_SIZE larger than 4KB. For example, PowerPC kernel use 64KB
system page size, with 4MB uar region, there are 4MB/2/64KB = 32
uars (half for uar, half for blueflame). This does not meet minimum 128
UAR pages requirement. With 4KB UAR page, there are 4MB/2/4KB = 512 uars
which meet the minimum requirement.
Note that only codes in mlx4_core that deal with firmware know that uar
page size is 4KB. Codes that deal with usr page in cq and qp context
(mlx4_ib, mlx4_en and part of mlx4_core) still have the same assumption
that uar page size equals to system page size.
Note that with this implementation, on 64KB system page size kernel, there
are 16 uars per system page but only one uars is used. The other 15
uars are ignored because of the above assumption.
Regarding SR-IOV, mlx4_core in hypervisor will set the uar page size
to 4KB and mlx4_core code in virtual OS will obtain the uar page size from
firmware.
Regarding backward compatibility in SR-IOV, if hypervisor has this new code,
the virtual OS must be updated. If hypervisor has old code, and the virtual
OS has this new code, the new code will be backward compatible with the
old code. If the uar size is big enough, this new code in VF continues to
work with 64 KB uar page size (on PowerPc kernel). If the uar size does not
meet 128 uars requirement, this new code not loaded in VF and print the same
error message as the old code in Hypervisor.
Signed-off-by: Huy Nguyen <huyn@mellanox.com>
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-02-17 23:24:26 +08:00
|
|
|
/* UAR page size is fixed at 4KB (1 << 12), regardless of the system page size. */
#define DEFAULT_UAR_PAGE_SHIFT 12
|
|
|
|
|
2011-03-23 06:37:47 +08:00
|
|
|
/* MSI-X sizing constants (names suggest per-port and overall vector limits). */
#define MAX_MSIX_P_PORT		17
#define MAX_MSIX		64
#define MIN_MSIX_P_PORT		5

/*
 * Evaluates to true when the capability structure reports fewer completion
 * vectors than MIN_MSIX_P_PORT for each of its ports.
 *
 * dev_cap is evaluated twice; pass an lvalue without side effects. It must
 * have num_comp_vectors and num_ports members.
 */
#define MLX4_IS_LEGACY_EQ_MODE(dev_cap) ((dev_cap).num_comp_vectors < \
					 (dev_cap).num_ports * MIN_MSIX_P_PORT)
|
2011-03-23 06:37:47 +08:00
|
|
|
|
2014-07-08 16:25:19 +08:00
|
|
|
/*
 * Workaround: values greater than this cannot be set when using
 * 100 Mbps units.
 */
#define MLX4_MAX_100M_UNITS_VAL		255
#define MLX4_RATELIMIT_100M_UNITS	3 /* 100 Mbps */
#define MLX4_RATELIMIT_1G_UNITS		4 /* 1 Gbps */
#define MLX4_RATELIMIT_DEFAULT		0x00ff
|
|
|
|
|
2014-03-12 18:00:37 +08:00
|
|
|
/* RoCE GID counts (presumably total table size and the PF's share; confirm with users). */
#define MLX4_ROCE_MAX_GIDS	128
#define MLX4_ROCE_PF_GIDS	16
|
2014-03-12 18:00:37 +08:00
|
|
|
|
2007-05-09 09:00:38 +08:00
|
|
|
/* Device flag bits. Each value is a distinct single bit; bit 5 is not assigned here. */
enum {
	MLX4_FLAG_MSI_X		= 1 << 0,
	MLX4_FLAG_OLD_PORT_CMDS	= 1 << 1,
	MLX4_FLAG_MASTER	= 1 << 2,
	MLX4_FLAG_SLAVE		= 1 << 3,
	MLX4_FLAG_SRIOV		= 1 << 4,
	MLX4_FLAG_OLD_REG_MAC	= 1 << 6,
	MLX4_FLAG_BONDED	= 1 << 7,
	MLX4_FLAG_SECURE_HOST	= 1 << 8,
};
|
|
|
|
|
2012-08-03 16:40:52 +08:00
|
|
|
/* Port capability bits defined by this header (bits 1 and 19). */
enum {
	MLX4_PORT_CAP_IS_SM		= 1 << 1,
	MLX4_PORT_CAP_DEV_MGMT_SUP	= 1 << 19,
};
|
|
|
|
|
2007-05-09 09:00:38 +08:00
|
|
|
/* Port-related upper bounds: port count and per-port P_Key / GID counts. */
enum {
	MLX4_MAX_PORTS		= 2,
	MLX4_MAX_PORT_PKEYS	= 128,
	MLX4_MAX_PORT_GIDS	= 128
};
|
|
|
|
|
mlx4_core: Implement mechanism for reserved Q_Keys
The SR-IOV special QP tunneling mechanism uses proxy special QPs
(instead of the real special QPs) for MADs on guests. These proxy QPs
send their packets to a "tunnel" QP owned by the master. The master
then forwards the MAD (after any required paravirtualization) to the
real special QP, which sends out the MAD.
For security reasons (i.e., to prevent guests from sending MADs to
tunnel QPs belonging to other guests), each proxy-tunnel QP pair is
assigned a unique, reserved, Q_Key. These Q_Keys are available only
for proxy and tunnel QPs -- if the guest tries to use these Q_Keys
with other QPs, it will fail.
This patch introduces a mechanism for reserving a block of 64K Q_Keys
for proxy/tunneling use.
The patch introduces also two new fields into mlx4_dev: base_sqpn and
base_tunnel_sqpn.
In SR-IOV mode, the QP numbers for the "real," proxy, and tunnel sqps
are added to the reserved QPN area (so that they will not change).
There are 8 special QPs per port in the HCA, and each of them is
assigned both a proxy and a tunnel QP, for each VF and for the PF as
well in SR-IOV mode.
The QPNs for these QPs are arranged as follows:
1. The real SQP numbers (8)
2. The proxy SQPs (8 * (max number of VFs + max number of PFs)
3. The tunnel SQPs (8 * (max number of VFs + max number of PFs)
To support these QPs, two new fields are added to struct mlx4_dev:
base_sqp: this is the QP number of the first of the real SQPs
base_tunnel_sqp: this is the qp number of the first qp in the tunnel
sqp region. (On guests, this is the first tunnel
sqp of the 8 which are assigned to that guest).
In addition, in SR-IOV mode, sqp_start is the number of the first
proxy SQP in the proxy SQP region. (In guests, this is the first
proxy SQP of the 8 which are assigned to that guest)
Note that in non-SR-IOV mode, there are no proxies and no tunnels.
In this case, sqp_start is set to sqp_base -- which minimizes code
changes.
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
2012-06-19 16:21:42 +08:00
|
|
|
/*
 * Base qkey for use in SR-IOV tunnel-qp/proxy-qp communication.
 * These qkeys must not be allowed for general use. This is a 64k range,
 * and to test for violation, we use the mask (protects against future
 * changes): a qkey is reserved when
 * (qkey & MLX4_RESERVED_QKEY_MASK) == MLX4_RESERVED_QKEY_BASE.
 */
#define MLX4_RESERVED_QKEY_BASE  (0xFFFF0000)
#define MLX4_RESERVED_QKEY_MASK  (0xFFFF0000)
|
|
|
|
|
2007-09-18 15:14:18 +08:00
|
|
|
/* Length constant for the board ID (units not shown in this header; presumably bytes). */
enum {
	MLX4_BOARD_ID_LEN = 64
};
|
|
|
|
|
2011-12-13 12:10:33 +08:00
|
|
|
/* Multi-function (SR-IOV) limits and EQ sizing constants. */
enum {
	MLX4_MAX_NUM_PF		= 16,
	MLX4_MAX_NUM_VF		= 126,
	MLX4_MAX_NUM_VF_P_PORT	= 64,
	MLX4_MFUNC_MAX		= 128,
	/*
	 * Device maximum number of EQs. ICM space is allocated for this many
	 * EQs rather than computing (1 << log_num_eqs) * (#VFs + #PFs).
	 */
	MLX4_MAX_EQ_NUM		= 1024,
	MLX4_MFUNC_EQ_NUM	= 4,
	MLX4_MFUNC_MAX_EQES	= 8,
	/* MLX4_MFUNC_MAX_EQES is a power of two, so this works as an index mask. */
	MLX4_MFUNC_EQE_MASK	= (MLX4_MFUNC_MAX_EQES - 1)
};
|
|
|
|
|
2017-05-09 06:57:56 +08:00
|
|
|
/* Driver supports 3 different device methods to manage traffic steering:
 *	- device managed - High level API for ib and eth flow steering. FW is
 *			   managing flow steering tables.
 *	- B0 steering mode - Common low level API for ib and (if supported) eth.
 *	- A0 steering mode - Limited low level API for eth. In case of IB,
 *			     B0 mode is in use.
 *
 * Device managed steering is used when the firmware supports it, otherwise
 * B0 if supported, and A0 as the last resort.
 */
enum {
	MLX4_STEERING_MODE_A0,
	MLX4_STEERING_MODE_B0,
	MLX4_STEERING_MODE_DEVICE_MANAGED
};
|
|
|
|
|
net/mlx4: Add support for A0 steering
Add the required firmware commands for A0 steering and a way to enable
that. The firmware support focuses on INIT_HCA, QUERY_HCA, QUERY_PORT,
QUERY_DEV_CAP and QUERY_FUNC_CAP commands. Those commands are used
to configure and query the device.
The different A0 DMFS (steering) modes are:
Static - optimized performance, but flow steering rules are
limited. This mode should be choosed explicitly by the user
in order to be used.
Dynamic - this mode should be explicitly choosed by the user.
In this mode, the FW works in optimized steering mode as long as
it can and afterwards automatically drops to classic (full) DMFS.
Disable - this mode should be explicitly choosed by the user.
The user instructs the system not to use optimized steering, even if
the FW supports Dynamic A0 DMFS (and thus will be able to use optimized
steering in Default A0 DMFS mode).
Default - this mode is implicitly choosed. In this mode, if the FW
supports Dynamic A0 DMFS, it'll work in this mode. Otherwise, it'll
work at Disable A0 DMFS mode.
Under SRIOV configuration, when the A0 steering mode is enabled,
older guest VF drivers who aren't using the RX QP allocation flag
(MLX4_RESERVE_A0_QP) will get a QP from the general range and
fail when attempting to register a steering rule. To avoid that,
the PF context behaviour is changed once on A0 static mode, to
require support for the allocation flag in VF drivers too.
In order to enable A0 steering, we use log_num_mgm_entry_size param.
If the value of the parameter is not positive, we treat the absolute
value of log_num_mgm_entry_size as a bit field. Setting bit 2 of this
bit field enables static A0 steering.
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-12-11 16:58:00 +08:00
|
|
|
/*
 * A0 DMFS (device managed flow steering) modes:
 *	- DEFAULT - chosen implicitly; uses dynamic A0 DMFS when the FW
 *		    supports it, otherwise behaves like DISABLE.
 *	- DYNAMIC - chosen explicitly by the user; FW works in optimized
 *		    steering mode as long as it can and afterwards drops
 *		    to classic (full) DMFS.
 *	- STATIC  - chosen explicitly by the user; optimized performance,
 *		    but flow steering rules are limited.
 *	- DISABLE - chosen explicitly by the user; optimized steering is
 *		    not used even if the FW supports dynamic A0 DMFS.
 */
enum {
	MLX4_STEERING_DMFS_A0_DEFAULT,
	MLX4_STEERING_DMFS_A0_DYNAMIC,
	MLX4_STEERING_DMFS_A0_STATIC,
	MLX4_STEERING_DMFS_A0_DISABLE,
	MLX4_STEERING_DMFS_A0_NOT_SUPPORTED
};
|
|
|
|
|
2012-07-05 12:03:44 +08:00
|
|
|
static inline const char *mlx4_steering_mode_str(int steering_mode)
|
|
|
|
{
|
|
|
|
switch (steering_mode) {
|
|
|
|
case MLX4_STEERING_MODE_A0:
|
|
|
|
return "A0 steering";
|
|
|
|
|
|
|
|
case MLX4_STEERING_MODE_B0:
|
|
|
|
return "B0 steering";
|
{NET, IB}/mlx4: Add device managed flow steering firmware API
The driver is modified to support three operation modes.
If supported by firmware use the device managed flow steering
API, that which we call device managed steering mode. Else, if
the firmware supports the B0 steering mode use it, and finally,
if none of the above, use the A0 steering mode.
When the steering mode is device managed, the code is modified
such that L2 based rules set by the mlx4_en driver for Ethernet
unicast and multicast, and the IB stack multicast attach calls
done through the mlx4_ib driver are all routed to use the device
managed API.
When attaching rule using device managed flow steering API,
the firmware returns a 64 bit registration id, which is to be
provided during detach.
Currently the firmware is always programmed during HCA initialization
to use standard L2 hashing. Future work should be done to allow
configuring the flow-steering hash function with common, non
proprietary means.
Signed-off-by: Hadar Hen Zion <hadarh@mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2012-07-05 12:03:46 +08:00
|
|
|
|
|
|
|
case MLX4_STEERING_MODE_DEVICE_MANAGED:
|
|
|
|
return "Device managed flow steering";
|
|
|
|
|
2012-07-05 12:03:44 +08:00
|
|
|
default:
|
|
|
|
return "Unrecognize steering mode";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-12-23 22:09:43 +08:00
|
|
|
/* Tunnel offload modes: none, or VXLAN. */
enum {
	MLX4_TUNNEL_OFFLOAD_MODE_NONE,
	MLX4_TUNNEL_OFFLOAD_MODE_VXLAN
};
|
|
|
|
|
2007-05-09 09:00:38 +08:00
|
|
|
/*
 * Device capability flag bits. Bit positions go up to 62, so the constants
 * are built from 1LL and need a 64-bit type to hold the combined mask.
 * Gaps in the bit numbering are bits this header does not name.
 */
enum {
	MLX4_DEV_CAP_FLAG_RC		= 1LL <<  0,
	MLX4_DEV_CAP_FLAG_UC		= 1LL <<  1,
	MLX4_DEV_CAP_FLAG_UD		= 1LL <<  2,
	MLX4_DEV_CAP_FLAG_XRC		= 1LL <<  3,
	MLX4_DEV_CAP_FLAG_SRQ		= 1LL <<  6,
	MLX4_DEV_CAP_FLAG_IPOIB_CSUM	= 1LL <<  7,
	MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR	= 1LL <<  8,
	MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR	= 1LL <<  9,
	MLX4_DEV_CAP_FLAG_DPDP		= 1LL << 12,
	MLX4_DEV_CAP_FLAG_BLH		= 1LL << 15,
	MLX4_DEV_CAP_FLAG_MEM_WINDOW	= 1LL << 16,
	MLX4_DEV_CAP_FLAG_APM		= 1LL << 17,
	MLX4_DEV_CAP_FLAG_ATOMIC	= 1LL << 18,
	MLX4_DEV_CAP_FLAG_RAW_MCAST	= 1LL << 19,
	MLX4_DEV_CAP_FLAG_UD_AV_PORT	= 1LL << 20,
	MLX4_DEV_CAP_FLAG_UD_MCAST	= 1LL << 21,
	MLX4_DEV_CAP_FLAG_IBOE		= 1LL << 30,
	MLX4_DEV_CAP_FLAG_UC_LOOPBACK	= 1LL << 32,
	MLX4_DEV_CAP_FLAG_FCS_KEEP	= 1LL << 34,
	MLX4_DEV_CAP_FLAG_WOL_PORT1	= 1LL << 37,
	MLX4_DEV_CAP_FLAG_WOL_PORT2	= 1LL << 38,
	MLX4_DEV_CAP_FLAG_UDP_RSS	= 1LL << 40,
	MLX4_DEV_CAP_FLAG_VEP_UC_STEER	= 1LL << 41,
	MLX4_DEV_CAP_FLAG_VEP_MC_STEER	= 1LL << 42,
	MLX4_DEV_CAP_FLAG_COUNTERS	= 1LL << 48,
	MLX4_DEV_CAP_FLAG_RSS_IP_FRAG	= 1LL << 52,
	MLX4_DEV_CAP_FLAG_SET_ETH_SCHED	= 1LL << 53,
	MLX4_DEV_CAP_FLAG_SENSE_SUPPORT	= 1LL << 55,
	/* FW generates port management change events (replaces smp_snoop). */
	MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV = 1LL << 59,
	MLX4_DEV_CAP_FLAG_64B_EQE	= 1LL << 61,
	MLX4_DEV_CAP_FLAG_64B_CQE	= 1LL << 62
};
|
|
|
|
|
2012-04-29 22:04:25 +08:00
|
|
|
/*
 * Second set of device capability flag bits (bits 0..38, contiguous).
 * Bit positions exceed 31, so the combined mask needs a 64-bit type.
 */
enum {
	MLX4_DEV_CAP_FLAG2_RSS			= 1LL <<  0,
	MLX4_DEV_CAP_FLAG2_RSS_TOP		= 1LL <<  1,
	MLX4_DEV_CAP_FLAG2_RSS_XOR		= 1LL <<  2,
	/* Device managed flow steering is supported. */
	MLX4_DEV_CAP_FLAG2_FS_EN		= 1LL <<  3,
	MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN	= 1LL <<  4,
	MLX4_DEV_CAP_FLAG2_TS			= 1LL <<  5,
	MLX4_DEV_CAP_FLAG2_VLAN_CONTROL		= 1LL <<  6,
	MLX4_DEV_CAP_FLAG2_FSM			= 1LL <<  7,
	MLX4_DEV_CAP_FLAG2_UPDATE_QP		= 1LL <<  8,
	MLX4_DEV_CAP_FLAG2_DMFS_IPOIB		= 1LL <<  9,
	MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS	= 1LL <<  10,
	MLX4_DEV_CAP_FLAG2_MAD_DEMUX		= 1LL <<  11,
	MLX4_DEV_CAP_FLAG2_CQE_STRIDE		= 1LL <<  12,
	MLX4_DEV_CAP_FLAG2_EQE_STRIDE		= 1LL <<  13,
	MLX4_DEV_CAP_FLAG2_ETH_PROT_CTRL	= 1LL <<  14,
	MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP	= 1LL <<  15,
	MLX4_DEV_CAP_FLAG2_CONFIG_DEV		= 1LL <<  16,
	MLX4_DEV_CAP_FLAG2_SYS_EQS		= 1LL <<  17,
	MLX4_DEV_CAP_FLAG2_80_VFS		= 1LL <<  18,
	/* A0 steering is supported. */
	MLX4_DEV_CAP_FLAG2_FS_A0		= 1LL <<  19,
	MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT = 1LL << 20,
	MLX4_DEV_CAP_FLAG2_PORT_REMAP		= 1LL <<  21,
	MLX4_DEV_CAP_FLAG2_QCN			= 1LL <<  22,
	MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT	= 1LL <<  23,
	MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN		= 1LL <<  24,
	MLX4_DEV_CAP_FLAG2_QOS_VPP		= 1LL <<  25,
	MLX4_DEV_CAP_FLAG2_ETS_CFG		= 1LL <<  26,
	MLX4_DEV_CAP_FLAG2_PORT_BEACON		= 1LL <<  27,
	MLX4_DEV_CAP_FLAG2_IGNORE_FCS		= 1LL <<  28,
	MLX4_DEV_CAP_FLAG2_PHV_EN		= 1LL <<  29,
	MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN	= 1LL <<  30,
	MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB = 1ULL << 31,
	MLX4_DEV_CAP_FLAG2_LB_SRC_CHK		= 1ULL << 32,
	MLX4_DEV_CAP_FLAG2_ROCE_V1_V2		= 1ULL << 33,
	MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER	= 1ULL << 34,
	MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT	= 1ULL << 35,
	MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP		= 1ULL << 36,
	MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT = 1ULL << 37,
	MLX4_DEV_CAP_FLAG2_USER_MAC_EN		= 1ULL << 38,
};
|
|
|
|
|
net/mlx4: Change QP allocation scheme
When using BF (Blue-Flame), the QPN overrides the VLAN, CV, and SV fields
in the WQE. Thus, BF may only be used for QPNs with bits 6,7 unset.
The current Ethernet driver code reserves a Tx QP range with 256b alignment.
This is wrong because if there are more than 64 Tx QPs in use,
QPNs >= base + 65 will have bits 6/7 set.
This problem is not specific for the Ethernet driver, any entity that
tries to reserve more than 64 BF-enabled QPs should fail. Also, using
ranges is not necessary here and is wasteful.
The new mechanism introduced here will support reservation for
"Eth QPs eligible for BF" for all drivers: bare-metal, multi-PF, and VFs
(when hypervisors support WC in VMs). The flow we use is:
1. In mlx4_en, allocate Tx QPs one by one instead of a range allocation,
and request "BF enabled QPs" if BF is supported for the function
2. In the ALLOC_RES FW command, change param1 to:
a. param1[23:0] - number of QPs
b. param1[31-24] - flags controlling QPs reservation
Bit 31 refers to Eth blueflame supported QPs. Those QPs must have
bits 6 and 7 unset in order to be used in Ethernet.
Bits 24-30 of the flags are currently reserved.
When a function tries to allocate a QP, it states the required attributes
for this QP. Those attributes are considered "best-effort". If an attribute,
such as Ethernet BF enabled QP, is a must-have attribute, the function has
to check that attribute is supported before trying to do the allocation.
In a lower layer of the code, mlx4_qp_reserve_range masks out the bits
which are unsupported. If SRIOV is used, the PF validates those attributes
and masks out unsupported attributes as well. In order to notify VFs which
attributes are supported, the VF uses QUERY_FUNC_CAP command. This command's
mailbox is filled by the PF, which notifies which QP allocation attributes
it supports.
Signed-off-by: Eugenia Emantayev <eugenia@mellanox.co.il>
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-12-11 16:57:54 +08:00
|
|
|
/*
 * QP allocation attribute flags (MLX4_QUERY_FUNC_FLAGS_*).
 *
 * Per the change logs that introduced them, the PF uses the QUERY_FUNC_CAP
 * command to tell a VF which QP allocation attributes it supports:
 *  - BF_RES_QP: reservation of Ethernet BlueFlame-eligible QPs
 *    (QP numbers with bits 6 and 7 unset);
 *  - A0_RES_QP: reservation of A0-steerable QPs.
 */
enum {
	MLX4_QUERY_FUNC_FLAGS_BF_RES_QP		= 1LL << 0,
	MLX4_QUERY_FUNC_FLAGS_A0_RES_QP		= 1LL << 1
};
|
|
|
|
|
2015-01-25 22:59:42 +08:00
|
|
|
/* VF capability flag bits (MLX4_VF_CAP_FLAG_*); currently a single bit. */
enum {
	MLX4_VF_CAP_FLAG_RESET			= 1 << 0
};
|
|
|
|
|
net/mlx4: Change QP allocation scheme
When using BF (Blue-Flame), the QPN overrides the VLAN, CV, and SV fields
in the WQE. Thus, BF may only be used for QPNs with bits 6,7 unset.
The current Ethernet driver code reserves a Tx QP range with 256b alignment.
This is wrong because if there are more than 64 Tx QPs in use,
QPNs >= base + 65 will have bits 6/7 set.
This problem is not specific for the Ethernet driver, any entity that
tries to reserve more than 64 BF-enabled QPs should fail. Also, using
ranges is not necessary here and is wasteful.
The new mechanism introduced here will support reservation for
"Eth QPs eligible for BF" for all drivers: bare-metal, multi-PF, and VFs
(when hypervisors support WC in VMs). The flow we use is:
1. In mlx4_en, allocate Tx QPs one by one instead of a range allocation,
and request "BF enabled QPs" if BF is supported for the function
2. In the ALLOC_RES FW command, change param1 to:
a. param1[23:0] - number of QPs
b. param1[31-24] - flags controlling QPs reservation
Bit 31 refers to Eth blueflame supported QPs. Those QPs must have
bits 6 and 7 unset in order to be used in Ethernet.
Bits 24-30 of the flags are currently reserved.
When a function tries to allocate a QP, it states the required attributes
for this QP. Those attributes are considered "best-effort". If an attribute,
such as Ethernet BF enabled QP, is a must-have attribute, the function has
to check that attribute is supported before trying to do the allocation.
In a lower layer of the code, mlx4_qp_reserve_range masks out the bits
which are unsupported. If SRIOV is used, the PF validates those attributes
and masks out unsupported attributes as well. In order to notify VFs which
attributes are supported, the VF uses QUERY_FUNC_CAP command. This command's
mailbox is filled by the PF, which notifies which QP allocation attributes
it supports.
Signed-off-by: Eugenia Emantayev <eugenia@mellanox.co.il>
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-12-11 16:57:54 +08:00
|
|
|
/* bit enums for an 8-bit flags field indicating special use
 * QPs which require special handling in qp_reserve_range.
 * This includes QPs used by the ETH interface, where we expect
 * to use blueflame (these QPs must not have bits 6 and 7 set in
 * their qp number), and, per the A0 hybrid steering change log,
 * QPs that must be A0 steerable.
 *
 * This enum may use only bits 0..7.
 */
enum {
	MLX4_RESERVE_A0_QP	= 1 << 6,
	MLX4_RESERVE_ETH_BF_QP	= 1 << 7,
};
|
|
|
|
|
2012-10-21 22:59:24 +08:00
|
|
|
/* Device capability "enabled" flag bits (MLX4_DEV_CAP_*), one bit each. */
enum {
	MLX4_DEV_CAP_64B_EQE_ENABLED	= 1LL << 0,
	MLX4_DEV_CAP_64B_CQE_ENABLED	= 1LL << 1,
	MLX4_DEV_CAP_CQE_STRIDE_ENABLED	= 1LL << 2,
	MLX4_DEV_CAP_EQE_STRIDE_ENABLED	= 1LL << 3
};
|
|
|
|
|
|
|
|
/* User-visible device capability flag bits (MLX4_USER_DEV_CAP_*). */
enum {
	MLX4_USER_DEV_CAP_LARGE_CQE	= 1L << 0
};
|
|
|
|
|
|
|
|
/*
 * Function capability flag bits (MLX4_FUNC_CAP_*).
 *
 * MLX4_FUNC_CAP_DMFS_A0_STATIC relates to the static A0 DMFS (steering)
 * mode; per the change log that added it, once the PF is in A0 static mode
 * it requires VF drivers to support the MLX4_RESERVE_A0_QP allocation flag.
 */
enum {
	MLX4_FUNC_CAP_64B_EQE_CQE	= 1L << 0,
	MLX4_FUNC_CAP_EQE_CQE_STRIDE	= 1L << 1,
	MLX4_FUNC_CAP_DMFS_A0_STATIC	= 1L << 2
};
|
|
|
|
|
|
|
|
|
2011-10-24 17:02:34 +08:00
|
|
|
/* Attribute ID 0xff90 for extended port info, passed through cpu_to_be16(). */
#define MLX4_ATTR_EXTENDED_PORT_INFO cpu_to_be16(0xff90)
|
|
|
|
|
2008-07-23 23:12:26 +08:00
|
|
|
/*
 * BMME flag bits (MLX4_BMME_FLAG_*).  The bit positions are sparse
 * (1, 6, 7, 9, 10, 11, 19, 24, 28); keep each value exactly as listed.
 */
enum {
	MLX4_BMME_FLAG_WIN_TYPE_2B	= 1 <<  1,
	MLX4_BMME_FLAG_LOCAL_INV	= 1 <<  6,
	MLX4_BMME_FLAG_REMOTE_INV	= 1 <<  7,
	MLX4_BMME_FLAG_TYPE_2_WIN	= 1 <<  9,
	MLX4_BMME_FLAG_RESERVED_LKEY	= 1 << 10,
	MLX4_BMME_FLAG_FAST_REG_WR	= 1 << 11,
	MLX4_BMME_FLAG_ROCE_V1_V2	= 1 << 19,
	MLX4_BMME_FLAG_PORT_REMAP	= 1 << 24,
	MLX4_BMME_FLAG_VSD_INIT2RTR	= 1 << 28,
};
|
|
|
|
|
2015-02-03 22:48:32 +08:00
|
|
|
enum {
|
2016-01-14 23:50:32 +08:00
|
|
|
MLX4_FLAG_PORT_REMAP = MLX4_BMME_FLAG_PORT_REMAP,
|
|
|
|
MLX4_FLAG_ROCE_V1_V2 = MLX4_BMME_FLAG_ROCE_V1_V2
|
2015-02-03 22:48:32 +08:00
|
|
|
};
|
|
|
|
|
2007-05-09 09:00:38 +08:00
|
|
|
/*
 * Event type codes (MLX4_EVENT_TYPE_*).  The numeric values are not listed
 * in ascending order; keep each code exactly as given.
 * MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT is the port management change event
 * which, per the change log that added it, can replace smp_snoop when the
 * FW generates it.
 */
enum mlx4_event {
	MLX4_EVENT_TYPE_COMP		   = 0x00,
	MLX4_EVENT_TYPE_PATH_MIG	   = 0x01,
	MLX4_EVENT_TYPE_COMM_EST	   = 0x02,
	MLX4_EVENT_TYPE_SQ_DRAINED	   = 0x03,
	MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE	   = 0x13,
	MLX4_EVENT_TYPE_SRQ_LIMIT	   = 0x14,
	MLX4_EVENT_TYPE_CQ_ERROR	   = 0x04,
	MLX4_EVENT_TYPE_WQ_CATAS_ERROR	   = 0x05,
	MLX4_EVENT_TYPE_EEC_CATAS_ERROR	   = 0x06,
	MLX4_EVENT_TYPE_PATH_MIG_FAILED	   = 0x07,
	MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR = 0x10,
	MLX4_EVENT_TYPE_WQ_ACCESS_ERROR	   = 0x11,
	MLX4_EVENT_TYPE_SRQ_CATAS_ERROR	   = 0x12,
	MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR  = 0x08,
	MLX4_EVENT_TYPE_PORT_CHANGE	   = 0x09,
	MLX4_EVENT_TYPE_EQ_OVERFLOW	   = 0x0f,
	MLX4_EVENT_TYPE_ECC_DETECT	   = 0x0e,
	MLX4_EVENT_TYPE_CMD		   = 0x0a,
	MLX4_EVENT_TYPE_VEP_UPDATE	   = 0x19,
	MLX4_EVENT_TYPE_COMM_CHANNEL	   = 0x18,
	MLX4_EVENT_TYPE_OP_REQUIRED	   = 0x1a,
	MLX4_EVENT_TYPE_FATAL_WARNING	   = 0x1b,
	MLX4_EVENT_TYPE_FLR_EVENT	   = 0x1c,
	MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT = 0x1d,
	MLX4_EVENT_TYPE_RECOVERABLE_ERROR_EVENT  = 0x3e,
	MLX4_EVENT_TYPE_NONE		   = 0xff,
};
|
|
|
|
|
|
|
|
/* Subtype codes for port change events (MLX4_PORT_CHANGE_SUBTYPE_*). */
enum {
	MLX4_PORT_CHANGE_SUBTYPE_DOWN	= 1,
	MLX4_PORT_CHANGE_SUBTYPE_ACTIVE	= 4
};
|
|
|
|
|
2015-01-27 21:57:59 +08:00
|
|
|
/* Subtype codes for recoverable error events (cable problems). */
enum {
	MLX4_RECOVERABLE_ERROR_EVENT_SUBTYPE_BAD_CABLE		= 1,
	MLX4_RECOVERABLE_ERROR_EVENT_SUBTYPE_UNSUPPORTED_CABLE	= 2,
};
|
|
|
|
|
2012-03-06 21:50:49 +08:00
|
|
|
/* Subtype codes for fatal warning events (MLX4_FATAL_WARNING_SUBTYPE_*). */
enum {
	MLX4_FATAL_WARNING_SUBTYPE_WARMING = 0,
};
|
|
|
|
|
2012-08-03 16:40:48 +08:00
|
|
|
/* Slave port states; values are consecutive starting at SLAVE_PORT_DOWN = 0. */
enum slave_port_state {
	SLAVE_PORT_DOWN = 0,
	SLAVE_PENDING_UP,
	SLAVE_PORT_UP,
};
|
|
|
|
|
|
|
|
/*
 * Slave port generated-event codes; values are consecutive starting at
 * SLAVE_PORT_GEN_EVENT_DOWN = 0.
 */
enum slave_port_gen_event {
	SLAVE_PORT_GEN_EVENT_DOWN = 0,
	SLAVE_PORT_GEN_EVENT_UP,
	SLAVE_PORT_GEN_EVENT_NONE,
};
|
|
|
|
|
|
|
|
/*
 * Slave port state events: device port down/up and IB GID valid/invalid.
 * No explicit values are given, so numbering starts at 0.
 */
enum slave_port_state_event {
	MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN,
	MLX4_PORT_STATE_DEV_EVENT_PORT_UP,
	MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID,
	MLX4_PORT_STATE_IB_EVENT_GID_INVALID,
};
|
|
|
|
|
2007-05-09 09:00:38 +08:00
|
|
|
/*
 * Permission bits (MLX4_PERM_*), occupying bits 10..15.
 * MLX4_PERM_MASK (0xFC00) equals the OR of all six permission bits.
 */
enum {
	MLX4_PERM_LOCAL_READ	= 1 << 10,
	MLX4_PERM_LOCAL_WRITE	= 1 << 11,
	MLX4_PERM_REMOTE_READ	= 1 << 12,
	MLX4_PERM_REMOTE_WRITE	= 1 << 13,
	MLX4_PERM_ATOMIC	= 1 << 14,
	MLX4_PERM_BIND_MW	= 1 << 15,
	MLX4_PERM_MASK		= 0xFC00
};
|
|
|
|
|
|
|
|
/*
 * Opcode values, in three groups sharing one anonymous enum:
 *  - MLX4_OPCODE_*       opcodes,
 *  - MLX4_RECV_OPCODE_*  receive opcodes,
 *  - MLX4_CQE_OPCODE_*   CQE opcodes.
 * Values overlap between groups (e.g. 0x00 and 0x01), so a value is only
 * meaningful together with the group it belongs to.
 */
enum {
	MLX4_OPCODE_NOP			= 0x00,
	MLX4_OPCODE_SEND_INVAL		= 0x01,
	MLX4_OPCODE_RDMA_WRITE		= 0x08,
	MLX4_OPCODE_RDMA_WRITE_IMM	= 0x09,
	MLX4_OPCODE_SEND		= 0x0a,
	MLX4_OPCODE_SEND_IMM		= 0x0b,
	MLX4_OPCODE_LSO			= 0x0e,
	MLX4_OPCODE_RDMA_READ		= 0x10,
	MLX4_OPCODE_ATOMIC_CS		= 0x11,
	MLX4_OPCODE_ATOMIC_FA		= 0x12,
	MLX4_OPCODE_MASKED_ATOMIC_CS	= 0x14,
	MLX4_OPCODE_MASKED_ATOMIC_FA	= 0x15,
	MLX4_OPCODE_BIND_MW		= 0x18,
	MLX4_OPCODE_FMR			= 0x19,
	MLX4_OPCODE_LOCAL_INVAL		= 0x1b,
	MLX4_OPCODE_CONFIG_CMD		= 0x1f,

	MLX4_RECV_OPCODE_RDMA_WRITE_IMM	= 0x00,
	MLX4_RECV_OPCODE_SEND		= 0x01,
	MLX4_RECV_OPCODE_SEND_IMM	= 0x02,
	MLX4_RECV_OPCODE_SEND_INVAL	= 0x03,

	MLX4_CQE_OPCODE_ERROR		= 0x1e,
	MLX4_CQE_OPCODE_RESIZE		= 0x16,
};
|
|
|
|
|
|
|
|
/* Constant offset (5) named MLX4_STAT_RATE_OFFSET. */
enum {
	MLX4_STAT_RATE_OFFSET	= 5
};
|
|
|
|
|
2010-12-02 19:44:49 +08:00
|
|
|
/* Protocol identifiers; values are consecutive starting at MLX4_PROT_IB_IPV6 = 0. */
enum mlx4_protocol {
	MLX4_PROT_IB_IPV6 = 0,
	MLX4_PROT_ETH,
	MLX4_PROT_IB_IPV4,
	MLX4_PROT_FCOE
};
|
|
|
|
|
2008-09-16 05:25:23 +08:00
|
|
|
/* MTT flag value: "present" is 1. */
enum {
	MLX4_MTT_FLAG_PRESENT		= 1
};
|
|
|
|
|
2008-10-23 01:25:29 +08:00
|
|
|
/*
 * QP regions.  MLX4_QP_REGION_BOTTOM is an alias for
 * MLX4_QP_REGION_RSS_RAW_ETH (both are 1), so the numbering continues from
 * there; MLX4_NUM_QP_REGION is the count of distinct regions (5).
 *
 * Per the A0 hybrid steering change log, RSS and raw-Ethernet QPs are
 * allocated from their own special region when possible, with the general
 * range used as a fallback.
 */
enum mlx4_qp_region {
	MLX4_QP_REGION_FW = 0,
	MLX4_QP_REGION_RSS_RAW_ETH,
	MLX4_QP_REGION_BOTTOM = MLX4_QP_REGION_RSS_RAW_ETH,
	MLX4_QP_REGION_ETH_ADDR,
	MLX4_QP_REGION_FC_ADDR,
	MLX4_QP_REGION_FC_EXCH,
	MLX4_NUM_QP_REGION
};
|
|
|
|
|
2008-10-23 06:38:42 +08:00
|
|
|
/*
 * Port types.  MLX4_PORT_TYPE_AUTO (3) has the same value as
 * MLX4_PORT_TYPE_IB | MLX4_PORT_TYPE_ETH.
 */
enum mlx4_port_type {
	MLX4_PORT_TYPE_NONE	= 0,
	MLX4_PORT_TYPE_IB	= 1,
	MLX4_PORT_TYPE_ETH	= 2,
	MLX4_PORT_TYPE_AUTO	= 3
};
|
|
|
|
|
2008-10-23 02:44:46 +08:00
|
|
|
/*
 * Special VLAN indices: 0 and 1 are named special entries;
 * MLX4_VLAN_REGULAR (2) follows them.
 */
enum mlx4_special_vlan_idx {
	MLX4_NO_VLAN_IDX        = 0,
	MLX4_VLAN_MISS_IDX,
	MLX4_VLAN_REGULAR
};
|
|
|
|
|
2011-03-23 06:38:17 +08:00
|
|
|
/* Steering types; MLX4_NUM_STEERS is the number of types (2). */
enum mlx4_steer_type {
	MLX4_MC_STEER = 0,
	MLX4_UC_STEER,
	MLX4_NUM_STEERS
};
|
|
|
|
|
2017-06-21 14:29:36 +08:00
|
|
|
/* Resource usage classes; no explicit values, so numbering starts at 0. */
enum mlx4_resource_usage {
	MLX4_RES_USAGE_NONE,
	MLX4_RES_USAGE_DRIVER,
	MLX4_RES_USAGE_USER_VERBS,
};
|
|
|
|
|
2008-10-23 01:25:29 +08:00
|
|
|
/* MLX4_NUM_FEXCH: 64K (65536). */
enum {
	MLX4_NUM_FEXCH          = 64 * 1024,
};
|
|
|
|
|
2010-10-07 22:24:16 +08:00
|
|
|
/* Upper bound on fast-register pages (511). */
enum {
	MLX4_MAX_FAST_REG_PAGES = 511,
};
|
|
|
|
|
2015-10-28 19:28:15 +08:00
|
|
|
enum {
	/*
	 * Max wqe size for rdma read is 512 bytes, so this
	 * limits our max_sge_rd as the wqe needs to fit:
	 * - ctrl segment (16 bytes)
	 * - rdma segment (16 bytes)
	 * - scatter elements (16 bytes each)
	 *
	 * That leaves (512 - 16 - 16) / 16 = 30 scatter elements.
	 */
	MLX4_MAX_SGE_RD	= (512 - 16 - 16) / 16
};
|
|
|
|
|
mlx4: Use port management change event instead of smp_snoop
The port management change event can replace smp_snoop. If the
capability bit for this event is set in dev-caps, the event is used
(by the driver setting the PORT_MNG_CHG_EVENT bit in the async event
mask in the MAP_EQ fw command). In this case, when the driver passes
incoming SMP PORT_INFO SET mads to the FW, the FW generates port
management change events to signal any changes to the driver.
If the FW generates these events, smp_snoop shouldn't be invoked in
ib_process_mad(), or duplicate events will occur (once from the
FW-generated event, and once from smp_snoop).
In the case where the FW does not generate port management change
events smp_snoop needs to be invoked to create these events. The flow
in smp_snoop has been modified to make use of the same procedures as
in the fw-generated-event event case to generate the port management
events (LID change, Client-rereg, Pkey change, and/or GID change).
Port management change event handling required changing the
mlx4_ib_event and mlx4_dispatch_event prototypes; the "param" argument
(last argument) had to be changed to unsigned long in order to
accommodate passing the EQE pointer.
We also needed to move the definition of struct mlx4_eqe from
net/mlx4.h to file device.h -- to make it available to the IB driver,
to handle port management change events.
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
2012-06-19 16:21:40 +08:00
|
|
|
/*
 * Subtypes of the port management change (PMC) event: which table or
 * attribute changed (GUID info, port info, P_Key table, SL-to-VL map).
 */
enum {
	MLX4_DEV_PMC_SUBTYPE_GUID_INFO	 = 0x14,
	MLX4_DEV_PMC_SUBTYPE_PORT_INFO	 = 0x15,
	MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE	 = 0x16,
	MLX4_DEV_PMC_SUBTYPE_SL_TO_VL_MAP = 0x17,
};
|
|
|
|
|
|
|
|
/* Port mgmt change event handling: port-info change mask bits, one bit each. */
enum {
	MLX4_EQ_PORT_INFO_MSTR_SM_LID_CHANGE_MASK	= 1 << 0,
	MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK		= 1 << 1,
	MLX4_EQ_PORT_INFO_LID_CHANGE_MASK		= 1 << 2,
	MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK		= 1 << 3,
	MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK	= 1 << 4,
};
|
|
|
|
|
2016-09-13 00:16:21 +08:00
|
|
|
union sl2vl_tbl_to_u64 {
|
|
|
|
u8 sl8[8];
|
|
|
|
u64 sl64;
|
|
|
|
};
|
|
|
|
|
2015-01-25 22:59:38 +08:00
|
|
|
/* Device state flag bits (MLX4_DEVICE_STATE_*). */
enum {
	MLX4_DEVICE_STATE_UP			= 1 << 0,
	MLX4_DEVICE_STATE_INTERNAL_ERROR	= 1 << 1,
};
|
|
|
|
|
2015-01-25 22:59:40 +08:00
|
|
|
/* Interface state flag bits (MLX4_INTERFACE_STATE_*). */
enum {
	MLX4_INTERFACE_STATE_UP		= 1 << 0,
	MLX4_INTERFACE_STATE_DELETION	= 1 << 1,
	MLX4_INTERFACE_STATE_NOWAIT	= 1 << 2,
};
|
|
|
|
|
mlx4: Use port management change event instead of smp_snoop
The port management change event can replace smp_snoop. If the
capability bit for this event is set in dev-caps, the event is used
(by the driver setting the PORT_MNG_CHG_EVENT bit in the async event
mask in the MAP_EQ fw command). In this case, when the driver passes
incoming SMP PORT_INFO SET mads to the FW, the FW generates port
management change events to signal any changes to the driver.
If the FW generates these events, smp_snoop shouldn't be invoked in
ib_process_mad(), or duplicate events will occur (once from the
FW-generated event, and once from smp_snoop).
In the case where the FW does not generate port management change
events smp_snoop needs to be invoked to create these events. The flow
in smp_snoop has been modified to make use of the same procedures as
in the fw-generated-event event case to generate the port management
events (LID change, Client-rereg, Pkey change, and/or GID change).
Port management change event handling required changing the
mlx4_ib_event and mlx4_dispatch_event prototypes; the "param" argument
(last argument) had to be changed to unsigned long in order to
accommodate passing the EQE pointer.
We also needed to move the definition of struct mlx4_eqe from
net/mlx4.h to file device.h -- to make it available to the IB driver,
to handle port management change events.
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
2012-06-19 16:21:40 +08:00
|
|
|
/* Either master-SM change bit of the port-info change mask: SL change or LID change. */
#define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \
			     MLX4_EQ_PORT_INFO_MSTR_SM_LID_CHANGE_MASK)
|
|
|
|
|
2014-10-27 17:37:35 +08:00
|
|
|
/* Module identifier codes for SFP, QSFP, QSFP+ and QSFP28. */
enum mlx4_module_id {
	MLX4_MODULE_ID_SFP		= 0x3,
	MLX4_MODULE_ID_QSFP		= 0xC,
	MLX4_MODULE_ID_QSFP_PLUS	= 0xD,
	MLX4_MODULE_ID_QSFP28		= 0x11,
};
|
|
|
|
|
2015-03-18 20:57:34 +08:00
|
|
|
/* QP rate-limit unit selectors: none, then KBS, MBS and GBS. */
enum { /* rl */
	MLX4_QP_RATE_LIMIT_NONE		= 0,
	MLX4_QP_RATE_LIMIT_KBS		= 1,
	MLX4_QP_RATE_LIMIT_MBS		= 2,
	MLX4_QP_RATE_LIMIT_GBS		= 3
};
|
|
|
|
|
|
|
|
struct mlx4_rate_limit_caps {
|
|
|
|
u16 num_rates; /* Number of different rates */
|
|
|
|
u8 min_unit;
|
|
|
|
u16 min_val;
|
|
|
|
u8 max_unit;
|
|
|
|
u16 max_val;
|
|
|
|
};
|
|
|
|
|
IB/mlx4: Use multiple WQ blocks to post smaller send WQEs
ConnectX HCA supports shrinking WQEs, so that a single work request
can be made of multiple units of wqe_shift. This way, WRs can differ
in size, and do not have to be a power of 2 in size, saving memory and
speeding up send WR posting. Unfortunately, if we do this then the
wqe_index field in CQEs can't be used to look up the WR ID anymore, so
our implementation does this only if selective signaling is off.
Further, on 32-bit platforms, we can't use vmap() to make the QP
buffer virtually contiguous. Thus we have to use constant-sized WRs to
make sure a WR is always fully within a single page-sized chunk.
Finally, we use WRs with the NOP opcode to avoid wrapping around the
queue buffer in the middle of posting a WR, and we set the
NoErrorCompletion bit to avoid getting completions with error for NOP
WRs. However, NEC is only supported starting with firmware 2.2.232,
so we use constant-sized WRs for older firmware. And, since MLX QPs
only support SEND, we use constant-sized WRs in this case.
When stamping during NOP posting, do stamping following setting of the
NOP WQE valid bit.
Signed-off-by: Michael S. Tsirkin <mst@dev.mellanox.co.il>
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
2008-01-28 16:40:59 +08:00
|
|
|
/*
 * Pack a firmware version triple into a single u64 so that versions can be
 * compared as plain integers: major is shifted left by 32, minor by 16, and
 * subminor occupies the low bits.
 *
 * The three fields are OR-ed together without masking, so a minor or
 * subminor value of 1 << 16 or more would spill into the next field.
 */
static inline u64 mlx4_fw_ver(u64 major, u64 minor, u64 subminor)
{
	return (major << 32) | (minor << 16) | subminor;
}
|
|
|
|
|
net/mlx4_core: Fix number of EQs used in ICM initialisation
In SRIOV mode, the number of EQs used when computing the total ICM size
was incorrect.
To fix this, we do the following:
1. We add a new structure to mlx4_dev, mlx4_phys_caps, to contain physical HCA
capabilities. The PPF uses the phys capabilities when it computes things
like ICM size.
The dev_caps structure will then contain the paravirtualized values, making
bookkeeping much easier in SRIOV mode. We add a structure rather than a
single parameter because there will be other fields in the phys_caps.
The first field we add to the mlx4_phys_caps structure is num_phys_eqs.
2. In INIT_HCA, when running in SRIOV mode, the "log_num_eqs" parameter
passed to the FW is the number of EQs per VF/PF; each function (PF or VF)
has this number of EQs available.
However, the total number of EQs which must be allowed for in the ICM is
(1 << log_num_eqs) * (#VFs + #PFs). Rather than compute this quantity,
we allocate ICM space for 1024 EQs (which is the device maximum
number of EQs, and which is the value we place in the mlx4_phys_caps structure).
For INIT_HCA, however, we use the per-function number of EQs as described
above.
Signed-off-by: Marcel Apfelbaum <marcela@dev.mellanox.co.il>
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2012-05-30 17:14:51 +08:00
|
|
|
struct mlx4_phys_caps {
|
mlx4: Put physical GID and P_Key table sizes in mlx4_phys_caps struct and paravirtualize them
To allow easy paravirtualization of P_Key and GID table sizes, keep
paravirtualized sizes in mlx4_dev->caps, but save the actual physical
sizes from FW in struct: mlx4_dev->phys_cap.
In addition, in SR-IOV mode, do the following:
1. Reduce reported P_Key table size by 1.
This is done to reserve the highest P_Key index for internal use,
for declaring an invalid P_Key in P_Key paravirtualization.
We require a P_Key index which always contain an invalid P_Key
value for this purpose (i.e., one which cannot be modified by
the subnet manager). The way to do this is to reduce the
P_Key table size reported to the subnet manager by 1, so that
it will not attempt to access the P_Key at index #127.
2. Paravirtualize the GID table size to 1. Thus, each guest sees
only a single GID (at its paravirtualized index 0).
In addition, since we are paravirtualizing the GID table size to 1, we
add paravirtualization of the master GID event here (i.e., we do not
do ib_dispatch_event() for the GUID change event on the master, since
its (only) GUID never changes).
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Roland Dreier <roland@purestorage.com>
2012-06-19 16:21:44 +08:00
|
|
|
u32 gid_phys_table_len[MLX4_MAX_PORTS + 1];
|
|
|
|
u32 pkey_phys_table_len[MLX4_MAX_PORTS + 1];
|
net/mlx4_core: Fix number of EQs used in ICM initialisation
In SRIOV mode, the number of EQs used when computing the total ICM size
was incorrect.
To fix this, we do the following:
1. We add a new structure to mlx4_dev, mlx4_phys_caps, to contain physical HCA
capabilities. The PPF uses the phys capabilities when it computes things
like ICM size.
The dev_caps structure will then contain the paravirtualized values, making
bookkeeping much easier in SRIOV mode. We add a structure rather than a
single parameter because there will be other fields in the phys_caps.
The first field we add to the mlx4_phys_caps structure is num_phys_eqs.
2. In INIT_HCA, when running in SRIOV mode, the "log_num_eqs" parameter
passed to the FW is the number of EQs per VF/PF; each function (PF or VF)
has this number of EQs available.
However, the total number of EQs which must be allowed for in the ICM is
(1 << log_num_eqs) * (#VFs + #PFs). Rather than compute this quantity,
we allocate ICM space for 1024 EQs (which is the device maximum
number of EQs, and which is the value we place in the mlx4_phys_caps structure).
For INIT_HCA, however, we use the per-function number of EQs as described
above.
Signed-off-by: Marcel Apfelbaum <marcela@dev.mellanox.co.il>
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2012-05-30 17:14:51 +08:00
|
|
|
u32 num_phys_eqs;
|
mlx4: Modify proxy/tunnel QP mechanism so that guests do no calculations
Previously, the structure of a guest's proxy QPs followed the
structure of the PPF special qps (qp0 port 1, qp0 port 2, qp1 port 1,
qp1 port 2, ...). The guest then did offset calculations on the
sqp_base qp number that the PPF passed to it in QUERY_FUNC_CAP().
This is now changed so that the guest does no offset calculations
regarding proxy or tunnel QPs to use. This change frees the PPF from
needing to adhere to a specific order in allocating proxy and tunnel
QPs.
Now QUERY_FUNC_CAP provides each port individually with its proxy
qp0, proxy qp1, tunnel qp0, and tunnel qp1 QP numbers, and these are
used directly where required (with no offset calculations).
To accomplish this change, several fields were added to the phys_caps
structure for use by the PPF and by non-SR-IOV mode:
base_sqpn -- in non-sriov mode, this was formerly sqp_start.
base_proxy_sqpn -- the first physical proxy qp number -- used by PPF
base_tunnel_sqpn -- the first physical tunnel qp number -- used by PPF.
The current code in the PPF still adheres to the previous layout of
sqps, proxy-sqps and tunnel-sqps. However, the PPF can change this
layout without affecting VF or (paravirtualized) PF code.
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Roland Dreier <roland@purestorage.com>
2012-08-03 16:40:57 +08:00
|
|
|
u32 base_sqpn;
|
|
|
|
u32 base_proxy_sqpn;
|
|
|
|
u32 base_tunnel_sqpn;
|
net/mlx4_core: Fix number of EQs used in ICM initialisation
In SRIOV mode, the number of EQs used when computing the total ICM size
was incorrect.
To fix this, we do the following:
1. We add a new structure to mlx4_dev, mlx4_phys_caps, to contain physical HCA
capabilities. The PPF uses the phys capabilities when it computes things
like ICM size.
The dev_caps structure will then contain the paravirtualized values, making
bookkeeping much easier in SRIOV mode. We add a structure rather than a
single parameter because there will be other fields in the phys_caps.
The first field we add to the mlx4_phys_caps structure is num_phys_eqs.
2. In INIT_HCA, when running in SRIOV mode, the "log_num_eqs" parameter
passed to the FW is the number of EQs per VF/PF; each function (PF or VF)
has this number of EQs available.
However, the total number of EQs which must be allowed for in the ICM is
(1 << log_num_eqs) * (#VFs + #PFs). Rather than compute this quantity,
we allocate ICM space for 1024 EQs (which is the device maximum
number of EQs, and which is the value we place in the mlx4_phys_caps structure).
For INIT_HCA, however, we use the per-function number of EQs as described
above.
Signed-off-by: Marcel Apfelbaum <marcela@dev.mellanox.co.il>
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2012-05-30 17:14:51 +08:00
|
|
|
};
|
|
|
|
|
2017-08-28 21:38:20 +08:00
|
|
|
struct mlx4_spec_qps {
|
|
|
|
u32 qp0_qkey;
|
|
|
|
u32 qp0_proxy;
|
|
|
|
u32 qp0_tunnel;
|
|
|
|
u32 qp1_proxy;
|
|
|
|
u32 qp1_tunnel;
|
|
|
|
};
|
|
|
|
|
2007-05-09 09:00:38 +08:00
|
|
|
struct mlx4_caps {
|
|
|
|
u64 fw_ver;
|
2011-12-13 12:10:33 +08:00
|
|
|
u32 function;
|
2007-05-09 09:00:38 +08:00
|
|
|
int num_ports;
|
2007-06-18 23:15:02 +08:00
|
|
|
int vl_cap[MLX4_MAX_PORTS + 1];
|
2008-10-23 01:56:48 +08:00
|
|
|
int ib_mtu_cap[MLX4_MAX_PORTS + 1];
|
2008-11-29 13:29:46 +08:00
|
|
|
__be32 ib_port_def_cap[MLX4_MAX_PORTS + 1];
|
2008-10-23 01:56:48 +08:00
|
|
|
u64 def_mac[MLX4_MAX_PORTS + 1];
|
|
|
|
int eth_mtu_cap[MLX4_MAX_PORTS + 1];
|
2007-06-18 23:15:02 +08:00
|
|
|
int gid_table_len[MLX4_MAX_PORTS + 1];
|
|
|
|
int pkey_table_len[MLX4_MAX_PORTS + 1];
|
2010-08-24 11:46:23 +08:00
|
|
|
int trans_type[MLX4_MAX_PORTS + 1];
|
|
|
|
int vendor_oui[MLX4_MAX_PORTS + 1];
|
|
|
|
int wavelength[MLX4_MAX_PORTS + 1];
|
|
|
|
u64 trans_code[MLX4_MAX_PORTS + 1];
|
2007-05-09 09:00:38 +08:00
|
|
|
int local_ca_ack_delay;
|
|
|
|
int num_uars;
|
2011-12-13 12:12:13 +08:00
|
|
|
u32 uar_page_size;
|
2007-05-09 09:00:38 +08:00
|
|
|
int bf_reg_size;
|
|
|
|
int bf_regs_per_page;
|
|
|
|
int max_sq_sg;
|
|
|
|
int max_rq_sg;
|
|
|
|
int num_qps;
|
|
|
|
int max_wqes;
|
|
|
|
int max_sq_desc_sz;
|
|
|
|
int max_rq_desc_sz;
|
|
|
|
int max_qp_init_rdma;
|
|
|
|
int max_qp_dest_rdma;
|
2016-06-21 17:43:59 +08:00
|
|
|
int max_tc_eth;
|
2017-08-28 21:38:20 +08:00
|
|
|
struct mlx4_spec_qps *spec_qps;
|
2007-05-09 09:00:38 +08:00
|
|
|
int num_srqs;
|
|
|
|
int max_srq_wqes;
|
|
|
|
int max_srq_sge;
|
|
|
|
int reserved_srqs;
|
|
|
|
int num_cqs;
|
|
|
|
int max_cqes;
|
|
|
|
int reserved_cqs;
|
2014-11-13 20:45:32 +08:00
|
|
|
int num_sys_eqs;
|
2007-05-09 09:00:38 +08:00
|
|
|
int num_eqs;
|
|
|
|
int reserved_eqs;
|
2008-12-22 23:15:03 +08:00
|
|
|
int num_comp_vectors;
|
2007-05-09 09:00:38 +08:00
|
|
|
int num_mpts;
|
2012-02-10 00:10:06 +08:00
|
|
|
int max_fmr_maps;
|
2011-12-13 12:16:56 +08:00
|
|
|
int num_mtts;
|
2007-05-09 09:00:38 +08:00
|
|
|
int fmr_reserved_mtts;
|
|
|
|
int reserved_mtts;
|
|
|
|
int reserved_mrws;
|
|
|
|
int reserved_uars;
|
|
|
|
int num_mgms;
|
|
|
|
int num_amgms;
|
|
|
|
int reserved_mcgs;
|
|
|
|
int num_qp_per_mgm;
|
2012-07-05 12:03:44 +08:00
|
|
|
int steering_mode;
|
net/mlx4: Add support for A0 steering
Add the required firmware commands for A0 steering and a way to enable
that. The firmware support focuses on INIT_HCA, QUERY_HCA, QUERY_PORT,
QUERY_DEV_CAP and QUERY_FUNC_CAP commands. Those commands are used
to configure and query the device.
The different A0 DMFS (steering) modes are:
Static - optimized performance, but flow steering rules are
limited. This mode should be choosed explicitly by the user
in order to be used.
Dynamic - this mode should be explicitly choosed by the user.
In this mode, the FW works in optimized steering mode as long as
it can and afterwards automatically drops to classic (full) DMFS.
Disable - this mode should be explicitly choosed by the user.
The user instructs the system not to use optimized steering, even if
the FW supports Dynamic A0 DMFS (and thus will be able to use optimized
steering in Default A0 DMFS mode).
Default - this mode is implicitly choosed. In this mode, if the FW
supports Dynamic A0 DMFS, it'll work in this mode. Otherwise, it'll
work at Disable A0 DMFS mode.
Under SRIOV configuration, when the A0 steering mode is enabled,
older guest VF drivers who aren't using the RX QP allocation flag
(MLX4_RESERVE_A0_QP) will get a QP from the general range and
fail when attempting to register a steering rule. To avoid that,
the PF context behaviour is changed once on A0 static mode, to
require support for the allocation flag in VF drivers too.
In order to enable A0 steering, we use log_num_mgm_entry_size param.
If the value of the parameter is not positive, we treat the absolute
value of log_num_mgm_entry_size as a bit field. Setting bit 2 of this
bit field enables static A0 steering.
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-12-11 16:58:00 +08:00
|
|
|
int dmfs_high_steer_mode;
|
{NET, IB}/mlx4: Add device managed flow steering firmware API
The driver is modified to support three operation modes.
If supported by firmware use the device managed flow steering
API, that which we call device managed steering mode. Else, if
the firmware supports the B0 steering mode use it, and finally,
if none of the above, use the A0 steering mode.
When the steering mode is device managed, the code is modified
such that L2 based rules set by the mlx4_en driver for Ethernet
unicast and multicast, and the IB stack multicast attach calls
done through the mlx4_ib driver are all routed to use the device
managed API.
When attaching rule using device managed flow steering API,
the firmware returns a 64 bit registration id, which is to be
provided during detach.
Currently the firmware is always programmed during HCA initialization
to use standard L2 hashing. Future work should be done to allow
configuring the flow-steering hash function with common, non
proprietary means.
Signed-off-by: Hadar Hen Zion <hadarh@mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2012-07-05 12:03:46 +08:00
|
|
|
int fs_log_max_ucast_qp_range_size;
|
2007-05-09 09:00:38 +08:00
|
|
|
int num_pds;
|
|
|
|
int reserved_pds;
|
2011-06-03 00:01:33 +08:00
|
|
|
int max_xrcds;
|
|
|
|
int reserved_xrcds;
|
2007-05-09 09:00:38 +08:00
|
|
|
int mtt_entry_sz;
|
2007-06-26 20:55:28 +08:00
|
|
|
u32 max_msg_sz;
|
2007-05-09 09:00:38 +08:00
|
|
|
u32 page_size_cap;
|
2011-06-15 22:41:42 +08:00
|
|
|
u64 flags;
|
2012-04-29 22:04:25 +08:00
|
|
|
u64 flags2;
|
2008-07-23 23:12:26 +08:00
|
|
|
u32 bmme_flags;
|
|
|
|
u32 reserved_lkey;
|
2007-05-09 09:00:38 +08:00
|
|
|
u16 stat_rate_support;
|
2007-06-18 23:15:02 +08:00
|
|
|
u8 port_width_cap[MLX4_MAX_PORTS + 1];
|
2008-04-17 12:09:27 +08:00
|
|
|
int max_gso_sz;
|
2012-04-29 22:04:25 +08:00
|
|
|
int max_rss_tbl_sz;
|
2008-10-23 01:25:29 +08:00
|
|
|
int reserved_qps_cnt[MLX4_NUM_QP_REGION];
|
|
|
|
int reserved_qps;
|
|
|
|
int reserved_qps_base[MLX4_NUM_QP_REGION];
|
|
|
|
int log_num_macs;
|
|
|
|
int log_num_vlans;
|
2008-10-23 06:38:42 +08:00
|
|
|
enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1];
|
|
|
|
u8 supported_type[MLX4_MAX_PORTS + 1];
|
2011-12-19 12:00:34 +08:00
|
|
|
u8 suggested_type[MLX4_MAX_PORTS + 1];
|
|
|
|
u8 default_sense[MLX4_MAX_PORTS + 1];
|
2011-12-13 12:10:41 +08:00
|
|
|
u32 port_mask[MLX4_MAX_PORTS + 1];
|
2009-03-19 10:45:11 +08:00
|
|
|
enum mlx4_port_type possible_type[MLX4_MAX_PORTS + 1];
|
2011-06-15 22:47:14 +08:00
|
|
|
u32 max_counters;
|
2012-01-12 01:02:17 +08:00
|
|
|
u8 port_ib_mtu[MLX4_MAX_PORTS + 1];
|
2012-08-03 16:40:40 +08:00
|
|
|
u16 sqp_demux;
|
2012-10-21 22:59:24 +08:00
|
|
|
u32 eqe_size;
|
|
|
|
u32 cqe_size;
|
|
|
|
u8 eqe_factor;
|
|
|
|
u32 userspace_caps; /* userspace must be aware of these */
|
|
|
|
u32 function_caps; /* VFs must be aware of these */
|
2013-04-23 14:06:48 +08:00
|
|
|
u16 hca_core_clock;
|
2013-12-20 03:20:12 +08:00
|
|
|
u64 phys_port_id[MLX4_MAX_PORTS + 1];
|
2013-12-23 22:09:43 +08:00
|
|
|
int tunnel_offload_mode;
|
2014-11-09 19:51:53 +08:00
|
|
|
u8 rx_checksum_flags_port[MLX4_MAX_PORTS + 1];
|
2015-07-27 19:46:31 +08:00
|
|
|
u8 phv_bit[MLX4_MAX_PORTS + 1];
|
net/mlx4: Change QP allocation scheme
When using BF (Blue-Flame), the QPN overrides the VLAN, CV, and SV fields
in the WQE. Thus, BF may only be used for QPNs with bits 6,7 unset.
The current Ethernet driver code reserves a Tx QP range with 256b alignment.
This is wrong because if there are more than 64 Tx QPs in use,
QPNs >= base + 65 will have bits 6/7 set.
This problem is not specific for the Ethernet driver, any entity that
tries to reserve more than 64 BF-enabled QPs should fail. Also, using
ranges is not necessary here and is wasteful.
The new mechanism introduced here will support reservation for
"Eth QPs eligible for BF" for all drivers: bare-metal, multi-PF, and VFs
(when hypervisors support WC in VMs). The flow we use is:
1. In mlx4_en, allocate Tx QPs one by one instead of a range allocation,
and request "BF enabled QPs" if BF is supported for the function
2. In the ALLOC_RES FW command, change param1 to:
a. param1[23:0] - number of QPs
b. param1[31-24] - flags controlling QPs reservation
Bit 31 refers to Eth blueflame supported QPs. Those QPs must have
bits 6 and 7 unset in order to be used in Ethernet.
Bits 24-30 of the flags are currently reserved.
When a function tries to allocate a QP, it states the required attributes
for this QP. Those attributes are considered "best-effort". If an attribute,
such as Ethernet BF enabled QP, is a must-have attribute, the function has
to check that attribute is supported before trying to do the allocation.
In a lower layer of the code, mlx4_qp_reserve_range masks out the bits
which are unsupported. If SRIOV is used, the PF validates those attributes
and masks out unsupported attributes as well. In order to notify VFs which
attributes are supported, the VF uses QUERY_FUNC_CAP command. This command's
mailbox is filled by the PF, which notifies which QP allocation attributes
it supports.
Signed-off-by: Eugenia Emantayev <eugenia@mellanox.co.il>
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-12-11 16:57:54 +08:00
|
|
|
u8 alloc_res_qp_mask;
|
net/mlx4: Add support for A0 steering
Add the required firmware commands for A0 steering and a way to enable
that. The firmware support focuses on INIT_HCA, QUERY_HCA, QUERY_PORT,
QUERY_DEV_CAP and QUERY_FUNC_CAP commands. Those commands are used
to configure and query the device.
The different A0 DMFS (steering) modes are:
Static - optimized performance, but flow steering rules are
limited. This mode should be choosed explicitly by the user
in order to be used.
Dynamic - this mode should be explicitly choosed by the user.
In this mode, the FW works in optimized steering mode as long as
it can and afterwards automatically drops to classic (full) DMFS.
Disable - this mode should be explicitly choosed by the user.
The user instructs the system not to use optimized steering, even if
the FW supports Dynamic A0 DMFS (and thus will be able to use optimized
steering in Default A0 DMFS mode).
Default - this mode is implicitly choosed. In this mode, if the FW
supports Dynamic A0 DMFS, it'll work in this mode. Otherwise, it'll
work at Disable A0 DMFS mode.
Under SRIOV configuration, when the A0 steering mode is enabled,
older guest VF drivers who aren't using the RX QP allocation flag
(MLX4_RESERVE_A0_QP) will get a QP from the general range and
fail when attempting to register a steering rule. To avoid that,
the PF context behaviour is changed once on A0 static mode, to
require support for the allocation flag in VF drivers too.
In order to enable A0 steering, we use log_num_mgm_entry_size param.
If the value of the parameter is not positive, we treat the absolute
value of log_num_mgm_entry_size as a bit field. Setting bit 2 of this
bit field enables static A0 steering.
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-12-11 16:58:00 +08:00
|
|
|
u32 dmfs_high_rate_qpn_base;
|
|
|
|
u32 dmfs_high_rate_qpn_range;
|
2015-01-25 22:59:42 +08:00
|
|
|
u32 vf_caps;
|
2017-08-01 21:43:43 +08:00
|
|
|
bool wol_port[MLX4_MAX_PORTS + 1];
|
2015-03-18 20:57:34 +08:00
|
|
|
struct mlx4_rate_limit_caps rl_caps;
|
2007-05-09 09:00:38 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
struct mlx4_buf_list {
|
|
|
|
void *buf;
|
|
|
|
dma_addr_t map;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct mlx4_buf {
|
2008-02-07 13:17:59 +08:00
|
|
|
struct mlx4_buf_list direct;
|
|
|
|
struct mlx4_buf_list *page_list;
|
2007-05-09 09:00:38 +08:00
|
|
|
int nbufs;
|
|
|
|
int npages;
|
|
|
|
int page_shift;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct mlx4_mtt {
|
2011-12-13 12:16:56 +08:00
|
|
|
u32 offset;
|
2007-05-09 09:00:38 +08:00
|
|
|
int order;
|
|
|
|
int page_shift;
|
|
|
|
};
|
|
|
|
|
2008-04-24 02:55:45 +08:00
|
|
|
enum {
|
|
|
|
MLX4_DB_PER_PAGE = PAGE_SIZE / 4
|
|
|
|
};
|
|
|
|
|
|
|
|
struct mlx4_db_pgdir {
|
|
|
|
struct list_head list;
|
|
|
|
DECLARE_BITMAP(order0, MLX4_DB_PER_PAGE);
|
|
|
|
DECLARE_BITMAP(order1, MLX4_DB_PER_PAGE / 2);
|
|
|
|
unsigned long *bits[2];
|
|
|
|
__be32 *db_page;
|
|
|
|
dma_addr_t db_dma;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct mlx4_ib_user_db_page;
|
|
|
|
|
|
|
|
struct mlx4_db {
|
|
|
|
__be32 *db;
|
|
|
|
union {
|
|
|
|
struct mlx4_db_pgdir *pgdir;
|
|
|
|
struct mlx4_ib_user_db_page *user_page;
|
|
|
|
} u;
|
|
|
|
dma_addr_t dma;
|
|
|
|
int index;
|
|
|
|
int order;
|
|
|
|
};
|
|
|
|
|
2008-04-26 05:27:08 +08:00
|
|
|
struct mlx4_hwq_resources {
|
|
|
|
struct mlx4_db db;
|
|
|
|
struct mlx4_mtt mtt;
|
|
|
|
struct mlx4_buf buf;
|
|
|
|
};
|
|
|
|
|
2007-05-09 09:00:38 +08:00
|
|
|
struct mlx4_mr {
|
|
|
|
struct mlx4_mtt mtt;
|
|
|
|
u64 iova;
|
|
|
|
u64 size;
|
|
|
|
u32 key;
|
|
|
|
u32 pd;
|
|
|
|
u32 access;
|
|
|
|
int enabled;
|
|
|
|
};
|
|
|
|
|
2013-02-07 00:19:14 +08:00
|
|
|
/* Memory window type; the numeric values are explicit (1 and 2). */
enum mlx4_mw_type {
	MLX4_MW_TYPE_1 = 1,
	MLX4_MW_TYPE_2 = 2,
};
|
|
|
|
|
|
|
|
struct mlx4_mw {
|
|
|
|
u32 key;
|
|
|
|
u32 pd;
|
|
|
|
enum mlx4_mw_type type;
|
|
|
|
int enabled;
|
|
|
|
};
|
|
|
|
|
2007-08-01 17:29:05 +08:00
|
|
|
struct mlx4_fmr {
|
|
|
|
struct mlx4_mr mr;
|
|
|
|
struct mlx4_mpt_entry *mpt;
|
|
|
|
__be64 *mtts;
|
|
|
|
dma_addr_t dma_handle;
|
|
|
|
int max_pages;
|
|
|
|
int max_maps;
|
|
|
|
int maps;
|
|
|
|
u8 page_shift;
|
|
|
|
};
|
|
|
|
|
2007-05-09 09:00:38 +08:00
|
|
|
struct mlx4_uar {
|
|
|
|
unsigned long pfn;
|
|
|
|
int index;
|
2011-03-23 06:38:41 +08:00
|
|
|
struct list_head bf_list;
|
|
|
|
unsigned free_bf_bmap;
|
|
|
|
void __iomem *map;
|
|
|
|
void __iomem *bf_map;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct mlx4_bf {
|
2014-10-05 17:35:09 +08:00
|
|
|
unsigned int offset;
|
2011-03-23 06:38:41 +08:00
|
|
|
int buf_size;
|
|
|
|
struct mlx4_uar *uar;
|
|
|
|
void __iomem *reg;
|
2007-05-09 09:00:38 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
struct mlx4_cq {
|
|
|
|
void (*comp) (struct mlx4_cq *);
|
|
|
|
void (*event) (struct mlx4_cq *, enum mlx4_event);
|
|
|
|
|
|
|
|
struct mlx4_uar *uar;
|
|
|
|
|
|
|
|
u32 cons_index;
|
|
|
|
|
2014-05-14 17:15:10 +08:00
|
|
|
u16 irq;
|
2007-05-09 09:00:38 +08:00
|
|
|
__be32 *set_ci_db;
|
|
|
|
__be32 *arm_db;
|
|
|
|
int arm_sn;
|
|
|
|
|
|
|
|
int cqn;
|
2008-12-22 23:15:03 +08:00
|
|
|
unsigned vector;
|
2007-05-09 09:00:38 +08:00
|
|
|
|
2017-10-20 15:23:37 +08:00
|
|
|
refcount_t refcount;
|
2007-05-09 09:00:38 +08:00
|
|
|
struct completion free;
|
net/mlx4_core: Use tasklet for user-space CQ completion events
Previously, we've fired all our completion callbacks straight from our ISR.
Some of those callbacks were lightweight (for example, mlx4_en's and
IPoIB napi callbacks), but some of them did more work (for example,
the user-space RDMA stack uverbs' completion handler). Besides that,
doing more than the minimal work in ISR is generally considered wrong,
it could even lead to a hard lockup of the system. Since when a lot
of completion events are generated by the hardware, the loop over those
events could be so long, that we'll get into a hard lockup by the system
watchdog.
In order to avoid that, add a new way of invoking completion events
callbacks. In the interrupt itself, we add the CQs which receive completion
event to a per-EQ list and schedule a tasklet. In the tasklet context
we loop over all the CQs in the list and invoke the user callback.
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-12-11 16:57:53 +08:00
|
|
|
struct {
|
|
|
|
struct list_head list;
|
|
|
|
void (*comp)(struct mlx4_cq *);
|
|
|
|
void *priv;
|
|
|
|
} tasklet_ctx;
|
2015-02-08 17:49:34 +08:00
|
|
|
int reset_notify_added;
|
|
|
|
struct list_head reset_notify;
|
2017-06-21 14:29:36 +08:00
|
|
|
u8 usage;
|
2007-05-09 09:00:38 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
struct mlx4_qp {
|
|
|
|
void (*event) (struct mlx4_qp *, enum mlx4_event);
|
|
|
|
|
|
|
|
int qpn;
|
|
|
|
|
2017-10-20 15:23:38 +08:00
|
|
|
refcount_t refcount;
|
2007-05-09 09:00:38 +08:00
|
|
|
struct completion free;
|
2017-06-21 14:29:36 +08:00
|
|
|
u8 usage;
|
2007-05-09 09:00:38 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
struct mlx4_srq {
|
|
|
|
void (*event) (struct mlx4_srq *, enum mlx4_event);
|
|
|
|
|
|
|
|
int srqn;
|
|
|
|
int max;
|
|
|
|
int max_gs;
|
|
|
|
int wqe_shift;
|
|
|
|
|
2017-10-20 15:23:39 +08:00
|
|
|
refcount_t refcount;
|
2007-05-09 09:00:38 +08:00
|
|
|
struct completion free;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct mlx4_av {
|
|
|
|
__be32 port_pd;
|
|
|
|
u8 reserved1;
|
|
|
|
u8 g_slid;
|
|
|
|
__be16 dlid;
|
|
|
|
u8 reserved2;
|
|
|
|
u8 gid_index;
|
|
|
|
u8 stat_rate;
|
|
|
|
u8 hop_limit;
|
|
|
|
__be32 sl_tclass_flowlabel;
|
|
|
|
u8 dgid[16];
|
|
|
|
};
|
|
|
|
|
2010-10-25 12:08:52 +08:00
|
|
|
struct mlx4_eth_av {
|
|
|
|
__be32 port_pd;
|
|
|
|
u8 reserved1;
|
|
|
|
u8 smac_idx;
|
|
|
|
u16 reserved2;
|
|
|
|
u8 reserved3;
|
|
|
|
u8 gid_index;
|
|
|
|
u8 stat_rate;
|
|
|
|
u8 hop_limit;
|
|
|
|
__be32 sl_tclass_flowlabel;
|
|
|
|
u8 dgid[16];
|
mlx4: Implement IP based gids support for RoCE/SRIOV
Since there is no connection between the MAC/VLAN and the GID
when using IP-based addressing, the proxy QP1 (running on the
slave) must pass the source-mac, destination-mac, and vlan_id
information separately from the GID. Additionally, the Host
must pass the remote source-mac and vlan_id back to the slave,
This is achieved as follows:
Outgoing MADs:
1. Source MAC: obtained from the CQ completion structure
(struct ib_wc, smac field).
2. Destination MAC: obtained from the tunnel header
3. vlan_id: obtained from the tunnel header.
Incoming MADs
1. The source (i.e., remote) MAC and vlan_id are passed in
the tunnel header to the proxy QP1.
VST mode support:
For outgoing MADs, the vlan_id obtained from the header is
discarded, and the vlan_id specified by the Hypervisor is used
instead.
For incoming MADs, the incoming vlan_id (in the wc) is discarded, and the
"invalid" vlan (0xffff) is substituted when forwarding to the slave.
Signed-off-by: Moni Shoua <monis@mellanox.co.il>
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-03-12 18:00:41 +08:00
|
|
|
u8 s_mac[6];
|
|
|
|
u8 reserved4[2];
|
2010-10-25 12:08:52 +08:00
|
|
|
__be16 vlan;
|
2013-08-02 07:17:48 +08:00
|
|
|
u8 mac[ETH_ALEN];
|
2010-10-25 12:08:52 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
union mlx4_ext_av {
|
|
|
|
struct mlx4_av ib;
|
|
|
|
struct mlx4_eth_av eth;
|
|
|
|
};
|
|
|
|
|
2015-06-15 22:59:05 +08:00
|
|
|
/*
 * Counters should saturate once they reach their maximum value.
 *
 * Stores @value into @counter as a big-endian 32-bit quantity, clamping to
 * U32_MAX when @value is larger. Note that @value is evaluated twice, so
 * do not pass an expression with side effects.
 */
#define ASSIGN_32BIT_COUNTER(counter, value) do {	\
	if ((value) > U32_MAX)				\
		counter = cpu_to_be32(U32_MAX);		\
	else						\
		counter = cpu_to_be32(value);		\
} while (0)
|
|
|
|
|
2011-06-15 22:47:14 +08:00
|
|
|
struct mlx4_counter {
|
|
|
|
u8 reserved1[3];
|
|
|
|
u8 counter_mode;
|
|
|
|
__be32 num_ifc;
|
|
|
|
u32 reserved2[2];
|
|
|
|
__be64 rx_frames;
|
|
|
|
__be64 rx_bytes;
|
|
|
|
__be64 tx_frames;
|
|
|
|
__be64 tx_bytes;
|
|
|
|
};
|
|
|
|
|
mlx4: Structures and init/teardown for VF resource quotas
This is step #1 for implementing SRIOV resource quotas for VFs.
Quotas are implemented per resource type for VFs and the PF, to prevent
any entity from simply grabbing all the resources for itself and leaving
the other entities unable to obtain such resources.
Resources which are allocated using quotas: QPs, CQs, SRQs, MPTs, MTTs, MAC,
VLAN, and Counters.
The quota system works as follows:
Each entity (VF or PF) is given a max number of a given resource (its quota),
and a guaranteed minimum number for each resource (starvation prevention).
For QPs, CQs, SRQs, MPTs and MTTs:
50% of the available quantity for the resource is divided equally among
the PF and all the active VFs (i.e., the number of VFs in the mlx4_core module
parameter "num_vfs"). This 50% represents the "guaranteed minimum" pool.
The other 50% is the "free pool", allocated on a first-come-first-serve basis.
For each VF/PF, resources are first allocated from its "guaranteed-minimum"
pool. When that pool is exhausted, the driver attempts to allocate from
the resource "free-pool".
The quota (i.e., max) for the VFs and the PF is:
The free-pool amount (50% of the real max) + the guaranteed minimum
For MACs:
Guarantee 2 MACs per VF/PF per port. As a result, since we have only
128 MACs per port, reduce the allowable number of VFs from 64 to 63.
Any remaining MACs are put into a free pool.
For VLANs:
For the PF, the per-port quota is 128 and guarantee is 64
(to allow the PF to register at least a VLAN per VF in VST mode).
For the VFs, the per-port quota is 64 and the guarantee is 0.
We assume that VGT VFs are trusted not to abuse the VLAN resource.
For Counters:
For all functions (PF and VFs), the quota is 128 and the guarantee is 0.
In this patch, we define the needed structures, which are added to the
resource-tracker struct. In addition, we do initialization
for the resource quota, and adjust the query_device response to use quotas
rather than resource maxima.
As part of the implementation, we introduce a new field in
mlx4_dev: quotas. This field holds the resource quotas used
to report maxima to the upper layers (ib_core, via query_device).
The HCA maxima of these values are passed to the VFs (via
QUERY_HCA) so that they may continue to use these in handling
QPs, CQs, SRQs and MPTs.
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-11-03 16:03:23 +08:00
|
|
|
/*
 * Per-resource quotas.
 *
 * These hold the resource quotas used to report maxima to the upper
 * layers (ib_core, via query_device).
 */
struct mlx4_quotas {
	int qp;
	int cq;
	int srq;
	int mpt;
	int mtt;
	int counter;
	int xrcd;
};
|
|
|
|
|
2014-03-20 00:11:50 +08:00
|
|
|
struct mlx4_vf_dev {
|
|
|
|
u8 min_port;
|
|
|
|
u8 n_ports;
|
|
|
|
};
|
|
|
|
|
2016-04-20 21:01:16 +08:00
|
|
|
/* PCI status of the device; DISABLED is 0 and ENABLED is 1. */
enum mlx4_pci_status {
	MLX4_PCI_STATUS_DISABLED,
	MLX4_PCI_STATUS_ENABLED,
};
|
|
|
|
|
2015-01-25 22:59:35 +08:00
|
|
|
struct mlx4_dev_persistent {
|
2007-05-09 09:00:38 +08:00
|
|
|
struct pci_dev *pdev;
|
2015-01-25 22:59:35 +08:00
|
|
|
struct mlx4_dev *dev;
|
|
|
|
int nvfs[MLX4_MAX_PORTS + 1];
|
|
|
|
int num_vfs;
|
2015-01-25 22:59:36 +08:00
|
|
|
enum mlx4_port_type curr_port_type[MLX4_MAX_PORTS + 1];
|
|
|
|
enum mlx4_port_type curr_port_poss_type[MLX4_MAX_PORTS + 1];
|
2015-01-25 22:59:37 +08:00
|
|
|
struct work_struct catas_work;
|
|
|
|
struct workqueue_struct *catas_wq;
|
2015-01-25 22:59:38 +08:00
|
|
|
struct mutex device_state_mutex; /* protect HW state */
|
|
|
|
u8 state;
|
2015-01-25 22:59:40 +08:00
|
|
|
struct mutex interface_state_mutex; /* protect SW state */
|
|
|
|
u8 interface_state;
|
2016-04-20 21:01:16 +08:00
|
|
|
struct mutex pci_status_mutex; /* sync pci state */
|
|
|
|
enum mlx4_pci_status pci_status;
|
2015-01-25 22:59:35 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
struct mlx4_dev {
|
|
|
|
struct mlx4_dev_persistent *persist;
|
2007-05-09 09:00:38 +08:00
|
|
|
unsigned long flags;
|
2011-12-13 12:10:33 +08:00
|
|
|
unsigned long num_slaves;
|
2007-05-09 09:00:38 +08:00
|
|
|
struct mlx4_caps caps;
|
net/mlx4_core: Fix number of EQs used in ICM initialisation
In SRIOV mode, the number of EQs used when computing the total ICM size
was incorrect.
To fix this, we do the following:
1. We add a new structure to mlx4_dev, mlx4_phys_caps, to contain physical HCA
capabilities. The PPF uses the phys capabilities when it computes things
like ICM size.
The dev_caps structure will then contain the paravirtualized values, making
bookkeeping much easier in SRIOV mode. We add a structure rather than a
single parameter because there will be other fields in the phys_caps.
The first field we add to the mlx4_phys_caps structure is num_phys_eqs.
2. In INIT_HCA, when running in SRIOV mode, the "log_num_eqs" parameter
passed to the FW is the number of EQs per VF/PF; each function (PF or VF)
has this number of EQs available.
However, the total number of EQs which must be allowed for in the ICM is
(1 << log_num_eqs) * (#VFs + #PFs). Rather than compute this quantity,
we allocate ICM space for 1024 EQs (which is the device maximum
number of EQs, and which is the value we place in the mlx4_phys_caps structure).
For INIT_HCA, however, we use the per-function number of EQs as described
above.
Signed-off-by: Marcel Apfelbaum <marcela@dev.mellanox.co.il>
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2012-05-30 17:14:51 +08:00
|
|
|
struct mlx4_phys_caps phys_caps;
|
mlx4: Structures and init/teardown for VF resource quotas
This is step #1 for implementing SRIOV resource quotas for VFs.
Quotas are implemented per resource type for VFs and the PF, to prevent
any entity from simply grabbing all the resources for itself and leaving
the other entities unable to obtain such resources.
Resources which are allocated using quotas: QPs, CQs, SRQs, MPTs, MTTs, MAC,
VLAN, and Counters.
The quota system works as follows:
Each entity (VF or PF) is given a max number of a given resource (its quota),
and a guaranteed minimum number for each resource (starvation prevention).
For QPs, CQs, SRQs, MPTs and MTTs:
50% of the available quantity for the resource is divided equally among
the PF and all the active VFs (i.e., the number of VFs in the mlx4_core module
parameter "num_vfs"). This 50% represents the "guaranteed minimum" pool.
The other 50% is the "free pool", allocated on a first-come-first-serve basis.
For each VF/PF, resources are first allocated from its "guaranteed-minimum"
pool. When that pool is exhausted, the driver attempts to allocate from
the resource "free-pool".
The quota (i.e., max) for the VFs and the PF is:
The free-pool amount (50% of the real max) + the guaranteed minimum
For MACs:
Guarantee 2 MACs per VF/PF per port. As a result, since we have only
128 MACs per port, reduce the allowable number of VFs from 64 to 63.
Any remaining MACs are put into a free pool.
For VLANs:
For the PF, the per-port quota is 128 and guarantee is 64
(to allow the PF to register at least a VLAN per VF in VST mode).
For the VFs, the per-port quota is 64 and the guarantee is 0.
We assume that VGT VFs are trusted not to abuse the VLAN resource.
For Counters:
For all functions (PF and VFs), the quota is 128 and the guarantee is 0.
In this patch, we define the needed structures, which are added to the
resource-tracker struct. In addition, we do initialization
for the resource quota, and adjust the query_device response to use quotas
rather than resource maxima.
As part of the implementation, we introduce a new field in
mlx4_dev: quotas. This field holds the resource quotas used
to report maxima to the upper layers (ib_core, via query_device).
The HCA maxima of these values are passed to the VFs (via
QUERY_HCA) so that they may continue to use these in handling
QPs, CQs, SRQs and MPTs.
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-11-03 16:03:23 +08:00
|
|
|
struct mlx4_quotas quotas;
|
2007-05-09 09:00:38 +08:00
|
|
|
struct radix_tree_root qp_table_tree;
|
2011-03-23 06:38:07 +08:00
|
|
|
u8 rev_id;
|
net/mlx4_core: Replace VF zero mac with random mac in mlx4_core
By design, when no default MAC addresses are set in the Hypervisor for VFs,
the VFs are passed zero-macs. When such a MAC is received by the VF, it
generates a random MAC address and registers that MAC address
with the Hypervisor.
This random mac generation is currently done in the mlx4_en module.
There is a problem, though, if the mlx4_ib module is loaded by a VF before
the mlx4_en module. In this case, for RoCE, mlx4_ib will see the un-replaced
zero-mac and register that zero-mac as part of QP1 initialization.
Having a zero-mac in the port's MAC table creates problems for a
Baseboard Management Console. The BMC occasionally sends packets with a
zero-mac destination MAC. If there is a zero-mac present in the port's
MAC table, the FW will send such BMC packets to the host driver rather than
to the wire, and BMC will stop working.
To address this problem, we move the replacement of zero-mac addresses
with random-mac addresses to procedure mlx4_slave_cap(), which is part of the
driver startup for VFs, and is before activation of mlx4_ib and mlx4_en.
As a result, zero-mac addresses will never be registered in the port MAC table
by the driver.
In addition, when mlx4_en does initialize the net device, it needs to set
the NET_ADDR_RANDOM flag in the netdev structure if the address was
randomly generated. This is done so that udev on the VM does not create
a new device name after each VF probe (VM boot and such). To accomplish this,
we add a per-port flag in mlx4_dev which gets set whenever mlx4_core replaces
a zero-mac with a randomly-generated mac. This flag is examined when mlx4_en
initializes the net-device.
Fix was suggested by Matan Barak <matanb@mellanox.com>
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-14 22:43:48 +08:00
|
|
|
u8 port_random_macs;
|
2007-09-18 15:14:18 +08:00
|
|
|
char board_id[MLX4_BOARD_ID_LEN];
|
2013-11-07 18:19:53 +08:00
|
|
|
int numa_node;
|
2012-12-07 01:12:00 +08:00
|
|
|
int oper_log_mgm_entry_size;
|
2012-07-05 12:03:48 +08:00
|
|
|
u64 regid_promisc_array[MLX4_MAX_PORTS + 1];
|
|
|
|
u64 regid_allmulti_array[MLX4_MAX_PORTS + 1];
|
2014-03-20 00:11:50 +08:00
|
|
|
struct mlx4_vf_dev *dev_vfs;
|
net/mlx4_core: Set UAR page size to 4KB regardless of system page size
problem description:
The current code sets UAR page size equal to system page size.
The ConnectX-3 and ConnectX-3 Pro HWs require minimum 128 UAR pages.
The mlx4 kernel drivers are not loaded if there is less than 128 UAR pages.
solution:
Always set UAR page to 4KB. This allows more UAR pages if the OS
has PAGE_SIZE larger than 4KB. For example, PowerPC kernel use 64KB
system page size, with 4MB uar region, there are 4MB/2/64KB = 32
uars (half for uar, half for blueflame). This does not meet minimum 128
UAR pages requirement. With 4KB UAR page, there are 4MB/2/4KB = 512 uars
which meet the minimum requirement.
Note that only codes in mlx4_core that deal with firmware know that uar
page size is 4KB. Codes that deal with usr page in cq and qp context
(mlx4_ib, mlx4_en and part of mlx4_core) still have the same assumption
that uar page size equals to system page size.
Note that with this implementation, on 64KB system page size kernel, there
are 16 uars per system page but only one uars is used. The other 15
uars are ignored because of the above assumption.
Regarding SR-IOV, mlx4_core in hypervisor will set the uar page size
to 4KB and mlx4_core code in virtual OS will obtain the uar page size from
firmware.
Regarding backward compatibility in SR-IOV, if hypervisor has this new code,
the virtual OS must be updated. If hypervisor has old code, and the virtual
OS has this new code, the new code will be backward compatible with the
old code. If the uar size is big enough, this new code in VF continues to
work with 64 KB uar page size (on PowerPc kernel). If the uar size does not
meet 128 uars requirement, this new code not loaded in VF and print the same
error message as the old code in Hypervisor.
Signed-off-by: Huy Nguyen <huyn@mellanox.com>
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-02-17 23:24:26 +08:00
|
|
|
u8 uar_page_shift;
|
2007-05-09 09:00:38 +08:00
|
|
|
};
|
|
|
|
|
2015-06-11 21:35:26 +08:00
|
|
|
struct mlx4_clock_params {
|
|
|
|
u64 offset;
|
|
|
|
u8 bar;
|
|
|
|
u8 size;
|
|
|
|
};
|
|
|
|
|
mlx4: Use port management change event instead of smp_snoop
The port management change event can replace smp_snoop. If the
capability bit for this event is set in dev-caps, the event is used
(by the driver setting the PORT_MNG_CHG_EVENT bit in the async event
mask in the MAP_EQ fw command). In this case, when the driver passes
incoming SMP PORT_INFO SET mads to the FW, the FW generates port
management change events to signal any changes to the driver.
If the FW generates these events, smp_snoop shouldn't be invoked in
ib_process_mad(), or duplicate events will occur (once from the
FW-generated event, and once from smp_snoop).
In the case where the FW does not generate port management change
events smp_snoop needs to be invoked to create these events. The flow
in smp_snoop has been modified to make use of the same procedures as
in the fw-generated-event event case to generate the port management
events (LID change, Client-rereg, Pkey change, and/or GID change).
Port management change event handling required changing the
mlx4_ib_event and mlx4_dispatch_event prototypes; the "param" argument
(last argument) had to be changed to unsigned long in order to
accommodate passing the EQE pointer.
We also needed to move the definition of struct mlx4_eqe from
net/mlx4.h to file device.h -- to make it available to the IB driver,
to handle port management change events.
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
2012-06-19 16:21:40 +08:00
|
|
|
struct mlx4_eqe {
|
|
|
|
u8 reserved1;
|
|
|
|
u8 type;
|
|
|
|
u8 reserved2;
|
|
|
|
u8 subtype;
|
|
|
|
union {
|
|
|
|
u32 raw[6];
|
|
|
|
struct {
|
|
|
|
__be32 cqn;
|
|
|
|
} __packed comp;
|
|
|
|
struct {
|
|
|
|
u16 reserved1;
|
|
|
|
__be16 token;
|
|
|
|
u32 reserved2;
|
|
|
|
u8 reserved3[3];
|
|
|
|
u8 status;
|
|
|
|
__be64 out_param;
|
|
|
|
} __packed cmd;
|
|
|
|
struct {
|
|
|
|
__be32 qpn;
|
|
|
|
} __packed qp;
|
|
|
|
struct {
|
|
|
|
__be32 srqn;
|
|
|
|
} __packed srq;
|
|
|
|
struct {
|
|
|
|
__be32 cqn;
|
|
|
|
u32 reserved1;
|
|
|
|
u8 reserved2[3];
|
|
|
|
u8 syndrome;
|
|
|
|
} __packed cq_err;
|
|
|
|
struct {
|
|
|
|
u32 reserved1[2];
|
|
|
|
__be32 port;
|
|
|
|
} __packed port_change;
|
|
|
|
struct {
|
|
|
|
#define COMM_CHANNEL_BIT_ARRAY_SIZE 4
|
|
|
|
u32 reserved;
|
|
|
|
u32 bit_vec[COMM_CHANNEL_BIT_ARRAY_SIZE];
|
|
|
|
} __packed comm_channel_arm;
|
|
|
|
struct {
|
|
|
|
u8 port;
|
|
|
|
u8 reserved[3];
|
|
|
|
__be64 mac;
|
|
|
|
} __packed mac_update;
|
|
|
|
struct {
|
|
|
|
__be32 slave_id;
|
|
|
|
} __packed flr_event;
|
|
|
|
struct {
|
|
|
|
__be16 current_temperature;
|
|
|
|
__be16 warning_threshold;
|
|
|
|
} __packed warming;
|
|
|
|
struct {
|
|
|
|
u8 reserved[3];
|
|
|
|
u8 port;
|
|
|
|
union {
|
|
|
|
struct {
|
|
|
|
__be16 mstr_sm_lid;
|
|
|
|
__be16 port_lid;
|
|
|
|
__be32 changed_attr;
|
|
|
|
u8 reserved[3];
|
|
|
|
u8 mstr_sm_sl;
|
|
|
|
__be64 gid_prefix;
|
|
|
|
} __packed port_info;
|
|
|
|
struct {
|
|
|
|
__be32 block_ptr;
|
|
|
|
__be32 tbl_entries_mask;
|
|
|
|
} __packed tbl_change_info;
|
2016-09-13 00:16:21 +08:00
|
|
|
struct {
|
|
|
|
u8 sl2vl_table[8];
|
|
|
|
} __packed sl2vl_tbl_change_info;
|
mlx4: Use port management change event instead of smp_snoop
The port management change event can replace smp_snoop. If the
capability bit for this event is set in dev-caps, the event is used
(by the driver setting the PORT_MNG_CHG_EVENT bit in the async event
mask in the MAP_EQ fw command). In this case, when the driver passes
incoming SMP PORT_INFO SET mads to the FW, the FW generates port
management change events to signal any changes to the driver.
If the FW generates these events, smp_snoop shouldn't be invoked in
ib_process_mad(), or duplicate events will occur (once from the
FW-generated event, and once from smp_snoop).
In the case where the FW does not generate port management change
events smp_snoop needs to be invoked to create these events. The flow
in smp_snoop has been modified to make use of the same procedures as
in the fw-generated-event event case to generate the port management
events (LID change, Client-rereg, Pkey change, and/or GID change).
Port management change event handling required changing the
mlx4_ib_event and mlx4_dispatch_event prototypes; the "param" argument
(last argument) had to be changed to unsigned long in order to
accomodate passing the EQE pointer.
We also needed to move the definition of struct mlx4_eqe from
net/mlx4.h to file device.h -- to make it available to the IB driver,
to handle port management change events.
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
2012-06-19 16:21:40 +08:00
|
|
|
} params;
|
|
|
|
} __packed port_mgmt_change;
|
2015-01-27 21:57:59 +08:00
|
|
|
struct {
|
|
|
|
u8 reserved[3];
|
|
|
|
u8 port;
|
|
|
|
u32 reserved1[5];
|
|
|
|
} __packed bad_cable;
|
mlx4: Use port management change event instead of smp_snoop
The port management change event can replace smp_snoop. If the
capability bit for this event is set in dev-caps, the event is used
(by the driver setting the PORT_MNG_CHG_EVENT bit in the async event
mask in the MAP_EQ fw command). In this case, when the driver passes
incoming SMP PORT_INFO SET mads to the FW, the FW generates port
management change events to signal any changes to the driver.
If the FW generates these events, smp_snoop shouldn't be invoked in
ib_process_mad(), or duplicate events will occur (once from the
FW-generated event, and once from smp_snoop).
In the case where the FW does not generate port management change
events smp_snoop needs to be invoked to create these events. The flow
in smp_snoop has been modified to make use of the same procedures as
in the fw-generated-event event case to generate the port management
events (LID change, Client-rereg, Pkey change, and/or GID change).
Port management change event handling required changing the
mlx4_ib_event and mlx4_dispatch_event prototypes; the "param" argument
(last argument) had to be changed to unsigned long in order to
accomodate passing the EQE pointer.
We also needed to move the definition of struct mlx4_eqe from
net/mlx4.h to file device.h -- to make it available to the IB driver,
to handle port management change events.
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
2012-06-19 16:21:40 +08:00
|
|
|
} event;
|
|
|
|
u8 slave_id;
|
|
|
|
u8 reserved3[2];
|
|
|
|
u8 owner;
|
|
|
|
} __packed;
|
|
|
|
|
2007-05-09 09:00:38 +08:00
|
|
|
struct mlx4_init_port_param {
|
|
|
|
int set_guid0;
|
|
|
|
int set_node_guid;
|
|
|
|
int set_si_guid;
|
|
|
|
u16 mtu;
|
|
|
|
int port_width_cap;
|
|
|
|
u16 vl_cap;
|
|
|
|
u16 max_gid;
|
|
|
|
u16 max_pkey;
|
|
|
|
u64 guid0;
|
|
|
|
u64 node_guid;
|
|
|
|
u64 si_guid;
|
|
|
|
};
|
|
|
|
|
2014-10-27 17:37:35 +08:00
|
|
|
#define MAD_IFC_DATA_SZ 192
|
|
|
|
/* MAD IFC Mailbox */
|
|
|
|
struct mlx4_mad_ifc {
|
|
|
|
u8 base_version;
|
|
|
|
u8 mgmt_class;
|
|
|
|
u8 class_version;
|
|
|
|
u8 method;
|
|
|
|
__be16 status;
|
|
|
|
__be16 class_specific;
|
|
|
|
__be64 tid;
|
|
|
|
__be16 attr_id;
|
|
|
|
__be16 resv;
|
|
|
|
__be32 attr_mod;
|
|
|
|
__be64 mkey;
|
|
|
|
__be16 dr_slid;
|
|
|
|
__be16 dr_dlid;
|
|
|
|
u8 reserved[28];
|
|
|
|
u8 data[MAD_IFC_DATA_SZ];
|
|
|
|
} __packed;
|
|
|
|
|
2008-10-23 06:38:42 +08:00
|
|
|
/*
 * mlx4_foreach_port - run the following statement for every port of @dev
 * whose caps.port_mask[] entry equals @type.
 * @port: integer lvalue used as the cursor; ports are numbered from 1 up to
 *        and including (dev)->caps.num_ports
 * @dev:  pointer to the device
 * @type: port type to match
 *
 * The filter is written as "if (!match) {} else" rather than a bare
 * "if (match)" so that an 'else' following the caller's statement cannot
 * silently bind to the macro's internal 'if'.
 */
#define mlx4_foreach_port(port, dev, type)				\
	for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++)	\
		if ((type) != (dev)->caps.port_mask[(port)]) {} else
|
2008-10-23 06:38:42 +08:00
|
|
|
|
2011-12-13 12:10:41 +08:00
|
|
|
/*
 * mlx4_foreach_ib_transport_port - run the following statement for every
 * port of @dev whose caps.port_mask[] entry is MLX4_PORT_TYPE_IB or
 * MLX4_PORT_TYPE_ETH.
 * @port: integer lvalue used as the cursor; ports are numbered from 1 up to
 *        and including (dev)->caps.num_ports
 * @dev:  pointer to the device
 *
 * As in mlx4_foreach_port(), @port is parenthesized at every use and the
 * filter uses the "if (!match) {} else" form so that a caller's trailing
 * 'else' cannot attach to the macro's internal 'if'.
 */
#define mlx4_foreach_ib_transport_port(port, dev)			\
	for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++)	\
		if (((dev)->caps.port_mask[(port)] != MLX4_PORT_TYPE_IB) && \
		    ((dev)->caps.port_mask[(port)] != MLX4_PORT_TYPE_ETH)) {} else
|
2011-12-13 12:10:33 +08:00
|
|
|
|
2012-06-19 16:21:33 +08:00
|
|
|
#define MLX4_INVALID_SLAVE_ID 0xFF
|
2015-06-15 22:58:58 +08:00
|
|
|
/*
 * Index of the last counter: (dev)->caps.max_counters - 1.
 * @dev is parenthesized so that any pointer expression (e.g. &obj) expands
 * correctly.
 */
#define MLX4_SINK_COUNTER_INDEX(dev)	((dev)->caps.max_counters - 1)
|
2012-06-19 16:21:33 +08:00
|
|
|
|
mlx4: Use port management change event instead of smp_snoop
The port management change event can replace smp_snoop. If the
capability bit for this event is set in dev-caps, the event is used
(by the driver setting the PORT_MNG_CHG_EVENT bit in the async event
mask in the MAP_EQ fw command). In this case, when the driver passes
incoming SMP PORT_INFO SET mads to the FW, the FW generates port
management change events to signal any changes to the driver.
If the FW generates these events, smp_snoop shouldn't be invoked in
ib_process_mad(), or duplicate events will occur (once from the
FW-generated event, and once from smp_snoop).
In the case where the FW does not generate port management change
events smp_snoop needs to be invoked to create these events. The flow
in smp_snoop has been modified to make use of the same procedures as
in the fw-generated-event event case to generate the port management
events (LID change, Client-rereg, Pkey change, and/or GID change).
Port management change event handling required changing the
mlx4_ib_event and mlx4_dispatch_event prototypes; the "param" argument
(last argument) had to be changed to unsigned long in order to
accommodate passing the EQE pointer.
We also needed to move the definition of struct mlx4_eqe from
net/mlx4.h to file device.h -- to make it available to the IB driver,
to handle port management change events.
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
2012-06-19 16:21:40 +08:00
|
|
|
/*
 * Handler for port management change events.  It takes a work_struct, so it
 * is presumably run as deferred work -- confirm at the definition.
 */
void handle_port_mgmt_change_event(struct work_struct *work);
|
|
|
|
|
2012-06-19 16:21:41 +08:00
|
|
|
static inline int mlx4_master_func_num(struct mlx4_dev *dev)
|
|
|
|
{
|
|
|
|
return dev->caps.function;
|
|
|
|
}
|
|
|
|
|
2011-12-13 12:10:33 +08:00
|
|
|
static inline int mlx4_is_master(struct mlx4_dev *dev)
|
|
|
|
{
|
|
|
|
return dev->flags & MLX4_FLAG_MASTER;
|
|
|
|
}
|
|
|
|
|
mlx4: Structures and init/teardown for VF resource quotas
This is step #1 for implementing SRIOV resource quotas for VFs.
Quotas are implemented per resource type for VFs and the PF, to prevent
any entity from simply grabbing all the resources for itself and leaving
the other entities unable to obtain such resources.
Resources which are allocated using quotas: QPs, CQs, SRQs, MPTs, MTTs, MAC,
VLAN, and Counters.
The quota system works as follows:
Each entity (VF or PF) is given a max number of a given resource (its quota),
and a guaranteed minimum number for each resource (starvation prevention).
For QPs, CQs, SRQs, MPTs and MTTs:
50% of the available quantity for the resource is divided equally among
the PF and all the active VFs (i.e., the number of VFs in the mlx4_core module
parameter "num_vfs"). This 50% represents the "guaranteed minimum" pool.
The other 50% is the "free pool", allocated on a first-come-first-serve basis.
For each VF/PF, resources are first allocated from its "guaranteed-minimum"
pool. When that pool is exhausted, the driver attempts to allocate from
the resource "free-pool".
The quota (i.e., max) for the VFs and the PF is:
The free-pool amount (50% of the real max) + the guaranteed minimum
For MACs:
Guarantee 2 MACs per VF/PF per port. As a result, since we have only
128 MACs per port, reduce the allowable number of VFs from 64 to 63.
Any remaining MACs are put into a free pool.
For VLANs:
For the PF, the per-port quota is 128 and guarantee is 64
(to allow the PF to register at least a VLAN per VF in VST mode).
For the VFs, the per-port quota is 64 and the guarantee is 0.
We assume that VGT VFs are trusted not to abuse the VLAN resource.
For Counters:
For all functions (PF and VFs), the quota is 128 and the guarantee is 0.
In this patch, we define the needed structures, which are added to the
resource-tracker struct. In addition, we do initialization
for the resource quota, and adjust the query_device response to use quotas
rather than resource maxima.
As part of the implementation, we introduce a new field in
mlx4_dev: quotas. This field holds the resource quotas used
to report maxima to the upper layers (ib_core, via query_device).
The HCA maxima of these values are passed to the VFs (via
QUERY_HCA) so that they may continue to use these in handling
QPs, CQs, SRQs and MPTs.
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-11-03 16:03:23 +08:00
|
|
|
static inline int mlx4_num_reserved_sqps(struct mlx4_dev *dev)
|
|
|
|
{
|
|
|
|
return dev->phys_caps.base_sqpn + 8 +
|
|
|
|
16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev);
|
|
|
|
}
|
|
|
|
|
2011-12-13 12:10:33 +08:00
|
|
|
/*
 * Return 1 if @qpn is reserved, 0 otherwise.  A QP number is reserved when
 * it is below the firmware-region reserved count, or when it lies in
 * [base_sqpn, base_sqpn + 8 + 16 * MLX4_MFUNC_MAX * is_master).
 */
static inline int mlx4_is_qp_reserved(struct mlx4_dev *dev, u32 qpn)
{
	/* firmware-reserved region */
	if (qpn < dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW])
		return 1;

	/* below the special-QP window */
	if (qpn < dev->phys_caps.base_sqpn)
		return 0;

	/* inside the window iff below its upper bound */
	return qpn < dev->phys_caps.base_sqpn + 8 +
		16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev);
}
|
|
|
|
|
|
|
|
/*
 * Return 1 if @qpn falls in the 8-entry window starting at
 * base_proxy_sqpn + @slave * 8, and 0 otherwise.
 */
static inline int mlx4_is_guest_proxy(struct mlx4_dev *dev, int slave, u32 qpn)
{
	int window_start = dev->phys_caps.base_proxy_sqpn + slave * 8;

	return qpn >= window_start && qpn < window_start + 8;
}
|
2010-10-25 12:08:52 +08:00
|
|
|
|
2011-12-13 12:10:33 +08:00
|
|
|
static inline int mlx4_is_mfunc(struct mlx4_dev *dev)
|
|
|
|
{
|
|
|
|
return dev->flags & (MLX4_FLAG_SLAVE | MLX4_FLAG_MASTER);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int mlx4_is_slave(struct mlx4_dev *dev)
|
|
|
|
{
|
|
|
|
return dev->flags & MLX4_FLAG_SLAVE;
|
|
|
|
}
|
2010-10-25 12:08:52 +08:00
|
|
|
|
2015-04-02 21:31:08 +08:00
|
|
|
static inline int mlx4_is_eth(struct mlx4_dev *dev, int port)
|
|
|
|
{
|
|
|
|
return dev->caps.port_type[port] == MLX4_PORT_TYPE_IB ? 0 : 1;
|
|
|
|
}
|
|
|
|
|
2007-05-09 09:00:38 +08:00
|
|
|
/* Buffer allocation and release; a buffer is freed with the size it was allocated with. */
int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
		   struct mlx4_buf *buf);
void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf);
|
2008-02-07 13:07:54 +08:00
|
|
|
static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset)
|
|
|
|
{
|
2016-05-04 19:50:15 +08:00
|
|
|
if (buf->nbufs == 1)
|
2008-02-07 13:17:59 +08:00
|
|
|
return buf->direct.buf + offset;
|
2008-02-07 13:07:54 +08:00
|
|
|
else
|
2008-02-07 13:17:59 +08:00
|
|
|
return buf->page_list[offset >> PAGE_SHIFT].buf +
|
2008-02-07 13:07:54 +08:00
|
|
|
(offset & (PAGE_SIZE - 1));
|
|
|
|
}
|
2007-05-09 09:00:38 +08:00
|
|
|
|
|
|
|
int mlx4_pd_alloc(struct mlx4_dev *dev, u32 *pdn);
|
|
|
|
void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn);
|
2011-06-03 00:01:33 +08:00
|
|
|
int mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn);
|
|
|
|
void mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn);
|
2007-05-09 09:00:38 +08:00
|
|
|
|
|
|
|
int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar);
|
|
|
|
void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar);
|
2013-11-07 18:19:54 +08:00
|
|
|
int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf, int node);
|
2011-03-23 06:38:41 +08:00
|
|
|
void mlx4_bf_free(struct mlx4_dev *dev, struct mlx4_bf *bf);
|
2007-05-09 09:00:38 +08:00
|
|
|
|
|
|
|
int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift,
|
|
|
|
struct mlx4_mtt *mtt);
|
|
|
|
void mlx4_mtt_cleanup(struct mlx4_dev *dev, struct mlx4_mtt *mtt);
|
|
|
|
u64 mlx4_mtt_addr(struct mlx4_dev *dev, struct mlx4_mtt *mtt);
|
|
|
|
|
|
|
|
int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access,
|
|
|
|
int npages, int page_shift, struct mlx4_mr *mr);
|
2013-02-07 00:19:09 +08:00
|
|
|
int mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr);
|
2007-05-09 09:00:38 +08:00
|
|
|
int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr);
|
2013-02-07 00:19:14 +08:00
|
|
|
int mlx4_mw_alloc(struct mlx4_dev *dev, u32 pd, enum mlx4_mw_type type,
|
|
|
|
struct mlx4_mw *mw);
|
|
|
|
void mlx4_mw_free(struct mlx4_dev *dev, struct mlx4_mw *mw);
|
|
|
|
int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw);
|
2007-05-09 09:00:38 +08:00
|
|
|
int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
|
|
|
|
int start_index, int npages, u64 *page_list);
|
|
|
|
int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
|
2017-05-23 19:38:15 +08:00
|
|
|
struct mlx4_buf *buf);
|
2007-05-09 09:00:38 +08:00
|
|
|
|
2017-05-23 19:38:15 +08:00
|
|
|
int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order);
|
2008-04-24 02:55:45 +08:00
|
|
|
void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db);
|
|
|
|
|
2008-04-26 05:27:08 +08:00
|
|
|
int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
|
2016-05-04 19:50:15 +08:00
|
|
|
int size);
|
2008-04-26 05:27:08 +08:00
|
|
|
void mlx4_free_hwq_res(struct mlx4_dev *mdev, struct mlx4_hwq_resources *wqres,
|
|
|
|
int size);
|
|
|
|
|
2007-05-09 09:00:38 +08:00
|
|
|
/*
 * Completion queue allocation and release.
 *
 * mlx4_cq_alloc() takes the number of entries (@nent), the MTT and UAR to
 * use, a 64-bit doorbell record value (@db_rec), the @vector to bind to, and
 * two integer flags (@collapsed, @timestamp_en).
 * NOTE(review): flag semantics and return convention are not visible in this
 * header -- confirm against the implementation.
 */
int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
		  struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq,
		  unsigned vector, int collapsed, int timestamp_en);
void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq);
|
net/mlx4: Change QP allocation scheme
When using BF (Blue-Flame), the QPN overrides the VLAN, CV, and SV fields
in the WQE. Thus, BF may only be used for QPNs with bits 6,7 unset.
The current Ethernet driver code reserves a Tx QP range with 256b alignment.
This is wrong because if there are more than 64 Tx QPs in use,
QPNs >= base + 65 will have bits 6/7 set.
This problem is not specific for the Ethernet driver, any entity that
tries to reserve more than 64 BF-enabled QPs should fail. Also, using
ranges is not necessary here and is wasteful.
The new mechanism introduced here will support reservation for
"Eth QPs eligible for BF" for all drivers: bare-metal, multi-PF, and VFs
(when hypervisors support WC in VMs). The flow we use is:
1. In mlx4_en, allocate Tx QPs one by one instead of a range allocation,
and request "BF enabled QPs" if BF is supported for the function
2. In the ALLOC_RES FW command, change param1 to:
a. param1[23:0] - number of QPs
b. param1[31:24] - flags controlling QPs reservation
Bit 31 refers to Eth blueflame supported QPs. Those QPs must have
bits 6 and 7 unset in order to be used in Ethernet.
Bits 24-30 of the flags are currently reserved.
When a function tries to allocate a QP, it states the required attributes
for this QP. Those attributes are considered "best-effort". If an attribute,
such as Ethernet BF enabled QP, is a must-have attribute, the function has
to check that attribute is supported before trying to do the allocation.
In a lower layer of the code, mlx4_qp_reserve_range masks out the bits
which are unsupported. If SRIOV is used, the PF validates those attributes
and masks out unsupported attributes as well. In order to notify VFs which
attributes are supported, the VF uses QUERY_FUNC_CAP command. This command's
mailbox is filled by the PF, which notifies which QP allocation attributes
it supports.
Signed-off-by: Eugenia Emantayev <eugenia@mellanox.co.il>
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-12-11 16:57:54 +08:00
|
|
|
/*
 * QP number range reservation.
 *
 * mlx4_qp_reserve_range() reserves @cnt QP numbers with alignment @align and
 * stores the first one in *@base.  @flags carries the requested allocation
 * attributes; per the change that introduced it these are best-effort, and
 * unsupported bits are masked out by this function, so a caller that must
 * have an attribute has to check that it is supported beforehand.
 * NOTE(review): the meaning of @usage is not visible in this header.
 */
int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align,
			  int *base, u8 flags, u8 usage);
void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt);

/* Allocate / free the QP object for an already chosen QP number @qpn. */
int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp);
void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp);
|
|
|
|
|
2011-06-03 01:43:26 +08:00
|
|
|
/*
 * Shared receive queue (SRQ) helpers.
 *
 * mlx4_srq_arm() takes a limit watermark by value; mlx4_srq_query() reports
 * one through *@limit_watermark.
 * NOTE(review): return conventions are not visible here -- presumably
 * 0 / negative errno; confirm.
 */
int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcdn,
		   struct mlx4_mtt *mtt, u64 db_rec, struct mlx4_srq *srq);
void mlx4_srq_free(struct mlx4_dev *dev, struct mlx4_srq *srq);
int mlx4_srq_arm(struct mlx4_dev *dev, struct mlx4_srq *srq,
		 int limit_watermark);
int mlx4_srq_query(struct mlx4_dev *dev, struct mlx4_srq *srq,
		   int *limit_watermark);
|
2007-05-09 09:00:38 +08:00
|
|
|
|
2007-06-18 23:15:02 +08:00
|
|
|
/*
 * Port bring-up / shutdown for port number @port.
 * NOTE(review): upper-case suffixes presumably mirror firmware command
 * names -- confirm against the command implementation.
 */
int mlx4_INIT_PORT(struct mlx4_dev *dev, int port);
int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port);
|
|
|
|
|
2011-12-13 12:16:21 +08:00
|
|
|
int mlx4_unicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
|
|
|
|
int block_mcast_loopback, enum mlx4_protocol prot);
|
|
|
|
int mlx4_unicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
|
|
|
|
enum mlx4_protocol prot);
|
2008-07-15 14:48:48 +08:00
|
|
|
int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
|
{NET, IB}/mlx4: Add device managed flow steering firmware API
The driver is modified to support three operation modes.
If supported by firmware use the device managed flow steering
API, that which we call device managed steering mode. Else, if
the firmware supports the B0 steering mode use it, and finally,
if none of the above, use the A0 steering mode.
When the steering mode is device managed, the code is modified
such that L2 based rules set by the mlx4_en driver for Ethernet
unicast and multicast, and the IB stack multicast attach calls
done through the mlx4_ib driver are all routed to use the device
managed API.
When attaching rule using device managed flow steering API,
the firmware returns a 64 bit registration id, which is to be
provided during detach.
Currently the firmware is always programmed during HCA initialization
to use standard L2 hashing. Future work should be done to allow
configuring the flow-steering hash function with common, non
proprietary means.
Signed-off-by: Hadar Hen Zion <hadarh@mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2012-07-05 12:03:46 +08:00
|
|
|
u8 port, int block_mcast_loopback,
|
|
|
|
enum mlx4_protocol protocol, u64 *reg_id);
|
2010-12-02 19:44:49 +08:00
|
|
|
int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
|
{NET, IB}/mlx4: Add device managed flow steering firmware API
The driver is modified to support three operation modes.
If supported by firmware use the device managed flow steering
API, that which we call device managed steering mode. Else, if
the firmware supports the B0 steering mode use it, and finally,
if none of the above, use the A0 steering mode.
When the steering mode is device managed, the code is modified
such that L2 based rules set by the mlx4_en driver for Ethernet
unicast and multicast, and the IB stack multicast attach calls
done through the mlx4_ib driver are all routed to use the device
managed API.
When attaching rule using device managed flow steering API,
the firmware returns a 64 bit registration id, which is to be
provided during detach.
Currently the firmware is always programmed during HCA initialization
to use standard L2 hashing. Future work should be done to allow
configuring the flow-steering hash function with common, non
proprietary means.
Signed-off-by: Hadar Hen Zion <hadarh@mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2012-07-05 12:03:46 +08:00
|
|
|
enum mlx4_protocol protocol, u64 reg_id);
|
|
|
|
|
|
|
|
/*
 * Domain constants.  Values are spaced 0x1000 apart except that 0x4000 is
 * not used.
 * NOTE(review): presumably these identify the originator of a steering rule
 * and are combined with a priority -- confirm against the users.
 */
enum {
	MLX4_DOMAIN_UVERBS	= 0x1000,
	MLX4_DOMAIN_ETHTOOL	= 0x2000,
	MLX4_DOMAIN_RFS		= 0x3000,
	MLX4_DOMAIN_NIC		= 0x5000,
};
|
|
|
|
|
|
|
|
/*
 * Software identifiers for the header types a flow steering rule can match.
 * Values are consecutive starting at 0, so they can index per-id tables;
 * MLX4_NET_TRANS_RULE_NUM is the number of ids and must stay last.
 */
enum mlx4_net_trans_rule_id {
	MLX4_NET_TRANS_RULE_ID_ETH = 0,
	MLX4_NET_TRANS_RULE_ID_IB,
	MLX4_NET_TRANS_RULE_ID_IPV6,
	MLX4_NET_TRANS_RULE_ID_IPV4,
	MLX4_NET_TRANS_RULE_ID_TCP,
	MLX4_NET_TRANS_RULE_ID_UDP,
	MLX4_NET_TRANS_RULE_ID_VXLAN,
	MLX4_NET_TRANS_RULE_NUM, /* should be last */
};
|
|
|
|
|
2012-09-06 06:50:48 +08:00
|
|
|
/*
 * Lookup table scanned by map_hw_to_sw_id() over indices
 * 0 .. MLX4_NET_TRANS_RULE_NUM - 1; only declared here.
 */
extern const u16 __sw_id_hw[];
|
|
|
|
|
2012-09-06 06:50:49 +08:00
|
|
|
/*
 * map_hw_to_sw_id - reverse lookup in __sw_id_hw[].
 * @header_id: value to search for.
 *
 * Scans __sw_id_hw[] over indices [0, MLX4_NET_TRANS_RULE_NUM) and returns
 * the first index whose entry equals @header_id.  Returns -EINVAL when no
 * entry matches.
 */
static inline int map_hw_to_sw_id(u16 header_id)
{
	int i;

	for (i = 0; i < MLX4_NET_TRANS_RULE_NUM; i++) {
		if (header_id == __sw_id_hw[i])
			return i;
	}
	return -EINVAL;
}
|
|
|
|
|
{NET, IB}/mlx4: Add device managed flow steering firmware API
The driver is modified to support three operation modes.
If supported by firmware use the device managed flow steering
API, that which we call device managed steering mode. Else, if
the firmware supports the B0 steering mode use it, and finally,
if none of the above, use the A0 steering mode.
When the steering mode is device managed, the code is modified
such that L2 based rules set by the mlx4_en driver for Ethernet
unicast and multicast, and the IB stack multicast attach calls
done through the mlx4_ib driver are all routed to use the device
managed API.
When attaching rule using device managed flow steering API,
the firmware returns a 64 bit registration id, which is to be
provided during detach.
Currently the firmware is always programmed during HCA initialization
to use standard L2 hashing. Future work should be done to allow
configuring the flow-steering hash function with common, non
proprietary means.
Signed-off-by: Hadar Hen Zion <hadarh@mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2012-07-05 12:03:46 +08:00
|
|
|
/*
 * Flow steering rule modes.  Numbering starts at 1 (MLX4_FS_REGULAR) and is
 * consecutive; MLX4_FS_MODE_NUM must stay last.  Because of the 1-based
 * start, MLX4_FS_MODE_NUM is one more than the number of modes.
 */
enum mlx4_net_trans_promisc_mode {
	MLX4_FS_REGULAR = 1,
	MLX4_FS_ALL_DEFAULT,
	MLX4_FS_MC_DEFAULT,
	MLX4_FS_MIRROR_RX_PORT,
	MLX4_FS_MIRROR_SX_PORT,
	MLX4_FS_UC_SNIFFER,
	MLX4_FS_MC_SNIFFER,
	MLX4_FS_MODE_NUM, /* should be last */
};
|
|
|
|
|
|
|
|
struct mlx4_spec_eth {
|
2013-08-02 07:17:48 +08:00
|
|
|
u8 dst_mac[ETH_ALEN];
|
|
|
|
u8 dst_mac_msk[ETH_ALEN];
|
|
|
|
u8 src_mac[ETH_ALEN];
|
|
|
|
u8 src_mac_msk[ETH_ALEN];
|
{NET, IB}/mlx4: Add device managed flow steering firmware API
The driver is modified to support three operation modes.
If supported by firmware use the device managed flow steering
API, that which we call device managed steering mode. Else, if
the firmware supports the B0 steering mode use it, and finally,
if none of the above, use the A0 steering mode.
When the steering mode is device managed, the code is modified
such that L2 based rules set by the mlx4_en driver for Ethernet
unicast and multicast, and the IB stack multicast attach calls
done through the mlx4_ib driver are all routed to use the device
managed API.
When attaching rule using device managed flow steering API,
the firmware returns a 64 bit registration id, which is to be
provided during detach.
Currently the firmware is always programmed during HCA initialization
to use standard L2 hashing. Future work should be done to allow
configuring the flow-steering hash function with common, non
proprietary means.
Signed-off-by: Hadar Hen Zion <hadarh@mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2012-07-05 12:03:46 +08:00
|
|
|
u8 ether_type_enable;
|
|
|
|
__be16 ether_type;
|
|
|
|
__be16 vlan_id_msk;
|
|
|
|
__be16 vlan_id;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct mlx4_spec_tcp_udp {
|
|
|
|
__be16 dst_port;
|
|
|
|
__be16 dst_port_msk;
|
|
|
|
__be16 src_port;
|
|
|
|
__be16 src_port_msk;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct mlx4_spec_ipv4 {
|
|
|
|
__be32 dst_ip;
|
|
|
|
__be32 dst_ip_msk;
|
|
|
|
__be32 src_ip;
|
|
|
|
__be32 src_ip_msk;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct mlx4_spec_ib {
|
2013-04-24 21:58:46 +08:00
|
|
|
__be32 l3_qpn;
|
{NET, IB}/mlx4: Add device managed flow steering firmware API
The driver is modified to support three operation modes.
If supported by firmware use the device managed flow steering
API, that which we call device managed steering mode. Else, if
the firmware supports the B0 steering mode use it, and finally,
if none of the above, use the A0 steering mode.
When the steering mode is device managed, the code is modified
such that L2 based rules set by the mlx4_en driver for Ethernet
unicast and multicast, and the IB stack multicast attach calls
done through the mlx4_ib driver are all routed to use the device
managed API.
When attaching rule using device managed flow steering API,
the firmware returns a 64 bit registration id, which is to be
provided during detach.
Currently the firmware is always programmed during HCA initialization
to use standard L2 hashing. Future work should be done to allow
configuring the flow-steering hash function with common, non
proprietary means.
Signed-off-by: Hadar Hen Zion <hadarh@mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2012-07-05 12:03:46 +08:00
|
|
|
__be32 qpn_msk;
|
|
|
|
u8 dst_gid[16];
|
|
|
|
u8 dst_gid_msk[16];
|
|
|
|
};
|
|
|
|
|
2013-12-23 22:09:43 +08:00
|
|
|
struct mlx4_spec_vxlan {
|
|
|
|
__be32 vni;
|
|
|
|
__be32 vni_mask;
|
|
|
|
|
|
|
|
};
|
|
|
|
|
{NET, IB}/mlx4: Add device managed flow steering firmware API
The driver is modified to support three operation modes.
If supported by firmware use the device managed flow steering
API, that which we call device managed steering mode. Else, if
the firmware supports the B0 steering mode use it, and finally,
if none of the above, use the A0 steering mode.
When the steering mode is device managed, the code is modified
such that L2 based rules set by the mlx4_en driver for Ethernet
unicast and multicast, and the IB stack multicast attach calls
done through the mlx4_ib driver are all routed to use the device
managed API.
When attaching rule using device managed flow steering API,
the firmware returns a 64 bit registration id, which is to be
provided during detach.
Currently the firmware is always programmed during HCA initialization
to use standard L2 hashing. Future work should be done to allow
configuring the flow-steering hash function with common, non
proprietary means.
Signed-off-by: Hadar Hen Zion <hadarh@mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2012-07-05 12:03:46 +08:00
|
|
|
struct mlx4_spec_list {
|
|
|
|
struct list_head list;
|
|
|
|
enum mlx4_net_trans_rule_id id;
|
|
|
|
union {
|
|
|
|
struct mlx4_spec_eth eth;
|
|
|
|
struct mlx4_spec_ib ib;
|
|
|
|
struct mlx4_spec_ipv4 ipv4;
|
|
|
|
struct mlx4_spec_tcp_udp tcp_udp;
|
2013-12-23 22:09:43 +08:00
|
|
|
struct mlx4_spec_vxlan vxlan;
|
{NET, IB}/mlx4: Add device managed flow steering firmware API
The driver is modified to support three operation modes.
If supported by firmware use the device managed flow steering
API, that which we call device managed steering mode. Else, if
the firmware supports the B0 steering mode use it, and finally,
if none of the above, use the A0 steering mode.
When the steering mode is device managed, the code is modified
such that L2 based rules set by the mlx4_en driver for Ethernet
unicast and multicast, and the IB stack multicast attach calls
done through the mlx4_ib driver are all routed to use the device
managed API.
When attaching rule using device managed flow steering API,
the firmware returns a 64 bit registration id, which is to be
provided during detach.
Currently the firmware is always programmed during HCA initialization
to use standard L2 hashing. Future work should be done to allow
configuring the flow-steering hash function with common, non
proprietary means.
Signed-off-by: Hadar Hen Zion <hadarh@mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2012-07-05 12:03:46 +08:00
|
|
|
};
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Queue mode of a flow steering rule (see
 * struct mlx4_net_trans_rule.queue_mode).  FIFO is 0, LIFO is 1.
 */
enum mlx4_net_trans_hw_rule_queue {
	MLX4_NET_TRANS_Q_FIFO,
	MLX4_NET_TRANS_Q_LIFO,
};
|
|
|
|
|
|
|
|
struct mlx4_net_trans_rule {
|
|
|
|
struct list_head list;
|
|
|
|
enum mlx4_net_trans_hw_rule_queue queue_mode;
|
|
|
|
bool exclusive;
|
|
|
|
bool allow_loopback;
|
|
|
|
enum mlx4_net_trans_promisc_mode promisc_mode;
|
|
|
|
u8 port;
|
|
|
|
u16 priority;
|
|
|
|
u32 qpn;
|
|
|
|
};
|
|
|
|
|
2013-04-24 21:58:44 +08:00
|
|
|
struct mlx4_net_trans_rule_hw_ctrl {
|
2013-04-24 21:58:47 +08:00
|
|
|
__be16 prio;
|
|
|
|
u8 type;
|
|
|
|
u8 flags;
|
2013-04-24 21:58:44 +08:00
|
|
|
u8 rsvd1;
|
|
|
|
u8 funcid;
|
|
|
|
u8 vep;
|
|
|
|
u8 port;
|
|
|
|
__be32 qpn;
|
|
|
|
__be32 rsvd2;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct mlx4_net_trans_rule_hw_ib {
|
|
|
|
u8 size;
|
|
|
|
u8 rsvd1;
|
|
|
|
__be16 id;
|
|
|
|
u32 rsvd2;
|
2013-04-24 21:58:46 +08:00
|
|
|
__be32 l3_qpn;
|
2013-04-24 21:58:44 +08:00
|
|
|
__be32 qpn_mask;
|
|
|
|
u8 dst_gid[16];
|
|
|
|
u8 dst_gid_msk[16];
|
|
|
|
} __packed;
|
|
|
|
|
|
|
|
struct mlx4_net_trans_rule_hw_eth {
|
|
|
|
u8 size;
|
|
|
|
u8 rsvd;
|
|
|
|
__be16 id;
|
|
|
|
u8 rsvd1[6];
|
|
|
|
u8 dst_mac[6];
|
|
|
|
u16 rsvd2;
|
|
|
|
u8 dst_mac_msk[6];
|
|
|
|
u16 rsvd3;
|
|
|
|
u8 src_mac[6];
|
|
|
|
u16 rsvd4;
|
|
|
|
u8 src_mac_msk[6];
|
|
|
|
u8 rsvd5;
|
|
|
|
u8 ether_type_enable;
|
|
|
|
__be16 ether_type;
|
2013-04-24 21:58:46 +08:00
|
|
|
__be16 vlan_tag_msk;
|
|
|
|
__be16 vlan_tag;
|
2013-04-24 21:58:44 +08:00
|
|
|
} __packed;
|
|
|
|
|
|
|
|
struct mlx4_net_trans_rule_hw_tcp_udp {
|
|
|
|
u8 size;
|
|
|
|
u8 rsvd;
|
|
|
|
__be16 id;
|
|
|
|
__be16 rsvd1[3];
|
|
|
|
__be16 dst_port;
|
|
|
|
__be16 rsvd2;
|
|
|
|
__be16 dst_port_msk;
|
|
|
|
__be16 rsvd3;
|
|
|
|
__be16 src_port;
|
|
|
|
__be16 rsvd4;
|
|
|
|
__be16 src_port_msk;
|
|
|
|
} __packed;
|
|
|
|
|
|
|
|
struct mlx4_net_trans_rule_hw_ipv4 {
|
|
|
|
u8 size;
|
|
|
|
u8 rsvd;
|
|
|
|
__be16 id;
|
|
|
|
__be32 rsvd1;
|
|
|
|
__be32 dst_ip;
|
|
|
|
__be32 dst_ip_msk;
|
|
|
|
__be32 src_ip;
|
|
|
|
__be32 src_ip_msk;
|
|
|
|
} __packed;
|
|
|
|
|
2013-12-23 22:09:43 +08:00
|
|
|
struct mlx4_net_trans_rule_hw_vxlan {
|
|
|
|
u8 size;
|
|
|
|
u8 rsvd;
|
|
|
|
__be16 id;
|
|
|
|
__be32 rsvd1;
|
|
|
|
__be32 vni;
|
|
|
|
__be32 vni_mask;
|
|
|
|
} __packed;
|
|
|
|
|
2013-04-24 21:58:44 +08:00
|
|
|
struct _rule_hw {
|
|
|
|
union {
|
|
|
|
struct {
|
|
|
|
u8 size;
|
|
|
|
u8 rsvd;
|
|
|
|
__be16 id;
|
|
|
|
};
|
|
|
|
struct mlx4_net_trans_rule_hw_eth eth;
|
|
|
|
struct mlx4_net_trans_rule_hw_ib ib;
|
|
|
|
struct mlx4_net_trans_rule_hw_ipv4 ipv4;
|
|
|
|
struct mlx4_net_trans_rule_hw_tcp_udp tcp_udp;
|
2013-12-23 22:09:43 +08:00
|
|
|
struct mlx4_net_trans_rule_hw_vxlan vxlan;
|
2013-04-24 21:58:44 +08:00
|
|
|
};
|
|
|
|
};
|
|
|
|
|
2013-12-23 22:09:43 +08:00
|
|
|
/*
 * VXLAN steering selector bits; each constant is a distinct single bit so
 * they can be OR-ed together.
 * NOTE(review): presumably the value passed as @steering to
 * mlx4_SET_PORT_VXLAN() -- confirm against the caller.
 */
enum {
	VXLAN_STEER_BY_OUTER_MAC	= 1 << 0,
	VXLAN_STEER_BY_OUTER_VLAN	= 1 << 1,
	VXLAN_STEER_BY_VSID_VNI		= 1 << 2,
	VXLAN_STEER_BY_INNER_MAC	= 1 << 3,
	VXLAN_STEER_BY_INNER_VLAN	= 1 << 4,
};
|
|
|
|
|
2016-07-20 01:54:58 +08:00
|
|
|
/*
 * Operation modifier constant.
 * NOTE(review): presumably passed as @op_modifier to
 * mlx4_query_diag_counters() -- confirm against the caller.
 */
enum {
	MLX4_OP_MOD_QUERY_TRANSPORT_CI_ERRORS = 0x2,
};
|
2013-12-23 22:09:43 +08:00
|
|
|
|
2012-07-05 12:03:48 +08:00
|
|
|
int mlx4_flow_steer_promisc_add(struct mlx4_dev *dev, u8 port, u32 qpn,
|
|
|
|
enum mlx4_net_trans_promisc_mode mode);
|
|
|
|
int mlx4_flow_steer_promisc_remove(struct mlx4_dev *dev, u8 port,
|
|
|
|
enum mlx4_net_trans_promisc_mode mode);
|
2011-03-23 06:38:31 +08:00
|
|
|
int mlx4_multicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port);
|
|
|
|
int mlx4_multicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port);
|
|
|
|
int mlx4_unicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port);
|
|
|
|
int mlx4_unicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port);
|
|
|
|
int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port, u64 mac, u64 clear, u8 mode);
|
|
|
|
|
2011-12-13 12:16:21 +08:00
|
|
|
int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac);
|
|
|
|
void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac);
|
2013-02-07 10:25:22 +08:00
|
|
|
int mlx4_get_base_qpn(struct mlx4_dev *dev, u8 port);
|
|
|
|
int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac);
|
2012-03-06 12:04:47 +08:00
|
|
|
int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
|
|
|
|
u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx);
|
2017-08-28 21:38:23 +08:00
|
|
|
int mlx4_SET_PORT_user_mac(struct mlx4_dev *dev, u8 port, u8 *user_mac);
|
2017-01-30 00:56:18 +08:00
|
|
|
int mlx4_SET_PORT_user_mtu(struct mlx4_dev *dev, u8 port, u16 user_mtu);
|
2012-03-06 12:04:47 +08:00
|
|
|
int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
|
|
|
|
u8 promisc);
|
2015-04-02 21:31:20 +08:00
|
|
|
int mlx4_SET_PORT_BEACON(struct mlx4_dev *dev, u8 port, u16 time);
|
2015-04-02 21:31:22 +08:00
|
|
|
int mlx4_SET_PORT_fcs_check(struct mlx4_dev *dev, u8 port,
|
|
|
|
u8 ignore_fcs_value);
|
2014-03-27 20:02:04 +08:00
|
|
|
int mlx4_SET_PORT_VXLAN(struct mlx4_dev *dev, u8 port, u8 steering, int enable);
|
2015-07-27 19:46:31 +08:00
|
|
|
int set_phv_bit(struct mlx4_dev *dev, u8 port, int new_val);
|
|
|
|
int get_phv_bit(struct mlx4_dev *dev, u8 port, int *phv);
|
2016-09-22 17:11:13 +08:00
|
|
|
int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port,
|
|
|
|
bool *vlan_offload_disabled);
|
2016-12-30 00:37:13 +08:00
|
|
|
void mlx4_handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl,
|
|
|
|
struct _rule_hw *eth_header);
|
IB/core: Ethernet L2 attributes in verbs/cm structures
This patch adds support for Ethernet L2 attributes in the
verbs/cm/cma structures.
When dealing with L2 Ethernet, we should use smac, dmac, vlan ID and priority
in a similar manner that the IB L2 (and the L4 PKEY) attributes are used.
Thus, those attributes were added to the following structures:
* ib_ah_attr - added dmac
* ib_qp_attr - added smac and vlan_id, (sl remains vlan priority)
* ib_wc - added smac, vlan_id
* ib_sa_path_rec - added smac, dmac, vlan_id
* cm_av - added smac and vlan_id
For the path record structure, extra care was taken to avoid the new
fields when packing it into wire format, so we don't break the IB CM
and SA wire protocol.
On the active side, the CM fills its internal structures from the
path provided by the ULP. We add there taking the ETH L2 attributes
and placing them into the CM Address Handle (struct cm_av).
On the passive side, the CM fills its internal structures from the WC
associated with the REQ message. We add there taking the ETH L2
attributes from the WC.
When the HW driver provides the required ETH L2 attributes in the WC,
they set the IB_WC_WITH_SMAC and IB_WC_WITH_VLAN flags. The IB core
code checks for the presence of these flags, and in their absence does
address resolution from the ib_init_ah_from_wc() helper function.
ib_modify_qp_is_ok is also updated to consider the link layer. Some
parameters are mandatory for Ethernet link layer, while they are
irrelevant for IB. Vendor drivers are modified to support the new
function signature.
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
2013-12-13 00:03:11 +08:00
|
|
|
int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx);
|
2010-08-26 22:19:22 +08:00
|
|
|
int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx);
|
2008-10-23 02:44:46 +08:00
|
|
|
int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);
|
2013-11-03 16:03:19 +08:00
|
|
|
void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan);
|
2008-10-23 02:44:46 +08:00
|
|
|
|
2007-08-01 17:29:05 +08:00
|
|
|
int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list,
|
|
|
|
int npages, u64 iova, u32 *lkey, u32 *rkey);
|
|
|
|
int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages,
|
|
|
|
int max_maps, u8 page_shift, struct mlx4_fmr *fmr);
|
|
|
|
int mlx4_fmr_enable(struct mlx4_dev *dev, struct mlx4_fmr *fmr);
|
|
|
|
void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
|
|
|
|
u32 *lkey, u32 *rkey);
|
|
|
|
int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr);
|
|
|
|
int mlx4_SYNC_TPT(struct mlx4_dev *dev);
|
2016-10-27 21:27:20 +08:00
|
|
|
int mlx4_test_interrupt(struct mlx4_dev *dev, int vector);
|
|
|
|
int mlx4_test_async(struct mlx4_dev *dev);
|
2016-07-20 01:54:57 +08:00
|
|
|
int mlx4_query_diag_counters(struct mlx4_dev *dev, u8 op_modifier,
|
|
|
|
const u32 offset[], u32 value[],
|
|
|
|
size_t array_len, u8 port);
|
2015-05-31 14:30:16 +08:00
|
|
|
u32 mlx4_get_eqs_per_port(struct mlx4_dev *dev, u8 port);
|
|
|
|
bool mlx4_is_eq_vector_valid(struct mlx4_dev *dev, u8 port, int vector);
|
|
|
|
struct cpu_rmap *mlx4_get_cpu_rmap(struct mlx4_dev *dev, int port);
|
|
|
|
int mlx4_assign_eq(struct mlx4_dev *dev, u8 port, int *vector);
|
2011-03-23 06:37:47 +08:00
|
|
|
void mlx4_release_eq(struct mlx4_dev *dev, int vec);
|
2007-08-01 17:29:05 +08:00
|
|
|
|
2015-05-31 14:30:16 +08:00
|
|
|
int mlx4_is_eq_shared(struct mlx4_dev *dev, int vector);
|
2014-06-29 16:54:55 +08:00
|
|
|
int mlx4_eq_get_irq(struct mlx4_dev *dev, int vec);
|
|
|
|
|
2013-12-20 03:20:12 +08:00
|
|
|
int mlx4_get_phys_port_id(struct mlx4_dev *dev);
|
2011-03-23 06:37:59 +08:00
|
|
|
int mlx4_wol_read(struct mlx4_dev *dev, u64 *config, int port);
|
|
|
|
int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port);
|
|
|
|
|
2017-06-21 14:29:36 +08:00
|
|
|
int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx, u8 usage);
|
2011-06-15 22:47:14 +08:00
|
|
|
void mlx4_counter_free(struct mlx4_dev *dev, u32 idx);
|
2015-06-15 22:59:02 +08:00
|
|
|
int mlx4_get_default_counter_index(struct mlx4_dev *dev, int port);
|
2011-06-15 22:47:14 +08:00
|
|
|
|
2015-03-03 16:54:48 +08:00
|
|
|
void mlx4_set_admin_guid(struct mlx4_dev *dev, __be64 guid, int entry,
|
|
|
|
int port);
|
|
|
|
__be64 mlx4_get_admin_guid(struct mlx4_dev *dev, int entry, int port);
|
2015-03-03 17:23:32 +08:00
|
|
|
void mlx4_set_random_admin_guid(struct mlx4_dev *dev, int entry, int port);
|
{NET, IB}/mlx4: Add device managed flow steering firmware API
The driver is modified to support three operation modes.
If supported by firmware use the device managed flow steering
API, that which we call device managed steering mode. Else, if
the firmware supports the B0 steering mode use it, and finally,
if none of the above, use the A0 steering mode.
When the steering mode is device managed, the code is modified
such that L2 based rules set by the mlx4_en driver for Ethernet
unicast and multicast, and the IB stack multicast attach calls
done through the mlx4_ib driver are all routed to use the device
managed API.
When attaching rule using device managed flow steering API,
the firmware returns a 64 bit registration id, which is to be
provided during detach.
Currently the firmware is always programmed during HCA initialization
to use standard L2 hashing. Future work should be done to allow
configuring the flow-steering hash function with common, non
proprietary means.
Signed-off-by: Hadar Hen Zion <hadarh@mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2012-07-05 12:03:46 +08:00
|
|
|
/*
 * Device-managed flow steering.
 *
 * mlx4_flow_attach() installs @rule; the firmware returns a 64-bit
 * registration id, stored in *@reg_id, which must later be passed to
 * mlx4_flow_detach() to remove the rule.
 *
 * The mlx4_map_sw_to_hw_* helpers and mlx4_hw_rule_sz() take the software
 * enums defined in this header.
 * NOTE(review): their return values (hardware encoding / size, presumably
 * negative on error) are not visible here -- confirm.
 */
int mlx4_flow_attach(struct mlx4_dev *dev,
		     struct mlx4_net_trans_rule *rule, u64 *reg_id);
int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id);
int mlx4_map_sw_to_hw_steering_mode(struct mlx4_dev *dev,
				    enum mlx4_net_trans_promisc_mode flow_type);
int mlx4_map_sw_to_hw_steering_id(struct mlx4_dev *dev,
				  enum mlx4_net_trans_rule_id id);
int mlx4_hw_rule_sz(struct mlx4_dev *dev, enum mlx4_net_trans_rule_id id);
|
{NET, IB}/mlx4: Add device managed flow steering firmware API
The driver is modified to support three operation modes.
If supported by firmware use the device managed flow steering
API, that which we call device managed steering mode. Else, if
the firmware supports the B0 steering mode use it, and finally,
if none of the above, use the A0 steering mode.
When the steering mode is device managed, the code is modified
such that L2 based rules set by the mlx4_en driver for Ethernet
unicast and multicast, and the IB stack multicast attach calls
done through the mlx4_ib driver are all routed to use the device
managed API.
When attaching rule using device managed flow steering API,
the firmware returns a 64 bit registration id, which is to be
provided during detach.
Currently the firmware is always programmed during HCA initialization
to use standard L2 hashing. Future work should be done to allow
configuring the flow-steering hash function with common, non
proprietary means.
Signed-off-by: Hadar Hen Zion <hadarh@mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2012-07-05 12:03:46 +08:00
|
|
|
|
2014-08-27 21:47:48 +08:00
|
|
|
/*
 * Add a tunnel steering rule for @addr on @port directed at @qpn with
 * priority @prio; a registration id is reported through *@reg_id.
 * NOTE(review): @addr is presumably a MAC address of ETH_ALEN bytes --
 * confirm against the implementation.
 */
int mlx4_tunnel_steer_add(struct mlx4_dev *dev, unsigned char *addr,
			  int port, int qpn, u16 prio, u64 *reg_id);

/*
 * P_Key table synchronisation for (@slave, @port).
 * NOTE(review): the roles of @i and @val are not visible here -- presumably
 * a table index and the value to store at it.
 */
void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port,
			  int i, int val);
|
|
|
|
|
mlx4_core: Implement mechanism for reserved Q_Keys
The SR-IOV special QP tunneling mechanism uses proxy special QPs
(instead of the real special QPs) for MADs on guests. These proxy QPs
send their packets to a "tunnel" QP owned by the master. The master
then forwards the MAD (after any required paravirtualization) to the
real special QP, which sends out the MAD.
For security reasons (i.e., to prevent guests from sending MADs to
tunnel QPs belonging to other guests), each proxy-tunnel QP pair is
assigned a unique, reserved, Q_Key. These Q_Keys are available only
for proxy and tunnel QPs -- if the guest tries to use these Q_Keys
with other QPs, it will fail.
This patch introduces a mechanism for reserving a block of 64K Q_Keys
for proxy/tunneling use.
The patch introduces also two new fields into mlx4_dev: base_sqpn and
base_tunnel_sqpn.
In SR-IOV mode, the QP numbers for the "real," proxy, and tunnel sqps
are added to the reserved QPN area (so that they will not change).
There are 8 special QPs per port in the HCA, and each of them is
assigned both a proxy and a tunnel QP, for each VF and for the PF as
well in SR-IOV mode.
The QPNs for these QPs are arranged as follows:
1. The real SQP numbers (8)
2. The proxy SQPs (8 * (max number of VFs + max number of PFs))
3. The tunnel SQPs (8 * (max number of VFs + max number of PFs))
To support these QPs, two new fields are added to struct mlx4_dev:
base_sqp: this is the QP number of the first of the real SQPs
base_tunnel_sqp: this is the qp number of the first qp in the tunnel
sqp region. (On guests, this is the first tunnel
sqp of the 8 which are assigned to that guest).
In addition, in SR-IOV mode, sqp_start is the number of the first
proxy SQP in the proxy SQP region. (In guests, this is the first
proxy SQP of the 8 which are assigned to that guest)
Note that in non-SR-IOV mode, there are no proxies and no tunnels.
In this case, sqp_start is set to sqp_base -- which minimizes code
changes.
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
2012-06-19 16:21:42 +08:00
|
|
|
int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey);
|
|
|
|
|
2012-08-03 16:40:48 +08:00
|
|
|
int mlx4_is_slave_active(struct mlx4_dev *dev, int slave);
|
|
|
|
int mlx4_gen_pkey_eqe(struct mlx4_dev *dev, int slave, u8 port);
|
|
|
|
int mlx4_gen_guid_change_eqe(struct mlx4_dev *dev, int slave, u8 port);
|
|
|
|
int mlx4_gen_slaves_port_mgt_ev(struct mlx4_dev *dev, u8 port, int attr);
|
|
|
|
int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port, u8 port_subtype_change);
|
|
|
|
enum slave_port_state mlx4_get_slave_port_state(struct mlx4_dev *dev, int slave, u8 port);
|
|
|
|
int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave, u8 port, int event, enum slave_port_gen_event *gen_event);
|
|
|
|
|
2012-08-03 16:40:56 +08:00
|
|
|
void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid);
|
|
|
|
__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave);
|
2014-03-12 18:00:38 +08:00
|
|
|
|
|
|
|
int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid,
|
|
|
|
int *slave_id);
|
|
|
|
int mlx4_get_roce_gid_from_slave(struct mlx4_dev *dev, int port, int slave_id,
|
|
|
|
u8 *gid);
|
2012-08-03 16:40:48 +08:00
|
|
|
|
2013-11-07 21:25:14 +08:00
|
|
|
int mlx4_FLOW_STEERING_IB_UC_QP_RANGE(struct mlx4_dev *dev, u32 min_range_qpn,
|
|
|
|
u32 max_range_qpn);
|
|
|
|
|
2016-12-22 03:32:01 +08:00
|
|
|
u64 mlx4_read_clock(struct mlx4_dev *dev);
|
2013-04-23 14:06:49 +08:00
|
|
|
|
2014-03-20 00:11:51 +08:00
|
|
|
struct mlx4_active_ports {
|
|
|
|
DECLARE_BITMAP(ports, MLX4_MAX_PORTS);
|
|
|
|
};
|
|
|
|
/* Returns a bitmap of the physical ports which are assigned to slave */
|
|
|
|
struct mlx4_active_ports mlx4_get_active_ports(struct mlx4_dev *dev, int slave);
|
|
|
|
|
|
|
|
/* Returns the physical port that represents the virtual port of the slave, */
|
|
|
|
/* or a value < 0 in case of an error. If a slave has 2 ports, the identity */
|
|
|
|
/* mapping is returned. */
|
|
|
|
int mlx4_slave_convert_port(struct mlx4_dev *dev, int slave, int port);
|
|
|
|
|
|
|
|
struct mlx4_slaves_pport {
|
|
|
|
DECLARE_BITMAP(slaves, MLX4_MFUNC_MAX);
|
|
|
|
};
|
|
|
|
/* Returns a bitmap of all slaves that are assigned to port. */
|
|
|
|
struct mlx4_slaves_pport mlx4_phys_to_slaves_pport(struct mlx4_dev *dev,
|
|
|
|
int port);
|
|
|
|
|
|
|
|
/* Returns a bitmap of all slaves that are assigned exactly to all the */
|
|
|
|
/* ports that are set in crit_ports. */
|
|
|
|
struct mlx4_slaves_pport mlx4_phys_to_slaves_pport_actv(
|
|
|
|
struct mlx4_dev *dev,
|
|
|
|
const struct mlx4_active_ports *crit_ports);
|
|
|
|
|
|
|
|
/* Returns the slave's virtual port that represents the physical port. */
|
|
|
|
int mlx4_phys_to_slave_port(struct mlx4_dev *dev, int slave, int port);
|
|
|
|
|
2014-03-20 00:11:52 +08:00
|
|
|
int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port);
|
2014-03-27 20:02:03 +08:00
|
|
|
|
|
|
|
int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port);
|
2015-02-03 22:48:32 +08:00
|
|
|
int mlx4_disable_rx_port_check(struct mlx4_dev *dev, bool dis);
|
2016-01-14 23:50:36 +08:00
|
|
|
int mlx4_config_roce_v2_port(struct mlx4_dev *dev, u16 udp_port);
|
2015-02-03 22:48:32 +08:00
|
|
|
int mlx4_virt2phy_port_map(struct mlx4_dev *dev, u32 port1, u32 port2);
|
2014-05-29 21:31:02 +08:00
|
|
|
int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port);
|
2014-05-29 21:31:04 +08:00
|
|
|
int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port);
|
|
|
|
int mlx4_vf_set_enable_smi_admin(struct mlx4_dev *dev, int slave, int port,
|
|
|
|
int enable);
|
2014-07-31 16:01:29 +08:00
|
|
|
int mlx4_mr_hw_get_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr,
|
|
|
|
struct mlx4_mpt_entry ***mpt_entry);
|
|
|
|
int mlx4_mr_hw_write_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr,
|
|
|
|
struct mlx4_mpt_entry **mpt_entry);
|
|
|
|
int mlx4_mr_hw_change_pd(struct mlx4_dev *dev, struct mlx4_mpt_entry *mpt_entry,
|
|
|
|
u32 pdn);
|
|
|
|
int mlx4_mr_hw_change_access(struct mlx4_dev *dev,
|
|
|
|
struct mlx4_mpt_entry *mpt_entry,
|
|
|
|
u32 access);
|
|
|
|
void mlx4_mr_hw_put_mpt(struct mlx4_dev *dev,
|
|
|
|
struct mlx4_mpt_entry **mpt_entry);
|
|
|
|
void mlx4_mr_rereg_mem_cleanup(struct mlx4_dev *dev, struct mlx4_mr *mr);
|
|
|
|
int mlx4_mr_rereg_mem_write(struct mlx4_dev *dev, struct mlx4_mr *mr,
|
|
|
|
u64 iova, u64 size, int npages,
|
|
|
|
int page_shift, struct mlx4_mpt_entry *mpt_entry);
|
2014-07-22 20:44:11 +08:00
|
|
|
|
2014-10-27 17:37:35 +08:00
|
|
|
int mlx4_get_module_info(struct mlx4_dev *dev, u8 port,
|
|
|
|
u16 offset, u16 size, u8 *data);
|
2016-06-21 17:43:59 +08:00
|
|
|
int mlx4_max_tc(struct mlx4_dev *dev);
|
2014-10-27 17:37:35 +08:00
|
|
|
|
2014-07-22 20:44:11 +08:00
|
|
|
/*
 * Tell whether the driver should run with its low memory profile.
 *
 * Return: the result of is_kdump_kernel(), i.e. true when running as a
 * kdump kernel and false otherwise.
 */
static inline bool mlx4_low_memory_profile(void)
{
	return is_kdump_kernel();
}
|
|
|
|
|
2014-10-27 17:37:37 +08:00
|
|
|
/*
 * ACCESS REG commands.
 *
 * Method selector passed to register-access helpers such as
 * mlx4_ACCESS_PTYS_REG().
 */
enum mlx4_access_reg_method {
	MLX4_ACCESS_REG_QUERY = 0x1,
	MLX4_ACCESS_REG_WRITE = 0x2,
};
|
|
|
|
|
|
|
|
/*
 * ACCESS PTYS Reg command.
 *
 * Single-bit protocol selectors (bit 0 and bit 2).
 * NOTE(review): presumably these are the values stored in
 * mlx4_ptys_reg.proto_mask -- confirm against the callers.
 */
enum mlx4_ptys_proto {
	MLX4_PTYS_IB = 1 << 0,
	MLX4_PTYS_EN = 1 << 2,
};
|
|
|
|
|
2017-01-30 00:56:17 +08:00
|
|
|
/*
 * Single-bit PTYS flags (bit 5 and bit 6).
 * NOTE(review): presumably carried in mlx4_ptys_reg.flags, with "AN"
 * standing for auto-negotiation -- confirm against the callers.
 */
enum mlx4_ptys_flags {
	MLX4_PTYS_AN_DISABLE_CAP = 1 << 5,
	MLX4_PTYS_AN_DISABLE_ADMIN = 1 << 6,
};
|
|
|
|
|
2014-10-27 17:37:37 +08:00
|
|
|
struct mlx4_ptys_reg {
|
2017-01-30 00:56:17 +08:00
|
|
|
u8 flags;
|
2014-10-27 17:37:37 +08:00
|
|
|
u8 local_port;
|
|
|
|
u8 resrvd2;
|
|
|
|
u8 proto_mask;
|
|
|
|
__be32 resrvd3[2];
|
|
|
|
__be32 eth_proto_cap;
|
|
|
|
__be16 ib_width_cap;
|
|
|
|
__be16 ib_speed_cap;
|
|
|
|
__be32 resrvd4;
|
|
|
|
__be32 eth_proto_admin;
|
|
|
|
__be16 ib_width_admin;
|
|
|
|
__be16 ib_speed_admin;
|
|
|
|
__be32 resrvd5;
|
|
|
|
__be32 eth_proto_oper;
|
|
|
|
__be16 ib_width_oper;
|
|
|
|
__be16 ib_speed_oper;
|
|
|
|
__be32 resrvd6;
|
|
|
|
__be32 eth_proto_lp_adv;
|
|
|
|
} __packed;
|
|
|
|
|
|
|
|
int mlx4_ACCESS_PTYS_REG(struct mlx4_dev *dev,
|
|
|
|
enum mlx4_access_reg_method method,
|
|
|
|
struct mlx4_ptys_reg *ptys_reg);
|
|
|
|
|
2015-06-11 21:35:26 +08:00
|
|
|
int mlx4_get_internal_clock_params(struct mlx4_dev *dev,
|
|
|
|
struct mlx4_clock_params *params);
|
|
|
|
|
net/mlx4_core: Set UAR page size to 4KB regardless of system page size
problem description:
The current code sets UAR page size equal to system page size.
The ConnectX-3 and ConnectX-3 Pro HWs require minimum 128 UAR pages.
The mlx4 kernel drivers are not loaded if there is less than 128 UAR pages.
solution:
Always set UAR page to 4KB. This allows more UAR pages if the OS
has PAGE_SIZE larger than 4KB. For example, PowerPC kernels use a 64KB
system page size; with a 4MB uar region, there are 4MB/2/64KB = 32
uars (half for uar, half for blueflame). This does not meet the minimum 128
UAR pages requirement. With 4KB UAR page, there are 4MB/2/4KB = 512 uars
which meet the minimum requirement.
Note that only codes in mlx4_core that deal with firmware know that uar
page size is 4KB. Codes that deal with usr page in cq and qp context
(mlx4_ib, mlx4_en and part of mlx4_core) still have the same assumption
that uar page size equals to system page size.
Note that with this implementation, on a 64KB system page size kernel, there
are 16 uars per system page but only one uar is used. The other 15
uars are ignored because of the above assumption.
Regarding SR-IOV, mlx4_core in hypervisor will set the uar page size
to 4KB and mlx4_core code in virtual OS will obtain the uar page size from
firmware.
Regarding backward compatibility in SR-IOV, if hypervisor has this new code,
the virtual OS must be updated. If hypervisor has old code, and the virtual
OS has this new code, the new code will be backward compatible with the
old code. If the uar size is big enough, this new code in VF continues to
work with 64 KB uar page size (on PowerPC kernel). If the uar size does not
meet the 128 uars requirement, this new code is not loaded in the VF and prints
the same error message as the old code in Hypervisor.
Signed-off-by: Huy Nguyen <huyn@mellanox.com>
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-02-17 23:24:26 +08:00
|
|
|
static inline int mlx4_to_hw_uar_index(struct mlx4_dev *dev, int index)
|
|
|
|
{
|
|
|
|
return (index << (PAGE_SHIFT - dev->uar_page_shift));
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int mlx4_get_num_reserved_uar(struct mlx4_dev *dev)
|
|
|
|
{
|
|
|
|
/* The first 128 UARs are used for EQ doorbells */
|
|
|
|
return (128 >> (PAGE_SHIFT - dev->uar_page_shift));
|
|
|
|
}
|
2007-05-09 09:00:38 +08:00
|
|
|
#endif /* MLX4_DEVICE_H */
|