First merge window pull request

This has been a smaller cycle with many of the commits being smallish code
 fixes and improvements across the drivers.
 
 - Driver updates for bnxt_re, cxgb4, hfi1, hns, mlx5, nes, qedr, and rxe
 
 - Memory window support in hns
 
 - mlx5 user API 'flow mutate/steering' allows accessing the full packet
   mangling and matching machinery from user space
 
 - Support inter-working with verbs API calls in the 'devx' mlx5 user API, and
   provide options to use devx with less privilege
 
 - Modernize the use of sysfs and the device interface to use attribute groups
   and cdev properly for uverbs, and clean up some of the core code's device list
   management
 
 - More progress on net namespaces for RDMA devices
 
 - Consolidate driver BAR mmapping support into core code helpers and rework
   how RDMA holds pointers to mm_struct for get_user_pages cases
 
 - First pass to use 'dev_name' instead of ib_device->name
 
 - Device renaming for RDMA devices
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEfB7FMLh+8QxL+6i3OG33FX4gmxoFAlvR7dUACgkQOG33FX4g
 mxojiw//a9GU5kq4IZ3LNAEio/3Ql/NHRF0uie5tSzJgipRJA1Ln9zW0Cm1S/ms1
 VCmaSJ3l3q3GC4i3tIlsZSIIkN5qtjv/FsT/i+TZwSJYx9BDpPbzWtG6Mp4PSDj0
 v3xzklFCN5HMOmEcjkNmyZw3VjHOt2Iw2mKjqvGbI9imCPLOYnw+WQaZLmMWMH6p
 GL0HDbAopN5Lv8ireWd8pOhPLVbSb12cWM1crx+yHOS3q8YNWjIXGiZr/QkOPtPr
 cymSXB8yuITJ7gnjbs/GxZHg6rxU0knC/Ck8hE7FqqYYHgytTklOXDE2ef1J2lFe
 1VmotD+nTsCir0mZWSdcRrszEk7tzaZT7n1oWggKvWySDB6qaH0II8vWumJchQnN
 pElIQn/WDgpekIqplamNqXJnKnDXZJpEVA01OHHDN4MNSc+Ad08hQy4FyFzpB6/G
 jv9TnDMfGC6ma9pr1ipOXyCgCa2pHYEUCaYxUqRA0O/4ATVl7/PplqT0rqtJ6hKg
 o/hmaVCawIFOUKD87/bo7Em2HBs3xNwE/c5ggbsQElLYeydrgPrZfrPfjkshv5K3
 eIKDb+HPyis0is1aiF7m/bz1hSIYZp0bQhuKCdzLRjZobwCm5WDPhtuuAWb7vYVw
 GSLCJWyet+bLyZxynNOt67gKm9je9lt8YTr5nilz49KeDytspK0=
 =pacJ
 -----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma updates from Jason Gunthorpe:
 "This has been a smaller cycle with many of the commits being smallish
  code fixes and improvements across the drivers.

   - Driver updates for bnxt_re, cxgb4, hfi1, hns, mlx5, nes, qedr, and
     rxe

   - Memory window support in hns

   - mlx5 user API 'flow mutate/steering' allows accessing the full
     packet mangling and matching machinery from user space

   - Support inter-working with verbs API calls in the 'devx' mlx5 user
     API, and provide options to use devx with less privilege

    - Modernize the use of sysfs and the device interface to use attribute
     groups and cdev properly for uverbs, and clean up some of the core
     code's device list management

   - More progress on net namespaces for RDMA devices

   - Consolidate driver BAR mmapping support into core code helpers and
      rework how RDMA holds pointers to mm_struct for get_user_pages
     cases

   - First pass to use 'dev_name' instead of ib_device->name

   - Device renaming for RDMA devices"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (242 commits)
  IB/mlx5: Add support for extended atomic operations
  RDMA/core: Fix comment for hw stats init for port == 0
  RDMA/core: Refactor ib_register_device() function
  RDMA/core: Fix unwinding flow in case of error to register device
  ib_srp: Remove WARN_ON in srp_terminate_io()
  IB/mlx5: Allow scatter to CQE without global signaled WRs
  IB/mlx5: Verify that driver supports user flags
  IB/mlx5: Support scatter to CQE for DC transport type
  RDMA/drivers: Use core provided API for registering device attributes
  RDMA/core: Allow existing drivers to set one sysfs group per device
  IB/rxe: Remove unnecessary enum values
  RDMA/umad: Use kernel API to allocate umad indexes
  RDMA/uverbs: Use kernel API to allocate uverbs indexes
  RDMA/core: Increase total number of RDMA ports across all devices
  IB/mlx4: Add port and TID to MAD debug print
  IB/mlx4: Enable debug print of SMPs
  RDMA/core: Rename ports_parent to ports_kobj
  RDMA/core: Do not expose unsupported counters
  IB/mlx4: Refer to the device kobject instead of ports_parent
  RDMA/nldev: Allow IB device rename through RDMA netlink
  ...
Linus Torvalds 2018-10-26 07:38:19 -07:00
commit da19a102ce
204 changed files with 7633 additions and 5205 deletions


@ -91,6 +91,24 @@ Description:
stacked (e.g: VLAN interfaces) but still have the same MAC
address as their parent device.
What: /sys/class/net/<iface>/dev_port
Date: February 2014
KernelVersion: 3.15
Contact: netdev@vger.kernel.org
Description:
Indicates the port number of this network device, formatted
as a decimal value. Some NICs have multiple independent ports
on the same PCI bus, device and function. This attribute allows
userspace to distinguish the respective interfaces.
Note: some device drivers started to use 'dev_id' for this
purpose since long before 3.15 and have not adopted the new
attribute ever since. To query the port number, some tools look
exclusively at 'dev_port', while others only consult 'dev_id'.
If a network device has multiple client adapter ports as
described in the previous paragraph and does not set this
attribute to its port number, it's a kernel bug.
What: /sys/class/net/<iface>/dormant
Date: March 2006
KernelVersion: 2.6.17
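
The dev_port entry documented above is what tools consult to tell apart ports that share one PCI bus/device/function. As an editor's illustration only (not part of this commit), here is a small userspace C sketch that prefers 'dev_port' and falls back to the older 'dev_id'; the helper name and the default interface are arbitrary choices:

#include <stdio.h>

/* Hypothetical helper: read one sysfs attribute of a network interface.
 * "%li" accepts both the decimal dev_port and the hex (0x..) dev_id format. */
static int read_net_attr(const char *iface, const char *attr, long *val)
{
	char path[256];
	FILE *f;
	int ok;

	snprintf(path, sizeof(path), "/sys/class/net/%s/%s", iface, attr);
	f = fopen(path, "r");
	if (!f)
		return -1;
	ok = (fscanf(f, "%li", val) == 1) ? 0 : -1;
	fclose(f);
	return ok;
}

int main(int argc, char **argv)
{
	const char *iface = argc > 1 ? argv[1] : "eth0";	/* example name */
	long port;

	/* Prefer the new attribute, fall back to dev_id as the note above suggests. */
	if (read_net_attr(iface, "dev_port", &port) &&
	    read_net_attr(iface, "dev_id", &port))
		return 1;
	printf("%s: port %ld\n", iface, port);
	return 0;
}

Compile with any C compiler and pass an interface name as the first argument.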


@ -26,6 +26,7 @@ config INFINIBAND_USER_MAD
config INFINIBAND_USER_ACCESS
tristate "InfiniBand userspace access (verbs and CM)"
select ANON_INODES
depends on MMU
---help---
Userspace InfiniBand access support. This enables the
kernel side of userspace verbs and the userspace


@ -45,6 +45,7 @@
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_sa.h>
#include <rdma/ib.h>
#include <rdma/rdma_netlink.h>
#include <net/netlink.h>
@ -61,6 +62,7 @@ struct addr_req {
struct rdma_dev_addr *addr, void *context);
unsigned long timeout;
struct delayed_work work;
bool resolve_by_gid_attr; /* Consider gid attr in resolve phase */
int status;
u32 seq;
};
@ -219,18 +221,54 @@ int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr)
}
EXPORT_SYMBOL(rdma_addr_size_kss);
void rdma_copy_addr(struct rdma_dev_addr *dev_addr,
const struct net_device *dev,
const unsigned char *dst_dev_addr)
/**
* rdma_copy_src_l2_addr - Copy netdevice source addresses
* @dev_addr: Destination address pointer where to copy the addresses
* @dev: Netdevice whose source addresses to copy
*
* rdma_copy_src_l2_addr() copies source addresses from the specified netdevice.
* This includes unicast address, broadcast address, device type and
* interface index.
*/
void rdma_copy_src_l2_addr(struct rdma_dev_addr *dev_addr,
const struct net_device *dev)
{
dev_addr->dev_type = dev->type;
memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
if (dst_dev_addr)
memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
dev_addr->bound_dev_if = dev->ifindex;
}
EXPORT_SYMBOL(rdma_copy_addr);
EXPORT_SYMBOL(rdma_copy_src_l2_addr);
static struct net_device *
rdma_find_ndev_for_src_ip_rcu(struct net *net, const struct sockaddr *src_in)
{
struct net_device *dev = NULL;
int ret = -EADDRNOTAVAIL;
switch (src_in->sa_family) {
case AF_INET:
dev = __ip_dev_find(net,
((const struct sockaddr_in *)src_in)->sin_addr.s_addr,
false);
if (dev)
ret = 0;
break;
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
for_each_netdev_rcu(net, dev) {
if (ipv6_chk_addr(net,
&((const struct sockaddr_in6 *)src_in)->sin6_addr,
dev, 1)) {
ret = 0;
break;
}
}
break;
#endif
}
return ret ? ERR_PTR(ret) : dev;
}
int rdma_translate_ip(const struct sockaddr *addr,
struct rdma_dev_addr *dev_addr)
@ -241,38 +279,17 @@ int rdma_translate_ip(const struct sockaddr *addr,
dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
if (!dev)
return -ENODEV;
rdma_copy_addr(dev_addr, dev, NULL);
rdma_copy_src_l2_addr(dev_addr, dev);
dev_put(dev);
return 0;
}
switch (addr->sa_family) {
case AF_INET:
dev = ip_dev_find(dev_addr->net,
((const struct sockaddr_in *)addr)->sin_addr.s_addr);
if (!dev)
return -EADDRNOTAVAIL;
rdma_copy_addr(dev_addr, dev, NULL);
dev_put(dev);
break;
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
rcu_read_lock();
for_each_netdev_rcu(dev_addr->net, dev) {
if (ipv6_chk_addr(dev_addr->net,
&((const struct sockaddr_in6 *)addr)->sin6_addr,
dev, 1)) {
rdma_copy_addr(dev_addr, dev, NULL);
break;
}
}
rcu_read_unlock();
break;
#endif
}
return 0;
rcu_read_lock();
dev = rdma_find_ndev_for_src_ip_rcu(dev_addr->net, addr);
if (!IS_ERR(dev))
rdma_copy_src_l2_addr(dev_addr, dev);
rcu_read_unlock();
return PTR_ERR_OR_ZERO(dev);
}
EXPORT_SYMBOL(rdma_translate_ip);
@ -295,15 +312,12 @@ static void queue_req(struct addr_req *req)
spin_unlock_bh(&lock);
}
static int ib_nl_fetch_ha(const struct dst_entry *dst,
struct rdma_dev_addr *dev_addr,
static int ib_nl_fetch_ha(struct rdma_dev_addr *dev_addr,
const void *daddr, u32 seq, u16 family)
{
if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS))
if (!rdma_nl_chk_listeners(RDMA_NL_GROUP_LS))
return -EADDRNOTAVAIL;
/* We fill in what we can, the response will fill the rest */
rdma_copy_addr(dev_addr, dst->dev, NULL);
return ib_nl_ip_send_msg(dev_addr, daddr, seq, family);
}
@ -322,7 +336,7 @@ static int dst_fetch_ha(const struct dst_entry *dst,
neigh_event_send(n, NULL);
ret = -ENODATA;
} else {
rdma_copy_addr(dev_addr, dst->dev, n->ha);
memcpy(dev_addr->dst_dev_addr, n->ha, MAX_ADDR_LEN);
}
neigh_release(n);
@ -356,18 +370,22 @@ static int fetch_ha(const struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
(const void *)&dst_in6->sin6_addr;
sa_family_t family = dst_in->sa_family;
/* Gateway + ARPHRD_INFINIBAND -> IB router */
if (has_gateway(dst, family) && dst->dev->type == ARPHRD_INFINIBAND)
return ib_nl_fetch_ha(dst, dev_addr, daddr, seq, family);
/* If we have a gateway in IB mode then it must be an IB network */
if (has_gateway(dst, family) && dev_addr->network == RDMA_NETWORK_IB)
return ib_nl_fetch_ha(dev_addr, daddr, seq, family);
else
return dst_fetch_ha(dst, dev_addr, daddr);
}
static int addr4_resolve(struct sockaddr_in *src_in,
const struct sockaddr_in *dst_in,
static int addr4_resolve(struct sockaddr *src_sock,
const struct sockaddr *dst_sock,
struct rdma_dev_addr *addr,
struct rtable **prt)
{
struct sockaddr_in *src_in = (struct sockaddr_in *)src_sock;
const struct sockaddr_in *dst_in =
(const struct sockaddr_in *)dst_sock;
__be32 src_ip = src_in->sin_addr.s_addr;
__be32 dst_ip = dst_in->sin_addr.s_addr;
struct rtable *rt;
@ -383,16 +401,8 @@ static int addr4_resolve(struct sockaddr_in *src_in,
if (ret)
return ret;
src_in->sin_family = AF_INET;
src_in->sin_addr.s_addr = fl4.saddr;
/* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're
* definitely in RoCE v2 (as RoCE v1 isn't routable) set the network
* type accordingly.
*/
if (rt->rt_uses_gateway && rt->dst.dev->type != ARPHRD_INFINIBAND)
addr->network = RDMA_NETWORK_IPV4;
addr->hoplimit = ip4_dst_hoplimit(&rt->dst);
*prt = rt;
@ -400,14 +410,16 @@ static int addr4_resolve(struct sockaddr_in *src_in,
}
#if IS_ENABLED(CONFIG_IPV6)
static int addr6_resolve(struct sockaddr_in6 *src_in,
const struct sockaddr_in6 *dst_in,
static int addr6_resolve(struct sockaddr *src_sock,
const struct sockaddr *dst_sock,
struct rdma_dev_addr *addr,
struct dst_entry **pdst)
{
struct sockaddr_in6 *src_in = (struct sockaddr_in6 *)src_sock;
const struct sockaddr_in6 *dst_in =
(const struct sockaddr_in6 *)dst_sock;
struct flowi6 fl6;
struct dst_entry *dst;
struct rt6_info *rt;
int ret;
memset(&fl6, 0, sizeof fl6);
@ -419,19 +431,8 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
if (ret < 0)
return ret;
rt = (struct rt6_info *)dst;
if (ipv6_addr_any(&src_in->sin6_addr)) {
src_in->sin6_family = AF_INET6;
if (ipv6_addr_any(&src_in->sin6_addr))
src_in->sin6_addr = fl6.saddr;
}
/* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're
* definitely in RoCE v2 (as RoCE v1 isn't routable) set the network
* type accordingly.
*/
if (rt->rt6i_flags & RTF_GATEWAY &&
ip6_dst_idev(dst)->dev->type != ARPHRD_INFINIBAND)
addr->network = RDMA_NETWORK_IPV6;
addr->hoplimit = ip6_dst_hoplimit(dst);
@ -439,8 +440,8 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
return 0;
}
#else
static int addr6_resolve(struct sockaddr_in6 *src_in,
const struct sockaddr_in6 *dst_in,
static int addr6_resolve(struct sockaddr *src_sock,
const struct sockaddr *dst_sock,
struct rdma_dev_addr *addr,
struct dst_entry **pdst)
{
@ -451,36 +452,110 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
static int addr_resolve_neigh(const struct dst_entry *dst,
const struct sockaddr *dst_in,
struct rdma_dev_addr *addr,
unsigned int ndev_flags,
u32 seq)
{
if (dst->dev->flags & IFF_LOOPBACK) {
int ret;
int ret = 0;
ret = rdma_translate_ip(dst_in, addr);
if (!ret)
memcpy(addr->dst_dev_addr, addr->src_dev_addr,
MAX_ADDR_LEN);
if (ndev_flags & IFF_LOOPBACK) {
memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
} else {
if (!(ndev_flags & IFF_NOARP)) {
/* If the device doesn't do ARP internally */
ret = fetch_ha(dst, addr, dst_in, seq);
}
}
return ret;
}
return ret;
static int copy_src_l2_addr(struct rdma_dev_addr *dev_addr,
const struct sockaddr *dst_in,
const struct dst_entry *dst,
const struct net_device *ndev)
{
int ret = 0;
if (dst->dev->flags & IFF_LOOPBACK)
ret = rdma_translate_ip(dst_in, dev_addr);
else
rdma_copy_src_l2_addr(dev_addr, dst->dev);
/*
* If there's a gateway and type of device not ARPHRD_INFINIBAND,
* we're definitely in RoCE v2 (as RoCE v1 isn't routable) set the
* network type accordingly.
*/
if (has_gateway(dst, dst_in->sa_family) &&
ndev->type != ARPHRD_INFINIBAND)
dev_addr->network = dst_in->sa_family == AF_INET ?
RDMA_NETWORK_IPV4 :
RDMA_NETWORK_IPV6;
else
dev_addr->network = RDMA_NETWORK_IB;
return ret;
}
static int rdma_set_src_addr_rcu(struct rdma_dev_addr *dev_addr,
unsigned int *ndev_flags,
const struct sockaddr *dst_in,
const struct dst_entry *dst)
{
struct net_device *ndev = READ_ONCE(dst->dev);
*ndev_flags = ndev->flags;
/* A physical device must be the RDMA device to use */
if (ndev->flags & IFF_LOOPBACK) {
/*
* RDMA (IB/RoCE, iWarp) doesn't run on lo interface or
* loopback IP address. So if route is resolved to loopback
* interface, translate that to a real ndev based on non
* loopback IP address.
*/
ndev = rdma_find_ndev_for_src_ip_rcu(dev_net(ndev), dst_in);
if (IS_ERR(ndev))
return -ENODEV;
}
/* If the device doesn't do ARP internally */
if (!(dst->dev->flags & IFF_NOARP))
return fetch_ha(dst, addr, dst_in, seq);
return copy_src_l2_addr(dev_addr, dst_in, dst, ndev);
}
rdma_copy_addr(addr, dst->dev, NULL);
static int set_addr_netns_by_gid_rcu(struct rdma_dev_addr *addr)
{
struct net_device *ndev;
ndev = rdma_read_gid_attr_ndev_rcu(addr->sgid_attr);
if (IS_ERR(ndev))
return PTR_ERR(ndev);
/*
* Since we are holding the rcu, reading net and ifindex
* are safe without any additional reference; because
* change_net_namespace() in net/core/dev.c does rcu sync
* after it changes the state to IFF_DOWN and before
* updating netdev fields {net, ifindex}.
*/
addr->net = dev_net(ndev);
addr->bound_dev_if = ndev->ifindex;
return 0;
}
static void rdma_addr_set_net_defaults(struct rdma_dev_addr *addr)
{
addr->net = &init_net;
addr->bound_dev_if = 0;
}
static int addr_resolve(struct sockaddr *src_in,
const struct sockaddr *dst_in,
struct rdma_dev_addr *addr,
bool resolve_neigh,
bool resolve_by_gid_attr,
u32 seq)
{
struct net_device *ndev;
struct dst_entry *dst;
struct dst_entry *dst = NULL;
unsigned int ndev_flags = 0;
struct rtable *rt = NULL;
int ret;
if (!addr->net) {
@ -488,58 +563,55 @@ static int addr_resolve(struct sockaddr *src_in,
return -EINVAL;
}
rcu_read_lock();
if (resolve_by_gid_attr) {
if (!addr->sgid_attr) {
rcu_read_unlock();
pr_warn_ratelimited("%s: missing gid_attr\n", __func__);
return -EINVAL;
}
/*
* If the request is for a specific gid attribute of the
* rdma_dev_addr, derive net from the netdevice of the
* GID attribute.
*/
ret = set_addr_netns_by_gid_rcu(addr);
if (ret) {
rcu_read_unlock();
return ret;
}
}
if (src_in->sa_family == AF_INET) {
struct rtable *rt = NULL;
const struct sockaddr_in *dst_in4 =
(const struct sockaddr_in *)dst_in;
ret = addr4_resolve((struct sockaddr_in *)src_in,
dst_in4, addr, &rt);
if (ret)
return ret;
if (resolve_neigh)
ret = addr_resolve_neigh(&rt->dst, dst_in, addr, seq);
if (addr->bound_dev_if) {
ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
} else {
ndev = rt->dst.dev;
dev_hold(ndev);
}
ip_rt_put(rt);
ret = addr4_resolve(src_in, dst_in, addr, &rt);
dst = &rt->dst;
} else {
const struct sockaddr_in6 *dst_in6 =
(const struct sockaddr_in6 *)dst_in;
ret = addr6_resolve(src_in, dst_in, addr, &dst);
}
if (ret) {
rcu_read_unlock();
goto done;
}
ret = rdma_set_src_addr_rcu(addr, &ndev_flags, dst_in, dst);
rcu_read_unlock();
ret = addr6_resolve((struct sockaddr_in6 *)src_in,
dst_in6, addr,
&dst);
if (ret)
return ret;
if (resolve_neigh)
ret = addr_resolve_neigh(dst, dst_in, addr, seq);
if (addr->bound_dev_if) {
ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
} else {
ndev = dst->dev;
dev_hold(ndev);
}
/*
* Resolve neighbor destination address if requested and
* only if src addr translation didn't fail.
*/
if (!ret && resolve_neigh)
ret = addr_resolve_neigh(dst, dst_in, addr, ndev_flags, seq);
if (src_in->sa_family == AF_INET)
ip_rt_put(rt);
else
dst_release(dst);
}
if (ndev) {
if (ndev->flags & IFF_LOOPBACK)
ret = rdma_translate_ip(dst_in, addr);
else
addr->bound_dev_if = ndev->ifindex;
dev_put(ndev);
}
done:
/*
* Clear the addr net to go back to its original state, only if it was
* derived from GID attribute in this context.
*/
if (resolve_by_gid_attr)
rdma_addr_set_net_defaults(addr);
return ret;
}
@ -554,7 +626,8 @@ static void process_one_req(struct work_struct *_work)
src_in = (struct sockaddr *)&req->src_addr;
dst_in = (struct sockaddr *)&req->dst_addr;
req->status = addr_resolve(src_in, dst_in, req->addr,
true, req->seq);
true, req->resolve_by_gid_attr,
req->seq);
if (req->status && time_after_eq(jiffies, req->timeout)) {
req->status = -ETIMEDOUT;
} else if (req->status == -ENODATA) {
@ -586,10 +659,10 @@ static void process_one_req(struct work_struct *_work)
}
int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr,
struct rdma_dev_addr *addr, int timeout_ms,
struct rdma_dev_addr *addr, unsigned long timeout_ms,
void (*callback)(int status, struct sockaddr *src_addr,
struct rdma_dev_addr *addr, void *context),
void *context)
bool resolve_by_gid_attr, void *context)
{
struct sockaddr *src_in, *dst_in;
struct addr_req *req;
@ -617,10 +690,12 @@ int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr,
req->addr = addr;
req->callback = callback;
req->context = context;
req->resolve_by_gid_attr = resolve_by_gid_attr;
INIT_DELAYED_WORK(&req->work, process_one_req);
req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq);
req->status = addr_resolve(src_in, dst_in, addr, true, req->seq);
req->status = addr_resolve(src_in, dst_in, addr, true,
req->resolve_by_gid_attr, req->seq);
switch (req->status) {
case 0:
req->timeout = jiffies;
@ -641,25 +716,53 @@ err:
}
EXPORT_SYMBOL(rdma_resolve_ip);
int rdma_resolve_ip_route(struct sockaddr *src_addr,
const struct sockaddr *dst_addr,
struct rdma_dev_addr *addr)
int roce_resolve_route_from_path(struct sa_path_rec *rec,
const struct ib_gid_attr *attr)
{
struct sockaddr_storage ssrc_addr = {};
struct sockaddr *src_in = (struct sockaddr *)&ssrc_addr;
union {
struct sockaddr _sockaddr;
struct sockaddr_in _sockaddr_in;
struct sockaddr_in6 _sockaddr_in6;
} sgid, dgid;
struct rdma_dev_addr dev_addr = {};
int ret;
if (src_addr) {
if (src_addr->sa_family != dst_addr->sa_family)
return -EINVAL;
if (rec->roce.route_resolved)
return 0;
memcpy(src_in, src_addr, rdma_addr_size(src_addr));
} else {
src_in->sa_family = dst_addr->sa_family;
}
rdma_gid2ip(&sgid._sockaddr, &rec->sgid);
rdma_gid2ip(&dgid._sockaddr, &rec->dgid);
return addr_resolve(src_in, dst_addr, addr, false, 0);
if (sgid._sockaddr.sa_family != dgid._sockaddr.sa_family)
return -EINVAL;
if (!attr || !attr->ndev)
return -EINVAL;
dev_addr.net = &init_net;
dev_addr.sgid_attr = attr;
ret = addr_resolve(&sgid._sockaddr, &dgid._sockaddr,
&dev_addr, false, true, 0);
if (ret)
return ret;
if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
dev_addr.network == RDMA_NETWORK_IPV6) &&
rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2)
return -EINVAL;
rec->roce.route_resolved = true;
return 0;
}
/**
* rdma_addr_cancel - Cancel resolve ip request
* @addr: Pointer to address structure given previously
* during rdma_resolve_ip().
 * rdma_addr_cancel() is a synchronous function which cancels any pending
* request if there is any.
*/
void rdma_addr_cancel(struct rdma_dev_addr *addr)
{
struct addr_req *req, *temp_req;
@ -687,11 +790,6 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr)
 * guarantees no work is running and none will be started.
*/
cancel_delayed_work_sync(&found->work);
if (found->callback)
found->callback(-ECANCELED, (struct sockaddr *)&found->src_addr,
found->addr, found->context);
kfree(found);
}
EXPORT_SYMBOL(rdma_addr_cancel);
@ -710,7 +808,7 @@ static void resolve_cb(int status, struct sockaddr *src_addr,
int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
const union ib_gid *dgid,
u8 *dmac, const struct net_device *ndev,
u8 *dmac, const struct ib_gid_attr *sgid_attr,
int *hoplimit)
{
struct rdma_dev_addr dev_addr;
@ -726,12 +824,12 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
rdma_gid2ip(&dgid_addr._sockaddr, dgid);
memset(&dev_addr, 0, sizeof(dev_addr));
dev_addr.bound_dev_if = ndev->ifindex;
dev_addr.net = &init_net;
dev_addr.sgid_attr = sgid_attr;
init_completion(&ctx.comp);
ret = rdma_resolve_ip(&sgid_addr._sockaddr, &dgid_addr._sockaddr,
&dev_addr, 1000, resolve_cb, &ctx);
&dev_addr, 1000, resolve_cb, true, &ctx);
if (ret)
return ret;


@ -212,9 +212,8 @@ static void free_gid_entry_locked(struct ib_gid_table_entry *entry)
u8 port_num = entry->attr.port_num;
struct ib_gid_table *table = rdma_gid_table(device, port_num);
pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
device->name, port_num, entry->attr.index,
entry->attr.gid.raw);
dev_dbg(&device->dev, "%s port=%d index=%d gid %pI6\n", __func__,
port_num, entry->attr.index, entry->attr.gid.raw);
if (rdma_cap_roce_gid_table(device, port_num) &&
entry->state != GID_TABLE_ENTRY_INVALID)
@ -289,9 +288,9 @@ static void store_gid_entry(struct ib_gid_table *table,
{
entry->state = GID_TABLE_ENTRY_VALID;
pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
entry->attr.device->name, entry->attr.port_num,
entry->attr.index, entry->attr.gid.raw);
dev_dbg(&entry->attr.device->dev, "%s port=%d index=%d gid %pI6\n",
__func__, entry->attr.port_num, entry->attr.index,
entry->attr.gid.raw);
lockdep_assert_held(&table->lock);
write_lock_irq(&table->rwlock);
@ -320,17 +319,16 @@ static int add_roce_gid(struct ib_gid_table_entry *entry)
int ret;
if (!attr->ndev) {
pr_err("%s NULL netdev device=%s port=%d index=%d\n",
__func__, attr->device->name, attr->port_num,
attr->index);
dev_err(&attr->device->dev, "%s NULL netdev port=%d index=%d\n",
__func__, attr->port_num, attr->index);
return -EINVAL;
}
if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) {
ret = attr->device->add_gid(attr, &entry->context);
if (ret) {
pr_err("%s GID add failed device=%s port=%d index=%d\n",
__func__, attr->device->name, attr->port_num,
attr->index);
dev_err(&attr->device->dev,
"%s GID add failed port=%d index=%d\n",
__func__, attr->port_num, attr->index);
return ret;
}
}
@ -353,9 +351,8 @@ static void del_gid(struct ib_device *ib_dev, u8 port,
lockdep_assert_held(&table->lock);
pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
ib_dev->name, port, ix,
table->data_vec[ix]->attr.gid.raw);
dev_dbg(&ib_dev->dev, "%s port=%d index=%d gid %pI6\n", __func__, port,
ix, table->data_vec[ix]->attr.gid.raw);
write_lock_irq(&table->rwlock);
entry = table->data_vec[ix];
@ -782,9 +779,9 @@ static void release_gid_table(struct ib_device *device, u8 port,
if (is_gid_entry_free(table->data_vec[i]))
continue;
if (kref_read(&table->data_vec[i]->kref) > 1) {
pr_err("GID entry ref leak for %s (index %d) ref=%d\n",
device->name, i,
kref_read(&table->data_vec[i]->kref));
dev_err(&device->dev,
"GID entry ref leak for index %d ref=%d\n", i,
kref_read(&table->data_vec[i]->kref));
leak = true;
}
}
@ -1252,6 +1249,39 @@ void rdma_hold_gid_attr(const struct ib_gid_attr *attr)
}
EXPORT_SYMBOL(rdma_hold_gid_attr);
/**
* rdma_read_gid_attr_ndev_rcu - Read GID attribute netdevice
* which must be in UP state.
*
* @attr:Pointer to the GID attribute
*
* Returns pointer to netdevice if the netdevice was attached to GID and
* netdevice is in UP state. Caller must hold RCU lock as this API
* reads the netdev flags which can change while netdevice migrates to
* different net namespace. Returns ERR_PTR with error code otherwise.
*
*/
struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr)
{
struct ib_gid_table_entry *entry =
container_of(attr, struct ib_gid_table_entry, attr);
struct ib_device *device = entry->attr.device;
struct net_device *ndev = ERR_PTR(-ENODEV);
u8 port_num = entry->attr.port_num;
struct ib_gid_table *table;
unsigned long flags;
bool valid;
table = rdma_gid_table(device, port_num);
read_lock_irqsave(&table->rwlock, flags);
valid = is_gid_entry_valid(table->data_vec[attr->index]);
if (valid && attr->ndev && (READ_ONCE(attr->ndev->flags) & IFF_UP))
ndev = attr->ndev;
read_unlock_irqrestore(&table->rwlock, flags);
return ndev;
}
static int config_non_roce_gid_cache(struct ib_device *device,
u8 port, int gid_tbl_len)
{
@ -1270,8 +1300,9 @@ static int config_non_roce_gid_cache(struct ib_device *device,
continue;
ret = device->query_gid(device, port, i, &gid_attr.gid);
if (ret) {
pr_warn("query_gid failed (%d) for %s (index %d)\n",
ret, device->name, i);
dev_warn(&device->dev,
"query_gid failed (%d) for index %d\n", ret,
i);
goto err;
}
gid_attr.index = i;
@ -1300,8 +1331,7 @@ static void ib_cache_update(struct ib_device *device,
ret = ib_query_port(device, port, tprops);
if (ret) {
pr_warn("ib_query_port failed (%d) for %s\n",
ret, device->name);
dev_warn(&device->dev, "ib_query_port failed (%d)\n", ret);
goto err;
}
@ -1323,8 +1353,9 @@ static void ib_cache_update(struct ib_device *device,
for (i = 0; i < pkey_cache->table_len; ++i) {
ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
if (ret) {
pr_warn("ib_query_pkey failed (%d) for %s (index %d)\n",
ret, device->name, i);
dev_warn(&device->dev,
"ib_query_pkey failed (%d) for index %d\n",
ret, i);
goto err;
}
}


@ -3292,8 +3292,11 @@ static int cm_lap_handler(struct cm_work *work)
if (ret)
goto unlock;
cm_init_av_by_path(param->alternate_path, NULL, &cm_id_priv->alt_av,
cm_id_priv);
ret = cm_init_av_by_path(param->alternate_path, NULL,
&cm_id_priv->alt_av, cm_id_priv);
if (ret)
goto unlock;
cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
cm_id_priv->tid = lap_msg->hdr.tid;
ret = atomic_inc_and_test(&cm_id_priv->work_count);
@ -4367,7 +4370,7 @@ static void cm_add_one(struct ib_device *ib_device)
cm_dev->going_down = 0;
cm_dev->device = device_create(&cm_class, &ib_device->dev,
MKDEV(0, 0), NULL,
"%s", ib_device->name);
"%s", dev_name(&ib_device->dev));
if (IS_ERR(cm_dev->device)) {
kfree(cm_dev);
return;


@ -639,13 +639,21 @@ static void cma_bind_sgid_attr(struct rdma_id_private *id_priv,
id_priv->id.route.addr.dev_addr.sgid_attr = sgid_attr;
}
static int cma_acquire_dev(struct rdma_id_private *id_priv,
const struct rdma_id_private *listen_id_priv)
/**
* cma_acquire_dev_by_src_ip - Acquire cma device, port, gid attribute
* based on source ip address.
* @id_priv: cm_id which should be bound to cma device
*
* cma_acquire_dev_by_src_ip() binds cm id to cma device, port and GID attribute
* based on source IP address. It returns 0 on success or error code otherwise.
* It is applicable to active and passive side cm_id.
*/
static int cma_acquire_dev_by_src_ip(struct rdma_id_private *id_priv)
{
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
const struct ib_gid_attr *sgid_attr;
struct cma_device *cma_dev;
union ib_gid gid, iboe_gid, *gidp;
struct cma_device *cma_dev;
enum ib_gid_type gid_type;
int ret = -ENODEV;
u8 port;
@ -654,41 +662,125 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
id_priv->id.ps == RDMA_PS_IPOIB)
return -EINVAL;
mutex_lock(&lock);
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
&iboe_gid);
memcpy(&gid, dev_addr->src_dev_addr +
rdma_addr_gid_offset(dev_addr), sizeof gid);
if (listen_id_priv) {
cma_dev = listen_id_priv->cma_dev;
port = listen_id_priv->id.port_num;
gidp = rdma_protocol_roce(cma_dev->device, port) ?
&iboe_gid : &gid;
gid_type = listen_id_priv->gid_type;
sgid_attr = cma_validate_port(cma_dev->device, port,
gid_type, gidp, id_priv);
if (!IS_ERR(sgid_attr)) {
id_priv->id.port_num = port;
cma_bind_sgid_attr(id_priv, sgid_attr);
ret = 0;
goto out;
}
}
rdma_addr_gid_offset(dev_addr), sizeof(gid));
mutex_lock(&lock);
list_for_each_entry(cma_dev, &dev_list, list) {
for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
if (listen_id_priv &&
listen_id_priv->cma_dev == cma_dev &&
listen_id_priv->id.port_num == port)
continue;
for (port = rdma_start_port(cma_dev->device);
port <= rdma_end_port(cma_dev->device); port++) {
gidp = rdma_protocol_roce(cma_dev->device, port) ?
&iboe_gid : &gid;
gid_type = cma_dev->default_gid_type[port - 1];
sgid_attr = cma_validate_port(cma_dev->device, port,
gid_type, gidp, id_priv);
if (!IS_ERR(sgid_attr)) {
id_priv->id.port_num = port;
cma_bind_sgid_attr(id_priv, sgid_attr);
cma_attach_to_dev(id_priv, cma_dev);
ret = 0;
goto out;
}
}
}
out:
mutex_unlock(&lock);
return ret;
}
/**
* cma_ib_acquire_dev - Acquire cma device, port and SGID attribute
* @id_priv: cm id to bind to cma device
* @listen_id_priv: listener cm id to match against
 * @req: Pointer to req structure containing incoming
* request information
* cma_ib_acquire_dev() acquires cma device, port and SGID attribute when
* rdma device matches for listen_id and incoming request. It also verifies
* that a GID table entry is present for the source address.
* Returns 0 on success, or returns error code otherwise.
*/
static int cma_ib_acquire_dev(struct rdma_id_private *id_priv,
const struct rdma_id_private *listen_id_priv,
struct cma_req_info *req)
{
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
const struct ib_gid_attr *sgid_attr;
enum ib_gid_type gid_type;
union ib_gid gid;
if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
id_priv->id.ps == RDMA_PS_IPOIB)
return -EINVAL;
if (rdma_protocol_roce(req->device, req->port))
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
&gid);
else
memcpy(&gid, dev_addr->src_dev_addr +
rdma_addr_gid_offset(dev_addr), sizeof(gid));
gid_type = listen_id_priv->cma_dev->default_gid_type[req->port - 1];
sgid_attr = cma_validate_port(req->device, req->port,
gid_type, &gid, id_priv);
if (IS_ERR(sgid_attr))
return PTR_ERR(sgid_attr);
id_priv->id.port_num = req->port;
cma_bind_sgid_attr(id_priv, sgid_attr);
/* Need to acquire lock to protect against reader
* of cma_dev->id_list such as cma_netdev_callback() and
* cma_process_remove().
*/
mutex_lock(&lock);
cma_attach_to_dev(id_priv, listen_id_priv->cma_dev);
mutex_unlock(&lock);
return 0;
}
static int cma_iw_acquire_dev(struct rdma_id_private *id_priv,
const struct rdma_id_private *listen_id_priv)
{
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
const struct ib_gid_attr *sgid_attr;
struct cma_device *cma_dev;
enum ib_gid_type gid_type;
int ret = -ENODEV;
union ib_gid gid;
u8 port;
if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
id_priv->id.ps == RDMA_PS_IPOIB)
return -EINVAL;
memcpy(&gid, dev_addr->src_dev_addr +
rdma_addr_gid_offset(dev_addr), sizeof(gid));
mutex_lock(&lock);
cma_dev = listen_id_priv->cma_dev;
port = listen_id_priv->id.port_num;
gid_type = listen_id_priv->gid_type;
sgid_attr = cma_validate_port(cma_dev->device, port,
gid_type, &gid, id_priv);
if (!IS_ERR(sgid_attr)) {
id_priv->id.port_num = port;
cma_bind_sgid_attr(id_priv, sgid_attr);
ret = 0;
goto out;
}
list_for_each_entry(cma_dev, &dev_list, list) {
for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
if (listen_id_priv->cma_dev == cma_dev &&
listen_id_priv->id.port_num == port)
continue;
gid_type = cma_dev->default_gid_type[port - 1];
sgid_attr = cma_validate_port(cma_dev->device, port,
gid_type, &gid, id_priv);
if (!IS_ERR(sgid_attr)) {
id_priv->id.port_num = port;
cma_bind_sgid_attr(id_priv, sgid_attr);
@ -785,10 +877,7 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
if (!id_priv)
return ERR_PTR(-ENOMEM);
if (caller)
id_priv->res.kern_name = caller;
else
rdma_restrack_set_task(&id_priv->res, current);
rdma_restrack_set_task(&id_priv->res, caller);
id_priv->res.type = RDMA_RESTRACK_CM_ID;
id_priv->state = RDMA_CM_IDLE;
id_priv->id.context = context;
@ -1462,17 +1551,34 @@ static bool cma_protocol_roce(const struct rdma_cm_id *id)
return rdma_protocol_roce(device, port_num);
}
static bool cma_is_req_ipv6_ll(const struct cma_req_info *req)
{
const struct sockaddr *daddr =
(const struct sockaddr *)&req->listen_addr_storage;
const struct sockaddr_in6 *daddr6 = (const struct sockaddr_in6 *)daddr;
/* Returns true if the req is for IPv6 link local */
return (daddr->sa_family == AF_INET6 &&
(ipv6_addr_type(&daddr6->sin6_addr) & IPV6_ADDR_LINKLOCAL));
}
static bool cma_match_net_dev(const struct rdma_cm_id *id,
const struct net_device *net_dev,
u8 port_num)
const struct cma_req_info *req)
{
const struct rdma_addr *addr = &id->route.addr;
if (!net_dev)
/* This request is an AF_IB request */
return (!id->port_num || id->port_num == port_num) &&
return (!id->port_num || id->port_num == req->port) &&
(addr->src_addr.ss_family == AF_IB);
/*
* If the request is not for IPv6 link local, allow matching
* request to any netdevice of the one or multiport rdma device.
*/
if (!cma_is_req_ipv6_ll(req))
return true;
/*
 * Net namespaces must match, and if the listener is listening
 * on a specific netdevice then the netdevice must match as well.
@ -1500,13 +1606,14 @@ static struct rdma_id_private *cma_find_listener(
hlist_for_each_entry(id_priv, &bind_list->owners, node) {
if (cma_match_private_data(id_priv, ib_event->private_data)) {
if (id_priv->id.device == cm_id->device &&
cma_match_net_dev(&id_priv->id, net_dev, req->port))
cma_match_net_dev(&id_priv->id, net_dev, req))
return id_priv;
list_for_each_entry(id_priv_dev,
&id_priv->listen_list,
listen_list) {
if (id_priv_dev->id.device == cm_id->device &&
cma_match_net_dev(&id_priv_dev->id, net_dev, req->port))
cma_match_net_dev(&id_priv_dev->id,
net_dev, req))
return id_priv_dev;
}
}
@ -1518,18 +1625,18 @@ static struct rdma_id_private *cma_find_listener(
static struct rdma_id_private *
cma_ib_id_from_event(struct ib_cm_id *cm_id,
const struct ib_cm_event *ib_event,
struct cma_req_info *req,
struct net_device **net_dev)
{
struct cma_req_info req;
struct rdma_bind_list *bind_list;
struct rdma_id_private *id_priv;
int err;
err = cma_save_req_info(ib_event, &req);
err = cma_save_req_info(ib_event, req);
if (err)
return ERR_PTR(err);
*net_dev = cma_get_net_dev(ib_event, &req);
*net_dev = cma_get_net_dev(ib_event, req);
if (IS_ERR(*net_dev)) {
if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) {
/* Assuming the protocol is AF_IB */
@ -1567,17 +1674,17 @@ cma_ib_id_from_event(struct ib_cm_id *cm_id,
}
if (!validate_net_dev(*net_dev,
(struct sockaddr *)&req.listen_addr_storage,
(struct sockaddr *)&req.src_addr_storage)) {
(struct sockaddr *)&req->listen_addr_storage,
(struct sockaddr *)&req->src_addr_storage)) {
id_priv = ERR_PTR(-EHOSTUNREACH);
goto err;
}
}
bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net,
rdma_ps_from_service_id(req.service_id),
cma_port_from_service_id(req.service_id));
id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev);
rdma_ps_from_service_id(req->service_id),
cma_port_from_service_id(req->service_id));
id_priv = cma_find_listener(bind_list, cm_id, ib_event, req, *net_dev);
err:
rcu_read_unlock();
if (IS_ERR(id_priv) && *net_dev) {
@ -1710,8 +1817,8 @@ void rdma_destroy_id(struct rdma_cm_id *id)
mutex_lock(&id_priv->handler_mutex);
mutex_unlock(&id_priv->handler_mutex);
rdma_restrack_del(&id_priv->res);
if (id_priv->cma_dev) {
rdma_restrack_del(&id_priv->res);
if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
if (id_priv->cm_id.ib)
ib_destroy_cm_id(id_priv->cm_id.ib);
@ -1902,7 +2009,7 @@ cma_ib_new_conn_id(const struct rdma_cm_id *listen_id,
rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
if (net_dev) {
rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL);
rdma_copy_src_l2_addr(&rt->addr.dev_addr, net_dev);
} else {
if (!cma_protocol_roce(listen_id) &&
cma_any_addr(cma_src_addr(id_priv))) {
@ -1952,7 +2059,7 @@ cma_ib_new_udp_id(const struct rdma_cm_id *listen_id,
goto err;
if (net_dev) {
rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL);
rdma_copy_src_l2_addr(&id->route.addr.dev_addr, net_dev);
} else {
if (!cma_any_addr(cma_src_addr(id_priv))) {
ret = cma_translate_addr(cma_src_addr(id_priv),
@ -1999,11 +2106,12 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
{
struct rdma_id_private *listen_id, *conn_id = NULL;
struct rdma_cm_event event = {};
struct cma_req_info req = {};
struct net_device *net_dev;
u8 offset;
int ret;
listen_id = cma_ib_id_from_event(cm_id, ib_event, &net_dev);
listen_id = cma_ib_id_from_event(cm_id, ib_event, &req, &net_dev);
if (IS_ERR(listen_id))
return PTR_ERR(listen_id);
@ -2036,7 +2144,7 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
}
mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
ret = cma_acquire_dev(conn_id, listen_id);
ret = cma_ib_acquire_dev(conn_id, listen_id, &req);
if (ret)
goto err2;
@ -2232,7 +2340,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
goto out;
}
ret = cma_acquire_dev(conn_id, listen_id);
ret = cma_iw_acquire_dev(conn_id, listen_id);
if (ret) {
mutex_unlock(&conn_id->handler_mutex);
rdma_destroy_id(new_cm_id);
@ -2354,8 +2462,8 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
ret = rdma_listen(id, id_priv->backlog);
if (ret)
pr_warn("RDMA CMA: cma_listen_on_dev, error %d, listening on device %s\n",
ret, cma_dev->device->name);
dev_warn(&cma_dev->device->dev,
"RDMA CMA: cma_listen_on_dev, error %d\n", ret);
}
static void cma_listen_on_all(struct rdma_id_private *id_priv)
@ -2402,8 +2510,8 @@ static void cma_query_handler(int status, struct sa_path_rec *path_rec,
queue_work(cma_wq, &work->work);
}
static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
struct cma_work *work)
static int cma_query_ib_route(struct rdma_id_private *id_priv,
unsigned long timeout_ms, struct cma_work *work)
{
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
struct sa_path_rec path_rec;
@ -2521,7 +2629,8 @@ static void cma_init_resolve_addr_work(struct cma_work *work,
work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
}
static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
static int cma_resolve_ib_route(struct rdma_id_private *id_priv,
unsigned long timeout_ms)
{
struct rdma_route *route = &id_priv->id.route;
struct cma_work *work;
@ -2643,7 +2752,7 @@ err:
}
EXPORT_SYMBOL(rdma_set_ib_path);
static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
static int cma_resolve_iw_route(struct rdma_id_private *id_priv)
{
struct cma_work *work;
@ -2744,7 +2853,7 @@ err1:
return ret;
}
int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms)
{
struct rdma_id_private *id_priv;
int ret;
@ -2759,7 +2868,7 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
else if (rdma_protocol_roce(id->device, id->port_num))
ret = cma_resolve_iboe_route(id_priv);
else if (rdma_protocol_iwarp(id->device, id->port_num))
ret = cma_resolve_iw_route(id_priv, timeout_ms);
ret = cma_resolve_iw_route(id_priv);
else
ret = -ENOSYS;
@ -2862,7 +2971,7 @@ static void addr_handler(int status, struct sockaddr *src_addr,
memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr));
if (!status && !id_priv->cma_dev) {
status = cma_acquire_dev(id_priv, NULL);
status = cma_acquire_dev_by_src_ip(id_priv);
if (status)
pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to acquire device. status %d\n",
status);
@ -2882,13 +2991,11 @@ static void addr_handler(int status, struct sockaddr *src_addr,
if (id_priv->id.event_handler(&id_priv->id, &event)) {
cma_exch(id_priv, RDMA_CM_DESTROYING);
mutex_unlock(&id_priv->handler_mutex);
cma_deref_id(id_priv);
rdma_destroy_id(&id_priv->id);
return;
}
out:
mutex_unlock(&id_priv->handler_mutex);
cma_deref_id(id_priv);
}
static int cma_resolve_loopback(struct rdma_id_private *id_priv)
@ -2966,7 +3073,7 @@ static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
}
int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
const struct sockaddr *dst_addr, int timeout_ms)
const struct sockaddr *dst_addr, unsigned long timeout_ms)
{
struct rdma_id_private *id_priv;
int ret;
@ -2985,16 +3092,16 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
return -EINVAL;
memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
atomic_inc(&id_priv->refcount);
if (cma_any_addr(dst_addr)) {
ret = cma_resolve_loopback(id_priv);
} else {
if (dst_addr->sa_family == AF_IB) {
ret = cma_resolve_ib_addr(id_priv);
} else {
ret = rdma_resolve_ip(cma_src_addr(id_priv),
dst_addr, &id->route.addr.dev_addr,
timeout_ms, addr_handler, id_priv);
ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr,
&id->route.addr.dev_addr,
timeout_ms, addr_handler,
false, id_priv);
}
}
if (ret)
@ -3003,7 +3110,6 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
return 0;
err:
cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
cma_deref_id(id_priv);
return ret;
}
EXPORT_SYMBOL(rdma_resolve_addr);
@ -3414,7 +3520,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
if (ret)
goto err1;
ret = cma_acquire_dev(id_priv, NULL);
ret = cma_acquire_dev_by_src_ip(id_priv);
if (ret)
goto err1;
}
@ -3439,10 +3545,9 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
return 0;
err2:
if (id_priv->cma_dev) {
rdma_restrack_del(&id_priv->res);
rdma_restrack_del(&id_priv->res);
if (id_priv->cma_dev)
cma_release_dev(id_priv);
}
err1:
cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
return ret;
@ -3839,10 +3944,7 @@ int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
id_priv = container_of(id, struct rdma_id_private, id);
if (caller)
id_priv->res.kern_name = caller;
else
rdma_restrack_set_task(&id_priv->res, current);
rdma_restrack_set_task(&id_priv->res, caller);
if (!cma_comp(id_priv, RDMA_CM_CONNECT))
return -EINVAL;
@ -4087,9 +4189,10 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
(!ib_sa_sendonly_fullmem_support(&sa_client,
id_priv->id.device,
id_priv->id.port_num))) {
pr_warn("RDMA CM: %s port %u Unable to multicast join\n"
"RDMA CM: SM doesn't support Send Only Full Member option\n",
id_priv->id.device->name, id_priv->id.port_num);
dev_warn(
&id_priv->id.device->dev,
"RDMA CM: port %u Unable to multicast join: SM doesn't support Send Only Full Member option\n",
id_priv->id.port_num);
return -EOPNOTSUPP;
}
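
The cma.c changes above rework how a cm_id picks its device and how address and route resolution run in the kernel (note that rdma_resolve_addr()/rdma_resolve_route() now take an unsigned long timeout). For orientation only, here is a minimal userspace sketch of the same flow through librdmacm; it is an editor's illustration, assumes rdma-core is installed, trims error reporting, and builds with -lrdmacm -libverbs:

#include <stdio.h>
#include <netdb.h>
#include <rdma/rdma_cma.h>

int main(int argc, char **argv)
{
	struct rdma_event_channel *ch;
	struct rdma_cm_id *id;
	struct rdma_cm_event *ev;
	struct addrinfo *res;

	if (argc < 2 || getaddrinfo(argv[1], NULL, NULL, &res))
		return 1;

	ch = rdma_create_event_channel();
	if (!ch || rdma_create_id(ch, &id, NULL, RDMA_PS_TCP))
		return 1;

	/* Kicks off the kernel's addr_resolve(); completion arrives as an event. */
	if (rdma_resolve_addr(id, NULL, res->ai_addr, 2000 /* ms */) ||
	    rdma_get_cm_event(ch, &ev) || ev->event != RDMA_CM_EVENT_ADDR_RESOLVED)
		return 1;
	rdma_ack_cm_event(ev);

	/* Route resolution, matching rdma_resolve_route() in cma.c above. */
	if (rdma_resolve_route(id, 2000) ||
	    rdma_get_cm_event(ch, &ev) || ev->event != RDMA_CM_EVENT_ROUTE_RESOLVED)
		return 1;
	rdma_ack_cm_event(ev);

	printf("resolved via device %s, port %u\n",
	       id->verbs ? ibv_get_device_name(id->verbs->device) : "?",
	       (unsigned)id->port_num);

	rdma_destroy_id(id);
	rdma_destroy_event_channel(ch);
	freeaddrinfo(res);
	return 0;
}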


@ -65,7 +65,7 @@ static struct cma_dev_port_group *to_dev_port_group(struct config_item *item)
static bool filter_by_name(struct ib_device *ib_dev, void *cookie)
{
return !strcmp(ib_dev->name, cookie);
return !strcmp(dev_name(&ib_dev->dev), cookie);
}
static int cma_configfs_params_get(struct config_item *item,


@ -44,7 +44,7 @@
#include "mad_priv.h"
/* Total number of ports combined across all struct ib_devices's */
#define RDMA_MAX_PORTS 1024
#define RDMA_MAX_PORTS 8192
struct pkey_index_qp_list {
struct list_head pkey_index_list;
@ -87,6 +87,7 @@ int ib_device_register_sysfs(struct ib_device *device,
int (*port_callback)(struct ib_device *,
u8, struct kobject *));
void ib_device_unregister_sysfs(struct ib_device *device);
int ib_device_rename(struct ib_device *ibdev, const char *name);
typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port,
struct net_device *idev, void *cookie);
@ -338,7 +339,14 @@ int rdma_resolve_ip_route(struct sockaddr *src_addr,
int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
const union ib_gid *dgid,
u8 *dmac, const struct net_device *ndev,
u8 *dmac, const struct ib_gid_attr *sgid_attr,
int *hoplimit);
void rdma_copy_src_l2_addr(struct rdma_dev_addr *dev_addr,
const struct net_device *dev);
struct sa_path_rec;
int roce_resolve_route_from_path(struct sa_path_rec *rec,
const struct ib_gid_attr *attr);
struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr);
#endif /* _CORE_PRIV_H */


@ -112,12 +112,12 @@ static void ib_cq_poll_work(struct work_struct *work)
IB_POLL_BATCH);
if (completed >= IB_POLL_BUDGET_WORKQUEUE ||
ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
queue_work(ib_comp_wq, &cq->work);
queue_work(cq->comp_wq, &cq->work);
}
static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
{
queue_work(ib_comp_wq, &cq->work);
queue_work(cq->comp_wq, &cq->work);
}
/**
@ -161,7 +161,7 @@ struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
goto out_destroy_cq;
cq->res.type = RDMA_RESTRACK_CQ;
cq->res.kern_name = caller;
rdma_restrack_set_task(&cq->res, caller);
rdma_restrack_add(&cq->res);
switch (cq->poll_ctx) {
@ -175,9 +175,12 @@ struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
break;
case IB_POLL_WORKQUEUE:
case IB_POLL_UNBOUND_WORKQUEUE:
cq->comp_handler = ib_cq_completion_workqueue;
INIT_WORK(&cq->work, ib_cq_poll_work);
ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
cq->comp_wq = (cq->poll_ctx == IB_POLL_WORKQUEUE) ?
ib_comp_wq : ib_comp_unbound_wq;
break;
default:
ret = -EINVAL;
@ -213,6 +216,7 @@ void ib_free_cq(struct ib_cq *cq)
irq_poll_disable(&cq->iop);
break;
case IB_POLL_WORKQUEUE:
case IB_POLL_UNBOUND_WORKQUEUE:
cancel_work_sync(&cq->work);
break;
default:


@ -61,6 +61,7 @@ struct ib_client_data {
};
struct workqueue_struct *ib_comp_wq;
struct workqueue_struct *ib_comp_unbound_wq;
struct workqueue_struct *ib_wq;
EXPORT_SYMBOL_GPL(ib_wq);
@ -122,8 +123,9 @@ static int ib_device_check_mandatory(struct ib_device *device)
for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) {
if (!*(void **) ((void *) device + mandatory_table[i].offset)) {
pr_warn("Device %s is missing mandatory function %s\n",
device->name, mandatory_table[i].name);
dev_warn(&device->dev,
"Device is missing mandatory function %s\n",
mandatory_table[i].name);
return -EINVAL;
}
}
@ -163,16 +165,40 @@ static struct ib_device *__ib_device_get_by_name(const char *name)
struct ib_device *device;
list_for_each_entry(device, &device_list, core_list)
if (!strncmp(name, device->name, IB_DEVICE_NAME_MAX))
if (!strcmp(name, dev_name(&device->dev)))
return device;
return NULL;
}
static int alloc_name(char *name)
int ib_device_rename(struct ib_device *ibdev, const char *name)
{
struct ib_device *device;
int ret = 0;
if (!strcmp(name, dev_name(&ibdev->dev)))
return ret;
mutex_lock(&device_mutex);
list_for_each_entry(device, &device_list, core_list) {
if (!strcmp(name, dev_name(&device->dev))) {
ret = -EEXIST;
goto out;
}
}
ret = device_rename(&ibdev->dev, name);
if (ret)
goto out;
strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX);
out:
mutex_unlock(&device_mutex);
return ret;
}
static int alloc_name(struct ib_device *ibdev, const char *name)
{
unsigned long *inuse;
char buf[IB_DEVICE_NAME_MAX];
struct ib_device *device;
int i;
@ -181,24 +207,21 @@ static int alloc_name(char *name)
return -ENOMEM;
list_for_each_entry(device, &device_list, core_list) {
if (!sscanf(device->name, name, &i))
char buf[IB_DEVICE_NAME_MAX];
if (sscanf(dev_name(&device->dev), name, &i) != 1)
continue;
if (i < 0 || i >= PAGE_SIZE * 8)
continue;
snprintf(buf, sizeof buf, name, i);
if (!strncmp(buf, device->name, IB_DEVICE_NAME_MAX))
if (!strcmp(buf, dev_name(&device->dev)))
set_bit(i, inuse);
}
i = find_first_zero_bit(inuse, PAGE_SIZE * 8);
free_page((unsigned long) inuse);
snprintf(buf, sizeof buf, name, i);
if (__ib_device_get_by_name(buf))
return -ENFILE;
strlcpy(name, buf, IB_DEVICE_NAME_MAX);
return 0;
return dev_set_name(&ibdev->dev, name, i);
}
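
As an aside, the index-allocation logic in alloc_name() above (scan existing names against a printf-style template, guard against prefix-only matches, take the first free index) can be mirrored in ordinary userspace C. The following is an editor's sketch of the same idea, not code from this commit:

#include <stdio.h>
#include <string.h>

#define MAX_IDX 64	/* the kernel uses a PAGE_SIZE * 8 bitmap instead */

/* Given a template such as "mlx5_%d", pick the first index not already
 * taken by one of the existing names. */
static int pick_free_name(const char *template, const char *existing[],
			  int n, char *out, size_t outlen)
{
	unsigned char inuse[MAX_IDX] = { 0 };
	char buf[64];
	int i, idx;

	for (i = 0; i < n; i++) {
		if (sscanf(existing[i], template, &idx) != 1)
			continue;
		if (idx < 0 || idx >= MAX_IDX)
			continue;
		/* Reject prefix-only matches, as the kernel code does. */
		snprintf(buf, sizeof(buf), template, idx);
		if (!strcmp(buf, existing[i]))
			inuse[idx] = 1;
	}
	for (idx = 0; idx < MAX_IDX; idx++) {
		if (!inuse[idx]) {
			snprintf(out, outlen, template, idx);
			return 0;
		}
	}
	return -1;
}

int main(void)
{
	const char *names[] = { "mlx5_0", "mlx5_2" };
	char name[64];

	if (!pick_free_name("mlx5_%d", names, 2, name, sizeof(name)))
		printf("next free name: %s\n", name);	/* prints mlx5_1 */
	return 0;
}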
static void ib_device_release(struct device *device)
@ -221,9 +244,7 @@ static void ib_device_release(struct device *device)
static int ib_device_uevent(struct device *device,
struct kobj_uevent_env *env)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
if (add_uevent_var(env, "NAME=%s", dev->name))
if (add_uevent_var(env, "NAME=%s", dev_name(device)))
return -ENOMEM;
/*
@ -269,7 +290,7 @@ struct ib_device *ib_alloc_device(size_t size)
INIT_LIST_HEAD(&device->event_handler_list);
spin_lock_init(&device->event_handler_lock);
spin_lock_init(&device->client_data_lock);
rwlock_init(&device->client_data_lock);
INIT_LIST_HEAD(&device->client_data_list);
INIT_LIST_HEAD(&device->port_list);
@ -285,6 +306,7 @@ EXPORT_SYMBOL(ib_alloc_device);
*/
void ib_dealloc_device(struct ib_device *device)
{
WARN_ON(!list_empty(&device->client_data_list));
WARN_ON(device->reg_state != IB_DEV_UNREGISTERED &&
device->reg_state != IB_DEV_UNINITIALIZED);
rdma_restrack_clean(&device->res);
@ -295,9 +317,8 @@ EXPORT_SYMBOL(ib_dealloc_device);
static int add_client_context(struct ib_device *device, struct ib_client *client)
{
struct ib_client_data *context;
unsigned long flags;
context = kmalloc(sizeof *context, GFP_KERNEL);
context = kmalloc(sizeof(*context), GFP_KERNEL);
if (!context)
return -ENOMEM;
@ -306,9 +327,9 @@ static int add_client_context(struct ib_device *device, struct ib_client *client
context->going_down = false;
down_write(&lists_rwsem);
spin_lock_irqsave(&device->client_data_lock, flags);
write_lock_irq(&device->client_data_lock);
list_add(&context->list, &device->client_data_list);
spin_unlock_irqrestore(&device->client_data_lock, flags);
write_unlock_irq(&device->client_data_lock);
up_write(&lists_rwsem);
return 0;
@ -444,22 +465,8 @@ static u32 __dev_new_index(void)
}
}
/**
* ib_register_device - Register an IB device with IB core
* @device:Device to register
*
* Low-level drivers use ib_register_device() to register their
* devices with the IB core. All registered clients will receive a
* callback for each device that is added. @device must be allocated
* with ib_alloc_device().
*/
int ib_register_device(struct ib_device *device,
int (*port_callback)(struct ib_device *,
u8, struct kobject *))
static void setup_dma_device(struct ib_device *device)
{
int ret;
struct ib_client *client;
struct ib_udata uhw = {.outlen = 0, .inlen = 0};
struct device *parent = device->dev.parent;
WARN_ON_ONCE(device->dma_device);
@ -491,56 +498,113 @@ int ib_register_device(struct ib_device *device,
WARN_ON_ONCE(!parent);
device->dma_device = parent;
}
}
mutex_lock(&device_mutex);
static void cleanup_device(struct ib_device *device)
{
ib_cache_cleanup_one(device);
ib_cache_release_one(device);
kfree(device->port_pkey_list);
kfree(device->port_immutable);
}
if (strchr(device->name, '%')) {
ret = alloc_name(device->name);
if (ret)
goto out;
}
static int setup_device(struct ib_device *device)
{
struct ib_udata uhw = {.outlen = 0, .inlen = 0};
int ret;
if (ib_device_check_mandatory(device)) {
ret = -EINVAL;
goto out;
}
ret = ib_device_check_mandatory(device);
if (ret)
return ret;
ret = read_port_immutable(device);
if (ret) {
pr_warn("Couldn't create per port immutable data %s\n",
device->name);
goto out;
}
ret = setup_port_pkey_list(device);
if (ret) {
pr_warn("Couldn't create per port_pkey_list\n");
goto out;
}
ret = ib_cache_setup_one(device);
if (ret) {
pr_warn("Couldn't set up InfiniBand P_Key/GID cache\n");
goto port_cleanup;
}
ret = ib_device_register_rdmacg(device);
if (ret) {
pr_warn("Couldn't register device with rdma cgroup\n");
goto cache_cleanup;
dev_warn(&device->dev,
"Couldn't create per port immutable data\n");
return ret;
}
memset(&device->attrs, 0, sizeof(device->attrs));
ret = device->query_device(device, &device->attrs, &uhw);
if (ret) {
pr_warn("Couldn't query the device attributes\n");
goto cg_cleanup;
dev_warn(&device->dev,
"Couldn't query the device attributes\n");
goto port_cleanup;
}
ret = setup_port_pkey_list(device);
if (ret) {
dev_warn(&device->dev, "Couldn't create per port_pkey_list\n");
goto port_cleanup;
}
ret = ib_cache_setup_one(device);
if (ret) {
dev_warn(&device->dev,
"Couldn't set up InfiniBand P_Key/GID cache\n");
goto pkey_cleanup;
}
return 0;
pkey_cleanup:
kfree(device->port_pkey_list);
port_cleanup:
kfree(device->port_immutable);
return ret;
}
/**
* ib_register_device - Register an IB device with IB core
* @device:Device to register
*
* Low-level drivers use ib_register_device() to register their
* devices with the IB core. All registered clients will receive a
* callback for each device that is added. @device must be allocated
* with ib_alloc_device().
*/
int ib_register_device(struct ib_device *device, const char *name,
int (*port_callback)(struct ib_device *, u8,
struct kobject *))
{
int ret;
struct ib_client *client;
setup_dma_device(device);
mutex_lock(&device_mutex);
if (strchr(name, '%')) {
ret = alloc_name(device, name);
if (ret)
goto out;
} else {
ret = dev_set_name(&device->dev, name);
if (ret)
goto out;
}
if (__ib_device_get_by_name(dev_name(&device->dev))) {
ret = -ENFILE;
goto out;
}
strlcpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX);
ret = setup_device(device);
if (ret)
goto out;
device->index = __dev_new_index();
ret = ib_device_register_rdmacg(device);
if (ret) {
dev_warn(&device->dev,
"Couldn't register device with rdma cgroup\n");
goto dev_cleanup;
}
ret = ib_device_register_sysfs(device, port_callback);
if (ret) {
pr_warn("Couldn't register device %s with driver model\n",
device->name);
dev_warn(&device->dev,
"Couldn't register device with driver model\n");
goto cg_cleanup;
}
@ -550,7 +614,6 @@ int ib_register_device(struct ib_device *device,
if (!add_client_context(device, client) && client->add)
client->add(device);
device->index = __dev_new_index();
down_write(&lists_rwsem);
list_add_tail(&device->core_list, &device_list);
up_write(&lists_rwsem);
@ -559,11 +622,8 @@ int ib_register_device(struct ib_device *device,
cg_cleanup:
ib_device_unregister_rdmacg(device);
cache_cleanup:
ib_cache_cleanup_one(device);
ib_cache_release_one(device);
port_cleanup:
kfree(device->port_immutable);
dev_cleanup:
cleanup_device(device);
out:
mutex_unlock(&device_mutex);
return ret;
@ -585,21 +645,20 @@ void ib_unregister_device(struct ib_device *device)
down_write(&lists_rwsem);
list_del(&device->core_list);
spin_lock_irqsave(&device->client_data_lock, flags);
list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
write_lock_irq(&device->client_data_lock);
list_for_each_entry(context, &device->client_data_list, list)
context->going_down = true;
spin_unlock_irqrestore(&device->client_data_lock, flags);
write_unlock_irq(&device->client_data_lock);
downgrade_write(&lists_rwsem);
list_for_each_entry_safe(context, tmp, &device->client_data_list,
list) {
list_for_each_entry(context, &device->client_data_list, list) {
if (context->client->remove)
context->client->remove(device, context->data);
}
up_read(&lists_rwsem);
ib_device_unregister_rdmacg(device);
ib_device_unregister_sysfs(device);
ib_device_unregister_rdmacg(device);
mutex_unlock(&device_mutex);
@ -609,10 +668,13 @@ void ib_unregister_device(struct ib_device *device)
kfree(device->port_pkey_list);
down_write(&lists_rwsem);
spin_lock_irqsave(&device->client_data_lock, flags);
list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
write_lock_irqsave(&device->client_data_lock, flags);
list_for_each_entry_safe(context, tmp, &device->client_data_list,
list) {
list_del(&context->list);
kfree(context);
spin_unlock_irqrestore(&device->client_data_lock, flags);
}
write_unlock_irqrestore(&device->client_data_lock, flags);
up_write(&lists_rwsem);
device->reg_state = IB_DEV_UNREGISTERED;
@ -662,9 +724,8 @@ EXPORT_SYMBOL(ib_register_client);
*/
void ib_unregister_client(struct ib_client *client)
{
struct ib_client_data *context, *tmp;
struct ib_client_data *context;
struct ib_device *device;
unsigned long flags;
mutex_lock(&device_mutex);
@ -676,14 +737,14 @@ void ib_unregister_client(struct ib_client *client)
struct ib_client_data *found_context = NULL;
down_write(&lists_rwsem);
spin_lock_irqsave(&device->client_data_lock, flags);
list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
write_lock_irq(&device->client_data_lock);
list_for_each_entry(context, &device->client_data_list, list)
if (context->client == client) {
context->going_down = true;
found_context = context;
break;
}
spin_unlock_irqrestore(&device->client_data_lock, flags);
write_unlock_irq(&device->client_data_lock);
up_write(&lists_rwsem);
if (client->remove)
@ -691,17 +752,18 @@ void ib_unregister_client(struct ib_client *client)
found_context->data : NULL);
if (!found_context) {
pr_warn("No client context found for %s/%s\n",
device->name, client->name);
dev_warn(&device->dev,
"No client context found for %s\n",
client->name);
continue;
}
down_write(&lists_rwsem);
spin_lock_irqsave(&device->client_data_lock, flags);
write_lock_irq(&device->client_data_lock);
list_del(&found_context->list);
kfree(found_context);
spin_unlock_irqrestore(&device->client_data_lock, flags);
write_unlock_irq(&device->client_data_lock);
up_write(&lists_rwsem);
kfree(found_context);
}
mutex_unlock(&device_mutex);
@ -722,13 +784,13 @@ void *ib_get_client_data(struct ib_device *device, struct ib_client *client)
void *ret = NULL;
unsigned long flags;
spin_lock_irqsave(&device->client_data_lock, flags);
read_lock_irqsave(&device->client_data_lock, flags);
list_for_each_entry(context, &device->client_data_list, list)
if (context->client == client) {
ret = context->data;
break;
}
spin_unlock_irqrestore(&device->client_data_lock, flags);
read_unlock_irqrestore(&device->client_data_lock, flags);
return ret;
}
@ -749,18 +811,18 @@ void ib_set_client_data(struct ib_device *device, struct ib_client *client,
struct ib_client_data *context;
unsigned long flags;
spin_lock_irqsave(&device->client_data_lock, flags);
write_lock_irqsave(&device->client_data_lock, flags);
list_for_each_entry(context, &device->client_data_list, list)
if (context->client == client) {
context->data = data;
goto out;
}
pr_warn("No client context found for %s/%s\n",
device->name, client->name);
dev_warn(&device->dev, "No client context found for %s\n",
client->name);
out:
spin_unlock_irqrestore(&device->client_data_lock, flags);
write_unlock_irqrestore(&device->client_data_lock, flags);
}
EXPORT_SYMBOL(ib_set_client_data);
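
The hunks above convert device->client_data_lock from a spinlock into a rwlock: lookups such as ib_get_client_data() now take it for reading, paths that add or delete entries take it for writing, and ib_unregister_client() moves the kfree() outside the critical section. A minimal, generic sketch of that reader/writer split (struct my_ctx, ctx_lock and ctx_list are illustrative names, not kernel symbols):

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct my_ctx {
	struct list_head list;
	void *key;
	void *data;
};

static DEFINE_RWLOCK(ctx_lock);
static LIST_HEAD(ctx_list);

/* Readers may be called with IRQs already disabled, so save/restore flags;
 * any number of them can hold the lock at once. */
static void *ctx_lookup(void *key)
{
	struct my_ctx *c;
	unsigned long flags;
	void *data = NULL;

	read_lock_irqsave(&ctx_lock, flags);
	list_for_each_entry(c, &ctx_list, list)
		if (c->key == key) {
			data = c->data;
			break;
		}
	read_unlock_irqrestore(&ctx_lock, flags);
	return data;
}

/* Writers run in process context with IRQs enabled, so the _irq variant
 * is enough, and the kfree() happens after the lock is dropped, just as
 * the ib_unregister_client() hunk above does. */
static void ctx_remove(struct my_ctx *c)
{
	write_lock_irq(&ctx_lock);
	list_del(&c->list);
	write_unlock_irq(&ctx_lock);
	kfree(c);
}
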
@ -1166,10 +1228,19 @@ static int __init ib_core_init(void)
goto err;
}
ib_comp_unbound_wq =
alloc_workqueue("ib-comp-unb-wq",
WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM |
WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE);
if (!ib_comp_unbound_wq) {
ret = -ENOMEM;
goto err_comp;
}
ret = class_register(&ib_class);
if (ret) {
pr_warn("Couldn't create InfiniBand device class\n");
goto err_comp;
goto err_comp_unbound;
}
ret = rdma_nl_init();
@ -1218,6 +1289,8 @@ err_ibnl:
rdma_nl_exit();
err_sysfs:
class_unregister(&ib_class);
err_comp_unbound:
destroy_workqueue(ib_comp_unbound_wq);
err_comp:
destroy_workqueue(ib_comp_wq);
err:
@ -1236,6 +1309,7 @@ static void __exit ib_core_cleanup(void)
addr_cleanup();
rdma_nl_exit();
class_unregister(&ib_class);
destroy_workqueue(ib_comp_unbound_wq);
destroy_workqueue(ib_comp_wq);
/* Make sure that any pending umem accounting work is done. */
destroy_workqueue(ib_wq);
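
The ib_core_init() hunk allocates the new ib-comp-unb-wq workqueue and threads an extra err_comp_unbound label into the existing unwind chain, and ib_core_cleanup() destroys it in the mirror position. A reduced sketch of that init/unwind idiom, with placeholder names standing in for the real resources:

#include <linux/device.h>
#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *wq_a, *wq_b;
static struct class example_class = { .name = "example" };

static int __init example_init(void)
{
	int ret;

	wq_a = alloc_workqueue("example-wq-a", WQ_HIGHPRI | WQ_MEM_RECLAIM, 0);
	if (!wq_a)
		return -ENOMEM;

	wq_b = alloc_workqueue("example-wq-unb",
			       WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM |
			       WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE);
	if (!wq_b) {
		ret = -ENOMEM;
		goto err_wq_a;
	}

	ret = class_register(&example_class);
	if (ret)
		goto err_wq_b;		/* newest resource unwinds first */

	return 0;

err_wq_b:
	destroy_workqueue(wq_b);
err_wq_a:
	destroy_workqueue(wq_a);
	return ret;
}
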

@ -213,7 +213,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
device = pd->device;
if (!device->alloc_fmr || !device->dealloc_fmr ||
!device->map_phys_fmr || !device->unmap_fmr) {
pr_info(PFX "Device %s does not support FMRs\n", device->name);
dev_info(&device->dev, "Device does not support FMRs\n");
return ERR_PTR(-ENOSYS);
}
@ -257,7 +257,8 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
atomic_set(&pool->flush_ser, 0);
init_waitqueue_head(&pool->force_wait);
pool->worker = kthread_create_worker(0, "ib_fmr(%s)", device->name);
pool->worker =
kthread_create_worker(0, "ib_fmr(%s)", dev_name(&device->dev));
if (IS_ERR(pool->worker)) {
pr_warn(PFX "couldn't start cleanup kthread worker\n");
ret = PTR_ERR(pool->worker);
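
Both hunks in this file swap the name copy kept inside ib_device (device->name) for the driver core's dev_name(), which simply returns whatever was set earlier with dev_set_name() on the embedded struct device. A generic illustration of that pairing (example_register() and the "example_%d" format are made up for the sketch; only dev_set_name() and dev_name() are real API):

#include <linux/device.h>
#include <rdma/ib_verbs.h>

static int example_register(struct ib_device *ibdev, int idx)
{
	int ret;

	ret = dev_set_name(&ibdev->dev, "example_%d", idx);
	if (ret)
		return ret;

	/* From here on dev_name(&ibdev->dev) returns "example_<idx>",
	 * so it can feed log messages, kthread names, sysfs, etc. */
	pr_info("registering %s\n", dev_name(&ibdev->dev));
	return 0;
}
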

@ -509,7 +509,7 @@ static int iw_cm_map(struct iw_cm_id *cm_id, bool active)
cm_id->m_local_addr = cm_id->local_addr;
cm_id->m_remote_addr = cm_id->remote_addr;
memcpy(pm_reg_msg.dev_name, cm_id->device->name,
memcpy(pm_reg_msg.dev_name, dev_name(&cm_id->device->dev),
sizeof(pm_reg_msg.dev_name));
memcpy(pm_reg_msg.if_name, cm_id->device->iwcm->ifname,
sizeof(pm_reg_msg.if_name));

@ -220,33 +220,37 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
int ret2, qpn;
u8 mgmt_class, vclass;
if ((qp_type == IB_QPT_SMI && !rdma_cap_ib_smi(device, port_num)) ||
(qp_type == IB_QPT_GSI && !rdma_cap_ib_cm(device, port_num)))
return ERR_PTR(-EPROTONOSUPPORT);
/* Validate parameters */
qpn = get_spl_qp_index(qp_type);
if (qpn == -1) {
dev_notice(&device->dev,
"ib_register_mad_agent: invalid QP Type %d\n",
qp_type);
dev_dbg_ratelimited(&device->dev, "%s: invalid QP Type %d\n",
__func__, qp_type);
goto error1;
}
if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) {
dev_notice(&device->dev,
"ib_register_mad_agent: invalid RMPP Version %u\n",
rmpp_version);
dev_dbg_ratelimited(&device->dev,
"%s: invalid RMPP Version %u\n",
__func__, rmpp_version);
goto error1;
}
/* Validate MAD registration request if supplied */
if (mad_reg_req) {
if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) {
dev_notice(&device->dev,
"ib_register_mad_agent: invalid Class Version %u\n",
mad_reg_req->mgmt_class_version);
dev_dbg_ratelimited(&device->dev,
"%s: invalid Class Version %u\n",
__func__,
mad_reg_req->mgmt_class_version);
goto error1;
}
if (!recv_handler) {
dev_notice(&device->dev,
"ib_register_mad_agent: no recv_handler\n");
dev_dbg_ratelimited(&device->dev,
"%s: no recv_handler\n", __func__);
goto error1;
}
if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) {
@ -256,9 +260,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
*/
if (mad_reg_req->mgmt_class !=
IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
dev_notice(&device->dev,
"ib_register_mad_agent: Invalid Mgmt Class 0x%x\n",
mad_reg_req->mgmt_class);
dev_dbg_ratelimited(&device->dev,
"%s: Invalid Mgmt Class 0x%x\n",
__func__, mad_reg_req->mgmt_class);
goto error1;
}
} else if (mad_reg_req->mgmt_class == 0) {
@ -266,8 +270,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
* Class 0 is reserved in IBA and is used for
* aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
*/
dev_notice(&device->dev,
"ib_register_mad_agent: Invalid Mgmt Class 0\n");
dev_dbg_ratelimited(&device->dev,
"%s: Invalid Mgmt Class 0\n",
__func__);
goto error1;
} else if (is_vendor_class(mad_reg_req->mgmt_class)) {
/*
@ -275,18 +280,19 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
* ensure supplied OUI is not zero
*/
if (!is_vendor_oui(mad_reg_req->oui)) {
dev_notice(&device->dev,
"ib_register_mad_agent: No OUI specified for class 0x%x\n",
mad_reg_req->mgmt_class);
dev_dbg_ratelimited(&device->dev,
"%s: No OUI specified for class 0x%x\n",
__func__,
mad_reg_req->mgmt_class);
goto error1;
}
}
/* Make sure class supplied is consistent with RMPP */
if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) {
if (rmpp_version) {
dev_notice(&device->dev,
"ib_register_mad_agent: RMPP version for non-RMPP class 0x%x\n",
mad_reg_req->mgmt_class);
dev_dbg_ratelimited(&device->dev,
"%s: RMPP version for non-RMPP class 0x%x\n",
__func__, mad_reg_req->mgmt_class);
goto error1;
}
}
@ -297,9 +303,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
IB_MGMT_CLASS_SUBN_LID_ROUTED) &&
(mad_reg_req->mgmt_class !=
IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
dev_notice(&device->dev,
"ib_register_mad_agent: Invalid SM QP type: class 0x%x\n",
mad_reg_req->mgmt_class);
dev_dbg_ratelimited(&device->dev,
"%s: Invalid SM QP type: class 0x%x\n",
__func__, mad_reg_req->mgmt_class);
goto error1;
}
} else {
@ -307,9 +313,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
(mad_reg_req->mgmt_class ==
IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
dev_notice(&device->dev,
"ib_register_mad_agent: Invalid GS QP type: class 0x%x\n",
mad_reg_req->mgmt_class);
dev_dbg_ratelimited(&device->dev,
"%s: Invalid GS QP type: class 0x%x\n",
__func__, mad_reg_req->mgmt_class);
goto error1;
}
}
@ -324,18 +330,18 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
/* Validate device and port */
port_priv = ib_get_mad_port(device, port_num);
if (!port_priv) {
dev_notice(&device->dev,
"ib_register_mad_agent: Invalid port %d\n",
port_num);
dev_dbg_ratelimited(&device->dev, "%s: Invalid port %d\n",
__func__, port_num);
ret = ERR_PTR(-ENODEV);
goto error1;
}
/* Verify the QP requested is supported. For example, Ethernet devices
* will not have QP0 */
/* Verify the QP requested is supported. For example, Ethernet devices
* will not have QP0.
*/
if (!port_priv->qp_info[qpn].qp) {
dev_notice(&device->dev,
"ib_register_mad_agent: QP %d not supported\n", qpn);
dev_dbg_ratelimited(&device->dev, "%s: QP %d not supported\n",
__func__, qpn);
ret = ERR_PTR(-EPROTONOSUPPORT);
goto error1;
}
@ -2408,7 +2414,7 @@ static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
}
void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
int timeout_ms)
unsigned long timeout_ms)
{
mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
wait_for_response(mad_send_wr);
@ -3183,7 +3189,7 @@ static int ib_mad_port_open(struct ib_device *device,
cq_size *= 2;
port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0,
IB_POLL_WORKQUEUE);
IB_POLL_UNBOUND_WORKQUEUE);
if (IS_ERR(port_priv->cq)) {
dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
ret = PTR_ERR(port_priv->cq);
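
The ib_register_mad_agent() hunks demote the parameter-validation messages from dev_notice() to dev_dbg_ratelimited() and replace the hard-coded function name with __func__, so malformed requests from userspace can no longer flood the kernel log. The shape of the pattern, compressed into a hypothetical validation helper:

#include <rdma/ib_mad.h>
#include <rdma/ib_verbs.h>

/* Hypothetical helper; only the logging call mirrors the hunks above. */
static int example_validate_rmpp(struct ib_device *device, u8 rmpp_version)
{
	if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) {
		/* Compiled away unless dynamic debug enables it, rate
		 * limited even then, and __func__ stays correct across
		 * renames. */
		dev_dbg_ratelimited(&device->dev,
				    "%s: invalid RMPP Version %u\n",
				    __func__, rmpp_version);
		return -EINVAL;
	}
	return 0;
}
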

@ -221,6 +221,6 @@ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr);
void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
int timeout_ms);
unsigned long timeout_ms);
#endif /* __IB_MAD_PRIV_H__ */

@ -47,9 +47,9 @@ static struct {
const struct rdma_nl_cbs *cb_table;
} rdma_nl_types[RDMA_NL_NUM_CLIENTS];
int rdma_nl_chk_listeners(unsigned int group)
bool rdma_nl_chk_listeners(unsigned int group)
{
return (netlink_has_listeners(nls, group)) ? 0 : -1;
return netlink_has_listeners(nls, group);
}
EXPORT_SYMBOL(rdma_nl_chk_listeners);
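
rdma_nl_chk_listeners() now returns the result of netlink_has_listeners() as a plain bool instead of the inverted 0/-1 convention, which is why the caller in the sa_query.c hunk further down flips its test. Side by side (send_netlink_request() is a placeholder for the caller's action):

/* Old convention: 0 meant "listeners present", -1 meant none. */
if (!rdma_nl_chk_listeners(RDMA_NL_GROUP_LS))
	send_netlink_request();

/* New convention: a plain bool, true when listeners are present. */
if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS))
	send_netlink_request();
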

@ -179,7 +179,8 @@ static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
{
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
return -EMSGSIZE;
if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, device->name))
if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
dev_name(&device->dev)))
return -EMSGSIZE;
return 0;
@ -645,6 +646,36 @@ err:
return err;
}
static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
struct ib_device *device;
u32 index;
int err;
err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
extack);
if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
return -EINVAL;
index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
device = ib_device_get_by_index(index);
if (!device)
return -EINVAL;
if (tb[RDMA_NLDEV_ATTR_DEV_NAME]) {
char name[IB_DEVICE_NAME_MAX] = {};
nla_strlcpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
IB_DEVICE_NAME_MAX);
err = ib_device_rename(device, name);
}
put_device(&device->dev);
return err;
}
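
nldev_set_doit() wires the new RDMA_NLDEV_CMD_SET operation (admin-only via RDMA_NL_ADMIN_PERM in the table below) to ib_device_rename(). The body of ib_device_rename() is not part of this excerpt; judging from the registration path earlier in the series it presumably has to serialize on device_mutex, reject duplicate names, and push the new name into the driver core, roughly along these lines (a sketch only, not the actual implementation):

/* Sketch only: the real ib_device_rename() is not shown in this diff. */
int ib_device_rename(struct ib_device *ibdev, const char *name)
{
	struct ib_device *device;
	int ret;

	if (!strcmp(name, dev_name(&ibdev->dev)))
		return 0;		/* nothing to do */

	mutex_lock(&device_mutex);
	list_for_each_entry(device, &device_list, core_list) {
		if (!strcmp(name, dev_name(&device->dev))) {
			mutex_unlock(&device_mutex);
			return -EEXIST;
		}
	}

	ret = device_rename(&ibdev->dev, name);	/* driver core renames the sysfs entry */
	if (!ret)
		strlcpy(ibdev->name, dev_name(&ibdev->dev), IB_DEVICE_NAME_MAX);
	mutex_unlock(&device_mutex);
	return ret;
}
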
static int _nldev_get_dumpit(struct ib_device *device,
struct sk_buff *skb,
struct netlink_callback *cb,
@ -1077,6 +1108,10 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
.doit = nldev_get_doit,
.dump = nldev_get_dumpit,
},
[RDMA_NLDEV_CMD_SET] = {
.doit = nldev_set_doit,
.flags = RDMA_NL_ADMIN_PERM,
},
[RDMA_NLDEV_CMD_PORT_GET] = {
.doit = nldev_port_get_doit,
.dump = nldev_port_get_dumpit,

@ -794,44 +794,6 @@ void uverbs_close_fd(struct file *f)
uverbs_uobject_put(uobj);
}
static void ufile_disassociate_ucontext(struct ib_ucontext *ibcontext)
{
struct ib_device *ib_dev = ibcontext->device;
struct task_struct *owning_process = NULL;
struct mm_struct *owning_mm = NULL;
owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
if (!owning_process)
return;
owning_mm = get_task_mm(owning_process);
if (!owning_mm) {
pr_info("no mm, disassociate ucontext is pending task termination\n");
while (1) {
put_task_struct(owning_process);
usleep_range(1000, 2000);
owning_process = get_pid_task(ibcontext->tgid,
PIDTYPE_PID);
if (!owning_process ||
owning_process->state == TASK_DEAD) {
pr_info("disassociate ucontext done, task was terminated\n");
/* in case task was dead need to release the
* task struct.
*/
if (owning_process)
put_task_struct(owning_process);
return;
}
}
}
down_write(&owning_mm->mmap_sem);
ib_dev->disassociate_ucontext(ibcontext);
up_write(&owning_mm->mmap_sem);
mmput(owning_mm);
put_task_struct(owning_process);
}
/*
* Drop the ucontext off the ufile and completely disconnect it from the
* ib_device
@ -840,20 +802,28 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
enum rdma_remove_reason reason)
{
struct ib_ucontext *ucontext = ufile->ucontext;
struct ib_device *ib_dev = ucontext->device;
int ret;
if (reason == RDMA_REMOVE_DRIVER_REMOVE)
ufile_disassociate_ucontext(ucontext);
/*
* If we are closing the FD then the user mmap VMAs must have
* already been destroyed as they hold on to the filep, otherwise
* they need to be zap'd.
*/
if (reason == RDMA_REMOVE_DRIVER_REMOVE) {
uverbs_user_mmap_disassociate(ufile);
if (ib_dev->disassociate_ucontext)
ib_dev->disassociate_ucontext(ucontext);
}
put_pid(ucontext->tgid);
ib_rdmacg_uncharge(&ucontext->cg_obj, ucontext->device,
ib_rdmacg_uncharge(&ucontext->cg_obj, ib_dev,
RDMACG_RESOURCE_HCA_HANDLE);
/*
* FIXME: Drivers are not permitted to fail dealloc_ucontext, remove
* the error return.
*/
ret = ucontext->device->dealloc_ucontext(ucontext);
ret = ib_dev->dealloc_ucontext(ucontext);
WARN_ON(ret);
ufile->ucontext = NULL;

@ -160,5 +160,6 @@ void uverbs_disassociate_api(struct uverbs_api *uapi);
void uverbs_destroy_api(struct uverbs_api *uapi);
void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm,
unsigned int num_attrs);
void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile);
#endif /* RDMA_CORE_H */

@ -50,8 +50,7 @@ void rdma_restrack_clean(struct rdma_restrack_root *res)
dev = container_of(res, struct ib_device, res);
pr_err("restrack: %s", CUT_HERE);
pr_err("restrack: BUG: RESTRACK detected leak of resources on %s\n",
dev->name);
dev_err(&dev->dev, "BUG: RESTRACK detected leak of resources\n");
hash_for_each(res->hash, bkt, e, node) {
if (rdma_is_kernel_res(e)) {
owner = e->kern_name;
@ -156,6 +155,21 @@ static bool res_is_user(struct rdma_restrack_entry *res)
}
}
void rdma_restrack_set_task(struct rdma_restrack_entry *res,
const char *caller)
{
if (caller) {
res->kern_name = caller;
return;
}
if (res->task)
put_task_struct(res->task);
get_task_struct(current);
res->task = current;
}
EXPORT_SYMBOL(rdma_restrack_set_task);
void rdma_restrack_add(struct rdma_restrack_entry *res)
{
struct ib_device *dev = res_to_dev(res);
@ -168,7 +182,7 @@ void rdma_restrack_add(struct rdma_restrack_entry *res)
if (res_is_user(res)) {
if (!res->task)
rdma_restrack_set_task(res, current);
rdma_restrack_set_task(res, NULL);
res->kern_name = NULL;
} else {
set_kern_name(res);
@ -209,7 +223,7 @@ void rdma_restrack_del(struct rdma_restrack_entry *res)
struct ib_device *dev;
if (!res->valid)
return;
goto out;
dev = res_to_dev(res);
if (!dev)
@ -222,8 +236,12 @@ void rdma_restrack_del(struct rdma_restrack_entry *res)
down_write(&dev->res.rwsem);
hash_del(&res->node);
res->valid = false;
if (res->task)
put_task_struct(res->task);
up_write(&dev->res.rwsem);
out:
if (res->task) {
put_task_struct(res->task);
res->task = NULL;
}
}
EXPORT_SYMBOL(rdma_restrack_del);
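
rdma_restrack_set_task() now centralizes the task reference handling: a non-NULL caller string marks the entry as kernel-owned, while NULL drops any previously recorded task and pins current instead, which is the form rdma_restrack_add() uses for user resources. The two usage shapes (the &qp->res argument is illustrative):

/* Kernel-owned resource: record the owning module/caller string. */
rdma_restrack_set_task(&qp->res, KBUILD_MODNAME);

/* User-owned resource: drop any previously recorded task reference and
 * pin the current task instead (this is what rdma_restrack_add() does). */
rdma_restrack_set_task(&qp->res, NULL);
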

@ -49,16 +49,14 @@ static inline void ib_sa_client_put(struct ib_sa_client *client)
}
int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
struct ib_device *device, u8 port_num,
u8 method,
struct ib_device *device, u8 port_num, u8 method,
struct ib_sa_mcmember_rec *rec,
ib_sa_comp_mask comp_mask,
int timeout_ms, gfp_t gfp_mask,
unsigned long timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
struct ib_sa_mcmember_rec *resp,
void *context),
void *context,
struct ib_sa_query **sa_query);
void *context, struct ib_sa_query **sa_query);
int mcast_init(void);
void mcast_cleanup(void);

@ -761,7 +761,7 @@ static void ib_nl_set_path_rec_attrs(struct sk_buff *skb,
/* Construct the family header first */
header = skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
memcpy(header->device_name, query->port->agent->device->name,
memcpy(header->device_name, dev_name(&query->port->agent->device->dev),
LS_DEVICE_NAME_MAX);
header->port_num = query->port->port_num;
@ -835,7 +835,6 @@ static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask)
struct sk_buff *skb = NULL;
struct nlmsghdr *nlh;
void *data;
int ret = 0;
struct ib_sa_mad *mad;
int len;
@ -862,13 +861,7 @@ static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask)
/* Repair the nlmsg header length */
nlmsg_end(skb, nlh);
ret = rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, gfp_mask);
if (!ret)
ret = len;
else
ret = 0;
return ret;
return rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, gfp_mask);
}
static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
@ -891,14 +884,12 @@ static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
spin_unlock_irqrestore(&ib_nl_request_lock, flags);
ret = ib_nl_send_msg(query, gfp_mask);
if (ret <= 0) {
if (ret) {
ret = -EIO;
/* Remove the request */
spin_lock_irqsave(&ib_nl_request_lock, flags);
list_del(&query->list);
spin_unlock_irqrestore(&ib_nl_request_lock, flags);
} else {
ret = 0;
}
return ret;
@ -1227,46 +1218,6 @@ static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
return src_path_mask;
}
static int roce_resolve_route_from_path(struct sa_path_rec *rec,
const struct ib_gid_attr *attr)
{
struct rdma_dev_addr dev_addr = {};
union {
struct sockaddr _sockaddr;
struct sockaddr_in _sockaddr_in;
struct sockaddr_in6 _sockaddr_in6;
} sgid_addr, dgid_addr;
int ret;
if (rec->roce.route_resolved)
return 0;
if (!attr || !attr->ndev)
return -EINVAL;
dev_addr.bound_dev_if = attr->ndev->ifindex;
/* TODO: Use net from the ib_gid_attr once it is added to it,
* until than, limit itself to init_net.
*/
dev_addr.net = &init_net;
rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid);
rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid);
/* validate the route */
ret = rdma_resolve_ip_route(&sgid_addr._sockaddr,
&dgid_addr._sockaddr, &dev_addr);
if (ret)
return ret;
if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
dev_addr.network == RDMA_NETWORK_IPV6) &&
rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2)
return -EINVAL;
rec->roce.route_resolved = true;
return 0;
}
static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num,
struct sa_path_rec *rec,
struct rdma_ah_attr *ah_attr,
@ -1409,7 +1360,8 @@ static void init_mad(struct ib_sa_query *query, struct ib_mad_agent *agent)
spin_unlock_irqrestore(&tid_lock, flags);
}
static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
static int send_mad(struct ib_sa_query *query, unsigned long timeout_ms,
gfp_t gfp_mask)
{
bool preload = gfpflags_allow_blocking(gfp_mask);
unsigned long flags;
@ -1433,7 +1385,7 @@ static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
if ((query->flags & IB_SA_ENABLE_LOCAL_SERVICE) &&
(!(query->flags & IB_SA_QUERY_OPA))) {
if (!rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) {
if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) {
if (!ib_nl_make_request(query, gfp_mask))
return id;
}
@ -1599,7 +1551,7 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
struct ib_device *device, u8 port_num,
struct sa_path_rec *rec,
ib_sa_comp_mask comp_mask,
int timeout_ms, gfp_t gfp_mask,
unsigned long timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
struct sa_path_rec *resp,
void *context),
@ -1753,7 +1705,7 @@ int ib_sa_service_rec_query(struct ib_sa_client *client,
struct ib_device *device, u8 port_num, u8 method,
struct ib_sa_service_rec *rec,
ib_sa_comp_mask comp_mask,
int timeout_ms, gfp_t gfp_mask,
unsigned long timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
struct ib_sa_service_rec *resp,
void *context),
@ -1850,7 +1802,7 @@ int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
u8 method,
struct ib_sa_mcmember_rec *rec,
ib_sa_comp_mask comp_mask,
int timeout_ms, gfp_t gfp_mask,
unsigned long timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
struct ib_sa_mcmember_rec *resp,
void *context),
@ -1941,7 +1893,7 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
struct ib_device *device, u8 port_num,
struct ib_sa_guidinfo_rec *rec,
ib_sa_comp_mask comp_mask, u8 method,
int timeout_ms, gfp_t gfp_mask,
unsigned long timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
struct ib_sa_guidinfo_rec *resp,
void *context),
@ -2108,7 +2060,7 @@ static void ib_sa_classport_info_rec_release(struct ib_sa_query *sa_query)
}
static int ib_sa_classport_info_rec_query(struct ib_sa_port *port,
int timeout_ms,
unsigned long timeout_ms,
void (*callback)(void *context),
void *context,
struct ib_sa_query **sa_query)

@ -685,9 +685,8 @@ static int ib_mad_agent_security_change(struct notifier_block *nb,
if (event != LSM_POLICY_CHANGE)
return NOTIFY_DONE;
ag->smp_allowed = !security_ib_endport_manage_subnet(ag->security,
ag->device->name,
ag->port_num);
ag->smp_allowed = !security_ib_endport_manage_subnet(
ag->security, dev_name(&ag->device->dev), ag->port_num);
return NOTIFY_OK;
}
@ -708,7 +707,7 @@ int ib_mad_agent_security_setup(struct ib_mad_agent *agent,
return 0;
ret = security_ib_endport_manage_subnet(agent->security,
agent->device->name,
dev_name(&agent->device->dev),
agent->port_num);
if (ret)
return ret;

@ -512,7 +512,7 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
ret = get_perf_mad(p->ibdev, p->port_num, tab_attr->attr_id, &data,
40 + offset / 8, sizeof(data));
if (ret < 0)
return sprintf(buf, "N/A (no PMA)\n");
return ret;
switch (width) {
case 4:
@ -1036,7 +1036,7 @@ static int add_port(struct ib_device *device, int port_num,
p->port_num = port_num;
ret = kobject_init_and_add(&p->kobj, &port_type,
device->ports_parent,
device->ports_kobj,
"%d", port_num);
if (ret) {
kfree(p);
@ -1057,10 +1057,12 @@ static int add_port(struct ib_device *device, int port_num,
goto err_put;
}
p->pma_table = get_counter_table(device, port_num);
ret = sysfs_create_group(&p->kobj, p->pma_table);
if (ret)
goto err_put_gid_attrs;
if (device->process_mad) {
p->pma_table = get_counter_table(device, port_num);
ret = sysfs_create_group(&p->kobj, p->pma_table);
if (ret)
goto err_put_gid_attrs;
}
p->gid_group.name = "gids";
p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len);
@ -1118,9 +1120,9 @@ static int add_port(struct ib_device *device, int port_num,
}
/*
* If port == 0, it means we have only one port and the parent
* device, not this port device, should be the holder of the
* hw_counters
* If port == 0, it means hw_counters are per device and not per
* port, so the holder should be the device. Therefore skip the per-port counter
* initialization.
*/
if (device->alloc_hw_stats && port_num)
setup_hw_stats(device, p, port_num);
@ -1173,7 +1175,8 @@ err_free_gid:
p->gid_group.attrs = NULL;
err_remove_pma:
sysfs_remove_group(&p->kobj, p->pma_table);
if (p->pma_table)
sysfs_remove_group(&p->kobj, p->pma_table);
err_put_gid_attrs:
kobject_put(&p->gid_attr_group->kobj);
@ -1183,7 +1186,7 @@ err_put:
return ret;
}
static ssize_t show_node_type(struct device *device,
static ssize_t node_type_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
@ -1198,8 +1201,9 @@ static ssize_t show_node_type(struct device *device,
default: return sprintf(buf, "%d: <unknown>\n", dev->node_type);
}
}
static DEVICE_ATTR_RO(node_type);
static ssize_t show_sys_image_guid(struct device *device,
static ssize_t sys_image_guid_show(struct device *device,
struct device_attribute *dev_attr, char *buf)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
@ -1210,8 +1214,9 @@ static ssize_t show_sys_image_guid(struct device *device,
be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[2]),
be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[3]));
}
static DEVICE_ATTR_RO(sys_image_guid);
static ssize_t show_node_guid(struct device *device,
static ssize_t node_guid_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
@ -1222,8 +1227,9 @@ static ssize_t show_node_guid(struct device *device,
be16_to_cpu(((__be16 *) &dev->node_guid)[2]),
be16_to_cpu(((__be16 *) &dev->node_guid)[3]));
}
static DEVICE_ATTR_RO(node_guid);
static ssize_t show_node_desc(struct device *device,
static ssize_t node_desc_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
@ -1231,9 +1237,9 @@ static ssize_t show_node_desc(struct device *device,
return sprintf(buf, "%.64s\n", dev->node_desc);
}
static ssize_t set_node_desc(struct device *device,
struct device_attribute *attr,
const char *buf, size_t count)
static ssize_t node_desc_store(struct device *device,
struct device_attribute *attr,
const char *buf, size_t count)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
struct ib_device_modify desc = {};
@ -1249,8 +1255,9 @@ static ssize_t set_node_desc(struct device *device,
return count;
}
static DEVICE_ATTR_RW(node_desc);
static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
static ssize_t fw_ver_show(struct device *device, struct device_attribute *attr,
char *buf)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
@ -1259,19 +1266,19 @@ static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
strlcat(buf, "\n", IB_FW_VERSION_NAME_MAX);
return strlen(buf);
}
static DEVICE_ATTR_RO(fw_ver);
static DEVICE_ATTR(node_type, S_IRUGO, show_node_type, NULL);
static DEVICE_ATTR(sys_image_guid, S_IRUGO, show_sys_image_guid, NULL);
static DEVICE_ATTR(node_guid, S_IRUGO, show_node_guid, NULL);
static DEVICE_ATTR(node_desc, S_IRUGO | S_IWUSR, show_node_desc, set_node_desc);
static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
static struct attribute *ib_dev_attrs[] = {
&dev_attr_node_type.attr,
&dev_attr_node_guid.attr,
&dev_attr_sys_image_guid.attr,
&dev_attr_fw_ver.attr,
&dev_attr_node_desc.attr,
NULL,
};
static struct device_attribute *ib_class_attributes[] = {
&dev_attr_node_type,
&dev_attr_sys_image_guid,
&dev_attr_node_guid,
&dev_attr_node_desc,
&dev_attr_fw_ver,
static const struct attribute_group dev_attr_group = {
.attrs = ib_dev_attrs,
};
static void free_port_list_attributes(struct ib_device *device)
@ -1285,7 +1292,9 @@ static void free_port_list_attributes(struct ib_device *device)
kfree(port->hw_stats);
free_hsag(&port->kobj, port->hw_stats_ag);
}
sysfs_remove_group(p, port->pma_table);
if (port->pma_table)
sysfs_remove_group(p, port->pma_table);
sysfs_remove_group(p, &port->pkey_group);
sysfs_remove_group(p, &port->gid_group);
sysfs_remove_group(&port->gid_attr_group->kobj,
@ -1296,7 +1305,7 @@ static void free_port_list_attributes(struct ib_device *device)
kobject_put(p);
}
kobject_put(device->ports_parent);
kobject_put(device->ports_kobj);
}
int ib_device_register_sysfs(struct ib_device *device,
@ -1307,23 +1316,15 @@ int ib_device_register_sysfs(struct ib_device *device,
int ret;
int i;
ret = dev_set_name(class_dev, "%s", device->name);
if (ret)
return ret;
device->groups[0] = &dev_attr_group;
class_dev->groups = device->groups;
ret = device_add(class_dev);
if (ret)
goto err;
for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) {
ret = device_create_file(class_dev, ib_class_attributes[i]);
if (ret)
goto err_unregister;
}
device->ports_parent = kobject_create_and_add("ports",
&class_dev->kobj);
if (!device->ports_parent) {
device->ports_kobj = kobject_create_and_add("ports", &class_dev->kobj);
if (!device->ports_kobj) {
ret = -ENOMEM;
goto err_put;
}
@ -1347,20 +1348,15 @@ int ib_device_register_sysfs(struct ib_device *device,
err_put:
free_port_list_attributes(device);
err_unregister:
device_del(class_dev);
err:
return ret;
}
void ib_device_unregister_sysfs(struct ib_device *device)
{
int i;
/* Hold kobject until ib_dealloc_device() */
kobject_get(&device->dev.kobj);
/* Hold device until ib_dealloc_device() */
get_device(&device->dev);
free_port_list_attributes(device);
@ -1369,8 +1365,5 @@ void ib_device_unregister_sysfs(struct ib_device *device)
free_hsag(&device->dev.kobj, device->hw_stats_ag);
}
for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i)
device_remove_file(&device->dev, ib_class_attributes[i]);
device_unregister(&device->dev);
}
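
This file's conversion replaces the loop of device_create_file()/device_remove_file() calls with DEVICE_ATTR_RO()/DEVICE_ATTR_RW() declarations and a static attribute group hung off class_dev->groups before device_add(), so the driver core creates and removes the attributes together with the device. The idiom, reduced to a single attribute (error handling omitted):

/* DEVICE_ATTR_RO(node_type) requires a callback named node_type_show(). */
static ssize_t node_type_show(struct device *device,
			      struct device_attribute *attr, char *buf)
{
	struct ib_device *dev = container_of(device, struct ib_device, dev);

	return sprintf(buf, "%d\n", dev->node_type);
}
static DEVICE_ATTR_RO(node_type);

static struct attribute *ib_dev_attrs[] = {
	&dev_attr_node_type.attr,
	NULL,
};

static const struct attribute_group dev_attr_group = {
	.attrs = ib_dev_attrs,
};

/* Hook the group up before device_add(); device_unregister() removes the
 * files again, so no per-attribute create/remove loop is needed. */
static int example_register_sysfs(struct ib_device *device)
{
	struct device *class_dev = &device->dev;

	device->groups[0] = &dev_attr_group;
	class_dev->groups = device->groups;
	return device_add(class_dev);
}
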

@ -85,7 +85,9 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
struct page **page_list;
struct vm_area_struct **vma_list;
unsigned long lock_limit;
unsigned long new_pinned;
unsigned long cur_base;
struct mm_struct *mm;
unsigned long npages;
int ret;
int i;
@ -107,25 +109,32 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
if (!can_do_mlock())
return ERR_PTR(-EPERM);
umem = kzalloc(sizeof *umem, GFP_KERNEL);
if (!umem)
return ERR_PTR(-ENOMEM);
if (access & IB_ACCESS_ON_DEMAND) {
umem = kzalloc(sizeof(struct ib_umem_odp), GFP_KERNEL);
if (!umem)
return ERR_PTR(-ENOMEM);
umem->is_odp = 1;
} else {
umem = kzalloc(sizeof(*umem), GFP_KERNEL);
if (!umem)
return ERR_PTR(-ENOMEM);
}
umem->context = context;
umem->length = size;
umem->address = addr;
umem->page_shift = PAGE_SHIFT;
umem->writable = ib_access_writable(access);
umem->owning_mm = mm = current->mm;
mmgrab(mm);
if (access & IB_ACCESS_ON_DEMAND) {
ret = ib_umem_odp_get(context, umem, access);
ret = ib_umem_odp_get(to_ib_umem_odp(umem), access);
if (ret)
goto umem_kfree;
return umem;
}
umem->odp_data = NULL;
/* We assume the memory is from hugetlb until proved otherwise */
umem->hugetlb = 1;
@ -144,25 +153,25 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
umem->hugetlb = 0;
npages = ib_umem_num_pages(umem);
if (npages == 0 || npages > UINT_MAX) {
ret = -EINVAL;
goto out;
}
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
down_write(&current->mm->mmap_sem);
current->mm->pinned_vm += npages;
if ((current->mm->pinned_vm > lock_limit) && !capable(CAP_IPC_LOCK)) {
up_write(&current->mm->mmap_sem);
down_write(&mm->mmap_sem);
if (check_add_overflow(mm->pinned_vm, npages, &new_pinned) ||
(new_pinned > lock_limit && !capable(CAP_IPC_LOCK))) {
up_write(&mm->mmap_sem);
ret = -ENOMEM;
goto vma;
goto out;
}
up_write(&current->mm->mmap_sem);
mm->pinned_vm = new_pinned;
up_write(&mm->mmap_sem);
cur_base = addr & PAGE_MASK;
if (npages == 0 || npages > UINT_MAX) {
ret = -EINVAL;
goto vma;
}
ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL);
if (ret)
goto vma;
@ -172,14 +181,14 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
sg_list_start = umem->sg_head.sgl;
down_read(&current->mm->mmap_sem);
while (npages) {
down_read(&mm->mmap_sem);
ret = get_user_pages_longterm(cur_base,
min_t(unsigned long, npages,
PAGE_SIZE / sizeof (struct page *)),
gup_flags, page_list, vma_list);
if (ret < 0) {
up_read(&current->mm->mmap_sem);
up_read(&mm->mmap_sem);
goto umem_release;
}
@ -187,17 +196,20 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
cur_base += ret * PAGE_SIZE;
npages -= ret;
/* Continue to hold the mmap_sem as vma_list access
* needs to be protected.
*/
for_each_sg(sg_list_start, sg, ret, i) {
if (vma_list && !is_vm_hugetlb_page(vma_list[i]))
umem->hugetlb = 0;
sg_set_page(sg, page_list[i], PAGE_SIZE, 0);
}
up_read(&mm->mmap_sem);
/* preparing for next loop */
sg_list_start = sg;
}
up_read(&current->mm->mmap_sem);
umem->nmap = ib_dma_map_sg_attrs(context->device,
umem->sg_head.sgl,
@ -216,29 +228,40 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
umem_release:
__ib_umem_release(context->device, umem, 0);
vma:
down_write(&current->mm->mmap_sem);
current->mm->pinned_vm -= ib_umem_num_pages(umem);
up_write(&current->mm->mmap_sem);
down_write(&mm->mmap_sem);
mm->pinned_vm -= ib_umem_num_pages(umem);
up_write(&mm->mmap_sem);
out:
if (vma_list)
free_page((unsigned long) vma_list);
free_page((unsigned long) page_list);
umem_kfree:
if (ret)
if (ret) {
mmdrop(umem->owning_mm);
kfree(umem);
}
return ret ? ERR_PTR(ret) : umem;
}
EXPORT_SYMBOL(ib_umem_get);
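
For on-demand-paging registrations ib_umem_get() now allocates a whole struct ib_umem_odp with the plain struct ib_umem embedded inside it and sets umem->is_odp; the rest of the series passes struct ib_umem_odp * around and converts back with a to_ib_umem_odp() helper. That helper is not shown in this excerpt, but presumably it is just a container_of() accessor along these lines, valid only when is_odp is set:

/* Assumed shape of the helper the diff relies on; valid only when
 * umem->is_odp is set (i.e. the umem is embedded in an ib_umem_odp). */
static inline struct ib_umem_odp *to_ib_umem_odp(struct ib_umem *umem)
{
	return container_of(umem, struct ib_umem_odp, umem);
}
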
static void ib_umem_account(struct work_struct *work)
static void __ib_umem_release_tail(struct ib_umem *umem)
{
mmdrop(umem->owning_mm);
if (umem->is_odp)
kfree(to_ib_umem_odp(umem));
else
kfree(umem);
}
static void ib_umem_release_defer(struct work_struct *work)
{
struct ib_umem *umem = container_of(work, struct ib_umem, work);
down_write(&umem->mm->mmap_sem);
umem->mm->pinned_vm -= umem->diff;
up_write(&umem->mm->mmap_sem);
mmput(umem->mm);
kfree(umem);
down_write(&umem->owning_mm->mmap_sem);
umem->owning_mm->pinned_vm -= ib_umem_num_pages(umem);
up_write(&umem->owning_mm->mmap_sem);
__ib_umem_release_tail(umem);
}
/**
@ -248,52 +271,36 @@ static void ib_umem_account(struct work_struct *work)
void ib_umem_release(struct ib_umem *umem)
{
struct ib_ucontext *context = umem->context;
struct mm_struct *mm;
struct task_struct *task;
unsigned long diff;
if (umem->odp_data) {
ib_umem_odp_release(umem);
if (umem->is_odp) {
ib_umem_odp_release(to_ib_umem_odp(umem));
__ib_umem_release_tail(umem);
return;
}
__ib_umem_release(umem->context->device, umem, 1);
task = get_pid_task(umem->context->tgid, PIDTYPE_PID);
if (!task)
goto out;
mm = get_task_mm(task);
put_task_struct(task);
if (!mm)
goto out;
diff = ib_umem_num_pages(umem);
/*
* We may be called with the mm's mmap_sem already held. This
* can happen when a userspace munmap() is the call that drops
* the last reference to our file and calls our release
* method. If there are memory regions to destroy, we'll end
* up here and not be able to take the mmap_sem. In that case
* we defer the vm_locked accounting to the system workqueue.
* we defer the vm_locked accounting to a workqueue.
*/
if (context->closing) {
if (!down_write_trylock(&mm->mmap_sem)) {
INIT_WORK(&umem->work, ib_umem_account);
umem->mm = mm;
umem->diff = diff;
if (!down_write_trylock(&umem->owning_mm->mmap_sem)) {
INIT_WORK(&umem->work, ib_umem_release_defer);
queue_work(ib_wq, &umem->work);
return;
}
} else
down_write(&mm->mmap_sem);
} else {
down_write(&umem->owning_mm->mmap_sem);
}
umem->owning_mm->pinned_vm -= ib_umem_num_pages(umem);
up_write(&umem->owning_mm->mmap_sem);
mm->pinned_vm -= diff;
up_write(&mm->mmap_sem);
mmput(mm);
out:
kfree(umem);
__ib_umem_release_tail(umem);
}
EXPORT_SYMBOL(ib_umem_release);
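
The reworked release path keys everything off umem->owning_mm: ib_umem_get() takes an mmgrab() reference, which keeps the mm_struct (and therefore mmap_sem and the pinned_vm counter) from being freed without keeping the address space alive, and __ib_umem_release_tail() drops it with mmdrop(). Code that actually needs the address space, such as the ODP fault path later in this diff, still has to take a temporary mm_users reference. A compressed illustration of the two reference types (only the mm_* helpers are real API here; the surrounding code is schematic):

/* At registration time: pin the mm_struct itself (not the address space),
 * so owning_mm->pinned_vm and mmap_sem stay valid for later accounting. */
umem->owning_mm = current->mm;
mmgrab(umem->owning_mm);

/* When pages must actually be faulted in later, possibly from a context
 * that is not the owning process: take a temporary mm_users reference. */
if (mmget_not_zero(umem->owning_mm)) {
	down_read(&umem->owning_mm->mmap_sem);
	/* ... get_user_pages work against owning_mm goes here ... */
	up_read(&umem->owning_mm->mmap_sem);
	mmput(umem->owning_mm);
}

/* At final release: drop the reference taken at registration. */
mmdrop(umem->owning_mm);
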
@ -303,7 +310,7 @@ int ib_umem_page_count(struct ib_umem *umem)
int n;
struct scatterlist *sg;
if (umem->odp_data)
if (umem->is_odp)
return ib_umem_num_pages(umem);
n = 0;

@ -58,7 +58,7 @@ static u64 node_start(struct umem_odp_node *n)
struct ib_umem_odp *umem_odp =
container_of(n, struct ib_umem_odp, interval_tree);
return ib_umem_start(umem_odp->umem);
return ib_umem_start(&umem_odp->umem);
}
/* Note that the representation of the intervals in the interval tree
@ -71,140 +71,86 @@ static u64 node_last(struct umem_odp_node *n)
struct ib_umem_odp *umem_odp =
container_of(n, struct ib_umem_odp, interval_tree);
return ib_umem_end(umem_odp->umem) - 1;
return ib_umem_end(&umem_odp->umem) - 1;
}
INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last,
node_start, node_last, static, rbt_ib_umem)
static void ib_umem_notifier_start_account(struct ib_umem *item)
static void ib_umem_notifier_start_account(struct ib_umem_odp *umem_odp)
{
mutex_lock(&item->odp_data->umem_mutex);
/* Only update private counters for this umem if it has them.
* Otherwise skip it. All page faults will be delayed for this umem. */
if (item->odp_data->mn_counters_active) {
int notifiers_count = item->odp_data->notifiers_count++;
if (notifiers_count == 0)
/* Initialize the completion object for waiting on
* notifiers. Since notifier_count is zero, no one
* should be waiting right now. */
reinit_completion(&item->odp_data->notifier_completion);
}
mutex_unlock(&item->odp_data->umem_mutex);
}
static void ib_umem_notifier_end_account(struct ib_umem *item)
{
mutex_lock(&item->odp_data->umem_mutex);
/* Only update private counters for this umem if it has them.
* Otherwise skip it. All page faults will be delayed for this umem. */
if (item->odp_data->mn_counters_active) {
mutex_lock(&umem_odp->umem_mutex);
if (umem_odp->notifiers_count++ == 0)
/*
* This sequence increase will notify the QP page fault that
* the page that is going to be mapped in the spte could have
* been freed.
* Initialize the completion object for waiting on
* notifiers. Since notifier_count is zero, no one should be
* waiting right now.
*/
++item->odp_data->notifiers_seq;
if (--item->odp_data->notifiers_count == 0)
complete_all(&item->odp_data->notifier_completion);
}
mutex_unlock(&item->odp_data->umem_mutex);
reinit_completion(&umem_odp->notifier_completion);
mutex_unlock(&umem_odp->umem_mutex);
}
/* Account for a new mmu notifier in an ib_ucontext. */
static void ib_ucontext_notifier_start_account(struct ib_ucontext *context)
static void ib_umem_notifier_end_account(struct ib_umem_odp *umem_odp)
{
atomic_inc(&context->notifier_count);
mutex_lock(&umem_odp->umem_mutex);
/*
* This sequence increase will notify the QP page fault that the page
* that is going to be mapped in the spte could have been freed.
*/
++umem_odp->notifiers_seq;
if (--umem_odp->notifiers_count == 0)
complete_all(&umem_odp->notifier_completion);
mutex_unlock(&umem_odp->umem_mutex);
}
/* Account for a terminating mmu notifier in an ib_ucontext.
*
* Must be called with the ib_ucontext->umem_rwsem semaphore unlocked, since
* the function takes the semaphore itself. */
static void ib_ucontext_notifier_end_account(struct ib_ucontext *context)
static int ib_umem_notifier_release_trampoline(struct ib_umem_odp *umem_odp,
u64 start, u64 end, void *cookie)
{
int zero_notifiers = atomic_dec_and_test(&context->notifier_count);
struct ib_umem *umem = &umem_odp->umem;
if (zero_notifiers &&
!list_empty(&context->no_private_counters)) {
/* No currently running mmu notifiers. Now is the chance to
* add private accounting to all previously added umems. */
struct ib_umem_odp *odp_data, *next;
/* Prevent concurrent mmu notifiers from working on the
* no_private_counters list. */
down_write(&context->umem_rwsem);
/* Read the notifier_count again, with the umem_rwsem
* semaphore taken for write. */
if (!atomic_read(&context->notifier_count)) {
list_for_each_entry_safe(odp_data, next,
&context->no_private_counters,
no_private_counters) {
mutex_lock(&odp_data->umem_mutex);
odp_data->mn_counters_active = true;
list_del(&odp_data->no_private_counters);
complete_all(&odp_data->notifier_completion);
mutex_unlock(&odp_data->umem_mutex);
}
}
up_write(&context->umem_rwsem);
}
}
static int ib_umem_notifier_release_trampoline(struct ib_umem *item, u64 start,
u64 end, void *cookie) {
/*
* Increase the number of notifiers running, to
* prevent any further fault handling on this MR.
*/
ib_umem_notifier_start_account(item);
item->odp_data->dying = 1;
ib_umem_notifier_start_account(umem_odp);
umem_odp->dying = 1;
/* Make sure that the fact the umem is dying is out before we release
* all pending page faults. */
smp_wmb();
complete_all(&item->odp_data->notifier_completion);
item->context->invalidate_range(item, ib_umem_start(item),
ib_umem_end(item));
complete_all(&umem_odp->notifier_completion);
umem->context->invalidate_range(umem_odp, ib_umem_start(umem),
ib_umem_end(umem));
return 0;
}
static void ib_umem_notifier_release(struct mmu_notifier *mn,
struct mm_struct *mm)
{
struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn);
struct ib_ucontext_per_mm *per_mm =
container_of(mn, struct ib_ucontext_per_mm, mn);
if (!context->invalidate_range)
return;
ib_ucontext_notifier_start_account(context);
down_read(&context->umem_rwsem);
rbt_ib_umem_for_each_in_range(&context->umem_tree, 0,
ULLONG_MAX,
ib_umem_notifier_release_trampoline,
true,
NULL);
up_read(&context->umem_rwsem);
down_read(&per_mm->umem_rwsem);
if (per_mm->active)
rbt_ib_umem_for_each_in_range(
&per_mm->umem_tree, 0, ULLONG_MAX,
ib_umem_notifier_release_trampoline, true, NULL);
up_read(&per_mm->umem_rwsem);
}
static int invalidate_page_trampoline(struct ib_umem *item, u64 start,
static int invalidate_page_trampoline(struct ib_umem_odp *item, u64 start,
u64 end, void *cookie)
{
ib_umem_notifier_start_account(item);
item->context->invalidate_range(item, start, start + PAGE_SIZE);
item->umem.context->invalidate_range(item, start, start + PAGE_SIZE);
ib_umem_notifier_end_account(item);
return 0;
}
static int invalidate_range_start_trampoline(struct ib_umem *item, u64 start,
u64 end, void *cookie)
static int invalidate_range_start_trampoline(struct ib_umem_odp *item,
u64 start, u64 end, void *cookie)
{
ib_umem_notifier_start_account(item);
item->context->invalidate_range(item, start, end);
item->umem.context->invalidate_range(item, start, end);
return 0;
}
@ -214,28 +160,30 @@ static int ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn,
unsigned long end,
bool blockable)
{
struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn);
int ret;
if (!context->invalidate_range)
return 0;
struct ib_ucontext_per_mm *per_mm =
container_of(mn, struct ib_ucontext_per_mm, mn);
if (blockable)
down_read(&context->umem_rwsem);
else if (!down_read_trylock(&context->umem_rwsem))
down_read(&per_mm->umem_rwsem);
else if (!down_read_trylock(&per_mm->umem_rwsem))
return -EAGAIN;
ib_ucontext_notifier_start_account(context);
ret = rbt_ib_umem_for_each_in_range(&context->umem_tree, start,
end,
invalidate_range_start_trampoline,
blockable, NULL);
up_read(&context->umem_rwsem);
if (!per_mm->active) {
up_read(&per_mm->umem_rwsem);
/*
* At this point active is permanently set and visible to this
* CPU without a lock, that fact is relied on to skip the unlock
* in range_end.
*/
return 0;
}
return ret;
return rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, start, end,
invalidate_range_start_trampoline,
blockable, NULL);
}
static int invalidate_range_end_trampoline(struct ib_umem *item, u64 start,
static int invalidate_range_end_trampoline(struct ib_umem_odp *item, u64 start,
u64 end, void *cookie)
{
ib_umem_notifier_end_account(item);
@ -247,22 +195,16 @@ static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn,
unsigned long start,
unsigned long end)
{
struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn);
struct ib_ucontext_per_mm *per_mm =
container_of(mn, struct ib_ucontext_per_mm, mn);
if (!context->invalidate_range)
if (unlikely(!per_mm->active))
return;
/*
* TODO: we currently bail out if there is any sleepable work to be done
* in ib_umem_notifier_invalidate_range_start so we shouldn't really block
* here. But this is ugly and fragile.
*/
down_read(&context->umem_rwsem);
rbt_ib_umem_for_each_in_range(&context->umem_tree, start,
rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, start,
end,
invalidate_range_end_trampoline, true, NULL);
up_read(&context->umem_rwsem);
ib_ucontext_notifier_end_account(context);
up_read(&per_mm->umem_rwsem);
}
static const struct mmu_notifier_ops ib_umem_notifiers = {
@ -271,31 +213,158 @@ static const struct mmu_notifier_ops ib_umem_notifiers = {
.invalidate_range_end = ib_umem_notifier_invalidate_range_end,
};
struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
unsigned long addr,
size_t size)
static void add_umem_to_per_mm(struct ib_umem_odp *umem_odp)
{
struct ib_umem *umem;
struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
struct ib_umem *umem = &umem_odp->umem;
down_write(&per_mm->umem_rwsem);
if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
rbt_ib_umem_insert(&umem_odp->interval_tree,
&per_mm->umem_tree);
up_write(&per_mm->umem_rwsem);
}
static void remove_umem_from_per_mm(struct ib_umem_odp *umem_odp)
{
struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
struct ib_umem *umem = &umem_odp->umem;
down_write(&per_mm->umem_rwsem);
if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
rbt_ib_umem_remove(&umem_odp->interval_tree,
&per_mm->umem_tree);
complete_all(&umem_odp->notifier_completion);
up_write(&per_mm->umem_rwsem);
}
static struct ib_ucontext_per_mm *alloc_per_mm(struct ib_ucontext *ctx,
struct mm_struct *mm)
{
struct ib_ucontext_per_mm *per_mm;
int ret;
per_mm = kzalloc(sizeof(*per_mm), GFP_KERNEL);
if (!per_mm)
return ERR_PTR(-ENOMEM);
per_mm->context = ctx;
per_mm->mm = mm;
per_mm->umem_tree = RB_ROOT_CACHED;
init_rwsem(&per_mm->umem_rwsem);
per_mm->active = ctx->invalidate_range;
rcu_read_lock();
per_mm->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
rcu_read_unlock();
WARN_ON(mm != current->mm);
per_mm->mn.ops = &ib_umem_notifiers;
ret = mmu_notifier_register(&per_mm->mn, per_mm->mm);
if (ret) {
dev_err(&ctx->device->dev,
"Failed to register mmu_notifier %d\n", ret);
goto out_pid;
}
list_add(&per_mm->ucontext_list, &ctx->per_mm_list);
return per_mm;
out_pid:
put_pid(per_mm->tgid);
kfree(per_mm);
return ERR_PTR(ret);
}
static int get_per_mm(struct ib_umem_odp *umem_odp)
{
struct ib_ucontext *ctx = umem_odp->umem.context;
struct ib_ucontext_per_mm *per_mm;
/*
* Generally speaking we expect only one or two per_mm in this list,
* so no reason to optimize this search today.
*/
mutex_lock(&ctx->per_mm_list_lock);
list_for_each_entry(per_mm, &ctx->per_mm_list, ucontext_list) {
if (per_mm->mm == umem_odp->umem.owning_mm)
goto found;
}
per_mm = alloc_per_mm(ctx, umem_odp->umem.owning_mm);
if (IS_ERR(per_mm)) {
mutex_unlock(&ctx->per_mm_list_lock);
return PTR_ERR(per_mm);
}
found:
umem_odp->per_mm = per_mm;
per_mm->odp_mrs_count++;
mutex_unlock(&ctx->per_mm_list_lock);
return 0;
}
static void free_per_mm(struct rcu_head *rcu)
{
kfree(container_of(rcu, struct ib_ucontext_per_mm, rcu));
}
void put_per_mm(struct ib_umem_odp *umem_odp)
{
struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
struct ib_ucontext *ctx = umem_odp->umem.context;
bool need_free;
mutex_lock(&ctx->per_mm_list_lock);
umem_odp->per_mm = NULL;
per_mm->odp_mrs_count--;
need_free = per_mm->odp_mrs_count == 0;
if (need_free)
list_del(&per_mm->ucontext_list);
mutex_unlock(&ctx->per_mm_list_lock);
if (!need_free)
return;
/*
* NOTE! mmu_notifier_unregister() can happen between a start/end
* callback, resulting in a start without a matching end, and thus an unbalanced
* lock. This doesn't really matter to us since we are about to kfree
* the memory that holds the lock, however LOCKDEP doesn't like this.
*/
down_write(&per_mm->umem_rwsem);
per_mm->active = false;
up_write(&per_mm->umem_rwsem);
WARN_ON(!RB_EMPTY_ROOT(&per_mm->umem_tree.rb_root));
mmu_notifier_unregister_no_release(&per_mm->mn, per_mm->mm);
put_pid(per_mm->tgid);
mmu_notifier_call_srcu(&per_mm->rcu, free_per_mm);
}
struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext_per_mm *per_mm,
unsigned long addr, size_t size)
{
struct ib_ucontext *ctx = per_mm->context;
struct ib_umem_odp *odp_data;
struct ib_umem *umem;
int pages = size >> PAGE_SHIFT;
int ret;
umem = kzalloc(sizeof(*umem), GFP_KERNEL);
if (!umem)
odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL);
if (!odp_data)
return ERR_PTR(-ENOMEM);
umem->context = context;
umem = &odp_data->umem;
umem->context = ctx;
umem->length = size;
umem->address = addr;
umem->page_shift = PAGE_SHIFT;
umem->writable = 1;
odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL);
if (!odp_data) {
ret = -ENOMEM;
goto out_umem;
}
odp_data->umem = umem;
umem->is_odp = 1;
odp_data->per_mm = per_mm;
mutex_init(&odp_data->umem_mutex);
init_completion(&odp_data->notifier_completion);
@ -314,39 +383,34 @@ struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
goto out_page_list;
}
down_write(&context->umem_rwsem);
context->odp_mrs_count++;
rbt_ib_umem_insert(&odp_data->interval_tree, &context->umem_tree);
if (likely(!atomic_read(&context->notifier_count)))
odp_data->mn_counters_active = true;
else
list_add(&odp_data->no_private_counters,
&context->no_private_counters);
up_write(&context->umem_rwsem);
/*
* Caller must ensure that the umem_odp that the per_mm came from
* cannot be freed during the call to ib_alloc_odp_umem.
*/
mutex_lock(&ctx->per_mm_list_lock);
per_mm->odp_mrs_count++;
mutex_unlock(&ctx->per_mm_list_lock);
add_umem_to_per_mm(odp_data);
umem->odp_data = odp_data;
return umem;
return odp_data;
out_page_list:
vfree(odp_data->page_list);
out_odp_data:
kfree(odp_data);
out_umem:
kfree(umem);
return ERR_PTR(ret);
}
EXPORT_SYMBOL(ib_alloc_odp_umem);
int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem,
int access)
int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
{
struct ib_umem *umem = &umem_odp->umem;
/*
* NOTE: This must called in a process context where umem->owning_mm
* == current->mm
*/
struct mm_struct *mm = umem->owning_mm;
int ret_val;
struct pid *our_pid;
struct mm_struct *mm = get_task_mm(current);
if (!mm)
return -EINVAL;
if (access & IB_ACCESS_HUGETLB) {
struct vm_area_struct *vma;
@ -366,111 +430,43 @@ int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem,
umem->hugetlb = 0;
}
/* Prevent creating ODP MRs in child processes */
rcu_read_lock();
our_pid = get_task_pid(current->group_leader, PIDTYPE_PID);
rcu_read_unlock();
put_pid(our_pid);
if (context->tgid != our_pid) {
ret_val = -EINVAL;
goto out_mm;
}
mutex_init(&umem_odp->umem_mutex);
umem->odp_data = kzalloc(sizeof(*umem->odp_data), GFP_KERNEL);
if (!umem->odp_data) {
ret_val = -ENOMEM;
goto out_mm;
}
umem->odp_data->umem = umem;
mutex_init(&umem->odp_data->umem_mutex);
init_completion(&umem->odp_data->notifier_completion);
init_completion(&umem_odp->notifier_completion);
if (ib_umem_num_pages(umem)) {
umem->odp_data->page_list =
vzalloc(array_size(sizeof(*umem->odp_data->page_list),
umem_odp->page_list =
vzalloc(array_size(sizeof(*umem_odp->page_list),
ib_umem_num_pages(umem)));
if (!umem->odp_data->page_list) {
ret_val = -ENOMEM;
goto out_odp_data;
}
if (!umem_odp->page_list)
return -ENOMEM;
umem->odp_data->dma_list =
vzalloc(array_size(sizeof(*umem->odp_data->dma_list),
umem_odp->dma_list =
vzalloc(array_size(sizeof(*umem_odp->dma_list),
ib_umem_num_pages(umem)));
if (!umem->odp_data->dma_list) {
if (!umem_odp->dma_list) {
ret_val = -ENOMEM;
goto out_page_list;
}
}
/*
* When using MMU notifiers, we will get a
* notification before the "current" task (and MM) is
* destroyed. We use the umem_rwsem semaphore to synchronize.
*/
down_write(&context->umem_rwsem);
context->odp_mrs_count++;
if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
rbt_ib_umem_insert(&umem->odp_data->interval_tree,
&context->umem_tree);
if (likely(!atomic_read(&context->notifier_count)) ||
context->odp_mrs_count == 1)
umem->odp_data->mn_counters_active = true;
else
list_add(&umem->odp_data->no_private_counters,
&context->no_private_counters);
downgrade_write(&context->umem_rwsem);
ret_val = get_per_mm(umem_odp);
if (ret_val)
goto out_dma_list;
add_umem_to_per_mm(umem_odp);
if (context->odp_mrs_count == 1) {
/*
* Note that at this point, no MMU notifier is running
* for this context!
*/
atomic_set(&context->notifier_count, 0);
INIT_HLIST_NODE(&context->mn.hlist);
context->mn.ops = &ib_umem_notifiers;
/*
* Lock-dep detects a false positive for mmap_sem vs.
* umem_rwsem, due to not grasping downgrade_write correctly.
*/
lockdep_off();
ret_val = mmu_notifier_register(&context->mn, mm);
lockdep_on();
if (ret_val) {
pr_err("Failed to register mmu_notifier %d\n", ret_val);
ret_val = -EBUSY;
goto out_mutex;
}
}
up_read(&context->umem_rwsem);
/*
* Note that doing an mmput can cause a notifier for the relevant mm.
* If the notifier is called while we hold the umem_rwsem, this will
* cause a deadlock. Therefore, we release the reference only after we
* released the semaphore.
*/
mmput(mm);
return 0;
out_mutex:
up_read(&context->umem_rwsem);
vfree(umem->odp_data->dma_list);
out_dma_list:
vfree(umem_odp->dma_list);
out_page_list:
vfree(umem->odp_data->page_list);
out_odp_data:
kfree(umem->odp_data);
out_mm:
mmput(mm);
vfree(umem_odp->page_list);
return ret_val;
}
void ib_umem_odp_release(struct ib_umem *umem)
void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
{
struct ib_ucontext *context = umem->context;
struct ib_umem *umem = &umem_odp->umem;
/*
* Ensure that no more pages are mapped in the umem.
@ -478,61 +474,13 @@ void ib_umem_odp_release(struct ib_umem *umem)
* It is the driver's responsibility to ensure, before calling us,
* that the hardware will not attempt to access the MR any more.
*/
ib_umem_odp_unmap_dma_pages(umem, ib_umem_start(umem),
ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem),
ib_umem_end(umem));
down_write(&context->umem_rwsem);
if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
rbt_ib_umem_remove(&umem->odp_data->interval_tree,
&context->umem_tree);
context->odp_mrs_count--;
if (!umem->odp_data->mn_counters_active) {
list_del(&umem->odp_data->no_private_counters);
complete_all(&umem->odp_data->notifier_completion);
}
/*
* Downgrade the lock to a read lock. This ensures that the notifiers
* (who lock the mutex for reading) will be able to finish, and we
* will be able to enventually obtain the mmu notifiers SRCU. Note
* that since we are doing it atomically, no other user could register
* and unregister while we do the check.
*/
downgrade_write(&context->umem_rwsem);
if (!context->odp_mrs_count) {
struct task_struct *owning_process = NULL;
struct mm_struct *owning_mm = NULL;
owning_process = get_pid_task(context->tgid,
PIDTYPE_PID);
if (owning_process == NULL)
/*
* The process is already dead, notifier were removed
* already.
*/
goto out;
owning_mm = get_task_mm(owning_process);
if (owning_mm == NULL)
/*
* The process' mm is already dead, notifier were
* removed already.
*/
goto out_put_task;
mmu_notifier_unregister(&context->mn, owning_mm);
mmput(owning_mm);
out_put_task:
put_task_struct(owning_process);
}
out:
up_read(&context->umem_rwsem);
vfree(umem->odp_data->dma_list);
vfree(umem->odp_data->page_list);
kfree(umem->odp_data);
kfree(umem);
remove_umem_from_per_mm(umem_odp);
put_per_mm(umem_odp);
vfree(umem_odp->dma_list);
vfree(umem_odp->page_list);
}
/*
@ -544,7 +492,7 @@ out:
* @access_mask: access permissions needed for this page.
* @current_seq: sequence number for synchronization with invalidations.
* the sequence number is taken from
* umem->odp_data->notifiers_seq.
* umem_odp->notifiers_seq.
*
* The function returns -EFAULT if the DMA mapping operation fails. It returns
* -EAGAIN if a concurrent invalidation prevents us from updating the page.
@ -554,12 +502,13 @@ out:
* umem.
*/
static int ib_umem_odp_map_dma_single_page(
struct ib_umem *umem,
struct ib_umem_odp *umem_odp,
int page_index,
struct page *page,
u64 access_mask,
unsigned long current_seq)
{
struct ib_umem *umem = &umem_odp->umem;
struct ib_device *dev = umem->context->device;
dma_addr_t dma_addr;
int stored_page = 0;
@ -571,11 +520,11 @@ static int ib_umem_odp_map_dma_single_page(
* handle case of a racing notifier. This check also allows us to bail
* early if we have a notifier running in parallel with us.
*/
if (ib_umem_mmu_notifier_retry(umem, current_seq)) {
if (ib_umem_mmu_notifier_retry(umem_odp, current_seq)) {
ret = -EAGAIN;
goto out;
}
if (!(umem->odp_data->dma_list[page_index])) {
if (!(umem_odp->dma_list[page_index])) {
dma_addr = ib_dma_map_page(dev,
page,
0, BIT(umem->page_shift),
@ -584,15 +533,15 @@ static int ib_umem_odp_map_dma_single_page(
ret = -EFAULT;
goto out;
}
umem->odp_data->dma_list[page_index] = dma_addr | access_mask;
umem->odp_data->page_list[page_index] = page;
umem_odp->dma_list[page_index] = dma_addr | access_mask;
umem_odp->page_list[page_index] = page;
umem->npages++;
stored_page = 1;
} else if (umem->odp_data->page_list[page_index] == page) {
umem->odp_data->dma_list[page_index] |= access_mask;
} else if (umem_odp->page_list[page_index] == page) {
umem_odp->dma_list[page_index] |= access_mask;
} else {
pr_err("error: got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n",
umem->odp_data->page_list[page_index], page);
umem_odp->page_list[page_index], page);
/* Better remove the mapping now, to prevent any further
* damage. */
remove_existing_mapping = 1;
@ -605,7 +554,7 @@ out:
if (remove_existing_mapping && umem->context->invalidate_range) {
invalidate_page_trampoline(
umem,
umem_odp,
ib_umem_start(umem) + (page_index >> umem->page_shift),
ib_umem_start(umem) + ((page_index + 1) >>
umem->page_shift),
@ -621,7 +570,7 @@ out:
*
* Pins the range of pages passed in the argument, and maps them to
* DMA addresses. The DMA addresses of the mapped pages are updated in
* umem->odp_data->dma_list.
* umem_odp->dma_list.
*
* Returns the number of pages mapped on success, or a negative error code
* on failure.
@ -629,7 +578,7 @@ out:
* the function from completing its task.
* An -ENOENT error code indicates that the userspace process is being
* terminated and the mm was already destroyed.
* @umem: the umem to map and pin
* @umem_odp: the umem to map and pin
* @user_virt: the address from which we need to map.
* @bcnt: the minimal number of bytes to pin and map. The mapping might be
* bigger due to alignment, and may also be smaller in case of an error
@ -639,13 +588,15 @@ out:
* range.
* @current_seq: the MMU notifiers sequence value for synchronization with
* invalidations. The sequence number is read from
* umem->odp_data->notifiers_seq before calling this function
* umem_odp->notifiers_seq before calling this function
*/
int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
u64 access_mask, unsigned long current_seq)
int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
u64 bcnt, u64 access_mask,
unsigned long current_seq)
{
struct ib_umem *umem = &umem_odp->umem;
struct task_struct *owning_process = NULL;
struct mm_struct *owning_mm = NULL;
struct mm_struct *owning_mm = umem_odp->umem.owning_mm;
struct page **local_page_list = NULL;
u64 page_mask, off;
int j, k, ret = 0, start_idx, npages = 0, page_shift;
@ -669,15 +620,14 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
user_virt = user_virt & page_mask;
bcnt += off; /* Charge for the first page offset as well. */
owning_process = get_pid_task(umem->context->tgid, PIDTYPE_PID);
if (owning_process == NULL) {
/*
* owning_process is allowed to be NULL; this means the mm is somehow
* still alive beyond the lifetime of the originating process. Presumably
* mmget_not_zero will fail in this case.
*/
owning_process = get_pid_task(umem_odp->per_mm->tgid, PIDTYPE_PID);
if (WARN_ON(!mmget_not_zero(umem_odp->umem.owning_mm))) {
ret = -EINVAL;
goto out_no_task;
}
owning_mm = get_task_mm(owning_process);
if (owning_mm == NULL) {
ret = -ENOENT;
goto out_put_task;
}
@ -709,7 +659,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
break;
bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt);
mutex_lock(&umem->odp_data->umem_mutex);
mutex_lock(&umem_odp->umem_mutex);
for (j = 0; j < npages; j++, user_virt += PAGE_SIZE) {
if (user_virt & ~page_mask) {
p += PAGE_SIZE;
@ -722,7 +672,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
}
ret = ib_umem_odp_map_dma_single_page(
umem, k, local_page_list[j],
umem_odp, k, local_page_list[j],
access_mask, current_seq);
if (ret < 0)
break;
@ -730,7 +680,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
p = page_to_phys(local_page_list[j]);
k++;
}
mutex_unlock(&umem->odp_data->umem_mutex);
mutex_unlock(&umem_odp->umem_mutex);
if (ret < 0) {
/* Release left over pages when handling errors. */
@ -749,16 +699,17 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
mmput(owning_mm);
out_put_task:
put_task_struct(owning_process);
out_no_task:
if (owning_process)
put_task_struct(owning_process);
free_page((unsigned long)local_page_list);
return ret;
}
EXPORT_SYMBOL(ib_umem_odp_map_dma_pages);
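/*
 * Illustrative sketch (editor's addition, not part of this patch): how a
 * caller such as a page-fault handler might use ib_umem_odp_map_dma_pages()
 * together with the current_seq protocol documented above. Only
 * ib_umem_odp_map_dma_pages() and the notifiers_seq field come from this
 * file; the example_fault_range() wrapper itself is hypothetical.
 */
static int example_fault_range(struct ib_umem_odp *umem_odp, u64 user_virt,
			       u64 bcnt, u64 access_mask)
{
	unsigned long current_seq;
	int npages;

	/* Sample the notifier sequence before touching the page tables. */
	current_seq = READ_ONCE(umem_odp->notifiers_seq);
	/* Pairs with the barrier in the invalidation path. */
	smp_rmb();

	npages = ib_umem_odp_map_dma_pages(umem_odp, user_virt, bcnt,
					   access_mask, current_seq);
	if (npages < 0)
		return npages;	/* -EAGAIN: an invalidation raced with us */

	return 0;
}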
void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
u64 bound)
{
struct ib_umem *umem = &umem_odp->umem;
int idx;
u64 addr;
struct ib_device *dev = umem->context->device;
@ -770,12 +721,12 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
* faults from completion. We might be racing with other
* invalidations, so we must make sure we free each page only
* once. */
mutex_lock(&umem->odp_data->umem_mutex);
mutex_lock(&umem_odp->umem_mutex);
for (addr = virt; addr < bound; addr += BIT(umem->page_shift)) {
idx = (addr - ib_umem_start(umem)) >> umem->page_shift;
if (umem->odp_data->page_list[idx]) {
struct page *page = umem->odp_data->page_list[idx];
dma_addr_t dma = umem->odp_data->dma_list[idx];
if (umem_odp->page_list[idx]) {
struct page *page = umem_odp->page_list[idx];
dma_addr_t dma = umem_odp->dma_list[idx];
dma_addr_t dma_addr = dma & ODP_DMA_ADDR_MASK;
WARN_ON(!dma_addr);
@ -798,12 +749,12 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
/* on demand pinning support */
if (!umem->context->invalidate_range)
put_page(page);
umem->odp_data->page_list[idx] = NULL;
umem->odp_data->dma_list[idx] = 0;
umem_odp->page_list[idx] = NULL;
umem_odp->dma_list[idx] = 0;
umem->npages--;
}
}
mutex_unlock(&umem->odp_data->umem_mutex);
mutex_unlock(&umem_odp->umem_mutex);
}
EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages);
@ -830,7 +781,7 @@ int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
return -EAGAIN;
next = rbt_ib_umem_iter_next(node, start, last - 1);
umem = container_of(node, struct ib_umem_odp, interval_tree);
ret_val = cb(umem->umem, start, last, cookie) || ret_val;
ret_val = cb(umem, start, last, cookie) || ret_val;
}
return ret_val;


@ -138,7 +138,7 @@ static const dev_t base_issm_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE) +
static dev_t dynamic_umad_dev;
static dev_t dynamic_issm_dev;
static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS);
static DEFINE_IDA(umad_ida);
static void ib_umad_add_one(struct ib_device *device);
static void ib_umad_remove_one(struct ib_device *device, void *client_data);
@ -1132,7 +1132,7 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
if (!port)
return -ENODEV;
return sprintf(buf, "%s\n", port->ib_dev->name);
return sprintf(buf, "%s\n", dev_name(&port->ib_dev->dev));
}
static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
@ -1159,11 +1159,10 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
dev_t base_umad;
dev_t base_issm;
devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
if (devnum >= IB_UMAD_MAX_PORTS)
devnum = ida_alloc_max(&umad_ida, IB_UMAD_MAX_PORTS - 1, GFP_KERNEL);
if (devnum < 0)
return -1;
port->dev_num = devnum;
set_bit(devnum, dev_map);
if (devnum >= IB_UMAD_NUM_FIXED_MINOR) {
base_umad = dynamic_umad_dev + devnum - IB_UMAD_NUM_FIXED_MINOR;
base_issm = dynamic_issm_dev + devnum - IB_UMAD_NUM_FIXED_MINOR;
@ -1227,7 +1226,7 @@ err_dev:
err_cdev:
cdev_del(&port->cdev);
clear_bit(devnum, dev_map);
ida_free(&umad_ida, devnum);
return -1;
}
@ -1261,7 +1260,7 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
}
mutex_unlock(&port->file_mutex);
clear_bit(port->dev_num, dev_map);
ida_free(&umad_ida, port->dev_num);
}
static void ib_umad_add_one(struct ib_device *device)


@ -100,13 +100,14 @@ struct ib_uverbs_device {
atomic_t refcount;
int num_comp_vectors;
struct completion comp;
struct device *dev;
struct device dev;
/* First group for device attributes, NULL terminated array */
const struct attribute_group *groups[2];
struct ib_device __rcu *ib_dev;
int devnum;
struct cdev cdev;
struct rb_root xrcd_tree;
struct mutex xrcd_tree_mutex;
struct kobject kobj;
struct srcu_struct disassociate_srcu;
struct mutex lists_mutex; /* protect lists */
struct list_head uverbs_file_list;
@ -146,7 +147,6 @@ struct ib_uverbs_file {
struct ib_event_handler event_handler;
struct ib_uverbs_async_event_file *async_file;
struct list_head list;
int is_closed;
/*
* To access the uobjects list hw_destroy_rwsem must be held for write
@ -158,6 +158,9 @@ struct ib_uverbs_file {
spinlock_t uobjects_lock;
struct list_head uobjects;
struct mutex umap_lock;
struct list_head umaps;
u64 uverbs_cmd_mask;
u64 uverbs_ex_cmd_mask;
@ -218,12 +221,6 @@ struct ib_ucq_object {
u32 async_events_reported;
};
struct ib_uflow_resources;
struct ib_uflow_object {
struct ib_uobject uobject;
struct ib_uflow_resources *resources;
};
extern const struct file_operations uverbs_event_fops;
void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue);
struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,


@ -117,18 +117,12 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
/* ufile is required when some objects are released */
ucontext->ufile = file;
rcu_read_lock();
ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
rcu_read_unlock();
ucontext->closing = 0;
ucontext->closing = false;
ucontext->cleanup_retryable = false;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
ucontext->umem_tree = RB_ROOT_CACHED;
init_rwsem(&ucontext->umem_rwsem);
ucontext->odp_mrs_count = 0;
INIT_LIST_HEAD(&ucontext->no_private_counters);
mutex_init(&ucontext->per_mm_list_lock);
INIT_LIST_HEAD(&ucontext->per_mm_list);
if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
ucontext->invalidate_range = NULL;
@ -172,7 +166,6 @@ err_fd:
put_unused_fd(resp.async_fd);
err_free:
put_pid(ucontext->tgid);
ib_dev->dealloc_ucontext(ucontext);
err_alloc:
@ -2769,16 +2762,7 @@ out_put:
return ret ? ret : in_len;
}
struct ib_uflow_resources {
size_t max;
size_t num;
size_t collection_num;
size_t counters_num;
struct ib_counters **counters;
struct ib_flow_action **collection;
};
static struct ib_uflow_resources *flow_resources_alloc(size_t num_specs)
struct ib_uflow_resources *flow_resources_alloc(size_t num_specs)
{
struct ib_uflow_resources *resources;
@ -2808,6 +2792,7 @@ err:
return NULL;
}
EXPORT_SYMBOL(flow_resources_alloc);
void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res)
{
@ -2826,10 +2811,11 @@ void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res)
kfree(uflow_res->counters);
kfree(uflow_res);
}
EXPORT_SYMBOL(ib_uverbs_flow_resources_free);
static void flow_resources_add(struct ib_uflow_resources *uflow_res,
enum ib_flow_spec_type type,
void *ibobj)
void flow_resources_add(struct ib_uflow_resources *uflow_res,
enum ib_flow_spec_type type,
void *ibobj)
{
WARN_ON(uflow_res->num >= uflow_res->max);
@ -2850,6 +2836,7 @@ static void flow_resources_add(struct ib_uflow_resources *uflow_res,
uflow_res->num++;
}
EXPORT_SYMBOL(flow_resources_add);
static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile,
struct ib_uverbs_flow_spec *kern_spec,
@ -3484,7 +3471,6 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
struct ib_uverbs_create_flow cmd;
struct ib_uverbs_create_flow_resp resp;
struct ib_uobject *uobj;
struct ib_uflow_object *uflow;
struct ib_flow *flow_id;
struct ib_uverbs_flow_attr *kern_flow_attr;
struct ib_flow_attr *flow_attr;
@ -3623,13 +3609,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
err = PTR_ERR(flow_id);
goto err_free;
}
atomic_inc(&qp->usecnt);
flow_id->qp = qp;
flow_id->device = qp->device;
flow_id->uobject = uobj;
uobj->object = flow_id;
uflow = container_of(uobj, typeof(*uflow), uobject);
uflow->resources = uflow_res;
ib_set_flow(uobj, flow_id, qp, qp->device, uflow_res);
memset(&resp, 0, sizeof(resp));
resp.flow_handle = uobj->id;


@ -57,6 +57,7 @@ struct bundle_priv {
struct ib_uverbs_attr *uattrs;
DECLARE_BITMAP(uobj_finalize, UVERBS_API_ATTR_BKEY_LEN);
DECLARE_BITMAP(spec_finalize, UVERBS_API_ATTR_BKEY_LEN);
/*
* Must be last. bundle ends in a flex array which overlaps
@ -143,6 +144,86 @@ static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr,
0, uattr->len - len);
}
static int uverbs_process_idrs_array(struct bundle_priv *pbundle,
const struct uverbs_api_attr *attr_uapi,
struct uverbs_objs_arr_attr *attr,
struct ib_uverbs_attr *uattr,
u32 attr_bkey)
{
const struct uverbs_attr_spec *spec = &attr_uapi->spec;
size_t array_len;
u32 *idr_vals;
int ret = 0;
size_t i;
if (uattr->attr_data.reserved)
return -EINVAL;
if (uattr->len % sizeof(u32))
return -EINVAL;
array_len = uattr->len / sizeof(u32);
if (array_len < spec->u2.objs_arr.min_len ||
array_len > spec->u2.objs_arr.max_len)
return -EINVAL;
attr->uobjects =
uverbs_alloc(&pbundle->bundle,
array_size(array_len, sizeof(*attr->uobjects)));
if (IS_ERR(attr->uobjects))
return PTR_ERR(attr->uobjects);
/*
* Since an idr is 4B and *uobjects is >= 4B, we can use attr->uobjects
* to store the idrs array and avoid an additional memory allocation. The
* idrs array is offset to the end of the uobjects array so we will be
* able to read an idr and replace it with a pointer.
*/
idr_vals = (u32 *)(attr->uobjects + array_len) - array_len;
if (uattr->len > sizeof(uattr->data)) {
ret = copy_from_user(idr_vals, u64_to_user_ptr(uattr->data),
uattr->len);
if (ret)
return -EFAULT;
} else {
memcpy(idr_vals, &uattr->data, uattr->len);
}
for (i = 0; i != array_len; i++) {
attr->uobjects[i] = uverbs_get_uobject_from_file(
spec->u2.objs_arr.obj_type, pbundle->bundle.ufile,
spec->u2.objs_arr.access, idr_vals[i]);
if (IS_ERR(attr->uobjects[i])) {
ret = PTR_ERR(attr->uobjects[i]);
break;
}
}
attr->len = i;
__set_bit(attr_bkey, pbundle->spec_finalize);
return ret;
}
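/*
 * Illustrative sketch (editor's addition, not part of this patch): the
 * in-place overlay used above - parking the u32 idr values in the tail of
 * the pointer array they will be replaced by - can be demonstrated with a
 * tiny user-space program. All names here are hypothetical.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	size_t array_len = 4;
	void **objects = malloc(array_len * sizeof(*objects));
	uint32_t *ids;
	size_t i;

	if (!objects)
		return 1;
	/* The u32 ids live in the tail of the very same allocation. */
	ids = (uint32_t *)(objects + array_len) - array_len;
	for (i = 0; i < array_len; i++)
		ids[i] = 100 + (uint32_t)i;	/* pretend: copied from user space */
	/*
	 * Resolve front-to-back: ids[i] is read before objects[i] is stored,
	 * and storing objects[i] can only clobber ids[] slots that were
	 * already consumed, so no second allocation is needed.
	 */
	for (i = 0; i < array_len; i++)
		objects[i] = (void *)(uintptr_t)ids[i];	/* stands in for the uobject lookup */
	for (i = 0; i < array_len; i++)
		printf("objects[%zu] = %p\n", i, objects[i]);
	free(objects);
	return 0;
}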
static int uverbs_free_idrs_array(const struct uverbs_api_attr *attr_uapi,
struct uverbs_objs_arr_attr *attr,
bool commit)
{
const struct uverbs_attr_spec *spec = &attr_uapi->spec;
int current_ret;
int ret = 0;
size_t i;
for (i = 0; i != attr->len; i++) {
current_ret = uverbs_finalize_object(
attr->uobjects[i], spec->u2.objs_arr.access, commit);
if (!ret)
ret = current_ret;
}
return ret;
}
static int uverbs_process_attr(struct bundle_priv *pbundle,
const struct uverbs_api_attr *attr_uapi,
struct ib_uverbs_attr *uattr, u32 attr_bkey)
@ -246,6 +327,11 @@ static int uverbs_process_attr(struct bundle_priv *pbundle,
}
break;
case UVERBS_ATTR_TYPE_IDRS_ARRAY:
return uverbs_process_idrs_array(pbundle, attr_uapi,
&e->objs_arr_attr, uattr,
attr_bkey);
default:
return -EOPNOTSUPP;
}
@ -300,8 +386,7 @@ static int uverbs_set_attr(struct bundle_priv *pbundle,
return -EPROTONOSUPPORT;
return 0;
}
attr = srcu_dereference(
*slot, &pbundle->bundle.ufile->device->disassociate_srcu);
attr = rcu_dereference_protected(*slot, true);
/* Reject duplicate attributes from user-space */
if (test_bit(attr_bkey, pbundle->bundle.attr_present))
@ -384,6 +469,7 @@ static int bundle_destroy(struct bundle_priv *pbundle, bool commit)
unsigned int i;
int ret = 0;
/* fast path for simple uobjects */
i = -1;
while ((i = find_next_bit(pbundle->uobj_finalize, key_bitmap_len,
i + 1)) < key_bitmap_len) {
@ -397,6 +483,30 @@ static int bundle_destroy(struct bundle_priv *pbundle, bool commit)
ret = current_ret;
}
i = -1;
while ((i = find_next_bit(pbundle->spec_finalize, key_bitmap_len,
i + 1)) < key_bitmap_len) {
struct uverbs_attr *attr = &pbundle->bundle.attrs[i];
const struct uverbs_api_attr *attr_uapi;
void __rcu **slot;
int current_ret;
slot = uapi_get_attr_for_method(
pbundle,
pbundle->method_key | uapi_bkey_to_key_attr(i));
if (WARN_ON(!slot))
continue;
attr_uapi = rcu_dereference_protected(*slot, true);
if (attr_uapi->spec.type == UVERBS_ATTR_TYPE_IDRS_ARRAY) {
current_ret = uverbs_free_idrs_array(
attr_uapi, &attr->objs_arr_attr, commit);
if (!ret)
ret = current_ret;
}
}
for (memblock = pbundle->allocated_mem; memblock;) {
struct bundle_alloc_head *tmp = memblock;
@ -429,7 +539,7 @@ static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile,
uapi_key_ioctl_method(hdr->method_id));
if (unlikely(!slot))
return -EPROTONOSUPPORT;
method_elm = srcu_dereference(*slot, &ufile->device->disassociate_srcu);
method_elm = rcu_dereference_protected(*slot, true);
if (!method_elm->use_stack) {
pbundle = kmalloc(method_elm->bundle_size, GFP_KERNEL);
@ -461,6 +571,7 @@ static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile,
memset(pbundle->bundle.attr_present, 0,
sizeof(pbundle->bundle.attr_present));
memset(pbundle->uobj_finalize, 0, sizeof(pbundle->uobj_finalize));
memset(pbundle->spec_finalize, 0, sizeof(pbundle->spec_finalize));
ret = ib_uverbs_run_method(pbundle, hdr->num_attrs);
destroy_ret = bundle_destroy(pbundle, ret == 0);
@ -611,3 +722,26 @@ int uverbs_copy_to(const struct uverbs_attr_bundle *bundle, size_t idx,
return 0;
}
EXPORT_SYMBOL(uverbs_copy_to);
int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle,
size_t idx, s64 lower_bound, u64 upper_bound,
s64 *def_val)
{
const struct uverbs_attr *attr;
attr = uverbs_attr_get(attrs_bundle, idx);
if (IS_ERR(attr)) {
if ((PTR_ERR(attr) != -ENOENT) || !def_val)
return PTR_ERR(attr);
*to = *def_val;
} else {
*to = attr->ptr_attr.data;
}
if (*to < lower_bound || (*to > 0 && (u64)*to > upper_bound))
return -EINVAL;
return 0;
}
EXPORT_SYMBOL(_uverbs_get_const);
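/*
 * Illustrative sketch (editor's addition, not part of this patch): a method
 * handler could use the helper above to fetch a bounded constant from its
 * attribute bundle. EXAMPLE_ATTR_MODE and the bounds are hypothetical; only
 * _uverbs_get_const() and its signature come from the code above.
 */
static int example_get_mode(const struct uverbs_attr_bundle *attrs, u8 *mode)
{
	s64 val;
	s64 def = 0;	/* used when the attribute was not supplied */
	int ret;

	ret = _uverbs_get_const(&val, attrs, EXAMPLE_ATTR_MODE, 0, 7, &def);
	if (ret)
		return ret;

	*mode = val;	/* guaranteed to be within [0, 7] here */
	return 0;
}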


@ -45,6 +45,7 @@
#include <linux/cdev.h>
#include <linux/anon_inodes.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
#include <linux/uaccess.h>
@ -72,7 +73,7 @@ enum {
static dev_t dynamic_uverbs_dev;
static struct class *uverbs_class;
static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
static DEFINE_IDA(uverbs_ida);
static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
@ -169,20 +170,16 @@ int uverbs_dealloc_mw(struct ib_mw *mw)
return ret;
}
static void ib_uverbs_release_dev(struct kobject *kobj)
static void ib_uverbs_release_dev(struct device *device)
{
struct ib_uverbs_device *dev =
container_of(kobj, struct ib_uverbs_device, kobj);
container_of(device, struct ib_uverbs_device, dev);
uverbs_destroy_api(dev->uapi);
cleanup_srcu_struct(&dev->disassociate_srcu);
kfree(dev);
}
static struct kobj_type ib_uverbs_dev_ktype = {
.release = ib_uverbs_release_dev,
};
static void ib_uverbs_release_async_event_file(struct kref *ref)
{
struct ib_uverbs_async_event_file *file =
@ -265,7 +262,7 @@ void ib_uverbs_release_file(struct kref *ref)
if (atomic_dec_and_test(&file->device->refcount))
ib_uverbs_comp_dev(file->device);
kobject_put(&file->device->kobj);
put_device(&file->device->dev);
kfree(file);
}
@ -816,6 +813,226 @@ out:
return ret;
}
/*
* Each time we map IO memory into user space this keeps track of the mapping.
* When the device is hot-unplugged we 'zap' the mmaps in user space to point
* to the zero page and allow the hot unplug to proceed.
*
* This is necessary for cases like PCI physical hot unplug as the actual BAR
* memory may vanish after this and access to it from userspace could MCE.
*
* RDMA drivers supporting disassociation must have their user space designed
* to cope in some way with their IO pages going to the zero page.
*/
struct rdma_umap_priv {
struct vm_area_struct *vma;
struct list_head list;
};
static const struct vm_operations_struct rdma_umap_ops;
static void rdma_umap_priv_init(struct rdma_umap_priv *priv,
struct vm_area_struct *vma)
{
struct ib_uverbs_file *ufile = vma->vm_file->private_data;
priv->vma = vma;
vma->vm_private_data = priv;
vma->vm_ops = &rdma_umap_ops;
mutex_lock(&ufile->umap_lock);
list_add(&priv->list, &ufile->umaps);
mutex_unlock(&ufile->umap_lock);
}
/*
* The VMA has been dup'd, initialize the vm_private_data with a new tracking
* struct
*/
static void rdma_umap_open(struct vm_area_struct *vma)
{
struct ib_uverbs_file *ufile = vma->vm_file->private_data;
struct rdma_umap_priv *opriv = vma->vm_private_data;
struct rdma_umap_priv *priv;
if (!opriv)
return;
/* We are racing with disassociation */
if (!down_read_trylock(&ufile->hw_destroy_rwsem))
goto out_zap;
/*
* Disassociation already completed, the VMA should already be zapped.
*/
if (!ufile->ucontext)
goto out_unlock;
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (!priv)
goto out_unlock;
rdma_umap_priv_init(priv, vma);
up_read(&ufile->hw_destroy_rwsem);
return;
out_unlock:
up_read(&ufile->hw_destroy_rwsem);
out_zap:
/*
* We can't allow the VMA to be created with the actual IO pages; that
* would break our API contract, and it can't be stopped at this
* point, so zap it.
*/
vma->vm_private_data = NULL;
zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
}
static void rdma_umap_close(struct vm_area_struct *vma)
{
struct ib_uverbs_file *ufile = vma->vm_file->private_data;
struct rdma_umap_priv *priv = vma->vm_private_data;
if (!priv)
return;
/*
* The vma holds a reference on the struct file that created it, which
* in turn means that the ib_uverbs_file is guaranteed to exist at
* this point.
*/
mutex_lock(&ufile->umap_lock);
list_del(&priv->list);
mutex_unlock(&ufile->umap_lock);
kfree(priv);
}
static const struct vm_operations_struct rdma_umap_ops = {
.open = rdma_umap_open,
.close = rdma_umap_close,
};
static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext,
struct vm_area_struct *vma,
unsigned long size)
{
struct ib_uverbs_file *ufile = ucontext->ufile;
struct rdma_umap_priv *priv;
if (vma->vm_end - vma->vm_start != size)
return ERR_PTR(-EINVAL);
/* Driver is using this wrong, must be called by ib_uverbs_mmap */
if (WARN_ON(!vma->vm_file ||
vma->vm_file->private_data != ufile))
return ERR_PTR(-EINVAL);
lockdep_assert_held(&ufile->device->disassociate_srcu);
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (!priv)
return ERR_PTR(-ENOMEM);
return priv;
}
/*
* Map IO memory into a process. This is to be called by drivers as part of
* their mmap() functions if they wish to send something like PCI-E BAR memory
* to userspace.
*/
int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
unsigned long pfn, unsigned long size, pgprot_t prot)
{
struct rdma_umap_priv *priv = rdma_user_mmap_pre(ucontext, vma, size);
if (IS_ERR(priv))
return PTR_ERR(priv);
vma->vm_page_prot = prot;
if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
kfree(priv);
return -EAGAIN;
}
rdma_umap_priv_init(priv, vma);
return 0;
}
EXPORT_SYMBOL(rdma_user_mmap_io);
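/*
 * Illustrative sketch (editor's addition, not part of this patch): a
 * driver's ->mmap() hook would typically hand a doorbell/BAR page to user
 * space through rdma_user_mmap_io(). The foo_* names and the offset check
 * are hypothetical; only the helper's signature comes from the code above.
 */
static int foo_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma)
{
	struct foo_ucontext *uctx =
		container_of(ucontext, struct foo_ucontext, ibucontext);
	unsigned long pfn = uctx->db_pa >> PAGE_SHIFT;

	/* Only a single doorbell page is exported in this sketch. */
	if (vma->vm_pgoff != 0)
		return -EINVAL;

	return rdma_user_mmap_io(ucontext, vma, pfn, PAGE_SIZE,
				 pgprot_noncached(vma->vm_page_prot));
}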
/*
* The page case is here for a slightly different reason: the driver expects
* to be able to free the page it is sharing to user space when it destroys
* its ucontext, which means we need to zap the user space references.
*
* We could handle this differently by providing an API to allocate a shared
* page and then only freeing the shared page when the last ufile is
* destroyed.
*/
int rdma_user_mmap_page(struct ib_ucontext *ucontext,
struct vm_area_struct *vma, struct page *page,
unsigned long size)
{
struct rdma_umap_priv *priv = rdma_user_mmap_pre(ucontext, vma, size);
if (IS_ERR(priv))
return PTR_ERR(priv);
if (remap_pfn_range(vma, vma->vm_start, page_to_pfn(page), size,
vma->vm_page_prot)) {
kfree(priv);
return -EAGAIN;
}
rdma_umap_priv_init(priv, vma);
return 0;
}
EXPORT_SYMBOL(rdma_user_mmap_page);
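/*
 * Illustrative sketch (editor's addition, not part of this patch): a driver
 * that shares a kernel-allocated status page with user space could route it
 * through rdma_user_mmap_page() so the mapping gets zapped on disassociation.
 * The status_page argument and foo_ naming are hypothetical.
 */
static int foo_mmap_status_page(struct ib_ucontext *ucontext,
				struct vm_area_struct *vma,
				struct page *status_page)
{
	if (vma->vm_pgoff != 0)
		return -EINVAL;

	return rdma_user_mmap_page(ucontext, vma, status_page, PAGE_SIZE);
}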
void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
{
struct rdma_umap_priv *priv, *next_priv;
lockdep_assert_held(&ufile->hw_destroy_rwsem);
while (1) {
struct mm_struct *mm = NULL;
/* Get an arbitrary mm pointer that hasn't been cleaned yet */
mutex_lock(&ufile->umap_lock);
if (!list_empty(&ufile->umaps)) {
mm = list_first_entry(&ufile->umaps,
struct rdma_umap_priv, list)
->vma->vm_mm;
mmget(mm);
}
mutex_unlock(&ufile->umap_lock);
if (!mm)
return;
/*
* The umap_lock is nested under mmap_sem since it is used within
* the vma_ops callbacks, so we have to clean the list one mm
* at a time to get the lock ordering right. Typically there
* will only be one mm, so no big deal.
*/
down_write(&mm->mmap_sem);
mutex_lock(&ufile->umap_lock);
list_for_each_entry_safe (priv, next_priv, &ufile->umaps,
list) {
struct vm_area_struct *vma = priv->vma;
if (vma->vm_mm != mm)
continue;
list_del_init(&priv->list);
zap_vma_ptes(vma, vma->vm_start,
vma->vm_end - vma->vm_start);
vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
}
mutex_unlock(&ufile->umap_lock);
up_write(&mm->mmap_sem);
mmput(mm);
}
}
/*
* ib_uverbs_open() does not need the BKL:
*
@ -839,6 +1056,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
if (!atomic_inc_not_zero(&dev->refcount))
return -ENXIO;
get_device(&dev->dev);
srcu_key = srcu_read_lock(&dev->disassociate_srcu);
mutex_lock(&dev->lists_mutex);
ib_dev = srcu_dereference(dev->ib_dev,
@ -876,9 +1094,10 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
spin_lock_init(&file->uobjects_lock);
INIT_LIST_HEAD(&file->uobjects);
init_rwsem(&file->hw_destroy_rwsem);
mutex_init(&file->umap_lock);
INIT_LIST_HEAD(&file->umaps);
filp->private_data = file;
kobject_get(&dev->kobj);
list_add_tail(&file->list, &dev->uverbs_file_list);
mutex_unlock(&dev->lists_mutex);
srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
@ -899,6 +1118,7 @@ err:
if (atomic_dec_and_test(&dev->refcount))
ib_uverbs_comp_dev(dev);
put_device(&dev->dev);
return ret;
}
@ -909,10 +1129,7 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
uverbs_destroy_ufile_hw(file, RDMA_REMOVE_CLOSE);
mutex_lock(&file->device->lists_mutex);
if (!file->is_closed) {
list_del(&file->list);
file->is_closed = 1;
}
list_del_init(&file->list);
mutex_unlock(&file->device->lists_mutex);
if (file->async_file)
@ -951,37 +1168,34 @@ static struct ib_client uverbs_client = {
.remove = ib_uverbs_remove_one
};
static ssize_t show_ibdev(struct device *device, struct device_attribute *attr,
static ssize_t ibdev_show(struct device *device, struct device_attribute *attr,
char *buf)
{
struct ib_uverbs_device *dev =
container_of(device, struct ib_uverbs_device, dev);
int ret = -ENODEV;
int srcu_key;
struct ib_uverbs_device *dev = dev_get_drvdata(device);
struct ib_device *ib_dev;
if (!dev)
return -ENODEV;
srcu_key = srcu_read_lock(&dev->disassociate_srcu);
ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
if (ib_dev)
ret = sprintf(buf, "%s\n", ib_dev->name);
ret = sprintf(buf, "%s\n", dev_name(&ib_dev->dev));
srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
return ret;
}
static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
static DEVICE_ATTR_RO(ibdev);
static ssize_t show_dev_abi_version(struct device *device,
struct device_attribute *attr, char *buf)
static ssize_t abi_version_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct ib_uverbs_device *dev = dev_get_drvdata(device);
struct ib_uverbs_device *dev =
container_of(device, struct ib_uverbs_device, dev);
int ret = -ENODEV;
int srcu_key;
struct ib_device *ib_dev;
if (!dev)
return -ENODEV;
srcu_key = srcu_read_lock(&dev->disassociate_srcu);
ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
if (ib_dev)
@ -990,7 +1204,17 @@ static ssize_t show_dev_abi_version(struct device *device,
return ret;
}
static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);
static DEVICE_ATTR_RO(abi_version);
static struct attribute *ib_dev_attrs[] = {
&dev_attr_abi_version.attr,
&dev_attr_ibdev.attr,
NULL,
};
static const struct attribute_group dev_attr_group = {
.attrs = ib_dev_attrs,
};
static CLASS_ATTR_STRING(abi_version, S_IRUGO,
__stringify(IB_USER_VERBS_ABI_VERSION));
@ -1028,65 +1252,56 @@ static void ib_uverbs_add_one(struct ib_device *device)
return;
}
device_initialize(&uverbs_dev->dev);
uverbs_dev->dev.class = uverbs_class;
uverbs_dev->dev.parent = device->dev.parent;
uverbs_dev->dev.release = ib_uverbs_release_dev;
uverbs_dev->groups[0] = &dev_attr_group;
uverbs_dev->dev.groups = uverbs_dev->groups;
atomic_set(&uverbs_dev->refcount, 1);
init_completion(&uverbs_dev->comp);
uverbs_dev->xrcd_tree = RB_ROOT;
mutex_init(&uverbs_dev->xrcd_tree_mutex);
kobject_init(&uverbs_dev->kobj, &ib_uverbs_dev_ktype);
mutex_init(&uverbs_dev->lists_mutex);
INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list);
INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list);
rcu_assign_pointer(uverbs_dev->ib_dev, device);
uverbs_dev->num_comp_vectors = device->num_comp_vectors;
devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
if (devnum >= IB_UVERBS_MAX_DEVICES)
devnum = ida_alloc_max(&uverbs_ida, IB_UVERBS_MAX_DEVICES - 1,
GFP_KERNEL);
if (devnum < 0)
goto err;
uverbs_dev->devnum = devnum;
set_bit(devnum, dev_map);
if (devnum >= IB_UVERBS_NUM_FIXED_MINOR)
base = dynamic_uverbs_dev + devnum - IB_UVERBS_NUM_FIXED_MINOR;
else
base = IB_UVERBS_BASE_DEV + devnum;
rcu_assign_pointer(uverbs_dev->ib_dev, device);
uverbs_dev->num_comp_vectors = device->num_comp_vectors;
if (ib_uverbs_create_uapi(device, uverbs_dev))
goto err_uapi;
cdev_init(&uverbs_dev->cdev, NULL);
uverbs_dev->dev.devt = base;
dev_set_name(&uverbs_dev->dev, "uverbs%d", uverbs_dev->devnum);
cdev_init(&uverbs_dev->cdev,
device->mmap ? &uverbs_mmap_fops : &uverbs_fops);
uverbs_dev->cdev.owner = THIS_MODULE;
uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
cdev_set_parent(&uverbs_dev->cdev, &uverbs_dev->kobj);
kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum);
if (cdev_add(&uverbs_dev->cdev, base, 1))
goto err_cdev;
uverbs_dev->dev = device_create(uverbs_class, device->dev.parent,
uverbs_dev->cdev.dev, uverbs_dev,
"uverbs%d", uverbs_dev->devnum);
if (IS_ERR(uverbs_dev->dev))
goto err_cdev;
if (device_create_file(uverbs_dev->dev, &dev_attr_ibdev))
goto err_class;
if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version))
goto err_class;
ret = cdev_device_add(&uverbs_dev->cdev, &uverbs_dev->dev);
if (ret)
goto err_uapi;
ib_set_client_data(device, &uverbs_client, uverbs_dev);
return;
err_class:
device_destroy(uverbs_class, uverbs_dev->cdev.dev);
err_cdev:
cdev_del(&uverbs_dev->cdev);
err_uapi:
clear_bit(devnum, dev_map);
ida_free(&uverbs_ida, devnum);
err:
if (atomic_dec_and_test(&uverbs_dev->refcount))
ib_uverbs_comp_dev(uverbs_dev);
wait_for_completion(&uverbs_dev->comp);
kobject_put(&uverbs_dev->kobj);
put_device(&uverbs_dev->dev);
return;
}
@ -1107,8 +1322,7 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
while (!list_empty(&uverbs_dev->uverbs_file_list)) {
file = list_first_entry(&uverbs_dev->uverbs_file_list,
struct ib_uverbs_file, list);
file->is_closed = 1;
list_del(&file->list);
list_del_init(&file->list);
kref_get(&file->ref);
/* We must release the mutex before going ahead and calling
@ -1156,10 +1370,8 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
if (!uverbs_dev)
return;
dev_set_drvdata(uverbs_dev->dev, NULL);
device_destroy(uverbs_class, uverbs_dev->cdev.dev);
cdev_del(&uverbs_dev->cdev);
clear_bit(uverbs_dev->devnum, dev_map);
cdev_device_del(&uverbs_dev->cdev, &uverbs_dev->dev);
ida_free(&uverbs_ida, uverbs_dev->devnum);
if (device->disassociate_ucontext) {
/* We disassociate HW resources and immediately return.
@ -1182,7 +1394,7 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
if (wait_clients)
wait_for_completion(&uverbs_dev->comp);
kobject_put(&uverbs_dev->kobj);
put_device(&uverbs_dev->dev);
}
static char *uverbs_devnode(struct device *dev, umode_t *mode)


@ -326,11 +326,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(
if (IS_ERR(action))
return PTR_ERR(action);
atomic_set(&action->usecnt, 0);
action->device = ib_dev;
action->type = IB_FLOW_ACTION_ESP;
action->uobject = uobj;
uobj->object = action;
uverbs_flow_action_fill_action(action, uobj, ib_dev,
IB_FLOW_ACTION_ESP);
return 0;
}


@ -73,6 +73,18 @@ static int uapi_merge_method(struct uverbs_api *uapi,
if (attr->attr.type == UVERBS_ATTR_TYPE_ENUM_IN)
method_elm->driver_method |= is_driver;
/*
* Like other uobject based things, we only support a single
* uobject being NEW'd or DESTROY'd
*/
if (attr->attr.type == UVERBS_ATTR_TYPE_IDRS_ARRAY) {
u8 access = attr->attr.u2.objs_arr.access;
if (WARN_ON(access == UVERBS_ACCESS_NEW ||
access == UVERBS_ACCESS_DESTROY))
return -EINVAL;
}
attr_slot =
uapi_add_elm(uapi, method_key | uapi_key_attr(attr->id),
sizeof(*attr_slot));


@ -264,7 +264,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
}
pd->res.type = RDMA_RESTRACK_PD;
pd->res.kern_name = caller;
rdma_restrack_set_task(&pd->res, caller);
rdma_restrack_add(&pd->res);
if (mr_access_flags) {
@ -710,7 +710,7 @@ static int ib_resolve_unicast_gid_dmac(struct ib_device *device,
ret = rdma_addr_find_l2_eth_by_grh(&sgid_attr->gid, &grh->dgid,
ah_attr->roce.dmac,
sgid_attr->ndev, &hop_limit);
sgid_attr, &hop_limit);
grh->hop_limit = hop_limit;
return ret;
@ -1509,8 +1509,7 @@ static const struct {
};
bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
enum ib_qp_type type, enum ib_qp_attr_mask mask,
enum rdma_link_layer ll)
enum ib_qp_type type, enum ib_qp_attr_mask mask)
{
enum ib_qp_attr_mask req_param, opt_param;
@ -1629,14 +1628,16 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
if (rdma_ib_or_roce(qp->device, port)) {
if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) {
pr_warn("%s: %s rq_psn overflow, masking to 24 bits\n",
__func__, qp->device->name);
dev_warn(&qp->device->dev,
"%s rq_psn overflow, masking to 24 bits\n",
__func__);
attr->rq_psn &= 0xffffff;
}
if (attr_mask & IB_QP_SQ_PSN && attr->sq_psn & ~0xffffff) {
pr_warn("%s: %s sq_psn overflow, masking to 24 bits\n",
__func__, qp->device->name);
dev_warn(&qp->device->dev,
" %s sq_psn overflow, masking to 24 bits\n",
__func__);
attr->sq_psn &= 0xffffff;
}
}
@ -1888,7 +1889,7 @@ struct ib_cq *__ib_create_cq(struct ib_device *device,
cq->cq_context = cq_context;
atomic_set(&cq->usecnt, 0);
cq->res.type = RDMA_RESTRACK_CQ;
cq->res.kern_name = caller;
rdma_restrack_set_task(&cq->res, caller);
rdma_restrack_add(&cq->res);
}


@ -40,7 +40,6 @@
#ifndef __BNXT_RE_H__
#define __BNXT_RE_H__
#define ROCE_DRV_MODULE_NAME "bnxt_re"
#define ROCE_DRV_MODULE_VERSION "1.0.0"
#define BNXT_RE_DESC "Broadcom NetXtreme-C/E RoCE Driver"
#define BNXT_RE_PAGE_SHIFT_4K (12)
@ -120,6 +119,8 @@ struct bnxt_re_dev {
#define BNXT_RE_FLAG_HAVE_L2_REF 3
#define BNXT_RE_FLAG_RCFW_CHANNEL_EN 4
#define BNXT_RE_FLAG_QOS_WORK_REG 5
#define BNXT_RE_FLAG_RESOURCES_ALLOCATED 7
#define BNXT_RE_FLAG_RESOURCES_INITIALIZED 8
#define BNXT_RE_FLAG_ISSUE_ROCE_STATS 29
struct net_device *netdev;
unsigned int version, major, minor;


@ -68,6 +68,8 @@ static const char * const bnxt_re_stat_name[] = {
[BNXT_RE_TX_PKTS] = "tx_pkts",
[BNXT_RE_TX_BYTES] = "tx_bytes",
[BNXT_RE_RECOVERABLE_ERRORS] = "recoverable_errors",
[BNXT_RE_RX_DROPS] = "rx_roce_drops",
[BNXT_RE_RX_DISCARDS] = "rx_roce_discards",
[BNXT_RE_TO_RETRANSMITS] = "to_retransmits",
[BNXT_RE_SEQ_ERR_NAKS_RCVD] = "seq_err_naks_rcvd",
[BNXT_RE_MAX_RETRY_EXCEEDED] = "max_retry_exceeded",
@ -106,7 +108,8 @@ static const char * const bnxt_re_stat_name[] = {
[BNXT_RE_RES_CQ_LOAD_ERR] = "res_cq_load_err",
[BNXT_RE_RES_SRQ_LOAD_ERR] = "res_srq_load_err",
[BNXT_RE_RES_TX_PCI_ERR] = "res_tx_pci_err",
[BNXT_RE_RES_RX_PCI_ERR] = "res_rx_pci_err"
[BNXT_RE_RES_RX_PCI_ERR] = "res_rx_pci_err",
[BNXT_RE_OUT_OF_SEQ_ERR] = "oos_drop_count"
};
int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
@ -128,6 +131,10 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
if (bnxt_re_stats) {
stats->value[BNXT_RE_RECOVERABLE_ERRORS] =
le64_to_cpu(bnxt_re_stats->tx_bcast_pkts);
stats->value[BNXT_RE_RX_DROPS] =
le64_to_cpu(bnxt_re_stats->rx_drop_pkts);
stats->value[BNXT_RE_RX_DISCARDS] =
le64_to_cpu(bnxt_re_stats->rx_discard_pkts);
stats->value[BNXT_RE_RX_PKTS] =
le64_to_cpu(bnxt_re_stats->rx_ucast_pkts);
stats->value[BNXT_RE_RX_BYTES] =
@ -220,6 +227,8 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
rdev->stats.res_tx_pci_err;
stats->value[BNXT_RE_RES_RX_PCI_ERR] =
rdev->stats.res_rx_pci_err;
stats->value[BNXT_RE_OUT_OF_SEQ_ERR] =
rdev->stats.res_oos_drop_count;
}
return ARRAY_SIZE(bnxt_re_stat_name);


@ -51,6 +51,8 @@ enum bnxt_re_hw_stats {
BNXT_RE_TX_PKTS,
BNXT_RE_TX_BYTES,
BNXT_RE_RECOVERABLE_ERRORS,
BNXT_RE_RX_DROPS,
BNXT_RE_RX_DISCARDS,
BNXT_RE_TO_RETRANSMITS,
BNXT_RE_SEQ_ERR_NAKS_RCVD,
BNXT_RE_MAX_RETRY_EXCEEDED,
@ -90,6 +92,7 @@ enum bnxt_re_hw_stats {
BNXT_RE_RES_SRQ_LOAD_ERR,
BNXT_RE_RES_TX_PCI_ERR,
BNXT_RE_RES_RX_PCI_ERR,
BNXT_RE_OUT_OF_SEQ_ERR,
BNXT_RE_NUM_COUNTERS
};


@ -1598,8 +1598,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
curr_qp_state = __to_ib_qp_state(qp->qplib_qp.cur_qp_state);
new_qp_state = qp_attr->qp_state;
if (!ib_modify_qp_is_ok(curr_qp_state, new_qp_state,
ib_qp->qp_type, qp_attr_mask,
IB_LINK_LAYER_ETHERNET)) {
ib_qp->qp_type, qp_attr_mask)) {
dev_err(rdev_to_dev(rdev),
"Invalid attribute mask: %#x specified ",
qp_attr_mask);
@ -2664,6 +2663,7 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev,
nq->budget++;
atomic_inc(&rdev->cq_count);
spin_lock_init(&cq->cq_lock);
if (context) {
struct bnxt_re_cq_resp resp;


@ -67,7 +67,7 @@
#include "hw_counters.h"
static char version[] =
BNXT_RE_DESC " v" ROCE_DRV_MODULE_VERSION "\n";
BNXT_RE_DESC "\n";
MODULE_AUTHOR("Eddie Wai <eddie.wai@broadcom.com>");
MODULE_DESCRIPTION(BNXT_RE_DESC " Driver");
@ -535,6 +535,34 @@ static struct bnxt_en_dev *bnxt_re_dev_probe(struct net_device *netdev)
return en_dev;
}
static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
char *buf)
{
struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor);
}
static DEVICE_ATTR_RO(hw_rev);
static ssize_t hca_type_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc);
}
static DEVICE_ATTR_RO(hca_type);
static struct attribute *bnxt_re_attributes[] = {
&dev_attr_hw_rev.attr,
&dev_attr_hca_type.attr,
NULL
};
static const struct attribute_group bnxt_re_dev_attr_group = {
.attrs = bnxt_re_attributes,
};
static void bnxt_re_unregister_ib(struct bnxt_re_dev *rdev)
{
ib_unregister_device(&rdev->ibdev);
@ -547,7 +575,6 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
/* ib device init */
ibdev->owner = THIS_MODULE;
ibdev->node_type = RDMA_NODE_IB_CA;
strlcpy(ibdev->name, "bnxt_re%d", IB_DEVICE_NAME_MAX);
strlcpy(ibdev->node_desc, BNXT_RE_DESC " HCA",
strlen(BNXT_RE_DESC) + 5);
ibdev->phys_port_cnt = 1;
@ -639,34 +666,11 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
ibdev->get_hw_stats = bnxt_re_ib_get_hw_stats;
ibdev->alloc_hw_stats = bnxt_re_ib_alloc_hw_stats;
rdma_set_device_sysfs_group(ibdev, &bnxt_re_dev_attr_group);
ibdev->driver_id = RDMA_DRIVER_BNXT_RE;
return ib_register_device(ibdev, NULL);
return ib_register_device(ibdev, "bnxt_re%d", NULL);
}
static ssize_t show_rev(struct device *device, struct device_attribute *attr,
char *buf)
{
struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor);
}
static ssize_t show_hca(struct device *device, struct device_attribute *attr,
char *buf)
{
struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc);
}
static DEVICE_ATTR(hw_rev, 0444, show_rev, NULL);
static DEVICE_ATTR(hca_type, 0444, show_hca, NULL);
static struct device_attribute *bnxt_re_attributes[] = {
&dev_attr_hw_rev,
&dev_attr_hca_type
};
static void bnxt_re_dev_remove(struct bnxt_re_dev *rdev)
{
dev_put(rdev->netdev);
@ -864,10 +868,8 @@ static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev)
{
int i;
if (rdev->nq[0].hwq.max_elements) {
for (i = 1; i < rdev->num_msix; i++)
bnxt_qplib_disable_nq(&rdev->nq[i - 1]);
}
for (i = 1; i < rdev->num_msix; i++)
bnxt_qplib_disable_nq(&rdev->nq[i - 1]);
if (rdev->qplib_res.rcfw)
bnxt_qplib_cleanup_res(&rdev->qplib_res);
@ -876,6 +878,7 @@ static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev)
static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
{
int rc = 0, i;
int num_vec_enabled = 0;
bnxt_qplib_init_res(&rdev->qplib_res);
@ -891,9 +894,13 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
"Failed to enable NQ with rc = 0x%x", rc);
goto fail;
}
num_vec_enabled++;
}
return 0;
fail:
for (i = num_vec_enabled; i >= 0; i--)
bnxt_qplib_disable_nq(&rdev->nq[i]);
return rc;
}
@ -925,6 +932,7 @@ static void bnxt_re_free_res(struct bnxt_re_dev *rdev)
static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
{
int rc = 0, i;
int num_vec_created = 0;
/* Configure and allocate resources for qplib */
rdev->qplib_res.rcfw = &rdev->rcfw;
@ -951,7 +959,7 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
if (rc) {
dev_err(rdev_to_dev(rdev), "Alloc Failed NQ%d rc:%#x",
i, rc);
goto dealloc_dpi;
goto free_nq;
}
rc = bnxt_re_net_ring_alloc
(rdev, rdev->nq[i].hwq.pbl[PBL_LVL_0].pg_map_arr,
@ -964,14 +972,17 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
dev_err(rdev_to_dev(rdev),
"Failed to allocate NQ fw id with rc = 0x%x",
rc);
bnxt_qplib_free_nq(&rdev->nq[i]);
goto free_nq;
}
num_vec_created++;
}
return 0;
free_nq:
for (i = 0; i < rdev->num_msix - 1; i++)
for (i = num_vec_created; i >= 0; i--) {
bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id);
bnxt_qplib_free_nq(&rdev->nq[i]);
dealloc_dpi:
}
bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
&rdev->qplib_res.dpi_tbl,
&rdev->dpi_privileged);
@ -989,12 +1000,17 @@ static void bnxt_re_dispatch_event(struct ib_device *ibdev, struct ib_qp *qp,
struct ib_event ib_event;
ib_event.device = ibdev;
if (qp)
if (qp) {
ib_event.element.qp = qp;
else
ib_event.event = event;
if (qp->event_handler)
qp->event_handler(&ib_event, qp->qp_context);
} else {
ib_event.element.port_num = port_num;
ib_event.event = event;
ib_dispatch_event(&ib_event);
ib_event.event = event;
ib_dispatch_event(&ib_event);
}
}
#define HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_IVLAN 0x02
@ -1189,20 +1205,20 @@ static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev)
static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev)
{
int i, rc;
int rc;
if (test_and_clear_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) {
for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++)
device_remove_file(&rdev->ibdev.dev,
bnxt_re_attributes[i]);
/* Cleanup ib dev */
bnxt_re_unregister_ib(rdev);
}
if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags))
cancel_delayed_work(&rdev->worker);
cancel_delayed_work_sync(&rdev->worker);
bnxt_re_cleanup_res(rdev);
bnxt_re_free_res(rdev);
if (test_and_clear_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED,
&rdev->flags))
bnxt_re_cleanup_res(rdev);
if (test_and_clear_bit(BNXT_RE_FLAG_RESOURCES_ALLOCATED, &rdev->flags))
bnxt_re_free_res(rdev);
if (test_and_clear_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags)) {
rc = bnxt_qplib_deinit_rcfw(&rdev->rcfw);
@ -1241,7 +1257,7 @@ static void bnxt_re_worker(struct work_struct *work)
static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
{
int i, j, rc;
int rc;
bool locked;
@ -1331,12 +1347,15 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
pr_err("Failed to allocate resources: %#x\n", rc);
goto fail;
}
set_bit(BNXT_RE_FLAG_RESOURCES_ALLOCATED, &rdev->flags);
rc = bnxt_re_init_res(rdev);
if (rc) {
pr_err("Failed to initialize resources: %#x\n", rc);
goto fail;
}
set_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED, &rdev->flags);
if (!rdev->is_virtfn) {
rc = bnxt_re_setup_qos(rdev);
if (rc)
@ -1358,20 +1377,6 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
}
set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags);
dev_info(rdev_to_dev(rdev), "Device registered successfully");
for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++) {
rc = device_create_file(&rdev->ibdev.dev,
bnxt_re_attributes[i]);
if (rc) {
dev_err(rdev_to_dev(rdev),
"Failed to create IB sysfs: %#x", rc);
/* Must clean up all created device files */
for (j = 0; j < i; j++)
device_remove_file(&rdev->ibdev.dev,
bnxt_re_attributes[j]);
bnxt_re_unregister_ib(rdev);
goto fail;
}
}
ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed,
&rdev->active_width);
set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags);


@ -36,6 +36,8 @@
* Description: Fast Path Operators
*/
#define dev_fmt(fmt) "QPLIB: " fmt
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/sched.h>
@ -71,8 +73,7 @@ static void __bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp)
if (!qp->sq.flushed) {
dev_dbg(&scq->hwq.pdev->dev,
"QPLIB: FP: Adding to SQ Flush list = %p",
qp);
"FP: Adding to SQ Flush list = %p\n", qp);
bnxt_qplib_cancel_phantom_processing(qp);
list_add_tail(&qp->sq_flush, &scq->sqf_head);
qp->sq.flushed = true;
@ -80,8 +81,7 @@ static void __bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp)
if (!qp->srq) {
if (!qp->rq.flushed) {
dev_dbg(&rcq->hwq.pdev->dev,
"QPLIB: FP: Adding to RQ Flush list = %p",
qp);
"FP: Adding to RQ Flush list = %p\n", qp);
list_add_tail(&qp->rq_flush, &rcq->rqf_head);
qp->rq.flushed = true;
}
@ -207,7 +207,7 @@ static int bnxt_qplib_alloc_qp_hdr_buf(struct bnxt_qplib_res *res,
if (!qp->sq_hdr_buf) {
rc = -ENOMEM;
dev_err(&res->pdev->dev,
"QPLIB: Failed to create sq_hdr_buf");
"Failed to create sq_hdr_buf\n");
goto fail;
}
}
@ -221,7 +221,7 @@ static int bnxt_qplib_alloc_qp_hdr_buf(struct bnxt_qplib_res *res,
if (!qp->rq_hdr_buf) {
rc = -ENOMEM;
dev_err(&res->pdev->dev,
"QPLIB: Failed to create rq_hdr_buf");
"Failed to create rq_hdr_buf\n");
goto fail;
}
}
@ -277,8 +277,7 @@ static void bnxt_qplib_service_nq(unsigned long data)
num_cqne_processed++;
else
dev_warn(&nq->pdev->dev,
"QPLIB: cqn - type 0x%x not handled",
type);
"cqn - type 0x%x not handled\n", type);
spin_unlock_bh(&cq->compl_lock);
break;
}
@ -298,7 +297,7 @@ static void bnxt_qplib_service_nq(unsigned long data)
num_srqne_processed++;
else
dev_warn(&nq->pdev->dev,
"QPLIB: SRQ event 0x%x not handled",
"SRQ event 0x%x not handled\n",
nqsrqe->event);
break;
}
@ -306,8 +305,7 @@ static void bnxt_qplib_service_nq(unsigned long data)
break;
default:
dev_warn(&nq->pdev->dev,
"QPLIB: nqe with type = 0x%x not handled",
type);
"nqe with type = 0x%x not handled\n", type);
break;
}
raw_cons++;
@ -360,7 +358,8 @@ void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq)
}
/* Make sure the HW is stopped! */
bnxt_qplib_nq_stop_irq(nq, true);
if (nq->requested)
bnxt_qplib_nq_stop_irq(nq, true);
if (nq->bar_reg_iomem)
iounmap(nq->bar_reg_iomem);
@ -396,7 +395,7 @@ int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx,
rc = irq_set_affinity_hint(nq->vector, &nq->mask);
if (rc) {
dev_warn(&nq->pdev->dev,
"QPLIB: set affinity failed; vector: %d nq_idx: %d\n",
"set affinity failed; vector: %d nq_idx: %d\n",
nq->vector, nq_indx);
}
nq->requested = true;
@ -443,7 +442,7 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
rc = bnxt_qplib_nq_start_irq(nq, nq_idx, msix_vector, true);
if (rc) {
dev_err(&nq->pdev->dev,
"QPLIB: Failed to request irq for nq-idx %d", nq_idx);
"Failed to request irq for nq-idx %d\n", nq_idx);
goto fail;
}
@ -662,8 +661,8 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq,
spin_lock(&srq_hwq->lock);
if (srq->start_idx == srq->last_idx) {
dev_err(&srq_hwq->pdev->dev, "QPLIB: FP: SRQ (0x%x) is full!",
srq->id);
dev_err(&srq_hwq->pdev->dev,
"FP: SRQ (0x%x) is full!\n", srq->id);
rc = -EINVAL;
spin_unlock(&srq_hwq->lock);
goto done;
@ -1324,7 +1323,7 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
}
}
if (i == res->sgid_tbl.max)
dev_warn(&res->pdev->dev, "QPLIB: SGID not found??");
dev_warn(&res->pdev->dev, "SGID not found??\n");
qp->ah.hop_limit = sb->hop_limit;
qp->ah.traffic_class = sb->traffic_class;
@ -1536,7 +1535,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
if (bnxt_qplib_queue_full(sq)) {
dev_err(&sq->hwq.pdev->dev,
"QPLIB: prod = %#x cons = %#x qdepth = %#x delta = %#x",
"prod = %#x cons = %#x qdepth = %#x delta = %#x\n",
sq->hwq.prod, sq->hwq.cons, sq->hwq.max_elements,
sq->q_full_delta);
rc = -ENOMEM;
@ -1561,7 +1560,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
/* Copy the inline data */
if (wqe->inline_len > BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH) {
dev_warn(&sq->hwq.pdev->dev,
"QPLIB: Inline data length > 96 detected");
"Inline data length > 96 detected\n");
data_len = BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH;
} else {
data_len = wqe->inline_len;
@ -1776,7 +1775,7 @@ done:
queue_work(qp->scq->nq->cqn_wq, &nq_work->work);
} else {
dev_err(&sq->hwq.pdev->dev,
"QPLIB: FP: Failed to allocate SQ nq_work!");
"FP: Failed to allocate SQ nq_work!\n");
rc = -ENOMEM;
}
}
@ -1815,13 +1814,12 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
sch_handler = true;
dev_dbg(&rq->hwq.pdev->dev,
"%s Error QP. Scheduling for poll_cq\n",
__func__);
"%s: Error QP. Scheduling for poll_cq\n", __func__);
goto queue_err;
}
if (bnxt_qplib_queue_full(rq)) {
dev_err(&rq->hwq.pdev->dev,
"QPLIB: FP: QP (0x%x) RQ is full!", qp->id);
"FP: QP (0x%x) RQ is full!\n", qp->id);
rc = -EINVAL;
goto done;
}
@ -1870,7 +1868,7 @@ queue_err:
queue_work(qp->rcq->nq->cqn_wq, &nq_work->work);
} else {
dev_err(&rq->hwq.pdev->dev,
"QPLIB: FP: Failed to allocate RQ nq_work!");
"FP: Failed to allocate RQ nq_work!\n");
rc = -ENOMEM;
}
}
@ -1932,7 +1930,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
if (!cq->dpi) {
dev_err(&rcfw->pdev->dev,
"QPLIB: FP: CREATE_CQ failed due to NULL DPI");
"FP: CREATE_CQ failed due to NULL DPI\n");
return -EINVAL;
}
req.dpi = cpu_to_le32(cq->dpi->dpi);
@ -1969,6 +1967,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
INIT_LIST_HEAD(&cq->sqf_head);
INIT_LIST_HEAD(&cq->rqf_head);
spin_lock_init(&cq->compl_lock);
spin_lock_init(&cq->flush_lock);
bnxt_qplib_arm_cq_enable(cq);
return 0;
@ -2172,7 +2171,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
* comes back
*/
dev_dbg(&cq->hwq.pdev->dev,
"FP:Got Phantom CQE");
"FP: Got Phantom CQE\n");
sq->condition = false;
sq->single = true;
rc = 0;
@ -2189,7 +2188,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
peek_raw_cq_cons++;
}
dev_err(&cq->hwq.pdev->dev,
"Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x",
"Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x\n",
cq_cons, qp->id, sw_sq_cons, cqe_sq_cons);
rc = -EINVAL;
}
@ -2213,7 +2212,7 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
le64_to_cpu(hwcqe->qp_handle));
if (!qp) {
dev_err(&cq->hwq.pdev->dev,
"QPLIB: FP: Process Req qp is NULL");
"FP: Process Req qp is NULL\n");
return -EINVAL;
}
sq = &qp->sq;
@ -2221,16 +2220,14 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
cqe_sq_cons = HWQ_CMP(le16_to_cpu(hwcqe->sq_cons_idx), &sq->hwq);
if (cqe_sq_cons > sq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev,
"QPLIB: FP: CQ Process req reported ");
dev_err(&cq->hwq.pdev->dev,
"QPLIB: sq_cons_idx 0x%x which exceeded max 0x%x",
"FP: CQ Process req reported sq_cons_idx 0x%x which exceeded max 0x%x\n",
cqe_sq_cons, sq->hwq.max_elements);
return -EINVAL;
}
if (qp->sq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
"%s: QPLIB: QP in Flush QP = %p\n", __func__, qp);
"%s: QP in Flush QP = %p\n", __func__, qp);
goto done;
}
/* Require to walk the sq's swq to fabricate CQEs for all previously
@ -2262,9 +2259,7 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
hwcqe->status != CQ_REQ_STATUS_OK) {
cqe->status = hwcqe->status;
dev_err(&cq->hwq.pdev->dev,
"QPLIB: FP: CQ Processed Req ");
dev_err(&cq->hwq.pdev->dev,
"QPLIB: wr_id[%d] = 0x%llx with status 0x%x",
"FP: CQ Processed Req wr_id[%d] = 0x%llx with status 0x%x\n",
sw_sq_cons, cqe->wr_id, cqe->status);
cqe++;
(*budget)--;
@ -2330,12 +2325,12 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
qp = (struct bnxt_qplib_qp *)((unsigned long)
le64_to_cpu(hwcqe->qp_handle));
if (!qp) {
dev_err(&cq->hwq.pdev->dev, "QPLIB: process_cq RC qp is NULL");
dev_err(&cq->hwq.pdev->dev, "process_cq RC qp is NULL\n");
return -EINVAL;
}
if (qp->rq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
"%s: QPLIB: QP in Flush QP = %p\n", __func__, qp);
"%s: QP in Flush QP = %p\n", __func__, qp);
goto done;
}
@ -2356,9 +2351,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
return -EINVAL;
if (wr_id_idx >= srq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev,
"QPLIB: FP: CQ Process RC ");
dev_err(&cq->hwq.pdev->dev,
"QPLIB: wr_id idx 0x%x exceeded SRQ max 0x%x",
"FP: CQ Process RC wr_id idx 0x%x exceeded SRQ max 0x%x\n",
wr_id_idx, srq->hwq.max_elements);
return -EINVAL;
}
@ -2371,9 +2364,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
rq = &qp->rq;
if (wr_id_idx >= rq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev,
"QPLIB: FP: CQ Process RC ");
dev_err(&cq->hwq.pdev->dev,
"QPLIB: wr_id idx 0x%x exceeded RQ max 0x%x",
"FP: CQ Process RC wr_id idx 0x%x exceeded RQ max 0x%x\n",
wr_id_idx, rq->hwq.max_elements);
return -EINVAL;
}
@ -2409,12 +2400,12 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
qp = (struct bnxt_qplib_qp *)((unsigned long)
le64_to_cpu(hwcqe->qp_handle));
if (!qp) {
dev_err(&cq->hwq.pdev->dev, "QPLIB: process_cq UD qp is NULL");
dev_err(&cq->hwq.pdev->dev, "process_cq UD qp is NULL\n");
return -EINVAL;
}
if (qp->rq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
"%s: QPLIB: QP in Flush QP = %p\n", __func__, qp);
"%s: QP in Flush QP = %p\n", __func__, qp);
goto done;
}
cqe = *pcqe;
@ -2439,9 +2430,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
if (wr_id_idx >= srq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev,
"QPLIB: FP: CQ Process UD ");
dev_err(&cq->hwq.pdev->dev,
"QPLIB: wr_id idx 0x%x exceeded SRQ max 0x%x",
"FP: CQ Process UD wr_id idx 0x%x exceeded SRQ max 0x%x\n",
wr_id_idx, srq->hwq.max_elements);
return -EINVAL;
}
@ -2454,9 +2443,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
rq = &qp->rq;
if (wr_id_idx >= rq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev,
"QPLIB: FP: CQ Process UD ");
dev_err(&cq->hwq.pdev->dev,
"QPLIB: wr_id idx 0x%x exceeded RQ max 0x%x",
"FP: CQ Process UD wr_id idx 0x%x exceeded RQ max 0x%x\n",
wr_id_idx, rq->hwq.max_elements);
return -EINVAL;
}
@ -2508,13 +2495,12 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
qp = (struct bnxt_qplib_qp *)((unsigned long)
le64_to_cpu(hwcqe->qp_handle));
if (!qp) {
dev_err(&cq->hwq.pdev->dev,
"QPLIB: process_cq Raw/QP1 qp is NULL");
dev_err(&cq->hwq.pdev->dev, "process_cq Raw/QP1 qp is NULL\n");
return -EINVAL;
}
if (qp->rq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
"%s: QPLIB: QP in Flush QP = %p\n", __func__, qp);
"%s: QP in Flush QP = %p\n", __func__, qp);
goto done;
}
cqe = *pcqe;
@ -2543,14 +2529,12 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
srq = qp->srq;
if (!srq) {
dev_err(&cq->hwq.pdev->dev,
"QPLIB: FP: SRQ used but not defined??");
"FP: SRQ used but not defined??\n");
return -EINVAL;
}
if (wr_id_idx >= srq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev,
"QPLIB: FP: CQ Process Raw/QP1 ");
dev_err(&cq->hwq.pdev->dev,
"QPLIB: wr_id idx 0x%x exceeded SRQ max 0x%x",
"FP: CQ Process Raw/QP1 wr_id idx 0x%x exceeded SRQ max 0x%x\n",
wr_id_idx, srq->hwq.max_elements);
return -EINVAL;
}
@ -2563,9 +2547,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
rq = &qp->rq;
if (wr_id_idx >= rq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev,
"QPLIB: FP: CQ Process Raw/QP1 RQ wr_id ");
dev_err(&cq->hwq.pdev->dev,
"QPLIB: ix 0x%x exceeded RQ max 0x%x",
"FP: CQ Process Raw/QP1 RQ wr_id idx 0x%x exceeded RQ max 0x%x\n",
wr_id_idx, rq->hwq.max_elements);
return -EINVAL;
}
@ -2600,14 +2582,14 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
/* Check the Status */
if (hwcqe->status != CQ_TERMINAL_STATUS_OK)
dev_warn(&cq->hwq.pdev->dev,
"QPLIB: FP: CQ Process Terminal Error status = 0x%x",
"FP: CQ Process Terminal Error status = 0x%x\n",
hwcqe->status);
qp = (struct bnxt_qplib_qp *)((unsigned long)
le64_to_cpu(hwcqe->qp_handle));
if (!qp) {
dev_err(&cq->hwq.pdev->dev,
"QPLIB: FP: CQ Process terminal qp is NULL");
"FP: CQ Process terminal qp is NULL\n");
return -EINVAL;
}
@ -2623,16 +2605,14 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
if (cqe_cons > sq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev,
"QPLIB: FP: CQ Process terminal reported ");
dev_err(&cq->hwq.pdev->dev,
"QPLIB: sq_cons_idx 0x%x which exceeded max 0x%x",
"FP: CQ Process terminal reported sq_cons_idx 0x%x which exceeded max 0x%x\n",
cqe_cons, sq->hwq.max_elements);
goto do_rq;
}
if (qp->sq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
"%s: QPLIB: QP in Flush QP = %p\n", __func__, qp);
"%s: QP in Flush QP = %p\n", __func__, qp);
goto sq_done;
}
@ -2673,16 +2653,14 @@ do_rq:
goto done;
} else if (cqe_cons > rq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev,
"QPLIB: FP: CQ Processed terminal ");
dev_err(&cq->hwq.pdev->dev,
"QPLIB: reported rq_cons_idx 0x%x exceeds max 0x%x",
"FP: CQ Processed terminal reported rq_cons_idx 0x%x exceeds max 0x%x\n",
cqe_cons, rq->hwq.max_elements);
goto done;
}
if (qp->rq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
"%s: QPLIB: QP in Flush QP = %p\n", __func__, qp);
"%s: QP in Flush QP = %p\n", __func__, qp);
rc = 0;
goto done;
}
@ -2704,7 +2682,7 @@ static int bnxt_qplib_cq_process_cutoff(struct bnxt_qplib_cq *cq,
/* Check the Status */
if (hwcqe->status != CQ_CUTOFF_STATUS_OK) {
dev_err(&cq->hwq.pdev->dev,
"QPLIB: FP: CQ Process Cutoff Error status = 0x%x",
"FP: CQ Process Cutoff Error status = 0x%x\n",
hwcqe->status);
return -EINVAL;
}
@ -2724,16 +2702,12 @@ int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq,
spin_lock_irqsave(&cq->flush_lock, flags);
list_for_each_entry(qp, &cq->sqf_head, sq_flush) {
dev_dbg(&cq->hwq.pdev->dev,
"QPLIB: FP: Flushing SQ QP= %p",
qp);
dev_dbg(&cq->hwq.pdev->dev, "FP: Flushing SQ QP= %p\n", qp);
__flush_sq(&qp->sq, qp, &cqe, &budget);
}
list_for_each_entry(qp, &cq->rqf_head, rq_flush) {
dev_dbg(&cq->hwq.pdev->dev,
"QPLIB: FP: Flushing RQ QP= %p",
qp);
dev_dbg(&cq->hwq.pdev->dev, "FP: Flushing RQ QP= %p\n", qp);
__flush_rq(&qp->rq, qp, &cqe, &budget);
}
spin_unlock_irqrestore(&cq->flush_lock, flags);
@ -2801,7 +2775,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
goto exit;
default:
dev_err(&cq->hwq.pdev->dev,
"QPLIB: process_cq unknown type 0x%lx",
"process_cq unknown type 0x%lx\n",
hw_cqe->cqe_type_toggle &
CQ_BASE_CQE_TYPE_MASK);
rc = -EINVAL;
@ -2814,7 +2788,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
* next one
*/
dev_err(&cq->hwq.pdev->dev,
"QPLIB: process_cqe error rc = 0x%x", rc);
"process_cqe error rc = 0x%x\n", rc);
}
raw_cons++;
}


@ -35,6 +35,9 @@
*
* Description: RDMA Controller HW interface
*/
#define dev_fmt(fmt) "QPLIB: " fmt
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
@ -96,14 +99,13 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
opcode != CMDQ_BASE_OPCODE_INITIALIZE_FW &&
opcode != CMDQ_BASE_OPCODE_QUERY_VERSION)) {
dev_err(&rcfw->pdev->dev,
"QPLIB: RCFW not initialized, reject opcode 0x%x",
opcode);
"RCFW not initialized, reject opcode 0x%x\n", opcode);
return -EINVAL;
}
if (test_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags) &&
opcode == CMDQ_BASE_OPCODE_INITIALIZE_FW) {
dev_err(&rcfw->pdev->dev, "QPLIB: RCFW already initialized!");
dev_err(&rcfw->pdev->dev, "RCFW already initialized!\n");
return -EINVAL;
}
@ -115,7 +117,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
*/
spin_lock_irqsave(&cmdq->lock, flags);
if (req->cmd_size >= HWQ_FREE_SLOTS(cmdq)) {
dev_err(&rcfw->pdev->dev, "QPLIB: RCFW: CMDQ is full!");
dev_err(&rcfw->pdev->dev, "RCFW: CMDQ is full!\n");
spin_unlock_irqrestore(&cmdq->lock, flags);
return -EAGAIN;
}
@ -154,7 +156,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
cmdqe = &cmdq_ptr[get_cmdq_pg(sw_prod)][get_cmdq_idx(sw_prod)];
if (!cmdqe) {
dev_err(&rcfw->pdev->dev,
"QPLIB: RCFW request failed with no cmdqe!");
"RCFW request failed with no cmdqe!\n");
goto done;
}
/* Copy a segment of the req cmd to the cmdq */
@ -210,7 +212,7 @@ int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
if (!retry_cnt || (rc != -EAGAIN && rc != -EBUSY)) {
/* send failed */
dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x send failed",
dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x send failed\n",
cookie, opcode);
return rc;
}
@ -224,7 +226,7 @@ int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
rc = __wait_for_resp(rcfw, cookie);
if (rc) {
/* timed out */
dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x timedout (%d)msec",
dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x timedout (%d)msec\n",
cookie, opcode, RCFW_CMD_WAIT_TIME_MS);
set_bit(FIRMWARE_TIMED_OUT, &rcfw->flags);
return rc;
@ -232,7 +234,7 @@ int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
if (evnt->status) {
/* failed with status */
dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x status %#x",
dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x status %#x\n",
cookie, opcode, evnt->status);
rc = -EFAULT;
}
@ -298,9 +300,9 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
qp_id = le32_to_cpu(err_event->xid);
qp = rcfw->qp_tbl[qp_id].qp_handle;
dev_dbg(&rcfw->pdev->dev,
"QPLIB: Received QP error notification");
"Received QP error notification\n");
dev_dbg(&rcfw->pdev->dev,
"QPLIB: qpid 0x%x, req_err=0x%x, resp_err=0x%x\n",
"qpid 0x%x, req_err=0x%x, resp_err=0x%x\n",
qp_id, err_event->req_err_state_reason,
err_event->res_err_state_reason);
if (!qp)
@ -309,8 +311,17 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
rcfw->aeq_handler(rcfw, qp_event, qp);
break;
default:
/* Command Response */
spin_lock_irqsave(&cmdq->lock, flags);
/*
* Command Response
* cmdq->lock needs to be acquired to synchronize
* the command send and completion reaping. This function
* is always called with creq->lock held. Using
* the nested variant of spin_lock.
*
*/
spin_lock_irqsave_nested(&cmdq->lock, flags,
SINGLE_DEPTH_NESTING);
cookie = le16_to_cpu(qp_event->cookie);
mcookie = qp_event->cookie;
blocked = cookie & RCFW_CMD_IS_BLOCKING;
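The comment above refers to lockdep's lock-class nesting: spin_lock_irqsave_nested() behaves the same as spin_lock_irqsave() at run time and only annotates that taking cmdq->lock while creq->lock is already held is an intentional, single-level nesting. A minimal standalone sketch of the pattern, using hypothetical lock names rather than the driver's own:

	#include <linux/spinlock.h>

	static DEFINE_SPINLOCK(outer_lock);	/* stands in for creq->lock */
	static DEFINE_SPINLOCK(inner_lock);	/* stands in for cmdq->lock */

	static void reap_completions(void)
	{
		unsigned long oflags, iflags;

		spin_lock_irqsave(&outer_lock, oflags);
		/*
		 * SINGLE_DEPTH_NESTING places inner_lock in a separate lockdep
		 * subclass, so acquiring it under outer_lock is not reported
		 * as a possible deadlock; the outer-then-inner order must
		 * still be respected by every caller.
		 */
		spin_lock_irqsave_nested(&inner_lock, iflags,
					 SINGLE_DEPTH_NESTING);
		/* ... match cookies and reap command completions ... */
		spin_unlock_irqrestore(&inner_lock, iflags);
		spin_unlock_irqrestore(&outer_lock, oflags);
	}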
@ -322,14 +333,16 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
memcpy(crsqe->resp, qp_event, sizeof(*qp_event));
crsqe->resp = NULL;
} else {
dev_err(&rcfw->pdev->dev,
"QPLIB: CMD %s resp->cookie = %#x, evnt->cookie = %#x",
crsqe->resp ? "mismatch" : "collision",
crsqe->resp ? crsqe->resp->cookie : 0, mcookie);
if (crsqe->resp && crsqe->resp->cookie)
dev_err(&rcfw->pdev->dev,
"CMD %s cookie sent=%#x, recd=%#x\n",
crsqe->resp ? "mismatch" : "collision",
crsqe->resp ? crsqe->resp->cookie : 0,
mcookie);
}
if (!test_and_clear_bit(cbit, rcfw->cmdq_bitmap))
dev_warn(&rcfw->pdev->dev,
"QPLIB: CMD bit %d was not requested", cbit);
"CMD bit %d was not requested\n", cbit);
cmdq->cons += crsqe->req_size;
crsqe->req_size = 0;
@ -376,14 +389,14 @@ static void bnxt_qplib_service_creq(unsigned long data)
(rcfw, (struct creq_func_event *)creqe))
rcfw->creq_func_event_processed++;
else
dev_warn
(&rcfw->pdev->dev, "QPLIB:aeqe:%#x Not handled",
type);
dev_warn(&rcfw->pdev->dev,
"aeqe:%#x Not handled\n", type);
break;
default:
dev_warn(&rcfw->pdev->dev, "QPLIB: creqe with ");
dev_warn(&rcfw->pdev->dev,
"QPLIB: op_event = 0x%x not handled", type);
if (type != ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT)
dev_warn(&rcfw->pdev->dev,
"creqe with event 0x%x not handled\n",
type);
break;
}
raw_cons++;
@ -551,7 +564,7 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
BNXT_QPLIB_CREQE_UNITS, 0, PAGE_SIZE,
HWQ_TYPE_L2_CMPL)) {
dev_err(&rcfw->pdev->dev,
"QPLIB: HW channel CREQ allocation failed");
"HW channel CREQ allocation failed\n");
goto fail;
}
rcfw->cmdq.max_elements = BNXT_QPLIB_CMDQE_MAX_CNT;
@ -560,7 +573,7 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
BNXT_QPLIB_CMDQE_UNITS, 0, PAGE_SIZE,
HWQ_TYPE_CTX)) {
dev_err(&rcfw->pdev->dev,
"QPLIB: HW channel CMDQ allocation failed");
"HW channel CMDQ allocation failed\n");
goto fail;
}
@ -605,21 +618,18 @@ void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
bnxt_qplib_rcfw_stop_irq(rcfw, true);
if (rcfw->cmdq_bar_reg_iomem)
iounmap(rcfw->cmdq_bar_reg_iomem);
rcfw->cmdq_bar_reg_iomem = NULL;
if (rcfw->creq_bar_reg_iomem)
iounmap(rcfw->creq_bar_reg_iomem);
rcfw->creq_bar_reg_iomem = NULL;
iounmap(rcfw->cmdq_bar_reg_iomem);
iounmap(rcfw->creq_bar_reg_iomem);
indx = find_first_bit(rcfw->cmdq_bitmap, rcfw->bmap_size);
if (indx != rcfw->bmap_size)
dev_err(&rcfw->pdev->dev,
"QPLIB: disabling RCFW with pending cmd-bit %lx", indx);
"disabling RCFW with pending cmd-bit %lx\n", indx);
kfree(rcfw->cmdq_bitmap);
rcfw->bmap_size = 0;
rcfw->cmdq_bar_reg_iomem = NULL;
rcfw->creq_bar_reg_iomem = NULL;
rcfw->aeq_handler = NULL;
rcfw->vector = 0;
}
@ -681,8 +691,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
RCFW_COMM_BASE_OFFSET,
RCFW_COMM_SIZE);
if (!rcfw->cmdq_bar_reg_iomem) {
dev_err(&rcfw->pdev->dev,
"QPLIB: CMDQ BAR region %d mapping failed",
dev_err(&rcfw->pdev->dev, "CMDQ BAR region %d mapping failed\n",
rcfw->cmdq_bar_reg);
return -ENOMEM;
}
@ -697,14 +706,15 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
res_base = pci_resource_start(pdev, rcfw->creq_bar_reg);
if (!res_base)
dev_err(&rcfw->pdev->dev,
"QPLIB: CREQ BAR region %d resc start is 0!",
"CREQ BAR region %d resc start is 0!\n",
rcfw->creq_bar_reg);
rcfw->creq_bar_reg_iomem = ioremap_nocache(res_base + cp_bar_reg_off,
4);
if (!rcfw->creq_bar_reg_iomem) {
dev_err(&rcfw->pdev->dev,
"QPLIB: CREQ BAR region %d mapping failed",
dev_err(&rcfw->pdev->dev, "CREQ BAR region %d mapping failed\n",
rcfw->creq_bar_reg);
iounmap(rcfw->cmdq_bar_reg_iomem);
rcfw->cmdq_bar_reg_iomem = NULL;
return -ENOMEM;
}
rcfw->creq_qp_event_processed = 0;
@ -717,7 +727,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
rc = bnxt_qplib_rcfw_start_irq(rcfw, msix_vector, true);
if (rc) {
dev_err(&rcfw->pdev->dev,
"QPLIB: Failed to request IRQ for CREQ rc = 0x%x", rc);
"Failed to request IRQ for CREQ rc = 0x%x\n", rc);
bnxt_qplib_disable_rcfw_channel(rcfw);
return rc;
}


@ -154,6 +154,8 @@ struct bnxt_qplib_qp_node {
void *qp_handle; /* ptr to qplib_qp */
};
#define BNXT_QPLIB_OOS_COUNT_MASK 0xFFFFFFFF
/* RCFW Communication Channels */
struct bnxt_qplib_rcfw {
struct pci_dev *pdev;
@ -190,6 +192,8 @@ struct bnxt_qplib_rcfw {
struct bnxt_qplib_crsq *crsqe_tbl;
int qp_tbl_size;
struct bnxt_qplib_qp_node *qp_tbl;
u64 oos_prev;
u32 init_oos_stats;
};
void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);


@ -36,6 +36,8 @@
* Description: QPLib resource manager
*/
#define dev_fmt(fmt) "QPLIB: " fmt
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
@ -68,8 +70,7 @@ static void __free_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl,
pbl->pg_map_arr[i]);
else
dev_warn(&pdev->dev,
"QPLIB: PBL free pg_arr[%d] empty?!",
i);
"PBL free pg_arr[%d] empty?!\n", i);
pbl->pg_arr[i] = NULL;
}
}
@ -537,7 +538,7 @@ static void bnxt_qplib_free_pkey_tbl(struct bnxt_qplib_res *res,
struct bnxt_qplib_pkey_tbl *pkey_tbl)
{
if (!pkey_tbl->tbl)
dev_dbg(&res->pdev->dev, "QPLIB: PKEY tbl not present");
dev_dbg(&res->pdev->dev, "PKEY tbl not present\n");
else
kfree(pkey_tbl->tbl);
@ -578,7 +579,7 @@ int bnxt_qplib_dealloc_pd(struct bnxt_qplib_res *res,
struct bnxt_qplib_pd *pd)
{
if (test_and_set_bit(pd->id, pdt->tbl)) {
dev_warn(&res->pdev->dev, "Freeing an unused PD? pdn = %d",
dev_warn(&res->pdev->dev, "Freeing an unused PD? pdn = %d\n",
pd->id);
return -EINVAL;
}
@ -639,11 +640,11 @@ int bnxt_qplib_dealloc_dpi(struct bnxt_qplib_res *res,
struct bnxt_qplib_dpi *dpi)
{
if (dpi->dpi >= dpit->max) {
dev_warn(&res->pdev->dev, "Invalid DPI? dpi = %d", dpi->dpi);
dev_warn(&res->pdev->dev, "Invalid DPI? dpi = %d\n", dpi->dpi);
return -EINVAL;
}
if (test_and_set_bit(dpi->dpi, dpit->tbl)) {
dev_warn(&res->pdev->dev, "Freeing an unused DPI? dpi = %d",
dev_warn(&res->pdev->dev, "Freeing an unused DPI? dpi = %d\n",
dpi->dpi);
return -EINVAL;
}
@ -673,22 +674,21 @@ static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res,
u32 dbr_len, bytes;
if (dpit->dbr_bar_reg_iomem) {
dev_err(&res->pdev->dev,
"QPLIB: DBR BAR region %d already mapped", dbr_bar_reg);
dev_err(&res->pdev->dev, "DBR BAR region %d already mapped\n",
dbr_bar_reg);
return -EALREADY;
}
bar_reg_base = pci_resource_start(res->pdev, dbr_bar_reg);
if (!bar_reg_base) {
dev_err(&res->pdev->dev,
"QPLIB: BAR region %d resc start failed", dbr_bar_reg);
dev_err(&res->pdev->dev, "BAR region %d resc start failed\n",
dbr_bar_reg);
return -ENOMEM;
}
dbr_len = pci_resource_len(res->pdev, dbr_bar_reg) - dbr_offset;
if (!dbr_len || ((dbr_len & (PAGE_SIZE - 1)) != 0)) {
dev_err(&res->pdev->dev, "QPLIB: Invalid DBR length %d",
dbr_len);
dev_err(&res->pdev->dev, "Invalid DBR length %d\n", dbr_len);
return -ENOMEM;
}
@ -696,8 +696,7 @@ static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res,
dbr_len);
if (!dpit->dbr_bar_reg_iomem) {
dev_err(&res->pdev->dev,
"QPLIB: FP: DBR BAR region %d mapping failed",
dbr_bar_reg);
"FP: DBR BAR region %d mapping failed\n", dbr_bar_reg);
return -ENOMEM;
}
@ -767,7 +766,7 @@ static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev,
stats->dma = dma_alloc_coherent(&pdev->dev, stats->size,
&stats->dma_map, GFP_KERNEL);
if (!stats->dma) {
dev_err(&pdev->dev, "QPLIB: Stats DMA allocation failed");
dev_err(&pdev->dev, "Stats DMA allocation failed\n");
return -ENOMEM;
}
return 0;


@ -36,6 +36,8 @@
* Description: Slow Path Operators
*/
#define dev_fmt(fmt) "QPLIB: " fmt
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/sched.h>
@ -89,7 +91,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
if (!sbuf) {
dev_err(&rcfw->pdev->dev,
"QPLIB: SP: QUERY_FUNC alloc side buffer failed");
"SP: QUERY_FUNC alloc side buffer failed\n");
return -ENOMEM;
}
@ -135,8 +137,16 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
attr->max_srq = le16_to_cpu(sb->max_srq);
attr->max_srq_wqes = le32_to_cpu(sb->max_srq_wr) - 1;
attr->max_srq_sges = sb->max_srq_sge;
/* Bono only reports 1 PKEY for now, but it can support > 1 */
attr->max_pkey = le32_to_cpu(sb->max_pkeys);
/*
* Some versions of FW report more than 0xFFFF.
* Restrict it for now to 0xFFFF to avoid
* reporting a truncated value
*/
if (attr->max_pkey > 0xFFFF) {
/* ib_port_attr::pkey_tbl_len is u16 */
attr->max_pkey = 0xFFFF;
}
attr->max_inline_data = le32_to_cpu(sb->max_inline_data);
attr->l2_db_size = (sb->l2_db_space_size + 1) *
@ -186,8 +196,7 @@ int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res,
(void *)&resp,
NULL, 0);
if (rc) {
dev_err(&res->pdev->dev,
"QPLIB: Failed to set function resources");
dev_err(&res->pdev->dev, "Failed to set function resources\n");
}
return rc;
}
@ -199,7 +208,7 @@ int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
{
if (index >= sgid_tbl->max) {
dev_err(&res->pdev->dev,
"QPLIB: Index %d exceeded SGID table max (%d)",
"Index %d exceeded SGID table max (%d)\n",
index, sgid_tbl->max);
return -EINVAL;
}
@ -217,13 +226,12 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
int index;
if (!sgid_tbl) {
dev_err(&res->pdev->dev, "QPLIB: SGID table not allocated");
dev_err(&res->pdev->dev, "SGID table not allocated\n");
return -EINVAL;
}
/* Do we need a sgid_lock here? */
if (!sgid_tbl->active) {
dev_err(&res->pdev->dev,
"QPLIB: SGID table has no active entries");
dev_err(&res->pdev->dev, "SGID table has no active entries\n");
return -ENOMEM;
}
for (index = 0; index < sgid_tbl->max; index++) {
@ -231,7 +239,7 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
break;
}
if (index == sgid_tbl->max) {
dev_warn(&res->pdev->dev, "GID not found in the SGID table");
dev_warn(&res->pdev->dev, "GID not found in the SGID table\n");
return 0;
}
/* Remove GID from the SGID table */
@ -244,7 +252,7 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
RCFW_CMD_PREP(req, DELETE_GID, cmd_flags);
if (sgid_tbl->hw_id[index] == 0xFFFF) {
dev_err(&res->pdev->dev,
"QPLIB: GID entry contains an invalid HW id");
"GID entry contains an invalid HW id\n");
return -EINVAL;
}
req.gid_index = cpu_to_le16(sgid_tbl->hw_id[index]);
@ -258,7 +266,7 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
sgid_tbl->vlan[index] = 0;
sgid_tbl->active--;
dev_dbg(&res->pdev->dev,
"QPLIB: SGID deleted hw_id[0x%x] = 0x%x active = 0x%x",
"SGID deleted hw_id[0x%x] = 0x%x active = 0x%x\n",
index, sgid_tbl->hw_id[index], sgid_tbl->active);
sgid_tbl->hw_id[index] = (u16)-1;
@ -277,20 +285,19 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
int i, free_idx;
if (!sgid_tbl) {
dev_err(&res->pdev->dev, "QPLIB: SGID table not allocated");
dev_err(&res->pdev->dev, "SGID table not allocated\n");
return -EINVAL;
}
/* Do we need a sgid_lock here? */
if (sgid_tbl->active == sgid_tbl->max) {
dev_err(&res->pdev->dev, "QPLIB: SGID table is full");
dev_err(&res->pdev->dev, "SGID table is full\n");
return -ENOMEM;
}
free_idx = sgid_tbl->max;
for (i = 0; i < sgid_tbl->max; i++) {
if (!memcmp(&sgid_tbl->tbl[i], gid, sizeof(*gid))) {
dev_dbg(&res->pdev->dev,
"QPLIB: SGID entry already exist in entry %d!",
i);
"SGID entry already exist in entry %d!\n", i);
*index = i;
return -EALREADY;
} else if (!memcmp(&sgid_tbl->tbl[i], &bnxt_qplib_gid_zero,
@ -301,7 +308,7 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
}
if (free_idx == sgid_tbl->max) {
dev_err(&res->pdev->dev,
"QPLIB: SGID table is FULL but count is not MAX??");
"SGID table is FULL but count is not MAX??\n");
return -ENOMEM;
}
if (update) {
@ -348,7 +355,7 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
sgid_tbl->vlan[free_idx] = 1;
dev_dbg(&res->pdev->dev,
"QPLIB: SGID added hw_id[0x%x] = 0x%x active = 0x%x",
"SGID added hw_id[0x%x] = 0x%x active = 0x%x\n",
free_idx, sgid_tbl->hw_id[free_idx], sgid_tbl->active);
*index = free_idx;
@ -404,7 +411,7 @@ int bnxt_qplib_get_pkey(struct bnxt_qplib_res *res,
}
if (index >= pkey_tbl->max) {
dev_err(&res->pdev->dev,
"QPLIB: Index %d exceeded PKEY table max (%d)",
"Index %d exceeded PKEY table max (%d)\n",
index, pkey_tbl->max);
return -EINVAL;
}
@ -419,14 +426,13 @@ int bnxt_qplib_del_pkey(struct bnxt_qplib_res *res,
int i, rc = 0;
if (!pkey_tbl) {
dev_err(&res->pdev->dev, "QPLIB: PKEY table not allocated");
dev_err(&res->pdev->dev, "PKEY table not allocated\n");
return -EINVAL;
}
/* Do we need a pkey_lock here? */
if (!pkey_tbl->active) {
dev_err(&res->pdev->dev,
"QPLIB: PKEY table has no active entries");
dev_err(&res->pdev->dev, "PKEY table has no active entries\n");
return -ENOMEM;
}
for (i = 0; i < pkey_tbl->max; i++) {
@ -435,8 +441,7 @@ int bnxt_qplib_del_pkey(struct bnxt_qplib_res *res,
}
if (i == pkey_tbl->max) {
dev_err(&res->pdev->dev,
"QPLIB: PKEY 0x%04x not found in the pkey table",
*pkey);
"PKEY 0x%04x not found in the pkey table\n", *pkey);
return -ENOMEM;
}
memset(&pkey_tbl->tbl[i], 0, sizeof(*pkey));
@ -453,13 +458,13 @@ int bnxt_qplib_add_pkey(struct bnxt_qplib_res *res,
int i, free_idx, rc = 0;
if (!pkey_tbl) {
dev_err(&res->pdev->dev, "QPLIB: PKEY table not allocated");
dev_err(&res->pdev->dev, "PKEY table not allocated\n");
return -EINVAL;
}
/* Do we need a pkey_lock here? */
if (pkey_tbl->active == pkey_tbl->max) {
dev_err(&res->pdev->dev, "QPLIB: PKEY table is full");
dev_err(&res->pdev->dev, "PKEY table is full\n");
return -ENOMEM;
}
free_idx = pkey_tbl->max;
@ -471,7 +476,7 @@ int bnxt_qplib_add_pkey(struct bnxt_qplib_res *res,
}
if (free_idx == pkey_tbl->max) {
dev_err(&res->pdev->dev,
"QPLIB: PKEY table is FULL but count is not MAX??");
"PKEY table is FULL but count is not MAX??\n");
return -ENOMEM;
}
/* Add PKEY to the pkey_tbl */
@ -555,8 +560,7 @@ int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
int rc;
if (mrw->lkey == 0xFFFFFFFF) {
dev_info(&res->pdev->dev,
"QPLIB: SP: Free a reserved lkey MRW");
dev_info(&res->pdev->dev, "SP: Free a reserved lkey MRW\n");
return 0;
}
@ -666,9 +670,8 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
pages++;
if (pages > MAX_PBL_LVL_1_PGS) {
dev_err(&res->pdev->dev, "QPLIB: SP: Reg MR pages ");
dev_err(&res->pdev->dev,
"requested (0x%x) exceeded max (0x%x)",
"SP: Reg MR pages requested (0x%x) exceeded max (0x%x)\n",
pages, MAX_PBL_LVL_1_PGS);
return -ENOMEM;
}
@ -684,7 +687,7 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
HWQ_TYPE_CTX);
if (rc) {
dev_err(&res->pdev->dev,
"SP: Reg MR memory allocation failed");
"SP: Reg MR memory allocation failed\n");
return -ENOMEM;
}
/* Write to the hwq */
@ -795,7 +798,7 @@ int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw,
sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
if (!sbuf) {
dev_err(&rcfw->pdev->dev,
"QPLIB: SP: QUERY_ROCE_STATS alloc side buffer failed");
"SP: QUERY_ROCE_STATS alloc side buffer failed\n");
return -ENOMEM;
}
@ -845,6 +848,16 @@ int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw,
stats->res_srq_load_err = le64_to_cpu(sb->res_srq_load_err);
stats->res_tx_pci_err = le64_to_cpu(sb->res_tx_pci_err);
stats->res_rx_pci_err = le64_to_cpu(sb->res_rx_pci_err);
if (!rcfw->init_oos_stats) {
rcfw->oos_prev = le64_to_cpu(sb->res_oos_drop_count);
rcfw->init_oos_stats = 1;
} else {
stats->res_oos_drop_count +=
(le64_to_cpu(sb->res_oos_drop_count) -
rcfw->oos_prev) & BNXT_QPLIB_OOS_COUNT_MASK;
rcfw->oos_prev = le64_to_cpu(sb->res_oos_drop_count);
}
bail:
bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
return rc;
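The out-of-order drop counter above is folded in with the usual pattern for a free-running hardware counter that can wrap: remember the previous raw sample and accumulate only the masked difference (BNXT_QPLIB_OOS_COUNT_MASK), so a 32-bit rollover does not show up as a huge jump in the totals. A small standalone sketch of the idea, with hypothetical names rather than the driver's structures:

	#include <linux/types.h>

	#define COUNTER_MASK 0xFFFFFFFFULL	/* hardware counter wraps at 32 bits */

	struct counter_acc {
		u64 total;	/* accumulated, monotonic value */
		u64 prev;	/* last raw sample read from hardware */
		bool primed;	/* first sample only primes 'prev' */
	};

	/* Fold a new raw sample into the accumulator, tolerating 32-bit wrap. */
	static void counter_acc_sample(struct counter_acc *acc, u64 raw)
	{
		if (!acc->primed) {
			acc->prev = raw;
			acc->primed = true;
			return;
		}
		acc->total += (raw - acc->prev) & COUNTER_MASK;
		acc->prev = raw;
	}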


@ -205,6 +205,16 @@ struct bnxt_qplib_roce_stats {
/* res_tx_pci_err is 64 b */
u64 res_rx_pci_err;
/* res_rx_pci_err is 64 b */
u64 res_oos_drop_count;
/* res_oos_drop_count */
u64 active_qp_count_p0;
/* port 0 active qps */
u64 active_qp_count_p1;
/* port 1 active qps */
u64 active_qp_count_p2;
/* port 2 active qps */
u64 active_qp_count_p3;
/* port 3 active qps */
};
int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,


@ -2929,6 +2929,11 @@ struct creq_query_roce_stats_resp_sb {
__le64 res_srq_load_err;
__le64 res_tx_pci_err;
__le64 res_rx_pci_err;
__le64 res_oos_drop_count;
__le64 active_qp_count_p0;
__le64 active_qp_count_p1;
__le64 active_qp_count_p2;
__le64 active_qp_count_p3;
};
/* QP error notification event (16 bytes) */


@ -1127,17 +1127,18 @@ static int iwch_query_port(struct ib_device *ibdev,
return 0;
}
static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
char *buf)
static ssize_t hw_rev_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
ibdev.dev);
pr_debug("%s dev 0x%p\n", __func__, dev);
return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type);
}
static DEVICE_ATTR_RO(hw_rev);
static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
char *buf)
static ssize_t hca_type_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
ibdev.dev);
@ -1148,9 +1149,10 @@ static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
lldev->ethtool_ops->get_drvinfo(lldev, &info);
return sprintf(buf, "%s\n", info.driver);
}
static DEVICE_ATTR_RO(hca_type);
static ssize_t show_board(struct device *dev, struct device_attribute *attr,
char *buf)
static ssize_t board_id_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
ibdev.dev);
@ -1158,6 +1160,7 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr,
return sprintf(buf, "%x.%x\n", iwch_dev->rdev.rnic_info.pdev->vendor,
iwch_dev->rdev.rnic_info.pdev->device);
}
static DEVICE_ATTR_RO(board_id);
enum counters {
IPINRECEIVES,
@ -1274,14 +1277,15 @@ static int iwch_get_mib(struct ib_device *ibdev, struct rdma_hw_stats *stats,
return stats->num_counters;
}
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
static struct attribute *iwch_class_attributes[] = {
&dev_attr_hw_rev.attr,
&dev_attr_hca_type.attr,
&dev_attr_board_id.attr,
NULL
};
static struct device_attribute *iwch_class_attributes[] = {
&dev_attr_hw_rev,
&dev_attr_hca_type,
&dev_attr_board_id,
static const struct attribute_group iwch_attr_group = {
.attrs = iwch_class_attributes,
};
static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num,
@ -1316,10 +1320,8 @@ static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str)
int iwch_register_device(struct iwch_dev *dev)
{
int ret;
int i;
pr_debug("%s iwch_dev %p\n", __func__, dev);
strlcpy(dev->ibdev.name, "cxgb3_%d", IB_DEVICE_NAME_MAX);
memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
dev->ibdev.owner = THIS_MODULE;
@ -1402,33 +1404,16 @@ int iwch_register_device(struct iwch_dev *dev)
sizeof(dev->ibdev.iwcm->ifname));
dev->ibdev.driver_id = RDMA_DRIVER_CXGB3;
ret = ib_register_device(&dev->ibdev, NULL);
rdma_set_device_sysfs_group(&dev->ibdev, &iwch_attr_group);
ret = ib_register_device(&dev->ibdev, "cxgb3_%d", NULL);
if (ret)
goto bail1;
for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i) {
ret = device_create_file(&dev->ibdev.dev,
iwch_class_attributes[i]);
if (ret) {
goto bail2;
}
}
return 0;
bail2:
ib_unregister_device(&dev->ibdev);
bail1:
kfree(dev->ibdev.iwcm);
return ret;
}
void iwch_unregister_device(struct iwch_dev *dev)
{
int i;
pr_debug("%s iwch_dev %p\n", __func__, dev);
for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i)
device_remove_file(&dev->ibdev.dev,
iwch_class_attributes[i]);
ib_unregister_device(&dev->ibdev);
kfree(dev->ibdev.iwcm);
return;


@ -403,8 +403,7 @@ void _c4iw_free_ep(struct kref *kref)
ep->com.local_addr.ss_family);
dst_release(ep->dst);
cxgb4_l2t_release(ep->l2t);
if (ep->mpa_skb)
kfree_skb(ep->mpa_skb);
kfree_skb(ep->mpa_skb);
}
if (!skb_queue_empty(&ep->com.ep_skb_list))
skb_queue_purge(&ep->com.ep_skb_list);


@ -161,7 +161,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
cq->gts = rdev->lldi.gts_reg;
cq->rdev = rdev;
cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, T4_BAR2_QTYPE_INGRESS,
cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, CXGB4_BAR2_QTYPE_INGRESS,
&cq->bar2_qid,
user ? &cq->bar2_pa : NULL);
if (user && !cq->bar2_pa) {


@ -373,8 +373,8 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port,
return 0;
}
static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
char *buf)
static ssize_t hw_rev_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
ibdev.dev);
@ -382,9 +382,10 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
return sprintf(buf, "%d\n",
CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type));
}
static DEVICE_ATTR_RO(hw_rev);
static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
char *buf)
static ssize_t hca_type_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
ibdev.dev);
@ -395,9 +396,10 @@ static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
lldev->ethtool_ops->get_drvinfo(lldev, &info);
return sprintf(buf, "%s\n", info.driver);
}
static DEVICE_ATTR_RO(hca_type);
static ssize_t show_board(struct device *dev, struct device_attribute *attr,
char *buf)
static ssize_t board_id_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
ibdev.dev);
@ -405,6 +407,7 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr,
return sprintf(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor,
c4iw_dev->rdev.lldi.pdev->device);
}
static DEVICE_ATTR_RO(board_id);
enum counters {
IP4INSEGS,
@ -461,14 +464,15 @@ static int c4iw_get_mib(struct ib_device *ibdev,
return stats->num_counters;
}
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
static struct attribute *c4iw_class_attributes[] = {
&dev_attr_hw_rev.attr,
&dev_attr_hca_type.attr,
&dev_attr_board_id.attr,
NULL
};
static struct device_attribute *c4iw_class_attributes[] = {
&dev_attr_hw_rev,
&dev_attr_hca_type,
&dev_attr_board_id,
static const struct attribute_group c4iw_attr_group = {
.attrs = c4iw_class_attributes,
};
static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num,
@ -530,12 +534,10 @@ static int fill_res_entry(struct sk_buff *msg, struct rdma_restrack_entry *res)
void c4iw_register_device(struct work_struct *work)
{
int ret;
int i;
struct uld_ctx *ctx = container_of(work, struct uld_ctx, reg_work);
struct c4iw_dev *dev = ctx->dev;
pr_debug("c4iw_dev %p\n", dev);
strlcpy(dev->ibdev.name, "cxgb4_%d", IB_DEVICE_NAME_MAX);
memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
memcpy(&dev->ibdev.node_guid, dev->rdev.lldi.ports[0]->dev_addr, 6);
dev->ibdev.owner = THIS_MODULE;
@ -626,20 +628,13 @@ void c4iw_register_device(struct work_struct *work)
memcpy(dev->ibdev.iwcm->ifname, dev->rdev.lldi.ports[0]->name,
sizeof(dev->ibdev.iwcm->ifname));
rdma_set_device_sysfs_group(&dev->ibdev, &c4iw_attr_group);
dev->ibdev.driver_id = RDMA_DRIVER_CXGB4;
ret = ib_register_device(&dev->ibdev, NULL);
ret = ib_register_device(&dev->ibdev, "cxgb4_%d", NULL);
if (ret)
goto err_kfree_iwcm;
for (i = 0; i < ARRAY_SIZE(c4iw_class_attributes); ++i) {
ret = device_create_file(&dev->ibdev.dev,
c4iw_class_attributes[i]);
if (ret)
goto err_unregister_device;
}
return;
err_unregister_device:
ib_unregister_device(&dev->ibdev);
err_kfree_iwcm:
kfree(dev->ibdev.iwcm);
err_dealloc_ctx:
@ -651,12 +646,7 @@ err_dealloc_ctx:
void c4iw_unregister_device(struct c4iw_dev *dev)
{
int i;
pr_debug("c4iw_dev %p\n", dev);
for (i = 0; i < ARRAY_SIZE(c4iw_class_attributes); ++i)
device_remove_file(&dev->ibdev.dev,
c4iw_class_attributes[i]);
ib_unregister_device(&dev->ibdev);
kfree(dev->ibdev.iwcm);
return;


@ -279,12 +279,13 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
wq->db = rdev->lldi.db_reg;
wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid, T4_BAR2_QTYPE_EGRESS,
wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid,
CXGB4_BAR2_QTYPE_EGRESS,
&wq->sq.bar2_qid,
user ? &wq->sq.bar2_pa : NULL);
if (need_rq)
wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid,
T4_BAR2_QTYPE_EGRESS,
CXGB4_BAR2_QTYPE_EGRESS,
&wq->rq.bar2_qid,
user ? &wq->rq.bar2_pa : NULL);
@ -2572,7 +2573,7 @@ static int alloc_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx,
memset(wq->queue, 0, wq->memsize);
dma_unmap_addr_set(wq, mapping, wq->dma_addr);
wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, T4_BAR2_QTYPE_EGRESS,
wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, CXGB4_BAR2_QTYPE_EGRESS,
&wq->bar2_qid,
user ? &wq->bar2_pa : NULL);
@ -2813,8 +2814,7 @@ err_free_queue:
free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
srq->wr_waitp);
err_free_skb:
if (srq->destroy_skb)
kfree_skb(srq->destroy_skb);
kfree_skb(srq->destroy_skb);
err_free_srq_idx:
c4iw_free_srq_idx(&rhp->rdev, srq->idx);
err_free_wr_wait:


@ -8,12 +8,42 @@
#
obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o
hfi1-y := affinity.o chip.o device.o driver.o efivar.o \
eprom.o exp_rcv.o file_ops.o firmware.o \
init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \
qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \
uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \
verbs_txreq.o vnic_main.o vnic_sdma.o
hfi1-y := \
affinity.o \
chip.o \
device.o \
driver.o \
efivar.o \
eprom.o \
exp_rcv.o \
file_ops.o \
firmware.o \
init.o \
intr.o \
iowait.o \
mad.o \
mmu_rb.o \
msix.o \
pcie.o \
pio.o \
pio_copy.o \
platform.o \
qp.o \
qsfp.o \
rc.o \
ruc.o \
sdma.o \
sysfs.o \
trace.o \
uc.o \
ud.o \
user_exp_rcv.o \
user_pages.o \
user_sdma.o \
verbs.o \
verbs_txreq.o \
vnic_main.o \
vnic_sdma.o
ifdef CONFIG_DEBUG_FS
hfi1-y += debugfs.o


@ -817,10 +817,10 @@ static void hfi1_update_sdma_affinity(struct hfi1_msix_entry *msix, int cpu)
set = &entry->def_intr;
cpumask_set_cpu(cpu, &set->mask);
cpumask_set_cpu(cpu, &set->used);
for (i = 0; i < dd->num_msix_entries; i++) {
for (i = 0; i < dd->msix_info.max_requested; i++) {
struct hfi1_msix_entry *other_msix;
other_msix = &dd->msix_entries[i];
other_msix = &dd->msix_info.msix_entries[i];
if (other_msix->type != IRQ_SDMA || other_msix == msix)
continue;


@ -67,8 +67,6 @@
#include "debugfs.h"
#include "fault.h"
#define NUM_IB_PORTS 1
uint kdeth_qp;
module_param_named(kdeth_qp, kdeth_qp, uint, S_IRUGO);
MODULE_PARM_DESC(kdeth_qp, "Set the KDETH queue pair prefix");
@ -1100,9 +1098,9 @@ struct err_reg_info {
const char *desc;
};
#define NUM_MISC_ERRS (IS_GENERAL_ERR_END - IS_GENERAL_ERR_START)
#define NUM_DC_ERRS (IS_DC_END - IS_DC_START)
#define NUM_VARIOUS (IS_VARIOUS_END - IS_VARIOUS_START)
#define NUM_MISC_ERRS (IS_GENERAL_ERR_END + 1 - IS_GENERAL_ERR_START)
#define NUM_DC_ERRS (IS_DC_END + 1 - IS_DC_START)
#define NUM_VARIOUS (IS_VARIOUS_END + 1 - IS_VARIOUS_START)
/*
* Helpers for building HFI and DC error interrupt table entries. Different
@ -8181,7 +8179,7 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
/**
* is_rcv_urgent_int() - User receive context urgent IRQ handler
* @dd: valid dd
* @source: logical IRQ source (ofse from IS_RCVURGENT_START)
* @source: logical IRQ source (offset from IS_RCVURGENT_START)
*
* RX block receive urgent interrupt. Source is < 160.
*
@ -8231,7 +8229,7 @@ static const struct is_table is_table[] = {
is_sdma_eng_err_name, is_sdma_eng_err_int },
{ IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END,
is_sendctxt_err_name, is_sendctxt_err_int },
{ IS_SDMA_START, IS_SDMA_END,
{ IS_SDMA_START, IS_SDMA_IDLE_END,
is_sdma_eng_name, is_sdma_eng_int },
{ IS_VARIOUS_START, IS_VARIOUS_END,
is_various_name, is_various_int },
@ -8257,7 +8255,7 @@ static void is_interrupt(struct hfi1_devdata *dd, unsigned int source)
/* avoids a double compare by walking the table in-order */
for (entry = &is_table[0]; entry->is_name; entry++) {
if (source < entry->end) {
if (source <= entry->end) {
trace_hfi1_interrupt(dd, entry, source);
entry->is_int(dd, source - entry->start);
return;
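The interrupt-source ranges now store inclusive end values, so the lookup above becomes source <= entry->end and the count macros gain the '+ 1'. With the source numbers added further down in this series (IS_DC_START 248, IS_DC_END 255) the arithmetic works out to the expected eight DC sources; a hypothetical compile-time check, not part of the patch, makes this concrete:

	/* Values as defined in the interrupt-source list after this series. */
	#define IS_DC_START	248
	#define IS_DC_END	255		/* now inclusive */
	#define NUM_DC_ERRS	(IS_DC_END + 1 - IS_DC_START)

	/* 255 + 1 - 248 == 8 distinct DC interrupt sources. */
	_Static_assert(NUM_DC_ERRS == 8, "DC error source count");

	/* The range test is inclusive on both ends as well. */
	static inline int is_dc_source(unsigned int source)
	{
		return source >= IS_DC_START && source <= IS_DC_END;
	}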
@ -8276,7 +8274,7 @@ static void is_interrupt(struct hfi1_devdata *dd, unsigned int source)
* context DATA IRQs are threaded and are not supported by this handler.
*
*/
static irqreturn_t general_interrupt(int irq, void *data)
irqreturn_t general_interrupt(int irq, void *data)
{
struct hfi1_devdata *dd = data;
u64 regs[CCE_NUM_INT_CSRS];
@ -8309,7 +8307,7 @@ static irqreturn_t general_interrupt(int irq, void *data)
return handled;
}
static irqreturn_t sdma_interrupt(int irq, void *data)
irqreturn_t sdma_interrupt(int irq, void *data)
{
struct sdma_engine *sde = data;
struct hfi1_devdata *dd = sde->dd;
@ -8401,7 +8399,7 @@ static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
* invoked) is finished. The intent is to avoid extra interrupts while we
* are processing packets anyway.
*/
static irqreturn_t receive_context_interrupt(int irq, void *data)
irqreturn_t receive_context_interrupt(int irq, void *data)
{
struct hfi1_ctxtdata *rcd = data;
struct hfi1_devdata *dd = rcd->dd;
@ -8441,7 +8439,7 @@ static irqreturn_t receive_context_interrupt(int irq, void *data)
* Receive packet thread handler. This expects to be invoked with the
* receive interrupt still blocked.
*/
static irqreturn_t receive_context_thread(int irq, void *data)
irqreturn_t receive_context_thread(int irq, void *data)
{
struct hfi1_ctxtdata *rcd = data;
int present;
@ -9651,30 +9649,10 @@ void qsfp_event(struct work_struct *work)
}
}
static void init_qsfp_int(struct hfi1_devdata *dd)
void init_qsfp_int(struct hfi1_devdata *dd)
{
struct hfi1_pportdata *ppd = dd->pport;
u64 qsfp_mask, cce_int_mask;
const int qsfp1_int_smask = QSFP1_INT % 64;
const int qsfp2_int_smask = QSFP2_INT % 64;
/*
* disable QSFP1 interrupts for HFI1, QSFP2 interrupts for HFI0
* Qsfp1Int and Qsfp2Int are adjacent bits in the same CSR,
* therefore just one of QSFP1_INT/QSFP2_INT can be used to find
* the index of the appropriate CSR in the CCEIntMask CSR array
*/
cce_int_mask = read_csr(dd, CCE_INT_MASK +
(8 * (QSFP1_INT / 64)));
if (dd->hfi1_id) {
cce_int_mask &= ~((u64)1 << qsfp1_int_smask);
write_csr(dd, CCE_INT_MASK + (8 * (QSFP1_INT / 64)),
cce_int_mask);
} else {
cce_int_mask &= ~((u64)1 << qsfp2_int_smask);
write_csr(dd, CCE_INT_MASK + (8 * (QSFP2_INT / 64)),
cce_int_mask);
}
u64 qsfp_mask;
qsfp_mask = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
/* Clear current status to avoid spurious interrupts */
@ -9691,6 +9669,12 @@ static void init_qsfp_int(struct hfi1_devdata *dd)
write_csr(dd,
dd->hfi1_id ? ASIC_QSFP2_INVERT : ASIC_QSFP1_INVERT,
qsfp_mask);
/* Enable the appropriate QSFP IRQ source */
if (!dd->hfi1_id)
set_intr_bits(dd, QSFP1_INT, QSFP1_INT, true);
else
set_intr_bits(dd, QSFP2_INT, QSFP2_INT, true);
}
/*
@ -10577,12 +10561,29 @@ void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason,
}
}
/*
* Verify if BCT for data VLs is non-zero.
/**
* data_vls_operational() - Verify if data VL BCT credits and MTU
* are both set.
* @ppd: pointer to hfi1_pportdata structure
*
* Return: true - Ok, false - otherwise.
*/
static inline bool data_vls_operational(struct hfi1_pportdata *ppd)
{
return !!ppd->actual_vls_operational;
int i;
u64 reg;
if (!ppd->actual_vls_operational)
return false;
for (i = 0; i < ppd->vls_supported; i++) {
reg = read_csr(ppd->dd, SEND_CM_CREDIT_VL + (8 * i));
if ((reg && !ppd->dd->vld[i].mtu) ||
(!reg && ppd->dd->vld[i].mtu))
return false;
}
return true;
}
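The strengthened check reads SEND_CM_CREDIT_VL for every supported VL and insists that the buffer-control credits and that VL's MTU are configured together, either both set or both still zero. Stripped of the CSR access, the consistency rule reduces to one expression; a hypothetical helper, not in the patch, just to spell it out:

	#include <linux/types.h>

	/*
	 * A data VL is usable only if its credit programming and its MTU
	 * agree: both non-zero, or both zero.
	 */
	static inline bool vl_config_consistent(u64 credit_reg, u16 mtu)
	{
		return !credit_reg == !mtu;
	}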
/*
@ -10695,7 +10696,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
if (!data_vls_operational(ppd)) {
dd_dev_err(dd,
"%s: data VLs not operational\n", __func__);
"%s: Invalid data VL credits or mtu\n",
__func__);
ret = -EINVAL;
break;
}
@ -11932,10 +11934,16 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
rcvctrl &= ~RCV_CTXT_CTRL_ENABLE_SMASK;
}
if (op & HFI1_RCVCTRL_INTRAVAIL_ENB)
if (op & HFI1_RCVCTRL_INTRAVAIL_ENB) {
set_intr_bits(dd, IS_RCVAVAIL_START + rcd->ctxt,
IS_RCVAVAIL_START + rcd->ctxt, true);
rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
if (op & HFI1_RCVCTRL_INTRAVAIL_DIS)
}
if (op & HFI1_RCVCTRL_INTRAVAIL_DIS) {
set_intr_bits(dd, IS_RCVAVAIL_START + rcd->ctxt,
IS_RCVAVAIL_START + rcd->ctxt, false);
rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
}
if ((op & HFI1_RCVCTRL_TAILUPD_ENB) && rcd->rcvhdrtail_kvaddr)
rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
if (op & HFI1_RCVCTRL_TAILUPD_DIS) {
@ -11965,6 +11973,13 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
if (op & HFI1_RCVCTRL_NO_EGR_DROP_DIS)
rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
if (op & HFI1_RCVCTRL_URGENT_ENB)
set_intr_bits(dd, IS_RCVURGENT_START + rcd->ctxt,
IS_RCVURGENT_START + rcd->ctxt, true);
if (op & HFI1_RCVCTRL_URGENT_DIS)
set_intr_bits(dd, IS_RCVURGENT_START + rcd->ctxt,
IS_RCVURGENT_START + rcd->ctxt, false);
hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx\n", ctxt, rcvctrl);
write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcvctrl);
@ -12963,63 +12978,71 @@ int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp)
return ret;
}
/**
* get_int_mask - get 64 bit int mask
* @dd - the devdata
* @i - the csr (relative to CCE_INT_MASK)
*
* Returns the mask with the urgent interrupt mask
* bit clear for kernel receive contexts.
*/
static u64 get_int_mask(struct hfi1_devdata *dd, u32 i)
{
u64 mask = U64_MAX; /* default to no change */
if (i >= (IS_RCVURGENT_START / 64) && i < (IS_RCVURGENT_END / 64)) {
int j = (i - (IS_RCVURGENT_START / 64)) * 64;
int k = !j ? IS_RCVURGENT_START % 64 : 0;
if (j)
j -= IS_RCVURGENT_START % 64;
/* j = 0..dd->first_dyn_alloc_ctxt - 1,k = 0..63 */
for (; j < dd->first_dyn_alloc_ctxt && k < 64; j++, k++)
/* convert to bit in mask and clear */
mask &= ~BIT_ULL(k);
}
return mask;
}
/* ========================================================================= */
/*
* Enable/disable chip from delivering interrupts.
/**
* read_mod_write() - Calculate the IRQ register index and set/clear the bits
* @dd: valid devdata
* @src: IRQ source to determine register index from
* @bits: the bits to set or clear
* @set: true == set the bits, false == clear the bits
*
*/
void set_intr_state(struct hfi1_devdata *dd, u32 enable)
static void read_mod_write(struct hfi1_devdata *dd, u16 src, u64 bits,
bool set)
{
int i;
u64 reg;
u16 idx = src / BITS_PER_REGISTER;
/*
* In HFI, the mask needs to be 1 to allow interrupts.
*/
if (enable) {
/* enable all interrupts but urgent on kernel contexts */
for (i = 0; i < CCE_NUM_INT_CSRS; i++) {
u64 mask = get_int_mask(dd, i);
spin_lock(&dd->irq_src_lock);
reg = read_csr(dd, CCE_INT_MASK + (8 * idx));
if (set)
reg |= bits;
else
reg &= ~bits;
write_csr(dd, CCE_INT_MASK + (8 * idx), reg);
spin_unlock(&dd->irq_src_lock);
}
write_csr(dd, CCE_INT_MASK + (8 * i), mask);
/**
* set_intr_bits() - Enable/disable a range (one or more) IRQ sources
* @dd: valid devdata
* @first: first IRQ source to set/clear
* @last: last IRQ source (inclusive) to set/clear
* @set: true == set the bits, false == clear the bits
*
* If first == last, set the exact source.
*/
int set_intr_bits(struct hfi1_devdata *dd, u16 first, u16 last, bool set)
{
u64 bits = 0;
u64 bit;
u16 src;
if (first > NUM_INTERRUPT_SOURCES || last > NUM_INTERRUPT_SOURCES)
return -EINVAL;
if (last < first)
return -ERANGE;
for (src = first; src <= last; src++) {
bit = src % BITS_PER_REGISTER;
/* wrapped to next register? */
if (!bit && bits) {
read_mod_write(dd, src - 1, bits, set);
bits = 0;
}
init_qsfp_int(dd);
} else {
for (i = 0; i < CCE_NUM_INT_CSRS; i++)
write_csr(dd, CCE_INT_MASK + (8 * i), 0ull);
bits |= BIT_ULL(bit);
}
read_mod_write(dd, last, bits, set);
return 0;
}
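set_intr_bits() replaces the old whole-CSR mask manipulation with a source-range interface; elsewhere in this series it is called with a single source (for example the QSFP or per-context receive-available sources) or with the full IS_FIRST_SOURCE..IS_LAST_SOURCE span. An illustrative caller, assuming only the helpers and defines visible in this diff:

	/* Illustrative only: mask everything, then enable one receive context. */
	static void example_irq_setup(struct hfi1_devdata *dd, u16 ctxt)
	{
		/* Disable every chip interrupt source while reconfiguring. */
		set_intr_bits(dd, IS_FIRST_SOURCE, IS_LAST_SOURCE, false);

		/* Enable just the receive-available source for this context. */
		set_intr_bits(dd, IS_RCVAVAIL_START + ctxt,
			      IS_RCVAVAIL_START + ctxt, true);
	}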
/*
* Clear all interrupt sources on the chip.
*/
static void clear_all_interrupts(struct hfi1_devdata *dd)
void clear_all_interrupts(struct hfi1_devdata *dd)
{
int i;
@ -13043,38 +13066,11 @@ static void clear_all_interrupts(struct hfi1_devdata *dd)
write_csr(dd, DC_DC8051_ERR_CLR, ~(u64)0);
}
/**
* hfi1_clean_up_interrupts() - Free all IRQ resources
* @dd: valid device data data structure
*
* Free the MSIx and assoicated PCI resources, if they have been allocated.
*/
void hfi1_clean_up_interrupts(struct hfi1_devdata *dd)
{
int i;
struct hfi1_msix_entry *me = dd->msix_entries;
/* remove irqs - must happen before disabling/turning off */
for (i = 0; i < dd->num_msix_entries; i++, me++) {
if (!me->arg) /* => no irq, no affinity */
continue;
hfi1_put_irq_affinity(dd, me);
pci_free_irq(dd->pcidev, i, me->arg);
}
/* clean structures */
kfree(dd->msix_entries);
dd->msix_entries = NULL;
dd->num_msix_entries = 0;
pci_free_irq_vectors(dd->pcidev);
}
/*
* Remap the interrupt source from the general handler to the given MSI-X
* interrupt.
*/
static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
{
u64 reg;
int m, n;
@ -13098,8 +13094,7 @@ static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
write_csr(dd, CCE_INT_MAP + (8 * m), reg);
}
static void remap_sdma_interrupts(struct hfi1_devdata *dd,
int engine, int msix_intr)
void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr)
{
/*
* SDMA engine interrupt sources grouped by type, rather than
@ -13108,204 +13103,16 @@ static void remap_sdma_interrupts(struct hfi1_devdata *dd,
* SDMAProgress
* SDMAIdle
*/
remap_intr(dd, IS_SDMA_START + 0 * TXE_NUM_SDMA_ENGINES + engine,
msix_intr);
remap_intr(dd, IS_SDMA_START + 1 * TXE_NUM_SDMA_ENGINES + engine,
msix_intr);
remap_intr(dd, IS_SDMA_START + 2 * TXE_NUM_SDMA_ENGINES + engine,
msix_intr);
}
static int request_msix_irqs(struct hfi1_devdata *dd)
{
int first_general, last_general;
int first_sdma, last_sdma;
int first_rx, last_rx;
int i, ret = 0;
/* calculate the ranges we are going to use */
first_general = 0;
last_general = first_general + 1;
first_sdma = last_general;
last_sdma = first_sdma + dd->num_sdma;
first_rx = last_sdma;
last_rx = first_rx + dd->n_krcv_queues + dd->num_vnic_contexts;
/* VNIC MSIx interrupts get mapped when VNIC contexts are created */
dd->first_dyn_msix_idx = first_rx + dd->n_krcv_queues;
/*
* Sanity check - the code expects all SDMA chip source
* interrupts to be in the same CSR, starting at bit 0. Verify
* that this is true by checking the bit location of the start.
*/
BUILD_BUG_ON(IS_SDMA_START % 64);
for (i = 0; i < dd->num_msix_entries; i++) {
struct hfi1_msix_entry *me = &dd->msix_entries[i];
const char *err_info;
irq_handler_t handler;
irq_handler_t thread = NULL;
void *arg = NULL;
int idx;
struct hfi1_ctxtdata *rcd = NULL;
struct sdma_engine *sde = NULL;
char name[MAX_NAME_SIZE];
/* obtain the arguments to pci_request_irq */
if (first_general <= i && i < last_general) {
idx = i - first_general;
handler = general_interrupt;
arg = dd;
snprintf(name, sizeof(name),
DRIVER_NAME "_%d", dd->unit);
err_info = "general";
me->type = IRQ_GENERAL;
} else if (first_sdma <= i && i < last_sdma) {
idx = i - first_sdma;
sde = &dd->per_sdma[idx];
handler = sdma_interrupt;
arg = sde;
snprintf(name, sizeof(name),
DRIVER_NAME "_%d sdma%d", dd->unit, idx);
err_info = "sdma";
remap_sdma_interrupts(dd, idx, i);
me->type = IRQ_SDMA;
} else if (first_rx <= i && i < last_rx) {
idx = i - first_rx;
rcd = hfi1_rcd_get_by_index_safe(dd, idx);
if (rcd) {
/*
* Set the interrupt register and mask for this
* context's interrupt.
*/
rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
rcd->imask = ((u64)1) <<
((IS_RCVAVAIL_START + idx) % 64);
handler = receive_context_interrupt;
thread = receive_context_thread;
arg = rcd;
snprintf(name, sizeof(name),
DRIVER_NAME "_%d kctxt%d",
dd->unit, idx);
err_info = "receive context";
remap_intr(dd, IS_RCVAVAIL_START + idx, i);
me->type = IRQ_RCVCTXT;
rcd->msix_intr = i;
hfi1_rcd_put(rcd);
}
} else {
/* not in our expected range - complain, then
* ignore it
*/
dd_dev_err(dd,
"Unexpected extra MSI-X interrupt %d\n", i);
continue;
}
/* no argument, no interrupt */
if (!arg)
continue;
/* make sure the name is terminated */
name[sizeof(name) - 1] = 0;
me->irq = pci_irq_vector(dd->pcidev, i);
ret = pci_request_irq(dd->pcidev, i, handler, thread, arg,
name);
if (ret) {
dd_dev_err(dd,
"unable to allocate %s interrupt, irq %d, index %d, err %d\n",
err_info, me->irq, idx, ret);
return ret;
}
/*
* assign arg after pci_request_irq call, so it will be
* cleaned up
*/
me->arg = arg;
ret = hfi1_get_irq_affinity(dd, me);
if (ret)
dd_dev_err(dd, "unable to pin IRQ %d\n", ret);
}
return ret;
}
void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd)
{
int i;
for (i = 0; i < dd->vnic.num_ctxt; i++) {
struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i];
struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr];
synchronize_irq(me->irq);
}
}
void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd)
{
struct hfi1_devdata *dd = rcd->dd;
struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr];
if (!me->arg) /* => no irq, no affinity */
return;
hfi1_put_irq_affinity(dd, me);
pci_free_irq(dd->pcidev, rcd->msix_intr, me->arg);
me->arg = NULL;
}
void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd)
{
struct hfi1_devdata *dd = rcd->dd;
struct hfi1_msix_entry *me;
int idx = rcd->ctxt;
void *arg = rcd;
int ret;
rcd->msix_intr = dd->vnic.msix_idx++;
me = &dd->msix_entries[rcd->msix_intr];
/*
* Set the interrupt register and mask for this
* context's interrupt.
*/
rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
rcd->imask = ((u64)1) <<
((IS_RCVAVAIL_START + idx) % 64);
me->type = IRQ_RCVCTXT;
me->irq = pci_irq_vector(dd->pcidev, rcd->msix_intr);
remap_intr(dd, IS_RCVAVAIL_START + idx, rcd->msix_intr);
ret = pci_request_irq(dd->pcidev, rcd->msix_intr,
receive_context_interrupt,
receive_context_thread, arg,
DRIVER_NAME "_%d kctxt%d", dd->unit, idx);
if (ret) {
dd_dev_err(dd, "vnic irq request (irq %d, idx %d) fail %d\n",
me->irq, idx, ret);
return;
}
/*
* assign arg after pci_request_irq call, so it will be
* cleaned up
*/
me->arg = arg;
ret = hfi1_get_irq_affinity(dd, me);
if (ret) {
dd_dev_err(dd,
"unable to pin IRQ %d\n", ret);
pci_free_irq(dd->pcidev, rcd->msix_intr, me->arg);
}
remap_intr(dd, IS_SDMA_START + engine, msix_intr);
remap_intr(dd, IS_SDMA_PROGRESS_START + engine, msix_intr);
remap_intr(dd, IS_SDMA_IDLE_START + engine, msix_intr);
}
/*
* Set the general handler to accept all interrupts, remap all
* chip interrupts back to MSI-X 0.
*/
static void reset_interrupts(struct hfi1_devdata *dd)
void reset_interrupts(struct hfi1_devdata *dd)
{
int i;
@ -13318,54 +13125,33 @@ static void reset_interrupts(struct hfi1_devdata *dd)
write_csr(dd, CCE_INT_MAP + (8 * i), 0);
}
/**
* set_up_interrupts() - Initialize the IRQ resources and state
* @dd: valid devdata
*
*/
static int set_up_interrupts(struct hfi1_devdata *dd)
{
u32 total;
int ret, request;
/*
* Interrupt count:
* 1 general, "slow path" interrupt (includes the SDMA engines
* slow source, SDMACleanupDone)
* N interrupts - one per used SDMA engine
* M interrupt - one per kernel receive context
* V interrupt - one for each VNIC context
*/
total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_vnic_contexts;
/* ask for MSI-X interrupts */
request = request_msix(dd, total);
if (request < 0) {
ret = request;
goto fail;
} else {
dd->msix_entries = kcalloc(total, sizeof(*dd->msix_entries),
GFP_KERNEL);
if (!dd->msix_entries) {
ret = -ENOMEM;
goto fail;
}
/* using MSI-X */
dd->num_msix_entries = total;
dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total);
}
int ret;
/* mask all interrupts */
set_intr_state(dd, 0);
set_intr_bits(dd, IS_FIRST_SOURCE, IS_LAST_SOURCE, false);
/* clear all pending interrupts */
clear_all_interrupts(dd);
/* reset general handler mask, chip MSI-X mappings */
reset_interrupts(dd);
ret = request_msix_irqs(dd);
/* ask for MSI-X interrupts */
ret = msix_initialize(dd);
if (ret)
goto fail;
return ret;
return 0;
ret = msix_request_irqs(dd);
if (ret)
msix_clean_up_interrupts(dd);
fail:
hfi1_clean_up_interrupts(dd);
return ret;
}
@ -14918,20 +14704,16 @@ err_exit:
}
/**
* Allocate and initialize the device structure for the hfi.
* hfi1_init_dd() - Initialize most of the dd structure.
* @dev: the pci_dev for hfi1_ib device
* @ent: pci_device_id struct for this dev
*
* Also allocates, initializes, and returns the devdata struct for this
* device instance
*
* This is global, and is called directly at init to set up the
* chip-specific function pointers for later use.
*/
struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
const struct pci_device_id *ent)
int hfi1_init_dd(struct hfi1_devdata *dd)
{
struct hfi1_devdata *dd;
struct pci_dev *pdev = dd->pcidev;
struct hfi1_pportdata *ppd;
u64 reg;
int i, ret;
@ -14942,13 +14724,8 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
"Functional simulator"
};
struct pci_dev *parent = pdev->bus->self;
u32 sdma_engines;
u32 sdma_engines = chip_sdma_engines(dd);
dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS *
sizeof(struct hfi1_pportdata));
if (IS_ERR(dd))
goto bail;
sdma_engines = chip_sdma_engines(dd);
ppd = dd->pport;
for (i = 0; i < dd->num_pports; i++, ppd++) {
int vl;
@ -15127,6 +14904,12 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
if (ret)
goto bail_cleanup;
/*
* This should probably occur in hfi1_pcie_init(), but historically
* occurs after the do_pcie_gen3_transition() code.
*/
tune_pcie_caps(dd);
/* start setting dd values and adjusting CSRs */
init_early_variables(dd);
@ -15239,14 +15022,13 @@ bail_free_cntrs:
free_cntrs(dd);
bail_clear_intr:
hfi1_comp_vectors_clean_up(dd);
hfi1_clean_up_interrupts(dd);
msix_clean_up_interrupts(dd);
bail_cleanup:
hfi1_pcie_ddcleanup(dd);
bail_free:
hfi1_free_devdata(dd);
dd = ERR_PTR(ret);
bail:
return dd;
return ret;
}
static u16 delay_cycles(struct hfi1_pportdata *ppd, u32 desired_egress_rate,


@ -52,9 +52,7 @@
*/
/* sizes */
#define CCE_NUM_MSIX_VECTORS 256
#define CCE_NUM_INT_CSRS 12
#define CCE_NUM_INT_MAP_CSRS 96
#define BITS_PER_REGISTER (BITS_PER_BYTE * sizeof(u64))
#define NUM_INTERRUPT_SOURCES 768
#define RXE_NUM_CONTEXTS 160
#define RXE_PER_CONTEXT_SIZE 0x1000 /* 4k */
@ -161,34 +159,49 @@
(CR_CREDIT_RETURN_DUE_TO_FORCE_MASK << \
CR_CREDIT_RETURN_DUE_TO_FORCE_SHIFT)
/* interrupt source numbers */
#define IS_GENERAL_ERR_START 0
#define IS_SDMAENG_ERR_START 16
#define IS_SENDCTXT_ERR_START 32
#define IS_SDMA_START 192 /* includes SDmaProgress,SDmaIdle */
/* Specific IRQ sources */
#define CCE_ERR_INT 0
#define RXE_ERR_INT 1
#define MISC_ERR_INT 2
#define PIO_ERR_INT 4
#define SDMA_ERR_INT 5
#define EGRESS_ERR_INT 6
#define TXE_ERR_INT 7
#define PBC_INT 240
#define GPIO_ASSERT_INT 241
#define QSFP1_INT 242
#define QSFP2_INT 243
#define TCRIT_INT 244
/* interrupt source ranges */
#define IS_FIRST_SOURCE CCE_ERR_INT
#define IS_GENERAL_ERR_START 0
#define IS_SDMAENG_ERR_START 16
#define IS_SENDCTXT_ERR_START 32
#define IS_SDMA_START 192
#define IS_SDMA_PROGRESS_START 208
#define IS_SDMA_IDLE_START 224
#define IS_VARIOUS_START 240
#define IS_DC_START 248
#define IS_RCVAVAIL_START 256
#define IS_RCVURGENT_START 416
#define IS_SENDCREDIT_START 576
#define IS_RESERVED_START 736
#define IS_MAX_SOURCES 768
#define IS_LAST_SOURCE 767
/* derived interrupt source values */
#define IS_GENERAL_ERR_END IS_SDMAENG_ERR_START
#define IS_SDMAENG_ERR_END IS_SENDCTXT_ERR_START
#define IS_SENDCTXT_ERR_END IS_SDMA_START
#define IS_SDMA_END IS_VARIOUS_START
#define IS_VARIOUS_END IS_DC_START
#define IS_DC_END IS_RCVAVAIL_START
#define IS_RCVAVAIL_END IS_RCVURGENT_START
#define IS_RCVURGENT_END IS_SENDCREDIT_START
#define IS_SENDCREDIT_END IS_RESERVED_START
#define IS_RESERVED_END IS_MAX_SOURCES
/* absolute interrupt numbers for QSFP1Int and QSFP2Int */
#define QSFP1_INT 242
#define QSFP2_INT 243
#define IS_GENERAL_ERR_END 7
#define IS_SDMAENG_ERR_END 31
#define IS_SENDCTXT_ERR_END 191
#define IS_SDMA_END 207
#define IS_SDMA_PROGRESS_END 223
#define IS_SDMA_IDLE_END 239
#define IS_VARIOUS_END 244
#define IS_DC_END 255
#define IS_RCVAVAIL_END 415
#define IS_RCVURGENT_END 575
#define IS_SENDCREDIT_END 735
#define IS_RESERVED_END IS_LAST_SOURCE
/* DCC_CFG_PORT_CONFIG logical link states */
#define LSTATE_DOWN 0x1
@ -1416,6 +1429,18 @@ void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality);
void hfi1_init_vnic_rsm(struct hfi1_devdata *dd);
void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd);
irqreturn_t general_interrupt(int irq, void *data);
irqreturn_t sdma_interrupt(int irq, void *data);
irqreturn_t receive_context_interrupt(int irq, void *data);
irqreturn_t receive_context_thread(int irq, void *data);
int set_intr_bits(struct hfi1_devdata *dd, u16 first, u16 last, bool set);
void init_qsfp_int(struct hfi1_devdata *dd);
void clear_all_interrupts(struct hfi1_devdata *dd);
void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr);
void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr);
void reset_interrupts(struct hfi1_devdata *dd);
/*
* Interrupt source table.
*


@ -878,6 +878,10 @@
#define SEND_CTRL (TXE + 0x000000000000)
#define SEND_CTRL_CM_RESET_SMASK 0x4ull
#define SEND_CTRL_SEND_ENABLE_SMASK 0x1ull
#define SEND_CTRL_UNSUPPORTED_VL_SHIFT 3
#define SEND_CTRL_UNSUPPORTED_VL_MASK 0xFFull
#define SEND_CTRL_UNSUPPORTED_VL_SMASK (SEND_CTRL_UNSUPPORTED_VL_MASK \
<< SEND_CTRL_UNSUPPORTED_VL_SHIFT)
#define SEND_CTRL_VL_ARBITER_ENABLE_SMASK 0x2ull
#define SEND_CTXT_CHECK_ENABLE (TXE + 0x000000100080)
#define SEND_CTXT_CHECK_ENABLE_CHECK_BYPASS_VL_MAPPING_SMASK 0x80ull


@ -681,7 +681,8 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
HFI1_RCVCTRL_TAILUPD_DIS |
HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);
HFI1_RCVCTRL_NO_EGR_DROP_DIS |
HFI1_RCVCTRL_URGENT_DIS, uctxt);
/* Clear the context's J_KEY */
hfi1_clear_ctxt_jkey(dd, uctxt);
/*
@ -1096,6 +1097,7 @@ static void user_init(struct hfi1_ctxtdata *uctxt)
hfi1_set_ctxt_jkey(uctxt->dd, uctxt, uctxt->jkey);
rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
rcvctrl_ops |= HFI1_RCVCTRL_URGENT_ENB;
if (HFI1_CAP_UGET_MASK(uctxt->flags, HDRSUPP))
rcvctrl_ops |= HFI1_RCVCTRL_TIDFLOW_ENB;
/*


@ -80,6 +80,7 @@
#include "qsfp.h"
#include "platform.h"
#include "affinity.h"
#include "msix.h"
/* bumped 1 from s/w major version of TrueScale */
#define HFI1_CHIP_VERS_MAJ 3U
@ -620,6 +621,8 @@ struct rvt_sge_state;
#define HFI1_RCVCTRL_NO_RHQ_DROP_DIS 0x8000
#define HFI1_RCVCTRL_NO_EGR_DROP_ENB 0x10000
#define HFI1_RCVCTRL_NO_EGR_DROP_DIS 0x20000
#define HFI1_RCVCTRL_URGENT_ENB 0x40000
#define HFI1_RCVCTRL_URGENT_DIS 0x80000
/* partition enforcement flags */
#define HFI1_PART_ENFORCE_IN 0x1
@ -667,6 +670,14 @@ struct hfi1_msix_entry {
struct irq_affinity_notify notify;
};
struct hfi1_msix_info {
/* lock to synchronize in_use_msix access */
spinlock_t msix_lock;
DECLARE_BITMAP(in_use_msix, CCE_NUM_MSIX_VECTORS);
struct hfi1_msix_entry *msix_entries;
u16 max_requested;
};
/* per-SL CCA information */
struct cca_timer {
struct hrtimer hrtimer;
@ -992,7 +1003,6 @@ struct hfi1_vnic_data {
struct idr vesw_idr;
u8 rmt_start;
u8 num_ctxt;
u32 msix_idx;
};
struct hfi1_vnic_vport_info;
@ -1205,11 +1215,6 @@ struct hfi1_devdata {
struct diag_client *diag_client;
/* MSI-X information */
struct hfi1_msix_entry *msix_entries;
u32 num_msix_entries;
u32 first_dyn_msix_idx;
/* general interrupt: mask of handled interrupts */
u64 gi_mask[CCE_NUM_INT_CSRS];
@ -1223,6 +1228,9 @@ struct hfi1_devdata {
*/
struct timer_list synth_stats_timer;
/* MSI-X information */
struct hfi1_msix_info msix_info;
/*
* device counters
*/
@ -1349,6 +1357,8 @@ struct hfi1_devdata {
/* vnic data */
struct hfi1_vnic_data vnic;
/* Lock to protect IRQ SRC register access */
spinlock_t irq_src_lock;
};
static inline bool hfi1_vnic_is_rsm_full(struct hfi1_devdata *dd, int spare)
@ -1431,9 +1441,6 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread);
int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread);
int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread);
void set_all_slowpath(struct hfi1_devdata *dd);
void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd);
void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd);
void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd);
extern const struct pci_device_id hfi1_pci_tbl[];
void hfi1_make_ud_req_9B(struct rvt_qp *qp,
@ -1887,10 +1894,8 @@ struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd)
#define HFI1_CTXT_WAITING_URG 4
/* free up any allocated data at closes */
struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
const struct pci_device_id *ent);
int hfi1_init_dd(struct hfi1_devdata *dd);
void hfi1_free_devdata(struct hfi1_devdata *dd);
struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra);
/* LED beaconing functions */
void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon,
@ -1963,6 +1968,7 @@ static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
*/
extern const char ib_hfi1_version[];
extern const struct attribute_group ib_hfi1_attr_group;
int hfi1_device_create(struct hfi1_devdata *dd);
void hfi1_device_remove(struct hfi1_devdata *dd);
@ -1974,16 +1980,15 @@ void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *dd);
/* Hook for sysfs read of QSFP */
int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len);
int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent);
void hfi1_clean_up_interrupts(struct hfi1_devdata *dd);
int hfi1_pcie_init(struct hfi1_devdata *dd);
void hfi1_pcie_cleanup(struct pci_dev *pdev);
int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev);
void hfi1_pcie_ddcleanup(struct hfi1_devdata *);
int pcie_speeds(struct hfi1_devdata *dd);
int request_msix(struct hfi1_devdata *dd, u32 msireq);
int restore_pci_variables(struct hfi1_devdata *dd);
int save_pci_variables(struct hfi1_devdata *dd);
int do_pcie_gen3_transition(struct hfi1_devdata *dd);
void tune_pcie_caps(struct hfi1_devdata *dd);
int parse_platform_config(struct hfi1_devdata *dd);
int get_platform_config_field(struct hfi1_devdata *dd,
enum platform_config_table_type_encoding
@ -2124,19 +2129,6 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd)
return base_sdma_integrity;
}
/*
* hfi1_early_err is used (only!) to print early errors before devdata is
* allocated, or when dd->pcidev may not be valid, and at the tail end of
* cleanup when devdata may have been freed, etc. hfi1_dev_porterr is
* the same as dd_dev_err, but is used when the message really needs
* the IB port# to be definitive as to what's happening..
*/
#define hfi1_early_err(dev, fmt, ...) \
dev_err(dev, fmt, ##__VA_ARGS__)
#define hfi1_early_info(dev, fmt, ...) \
dev_info(dev, fmt, ##__VA_ARGS__)
#define dd_dev_emerg(dd, fmt, ...) \
dev_emerg(&(dd)->pcidev->dev, "%s: " fmt, \
rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)

View File

@ -83,6 +83,8 @@
#define HFI1_MIN_EAGER_BUFFER_SIZE (4 * 1024) /* 4KB */
#define HFI1_MAX_EAGER_BUFFER_SIZE (256 * 1024) /* 256KB */
#define NUM_IB_PORTS 1
/*
* Number of user receive contexts we are configured to use (to allow for more
* pio buffers per ctxt, etc.) Zero means use one user context per CPU.
@ -654,9 +656,8 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
ppd->part_enforce |= HFI1_PART_ENFORCE_IN;
if (loopback) {
hfi1_early_err(&pdev->dev,
"Faking data partition 0x8001 in idx %u\n",
!default_pkey_idx);
dd_dev_err(dd, "Faking data partition 0x8001 in idx %u\n",
!default_pkey_idx);
ppd->pkeys[!default_pkey_idx] = 0x8001;
}
@ -702,9 +703,7 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
return;
bail:
hfi1_early_err(&pdev->dev,
"Congestion Control Agent disabled for port %d\n", port);
dd_dev_err(dd, "Congestion Control Agent disabled for port %d\n", port);
}
/*
@ -832,6 +831,23 @@ wq_error:
return -ENOMEM;
}
/**
* enable_general_intr() - Enable the IRQs that will be handled by the
* general interrupt handler.
* @dd: valid devdata
*
*/
static void enable_general_intr(struct hfi1_devdata *dd)
{
set_intr_bits(dd, CCE_ERR_INT, MISC_ERR_INT, true);
set_intr_bits(dd, PIO_ERR_INT, TXE_ERR_INT, true);
set_intr_bits(dd, IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END, true);
set_intr_bits(dd, PBC_INT, GPIO_ASSERT_INT, true);
set_intr_bits(dd, TCRIT_INT, TCRIT_INT, true);
set_intr_bits(dd, IS_DC_START, IS_DC_END, true);
set_intr_bits(dd, IS_SENDCREDIT_START, IS_SENDCREDIT_END, true);
}
/**
* hfi1_init - do the actual initialization sequence on the chip
* @dd: the hfi1_ib device
@ -916,6 +932,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
"failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
ret = lastfail;
}
/* enable IRQ */
hfi1_rcd_put(rcd);
}
@ -954,7 +971,8 @@ done:
HFI1_STATUS_INITTED;
if (!ret) {
/* enable all interrupts from the chip */
set_intr_state(dd, 1);
enable_general_intr(dd);
init_qsfp_int(dd);
/* chip is OK for user apps; mark it as initialized */
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
@ -1051,9 +1069,9 @@ static void shutdown_device(struct hfi1_devdata *dd)
}
dd->flags &= ~HFI1_INITTED;
/* mask and clean up interrupts, but not errors */
set_intr_state(dd, 0);
hfi1_clean_up_interrupts(dd);
/* mask and clean up interrupts */
set_intr_bits(dd, IS_FIRST_SOURCE, IS_LAST_SOURCE, false);
msix_clean_up_interrupts(dd);
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
ppd = dd->pport + pidx;
@ -1246,15 +1264,19 @@ void hfi1_free_devdata(struct hfi1_devdata *dd)
kobject_put(&dd->kobj);
}
/*
* Allocate our primary per-unit data structure. Must be done via verbs
* allocator, because the verbs cleanup process both does cleanup and
* free of the data structure.
/**
* hfi1_alloc_devdata - Allocate our primary per-unit data structure.
* @pdev: Valid PCI device
* @extra: How many bytes to alloc past the default
*
* Must be done via verbs allocator, because the verbs cleanup process
* both does cleanup and free of the data structure.
* "extra" is for chip-specific data.
*
* Use the idr mechanism to get a unit number for this unit.
*/
struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev,
size_t extra)
{
unsigned long flags;
struct hfi1_devdata *dd;
@ -1287,8 +1309,8 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
idr_preload_end();
if (ret < 0) {
hfi1_early_err(&pdev->dev,
"Could not allocate unit ID: error %d\n", -ret);
dev_err(&pdev->dev,
"Could not allocate unit ID: error %d\n", -ret);
goto bail;
}
rvt_set_ibdev_name(&dd->verbs_dev.rdi, "%s_%d", class_name(), dd->unit);
@ -1309,6 +1331,7 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
spin_lock_init(&dd->pio_map_lock);
mutex_init(&dd->dc8051_lock);
init_waitqueue_head(&dd->event_queue);
spin_lock_init(&dd->irq_src_lock);
dd->int_counter = alloc_percpu(u64);
if (!dd->int_counter) {
@ -1481,9 +1504,6 @@ static int __init hfi1_mod_init(void)
idr_init(&hfi1_unit_table);
hfi1_dbg_init();
ret = hfi1_wss_init();
if (ret < 0)
goto bail_wss;
ret = pci_register_driver(&hfi1_pci_driver);
if (ret < 0) {
pr_err("Unable to register driver: error %d\n", -ret);
@ -1492,8 +1512,6 @@ static int __init hfi1_mod_init(void)
goto bail; /* all OK */
bail_dev:
hfi1_wss_exit();
bail_wss:
hfi1_dbg_exit();
idr_destroy(&hfi1_unit_table);
dev_cleanup();
@ -1510,7 +1528,6 @@ static void __exit hfi1_mod_cleanup(void)
{
pci_unregister_driver(&hfi1_pci_driver);
node_affinity_destroy_all();
hfi1_wss_exit();
hfi1_dbg_exit();
idr_destroy(&hfi1_unit_table);
@ -1604,23 +1621,23 @@ static void postinit_cleanup(struct hfi1_devdata *dd)
hfi1_free_devdata(dd);
}
static int init_validate_rcvhdrcnt(struct device *dev, uint thecnt)
static int init_validate_rcvhdrcnt(struct hfi1_devdata *dd, uint thecnt)
{
if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) {
hfi1_early_err(dev, "Receive header queue count too small\n");
dd_dev_err(dd, "Receive header queue count too small\n");
return -EINVAL;
}
if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
hfi1_early_err(dev,
"Receive header queue count cannot be greater than %u\n",
HFI1_MAX_HDRQ_EGRBUF_CNT);
dd_dev_err(dd,
"Receive header queue count cannot be greater than %u\n",
HFI1_MAX_HDRQ_EGRBUF_CNT);
return -EINVAL;
}
if (thecnt % HDRQ_INCREMENT) {
hfi1_early_err(dev, "Receive header queue count %d must be divisible by %lu\n",
thecnt, HDRQ_INCREMENT);
dd_dev_err(dd, "Receive header queue count %d must be divisible by %lu\n",
thecnt, HDRQ_INCREMENT);
return -EINVAL;
}
@ -1639,22 +1656,29 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* Validate dev ids */
if (!(ent->device == PCI_DEVICE_ID_INTEL0 ||
ent->device == PCI_DEVICE_ID_INTEL1)) {
hfi1_early_err(&pdev->dev,
"Failing on unknown Intel deviceid 0x%x\n",
ent->device);
dev_err(&pdev->dev, "Failing on unknown Intel deviceid 0x%x\n",
ent->device);
ret = -ENODEV;
goto bail;
}
/* Allocate the dd so we can get to work */
dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS *
sizeof(struct hfi1_pportdata));
if (IS_ERR(dd)) {
ret = PTR_ERR(dd);
goto bail;
}
/* Validate some global module parameters */
ret = init_validate_rcvhdrcnt(&pdev->dev, rcvhdrcnt);
ret = init_validate_rcvhdrcnt(dd, rcvhdrcnt);
if (ret)
goto bail;
/* use the encoding function as a sanitization check */
if (!encode_rcv_header_entry_size(hfi1_hdrq_entsize)) {
hfi1_early_err(&pdev->dev, "Invalid HdrQ Entry size %u\n",
hfi1_hdrq_entsize);
dd_dev_err(dd, "Invalid HdrQ Entry size %u\n",
hfi1_hdrq_entsize);
ret = -EINVAL;
goto bail;
}
@ -1676,10 +1700,10 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
clamp_val(eager_buffer_size,
MIN_EAGER_BUFFER * 8,
MAX_EAGER_BUFFER_TOTAL);
hfi1_early_info(&pdev->dev, "Eager buffer size %u\n",
eager_buffer_size);
dd_dev_info(dd, "Eager buffer size %u\n",
eager_buffer_size);
} else {
hfi1_early_err(&pdev->dev, "Invalid Eager buffer size of 0\n");
dd_dev_err(dd, "Invalid Eager buffer size of 0\n");
ret = -EINVAL;
goto bail;
}
@ -1687,7 +1711,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* restrict value of hfi1_rcvarr_split */
hfi1_rcvarr_split = clamp_val(hfi1_rcvarr_split, 0, 100);
ret = hfi1_pcie_init(pdev, ent);
ret = hfi1_pcie_init(dd);
if (ret)
goto bail;
@ -1695,12 +1719,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
* Do device-specific initialization, function table setup, dd
* allocation, etc.
*/
dd = hfi1_init_dd(pdev, ent);
if (IS_ERR(dd)) {
ret = PTR_ERR(dd);
ret = hfi1_init_dd(dd);
if (ret)
goto clean_bail; /* error already printed */
}
ret = create_workqueues(dd);
if (ret)
@ -1731,7 +1752,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
dd_dev_err(dd, "Failed to create /dev devices: %d\n", -j);
if (initfail || ret) {
hfi1_clean_up_interrupts(dd);
msix_clean_up_interrupts(dd);
stop_timers(dd);
flush_workqueue(ib_wq);
for (pidx = 0; pidx < dd->num_pports; ++pidx) {

View File

@ -0,0 +1,94 @@
// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
* Copyright(c) 2018 Intel Corporation.
*
*/
#include "iowait.h"
#include "trace_iowait.h"
void iowait_set_flag(struct iowait *wait, u32 flag)
{
trace_hfi1_iowait_set(wait, flag);
set_bit(flag, &wait->flags);
}
bool iowait_flag_set(struct iowait *wait, u32 flag)
{
return test_bit(flag, &wait->flags);
}
inline void iowait_clear_flag(struct iowait *wait, u32 flag)
{
trace_hfi1_iowait_clear(wait, flag);
clear_bit(flag, &wait->flags);
}
/**
* iowait_init() - initialize wait structure
* @wait: wait struct to initialize
* @tx_limit: limit for overflow queuing
* @func: restart function for workqueue
* @tidfunc: restart function for TID workqueue
* @sleep: sleep function for no space
* @wakeup: wakeup function for no space
* @sdma_drained: sdma count drained callback
*
* This function initializes the iowait
* structure embedded in the QP or PQ.
*
*/
void iowait_init(struct iowait *wait, u32 tx_limit,
void (*func)(struct work_struct *work),
void (*tidfunc)(struct work_struct *work),
int (*sleep)(struct sdma_engine *sde,
struct iowait_work *wait,
struct sdma_txreq *tx,
uint seq,
bool pkts_sent),
void (*wakeup)(struct iowait *wait, int reason),
void (*sdma_drained)(struct iowait *wait))
{
int i;
wait->count = 0;
INIT_LIST_HEAD(&wait->list);
init_waitqueue_head(&wait->wait_dma);
init_waitqueue_head(&wait->wait_pio);
atomic_set(&wait->sdma_busy, 0);
atomic_set(&wait->pio_busy, 0);
wait->tx_limit = tx_limit;
wait->sleep = sleep;
wait->wakeup = wakeup;
wait->sdma_drained = sdma_drained;
wait->flags = 0;
for (i = 0; i < IOWAIT_SES; i++) {
wait->wait[i].iow = wait;
INIT_LIST_HEAD(&wait->wait[i].tx_head);
if (i == IOWAIT_IB_SE)
INIT_WORK(&wait->wait[i].iowork, func);
else
INIT_WORK(&wait->wait[i].iowork, tidfunc);
}
}
/**
* iowait_cancel_work - cancel all work in iowait
* @w: the iowait struct
*/
void iowait_cancel_work(struct iowait *w)
{
cancel_work_sync(&iowait_get_ib_work(w)->iowork);
cancel_work_sync(&iowait_get_tid_work(w)->iowork);
}
/**
* iowait_set_work_flag - set work flag based on leg
* @w: the iowait_work struct
*/
int iowait_set_work_flag(struct iowait_work *w)
{
if (w == &w->iow->wait[IOWAIT_IB_SE]) {
iowait_set_flag(w->iow, IOWAIT_PENDING_IB);
return IOWAIT_IB_SE;
}
iowait_set_flag(w->iow, IOWAIT_PENDING_TID);
return IOWAIT_TID_SE;
}
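As a reading aid for the new two-leg iowait API above, here is a minimal, compile-level sketch of how a caller might embed and drive a struct iowait. It is not part of the series; my_ctx, my_ib_work, my_tid_work, my_sleep, my_wakeup and my_sdma_drained are made-up names (the real wiring is done in qp.c via qp_priv_alloc() and stop_send_queue()).

#include "iowait.h"

/* hypothetical container embedding the two-leg iowait */
struct my_ctx {
	struct iowait wait;
};

/* stub restart functions, one per send engine leg */
static void my_ib_work(struct work_struct *work) {}
static void my_tid_work(struct work_struct *work) {}

static int my_sleep(struct sdma_engine *sde, struct iowait_work *wait,
		    struct sdma_txreq *tx, uint seq, bool pkts_sent)
{
	return -EBUSY;	/* stub: pretend there is no descriptor space */
}

static void my_wakeup(struct iowait *wait, int reason) {}
static void my_sdma_drained(struct iowait *wait) {}

static void my_ctx_init(struct my_ctx *ctx)
{
	/* IB leg gets my_ib_work, TID leg gets my_tid_work */
	iowait_init(&ctx->wait, 1, my_ib_work, my_tid_work,
		    my_sleep, my_wakeup, my_sdma_drained);
}

static void my_ctx_teardown(struct my_ctx *ctx)
{
	/* cancels the work items of both legs */
	iowait_cancel_work(&ctx->wait);
}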

View File

@ -1,7 +1,7 @@
#ifndef _HFI1_IOWAIT_H
#define _HFI1_IOWAIT_H
/*
* Copyright(c) 2015, 2016 Intel Corporation.
* Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@ -49,6 +49,7 @@
#include <linux/list.h>
#include <linux/workqueue.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include "sdma_txreq.h"
@ -59,16 +60,47 @@
*/
typedef void (*restart_t)(struct work_struct *work);
#define IOWAIT_PENDING_IB 0x0
#define IOWAIT_PENDING_TID 0x1
/*
* A QP can have multiple Send Engines (SEs).
*
* The current use case is for supporting a TID RDMA
* packet build/xmit mechanism independent from verbs.
*/
#define IOWAIT_SES 2
#define IOWAIT_IB_SE 0
#define IOWAIT_TID_SE 1
struct sdma_txreq;
struct sdma_engine;
/**
* struct iowait - linkage for delayed progress/waiting
* @iowork: the work struct
* @tx_head: list of prebuilt packets
* @iow: the parent iowait structure
*
* This structure is the work item (process) specific
* details associated with the each of the two SEs of the
* QP.
*
* The workstruct and the queued TXs are unique to each
* SE.
*/
struct iowait;
struct iowait_work {
struct work_struct iowork;
struct list_head tx_head;
struct iowait *iow;
};
/**
* @list: used to add/insert into QP/PQ wait lists
* @lock: used to record the list head lock
* @tx_head: overflow list of sdma_txreq's
* @sleep: no space callback
* @wakeup: space callback wakeup
* @sdma_drained: sdma count drained
* @lock: lock protected head of wait queue
* @iowork: workqueue overhead
* @wait_dma: wait for sdma_busy == 0
* @wait_pio: wait for pio_busy == 0
@ -76,6 +108,8 @@ struct sdma_engine;
* @count: total number of descriptors in tx_head'ed list
* @tx_limit: limit for overflow queuing
* @tx_count: number of tx entry's in tx_head'ed list
* @flags: wait flags (one per QP)
* @wait: SE array
*
* This is to be embedded in user's state structure
* (QP or PQ).
@ -98,13 +132,11 @@ struct sdma_engine;
* Waiters explicitly know that, but the destroy
* code that unwaits QPs does not.
*/
struct iowait {
struct list_head list;
struct list_head tx_head;
int (*sleep)(
struct sdma_engine *sde,
struct iowait *wait,
struct iowait_work *wait,
struct sdma_txreq *tx,
uint seq,
bool pkts_sent
@ -112,7 +144,6 @@ struct iowait {
void (*wakeup)(struct iowait *wait, int reason);
void (*sdma_drained)(struct iowait *wait);
seqlock_t *lock;
struct work_struct iowork;
wait_queue_head_t wait_dma;
wait_queue_head_t wait_pio;
atomic_t sdma_busy;
@ -121,63 +152,37 @@ struct iowait {
u32 tx_limit;
u32 tx_count;
u8 starved_cnt;
unsigned long flags;
struct iowait_work wait[IOWAIT_SES];
};
#define SDMA_AVAIL_REASON 0
/**
* iowait_init() - initialize wait structure
* @wait: wait struct to initialize
* @tx_limit: limit for overflow queuing
* @func: restart function for workqueue
* @sleep: sleep function for no space
* @resume: wakeup function for no space
*
* This function initializes the iowait
* structure embedded in the QP or PQ.
*
*/
void iowait_set_flag(struct iowait *wait, u32 flag);
bool iowait_flag_set(struct iowait *wait, u32 flag);
void iowait_clear_flag(struct iowait *wait, u32 flag);
static inline void iowait_init(
struct iowait *wait,
u32 tx_limit,
void (*func)(struct work_struct *work),
int (*sleep)(
struct sdma_engine *sde,
struct iowait *wait,
struct sdma_txreq *tx,
uint seq,
bool pkts_sent),
void (*wakeup)(struct iowait *wait, int reason),
void (*sdma_drained)(struct iowait *wait))
{
wait->count = 0;
wait->lock = NULL;
INIT_LIST_HEAD(&wait->list);
INIT_LIST_HEAD(&wait->tx_head);
INIT_WORK(&wait->iowork, func);
init_waitqueue_head(&wait->wait_dma);
init_waitqueue_head(&wait->wait_pio);
atomic_set(&wait->sdma_busy, 0);
atomic_set(&wait->pio_busy, 0);
wait->tx_limit = tx_limit;
wait->sleep = sleep;
wait->wakeup = wakeup;
wait->sdma_drained = sdma_drained;
}
void iowait_init(struct iowait *wait, u32 tx_limit,
void (*func)(struct work_struct *work),
void (*tidfunc)(struct work_struct *work),
int (*sleep)(struct sdma_engine *sde,
struct iowait_work *wait,
struct sdma_txreq *tx,
uint seq,
bool pkts_sent),
void (*wakeup)(struct iowait *wait, int reason),
void (*sdma_drained)(struct iowait *wait));
/**
* iowait_schedule() - initialize wait structure
* iowait_schedule() - schedule the default send engine work
* @wait: wait struct to schedule
* @wq: workqueue for schedule
* @cpu: cpu
*/
static inline void iowait_schedule(
struct iowait *wait,
struct workqueue_struct *wq,
int cpu)
static inline bool iowait_schedule(struct iowait *wait,
struct workqueue_struct *wq, int cpu)
{
queue_work_on(cpu, wq, &wait->iowork);
return !!queue_work_on(cpu, wq, &wait->wait[IOWAIT_IB_SE].iowork);
}
/**
@ -228,6 +233,8 @@ static inline void iowait_sdma_add(struct iowait *wait, int count)
*/
static inline int iowait_sdma_dec(struct iowait *wait)
{
if (!wait)
return 0;
return atomic_dec_and_test(&wait->sdma_busy);
}
@ -267,11 +274,13 @@ static inline void iowait_pio_inc(struct iowait *wait)
}
/**
* iowait_sdma_dec - note pio complete
* iowait_pio_dec - note pio complete
* @wait: iowait structure
*/
static inline int iowait_pio_dec(struct iowait *wait)
{
if (!wait)
return 0;
return atomic_dec_and_test(&wait->pio_busy);
}
@ -293,9 +302,9 @@ static inline void iowait_drain_wakeup(struct iowait *wait)
/**
* iowait_get_txhead() - get packet off of iowait list
*
* @wait wait structure
* @wait iowait_work structure
*/
static inline struct sdma_txreq *iowait_get_txhead(struct iowait *wait)
static inline struct sdma_txreq *iowait_get_txhead(struct iowait_work *wait)
{
struct sdma_txreq *tx = NULL;
@ -309,6 +318,28 @@ static inline struct sdma_txreq *iowait_get_txhead(struct iowait *wait)
return tx;
}
static inline u16 iowait_get_desc(struct iowait_work *w)
{
u16 num_desc = 0;
struct sdma_txreq *tx = NULL;
if (!list_empty(&w->tx_head)) {
tx = list_first_entry(&w->tx_head, struct sdma_txreq,
list);
num_desc = tx->num_desc;
}
return num_desc;
}
static inline u32 iowait_get_all_desc(struct iowait *w)
{
u32 num_desc = 0;
num_desc = iowait_get_desc(&w->wait[IOWAIT_IB_SE]);
num_desc += iowait_get_desc(&w->wait[IOWAIT_TID_SE]);
return num_desc;
}
/**
* iowait_queue - Put the iowait on a wait queue
* @pkts_sent: have some packets been sent before queuing?
@ -372,12 +403,57 @@ static inline void iowait_starve_find_max(struct iowait *w, u8 *max,
}
/**
* iowait_packet_queued() - determine if a packet is already built
* @wait: the wait structure
* iowait_packet_queued() - determine if a packet is queued
* @wait: the iowait_work structure
*/
static inline bool iowait_packet_queued(struct iowait *wait)
static inline bool iowait_packet_queued(struct iowait_work *wait)
{
return !list_empty(&wait->tx_head);
}
/**
* iowait_inc_wait_count - increment wait counts
* @w: the iowait_work struct
* @n: the count
*/
static inline void iowait_inc_wait_count(struct iowait_work *w, u16 n)
{
if (!w)
return;
w->iow->tx_count++;
w->iow->count += n;
}
/**
* iowait_get_tid_work - return iowait_work for tid SE
* @w: the iowait struct
*/
static inline struct iowait_work *iowait_get_tid_work(struct iowait *w)
{
return &w->wait[IOWAIT_TID_SE];
}
/**
* iowait_get_ib_work - return iowait_work for ib SE
* @w: the iowait struct
*/
static inline struct iowait_work *iowait_get_ib_work(struct iowait *w)
{
return &w->wait[IOWAIT_IB_SE];
}
/**
* iowait_ioww_to_iow - return iowait given iowait_work
* @w: the iowait_work struct
*/
static inline struct iowait *iowait_ioww_to_iow(struct iowait_work *w)
{
if (likely(w))
return w->iow;
return NULL;
}
void iowait_cancel_work(struct iowait *w);
int iowait_set_work_flag(struct iowait_work *w);
#endif
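For orientation, a small hedged sketch of how the per-leg accessors above could be used together; my_leg_has_work() is a made-up helper, not something added by this series.

static bool my_leg_has_work(struct iowait *w, bool tid)
{
	struct iowait_work *iww = tid ? iowait_get_tid_work(w) :
					iowait_get_ib_work(w);

	/* a prebuilt packet on this leg means it still needs scheduling */
	if (iowait_packet_queued(iww)) {
		/* records IOWAIT_PENDING_IB or IOWAIT_PENDING_TID */
		iowait_set_work_flag(iww);
		return true;
	}
	return false;
}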

View File

@ -1,5 +1,5 @@
/*
* Copyright(c) 2015-2017 Intel Corporation.
* Copyright(c) 2015-2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@ -4836,7 +4836,7 @@ static int hfi1_process_opa_mad(struct ib_device *ibdev, int mad_flags,
int ret;
int pkey_idx;
int local_mad = 0;
u32 resp_len = 0;
u32 resp_len = in_wc->byte_len - sizeof(*in_grh);
struct hfi1_ibport *ibp = to_iport(ibdev, port);
pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);

View File

@ -0,0 +1,363 @@
// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
* Copyright(c) 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include "hfi.h"
#include "affinity.h"
#include "sdma.h"
/**
* msix_initialize() - Calculate, request and configure MSIx IRQs
* @dd: valid hfi1 devdata
*
*/
int msix_initialize(struct hfi1_devdata *dd)
{
u32 total;
int ret;
struct hfi1_msix_entry *entries;
/*
* MSIx interrupt count:
* one for the general, "slow path" interrupt
* one per used SDMA engine
* one per kernel receive context
* one for each VNIC context
* ...any new IRQs should be added here.
*/
total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_vnic_contexts;
if (total >= CCE_NUM_MSIX_VECTORS)
return -EINVAL;
ret = pci_alloc_irq_vectors(dd->pcidev, total, total, PCI_IRQ_MSIX);
if (ret < 0) {
dd_dev_err(dd, "pci_alloc_irq_vectors() failed: %d\n", ret);
return ret;
}
entries = kcalloc(total, sizeof(*dd->msix_info.msix_entries),
GFP_KERNEL);
if (!entries) {
pci_free_irq_vectors(dd->pcidev);
return -ENOMEM;
}
dd->msix_info.msix_entries = entries;
spin_lock_init(&dd->msix_info.msix_lock);
bitmap_zero(dd->msix_info.in_use_msix, total);
dd->msix_info.max_requested = total;
dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total);
return 0;
}
/**
* msix_request_irq() - Allocate a free MSIx IRQ
* @dd: valid devdata
* @arg: context information for the IRQ
* @handler: IRQ handler
* @thread: IRQ thread handler (could be NULL)
* @idx: zero base idx if multiple devices are needed
* @type: affinity IRQ type
*
* Allocate an MSIx vector if available, and then create the appropriate
* metadata needed to keep track of the PCI IRQ request.
*
* Return:
* < 0 Error
* >= 0 MSIx vector
*
*/
static int msix_request_irq(struct hfi1_devdata *dd, void *arg,
irq_handler_t handler, irq_handler_t thread,
u32 idx, enum irq_type type)
{
unsigned long nr;
int irq;
int ret;
const char *err_info;
char name[MAX_NAME_SIZE];
struct hfi1_msix_entry *me;
/* Allocate an MSIx vector */
spin_lock(&dd->msix_info.msix_lock);
nr = find_first_zero_bit(dd->msix_info.in_use_msix,
dd->msix_info.max_requested);
if (nr < dd->msix_info.max_requested)
__set_bit(nr, dd->msix_info.in_use_msix);
spin_unlock(&dd->msix_info.msix_lock);
if (nr == dd->msix_info.max_requested)
return -ENOSPC;
/* Do type-specific verification and determine the name */
switch (type) {
case IRQ_GENERAL:
/* general interrupt must be MSIx vector 0 */
if (nr) {
spin_lock(&dd->msix_info.msix_lock);
__clear_bit(nr, dd->msix_info.in_use_msix);
spin_unlock(&dd->msix_info.msix_lock);
dd_dev_err(dd, "Invalid index %lu for GENERAL IRQ\n",
nr);
return -EINVAL;
}
snprintf(name, sizeof(name), DRIVER_NAME "_%d", dd->unit);
err_info = "general";
break;
case IRQ_SDMA:
snprintf(name, sizeof(name), DRIVER_NAME "_%d sdma%d",
dd->unit, idx);
err_info = "sdma";
break;
case IRQ_RCVCTXT:
snprintf(name, sizeof(name), DRIVER_NAME "_%d kctxt%d",
dd->unit, idx);
err_info = "receive context";
break;
case IRQ_OTHER:
default:
return -EINVAL;
}
name[sizeof(name) - 1] = 0;
irq = pci_irq_vector(dd->pcidev, nr);
ret = pci_request_irq(dd->pcidev, nr, handler, thread, arg, name);
if (ret) {
dd_dev_err(dd,
"%s: request for IRQ %d failed, MSIx %d, err %d\n",
err_info, irq, idx, ret);
spin_lock(&dd->msix_info.msix_lock);
__clear_bit(nr, dd->msix_info.in_use_msix);
spin_unlock(&dd->msix_info.msix_lock);
return ret;
}
/*
* assign arg after pci_request_irq call, so it will be
* cleaned up
*/
me = &dd->msix_info.msix_entries[nr];
me->irq = irq;
me->arg = arg;
me->type = type;
/* This is a request, so a failure is not fatal */
ret = hfi1_get_irq_affinity(dd, me);
if (ret)
dd_dev_err(dd, "unable to pin IRQ %d\n", ret);
return nr;
}
/**
* msix_request_rcd_irq() - Helper function for RCVAVAIL IRQs
* @rcd: valid rcd context
*
*/
int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd)
{
int nr;
nr = msix_request_irq(rcd->dd, rcd, receive_context_interrupt,
receive_context_thread, rcd->ctxt, IRQ_RCVCTXT);
if (nr < 0)
return nr;
/*
* Set the interrupt register and mask for this
* context's interrupt.
*/
rcd->ireg = (IS_RCVAVAIL_START + rcd->ctxt) / 64;
rcd->imask = ((u64)1) << ((IS_RCVAVAIL_START + rcd->ctxt) % 64);
rcd->msix_intr = nr;
remap_intr(rcd->dd, IS_RCVAVAIL_START + rcd->ctxt, nr);
return 0;
}
/**
* msix_request_sdma_irq() - Helper for getting SDMA IRQ resources
* @sde: valid sdma engine
*
*/
int msix_request_sdma_irq(struct sdma_engine *sde)
{
int nr;
nr = msix_request_irq(sde->dd, sde, sdma_interrupt, NULL,
sde->this_idx, IRQ_SDMA);
if (nr < 0)
return nr;
sde->msix_intr = nr;
remap_sdma_interrupts(sde->dd, sde->this_idx, nr);
return 0;
}
/**
* enable_sdma_srcs() - Helper to enable SDMA IRQ sources
* @dd: valid devdata structure
* @i: index of SDMA engine
*/
static void enable_sdma_srcs(struct hfi1_devdata *dd, int i)
{
set_intr_bits(dd, IS_SDMA_START + i, IS_SDMA_START + i, true);
set_intr_bits(dd, IS_SDMA_PROGRESS_START + i,
IS_SDMA_PROGRESS_START + i, true);
set_intr_bits(dd, IS_SDMA_IDLE_START + i, IS_SDMA_IDLE_START + i, true);
set_intr_bits(dd, IS_SDMAENG_ERR_START + i, IS_SDMAENG_ERR_START + i,
true);
}
/**
* msix_request_irqs() - Allocate all MSIx IRQs
* @dd: valid devdata structure
*
* Helper function to request the used MSIx IRQs.
*
*/
int msix_request_irqs(struct hfi1_devdata *dd)
{
int i;
int ret;
ret = msix_request_irq(dd, dd, general_interrupt, NULL, 0, IRQ_GENERAL);
if (ret < 0)
return ret;
for (i = 0; i < dd->num_sdma; i++) {
struct sdma_engine *sde = &dd->per_sdma[i];
ret = msix_request_sdma_irq(sde);
if (ret)
return ret;
enable_sdma_srcs(sde->dd, i);
}
for (i = 0; i < dd->n_krcv_queues; i++) {
struct hfi1_ctxtdata *rcd = hfi1_rcd_get_by_index_safe(dd, i);
if (rcd)
ret = msix_request_rcd_irq(rcd);
hfi1_rcd_put(rcd);
if (ret)
return ret;
}
return 0;
}
/**
* msix_free_irq() - Free the specified MSIx resources and IRQ
* @dd: valid devdata
* @msix_intr: MSIx vector to free.
*
*/
void msix_free_irq(struct hfi1_devdata *dd, u8 msix_intr)
{
struct hfi1_msix_entry *me;
if (msix_intr >= dd->msix_info.max_requested)
return;
me = &dd->msix_info.msix_entries[msix_intr];
if (!me->arg) /* => no irq, no affinity */
return;
hfi1_put_irq_affinity(dd, me);
pci_free_irq(dd->pcidev, msix_intr, me->arg);
me->arg = NULL;
spin_lock(&dd->msix_info.msix_lock);
__clear_bit(msix_intr, dd->msix_info.in_use_msix);
spin_unlock(&dd->msix_info.msix_lock);
}
/**
* msix_clean_up_interrupts() - Free all MSIx IRQ resources
* @dd: valid device data structure
*
* Free the MSIx and associated PCI resources, if they have been allocated.
*/
void msix_clean_up_interrupts(struct hfi1_devdata *dd)
{
int i;
struct hfi1_msix_entry *me = dd->msix_info.msix_entries;
/* remove irqs - must happen before disabling/turning off */
for (i = 0; i < dd->msix_info.max_requested; i++, me++)
msix_free_irq(dd, i);
/* clean structures */
kfree(dd->msix_info.msix_entries);
dd->msix_info.msix_entries = NULL;
dd->msix_info.max_requested = 0;
pci_free_irq_vectors(dd->pcidev);
}
/**
* msix_vnic_synchronize_irq() - VNIC IRQ synchronize
* @dd: valid devdata
*/
void msix_vnic_synchronize_irq(struct hfi1_devdata *dd)
{
int i;
for (i = 0; i < dd->vnic.num_ctxt; i++) {
struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i];
struct hfi1_msix_entry *me;
me = &dd->msix_info.msix_entries[rcd->msix_intr];
synchronize_irq(me->irq);
}
}
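To summarize the new interface, a minimal sketch of the expected bring-up and teardown order, condensed from what init.c does elsewhere in this series; my_bringup_irqs() and my_teardown_irqs() are made-up wrappers, not driver functions.

static int my_bringup_irqs(struct hfi1_devdata *dd)
{
	int ret;

	/* size and allocate the per-device MSI-X vector pool once */
	ret = msix_initialize(dd);
	if (ret)
		return ret;

	/* request the general, SDMA and kernel receive context IRQs */
	ret = msix_request_irqs(dd);
	if (ret)
		msix_clean_up_interrupts(dd);
	return ret;
}

static void my_teardown_irqs(struct hfi1_devdata *dd)
{
	/* mask every interrupt source, then release vectors and PCI state */
	set_intr_bits(dd, IS_FIRST_SOURCE, IS_LAST_SOURCE, false);
	msix_clean_up_interrupts(dd);
}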

View File

@ -0,0 +1,64 @@
/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
/*
* Copyright(c) 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef _HFI1_MSIX_H
#define _HFI1_MSIX_H
#include "hfi.h"
/* MSIx interface */
int msix_initialize(struct hfi1_devdata *dd);
int msix_request_irqs(struct hfi1_devdata *dd);
void msix_clean_up_interrupts(struct hfi1_devdata *dd);
int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd);
int msix_request_sdma_irq(struct sdma_engine *sde);
void msix_free_irq(struct hfi1_devdata *dd, u8 msix_intr);
/* VNIC interface */
void msix_vnic_synchronize_irq(struct hfi1_devdata *dd);
#endif
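A short, hedged usage sketch of the per-context entry points declared above; my_ctxt_irq_setup() and my_ctxt_irq_teardown() are hypothetical names used only for illustration.

static int my_ctxt_irq_setup(struct hfi1_ctxtdata *rcd)
{
	/* picks a free vector, requests the IRQ and remaps the source */
	return msix_request_rcd_irq(rcd);
}

static void my_ctxt_irq_teardown(struct hfi1_devdata *dd,
				 struct hfi1_ctxtdata *rcd)
{
	/* rcd->msix_intr was recorded by msix_request_rcd_irq() */
	msix_free_irq(dd, rcd->msix_intr);
}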

View File

@ -1,5 +1,5 @@
/*
* Copyright(c) 2015 - 2017 Intel Corporation.
* Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@ -60,20 +60,13 @@
* This file contains PCIe utility routines.
*/
/*
* Code to adjust PCIe capabilities.
*/
static void tune_pcie_caps(struct hfi1_devdata *);
/*
* Do all the common PCIe setup and initialization.
* devdata is not yet allocated, and is not allocated until after this
* routine returns success. Therefore dd_dev_err() can't be used for error
* printing.
*/
int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
int hfi1_pcie_init(struct hfi1_devdata *dd)
{
int ret;
struct pci_dev *pdev = dd->pcidev;
ret = pci_enable_device(pdev);
if (ret) {
@ -89,15 +82,13 @@ int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
* about that, it appears. If the original BAR was retained
* in the kernel data structures, this may be OK.
*/
hfi1_early_err(&pdev->dev, "pci enable failed: error %d\n",
-ret);
goto done;
dd_dev_err(dd, "pci enable failed: error %d\n", -ret);
return ret;
}
ret = pci_request_regions(pdev, DRIVER_NAME);
if (ret) {
hfi1_early_err(&pdev->dev,
"pci_request_regions fails: err %d\n", -ret);
dd_dev_err(dd, "pci_request_regions fails: err %d\n", -ret);
goto bail;
}
@ -110,8 +101,7 @@ int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
*/
ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
if (ret) {
hfi1_early_err(&pdev->dev,
"Unable to set DMA mask: %d\n", ret);
dd_dev_err(dd, "Unable to set DMA mask: %d\n", ret);
goto bail;
}
ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
@ -119,18 +109,16 @@ int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
}
if (ret) {
hfi1_early_err(&pdev->dev,
"Unable to set DMA consistent mask: %d\n", ret);
dd_dev_err(dd, "Unable to set DMA consistent mask: %d\n", ret);
goto bail;
}
pci_set_master(pdev);
(void)pci_enable_pcie_error_reporting(pdev);
goto done;
return 0;
bail:
hfi1_pcie_cleanup(pdev);
done:
return ret;
}
@ -206,7 +194,7 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
dd_dev_err(dd, "WC mapping of send buffers failed\n");
goto nomem;
}
dd_dev_info(dd, "WC piobase: %p\n for %x", dd->piobase, TXE_PIO_SIZE);
dd_dev_info(dd, "WC piobase: %p for %x\n", dd->piobase, TXE_PIO_SIZE);
dd->physaddr = addr; /* used for io_remap, etc. */
@ -344,26 +332,6 @@ int pcie_speeds(struct hfi1_devdata *dd)
return 0;
}
/*
* Returns:
* - actual number of interrupts allocated or
* - error
*/
int request_msix(struct hfi1_devdata *dd, u32 msireq)
{
int nvec;
nvec = pci_alloc_irq_vectors(dd->pcidev, msireq, msireq, PCI_IRQ_MSIX);
if (nvec < 0) {
dd_dev_err(dd, "pci_alloc_irq_vectors() failed: %d\n", nvec);
return nvec;
}
tune_pcie_caps(dd);
return nvec;
}
/* restore command and BARs after a reset has wiped them out */
int restore_pci_variables(struct hfi1_devdata *dd)
{
@ -479,14 +447,19 @@ error:
* Check and optionally adjust them to maximize our throughput.
*/
static int hfi1_pcie_caps;
module_param_named(pcie_caps, hfi1_pcie_caps, int, S_IRUGO);
module_param_named(pcie_caps, hfi1_pcie_caps, int, 0444);
MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)");
uint aspm_mode = ASPM_MODE_DISABLED;
module_param_named(aspm, aspm_mode, uint, S_IRUGO);
module_param_named(aspm, aspm_mode, uint, 0444);
MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic");
static void tune_pcie_caps(struct hfi1_devdata *dd)
/**
* tune_pcie_caps() - Code to adjust PCIe capabilities.
* @dd: Valid device data structure
*
*/
void tune_pcie_caps(struct hfi1_devdata *dd)
{
struct pci_dev *parent;
u16 rc_mpss, rc_mps, ep_mpss, ep_mps;
@ -1028,6 +1001,7 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd)
const u8 (*ctle_tunings)[4];
uint static_ctle_mode;
int return_error = 0;
u32 target_width;
/* PCIe Gen3 is for the ASIC only */
if (dd->icode != ICODE_RTL_SILICON)
@ -1067,6 +1041,9 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd)
return 0;
}
/* Previous Gen1/Gen2 bus width */
target_width = dd->lbus_width;
/*
* Do the Gen3 transition. Steps are those of the PCIe Gen3
* recipe.
@ -1435,11 +1412,12 @@ retry:
dd_dev_info(dd, "%s: new speed and width: %s\n", __func__,
dd->lbus_info);
if (dd->lbus_speed != target_speed) { /* not target */
if (dd->lbus_speed != target_speed ||
dd->lbus_width < target_width) { /* not target */
/* maybe retry */
do_retry = retry_count < pcie_retry;
dd_dev_err(dd, "PCIe link speed did not switch to Gen%d%s\n",
pcie_target, do_retry ? ", retrying" : "");
dd_dev_err(dd, "PCIe link speed or width did not match target%s\n",
do_retry ? ", retrying" : "");
retry_count++;
if (do_retry) {
msleep(100); /* allow time to settle */

View File

@ -71,14 +71,6 @@ void __cm_reset(struct hfi1_devdata *dd, u64 sendctrl)
}
}
/* defined in header release 48 and higher */
#ifndef SEND_CTRL_UNSUPPORTED_VL_SHIFT
#define SEND_CTRL_UNSUPPORTED_VL_SHIFT 3
#define SEND_CTRL_UNSUPPORTED_VL_MASK 0xffull
#define SEND_CTRL_UNSUPPORTED_VL_SMASK (SEND_CTRL_UNSUPPORTED_VL_MASK \
<< SEND_CTRL_UNSUPPORTED_VL_SHIFT)
#endif
/* global control of PIO send */
void pio_send_control(struct hfi1_devdata *dd, int op)
{

View File

@ -66,7 +66,7 @@ MODULE_PARM_DESC(qp_table_size, "QP table size");
static void flush_tx_list(struct rvt_qp *qp);
static int iowait_sleep(
struct sdma_engine *sde,
struct iowait *wait,
struct iowait_work *wait,
struct sdma_txreq *stx,
unsigned int seq,
bool pkts_sent);
@ -134,15 +134,13 @@ const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = {
};
static void flush_tx_list(struct rvt_qp *qp)
static void flush_list_head(struct list_head *l)
{
struct hfi1_qp_priv *priv = qp->priv;
while (!list_empty(&priv->s_iowait.tx_head)) {
while (!list_empty(l)) {
struct sdma_txreq *tx;
tx = list_first_entry(
&priv->s_iowait.tx_head,
l,
struct sdma_txreq,
list);
list_del_init(&tx->list);
@ -151,6 +149,14 @@ static void flush_tx_list(struct rvt_qp *qp)
}
}
static void flush_tx_list(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
flush_list_head(&iowait_get_ib_work(&priv->s_iowait)->tx_head);
flush_list_head(&iowait_get_tid_work(&priv->s_iowait)->tx_head);
}
static void flush_iowait(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
@ -282,33 +288,46 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
}
/**
* hfi1_check_send_wqe - validate wqe
* hfi1_setup_wqe - set up the wqe
* @qp - The qp
* @wqe - The built wqe
* @call_send - Determine if the send should be posted or scheduled.
*
* validate wqe. This is called
* prior to inserting the wqe into
* the ring but after the wqe has been
* setup.
* Perform setup of the wqe. This is called
* prior to inserting the wqe into the ring but after
* the wqe has been set up by RDMAVT. This function
* allows the driver the opportunity to perform
* validation and additional setup of the wqe.
*
* Returns 0 on success, -EINVAL on failure
*
*/
int hfi1_check_send_wqe(struct rvt_qp *qp,
struct rvt_swqe *wqe)
int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send)
{
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
struct rvt_ah *ah;
struct hfi1_pportdata *ppd;
struct hfi1_devdata *dd;
switch (qp->ibqp.qp_type) {
case IB_QPT_RC:
case IB_QPT_UC:
if (wqe->length > 0x80000000U)
return -EINVAL;
if (wqe->length > qp->pmtu)
*call_send = false;
break;
case IB_QPT_SMI:
ah = ibah_to_rvtah(wqe->ud_wr.ah);
if (wqe->length > (1 << ah->log_pmtu))
/*
* SM packets should exclusively use VL15 and their SL is
* ignored (IBTA v1.3, Section 3.5.8.2). Therefore, when ah
* is created, SL is 0 in most cases and as a result some
* fields (vl and pmtu) in ah may not be set correctly,
* depending on the SL2SC and SC2VL tables at the time.
*/
ppd = ppd_from_ibp(ibp);
dd = dd_from_ppd(ppd);
if (wqe->length > dd->vld[15].mtu)
return -EINVAL;
break;
case IB_QPT_GSI:
@ -321,7 +340,7 @@ int hfi1_check_send_wqe(struct rvt_qp *qp,
default:
break;
}
return wqe->length <= piothreshold;
return 0;
}
/**
@ -333,7 +352,7 @@ int hfi1_check_send_wqe(struct rvt_qp *qp,
* It is only used in the post send, which doesn't hold
* the s_lock.
*/
void _hfi1_schedule_send(struct rvt_qp *qp)
bool _hfi1_schedule_send(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_ibport *ibp =
@ -341,10 +360,10 @@ void _hfi1_schedule_send(struct rvt_qp *qp)
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
priv->s_sde ?
priv->s_sde->cpu :
cpumask_first(cpumask_of_node(dd->node)));
return iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
priv->s_sde ?
priv->s_sde->cpu :
cpumask_first(cpumask_of_node(dd->node)));
}
static void qp_pio_drain(struct rvt_qp *qp)
@ -372,12 +391,32 @@ static void qp_pio_drain(struct rvt_qp *qp)
*
* This schedules qp progress and caller should hold
* the s_lock.
* @return true if the first leg is scheduled;
* false if the first leg is not scheduled.
*/
void hfi1_schedule_send(struct rvt_qp *qp)
bool hfi1_schedule_send(struct rvt_qp *qp)
{
lockdep_assert_held(&qp->s_lock);
if (hfi1_send_ok(qp))
if (hfi1_send_ok(qp)) {
_hfi1_schedule_send(qp);
return true;
}
if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
iowait_set_flag(&((struct hfi1_qp_priv *)qp->priv)->s_iowait,
IOWAIT_PENDING_IB);
return false;
}
static void hfi1_qp_schedule(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
bool ret;
if (iowait_flag_set(&priv->s_iowait, IOWAIT_PENDING_IB)) {
ret = hfi1_schedule_send(qp);
if (ret)
iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
}
}
void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
@ -388,16 +427,22 @@ void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
if (qp->s_flags & flag) {
qp->s_flags &= ~flag;
trace_hfi1_qpwakeup(qp, flag);
hfi1_schedule_send(qp);
hfi1_qp_schedule(qp);
}
spin_unlock_irqrestore(&qp->s_lock, flags);
/* Notify hfi1_destroy_qp() if it is waiting. */
rvt_put_qp(qp);
}
void hfi1_qp_unbusy(struct rvt_qp *qp, struct iowait_work *wait)
{
if (iowait_set_work_flag(wait) == IOWAIT_IB_SE)
qp->s_flags &= ~RVT_S_BUSY;
}
static int iowait_sleep(
struct sdma_engine *sde,
struct iowait *wait,
struct iowait_work *wait,
struct sdma_txreq *stx,
uint seq,
bool pkts_sent)
@ -438,7 +483,7 @@ static int iowait_sleep(
rvt_get_qp(qp);
}
write_sequnlock(&dev->iowait_lock);
qp->s_flags &= ~RVT_S_BUSY;
hfi1_qp_unbusy(qp, wait);
spin_unlock_irqrestore(&qp->s_lock, flags);
ret = -EBUSY;
} else {
@ -637,6 +682,7 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp)
&priv->s_iowait,
1,
_hfi1_do_send,
NULL,
iowait_sleep,
iowait_wakeup,
iowait_sdma_drained);
@ -686,7 +732,7 @@ void stop_send_queue(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
cancel_work_sync(&priv->s_iowait.iowork);
iowait_cancel_work(&priv->s_iowait);
}
void quiesce_qp(struct rvt_qp *qp)

View File

@ -57,18 +57,6 @@ extern unsigned int hfi1_qp_table_size;
extern const struct rvt_operation_params hfi1_post_parms[];
/*
* Send if not busy or waiting for I/O and either
* a RC response is pending or we can process send work requests.
*/
static inline int hfi1_send_ok(struct rvt_qp *qp)
{
return !(qp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT_IO)) &&
(verbs_txreq_queued(qp) ||
(qp->s_flags & RVT_S_RESP_PENDING) ||
!(qp->s_flags & RVT_S_ANY_WAIT_SEND));
}
/*
* Driver specific s_flags starting at bit 31 down to HFI1_S_MIN_BIT_MASK
*
@ -89,6 +77,20 @@ static inline int hfi1_send_ok(struct rvt_qp *qp)
#define HFI1_S_ANY_WAIT_IO (RVT_S_ANY_WAIT_IO | HFI1_S_WAIT_PIO_DRAIN)
#define HFI1_S_ANY_WAIT (HFI1_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND)
/*
* Send if not busy or waiting for I/O and either
* a RC response is pending or we can process send work requests.
*/
static inline int hfi1_send_ok(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
return !(qp->s_flags & (RVT_S_BUSY | HFI1_S_ANY_WAIT_IO)) &&
(verbs_txreq_queued(iowait_get_ib_work(&priv->s_iowait)) ||
(qp->s_flags & RVT_S_RESP_PENDING) ||
!(qp->s_flags & RVT_S_ANY_WAIT_SEND));
}
/*
* free_ahg - clear ahg from QP
*/
@ -129,8 +131,8 @@ struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5);
void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter);
void _hfi1_schedule_send(struct rvt_qp *qp);
void hfi1_schedule_send(struct rvt_qp *qp);
bool _hfi1_schedule_send(struct rvt_qp *qp);
bool hfi1_schedule_send(struct rvt_qp *qp);
void hfi1_migrate_qp(struct rvt_qp *qp);
@ -150,4 +152,5 @@ void quiesce_qp(struct rvt_qp *qp);
u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu);
int mtu_to_path_mtu(u32 mtu);
void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl);
void hfi1_qp_unbusy(struct rvt_qp *qp, struct iowait_work *wait);
#endif /* _QP_H */
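Since _hfi1_schedule_send()/hfi1_schedule_send() now return bool, here is a condensed sketch of the schedule-or-flag pattern they enable. It restates hfi1_qp_schedule() from qp.c rather than adding anything new; my_wakeup_ib_leg() is a made-up name.

static void my_wakeup_ib_leg(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;

	/* caller holds qp->s_lock, as hfi1_schedule_send() asserts */
	if (iowait_flag_set(&priv->s_iowait, IOWAIT_PENDING_IB)) {
		/* a false return means the QP was not ready; keep the flag */
		if (hfi1_schedule_send(qp))
			iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
	}
}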

View File

@ -309,7 +309,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
}
clear_ahg(qp);
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
hfi1_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
rvt_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
/* will get called again */
goto done_free_tx;
@ -378,9 +378,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
wqe->wr.ex.invalidate_rkey);
local_ops = 1;
}
hfi1_send_complete(qp, wqe,
err ? IB_WC_LOC_PROT_ERR
: IB_WC_SUCCESS);
rvt_send_complete(qp, wqe,
err ? IB_WC_LOC_PROT_ERR
: IB_WC_SUCCESS);
if (local_ops)
atomic_dec(&qp->local_ops_pending);
goto done_free_tx;
@ -1043,7 +1043,7 @@ void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
hfi1_migrate_qp(qp);
qp->s_retry = qp->s_retry_cnt;
} else if (qp->s_last == qp->s_acked) {
hfi1_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
rvt_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
return;
} else { /* need to handle delayed completion */
@ -1468,7 +1468,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
ibp->rvp.n_other_naks++;
class_b:
if (qp->s_last == qp->s_acked) {
hfi1_send_complete(qp, wqe, status);
rvt_send_complete(qp, wqe, status);
rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
}
break;
@ -1644,7 +1644,8 @@ read_middle:
qp->s_rdma_read_len -= pmtu;
update_last_psn(qp, psn);
spin_unlock_irqrestore(&qp->s_lock, flags);
hfi1_copy_sge(&qp->s_rdma_read_sge, data, pmtu, false, false);
rvt_copy_sge(qp, &qp->s_rdma_read_sge,
data, pmtu, false, false);
goto bail;
case OP(RDMA_READ_RESPONSE_ONLY):
@ -1684,7 +1685,8 @@ read_last:
if (unlikely(tlen != qp->s_rdma_read_len))
goto ack_len_err;
aeth = be32_to_cpu(ohdr->u.aeth);
hfi1_copy_sge(&qp->s_rdma_read_sge, data, tlen, false, false);
rvt_copy_sge(qp, &qp->s_rdma_read_sge,
data, tlen, false, false);
WARN_ON(qp->s_rdma_read_sge.num_sge);
(void)do_rc_ack(qp, aeth, psn,
OP(RDMA_READ_RESPONSE_LAST), 0, rcd);
@ -1704,7 +1706,7 @@ ack_len_err:
status = IB_WC_LOC_LEN_ERR;
ack_err:
if (qp->s_last == qp->s_acked) {
hfi1_send_complete(qp, wqe, status);
rvt_send_complete(qp, wqe, status);
rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
}
ack_done:
@ -2144,7 +2146,7 @@ send_middle:
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len))
goto nack_inv;
hfi1_copy_sge(&qp->r_sge, data, pmtu, true, false);
rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
break;
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
@ -2200,7 +2202,7 @@ send_last:
wc.byte_len = tlen + qp->r_rcv_len;
if (unlikely(wc.byte_len > qp->r_len))
goto nack_inv;
hfi1_copy_sge(&qp->r_sge, data, tlen, true, copy_last);
rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, copy_last);
rvt_put_ss(&qp->r_sge);
qp->r_msn++;
if (!__test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))

View File

@ -155,333 +155,6 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet)
return 0;
}
/**
* ruc_loopback - handle UC and RC loopback requests
* @sqp: the sending QP
*
* This is called from hfi1_do_send() to
* forward a WQE addressed to the same HFI.
* Note that although we are single threaded due to the send engine, we still
* have to protect against post_send(). We don't have to worry about
* receive interrupts since this is a connected protocol and all packets
* will pass through here.
*/
static void ruc_loopback(struct rvt_qp *sqp)
{
struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
struct rvt_qp *qp;
struct rvt_swqe *wqe;
struct rvt_sge *sge;
unsigned long flags;
struct ib_wc wc;
u64 sdata;
atomic64_t *maddr;
enum ib_wc_status send_status;
bool release;
int ret;
bool copy_last = false;
int local_ops = 0;
rcu_read_lock();
/*
* Note that we check the responder QP state after
* checking the requester's state.
*/
qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), &ibp->rvp,
sqp->remote_qpn);
spin_lock_irqsave(&sqp->s_lock, flags);
/* Return if we are already busy processing a work request. */
if ((sqp->s_flags & (RVT_S_BUSY | HFI1_S_ANY_WAIT)) ||
!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
goto unlock;
sqp->s_flags |= RVT_S_BUSY;
again:
if (sqp->s_last == READ_ONCE(sqp->s_head))
goto clr_busy;
wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);
/* Return if it is not OK to start a new work request. */
if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND))
goto clr_busy;
/* We are in the error state, flush the work request. */
send_status = IB_WC_WR_FLUSH_ERR;
goto flush_send;
}
/*
* We can rely on the entry not changing without the s_lock
* being held until we update s_last.
* We increment s_cur to indicate s_last is in progress.
*/
if (sqp->s_last == sqp->s_cur) {
if (++sqp->s_cur >= sqp->s_size)
sqp->s_cur = 0;
}
spin_unlock_irqrestore(&sqp->s_lock, flags);
if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
qp->ibqp.qp_type != sqp->ibqp.qp_type) {
ibp->rvp.n_pkt_drops++;
/*
* For RC, the requester would timeout and retry so
* shortcut the timeouts and just signal too many retries.
*/
if (sqp->ibqp.qp_type == IB_QPT_RC)
send_status = IB_WC_RETRY_EXC_ERR;
else
send_status = IB_WC_SUCCESS;
goto serr;
}
memset(&wc, 0, sizeof(wc));
send_status = IB_WC_SUCCESS;
release = true;
sqp->s_sge.sge = wqe->sg_list[0];
sqp->s_sge.sg_list = wqe->sg_list + 1;
sqp->s_sge.num_sge = wqe->wr.num_sge;
sqp->s_len = wqe->length;
switch (wqe->wr.opcode) {
case IB_WR_REG_MR:
goto send_comp;
case IB_WR_LOCAL_INV:
if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) {
if (rvt_invalidate_rkey(sqp,
wqe->wr.ex.invalidate_rkey))
send_status = IB_WC_LOC_PROT_ERR;
local_ops = 1;
}
goto send_comp;
case IB_WR_SEND_WITH_INV:
if (!rvt_invalidate_rkey(qp, wqe->wr.ex.invalidate_rkey)) {
wc.wc_flags = IB_WC_WITH_INVALIDATE;
wc.ex.invalidate_rkey = wqe->wr.ex.invalidate_rkey;
}
goto send;
case IB_WR_SEND_WITH_IMM:
wc.wc_flags = IB_WC_WITH_IMM;
wc.ex.imm_data = wqe->wr.ex.imm_data;
/* FALLTHROUGH */
case IB_WR_SEND:
send:
ret = rvt_get_rwqe(qp, false);
if (ret < 0)
goto op_err;
if (!ret)
goto rnr_nak;
break;
case IB_WR_RDMA_WRITE_WITH_IMM:
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
goto inv_err;
wc.wc_flags = IB_WC_WITH_IMM;
wc.ex.imm_data = wqe->wr.ex.imm_data;
ret = rvt_get_rwqe(qp, true);
if (ret < 0)
goto op_err;
if (!ret)
goto rnr_nak;
/* skip copy_last set and qp_access_flags recheck */
goto do_write;
case IB_WR_RDMA_WRITE:
copy_last = rvt_is_user_qp(qp);
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
goto inv_err;
do_write:
if (wqe->length == 0)
break;
if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
wqe->rdma_wr.remote_addr,
wqe->rdma_wr.rkey,
IB_ACCESS_REMOTE_WRITE)))
goto acc_err;
qp->r_sge.sg_list = NULL;
qp->r_sge.num_sge = 1;
qp->r_sge.total_len = wqe->length;
break;
case IB_WR_RDMA_READ:
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
goto inv_err;
if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
wqe->rdma_wr.remote_addr,
wqe->rdma_wr.rkey,
IB_ACCESS_REMOTE_READ)))
goto acc_err;
release = false;
sqp->s_sge.sg_list = NULL;
sqp->s_sge.num_sge = 1;
qp->r_sge.sge = wqe->sg_list[0];
qp->r_sge.sg_list = wqe->sg_list + 1;
qp->r_sge.num_sge = wqe->wr.num_sge;
qp->r_sge.total_len = wqe->length;
break;
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
goto inv_err;
if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
wqe->atomic_wr.remote_addr,
wqe->atomic_wr.rkey,
IB_ACCESS_REMOTE_ATOMIC)))
goto acc_err;
/* Perform atomic OP and save result. */
maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
sdata = wqe->atomic_wr.compare_add;
*(u64 *)sqp->s_sge.sge.vaddr =
(wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
(u64)atomic64_add_return(sdata, maddr) - sdata :
(u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
sdata, wqe->atomic_wr.swap);
rvt_put_mr(qp->r_sge.sge.mr);
qp->r_sge.num_sge = 0;
goto send_comp;
default:
send_status = IB_WC_LOC_QP_OP_ERR;
goto serr;
}
sge = &sqp->s_sge.sge;
while (sqp->s_len) {
u32 len = sqp->s_len;
if (len > sge->length)
len = sge->length;
if (len > sge->sge_length)
len = sge->sge_length;
WARN_ON_ONCE(len == 0);
hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, release, copy_last);
sge->vaddr += len;
sge->length -= len;
sge->sge_length -= len;
if (sge->sge_length == 0) {
if (!release)
rvt_put_mr(sge->mr);
if (--sqp->s_sge.num_sge)
*sge = *sqp->s_sge.sg_list++;
} else if (sge->length == 0 && sge->mr->lkey) {
if (++sge->n >= RVT_SEGSZ) {
if (++sge->m >= sge->mr->mapsz)
break;
sge->n = 0;
}
sge->vaddr =
sge->mr->map[sge->m]->segs[sge->n].vaddr;
sge->length =
sge->mr->map[sge->m]->segs[sge->n].length;
}
sqp->s_len -= len;
}
if (release)
rvt_put_ss(&qp->r_sge);
if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
goto send_comp;
if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
else
wc.opcode = IB_WC_RECV;
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
wc.byte_len = wqe->length;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX;
wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
wc.port_num = 1;
/* Signal completion event if the solicited bit is set. */
rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
wqe->wr.send_flags & IB_SEND_SOLICITED);
send_comp:
spin_lock_irqsave(&sqp->s_lock, flags);
ibp->rvp.n_loop_pkts++;
flush_send:
sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
hfi1_send_complete(sqp, wqe, send_status);
if (local_ops) {
atomic_dec(&sqp->local_ops_pending);
local_ops = 0;
}
goto again;
rnr_nak:
/* Handle RNR NAK */
if (qp->ibqp.qp_type == IB_QPT_UC)
goto send_comp;
ibp->rvp.n_rnr_naks++;
/*
* Note: we don't need the s_lock held since the BUSY flag
* makes this single threaded.
*/
if (sqp->s_rnr_retry == 0) {
send_status = IB_WC_RNR_RETRY_EXC_ERR;
goto serr;
}
if (sqp->s_rnr_retry_cnt < 7)
sqp->s_rnr_retry--;
spin_lock_irqsave(&sqp->s_lock, flags);
if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
goto clr_busy;
rvt_add_rnr_timer(sqp, qp->r_min_rnr_timer <<
IB_AETH_CREDIT_SHIFT);
goto clr_busy;
op_err:
send_status = IB_WC_REM_OP_ERR;
wc.status = IB_WC_LOC_QP_OP_ERR;
goto err;
inv_err:
send_status = IB_WC_REM_INV_REQ_ERR;
wc.status = IB_WC_LOC_QP_OP_ERR;
goto err;
acc_err:
send_status = IB_WC_REM_ACCESS_ERR;
wc.status = IB_WC_LOC_PROT_ERR;
err:
/* responder goes to error state */
rvt_rc_error(qp, wc.status);
serr:
spin_lock_irqsave(&sqp->s_lock, flags);
hfi1_send_complete(sqp, wqe, send_status);
if (sqp->ibqp.qp_type == IB_QPT_RC) {
int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
sqp->s_flags &= ~RVT_S_BUSY;
spin_unlock_irqrestore(&sqp->s_lock, flags);
if (lastwqe) {
struct ib_event ev;
ev.device = sqp->ibqp.device;
ev.element.qp = &sqp->ibqp;
ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
}
goto done;
}
clr_busy:
sqp->s_flags &= ~RVT_S_BUSY;
unlock:
spin_unlock_irqrestore(&sqp->s_lock, flags);
done:
rcu_read_unlock();
}
/**
* hfi1_make_grh - construct a GRH header
* @ibp: a pointer to the IB port
@ -825,8 +498,8 @@ void hfi1_do_send_from_rvt(struct rvt_qp *qp)
void _hfi1_do_send(struct work_struct *work)
{
struct iowait *wait = container_of(work, struct iowait, iowork);
struct rvt_qp *qp = iowait_to_qp(wait);
struct iowait_work *w = container_of(work, struct iowait_work, iowork);
struct rvt_qp *qp = iowait_to_qp(w->iow);
hfi1_do_send(qp, true);
}
@ -850,6 +523,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
ps.ibp = to_iport(qp->ibqp.device, qp->port_num);
ps.ppd = ppd_from_ibp(ps.ibp);
ps.in_thread = in_thread;
ps.wait = iowait_get_ib_work(&priv->s_iowait);
trace_hfi1_rc_do_send(qp, in_thread);
@ -858,7 +532,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) &
~((1 << ps.ppd->lmc) - 1)) ==
ps.ppd->lid)) {
ruc_loopback(qp);
rvt_ruc_loopback(qp);
return;
}
make_req = hfi1_make_rc_req;
@ -868,7 +542,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) &
~((1 << ps.ppd->lmc) - 1)) ==
ps.ppd->lid)) {
ruc_loopback(qp);
rvt_ruc_loopback(qp);
return;
}
make_req = hfi1_make_uc_req;
@ -883,6 +557,8 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
/* Return if we are already busy processing a work request. */
if (!hfi1_send_ok(qp)) {
if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
spin_unlock_irqrestore(&qp->s_lock, ps.flags);
return;
}
@ -896,7 +572,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
ps.pkts_sent = false;
/* insure a pre-built packet is handled */
ps.s_txreq = get_waiting_verbs_txreq(qp);
ps.s_txreq = get_waiting_verbs_txreq(ps.wait);
do {
/* Check for a constructed packet to be sent. */
if (ps.s_txreq) {
@ -907,6 +583,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
*/
if (hfi1_verbs_send(qp, &ps))
return;
/* allow other tasks to run */
if (schedule_send_yield(qp, &ps))
return;
@ -917,44 +594,3 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
iowait_starve_clear(ps.pkts_sent, &priv->s_iowait);
spin_unlock_irqrestore(&qp->s_lock, ps.flags);
}
/*
* This should be called with s_lock held.
*/
void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
enum ib_wc_status status)
{
u32 old_last, last;
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
return;
last = qp->s_last;
old_last = last;
trace_hfi1_qp_send_completion(qp, wqe, last);
if (++last >= qp->s_size)
last = 0;
trace_hfi1_qp_send_completion(qp, wqe, last);
qp->s_last = last;
/* See post_send() */
barrier();
rvt_put_swqe(wqe);
if (qp->ibqp.qp_type == IB_QPT_UD ||
qp->ibqp.qp_type == IB_QPT_SMI ||
qp->ibqp.qp_type == IB_QPT_GSI)
atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
rvt_qp_swqe_complete(qp,
wqe,
ib_hfi1_wc_opcode[wqe->wr.opcode],
status);
if (qp->s_acked == old_last)
qp->s_acked = last;
if (qp->s_cur == old_last)
qp->s_cur = last;
if (qp->s_tail == old_last)
qp->s_tail = last;
if (qp->state == IB_QPS_SQD && last == qp->s_cur)
qp->s_draining = 0;
}


@ -378,7 +378,7 @@ static inline void complete_tx(struct sdma_engine *sde,
__sdma_txclean(sde->dd, tx);
if (complete)
(*complete)(tx, res);
if (wait && iowait_sdma_dec(wait))
if (iowait_sdma_dec(wait))
iowait_drain_wakeup(wait);
}
@ -1758,7 +1758,6 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
struct iowait *wait, *nw;
struct iowait *waits[SDMA_WAIT_BATCH_SIZE];
uint i, n = 0, seq, max_idx = 0;
struct sdma_txreq *stx;
struct hfi1_ibdev *dev = &sde->dd->verbs_dev;
u8 max_starved_cnt = 0;
@ -1779,19 +1778,13 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
nw,
&sde->dmawait,
list) {
u16 num_desc = 0;
u32 num_desc;
if (!wait->wakeup)
continue;
if (n == ARRAY_SIZE(waits))
break;
if (!list_empty(&wait->tx_head)) {
stx = list_first_entry(
&wait->tx_head,
struct sdma_txreq,
list);
num_desc = stx->num_desc;
}
num_desc = iowait_get_all_desc(wait);
if (num_desc > avail)
break;
avail -= num_desc;
@ -2346,7 +2339,7 @@ static inline u16 submit_tx(struct sdma_engine *sde, struct sdma_txreq *tx)
*/
static int sdma_check_progress(
struct sdma_engine *sde,
struct iowait *wait,
struct iowait_work *wait,
struct sdma_txreq *tx,
bool pkts_sent)
{
@ -2356,12 +2349,12 @@ static int sdma_check_progress(
if (tx->num_desc <= sde->desc_avail)
return -EAGAIN;
/* pulse the head_lock */
if (wait && wait->sleep) {
if (wait && iowait_ioww_to_iow(wait)->sleep) {
unsigned seq;
seq = raw_seqcount_begin(
(const seqcount_t *)&sde->head_lock.seqcount);
ret = wait->sleep(sde, wait, tx, seq, pkts_sent);
ret = wait->iow->sleep(sde, wait, tx, seq, pkts_sent);
if (ret == -EAGAIN)
sde->desc_avail = sdma_descq_freecnt(sde);
} else {
@ -2373,7 +2366,7 @@ static int sdma_check_progress(
/**
* sdma_send_txreq() - submit a tx req to ring
* @sde: sdma engine to use
* @wait: wait structure to use when full (may be NULL)
* @wait: SE wait structure to use when full (may be NULL)
* @tx: sdma_txreq to submit
* @pkts_sent: has any packet been sent yet?
*
@ -2386,7 +2379,7 @@ static int sdma_check_progress(
* -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
*/
int sdma_send_txreq(struct sdma_engine *sde,
struct iowait *wait,
struct iowait_work *wait,
struct sdma_txreq *tx,
bool pkts_sent)
{
@ -2397,7 +2390,7 @@ int sdma_send_txreq(struct sdma_engine *sde,
/* user should have supplied entire packet */
if (unlikely(tx->tlen))
return -EINVAL;
tx->wait = wait;
tx->wait = iowait_ioww_to_iow(wait);
spin_lock_irqsave(&sde->tail_lock, flags);
retry:
if (unlikely(!__sdma_running(sde)))
@ -2406,14 +2399,14 @@ retry:
goto nodesc;
tail = submit_tx(sde, tx);
if (wait)
iowait_sdma_inc(wait);
iowait_sdma_inc(iowait_ioww_to_iow(wait));
sdma_update_tail(sde, tail);
unlock:
spin_unlock_irqrestore(&sde->tail_lock, flags);
return ret;
unlock_noconn:
if (wait)
iowait_sdma_inc(wait);
iowait_sdma_inc(iowait_ioww_to_iow(wait));
tx->next_descq_idx = 0;
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
tx->sn = sde->tail_sn++;
@ -2422,10 +2415,7 @@ unlock_noconn:
spin_lock(&sde->flushlist_lock);
list_add_tail(&tx->list, &sde->flushlist);
spin_unlock(&sde->flushlist_lock);
if (wait) {
wait->tx_count++;
wait->count += tx->num_desc;
}
iowait_inc_wait_count(wait, tx->num_desc);
schedule_work(&sde->flush_worker);
ret = -ECOMM;
goto unlock;
@ -2442,9 +2432,9 @@ nodesc:
/**
* sdma_send_txlist() - submit a list of tx req to ring
* @sde: sdma engine to use
* @wait: wait structure to use when full (may be NULL)
* @wait: SE wait structure to use when full (may be NULL)
* @tx_list: list of sdma_txreqs to submit
* @count: pointer to a u32 which, after return will contain the total number of
* @count: pointer to a u16 which, after return will contain the total number of
* sdma_txreqs removed from the tx_list. This will include sdma_txreqs
* whose SDMA descriptors are submitted to the ring and the sdma_txreqs
* which are added to SDMA engine flush list if the SDMA engine state is
@ -2467,8 +2457,8 @@ nodesc:
* -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring (wait == NULL)
* -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
*/
int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
struct list_head *tx_list, u32 *count_out)
int sdma_send_txlist(struct sdma_engine *sde, struct iowait_work *wait,
struct list_head *tx_list, u16 *count_out)
{
struct sdma_txreq *tx, *tx_next;
int ret = 0;
@ -2479,7 +2469,7 @@ int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
spin_lock_irqsave(&sde->tail_lock, flags);
retry:
list_for_each_entry_safe(tx, tx_next, tx_list, list) {
tx->wait = wait;
tx->wait = iowait_ioww_to_iow(wait);
if (unlikely(!__sdma_running(sde)))
goto unlock_noconn;
if (unlikely(tx->num_desc > sde->desc_avail))
@ -2500,8 +2490,9 @@ retry:
update_tail:
total_count = submit_count + flush_count;
if (wait) {
iowait_sdma_add(wait, total_count);
iowait_starve_clear(submit_count > 0, wait);
iowait_sdma_add(iowait_ioww_to_iow(wait), total_count);
iowait_starve_clear(submit_count > 0,
iowait_ioww_to_iow(wait));
}
if (tail != INVALID_TAIL)
sdma_update_tail(sde, tail);
@ -2511,7 +2502,7 @@ update_tail:
unlock_noconn:
spin_lock(&sde->flushlist_lock);
list_for_each_entry_safe(tx, tx_next, tx_list, list) {
tx->wait = wait;
tx->wait = iowait_ioww_to_iow(wait);
list_del_init(&tx->list);
tx->next_descq_idx = 0;
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
@ -2520,10 +2511,7 @@ unlock_noconn:
#endif
list_add_tail(&tx->list, &sde->flushlist);
flush_count++;
if (wait) {
wait->tx_count++;
wait->count += tx->num_desc;
}
iowait_inc_wait_count(wait, tx->num_desc);
}
spin_unlock(&sde->flushlist_lock);
schedule_work(&sde->flush_worker);


@ -1,7 +1,7 @@
#ifndef _HFI1_SDMA_H
#define _HFI1_SDMA_H
/*
* Copyright(c) 2015, 2016 Intel Corporation.
* Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@ -62,16 +62,6 @@
/* Hardware limit for SDMA packet size */
#define MAX_SDMA_PKT_SIZE ((16 * 1024) - 1)
#define SDMA_TXREQ_S_OK 0
#define SDMA_TXREQ_S_SENDERROR 1
#define SDMA_TXREQ_S_ABORTED 2
#define SDMA_TXREQ_S_SHUTDOWN 3
/* flags bits */
#define SDMA_TXREQ_F_URGENT 0x0001
#define SDMA_TXREQ_F_AHG_COPY 0x0002
#define SDMA_TXREQ_F_USE_AHG 0x0004
#define SDMA_MAP_NONE 0
#define SDMA_MAP_SINGLE 1
#define SDMA_MAP_PAGE 2
@ -415,6 +405,7 @@ struct sdma_engine {
struct list_head flushlist;
struct cpumask cpu_mask;
struct kobject kobj;
u32 msix_intr;
};
int sdma_init(struct hfi1_devdata *dd, u8 port);
@ -849,16 +840,16 @@ static inline int sdma_txadd_kvaddr(
dd, SDMA_MAP_SINGLE, tx, addr, len);
}
struct iowait;
struct iowait_work;
int sdma_send_txreq(struct sdma_engine *sde,
struct iowait *wait,
struct iowait_work *wait,
struct sdma_txreq *tx,
bool pkts_sent);
int sdma_send_txlist(struct sdma_engine *sde,
struct iowait *wait,
struct iowait_work *wait,
struct list_head *tx_list,
u32 *count);
u16 *count_out);
int sdma_ahg_alloc(struct sdma_engine *sde);
void sdma_ahg_free(struct sdma_engine *sde, int ahg_index);


@ -494,17 +494,18 @@ static struct kobj_type hfi1_vl2mtu_ktype = {
* Start of per-unit (or driver, in some cases, but replicated
* per unit) functions (these get a device *)
*/
static ssize_t show_rev(struct device *device, struct device_attribute *attr,
char *buf)
static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
char *buf)
{
struct hfi1_ibdev *dev =
container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev);
}
static DEVICE_ATTR_RO(hw_rev);
static ssize_t show_hfi(struct device *device, struct device_attribute *attr,
char *buf)
static ssize_t board_id_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
@ -517,8 +518,9 @@ static ssize_t show_hfi(struct device *device, struct device_attribute *attr,
ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname);
return ret;
}
static DEVICE_ATTR_RO(board_id);
static ssize_t show_boardversion(struct device *device,
static ssize_t boardversion_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
@ -528,8 +530,9 @@ static ssize_t show_boardversion(struct device *device,
/* The string printed here is already newline-terminated. */
return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion);
}
static DEVICE_ATTR_RO(boardversion);
static ssize_t show_nctxts(struct device *device,
static ssize_t nctxts_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
@ -546,8 +549,9 @@ static ssize_t show_nctxts(struct device *device,
min(dd->num_user_contexts,
(u32)dd->sc_sizes[SC_USER].count));
}
static DEVICE_ATTR_RO(nctxts);
static ssize_t show_nfreectxts(struct device *device,
static ssize_t nfreectxts_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
@ -557,8 +561,9 @@ static ssize_t show_nfreectxts(struct device *device,
/* Return the number of free user ports (contexts) available. */
return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts);
}
static DEVICE_ATTR_RO(nfreectxts);
static ssize_t show_serial(struct device *device,
static ssize_t serial_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
@ -567,8 +572,9 @@ static ssize_t show_serial(struct device *device,
return scnprintf(buf, PAGE_SIZE, "%s", dd->serial);
}
static DEVICE_ATTR_RO(serial);
static ssize_t store_chip_reset(struct device *device,
static ssize_t chip_reset_store(struct device *device,
struct device_attribute *attr, const char *buf,
size_t count)
{
@ -586,6 +592,7 @@ static ssize_t store_chip_reset(struct device *device,
bail:
return ret < 0 ? ret : count;
}
static DEVICE_ATTR_WO(chip_reset);
/*
* Convert the reported temperature from an integer (reported in
@ -598,7 +605,7 @@ bail:
/*
* Dump tempsense values, in decimal, to ease shell-scripts.
*/
static ssize_t show_tempsense(struct device *device,
static ssize_t tempsense_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
@ -622,6 +629,7 @@ static ssize_t show_tempsense(struct device *device,
}
return ret;
}
static DEVICE_ATTR_RO(tempsense);
/*
* end of per-unit (or driver, in some cases, but replicated
@ -629,24 +637,20 @@ static ssize_t show_tempsense(struct device *device,
*/
/* start of per-unit file structures and support code */
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_hfi, NULL);
static DEVICE_ATTR(nctxts, S_IRUGO, show_nctxts, NULL);
static DEVICE_ATTR(nfreectxts, S_IRUGO, show_nfreectxts, NULL);
static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
static DEVICE_ATTR(tempsense, S_IRUGO, show_tempsense, NULL);
static DEVICE_ATTR(chip_reset, S_IWUSR, NULL, store_chip_reset);
static struct attribute *hfi1_attributes[] = {
&dev_attr_hw_rev.attr,
&dev_attr_board_id.attr,
&dev_attr_nctxts.attr,
&dev_attr_nfreectxts.attr,
&dev_attr_serial.attr,
&dev_attr_boardversion.attr,
&dev_attr_tempsense.attr,
&dev_attr_chip_reset.attr,
NULL,
};
static struct device_attribute *hfi1_attributes[] = {
&dev_attr_hw_rev,
&dev_attr_board_id,
&dev_attr_nctxts,
&dev_attr_nfreectxts,
&dev_attr_serial,
&dev_attr_boardversion,
&dev_attr_tempsense,
&dev_attr_chip_reset,
const struct attribute_group ib_hfi1_attr_group = {
.attrs = hfi1_attributes,
};
int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
@ -832,12 +836,6 @@ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd)
struct device *class_dev = &dev->dev;
int i, j, ret;
for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i) {
ret = device_create_file(&dev->dev, hfi1_attributes[i]);
if (ret)
goto bail;
}
for (i = 0; i < dd->num_sdma; i++) {
ret = kobject_init_and_add(&dd->per_sdma[i].kobj,
&sde_ktype, &class_dev->kobj,
@ -855,9 +853,6 @@ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd)
return 0;
bail:
for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i)
device_remove_file(&dev->dev, hfi1_attributes[i]);
for (i = 0; i < dd->num_sdma; i++)
kobject_del(&dd->per_sdma[i].kobj);


@ -1,5 +1,5 @@
/*
* Copyright(c) 2015 - 2017 Intel Corporation.
* Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@ -62,3 +62,4 @@ __print_symbolic(etype, \
#include "trace_rx.h"
#include "trace_tx.h"
#include "trace_mmu.h"
#include "trace_iowait.h"


@ -0,0 +1,54 @@
/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
/*
* Copyright(c) 2018 Intel Corporation.
*
*/
#if !defined(__HFI1_TRACE_IOWAIT_H) || defined(TRACE_HEADER_MULTI_READ)
#define __HFI1_TRACE_IOWAIT_H
#include <linux/tracepoint.h>
#include "iowait.h"
#include "verbs.h"
#undef TRACE_SYSTEM
#define TRACE_SYSTEM hfi1_iowait
DECLARE_EVENT_CLASS(hfi1_iowait_template,
TP_PROTO(struct iowait *wait, u32 flag),
TP_ARGS(wait, flag),
TP_STRUCT__entry(/* entry */
__field(unsigned long, addr)
__field(unsigned long, flags)
__field(u32, flag)
__field(u32, qpn)
),
TP_fast_assign(/* assign */
__entry->addr = (unsigned long)wait;
__entry->flags = wait->flags;
__entry->flag = (1 << flag);
__entry->qpn = iowait_to_qp(wait)->ibqp.qp_num;
),
TP_printk(/* print */
"iowait 0x%lx qp %u flags 0x%lx flag 0x%x",
__entry->addr,
__entry->qpn,
__entry->flags,
__entry->flag
)
);
DEFINE_EVENT(hfi1_iowait_template, hfi1_iowait_set,
TP_PROTO(struct iowait *wait, u32 flag),
TP_ARGS(wait, flag));
DEFINE_EVENT(hfi1_iowait_template, hfi1_iowait_clear,
TP_PROTO(struct iowait *wait, u32 flag),
TP_ARGS(wait, flag));
#endif /* __HFI1_TRACE_IOWAIT_H */
#undef TRACE_INCLUDE_PATH
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_FILE trace_iowait
#include <trace/define_trace.h>


@ -88,7 +88,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
}
clear_ahg(qp);
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
hfi1_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
goto done_free_tx;
}
@ -140,7 +140,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
qp, wqe->wr.ex.invalidate_rkey);
local_ops = 1;
}
hfi1_send_complete(qp, wqe, err ? IB_WC_LOC_PROT_ERR
rvt_send_complete(qp, wqe, err ? IB_WC_LOC_PROT_ERR
: IB_WC_SUCCESS);
if (local_ops)
atomic_dec(&qp->local_ops_pending);
@ -426,7 +426,7 @@ send_first:
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len))
goto rewind;
hfi1_copy_sge(&qp->r_sge, data, pmtu, false, false);
rvt_copy_sge(qp, &qp->r_sge, data, pmtu, false, false);
break;
case OP(SEND_LAST_WITH_IMMEDIATE):
@ -449,7 +449,7 @@ send_last:
if (unlikely(wc.byte_len > qp->r_len))
goto rewind;
wc.opcode = IB_WC_RECV;
hfi1_copy_sge(&qp->r_sge, data, tlen, false, false);
rvt_copy_sge(qp, &qp->r_sge, data, tlen, false, false);
rvt_put_ss(&qp->s_rdma_read_sge);
last_imm:
wc.wr_id = qp->r_wr_id;
@ -523,7 +523,7 @@ rdma_first:
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len))
goto drop;
hfi1_copy_sge(&qp->r_sge, data, pmtu, true, false);
rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
break;
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
@ -550,7 +550,7 @@ rdma_last_imm:
}
wc.byte_len = qp->r_len;
wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
hfi1_copy_sge(&qp->r_sge, data, tlen, true, false);
rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
rvt_put_ss(&qp->r_sge);
goto last_imm;
@ -564,7 +564,7 @@ rdma_last:
tlen -= (hdrsize + extra_bytes);
if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
goto drop;
hfi1_copy_sge(&qp->r_sge, data, tlen, true, false);
rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
rvt_put_ss(&qp->r_sge);
break;


@ -210,8 +210,8 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
}
hfi1_make_grh(ibp, &grh, &grd, 0, 0);
hfi1_copy_sge(&qp->r_sge, &grh,
sizeof(grh), true, false);
rvt_copy_sge(qp, &qp->r_sge, &grh,
sizeof(grh), true, false);
wc.wc_flags |= IB_WC_GRH;
} else {
rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
@ -228,7 +228,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
if (len > sge->sge_length)
len = sge->sge_length;
WARN_ON_ONCE(len == 0);
hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, true, false);
rvt_copy_sge(qp, &qp->r_sge, sge->vaddr, len, true, false);
sge->vaddr += len;
sge->length -= len;
sge->sge_length -= len;
@ -518,7 +518,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
goto bail;
}
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
hfi1_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
goto done_free_tx;
}
@ -560,7 +560,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
ud_loopback(qp, wqe);
spin_lock_irqsave(&qp->s_lock, tflags);
ps->flags = tflags;
hfi1_send_complete(qp, wqe, IB_WC_SUCCESS);
rvt_send_complete(qp, wqe, IB_WC_SUCCESS);
goto done_free_tx;
}
}
@ -1019,8 +1019,8 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
goto drop;
}
if (packet->grh) {
hfi1_copy_sge(&qp->r_sge, packet->grh,
sizeof(struct ib_grh), true, false);
rvt_copy_sge(qp, &qp->r_sge, packet->grh,
sizeof(struct ib_grh), true, false);
wc.wc_flags |= IB_WC_GRH;
} else if (packet->etype == RHF_RCV_TYPE_BYPASS) {
struct ib_grh grh;
@ -1030,14 +1030,14 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
* out when creating 16B, add back the GRH here.
*/
hfi1_make_ext_grh(packet, &grh, slid, dlid);
hfi1_copy_sge(&qp->r_sge, &grh,
sizeof(struct ib_grh), true, false);
rvt_copy_sge(qp, &qp->r_sge, &grh,
sizeof(struct ib_grh), true, false);
wc.wc_flags |= IB_WC_GRH;
} else {
rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
}
hfi1_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh),
true, false);
rvt_copy_sge(qp, &qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh),
true, false);
rvt_put_ss(&qp->r_sge);
if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
return;


@ -1,5 +1,5 @@
/*
* Copyright(c) 2015 - 2017 Intel Corporation.
* Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@ -76,8 +76,7 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
static unsigned initial_pkt_count = 8;
static int user_sdma_send_pkts(struct user_sdma_request *req,
unsigned maxpkts);
static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts);
static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status);
static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq);
static void user_sdma_free_request(struct user_sdma_request *req, bool unpin);
@ -101,7 +100,7 @@ static inline u32 get_lrh_len(struct hfi1_pkt_header, u32 len);
static int defer_packet_queue(
struct sdma_engine *sde,
struct iowait *wait,
struct iowait_work *wait,
struct sdma_txreq *txreq,
uint seq,
bool pkts_sent);
@ -124,13 +123,13 @@ static struct mmu_rb_ops sdma_rb_ops = {
static int defer_packet_queue(
struct sdma_engine *sde,
struct iowait *wait,
struct iowait_work *wait,
struct sdma_txreq *txreq,
uint seq,
bool pkts_sent)
{
struct hfi1_user_sdma_pkt_q *pq =
container_of(wait, struct hfi1_user_sdma_pkt_q, busy);
container_of(wait->iow, struct hfi1_user_sdma_pkt_q, busy);
struct hfi1_ibdev *dev = &pq->dd->verbs_dev;
struct user_sdma_txreq *tx =
container_of(txreq, struct user_sdma_txreq, txreq);
@ -187,13 +186,12 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
pq->ctxt = uctxt->ctxt;
pq->subctxt = fd->subctxt;
pq->n_max_reqs = hfi1_sdma_comp_ring_size;
pq->state = SDMA_PKT_Q_INACTIVE;
atomic_set(&pq->n_reqs, 0);
init_waitqueue_head(&pq->wait);
atomic_set(&pq->n_locked, 0);
pq->mm = fd->mm;
iowait_init(&pq->busy, 0, NULL, defer_packet_queue,
iowait_init(&pq->busy, 0, NULL, NULL, defer_packet_queue,
activate_packet_queue, NULL);
pq->reqidx = 0;
@ -276,7 +274,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
/* Wait until all requests have been freed. */
wait_event_interruptible(
pq->wait,
(READ_ONCE(pq->state) == SDMA_PKT_Q_INACTIVE));
!atomic_read(&pq->n_reqs));
kfree(pq->reqs);
kfree(pq->req_in_use);
kmem_cache_destroy(pq->txreq_cache);
@ -312,6 +310,13 @@ static u8 dlid_to_selector(u16 dlid)
return mapping[hash];
}
/**
* hfi1_user_sdma_process_request() - Process and start a user sdma request
* @fd: valid file descriptor
* @iovec: array of io vectors to process
* @dim: overall iovec array size
* @count: number of io vector array entries processed
*/
int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
struct iovec *iovec, unsigned long dim,
unsigned long *count)
@ -328,7 +333,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
u8 opcode, sc, vl;
u16 pkey;
u32 slid;
int req_queued = 0;
u16 dlid;
u32 selector;
@ -392,7 +396,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
req->data_len = 0;
req->pq = pq;
req->cq = cq;
req->status = -1;
req->ahg_idx = -1;
req->iov_idx = 0;
req->sent = 0;
@ -400,12 +403,14 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
req->seqcomp = 0;
req->seqsubmitted = 0;
req->tids = NULL;
req->done = 0;
req->has_error = 0;
INIT_LIST_HEAD(&req->txps);
memcpy(&req->info, &info, sizeof(info));
/* The request is initialized, count it */
atomic_inc(&pq->n_reqs);
if (req_opcode(info.ctrl) == EXPECTED) {
/* expected must have a TID info and at least one data vector */
if (req->data_iovs < 2) {
@ -500,7 +505,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
ret = pin_vector_pages(req, &req->iovs[i]);
if (ret) {
req->data_iovs = i;
req->status = ret;
goto free_req;
}
req->data_len += req->iovs[i].iov.iov_len;
@ -561,23 +565,11 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
req->ahg_idx = sdma_ahg_alloc(req->sde);
set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
atomic_inc(&pq->n_reqs);
req_queued = 1;
pq->state = SDMA_PKT_Q_ACTIVE;
/* Send the first N packets in the request to buy us some time */
ret = user_sdma_send_pkts(req, pcount);
if (unlikely(ret < 0 && ret != -EBUSY)) {
req->status = ret;
if (unlikely(ret < 0 && ret != -EBUSY))
goto free_req;
}
/*
* It is possible that the SDMA engine would have processed all the
* submitted packets by the time we get here. Therefore, only set
* packet queue state to ACTIVE if there are still uncompleted
* requests.
*/
if (atomic_read(&pq->n_reqs))
xchg(&pq->state, SDMA_PKT_Q_ACTIVE);
/*
* This is a somewhat blocking send implementation.
@ -588,14 +580,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
while (req->seqsubmitted != req->info.npkts) {
ret = user_sdma_send_pkts(req, pcount);
if (ret < 0) {
if (ret != -EBUSY) {
req->status = ret;
WRITE_ONCE(req->has_error, 1);
if (READ_ONCE(req->seqcomp) ==
req->seqsubmitted - 1)
goto free_req;
return ret;
}
if (ret != -EBUSY)
goto free_req;
wait_event_interruptible_timeout(
pq->busy.wait_dma,
(pq->state == SDMA_PKT_Q_ACTIVE),
@ -606,10 +592,19 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
*count += idx;
return 0;
free_req:
user_sdma_free_request(req, true);
if (req_queued)
/*
* If the submitted seqsubmitted == npkts, the completion routine
* controls the final state. If sequbmitted < npkts, wait for any
* outstanding packets to finish before cleaning up.
*/
if (req->seqsubmitted < req->info.npkts) {
if (req->seqsubmitted)
wait_event(pq->busy.wait_dma,
(req->seqcomp == req->seqsubmitted - 1));
user_sdma_free_request(req, true);
pq_update(pq);
set_comp_state(pq, cq, info.comp_idx, ERROR, req->status);
set_comp_state(pq, cq, info.comp_idx, ERROR, ret);
}
return ret;
}
@ -760,9 +755,10 @@ static int user_sdma_txadd(struct user_sdma_request *req,
return ret;
}
static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts)
{
int ret = 0, count;
int ret = 0;
u16 count;
unsigned npkts = 0;
struct user_sdma_txreq *tx = NULL;
struct hfi1_user_sdma_pkt_q *pq = NULL;
@ -864,8 +860,10 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
changes = set_txreq_header_ahg(req, tx,
datalen);
if (changes < 0)
if (changes < 0) {
ret = changes;
goto free_tx;
}
}
} else {
ret = sdma_txinit(&tx->txreq, 0, sizeof(req->hdr) +
@ -914,10 +912,11 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
npkts++;
}
dosend:
ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count);
ret = sdma_send_txlist(req->sde,
iowait_get_ib_work(&pq->busy),
&req->txps, &count);
req->seqsubmitted += count;
if (req->seqsubmitted == req->info.npkts) {
WRITE_ONCE(req->done, 1);
/*
* The txreq has already been submitted to the HW queue
* so we can free the AHG entry now. Corruption will not
@ -1365,11 +1364,15 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
return idx;
}
/*
* SDMA tx request completion callback. Called when the SDMA progress
* state machine gets notification that the SDMA descriptors for this
* tx request have been processed by the DMA engine. Called in
* interrupt context.
/**
* user_sdma_txreq_cb() - SDMA tx request completion callback.
* @txreq: valid sdma tx request
* @status: success/failure of request
*
* Called when the SDMA progress state machine gets notification that
* the SDMA descriptors for this tx request have been processed by the
* DMA engine. Called in interrupt context.
* Only do work on completed sequences.
*/
static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
{
@ -1378,7 +1381,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
struct user_sdma_request *req;
struct hfi1_user_sdma_pkt_q *pq;
struct hfi1_user_sdma_comp_q *cq;
u16 idx;
enum hfi1_sdma_comp_state state = COMPLETE;
if (!tx->req)
return;
@ -1391,39 +1394,25 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
SDMA_DBG(req, "SDMA completion with error %d",
status);
WRITE_ONCE(req->has_error, 1);
state = ERROR;
}
req->seqcomp = tx->seqnum;
kmem_cache_free(pq->txreq_cache, tx);
tx = NULL;
idx = req->info.comp_idx;
if (req->status == -1 && status == SDMA_TXREQ_S_OK) {
if (req->seqcomp == req->info.npkts - 1) {
req->status = 0;
user_sdma_free_request(req, false);
pq_update(pq);
set_comp_state(pq, cq, idx, COMPLETE, 0);
}
} else {
if (status != SDMA_TXREQ_S_OK)
req->status = status;
if (req->seqcomp == (READ_ONCE(req->seqsubmitted) - 1) &&
(READ_ONCE(req->done) ||
READ_ONCE(req->has_error))) {
user_sdma_free_request(req, false);
pq_update(pq);
set_comp_state(pq, cq, idx, ERROR, req->status);
}
}
/* sequence isn't complete? We are done */
if (req->seqcomp != req->info.npkts - 1)
return;
user_sdma_free_request(req, false);
set_comp_state(pq, cq, req->info.comp_idx, state, status);
pq_update(pq);
}
static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq)
{
if (atomic_dec_and_test(&pq->n_reqs)) {
xchg(&pq->state, SDMA_PKT_Q_INACTIVE);
if (atomic_dec_and_test(&pq->n_reqs))
wake_up(&pq->wait);
}
}
static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
@ -1448,6 +1437,8 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
if (!node)
continue;
req->iovs[i].node = NULL;
if (unpin)
hfi1_mmu_rb_remove(req->pq->handler,
&node->rb);


@ -105,9 +105,10 @@ static inline int ahg_header_set(u32 *arr, int idx, size_t array_size,
#define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */
#define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */
#define SDMA_PKT_Q_INACTIVE BIT(0)
#define SDMA_PKT_Q_ACTIVE BIT(1)
#define SDMA_PKT_Q_DEFERRED BIT(2)
enum pkt_q_sdma_state {
SDMA_PKT_Q_ACTIVE,
SDMA_PKT_Q_DEFERRED,
};
/*
* Maximum retry attempts to submit a TX request
@ -133,7 +134,7 @@ struct hfi1_user_sdma_pkt_q {
struct user_sdma_request *reqs;
unsigned long *req_in_use;
struct iowait busy;
unsigned state;
enum pkt_q_sdma_state state;
wait_queue_head_t wait;
unsigned long unpinned;
struct mmu_rb_handler *handler;
@ -203,14 +204,12 @@ struct user_sdma_request {
s8 ahg_idx;
/* Writeable fields shared with interrupt */
u64 seqcomp ____cacheline_aligned_in_smp;
u64 seqsubmitted;
/* status of the last txreq completed */
int status;
u16 seqcomp ____cacheline_aligned_in_smp;
u16 seqsubmitted;
/* Send side fields */
struct list_head txps ____cacheline_aligned_in_smp;
u64 seqnum;
u16 seqnum;
/*
* KDETH.OFFSET (TID) field
* The offset can cover multiple packets, depending on the
@ -228,7 +227,6 @@ struct user_sdma_request {
u16 tididx;
/* progress index moving along the iovs array */
u8 iov_idx;
u8 done;
u8 has_error;
struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ];
@ -248,7 +246,7 @@ struct user_sdma_txreq {
struct user_sdma_request *req;
u16 flags;
unsigned int busycount;
u64 seqnum;
u16 seqnum;
};
int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,


@ -129,8 +129,6 @@ unsigned short piothreshold = 256;
module_param(piothreshold, ushort, S_IRUGO);
MODULE_PARM_DESC(piothreshold, "size used to determine sdma vs. pio");
#define COPY_CACHELESS 1
#define COPY_ADAPTIVE 2
static unsigned int sge_copy_mode;
module_param(sge_copy_mode, uint, S_IRUGO);
MODULE_PARM_DESC(sge_copy_mode,
@ -151,159 +149,13 @@ static int pio_wait(struct rvt_qp *qp,
/* 16B trailing buffer */
static const u8 trail_buf[MAX_16B_PADDING];
static uint wss_threshold;
static uint wss_threshold = 80;
module_param(wss_threshold, uint, S_IRUGO);
MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy");
static uint wss_clean_period = 256;
module_param(wss_clean_period, uint, S_IRUGO);
MODULE_PARM_DESC(wss_clean_period, "Count of verbs copies before an entry in the page copy table is cleaned");
/* memory working set size */
struct hfi1_wss {
unsigned long *entries;
atomic_t total_count;
atomic_t clean_counter;
atomic_t clean_entry;
int threshold;
int num_entries;
long pages_mask;
};
static struct hfi1_wss wss;
int hfi1_wss_init(void)
{
long llc_size;
long llc_bits;
long table_size;
long table_bits;
/* check for a valid percent range - default to 80 if none or invalid */
if (wss_threshold < 1 || wss_threshold > 100)
wss_threshold = 80;
/* reject a wildly large period */
if (wss_clean_period > 1000000)
wss_clean_period = 256;
/* reject a zero period */
if (wss_clean_period == 0)
wss_clean_period = 1;
/*
* Calculate the table size - the next power of 2 larger than the
* LLC size. LLC size is in KiB.
*/
llc_size = wss_llc_size() * 1024;
table_size = roundup_pow_of_two(llc_size);
/* one bit per page in rounded up table */
llc_bits = llc_size / PAGE_SIZE;
table_bits = table_size / PAGE_SIZE;
wss.pages_mask = table_bits - 1;
wss.num_entries = table_bits / BITS_PER_LONG;
wss.threshold = (llc_bits * wss_threshold) / 100;
if (wss.threshold == 0)
wss.threshold = 1;
atomic_set(&wss.clean_counter, wss_clean_period);
wss.entries = kcalloc(wss.num_entries, sizeof(*wss.entries),
GFP_KERNEL);
if (!wss.entries) {
hfi1_wss_exit();
return -ENOMEM;
}
return 0;
}
void hfi1_wss_exit(void)
{
/* coded to handle partially initialized and repeat callers */
kfree(wss.entries);
wss.entries = NULL;
}
/*
* Advance the clean counter. When the clean period has expired,
* clean an entry.
*
* This is implemented in atomics to avoid locking. Because multiple
* variables are involved, it can be racy which can lead to slightly
* inaccurate information. Since this is only a heuristic, this is
* OK. Any innaccuracies will clean themselves out as the counter
* advances. That said, it is unlikely the entry clean operation will
* race - the next possible racer will not start until the next clean
* period.
*
* The clean counter is implemented as a decrement to zero. When zero
* is reached an entry is cleaned.
*/
static void wss_advance_clean_counter(void)
{
int entry;
int weight;
unsigned long bits;
/* become the cleaner if we decrement the counter to zero */
if (atomic_dec_and_test(&wss.clean_counter)) {
/*
* Set, not add, the clean period. This avoids an issue
* where the counter could decrement below the clean period.
* Doing a set can result in lost decrements, slowing the
* clean advance. Since this a heuristic, this possible
* slowdown is OK.
*
* An alternative is to loop, advancing the counter by a
* clean period until the result is > 0. However, this could
* lead to several threads keeping another in the clean loop.
* This could be mitigated by limiting the number of times
* we stay in the loop.
*/
atomic_set(&wss.clean_counter, wss_clean_period);
/*
* Uniquely grab the entry to clean and move to next.
* The current entry is always the lower bits of
* wss.clean_entry. The table size, wss.num_entries,
* is always a power-of-2.
*/
entry = (atomic_inc_return(&wss.clean_entry) - 1)
& (wss.num_entries - 1);
/* clear the entry and count the bits */
bits = xchg(&wss.entries[entry], 0);
weight = hweight64((u64)bits);
/* only adjust the contended total count if needed */
if (weight)
atomic_sub(weight, &wss.total_count);
}
}
/*
* Insert the given address into the working set array.
*/
static void wss_insert(void *address)
{
u32 page = ((unsigned long)address >> PAGE_SHIFT) & wss.pages_mask;
u32 entry = page / BITS_PER_LONG; /* assumes this ends up a shift */
u32 nr = page & (BITS_PER_LONG - 1);
if (!test_and_set_bit(nr, &wss.entries[entry]))
atomic_inc(&wss.total_count);
wss_advance_clean_counter();
}
/*
* Is the working set larger than the threshold?
*/
static inline bool wss_exceeds_threshold(void)
{
return atomic_read(&wss.total_count) >= wss.threshold;
}
/*
* Translate ib_wr_opcode into ib_wc_opcode.
*/
@ -438,79 +290,6 @@ static const u32 pio_opmask[BIT(3)] = {
*/
__be64 ib_hfi1_sys_image_guid;
/**
* hfi1_copy_sge - copy data to SGE memory
* @ss: the SGE state
* @data: the data to copy
* @length: the length of the data
* @release: boolean to release MR
* @copy_last: do a separate copy of the last 8 bytes
*/
void hfi1_copy_sge(
struct rvt_sge_state *ss,
void *data, u32 length,
bool release,
bool copy_last)
{
struct rvt_sge *sge = &ss->sge;
int i;
bool in_last = false;
bool cacheless_copy = false;
if (sge_copy_mode == COPY_CACHELESS) {
cacheless_copy = length >= PAGE_SIZE;
} else if (sge_copy_mode == COPY_ADAPTIVE) {
if (length >= PAGE_SIZE) {
/*
* NOTE: this *assumes*:
* o The first vaddr is the dest.
* o If multiple pages, then vaddr is sequential.
*/
wss_insert(sge->vaddr);
if (length >= (2 * PAGE_SIZE))
wss_insert(sge->vaddr + PAGE_SIZE);
cacheless_copy = wss_exceeds_threshold();
} else {
wss_advance_clean_counter();
}
}
if (copy_last) {
if (length > 8) {
length -= 8;
} else {
copy_last = false;
in_last = true;
}
}
again:
while (length) {
u32 len = rvt_get_sge_length(sge, length);
WARN_ON_ONCE(len == 0);
if (unlikely(in_last)) {
/* enforce byte transfer ordering */
for (i = 0; i < len; i++)
((u8 *)sge->vaddr)[i] = ((u8 *)data)[i];
} else if (cacheless_copy) {
cacheless_memcpy(sge->vaddr, data, len);
} else {
memcpy(sge->vaddr, data, len);
}
rvt_update_sge(ss, len, release);
data += len;
length -= len;
}
if (copy_last) {
copy_last = false;
in_last = true;
length = 8;
goto again;
}
}
/*
* Make sure the QP is ready and able to accept the given opcode.
*/
@ -713,7 +492,7 @@ static void verbs_sdma_complete(
spin_lock(&qp->s_lock);
if (tx->wqe) {
hfi1_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
rvt_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
} else if (qp->ibqp.qp_type == IB_QPT_RC) {
struct hfi1_opa_header *hdr;
@ -737,7 +516,7 @@ static int wait_kmem(struct hfi1_ibdev *dev,
if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
write_seqlock(&dev->iowait_lock);
list_add_tail(&ps->s_txreq->txreq.list,
&priv->s_iowait.tx_head);
&ps->wait->tx_head);
if (list_empty(&priv->s_iowait.list)) {
if (list_empty(&dev->memwait))
mod_timer(&dev->mem_timer, jiffies + 1);
@ -748,7 +527,7 @@ static int wait_kmem(struct hfi1_ibdev *dev,
rvt_get_qp(qp);
}
write_sequnlock(&dev->iowait_lock);
qp->s_flags &= ~RVT_S_BUSY;
hfi1_qp_unbusy(qp, ps->wait);
ret = -EBUSY;
}
spin_unlock_irqrestore(&qp->s_lock, flags);
@ -950,8 +729,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (unlikely(ret))
goto bail_build;
}
ret = sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq,
ps->pkts_sent);
ret = sdma_send_txreq(tx->sde, ps->wait, &tx->txreq, ps->pkts_sent);
if (unlikely(ret < 0)) {
if (ret == -ECOMM)
goto bail_ecomm;
@ -1001,7 +779,7 @@ static int pio_wait(struct rvt_qp *qp,
if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
write_seqlock(&dev->iowait_lock);
list_add_tail(&ps->s_txreq->txreq.list,
&priv->s_iowait.tx_head);
&ps->wait->tx_head);
if (list_empty(&priv->s_iowait.list)) {
struct hfi1_ibdev *dev = &dd->verbs_dev;
int was_empty;
@ -1020,7 +798,7 @@ static int pio_wait(struct rvt_qp *qp,
hfi1_sc_wantpiobuf_intr(sc, 1);
}
write_sequnlock(&dev->iowait_lock);
qp->s_flags &= ~RVT_S_BUSY;
hfi1_qp_unbusy(qp, ps->wait);
ret = -EBUSY;
}
spin_unlock_irqrestore(&qp->s_lock, flags);
@ -1160,7 +938,7 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
pio_bail:
if (qp->s_wqe) {
spin_lock_irqsave(&qp->s_lock, flags);
hfi1_send_complete(qp, qp->s_wqe, wc_status);
rvt_send_complete(qp, qp->s_wqe, wc_status);
spin_unlock_irqrestore(&qp->s_lock, flags);
} else if (qp->ibqp.qp_type == IB_QPT_RC) {
spin_lock_irqsave(&qp->s_lock, flags);
@ -1367,7 +1145,7 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
hfi1_cdbg(PIO, "%s() Failed. Completing with err",
__func__);
spin_lock_irqsave(&qp->s_lock, flags);
hfi1_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
rvt_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
spin_unlock_irqrestore(&qp->s_lock, flags);
}
return -EINVAL;
@ -1943,7 +1721,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
dd->verbs_dev.rdi.driver_f.check_modify_qp = hfi1_check_modify_qp;
dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp;
dd->verbs_dev.rdi.driver_f.notify_restart_rc = hfi1_restart_rc;
dd->verbs_dev.rdi.driver_f.check_send_wqe = hfi1_check_send_wqe;
dd->verbs_dev.rdi.driver_f.setup_wqe = hfi1_setup_wqe;
dd->verbs_dev.rdi.driver_f.comp_vect_cpu_lookup =
hfi1_comp_vect_mappings_lookup;
@ -1956,10 +1734,16 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
dd->verbs_dev.rdi.dparms.lkey_table_size = hfi1_lkey_table_size;
dd->verbs_dev.rdi.dparms.nports = dd->num_pports;
dd->verbs_dev.rdi.dparms.npkeys = hfi1_get_npkeys(dd);
dd->verbs_dev.rdi.dparms.sge_copy_mode = sge_copy_mode;
dd->verbs_dev.rdi.dparms.wss_threshold = wss_threshold;
dd->verbs_dev.rdi.dparms.wss_clean_period = wss_clean_period;
/* post send table */
dd->verbs_dev.rdi.post_parms = hfi1_post_parms;
/* opcode translation table */
dd->verbs_dev.rdi.wc_opcode = ib_hfi1_wc_opcode;
ppd = dd->pport;
for (i = 0; i < dd->num_pports; i++, ppd++)
rvt_init_port(&dd->verbs_dev.rdi,
@ -1967,6 +1751,9 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
i,
ppd->pkeys);
rdma_set_device_sysfs_group(&dd->verbs_dev.rdi.ibdev,
&ib_hfi1_attr_group);
ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_HFI1);
if (ret)
goto err_verbs_txreq;


@ -166,11 +166,13 @@ struct hfi1_qp_priv {
* This structure is used to hold commonly lookedup and computed values during
* the send engine progress.
*/
struct iowait_work;
struct hfi1_pkt_state {
struct hfi1_ibdev *dev;
struct hfi1_ibport *ibp;
struct hfi1_pportdata *ppd;
struct verbs_txreq *s_txreq;
struct iowait_work *wait;
unsigned long flags;
unsigned long timeout;
unsigned long timeout_int;
@ -247,7 +249,7 @@ static inline struct hfi1_ibdev *to_idev(struct ib_device *ibdev)
return container_of(rdi, struct hfi1_ibdev, rdi);
}
static inline struct rvt_qp *iowait_to_qp(struct iowait *s_iowait)
{
struct hfi1_qp_priv *priv;
@ -313,9 +315,6 @@ void hfi1_put_txreq(struct verbs_txreq *tx);
int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
void hfi1_copy_sge(struct rvt_sge_state *ss, void *data, u32 length,
bool release, bool copy_last);
void hfi1_cnp_rcv(struct hfi1_packet *packet);
void hfi1_uc_rcv(struct hfi1_packet *packet);
@ -343,7 +342,8 @@ int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait);
int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe,
bool *call_send);
extern const u32 rc_only_opcode;
extern const u32 uc_only_opcode;
@ -363,9 +363,6 @@ void hfi1_do_send_from_rvt(struct rvt_qp *qp);
void hfi1_do_send(struct rvt_qp *qp, bool in_thread);
void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
enum ib_wc_status status);
void hfi1_send_rc_ack(struct hfi1_packet *packet, bool is_fecn);
int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
@ -390,28 +387,6 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
u64 pbc);
int hfi1_wss_init(void);
void hfi1_wss_exit(void);
/* platform specific: return the lowest level cache (llc) size, in KiB */
static inline int wss_llc_size(void)
{
/* assume that the boot CPU value is universal for all CPUs */
return boot_cpu_data.x86_cache_size;
}
/* platform specific: cacheless copy */
static inline void cacheless_memcpy(void *dst, void *src, size_t n)
{
/*
* Use the only available X64 cacheless copy. Add a __user cast
* to quiet sparse. The src agument is already in the kernel so
* there are no security issues. The extra fault recovery machinery
* is not invoked.
*/
__copy_user_nocache(dst, (void __user *)src, n, 0);
}
static inline bool opa_bth_is_migration(struct ib_other_headers *ohdr)
{
return ohdr->bth[1] & cpu_to_be32(OPA_BTH_MIG_REQ);


@ -102,22 +102,19 @@ static inline struct sdma_txreq *get_sdma_txreq(struct verbs_txreq *tx)
return &tx->txreq;
}
static inline struct verbs_txreq *get_waiting_verbs_txreq(struct rvt_qp *qp)
static inline struct verbs_txreq *get_waiting_verbs_txreq(struct iowait_work *w)
{
struct sdma_txreq *stx;
struct hfi1_qp_priv *priv = qp->priv;
stx = iowait_get_txhead(&priv->s_iowait);
stx = iowait_get_txhead(w);
if (stx)
return container_of(stx, struct verbs_txreq, txreq);
return NULL;
}
static inline bool verbs_txreq_queued(struct rvt_qp *qp)
static inline bool verbs_txreq_queued(struct iowait_work *w)
{
struct hfi1_qp_priv *priv = qp->priv;
return iowait_packet_queued(&priv->s_iowait);
return iowait_packet_queued(w);
}
void hfi1_put_txreq(struct verbs_txreq *tx);


@ -120,7 +120,7 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
uctxt->seq_cnt = 1;
uctxt->is_vnic = true;
hfi1_set_vnic_msix_info(uctxt);
msix_request_rcd_irq(uctxt);
hfi1_stats.sps_ctxts++;
dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt);
@ -135,8 +135,6 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt);
flush_wc();
hfi1_reset_vnic_msix_info(uctxt);
/*
* Disable receive context and interrupt available, reset all
* RcvCtxtCtrl bits to default values.
@ -148,6 +146,10 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);
/* msix_intr will always be > 0, only clean up if this is true */
if (uctxt->msix_intr)
msix_free_irq(dd, uctxt->msix_intr);
uctxt->event_flags = 0;
hfi1_clear_tids(uctxt);
@ -626,7 +628,7 @@ static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo)
idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id);
/* ensure irqs see the change */
hfi1_vnic_synchronize_irq(dd);
msix_vnic_synchronize_irq(dd);
/* remove unread skbs */
for (i = 0; i < vinfo->num_rx_q; i++) {
@ -690,8 +692,6 @@ static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
rc = hfi1_vnic_txreq_init(dd);
if (rc)
goto txreq_fail;
dd->vnic.msix_idx = dd->first_dyn_msix_idx;
}
for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) {


@ -1,5 +1,5 @@
/*
* Copyright(c) 2017 Intel Corporation.
* Copyright(c) 2017 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@ -198,8 +198,8 @@ int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx,
goto free_desc;
tx->retry_count = 0;
ret = sdma_send_txreq(sde, &vnic_sdma->wait, &tx->txreq,
vnic_sdma->pkts_sent);
ret = sdma_send_txreq(sde, iowait_get_ib_work(&vnic_sdma->wait),
&tx->txreq, vnic_sdma->pkts_sent);
/* When -ECOMM, sdma callback will be called with ABORT status */
if (unlikely(ret && unlikely(ret != -ECOMM)))
goto free_desc;
@ -230,13 +230,13 @@ tx_err:
* become available.
*/
static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
struct iowait *wait,
struct iowait_work *wait,
struct sdma_txreq *txreq,
uint seq,
bool pkts_sent)
{
struct hfi1_vnic_sdma *vnic_sdma =
container_of(wait, struct hfi1_vnic_sdma, wait);
container_of(wait->iow, struct hfi1_vnic_sdma, wait);
struct hfi1_ibdev *dev = &vnic_sdma->dd->verbs_dev;
struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq);
@ -247,7 +247,7 @@ static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED;
write_seqlock(&dev->iowait_lock);
if (list_empty(&vnic_sdma->wait.list))
iowait_queue(pkts_sent, wait, &sde->dmawait);
iowait_queue(pkts_sent, wait->iow, &sde->dmawait);
write_sequnlock(&dev->iowait_lock);
return -EBUSY;
}
@ -285,7 +285,8 @@ void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo)
for (i = 0; i < vinfo->num_tx_q; i++) {
struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[i];
iowait_init(&vnic_sdma->wait, 0, NULL, hfi1_vnic_sdma_sleep,
iowait_init(&vnic_sdma->wait, 0, NULL, NULL,
hfi1_vnic_sdma_sleep,
hfi1_vnic_sdma_wakeup, NULL);
vnic_sdma->sde = &vinfo->dd->per_sdma[i];
vnic_sdma->dd = vinfo->dd;
@ -295,10 +296,12 @@ void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo)
/* Add a free descriptor watermark for wakeups */
if (vnic_sdma->sde->descq_cnt > HFI1_VNIC_SDMA_DESC_WTRMRK) {
struct iowait_work *work;
INIT_LIST_HEAD(&vnic_sdma->stx.list);
vnic_sdma->stx.num_desc = HFI1_VNIC_SDMA_DESC_WTRMRK;
list_add_tail(&vnic_sdma->stx.list,
&vnic_sdma->wait.tx_head);
work = iowait_get_ib_work(&vnic_sdma->wait);
list_add_tail(&vnic_sdma->stx.list, &work->tx_head);
}
}
}


@ -1,6 +1,7 @@
config INFINIBAND_HNS
tristate "HNS RoCE Driver"
depends on NET_VENDOR_HISILICON
depends on INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS
depends on ARM64 || (COMPILE_TEST && 64BIT)
---help---
This is a RoCE/RDMA driver for the Hisilicon RoCE engine. The engine


@ -49,6 +49,7 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd,
struct hns_roce_ah *ah;
u16 vlan_tag = 0xffff;
const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
bool vlan_en = false;
ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
if (!ah)
@ -58,8 +59,10 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd,
memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN);
gid_attr = ah_attr->grh.sgid_attr;
if (is_vlan_dev(gid_attr->ndev))
if (is_vlan_dev(gid_attr->ndev)) {
vlan_tag = vlan_dev_vlan_id(gid_attr->ndev);
vlan_en = true;
}
if (vlan_tag < 0x1000)
vlan_tag |= (rdma_ah_get_sl(ah_attr) &
@ -71,6 +74,7 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd,
HNS_ROCE_PORT_NUM_SHIFT));
ah->av.gid_index = grh->sgid_index;
ah->av.vlan = cpu_to_le16(vlan_tag);
ah->av.vlan_en = vlan_en;
dev_dbg(dev, "gid_index = 0x%x,vlan = 0x%x\n", ah->av.gid_index,
ah->av.vlan);


@ -88,8 +88,11 @@
#define BITMAP_RR 1
#define MR_TYPE_MR 0x00
#define MR_TYPE_FRMR 0x01
#define MR_TYPE_DMA 0x03
#define HNS_ROCE_FRMR_MAX_PA 512
#define PKEY_ID 0xffff
#define GUID_LEN 8
#define NODE_DESC_SIZE 64
@ -193,6 +196,9 @@ enum {
HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(2),
HNS_ROCE_CAP_FLAG_RECORD_DB = BIT(3),
HNS_ROCE_CAP_FLAG_SQ_RECORD_DB = BIT(4),
HNS_ROCE_CAP_FLAG_MW = BIT(7),
HNS_ROCE_CAP_FLAG_FRMR = BIT(8),
HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10),
};
enum hns_roce_mtt_type {
@ -219,19 +225,11 @@ struct hns_roce_uar {
unsigned long logic_idx;
};
struct hns_roce_vma_data {
struct list_head list;
struct vm_area_struct *vma;
struct mutex *vma_list_mutex;
};
struct hns_roce_ucontext {
struct ib_ucontext ibucontext;
struct hns_roce_uar uar;
struct list_head page_list;
struct mutex page_mutex;
struct list_head vma_list;
struct mutex vma_list_mutex;
};
struct hns_roce_pd {
@ -293,6 +291,16 @@ struct hns_roce_mtt {
enum hns_roce_mtt_type mtt_type;
};
struct hns_roce_mw {
struct ib_mw ibmw;
u32 pdn;
u32 rkey;
int enabled; /* MW's active status */
u32 pbl_hop_num;
u32 pbl_ba_pg_sz;
u32 pbl_buf_pg_sz;
};
/* Only support 4K page size for mr register */
#define MR_SIZE_4K 0
@ -304,6 +312,7 @@ struct hns_roce_mr {
u32 key; /* Key of MR */
u32 pd; /* PD num of MR */
u32 access;/* Access permission of MR */
u32 npages;
int enabled; /* MR's active status */
int type; /* MR's register type */
u64 *pbl_buf;/* MR's PBL space */
@ -457,6 +466,7 @@ struct hns_roce_av {
u8 dgid[HNS_ROCE_GID_SIZE];
u8 mac[6];
__le16 vlan;
bool vlan_en;
};
struct hns_roce_ah {
@ -656,6 +666,7 @@ struct hns_roce_eq_table {
};
struct hns_roce_caps {
u64 fw_ver;
u8 num_ports;
int gid_table_len[HNS_ROCE_MAX_PORTS];
int pkey_table_len[HNS_ROCE_MAX_PORTS];
@ -665,7 +676,9 @@ struct hns_roce_caps {
u32 max_sq_sg; /* 2 */
u32 max_sq_inline; /* 32 */
u32 max_rq_sg; /* 2 */
u32 max_extend_sg;
int num_qps; /* 256k */
int reserved_qps;
u32 max_wqes; /* 16k */
u32 max_sq_desc_sz; /* 64 */
u32 max_rq_desc_sz; /* 64 */
@ -738,6 +751,7 @@ struct hns_roce_work {
struct hns_roce_dev *hr_dev;
struct work_struct work;
u32 qpn;
u32 cqn;
int event_type;
int sub_type;
};
@ -764,6 +778,8 @@ struct hns_roce_hw {
struct hns_roce_mr *mr, int flags, u32 pdn,
int mr_access_flags, u64 iova, u64 size,
void *mb_buf);
int (*frmr_write_mtpt)(void *mb_buf, struct hns_roce_mr *mr);
int (*mw_write_mtpt)(void *mb_buf, struct hns_roce_mw *mw);
void (*write_cqc)(struct hns_roce_dev *hr_dev,
struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts,
dma_addr_t dma_handle, int nent, u32 vector);
@ -863,6 +879,11 @@ static inline struct hns_roce_mr *to_hr_mr(struct ib_mr *ibmr)
return container_of(ibmr, struct hns_roce_mr, ibmr);
}
static inline struct hns_roce_mw *to_hr_mw(struct ib_mw *ibmw)
{
return container_of(ibmw, struct hns_roce_mw, ibmw);
}
static inline struct hns_roce_qp *to_hr_qp(struct ib_qp *ibqp)
{
return container_of(ibqp, struct hns_roce_qp, ibqp);
@ -968,12 +989,20 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
int hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, u64 length,
u64 virt_addr, int mr_access_flags, struct ib_pd *pd,
struct ib_udata *udata);
struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
u32 max_num_sg);
int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset);
int hns_roce_dereg_mr(struct ib_mr *ibmr);
int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev,
struct hns_roce_cmd_mailbox *mailbox,
unsigned long mpt_index);
unsigned long key_to_hw_index(u32 key);
struct ib_mw *hns_roce_alloc_mw(struct ib_pd *pd, enum ib_mw_type,
struct ib_udata *udata);
int hns_roce_dealloc_mw(struct ib_mw *ibmw);
void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size,
struct hns_roce_buf *buf);
int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,


@ -731,7 +731,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
cq_init_attr.comp_vector = 0;
cq = hns_roce_ib_create_cq(&hr_dev->ib_dev, &cq_init_attr, NULL, NULL);
if (IS_ERR(cq)) {
dev_err(dev, "Create cq for reseved loop qp failed!");
dev_err(dev, "Create cq for reserved loop qp failed!");
return -ENOMEM;
}
free_mr->mr_free_cq = to_hr_cq(cq);
@ -744,7 +744,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
pd = hns_roce_alloc_pd(&hr_dev->ib_dev, NULL, NULL);
if (IS_ERR(pd)) {
dev_err(dev, "Create pd for reseved loop qp failed!");
dev_err(dev, "Create pd for reserved loop qp failed!");
ret = -ENOMEM;
goto alloc_pd_failed;
}


@ -54,6 +54,59 @@ static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
dseg->len = cpu_to_le32(sg->length);
}
static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
struct hns_roce_wqe_frmr_seg *fseg,
const struct ib_reg_wr *wr)
{
struct hns_roce_mr *mr = to_hr_mr(wr->mr);
/* use ib_access_flags */
roce_set_bit(rc_sq_wqe->byte_4,
V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S,
wr->access & IB_ACCESS_MW_BIND ? 1 : 0);
roce_set_bit(rc_sq_wqe->byte_4,
V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S,
wr->access & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0);
roce_set_bit(rc_sq_wqe->byte_4,
V2_RC_FRMR_WQE_BYTE_4_RR_S,
wr->access & IB_ACCESS_REMOTE_READ ? 1 : 0);
roce_set_bit(rc_sq_wqe->byte_4,
V2_RC_FRMR_WQE_BYTE_4_RW_S,
wr->access & IB_ACCESS_REMOTE_WRITE ? 1 : 0);
roce_set_bit(rc_sq_wqe->byte_4,
V2_RC_FRMR_WQE_BYTE_4_LW_S,
wr->access & IB_ACCESS_LOCAL_WRITE ? 1 : 0);
/* Data structure reuse may lead to confusion */
rc_sq_wqe->msg_len = cpu_to_le32(mr->pbl_ba & 0xffffffff);
rc_sq_wqe->inv_key = cpu_to_le32(mr->pbl_ba >> 32);
rc_sq_wqe->byte_16 = cpu_to_le32(wr->mr->length & 0xffffffff);
rc_sq_wqe->byte_20 = cpu_to_le32(wr->mr->length >> 32);
rc_sq_wqe->rkey = cpu_to_le32(wr->key);
rc_sq_wqe->va = cpu_to_le64(wr->mr->iova);
fseg->pbl_size = cpu_to_le32(mr->pbl_size);
roce_set_field(fseg->mode_buf_pg_sz,
V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M,
V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S,
mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
roce_set_bit(fseg->mode_buf_pg_sz,
V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0);
}
static void set_atomic_seg(struct hns_roce_wqe_atomic_seg *aseg,
const struct ib_atomic_wr *wr)
{
if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
aseg->fetchadd_swap_data = cpu_to_le64(wr->swap);
aseg->cmp_data = cpu_to_le64(wr->compare_add);
} else {
aseg->fetchadd_swap_data = cpu_to_le64(wr->compare_add);
aseg->cmp_data = 0;
}
}
static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
unsigned int *sge_ind)
{
@ -121,6 +174,7 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
}
if (wr->opcode == IB_WR_RDMA_READ) {
*bad_wr = wr;
dev_err(hr_dev->dev, "Not support inline data!\n");
return -EINVAL;
}
@ -179,6 +233,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
struct hns_roce_v2_ud_send_wqe *ud_sq_wqe;
struct hns_roce_v2_rc_send_wqe *rc_sq_wqe;
struct hns_roce_qp *qp = to_hr_qp(ibqp);
struct hns_roce_wqe_frmr_seg *fseg;
struct device *dev = hr_dev->dev;
struct hns_roce_v2_db sq_db;
struct ib_qp_attr attr;
@ -191,6 +246,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
int attr_mask;
u32 tmp_len;
int ret = 0;
u32 hr_op;
u8 *smac;
int nreq;
int i;
@ -356,6 +412,9 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
V2_UD_SEND_WQE_BYTE_40_PORTN_S,
qp->port);
roce_set_bit(ud_sq_wqe->byte_40,
V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S,
ah->av.vlan_en ? 1 : 0);
roce_set_field(ud_sq_wqe->byte_48,
V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M,
V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S,
@ -406,99 +465,100 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
roce_set_bit(rc_sq_wqe->byte_4,
V2_RC_SEND_WQE_BYTE_4_OWNER_S, owner_bit);
wqe += sizeof(struct hns_roce_v2_rc_send_wqe);
switch (wr->opcode) {
case IB_WR_RDMA_READ:
roce_set_field(rc_sq_wqe->byte_4,
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
HNS_ROCE_V2_WQE_OP_RDMA_READ);
hr_op = HNS_ROCE_V2_WQE_OP_RDMA_READ;
rc_sq_wqe->rkey =
cpu_to_le32(rdma_wr(wr)->rkey);
rc_sq_wqe->va =
cpu_to_le64(rdma_wr(wr)->remote_addr);
break;
case IB_WR_RDMA_WRITE:
roce_set_field(rc_sq_wqe->byte_4,
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
HNS_ROCE_V2_WQE_OP_RDMA_WRITE);
hr_op = HNS_ROCE_V2_WQE_OP_RDMA_WRITE;
rc_sq_wqe->rkey =
cpu_to_le32(rdma_wr(wr)->rkey);
rc_sq_wqe->va =
cpu_to_le64(rdma_wr(wr)->remote_addr);
break;
case IB_WR_RDMA_WRITE_WITH_IMM:
roce_set_field(rc_sq_wqe->byte_4,
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM);
hr_op = HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM;
rc_sq_wqe->rkey =
cpu_to_le32(rdma_wr(wr)->rkey);
rc_sq_wqe->va =
cpu_to_le64(rdma_wr(wr)->remote_addr);
break;
case IB_WR_SEND:
roce_set_field(rc_sq_wqe->byte_4,
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
HNS_ROCE_V2_WQE_OP_SEND);
hr_op = HNS_ROCE_V2_WQE_OP_SEND;
break;
case IB_WR_SEND_WITH_INV:
roce_set_field(rc_sq_wqe->byte_4,
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
HNS_ROCE_V2_WQE_OP_SEND_WITH_INV);
hr_op = HNS_ROCE_V2_WQE_OP_SEND_WITH_INV;
break;
case IB_WR_SEND_WITH_IMM:
roce_set_field(rc_sq_wqe->byte_4,
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
HNS_ROCE_V2_WQE_OP_SEND_WITH_IMM);
hr_op = HNS_ROCE_V2_WQE_OP_SEND_WITH_IMM;
break;
case IB_WR_LOCAL_INV:
roce_set_field(rc_sq_wqe->byte_4,
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
HNS_ROCE_V2_WQE_OP_LOCAL_INV);
hr_op = HNS_ROCE_V2_WQE_OP_LOCAL_INV;
roce_set_bit(rc_sq_wqe->byte_4,
V2_RC_SEND_WQE_BYTE_4_SO_S, 1);
rc_sq_wqe->inv_key =
cpu_to_le32(wr->ex.invalidate_rkey);
break;
case IB_WR_REG_MR:
hr_op = HNS_ROCE_V2_WQE_OP_FAST_REG_PMR;
fseg = wqe;
set_frmr_seg(rc_sq_wqe, fseg, reg_wr(wr));
break;
case IB_WR_ATOMIC_CMP_AND_SWP:
roce_set_field(rc_sq_wqe->byte_4,
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP);
hr_op = HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP;
rc_sq_wqe->rkey =
cpu_to_le32(atomic_wr(wr)->rkey);
rc_sq_wqe->va =
cpu_to_le64(atomic_wr(wr)->remote_addr);
break;
case IB_WR_ATOMIC_FETCH_AND_ADD:
roce_set_field(rc_sq_wqe->byte_4,
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD);
hr_op = HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD;
rc_sq_wqe->rkey =
cpu_to_le32(atomic_wr(wr)->rkey);
rc_sq_wqe->va =
cpu_to_le64(atomic_wr(wr)->remote_addr);
break;
case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
roce_set_field(rc_sq_wqe->byte_4,
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP);
hr_op =
HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP;
break;
case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD:
roce_set_field(rc_sq_wqe->byte_4,
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD);
hr_op =
HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD;
break;
default:
roce_set_field(rc_sq_wqe->byte_4,
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
HNS_ROCE_V2_WQE_OP_MASK);
hr_op = HNS_ROCE_V2_WQE_OP_MASK;
break;
}
wqe += sizeof(struct hns_roce_v2_rc_send_wqe);
roce_set_field(rc_sq_wqe->byte_4,
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
V2_RC_SEND_WQE_BYTE_4_OPCODE_S, hr_op);
if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
struct hns_roce_v2_wqe_data_seg *dseg;
dseg = wqe;
set_data_seg_v2(dseg, wr->sg_list);
wqe += sizeof(struct hns_roce_v2_wqe_data_seg);
set_atomic_seg(wqe, atomic_wr(wr));
roce_set_field(rc_sq_wqe->byte_16,
V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S,
wr->num_sge);
} else if (wr->opcode != IB_WR_REG_MR) {
ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe,
wqe, &sge_ind, bad_wr);
if (ret)
goto out;
}
ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe, wqe,
&sge_ind, bad_wr);
if (ret)
goto out;
ind++;
} else {
dev_err(dev, "Illegal qp_type(0x%x)\n", ibqp->qp_type);
@ -935,7 +995,24 @@ static int hns_roce_cmq_query_hw_info(struct hns_roce_dev *hr_dev)
resp = (struct hns_roce_query_version *)desc.data;
hr_dev->hw_rev = le32_to_cpu(resp->rocee_hw_version);
hr_dev->vendor_id = le32_to_cpu(resp->rocee_vendor_id);
hr_dev->vendor_id = hr_dev->pci_dev->vendor;
return 0;
}
static int hns_roce_query_fw_ver(struct hns_roce_dev *hr_dev)
{
struct hns_roce_query_fw_info *resp;
struct hns_roce_cmq_desc desc;
int ret;
hns_roce_cmq_setup_basic_desc(&desc, HNS_QUERY_FW_VER, true);
ret = hns_roce_cmq_send(hr_dev, &desc, 1);
if (ret)
return ret;
resp = (struct hns_roce_query_fw_info *)desc.data;
hr_dev->caps.fw_ver = (u64)(le32_to_cpu(resp->fw_ver));
return 0;
}
@ -1157,6 +1234,13 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
int ret;
ret = hns_roce_cmq_query_hw_info(hr_dev);
if (ret) {
dev_err(hr_dev->dev, "Query hardware version fail, ret = %d.\n",
ret);
return ret;
}
ret = hns_roce_query_fw_ver(hr_dev);
if (ret) {
dev_err(hr_dev->dev, "Query firmware version fail, ret = %d.\n",
ret);
@ -1185,14 +1269,16 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
return ret;
}
hr_dev->vendor_part_id = 0;
hr_dev->sys_image_guid = 0;
hr_dev->vendor_part_id = hr_dev->pci_dev->device;
hr_dev->sys_image_guid = be64_to_cpu(hr_dev->ib_dev.node_guid);
caps->num_qps = HNS_ROCE_V2_MAX_QP_NUM;
caps->max_wqes = HNS_ROCE_V2_MAX_WQE_NUM;
caps->num_cqs = HNS_ROCE_V2_MAX_CQ_NUM;
caps->max_cqes = HNS_ROCE_V2_MAX_CQE_NUM;
caps->max_sq_sg = HNS_ROCE_V2_MAX_SQ_SGE_NUM;
caps->max_extend_sg = HNS_ROCE_V2_MAX_EXTEND_SGE_NUM;
caps->max_rq_sg = HNS_ROCE_V2_MAX_RQ_SGE_NUM;
caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INLINE;
caps->num_uars = HNS_ROCE_V2_UAR_NUM;
@ -1222,6 +1308,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->reserved_mrws = 1;
caps->reserved_uars = 0;
caps->reserved_cqs = 0;
caps->reserved_qps = HNS_ROCE_V2_RSV_QPS;
caps->qpc_ba_pg_sz = 0;
caps->qpc_buf_pg_sz = 0;
@ -1255,6 +1342,11 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
HNS_ROCE_CAP_FLAG_RQ_INLINE |
HNS_ROCE_CAP_FLAG_RECORD_DB |
HNS_ROCE_CAP_FLAG_SQ_RECORD_DB;
if (hr_dev->pci_dev->revision == 0x21)
caps->flags |= HNS_ROCE_CAP_FLAG_MW |
HNS_ROCE_CAP_FLAG_FRMR;
caps->pkey_table_len[0] = 1;
caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM;
caps->ceqe_depth = HNS_ROCE_V2_COMP_EQE_NUM;
@ -1262,6 +1354,9 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->local_ca_ack_delay = 0;
caps->max_mtu = IB_MTU_4096;
if (hr_dev->pci_dev->revision == 0x21)
caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC;
ret = hns_roce_v2_set_bt(hr_dev);
if (ret)
dev_err(hr_dev->dev, "Configure bt attribute fail, ret = %d.\n",
@ -1690,10 +1785,11 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 0);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 0);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_BIND_EN_S,
(mr->access & IB_ACCESS_MW_BIND ? 1 : 0));
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_ATOMIC_EN_S, 0);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_ATOMIC_EN_S,
mr->access & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RR_EN_S,
(mr->access & IB_ACCESS_REMOTE_READ ? 1 : 0));
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RW_EN_S,
@ -1817,6 +1913,88 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev,
return 0;
}
static int hns_roce_v2_frmr_write_mtpt(void *mb_buf, struct hns_roce_mr *mr)
{
struct hns_roce_v2_mpt_entry *mpt_entry;
mpt_entry = mb_buf;
memset(mpt_entry, 0, sizeof(*mpt_entry));
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE);
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M,
V2_MPT_BYTE_4_PBL_HOP_NUM_S, 1);
roce_set_field(mpt_entry->byte_4_pd_hop_st,
V2_MPT_BYTE_4_PBL_BA_PG_SZ_M,
V2_MPT_BYTE_4_PBL_BA_PG_SZ_S,
mr->pbl_ba_pg_sz + PG_SHIFT_OFFSET);
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
V2_MPT_BYTE_4_PD_S, mr->pd);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 1);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1);
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_FRE_S, 1);
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0);
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 0);
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1);
mpt_entry->pbl_size = cpu_to_le32(mr->pbl_size);
mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(mr->pbl_ba >> 3));
roce_set_field(mpt_entry->byte_48_mode_ba, V2_MPT_BYTE_48_PBL_BA_H_M,
V2_MPT_BYTE_48_PBL_BA_H_S,
upper_32_bits(mr->pbl_ba >> 3));
roce_set_field(mpt_entry->byte_64_buf_pa1,
V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
return 0;
}
static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw)
{
struct hns_roce_v2_mpt_entry *mpt_entry;
mpt_entry = mb_buf;
memset(mpt_entry, 0, sizeof(*mpt_entry));
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE);
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
V2_MPT_BYTE_4_PD_S, mw->pdn);
roce_set_field(mpt_entry->byte_4_pd_hop_st,
V2_MPT_BYTE_4_PBL_HOP_NUM_M,
V2_MPT_BYTE_4_PBL_HOP_NUM_S,
mw->pbl_hop_num == HNS_ROCE_HOP_NUM_0 ?
0 : mw->pbl_hop_num);
roce_set_field(mpt_entry->byte_4_pd_hop_st,
V2_MPT_BYTE_4_PBL_BA_PG_SZ_M,
V2_MPT_BYTE_4_PBL_BA_PG_SZ_S,
mw->pbl_ba_pg_sz + PG_SHIFT_OFFSET);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1);
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0);
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 1);
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1);
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BQP_S,
mw->ibmw.type == IB_MW_TYPE_1 ? 0 : 1);
roce_set_field(mpt_entry->byte_64_buf_pa1,
V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
mw->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
mpt_entry->lkey = cpu_to_le32(mw->rkey);
return 0;
}
static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n)
{
return hns_roce_buf_offset(&hr_cq->hr_buf.hr_buf,
@ -2274,6 +2452,7 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
wc->src_qp = (u8)roce_get_field(cqe->byte_32,
V2_CQE_BYTE_32_RMT_QPN_M,
V2_CQE_BYTE_32_RMT_QPN_S);
wc->slid = 0;
wc->wc_flags |= (roce_get_bit(cqe->byte_32,
V2_CQE_BYTE_32_GRH_S) ?
IB_WC_GRH : 0);
@ -2287,7 +2466,14 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
wc->smac[5] = roce_get_field(cqe->byte_28,
V2_CQE_BYTE_28_SMAC_5_M,
V2_CQE_BYTE_28_SMAC_5_S);
wc->vlan_id = 0xffff;
if (roce_get_bit(cqe->byte_28, V2_CQE_BYTE_28_VID_VLD_S)) {
wc->vlan_id = (u16)roce_get_field(cqe->byte_28,
V2_CQE_BYTE_28_VID_M,
V2_CQE_BYTE_28_VID_S);
} else {
wc->vlan_id = 0xffff;
}
wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC);
wc->network_hdr_type = roce_get_field(cqe->byte_28,
V2_CQE_BYTE_28_PORT_TYPE_M,
@ -2589,21 +2775,16 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
roce_set_bit(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_RQ_TX_ERR_S, 0);
roce_set_bit(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_RQ_RX_ERR_S, 0);
roce_set_field(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_MAPID_M,
V2_QPC_BYTE_60_MAPID_S, 0);
roce_set_field(qpc_mask->byte_60_qpst_tempid, V2_QPC_BYTE_60_TEMPID_M,
V2_QPC_BYTE_60_TEMPID_S, 0);
roce_set_bit(qpc_mask->byte_60_qpst_mapid,
V2_QPC_BYTE_60_INNER_MAP_IND_S, 0);
roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_SQ_MAP_IND_S,
0);
roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_RQ_MAP_IND_S,
0);
roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_EXT_MAP_IND_S,
0);
roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_SQ_RLS_IND_S,
0);
roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_SQ_EXT_IND_S,
0);
roce_set_field(qpc_mask->byte_60_qpst_tempid,
V2_QPC_BYTE_60_SCC_TOKEN_M, V2_QPC_BYTE_60_SCC_TOKEN_S,
0);
roce_set_bit(qpc_mask->byte_60_qpst_tempid,
V2_QPC_BYTE_60_SQ_DB_DOING_S, 0);
roce_set_bit(qpc_mask->byte_60_qpst_tempid,
V2_QPC_BYTE_60_RQ_DB_DOING_S, 0);
roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CNP_TX_FLAG_S, 0);
roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CE_FLAG_S, 0);
@ -2685,7 +2866,8 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_TAIL_MAX_M,
V2_QPC_BYTE_132_TRRL_TAIL_MAX_S, 0);
roce_set_bit(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RSVD_RAQ_MAP_S, 0);
roce_set_bit(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RQ_RTY_WAIT_DO_S,
0);
roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RAQ_TRRL_HEAD_M,
V2_QPC_BYTE_140_RAQ_TRRL_HEAD_S, 0);
roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RAQ_TRRL_TAIL_M,
@ -2694,8 +2876,6 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
roce_set_field(qpc_mask->byte_144_raq,
V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_M,
V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_S, 0);
roce_set_bit(qpc_mask->byte_144_raq, V2_QPC_BYTE_144_RAQ_RTY_INI_IND_S,
0);
roce_set_field(qpc_mask->byte_144_raq, V2_QPC_BYTE_144_RAQ_CREDIT_M,
V2_QPC_BYTE_144_RAQ_CREDIT_S, 0);
roce_set_bit(qpc_mask->byte_144_raq, V2_QPC_BYTE_144_RESP_RTY_FLG_S, 0);
@ -2721,14 +2901,12 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
V2_QPC_BYTE_160_SQ_CONSUMER_IDX_M,
V2_QPC_BYTE_160_SQ_CONSUMER_IDX_S, 0);
roce_set_field(context->byte_168_irrl_idx,
V2_QPC_BYTE_168_SQ_SHIFT_BAK_M,
V2_QPC_BYTE_168_SQ_SHIFT_BAK_S,
ilog2((unsigned int)hr_qp->sq.wqe_cnt));
roce_set_field(qpc_mask->byte_168_irrl_idx,
V2_QPC_BYTE_168_SQ_SHIFT_BAK_M,
V2_QPC_BYTE_168_SQ_SHIFT_BAK_S, 0);
roce_set_bit(qpc_mask->byte_168_irrl_idx,
V2_QPC_BYTE_168_POLL_DB_WAIT_DO_S, 0);
roce_set_bit(qpc_mask->byte_168_irrl_idx,
V2_QPC_BYTE_168_SCC_TOKEN_FORBID_SQ_DEQ_S, 0);
roce_set_bit(qpc_mask->byte_168_irrl_idx,
V2_QPC_BYTE_168_WAIT_ACK_TIMEOUT_S, 0);
roce_set_bit(qpc_mask->byte_168_irrl_idx,
V2_QPC_BYTE_168_MSG_RTY_LP_FLG_S, 0);
roce_set_bit(qpc_mask->byte_168_irrl_idx,
@ -2746,6 +2924,9 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
roce_set_bit(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_MSG_RNR_FLG_S,
0);
roce_set_bit(context->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 1);
roce_set_bit(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 0);
roce_set_field(qpc_mask->byte_176_msg_pktn,
V2_QPC_BYTE_176_MSG_USE_PKTN_M,
V2_QPC_BYTE_176_MSG_USE_PKTN_S, 0);
@ -2790,6 +2971,13 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
V2_QPC_BYTE_232_IRRL_SGE_IDX_M,
V2_QPC_BYTE_232_IRRL_SGE_IDX_S, 0);
roce_set_bit(qpc_mask->byte_232_irrl_sge, V2_QPC_BYTE_232_SO_LP_VLD_S,
0);
roce_set_bit(qpc_mask->byte_232_irrl_sge,
V2_QPC_BYTE_232_FENCE_LP_VLD_S, 0);
roce_set_bit(qpc_mask->byte_232_irrl_sge, V2_QPC_BYTE_232_IRRL_LP_VLD_S,
0);
qpc_mask->irrl_cur_sge_offset = 0;
roce_set_field(qpc_mask->byte_240_irrl_tail,
@ -2955,13 +3143,6 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
roce_set_field(qpc_mask->byte_56_dqpn_err,
V2_QPC_BYTE_56_DQPN_M, V2_QPC_BYTE_56_DQPN_S, 0);
}
roce_set_field(context->byte_168_irrl_idx,
V2_QPC_BYTE_168_SQ_SHIFT_BAK_M,
V2_QPC_BYTE_168_SQ_SHIFT_BAK_S,
ilog2((unsigned int)hr_qp->sq.wqe_cnt));
roce_set_field(qpc_mask->byte_168_irrl_idx,
V2_QPC_BYTE_168_SQ_SHIFT_BAK_M,
V2_QPC_BYTE_168_SQ_SHIFT_BAK_S, 0);
}
static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
@ -3271,13 +3452,6 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
* we should set all bits of the relevant fields in context mask to
* 0 at the same time, else set them to 0x1.
*/
roce_set_field(context->byte_60_qpst_mapid,
V2_QPC_BYTE_60_RTY_NUM_INI_BAK_M,
V2_QPC_BYTE_60_RTY_NUM_INI_BAK_S, attr->retry_cnt);
roce_set_field(qpc_mask->byte_60_qpst_mapid,
V2_QPC_BYTE_60_RTY_NUM_INI_BAK_M,
V2_QPC_BYTE_60_RTY_NUM_INI_BAK_S, 0);
context->sq_cur_blk_addr = (u32)(mtts[0] >> PAGE_ADDR_SHIFT);
roce_set_field(context->byte_168_irrl_idx,
V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_M,
@ -3538,6 +3712,17 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
memcpy(src_mac, gid_attr->ndev->dev_addr, ETH_ALEN);
}
if (is_vlan_dev(gid_attr->ndev)) {
roce_set_bit(context->byte_76_srqn_op_en,
V2_QPC_BYTE_76_RQ_VLAN_EN_S, 1);
roce_set_bit(qpc_mask->byte_76_srqn_op_en,
V2_QPC_BYTE_76_RQ_VLAN_EN_S, 0);
roce_set_bit(context->byte_168_irrl_idx,
V2_QPC_BYTE_168_SQ_VLAN_EN_S, 1);
roce_set_bit(qpc_mask->byte_168_irrl_idx,
V2_QPC_BYTE_168_SQ_VLAN_EN_S, 0);
}
roce_set_field(context->byte_24_mtu_tc,
V2_QPC_BYTE_24_VLAN_ID_M,
V2_QPC_BYTE_24_VLAN_ID_S, vlan);
@ -3584,8 +3769,15 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
V2_QPC_BYTE_24_HOP_LIMIT_M,
V2_QPC_BYTE_24_HOP_LIMIT_S, 0);
roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
V2_QPC_BYTE_24_TC_S, grh->traffic_class);
if (hr_dev->pci_dev->revision == 0x21 &&
gid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
roce_set_field(context->byte_24_mtu_tc,
V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S,
grh->traffic_class >> 2);
else
roce_set_field(context->byte_24_mtu_tc,
V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S,
grh->traffic_class);
roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
V2_QPC_BYTE_24_TC_S, 0);
roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M,
@ -3606,9 +3798,9 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask);
/* Every status migrate must change state */
roce_set_field(context->byte_60_qpst_mapid, V2_QPC_BYTE_60_QP_ST_M,
roce_set_field(context->byte_60_qpst_tempid, V2_QPC_BYTE_60_QP_ST_M,
V2_QPC_BYTE_60_QP_ST_S, new_state);
roce_set_field(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_QP_ST_M,
roce_set_field(qpc_mask->byte_60_qpst_tempid, V2_QPC_BYTE_60_QP_ST_M,
V2_QPC_BYTE_60_QP_ST_S, 0);
/* SW pass context to HW */
@ -3728,7 +3920,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
goto out;
}
state = roce_get_field(context->byte_60_qpst_mapid,
state = roce_get_field(context->byte_60_qpst_tempid,
V2_QPC_BYTE_60_QP_ST_M, V2_QPC_BYTE_60_QP_ST_S);
tmp_qp_state = to_ib_qp_st((enum hns_roce_v2_qp_state)state);
if (tmp_qp_state == -1) {
@ -3995,13 +4187,103 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
{
struct hns_roce_work *irq_work =
container_of(work, struct hns_roce_work, work);
struct device *dev = irq_work->hr_dev->dev;
u32 qpn = irq_work->qpn;
u32 cqn = irq_work->cqn;
switch (irq_work->event_type) {
case HNS_ROCE_EVENT_TYPE_PATH_MIG:
dev_info(dev, "Path migrated succeeded.\n");
break;
case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
dev_warn(dev, "Path migration failed.\n");
break;
case HNS_ROCE_EVENT_TYPE_COMM_EST:
dev_info(dev, "Communication established.\n");
break;
case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
dev_warn(dev, "Send queue drained.\n");
break;
case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
dev_err(dev, "Local work queue catastrophic error.\n");
hns_roce_set_qps_to_err(irq_work->hr_dev, qpn);
switch (irq_work->sub_type) {
case HNS_ROCE_LWQCE_QPC_ERROR:
dev_err(dev, "QP %d, QPC error.\n", qpn);
break;
case HNS_ROCE_LWQCE_MTU_ERROR:
dev_err(dev, "QP %d, MTU error.\n", qpn);
break;
case HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR:
dev_err(dev, "QP %d, WQE BA addr error.\n", qpn);
break;
case HNS_ROCE_LWQCE_WQE_ADDR_ERROR:
dev_err(dev, "QP %d, WQE addr error.\n", qpn);
break;
case HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR:
dev_err(dev, "QP %d, WQE shift error.\n", qpn);
break;
default:
dev_err(dev, "Unhandled sub_event type %d.\n",
irq_work->sub_type);
break;
}
break;
case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
dev_err(dev, "Invalid request local work queue error.\n");
hns_roce_set_qps_to_err(irq_work->hr_dev, qpn);
break;
case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
dev_err(dev, "Local access violation work queue error.\n");
hns_roce_set_qps_to_err(irq_work->hr_dev, qpn);
switch (irq_work->sub_type) {
case HNS_ROCE_LAVWQE_R_KEY_VIOLATION:
dev_err(dev, "QP %d, R_key violation.\n", qpn);
break;
case HNS_ROCE_LAVWQE_LENGTH_ERROR:
dev_err(dev, "QP %d, length error.\n", qpn);
break;
case HNS_ROCE_LAVWQE_VA_ERROR:
dev_err(dev, "QP %d, VA error.\n", qpn);
break;
case HNS_ROCE_LAVWQE_PD_ERROR:
dev_err(dev, "QP %d, PD error.\n", qpn);
break;
case HNS_ROCE_LAVWQE_RW_ACC_ERROR:
dev_err(dev, "QP %d, rw acc error.\n", qpn);
break;
case HNS_ROCE_LAVWQE_KEY_STATE_ERROR:
dev_err(dev, "QP %d, key state error.\n", qpn);
break;
case HNS_ROCE_LAVWQE_MR_OPERATION_ERROR:
dev_err(dev, "QP %d, MR operation error.\n", qpn);
break;
default:
dev_err(dev, "Unhandled sub_event type %d.\n",
irq_work->sub_type);
break;
}
break;
case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
dev_warn(dev, "SRQ limit reach.\n");
break;
case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
dev_warn(dev, "SRQ last wqe reach.\n");
break;
case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
dev_err(dev, "SRQ catas error.\n");
break;
case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
dev_err(dev, "CQ 0x%x access err.\n", cqn);
break;
case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
dev_warn(dev, "CQ 0x%x overflow\n", cqn);
break;
case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
dev_warn(dev, "DB overflow.\n");
break;
case HNS_ROCE_EVENT_TYPE_FLR:
dev_warn(dev, "Function level reset.\n");
break;
default:
break;
@ -4011,7 +4293,8 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
}
static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev,
struct hns_roce_eq *eq, u32 qpn)
struct hns_roce_eq *eq,
u32 qpn, u32 cqn)
{
struct hns_roce_work *irq_work;
@ -4022,6 +4305,7 @@ static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev,
INIT_WORK(&(irq_work->work), hns_roce_irq_work_handle);
irq_work->hr_dev = hr_dev;
irq_work->qpn = qpn;
irq_work->cqn = cqn;
irq_work->event_type = eq->event_type;
irq_work->sub_type = eq->sub_type;
queue_work(hr_dev->irq_workq, &(irq_work->work));
@ -4058,124 +4342,6 @@ static void set_eq_cons_index_v2(struct hns_roce_eq *eq)
hns_roce_write64_k(doorbell, eq->doorbell);
}
static void hns_roce_v2_wq_catas_err_handle(struct hns_roce_dev *hr_dev,
struct hns_roce_aeqe *aeqe,
u32 qpn)
{
struct device *dev = hr_dev->dev;
int sub_type;
dev_warn(dev, "Local work queue catastrophic error.\n");
sub_type = roce_get_field(aeqe->asyn, HNS_ROCE_V2_AEQE_SUB_TYPE_M,
HNS_ROCE_V2_AEQE_SUB_TYPE_S);
switch (sub_type) {
case HNS_ROCE_LWQCE_QPC_ERROR:
dev_warn(dev, "QP %d, QPC error.\n", qpn);
break;
case HNS_ROCE_LWQCE_MTU_ERROR:
dev_warn(dev, "QP %d, MTU error.\n", qpn);
break;
case HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR:
dev_warn(dev, "QP %d, WQE BA addr error.\n", qpn);
break;
case HNS_ROCE_LWQCE_WQE_ADDR_ERROR:
dev_warn(dev, "QP %d, WQE addr error.\n", qpn);
break;
case HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR:
dev_warn(dev, "QP %d, WQE shift error.\n", qpn);
break;
default:
dev_err(dev, "Unhandled sub_event type %d.\n", sub_type);
break;
}
}
static void hns_roce_v2_local_wq_access_err_handle(struct hns_roce_dev *hr_dev,
struct hns_roce_aeqe *aeqe, u32 qpn)
{
struct device *dev = hr_dev->dev;
int sub_type;
dev_warn(dev, "Local access violation work queue error.\n");
sub_type = roce_get_field(aeqe->asyn, HNS_ROCE_V2_AEQE_SUB_TYPE_M,
HNS_ROCE_V2_AEQE_SUB_TYPE_S);
switch (sub_type) {
case HNS_ROCE_LAVWQE_R_KEY_VIOLATION:
dev_warn(dev, "QP %d, R_key violation.\n", qpn);
break;
case HNS_ROCE_LAVWQE_LENGTH_ERROR:
dev_warn(dev, "QP %d, length error.\n", qpn);
break;
case HNS_ROCE_LAVWQE_VA_ERROR:
dev_warn(dev, "QP %d, VA error.\n", qpn);
break;
case HNS_ROCE_LAVWQE_PD_ERROR:
dev_err(dev, "QP %d, PD error.\n", qpn);
break;
case HNS_ROCE_LAVWQE_RW_ACC_ERROR:
dev_warn(dev, "QP %d, rw acc error.\n", qpn);
break;
case HNS_ROCE_LAVWQE_KEY_STATE_ERROR:
dev_warn(dev, "QP %d, key state error.\n", qpn);
break;
case HNS_ROCE_LAVWQE_MR_OPERATION_ERROR:
dev_warn(dev, "QP %d, MR operation error.\n", qpn);
break;
default:
dev_err(dev, "Unhandled sub_event type %d.\n", sub_type);
break;
}
}
static void hns_roce_v2_qp_err_handle(struct hns_roce_dev *hr_dev,
struct hns_roce_aeqe *aeqe,
int event_type, u32 qpn)
{
struct device *dev = hr_dev->dev;
switch (event_type) {
case HNS_ROCE_EVENT_TYPE_COMM_EST:
dev_warn(dev, "Communication established.\n");
break;
case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
dev_warn(dev, "Send queue drained.\n");
break;
case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
hns_roce_v2_wq_catas_err_handle(hr_dev, aeqe, qpn);
break;
case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
dev_warn(dev, "Invalid request local work queue error.\n");
break;
case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
hns_roce_v2_local_wq_access_err_handle(hr_dev, aeqe, qpn);
break;
default:
break;
}
hns_roce_qp_event(hr_dev, qpn, event_type);
}
static void hns_roce_v2_cq_err_handle(struct hns_roce_dev *hr_dev,
struct hns_roce_aeqe *aeqe,
int event_type, u32 cqn)
{
struct device *dev = hr_dev->dev;
switch (event_type) {
case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
dev_warn(dev, "CQ 0x%x access err.\n", cqn);
break;
case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
dev_warn(dev, "CQ 0x%x overflow\n", cqn);
break;
default:
break;
}
hns_roce_cq_event(hr_dev, cqn, event_type);
}
static struct hns_roce_aeqe *get_aeqe_v2(struct hns_roce_eq *eq, u32 entry)
{
u32 buf_chk_sz;
@ -4251,31 +4417,23 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
switch (event_type) {
case HNS_ROCE_EVENT_TYPE_PATH_MIG:
dev_warn(dev, "Path migrated succeeded.\n");
break;
case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
dev_warn(dev, "Path migration failed.\n");
break;
case HNS_ROCE_EVENT_TYPE_COMM_EST:
case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
hns_roce_v2_qp_err_handle(hr_dev, aeqe, event_type,
qpn);
hns_roce_qp_event(hr_dev, qpn, event_type);
break;
case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
dev_warn(dev, "SRQ not support.\n");
break;
case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
hns_roce_v2_cq_err_handle(hr_dev, aeqe, event_type,
cqn);
hns_roce_cq_event(hr_dev, cqn, event_type);
break;
case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
dev_warn(dev, "DB overflow.\n");
break;
case HNS_ROCE_EVENT_TYPE_MB:
hns_roce_cmd_event(hr_dev,
@ -4284,10 +4442,8 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
le64_to_cpu(aeqe->event.cmd.out_param));
break;
case HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW:
dev_warn(dev, "CEQ overflow.\n");
break;
case HNS_ROCE_EVENT_TYPE_FLR:
dev_warn(dev, "Function level reset.\n");
break;
default:
dev_err(dev, "Unhandled event %d on EQ %d at idx %u.\n",
@ -4304,7 +4460,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
dev_warn(dev, "cons_index overflow, set back to 0.\n");
eq->cons_index = 0;
}
hns_roce_v2_init_irq_work(hr_dev, eq, qpn);
hns_roce_v2_init_irq_work(hr_dev, eq, qpn, cqn);
}
set_eq_cons_index_v2(eq);
@ -5125,6 +5281,7 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
create_singlethread_workqueue("hns_roce_irq_workqueue");
if (!hr_dev->irq_workq) {
dev_err(dev, "Create irq workqueue failed!\n");
ret = -ENOMEM;
goto err_request_irq_fail;
}
@ -5195,6 +5352,8 @@ static const struct hns_roce_hw hns_roce_hw_v2 = {
.set_mac = hns_roce_v2_set_mac,
.write_mtpt = hns_roce_v2_write_mtpt,
.rereg_write_mtpt = hns_roce_v2_rereg_write_mtpt,
.frmr_write_mtpt = hns_roce_v2_frmr_write_mtpt,
.mw_write_mtpt = hns_roce_v2_mw_write_mtpt,
.write_cqc = hns_roce_v2_write_cqc,
.set_hem = hns_roce_v2_set_hem,
.clear_hem = hns_roce_v2_clear_hem,


@ -50,6 +50,7 @@
#define HNS_ROCE_V2_MAX_CQE_NUM 0x10000
#define HNS_ROCE_V2_MAX_RQ_SGE_NUM 0x100
#define HNS_ROCE_V2_MAX_SQ_SGE_NUM 0xff
#define HNS_ROCE_V2_MAX_EXTEND_SGE_NUM 0x200000
#define HNS_ROCE_V2_MAX_SQ_INLINE 0x20
#define HNS_ROCE_V2_UAR_NUM 256
#define HNS_ROCE_V2_PHY_UAR_NUM 1
@ -78,6 +79,7 @@
#define HNS_ROCE_INVALID_LKEY 0x100
#define HNS_ROCE_CMQ_TX_TIMEOUT 30000
#define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE 2
#define HNS_ROCE_V2_RSV_QPS 8
#define HNS_ROCE_CONTEXT_HOP_NUM 1
#define HNS_ROCE_MTT_HOP_NUM 1
@ -201,6 +203,7 @@ enum {
/* CMQ command */
enum hns_roce_opcode_type {
HNS_QUERY_FW_VER = 0x0001,
HNS_ROCE_OPC_QUERY_HW_VER = 0x8000,
HNS_ROCE_OPC_CFG_GLOBAL_PARAM = 0x8001,
HNS_ROCE_OPC_ALLOC_PF_RES = 0x8004,
@ -324,6 +327,7 @@ struct hns_roce_v2_cq_context {
enum {
V2_MPT_ST_VALID = 0x1,
V2_MPT_ST_FREE = 0x2,
};
enum hns_roce_v2_qp_state {
@ -350,7 +354,7 @@ struct hns_roce_v2_qp_context {
__le32 dmac;
__le32 byte_52_udpspn_dmac;
__le32 byte_56_dqpn_err;
__le32 byte_60_qpst_mapid;
__le32 byte_60_qpst_tempid;
__le32 qkey_xrcd;
__le32 byte_68_rq_db;
__le32 rq_db_record_addr;
@ -492,26 +496,15 @@ struct hns_roce_v2_qp_context {
#define V2_QPC_BYTE_56_LP_PKTN_INI_S 28
#define V2_QPC_BYTE_56_LP_PKTN_INI_M GENMASK(31, 28)
#define V2_QPC_BYTE_60_MAPID_S 0
#define V2_QPC_BYTE_60_MAPID_M GENMASK(12, 0)
#define V2_QPC_BYTE_60_TEMPID_S 0
#define V2_QPC_BYTE_60_TEMPID_M GENMASK(7, 0)
#define V2_QPC_BYTE_60_INNER_MAP_IND_S 13
#define V2_QPC_BYTE_60_SCC_TOKEN_S 8
#define V2_QPC_BYTE_60_SCC_TOKEN_M GENMASK(26, 8)
#define V2_QPC_BYTE_60_SQ_MAP_IND_S 14
#define V2_QPC_BYTE_60_SQ_DB_DOING_S 27
#define V2_QPC_BYTE_60_RQ_MAP_IND_S 15
#define V2_QPC_BYTE_60_TEMPID_S 16
#define V2_QPC_BYTE_60_TEMPID_M GENMASK(22, 16)
#define V2_QPC_BYTE_60_EXT_MAP_IND_S 23
#define V2_QPC_BYTE_60_RTY_NUM_INI_BAK_S 24
#define V2_QPC_BYTE_60_RTY_NUM_INI_BAK_M GENMASK(26, 24)
#define V2_QPC_BYTE_60_SQ_RLS_IND_S 27
#define V2_QPC_BYTE_60_SQ_EXT_IND_S 28
#define V2_QPC_BYTE_60_RQ_DB_DOING_S 28
#define V2_QPC_BYTE_60_QP_ST_S 29
#define V2_QPC_BYTE_60_QP_ST_M GENMASK(31, 29)
@ -534,6 +527,7 @@ struct hns_roce_v2_qp_context {
#define V2_QPC_BYTE_76_RQIE_S 28
#define V2_QPC_BYTE_76_RQ_VLAN_EN_S 30
#define V2_QPC_BYTE_80_RX_CQN_S 0
#define V2_QPC_BYTE_80_RX_CQN_M GENMASK(23, 0)
@ -588,7 +582,7 @@ struct hns_roce_v2_qp_context {
#define V2_QPC_BYTE_140_RR_MAX_S 12
#define V2_QPC_BYTE_140_RR_MAX_M GENMASK(14, 12)
#define V2_QPC_BYTE_140_RSVD_RAQ_MAP_S 15
#define V2_QPC_BYTE_140_RQ_RTY_WAIT_DO_S 15
#define V2_QPC_BYTE_140_RAQ_TRRL_HEAD_S 16
#define V2_QPC_BYTE_140_RAQ_TRRL_HEAD_M GENMASK(23, 16)
@ -599,8 +593,6 @@ struct hns_roce_v2_qp_context {
#define V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_S 0
#define V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_M GENMASK(23, 0)
#define V2_QPC_BYTE_144_RAQ_RTY_INI_IND_S 24
#define V2_QPC_BYTE_144_RAQ_CREDIT_S 25
#define V2_QPC_BYTE_144_RAQ_CREDIT_M GENMASK(29, 25)
@ -637,9 +629,10 @@ struct hns_roce_v2_qp_context {
#define V2_QPC_BYTE_168_LP_SGEN_INI_S 22
#define V2_QPC_BYTE_168_LP_SGEN_INI_M GENMASK(23, 22)
#define V2_QPC_BYTE_168_SQ_SHIFT_BAK_S 24
#define V2_QPC_BYTE_168_SQ_SHIFT_BAK_M GENMASK(27, 24)
#define V2_QPC_BYTE_168_SQ_VLAN_EN_S 24
#define V2_QPC_BYTE_168_POLL_DB_WAIT_DO_S 25
#define V2_QPC_BYTE_168_SCC_TOKEN_FORBID_SQ_DEQ_S 26
#define V2_QPC_BYTE_168_WAIT_ACK_TIMEOUT_S 27
#define V2_QPC_BYTE_168_IRRL_IDX_LSB_S 28
#define V2_QPC_BYTE_168_IRRL_IDX_LSB_M GENMASK(31, 28)
@ -725,6 +718,10 @@ struct hns_roce_v2_qp_context {
#define V2_QPC_BYTE_232_IRRL_SGE_IDX_S 20
#define V2_QPC_BYTE_232_IRRL_SGE_IDX_M GENMASK(28, 20)
#define V2_QPC_BYTE_232_SO_LP_VLD_S 29
#define V2_QPC_BYTE_232_FENCE_LP_VLD_S 30
#define V2_QPC_BYTE_232_IRRL_LP_VLD_S 31
#define V2_QPC_BYTE_240_IRRL_TAIL_REAL_S 0
#define V2_QPC_BYTE_240_IRRL_TAIL_REAL_M GENMASK(7, 0)
@ -743,6 +740,9 @@ struct hns_roce_v2_qp_context {
#define V2_QPC_BYTE_244_RNR_CNT_S 27
#define V2_QPC_BYTE_244_RNR_CNT_M GENMASK(29, 27)
#define V2_QPC_BYTE_244_LCL_OP_FLG_S 30
#define V2_QPC_BYTE_244_IRRL_RD_FLG_S 31
#define V2_QPC_BYTE_248_IRRL_PSN_S 0
#define V2_QPC_BYTE_248_IRRL_PSN_M GENMASK(23, 0)
@ -818,6 +818,11 @@ struct hns_roce_v2_cqe {
#define V2_CQE_BYTE_28_PORT_TYPE_S 16
#define V2_CQE_BYTE_28_PORT_TYPE_M GENMASK(17, 16)
#define V2_CQE_BYTE_28_VID_S 18
#define V2_CQE_BYTE_28_VID_M GENMASK(29, 18)
#define V2_CQE_BYTE_28_VID_VLD_S 30
#define V2_CQE_BYTE_32_RMT_QPN_S 0
#define V2_CQE_BYTE_32_RMT_QPN_M GENMASK(23, 0)
@ -878,8 +883,19 @@ struct hns_roce_v2_mpt_entry {
#define V2_MPT_BYTE_8_LW_EN_S 7
#define V2_MPT_BYTE_8_MW_CNT_S 8
#define V2_MPT_BYTE_8_MW_CNT_M GENMASK(31, 8)
#define V2_MPT_BYTE_12_FRE_S 0
#define V2_MPT_BYTE_12_PA_S 1
#define V2_MPT_BYTE_12_MR_MW_S 4
#define V2_MPT_BYTE_12_BPD_S 5
#define V2_MPT_BYTE_12_BQP_S 6
#define V2_MPT_BYTE_12_INNER_PA_VLD_S 7
#define V2_MPT_BYTE_12_MW_BIND_QPN_S 8
@ -988,6 +1004,8 @@ struct hns_roce_v2_ud_send_wqe {
#define V2_UD_SEND_WQE_BYTE_40_PORTN_S 24
#define V2_UD_SEND_WQE_BYTE_40_PORTN_M GENMASK(26, 24)
#define V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S 30
#define V2_UD_SEND_WQE_BYTE_40_LBI_S 31
#define V2_UD_SEND_WQE_DMAC_0_S 0
@ -1042,6 +1060,16 @@ struct hns_roce_v2_rc_send_wqe {
#define V2_RC_SEND_WQE_BYTE_4_INLINE_S 12
#define V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S 19
#define V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S 20
#define V2_RC_FRMR_WQE_BYTE_4_RR_S 21
#define V2_RC_FRMR_WQE_BYTE_4_RW_S 22
#define V2_RC_FRMR_WQE_BYTE_4_LW_S 23
#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_S 0
#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_M GENMASK(23, 0)
@ -1051,6 +1079,16 @@ struct hns_roce_v2_rc_send_wqe {
#define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0
#define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0)
struct hns_roce_wqe_frmr_seg {
__le32 pbl_size;
__le32 mode_buf_pg_sz;
};
#define V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S 4
#define V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M GENMASK(7, 4)
#define V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S 8
struct hns_roce_v2_wqe_data_seg {
__le32 len;
__le32 lkey;
@ -1068,6 +1106,11 @@ struct hns_roce_query_version {
__le32 rsv[5];
};
struct hns_roce_query_fw_info {
__le32 fw_ver;
__le32 rsv[5];
};
struct hns_roce_cfg_llm_a {
__le32 base_addr_l;
__le32 base_addr_h;
@ -1564,4 +1607,9 @@ struct hns_roce_eq_context {
#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S 0
#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M GENMASK(23, 0)
struct hns_roce_wqe_atomic_seg {
__le64 fetchadd_swap_data;
__le64 cmp_data;
};
#endif


@ -196,6 +196,7 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
memset(props, 0, sizeof(*props));
props->fw_ver = hr_dev->caps.fw_ver;
props->sys_image_guid = cpu_to_be64(hr_dev->sys_image_guid);
props->max_mr_size = (u64)(~(0ULL));
props->page_size_cap = hr_dev->caps.page_size_cap;
@ -215,7 +216,8 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
props->max_pd = hr_dev->caps.num_pds;
props->max_qp_rd_atom = hr_dev->caps.max_qp_dest_rdma;
props->max_qp_init_rd_atom = hr_dev->caps.max_qp_init_rdma;
props->atomic_cap = IB_ATOMIC_NONE;
props->atomic_cap = hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_ATOMIC ?
IB_ATOMIC_HCA : IB_ATOMIC_NONE;
props->max_pkeys = 1;
props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay;
@ -344,8 +346,6 @@ static struct ib_ucontext *hns_roce_alloc_ucontext(struct ib_device *ib_dev,
if (ret)
goto error_fail_uar_alloc;
INIT_LIST_HEAD(&context->vma_list);
mutex_init(&context->vma_list_mutex);
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) {
INIT_LIST_HEAD(&context->page_list);
mutex_init(&context->page_mutex);
@ -376,76 +376,34 @@ static int hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext)
return 0;
}
static void hns_roce_vma_open(struct vm_area_struct *vma)
{
vma->vm_ops = NULL;
}
static void hns_roce_vma_close(struct vm_area_struct *vma)
{
struct hns_roce_vma_data *vma_data;
vma_data = (struct hns_roce_vma_data *)vma->vm_private_data;
vma_data->vma = NULL;
mutex_lock(vma_data->vma_list_mutex);
list_del(&vma_data->list);
mutex_unlock(vma_data->vma_list_mutex);
kfree(vma_data);
}
static const struct vm_operations_struct hns_roce_vm_ops = {
.open = hns_roce_vma_open,
.close = hns_roce_vma_close,
};
static int hns_roce_set_vma_data(struct vm_area_struct *vma,
struct hns_roce_ucontext *context)
{
struct list_head *vma_head = &context->vma_list;
struct hns_roce_vma_data *vma_data;
vma_data = kzalloc(sizeof(*vma_data), GFP_KERNEL);
if (!vma_data)
return -ENOMEM;
vma_data->vma = vma;
vma_data->vma_list_mutex = &context->vma_list_mutex;
vma->vm_private_data = vma_data;
vma->vm_ops = &hns_roce_vm_ops;
mutex_lock(&context->vma_list_mutex);
list_add(&vma_data->list, vma_head);
mutex_unlock(&context->vma_list_mutex);
return 0;
}
static int hns_roce_mmap(struct ib_ucontext *context,
struct vm_area_struct *vma)
{
struct hns_roce_dev *hr_dev = to_hr_dev(context->device);
if (((vma->vm_end - vma->vm_start) % PAGE_SIZE) != 0)
return -EINVAL;
switch (vma->vm_pgoff) {
case 0:
return rdma_user_mmap_io(context, vma,
to_hr_ucontext(context)->uar.pfn,
PAGE_SIZE,
pgprot_noncached(vma->vm_page_prot));
if (vma->vm_pgoff == 0) {
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
if (io_remap_pfn_range(vma, vma->vm_start,
to_hr_ucontext(context)->uar.pfn,
PAGE_SIZE, vma->vm_page_prot))
return -EAGAIN;
} else if (vma->vm_pgoff == 1 && hr_dev->tptr_dma_addr &&
hr_dev->tptr_size) {
/* vm_pgoff: 1 -- TPTR */
if (io_remap_pfn_range(vma, vma->vm_start,
hr_dev->tptr_dma_addr >> PAGE_SHIFT,
hr_dev->tptr_size,
vma->vm_page_prot))
return -EAGAIN;
} else
return -EINVAL;
/* vm_pgoff: 1 -- TPTR */
case 1:
if (!hr_dev->tptr_dma_addr || !hr_dev->tptr_size)
return -EINVAL;
/*
* FIXME: using io_remap_pfn_range on the dma address returned
* by dma_alloc_coherent is totally wrong.
*/
return rdma_user_mmap_io(context, vma,
hr_dev->tptr_dma_addr >> PAGE_SHIFT,
hr_dev->tptr_size,
vma->vm_page_prot);
return hns_roce_set_vma_data(vma, to_hr_ucontext(context));
default:
return -EINVAL;
}
}
static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num,
@ -471,21 +429,6 @@ static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num,
static void hns_roce_disassociate_ucontext(struct ib_ucontext *ibcontext)
{
struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext);
struct hns_roce_vma_data *vma_data, *n;
struct vm_area_struct *vma;
mutex_lock(&context->vma_list_mutex);
list_for_each_entry_safe(vma_data, n, &context->vma_list, list) {
vma = vma_data->vma;
zap_vma_ptes(vma, vma->vm_start, PAGE_SIZE);
vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
vma->vm_ops = NULL;
list_del(&vma_data->list);
kfree(vma_data);
}
mutex_unlock(&context->vma_list_mutex);
}
static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev)
@ -508,7 +451,6 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
spin_lock_init(&iboe->lock);
ib_dev = &hr_dev->ib_dev;
strlcpy(ib_dev->name, "hns_%d", IB_DEVICE_NAME_MAX);
ib_dev->owner = THIS_MODULE;
ib_dev->node_type = RDMA_NODE_IB_CA;
@ -584,12 +526,27 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
ib_dev->uverbs_cmd_mask |= (1ULL << IB_USER_VERBS_CMD_REREG_MR);
}
/* MW */
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_MW) {
ib_dev->alloc_mw = hns_roce_alloc_mw;
ib_dev->dealloc_mw = hns_roce_dealloc_mw;
ib_dev->uverbs_cmd_mask |=
(1ULL << IB_USER_VERBS_CMD_ALLOC_MW) |
(1ULL << IB_USER_VERBS_CMD_DEALLOC_MW);
}
/* FRMR */
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR) {
ib_dev->alloc_mr = hns_roce_alloc_mr;
ib_dev->map_mr_sg = hns_roce_map_mr_sg;
}
/* OTHERS */
ib_dev->get_port_immutable = hns_roce_port_immutable;
ib_dev->disassociate_ucontext = hns_roce_disassociate_ucontext;
ib_dev->driver_id = RDMA_DRIVER_HNS;
ret = ib_register_device(ib_dev, NULL);
ret = ib_register_device(ib_dev, "hns_%d", NULL);
if (ret) {
dev_err(dev, "ib_register_device failed!\n");
return ret;


@ -329,7 +329,7 @@ static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
u64 bt_idx;
u64 size;
mhop_num = hr_dev->caps.pbl_hop_num;
mhop_num = (mr->type == MR_TYPE_FRMR ? 1 : hr_dev->caps.pbl_hop_num);
pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8);
@ -351,7 +351,7 @@ static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
mr->pbl_size = npages;
mr->pbl_ba = mr->pbl_dma_addr;
mr->pbl_hop_num = hr_dev->caps.pbl_hop_num;
mr->pbl_hop_num = mhop_num;
mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
return 0;
@ -511,7 +511,6 @@ static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova,
mr->key = hw_index_to_key(index); /* MR key */
if (size == ~0ull) {
mr->type = MR_TYPE_DMA;
mr->pbl_buf = NULL;
mr->pbl_dma_addr = 0;
/* PBL multi-hop addressing parameters */
@ -522,7 +521,6 @@ static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova,
mr->pbl_l1_dma_addr = NULL;
mr->pbl_l0_dma_addr = 0;
} else {
mr->type = MR_TYPE_MR;
if (!hr_dev->caps.pbl_hop_num) {
mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
&(mr->pbl_dma_addr),
@ -548,9 +546,9 @@ static void hns_roce_mhop_free(struct hns_roce_dev *hr_dev,
u32 mhop_num;
u64 bt_idx;
npages = ib_umem_page_count(mr->umem);
npages = mr->pbl_size;
pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
mhop_num = hr_dev->caps.pbl_hop_num;
mhop_num = (mr->type == MR_TYPE_FRMR) ? 1 : hr_dev->caps.pbl_hop_num;
if (mhop_num == HNS_ROCE_HOP_NUM_0)
return;
@ -636,7 +634,8 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev,
}
if (mr->size != ~0ULL) {
npages = ib_umem_page_count(mr->umem);
if (mr->type == MR_TYPE_MR)
npages = ib_umem_page_count(mr->umem);
if (!hr_dev->caps.pbl_hop_num)
dma_free_coherent(dev, (unsigned int)(npages * 8),
@ -674,7 +673,10 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
goto err_table;
}
ret = hr_dev->hw->write_mtpt(mailbox->buf, mr, mtpt_idx);
if (mr->type != MR_TYPE_FRMR)
ret = hr_dev->hw->write_mtpt(mailbox->buf, mr, mtpt_idx);
else
ret = hr_dev->hw->frmr_write_mtpt(mailbox->buf, mr);
if (ret) {
dev_err(dev, "Write mtpt fail!\n");
goto err_page;
@ -855,6 +857,8 @@ struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc)
if (mr == NULL)
return ERR_PTR(-ENOMEM);
mr->type = MR_TYPE_DMA;
/* Allocate memory region key */
ret = hns_roce_mr_alloc(to_hr_dev(pd->device), to_hr_pd(pd)->pdn, 0,
~0ULL, acc, 0, mr);
@ -1031,6 +1035,8 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
}
mr->type = MR_TYPE_MR;
ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, virt_addr, length,
access_flags, n, mr);
if (ret)
@ -1201,3 +1207,193 @@ int hns_roce_dereg_mr(struct ib_mr *ibmr)
return ret;
}
struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
u32 max_num_sg)
{
struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
struct device *dev = hr_dev->dev;
struct hns_roce_mr *mr;
u64 length;
u32 page_size;
int ret;
page_size = 1 << (hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT);
length = max_num_sg * page_size;
if (mr_type != IB_MR_TYPE_MEM_REG)
return ERR_PTR(-EINVAL);
if (max_num_sg > HNS_ROCE_FRMR_MAX_PA) {
dev_err(dev, "max_num_sg larger than %d\n",
HNS_ROCE_FRMR_MAX_PA);
return ERR_PTR(-EINVAL);
}
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
mr->type = MR_TYPE_FRMR;
/* Allocate memory region key */
ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, 0, length,
0, max_num_sg, mr);
if (ret)
goto err_free;
ret = hns_roce_mr_enable(hr_dev, mr);
if (ret)
goto err_mr;
mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
mr->umem = NULL;
return &mr->ibmr;
err_mr:
hns_roce_mr_free(to_hr_dev(pd->device), mr);
err_free:
kfree(mr);
return ERR_PTR(ret);
}
static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr)
{
struct hns_roce_mr *mr = to_hr_mr(ibmr);
mr->pbl_buf[mr->npages++] = cpu_to_le64(addr);
return 0;
}
int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset)
{
struct hns_roce_mr *mr = to_hr_mr(ibmr);
mr->npages = 0;
return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page);
}
static void hns_roce_mw_free(struct hns_roce_dev *hr_dev,
struct hns_roce_mw *mw)
{
struct device *dev = hr_dev->dev;
int ret;
if (mw->enabled) {
ret = hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mw->rkey)
& (hr_dev->caps.num_mtpts - 1));
if (ret)
dev_warn(dev, "MW HW2SW_MPT failed (%d)\n", ret);
hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table,
key_to_hw_index(mw->rkey));
}
hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
key_to_hw_index(mw->rkey), BITMAP_NO_RR);
}
static int hns_roce_mw_enable(struct hns_roce_dev *hr_dev,
struct hns_roce_mw *mw)
{
struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
struct hns_roce_cmd_mailbox *mailbox;
struct device *dev = hr_dev->dev;
unsigned long mtpt_idx = key_to_hw_index(mw->rkey);
int ret;
/* prepare HEM entry memory */
ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx);
if (ret)
return ret;
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
if (IS_ERR(mailbox)) {
ret = PTR_ERR(mailbox);
goto err_table;
}
ret = hr_dev->hw->mw_write_mtpt(mailbox->buf, mw);
if (ret) {
dev_err(dev, "MW write mtpt fail!\n");
goto err_page;
}
ret = hns_roce_sw2hw_mpt(hr_dev, mailbox,
mtpt_idx & (hr_dev->caps.num_mtpts - 1));
if (ret) {
dev_err(dev, "MW sw2hw_mpt failed (%d)\n", ret);
goto err_page;
}
mw->enabled = 1;
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
return 0;
err_page:
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
err_table:
hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx);
return ret;
}
struct ib_mw *hns_roce_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ib_pd->device);
struct hns_roce_mw *mw;
unsigned long index = 0;
int ret;
mw = kmalloc(sizeof(*mw), GFP_KERNEL);
if (!mw)
return ERR_PTR(-ENOMEM);
/* Allocate a key for mw from bitmap */
ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index);
if (ret)
goto err_bitmap;
mw->rkey = hw_index_to_key(index);
mw->ibmw.rkey = mw->rkey;
mw->ibmw.type = type;
mw->pdn = to_hr_pd(ib_pd)->pdn;
mw->pbl_hop_num = hr_dev->caps.pbl_hop_num;
mw->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
mw->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
ret = hns_roce_mw_enable(hr_dev, mw);
if (ret)
goto err_mw;
return &mw->ibmw;
err_mw:
hns_roce_mw_free(hr_dev, mw);
err_bitmap:
kfree(mw);
return ERR_PTR(ret);
}
int hns_roce_dealloc_mw(struct ib_mw *ibmw)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibmw->device);
struct hns_roce_mw *mw = to_hr_mw(ibmw);
hns_roce_mw_free(hr_dev, mw);
kfree(mw);
return 0;
}


@ -31,6 +31,7 @@
* SOFTWARE.
*/
#include <linux/pci.h>
#include <linux/platform_device.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_umem.h>
@ -343,6 +344,7 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
{
u32 roundup_sq_stride = roundup_pow_of_two(hr_dev->caps.max_sq_desc_sz);
u8 max_sq_stride = ilog2(roundup_sq_stride);
u32 ex_sge_num;
u32 page_size;
u32 max_cnt;
@ -372,7 +374,18 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
if (hr_qp->sq.max_gs > 2)
hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
(hr_qp->sq.max_gs - 2));
if ((hr_qp->sq.max_gs > 2) && (hr_dev->pci_dev->revision == 0x20)) {
if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) {
dev_err(hr_dev->dev,
"The extended sge cnt error! sge_cnt=%d\n",
hr_qp->sge.sge_cnt);
return -EINVAL;
}
}
hr_qp->sge.sge_shift = 4;
ex_sge_num = hr_qp->sge.sge_cnt;
/* Get buf size, SQ and RQ are aligned to page_size */
if (hr_dev->caps.max_sq_sg <= 2) {
@ -386,6 +399,8 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
hr_qp->sq.wqe_shift), PAGE_SIZE);
} else {
page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
hr_qp->sge.sge_cnt =
max(page_size / (1 << hr_qp->sge.sge_shift), ex_sge_num);
hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt <<
hr_qp->rq.wqe_shift), page_size) +
HNS_ROCE_ALOGN_UP((hr_qp->sge.sge_cnt <<
@ -394,7 +409,7 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
hr_qp->sq.wqe_shift), page_size);
hr_qp->sq.offset = 0;
if (hr_qp->sge.sge_cnt) {
if (ex_sge_num) {
hr_qp->sge.offset = HNS_ROCE_ALOGN_UP(
(hr_qp->sq.wqe_cnt <<
hr_qp->sq.wqe_shift),
@ -465,6 +480,14 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
hr_qp->sge.sge_shift = 4;
}
if ((hr_qp->sq.max_gs > 2) && hr_dev->pci_dev->revision == 0x20) {
if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) {
dev_err(dev, "The extended sge cnt error! sge_cnt=%d\n",
hr_qp->sge.sge_cnt);
return -EINVAL;
}
}
/* Get buf size, SQ and RQ are aligned to PAGE_SIZE */
page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
hr_qp->sq.offset = 0;
@ -472,6 +495,8 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
page_size);
if (hr_dev->caps.max_sq_sg > 2 && hr_qp->sge.sge_cnt) {
hr_qp->sge.sge_cnt = max(page_size/(1 << hr_qp->sge.sge_shift),
(u32)hr_qp->sge.sge_cnt);
hr_qp->sge.offset = size;
size += HNS_ROCE_ALOGN_UP(hr_qp->sge.sge_cnt <<
hr_qp->sge.sge_shift, page_size);
@ -952,8 +977,8 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
}
}
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
IB_LINK_LAYER_ETHERNET)) {
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
attr_mask)) {
dev_err(dev, "ib_modify_qp_is_ok failed\n");
goto out;
}
@ -1106,14 +1131,20 @@ int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev)
{
struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
int reserved_from_top = 0;
int reserved_from_bot;
int ret;
spin_lock_init(&qp_table->lock);
INIT_RADIX_TREE(&hr_dev->qp_table_tree, GFP_ATOMIC);
/* A port include two SQP, six port total 12 */
/* In hw v1, a port include two SQP, six ports total 12 */
if (hr_dev->caps.max_sq_sg <= 2)
reserved_from_bot = SQP_NUM;
else
reserved_from_bot = hr_dev->caps.reserved_qps;
ret = hns_roce_bitmap_init(&qp_table->bitmap, hr_dev->caps.num_qps,
hr_dev->caps.num_qps - 1, SQP_NUM,
hr_dev->caps.num_qps - 1, reserved_from_bot,
reserved_from_top);
if (ret) {
dev_err(hr_dev->dev, "qp bitmap init failed!error=%d\n",


@ -1689,7 +1689,7 @@ static enum i40iw_status_code i40iw_add_mqh_6(struct i40iw_device *iwdev,
unsigned long flags;
rtnl_lock();
for_each_netdev_rcu(&init_net, ip_dev) {
for_each_netdev(&init_net, ip_dev) {
if ((((rdma_vlan_dev_vlan_id(ip_dev) < I40IW_NO_VLAN) &&
(rdma_vlan_dev_real_dev(ip_dev) == iwdev->netdev)) ||
(ip_dev == iwdev->netdev)) && (ip_dev->flags & IFF_UP)) {


@ -2135,10 +2135,10 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr)
}
/**
* i40iw_show_rev
* hw_rev_show
*/
static ssize_t i40iw_show_rev(struct device *dev,
struct device_attribute *attr, char *buf)
static ssize_t hw_rev_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct i40iw_ib_device *iwibdev = container_of(dev,
struct i40iw_ib_device,
@ -2147,34 +2147,37 @@ static ssize_t i40iw_show_rev(struct device *dev,
return sprintf(buf, "%x\n", hw_rev);
}
static DEVICE_ATTR_RO(hw_rev);
/**
* i40iw_show_hca
* hca_type_show
*/
static ssize_t i40iw_show_hca(struct device *dev,
struct device_attribute *attr, char *buf)
static ssize_t hca_type_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
return sprintf(buf, "I40IW\n");
}
static DEVICE_ATTR_RO(hca_type);
/**
* i40iw_show_board
* board_id_show
*/
static ssize_t i40iw_show_board(struct device *dev,
struct device_attribute *attr,
char *buf)
static ssize_t board_id_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
return sprintf(buf, "%.*s\n", 32, "I40IW Board ID");
}
static DEVICE_ATTR_RO(board_id);
static DEVICE_ATTR(hw_rev, S_IRUGO, i40iw_show_rev, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, i40iw_show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, i40iw_show_board, NULL);
static struct attribute *i40iw_dev_attributes[] = {
&dev_attr_hw_rev.attr,
&dev_attr_hca_type.attr,
&dev_attr_board_id.attr,
NULL
};
static struct device_attribute *i40iw_dev_attributes[] = {
&dev_attr_hw_rev,
&dev_attr_hca_type,
&dev_attr_board_id
static const struct attribute_group i40iw_attr_group = {
.attrs = i40iw_dev_attributes,
};
/**
@ -2752,7 +2755,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
i40iw_pr_err("iwdev == NULL\n");
return NULL;
}
strlcpy(iwibdev->ibdev.name, "i40iw%d", IB_DEVICE_NAME_MAX);
iwibdev->ibdev.owner = THIS_MODULE;
iwdev->iwibdev = iwibdev;
iwibdev->iwdev = iwdev;
@ -2850,20 +2852,6 @@ void i40iw_port_ibevent(struct i40iw_device *iwdev)
ib_dispatch_event(&event);
}
/**
* i40iw_unregister_rdma_device - unregister of iwarp from IB
* @iwibdev: rdma device ptr
*/
static void i40iw_unregister_rdma_device(struct i40iw_ib_device *iwibdev)
{
int i;
for (i = 0; i < ARRAY_SIZE(i40iw_dev_attributes); ++i)
device_remove_file(&iwibdev->ibdev.dev,
i40iw_dev_attributes[i]);
ib_unregister_device(&iwibdev->ibdev);
}
/**
* i40iw_destroy_rdma_device - destroy rdma device and free resources
* @iwibdev: IB device ptr
@ -2873,7 +2861,7 @@ void i40iw_destroy_rdma_device(struct i40iw_ib_device *iwibdev)
if (!iwibdev)
return;
i40iw_unregister_rdma_device(iwibdev);
ib_unregister_device(&iwibdev->ibdev);
kfree(iwibdev->ibdev.iwcm);
iwibdev->ibdev.iwcm = NULL;
wait_event_timeout(iwibdev->iwdev->close_wq,
@ -2888,32 +2876,19 @@ void i40iw_destroy_rdma_device(struct i40iw_ib_device *iwibdev)
*/
int i40iw_register_rdma_device(struct i40iw_device *iwdev)
{
int i, ret;
int ret;
struct i40iw_ib_device *iwibdev;
iwdev->iwibdev = i40iw_init_rdma_device(iwdev);
if (!iwdev->iwibdev)
return -ENOMEM;
iwibdev = iwdev->iwibdev;
rdma_set_device_sysfs_group(&iwibdev->ibdev, &i40iw_attr_group);
iwibdev->ibdev.driver_id = RDMA_DRIVER_I40IW;
ret = ib_register_device(&iwibdev->ibdev, NULL);
ret = ib_register_device(&iwibdev->ibdev, "i40iw%d", NULL);
if (ret)
goto error;
for (i = 0; i < ARRAY_SIZE(i40iw_dev_attributes); ++i) {
ret =
device_create_file(&iwibdev->ibdev.dev,
i40iw_dev_attributes[i]);
if (ret) {
while (i > 0) {
i--;
device_remove_file(&iwibdev->ibdev.dev, i40iw_dev_attributes[i]);
}
ib_unregister_device(&iwibdev->ibdev);
goto error;
}
}
return 0;
error:
kfree(iwdev->iwibdev->ibdev.iwcm);


@ -1,6 +1,7 @@
config MLX4_INFINIBAND
tristate "Mellanox ConnectX HCA support"
depends on NETDEVICES && ETHERNET && PCI && INET
depends on INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS
depends on MAY_USE_DEVLINK
select NET_VENDOR_MELLANOX
select MLX4_CORE


@ -807,15 +807,17 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
int err;
struct ib_port_attr pattr;
if (in_wc && in_wc->qp->qp_num) {
pr_debug("received MAD: slid:%d sqpn:%d "
"dlid_bits:%d dqpn:%d wc_flags:0x%x, cls %x, mtd %x, atr %x\n",
in_wc->slid, in_wc->src_qp,
in_wc->dlid_path_bits,
in_wc->qp->qp_num,
in_wc->wc_flags,
in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.method,
be16_to_cpu(in_mad->mad_hdr.attr_id));
if (in_wc && in_wc->qp) {
pr_debug("received MAD: port:%d slid:%d sqpn:%d "
"dlid_bits:%d dqpn:%d wc_flags:0x%x tid:%016llx cls:%x mtd:%x atr:%x\n",
port_num,
in_wc->slid, in_wc->src_qp,
in_wc->dlid_path_bits,
in_wc->qp->qp_num,
in_wc->wc_flags,
be64_to_cpu(in_mad->mad_hdr.tid),
in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.method,
be16_to_cpu(in_mad->mad_hdr.attr_id));
if (in_wc->wc_flags & IB_WC_GRH) {
pr_debug("sgid_hi:0x%016llx sgid_lo:0x%016llx\n",
be64_to_cpu(in_grh->sgid.global.subnet_prefix),


@@ -1140,144 +1140,50 @@ static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
return 0;
}
static void mlx4_ib_vma_open(struct vm_area_struct *area)
{
/* vma_open is called when a new VMA is created on top of our VMA.
* This is done through either mremap flow or split_vma (usually due
* to mlock, madvise, munmap, etc.). We do not support a clone of the
* vma, as this VMA is strongly hardware related. Therefore we set the
* vm_ops of the newly created/cloned VMA to NULL, to prevent it from
* calling us again and trying to do incorrect actions. We assume that
* the original vma size is exactly a single page that there will be no
* "splitting" operations on.
*/
area->vm_ops = NULL;
}
static void mlx4_ib_vma_close(struct vm_area_struct *area)
{
struct mlx4_ib_vma_private_data *mlx4_ib_vma_priv_data;
/* It's guaranteed that all VMAs opened on a FD are closed before the
* file itself is closed, therefore no sync is needed with the regular
* closing flow. (e.g. mlx4_ib_dealloc_ucontext) However need a sync
* with accessing the vma as part of mlx4_ib_disassociate_ucontext.
* The close operation is usually called under mm->mmap_sem except when
* process is exiting. The exiting case is handled explicitly as part
* of mlx4_ib_disassociate_ucontext.
*/
mlx4_ib_vma_priv_data = (struct mlx4_ib_vma_private_data *)
area->vm_private_data;
/* set the vma context pointer to null in the mlx4_ib driver's private
* data to protect against a race condition in mlx4_ib_dissassociate_ucontext().
*/
mlx4_ib_vma_priv_data->vma = NULL;
}
static const struct vm_operations_struct mlx4_ib_vm_ops = {
.open = mlx4_ib_vma_open,
.close = mlx4_ib_vma_close
};
static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
{
int i;
struct vm_area_struct *vma;
struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
/* need to protect from a race on closing the vma as part of
* mlx4_ib_vma_close().
*/
for (i = 0; i < HW_BAR_COUNT; i++) {
vma = context->hw_bar_info[i].vma;
if (!vma)
continue;
zap_vma_ptes(context->hw_bar_info[i].vma,
context->hw_bar_info[i].vma->vm_start, PAGE_SIZE);
context->hw_bar_info[i].vma->vm_flags &=
~(VM_SHARED | VM_MAYSHARE);
/* context going to be destroyed, should not access ops any more */
context->hw_bar_info[i].vma->vm_ops = NULL;
}
}
static void mlx4_ib_set_vma_data(struct vm_area_struct *vma,
struct mlx4_ib_vma_private_data *vma_private_data)
{
vma_private_data->vma = vma;
vma->vm_private_data = vma_private_data;
vma->vm_ops = &mlx4_ib_vm_ops;
}
static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
struct mlx4_ib_dev *dev = to_mdev(context->device);
struct mlx4_ib_ucontext *mucontext = to_mucontext(context);
if (vma->vm_end - vma->vm_start != PAGE_SIZE)
return -EINVAL;
switch (vma->vm_pgoff) {
case 0:
return rdma_user_mmap_io(context, vma,
to_mucontext(context)->uar.pfn,
PAGE_SIZE,
pgprot_noncached(vma->vm_page_prot));
if (vma->vm_pgoff == 0) {
/* We prevent double mmaping on same context */
if (mucontext->hw_bar_info[HW_BAR_DB].vma)
case 1:
if (dev->dev->caps.bf_reg_size == 0)
return -EINVAL;
return rdma_user_mmap_io(
context, vma,
to_mucontext(context)->uar.pfn +
dev->dev->caps.num_uars,
PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot));
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
if (io_remap_pfn_range(vma, vma->vm_start,
to_mucontext(context)->uar.pfn,
PAGE_SIZE, vma->vm_page_prot))
return -EAGAIN;
mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_DB]);
} else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) {
/* We prevent double mmaping on same context */
if (mucontext->hw_bar_info[HW_BAR_BF].vma)
return -EINVAL;
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
if (io_remap_pfn_range(vma, vma->vm_start,
to_mucontext(context)->uar.pfn +
dev->dev->caps.num_uars,
PAGE_SIZE, vma->vm_page_prot))
return -EAGAIN;
mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_BF]);
} else if (vma->vm_pgoff == 3) {
case 3: {
struct mlx4_clock_params params;
int ret;
/* We prevent double mmaping on same context */
if (mucontext->hw_bar_info[HW_BAR_CLOCK].vma)
return -EINVAL;
ret = mlx4_get_internal_clock_params(dev->dev, &params);
if (ret)
return ret;
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
if (io_remap_pfn_range(vma, vma->vm_start,
(pci_resource_start(dev->dev->persist->pdev,
params.bar) +
params.offset)
>> PAGE_SHIFT,
PAGE_SIZE, vma->vm_page_prot))
return -EAGAIN;
mlx4_ib_set_vma_data(vma,
&mucontext->hw_bar_info[HW_BAR_CLOCK]);
} else {
return -EINVAL;
return rdma_user_mmap_io(
context, vma,
(pci_resource_start(dev->dev->persist->pdev,
params.bar) +
params.offset) >>
PAGE_SHIFT,
PAGE_SIZE, pgprot_noncached(vma->vm_page_prot));
}
return 0;
default:
return -EINVAL;
}
}
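
The mlx4_ib_mmap() rewrite above replaces per-driver io_remap_pfn_range() calls, vm_ops tracking, and the zap_vma_ptes() loop in mlx4_ib_disassociate_ucontext() with the new rdma_user_mmap_io() core helper. A reduced sketch of a driver ->mmap() after such a conversion, with a hypothetical foo_bar_pfn() standing in for mlx4's UAR/blue-flame pfn math:

#include <linux/mm.h>
#include <rdma/ib_verbs.h>

/* Hypothetical pfn lookup; a real driver derives this from its ucontext
 * state (e.g. the UAR pfn in the mlx4 code above). */
static unsigned long foo_bar_pfn(struct ib_ucontext *context)
{
	return 0;
}

static int foo_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
		return -EINVAL;

	switch (vma->vm_pgoff) {
	case 0:
		/* rdma_user_mmap_io() performs the io remap and keeps track
		 * of the VMA so the core can zap it on disassociate, which
		 * is what the removed vm_ops/zap_vma_ptes() code did by hand
		 * in each driver. */
		return rdma_user_mmap_io(context, vma, foo_bar_pfn(context),
					 PAGE_SIZE,
					 pgprot_noncached(vma->vm_page_prot));
	default:
		return -EINVAL;
	}
}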
static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev,
@@ -2133,39 +2039,43 @@ out:
return err;
}
static ssize_t show_hca(struct device *device, struct device_attribute *attr,
char *buf)
static ssize_t hca_type_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct mlx4_ib_dev *dev =
container_of(device, struct mlx4_ib_dev, ib_dev.dev);
return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device);
}
static DEVICE_ATTR_RO(hca_type);
static ssize_t show_rev(struct device *device, struct device_attribute *attr,
char *buf)
static ssize_t hw_rev_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct mlx4_ib_dev *dev =
container_of(device, struct mlx4_ib_dev, ib_dev.dev);
return sprintf(buf, "%x\n", dev->dev->rev_id);
}
static DEVICE_ATTR_RO(hw_rev);
static ssize_t show_board(struct device *device, struct device_attribute *attr,
char *buf)
static ssize_t board_id_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct mlx4_ib_dev *dev =
container_of(device, struct mlx4_ib_dev, ib_dev.dev);
return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN,
dev->dev->board_id);
}
static DEVICE_ATTR_RO(board_id);
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
static struct attribute *mlx4_class_attributes[] = {
&dev_attr_hw_rev.attr,
&dev_attr_hca_type.attr,
&dev_attr_board_id.attr,
NULL
};
static struct device_attribute *mlx4_class_attributes[] = {
&dev_attr_hw_rev,
&dev_attr_hca_type,
&dev_attr_board_id
static const struct attribute_group mlx4_attr_group = {
.attrs = mlx4_class_attributes,
};
struct diag_counter {
@@ -2636,7 +2546,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->dev = dev;
ibdev->bond_next_port = 0;
strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
ibdev->ib_dev.owner = THIS_MODULE;
ibdev->ib_dev.node_type = RDMA_NODE_IB_CA;
ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey;
@@ -2898,8 +2807,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
if (mlx4_ib_alloc_diag_counters(ibdev))
goto err_steer_free_bitmap;
rdma_set_device_sysfs_group(&ibdev->ib_dev, &mlx4_attr_group);
ibdev->ib_dev.driver_id = RDMA_DRIVER_MLX4;
if (ib_register_device(&ibdev->ib_dev, NULL))
if (ib_register_device(&ibdev->ib_dev, "mlx4_%d", NULL))
goto err_diag_counters;
if (mlx4_ib_mad_init(ibdev))
@@ -2922,12 +2832,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
goto err_notif;
}
for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
if (device_create_file(&ibdev->ib_dev.dev,
mlx4_class_attributes[j]))
goto err_notif;
}
ibdev->ib_active = true;
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
devlink_port_type_ib_set(mlx4_get_devlink_port(dev, i),


@@ -673,7 +673,7 @@ static void mlx4_ib_mcg_work_handler(struct work_struct *work)
if (!list_empty(&group->pending_list))
req = list_first_entry(&group->pending_list,
struct mcast_req, group_list);
if ((method == IB_MGMT_METHOD_GET_RESP)) {
if (method == IB_MGMT_METHOD_GET_RESP) {
if (req) {
send_reply_to_slave(req->func, group, &req->sa_mad, status);
--group->func[req->func].num_pend_reqs;


@@ -80,16 +80,11 @@ enum hw_bar_type {
HW_BAR_COUNT
};
struct mlx4_ib_vma_private_data {
struct vm_area_struct *vma;
};
struct mlx4_ib_ucontext {
struct ib_ucontext ibucontext;
struct mlx4_uar uar;
struct list_head db_page_list;
struct mutex db_page_mutex;
struct mlx4_ib_vma_private_data hw_bar_info[HW_BAR_COUNT];
struct list_head wqn_ranges_list;
struct mutex wqn_ranges_mutex; /* protect wqn_ranges_list */
};


@@ -2629,7 +2629,6 @@ enum {
static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED;
struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
struct mlx4_ib_qp *qp = to_mqp(ibqp);
enum ib_qp_state cur_state, new_state;
@@ -2639,13 +2638,8 @@ static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
if (cur_state != new_state || cur_state != IB_QPS_RESET) {
int port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
ll = rdma_port_get_link_layer(&dev->ib_dev, port);
}
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
attr_mask, ll)) {
attr_mask)) {
pr_debug("qpn 0x%x: invalid attribute mask specified "
"for transition %d to %d. qp_type %d,"
" attr_mask 0x%x\n",

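Finally, the _mlx4_ib_modify_qp() hunk above reflects the ib_modify_qp_is_ok() signature change in this cycle: the link-layer argument is gone, so callers drop their rdma_port_get_link_layer() lookup. A minimal caller-side sketch under that assumption, with a hypothetical foo_check_modify_qp() wrapper:

#include <rdma/ib_verbs.h>

/* Hypothetical caller-side check; only the validity test is shown, the
 * surrounding driver state-transition logic is elided. */
static int foo_check_modify_qp(struct ib_qp *ibqp, int attr_mask,
			       enum ib_qp_state cur_state,
			       enum ib_qp_state new_state)
{
	/* No rdma_port_get_link_layer() lookup any more; the core check
	 * no longer takes a link-layer argument. */
	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
				attr_mask))
		return -EINVAL;

	return 0;
}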