OpenCloudOS-Kernel/drivers/infiniband/core/sa_query.c

1281 lines
33 KiB
C
Raw Normal View History

/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Voltaire, Inc. All rights reserved.
* Copyright (c) 2006 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/err.h>
#include <linux/random.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/dma-mapping.h>
#include <linux/kref.h>
#include <linux/idr.h>
#include <linux/workqueue.h>
IB/core: Ethernet L2 attributes in verbs/cm structures This patch add the support for Ethernet L2 attributes in the verbs/cm/cma structures. When dealing with L2 Ethernet, we should use smac, dmac, vlan ID and priority in a similar manner that the IB L2 (and the L4 PKEY) attributes are used. Thus, those attributes were added to the following structures: * ib_ah_attr - added dmac * ib_qp_attr - added smac and vlan_id, (sl remains vlan priority) * ib_wc - added smac, vlan_id * ib_sa_path_rec - added smac, dmac, vlan_id * cm_av - added smac and vlan_id For the path record structure, extra care was taken to avoid the new fields when packing it into wire format, so we don't break the IB CM and SA wire protocol. On the active side, the CM fills. its internal structures from the path provided by the ULP. We add there taking the ETH L2 attributes and placing them into the CM Address Handle (struct cm_av). On the passive side, the CM fills its internal structures from the WC associated with the REQ message. We add there taking the ETH L2 attributes from the WC. When the HW driver provides the required ETH L2 attributes in the WC, they set the IB_WC_WITH_SMAC and IB_WC_WITH_VLAN flags. The IB core code checks for the presence of these flags, and in their absence does address resolution from the ib_init_ah_from_wc() helper function. ib_modify_qp_is_ok is also updated to consider the link layer. Some parameters are mandatory for Ethernet link layer, while they are irrelevant for IB. Vendor drivers are modified to support the new function signature. Signed-off-by: Matan Barak <matanb@mellanox.com> Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com> Signed-off-by: Roland Dreier <roland@purestorage.com>
2013-12-13 00:03:11 +08:00
#include <uapi/linux/if_ether.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_cache.h>
#include "sa.h"
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand subnet administration query support");
MODULE_LICENSE("Dual BSD/GPL");
struct ib_sa_sm_ah {
struct ib_ah *ah;
struct kref ref;
u16 pkey_index;
u8 src_path_mask;
};
struct ib_sa_port {
struct ib_mad_agent *agent;
struct ib_sa_sm_ah *sm_ah;
struct work_struct update_task;
spinlock_t ah_lock;
u8 port_num;
};
struct ib_sa_device {
int start_port, end_port;
struct ib_event_handler event_handler;
struct ib_sa_port port[0];
};
struct ib_sa_query {
void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
void (*release)(struct ib_sa_query *);
struct ib_sa_client *client;
struct ib_sa_port *port;
struct ib_mad_send_buf *mad_buf;
struct ib_sa_sm_ah *sm_ah;
int id;
};
struct ib_sa_service_query {
void (*callback)(int, struct ib_sa_service_rec *, void *);
void *context;
struct ib_sa_query sa_query;
};
struct ib_sa_path_query {
void (*callback)(int, struct ib_sa_path_rec *, void *);
void *context;
struct ib_sa_query sa_query;
};
struct ib_sa_guidinfo_query {
void (*callback)(int, struct ib_sa_guidinfo_rec *, void *);
void *context;
struct ib_sa_query sa_query;
};
struct ib_sa_mcmember_query {
void (*callback)(int, struct ib_sa_mcmember_rec *, void *);
void *context;
struct ib_sa_query sa_query;
};
static void ib_sa_add_one(struct ib_device *device);
static void ib_sa_remove_one(struct ib_device *device);
static struct ib_client sa_client = {
.name = "sa",
.add = ib_sa_add_one,
.remove = ib_sa_remove_one
};
static DEFINE_SPINLOCK(idr_lock);
static DEFINE_IDR(query_idr);
static DEFINE_SPINLOCK(tid_lock);
static u32 tid;
#define PATH_REC_FIELD(field) \
.struct_offset_bytes = offsetof(struct ib_sa_path_rec, field), \
.struct_size_bytes = sizeof ((struct ib_sa_path_rec *) 0)->field, \
.field_name = "sa_path_rec:" #field
static const struct ib_field path_rec_table[] = {
{ PATH_REC_FIELD(service_id),
.offset_words = 0,
.offset_bits = 0,
.size_bits = 64 },
{ PATH_REC_FIELD(dgid),
.offset_words = 2,
.offset_bits = 0,
.size_bits = 128 },
{ PATH_REC_FIELD(sgid),
.offset_words = 6,
.offset_bits = 0,
.size_bits = 128 },
{ PATH_REC_FIELD(dlid),
.offset_words = 10,
.offset_bits = 0,
.size_bits = 16 },
{ PATH_REC_FIELD(slid),
.offset_words = 10,
.offset_bits = 16,
.size_bits = 16 },
{ PATH_REC_FIELD(raw_traffic),
.offset_words = 11,
.offset_bits = 0,
.size_bits = 1 },
{ RESERVED,
.offset_words = 11,
.offset_bits = 1,
.size_bits = 3 },
{ PATH_REC_FIELD(flow_label),
.offset_words = 11,
.offset_bits = 4,
.size_bits = 20 },
{ PATH_REC_FIELD(hop_limit),
.offset_words = 11,
.offset_bits = 24,
.size_bits = 8 },
{ PATH_REC_FIELD(traffic_class),
.offset_words = 12,
.offset_bits = 0,
.size_bits = 8 },
{ PATH_REC_FIELD(reversible),
.offset_words = 12,
.offset_bits = 8,
.size_bits = 1 },
{ PATH_REC_FIELD(numb_path),
.offset_words = 12,
.offset_bits = 9,
.size_bits = 7 },
{ PATH_REC_FIELD(pkey),
.offset_words = 12,
.offset_bits = 16,
.size_bits = 16 },
{ PATH_REC_FIELD(qos_class),
.offset_words = 13,
.offset_bits = 0,
.size_bits = 12 },
{ PATH_REC_FIELD(sl),
.offset_words = 13,
.offset_bits = 12,
.size_bits = 4 },
{ PATH_REC_FIELD(mtu_selector),
.offset_words = 13,
.offset_bits = 16,
.size_bits = 2 },
{ PATH_REC_FIELD(mtu),
.offset_words = 13,
.offset_bits = 18,
.size_bits = 6 },
{ PATH_REC_FIELD(rate_selector),
.offset_words = 13,
.offset_bits = 24,
.size_bits = 2 },
{ PATH_REC_FIELD(rate),
.offset_words = 13,
.offset_bits = 26,
.size_bits = 6 },
{ PATH_REC_FIELD(packet_life_time_selector),
.offset_words = 14,
.offset_bits = 0,
.size_bits = 2 },
{ PATH_REC_FIELD(packet_life_time),
.offset_words = 14,
.offset_bits = 2,
.size_bits = 6 },
{ PATH_REC_FIELD(preference),
.offset_words = 14,
.offset_bits = 8,
.size_bits = 8 },
{ RESERVED,
.offset_words = 14,
.offset_bits = 16,
.size_bits = 48 },
};
#define MCMEMBER_REC_FIELD(field) \
.struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field), \
.struct_size_bytes = sizeof ((struct ib_sa_mcmember_rec *) 0)->field, \
.field_name = "sa_mcmember_rec:" #field
static const struct ib_field mcmember_rec_table[] = {
{ MCMEMBER_REC_FIELD(mgid),
.offset_words = 0,
.offset_bits = 0,
.size_bits = 128 },
{ MCMEMBER_REC_FIELD(port_gid),
.offset_words = 4,
.offset_bits = 0,
.size_bits = 128 },
{ MCMEMBER_REC_FIELD(qkey),
.offset_words = 8,
.offset_bits = 0,
.size_bits = 32 },
{ MCMEMBER_REC_FIELD(mlid),
.offset_words = 9,
.offset_bits = 0,
.size_bits = 16 },
{ MCMEMBER_REC_FIELD(mtu_selector),
.offset_words = 9,
.offset_bits = 16,
.size_bits = 2 },
{ MCMEMBER_REC_FIELD(mtu),
.offset_words = 9,
.offset_bits = 18,
.size_bits = 6 },
{ MCMEMBER_REC_FIELD(traffic_class),
.offset_words = 9,
.offset_bits = 24,
.size_bits = 8 },
{ MCMEMBER_REC_FIELD(pkey),
.offset_words = 10,
.offset_bits = 0,
.size_bits = 16 },
{ MCMEMBER_REC_FIELD(rate_selector),
.offset_words = 10,
.offset_bits = 16,
.size_bits = 2 },
{ MCMEMBER_REC_FIELD(rate),
.offset_words = 10,
.offset_bits = 18,
.size_bits = 6 },
{ MCMEMBER_REC_FIELD(packet_life_time_selector),
.offset_words = 10,
.offset_bits = 24,
.size_bits = 2 },
{ MCMEMBER_REC_FIELD(packet_life_time),
.offset_words = 10,
.offset_bits = 26,
.size_bits = 6 },
{ MCMEMBER_REC_FIELD(sl),
.offset_words = 11,
.offset_bits = 0,
.size_bits = 4 },
{ MCMEMBER_REC_FIELD(flow_label),
.offset_words = 11,
.offset_bits = 4,
.size_bits = 20 },
{ MCMEMBER_REC_FIELD(hop_limit),
.offset_words = 11,
.offset_bits = 24,
.size_bits = 8 },
{ MCMEMBER_REC_FIELD(scope),
.offset_words = 12,
.offset_bits = 0,
.size_bits = 4 },
{ MCMEMBER_REC_FIELD(join_state),
.offset_words = 12,
.offset_bits = 4,
.size_bits = 4 },
{ MCMEMBER_REC_FIELD(proxy_join),
.offset_words = 12,
.offset_bits = 8,
.size_bits = 1 },
{ RESERVED,
.offset_words = 12,
.offset_bits = 9,
.size_bits = 23 },
};
#define SERVICE_REC_FIELD(field) \
.struct_offset_bytes = offsetof(struct ib_sa_service_rec, field), \
.struct_size_bytes = sizeof ((struct ib_sa_service_rec *) 0)->field, \
.field_name = "sa_service_rec:" #field
static const struct ib_field service_rec_table[] = {
{ SERVICE_REC_FIELD(id),
.offset_words = 0,
.offset_bits = 0,
.size_bits = 64 },
{ SERVICE_REC_FIELD(gid),
.offset_words = 2,
.offset_bits = 0,
.size_bits = 128 },
{ SERVICE_REC_FIELD(pkey),
.offset_words = 6,
.offset_bits = 0,
.size_bits = 16 },
{ SERVICE_REC_FIELD(lease),
.offset_words = 7,
.offset_bits = 0,
.size_bits = 32 },
{ SERVICE_REC_FIELD(key),
.offset_words = 8,
.offset_bits = 0,
.size_bits = 128 },
{ SERVICE_REC_FIELD(name),
.offset_words = 12,
.offset_bits = 0,
.size_bits = 64*8 },
{ SERVICE_REC_FIELD(data8),
.offset_words = 28,
.offset_bits = 0,
.size_bits = 16*8 },
{ SERVICE_REC_FIELD(data16),
.offset_words = 32,
.offset_bits = 0,
.size_bits = 8*16 },
{ SERVICE_REC_FIELD(data32),
.offset_words = 36,
.offset_bits = 0,
.size_bits = 4*32 },
{ SERVICE_REC_FIELD(data64),
.offset_words = 40,
.offset_bits = 0,
.size_bits = 2*64 },
};
#define GUIDINFO_REC_FIELD(field) \
.struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field), \
.struct_size_bytes = sizeof((struct ib_sa_guidinfo_rec *) 0)->field, \
.field_name = "sa_guidinfo_rec:" #field
static const struct ib_field guidinfo_rec_table[] = {
{ GUIDINFO_REC_FIELD(lid),
.offset_words = 0,
.offset_bits = 0,
.size_bits = 16 },
{ GUIDINFO_REC_FIELD(block_num),
.offset_words = 0,
.offset_bits = 16,
.size_bits = 8 },
{ GUIDINFO_REC_FIELD(res1),
.offset_words = 0,
.offset_bits = 24,
.size_bits = 8 },
{ GUIDINFO_REC_FIELD(res2),
.offset_words = 1,
.offset_bits = 0,
.size_bits = 32 },
{ GUIDINFO_REC_FIELD(guid_info_list),
.offset_words = 2,
.offset_bits = 0,
.size_bits = 512 },
};
static void free_sm_ah(struct kref *kref)
{
struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);
ib_destroy_ah(sm_ah->ah);
kfree(sm_ah);
}
static void update_sm_ah(struct work_struct *work)
{
struct ib_sa_port *port =
container_of(work, struct ib_sa_port, update_task);
struct ib_sa_sm_ah *new_ah;
struct ib_port_attr port_attr;
struct ib_ah_attr ah_attr;
if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
printk(KERN_WARNING "Couldn't query port\n");
return;
}
new_ah = kmalloc(sizeof *new_ah, GFP_KERNEL);
if (!new_ah) {
printk(KERN_WARNING "Couldn't allocate new SM AH\n");
return;
}
kref_init(&new_ah->ref);
new_ah->src_path_mask = (1 << port_attr.lmc) - 1;
new_ah->pkey_index = 0;
if (ib_find_pkey(port->agent->device, port->port_num,
IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index))
printk(KERN_ERR "Couldn't find index for default PKey\n");
memset(&ah_attr, 0, sizeof ah_attr);
ah_attr.dlid = port_attr.sm_lid;
ah_attr.sl = port_attr.sm_sl;
ah_attr.port_num = port->port_num;
new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr);
if (IS_ERR(new_ah->ah)) {
printk(KERN_WARNING "Couldn't create new SM AH\n");
kfree(new_ah);
return;
}
spin_lock_irq(&port->ah_lock);
if (port->sm_ah)
kref_put(&port->sm_ah->ref, free_sm_ah);
port->sm_ah = new_ah;
spin_unlock_irq(&port->ah_lock);
}
static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event)
{
if (event->event == IB_EVENT_PORT_ERR ||
event->event == IB_EVENT_PORT_ACTIVE ||
event->event == IB_EVENT_LID_CHANGE ||
event->event == IB_EVENT_PKEY_CHANGE ||
event->event == IB_EVENT_SM_CHANGE ||
event->event == IB_EVENT_CLIENT_REREGISTER) {
unsigned long flags;
struct ib_sa_device *sa_dev =
container_of(handler, typeof(*sa_dev), event_handler);
struct ib_sa_port *port =
&sa_dev->port[event->element.port_num - sa_dev->start_port];
if (rdma_port_get_link_layer(handler->device, port->port_num) != IB_LINK_LAYER_INFINIBAND)
return;
spin_lock_irqsave(&port->ah_lock, flags);
if (port->sm_ah)
kref_put(&port->sm_ah->ref, free_sm_ah);
port->sm_ah = NULL;
spin_unlock_irqrestore(&port->ah_lock, flags);
queue_work(ib_wq, &sa_dev->port[event->element.port_num -
sa_dev->start_port].update_task);
}
}
void ib_sa_register_client(struct ib_sa_client *client)
{
atomic_set(&client->users, 1);
init_completion(&client->comp);
}
EXPORT_SYMBOL(ib_sa_register_client);
void ib_sa_unregister_client(struct ib_sa_client *client)
{
ib_sa_client_put(client);
wait_for_completion(&client->comp);
}
EXPORT_SYMBOL(ib_sa_unregister_client);
/**
* ib_sa_cancel_query - try to cancel an SA query
* @id:ID of query to cancel
* @query:query pointer to cancel
*
* Try to cancel an SA query. If the id and query don't match up or
* the query has already completed, nothing is done. Otherwise the
* query is canceled and will complete with a status of -EINTR.
*/
void ib_sa_cancel_query(int id, struct ib_sa_query *query)
{
unsigned long flags;
struct ib_mad_agent *agent;
struct ib_mad_send_buf *mad_buf;
spin_lock_irqsave(&idr_lock, flags);
if (idr_find(&query_idr, id) != query) {
spin_unlock_irqrestore(&idr_lock, flags);
return;
}
agent = query->port->agent;
mad_buf = query->mad_buf;
spin_unlock_irqrestore(&idr_lock, flags);
ib_cancel_mad(agent, mad_buf);
}
EXPORT_SYMBOL(ib_sa_cancel_query);
static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
{
struct ib_sa_device *sa_dev;
struct ib_sa_port *port;
unsigned long flags;
u8 src_path_mask;
sa_dev = ib_get_client_data(device, &sa_client);
if (!sa_dev)
return 0x7f;
port = &sa_dev->port[port_num - sa_dev->start_port];
spin_lock_irqsave(&port->ah_lock, flags);
src_path_mask = port->sm_ah ? port->sm_ah->src_path_mask : 0x7f;
spin_unlock_irqrestore(&port->ah_lock, flags);
return src_path_mask;
}
int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr)
{
int ret;
u16 gid_index;
int force_grh;
memset(ah_attr, 0, sizeof *ah_attr);
ah_attr->dlid = be16_to_cpu(rec->dlid);
ah_attr->sl = rec->sl;
ah_attr->src_path_bits = be16_to_cpu(rec->slid) &
get_src_path_mask(device, port_num);
ah_attr->port_num = port_num;
ah_attr->static_rate = rec->rate;
force_grh = rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_ETHERNET;
if (rec->hop_limit > 1 || force_grh) {
ah_attr->ah_flags = IB_AH_GRH;
ah_attr->grh.dgid = rec->dgid;
ret = ib_find_cached_gid(device, &rec->sgid, &port_num,
&gid_index);
if (ret)
return ret;
ah_attr->grh.sgid_index = gid_index;
ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label);
ah_attr->grh.hop_limit = rec->hop_limit;
ah_attr->grh.traffic_class = rec->traffic_class;
}
IB/core: Ethernet L2 attributes in verbs/cm structures This patch add the support for Ethernet L2 attributes in the verbs/cm/cma structures. When dealing with L2 Ethernet, we should use smac, dmac, vlan ID and priority in a similar manner that the IB L2 (and the L4 PKEY) attributes are used. Thus, those attributes were added to the following structures: * ib_ah_attr - added dmac * ib_qp_attr - added smac and vlan_id, (sl remains vlan priority) * ib_wc - added smac, vlan_id * ib_sa_path_rec - added smac, dmac, vlan_id * cm_av - added smac and vlan_id For the path record structure, extra care was taken to avoid the new fields when packing it into wire format, so we don't break the IB CM and SA wire protocol. On the active side, the CM fills. its internal structures from the path provided by the ULP. We add there taking the ETH L2 attributes and placing them into the CM Address Handle (struct cm_av). On the passive side, the CM fills its internal structures from the WC associated with the REQ message. We add there taking the ETH L2 attributes from the WC. When the HW driver provides the required ETH L2 attributes in the WC, they set the IB_WC_WITH_SMAC and IB_WC_WITH_VLAN flags. The IB core code checks for the presence of these flags, and in their absence does address resolution from the ib_init_ah_from_wc() helper function. ib_modify_qp_is_ok is also updated to consider the link layer. Some parameters are mandatory for Ethernet link layer, while they are irrelevant for IB. Vendor drivers are modified to support the new function signature. Signed-off-by: Matan Barak <matanb@mellanox.com> Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com> Signed-off-by: Roland Dreier <roland@purestorage.com>
2013-12-13 00:03:11 +08:00
if (force_grh) {
memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN);
ah_attr->vlan_id = rec->vlan_id;
} else {
ah_attr->vlan_id = 0xffff;
}
return 0;
}
EXPORT_SYMBOL(ib_init_ah_from_path);
static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask)
{
unsigned long flags;
spin_lock_irqsave(&query->port->ah_lock, flags);
if (!query->port->sm_ah) {
spin_unlock_irqrestore(&query->port->ah_lock, flags);
return -EAGAIN;
}
kref_get(&query->port->sm_ah->ref);
query->sm_ah = query->port->sm_ah;
spin_unlock_irqrestore(&query->port->ah_lock, flags);
query->mad_buf = ib_create_send_mad(query->port->agent, 1,
query->sm_ah->pkey_index,
0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA,
gfp_mask);
if (IS_ERR(query->mad_buf)) {
kref_put(&query->sm_ah->ref, free_sm_ah);
return -ENOMEM;
}
query->mad_buf->ah = query->sm_ah->ah;
return 0;
}
static void free_mad(struct ib_sa_query *query)
{
ib_free_send_mad(query->mad_buf);
kref_put(&query->sm_ah->ref, free_sm_ah);
}
static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
{
unsigned long flags;
memset(mad, 0, sizeof *mad);
mad->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;
spin_lock_irqsave(&tid_lock, flags);
mad->mad_hdr.tid =
cpu_to_be64(((u64) agent->hi_tid) << 32 | tid++);
spin_unlock_irqrestore(&tid_lock, flags);
}
static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
{
bool preload = gfp_mask & __GFP_WAIT;
unsigned long flags;
int ret, id;
if (preload)
idr_preload(gfp_mask);
spin_lock_irqsave(&idr_lock, flags);
id = idr_alloc(&query_idr, query, 0, 0, GFP_NOWAIT);
spin_unlock_irqrestore(&idr_lock, flags);
if (preload)
idr_preload_end();
if (id < 0)
return id;
query->mad_buf->timeout_ms = timeout_ms;
query->mad_buf->context[0] = query;
query->id = id;
ret = ib_post_send_mad(query->mad_buf, NULL);
if (ret) {
spin_lock_irqsave(&idr_lock, flags);
idr_remove(&query_idr, id);
spin_unlock_irqrestore(&idr_lock, flags);
}
/*
* It's not safe to dereference query any more, because the
* send may already have completed and freed the query in
* another context.
*/
return ret ? ret : id;
}
void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec)
{
ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), attribute, rec);
}
EXPORT_SYMBOL(ib_sa_unpack_path);
void ib_sa_pack_path(struct ib_sa_path_rec *rec, void *attribute)
{
ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, attribute);
}
EXPORT_SYMBOL(ib_sa_pack_path);
static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
int status,
struct ib_sa_mad *mad)
{
struct ib_sa_path_query *query =
container_of(sa_query, struct ib_sa_path_query, sa_query);
if (mad) {
struct ib_sa_path_rec rec;
ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
mad->data, &rec);
IB/core: Ethernet L2 attributes in verbs/cm structures This patch add the support for Ethernet L2 attributes in the verbs/cm/cma structures. When dealing with L2 Ethernet, we should use smac, dmac, vlan ID and priority in a similar manner that the IB L2 (and the L4 PKEY) attributes are used. Thus, those attributes were added to the following structures: * ib_ah_attr - added dmac * ib_qp_attr - added smac and vlan_id, (sl remains vlan priority) * ib_wc - added smac, vlan_id * ib_sa_path_rec - added smac, dmac, vlan_id * cm_av - added smac and vlan_id For the path record structure, extra care was taken to avoid the new fields when packing it into wire format, so we don't break the IB CM and SA wire protocol. On the active side, the CM fills. its internal structures from the path provided by the ULP. We add there taking the ETH L2 attributes and placing them into the CM Address Handle (struct cm_av). On the passive side, the CM fills its internal structures from the WC associated with the REQ message. We add there taking the ETH L2 attributes from the WC. When the HW driver provides the required ETH L2 attributes in the WC, they set the IB_WC_WITH_SMAC and IB_WC_WITH_VLAN flags. The IB core code checks for the presence of these flags, and in their absence does address resolution from the ib_init_ah_from_wc() helper function. ib_modify_qp_is_ok is also updated to consider the link layer. Some parameters are mandatory for Ethernet link layer, while they are irrelevant for IB. Vendor drivers are modified to support the new function signature. Signed-off-by: Matan Barak <matanb@mellanox.com> Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com> Signed-off-by: Roland Dreier <roland@purestorage.com>
2013-12-13 00:03:11 +08:00
rec.vlan_id = 0xffff;
memset(rec.dmac, 0, ETH_ALEN);
memset(rec.smac, 0, ETH_ALEN);
query->callback(status, &rec, query->context);
} else
query->callback(status, NULL, query->context);
}
static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
{
kfree(container_of(sa_query, struct ib_sa_path_query, sa_query));
}
/**
* ib_sa_path_rec_get - Start a Path get query
* @client:SA client
* @device:device to send query on
* @port_num: port number to send query on
* @rec:Path Record to send in query
* @comp_mask:component mask to send in query
* @timeout_ms:time to wait for response
* @gfp_mask:GFP mask to use for internal allocations
* @callback:function called when query completes, times out or is
* canceled
* @context:opaque user context passed to callback
* @sa_query:query context, used to cancel query
*
* Send a Path Record Get query to the SA to look up a path. The
* callback function will be called when the query completes (or
* fails); status is 0 for a successful response, -EINTR if the query
* is canceled, -ETIMEDOUT is the query timed out, or -EIO if an error
* occurred sending the query. The resp parameter of the callback is
* only valid if status is 0.
*
* If the return value of ib_sa_path_rec_get() is negative, it is an
* error code. Otherwise it is a query ID that can be used to cancel
* the query.
*/
int ib_sa_path_rec_get(struct ib_sa_client *client,
struct ib_device *device, u8 port_num,
struct ib_sa_path_rec *rec,
ib_sa_comp_mask comp_mask,
int timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
struct ib_sa_path_rec *resp,
void *context),
void *context,
struct ib_sa_query **sa_query)
{
struct ib_sa_path_query *query;
struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
struct ib_sa_port *port;
struct ib_mad_agent *agent;
struct ib_sa_mad *mad;
int ret;
if (!sa_dev)
return -ENODEV;
port = &sa_dev->port[port_num - sa_dev->start_port];
agent = port->agent;
query = kmalloc(sizeof *query, gfp_mask);
if (!query)
return -ENOMEM;
query->sa_query.port = port;
ret = alloc_mad(&query->sa_query, gfp_mask);
if (ret)
goto err1;
ib_sa_client_get(client);
query->sa_query.client = client;
query->callback = callback;
query->context = context;
mad = query->sa_query.mad_buf->mad;
init_mad(mad, agent);
query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL;
query->sa_query.release = ib_sa_path_rec_release;
mad->mad_hdr.method = IB_MGMT_METHOD_GET;
mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC);
mad->sa_hdr.comp_mask = comp_mask;
ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, mad->data);
*sa_query = &query->sa_query;
ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
if (ret < 0)
goto err2;
return ret;
err2:
*sa_query = NULL;
ib_sa_client_put(query->sa_query.client);
free_mad(&query->sa_query);
err1:
kfree(query);
return ret;
}
EXPORT_SYMBOL(ib_sa_path_rec_get);
static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query,
int status,
struct ib_sa_mad *mad)
{
struct ib_sa_service_query *query =
container_of(sa_query, struct ib_sa_service_query, sa_query);
if (mad) {
struct ib_sa_service_rec rec;
ib_unpack(service_rec_table, ARRAY_SIZE(service_rec_table),
mad->data, &rec);
query->callback(status, &rec, query->context);
} else
query->callback(status, NULL, query->context);
}
static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)
{
kfree(container_of(sa_query, struct ib_sa_service_query, sa_query));
}
/**
* ib_sa_service_rec_query - Start Service Record operation
* @client:SA client
* @device:device to send request on
* @port_num: port number to send request on
* @method:SA method - should be get, set, or delete
* @rec:Service Record to send in request
* @comp_mask:component mask to send in request
* @timeout_ms:time to wait for response
* @gfp_mask:GFP mask to use for internal allocations
* @callback:function called when request completes, times out or is
* canceled
* @context:opaque user context passed to callback
* @sa_query:request context, used to cancel request
*
* Send a Service Record set/get/delete to the SA to register,
* unregister or query a service record.
* The callback function will be called when the request completes (or
* fails); status is 0 for a successful response, -EINTR if the query
* is canceled, -ETIMEDOUT is the query timed out, or -EIO if an error
* occurred sending the query. The resp parameter of the callback is
* only valid if status is 0.
*
* If the return value of ib_sa_service_rec_query() is negative, it is an
* error code. Otherwise it is a request ID that can be used to cancel
* the query.
*/
int ib_sa_service_rec_query(struct ib_sa_client *client,
struct ib_device *device, u8 port_num, u8 method,
struct ib_sa_service_rec *rec,
ib_sa_comp_mask comp_mask,
int timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
struct ib_sa_service_rec *resp,
void *context),
void *context,
struct ib_sa_query **sa_query)
{
struct ib_sa_service_query *query;
struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
struct ib_sa_port *port;
struct ib_mad_agent *agent;
struct ib_sa_mad *mad;
int ret;
if (!sa_dev)
return -ENODEV;
port = &sa_dev->port[port_num - sa_dev->start_port];
agent = port->agent;
if (method != IB_MGMT_METHOD_GET &&
method != IB_MGMT_METHOD_SET &&
method != IB_SA_METHOD_DELETE)
return -EINVAL;
query = kmalloc(sizeof *query, gfp_mask);
if (!query)
return -ENOMEM;
query->sa_query.port = port;
ret = alloc_mad(&query->sa_query, gfp_mask);
if (ret)
goto err1;
ib_sa_client_get(client);
query->sa_query.client = client;
query->callback = callback;
query->context = context;
mad = query->sa_query.mad_buf->mad;
init_mad(mad, agent);
query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL;
query->sa_query.release = ib_sa_service_rec_release;
mad->mad_hdr.method = method;
mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_SERVICE_REC);
mad->sa_hdr.comp_mask = comp_mask;
ib_pack(service_rec_table, ARRAY_SIZE(service_rec_table),
rec, mad->data);
*sa_query = &query->sa_query;
ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
if (ret < 0)
goto err2;
return ret;
err2:
*sa_query = NULL;
ib_sa_client_put(query->sa_query.client);
free_mad(&query->sa_query);
err1:
kfree(query);
return ret;
}
EXPORT_SYMBOL(ib_sa_service_rec_query);
static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query,
int status,
struct ib_sa_mad *mad)
{
struct ib_sa_mcmember_query *query =
container_of(sa_query, struct ib_sa_mcmember_query, sa_query);
if (mad) {
struct ib_sa_mcmember_rec rec;
ib_unpack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
mad->data, &rec);
query->callback(status, &rec, query->context);
} else
query->callback(status, NULL, query->context);
}
static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query)
{
kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query));
}
int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
struct ib_device *device, u8 port_num,
u8 method,
struct ib_sa_mcmember_rec *rec,
ib_sa_comp_mask comp_mask,
int timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
struct ib_sa_mcmember_rec *resp,
void *context),
void *context,
struct ib_sa_query **sa_query)
{
struct ib_sa_mcmember_query *query;
struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
struct ib_sa_port *port;
struct ib_mad_agent *agent;
struct ib_sa_mad *mad;
int ret;
if (!sa_dev)
return -ENODEV;
port = &sa_dev->port[port_num - sa_dev->start_port];
agent = port->agent;
query = kmalloc(sizeof *query, gfp_mask);
if (!query)
return -ENOMEM;
query->sa_query.port = port;
ret = alloc_mad(&query->sa_query, gfp_mask);
if (ret)
goto err1;
ib_sa_client_get(client);
query->sa_query.client = client;
query->callback = callback;
query->context = context;
mad = query->sa_query.mad_buf->mad;
init_mad(mad, agent);
query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL;
query->sa_query.release = ib_sa_mcmember_rec_release;
mad->mad_hdr.method = method;
mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
mad->sa_hdr.comp_mask = comp_mask;
ib_pack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
rec, mad->data);
*sa_query = &query->sa_query;
ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
if (ret < 0)
goto err2;
return ret;
err2:
*sa_query = NULL;
ib_sa_client_put(query->sa_query.client);
free_mad(&query->sa_query);
err1:
kfree(query);
return ret;
}
/* Support GuidInfoRecord */
static void ib_sa_guidinfo_rec_callback(struct ib_sa_query *sa_query,
int status,
struct ib_sa_mad *mad)
{
struct ib_sa_guidinfo_query *query =
container_of(sa_query, struct ib_sa_guidinfo_query, sa_query);
if (mad) {
struct ib_sa_guidinfo_rec rec;
ib_unpack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table),
mad->data, &rec);
query->callback(status, &rec, query->context);
} else
query->callback(status, NULL, query->context);
}
static void ib_sa_guidinfo_rec_release(struct ib_sa_query *sa_query)
{
kfree(container_of(sa_query, struct ib_sa_guidinfo_query, sa_query));
}
int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
struct ib_device *device, u8 port_num,
struct ib_sa_guidinfo_rec *rec,
ib_sa_comp_mask comp_mask, u8 method,
int timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
struct ib_sa_guidinfo_rec *resp,
void *context),
void *context,
struct ib_sa_query **sa_query)
{
struct ib_sa_guidinfo_query *query;
struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
struct ib_sa_port *port;
struct ib_mad_agent *agent;
struct ib_sa_mad *mad;
int ret;
if (!sa_dev)
return -ENODEV;
if (method != IB_MGMT_METHOD_GET &&
method != IB_MGMT_METHOD_SET &&
method != IB_SA_METHOD_DELETE) {
return -EINVAL;
}
port = &sa_dev->port[port_num - sa_dev->start_port];
agent = port->agent;
query = kmalloc(sizeof *query, gfp_mask);
if (!query)
return -ENOMEM;
query->sa_query.port = port;
ret = alloc_mad(&query->sa_query, gfp_mask);
if (ret)
goto err1;
ib_sa_client_get(client);
query->sa_query.client = client;
query->callback = callback;
query->context = context;
mad = query->sa_query.mad_buf->mad;
init_mad(mad, agent);
query->sa_query.callback = callback ? ib_sa_guidinfo_rec_callback : NULL;
query->sa_query.release = ib_sa_guidinfo_rec_release;
mad->mad_hdr.method = method;
mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_GUID_INFO_REC);
mad->sa_hdr.comp_mask = comp_mask;
ib_pack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table), rec,
mad->data);
*sa_query = &query->sa_query;
ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
if (ret < 0)
goto err2;
return ret;
err2:
*sa_query = NULL;
ib_sa_client_put(query->sa_query.client);
free_mad(&query->sa_query);
err1:
kfree(query);
return ret;
}
EXPORT_SYMBOL(ib_sa_guid_info_rec_query);
static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *mad_send_wc)
{
struct ib_sa_query *query = mad_send_wc->send_buf->context[0];
unsigned long flags;
if (query->callback)
switch (mad_send_wc->status) {
case IB_WC_SUCCESS:
/* No callback -- already got recv */
break;
case IB_WC_RESP_TIMEOUT_ERR:
query->callback(query, -ETIMEDOUT, NULL);
break;
case IB_WC_WR_FLUSH_ERR:
query->callback(query, -EINTR, NULL);
break;
default:
query->callback(query, -EIO, NULL);
break;
}
spin_lock_irqsave(&idr_lock, flags);
idr_remove(&query_idr, query->id);
spin_unlock_irqrestore(&idr_lock, flags);
free_mad(query);
ib_sa_client_put(query->client);
query->release(query);
}
static void recv_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct ib_sa_query *query;
struct ib_mad_send_buf *mad_buf;
mad_buf = (void *) (unsigned long) mad_recv_wc->wc->wr_id;
query = mad_buf->context[0];
if (query->callback) {
if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
query->callback(query,
mad_recv_wc->recv_buf.mad->mad_hdr.status ?
-EINVAL : 0,
(struct ib_sa_mad *) mad_recv_wc->recv_buf.mad);
else
query->callback(query, -EIO, NULL);
}
ib_free_recv_mad(mad_recv_wc);
}
static void ib_sa_add_one(struct ib_device *device)
{
struct ib_sa_device *sa_dev;
int s, e, i;
if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
return;
if (device->node_type == RDMA_NODE_IB_SWITCH)
s = e = 0;
else {
s = 1;
e = device->phys_port_cnt;
}
sa_dev = kzalloc(sizeof *sa_dev +
(e - s + 1) * sizeof (struct ib_sa_port),
GFP_KERNEL);
if (!sa_dev)
return;
sa_dev->start_port = s;
sa_dev->end_port = e;
for (i = 0; i <= e - s; ++i) {
spin_lock_init(&sa_dev->port[i].ah_lock);
if (rdma_port_get_link_layer(device, i + 1) != IB_LINK_LAYER_INFINIBAND)
continue;
sa_dev->port[i].sm_ah = NULL;
sa_dev->port[i].port_num = i + s;
sa_dev->port[i].agent =
ib_register_mad_agent(device, i + s, IB_QPT_GSI,
NULL, 0, send_handler,
recv_handler, sa_dev);
if (IS_ERR(sa_dev->port[i].agent))
goto err;
INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah);
}
ib_set_client_data(device, &sa_client, sa_dev);
/*
* We register our event handler after everything is set up,
* and then update our cached info after the event handler is
* registered to avoid any problems if a port changes state
* during our initialization.
*/
INIT_IB_EVENT_HANDLER(&sa_dev->event_handler, device, ib_sa_event);
if (ib_register_event_handler(&sa_dev->event_handler))
goto err;
for (i = 0; i <= e - s; ++i)
if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND)
update_sm_ah(&sa_dev->port[i].update_task);
return;
err:
while (--i >= 0)
if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND)
ib_unregister_mad_agent(sa_dev->port[i].agent);
kfree(sa_dev);
return;
}
static void ib_sa_remove_one(struct ib_device *device)
{
struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
int i;
if (!sa_dev)
return;
ib_unregister_event_handler(&sa_dev->event_handler);
flush_workqueue(ib_wq);
for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND) {
ib_unregister_mad_agent(sa_dev->port[i].agent);
if (sa_dev->port[i].sm_ah)
kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
}
}
kfree(sa_dev);
}
static int __init ib_sa_init(void)
{
int ret;
get_random_bytes(&tid, sizeof tid);
ret = ib_register_client(&sa_client);
if (ret) {
printk(KERN_ERR "Couldn't register ib_sa client\n");
goto err1;
}
ret = mcast_init();
if (ret) {
printk(KERN_ERR "Couldn't initialize multicast handling\n");
goto err2;
}
return 0;
err2:
ib_unregister_client(&sa_client);
err1:
return ret;
}
static void __exit ib_sa_cleanup(void)
{
mcast_cleanup();
ib_unregister_client(&sa_client);
idr_destroy(&query_idr);
}
module_init(ib_sa_init);
module_exit(ib_sa_cleanup);