2006-01-03 02:04:38 +08:00
|
|
|
/*
|
|
|
|
* net/tipc/node.c: TIPC node management routines
|
2007-02-09 22:25:21 +08:00
|
|
|
*
|
2014-06-26 09:41:33 +08:00
|
|
|
* Copyright (c) 2000-2006, 2012-2014, Ericsson AB
|
2014-03-27 12:54:36 +08:00
|
|
|
* Copyright (c) 2005-2006, 2010-2014, Wind River Systems
|
2006-01-03 02:04:38 +08:00
|
|
|
* All rights reserved.
|
|
|
|
*
|
2006-01-11 20:30:43 +08:00
|
|
|
* Redistribution and use in source and binary forms, with or without
|
2006-01-03 02:04:38 +08:00
|
|
|
* modification, are permitted provided that the following conditions are met:
|
|
|
|
*
|
2006-01-11 20:30:43 +08:00
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
* 3. Neither the names of the copyright holders nor the names of its
|
|
|
|
* contributors may be used to endorse or promote products derived from
|
|
|
|
* this software without specific prior written permission.
|
2006-01-03 02:04:38 +08:00
|
|
|
*
|
2006-01-11 20:30:43 +08:00
|
|
|
* Alternatively, this software may be distributed under the terms of the
|
|
|
|
* GNU General Public License ("GPL") version 2 as published by the Free
|
|
|
|
* Software Foundation.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
2006-01-03 02:04:38 +08:00
|
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "core.h"
|
|
|
|
#include "config.h"
|
|
|
|
#include "node.h"
|
|
|
|
#include "name_distr.h"
|
2014-08-23 06:09:07 +08:00
|
|
|
#include "socket.h"
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2008-09-03 14:38:32 +08:00
|
|
|
/* Forward declaration: invoked by tipc_node_link_down() when no active link to the node remains */
static void node_lost_contact(struct tipc_node *n_ptr);
|
|
|
|
/* Forward declaration: invoked by tipc_node_link_up() when the first link to the node becomes active */
static void node_established_contact(struct tipc_node *n_ptr);
|
2006-01-03 02:04:38 +08:00
|
|
|
|
tipc: use message to abort connections when losing contact to node
In the current implementation, each 'struct tipc_node' instance keeps
a linked list of those ports/sockets that are connected to the node
represented by that struct. The purpose of this is to let the node
object know which sockets to alert when it loses contact with its peer
node, i.e., which sockets need to have their connections aborted.
This entails an unwanted direct reference from the node structure
back to the port/socket structure, and a need to grab port_lock
when we have to make an upcall to the port. We want to get rid of
this unnecessary BH entry point into the socket, and also eliminate
its use of port_lock.
In this commit, we instead let the node struct keep a list of "connected
socket" structs, each of which represents a connected socket, but is
allocated independently by the node at the moment of connection. If
the node loses contact with its peer node, the list is traversed, and
a "connection abort" message is created for each entry in the list. The
message is sent to its respective connected socket using the ordinary
data path, and the receiving socket aborts its connections upon reception
of the message.
This enables us to get rid of the direct reference from 'struct node' to
'struct port', and another unwanted BH access point to the latter.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-08-23 06:09:08 +08:00
|
|
|
struct tipc_sock_conn {
|
|
|
|
u32 port;
|
|
|
|
u32 peer_port;
|
|
|
|
u32 peer_node;
|
|
|
|
struct list_head list;
|
|
|
|
};
|
|
|
|
|
2014-11-20 17:29:17 +08:00
|
|
|
static const struct nla_policy tipc_nl_node_policy[TIPC_NLA_NODE_MAX + 1] = {
|
|
|
|
[TIPC_NLA_NODE_UNSPEC] = { .type = NLA_UNSPEC },
|
|
|
|
[TIPC_NLA_NODE_ADDR] = { .type = NLA_U32 },
|
|
|
|
[TIPC_NLA_NODE_UP] = { .type = NLA_FLAG }
|
|
|
|
};
|
|
|
|
|
2011-11-04 23:54:43 +08:00
|
|
|
/*
|
|
|
|
* A trivial power-of-two bitmask technique is used for speed, since this
|
|
|
|
* operation is done for every incoming TIPC packet. The number of hash table
|
|
|
|
* entries has been chosen so that no hash chain exceeds 8 nodes and will
|
|
|
|
* usually be much smaller (typically only a single node).
|
|
|
|
*/
|
2012-04-23 12:49:13 +08:00
|
|
|
static unsigned int tipc_hashfn(u32 addr)
|
2011-11-04 23:54:43 +08:00
|
|
|
{
|
|
|
|
return addr & (NODE_HTABLE_SIZE - 1);
|
|
|
|
}
|
|
|
|
|
2011-10-28 03:03:24 +08:00
|
|
|
/*
|
2011-02-26 07:42:52 +08:00
|
|
|
* tipc_node_find - locate specified node object, if it exists
|
|
|
|
*/
|
2015-01-09 15:27:05 +08:00
|
|
|
struct tipc_node *tipc_node_find(struct net *net, u32 addr)
|
2011-02-26 07:42:52 +08:00
|
|
|
{
|
2015-01-09 15:27:05 +08:00
|
|
|
struct tipc_net *tn = net_generic(net, tipc_net_id);
|
2011-02-26 07:42:52 +08:00
|
|
|
struct tipc_node *node;
|
|
|
|
|
2015-01-09 15:27:10 +08:00
|
|
|
if (unlikely(!in_own_cluster_exact(net, addr)))
|
2011-02-26 07:42:52 +08:00
|
|
|
return NULL;
|
|
|
|
|
2014-03-27 12:54:37 +08:00
|
|
|
rcu_read_lock();
|
2015-01-09 15:27:05 +08:00
|
|
|
hlist_for_each_entry_rcu(node, &tn->node_htable[tipc_hashfn(addr)],
|
|
|
|
hash) {
|
2014-03-27 12:54:36 +08:00
|
|
|
if (node->addr == addr) {
|
2014-03-27 12:54:37 +08:00
|
|
|
rcu_read_unlock();
|
2011-02-26 07:42:52 +08:00
|
|
|
return node;
|
2014-03-27 12:54:36 +08:00
|
|
|
}
|
2011-02-26 07:42:52 +08:00
|
|
|
}
|
2014-03-27 12:54:37 +08:00
|
|
|
rcu_read_unlock();
|
2011-02-26 07:42:52 +08:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2015-01-09 15:27:05 +08:00
|
|
|
/**
 * tipc_node_create - look up, or allocate and register, a node object
 * @net: network namespace the node belongs to
 * @addr: address of the node
 *
 * The whole operation runs under tn->node_list_lock. If tipc_node_find()
 * already returns an object for @addr, that object is returned unchanged.
 * Otherwise a zeroed node is allocated with GFP_ATOMIC, completely
 * initialised, and only afterwards inserted into the node hash table and
 * into tn->node_list, which is kept sorted by ascending address.
 *
 * Return: the node object, or NULL if a new one was needed but could not be
 * allocated.
 */
struct tipc_node *tipc_node_create(struct net *net, u32 addr)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	struct tipc_node *n_ptr, *temp_node;
	struct list_head *insert_before;

	spin_lock_bh(&tn->node_list_lock);
	n_ptr = tipc_node_find(net, addr);
	if (n_ptr)
		goto exit;
	n_ptr = kzalloc(sizeof(*n_ptr), GFP_ATOMIC);
	if (!n_ptr) {
		pr_warn("Node creation failed, no memory\n");
		goto exit;
	}

	/*
	 * Finish every field before the node becomes reachable: once it is
	 * on the RCU-protected hash chain or node list, lockless readers
	 * such as tipc_node_find() may pick it up at any moment.
	 */
	n_ptr->addr = addr;
	n_ptr->net = net;
	n_ptr->action_flags = TIPC_WAIT_PEER_LINKS_DOWN;
	n_ptr->signature = INVALID_NODE_SIG;
	spin_lock_init(&n_ptr->lock);
	INIT_HLIST_NODE(&n_ptr->hash);
	INIT_LIST_HEAD(&n_ptr->list);
	INIT_LIST_HEAD(&n_ptr->publ_list);
	INIT_LIST_HEAD(&n_ptr->conn_sks);
	__skb_queue_head_init(&n_ptr->bclink.deferred_queue);

	/* Publish: hash table first, then the address-sorted node list */
	hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]);

	/*
	 * Insert in front of the first node with a higher address, or at the
	 * tail (i.e. in front of the list head) if there is no such node.
	 */
	insert_before = &tn->node_list;
	list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
		if (n_ptr->addr < temp_node->addr) {
			insert_before = &temp_node->list;
			break;
		}
	}
	list_add_tail_rcu(&n_ptr->list, insert_before);
	tn->num_nodes++;
exit:
	spin_unlock_bh(&tn->node_list_lock);
	return n_ptr;
}
|
|
|
|
|
2015-01-09 15:27:05 +08:00
|
|
|
static void tipc_node_delete(struct tipc_net *tn, struct tipc_node *n_ptr)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
2014-03-27 12:54:37 +08:00
|
|
|
list_del_rcu(&n_ptr->list);
|
|
|
|
hlist_del_rcu(&n_ptr->hash);
|
|
|
|
kfree_rcu(n_ptr, rcu);
|
2011-01-01 02:59:19 +08:00
|
|
|
|
2015-01-09 15:27:05 +08:00
|
|
|
tn->num_nodes--;
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
|
|
|
|
2015-01-09 15:27:05 +08:00
|
|
|
void tipc_node_stop(struct net *net)
|
2014-03-27 12:54:36 +08:00
|
|
|
{
|
2015-01-09 15:27:05 +08:00
|
|
|
struct tipc_net *tn = net_generic(net, tipc_net_id);
|
2014-03-27 12:54:36 +08:00
|
|
|
struct tipc_node *node, *t_node;
|
|
|
|
|
2015-01-09 15:27:05 +08:00
|
|
|
spin_lock_bh(&tn->node_list_lock);
|
|
|
|
list_for_each_entry_safe(node, t_node, &tn->node_list, list)
|
|
|
|
tipc_node_delete(tn, node);
|
|
|
|
spin_unlock_bh(&tn->node_list_lock);
|
2014-03-27 12:54:36 +08:00
|
|
|
}
|
|
|
|
|
2015-01-09 15:27:05 +08:00
|
|
|
/*
 * tipc_node_add_conn - register a socket connection with its peer node
 * @net: network namespace
 * @dnode: address of the peer node
 * @port: port number stored as the entry's 'port'
 * @peer_port: port number stored as the entry's 'peer_port'
 *
 * Nothing is recorded when @dnode is the own node. Otherwise a
 * struct tipc_sock_conn is allocated with GFP_ATOMIC and appended to the
 * peer node's conn_sks list under the node lock.
 *
 * Returns 0 on success (or when @dnode is the own node), -EHOSTUNREACH if
 * the peer node is unknown or the entry could not be allocated.
 */
int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port)
{
	struct tipc_sock_conn *entry;
	struct tipc_node *peer;

	if (in_own_node(net, dnode))
		return 0;

	peer = tipc_node_find(net, dnode);
	if (!peer) {
		pr_warn("Connecting sock to node 0x%x failed\n", dnode);
		return -EHOSTUNREACH;
	}

	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
	if (!entry)
		return -EHOSTUNREACH;

	entry->port = port;
	entry->peer_port = peer_port;
	entry->peer_node = dnode;

	tipc_node_lock(peer);
	list_add_tail(&entry->list, &peer->conn_sks);
	tipc_node_unlock(peer);

	return 0;
}
|
|
|
|
|
2015-01-09 15:27:05 +08:00
|
|
|
/*
 * tipc_node_remove_conn - drop the connection records of a port
 * @net: network namespace
 * @dnode: address of the peer node
 * @port: port number whose records are to be removed
 *
 * Does nothing when @dnode is the own node or is not a known node.
 * Otherwise every entry on the peer node's conn_sks list whose 'port'
 * equals @port is unlinked and freed, all under the node lock.
 */
void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port)
{
	struct tipc_sock_conn *entry, *next;
	struct tipc_node *peer;

	if (in_own_node(net, dnode))
		return;

	peer = tipc_node_find(net, dnode);
	if (!peer)
		return;

	tipc_node_lock(peer);
	list_for_each_entry_safe(entry, next, &peer->conn_sks, list) {
		if (entry->port == port) {
			list_del(&entry->list);
			kfree(entry);
		}
	}
	tipc_node_unlock(peer);
}
|
|
|
|
|
2006-01-03 02:04:38 +08:00
|
|
|
/**
|
2006-01-18 07:38:21 +08:00
|
|
|
* tipc_node_link_up - handle addition of link
|
2007-02-09 22:25:21 +08:00
|
|
|
*
|
2006-01-03 02:04:38 +08:00
|
|
|
* Link becomes active (alone or shared) or standby, depending on its priority.
|
|
|
|
*/
|
2011-12-30 09:58:42 +08:00
|
|
|
void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
2011-12-30 09:58:42 +08:00
|
|
|
struct tipc_link **active = &n_ptr->active_links[0];
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2006-06-26 14:52:50 +08:00
|
|
|
n_ptr->working_links++;
|
2014-10-20 14:44:25 +08:00
|
|
|
n_ptr->action_flags |= TIPC_NOTIFY_LINK_UP;
|
|
|
|
n_ptr->link_id = l_ptr->peer_bearer_id << 16 | l_ptr->bearer_id;
|
|
|
|
|
2015-01-23 00:10:31 +08:00
|
|
|
pr_debug("Established link <%s> on network plane %c\n",
|
|
|
|
l_ptr->name, l_ptr->net_plane);
|
2007-02-09 22:25:21 +08:00
|
|
|
|
2006-01-03 02:04:38 +08:00
|
|
|
if (!active[0]) {
|
|
|
|
active[0] = active[1] = l_ptr;
|
|
|
|
node_established_contact(n_ptr);
|
2014-06-26 09:41:33 +08:00
|
|
|
goto exit;
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
2007-02-09 22:25:21 +08:00
|
|
|
if (l_ptr->priority < active[0]->priority) {
|
2015-01-23 00:10:31 +08:00
|
|
|
pr_debug("New link <%s> becomes standby\n", l_ptr->name);
|
2014-06-26 09:41:33 +08:00
|
|
|
goto exit;
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
2014-02-18 16:06:46 +08:00
|
|
|
tipc_link_dup_queue_xmit(active[0], l_ptr);
|
2007-02-09 22:25:21 +08:00
|
|
|
if (l_ptr->priority == active[0]->priority) {
|
2006-01-03 02:04:38 +08:00
|
|
|
active[0] = l_ptr;
|
2014-06-26 09:41:33 +08:00
|
|
|
goto exit;
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
2015-01-23 00:10:31 +08:00
|
|
|
pr_debug("Old link <%s> becomes standby\n", active[0]->name);
|
2006-06-26 14:52:17 +08:00
|
|
|
if (active[1] != active[0])
|
2015-01-23 00:10:31 +08:00
|
|
|
pr_debug("Old link <%s> becomes standby\n", active[1]->name);
|
2006-01-03 02:04:38 +08:00
|
|
|
active[0] = active[1] = l_ptr;
|
2014-06-26 09:41:33 +08:00
|
|
|
exit:
|
|
|
|
/* Leave room for changeover header when returning 'mtu' to users: */
|
|
|
|
n_ptr->act_mtus[0] = active[0]->max_pkt - INT_H_SIZE;
|
|
|
|
n_ptr->act_mtus[1] = active[1]->max_pkt - INT_H_SIZE;
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* node_select_active_links - select active link
|
|
|
|
*/
|
2008-09-03 14:38:32 +08:00
|
|
|
static void node_select_active_links(struct tipc_node *n_ptr)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
2011-12-30 09:58:42 +08:00
|
|
|
struct tipc_link **active = &n_ptr->active_links[0];
|
2006-01-03 02:04:38 +08:00
|
|
|
u32 i;
|
|
|
|
u32 highest_prio = 0;
|
|
|
|
|
2007-02-09 22:25:21 +08:00
|
|
|
active[0] = active[1] = NULL;
|
2006-01-03 02:04:38 +08:00
|
|
|
|
|
|
|
for (i = 0; i < MAX_BEARERS; i++) {
|
2011-12-30 09:58:42 +08:00
|
|
|
struct tipc_link *l_ptr = n_ptr->links[i];
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2006-01-18 07:38:21 +08:00
|
|
|
if (!l_ptr || !tipc_link_is_up(l_ptr) ||
|
2006-01-03 02:04:38 +08:00
|
|
|
(l_ptr->priority < highest_prio))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (l_ptr->priority > highest_prio) {
|
2007-02-09 22:25:21 +08:00
|
|
|
highest_prio = l_ptr->priority;
|
2006-01-03 02:04:38 +08:00
|
|
|
active[0] = active[1] = l_ptr;
|
|
|
|
} else {
|
|
|
|
active[1] = l_ptr;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2006-01-18 07:38:21 +08:00
|
|
|
* tipc_node_link_down - handle loss of link
|
2006-01-03 02:04:38 +08:00
|
|
|
*/
|
2011-12-30 09:58:42 +08:00
|
|
|
void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
2015-01-09 15:27:10 +08:00
|
|
|
struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id);
|
2011-12-30 09:58:42 +08:00
|
|
|
struct tipc_link **active;
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2006-06-26 14:52:50 +08:00
|
|
|
n_ptr->working_links--;
|
2014-10-20 14:44:25 +08:00
|
|
|
n_ptr->action_flags |= TIPC_NOTIFY_LINK_DOWN;
|
|
|
|
n_ptr->link_id = l_ptr->peer_bearer_id << 16 | l_ptr->bearer_id;
|
2006-06-26 14:52:50 +08:00
|
|
|
|
2006-01-18 07:38:21 +08:00
|
|
|
if (!tipc_link_is_active(l_ptr)) {
|
2015-01-23 00:10:31 +08:00
|
|
|
pr_debug("Lost standby link <%s> on network plane %c\n",
|
|
|
|
l_ptr->name, l_ptr->net_plane);
|
2006-01-03 02:04:38 +08:00
|
|
|
return;
|
|
|
|
}
|
2015-01-23 00:10:31 +08:00
|
|
|
pr_debug("Lost link <%s> on network plane %c\n",
|
|
|
|
l_ptr->name, l_ptr->net_plane);
|
2006-01-03 02:04:38 +08:00
|
|
|
|
|
|
|
active = &n_ptr->active_links[0];
|
|
|
|
if (active[0] == l_ptr)
|
|
|
|
active[0] = active[1];
|
|
|
|
if (active[1] == l_ptr)
|
|
|
|
active[1] = active[0];
|
|
|
|
if (active[0] == l_ptr)
|
|
|
|
node_select_active_links(n_ptr);
|
2007-02-09 22:25:21 +08:00
|
|
|
if (tipc_node_is_up(n_ptr))
|
2014-01-08 06:02:41 +08:00
|
|
|
tipc_link_failover_send_queue(l_ptr);
|
2007-02-09 22:25:21 +08:00
|
|
|
else
|
2006-01-03 02:04:38 +08:00
|
|
|
node_lost_contact(n_ptr);
|
2014-06-26 09:41:33 +08:00
|
|
|
|
|
|
|
/* Leave room for changeover header when returning 'mtu' to users: */
|
|
|
|
if (active[0]) {
|
|
|
|
n_ptr->act_mtus[0] = active[0]->max_pkt - INT_H_SIZE;
|
|
|
|
n_ptr->act_mtus[1] = active[1]->max_pkt - INT_H_SIZE;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Loopback link went down? No fragmentation needed from now on. */
|
2015-01-09 15:27:10 +08:00
|
|
|
if (n_ptr->addr == tn->own_addr) {
|
2014-06-26 09:41:33 +08:00
|
|
|
n_ptr->act_mtus[0] = MAX_MSG_SIZE;
|
|
|
|
n_ptr->act_mtus[1] = MAX_MSG_SIZE;
|
|
|
|
}
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
|
|
|
|
2011-02-28 23:36:21 +08:00
|
|
|
int tipc_node_active_links(struct tipc_node *n_ptr)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
2010-08-17 19:00:12 +08:00
|
|
|
return n_ptr->active_links[0] != NULL;
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
|
|
|
|
2008-09-03 14:38:32 +08:00
|
|
|
/*
 * tipc_node_is_up - tell whether a node is reachable
 *
 * A node counts as up exactly when tipc_node_active_links() reports an
 * active link for it. Returns non-zero if up, 0 otherwise.
 */
int tipc_node_is_up(struct tipc_node *n_ptr)
{
	return tipc_node_active_links(n_ptr);
}
|
|
|
|
|
2011-12-30 09:58:42 +08:00
|
|
|
void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
2015-01-09 15:27:05 +08:00
|
|
|
struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id);
|
|
|
|
|
tipc: decouple the relationship between bearer and link
Currently on both paths of message transmission and reception, the
read lock of tipc_net_lock must be held before bearer is accessed,
while the write lock of tipc_net_lock has to be taken before bearer
is configured. Although it can ensure that bearer is always valid on
the two data paths, link and bearer is closely bound together.
So as the part of effort of removing tipc_net_lock, the locking
policy of bearer protection will be adjusted as below: on the two
data paths, RCU is used, and on the configuration path of bearer,
RTNL lock is applied.
Now RCU just covers the path of message reception. To make it possible
to protect the path of message transmission with RCU, link should not
use its stored bearer pointer to access bearer, but it should use the
bearer identity of its attached bearer as index to get bearer instance
from bearer_list array, which can help us decouple the relationship
between bearer and link. As a result, bearer on the path of message
transmission can be safely protected by RCU when we access bearer_list
array within RCU lock protection.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Tested-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 10:55:46 +08:00
|
|
|
n_ptr->links[l_ptr->bearer_id] = l_ptr;
|
2015-01-09 15:27:05 +08:00
|
|
|
spin_lock_bh(&tn->node_list_lock);
|
|
|
|
tn->num_links++;
|
|
|
|
spin_unlock_bh(&tn->node_list_lock);
|
2011-03-01 00:32:27 +08:00
|
|
|
n_ptr->link_cnt++;
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
|
|
|
|
2011-12-30 09:58:42 +08:00
|
|
|
void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
2015-01-09 15:27:05 +08:00
|
|
|
struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id);
|
tipc: delay delete of link when failover is needed
When a bearer is disabled, all its attached links are deleted.
Ideally, we should do link failover to redundant links on other bearers,
if there are any, in such cases. This would be consistent with current
behavior when a link is reset, but not deleted. However, due to the
complexity involved, and the (wrongly) perceived low demand for this
feature, it was never implemented until now.
We mark the doomed link for deletion with a new flag, but wait until the
failover process is finished before we actually delete it. With the
improved link tunnelling/failover code introduced earlier in this commit
series, it is now easy to identify a spot in the code where the failover
is finished and it is safe to delete the marked link. Moreover, the test
for the flag and the deletion can be done synchronously, and outside the
most time critical data path.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-02-14 06:29:16 +08:00
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < MAX_BEARERS; i++) {
|
2014-02-15 05:40:43 +08:00
|
|
|
if (l_ptr != n_ptr->links[i])
|
|
|
|
continue;
|
|
|
|
n_ptr->links[i] = NULL;
|
2015-01-09 15:27:05 +08:00
|
|
|
spin_lock_bh(&tn->node_list_lock);
|
|
|
|
tn->num_links--;
|
|
|
|
spin_unlock_bh(&tn->node_list_lock);
|
2014-02-15 05:40:43 +08:00
|
|
|
n_ptr->link_cnt--;
|
tipc: delay delete of link when failover is needed
When a bearer is disabled, all its attached links are deleted.
Ideally, we should do link failover to redundant links on other bearers,
if there are any, in such cases. This would be consistent with current
behavior when a link is reset, but not deleted. However, due to the
complexity involved, and the (wrongly) perceived low demand for this
feature, it was never implemented until now.
We mark the doomed link for deletion with a new flag, but wait until the
failover process is finished before we actually delete it. With the
improved link tunnelling/failover code introduced earlier in this commit
series, it is now easy to identify a spot in the code where the failover
is finished and it is safe to delete the marked link. Moreover, the test
for the flag and the deletion can be done synchronously, and outside the
most time critical data path.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-02-14 06:29:16 +08:00
|
|
|
}
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
|
|
|
|
2008-09-03 14:38:32 +08:00
|
|
|
static void node_established_contact(struct tipc_node *n_ptr)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
2014-05-08 08:54:39 +08:00
|
|
|
n_ptr->action_flags |= TIPC_NOTIFY_NODE_UP;
|
tipc: introduce message to synchronize broadcast link
Upon establishing a first link between two nodes, there is
currently a risk that the two endpoints will disagree on exactly
which sequence number reception and acknowleding of broadcast
packets should start.
The following scenarios may happen:
1: Node A sends an ACTIVATE message to B, telling it to start acking
packets from sequence number N.
2: Node A sends out broadcast N, but does not expect an acknowledge
from B, since B is not yet in its broadcast receiver's list.
3: Node A receives ACK for N from all nodes except B, and releases
packet N.
4: Node B receives the ACTIVATE, activates its link endpoint, and
stores the value N as sequence number of first expected packet.
5: Node B sends a NAME_DISTR message to A.
6: Node A receives the NAME_DISTR message, and activates its endpoint.
At this moment B is added to A's broadcast receiver's set.
Node A also sets sequence number 0 as the first broadcast packet
to be received from B.
7: Node A sends broadcast N+1.
8: B receives N+1, determines there is a gap in the sequence, since
it is expecting N, and sends a NACK for N back to A.
9: Node A has already released N, so no retransmission is possible.
The broadcast link in direction A->B is stale.
In addition to, or instead of, 7-9 above, the following may happen:
10: Node B sends broadcast M > 0 to A.
11: Node A receives M, falsely decides there must be a gap, since
it is expecting packet 0, and asks for retransmission of packets
[0,M-1].
12: Node B has already released these packets, so the broadcast
link is stale in direction B->A.
We solve this problem by introducing a new unicast message type,
BCAST_PROTOCOL/STATE, to convey the sequence number of the next
sent broadcast packet to the other endpoint, at exactly the moment
that endpoint is added to the own node's broadcast receivers list,
and before any other unicast messages are permitted to be sent.
Furthermore, we don't allow any node to start receiving and
processing broadcast packets until this new synchronization
message has been received.
To maintain backwards compatibility, we still open up for
broadcast reception if we receive a NAME_DISTR message without
any preceding broadcast sync message. In this case, we must
assume that the other end has an older code version, and will
never send out the new synchronization message. Hence, for mixed
old and new nodes, the issue arising in 7-12 of the above may
happen with the same probability as before.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-11-16 13:51:31 +08:00
|
|
|
n_ptr->bclink.oos_state = 0;
|
2015-01-09 15:27:07 +08:00
|
|
|
n_ptr->bclink.acked = tipc_bclink_get_last_sent(n_ptr->net);
|
|
|
|
tipc_bclink_add_node(n_ptr->net, n_ptr->addr);
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
|
|
|
|
2008-09-03 14:38:32 +08:00
|
|
|
static void node_lost_contact(struct tipc_node *n_ptr)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
|
|
|
char addr_string[16];
|
2015-02-05 21:36:42 +08:00
|
|
|
struct tipc_sock_conn *conn, *safe;
|
|
|
|
struct list_head *conns = &n_ptr->conn_sks;
|
|
|
|
struct sk_buff *skb;
|
|
|
|
struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id);
|
|
|
|
uint i;
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2015-01-23 00:10:31 +08:00
|
|
|
pr_debug("Lost contact with %s\n",
|
|
|
|
tipc_addr_string_fill(addr_string, n_ptr->addr));
|
2011-04-07 23:58:08 +08:00
|
|
|
|
|
|
|
/* Flush broadcast link info associated with lost node */
|
2012-11-16 13:51:30 +08:00
|
|
|
if (n_ptr->bclink.recv_permitted) {
|
2014-11-26 11:41:53 +08:00
|
|
|
__skb_queue_purge(&n_ptr->bclink.deferred_queue);
|
2011-04-07 23:58:08 +08:00
|
|
|
|
2014-05-14 17:39:12 +08:00
|
|
|
if (n_ptr->bclink.reasm_buf) {
|
|
|
|
kfree_skb(n_ptr->bclink.reasm_buf);
|
|
|
|
n_ptr->bclink.reasm_buf = NULL;
|
2011-04-07 23:58:08 +08:00
|
|
|
}
|
|
|
|
|
2015-01-09 15:27:07 +08:00
|
|
|
tipc_bclink_remove_node(n_ptr->net, n_ptr->addr);
|
2011-10-25 03:26:24 +08:00
|
|
|
tipc_bclink_acknowledge(n_ptr, INVALID_LINK_SEQ);
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2012-11-16 13:51:30 +08:00
|
|
|
n_ptr->bclink.recv_permitted = false;
|
2011-04-07 23:58:08 +08:00
|
|
|
}
|
2006-01-03 02:04:38 +08:00
|
|
|
|
|
|
|
/* Abort link changeover */
|
|
|
|
for (i = 0; i < MAX_BEARERS; i++) {
|
2011-12-30 09:58:42 +08:00
|
|
|
struct tipc_link *l_ptr = n_ptr->links[i];
|
2007-02-09 22:25:21 +08:00
|
|
|
if (!l_ptr)
|
2006-01-03 02:04:38 +08:00
|
|
|
continue;
|
|
|
|
l_ptr->reset_checkpoint = l_ptr->next_in_no;
|
|
|
|
l_ptr->exp_msg_count = 0;
|
2006-01-18 07:38:21 +08:00
|
|
|
tipc_link_reset_fragments(l_ptr);
|
2015-02-03 21:59:18 +08:00
|
|
|
|
|
|
|
/* Link marked for deletion after failover? => do it now */
|
|
|
|
if (l_ptr->flags & LINK_STOPPED)
|
|
|
|
tipc_link_delete(l_ptr);
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
|
|
|
|
2014-05-08 08:54:40 +08:00
|
|
|
n_ptr->action_flags &= ~TIPC_WAIT_OWN_LINKS_DOWN;
|
|
|
|
|
2015-02-05 21:36:42 +08:00
|
|
|
/* Prevent re-contact with node until cleanup is done */
|
|
|
|
n_ptr->action_flags |= TIPC_WAIT_PEER_LINKS_DOWN;
|
|
|
|
|
|
|
|
/* Notify publications from this node */
|
|
|
|
n_ptr->action_flags |= TIPC_NOTIFY_NODE_DOWN;
|
|
|
|
|
|
|
|
/* Notify sockets connected to node */
|
|
|
|
list_for_each_entry_safe(conn, safe, conns, list) {
|
|
|
|
skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG,
|
|
|
|
SHORT_H_SIZE, 0, tn->own_addr,
|
|
|
|
conn->peer_node, conn->port,
|
|
|
|
conn->peer_port, TIPC_ERR_NO_NODE);
|
|
|
|
if (likely(skb)) {
|
|
|
|
skb_queue_tail(n_ptr->inputq, skb);
|
|
|
|
n_ptr->action_flags |= TIPC_MSG_EVT;
|
|
|
|
}
|
|
|
|
list_del(&conn->list);
|
|
|
|
kfree(conn);
|
|
|
|
}
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
|
|
|
|
2015-01-09 15:27:05 +08:00
|
|
|
struct sk_buff *tipc_node_get_nodes(struct net *net, const void *req_tlv_area,
|
|
|
|
int req_tlv_space)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
2015-01-09 15:27:05 +08:00
|
|
|
struct tipc_net *tn = net_generic(net, tipc_net_id);
|
2006-01-03 02:04:38 +08:00
|
|
|
u32 domain;
|
|
|
|
struct sk_buff *buf;
|
2008-09-03 14:38:32 +08:00
|
|
|
struct tipc_node *n_ptr;
|
2007-02-09 22:25:21 +08:00
|
|
|
struct tipc_node_info node_info;
|
2006-06-30 03:33:20 +08:00
|
|
|
u32 payload_size;
|
2006-01-03 02:04:38 +08:00
|
|
|
|
|
|
|
if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR))
|
2006-01-18 07:38:21 +08:00
|
|
|
return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2006-11-08 16:19:09 +08:00
|
|
|
domain = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
|
2006-01-18 07:38:21 +08:00
|
|
|
if (!tipc_addr_domain_valid(domain))
|
|
|
|
return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
|
|
|
|
" (network address)");
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2015-01-09 15:27:05 +08:00
|
|
|
spin_lock_bh(&tn->node_list_lock);
|
|
|
|
if (!tn->num_nodes) {
|
|
|
|
spin_unlock_bh(&tn->node_list_lock);
|
2007-02-09 22:25:21 +08:00
|
|
|
return tipc_cfg_reply_none();
|
2008-07-15 13:44:58 +08:00
|
|
|
}
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2011-01-01 02:59:17 +08:00
|
|
|
/* For now, get space for all other nodes */
|
2015-01-09 15:27:05 +08:00
|
|
|
payload_size = TLV_SPACE(sizeof(node_info)) * tn->num_nodes;
|
2008-07-15 13:44:58 +08:00
|
|
|
if (payload_size > 32768u) {
|
2015-01-09 15:27:05 +08:00
|
|
|
spin_unlock_bh(&tn->node_list_lock);
|
2006-06-30 03:33:20 +08:00
|
|
|
return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
|
|
|
|
" (too many nodes)");
|
2008-07-15 13:44:58 +08:00
|
|
|
}
|
2015-01-09 15:27:05 +08:00
|
|
|
spin_unlock_bh(&tn->node_list_lock);
|
2014-03-27 12:54:38 +08:00
|
|
|
|
2006-06-30 03:33:20 +08:00
|
|
|
buf = tipc_cfg_reply_alloc(payload_size);
|
2014-03-27 12:54:38 +08:00
|
|
|
if (!buf)
|
2006-01-03 02:04:38 +08:00
|
|
|
return NULL;
|
|
|
|
|
|
|
|
/* Add TLVs for all nodes in scope */
|
2014-03-27 12:54:37 +08:00
|
|
|
rcu_read_lock();
|
2015-01-09 15:27:05 +08:00
|
|
|
list_for_each_entry_rcu(n_ptr, &tn->node_list, list) {
|
2011-02-26 07:42:52 +08:00
|
|
|
if (!tipc_in_scope(domain, n_ptr->addr))
|
2006-01-03 02:04:38 +08:00
|
|
|
continue;
|
2007-02-09 22:25:21 +08:00
|
|
|
node_info.addr = htonl(n_ptr->addr);
|
|
|
|
node_info.up = htonl(tipc_node_is_up(n_ptr));
|
|
|
|
tipc_cfg_append_tlv(buf, TIPC_TLV_NODE_INFO,
|
2006-01-18 07:38:21 +08:00
|
|
|
&node_info, sizeof(node_info));
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
2014-03-27 12:54:37 +08:00
|
|
|
rcu_read_unlock();
|
2006-01-03 02:04:38 +08:00
|
|
|
return buf;
|
|
|
|
}
|
|
|
|
|
2015-01-09 15:27:05 +08:00
|
|
|
struct sk_buff *tipc_node_get_links(struct net *net, const void *req_tlv_area,
|
|
|
|
int req_tlv_space)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
2015-01-09 15:27:05 +08:00
|
|
|
struct tipc_net *tn = net_generic(net, tipc_net_id);
|
2006-01-03 02:04:38 +08:00
|
|
|
u32 domain;
|
|
|
|
struct sk_buff *buf;
|
2008-09-03 14:38:32 +08:00
|
|
|
struct tipc_node *n_ptr;
|
2007-02-09 22:25:21 +08:00
|
|
|
struct tipc_link_info link_info;
|
2006-06-30 03:33:20 +08:00
|
|
|
u32 payload_size;
|
2006-01-03 02:04:38 +08:00
|
|
|
|
|
|
|
if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR))
|
2006-01-18 07:38:21 +08:00
|
|
|
return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2006-11-08 16:19:09 +08:00
|
|
|
domain = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
|
2006-01-18 07:38:21 +08:00
|
|
|
if (!tipc_addr_domain_valid(domain))
|
|
|
|
return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
|
|
|
|
" (network address)");
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2015-01-09 15:27:10 +08:00
|
|
|
if (!tn->own_addr)
|
2007-02-09 22:25:21 +08:00
|
|
|
return tipc_cfg_reply_none();
|
|
|
|
|
2015-01-09 15:27:05 +08:00
|
|
|
spin_lock_bh(&tn->node_list_lock);
|
tipc: Major redesign of broadcast link ACK/NACK algorithms
Completely redesigns broadcast link ACK and NACK mechanisms to prevent
spurious retransmit requests in dual LAN networks, and to prevent the
broadcast link from stalling due to the failure of a receiving node to
acknowledge receiving a broadcast message or request its retransmission.
Note: These changes only impact the timing of when ACK and NACK messages
are sent, and not the basic broadcast link protocol itself, so inter-
operability with nodes using the "classic" algorithms is maintained.
The revised algorithms are as follows:
1) An explicit ACK message is still sent after receiving 16 in-sequence
messages, and implicit ACK information continues to be carried in other
unicast link message headers (including link state messages). However,
the timing of explicit ACKs is now based on the receiving node's absolute
network address rather than its relative network address to ensure that
the failure of another node does not delay the ACK beyond its 16 message
target.
2) A NACK message is now typically sent only when a message gap persists
for two consecutive incoming link state messages; this ensures that a
suspected gap is not confirmed until both LANs in a dual LAN network have
had an opportunity to deliver the message, thereby preventing spurious NACKs.
A NACK message can also be generated by the arrival of a single link state
message, if the deferred queue is so big that the current message gap
cannot be the result of "normal" mis-ordering due to the use of dual LANs
(or one LAN using a bonded interface). Since link state messages typically
arrive at different nodes at different times the problem of multiple nodes
issuing identical NACKs simultaneously is inherently avoided.
3) Nodes continue to "peek" at NACK messages sent by other nodes. If
another node requests retransmission of a message gap suspected (but not
yet confirmed) by the peeking node, the peeking node forgets about the
gap and does not generate a duplicate retransmit request. (If the peeking
node subsequently fails to receive the lost message, later link state
messages will cause it to rediscover and confirm the gap and send another
NACK.)
4) Message gap "equality" is now determined by the start of the gap only.
This is sufficient to deal with the most common cases of message loss,
and eliminates the need for complex end of gap computations.
5) A peeking node no longer tries to determine whether it should send a
complementary NACK, since the most common cases of message loss don't
require it to be sent. Consequently, the node no longer examines the
"broadcast tag" field of a NACK message when peeking.
Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2011-10-28 02:17:53 +08:00
|
|
|
/* Get space for all unicast links + broadcast link */
|
2015-01-09 15:27:05 +08:00
|
|
|
payload_size = TLV_SPACE((sizeof(link_info)) * (tn->num_links + 1));
|
2008-07-15 13:44:58 +08:00
|
|
|
if (payload_size > 32768u) {
|
2015-01-09 15:27:05 +08:00
|
|
|
spin_unlock_bh(&tn->node_list_lock);
|
2006-06-30 03:33:20 +08:00
|
|
|
return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
|
|
|
|
" (too many links)");
|
2008-07-15 13:44:58 +08:00
|
|
|
}
|
2015-01-09 15:27:05 +08:00
|
|
|
spin_unlock_bh(&tn->node_list_lock);
|
2014-03-27 12:54:39 +08:00
|
|
|
|
2006-06-30 03:33:20 +08:00
|
|
|
buf = tipc_cfg_reply_alloc(payload_size);
|
2014-03-27 12:54:39 +08:00
|
|
|
if (!buf)
|
2006-01-03 02:04:38 +08:00
|
|
|
return NULL;
|
|
|
|
|
|
|
|
/* Add TLV for broadcast link */
|
2015-01-09 15:27:10 +08:00
|
|
|
link_info.dest = htonl(tipc_cluster_mask(tn->own_addr));
|
2007-02-09 22:25:21 +08:00
|
|
|
link_info.up = htonl(1);
|
2009-03-19 10:11:29 +08:00
|
|
|
strlcpy(link_info.str, tipc_bclink_name, TIPC_MAX_LINK_NAME);
|
2006-01-18 07:38:21 +08:00
|
|
|
tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO, &link_info, sizeof(link_info));
|
2006-01-03 02:04:38 +08:00
|
|
|
|
|
|
|
/* Add TLVs for any other links in scope */
|
2014-03-27 12:54:37 +08:00
|
|
|
rcu_read_lock();
|
2015-01-09 15:27:05 +08:00
|
|
|
list_for_each_entry_rcu(n_ptr, &tn->node_list, list) {
|
2007-02-09 22:25:21 +08:00
|
|
|
u32 i;
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2011-02-26 07:42:52 +08:00
|
|
|
if (!tipc_in_scope(domain, n_ptr->addr))
|
2006-01-03 02:04:38 +08:00
|
|
|
continue;
|
2008-07-15 13:44:58 +08:00
|
|
|
tipc_node_lock(n_ptr);
|
2007-02-09 22:25:21 +08:00
|
|
|
for (i = 0; i < MAX_BEARERS; i++) {
|
|
|
|
if (!n_ptr->links[i])
|
|
|
|
continue;
|
|
|
|
link_info.dest = htonl(n_ptr->addr);
|
|
|
|
link_info.up = htonl(tipc_link_is_up(n_ptr->links[i]));
|
|
|
|
strcpy(link_info.str, n_ptr->links[i]->name);
|
|
|
|
tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO,
|
2006-01-18 07:38:21 +08:00
|
|
|
&link_info, sizeof(link_info));
|
2007-02-09 22:25:21 +08:00
|
|
|
}
|
2008-07-15 13:44:58 +08:00
|
|
|
tipc_node_unlock(n_ptr);
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
2014-03-27 12:54:37 +08:00
|
|
|
rcu_read_unlock();
|
2006-01-03 02:04:38 +08:00
|
|
|
return buf;
|
|
|
|
}
|
2014-04-24 22:26:47 +08:00
|
|
|
|
|
|
|
/**
|
|
|
|
* tipc_node_get_linkname - get the name of a link
|
|
|
|
*
|
|
|
|
* @bearer_id: id of the bearer
|
|
|
|
* @node: peer node address
|
|
|
|
* @linkname: link name output buffer
|
|
|
|
*
|
|
|
|
* Returns 0 on success
|
|
|
|
*/
|
2015-01-09 15:27:05 +08:00
|
|
|
int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr,
|
|
|
|
char *linkname, size_t len)
|
2014-04-24 22:26:47 +08:00
|
|
|
{
|
|
|
|
struct tipc_link *link;
|
2015-01-09 15:27:05 +08:00
|
|
|
struct tipc_node *node = tipc_node_find(net, addr);
|
2014-04-24 22:26:47 +08:00
|
|
|
|
2014-04-28 14:20:09 +08:00
|
|
|
if ((bearer_id >= MAX_BEARERS) || !node)
|
2014-04-24 22:26:47 +08:00
|
|
|
return -EINVAL;
|
|
|
|
tipc_node_lock(node);
|
|
|
|
link = node->links[bearer_id];
|
|
|
|
if (link) {
|
|
|
|
strncpy(linkname, link->name, len);
|
|
|
|
tipc_node_unlock(node);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
tipc_node_unlock(node);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2014-05-05 08:56:12 +08:00
|
|
|
|
|
|
|
void tipc_node_unlock(struct tipc_node *node)
|
|
|
|
{
|
2015-01-09 15:27:05 +08:00
|
|
|
struct net *net = node->net;
|
2014-05-05 08:56:14 +08:00
|
|
|
u32 addr = 0;
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
u32 flags = node->action_flags;
|
2014-10-20 14:44:25 +08:00
|
|
|
u32 link_id = 0;
|
2015-02-05 21:36:42 +08:00
|
|
|
struct list_head *publ_list;
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
struct sk_buff_head *inputq = node->inputq;
|
2015-02-05 21:36:42 +08:00
|
|
|
struct sk_buff_head *namedq;
|
2014-05-05 08:56:12 +08:00
|
|
|
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
if (likely(!flags || (flags == TIPC_MSG_EVT))) {
|
|
|
|
node->action_flags = 0;
|
2014-05-05 08:56:12 +08:00
|
|
|
spin_unlock_bh(&node->lock);
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
if (flags == TIPC_MSG_EVT)
|
|
|
|
tipc_sk_rcv(net, inputq);
|
2014-05-05 08:56:12 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2014-10-20 14:44:25 +08:00
|
|
|
addr = node->addr;
|
|
|
|
link_id = node->link_id;
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
namedq = node->namedq;
|
2015-02-05 21:36:42 +08:00
|
|
|
publ_list = &node->publ_list;
|
2014-10-20 14:44:25 +08:00
|
|
|
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
node->action_flags &= ~(TIPC_MSG_EVT | TIPC_NOTIFY_NODE_DOWN |
|
2014-10-20 14:44:25 +08:00
|
|
|
TIPC_NOTIFY_NODE_UP | TIPC_NOTIFY_LINK_UP |
|
|
|
|
TIPC_NOTIFY_LINK_DOWN |
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
TIPC_WAKEUP_BCAST_USERS |
|
|
|
|
TIPC_NAMED_MSG_EVT);
|
2014-10-20 14:44:25 +08:00
|
|
|
|
2014-05-05 08:56:12 +08:00
|
|
|
spin_unlock_bh(&node->lock);
|
|
|
|
|
2015-02-05 21:36:42 +08:00
|
|
|
if (flags & TIPC_NOTIFY_NODE_DOWN)
|
|
|
|
tipc_publ_notify(net, publ_list, addr);
|
2014-08-23 06:09:07 +08:00
|
|
|
|
tipc: fix bug in multicast congestion handling
One aim of commit 50100a5e39461b2a61d6040e73c384766c29975d ("tipc:
use pseudo message to wake up sockets after link congestion") was
to handle link congestion abatement in a uniform way for both unicast
and multicast transmit. However, the latter doesn't work correctly,
and has been broken since the referenced commit was applied.
If a user now sends a burst of multicast messages that is big
enough to cause broadcast link congestion, it will be put to sleep,
and not be waked up when the congestion abates as it should be.
This has two reasons. First, the flag that is used, TIPC_WAKEUP_USERS,
is set correctly, but in the wrong field. Instead of setting it in the
'action_flags' field of the arrival node struct, it is by mistake set
in the dummy node struct that is owned by the broadcast link, where it
will never be tested for. Second, we cannot use the same flag for waking
up unicast and multicast users, since the function tipc_node_unlock()
needs to pick the wakeup pseudo messages to deliver from different
queues. It must hence be able to distinguish between the two cases.
This commit solves this problem by adding a new flag
TIPC_WAKEUP_BCAST_USERS, and a new function tipc_bclink_wakeup_user().
The latter is to be called by tipc_node_unlock() when the named flag,
now set in the correct field, is encountered.
v2: using explicit 'unsigned int' declaration instead of 'uint', as
per comment from David Miller.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-10-08 02:12:34 +08:00
|
|
|
if (flags & TIPC_WAKEUP_BCAST_USERS)
|
2015-01-09 15:27:05 +08:00
|
|
|
tipc_bclink_wakeup_users(net);
|
tipc: fix bug in multicast congestion handling
One aim of commit 50100a5e39461b2a61d6040e73c384766c29975d ("tipc:
use pseudo message to wake up sockets after link congestion") was
to handle link congestion abatement in a uniform way for both unicast
and multicast transmit. However, the latter doesn't work correctly,
and has been broken since the referenced commit was applied.
If a user now sends a burst of multicast messages that is big
enough to cause broadcast link congestion, it will be put to sleep,
and not be waked up when the congestion abates as it should be.
This has two reasons. First, the flag that is used, TIPC_WAKEUP_USERS,
is set correctly, but in the wrong field. Instead of setting it in the
'action_flags' field of the arrival node struct, it is by mistake set
in the dummy node struct that is owned by the broadcast link, where it
will never be tested for. Second, we cannot use the same flag for waking
up unicast and multicast users, since the function tipc_node_unlock()
needs to pick the wakeup pseudo messages to deliver from different
queues. It must hence be able to distinguish between the two cases.
This commit solves this problem by adding a new flag
TIPC_WAKEUP_BCAST_USERS, and a new function tipc_bclink_wakeup_users().
The latter is to be called by tipc_node_unlock() when the named flag,
now set in the correct field, is encountered.
v2: using explicit 'unsigned int' declaration instead of 'uint', as
per comment from David Miller.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-10-08 02:12:34 +08:00
|
|
|
|
2014-10-20 14:44:25 +08:00
|
|
|
if (flags & TIPC_NOTIFY_NODE_UP)
|
2015-01-09 15:27:05 +08:00
|
|
|
tipc_named_node_up(net, addr);
|
2014-10-20 14:44:25 +08:00
|
|
|
|
|
|
|
if (flags & TIPC_NOTIFY_LINK_UP)
|
2015-01-09 15:27:05 +08:00
|
|
|
tipc_nametbl_publish(net, TIPC_LINK_STATE, addr, addr,
|
2014-10-20 14:44:25 +08:00
|
|
|
TIPC_NODE_SCOPE, link_id, addr);
|
|
|
|
|
|
|
|
if (flags & TIPC_NOTIFY_LINK_DOWN)
|
2015-01-09 15:27:05 +08:00
|
|
|
tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr,
|
2014-10-20 14:44:25 +08:00
|
|
|
link_id, addr);
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
|
|
|
|
if (flags & TIPC_MSG_EVT)
|
|
|
|
tipc_sk_rcv(net, inputq);
|
|
|
|
|
|
|
|
if (flags & TIPC_NAMED_MSG_EVT)
|
|
|
|
tipc_named_rcv(net, namedq);
|
2014-05-05 08:56:12 +08:00
|
|
|
}
|
2014-11-20 17:29:17 +08:00
|
|
|
|
|
|
|
/* Caller should hold node lock for the passed node */
|
2014-11-24 18:10:29 +08:00
|
|
|
static int __tipc_nl_add_node(struct tipc_nl_msg *msg, struct tipc_node *node)
|
2014-11-20 17:29:17 +08:00
|
|
|
{
|
|
|
|
void *hdr;
|
|
|
|
struct nlattr *attrs;
|
|
|
|
|
|
|
|
hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_v2_family,
|
|
|
|
NLM_F_MULTI, TIPC_NL_NODE_GET);
|
|
|
|
if (!hdr)
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
|
|
|
attrs = nla_nest_start(msg->skb, TIPC_NLA_NODE);
|
|
|
|
if (!attrs)
|
|
|
|
goto msg_full;
|
|
|
|
|
|
|
|
if (nla_put_u32(msg->skb, TIPC_NLA_NODE_ADDR, node->addr))
|
|
|
|
goto attr_msg_full;
|
|
|
|
if (tipc_node_is_up(node))
|
|
|
|
if (nla_put_flag(msg->skb, TIPC_NLA_NODE_UP))
|
|
|
|
goto attr_msg_full;
|
|
|
|
|
|
|
|
nla_nest_end(msg->skb, attrs);
|
|
|
|
genlmsg_end(msg->skb, hdr);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
attr_msg_full:
|
|
|
|
nla_nest_cancel(msg->skb, attrs);
|
|
|
|
msg_full:
|
|
|
|
genlmsg_cancel(msg->skb, hdr);
|
|
|
|
|
|
|
|
return -EMSGSIZE;
|
|
|
|
}
|
|
|
|
|
|
|
|
int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb)
|
|
|
|
{
|
|
|
|
int err;
|
2015-01-09 15:27:05 +08:00
|
|
|
struct net *net = sock_net(skb->sk);
|
|
|
|
struct tipc_net *tn = net_generic(net, tipc_net_id);
|
2014-11-20 17:29:17 +08:00
|
|
|
int done = cb->args[0];
|
|
|
|
int last_addr = cb->args[1];
|
|
|
|
struct tipc_node *node;
|
|
|
|
struct tipc_nl_msg msg;
|
|
|
|
|
|
|
|
if (done)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
msg.skb = skb;
|
|
|
|
msg.portid = NETLINK_CB(cb->skb).portid;
|
|
|
|
msg.seq = cb->nlh->nlmsg_seq;
|
|
|
|
|
|
|
|
rcu_read_lock();
|
|
|
|
|
2015-01-09 15:27:05 +08:00
|
|
|
if (last_addr && !tipc_node_find(net, last_addr)) {
|
2014-11-20 17:29:17 +08:00
|
|
|
rcu_read_unlock();
|
|
|
|
/* We never set seq or call nl_dump_check_consistent() this
|
|
|
|
* means that setting prev_seq here will cause the consistence
|
|
|
|
* check to fail in the netlink callback handler. Resulting in
|
|
|
|
* the NLMSG_DONE message having the NLM_F_DUMP_INTR flag set if
|
|
|
|
* the node state changed while we released the lock.
|
|
|
|
*/
|
|
|
|
cb->prev_seq = 1;
|
|
|
|
return -EPIPE;
|
|
|
|
}
|
|
|
|
|
2015-01-09 15:27:05 +08:00
|
|
|
list_for_each_entry_rcu(node, &tn->node_list, list) {
|
2014-11-20 17:29:17 +08:00
|
|
|
if (last_addr) {
|
|
|
|
if (node->addr == last_addr)
|
|
|
|
last_addr = 0;
|
|
|
|
else
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
tipc_node_lock(node);
|
|
|
|
err = __tipc_nl_add_node(&msg, node);
|
|
|
|
if (err) {
|
|
|
|
last_addr = node->addr;
|
|
|
|
tipc_node_unlock(node);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
tipc_node_unlock(node);
|
|
|
|
}
|
|
|
|
done = 1;
|
|
|
|
out:
|
|
|
|
cb->args[0] = done;
|
|
|
|
cb->args[1] = last_addr;
|
|
|
|
rcu_read_unlock();
|
|
|
|
|
|
|
|
return skb->len;
|
|
|
|
}
|