2006-01-03 02:04:38 +08:00
|
|
|
/*
|
|
|
|
* net/tipc/node.h: Include file for TIPC node management routines
|
2007-02-09 22:25:21 +08:00
|
|
|
*
|
2015-02-05 21:36:44 +08:00
|
|
|
* Copyright (c) 2000-2006, 2014-2015, Ericsson AB
|
2014-03-27 12:54:36 +08:00
|
|
|
* Copyright (c) 2005, 2010-2014, Wind River Systems
|
2006-01-03 02:04:38 +08:00
|
|
|
* All rights reserved.
|
|
|
|
*
|
2006-01-11 20:30:43 +08:00
|
|
|
* Redistribution and use in source and binary forms, with or without
|
2006-01-03 02:04:38 +08:00
|
|
|
* modification, are permitted provided that the following conditions are met:
|
|
|
|
*
|
2006-01-11 20:30:43 +08:00
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
* 3. Neither the names of the copyright holders nor the names of its
|
|
|
|
* contributors may be used to endorse or promote products derived from
|
|
|
|
* this software without specific prior written permission.
|
2006-01-03 02:04:38 +08:00
|
|
|
*
|
2006-01-11 20:30:43 +08:00
|
|
|
* Alternatively, this software may be distributed under the terms of the
|
|
|
|
* GNU General Public License ("GPL") version 2 as published by the Free
|
|
|
|
* Software Foundation.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
2006-01-03 02:04:38 +08:00
|
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef _TIPC_NODE_H
|
|
|
|
#define _TIPC_NODE_H
|
|
|
|
|
2011-01-01 02:59:19 +08:00
|
|
|
#include "addr.h"
|
|
|
|
#include "net.h"
|
2006-01-03 02:04:38 +08:00
|
|
|
#include "bearer.h"
|
2014-06-26 09:41:33 +08:00
|
|
|
#include "msg.h"
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2015-01-09 15:27:05 +08:00
|
|
|
/* Out-of-range value for node signature */
#define INVALID_NODE_SIG	0x10000
|
|
|
|
|
2014-05-08 08:54:39 +08:00
|
|
|
/* Flags used to take different actions according to flag type.
 * Each flag occupies its own bit, so several of them can be combined in
 * the action_flags bit mask of struct tipc_node.
 *
 * TIPC_WAIT_PEER_LINKS_DOWN: wait to see that peer's links are down
 * TIPC_WAIT_OWN_LINKS_DOWN: wait until peer node is declared down
 * TIPC_NOTIFY_NODE_DOWN: notify node is down
 * TIPC_NOTIFY_NODE_UP: notify node is up
 *
 * NOTE(review): the remaining flags are not described in this header;
 * confirm their meaning against the code that sets and tests them.
 */
enum {
	TIPC_MSG_EVT			= (1 << 0),
	TIPC_WAIT_PEER_LINKS_DOWN	= (1 << 1),
	TIPC_WAIT_OWN_LINKS_DOWN	= (1 << 2),
	TIPC_NOTIFY_NODE_DOWN		= (1 << 3),
	TIPC_NOTIFY_NODE_UP		= (1 << 4),
	TIPC_WAKEUP_BCAST_USERS		= (1 << 5),
	TIPC_NOTIFY_LINK_UP		= (1 << 6),
	TIPC_NOTIFY_LINK_DOWN		= (1 << 7),
	TIPC_NAMED_MSG_EVT		= (1 << 8),
	TIPC_BCAST_MSG_EVT		= (1 << 9),
	TIPC_BCAST_RESET		= (1 << 10)
};
|
tipc: Ensure both nodes recognize loss of contact between them
Enhances TIPC to ensure that a node that loses contact with a
neighboring node does not allow contact to be re-established until
it sees that its peer has also recognized the loss of contact.
Previously, nodes that were connected by two or more links could
encounter a situation in which node A would lose contact with node B
on all of its links, purge its name table of names published by B,
and then fail to repopulate those names once contact with B was restored.
This would happen because B was able to re-establish one or more links
so quickly that it never reached a point where it had no links to A --
meaning that B never saw a loss of contact with A, and consequently
didn't re-publish its names to A.
This problem is now prevented by enhancing the cleanup done by TIPC
following a loss of contact with a neighboring node to ensure that
node A ignores all messages sent by B until it receives a LINK_PROTOCOL
message that indicates B has lost contact with A, thereby preventing
the (re)establishment of links between the nodes. The loss of contact
is recognized when a RESET or ACTIVATE message is received that has
a "redundant link exists" field of 0, indicating that B's sending link
endpoint is in a reset state and that B has no other working links.
Additionally, TIPC now suppresses the sending of (most) link protocol
messages to a neighboring node while it is cleaning up after an earlier
loss of contact with that node. This stops the peer node from prematurely
activating its link endpoint, which would prevent TIPC from later
activating its own end. TIPC still allows outgoing RESET messages to
occur during cleanup, to avoid problems if its own node recognizes
the loss of contact first and tries to notify the peer of the situation.
Finally, TIPC now recognizes an impending loss of contact with a peer node
as soon as it receives a RESET message on a working link that is the
peer's only link to the node, and ensures that the link protocol
suppression mentioned above goes into effect right away -- that is,
even before its own link endpoints have failed. This is necessary to
ensure correct operation when there are redundant links between the nodes,
since otherwise TIPC would send an ACTIVATE message upon receiving a RESET
on its first link and only begin suppressing when a RESET on its second
link was received, instead of initiating suppression with the first RESET
message as it needs to.
Note: The reworked cleanup code also eliminates a check that prevented
a link endpoint's discovery object from responding to incoming messages
while stale name table entries are being purged. This check is now
unnecessary and would have slowed down re-establishment of communication
between the nodes in some situations.
Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2011-05-27 23:00:51 +08:00
|
|
|
|
2014-05-05 08:56:10 +08:00
|
|
|
/**
|
|
|
|
* struct tipc_node_bclink - TIPC node bclink structure
|
|
|
|
* @acked: sequence # of last outbound b'cast message acknowledged by node
|
|
|
|
* @last_in: sequence # of last in-sequence b'cast message received from node
|
|
|
|
* @last_sent: sequence # of last b'cast message sent by node
|
|
|
|
* @oos_state: state tracker for handling OOS b'cast messages
|
2014-11-26 11:41:53 +08:00
|
|
|
* @deferred_queue: deferred queue saved OOS b'cast message received from node
|
2014-05-14 17:39:12 +08:00
|
|
|
* @reasm_buf: broadcast reassembly queue head from node
|
2015-02-05 21:36:44 +08:00
|
|
|
* @inputq_map: bitmap indicating which inqueues should be kicked
|
2014-05-05 08:56:10 +08:00
|
|
|
* @recv_permitted: true if node is allowed to receive b'cast messages
|
|
|
|
*/
|
|
|
|
struct tipc_node_bclink {
|
|
|
|
u32 acked;
|
|
|
|
u32 last_in;
|
|
|
|
u32 last_sent;
|
|
|
|
u32 oos_state;
|
|
|
|
u32 deferred_size;
|
2015-03-14 04:08:10 +08:00
|
|
|
struct sk_buff_head deferdq;
|
2014-05-14 17:39:12 +08:00
|
|
|
struct sk_buff *reasm_buf;
|
2015-07-17 04:54:21 +08:00
|
|
|
struct sk_buff_head namedq;
|
2014-05-05 08:56:10 +08:00
|
|
|
bool recv_permitted;
|
|
|
|
};
|
|
|
|
|
2015-07-17 04:54:19 +08:00
|
|
|
struct tipc_link_entry {
|
|
|
|
struct tipc_link *link;
|
|
|
|
u32 mtu;
|
2015-07-17 04:54:21 +08:00
|
|
|
struct sk_buff_head inputq;
|
2015-07-17 04:54:20 +08:00
|
|
|
struct tipc_media_addr maddr;
|
2015-07-17 04:54:19 +08:00
|
|
|
};
|
|
|
|
|
2006-01-03 02:04:38 +08:00
|
|
|
/**
|
2008-09-03 14:38:32 +08:00
|
|
|
* struct tipc_node - TIPC node structure
|
2006-01-03 02:04:38 +08:00
|
|
|
* @addr: network address of node
|
2015-03-26 18:10:24 +08:00
|
|
|
* @ref: reference counter to node object
|
2006-01-03 02:04:38 +08:00
|
|
|
* @lock: spinlock governing access to structure
|
2015-01-09 15:27:05 +08:00
|
|
|
* @net: the applicable net namespace
|
2011-02-26 07:42:52 +08:00
|
|
|
* @hash: links to adjacent nodes in unsorted hash chain
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
* @inputq: pointer to input queue containing messages for msg event
|
|
|
|
* @namedq: pointer to name table input queue with name table messages
|
2015-07-17 04:54:19 +08:00
|
|
|
* @active_links: pointer into links[] array, identifying which links are active
|
|
|
|
* @links: array containing references to all links to node
|
2014-05-08 08:54:39 +08:00
|
|
|
* @action_flags: bit mask of different types of node actions
|
2014-05-05 08:56:10 +08:00
|
|
|
* @bclink: broadcast-related info
|
|
|
|
* @list: links to adjacent nodes in sorted list of cluster's nodes
|
|
|
|
* @working_links: number of working links to node (both active and standby)
|
2006-01-03 02:04:38 +08:00
|
|
|
* @link_cnt: number of links to node
|
2015-03-14 04:08:05 +08:00
|
|
|
* @capabilities: bitmap, indicating peer node's functional capabilities
|
2011-10-29 04:26:41 +08:00
|
|
|
* @signature: node instance identifier
|
2014-10-20 14:44:25 +08:00
|
|
|
* @link_id: local and remote bearer ids of changing link, if any
|
2014-11-26 11:41:45 +08:00
|
|
|
* @publ_list: list of publications
|
2014-03-27 12:54:37 +08:00
|
|
|
* @rcu: rcu struct for tipc_node
|
2006-01-03 02:04:38 +08:00
|
|
|
*/
|
2008-09-03 14:38:32 +08:00
|
|
|
struct tipc_node {
|
2006-01-03 02:04:38 +08:00
|
|
|
u32 addr;
|
2015-03-26 18:10:24 +08:00
|
|
|
struct kref kref;
|
2006-01-03 02:04:38 +08:00
|
|
|
spinlock_t lock;
|
2015-01-09 15:27:05 +08:00
|
|
|
struct net *net;
|
2011-02-26 07:42:52 +08:00
|
|
|
struct hlist_node hash;
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
struct sk_buff_head *inputq;
|
|
|
|
struct sk_buff_head *namedq;
|
2015-07-17 04:54:19 +08:00
|
|
|
struct tipc_link_entry *active_links[2];
|
|
|
|
struct tipc_link_entry links[MAX_BEARERS];
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 21:36:41 +08:00
|
|
|
int action_flags;
|
2014-05-05 08:56:10 +08:00
|
|
|
struct tipc_node_bclink bclink;
|
|
|
|
struct list_head list;
|
2006-01-03 02:04:38 +08:00
|
|
|
int link_cnt;
|
2015-03-14 04:08:05 +08:00
|
|
|
u16 working_links;
|
|
|
|
u16 capabilities;
|
2011-10-29 04:26:41 +08:00
|
|
|
u32 signature;
|
2014-10-20 14:44:25 +08:00
|
|
|
u32 link_id;
|
2014-11-26 11:41:45 +08:00
|
|
|
struct list_head publ_list;
|
tipc: use message to abort connections when losing contact to node
In the current implementation, each 'struct tipc_node' instance keeps
a linked list of those ports/sockets that are connected to the node
represented by that struct. The purpose of this is to let the node
object know which sockets to alert when it loses contact with its peer
node, i.e., which sockets need to have their connections aborted.
This entails an unwanted direct reference from the node structure
back to the port/socket structure, and a need to grab port_lock
when we have to make an upcall to the port. We want to get rid of
this unecessary BH entry point into the socket, and also eliminate
its use of port_lock.
In this commit, we instead let the node struct keep list of "connected
socket" structs, which each represents a connected socket, but is
allocated independently by the node at the moment of connection. If
the node loses contact with its peer node, the list is traversed, and
a "connection abort" message is created for each entry in the list. The
message is sent to it respective connected socket using the ordinary
data path, and the receiving socket aborts its connections upon reception
of the message.
This enables us to get rid of the direct reference from 'struct node' to
´struct port', and another unwanted BH access point to the latter.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-08-23 06:09:08 +08:00
|
|
|
struct list_head conn_sks;
|
2014-03-27 12:54:37 +08:00
|
|
|
struct rcu_head rcu;
|
2006-01-03 02:04:38 +08:00
|
|
|
};
|
|
|
|
|
2015-01-09 15:27:05 +08:00
|
|
|
/* Node lookup, creation and teardown.
 * tipc_node_get_mtu() in this header pairs tipc_node_find() with
 * tipc_node_put() once it is done with the node.
 */
struct tipc_node *tipc_node_find(struct net *net, u32 addr);
void tipc_node_put(struct tipc_node *node);
struct tipc_node *tipc_node_create(struct net *net, u32 addr);
void tipc_node_stop(struct net *net);

/* Destination (media address) checking and updating for a node's bearer */
void tipc_node_check_dest(struct tipc_node *n, struct tipc_bearer *bearer,
			  bool *link_up, bool *addr_match,
			  struct tipc_media_addr *maddr);
bool tipc_node_update_dest(struct tipc_node *n, struct tipc_bearer *bearer,
			   struct tipc_media_addr *maddr);

/* Link attachment and link state changes */
void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr);
void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr);
void tipc_node_link_down(struct tipc_node *n_ptr, int bearer_id);
void tipc_node_link_up(struct tipc_node *n_ptr, int bearer_id);
bool tipc_node_is_up(struct tipc_node *n);
int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node,
			   char *linkname, size_t len);

/* Counterpart of the inline tipc_node_lock() defined in this header */
void tipc_node_unlock(struct tipc_node *node);

/* Connection bookkeeping per destination node */
int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port);
void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port);

/* Netlink dump callback */
int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb);
|
|
|
|
|
2014-05-05 08:56:12 +08:00
|
|
|
static inline void tipc_node_lock(struct tipc_node *node)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
2014-05-05 08:56:12 +08:00
|
|
|
spin_lock_bh(&node->lock);
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
|
|
|
|
2014-05-05 08:56:11 +08:00
|
|
|
static inline bool tipc_node_blocked(struct tipc_node *node)
|
|
|
|
{
|
2014-05-08 08:54:39 +08:00
|
|
|
return (node->action_flags & (TIPC_WAIT_PEER_LINKS_DOWN |
|
|
|
|
TIPC_NOTIFY_NODE_DOWN | TIPC_WAIT_OWN_LINKS_DOWN));
|
2014-05-05 08:56:11 +08:00
|
|
|
}
|
|
|
|
|
2015-07-17 04:54:19 +08:00
|
|
|
static inline struct tipc_link *node_active_link(struct tipc_node *n, int sel)
|
2014-06-26 09:41:33 +08:00
|
|
|
{
|
2015-07-17 04:54:19 +08:00
|
|
|
struct tipc_link_entry *le = n->active_links[sel & 1];
|
2014-06-26 09:41:33 +08:00
|
|
|
|
2015-07-17 04:54:19 +08:00
|
|
|
if (likely(le))
|
|
|
|
return le->link;
|
|
|
|
return NULL;
|
|
|
|
}
|
2014-06-26 09:41:33 +08:00
|
|
|
|
2015-07-17 04:54:19 +08:00
|
|
|
/**
 * tipc_node_get_mtu - look up the MTU of an active link towards a node
 * @net: net namespace passed on to tipc_node_find()
 * @addr: address of the node to look up
 * @selector: only its lowest bit is used, to index the node's active_links[]
 *
 * Returns the mtu of the selected active link entry. Falls back to
 * MAX_MSG_SIZE when the node is not found or the selected slot is empty.
 * A node obtained from tipc_node_find() is handed to tipc_node_put()
 * before returning.
 */
static inline uint tipc_node_get_mtu(struct net *net, u32 addr, u32 selector)
{
	struct tipc_node *n;
	struct tipc_link_entry *le;
	unsigned int mtu = MAX_MSG_SIZE;

	n = tipc_node_find(net, addr);
	if (unlikely(!n))
		return mtu;
	le = n->active_links[selector & 1];
	if (likely(le))
		mtu = le->mtu;
	tipc_node_put(n);
	return mtu;
}
|
|
|
|
|
2006-01-03 02:04:38 +08:00
|
|
|
#endif
|