2006-01-03 02:04:38 +08:00
|
|
|
/*
|
|
|
|
* net/tipc/node.c: TIPC node management routines
|
2007-02-09 22:25:21 +08:00
|
|
|
*
|
tipc: introduce message to synchronize broadcast link
Upon establishing a first link between two nodes, there is
currently a risk that the two endpoints will disagree on exactly
which sequence number reception and acknowledging of broadcast
packets should start.
The following scenarios may happen:
1: Node A sends an ACTIVATE message to B, telling it to start acking
packets from sequence number N.
2: Node A sends out broadcast N, but does not expect an acknowledge
from B, since B is not yet in its broadcast receiver's list.
3: Node A receives ACK for N from all nodes except B, and releases
packet N.
4: Node B receives the ACTIVATE, activates its link endpoint, and
stores the value N as sequence number of first expected packet.
5: Node B sends a NAME_DISTR message to A.
6: Node A receives the NAME_DISTR message, and activates its endpoint.
At this moment B is added to A's broadcast receiver's set.
Node A also sets sequence number 0 as the first broadcast packet
to be received from B.
7: Node A sends broadcast N+1.
8: B receives N+1, determines there is a gap in the sequence, since
it is expecting N, and sends a NACK for N back to A.
9: Node A has already released N, so no retransmission is possible.
The broadcast link in direction A->B is stale.
In addition to, or instead of, 7-9 above, the following may happen:
10: Node B sends broadcast M > 0 to A.
11: Node A receives M, falsely decides there must be a gap, since
it is expecting packet 0, and asks for retransmission of packets
[0,M-1].
12: Node B has already released these packets, so the broadcast
link is stale in direction B->A.
We solve this problem by introducing a new unicast message type,
BCAST_PROTOCOL/STATE, to convey the sequence number of the next
sent broadcast packet to the other endpoint, at exactly the moment
that endpoint is added to the own node's broadcast receivers list,
and before any other unicast messages are permitted to be sent.
Furthermore, we don't allow any node to start receiving and
processing broadcast packets until this new synchronization
message has been received.
To maintain backwards compatibility, we still open up for
broadcast reception if we receive a NAME_DISTR message without
any preceding broadcast sync message. In this case, we must
assume that the other end has an older code version, and will
never send out the new synchronization message. Hence, for mixed
old and new nodes, the issue arising in 7-12 of the above may
happen with the same probability as before.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-11-16 13:51:31 +08:00
|
|
|
* Copyright (c) 2000-2006, 2012 Ericsson AB
|
2011-01-08 02:00:11 +08:00
|
|
|
* Copyright (c) 2005-2006, 2010-2011, Wind River Systems
|
2006-01-03 02:04:38 +08:00
|
|
|
* All rights reserved.
|
|
|
|
*
|
2006-01-11 20:30:43 +08:00
|
|
|
* Redistribution and use in source and binary forms, with or without
|
2006-01-03 02:04:38 +08:00
|
|
|
* modification, are permitted provided that the following conditions are met:
|
|
|
|
*
|
2006-01-11 20:30:43 +08:00
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
* 3. Neither the names of the copyright holders nor the names of its
|
|
|
|
* contributors may be used to endorse or promote products derived from
|
|
|
|
* this software without specific prior written permission.
|
2006-01-03 02:04:38 +08:00
|
|
|
*
|
2006-01-11 20:30:43 +08:00
|
|
|
* Alternatively, this software may be distributed under the terms of the
|
|
|
|
* GNU General Public License ("GPL") version 2 as published by the Free
|
|
|
|
* Software Foundation.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
2006-01-03 02:04:38 +08:00
|
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "core.h"
|
|
|
|
#include "config.h"
|
|
|
|
#include "node.h"
|
|
|
|
#include "name_distr.h"
|
|
|
|
|
2011-11-04 23:54:43 +08:00
|
|
|
/*
 * Number of buckets in node_htable. tipc_hashfn() masks the address with
 * (NODE_HTABLE_SIZE - 1), so this value must remain a power of two.
 */
#define NODE_HTABLE_SIZE 512
|
|
|
|
|
2008-09-03 14:38:32 +08:00
|
|
|
/* Defined further down; declared here because tipc_node_link_down() calls it */
static void node_lost_contact(struct tipc_node *n_ptr);
|
|
|
|
/* Defined further down; declared here because tipc_node_link_up() calls it */
static void node_established_contact(struct tipc_node *n_ptr);
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2008-05-22 05:53:00 +08:00
|
|
|
/* Held by tipc_node_create() while a new node is allocated and inserted */
static DEFINE_SPINLOCK(node_create_lock);
|
|
|
|
|
2011-02-26 07:42:52 +08:00
|
|
|
/* Hash table of node objects; the bucket is chosen by tipc_hashfn(node address) */
static struct hlist_head node_htable[NODE_HTABLE_SIZE];
|
|
|
|
/* List of all node objects; tipc_node_create() inserts in ascending address order */
LIST_HEAD(tipc_node_list);
|
|
|
|
/* Node count: incremented in tipc_node_create(), decremented in tipc_node_delete() */
static u32 tipc_num_nodes;
|
2011-02-26 08:11:25 +08:00
|
|
|
|
|
|
|
/* Link count: incremented in tipc_node_attach_link(), decremented in tipc_node_detach_link() */
static atomic_t tipc_num_links = ATOMIC_INIT(0);
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2011-11-04 23:54:43 +08:00
|
|
|
/*
|
|
|
|
* A trivial power-of-two bitmask technique is used for speed, since this
|
|
|
|
* operation is done for every incoming TIPC packet. The number of hash table
|
|
|
|
* entries has been chosen so that no hash chain exceeds 8 nodes and will
|
|
|
|
* usually be much smaller (typically only a single node).
|
|
|
|
*/
|
2012-04-23 12:49:13 +08:00
|
|
|
static unsigned int tipc_hashfn(u32 addr)
|
2011-11-04 23:54:43 +08:00
|
|
|
{
|
|
|
|
return addr & (NODE_HTABLE_SIZE - 1);
|
|
|
|
}
|
|
|
|
|
2011-10-28 03:03:24 +08:00
|
|
|
/*
|
2011-02-26 07:42:52 +08:00
|
|
|
* tipc_node_find - locate specified node object, if it exists
|
|
|
|
*/
|
|
|
|
struct tipc_node *tipc_node_find(u32 addr)
|
|
|
|
{
|
|
|
|
struct tipc_node *node;
|
|
|
|
|
2012-04-18 06:02:01 +08:00
|
|
|
if (unlikely(!in_own_cluster_exact(addr)))
|
2011-02-26 07:42:52 +08:00
|
|
|
return NULL;
|
|
|
|
|
hlist: drop the node parameter from iterators
I'm not sure why, but the hlist for each entry iterators were conceived
list_for_each_entry(pos, head, member)
The hlist ones were greedy and wanted an extra parameter:
hlist_for_each_entry(tpos, pos, head, member)
Why did they need an extra pos parameter? I'm not quite sure. Not only
they don't really need it, it also prevents the iterator from looking
exactly like the list iterator, which is unfortunate.
Besides the semantic patch, there was some manual work required:
- Fix up the actual hlist iterators in linux/list.h
- Fix up the declaration of other iterators based on the hlist ones.
- A very small amount of places were using the 'node' parameter, this
was modified to use 'obj->member' instead.
- Coccinelle didn't handle the hlist_for_each_entry_safe iterator
properly, so those had to be fixed up manually.
The semantic patch which is mostly the work of Peter Senna Tschudin is here:
@@
iterator name hlist_for_each_entry, hlist_for_each_entry_continue, hlist_for_each_entry_from, hlist_for_each_entry_rcu, hlist_for_each_entry_rcu_bh, hlist_for_each_entry_continue_rcu_bh, for_each_busy_worker, ax25_uid_for_each, ax25_for_each, inet_bind_bucket_for_each, sctp_for_each_hentry, sk_for_each, sk_for_each_rcu, sk_for_each_from, sk_for_each_safe, sk_for_each_bound, hlist_for_each_entry_safe, hlist_for_each_entry_continue_rcu, nr_neigh_for_each, nr_neigh_for_each_safe, nr_node_for_each, nr_node_for_each_safe, for_each_gfn_indirect_valid_sp, for_each_gfn_sp, for_each_host;
type T;
expression a,c,d,e;
identifier b;
statement S;
@@
-T b;
<+... when != b
(
hlist_for_each_entry(a,
- b,
c, d) S
|
hlist_for_each_entry_continue(a,
- b,
c) S
|
hlist_for_each_entry_from(a,
- b,
c) S
|
hlist_for_each_entry_rcu(a,
- b,
c, d) S
|
hlist_for_each_entry_rcu_bh(a,
- b,
c, d) S
|
hlist_for_each_entry_continue_rcu_bh(a,
- b,
c) S
|
for_each_busy_worker(a, c,
- b,
d) S
|
ax25_uid_for_each(a,
- b,
c) S
|
ax25_for_each(a,
- b,
c) S
|
inet_bind_bucket_for_each(a,
- b,
c) S
|
sctp_for_each_hentry(a,
- b,
c) S
|
sk_for_each(a,
- b,
c) S
|
sk_for_each_rcu(a,
- b,
c) S
|
sk_for_each_from
-(a, b)
+(a)
S
+ sk_for_each_from(a) S
|
sk_for_each_safe(a,
- b,
c, d) S
|
sk_for_each_bound(a,
- b,
c) S
|
hlist_for_each_entry_safe(a,
- b,
c, d, e) S
|
hlist_for_each_entry_continue_rcu(a,
- b,
c) S
|
nr_neigh_for_each(a,
- b,
c) S
|
nr_neigh_for_each_safe(a,
- b,
c, d) S
|
nr_node_for_each(a,
- b,
c) S
|
nr_node_for_each_safe(a,
- b,
c, d) S
|
- for_each_gfn_sp(a, c, d, b) S
+ for_each_gfn_sp(a, c, d) S
|
- for_each_gfn_indirect_valid_sp(a, c, d, b) S
+ for_each_gfn_indirect_valid_sp(a, c, d) S
|
for_each_host(a,
- b,
c) S
|
for_each_host_safe(a,
- b,
c, d) S
|
for_each_mesh_entry(a,
- b,
c, d) S
)
...+>
[akpm@linux-foundation.org: drop bogus change from net/ipv4/raw.c]
[akpm@linux-foundation.org: drop bogus hunk from net/ipv6/raw.c]
[akpm@linux-foundation.org: checkpatch fixes]
[akpm@linux-foundation.org: fix warnings]
[akpm@linux-foudnation.org: redo intrusive kvm changes]
Tested-by: Peter Senna Tschudin <peter.senna@gmail.com>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-02-28 09:06:00 +08:00
|
|
|
hlist_for_each_entry(node, &node_htable[tipc_hashfn(addr)], hash) {
|
2011-02-26 07:42:52 +08:00
|
|
|
if (node->addr == addr)
|
|
|
|
return node;
|
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2008-05-22 05:53:00 +08:00
|
|
|
/**
|
|
|
|
* tipc_node_create - create neighboring node
|
|
|
|
*
|
|
|
|
* Currently, this routine is called by neighbor discovery code, which holds
|
|
|
|
* net_lock for reading only. We must take node_create_lock to ensure a node
|
|
|
|
* isn't created twice if two different bearers discover the node at the same
|
|
|
|
* time. (It would be preferable to switch to holding net_lock in write mode,
|
|
|
|
* but this is a non-trivial change.)
|
|
|
|
*/
|
2008-09-03 14:38:32 +08:00
|
|
|
struct tipc_node *tipc_node_create(u32 addr)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
2011-02-26 07:42:52 +08:00
|
|
|
struct tipc_node *n_ptr, *temp_node;
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2008-05-22 05:53:00 +08:00
|
|
|
spin_lock_bh(&node_create_lock);
|
|
|
|
|
2011-01-01 02:59:23 +08:00
|
|
|
n_ptr = kzalloc(sizeof(*n_ptr), GFP_ATOMIC);
|
2006-06-26 14:52:17 +08:00
|
|
|
if (!n_ptr) {
|
2008-05-22 05:53:00 +08:00
|
|
|
spin_unlock_bh(&node_create_lock);
|
2012-06-29 12:16:37 +08:00
|
|
|
pr_warn("Node creation failed, no memory\n");
|
2006-06-26 14:52:17 +08:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
n_ptr->addr = addr;
|
2011-01-01 02:59:18 +08:00
|
|
|
spin_lock_init(&n_ptr->lock);
|
2011-02-26 07:42:52 +08:00
|
|
|
INIT_HLIST_NODE(&n_ptr->hash);
|
|
|
|
INIT_LIST_HEAD(&n_ptr->list);
|
2006-06-26 14:52:17 +08:00
|
|
|
INIT_LIST_HEAD(&n_ptr->nsub);
|
2011-01-01 02:59:19 +08:00
|
|
|
|
2011-02-26 07:42:52 +08:00
|
|
|
hlist_add_head(&n_ptr->hash, &node_htable[tipc_hashfn(addr)]);
|
|
|
|
|
|
|
|
list_for_each_entry(temp_node, &tipc_node_list, list) {
|
|
|
|
if (n_ptr->addr < temp_node->addr)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
list_add_tail(&n_ptr->list, &temp_node->list);
|
tipc: Ensure both nodes recognize loss of contact between them
Enhances TIPC to ensure that a node that loses contact with a
neighboring node does not allow contact to be re-established until
it sees that its peer has also recognized the loss of contact.
Previously, nodes that were connected by two or more links could
encounter a situation in which node A would lose contact with node B
on all of its links, purge its name table of names published by B,
and then fail to repopulate those names once contact with B was restored.
This would happen because B was able to re-establish one or more links
so quickly that it never reached a point where it had no links to A --
meaning that B never saw a loss of contact with A, and consequently
didn't re-publish its names to A.
This problem is now prevented by enhancing the cleanup done by TIPC
following a loss of contact with a neighboring node to ensure that
node A ignores all messages sent by B until it receives a LINK_PROTOCOL
message that indicates B has lost contact with A, thereby preventing
the (re)establishment of links between the nodes. The loss of contact
is recognized when a RESET or ACTIVATE message is received that has
a "redundant link exists" field of 0, indicating that B's sending link
endpoint is in a reset state and that B has no other working links.
Additionally, TIPC now suppresses the sending of (most) link protocol
messages to a neighboring node while it is cleaning up after an earlier
loss of contact with that node. This stops the peer node from prematurely
activating its link endpoint, which would prevent TIPC from later
activating its own end. TIPC still allows outgoing RESET messages to
occur during cleanup, to avoid problems if its own node recognizes
the loss of contact first and tries to notify the peer of the situation.
Finally, TIPC now recognizes an impending loss of contact with a peer node
as soon as it receives a RESET message on a working link that is the
peer's only link to the node, and ensures that the link protocol
suppression mentioned above goes into effect right away -- that is,
even before its own link endpoints have failed. This is necessary to
ensure correct operation when there are redundant links between the nodes,
since otherwise TIPC would send an ACTIVATE message upon receiving a RESET
on its first link and only begin suppressing when a RESET on its second
link was received, instead of initiating suppression with the first RESET
message as it needs to.
Note: The reworked cleanup code also eliminates a check that prevented
a link endpoint's discovery object from responding to incoming messages
while stale name table entries are being purged. This check is now
unnecessary and would have slowed down re-establishment of communication
between the nodes in some situations.
Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2011-05-27 23:00:51 +08:00
|
|
|
n_ptr->block_setup = WAIT_PEER_DOWN;
|
2011-10-29 04:26:41 +08:00
|
|
|
n_ptr->signature = INVALID_NODE_SIG;
|
2011-02-26 07:42:52 +08:00
|
|
|
|
|
|
|
tipc_num_nodes++;
|
2006-06-26 14:52:17 +08:00
|
|
|
|
2008-05-22 05:53:00 +08:00
|
|
|
spin_unlock_bh(&node_create_lock);
|
2006-01-03 02:04:38 +08:00
|
|
|
return n_ptr;
|
|
|
|
}
|
|
|
|
|
2008-09-03 14:38:32 +08:00
|
|
|
void tipc_node_delete(struct tipc_node *n_ptr)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
2011-02-26 07:42:52 +08:00
|
|
|
list_del(&n_ptr->list);
|
|
|
|
hlist_del(&n_ptr->hash);
|
2006-01-03 02:04:38 +08:00
|
|
|
kfree(n_ptr);
|
2011-01-01 02:59:19 +08:00
|
|
|
|
2011-02-26 07:42:52 +08:00
|
|
|
tipc_num_nodes--;
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2006-01-18 07:38:21 +08:00
|
|
|
* tipc_node_link_up - handle addition of link
|
2007-02-09 22:25:21 +08:00
|
|
|
*
|
2006-01-03 02:04:38 +08:00
|
|
|
* Link becomes active (alone or shared) or standby, depending on its priority.
|
|
|
|
*/
|
2011-12-30 09:58:42 +08:00
|
|
|
void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
2011-12-30 09:58:42 +08:00
|
|
|
struct tipc_link **active = &n_ptr->active_links[0];
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2006-06-26 14:52:50 +08:00
|
|
|
n_ptr->working_links++;
|
|
|
|
|
2012-06-29 12:16:37 +08:00
|
|
|
pr_info("Established link <%s> on network plane %c\n",
|
|
|
|
l_ptr->name, l_ptr->b_ptr->net_plane);
|
2007-02-09 22:25:21 +08:00
|
|
|
|
2006-01-03 02:04:38 +08:00
|
|
|
if (!active[0]) {
|
|
|
|
active[0] = active[1] = l_ptr;
|
|
|
|
node_established_contact(n_ptr);
|
|
|
|
return;
|
|
|
|
}
|
2007-02-09 22:25:21 +08:00
|
|
|
if (l_ptr->priority < active[0]->priority) {
|
2012-06-29 12:16:37 +08:00
|
|
|
pr_info("New link <%s> becomes standby\n", l_ptr->name);
|
2006-01-03 02:04:38 +08:00
|
|
|
return;
|
|
|
|
}
|
2014-02-18 16:06:46 +08:00
|
|
|
tipc_link_dup_queue_xmit(active[0], l_ptr);
|
2007-02-09 22:25:21 +08:00
|
|
|
if (l_ptr->priority == active[0]->priority) {
|
2006-01-03 02:04:38 +08:00
|
|
|
active[0] = l_ptr;
|
|
|
|
return;
|
|
|
|
}
|
2012-06-29 12:16:37 +08:00
|
|
|
pr_info("Old link <%s> becomes standby\n", active[0]->name);
|
2006-06-26 14:52:17 +08:00
|
|
|
if (active[1] != active[0])
|
2012-06-29 12:16:37 +08:00
|
|
|
pr_info("Old link <%s> becomes standby\n", active[1]->name);
|
2006-01-03 02:04:38 +08:00
|
|
|
active[0] = active[1] = l_ptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* node_select_active_links - select active link
|
|
|
|
*/
|
2008-09-03 14:38:32 +08:00
|
|
|
static void node_select_active_links(struct tipc_node *n_ptr)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
2011-12-30 09:58:42 +08:00
|
|
|
struct tipc_link **active = &n_ptr->active_links[0];
|
2006-01-03 02:04:38 +08:00
|
|
|
u32 i;
|
|
|
|
u32 highest_prio = 0;
|
|
|
|
|
2007-02-09 22:25:21 +08:00
|
|
|
active[0] = active[1] = NULL;
|
2006-01-03 02:04:38 +08:00
|
|
|
|
|
|
|
for (i = 0; i < MAX_BEARERS; i++) {
|
2011-12-30 09:58:42 +08:00
|
|
|
struct tipc_link *l_ptr = n_ptr->links[i];
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2006-01-18 07:38:21 +08:00
|
|
|
if (!l_ptr || !tipc_link_is_up(l_ptr) ||
|
2006-01-03 02:04:38 +08:00
|
|
|
(l_ptr->priority < highest_prio))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (l_ptr->priority > highest_prio) {
|
2007-02-09 22:25:21 +08:00
|
|
|
highest_prio = l_ptr->priority;
|
2006-01-03 02:04:38 +08:00
|
|
|
active[0] = active[1] = l_ptr;
|
|
|
|
} else {
|
|
|
|
active[1] = l_ptr;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2006-01-18 07:38:21 +08:00
|
|
|
* tipc_node_link_down - handle loss of link
|
2006-01-03 02:04:38 +08:00
|
|
|
*/
|
2011-12-30 09:58:42 +08:00
|
|
|
void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
2011-12-30 09:58:42 +08:00
|
|
|
struct tipc_link **active;
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2006-06-26 14:52:50 +08:00
|
|
|
n_ptr->working_links--;
|
|
|
|
|
2006-01-18 07:38:21 +08:00
|
|
|
if (!tipc_link_is_active(l_ptr)) {
|
2012-06-29 12:16:37 +08:00
|
|
|
pr_info("Lost standby link <%s> on network plane %c\n",
|
|
|
|
l_ptr->name, l_ptr->b_ptr->net_plane);
|
2006-01-03 02:04:38 +08:00
|
|
|
return;
|
|
|
|
}
|
2012-06-29 12:16:37 +08:00
|
|
|
pr_info("Lost link <%s> on network plane %c\n",
|
2006-01-03 02:04:38 +08:00
|
|
|
l_ptr->name, l_ptr->b_ptr->net_plane);
|
|
|
|
|
|
|
|
active = &n_ptr->active_links[0];
|
|
|
|
if (active[0] == l_ptr)
|
|
|
|
active[0] = active[1];
|
|
|
|
if (active[1] == l_ptr)
|
|
|
|
active[1] = active[0];
|
|
|
|
if (active[0] == l_ptr)
|
|
|
|
node_select_active_links(n_ptr);
|
2007-02-09 22:25:21 +08:00
|
|
|
if (tipc_node_is_up(n_ptr))
|
2014-01-08 06:02:41 +08:00
|
|
|
tipc_link_failover_send_queue(l_ptr);
|
2007-02-09 22:25:21 +08:00
|
|
|
else
|
2006-01-03 02:04:38 +08:00
|
|
|
node_lost_contact(n_ptr);
|
|
|
|
}
|
|
|
|
|
2011-02-28 23:36:21 +08:00
|
|
|
int tipc_node_active_links(struct tipc_node *n_ptr)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
2010-08-17 19:00:12 +08:00
|
|
|
return n_ptr->active_links[0] != NULL;
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
|
|
|
|
2008-09-03 14:38:32 +08:00
|
|
|
/**
 * tipc_node_is_up - test whether a node is reachable
 * @n_ptr: node to examine
 *
 * Returns the result of tipc_node_active_links() for the node.
 */
int tipc_node_is_up(struct tipc_node *n_ptr)
{
	int has_active = tipc_node_active_links(n_ptr);

	return has_active;
}
|
|
|
|
|
2011-12-30 09:58:42 +08:00
|
|
|
void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
2011-03-01 00:32:27 +08:00
|
|
|
n_ptr->links[l_ptr->b_ptr->identity] = l_ptr;
|
|
|
|
atomic_inc(&tipc_num_links);
|
|
|
|
n_ptr->link_cnt++;
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
|
|
|
|
2011-12-30 09:58:42 +08:00
|
|
|
void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
tipc: delay delete of link when failover is needed
When a bearer is disabled, all its attached links are deleted.
Ideally, we should do link failover to redundant links on other bearers,
if there are any, in such cases. This would be consistent with current
behavior when a link is reset, but not deleted. However, due to the
complexity involved, and the (wrongly) perceived low demand for this
feature, it was never implemented until now.
We mark the doomed link for deletion with a new flag, but wait until the
failover process is finished before we actually delete it. With the
improved link tunnelling/failover code introduced earlier in this commit
series, it is now easy to identify a spot in the code where the failover
is finished and it is safe to delete the marked link. Moreover, the test
for the flag and the deletion can be done synchronously, and outside the
most time critical data path.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-02-14 06:29:16 +08:00
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < MAX_BEARERS; i++) {
|
2014-02-15 05:40:43 +08:00
|
|
|
if (l_ptr != n_ptr->links[i])
|
|
|
|
continue;
|
|
|
|
n_ptr->links[i] = NULL;
|
|
|
|
atomic_dec(&tipc_num_links);
|
|
|
|
n_ptr->link_cnt--;
|
tipc: delay delete of link when failover is needed
When a bearer is disabled, all its attached links are deleted.
Ideally, we should do link failover to redundant links on other bearers,
if there are any, in such cases. This would be consistent with current
behavior when a link is reset, but not deleted. However, due to the
complexity involved, and the (wrongly) perceived low demand for this
feature, it was never implemented until now.
We mark the doomed link for deletion with a new flag, but wait until the
failover process is finished before we actually delete it. With the
improved link tunnelling/failover code introduced earlier in this commit
series, it is now easy to identify a spot in the code where the failover
is finished and it is safe to delete the marked link. Moreover, the test
for the flag and the deletion can be done synchronously, and outside the
most time critical data path.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-02-14 06:29:16 +08:00
|
|
|
}
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
|
|
|
|
2008-09-03 14:38:32 +08:00
|
|
|
static void node_established_contact(struct tipc_node *n_ptr)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
2011-01-01 02:59:18 +08:00
|
|
|
tipc_k_signal((Handler)tipc_named_node_up, n_ptr->addr);
|
tipc: introduce message to synchronize broadcast link
Upon establishing a first link between two nodes, there is
currently a risk that the two endpoints will disagree on exactly
which sequence number reception and acknowleding of broadcast
packets should start.
The following scenarios may happen:
1: Node A sends an ACTIVATE message to B, telling it to start acking
packets from sequence number N.
2: Node A sends out broadcast N, but does not expect an acknowledge
from B, since B is not yet in its broadcast receiver's list.
3: Node A receives ACK for N from all nodes except B, and releases
packet N.
4: Node B receives the ACTIVATE, activates its link endpoint, and
stores the value N as sequence number of first expected packet.
5: Node B sends a NAME_DISTR message to A.
6: Node A receives the NAME_DISTR message, and activates its endpoint.
At this moment B is added to A's broadcast receiver's set.
Node A also sets sequence number 0 as the first broadcast packet
to be received from B.
7: Node A sends broadcast N+1.
8: B receives N+1, determines there is a gap in the sequence, since
it is expecting N, and sends a NACK for N back to A.
9: Node A has already released N, so no retransmission is possible.
The broadcast link in direction A->B is stale.
In addition to, or instead of, 7-9 above, the following may happen:
10: Node B sends broadcast M > 0 to A.
11: Node A receives M, falsely decides there must be a gap, since
it is expecting packet 0, and asks for retransmission of packets
[0,M-1].
12: Node B has already released these packets, so the broadcast
link is stale in direction B->A.
We solve this problem by introducing a new unicast message type,
BCAST_PROTOCOL/STATE, to convey the sequence number of the next
sent broadcast packet to the other endpoint, at exactly the moment
that endpoint is added to the own node's broadcast receivers list,
and before any other unicast messages are permitted to be sent.
Furthermore, we don't allow any node to start receiving and
processing broadcast packets until this new synchronization
message has been received.
To maintain backwards compatibility, we still open up for
broadcast reception if we receive a NAME_DISTR message without
any preceding broadcast sync message. In this case, we must
assume that the other end has an older code version, and will
never send out the new synchronization message. Hence, for mixed
old and new nodes, the issue arising in 7-12 of the above may
happen with the same probability as before.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-11-16 13:51:31 +08:00
|
|
|
n_ptr->bclink.oos_state = 0;
|
2012-11-16 13:51:29 +08:00
|
|
|
n_ptr->bclink.acked = tipc_bclink_get_last_sent();
|
|
|
|
tipc_bclink_add_node(n_ptr->addr);
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
|
|
|
|
tipc: Ensure both nodes recognize loss of contact between them
Enhances TIPC to ensure that a node that loses contact with a
neighboring node does not allow contact to be re-established until
it sees that its peer has also recognized the loss of contact.
Previously, nodes that were connected by two or more links could
encounter a situation in which node A would lose contact with node B
on all of its links, purge its name table of names published by B,
and then fail to repopulate those names once contact with B was restored.
This would happen because B was able to re-establish one or more links
so quickly that it never reached a point where it had no links to A --
meaning that B never saw a loss of contact with A, and consequently
didn't re-publish its names to A.
This problem is now prevented by enhancing the cleanup done by TIPC
following a loss of contact with a neighboring node to ensure that
node A ignores all messages sent by B until it receives a LINK_PROTOCOL
message that indicates B has lost contact with A, thereby preventing
the (re)establishment of links between the nodes. The loss of contact
is recognized when a RESET or ACTIVATE message is received that has
a "redundant link exists" field of 0, indicating that B's sending link
endpoint is in a reset state and that B has no other working links.
Additionally, TIPC now suppresses the sending of (most) link protocol
messages to a neighboring node while it is cleaning up after an earlier
loss of contact with that node. This stops the peer node from prematurely
activating its link endpoint, which would prevent TIPC from later
activating its own end. TIPC still allows outgoing RESET messages to
occur during cleanup, to avoid problems if its own node recognizes
the loss of contact first and tries to notify the peer of the situation.
Finally, TIPC now recognizes an impending loss of contact with a peer node
as soon as it receives a RESET message on a working link that is the
peer's only link to the node, and ensures that the link protocol
suppression mentioned above goes into effect right away -- that is,
even before its own link endpoints have failed. This is necessary to
ensure correct operation when there are redundant links between the nodes,
since otherwise TIPC would send an ACTIVATE message upon receiving a RESET
on its first link and only begin suppressing when a RESET on its second
link was received, instead of initiating suppression with the first RESET
message as it needs to.
Note: The reworked cleanup code also eliminates a check that prevented
a link endpoint's discovery object from responding to incoming messages
while stale name table entries are being purged. This check is now
unnecessary and would have slowed down re-establishment of communication
between the nodes in some situations.
Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2011-05-27 23:00:51 +08:00
|
|
|
static void node_name_purge_complete(unsigned long node_addr)
|
2010-08-17 19:00:16 +08:00
|
|
|
{
|
|
|
|
struct tipc_node *n_ptr;
|
|
|
|
|
|
|
|
read_lock_bh(&tipc_net_lock);
|
|
|
|
n_ptr = tipc_node_find(node_addr);
|
|
|
|
if (n_ptr) {
|
|
|
|
tipc_node_lock(n_ptr);
|
tipc: Ensure both nodes recognize loss of contact between them
Enhances TIPC to ensure that a node that loses contact with a
neighboring node does not allow contact to be re-established until
it sees that its peer has also recognized the loss of contact.
Previously, nodes that were connected by two or more links could
encounter a situation in which node A would lose contact with node B
on all of its links, purge its name table of names published by B,
and then fail to repopulate those names once contact with B was restored.
This would happen because B was able to re-establish one or more links
so quickly that it never reached a point where it had no links to A --
meaning that B never saw a loss of contact with A, and consequently
didn't re-publish its names to A.
This problem is now prevented by enhancing the cleanup done by TIPC
following a loss of contact with a neighboring node to ensure that
node A ignores all messages sent by B until it receives a LINK_PROTOCOL
message that indicates B has lost contact with A, thereby preventing
the (re)establishment of links between the nodes. The loss of contact
is recognized when a RESET or ACTIVATE message is received that has
a "redundant link exists" field of 0, indicating that B's sending link
endpoint is in a reset state and that B has no other working links.
Additionally, TIPC now suppresses the sending of (most) link protocol
messages to a neighboring node while it is cleaning up after an earlier
loss of contact with that node. This stops the peer node from prematurely
activating its link endpoint, which would prevent TIPC from later
activating its own end. TIPC still allows outgoing RESET messages to
occur during cleanup, to avoid problems if its own node recognizes
the loss of contact first and tries to notify the peer of the situation.
Finally, TIPC now recognizes an impending loss of contact with a peer node
as soon as it receives a RESET message on a working link that is the
peer's only link to the node, and ensures that the link protocol
suppression mentioned above goes into effect right away -- that is,
even before its own link endpoints have failed. This is necessary to
ensure correct operation when there are redundant links between the nodes,
since otherwise TIPC would send an ACTIVATE message upon receiving a RESET
on its first link and only begin suppressing when a RESET on its second
link was received, instead of initiating suppression with the first RESET
message as it needs to.
Note: The reworked cleanup code also eliminates a check that prevented
a link endpoint's discovery object from responding to incoming messages
while stale name table entries are being purged. This check is now
unnecessary and would have slowed down re-establishment of communication
between the nodes in some situations.
Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2011-05-27 23:00:51 +08:00
|
|
|
n_ptr->block_setup &= ~WAIT_NAMES_GONE;
|
2010-08-17 19:00:16 +08:00
|
|
|
tipc_node_unlock(n_ptr);
|
|
|
|
}
|
|
|
|
read_unlock_bh(&tipc_net_lock);
|
|
|
|
}
|
|
|
|
|
2008-09-03 14:38:32 +08:00
|
|
|
static void node_lost_contact(struct tipc_node *n_ptr)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
|
|
|
char addr_string[16];
|
|
|
|
u32 i;
|
|
|
|
|
2012-06-29 12:16:37 +08:00
|
|
|
pr_info("Lost contact with %s\n",
|
|
|
|
tipc_addr_string_fill(addr_string, n_ptr->addr));
|
2011-04-07 23:58:08 +08:00
|
|
|
|
|
|
|
/* Flush broadcast link info associated with lost node */
|
2012-11-16 13:51:30 +08:00
|
|
|
if (n_ptr->bclink.recv_permitted) {
|
2013-12-11 12:45:38 +08:00
|
|
|
kfree_skb_list(n_ptr->bclink.deferred_head);
|
tipc: Major redesign of broadcast link ACK/NACK algorithms
Completely redesigns broadcast link ACK and NACK mechanisms to prevent
spurious retransmit requests in dual LAN networks, and to prevent the
broadcast link from stalling due to the failure of a receiving node to
acknowledge receiving a broadcast message or request its retransmission.
Note: These changes only impact the timing of when ACK and NACK messages
are sent, and not the basic broadcast link protocol itself, so inter-
operability with nodes using the "classic" algorithms is maintained.
The revised algorithms are as follows:
1) An explicit ACK message is still sent after receiving 16 in-sequence
messages, and implicit ACK information continues to be carried in other
unicast link message headers (including link state messages). However,
the timing of explicit ACKs is now based on the receiving node's absolute
network address rather than its relative network address to ensure that
the failure of another node does not delay the ACK beyond its 16 message
target.
2) A NACK message is now typically sent only when a message gap persists
for two consecutive incoming link state messages; this ensures that a
suspected gap is not confirmed until both LANs in a dual LAN network have
had an opportunity to deliver the message, thereby preventing spurious NACKs.
A NACK message can also be generated by the arrival of a single link state
message, if the deferred queue is so big that the current message gap
cannot be the result of "normal" mis-ordering due to the use of dual LANs
(or one LAN using a bonded interface). Since link state messages typically
arrive at different nodes at different times the problem of multiple nodes
issuing identical NACKs simultaneously is inherently avoided.
3) Nodes continue to "peek" at NACK messages sent by other nodes. If
another node requests retransmission of a message gap suspected (but not
yet confirmed) by the peeking node, the peeking node forgets about the
gap and does not generate a duplicate retransmit request. (If the peeking
node subsequently fails to receive the lost message, later link state
messages will cause it to rediscover and confirm the gap and send another
NACK.)
4) Message gap "equality" is now determined by the start of the gap only.
This is sufficient to deal with the most common cases of message loss,
and eliminates the need for complex end of gap computations.
5) A peeking node no longer tries to determine whether it should send a
complementary NACK, since the most common cases of message loss don't
require it to be sent. Consequently, the node no longer examines the
"broadcast tag" field of a NACK message when peeking.
Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2011-10-28 02:17:53 +08:00
|
|
|
n_ptr->bclink.deferred_size = 0;
|
2011-04-07 23:58:08 +08:00
|
|
|
|
tipc: message reassembly using fragment chain
When the first fragment of a long data data message is received on a link, a
reassembly buffer large enough to hold the data from this and all subsequent
fragments of the message is allocated. The payload of each new fragment is
copied into this buffer upon arrival. When the last fragment is received, the
reassembled message is delivered upwards to the port/socket layer.
Not only is this an inefficient approach, but it may also cause bursts of
reassembly failures in low memory situations. since we may fail to allocate
the necessary large buffer in the first place. Furthermore, after 100 subsequent
such failures the link will be reset, something that in reality aggravates the
situation.
To remedy this problem, this patch introduces a different approach. Instead of
allocating a big reassembly buffer, we now append the arriving fragments
to a reassembly chain on the link, and deliver the whole chain up to the
socket layer once the last fragment has been received. This is safe because
the retransmission layer of a TIPC link always delivers packets in strict
uninterrupted order, to the reassembly layer as to all other upper layers.
Hence there can never be more than one fragment chain pending reassembly at
any given time in a link, and we can trust (but still verify) that the
fragments will be chained up in the correct order.
Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-11-06 16:28:06 +08:00
|
|
|
if (n_ptr->bclink.reasm_head) {
|
|
|
|
kfree_skb(n_ptr->bclink.reasm_head);
|
|
|
|
n_ptr->bclink.reasm_head = NULL;
|
|
|
|
n_ptr->bclink.reasm_tail = NULL;
|
2011-04-07 23:58:08 +08:00
|
|
|
}
|
|
|
|
|
2011-10-24 23:18:12 +08:00
|
|
|
tipc_bclink_remove_node(n_ptr->addr);
|
2011-10-25 03:26:24 +08:00
|
|
|
tipc_bclink_acknowledge(n_ptr, INVALID_LINK_SEQ);
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2012-11-16 13:51:30 +08:00
|
|
|
n_ptr->bclink.recv_permitted = false;
|
2011-04-07 23:58:08 +08:00
|
|
|
}
|
2006-01-03 02:04:38 +08:00
|
|
|
|
|
|
|
/* Abort link changeover */
|
|
|
|
for (i = 0; i < MAX_BEARERS; i++) {
|
2011-12-30 09:58:42 +08:00
|
|
|
struct tipc_link *l_ptr = n_ptr->links[i];
|
2007-02-09 22:25:21 +08:00
|
|
|
if (!l_ptr)
|
2006-01-03 02:04:38 +08:00
|
|
|
continue;
|
|
|
|
l_ptr->reset_checkpoint = l_ptr->next_in_no;
|
|
|
|
l_ptr->exp_msg_count = 0;
|
2006-01-18 07:38:21 +08:00
|
|
|
tipc_link_reset_fragments(l_ptr);
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Notify subscribers */
|
2011-02-24 03:13:41 +08:00
|
|
|
tipc_nodesub_notify(n_ptr);
|
2010-08-17 19:00:16 +08:00
|
|
|
|
tipc: Ensure both nodes recognize loss of contact between them
Enhances TIPC to ensure that a node that loses contact with a
neighboring node does not allow contact to be re-established until
it sees that its peer has also recognized the loss of contact.
Previously, nodes that were connected by two or more links could
encounter a situation in which node A would lose contact with node B
on all of its links, purge its name table of names published by B,
and then fail to repopulate those names once contact with B was restored.
This would happen because B was able to re-establish one or more links
so quickly that it never reached a point where it had no links to A --
meaning that B never saw a loss of contact with A, and consequently
didn't re-publish its names to A.
This problem is now prevented by enhancing the cleanup done by TIPC
following a loss of contact with a neighboring node to ensure that
node A ignores all messages sent by B until it receives a LINK_PROTOCOL
message that indicates B has lost contact with A, thereby preventing
the (re)establishment of links between the nodes. The loss of contact
is recognized when a RESET or ACTIVATE message is received that has
a "redundant link exists" field of 0, indicating that B's sending link
endpoint is in a reset state and that B has no other working links.
Additionally, TIPC now suppresses the sending of (most) link protocol
messages to a neighboring node while it is cleaning up after an earlier
loss of contact with that node. This stops the peer node from prematurely
activating its link endpoint, which would prevent TIPC from later
activating its own end. TIPC still allows outgoing RESET messages to
occur during cleanup, to avoid problems if its own node recognizes
the loss of contact first and tries to notify the peer of the situation.
Finally, TIPC now recognizes an impending loss of contact with a peer node
as soon as it receives a RESET message on a working link that is the
peer's only link to the node, and ensures that the link protocol
suppression mentioned above goes into effect right away -- that is,
even before its own link endpoints have failed. This is necessary to
ensure correct operation when there are redundant links between the nodes,
since otherwise TIPC would send an ACTIVATE message upon receiving a RESET
on its first link and only begin suppressing when a RESET on its second
link was received, instead of initiating suppression with the first RESET
message as it needs to.
Note: The reworked cleanup code also eliminates a check that prevented
a link endpoint's discovery object from responding to incoming messages
while stale name table entries are being purged. This check is now
unnecessary and would have slowed down re-establishment of communication
between the nodes in some situations.
Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2011-05-27 23:00:51 +08:00
|
|
|
/* Prevent re-contact with node until cleanup is done */
|
|
|
|
n_ptr->block_setup = WAIT_PEER_DOWN | WAIT_NAMES_GONE;
|
|
|
|
tipc_k_signal((Handler)node_name_purge_complete, n_ptr->addr);
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
|
|
|
|
2006-01-18 07:38:21 +08:00
|
|
|
struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
|
|
|
u32 domain;
|
|
|
|
struct sk_buff *buf;
|
2008-09-03 14:38:32 +08:00
|
|
|
struct tipc_node *n_ptr;
|
2007-02-09 22:25:21 +08:00
|
|
|
struct tipc_node_info node_info;
|
2006-06-30 03:33:20 +08:00
|
|
|
u32 payload_size;
|
2006-01-03 02:04:38 +08:00
|
|
|
|
|
|
|
if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR))
|
2006-01-18 07:38:21 +08:00
|
|
|
return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2006-11-08 16:19:09 +08:00
|
|
|
domain = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
|
2006-01-18 07:38:21 +08:00
|
|
|
if (!tipc_addr_domain_valid(domain))
|
|
|
|
return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
|
|
|
|
" (network address)");
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2008-07-15 13:44:58 +08:00
|
|
|
read_lock_bh(&tipc_net_lock);
|
2011-02-26 07:42:52 +08:00
|
|
|
if (!tipc_num_nodes) {
|
2008-07-15 13:44:58 +08:00
|
|
|
read_unlock_bh(&tipc_net_lock);
|
2007-02-09 22:25:21 +08:00
|
|
|
return tipc_cfg_reply_none();
|
2008-07-15 13:44:58 +08:00
|
|
|
}
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2011-01-01 02:59:17 +08:00
|
|
|
/* For now, get space for all other nodes */
|
2011-02-26 07:42:52 +08:00
|
|
|
payload_size = TLV_SPACE(sizeof(node_info)) * tipc_num_nodes;
|
2008-07-15 13:44:58 +08:00
|
|
|
if (payload_size > 32768u) {
|
|
|
|
read_unlock_bh(&tipc_net_lock);
|
2006-06-30 03:33:20 +08:00
|
|
|
return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
|
|
|
|
" (too many nodes)");
|
2008-07-15 13:44:58 +08:00
|
|
|
}
|
2006-06-30 03:33:20 +08:00
|
|
|
buf = tipc_cfg_reply_alloc(payload_size);
|
2008-07-15 13:44:58 +08:00
|
|
|
if (!buf) {
|
|
|
|
read_unlock_bh(&tipc_net_lock);
|
2006-01-03 02:04:38 +08:00
|
|
|
return NULL;
|
2008-07-15 13:44:58 +08:00
|
|
|
}
|
2006-01-03 02:04:38 +08:00
|
|
|
|
|
|
|
/* Add TLVs for all nodes in scope */
|
2011-02-26 07:42:52 +08:00
|
|
|
list_for_each_entry(n_ptr, &tipc_node_list, list) {
|
|
|
|
if (!tipc_in_scope(domain, n_ptr->addr))
|
2006-01-03 02:04:38 +08:00
|
|
|
continue;
|
2007-02-09 22:25:21 +08:00
|
|
|
node_info.addr = htonl(n_ptr->addr);
|
|
|
|
node_info.up = htonl(tipc_node_is_up(n_ptr));
|
|
|
|
tipc_cfg_append_tlv(buf, TIPC_TLV_NODE_INFO,
|
2006-01-18 07:38:21 +08:00
|
|
|
&node_info, sizeof(node_info));
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
|
|
|
|
2008-07-15 13:44:58 +08:00
|
|
|
read_unlock_bh(&tipc_net_lock);
|
2006-01-03 02:04:38 +08:00
|
|
|
return buf;
|
|
|
|
}
|
|
|
|
|
2006-01-18 07:38:21 +08:00
|
|
|
struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space)
|
2006-01-03 02:04:38 +08:00
|
|
|
{
|
|
|
|
u32 domain;
|
|
|
|
struct sk_buff *buf;
|
2008-09-03 14:38:32 +08:00
|
|
|
struct tipc_node *n_ptr;
|
2007-02-09 22:25:21 +08:00
|
|
|
struct tipc_link_info link_info;
|
2006-06-30 03:33:20 +08:00
|
|
|
u32 payload_size;
|
2006-01-03 02:04:38 +08:00
|
|
|
|
|
|
|
if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR))
|
2006-01-18 07:38:21 +08:00
|
|
|
return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2006-11-08 16:19:09 +08:00
|
|
|
domain = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
|
2006-01-18 07:38:21 +08:00
|
|
|
if (!tipc_addr_domain_valid(domain))
|
|
|
|
return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
|
|
|
|
" (network address)");
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2011-11-09 02:48:28 +08:00
|
|
|
if (!tipc_own_addr)
|
2007-02-09 22:25:21 +08:00
|
|
|
return tipc_cfg_reply_none();
|
|
|
|
|
2008-07-15 13:44:58 +08:00
|
|
|
read_lock_bh(&tipc_net_lock);
|
|
|
|
|
tipc: Major redesign of broadcast link ACK/NACK algorithms
Completely redesigns broadcast link ACK and NACK mechanisms to prevent
spurious retransmit requests in dual LAN networks, and to prevent the
broadcast link from stalling due to the failure of a receiving node to
acknowledge receiving a broadcast message or request its retransmission.
Note: These changes only impact the timing of when ACK and NACK messages
are sent, and not the basic broadcast link protocol itself, so inter-
operability with nodes using the "classic" algorithms is maintained.
The revised algorithms are as follows:
1) An explicit ACK message is still sent after receiving 16 in-sequence
messages, and implicit ACK information continues to be carried in other
unicast link message headers (including link state messages). However,
the timing of explicit ACKs is now based on the receiving node's absolute
network address rather than its relative network address to ensure that
the failure of another node does not delay the ACK beyond its 16 message
target.
2) A NACK message is now typically sent only when a message gap persists
for two consecutive incoming link state messages; this ensures that a
suspected gap is not confirmed until both LANs in a dual LAN network have
had an opportunity to deliver the message, thereby preventing spurious NACKs.
A NACK message can also be generated by the arrival of a single link state
message, if the deferred queue is so big that the current message gap
cannot be the result of "normal" mis-ordering due to the use of dual LANs
(or one LAN using a bonded interface). Since link state messages typically
arrive at different nodes at different times the problem of multiple nodes
issuing identical NACKs simultaneously is inherently avoided.
3) Nodes continue to "peek" at NACK messages sent by other nodes. If
another node requests retransmission of a message gap suspected (but not
yet confirmed) by the peeking node, the peeking node forgets about the
gap and does not generate a duplicate retransmit request. (If the peeking
node subsequently fails to receive the lost message, later link state
messages will cause it to rediscover and confirm the gap and send another
NACK.)
4) Message gap "equality" is now determined by the start of the gap only.
This is sufficient to deal with the most common cases of message loss,
and eliminates the need for complex end of gap computations.
5) A peeking node no longer tries to determine whether it should send a
complementary NACK, since the most common cases of message loss don't
require it to be sent. Consequently, the node no longer examines the
"broadcast tag" field of a NACK message when peeking.
Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2011-10-28 02:17:53 +08:00
|
|
|
/* Get space for all unicast links + broadcast link */
|
2011-02-25 02:20:20 +08:00
|
|
|
payload_size = TLV_SPACE(sizeof(link_info)) *
|
2011-02-25 23:01:58 +08:00
|
|
|
(atomic_read(&tipc_num_links) + 1);
|
2008-07-15 13:44:58 +08:00
|
|
|
if (payload_size > 32768u) {
|
|
|
|
read_unlock_bh(&tipc_net_lock);
|
2006-06-30 03:33:20 +08:00
|
|
|
return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
|
|
|
|
" (too many links)");
|
2008-07-15 13:44:58 +08:00
|
|
|
}
|
2006-06-30 03:33:20 +08:00
|
|
|
buf = tipc_cfg_reply_alloc(payload_size);
|
2008-07-15 13:44:58 +08:00
|
|
|
if (!buf) {
|
|
|
|
read_unlock_bh(&tipc_net_lock);
|
2006-01-03 02:04:38 +08:00
|
|
|
return NULL;
|
2008-07-15 13:44:58 +08:00
|
|
|
}
|
2006-01-03 02:04:38 +08:00
|
|
|
|
|
|
|
/* Add TLV for broadcast link */
|
2011-02-24 00:44:49 +08:00
|
|
|
link_info.dest = htonl(tipc_cluster_mask(tipc_own_addr));
|
2007-02-09 22:25:21 +08:00
|
|
|
link_info.up = htonl(1);
|
2009-03-19 10:11:29 +08:00
|
|
|
strlcpy(link_info.str, tipc_bclink_name, TIPC_MAX_LINK_NAME);
|
2006-01-18 07:38:21 +08:00
|
|
|
tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO, &link_info, sizeof(link_info));
|
2006-01-03 02:04:38 +08:00
|
|
|
|
|
|
|
/* Add TLVs for any other links in scope */
|
2011-02-26 07:42:52 +08:00
|
|
|
list_for_each_entry(n_ptr, &tipc_node_list, list) {
|
2007-02-09 22:25:21 +08:00
|
|
|
u32 i;
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2011-02-26 07:42:52 +08:00
|
|
|
if (!tipc_in_scope(domain, n_ptr->addr))
|
2006-01-03 02:04:38 +08:00
|
|
|
continue;
|
2008-07-15 13:44:58 +08:00
|
|
|
tipc_node_lock(n_ptr);
|
2007-02-09 22:25:21 +08:00
|
|
|
for (i = 0; i < MAX_BEARERS; i++) {
|
|
|
|
if (!n_ptr->links[i])
|
|
|
|
continue;
|
|
|
|
link_info.dest = htonl(n_ptr->addr);
|
|
|
|
link_info.up = htonl(tipc_link_is_up(n_ptr->links[i]));
|
|
|
|
strcpy(link_info.str, n_ptr->links[i]->name);
|
|
|
|
tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO,
|
2006-01-18 07:38:21 +08:00
|
|
|
&link_info, sizeof(link_info));
|
2007-02-09 22:25:21 +08:00
|
|
|
}
|
2008-07-15 13:44:58 +08:00
|
|
|
tipc_node_unlock(n_ptr);
|
2006-01-03 02:04:38 +08:00
|
|
|
}
|
|
|
|
|
2008-07-15 13:44:58 +08:00
|
|
|
read_unlock_bh(&tipc_net_lock);
|
2006-01-03 02:04:38 +08:00
|
|
|
return buf;
|
|
|
|
}
|