2006-01-03 02:04:38 +08:00
|
|
|
/*
|
|
|
|
* net/tipc/link.h: Include file for TIPC link code
|
2007-02-09 22:25:21 +08:00
|
|
|
*
|
2014-02-14 06:29:08 +08:00
|
|
|
* Copyright (c) 1995-2006, 2013, Ericsson AB
|
2011-01-08 00:43:40 +08:00
|
|
|
* Copyright (c) 2004-2005, 2010-2011, Wind River Systems
|
2006-01-03 02:04:38 +08:00
|
|
|
* All rights reserved.
|
|
|
|
*
|
2006-01-11 20:30:43 +08:00
|
|
|
* Redistribution and use in source and binary forms, with or without
|
2006-01-03 02:04:38 +08:00
|
|
|
* modification, are permitted provided that the following conditions are met:
|
|
|
|
*
|
2006-01-11 20:30:43 +08:00
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
* 3. Neither the names of the copyright holders nor the names of its
|
|
|
|
* contributors may be used to endorse or promote products derived from
|
|
|
|
* this software without specific prior written permission.
|
2006-01-03 02:04:38 +08:00
|
|
|
*
|
2006-01-11 20:30:43 +08:00
|
|
|
* Alternatively, this software may be distributed under the terms of the
|
|
|
|
* GNU General Public License ("GPL") version 2 as published by the Free
|
|
|
|
* Software Foundation.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
2006-01-03 02:04:38 +08:00
|
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef _TIPC_LINK_H
|
|
|
|
#define _TIPC_LINK_H
|
|
|
|
|
|
|
|
#include "msg.h"
|
|
|
|
#include "node.h"
|
|
|
|
|
2014-02-14 06:29:08 +08:00
|
|
|
/* Link reassembly status codes
|
tipc: message reassembly using fragment chain
When the first fragment of a long data message is received on a link, a
reassembly buffer large enough to hold the data from this and all subsequent
fragments of the message is allocated. The payload of each new fragment is
copied into this buffer upon arrival. When the last fragment is received, the
reassembled message is delivered upwards to the port/socket layer.
Not only is this an inefficient approach, but it may also cause bursts of
reassembly failures in low memory situations, since we may fail to allocate
the necessary large buffer in the first place. Furthermore, after 100 subsequent
such failures the link will be reset, something that in reality aggravates the
situation.
To remedy this problem, this patch introduces a different approach. Instead of
allocating a big reassembly buffer, we now append the arriving fragments
to a reassembly chain on the link, and deliver the whole chain up to the
socket layer once the last fragment has been received. This is safe because
the retransmission layer of a TIPC link always delivers packets in strict
uninterrupted order, to the reassembly layer as to all other upper layers.
Hence there can never be more than one fragment chain pending reassembly at
any given time in a link, and we can trust (but still verify) that the
fragments will be chained up in the correct order.
Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-11-06 16:28:06 +08:00
|
|
|
*/
|
|
|
|
#define LINK_REASM_ERROR -1
|
|
|
|
#define LINK_REASM_COMPLETE 1
|
|
|
|
|
2014-02-14 06:29:08 +08:00
|
|
|
/* Out-of-range value for link sequence numbers
|
2011-10-25 03:26:24 +08:00
|
|
|
*/
|
|
|
|
#define INVALID_LINK_SEQ 0x10000
|
|
|
|
|
2014-02-14 06:29:08 +08:00
|
|
|
/* Link working states
|
2006-01-03 02:04:38 +08:00
|
|
|
*/
|
|
|
|
#define WORKING_WORKING 560810u
|
|
|
|
#define WORKING_UNKNOWN 560811u
|
|
|
|
#define RESET_UNKNOWN 560812u
|
|
|
|
#define RESET_RESET 560813u
|
|
|
|
|
2014-02-14 06:29:08 +08:00
|
|
|
/* Link endpoint execution states
|
|
|
|
*/
|
|
|
|
#define LINK_STARTED 0x0001
|
tipc: remove 'links' list from tipc_bearer struct
In our ongoing effort to simplify the TIPC locking structure,
we see a need to remove the linked list for tipc_links
in the bearer. This can be explained as follows.
Currently, we have three different ways to access a link,
via three different lists/tables:
1: Via a node hash table:
Used by the time-critical outgoing/incoming data paths.
(e.g. link_send_sections_fast() and tipc_recv_msg() ):
grab net_lock(read)
find node from node hash table
grab node_lock
select link
grab bearer_lock
send_msg()
release bearer_lock
release node lock
release net_lock
2: Via a global linked list for nodes:
Used by configuration commands (link_cmd_set_value())
grab net_lock(read)
find node and link from global node list (using link name)
grab node_lock
update link
release node lock
release net_lock
(Same locking order as above. No problem.)
3: Via the bearer's linked link list:
Used by notifications from interface (e.g. tipc_disable_bearer() )
grab net_lock(write)
grab bearer_lock
get link ptr from bearer's link list
get node from link
grab node_lock
delete link
release node lock
release bearer_lock
release net_lock
(Different order from above, but works because we grab the
outer net_lock in write mode first, excluding all other access.)
The first major goal in our simplification effort is to get rid
of the "big" net_lock, replacing it with rcu-locks when accessing
the node list and node hash array. This will come in a later patch
series.
But to get there we first need to rewrite access methods #2 and #3,
since removal of net_lock would introduce three major problems:
a) In access method #2, we access the link before taking the
protecting node_lock. This will not work once net_lock is gone,
so we will have to change the access order. We will deal with
this in a later commit in this series, "tipc: add node lock
protection to link found by link_find_link()".
b) When the outer protection from net_lock is gone, taking
bearer_lock and node_lock in opposite order of method 1) and 2)
will become an obvious deadlock hazard. This is fixed in the
commit ("tipc: remove bearer_lock from tipc_bearer struct")
later in this series.
c) Similar to what is described in problem a), access method #3
starts with using a link pointer that is unprotected by node_lock,
in order to via that pointer find the correct node struct and
lock it. Before we remove net_lock, this access order must be
altered. This is what we do with this commit.
We can avoid introducing problem c) by even here using the
global node list to find the node, before accessing its links. When
we loop through the node list we use the own bearer identity as search
criteria, thus easily finding the links that are associated to the
resetting/disabling bearer. It should be noted that although this
method is somewhat slower than the current list traversal, it is in
no way time critical. This is only about resetting or deleting links,
something that must be considered relatively infrequent events.
As a bonus, we can get rid of the mutual pointers between links and
bearers. After this commit, pointer dependencies go in one direction
only: from the link to the bearer.
This commit pre-empts introduction of problem c) as described above.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-02-14 06:29:09 +08:00
|
|
|
#define LINK_STOPPED 0x0002
|
2014-02-14 06:29:08 +08:00
|
|
|
|
|
|
|
/* Starting value for maximum packet size negotiation on unicast links
|
2006-01-03 02:04:38 +08:00
|
|
|
* (unless bearer MTU is less)
|
|
|
|
*/
|
|
|
|
#define MAX_PKT_DEFAULT 1500
|
|
|
|
|
2012-07-11 21:40:43 +08:00
|
|
|
struct tipc_stats {
	/* Basis for the sent/received packet counts */
	u32 sent_info;
	u32 recv_info;

	/*
	 * Per-category traffic counters.
	 * NOTE(review): the meaning of each counter is implied by its name
	 * only; confirm against the code that updates it.
	 */
	u32 sent_states;
	u32 recv_states;
	u32 sent_probes;
	u32 recv_probes;
	u32 sent_nacks;
	u32 recv_nacks;
	u32 sent_acks;
	u32 sent_bundled;
	u32 sent_bundles;
	u32 recv_bundled;
	u32 recv_bundles;
	u32 retransmitted;
	u32 sent_fragmented;
	u32 sent_fragments;
	u32 recv_fragmented;
	u32 recv_fragments;

	u32 link_congs;		/* port sends blocked by congestion */
	u32 deferred_recv;
	u32 duplicates;

	/* Send queue size profiling */
	u32 max_queue_sz;	/* high water mark of the send queue size */
	u32 accu_queue_sz;
	u32 queue_sz_counts;

	/* Message length profiling */
	u32 msg_length_counts;
	u32 msg_lengths_total;
	u32 msg_length_profile[7];
};
|
|
|
|
|
2006-01-03 02:04:38 +08:00
|
|
|
/**
|
2011-12-30 09:58:42 +08:00
|
|
|
* struct tipc_link - TIPC link data structure
|
2006-01-03 02:04:38 +08:00
|
|
|
* @addr: network address of link's peer node
|
|
|
|
* @name: link name character string
|
|
|
|
* @media_addr: media address to use when sending messages over link
|
|
|
|
* @timer: link timer
|
|
|
|
* @owner: pointer to peer node
|
2014-02-14 06:29:08 +08:00
|
|
|
* @flags: execution state flags for link endpoint instance
|
2006-01-03 02:04:38 +08:00
|
|
|
* @checkpoint: reference point for triggering link continuity checking
|
|
|
|
* @peer_session: link session # being used by peer end of link
|
|
|
|
* @peer_bearer_id: bearer id used by link's peer endpoint
|
tipc: decouple the relationship between bearer and link
Currently on both paths of message transmission and reception, the
read lock of tipc_net_lock must be held before bearer is accessed,
while the write lock of tipc_net_lock has to be taken before bearer
is configured. Although it can ensure that bearer is always valid on
the two data paths, link and bearer are closely bound together.
So as the part of effort of removing tipc_net_lock, the locking
policy of bearer protection will be adjusted as below: on the two
data paths, RCU is used, and on the configuration path of bearer,
RTNL lock is applied.
Now RCU just covers the path of message reception. To make it possible
to protect the path of message transmission with RCU, link should not
use its stored bearer pointer to access bearer, but it should use the
bearer identity of its attached bearer as index to get bearer instance
from bearer_list array, which can help us decouple the relationship
between bearer and link. As a result, bearer on the path of message
transmission can be safely protected by RCU when we access bearer_list
array within RCU lock protection.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Tested-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 10:55:46 +08:00
|
|
|
* @bearer_id: local bearer id used by link
|
2007-02-09 22:25:21 +08:00
|
|
|
* @tolerance: minimum link continuity loss needed to reset link [in ms]
|
2006-01-03 02:04:38 +08:00
|
|
|
* @continuity_interval: link continuity testing interval [in ms]
|
|
|
|
* @abort_limit: # of unacknowledged continuity probes needed to reset link
|
|
|
|
* @state: current state of link FSM
|
|
|
|
* @fsm_msg_cnt: # of protocol messages link FSM has sent in current state
|
|
|
|
* @proto_msg: template for control messages generated by link
|
|
|
|
* @pmsg: convenience pointer to "proto_msg" field
|
|
|
|
* @priority: current link priority
|
tipc: decouple the relationship between bearer and link
Currently on both paths of message transmission and reception, the
read lock of tipc_net_lock must be held before bearer is accessed,
while the write lock of tipc_net_lock has to be taken before bearer
is configured. Although it can ensure that bearer is always valid on
the two data paths, link and bearer are closely bound together.
So as the part of effort of removing tipc_net_lock, the locking
policy of bearer protection will be adjusted as below: on the two
data paths, RCU is used, and on the configuration path of bearer,
RTNL lock is applied.
Now RCU just covers the path of message reception. To make it possible
to protect the path of message transmission with RCU, link should not
use its stored bearer pointer to access bearer, but it should use the
bearer identity of its attached bearer as index to get bearer instance
from bearer_list array, which can help us decouple the relationship
between bearer and link. As a result, bearer on the path of message
transmission can be safely protected by RCU when we access bearer_list
array within RCU lock protection.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Tested-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 10:55:46 +08:00
|
|
|
* @net_plane: current link network plane ('A' through 'H')
|
2006-01-03 02:04:38 +08:00
|
|
|
* @queue_limit: outbound message queue congestion thresholds (indexed by user)
|
|
|
|
* @exp_msg_count: # of tunnelled messages expected during link changeover
|
|
|
|
* @reset_checkpoint: seq # of last acknowledged message at time of link reset
|
|
|
|
* @max_pkt: current maximum packet size for this link
|
|
|
|
* @max_pkt_target: desired maximum packet size for this link
|
|
|
|
* @max_pkt_probes: # of probes based on current (max_pkt, max_pkt_target)
|
|
|
|
* @out_queue_size: # of messages in outbound message queue
|
|
|
|
* @first_out: ptr to first outbound message in queue
|
|
|
|
* @last_out: ptr to last outbound message in queue
|
|
|
|
* @next_out_no: next sequence number to use for outbound messages
|
|
|
|
* @last_retransmitted: sequence number of most recently retransmitted message
|
|
|
|
* @stale_count: # of identical retransmit requests made by peer
|
|
|
|
* @next_in_no: next sequence number to expect for inbound messages
|
|
|
|
* @deferred_inqueue_sz: # of messages in inbound message queue
|
|
|
|
* @oldest_deferred_in: ptr to first inbound message in queue
|
|
|
|
* @newest_deferred_in: ptr to last inbound message in queue
|
|
|
|
* @unacked_window: # of inbound messages rx'd without ack'ing back to peer
|
|
|
|
* @proto_msg_queue: ptr to (single) outbound control message
|
|
|
|
* @retransm_queue_size: number of messages to retransmit
|
|
|
|
* @retransm_queue_head: sequence number of first message to retransmit
|
|
|
|
* @next_out: ptr to first unsent outbound message in queue
|
|
|
|
* @waiting_ports: linked list of ports waiting for link congestion to abate
|
|
|
|
* @long_msg_seq_no: next identifier to use for outbound fragmented messages
|
tipc: message reassembly using fragment chain
When the first fragment of a long data message is received on a link, a
reassembly buffer large enough to hold the data from this and all subsequent
fragments of the message is allocated. The payload of each new fragment is
copied into this buffer upon arrival. When the last fragment is received, the
reassembled message is delivered upwards to the port/socket layer.
Not only is this an inefficient approach, but it may also cause bursts of
reassembly failures in low memory situations, since we may fail to allocate
the necessary large buffer in the first place. Furthermore, after 100 subsequent
such failures the link will be reset, something that in reality aggravates the
situation.
To remedy this problem, this patch introduces a different approach. Instead of
allocating a big reassembly buffer, we now append the arriving fragments
to a reassembly chain on the link, and deliver the whole chain up to the
socket layer once the last fragment has been received. This is safe because
the retransmission layer of a TIPC link always delivers packets in strict
uninterrupted order, to the reassembly layer as to all other upper layers.
Hence there can never be more than one fragment chain pending reassembly at
any given time in a link, and we can trust (but still verify) that the
fragments will be chained up in the correct order.
Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-11-06 16:28:06 +08:00
|
|
|
* @reasm_head: list head of partially reassembled inbound message fragments
|
|
|
|
* @reasm_tail: last fragment received
|
2006-01-03 02:04:38 +08:00
|
|
|
* @stats: collects statistics regarding link activity
|
|
|
|
*/
|
2011-12-30 09:58:42 +08:00
|
|
|
struct tipc_link {
|
2006-01-03 02:04:38 +08:00
|
|
|
u32 addr;
|
|
|
|
char name[TIPC_MAX_LINK_NAME];
|
|
|
|
struct tipc_media_addr media_addr;
|
|
|
|
struct timer_list timer;
|
2008-09-03 14:38:32 +08:00
|
|
|
struct tipc_node *owner;
|
2006-01-03 02:04:38 +08:00
|
|
|
|
|
|
|
/* Management and link supervision data */
|
2014-02-14 06:29:08 +08:00
|
|
|
unsigned int flags;
|
2006-01-03 02:04:38 +08:00
|
|
|
u32 checkpoint;
|
|
|
|
u32 peer_session;
|
|
|
|
u32 peer_bearer_id;
|
tipc: decouple the relationship between bearer and link
Currently on both paths of message transmission and reception, the
read lock of tipc_net_lock must be held before bearer is accessed,
while the write lock of tipc_net_lock has to be taken before bearer
is configured. Although it can ensure that bearer is always valid on
the two data paths, link and bearer is closely bound together.
So as the part of effort of removing tipc_net_lock, the locking
policy of bearer protection will be adjusted as below: on the two
data paths, RCU is used, and on the configuration path of bearer,
RTNL lock is applied.
Now RCU just covers the path of message reception. To make it possible
to protect the path of message transmission with RCU, link should not
use its stored bearer pointer to access bearer, but it should use the
bearer identity of its attached bearer as index to get bearer instance
from bearer_list array, which can help us decouple the relationship
between bearer and link. As a result, bearer on the path of message
transmission can be safely protected by RCU when we access bearer_list
array within RCU lock protection.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Tested-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 10:55:46 +08:00
|
|
|
u32 bearer_id;
|
2006-01-03 02:04:38 +08:00
|
|
|
u32 tolerance;
|
|
|
|
u32 continuity_interval;
|
|
|
|
u32 abort_limit;
|
|
|
|
int state;
|
|
|
|
u32 fsm_msg_cnt;
|
|
|
|
struct {
|
|
|
|
unchar hdr[INT_H_SIZE];
|
|
|
|
unchar body[TIPC_MAX_IF_NAME];
|
|
|
|
} proto_msg;
|
|
|
|
struct tipc_msg *pmsg;
|
|
|
|
u32 priority;
|
tipc: decouple the relationship between bearer and link
Currently on both paths of message transmission and reception, the
read lock of tipc_net_lock must be held before bearer is accessed,
while the write lock of tipc_net_lock has to be taken before bearer
is configured. Although it can ensure that bearer is always valid on
the two data paths, link and bearer is closely bound together.
So as the part of effort of removing tipc_net_lock, the locking
policy of bearer protection will be adjusted as below: on the two
data paths, RCU is used, and on the configuration path of bearer,
RTNL lock is applied.
Now RCU just covers the path of message reception. To make it possible
to protect the path of message transmission with RCU, link should not
use its stored bearer pointer to access bearer, but it should use the
bearer identity of its attached bearer as index to get bearer instance
from bearer_list array, which can help us decouple the relationship
between bearer and link. As a result, bearer on the path of message
transmission can be safely protected by RCU when we access bearer_list
array within RCU lock protection.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Tested-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 10:55:46 +08:00
|
|
|
char net_plane;
|
2006-01-03 02:04:38 +08:00
|
|
|
u32 queue_limit[15]; /* queue_limit[0]==window limit */
|
|
|
|
|
|
|
|
/* Changeover */
|
|
|
|
u32 exp_msg_count;
|
|
|
|
u32 reset_checkpoint;
|
|
|
|
|
2007-02-09 22:25:21 +08:00
|
|
|
/* Max packet negotiation */
|
|
|
|
u32 max_pkt;
|
|
|
|
u32 max_pkt_target;
|
|
|
|
u32 max_pkt_probes;
|
2006-01-03 02:04:38 +08:00
|
|
|
|
|
|
|
/* Sending */
|
|
|
|
u32 out_queue_size;
|
|
|
|
struct sk_buff *first_out;
|
|
|
|
struct sk_buff *last_out;
|
|
|
|
u32 next_out_no;
|
2007-02-09 22:25:21 +08:00
|
|
|
u32 last_retransmitted;
|
|
|
|
u32 stale_count;
|
2006-01-03 02:04:38 +08:00
|
|
|
|
|
|
|
/* Reception */
|
|
|
|
u32 next_in_no;
|
|
|
|
u32 deferred_inqueue_sz;
|
|
|
|
struct sk_buff *oldest_deferred_in;
|
|
|
|
struct sk_buff *newest_deferred_in;
|
|
|
|
u32 unacked_window;
|
|
|
|
|
|
|
|
/* Congestion handling */
|
|
|
|
struct sk_buff *proto_msg_queue;
|
|
|
|
u32 retransm_queue_size;
|
|
|
|
u32 retransm_queue_head;
|
|
|
|
struct sk_buff *next_out;
|
|
|
|
struct list_head waiting_ports;
|
|
|
|
|
tipc: message reassembly using fragment chain
When the first fragment of a long data data message is received on a link, a
reassembly buffer large enough to hold the data from this and all subsequent
fragments of the message is allocated. The payload of each new fragment is
copied into this buffer upon arrival. When the last fragment is received, the
reassembled message is delivered upwards to the port/socket layer.
Not only is this an inefficient approach, but it may also cause bursts of
reassembly failures in low memory situations. since we may fail to allocate
the necessary large buffer in the first place. Furthermore, after 100 subsequent
such failures the link will be reset, something that in reality aggravates the
situation.
To remedy this problem, this patch introduces a different approach. Instead of
allocating a big reassembly buffer, we now append the arriving fragments
to a reassembly chain on the link, and deliver the whole chain up to the
socket layer once the last fragment has been received. This is safe because
the retransmission layer of a TIPC link always delivers packets in strict
uninterrupted order, to the reassembly layer as to all other upper layers.
Hence there can never be more than one fragment chain pending reassembly at
any given time in a link, and we can trust (but still verify) that the
fragments will be chained up in the correct order.
Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-11-06 16:28:06 +08:00
|
|
|
/* Fragmentation/reassembly */
|
2006-01-03 02:04:38 +08:00
|
|
|
u32 long_msg_seq_no;
|
tipc: message reassembly using fragment chain
When the first fragment of a long data data message is received on a link, a
reassembly buffer large enough to hold the data from this and all subsequent
fragments of the message is allocated. The payload of each new fragment is
copied into this buffer upon arrival. When the last fragment is received, the
reassembled message is delivered upwards to the port/socket layer.
Not only is this an inefficient approach, but it may also cause bursts of
reassembly failures in low memory situations. since we may fail to allocate
the necessary large buffer in the first place. Furthermore, after 100 subsequent
such failures the link will be reset, something that in reality aggravates the
situation.
To remedy this problem, this patch introduces a different approach. Instead of
allocating a big reassembly buffer, we now append the arriving fragments
to a reassembly chain on the link, and deliver the whole chain up to the
socket layer once the last fragment has been received. This is safe because
the retransmission layer of a TIPC link always delivers packets in strict
uninterrupted order, to the reassembly layer as to all other upper layers.
Hence there can never be more than one fragment chain pending reassembly at
any given time in a link, and we can trust (but still verify) that the
fragments will be chained up in the correct order.
Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-11-06 16:28:06 +08:00
|
|
|
struct sk_buff *reasm_head;
|
|
|
|
struct sk_buff *reasm_tail;
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2007-02-09 22:25:21 +08:00
|
|
|
/* Statistics */
|
2012-07-11 21:40:43 +08:00
|
|
|
struct tipc_stats stats;
|
2006-01-03 02:04:38 +08:00
|
|
|
};
|
|
|
|
|
2011-01-08 00:43:40 +08:00
|
|
|
struct tipc_port;
|
2006-01-03 02:04:38 +08:00
|
|
|
|
2011-12-30 09:58:42 +08:00
|
|
|
struct tipc_link *tipc_link_create(struct tipc_node *n_ptr,
|
2011-03-01 00:32:27 +08:00
|
|
|
struct tipc_bearer *b_ptr,
|
2006-01-18 07:38:21 +08:00
|
|
|
const struct tipc_media_addr *media_addr);
|
tipc: delay delete of link when failover is needed
When a bearer is disabled, all its attached links are deleted.
Ideally, we should do link failover to redundant links on other bearers,
if there are any, in such cases. This would be consistent with current
behavior when a link is reset, but not deleted. However, due to the
complexity involved, and the (wrongly) perceived low demand for this
feature, it was never implemented until now.
We mark the doomed link for deletion with a new flag, but wait until the
failover process is finished before we actually delete it. With the
improved link tunnelling/failover code introduced earlier in this commit
series, it is now easy to identify a spot in the code where the failover
is finished and it is safe to delete the marked link. Moreover, the test
for the flag and the deletion can be done synchronously, and outside the
most time critical data path.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-02-14 06:29:16 +08:00
|
|
|
void tipc_link_delete_list(unsigned int bearer_id, bool shutting_down);
|
2014-01-08 06:02:41 +08:00
|
|
|
void tipc_link_failover_send_queue(struct tipc_link *l_ptr);
|
2014-02-18 16:06:46 +08:00
|
|
|
void tipc_link_dup_queue_xmit(struct tipc_link *l_ptr, struct tipc_link *dest);
|
2011-12-30 09:58:42 +08:00
|
|
|
void tipc_link_reset_fragments(struct tipc_link *l_ptr);
|
|
|
|
int tipc_link_is_up(struct tipc_link *l_ptr);
|
|
|
|
int tipc_link_is_active(struct tipc_link *l_ptr);
|
2014-01-08 06:02:44 +08:00
|
|
|
void tipc_link_purge_queues(struct tipc_link *l_ptr);
|
2014-01-08 06:02:41 +08:00
|
|
|
struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area,
|
|
|
|
int req_tlv_space,
|
|
|
|
u16 cmd);
|
|
|
|
struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area,
|
|
|
|
int req_tlv_space);
|
|
|
|
struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area,
|
|
|
|
int req_tlv_space);
|
2011-12-30 09:58:42 +08:00
|
|
|
void tipc_link_reset(struct tipc_link *l_ptr);
|
tipc: remove 'links' list from tipc_bearer struct
In our ongoing effort to simplify the TIPC locking structure,
we see a need to remove the linked list for tipc_links
in the bearer. This can be explained as follows.
Currently, we have three different ways to access a link,
via three different lists/tables:
1: Via a node hash table:
Used by the time-critical outgoing/incoming data paths.
(e.g. link_send_sections_fast() and tipc_recv_msg() ):
grab net_lock(read)
find node from node hash table
grab node_lock
select link
grab bearer_lock
send_msg()
release bearer_lock
release node lock
release net_lock
2: Via a global linked list for nodes:
Used by configuration commands (link_cmd_set_value())
grab net_lock(read)
find node and link from global node list (using link name)
grab node_lock
update link
release node lock
release net_lock
(Same locking order as above. No problem.)
3: Via the bearer's linked link list:
Used by notifications from interface (e.g. tipc_disable_bearer() )
grab net_lock(write)
grab bearer_lock
get link ptr from bearer's link list
get node from link
grab node_lock
delete link
release node lock
release bearer_lock
release net_lock
(Different order from above, but works because we grab the
outer net_lock in write mode first, excluding all other access.)
The first major goal in our simplification effort is to get rid
of the "big" net_lock, replacing it with rcu-locks when accessing
the node list and node hash array. This will come in a later patch
series.
But to get there we first need to rewrite access methods #2 and #3,
since removal of net_lock would introduce three major problems:
a) In access method #2, we access the link before taking the
protecting node_lock. This will not work once net_lock is gone,
so we will have to change the access order. We will deal with
this in a later commit in this series, "tipc: add node lock
protection to link found by link_find_link()".
b) When the outer protection from net_lock is gone, taking
bearer_lock and node_lock in opposite order of method 1) and 2)
will become an obvious deadlock hazard. This is fixed in the
commit ("tipc: remove bearer_lock from tipc_bearer struct")
later in this series.
c) Similar to what is described in problem a), access method #3
starts with using a link pointer that is unprotected by node_lock,
in order to via that pointer find the correct node struct and
lock it. Before we remove net_lock, this access order must be
altered. This is what we do with this commit.
We can avoid introducing problem c) by even here using the
global node list to find the node, before accessing its links. When
we loop through the node list we use the own bearer identity as search
criteria, thus easily finding the links that are associated to the
resetting/disabling bearer. It should be noted that although this
method is somewhat slower than the current list traversal, it is in
no way time critical. This is only about resetting or deleting links,
something that must be considered relatively infrequent events.
As a bonus, we can get rid of the mutual pointers between links and
bearers. After this commit, pointer dependencies go in one direction
only: from the link to the bearer.
This commit pre-empts introduction of problem c) as described above.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-02-14 06:29:09 +08:00
|
|
|
void tipc_link_reset_list(unsigned int bearer_id);
|
2014-02-18 16:06:46 +08:00
|
|
|
int tipc_link_xmit(struct sk_buff *buf, u32 dest, u32 selector);
|
|
|
|
void tipc_link_names_xmit(struct list_head *message_list, u32 dest);
|
|
|
|
int __tipc_link_xmit(struct tipc_link *l_ptr, struct sk_buff *buf);
|
2011-12-30 09:58:42 +08:00
|
|
|
int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf);
|
2011-01-01 02:59:32 +08:00
|
|
|
u32 tipc_link_get_max_pkt(u32 dest, u32 selector);
|
2014-02-18 16:06:46 +08:00
|
|
|
int tipc_link_iovec_xmit_fast(struct tipc_port *sender,
|
|
|
|
struct iovec const *msg_sect,
|
|
|
|
unsigned int len, u32 destnode);
|
|
|
|
void tipc_link_bundle_rcv(struct sk_buff *buf);
|
|
|
|
int tipc_link_frag_rcv(struct sk_buff **reasm_head,
|
|
|
|
struct sk_buff **reasm_tail,
|
|
|
|
struct sk_buff **fbuf);
|
|
|
|
void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int prob,
|
|
|
|
u32 gap, u32 tolerance, u32 priority, u32 acked_mtu);
|
2011-12-30 09:58:42 +08:00
|
|
|
void tipc_link_push_queue(struct tipc_link *l_ptr);
|
2006-01-18 07:38:21 +08:00
|
|
|
u32 tipc_link_defer_pkt(struct sk_buff **head, struct sk_buff **tail,
|
2014-02-18 16:06:46 +08:00
|
|
|
struct sk_buff *buf);
|
2011-12-30 09:58:42 +08:00
|
|
|
void tipc_link_wakeup_ports(struct tipc_link *l_ptr, int all);
|
|
|
|
void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window);
|
|
|
|
void tipc_link_retransmit(struct tipc_link *l_ptr,
|
|
|
|
struct sk_buff *start, u32 retransmits);
|
2006-01-03 02:04:38 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Link sequence number manipulation routines (uses modulo 2**16 arithmetic)
|
|
|
|
*/
|
2011-10-25 04:03:12 +08:00
|
|
|
/**
 * buf_seqno - read the sequence number of the message held in a buffer
 * @buf: buffer to inspect
 *
 * Returns the result of msg_seqno() applied to the message that
 * buf_msg() yields for @buf.
 */
static inline u32 buf_seqno(struct sk_buff *buf)
{
	return msg_seqno(buf_msg(buf));
}
|
|
|
|
|
2006-01-03 02:04:38 +08:00
|
|
|
/**
 * mod - reduce a value to the 16-bit link sequence number space
 * @x: value to reduce
 *
 * Returns the low 16 bits of @x, i.e. @x modulo 2**16.
 */
static inline u32 mod(u32 x)
{
	return x & 0xffffu;
}
|
|
|
|
|
|
|
|
/**
 * between - test whether a value lies strictly inside (lower, upper)
 * @lower: exclusive lower bound
 * @upper: exclusive upper bound
 * @n: value to test
 *
 * If @upper is smaller than @lower the interval is treated as wrapping
 * around, so any @n above @lower or below @upper qualifies. The arguments
 * are compared as given; no masking is applied here.
 *
 * Returns 1 if @n is inside the interval, 0 otherwise.
 */
static inline int between(u32 lower, u32 upper, u32 n)
{
	if ((lower < n) && (n < upper))
		return 1;
	if ((upper < lower) && ((n > lower) || (n < upper)))
		return 1;
	return 0;
}
|
|
|
|
|
|
|
|
/**
 * less_eq - modulo 2**16 "less than or equal" comparison
 * @left: first sequence number
 * @right: second sequence number
 *
 * Returns non-zero when the distance from @left forward to @right,
 * reduced by mod(), is below 32768 (half of the 16-bit range); zero
 * otherwise. Equal values therefore compare as "less or equal".
 */
static inline int less_eq(u32 left, u32 right)
{
	return mod(right - left) < 32768u;
}
|
|
|
|
|
|
|
|
/**
 * less - modulo 2**16 strict "less than" comparison
 * @left: first sequence number
 * @right: second sequence number
 *
 * Returns non-zero when less_eq(@left, @right) holds and the two values
 * differ after reduction by mod(); zero otherwise.
 */
static inline int less(u32 left, u32 right)
{
	return less_eq(left, right) && (mod(right) != mod(left));
}
|
|
|
|
|
|
|
|
/**
 * lesser - pick the earlier of two sequence numbers
 * @left: first sequence number
 * @right: second sequence number
 *
 * Returns @left when less_eq(@left, @right) holds, otherwise @right.
 * The chosen argument is returned unmodified (not reduced by mod()).
 */
static inline u32 lesser(u32 left, u32 right)
{
	return less_eq(left, right) ? left : right;
}
|
|
|
|
|
2010-05-11 22:30:11 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Link status checking routines
|
|
|
|
*/
|
2011-12-30 09:58:42 +08:00
|
|
|
static inline int link_working_working(struct tipc_link *l_ptr)
|
2010-05-11 22:30:11 +08:00
|
|
|
{
|
2010-09-23 04:43:57 +08:00
|
|
|
return l_ptr->state == WORKING_WORKING;
|
2010-05-11 22:30:11 +08:00
|
|
|
}
|
|
|
|
|
2011-12-30 09:58:42 +08:00
|
|
|
static inline int link_working_unknown(struct tipc_link *l_ptr)
|
2010-05-11 22:30:11 +08:00
|
|
|
{
|
2010-09-23 04:43:57 +08:00
|
|
|
return l_ptr->state == WORKING_UNKNOWN;
|
2010-05-11 22:30:11 +08:00
|
|
|
}
|
|
|
|
|
2011-12-30 09:58:42 +08:00
|
|
|
static inline int link_reset_unknown(struct tipc_link *l_ptr)
|
2010-05-11 22:30:11 +08:00
|
|
|
{
|
2010-09-23 04:43:57 +08:00
|
|
|
return l_ptr->state == RESET_UNKNOWN;
|
2010-05-11 22:30:11 +08:00
|
|
|
}
|
|
|
|
|
2011-12-30 09:58:42 +08:00
|
|
|
static inline int link_reset_reset(struct tipc_link *l_ptr)
|
2010-05-11 22:30:11 +08:00
|
|
|
{
|
2010-09-23 04:43:57 +08:00
|
|
|
return l_ptr->state == RESET_RESET;
|
2010-05-11 22:30:11 +08:00
|
|
|
}
|
|
|
|
|
2011-12-30 09:58:42 +08:00
|
|
|
static inline int link_congested(struct tipc_link *l_ptr)
|
2010-05-11 22:30:11 +08:00
|
|
|
{
|
2010-09-23 04:43:57 +08:00
|
|
|
return l_ptr->out_queue_size >= l_ptr->queue_limit[0];
|
2010-05-11 22:30:11 +08:00
|
|
|
}
|
|
|
|
|
2006-01-03 02:04:38 +08:00
|
|
|
#endif
|