From 03b92017933bd22a3dca6830048877dd3162f872 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 13 Feb 2014 17:29:05 -0500 Subject: [PATCH 01/14] tipc: stricter behavior of message reassembly function The function tipc_link_recv_fragment(struct sk_buff **buf) currently leaves the value of the input buffer pointer undefined when it returns, except when the return code indicates that the reassembly is complete. This despite the fact that it always consumes the input buffer. Here, we enforce a stricter behavior by this function, ensuring that the returned buffer pointer is non-NULL if and only if the reassembly is complete. This makes it possible to test for the buffer pointer as criteria for successful reassembly. We also rename the function to tipc_link_frag_rcv(), which is both shorter and more in line with common naming practice in the network subsystem. Apart from the new name, these changes have no impact on current users of the function, but makes it more practical for use in some planned future commits. Signed-off-by: Jon Maloy Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/bcast.c | 6 +++--- net/tipc/link.c | 16 +++++++++------- net/tipc/link.h | 6 +++--- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index bf860d9e75af..af35f76c6b29 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -481,9 +481,9 @@ receive: tipc_link_recv_bundle(buf); } else if (msg_user(msg) == MSG_FRAGMENTER) { int ret; - ret = tipc_link_recv_fragment(&node->bclink.reasm_head, - &node->bclink.reasm_tail, - &buf); + ret = tipc_link_frag_rcv(&node->bclink.reasm_head, + &node->bclink.reasm_tail, + &buf); if (ret == LINK_REASM_ERROR) goto unlock; spin_lock_bh(&bc_lock); diff --git a/net/tipc/link.c b/net/tipc/link.c index d4b5de41b682..17fbd15fcad8 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1584,9 +1584,9 @@ deliver: continue; case MSG_FRAGMENTER: l_ptr->stats.recv_fragments++; - ret = tipc_link_recv_fragment(&l_ptr->reasm_head, - &l_ptr->reasm_tail, - &buf); + ret = tipc_link_frag_rcv(&l_ptr->reasm_head, + &l_ptr->reasm_tail, + &buf); if (ret == LINK_REASM_COMPLETE) { l_ptr->stats.recv_fragmented++; msg = buf_msg(buf); @@ -2277,12 +2277,11 @@ static int link_send_long_buf(struct tipc_link *l_ptr, struct sk_buff *buf) return dsz; } -/* - * tipc_link_recv_fragment(): Called with node lock on. Returns +/* tipc_link_frag_rcv(): Called with node lock on. Returns * the reassembled buffer if message is complete. */ -int tipc_link_recv_fragment(struct sk_buff **head, struct sk_buff **tail, - struct sk_buff **fbuf) +int tipc_link_frag_rcv(struct sk_buff **head, struct sk_buff **tail, + struct sk_buff **fbuf) { struct sk_buff *frag = *fbuf; struct tipc_msg *msg = buf_msg(frag); @@ -2296,6 +2295,7 @@ int tipc_link_recv_fragment(struct sk_buff **head, struct sk_buff **tail, goto out_free; *head = frag; skb_frag_list_init(*head); + *fbuf = NULL; return 0; } else if (*head && skb_try_coalesce(*head, frag, &headstolen, &delta)) { @@ -2315,10 +2315,12 @@ int tipc_link_recv_fragment(struct sk_buff **head, struct sk_buff **tail, *tail = *head = NULL; return LINK_REASM_COMPLETE; } + *fbuf = NULL; return 0; out_free: pr_warn_ratelimited("Link unable to reassemble fragmented message\n"); kfree_skb(*fbuf); + *fbuf = NULL; return LINK_REASM_ERROR; } diff --git a/net/tipc/link.h b/net/tipc/link.h index 3b6aa65b608c..8addc5ec5fc6 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -239,9 +239,9 @@ int tipc_link_send_sections_fast(struct tipc_port *sender, struct iovec const *msg_sect, unsigned int len, u32 destnode); void tipc_link_recv_bundle(struct sk_buff *buf); -int tipc_link_recv_fragment(struct sk_buff **reasm_head, - struct sk_buff **reasm_tail, - struct sk_buff **fbuf); +int tipc_link_frag_rcv(struct sk_buff **reasm_head, + struct sk_buff **reasm_tail, + struct sk_buff **fbuf); void tipc_link_send_proto_msg(struct tipc_link *l_ptr, u32 msg_typ, int prob, u32 gap, u32 tolerance, u32 priority, u32 acked_mtu); From e0ca2c30b1e9c1ed8b58bccb95c33d25763a4311 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Thu, 13 Feb 2014 17:29:06 -0500 Subject: [PATCH 02/14] tipc: move code for resetting links from bearer.c to link.c We break out the code for resetting attached links in the function tipc_reset_bearer(), and define a new function named tipc_link_reset_list() to do this job. This commit incurs no functional changes, but makes the code of function tipc_reset_bearer() cleaner. It is also a preparation for a more important change to the bearer code, in a subsequent commit in this series. Signed-off-by: Ying Xue Reviewed-by: Paul Gortmaker Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bearer.c | 11 +---------- net/tipc/link.c | 12 ++++++++++++ net/tipc/link.h | 1 + 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index a38c89969c68..a3bdf5c7f085 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -350,19 +350,10 @@ exit: */ static int tipc_reset_bearer(struct tipc_bearer *b_ptr) { - struct tipc_link *l_ptr; - struct tipc_link *temp_l_ptr; - read_lock_bh(&tipc_net_lock); pr_info("Resetting bearer <%s>\n", b_ptr->name); spin_lock_bh(&b_ptr->lock); - list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) { - struct tipc_node *n_ptr = l_ptr->owner; - - spin_lock_bh(&n_ptr->lock); - tipc_link_reset(l_ptr); - spin_unlock_bh(&n_ptr->lock); - } + tipc_link_reset_list(b_ptr); spin_unlock_bh(&b_ptr->lock); read_unlock_bh(&tipc_net_lock); return 0; diff --git a/net/tipc/link.c b/net/tipc/link.c index 17fbd15fcad8..3ff34e8a37d7 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -461,6 +461,18 @@ void tipc_link_reset(struct tipc_link *l_ptr) link_reset_statistics(l_ptr); } +void tipc_link_reset_list(struct tipc_bearer *b_ptr) +{ + struct tipc_link *l_ptr; + + list_for_each_entry(l_ptr, &b_ptr->links, link_list) { + struct tipc_node *n_ptr = l_ptr->owner; + + spin_lock_bh(&n_ptr->lock); + tipc_link_reset(l_ptr); + spin_unlock_bh(&n_ptr->lock); + } +} static void link_activate(struct tipc_link *l_ptr) { diff --git a/net/tipc/link.h b/net/tipc/link.h index 8addc5ec5fc6..73beecb369e2 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -231,6 +231,7 @@ struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_space); void tipc_link_reset(struct tipc_link *l_ptr); +void tipc_link_reset_list(struct tipc_bearer *b_ptr); int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector); void tipc_link_send_names(struct list_head *message_list, u32 dest); int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf); From 8d8439b686f15c23faef4d7d67c4a9f30ce0f2b5 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Thu, 13 Feb 2014 17:29:07 -0500 Subject: [PATCH 03/14] tipc: move code for deleting links from bearer.c to link.c We break out the code for deleting attached links in the function bearer_disable(), and define a new function named tipc_link_delete_list() to do this job. This commit incurs no functional changes, but makes the code of function bearer_disable() cleaner. It is also a preparation for a more important change to the bearer code, in a subsequent commit in this series. Signed-off-by: Ying Xue Reviewed-by: Paul Gortmaker Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bearer.c | 6 +----- net/tipc/link.c | 9 +++++++++ net/tipc/link.h | 1 + 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index a3bdf5c7f085..a5be053cac57 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -366,16 +366,12 @@ static int tipc_reset_bearer(struct tipc_bearer *b_ptr) */ static void bearer_disable(struct tipc_bearer *b_ptr) { - struct tipc_link *l_ptr; - struct tipc_link *temp_l_ptr; struct tipc_link_req *temp_req; pr_info("Disabling bearer <%s>\n", b_ptr->name); spin_lock_bh(&b_ptr->lock); b_ptr->media->disable_media(b_ptr); - list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) { - tipc_link_delete(l_ptr); - } + tipc_link_delete_list(b_ptr); temp_req = b_ptr->link_req; b_ptr->link_req = NULL; spin_unlock_bh(&b_ptr->lock); diff --git a/net/tipc/link.c b/net/tipc/link.c index 3ff34e8a37d7..424e9f3acd81 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -313,6 +313,15 @@ void tipc_link_delete(struct tipc_link *l_ptr) kfree(l_ptr); } +void tipc_link_delete_list(struct tipc_bearer *b_ptr) +{ + struct tipc_link *l_ptr; + struct tipc_link *temp_l_ptr; + + list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) { + tipc_link_delete(l_ptr); + } +} /** * link_schedule_port - schedule port for deferred sending diff --git a/net/tipc/link.h b/net/tipc/link.h index 73beecb369e2..994ebd16ddc3 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -219,6 +219,7 @@ void tipc_link_delete(struct tipc_link *l_ptr); void tipc_link_failover_send_queue(struct tipc_link *l_ptr); void tipc_link_dup_send_queue(struct tipc_link *l_ptr, struct tipc_link *dest); +void tipc_link_delete_list(struct tipc_bearer *b_ptr); void tipc_link_reset_fragments(struct tipc_link *l_ptr); int tipc_link_is_up(struct tipc_link *l_ptr); int tipc_link_is_active(struct tipc_link *l_ptr); From 135daee6d3959a6d7c4f59b448ed6f854d88ce27 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Thu, 13 Feb 2014 17:29:08 -0500 Subject: [PATCH 04/14] tipc: redefine 'started' flag in struct link to bitmap Currently, the 'started' field in struct tipc_link represents only a binary state, 'started' or 'not started'. We need it to represent more link execution states in the coming commits in this series. Hence, we rename the field to 'flags', and define the current started/non-started state to be represented by the LSB bit of that field. Signed-off-by: Ying Xue Reviewed-by: Paul Gortmaker Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 4 ++-- net/tipc/link.h | 22 +++++++++++----------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 424e9f3acd81..2070d032c923 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -500,7 +500,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) struct tipc_link *other; u32 cont_intv = l_ptr->continuity_interval; - if (!l_ptr->started && (event != STARTING_EVT)) + if (!(l_ptr->flags & LINK_STARTED) && (event != STARTING_EVT)) return; /* Not yet. */ /* Check whether changeover is going on */ @@ -626,7 +626,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) link_set_timer(l_ptr, cont_intv); break; case STARTING_EVT: - l_ptr->started = 1; + l_ptr->flags |= LINK_STARTED; /* fall through */ case TIMEOUT_EVT: tipc_link_send_proto_msg(l_ptr, RESET_MSG, 0, 0, 0, 0, 0); diff --git a/net/tipc/link.h b/net/tipc/link.h index 994ebd16ddc3..a900e74b4f3a 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -1,7 +1,7 @@ /* * net/tipc/link.h: Include file for TIPC link code * - * Copyright (c) 1995-2006, Ericsson AB + * Copyright (c) 1995-2006, 2013, Ericsson AB * Copyright (c) 2004-2005, 2010-2011, Wind River Systems * All rights reserved. * @@ -40,27 +40,27 @@ #include "msg.h" #include "node.h" -/* - * Link reassembly status codes +/* Link reassembly status codes */ #define LINK_REASM_ERROR -1 #define LINK_REASM_COMPLETE 1 -/* - * Out-of-range value for link sequence numbers +/* Out-of-range value for link sequence numbers */ #define INVALID_LINK_SEQ 0x10000 -/* - * Link states +/* Link working states */ #define WORKING_WORKING 560810u #define WORKING_UNKNOWN 560811u #define RESET_UNKNOWN 560812u #define RESET_RESET 560813u -/* - * Starting value for maximum packet size negotiation on unicast links +/* Link endpoint execution states + */ +#define LINK_STARTED 0x0001 + +/* Starting value for maximum packet size negotiation on unicast links * (unless bearer MTU is less) */ #define MAX_PKT_DEFAULT 1500 @@ -103,7 +103,7 @@ struct tipc_stats { * @timer: link timer * @owner: pointer to peer node * @link_list: adjacent links in bearer's list of links - * @started: indicates if link has been started + * @flags: execution state flags for link endpoint instance * @checkpoint: reference point for triggering link continuity checking * @peer_session: link session # being used by peer end of link * @peer_bearer_id: bearer id used by link's peer endpoint @@ -152,7 +152,7 @@ struct tipc_link { struct list_head link_list; /* Management and link supervision data */ - int started; + unsigned int flags; u32 checkpoint; u32 peer_session; u32 peer_bearer_id; From c61dd61dec0b79fa22ded8b5caf2e817dc506c24 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Thu, 13 Feb 2014 17:29:09 -0500 Subject: [PATCH 05/14] tipc: remove 'links' list from tipc_bearer struct In our ongoing effort to simplify the TIPC locking structure, we see a need to remove the linked list for tipc_links in the bearer. This can be explained as follows. Currently, we have three different ways to access a link, via three different lists/tables: 1: Via a node hash table: Used by the time-critical outgoing/incoming data paths. (e.g. link_send_sections_fast() and tipc_recv_msg() ): grab net_lock(read) find node from node hash table grab node_lock select link grab bearer_lock send_msg() release bearer_lock release node lock release net_lock 2: Via a global linked list for nodes: Used by configuration commands (link_cmd_set_value()) grab net_lock(read) find node and link from global node list (using link name) grab node_lock update link release node lock release net_lock (Same locking order as above. No problem.) 3: Via the bearer's linked link list: Used by notifications from interface (e.g. tipc_disable_bearer() ) grab net_lock(write) grab bearer_lock get link ptr from bearer's link list get node from link grab node_lock delete link release node lock release bearer_lock release net_lock (Different order from above, but works because we grab the outer net_lock in write mode first, excluding all other access.) The first major goal in our simplification effort is to get rid of the "big" net_lock, replacing it with rcu-locks when accessing the node list and node hash array. This will come in a later patch series. But to get there we first need to rewrite access methods ##2 and 3, since removal of net_lock would introduce three major problems: a) In access method #2, we access the link before taking the protecting node_lock. This will not work once net_lock is gone, so we will have to change the access order. We will deal with this in a later commit in this series, "tipc: add node lock protection to link found by link_find_link()". b) When the outer protection from net_lock is gone, taking bearer_lock and node_lock in opposite order of method 1) and 2) will become an obvious deadlock hazard. This is fixed in the commit ("tipc: remove bearer_lock from tipc_bearer struct") later in this series. c) Similar to what is described in problem a), access method #3 starts with using a link pointer that is unprotected by node_lock, in order to via that pointer find the correct node struct and lock it. Before we remove net_lock, this access order must be altered. This is what we do with this commit. We can avoid introducing problem problem c) by even here using the global node list to find the node, before accessing its links. When we loop though the node list we use the own bearer identity as search criteria, thus easily finding the links that are associated to the resetting/disabling bearer. It should be noted that although this method is somewhat slower than the current list traversal, it is in no way time critical. This is only about resetting or deleting links, something that must be considered relatively infrequent events. As a bonus, we can get rid of the mutual pointers between links and bearers. After this commit, pointer dependency go in one direction only: from the link to the bearer. This commit pre-empts introduction of problem c) as described above. Signed-off-by: Ying Xue Reviewed-by: Paul Gortmaker Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bearer.c | 5 ++-- net/tipc/bearer.h | 2 -- net/tipc/core.c | 2 +- net/tipc/link.c | 67 +++++++++++++++++------------------------------ net/tipc/link.h | 8 +++--- 5 files changed, 30 insertions(+), 54 deletions(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index a5be053cac57..4931eea65797 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -327,7 +327,6 @@ restart: b_ptr->net_plane = bearer_id + 'A'; b_ptr->active = 1; b_ptr->priority = priority; - INIT_LIST_HEAD(&b_ptr->links); spin_lock_init(&b_ptr->lock); res = tipc_disc_create(b_ptr, &b_ptr->bcast_addr, disc_domain); @@ -353,7 +352,7 @@ static int tipc_reset_bearer(struct tipc_bearer *b_ptr) read_lock_bh(&tipc_net_lock); pr_info("Resetting bearer <%s>\n", b_ptr->name); spin_lock_bh(&b_ptr->lock); - tipc_link_reset_list(b_ptr); + tipc_link_reset_list(b_ptr->identity); spin_unlock_bh(&b_ptr->lock); read_unlock_bh(&tipc_net_lock); return 0; @@ -371,7 +370,7 @@ static void bearer_disable(struct tipc_bearer *b_ptr) pr_info("Disabling bearer <%s>\n", b_ptr->name); spin_lock_bh(&b_ptr->lock); b_ptr->media->disable_media(b_ptr); - tipc_link_delete_list(b_ptr); + tipc_link_delete_list(b_ptr->identity); temp_req = b_ptr->link_req; b_ptr->link_req = NULL; spin_unlock_bh(&b_ptr->lock); diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 4f5db9ad5bf6..647cb1d2324a 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -120,7 +120,6 @@ struct tipc_media { * @tolerance: default link tolerance for bearer * @identity: array index of this bearer within TIPC bearer array * @link_req: ptr to (optional) structure making periodic link setup requests - * @links: list of non-congested links associated with bearer * @active: non-zero if bearer structure is represents a bearer * @net_plane: network plane ('A' through 'H') currently associated with bearer * @nodes: indicates which nodes in cluster can be reached through bearer @@ -142,7 +141,6 @@ struct tipc_bearer { u32 tolerance; u32 identity; struct tipc_link_req *link_req; - struct list_head links; int active; char net_plane; struct tipc_node_map nodes; diff --git a/net/tipc/core.c b/net/tipc/core.c index f9e88d8b04ca..3f76b98d2fed 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -1,7 +1,7 @@ /* * net/tipc/core.c: TIPC module code * - * Copyright (c) 2003-2006, Ericsson AB + * Copyright (c) 2003-2006, 2013, Ericsson AB * Copyright (c) 2005-2006, 2010-2013, Wind River Systems * All rights reserved. * diff --git a/net/tipc/link.c b/net/tipc/link.c index 2070d032c923..e7e44ab008ec 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -147,11 +147,6 @@ int tipc_link_is_active(struct tipc_link *l_ptr) /** * link_timeout - handle expiration of link timer * @l_ptr: pointer to link - * - * This routine must not grab "tipc_net_lock" to avoid a potential deadlock conflict - * with tipc_link_delete(). (There is no risk that the node will be deleted by - * another thread because tipc_link_delete() always cancels the link timer before - * tipc_node_delete() is called.) */ static void link_timeout(struct tipc_link *l_ptr) { @@ -213,8 +208,8 @@ static void link_set_timer(struct tipc_link *l_ptr, u32 time) * Returns pointer to link. */ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, - struct tipc_bearer *b_ptr, - const struct tipc_media_addr *media_addr) + struct tipc_bearer *b_ptr, + const struct tipc_media_addr *media_addr) { struct tipc_link *l_ptr; struct tipc_msg *msg; @@ -279,47 +274,32 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, k_init_timer(&l_ptr->timer, (Handler)link_timeout, (unsigned long)l_ptr); - list_add_tail(&l_ptr->link_list, &b_ptr->links); link_state_event(l_ptr, STARTING_EVT); return l_ptr; } -/** - * tipc_link_delete - delete a link - * @l_ptr: pointer to link - * - * Note: 'tipc_net_lock' is write_locked, bearer is locked. - * This routine must not grab the node lock until after link timer cancellation - * to avoid a potential deadlock situation. - */ -void tipc_link_delete(struct tipc_link *l_ptr) -{ - if (!l_ptr) { - pr_err("Attempt to delete non-existent link\n"); - return; - } - k_cancel_timer(&l_ptr->timer); - - tipc_node_lock(l_ptr->owner); - tipc_link_reset(l_ptr); - tipc_node_detach_link(l_ptr->owner, l_ptr); - tipc_link_purge_queues(l_ptr); - list_del_init(&l_ptr->link_list); - tipc_node_unlock(l_ptr->owner); - k_term_timer(&l_ptr->timer); - kfree(l_ptr); -} - -void tipc_link_delete_list(struct tipc_bearer *b_ptr) +void tipc_link_delete_list(unsigned int bearer_id) { struct tipc_link *l_ptr; - struct tipc_link *temp_l_ptr; + struct tipc_node *n_ptr; - list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) { - tipc_link_delete(l_ptr); + list_for_each_entry(n_ptr, &tipc_node_list, list) { + spin_lock_bh(&n_ptr->lock); + l_ptr = n_ptr->links[bearer_id]; + if (l_ptr) { + tipc_link_reset(l_ptr); + tipc_node_detach_link(n_ptr, l_ptr); + spin_unlock_bh(&n_ptr->lock); + + /* Nobody else can access this link now: */ + del_timer_sync(&l_ptr->timer); + kfree(l_ptr); + continue; + } + spin_unlock_bh(&n_ptr->lock); } } @@ -470,15 +450,16 @@ void tipc_link_reset(struct tipc_link *l_ptr) link_reset_statistics(l_ptr); } -void tipc_link_reset_list(struct tipc_bearer *b_ptr) +void tipc_link_reset_list(unsigned int bearer_id) { struct tipc_link *l_ptr; + struct tipc_node *n_ptr; - list_for_each_entry(l_ptr, &b_ptr->links, link_list) { - struct tipc_node *n_ptr = l_ptr->owner; - + list_for_each_entry(n_ptr, &tipc_node_list, list) { spin_lock_bh(&n_ptr->lock); - tipc_link_reset(l_ptr); + l_ptr = n_ptr->links[bearer_id]; + if (l_ptr) + tipc_link_reset(l_ptr); spin_unlock_bh(&n_ptr->lock); } } diff --git a/net/tipc/link.h b/net/tipc/link.h index a900e74b4f3a..3340fc1fc299 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -59,6 +59,7 @@ /* Link endpoint execution states */ #define LINK_STARTED 0x0001 +#define LINK_STOPPED 0x0002 /* Starting value for maximum packet size negotiation on unicast links * (unless bearer MTU is less) @@ -102,7 +103,6 @@ struct tipc_stats { * @media_addr: media address to use when sending messages over link * @timer: link timer * @owner: pointer to peer node - * @link_list: adjacent links in bearer's list of links * @flags: execution state flags for link endpoint instance * @checkpoint: reference point for triggering link continuity checking * @peer_session: link session # being used by peer end of link @@ -149,7 +149,6 @@ struct tipc_link { struct tipc_media_addr media_addr; struct timer_list timer; struct tipc_node *owner; - struct list_head link_list; /* Management and link supervision data */ unsigned int flags; @@ -215,11 +214,10 @@ struct tipc_port; struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, struct tipc_bearer *b_ptr, const struct tipc_media_addr *media_addr); -void tipc_link_delete(struct tipc_link *l_ptr); +void tipc_link_delete_list(unsigned int bearer_id); void tipc_link_failover_send_queue(struct tipc_link *l_ptr); void tipc_link_dup_send_queue(struct tipc_link *l_ptr, struct tipc_link *dest); -void tipc_link_delete_list(struct tipc_bearer *b_ptr); void tipc_link_reset_fragments(struct tipc_link *l_ptr); int tipc_link_is_up(struct tipc_link *l_ptr); int tipc_link_is_active(struct tipc_link *l_ptr); @@ -232,7 +230,7 @@ struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_space); void tipc_link_reset(struct tipc_link *l_ptr); -void tipc_link_reset_list(struct tipc_bearer *b_ptr); +void tipc_link_reset_list(unsigned int bearer_id); int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector); void tipc_link_send_names(struct list_head *message_list, u32 dest); int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf); From 1dab3d5ac22217241ca5c5bb7d0132602b465938 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 13 Feb 2014 17:29:10 -0500 Subject: [PATCH 06/14] tipc: change reception of tunnelled duplicate packets When a second link to a destination comes up, some sender sockets will steer their subsequent traffic through the new link. In order to guarantee preserved packet order and cardinality for those sockets, we tunnel a duplicate of the old link's send queue through the new link before we open it for regular traffic. The last arriving packet copy, on whichever link, will be dropped at the receiving end based on the original sequence number, to ensure that only one copy is delivered to the end receiver. In this commit, we change the algorithm for receiving DUPLICATE_MSG packets, at the same time delegating it to a new subfunction, tipc_link_dup_rcv(). Instead of returning an extracted inner packet to the packet reception loop in tipc_rcv(), we just add it to the receiving (new) link's deferred packet queue. The packet will then be processed by that link when it receives its first non-tunneled packet, i.e., at latest when the changeover procedure is finished. Because tipc_link_tunnel_rcv()/tipc_link_dup_rcv() now is consuming all packets of type DUPLICATE_MSG, the calling tipc_rcv() function can omit testing for this. This in turn means that the current conditional jump to the label 'protocol_check' becomes redundant, and we can remove that label. Signed-off-by: Jon Maloy Signed-off-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/link.c | 53 +++++++++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 21 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index e7e44ab008ec..f227a389e36e 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1437,7 +1437,6 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) u32 seq_no; u32 ackd; u32 released = 0; - int type; head = head->next; buf->next = NULL; @@ -1525,7 +1524,6 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) } /* Now (finally!) process the incoming message */ -protocol_check: if (unlikely(!link_working_working(l_ptr))) { if (msg_user(msg) == LINK_PROTOCOL) { link_recv_proto_msg(l_ptr, buf); @@ -1599,15 +1597,11 @@ deliver: tipc_node_unlock(n_ptr); continue; case CHANGEOVER_PROTOCOL: - type = msg_type(msg); - if (tipc_link_tunnel_rcv(&l_ptr, &buf)) { - msg = buf_msg(buf); - seq_no = msg_seqno(msg); - if (type == ORIGINAL_MSG) - goto deliver; - goto protocol_check; - } - break; + if (!tipc_link_tunnel_rcv(&l_ptr, &buf)) + break; + msg = buf_msg(buf); + seq_no = msg_seqno(msg); + goto deliver; default: kfree_skb(buf); buf = NULL; @@ -2107,7 +2101,30 @@ static struct sk_buff *buf_extract(struct sk_buff *skb, u32 from_pos) return eb; } -/* tipc_link_tunnel_rcv(): Receive a tunneled packet, sent + + +/* tipc_link_dup_rcv(): Receive a tunnelled DUPLICATE_MSG packet. + * Owner node is locked. + */ +static void tipc_link_dup_rcv(struct tipc_link *l_ptr, + struct sk_buff *t_buf) +{ + struct sk_buff *buf; + + if (!tipc_link_is_up(l_ptr)) + return; + + buf = buf_extract(t_buf, INT_H_SIZE); + if (buf == NULL) { + pr_warn("%sfailed to extract inner dup pkt\n", link_co_err); + return; + } + + /* Add buffer to deferred queue, if applicable: */ + link_handle_out_of_seq_msg(l_ptr, buf); +} + +/* tipc_link_tunnel_rcv(): Receive a tunnelled packet, sent * via other link as result of a failover (ORIGINAL_MSG) or * a new active link (DUPLICATE_MSG). Failover packets are * returned to the active link for delivery upwards. @@ -2126,6 +2143,7 @@ static int tipc_link_tunnel_rcv(struct tipc_link **l_ptr, if (bearer_id >= MAX_BEARERS) goto exit; + dest_link = (*l_ptr)->owner->links[bearer_id]; if (!dest_link) goto exit; @@ -2138,15 +2156,8 @@ static int tipc_link_tunnel_rcv(struct tipc_link **l_ptr, msg = msg_get_wrapped(tunnel_msg); if (msg_typ == DUPLICATE_MSG) { - if (less(msg_seqno(msg), mod(dest_link->next_in_no))) - goto exit; - *buf = buf_extract(tunnel_buf, INT_H_SIZE); - if (*buf == NULL) { - pr_warn("%sduplicate msg dropped\n", link_co_err); - goto exit; - } - kfree_skb(tunnel_buf); - return 1; + tipc_link_dup_rcv(dest_link, tunnel_buf); + goto exit; } /* First original message ?: */ From f006c9c70fda4676157e00caa2efa74646709d72 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 13 Feb 2014 17:29:11 -0500 Subject: [PATCH 07/14] tipc: change reception of tunnelled failover packets When a link is reset, and there is a redundant link available, all sender sockets will steer their subsequent traffic through the remaining link. In order to guarantee preserved packet order and cardinality during the transition, we tunnel the failing link's send queue through the remaining link before we allow any sockets to use it. In this commit, we change the algorithm for receiving failover ("ORIGINAL_MSG") packets in tipc_link_tunnel_rcv(), at the same time delegating it to a new subfuncton, tipc_link_failover_rcv(). Instead of directly returning an extracted inner packet to the packet reception loop in tipc_rcv(), we first check if it is a message fragment, in which case we append it to the reset link's fragment chain. If the fragment chain is complete, we return the whole chain instead of the individual buffer, eliminating any need for the tipc_rcv() loop to do reassembly of tunneled packets. This change makes it possible to further simplify tipc_link_tunnel_rcv(), as well as the calling tipc_rcv() loop. We will do that in later commits. It also makes it possible to identify a single spot in the code where we can tell that a failover procedure is finished, something that is useful when we are deleting links after a failover. This will also be done in a later commit. Signed-off-by: Jon Maloy Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/link.c | 75 +++++++++++++++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 28 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index f227a389e36e..26a54f4f3c63 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2124,6 +2124,50 @@ static void tipc_link_dup_rcv(struct tipc_link *l_ptr, link_handle_out_of_seq_msg(l_ptr, buf); } +/* tipc_link_failover_rcv(): Receive a tunnelled ORIGINAL_MSG packet + * Owner node is locked. + */ +static struct sk_buff *tipc_link_failover_rcv(struct tipc_link *l_ptr, + struct sk_buff *t_buf) +{ + struct tipc_msg *t_msg = buf_msg(t_buf); + struct sk_buff *buf = NULL; + struct tipc_msg *msg; + + if (tipc_link_is_up(l_ptr)) + tipc_link_reset(l_ptr); + + /* First failover packet? */ + if (l_ptr->exp_msg_count == START_CHANGEOVER) + l_ptr->exp_msg_count = msg_msgcnt(t_msg); + + /* Should there be an inner packet? */ + if (l_ptr->exp_msg_count) { + l_ptr->exp_msg_count--; + buf = buf_extract(t_buf, INT_H_SIZE); + if (buf == NULL) { + pr_warn("%sno inner failover pkt\n", link_co_err); + goto exit; + } + msg = buf_msg(buf); + + if (less(msg_seqno(msg), l_ptr->reset_checkpoint)) { + kfree_skb(buf); + buf = NULL; + goto exit; + } + if (msg_user(msg) == MSG_FRAGMENTER) { + l_ptr->stats.recv_fragments++; + tipc_link_frag_rcv(&l_ptr->reasm_head, + &l_ptr->reasm_tail, + &buf); + } + } + +exit: + return buf; +} + /* tipc_link_tunnel_rcv(): Receive a tunnelled packet, sent * via other link as result of a failover (ORIGINAL_MSG) or * a new active link (DUPLICATE_MSG). Failover packets are @@ -2135,10 +2179,8 @@ static int tipc_link_tunnel_rcv(struct tipc_link **l_ptr, { struct sk_buff *tunnel_buf = *buf; struct tipc_link *dest_link; - struct tipc_msg *msg; struct tipc_msg *tunnel_msg = buf_msg(tunnel_buf); u32 msg_typ = msg_type(tunnel_msg); - u32 msg_count = msg_msgcnt(tunnel_msg); u32 bearer_id = msg_bearer_id(tunnel_msg); if (bearer_id >= MAX_BEARERS) @@ -2153,42 +2195,19 @@ static int tipc_link_tunnel_rcv(struct tipc_link **l_ptr, goto exit; } *l_ptr = dest_link; - msg = msg_get_wrapped(tunnel_msg); if (msg_typ == DUPLICATE_MSG) { tipc_link_dup_rcv(dest_link, tunnel_buf); goto exit; } - /* First original message ?: */ - if (tipc_link_is_up(dest_link)) { - pr_info("%s<%s>, changeover initiated by peer\n", link_rst_msg, - dest_link->name); - tipc_link_reset(dest_link); - dest_link->exp_msg_count = msg_count; - if (!msg_count) - goto exit; - } else if (dest_link->exp_msg_count == START_CHANGEOVER) { - dest_link->exp_msg_count = msg_count; - if (!msg_count) - goto exit; - } + if (msg_type(tunnel_msg) == ORIGINAL_MSG) { + *buf = tipc_link_failover_rcv(dest_link, tunnel_buf); - /* Receive original message */ - if (dest_link->exp_msg_count == 0) { - pr_warn("%sgot too many tunnelled messages\n", link_co_err); - goto exit; - } - dest_link->exp_msg_count--; - if (less(msg_seqno(msg), dest_link->reset_checkpoint)) { - goto exit; - } else { - *buf = buf_extract(tunnel_buf, INT_H_SIZE); + /* Do we have a buffer/buffer chain to return? */ if (*buf != NULL) { kfree_skb(tunnel_buf); return 1; - } else { - pr_warn("%soriginal msg dropped\n", link_co_err); } } exit: From 3bb533800c698d5e8a8b01dbfc37e147260988f2 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 13 Feb 2014 17:29:12 -0500 Subject: [PATCH 08/14] tipc: change signature of tunnelling reception function After the earlier commits in this series related to the function tipc_link_tunnel_rcv(), we can now go further and simplify its signature. The function now consumes all DUPLICATE packets, and only returns such ORIGINAL packets that are ready for immediate delivery, i.e., no more link level protocol processing needs to be done by the caller. As a consequence, the the caller, tipc_rcv(), does not access the link pointer after call return, and it becomes unnecessary to pass a link pointer reference in the call. Instead, we now only pass it the tunnel link's owner node, which is sufficient to find the destination link for the tunnelled packet. Signed-off-by: Jon Maloy Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/link.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 26a54f4f3c63..f9f90681a59d 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -78,7 +78,7 @@ static const char *link_unk_evt = "Unknown link event "; static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, struct sk_buff *buf); static void link_recv_proto_msg(struct tipc_link *l_ptr, struct sk_buff *buf); -static int tipc_link_tunnel_rcv(struct tipc_link **l_ptr, +static int tipc_link_tunnel_rcv(struct tipc_node *n_ptr, struct sk_buff **buf); static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance); static int link_send_sections_long(struct tipc_port *sender, @@ -1597,7 +1597,7 @@ deliver: tipc_node_unlock(n_ptr); continue; case CHANGEOVER_PROTOCOL: - if (!tipc_link_tunnel_rcv(&l_ptr, &buf)) + if (!tipc_link_tunnel_rcv(n_ptr, &buf)) break; msg = buf_msg(buf); seq_no = msg_seqno(msg); @@ -2174,7 +2174,7 @@ exit: * returned to the active link for delivery upwards. * Owner node is locked. */ -static int tipc_link_tunnel_rcv(struct tipc_link **l_ptr, +static int tipc_link_tunnel_rcv(struct tipc_node *n_ptr, struct sk_buff **buf) { struct sk_buff *tunnel_buf = *buf; @@ -2186,15 +2186,9 @@ static int tipc_link_tunnel_rcv(struct tipc_link **l_ptr, if (bearer_id >= MAX_BEARERS) goto exit; - dest_link = (*l_ptr)->owner->links[bearer_id]; + dest_link = n_ptr->links[bearer_id]; if (!dest_link) goto exit; - if (dest_link == *l_ptr) { - pr_err("Unexpected changeover message on link <%s>\n", - (*l_ptr)->name); - goto exit; - } - *l_ptr = dest_link; if (msg_typ == DUPLICATE_MSG) { tipc_link_dup_rcv(dest_link, tunnel_buf); From 1e9d47a948f44af4bb040e10a3a852b6bc3d6a90 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 13 Feb 2014 17:29:13 -0500 Subject: [PATCH 09/14] tipc: more cleanup of tunnelling reception function We simplify and slim down the code in function tipc_tunnel_rcv() No impact on the users of this function. Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index f9f90681a59d..3136788799d8 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2180,9 +2180,10 @@ static int tipc_link_tunnel_rcv(struct tipc_node *n_ptr, struct sk_buff *tunnel_buf = *buf; struct tipc_link *dest_link; struct tipc_msg *tunnel_msg = buf_msg(tunnel_buf); - u32 msg_typ = msg_type(tunnel_msg); u32 bearer_id = msg_bearer_id(tunnel_msg); + *buf = NULL; + if (bearer_id >= MAX_BEARERS) goto exit; @@ -2190,24 +2191,16 @@ static int tipc_link_tunnel_rcv(struct tipc_node *n_ptr, if (!dest_link) goto exit; - if (msg_typ == DUPLICATE_MSG) { + if (msg_type(tunnel_msg) == DUPLICATE_MSG) tipc_link_dup_rcv(dest_link, tunnel_buf); - goto exit; - } - - if (msg_type(tunnel_msg) == ORIGINAL_MSG) { + else if (msg_type(tunnel_msg) == ORIGINAL_MSG) *buf = tipc_link_failover_rcv(dest_link, tunnel_buf); + else + pr_warn("%sunknown tunnel pkt received\n", link_co_err); - /* Do we have a buffer/buffer chain to return? */ - if (*buf != NULL) { - kfree_skb(tunnel_buf); - return 1; - } - } exit: - *buf = NULL; kfree_skb(tunnel_buf); - return 0; + return *buf != NULL; } /* From 02842f718d9d47950ec9045825679ec266ba532d Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 13 Feb 2014 17:29:14 -0500 Subject: [PATCH 10/14] tipc: rename stack variables in function tipc_link_tunnel_rcv After the previous redesign of the tunnel reception algorithm and functions, we finalize it by renaming a couple of stack variables in tipc_tunnel_rcv(). This makes it more consistent with the naming scheme elsewhere in this part of the code. This change is purely cosmetic, with no functional changes. Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 3136788799d8..b678c2e0080a 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2177,29 +2177,29 @@ exit: static int tipc_link_tunnel_rcv(struct tipc_node *n_ptr, struct sk_buff **buf) { - struct sk_buff *tunnel_buf = *buf; - struct tipc_link *dest_link; - struct tipc_msg *tunnel_msg = buf_msg(tunnel_buf); - u32 bearer_id = msg_bearer_id(tunnel_msg); + struct sk_buff *t_buf = *buf; + struct tipc_link *l_ptr; + struct tipc_msg *t_msg = buf_msg(t_buf); + u32 bearer_id = msg_bearer_id(t_msg); *buf = NULL; if (bearer_id >= MAX_BEARERS) goto exit; - dest_link = n_ptr->links[bearer_id]; - if (!dest_link) + l_ptr = n_ptr->links[bearer_id]; + if (!l_ptr) goto exit; - if (msg_type(tunnel_msg) == DUPLICATE_MSG) - tipc_link_dup_rcv(dest_link, tunnel_buf); - else if (msg_type(tunnel_msg) == ORIGINAL_MSG) - *buf = tipc_link_failover_rcv(dest_link, tunnel_buf); + if (msg_type(t_msg) == DUPLICATE_MSG) + tipc_link_dup_rcv(l_ptr, t_buf); + else if (msg_type(t_msg) == ORIGINAL_MSG) + *buf = tipc_link_failover_rcv(l_ptr, t_buf); else pr_warn("%sunknown tunnel pkt received\n", link_co_err); exit: - kfree_skb(tunnel_buf); + kfree_skb(t_buf); return *buf != NULL; } From a5377831eb64c1b8a7b911dc79aec73a930e95da Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 13 Feb 2014 17:29:15 -0500 Subject: [PATCH 11/14] tipc: changes to general packet reception algorithm We change the order of checking for destination users when processing incoming packets. By placing the checks for users that may potentially replace the processed buffer, i.e., CHANGEOVER_PROTOCOL and MSG_FRAGMENTER, in a separate step before we check for the true end users, we get rid of a label and a 'goto', at the same time making the code more comprehensible and easy to follow. This commit does not change any functionality, it is just a cosmetic code reshuffle. Signed-off-by: Jon Maloy Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/link.c | 76 ++++++++++++++++++++++++++----------------------- 1 file changed, 40 insertions(+), 36 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index b678c2e0080a..663623c5896d 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1484,7 +1484,7 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) if ((n_ptr->block_setup & WAIT_PEER_DOWN) && msg_user(msg) == LINK_PROTOCOL && (msg_type(msg) == RESET_MSG || - msg_type(msg) == ACTIVATE_MSG) && + msg_type(msg) == ACTIVATE_MSG) && !msg_redundant_link(msg)) n_ptr->block_setup &= ~WAIT_PEER_DOWN; @@ -1503,7 +1503,6 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) while ((crs != l_ptr->next_out) && less_eq(buf_seqno(crs), ackd)) { struct sk_buff *next = crs->next; - kfree_skb(crs); crs = next; released++; @@ -1516,14 +1515,17 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) /* Try sending any messages link endpoint has pending */ if (unlikely(l_ptr->next_out)) tipc_link_push_queue(l_ptr); + if (unlikely(!list_empty(&l_ptr->waiting_ports))) tipc_link_wakeup_ports(l_ptr, 0); + if (unlikely(++l_ptr->unacked_window >= TIPC_MIN_LINK_WIN)) { l_ptr->stats.sent_acks++; - tipc_link_send_proto_msg(l_ptr, STATE_MSG, 0, 0, 0, 0, 0); + tipc_link_send_proto_msg(l_ptr, STATE_MSG, + 0, 0, 0, 0, 0); } - /* Now (finally!) process the incoming message */ + /* Process the incoming packet */ if (unlikely(!link_working_working(l_ptr))) { if (msg_user(msg) == LINK_PROTOCOL) { link_recv_proto_msg(l_ptr, buf); @@ -1555,14 +1557,40 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) l_ptr->next_in_no++; if (unlikely(l_ptr->oldest_deferred_in)) head = link_insert_deferred_queue(l_ptr, head); -deliver: - if (likely(msg_isdata(msg))) { + + /* Deliver packet/message to correct user: */ + if (unlikely(msg_user(msg) == CHANGEOVER_PROTOCOL)) { + if (!tipc_link_tunnel_rcv(n_ptr, &buf)) { + tipc_node_unlock(n_ptr); + continue; + } + msg = buf_msg(buf); + } else if (msg_user(msg) == MSG_FRAGMENTER) { + int rc; + + l_ptr->stats.recv_fragments++; + rc = tipc_link_frag_rcv(&l_ptr->reasm_head, + &l_ptr->reasm_tail, + &buf); + if (rc == LINK_REASM_COMPLETE) { + l_ptr->stats.recv_fragmented++; + msg = buf_msg(buf); + } else { + if (rc == LINK_REASM_ERROR) + tipc_link_reset(l_ptr); + tipc_node_unlock(n_ptr); + continue; + } + } + + switch (msg_user(msg)) { + case TIPC_LOW_IMPORTANCE: + case TIPC_MEDIUM_IMPORTANCE: + case TIPC_HIGH_IMPORTANCE: + case TIPC_CRITICAL_IMPORTANCE: tipc_node_unlock(n_ptr); tipc_port_recv_msg(buf); continue; - } - switch (msg_user(msg)) { - int ret; case MSG_BUNDLER: l_ptr->stats.recv_bundles++; l_ptr->stats.recv_bundled += msg_msgcnt(msg); @@ -1574,44 +1602,20 @@ deliver: tipc_node_unlock(n_ptr); tipc_named_recv(buf); continue; - case BCAST_PROTOCOL: - tipc_link_recv_sync(n_ptr, buf); - tipc_node_unlock(n_ptr); - continue; case CONN_MANAGER: tipc_node_unlock(n_ptr); tipc_port_recv_proto_msg(buf); continue; - case MSG_FRAGMENTER: - l_ptr->stats.recv_fragments++; - ret = tipc_link_frag_rcv(&l_ptr->reasm_head, - &l_ptr->reasm_tail, - &buf); - if (ret == LINK_REASM_COMPLETE) { - l_ptr->stats.recv_fragmented++; - msg = buf_msg(buf); - goto deliver; - } - if (ret == LINK_REASM_ERROR) - tipc_link_reset(l_ptr); - tipc_node_unlock(n_ptr); - continue; - case CHANGEOVER_PROTOCOL: - if (!tipc_link_tunnel_rcv(n_ptr, &buf)) - break; - msg = buf_msg(buf); - seq_no = msg_seqno(msg); - goto deliver; + case BCAST_PROTOCOL: + tipc_link_recv_sync(n_ptr, buf); + break; default: kfree_skb(buf); - buf = NULL; break; } tipc_node_unlock(n_ptr); - tipc_net_route_msg(buf); continue; unlock_discard: - tipc_node_unlock(n_ptr); discard: kfree_skb(buf); From 7d33939f475d403e79124e3143d7951dcfe8629f Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 13 Feb 2014 17:29:16 -0500 Subject: [PATCH 12/14] tipc: delay delete of link when failover is needed When a bearer is disabled, all its attached links are deleted. Ideally, we should do link failover to redundant links on other bearers, if there are any, in such cases. This would be consistent with current behavior when a link is reset, but not deleted. However, due to the complexity involved, and the (wrongly) perceived low demand for this feature, it was never implemented until now. We mark the doomed link for deletion with a new flag, but wait until the failover process is finished before we actually delete it. With the improved link tunnelling/failover code introduced earlier in this commit series, it is now easy to identify a spot in the code where the failover is finished and it is safe to delete the marked link. Moreover, the test for the flag and the deletion can be done synchronously, and outside the most time critical data path. Signed-off-by: Jon Maloy Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/bearer.c | 12 ++++++------ net/tipc/link.c | 29 +++++++++++++++++++++-------- net/tipc/link.h | 2 +- net/tipc/node.c | 8 +++++++- 4 files changed, 35 insertions(+), 16 deletions(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 4931eea65797..60caa45e5a41 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -51,7 +51,7 @@ static struct tipc_media * const media_info_array[] = { struct tipc_bearer tipc_bearers[MAX_BEARERS]; -static void bearer_disable(struct tipc_bearer *b_ptr); +static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down); /** * tipc_media_find - locates specified media object by name @@ -331,7 +331,7 @@ restart: res = tipc_disc_create(b_ptr, &b_ptr->bcast_addr, disc_domain); if (res) { - bearer_disable(b_ptr); + bearer_disable(b_ptr, false); pr_warn("Bearer <%s> rejected, discovery object creation failed\n", name); goto exit; @@ -363,14 +363,14 @@ static int tipc_reset_bearer(struct tipc_bearer *b_ptr) * * Note: This routine assumes caller holds tipc_net_lock. */ -static void bearer_disable(struct tipc_bearer *b_ptr) +static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down) { struct tipc_link_req *temp_req; pr_info("Disabling bearer <%s>\n", b_ptr->name); spin_lock_bh(&b_ptr->lock); b_ptr->media->disable_media(b_ptr); - tipc_link_delete_list(b_ptr->identity); + tipc_link_delete_list(b_ptr->identity, shutting_down); temp_req = b_ptr->link_req; b_ptr->link_req = NULL; spin_unlock_bh(&b_ptr->lock); @@ -392,7 +392,7 @@ int tipc_disable_bearer(const char *name) pr_warn("Attempt to disable unknown bearer <%s>\n", name); res = -EINVAL; } else { - bearer_disable(b_ptr); + bearer_disable(b_ptr, false); res = 0; } write_unlock_bh(&tipc_net_lock); @@ -612,6 +612,6 @@ void tipc_bearer_stop(void) for (i = 0; i < MAX_BEARERS; i++) { if (tipc_bearers[i].active) - bearer_disable(&tipc_bearers[i]); + bearer_disable(&tipc_bearers[i], true); } } diff --git a/net/tipc/link.c b/net/tipc/link.c index 663623c5896d..03075165665e 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -281,7 +281,7 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, } -void tipc_link_delete_list(unsigned int bearer_id) +void tipc_link_delete_list(unsigned int bearer_id, bool shutting_down) { struct tipc_link *l_ptr; struct tipc_node *n_ptr; @@ -291,12 +291,20 @@ void tipc_link_delete_list(unsigned int bearer_id) l_ptr = n_ptr->links[bearer_id]; if (l_ptr) { tipc_link_reset(l_ptr); - tipc_node_detach_link(n_ptr, l_ptr); - spin_unlock_bh(&n_ptr->lock); + if (shutting_down || !tipc_node_is_up(n_ptr)) { + tipc_node_detach_link(l_ptr->owner, l_ptr); + tipc_link_reset_fragments(l_ptr); + spin_unlock_bh(&n_ptr->lock); - /* Nobody else can access this link now: */ - del_timer_sync(&l_ptr->timer); - kfree(l_ptr); + /* Nobody else can access this link now: */ + del_timer_sync(&l_ptr->timer); + kfree(l_ptr); + } else { + /* Detach/delete when failover is finished: */ + l_ptr->flags |= LINK_STOPPED; + spin_unlock_bh(&n_ptr->lock); + del_timer_sync(&l_ptr->timer); + } continue; } spin_unlock_bh(&n_ptr->lock); @@ -481,6 +489,9 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) struct tipc_link *other; u32 cont_intv = l_ptr->continuity_interval; + if (l_ptr->flags & LINK_STOPPED) + return; + if (!(l_ptr->flags & LINK_STARTED) && (event != STARTING_EVT)) return; /* Not yet. */ @@ -2167,8 +2178,11 @@ static struct sk_buff *tipc_link_failover_rcv(struct tipc_link *l_ptr, &buf); } } - exit: + if ((l_ptr->exp_msg_count == 0) && (l_ptr->flags & LINK_STOPPED)) { + tipc_node_detach_link(l_ptr->owner, l_ptr); + kfree(l_ptr); + } return buf; } @@ -2201,7 +2215,6 @@ static int tipc_link_tunnel_rcv(struct tipc_node *n_ptr, *buf = tipc_link_failover_rcv(l_ptr, t_buf); else pr_warn("%sunknown tunnel pkt received\n", link_co_err); - exit: kfree_skb(t_buf); return *buf != NULL; diff --git a/net/tipc/link.h b/net/tipc/link.h index 3340fc1fc299..45b9cd071c41 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -214,7 +214,7 @@ struct tipc_port; struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, struct tipc_bearer *b_ptr, const struct tipc_media_addr *media_addr); -void tipc_link_delete_list(unsigned int bearer_id); +void tipc_link_delete_list(unsigned int bearer_id, bool shutting_down); void tipc_link_failover_send_queue(struct tipc_link *l_ptr); void tipc_link_dup_send_queue(struct tipc_link *l_ptr, struct tipc_link *dest); diff --git a/net/tipc/node.c b/net/tipc/node.c index efe4d41bf11b..833324b73fe1 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -249,7 +249,13 @@ void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr) void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { - n_ptr->links[l_ptr->b_ptr->identity] = NULL; + int i; + + for (i = 0; i < MAX_BEARERS; i++) { + if (l_ptr == n_ptr->links[i]) + break; + } + n_ptr->links[i] = NULL; atomic_dec(&tipc_num_links); n_ptr->link_cnt--; } From a83045292daf9f07d0b103e5715ef527123d2fcc Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Thu, 13 Feb 2014 17:29:17 -0500 Subject: [PATCH 13/14] tipc: remove bearer_lock from tipc_bearer struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After the earlier commits ("tipc: remove 'links' list from tipc_bearer struct") and ("tipc: introduce new spinlock to protect struct link_req"), there is no longer any need to protect struct link_req or or any link list by use of bearer_lock. Furthermore, we have eliminated the need for using bearer_lock during downcalls (send) from the link to the bearer, since we have ensured that bearers always have a longer life cycle that their associated links, and always contain valid data. So, the only need now for a lock protecting bearers is for guaranteeing consistency of the bearer list itself. For this, it is sufficient, at least for the time being, to continue applying 'net_lockĀ“ in write mode. By removing bearer_lock we also pre-empt introduction of issue b) descibed in the previous commit "tipc: remove 'links' list from tipc_bearer struct": "b) When the outer protection from net_lock is gone, taking bearer_lock and node_lock in opposite order of method 1) and 2) will become an obvious deadlock hazard". Therefore, we now eliminate the bearer_lock spinlock. Signed-off-by: Ying Xue Reviewed-by: Paul Gortmaker Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bcast.c | 1 - net/tipc/bearer.c | 16 +++------------- net/tipc/bearer.h | 5 +---- 3 files changed, 4 insertions(+), 18 deletions(-) diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index af35f76c6b29..06a639c375f0 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -785,7 +785,6 @@ void tipc_bclink_init(void) bcl->owner = &bclink->node; bcl->max_pkt = MAX_PKT_DEFAULT_MCAST; tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT); - spin_lock_init(&bcbearer->bearer.lock); bcl->b_ptr = &bcbearer->bearer; bcl->state = WORKING_WORKING; strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME); diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 60caa45e5a41..242cddd35a47 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -327,7 +327,6 @@ restart: b_ptr->net_plane = bearer_id + 'A'; b_ptr->active = 1; b_ptr->priority = priority; - spin_lock_init(&b_ptr->lock); res = tipc_disc_create(b_ptr, &b_ptr->bcast_addr, disc_domain); if (res) { @@ -351,9 +350,7 @@ static int tipc_reset_bearer(struct tipc_bearer *b_ptr) { read_lock_bh(&tipc_net_lock); pr_info("Resetting bearer <%s>\n", b_ptr->name); - spin_lock_bh(&b_ptr->lock); tipc_link_reset_list(b_ptr->identity); - spin_unlock_bh(&b_ptr->lock); read_unlock_bh(&tipc_net_lock); return 0; } @@ -365,19 +362,12 @@ static int tipc_reset_bearer(struct tipc_bearer *b_ptr) */ static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down) { - struct tipc_link_req *temp_req; - pr_info("Disabling bearer <%s>\n", b_ptr->name); - spin_lock_bh(&b_ptr->lock); b_ptr->media->disable_media(b_ptr); + tipc_link_delete_list(b_ptr->identity, shutting_down); - temp_req = b_ptr->link_req; - b_ptr->link_req = NULL; - spin_unlock_bh(&b_ptr->lock); - - if (temp_req) - tipc_disc_delete(temp_req); - + if (b_ptr->link_req) + tipc_disc_delete(b_ptr->link_req); memset(b_ptr, 0, sizeof(struct tipc_bearer)); } diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 647cb1d2324a..425dd8107a8f 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -107,10 +107,8 @@ struct tipc_media { /** * struct tipc_bearer - Generic TIPC bearer structure - * @dev: ptr to associated network device - * @usr_handle: pointer to additional media-specific information about bearer + * @media_ptr: pointer to additional media-specific information about bearer * @mtu: max packet size bearer can support - * @lock: spinlock for controlling access to bearer * @addr: media-specific address associated with bearer * @name: bearer name (format = media:interface) * @media: ptr to media structure associated with bearer @@ -133,7 +131,6 @@ struct tipc_bearer { u32 mtu; /* initalized by media */ struct tipc_media_addr addr; /* initalized by media */ char name[TIPC_MAX_BEARER_NAME]; - spinlock_t lock; struct tipc_media *media; struct tipc_media_addr bcast_addr; u32 priority; From e099e86c9e24fe9aff36773600543eb31d8954d1 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 13 Feb 2014 17:29:18 -0500 Subject: [PATCH 14/14] tipc: add node_lock protection to link lookup function In an earlier commit, ("tipc: remove links list from bearer struct") we described three issues that need to be pre-emptively resolved before we can remove tipc_net_lock. Here we resolve issue a) described in that commit: "a) In access method #2, we access the link before taking the protecting node_lock. This will not work once net_lock is gone, so we will have to change the access order. We will deal with this in a later commit in this series." Here, we change that access order, by ensuring that the function link_find_link() returns only a safe reference for finding the link, i.e., a node pointer and an index into its 'links' array, not the link pointer itself. We also change all callers of this function to first take the node lock before they can check if there still is a valid link pointer at the returned index. Since the function now returns a node pointer rather than a link pointer, we rename it to the more appropriate 'tipc_link_find_owner(). Signed-off-by: Jon Maloy Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/link.c | 112 ++++++++++++++++++++++++++++-------------------- 1 file changed, 66 insertions(+), 46 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 03075165665e..4fb4ae0a75ed 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2390,35 +2390,40 @@ void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window) l_ptr->queue_limit[MSG_FRAGMENTER] = 4000; } -/** - * link_find_link - locate link by name - * @name: ptr to link name string - * @node: ptr to area to be filled with ptr to associated node - * +/* tipc_link_find_owner - locate owner node of link by link's name + * @name: pointer to link name string + * @bearer_id: pointer to index in 'node->links' array where the link was found. * Caller must hold 'tipc_net_lock' to ensure node and bearer are not deleted; * this also prevents link deletion. * - * Returns pointer to link (or 0 if invalid link name). + * Returns pointer to node owning the link, or 0 if no matching link is found. */ -static struct tipc_link *link_find_link(const char *name, - struct tipc_node **node) +static struct tipc_node *tipc_link_find_owner(const char *link_name, + unsigned int *bearer_id) { struct tipc_link *l_ptr; struct tipc_node *n_ptr; + struct tipc_node *tmp_n_ptr; + struct tipc_node *found_node = 0; + int i; - list_for_each_entry(n_ptr, &tipc_node_list, list) { + *bearer_id = 0; + list_for_each_entry_safe(n_ptr, tmp_n_ptr, &tipc_node_list, list) { + spin_lock(&n_ptr->lock); for (i = 0; i < MAX_BEARERS; i++) { l_ptr = n_ptr->links[i]; - if (l_ptr && !strcmp(l_ptr->name, name)) - goto found; + if (l_ptr && !strcmp(l_ptr->name, link_name)) { + *bearer_id = i; + found_node = n_ptr; + break; + } } + spin_unlock(&n_ptr->lock); + if (found_node) + break; } - l_ptr = NULL; - n_ptr = NULL; -found: - *node = n_ptr; - return l_ptr; + return found_node; } /** @@ -2460,32 +2465,33 @@ static int link_cmd_set_value(const char *name, u32 new_value, u16 cmd) struct tipc_link *l_ptr; struct tipc_bearer *b_ptr; struct tipc_media *m_ptr; + int bearer_id; int res = 0; - l_ptr = link_find_link(name, &node); - if (l_ptr) { - /* - * acquire node lock for tipc_link_send_proto_msg(). - * see "TIPC locking policy" in net.c. - */ + node = tipc_link_find_owner(name, &bearer_id); + if (node) { tipc_node_lock(node); - switch (cmd) { - case TIPC_CMD_SET_LINK_TOL: - link_set_supervision_props(l_ptr, new_value); - tipc_link_send_proto_msg(l_ptr, - STATE_MSG, 0, 0, new_value, 0, 0); - break; - case TIPC_CMD_SET_LINK_PRI: - l_ptr->priority = new_value; - tipc_link_send_proto_msg(l_ptr, - STATE_MSG, 0, 0, 0, new_value, 0); - break; - case TIPC_CMD_SET_LINK_WINDOW: - tipc_link_set_queue_limits(l_ptr, new_value); - break; - default: - res = -EINVAL; - break; + l_ptr = node->links[bearer_id]; + + if (l_ptr) { + switch (cmd) { + case TIPC_CMD_SET_LINK_TOL: + link_set_supervision_props(l_ptr, new_value); + tipc_link_send_proto_msg(l_ptr, STATE_MSG, 0, + 0, new_value, 0, 0); + break; + case TIPC_CMD_SET_LINK_PRI: + l_ptr->priority = new_value; + tipc_link_send_proto_msg(l_ptr, STATE_MSG, 0, + 0, 0, new_value, 0); + break; + case TIPC_CMD_SET_LINK_WINDOW: + tipc_link_set_queue_limits(l_ptr, new_value); + break; + default: + res = -EINVAL; + break; + } } tipc_node_unlock(node); return res; @@ -2580,6 +2586,7 @@ struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_ char *link_name; struct tipc_link *l_ptr; struct tipc_node *node; + unsigned int bearer_id; if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_LINK_NAME)) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); @@ -2590,15 +2597,19 @@ struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_ return tipc_cfg_reply_error_string("link not found"); return tipc_cfg_reply_none(); } - read_lock_bh(&tipc_net_lock); - l_ptr = link_find_link(link_name, &node); - if (!l_ptr) { + node = tipc_link_find_owner(link_name, &bearer_id); + if (!node) { + read_unlock_bh(&tipc_net_lock); + return tipc_cfg_reply_error_string("link not found"); + } + spin_lock(&node->lock); + l_ptr = node->links[bearer_id]; + if (!l_ptr) { + tipc_node_unlock(node); read_unlock_bh(&tipc_net_lock); return tipc_cfg_reply_error_string("link not found"); } - - tipc_node_lock(node); link_reset_statistics(l_ptr); tipc_node_unlock(node); read_unlock_bh(&tipc_net_lock); @@ -2628,18 +2639,27 @@ static int tipc_link_stats(const char *name, char *buf, const u32 buf_size) struct tipc_node *node; char *status; u32 profile_total = 0; + unsigned int bearer_id; int ret; if (!strcmp(name, tipc_bclink_name)) return tipc_bclink_stats(buf, buf_size); read_lock_bh(&tipc_net_lock); - l = link_find_link(name, &node); - if (!l) { + node = tipc_link_find_owner(name, &bearer_id); + if (!node) { read_unlock_bh(&tipc_net_lock); return 0; } tipc_node_lock(node); + + l = node->links[bearer_id]; + if (!l) { + tipc_node_unlock(node); + read_unlock_bh(&tipc_net_lock); + return 0; + } + s = &l->stats; if (tipc_link_is_active(l))