linux_old1/net/tipc/bearer.c

643 lines
16 KiB
C
Raw Normal View History

/*
* net/tipc/bearer.c: TIPC bearer code
*
* Copyright (c) 1996-2006, 2013, Ericsson AB
* Copyright (c) 2004-2006, 2010-2013, Wind River Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the names of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* Alternatively, this software may be distributed under the terms of the
* GNU General Public License ("GPL") version 2 as published by the Free
* Software Foundation.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "core.h"
#include "config.h"
#include "bearer.h"
#include "discover.h"
tipc: add InfiniBand media type Add InfiniBand media type based on the ethernet media type. The only real difference is that in case of InfiniBand, we need the entire 20 bytes of space reserved for media addresses, so the TIPC media type ID is not explicitly stored in the packet payload. Sample output of tipc-config: # tipc-config -v -addr -netid -nt=all -p -m -b -n -ls node address: <10.1.4> current network id: 4711 Type Lower Upper Port Identity Publication Scope 0 167776257 167776257 <10.1.1:1855512577> 1855512578 cluster 167776260 167776260 <10.1.4:1216454657> 1216454658 zone 1 1 1 <10.1.4:1216479235> 1216479236 node Ports: 1216479235: bound to {1,1} 1216454657: bound to {0,167776260} Media: eth ib Bearers: ib:ib0 Nodes known: <10.1.1>: up Link <broadcast-link> Window:20 packets RX packets:0 fragments:0/0 bundles:0/0 TX packets:0 fragments:0/0 bundles:0/0 RX naks:0 defs:0 dups:0 TX naks:0 acks:0 dups:0 Congestion bearer:0 link:0 Send queue max:0 avg:0 Link <10.1.4:ib0-10.1.1:ib0> ACTIVE MTU:2044 Priority:10 Tolerance:1500 ms Window:50 packets RX packets:80 fragments:0/0 bundles:0/0 TX packets:40 fragments:0/0 bundles:0/0 TX profile sample:22 packets average:54 octets 0-64:100% -256:0% -1024:0% -4096:0% -16384:0% -32768:0% -66000:0% RX states:410 probes:213 naks:0 defs:0 dups:0 TX states:410 probes:197 naks:0 acks:0 dups:0 Congestion bearer:0 link:0 Send queue max:1 avg:0 Signed-off-by: Patrick McHardy <kaber@trash.net> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-04-17 14:18:28 +08:00
/* Size of the scratch buffer used when formatting a media address as text */
#define MAX_ADDR_STR 60
/*
 * NULL-terminated table of the media types this file can look up.
 * The InfiniBand entry is only present when CONFIG_TIPC_MEDIA_IB is defined.
 */
static struct tipc_media * const media_info_array[] = {
&eth_media_info,
#ifdef CONFIG_TIPC_MEDIA_IB
&ib_media_info,
#endif
NULL
};
/*
 * Table of enabled bearers, indexed by bearer identity.
 * A NULL entry marks an unused slot.
 */
struct tipc_bearer *bearer_list[MAX_BEARERS + 1];
/* Forward declaration: tipc_enable_bearer() calls this before its definition */
static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down);
/**
* tipc_media_find - locates specified media object by name
*/
struct tipc_media *tipc_media_find(const char *name)
{
u32 i;
for (i = 0; media_info_array[i] != NULL; i++) {
if (!strcmp(media_info_array[i]->name, name))
break;
}
return media_info_array[i];
}
/**
 * media_find_id - look up a media descriptor by its type identifier
 * @type: media type identifier to match against each entry's type_id
 *
 * Returns the matching entry of media_info_array, or NULL if no
 * registered media uses that identifier.
 */
static struct tipc_media *media_find_id(u8 type)
{
	struct tipc_media * const *slot;

	/* the table is NULL-terminated, so walk it until the sentinel */
	for (slot = media_info_array; *slot; slot++) {
		if ((*slot)->type_id == type)
			return *slot;
	}
	return NULL;
}
/**
* tipc_media_addr_printf - record media address in print buffer
*/
tipc: phase out most of the struct print_buf usage The tipc_printf is renamed to tipc_snprintf, as the new name describes more what the function actually does. It is also changed to take a buffer and length parameter and return number of characters written to the buffer. All callers of this function that used to pass a print_buf are updated. Final removal of the struct print_buf itself will be done synchronously with the pending removal of the deprecated logging code that also was using it. Functions that build up a response message with a list of ports, nametable contents etc. are changed to return the number of characters written to the output buffer. This information was previously hidden in a field of the print_buf struct, and the number of chars written was fetched with a call to tipc_printbuf_validate. This function is removed since it is no longer referenced nor needed. A generic max size ULTRA_STRING_MAX_LEN is defined, named in keeping with the existing TIPC_TLV_ULTRA_STRING, and the various definitions in port, link and nametable code that largely duplicated this information are removed. This means that amount of link statistics that can be returned is now increased from 2k to 32k. The buffer overflow check is now done just before the reply message is passed over netlink or TIPC to a remote node and the message indicating a truncated buffer is changed to a less dramatic one (less CAPS), placed at the end of the message. Signed-off-by: Erik Hugne <erik.hugne@ericsson.com> Signed-off-by: Jon Maloy <jon.maloy@ericsson.com> Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-06-29 12:50:23 +08:00
void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a)
{
char addr_str[MAX_ADDR_STR];
struct tipc_media *m_ptr;
tipc: phase out most of the struct print_buf usage The tipc_printf is renamed to tipc_snprintf, as the new name describes more what the function actually does. It is also changed to take a buffer and length parameter and return number of characters written to the buffer. All callers of this function that used to pass a print_buf are updated. Final removal of the struct print_buf itself will be done synchronously with the pending removal of the deprecated logging code that also was using it. Functions that build up a response message with a list of ports, nametable contents etc. are changed to return the number of characters written to the output buffer. This information was previously hidden in a field of the print_buf struct, and the number of chars written was fetched with a call to tipc_printbuf_validate. This function is removed since it is no longer referenced nor needed. A generic max size ULTRA_STRING_MAX_LEN is defined, named in keeping with the existing TIPC_TLV_ULTRA_STRING, and the various definitions in port, link and nametable code that largely duplicated this information are removed. This means that amount of link statistics that can be returned is now increased from 2k to 32k. The buffer overflow check is now done just before the reply message is passed over netlink or TIPC to a remote node and the message indicating a truncated buffer is changed to a less dramatic one (less CAPS), placed at the end of the message. Signed-off-by: Erik Hugne <erik.hugne@ericsson.com> Signed-off-by: Jon Maloy <jon.maloy@ericsson.com> Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-06-29 12:50:23 +08:00
int ret;
m_ptr = media_find_id(a->media_id);
if (m_ptr && !m_ptr->addr2str(a, addr_str, sizeof(addr_str)))
tipc: phase out most of the struct print_buf usage The tipc_printf is renamed to tipc_snprintf, as the new name describes more what the function actually does. It is also changed to take a buffer and length parameter and return number of characters written to the buffer. All callers of this function that used to pass a print_buf are updated. Final removal of the struct print_buf itself will be done synchronously with the pending removal of the deprecated logging code that also was using it. Functions that build up a response message with a list of ports, nametable contents etc. are changed to return the number of characters written to the output buffer. This information was previously hidden in a field of the print_buf struct, and the number of chars written was fetched with a call to tipc_printbuf_validate. This function is removed since it is no longer referenced nor needed. A generic max size ULTRA_STRING_MAX_LEN is defined, named in keeping with the existing TIPC_TLV_ULTRA_STRING, and the various definitions in port, link and nametable code that largely duplicated this information are removed. This means that amount of link statistics that can be returned is now increased from 2k to 32k. The buffer overflow check is now done just before the reply message is passed over netlink or TIPC to a remote node and the message indicating a truncated buffer is changed to a less dramatic one (less CAPS), placed at the end of the message. Signed-off-by: Erik Hugne <erik.hugne@ericsson.com> Signed-off-by: Jon Maloy <jon.maloy@ericsson.com> Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-06-29 12:50:23 +08:00
ret = tipc_snprintf(buf, len, "%s(%s)", m_ptr->name, addr_str);
else {
u32 i;
tipc: phase out most of the struct print_buf usage The tipc_printf is renamed to tipc_snprintf, as the new name describes more what the function actually does. It is also changed to take a buffer and length parameter and return number of characters written to the buffer. All callers of this function that used to pass a print_buf are updated. Final removal of the struct print_buf itself will be done synchronously with the pending removal of the deprecated logging code that also was using it. Functions that build up a response message with a list of ports, nametable contents etc. are changed to return the number of characters written to the output buffer. This information was previously hidden in a field of the print_buf struct, and the number of chars written was fetched with a call to tipc_printbuf_validate. This function is removed since it is no longer referenced nor needed. A generic max size ULTRA_STRING_MAX_LEN is defined, named in keeping with the existing TIPC_TLV_ULTRA_STRING, and the various definitions in port, link and nametable code that largely duplicated this information are removed. This means that amount of link statistics that can be returned is now increased from 2k to 32k. The buffer overflow check is now done just before the reply message is passed over netlink or TIPC to a remote node and the message indicating a truncated buffer is changed to a less dramatic one (less CAPS), placed at the end of the message. Signed-off-by: Erik Hugne <erik.hugne@ericsson.com> Signed-off-by: Jon Maloy <jon.maloy@ericsson.com> Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-06-29 12:50:23 +08:00
ret = tipc_snprintf(buf, len, "UNKNOWN(%u)", a->media_id);
for (i = 0; i < sizeof(a->value); i++)
tipc: phase out most of the struct print_buf usage The tipc_printf is renamed to tipc_snprintf, as the new name describes more what the function actually does. It is also changed to take a buffer and length parameter and return number of characters written to the buffer. All callers of this function that used to pass a print_buf are updated. Final removal of the struct print_buf itself will be done synchronously with the pending removal of the deprecated logging code that also was using it. Functions that build up a response message with a list of ports, nametable contents etc. are changed to return the number of characters written to the output buffer. This information was previously hidden in a field of the print_buf struct, and the number of chars written was fetched with a call to tipc_printbuf_validate. This function is removed since it is no longer referenced nor needed. A generic max size ULTRA_STRING_MAX_LEN is defined, named in keeping with the existing TIPC_TLV_ULTRA_STRING, and the various definitions in port, link and nametable code that largely duplicated this information are removed. This means that amount of link statistics that can be returned is now increased from 2k to 32k. The buffer overflow check is now done just before the reply message is passed over netlink or TIPC to a remote node and the message indicating a truncated buffer is changed to a less dramatic one (less CAPS), placed at the end of the message. Signed-off-by: Erik Hugne <erik.hugne@ericsson.com> Signed-off-by: Jon Maloy <jon.maloy@ericsson.com> Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-06-29 12:50:23 +08:00
ret += tipc_snprintf(buf - ret, len + ret,
"-%02x", a->value[i]);
}
}
/**
* tipc_media_get_names - record names of registered media in buffer
*/
struct sk_buff *tipc_media_get_names(void)
{
struct sk_buff *buf;
int i;
buf = tipc_cfg_reply_alloc(MAX_MEDIA * TLV_SPACE(TIPC_MAX_MEDIA_NAME));
if (!buf)
return NULL;
for (i = 0; media_info_array[i] != NULL; i++) {
tipc_cfg_append_tlv(buf, TIPC_TLV_MEDIA_NAME,
media_info_array[i]->name,
strlen(media_info_array[i]->name) + 1);
}
return buf;
}
/**
 * bearer_name_validate - validate & (optionally) deconstruct bearer name
 * @name: ptr to bearer name string, expected in "<media>:<interface>" form
 * @name_parts: ptr to area for bearer name components (or NULL if not needed)
 *
 * Returns 1 if bearer name is valid, otherwise 0.
 */
static int bearer_name_validate(const char *name,
				struct tipc_bearer_names *name_parts)
{
	char scratch[TIPC_MAX_BEARER_NAME];
	char *sep;
	char *ifc;
	u32 media_sz;
	u32 ifc_sz;

	/*
	 * Clear the last byte before copying: if the copy then leaves a
	 * non-zero character there, the name (terminator included) did not
	 * fit.  Clearing first also covers a strncpy() that does not pad
	 * the destination with NULs.
	 */
	scratch[TIPC_MAX_BEARER_NAME - 1] = 0;
	strncpy(scratch, name, TIPC_MAX_BEARER_NAME);
	if (scratch[TIPC_MAX_BEARER_NAME - 1] != 0)
		return 0;

	/* split "<media>:<interface>" in place at the first ':' */
	sep = strchr(scratch, ':');
	if (!sep)
		return 0;
	*sep = 0;
	ifc = sep + 1;

	/* both sizes include the terminating NUL, so 1 means "empty" */
	media_sz = ifc - scratch;
	ifc_sz = strlen(ifc) + 1;

	if (media_sz <= 1 || media_sz > TIPC_MAX_MEDIA_NAME)
		return 0;
	if (ifc_sz <= 1 || ifc_sz > TIPC_MAX_IF_NAME)
		return 0;

	/* hand the two components back only when the caller asked for them */
	if (name_parts) {
		strcpy(name_parts->media_name, scratch);
		strcpy(name_parts->if_name, ifc);
	}
	return 1;
}
/**
 * tipc_bearer_find - locates bearer object with matching bearer name
 * @name: bearer name to search for (exact comparison)
 *
 * Scans the first MAX_BEARERS slots of bearer_list, skipping empty ones.
 *
 * Returns the matching bearer, or NULL if there is none.
 */
struct tipc_bearer *tipc_bearer_find(const char *name)
{
	u32 slot = 0;

	while (slot < MAX_BEARERS) {
		struct tipc_bearer *cand = bearer_list[slot++];

		if (!cand)
			continue;
		if (strcmp(cand->name, name) == 0)
			return cand;
	}
	return NULL;
}
/**
 * tipc_bearer_get_names - record names of bearers in buffer
 *
 * Allocates a configuration reply buffer and, while holding tipc_net_lock
 * for reading, appends one TIPC_TLV_BEARER_NAME TLV (name plus terminating
 * NUL) per enabled bearer.  Bearers are emitted grouped by media, in the
 * order the media appear in media_info_array.
 *
 * Returns the reply buffer, or NULL if it could not be allocated.
 */
struct sk_buff *tipc_bearer_get_names(void)
{
	struct tipc_media * const *slot;
	struct tipc_bearer *cur;
	struct sk_buff *reply;
	int idx;

	reply = tipc_cfg_reply_alloc(MAX_BEARERS * TLV_SPACE(TIPC_MAX_BEARER_NAME));
	if (reply == NULL)
		return NULL;

	read_lock_bh(&tipc_net_lock);
	for (slot = media_info_array; *slot != NULL; slot++) {
		for (idx = 0; idx < MAX_BEARERS; idx++) {
			cur = bearer_list[idx];
			/* skip free slots and bearers of a different media */
			if (cur == NULL || cur->media != *slot)
				continue;
			tipc_cfg_append_tlv(reply, TIPC_TLV_BEARER_NAME,
					    cur->name,
					    strlen(cur->name) + 1);
		}
	}
	read_unlock_bh(&tipc_net_lock);

	return reply;
}
/**
 * tipc_bearer_add_dest - register a destination with a bearer
 * @b_ptr: bearer being updated
 * @dest: destination handed to tipc_nmap_add() (presumably a node address; confirm)
 */
void tipc_bearer_add_dest(struct tipc_bearer *b_ptr, u32 dest)
{
/* add the destination to this bearer's node map */
tipc_nmap_add(&b_ptr->nodes, dest);
/* NOTE(review): presumably re-ranks the broadcast bearers after the map change - confirm */
tipc_bcbearer_sort();
/* let the bearer's discovery object (link_req) know a destination was added */
tipc_disc_add_dest(b_ptr->link_req);
}
/**
 * tipc_bearer_remove_dest - unregister a destination from a bearer
 * @b_ptr: bearer being updated
 * @dest: destination handed to tipc_nmap_remove() (presumably a node address; confirm)
 */
void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest)
{
/* drop the destination from this bearer's node map */
tipc_nmap_remove(&b_ptr->nodes, dest);
/* NOTE(review): presumably re-ranks the broadcast bearers after the map change - confirm */
tipc_bcbearer_sort();
/* let the bearer's discovery object (link_req) know a destination was removed */
tipc_disc_remove_dest(b_ptr->link_req);
}
/**
 * tipc_enable_bearer - enable bearer with the given name
 * @name: bearer name, in "<media>:<interface>" form
 * @disc_domain: discovery domain; must be valid, differ from the own
 *               address, and either enclose the own address or lie exactly
 *               in the own cluster
 * @priority: link priority (at most TIPC_MAX_LINK_PRI), or
 *            TIPC_MEDIA_LINK_PRI to use the media's default priority
 *
 * Validates the arguments, picks a free slot in bearer_list, allocates the
 * bearer, enables the underlying media and creates the discovery object.
 * At most two bearers may share a priority; if the requested one is
 * already used twice it is lowered step by step until a usable value is
 * found.
 *
 * Returns 0 on success.  Returns -ENOPROTOOPT in standalone mode, -ENOMEM
 * if the bearer cannot be allocated, the error reported by the media or
 * discovery layer if those fail, and -EINVAL for every other rejection.
 */
int tipc_enable_bearer(const char *name, u32 disc_domain, u32 priority)
{
	struct tipc_bearer *b_ptr;
	struct tipc_media *m_ptr;
	struct tipc_bearer_names b_names;
	char addr_string[16];
	u32 bearer_id;
	u32 with_this_prio;
	u32 i;
	int res = -EINVAL;

	if (!tipc_own_addr) {
		pr_warn("Bearer <%s> rejected, not supported in standalone mode\n",
			name);
		return -ENOPROTOOPT;
	}
	if (!bearer_name_validate(name, &b_names)) {
		pr_warn("Bearer <%s> rejected, illegal name\n", name);
		return -EINVAL;
	}
	if (tipc_addr_domain_valid(disc_domain) &&
	    (disc_domain != tipc_own_addr)) {
		if (tipc_in_scope(disc_domain, tipc_own_addr)) {
			disc_domain = tipc_own_addr & TIPC_CLUSTER_MASK;
			res = 0;   /* accept any node in own cluster */
		} else if (in_own_cluster_exact(disc_domain))
			res = 0;   /* accept specified node in own cluster */
	}
	if (res) {
		pr_warn("Bearer <%s> rejected, illegal discovery domain\n",
			name);
		return -EINVAL;
	}
	if ((priority > TIPC_MAX_LINK_PRI) &&
	    (priority != TIPC_MEDIA_LINK_PRI)) {
		pr_warn("Bearer <%s> rejected, illegal priority\n", name);
		return -EINVAL;
	}

	/*
	 * 'res' is 0 at this point (domain check passed).  Re-arm it so that
	 * every rejection below that jumps to 'exit' reports a failure
	 * instead of silently returning success.
	 */
	res = -EINVAL;

	write_lock_bh(&tipc_net_lock);

	m_ptr = tipc_media_find(b_names.media_name);
	if (!m_ptr) {
		pr_warn("Bearer <%s> rejected, media <%s> not registered\n",
			name, b_names.media_name);
		goto exit;
	}

	if (priority == TIPC_MEDIA_LINK_PRI)
		priority = m_ptr->priority;

restart:
	/* MAX_BEARERS means "no free slot found yet" */
	bearer_id = MAX_BEARERS;
	with_this_prio = 1;
	/* scan downwards so the lowest free slot wins */
	for (i = MAX_BEARERS; i-- != 0; ) {
		b_ptr = bearer_list[i];
		if (!b_ptr) {
			bearer_id = i;
			continue;
		}
		if (!strcmp(name, b_ptr->name)) {
			pr_warn("Bearer <%s> rejected, already enabled\n",
				name);
			goto exit;
		}
		/* a third bearer on the same priority forces an adjustment */
		if ((b_ptr->priority == priority) &&
		    (++with_this_prio > 2)) {
			if (priority-- == 0) {
				pr_warn("Bearer <%s> rejected, duplicate priority\n",
					name);
				goto exit;
			}
			pr_warn("Bearer <%s> priority adjustment required %u->%u\n",
				name, priority + 1, priority);
			goto restart;
		}
	}
	if (bearer_id >= MAX_BEARERS) {
		pr_warn("Bearer <%s> rejected, bearer limit reached (%u)\n",
			name, MAX_BEARERS);
		goto exit;
	}

	b_ptr = kzalloc(sizeof(*b_ptr), GFP_ATOMIC);
	if (!b_ptr) {
		res = -ENOMEM;
		goto exit;
	}
	/* length was already bounded by bearer_name_validate() */
	strcpy(b_ptr->name, name);
	b_ptr->media = m_ptr;
	res = m_ptr->enable_media(b_ptr);
	if (res) {
		pr_warn("Bearer <%s> rejected, enable failure (%d)\n",
			name, -res);
		/* the bearer was never published, so release it here */
		kfree(b_ptr);
		goto exit;
	}

	b_ptr->identity = bearer_id;
	b_ptr->tolerance = m_ptr->tolerance;
	b_ptr->window = m_ptr->window;
	b_ptr->domain = disc_domain;
	b_ptr->net_plane = bearer_id + 'A';
	b_ptr->priority = priority;

	res = tipc_disc_create(b_ptr, &b_ptr->bcast_addr);
	if (res) {
		bearer_disable(b_ptr, false);
		pr_warn("Bearer <%s> rejected, discovery object creation failed\n",
			name);
		goto exit;
	}

	bearer_list[bearer_id] = b_ptr;

	pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n",
		name,
		tipc_addr_string_fill(addr_string, disc_domain), priority);
exit:
	write_unlock_bh(&tipc_net_lock);
	return res;
}
/**
tipc: remove interface state mirroring in bearer struct 'tipc_bearer' is a generic representation of the underlying media type, and exists in a one-to-one relationship to each interface TIPC is using. The struct contains a 'blocked' flag that mirrors the operational and execution state of the represented interface, and is updated through notification calls from the latter. The users of tipc_bearer are checking this flag before each attempt to send a packet via the interface. This state mirroring serves no purpose in the current code base. TIPC links will not discover a media failure any faster through this mechanism, and in reality the flag only adds overhead at packet sending and reception. Furthermore, the fact that the flag needs to be protected by a spinlock aggregated into tipc_bearer has turned out to cause a serious and completely unnecessary deadlock problem. CPU0 CPU1 ---- ---- Time 0: bearer_disable() link_timeout() Time 1: spin_lock_bh(&b_ptr->lock) tipc_link_push_queue() Time 2: tipc_link_delete() tipc_bearer_blocked(b_ptr) Time 3: k_cancel_timer(&req->timer) spin_lock_bh(&b_ptr->lock) Time 4: del_timer_sync(&req->timer) I.e., del_timer_sync() on CPU0 never returns, because the timer handler on CPU1 is waiting for the bearer lock. We eliminate the 'blocked' flag from struct tipc_bearer, along with all tests on this flag. This not only resolves the deadlock, but also simplifies and speeds up the data path execution of TIPC. It also fits well into our ongoing effort to make the locking policy simpler and more manageable. An effect of this change is that we can get rid of functions such as tipc_bearer_blocked(), tipc_continue() and tipc_block_bearer(). We replace the latter with a new function, tipc_reset_bearer(), which resets all links associated to the bearer immediately after an interface goes down. A user might notice one slight change in link behaviour after this change. When an interface goes down, (e.g. 
through a NETDEV_DOWN event) all attached links will be reset immediately, instead of leaving it to each link to detect the failure through a timer-driven mechanism. We consider this an improvement, and see no obvious risks with the new behavior. Signed-off-by: Erik Hugne <erik.hugne@ericsson.com> Reviewed-by: Ying Xue <ying.xue@windriver.com> Reviewed-by: Paul Gortmaker <Paul.Gortmaker@windriver.com> Signed-off-by: Jon Maloy <jon.maloy@ericsson.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-12-06 23:08:00 +08:00
* tipc_reset_bearer - Reset all links established over this bearer
*/
static int tipc_reset_bearer(struct tipc_bearer *b_ptr)
{
read_lock_bh(&tipc_net_lock);
tipc: remove interface state mirroring in bearer struct 'tipc_bearer' is a generic representation of the underlying media type, and exists in a one-to-one relationship to each interface TIPC is using. The struct contains a 'blocked' flag that mirrors the operational and execution state of the represented interface, and is updated through notification calls from the latter. The users of tipc_bearer are checking this flag before each attempt to send a packet via the interface. This state mirroring serves no purpose in the current code base. TIPC links will not discover a media failure any faster through this mechanism, and in reality the flag only adds overhead at packet sending and reception. Furthermore, the fact that the flag needs to be protected by a spinlock aggregated into tipc_bearer has turned out to cause a serious and completely unnecessary deadlock problem. CPU0 CPU1 ---- ---- Time 0: bearer_disable() link_timeout() Time 1: spin_lock_bh(&b_ptr->lock) tipc_link_push_queue() Time 2: tipc_link_delete() tipc_bearer_blocked(b_ptr) Time 3: k_cancel_timer(&req->timer) spin_lock_bh(&b_ptr->lock) Time 4: del_timer_sync(&req->timer) I.e., del_timer_sync() on CPU0 never returns, because the timer handler on CPU1 is waiting for the bearer lock. We eliminate the 'blocked' flag from struct tipc_bearer, along with all tests on this flag. This not only resolves the deadlock, but also simplifies and speeds up the data path execution of TIPC. It also fits well into our ongoing effort to make the locking policy simpler and more manageable. An effect of this change is that we can get rid of functions such as tipc_bearer_blocked(), tipc_continue() and tipc_block_bearer(). We replace the latter with a new function, tipc_reset_bearer(), which resets all links associated to the bearer immediately after an interface goes down. A user might notice one slight change in link behaviour after this change. When an interface goes down, (e.g. 
through a NETDEV_DOWN event) all attached links will be reset immediately, instead of leaving it to each link to detect the failure through a timer-driven mechanism. We consider this an improvement, and see no obvious risks with the new behavior. Signed-off-by: Erik Hugne <erik.hugne@ericsson.com> Reviewed-by: Ying Xue <ying.xue@windriver.com> Reviewed-by: Paul Gortmaker <Paul.Gortmaker@windriver.com> Signed-off-by: Jon Maloy <jon.maloy@ericsson.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-12-06 23:08:00 +08:00
pr_info("Resetting bearer <%s>\n", b_ptr->name);
tipc_disc_delete(b_ptr->link_req);
tipc: remove 'links' list from tipc_bearer struct In our ongoing effort to simplify the TIPC locking structure, we see a need to remove the linked list for tipc_links in the bearer. This can be explained as follows. Currently, we have three different ways to access a link, via three different lists/tables: 1: Via a node hash table: Used by the time-critical outgoing/incoming data paths. (e.g. link_send_sections_fast() and tipc_recv_msg() ): grab net_lock(read) find node from node hash table grab node_lock select link grab bearer_lock send_msg() release bearer_lock release node lock release net_lock 2: Via a global linked list for nodes: Used by configuration commands (link_cmd_set_value()) grab net_lock(read) find node and link from global node list (using link name) grab node_lock update link release node lock release net_lock (Same locking order as above. No problem.) 3: Via the bearer's linked link list: Used by notifications from interface (e.g. tipc_disable_bearer() ) grab net_lock(write) grab bearer_lock get link ptr from bearer's link list get node from link grab node_lock delete link release node lock release bearer_lock release net_lock (Different order from above, but works because we grab the outer net_lock in write mode first, excluding all other access.) The first major goal in our simplification effort is to get rid of the "big" net_lock, replacing it with rcu-locks when accessing the node list and node hash array. This will come in a later patch series. But to get there we first need to rewrite access methods ##2 and 3, since removal of net_lock would introduce three major problems: a) In access method #2, we access the link before taking the protecting node_lock. This will not work once net_lock is gone, so we will have to change the access order. We will deal with this in a later commit in this series, "tipc: add node lock protection to link found by link_find_link()". 
b) When the outer protection from net_lock is gone, taking bearer_lock and node_lock in opposite order of method 1) and 2) will become an obvious deadlock hazard. This is fixed in the commit ("tipc: remove bearer_lock from tipc_bearer struct") later in this series. c) Similar to what is described in problem a), access method #3 starts with using a link pointer that is unprotected by node_lock, in order to via that pointer find the correct node struct and lock it. Before we remove net_lock, this access order must be altered. This is what we do with this commit. We can avoid introducing problem problem c) by even here using the global node list to find the node, before accessing its links. When we loop though the node list we use the own bearer identity as search criteria, thus easily finding the links that are associated to the resetting/disabling bearer. It should be noted that although this method is somewhat slower than the current list traversal, it is in no way time critical. This is only about resetting or deleting links, something that must be considered relatively infrequent events. As a bonus, we can get rid of the mutual pointers between links and bearers. After this commit, pointer dependency go in one direction only: from the link to the bearer. This commit pre-empts introduction of problem c) as described above. Signed-off-by: Ying Xue <ying.xue@windriver.com> Reviewed-by: Paul Gortmaker <paul.gortmaker@windriver.com> Signed-off-by: Jon Maloy <jon.maloy@ericsson.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2014-02-14 06:29:09 +08:00
tipc_link_reset_list(b_ptr->identity);
tipc_disc_create(b_ptr, &b_ptr->bcast_addr);
read_unlock_bh(&tipc_net_lock);
return 0;
}
/**
 * bearer_disable - shut down a bearer and release its memory
 * @b_ptr: bearer to disable; it is freed before this function returns
 * @shutting_down: passed through unchanged to tipc_link_delete_list()
 *
 * Note: This routine assumes caller holds tipc_net_lock.
 */
static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down)
{
	u32 i;

	pr_info("Disabling bearer <%s>\n", b_ptr->name);

	/* Let the media layer detach itself from the bearer first */
	b_ptr->media->disable_media(b_ptr);

	/* Remove the links and the discovery request tied to this bearer */
	tipc_link_delete_list(b_ptr->identity, shutting_down);
	if (b_ptr->link_req)
		tipc_disc_delete(b_ptr->link_req);

	/* Clear the bearer's slot in the global table before freeing it */
	for (i = 0; i < MAX_BEARERS; i++) {
		if (b_ptr == bearer_list[i]) {
			bearer_list[i] = NULL;
			break;
		}
	}
	kfree(b_ptr);
}
/**
 * tipc_disable_bearer - look up a bearer by name and disable it
 * @name: name of the bearer to disable
 *
 * The lookup and the disabling are both done with tipc_net_lock held for
 * writing.
 *
 * Returns 0 on success, or -EINVAL if no bearer with that name is found.
 */
int tipc_disable_bearer(const char *name)
{
	struct tipc_bearer *bearer;
	int res = 0;

	write_lock_bh(&tipc_net_lock);
	bearer = tipc_bearer_find(name);
	if (bearer != NULL) {
		bearer_disable(bearer, false);
	} else {
		pr_warn("Attempt to disable unknown bearer <%s>\n", name);
		res = -EINVAL;
	}
	write_unlock_bh(&tipc_net_lock);

	return res;
}
/* tipc_l2_media_addr_set - initialize an L2 media address structure
 *
 * The media-dependent "value" field receives the first hwaddr_len bytes of
 * @mac (hwaddr_len comes from the bearer's media) and its remaining bytes
 * are zeroed. The address is flagged as broadcast when @mac matches the
 * bearer's broadcast address over those hwaddr_len bytes.
 *
 * If hwaddr_len does not fit in the "value" field, a one-time warning is
 * emitted and @a is left unmodified.
 */
void tipc_l2_media_addr_set(const struct tipc_bearer *b,
			    struct tipc_media_addr *a, char *mac)
{
	int hwaddr_len = b->media->hwaddr_len;

	if (unlikely(sizeof(a->value) < hwaddr_len)) {
		WARN_ONCE(1, "Media length invalid\n");
		return;
	}

	/* Hardware address up front, zero padding behind it */
	memcpy(a->value, mac, hwaddr_len);
	memset(&a->value[hwaddr_len], 0, sizeof(a->value) - hwaddr_len);

	a->media_id = b->media->type_id;
	a->broadcast = (memcmp(mac, b->bcast_addr.value, hwaddr_len) == 0);
}
int tipc_enable_l2_media(struct tipc_bearer *b)
{
struct net_device *dev;
char *driver_name = strchr((const char *)b->name, ':') + 1;
/* Find device with specified name */
dev = dev_get_by_name(&init_net, driver_name);
if (!dev)
return -ENODEV;
/* Associate TIPC bearer with Ethernet bearer */
b->media_ptr = dev;
memset(b->bcast_addr.value, 0, sizeof(b->bcast_addr.value));
memcpy(b->bcast_addr.value, dev->broadcast, b->media->hwaddr_len);
b->bcast_addr.media_id = b->media->type_id;
b->bcast_addr.broadcast = 1;
b->mtu = dev->mtu;
tipc_l2_media_addr_set(b, &b->addr, (char *)dev->dev_addr);
rcu_assign_pointer(dev->tipc_ptr, b);
return 0;
}
/* tipc_disable_l2_media - detach TIPC bearer from its network device
 *
 * Clears the device's tipc_ptr, so that the receive handler and the device
 * event handler in this file no longer find a bearer for the device, and
 * then calls dev_put() on the device stored in the bearer's media_ptr.
 */
void tipc_disable_l2_media(struct tipc_bearer *b)
{
	struct net_device *ndev;

	ndev = (struct net_device *)b->media_ptr;

	/* Unpublish the bearer before letting go of the device */
	RCU_INIT_POINTER(ndev->tipc_ptr, NULL);
	dev_put(ndev);
}
/**
 * tipc_l2_send_msg - send a TIPC packet out over an Ethernet interface
 * @buf: the packet to be sent
 * @b: the bearer through which the packet is to be sent
 * @dest: peer destination address
 *
 * The packet is transmitted through a clone of @buf; it is the clone, not
 * @buf, that receives the link-layer header and is handed to
 * dev_queue_xmit().
 *
 * Always returns 0. If the clone cannot be allocated, or its headroom
 * cannot be enlarged, nothing is transmitted.
 */
int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b,
		     struct tipc_media_addr *dest)
{
	struct net_device *ndev = (struct net_device *)b->media_ptr;
	struct sk_buff *skb;
	int shortfall;

	skb = skb_clone(buf, GFP_ATOMIC);
	if (!skb)
		return 0;

	/* Grow the headroom if the device header would not fit */
	shortfall = ndev->hard_header_len - skb_headroom(buf);
	if (shortfall > 0) {
		if (pskb_expand_head(skb, SKB_DATA_ALIGN(shortfall), 0,
				     GFP_ATOMIC)) {
			kfree_skb(skb);
			return 0;
		}
	}

	skb_reset_network_header(skb);
	skb->dev = ndev;
	skb->protocol = htons(ETH_P_TIPC);
	dev_hard_header(skb, ndev, ETH_P_TIPC, dest->value,
			ndev->dev_addr, skb->len);
	dev_queue_xmit(skb);

	return 0;
}
/* tipc_bearer_send - sends buffer to destination over bearer
 *
 * Hands the buffer to the send_msg callback of the bearer's media; the
 * callback's return value is discarded.
 *
 * IMPORTANT:
 * The media send routine must not alter the buffer being passed in
 * as it may be needed for later retransmission!
 */
void tipc_bearer_send(struct tipc_bearer *b, struct sk_buff *buf,
struct tipc_media_addr *dest)
{
b->media->send_msg(buf, b, dest);
}
/**
* tipc_l2_rcv_msg - handle incoming TIPC message from an interface
* @buf: the received packet
* @dev: the net device that the packet was received on
* @pt: the packet_type structure which was used to register this handler
* @orig_dev: the original receive net device in case the device is a bond
*
* Accept only packets explicitly sent to this node, or broadcast packets;
* ignores packets sent using interface multicast, and traffic sent to other
* nodes (which can happen if interface is running in promiscuous mode).
*/
static int tipc_l2_rcv_msg(struct sk_buff *buf, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
struct tipc_bearer *b_ptr;
if (!net_eq(dev_net(dev), &init_net)) {
kfree_skb(buf);
return NET_RX_DROP;
}
rcu_read_lock();
b_ptr = rcu_dereference(dev->tipc_ptr);
if (likely(b_ptr)) {
if (likely(buf->pkt_type <= PACKET_BROADCAST)) {
buf->next = NULL;
tipc_rcv(buf, b_ptr);
rcu_read_unlock();
return NET_RX_SUCCESS;
}
}
rcu_read_unlock();
kfree_skb(buf);
return NET_RX_DROP;
}
/**
 * tipc_l2_device_event - handle device events from network device
 * @nb: the context of the notification
 * @evt: the type of event
 * @ptr: notifier info from which the affected net device is obtained
 *
 * Events for devices outside init_net, or for devices without an attached
 * bearer, are ignored (NOTIFY_DONE). Otherwise the bearer's MTU is synced
 * with the device and the bearer is reset or disabled depending on @evt;
 * NOTIFY_OK is returned.
 */
static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt,
				void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct tipc_bearer *bearer;
	int ret = NOTIFY_DONE;

	if (!net_eq(dev_net(dev), &init_net))
		return NOTIFY_DONE;

	rcu_read_lock();
	bearer = rcu_dereference(dev->tipc_ptr);
	if (!bearer)
		goto out;

	ret = NOTIFY_OK;
	bearer->mtu = dev->mtu;

	switch (evt) {
	case NETDEV_CHANGE:
		/* Only a lost carrier requires a reset */
		if (!netif_carrier_ok(dev))
			tipc_reset_bearer(bearer);
		break;
	case NETDEV_DOWN:
	case NETDEV_CHANGEMTU:
		tipc_reset_bearer(bearer);
		break;
	case NETDEV_CHANGEADDR:
		tipc_l2_media_addr_set(bearer, &bearer->addr,
				       (char *)dev->dev_addr);
		tipc_reset_bearer(bearer);
		break;
	case NETDEV_UNREGISTER:
	case NETDEV_CHANGENAME:
		tipc_disable_bearer(bearer->name);
		break;
	}
out:
	rcu_read_unlock();
	return ret;
}
/* Packet handler descriptor: routes ETH_P_TIPC frames to tipc_l2_rcv_msg().
 * Registered in tipc_bearer_setup() and removed in tipc_bearer_cleanup().
 */
static struct packet_type tipc_packet_type __read_mostly = {
.type = htons(ETH_P_TIPC),
.func = tipc_l2_rcv_msg,
};
/* Net device notifier block: delivers device events to
 * tipc_l2_device_event(). Registered in tipc_bearer_setup() and
 * unregistered in tipc_bearer_cleanup().
 */
static struct notifier_block notifier = {
.notifier_call = tipc_l2_device_event,
.priority = 0,
};
/**
 * tipc_bearer_setup - register the device notifier and the packet handler
 *
 * The packet handler is only added once the notifier has been registered
 * successfully.
 *
 * Returns 0 on success, or the error from register_netdevice_notifier().
 */
int tipc_bearer_setup(void)
{
	int err = register_netdevice_notifier(&notifier);

	if (!err)
		dev_add_pack(&tipc_packet_type);

	return err;
}
/* tipc_bearer_cleanup - undo tipc_bearer_setup()
 *
 * Unregisters the net device notifier and removes the TIPC packet handler.
 */
void tipc_bearer_cleanup(void)
{
unregister_netdevice_notifier(&notifier);
dev_remove_pack(&tipc_packet_type);
}
/* tipc_bearer_stop - disable every bearer still present in bearer_list
 *
 * Each non-empty slot is passed to bearer_disable() with shutting_down set,
 * and the slot is cleared afterwards.
 */
void tipc_bearer_stop(void)
{
	u32 slot;

	for (slot = 0; slot < MAX_BEARERS; slot++) {
		struct tipc_bearer *bearer = bearer_list[slot];

		if (!bearer)
			continue;

		bearer_disable(bearer, true);
		bearer_list[slot] = NULL;
	}
}