OpenCloudOS-Kernel

Commit Graph

Author	SHA1	Message	Date
Kees Cook	6da2ec5605	treewide: kmalloc() -> kmalloc_array() The kmalloc() function has a 2-factor argument form, kmalloc_array(). This patch replaces cases of: kmalloc(a * b, gfp) with: kmalloc_array(a * b, gfp) as well as handling cases of: kmalloc(a * b * c, gfp) with: kmalloc(array3_size(a, b, c), gfp) as it's slightly less ugly than: kmalloc_array(array_size(a, b), c, gfp) This does, however, attempt to ignore constant size factors like: kmalloc(4 * 1024, gfp) though any constants defined via macros get caught up in the conversion. Any factors with a sizeof() of "unsigned char", "char", and "u8" were dropped, since they're redundant. The tools/ directory was manually excluded, since it has its own implementation of kmalloc(). The Coccinelle script used for this was: // Fix redundant parens around sizeof(). @@ type TYPE; expression THING, E; @@ ( kmalloc( - (sizeof(TYPE)) * E + sizeof(TYPE) * E , ...) \| kmalloc( - (sizeof(THING)) * E + sizeof(THING) * E , ...) ) // Drop single-byte sizes and redundant parens. @@ expression COUNT; typedef u8; typedef __u8; @@ ( kmalloc( - sizeof(u8) * (COUNT) + COUNT , ...) \| kmalloc( - sizeof(__u8) * (COUNT) + COUNT , ...) \| kmalloc( - sizeof(char) * (COUNT) + COUNT , ...) \| kmalloc( - sizeof(unsigned char) * (COUNT) + COUNT , ...) \| kmalloc( - sizeof(u8) * COUNT + COUNT , ...) \| kmalloc( - sizeof(__u8) * COUNT + COUNT , ...) \| kmalloc( - sizeof(char) * COUNT + COUNT , ...) \| kmalloc( - sizeof(unsigned char) * COUNT + COUNT , ...) ) // 2-factor product with sizeof(type/expression) and identifier or constant. @@ type TYPE; expression THING; identifier COUNT_ID; constant COUNT_CONST; @@ ( - kmalloc + kmalloc_array ( - sizeof(TYPE) * (COUNT_ID) + COUNT_ID, sizeof(TYPE) , ...) \| - kmalloc + kmalloc_array ( - sizeof(TYPE) * COUNT_ID + COUNT_ID, sizeof(TYPE) , ...) \| - kmalloc + kmalloc_array ( - sizeof(TYPE) * (COUNT_CONST) + COUNT_CONST, sizeof(TYPE) , ...) \| - kmalloc + kmalloc_array ( - sizeof(TYPE) * COUNT_CONST + COUNT_CONST, sizeof(TYPE) , ...) \| - kmalloc + kmalloc_array ( - sizeof(THING) * (COUNT_ID) + COUNT_ID, sizeof(THING) , ...) \| - kmalloc + kmalloc_array ( - sizeof(THING) * COUNT_ID + COUNT_ID, sizeof(THING) , ...) \| - kmalloc + kmalloc_array ( - sizeof(THING) * (COUNT_CONST) + COUNT_CONST, sizeof(THING) , ...) \| - kmalloc + kmalloc_array ( - sizeof(THING) * COUNT_CONST + COUNT_CONST, sizeof(THING) , ...) ) // 2-factor product, only identifiers. @@ identifier SIZE, COUNT; @@ - kmalloc + kmalloc_array ( - SIZE * COUNT + COUNT, SIZE , ...) // 3-factor product with 1 sizeof(type) or sizeof(expression), with // redundant parens removed. @@ expression THING; identifier STRIDE, COUNT; type TYPE; @@ ( kmalloc( - sizeof(TYPE) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) \| kmalloc( - sizeof(TYPE) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) \| kmalloc( - sizeof(TYPE) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) \| kmalloc( - sizeof(TYPE) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) \| kmalloc( - sizeof(THING) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) \| kmalloc( - sizeof(THING) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) \| kmalloc( - sizeof(THING) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) \| kmalloc( - sizeof(THING) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) ) // 3-factor product with 2 sizeof(variable), with redundant parens removed. @@ expression THING1, THING2; identifier COUNT; type TYPE1, TYPE2; @@ ( kmalloc( - sizeof(TYPE1) * sizeof(TYPE2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) \| kmalloc( - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) \| kmalloc( - sizeof(THING1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) \| kmalloc( - sizeof(THING1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) \| kmalloc( - sizeof(TYPE1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) \| kmalloc( - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) ) // 3-factor product, only identifiers, with redundant parens removed. @@ identifier STRIDE, SIZE, COUNT; @@ ( kmalloc( - (COUNT) * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) \| kmalloc( - COUNT * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) \| kmalloc( - COUNT * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) \| kmalloc( - (COUNT) * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) \| kmalloc( - COUNT * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) \| kmalloc( - (COUNT) * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) \| kmalloc( - (COUNT) * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) \| kmalloc( - COUNT * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) ) // Any remaining multi-factor products, first at least 3-factor products, // when they're not all constants... @@ expression E1, E2, E3; constant C1, C2, C3; @@ ( kmalloc(C1 * C2 * C3, ...) \| kmalloc( - (E1) * E2 * E3 + array3_size(E1, E2, E3) , ...) \| kmalloc( - (E1) * (E2) * E3 + array3_size(E1, E2, E3) , ...) \| kmalloc( - (E1) * (E2) * (E3) + array3_size(E1, E2, E3) , ...) \| kmalloc( - E1 * E2 * E3 + array3_size(E1, E2, E3) , ...) ) // And then all remaining 2 factors products when they're not all constants, // keeping sizeof() as the second factor argument. @@ expression THING, E1, E2; type TYPE; constant C1, C2, C3; @@ ( kmalloc(sizeof(THING) * C2, ...) \| kmalloc(sizeof(TYPE) * C2, ...) \| kmalloc(C1 * C2 * C3, ...) \| kmalloc(C1 * C2, ...) \| - kmalloc + kmalloc_array ( - sizeof(TYPE) * (E2) + E2, sizeof(TYPE) , ...) \| - kmalloc + kmalloc_array ( - sizeof(TYPE) * E2 + E2, sizeof(TYPE) , ...) \| - kmalloc + kmalloc_array ( - sizeof(THING) * (E2) + E2, sizeof(THING) , ...) \| - kmalloc + kmalloc_array ( - sizeof(THING) * E2 + E2, sizeof(THING) , ...) \| - kmalloc + kmalloc_array ( - (E1) * E2 + E1, E2 , ...) \| - kmalloc + kmalloc_array ( - (E1) * (E2) + E1, E2 , ...) \| - kmalloc + kmalloc_array ( - E1 * E2 + E1, E2 , ...) ) Signed-off-by: Kees Cook <keescook@chromium.org>	2018-06-12 16:19:22 -07:00
Huy Nguyen	e549f6f9c0	net/dcb: Add dcbnl buffer attribute In this patch, we add dcbnl buffer attribute to allow user change the NIC's buffer configuration such as priority to buffer mapping and buffer size of individual buffer. This attribute combined with pfc attribute allows advanced user to fine tune the qos setting for specific priority queue. For example, user can give dedicated buffer for one or more priorities or user can give large buffer to certain priorities. The dcb buffer configuration will be controlled by lldptool. lldptool -T -i eth2 -V BUFFER prio 0,2,5,7,1,2,3,6 maps priorities 0,1,2,3,4,5,6,7 to receive buffer 0,2,5,7,1,2,3,6 lldptool -T -i eth2 -V BUFFER size 87296,87296,0,87296,0,0,0,0 sets receive buffer size for buffer 0,1,2,3,4,5,6,7 respectively After discussion on mailing list with Jakub, Jiri, Ido and John, we agreed to choose dcbnl over devlink interface since this feature is intended to set port attributes which are governed by the netdev instance of that port, where devlink API is more suitable for global ASIC configurations. We present an use case scenario where dcbnl buffer attribute configured by advance user helps reduce the latency of messages of different sizes. Scenarios description: On ConnectX-5, we run latency sensitive traffic with small/medium message sizes ranging from 64B to 256KB and bandwidth sensitive traffic with large messages sizes 512KB and 1MB. We group small, medium, and large message sizes to their own pfc enables priorities as follow. Priorities 1 & 2 (64B, 256B and 1KB) Priorities 3 & 4 (4KB, 8KB, 16KB, 64KB, 128KB and 256KB) Priorities 5 & 6 (512KB and 1MB) By default, ConnectX-5 maps all pfc enabled priorities to a single lossless fixed buffer size of 50% of total available buffer space. The other 50% is assigned to lossy buffer. Using dcbnl buffer attribute, we create three equal size lossless buffers. Each buffer has 25% of total available buffer space. Thus, the lossy buffer size reduces to 25%. Priority to lossless buffer mappings are set as follow. Priorities 1 & 2 on lossless buffer #1 Priorities 3 & 4 on lossless buffer #2 Priorities 5 & 6 on lossless buffer #3 We observe improvements in latency for small and medium message sizes as follows. Please note that the large message sizes bandwidth performance is reduced but the total bandwidth remains the same. 256B message size (42 % latency reduction) 4K message size (21% latency reduction) 64K message size (16% latency reduction) CC: Ido Schimmel <idosch@idosch.org> CC: Jakub Kicinski <jakub.kicinski@netronome.com> CC: Jiri Pirko <jiri@resnulli.us> CC: Or Gerlitz <gerlitz.or@gmail.com> CC: Parav Pandit <parav@mellanox.com> CC: Aron Silverton <aron.silverton@oracle.com> Signed-off-by: Huy Nguyen <huyn@mellanox.com> Reviewed-by: Parav Pandit <parav@mellanox.com> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>	2018-05-24 14:22:59 -07:00
Florian Westphal	b97bac64a5	rtnetlink: make rtnl_register accept a flags parameter This change allows us to later indicate to rtnetlink core that certain doit functions should be called without acquiring rtnl_mutex. This change should have no effect, we simply replace the last (now unused) calcit argument with the new flag. Signed-off-by: Florian Westphal <fw@strlen.de> Reviewed-by: Hannes Frederic Sowa <hannes@stressinduktion.org> Signed-off-by: David S. Miller <davem@davemloft.net>	2017-08-09 16:57:38 -07:00
stephen hemminger	332b4fc886	dcb: enforce minimum length on IEEE_APPS attribute Found by reviewing the warning about unused policy table. The code implies that it meant to check for size, but since it unrolled the loop for attribute validation that is never used. Instead do explicit check for attribute. Compile tested only. Needs review by original author. Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com> Signed-off-by: David S. Miller <davem@davemloft.net>	2017-05-21 13:42:33 -04:00
David Ahern	c21ef3e343	net: rtnetlink: plumb extended ack to doit function Add netlink_ext_ack arg to rtnl_doit_func. Pass extack arg to nlmsg_parse for doit functions that call it directly. This is the first step to using extended error reporting in rtnetlink. >From here individual subsystems can be updated to set netlink_ext_ack as needed. Signed-off-by: David Ahern <dsa@cumulusnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net>	2017-04-17 15:35:38 -04:00
Johannes Berg	fceb6435e8	netlink: pass extended ACK struct to parsing functions Pass the new extended ACK reporting struct to all of the generic netlink parsing functions. For now, pass NULL in almost all callers (except for some in the core.) Signed-off-by: Johannes Berg <johannes.berg@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>	2017-04-13 13:58:22 -04:00
Pan Bian	c66ebf2db5	net: dcb: set error code on failures In function dcbnl_cee_fill(), returns the value of variable err on errors. However, on some error paths (e.g. nla put fails), its value may be 0. It may be better to explicitly set a negative errno to variable err before returning. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=188881 Signed-off-by: Pan Bian <bianpan2016@163.com> Signed-off-by: David S. Miller <davem@davemloft.net>	2016-12-03 23:54:25 -05:00
Paul Gortmaker	36b9ad8084	net/dcb: make dcbnl.c explicitly non-modular The Kconfig currently controlling compilation of this code is: net/dcb/Kconfig:config DCB net/dcb/Kconfig: bool "Data Center Bridging support" ...meaning that it currently is not being built as a module by anyone. Lets remove the modular code that is essentially orphaned, so that when reading the driver there is no doubt it is builtin-only. Since module_init translates to device_initcall in the non-modular case, the init ordering remains unchanged with this commit. We can change to one of the other priority initcalls (subsys?) at any later date, if desired. We also delete the MODULE_LICENSE tag etc. since all that information is (or is now) already contained at the top of the file in the comments. Cc: "David S. Miller" <davem@davemloft.net> Cc: Or Gerlitz <ogerlitz@mellanox.com> Cc: Anish Bhatt <anish@chelsio.com> Cc: John Fastabend <john.r.fastabend@intel.com> Cc: Shani Michaeli <shanim@mellanox.com> Cc: netdev@vger.kernel.org Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>	2015-10-09 07:52:27 -07:00
Shani Michaeli	c93682477b	net/dcb: Add IEEE QCN attribute As specified in 802.1Qau spec. Add this optional attribute to the DCB netlink layer. To allow for application to use the new attribute, NIC drivers should implement and register the callbacks ieee_getqcn, ieee_setqcn and ieee_getqcnstats. The QCN attribute holds a set of parameters for management, and a set of statistics to provide informative data on Congestion-Control defined by this spec. Signed-off-by: Shani Michaeli <shanim@mellanox.com> Signed-off-by: Shachar Raindel <raindel@mellanox.com> Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com> Acked-by: John Fastabend <john.r.fastabend@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>	2015-03-06 21:50:02 -05:00
Anish Bhatt	52cff74eef	dcbnl : Disable software interrupts before taking dcb_lock Solves possible lockup issues that can be seen from firmware DCB agents calling into the DCB app api. DCB firmware event queues can be tied in with NAPI so that dcb events are generated in softIRQ context. This can results in calls to dcb_*app() functions which try to take the dcb_lock. If the the event triggers while we also have the dcb_lock because lldpad or some other agent happened to be issuing a get/set command we could see a cpu lockup. This code was not originally written with firmware agents in mind, hence grabbing dcb_lock from softIRQ context was not considered. Signed-off-by: Anish Bhatt <anish@chelsio.com> Signed-off-by: David S. Miller <davem@davemloft.net>	2014-11-16 14:50:52 -05:00
Anish Bhatt	16eecd9be4	dcbnl : Fix misleading dcb_app->priority explanation Current explanation of dcb_app->priority is wrong. It says priority is expected to be a 3-bit unsigned integer which is only true when working with DCBx-IEEE. Use of dcb_app->priority by DCBx-CEE expects it to be 802.1p user priority bitmap. Updated accordingly This affects the cxgb4 driver, but I will post those changes as part of a larger changeset shortly. Fixes: `3e29027af4` ("dcbnl: add support for ieee8021Qaz attributes") Signed-off-by: Anish Bhatt <anish@chelsio.com> Acked-by: John Fastabend <john.r.fastabend@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>	2014-07-30 17:21:05 -07:00
Anish Bhatt	c2659479f7	Update setapp/getapp prototypes in dcbnl_rtnl_ops to return int instead of u8 v2: fixed issue with checking return of dcbnl_rtnl_ops->getapp() Signed-off-by: Anish Bhatt <anish@chelsio.com> Signed-off-by: David S. Miller <davem@davemloft.net>	2014-07-17 16:02:29 -07:00
Eric W. Biederman	90f62cf30a	net: Use netlink_ns_capable to verify the permisions of netlink messages It is possible by passing a netlink socket to a more privileged executable and then to fool that executable into writing to the socket data that happens to be valid netlink message to do something that privileged executable did not intend to do. To keep this from happening replace bare capable and ns_capable calls with netlink_capable, netlink_net_calls and netlink_ns_capable calls. Which act the same as the previous calls except they verify that the opener of the socket had the desired permissions as well. Reported-by: Andy Lutomirski <luto@amacapital.net> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> Signed-off-by: David S. Miller <davem@davemloft.net>	2014-04-24 13:44:54 -04:00
Ying Xue	d9ac62be57	dcb: use __dev_get_by_name instead of dev_get_by_name to find interface The following call chain indicates that dcb_doit() is protected under rtnl_lock. So if we use __dev_get_by_name() instead of dev_get_by_name() to find interface handlers in it, this would help us avoid to change interface reference counter. rtnetlink_rcv() rtnl_lock() netlink_rcv_skb() dcb_doit() rtnl_unlock() Cc: John Fastabend <john.r.fastabend@intel.com> Signed-off-by: Ying Xue <ying.xue@windriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>	2014-01-14 18:50:46 -08:00
Jeff Kirsher	c057b190b8	net/*: Fix FSF address in file headers Several files refer to an old address for the Free Software Foundation in the file header comment. Resolve by replacing the address with the URL <http://www.gnu.org/licenses/> so that we do not have to keep updating the header comments anytime the address changes. CC: John Fastabend <john.r.fastabend@intel.com> CC: Alex Duyck <alexander.h.duyck@intel.com> CC: Marcel Holtmann <marcel@holtmann.org> CC: Gustavo Padovan <gustavo@padovan.org> CC: Johan Hedberg <johan.hedberg@gmail.com> CC: Jamal Hadi Salim <jhs@mojatatu.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>	2013-12-06 12:37:57 -05:00
Thomas Graf	661d2967b3	rtnetlink: Remove passing of attributes into rtnl_doit functions With decnet converted, we can finally get rid of rta_buf and its computations around it. It also gets rid of the minimal header length verification since all message handlers do that explicitly anyway. Signed-off-by: Thomas Graf <tgraf@suug.ch> Signed-off-by: David S. Miller <davem@davemloft.net>	2013-03-22 10:31:16 -04:00
Mathias Krause	29cd8ae0e1	dcbnl: fix various netlink info leaks The dcb netlink interface leaks stack memory in various places: * perm_addr[] buffer is only filled at max with 12 of the 32 bytes but copied completely, * no in-kernel driver fills all fields of an IEEE 802.1Qaz subcommand, so we're leaking up to 58 bytes for ieee_ets structs, up to 136 bytes for ieee_pfc structs, etc., * the same is true for CEE -- no in-kernel driver fills the whole struct, Prevent all of the above stack info leaks by properly initializing the buffers/structures involved. Signed-off-by: Mathias Krause <minipli@googlemail.com> Signed-off-by: David S. Miller <davem@davemloft.net>	2013-03-10 05:19:26 -04:00
John Fastabend	7c77ab24e3	net: Allow DCBnl to use other namespaces besides init_net Allow DCB and net namespace to work together. This is useful if you have containers that are bound to 'phys' interfaces that want to also manage their DCB attributes. The net namespace is taken from sock_net(skb->sk) of the netlink skb. CC: "Eric W. Biederman" <ebiederm@xmission.com> Signed-off-by: John Fastabend <john.r.fastabend@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>	2012-12-10 14:09:01 -05:00
Eric W. Biederman	dfc47ef863	net: Push capable(CAP_NET_ADMIN) into the rtnl methods - In rtnetlink_rcv_msg convert the capable(CAP_NET_ADMIN) check to ns_capable(net->user-ns, CAP_NET_ADMIN). Allowing unprivileged users to make netlink calls to modify their local network namespace. - In the rtnetlink doit methods add capable(CAP_NET_ADMIN) so that calls that are not safe for unprivileged users are still protected. Later patches will remove the extra capable calls from methods that are safe for unprivilged users. Acked-by: Serge Hallyn <serge.hallyn@canonical.com> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> Signed-off-by: David S. Miller <davem@davemloft.net>	2012-11-18 20:32:44 -05:00
Eric W. Biederman	15e473046c	netlink: Rename pid to portid to avoid confusion It is a frequent mistake to confuse the netlink port identifier with a process identifier. Try to reduce this confusion by renaming fields that hold port identifiers portid instead of pid. I have carefully avoided changing the structures exported to userspace to avoid changing the userspace API. I have successfully built an allyesconfig kernel with this change. Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> Acked-by: Stephen Hemminger <shemminger@vyatta.com> Signed-off-by: David S. Miller <davem@davemloft.net>	2012-09-10 15:30:41 -04:00
Ben Hutchings	ae86b9e384	net: Fix non-kernel-doc comments with kernel-doc start marker Signed-off-by: Ben Hutchings <bhutchings@solarflare.com> Signed-off-by: David S. Miller <davem@davemloft.net>	2012-07-10 23:13:45 -07:00
John Fastabend	bb1dfefdc6	net: dcb: fix small regression in __dcbnl_pg_setcfg() A small regression was introduced in the reply command of dcbnl_pg_setcfg(). User space apps may be expecting the DCB_ATTR_PG_CFG attribute to be returned with the patch below TX or RX variants are returned. commit `7be994138b` Author: Thomas Graf <tgraf@suug.ch> Date: Wed Jun 13 02:54:55 2012 +0000 dcbnl: Shorten all command handling functions This patch reverts this behavior and returns DCB_ATTR_PG_CFG Signed-off-by: John Fastabend <john.r.fastabend@intel.com> Acked-by: Thomas Graf <tgraf@suug.ch> Signed-off-by: David S. Miller <davem@davemloft.net>	2012-06-21 15:06:00 -07:00
Thomas Graf	b3908e22ad	dcbnl: Use BUG_ON() instead of BUG() Signed-off-by: Thomas Graf <tgraf@suug.ch> Signed-off-by: David S. Miller <davem@davemloft.net>	2012-06-14 01:45:46 -07:00
Thomas Graf	39912f9cf9	dcbnl: Silence harmless gcc warning about uninitialized reply_nlh Signed-off-by: Thomas Graf <tgraf@suug.ch> Signed-off-by: David S. Miller <davem@davemloft.net>	2012-06-14 01:45:46 -07:00
Thomas Graf	7a282bc37f	dcbnl: Use type safe nlmsg_data() Signed-off-by: Thomas Graf <tgraf@suug.ch> Signed-off-by: David S. Miller <davem@davemloft.net>	2012-06-13 15:46:35 -07:00
Thomas Graf	4e4f2f6970	dcbnl: Move dcb app allocation into dcb_app_add() Signed-off-by: Thomas Graf <tgraf@suug.ch> Signed-off-by: David S. Miller <davem@davemloft.net>	2012-06-13 15:46:35 -07:00
Thomas Graf	716b31abbd	dcbnl: Move dcb app lookup code into dcb_app_lookup() Signed-off-by: Thomas Graf <tgraf@suug.ch> Signed-off-by: David S. Miller <davem@davemloft.net>	2012-06-13 15:46:35 -07:00
Thomas Graf	3d1f486952	dcbnl: Return consistent error codes EMSGSIZE - ran out of space while constructing message EOPNOTSUPP - driver/hardware does not support operation ENODEV - network device not found EINVAL - invalid message Signed-off-by: Thomas Graf <tgraf@suug.ch> Signed-off-by: David S. Miller <davem@davemloft.net>	2012-06-13 15:46:34 -07:00
Thomas Graf	ab6d470735	dcbnl: Use dcbnl_newmsg() where possible Signed-off-by: Thomas Graf <tgraf@suug.ch> Signed-off-by: David S. Miller <davem@davemloft.net>	2012-06-13 15:46:34 -07:00
Thomas Graf	77c6849d7a	dcbnl: Remove now unused dcbnl_reply() Signed-off-by: Thomas Graf <tgraf@suug.ch> Signed-off-by: David S. Miller <davem@davemloft.net>	2012-06-13 15:46:34 -07:00
Thomas Graf	7be994138b	dcbnl: Shorten all command handling functions Allocating and sending the skb in dcb_doit() allows for much shorter and cleaner command handling functions. The huge switch statement is replaced with an array based definition of the handling function and reply message type. Signed-off-by: Thomas Graf <tgraf@suug.ch> Signed-off-by: David S. Miller <davem@davemloft.net>	2012-06-13 15:46:34 -07:00
Thomas Graf	33a03aadb5	dcbnl: Prepare framework to shorten handling functions There is no need to allocate and send the reply message in each handling function separately. Instead, the reply skb can be allocated and sent in dcb_doit() directly. Signed-off-by: Thomas Graf <tgraf@suug.ch> Signed-off-by: David S. Miller <davem@davemloft.net>	2012-06-13 15:46:34 -07:00
John Fastabend	081579840b	net: dcb: add CEE notify calls This adds code to trigger CEE events when an APP change or setall command is made from user space. This simplifies user space code significantly by creating a single interface to listen on that works with both firmware and userland agents. And if we end up with multiple agents this keeps every thing in sync userland agents, firmware agents, and kernel notifier consumers. For an example agent that listens for these events see: https://github.com/jrfastab/cgdcbxd cgdcbxd is a daemon used to monitor DCB netlink events and manage the net_prio control group sub-system. Signed-off-by: John Fastabend <john.r.fastabend@intel.com> Acked-by: Shmulik Ravid <shmulikr@broadcom.com> Signed-off-by: David S. Miller <davem@davemloft.net>	2012-04-25 19:47:17 -04:00
Amir Vadai	08f10affe4	net/dcb: Add an optional max rate attribute Although not specified in 8021Qaz spec, it could be useful to enable drivers whose HW supports setting a rate limit for an ETS TC. This patch adds this optional attribute to DCB netlink. To use it, drivers should implement and register the callbacks ieee_setmaxrate and ieee_getmaxrate. The units are 64 bits long and specified in Kbps to enable usage over both slow and very fast networks. Signed-off-by: Amir Vadai <amirv@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>	2012-04-05 05:08:04 -04:00
David S. Miller	1eb4c97777	dcbnl: Stop using NLA_PUT*(). These macros contain a hidden goto, and are thus extremely error prone and make code hard to audit. Signed-off-by: David S. Miller <davem@davemloft.net>	2012-04-02 04:33:41 -04:00
Paul Gortmaker	3a9a231d97	net: Fix files explicitly needing to include module.h With calls to modular infrastructure, these files really needs the full module.h header. Call it out so some of the cleanups of implicit and unrequired includes elsewhere can be cleaned up. Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>	2011-10-31 19:30:28 -04:00
John Fastabend	6bd0e1cb10	dcb: add DCBX mode to event notifier attributes Add DCBX mode to event notifiers so listeners can learn currently enabled mode. Signed-off-by: John Fastabend <john.r.fastabend@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>	2011-10-06 15:49:51 -04:00
Mark Rustad	e290ed8130	dcb: Use ifindex instead of ifname Use ifindex instead of ifname in the DCB app ring. This makes for a smaller data structure and faster comparisons. It also avoids possible issues when a net device is renamed. Signed-off-by: Mark Rustad <mark.d.rustad@intel.com> Signed-off-by: John Fastabend <john.r.fastabend@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>	2011-10-06 15:49:51 -04:00
Dan Carpenter	40f5d72a4f	dcbnl: unlock on an error path in dcbnl_cee_fill() We need to release "dcb_lock" which we took on the previous line. Signed-off-by: Dan Carpenter <error27@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>	2011-07-08 09:01:14 -07:00
Shmulik Ravid	5b7f762674	dcbnl: Add CEE notification This patch add an unsolicited notification of the DCBX negotiated parameters for the CEE flavor of the DCBX protocol. The notification message is identical to the aggregated CEE get operation and holds all the pertinent local and peer information. The notification routine is exported so it can be invoked by drivers supporting an embedded DCBX stack. Signed-off-by: Shmulik Ravid <shmulikr@broadcom.com> Signed-off-by: David S. Miller <davem@davemloft.net>	2011-07-05 23:42:17 -07:00
Shmulik Ravid	37cf4d1a9b	dcbnl: Aggregated CEE GET operation The following couple of patches add dcbnl an unsolicited notification of the the DCB configuration for the CEE flavor of the DCBX protocol. This is useful when the user-mode DCB client is not responsible for conducting and resolving the DCBX negotiation (either because the DCBX stack is embedded in the HW or the negotiation is handled by another agent in the host), but still needs to get the negotiated parameters. This functionality already exists for the IEEE flavor of the DCBX protocol and these patches add it to the older CEE flavor. The first patch extends the CEE attribute GET operation to include not only the peer information, but also all the pertinent local configuration (negotiated parameters). The second patch adds and export a CEE specific notification routine. Signed-off-by: Shmulik Ravid <shmulikr@broadcom.com> Signed-off-by: David S. Miller <davem@davemloft.net>	2011-07-05 23:42:17 -07:00
Dan Carpenter	4d054f2f14	dcb: use nlmsg_free() instead of kfree() These sk_buff structs were allocated with nlmsg_new() so they should be freed with nlmsg_free(). Signed-off-by: Dan Carpenter <error27@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>	2011-06-23 03:14:42 -07:00
John Fastabend	4003b65871	dcb: Add missing error check in dcb_ieee_set() Missing error checking before nla_parse_nested(). Reported-by: Mark Rustad <mark.d.rustad@intel.com> Signed-off-by: John Fastabend <john.r.fastabend@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>	2011-06-21 16:06:12 -07:00
John Fastabend	ab6baf980b	dcb: fix return type on dcb_setapp() Incorrect return type on dcb_setapp() this routine returns negative error codes. All call sites of dcb_setapp() assign the return value to an int already so no need to update drivers. Signed-off-by: John Fastabend <john.r.fastabend@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>	2011-06-21 16:06:12 -07:00
John Fastabend	a364c8cf80	dcb: Add dcb_ieee_getapp_mask() for drivers to query APP settings With multiple APP entries per selector and protocol drivers or stacks may want to pick a specific value or stripe traffic across many priorities. Also if an APP entry in use is deleted the stack/driver may want to choose from the existing APP entries. To facilitate this and avoid having duplicate code to walk the APP ring provide a routine dcb_ieee_getapp_mask() to return a u8 bitmask of all priorities set for the specified selector and protocol. This routine and bitmask is a helper for DCB kernel users. Signed-off-by: John Fastabend <john.r.fastabend@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>	2011-06-21 16:06:12 -07:00
John Fastabend	f9ae7e4b51	dcb: Add ieee_dcb_delapp() and dcb op to delete app entry Now that we allow multiple IEEE App entries we need a way to remove specific entries. To do this add the ieee_dcb_delapp() routine. Additionaly drivers may need to remove the APP entry from their firmware tables. Add dcb ops routine to handle this. Signed-off-by: John Fastabend <john.r.fastabend@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>	2011-06-21 16:06:11 -07:00
John Fastabend	b6db2174c5	dcb: Add ieee_dcb_setapp() to be used for IEEE 802.1Qaz APP data This adds a setapp routine for IEEE802.1Qaz encoded APP data types. The IEEE 802.1Qaz spec encodes the priority bits differently and allows for multiple APP data entries of the same selector and protocol. Trying to force these to use the same set routines was becoming tedious. Furthermore, userspace could probably enforce the correct semantics, but expecting drivers to do this seems error prone in the firmware case. For these reasons add ieee_dcb_setapp() that understands the IEEE 802.1Qaz encoded form. Signed-off-by: John Fastabend <john.r.fastabend@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>	2011-06-21 16:06:11 -07:00
John Fastabend	314b4778ed	net: dcbnl, add multicast group for DCB Now that dcbnl is being used in many cases by more than a single agent it is beneficial to be notified when some entity either driver or user space has changed the DCB attributes. Today applications either end up polling the interface or relying on a user space database to maintain the DCB state and post events. Polling is a poor solution for obvious reasons. And relying on a user space database has its own downside. Namely it has created strange boot dependencies requiring the database be populated before any applications dependent on DCB attributes starts or the application goes into a polling loop. Populating the database requires negotiating link setting with the peer and can take anywhere from less than a second up to a few seconds depending on the switch implementation. Perhaps more importantly if another application or an embedded agent sets a DCB link attribute the database has no way of knowing other than polling the kernel. This prevents applications from responding quickly to changes in link events which at least in the FCoE case and probably any other protocols expecting a lossless link may result in IO errors. By adding a multicast group for DCB we have clean way to disseminate kernel DCB link attributes up to user space. Avoiding the need for user space to maintain a coherant database and disperse events that potentially do not reflect the current link state. Signed-off-by: John Fastabend <john.r.fastabend@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>	2011-06-21 16:06:11 -07:00
John Fastabend	c7797baf9f	dcb: Add DCBX capabilities bitmask to the get_ieee response Adding the capabilities bitmask to the get_ieee response allows user space to determine the current DCBX mode. Either CEE or IEEE this is useful with devices that support switching between modes where knowing the current state is relevant. Derived from work by Mark Rustad Signed-off-by: John Fastabend <john.r.fastabend@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>	2011-06-21 16:06:11 -07:00
Greg Rose	c7ac8679be	rtnetlink: Compute and store minimum ifinfo dump size The message size allocated for rtnl ifinfo dumps was limited to a single page. This is not enough for additional interface info available with devices that support SR-IOV and caused a bug in which VF info would not be displayed if more than approximately 40 VFs were created per interface. Implement a new function pointer for the rtnl_register service that will calculate the amount of data required for the ifinfo dump and allocate enough data to satisfy the request. Signed-off-by: Greg Rose <gregory.v.rose@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>	2011-06-09 20:38:07 -07:00

1 2

84 Commits