2019-05-27 14:55:01 +08:00
// SPDX-License-Identifier: GPL-2.0-or-later
2005-04-17 06:20:36 +08:00
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Routing netlink socket interface: protocol independent part.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *	Fixes:
 *	Vitaly E. Lavrov		RTA_OK arithmetic was wrong.
 */
2017-06-22 09:25:04 +08:00
# include <linux/bitops.h>
2005-04-17 06:20:36 +08:00
# include <linux/errno.h>
# include <linux/module.h>
# include <linux/types.h>
# include <linux/socket.h>
# include <linux/kernel.h>
# include <linux/timer.h>
# include <linux/string.h>
# include <linux/sockios.h>
# include <linux/net.h>
# include <linux/fcntl.h>
# include <linux/mm.h>
# include <linux/slab.h>
# include <linux/interrupt.h>
# include <linux/capability.h>
# include <linux/skbuff.h>
# include <linux/init.h>
# include <linux/security.h>
2006-03-21 14:23:58 +08:00
# include <linux/mutex.h>
2006-08-05 14:04:54 +08:00
# include <linux/if_addr.h>
2012-04-15 14:43:56 +08:00
# include <linux/if_bridge.h>
2014-11-28 21:34:15 +08:00
# include <linux/if_vlan.h>
2010-02-10 09:44:05 +08:00
# include <linux/pci.h>
2012-04-15 14:43:56 +08:00
# include <linux/etherdevice.h>
2017-06-16 08:29:09 +08:00
# include <linux/bpf.h>
2005-04-17 06:20:36 +08:00
2016-12-25 03:46:01 +08:00
# include <linux/uaccess.h>
2005-04-17 06:20:36 +08:00
# include <linux/inet.h>
# include <linux/netdevice.h>
# include <net/ip.h>
# include <net/protocol.h>
# include <net/arp.h>
# include <net/route.h>
# include <net/udp.h>
2015-01-06 06:57:47 +08:00
# include <net/tcp.h>
2005-04-17 06:20:36 +08:00
# include <net/sock.h>
# include <net/pkt_sched.h>
2006-08-04 18:38:38 +08:00
# include <net/fib_rules.h>
2007-03-23 02:48:11 +08:00
# include <net/rtnetlink.h>
2009-07-10 17:51:35 +08:00
# include <net/net_namespace.h>
2005-04-17 06:20:36 +08:00
2022-04-07 05:37:54 +08:00
# include "dev.h"
2018-11-24 10:34:20 +08:00
/* Largest rtnl_link_ops maxtype / slave_maxtype values that
 * rtnl_link_register() accepts.
 */
#define RTNL_MAX_TYPE		50
#define RTNL_SLAVE_MAX_TYPE	40
2018-05-31 06:20:52 +08:00
2009-11-07 17:26:17 +08:00
struct rtnl_link {
2007-03-23 02:48:11 +08:00
rtnl_doit_func doit ;
rtnl_dumpit_func dumpit ;
2017-12-03 04:44:06 +08:00
struct module * owner ;
2017-08-10 02:41:52 +08:00
unsigned int flags ;
2017-12-03 04:44:05 +08:00
struct rcu_head rcu ;
2007-03-23 02:48:11 +08:00
} ;
2006-03-21 14:23:58 +08:00
static DEFINE_MUTEX ( rtnl_mutex ) ;
2005-04-17 06:20:36 +08:00
void rtnl_lock ( void )
{
2006-03-21 14:23:58 +08:00
mutex_lock ( & rtnl_mutex ) ;
2005-04-17 06:20:36 +08:00
}
2009-11-07 17:26:17 +08:00
EXPORT_SYMBOL ( rtnl_lock ) ;
2005-04-17 06:20:36 +08:00
2018-03-15 03:17:20 +08:00
/**
 * rtnl_lock_killable - take rtnl_mutex via mutex_lock_killable()
 *
 * Returns whatever mutex_lock_killable() returns for rtnl_mutex.
 */
int rtnl_lock_killable(void)
{
	return mutex_lock_killable(&rtnl_mutex);
}
EXPORT_SYMBOL(rtnl_lock_killable);
2016-06-14 11:21:50 +08:00
static struct sk_buff * defer_kfree_skb_list ;
void rtnl_kfree_skbs ( struct sk_buff * head , struct sk_buff * tail )
{
if ( head & & tail ) {
tail - > next = defer_kfree_skb_list ;
defer_kfree_skb_list = head ;
}
}
EXPORT_SYMBOL ( rtnl_kfree_skbs ) ;
2006-03-21 14:23:58 +08:00
void __rtnl_unlock ( void )
2005-04-17 06:20:36 +08:00
{
2016-06-14 11:21:50 +08:00
struct sk_buff * head = defer_kfree_skb_list ;
defer_kfree_skb_list = NULL ;
net: ensure net_todo_list is processed quickly
In [1], Will raised a potential issue that the cfg80211 code,
which does (from a locking perspective)
rtnl_lock()
wiphy_lock()
rtnl_unlock()
might be suspectible to ABBA deadlocks, because rtnl_unlock()
calls netdev_run_todo(), which might end up calling rtnl_lock()
again, which could then deadlock (see the comment in the code
added here for the scenario).
Some back and forth and thinking ensued, but clearly this can't
happen if the net_todo_list is empty at the rtnl_unlock() here.
Clearly, the code here cannot actually put an entry on it, and
all other users of rtnl_unlock() will empty it since that will
always go through netdev_run_todo(), emptying the list.
So the only other way to get there would be to add to the list
and then unlock the RTNL without going through rtnl_unlock(),
which is only possible through __rtnl_unlock(). However, this
isn't exported and not used in many places, and none of them
seem to be able to unregister before using it.
Therefore, add a WARN_ON() in the code to ensure this invariant
won't be broken, so that the cfg80211 (or any similar) code
stays safe.
[1] https://lore.kernel.org/r/Yjzpo3TfZxtKPMAG@google.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Link: https://lore.kernel.org/r/20220404113847.0ee02e4a70da.Ic73d206e217db20fd22dcec14fe5442ca732804b@changeid
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-04-04 17:38:47 +08:00
/* Ensure that we didn't actually add any TODO item when __rtnl_unlock()
* is used . In some places , e . g . in cfg80211 , we have code that will do
* something like
* rtnl_lock ( )
* wiphy_lock ( )
* . . .
* rtnl_unlock ( )
*
* and because netdev_run_todo ( ) acquires the RTNL for items on the list
* we could cause a situation such as this :
* Thread 1 Thread 2
* rtnl_lock ( )
* unregister_netdevice ( )
* __rtnl_unlock ( )
* rtnl_lock ( )
* wiphy_lock ( )
* rtnl_unlock ( )
* netdev_run_todo ( )
* __rtnl_unlock ( )
*
* // list not empty now
* // because of thread 2
* rtnl_lock ( )
* while ( ! list_empty ( . . . ) )
* rtnl_lock ( )
* wiphy_lock ( )
* * * * * DEADLOCK * * * *
*
* However , usage of __rtnl_unlock ( ) is rare , and so we can ensure that
* it ' s not used in cases where something is added to do the list .
*/
WARN_ON ( ! list_empty ( & net_todo_list ) ) ;
2006-03-21 14:23:58 +08:00
mutex_unlock ( & rtnl_mutex ) ;
2016-06-14 11:21:50 +08:00
while ( head ) {
struct sk_buff * next = head - > next ;
kfree_skb ( head ) ;
cond_resched ( ) ;
head = next ;
}
2005-04-17 06:20:36 +08:00
}
2006-03-21 14:23:58 +08:00
2005-04-17 06:20:36 +08:00
/**
 * rtnl_unlock - release rtnl_mutex via netdev_run_todo()
 *
 * The mutex is not dropped here directly; netdev_run_todo() is relied on
 * to do the unlock.
 */
void rtnl_unlock(void)
{
	/* netdev_run_todo() releases the mutex on our behalf. */
	netdev_run_todo();
}
EXPORT_SYMBOL(rtnl_unlock);
2005-04-17 06:20:36 +08:00
2006-03-21 14:23:58 +08:00
/**
 * rtnl_trylock - try to take rtnl_mutex without blocking
 *
 * Returns the result of mutex_trylock() on rtnl_mutex.
 */
int rtnl_trylock(void)
{
	return mutex_trylock(&rtnl_mutex);
}
EXPORT_SYMBOL(rtnl_trylock);
2006-03-21 14:23:58 +08:00
2008-04-24 13:10:48 +08:00
/**
 * rtnl_is_locked - query the state of rtnl_mutex
 *
 * Returns the result of mutex_is_locked() on rtnl_mutex.
 */
int rtnl_is_locked(void)
{
	return mutex_is_locked(&rtnl_mutex);
}
EXPORT_SYMBOL(rtnl_is_locked);
2008-04-24 13:10:48 +08:00
2018-09-25 00:22:49 +08:00
/**
 * refcount_dec_and_rtnl_lock - refcount_dec_and_mutex_lock() on rtnl_mutex
 * @r: reference count to decrement
 *
 * Returns the result of refcount_dec_and_mutex_lock(@r, &rtnl_mutex).
 */
bool refcount_dec_and_rtnl_lock(refcount_t *r)
{
	return refcount_dec_and_mutex_lock(r, &rtnl_mutex);
}
EXPORT_SYMBOL(refcount_dec_and_rtnl_lock);
2010-02-23 09:04:49 +08:00
#ifdef CONFIG_PROVE_LOCKING
/* Lockdep query for rtnl_mutex; only built with CONFIG_PROVE_LOCKING. */
bool lockdep_rtnl_is_held(void)
{
	return lockdep_is_held(&rtnl_mutex);
}
EXPORT_SYMBOL(lockdep_rtnl_is_held);
#endif /* #ifdef CONFIG_PROVE_LOCKING */
2020-12-10 10:16:08 +08:00
static struct rtnl_link __rcu * __rcu * rtnl_msg_handlers [ RTNL_FAMILY_MAX + 1 ] ;
2007-03-23 02:48:11 +08:00
static inline int rtm_msgindex ( int msgtype )
{
int msgindex = msgtype - RTM_BASE ;
/*
* msgindex < 0 implies someone tried to register a netlink
* control code . msgindex > = RTM_NR_MSGTYPES may indicate that
* the message type has not been added to linux / rtnetlink . h
*/
BUG_ON ( msgindex < 0 | | msgindex > = RTM_NR_MSGTYPES ) ;
return msgindex ;
}
2017-12-03 04:44:05 +08:00
static struct rtnl_link * rtnl_get_link ( int protocol , int msgtype )
{
2020-12-10 10:16:08 +08:00
struct rtnl_link __rcu * * tab ;
2017-12-03 04:44:05 +08:00
if ( protocol > = ARRAY_SIZE ( rtnl_msg_handlers ) )
protocol = PF_UNSPEC ;
tab = rcu_dereference_rtnl ( rtnl_msg_handlers [ protocol ] ) ;
if ( ! tab )
tab = rcu_dereference_rtnl ( rtnl_msg_handlers [ PF_UNSPEC ] ) ;
2020-12-10 10:16:08 +08:00
return rcu_dereference_rtnl ( tab [ msgtype ] ) ;
2017-12-03 04:44:05 +08:00
}
2017-12-03 04:44:06 +08:00
/*
 * Install @doit and/or @dumpit for (@protocol, @msgtype) on behalf of @owner.
 *
 * The per-protocol table is allocated on first use.  An existing entry is
 * not modified in place: a copy is updated, published with
 * rcu_assign_pointer(), and the previous entry is released with kfree_rcu().
 *
 * Returns 0 on success or -ENOBUFS when an allocation fails.
 */
static int rtnl_register_internal(struct module *owner,
				  int protocol, int msgtype,
				  rtnl_doit_func doit, rtnl_dumpit_func dumpit,
				  unsigned int flags)
{
	struct rtnl_link __rcu **handlers;
	struct rtnl_link *entry, *prev;
	int err = -ENOBUFS;
	int idx;

	BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
	idx = rtm_msgindex(msgtype);

	rtnl_lock();
	handlers = rtnl_dereference(rtnl_msg_handlers[protocol]);
	if (!handlers) {
		handlers = kcalloc(RTM_NR_MSGTYPES, sizeof(void *), GFP_KERNEL);
		if (!handlers)
			goto out_unlock;

		/* ensures we see the 0 stores */
		rcu_assign_pointer(rtnl_msg_handlers[protocol], handlers);
	}

	/* Start from a copy of the current entry, or a zeroed one. */
	prev = rtnl_dereference(handlers[idx]);
	entry = prev ? kmemdup(prev, sizeof(*prev), GFP_KERNEL)
		     : kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		goto out_unlock;

	WARN_ON(entry->owner && entry->owner != owner);
	entry->owner = owner;

	/* A NULL callback leaves the already registered one untouched. */
	WARN_ON(doit && entry->doit && entry->doit != doit);
	if (doit)
		entry->doit = doit;

	WARN_ON(dumpit && entry->dumpit && entry->dumpit != dumpit);
	if (dumpit)
		entry->dumpit = dumpit;

	WARN_ON(rtnl_msgtype_kind(msgtype) != RTNL_KIND_DEL &&
		(flags & RTNL_FLAG_BULK_DEL_SUPPORTED));
	entry->flags |= flags;

	/* publish protocol:msgtype */
	rcu_assign_pointer(handlers[idx], entry);
	err = 0;
	if (prev)
		kfree_rcu(prev, rcu);

out_unlock:
	rtnl_unlock();
	return err;
}
2017-12-03 04:44:06 +08:00
/**
 * rtnl_register_module - Register a rtnetlink message type
 *
 * @owner: module registering the hook (THIS_MODULE)
 * @protocol: Protocol family or PF_UNSPEC
 * @msgtype: rtnetlink message type
 * @doit: Function pointer called for each request message
 * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
 * @flags: rtnl_link_flags to modify behaviour of doit/dumpit functions
 *
 * Like rtnl_register, but for use by removable modules.
 *
 * Returns the result of rtnl_register_internal().
 */
int rtnl_register_module(struct module *owner,
			 int protocol, int msgtype,
			 rtnl_doit_func doit, rtnl_dumpit_func dumpit,
			 unsigned int flags)
{
	return rtnl_register_internal(owner, protocol, msgtype,
				      doit, dumpit, flags);
}
EXPORT_SYMBOL_GPL(rtnl_register_module);
/**
2017-12-03 04:44:08 +08:00
* rtnl_register - Register a rtnetlink message type
2017-12-03 04:44:06 +08:00
* @ protocol : Protocol family or PF_UNSPEC
* @ msgtype : rtnetlink message type
* @ doit : Function pointer called for each request message
* @ dumpit : Function pointer called for each dump request ( NLM_F_DUMP ) message
2021-06-02 14:56:23 +08:00
* @ flags : rtnl_link_flags to modify behaviour of doit / dumpit functions
2017-12-03 04:44:06 +08:00
*
* Registers the specified function pointers ( at least one of them has
* to be non - NULL ) to be called whenever a request message for the
* specified protocol family and message type is received .
*
* The special protocol family PF_UNSPEC may be used to define fallback
* function pointers for the case when no entry for the specific protocol
* family exists .
2007-03-23 02:48:11 +08:00
*/
void rtnl_register ( int protocol , int msgtype ,
2011-06-10 09:27:09 +08:00
rtnl_doit_func doit , rtnl_dumpit_func dumpit ,
2017-08-10 02:41:48 +08:00
unsigned int flags )
2007-03-23 02:48:11 +08:00
{
2017-12-03 04:44:08 +08:00
int err ;
err = rtnl_register_internal ( NULL , protocol , msgtype , doit , dumpit ,
flags ) ;
if ( err )
pr_err ( " Unable to register rtnetlink message handler, "
" protocol = %d, message type = %d \n " , protocol , msgtype ) ;
2007-03-23 02:48:11 +08:00
}
/**
* rtnl_unregister - Unregister a rtnetlink message type
* @ protocol : Protocol family or PF_UNSPEC
* @ msgtype : rtnetlink message type
*
* Returns 0 on success or a negative error code .
*/
int rtnl_unregister ( int protocol , int msgtype )
{
2020-12-10 10:16:08 +08:00
struct rtnl_link __rcu * * tab ;
struct rtnl_link * link ;
2007-03-23 02:48:11 +08:00
int msgindex ;
2010-04-26 22:02:05 +08:00
BUG_ON ( protocol < 0 | | protocol > RTNL_FAMILY_MAX ) ;
2007-03-23 02:48:11 +08:00
msgindex = rtm_msgindex ( msgtype ) ;
2017-08-10 02:41:51 +08:00
rtnl_lock ( ) ;
2017-12-03 04:44:05 +08:00
tab = rtnl_dereference ( rtnl_msg_handlers [ protocol ] ) ;
if ( ! tab ) {
2017-08-10 02:41:51 +08:00
rtnl_unlock ( ) ;
2007-03-23 02:48:11 +08:00
return - ENOENT ;
2017-08-10 02:41:51 +08:00
}
2007-03-23 02:48:11 +08:00
2020-12-10 10:16:08 +08:00
link = rtnl_dereference ( tab [ msgindex ] ) ;
2021-09-18 14:36:07 +08:00
RCU_INIT_POINTER ( tab [ msgindex ] , NULL ) ;
2017-08-10 02:41:51 +08:00
rtnl_unlock ( ) ;
2007-03-23 02:48:11 +08:00
2017-12-03 04:44:05 +08:00
kfree_rcu ( link , rcu ) ;
2007-03-23 02:48:11 +08:00
return 0 ;
}
EXPORT_SYMBOL_GPL ( rtnl_unregister ) ;
/**
* rtnl_unregister_all - Unregister all rtnetlink message type of a protocol
* @ protocol : Protocol family or PF_UNSPEC
*
* Identical to calling rtnl_unregster ( ) for all registered message types
* of a certain protocol family .
*/
void rtnl_unregister_all ( int protocol )
{
2020-12-10 10:16:08 +08:00
struct rtnl_link __rcu * * tab ;
struct rtnl_link * link ;
2017-12-03 04:44:05 +08:00
int msgindex ;
2017-08-10 02:41:49 +08:00
2010-04-26 22:02:05 +08:00
BUG_ON ( protocol < 0 | | protocol > RTNL_FAMILY_MAX ) ;
2007-03-23 02:48:11 +08:00
2017-08-10 02:41:49 +08:00
rtnl_lock ( ) ;
2020-12-10 10:16:08 +08:00
tab = rtnl_dereference ( rtnl_msg_handlers [ protocol ] ) ;
2018-08-28 19:40:53 +08:00
if ( ! tab ) {
rtnl_unlock ( ) ;
return ;
}
2017-08-10 02:41:51 +08:00
RCU_INIT_POINTER ( rtnl_msg_handlers [ protocol ] , NULL ) ;
2017-12-03 04:44:05 +08:00
for ( msgindex = 0 ; msgindex < RTM_NR_MSGTYPES ; msgindex + + ) {
2020-12-10 10:16:08 +08:00
link = rtnl_dereference ( tab [ msgindex ] ) ;
2017-12-03 04:44:05 +08:00
if ( ! link )
continue ;
2021-09-18 14:36:07 +08:00
RCU_INIT_POINTER ( tab [ msgindex ] , NULL ) ;
2017-12-03 04:44:05 +08:00
kfree_rcu ( link , rcu ) ;
}
2017-08-10 02:41:49 +08:00
rtnl_unlock ( ) ;
2017-08-10 02:41:51 +08:00
synchronize_net ( ) ;
2017-12-03 04:44:05 +08:00
kfree ( tab ) ;
2007-03-23 02:48:11 +08:00
}
EXPORT_SYMBOL_GPL ( rtnl_unregister_all ) ;
2005-04-17 06:20:36 +08:00
2007-06-14 03:03:51 +08:00
static LIST_HEAD ( link_ops ) ;
2011-12-13 19:38:00 +08:00
static const struct rtnl_link_ops * rtnl_link_ops_get ( const char * kind )
{
const struct rtnl_link_ops * ops ;
list_for_each_entry ( ops , & link_ops , list ) {
if ( ! strcmp ( ops - > kind , kind ) )
return ops ;
}
return NULL ;
}
2007-06-14 03:03:51 +08:00
/**
* __rtnl_link_register - Register rtnl_link_ops with rtnetlink .
* @ ops : struct rtnl_link_ops * to register
*
* The caller must hold the rtnl_mutex . This function should be used
* by drivers that create devices during module initialization . It
* must be called before registering the devices .
*
* Returns 0 on success or a negative error code .
*/
int __rtnl_link_register ( struct rtnl_link_ops * ops )
{
2011-12-13 19:38:00 +08:00
if ( rtnl_link_ops_get ( ops - > kind ) )
return - EEXIST ;
2021-06-12 16:20:54 +08:00
/* The check for alloc/setup is here because if ops
2014-06-26 15:58:25 +08:00
* does not have that filled up , it is not possible
* to use the ops for creating device . So do not
* fill up dellink as well . That disables rtnl_dellink .
*/
2021-06-12 16:20:54 +08:00
if ( ( ops - > alloc | | ops - > setup ) & & ! ops - > dellink )
2009-10-27 15:06:36 +08:00
ops - > dellink = unregister_netdevice_queue ;
2007-07-12 10:42:13 +08:00
2007-06-14 03:03:51 +08:00
list_add_tail ( & ops - > list , & link_ops ) ;
return 0 ;
}
EXPORT_SYMBOL_GPL ( __rtnl_link_register ) ;
/**
* rtnl_link_register - Register rtnl_link_ops with rtnetlink .
* @ ops : struct rtnl_link_ops * to register
*
* Returns 0 on success or a negative error code .
*/
int rtnl_link_register ( struct rtnl_link_ops * ops )
{
int err ;
2018-05-31 06:20:52 +08:00
/* Sanity-check max sizes to avoid stack buffer overflow. */
if ( WARN_ON ( ops - > maxtype > RTNL_MAX_TYPE | |
ops - > slave_maxtype > RTNL_SLAVE_MAX_TYPE ) )
return - EINVAL ;
2007-06-14 03:03:51 +08:00
rtnl_lock ( ) ;
err = __rtnl_link_register ( ops ) ;
rtnl_unlock ( ) ;
return err ;
}
EXPORT_SYMBOL_GPL ( rtnl_link_register ) ;
2008-04-16 15:46:52 +08:00
static void __rtnl_kill_links ( struct net * net , struct rtnl_link_ops * ops )
{
struct net_device * dev ;
2009-10-27 15:06:36 +08:00
LIST_HEAD ( list_kill ) ;
2008-04-16 15:46:52 +08:00
for_each_netdev ( net , dev ) {
2009-10-27 15:06:36 +08:00
if ( dev - > rtnl_link_ops = = ops )
ops - > dellink ( dev , & list_kill ) ;
2008-04-16 15:46:52 +08:00
}
2009-10-27 15:06:36 +08:00
unregister_netdevice_many ( & list_kill ) ;
2008-04-16 15:46:52 +08:00
}
2007-06-14 03:03:51 +08:00
/**
* __rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink .
* @ ops : struct rtnl_link_ops * to unregister
*
2018-03-31 00:38:37 +08:00
* The caller must hold the rtnl_mutex and guarantee net_namespace_list
* integrity ( hold pernet_ops_rwsem for writing to close the race
* with setup_net ( ) and cleanup_net ( ) ) .
2007-06-14 03:03:51 +08:00
*/
void __rtnl_link_unregister ( struct rtnl_link_ops * ops )
{
2007-09-18 02:56:21 +08:00
struct net * net ;
2007-07-12 10:42:13 +08:00
2007-09-18 02:56:21 +08:00
for_each_net ( net ) {
2008-04-16 15:46:52 +08:00
__rtnl_kill_links ( net , ops ) ;
2007-07-12 10:42:13 +08:00
}
2007-06-14 03:03:51 +08:00
list_del ( & ops - > list ) ;
}
EXPORT_SYMBOL_GPL ( __rtnl_link_unregister ) ;
2014-05-13 06:11:20 +08:00
/* Return with the rtnl_lock held when there are no network
* devices unregistering in any network namespace .
*/
static void rtnl_lock_unregistering_all ( void )
{
struct net * net ;
bool unregistering ;
2014-10-30 00:04:56 +08:00
DEFINE_WAIT_FUNC ( wait , woken_wake_function ) ;
2014-05-13 06:11:20 +08:00
2014-10-30 00:04:56 +08:00
add_wait_queue ( & netdev_unregistering_wq , & wait ) ;
2014-05-13 06:11:20 +08:00
for ( ; ; ) {
unregistering = false ;
rtnl_lock ( ) ;
net: Introduce net_rwsem to protect net_namespace_list
rtnl_lock() is used everywhere, and contention is very high.
When someone wants to iterate over alive net namespaces,
he/she has no a possibility to do that without exclusive lock.
But the exclusive rtnl_lock() in such places is overkill,
and it just increases the contention. Yes, there is already
for_each_net_rcu() in kernel, but it requires rcu_read_lock(),
and this can't be sleepable. Also, sometimes it may be need
really prevent net_namespace_list growth, so for_each_net_rcu()
is not fit there.
This patch introduces new rw_semaphore, which will be used
instead of rtnl_mutex to protect net_namespace_list. It is
sleepable and allows not-exclusive iterations over net
namespaces list. It allows to stop using rtnl_lock()
in several places (what is made in next patches) and makes
less the time, we keep rtnl_mutex. Here we just add new lock,
while the explanation of we can remove rtnl_lock() there are
in next patches.
Fine grained locks generally are better, then one big lock,
so let's do that with net_namespace_list, while the situation
allows that.
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-03-30 00:20:32 +08:00
/* We held write locked pernet_ops_rwsem, and parallel
* setup_net ( ) and cleanup_net ( ) are not possible .
*/
2014-05-13 06:11:20 +08:00
for_each_net ( net ) {
2022-02-10 10:59:32 +08:00
if ( atomic_read ( & net - > dev_unreg_count ) > 0 ) {
2014-05-13 06:11:20 +08:00
unregistering = true ;
break ;
}
}
if ( ! unregistering )
break ;
__rtnl_unlock ( ) ;
2014-10-30 00:04:56 +08:00
wait_woken ( & wait , TASK_UNINTERRUPTIBLE , MAX_SCHEDULE_TIMEOUT ) ;
2014-05-13 06:11:20 +08:00
}
2014-10-30 00:04:56 +08:00
remove_wait_queue ( & netdev_unregistering_wq , & wait ) ;
2014-05-13 06:11:20 +08:00
}
2007-06-14 03:03:51 +08:00
/**
* rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink .
* @ ops : struct rtnl_link_ops * to unregister
*/
void rtnl_link_unregister ( struct rtnl_link_ops * ops )
{
2018-03-27 23:02:32 +08:00
/* Close the race with setup_net() and cleanup_net() */
2018-03-27 23:02:23 +08:00
down_write ( & pernet_ops_rwsem ) ;
2014-05-13 06:11:20 +08:00
rtnl_lock_unregistering_all ( ) ;
2007-06-14 03:03:51 +08:00
__rtnl_link_unregister ( ops ) ;
rtnl_unlock ( ) ;
2018-03-27 23:02:23 +08:00
up_write ( & pernet_ops_rwsem ) ;
2007-06-14 03:03:51 +08:00
}
EXPORT_SYMBOL_GPL ( rtnl_link_unregister ) ;
2014-01-22 16:05:55 +08:00
static size_t rtnl_link_get_slave_info_data_size ( const struct net_device * dev )
{
struct net_device * master_dev ;
const struct rtnl_link_ops * ops ;
2017-08-10 22:52:59 +08:00
size_t size = 0 ;
2014-01-22 16:05:55 +08:00
2017-08-10 22:52:59 +08:00
rcu_read_lock ( ) ;
master_dev = netdev_master_upper_dev_get_rcu ( ( struct net_device * ) dev ) ;
2014-01-22 16:05:55 +08:00
if ( ! master_dev )
2017-08-10 22:52:59 +08:00
goto out ;
2014-01-22 16:05:55 +08:00
ops = master_dev - > rtnl_link_ops ;
2014-02-04 18:35:02 +08:00
if ( ! ops | | ! ops - > get_slave_size )
2017-08-10 22:52:59 +08:00
goto out ;
2014-01-22 16:05:55 +08:00
/* IFLA_INFO_SLAVE_DATA + nested data */
2017-08-10 22:52:59 +08:00
size = nla_total_size ( sizeof ( struct nlattr ) ) +
2014-01-22 16:05:55 +08:00
ops - > get_slave_size ( master_dev , dev ) ;
2017-08-10 22:52:59 +08:00
out :
rcu_read_unlock ( ) ;
return size ;
2014-01-22 16:05:55 +08:00
}
2007-06-14 03:03:51 +08:00
static size_t rtnl_link_get_size ( const struct net_device * dev )
{
const struct rtnl_link_ops * ops = dev - > rtnl_link_ops ;
size_t size ;
if ( ! ops )
return 0 ;
2010-11-11 23:47:59 +08:00
size = nla_total_size ( sizeof ( struct nlattr ) ) + /* IFLA_LINKINFO */
nla_total_size ( strlen ( ops - > kind ) + 1 ) ; /* IFLA_INFO_KIND */
2007-06-14 03:03:51 +08:00
if ( ops - > get_size )
/* IFLA_INFO_DATA + nested data */
2010-11-11 23:47:59 +08:00
size + = nla_total_size ( sizeof ( struct nlattr ) ) +
2007-06-14 03:03:51 +08:00
ops - > get_size ( dev ) ;
if ( ops - > get_xstats_size )
2010-11-11 23:47:59 +08:00
/* IFLA_INFO_XSTATS */
size + = nla_total_size ( ops - > get_xstats_size ( dev ) ) ;
2007-06-14 03:03:51 +08:00
2014-01-22 16:05:55 +08:00
size + = rtnl_link_get_slave_info_data_size ( dev ) ;
2007-06-14 03:03:51 +08:00
return size ;
}
2010-11-16 12:30:14 +08:00
/* All rtnl_af_ops registered through rtnl_af_register(). */
static LIST_HEAD(rtnl_af_ops);

/*
 * Find the rtnl_af_ops registered for @family, or NULL.
 * The RTNL must be held (checked by ASSERT_RTNL()).
 */
static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
{
	const struct rtnl_af_ops *candidate;

	ASSERT_RTNL();

	list_for_each_entry(candidate, &rtnl_af_ops, list) {
		if (candidate->family == family)
			return candidate;
	}

	return NULL;
}
/**
* rtnl_af_register - Register rtnl_af_ops with rtnetlink .
* @ ops : struct rtnl_af_ops * to register
*
* Returns 0 on success or a negative error code .
*/
2013-12-31 02:41:32 +08:00
void rtnl_af_register ( struct rtnl_af_ops * ops )
2010-11-16 12:30:14 +08:00
{
rtnl_lock ( ) ;
2017-10-16 21:44:36 +08:00
list_add_tail_rcu ( & ops - > list , & rtnl_af_ops ) ;
2010-11-16 12:30:14 +08:00
rtnl_unlock ( ) ;
}
EXPORT_SYMBOL_GPL ( rtnl_af_register ) ;
/**
* rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink .
* @ ops : struct rtnl_af_ops * to unregister
*/
void rtnl_af_unregister ( struct rtnl_af_ops * ops )
{
rtnl_lock ( ) ;
2017-10-16 21:44:36 +08:00
list_del_rcu ( & ops - > list ) ;
2010-11-16 12:30:14 +08:00
rtnl_unlock ( ) ;
2017-10-16 21:44:36 +08:00
synchronize_rcu ( ) ;
2010-11-16 12:30:14 +08:00
}
EXPORT_SYMBOL_GPL ( rtnl_af_unregister ) ;
2015-10-20 00:23:28 +08:00
/*
 * Space needed for the IFLA_AF_SPEC attribute of @dev: the outer nest
 * plus one nested attribute for each registered rtnl_af_ops that
 * provides get_link_af_size.
 */
static size_t rtnl_link_get_af_size(const struct net_device *dev,
				    u32 ext_filter_mask)
{
	struct rtnl_af_ops *af_ops;
	size_t total;

	/* IFLA_AF_SPEC */
	total = nla_total_size(sizeof(struct nlattr));

	rcu_read_lock();
	list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
		if (!af_ops->get_link_af_size)
			continue;

		/* AF_* + nested data */
		total += nla_total_size(sizeof(struct nlattr)) +
			 af_ops->get_link_af_size(dev, ext_filter_mask);
	}
	rcu_read_unlock();

	return total;
}
2014-01-22 16:05:55 +08:00
static bool rtnl_have_link_slave_info ( const struct net_device * dev )
2007-06-14 03:03:51 +08:00
{
2014-01-22 16:05:55 +08:00
struct net_device * master_dev ;
2017-09-26 19:58:43 +08:00
bool ret = false ;
2007-06-14 03:03:51 +08:00
2017-09-26 19:58:43 +08:00
rcu_read_lock ( ) ;
master_dev = netdev_master_upper_dev_get_rcu ( ( struct net_device * ) dev ) ;
2014-01-24 02:19:21 +08:00
if ( master_dev & & master_dev - > rtnl_link_ops )
2017-09-26 19:58:43 +08:00
ret = true ;
rcu_read_unlock ( ) ;
return ret ;
2014-01-22 16:05:55 +08:00
}
static int rtnl_link_slave_info_fill ( struct sk_buff * skb ,
const struct net_device * dev )
{
struct net_device * master_dev ;
const struct rtnl_link_ops * ops ;
struct nlattr * slave_data ;
int err ;
2007-06-14 03:03:51 +08:00
2014-01-22 16:05:55 +08:00
master_dev = netdev_master_upper_dev_get ( ( struct net_device * ) dev ) ;
if ( ! master_dev )
return 0 ;
ops = master_dev - > rtnl_link_ops ;
if ( ! ops )
return 0 ;
if ( nla_put_string ( skb , IFLA_INFO_SLAVE_KIND , ops - > kind ) < 0 )
return - EMSGSIZE ;
if ( ops - > fill_slave_info ) {
2019-04-26 17:13:06 +08:00
slave_data = nla_nest_start_noflag ( skb , IFLA_INFO_SLAVE_DATA ) ;
2014-01-22 16:05:55 +08:00
if ( ! slave_data )
return - EMSGSIZE ;
err = ops - > fill_slave_info ( skb , master_dev , dev ) ;
if ( err < 0 )
goto err_cancel_slave_data ;
nla_nest_end ( skb , slave_data ) ;
}
return 0 ;
err_cancel_slave_data :
nla_nest_cancel ( skb , slave_data ) ;
return err ;
}
static int rtnl_link_info_fill ( struct sk_buff * skb ,
const struct net_device * dev )
{
const struct rtnl_link_ops * ops = dev - > rtnl_link_ops ;
struct nlattr * data ;
int err ;
if ( ! ops )
return 0 ;
2007-06-14 03:03:51 +08:00
if ( nla_put_string ( skb , IFLA_INFO_KIND , ops - > kind ) < 0 )
2014-01-22 16:05:55 +08:00
return - EMSGSIZE ;
2007-06-14 03:03:51 +08:00
if ( ops - > fill_xstats ) {
err = ops - > fill_xstats ( skb , dev ) ;
if ( err < 0 )
2014-01-22 16:05:55 +08:00
return err ;
2007-06-14 03:03:51 +08:00
}
if ( ops - > fill_info ) {
2019-04-26 17:13:06 +08:00
data = nla_nest_start_noflag ( skb , IFLA_INFO_DATA ) ;
2014-01-22 16:05:55 +08:00
if ( data = = NULL )
return - EMSGSIZE ;
2007-06-14 03:03:51 +08:00
err = ops - > fill_info ( skb , dev ) ;
if ( err < 0 )
goto err_cancel_data ;
nla_nest_end ( skb , data ) ;
}
return 0 ;
err_cancel_data :
nla_nest_cancel ( skb , data ) ;
2014-01-22 16:05:55 +08:00
return err ;
}
static int rtnl_link_fill ( struct sk_buff * skb , const struct net_device * dev )
{
struct nlattr * linkinfo ;
int err = - EMSGSIZE ;
2019-04-26 17:13:06 +08:00
linkinfo = nla_nest_start_noflag ( skb , IFLA_LINKINFO ) ;
2014-01-22 16:05:55 +08:00
if ( linkinfo = = NULL )
goto out ;
err = rtnl_link_info_fill ( skb , dev ) ;
if ( err < 0 )
goto err_cancel_link ;
err = rtnl_link_slave_info_fill ( skb , dev ) ;
if ( err < 0 )
goto err_cancel_link ;
nla_nest_end ( skb , linkinfo ) ;
return 0 ;
2007-06-14 03:03:51 +08:00
err_cancel_link :
nla_nest_cancel ( skb , linkinfo ) ;
out :
return err ;
}
2012-04-15 13:58:06 +08:00
/*
 * Hand @skb to nlmsg_notify() on the rtnetlink socket of @net, using
 * GFP_KERNEL.  Returns the result of nlmsg_notify().
 */
int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid,
		   unsigned int group, int echo)
{
	return nlmsg_notify(net->rtnl, skb, pid, group, echo, GFP_KERNEL);
}
2007-11-20 14:26:51 +08:00
/*
 * Unicast @skb to @pid through the rtnetlink socket of @net.
 * Returns the result of nlmsg_unicast().
 */
int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid)
{
	return nlmsg_unicast(net->rtnl, skb, pid);
}
EXPORT_SYMBOL(rtnl_unicast);
2006-08-15 15:30:25 +08:00
2009-02-25 15:18:28 +08:00
/*
 * Pass @skb to nlmsg_notify() on the rtnetlink socket of @net.  The
 * report argument is taken from nlmsg_report(@nlh); @flags is the
 * allocation mask.  The result of nlmsg_notify() is not returned.
 */
void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
		 struct nlmsghdr *nlh, gfp_t flags)
{
	nlmsg_notify(net->rtnl, skb, pid, group, nlmsg_report(nlh), flags);
}
EXPORT_SYMBOL(rtnl_notify);
2006-08-15 15:31:41 +08:00
2007-11-20 14:26:51 +08:00
void rtnl_set_sk_err ( struct net * net , u32 group , int error )
2006-08-15 15:31:41 +08:00
{
2007-11-20 14:26:51 +08:00
struct sock * rtnl = net - > rtnl ;
2006-08-15 15:31:41 +08:00
netlink_set_err ( rtnl , 0 , group , error ) ;
}
2009-11-07 17:26:17 +08:00
EXPORT_SYMBOL ( rtnl_set_sk_err ) ;
2006-08-15 15:31:41 +08:00
2005-04-17 06:20:36 +08:00
/**
 * rtnetlink_put_metrics - append a nested RTA_METRICS attribute to @skb
 * @skb: message being built
 * @metrics: metric array; slot N is emitted under attribute type N + 1
 *
 * Every non-zero slot is emitted as a u32, with two exceptions:
 *  - RTAX_CC_ALGO: the stored key is translated by
 *    tcp_ca_get_name_by_key() and emitted as a string; the slot is skipped
 *    when no name is returned.
 *  - RTAX_FEATURES: only the bits inside RTAX_FEATURE_MASK are emitted, and
 *    the slot is skipped when none of those bits is set.
 *
 * Return: 0 when @metrics is dst_default_metrics.metrics or when no slot
 * was emitted (the empty nest is cancelled), -ENOBUFS when the nest cannot
 * be started, -EMSGSIZE when an attribute cannot be added (the nest is
 * cancelled), otherwise the result of nla_nest_end().
 */
int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
{
	struct nlattr *nest;
	int emitted = 0;
	int type;

	/* nothing is dumped for dst_default_metrics, so bail out early */
	if (metrics == dst_default_metrics.metrics)
		return 0;

	nest = nla_nest_start_noflag(skb, RTA_METRICS);
	if (!nest)
		return -ENOBUFS;

	/* attribute types are 1-based while the array is 0-based */
	for (type = 1; type <= RTAX_MAX; type++) {
		if (!metrics[type - 1])
			continue;

		switch (type) {
		case RTAX_CC_ALGO: {
			char buf[TCP_CA_NAME_MAX];
			char *name;

			name = tcp_ca_get_name_by_key(metrics[type - 1], buf);
			if (!name)
				continue;
			if (nla_put_string(skb, type, name))
				goto cancel;
			break;
		}
		case RTAX_FEATURES: {
			u32 user_features = metrics[type - 1] & RTAX_FEATURE_MASK;

			if (!user_features)
				continue;
			/* the two feature masks must never share a bit */
			BUILD_BUG_ON(RTAX_FEATURE_MASK & DST_FEATURE_MASK);
			if (nla_put_u32(skb, type, user_features))
				goto cancel;
			break;
		}
		default:
			if (nla_put_u32(skb, type, metrics[type - 1]))
				goto cancel;
			break;
		}
		emitted++;
	}

	if (emitted)
		return nla_nest_end(skb, nest);

	/* no slot made it into the nest: drop the empty container */
	nla_nest_cancel(skb, nest);
	return 0;

cancel:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(rtnetlink_put_metrics);
2005-04-17 06:20:36 +08:00
2006-11-28 01:27:07 +08:00
int rtnl_put_cacheinfo ( struct sk_buff * skb , struct dst_entry * dst , u32 id ,
2012-07-10 20:06:14 +08:00
long expires , u32 error )
2006-11-28 01:27:07 +08:00
{
struct rta_cacheinfo ci = {
. rta_error = error ,
. rta_id = id ,
} ;
2018-04-18 08:33:08 +08:00
if ( dst ) {
ci . rta_lastuse = jiffies_delta_to_clock_t ( jiffies - dst - > lastuse ) ;
ci . rta_used = dst - > __use ;
ci . rta_clntref = atomic_read ( & dst - > __refcnt ) ;
}
2012-07-30 00:01:30 +08:00
if ( expires ) {
unsigned long clock ;
2006-11-28 01:27:07 +08:00
2012-07-30 00:01:30 +08:00
clock = jiffies_to_clock_t ( abs ( expires ) ) ;
clock = min_t ( unsigned long , clock , INT_MAX ) ;
ci . rta_expires = ( expires > 0 ) ? clock : - clock ;
}
2006-11-28 01:27:07 +08:00
return nla_put ( skb , RTA_CACHEINFO , sizeof ( ci ) , & ci ) ;
}
EXPORT_SYMBOL_GPL ( rtnl_put_cacheinfo ) ;
2005-04-17 06:20:36 +08:00
2008-02-18 10:35:07 +08:00
static void set_operstate ( struct net_device * dev , unsigned char transition )
2006-03-21 09:09:11 +08:00
{
unsigned char operstate = dev - > operstate ;
2009-11-07 17:26:17 +08:00
switch ( transition ) {
2006-03-21 09:09:11 +08:00
case IF_OPER_UP :
if ( ( operstate = = IF_OPER_DORMANT | |
2020-04-20 06:11:50 +08:00
operstate = = IF_OPER_TESTING | |
2006-03-21 09:09:11 +08:00
operstate = = IF_OPER_UNKNOWN ) & &
2020-04-20 06:11:50 +08:00
! netif_dormant ( dev ) & & ! netif_testing ( dev ) )
2006-03-21 09:09:11 +08:00
operstate = IF_OPER_UP ;
break ;
2020-04-20 06:11:50 +08:00
case IF_OPER_TESTING :
if ( operstate = = IF_OPER_UP | |
operstate = = IF_OPER_UNKNOWN )
operstate = IF_OPER_TESTING ;
break ;
2006-03-21 09:09:11 +08:00
case IF_OPER_DORMANT :
if ( operstate = = IF_OPER_UP | |
operstate = = IF_OPER_UNKNOWN )
operstate = IF_OPER_DORMANT ;
break ;
2007-04-21 08:09:22 +08:00
}
2006-03-21 09:09:11 +08:00
if ( dev - > operstate ! = operstate ) {
2021-11-27 00:15:29 +08:00
write_lock ( & dev_base_lock ) ;
2006-03-21 09:09:11 +08:00
dev - > operstate = operstate ;
2021-11-27 00:15:29 +08:00
write_unlock ( & dev_base_lock ) ;
2008-02-18 10:35:07 +08:00
netdev_state_change ( dev ) ;
}
2006-03-21 09:09:11 +08:00
}
2012-07-27 10:58:22 +08:00
static unsigned int rtnl_dev_get_flags ( const struct net_device * dev )
{
return ( dev - > flags & ~ ( IFF_PROMISC | IFF_ALLMULTI ) ) |
( dev - > gflags & ( IFF_PROMISC | IFF_ALLMULTI ) ) ;
}
rtnetlink: support specifying device flags on device creation
commit e8469ed959c373c2ff9e6f488aa5a14971aebe1f
Author: Patrick McHardy <kaber@trash.net>
Date: Tue Feb 23 20:41:30 2010 +0100
Support specifying the initial device flags when creating a device through
rtnl_link. Devices allocated by rtnl_create_link() are marked as INITIALIZING
in order to suppress netlink registration notifications. To complete setup,
rtnl_configure_link() must be called, which performs the device flag changes
and invokes the deferred notifiers if everything went well.
Two examples:
# add macvlan to eth0
#
$ ip link add link eth0 up allmulticast on type macvlan
[LINK]11: macvlan0@eth0: <BROADCAST,MULTICAST,ALLMULTI,UP,LOWER_UP> mtu 1500 qdisc noqueue state UNKNOWN
link/ether 26:f8:84:02:f9:2a brd ff:ff:ff:ff:ff:ff
[ROUTE]ff00::/8 dev macvlan0 table local metric 256 mtu 1500 advmss 1440 hoplimit 0
[ROUTE]fe80::/64 dev macvlan0 proto kernel metric 256 mtu 1500 advmss 1440 hoplimit 0
[LINK]11: macvlan0@eth0: <BROADCAST,MULTICAST,ALLMULTI,UP,LOWER_UP> mtu 1500
link/ether 26:f8:84:02:f9:2a
[ADDR]11: macvlan0 inet6 fe80::24f8:84ff:fe02:f92a/64 scope link
valid_lft forever preferred_lft forever
[ROUTE]local fe80::24f8:84ff:fe02:f92a via :: dev lo table local proto none metric 0 mtu 16436 advmss 16376 hoplimit 0
[ROUTE]default via fe80::215:e9ff:fef0:10f8 dev macvlan0 proto kernel metric 1024 mtu 1500 advmss 1440 hoplimit 0
[NEIGH]fe80::215:e9ff:fef0:10f8 dev macvlan0 lladdr 00:15:e9:f0:10:f8 router STALE
[ROUTE]2001:6f8:974::/64 dev macvlan0 proto kernel metric 256 expires 0sec mtu 1500 advmss 1440 hoplimit 0
[PREFIX]prefix 2001:6f8:974::/64 dev macvlan0 onlink autoconf valid 14400 preferred 131084
[ADDR]11: macvlan0 inet6 2001:6f8:974:0:24f8:84ff:fe02:f92a/64 scope global dynamic
valid_lft 86399sec preferred_lft 14399sec
# add VLAN to eth1, eth1 is down
#
$ ip link add link eth1 up type vlan id 1000
RTNETLINK answers: Network is down
<no events>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-02-26 14:34:54 +08:00
static unsigned int rtnl_dev_combine_flags ( const struct net_device * dev ,
const struct ifinfomsg * ifm )
{
unsigned int flags = ifm - > ifi_flags ;
/* bugwards compatibility: ifi_change == 0 is treated as ~0 */
if ( ifm - > ifi_change )
flags = ( flags & ifm - > ifi_change ) |
2012-07-27 10:58:22 +08:00
( rtnl_dev_get_flags ( dev ) & ~ ifm - > ifi_change ) ;
rtnetlink: support specifying device flags on device creation
commit e8469ed959c373c2ff9e6f488aa5a14971aebe1f
Author: Patrick McHardy <kaber@trash.net>
Date: Tue Feb 23 20:41:30 2010 +0100
Support specifying the initial device flags when creating a device though
rtnl_link. Devices allocated by rtnl_create_link() are marked as INITIALIZING
in order to surpress netlink registration notifications. To complete setup,
rtnl_configure_link() must be called, which performs the device flag changes
and invokes the deferred notifiers if everything went well.
Two examples:
# add macvlan to eth0
#
$ ip link add link eth0 up allmulticast on type macvlan
[LINK]11: macvlan0@eth0: <BROADCAST,MULTICAST,ALLMULTI,UP,LOWER_UP> mtu 1500 qdisc noqueue state UNKNOWN
link/ether 26:f8:84:02:f9:2a brd ff:ff:ff:ff:ff:ff
[ROUTE]ff00::/8 dev macvlan0 table local metric 256 mtu 1500 advmss 1440 hoplimit 0
[ROUTE]fe80::/64 dev macvlan0 proto kernel metric 256 mtu 1500 advmss 1440 hoplimit 0
[LINK]11: macvlan0@eth0: <BROADCAST,MULTICAST,ALLMULTI,UP,LOWER_UP> mtu 1500
link/ether 26:f8:84:02:f9:2a
[ADDR]11: macvlan0 inet6 fe80::24f8:84ff:fe02:f92a/64 scope link
valid_lft forever preferred_lft forever
[ROUTE]local fe80::24f8:84ff:fe02:f92a via :: dev lo table local proto none metric 0 mtu 16436 advmss 16376 hoplimit 0
[ROUTE]default via fe80::215:e9ff:fef0:10f8 dev macvlan0 proto kernel metric 1024 mtu 1500 advmss 1440 hoplimit 0
[NEIGH]fe80::215:e9ff:fef0:10f8 dev macvlan0 lladdr 00:15:e9:f0:10:f8 router STALE
[ROUTE]2001:6f8:974::/64 dev macvlan0 proto kernel metric 256 expires 0sec mtu 1500 advmss 1440 hoplimit 0
[PREFIX]prefix 2001:6f8:974::/64 dev macvlan0 onlink autoconf valid 14400 preferred 131084
[ADDR]11: macvlan0 inet6 2001:6f8:974:0:24f8:84ff:fe02:f92a/64 scope global dynamic
valid_lft 86399sec preferred_lft 14399sec
# add VLAN to eth1, eth1 is down
#
$ ip link add link eth1 up type vlan id 1000
RTNETLINK answers: Network is down
<no events>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-02-26 14:34:54 +08:00
return flags ;
}
2006-08-05 14:05:34 +08:00
static void copy_rtnl_link_stats ( struct rtnl_link_stats * a ,
2010-06-08 15:19:54 +08:00
const struct rtnl_link_stats64 * b )
2005-04-17 06:20:36 +08:00
{
2006-08-05 14:05:34 +08:00
a - > rx_packets = b - > rx_packets ;
a - > tx_packets = b - > tx_packets ;
a - > rx_bytes = b - > rx_bytes ;
a - > tx_bytes = b - > tx_bytes ;
a - > rx_errors = b - > rx_errors ;
a - > tx_errors = b - > tx_errors ;
a - > rx_dropped = b - > rx_dropped ;
a - > tx_dropped = b - > tx_dropped ;
a - > multicast = b - > multicast ;
a - > collisions = b - > collisions ;
a - > rx_length_errors = b - > rx_length_errors ;
a - > rx_over_errors = b - > rx_over_errors ;
a - > rx_crc_errors = b - > rx_crc_errors ;
a - > rx_frame_errors = b - > rx_frame_errors ;
a - > rx_fifo_errors = b - > rx_fifo_errors ;
a - > rx_missed_errors = b - > rx_missed_errors ;
a - > tx_aborted_errors = b - > tx_aborted_errors ;
a - > tx_carrier_errors = b - > tx_carrier_errors ;
a - > tx_fifo_errors = b - > tx_fifo_errors ;
a - > tx_heartbeat_errors = b - > tx_heartbeat_errors ;
a - > tx_window_errors = b - > tx_window_errors ;
a - > rx_compressed = b - > rx_compressed ;
a - > tx_compressed = b - > tx_compressed ;
2016-02-02 07:51:05 +08:00
a - > rx_nohandler = b - > rx_nohandler ;
2010-03-11 17:57:29 +08:00
}
2010-05-16 16:05:45 +08:00
/* All VF info */
2012-02-22 05:54:48 +08:00
static inline int rtnl_vfinfo_size ( const struct net_device * dev ,
u32 ext_filter_mask )
2010-02-10 09:44:05 +08:00
{
2017-01-18 21:04:39 +08:00
if ( dev - > dev . parent & & ( ext_filter_mask & RTEXT_FILTER_VF ) ) {
2010-05-16 16:05:45 +08:00
int num_vfs = dev_num_vf ( dev - > dev . parent ) ;
2016-11-15 17:39:03 +08:00
size_t size = nla_total_size ( 0 ) ;
2010-05-28 18:42:43 +08:00
size + = num_vfs *
2016-11-15 17:39:03 +08:00
( nla_total_size ( 0 ) +
nla_total_size ( sizeof ( struct ifla_vf_mac ) ) +
ipoib: show VF broadcast address
in IPoIB case we can't see a VF broadcast address for but
can see for PF
Before:
11: ib1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 2044 qdisc pfifo_fast
state UP mode DEFAULT group default qlen 256
link/infiniband
80:00:00:66:fe:80:00:00:00:00:00:00:24:8a:07:03:00:a4:3e:7c brd
00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff
vf 0 MAC 14:80:00:00:66:fe, spoof checking off, link-state disable,
trust off, query_rss off
...
After:
11: ib1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 2044 qdisc pfifo_fast
state UP mode DEFAULT group default qlen 256
link/infiniband
80:00:00:66:fe:80:00:00:00:00:00:00:24:8a:07:03:00:a4:3e:7c brd
00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff
vf 0 link/infiniband
80:00:00:66:fe:80:00:00:00:00:00:00:24:8a:07:03:00:a4:3e:7c brd
00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff, spoof
checking off, link-state disable, trust off, query_rss off
v1->v2: add the IFLA_VF_BROADCAST constant
v2->v3: put IFLA_VF_BROADCAST at the end
to avoid KABI breakage and set NLA_REJECT
dev_setlink
Signed-off-by: Denis Kirjanov <kda@linux-powerpc.org>
Acked-by: Doug Ledford <dledford@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-06-17 16:53:41 +08:00
nla_total_size ( sizeof ( struct ifla_vf_broadcast ) ) +
2016-11-15 17:39:03 +08:00
nla_total_size ( sizeof ( struct ifla_vf_vlan ) ) +
nla_total_size ( 0 ) + /* nest IFLA_VF_VLAN_LIST */
2016-09-22 17:11:15 +08:00
nla_total_size ( MAX_VLAN_LIST_LEN *
sizeof ( struct ifla_vf_vlan_info ) ) +
net-next:v4: Add support to configure SR-IOV VF minimum and maximum Tx rate through ip tool.
o min_tx_rate puts lower limit on the VF bandwidth. VF is guaranteed
to have a bandwidth of at least this value.
max_tx_rate puts cap on the VF bandwidth. VF can have a bandwidth
of up to this value.
o A new handler set_vf_rate for attr IFLA_VF_RATE has been introduced
which takes 4 arguments:
netdev, VF number, min_tx_rate, max_tx_rate
o ndo_set_vf_rate replaces ndo_set_vf_tx_rate handler.
o Drivers that currently implement ndo_set_vf_tx_rate should now call
ndo_set_vf_rate instead and reject attempt to set a minimum bandwidth
greater than 0 for IFLA_VF_TX_RATE when IFLA_VF_RATE is not yet
implemented by driver.
o If user enters only one of either min_tx_rate or max_tx_rate, then,
userland should read back the other value from driver and set both
for IFLA_VF_RATE.
Drivers that have not yet implemented IFLA_VF_RATE should always
return min_tx_rate as 0 when read from ip tool.
o If both IFLA_VF_TX_RATE and IFLA_VF_RATE options are specified, then
IFLA_VF_RATE should override.
o Idea is to have consistent display of rate values to user.
o Usage example: -
./ip link set p4p1 vf 0 rate 900
./ip link show p4p1
32: p4p1: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode
DEFAULT qlen 1000
link/ether 00:0e:1e:08:b0:f0 brd ff:ff:ff:ff:ff:ff
vf 0 MAC 3e:a0:ca:bd:ae:5a, tx rate 900 (Mbps), max_tx_rate 900Mbps
vf 1 MAC f6:c6:7c:3f:3d:6c
vf 2 MAC 56:32:43:98:d7:71
vf 3 MAC d6:be:c3:b5:85:ff
vf 4 MAC ee:a9:9a:1e:19:14
vf 5 MAC 4a:d0:4c:07:52:18
vf 6 MAC 3a:76:44:93:62:f9
vf 7 MAC 82:e9:e7:e3:15:1a
./ip link set p4p1 vf 0 max_tx_rate 300 min_tx_rate 200
./ip link show p4p1
32: p4p1: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode
DEFAULT qlen 1000
link/ether 00:0e:1e:08:b0:f0 brd ff:ff:ff:ff:ff:ff
vf 0 MAC 3e:a0:ca:bd:ae:5a, tx rate 300 (Mbps), max_tx_rate 300Mbps,
min_tx_rate 200Mbps
vf 1 MAC f6:c6:7c:3f:3d:6c
vf 2 MAC 56:32:43:98:d7:71
vf 3 MAC d6:be:c3:b5:85:ff
vf 4 MAC ee:a9:9a:1e:19:14
vf 5 MAC 4a:d0:4c:07:52:18
vf 6 MAC 3a:76:44:93:62:f9
vf 7 MAC 82:e9:e7:e3:15:1a
./ip link set p4p1 vf 0 max_tx_rate 600 rate 300
./ip link show p4p1
32: p4p1: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode
DEFAULT qlen 1000
link/ether 00:0e:1e:08:b0:f brd ff:ff:ff:ff:ff:ff
vf 0 MAC 3e:a0:ca:bd:ae:5, tx rate 600 (Mbps), max_tx_rate 600Mbps,
min_tx_rate 200Mbps
vf 1 MAC f6:c6:7c:3f:3d:6c
vf 2 MAC 56:32:43:98:d7:71
vf 3 MAC d6:be:c3:b5:85:ff
vf 4 MAC ee:a9:9a:1e:19:14
vf 5 MAC 4a:d0:4c:07:52:18
vf 6 MAC 3a:76:44:93:62:f9
vf 7 MAC 82:e9:e7:e3:15:1a
Signed-off-by: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-05-22 21:59:05 +08:00
nla_total_size ( sizeof ( struct ifla_vf_spoofchk ) ) +
2016-11-15 17:39:03 +08:00
nla_total_size ( sizeof ( struct ifla_vf_tx_rate ) ) +
2014-08-08 22:44:32 +08:00
nla_total_size ( sizeof ( struct ifla_vf_rate ) ) +
2015-03-31 02:35:23 +08:00
nla_total_size ( sizeof ( struct ifla_vf_link_state ) ) +
2015-06-15 22:59:07 +08:00
nla_total_size ( sizeof ( struct ifla_vf_rss_query_en ) ) +
2016-11-15 17:39:03 +08:00
nla_total_size ( 0 ) + /* nest IFLA_VF_STATS */
2015-06-15 22:59:07 +08:00
/* IFLA_VF_STATS_RX_PACKETS */
2016-04-25 16:25:14 +08:00
nla_total_size_64bit ( sizeof ( __u64 ) ) +
2015-06-15 22:59:07 +08:00
/* IFLA_VF_STATS_TX_PACKETS */
2016-04-25 16:25:14 +08:00
nla_total_size_64bit ( sizeof ( __u64 ) ) +
2015-06-15 22:59:07 +08:00
/* IFLA_VF_STATS_RX_BYTES */
2016-04-25 16:25:14 +08:00
nla_total_size_64bit ( sizeof ( __u64 ) ) +
2015-06-15 22:59:07 +08:00
/* IFLA_VF_STATS_TX_BYTES */
2016-04-25 16:25:14 +08:00
nla_total_size_64bit ( sizeof ( __u64 ) ) +
2015-06-15 22:59:07 +08:00
/* IFLA_VF_STATS_BROADCAST */
2016-04-25 16:25:14 +08:00
nla_total_size_64bit ( sizeof ( __u64 ) ) +
2015-06-15 22:59:07 +08:00
/* IFLA_VF_STATS_MULTICAST */
2016-04-25 16:25:14 +08:00
nla_total_size_64bit ( sizeof ( __u64 ) ) +
2017-07-17 18:47:07 +08:00
/* IFLA_VF_STATS_RX_DROPPED */
nla_total_size_64bit ( sizeof ( __u64 ) ) +
/* IFLA_VF_STATS_TX_DROPPED */
nla_total_size_64bit ( sizeof ( __u64 ) ) +
2015-08-28 14:57:55 +08:00
nla_total_size ( sizeof ( struct ifla_vf_trust ) ) ) ;
2010-05-16 16:05:45 +08:00
return size ;
} else
2010-02-10 09:44:05 +08:00
return 0 ;
}
2014-04-24 08:22:36 +08:00
/**
 * rtnl_port_size - space to reserve for IFLA_VF_PORTS and IFLA_PORT_SELF
 * @dev: device being dumped
 * @ext_filter_mask: extended filter mask of the request
 *
 * Return: 0 when the driver has no ndo_get_vf_port callback, @dev has no
 * parent device, or RTEXT_FILTER_VF is not set in @ext_filter_mask.
 * Otherwise the size of one "self" port entry, plus - when the parent
 * reports at least one VF - the size of the VF-ports container and of one
 * port entry per VF.
 */
static size_t rtnl_port_size(const struct net_device *dev,
			     u32 ext_filter_mask)
{
	size_t port_size;
	size_t vf_ports_size;
	size_t vf_port_size;
	size_t port_self_size;
	int num_vfs;

	if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent ||
	    !(ext_filter_mask & RTEXT_FILTER_VF))
		return 0;

	/* attributes carried by a single port entry */
	port_size = nla_total_size(4)			/* PORT_VF */
		+ nla_total_size(PORT_PROFILE_MAX)	/* PORT_PROFILE */
		+ nla_total_size(PORT_UUID_MAX)		/* PORT_INSTANCE_UUID */
		+ nla_total_size(PORT_UUID_MAX)		/* PORT_HOST_UUID */
		+ nla_total_size(1)			/* PORT_VDP_REQUEST */
		+ nla_total_size(2);			/* PORT_VDP_RESPONSE */

	vf_ports_size = nla_total_size(sizeof(struct nlattr));
	vf_port_size = nla_total_size(sizeof(struct nlattr)) + port_size;
	port_self_size = nla_total_size(sizeof(struct nlattr)) + port_size;

	/*
	 * Read the VF count once so that the branch decision and the
	 * multiplication below are based on the same value.
	 */
	num_vfs = dev_num_vf(dev->dev.parent);
	if (!num_vfs)
		return port_self_size;

	return port_self_size + vf_ports_size + vf_port_size * num_vfs;
}
2017-04-19 03:36:58 +08:00
/**
 * rtnl_xdp_size - space to reserve for the IFLA_XDP nest
 *
 * Return: the sum of the four attribute sizes listed below.
 */
static size_t rtnl_xdp_size(void)
{
	return nla_total_size(0)	/* nest IFLA_XDP */
	     + nla_total_size(1)	/* XDP_ATTACHED */
	     + nla_total_size(4)	/* XDP_PROG_ID (or 1st mode) */
	     + nla_total_size(4);	/* XDP_<mode>_PROG_ID */
}
2019-09-30 17:48:17 +08:00
static size_t rtnl_prop_list_size ( const struct net_device * dev )
{
struct netdev_name_node * name_node ;
size_t size ;
if ( list_empty ( & dev - > name_node - > list ) )
return 0 ;
size = nla_total_size ( 0 ) ;
list_for_each_entry ( name_node , & dev - > name_node - > list , list )
size + = nla_total_size ( ALTIFNAMSIZ ) ;
return size ;
}
2020-08-01 08:34:01 +08:00
static size_t rtnl_proto_down_size ( const struct net_device * dev )
{
size_t size = nla_total_size ( 1 ) ;
if ( dev - > proto_down_reason )
size + = nla_total_size ( 0 ) + nla_total_size ( 4 ) ;
return size ;
}
2012-02-22 05:54:48 +08:00
/**
 * if_nlmsg_size - upper bound for the size of a link message about @dev
 * @dev: device being dumped
 * @ext_filter_mask: extended filter mask of the request; selects whether
 *	the VF related parts are accounted for
 *
 * Return: the aligned size of struct ifinfomsg plus the reserved size of
 * every attribute listed below.
 */
static noinline size_t if_nlmsg_size(const struct net_device *dev,
				     u32 ext_filter_mask)
{
	size_t size = NLMSG_ALIGN(sizeof(struct ifinfomsg));

	size += nla_total_size(IFNAMSIZ);	/* IFLA_IFNAME */
	size += nla_total_size(IFALIASZ);	/* IFLA_IFALIAS */
	size += nla_total_size(IFNAMSIZ);	/* IFLA_QDISC */
	size += nla_total_size_64bit(sizeof(struct rtnl_link_ifmap));
	size += nla_total_size(sizeof(struct rtnl_link_stats));
	size += nla_total_size_64bit(sizeof(struct rtnl_link_stats64));
	size += nla_total_size(MAX_ADDR_LEN);	/* IFLA_ADDRESS */
	size += nla_total_size(MAX_ADDR_LEN);	/* IFLA_BROADCAST */
	size += nla_total_size(4);		/* IFLA_TXQLEN */
	size += nla_total_size(4);		/* IFLA_WEIGHT */
	size += nla_total_size(4);		/* IFLA_MTU */
	size += nla_total_size(4);		/* IFLA_LINK */
	size += nla_total_size(4);		/* IFLA_MASTER */
	size += nla_total_size(1);		/* IFLA_CARRIER */
	size += nla_total_size(4);		/* IFLA_PROMISCUITY */
	size += nla_total_size(4);		/* IFLA_NUM_TX_QUEUES */
	size += nla_total_size(4);		/* IFLA_NUM_RX_QUEUES */
	size += nla_total_size(4);		/* IFLA_GSO_MAX_SEGS */
	size += nla_total_size(4);		/* IFLA_GSO_MAX_SIZE */
	size += nla_total_size(4);		/* IFLA_GRO_MAX_SIZE */
	size += nla_total_size(1);		/* IFLA_OPERSTATE */
	size += nla_total_size(1);		/* IFLA_LINKMODE */
	size += nla_total_size(4);		/* IFLA_CARRIER_CHANGES */
	size += nla_total_size(4);		/* IFLA_LINK_NETNSID */
	size += nla_total_size(4);		/* IFLA_GROUP */

	/* IFLA_NUM_VF: payload is only accounted for when VFs are requested */
	size += nla_total_size((ext_filter_mask & RTEXT_FILTER_VF) ? 4 : 0);
	/* IFLA_VFINFO_LIST */
	size += rtnl_vfinfo_size(dev, ext_filter_mask);
	/* IFLA_VF_PORTS + IFLA_PORT_SELF */
	size += rtnl_port_size(dev, ext_filter_mask);
	/* IFLA_LINKINFO */
	size += rtnl_link_get_size(dev);
	/* IFLA_AF_SPEC */
	size += rtnl_link_get_af_size(dev, ext_filter_mask);

	size += nla_total_size(MAX_PHYS_ITEM_ID_LEN);	/* IFLA_PHYS_PORT_ID */
	size += nla_total_size(MAX_PHYS_ITEM_ID_LEN);	/* IFLA_PHYS_SWITCH_ID */
	size += nla_total_size(IFNAMSIZ);	/* IFLA_PHYS_PORT_NAME */
	size += rtnl_xdp_size();		/* IFLA_XDP */
	size += nla_total_size(4);		/* IFLA_EVENT */
	size += nla_total_size(4);		/* IFLA_NEW_NETNSID */
	size += nla_total_size(4);		/* IFLA_NEW_IFINDEX */
	size += rtnl_proto_down_size(dev);	/* proto down */
	size += nla_total_size(4);		/* IFLA_TARGET_NETNSID */
	size += nla_total_size(4);		/* IFLA_CARRIER_UP_COUNT */
	size += nla_total_size(4);		/* IFLA_CARRIER_DOWN_COUNT */
	size += nla_total_size(4);		/* IFLA_MIN_MTU */
	size += nla_total_size(4);		/* IFLA_MAX_MTU */
	size += rtnl_prop_list_size(dev);
	size += nla_total_size(MAX_ADDR_LEN);	/* IFLA_PERM_ADDRESS */

	return size;
}
net: Add netlink support for virtual port management (was iovnl)
Add new netdev ops ndo_{set|get}_vf_port to allow setting of
port-profile on a netdev interface. Extends netlink socket RTM_SETLINK/
RTM_GETLINK with two new sub msgs called IFLA_VF_PORTS and IFLA_PORT_SELF
(added to end of IFLA_cmd list). These are both nested attributes
using this layout:
[IFLA_NUM_VF]
[IFLA_VF_PORTS]
[IFLA_VF_PORT]
[IFLA_PORT_*], ...
[IFLA_VF_PORT]
[IFLA_PORT_*], ...
...
[IFLA_PORT_SELF]
[IFLA_PORT_*], ...
These attributes are designed to be set and get symmetrically. VF_PORTS
is a list of VF_PORTs, one for each VF, when dealing with an SR-IOV
device. PORT_SELF is for the PF of the SR-IOV device, in case it wants
to also have a port-profile, or for the case where the VF==PF, like in
enic patch 2/2 of this patch set.
A port-profile is used to configure/enable the external switch virtual port
backing the netdev interface, not to configure the host-facing side of the
netdev. A port-profile is an identifier known to the switch. How port-
profiles are installed on the switch or how available port-profiles are
made known to the host is outside the scope of this patch.
There are two types of port-profiles specs in the netlink msg. The first spec
is for 802.1Qbg (pre-)standard, VDP protocol. The second spec is for devices
that run a similar protocol as VDP but in firmware, thus hiding the protocol
details. In either case, the specs have much in common and it makes sense to
define the netlink msg as the union of the two specs. For example, both specs
have a notion of associating/deassociating a port-profile. And both specs
require some information from the hypervisor manager, such as client port
instance ID.
The general flow is the port-profile is applied to a host netdev interface
using RTM_SETLINK, the receiver of the RTM_SETLINK msg communicates with the
switch, and the switch virtual port backing the host netdev interface is
configured/enabled based on the settings defined by the port-profile. What
those settings comprise, and how those settings are managed is again
outside the scope of this patch, since this patch only deals with the
first step in the flow.
Signed-off-by: Scott Feldman <scofeldm@cisco.com>
Signed-off-by: Roopa Prabhu <roprabhu@cisco.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-05-18 13:49:55 +08:00
static int rtnl_vf_ports_fill ( struct sk_buff * skb , struct net_device * dev )
{
struct nlattr * vf_ports ;
struct nlattr * vf_port ;
int vf ;
int err ;
2019-04-26 17:13:06 +08:00
vf_ports = nla_nest_start_noflag ( skb , IFLA_VF_PORTS ) ;
net: Add netlink support for virtual port management (was iovnl)
Add new netdev ops ndo_{set|get}_vf_port to allow setting of
port-profile on a netdev interface. Extends netlink socket RTM_SETLINK/
RTM_GETLINK with two new sub msgs called IFLA_VF_PORTS and IFLA_PORT_SELF
(added to end of IFLA_cmd list). These are both nested atrtibutes
using this layout:
[IFLA_NUM_VF]
[IFLA_VF_PORTS]
[IFLA_VF_PORT]
[IFLA_PORT_*], ...
[IFLA_VF_PORT]
[IFLA_PORT_*], ...
...
[IFLA_PORT_SELF]
[IFLA_PORT_*], ...
These attributes are design to be set and get symmetrically. VF_PORTS
is a list of VF_PORTs, one for each VF, when dealing with an SR-IOV
device. PORT_SELF is for the PF of the SR-IOV device, in case it wants
to also have a port-profile, or for the case where the VF==PF, like in
enic patch 2/2 of this patch set.
A port-profile is used to configure/enable the external switch virtual port
backing the netdev interface, not to configure the host-facing side of the
netdev. A port-profile is an identifier known to the switch. How port-
profiles are installed on the switch or how available port-profiles are
made know to the host is outside the scope of this patch.
There are two types of port-profiles specs in the netlink msg. The first spec
is for 802.1Qbg (pre-)standard, VDP protocol. The second spec is for devices
that run a similar protocol as VDP but in firmware, thus hiding the protocol
details. In either case, the specs have much in common and makes sense to
define the netlink msg as the union of the two specs. For example, both specs
have a notition of associating/deassociating a port-profile. And both specs
require some information from the hypervisor manager, such as client port
instance ID.
The general flow is the port-profile is applied to a host netdev interface
using RTM_SETLINK, the receiver of the RTM_SETLINK msg communicates with the
switch, and the switch virtual port backing the host netdev interface is
configured/enabled based on the settings defined by the port-profile. What
those settings comprise, and how those settings are managed is again
outside the scope of this patch, since this patch only deals with the
first step in the flow.
Signed-off-by: Scott Feldman <scofeldm@cisco.com>
Signed-off-by: Roopa Prabhu <roprabhu@cisco.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-05-18 13:49:55 +08:00
if ( ! vf_ports )
return - EMSGSIZE ;
for ( vf = 0 ; vf < dev_num_vf ( dev - > dev . parent ) ; vf + + ) {
2019-04-26 17:13:06 +08:00
vf_port = nla_nest_start_noflag ( skb , IFLA_VF_PORT ) ;
2010-05-28 18:42:18 +08:00
if ( ! vf_port )
goto nla_put_failure ;
2012-04-02 08:12:00 +08:00
if ( nla_put_u32 ( skb , IFLA_PORT_VF , vf ) )
goto nla_put_failure ;
net: Add netlink support for virtual port management (was iovnl)
Add new netdev ops ndo_{set|get}_vf_port to allow setting of
port-profile on a netdev interface. Extends netlink socket RTM_SETLINK/
RTM_GETLINK with two new sub msgs called IFLA_VF_PORTS and IFLA_PORT_SELF
(added to end of IFLA_cmd list). These are both nested atrtibutes
using this layout:
[IFLA_NUM_VF]
[IFLA_VF_PORTS]
[IFLA_VF_PORT]
[IFLA_PORT_*], ...
[IFLA_VF_PORT]
[IFLA_PORT_*], ...
...
[IFLA_PORT_SELF]
[IFLA_PORT_*], ...
These attributes are design to be set and get symmetrically. VF_PORTS
is a list of VF_PORTs, one for each VF, when dealing with an SR-IOV
device. PORT_SELF is for the PF of the SR-IOV device, in case it wants
to also have a port-profile, or for the case where the VF==PF, like in
enic patch 2/2 of this patch set.
A port-profile is used to configure/enable the external switch virtual port
backing the netdev interface, not to configure the host-facing side of the
netdev. A port-profile is an identifier known to the switch. How port-
profiles are installed on the switch or how available port-profiles are
made know to the host is outside the scope of this patch.
There are two types of port-profiles specs in the netlink msg. The first spec
is for 802.1Qbg (pre-)standard, VDP protocol. The second spec is for devices
that run a similar protocol as VDP but in firmware, thus hiding the protocol
details. In either case, the specs have much in common and makes sense to
define the netlink msg as the union of the two specs. For example, both specs
have a notition of associating/deassociating a port-profile. And both specs
require some information from the hypervisor manager, such as client port
instance ID.
The general flow is the port-profile is applied to a host netdev interface
using RTM_SETLINK, the receiver of the RTM_SETLINK msg communicates with the
switch, and the switch virtual port backing the host netdev interface is
configured/enabled based on the settings defined by the port-profile. What
those settings comprise, and how those settings are managed is again
outside the scope of this patch, since this patch only deals with the
first step in the flow.
Signed-off-by: Scott Feldman <scofeldm@cisco.com>
Signed-off-by: Roopa Prabhu <roprabhu@cisco.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-05-18 13:49:55 +08:00
err = dev - > netdev_ops - > ndo_get_vf_port ( dev , vf , skb ) ;
2010-05-28 18:42:18 +08:00
if ( err = = - EMSGSIZE )
goto nla_put_failure ;
net: Add netlink support for virtual port management (was iovnl)
Add new netdev ops ndo_{set|get}_vf_port to allow setting of
port-profile on a netdev interface. Extends netlink socket RTM_SETLINK/
RTM_GETLINK with two new sub msgs called IFLA_VF_PORTS and IFLA_PORT_SELF
(added to end of IFLA_cmd list). These are both nested atrtibutes
using this layout:
[IFLA_NUM_VF]
[IFLA_VF_PORTS]
[IFLA_VF_PORT]
[IFLA_PORT_*], ...
[IFLA_VF_PORT]
[IFLA_PORT_*], ...
...
[IFLA_PORT_SELF]
[IFLA_PORT_*], ...
These attributes are design to be set and get symmetrically. VF_PORTS
is a list of VF_PORTs, one for each VF, when dealing with an SR-IOV
device. PORT_SELF is for the PF of the SR-IOV device, in case it wants
to also have a port-profile, or for the case where the VF==PF, like in
enic patch 2/2 of this patch set.
A port-profile is used to configure/enable the external switch virtual port
backing the netdev interface, not to configure the host-facing side of the
netdev. A port-profile is an identifier known to the switch. How port-
profiles are installed on the switch or how available port-profiles are
made know to the host is outside the scope of this patch.
There are two types of port-profiles specs in the netlink msg. The first spec
is for 802.1Qbg (pre-)standard, VDP protocol. The second spec is for devices
that run a similar protocol as VDP but in firmware, thus hiding the protocol
details. In either case, the specs have much in common and makes sense to
define the netlink msg as the union of the two specs. For example, both specs
have a notition of associating/deassociating a port-profile. And both specs
require some information from the hypervisor manager, such as client port
instance ID.
The general flow is the port-profile is applied to a host netdev interface
using RTM_SETLINK, the receiver of the RTM_SETLINK msg communicates with the
switch, and the switch virtual port backing the host netdev interface is
configured/enabled based on the settings defined by the port-profile. What
those settings comprise, and how those settings are managed is again
outside the scope of this patch, since this patch only deals with the
first step in the flow.
Signed-off-by: Scott Feldman <scofeldm@cisco.com>
Signed-off-by: Roopa Prabhu <roprabhu@cisco.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-05-18 13:49:55 +08:00
if ( err ) {
nla_nest_cancel ( skb , vf_port ) ;
continue ;
}
nla_nest_end ( skb , vf_port ) ;
}
nla_nest_end ( skb , vf_ports ) ;
return 0 ;
2010-05-28 18:42:18 +08:00
nla_put_failure :
nla_nest_cancel ( skb , vf_ports ) ;
return - EMSGSIZE ;
net: Add netlink support for virtual port management (was iovnl)
Add new netdev ops ndo_{set|get}_vf_port to allow setting of
port-profile on a netdev interface. Extends netlink socket RTM_SETLINK/
RTM_GETLINK with two new sub msgs called IFLA_VF_PORTS and IFLA_PORT_SELF
(added to end of IFLA_cmd list). These are both nested atrtibutes
using this layout:
[IFLA_NUM_VF]
[IFLA_VF_PORTS]
[IFLA_VF_PORT]
[IFLA_PORT_*], ...
[IFLA_VF_PORT]
[IFLA_PORT_*], ...
...
[IFLA_PORT_SELF]
[IFLA_PORT_*], ...
These attributes are design to be set and get symmetrically. VF_PORTS
is a list of VF_PORTs, one for each VF, when dealing with an SR-IOV
device. PORT_SELF is for the PF of the SR-IOV device, in case it wants
to also have a port-profile, or for the case where the VF==PF, like in
enic patch 2/2 of this patch set.
A port-profile is used to configure/enable the external switch virtual port
backing the netdev interface, not to configure the host-facing side of the
netdev. A port-profile is an identifier known to the switch. How port-
profiles are installed on the switch or how available port-profiles are
made know to the host is outside the scope of this patch.
There are two types of port-profiles specs in the netlink msg. The first spec
is for 802.1Qbg (pre-)standard, VDP protocol. The second spec is for devices
that run a similar protocol as VDP but in firmware, thus hiding the protocol
details. In either case, the specs have much in common and makes sense to
define the netlink msg as the union of the two specs. For example, both specs
have a notition of associating/deassociating a port-profile. And both specs
require some information from the hypervisor manager, such as client port
instance ID.
The general flow is the port-profile is applied to a host netdev interface
using RTM_SETLINK, the receiver of the RTM_SETLINK msg communicates with the
switch, and the switch virtual port backing the host netdev interface is
configured/enabled based on the settings defined by the port-profile. What
those settings comprise, and how those settings are managed is again
outside the scope of this patch, since this patch only deals with the
first step in the flow.
Signed-off-by: Scott Feldman <scofeldm@cisco.com>
Signed-off-by: Roopa Prabhu <roprabhu@cisco.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-05-18 13:49:55 +08:00
}
static int rtnl_port_self_fill ( struct sk_buff * skb , struct net_device * dev )
{
struct nlattr * port_self ;
int err ;
2019-04-26 17:13:06 +08:00
port_self = nla_nest_start_noflag ( skb , IFLA_PORT_SELF ) ;
net: Add netlink support for virtual port management (was iovnl)
Add new netdev ops ndo_{set|get}_vf_port to allow setting of
port-profile on a netdev interface. Extends netlink socket RTM_SETLINK/
RTM_GETLINK with two new sub msgs called IFLA_VF_PORTS and IFLA_PORT_SELF
(added to end of IFLA_cmd list). These are both nested atrtibutes
using this layout:
[IFLA_NUM_VF]
[IFLA_VF_PORTS]
[IFLA_VF_PORT]
[IFLA_PORT_*], ...
[IFLA_VF_PORT]
[IFLA_PORT_*], ...
...
[IFLA_PORT_SELF]
[IFLA_PORT_*], ...
These attributes are design to be set and get symmetrically. VF_PORTS
is a list of VF_PORTs, one for each VF, when dealing with an SR-IOV
device. PORT_SELF is for the PF of the SR-IOV device, in case it wants
to also have a port-profile, or for the case where the VF==PF, like in
enic patch 2/2 of this patch set.
A port-profile is used to configure/enable the external switch virtual port
backing the netdev interface, not to configure the host-facing side of the
netdev. A port-profile is an identifier known to the switch. How port-
profiles are installed on the switch or how available port-profiles are
made know to the host is outside the scope of this patch.
There are two types of port-profiles specs in the netlink msg. The first spec
is for 802.1Qbg (pre-)standard, VDP protocol. The second spec is for devices
that run a similar protocol as VDP but in firmware, thus hiding the protocol
details. In either case, the specs have much in common and makes sense to
define the netlink msg as the union of the two specs. For example, both specs
have a notition of associating/deassociating a port-profile. And both specs
require some information from the hypervisor manager, such as client port
instance ID.
The general flow is the port-profile is applied to a host netdev interface
using RTM_SETLINK, the receiver of the RTM_SETLINK msg communicates with the
switch, and the switch virtual port backing the host netdev interface is
configured/enabled based on the settings defined by the port-profile. What
those settings comprise, and how those settings are managed is again
outside the scope of this patch, since this patch only deals with the
first step in the flow.
Signed-off-by: Scott Feldman <scofeldm@cisco.com>
Signed-off-by: Roopa Prabhu <roprabhu@cisco.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-05-18 13:49:55 +08:00
if ( ! port_self )
return - EMSGSIZE ;
err = dev - > netdev_ops - > ndo_get_vf_port ( dev , PORT_SELF_VF , skb ) ;
if ( err ) {
nla_nest_cancel ( skb , port_self ) ;
2010-05-28 18:42:18 +08:00
return ( err = = - EMSGSIZE ) ? err : 0 ;
net: Add netlink support for virtual port management (was iovnl)
Add new netdev ops ndo_{set|get}_vf_port to allow setting of
port-profile on a netdev interface. Extends netlink socket RTM_SETLINK/
RTM_GETLINK with two new sub msgs called IFLA_VF_PORTS and IFLA_PORT_SELF
(added to end of IFLA_cmd list). These are both nested atrtibutes
using this layout:
[IFLA_NUM_VF]
[IFLA_VF_PORTS]
[IFLA_VF_PORT]
[IFLA_PORT_*], ...
[IFLA_VF_PORT]
[IFLA_PORT_*], ...
...
[IFLA_PORT_SELF]
[IFLA_PORT_*], ...
These attributes are design to be set and get symmetrically. VF_PORTS
is a list of VF_PORTs, one for each VF, when dealing with an SR-IOV
device. PORT_SELF is for the PF of the SR-IOV device, in case it wants
to also have a port-profile, or for the case where the VF==PF, like in
enic patch 2/2 of this patch set.
A port-profile is used to configure/enable the external switch virtual port
backing the netdev interface, not to configure the host-facing side of the
netdev. A port-profile is an identifier known to the switch. How port-
profiles are installed on the switch or how available port-profiles are
made know to the host is outside the scope of this patch.
There are two types of port-profiles specs in the netlink msg. The first spec
is for 802.1Qbg (pre-)standard, VDP protocol. The second spec is for devices
that run a similar protocol as VDP but in firmware, thus hiding the protocol
details. In either case, the specs have much in common and makes sense to
define the netlink msg as the union of the two specs. For example, both specs
have a notition of associating/deassociating a port-profile. And both specs
require some information from the hypervisor manager, such as client port
instance ID.
The general flow is the port-profile is applied to a host netdev interface
using RTM_SETLINK, the receiver of the RTM_SETLINK msg communicates with the
switch, and the switch virtual port backing the host netdev interface is
configured/enabled based on the settings defined by the port-profile. What
those settings comprise, and how those settings are managed is again
outside the scope of this patch, since this patch only deals with the
first step in the flow.
Signed-off-by: Scott Feldman <scofeldm@cisco.com>
Signed-off-by: Roopa Prabhu <roprabhu@cisco.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-05-18 13:49:55 +08:00
}
nla_nest_end ( skb , port_self ) ;
return 0 ;
}
2014-04-24 08:22:36 +08:00
/*
 * Add the port-profile attributes for @dev to @skb: IFLA_PORT_SELF first,
 * then IFLA_VF_PORTS when dev_num_vf() reports at least one VF.
 *
 * Nothing is emitted (and 0 is returned) unless the driver implements
 * ndo_get_vf_port, the device has a parent, and RTEXT_FILTER_VF is set in
 * @ext_filter_mask.  Otherwise returns 0 on success or the error from the
 * first helper that fails.
 */
static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev,
			  u32 ext_filter_mask)
{
	int rc;

	if (!dev->netdev_ops->ndo_get_vf_port)
		return 0;
	if (!dev->dev.parent)
		return 0;
	if (!(ext_filter_mask & RTEXT_FILTER_VF))
		return 0;

	rc = rtnl_port_self_fill(skb, dev);
	if (rc)
		return rc;

	if (!dev_num_vf(dev->dev.parent))
		return 0;

	return rtnl_vf_ports_fill(skb, dev);
}
2013-07-30 00:16:50 +08:00
static int rtnl_phys_port_id_fill ( struct sk_buff * skb , struct net_device * dev )
{
int err ;
2014-11-28 21:34:16 +08:00
struct netdev_phys_item_id ppid ;
2013-07-30 00:16:50 +08:00
err = dev_get_phys_port_id ( dev , & ppid ) ;
if ( err ) {
if ( err = = - EOPNOTSUPP )
return 0 ;
return err ;
}
if ( nla_put ( skb , IFLA_PHYS_PORT_ID , ppid . id_len , ppid . id ) )
return - EMSGSIZE ;
return 0 ;
}
2015-03-18 10:23:15 +08:00
static int rtnl_phys_port_name_fill ( struct sk_buff * skb , struct net_device * dev )
{
char name [ IFNAMSIZ ] ;
int err ;
err = dev_get_phys_port_name ( dev , name , sizeof ( name ) ) ;
if ( err ) {
if ( err = = - EOPNOTSUPP )
return 0 ;
return err ;
}
2017-05-04 22:48:58 +08:00
if ( nla_put_string ( skb , IFLA_PHYS_PORT_NAME , name ) )
2015-03-18 10:23:15 +08:00
return - EMSGSIZE ;
return 0 ;
}
2014-11-28 21:34:18 +08:00
static int rtnl_phys_switch_id_fill ( struct sk_buff * skb , struct net_device * dev )
{
2019-02-07 01:45:46 +08:00
struct netdev_phys_item_id ppid = { } ;
2014-11-28 21:34:18 +08:00
int err ;
2019-02-07 01:45:46 +08:00
err = dev_get_port_parent_id ( dev , & ppid , false ) ;
2014-11-28 21:34:18 +08:00
if ( err ) {
if ( err = = - EOPNOTSUPP )
return 0 ;
return err ;
}
2019-02-07 01:45:46 +08:00
if ( nla_put ( skb , IFLA_PHYS_SWITCH_ID , ppid . id_len , ppid . id ) )
2014-11-28 21:34:18 +08:00
return - EMSGSIZE ;
return 0 ;
}
2015-11-17 21:16:52 +08:00
static noinline_for_stack int rtnl_fill_stats ( struct sk_buff * skb ,
struct net_device * dev )
{
2016-04-16 11:36:25 +08:00
struct rtnl_link_stats64 * sp ;
2015-11-17 21:16:52 +08:00
struct nlattr * attr ;
2016-04-20 02:30:10 +08:00
2016-04-22 00:58:25 +08:00
attr = nla_reserve_64bit ( skb , IFLA_STATS64 ,
sizeof ( struct rtnl_link_stats64 ) , IFLA_PAD ) ;
2015-11-17 21:16:52 +08:00
if ( ! attr )
return - EMSGSIZE ;
2016-04-16 11:36:25 +08:00
sp = nla_data ( attr ) ;
dev_get_stats ( dev , sp ) ;
2015-11-17 21:16:52 +08:00
2016-04-16 11:36:25 +08:00
attr = nla_reserve ( skb , IFLA_STATS ,
sizeof ( struct rtnl_link_stats ) ) ;
2015-11-17 21:16:52 +08:00
if ( ! attr )
return - EMSGSIZE ;
2016-04-16 11:36:25 +08:00
copy_rtnl_link_stats ( nla_data ( attr ) , sp ) ;
2015-11-17 21:16:52 +08:00
return 0 ;
}
static noinline_for_stack int rtnl_fill_vfinfo ( struct sk_buff * skb ,
struct net_device * dev ,
int vfs_num ,
struct nlattr * vfinfo )
{
struct ifla_vf_rss_query_en vf_rss_query_en ;
2016-09-22 17:11:15 +08:00
struct nlattr * vf , * vfstats , * vfvlanlist ;
2015-11-17 21:16:52 +08:00
struct ifla_vf_link_state vf_linkstate ;
2016-09-22 17:11:15 +08:00
struct ifla_vf_vlan_info vf_vlan_info ;
2015-11-17 21:16:52 +08:00
struct ifla_vf_spoofchk vf_spoofchk ;
struct ifla_vf_tx_rate vf_tx_rate ;
struct ifla_vf_stats vf_stats ;
struct ifla_vf_trust vf_trust ;
struct ifla_vf_vlan vf_vlan ;
struct ifla_vf_rate vf_rate ;
struct ifla_vf_mac vf_mac ;
ipoib: show VF broadcast address
in IPoIB case we can't see a VF broadcast address for but
can see for PF
Before:
11: ib1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 2044 qdisc pfifo_fast
state UP mode DEFAULT group default qlen 256
link/infiniband
80:00:00:66:fe:80:00:00:00:00:00:00:24:8a:07:03:00:a4:3e:7c brd
00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff
vf 0 MAC 14:80:00:00:66:fe, spoof checking off, link-state disable,
trust off, query_rss off
...
After:
11: ib1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 2044 qdisc pfifo_fast
state UP mode DEFAULT group default qlen 256
link/infiniband
80:00:00:66:fe:80:00:00:00:00:00:00:24:8a:07:03:00:a4:3e:7c brd
00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff
vf 0 link/infiniband
80:00:00:66:fe:80:00:00:00:00:00:00:24:8a:07:03:00:a4:3e:7c brd
00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff, spoof
checking off, link-state disable, trust off, query_rss off
v1->v2: add the IFLA_VF_BROADCAST constant
v2->v3: put IFLA_VF_BROADCAST at the end
to avoid KABI breakage and set NLA_REJECT
dev_setlink
Signed-off-by: Denis Kirjanov <kda@linux-powerpc.org>
Acked-by: Doug Ledford <dledford@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-06-17 16:53:41 +08:00
struct ifla_vf_broadcast vf_broadcast ;
2015-11-17 21:16:52 +08:00
struct ifla_vf_info ivi ;
2019-11-06 21:30:07 +08:00
struct ifla_vf_guid node_guid ;
struct ifla_vf_guid port_guid ;
2015-11-17 21:16:52 +08:00
2017-06-08 02:00:33 +08:00
memset ( & ivi , 0 , sizeof ( ivi ) ) ;
2015-11-17 21:16:52 +08:00
/* Not all SR-IOV capable drivers support the
* spoofcheck and " RSS query enable " query . Preset to
* - 1 so the user space tool can detect that the driver
* didn ' t report anything .
*/
ivi . spoofchk = - 1 ;
ivi . rss_query_en = - 1 ;
ivi . trusted = - 1 ;
/* The default value for VF link state is "auto"
* IFLA_VF_LINK_STATE_AUTO which equals zero
*/
ivi . linkstate = 0 ;
2016-09-22 17:11:15 +08:00
/* VLAN Protocol by default is 802.1Q */
ivi . vlan_proto = htons ( ETH_P_8021Q ) ;
2015-11-17 21:16:52 +08:00
if ( dev - > netdev_ops - > ndo_get_vf_config ( dev , vfs_num , & ivi ) )
return 0 ;
2016-10-13 16:45:28 +08:00
memset ( & vf_vlan_info , 0 , sizeof ( vf_vlan_info ) ) ;
2020-01-30 20:59:49 +08:00
memset ( & node_guid , 0 , sizeof ( node_guid ) ) ;
memset ( & port_guid , 0 , sizeof ( port_guid ) ) ;
2016-10-13 16:45:28 +08:00
2015-11-17 21:16:52 +08:00
vf_mac . vf =
vf_vlan . vf =
2016-09-22 17:11:15 +08:00
vf_vlan_info . vf =
2015-11-17 21:16:52 +08:00
vf_rate . vf =
vf_tx_rate . vf =
vf_spoofchk . vf =
vf_linkstate . vf =
vf_rss_query_en . vf =
2019-12-03 23:43:36 +08:00
vf_trust . vf =
node_guid . vf =
port_guid . vf = ivi . vf ;
2015-11-17 21:16:52 +08:00
memcpy ( vf_mac . mac , ivi . mac , sizeof ( ivi . mac ) ) ;
ipoib: show VF broadcast address
in IPoIB case we can't see a VF broadcast address for but
can see for PF
Before:
11: ib1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 2044 qdisc pfifo_fast
state UP mode DEFAULT group default qlen 256
link/infiniband
80:00:00:66:fe:80:00:00:00:00:00:00:24:8a:07:03:00:a4:3e:7c brd
00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff
vf 0 MAC 14:80:00:00:66:fe, spoof checking off, link-state disable,
trust off, query_rss off
...
After:
11: ib1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 2044 qdisc pfifo_fast
state UP mode DEFAULT group default qlen 256
link/infiniband
80:00:00:66:fe:80:00:00:00:00:00:00:24:8a:07:03:00:a4:3e:7c brd
00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff
vf 0 link/infiniband
80:00:00:66:fe:80:00:00:00:00:00:00:24:8a:07:03:00:a4:3e:7c brd
00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff, spoof
checking off, link-state disable, trust off, query_rss off
v1->v2: add the IFLA_VF_BROADCAST constant
v2->v3: put IFLA_VF_BROADCAST at the end
to avoid KABI breakage and set NLA_REJECT
dev_setlink
Signed-off-by: Denis Kirjanov <kda@linux-powerpc.org>
Acked-by: Doug Ledford <dledford@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-06-17 16:53:41 +08:00
memcpy ( vf_broadcast . broadcast , dev - > broadcast , dev - > addr_len ) ;
2015-11-17 21:16:52 +08:00
vf_vlan . vlan = ivi . vlan ;
vf_vlan . qos = ivi . qos ;
2016-09-22 17:11:15 +08:00
vf_vlan_info . vlan = ivi . vlan ;
vf_vlan_info . qos = ivi . qos ;
vf_vlan_info . vlan_proto = ivi . vlan_proto ;
2015-11-17 21:16:52 +08:00
vf_tx_rate . rate = ivi . max_tx_rate ;
vf_rate . min_tx_rate = ivi . min_tx_rate ;
vf_rate . max_tx_rate = ivi . max_tx_rate ;
vf_spoofchk . setting = ivi . spoofchk ;
vf_linkstate . link_state = ivi . linkstate ;
vf_rss_query_en . setting = ivi . rss_query_en ;
vf_trust . setting = ivi . trusted ;
2019-04-26 17:13:06 +08:00
vf = nla_nest_start_noflag ( skb , IFLA_VF_INFO ) ;
2016-09-22 17:11:15 +08:00
if ( ! vf )
goto nla_put_vfinfo_failure ;
2015-11-17 21:16:52 +08:00
if ( nla_put ( skb , IFLA_VF_MAC , sizeof ( vf_mac ) , & vf_mac ) | |
ipoib: show VF broadcast address
in IPoIB case we can't see a VF broadcast address for but
can see for PF
Before:
11: ib1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 2044 qdisc pfifo_fast
state UP mode DEFAULT group default qlen 256
link/infiniband
80:00:00:66:fe:80:00:00:00:00:00:00:24:8a:07:03:00:a4:3e:7c brd
00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff
vf 0 MAC 14:80:00:00:66:fe, spoof checking off, link-state disable,
trust off, query_rss off
...
After:
11: ib1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 2044 qdisc pfifo_fast
state UP mode DEFAULT group default qlen 256
link/infiniband
80:00:00:66:fe:80:00:00:00:00:00:00:24:8a:07:03:00:a4:3e:7c brd
00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff
vf 0 link/infiniband
80:00:00:66:fe:80:00:00:00:00:00:00:24:8a:07:03:00:a4:3e:7c brd
00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff, spoof
checking off, link-state disable, trust off, query_rss off
v1->v2: add the IFLA_VF_BROADCAST constant
v2->v3: put IFLA_VF_BROADCAST at the end
to avoid KABI breakage and set NLA_REJECT
dev_setlink
Signed-off-by: Denis Kirjanov <kda@linux-powerpc.org>
Acked-by: Doug Ledford <dledford@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-06-17 16:53:41 +08:00
nla_put ( skb , IFLA_VF_BROADCAST , sizeof ( vf_broadcast ) , & vf_broadcast ) | |
2015-11-17 21:16:52 +08:00
nla_put ( skb , IFLA_VF_VLAN , sizeof ( vf_vlan ) , & vf_vlan ) | |
nla_put ( skb , IFLA_VF_RATE , sizeof ( vf_rate ) ,
& vf_rate ) | |
nla_put ( skb , IFLA_VF_TX_RATE , sizeof ( vf_tx_rate ) ,
& vf_tx_rate ) | |
nla_put ( skb , IFLA_VF_SPOOFCHK , sizeof ( vf_spoofchk ) ,
& vf_spoofchk ) | |
nla_put ( skb , IFLA_VF_LINK_STATE , sizeof ( vf_linkstate ) ,
& vf_linkstate ) | |
nla_put ( skb , IFLA_VF_RSS_QUERY_EN ,
sizeof ( vf_rss_query_en ) ,
& vf_rss_query_en ) | |
nla_put ( skb , IFLA_VF_TRUST ,
sizeof ( vf_trust ) , & vf_trust ) )
2016-09-22 17:11:15 +08:00
goto nla_put_vf_failure ;
2019-11-06 21:30:07 +08:00
if ( dev - > netdev_ops - > ndo_get_vf_guid & &
! dev - > netdev_ops - > ndo_get_vf_guid ( dev , vfs_num , & node_guid ,
& port_guid ) ) {
if ( nla_put ( skb , IFLA_VF_IB_NODE_GUID , sizeof ( node_guid ) ,
& node_guid ) | |
nla_put ( skb , IFLA_VF_IB_PORT_GUID , sizeof ( port_guid ) ,
& port_guid ) )
goto nla_put_vf_failure ;
}
2019-04-26 17:13:06 +08:00
vfvlanlist = nla_nest_start_noflag ( skb , IFLA_VF_VLAN_LIST ) ;
2016-09-22 17:11:15 +08:00
if ( ! vfvlanlist )
goto nla_put_vf_failure ;
if ( nla_put ( skb , IFLA_VF_VLAN_INFO , sizeof ( vf_vlan_info ) ,
& vf_vlan_info ) ) {
nla_nest_cancel ( skb , vfvlanlist ) ;
goto nla_put_vf_failure ;
}
nla_nest_end ( skb , vfvlanlist ) ;
2015-11-17 21:16:52 +08:00
memset ( & vf_stats , 0 , sizeof ( vf_stats ) ) ;
if ( dev - > netdev_ops - > ndo_get_vf_stats )
dev - > netdev_ops - > ndo_get_vf_stats ( dev , vfs_num ,
& vf_stats ) ;
2019-04-26 17:13:06 +08:00
vfstats = nla_nest_start_noflag ( skb , IFLA_VF_STATS ) ;
2016-09-22 17:11:15 +08:00
if ( ! vfstats )
goto nla_put_vf_failure ;
2016-04-25 16:25:14 +08:00
if ( nla_put_u64_64bit ( skb , IFLA_VF_STATS_RX_PACKETS ,
vf_stats . rx_packets , IFLA_VF_STATS_PAD ) | |
nla_put_u64_64bit ( skb , IFLA_VF_STATS_TX_PACKETS ,
vf_stats . tx_packets , IFLA_VF_STATS_PAD ) | |
nla_put_u64_64bit ( skb , IFLA_VF_STATS_RX_BYTES ,
vf_stats . rx_bytes , IFLA_VF_STATS_PAD ) | |
nla_put_u64_64bit ( skb , IFLA_VF_STATS_TX_BYTES ,
vf_stats . tx_bytes , IFLA_VF_STATS_PAD ) | |
nla_put_u64_64bit ( skb , IFLA_VF_STATS_BROADCAST ,
vf_stats . broadcast , IFLA_VF_STATS_PAD ) | |
nla_put_u64_64bit ( skb , IFLA_VF_STATS_MULTICAST ,
2017-07-17 18:47:07 +08:00
vf_stats . multicast , IFLA_VF_STATS_PAD ) | |
nla_put_u64_64bit ( skb , IFLA_VF_STATS_RX_DROPPED ,
vf_stats . rx_dropped , IFLA_VF_STATS_PAD ) | |
nla_put_u64_64bit ( skb , IFLA_VF_STATS_TX_DROPPED ,
vf_stats . tx_dropped , IFLA_VF_STATS_PAD ) ) {
2016-09-22 17:11:15 +08:00
nla_nest_cancel ( skb , vfstats ) ;
goto nla_put_vf_failure ;
}
2015-11-17 21:16:52 +08:00
nla_nest_end ( skb , vfstats ) ;
nla_nest_end ( skb , vf ) ;
return 0 ;
2016-09-22 17:11:15 +08:00
nla_put_vf_failure :
nla_nest_cancel ( skb , vf ) ;
nla_put_vfinfo_failure :
nla_nest_cancel ( skb , vfinfo ) ;
return - EMSGSIZE ;
2015-11-17 21:16:52 +08:00
}
2017-09-26 19:58:41 +08:00
/*
 * Add the VF attributes for @dev to @skb: IFLA_NUM_VF, and, when the
 * driver implements ndo_get_vf_config, an IFLA_VFINFO_LIST nest with one
 * entry per VF written by rtnl_fill_vfinfo().
 *
 * Nothing is emitted (and 0 is returned) when the device has no parent or
 * RTEXT_FILTER_VF is not set in @ext_filter_mask.  Returns -EMSGSIZE when
 * an attribute cannot be added or a per-VF fill fails.
 */
static noinline_for_stack int rtnl_fill_vf(struct sk_buff *skb,
					   struct net_device *dev,
					   u32 ext_filter_mask)
{
	struct nlattr *list_nest;
	int vf_idx, vf_count;

	if (!dev->dev.parent)
		return 0;
	if (!(ext_filter_mask & RTEXT_FILTER_VF))
		return 0;

	vf_count = dev_num_vf(dev->dev.parent);
	if (nla_put_u32(skb, IFLA_NUM_VF, vf_count))
		return -EMSGSIZE;

	/* Without a config hook only the VF count can be reported. */
	if (!dev->netdev_ops->ndo_get_vf_config)
		return 0;

	list_nest = nla_nest_start_noflag(skb, IFLA_VFINFO_LIST);
	if (!list_nest)
		return -EMSGSIZE;

	vf_idx = 0;
	while (vf_idx < vf_count) {
		if (rtnl_fill_vfinfo(skb, dev, vf_idx, list_nest))
			return -EMSGSIZE;
		vf_idx++;
	}

	nla_nest_end(skb, list_nest);
	return 0;
}
2015-11-17 21:16:52 +08:00
static int rtnl_fill_link_ifmap ( struct sk_buff * skb , struct net_device * dev )
{
2016-05-04 04:46:24 +08:00
struct rtnl_link_ifmap map ;
memset ( & map , 0 , sizeof ( map ) ) ;
map . mem_start = dev - > mem_start ;
map . mem_end = dev - > mem_end ;
map . base_addr = dev - > base_addr ;
map . irq = dev - > irq ;
map . dma = dev - > dma ;
map . port = dev - > if_port ;
2016-04-26 16:06:16 +08:00
if ( nla_put_64bit ( skb , IFLA_MAP , sizeof ( map ) , & map , IFLA_PAD ) )
2015-11-17 21:16:52 +08:00
return - EMSGSIZE ;
return 0 ;
}
2018-07-12 11:36:41 +08:00
static u32 rtnl_xdp_prog_skb ( struct net_device * dev )
xdp: refine xdp api with regards to generic xdp
While working on the iproute2 generic XDP frontend, I noticed that
as of right now it's possible to have native *and* generic XDP
programs loaded both at the same time for the case when a driver
supports native XDP.
The intended model for generic XDP from b5cdae3291f7 ("net: Generic
XDP") is, however, that only one out of the two can be present at
once which is also indicated as such in the XDP netlink dump part.
The main rationale for generic XDP is to ease accessibility (in
case a driver does not yet have XDP support) and to generically
provide a semantical model as an example for driver developers
wanting to add XDP support. The generic XDP option for an XDP
aware driver can still be useful for comparing and testing both
implementations.
However, it is not intended to have a second XDP processing stage
or layer with exactly the same functionality of the first native
stage. Only reason could be to have a partial fallback for future
XDP features that are not supported yet in the native implementation
and we probably also shouldn't strive for such fallback and instead
encourage native feature support in the first place. Given there's
currently no such fallback issue or use case, lets not go there yet
if we don't need to.
Therefore, change semantics for loading XDP and bail out if the
user tries to load a generic XDP program when a native one is
present and vice versa. Another alternative to bailing out would
be to handle the transition from one flavor to another gracefully,
but that would require to bring the device down, exchange both
types of programs, and bring it up again in order to avoid a tiny
window where a packet could hit both hooks. Given this complicates
the logic for just a debugging feature in the native case, I went
with the simpler variant.
For the dump, remove IFLA_XDP_FLAGS that was added with b5cdae3291f7
and reuse IFLA_XDP_ATTACHED for indicating the mode. Dumping all
or just a subset of flags that were used for loading the XDP prog
is suboptimal in the long run since not all flags are useful for
dumping and if we start to reuse the same flag definitions for
load and dump, then we'll waste bit space. What we really just
want is to dump the mode for now.
Current IFLA_XDP_ATTACHED semantics are: nothing was installed (0),
a program is running at the native driver layer (1). Thus, add a
mode that says that a program is running at generic XDP layer (2).
Applications will handle this fine in that older binaries will
just indicate that something is attached at XDP layer, effectively
this is similar to IFLA_XDP_FLAGS attr that we would have had
modulo the redundancy.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-05-12 07:04:46 +08:00
{
2017-06-16 08:29:09 +08:00
const struct bpf_prog * generic_xdp_prog ;
xdp: refine xdp api with regards to generic xdp
While working on the iproute2 generic XDP frontend, I noticed that
as of right now it's possible to have native *and* generic XDP
programs loaded both at the same time for the case when a driver
supports native XDP.
The intended model for generic XDP from b5cdae3291f7 ("net: Generic
XDP") is, however, that only one out of the two can be present at
once which is also indicated as such in the XDP netlink dump part.
The main rationale for generic XDP is to ease accessibility (in
case a driver does not yet have XDP support) and to generically
provide a semantical model as an example for driver developers
wanting to add XDP support. The generic XDP option for an XDP
aware driver can still be useful for comparing and testing both
implementations.
However, it is not intended to have a second XDP processing stage
or layer with exactly the same functionality of the first native
stage. Only reason could be to have a partial fallback for future
XDP features that are not supported yet in the native implementation
and we probably also shouldn't strive for such fallback and instead
encourage native feature support in the first place. Given there's
currently no such fallback issue or use case, lets not go there yet
if we don't need to.
Therefore, change semantics for loading XDP and bail out if the
user tries to load a generic XDP program when a native one is
present and vice versa. Another alternative to bailing out would
be to handle the transition from one flavor to another gracefully,
but that would require to bring the device down, exchange both
types of programs, and bring it up again in order to avoid a tiny
window where a packet could hit both hooks. Given this complicates
the logic for just a debugging feature in the native case, I went
with the simpler variant.
For the dump, remove IFLA_XDP_FLAGS that was added with b5cdae3291f7
and reuse IFLA_XDP_ATTACHED for indicating the mode. Dumping all
or just a subset of flags that were used for loading the XDP prog
is suboptimal in the long run since not all flags are useful for
dumping and if we start to reuse the same flag definitions for
load and dump, then we'll waste bit space. What we really just
want is to dump the mode for now.
Current IFLA_XDP_ATTACHED semantics are: nothing was installed (0),
a program is running at the native driver layer (1). Thus, add a
mode that says that a program is running at generic XDP layer (2).
Applications will handle this fine in that older binaries will
just indicate that something is attached at XDP layer, effectively
this is similar to IFLA_XDP_FLAGS attr that we would have had
modulo the redundancy.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-05-12 07:04:46 +08:00
ASSERT_RTNL ( ) ;
2017-06-16 08:29:09 +08:00
generic_xdp_prog = rtnl_dereference ( dev - > xdp_prog ) ;
2018-07-12 11:36:41 +08:00
if ( ! generic_xdp_prog )
return 0 ;
return generic_xdp_prog - > aux - > id ;
}
xdp: refine xdp api with regards to generic xdp
While working on the iproute2 generic XDP frontend, I noticed that
as of right now it's possible to have native *and* generic XDP
programs loaded both at the same time for the case when a driver
supports native XDP.
The intended model for generic XDP from b5cdae3291f7 ("net: Generic
XDP") is, however, that only one out of the two can be present at
once which is also indicated as such in the XDP netlink dump part.
The main rationale for generic XDP is to ease accessibility (in
case a driver does not yet have XDP support) and to generically
provide a semantical model as an example for driver developers
wanting to add XDP support. The generic XDP option for an XDP
aware driver can still be useful for comparing and testing both
implementations.
However, it is not intended to have a second XDP processing stage
or layer with exactly the same functionality of the first native
stage. Only reason could be to have a partial fallback for future
XDP features that are not supported yet in the native implementation
and we probably also shouldn't strive for such fallback and instead
encourage native feature support in the first place. Given there's
currently no such fallback issue or use case, lets not go there yet
if we don't need to.
Therefore, change semantics for loading XDP and bail out if the
user tries to load a generic XDP program when a native one is
present and vice versa. Another alternative to bailing out would
be to handle the transition from one flavor to another gracefully,
but that would require to bring the device down, exchange both
types of programs, and bring it up again in order to avoid a tiny
window where a packet could hit both hooks. Given this complicates
the logic for just a debugging feature in the native case, I went
with the simpler variant.
For the dump, remove IFLA_XDP_FLAGS that was added with b5cdae3291f7
and reuse IFLA_XDP_ATTACHED for indicating the mode. Dumping all
or just a subset of flags that were used for loading the XDP prog
is suboptimal in the long run since not all flags are useful for
dumping and if we start to reuse the same flag definitions for
load and dump, then we'll waste bit space. What we really just
want is to dump the mode for now.
Current IFLA_XDP_ATTACHED semantics are: nothing was installed (0),
a program is running at the native driver layer (1). Thus, add a
mode that says that a program is running at generic XDP layer (2).
Applications will handle this fine in that older binaries will
just indicate that something is attached at XDP layer, effectively
this is similar to IFLA_XDP_FLAGS attr that we would have had
modulo the redundancy.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-05-12 07:04:46 +08:00
2018-07-12 11:36:41 +08:00
static u32 rtnl_xdp_prog_drv ( struct net_device * dev )
{
2020-07-22 14:45:55 +08:00
return dev_xdp_prog_id ( dev , XDP_MODE_DRV ) ;
2018-07-12 11:36:41 +08:00
}
2017-12-02 07:08:55 +08:00
2018-07-12 11:36:41 +08:00
static u32 rtnl_xdp_prog_hw ( struct net_device * dev )
{
2020-07-22 14:45:55 +08:00
return dev_xdp_prog_id ( dev , XDP_MODE_HW ) ;
2018-07-12 11:36:41 +08:00
}
/*
 * rtnl_xdp_report_one - report the XDP program of one attachment mode
 * @skb:         message being built
 * @dev:         device to query
 * @prog_id:     updated with the id found (left untouched when id is 0)
 * @mode:        running attachment summary; becomes @tgt_mode for the first
 *               program found and XDP_ATTACHED_MULTI for any further one
 * @tgt_mode:    XDP_ATTACHED_* value describing this attachment mode
 * @attr:        attribute type used to emit the id
 * @get_prog_id: callback returning the program id for @dev (0 = none)
 *
 * Returns 0 on success or when nothing is attached, otherwise the error
 * from nla_put_u32().
 */
static int rtnl_xdp_report_one(struct sk_buff *skb, struct net_device *dev,
			       u32 *prog_id, u8 *mode, u8 tgt_mode, u32 attr,
			       u32 (*get_prog_id)(struct net_device *dev))
{
	u32 id = get_prog_id(dev);
	int ret;

	if (!id)
		return 0;

	/* The id is recorded before the put, so it is set even if that fails. */
	*prog_id = id;

	ret = nla_put_u32(skb, attr, id);
	if (ret)
		return ret;

	*mode = (*mode == XDP_ATTACHED_NONE) ? tgt_mode : XDP_ATTACHED_MULTI;

	return 0;
}
2016-07-20 03:16:49 +08:00
static int rtnl_xdp_fill ( struct sk_buff * skb , struct net_device * dev )
{
struct nlattr * xdp ;
2017-06-16 08:29:09 +08:00
u32 prog_id ;
2016-07-20 03:16:49 +08:00
int err ;
2018-07-12 11:36:38 +08:00
u8 mode ;
2016-07-20 03:16:49 +08:00
2019-04-26 17:13:06 +08:00
xdp = nla_nest_start_noflag ( skb , IFLA_XDP ) ;
2016-07-20 03:16:49 +08:00
if ( ! xdp )
return - EMSGSIZE ;
xdp: refine xdp api with regards to generic xdp
While working on the iproute2 generic XDP frontend, I noticed that
as of right now it's possible to have native *and* generic XDP
programs loaded both at the same time for the case when a driver
supports native XDP.
The intended model for generic XDP from b5cdae3291f7 ("net: Generic
XDP") is, however, that only one out of the two can be present at
once which is also indicated as such in the XDP netlink dump part.
The main rationale for generic XDP is to ease accessibility (in
case a driver does not yet have XDP support) and to generically
provide a semantical model as an example for driver developers
wanting to add XDP support. The generic XDP option for an XDP
aware driver can still be useful for comparing and testing both
implementations.
However, it is not intended to have a second XDP processing stage
or layer with exactly the same functionality of the first native
stage. Only reason could be to have a partial fallback for future
XDP features that are not supported yet in the native implementation
and we probably also shouldn't strive for such fallback and instead
encourage native feature support in the first place. Given there's
currently no such fallback issue or use case, lets not go there yet
if we don't need to.
Therefore, change semantics for loading XDP and bail out if the
user tries to load a generic XDP program when a native one is
present and vice versa. Another alternative to bailing out would
be to handle the transition from one flavor to another gracefully,
but that would require to bring the device down, exchange both
types of programs, and bring it up again in order to avoid a tiny
window where a packet could hit both hooks. Given this complicates
the logic for just a debugging feature in the native case, I went
with the simpler variant.
For the dump, remove IFLA_XDP_FLAGS that was added with b5cdae3291f7
and reuse IFLA_XDP_ATTACHED for indicating the mode. Dumping all
or just a subset of flags that were used for loading the XDP prog
is suboptimal in the long run since not all flags are useful for
dumping and if we start to reuse the same flag definitions for
load and dump, then we'll waste bit space. What we really just
want is to dump the mode for now.
Current IFLA_XDP_ATTACHED semantics are: nothing was installed (0),
a program is running at the native driver layer (1). Thus, add a
mode that says that a program is running at generic XDP layer (2).
Applications will handle this fine in that older binaries will
just indicate that something is attached at XDP layer, effectively
this is similar to IFLA_XDP_FLAGS attr that we would have had
modulo the redundancy.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-05-12 07:04:46 +08:00
2018-07-12 11:36:41 +08:00
prog_id = 0 ;
mode = XDP_ATTACHED_NONE ;
2018-07-17 10:08:50 +08:00
err = rtnl_xdp_report_one ( skb , dev , & prog_id , & mode , XDP_ATTACHED_SKB ,
IFLA_XDP_SKB_PROG_ID , rtnl_xdp_prog_skb ) ;
if ( err )
2018-07-12 11:36:41 +08:00
goto err_cancel ;
2018-07-17 10:08:50 +08:00
err = rtnl_xdp_report_one ( skb , dev , & prog_id , & mode , XDP_ATTACHED_DRV ,
IFLA_XDP_DRV_PROG_ID , rtnl_xdp_prog_drv ) ;
if ( err )
2018-07-12 11:36:41 +08:00
goto err_cancel ;
2018-07-17 10:08:50 +08:00
err = rtnl_xdp_report_one ( skb , dev , & prog_id , & mode , XDP_ATTACHED_HW ,
IFLA_XDP_HW_PROG_ID , rtnl_xdp_prog_hw ) ;
if ( err )
2018-07-12 11:36:41 +08:00
goto err_cancel ;
2018-07-12 11:36:38 +08:00
err = nla_put_u8 ( skb , IFLA_XDP_ATTACHED , mode ) ;
2016-07-20 03:16:49 +08:00
if ( err )
goto err_cancel ;
2018-07-12 11:36:41 +08:00
if ( prog_id & & mode ! = XDP_ATTACHED_MULTI ) {
2017-06-16 08:29:09 +08:00
err = nla_put_u32 ( skb , IFLA_XDP_PROG_ID , prog_id ) ;
if ( err )
goto err_cancel ;
}
2016-07-20 03:16:49 +08:00
nla_nest_end ( skb , xdp ) ;
return 0 ;
err_cancel :
nla_nest_cancel ( skb , xdp ) ;
return err ;
}
2017-05-27 22:14:34 +08:00
/*
 * rtnl_get_event - translate a NETDEV_* notifier event into IFLA_EVENT_*
 *
 * Events without a mapping yield IFLA_EVENT_NONE.
 */
static u32 rtnl_get_event(unsigned long event)
{
	switch (event) {
	case NETDEV_REBOOT:
		return IFLA_EVENT_REBOOT;
	case NETDEV_FEAT_CHANGE:
		return IFLA_EVENT_FEATURES;
	case NETDEV_BONDING_FAILOVER:
		return IFLA_EVENT_BONDING_FAILOVER;
	case NETDEV_NOTIFY_PEERS:
		return IFLA_EVENT_NOTIFY_PEERS;
	case NETDEV_RESEND_IGMP:
		return IFLA_EVENT_IGMP_RESEND;
	case NETDEV_CHANGEINFODATA:
		return IFLA_EVENT_BONDING_OPTIONS;
	default:
		return IFLA_EVENT_NONE;
	}
}
2017-09-26 19:58:40 +08:00
static int put_master_ifindex ( struct sk_buff * skb , struct net_device * dev )
{
const struct net_device * upper_dev ;
int ret = 0 ;
rcu_read_lock ( ) ;
upper_dev = netdev_master_upper_dev_get_rcu ( dev ) ;
if ( upper_dev )
ret = nla_put_u32 ( skb , IFLA_MASTER , upper_dev - > ifindex ) ;
rcu_read_unlock ( ) ;
return ret ;
}
rtnetlink: always put IFLA_LINK for links with a link-netnsid
Currently, nla_put_iflink() doesn't put the IFLA_LINK attribute when
iflink == ifindex.
In some cases, a device can be created in a different netns with the
same ifindex as its parent. That device will not dump its IFLA_LINK
attribute, which can confuse some userspace software that expects it.
For example, if the last ifindex created in init_net and foo are both
8, these commands will trigger the issue:
ip link add parent type dummy # ifindex 9
ip link add link parent netns foo type macvlan # ifindex 9 in ns foo
So, in case a device puts the IFLA_LINK_NETNSID attribute in a dump,
always put the IFLA_LINK attribute as well.
Thanks to Dan Winship for analyzing the original OpenShift bug down to
the missing netlink attribute.
v2: change Fixes tag, it's been here forever, as Nicolas Dichtel said
add Nicolas' ack
v3: change Fixes tag
fix subject typo, spotted by Edward Cree
Analyzed-by: Dan Winship <danw@redhat.com>
Fixes: d8a5ec672768 ("[NET]: netlink support for moving devices between network namespaces.")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-05-14 21:12:19 +08:00
/*
 * nla_put_iflink - emit IFLA_LINK for @dev
 * @force: emit the attribute even when the iflink equals the device's own
 *         ifindex
 *
 * Without @force the attribute is skipped when dev_get_iflink() returns
 * the device's own ifindex.  Returns 0 when skipped, otherwise the result
 * of nla_put_u32().
 */
static int nla_put_iflink(struct sk_buff *skb, const struct net_device *dev,
			  bool force)
{
	int iflink = dev_get_iflink(dev);

	if (!force && dev->ifindex == iflink)
		return 0;

	return nla_put_u32(skb, IFLA_LINK, iflink);
}
2017-10-03 05:50:05 +08:00
/*
 * nla_put_ifalias - emit IFLA_IFALIAS for @dev when an alias is available
 *
 * The alias is fetched into an on-stack buffer of IFALIASZ bytes.  The
 * attribute is only added when dev_get_alias() returns a positive value;
 * otherwise 0 is returned and nothing is emitted.
 */
static noinline_for_stack int nla_put_ifalias(struct sk_buff *skb,
					      struct net_device *dev)
{
	char alias[IFALIASZ];

	if (dev_get_alias(dev, alias, sizeof(alias)) <= 0)
		return 0;

	return nla_put_string(skb, IFLA_IFALIAS, alias);
}
2017-09-26 19:58:42 +08:00
/*
 * rtnl_fill_link_netnsid - emit IFLA_LINK_NETNSID (if needed) and IFLA_LINK
 * @skb:     message being built
 * @dev:     device being dumped
 * @src_net: namespace in which the id of the link namespace is allocated
 * @gfp:     allocation flags passed on to peernet2id_alloc()
 *
 * When the device's link ops provide get_link_net() and the returned
 * namespace differs from the device's own, the id obtained from
 * peernet2id_alloc() is emitted as IFLA_LINK_NETNSID and IFLA_LINK is then
 * forced out as well.  In every other case IFLA_LINK is left to the usual
 * nla_put_iflink() rule.
 *
 * Returns -EMSGSIZE if IFLA_LINK_NETNSID does not fit, otherwise the
 * result of nla_put_iflink().
 */
static int rtnl_fill_link_netnsid(struct sk_buff *skb,
				  const struct net_device *dev,
				  struct net *src_net, gfp_t gfp)
{
	struct net *link_net;
	int nsid;

	if (!dev->rtnl_link_ops || !dev->rtnl_link_ops->get_link_net)
		return nla_put_iflink(skb, dev, false);

	link_net = dev->rtnl_link_ops->get_link_net(dev);
	if (net_eq(dev_net(dev), link_net))
		return nla_put_iflink(skb, dev, false);

	nsid = peernet2id_alloc(src_net, link_net, gfp);
	if (nla_put_s32(skb, IFLA_LINK_NETNSID, nsid))
		return -EMSGSIZE;

	/* A link-netnsid was reported, so IFLA_LINK must always accompany it. */
	return nla_put_iflink(skb, dev, true);
}
2017-10-16 21:44:35 +08:00
/*
 * rtnl_fill_link_af - emit the IFLA_AF_SPEC nest for a device
 * @skb: message buffer being filled
 * @dev: device handed to every family's fill_link_af() callback
 * @ext_filter_mask: filter mask handed to every fill_link_af() callback
 *
 * Iterates rtnl_af_ops with the RCU list iterator; for each entry that
 * provides a fill_link_af() callback a nest keyed by the entry's family
 * is opened and the callback is asked to fill it.
 *
 * Returns 0 on success, or -EMSGSIZE when a nest cannot be started or a
 * callback fails with any error other than -ENODATA.  On failure the
 * partially written attributes are left in @skb for the caller to cancel.
 */
static int rtnl_fill_link_af(struct sk_buff *skb,
			     const struct net_device *dev,
			     u32 ext_filter_mask)
{
	const struct rtnl_af_ops *af_ops;
	struct nlattr *af_spec;

	af_spec = nla_nest_start_noflag(skb, IFLA_AF_SPEC);
	if (!af_spec)
		return -EMSGSIZE;

	list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
		struct nlattr *af;
		int err;

		if (!af_ops->fill_link_af)
			continue;

		af = nla_nest_start_noflag(skb, af_ops->family);
		if (!af)
			return -EMSGSIZE;

		err = af_ops->fill_link_af(skb, dev, ext_filter_mask);
		if (err == -ENODATA) {
			/*
			 * The callback may return ENODATA to indicate that
			 * there was no data to be dumped.  This is not an
			 * error: trim the attribute header and move on to the
			 * next family.  The nest must not be ended once it has
			 * been cancelled, since its header is no longer part
			 * of the message.
			 */
			nla_nest_cancel(skb, af);
			continue;
		}
		if (err < 0)
			return -EMSGSIZE;

		nla_nest_end(skb, af);
	}

	nla_nest_end(skb, af_spec);
	return 0;
}
2019-09-30 17:48:17 +08:00
static int rtnl_fill_alt_ifnames ( struct sk_buff * skb ,
const struct net_device * dev )
{
struct netdev_name_node * name_node ;
int count = 0 ;
list_for_each_entry ( name_node , & dev - > name_node - > list , list ) {
if ( nla_put_string ( skb , IFLA_ALT_IFNAME , name_node - > name ) )
return - EMSGSIZE ;
count + + ;
}
return count ;
}
/*
 * rtnl_fill_prop_list - emit the IFLA_PROP_LIST nest for a device
 * @skb: message buffer being filled
 * @dev: device whose alternative names are dumped
 *
 * The nest is kept only when rtnl_fill_alt_ifnames() added at least one
 * attribute; otherwise it is cancelled.
 *
 * Returns 0 when the nest was emitted or when there was nothing to emit,
 * -EMSGSIZE when the nest could not be started, or the negative value
 * returned by rtnl_fill_alt_ifnames().
 */
static int rtnl_fill_prop_list(struct sk_buff *skb,
			       const struct net_device *dev)
{
	struct nlattr *nest;
	int nr_names;

	nest = nla_nest_start(skb, IFLA_PROP_LIST);
	if (!nest)
		return -EMSGSIZE;

	nr_names = rtnl_fill_alt_ifnames(skb, dev);
	if (nr_names > 0) {
		nla_nest_end(skb, nest);
		return 0;
	}

	/* Nothing was added (0) or the helper failed (< 0): drop the nest. */
	nla_nest_cancel(skb, nest);
	return nr_names;
}
2020-08-01 08:34:01 +08:00
static int rtnl_fill_proto_down ( struct sk_buff * skb ,
const struct net_device * dev )
{
struct nlattr * pr ;
u32 preason ;
if ( nla_put_u8 ( skb , IFLA_PROTO_DOWN , dev - > proto_down ) )
goto nla_put_failure ;
preason = dev - > proto_down_reason ;
if ( ! preason )
return 0 ;
pr = nla_nest_start ( skb , IFLA_PROTO_DOWN_REASON ) ;
if ( ! pr )
return - EMSGSIZE ;
if ( nla_put_u32 ( skb , IFLA_PROTO_DOWN_REASON_VALUE , preason ) ) {
nla_nest_cancel ( skb , pr ) ;
goto nla_put_failure ;
}
nla_nest_end ( skb , pr ) ;
return 0 ;
nla_put_failure :
return - EMSGSIZE ;
}
2017-11-03 03:04:38 +08:00
/*
 * rtnl_fill_ifinfo - append one link message describing @dev to @skb
 * @skb: message buffer being filled
 * @dev: device to describe
 * @src_net: namespace handed to rtnl_fill_link_netnsid()
 * @type: netlink message type handed to nlmsg_put()
 * @pid: port id handed to nlmsg_put()
 * @seq: sequence number handed to nlmsg_put()
 * @change: value stored in ifi_change
 * @flags: netlink message flags handed to nlmsg_put()
 * @ext_filter_mask: handed to the VF, port and AF_SPEC fill helpers
 * @event: emitted as IFLA_EVENT unless it is IFLA_EVENT_NONE
 * @new_nsid: if non-NULL, *new_nsid is emitted as IFLA_NEW_NETNSID
 * @new_ifindex: if non-zero, emitted as IFLA_NEW_IFINDEX
 * @tgt_netnsid: if >= 0, emitted as IFLA_TARGET_NETNSID
 * @gfp: allocation flags handed to rtnl_fill_link_netnsid()
 *
 * Must be called with RTNL held (ASSERT_RTNL()).
 *
 * Returns 0 on success.  If the header or any attribute does not fit,
 * the partially built message is cancelled and -EMSGSIZE is returned.
 */
static int rtnl_fill_ifinfo(struct sk_buff *skb,
			    struct net_device *dev, struct net *src_net,
			    int type, u32 pid, u32 seq, u32 change,
			    unsigned int flags, u32 ext_filter_mask,
			    u32 event, int *new_nsid, int new_ifindex,
			    int tgt_netnsid, gfp_t gfp)
{
	struct ifinfomsg *hdr;
	struct nlmsghdr *nlh;
	struct Qdisc *q;
	int af_err;

	ASSERT_RTNL();

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*hdr), flags);
	if (!nlh)
		return -EMSGSIZE;

	/* Fixed-size ifinfomsg header. */
	hdr = nlmsg_data(nlh);
	hdr->ifi_family = AF_UNSPEC;
	hdr->__ifi_pad = 0;
	hdr->ifi_type = dev->type;
	hdr->ifi_index = dev->ifindex;
	hdr->ifi_flags = dev_get_flags(dev);
	hdr->ifi_change = change;

	if (tgt_netnsid >= 0 &&
	    nla_put_s32(skb, IFLA_TARGET_NETNSID, tgt_netnsid))
		goto nla_put_failure;

	q = rtnl_dereference(dev->qdisc);

	/* Basic scalar properties, in their established order. */
	if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
	    nla_put_u32(skb, IFLA_TXQLEN, dev->tx_queue_len) ||
	    nla_put_u8(skb, IFLA_OPERSTATE,
		       netif_running(dev) ? dev->operstate : IF_OPER_DOWN) ||
	    nla_put_u8(skb, IFLA_LINKMODE, dev->link_mode) ||
	    nla_put_u32(skb, IFLA_MTU, dev->mtu) ||
	    nla_put_u32(skb, IFLA_MIN_MTU, dev->min_mtu) ||
	    nla_put_u32(skb, IFLA_MAX_MTU, dev->max_mtu) ||
	    nla_put_u32(skb, IFLA_GROUP, dev->group) ||
	    nla_put_u32(skb, IFLA_PROMISCUITY, dev->promiscuity) ||
	    nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) ||
	    nla_put_u32(skb, IFLA_GSO_MAX_SEGS, dev->gso_max_segs) ||
	    nla_put_u32(skb, IFLA_GSO_MAX_SIZE, dev->gso_max_size) ||
	    nla_put_u32(skb, IFLA_GRO_MAX_SIZE, dev->gro_max_size))
		goto nla_put_failure;

#ifdef CONFIG_RPS
	if (nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues))
		goto nla_put_failure;
#endif

	if (put_master_ifindex(skb, dev) ||
	    nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) ||
	    (q && nla_put_string(skb, IFLA_QDISC, q->ops->id)) ||
	    nla_put_ifalias(skb, dev) ||
	    nla_put_u32(skb, IFLA_CARRIER_CHANGES,
			atomic_read(&dev->carrier_up_count) +
			atomic_read(&dev->carrier_down_count)) ||
	    nla_put_u32(skb, IFLA_CARRIER_UP_COUNT,
			atomic_read(&dev->carrier_up_count)) ||
	    nla_put_u32(skb, IFLA_CARRIER_DOWN_COUNT,
			atomic_read(&dev->carrier_down_count)))
		goto nla_put_failure;

	if (rtnl_fill_proto_down(skb, dev))
		goto nla_put_failure;

	if (event != IFLA_EVENT_NONE &&
	    nla_put_u32(skb, IFLA_EVENT, event))
		goto nla_put_failure;

	if (rtnl_fill_link_ifmap(skb, dev))
		goto nla_put_failure;

	/* Link-layer addresses are only dumped when the device has any. */
	if (dev->addr_len &&
	    (nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr) ||
	     nla_put(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast)))
		goto nla_put_failure;

	if (rtnl_phys_port_id_fill(skb, dev) ||
	    rtnl_phys_port_name_fill(skb, dev) ||
	    rtnl_phys_switch_id_fill(skb, dev) ||
	    rtnl_fill_stats(skb, dev) ||
	    rtnl_fill_vf(skb, dev, ext_filter_mask) ||
	    rtnl_port_fill(skb, dev, ext_filter_mask) ||
	    rtnl_xdp_fill(skb, dev))
		goto nla_put_failure;

	if ((dev->rtnl_link_ops || rtnl_have_link_slave_info(dev)) &&
	    rtnl_link_fill(skb, dev) < 0)
		goto nla_put_failure;

	if (rtnl_fill_link_netnsid(skb, dev, src_net, gfp))
		goto nla_put_failure;

	if (new_nsid &&
	    nla_put_s32(skb, IFLA_NEW_NETNSID, *new_nsid) < 0)
		goto nla_put_failure;

	if (new_ifindex &&
	    nla_put_s32(skb, IFLA_NEW_IFINDEX, new_ifindex) < 0)
		goto nla_put_failure;

	/* The permanent address is skipped when it is all zeroes. */
	if (memchr_inv(dev->perm_addr, '\0', dev->addr_len) &&
	    nla_put(skb, IFLA_PERM_ADDRESS, dev->addr_len, dev->perm_addr))
		goto nla_put_failure;

	/* rtnl_fill_link_af() walks an RCU-protected list. */
	rcu_read_lock();
	af_err = rtnl_fill_link_af(skb, dev, ext_filter_mask);
	rcu_read_unlock();
	if (af_err)
		goto nla_put_failure;

	if (rtnl_fill_prop_list(skb, dev))
		goto nla_put_failure;

	if (dev->dev.parent) {
		if (nla_put_string(skb, IFLA_PARENT_DEV_NAME,
				   dev_name(dev->dev.parent)))
			goto nla_put_failure;

		if (dev->dev.parent->bus &&
		    nla_put_string(skb, IFLA_PARENT_DEV_BUS_NAME,
				   dev->dev.parent->bus->name))
			goto nla_put_failure;
	}

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2014-02-19 03:53:18 +08:00
static const struct nla_policy ifla_policy [ IFLA_MAX + 1 ] = {
2006-08-27 11:13:18 +08:00
[ IFLA_IFNAME ] = { . type = NLA_STRING , . len = IFNAMSIZ - 1 } ,
2007-06-14 03:03:51 +08:00
[ IFLA_ADDRESS ] = { . type = NLA_BINARY , . len = MAX_ADDR_LEN } ,
[ IFLA_BROADCAST ] = { . type = NLA_BINARY , . len = MAX_ADDR_LEN } ,
2006-08-27 11:13:18 +08:00
[ IFLA_MAP ] = { . len = sizeof ( struct rtnl_link_ifmap ) } ,
2006-08-11 12:17:37 +08:00
[ IFLA_MTU ] = { . type = NLA_U32 } ,
2008-02-20 08:12:08 +08:00
[ IFLA_LINK ] = { . type = NLA_U32 } ,
2011-02-13 18:15:37 +08:00
[ IFLA_MASTER ] = { . type = NLA_U32 } ,
2012-12-28 07:49:39 +08:00
[ IFLA_CARRIER ] = { . type = NLA_U8 } ,
2006-08-11 12:17:37 +08:00
[ IFLA_TXQLEN ] = { . type = NLA_U32 } ,
[ IFLA_WEIGHT ] = { . type = NLA_U32 } ,
[ IFLA_OPERSTATE ] = { . type = NLA_U8 } ,
[ IFLA_LINKMODE ] = { . type = NLA_U8 } ,
2008-02-20 08:12:08 +08:00
[ IFLA_LINKINFO ] = { . type = NLA_NESTED } ,
2007-09-12 19:57:04 +08:00
[ IFLA_NET_NS_PID ] = { . type = NLA_U32 } ,
2011-05-05 08:51:50 +08:00
[ IFLA_NET_NS_FD ] = { . type = NLA_U32 } ,
2017-10-11 22:24:48 +08:00
/* IFLA_IFALIAS is a string, but policy is set to NLA_BINARY to
* allow 0 - length string ( needed to remove an alias ) .
*/
[ IFLA_IFALIAS ] = { . type = NLA_BINARY , . len = IFALIASZ - 1 } ,
2010-05-16 16:05:45 +08:00
[ IFLA_VFINFO_LIST ] = { . type = NLA_NESTED } ,
net: Add netlink support for virtual port management (was iovnl)
Add new netdev ops ndo_{set|get}_vf_port to allow setting of
port-profile on a netdev interface. Extends netlink socket RTM_SETLINK/
RTM_GETLINK with two new sub msgs called IFLA_VF_PORTS and IFLA_PORT_SELF
(added to end of IFLA_cmd list). These are both nested atrtibutes
using this layout:
[IFLA_NUM_VF]
[IFLA_VF_PORTS]
[IFLA_VF_PORT]
[IFLA_PORT_*], ...
[IFLA_VF_PORT]
[IFLA_PORT_*], ...
...
[IFLA_PORT_SELF]
[IFLA_PORT_*], ...
These attributes are design to be set and get symmetrically. VF_PORTS
is a list of VF_PORTs, one for each VF, when dealing with an SR-IOV
device. PORT_SELF is for the PF of the SR-IOV device, in case it wants
to also have a port-profile, or for the case where the VF==PF, like in
enic patch 2/2 of this patch set.
A port-profile is used to configure/enable the external switch virtual port
backing the netdev interface, not to configure the host-facing side of the
netdev. A port-profile is an identifier known to the switch. How port-
profiles are installed on the switch or how available port-profiles are
made know to the host is outside the scope of this patch.
There are two types of port-profiles specs in the netlink msg. The first spec
is for 802.1Qbg (pre-)standard, VDP protocol. The second spec is for devices
that run a similar protocol as VDP but in firmware, thus hiding the protocol
details. In either case, the specs have much in common and makes sense to
define the netlink msg as the union of the two specs. For example, both specs
have a notition of associating/deassociating a port-profile. And both specs
require some information from the hypervisor manager, such as client port
instance ID.
The general flow is the port-profile is applied to a host netdev interface
using RTM_SETLINK, the receiver of the RTM_SETLINK msg communicates with the
switch, and the switch virtual port backing the host netdev interface is
configured/enabled based on the settings defined by the port-profile. What
those settings comprise, and how those settings are managed is again
outside the scope of this patch, since this patch only deals with the
first step in the flow.
Signed-off-by: Scott Feldman <scofeldm@cisco.com>
Signed-off-by: Roopa Prabhu <roprabhu@cisco.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-05-18 13:49:55 +08:00
[ IFLA_VF_PORTS ] = { . type = NLA_NESTED } ,
[ IFLA_PORT_SELF ] = { . type = NLA_NESTED } ,
2010-11-16 12:30:14 +08:00
[ IFLA_AF_SPEC ] = { . type = NLA_NESTED } ,
2012-02-22 05:54:48 +08:00
[ IFLA_EXT_MASK ] = { . type = NLA_U32 } ,
2012-03-29 20:51:30 +08:00
[ IFLA_PROMISCUITY ] = { . type = NLA_U32 } ,
2012-07-20 10:28:48 +08:00
[ IFLA_NUM_TX_QUEUES ] = { . type = NLA_U32 } ,
[ IFLA_NUM_RX_QUEUES ] = { . type = NLA_U32 } ,
2017-12-08 07:40:19 +08:00
[ IFLA_GSO_MAX_SEGS ] = { . type = NLA_U32 } ,
[ IFLA_GSO_MAX_SIZE ] = { . type = NLA_U32 } ,
2014-11-28 21:34:16 +08:00
[ IFLA_PHYS_PORT_ID ] = { . type = NLA_BINARY , . len = MAX_PHYS_ITEM_ID_LEN } ,
2014-03-30 00:48:35 +08:00
[ IFLA_CARRIER_CHANGES ] = { . type = NLA_U32 } , /* ignored */
2014-11-28 21:34:18 +08:00
[ IFLA_PHYS_SWITCH_ID ] = { . type = NLA_BINARY , . len = MAX_PHYS_ITEM_ID_LEN } ,
2015-01-15 22:11:18 +08:00
[ IFLA_LINK_NETNSID ] = { . type = NLA_S32 } ,
2015-07-15 04:43:20 +08:00
[ IFLA_PROTO_DOWN ] = { . type = NLA_U8 } ,
2016-07-20 03:16:49 +08:00
[ IFLA_XDP ] = { . type = NLA_NESTED } ,
2017-05-27 22:14:34 +08:00
[ IFLA_EVENT ] = { . type = NLA_U32 } ,
2017-06-20 19:35:23 +08:00
[ IFLA_GROUP ] = { . type = NLA_U32 } ,
2018-09-05 03:53:53 +08:00
[ IFLA_TARGET_NETNSID ] = { . type = NLA_S32 } ,
2018-01-19 01:59:13 +08:00
[ IFLA_CARRIER_UP_COUNT ] = { . type = NLA_U32 } ,
[ IFLA_CARRIER_DOWN_COUNT ] = { . type = NLA_U32 } ,
2018-07-28 04:43:22 +08:00
[ IFLA_MIN_MTU ] = { . type = NLA_U32 } ,
[ IFLA_MAX_MTU ] = { . type = NLA_U32 } ,
2019-09-30 17:48:16 +08:00
[ IFLA_PROP_LIST ] = { . type = NLA_NESTED } ,
[ IFLA_ALT_IFNAME ] = { . type = NLA_STRING ,
. len = ALTIFNAMSIZ - 1 } ,
2019-12-11 17:58:14 +08:00
[ IFLA_PERM_ADDRESS ] = { . type = NLA_REJECT } ,
2020-08-01 08:34:01 +08:00
[ IFLA_PROTO_DOWN_REASON ] = { . type = NLA_NESTED } ,
2021-04-07 14:40:03 +08:00
[ IFLA_NEW_IFINDEX ] = NLA_POLICY_MIN ( NLA_S32 , 1 ) ,
2021-06-12 16:20:56 +08:00
[ IFLA_PARENT_DEV_NAME ] = { . type = NLA_NUL_STRING } ,
2022-01-05 18:48:38 +08:00
[ IFLA_GRO_MAX_SIZE ] = { . type = NLA_U32 } ,
2006-08-11 12:17:37 +08:00
} ;
2007-06-14 03:03:51 +08:00
static const struct nla_policy ifla_info_policy [ IFLA_INFO_MAX + 1 ] = {
[ IFLA_INFO_KIND ] = { . type = NLA_STRING } ,
[ IFLA_INFO_DATA ] = { . type = NLA_NESTED } ,
2014-01-22 16:05:55 +08:00
[ IFLA_INFO_SLAVE_KIND ] = { . type = NLA_STRING } ,
[ IFLA_INFO_SLAVE_DATA ] = { . type = NLA_NESTED } ,
2007-06-14 03:03:51 +08:00
} ;
2010-05-16 16:05:45 +08:00
static const struct nla_policy ifla_vf_policy [ IFLA_VF_MAX + 1 ] = {
2015-02-06 01:44:04 +08:00
[ IFLA_VF_MAC ] = { . len = sizeof ( struct ifla_vf_mac ) } ,
ipoib: show VF broadcast address
in IPoIB case we can't see a VF broadcast address for but
can see for PF
Before:
11: ib1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 2044 qdisc pfifo_fast
state UP mode DEFAULT group default qlen 256
link/infiniband
80:00:00:66:fe:80:00:00:00:00:00:00:24:8a:07:03:00:a4:3e:7c brd
00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff
vf 0 MAC 14:80:00:00:66:fe, spoof checking off, link-state disable,
trust off, query_rss off
...
After:
11: ib1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 2044 qdisc pfifo_fast
state UP mode DEFAULT group default qlen 256
link/infiniband
80:00:00:66:fe:80:00:00:00:00:00:00:24:8a:07:03:00:a4:3e:7c brd
00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff
vf 0 link/infiniband
80:00:00:66:fe:80:00:00:00:00:00:00:24:8a:07:03:00:a4:3e:7c brd
00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff, spoof
checking off, link-state disable, trust off, query_rss off
v1->v2: add the IFLA_VF_BROADCAST constant
v2->v3: put IFLA_VF_BROADCAST at the end
to avoid KABI breakage and set NLA_REJECT
dev_setlink
Signed-off-by: Denis Kirjanov <kda@linux-powerpc.org>
Acked-by: Doug Ledford <dledford@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-06-17 16:53:41 +08:00
[ IFLA_VF_BROADCAST ] = { . type = NLA_REJECT } ,
2015-02-06 01:44:04 +08:00
[ IFLA_VF_VLAN ] = { . len = sizeof ( struct ifla_vf_vlan ) } ,
2016-09-22 17:11:15 +08:00
[ IFLA_VF_VLAN_LIST ] = { . type = NLA_NESTED } ,
2015-02-06 01:44:04 +08:00
[ IFLA_VF_TX_RATE ] = { . len = sizeof ( struct ifla_vf_tx_rate ) } ,
[ IFLA_VF_SPOOFCHK ] = { . len = sizeof ( struct ifla_vf_spoofchk ) } ,
[ IFLA_VF_RATE ] = { . len = sizeof ( struct ifla_vf_rate ) } ,
[ IFLA_VF_LINK_STATE ] = { . len = sizeof ( struct ifla_vf_link_state ) } ,
2015-03-31 02:35:23 +08:00
[ IFLA_VF_RSS_QUERY_EN ] = { . len = sizeof ( struct ifla_vf_rss_query_en ) } ,
2015-06-15 22:59:07 +08:00
[ IFLA_VF_STATS ] = { . type = NLA_NESTED } ,
2015-08-28 14:57:55 +08:00
[ IFLA_VF_TRUST ] = { . len = sizeof ( struct ifla_vf_trust ) } ,
2016-03-12 04:58:34 +08:00
[ IFLA_VF_IB_NODE_GUID ] = { . len = sizeof ( struct ifla_vf_guid ) } ,
[ IFLA_VF_IB_PORT_GUID ] = { . len = sizeof ( struct ifla_vf_guid ) } ,
2015-06-15 22:59:07 +08:00
} ;
net: Add netlink support for virtual port management (was iovnl)
Add new netdev ops ndo_{set|get}_vf_port to allow setting of
port-profile on a netdev interface. Extends netlink socket RTM_SETLINK/
RTM_GETLINK with two new sub msgs called IFLA_VF_PORTS and IFLA_PORT_SELF
(added to end of IFLA_cmd list). These are both nested attributes
using this layout:
[IFLA_NUM_VF]
[IFLA_VF_PORTS]
[IFLA_VF_PORT]
[IFLA_PORT_*], ...
[IFLA_VF_PORT]
[IFLA_PORT_*], ...
...
[IFLA_PORT_SELF]
[IFLA_PORT_*], ...
These attributes are designed to be set and get symmetrically. VF_PORTS
is a list of VF_PORTs, one for each VF, when dealing with an SR-IOV
device. PORT_SELF is for the PF of the SR-IOV device, in case it wants
to also have a port-profile, or for the case where the VF==PF, like in
enic patch 2/2 of this patch set.
A port-profile is used to configure/enable the external switch virtual port
backing the netdev interface, not to configure the host-facing side of the
netdev. A port-profile is an identifier known to the switch. How port-
profiles are installed on the switch or how available port-profiles are
made known to the host is outside the scope of this patch.
There are two types of port-profiles specs in the netlink msg. The first spec
is for 802.1Qbg (pre-)standard, VDP protocol. The second spec is for devices
that run a similar protocol as VDP but in firmware, thus hiding the protocol
details. In either case, the specs have much in common and it makes sense to
define the netlink msg as the union of the two specs. For example, both specs
have a notion of associating/deassociating a port-profile. And both specs
require some information from the hypervisor manager, such as client port
instance ID.
The general flow is the port-profile is applied to a host netdev interface
using RTM_SETLINK, the receiver of the RTM_SETLINK msg communicates with the
switch, and the switch virtual port backing the host netdev interface is
configured/enabled based on the settings defined by the port-profile. What
those settings comprise, and how those settings are managed is again
outside the scope of this patch, since this patch only deals with the
first step in the flow.
Signed-off-by: Scott Feldman <scofeldm@cisco.com>
Signed-off-by: Roopa Prabhu <roprabhu@cisco.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-05-18 13:49:55 +08:00
static const struct nla_policy ifla_port_policy [ IFLA_PORT_MAX + 1 ] = {
[ IFLA_PORT_VF ] = { . type = NLA_U32 } ,
[ IFLA_PORT_PROFILE ] = { . type = NLA_STRING ,
. len = PORT_PROFILE_MAX } ,
[ IFLA_PORT_INSTANCE_UUID ] = { . type = NLA_BINARY ,
. len = PORT_UUID_MAX } ,
[ IFLA_PORT_HOST_UUID ] = { . type = NLA_STRING ,
. len = PORT_UUID_MAX } ,
[ IFLA_PORT_REQUEST ] = { . type = NLA_U8 , } ,
[ IFLA_PORT_RESPONSE ] = { . type = NLA_U16 , } ,
2017-02-17 08:56:11 +08:00
/* Unused, but we need to keep it here since user space could
* fill it . It ' s also broken with regard to NLA_BINARY use in
* combination with structs .
*/
[ IFLA_PORT_VSI_TYPE ] = { . type = NLA_BINARY ,
. len = sizeof ( struct ifla_port_vsi ) } ,
net: Add netlink support for virtual port management (was iovnl)
Add new netdev ops ndo_{set|get}_vf_port to allow setting of
port-profile on a netdev interface. Extends netlink socket RTM_SETLINK/
RTM_GETLINK with two new sub msgs called IFLA_VF_PORTS and IFLA_PORT_SELF
(added to end of IFLA_cmd list). These are both nested atrtibutes
using this layout:
[IFLA_NUM_VF]
[IFLA_VF_PORTS]
[IFLA_VF_PORT]
[IFLA_PORT_*], ...
[IFLA_VF_PORT]
[IFLA_PORT_*], ...
...
[IFLA_PORT_SELF]
[IFLA_PORT_*], ...
These attributes are design to be set and get symmetrically. VF_PORTS
is a list of VF_PORTs, one for each VF, when dealing with an SR-IOV
device. PORT_SELF is for the PF of the SR-IOV device, in case it wants
to also have a port-profile, or for the case where the VF==PF, like in
enic patch 2/2 of this patch set.
A port-profile is used to configure/enable the external switch virtual port
backing the netdev interface, not to configure the host-facing side of the
netdev. A port-profile is an identifier known to the switch. How port-
profiles are installed on the switch or how available port-profiles are
made know to the host is outside the scope of this patch.
There are two types of port-profiles specs in the netlink msg. The first spec
is for 802.1Qbg (pre-)standard, VDP protocol. The second spec is for devices
that run a similar protocol as VDP but in firmware, thus hiding the protocol
details. In either case, the specs have much in common and makes sense to
define the netlink msg as the union of the two specs. For example, both specs
have a notition of associating/deassociating a port-profile. And both specs
require some information from the hypervisor manager, such as client port
instance ID.
The general flow is the port-profile is applied to a host netdev interface
using RTM_SETLINK, the receiver of the RTM_SETLINK msg communicates with the
switch, and the switch virtual port backing the host netdev interface is
configured/enabled based on the settings defined by the port-profile. What
those settings comprise, and how those settings are managed is again
outside the scope of this patch, since this patch only deals with the
first step in the flow.
Signed-off-by: Scott Feldman <scofeldm@cisco.com>
Signed-off-by: Roopa Prabhu <roprabhu@cisco.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-05-18 13:49:55 +08:00
} ;
2016-07-20 03:16:49 +08:00
static const struct nla_policy ifla_xdp_policy [ IFLA_XDP_MAX + 1 ] = {
2020-03-26 01:23:26 +08:00
[ IFLA_XDP_UNSPEC ] = { . strict_start_type = IFLA_XDP_EXPECTED_FD } ,
2016-07-20 03:16:49 +08:00
[ IFLA_XDP_FD ] = { . type = NLA_S32 } ,
2020-03-26 01:23:26 +08:00
[ IFLA_XDP_EXPECTED_FD ] = { . type = NLA_S32 } ,
2016-07-20 03:16:49 +08:00
[ IFLA_XDP_ATTACHED ] = { . type = NLA_U8 } ,
2016-11-29 06:16:54 +08:00
[ IFLA_XDP_FLAGS ] = { . type = NLA_U32 } ,
2017-06-16 08:29:09 +08:00
[ IFLA_XDP_PROG_ID ] = { . type = NLA_U32 } ,
2016-07-20 03:16:49 +08:00
} ;
2016-02-03 00:17:07 +08:00
static const struct rtnl_link_ops * linkinfo_to_kind_ops ( const struct nlattr * nla )
{
const struct rtnl_link_ops * ops = NULL ;
struct nlattr * linfo [ IFLA_INFO_MAX + 1 ] ;
netlink: make validation more configurable for future strictness
We currently have two levels of strict validation:
1) liberal (default)
- undefined (type >= max) & NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
- garbage at end of message accepted
2) strict (opt-in)
- NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
Split out parsing strictness into four different options:
* TRAILING - check that there's no trailing data after parsing
attributes (in message or nested)
* MAXTYPE - reject attrs > max known type
* UNSPEC - reject attributes with NLA_UNSPEC policy entries
* STRICT_ATTRS - strictly validate attribute size
The default for future things should be *everything*.
The current *_strict() is a combination of TRAILING and MAXTYPE,
and is renamed to _deprecated_strict().
The current regular parsing has none of this, and is renamed to
*_parse_deprecated().
Additionally it allows us to selectively set one of the new flags
even on old policies. Notably, the UNSPEC flag could be useful in
this case, since it can be arranged (by filling in the policy) to
not be an incompatible userspace ABI change, but would then going
forward prevent forgetting attribute entries. Similar can apply
to the POLICY flag.
We end up with the following renames:
* nla_parse -> nla_parse_deprecated
* nla_parse_strict -> nla_parse_deprecated_strict
* nlmsg_parse -> nlmsg_parse_deprecated
* nlmsg_parse_strict -> nlmsg_parse_deprecated_strict
* nla_parse_nested -> nla_parse_nested_deprecated
* nla_validate_nested -> nla_validate_nested_deprecated
Using spatch, of course:
@@
expression TB, MAX, HEAD, LEN, POL, EXT;
@@
-nla_parse(TB, MAX, HEAD, LEN, POL, EXT)
+nla_parse_deprecated(TB, MAX, HEAD, LEN, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression TB, MAX, NLA, POL, EXT;
@@
-nla_parse_nested(TB, MAX, NLA, POL, EXT)
+nla_parse_nested_deprecated(TB, MAX, NLA, POL, EXT)
@@
expression START, MAX, POL, EXT;
@@
-nla_validate_nested(START, MAX, POL, EXT)
+nla_validate_nested_deprecated(START, MAX, POL, EXT)
@@
expression NLH, HDRLEN, MAX, POL, EXT;
@@
-nlmsg_validate(NLH, HDRLEN, MAX, POL, EXT)
+nlmsg_validate_deprecated(NLH, HDRLEN, MAX, POL, EXT)
For this patch, don't actually add the strict, non-renamed versions
yet so that it breaks compile if I get it wrong.
Also, while at it, make nla_validate and nla_parse go down to a
common __nla_validate_parse() function to avoid code duplication.
Ultimately, this allows us to have very strict validation for every
new caller of nla_parse()/nlmsg_parse() etc as re-introduced in the
next patch, while existing things will continue to work as is.
In effect then, this adds fully strict validation for any new command.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-26 20:07:28 +08:00
if ( nla_parse_nested_deprecated ( linfo , IFLA_INFO_MAX , nla , ifla_info_policy , NULL ) < 0 )
2016-02-03 00:17:07 +08:00
return NULL ;
if ( linfo [ IFLA_INFO_KIND ] ) {
char kind [ MODULE_NAME_LEN ] ;
2020-11-16 01:08:06 +08:00
nla_strscpy ( kind , linfo [ IFLA_INFO_KIND ] , sizeof ( kind ) ) ;
2016-02-03 00:17:07 +08:00
ops = rtnl_link_ops_get ( kind ) ;
}
return ops ;
}
static bool link_master_filtered ( struct net_device * dev , int master_idx )
{
struct net_device * master ;
if ( ! master_idx )
return false ;
master = netdev_master_upper_dev_get ( dev ) ;
2021-08-10 17:06:58 +08:00
/* 0 is already used to denote IFLA_MASTER wasn't passed, therefore need
* another invalid value for ifindex to denote " no master " .
*/
if ( master_idx = = - 1 )
return ! ! master ;
2016-02-03 00:17:07 +08:00
if ( ! master | | master - > ifindex ! = master_idx )
return true ;
return false ;
}
static bool link_kind_filtered ( const struct net_device * dev ,
const struct rtnl_link_ops * kind_ops )
{
if ( kind_ops & & dev - > rtnl_link_ops ! = kind_ops )
return true ;
return false ;
}
/*
 * Return true when @dev must be left out of a link dump, i.e. when either
 * the master filter or the kind filter rejects it.
 */
static bool link_dump_filtered(struct net_device *dev,
			       int master_idx,
			       const struct rtnl_link_ops *kind_ops)
{
	/* the kind filter is only consulted if the master filter passes */
	if (link_master_filtered(dev, master_idx))
		return true;

	return link_kind_filtered(dev, kind_ops);
}
2018-09-05 03:53:47 +08:00
/**
* rtnl_get_net_ns_capable - Get netns if sufficiently privileged .
* @ sk : netlink socket
* @ netnsid : network namespace identifier
*
* Returns the network namespace identified by netnsid on success or an error
* pointer on failure .
*/
struct net * rtnl_get_net_ns_capable ( struct sock * sk , int netnsid )
2017-11-03 03:04:38 +08:00
{
struct net * net ;
2018-01-03 15:27:33 +08:00
net = get_net_ns_by_id ( sock_net ( sk ) , netnsid ) ;
2017-11-03 03:04:38 +08:00
if ( ! net )
return ERR_PTR ( - EINVAL ) ;
/* For now, the caller is required to have CAP_NET_ADMIN in
* the user namespace owning the target net ns .
*/
2018-01-03 15:27:33 +08:00
if ( ! sk_ns_capable ( sk , net - > user_ns , CAP_NET_ADMIN ) ) {
2017-11-03 03:04:38 +08:00
put_net ( net ) ;
return ERR_PTR ( - EACCES ) ;
}
return net ;
}
2018-09-05 03:53:47 +08:00
EXPORT_SYMBOL_GPL ( rtnl_get_net_ns_capable ) ;
2017-11-03 03:04:38 +08:00
2018-10-08 11:16:30 +08:00
/*
 * Validate the header of a link dump request and parse its attributes
 * into @tb (sized for IFLA_MAX) using ifla_policy.
 *
 * With @strict_check the message must hold a full struct ifinfomsg whose
 * __ifi_pad, ifi_type, ifi_flags, ifi_change and ifi_index fields are all
 * zero, and attributes are parsed with nlmsg_parse_deprecated_strict().
 * Without it, the header length is guessed from the payload size and
 * attributes are parsed with nlmsg_parse_deprecated().
 *
 * Returns -EINVAL (with an extack message) for a bad strict header,
 * otherwise the result of the attribute parser.
 */
static int rtnl_valid_dump_ifinfo_req(const struct nlmsghdr *nlh,
				      bool strict_check, struct nlattr **tb,
				      struct netlink_ext_ack *extack)
{
	struct ifinfomsg *ifm;
	int hdrlen;

	if (!strict_check) {
		/* A hack to preserve kernel<->userspace interface.
		 * The correct header is ifinfomsg. It is consistent with
		 * rtnl_getlink. However, before Linux v3.9 the code here
		 * assumed rtgenmsg and that's what iproute2 < v3.9.0 used.
		 * We can detect the old iproute2. Even including the
		 * IFLA_EXT_MASK attribute, its netlink message is shorter
		 * than struct ifinfomsg.
		 */
		if (nlmsg_len(nlh) < sizeof(struct ifinfomsg))
			hdrlen = sizeof(struct rtgenmsg);
		else
			hdrlen = sizeof(struct ifinfomsg);

		return nlmsg_parse_deprecated(nlh, hdrlen, tb, IFLA_MAX,
					      ifla_policy, extack);
	}

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
		NL_SET_ERR_MSG(extack, "Invalid header for link dump");
		return -EINVAL;
	}

	ifm = nlmsg_data(nlh);

	if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags ||
	    ifm->ifi_change) {
		NL_SET_ERR_MSG(extack, "Invalid values in header for link dump request");
		return -EINVAL;
	}

	if (ifm->ifi_index) {
		NL_SET_ERR_MSG(extack, "Filter by device index not supported for link dumps");
		return -EINVAL;
	}

	return nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFLA_MAX,
					     ifla_policy, extack);
}
2014-02-19 03:53:18 +08:00
static int rtnl_dump_ifinfo ( struct sk_buff * skb , struct netlink_callback * cb )
{
2018-10-08 11:16:30 +08:00
struct netlink_ext_ack * extack = cb - > extack ;
const struct nlmsghdr * nlh = cb - > nlh ;
2014-02-19 03:53:18 +08:00
struct net * net = sock_net ( skb - > sk ) ;
2017-11-03 03:04:38 +08:00
struct net * tgt_net = net ;
2014-02-19 03:53:18 +08:00
int h , s_h ;
int idx = 0 , s_idx ;
struct net_device * dev ;
struct hlist_head * head ;
struct nlattr * tb [ IFLA_MAX + 1 ] ;
u32 ext_filter_mask = 0 ;
2016-02-03 00:17:07 +08:00
const struct rtnl_link_ops * kind_ops = NULL ;
unsigned int flags = NLM_F_MULTI ;
int master_idx = 0 ;
2017-11-03 03:04:38 +08:00
int netnsid = - 1 ;
2018-10-08 11:16:30 +08:00
int err , i ;
2014-02-19 03:53:18 +08:00
s_h = cb - > args [ 0 ] ;
s_idx = cb - > args [ 1 ] ;
2018-10-08 11:16:30 +08:00
err = rtnl_valid_dump_ifinfo_req ( nlh , cb - > strict_check , tb , extack ) ;
if ( err < 0 ) {
if ( cb - > strict_check )
return err ;
goto walk_entries ;
}
for ( i = 0 ; i < = IFLA_MAX ; + + i ) {
if ( ! tb [ i ] )
continue ;
2014-05-28 20:15:19 +08:00
2018-10-08 11:16:30 +08:00
/* new attributes should only be added with strict checking */
switch ( i ) {
case IFLA_TARGET_NETNSID :
netnsid = nla_get_s32 ( tb [ i ] ) ;
2018-09-05 03:53:47 +08:00
tgt_net = rtnl_get_net_ns_capable ( skb - > sk , netnsid ) ;
2018-10-08 11:16:30 +08:00
if ( IS_ERR ( tgt_net ) ) {
NL_SET_ERR_MSG ( extack , " Invalid target network namespace id " ) ;
2018-09-29 03:28:41 +08:00
return PTR_ERR ( tgt_net ) ;
2018-10-08 11:16:30 +08:00
}
break ;
case IFLA_EXT_MASK :
ext_filter_mask = nla_get_u32 ( tb [ i ] ) ;
break ;
case IFLA_MASTER :
master_idx = nla_get_u32 ( tb [ i ] ) ;
break ;
case IFLA_LINKINFO :
kind_ops = linkinfo_to_kind_ops ( tb [ i ] ) ;
break ;
default :
if ( cb - > strict_check ) {
NL_SET_ERR_MSG ( extack , " Unsupported attribute in link dump request " ) ;
return - EINVAL ;
}
2017-11-03 03:04:38 +08:00
}
2014-02-19 03:53:18 +08:00
}
2018-10-08 11:16:30 +08:00
if ( master_idx | | kind_ops )
flags | = NLM_F_DUMP_FILTERED ;
walk_entries :
2014-02-19 03:53:18 +08:00
for ( h = s_h ; h < NETDEV_HASHENTRIES ; h + + , s_idx = 0 ) {
idx = 0 ;
2017-11-03 03:04:38 +08:00
head = & tgt_net - > dev_index_head [ h ] ;
2015-02-28 01:42:50 +08:00
hlist_for_each_entry ( dev , head , index_hlist ) {
2016-02-03 00:17:07 +08:00
if ( link_dump_filtered ( dev , master_idx , kind_ops ) )
2016-11-19 23:28:32 +08:00
goto cont ;
2014-02-19 03:53:18 +08:00
if ( idx < s_idx )
goto cont ;
2017-11-03 03:04:38 +08:00
err = rtnl_fill_ifinfo ( skb , dev , net ,
RTM_NEWLINK ,
2014-04-24 08:22:35 +08:00
NETLINK_CB ( cb - > skb ) . portid ,
2018-10-08 11:16:30 +08:00
nlh - > nlmsg_seq , 0 , flags ,
2018-01-25 22:01:39 +08:00
ext_filter_mask , 0 , NULL , 0 ,
netns: fix GFP flags in rtnl_net_notifyid()
In rtnl_net_notifyid(), we certainly can't pass a null GFP flag to
rtnl_notify(). A GFP_KERNEL flag would be fine in most circumstances,
but there are a few paths calling rtnl_net_notifyid() from atomic
context or from RCU critical sections. The later also precludes the use
of gfp_any() as it wouldn't detect the RCU case. Also, the nlmsg_new()
call is wrong too, as it uses GFP_KERNEL unconditionally.
Therefore, we need to pass the GFP flags as parameter and propagate it
through function calls until the proper flags can be determined.
In most cases, GFP_KERNEL is fine. The exceptions are:
* openvswitch: ovs_vport_cmd_get() and ovs_vport_cmd_dump()
indirectly call rtnl_net_notifyid() from RCU critical section,
* rtnetlink: rtmsg_ifinfo_build_skb() already receives GFP flags as
parameter.
Also, in ovs_vport_cmd_build_info(), let's change the GFP flags used
by nlmsg_new(). The function is allowed to sleep, so better make the
flags consistent with the ones used in the following
ovs_vport_cmd_fill_info() call.
Found by code inspection.
Fixes: 9a9634545c70 ("netns: notify netns id events")
Signed-off-by: Guillaume Nault <gnault@redhat.com>
Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-10-24 00:39:04 +08:00
netnsid , GFP_KERNEL ) ;
2014-04-24 08:22:35 +08:00
2017-05-16 14:19:17 +08:00
if ( err < 0 ) {
if ( likely ( skb - > len ) )
goto out ;
goto out_err ;
}
2014-02-19 03:53:18 +08:00
cont :
idx + + ;
}
}
out :
2017-05-16 14:19:17 +08:00
err = skb - > len ;
out_err :
2014-02-19 03:53:18 +08:00
cb - > args [ 1 ] = idx ;
cb - > args [ 0 ] = h ;
2021-03-02 18:16:07 +08:00
cb - > seq = tgt_net - > dev_base_seq ;
2017-08-09 23:39:12 +08:00
nl_dump_check_consistent ( cb , nlmsg_hdr ( skb ) ) ;
2017-11-03 03:04:38 +08:00
if ( netnsid > = 0 )
put_net ( tgt_net ) ;
2014-02-19 03:53:18 +08:00
2017-05-16 14:19:17 +08:00
return err ;
2014-02-19 03:53:18 +08:00
}
2017-04-12 20:34:07 +08:00
int rtnl_nla_parse_ifla ( struct nlattr * * tb , const struct nlattr * head , int len ,
struct netlink_ext_ack * exterr )
2014-02-19 03:53:18 +08:00
{
netlink: make validation more configurable for future strictness
We currently have two levels of strict validation:
1) liberal (default)
- undefined (type >= max) & NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
- garbage at end of message accepted
2) strict (opt-in)
- NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
Split out parsing strictness into four different options:
* TRAILING - check that there's no trailing data after parsing
attributes (in message or nested)
* MAXTYPE - reject attrs > max known type
* UNSPEC - reject attributes with NLA_UNSPEC policy entries
* STRICT_ATTRS - strictly validate attribute size
The default for future things should be *everything*.
The current *_strict() is a combination of TRAILING and MAXTYPE,
and is renamed to _deprecated_strict().
The current regular parsing has none of this, and is renamed to
*_parse_deprecated().
Additionally it allows us to selectively set one of the new flags
even on old policies. Notably, the UNSPEC flag could be useful in
this case, since it can be arranged (by filling in the policy) to
not be an incompatible userspace ABI change, but would then going
forward prevent forgetting attribute entries. Similar can apply
to the POLICY flag.
We end up with the following renames:
* nla_parse -> nla_parse_deprecated
* nla_parse_strict -> nla_parse_deprecated_strict
* nlmsg_parse -> nlmsg_parse_deprecated
* nlmsg_parse_strict -> nlmsg_parse_deprecated_strict
* nla_parse_nested -> nla_parse_nested_deprecated
* nla_validate_nested -> nla_validate_nested_deprecated
Using spatch, of course:
@@
expression TB, MAX, HEAD, LEN, POL, EXT;
@@
-nla_parse(TB, MAX, HEAD, LEN, POL, EXT)
+nla_parse_deprecated(TB, MAX, HEAD, LEN, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression TB, MAX, NLA, POL, EXT;
@@
-nla_parse_nested(TB, MAX, NLA, POL, EXT)
+nla_parse_nested_deprecated(TB, MAX, NLA, POL, EXT)
@@
expression START, MAX, POL, EXT;
@@
-nla_validate_nested(START, MAX, POL, EXT)
+nla_validate_nested_deprecated(START, MAX, POL, EXT)
@@
expression NLH, HDRLEN, MAX, POL, EXT;
@@
-nlmsg_validate(NLH, HDRLEN, MAX, POL, EXT)
+nlmsg_validate_deprecated(NLH, HDRLEN, MAX, POL, EXT)
For this patch, don't actually add the strict, non-renamed versions
yet so that it breaks compile if I get it wrong.
Also, while at it, make nla_validate and nla_parse go down to a
common __nla_validate_parse() function to avoid code duplication.
Ultimately, this allows us to have very strict validation for every
new caller of nla_parse()/nlmsg_parse() etc as re-introduced in the
next patch, while existing things will continue to work as is.
In effect then, this adds fully strict validation for any new command.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-26 20:07:28 +08:00
return nla_parse_deprecated ( tb , IFLA_MAX , head , len , ifla_policy ,
exterr ) ;
2014-02-19 03:53:18 +08:00
}
EXPORT_SYMBOL ( rtnl_nla_parse_ifla ) ;
2009-11-08 16:53:51 +08:00
struct net * rtnl_link_get_net ( struct net * src_net , struct nlattr * tb [ ] )
{
struct net * net ;
/* Examine the link attributes and figure out which
* network namespace we are talking about .
*/
if ( tb [ IFLA_NET_NS_PID ] )
net = get_net_ns_by_pid ( nla_get_u32 ( tb [ IFLA_NET_NS_PID ] ) ) ;
2011-05-05 08:51:50 +08:00
else if ( tb [ IFLA_NET_NS_FD ] )
net = get_net_ns_by_fd ( nla_get_u32 ( tb [ IFLA_NET_NS_FD ] ) ) ;
2009-11-08 16:53:51 +08:00
else
net = get_net ( src_net ) ;
return net ;
}
EXPORT_SYMBOL ( rtnl_link_get_net ) ;
2018-01-24 22:26:32 +08:00
/* Figure out which network namespace we are talking about by
* examining the link attributes in the following order :
*
* 1. IFLA_NET_NS_PID
* 2. IFLA_NET_NS_FD
2018-09-05 03:53:53 +08:00
* 3. IFLA_TARGET_NETNSID
2018-01-24 22:26:32 +08:00
*/
static struct net * rtnl_link_get_net_by_nlattr ( struct net * src_net ,
struct nlattr * tb [ ] )
{
struct net * net ;
if ( tb [ IFLA_NET_NS_PID ] | | tb [ IFLA_NET_NS_FD ] )
return rtnl_link_get_net ( src_net , tb ) ;
2018-09-05 03:53:53 +08:00
if ( ! tb [ IFLA_TARGET_NETNSID ] )
2018-01-24 22:26:32 +08:00
return get_net ( src_net ) ;
2018-09-05 03:53:53 +08:00
net = get_net_ns_by_id ( src_net , nla_get_u32 ( tb [ IFLA_TARGET_NETNSID ] ) ) ;
2018-01-24 22:26:32 +08:00
if ( ! net )
return ERR_PTR ( - EINVAL ) ;
return net ;
}
static struct net * rtnl_link_get_net_capable ( const struct sk_buff * skb ,
struct net * src_net ,
struct nlattr * tb [ ] , int cap )
{
struct net * net ;
net = rtnl_link_get_net_by_nlattr ( src_net , tb ) ;
if ( IS_ERR ( net ) )
return net ;
if ( ! netlink_ns_capable ( skb , net - > user_ns , cap ) ) {
put_net ( net ) ;
return ERR_PTR ( - EPERM ) ;
}
return net ;
}
2018-02-07 20:53:20 +08:00
/* Verify that rtnetlink requests do not pass additional properties
 * potentially referring to different network namespaces.
 *
 * With @netns_id_only set, IFLA_NET_NS_PID and IFLA_NET_NS_FD are not
 * accepted at all (-EOPNOTSUPP). Otherwise at most one of
 * IFLA_TARGET_NETNSID, IFLA_NET_NS_PID and IFLA_NET_NS_FD may be present
 * (-EINVAL if more than one is). Returns 0 when the request is acceptable.
 */
static int rtnl_ensure_unique_netns(struct nlattr *tb[],
				    struct netlink_ext_ack *extack,
				    bool netns_id_only)
{
	bool has_pid = tb[IFLA_NET_NS_PID] != NULL;
	bool has_fd = tb[IFLA_NET_NS_FD] != NULL;
	bool has_nsid = tb[IFLA_TARGET_NETNSID] != NULL;

	if (netns_id_only) {
		if (has_pid || has_fd) {
			NL_SET_ERR_MSG(extack, "specified netns attribute not supported");
			return -EOPNOTSUPP;
		}
		return 0;
	}

	/* two or more selectors present means the request is ambiguous */
	if (has_pid + has_fd + has_nsid > 1) {
		NL_SET_ERR_MSG(extack, "multiple netns identifying attributes specified");
		return -EINVAL;
	}

	return 0;
}
2021-08-03 20:02:50 +08:00
static int validate_linkmsg ( struct net_device * dev , struct nlattr * tb [ ] ,
struct netlink_ext_ack * extack )
2008-02-24 11:54:36 +08:00
{
if ( dev ) {
if ( tb [ IFLA_ADDRESS ] & &
nla_len ( tb [ IFLA_ADDRESS ] ) < dev - > addr_len )
return - EINVAL ;
if ( tb [ IFLA_BROADCAST ] & &
nla_len ( tb [ IFLA_BROADCAST ] ) < dev - > addr_len )
return - EINVAL ;
}
2010-11-22 09:31:54 +08:00
if ( tb [ IFLA_AF_SPEC ] ) {
struct nlattr * af ;
int rem , err ;
nla_for_each_nested ( af , tb [ IFLA_AF_SPEC ] , rem ) {
const struct rtnl_af_ops * af_ops ;
2017-10-16 21:44:36 +08:00
af_ops = rtnl_af_lookup ( nla_type ( af ) ) ;
2021-05-09 02:00:33 +08:00
if ( ! af_ops )
2010-11-22 09:31:54 +08:00
return - EAFNOSUPPORT ;
2021-05-09 02:00:33 +08:00
if ( ! af_ops - > set_link_af )
2010-11-22 09:31:54 +08:00
return - EOPNOTSUPP ;
if ( af_ops - > validate_link_af ) {
2021-08-03 20:02:50 +08:00
err = af_ops - > validate_link_af ( dev , af , extack ) ;
2021-05-09 02:00:33 +08:00
if ( err < 0 )
2010-11-22 09:31:54 +08:00
return err ;
}
}
}
2022-01-05 18:48:38 +08:00
if ( tb [ IFLA_GRO_MAX_SIZE ] ) {
u32 gro_max_size = nla_get_u32 ( tb [ IFLA_GRO_MAX_SIZE ] ) ;
if ( gro_max_size > GRO_MAX_SIZE ) {
NL_SET_ERR_MSG ( extack , " too big gro_max_size " ) ;
return - EINVAL ;
}
}
2008-02-24 11:54:36 +08:00
return 0 ;
}
2016-03-12 04:58:34 +08:00
static int handle_infiniband_guid ( struct net_device * dev , struct ifla_vf_guid * ivt ,
int guid_type )
{
const struct net_device_ops * ops = dev - > netdev_ops ;
return ops - > ndo_set_vf_guid ( dev , ivt - > vf , ivt - > guid , guid_type ) ;
}
static int handle_vf_guid ( struct net_device * dev , struct ifla_vf_guid * ivt , int guid_type )
{
if ( dev - > type ! = ARPHRD_INFINIBAND )
return - EOPNOTSUPP ;
return handle_infiniband_guid ( dev , ivt , guid_type ) ;
}
rtnetlink: verify IFLA_VF_INFO attributes before passing them to driver
Jason Gunthorpe reported that since commit c02db8c6290b ("rtnetlink: make
SR-IOV VF interface symmetric"), we don't verify IFLA_VF_INFO attributes
anymore with respect to their policy, that is, ifla_vfinfo_policy[].
Before, they were part of ifla_policy[], but they have been nested since
placed under IFLA_VFINFO_LIST, that contains the attribute IFLA_VF_INFO,
which is another nested attribute for the actual VF attributes such as
IFLA_VF_MAC, IFLA_VF_VLAN, etc.
Despite the policy being split out from ifla_policy[] in this commit,
it's never applied anywhere. nla_for_each_nested() only does basic nla_ok()
testing for struct nlattr, but it doesn't know about the data context and
their requirements.
Fix, on top of Jason's initial work, does 1) parsing of the attributes
with the right policy, and 2) using the resulting parsed attribute table
from 1) instead of the nla_for_each_nested() loop (just like we used to
do when still part of ifla_policy[]).
Reference: http://thread.gmane.org/gmane.linux.network/368913
Fixes: c02db8c6290b ("rtnetlink: make SR-IOV VF interface symmetric")
Reported-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
Cc: Greg Rose <gregory.v.rose@intel.com>
Cc: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Cc: Rony Efraim <ronye@mellanox.com>
Cc: Vlad Zolotarov <vladz@cloudius-systems.com>
Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Cc: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Vlad Zolotarov <vladz@cloudius-systems.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-07 06:07:52 +08:00
static int do_setvfinfo ( struct net_device * dev , struct nlattr * * tb )
2010-05-16 16:05:45 +08:00
{
const struct net_device_ops * ops = dev - > netdev_ops ;
rtnetlink: verify IFLA_VF_INFO attributes before passing them to driver
Jason Gunthorpe reported that since commit c02db8c6290b ("rtnetlink: make
SR-IOV VF interface symmetric"), we don't verify IFLA_VF_INFO attributes
anymore with respect to their policy, that is, ifla_vfinfo_policy[].
Before, they were part of ifla_policy[], but they have been nested since
placed under IFLA_VFINFO_LIST, that contains the attribute IFLA_VF_INFO,
which is another nested attribute for the actual VF attributes such as
IFLA_VF_MAC, IFLA_VF_VLAN, etc.
Despite the policy being split out from ifla_policy[] in this commit,
it's never applied anywhere. nla_for_each_nested() only does basic nla_ok()
testing for struct nlattr, but it doesn't know about the data context and
their requirements.
Fix, on top of Jason's initial work, does 1) parsing of the attributes
with the right policy, and 2) using the resulting parsed attribute table
from 1) instead of the nla_for_each_nested() loop (just like we used to
do when still part of ifla_policy[]).
Reference: http://thread.gmane.org/gmane.linux.network/368913
Fixes: c02db8c6290b ("rtnetlink: make SR-IOV VF interface symmetric")
Reported-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
Cc: Greg Rose <gregory.v.rose@intel.com>
Cc: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Cc: Rony Efraim <ronye@mellanox.com>
Cc: Vlad Zolotarov <vladz@cloudius-systems.com>
Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Cc: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Vlad Zolotarov <vladz@cloudius-systems.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-07 06:07:52 +08:00
int err = - EINVAL ;
2010-05-16 16:05:45 +08:00
rtnetlink: verify IFLA_VF_INFO attributes before passing them to driver
Jason Gunthorpe reported that since commit c02db8c6290b ("rtnetlink: make
SR-IOV VF interface symmetric"), we don't verify IFLA_VF_INFO attributes
anymore with respect to their policy, that is, ifla_vfinfo_policy[].
Before, they were part of ifla_policy[], but they have been nested since
placed under IFLA_VFINFO_LIST, that contains the attribute IFLA_VF_INFO,
which is another nested attribute for the actual VF attributes such as
IFLA_VF_MAC, IFLA_VF_VLAN, etc.
Despite the policy being split out from ifla_policy[] in this commit,
it's never applied anywhere. nla_for_each_nested() only does basic nla_ok()
testing for struct nlattr, but it doesn't know about the data context and
their requirements.
Fix, on top of Jason's initial work, does 1) parsing of the attributes
with the right policy, and 2) using the resulting parsed attribute table
from 1) instead of the nla_for_each_nested() loop (just like we used to
do when still part of ifla_policy[]).
Reference: http://thread.gmane.org/gmane.linux.network/368913
Fixes: c02db8c6290b ("rtnetlink: make SR-IOV VF interface symmetric")
Reported-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
Cc: Greg Rose <gregory.v.rose@intel.com>
Cc: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Cc: Rony Efraim <ronye@mellanox.com>
Cc: Vlad Zolotarov <vladz@cloudius-systems.com>
Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Cc: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Vlad Zolotarov <vladz@cloudius-systems.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-07 06:07:52 +08:00
if ( tb [ IFLA_VF_MAC ] ) {
struct ifla_vf_mac * ivm = nla_data ( tb [ IFLA_VF_MAC ] ) ;
2015-03-31 02:35:23 +08:00
2019-11-20 20:34:38 +08:00
if ( ivm - > vf > = INT_MAX )
return - EINVAL ;
rtnetlink: verify IFLA_VF_INFO attributes before passing them to driver
Jason Gunthorpe reported that since commit c02db8c6290b ("rtnetlink: make
SR-IOV VF interface symmetric"), we don't verify IFLA_VF_INFO attributes
anymore with respect to their policy, that is, ifla_vfinfo_policy[].
Before, they were part of ifla_policy[], but they have been nested since
placed under IFLA_VFINFO_LIST, that contains the attribute IFLA_VF_INFO,
which is another nested attribute for the actual VF attributes such as
IFLA_VF_MAC, IFLA_VF_VLAN, etc.
Despite the policy being split out from ifla_policy[] in this commit,
it's never applied anywhere. nla_for_each_nested() only does basic nla_ok()
testing for struct nlattr, but it doesn't know about the data context and
their requirements.
Fix, on top of Jason's initial work, does 1) parsing of the attributes
with the right policy, and 2) using the resulting parsed attribute table
from 1) instead of the nla_for_each_nested() loop (just like we used to
do when still part of ifla_policy[]).
Reference: http://thread.gmane.org/gmane.linux.network/368913
Fixes: c02db8c6290b ("rtnetlink: make SR-IOV VF interface symmetric")
Reported-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
Cc: Greg Rose <gregory.v.rose@intel.com>
Cc: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Cc: Rony Efraim <ronye@mellanox.com>
Cc: Vlad Zolotarov <vladz@cloudius-systems.com>
Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Cc: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Vlad Zolotarov <vladz@cloudius-systems.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-07 06:07:52 +08:00
err = - EOPNOTSUPP ;
if ( ops - > ndo_set_vf_mac )
err = ops - > ndo_set_vf_mac ( dev , ivm - > vf ,
ivm - > mac ) ;
if ( err < 0 )
return err ;
}
if ( tb [ IFLA_VF_VLAN ] ) {
struct ifla_vf_vlan * ivv = nla_data ( tb [ IFLA_VF_VLAN ] ) ;
2019-11-20 20:34:38 +08:00
if ( ivv - > vf > = INT_MAX )
return - EINVAL ;
rtnetlink: verify IFLA_VF_INFO attributes before passing them to driver
Jason Gunthorpe reported that since commit c02db8c6290b ("rtnetlink: make
SR-IOV VF interface symmetric"), we don't verify IFLA_VF_INFO attributes
anymore with respect to their policy, that is, ifla_vfinfo_policy[].
Before, they were part of ifla_policy[], but they have been nested since
placed under IFLA_VFINFO_LIST, that contains the attribute IFLA_VF_INFO,
which is another nested attribute for the actual VF attributes such as
IFLA_VF_MAC, IFLA_VF_VLAN, etc.
Despite the policy being split out from ifla_policy[] in this commit,
it's never applied anywhere. nla_for_each_nested() only does basic nla_ok()
testing for struct nlattr, but it doesn't know about the data context and
their requirements.
Fix, on top of Jason's initial work, does 1) parsing of the attributes
with the right policy, and 2) using the resulting parsed attribute table
from 1) instead of the nla_for_each_nested() loop (just like we used to
do when still part of ifla_policy[]).
Reference: http://thread.gmane.org/gmane.linux.network/368913
Fixes: c02db8c6290b ("rtnetlink: make SR-IOV VF interface symmetric")
Reported-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
Cc: Greg Rose <gregory.v.rose@intel.com>
Cc: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Cc: Rony Efraim <ronye@mellanox.com>
Cc: Vlad Zolotarov <vladz@cloudius-systems.com>
Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Cc: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Vlad Zolotarov <vladz@cloudius-systems.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-07 06:07:52 +08:00
err = - EOPNOTSUPP ;
if ( ops - > ndo_set_vf_vlan )
err = ops - > ndo_set_vf_vlan ( dev , ivv - > vf , ivv - > vlan ,
2016-09-22 17:11:15 +08:00
ivv - > qos ,
htons ( ETH_P_8021Q ) ) ;
if ( err < 0 )
return err ;
}
if ( tb [ IFLA_VF_VLAN_LIST ] ) {
struct ifla_vf_vlan_info * ivvl [ MAX_VLAN_LIST_LEN ] ;
struct nlattr * attr ;
int rem , len = 0 ;
err = - EOPNOTSUPP ;
if ( ! ops - > ndo_set_vf_vlan )
return err ;
nla_for_each_nested ( attr , tb [ IFLA_VF_VLAN_LIST ] , rem ) {
if ( nla_type ( attr ) ! = IFLA_VF_VLAN_INFO | |
nla_len ( attr ) < NLA_HDRLEN ) {
return - EINVAL ;
}
if ( len > = MAX_VLAN_LIST_LEN )
return - EOPNOTSUPP ;
ivvl [ len ] = nla_data ( attr ) ;
len + + ;
}
2016-10-01 00:13:49 +08:00
if ( len = = 0 )
return - EINVAL ;
2019-11-20 20:34:38 +08:00
if ( ivvl [ 0 ] - > vf > = INT_MAX )
return - EINVAL ;
2016-09-22 17:11:15 +08:00
err = ops - > ndo_set_vf_vlan ( dev , ivvl [ 0 ] - > vf , ivvl [ 0 ] - > vlan ,
ivvl [ 0 ] - > qos , ivvl [ 0 ] - > vlan_proto ) ;
rtnetlink: verify IFLA_VF_INFO attributes before passing them to driver
Jason Gunthorpe reported that since commit c02db8c6290b ("rtnetlink: make
SR-IOV VF interface symmetric"), we don't verify IFLA_VF_INFO attributes
anymore with respect to their policy, that is, ifla_vfinfo_policy[].
Before, they were part of ifla_policy[], but they have been nested since
placed under IFLA_VFINFO_LIST, that contains the attribute IFLA_VF_INFO,
which is another nested attribute for the actual VF attributes such as
IFLA_VF_MAC, IFLA_VF_VLAN, etc.
Despite the policy being split out from ifla_policy[] in this commit,
it's never applied anywhere. nla_for_each_nested() only does basic nla_ok()
testing for struct nlattr, but it doesn't know about the data context and
their requirements.
Fix, on top of Jason's initial work, does 1) parsing of the attributes
with the right policy, and 2) using the resulting parsed attribute table
from 1) instead of the nla_for_each_nested() loop (just like we used to
do when still part of ifla_policy[]).
Reference: http://thread.gmane.org/gmane.linux.network/368913
Fixes: c02db8c6290b ("rtnetlink: make SR-IOV VF interface symmetric")
Reported-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
Cc: Greg Rose <gregory.v.rose@intel.com>
Cc: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Cc: Rony Efraim <ronye@mellanox.com>
Cc: Vlad Zolotarov <vladz@cloudius-systems.com>
Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Cc: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Vlad Zolotarov <vladz@cloudius-systems.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-07 06:07:52 +08:00
if ( err < 0 )
return err ;
2010-05-16 16:05:45 +08:00
}
rtnetlink: verify IFLA_VF_INFO attributes before passing them to driver
Jason Gunthorpe reported that since commit c02db8c6290b ("rtnetlink: make
SR-IOV VF interface symmetric"), we don't verify IFLA_VF_INFO attributes
anymore with respect to their policy, that is, ifla_vfinfo_policy[].
Before, they were part of ifla_policy[], but they have been nested since
placed under IFLA_VFINFO_LIST, that contains the attribute IFLA_VF_INFO,
which is another nested attribute for the actual VF attributes such as
IFLA_VF_MAC, IFLA_VF_VLAN, etc.
Despite the policy being split out from ifla_policy[] in this commit,
it's never applied anywhere. nla_for_each_nested() only does basic nla_ok()
testing for struct nlattr, but it doesn't know about the data context and
their requirements.
Fix, on top of Jason's initial work, does 1) parsing of the attributes
with the right policy, and 2) using the resulting parsed attribute table
from 1) instead of the nla_for_each_nested() loop (just like we used to
do when still part of ifla_policy[]).
Reference: http://thread.gmane.org/gmane.linux.network/368913
Fixes: c02db8c6290b ("rtnetlink: make SR-IOV VF interface symmetric")
Reported-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
Cc: Greg Rose <gregory.v.rose@intel.com>
Cc: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Cc: Rony Efraim <ronye@mellanox.com>
Cc: Vlad Zolotarov <vladz@cloudius-systems.com>
Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Cc: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Vlad Zolotarov <vladz@cloudius-systems.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-07 06:07:52 +08:00
if ( tb [ IFLA_VF_TX_RATE ] ) {
struct ifla_vf_tx_rate * ivt = nla_data ( tb [ IFLA_VF_TX_RATE ] ) ;
struct ifla_vf_info ivf ;
2019-11-20 20:34:38 +08:00
if ( ivt - > vf > = INT_MAX )
return - EINVAL ;
rtnetlink: verify IFLA_VF_INFO attributes before passing them to driver
Jason Gunthorpe reported that since commit c02db8c6290b ("rtnetlink: make
SR-IOV VF interface symmetric"), we don't verify IFLA_VF_INFO attributes
anymore with respect to their policy, that is, ifla_vfinfo_policy[].
Before, they were part of ifla_policy[], but they have been nested since
placed under IFLA_VFINFO_LIST, that contains the attribute IFLA_VF_INFO,
which is another nested attribute for the actual VF attributes such as
IFLA_VF_MAC, IFLA_VF_VLAN, etc.
Despite the policy being split out from ifla_policy[] in this commit,
it's never applied anywhere. nla_for_each_nested() only does basic nla_ok()
testing for struct nlattr, but it doesn't know about the data context and
their requirements.
Fix, on top of Jason's initial work, does 1) parsing of the attributes
with the right policy, and 2) using the resulting parsed attribute table
from 1) instead of the nla_for_each_nested() loop (just like we used to
do when still part of ifla_policy[]).
Reference: http://thread.gmane.org/gmane.linux.network/368913
Fixes: c02db8c6290b ("rtnetlink: make SR-IOV VF interface symmetric")
Reported-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
Cc: Greg Rose <gregory.v.rose@intel.com>
Cc: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Cc: Rony Efraim <ronye@mellanox.com>
Cc: Vlad Zolotarov <vladz@cloudius-systems.com>
Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Cc: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Vlad Zolotarov <vladz@cloudius-systems.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-07 06:07:52 +08:00
err = - EOPNOTSUPP ;
if ( ops - > ndo_get_vf_config )
err = ops - > ndo_get_vf_config ( dev , ivt - > vf , & ivf ) ;
if ( err < 0 )
return err ;
err = - EOPNOTSUPP ;
if ( ops - > ndo_set_vf_rate )
err = ops - > ndo_set_vf_rate ( dev , ivt - > vf ,
ivf . min_tx_rate ,
ivt - > rate ) ;
if ( err < 0 )
return err ;
}
if ( tb [ IFLA_VF_RATE ] ) {
struct ifla_vf_rate * ivt = nla_data ( tb [ IFLA_VF_RATE ] ) ;
2019-11-20 20:34:38 +08:00
if ( ivt - > vf > = INT_MAX )
return - EINVAL ;
rtnetlink: verify IFLA_VF_INFO attributes before passing them to driver
Jason Gunthorpe reported that since commit c02db8c6290b ("rtnetlink: make
SR-IOV VF interface symmetric"), we don't verify IFLA_VF_INFO attributes
anymore with respect to their policy, that is, ifla_vfinfo_policy[].
Before, they were part of ifla_policy[], but they have been nested since
placed under IFLA_VFINFO_LIST, that contains the attribute IFLA_VF_INFO,
which is another nested attribute for the actual VF attributes such as
IFLA_VF_MAC, IFLA_VF_VLAN, etc.
Despite the policy being split out from ifla_policy[] in this commit,
it's never applied anywhere. nla_for_each_nested() only does basic nla_ok()
testing for struct nlattr, but it doesn't know about the data context and
their requirements.
Fix, on top of Jason's initial work, does 1) parsing of the attributes
with the right policy, and 2) using the resulting parsed attribute table
from 1) instead of the nla_for_each_nested() loop (just like we used to
do when still part of ifla_policy[]).
Reference: http://thread.gmane.org/gmane.linux.network/368913
Fixes: c02db8c6290b ("rtnetlink: make SR-IOV VF interface symmetric")
Reported-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
Cc: Greg Rose <gregory.v.rose@intel.com>
Cc: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Cc: Rony Efraim <ronye@mellanox.com>
Cc: Vlad Zolotarov <vladz@cloudius-systems.com>
Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Cc: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Vlad Zolotarov <vladz@cloudius-systems.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-07 06:07:52 +08:00
err = - EOPNOTSUPP ;
if ( ops - > ndo_set_vf_rate )
err = ops - > ndo_set_vf_rate ( dev , ivt - > vf ,
ivt - > min_tx_rate ,
ivt - > max_tx_rate ) ;
if ( err < 0 )
return err ;
}
if ( tb [ IFLA_VF_SPOOFCHK ] ) {
struct ifla_vf_spoofchk * ivs = nla_data ( tb [ IFLA_VF_SPOOFCHK ] ) ;
2019-11-20 20:34:38 +08:00
if ( ivs - > vf > = INT_MAX )
return - EINVAL ;
rtnetlink: verify IFLA_VF_INFO attributes before passing them to driver
Jason Gunthorpe reported that since commit c02db8c6290b ("rtnetlink: make
SR-IOV VF interface symmetric"), we don't verify IFLA_VF_INFO attributes
anymore with respect to their policy, that is, ifla_vfinfo_policy[].
Before, they were part of ifla_policy[], but they have been nested since
placed under IFLA_VFINFO_LIST, that contains the attribute IFLA_VF_INFO,
which is another nested attribute for the actual VF attributes such as
IFLA_VF_MAC, IFLA_VF_VLAN, etc.
Despite the policy being split out from ifla_policy[] in this commit,
it's never applied anywhere. nla_for_each_nested() only does basic nla_ok()
testing for struct nlattr, but it doesn't know about the data context and
their requirements.
Fix, on top of Jason's initial work, does 1) parsing of the attributes
with the right policy, and 2) using the resulting parsed attribute table
from 1) instead of the nla_for_each_nested() loop (just like we used to
do when still part of ifla_policy[]).
Reference: http://thread.gmane.org/gmane.linux.network/368913
Fixes: c02db8c6290b ("rtnetlink: make SR-IOV VF interface symmetric")
Reported-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
Cc: Greg Rose <gregory.v.rose@intel.com>
Cc: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Cc: Rony Efraim <ronye@mellanox.com>
Cc: Vlad Zolotarov <vladz@cloudius-systems.com>
Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Cc: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Vlad Zolotarov <vladz@cloudius-systems.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-07 06:07:52 +08:00
err = - EOPNOTSUPP ;
if ( ops - > ndo_set_vf_spoofchk )
err = ops - > ndo_set_vf_spoofchk ( dev , ivs - > vf ,
ivs - > setting ) ;
if ( err < 0 )
return err ;
}
if ( tb [ IFLA_VF_LINK_STATE ] ) {
struct ifla_vf_link_state * ivl = nla_data ( tb [ IFLA_VF_LINK_STATE ] ) ;
2019-11-20 20:34:38 +08:00
if ( ivl - > vf > = INT_MAX )
return - EINVAL ;
rtnetlink: verify IFLA_VF_INFO attributes before passing them to driver
Jason Gunthorpe reported that since commit c02db8c6290b ("rtnetlink: make
SR-IOV VF interface symmetric"), we don't verify IFLA_VF_INFO attributes
anymore with respect to their policy, that is, ifla_vfinfo_policy[].
Before, they were part of ifla_policy[], but they have been nested since
placed under IFLA_VFINFO_LIST, that contains the attribute IFLA_VF_INFO,
which is another nested attribute for the actual VF attributes such as
IFLA_VF_MAC, IFLA_VF_VLAN, etc.
Despite the policy being split out from ifla_policy[] in this commit,
it's never applied anywhere. nla_for_each_nested() only does basic nla_ok()
testing for struct nlattr, but it doesn't know about the data context and
their requirements.
Fix, on top of Jason's initial work, does 1) parsing of the attributes
with the right policy, and 2) using the resulting parsed attribute table
from 1) instead of the nla_for_each_nested() loop (just like we used to
do when still part of ifla_policy[]).
Reference: http://thread.gmane.org/gmane.linux.network/368913
Fixes: c02db8c6290b ("rtnetlink: make SR-IOV VF interface symmetric")
Reported-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
Cc: Greg Rose <gregory.v.rose@intel.com>
Cc: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Cc: Rony Efraim <ronye@mellanox.com>
Cc: Vlad Zolotarov <vladz@cloudius-systems.com>
Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Cc: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Vlad Zolotarov <vladz@cloudius-systems.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-07 06:07:52 +08:00
err = - EOPNOTSUPP ;
if ( ops - > ndo_set_vf_link_state )
err = ops - > ndo_set_vf_link_state ( dev , ivl - > vf ,
ivl - > link_state ) ;
if ( err < 0 )
return err ;
}
if ( tb [ IFLA_VF_RSS_QUERY_EN ] ) {
struct ifla_vf_rss_query_en * ivrssq_en ;
err = - EOPNOTSUPP ;
ivrssq_en = nla_data ( tb [ IFLA_VF_RSS_QUERY_EN ] ) ;
2019-11-20 20:34:38 +08:00
if ( ivrssq_en - > vf > = INT_MAX )
return - EINVAL ;
rtnetlink: verify IFLA_VF_INFO attributes before passing them to driver
Jason Gunthorpe reported that since commit c02db8c6290b ("rtnetlink: make
SR-IOV VF interface symmetric"), we don't verify IFLA_VF_INFO attributes
anymore with respect to their policy, that is, ifla_vfinfo_policy[].
Before, they were part of ifla_policy[], but they have been nested since
placed under IFLA_VFINFO_LIST, that contains the attribute IFLA_VF_INFO,
which is another nested attribute for the actual VF attributes such as
IFLA_VF_MAC, IFLA_VF_VLAN, etc.
Despite the policy being split out from ifla_policy[] in this commit,
it's never applied anywhere. nla_for_each_nested() only does basic nla_ok()
testing for struct nlattr, but it doesn't know about the data context and
their requirements.
Fix, on top of Jason's initial work, does 1) parsing of the attributes
with the right policy, and 2) using the resulting parsed attribute table
from 1) instead of the nla_for_each_nested() loop (just like we used to
do when still part of ifla_policy[]).
Reference: http://thread.gmane.org/gmane.linux.network/368913
Fixes: c02db8c6290b ("rtnetlink: make SR-IOV VF interface symmetric")
Reported-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
Cc: Greg Rose <gregory.v.rose@intel.com>
Cc: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Cc: Rony Efraim <ronye@mellanox.com>
Cc: Vlad Zolotarov <vladz@cloudius-systems.com>
Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Cc: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Vlad Zolotarov <vladz@cloudius-systems.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-07 06:07:52 +08:00
if ( ops - > ndo_set_vf_rss_query_en )
err = ops - > ndo_set_vf_rss_query_en ( dev , ivrssq_en - > vf ,
ivrssq_en - > setting ) ;
if ( err < 0 )
return err ;
}
2015-08-28 14:57:55 +08:00
if ( tb [ IFLA_VF_TRUST ] ) {
struct ifla_vf_trust * ivt = nla_data ( tb [ IFLA_VF_TRUST ] ) ;
2019-11-20 20:34:38 +08:00
if ( ivt - > vf > = INT_MAX )
return - EINVAL ;
2015-08-28 14:57:55 +08:00
err = - EOPNOTSUPP ;
if ( ops - > ndo_set_vf_trust )
err = ops - > ndo_set_vf_trust ( dev , ivt - > vf , ivt - > setting ) ;
if ( err < 0 )
return err ;
}
2016-03-12 04:58:34 +08:00
if ( tb [ IFLA_VF_IB_NODE_GUID ] ) {
struct ifla_vf_guid * ivt = nla_data ( tb [ IFLA_VF_IB_NODE_GUID ] ) ;
2019-11-20 20:34:38 +08:00
if ( ivt - > vf > = INT_MAX )
return - EINVAL ;
2016-03-12 04:58:34 +08:00
if ( ! ops - > ndo_set_vf_guid )
return - EOPNOTSUPP ;
return handle_vf_guid ( dev , ivt , IFLA_VF_IB_NODE_GUID ) ;
}
if ( tb [ IFLA_VF_IB_PORT_GUID ] ) {
struct ifla_vf_guid * ivt = nla_data ( tb [ IFLA_VF_IB_PORT_GUID ] ) ;
2019-11-20 20:34:38 +08:00
if ( ivt - > vf > = INT_MAX )
return - EINVAL ;
2016-03-12 04:58:34 +08:00
if ( ! ops - > ndo_set_vf_guid )
return - EOPNOTSUPP ;
return handle_vf_guid ( dev , ivt , IFLA_VF_IB_PORT_GUID ) ;
}
2010-05-16 16:05:45 +08:00
return err ;
}
2017-10-05 08:48:46 +08:00
/*
 * do_set_master - detach @dev from its current master and/or attach it to
 * the device identified by @ifindex.
 *
 * @dev:     device whose master is being changed
 * @ifindex: index of the new master; 0 means "no new master", so only the
 *           detach step (if any) is performed
 * @extack:  extended ack, handed to the new master's ndo_add_slave()
 *
 * Returns 0 on success (including when @dev is already enslaved to
 * @ifindex), -EOPNOTSUPP when the relevant master lacks ndo_del_slave /
 * ndo_add_slave, -EINVAL when @ifindex is not found in @dev's netns, or the
 * non-zero value returned by the ndo callback.
 */
static int do_set_master ( struct net_device * dev , int ifindex ,
struct netlink_ext_ack * extack )
2011-02-13 18:15:37 +08:00
{
2013-01-04 06:48:52 +08:00
struct net_device * upper_dev = netdev_master_upper_dev_get ( dev ) ;
2011-02-13 18:15:37 +08:00
const struct net_device_ops * ops ;
int err ;
2013-01-04 06:48:52 +08:00
/* Step 1: release from the current master, if there is one. */
if ( upper_dev ) {
/* Already attached to the requested master: nothing to do. */
if ( upper_dev - > ifindex = = ifindex )
2011-02-13 18:15:37 +08:00
return 0 ;
2013-01-04 06:48:52 +08:00
ops = upper_dev - > netdev_ops ;
2011-02-13 18:15:37 +08:00
if ( ops - > ndo_del_slave ) {
2013-01-04 06:48:52 +08:00
err = ops - > ndo_del_slave ( upper_dev , dev ) ;
2011-02-13 18:15:37 +08:00
if ( err )
return err ;
} else {
return - EOPNOTSUPP ;
}
}
/* Step 2: attach to the new master; skipped entirely when ifindex is 0. */
if ( ifindex ) {
2013-01-04 06:48:52 +08:00
/* The new master is looked up in the netns that dev belongs to. */
upper_dev = __dev_get_by_index ( dev_net ( dev ) , ifindex ) ;
if ( ! upper_dev )
2011-02-13 18:15:37 +08:00
return - EINVAL ;
2013-01-04 06:48:52 +08:00
ops = upper_dev - > netdev_ops ;
2011-02-13 18:15:37 +08:00
if ( ops - > ndo_add_slave ) {
2017-10-05 08:48:46 +08:00
err = ops - > ndo_add_slave ( upper_dev , dev , extack ) ;
2011-02-13 18:15:37 +08:00
if ( err )
return err ;
} else {
return - EOPNOTSUPP ;
}
}
return 0 ;
}
2020-08-01 08:34:01 +08:00
/*
 * Netlink policy for the nested protodown-reason attribute: both the mask
 * and the value are 32-bit unsigned integers. Used by do_set_proto_down()
 * when parsing the nested attribute.
 */
static const struct nla_policy ifla_proto_down_reason_policy [ IFLA_PROTO_DOWN_REASON_VALUE + 1 ] = {
[ IFLA_PROTO_DOWN_REASON_MASK ] = { . type = NLA_U32 } ,
[ IFLA_PROTO_DOWN_REASON_VALUE ] = { . type = NLA_U32 } ,
} ;
/*
 * do_set_proto_down - apply protodown reason and/or protodown state to @dev.
 *
 * @dev:                  target device
 * @nl_proto_down:        optional u8 attribute carrying the new protodown
 *                        state (may be NULL)
 * @nl_proto_down_reason: optional nested attribute carrying a reason value
 *                        and an optional mask (may be NULL)
 * @extack:               extended ack used to report error strings
 *
 * The reason attribute is handled before the state attribute, so the
 * "active reasons" check below runs after dev_change_proto_down_reason()
 * has been called for this request.
 *
 * Returns 0 on success, -EOPNOTSUPP if the device does not set
 * IFF_CHANGE_PROTO_DOWN, -EINVAL if the nested reason lacks a value,
 * -EBUSY when asked to clear protodown while dev->proto_down_reason is
 * non-zero, or the error from nested parsing / dev_change_proto_down().
 */
static int do_set_proto_down ( struct net_device * dev ,
struct nlattr * nl_proto_down ,
struct nlattr * nl_proto_down_reason ,
struct netlink_ext_ack * extack )
{
struct nlattr * pdreason [ IFLA_PROTO_DOWN_REASON_MAX + 1 ] ;
/* Mask stays 0 when IFLA_PROTO_DOWN_REASON_MASK is not supplied. */
unsigned long mask = 0 ;
u32 value ;
bool proto_down ;
int err ;
2021-11-23 09:24:47 +08:00
/* Devices must opt in to protodown changes via this private flag. */
if ( ! ( dev - > priv_flags & IFF_CHANGE_PROTO_DOWN ) ) {
2020-08-01 08:34:01 +08:00
NL_SET_ERR_MSG ( extack , " Protodown not supported by device " ) ;
return - EOPNOTSUPP ;
}
if ( nl_proto_down_reason ) {
err = nla_parse_nested_deprecated ( pdreason ,
IFLA_PROTO_DOWN_REASON_MAX ,
nl_proto_down_reason ,
ifla_proto_down_reason_policy ,
NULL ) ;
if ( err < 0 )
return err ;
/* The value is mandatory; only the mask is optional. */
if ( ! pdreason [ IFLA_PROTO_DOWN_REASON_VALUE ] ) {
NL_SET_ERR_MSG ( extack , " Invalid protodown reason value " ) ;
return - EINVAL ;
}
value = nla_get_u32 ( pdreason [ IFLA_PROTO_DOWN_REASON_VALUE ] ) ;
if ( pdreason [ IFLA_PROTO_DOWN_REASON_MASK ] )
mask = nla_get_u32 ( pdreason [ IFLA_PROTO_DOWN_REASON_MASK ] ) ;
dev_change_proto_down_reason ( dev , mask , value ) ;
}
if ( nl_proto_down ) {
proto_down = nla_get_u8 ( nl_proto_down ) ;
2021-06-02 14:56:23 +08:00
/* Don't turn off protodown if there are active reasons */
2020-08-01 08:34:01 +08:00
if ( ! proto_down & & dev - > proto_down_reason ) {
NL_SET_ERR_MSG ( extack , " Cannot clear protodown, active reasons " ) ;
return - EBUSY ;
}
err = dev_change_proto_down ( dev ,
proto_down ) ;
if ( err )
return err ;
}
return 0 ;
}
2014-09-01 22:07:28 +08:00
/*
 * Status bits that do_setlink() ORs into its "status" argument as it
 * applies individual attributes.
 */
# define DO_SETLINK_MODIFIED 0x01
2014-09-01 22:07:29 +08:00
/* notify flag means notify + modified: 0x03 includes the 0x01 MODIFIED bit. */
# define DO_SETLINK_NOTIFY 0x03
2014-04-24 05:29:27 +08:00
static int do_setlink ( const struct sk_buff * skb ,
struct net_device * dev , struct ifinfomsg * ifm ,
2017-05-01 12:46:46 +08:00
struct netlink_ext_ack * extack ,
2022-04-16 00:53:28 +08:00
struct nlattr * * tb , int status )
2005-04-17 06:20:36 +08:00
{
2008-11-20 13:32:24 +08:00
const struct net_device_ops * ops = dev - > netdev_ops ;
2022-04-16 00:53:28 +08:00
char ifname [ IFNAMSIZ ] ;
2007-06-14 03:03:36 +08:00
int err ;
2005-04-17 06:20:36 +08:00
2021-08-03 20:02:50 +08:00
err = validate_linkmsg ( dev , tb , extack ) ;
2018-06-06 00:25:19 +08:00
if ( err < 0 )
return err ;
2022-04-16 00:53:28 +08:00
if ( tb [ IFLA_IFNAME ] )
nla_strscpy ( ifname , tb [ IFLA_IFNAME ] , IFNAMSIZ ) ;
else
ifname [ 0 ] = ' \0 ' ;
2018-09-05 03:53:53 +08:00
if ( tb [ IFLA_NET_NS_PID ] | | tb [ IFLA_NET_NS_FD ] | | tb [ IFLA_TARGET_NETNSID ] ) {
2022-04-16 00:53:28 +08:00
const char * pat = ifname [ 0 ] ? ifname : NULL ;
2021-04-05 15:12:23 +08:00
struct net * net ;
int new_ifindex ;
net = rtnl_link_get_net_capable ( skb , dev_net ( dev ) ,
tb , CAP_NET_ADMIN ) ;
2007-09-12 19:57:04 +08:00
if ( IS_ERR ( net ) ) {
err = PTR_ERR ( net ) ;
goto errout ;
}
2018-01-24 22:26:32 +08:00
2021-04-05 15:12:23 +08:00
if ( tb [ IFLA_NEW_IFINDEX ] )
new_ifindex = nla_get_s32 ( tb [ IFLA_NEW_IFINDEX ] ) ;
else
new_ifindex = 0 ;
rtnetlink: Return correct error on changing device netns
Currently, when a device is moved between network namespaces using the
RTM_NEWLINK message type and one of the netns attributes (IFLA_NET_NS_PID,
IFLA_NET_NS_FD, IFLA_TARGET_NETNSID) but w/o specifying IFLA_IFNAME, and the
target namespace already has a device with the same name, userspace will get
EINVAL, which is confusing and makes debugging harder.
Fix it so that userspace gets the more appropriate EEXIST instead, which makes
debugging much easier.
Before:
# ./ifname.sh
+ ip netns add ns0
+ ip netns exec ns0 ip link add l0 type dummy
+ ip netns exec ns0 ip link show l0
8: l0: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/ether 66:90:b5:d5:78:69 brd ff:ff:ff:ff:ff:ff
+ ip link add l0 type dummy
+ ip link show l0
10: l0: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/ether 6e:c6:1f:15:20:8d brd ff:ff:ff:ff:ff:ff
+ ip link set l0 netns ns0
RTNETLINK answers: Invalid argument
After:
# ./ifname.sh
+ ip netns add ns0
+ ip netns exec ns0 ip link add l0 type dummy
+ ip netns exec ns0 ip link show l0
8: l0: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/ether 1e:4a:72:e3:e3:8f brd ff:ff:ff:ff:ff:ff
+ ip link add l0 type dummy
+ ip link show l0
10: l0: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/ether f2:fc:fe:2b:7d:a6 brd ff:ff:ff:ff:ff:ff
+ ip link set l0 netns ns0
RTNETLINK answers: File exists
The problem is that do_setlink() passes its `char *ifname` argument,
that it gets from a caller, to __dev_change_net_namespace() as is (as
`const char *pat`), but semantics of ifname and pat can be different.
For example, __rtnl_newlink() does this:
net/core/rtnetlink.c
3270 char ifname[IFNAMSIZ];
...
3286 if (tb[IFLA_IFNAME])
3287 nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
3288 else
3289 ifname[0] = '\0';
...
3364 if (dev) {
...
3394 return do_setlink(skb, dev, ifm, extack, tb, ifname, status);
3395 }
, i.e. do_setlink() gets ifname pointer that is always valid no matter
if user specified IFLA_IFNAME or not and then do_setlink() passes this
ifname pointer as is to __dev_change_net_namespace() as pat argument.
But the pat (pattern) in __dev_change_net_namespace() is used as:
net/core/dev.c
11198 err = -EEXIST;
11199 if (__dev_get_by_name(net, dev->name)) {
11200 /* We get here if we can't use the current device name */
11201 if (!pat)
11202 goto out;
11203 err = dev_get_valid_name(net, dev, pat);
11204 if (err < 0)
11205 goto out;
11206 }
As a result, the `goto out` path on line 11202 is never taken and,
instead of returning EEXIST defined on line 11198,
__dev_change_net_namespace() returns an error from dev_get_valid_name()
and this, in turn, will be EINVAL for ifname[0] = '\0' set earlier.
Fixes: d8a5ec672768 ("[NET]: netlink support for moving devices between network namespaces.")
Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-26 08:25:40 +08:00
err = __dev_change_net_namespace ( dev , net , pat , new_ifindex ) ;
2007-09-12 19:57:04 +08:00
put_net ( net ) ;
if ( err )
goto errout ;
2014-09-01 22:07:28 +08:00
status | = DO_SETLINK_MODIFIED ;
2007-09-12 19:57:04 +08:00
}
2006-08-11 12:17:37 +08:00
if ( tb [ IFLA_MAP ] ) {
2005-04-17 06:20:36 +08:00
struct rtnl_link_ifmap * u_map ;
struct ifmap k_map ;
2008-11-20 13:32:24 +08:00
if ( ! ops - > ndo_set_config ) {
2005-04-17 06:20:36 +08:00
err = - EOPNOTSUPP ;
2007-06-14 03:03:36 +08:00
goto errout ;
2005-04-17 06:20:36 +08:00
}
if ( ! netif_device_present ( dev ) ) {
err = - ENODEV ;
2007-06-14 03:03:36 +08:00
goto errout ;
2005-04-17 06:20:36 +08:00
}
2006-08-11 12:17:37 +08:00
u_map = nla_data ( tb [ IFLA_MAP ] ) ;
2005-04-17 06:20:36 +08:00
k_map . mem_start = ( unsigned long ) u_map - > mem_start ;
k_map . mem_end = ( unsigned long ) u_map - > mem_end ;
k_map . base_addr = ( unsigned short ) u_map - > base_addr ;
k_map . irq = ( unsigned char ) u_map - > irq ;
k_map . dma = ( unsigned char ) u_map - > dma ;
k_map . port = ( unsigned char ) u_map - > port ;
2008-11-20 13:32:24 +08:00
err = ops - > ndo_set_config ( dev , & k_map ) ;
2006-08-11 12:17:37 +08:00
if ( err < 0 )
2007-06-14 03:03:36 +08:00
goto errout ;
2005-04-17 06:20:36 +08:00
2014-09-01 22:07:29 +08:00
status | = DO_SETLINK_NOTIFY ;
2005-04-17 06:20:36 +08:00
}
2006-08-11 12:17:37 +08:00
if ( tb [ IFLA_ADDRESS ] ) {
2006-08-09 07:47:37 +08:00
struct sockaddr * sa ;
int len ;
2017-07-21 02:27:57 +08:00
len = sizeof ( sa_family_t ) + max_t ( size_t , dev - > addr_len ,
sizeof ( * sa ) ) ;
2006-08-09 07:47:37 +08:00
sa = kmalloc ( len , GFP_KERNEL ) ;
if ( ! sa ) {
err = - ENOMEM ;
2007-06-14 03:03:36 +08:00
goto errout ;
2006-08-09 07:47:37 +08:00
}
sa - > sa_family = dev - > type ;
2006-08-11 12:17:37 +08:00
memcpy ( sa - > sa_data , nla_data ( tb [ IFLA_ADDRESS ] ) ,
2006-08-09 07:47:37 +08:00
dev - > addr_len ) ;
net: fix dev_ifsioc_locked() race condition
dev_ifsioc_locked() is called with only RCU read lock, so when
there is a parallel writer changing the mac address, it could
get a partially updated mac address, as shown below:
Thread 1 Thread 2
// eth_commit_mac_addr_change()
memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
// dev_ifsioc_locked()
memcpy(ifr->ifr_hwaddr.sa_data,
dev->dev_addr,...);
Close this race condition by guarding them with a RW semaphore,
like netdev_get_name(). We can not use seqlock here as it does not
allow blocking. The writers already take RTNL anyway, so this does
not affect the slow path. To avoid bothering existing
dev_set_mac_address() callers in drivers, introduce a new wrapper
just for user-facing callers on ioctl and rtnetlink paths.
Note, bonding also changes slave mac addresses but that requires
a separate patch due to the complexity of bonding code.
Fixes: 3710becf8a58 ("net: RCU locking for simple ioctl()")
Reported-by: "Gong, Sishuai" <sishuai@purdue.edu>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 03:34:10 +08:00
err = dev_set_mac_address_user ( dev , sa , extack ) ;
2006-08-09 07:47:37 +08:00
kfree ( sa ) ;
2005-04-17 06:20:36 +08:00
if ( err )
2007-06-14 03:03:36 +08:00
goto errout ;
2014-09-01 22:07:28 +08:00
status | = DO_SETLINK_MODIFIED ;
2005-04-17 06:20:36 +08:00
}
2006-08-11 12:17:37 +08:00
if ( tb [ IFLA_MTU ] ) {
2018-07-28 04:43:23 +08:00
err = dev_set_mtu_ext ( dev , nla_get_u32 ( tb [ IFLA_MTU ] ) , extack ) ;
2006-08-11 12:17:37 +08:00
if ( err < 0 )
2007-06-14 03:03:36 +08:00
goto errout ;
2014-09-01 22:07:28 +08:00
status | = DO_SETLINK_MODIFIED ;
2005-04-17 06:20:36 +08:00
}
2011-01-14 07:38:30 +08:00
if ( tb [ IFLA_GROUP ] ) {
dev_set_group ( dev , nla_get_u32 ( tb [ IFLA_GROUP ] ) ) ;
2014-09-01 22:07:29 +08:00
status | = DO_SETLINK_NOTIFY ;
2011-01-14 07:38:30 +08:00
}
2006-08-11 12:17:37 +08:00
/*
* Interface selected by interface index but interface
* name provided implies that a name change has been
* requested .
*/
2007-06-06 03:40:01 +08:00
if ( ifm - > ifi_index > 0 & & ifname [ 0 ] ) {
2006-08-11 12:17:37 +08:00
err = dev_change_name ( dev , ifname ) ;
if ( err < 0 )
2007-06-14 03:03:36 +08:00
goto errout ;
2014-09-01 22:07:28 +08:00
status | = DO_SETLINK_MODIFIED ;
2005-04-17 06:20:36 +08:00
}
2008-09-23 12:28:11 +08:00
if ( tb [ IFLA_IFALIAS ] ) {
err = dev_set_alias ( dev , nla_data ( tb [ IFLA_IFALIAS ] ) ,
nla_len ( tb [ IFLA_IFALIAS ] ) ) ;
if ( err < 0 )
goto errout ;
2014-09-01 22:07:29 +08:00
status | = DO_SETLINK_NOTIFY ;
2008-09-23 12:28:11 +08:00
}
2006-08-11 12:17:37 +08:00
if ( tb [ IFLA_BROADCAST ] ) {
nla_memcpy ( dev - > broadcast , tb [ IFLA_BROADCAST ] , dev - > addr_len ) ;
2013-01-01 11:30:13 +08:00
call_netdevice_notifiers ( NETDEV_CHANGEADDR , dev ) ;
2005-04-17 06:20:36 +08:00
}
2007-05-23 08:00:01 +08:00
if ( ifm - > ifi_flags | | ifm - > ifi_change ) {
2018-12-07 01:05:42 +08:00
err = dev_change_flags ( dev , rtnl_dev_combine_flags ( dev , ifm ) ,
extack ) ;
2008-11-17 15:20:31 +08:00
if ( err < 0 )
goto errout ;
2007-05-23 08:00:01 +08:00
}
2005-04-17 06:20:36 +08:00
2011-02-13 18:15:37 +08:00
if ( tb [ IFLA_MASTER ] ) {
2017-10-05 08:48:46 +08:00
err = do_set_master ( dev , nla_get_u32 ( tb [ IFLA_MASTER ] ) , extack ) ;
2011-02-13 18:15:37 +08:00
if ( err )
goto errout ;
2014-09-01 22:07:28 +08:00
status | = DO_SETLINK_MODIFIED ;
2011-02-13 18:15:37 +08:00
}
2012-12-28 07:49:39 +08:00
if ( tb [ IFLA_CARRIER ] ) {
err = dev_change_carrier ( dev , nla_get_u8 ( tb [ IFLA_CARRIER ] ) ) ;
if ( err )
goto errout ;
2014-09-01 22:07:28 +08:00
status | = DO_SETLINK_MODIFIED ;
2012-12-28 07:49:39 +08:00
}
2014-09-01 22:07:26 +08:00
if ( tb [ IFLA_TXQLEN ] ) {
2017-05-17 18:30:44 +08:00
unsigned int value = nla_get_u32 ( tb [ IFLA_TXQLEN ] ) ;
2018-01-26 10:26:22 +08:00
err = dev_change_tx_queue_len ( dev , value ) ;
if ( err )
goto errout ;
status | = DO_SETLINK_MODIFIED ;
2014-09-01 22:07:26 +08:00
}
2006-03-21 09:09:11 +08:00
2017-12-08 07:40:19 +08:00
if ( tb [ IFLA_GSO_MAX_SIZE ] ) {
u32 max_size = nla_get_u32 ( tb [ IFLA_GSO_MAX_SIZE ] ) ;
if ( max_size > GSO_MAX_SIZE ) {
err = - EINVAL ;
goto errout ;
}
if ( dev - > gso_max_size ^ max_size ) {
netif_set_gso_max_size ( dev , max_size ) ;
status | = DO_SETLINK_MODIFIED ;
}
}
if ( tb [ IFLA_GSO_MAX_SEGS ] ) {
u32 max_segs = nla_get_u32 ( tb [ IFLA_GSO_MAX_SEGS ] ) ;
if ( max_segs > GSO_MAX_SEGS ) {
err = - EINVAL ;
goto errout ;
}
if ( dev - > gso_max_segs ^ max_segs ) {
2021-11-19 23:43:32 +08:00
netif_set_gso_max_segs ( dev , max_segs ) ;
2017-12-08 07:40:19 +08:00
status | = DO_SETLINK_MODIFIED ;
}
}
2022-01-05 18:48:38 +08:00
if ( tb [ IFLA_GRO_MAX_SIZE ] ) {
u32 gro_max_size = nla_get_u32 ( tb [ IFLA_GRO_MAX_SIZE ] ) ;
if ( dev - > gro_max_size ^ gro_max_size ) {
netif_set_gro_max_size ( dev , gro_max_size ) ;
status | = DO_SETLINK_MODIFIED ;
}
}
2006-08-11 12:17:37 +08:00
if ( tb [ IFLA_OPERSTATE ] )
2008-02-18 10:35:07 +08:00
set_operstate ( dev , nla_get_u8 ( tb [ IFLA_OPERSTATE ] ) ) ;
2006-03-21 09:09:11 +08:00
2006-08-11 12:17:37 +08:00
if ( tb [ IFLA_LINKMODE ] ) {
2014-09-01 22:07:27 +08:00
unsigned char value = nla_get_u8 ( tb [ IFLA_LINKMODE ] ) ;
2021-11-27 00:15:29 +08:00
write_lock ( & dev_base_lock ) ;
2014-09-01 22:07:27 +08:00
if ( dev - > link_mode ^ value )
2014-09-01 22:07:29 +08:00
status | = DO_SETLINK_NOTIFY ;
2014-09-01 22:07:27 +08:00
dev - > link_mode = value ;
2021-11-27 00:15:29 +08:00
write_unlock ( & dev_base_lock ) ;
2006-03-21 09:09:11 +08:00
}
2010-05-16 16:05:45 +08:00
if ( tb [ IFLA_VFINFO_LIST ] ) {
rtnetlink: verify IFLA_VF_INFO attributes before passing them to driver
Jason Gunthorpe reported that since commit c02db8c6290b ("rtnetlink: make
SR-IOV VF interface symmetric"), we don't verify IFLA_VF_INFO attributes
anymore with respect to their policy, that is, ifla_vfinfo_policy[].
Before, they were part of ifla_policy[], but they have been nested since
placed under IFLA_VFINFO_LIST, that contains the attribute IFLA_VF_INFO,
which is another nested attribute for the actual VF attributes such as
IFLA_VF_MAC, IFLA_VF_VLAN, etc.
Despite the policy being split out from ifla_policy[] in this commit,
it's never applied anywhere. nla_for_each_nested() only does basic nla_ok()
testing for struct nlattr, but it doesn't know about the data context and
their requirements.
Fix, on top of Jason's initial work, does 1) parsing of the attributes
with the right policy, and 2) using the resulting parsed attribute table
from 1) instead of the nla_for_each_nested() loop (just like we used to
do when still part of ifla_policy[]).
Reference: http://thread.gmane.org/gmane.linux.network/368913
Fixes: c02db8c6290b ("rtnetlink: make SR-IOV VF interface symmetric")
Reported-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
Cc: Greg Rose <gregory.v.rose@intel.com>
Cc: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Cc: Rony Efraim <ronye@mellanox.com>
Cc: Vlad Zolotarov <vladz@cloudius-systems.com>
Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Cc: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Vlad Zolotarov <vladz@cloudius-systems.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-07 06:07:52 +08:00
struct nlattr * vfinfo [ IFLA_VF_MAX + 1 ] ;
2010-05-16 16:05:45 +08:00
struct nlattr * attr ;
int rem ;
rtnetlink: verify IFLA_VF_INFO attributes before passing them to driver
Jason Gunthorpe reported that since commit c02db8c6290b ("rtnetlink: make
SR-IOV VF interface symmetric"), we don't verify IFLA_VF_INFO attributes
anymore with respect to their policy, that is, ifla_vfinfo_policy[].
Before, they were part of ifla_policy[], but they have been nested since
placed under IFLA_VFINFO_LIST, that contains the attribute IFLA_VF_INFO,
which is another nested attribute for the actual VF attributes such as
IFLA_VF_MAC, IFLA_VF_VLAN, etc.
Despite the policy being split out from ifla_policy[] in this commit,
it's never applied anywhere. nla_for_each_nested() only does basic nla_ok()
testing for struct nlattr, but it doesn't know about the data context and
their requirements.
Fix, on top of Jason's initial work, does 1) parsing of the attributes
with the right policy, and 2) using the resulting parsed attribute table
from 1) instead of the nla_for_each_nested() loop (just like we used to
do when still part of ifla_policy[]).
Reference: http://thread.gmane.org/gmane.linux.network/368913
Fixes: c02db8c6290b ("rtnetlink: make SR-IOV VF interface symmetric")
Reported-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
Cc: Greg Rose <gregory.v.rose@intel.com>
Cc: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Cc: Rony Efraim <ronye@mellanox.com>
Cc: Vlad Zolotarov <vladz@cloudius-systems.com>
Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Cc: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Vlad Zolotarov <vladz@cloudius-systems.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-07 06:07:52 +08:00
2010-05-16 16:05:45 +08:00
nla_for_each_nested ( attr , tb [ IFLA_VFINFO_LIST ] , rem ) {
rtnetlink: verify IFLA_VF_INFO attributes before passing them to driver
Jason Gunthorpe reported that since commit c02db8c6290b ("rtnetlink: make
SR-IOV VF interface symmetric"), we don't verify IFLA_VF_INFO attributes
anymore with respect to their policy, that is, ifla_vfinfo_policy[].
Before, they were part of ifla_policy[], but they have been nested since
placed under IFLA_VFINFO_LIST, that contains the attribute IFLA_VF_INFO,
which is another nested attribute for the actual VF attributes such as
IFLA_VF_MAC, IFLA_VF_VLAN, etc.
Despite the policy being split out from ifla_policy[] in this commit,
it's never applied anywhere. nla_for_each_nested() only does basic nla_ok()
testing for struct nlattr, but it doesn't know about the data context and
their requirements.
Fix, on top of Jason's initial work, does 1) parsing of the attributes
with the right policy, and 2) using the resulting parsed attribute table
from 1) instead of the nla_for_each_nested() loop (just like we used to
do when still part of ifla_policy[]).
Reference: http://thread.gmane.org/gmane.linux.network/368913
Fixes: c02db8c6290b ("rtnetlink: make SR-IOV VF interface symmetric")
Reported-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
Cc: Greg Rose <gregory.v.rose@intel.com>
Cc: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Cc: Rony Efraim <ronye@mellanox.com>
Cc: Vlad Zolotarov <vladz@cloudius-systems.com>
Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Cc: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Vlad Zolotarov <vladz@cloudius-systems.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-07 06:07:52 +08:00
if ( nla_type ( attr ) ! = IFLA_VF_INFO | |
nla_len ( attr ) < NLA_HDRLEN ) {
2010-05-21 10:25:27 +08:00
err = - EINVAL ;
2010-05-16 16:05:45 +08:00
goto errout ;
2010-05-21 10:25:27 +08:00
}
netlink: make validation more configurable for future strictness
We currently have two levels of strict validation:
1) liberal (default)
- undefined (type >= max) & NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
- garbage at end of message accepted
2) strict (opt-in)
- NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
Split out parsing strictness into four different options:
* TRAILING - check that there's no trailing data after parsing
attributes (in message or nested)
* MAXTYPE - reject attrs > max known type
* UNSPEC - reject attributes with NLA_UNSPEC policy entries
* STRICT_ATTRS - strictly validate attribute size
The default for future things should be *everything*.
The current *_strict() is a combination of TRAILING and MAXTYPE,
and is renamed to _deprecated_strict().
The current regular parsing has none of this, and is renamed to
*_parse_deprecated().
Additionally it allows us to selectively set one of the new flags
even on old policies. Notably, the UNSPEC flag could be useful in
this case, since it can be arranged (by filling in the policy) to
not be an incompatible userspace ABI change, but would then going
forward prevent forgetting attribute entries. Similar can apply
to the POLICY flag.
We end up with the following renames:
* nla_parse -> nla_parse_deprecated
* nla_parse_strict -> nla_parse_deprecated_strict
* nlmsg_parse -> nlmsg_parse_deprecated
* nlmsg_parse_strict -> nlmsg_parse_deprecated_strict
* nla_parse_nested -> nla_parse_nested_deprecated
* nla_validate_nested -> nla_validate_nested_deprecated
Using spatch, of course:
@@
expression TB, MAX, HEAD, LEN, POL, EXT;
@@
-nla_parse(TB, MAX, HEAD, LEN, POL, EXT)
+nla_parse_deprecated(TB, MAX, HEAD, LEN, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression TB, MAX, NLA, POL, EXT;
@@
-nla_parse_nested(TB, MAX, NLA, POL, EXT)
+nla_parse_nested_deprecated(TB, MAX, NLA, POL, EXT)
@@
expression START, MAX, POL, EXT;
@@
-nla_validate_nested(START, MAX, POL, EXT)
+nla_validate_nested_deprecated(START, MAX, POL, EXT)
@@
expression NLH, HDRLEN, MAX, POL, EXT;
@@
-nlmsg_validate(NLH, HDRLEN, MAX, POL, EXT)
+nlmsg_validate_deprecated(NLH, HDRLEN, MAX, POL, EXT)
For this patch, don't actually add the strict, non-renamed versions
yet so that it breaks compile if I get it wrong.
Also, while at it, make nla_validate and nla_parse go down to a
common __nla_validate_parse() function to avoid code duplication.
Ultimately, this allows us to have very strict validation for every
new caller of nla_parse()/nlmsg_parse() etc as re-introduced in the
next patch, while existing things will continue to work as is.
In effect then, this adds fully strict validation for any new command.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-26 20:07:28 +08:00
err = nla_parse_nested_deprecated ( vfinfo , IFLA_VF_MAX ,
attr ,
ifla_vf_policy ,
NULL ) ;
rtnetlink: verify IFLA_VF_INFO attributes before passing them to driver
Jason Gunthorpe reported that since commit c02db8c6290b ("rtnetlink: make
SR-IOV VF interface symmetric"), we don't verify IFLA_VF_INFO attributes
anymore with respect to their policy, that is, ifla_vfinfo_policy[].
Before, they were part of ifla_policy[], but they have been nested since
placed under IFLA_VFINFO_LIST, that contains the attribute IFLA_VF_INFO,
which is another nested attribute for the actual VF attributes such as
IFLA_VF_MAC, IFLA_VF_VLAN, etc.
Despite the policy being split out from ifla_policy[] in this commit,
it's never applied anywhere. nla_for_each_nested() only does basic nla_ok()
testing for struct nlattr, but it doesn't know about the data context and
their requirements.
Fix, on top of Jason's initial work, does 1) parsing of the attributes
with the right policy, and 2) using the resulting parsed attribute table
from 1) instead of the nla_for_each_nested() loop (just like we used to
do when still part of ifla_policy[]).
Reference: http://thread.gmane.org/gmane.linux.network/368913
Fixes: c02db8c6290b ("rtnetlink: make SR-IOV VF interface symmetric")
Reported-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
Cc: Greg Rose <gregory.v.rose@intel.com>
Cc: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Cc: Rony Efraim <ronye@mellanox.com>
Cc: Vlad Zolotarov <vladz@cloudius-systems.com>
Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Cc: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Vlad Zolotarov <vladz@cloudius-systems.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-07 06:07:52 +08:00
if ( err < 0 )
goto errout ;
err = do_setvfinfo ( dev , vfinfo ) ;
2010-05-16 16:05:45 +08:00
if ( err < 0 )
goto errout ;
2014-09-01 22:07:29 +08:00
status | = DO_SETLINK_NOTIFY ;
2010-05-16 16:05:45 +08:00
}
2010-02-10 09:44:05 +08:00
}
2005-04-17 06:20:36 +08:00
err = 0 ;
net: Add netlink support for virtual port management (was iovnl)
Add new netdev ops ndo_{set|get}_vf_port to allow setting of
port-profile on a netdev interface. Extends netlink socket RTM_SETLINK/
RTM_GETLINK with two new sub msgs called IFLA_VF_PORTS and IFLA_PORT_SELF
(added to end of IFLA_cmd list). These are both nested attributes
using this layout:
[IFLA_NUM_VF]
[IFLA_VF_PORTS]
[IFLA_VF_PORT]
[IFLA_PORT_*], ...
[IFLA_VF_PORT]
[IFLA_PORT_*], ...
...
[IFLA_PORT_SELF]
[IFLA_PORT_*], ...
These attributes are designed to be set and get symmetrically. VF_PORTS
is a list of VF_PORTs, one for each VF, when dealing with an SR-IOV
device. PORT_SELF is for the PF of the SR-IOV device, in case it wants
to also have a port-profile, or for the case where the VF==PF, like in
enic patch 2/2 of this patch set.
A port-profile is used to configure/enable the external switch virtual port
backing the netdev interface, not to configure the host-facing side of the
netdev. A port-profile is an identifier known to the switch. How port-
profiles are installed on the switch or how available port-profiles are
made known to the host is outside the scope of this patch.
There are two types of port-profiles specs in the netlink msg. The first spec
is for 802.1Qbg (pre-)standard, VDP protocol. The second spec is for devices
that run a similar protocol as VDP but in firmware, thus hiding the protocol
details. In either case, the specs have much in common and makes sense to
define the netlink msg as the union of the two specs. For example, both specs
have a notion of associating/deassociating a port-profile. And both specs
require some information from the hypervisor manager, such as client port
instance ID.
The general flow is the port-profile is applied to a host netdev interface
using RTM_SETLINK, the receiver of the RTM_SETLINK msg communicates with the
switch, and the switch virtual port backing the host netdev interface is
configured/enabled based on the settings defined by the port-profile. What
those settings comprise, and how those settings are managed is again
outside the scope of this patch, since this patch only deals with the
first step in the flow.
Signed-off-by: Scott Feldman <scofeldm@cisco.com>
Signed-off-by: Roopa Prabhu <roprabhu@cisco.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-05-18 13:49:55 +08:00
if ( tb [ IFLA_VF_PORTS ] ) {
struct nlattr * port [ IFLA_PORT_MAX + 1 ] ;
struct nlattr * attr ;
int vf ;
int rem ;
err = - EOPNOTSUPP ;
if ( ! ops - > ndo_set_vf_port )
goto errout ;
nla_for_each_nested ( attr , tb [ IFLA_VF_PORTS ] , rem ) {
2015-07-13 06:06:02 +08:00
if ( nla_type ( attr ) ! = IFLA_VF_PORT | |
nla_len ( attr ) < NLA_HDRLEN ) {
err = - EINVAL ;
goto errout ;
}
netlink: make validation more configurable for future strictness
We currently have two levels of strict validation:
1) liberal (default)
- undefined (type >= max) & NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
- garbage at end of message accepted
2) strict (opt-in)
- NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
Split out parsing strictness into four different options:
* TRAILING - check that there's no trailing data after parsing
attributes (in message or nested)
* MAXTYPE - reject attrs > max known type
* UNSPEC - reject attributes with NLA_UNSPEC policy entries
* STRICT_ATTRS - strictly validate attribute size
The default for future things should be *everything*.
The current *_strict() is a combination of TRAILING and MAXTYPE,
and is renamed to _deprecated_strict().
The current regular parsing has none of this, and is renamed to
*_parse_deprecated().
Additionally it allows us to selectively set one of the new flags
even on old policies. Notably, the UNSPEC flag could be useful in
this case, since it can be arranged (by filling in the policy) to
not be an incompatible userspace ABI change, but would then going
forward prevent forgetting attribute entries. Similar can apply
to the POLICY flag.
We end up with the following renames:
* nla_parse -> nla_parse_deprecated
* nla_parse_strict -> nla_parse_deprecated_strict
* nlmsg_parse -> nlmsg_parse_deprecated
* nlmsg_parse_strict -> nlmsg_parse_deprecated_strict
* nla_parse_nested -> nla_parse_nested_deprecated
* nla_validate_nested -> nla_validate_nested_deprecated
Using spatch, of course:
@@
expression TB, MAX, HEAD, LEN, POL, EXT;
@@
-nla_parse(TB, MAX, HEAD, LEN, POL, EXT)
+nla_parse_deprecated(TB, MAX, HEAD, LEN, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression TB, MAX, NLA, POL, EXT;
@@
-nla_parse_nested(TB, MAX, NLA, POL, EXT)
+nla_parse_nested_deprecated(TB, MAX, NLA, POL, EXT)
@@
expression START, MAX, POL, EXT;
@@
-nla_validate_nested(START, MAX, POL, EXT)
+nla_validate_nested_deprecated(START, MAX, POL, EXT)
@@
expression NLH, HDRLEN, MAX, POL, EXT;
@@
-nlmsg_validate(NLH, HDRLEN, MAX, POL, EXT)
+nlmsg_validate_deprecated(NLH, HDRLEN, MAX, POL, EXT)
For this patch, don't actually add the strict, non-renamed versions
yet so that it breaks compile if I get it wrong.
Also, while at it, make nla_validate and nla_parse go down to a
common __nla_validate_parse() function to avoid code duplication.
Ultimately, this allows us to have very strict validation for every
new caller of nla_parse()/nlmsg_parse() etc as re-introduced in the
next patch, while existing things will continue to work as is.
In effect then, this adds fully strict validation for any new command.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-26 20:07:28 +08:00
err = nla_parse_nested_deprecated ( port , IFLA_PORT_MAX ,
attr ,
ifla_port_policy ,
NULL ) ;
net: Add netlink support for virtual port management (was iovnl)
Add new netdev ops ndo_{set|get}_vf_port to allow setting of
port-profile on a netdev interface. Extends netlink socket RTM_SETLINK/
RTM_GETLINK with two new sub msgs called IFLA_VF_PORTS and IFLA_PORT_SELF
(added to end of IFLA_cmd list). These are both nested attributes
using this layout:
[IFLA_NUM_VF]
[IFLA_VF_PORTS]
[IFLA_VF_PORT]
[IFLA_PORT_*], ...
[IFLA_VF_PORT]
[IFLA_PORT_*], ...
...
[IFLA_PORT_SELF]
[IFLA_PORT_*], ...
These attributes are designed to be set and get symmetrically. VF_PORTS
is a list of VF_PORTs, one for each VF, when dealing with an SR-IOV
device. PORT_SELF is for the PF of the SR-IOV device, in case it wants
to also have a port-profile, or for the case where the VF==PF, like in
enic patch 2/2 of this patch set.
A port-profile is used to configure/enable the external switch virtual port
backing the netdev interface, not to configure the host-facing side of the
netdev. A port-profile is an identifier known to the switch. How port-
profiles are installed on the switch or how available port-profiles are
made known to the host is outside the scope of this patch.
There are two types of port-profiles specs in the netlink msg. The first spec
is for 802.1Qbg (pre-)standard, VDP protocol. The second spec is for devices
that run a similar protocol as VDP but in firmware, thus hiding the protocol
details. In either case, the specs have much in common and makes sense to
define the netlink msg as the union of the two specs. For example, both specs
have a notion of associating/deassociating a port-profile. And both specs
require some information from the hypervisor manager, such as client port
instance ID.
The general flow is the port-profile is applied to a host netdev interface
using RTM_SETLINK, the receiver of the RTM_SETLINK msg communicates with the
switch, and the switch virtual port backing the host netdev interface is
configured/enabled based on the settings defined by the port-profile. What
those settings comprise, and how those settings are managed is again
outside the scope of this patch, since this patch only deals with the
first step in the flow.
Signed-off-by: Scott Feldman <scofeldm@cisco.com>
Signed-off-by: Roopa Prabhu <roprabhu@cisco.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-05-18 13:49:55 +08:00
if ( err < 0 )
goto errout ;
if ( ! port [ IFLA_PORT_VF ] ) {
err = - EOPNOTSUPP ;
goto errout ;
}
vf = nla_get_u32 ( port [ IFLA_PORT_VF ] ) ;
err = ops - > ndo_set_vf_port ( dev , vf , port ) ;
if ( err < 0 )
goto errout ;
2014-09-01 22:07:29 +08:00
status | = DO_SETLINK_NOTIFY ;
net: Add netlink support for virtual port management (was iovnl)
Add new netdev ops ndo_{set|get}_vf_port to allow setting of
port-profile on a netdev interface. Extends netlink socket RTM_SETLINK/
RTM_GETLINK with two new sub msgs called IFLA_VF_PORTS and IFLA_PORT_SELF
(added to end of IFLA_cmd list). These are both nested attributes
using this layout:
[IFLA_NUM_VF]
[IFLA_VF_PORTS]
[IFLA_VF_PORT]
[IFLA_PORT_*], ...
[IFLA_VF_PORT]
[IFLA_PORT_*], ...
...
[IFLA_PORT_SELF]
[IFLA_PORT_*], ...
These attributes are designed to be set and get symmetrically. VF_PORTS
is a list of VF_PORTs, one for each VF, when dealing with an SR-IOV
device. PORT_SELF is for the PF of the SR-IOV device, in case it wants
to also have a port-profile, or for the case where the VF==PF, like in
enic patch 2/2 of this patch set.
A port-profile is used to configure/enable the external switch virtual port
backing the netdev interface, not to configure the host-facing side of the
netdev. A port-profile is an identifier known to the switch. How port-
profiles are installed on the switch or how available port-profiles are
made known to the host is outside the scope of this patch.
There are two types of port-profiles specs in the netlink msg. The first spec
is for 802.1Qbg (pre-)standard, VDP protocol. The second spec is for devices
that run a similar protocol as VDP but in firmware, thus hiding the protocol
details. In either case, the specs have much in common and makes sense to
define the netlink msg as the union of the two specs. For example, both specs
have a notion of associating/deassociating a port-profile. And both specs
require some information from the hypervisor manager, such as client port
instance ID.
The general flow is the port-profile is applied to a host netdev interface
using RTM_SETLINK, the receiver of the RTM_SETLINK msg communicates with the
switch, and the switch virtual port backing the host netdev interface is
configured/enabled based on the settings defined by the port-profile. What
those settings comprise, and how those settings are managed is again
outside the scope of this patch, since this patch only deals with the
first step in the flow.
Signed-off-by: Scott Feldman <scofeldm@cisco.com>
Signed-off-by: Roopa Prabhu <roprabhu@cisco.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-05-18 13:49:55 +08:00
}
}
err = 0 ;
if ( tb [ IFLA_PORT_SELF ] ) {
struct nlattr * port [ IFLA_PORT_MAX + 1 ] ;
netlink: make validation more configurable for future strictness
We currently have two levels of strict validation:
1) liberal (default)
- undefined (type >= max) & NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
- garbage at end of message accepted
2) strict (opt-in)
- NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
Split out parsing strictness into four different options:
* TRAILING - check that there's no trailing data after parsing
attributes (in message or nested)
* MAXTYPE - reject attrs > max known type
* UNSPEC - reject attributes with NLA_UNSPEC policy entries
* STRICT_ATTRS - strictly validate attribute size
The default for future things should be *everything*.
The current *_strict() is a combination of TRAILING and MAXTYPE,
and is renamed to _deprecated_strict().
The current regular parsing has none of this, and is renamed to
*_parse_deprecated().
Additionally it allows us to selectively set one of the new flags
even on old policies. Notably, the UNSPEC flag could be useful in
this case, since it can be arranged (by filling in the policy) to
not be an incompatible userspace ABI change, but would then going
forward prevent forgetting attribute entries. Similar can apply
to the POLICY flag.
We end up with the following renames:
* nla_parse -> nla_parse_deprecated
* nla_parse_strict -> nla_parse_deprecated_strict
* nlmsg_parse -> nlmsg_parse_deprecated
* nlmsg_parse_strict -> nlmsg_parse_deprecated_strict
* nla_parse_nested -> nla_parse_nested_deprecated
* nla_validate_nested -> nla_validate_nested_deprecated
Using spatch, of course:
@@
expression TB, MAX, HEAD, LEN, POL, EXT;
@@
-nla_parse(TB, MAX, HEAD, LEN, POL, EXT)
+nla_parse_deprecated(TB, MAX, HEAD, LEN, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression TB, MAX, NLA, POL, EXT;
@@
-nla_parse_nested(TB, MAX, NLA, POL, EXT)
+nla_parse_nested_deprecated(TB, MAX, NLA, POL, EXT)
@@
expression START, MAX, POL, EXT;
@@
-nla_validate_nested(START, MAX, POL, EXT)
+nla_validate_nested_deprecated(START, MAX, POL, EXT)
@@
expression NLH, HDRLEN, MAX, POL, EXT;
@@
-nlmsg_validate(NLH, HDRLEN, MAX, POL, EXT)
+nlmsg_validate_deprecated(NLH, HDRLEN, MAX, POL, EXT)
For this patch, don't actually add the strict, non-renamed versions
yet so that it breaks compile if I get it wrong.
Also, while at it, make nla_validate and nla_parse go down to a
common __nla_validate_parse() function to avoid code duplication.
Ultimately, this allows us to have very strict validation for every
new caller of nla_parse()/nlmsg_parse() etc as re-introduced in the
next patch, while existing things will continue to work as is.
In effect then, this adds fully strict validation for any new command.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-26 20:07:28 +08:00
err = nla_parse_nested_deprecated ( port , IFLA_PORT_MAX ,
tb [ IFLA_PORT_SELF ] ,
ifla_port_policy , NULL ) ;
net: Add netlink support for virtual port management (was iovnl)
Add new netdev ops ndo_{set|get}_vf_port to allow setting of
port-profile on a netdev interface. Extends netlink socket RTM_SETLINK/
RTM_GETLINK with two new sub msgs called IFLA_VF_PORTS and IFLA_PORT_SELF
(added to end of IFLA_cmd list). These are both nested attributes
using this layout:
[IFLA_NUM_VF]
[IFLA_VF_PORTS]
[IFLA_VF_PORT]
[IFLA_PORT_*], ...
[IFLA_VF_PORT]
[IFLA_PORT_*], ...
...
[IFLA_PORT_SELF]
[IFLA_PORT_*], ...
These attributes are designed to be set and get symmetrically. VF_PORTS
is a list of VF_PORTs, one for each VF, when dealing with an SR-IOV
device. PORT_SELF is for the PF of the SR-IOV device, in case it wants
to also have a port-profile, or for the case where the VF==PF, like in
enic patch 2/2 of this patch set.
A port-profile is used to configure/enable the external switch virtual port
backing the netdev interface, not to configure the host-facing side of the
netdev. A port-profile is an identifier known to the switch. How port-
profiles are installed on the switch or how available port-profiles are
made known to the host is outside the scope of this patch.
There are two types of port-profiles specs in the netlink msg. The first spec
is for 802.1Qbg (pre-)standard, VDP protocol. The second spec is for devices
that run a similar protocol as VDP but in firmware, thus hiding the protocol
details. In either case, the specs have much in common and makes sense to
define the netlink msg as the union of the two specs. For example, both specs
have a notion of associating/deassociating a port-profile. And both specs
require some information from the hypervisor manager, such as client port
instance ID.
The general flow is the port-profile is applied to a host netdev interface
using RTM_SETLINK, the receiver of the RTM_SETLINK msg communicates with the
switch, and the switch virtual port backing the host netdev interface is
configured/enabled based on the settings defined by the port-profile. What
those settings comprise, and how those settings are managed is again
outside the scope of this patch, since this patch only deals with the
first step in the flow.
Signed-off-by: Scott Feldman <scofeldm@cisco.com>
Signed-off-by: Roopa Prabhu <roprabhu@cisco.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-05-18 13:49:55 +08:00
if ( err < 0 )
goto errout ;
err = - EOPNOTSUPP ;
if ( ops - > ndo_set_vf_port )
err = ops - > ndo_set_vf_port ( dev , PORT_SELF_VF , port ) ;
if ( err < 0 )
goto errout ;
2014-09-01 22:07:29 +08:00
status | = DO_SETLINK_NOTIFY ;
net: Add netlink support for virtual port management (was iovnl)
Add new netdev ops ndo_{set|get}_vf_port to allow setting of
port-profile on a netdev interface. Extends netlink socket RTM_SETLINK/
RTM_GETLINK with two new sub msgs called IFLA_VF_PORTS and IFLA_PORT_SELF
(added to end of IFLA_cmd list). These are both nested attributes
using this layout:
[IFLA_NUM_VF]
[IFLA_VF_PORTS]
[IFLA_VF_PORT]
[IFLA_PORT_*], ...
[IFLA_VF_PORT]
[IFLA_PORT_*], ...
...
[IFLA_PORT_SELF]
[IFLA_PORT_*], ...
These attributes are designed to be set and get symmetrically. VF_PORTS
is a list of VF_PORTs, one for each VF, when dealing with an SR-IOV
device. PORT_SELF is for the PF of the SR-IOV device, in case it wants
to also have a port-profile, or for the case where the VF==PF, like in
enic patch 2/2 of this patch set.
A port-profile is used to configure/enable the external switch virtual port
backing the netdev interface, not to configure the host-facing side of the
netdev. A port-profile is an identifier known to the switch. How port-
profiles are installed on the switch or how available port-profiles are
made known to the host is outside the scope of this patch.
There are two types of port-profiles specs in the netlink msg. The first spec
is for 802.1Qbg (pre-)standard, VDP protocol. The second spec is for devices
that run a similar protocol as VDP but in firmware, thus hiding the protocol
details. In either case, the specs have much in common and makes sense to
define the netlink msg as the union of the two specs. For example, both specs
have a notion of associating/deassociating a port-profile. And both specs
require some information from the hypervisor manager, such as client port
instance ID.
The general flow is the port-profile is applied to a host netdev interface
using RTM_SETLINK, the receiver of the RTM_SETLINK msg communicates with the
switch, and the switch virtual port backing the host netdev interface is
configured/enabled based on the settings defined by the port-profile. What
those settings comprise, and how those settings are managed is again
outside the scope of this patch, since this patch only deals with the
first step in the flow.
Signed-off-by: Scott Feldman <scofeldm@cisco.com>
Signed-off-by: Roopa Prabhu <roprabhu@cisco.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-05-18 13:49:55 +08:00
}
2010-11-16 12:30:14 +08:00
if ( tb [ IFLA_AF_SPEC ] ) {
struct nlattr * af ;
int rem ;
nla_for_each_nested ( af , tb [ IFLA_AF_SPEC ] , rem ) {
const struct rtnl_af_ops * af_ops ;
2017-10-21 08:43:11 +08:00
BUG_ON ( ! ( af_ops = rtnl_af_lookup ( nla_type ( af ) ) ) ) ;
2010-11-16 12:30:14 +08:00
2021-04-07 23:59:12 +08:00
err = af_ops - > set_link_af ( dev , af , extack ) ;
2021-05-09 02:00:33 +08:00
if ( err < 0 )
2010-11-16 12:30:14 +08:00
goto errout ;
2014-09-01 22:07:29 +08:00
status | = DO_SETLINK_NOTIFY ;
2010-11-16 12:30:14 +08:00
}
}
net: Add netlink support for virtual port management (was iovnl)
Add new netdev ops ndo_{set|get}_vf_port to allow setting of
port-profile on a netdev interface. Extends netlink socket RTM_SETLINK/
RTM_GETLINK with two new sub msgs called IFLA_VF_PORTS and IFLA_PORT_SELF
(added to end of IFLA_cmd list). These are both nested attributes
using this layout:
[IFLA_NUM_VF]
[IFLA_VF_PORTS]
[IFLA_VF_PORT]
[IFLA_PORT_*], ...
[IFLA_VF_PORT]
[IFLA_PORT_*], ...
...
[IFLA_PORT_SELF]
[IFLA_PORT_*], ...
These attributes are designed to be set and get symmetrically. VF_PORTS
is a list of VF_PORTs, one for each VF, when dealing with an SR-IOV
device. PORT_SELF is for the PF of the SR-IOV device, in case it wants
to also have a port-profile, or for the case where the VF==PF, like in
enic patch 2/2 of this patch set.
A port-profile is used to configure/enable the external switch virtual port
backing the netdev interface, not to configure the host-facing side of the
netdev. A port-profile is an identifier known to the switch. How port-
profiles are installed on the switch or how available port-profiles are
made known to the host is outside the scope of this patch.
There are two types of port-profiles specs in the netlink msg. The first spec
is for 802.1Qbg (pre-)standard, VDP protocol. The second spec is for devices
that run a similar protocol as VDP but in firmware, thus hiding the protocol
details. In either case, the specs have much in common and makes sense to
define the netlink msg as the union of the two specs. For example, both specs
have a notion of associating/deassociating a port-profile. And both specs
require some information from the hypervisor manager, such as client port
instance ID.
The general flow is the port-profile is applied to a host netdev interface
using RTM_SETLINK, the receiver of the RTM_SETLINK msg communicates with the
switch, and the switch virtual port backing the host netdev interface is
configured/enabled based on the settings defined by the port-profile. What
those settings comprise, and how those settings are managed is again
outside the scope of this patch, since this patch only deals with the
first step in the flow.
Signed-off-by: Scott Feldman <scofeldm@cisco.com>
Signed-off-by: Roopa Prabhu <roprabhu@cisco.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-05-18 13:49:55 +08:00
err = 0 ;
2020-08-01 08:34:01 +08:00
if ( tb [ IFLA_PROTO_DOWN ] | | tb [ IFLA_PROTO_DOWN_REASON ] ) {
err = do_set_proto_down ( dev , tb [ IFLA_PROTO_DOWN ] ,
tb [ IFLA_PROTO_DOWN_REASON ] , extack ) ;
2015-07-15 04:43:20 +08:00
if ( err )
goto errout ;
status | = DO_SETLINK_NOTIFY ;
}
2016-07-20 03:16:49 +08:00
if ( tb [ IFLA_XDP ] ) {
struct nlattr * xdp [ IFLA_XDP_MAX + 1 ] ;
2016-11-29 06:16:54 +08:00
u32 xdp_flags = 0 ;
2016-07-20 03:16:49 +08:00
netlink: make validation more configurable for future strictness
We currently have two levels of strict validation:
1) liberal (default)
- undefined (type >= max) & NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
- garbage at end of message accepted
2) strict (opt-in)
- NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
Split out parsing strictness into four different options:
* TRAILING - check that there's no trailing data after parsing
attributes (in message or nested)
* MAXTYPE - reject attrs > max known type
* UNSPEC - reject attributes with NLA_UNSPEC policy entries
* STRICT_ATTRS - strictly validate attribute size
The default for future things should be *everything*.
The current *_strict() is a combination of TRAILING and MAXTYPE,
and is renamed to _deprecated_strict().
The current regular parsing has none of this, and is renamed to
*_parse_deprecated().
Additionally it allows us to selectively set one of the new flags
even on old policies. Notably, the UNSPEC flag could be useful in
this case, since it can be arranged (by filling in the policy) to
not be an incompatible userspace ABI change, but would then going
forward prevent forgetting attribute entries. Similar can apply
to the POLICY flag.
We end up with the following renames:
* nla_parse -> nla_parse_deprecated
* nla_parse_strict -> nla_parse_deprecated_strict
* nlmsg_parse -> nlmsg_parse_deprecated
* nlmsg_parse_strict -> nlmsg_parse_deprecated_strict
* nla_parse_nested -> nla_parse_nested_deprecated
* nla_validate_nested -> nla_validate_nested_deprecated
Using spatch, of course:
@@
expression TB, MAX, HEAD, LEN, POL, EXT;
@@
-nla_parse(TB, MAX, HEAD, LEN, POL, EXT)
+nla_parse_deprecated(TB, MAX, HEAD, LEN, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression TB, MAX, NLA, POL, EXT;
@@
-nla_parse_nested(TB, MAX, NLA, POL, EXT)
+nla_parse_nested_deprecated(TB, MAX, NLA, POL, EXT)
@@
expression START, MAX, POL, EXT;
@@
-nla_validate_nested(START, MAX, POL, EXT)
+nla_validate_nested_deprecated(START, MAX, POL, EXT)
@@
expression NLH, HDRLEN, MAX, POL, EXT;
@@
-nlmsg_validate(NLH, HDRLEN, MAX, POL, EXT)
+nlmsg_validate_deprecated(NLH, HDRLEN, MAX, POL, EXT)
For this patch, don't actually add the strict, non-renamed versions
yet so that it breaks compile if I get it wrong.
Also, while at it, make nla_validate and nla_parse go down to a
common __nla_validate_parse() function to avoid code duplication.
Ultimately, this allows us to have very strict validation for every
new caller of nla_parse()/nlmsg_parse() etc as re-introduced in the
next patch, while existing things will continue to work as is.
In effect then, this adds fully strict validation for any new command.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-26 20:07:28 +08:00
err = nla_parse_nested_deprecated ( xdp , IFLA_XDP_MAX ,
tb [ IFLA_XDP ] ,
ifla_xdp_policy , NULL ) ;
2016-07-20 03:16:49 +08:00
if ( err < 0 )
goto errout ;
2017-06-16 08:29:09 +08:00
if ( xdp [ IFLA_XDP_ATTACHED ] | | xdp [ IFLA_XDP_PROG_ID ] ) {
2016-07-21 08:22:34 +08:00
err = - EINVAL ;
goto errout ;
}
2016-11-29 06:16:54 +08:00
if ( xdp [ IFLA_XDP_FLAGS ] ) {
xdp_flags = nla_get_u32 ( xdp [ IFLA_XDP_FLAGS ] ) ;
if ( xdp_flags & ~ XDP_FLAGS_MASK ) {
err = - EINVAL ;
goto errout ;
}
2017-06-22 09:25:04 +08:00
if ( hweight32 ( xdp_flags & XDP_FLAGS_MODES ) > 1 ) {
2017-05-12 07:04:45 +08:00
err = - EINVAL ;
goto errout ;
}
2016-11-29 06:16:54 +08:00
}
2016-07-20 03:16:49 +08:00
if ( xdp [ IFLA_XDP_FD ] ) {
2020-03-26 01:23:26 +08:00
int expected_fd = - 1 ;
if ( xdp_flags & XDP_FLAGS_REPLACE ) {
if ( ! xdp [ IFLA_XDP_EXPECTED_FD ] ) {
err = - EINVAL ;
goto errout ;
}
expected_fd =
nla_get_s32 ( xdp [ IFLA_XDP_EXPECTED_FD ] ) ;
}
2017-05-01 12:46:46 +08:00
err = dev_change_xdp_fd ( dev , extack ,
2016-11-29 06:16:54 +08:00
nla_get_s32 ( xdp [ IFLA_XDP_FD ] ) ,
2020-03-26 01:23:26 +08:00
expected_fd ,
2016-11-29 06:16:54 +08:00
xdp_flags ) ;
2016-07-20 03:16:49 +08:00
if ( err )
goto errout ;
status | = DO_SETLINK_NOTIFY ;
}
}
2007-06-14 03:03:36 +08:00
errout :
2014-09-01 22:07:29 +08:00
if ( status & DO_SETLINK_MODIFIED ) {
2017-10-15 18:13:45 +08:00
if ( ( status & DO_SETLINK_NOTIFY ) = = DO_SETLINK_NOTIFY )
2014-09-01 22:07:29 +08:00
netdev_state_change ( dev ) ;
if ( err < 0 )
net_warn_ratelimited ( " A link change request failed with some changes committed already. Interface %s may have been left with an inconsistent configuration, please check. \n " ,
dev - > name ) ;
}
2006-08-11 12:17:37 +08:00
2007-06-14 03:03:36 +08:00
return err ;
}
2005-04-17 06:20:36 +08:00
2019-09-30 17:48:19 +08:00
static struct net_device * rtnl_dev_get ( struct net * net ,
2022-04-16 00:53:28 +08:00
struct nlattr * tb [ ] )
{
char ifname [ ALTIFNAMSIZ ] ;
if ( tb [ IFLA_IFNAME ] )
nla_strscpy ( ifname , tb [ IFLA_IFNAME ] , IFNAMSIZ ) ;
else if ( tb [ IFLA_ALT_IFNAME ] )
nla_strscpy ( ifname , tb [ IFLA_ALT_IFNAME ] , ALTIFNAMSIZ ) ;
else
return NULL ;
2019-09-30 17:48:19 +08:00
return __dev_get_by_name ( net , ifname ) ;
}
2017-04-17 00:48:24 +08:00
static int rtnl_setlink ( struct sk_buff * skb , struct nlmsghdr * nlh ,
struct netlink_ext_ack * extack )
2007-06-14 03:03:36 +08:00
{
2008-03-26 01:26:21 +08:00
struct net * net = sock_net ( skb - > sk ) ;
2007-06-14 03:03:36 +08:00
struct ifinfomsg * ifm ;
struct net_device * dev ;
int err ;
struct nlattr * tb [ IFLA_MAX + 1 ] ;
netlink: make validation more configurable for future strictness
We currently have two levels of strict validation:
1) liberal (default)
- undefined (type >= max) & NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
- garbage at end of message accepted
2) strict (opt-in)
- NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
Split out parsing strictness into four different options:
* TRAILING - check that there's no trailing data after parsing
attributes (in message or nested)
* MAXTYPE - reject attrs > max known type
* UNSPEC - reject attributes with NLA_UNSPEC policy entries
* STRICT_ATTRS - strictly validate attribute size
The default for future things should be *everything*.
The current *_strict() is a combination of TRAILING and MAXTYPE,
and is renamed to _deprecated_strict().
The current regular parsing has none of this, and is renamed to
*_parse_deprecated().
Additionally it allows us to selectively set one of the new flags
even on old policies. Notably, the UNSPEC flag could be useful in
this case, since it can be arranged (by filling in the policy) to
not be an incompatible userspace ABI change, but would then going
forward prevent forgetting attribute entries. Similar can apply
to the POLICY flag.
We end up with the following renames:
* nla_parse -> nla_parse_deprecated
* nla_parse_strict -> nla_parse_deprecated_strict
* nlmsg_parse -> nlmsg_parse_deprecated
* nlmsg_parse_strict -> nlmsg_parse_deprecated_strict
* nla_parse_nested -> nla_parse_nested_deprecated
* nla_validate_nested -> nla_validate_nested_deprecated
Using spatch, of course:
@@
expression TB, MAX, HEAD, LEN, POL, EXT;
@@
-nla_parse(TB, MAX, HEAD, LEN, POL, EXT)
+nla_parse_deprecated(TB, MAX, HEAD, LEN, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression TB, MAX, NLA, POL, EXT;
@@
-nla_parse_nested(TB, MAX, NLA, POL, EXT)
+nla_parse_nested_deprecated(TB, MAX, NLA, POL, EXT)
@@
expression START, MAX, POL, EXT;
@@
-nla_validate_nested(START, MAX, POL, EXT)
+nla_validate_nested_deprecated(START, MAX, POL, EXT)
@@
expression NLH, HDRLEN, MAX, POL, EXT;
@@
-nlmsg_validate(NLH, HDRLEN, MAX, POL, EXT)
+nlmsg_validate_deprecated(NLH, HDRLEN, MAX, POL, EXT)
For this patch, don't actually add the strict, non-renamed versions
yet so that it breaks compile if I get it wrong.
Also, while at it, make nla_validate and nla_parse go down to a
common __nla_validate_parse() function to avoid code duplication.
Ultimately, this allows us to have very strict validation for every
new caller of nla_parse()/nlmsg_parse() etc as re-introduced in the
next patch, while existing things will continue to work as is.
In effect then, this adds fully strict validation for any new command.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-26 20:07:28 +08:00
err = nlmsg_parse_deprecated ( nlh , sizeof ( * ifm ) , tb , IFLA_MAX ,
ifla_policy , extack ) ;
2007-06-14 03:03:36 +08:00
if ( err < 0 )
goto errout ;
2018-02-07 20:53:20 +08:00
err = rtnl_ensure_unique_netns ( tb , extack , false ) ;
if ( err < 0 )
goto errout ;
2007-06-14 03:03:36 +08:00
err = - EINVAL ;
ifm = nlmsg_data ( nlh ) ;
if ( ifm - > ifi_index > 0 )
2009-10-21 18:59:31 +08:00
dev = __dev_get_by_index ( net , ifm - > ifi_index ) ;
2019-09-30 17:48:20 +08:00
else if ( tb [ IFLA_IFNAME ] | | tb [ IFLA_ALT_IFNAME ] )
2022-04-16 00:53:28 +08:00
dev = rtnl_dev_get ( net , tb ) ;
2007-06-14 03:03:36 +08:00
else
goto errout ;
if ( dev = = NULL ) {
err = - ENODEV ;
goto errout ;
}
2022-04-16 00:53:28 +08:00
err = do_setlink ( skb , dev , ifm , extack , tb , 0 ) ;
2006-08-11 12:17:37 +08:00
errout :
2005-04-17 06:20:36 +08:00
return err ;
}
2015-03-25 02:53:31 +08:00
/*
 * Delete every device in @net whose group equals @group.
 *
 * Works in two passes: the first pass verifies that at least one device
 * matches and that every matching device has a dellink operation, so nothing
 * is queued for deletion unless all of them can be; the second pass queues
 * the matching devices and unregisters them in one batch.
 *
 * Returns 0 on success, -EPERM for group 0, -ENODEV when no device matches,
 * or -EOPNOTSUPP when a matching device has no dellink operation.
 */
static int rtnl_group_dellink(const struct net *net, int group)
{
	struct net_device *dev, *aux;
	LIST_HEAD(list_kill);
	bool found = false;

	if (!group)
		return -EPERM;

	/* Pass 1: validate before touching anything. */
	for_each_netdev(net, dev) {
		const struct rtnl_link_ops *ops;

		if (dev->group != group)
			continue;

		found = true;
		ops = dev->rtnl_link_ops;
		if (!ops || !ops->dellink)
			return -EOPNOTSUPP;
	}

	if (!found)
		return -ENODEV;

	/* Pass 2: queue every matching device on the kill list. */
	for_each_netdev_safe(net, dev, aux) {
		if (dev->group != group)
			continue;

		dev->rtnl_link_ops->dellink(dev, &list_kill);
	}
	unregister_netdevice_many(&list_kill);

	return 0;
}
2015-07-21 16:44:06 +08:00
int rtnl_delete_link ( struct net_device * dev )
{
const struct rtnl_link_ops * ops ;
LIST_HEAD ( list_kill ) ;
ops = dev - > rtnl_link_ops ;
if ( ! ops | | ! ops - > dellink )
return - EOPNOTSUPP ;
ops - > dellink ( dev , & list_kill ) ;
unregister_netdevice_many ( & list_kill ) ;
return 0 ;
}
EXPORT_SYMBOL_GPL ( rtnl_delete_link ) ;
2017-04-17 00:48:24 +08:00
static int rtnl_dellink ( struct sk_buff * skb , struct nlmsghdr * nlh ,
struct netlink_ext_ack * extack )
2007-06-14 03:03:51 +08:00
{
2008-03-26 01:26:21 +08:00
struct net * net = sock_net ( skb - > sk ) ;
2018-01-24 22:26:34 +08:00
struct net * tgt_net = net ;
struct net_device * dev = NULL ;
2007-06-14 03:03:51 +08:00
struct ifinfomsg * ifm ;
struct nlattr * tb [ IFLA_MAX + 1 ] ;
int err ;
2018-01-24 22:26:34 +08:00
int netnsid = - 1 ;
2007-06-14 03:03:51 +08:00
netlink: make validation more configurable for future strictness
We currently have two levels of strict validation:
1) liberal (default)
- undefined (type >= max) & NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
- garbage at end of message accepted
2) strict (opt-in)
- NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
Split out parsing strictness into four different options:
* TRAILING - check that there's no trailing data after parsing
attributes (in message or nested)
* MAXTYPE - reject attrs > max known type
* UNSPEC - reject attributes with NLA_UNSPEC policy entries
* STRICT_ATTRS - strictly validate attribute size
The default for future things should be *everything*.
The current *_strict() is a combination of TRAILING and MAXTYPE,
and is renamed to _deprecated_strict().
The current regular parsing has none of this, and is renamed to
*_parse_deprecated().
Additionally it allows us to selectively set one of the new flags
even on old policies. Notably, the UNSPEC flag could be useful in
this case, since it can be arranged (by filling in the policy) to
not be an incompatible userspace ABI change, but would then going
forward prevent forgetting attribute entries. Similar can apply
to the POLICY flag.
We end up with the following renames:
* nla_parse -> nla_parse_deprecated
* nla_parse_strict -> nla_parse_deprecated_strict
* nlmsg_parse -> nlmsg_parse_deprecated
* nlmsg_parse_strict -> nlmsg_parse_deprecated_strict
* nla_parse_nested -> nla_parse_nested_deprecated
* nla_validate_nested -> nla_validate_nested_deprecated
Using spatch, of course:
@@
expression TB, MAX, HEAD, LEN, POL, EXT;
@@
-nla_parse(TB, MAX, HEAD, LEN, POL, EXT)
+nla_parse_deprecated(TB, MAX, HEAD, LEN, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression TB, MAX, NLA, POL, EXT;
@@
-nla_parse_nested(TB, MAX, NLA, POL, EXT)
+nla_parse_nested_deprecated(TB, MAX, NLA, POL, EXT)
@@
expression START, MAX, POL, EXT;
@@
-nla_validate_nested(START, MAX, POL, EXT)
+nla_validate_nested_deprecated(START, MAX, POL, EXT)
@@
expression NLH, HDRLEN, MAX, POL, EXT;
@@
-nlmsg_validate(NLH, HDRLEN, MAX, POL, EXT)
+nlmsg_validate_deprecated(NLH, HDRLEN, MAX, POL, EXT)
For this patch, don't actually add the strict, non-renamed versions
yet so that it breaks compile if I get it wrong.
Also, while at it, make nla_validate and nla_parse go down to a
common __nla_validate_parse() function to avoid code duplication.
Ultimately, this allows us to have very strict validation for every
new caller of nla_parse()/nlmsg_parse() etc as re-introduced in the
next patch, while existing things will continue to work as is.
In effect then, this adds fully strict validation for any new command.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-26 20:07:28 +08:00
err = nlmsg_parse_deprecated ( nlh , sizeof ( * ifm ) , tb , IFLA_MAX ,
ifla_policy , extack ) ;
2007-06-14 03:03:51 +08:00
if ( err < 0 )
return err ;
2018-02-07 20:53:20 +08:00
err = rtnl_ensure_unique_netns ( tb , extack , true ) ;
if ( err < 0 )
return err ;
2018-09-05 03:53:53 +08:00
if ( tb [ IFLA_TARGET_NETNSID ] ) {
netnsid = nla_get_s32 ( tb [ IFLA_TARGET_NETNSID ] ) ;
2018-09-05 03:53:47 +08:00
tgt_net = rtnl_get_net_ns_capable ( NETLINK_CB ( skb ) . sk , netnsid ) ;
2018-01-24 22:26:34 +08:00
if ( IS_ERR ( tgt_net ) )
return PTR_ERR ( tgt_net ) ;
}
err = - EINVAL ;
2007-06-14 03:03:51 +08:00
ifm = nlmsg_data ( nlh ) ;
if ( ifm - > ifi_index > 0 )
2018-01-24 22:26:34 +08:00
dev = __dev_get_by_index ( tgt_net , ifm - > ifi_index ) ;
2019-09-30 17:48:20 +08:00
else if ( tb [ IFLA_IFNAME ] | | tb [ IFLA_ALT_IFNAME ] )
2022-04-16 00:53:28 +08:00
dev = rtnl_dev_get ( net , tb ) ;
2015-03-25 02:53:31 +08:00
else if ( tb [ IFLA_GROUP ] )
2018-01-24 22:26:34 +08:00
err = rtnl_group_dellink ( tgt_net , nla_get_u32 ( tb [ IFLA_GROUP ] ) ) ;
2007-06-14 03:03:51 +08:00
else
2018-01-24 22:26:34 +08:00
goto out ;
2007-06-14 03:03:51 +08:00
2018-01-24 22:26:34 +08:00
if ( ! dev ) {
2022-04-16 00:53:29 +08:00
if ( tb [ IFLA_IFNAME ] | | tb [ IFLA_ALT_IFNAME ] | | ifm - > ifi_index > 0 )
2018-01-24 22:26:34 +08:00
err = - ENODEV ;
goto out ;
}
err = rtnl_delete_link ( dev ) ;
2007-06-14 03:03:51 +08:00
2018-01-24 22:26:34 +08:00
out :
if ( netnsid > = 0 )
put_net ( tgt_net ) ;
return err ;
2007-06-14 03:03:51 +08:00
}
rtnetlink: support specifying device flags on device creation
commit e8469ed959c373c2ff9e6f488aa5a14971aebe1f
Author: Patrick McHardy <kaber@trash.net>
Date: Tue Feb 23 20:41:30 2010 +0100
Support specifying the initial device flags when creating a device through
rtnl_link. Devices allocated by rtnl_create_link() are marked as INITIALIZING
in order to suppress netlink registration notifications. To complete setup,
rtnl_configure_link() must be called, which performs the device flag changes
and invokes the deferred notifiers if everything went well.
Two examples:
# add macvlan to eth0
#
$ ip link add link eth0 up allmulticast on type macvlan
[LINK]11: macvlan0@eth0: <BROADCAST,MULTICAST,ALLMULTI,UP,LOWER_UP> mtu 1500 qdisc noqueue state UNKNOWN
link/ether 26:f8:84:02:f9:2a brd ff:ff:ff:ff:ff:ff
[ROUTE]ff00::/8 dev macvlan0 table local metric 256 mtu 1500 advmss 1440 hoplimit 0
[ROUTE]fe80::/64 dev macvlan0 proto kernel metric 256 mtu 1500 advmss 1440 hoplimit 0
[LINK]11: macvlan0@eth0: <BROADCAST,MULTICAST,ALLMULTI,UP,LOWER_UP> mtu 1500
link/ether 26:f8:84:02:f9:2a
[ADDR]11: macvlan0 inet6 fe80::24f8:84ff:fe02:f92a/64 scope link
valid_lft forever preferred_lft forever
[ROUTE]local fe80::24f8:84ff:fe02:f92a via :: dev lo table local proto none metric 0 mtu 16436 advmss 16376 hoplimit 0
[ROUTE]default via fe80::215:e9ff:fef0:10f8 dev macvlan0 proto kernel metric 1024 mtu 1500 advmss 1440 hoplimit 0
[NEIGH]fe80::215:e9ff:fef0:10f8 dev macvlan0 lladdr 00:15:e9:f0:10:f8 router STALE
[ROUTE]2001:6f8:974::/64 dev macvlan0 proto kernel metric 256 expires 0sec mtu 1500 advmss 1440 hoplimit 0
[PREFIX]prefix 2001:6f8:974::/64 dev macvlan0 onlink autoconf valid 14400 preferred 131084
[ADDR]11: macvlan0 inet6 2001:6f8:974:0:24f8:84ff:fe02:f92a/64 scope global dynamic
valid_lft 86399sec preferred_lft 14399sec
# add VLAN to eth1, eth1 is down
#
$ ip link add link eth1 up type vlan id 1000
RTNETLINK answers: Network is down
<no events>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-02-26 14:34:54 +08:00
int rtnl_configure_link ( struct net_device * dev , const struct ifinfomsg * ifm )
{
unsigned int old_flags ;
int err ;
old_flags = dev - > flags ;
if ( ifm & & ( ifm - > ifi_flags | | ifm - > ifi_change ) ) {
2018-12-07 01:05:43 +08:00
err = __dev_change_flags ( dev , rtnl_dev_combine_flags ( dev , ifm ) ,
NULL ) ;
rtnetlink: support specifying device flags on device creation
commit e8469ed959c373c2ff9e6f488aa5a14971aebe1f
Author: Patrick McHardy <kaber@trash.net>
Date: Tue Feb 23 20:41:30 2010 +0100
Support specifying the initial device flags when creating a device though
rtnl_link. Devices allocated by rtnl_create_link() are marked as INITIALIZING
in order to surpress netlink registration notifications. To complete setup,
rtnl_configure_link() must be called, which performs the device flag changes
and invokes the deferred notifiers if everything went well.
Two examples:
# add macvlan to eth0
#
$ ip link add link eth0 up allmulticast on type macvlan
[LINK]11: macvlan0@eth0: <BROADCAST,MULTICAST,ALLMULTI,UP,LOWER_UP> mtu 1500 qdisc noqueue state UNKNOWN
link/ether 26:f8:84:02:f9:2a brd ff:ff:ff:ff:ff:ff
[ROUTE]ff00::/8 dev macvlan0 table local metric 256 mtu 1500 advmss 1440 hoplimit 0
[ROUTE]fe80::/64 dev macvlan0 proto kernel metric 256 mtu 1500 advmss 1440 hoplimit 0
[LINK]11: macvlan0@eth0: <BROADCAST,MULTICAST,ALLMULTI,UP,LOWER_UP> mtu 1500
link/ether 26:f8:84:02:f9:2a
[ADDR]11: macvlan0 inet6 fe80::24f8:84ff:fe02:f92a/64 scope link
valid_lft forever preferred_lft forever
[ROUTE]local fe80::24f8:84ff:fe02:f92a via :: dev lo table local proto none metric 0 mtu 16436 advmss 16376 hoplimit 0
[ROUTE]default via fe80::215:e9ff:fef0:10f8 dev macvlan0 proto kernel metric 1024 mtu 1500 advmss 1440 hoplimit 0
[NEIGH]fe80::215:e9ff:fef0:10f8 dev macvlan0 lladdr 00:15:e9:f0:10:f8 router STALE
[ROUTE]2001:6f8:974::/64 dev macvlan0 proto kernel metric 256 expires 0sec mtu 1500 advmss 1440 hoplimit 0
[PREFIX]prefix 2001:6f8:974::/64 dev macvlan0 onlink autoconf valid 14400 preferred 131084
[ADDR]11: macvlan0 inet6 2001:6f8:974:0:24f8:84ff:fe02:f92a/64 scope global dynamic
valid_lft 86399sec preferred_lft 14399sec
# add VLAN to eth1, eth1 is down
#
$ ip link add link eth1 up type vlan id 1000
RTNETLINK answers: Network is down
<no events>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-02-26 14:34:54 +08:00
if ( err < 0 )
return err ;
}
2018-07-05 07:46:29 +08:00
if ( dev - > rtnl_link_state = = RTNL_LINK_INITIALIZED ) {
2018-09-13 04:21:48 +08:00
__dev_notify_flags ( dev , old_flags , ( old_flags ^ dev - > flags ) ) ;
2018-07-05 07:46:29 +08:00
} else {
dev - > rtnl_link_state = RTNL_LINK_INITIALIZED ;
__dev_notify_flags ( dev , old_flags , ~ 0U ) ;
}
rtnetlink: support specifying device flags on device creation
commit e8469ed959c373c2ff9e6f488aa5a14971aebe1f
Author: Patrick McHardy <kaber@trash.net>
Date: Tue Feb 23 20:41:30 2010 +0100
Support specifying the initial device flags when creating a device though
rtnl_link. Devices allocated by rtnl_create_link() are marked as INITIALIZING
in order to surpress netlink registration notifications. To complete setup,
rtnl_configure_link() must be called, which performs the device flag changes
and invokes the deferred notifiers if everything went well.
Two examples:
# add macvlan to eth0
#
$ ip link add link eth0 up allmulticast on type macvlan
[LINK]11: macvlan0@eth0: <BROADCAST,MULTICAST,ALLMULTI,UP,LOWER_UP> mtu 1500 qdisc noqueue state UNKNOWN
link/ether 26:f8:84:02:f9:2a brd ff:ff:ff:ff:ff:ff
[ROUTE]ff00::/8 dev macvlan0 table local metric 256 mtu 1500 advmss 1440 hoplimit 0
[ROUTE]fe80::/64 dev macvlan0 proto kernel metric 256 mtu 1500 advmss 1440 hoplimit 0
[LINK]11: macvlan0@eth0: <BROADCAST,MULTICAST,ALLMULTI,UP,LOWER_UP> mtu 1500
link/ether 26:f8:84:02:f9:2a
[ADDR]11: macvlan0 inet6 fe80::24f8:84ff:fe02:f92a/64 scope link
valid_lft forever preferred_lft forever
[ROUTE]local fe80::24f8:84ff:fe02:f92a via :: dev lo table local proto none metric 0 mtu 16436 advmss 16376 hoplimit 0
[ROUTE]default via fe80::215:e9ff:fef0:10f8 dev macvlan0 proto kernel metric 1024 mtu 1500 advmss 1440 hoplimit 0
[NEIGH]fe80::215:e9ff:fef0:10f8 dev macvlan0 lladdr 00:15:e9:f0:10:f8 router STALE
[ROUTE]2001:6f8:974::/64 dev macvlan0 proto kernel metric 256 expires 0sec mtu 1500 advmss 1440 hoplimit 0
[PREFIX]prefix 2001:6f8:974::/64 dev macvlan0 onlink autoconf valid 14400 preferred 131084
[ADDR]11: macvlan0 inet6 2001:6f8:974:0:24f8:84ff:fe02:f92a/64 scope global dynamic
valid_lft 86399sec preferred_lft 14399sec
# add VLAN to eth1, eth1 is down
#
$ ip link add link eth1 up type vlan id 1000
RTNETLINK answers: Network is down
<no events>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-02-26 14:34:54 +08:00
return 0 ;
}
EXPORT_SYMBOL ( rtnl_configure_link ) ;
2018-11-07 04:51:14 +08:00
struct net_device * rtnl_create_link ( struct net * net , const char * ifname ,
unsigned char name_assign_type ,
const struct rtnl_link_ops * ops ,
struct nlattr * tb [ ] ,
struct netlink_ext_ack * extack )
2007-08-09 13:16:38 +08:00
{
struct net_device * dev ;
2012-07-20 10:28:47 +08:00
unsigned int num_tx_queues = 1 ;
unsigned int num_rx_queues = 1 ;
2007-08-09 13:16:38 +08:00
2012-07-20 10:28:48 +08:00
if ( tb [ IFLA_NUM_TX_QUEUES ] )
num_tx_queues = nla_get_u32 ( tb [ IFLA_NUM_TX_QUEUES ] ) ;
else if ( ops - > get_num_tx_queues )
2012-07-20 10:28:47 +08:00
num_tx_queues = ops - > get_num_tx_queues ( ) ;
2012-07-20 10:28:48 +08:00
if ( tb [ IFLA_NUM_RX_QUEUES ] )
num_rx_queues = nla_get_u32 ( tb [ IFLA_NUM_RX_QUEUES ] ) ;
else if ( ops - > get_num_rx_queues )
2012-07-20 10:28:47 +08:00
num_rx_queues = ops - > get_num_rx_queues ( ) ;
2012-04-11 02:34:43 +08:00
2018-11-07 04:51:14 +08:00
if ( num_tx_queues < 1 | | num_tx_queues > 4096 ) {
NL_SET_ERR_MSG ( extack , " Invalid number of transmit queues " ) ;
2018-10-03 06:47:35 +08:00
return ERR_PTR ( - EINVAL ) ;
2018-11-07 04:51:14 +08:00
}
2018-10-03 06:47:35 +08:00
2018-11-07 04:51:14 +08:00
if ( num_rx_queues < 1 | | num_rx_queues > 4096 ) {
NL_SET_ERR_MSG ( extack , " Invalid number of receive queues " ) ;
2018-10-03 06:47:35 +08:00
return ERR_PTR ( - EINVAL ) ;
2018-11-07 04:51:14 +08:00
}
2018-10-03 06:47:35 +08:00
2021-06-12 16:20:54 +08:00
if ( ops - > alloc ) {
dev = ops - > alloc ( tb , ifname , name_assign_type ,
num_tx_queues , num_rx_queues ) ;
if ( IS_ERR ( dev ) )
return dev ;
} else {
dev = alloc_netdev_mqs ( ops - > priv_size , ifname ,
name_assign_type , ops - > setup ,
num_tx_queues , num_rx_queues ) ;
}
2007-08-09 13:16:38 +08:00
if ( ! dev )
2017-02-20 23:32:06 +08:00
return ERR_PTR ( - ENOMEM ) ;
2007-08-09 13:16:38 +08:00
2009-11-08 16:53:51 +08:00
dev_net_set ( dev , net ) ;
dev - > rtnl_link_ops = ops ;
rtnetlink: support specifying device flags on device creation
commit e8469ed959c373c2ff9e6f488aa5a14971aebe1f
Author: Patrick McHardy <kaber@trash.net>
Date: Tue Feb 23 20:41:30 2010 +0100
Support specifying the initial device flags when creating a device though
rtnl_link. Devices allocated by rtnl_create_link() are marked as INITIALIZING
in order to surpress netlink registration notifications. To complete setup,
rtnl_configure_link() must be called, which performs the device flag changes
and invokes the deferred notifiers if everything went well.
Two examples:
# add macvlan to eth0
#
$ ip link add link eth0 up allmulticast on type macvlan
[LINK]11: macvlan0@eth0: <BROADCAST,MULTICAST,ALLMULTI,UP,LOWER_UP> mtu 1500 qdisc noqueue state UNKNOWN
link/ether 26:f8:84:02:f9:2a brd ff:ff:ff:ff:ff:ff
[ROUTE]ff00::/8 dev macvlan0 table local metric 256 mtu 1500 advmss 1440 hoplimit 0
[ROUTE]fe80::/64 dev macvlan0 proto kernel metric 256 mtu 1500 advmss 1440 hoplimit 0
[LINK]11: macvlan0@eth0: <BROADCAST,MULTICAST,ALLMULTI,UP,LOWER_UP> mtu 1500
link/ether 26:f8:84:02:f9:2a
[ADDR]11: macvlan0 inet6 fe80::24f8:84ff:fe02:f92a/64 scope link
valid_lft forever preferred_lft forever
[ROUTE]local fe80::24f8:84ff:fe02:f92a via :: dev lo table local proto none metric 0 mtu 16436 advmss 16376 hoplimit 0
[ROUTE]default via fe80::215:e9ff:fef0:10f8 dev macvlan0 proto kernel metric 1024 mtu 1500 advmss 1440 hoplimit 0
[NEIGH]fe80::215:e9ff:fef0:10f8 dev macvlan0 lladdr 00:15:e9:f0:10:f8 router STALE
[ROUTE]2001:6f8:974::/64 dev macvlan0 proto kernel metric 256 expires 0sec mtu 1500 advmss 1440 hoplimit 0
[PREFIX]prefix 2001:6f8:974::/64 dev macvlan0 onlink autoconf valid 14400 preferred 131084
[ADDR]11: macvlan0 inet6 2001:6f8:974:0:24f8:84ff:fe02:f92a/64 scope global dynamic
valid_lft 86399sec preferred_lft 14399sec
# add VLAN to eth1, eth1 is down
#
$ ip link add link eth1 up type vlan id 1000
RTNETLINK answers: Network is down
<no events>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-02-26 14:34:54 +08:00
dev - > rtnl_link_state = RTNL_LINK_INITIALIZING ;
2009-11-08 16:53:51 +08:00
2020-01-22 14:47:29 +08:00
if ( tb [ IFLA_MTU ] ) {
u32 mtu = nla_get_u32 ( tb [ IFLA_MTU ] ) ;
int err ;
err = dev_validate_mtu ( dev , mtu , extack ) ;
if ( err ) {
free_netdev ( dev ) ;
return ERR_PTR ( err ) ;
}
dev - > mtu = mtu ;
}
2013-01-06 20:41:57 +08:00
if ( tb [ IFLA_ADDRESS ] ) {
2021-10-23 07:20:57 +08:00
__dev_addr_set ( dev , nla_data ( tb [ IFLA_ADDRESS ] ) ,
nla_len ( tb [ IFLA_ADDRESS ] ) ) ;
2013-01-06 20:41:57 +08:00
dev - > addr_assign_type = NET_ADDR_SET ;
}
2007-08-09 13:16:38 +08:00
if ( tb [ IFLA_BROADCAST ] )
memcpy ( dev - > broadcast , nla_data ( tb [ IFLA_BROADCAST ] ) ,
nla_len ( tb [ IFLA_BROADCAST ] ) ) ;
if ( tb [ IFLA_TXQLEN ] )
dev - > tx_queue_len = nla_get_u32 ( tb [ IFLA_TXQLEN ] ) ;
if ( tb [ IFLA_OPERSTATE ] )
2008-02-18 10:35:07 +08:00
set_operstate ( dev , nla_get_u8 ( tb [ IFLA_OPERSTATE ] ) ) ;
2007-08-09 13:16:38 +08:00
if ( tb [ IFLA_LINKMODE ] )
dev - > link_mode = nla_get_u8 ( tb [ IFLA_LINKMODE ] ) ;
2011-01-20 11:00:42 +08:00
if ( tb [ IFLA_GROUP ] )
dev_set_group ( dev , nla_get_u32 ( tb [ IFLA_GROUP ] ) ) ;
2017-12-08 07:40:19 +08:00
if ( tb [ IFLA_GSO_MAX_SIZE ] )
netif_set_gso_max_size ( dev , nla_get_u32 ( tb [ IFLA_GSO_MAX_SIZE ] ) ) ;
if ( tb [ IFLA_GSO_MAX_SEGS ] )
2021-11-19 23:43:32 +08:00
netif_set_gso_max_segs ( dev , nla_get_u32 ( tb [ IFLA_GSO_MAX_SEGS ] ) ) ;
2022-01-05 18:48:38 +08:00
if ( tb [ IFLA_GRO_MAX_SIZE ] )
netif_set_gro_max_size ( dev , nla_get_u32 ( tb [ IFLA_GRO_MAX_SIZE ] ) ) ;
2007-08-09 13:16:38 +08:00
return dev ;
}
2009-11-07 17:26:17 +08:00
EXPORT_SYMBOL ( rtnl_create_link ) ;
2007-08-09 13:16:38 +08:00
2014-04-24 05:29:27 +08:00
static int rtnl_group_changelink ( const struct sk_buff * skb ,
struct net * net , int group ,
2011-01-14 07:38:31 +08:00
struct ifinfomsg * ifm ,
2017-05-01 12:46:46 +08:00
struct netlink_ext_ack * extack ,
2011-01-14 07:38:31 +08:00
struct nlattr * * tb )
{
2015-03-24 07:31:09 +08:00
struct net_device * dev , * aux ;
2011-01-14 07:38:31 +08:00
int err ;
2015-03-24 07:31:09 +08:00
for_each_netdev_safe ( net , dev , aux ) {
2011-01-14 07:38:31 +08:00
if ( dev - > group = = group ) {
2022-04-16 00:53:28 +08:00
err = do_setlink ( skb , dev , ifm , extack , tb , 0 ) ;
2011-01-14 07:38:31 +08:00
if ( err < 0 )
return err ;
}
}
return 0 ;
}
2022-04-30 07:55:07 +08:00
/*
 * Forward declaration: __rtnl_newlink() below hands off to
 * rtnl_newlink_create() for the device-creation path, but the definition
 * only appears after it in this file.
 */
static int rtnl_newlink_create ( struct sk_buff * skb , struct ifinfomsg * ifm ,
const struct rtnl_link_ops * ops ,
struct nlattr * * tb , struct nlattr * * data ,
struct netlink_ext_ack * extack ) ;
2022-04-30 07:55:06 +08:00
/*
 * Scratch attribute tables used while handling one "new link" request.
 * rtnl_newlink() allocates an instance with kmalloc() and passes it to
 * __rtnl_newlink(), which fills the tables while parsing.
 */
struct rtnl_newlink_tbs {
/* Top-level IFLA_* attributes of the message (indexed up to IFLA_MAX). */
struct nlattr * tb [ IFLA_MAX + 1 ] ;
/* Nested IFLA_INFO_DATA attributes, parsed with the link ops' own policy. */
struct nlattr * attr [ RTNL_MAX_TYPE + 1 ] ;
/* Nested IFLA_INFO_SLAVE_DATA attributes, parsed with the master's slave policy. */
struct nlattr * slave_attr [ RTNL_SLAVE_MAX_TYPE + 1 ] ;
} ;
2018-11-28 14:32:31 +08:00
static int __rtnl_newlink ( struct sk_buff * skb , struct nlmsghdr * nlh ,
2022-04-30 07:55:06 +08:00
struct rtnl_newlink_tbs * tbs ,
struct netlink_ext_ack * extack )
2007-06-14 03:03:51 +08:00
{
2018-11-28 14:32:30 +08:00
struct nlattr * linkinfo [ IFLA_INFO_MAX + 1 ] ;
2022-04-30 07:55:06 +08:00
struct nlattr * * const tb = tbs - > tb ;
2022-02-01 09:21:06 +08:00
const struct rtnl_link_ops * m_ops ;
struct net_device * master_dev ;
2008-03-26 01:26:21 +08:00
struct net * net = sock_net ( skb - > sk ) ;
2007-06-14 03:03:51 +08:00
const struct rtnl_link_ops * ops ;
2018-11-28 14:32:30 +08:00
struct nlattr * * slave_data ;
char kind [ MODULE_NAME_LEN ] ;
2007-06-14 03:03:51 +08:00
struct net_device * dev ;
struct ifinfomsg * ifm ;
2018-11-28 14:32:30 +08:00
struct nlattr * * data ;
2022-04-16 00:53:27 +08:00
bool link_specified ;
2007-06-14 03:03:51 +08:00
int err ;
2008-10-17 06:24:51 +08:00
# ifdef CONFIG_MODULES
2007-06-14 03:03:51 +08:00
replay :
2007-08-01 05:13:50 +08:00
# endif
netlink: make validation more configurable for future strictness
We currently have two levels of strict validation:
1) liberal (default)
- undefined (type >= max) & NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
- garbage at end of message accepted
2) strict (opt-in)
- NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
Split out parsing strictness into four different options:
* TRAILING - check that there's no trailing data after parsing
attributes (in message or nested)
* MAXTYPE - reject attrs > max known type
* UNSPEC - reject attributes with NLA_UNSPEC policy entries
* STRICT_ATTRS - strictly validate attribute size
The default for future things should be *everything*.
The current *_strict() is a combination of TRAILING and MAXTYPE,
and is renamed to _deprecated_strict().
The current regular parsing has none of this, and is renamed to
*_parse_deprecated().
Additionally it allows us to selectively set one of the new flags
even on old policies. Notably, the UNSPEC flag could be useful in
this case, since it can be arranged (by filling in the policy) to
not be an incompatible userspace ABI change, but would then going
forward prevent forgetting attribute entries. Similar can apply
to the POLICY flag.
We end up with the following renames:
* nla_parse -> nla_parse_deprecated
* nla_parse_strict -> nla_parse_deprecated_strict
* nlmsg_parse -> nlmsg_parse_deprecated
* nlmsg_parse_strict -> nlmsg_parse_deprecated_strict
* nla_parse_nested -> nla_parse_nested_deprecated
* nla_validate_nested -> nla_validate_nested_deprecated
Using spatch, of course:
@@
expression TB, MAX, HEAD, LEN, POL, EXT;
@@
-nla_parse(TB, MAX, HEAD, LEN, POL, EXT)
+nla_parse_deprecated(TB, MAX, HEAD, LEN, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression TB, MAX, NLA, POL, EXT;
@@
-nla_parse_nested(TB, MAX, NLA, POL, EXT)
+nla_parse_nested_deprecated(TB, MAX, NLA, POL, EXT)
@@
expression START, MAX, POL, EXT;
@@
-nla_validate_nested(START, MAX, POL, EXT)
+nla_validate_nested_deprecated(START, MAX, POL, EXT)
@@
expression NLH, HDRLEN, MAX, POL, EXT;
@@
-nlmsg_validate(NLH, HDRLEN, MAX, POL, EXT)
+nlmsg_validate_deprecated(NLH, HDRLEN, MAX, POL, EXT)
For this patch, don't actually add the strict, non-renamed versions
yet so that it breaks compile if I get it wrong.
Also, while at it, make nla_validate and nla_parse go down to a
common __nla_validate_parse() function to avoid code duplication.
Ultimately, this allows us to have very strict validation for every
new caller of nla_parse()/nlmsg_parse() etc as re-introduced in the
next patch, while existing things will continue to work as is.
In effect then, this adds fully strict validation for any new command.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-26 20:07:28 +08:00
err = nlmsg_parse_deprecated ( nlh , sizeof ( * ifm ) , tb , IFLA_MAX ,
ifla_policy , extack ) ;
2007-06-14 03:03:51 +08:00
if ( err < 0 )
return err ;
2018-02-07 20:53:20 +08:00
err = rtnl_ensure_unique_netns ( tb , extack , false ) ;
if ( err < 0 )
return err ;
2007-06-14 03:03:51 +08:00
ifm = nlmsg_data ( nlh ) ;
2022-04-16 00:53:27 +08:00
if ( ifm - > ifi_index > 0 ) {
link_specified = true ;
2007-09-18 02:56:21 +08:00
dev = __dev_get_by_index ( net , ifm - > ifi_index ) ;
2022-04-16 00:53:27 +08:00
} else if ( tb [ IFLA_IFNAME ] | | tb [ IFLA_ALT_IFNAME ] ) {
link_specified = true ;
2022-04-16 00:53:28 +08:00
dev = rtnl_dev_get ( net , tb ) ;
2022-04-16 00:53:27 +08:00
} else {
link_specified = false ;
2019-09-30 17:48:18 +08:00
dev = NULL ;
2022-04-16 00:53:27 +08:00
}
2007-06-14 03:03:51 +08:00
2022-02-01 09:21:06 +08:00
master_dev = NULL ;
m_ops = NULL ;
2014-01-22 16:05:55 +08:00
if ( dev ) {
master_dev = netdev_master_upper_dev_get ( dev ) ;
if ( master_dev )
m_ops = master_dev - > rtnl_link_ops ;
}
2021-08-03 20:02:50 +08:00
err = validate_linkmsg ( dev , tb , extack ) ;
2009-11-07 17:26:17 +08:00
if ( err < 0 )
2008-02-24 11:54:36 +08:00
return err ;
2007-06-14 03:03:51 +08:00
if ( tb [ IFLA_LINKINFO ] ) {
netlink: make validation more configurable for future strictness
We currently have two levels of strict validation:
1) liberal (default)
- undefined (type >= max) & NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
- garbage at end of message accepted
2) strict (opt-in)
- NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
Split out parsing strictness into four different options:
* TRAILING - check that there's no trailing data after parsing
attributes (in message or nested)
* MAXTYPE - reject attrs > max known type
* UNSPEC - reject attributes with NLA_UNSPEC policy entries
* STRICT_ATTRS - strictly validate attribute size
The default for future things should be *everything*.
The current *_strict() is a combination of TRAILING and MAXTYPE,
and is renamed to _deprecated_strict().
The current regular parsing has none of this, and is renamed to
*_parse_deprecated().
Additionally it allows us to selectively set one of the new flags
even on old policies. Notably, the UNSPEC flag could be useful in
this case, since it can be arranged (by filling in the policy) to
not be an incompatible userspace ABI change, but would then going
forward prevent forgetting attribute entries. Similar can apply
to the POLICY flag.
We end up with the following renames:
* nla_parse -> nla_parse_deprecated
* nla_parse_strict -> nla_parse_deprecated_strict
* nlmsg_parse -> nlmsg_parse_deprecated
* nlmsg_parse_strict -> nlmsg_parse_deprecated_strict
* nla_parse_nested -> nla_parse_nested_deprecated
* nla_validate_nested -> nla_validate_nested_deprecated
Using spatch, of course:
@@
expression TB, MAX, HEAD, LEN, POL, EXT;
@@
-nla_parse(TB, MAX, HEAD, LEN, POL, EXT)
+nla_parse_deprecated(TB, MAX, HEAD, LEN, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression TB, MAX, NLA, POL, EXT;
@@
-nla_parse_nested(TB, MAX, NLA, POL, EXT)
+nla_parse_nested_deprecated(TB, MAX, NLA, POL, EXT)
@@
expression START, MAX, POL, EXT;
@@
-nla_validate_nested(START, MAX, POL, EXT)
+nla_validate_nested_deprecated(START, MAX, POL, EXT)
@@
expression NLH, HDRLEN, MAX, POL, EXT;
@@
-nlmsg_validate(NLH, HDRLEN, MAX, POL, EXT)
+nlmsg_validate_deprecated(NLH, HDRLEN, MAX, POL, EXT)
For this patch, don't actually add the strict, non-renamed versions
yet so that it breaks compile if I get it wrong.
Also, while at it, make nla_validate and nla_parse go down to a
common __nla_validate_parse() function to avoid code duplication.
Ultimately, this allows us to have very strict validation for every
new caller of nla_parse()/nlmsg_parse() etc as re-introduced in the
next patch, while existing things will continue to work as is.
In effect then, this adds fully strict validation for any new command.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-26 20:07:28 +08:00
err = nla_parse_nested_deprecated ( linkinfo , IFLA_INFO_MAX ,
tb [ IFLA_LINKINFO ] ,
ifla_info_policy , NULL ) ;
2007-06-14 03:03:51 +08:00
if ( err < 0 )
return err ;
} else
memset ( linkinfo , 0 , sizeof ( linkinfo ) ) ;
if ( linkinfo [ IFLA_INFO_KIND ] ) {
2020-11-16 01:08:06 +08:00
nla_strscpy ( kind , linkinfo [ IFLA_INFO_KIND ] , sizeof ( kind ) ) ;
2007-06-14 03:03:51 +08:00
ops = rtnl_link_ops_get ( kind ) ;
} else {
kind [ 0 ] = ' \0 ' ;
ops = NULL ;
}
2018-11-28 14:32:30 +08:00
data = NULL ;
if ( ops ) {
if ( ops - > maxtype > RTNL_MAX_TYPE )
return - EINVAL ;
2018-05-31 06:20:52 +08:00
2018-11-28 14:32:30 +08:00
if ( ops - > maxtype & & linkinfo [ IFLA_INFO_DATA ] ) {
2022-04-30 07:55:06 +08:00
err = nla_parse_nested_deprecated ( tbs - > attr , ops - > maxtype ,
netlink: make validation more configurable for future strictness
We currently have two levels of strict validation:
1) liberal (default)
- undefined (type >= max) & NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
- garbage at end of message accepted
2) strict (opt-in)
- NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
Split out parsing strictness into four different options:
* TRAILING - check that there's no trailing data after parsing
attributes (in message or nested)
* MAXTYPE - reject attrs > max known type
* UNSPEC - reject attributes with NLA_UNSPEC policy entries
* STRICT_ATTRS - strictly validate attribute size
The default for future things should be *everything*.
The current *_strict() is a combination of TRAILING and MAXTYPE,
and is renamed to _deprecated_strict().
The current regular parsing has none of this, and is renamed to
*_parse_deprecated().
Additionally it allows us to selectively set one of the new flags
even on old policies. Notably, the UNSPEC flag could be useful in
this case, since it can be arranged (by filling in the policy) to
not be an incompatible userspace ABI change, but would then going
forward prevent forgetting attribute entries. Similar can apply
to the POLICY flag.
We end up with the following renames:
* nla_parse -> nla_parse_deprecated
* nla_parse_strict -> nla_parse_deprecated_strict
* nlmsg_parse -> nlmsg_parse_deprecated
* nlmsg_parse_strict -> nlmsg_parse_deprecated_strict
* nla_parse_nested -> nla_parse_nested_deprecated
* nla_validate_nested -> nla_validate_nested_deprecated
Using spatch, of course:
@@
expression TB, MAX, HEAD, LEN, POL, EXT;
@@
-nla_parse(TB, MAX, HEAD, LEN, POL, EXT)
+nla_parse_deprecated(TB, MAX, HEAD, LEN, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression TB, MAX, NLA, POL, EXT;
@@
-nla_parse_nested(TB, MAX, NLA, POL, EXT)
+nla_parse_nested_deprecated(TB, MAX, NLA, POL, EXT)
@@
expression START, MAX, POL, EXT;
@@
-nla_validate_nested(START, MAX, POL, EXT)
+nla_validate_nested_deprecated(START, MAX, POL, EXT)
@@
expression NLH, HDRLEN, MAX, POL, EXT;
@@
-nlmsg_validate(NLH, HDRLEN, MAX, POL, EXT)
+nlmsg_validate_deprecated(NLH, HDRLEN, MAX, POL, EXT)
For this patch, don't actually add the strict, non-renamed versions
yet so that it breaks compile if I get it wrong.
Also, while at it, make nla_validate and nla_parse go down to a
common __nla_validate_parse() function to avoid code duplication.
Ultimately, this allows us to have very strict validation for every
new caller of nla_parse()/nlmsg_parse() etc as re-introduced in the
next patch, while existing things will continue to work as is.
In effect then, this adds fully strict validation for any new command.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-26 20:07:28 +08:00
linkinfo [ IFLA_INFO_DATA ] ,
ops - > policy , extack ) ;
2018-11-28 14:32:30 +08:00
if ( err < 0 )
return err ;
2022-04-30 07:55:06 +08:00
data = tbs - > attr ;
2007-06-14 03:03:51 +08:00
}
2018-11-28 14:32:30 +08:00
if ( ops - > validate ) {
err = ops - > validate ( tb , data , extack ) ;
if ( err < 0 )
return err ;
}
}
2007-06-14 03:03:51 +08:00
2018-11-28 14:32:30 +08:00
slave_data = NULL ;
if ( m_ops ) {
if ( m_ops - > slave_maxtype > RTNL_SLAVE_MAX_TYPE )
return - EINVAL ;
2018-05-31 06:20:52 +08:00
2018-11-28 14:32:30 +08:00
if ( m_ops - > slave_maxtype & &
linkinfo [ IFLA_INFO_SLAVE_DATA ] ) {
2022-04-30 07:55:06 +08:00
err = nla_parse_nested_deprecated ( tbs - > slave_attr ,
netlink: make validation more configurable for future strictness
We currently have two levels of strict validation:
1) liberal (default)
- undefined (type >= max) & NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
- garbage at end of message accepted
2) strict (opt-in)
- NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
Split out parsing strictness into four different options:
* TRAILING - check that there's no trailing data after parsing
attributes (in message or nested)
* MAXTYPE - reject attrs > max known type
* UNSPEC - reject attributes with NLA_UNSPEC policy entries
* STRICT_ATTRS - strictly validate attribute size
The default for future things should be *everything*.
The current *_strict() is a combination of TRAILING and MAXTYPE,
and is renamed to _deprecated_strict().
The current regular parsing has none of this, and is renamed to
*_parse_deprecated().
Additionally it allows us to selectively set one of the new flags
even on old policies. Notably, the UNSPEC flag could be useful in
this case, since it can be arranged (by filling in the policy) to
not be an incompatible userspace ABI change, but would then going
forward prevent forgetting attribute entries. Similar can apply
to the POLICY flag.
We end up with the following renames:
* nla_parse -> nla_parse_deprecated
* nla_parse_strict -> nla_parse_deprecated_strict
* nlmsg_parse -> nlmsg_parse_deprecated
* nlmsg_parse_strict -> nlmsg_parse_deprecated_strict
* nla_parse_nested -> nla_parse_nested_deprecated
* nla_validate_nested -> nla_validate_nested_deprecated
Using spatch, of course:
@@
expression TB, MAX, HEAD, LEN, POL, EXT;
@@
-nla_parse(TB, MAX, HEAD, LEN, POL, EXT)
+nla_parse_deprecated(TB, MAX, HEAD, LEN, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression TB, MAX, NLA, POL, EXT;
@@
-nla_parse_nested(TB, MAX, NLA, POL, EXT)
+nla_parse_nested_deprecated(TB, MAX, NLA, POL, EXT)
@@
expression START, MAX, POL, EXT;
@@
-nla_validate_nested(START, MAX, POL, EXT)
+nla_validate_nested_deprecated(START, MAX, POL, EXT)
@@
expression NLH, HDRLEN, MAX, POL, EXT;
@@
-nlmsg_validate(NLH, HDRLEN, MAX, POL, EXT)
+nlmsg_validate_deprecated(NLH, HDRLEN, MAX, POL, EXT)
For this patch, don't actually add the strict, non-renamed versions
yet so that it breaks compile if I get it wrong.
Also, while at it, make nla_validate and nla_parse go down to a
common __nla_validate_parse() function to avoid code duplication.
Ultimately, this allows us to have very strict validation for every
new caller of nla_parse()/nlmsg_parse() etc as re-introduced in the
next patch, while existing things will continue to work as is.
In effect then, this adds fully strict validation for any new command.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-26 20:07:28 +08:00
m_ops - > slave_maxtype ,
linkinfo [ IFLA_INFO_SLAVE_DATA ] ,
m_ops - > slave_policy ,
extack ) ;
2018-11-28 14:32:30 +08:00
if ( err < 0 )
return err ;
2022-04-30 07:55:06 +08:00
slave_data = tbs - > slave_attr ;
2014-01-22 16:05:55 +08:00
}
2018-11-28 14:32:30 +08:00
}
2014-01-22 16:05:55 +08:00
2018-11-28 14:32:30 +08:00
if ( dev ) {
int status = 0 ;
2007-06-14 03:03:51 +08:00
2018-11-28 14:32:30 +08:00
if ( nlh - > nlmsg_flags & NLM_F_EXCL )
return - EEXIST ;
if ( nlh - > nlmsg_flags & NLM_F_REPLACE )
return - EOPNOTSUPP ;
2007-06-14 03:03:51 +08:00
2018-11-28 14:32:30 +08:00
if ( linkinfo [ IFLA_INFO_DATA ] ) {
if ( ! ops | | ops ! = dev - > rtnl_link_ops | |
! ops - > changelink )
return - EOPNOTSUPP ;
2007-06-14 03:03:51 +08:00
2018-11-28 14:32:30 +08:00
err = ops - > changelink ( dev , tb , data , extack ) ;
if ( err < 0 )
return err ;
status | = DO_SETLINK_NOTIFY ;
}
2014-01-22 16:05:55 +08:00
2018-11-28 14:32:30 +08:00
if ( linkinfo [ IFLA_INFO_SLAVE_DATA ] ) {
if ( ! m_ops | | ! m_ops - > slave_changelink )
return - EOPNOTSUPP ;
2014-01-22 16:05:55 +08:00
2018-11-28 14:32:30 +08:00
err = m_ops - > slave_changelink ( master_dev , dev , tb ,
slave_data , extack ) ;
if ( err < 0 )
return err ;
status | = DO_SETLINK_NOTIFY ;
2007-06-14 03:03:51 +08:00
}
2022-04-16 00:53:28 +08:00
return do_setlink ( skb , dev , ifm , extack , tb , status ) ;
2018-11-28 14:32:30 +08:00
}
if ( ! ( nlh - > nlmsg_flags & NLM_F_CREATE ) ) {
2022-04-16 00:53:27 +08:00
/* No dev found and NLM_F_CREATE not set. Requested dev does not exist,
* or it ' s for a group
*/
if ( link_specified )
return - ENODEV ;
if ( tb [ IFLA_GROUP ] )
2018-11-28 14:32:30 +08:00
return rtnl_group_changelink ( skb , net ,
2011-01-20 11:00:42 +08:00
nla_get_u32 ( tb [ IFLA_GROUP ] ) ,
2017-05-01 12:46:46 +08:00
ifm , extack , tb ) ;
2022-04-19 20:51:51 +08:00
return - ENODEV ;
2018-11-28 14:32:30 +08:00
}
2007-06-14 03:03:51 +08:00
2018-11-28 14:32:30 +08:00
if ( tb [ IFLA_MAP ] | | tb [ IFLA_PROTINFO ] )
return - EOPNOTSUPP ;
2007-06-14 03:03:51 +08:00
2018-11-28 14:32:30 +08:00
if ( ! ops ) {
2008-10-17 06:24:51 +08:00
# ifdef CONFIG_MODULES
2018-11-28 14:32:30 +08:00
if ( kind [ 0 ] ) {
__rtnl_unlock ( ) ;
request_module ( " rtnl-link-%s " , kind ) ;
rtnl_lock ( ) ;
ops = rtnl_link_ops_get ( kind ) ;
if ( ops )
goto replay ;
2007-06-14 03:03:51 +08:00
}
2018-11-28 14:32:30 +08:00
# endif
NL_SET_ERR_MSG ( extack , " Unknown device type " ) ;
return - EOPNOTSUPP ;
}
2007-06-14 03:03:51 +08:00
2022-04-30 07:55:07 +08:00
return rtnl_newlink_create ( skb , ifm , ops , tb , data , extack ) ;
}
static int rtnl_newlink_create ( struct sk_buff * skb , struct ifinfomsg * ifm ,
const struct rtnl_link_ops * ops ,
struct nlattr * * tb , struct nlattr * * data ,
struct netlink_ext_ack * extack )
{
unsigned char name_assign_type = NET_NAME_USER ;
struct net * net = sock_net ( skb - > sk ) ;
struct net * dest_net , * link_net ;
struct net_device * dev ;
char ifname [ IFNAMSIZ ] ;
int err ;
2021-06-12 16:20:54 +08:00
if ( ! ops - > alloc & & ! ops - > setup )
2018-11-28 14:32:30 +08:00
return - EOPNOTSUPP ;
2014-06-26 15:58:25 +08:00
2022-04-16 00:53:28 +08:00
if ( tb [ IFLA_IFNAME ] ) {
nla_strscpy ( ifname , tb [ IFLA_IFNAME ] , IFNAMSIZ ) ;
} else {
2018-11-28 14:32:30 +08:00
snprintf ( ifname , IFNAMSIZ , " %s%%d " , ops - > kind ) ;
name_assign_type = NET_NAME_ENUM ;
}
2007-08-09 13:16:38 +08:00
2018-11-28 14:32:30 +08:00
dest_net = rtnl_link_get_net_capable ( skb , net , tb , CAP_NET_ADMIN ) ;
if ( IS_ERR ( dest_net ) )
return PTR_ERR ( dest_net ) ;
2011-01-29 22:57:22 +08:00
2018-11-28 14:32:30 +08:00
if ( tb [ IFLA_LINK_NETNSID ] ) {
int id = nla_get_s32 ( tb [ IFLA_LINK_NETNSID ] ) ;
2015-01-15 22:11:18 +08:00
2018-11-28 14:32:30 +08:00
link_net = get_net_ns_by_id ( dest_net , id ) ;
if ( ! link_net ) {
NL_SET_ERR_MSG ( extack , " Unknown network namespace id " ) ;
err = - EINVAL ;
2012-08-09 05:52:46 +08:00
goto out ;
}
2018-11-28 14:32:30 +08:00
err = - EPERM ;
if ( ! netlink_ns_capable ( skb , link_net - > user_ns , CAP_NET_ADMIN ) )
goto out ;
} else {
link_net = NULL ;
}
dev = rtnl_create_link ( link_net ? : dest_net , ifname ,
name_assign_type , ops , tb , extack ) ;
if ( IS_ERR ( dev ) ) {
err = PTR_ERR ( dev ) ;
goto out ;
}
2012-08-09 05:52:46 +08:00
2018-11-28 14:32:30 +08:00
dev - > ifindex = ifm - > ifi_index ;
2012-08-09 05:52:46 +08:00
2021-01-07 02:40:06 +08:00
if ( ops - > newlink )
2018-11-28 14:32:30 +08:00
err = ops - > newlink ( link_net ? : net , dev , tb , data , extack ) ;
2021-01-07 02:40:06 +08:00
else
2018-11-28 14:32:30 +08:00
err = register_netdevice ( dev ) ;
2021-01-07 02:40:06 +08:00
if ( err < 0 ) {
free_netdev ( dev ) ;
goto out ;
2018-11-28 14:32:30 +08:00
}
2021-01-07 02:40:06 +08:00
2018-11-28 14:32:30 +08:00
err = rtnl_configure_link ( dev , ifm ) ;
if ( err < 0 )
goto out_unregister ;
if ( link_net ) {
2021-04-07 14:40:51 +08:00
err = dev_change_net_namespace ( dev , dest_net , ifname ) ;
2015-03-11 09:58:32 +08:00
if ( err < 0 )
goto out_unregister ;
2018-11-28 14:32:30 +08:00
}
if ( tb [ IFLA_MASTER ] ) {
err = do_set_master ( dev , nla_get_u32 ( tb [ IFLA_MASTER ] ) , extack ) ;
if ( err )
goto out_unregister ;
}
rtnetlink: support specifying device flags on device creation
commit e8469ed959c373c2ff9e6f488aa5a14971aebe1f
Author: Patrick McHardy <kaber@trash.net>
Date: Tue Feb 23 20:41:30 2010 +0100
Support specifying the initial device flags when creating a device though
rtnl_link. Devices allocated by rtnl_create_link() are marked as INITIALIZING
in order to surpress netlink registration notifications. To complete setup,
rtnl_configure_link() must be called, which performs the device flag changes
and invokes the deferred notifiers if everything went well.
Two examples:
# add macvlan to eth0
#
$ ip link add link eth0 up allmulticast on type macvlan
[LINK]11: macvlan0@eth0: <BROADCAST,MULTICAST,ALLMULTI,UP,LOWER_UP> mtu 1500 qdisc noqueue state UNKNOWN
link/ether 26:f8:84:02:f9:2a brd ff:ff:ff:ff:ff:ff
[ROUTE]ff00::/8 dev macvlan0 table local metric 256 mtu 1500 advmss 1440 hoplimit 0
[ROUTE]fe80::/64 dev macvlan0 proto kernel metric 256 mtu 1500 advmss 1440 hoplimit 0
[LINK]11: macvlan0@eth0: <BROADCAST,MULTICAST,ALLMULTI,UP,LOWER_UP> mtu 1500
link/ether 26:f8:84:02:f9:2a
[ADDR]11: macvlan0 inet6 fe80::24f8:84ff:fe02:f92a/64 scope link
valid_lft forever preferred_lft forever
[ROUTE]local fe80::24f8:84ff:fe02:f92a via :: dev lo table local proto none metric 0 mtu 16436 advmss 16376 hoplimit 0
[ROUTE]default via fe80::215:e9ff:fef0:10f8 dev macvlan0 proto kernel metric 1024 mtu 1500 advmss 1440 hoplimit 0
[NEIGH]fe80::215:e9ff:fef0:10f8 dev macvlan0 lladdr 00:15:e9:f0:10:f8 router STALE
[ROUTE]2001:6f8:974::/64 dev macvlan0 proto kernel metric 256 expires 0sec mtu 1500 advmss 1440 hoplimit 0
[PREFIX]prefix 2001:6f8:974::/64 dev macvlan0 onlink autoconf valid 14400 preferred 131084
[ADDR]11: macvlan0 inet6 2001:6f8:974:0:24f8:84ff:fe02:f92a/64 scope global dynamic
valid_lft 86399sec preferred_lft 14399sec
# add VLAN to eth1, eth1 is down
#
$ ip link add link eth1 up type vlan id 1000
RTNETLINK answers: Network is down
<no events>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-02-26 14:34:54 +08:00
out :
2018-11-28 14:32:30 +08:00
if ( link_net )
put_net ( link_net ) ;
put_net ( dest_net ) ;
return err ;
2015-03-11 09:58:32 +08:00
out_unregister :
2018-11-28 14:32:30 +08:00
if ( ops - > newlink ) {
LIST_HEAD ( list_kill ) ;
2015-03-11 09:58:32 +08:00
2018-11-28 14:32:30 +08:00
ops - > dellink ( dev , & list_kill ) ;
unregister_netdevice_many ( & list_kill ) ;
} else {
unregister_netdevice ( dev ) ;
2007-06-14 03:03:51 +08:00
}
2018-11-28 14:32:30 +08:00
goto out ;
2007-06-14 03:03:51 +08:00
}
2018-11-28 14:32:31 +08:00
static int rtnl_newlink ( struct sk_buff * skb , struct nlmsghdr * nlh ,
struct netlink_ext_ack * extack )
{
2022-04-30 07:55:06 +08:00
struct rtnl_newlink_tbs * tbs ;
2018-11-28 14:32:31 +08:00
int ret ;
2022-04-30 07:55:06 +08:00
tbs = kmalloc ( sizeof ( * tbs ) , GFP_KERNEL ) ;
if ( ! tbs )
2018-11-28 14:32:31 +08:00
return - ENOMEM ;
2022-04-30 07:55:06 +08:00
ret = __rtnl_newlink ( skb , nlh , tbs , extack ) ;
kfree ( tbs ) ;
2018-11-28 14:32:31 +08:00
return ret ;
}
2019-01-19 02:46:16 +08:00
static int rtnl_valid_getlink_req ( struct sk_buff * skb ,
const struct nlmsghdr * nlh ,
struct nlattr * * tb ,
struct netlink_ext_ack * extack )
{
struct ifinfomsg * ifm ;
int i , err ;
if ( nlh - > nlmsg_len < nlmsg_msg_size ( sizeof ( * ifm ) ) ) {
NL_SET_ERR_MSG ( extack , " Invalid header for get link " ) ;
return - EINVAL ;
}
if ( ! netlink_strict_get_check ( skb ) )
netlink: make validation more configurable for future strictness
We currently have two levels of strict validation:
1) liberal (default)
- undefined (type >= max) & NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
- garbage at end of message accepted
2) strict (opt-in)
- NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
Split out parsing strictness into four different options:
* TRAILING - check that there's no trailing data after parsing
attributes (in message or nested)
* MAXTYPE - reject attrs > max known type
* UNSPEC - reject attributes with NLA_UNSPEC policy entries
* STRICT_ATTRS - strictly validate attribute size
The default for future things should be *everything*.
The current *_strict() is a combination of TRAILING and MAXTYPE,
and is renamed to _deprecated_strict().
The current regular parsing has none of this, and is renamed to
*_parse_deprecated().
Additionally it allows us to selectively set one of the new flags
even on old policies. Notably, the UNSPEC flag could be useful in
this case, since it can be arranged (by filling in the policy) to
not be an incompatible userspace ABI change, but would then going
forward prevent forgetting attribute entries. Similar can apply
to the POLICY flag.
We end up with the following renames:
* nla_parse -> nla_parse_deprecated
* nla_parse_strict -> nla_parse_deprecated_strict
* nlmsg_parse -> nlmsg_parse_deprecated
* nlmsg_parse_strict -> nlmsg_parse_deprecated_strict
* nla_parse_nested -> nla_parse_nested_deprecated
* nla_validate_nested -> nla_validate_nested_deprecated
Using spatch, of course:
@@
expression TB, MAX, HEAD, LEN, POL, EXT;
@@
-nla_parse(TB, MAX, HEAD, LEN, POL, EXT)
+nla_parse_deprecated(TB, MAX, HEAD, LEN, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression TB, MAX, NLA, POL, EXT;
@@
-nla_parse_nested(TB, MAX, NLA, POL, EXT)
+nla_parse_nested_deprecated(TB, MAX, NLA, POL, EXT)
@@
expression START, MAX, POL, EXT;
@@
-nla_validate_nested(START, MAX, POL, EXT)
+nla_validate_nested_deprecated(START, MAX, POL, EXT)
@@
expression NLH, HDRLEN, MAX, POL, EXT;
@@
-nlmsg_validate(NLH, HDRLEN, MAX, POL, EXT)
+nlmsg_validate_deprecated(NLH, HDRLEN, MAX, POL, EXT)
For this patch, don't actually add the strict, non-renamed versions
yet so that it breaks compile if I get it wrong.
Also, while at it, make nla_validate and nla_parse go down to a
common __nla_validate_parse() function to avoid code duplication.
Ultimately, this allows us to have very strict validation for every
new caller of nla_parse()/nlmsg_parse() etc as re-introduced in the
next patch, while existing things will continue to work as is.
In effect then, this adds fully strict validation for any new command.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-26 20:07:28 +08:00
return nlmsg_parse_deprecated ( nlh , sizeof ( * ifm ) , tb , IFLA_MAX ,
ifla_policy , extack ) ;
2019-01-19 02:46:16 +08:00
ifm = nlmsg_data ( nlh ) ;
if ( ifm - > __ifi_pad | | ifm - > ifi_type | | ifm - > ifi_flags | |
ifm - > ifi_change ) {
NL_SET_ERR_MSG ( extack , " Invalid values in header for get link request " ) ;
return - EINVAL ;
}
netlink: make validation more configurable for future strictness
We currently have two levels of strict validation:
1) liberal (default)
- undefined (type >= max) & NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
- garbage at end of message accepted
2) strict (opt-in)
- NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
Split out parsing strictness into four different options:
* TRAILING - check that there's no trailing data after parsing
attributes (in message or nested)
* MAXTYPE - reject attrs > max known type
* UNSPEC - reject attributes with NLA_UNSPEC policy entries
* STRICT_ATTRS - strictly validate attribute size
The default for future things should be *everything*.
The current *_strict() is a combination of TRAILING and MAXTYPE,
and is renamed to _deprecated_strict().
The current regular parsing has none of this, and is renamed to
*_parse_deprecated().
Additionally it allows us to selectively set one of the new flags
even on old policies. Notably, the UNSPEC flag could be useful in
this case, since it can be arranged (by filling in the policy) to
not be an incompatible userspace ABI change, but would then going
forward prevent forgetting attribute entries. Similar can apply
to the POLICY flag.
We end up with the following renames:
* nla_parse -> nla_parse_deprecated
* nla_parse_strict -> nla_parse_deprecated_strict
* nlmsg_parse -> nlmsg_parse_deprecated
* nlmsg_parse_strict -> nlmsg_parse_deprecated_strict
* nla_parse_nested -> nla_parse_nested_deprecated
* nla_validate_nested -> nla_validate_nested_deprecated
Using spatch, of course:
@@
expression TB, MAX, HEAD, LEN, POL, EXT;
@@
-nla_parse(TB, MAX, HEAD, LEN, POL, EXT)
+nla_parse_deprecated(TB, MAX, HEAD, LEN, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression TB, MAX, NLA, POL, EXT;
@@
-nla_parse_nested(TB, MAX, NLA, POL, EXT)
+nla_parse_nested_deprecated(TB, MAX, NLA, POL, EXT)
@@
expression START, MAX, POL, EXT;
@@
-nla_validate_nested(START, MAX, POL, EXT)
+nla_validate_nested_deprecated(START, MAX, POL, EXT)
@@
expression NLH, HDRLEN, MAX, POL, EXT;
@@
-nlmsg_validate(NLH, HDRLEN, MAX, POL, EXT)
+nlmsg_validate_deprecated(NLH, HDRLEN, MAX, POL, EXT)
For this patch, don't actually add the strict, non-renamed versions
yet so that it breaks compile if I get it wrong.
Also, while at it, make nla_validate and nla_parse go down to a
common __nla_validate_parse() function to avoid code duplication.
Ultimately, this allows us to have very strict validation for every
new caller of nla_parse()/nlmsg_parse() etc as re-introduced in the
next patch, while existing things will continue to work as is.
In effect then, this adds fully strict validation for any new command.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-26 20:07:28 +08:00
err = nlmsg_parse_deprecated_strict ( nlh , sizeof ( * ifm ) , tb , IFLA_MAX ,
ifla_policy , extack ) ;
2019-01-19 02:46:16 +08:00
if ( err )
return err ;
for ( i = 0 ; i < = IFLA_MAX ; i + + ) {
if ( ! tb [ i ] )
continue ;
switch ( i ) {
case IFLA_IFNAME :
2019-09-30 17:48:20 +08:00
case IFLA_ALT_IFNAME :
2019-01-19 02:46:16 +08:00
case IFLA_EXT_MASK :
case IFLA_TARGET_NETNSID :
break ;
default :
NL_SET_ERR_MSG ( extack , " Unsupported attribute in get link request " ) ;
return - EINVAL ;
}
}
return 0 ;
}
2017-04-17 00:48:24 +08:00
static int rtnl_getlink ( struct sk_buff * skb , struct nlmsghdr * nlh ,
struct netlink_ext_ack * extack )
2006-02-23 07:10:56 +08:00
{
2008-03-26 01:26:21 +08:00
struct net * net = sock_net ( skb - > sk ) ;
2017-11-03 03:04:38 +08:00
struct net * tgt_net = net ;
2006-08-05 14:05:34 +08:00
struct ifinfomsg * ifm ;
struct nlattr * tb [ IFLA_MAX + 1 ] ;
struct net_device * dev = NULL ;
struct sk_buff * nskb ;
2017-11-03 03:04:38 +08:00
int netnsid = - 1 ;
2006-11-11 06:10:15 +08:00
int err ;
2012-02-22 05:54:48 +08:00
u32 ext_filter_mask = 0 ;
2006-02-23 07:10:56 +08:00
2019-01-19 02:46:16 +08:00
err = rtnl_valid_getlink_req ( skb , nlh , tb , extack ) ;
2006-08-05 14:05:34 +08:00
if ( err < 0 )
2006-09-27 14:26:38 +08:00
return err ;
2006-08-05 14:05:34 +08:00
2018-02-07 20:53:20 +08:00
err = rtnl_ensure_unique_netns ( tb , extack , true ) ;
if ( err < 0 )
return err ;
2018-09-05 03:53:53 +08:00
if ( tb [ IFLA_TARGET_NETNSID ] ) {
netnsid = nla_get_s32 ( tb [ IFLA_TARGET_NETNSID ] ) ;
2018-09-05 03:53:47 +08:00
tgt_net = rtnl_get_net_ns_capable ( NETLINK_CB ( skb ) . sk , netnsid ) ;
2017-11-03 03:04:38 +08:00
if ( IS_ERR ( tgt_net ) )
return PTR_ERR ( tgt_net ) ;
}
2012-02-22 05:54:48 +08:00
if ( tb [ IFLA_EXT_MASK ] )
ext_filter_mask = nla_get_u32 ( tb [ IFLA_EXT_MASK ] ) ;
2017-11-03 03:04:38 +08:00
err = - EINVAL ;
2006-08-05 14:05:34 +08:00
ifm = nlmsg_data ( nlh ) ;
2009-10-21 18:59:31 +08:00
if ( ifm - > ifi_index > 0 )
2017-11-03 03:04:38 +08:00
dev = __dev_get_by_index ( tgt_net , ifm - > ifi_index ) ;
2019-09-30 17:48:20 +08:00
else if ( tb [ IFLA_IFNAME ] | | tb [ IFLA_ALT_IFNAME ] )
2022-04-16 00:53:28 +08:00
dev = rtnl_dev_get ( tgt_net , tb ) ;
2009-10-21 18:59:31 +08:00
else
2017-11-03 03:04:38 +08:00
goto out ;
2006-02-23 07:10:56 +08:00
2017-11-03 03:04:38 +08:00
err = - ENODEV ;
2009-10-21 18:59:31 +08:00
if ( dev = = NULL )
2017-11-03 03:04:38 +08:00
goto out ;
2009-10-21 18:59:31 +08:00
2017-11-03 03:04:38 +08:00
err = - ENOBUFS ;
2012-02-22 05:54:48 +08:00
nskb = nlmsg_new ( if_nlmsg_size ( dev , ext_filter_mask ) , GFP_KERNEL ) ;
2009-10-21 18:59:31 +08:00
if ( nskb = = NULL )
2017-11-03 03:04:38 +08:00
goto out ;
2006-08-05 14:05:34 +08:00
2017-11-03 03:04:38 +08:00
err = rtnl_fill_ifinfo ( nskb , dev , net ,
RTM_NEWLINK , NETLINK_CB ( skb ) . portid ,
nlh - > nlmsg_seq , 0 , 0 , ext_filter_mask ,
netns: fix GFP flags in rtnl_net_notifyid()
In rtnl_net_notifyid(), we certainly can't pass a null GFP flag to
rtnl_notify(). A GFP_KERNEL flag would be fine in most circumstances,
but there are a few paths calling rtnl_net_notifyid() from atomic
context or from RCU critical sections. The later also precludes the use
of gfp_any() as it wouldn't detect the RCU case. Also, the nlmsg_new()
call is wrong too, as it uses GFP_KERNEL unconditionally.
Therefore, we need to pass the GFP flags as parameter and propagate it
through function calls until the proper flags can be determined.
In most cases, GFP_KERNEL is fine. The exceptions are:
* openvswitch: ovs_vport_cmd_get() and ovs_vport_cmd_dump()
indirectly call rtnl_net_notifyid() from RCU critical section,
* rtnetlink: rtmsg_ifinfo_build_skb() already receives GFP flags as
parameter.
Also, in ovs_vport_cmd_build_info(), let's change the GFP flags used
by nlmsg_new(). The function is allowed to sleep, so better make the
flags consistent with the ones used in the following
ovs_vport_cmd_fill_info() call.
Found by code inspection.
Fixes: 9a9634545c70 ("netns: notify netns id events")
Signed-off-by: Guillaume Nault <gnault@redhat.com>
Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-10-24 00:39:04 +08:00
0 , NULL , 0 , netnsid , GFP_KERNEL ) ;
2007-02-01 15:16:40 +08:00
if ( err < 0 ) {
/* -EMSGSIZE implies BUG in if_nlmsg_size */
WARN_ON ( err = = - EMSGSIZE ) ;
kfree_skb ( nskb ) ;
2009-10-21 18:59:31 +08:00
} else
2012-09-08 04:12:54 +08:00
err = rtnl_unicast ( nskb , net , NETLINK_CB ( skb ) . portid ) ;
2017-11-03 03:04:38 +08:00
out :
if ( netnsid > = 0 )
put_net ( tgt_net ) ;
2006-02-23 07:10:56 +08:00
2006-08-05 14:05:34 +08:00
return err ;
2006-02-23 07:10:56 +08:00
}
2019-09-30 17:48:16 +08:00
/*
 * rtnl_alt_ifname - add or remove one alternative interface name
 * @cmd: RTM_NEWLINKPROP to add the name, RTM_DELLINKPROP to remove it
 * @dev: device the name belongs to
 * @attr: attribute carrying the name string
 * @changed: set to true when the operation succeeded; untouched otherwise
 * @extack: extended ack for error reporting
 *
 * Returns 0 on success or a negative errno (-EINVAL when adding would make
 * the property list reach U16_MAX or @cmd is unknown, -ENOMEM when the name
 * cannot be duplicated, or the error from the name-node helpers).
 */
static int rtnl_alt_ifname(int cmd, struct net_device *dev, struct nlattr *attr,
			   bool *changed, struct netlink_ext_ack *extack)
{
	char *name;
	int err;

	err = nla_validate(attr, attr->nla_len, IFLA_MAX, ifla_policy, extack);
	if (err)
		return err;

	if (cmd == RTM_NEWLINKPROP) {
		size_t size = rtnl_prop_list_size(dev);

		/* Budget for a maximum-length name on top of the current list. */
		size += nla_total_size(ALTIFNAMSIZ);
		if (size >= U16_MAX) {
			NL_SET_ERR_MSG(extack,
				       " effective property list too long ");
			return -EINVAL;
		}
	}

	name = nla_strdup(attr, GFP_KERNEL_ACCOUNT);
	if (!name)
		return -ENOMEM;

	switch (cmd) {
	case RTM_NEWLINKPROP:
		err = netdev_name_node_alt_create(dev, name);
		/*
		 * On success the copy must survive the kfree() below;
		 * presumably the new name node keeps the pointer.
		 */
		if (!err)
			name = NULL;
		break;
	case RTM_DELLINKPROP:
		err = netdev_name_node_alt_destroy(dev, name);
		break;
	default:
		WARN_ON_ONCE(1);
		err = -EINVAL;
		break;
	}

	kfree(name);
	if (!err)
		*changed = true;
	return err;
}
static int rtnl_linkprop ( int cmd , struct sk_buff * skb , struct nlmsghdr * nlh ,
struct netlink_ext_ack * extack )
{
struct net * net = sock_net ( skb - > sk ) ;
struct nlattr * tb [ IFLA_MAX + 1 ] ;
struct net_device * dev ;
struct ifinfomsg * ifm ;
bool changed = false ;
struct nlattr * attr ;
int err , rem ;
err = nlmsg_parse ( nlh , sizeof ( * ifm ) , tb , IFLA_MAX , ifla_policy , extack ) ;
if ( err )
return err ;
err = rtnl_ensure_unique_netns ( tb , extack , true ) ;
if ( err )
return err ;
ifm = nlmsg_data ( nlh ) ;
2019-09-30 17:48:19 +08:00
if ( ifm - > ifi_index > 0 )
2019-09-30 17:48:16 +08:00
dev = __dev_get_by_index ( net , ifm - > ifi_index ) ;
2019-09-30 17:48:20 +08:00
else if ( tb [ IFLA_IFNAME ] | | tb [ IFLA_ALT_IFNAME ] )
2022-04-16 00:53:28 +08:00
dev = rtnl_dev_get ( net , tb ) ;
2019-09-30 17:48:19 +08:00
else
2019-09-30 17:48:16 +08:00
return - EINVAL ;
if ( ! dev )
return - ENODEV ;
if ( ! tb [ IFLA_PROP_LIST ] )
return 0 ;
nla_for_each_nested ( attr , tb [ IFLA_PROP_LIST ] , rem ) {
switch ( nla_type ( attr ) ) {
case IFLA_ALT_IFNAME :
err = rtnl_alt_ifname ( cmd , dev , attr , & changed , extack ) ;
if ( err )
return err ;
break ;
}
}
if ( changed )
netdev_state_change ( dev ) ;
return 0 ;
}
/* Handler entry point: run rtnl_linkprop() with the RTM_NEWLINKPROP command. */
static int rtnl_newlinkprop(struct sk_buff *skb, struct nlmsghdr *nlh,
			    struct netlink_ext_ack *extack)
{
	const int cmd = RTM_NEWLINKPROP;

	return rtnl_linkprop(cmd, skb, nlh, extack);
}
/* Handler entry point: run rtnl_linkprop() with the RTM_DELLINKPROP command. */
static int rtnl_dellinkprop(struct sk_buff *skb, struct nlmsghdr *nlh,
			    struct netlink_ext_ack *extack)
{
	const int cmd = RTM_DELLINKPROP;

	return rtnl_linkprop(cmd, skb, nlh, extack);
}
2020-10-21 10:00:53 +08:00
static u32 rtnl_calcit ( struct sk_buff * skb , struct nlmsghdr * nlh )
2011-06-10 09:27:09 +08:00
{
2012-02-22 05:54:48 +08:00
struct net * net = sock_net ( skb - > sk ) ;
2020-10-21 10:00:53 +08:00
size_t min_ifinfo_dump_size = 0 ;
2012-02-22 05:54:48 +08:00
struct nlattr * tb [ IFLA_MAX + 1 ] ;
u32 ext_filter_mask = 0 ;
2020-10-21 10:00:53 +08:00
struct net_device * dev ;
2014-05-28 20:15:19 +08:00
int hdrlen ;
/* Same kernel<->userspace interface hack as in rtnl_dump_ifinfo. */
hdrlen = nlmsg_len ( nlh ) < sizeof ( struct ifinfomsg ) ?
sizeof ( struct rtgenmsg ) : sizeof ( struct ifinfomsg ) ;
2012-02-22 05:54:48 +08:00
netlink: make validation more configurable for future strictness
We currently have two levels of strict validation:
1) liberal (default)
- undefined (type >= max) & NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
- garbage at end of message accepted
2) strict (opt-in)
- NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
Split out parsing strictness into four different options:
* TRAILING - check that there's no trailing data after parsing
attributes (in message or nested)
* MAXTYPE - reject attrs > max known type
* UNSPEC - reject attributes with NLA_UNSPEC policy entries
* STRICT_ATTRS - strictly validate attribute size
The default for future things should be *everything*.
The current *_strict() is a combination of TRAILING and MAXTYPE,
and is renamed to _deprecated_strict().
The current regular parsing has none of this, and is renamed to
*_parse_deprecated().
Additionally it allows us to selectively set one of the new flags
even on old policies. Notably, the UNSPEC flag could be useful in
this case, since it can be arranged (by filling in the policy) to
not be an incompatible userspace ABI change, but would then going
forward prevent forgetting attribute entries. Similar can apply
to the POLICY flag.
We end up with the following renames:
* nla_parse -> nla_parse_deprecated
* nla_parse_strict -> nla_parse_deprecated_strict
* nlmsg_parse -> nlmsg_parse_deprecated
* nlmsg_parse_strict -> nlmsg_parse_deprecated_strict
* nla_parse_nested -> nla_parse_nested_deprecated
* nla_validate_nested -> nla_validate_nested_deprecated
Using spatch, of course:
@@
expression TB, MAX, HEAD, LEN, POL, EXT;
@@
-nla_parse(TB, MAX, HEAD, LEN, POL, EXT)
+nla_parse_deprecated(TB, MAX, HEAD, LEN, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression TB, MAX, NLA, POL, EXT;
@@
-nla_parse_nested(TB, MAX, NLA, POL, EXT)
+nla_parse_nested_deprecated(TB, MAX, NLA, POL, EXT)
@@
expression START, MAX, POL, EXT;
@@
-nla_validate_nested(START, MAX, POL, EXT)
+nla_validate_nested_deprecated(START, MAX, POL, EXT)
@@
expression NLH, HDRLEN, MAX, POL, EXT;
@@
-nlmsg_validate(NLH, HDRLEN, MAX, POL, EXT)
+nlmsg_validate_deprecated(NLH, HDRLEN, MAX, POL, EXT)
For this patch, don't actually add the strict, non-renamed versions
yet so that it breaks compile if I get it wrong.
Also, while at it, make nla_validate and nla_parse go down to a
common __nla_validate_parse() function to avoid code duplication.
Ultimately, this allows us to have very strict validation for every
new caller of nla_parse()/nlmsg_parse() etc as re-introduced in the
next patch, while existing things will continue to work as is.
In effect then, this adds fully strict validation for any new command.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-26 20:07:28 +08:00
if ( nlmsg_parse_deprecated ( nlh , hdrlen , tb , IFLA_MAX , ifla_policy , NULL ) > = 0 ) {
2012-03-04 20:32:10 +08:00
if ( tb [ IFLA_EXT_MASK ] )
ext_filter_mask = nla_get_u32 ( tb [ IFLA_EXT_MASK ] ) ;
}
2012-02-22 05:54:48 +08:00
if ( ! ext_filter_mask )
return NLMSG_GOODSIZE ;
/*
* traverse the list of net devices and compute the minimum
* buffer size based upon the filter mask .
*/
2017-08-10 02:41:51 +08:00
rcu_read_lock ( ) ;
for_each_netdev_rcu ( net , dev ) {
2020-10-21 10:00:53 +08:00
min_ifinfo_dump_size = max ( min_ifinfo_dump_size ,
if_nlmsg_size ( dev , ext_filter_mask ) ) ;
2012-02-22 05:54:48 +08:00
}
2017-08-10 02:41:51 +08:00
rcu_read_unlock ( ) ;
2012-02-22 05:54:48 +08:00
2016-11-22 14:14:28 +08:00
return nlmsg_total_size ( min_ifinfo_dump_size ) ;
2011-06-10 09:27:09 +08:00
}
2007-04-26 15:57:41 +08:00
static int rtnl_dump_all ( struct sk_buff * skb , struct netlink_callback * cb )
2005-04-17 06:20:36 +08:00
{
int idx ;
int s_idx = cb - > family ;
2018-09-05 03:53:51 +08:00
int type = cb - > nlh - > nlmsg_type - RTM_BASE ;
2018-10-25 03:59:02 +08:00
int ret = 0 ;
2005-04-17 06:20:36 +08:00
if ( s_idx = = 0 )
s_idx = 1 ;
2017-08-10 02:41:51 +08:00
2010-04-26 22:02:05 +08:00
for ( idx = 1 ; idx < = RTNL_FAMILY_MAX ; idx + + ) {
2020-12-10 10:16:08 +08:00
struct rtnl_link __rcu * * tab ;
2017-12-03 04:44:05 +08:00
struct rtnl_link * link ;
2017-08-10 02:41:51 +08:00
rtnl_dumpit_func dumpit ;
2005-04-17 06:20:36 +08:00
if ( idx < s_idx | | idx = = PF_PACKET )
continue ;
2017-08-10 02:41:51 +08:00
2017-12-03 04:44:05 +08:00
if ( type < 0 | | type > = RTM_NR_MSGTYPES )
2005-04-17 06:20:36 +08:00
continue ;
2017-08-10 02:41:51 +08:00
2017-12-03 04:44:05 +08:00
tab = rcu_dereference_rtnl ( rtnl_msg_handlers [ idx ] ) ;
if ( ! tab )
continue ;
2020-12-10 10:16:08 +08:00
link = rcu_dereference_rtnl ( tab [ type ] ) ;
2017-12-03 04:44:05 +08:00
if ( ! link )
continue ;
dumpit = link - > dumpit ;
2017-08-10 02:41:51 +08:00
if ( ! dumpit )
continue ;
2013-03-22 14:28:42 +08:00
if ( idx > s_idx ) {
2005-04-17 06:20:36 +08:00
memset ( & cb - > args [ 0 ] , 0 , sizeof ( cb - > args ) ) ;
2013-03-22 14:28:42 +08:00
cb - > prev_seq = 0 ;
cb - > seq = 0 ;
}
2018-10-25 03:59:02 +08:00
ret = dumpit ( skb , cb ) ;
2018-11-03 00:11:04 +08:00
if ( ret )
2005-04-17 06:20:36 +08:00
break ;
}
cb - > family = idx ;
2018-10-25 03:59:02 +08:00
return skb - > len ? : ret ;
2005-04-17 06:20:36 +08:00
}
2014-12-04 05:46:24 +08:00
struct sk_buff * rtmsg_ifinfo_build_skb ( int type , struct net_device * dev ,
2017-05-27 22:14:34 +08:00
unsigned int change ,
2018-01-25 22:01:39 +08:00
u32 event , gfp_t flags , int * new_nsid ,
int new_ifindex )
2005-04-17 06:20:36 +08:00
{
2008-03-25 20:47:49 +08:00
struct net * net = dev_net ( dev ) ;
2005-04-17 06:20:36 +08:00
struct sk_buff * skb ;
2006-08-15 15:37:09 +08:00
int err = - ENOBUFS ;
2005-04-17 06:20:36 +08:00
2021-10-21 14:40:20 +08:00
skb = nlmsg_new ( if_nlmsg_size ( dev , 0 ) , flags ) ;
2006-08-15 15:37:09 +08:00
if ( skb = = NULL )
goto errout ;
2005-04-17 06:20:36 +08:00
2017-11-03 03:04:38 +08:00
err = rtnl_fill_ifinfo ( skb , dev , dev_net ( dev ) ,
type , 0 , 0 , change , 0 , 0 , event ,
netns: fix GFP flags in rtnl_net_notifyid()
In rtnl_net_notifyid(), we certainly can't pass a null GFP flag to
rtnl_notify(). A GFP_KERNEL flag would be fine in most circumstances,
but there are a few paths calling rtnl_net_notifyid() from atomic
context or from RCU critical sections. The later also precludes the use
of gfp_any() as it wouldn't detect the RCU case. Also, the nlmsg_new()
call is wrong too, as it uses GFP_KERNEL unconditionally.
Therefore, we need to pass the GFP flags as parameter and propagate it
through function calls until the proper flags can be determined.
In most cases, GFP_KERNEL is fine. The exceptions are:
* openvswitch: ovs_vport_cmd_get() and ovs_vport_cmd_dump()
indirectly call rtnl_net_notifyid() from RCU critical section,
* rtnetlink: rtmsg_ifinfo_build_skb() already receives GFP flags as
parameter.
Also, in ovs_vport_cmd_build_info(), let's change the GFP flags used
by nlmsg_new(). The function is allowed to sleep, so better make the
flags consistent with the ones used in the following
ovs_vport_cmd_fill_info() call.
Found by code inspection.
Fixes: 9a9634545c70 ("netns: notify netns id events")
Signed-off-by: Guillaume Nault <gnault@redhat.com>
Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-10-24 00:39:04 +08:00
new_nsid , new_ifindex , - 1 , flags ) ;
2007-02-01 15:16:40 +08:00
if ( err < 0 ) {
/* -EMSGSIZE implies BUG in if_nlmsg_size() */
WARN_ON ( err = = - EMSGSIZE ) ;
kfree_skb ( skb ) ;
goto errout ;
}
2014-12-04 05:46:24 +08:00
return skb ;
2006-08-15 15:37:09 +08:00
errout :
if ( err < 0 )
2007-11-20 14:27:40 +08:00
rtnl_set_sk_err ( net , RTNLGRP_LINK , err ) ;
2014-12-04 05:46:24 +08:00
return NULL ;
}
/*
 * rtmsg_ifinfo_send - hand a prepared link message to rtnl_notify()
 * @skb: message to send
 * @dev: device whose netns is used for delivery
 * @flags: allocation flags forwarded to rtnl_notify()
 *
 * The message goes to group RTNLGRP_LINK with pid 0 and no request header.
 */
void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags)
{
	rtnl_notify(skb, dev_net(dev), 0, RTNLGRP_LINK, NULL, flags);
}
2017-05-27 22:14:34 +08:00
static void rtmsg_ifinfo_event ( int type , struct net_device * dev ,
unsigned int change , u32 event ,
2018-01-25 22:01:39 +08:00
gfp_t flags , int * new_nsid , int new_ifindex )
2014-12-04 05:46:24 +08:00
{
struct sk_buff * skb ;
2015-05-13 20:19:42 +08:00
if ( dev - > reg_state ! = NETREG_REGISTERED )
return ;
2018-01-25 22:01:39 +08:00
skb = rtmsg_ifinfo_build_skb ( type , dev , change , event , flags , new_nsid ,
new_ifindex ) ;
2014-12-04 05:46:24 +08:00
if ( skb )
rtmsg_ifinfo_send ( skb , dev , flags ) ;
2005-04-17 06:20:36 +08:00
}
2017-05-27 22:14:34 +08:00
/*
 * rtmsg_ifinfo - send a link message for @dev
 *
 * Calls rtmsg_ifinfo_event() with the event value rtnl_get_event(0), a NULL
 * new_nsid and a new_ifindex of 0.
 */
void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change,
		  gfp_t flags)
{
	u32 event = rtnl_get_event(0);

	rtmsg_ifinfo_event(type, dev, change, event, flags, NULL, 0);
}
2005-04-17 06:20:36 +08:00
2017-10-03 19:53:23 +08:00
void rtmsg_ifinfo_newnet ( int type , struct net_device * dev , unsigned int change ,
2018-01-25 22:01:39 +08:00
gfp_t flags , int * new_nsid , int new_ifindex )
2017-10-03 19:53:23 +08:00
{
rtmsg_ifinfo_event ( type , dev , change , rtnl_get_event ( 0 ) , flags ,
2018-01-25 22:01:39 +08:00
new_nsid , new_ifindex ) ;
2017-10-03 19:53:23 +08:00
}
2012-04-15 14:44:08 +08:00
static int nlmsg_populate_fdb_fill ( struct sk_buff * skb ,
struct net_device * dev ,
2015-04-09 20:16:17 +08:00
u8 * addr , u16 vid , u32 pid , u32 seq ,
2014-03-20 00:47:49 +08:00
int type , unsigned int flags ,
2015-12-15 21:20:30 +08:00
int nlflags , u16 ndm_state )
2012-04-15 14:44:08 +08:00
{
struct nlmsghdr * nlh ;
struct ndmsg * ndm ;
2014-03-20 00:47:49 +08:00
nlh = nlmsg_put ( skb , pid , seq , type , sizeof ( * ndm ) , nlflags ) ;
2012-04-15 14:44:08 +08:00
if ( ! nlh )
return - EMSGSIZE ;
ndm = nlmsg_data ( nlh ) ;
ndm - > ndm_family = AF_BRIDGE ;
ndm - > ndm_pad1 = 0 ;
ndm - > ndm_pad2 = 0 ;
ndm - > ndm_flags = flags ;
ndm - > ndm_type = 0 ;
ndm - > ndm_ifindex = dev - > ifindex ;
2015-12-15 21:20:30 +08:00
ndm - > ndm_state = ndm_state ;
2012-04-15 14:44:08 +08:00
if ( nla_put ( skb , NDA_LLADDR , ETH_ALEN , addr ) )
goto nla_put_failure ;
2015-04-09 20:16:17 +08:00
if ( vid )
if ( nla_put ( skb , NDA_VLAN , sizeof ( u16 ) , & vid ) )
goto nla_put_failure ;
2012-04-15 14:44:08 +08:00
2015-01-17 05:09:00 +08:00
nlmsg_end ( skb , nlh ) ;
return 0 ;
2012-04-15 14:44:08 +08:00
nla_put_failure :
nlmsg_cancel ( skb , nlh ) ;
return - EMSGSIZE ;
}
2012-04-15 14:44:14 +08:00
static inline size_t rtnl_fdb_nlmsg_size ( void )
{
2016-11-18 22:50:39 +08:00
return NLMSG_ALIGN ( sizeof ( struct ndmsg ) ) +
nla_total_size ( ETH_ALEN ) + /* NDA_LLADDR */
nla_total_size ( sizeof ( u16 ) ) + /* NDA_VLAN */
0 ;
2012-04-15 14:44:14 +08:00
}
2015-12-15 21:20:30 +08:00
/*
 * rtnl_fdb_notify - send an FDB notification to RTNLGRP_NEIGH
 * @dev: device the entry belongs to
 * @addr: link-layer address of the entry
 * @vid: VLAN id of the entry (0 for none)
 * @type: netlink message type of the notification
 * @ndm_state: state value placed in the message
 *
 * The message is allocated with GFP_ATOMIC and flagged NTF_SELF.  When it
 * cannot be allocated or filled, the error is reported through
 * rtnl_set_sk_err() instead.
 */
static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, u16 vid, int type,
			    u16 ndm_state)
{
	struct net *net = dev_net(dev);
	struct sk_buff *skb;
	int err;

	skb = nlmsg_new(rtnl_fdb_nlmsg_size(), GFP_ATOMIC);
	if (!skb) {
		err = -ENOBUFS;
		goto report;
	}

	err = nlmsg_populate_fdb_fill(skb, dev, addr, vid,
				      0, 0, type, NTF_SELF, 0, ndm_state);
	if (err >= 0) {
		rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
		return;
	}

	kfree_skb(skb);
report:
	rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}
2019-03-26 00:17:22 +08:00
/*
 * ndo_dflt_fdb_add - default netdevice operation to add an FDB entry
 * @ndm: request header; only ndm_state is examined
 * @tb: parsed request attributes (not used here)
 * @dev: device to add the address to
 * @addr: link-layer address to add
 * @vid: VLAN id; any non-zero value is rejected
 * @flags: netlink header flags; NLM_F_EXCL controls duplicate reporting
 *
 * Returns 0 on success, -EINVAL for a non-zero state without NUD_PERMANENT
 * or for a non-zero @vid, otherwise the result of dev_uc_add_excl() /
 * dev_mc_add_excl().  -EEXIST is turned into 0 unless NLM_F_EXCL is set.
 */
int ndo_dflt_fdb_add(struct ndmsg *ndm,
		     struct nlattr *tb[],
		     struct net_device *dev,
		     const unsigned char *addr, u16 vid,
		     u16 flags)
{
	int err = -EINVAL;

	/*
	 * If aging addresses are supported, the device will need to
	 * implement its own handler for this.
	 */
	if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) {
		netdev_info(dev, " default FDB implementation only supports local addresses \n ");
		return -EINVAL;
	}

	if (vid) {
		netdev_info(dev, " vlans aren't supported yet for dev_uc|mc_add() \n ");
		return -EINVAL;
	}

	if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr))
		err = dev_uc_add_excl(dev, addr);
	else if (is_multicast_ether_addr(addr))
		err = dev_mc_add_excl(dev, addr);

	/* A duplicate is only an error when the caller asked for NLM_F_EXCL. */
	if (err == -EEXIST && !(flags & NLM_F_EXCL))
		return 0;

	return err;
}
EXPORT_SYMBOL(ndo_dflt_fdb_add);
2017-10-10 23:10:04 +08:00
/*
 * fdb_vid_parse - extract and check the VLAN id of an FDB request
 * @vlan_attr: the NDA_VLAN attribute, or NULL when the request has none
 * @p_vid: receives the VLAN id, or 0 when @vlan_attr is NULL
 * @extack: extended ack for error reporting
 *
 * Returns 0 on success.  Returns -EINVAL, leaving *@p_vid untouched, when
 * the attribute payload is not exactly a u16 or the id is 0 or not below
 * VLAN_VID_MASK.
 */
static int fdb_vid_parse(struct nlattr *vlan_attr, u16 *p_vid,
			 struct netlink_ext_ack *extack)
{
	u16 vid;

	if (!vlan_attr) {
		*p_vid = 0;
		return 0;
	}

	if (nla_len(vlan_attr) != sizeof(u16)) {
		NL_SET_ERR_MSG(extack, " invalid vlan attribute size ");
		return -EINVAL;
	}

	vid = nla_get_u16(vlan_attr);
	if (!vid || vid >= VLAN_VID_MASK) {
		NL_SET_ERR_MSG(extack, " invalid vlan id ");
		return -EINVAL;
	}

	*p_vid = vid;
	return 0;
}
2017-04-17 00:48:24 +08:00
static int rtnl_fdb_add ( struct sk_buff * skb , struct nlmsghdr * nlh ,
struct netlink_ext_ack * extack )
2012-04-15 14:43:56 +08:00
{
struct net * net = sock_net ( skb - > sk ) ;
struct ndmsg * ndm ;
struct nlattr * tb [ NDA_MAX + 1 ] ;
struct net_device * dev ;
u8 * addr ;
2014-11-28 21:34:15 +08:00
u16 vid ;
2012-04-15 14:43:56 +08:00
int err ;
netlink: make validation more configurable for future strictness
We currently have two levels of strict validation:
1) liberal (default)
- undefined (type >= max) & NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
- garbage at end of message accepted
2) strict (opt-in)
- NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
Split out parsing strictness into four different options:
* TRAILING - check that there's no trailing data after parsing
attributes (in message or nested)
* MAXTYPE - reject attrs > max known type
* UNSPEC - reject attributes with NLA_UNSPEC policy entries
* STRICT_ATTRS - strictly validate attribute size
The default for future things should be *everything*.
The current *_strict() is a combination of TRAILING and MAXTYPE,
and is renamed to _deprecated_strict().
The current regular parsing has none of this, and is renamed to
*_parse_deprecated().
Additionally it allows us to selectively set one of the new flags
even on old policies. Notably, the UNSPEC flag could be useful in
this case, since it can be arranged (by filling in the policy) to
not be an incompatible userspace ABI change, but would then going
forward prevent forgetting attribute entries. Similar can apply
to the POLICY flag.
We end up with the following renames:
* nla_parse -> nla_parse_deprecated
* nla_parse_strict -> nla_parse_deprecated_strict
* nlmsg_parse -> nlmsg_parse_deprecated
* nlmsg_parse_strict -> nlmsg_parse_deprecated_strict
* nla_parse_nested -> nla_parse_nested_deprecated
* nla_validate_nested -> nla_validate_nested_deprecated
Using spatch, of course:
@@
expression TB, MAX, HEAD, LEN, POL, EXT;
@@
-nla_parse(TB, MAX, HEAD, LEN, POL, EXT)
+nla_parse_deprecated(TB, MAX, HEAD, LEN, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression TB, MAX, NLA, POL, EXT;
@@
-nla_parse_nested(TB, MAX, NLA, POL, EXT)
+nla_parse_nested_deprecated(TB, MAX, NLA, POL, EXT)
@@
expression START, MAX, POL, EXT;
@@
-nla_validate_nested(START, MAX, POL, EXT)
+nla_validate_nested_deprecated(START, MAX, POL, EXT)
@@
expression NLH, HDRLEN, MAX, POL, EXT;
@@
-nlmsg_validate(NLH, HDRLEN, MAX, POL, EXT)
+nlmsg_validate_deprecated(NLH, HDRLEN, MAX, POL, EXT)
For this patch, don't actually add the strict, non-renamed versions
yet so that it breaks compile if I get it wrong.
Also, while at it, make nla_validate and nla_parse go down to a
common __nla_validate_parse() function to avoid code duplication.
Ultimately, this allows us to have very strict validation for every
new caller of nla_parse()/nlmsg_parse() etc as re-introduced in the
next patch, while existing things will continue to work as is.
In effect then, this adds fully strict validation for any new command.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-26 20:07:28 +08:00
err = nlmsg_parse_deprecated ( nlh , sizeof ( * ndm ) , tb , NDA_MAX , NULL ,
extack ) ;
2012-04-15 14:43:56 +08:00
if ( err < 0 )
return err ;
ndm = nlmsg_data ( nlh ) ;
if ( ndm - > ndm_ifindex = = 0 ) {
2017-10-10 23:10:04 +08:00
NL_SET_ERR_MSG ( extack , " invalid ifindex " ) ;
2012-04-15 14:43:56 +08:00
return - EINVAL ;
}
dev = __dev_get_by_index ( net , ndm - > ndm_ifindex ) ;
if ( dev = = NULL ) {
2017-10-10 23:10:04 +08:00
NL_SET_ERR_MSG ( extack , " unknown ifindex " ) ;
2012-04-15 14:43:56 +08:00
return - ENODEV ;
}
if ( ! tb [ NDA_LLADDR ] | | nla_len ( tb [ NDA_LLADDR ] ) ! = ETH_ALEN ) {
2017-10-10 23:10:04 +08:00
NL_SET_ERR_MSG ( extack , " invalid address " ) ;
2012-04-15 14:43:56 +08:00
return - EINVAL ;
}
2018-10-30 04:36:43 +08:00
if ( dev - > type ! = ARPHRD_ETHER ) {
NL_SET_ERR_MSG ( extack , " FDB add only supported for Ethernet devices " ) ;
return - EINVAL ;
}
2012-04-15 14:43:56 +08:00
addr = nla_data ( tb [ NDA_LLADDR ] ) ;
2017-10-10 23:10:04 +08:00
err = fdb_vid_parse ( tb [ NDA_VLAN ] , & vid , extack ) ;
2014-11-28 21:34:15 +08:00
if ( err )
return err ;
2012-04-15 14:43:56 +08:00
err = - EOPNOTSUPP ;
/* Support fdb on master device the net/bridge default case */
if ( ( ! ndm - > ndm_flags | | ndm - > ndm_flags & NTF_MASTER ) & &
2020-02-20 16:00:07 +08:00
netif_is_bridge_port ( dev ) ) {
2013-01-04 06:48:52 +08:00
struct net_device * br_dev = netdev_master_upper_dev_get ( dev ) ;
const struct net_device_ops * ops = br_dev - > netdev_ops ;
2014-11-28 21:34:15 +08:00
err = ops - > ndo_fdb_add ( ndm , tb , dev , addr , vid ,
2019-01-17 07:06:50 +08:00
nlh - > nlmsg_flags , extack ) ;
2012-04-15 14:43:56 +08:00
if ( err )
goto out ;
else
ndm - > ndm_flags & = ~ NTF_MASTER ;
}
/* Embedded bridge, macvlan, and any other device support */
2013-03-06 23:39:42 +08:00
if ( ( ndm - > ndm_flags & NTF_SELF ) ) {
if ( dev - > netdev_ops - > ndo_fdb_add )
err = dev - > netdev_ops - > ndo_fdb_add ( ndm , tb , dev , addr ,
2014-11-28 21:34:15 +08:00
vid ,
2019-01-17 07:06:50 +08:00
nlh - > nlmsg_flags ,
extack ) ;
2013-03-06 23:39:42 +08:00
else
2014-11-28 21:34:15 +08:00
err = ndo_dflt_fdb_add ( ndm , tb , dev , addr , vid ,
2013-03-06 23:39:42 +08:00
nlh - > nlmsg_flags ) ;
2012-04-15 14:43:56 +08:00
2012-04-15 14:44:14 +08:00
if ( ! err ) {
2015-12-15 21:20:30 +08:00
rtnl_fdb_notify ( dev , addr , vid , RTM_NEWNEIGH ,
ndm - > ndm_state ) ;
2012-04-15 14:43:56 +08:00
ndm - > ndm_flags & = ~ NTF_SELF ;
2012-04-15 14:44:14 +08:00
}
2012-04-15 14:43:56 +08:00
}
out :
return err ;
}
2019-03-26 00:17:22 +08:00
/*
 * ndo_dflt_fdb_del - default netdevice operation to delete an FDB entry
 * @ndm: request header; ndm_state must include NUD_PERMANENT
 * @tb: parsed request attributes (not used here)
 * @dev: device to remove the address from
 * @addr: link-layer address to remove
 * @vid: VLAN id (not used here)
 *
 * Returns -EINVAL when NUD_PERMANENT is not set, otherwise the result of
 * dev_uc_del() or dev_mc_del() depending on the address class.
 */
int ndo_dflt_fdb_del(struct ndmsg *ndm,
		     struct nlattr *tb[],
		     struct net_device *dev,
		     const unsigned char *addr, u16 vid)
{
	/*
	 * If aging addresses are supported, the device will need to
	 * implement its own handler for this.
	 */
	if (!(ndm->ndm_state & NUD_PERMANENT)) {
		netdev_info(dev, " default FDB implementation only supports local addresses \n ");
		return -EINVAL;
	}

	if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr))
		return dev_uc_del(dev, addr);

	if (is_multicast_ether_addr(addr))
		return dev_mc_del(dev, addr);

	return -EINVAL;
}
EXPORT_SYMBOL(ndo_dflt_fdb_del);
2022-04-13 18:51:57 +08:00
static const struct nla_policy fdb_del_bulk_policy [ NDA_MAX + 1 ] = {
[ NDA_VLAN ] = { . type = NLA_U16 } ,
[ NDA_IFINDEX ] = NLA_POLICY_MIN ( NLA_S32 , 1 ) ,
2022-04-13 18:52:00 +08:00
[ NDA_NDM_STATE_MASK ] = { . type = NLA_U16 } ,
[ NDA_NDM_FLAGS_MASK ] = { . type = NLA_U8 } ,
2022-04-13 18:51:57 +08:00
} ;
2017-04-17 00:48:24 +08:00
/*
 * rtnl_fdb_del - handle a netlink request to delete FDB entries.
 * @skb: request buffer; its socket supplies the net namespace and is
 *       checked for CAP_NET_ADMIN
 * @nlh: netlink header; NLM_F_BULK in nlmsg_flags selects bulk mode
 * @extack: extended ack used for error messages
 *
 * The target device is looked up from the ifindex in the ndmsg header and
 * must be of type ARPHRD_ETHER.  A non-bulk request must carry an
 * ETH_ALEN-sized NDA_LLADDR.
 *
 * Two paths may run, in this order:
 *  - master: when the header flags are zero or contain NTF_MASTER and the
 *    device is a bridge port, the master upper device's ndo_fdb_del /
 *    ndo_fdb_del_bulk is called;
 *  - self: when NTF_SELF is set, the device's own ndo_fdb_del /
 *    ndo_fdb_del_bulk is called; the non-bulk case falls back to
 *    ndo_dflt_fdb_del() if the device has no ndo_fdb_del.
 *
 * Returns 0 on success, -EPERM without CAP_NET_ADMIN, -EINVAL / -ENODEV
 * for the validation failures below, a parse error, the error of the
 * handler that failed, or -EOPNOTSUPP if no handler was available.
 */
static int rtnl_fdb_del ( struct sk_buff * skb , struct nlmsghdr * nlh ,
struct netlink_ext_ack * extack )
2012-04-15 14:43:56 +08:00
{
2022-04-13 18:51:57 +08:00
bool del_bulk = ! ! ( nlh - > nlmsg_flags & NLM_F_BULK ) ;
2012-04-15 14:43:56 +08:00
struct net * net = sock_net ( skb - > sk ) ;
2022-04-13 18:51:57 +08:00
const struct net_device_ops * ops ;
2012-04-15 14:43:56 +08:00
struct ndmsg * ndm ;
2013-02-13 20:00:18 +08:00
struct nlattr * tb [ NDA_MAX + 1 ] ;
2012-04-15 14:43:56 +08:00
struct net_device * dev ;
2022-04-13 18:51:57 +08:00
__u8 * addr = NULL ;
2020-04-25 19:28:14 +08:00
int err ;
2014-11-28 21:34:15 +08:00
u16 vid ;
2012-04-15 14:43:56 +08:00
2014-04-24 05:29:27 +08:00
if ( ! netlink_capable ( skb , CAP_NET_ADMIN ) )
2013-02-13 20:00:18 +08:00
return - EPERM ;
2022-04-13 18:51:57 +08:00
/* Bulk requests are parsed against fdb_del_bulk_policy; single-entry
 * requests are parsed with no policy.
 */
if ( ! del_bulk ) {
err = nlmsg_parse_deprecated ( nlh , sizeof ( * ndm ) , tb , NDA_MAX ,
NULL , extack ) ;
} else {
err = nlmsg_parse ( nlh , sizeof ( * ndm ) , tb , NDA_MAX ,
fdb_del_bulk_policy , extack ) ;
}
2013-02-13 20:00:18 +08:00
if ( err < 0 )
return err ;
2012-04-15 14:43:56 +08:00
ndm = nlmsg_data ( nlh ) ;
if ( ndm - > ndm_ifindex = = 0 ) {
2017-10-10 23:10:04 +08:00
NL_SET_ERR_MSG ( extack , " invalid ifindex " ) ;
2012-04-15 14:43:56 +08:00
return - EINVAL ;
}
dev = __dev_get_by_index ( net , ndm - > ndm_ifindex ) ;
if ( dev = = NULL ) {
2017-10-10 23:10:04 +08:00
NL_SET_ERR_MSG ( extack , " unknown ifindex " ) ;
2012-04-15 14:43:56 +08:00
return - ENODEV ;
}
2022-04-13 18:51:57 +08:00
/* A single-entry delete must name the entry with an ETH_ALEN-sized
 * NDA_LLADDR; in bulk mode addr stays NULL and is not passed on.
 */
if ( ! del_bulk ) {
if ( ! tb [ NDA_LLADDR ] | | nla_len ( tb [ NDA_LLADDR ] ) ! = ETH_ALEN ) {
NL_SET_ERR_MSG ( extack , " invalid address " ) ;
return - EINVAL ;
}
addr = nla_data ( tb [ NDA_LLADDR ] ) ;
2013-02-13 20:00:18 +08:00
}
2018-10-30 04:36:43 +08:00
if ( dev - > type ! = ARPHRD_ETHER ) {
NL_SET_ERR_MSG ( extack , " FDB delete only supported for Ethernet devices " ) ;
return - EINVAL ;
}
2017-10-10 23:10:04 +08:00
err = fdb_vid_parse ( tb [ NDA_VLAN ] , & vid , extack ) ;
2014-11-28 21:34:15 +08:00
if ( err )
return err ;
2012-04-15 14:43:56 +08:00
/* Returned unchanged if neither path below invokes a handler. */
err = - EOPNOTSUPP ;
/* Support fdb on master device the net/bridge default case */
if ( ( ! ndm - > ndm_flags | | ndm - > ndm_flags & NTF_MASTER ) & &
2020-02-20 16:00:07 +08:00
netif_is_bridge_port ( dev ) ) {
2013-01-04 06:48:52 +08:00
struct net_device * br_dev = netdev_master_upper_dev_get ( dev ) ;
2012-04-15 14:43:56 +08:00
2022-04-13 18:51:57 +08:00
ops = br_dev - > netdev_ops ;
if ( ! del_bulk ) {
if ( ops - > ndo_fdb_del )
err = ops - > ndo_fdb_del ( ndm , tb , dev , addr , vid ) ;
} else {
if ( ops - > ndo_fdb_del_bulk )
err = ops - > ndo_fdb_del_bulk ( ndm , tb , dev , vid ,
extack ) ;
}
2012-04-15 14:43:56 +08:00
/* A failure (or missing handler) on the master path ends the
 * request; on success NTF_MASTER is cleared from the header flags.
 */
if ( err )
goto out ;
else
ndm - > ndm_flags & = ~ NTF_MASTER ;
}
/* Embedded bridge, macvlan, and any other device support */
2013-03-06 23:39:42 +08:00
if ( ndm - > ndm_flags & NTF_SELF ) {
2022-04-13 18:51:57 +08:00
ops = dev - > netdev_ops ;
if ( ! del_bulk ) {
if ( ops - > ndo_fdb_del )
err = ops - > ndo_fdb_del ( ndm , tb , dev , addr , vid ) ;
else
err = ndo_dflt_fdb_del ( ndm , tb , dev , addr , vid ) ;
} else {
/* in case err was cleared by NTF_MASTER call */
err = - EOPNOTSUPP ;
if ( ops - > ndo_fdb_del_bulk )
err = ops - > ndo_fdb_del_bulk ( ndm , tb , dev , vid ,
extack ) ;
}
2012-04-15 14:43:56 +08:00
2012-04-15 14:44:14 +08:00
if ( ! err ) {
2022-04-13 18:51:57 +08:00
/* Only the single-entry path sends RTM_DELNEIGH from here. */
if ( ! del_bulk )
rtnl_fdb_notify ( dev , addr , vid , RTM_DELNEIGH ,
ndm - > ndm_state ) ;
2012-04-15 14:43:56 +08:00
ndm - > ndm_flags & = ~ NTF_SELF ;
2012-04-15 14:44:14 +08:00
}
2012-04-15 14:43:56 +08:00
}
out :
return err ;
}
2012-04-15 14:44:08 +08:00
static int nlmsg_populate_fdb ( struct sk_buff * skb ,
struct netlink_callback * cb ,
struct net_device * dev ,
int * idx ,
struct netdev_hw_addr_list * list )
{
struct netdev_hw_addr * ha ;
int err ;
2012-09-08 04:12:54 +08:00
u32 portid , seq ;
2012-04-15 14:44:08 +08:00
2012-09-08 04:12:54 +08:00
portid = NETLINK_CB ( cb - > skb ) . portid ;
2012-04-15 14:44:08 +08:00
seq = cb - > nlh - > nlmsg_seq ;
list_for_each_entry ( ha , & list - > list , list ) {
2016-08-31 12:56:45 +08:00
if ( * idx < cb - > args [ 2 ] )
2012-04-15 14:44:08 +08:00
goto skip ;
2015-04-09 20:16:17 +08:00
err = nlmsg_populate_fdb_fill ( skb , dev , ha - > addr , 0 ,
2012-11-02 00:23:10 +08:00
portid , seq ,
2014-03-20 00:47:49 +08:00
RTM_NEWNEIGH , NTF_SELF ,
2015-12-15 21:20:30 +08:00
NLM_F_MULTI , NUD_PERMANENT ) ;
2012-04-15 14:44:08 +08:00
if ( err < 0 )
return err ;
skip :
* idx + = 1 ;
}
return 0 ;
}
/**
2012-07-10 18:55:09 +08:00
* ndo_dflt_fdb_dump - default netdevice operation to dump an FDB table .
2019-03-26 00:17:22 +08:00
* @ skb : socket buffer to store message in
* @ cb : netlink callback
2012-04-15 14:44:08 +08:00
* @ dev : netdevice
2019-03-26 00:17:22 +08:00
* @ filter_dev : ignored
* @ idx : the number of FDB table entries dumped is added to * @ idx
2012-04-15 14:44:08 +08:00
*
* Default netdevice operation to dump the existing unicast address list .
2013-03-29 16:18:37 +08:00
* Returns number of addresses from list put in skb .
2012-04-15 14:44:08 +08:00
*/
int ndo_dflt_fdb_dump ( struct sk_buff * skb ,
struct netlink_callback * cb ,
struct net_device * dev ,
2014-07-10 19:01:58 +08:00
struct net_device * filter_dev ,
2016-08-31 12:56:45 +08:00
int * idx )
2012-04-15 14:44:08 +08:00
{
int err ;
2018-12-05 01:40:35 +08:00
if ( dev - > type ! = ARPHRD_ETHER )
return - EINVAL ;
2012-04-15 14:44:08 +08:00
netif_addr_lock_bh ( dev ) ;
2016-08-31 12:56:45 +08:00
err = nlmsg_populate_fdb ( skb , cb , dev , idx , & dev - > uc ) ;
2012-04-15 14:44:08 +08:00
if ( err )
goto out ;
2016-11-30 16:37:34 +08:00
err = nlmsg_populate_fdb ( skb , cb , dev , idx , & dev - > mc ) ;
2012-04-15 14:44:08 +08:00
out :
netif_addr_unlock_bh ( dev ) ;
2016-08-31 12:56:45 +08:00
return err ;
2012-04-15 14:44:08 +08:00
}
EXPORT_SYMBOL ( ndo_dflt_fdb_dump ) ;
2018-10-08 11:16:44 +08:00
/*
 * valid_fdb_dump_strict - validate an FDB dump request that carries a
 * struct ndmsg header and extract the bridge / bridge-port indexes.
 * @nlh: request header
 * @br_idx: set from NDA_MASTER when that attribute is present; otherwise
 *          not written
 * @brport_idx: set to the header's ndm_ifindex, then overridden by
 *              NDA_IFINDEX when that attribute is present
 * @extack: extended ack used for error messages
 *
 * The request is rejected with -EINVAL if it is too short to hold the
 * header, if any of the header fields tested below is non-zero, if
 * NDA_IFINDEX or NDA_MASTER does not have a u32-sized payload, or if any
 * other attribute is present.  Attributes are checked in increasing
 * attribute-type order, so the first offending one determines the
 * message.  A parse error is returned as-is.
 *
 * Returns 0 on success.
 */
static int valid_fdb_dump_strict ( const struct nlmsghdr * nlh ,
int * br_idx , int * brport_idx ,
struct netlink_ext_ack * extack )
{
struct nlattr * tb [ NDA_MAX + 1 ] ;
struct ndmsg * ndm ;
int err , i ;
if ( nlh - > nlmsg_len < nlmsg_msg_size ( sizeof ( * ndm ) ) ) {
NL_SET_ERR_MSG ( extack , " Invalid header for fdb dump request " ) ;
return - EINVAL ;
}
ndm = nlmsg_data ( nlh ) ;
/* These header fields must all be zero in a dump request. */
if ( ndm - > ndm_pad1 | | ndm - > ndm_pad2 | | ndm - > ndm_state | |
ndm - > ndm_flags | | ndm - > ndm_type ) {
2019-10-29 19:59:32 +08:00
NL_SET_ERR_MSG ( extack , " Invalid values in header for fdb dump request " ) ;
2018-10-08 11:16:44 +08:00
return - EINVAL ;
}
netlink: make validation more configurable for future strictness
We currently have two levels of strict validation:
1) liberal (default)
- undefined (type >= max) & NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
- garbage at end of message accepted
2) strict (opt-in)
- NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
Split out parsing strictness into four different options:
* TRAILING - check that there's no trailing data after parsing
attributes (in message or nested)
* MAXTYPE - reject attrs > max known type
* UNSPEC - reject attributes with NLA_UNSPEC policy entries
* STRICT_ATTRS - strictly validate attribute size
The default for future things should be *everything*.
The current *_strict() is a combination of TRAILING and MAXTYPE,
and is renamed to _deprecated_strict().
The current regular parsing has none of this, and is renamed to
*_parse_deprecated().
Additionally it allows us to selectively set one of the new flags
even on old policies. Notably, the UNSPEC flag could be useful in
this case, since it can be arranged (by filling in the policy) to
not be an incompatible userspace ABI change, but would then going
forward prevent forgetting attribute entries. Similar can apply
to the POLICY flag.
We end up with the following renames:
* nla_parse -> nla_parse_deprecated
* nla_parse_strict -> nla_parse_deprecated_strict
* nlmsg_parse -> nlmsg_parse_deprecated
* nlmsg_parse_strict -> nlmsg_parse_deprecated_strict
* nla_parse_nested -> nla_parse_nested_deprecated
* nla_validate_nested -> nla_validate_nested_deprecated
Using spatch, of course:
@@
expression TB, MAX, HEAD, LEN, POL, EXT;
@@
-nla_parse(TB, MAX, HEAD, LEN, POL, EXT)
+nla_parse_deprecated(TB, MAX, HEAD, LEN, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression TB, MAX, NLA, POL, EXT;
@@
-nla_parse_nested(TB, MAX, NLA, POL, EXT)
+nla_parse_nested_deprecated(TB, MAX, NLA, POL, EXT)
@@
expression START, MAX, POL, EXT;
@@
-nla_validate_nested(START, MAX, POL, EXT)
+nla_validate_nested_deprecated(START, MAX, POL, EXT)
@@
expression NLH, HDRLEN, MAX, POL, EXT;
@@
-nlmsg_validate(NLH, HDRLEN, MAX, POL, EXT)
+nlmsg_validate_deprecated(NLH, HDRLEN, MAX, POL, EXT)
For this patch, don't actually add the strict, non-renamed versions
yet so that it breaks compile if I get it wrong.
Also, while at it, make nla_validate and nla_parse go down to a
common __nla_validate_parse() function to avoid code duplication.
Ultimately, this allows us to have very strict validation for every
new caller of nla_parse()/nlmsg_parse() etc as re-introduced in the
next patch, while existing things will continue to work as is.
In effect then, this adds fully strict validation for any new command.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-26 20:07:28 +08:00
err = nlmsg_parse_deprecated_strict ( nlh , sizeof ( struct ndmsg ) , tb ,
NDA_MAX , NULL , extack ) ;
2018-10-08 11:16:44 +08:00
if ( err < 0 )
return err ;
/* Default the port index to the header ifindex; an NDA_IFINDEX
 * attribute found in the loop below overrides it.
 */
* brport_idx = ndm - > ndm_ifindex ;
for ( i = 0 ; i < = NDA_MAX ; + + i ) {
if ( ! tb [ i ] )
continue ;
switch ( i ) {
case NDA_IFINDEX :
if ( nla_len ( tb [ i ] ) ! = sizeof ( u32 ) ) {
NL_SET_ERR_MSG ( extack , " Invalid IFINDEX attribute in fdb dump request " ) ;
return - EINVAL ;
}
* brport_idx = nla_get_u32 ( tb [ NDA_IFINDEX ] ) ;
break ;
case NDA_MASTER :
if ( nla_len ( tb [ i ] ) ! = sizeof ( u32 ) ) {
NL_SET_ERR_MSG ( extack , " Invalid MASTER attribute in fdb dump request " ) ;
return - EINVAL ;
}
* br_idx = nla_get_u32 ( tb [ NDA_MASTER ] ) ;
break ;
default :
NL_SET_ERR_MSG ( extack , " Unsupported attribute in fdb dump request " ) ;
return - EINVAL ;
}
}
return 0 ;
}
2018-10-08 11:16:43 +08:00
static int valid_fdb_dump_legacy ( const struct nlmsghdr * nlh ,
int * br_idx , int * brport_idx ,
struct netlink_ext_ack * extack )
2012-04-15 14:43:56 +08:00
{
2014-07-10 19:01:59 +08:00
struct nlattr * tb [ IFLA_MAX + 1 ] ;
2018-10-08 11:16:43 +08:00
int err ;
2014-07-10 19:01:59 +08:00
rtnetlink: fix rtnl_fdb_dump() for ndmsg header
Currently, rtnl_fdb_dump() assumes the family header is 'struct ifinfomsg',
which is not always true -- 'struct ndmsg' is used by iproute2 ('ip neigh').
The problem is, the function bails out early if nlmsg_parse() fails, which
does occur for iproute2 usage of 'struct ndmsg' because the payload length
is shorter than the family header alone (as 'struct ifinfomsg' is assumed).
This breaks backward compatibility with userspace -- nothing is sent back.
Some examples with iproute2 and netlink library for go [1]:
1) $ bridge fdb show
33:33:00:00:00:01 dev ens3 self permanent
01:00:5e:00:00:01 dev ens3 self permanent
33:33:ff:15:98:30 dev ens3 self permanent
This one works, as it uses 'struct ifinfomsg'.
fdb_show() @ iproute2/bridge/fdb.c
"""
.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
...
if (rtnl_dump_request(&rth, RTM_GETNEIGH, [...]
"""
2) $ ip --family bridge neigh
RTNETLINK answers: Invalid argument
Dump terminated
This one fails, as it uses 'struct ndmsg'.
do_show_or_flush() @ iproute2/ip/ipneigh.c
"""
.n.nlmsg_type = RTM_GETNEIGH,
.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)),
"""
3) $ ./neighlist
< no output >
This one fails, as it uses 'struct ndmsg'-based.
neighList() @ netlink/neigh_linux.go
"""
req := h.newNetlinkRequest(unix.RTM_GETNEIGH, [...]
msg := Ndmsg{
"""
The actual breakage was introduced by commit 0ff50e83b512 ("net: rtnetlink:
bail out from rtnl_fdb_dump() on parse error"), because nlmsg_parse() fails
if the payload length (with the _actual_ family header) is less than the
family header length alone (which is assumed, in parameter 'hdrlen').
This is true in the examples above with struct ndmsg, with size and payload
length shorter than struct ifinfomsg.
However, that commit just intends to fix something under the assumption the
family header is indeed an 'struct ifinfomsg' - by preventing access to the
payload as such (via 'ifm' pointer) if the payload length is not sufficient
to actually contain it.
The assumption was introduced by commit 5e6d24358799 ("bridge: netlink dump
interface at par with brctl"), to support iproute2's 'bridge fdb' command
(not 'ip neigh') which indeed uses 'struct ifinfomsg', thus is not broken.
So, in order to unbreak the 'struct ndmsg' family headers and still allow
'struct ifinfomsg' to continue to work, check for the known message sizes
used with 'struct ndmsg' in iproute2 (with zero or one attribute which is
not used in this function anyway) then do not parse the data as ifinfomsg.
Same examples with this patch applied (or revert/before the original fix):
$ bridge fdb show
33:33:00:00:00:01 dev ens3 self permanent
01:00:5e:00:00:01 dev ens3 self permanent
33:33:ff:15:98:30 dev ens3 self permanent
$ ip --family bridge neigh
dev ens3 lladdr 33:33:00:00:00:01 PERMANENT
dev ens3 lladdr 01:00:5e:00:00:01 PERMANENT
dev ens3 lladdr 33:33:ff:15:98:30 PERMANENT
$ ./neighlist
netlink.Neigh{LinkIndex:2, Family:7, State:128, Type:0, Flags:2, IP:net.IP(nil), HardwareAddr:net.HardwareAddr{0x33, 0x33, 0x0, 0x0, 0x0, 0x1}, LLIPAddr:net.IP(nil), Vlan:0, VNI:0}
netlink.Neigh{LinkIndex:2, Family:7, State:128, Type:0, Flags:2, IP:net.IP(nil), HardwareAddr:net.HardwareAddr{0x1, 0x0, 0x5e, 0x0, 0x0, 0x1}, LLIPAddr:net.IP(nil), Vlan:0, VNI:0}
netlink.Neigh{LinkIndex:2, Family:7, State:128, Type:0, Flags:2, IP:net.IP(nil), HardwareAddr:net.HardwareAddr{0x33, 0x33, 0xff, 0x15, 0x98, 0x30}, LLIPAddr:net.IP(nil), Vlan:0, VNI:0}
Tested on mainline (v4.19-rc6) and net-next (3bd09b05b068).
References:
[1] netlink library for go (test-case)
https://github.com/vishvananda/netlink
$ cat ~/go/src/neighlist/main.go
package main
import ("fmt"; "syscall"; "github.com/vishvananda/netlink")
func main() {
neighs, _ := netlink.NeighList(0, syscall.AF_BRIDGE)
for _, neigh := range neighs { fmt.Printf("%#v\n", neigh) }
}
$ export GOPATH=~/go
$ go get github.com/vishvananda/netlink
$ go build neighlist
$ ~/go/src/neighlist/neighlist
Thanks to David Ahern for suggestions to improve this patch.
Fixes: 0ff50e83b512 ("net: rtnetlink: bail out from rtnl_fdb_dump() on parse error")
Fixes: 5e6d24358799 ("bridge: netlink dump interface at par with brctl")
Reported-by: Aidan Obley <aobley@pivotal.io>
Signed-off-by: Mauricio Faria de Oliveira <mfo@canonical.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-10-02 09:46:40 +08:00
/* A hack to preserve kernel<->userspace interface.
* Before Linux v4 .12 this code accepted ndmsg since iproute2 v3 .3 .0 .
* However , ndmsg is shorter than ifinfomsg thus nlmsg_parse ( ) bails .
* So , check for ndmsg with an optional u32 attribute ( not used here ) .
* Fortunately these sizes don ' t conflict with the size of ifinfomsg
* with an optional attribute .
*/
2018-10-08 11:16:43 +08:00
if ( nlmsg_len ( nlh ) ! = sizeof ( struct ndmsg ) & &
( nlmsg_len ( nlh ) ! = sizeof ( struct ndmsg ) +
rtnetlink: fix rtnl_fdb_dump() for ndmsg header
Currently, rtnl_fdb_dump() assumes the family header is 'struct ifinfomsg',
which is not always true -- 'struct ndmsg' is used by iproute2 ('ip neigh').
The problem is, the function bails out early if nlmsg_parse() fails, which
does occur for iproute2 usage of 'struct ndmsg' because the payload length
is shorter than the family header alone (as 'struct ifinfomsg' is assumed).
This breaks backward compatibility with userspace -- nothing is sent back.
Some examples with iproute2 and netlink library for go [1]:
1) $ bridge fdb show
33:33:00:00:00:01 dev ens3 self permanent
01:00:5e:00:00:01 dev ens3 self permanent
33:33:ff:15:98:30 dev ens3 self permanent
This one works, as it uses 'struct ifinfomsg'.
fdb_show() @ iproute2/bridge/fdb.c
"""
.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
...
if (rtnl_dump_request(&rth, RTM_GETNEIGH, [...]
"""
2) $ ip --family bridge neigh
RTNETLINK answers: Invalid argument
Dump terminated
This one fails, as it uses 'struct ndmsg'.
do_show_or_flush() @ iproute2/ip/ipneigh.c
"""
.n.nlmsg_type = RTM_GETNEIGH,
.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)),
"""
3) $ ./neighlist
< no output >
This one fails, as it uses 'struct ndmsg'-based.
neighList() @ netlink/neigh_linux.go
"""
req := h.newNetlinkRequest(unix.RTM_GETNEIGH, [...]
msg := Ndmsg{
"""
The actual breakage was introduced by commit 0ff50e83b512 ("net: rtnetlink:
bail out from rtnl_fdb_dump() on parse error"), because nlmsg_parse() fails
if the payload length (with the _actual_ family header) is less than the
family header length alone (which is assumed, in parameter 'hdrlen').
This is true in the examples above with struct ndmsg, with size and payload
length shorter than struct ifinfomsg.
However, that commit just intends to fix something under the assumption the
family header is indeed an 'struct ifinfomsg' - by preventing access to the
payload as such (via 'ifm' pointer) if the payload length is not sufficient
to actually contain it.
The assumption was introduced by commit 5e6d24358799 ("bridge: netlink dump
interface at par with brctl"), to support iproute2's 'bridge fdb' command
(not 'ip neigh') which indeed uses 'struct ifinfomsg', thus is not broken.
So, in order to unbreak the 'struct ndmsg' family headers and still allow
'struct ifinfomsg' to continue to work, check for the known message sizes
used with 'struct ndmsg' in iproute2 (with zero or one attribute which is
not used in this function anyway) then do not parse the data as ifinfomsg.
Same examples with this patch applied (or revert/before the original fix):
$ bridge fdb show
33:33:00:00:00:01 dev ens3 self permanent
01:00:5e:00:00:01 dev ens3 self permanent
33:33:ff:15:98:30 dev ens3 self permanent
$ ip --family bridge neigh
dev ens3 lladdr 33:33:00:00:00:01 PERMANENT
dev ens3 lladdr 01:00:5e:00:00:01 PERMANENT
dev ens3 lladdr 33:33:ff:15:98:30 PERMANENT
$ ./neighlist
netlink.Neigh{LinkIndex:2, Family:7, State:128, Type:0, Flags:2, IP:net.IP(nil), HardwareAddr:net.HardwareAddr{0x33, 0x33, 0x0, 0x0, 0x0, 0x1}, LLIPAddr:net.IP(nil), Vlan:0, VNI:0}
netlink.Neigh{LinkIndex:2, Family:7, State:128, Type:0, Flags:2, IP:net.IP(nil), HardwareAddr:net.HardwareAddr{0x1, 0x0, 0x5e, 0x0, 0x0, 0x1}, LLIPAddr:net.IP(nil), Vlan:0, VNI:0}
netlink.Neigh{LinkIndex:2, Family:7, State:128, Type:0, Flags:2, IP:net.IP(nil), HardwareAddr:net.HardwareAddr{0x33, 0x33, 0xff, 0x15, 0x98, 0x30}, LLIPAddr:net.IP(nil), Vlan:0, VNI:0}
Tested on mainline (v4.19-rc6) and net-next (3bd09b05b068).
References:
[1] netlink library for go (test-case)
https://github.com/vishvananda/netlink
$ cat ~/go/src/neighlist/main.go
package main
import ("fmt"; "syscall"; "github.com/vishvananda/netlink")
func main() {
neighs, _ := netlink.NeighList(0, syscall.AF_BRIDGE)
for _, neigh := range neighs { fmt.Printf("%#v\n", neigh) }
}
$ export GOPATH=~/go
$ go get github.com/vishvananda/netlink
$ go build neighlist
$ ~/go/src/neighlist/neighlist
Thanks to David Ahern for suggestions to improve this patch.
Fixes: 0ff50e83b512 ("net: rtnetlink: bail out from rtnl_fdb_dump() on parse error")
Fixes: 5e6d24358799 ("bridge: netlink dump interface at par with brctl")
Reported-by: Aidan Obley <aobley@pivotal.io>
Signed-off-by: Mauricio Faria de Oliveira <mfo@canonical.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-10-02 09:46:40 +08:00
nla_attr_size ( sizeof ( u32 ) ) ) ) {
2018-10-09 04:57:24 +08:00
struct ifinfomsg * ifm ;
netlink: make validation more configurable for future strictness
We currently have two levels of strict validation:
1) liberal (default)
- undefined (type >= max) & NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
- garbage at end of message accepted
2) strict (opt-in)
- NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
Split out parsing strictness into four different options:
* TRAILING - check that there's no trailing data after parsing
attributes (in message or nested)
* MAXTYPE - reject attrs > max known type
* UNSPEC - reject attributes with NLA_UNSPEC policy entries
* STRICT_ATTRS - strictly validate attribute size
The default for future things should be *everything*.
The current *_strict() is a combination of TRAILING and MAXTYPE,
and is renamed to _deprecated_strict().
The current regular parsing has none of this, and is renamed to
*_parse_deprecated().
Additionally it allows us to selectively set one of the new flags
even on old policies. Notably, the UNSPEC flag could be useful in
this case, since it can be arranged (by filling in the policy) to
not be an incompatible userspace ABI change, but would then going
forward prevent forgetting attribute entries. Similar can apply
to the POLICY flag.
We end up with the following renames:
* nla_parse -> nla_parse_deprecated
* nla_parse_strict -> nla_parse_deprecated_strict
* nlmsg_parse -> nlmsg_parse_deprecated
* nlmsg_parse_strict -> nlmsg_parse_deprecated_strict
* nla_parse_nested -> nla_parse_nested_deprecated
* nla_validate_nested -> nla_validate_nested_deprecated
Using spatch, of course:
@@
expression TB, MAX, HEAD, LEN, POL, EXT;
@@
-nla_parse(TB, MAX, HEAD, LEN, POL, EXT)
+nla_parse_deprecated(TB, MAX, HEAD, LEN, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression TB, MAX, NLA, POL, EXT;
@@
-nla_parse_nested(TB, MAX, NLA, POL, EXT)
+nla_parse_nested_deprecated(TB, MAX, NLA, POL, EXT)
@@
expression START, MAX, POL, EXT;
@@
-nla_validate_nested(START, MAX, POL, EXT)
+nla_validate_nested_deprecated(START, MAX, POL, EXT)
@@
expression NLH, HDRLEN, MAX, POL, EXT;
@@
-nlmsg_validate(NLH, HDRLEN, MAX, POL, EXT)
+nlmsg_validate_deprecated(NLH, HDRLEN, MAX, POL, EXT)
For this patch, don't actually add the strict, non-renamed versions
yet so that it breaks compile if I get it wrong.
Also, while at it, make nla_validate and nla_parse go down to a
common __nla_validate_parse() function to avoid code duplication.
Ultimately, this allows us to have very strict validation for every
new caller of nla_parse()/nlmsg_parse() etc as re-introduced in the
next patch, while existing things will continue to work as is.
In effect then, this adds fully strict validation for any new command.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-26 20:07:28 +08:00
err = nlmsg_parse_deprecated ( nlh , sizeof ( struct ifinfomsg ) ,
tb , IFLA_MAX , ifla_policy ,
extack ) ;
rtnetlink: fix rtnl_fdb_dump() for ndmsg header
Currently, rtnl_fdb_dump() assumes the family header is 'struct ifinfomsg',
which is not always true -- 'struct ndmsg' is used by iproute2 ('ip neigh').
The problem is, the function bails out early if nlmsg_parse() fails, which
does occur for iproute2 usage of 'struct ndmsg' because the payload length
is shorter than the family header alone (as 'struct ifinfomsg' is assumed).
This breaks backward compatibility with userspace -- nothing is sent back.
Some examples with iproute2 and netlink library for go [1]:
1) $ bridge fdb show
33:33:00:00:00:01 dev ens3 self permanent
01:00:5e:00:00:01 dev ens3 self permanent
33:33:ff:15:98:30 dev ens3 self permanent
This one works, as it uses 'struct ifinfomsg'.
fdb_show() @ iproute2/bridge/fdb.c
"""
.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
...
if (rtnl_dump_request(&rth, RTM_GETNEIGH, [...]
"""
2) $ ip --family bridge neigh
RTNETLINK answers: Invalid argument
Dump terminated
This one fails, as it uses 'struct ndmsg'.
do_show_or_flush() @ iproute2/ip/ipneigh.c
"""
.n.nlmsg_type = RTM_GETNEIGH,
.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)),
"""
3) $ ./neighlist
< no output >
This one fails, as it uses 'struct ndmsg'-based.
neighList() @ netlink/neigh_linux.go
"""
req := h.newNetlinkRequest(unix.RTM_GETNEIGH, [...]
msg := Ndmsg{
"""
The actual breakage was introduced by commit 0ff50e83b512 ("net: rtnetlink:
bail out from rtnl_fdb_dump() on parse error"), because nlmsg_parse() fails
if the payload length (with the _actual_ family header) is less than the
family header length alone (which is assumed, in parameter 'hdrlen').
This is true in the examples above with struct ndmsg, with size and payload
length shorter than struct ifinfomsg.
However, that commit just intends to fix something under the assumption the
family header is indeed an 'struct ifinfomsg' - by preventing access to the
payload as such (via 'ifm' pointer) if the payload length is not sufficient
to actually contain it.
The assumption was introduced by commit 5e6d24358799 ("bridge: netlink dump
interface at par with brctl"), to support iproute2's 'bridge fdb' command
(not 'ip neigh') which indeed uses 'struct ifinfomsg', thus is not broken.
So, in order to unbreak the 'struct ndmsg' family headers and still allow
'struct ifinfomsg' to continue to work, check for the known message sizes
used with 'struct ndmsg' in iproute2 (with zero or one attribute which is
not used in this function anyway) then do not parse the data as ifinfomsg.
Same examples with this patch applied (or revert/before the original fix):
$ bridge fdb show
33:33:00:00:00:01 dev ens3 self permanent
01:00:5e:00:00:01 dev ens3 self permanent
33:33:ff:15:98:30 dev ens3 self permanent
$ ip --family bridge neigh
dev ens3 lladdr 33:33:00:00:00:01 PERMANENT
dev ens3 lladdr 01:00:5e:00:00:01 PERMANENT
dev ens3 lladdr 33:33:ff:15:98:30 PERMANENT
$ ./neighlist
netlink.Neigh{LinkIndex:2, Family:7, State:128, Type:0, Flags:2, IP:net.IP(nil), HardwareAddr:net.HardwareAddr{0x33, 0x33, 0x0, 0x0, 0x0, 0x1}, LLIPAddr:net.IP(nil), Vlan:0, VNI:0}
netlink.Neigh{LinkIndex:2, Family:7, State:128, Type:0, Flags:2, IP:net.IP(nil), HardwareAddr:net.HardwareAddr{0x1, 0x0, 0x5e, 0x0, 0x0, 0x1}, LLIPAddr:net.IP(nil), Vlan:0, VNI:0}
netlink.Neigh{LinkIndex:2, Family:7, State:128, Type:0, Flags:2, IP:net.IP(nil), HardwareAddr:net.HardwareAddr{0x33, 0x33, 0xff, 0x15, 0x98, 0x30}, LLIPAddr:net.IP(nil), Vlan:0, VNI:0}
Tested on mainline (v4.19-rc6) and net-next (3bd09b05b068).
References:
[1] netlink library for go (test-case)
https://github.com/vishvananda/netlink
$ cat ~/go/src/neighlist/main.go
package main
import ("fmt"; "syscall"; "github.com/vishvananda/netlink")
func main() {
neighs, _ := netlink.NeighList(0, syscall.AF_BRIDGE)
for _, neigh := range neighs { fmt.Printf("%#v\n", neigh) }
}
$ export GOPATH=~/go
$ go get github.com/vishvananda/netlink
$ go build neighlist
$ ~/go/src/neighlist/neighlist
Thanks to David Ahern for suggestions to improve this patch.
Fixes: 0ff50e83b512 ("net: rtnetlink: bail out from rtnl_fdb_dump() on parse error")
Fixes: 5e6d24358799 ("bridge: netlink dump interface at par with brctl")
Reported-by: Aidan Obley <aobley@pivotal.io>
Signed-off-by: Mauricio Faria de Oliveira <mfo@canonical.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-10-02 09:46:40 +08:00
if ( err < 0 ) {
return - EINVAL ;
} else if ( err = = 0 ) {
if ( tb [ IFLA_MASTER ] )
2018-10-08 11:16:43 +08:00
* br_idx = nla_get_u32 ( tb [ IFLA_MASTER ] ) ;
rtnetlink: fix rtnl_fdb_dump() for ndmsg header
Currently, rtnl_fdb_dump() assumes the family header is 'struct ifinfomsg',
which is not always true -- 'struct ndmsg' is used by iproute2 ('ip neigh').
The problem is, the function bails out early if nlmsg_parse() fails, which
does occur for iproute2 usage of 'struct ndmsg' because the payload length
is shorter than the family header alone (as 'struct ifinfomsg' is assumed).
This breaks backward compatibility with userspace -- nothing is sent back.
Some examples with iproute2 and netlink library for go [1]:
1) $ bridge fdb show
33:33:00:00:00:01 dev ens3 self permanent
01:00:5e:00:00:01 dev ens3 self permanent
33:33:ff:15:98:30 dev ens3 self permanent
This one works, as it uses 'struct ifinfomsg'.
fdb_show() @ iproute2/bridge/fdb.c
"""
.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
...
if (rtnl_dump_request(&rth, RTM_GETNEIGH, [...]
"""
2) $ ip --family bridge neigh
RTNETLINK answers: Invalid argument
Dump terminated
This one fails, as it uses 'struct ndmsg'.
do_show_or_flush() @ iproute2/ip/ipneigh.c
"""
.n.nlmsg_type = RTM_GETNEIGH,
.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)),
"""
3) $ ./neighlist
< no output >
This one fails, as it uses 'struct ndmsg'-based.
neighList() @ netlink/neigh_linux.go
"""
req := h.newNetlinkRequest(unix.RTM_GETNEIGH, [...]
msg := Ndmsg{
"""
The actual breakage was introduced by commit 0ff50e83b512 ("net: rtnetlink:
bail out from rtnl_fdb_dump() on parse error"), because nlmsg_parse() fails
if the payload length (with the _actual_ family header) is less than the
family header length alone (which is assumed, in parameter 'hdrlen').
This is true in the examples above with struct ndmsg, with size and payload
length shorter than struct ifinfomsg.
However, that commit just intends to fix something under the assumption the
family header is indeed an 'struct ifinfomsg' - by preventing access to the
payload as such (via 'ifm' pointer) if the payload length is not sufficient
to actually contain it.
The assumption was introduced by commit 5e6d24358799 ("bridge: netlink dump
interface at par with brctl"), to support iproute2's 'bridge fdb' command
(not 'ip neigh') which indeed uses 'struct ifinfomsg', thus is not broken.
So, in order to unbreak the 'struct ndmsg' family headers and still allow
'struct ifinfomsg' to continue to work, check for the known message sizes
used with 'struct ndmsg' in iproute2 (with zero or one attribute which is
not used in this function anyway) then do not parse the data as ifinfomsg.
Same examples with this patch applied (or revert/before the original fix):
$ bridge fdb show
33:33:00:00:00:01 dev ens3 self permanent
01:00:5e:00:00:01 dev ens3 self permanent
33:33:ff:15:98:30 dev ens3 self permanent
$ ip --family bridge neigh
dev ens3 lladdr 33:33:00:00:00:01 PERMANENT
dev ens3 lladdr 01:00:5e:00:00:01 PERMANENT
dev ens3 lladdr 33:33:ff:15:98:30 PERMANENT
$ ./neighlist
netlink.Neigh{LinkIndex:2, Family:7, State:128, Type:0, Flags:2, IP:net.IP(nil), HardwareAddr:net.HardwareAddr{0x33, 0x33, 0x0, 0x0, 0x0, 0x1}, LLIPAddr:net.IP(nil), Vlan:0, VNI:0}
netlink.Neigh{LinkIndex:2, Family:7, State:128, Type:0, Flags:2, IP:net.IP(nil), HardwareAddr:net.HardwareAddr{0x1, 0x0, 0x5e, 0x0, 0x0, 0x1}, LLIPAddr:net.IP(nil), Vlan:0, VNI:0}
netlink.Neigh{LinkIndex:2, Family:7, State:128, Type:0, Flags:2, IP:net.IP(nil), HardwareAddr:net.HardwareAddr{0x33, 0x33, 0xff, 0x15, 0x98, 0x30}, LLIPAddr:net.IP(nil), Vlan:0, VNI:0}
Tested on mainline (v4.19-rc6) and net-next (3bd09b05b068).
References:
[1] netlink library for go (test-case)
https://github.com/vishvananda/netlink
$ cat ~/go/src/neighlist/main.go
package main
import ("fmt"; "syscall"; "github.com/vishvananda/netlink")
func main() {
neighs, _ := netlink.NeighList(0, syscall.AF_BRIDGE)
for _, neigh := range neighs { fmt.Printf("%#v\n", neigh) }
}
$ export GOPATH=~/go
$ go get github.com/vishvananda/netlink
$ go build neighlist
$ ~/go/src/neighlist/neighlist
Thanks to David Ahern for suggestions to improve this patch.
Fixes: 0ff50e83b512 ("net: rtnetlink: bail out from rtnl_fdb_dump() on parse error")
Fixes: 5e6d24358799 ("bridge: netlink dump interface at par with brctl")
Reported-by: Aidan Obley <aobley@pivotal.io>
Signed-off-by: Mauricio Faria de Oliveira <mfo@canonical.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-10-02 09:46:40 +08:00
}
2014-07-10 19:01:59 +08:00
2018-10-09 04:57:24 +08:00
ifm = nlmsg_data ( nlh ) ;
2018-10-08 11:16:43 +08:00
* brport_idx = ifm - > ifi_index ;
rtnetlink: fix rtnl_fdb_dump() for ndmsg header
Currently, rtnl_fdb_dump() assumes the family header is 'struct ifinfomsg',
which is not always true -- 'struct ndmsg' is used by iproute2 ('ip neigh').
The problem is, the function bails out early if nlmsg_parse() fails, which
does occur for iproute2 usage of 'struct ndmsg' because the payload length
is shorter than the family header alone (as 'struct ifinfomsg' is assumed).
This breaks backward compatibility with userspace -- nothing is sent back.
Some examples with iproute2 and netlink library for go [1]:
1) $ bridge fdb show
33:33:00:00:00:01 dev ens3 self permanent
01:00:5e:00:00:01 dev ens3 self permanent
33:33:ff:15:98:30 dev ens3 self permanent
This one works, as it uses 'struct ifinfomsg'.
fdb_show() @ iproute2/bridge/fdb.c
"""
.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
...
if (rtnl_dump_request(&rth, RTM_GETNEIGH, [...]
"""
2) $ ip --family bridge neigh
RTNETLINK answers: Invalid argument
Dump terminated
This one fails, as it uses 'struct ndmsg'.
do_show_or_flush() @ iproute2/ip/ipneigh.c
"""
.n.nlmsg_type = RTM_GETNEIGH,
.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)),
"""
3) $ ./neighlist
< no output >
This one fails, as it is 'struct ndmsg'-based.
neighList() @ netlink/neigh_linux.go
"""
req := h.newNetlinkRequest(unix.RTM_GETNEIGH, [...]
msg := Ndmsg{
"""
The actual breakage was introduced by commit 0ff50e83b512 ("net: rtnetlink:
bail out from rtnl_fdb_dump() on parse error"), because nlmsg_parse() fails
if the payload length (with the _actual_ family header) is less than the
family header length alone (which is assumed, in parameter 'hdrlen').
This is true in the examples above with struct ndmsg, with size and payload
length shorter than struct ifinfomsg.
However, that commit just intends to fix something under the assumption that
the family header is indeed a 'struct ifinfomsg' - by preventing access to the
payload as such (via 'ifm' pointer) if the payload length is not sufficient
to actually contain it.
The assumption was introduced by commit 5e6d24358799 ("bridge: netlink dump
interface at par with brctl"), to support iproute2's 'bridge fdb' command
(not 'ip neigh') which indeed uses 'struct ifinfomsg', thus is not broken.
So, in order to unbreak the 'struct ndmsg' family headers and still allow
'struct ifinfomsg' to continue to work, check for the known message sizes
used with 'struct ndmsg' in iproute2 (with zero or one attribute which is
not used in this function anyway) then do not parse the data as ifinfomsg.
Same examples with this patch applied (or revert/before the original fix):
$ bridge fdb show
33:33:00:00:00:01 dev ens3 self permanent
01:00:5e:00:00:01 dev ens3 self permanent
33:33:ff:15:98:30 dev ens3 self permanent
$ ip --family bridge neigh
dev ens3 lladdr 33:33:00:00:00:01 PERMANENT
dev ens3 lladdr 01:00:5e:00:00:01 PERMANENT
dev ens3 lladdr 33:33:ff:15:98:30 PERMANENT
$ ./neighlist
netlink.Neigh{LinkIndex:2, Family:7, State:128, Type:0, Flags:2, IP:net.IP(nil), HardwareAddr:net.HardwareAddr{0x33, 0x33, 0x0, 0x0, 0x0, 0x1}, LLIPAddr:net.IP(nil), Vlan:0, VNI:0}
netlink.Neigh{LinkIndex:2, Family:7, State:128, Type:0, Flags:2, IP:net.IP(nil), HardwareAddr:net.HardwareAddr{0x1, 0x0, 0x5e, 0x0, 0x0, 0x1}, LLIPAddr:net.IP(nil), Vlan:0, VNI:0}
netlink.Neigh{LinkIndex:2, Family:7, State:128, Type:0, Flags:2, IP:net.IP(nil), HardwareAddr:net.HardwareAddr{0x33, 0x33, 0xff, 0x15, 0x98, 0x30}, LLIPAddr:net.IP(nil), Vlan:0, VNI:0}
Tested on mainline (v4.19-rc6) and net-next (3bd09b05b068).
References:
[1] netlink library for go (test-case)
https://github.com/vishvananda/netlink
$ cat ~/go/src/neighlist/main.go
package main
import ("fmt"; "syscall"; "github.com/vishvananda/netlink")
func main() {
neighs, _ := netlink.NeighList(0, syscall.AF_BRIDGE)
for _, neigh := range neighs { fmt.Printf("%#v\n", neigh) }
}
$ export GOPATH=~/go
$ go get github.com/vishvananda/netlink
$ go build neighlist
$ ~/go/src/neighlist/neighlist
Thanks to David Ahern for suggestions to improve this patch.
Fixes: 0ff50e83b512 ("net: rtnetlink: bail out from rtnl_fdb_dump() on parse error")
Fixes: 5e6d24358799 ("bridge: netlink dump interface at par with brctl")
Reported-by: Aidan Obley <aobley@pivotal.io>
Signed-off-by: Mauricio Faria de Oliveira <mfo@canonical.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-10-02 09:46:40 +08:00
}
2018-10-08 11:16:43 +08:00
return 0 ;
}
static int rtnl_fdb_dump ( struct sk_buff * skb , struct netlink_callback * cb )
{
struct net_device * dev ;
struct net_device * br_dev = NULL ;
const struct net_device_ops * ops = NULL ;
const struct net_device_ops * cops = NULL ;
struct net * net = sock_net ( skb - > sk ) ;
struct hlist_head * head ;
int brport_idx = 0 ;
int br_idx = 0 ;
int h , s_h ;
int idx = 0 , s_idx ;
int err = 0 ;
int fidx = 0 ;
2018-10-08 11:16:44 +08:00
if ( cb - > strict_check )
err = valid_fdb_dump_strict ( cb - > nlh , & br_idx , & brport_idx ,
cb - > extack ) ;
else
err = valid_fdb_dump_legacy ( cb - > nlh , & br_idx , & brport_idx ,
cb - > extack ) ;
2018-10-08 11:16:43 +08:00
if ( err < 0 )
return err ;
2014-07-10 19:01:59 +08:00
if ( br_idx ) {
br_dev = __dev_get_by_index ( net , br_idx ) ;
if ( ! br_dev )
return - ENODEV ;
ops = br_dev - > netdev_ops ;
}
2016-08-31 12:56:45 +08:00
s_h = cb - > args [ 0 ] ;
s_idx = cb - > args [ 1 ] ;
2014-07-10 19:01:59 +08:00
2016-08-31 12:56:45 +08:00
for ( h = s_h ; h < NETDEV_HASHENTRIES ; h + + , s_idx = 0 ) {
idx = 0 ;
head = & net - > dev_index_head [ h ] ;
hlist_for_each_entry ( dev , head , index_hlist ) {
2014-07-10 19:01:59 +08:00
2016-08-31 12:56:45 +08:00
if ( brport_idx & & ( dev - > ifindex ! = brport_idx ) )
2014-07-10 19:01:59 +08:00
continue ;
2016-08-31 12:56:45 +08:00
if ( ! br_idx ) { /* user did not specify a specific bridge */
2020-02-20 16:00:07 +08:00
if ( netif_is_bridge_port ( dev ) ) {
2016-08-31 12:56:45 +08:00
br_dev = netdev_master_upper_dev_get ( dev ) ;
cops = br_dev - > netdev_ops ;
}
} else {
if ( dev ! = br_dev & &
2020-02-20 16:00:07 +08:00
! netif_is_bridge_port ( dev ) )
2016-08-31 12:56:45 +08:00
continue ;
2014-07-10 19:01:59 +08:00
2016-08-31 12:56:45 +08:00
if ( br_dev ! = netdev_master_upper_dev_get ( dev ) & &
2021-10-16 19:21:36 +08:00
! netif_is_bridge_master ( dev ) )
2016-08-31 12:56:45 +08:00
continue ;
cops = ops ;
}
2012-04-15 14:43:56 +08:00
2016-08-31 12:56:45 +08:00
if ( idx < s_idx )
goto cont ;
2012-04-15 14:43:56 +08:00
2020-02-20 16:00:07 +08:00
if ( netif_is_bridge_port ( dev ) ) {
2016-08-31 12:56:45 +08:00
if ( cops & & cops - > ndo_fdb_dump ) {
err = cops - > ndo_fdb_dump ( skb , cb ,
br_dev , dev ,
& fidx ) ;
if ( err = = - EMSGSIZE )
goto out ;
}
}
2014-07-10 19:01:59 +08:00
2016-08-31 12:56:45 +08:00
if ( dev - > netdev_ops - > ndo_fdb_dump )
err = dev - > netdev_ops - > ndo_fdb_dump ( skb , cb ,
dev , NULL ,
& fidx ) ;
else
err = ndo_dflt_fdb_dump ( skb , cb , dev , NULL ,
& fidx ) ;
if ( err = = - EMSGSIZE )
goto out ;
cops = NULL ;
/* reset fdb offset to 0 for rest of the interfaces */
cb - > args [ 2 ] = 0 ;
fidx = 0 ;
cont :
idx + + ;
}
2012-04-15 14:43:56 +08:00
}
2016-08-31 12:56:45 +08:00
out :
cb - > args [ 0 ] = h ;
cb - > args [ 1 ] = idx ;
cb - > args [ 2 ] = fidx ;
2012-04-15 14:43:56 +08:00
return skb - > len ;
}
2018-12-16 14:35:08 +08:00
/*
 * Validate an fdb get request and extract its parameters.
 *
 * The message must carry a full struct ndmsg whose pad, state and type
 * fields are zero and whose flags are limited to NTF_MASTER | NTF_SELF.
 * Attributes are parsed into @tb against nda_policy and then visited in
 * increasing attribute-type order:
 *   NDA_MASTER - stored in *br_idx
 *   NDA_LLADDR - must be ETH_ALEN bytes long; *addr points at its payload
 *   NDA_VLAN   - handed to fdb_vid_parse(), which fills *vid
 *   NDA_VNI    - accepted, not interpreted here
 * Any other attribute present is rejected.
 *
 * *ndm_flags and *brport_idx are taken from the ndmsg header.
 *
 * Returns 0 on success, -EINVAL for a malformed request, or the error
 * reported by the attribute parser / fdb_vid_parse().
 */
static int valid_fdb_get_strict(const struct nlmsghdr *nlh,
				struct nlattr **tb, u8 *ndm_flags,
				int *br_idx, int *brport_idx, u8 **addr,
				u16 *vid, struct netlink_ext_ack *extack)
{
	/* Only dereferenced after the length check below. */
	struct ndmsg *ndm = nlmsg_data(nlh);
	int type;
	int rc;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct ndmsg))) {
		NL_SET_ERR_MSG(extack, " Invalid header for fdb get request ");
		return -EINVAL;
	}

	if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state ||
	    ndm->ndm_type) {
		NL_SET_ERR_MSG(extack, " Invalid values in header for fdb get request ");
		return -EINVAL;
	}

	if (ndm->ndm_flags & ~(NTF_MASTER | NTF_SELF)) {
		NL_SET_ERR_MSG(extack, " Invalid flags in header for fdb get request ");
		return -EINVAL;
	}

	rc = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb,
					   NDA_MAX, nda_policy, extack);
	if (rc < 0)
		return rc;

	*ndm_flags = ndm->ndm_flags;
	*brport_idx = ndm->ndm_ifindex;

	for (type = 0; type <= NDA_MAX; type++) {
		struct nlattr *attr = tb[type];

		if (!attr)
			continue;

		if (type == NDA_MASTER) {
			*br_idx = nla_get_u32(attr);
		} else if (type == NDA_LLADDR) {
			if (nla_len(attr) != ETH_ALEN) {
				NL_SET_ERR_MSG(extack, " Invalid address in fdb get request ");
				return -EINVAL;
			}
			*addr = nla_data(attr);
		} else if (type == NDA_VLAN) {
			rc = fdb_vid_parse(attr, vid, extack);
			if (rc)
				return rc;
		} else if (type != NDA_VNI) {
			NL_SET_ERR_MSG(extack, " Unsupported attribute in fdb get request ");
			return -EINVAL;
		}
	}

	return 0;
}
static int rtnl_fdb_get ( struct sk_buff * in_skb , struct nlmsghdr * nlh ,
struct netlink_ext_ack * extack )
{
struct net_device * dev = NULL , * br_dev = NULL ;
const struct net_device_ops * ops = NULL ;
struct net * net = sock_net ( in_skb - > sk ) ;
struct nlattr * tb [ NDA_MAX + 1 ] ;
struct sk_buff * skb ;
int brport_idx = 0 ;
u8 ndm_flags = 0 ;
int br_idx = 0 ;
u8 * addr = NULL ;
u16 vid = 0 ;
int err ;
err = valid_fdb_get_strict ( nlh , tb , & ndm_flags , & br_idx ,
& brport_idx , & addr , & vid , extack ) ;
if ( err < 0 )
return err ;
2018-12-30 20:33:20 +08:00
if ( ! addr ) {
NL_SET_ERR_MSG ( extack , " Missing lookup address for fdb get request " ) ;
return - EINVAL ;
}
2018-12-16 14:35:08 +08:00
if ( brport_idx ) {
dev = __dev_get_by_index ( net , brport_idx ) ;
if ( ! dev ) {
NL_SET_ERR_MSG ( extack , " Unknown device ifindex " ) ;
return - ENODEV ;
}
}
if ( br_idx ) {
if ( dev ) {
NL_SET_ERR_MSG ( extack , " Master and device are mutually exclusive " ) ;
return - EINVAL ;
}
br_dev = __dev_get_by_index ( net , br_idx ) ;
if ( ! br_dev ) {
NL_SET_ERR_MSG ( extack , " Invalid master ifindex " ) ;
return - EINVAL ;
}
ops = br_dev - > netdev_ops ;
}
if ( dev ) {
if ( ! ndm_flags | | ( ndm_flags & NTF_MASTER ) ) {
2020-02-20 16:00:07 +08:00
if ( ! netif_is_bridge_port ( dev ) ) {
2018-12-16 14:35:08 +08:00
NL_SET_ERR_MSG ( extack , " Device is not a bridge port " ) ;
return - EINVAL ;
}
br_dev = netdev_master_upper_dev_get ( dev ) ;
if ( ! br_dev ) {
NL_SET_ERR_MSG ( extack , " Master of device not found " ) ;
return - EINVAL ;
}
ops = br_dev - > netdev_ops ;
} else {
if ( ! ( ndm_flags & NTF_SELF ) ) {
NL_SET_ERR_MSG ( extack , " Missing NTF_SELF " ) ;
return - EINVAL ;
}
ops = dev - > netdev_ops ;
}
}
if ( ! br_dev & & ! dev ) {
NL_SET_ERR_MSG ( extack , " No device specified " ) ;
return - ENODEV ;
}
if ( ! ops | | ! ops - > ndo_fdb_get ) {
NL_SET_ERR_MSG ( extack , " Fdb get operation not supported by device " ) ;
return - EOPNOTSUPP ;
}
skb = nlmsg_new ( NLMSG_GOODSIZE , GFP_KERNEL ) ;
if ( ! skb )
return - ENOBUFS ;
if ( br_dev )
dev = br_dev ;
err = ops - > ndo_fdb_get ( skb , tb , dev , addr , vid ,
NETLINK_CB ( in_skb ) . portid ,
nlh - > nlmsg_seq , extack ) ;
if ( err )
goto out ;
return rtnl_unicast ( skb , net , NETLINK_CB ( in_skb ) . portid ) ;
out :
kfree_skb ( skb ) ;
return err ;
}
2014-11-28 21:34:25 +08:00
/*
 * Emit one bridge port flag as a u8 attribute.
 *
 * Nothing is written unless @flag is selected in @mask. When it is,
 * attribute @attrnum is added with value 1 if @flag is set in @flags and
 * 0 otherwise.
 *
 * Returns 0 when the flag is not selected, else the nla_put_u8() result.
 */
static int brport_nla_put_flag(struct sk_buff *skb, u32 flags, u32 mask,
			       unsigned int attrnum, unsigned int flag)
{
	if (!(mask & flag))
		return 0;

	return nla_put_u8(skb, attrnum, (flags & flag) ? 1 : 0);
}
2012-10-24 16:13:09 +08:00
/*
 * Build an AF_BRIDGE RTM_NEWLINK message describing @dev in @skb.
 *
 * Layout of the message:
 *   - struct ifinfomsg header filled from @dev;
 *   - IFLA_IFNAME, IFLA_MTU, IFLA_OPERSTATE, plus IFLA_MASTER, IFLA_ADDRESS
 *     and IFLA_LINK when the device has a master, a non-zero address length
 *     or an iflink different from its ifindex, respectively;
 *   - an IFLA_AF_SPEC nest holding IFLA_BRIDGE_FLAGS (BRIDGE_FLAGS_SELF),
 *     IFLA_BRIDGE_MODE unless @mode is BRIDGE_MODE_UNDEF, and whatever
 *     @vlan_fill (if non-NULL) adds;
 *   - an IFLA_PROTINFO nest with one u8 attribute per port flag that is
 *     selected in @mask, valued from @flags.
 *
 * On any failure the partially built message is cancelled.
 *
 * Returns 0 on success, the error from @vlan_fill if that is what failed,
 * and -EMSGSIZE for every other failure.
 */
int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
			    struct net_device *dev, u16 mode,
			    u32 flags, u32 mask, int nlflags,
			    u32 filter_mask,
			    int (*vlan_fill)(struct sk_buff *skb,
					     struct net_device *dev,
					     u32 filter_mask))
{
	/* Port flags reported under IFLA_PROTINFO, in emission order. */
	static const struct {
		unsigned int attr;
		unsigned int flag;
	} brport_flags[] = {
		{ IFLA_BRPORT_MODE,		BR_HAIRPIN_MODE },
		{ IFLA_BRPORT_GUARD,		BR_BPDU_GUARD },
		{ IFLA_BRPORT_FAST_LEAVE,	BR_MULTICAST_FAST_LEAVE },
		{ IFLA_BRPORT_PROTECT,		BR_ROOT_BLOCK },
		{ IFLA_BRPORT_LEARNING,		BR_LEARNING },
		{ IFLA_BRPORT_LEARNING_SYNC,	BR_LEARNING_SYNC },
		{ IFLA_BRPORT_UNICAST_FLOOD,	BR_FLOOD },
		{ IFLA_BRPORT_PROXYARP,		BR_PROXYARP },
		{ IFLA_BRPORT_MCAST_FLOOD,	BR_MCAST_FLOOD },
		{ IFLA_BRPORT_BCAST_FLOOD,	BR_BCAST_FLOOD },
	};
	u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
	struct net_device *br_dev = netdev_master_upper_dev_get(dev);
	struct nlattr *br_afspec;
	struct nlattr *protinfo;
	struct ifinfomsg *ifm;
	struct nlmsghdr *nlh;
	int err = 0;
	size_t i;

	nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), nlflags);
	if (!nlh)
		return -EMSGSIZE;

	ifm = nlmsg_data(nlh);
	ifm->ifi_family = AF_BRIDGE;
	ifm->__ifi_pad = 0;
	ifm->ifi_type = dev->type;
	ifm->ifi_index = dev->ifindex;
	ifm->ifi_flags = dev_get_flags(dev);
	ifm->ifi_change = 0;

	if (nla_put_string(skb, IFLA_IFNAME, dev->name))
		goto nla_put_failure;
	if (nla_put_u32(skb, IFLA_MTU, dev->mtu))
		goto nla_put_failure;
	if (nla_put_u8(skb, IFLA_OPERSTATE, operstate))
		goto nla_put_failure;
	if (br_dev && nla_put_u32(skb, IFLA_MASTER, br_dev->ifindex))
		goto nla_put_failure;
	if (dev->addr_len &&
	    nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr))
		goto nla_put_failure;
	if (dev->ifindex != dev_get_iflink(dev) &&
	    nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev)))
		goto nla_put_failure;

	br_afspec = nla_nest_start_noflag(skb, IFLA_AF_SPEC);
	if (!br_afspec)
		goto nla_put_failure;

	if (nla_put_u16(skb, IFLA_BRIDGE_FLAGS, BRIDGE_FLAGS_SELF))
		goto cancel_afspec;

	if (mode != BRIDGE_MODE_UNDEF &&
	    nla_put_u16(skb, IFLA_BRIDGE_MODE, mode))
		goto cancel_afspec;

	if (vlan_fill) {
		/* A vlan_fill error is passed to the caller unchanged. */
		err = vlan_fill(skb, dev, filter_mask);
		if (err)
			goto cancel_afspec;
	}
	nla_nest_end(skb, br_afspec);

	protinfo = nla_nest_start(skb, IFLA_PROTINFO);
	if (!protinfo)
		goto nla_put_failure;

	for (i = 0; i < sizeof(brport_flags) / sizeof(brport_flags[0]); i++) {
		if (brport_nla_put_flag(skb, flags, mask,
					brport_flags[i].attr,
					brport_flags[i].flag))
			goto cancel_protinfo;
	}
	nla_nest_end(skb, protinfo);

	nlmsg_end(skb, nlh);
	return 0;

cancel_protinfo:
	nla_nest_cancel(skb, protinfo);
	goto nla_put_failure;
cancel_afspec:
	nla_nest_cancel(skb, br_afspec);
nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return err ? err : -EMSGSIZE;
}
2015-06-22 15:27:17 +08:00
EXPORT_SYMBOL_GPL ( ndo_dflt_bridge_getlink ) ;
2012-10-24 16:13:09 +08:00
2018-10-08 11:16:31 +08:00
/*
 * Validate a bridge link dump request and pull out the filter mask.
 *
 * With @strict_check the message must contain a full struct ifinfomsg in
 * which __ifi_pad, ifi_type, ifi_flags, ifi_change and ifi_index are all
 * zero, attributes are parsed with the strict parser, and every attribute
 * other than IFLA_EXT_MASK is refused. Without it, the lenient parser is
 * used and unrecognised attributes are ignored.
 *
 * *filter_mask is written only when IFLA_EXT_MASK is present.
 *
 * Returns 0 on success, -EINVAL for a request rejected here, or the error
 * from the attribute parser.
 */
static int valid_bridge_getlink_req(const struct nlmsghdr *nlh,
				    bool strict_check, u32 *filter_mask,
				    struct netlink_ext_ack *extack)
{
	struct nlattr *tb[IFLA_MAX + 1];
	int type;
	int rc;

	if (!strict_check) {
		rc = nlmsg_parse_deprecated(nlh, sizeof(struct ifinfomsg),
					    tb, IFLA_MAX, ifla_policy,
					    extack);
	} else {
		struct ifinfomsg *ifm;

		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
			NL_SET_ERR_MSG(extack, " Invalid header for bridge link dump ");
			return -EINVAL;
		}

		ifm = nlmsg_data(nlh);
		if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags ||
		    ifm->ifi_change || ifm->ifi_index) {
			NL_SET_ERR_MSG(extack, " Invalid values in header for bridge link dump request ");
			return -EINVAL;
		}

		rc = nlmsg_parse_deprecated_strict(nlh,
						   sizeof(struct ifinfomsg),
						   tb, IFLA_MAX, ifla_policy,
						   extack);
	}
	if (rc < 0)
		return rc;

	/* new attributes should only be added with strict checking */
	for (type = 0; type <= IFLA_MAX; type++) {
		if (!tb[type])
			continue;

		if (type == IFLA_EXT_MASK) {
			*filter_mask = nla_get_u32(tb[type]);
		} else if (strict_check) {
			NL_SET_ERR_MSG(extack, " Unsupported attribute in bridge link dump request ");
			return -EINVAL;
		}
	}

	return 0;
}
2012-10-24 16:12:57 +08:00
static int rtnl_bridge_getlink ( struct sk_buff * skb , struct netlink_callback * cb )
{
2018-10-08 11:16:31 +08:00
const struct nlmsghdr * nlh = cb - > nlh ;
2012-10-24 16:12:57 +08:00
struct net * net = sock_net ( skb - > sk ) ;
struct net_device * dev ;
int idx = 0 ;
u32 portid = NETLINK_CB ( cb - > skb ) . portid ;
2018-10-08 11:16:31 +08:00
u32 seq = nlh - > nlmsg_seq ;
2013-02-13 20:00:13 +08:00
u32 filter_mask = 0 ;
2015-09-16 05:44:29 +08:00
int err ;
2013-02-13 20:00:13 +08:00
2018-10-08 11:16:31 +08:00
err = valid_bridge_getlink_req ( nlh , cb - > strict_check , & filter_mask ,
cb - > extack ) ;
if ( err < 0 & & cb - > strict_check )
return err ;
2012-10-24 16:12:57 +08:00
rcu_read_lock ( ) ;
for_each_netdev_rcu ( net , dev ) {
const struct net_device_ops * ops = dev - > netdev_ops ;
2013-01-04 06:48:52 +08:00
struct net_device * br_dev = netdev_master_upper_dev_get ( dev ) ;
2012-10-24 16:12:57 +08:00
2013-01-04 06:48:52 +08:00
if ( br_dev & & br_dev - > netdev_ops - > ndo_bridge_getlink ) {
2015-09-16 05:44:29 +08:00
if ( idx > = cb - > args [ 0 ] ) {
err = br_dev - > netdev_ops - > ndo_bridge_getlink (
skb , portid , seq , dev ,
filter_mask , NLM_F_MULTI ) ;
2017-05-16 14:19:17 +08:00
if ( err < 0 & & err ! = - EOPNOTSUPP ) {
if ( likely ( skb - > len ) )
break ;
goto out_err ;
}
2015-09-16 05:44:29 +08:00
}
2012-11-02 20:56:52 +08:00
idx + + ;
2012-10-24 16:12:57 +08:00
}
if ( ops - > ndo_bridge_getlink ) {
2015-09-16 05:44:29 +08:00
if ( idx > = cb - > args [ 0 ] ) {
err = ops - > ndo_bridge_getlink ( skb , portid ,
seq , dev ,
filter_mask ,
NLM_F_MULTI ) ;
2017-05-16 14:19:17 +08:00
if ( err < 0 & & err ! = - EOPNOTSUPP ) {
if ( likely ( skb - > len ) )
break ;
goto out_err ;
}
2015-09-16 05:44:29 +08:00
}
2012-11-02 20:56:52 +08:00
idx + + ;
2012-10-24 16:12:57 +08:00
}
}
2017-05-16 14:19:17 +08:00
err = skb - > len ;
out_err :
2012-10-24 16:12:57 +08:00
rcu_read_unlock ( ) ;
cb - > args [ 0 ] = idx ;
2017-05-16 14:19:17 +08:00
return err ;
2012-10-24 16:12:57 +08:00
}
2012-10-24 16:13:03 +08:00
/*
 * Size estimate for a bridge link message: an aligned struct ifinfomsg
 * followed by the attributes listed below.
 *
 * NOTE(review): ndo_dflt_bridge_getlink() emits IFLA_OPERSTATE as a u8 and
 * IFLA_PROTINFO as a nest of per-flag u8 attributes, which does not match
 * the payload sizes labelled here - confirm against the callers that this
 * estimate is sufficient for the messages they build.
 */
static inline size_t bridge_nlmsg_size(void)
{
	size_t len = NLMSG_ALIGN(sizeof(struct ifinfomsg));

	len += nla_total_size(IFNAMSIZ);		/* IFLA_IFNAME */
	len += nla_total_size(MAX_ADDR_LEN);		/* IFLA_ADDRESS */
	len += nla_total_size(sizeof(u32));		/* IFLA_MASTER */
	len += nla_total_size(sizeof(u32));		/* IFLA_MTU */
	len += nla_total_size(sizeof(u32));		/* IFLA_LINK */
	len += nla_total_size(sizeof(u32));		/* IFLA_OPERSTATE */
	len += nla_total_size(sizeof(u8));		/* IFLA_PROTINFO */
	len += nla_total_size(sizeof(struct nlattr));	/* IFLA_AF_SPEC */
	len += nla_total_size(sizeof(u16));		/* IFLA_BRIDGE_FLAGS */
	len += nla_total_size(sizeof(u16));		/* IFLA_BRIDGE_MODE */

	return len;
}
bridge: fix setlink/dellink notifications
problems with bridge getlink/setlink notifications today:
- bridge setlink generates two notifications to userspace
- one from the bridge driver
- one from rtnetlink.c (rtnl_bridge_notify)
- dellink generates one notification from rtnetlink.c. Which
means bridge setlink and dellink notifications are not
consistent
- Looking at the code, it appears that
if both BRIDGE_FLAGS_MASTER and BRIDGE_FLAGS_SELF were set,
the size calculation in rtnl_bridge_notify can be wrong.
Example: if you set both BRIDGE_FLAGS_MASTER and BRIDGE_FLAGS_SELF
in a setlink request to rocker dev, rtnl_bridge_notify will
allocate skb for one set of bridge attributes, but,
both the bridge driver and rocker dev will try to add
attributes resulting in twice the number of attributes
being added to the skb. (rocker dev calls ndo_dflt_bridge_getlink)
There are multiple options:
1) Generate one notification including all attributes from master and self:
But, I don't think it will work, because both master and self may use
the same attributes/policy. Cannot pack the same set of attributes in a
single notification from both master and slave (duplicate attributes).
2) Generate one notification from master and the other notification from
self (This seems to be ideal):
For master: the master driver will send notification (bridge in this
example)
For self: the self driver will send notification (rocker in the above
example. It can use helpers from rtnetlink.c to do so. Like the
ndo_dflt_bridge_getlink api).
This patch implements 2) (leaving the 'rtnl_bridge_notify' around to be used
with 'self').
v1->v2 :
- rtnl_bridge_notify is now called only for self,
so, remove 'BRIDGE_FLAGS_SELF' check and cleanup a few things
- rtnl_bridge_dellink used to always send a RTM_NEWLINK msg
earlier. So, I have changed the notification from br_dellink to
go as RTM_NEWLINK
Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-01-15 12:02:25 +08:00
static int rtnl_bridge_notify ( struct net_device * dev )
2012-10-24 16:13:03 +08:00
{
struct net * net = dev_net ( dev ) ;
struct sk_buff * skb ;
int err = - EOPNOTSUPP ;
bridge: fix setlink/dellink notifications
problems with bridge getlink/setlink notifications today:
- bridge setlink generates two notifications to userspace
- one from the bridge driver
- one from rtnetlink.c (rtnl_bridge_notify)
- dellink generates one notification from rtnetlink.c. Which
means bridge setlink and dellink notifications are not
consistent
- Looking at the code it appears,
If both BRIDGE_FLAGS_MASTER and BRIDGE_FLAGS_SELF were set,
the size calculation in rtnl_bridge_notify can be wrong.
Example: if you set both BRIDGE_FLAGS_MASTER and BRIDGE_FLAGS_SELF
in a setlink request to rocker dev, rtnl_bridge_notify will
allocate skb for one set of bridge attributes, but,
both the bridge driver and rocker dev will try to add
attributes resulting in twice the number of attributes
being added to the skb. (rocker dev calls ndo_dflt_bridge_getlink)
There are multiple options:
1) Generate one notification including all attributes from master and self:
But, I don't think it will work, because both master and self may use
the same attributes/policy. Cannot pack the same set of attributes in a
single notification from both master and slave (duplicate attributes).
2) Generate one notification from master and the other notification from
self (This seems to be ideal):
For master: the master driver will send notification (bridge in this
example)
For self: the self driver will send notification (rocker in the above
example. It can use helpers from rtnetlink.c to do so. Like the
ndo_dflt_bridge_getlink api).
This patch implements 2) (leaving the 'rtnl_bridge_notify' around to be used
with 'self').
v1->v2 :
- rtnl_bridge_notify is now called only for self,
so, remove 'BRIDGE_FLAGS_SELF' check and cleanup a few things
- rtnl_bridge_dellink used to always send a RTM_NEWLINK msg
earlier. So, I have changed the notification from br_dellink to
go as RTM_NEWLINK
Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-01-15 12:02:25 +08:00
if ( ! dev - > netdev_ops - > ndo_bridge_getlink )
return 0 ;
2012-10-24 16:13:03 +08:00
skb = nlmsg_new ( bridge_nlmsg_size ( ) , GFP_ATOMIC ) ;
if ( ! skb ) {
err = - ENOMEM ;
goto errout ;
}
2015-04-29 00:33:49 +08:00
err = dev - > netdev_ops - > ndo_bridge_getlink ( skb , 0 , 0 , dev , 0 , 0 ) ;
bridge: fix setlink/dellink notifications
problems with bridge getlink/setlink notifications today:
- bridge setlink generates two notifications to userspace
- one from the bridge driver
- one from rtnetlink.c (rtnl_bridge_notify)
- dellink generates one notification from rtnetlink.c. Which
means bridge setlink and dellink notifications are not
consistent
- Looking at the code it appears,
If both BRIDGE_FLAGS_MASTER and BRIDGE_FLAGS_SELF were set,
the size calculation in rtnl_bridge_notify can be wrong.
Example: if you set both BRIDGE_FLAGS_MASTER and BRIDGE_FLAGS_SELF
in a setlink request to rocker dev, rtnl_bridge_notify will
allocate skb for one set of bridge attributes, but,
both the bridge driver and rocker dev will try to add
attributes resulting in twice the number of attributes
being added to the skb. (rocker dev calls ndo_dflt_bridge_getlink)
There are multiple options:
1) Generate one notification including all attributes from master and self:
But, I don't think it will work, because both master and self may use
the same attributes/policy. Cannot pack the same set of attributes in a
single notification from both master and slave (duplicate attributes).
2) Generate one notification from master and the other notification from
self (This seems to be ideal):
For master: the master driver will send notification (bridge in this
example)
For self: the self driver will send notification (rocker in the above
example. It can use helpers from rtnetlink.c to do so. Like the
ndo_dflt_bridge_getlink api).
This patch implements 2) (leaving the 'rtnl_bridge_notify' around to be used
with 'self').
v1->v2 :
- rtnl_bridge_notify is now called only for self,
so, remove 'BRIDGE_FLAGS_SELF' check and cleanup a few things
- rtnl_bridge_dellink used to always send a RTM_NEWLINK msg
earlier. So, I have changed the notification from br_dellink to
go as RTM_NEWLINK
Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-01-15 12:02:25 +08:00
if ( err < 0 )
goto errout ;
2012-10-24 16:13:03 +08:00
2021-06-09 19:17:53 +08:00
/* Notification info is only filled for bridge ports, not the bridge
* device itself . Therefore , a zero notification length is valid and
* should not result in an error .
*/
if ( ! skb - > len )
2015-01-29 08:23:11 +08:00
goto errout ;
2012-10-24 16:13:03 +08:00
rtnl_notify ( skb , net , 0 , RTNLGRP_LINK , NULL , GFP_ATOMIC ) ;
return 0 ;
errout :
WARN_ON ( err = = - EMSGSIZE ) ;
kfree_skb ( skb ) ;
2015-01-29 08:23:11 +08:00
if ( err )
rtnl_set_sk_err ( net , RTNLGRP_LINK , err ) ;
2012-10-24 16:13:03 +08:00
return err ;
}
2017-04-17 00:48:24 +08:00
static int rtnl_bridge_setlink ( struct sk_buff * skb , struct nlmsghdr * nlh ,
struct netlink_ext_ack * extack )
2012-10-24 16:12:57 +08:00
{
struct net * net = sock_net ( skb - > sk ) ;
struct ifinfomsg * ifm ;
struct net_device * dev ;
2012-10-24 16:13:03 +08:00
struct nlattr * br_spec , * attr = NULL ;
int rem , err = - EOPNOTSUPP ;
2015-01-19 17:45:04 +08:00
u16 flags = 0 ;
2012-11-03 00:32:36 +08:00
bool have_flags = false ;
2012-10-24 16:12:57 +08:00
if ( nlmsg_len ( nlh ) < sizeof ( * ifm ) )
return - EINVAL ;
ifm = nlmsg_data ( nlh ) ;
if ( ifm - > ifi_family ! = AF_BRIDGE )
return - EPFNOSUPPORT ;
dev = __dev_get_by_index ( net , ifm - > ifi_index ) ;
if ( ! dev ) {
2017-10-10 23:10:04 +08:00
NL_SET_ERR_MSG ( extack , " unknown ifindex " ) ;
2012-10-24 16:12:57 +08:00
return - ENODEV ;
}
2012-10-24 16:13:03 +08:00
br_spec = nlmsg_find_attr ( nlh , sizeof ( struct ifinfomsg ) , IFLA_AF_SPEC ) ;
if ( br_spec ) {
nla_for_each_nested ( attr , br_spec , rem ) {
if ( nla_type ( attr ) = = IFLA_BRIDGE_FLAGS ) {
2014-11-26 20:42:16 +08:00
if ( nla_len ( attr ) < sizeof ( flags ) )
return - EINVAL ;
2012-11-03 00:32:36 +08:00
have_flags = true ;
2012-10-24 16:13:03 +08:00
flags = nla_get_u16 ( attr ) ;
break ;
}
}
}
if ( ! flags | | ( flags & BRIDGE_FLAGS_MASTER ) ) {
2013-01-04 06:48:52 +08:00
struct net_device * br_dev = netdev_master_upper_dev_get ( dev ) ;
if ( ! br_dev | | ! br_dev - > netdev_ops - > ndo_bridge_setlink ) {
2012-10-24 16:13:03 +08:00
err = - EOPNOTSUPP ;
goto out ;
}
2018-12-13 01:02:48 +08:00
err = br_dev - > netdev_ops - > ndo_bridge_setlink ( dev , nlh , flags ,
extack ) ;
2012-10-24 16:12:57 +08:00
if ( err )
goto out ;
2012-10-24 16:13:03 +08:00
flags & = ~ BRIDGE_FLAGS_MASTER ;
2012-10-24 16:12:57 +08:00
}
2012-10-24 16:13:03 +08:00
if ( ( flags & BRIDGE_FLAGS_SELF ) ) {
if ( ! dev - > netdev_ops - > ndo_bridge_setlink )
err = - EOPNOTSUPP ;
else
2015-01-30 14:40:12 +08:00
err = dev - > netdev_ops - > ndo_bridge_setlink ( dev , nlh ,
2018-12-13 01:02:48 +08:00
flags ,
extack ) ;
bridge: fix setlink/dellink notifications
problems with bridge getlink/setlink notifications today:
- bridge setlink generates two notifications to userspace
- one from the bridge driver
- one from rtnetlink.c (rtnl_bridge_notify)
- dellink generates one notification from rtnetlink.c. Which
means bridge setlink and dellink notifications are not
consistent
- Looking at the code it appears,
If both BRIDGE_FLAGS_MASTER and BRIDGE_FLAGS_SELF were set,
the size calculation in rtnl_bridge_notify can be wrong.
Example: if you set both BRIDGE_FLAGS_MASTER and BRIDGE_FLAGS_SELF
in a setlink request to rocker dev, rtnl_bridge_notify will
allocate skb for one set of bridge attributes, but,
both the bridge driver and rocker dev will try to add
attributes resulting in twice the number of attributes
being added to the skb. (rocker dev calls ndo_dflt_bridge_getlink)
There are multiple options:
1) Generate one notification including all attributes from master and self:
But, I don't think it will work, because both master and self may use
the same attributes/policy. Cannot pack the same set of attributes in a
single notification from both master and slave (duplicate attributes).
2) Generate one notification from master and the other notification from
self (This seems to be ideal):
For master: the master driver will send notification (bridge in this
example)
For self: the self driver will send notification (rocker in the above
example. It can use helpers from rtnetlink.c to do so. Like the
ndo_dflt_bridge_getlink api).
This patch implements 2) (leaving the 'rtnl_bridge_notify' around to be used
with 'self').
v1->v2 :
- rtnl_bridge_notify is now called only for self,
so, remove 'BRIDGE_FLAGS_SELF' check and cleanup a few things
- rtnl_bridge_dellink used to always send a RTM_NEWLINK msg
earlier. So, I have changed the notification from br_dellink to
go as RTM_NEWLINK
Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-01-15 12:02:25 +08:00
if ( ! err ) {
2012-10-24 16:13:03 +08:00
flags & = ~ BRIDGE_FLAGS_SELF ;
bridge: fix setlink/dellink notifications
problems with bridge getlink/setlink notifications today:
- bridge setlink generates two notifications to userspace
- one from the bridge driver
- one from rtnetlink.c (rtnl_bridge_notify)
- dellink generates one notification from rtnetlink.c. Which
means bridge setlink and dellink notifications are not
consistent
- Looking at the code it appears,
If both BRIDGE_FLAGS_MASTER and BRIDGE_FLAGS_SELF were set,
the size calculation in rtnl_bridge_notify can be wrong.
Example: if you set both BRIDGE_FLAGS_MASTER and BRIDGE_FLAGS_SELF
in a setlink request to rocker dev, rtnl_bridge_notify will
allocate skb for one set of bridge attributes, but,
both the bridge driver and rocker dev will try to add
attributes resulting in twice the number of attributes
being added to the skb. (rocker dev calls ndo_dflt_bridge_getlink)
There are multiple options:
1) Generate one notification including all attributes from master and self:
But, I don't think it will work, because both master and self may use
the same attributes/policy. Cannot pack the same set of attributes in a
single notification from both master and slave (duplicate attributes).
2) Generate one notification from master and the other notification from
self (This seems to be ideal):
For master: the master driver will send notification (bridge in this
example)
For self: the self driver will send notification (rocker in the above
example. It can use helpers from rtnetlink.c to do so. Like the
ndo_dflt_bridge_getlink api).
This patch implements 2) (leaving the 'rtnl_bridge_notify' around to be used
with 'self').
v1->v2 :
- rtnl_bridge_notify is now called only for self,
so, remove 'BRIDGE_FLAGS_SELF' check and cleanup a few things
- rtnl_bridge_dellink used to always send a RTM_NEWLINK msg
earlier. So, I have changed the notification from br_dellink to
go as RTM_NEWLINK
Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-01-15 12:02:25 +08:00
/* Generate event to notify upper layer of bridge
* change
*/
err = rtnl_bridge_notify ( dev ) ;
}
2012-10-24 16:13:03 +08:00
}
2012-10-24 16:12:57 +08:00
2012-11-03 00:32:36 +08:00
if ( have_flags )
2012-10-24 16:13:03 +08:00
memcpy ( nla_data ( attr ) , & flags , sizeof ( flags ) ) ;
2012-10-24 16:12:57 +08:00
out :
return err ;
}
2017-04-17 00:48:24 +08:00
static int rtnl_bridge_dellink ( struct sk_buff * skb , struct nlmsghdr * nlh ,
struct netlink_ext_ack * extack )
2013-02-13 20:00:12 +08:00
{
struct net * net = sock_net ( skb - > sk ) ;
struct ifinfomsg * ifm ;
struct net_device * dev ;
struct nlattr * br_spec , * attr = NULL ;
int rem , err = - EOPNOTSUPP ;
2015-01-19 17:45:04 +08:00
u16 flags = 0 ;
2013-02-13 20:00:12 +08:00
bool have_flags = false ;
if ( nlmsg_len ( nlh ) < sizeof ( * ifm ) )
return - EINVAL ;
ifm = nlmsg_data ( nlh ) ;
if ( ifm - > ifi_family ! = AF_BRIDGE )
return - EPFNOSUPPORT ;
dev = __dev_get_by_index ( net , ifm - > ifi_index ) ;
if ( ! dev ) {
2017-10-10 23:10:04 +08:00
NL_SET_ERR_MSG ( extack , " unknown ifindex " ) ;
2013-02-13 20:00:12 +08:00
return - ENODEV ;
}
br_spec = nlmsg_find_attr ( nlh , sizeof ( struct ifinfomsg ) , IFLA_AF_SPEC ) ;
if ( br_spec ) {
nla_for_each_nested ( attr , br_spec , rem ) {
if ( nla_type ( attr ) = = IFLA_BRIDGE_FLAGS ) {
2014-11-26 20:42:16 +08:00
if ( nla_len ( attr ) < sizeof ( flags ) )
return - EINVAL ;
2013-02-13 20:00:12 +08:00
have_flags = true ;
flags = nla_get_u16 ( attr ) ;
break ;
}
}
}
if ( ! flags | | ( flags & BRIDGE_FLAGS_MASTER ) ) {
struct net_device * br_dev = netdev_master_upper_dev_get ( dev ) ;
if ( ! br_dev | | ! br_dev - > netdev_ops - > ndo_bridge_dellink ) {
err = - EOPNOTSUPP ;
goto out ;
}
2015-01-30 14:40:12 +08:00
err = br_dev - > netdev_ops - > ndo_bridge_dellink ( dev , nlh , flags ) ;
2013-02-13 20:00:12 +08:00
if ( err )
goto out ;
flags & = ~ BRIDGE_FLAGS_MASTER ;
}
if ( ( flags & BRIDGE_FLAGS_SELF ) ) {
if ( ! dev - > netdev_ops - > ndo_bridge_dellink )
err = - EOPNOTSUPP ;
else
2015-01-30 14:40:12 +08:00
err = dev - > netdev_ops - > ndo_bridge_dellink ( dev , nlh ,
flags ) ;
2013-02-13 20:00:12 +08:00
bridge: fix setlink/dellink notifications
problems with bridge getlink/setlink notifications today:
- bridge setlink generates two notifications to userspace
- one from the bridge driver
- one from rtnetlink.c (rtnl_bridge_notify)
- dellink generates one notification from rtnetlink.c. Which
means bridge setlink and dellink notifications are not
consistent
- Looking at the code it appears,
If both BRIDGE_FLAGS_MASTER and BRIDGE_FLAGS_SELF were set,
the size calculation in rtnl_bridge_notify can be wrong.
Example: if you set both BRIDGE_FLAGS_MASTER and BRIDGE_FLAGS_SELF
in a setlink request to rocker dev, rtnl_bridge_notify will
allocate skb for one set of bridge attributes, but,
both the bridge driver and rocker dev will try to add
attributes resulting in twice the number of attributes
being added to the skb. (rocker dev calls ndo_dflt_bridge_getlink)
There are multiple options:
1) Generate one notification including all attributes from master and self:
But, I don't think it will work, because both master and self may use
the same attributes/policy. Cannot pack the same set of attributes in a
single notification from both master and slave (duplicate attributes).
2) Generate one notification from master and the other notification from
self (This seems to be ideal):
For master: the master driver will send notification (bridge in this
example)
For self: the self driver will send notification (rocker in the above
example. It can use helpers from rtnetlink.c to do so. Like the
ndo_dflt_bridge_getlink api).
This patch implements 2) (leaving the 'rtnl_bridge_notify' around to be used
with 'self').
v1->v2 :
- rtnl_bridge_notify is now called only for self,
so, remove 'BRIDGE_FLAGS_SELF' check and cleanup a few things
- rtnl_bridge_dellink used to always send a RTM_NEWLINK msg
earlier. So, I have changed the notification from br_dellink to
go as RTM_NEWLINK
Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-01-15 12:02:25 +08:00
if ( ! err ) {
2013-02-13 20:00:12 +08:00
flags & = ~ BRIDGE_FLAGS_SELF ;
bridge: fix setlink/dellink notifications
problems with bridge getlink/setlink notifications today:
- bridge setlink generates two notifications to userspace
- one from the bridge driver
- one from rtnetlink.c (rtnl_bridge_notify)
- dellink generates one notification from rtnetlink.c. Which
means bridge setlink and dellink notifications are not
consistent
- Looking at the code it appears,
If both BRIDGE_FLAGS_MASTER and BRIDGE_FLAGS_SELF were set,
the size calculation in rtnl_bridge_notify can be wrong.
Example: if you set both BRIDGE_FLAGS_MASTER and BRIDGE_FLAGS_SELF
in a setlink request to rocker dev, rtnl_bridge_notify will
allocate skb for one set of bridge attributes, but,
both the bridge driver and rocker dev will try to add
attributes resulting in twice the number of attributes
being added to the skb. (rocker dev calls ndo_dflt_bridge_getlink)
There are multiple options:
1) Generate one notification including all attributes from master and self:
But, I don't think it will work, because both master and self may use
the same attributes/policy. Cannot pack the same set of attributes in a
single notification from both master and slave (duplicate attributes).
2) Generate one notification from master and the other notification from
self (This seems to be ideal):
For master: the master driver will send notification (bridge in this
example)
For self: the self driver will send notification (rocker in the above
example. It can use helpers from rtnetlink.c to do so. Like the
ndo_dflt_bridge_getlink api).
This patch implements 2) (leaving the 'rtnl_bridge_notify' around to be used
with 'self').
v1->v2 :
- rtnl_bridge_notify is now called only for self,
so, remove 'BRIDGE_FLAGS_SELF' check and cleanup a few things
- rtnl_bridge_dellink used to always send a RTM_NEWLINK msg
earlier. So, I have changed the notification from br_dellink to
go as RTM_NEWLINK
Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-01-15 12:02:25 +08:00
/* Generate event to notify upper layer of bridge
* change
*/
err = rtnl_bridge_notify ( dev ) ;
}
2013-02-13 20:00:12 +08:00
}
if ( have_flags )
memcpy ( nla_data ( attr ) , & flags , sizeof ( flags ) ) ;
out :
return err ;
}
2016-04-30 16:25:26 +08:00
static bool stats_attr_valid ( unsigned int mask , int attrid , int idxattr )
{
return ( mask & IFLA_STATS_FILTER_BIT ( attrid ) ) & &
( ! idxattr | | idxattr = = attrid ) ;
}
2022-03-03 00:31:16 +08:00
static bool
rtnl_offload_xstats_have_ndo ( const struct net_device * dev , int attr_id )
2016-09-16 21:05:37 +08:00
{
2022-03-03 00:31:16 +08:00
return dev - > netdev_ops & &
dev - > netdev_ops - > ndo_has_offload_stats & &
dev - > netdev_ops - > ndo_get_offload_stats & &
dev - > netdev_ops - > ndo_has_offload_stats ( dev , attr_id ) ;
}
2016-09-16 21:05:37 +08:00
2022-03-03 00:31:16 +08:00
/*
 * Payload size of the ndo-backed offload stats attribute @attr_id for @dev:
 * sizeof(struct rtnl_link_stats64) when the device can provide it, else 0.
 */
static unsigned int
rtnl_offload_xstats_get_size_ndo(const struct net_device *dev, int attr_id)
{
	if (!rtnl_offload_xstats_have_ndo(dev, attr_id))
		return 0;

	return sizeof(struct rtnl_link_stats64);
}
2022-03-03 00:31:16 +08:00
static int
rtnl_offload_xstats_fill_ndo ( struct net_device * dev , int attr_id ,
struct sk_buff * skb )
2016-09-16 21:05:37 +08:00
{
2022-03-03 00:31:16 +08:00
unsigned int size = rtnl_offload_xstats_get_size_ndo ( dev , attr_id ) ;
2016-09-16 21:05:37 +08:00
struct nlattr * attr = NULL ;
void * attr_data ;
int err ;
2022-03-03 00:31:16 +08:00
if ( ! size )
2016-09-16 21:05:37 +08:00
return - ENODATA ;
2022-03-03 00:31:16 +08:00
attr = nla_reserve_64bit ( skb , attr_id , size ,
IFLA_OFFLOAD_XSTATS_UNSPEC ) ;
if ( ! attr )
return - EMSGSIZE ;
2016-09-16 21:05:37 +08:00
2022-03-03 00:31:16 +08:00
attr_data = nla_data ( attr ) ;
memset ( attr_data , 0 , size ) ;
2016-09-16 21:05:37 +08:00
2022-03-03 00:31:16 +08:00
err = dev - > netdev_ops - > ndo_get_offload_stats ( attr_id , dev , attr_data ) ;
if ( err )
return err ;
2016-09-16 21:05:37 +08:00
2022-03-03 00:31:16 +08:00
return 0 ;
}
2016-09-16 21:05:37 +08:00
net: rtnetlink: Add UAPI for obtaining L3 offload xstats
Add a new IFLA_STATS_LINK_OFFLOAD_XSTATS child attribute,
IFLA_OFFLOAD_XSTATS_L3_STATS, to carry statistics for traffic that takes
place in a HW router.
The offloaded HW stats are designed to allow per-netdevice enablement and
disablement. Additionally, as a netdevice is configured, it may become or
cease being suitable for binding of a HW counter. Both of these aspects
need to be communicated to the userspace. To that end, add another child
attribute, IFLA_OFFLOAD_XSTATS_HW_S_INFO:
- attr nest IFLA_OFFLOAD_XSTATS_HW_S_INFO
- attr nest IFLA_OFFLOAD_XSTATS_L3_STATS
- attr IFLA_OFFLOAD_XSTATS_HW_S_INFO_REQUEST
- {0,1} as u8
- attr IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED
- {0,1} as u8
Thus this one attribute is a nest that can be used to carry information
about various types of HW statistics, and indexing is very simply done by
wrapping the information for a given statistics suite into the attribute
that carries the suite in the RTM_GETSTATS query. At the same time, because
_HW_S_INFO is nested directly below IFLA_STATS_LINK_OFFLOAD_XSTATS, it is
possible through filtering to request only the metadata about individual
statistics suites, without having to hit the HW to get the actual counters.
Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2022-03-03 00:31:21 +08:00
/*
 * Payload size of the HW stats attribute for suite @type on @dev:
 * sizeof(struct rtnl_hw_stats64) when netdev_offload_xstats_enabled()
 * reports the suite as enabled, else 0.
 */
static unsigned int
rtnl_offload_xstats_get_size_stats(const struct net_device *dev,
				   enum netdev_offload_xstats_type type)
{
	if (!netdev_offload_xstats_enabled(dev, type))
		return 0;

	return sizeof(struct rtnl_hw_stats64);
}
/* Request/usage state of one HW statistics suite, as collected by
 * rtnl_offload_xstats_get_stats() and dumped as the
 * IFLA_OFFLOAD_XSTATS_HW_S_INFO_REQUEST / _USED attributes.
 */
struct rtnl_offload_xstats_request_used {
/* Result of netdev_offload_xstats_enabled() for the suite. */
bool request ;
/* "Used" flag reported by netdev_offload_xstats_get(); false when the
 * suite is not enabled.
 */
bool used ;
} ;
static int
rtnl_offload_xstats_get_stats ( struct net_device * dev ,
enum netdev_offload_xstats_type type ,
struct rtnl_offload_xstats_request_used * ru ,
struct rtnl_hw_stats64 * stats ,
struct netlink_ext_ack * extack )
{
bool request ;
bool used ;
int err ;
request = netdev_offload_xstats_enabled ( dev , type ) ;
if ( ! request ) {
used = false ;
goto out ;
}
err = netdev_offload_xstats_get ( dev , type , stats , & used , extack ) ;
if ( err )
return err ;
out :
if ( ru ) {
ru - > request = request ;
ru - > used = used ;
}
return 0 ;
}
static int
rtnl_offload_xstats_fill_hw_s_info_one ( struct sk_buff * skb , int attr_id ,
struct rtnl_offload_xstats_request_used * ru )
{
struct nlattr * nest ;
nest = nla_nest_start ( skb , attr_id ) ;
if ( ! nest )
return - EMSGSIZE ;
if ( nla_put_u8 ( skb , IFLA_OFFLOAD_XSTATS_HW_S_INFO_REQUEST , ru - > request ) )
goto nla_put_failure ;
if ( nla_put_u8 ( skb , IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED , ru - > used ) )
goto nla_put_failure ;
nla_nest_end ( skb , nest ) ;
return 0 ;
nla_put_failure :
nla_nest_cancel ( skb , nest ) ;
return - EMSGSIZE ;
}
static int
rtnl_offload_xstats_fill_hw_s_info ( struct sk_buff * skb , struct net_device * dev ,
struct netlink_ext_ack * extack )
{
enum netdev_offload_xstats_type t_l3 = NETDEV_OFFLOAD_XSTATS_TYPE_L3 ;
struct rtnl_offload_xstats_request_used ru_l3 ;
struct nlattr * nest ;
int err ;
err = rtnl_offload_xstats_get_stats ( dev , t_l3 , & ru_l3 , NULL , extack ) ;
if ( err )
return err ;
nest = nla_nest_start ( skb , IFLA_OFFLOAD_XSTATS_HW_S_INFO ) ;
if ( ! nest )
return - EMSGSIZE ;
if ( rtnl_offload_xstats_fill_hw_s_info_one ( skb ,
IFLA_OFFLOAD_XSTATS_L3_STATS ,
& ru_l3 ) )
goto nla_put_failure ;
nla_nest_end ( skb , nest ) ;
return 0 ;
nla_put_failure :
nla_nest_cancel ( skb , nest ) ;
return - EMSGSIZE ;
}
2022-03-03 00:31:16 +08:00
/*
 * Fill the offload xstats attributes of @dev selected by @off_filter_mask
 * into @skb.
 *
 * @prividx: in: lowest attribute id still to be emitted; out: id of the
 *           attribute that was being emitted when an error occurred, or 0
 *           once every selected attribute has been processed
 * @extack:  passed through to the HW stats helpers
 *
 * Three attributes are handled, in this order: IFLA_OFFLOAD_XSTATS_CPU_HIT
 * (ndo-backed), IFLA_OFFLOAD_XSTATS_HW_S_INFO and
 * IFLA_OFFLOAD_XSTATS_L3_STATS. CPU_HIT and L3_STATS are silently skipped
 * when the device has nothing to report for them.
 *
 * Returns 0 if at least one attribute was emitted, -ENODATA if none was,
 * or the error of the failing step.
 */
static int rtnl_offload_xstats_fill(struct sk_buff *skb, struct net_device *dev,
				    int *prividx, u32 off_filter_mask,
				    struct netlink_ext_ack *extack)
{
	enum netdev_offload_xstats_type l3_type = NETDEV_OFFLOAD_XSTATS_TYPE_L3;
	int cpu_hit_id = IFLA_OFFLOAD_XSTATS_CPU_HIT;
	int hw_s_info_id = IFLA_OFFLOAD_XSTATS_HW_S_INFO;
	int l3_stats_id = IFLA_OFFLOAD_XSTATS_L3_STATS;
	bool emitted = false;
	int rc;

	if (*prividx <= cpu_hit_id &&
	    (off_filter_mask & IFLA_STATS_FILTER_BIT(cpu_hit_id))) {
		rc = rtnl_offload_xstats_fill_ndo(dev, cpu_hit_id, skb);
		if (rc && rc != -ENODATA) {
			*prividx = cpu_hit_id;
			return rc;
		}
		/* -ENODATA only means there is nothing to report here. */
		if (!rc)
			emitted = true;
	}

	if (*prividx <= hw_s_info_id &&
	    (off_filter_mask & IFLA_STATS_FILTER_BIT(hw_s_info_id))) {
		/* Remember where we are in case the fill fails. */
		*prividx = hw_s_info_id;

		rc = rtnl_offload_xstats_fill_hw_s_info(skb, dev, extack);
		if (rc)
			return rc;

		emitted = true;
		*prividx = 0;
	}

	if (*prividx <= l3_stats_id &&
	    (off_filter_mask & IFLA_STATS_FILTER_BIT(l3_stats_id))) {
		unsigned int l3_size;

		*prividx = l3_stats_id;

		/* A zero size means the L3 suite is not enabled: skip it. */
		l3_size = rtnl_offload_xstats_get_size_stats(dev, l3_type);
		if (l3_size) {
			struct nlattr *slot;

			slot = nla_reserve_64bit(skb, l3_stats_id, l3_size,
						 IFLA_OFFLOAD_XSTATS_UNSPEC);
			if (!slot)
				return -EMSGSIZE;

			rc = rtnl_offload_xstats_get_stats(dev, l3_type, NULL,
							   nla_data(slot),
							   extack);
			if (rc)
				return rc;

			emitted = true;
		}

		*prividx = 0;
	}

	if (!emitted)
		return -ENODATA;

	*prividx = 0;
	return 0;
}
net: rtnetlink: Add UAPI for obtaining L3 offload xstats
Add a new IFLA_STATS_LINK_OFFLOAD_XSTATS child attribute,
IFLA_OFFLOAD_XSTATS_L3_STATS, to carry statistics for traffic that takes
place in a HW router.
The offloaded HW stats are designed to allow per-netdevice enablement and
disablement. Additionally, as a netdevice is configured, it may become or
cease being suitable for binding of a HW counter. Both of these aspects
need to be communicated to the userspace. To that end, add another child
attribute, IFLA_OFFLOAD_XSTATS_HW_S_INFO:
- attr nest IFLA_OFFLOAD_XSTATS_HW_S_INFO
- attr nest IFLA_OFFLOAD_XSTATS_L3_STATS
- attr IFLA_OFFLOAD_XSTATS_HW_S_INFO_REQUEST
- {0,1} as u8
- attr IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED
- {0,1} as u8
Thus this one attribute is a nest that can be used to carry information
about various types of HW statistics, and indexing is very simply done by
wrapping the information for a given statistics suite into the attribute
that carries the suite in the RTM_GETSTATS query. At the same time, because
_HW_S_INFO is nested directly below IFLA_STATS_LINK_OFFLOAD_XSTATS, it is
possible through filtering to request only the metadata about individual
statistics suites, without having to hit the HW to get the actual counters.
Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2022-03-03 00:31:21 +08:00
/* Netlink size of the hardware-stats info nest for one statistics suite.
 *
 * @dev:  device whose offload xstats enablement is queried.
 * @type: statistics suite the nest describes.
 *
 * Counts a zero-payload attribute (the nest itself) plus a u8 for
 * IFLA_OFFLOAD_XSTATS_HW_S_INFO_REQUEST, and one more u8 for
 * IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED only when
 * netdev_offload_xstats_enabled() reports the suite as enabled.
 */
static unsigned int
rtnl_offload_xstats_get_size_hw_s_info_one(const struct net_device *dev,
					   enum netdev_offload_xstats_type type)
{
	/* Nest attribute + IFLA_OFFLOAD_XSTATS_HW_S_INFO_REQUEST */
	unsigned int size = nla_total_size(0) + nla_total_size(sizeof(u8));

	/* IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED */
	if (netdev_offload_xstats_enabled(dev, type))
		size += nla_total_size(sizeof(u8));

	return size;
}
/* Netlink size of the whole IFLA_OFFLOAD_XSTATS_HW_S_INFO nest for @dev:
 * a zero-payload attribute for the nest plus the per-suite info for the
 * L3 statistics suite.
 */
static unsigned int
rtnl_offload_xstats_get_size_hw_s_info(const struct net_device *dev)
{
	unsigned int size = nla_total_size(0);

	/* IFLA_OFFLOAD_XSTATS_L3_STATS */
	size += rtnl_offload_xstats_get_size_hw_s_info_one(dev,
					NETDEV_OFFLOAD_XSTATS_TYPE_L3);

	return size;
}
2022-03-03 00:31:17 +08:00
/* Netlink size needed for the offload xstats nest of @dev.
 *
 * @dev:             device being sized.
 * @off_filter_mask: filter bits selecting which nested offload xstats
 *                   attributes are counted.
 *
 * Returns 0 when no selected attribute contributes any size; otherwise
 * the sum of the selected attributes plus one zero-payload attribute for
 * the enclosing nest.
 */
static int rtnl_offload_xstats_get_size(const struct net_device *dev,
					u32 off_filter_mask)
{
	int total = 0;
	int payload;

	if (off_filter_mask &
	    IFLA_STATS_FILTER_BIT(IFLA_OFFLOAD_XSTATS_CPU_HIT)) {
		payload = rtnl_offload_xstats_get_size_ndo(dev,
						IFLA_OFFLOAD_XSTATS_CPU_HIT);
		total += nla_total_size_64bit(payload);
	}

	if (off_filter_mask &
	    IFLA_STATS_FILTER_BIT(IFLA_OFFLOAD_XSTATS_HW_S_INFO))
		total += rtnl_offload_xstats_get_size_hw_s_info(dev);

	if (off_filter_mask &
	    IFLA_STATS_FILTER_BIT(IFLA_OFFLOAD_XSTATS_L3_STATS)) {
		payload = rtnl_offload_xstats_get_size_stats(dev,
						NETDEV_OFFLOAD_XSTATS_TYPE_L3);
		total += nla_total_size_64bit(payload);
	}

	/* Nothing selected: the enclosing nest is not counted either. */
	if (total == 0)
		return 0;

	return total + nla_total_size(0);
}
2022-03-03 00:31:17 +08:00
/* Set of filter masks used when sizing and filling a stats message.
 * The fill and size helpers read mask[0] as the top-level filter mask and
 * mask[IFLA_STATS_LINK_OFFLOAD_XSTATS] as the mask for that nest.
 */
struct rtnl_stats_dump_filters {
/* mask[0] filters outer attributes. Then individual nests have their
* filtering mask at the index of the nested attribute.
*/
u32 mask [ IFLA_STATS_MAX + 1 ] ;
} ;
2016-04-20 23:43:43 +08:00
/* Fill one stats message for @dev into @skb.
 *
 * @skb:     buffer the message is appended to.
 * @dev:     device whose statistics are reported.
 * @type:    netlink message type placed in the header.
 * @pid:     netlink port id placed in the header.
 * @seq:     netlink sequence number placed in the header.
 * @change:  unused here.
 * @flags:   netlink header flags; NLM_F_MULTI alters failure handling.
 * @filters: mask[0] selects the top-level attributes,
 *           mask[IFLA_STATS_LINK_OFFLOAD_XSTATS] the offload xstats ones.
 * @idxattr: in/out cursor naming the attribute currently being filled;
 *           reset to 0 once that attribute has been completed.
 * @prividx: in/out cursor handed to the per-attribute fill callbacks.
 * @extack:  extended ack passed to rtnl_offload_xstats_fill().
 *
 * Must be called with RTNL held (ASSERT_RTNL()).
 *
 * Returns 0 on success, -EMSGSIZE when the header or an attribute does not
 * fit, or the negative error reported by a fill callback.  On failure the
 * message is cancelled unless this is a multi-part message and *@prividx
 * advanced, in which case the partial message is kept.
 */
static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
			       int type, u32 pid, u32 seq, u32 change,
			       unsigned int flags,
			       const struct rtnl_stats_dump_filters *filters,
			       int *idxattr, int *prividx,
			       struct netlink_ext_ack *extack)
{
	unsigned int filter_mask = filters->mask[0];
	struct if_stats_msg *ifsm;
	struct nlmsghdr *nlh;
	struct nlattr *attr;
	int s_prividx = *prividx;
	int err;

	ASSERT_RTNL();

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifsm), flags);
	if (!nlh)
		return -EMSGSIZE;

	ifsm = nlmsg_data(nlh);
	ifsm->family = PF_UNSPEC;
	ifsm->pad1 = 0;
	ifsm->pad2 = 0;
	ifsm->ifindex = dev->ifindex;
	ifsm->filter_mask = filter_mask;

	if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_64, *idxattr)) {
		struct rtnl_link_stats64 *sp;

		attr = nla_reserve_64bit(skb, IFLA_STATS_LINK_64,
					 sizeof(struct rtnl_link_stats64),
					 IFLA_STATS_UNSPEC);
		if (!attr) {
			err = -EMSGSIZE;
			goto nla_put_failure;
		}

		sp = nla_data(attr);
		dev_get_stats(dev, sp);
	}

	if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS, *idxattr)) {
		const struct rtnl_link_ops *ops = dev->rtnl_link_ops;

		if (ops && ops->fill_linkxstats) {
			*idxattr = IFLA_STATS_LINK_XSTATS;
			attr = nla_nest_start_noflag(skb,
						     IFLA_STATS_LINK_XSTATS);
			if (!attr) {
				err = -EMSGSIZE;
				goto nla_put_failure;
			}

			err = ops->fill_linkxstats(skb, dev, prividx, *idxattr);
			/* The nest is closed even on error so that a partial
			 * multi-part message stays well formed.
			 */
			nla_nest_end(skb, attr);
			if (err)
				goto nla_put_failure;
			*idxattr = 0;
		}
	}

	if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS_SLAVE,
			     *idxattr)) {
		const struct rtnl_link_ops *ops = NULL;
		const struct net_device *master;

		/* The callback comes from the master device's link ops. */
		master = netdev_master_upper_dev_get(dev);
		if (master)
			ops = master->rtnl_link_ops;
		if (ops && ops->fill_linkxstats) {
			*idxattr = IFLA_STATS_LINK_XSTATS_SLAVE;
			attr = nla_nest_start_noflag(skb,
						     IFLA_STATS_LINK_XSTATS_SLAVE);
			if (!attr) {
				err = -EMSGSIZE;
				goto nla_put_failure;
			}

			err = ops->fill_linkxstats(skb, dev, prividx, *idxattr);
			nla_nest_end(skb, attr);
			if (err)
				goto nla_put_failure;
			*idxattr = 0;
		}
	}

	if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS,
			     *idxattr)) {
		u32 off_filter_mask;

		off_filter_mask = filters->mask[IFLA_STATS_LINK_OFFLOAD_XSTATS];
		*idxattr = IFLA_STATS_LINK_OFFLOAD_XSTATS;
		attr = nla_nest_start_noflag(skb,
					     IFLA_STATS_LINK_OFFLOAD_XSTATS);
		if (!attr) {
			err = -EMSGSIZE;
			goto nla_put_failure;
		}

		err = rtnl_offload_xstats_fill(skb, dev, prividx,
					       off_filter_mask, extack);
		/* -ENODATA means "nothing to report": drop the empty nest and
		 * carry on; any other error aborts the message.
		 */
		if (err == -ENODATA)
			nla_nest_cancel(skb, attr);
		else
			nla_nest_end(skb, attr);

		if (err && err != -ENODATA)
			goto nla_put_failure;
		*idxattr = 0;
	}

	if (stats_attr_valid(filter_mask, IFLA_STATS_AF_SPEC, *idxattr)) {
		struct rtnl_af_ops *af_ops;

		*idxattr = IFLA_STATS_AF_SPEC;
		attr = nla_nest_start_noflag(skb, IFLA_STATS_AF_SPEC);
		if (!attr) {
			err = -EMSGSIZE;
			goto nla_put_failure;
		}

		rcu_read_lock();
		list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
			if (af_ops->fill_stats_af) {
				struct nlattr *af;

				af = nla_nest_start_noflag(skb,
							   af_ops->family);
				if (!af) {
					rcu_read_unlock();
					err = -EMSGSIZE;
					goto nla_put_failure;
				}
				err = af_ops->fill_stats_af(skb, dev);

				if (err == -ENODATA) {
					/* Family has nothing to report; the
					 * cancelled nest must not be closed
					 * afterwards, as that would store a
					 * length past the message tail.
					 */
					nla_nest_cancel(skb, af);
				} else if (err < 0) {
					rcu_read_unlock();
					goto nla_put_failure;
				} else {
					nla_nest_end(skb, af);
				}
			}
		}
		rcu_read_unlock();

		nla_nest_end(skb, attr);

		*idxattr = 0;
	}

	nlmsg_end(skb, nlh);

	return 0;

nla_put_failure:
	/* not a multi message or no progress mean a real error */
	if (!(flags & NLM_F_MULTI) || s_prividx == *prividx)
		nlmsg_cancel(skb, nlh);
	else
		nlmsg_end(skb, nlh);

	return err;
}
static size_t if_nlmsg_stats_size ( const struct net_device * dev ,
2022-03-03 00:31:17 +08:00
const struct rtnl_stats_dump_filters * filters )
2016-04-20 23:43:43 +08:00
{
2021-10-06 05:04:17 +08:00
size_t size = NLMSG_ALIGN ( sizeof ( struct if_stats_msg ) ) ;
2022-03-03 00:31:17 +08:00
unsigned int filter_mask = filters - > mask [ 0 ] ;
2016-04-20 23:43:43 +08:00
2016-04-30 16:25:26 +08:00
if ( stats_attr_valid ( filter_mask , IFLA_STATS_LINK_64 , 0 ) )
2016-04-20 23:43:43 +08:00
size + = nla_total_size_64bit ( sizeof ( struct rtnl_link_stats64 ) ) ;
2016-04-30 16:25:27 +08:00
if ( stats_attr_valid ( filter_mask , IFLA_STATS_LINK_XSTATS , 0 ) ) {
const struct rtnl_link_ops * ops = dev - > rtnl_link_ops ;
2016-06-28 22:57:05 +08:00
int attr = IFLA_STATS_LINK_XSTATS ;
2016-04-30 16:25:27 +08:00
if ( ops & & ops - > get_linkxstats_size ) {
2016-06-28 22:57:05 +08:00
size + = nla_total_size ( ops - > get_linkxstats_size ( dev ,
attr ) ) ;
2016-04-30 16:25:27 +08:00
/* for IFLA_STATS_LINK_XSTATS */
size + = nla_total_size ( 0 ) ;
}
}
2016-06-28 22:57:05 +08:00
if ( stats_attr_valid ( filter_mask , IFLA_STATS_LINK_XSTATS_SLAVE , 0 ) ) {
struct net_device * _dev = ( struct net_device * ) dev ;
const struct rtnl_link_ops * ops = NULL ;
const struct net_device * master ;
/* netdev_master_upper_dev_get can't take const */
master = netdev_master_upper_dev_get ( _dev ) ;
if ( master )
ops = master - > rtnl_link_ops ;
if ( ops & & ops - > get_linkxstats_size ) {
int attr = IFLA_STATS_LINK_XSTATS_SLAVE ;
size + = nla_total_size ( ops - > get_linkxstats_size ( dev ,
attr ) ) ;
/* for IFLA_STATS_LINK_XSTATS_SLAVE */
size + = nla_total_size ( 0 ) ;
}
}
2022-03-03 00:31:17 +08:00
if ( stats_attr_valid ( filter_mask , IFLA_STATS_LINK_OFFLOAD_XSTATS , 0 ) ) {
u32 off_filter_mask ;
off_filter_mask = filters - > mask [ IFLA_STATS_LINK_OFFLOAD_XSTATS ] ;
size + = rtnl_offload_xstats_get_size ( dev , off_filter_mask ) ;
}
2016-09-16 21:05:37 +08:00
2017-01-16 22:16:36 +08:00
if ( stats_attr_valid ( filter_mask , IFLA_STATS_AF_SPEC , 0 ) ) {
struct rtnl_af_ops * af_ops ;
/* for IFLA_STATS_AF_SPEC */
size + = nla_total_size ( 0 ) ;
2017-10-16 21:44:36 +08:00
rcu_read_lock ( ) ;
list_for_each_entry_rcu ( af_ops , & rtnl_af_ops , list ) {
2017-01-16 22:16:36 +08:00
if ( af_ops - > get_stats_af_size ) {
size + = nla_total_size (
af_ops - > get_stats_af_size ( dev ) ) ;
/* for AF_* */
size + = nla_total_size ( 0 ) ;
}
}
2017-10-16 21:44:36 +08:00
rcu_read_unlock ( ) ;
2017-01-16 22:16:36 +08:00
}
2016-04-20 23:43:43 +08:00
return size ;
}
2022-03-03 00:31:17 +08:00
/* Bit mask with the low __IFLA_OFFLOAD_XSTATS_MAX bits set; used below as
 * the set of filter bits accepted for the offload xstats nest.
 */
# define RTNL_STATS_OFFLOAD_XSTATS_VALID ((1 << __IFLA_OFFLOAD_XSTATS_MAX) - 1)
/* Policy for the attributes nested inside IFLA_STATS_GET_FILTERS, passed to
 * nla_parse_nested() by rtnl_stats_get_parse_filters().  Only
 * IFLA_STATS_LINK_OFFLOAD_XSTATS has an entry: a u32 checked against
 * RTNL_STATS_OFFLOAD_XSTATS_VALID.
 */
static const struct nla_policy
rtnl_stats_get_policy_filters [ IFLA_STATS_MAX + 1 ] = {
[ IFLA_STATS_LINK_OFFLOAD_XSTATS ] =
NLA_POLICY_MASK ( NLA_U32 , RTNL_STATS_OFFLOAD_XSTATS_VALID ) ,
} ;
/* Top-level attribute policy for stats get/dump requests, passed to
 * nlmsg_parse() by rtnl_stats_get_parse().  IFLA_STATS_GET_FILTERS is a
 * nest validated with rtnl_stats_get_policy_filters.
 */
static const struct nla_policy
rtnl_stats_get_policy [ IFLA_STATS_GETSET_MAX + 1 ] = {
[ IFLA_STATS_GET_FILTERS ] =
NLA_POLICY_NESTED ( rtnl_stats_get_policy_filters ) ,
} ;
2022-03-03 00:31:22 +08:00
/* Attribute policy for stats set requests, passed to nlmsg_parse() by
 * rtnl_stats_set().  IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS is a u8 with
 * a maximum of 1; rtnl_stats_set() treats non-zero as "enable" and zero
 * as "disable".
 */
static const struct nla_policy
ifla_stats_set_policy [ IFLA_STATS_GETSET_MAX + 1 ] = {
2022-03-03 00:31:23 +08:00
[ IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS ] = NLA_POLICY_MAX ( NLA_U8 , 1 ) ,
2022-03-03 00:31:22 +08:00
} ;
2022-03-03 00:31:17 +08:00
static int rtnl_stats_get_parse_filters ( struct nlattr * ifla_filters ,
struct rtnl_stats_dump_filters * filters ,
struct netlink_ext_ack * extack )
{
struct nlattr * tb [ IFLA_STATS_MAX + 1 ] ;
int err ;
int at ;
err = nla_parse_nested ( tb , IFLA_STATS_MAX , ifla_filters ,
rtnl_stats_get_policy_filters , extack ) ;
if ( err < 0 )
return err ;
for ( at = 1 ; at < = IFLA_STATS_MAX ; at + + ) {
if ( tb [ at ] ) {
if ( ! ( filters - > mask [ 0 ] & IFLA_STATS_FILTER_BIT ( at ) ) ) {
NL_SET_ERR_MSG ( extack , " Filtered attribute not enabled in filter_mask " ) ;
return - EINVAL ;
}
filters - > mask [ at ] = nla_get_u32 ( tb [ at ] ) ;
}
}
return 0 ;
}
/* Initialise @filters from a stats get/dump request.
 *
 * @nlh:         request message.
 * @filter_mask: top-level filter mask, stored in mask[0].
 * @filters:     output; every nested mask starts as all-ones and is then
 *               narrowed by IFLA_STATS_GET_FILTERS if that attribute is
 *               present.
 * @extack:      extended ack for parse errors.
 *
 * Returns 0 on success or a negative error from attribute parsing.
 */
static int rtnl_stats_get_parse(const struct nlmsghdr *nlh,
				u32 filter_mask,
				struct rtnl_stats_dump_filters *filters,
				struct netlink_ext_ack *extack)
{
	struct nlattr *attrs[IFLA_STATS_GETSET_MAX + 1];
	int rc;
	int i;

	filters->mask[0] = filter_mask;
	/* Nested masks default to "everything". */
	for (i = 1; i < ARRAY_SIZE(filters->mask); i++)
		filters->mask[i] = -1U;

	rc = nlmsg_parse(nlh, sizeof(struct if_stats_msg), attrs,
			 IFLA_STATS_GETSET_MAX, rtnl_stats_get_policy, extack);
	if (rc < 0)
		return rc;

	if (!attrs[IFLA_STATS_GET_FILTERS])
		return 0;

	return rtnl_stats_get_parse_filters(attrs[IFLA_STATS_GET_FILTERS],
					    filters, extack);
}
2019-01-19 02:46:14 +08:00
/* Validate the fixed header of a stats request.
 *
 * @nlh:          request message.
 * @strict_check: when false only the header length is checked.
 * @is_dump:      when true (and strict), a non-zero ifindex is rejected.
 * @extack:       receives the error message on rejection.
 *
 * Returns 0 if the request is acceptable, -EINVAL otherwise.
 */
static int rtnl_valid_stats_req(const struct nlmsghdr *nlh, bool strict_check,
				bool is_dump, struct netlink_ext_ack *extack)
{
	const struct if_stats_msg *hdr;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*hdr))) {
		NL_SET_ERR_MSG(extack, " Invalid header for stats dump ");
		return -EINVAL;
	}

	if (!strict_check)
		return 0;

	hdr = nlmsg_data(nlh);

	/* Under strict checking the padding must be zero and a dump must not
	 * name a device; filter_mask is the one field legacy requests may
	 * also set.
	 */
	if (hdr->pad1 || hdr->pad2 || (is_dump && hdr->ifindex)) {
		NL_SET_ERR_MSG(extack, " Invalid values in header for stats dump request ");
		return -EINVAL;
	}

	/* Reject bits beyond the last known stats attribute. */
	if (hdr->filter_mask >= IFLA_STATS_FILTER_BIT(IFLA_STATS_MAX + 1)) {
		NL_SET_ERR_MSG(extack, " Invalid stats requested through filter mask ");
		return -EINVAL;
	}

	return 0;
}
2017-04-17 00:48:24 +08:00
static int rtnl_stats_get ( struct sk_buff * skb , struct nlmsghdr * nlh ,
struct netlink_ext_ack * extack )
2016-04-20 23:43:43 +08:00
{
2022-03-03 00:31:17 +08:00
struct rtnl_stats_dump_filters filters ;
2016-04-20 23:43:43 +08:00
struct net * net = sock_net ( skb - > sk ) ;
struct net_device * dev = NULL ;
2016-04-30 16:25:26 +08:00
int idxattr = 0 , prividx = 0 ;
struct if_stats_msg * ifsm ;
2016-04-20 23:43:43 +08:00
struct sk_buff * nskb ;
int err ;
2019-01-19 02:46:14 +08:00
err = rtnl_valid_stats_req ( nlh , netlink_strict_get_check ( skb ) ,
false , extack ) ;
if ( err )
return err ;
2016-12-29 00:52:15 +08:00
2016-04-20 23:43:43 +08:00
ifsm = nlmsg_data ( nlh ) ;
if ( ifsm - > ifindex > 0 )
dev = __dev_get_by_index ( net , ifsm - > ifindex ) ;
else
return - EINVAL ;
if ( ! dev )
return - ENODEV ;
2022-03-03 00:31:17 +08:00
if ( ! ifsm - > filter_mask ) {
2022-02-16 22:31:36 +08:00
NL_SET_ERR_MSG ( extack , " Filter mask must be set for stats get " ) ;
2016-04-20 23:43:43 +08:00
return - EINVAL ;
2022-02-16 22:31:36 +08:00
}
2016-04-20 23:43:43 +08:00
2022-03-03 00:31:17 +08:00
err = rtnl_stats_get_parse ( nlh , ifsm - > filter_mask , & filters , extack ) ;
if ( err )
return err ;
nskb = nlmsg_new ( if_nlmsg_stats_size ( dev , & filters ) , GFP_KERNEL ) ;
2016-04-20 23:43:43 +08:00
if ( ! nskb )
return - ENOBUFS ;
err = rtnl_fill_statsinfo ( nskb , dev , RTM_NEWSTATS ,
NETLINK_CB ( skb ) . portid , nlh - > nlmsg_seq , 0 ,
2022-03-03 00:31:18 +08:00
0 , & filters , & idxattr , & prividx , extack ) ;
2016-04-20 23:43:43 +08:00
if ( err < 0 ) {
/* -EMSGSIZE implies BUG in if_nlmsg_stats_size */
WARN_ON ( err = = - EMSGSIZE ) ;
kfree_skb ( nskb ) ;
} else {
err = rtnl_unicast ( nskb , net , NETLINK_CB ( skb ) . portid ) ;
}
return err ;
}
static int rtnl_stats_dump ( struct sk_buff * skb , struct netlink_callback * cb )
{
2018-10-08 11:16:32 +08:00
struct netlink_ext_ack * extack = cb - > extack ;
2016-04-30 16:25:26 +08:00
int h , s_h , err , s_idx , s_idxattr , s_prividx ;
2022-03-03 00:31:17 +08:00
struct rtnl_stats_dump_filters filters ;
2016-04-20 23:43:43 +08:00
struct net * net = sock_net ( skb - > sk ) ;
2016-04-30 16:25:26 +08:00
unsigned int flags = NLM_F_MULTI ;
2016-04-20 23:43:43 +08:00
struct if_stats_msg * ifsm ;
struct hlist_head * head ;
2016-04-30 16:25:26 +08:00
struct net_device * dev ;
int idx = 0 ;
2016-04-20 23:43:43 +08:00
s_h = cb - > args [ 0 ] ;
s_idx = cb - > args [ 1 ] ;
2016-04-30 16:25:26 +08:00
s_idxattr = cb - > args [ 2 ] ;
s_prividx = cb - > args [ 3 ] ;
2016-04-20 23:43:43 +08:00
cb - > seq = net - > dev_base_seq ;
2019-01-19 02:46:14 +08:00
err = rtnl_valid_stats_req ( cb - > nlh , cb - > strict_check , true , extack ) ;
if ( err )
return err ;
2016-12-29 00:52:15 +08:00
2016-04-20 23:43:43 +08:00
ifsm = nlmsg_data ( cb - > nlh ) ;
2022-03-03 00:31:17 +08:00
if ( ! ifsm - > filter_mask ) {
2018-10-08 11:16:32 +08:00
NL_SET_ERR_MSG ( extack , " Filter mask must be set for stats dump " ) ;
2016-04-20 23:43:43 +08:00
return - EINVAL ;
2018-10-08 11:16:32 +08:00
}
2016-04-20 23:43:43 +08:00
2022-03-03 00:31:17 +08:00
err = rtnl_stats_get_parse ( cb - > nlh , ifsm - > filter_mask , & filters ,
extack ) ;
if ( err )
return err ;
2016-04-20 23:43:43 +08:00
for ( h = s_h ; h < NETDEV_HASHENTRIES ; h + + , s_idx = 0 ) {
idx = 0 ;
head = & net - > dev_index_head [ h ] ;
hlist_for_each_entry ( dev , head , index_hlist ) {
if ( idx < s_idx )
goto cont ;
err = rtnl_fill_statsinfo ( skb , dev , RTM_NEWSTATS ,
NETLINK_CB ( cb - > skb ) . portid ,
cb - > nlh - > nlmsg_seq , 0 ,
2022-03-03 00:31:17 +08:00
flags , & filters ,
2022-03-03 00:31:18 +08:00
& s_idxattr , & s_prividx ,
extack ) ;
2016-04-20 23:43:43 +08:00
/* If we ran out of room on the first message,
* we ' re in trouble
*/
WARN_ON ( ( err = = - EMSGSIZE ) & & ( skb - > len = = 0 ) ) ;
if ( err < 0 )
goto out ;
2016-04-30 16:25:26 +08:00
s_prividx = 0 ;
s_idxattr = 0 ;
2016-04-20 23:43:43 +08:00
nl_dump_check_consistent ( cb , nlmsg_hdr ( skb ) ) ;
cont :
idx + + ;
}
}
out :
2016-04-30 16:25:26 +08:00
cb - > args [ 3 ] = s_prividx ;
cb - > args [ 2 ] = s_idxattr ;
2016-04-20 23:43:43 +08:00
cb - > args [ 1 ] = idx ;
cb - > args [ 0 ] = h ;
return skb - > len ;
}
2022-03-03 00:31:23 +08:00
/* Send an RTM_NEWSTATS notification carrying the hardware-stats info of
 * @dev to the RTNLGRP_STATS group.
 *
 * Must be called with RTNL held (ASSERT_RTNL()).  If the message cannot be
 * allocated or filled, the error is reported to the group through
 * rtnl_set_sk_err() instead.
 */
void rtnl_offload_xstats_notify(struct net_device *dev)
{
	struct rtnl_stats_dump_filters response_filters = {};
	struct net *net = dev_net(dev);
	int idxattr = 0, prividx = 0;
	struct sk_buff *msg;
	int rc;

	ASSERT_RTNL();

	/* Request only the HW_S_INFO child of the offload xstats nest. */
	response_filters.mask[0] |=
		IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK_OFFLOAD_XSTATS);
	response_filters.mask[IFLA_STATS_LINK_OFFLOAD_XSTATS] |=
		IFLA_STATS_FILTER_BIT(IFLA_OFFLOAD_XSTATS_HW_S_INFO);

	msg = nlmsg_new(if_nlmsg_stats_size(dev, &response_filters),
			GFP_KERNEL);
	if (!msg) {
		rtnl_set_sk_err(net, RTNLGRP_STATS, -ENOBUFS);
		return;
	}

	rc = rtnl_fill_statsinfo(msg, dev, RTM_NEWSTATS, 0, 0, 0, 0,
				 &response_filters, &idxattr, &prividx, NULL);
	if (rc < 0) {
		kfree_skb(msg);
		rtnl_set_sk_err(net, RTNLGRP_STATS, rc);
		return;
	}

	rtnl_notify(msg, net, 0, RTNLGRP_STATS, NULL, GFP_KERNEL);
}
EXPORT_SYMBOL(rtnl_offload_xstats_notify);
2022-03-03 00:31:22 +08:00
static int rtnl_stats_set ( struct sk_buff * skb , struct nlmsghdr * nlh ,
struct netlink_ext_ack * extack )
{
2022-03-03 00:31:23 +08:00
enum netdev_offload_xstats_type t_l3 = NETDEV_OFFLOAD_XSTATS_TYPE_L3 ;
2022-03-03 00:31:22 +08:00
struct rtnl_stats_dump_filters response_filters = { } ;
struct nlattr * tb [ IFLA_STATS_GETSET_MAX + 1 ] ;
struct net * net = sock_net ( skb - > sk ) ;
struct net_device * dev = NULL ;
struct if_stats_msg * ifsm ;
2022-03-03 00:31:23 +08:00
bool notify = false ;
2022-03-03 00:31:22 +08:00
int err ;
err = rtnl_valid_stats_req ( nlh , netlink_strict_get_check ( skb ) ,
false , extack ) ;
if ( err )
return err ;
ifsm = nlmsg_data ( nlh ) ;
if ( ifsm - > family ! = AF_UNSPEC ) {
NL_SET_ERR_MSG ( extack , " Address family should be AF_UNSPEC " ) ;
return - EINVAL ;
}
if ( ifsm - > ifindex > 0 )
dev = __dev_get_by_index ( net , ifsm - > ifindex ) ;
else
return - EINVAL ;
if ( ! dev )
return - ENODEV ;
if ( ifsm - > filter_mask ) {
NL_SET_ERR_MSG ( extack , " Filter mask must be 0 for stats set " ) ;
return - EINVAL ;
}
err = nlmsg_parse ( nlh , sizeof ( * ifsm ) , tb , IFLA_STATS_GETSET_MAX ,
ifla_stats_set_policy , extack ) ;
if ( err < 0 )
return err ;
2022-03-03 00:31:23 +08:00
if ( tb [ IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS ] ) {
u8 req = nla_get_u8 ( tb [ IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS ] ) ;
2022-03-03 00:31:22 +08:00
2022-03-03 00:31:23 +08:00
if ( req )
err = netdev_offload_xstats_enable ( dev , t_l3 , extack ) ;
else
err = netdev_offload_xstats_disable ( dev , t_l3 ) ;
if ( ! err )
notify = true ;
else if ( err ! = - EALREADY )
return err ;
response_filters . mask [ 0 ] | =
IFLA_STATS_FILTER_BIT ( IFLA_STATS_LINK_OFFLOAD_XSTATS ) ;
response_filters . mask [ IFLA_STATS_LINK_OFFLOAD_XSTATS ] | =
IFLA_STATS_FILTER_BIT ( IFLA_OFFLOAD_XSTATS_HW_S_INFO ) ;
2022-03-03 00:31:22 +08:00
}
2022-03-03 00:31:23 +08:00
if ( notify )
rtnl_offload_xstats_notify ( dev ) ;
return 0 ;
2022-03-03 00:31:22 +08:00
}
2005-04-17 06:20:36 +08:00
/* Process one rtnetlink message. */
2017-04-12 20:34:04 +08:00
static int rtnetlink_rcv_msg ( struct sk_buff * skb , struct nlmsghdr * nlh ,
struct netlink_ext_ack * extack )
2005-04-17 06:20:36 +08:00
{
2008-03-26 01:26:21 +08:00
struct net * net = sock_net ( skb - > sk ) ;
2017-12-03 04:44:05 +08:00
struct rtnl_link * link ;
2022-04-13 18:51:51 +08:00
enum rtnl_kinds kind ;
2017-12-03 04:44:06 +08:00
struct module * owner ;
2017-08-10 02:41:51 +08:00
int err = - EOPNOTSUPP ;
2007-03-23 02:48:11 +08:00
rtnl_doit_func doit ;
2017-08-10 02:41:52 +08:00
unsigned int flags ;
2005-04-17 06:20:36 +08:00
int family ;
int type ;
type = nlh - > nlmsg_type ;
if ( type > RTM_MAX )
2007-04-06 05:35:52 +08:00
return - EOPNOTSUPP ;
2005-04-17 06:20:36 +08:00
type - = RTM_BASE ;
/* All the messages must have at least 1 byte length */
2013-03-27 14:47:04 +08:00
if ( nlmsg_len ( nlh ) < sizeof ( struct rtgenmsg ) )
2005-04-17 06:20:36 +08:00
return 0 ;
2013-03-27 14:47:04 +08:00
family = ( ( struct rtgenmsg * ) nlmsg_data ( nlh ) ) - > rtgen_family ;
2022-04-13 18:51:52 +08:00
kind = rtnl_msgtype_kind ( type ) ;
2005-04-17 06:20:36 +08:00
2022-04-13 18:51:51 +08:00
if ( kind ! = RTNL_KIND_GET & & ! netlink_net_capable ( skb , CAP_NET_ADMIN ) )
2007-03-23 14:30:12 +08:00
return - EPERM ;
2005-04-17 06:20:36 +08:00
2017-08-10 02:41:51 +08:00
rcu_read_lock ( ) ;
2022-04-13 18:51:51 +08:00
if ( kind = = RTNL_KIND_GET & & ( nlh - > nlmsg_flags & NLM_F_DUMP ) ) {
2007-11-20 14:26:51 +08:00
struct sock * rtnl ;
2007-03-23 02:48:11 +08:00
rtnl_dumpit_func dumpit ;
2020-10-21 10:00:53 +08:00
u32 min_dump_alloc = 0 ;
2005-04-17 06:20:36 +08:00
2017-12-03 04:44:05 +08:00
link = rtnl_get_link ( family , type ) ;
if ( ! link | | ! link - > dumpit ) {
2017-08-10 02:41:51 +08:00
family = PF_UNSPEC ;
2017-12-03 04:44:05 +08:00
link = rtnl_get_link ( family , type ) ;
if ( ! link | | ! link - > dumpit )
2017-08-10 02:41:51 +08:00
goto err_unlock ;
}
2017-12-03 04:44:06 +08:00
owner = link - > owner ;
2017-12-03 04:44:05 +08:00
dumpit = link - > dumpit ;
2017-08-10 02:41:47 +08:00
2017-08-10 22:52:58 +08:00
if ( type = = RTM_GETLINK - RTM_BASE )
2017-08-10 02:41:47 +08:00
min_dump_alloc = rtnl_calcit ( skb , nlh ) ;
2005-11-10 09:25:55 +08:00
2017-12-03 04:44:06 +08:00
err = 0 ;
/* need to do this before rcu_read_unlock() */
if ( ! try_module_get ( owner ) )
err = - EPROTONOSUPPORT ;
2017-08-10 02:41:51 +08:00
rcu_read_unlock ( ) ;
2007-11-20 14:26:51 +08:00
rtnl = net - > rtnl ;
2017-12-03 04:44:06 +08:00
if ( err = = 0 ) {
2012-02-24 22:30:15 +08:00
struct netlink_dump_control c = {
. dump = dumpit ,
. min_dump_alloc = min_dump_alloc ,
2017-12-03 04:44:06 +08:00
. module = owner ,
2012-02-24 22:30:15 +08:00
} ;
err = netlink_dump_start ( rtnl , skb , nlh , & c ) ;
2017-12-03 04:44:06 +08:00
/* netlink_dump_start() will keep a reference on
* module if dump is still in progress .
*/
module_put ( owner ) ;
2012-02-24 22:30:15 +08:00
}
2011-05-25 15:34:04 +08:00
return err ;
2005-04-17 06:20:36 +08:00
}
2017-12-03 04:44:05 +08:00
link = rtnl_get_link ( family , type ) ;
if ( ! link | | ! link - > doit ) {
2017-08-10 22:53:01 +08:00
family = PF_UNSPEC ;
2017-12-03 04:44:05 +08:00
link = rtnl_get_link ( PF_UNSPEC , type ) ;
if ( ! link | | ! link - > doit )
goto out_unlock ;
2017-08-10 22:53:01 +08:00
}
2017-12-03 04:44:06 +08:00
owner = link - > owner ;
if ( ! try_module_get ( owner ) ) {
err = - EPROTONOSUPPORT ;
goto out_unlock ;
}
2017-12-03 04:44:05 +08:00
flags = link - > flags ;
2022-04-13 18:51:55 +08:00
if ( kind = = RTNL_KIND_DEL & & ( nlh - > nlmsg_flags & NLM_F_BULK ) & &
! ( flags & RTNL_FLAG_BULK_DEL_SUPPORTED ) ) {
NL_SET_ERR_MSG ( extack , " Bulk delete is not supported " ) ;
goto err_unlock ;
}
2017-08-10 02:41:52 +08:00
if ( flags & RTNL_FLAG_DOIT_UNLOCKED ) {
2017-12-03 04:44:05 +08:00
doit = link - > doit ;
2017-08-10 02:41:52 +08:00
rcu_read_unlock ( ) ;
if ( doit )
err = doit ( skb , nlh , extack ) ;
2017-12-03 04:44:06 +08:00
module_put ( owner ) ;
2017-08-10 02:41:52 +08:00
return err ;
}
2017-08-10 02:41:51 +08:00
rcu_read_unlock ( ) ;
2005-04-17 06:20:36 +08:00
2017-08-10 02:41:51 +08:00
rtnl_lock ( ) ;
2017-12-03 04:44:05 +08:00
link = rtnl_get_link ( family , type ) ;
if ( link & & link - > doit )
err = link - > doit ( skb , nlh , extack ) ;
2017-08-10 02:41:50 +08:00
rtnl_unlock ( ) ;
2017-12-03 04:44:05 +08:00
2017-12-03 04:44:06 +08:00
module_put ( owner ) ;
2017-12-03 04:44:05 +08:00
return err ;
out_unlock :
rcu_read_unlock ( ) ;
2017-08-10 02:41:50 +08:00
return err ;
err_unlock :
2017-08-10 02:41:51 +08:00
rcu_read_unlock ( ) ;
2017-08-10 02:41:50 +08:00
return - EOPNOTSUPP ;
2005-04-17 06:20:36 +08:00
}
2007-10-11 12:15:29 +08:00
static void rtnetlink_rcv ( struct sk_buff * skb )
2005-04-17 06:20:36 +08:00
{
2007-10-11 12:15:29 +08:00
netlink_rcv_skb ( skb , & rtnetlink_rcv_msg ) ;
2005-04-17 06:20:36 +08:00
}
2017-06-21 04:54:16 +08:00
static int rtnetlink_bind ( struct net * net , int group )
{
switch ( group ) {
case RTNLGRP_IPV4_MROUTE_R :
case RTNLGRP_IPV6_MROUTE_R :
if ( ! ns_capable ( net - > user_ns , CAP_NET_ADMIN ) )
return - EPERM ;
break ;
}
return 0 ;
}
2005-04-17 06:20:36 +08:00
static int rtnetlink_event ( struct notifier_block * this , unsigned long event , void * ptr )
{
2013-05-28 09:30:21 +08:00
struct net_device * dev = netdev_notifier_info_to_dev ( ptr ) ;
2007-09-12 19:02:17 +08:00
2005-04-17 06:20:36 +08:00
switch ( event ) {
2017-04-04 21:23:41 +08:00
case NETDEV_REBOOT :
2017-10-15 18:13:41 +08:00
case NETDEV_CHANGEMTU :
2017-07-20 01:22:40 +08:00
case NETDEV_CHANGEADDR :
2017-04-04 21:23:41 +08:00
case NETDEV_CHANGENAME :
case NETDEV_FEAT_CHANGE :
case NETDEV_BONDING_FAILOVER :
2017-10-15 18:13:43 +08:00
case NETDEV_POST_TYPE_CHANGE :
2017-04-04 21:23:41 +08:00
case NETDEV_NOTIFY_PEERS :
2017-10-15 18:13:44 +08:00
case NETDEV_CHANGEUPPER :
2017-04-04 21:23:41 +08:00
case NETDEV_RESEND_IGMP :
case NETDEV_CHANGEINFODATA :
2017-10-24 13:54:19 +08:00
case NETDEV_CHANGELOWERSTATE :
2017-10-15 18:13:42 +08:00
case NETDEV_CHANGE_TX_QUEUE_LEN :
2017-05-27 22:14:34 +08:00
rtmsg_ifinfo_event ( RTM_NEWLINK , dev , 0 , rtnl_get_event ( event ) ,
2018-01-25 22:01:39 +08:00
GFP_KERNEL , NULL , 0 ) ;
2005-04-17 06:20:36 +08:00
break ;
default :
break ;
}
return NOTIFY_DONE ;
}
/* Notifier block whose callback is rtnetlink_event(); rtnetlink_init()
 * registers it with register_netdevice_notifier().
 */
static struct notifier_block rtnetlink_dev_notifier = {
. notifier_call = rtnetlink_event ,
} ;
2007-11-20 14:26:51 +08:00
2010-01-17 11:35:32 +08:00
/* Per-namespace init: create the NETLINK_ROUTE kernel socket for @net and
 * store it in net->rtnl.
 *
 * Returns 0 on success or -ENOMEM if the socket cannot be created, in
 * which case net->rtnl is left untouched.
 */
static int __net_init rtnetlink_net_init(struct net *net)
{
	struct netlink_kernel_cfg cfg = {
		.groups		= RTNLGRP_MAX,
		.input		= rtnetlink_rcv,
		.cb_mutex	= &rtnl_mutex,
		.flags		= NL_CFG_F_NONROOT_RECV,
		.bind		= rtnetlink_bind,
	};
	struct sock *nlsk = netlink_kernel_create(net, NETLINK_ROUTE, &cfg);

	if (!nlsk)
		return -ENOMEM;

	net->rtnl = nlsk;
	return 0;
}
2010-01-17 11:35:32 +08:00
/* Per-namespace exit: release the rtnetlink kernel socket of @net and
 * clear the pointer to it.
 */
static void __net_exit rtnetlink_net_exit(struct net *net)
{
	netlink_kernel_release(net->rtnl);
	net->rtnl = NULL;
}
/* Per-namespace init/exit hooks for the rtnetlink socket; rtnetlink_init()
 * registers them with register_pernet_subsys().
 */
static struct pernet_operations rtnetlink_net_ops = {
. init = rtnetlink_net_init ,
. exit = rtnetlink_net_exit ,
} ;
2005-04-17 06:20:36 +08:00
/*
 * Boot-time setup of the rtnetlink core.
 *
 * Registers the per-namespace operations (panicking if that fails), hooks
 * the netdevice notifier, and then installs the doit/dumpit handlers for the
 * message types handled in this file.
 */
void __init rtnetlink_init(void)
{
	int err = register_pernet_subsys(&rtnetlink_net_ops);

	if (err)
		panic(" rtnetlink_init: cannot initialize rtnetlink \n ");

	/* The return value of the notifier registration is not checked. */
	register_netdevice_notifier(&rtnetlink_dev_notifier);

	/* PF_UNSPEC link messages. */
	rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, rtnl_dump_ifinfo, 0);
	rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL, 0);

	/* Dump-only PF_UNSPEC entries, all served by rtnl_dump_all(). */
	rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, 0);
	rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, 0);
	rtnl_register(PF_UNSPEC, RTM_GETNETCONF, NULL, rtnl_dump_all, 0);

	/* Link property add/delete. */
	rtnl_register(PF_UNSPEC, RTM_NEWLINKPROP, rtnl_newlinkprop, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELLINKPROP, rtnl_dellinkprop, NULL, 0);

	/* PF_BRIDGE neighbour messages, handled by the rtnl_fdb_* functions. */
	rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, 0);
	rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL,
		      RTNL_FLAG_BULK_DEL_SUPPORTED);
	rtnl_register(PF_BRIDGE, RTM_GETNEIGH, rtnl_fdb_get, rtnl_fdb_dump, 0);

	/* PF_BRIDGE link messages. */
	rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, 0);
	rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, 0);
	rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, 0);

	/* Statistics get/dump and set. */
	rtnl_register(PF_UNSPEC, RTM_GETSTATS, rtnl_stats_get, rtnl_stats_dump, 0);
	rtnl_register(PF_UNSPEC, RTM_SETSTATS, rtnl_stats_set, NULL, 0);
}