Merge branch 'bridge-improve-cache-utilization'
Nikolay Aleksandrov says: ==================== bridge: improve cache utilization This is the first set which begins to deal with the bad bridge cache access patterns. The first patch rearranges the bridge and port structs a little so the frequently (and closely) accessed members are in the same cache line. The second patch then moves the garbage collection to a workqueue trying to improve system responsiveness under load (many fdbs) and more importantly removes the need to check if the matched entry is expired in __br_fdb_get which was a major source of false-sharing. The third patch is a preparation for the final one which If properly configured, i.e. ports bound to CPUs (thus updating "updated" locally) then the bridge's HitM goes from 100% to 0%, but even without binding we get a win because previously every lookup that iterated over the hash chain caused false-sharing due to the first cache line being used for both mac/vid and used/updated fields. Some results from tests I've run: (note that these were run in good conditions for the baseline, everything ran on a single NUMA node and there were only 3 fdbs) 1. baseline 100% Load HitM on the fdbs (between everyone who has done lookups and hit one of the 3 hash chains of the communicating src/dst fdbs) Overall 5.06% Load HitM for the bridge, first place in the list 2. patched & ports bound to CPUs 0% Local load HitM, bridge is not even in the c2c report list Also there's 3% consistent improvement in netperf tests. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
152bff3776
|
@ -411,4 +411,5 @@ void br_dev_setup(struct net_device *dev)
|
||||||
br_netfilter_rtable_init(br);
|
br_netfilter_rtable_init(br);
|
||||||
br_stp_timer_init(br);
|
br_stp_timer_init(br);
|
||||||
br_multicast_init(br);
|
br_multicast_init(br);
|
||||||
|
INIT_DELAYED_WORK(&br->gc_work, br_fdb_cleanup);
|
||||||
}
|
}
|
||||||
|
|
|
@ -154,7 +154,7 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f)
|
||||||
if (f->added_by_external_learn)
|
if (f->added_by_external_learn)
|
||||||
fdb_del_external_learn(f);
|
fdb_del_external_learn(f);
|
||||||
|
|
||||||
hlist_del_rcu(&f->hlist);
|
hlist_del_init_rcu(&f->hlist);
|
||||||
fdb_notify(br, f, RTM_DELNEIGH);
|
fdb_notify(br, f, RTM_DELNEIGH);
|
||||||
call_rcu(&f->rcu, fdb_rcu_free);
|
call_rcu(&f->rcu, fdb_rcu_free);
|
||||||
}
|
}
|
||||||
|
@ -290,34 +290,43 @@ out:
|
||||||
spin_unlock_bh(&br->hash_lock);
|
spin_unlock_bh(&br->hash_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
void br_fdb_cleanup(unsigned long _data)
|
void br_fdb_cleanup(struct work_struct *work)
|
||||||
{
|
{
|
||||||
struct net_bridge *br = (struct net_bridge *)_data;
|
struct net_bridge *br = container_of(work, struct net_bridge,
|
||||||
|
gc_work.work);
|
||||||
unsigned long delay = hold_time(br);
|
unsigned long delay = hold_time(br);
|
||||||
unsigned long next_timer = jiffies + br->ageing_time;
|
unsigned long work_delay = delay;
|
||||||
|
unsigned long now = jiffies;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
spin_lock(&br->hash_lock);
|
|
||||||
for (i = 0; i < BR_HASH_SIZE; i++) {
|
for (i = 0; i < BR_HASH_SIZE; i++) {
|
||||||
struct net_bridge_fdb_entry *f;
|
struct net_bridge_fdb_entry *f;
|
||||||
struct hlist_node *n;
|
struct hlist_node *n;
|
||||||
|
|
||||||
|
if (!br->hash[i].first)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
spin_lock_bh(&br->hash_lock);
|
||||||
hlist_for_each_entry_safe(f, n, &br->hash[i], hlist) {
|
hlist_for_each_entry_safe(f, n, &br->hash[i], hlist) {
|
||||||
unsigned long this_timer;
|
unsigned long this_timer;
|
||||||
|
|
||||||
if (f->is_static)
|
if (f->is_static)
|
||||||
continue;
|
continue;
|
||||||
if (f->added_by_external_learn)
|
if (f->added_by_external_learn)
|
||||||
continue;
|
continue;
|
||||||
this_timer = f->updated + delay;
|
this_timer = f->updated + delay;
|
||||||
if (time_before_eq(this_timer, jiffies))
|
if (time_after(this_timer, now))
|
||||||
|
work_delay = min(work_delay, this_timer - now);
|
||||||
|
else
|
||||||
fdb_delete(br, f);
|
fdb_delete(br, f);
|
||||||
else if (time_before(this_timer, next_timer))
|
|
||||||
next_timer = this_timer;
|
|
||||||
}
|
}
|
||||||
|
spin_unlock_bh(&br->hash_lock);
|
||||||
|
cond_resched();
|
||||||
}
|
}
|
||||||
spin_unlock(&br->hash_lock);
|
|
||||||
|
|
||||||
mod_timer(&br->gc_timer, round_jiffies_up(next_timer));
|
/* Cleanup minimum 10 milliseconds apart */
|
||||||
|
work_delay = max_t(unsigned long, work_delay, msecs_to_jiffies(10));
|
||||||
|
mod_delayed_work(system_long_wq, &br->gc_work, work_delay);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Completely flush all dynamic entries in forwarding database.*/
|
/* Completely flush all dynamic entries in forwarding database.*/
|
||||||
|
@ -382,8 +391,6 @@ struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br,
|
||||||
&br->hash[br_mac_hash(addr, vid)], hlist) {
|
&br->hash[br_mac_hash(addr, vid)], hlist) {
|
||||||
if (ether_addr_equal(fdb->addr.addr, addr) &&
|
if (ether_addr_equal(fdb->addr.addr, addr) &&
|
||||||
fdb->vlan_id == vid) {
|
fdb->vlan_id == vid) {
|
||||||
if (unlikely(has_expired(br, fdb)))
|
|
||||||
break;
|
|
||||||
return fdb;
|
return fdb;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -590,7 +597,8 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
|
||||||
fdb->dst = source;
|
fdb->dst = source;
|
||||||
fdb_modified = true;
|
fdb_modified = true;
|
||||||
}
|
}
|
||||||
fdb->updated = jiffies;
|
if (jiffies != fdb->updated)
|
||||||
|
fdb->updated = jiffies;
|
||||||
if (unlikely(added_by_user))
|
if (unlikely(added_by_user))
|
||||||
fdb->added_by_user = 1;
|
fdb->added_by_user = 1;
|
||||||
if (unlikely(fdb_modified))
|
if (unlikely(fdb_modified))
|
||||||
|
|
|
@ -313,7 +313,7 @@ void br_dev_delete(struct net_device *dev, struct list_head *head)
|
||||||
|
|
||||||
br_vlan_flush(br);
|
br_vlan_flush(br);
|
||||||
br_multicast_dev_del(br);
|
br_multicast_dev_del(br);
|
||||||
del_timer_sync(&br->gc_timer);
|
cancel_delayed_work_sync(&br->gc_work);
|
||||||
|
|
||||||
br_sysfs_delbr(br->dev);
|
br_sysfs_delbr(br->dev);
|
||||||
unregister_netdevice_queue(br->dev, head);
|
unregister_netdevice_queue(br->dev, head);
|
||||||
|
|
|
@ -198,7 +198,8 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
|
||||||
if (dst->is_local)
|
if (dst->is_local)
|
||||||
return br_pass_frame_up(skb);
|
return br_pass_frame_up(skb);
|
||||||
|
|
||||||
dst->used = jiffies;
|
if (jiffies != dst->used)
|
||||||
|
dst->used = jiffies;
|
||||||
br_forward(dst->dst, skb, local_rcv, false);
|
br_forward(dst->dst, skb, local_rcv, false);
|
||||||
} else {
|
} else {
|
||||||
if (!mcast_hit)
|
if (!mcast_hit)
|
||||||
|
|
|
@ -149,7 +149,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
|
||||||
b.hello_timer_value = br_timer_value(&br->hello_timer);
|
b.hello_timer_value = br_timer_value(&br->hello_timer);
|
||||||
b.tcn_timer_value = br_timer_value(&br->tcn_timer);
|
b.tcn_timer_value = br_timer_value(&br->tcn_timer);
|
||||||
b.topology_change_timer_value = br_timer_value(&br->topology_change_timer);
|
b.topology_change_timer_value = br_timer_value(&br->topology_change_timer);
|
||||||
b.gc_timer_value = br_timer_value(&br->gc_timer);
|
b.gc_timer_value = br_timer_value(&br->gc_work.timer);
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
|
|
||||||
if (copy_to_user((void __user *)args[1], &b, sizeof(b)))
|
if (copy_to_user((void __user *)args[1], &b, sizeof(b)))
|
||||||
|
|
|
@ -1250,7 +1250,7 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
|
||||||
if (nla_put_u64_64bit(skb, IFLA_BR_TOPOLOGY_CHANGE_TIMER, clockval,
|
if (nla_put_u64_64bit(skb, IFLA_BR_TOPOLOGY_CHANGE_TIMER, clockval,
|
||||||
IFLA_BR_PAD))
|
IFLA_BR_PAD))
|
||||||
return -EMSGSIZE;
|
return -EMSGSIZE;
|
||||||
clockval = br_timer_value(&br->gc_timer);
|
clockval = br_timer_value(&br->gc_work.timer);
|
||||||
if (nla_put_u64_64bit(skb, IFLA_BR_GC_TIMER, clockval, IFLA_BR_PAD))
|
if (nla_put_u64_64bit(skb, IFLA_BR_GC_TIMER, clockval, IFLA_BR_PAD))
|
||||||
return -EMSGSIZE;
|
return -EMSGSIZE;
|
||||||
|
|
||||||
|
|
|
@ -160,19 +160,21 @@ struct net_bridge_vlan_group {
|
||||||
u16 pvid;
|
u16 pvid;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct net_bridge_fdb_entry
|
struct net_bridge_fdb_entry {
|
||||||
{
|
|
||||||
struct hlist_node hlist;
|
struct hlist_node hlist;
|
||||||
struct net_bridge_port *dst;
|
struct net_bridge_port *dst;
|
||||||
|
|
||||||
unsigned long updated;
|
|
||||||
unsigned long used;
|
|
||||||
mac_addr addr;
|
mac_addr addr;
|
||||||
__u16 vlan_id;
|
__u16 vlan_id;
|
||||||
unsigned char is_local:1,
|
unsigned char is_local:1,
|
||||||
is_static:1,
|
is_static:1,
|
||||||
added_by_user:1,
|
added_by_user:1,
|
||||||
added_by_external_learn:1;
|
added_by_external_learn:1;
|
||||||
|
|
||||||
|
/* write-heavy members should not affect lookups */
|
||||||
|
unsigned long updated ____cacheline_aligned_in_smp;
|
||||||
|
unsigned long used;
|
||||||
|
|
||||||
struct rcu_head rcu;
|
struct rcu_head rcu;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -212,12 +214,16 @@ struct net_bridge_mdb_htable
|
||||||
u32 ver;
|
u32 ver;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct net_bridge_port
|
struct net_bridge_port {
|
||||||
{
|
|
||||||
struct net_bridge *br;
|
struct net_bridge *br;
|
||||||
struct net_device *dev;
|
struct net_device *dev;
|
||||||
struct list_head list;
|
struct list_head list;
|
||||||
|
|
||||||
|
unsigned long flags;
|
||||||
|
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
|
||||||
|
struct net_bridge_vlan_group __rcu *vlgrp;
|
||||||
|
#endif
|
||||||
|
|
||||||
/* STP */
|
/* STP */
|
||||||
u8 priority;
|
u8 priority;
|
||||||
u8 state;
|
u8 state;
|
||||||
|
@ -238,8 +244,6 @@ struct net_bridge_port
|
||||||
struct kobject kobj;
|
struct kobject kobj;
|
||||||
struct rcu_head rcu;
|
struct rcu_head rcu;
|
||||||
|
|
||||||
unsigned long flags;
|
|
||||||
|
|
||||||
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
|
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
|
||||||
struct bridge_mcast_own_query ip4_own_query;
|
struct bridge_mcast_own_query ip4_own_query;
|
||||||
#if IS_ENABLED(CONFIG_IPV6)
|
#if IS_ENABLED(CONFIG_IPV6)
|
||||||
|
@ -259,9 +263,6 @@ struct net_bridge_port
|
||||||
#ifdef CONFIG_NET_POLL_CONTROLLER
|
#ifdef CONFIG_NET_POLL_CONTROLLER
|
||||||
struct netpoll *np;
|
struct netpoll *np;
|
||||||
#endif
|
#endif
|
||||||
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
|
|
||||||
struct net_bridge_vlan_group __rcu *vlgrp;
|
|
||||||
#endif
|
|
||||||
#ifdef CONFIG_NET_SWITCHDEV
|
#ifdef CONFIG_NET_SWITCHDEV
|
||||||
int offload_fwd_mark;
|
int offload_fwd_mark;
|
||||||
#endif
|
#endif
|
||||||
|
@ -283,14 +284,21 @@ static inline struct net_bridge_port *br_port_get_rtnl(const struct net_device *
|
||||||
rtnl_dereference(dev->rx_handler_data) : NULL;
|
rtnl_dereference(dev->rx_handler_data) : NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct net_bridge
|
struct net_bridge {
|
||||||
{
|
|
||||||
spinlock_t lock;
|
spinlock_t lock;
|
||||||
|
spinlock_t hash_lock;
|
||||||
struct list_head port_list;
|
struct list_head port_list;
|
||||||
struct net_device *dev;
|
struct net_device *dev;
|
||||||
|
|
||||||
struct pcpu_sw_netstats __percpu *stats;
|
struct pcpu_sw_netstats __percpu *stats;
|
||||||
spinlock_t hash_lock;
|
/* These fields are accessed on each packet */
|
||||||
|
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
|
||||||
|
u8 vlan_enabled;
|
||||||
|
u8 vlan_stats_enabled;
|
||||||
|
__be16 vlan_proto;
|
||||||
|
u16 default_pvid;
|
||||||
|
struct net_bridge_vlan_group __rcu *vlgrp;
|
||||||
|
#endif
|
||||||
|
|
||||||
struct hlist_head hash[BR_HASH_SIZE];
|
struct hlist_head hash[BR_HASH_SIZE];
|
||||||
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
|
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
|
||||||
union {
|
union {
|
||||||
|
@ -308,6 +316,9 @@ struct net_bridge
|
||||||
bridge_id designated_root;
|
bridge_id designated_root;
|
||||||
bridge_id bridge_id;
|
bridge_id bridge_id;
|
||||||
u32 root_path_cost;
|
u32 root_path_cost;
|
||||||
|
unsigned char topology_change;
|
||||||
|
unsigned char topology_change_detected;
|
||||||
|
u16 root_port;
|
||||||
unsigned long max_age;
|
unsigned long max_age;
|
||||||
unsigned long hello_time;
|
unsigned long hello_time;
|
||||||
unsigned long forward_delay;
|
unsigned long forward_delay;
|
||||||
|
@ -319,7 +330,6 @@ struct net_bridge
|
||||||
|
|
||||||
u8 group_addr[ETH_ALEN];
|
u8 group_addr[ETH_ALEN];
|
||||||
bool group_addr_set;
|
bool group_addr_set;
|
||||||
u16 root_port;
|
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
BR_NO_STP, /* no spanning tree */
|
BR_NO_STP, /* no spanning tree */
|
||||||
|
@ -327,9 +337,6 @@ struct net_bridge
|
||||||
BR_USER_STP, /* new RSTP in userspace */
|
BR_USER_STP, /* new RSTP in userspace */
|
||||||
} stp_enabled;
|
} stp_enabled;
|
||||||
|
|
||||||
unsigned char topology_change;
|
|
||||||
unsigned char topology_change_detected;
|
|
||||||
|
|
||||||
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
|
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
|
||||||
unsigned char multicast_router;
|
unsigned char multicast_router;
|
||||||
|
|
||||||
|
@ -374,21 +381,13 @@ struct net_bridge
|
||||||
struct timer_list hello_timer;
|
struct timer_list hello_timer;
|
||||||
struct timer_list tcn_timer;
|
struct timer_list tcn_timer;
|
||||||
struct timer_list topology_change_timer;
|
struct timer_list topology_change_timer;
|
||||||
struct timer_list gc_timer;
|
struct delayed_work gc_work;
|
||||||
struct kobject *ifobj;
|
struct kobject *ifobj;
|
||||||
u32 auto_cnt;
|
u32 auto_cnt;
|
||||||
|
|
||||||
#ifdef CONFIG_NET_SWITCHDEV
|
#ifdef CONFIG_NET_SWITCHDEV
|
||||||
int offload_fwd_mark;
|
int offload_fwd_mark;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
|
|
||||||
struct net_bridge_vlan_group __rcu *vlgrp;
|
|
||||||
u8 vlan_enabled;
|
|
||||||
u8 vlan_stats_enabled;
|
|
||||||
__be16 vlan_proto;
|
|
||||||
u16 default_pvid;
|
|
||||||
#endif
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct br_input_skb_cb {
|
struct br_input_skb_cb {
|
||||||
|
@ -505,7 +504,7 @@ void br_fdb_find_delete_local(struct net_bridge *br,
|
||||||
const unsigned char *addr, u16 vid);
|
const unsigned char *addr, u16 vid);
|
||||||
void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr);
|
void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr);
|
||||||
void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr);
|
void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr);
|
||||||
void br_fdb_cleanup(unsigned long arg);
|
void br_fdb_cleanup(struct work_struct *work);
|
||||||
void br_fdb_delete_by_port(struct net_bridge *br,
|
void br_fdb_delete_by_port(struct net_bridge *br,
|
||||||
const struct net_bridge_port *p, u16 vid, int do_all);
|
const struct net_bridge_port *p, u16 vid, int do_all);
|
||||||
struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br,
|
struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br,
|
||||||
|
|
|
@ -602,7 +602,7 @@ int br_set_ageing_time(struct net_bridge *br, clock_t ageing_time)
|
||||||
br->ageing_time = t;
|
br->ageing_time = t;
|
||||||
spin_unlock_bh(&br->lock);
|
spin_unlock_bh(&br->lock);
|
||||||
|
|
||||||
mod_timer(&br->gc_timer, jiffies);
|
mod_delayed_work(system_long_wq, &br->gc_work, 0);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -57,7 +57,7 @@ void br_stp_enable_bridge(struct net_bridge *br)
|
||||||
spin_lock_bh(&br->lock);
|
spin_lock_bh(&br->lock);
|
||||||
if (br->stp_enabled == BR_KERNEL_STP)
|
if (br->stp_enabled == BR_KERNEL_STP)
|
||||||
mod_timer(&br->hello_timer, jiffies + br->hello_time);
|
mod_timer(&br->hello_timer, jiffies + br->hello_time);
|
||||||
mod_timer(&br->gc_timer, jiffies + HZ/10);
|
mod_delayed_work(system_long_wq, &br->gc_work, HZ / 10);
|
||||||
|
|
||||||
br_config_bpdu_generation(br);
|
br_config_bpdu_generation(br);
|
||||||
|
|
||||||
|
@ -88,7 +88,7 @@ void br_stp_disable_bridge(struct net_bridge *br)
|
||||||
del_timer_sync(&br->hello_timer);
|
del_timer_sync(&br->hello_timer);
|
||||||
del_timer_sync(&br->topology_change_timer);
|
del_timer_sync(&br->topology_change_timer);
|
||||||
del_timer_sync(&br->tcn_timer);
|
del_timer_sync(&br->tcn_timer);
|
||||||
del_timer_sync(&br->gc_timer);
|
cancel_delayed_work_sync(&br->gc_work);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* called under bridge lock */
|
/* called under bridge lock */
|
||||||
|
|
|
@ -153,8 +153,6 @@ void br_stp_timer_init(struct net_bridge *br)
|
||||||
setup_timer(&br->topology_change_timer,
|
setup_timer(&br->topology_change_timer,
|
||||||
br_topology_change_timer_expired,
|
br_topology_change_timer_expired,
|
||||||
(unsigned long) br);
|
(unsigned long) br);
|
||||||
|
|
||||||
setup_timer(&br->gc_timer, br_fdb_cleanup, (unsigned long) br);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void br_stp_port_timer_init(struct net_bridge_port *p)
|
void br_stp_port_timer_init(struct net_bridge_port *p)
|
||||||
|
|
|
@ -263,7 +263,7 @@ static ssize_t gc_timer_show(struct device *d, struct device_attribute *attr,
|
||||||
char *buf)
|
char *buf)
|
||||||
{
|
{
|
||||||
struct net_bridge *br = to_bridge(d);
|
struct net_bridge *br = to_bridge(d);
|
||||||
return sprintf(buf, "%ld\n", br_timer_value(&br->gc_timer));
|
return sprintf(buf, "%ld\n", br_timer_value(&br->gc_work.timer));
|
||||||
}
|
}
|
||||||
static DEVICE_ATTR_RO(gc_timer);
|
static DEVICE_ATTR_RO(gc_timer);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue