Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

Pull networking fixes from David Miller:

 1) BPF tail call handling bug fixes from Daniel Borkmann.

 2) Fix allowance of too many rx queues in sfc driver, from Bert
    Kenward.

 3) Non-loopback ipv6 packets claiming src of ::1 should be dropped,
    from Florian Westphal.

 4) Statistics requests on KSZ9031 can crash, fix from Grygorii
    Strashko.

 5) TX ring handling fixes in mediatek driver, from Sean Wang.

 6) ip_ra_control can deadlock, fix the lock acquisition ordering,
    from Cong WANG.

 7) Fix use after free in ip_recv_error(), from Willem de Bruijn.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net:
  bpf: fix checking xdp_adjust_head on tail calls
  bpf: fix cb access in socket filter programs on tail calls
  ipv6: drop non loopback packets claiming to originate from ::1
  net: ethernet: mediatek: fix inconsistency of port number carried in TXD
  net: ethernet: mediatek: fix inconsistency between TXD and the used buffer
  net: phy: micrel: fix crash when statistic requested for KSZ9031 phy
  net: vrf: Fix setting NLM_F_EXCL flag when adding l3mdev rule
  net: thunderx: Fix set_max_bgx_per_node for 81xx rgx
  net-timestamp: avoid use-after-free in ip_recv_error
  ipv4: fix a deadlock in ip_ra_control
  sfc: limit the number of receive queues
This commit is contained in:
Linus Torvalds 2017-04-18 13:24:42 -07:00
commit 40d9018eb7
15 changed files with 69 additions and 58 deletions

View File

@ -134,6 +134,7 @@ static void set_max_bgx_per_node(struct pci_dev *pdev)
pci_read_config_word(pdev, PCI_SUBSYSTEM_ID, &sdevid); pci_read_config_word(pdev, PCI_SUBSYSTEM_ID, &sdevid);
switch (sdevid) { switch (sdevid) {
case PCI_SUBSYS_DEVID_81XX_BGX: case PCI_SUBSYS_DEVID_81XX_BGX:
case PCI_SUBSYS_DEVID_81XX_RGX:
max_bgx_per_node = MAX_BGX_PER_CN81XX; max_bgx_per_node = MAX_BGX_PER_CN81XX;
break; break;
case PCI_SUBSYS_DEVID_83XX_BGX: case PCI_SUBSYS_DEVID_83XX_BGX:

View File

@ -16,6 +16,7 @@
/* Subsystem device IDs */ /* Subsystem device IDs */
#define PCI_SUBSYS_DEVID_88XX_BGX 0xA126 #define PCI_SUBSYS_DEVID_88XX_BGX 0xA126
#define PCI_SUBSYS_DEVID_81XX_BGX 0xA226 #define PCI_SUBSYS_DEVID_81XX_BGX 0xA226
#define PCI_SUBSYS_DEVID_81XX_RGX 0xA254
#define PCI_SUBSYS_DEVID_83XX_BGX 0xA326 #define PCI_SUBSYS_DEVID_83XX_BGX 0xA326
#define MAX_BGX_THUNDER 8 /* Max 2 nodes, 4 per node */ #define MAX_BGX_THUNDER 8 /* Max 2 nodes, 4 per node */

View File

@ -613,7 +613,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
struct mtk_mac *mac = netdev_priv(dev); struct mtk_mac *mac = netdev_priv(dev);
struct mtk_eth *eth = mac->hw; struct mtk_eth *eth = mac->hw;
struct mtk_tx_dma *itxd, *txd; struct mtk_tx_dma *itxd, *txd;
struct mtk_tx_buf *tx_buf; struct mtk_tx_buf *itx_buf, *tx_buf;
dma_addr_t mapped_addr; dma_addr_t mapped_addr;
unsigned int nr_frags; unsigned int nr_frags;
int i, n_desc = 1; int i, n_desc = 1;
@ -627,8 +627,8 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
fport = (mac->id + 1) << TX_DMA_FPORT_SHIFT; fport = (mac->id + 1) << TX_DMA_FPORT_SHIFT;
txd4 |= fport; txd4 |= fport;
tx_buf = mtk_desc_to_tx_buf(ring, itxd); itx_buf = mtk_desc_to_tx_buf(ring, itxd);
memset(tx_buf, 0, sizeof(*tx_buf)); memset(itx_buf, 0, sizeof(*itx_buf));
if (gso) if (gso)
txd4 |= TX_DMA_TSO; txd4 |= TX_DMA_TSO;
@ -647,9 +647,11 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
return -ENOMEM; return -ENOMEM;
WRITE_ONCE(itxd->txd1, mapped_addr); WRITE_ONCE(itxd->txd1, mapped_addr);
tx_buf->flags |= MTK_TX_FLAGS_SINGLE0; itx_buf->flags |= MTK_TX_FLAGS_SINGLE0;
dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr); itx_buf->flags |= (!mac->id) ? MTK_TX_FLAGS_FPORT0 :
dma_unmap_len_set(tx_buf, dma_len0, skb_headlen(skb)); MTK_TX_FLAGS_FPORT1;
dma_unmap_addr_set(itx_buf, dma_addr0, mapped_addr);
dma_unmap_len_set(itx_buf, dma_len0, skb_headlen(skb));
/* TX SG offload */ /* TX SG offload */
txd = itxd; txd = itxd;
@ -685,11 +687,13 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
last_frag * TX_DMA_LS0)); last_frag * TX_DMA_LS0));
WRITE_ONCE(txd->txd4, fport); WRITE_ONCE(txd->txd4, fport);
tx_buf->skb = (struct sk_buff *)MTK_DMA_DUMMY_DESC;
tx_buf = mtk_desc_to_tx_buf(ring, txd); tx_buf = mtk_desc_to_tx_buf(ring, txd);
memset(tx_buf, 0, sizeof(*tx_buf)); memset(tx_buf, 0, sizeof(*tx_buf));
tx_buf->skb = (struct sk_buff *)MTK_DMA_DUMMY_DESC;
tx_buf->flags |= MTK_TX_FLAGS_PAGE0; tx_buf->flags |= MTK_TX_FLAGS_PAGE0;
tx_buf->flags |= (!mac->id) ? MTK_TX_FLAGS_FPORT0 :
MTK_TX_FLAGS_FPORT1;
dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr); dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr);
dma_unmap_len_set(tx_buf, dma_len0, frag_map_size); dma_unmap_len_set(tx_buf, dma_len0, frag_map_size);
frag_size -= frag_map_size; frag_size -= frag_map_size;
@ -698,7 +702,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
} }
/* store skb to cleanup */ /* store skb to cleanup */
tx_buf->skb = skb; itx_buf->skb = skb;
WRITE_ONCE(itxd->txd4, txd4); WRITE_ONCE(itxd->txd4, txd4);
WRITE_ONCE(itxd->txd3, (TX_DMA_SWC | TX_DMA_PLEN0(skb_headlen(skb)) | WRITE_ONCE(itxd->txd3, (TX_DMA_SWC | TX_DMA_PLEN0(skb_headlen(skb)) |
@ -1012,17 +1016,16 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget)
while ((cpu != dma) && budget) { while ((cpu != dma) && budget) {
u32 next_cpu = desc->txd2; u32 next_cpu = desc->txd2;
int mac; int mac = 0;
desc = mtk_qdma_phys_to_virt(ring, desc->txd2); desc = mtk_qdma_phys_to_virt(ring, desc->txd2);
if ((desc->txd3 & TX_DMA_OWNER_CPU) == 0) if ((desc->txd3 & TX_DMA_OWNER_CPU) == 0)
break; break;
mac = (desc->txd4 >> TX_DMA_FPORT_SHIFT) &
TX_DMA_FPORT_MASK;
mac--;
tx_buf = mtk_desc_to_tx_buf(ring, desc); tx_buf = mtk_desc_to_tx_buf(ring, desc);
if (tx_buf->flags & MTK_TX_FLAGS_FPORT1)
mac = 1;
skb = tx_buf->skb; skb = tx_buf->skb;
if (!skb) { if (!skb) {
condition = 1; condition = 1;

View File

@ -406,12 +406,18 @@ struct mtk_hw_stats {
struct u64_stats_sync syncp; struct u64_stats_sync syncp;
}; };
/* PDMA descriptor can point at 1-2 segments. This enum allows us to track how
* memory was allocated so that it can be freed properly
*/
enum mtk_tx_flags { enum mtk_tx_flags {
/* PDMA descriptor can point at 1-2 segments. This enum allows us to
* track how memory was allocated so that it can be freed properly.
*/
MTK_TX_FLAGS_SINGLE0 = 0x01, MTK_TX_FLAGS_SINGLE0 = 0x01,
MTK_TX_FLAGS_PAGE0 = 0x02, MTK_TX_FLAGS_PAGE0 = 0x02,
/* MTK_TX_FLAGS_FPORTx allows tracking which port the transmitted
* SKB out instead of looking up through hardware TX descriptor.
*/
MTK_TX_FLAGS_FPORT0 = 0x04,
MTK_TX_FLAGS_FPORT1 = 0x08,
}; };
/* This enum allows us to identify how the clock is defined on the array of the /* This enum allows us to identify how the clock is defined on the array of the

View File

@ -1371,6 +1371,13 @@ static unsigned int efx_wanted_parallelism(struct efx_nic *efx)
free_cpumask_var(thread_mask); free_cpumask_var(thread_mask);
} }
if (count > EFX_MAX_RX_QUEUES) {
netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn,
"Reducing number of rx queues from %u to %u.\n",
count, EFX_MAX_RX_QUEUES);
count = EFX_MAX_RX_QUEUES;
}
/* If RSS is requested for the PF *and* VFs then we can't write RSS /* If RSS is requested for the PF *and* VFs then we can't write RSS
* table entries that are inaccessible to VFs * table entries that are inaccessible to VFs
*/ */

View File

@ -1354,6 +1354,13 @@ static unsigned int ef4_wanted_parallelism(struct ef4_nic *efx)
free_cpumask_var(thread_mask); free_cpumask_var(thread_mask);
} }
if (count > EF4_MAX_RX_QUEUES) {
netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn,
"Reducing number of rx queues from %u to %u.\n",
count, EF4_MAX_RX_QUEUES);
count = EF4_MAX_RX_QUEUES;
}
return count; return count;
} }

View File

@ -798,9 +798,6 @@ static struct phy_driver ksphy_driver[] = {
.read_status = genphy_read_status, .read_status = genphy_read_status,
.ack_interrupt = kszphy_ack_interrupt, .ack_interrupt = kszphy_ack_interrupt,
.config_intr = kszphy_config_intr, .config_intr = kszphy_config_intr,
.get_sset_count = kszphy_get_sset_count,
.get_strings = kszphy_get_strings,
.get_stats = kszphy_get_stats,
.suspend = genphy_suspend, .suspend = genphy_suspend,
.resume = genphy_resume, .resume = genphy_resume,
}, { }, {
@ -940,9 +937,6 @@ static struct phy_driver ksphy_driver[] = {
.read_status = genphy_read_status, .read_status = genphy_read_status,
.ack_interrupt = kszphy_ack_interrupt, .ack_interrupt = kszphy_ack_interrupt,
.config_intr = kszphy_config_intr, .config_intr = kszphy_config_intr,
.get_sset_count = kszphy_get_sset_count,
.get_strings = kszphy_get_strings,
.get_stats = kszphy_get_stats,
.suspend = genphy_suspend, .suspend = genphy_suspend,
.resume = genphy_resume, .resume = genphy_resume,
}, { }, {
@ -952,6 +946,7 @@ static struct phy_driver ksphy_driver[] = {
.features = PHY_GBIT_FEATURES, .features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
.driver_data = &ksz9021_type, .driver_data = &ksz9021_type,
.probe = kszphy_probe,
.config_init = ksz9021_config_init, .config_init = ksz9021_config_init,
.config_aneg = genphy_config_aneg, .config_aneg = genphy_config_aneg,
.read_status = genphy_read_status, .read_status = genphy_read_status,
@ -971,6 +966,7 @@ static struct phy_driver ksphy_driver[] = {
.features = PHY_GBIT_FEATURES, .features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
.driver_data = &ksz9021_type, .driver_data = &ksz9021_type,
.probe = kszphy_probe,
.config_init = ksz9031_config_init, .config_init = ksz9031_config_init,
.config_aneg = genphy_config_aneg, .config_aneg = genphy_config_aneg,
.read_status = ksz9031_read_status, .read_status = ksz9031_read_status,
@ -989,9 +985,6 @@ static struct phy_driver ksphy_driver[] = {
.config_init = kszphy_config_init, .config_init = kszphy_config_init,
.config_aneg = ksz8873mll_config_aneg, .config_aneg = ksz8873mll_config_aneg,
.read_status = ksz8873mll_read_status, .read_status = ksz8873mll_read_status,
.get_sset_count = kszphy_get_sset_count,
.get_strings = kszphy_get_strings,
.get_stats = kszphy_get_stats,
.suspend = genphy_suspend, .suspend = genphy_suspend,
.resume = genphy_resume, .resume = genphy_resume,
}, { }, {
@ -1003,9 +996,6 @@ static struct phy_driver ksphy_driver[] = {
.config_init = kszphy_config_init, .config_init = kszphy_config_init,
.config_aneg = genphy_config_aneg, .config_aneg = genphy_config_aneg,
.read_status = genphy_read_status, .read_status = genphy_read_status,
.get_sset_count = kszphy_get_sset_count,
.get_strings = kszphy_get_strings,
.get_stats = kszphy_get_stats,
.suspend = genphy_suspend, .suspend = genphy_suspend,
.resume = genphy_resume, .resume = genphy_resume,
}, { }, {
@ -1017,9 +1007,6 @@ static struct phy_driver ksphy_driver[] = {
.config_init = kszphy_config_init, .config_init = kszphy_config_init,
.config_aneg = ksz8873mll_config_aneg, .config_aneg = ksz8873mll_config_aneg,
.read_status = ksz8873mll_read_status, .read_status = ksz8873mll_read_status,
.get_sset_count = kszphy_get_sset_count,
.get_strings = kszphy_get_strings,
.get_stats = kszphy_get_stats,
.suspend = genphy_suspend, .suspend = genphy_suspend,
.resume = genphy_resume, .resume = genphy_resume,
} }; } };

View File

@ -1128,7 +1128,7 @@ static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it)
goto nla_put_failure; goto nla_put_failure;
/* rule only needs to appear once */ /* rule only needs to appear once */
nlh->nlmsg_flags &= NLM_F_EXCL; nlh->nlmsg_flags |= NLM_F_EXCL;
frh = nlmsg_data(nlh); frh = nlmsg_data(nlh);
memset(frh, 0, sizeof(*frh)); memset(frh, 0, sizeof(*frh));

View File

@ -617,6 +617,14 @@ static void fixup_bpf_calls(struct bpf_prog *prog)
if (insn->imm == BPF_FUNC_xdp_adjust_head) if (insn->imm == BPF_FUNC_xdp_adjust_head)
prog->xdp_adjust_head = 1; prog->xdp_adjust_head = 1;
if (insn->imm == BPF_FUNC_tail_call) { if (insn->imm == BPF_FUNC_tail_call) {
/* If we tail call into other programs, we
* cannot make any assumptions since they
* can be replaced dynamically during runtime
* in the program array.
*/
prog->cb_access = 1;
prog->xdp_adjust_head = 1;
/* mark bpf_tail_call as different opcode /* mark bpf_tail_call as different opcode
* to avoid conditional branch in * to avoid conditional branch in
* interpeter for every normal call * interpeter for every normal call

View File

@ -3807,6 +3807,7 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb,
serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
serr->ee.ee_info = tstype; serr->ee.ee_info = tstype;
serr->opt_stats = opt_stats; serr->opt_stats = opt_stats;
serr->header.h4.iif = skb->dev ? skb->dev->ifindex : 0;
if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) { if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
serr->ee.ee_data = skb_shinfo(skb)->tskey; serr->ee.ee_data = skb_shinfo(skb)->tskey;
if (sk->sk_protocol == IPPROTO_TCP && if (sk->sk_protocol == IPPROTO_TCP &&

View File

@ -488,16 +488,15 @@ static bool ipv4_datagram_support_cmsg(const struct sock *sk,
return false; return false;
/* Support IP_PKTINFO on tstamp packets if requested, to correlate /* Support IP_PKTINFO on tstamp packets if requested, to correlate
* timestamp with egress dev. Not possible for packets without dev * timestamp with egress dev. Not possible for packets without iif
* or without payload (SOF_TIMESTAMPING_OPT_TSONLY). * or without payload (SOF_TIMESTAMPING_OPT_TSONLY).
*/ */
if ((!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG)) || info = PKTINFO_SKB_CB(skb);
(!skb->dev)) if (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG) ||
!info->ipi_ifindex)
return false; return false;
info = PKTINFO_SKB_CB(skb);
info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr; info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr;
info->ipi_ifindex = skb->dev->ifindex;
return true; return true;
} }
@ -591,6 +590,7 @@ static bool setsockopt_needs_rtnl(int optname)
case MCAST_LEAVE_GROUP: case MCAST_LEAVE_GROUP:
case MCAST_LEAVE_SOURCE_GROUP: case MCAST_LEAVE_SOURCE_GROUP:
case MCAST_UNBLOCK_SOURCE: case MCAST_UNBLOCK_SOURCE:
case IP_ROUTER_ALERT:
return true; return true;
} }
return false; return false;

View File

@ -1278,7 +1278,7 @@ static void mrtsock_destruct(struct sock *sk)
struct net *net = sock_net(sk); struct net *net = sock_net(sk);
struct mr_table *mrt; struct mr_table *mrt;
rtnl_lock(); ASSERT_RTNL();
ipmr_for_each_table(mrt, net) { ipmr_for_each_table(mrt, net) {
if (sk == rtnl_dereference(mrt->mroute_sk)) { if (sk == rtnl_dereference(mrt->mroute_sk)) {
IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
@ -1289,7 +1289,6 @@ static void mrtsock_destruct(struct sock *sk)
mroute_clean_tables(mrt, false); mroute_clean_tables(mrt, false);
} }
} }
rtnl_unlock();
} }
/* Socket options and virtual interface manipulation. The whole /* Socket options and virtual interface manipulation. The whole
@ -1353,13 +1352,8 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
if (sk != rcu_access_pointer(mrt->mroute_sk)) { if (sk != rcu_access_pointer(mrt->mroute_sk)) {
ret = -EACCES; ret = -EACCES;
} else { } else {
/* We need to unlock here because mrtsock_destruct takes
* care of rtnl itself and we can't change that due to
* the IP_ROUTER_ALERT setsockopt which runs without it.
*/
rtnl_unlock();
ret = ip_ra_control(sk, 0, NULL); ret = ip_ra_control(sk, 0, NULL);
goto out; goto out_unlock;
} }
break; break;
case MRT_ADD_VIF: case MRT_ADD_VIF:
@ -1470,7 +1464,6 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
} }
out_unlock: out_unlock:
rtnl_unlock(); rtnl_unlock();
out:
return ret; return ret;
} }

View File

@ -682,7 +682,9 @@ static void raw_close(struct sock *sk, long timeout)
/* /*
* Raw sockets may have direct kernel references. Kill them. * Raw sockets may have direct kernel references. Kill them.
*/ */
rtnl_lock();
ip_ra_control(sk, 0, NULL); ip_ra_control(sk, 0, NULL);
rtnl_unlock();
sk_common_release(sk); sk_common_release(sk);
} }

View File

@ -405,9 +405,6 @@ static inline bool ipv6_datagram_support_addr(struct sock_exterr_skb *serr)
* At one point, excluding local errors was a quick test to identify icmp/icmp6 * At one point, excluding local errors was a quick test to identify icmp/icmp6
* errors. This is no longer true, but the test remained, so the v6 stack, * errors. This is no longer true, but the test remained, so the v6 stack,
* unlike v4, also honors cmsg requests on all wifi and timestamp errors. * unlike v4, also honors cmsg requests on all wifi and timestamp errors.
*
* Timestamp code paths do not initialize the fields expected by cmsg:
* the PKTINFO fields in skb->cb[]. Fill those in here.
*/ */
static bool ip6_datagram_support_cmsg(struct sk_buff *skb, static bool ip6_datagram_support_cmsg(struct sk_buff *skb,
struct sock_exterr_skb *serr) struct sock_exterr_skb *serr)
@ -419,14 +416,9 @@ static bool ip6_datagram_support_cmsg(struct sk_buff *skb,
if (serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL) if (serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL)
return false; return false;
if (!skb->dev) if (!IP6CB(skb)->iif)
return false; return false;
if (skb->protocol == htons(ETH_P_IPV6))
IP6CB(skb)->iif = skb->dev->ifindex;
else
PKTINFO_SKB_CB(skb)->ipi_ifindex = skb->dev->ifindex;
return true; return true;
} }

View File

@ -122,11 +122,14 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs)); max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
/* /*
* RFC4291 2.5.3 * RFC4291 2.5.3
* The loopback address must not be used as the source address in IPv6
* packets that are sent outside of a single node. [..]
* A packet received on an interface with a destination address * A packet received on an interface with a destination address
* of loopback must be dropped. * of loopback must be dropped.
*/ */
if (!(dev->flags & IFF_LOOPBACK) && if ((ipv6_addr_loopback(&hdr->saddr) ||
ipv6_addr_loopback(&hdr->daddr)) ipv6_addr_loopback(&hdr->daddr)) &&
!(dev->flags & IFF_LOOPBACK))
goto err; goto err;
/* RFC4291 Errata ID: 3480 /* RFC4291 Errata ID: 3480