From 3b01fe7f91c8e4f9afc4fae3c5af72c14958d2d8 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 29 Dec 2016 18:37:09 +0200 Subject: [PATCH 1/5] net/mlx4_core: Use-after-free causes a resource leak in flow-steering detach mlx4_QP_FLOW_STEERING_DETACH_wrapper first removes the steering rule (which results in freeing the rule structure), and then references a field in this struct (the qp number) when releasing the busy-status on the rule's qp. Since this memory was freed, it could reallocated and changed. Therefore, the qp number in the struct may be incorrect, so that we are releasing the incorrect qp. This leaves the rule's qp in the busy state (and could possibly release an incorrect qp as well). Fix this by saving the qp number in a local variable, for use after removing the steering rule. Fixes: 2c473ae7e582 ("net/mlx4_core: Disallow releasing VF QPs which have steering rules") Signed-off-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index c548beaaf910..4b3e139e9c82 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -4473,6 +4473,7 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, struct res_qp *rqp; struct res_fs_rule *rrule; u64 mirr_reg_id; + int qpn; if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) @@ -4489,10 +4490,11 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, } mirr_reg_id = rrule->mirr_rule_id; kfree(rrule->mirr_mbox); + qpn = rrule->qpn; /* Release the rule form busy state before removal */ put_res(dev, slave, vhcr->in_param, RES_FS_RULE); - err = get_res(dev, slave, rrule->qpn, RES_QP, &rqp); + err = get_res(dev, slave, qpn, RES_QP, &rqp); if (err) return err; @@ -4517,7 +4519,7 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, if (!err) atomic_dec(&rqp->ref_count); out: - put_res(dev, slave, rrule->qpn, RES_QP); + put_res(dev, slave, qpn, RES_QP); return err; } From 6496bbf0ec481966ef9ffe5b6660d8d1b55c60cc Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Thu, 29 Dec 2016 18:37:10 +0200 Subject: [PATCH 2/5] net/mlx4_en: Fix bad WQE issue Single send WQE in RX buffer should be stamped with software ownership in order to prevent the flow of QP in error in FW once UPDATE_QP is called. Fixes: 9f519f68cfff ('mlx4_en: Not using Shared Receive Queues') Signed-off-by: Eugenia Emantayev Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 3c37e216bbf3..eac527e25ec9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -445,8 +445,14 @@ int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv) ring->cqn = priv->rx_cq[ring_ind]->mcq.cqn; ring->stride = stride; - if (ring->stride <= TXBB_SIZE) + if (ring->stride <= TXBB_SIZE) { + /* Stamp first unused send wqe */ + __be32 *ptr = (__be32 *)ring->buf; + __be32 stamp = cpu_to_be32(1 << STAMP_SHIFT); + *ptr = stamp; + /* Move pointer to start of rx section */ ring->buf += TXBB_SIZE; + } ring->log_stride = ffs(ring->stride) - 1; ring->buf_size = ring->size * ring->stride; From c1d5f8ff80ea84768f5fae1ca9d1abfbb5e6bbaa Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 29 Dec 2016 18:37:11 +0200 Subject: [PATCH 3/5] net/mlx4: Remove BUG_ON from ICM allocation routine This patch removes BUG_ON() macro from mlx4_alloc_icm_coherent() by checking DMA address alignment in advance and performing proper folding in case of error. Fixes: 5b0bf5e25efe ("mlx4_core: Support ICM tables in coherent memory") Reported-by: Ozgur Karatas Signed-off-by: Leon Romanovsky Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/icm.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c index 2a9dd460a95f..e1f9e7cebf8f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.c +++ b/drivers/net/ethernet/mellanox/mlx4/icm.c @@ -118,8 +118,13 @@ static int mlx4_alloc_icm_coherent(struct device *dev, struct scatterlist *mem, if (!buf) return -ENOMEM; + if (offset_in_page(buf)) { + dma_free_coherent(dev, PAGE_SIZE << order, + buf, sg_dma_address(mem)); + return -ENOMEM; + } + sg_set_buf(mem, buf, PAGE_SIZE << order); - BUG_ON(mem->offset); sg_dma_len(mem) = PAGE_SIZE << order; return 0; } From 61b6034c6cfdcb265bb453505c3d688e7567727a Mon Sep 17 00:00:00 2001 From: Slava Shwartsman Date: Thu, 29 Dec 2016 18:37:12 +0200 Subject: [PATCH 4/5] net/mlx4_en: Fix type mismatch for 32-bit systems is_power_of_2 expects unsigned long and we pass u64 max_val_cycles, this will be truncated on 32 bit systems, and the result is not what we were expecting. div_u64 expects u32 as a second argument and we pass max_val_cycles_rounded which is u64 hence it will always be truncated. Fix was tested on both 64 and 32 bit systems and got same results for max_val_cycles and max_val_cycles_rounded. Fixes: 4850cf458157 ("net/mlx4_en: Resolve dividing by zero in 32-bit system") Signed-off-by: Slava Shwartsman Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_clock.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c index 015198c14fa8..504461a464c5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c @@ -245,13 +245,9 @@ static u32 freq_to_shift(u16 freq) { u32 freq_khz = freq * 1000; u64 max_val_cycles = freq_khz * 1000 * MLX4_EN_WRAP_AROUND_SEC; - u64 tmp_rounded = - roundup_pow_of_two(max_val_cycles) > max_val_cycles ? - roundup_pow_of_two(max_val_cycles) - 1 : UINT_MAX; - u64 max_val_cycles_rounded = is_power_of_2(max_val_cycles + 1) ? - max_val_cycles : tmp_rounded; + u64 max_val_cycles_rounded = 1ULL << fls64(max_val_cycles - 1); /* calculate max possible multiplier in order to fit in 64bit */ - u64 max_mul = div_u64(0xffffffffffffffffULL, max_val_cycles_rounded); + u64 max_mul = div64_u64(ULLONG_MAX, max_val_cycles_rounded); /* This comes from the reverse of clocksource_khz2mult */ return ilog2(div_u64(max_mul * freq_khz, 1000000)); From 10b1c04e92229ebeb38ccd0dcf2b6d3ec73c0575 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 29 Dec 2016 18:37:13 +0200 Subject: [PATCH 5/5] net/mlx4_core: Fix raw qp flow steering rules under SRIOV Demoting simple flow steering rule priority (for DPDK) was achieved by wrapping FW commands MLX4_QP_FLOW_STEERING_ATTACH/DETACH for the PF as well, and forcing the priority to MLX4_DOMAIN_NIC in the wrapper function for the PF and all VFs. In function mlx4_ib_create_flow(), this change caused the main rule creation for the PF to be wrapped, while it left the associated tunnel steering rule creation unwrapped for the PF. This mismatch caused rule deletion failures in mlx4_ib_destroy_flow() for the PF when the detach wrapper function did not find the associated tunnel-steering rule (since creation of that rule for the PF did not go through the wrapper function). Fix this by setting MLX4_QP_FLOW_STEERING_ATTACH/DETACH to be "native" (so that the PF invocation does not go through the wrapper), and perform the required priority demotion for the PF in the mlx4_ib_create_flow() code path. Fixes: 48564135cba8 ("net/mlx4_core: Demote simple multicast and broadcast flow steering rules") Signed-off-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/main.c | 14 ++++++++++-- drivers/net/ethernet/mellanox/mlx4/main.c | 18 +++++++++++++++ .../ethernet/mellanox/mlx4/resource_tracker.c | 22 +------------------ include/linux/mlx4/device.h | 2 ++ 4 files changed, 33 insertions(+), 23 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index c8413fc120e6..7031a8dd4d14 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1682,9 +1682,19 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att size += ret; } + if (mlx4_is_master(mdev->dev) && flow_type == MLX4_FS_REGULAR && + flow_attr->num_of_specs == 1) { + struct _rule_hw *rule_header = (struct _rule_hw *)(ctrl + 1); + enum ib_flow_spec_type header_spec = + ((union ib_flow_spec *)(flow_attr + 1))->type; + + if (header_spec == IB_FLOW_SPEC_ETH) + mlx4_handle_eth_header_mcast_prio(ctrl, rule_header); + } + ret = mlx4_cmd_imm(mdev->dev, mailbox->dma, reg_id, size >> 2, 0, MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, - MLX4_CMD_WRAPPED); + MLX4_CMD_NATIVE); if (ret == -ENOMEM) pr_err("mcg table is full. Fail to register network rule.\n"); else if (ret == -ENXIO) @@ -1701,7 +1711,7 @@ static int __mlx4_ib_destroy_flow(struct mlx4_dev *dev, u64 reg_id) int err; err = mlx4_cmd(dev, reg_id, 0, 0, MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, - MLX4_CMD_WRAPPED); + MLX4_CMD_NATIVE); if (err) pr_err("Fail to detach network rule. registration id = 0x%llx\n", reg_id); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 5e7840a7a33b..bffa6f345f2f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -782,6 +783,23 @@ int mlx4_is_slave_active(struct mlx4_dev *dev, int slave) } EXPORT_SYMBOL(mlx4_is_slave_active); +void mlx4_handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl, + struct _rule_hw *eth_header) +{ + if (is_multicast_ether_addr(eth_header->eth.dst_mac) || + is_broadcast_ether_addr(eth_header->eth.dst_mac)) { + struct mlx4_net_trans_rule_hw_eth *eth = + (struct mlx4_net_trans_rule_hw_eth *)eth_header; + struct _rule_hw *next_rule = (struct _rule_hw *)(eth + 1); + bool last_rule = next_rule->size == 0 && next_rule->id == 0 && + next_rule->rsvd == 0; + + if (last_rule) + ctrl->prio = cpu_to_be16(MLX4_DOMAIN_NIC); + } +} +EXPORT_SYMBOL(mlx4_handle_eth_header_mcast_prio); + static void slave_adjust_steering_mode(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, struct mlx4_init_hca_param *hca_param) diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 4b3e139e9c82..56185a0b827d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -4164,22 +4164,6 @@ static int validate_eth_header_mac(int slave, struct _rule_hw *eth_header, return 0; } -static void handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl, - struct _rule_hw *eth_header) -{ - if (is_multicast_ether_addr(eth_header->eth.dst_mac) || - is_broadcast_ether_addr(eth_header->eth.dst_mac)) { - struct mlx4_net_trans_rule_hw_eth *eth = - (struct mlx4_net_trans_rule_hw_eth *)eth_header; - struct _rule_hw *next_rule = (struct _rule_hw *)(eth + 1); - bool last_rule = next_rule->size == 0 && next_rule->id == 0 && - next_rule->rsvd == 0; - - if (last_rule) - ctrl->prio = cpu_to_be16(MLX4_DOMAIN_NIC); - } -} - /* * In case of missing eth header, append eth header with a MAC address * assigned to the VF. @@ -4363,10 +4347,7 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, header_id = map_hw_to_sw_id(be16_to_cpu(rule_header->id)); if (header_id == MLX4_NET_TRANS_RULE_ID_ETH) - handle_eth_header_mcast_prio(ctrl, rule_header); - - if (slave == dev->caps.function) - goto execute; + mlx4_handle_eth_header_mcast_prio(ctrl, rule_header); switch (header_id) { case MLX4_NET_TRANS_RULE_ID_ETH: @@ -4394,7 +4375,6 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, goto err_put_qp; } -execute: err = mlx4_cmd_imm(dev, inbox->dma, &vhcr->out_param, vhcr->in_modifier, 0, MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 93bdb3485192..6533c16e27ad 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1384,6 +1384,8 @@ int set_phv_bit(struct mlx4_dev *dev, u8 port, int new_val); int get_phv_bit(struct mlx4_dev *dev, u8 port, int *phv); int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port, bool *vlan_offload_disabled); +void mlx4_handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl, + struct _rule_hw *eth_header); int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx); int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);