From 772ed869f535b4ec2b134645c951ff22de4d3f79 Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Sun, 9 Sep 2018 08:15:20 +0000 Subject: [PATCH 1/7] net: ena: fix surprise unplug NULL dereference kernel crash Starting with driver version 1.5.0, in case of a surprise device unplug, there is a race caused by invoking ena_destroy_device() from two different places. As a result, the readless register might be accessed after it was destroyed. Signed-off-by: Netanel Belgazal Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index c673ac2df65b..170830b807fe 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -3409,12 +3409,12 @@ static void ena_remove(struct pci_dev *pdev) netdev->rx_cpu_rmap = NULL; } #endif /* CONFIG_RFS_ACCEL */ - - unregister_netdev(netdev); del_timer_sync(&adapter->timer_service); cancel_work_sync(&adapter->reset_task); + unregister_netdev(netdev); + /* Reset the device only if the device is running. */ if (test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags)) ena_com_dev_reset(ena_dev, adapter->reset_reason); From ef5b0771d247379c90c8bf1332ff32f7f74bff7f Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Sun, 9 Sep 2018 08:15:21 +0000 Subject: [PATCH 2/7] net: ena: fix driver when PAGE_SIZE == 64kB The buffer length field in the ena rx descriptor is 16 bit, and the current driver passes a full page in each ena rx descriptor. When PAGE_SIZE equals 64kB or more, the buffer length field becomes zero. To solve this issue, limit the ena Rx descriptor to use 16kB even when allocating 64kB kernel pages. This change would not impact ena device functionality, as 16kB is still larger than maximum MTU. Signed-off-by: Netanel Belgazal Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 10 +++++----- drivers/net/ethernet/amazon/ena/ena_netdev.h | 11 +++++++++++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 170830b807fe..69e684fd2787 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -461,7 +461,7 @@ static inline int ena_alloc_rx_page(struct ena_ring *rx_ring, return -ENOMEM; } - dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, + dma = dma_map_page(rx_ring->dev, page, 0, ENA_PAGE_SIZE, DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(rx_ring->dev, dma))) { u64_stats_update_begin(&rx_ring->syncp); @@ -478,7 +478,7 @@ static inline int ena_alloc_rx_page(struct ena_ring *rx_ring, rx_info->page_offset = 0; ena_buf = &rx_info->ena_buf; ena_buf->paddr = dma; - ena_buf->len = PAGE_SIZE; + ena_buf->len = ENA_PAGE_SIZE; return 0; } @@ -495,7 +495,7 @@ static void ena_free_rx_page(struct ena_ring *rx_ring, return; } - dma_unmap_page(rx_ring->dev, ena_buf->paddr, PAGE_SIZE, + dma_unmap_page(rx_ring->dev, ena_buf->paddr, ENA_PAGE_SIZE, DMA_FROM_DEVICE); __free_page(page); @@ -916,10 +916,10 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, do { dma_unmap_page(rx_ring->dev, dma_unmap_addr(&rx_info->ena_buf, paddr), - PAGE_SIZE, DMA_FROM_DEVICE); + ENA_PAGE_SIZE, DMA_FROM_DEVICE); skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page, - rx_info->page_offset, len, PAGE_SIZE); + rx_info->page_offset, len, ENA_PAGE_SIZE); netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, "rx skb updated. len %d. data_len %d\n", diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index f1972b5ab650..7c7ae56c52cf 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -355,4 +355,15 @@ void ena_dump_stats_to_buf(struct ena_adapter *adapter, u8 *buf); int ena_get_sset_count(struct net_device *netdev, int sset); +/* The ENA buffer length fields is 16 bit long. So when PAGE_SIZE == 64kB the + * driver passas 0. + * Since the max packet size the ENA handles is ~9kB limit the buffer length to + * 16kB. + */ +#if PAGE_SIZE > SZ_16K +#define ENA_PAGE_SIZE SZ_16K +#else +#define ENA_PAGE_SIZE PAGE_SIZE +#endif + #endif /* !(ENA_H) */ From cfa324a514233b28a6934de619183eee941f02d7 Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Sun, 9 Sep 2018 08:15:22 +0000 Subject: [PATCH 3/7] net: ena: fix device destruction to gracefully free resources When ena_destroy_device() is called from ena_suspend(), the device is still reachable from the driver. Therefore, the driver can send a command to the device to free all resources. However, in all other cases of calling ena_destroy_device(), the device is potentially in an error state and unreachable from the driver. In these cases the driver must not send commands to the device. The current implementation does not request resource freeing from the device even when possible. We add the graceful parameter to ena_destroy_device() to enable resource freeing when possible, and use it in ena_suspend(). Signed-off-by: Netanel Belgazal Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 69e684fd2787..035d47d2179a 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -76,7 +76,7 @@ MODULE_DEVICE_TABLE(pci, ena_pci_tbl); static int ena_rss_init_default(struct ena_adapter *adapter); static void check_for_admin_com_state(struct ena_adapter *adapter); -static void ena_destroy_device(struct ena_adapter *adapter); +static void ena_destroy_device(struct ena_adapter *adapter, bool graceful); static int ena_restore_device(struct ena_adapter *adapter); static void ena_tx_timeout(struct net_device *dev) @@ -1900,7 +1900,7 @@ static int ena_close(struct net_device *netdev) "Destroy failure, restarting device\n"); ena_dump_stats_to_dmesg(adapter); /* rtnl lock already obtained in dev_ioctl() layer */ - ena_destroy_device(adapter); + ena_destroy_device(adapter, false); ena_restore_device(adapter); } @@ -2550,7 +2550,7 @@ err_disable_msix: return rc; } -static void ena_destroy_device(struct ena_adapter *adapter) +static void ena_destroy_device(struct ena_adapter *adapter, bool graceful) { struct net_device *netdev = adapter->netdev; struct ena_com_dev *ena_dev = adapter->ena_dev; @@ -2563,7 +2563,8 @@ static void ena_destroy_device(struct ena_adapter *adapter) dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); adapter->dev_up_before_reset = dev_up; - ena_com_set_admin_running_state(ena_dev, false); + if (!graceful) + ena_com_set_admin_running_state(ena_dev, false); if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) ena_down(adapter); @@ -2665,7 +2666,7 @@ static void ena_fw_reset_device(struct work_struct *work) return; } rtnl_lock(); - ena_destroy_device(adapter); + ena_destroy_device(adapter, false); ena_restore_device(adapter); rtnl_unlock(); } @@ -3467,7 +3468,7 @@ static int ena_suspend(struct pci_dev *pdev, pm_message_t state) "ignoring device reset request as the device is being suspended\n"); clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); } - ena_destroy_device(adapter); + ena_destroy_device(adapter, true); rtnl_unlock(); return 0; } From fe870c77efdf8682252545cbd3d29800d8379efc Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Sun, 9 Sep 2018 08:15:23 +0000 Subject: [PATCH 4/7] net: ena: fix potential double ena_destroy_device() ena_destroy_device() can potentially be called twice. To avoid this, check that the device is running and only then proceed destroying it. Signed-off-by: Netanel Belgazal Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 035d47d2179a..a68c2a8d4da2 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -2556,6 +2556,9 @@ static void ena_destroy_device(struct ena_adapter *adapter, bool graceful) struct ena_com_dev *ena_dev = adapter->ena_dev; bool dev_up; + if (!test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags)) + return; + netif_carrier_off(netdev); del_timer_sync(&adapter->timer_service); @@ -2592,6 +2595,7 @@ static void ena_destroy_device(struct ena_adapter *adapter, bool graceful) adapter->reset_reason = ENA_REGS_RESET_NORMAL; clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags); } static int ena_restore_device(struct ena_adapter *adapter) @@ -2636,6 +2640,7 @@ static int ena_restore_device(struct ena_adapter *adapter) } } + set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags); mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ)); dev_err(&pdev->dev, "Device reset completed successfully\n"); From 944b28aa2982b4590d4d4dfc777cf85135dca2c0 Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Sun, 9 Sep 2018 08:15:24 +0000 Subject: [PATCH 5/7] net: ena: fix missing lock during device destruction acquire the rtnl_lock during device destruction to avoid using partially destroyed device. ena_remove() shares almost the same logic as ena_destroy_device(), so use ena_destroy_device() and avoid duplications. Signed-off-by: Netanel Belgazal Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index a68c2a8d4da2..b9ce2a6a87ed 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -3421,24 +3421,18 @@ static void ena_remove(struct pci_dev *pdev) unregister_netdev(netdev); - /* Reset the device only if the device is running. */ + /* If the device is running then we want to make sure the device will be + * reset to make sure no more events will be issued by the device. + */ if (test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags)) - ena_com_dev_reset(ena_dev, adapter->reset_reason); + set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); - ena_free_mgmnt_irq(adapter); - - ena_disable_msix(adapter); + rtnl_lock(); + ena_destroy_device(adapter, true); + rtnl_unlock(); free_netdev(netdev); - ena_com_mmio_reg_read_request_destroy(ena_dev); - - ena_com_abort_admin_commands(ena_dev); - - ena_com_wait_for_abort_completion(ena_dev); - - ena_com_admin_destroy(ena_dev); - ena_com_rss_destroy(ena_dev); ena_com_delete_debug_area(ena_dev); From 28abf4e9c9201eda5c4d29ea609d07e877b464b8 Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Sun, 9 Sep 2018 08:15:25 +0000 Subject: [PATCH 6/7] net: ena: fix missing calls to READ_ONCE Add READ_ONCE calls where necessary (for example when iterating over a memory field that gets updated by the hardware). Signed-off-by: Netanel Belgazal Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_com.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 17f12c18d225..c37deef3bcf1 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -459,7 +459,7 @@ static void ena_com_handle_admin_completion(struct ena_com_admin_queue *admin_qu cqe = &admin_queue->cq.entries[head_masked]; /* Go over all the completions */ - while ((cqe->acq_common_descriptor.flags & + while ((READ_ONCE(cqe->acq_common_descriptor.flags) & ENA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK) == phase) { /* Do not read the rest of the completion entry before the * phase bit was validated @@ -637,7 +637,7 @@ static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset) mmiowb(); for (i = 0; i < timeout; i++) { - if (read_resp->req_id == mmio_read->seq_num) + if (READ_ONCE(read_resp->req_id) == mmio_read->seq_num) break; udelay(1); @@ -1796,8 +1796,8 @@ void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data) aenq_common = &aenq_e->aenq_common_desc; /* Go over all the events */ - while ((aenq_common->flags & ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) == - phase) { + while ((READ_ONCE(aenq_common->flags) & + ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) == phase) { pr_debug("AENQ! Group[%x] Syndrom[%x] timestamp: [%llus]\n", aenq_common->group, aenq_common->syndrom, (u64)aenq_common->timestamp_low + From 37dff155dcf57f6c08bf1641c5ddf9abd45f2b1f Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Sun, 9 Sep 2018 08:15:26 +0000 Subject: [PATCH 7/7] net: ena: fix incorrect usage of memory barriers Added memory barriers where they were missing to support multiple architectures, and removed redundant ones. As part of removing the redundant memory barriers and improving performance, we moved to more relaxed versions of memory barriers, as well as to the more relaxed version of writel - writel_relaxed, while maintaining correctness. Signed-off-by: Netanel Belgazal Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_com.c | 16 +++++----- drivers/net/ethernet/amazon/ena/ena_eth_com.c | 6 ++++ drivers/net/ethernet/amazon/ena/ena_eth_com.h | 8 ++--- drivers/net/ethernet/amazon/ena/ena_netdev.c | 30 +++++++------------ 4 files changed, 26 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index c37deef3bcf1..7635c38e77dd 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -464,7 +464,7 @@ static void ena_com_handle_admin_completion(struct ena_com_admin_queue *admin_qu /* Do not read the rest of the completion entry before the * phase bit was validated */ - rmb(); + dma_rmb(); ena_com_handle_single_admin_completion(admin_queue, cqe); head_masked++; @@ -627,15 +627,8 @@ static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset) mmio_read_reg |= mmio_read->seq_num & ENA_REGS_MMIO_REG_READ_REQ_ID_MASK; - /* make sure read_resp->req_id get updated before the hw can write - * there - */ - wmb(); + writel(mmio_read_reg, ena_dev->reg_bar + ENA_REGS_MMIO_REG_READ_OFF); - writel_relaxed(mmio_read_reg, - ena_dev->reg_bar + ENA_REGS_MMIO_REG_READ_OFF); - - mmiowb(); for (i = 0; i < timeout; i++) { if (READ_ONCE(read_resp->req_id) == mmio_read->seq_num) break; @@ -1798,6 +1791,11 @@ void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data) /* Go over all the events */ while ((READ_ONCE(aenq_common->flags) & ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) == phase) { + /* Make sure the phase bit (ownership) is as expected before + * reading the rest of the descriptor. + */ + dma_rmb(); + pr_debug("AENQ! Group[%x] Syndrom[%x] timestamp: [%llus]\n", aenq_common->group, aenq_common->syndrom, (u64)aenq_common->timestamp_low + diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.c b/drivers/net/ethernet/amazon/ena/ena_eth_com.c index ea149c134e15..1c682b76190f 100644 --- a/drivers/net/ethernet/amazon/ena/ena_eth_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.c @@ -51,6 +51,11 @@ static inline struct ena_eth_io_rx_cdesc_base *ena_com_get_next_rx_cdesc( if (desc_phase != expected_phase) return NULL; + /* Make sure we read the rest of the descriptor after the phase bit + * has been read + */ + dma_rmb(); + return cdesc; } @@ -493,6 +498,7 @@ int ena_com_tx_comp_req_id_get(struct ena_com_io_cq *io_cq, u16 *req_id) if (cdesc_phase != expected_phase) return -EAGAIN; + dma_rmb(); if (unlikely(cdesc->req_id >= io_cq->q_depth)) { pr_err("Invalid req id %d\n", cdesc->req_id); return -EINVAL; diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.h b/drivers/net/ethernet/amazon/ena/ena_eth_com.h index 6fdc753d9483..2f7657227cfe 100644 --- a/drivers/net/ethernet/amazon/ena/ena_eth_com.h +++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.h @@ -107,8 +107,7 @@ static inline int ena_com_sq_empty_space(struct ena_com_io_sq *io_sq) return io_sq->q_depth - 1 - cnt; } -static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq, - bool relaxed) +static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq) { u16 tail; @@ -117,10 +116,7 @@ static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq, pr_debug("write submission queue doorbell for queue: %d tail: %d\n", io_sq->qid, tail); - if (relaxed) - writel_relaxed(tail, io_sq->db_addr); - else - writel(tail, io_sq->db_addr); + writel(tail, io_sq->db_addr); return 0; } diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index b9ce2a6a87ed..29b5774dd32d 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -551,14 +551,9 @@ static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num) rx_ring->qid, i, num); } - if (likely(i)) { - /* Add memory barrier to make sure the desc were written before - * issue a doorbell - */ - wmb(); - ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq, true); - mmiowb(); - } + /* ena_com_write_sq_doorbell issues a wmb() */ + if (likely(i)) + ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq); rx_ring->next_to_use = next_to_use; @@ -2112,12 +2107,6 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use, tx_ring->ring_size); - /* This WMB is aimed to: - * 1 - perform smp barrier before reading next_to_completion - * 2 - make sure the desc were written before trigger DB - */ - wmb(); - /* stop the queue when no more space available, the packet can have up * to sgl_size + 2. one for the meta descriptor and one for header * (if the header is larger than tx_max_header_size). @@ -2136,10 +2125,11 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) * stop the queue but meanwhile clean_tx_irq updates * next_to_completion and terminates. * The queue will remain stopped forever. - * To solve this issue this function perform rmb, check - * the wakeup condition and wake up the queue if needed. + * To solve this issue add a mb() to make sure that + * netif_tx_stop_queue() write is vissible before checking if + * there is additional space in the queue. */ - smp_rmb(); + smp_mb(); if (ena_com_sq_empty_space(tx_ring->ena_com_io_sq) > ENA_TX_WAKEUP_THRESH) { @@ -2151,8 +2141,10 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) } if (netif_xmit_stopped(txq) || !skb->xmit_more) { - /* trigger the dma engine */ - ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq, false); + /* trigger the dma engine. ena_com_write_sq_doorbell() + * has a mb + */ + ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); u64_stats_update_begin(&tx_ring->syncp); tx_ring->tx_stats.doorbells++; u64_stats_update_end(&tx_ring->syncp);