From d716d9b702bb759dd6fb50804f10a174bd156d71 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 14 Feb 2018 18:40:12 +0900 Subject: [PATCH 01/90] dmaengine: rcar-dmac: fix max_chunk_size for R-Car Gen3 According to R-Car Gen3 Rev.0.80 manual, the DMATCR can be set to 16,777,215 as maximum. So, this patch fixes the max_chunk_size for safety on all of SoCs. Otherwise, a system may hang if the DMATCR is set to 0 on R-Car Gen3. Signed-off-by: Yoshihiro Shimoda Reviewed-by: Simon Horman Signed-off-by: Vinod Koul --- drivers/dma/sh/rcar-dmac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c index e3ff162c03fc..d0cacdb0713e 100644 --- a/drivers/dma/sh/rcar-dmac.c +++ b/drivers/dma/sh/rcar-dmac.c @@ -917,7 +917,7 @@ rcar_dmac_chan_prep_sg(struct rcar_dmac_chan *chan, struct scatterlist *sgl, rcar_dmac_chan_configure_desc(chan, desc); - max_chunk_size = (RCAR_DMATCR_MASK + 1) << desc->xfer_shift; + max_chunk_size = RCAR_DMATCR_MASK << desc->xfer_shift; /* * Allocate and fill the transfer chunk descriptors. We own the only From da343b6d90e11132f1e917d865d88ee35d6e6d00 Mon Sep 17 00:00:00 2001 From: Sergey Gorenko Date: Sun, 25 Feb 2018 13:39:48 +0200 Subject: [PATCH 02/90] IB/mlx5: Fix incorrect size of klms in the memory region The value of mr->ndescs greater than mr->max_descs is set in the function mlx5_ib_sg_to_klms() if sg_nents is greater than mr->max_descs. This is an invalid value and it causes the following error when registering mr: mlx5_0:dump_cqe:276:(pid 193): dump error cqe 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00000030: 00 00 00 00 0f 00 78 06 25 00 00 8b 08 1e 8f d3 Cc: # 4.5 Fixes: b005d3164713 ("mlx5: Add arbitrary sg list support") Signed-off-by: Sergey Gorenko Tested-by: Laurence Oberman Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 556e015678de..1961c6a45437 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1816,7 +1816,6 @@ mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr, mr->ibmr.iova = sg_dma_address(sg) + sg_offset; mr->ibmr.length = 0; - mr->ndescs = sg_nents; for_each_sg(sgl, sg, sg_nents, i) { if (unlikely(i >= mr->max_descs)) @@ -1828,6 +1827,7 @@ mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr, sg_offset = 0; } + mr->ndescs = i; if (sg_offset_p) *sg_offset_p = sg_offset; From e7b169f34403becd3c9fd3b6e46614ab788f2187 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Sun, 25 Feb 2018 13:39:51 +0200 Subject: [PATCH 03/90] IB/mlx5: Avoid passing an invalid QP type to firmware During QP creation, the mlx5 driver translates the QP type to an internal value which is passed on to FW. There was no check to make sure that the translated value is valid, and -EINVAL was coerced into the mailbox command. Current firmware refuses this as an invalid QP type, but future/past firmware may do something else. Fixes: 09a7d9eca1a6c ('{net,IB}/mlx5: QP/XRCD commands via mlx5 ifc') Reviewed-by: Ilya Lesokhin Signed-off-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 39d24bf694a8..e8d7eaf0670c 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1584,6 +1584,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, u32 uidx = MLX5_IB_DEFAULT_UIDX; struct mlx5_ib_create_qp ucmd; struct mlx5_ib_qp_base *base; + int mlx5_st; void *qpc; u32 *in; int err; @@ -1592,6 +1593,10 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); + mlx5_st = to_mlx5_st(init_attr->qp_type); + if (mlx5_st < 0) + return -EINVAL; + if (init_attr->rwq_ind_tbl) { if (!udata) return -ENOSYS; @@ -1753,7 +1758,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); - MLX5_SET(qpc, qpc, st, to_mlx5_st(init_attr->qp_type)); + MLX5_SET(qpc, qpc, st, mlx5_st); MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); if (init_attr->qp_type != MLX5_IB_QPT_REG_UMR) From aba462134634b502d720e15b23154f21cfa277e5 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Sun, 25 Feb 2018 13:39:53 +0200 Subject: [PATCH 04/90] {net, IB}/mlx5: Raise fatal IB event when sys error occurs All other mlx5_events report the port number as 1 based, which is how FW reports it in the port event EQE. Reporting 0 for this event causes mlx5_ib to not raise a fatal event notification to registered clients due to a seemingly invalid port. All switch cases in mlx5_ib_event that go through the port check are supposed to set the port now, so just do it once at variable declaration. Fixes: 89d44f0a6c73("net/mlx5_core: Add pci error handlers to mlx5_core driver") Reviewed-by: Majd Dibbiny Signed-off-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 11 ++--------- drivers/net/ethernet/mellanox/mlx5/core/health.c | 2 +- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 4236c8086820..bab38c6647d7 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3263,7 +3263,7 @@ static void mlx5_ib_handle_event(struct work_struct *_work) struct mlx5_ib_dev *ibdev; struct ib_event ibev; bool fatal = false; - u8 port = 0; + u8 port = (u8)work->param; if (mlx5_core_is_mp_slave(work->dev)) { ibdev = mlx5_ib_get_ibdev_from_mpi(work->context); @@ -3283,8 +3283,6 @@ static void mlx5_ib_handle_event(struct work_struct *_work) case MLX5_DEV_EVENT_PORT_UP: case MLX5_DEV_EVENT_PORT_DOWN: case MLX5_DEV_EVENT_PORT_INITIALIZED: - port = (u8)work->param; - /* In RoCE, port up/down events are handled in * mlx5_netdev_event(). */ @@ -3298,24 +3296,19 @@ static void mlx5_ib_handle_event(struct work_struct *_work) case MLX5_DEV_EVENT_LID_CHANGE: ibev.event = IB_EVENT_LID_CHANGE; - port = (u8)work->param; break; case MLX5_DEV_EVENT_PKEY_CHANGE: ibev.event = IB_EVENT_PKEY_CHANGE; - port = (u8)work->param; - schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work); break; case MLX5_DEV_EVENT_GUID_CHANGE: ibev.event = IB_EVENT_GID_CHANGE; - port = (u8)work->param; break; case MLX5_DEV_EVENT_CLIENT_REREG: ibev.event = IB_EVENT_CLIENT_REREGISTER; - port = (u8)work->param; break; case MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT: schedule_work(&ibdev->delay_drop.delay_drop_work); @@ -3327,7 +3320,7 @@ static void mlx5_ib_handle_event(struct work_struct *_work) ibev.device = &ibdev->ib_dev; ibev.element.port_num = port; - if (port < 1 || port > ibdev->num_ports) { + if (!rdma_is_port_valid(&ibdev->ib_dev, port)) { mlx5_ib_warn(ibdev, "warning: event on port %d\n", port); goto out; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 21d29f7936f6..d39b0b7011b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -124,7 +124,7 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) trigger_cmd_completions(dev); } - mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 0); + mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 1); mlx5_core_err(dev, "end\n"); unlock: From 65389322b28f81cc137b60a41044c2d958a7b950 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Sun, 25 Feb 2018 13:39:54 +0200 Subject: [PATCH 05/90] IB/mlx: Set slid to zero in Ethernet completion struct IB spec says that a lid should be ignored when link layer is Ethernet, for example when building or parsing a CM request message (CA17-34). However, since ib_lid_be16() and ib_lid_cpu16() validates the slid, not only when link layer is IB, we set the slid to zero to prevent false warnings in the kernel log. Fixes: 62ede7779904 ("Add OPA extended LID support") Reviewed-by: Majd Dibbiny Signed-off-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx4/cq.c | 4 +++- drivers/infiniband/hw/mlx5/cq.c | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 9a566ee3ceff..82adc0d1d30e 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -601,6 +601,7 @@ static void use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct wc->dlid_path_bits = 0; if (is_eth) { + wc->slid = 0; wc->vlan_id = be16_to_cpu(hdr->tun.sl_vid); memcpy(&(wc->smac[0]), (char *)&hdr->tun.mac_31_0, 4); memcpy(&(wc->smac[4]), (char *)&hdr->tun.slid_mac_47_32, 2); @@ -851,7 +852,6 @@ repoll: } } - wc->slid = be16_to_cpu(cqe->rlid); g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn); wc->src_qp = g_mlpath_rqpn & 0xffffff; wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f; @@ -860,6 +860,7 @@ repoll: wc->wc_flags |= mlx4_ib_ipoib_csum_ok(cqe->status, cqe->checksum) ? IB_WC_IP_CSUM_OK : 0; if (is_eth) { + wc->slid = 0; wc->sl = be16_to_cpu(cqe->sl_vid) >> 13; if (be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_CVLAN_PRESENT_MASK) { @@ -871,6 +872,7 @@ repoll: memcpy(wc->smac, cqe->smac, ETH_ALEN); wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC); } else { + wc->slid = be16_to_cpu(cqe->rlid); wc->sl = be16_to_cpu(cqe->sl_vid) >> 12; wc->vlan_id = 0xffff; } diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 5b974fb97611..b5cfdaa9c7c8 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -226,7 +226,6 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, wc->ex.invalidate_rkey = be32_to_cpu(cqe->imm_inval_pkey); break; } - wc->slid = be16_to_cpu(cqe->slid); wc->src_qp = be32_to_cpu(cqe->flags_rqpn) & 0xffffff; wc->dlid_path_bits = cqe->ml_path; g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3; @@ -241,10 +240,12 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, } if (ll != IB_LINK_LAYER_ETHERNET) { + wc->slid = be16_to_cpu(cqe->slid); wc->sl = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf; return; } + wc->slid = 0; vlan_present = cqe->l4_l3_hdr_type & 0x1; roce_packet_type = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0x3; if (vlan_present) { From 2fb4f4eadd180a50112618dd9c5fef7fc50d4f08 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sun, 25 Feb 2018 13:39:56 +0200 Subject: [PATCH 06/90] IB/core: Fix missing RDMA cgroups release in case of failure to register device During IB device registration process, if query_device() fails or if ib_core fails to registers sysfs entries, rdma cgroup cleanup is skipped. Cc: # v4.2+ Fixes: 4be3a4fa51f4 ("IB/core: Fix kernel crash during fail to initialize device") Reviewed-by: Daniel Jurgens Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index e8010e73a1cf..bb065c9449be 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -536,14 +536,14 @@ int ib_register_device(struct ib_device *device, ret = device->query_device(device, &device->attrs, &uhw); if (ret) { pr_warn("Couldn't query the device attributes\n"); - goto cache_cleanup; + goto cg_cleanup; } ret = ib_device_register_sysfs(device, port_callback); if (ret) { pr_warn("Couldn't register device %s with driver model\n", device->name); - goto cache_cleanup; + goto cg_cleanup; } device->reg_state = IB_DEV_REGISTERED; @@ -559,6 +559,8 @@ int ib_register_device(struct ib_device *device, mutex_unlock(&device_mutex); return 0; +cg_cleanup: + ib_device_unregister_rdmacg(device); cache_cleanup: ib_cache_cleanup_one(device); ib_cache_release_one(device); From a45bc17b360d75fac9ced85e99fda14bf38b4dc3 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Mon, 26 Feb 2018 01:51:37 -0800 Subject: [PATCH 07/90] RDMA/bnxt_re: Unconditionly fence non wire memory operations HW requires an unconditonal fence for all non-wire memory operations through SQ. This guarantees the completions of these memory operations. Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 643174d949a8..755f1ccd82bb 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -2227,10 +2227,13 @@ static int bnxt_re_build_inv_wqe(struct ib_send_wr *wr, wqe->type = BNXT_QPLIB_SWQE_TYPE_LOCAL_INV; wqe->local_inv.inv_l_key = wr->ex.invalidate_rkey; + /* Need unconditional fence for local invalidate + * opcode to work as expected. + */ + wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE; + if (wr->send_flags & IB_SEND_SIGNALED) wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP; - if (wr->send_flags & IB_SEND_FENCE) - wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE; if (wr->send_flags & IB_SEND_SOLICITED) wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SOLICIT_EVENT; @@ -2251,8 +2254,12 @@ static int bnxt_re_build_reg_wqe(struct ib_reg_wr *wr, wqe->frmr.levels = qplib_frpl->hwq.level + 1; wqe->type = BNXT_QPLIB_SWQE_TYPE_REG_MR; - if (wr->wr.send_flags & IB_SEND_FENCE) - wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE; + /* Need unconditional fence for reg_mr + * opcode to function as expected. + */ + + wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE; + if (wr->wr.send_flags & IB_SEND_SIGNALED) wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP; From c354dff00db8df80f271418d8392065e10ffffb6 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Mon, 26 Feb 2018 01:51:38 -0800 Subject: [PATCH 08/90] RDMA/bnxt_re: Fix incorrect DB offset calculation To support host systems with non 4K page size, l2_db_size shall be calculated with 4096 instead of PAGE_SIZE. Also, supply the host page size to FW during initialization. Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 6 +++++- drivers/infiniband/hw/bnxt_re/qplib_rcfw.h | 1 + drivers/infiniband/hw/bnxt_re/qplib_sp.c | 3 ++- drivers/infiniband/hw/bnxt_re/roce_hsi.h | 25 +++++++++++++++++++++- 4 files changed, 32 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 8329ec6a7946..14d153d4013c 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -460,7 +460,11 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw, int rc; RCFW_CMD_PREP(req, INITIALIZE_FW, cmd_flags); - + /* Supply (log-base-2-of-host-page-size - base-page-shift) + * to bono to adjust the doorbell page sizes. + */ + req.log2_dbr_pg_size = cpu_to_le16(PAGE_SHIFT - + RCFW_DBR_BASE_PAGE_SHIFT); /* * VFs need not setup the HW context area, PF * shall setup this area for VF. Skipping the diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h index 6bee6e3636ea..c7cce2e4185e 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h @@ -49,6 +49,7 @@ #define RCFW_COMM_SIZE 0x104 #define RCFW_DBR_PCI_BAR_REGION 2 +#define RCFW_DBR_BASE_PAGE_SHIFT 12 #define RCFW_CMD_PREP(req, CMD, cmd_flags) \ do { \ diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 03057983341f..ee98e5efef84 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -139,7 +139,8 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, attr->max_pkey = le32_to_cpu(sb->max_pkeys); attr->max_inline_data = le32_to_cpu(sb->max_inline_data); - attr->l2_db_size = (sb->l2_db_space_size + 1) * PAGE_SIZE; + attr->l2_db_size = (sb->l2_db_space_size + 1) * + (0x01 << RCFW_DBR_BASE_PAGE_SHIFT); attr->max_sgid = le32_to_cpu(sb->max_gid); bnxt_qplib_query_version(rcfw, attr->fw_ver); diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h index 2d7ea096a247..3e5a4f760d0e 100644 --- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h +++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h @@ -1761,7 +1761,30 @@ struct cmdq_initialize_fw { #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_2M (0x3UL << 4) #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_8M (0x4UL << 4) #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_1G (0x5UL << 4) - __le16 reserved16; + /* This value is (log-base-2-of-DBR-page-size - 12). + * 0 for 4KB. HW supported values are enumerated below. + */ + __le16 log2_dbr_pg_size; + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_MASK 0xfUL + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_SFT 0 + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_4K 0x0UL + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_8K 0x1UL + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_16K 0x2UL + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_32K 0x3UL + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_64K 0x4UL + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_128K 0x5UL + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_256K 0x6UL + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_512K 0x7UL + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_1M 0x8UL + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_2M 0x9UL + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_4M 0xaUL + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_8M 0xbUL + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_16M 0xcUL + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_32M 0xdUL + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_64M 0xeUL + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_128M 0xfUL + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_LAST \ + CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_128M __le64 qpc_page_dir; __le64 mrw_page_dir; __le64 srq_page_dir; From 497158aa5f520db50452ef928c0f955cb42f2e77 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Mon, 26 Feb 2018 01:51:39 -0800 Subject: [PATCH 09/90] RDMA/bnxt_re: Fix the ib_reg failure cleanup Release the netdev references in the cleanup path. Invokes the cleanup routines if bnxt_re_ib_reg fails. Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 33a448036c2e..604c805ceaa7 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -1416,9 +1416,12 @@ static void bnxt_re_task(struct work_struct *work) switch (re_work->event) { case NETDEV_REGISTER: rc = bnxt_re_ib_reg(rdev); - if (rc) + if (rc) { dev_err(rdev_to_dev(rdev), "Failed to register with IB: %#x", rc); + bnxt_re_remove_one(rdev); + bnxt_re_dev_unreg(rdev); + } break; case NETDEV_UP: bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, From 4cd482c12be473ae507eba232a8374c798233e42 Mon Sep 17 00:00:00 2001 From: Muneendra Kumar M Date: Tue, 27 Feb 2018 21:51:49 -0800 Subject: [PATCH 10/90] IB/core : Add null pointer check in addr_resolve dev_get_by_index is being called in addr_resolve function which returns NULL and NULL pointer access leads to kernel crash. Following call trace is observed while running rdma_lat test application [ 146.173149] BUG: unable to handle kernel NULL pointer dereference at 00000000000004a0 [ 146.173198] IP: addr_resolve+0x9e/0x3e0 [ib_core] [ 146.173221] PGD 0 P4D 0 [ 146.173869] Oops: 0000 [#1] SMP PTI [ 146.182859] CPU: 8 PID: 127 Comm: kworker/8:1 Tainted: G O 4.15.0-rc6+ #18 [ 146.183758] Hardware name: LENOVO System x3650 M5: -[8871AC1]-/01KN179, BIOS-[TCE132H-2.50]- 10/11/2017 [ 146.184691] Workqueue: ib_cm cm_work_handler [ib_cm] [ 146.185632] RIP: 0010:addr_resolve+0x9e/0x3e0 [ib_core] [ 146.186584] RSP: 0018:ffffc9000362faa0 EFLAGS: 00010246 [ 146.187521] RAX: 000000000000001b RBX: ffffc9000362fc08 RCX: 0000000000000006 [ 146.188472] RDX: 0000000000000000 RSI: 0000000000000096 RDI : ffff88087fc16990 [ 146.189427] RBP: ffffc9000362fb18 R08: 00000000ffffff9d R09: 00000000000004ac [ 146.190392] R10: 00000000000001e7 R11: 0000000000000001 R12: ffff88086af2e090 [ 146.191361] R13: 0000000000000000 R14: 0000000000000001 R15: 00000000ffffff9d [ 146.192327] FS: 0000000000000000(0000) GS:ffff88087fc00000(0000) knlGS:0000000000000000 [ 146.193301] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 146.194274] CR2: 00000000000004a0 CR3: 000000000220a002 CR4: 00000000003606e0 [ 146.195258] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 146.196256] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 146.197231] Call Trace: [ 146.198209] ? rdma_addr_register_client+0x30/0x30 [ib_core] [ 146.199199] rdma_resolve_ip+0x1af/0x280 [ib_core] [ 146.200196] rdma_addr_find_l2_eth_by_grh+0x154/0x2b0 [ib_core] The below patch adds the missing NULL pointer check returned by dev_get_by_index before accessing the netdev to avoid kernel crash. We observed the below crash when we try to do the below test. server client --------- --------- |1.1.1.1|<----rxe-channel--->|1.1.1.2| --------- --------- On server: rdma_lat -c -n 2 -s 1024 On client:rdma_lat 1.1.1.1 -c -n 2 -s 1024 Fixes: 200298326b27 ("IB/core: Validate route when we init ah") Signed-off-by: Muneendra Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/addr.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index a5b4cf030c11..9183d148d644 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -550,18 +550,13 @@ static int addr_resolve(struct sockaddr *src_in, dst_release(dst); } - if (ndev->flags & IFF_LOOPBACK) { - ret = rdma_translate_ip(dst_in, addr); - /* - * Put the loopback device and get the translated - * device instead. - */ + if (ndev) { + if (ndev->flags & IFF_LOOPBACK) + ret = rdma_translate_ip(dst_in, addr); + else + addr->bound_dev_if = ndev->ifindex; dev_put(ndev); - ndev = dev_get_by_index(addr->net, addr->bound_dev_if); - } else { - addr->bound_dev_if = ndev->ifindex; } - dev_put(ndev); return ret; } From 651438bb0af5213f1f70d66e75bf11d08cb5537a Mon Sep 17 00:00:00 2001 From: Wen Xiong Date: Thu, 15 Feb 2018 14:05:10 -0600 Subject: [PATCH 11/90] nvme-pci: Fix EEH failure on ppc Triggering PPC EEH detection and handling requires a memory mapped read failure. The NVMe driver removed the periodic health check MMIO, so there's no early detection mechanism to trigger the recovery. Instead, the detection now happens when the nvme driver handles an IO timeout event. This takes the pci channel offline, so we do not want the driver to proceed with escalating its own recovery efforts that may conflict with the EEH handler. This patch ensures the driver will observe the channel was set to offline after a failed MMIO read and resets the IO timer so the EEH handler has a chance to recover the device. Signed-off-by: Wen Xiong [updated change log] Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 5933a5c732e8..e5ce07f4966f 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1153,12 +1153,6 @@ static bool nvme_should_reset(struct nvme_dev *dev, u32 csts) if (!(csts & NVME_CSTS_CFS) && !nssro) return false; - /* If PCI error recovery process is happening, we cannot reset or - * the recovery mechanism will surely fail. - */ - if (pci_channel_offline(to_pci_dev(dev->dev))) - return false; - return true; } @@ -1189,6 +1183,13 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) struct nvme_command cmd; u32 csts = readl(dev->bar + NVME_REG_CSTS); + /* If PCI error recovery process is happening, we cannot reset or + * the recovery mechanism will surely fail. + */ + mb(); + if (pci_channel_offline(to_pci_dev(dev->dev))) + return BLK_EH_RESET_TIMER; + /* * Reset immediately if the controller is failed */ From 16ccfff2897613007b5eda9e29d65303c6280026 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 6 Feb 2018 20:17:42 +0800 Subject: [PATCH 12/90] nvme: pci: pass max vectors as num_possible_cpus() to pci_alloc_irq_vectors 84676c1f21 ("genirq/affinity: assign vectors to all possible CPUs") has switched to do irq vectors spread among all possible CPUs, so pass num_possible_cpus() as max vecotrs to be assigned. For example, in a 8 cores system, 0~3 online, 4~8 offline/not present, see 'lscpu': [ming@box]$lscpu Architecture: x86_64 CPU op-mode(s): 32-bit, 64-bit Byte Order: Little Endian CPU(s): 4 On-line CPU(s) list: 0-3 Thread(s) per core: 1 Core(s) per socket: 2 Socket(s): 2 NUMA node(s): 2 ... NUMA node0 CPU(s): 0-3 NUMA node1 CPU(s): ... 1) before this patch, follows the allocated vectors and their affinity: irq 47, cpu list 0,4 irq 48, cpu list 1,6 irq 49, cpu list 2,5 irq 50, cpu list 3,7 2) after this patch, follows the allocated vectors and their affinity: irq 43, cpu list 0 irq 44, cpu list 1 irq 45, cpu list 2 irq 46, cpu list 3 irq 47, cpu list 4 irq 48, cpu list 6 irq 49, cpu list 5 irq 50, cpu list 7 Cc: Keith Busch Cc: Sagi Grimberg Cc: Thomas Gleixner Cc: Christoph Hellwig Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index e5ce07f4966f..b6f43b738f03 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1914,7 +1914,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) int result, nr_io_queues; unsigned long size; - nr_io_queues = num_present_cpus(); + nr_io_queues = num_possible_cpus(); result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues); if (result < 0) return result; From 7bd3e7b743956afbec30fb525bc3c5e22e3d475c Mon Sep 17 00:00:00 2001 From: Igor Pylypiv Date: Wed, 28 Feb 2018 00:59:12 -0800 Subject: [PATCH 13/90] watchdog: f71808e_wdt: Fix magic close handling Watchdog close is "expected" when any byte is 'V' not just the last one. Writing "V" to the device fails because the last byte is the end of string. $ echo V > /dev/watchdog f71808e_wdt: Unexpected close, not stopping watchdog! Signed-off-by: Igor Pylypiv Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/f71808e_wdt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/watchdog/f71808e_wdt.c b/drivers/watchdog/f71808e_wdt.c index e0678c14480f..3a33c5344bd5 100644 --- a/drivers/watchdog/f71808e_wdt.c +++ b/drivers/watchdog/f71808e_wdt.c @@ -566,7 +566,8 @@ static ssize_t watchdog_write(struct file *file, const char __user *buf, char c; if (get_user(c, buf + i)) return -EFAULT; - expect_close = (c == 'V'); + if (c == 'V') + expect_close = true; } /* Properly order writes across fork()ed processes */ From 93ac3deb7c220cbcec032a967220a1f109d58431 Mon Sep 17 00:00:00 2001 From: Jayachandran C Date: Wed, 28 Feb 2018 02:52:20 -0800 Subject: [PATCH 14/90] watchdog: sbsa: use 32-bit read for WCV According to SBSA spec v3.1 section 5.3: All registers are 32 bits in size and should be accessed using 32-bit reads and writes. If an access size other than 32 bits is used then the results are IMPLEMENTATION DEFINED. [...] The Generic Watchdog is little-endian The current code uses readq to read the watchdog compare register which does a 64-bit access. This fails on ThunderX2 which does not implement 64-bit access to this register. Fix this by using lo_hi_readq() that does two 32-bit reads. Signed-off-by: Jayachandran C Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/sbsa_gwdt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/watchdog/sbsa_gwdt.c b/drivers/watchdog/sbsa_gwdt.c index 316c2eb122d2..e8bd9887c566 100644 --- a/drivers/watchdog/sbsa_gwdt.c +++ b/drivers/watchdog/sbsa_gwdt.c @@ -50,6 +50,7 @@ */ #include +#include #include #include #include @@ -159,7 +160,7 @@ static unsigned int sbsa_gwdt_get_timeleft(struct watchdog_device *wdd) !(readl(gwdt->control_base + SBSA_GWDT_WCS) & SBSA_GWDT_WCS_WS0)) timeleft += readl(gwdt->control_base + SBSA_GWDT_WOR); - timeleft += readq(gwdt->control_base + SBSA_GWDT_WCV) - + timeleft += lo_hi_readq(gwdt->control_base + SBSA_GWDT_WCV) - arch_counter_get_cntvct(); do_div(timeleft, gwdt->clk); From 2b3d89b402b085b08498e896c65267a145bed486 Mon Sep 17 00:00:00 2001 From: Jerry Hoemann Date: Sun, 25 Feb 2018 20:22:20 -0700 Subject: [PATCH 15/90] watchdog: hpwdt: Remove legacy NMI sourcing. Gen8 and prior Proliant systems supported the "CRU" interface to firmware. This interfaces allows linux to "call back" into firmware to source the cause of an NMI. This feature isn't fully utilized as the actual source of the NMI isn't printed, the driver only indicates that the source couldn't be determined when the call fails. With the advent of Gen9, iCRU replaces the CRU. The call back feature is no longer available in firmware. To be compatible and not attempt to call back into firmware on system not supporting CRU, the SMBIOS table is consulted to determine if it is safe to make the call back or not. This results in about half of the driver code being devoted to either making CRU calls or determing if it is safe to make CRU calls. As noted, the driver isn't really using the results of the CRU calls. Furthermore, as a consequence of the Spectre security issue, the BIOS/EFI calls are being wrapped into Spectre-disabling section. Removing the call back in hpwdt_pretimeout assists in this effort. As the CRU sourcing of the NMI isn't required for handling the NMI and there are security concerns with making the call back, remove the legacy (pre Gen9) NMI sourcing and the DMI code to determine if the system had the CRU interface. Signed-off-by: Jerry Hoemann Acked-by: Ingo Molnar Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/hpwdt.c | 505 +-------------------------------------- 1 file changed, 11 insertions(+), 494 deletions(-) diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c index f1f00dfc0e68..b0a158073abd 100644 --- a/drivers/watchdog/hpwdt.c +++ b/drivers/watchdog/hpwdt.c @@ -28,16 +28,7 @@ #include #include #include -#ifdef CONFIG_HPWDT_NMI_DECODING -#include -#include -#include -#include -#include -#include -#endif /* CONFIG_HPWDT_NMI_DECODING */ #include -#include #define HPWDT_VERSION "1.4.0" #define SECS_TO_TICKS(secs) ((secs) * 1000 / 128) @@ -48,6 +39,9 @@ static unsigned int soft_margin = DEFAULT_MARGIN; /* in seconds */ static unsigned int reload; /* the computed soft_margin */ static bool nowayout = WATCHDOG_NOWAYOUT; +#ifdef CONFIG_HPWDT_NMI_DECODING +static unsigned int allow_kdump = 1; +#endif static char expect_release; static unsigned long hpwdt_is_open; @@ -63,373 +57,6 @@ static const struct pci_device_id hpwdt_devices[] = { }; MODULE_DEVICE_TABLE(pci, hpwdt_devices); -#ifdef CONFIG_HPWDT_NMI_DECODING -#define PCI_BIOS32_SD_VALUE 0x5F32335F /* "_32_" */ -#define CRU_BIOS_SIGNATURE_VALUE 0x55524324 -#define PCI_BIOS32_PARAGRAPH_LEN 16 -#define PCI_ROM_BASE1 0x000F0000 -#define ROM_SIZE 0x10000 - -struct bios32_service_dir { - u32 signature; - u32 entry_point; - u8 revision; - u8 length; - u8 checksum; - u8 reserved[5]; -}; - -/* type 212 */ -struct smbios_cru64_info { - u8 type; - u8 byte_length; - u16 handle; - u32 signature; - u64 physical_address; - u32 double_length; - u32 double_offset; -}; -#define SMBIOS_CRU64_INFORMATION 212 - -/* type 219 */ -struct smbios_proliant_info { - u8 type; - u8 byte_length; - u16 handle; - u32 power_features; - u32 omega_features; - u32 reserved; - u32 misc_features; -}; -#define SMBIOS_ICRU_INFORMATION 219 - - -struct cmn_registers { - union { - struct { - u8 ral; - u8 rah; - u16 rea2; - }; - u32 reax; - } u1; - union { - struct { - u8 rbl; - u8 rbh; - u8 reb2l; - u8 reb2h; - }; - u32 rebx; - } u2; - union { - struct { - u8 rcl; - u8 rch; - u16 rec2; - }; - u32 recx; - } u3; - union { - struct { - u8 rdl; - u8 rdh; - u16 red2; - }; - u32 redx; - } u4; - - u32 resi; - u32 redi; - u16 rds; - u16 res; - u32 reflags; -} __attribute__((packed)); - -static unsigned int hpwdt_nmi_decoding; -static unsigned int allow_kdump = 1; -static unsigned int is_icru; -static unsigned int is_uefi; -static DEFINE_SPINLOCK(rom_lock); -static void *cru_rom_addr; -static struct cmn_registers cmn_regs; - -extern asmlinkage void asminline_call(struct cmn_registers *pi86Regs, - unsigned long *pRomEntry); - -#ifdef CONFIG_X86_32 -/* --32 Bit Bios------------------------------------------------------------ */ - -#define HPWDT_ARCH 32 - -asm(".text \n\t" - ".align 4 \n\t" - ".globl asminline_call \n" - "asminline_call: \n\t" - "pushl %ebp \n\t" - "movl %esp, %ebp \n\t" - "pusha \n\t" - "pushf \n\t" - "push %es \n\t" - "push %ds \n\t" - "pop %es \n\t" - "movl 8(%ebp),%eax \n\t" - "movl 4(%eax),%ebx \n\t" - "movl 8(%eax),%ecx \n\t" - "movl 12(%eax),%edx \n\t" - "movl 16(%eax),%esi \n\t" - "movl 20(%eax),%edi \n\t" - "movl (%eax),%eax \n\t" - "push %cs \n\t" - "call *12(%ebp) \n\t" - "pushf \n\t" - "pushl %eax \n\t" - "movl 8(%ebp),%eax \n\t" - "movl %ebx,4(%eax) \n\t" - "movl %ecx,8(%eax) \n\t" - "movl %edx,12(%eax) \n\t" - "movl %esi,16(%eax) \n\t" - "movl %edi,20(%eax) \n\t" - "movw %ds,24(%eax) \n\t" - "movw %es,26(%eax) \n\t" - "popl %ebx \n\t" - "movl %ebx,(%eax) \n\t" - "popl %ebx \n\t" - "movl %ebx,28(%eax) \n\t" - "pop %es \n\t" - "popf \n\t" - "popa \n\t" - "leave \n\t" - "ret \n\t" - ".previous"); - - -/* - * cru_detect - * - * Routine Description: - * This function uses the 32-bit BIOS Service Directory record to - * search for a $CRU record. - * - * Return Value: - * 0 : SUCCESS - * <0 : FAILURE - */ -static int cru_detect(unsigned long map_entry, - unsigned long map_offset) -{ - void *bios32_map; - unsigned long *bios32_entrypoint; - unsigned long cru_physical_address; - unsigned long cru_length; - unsigned long physical_bios_base = 0; - unsigned long physical_bios_offset = 0; - int retval = -ENODEV; - - bios32_map = ioremap(map_entry, (2 * PAGE_SIZE)); - - if (bios32_map == NULL) - return -ENODEV; - - bios32_entrypoint = bios32_map + map_offset; - - cmn_regs.u1.reax = CRU_BIOS_SIGNATURE_VALUE; - - set_memory_x((unsigned long)bios32_map, 2); - asminline_call(&cmn_regs, bios32_entrypoint); - - if (cmn_regs.u1.ral != 0) { - pr_warn("Call succeeded but with an error: 0x%x\n", - cmn_regs.u1.ral); - } else { - physical_bios_base = cmn_regs.u2.rebx; - physical_bios_offset = cmn_regs.u4.redx; - cru_length = cmn_regs.u3.recx; - cru_physical_address = - physical_bios_base + physical_bios_offset; - - /* If the values look OK, then map it in. */ - if ((physical_bios_base + physical_bios_offset)) { - cru_rom_addr = - ioremap(cru_physical_address, cru_length); - if (cru_rom_addr) { - set_memory_x((unsigned long)cru_rom_addr & PAGE_MASK, - (cru_length + PAGE_SIZE - 1) >> PAGE_SHIFT); - retval = 0; - } - } - - pr_debug("CRU Base Address: 0x%lx\n", physical_bios_base); - pr_debug("CRU Offset Address: 0x%lx\n", physical_bios_offset); - pr_debug("CRU Length: 0x%lx\n", cru_length); - pr_debug("CRU Mapped Address: %p\n", &cru_rom_addr); - } - iounmap(bios32_map); - return retval; -} - -/* - * bios_checksum - */ -static int bios_checksum(const char __iomem *ptr, int len) -{ - char sum = 0; - int i; - - /* - * calculate checksum of size bytes. This should add up - * to zero if we have a valid header. - */ - for (i = 0; i < len; i++) - sum += ptr[i]; - - return ((sum == 0) && (len > 0)); -} - -/* - * bios32_present - * - * Routine Description: - * This function finds the 32-bit BIOS Service Directory - * - * Return Value: - * 0 : SUCCESS - * <0 : FAILURE - */ -static int bios32_present(const char __iomem *p) -{ - struct bios32_service_dir *bios_32_ptr; - int length; - unsigned long map_entry, map_offset; - - bios_32_ptr = (struct bios32_service_dir *) p; - - /* - * Search for signature by checking equal to the swizzled value - * instead of calling another routine to perform a strcmp. - */ - if (bios_32_ptr->signature == PCI_BIOS32_SD_VALUE) { - length = bios_32_ptr->length * PCI_BIOS32_PARAGRAPH_LEN; - if (bios_checksum(p, length)) { - /* - * According to the spec, we're looking for the - * first 4KB-aligned address below the entrypoint - * listed in the header. The Service Directory code - * is guaranteed to occupy no more than 2 4KB pages. - */ - map_entry = bios_32_ptr->entry_point & ~(PAGE_SIZE - 1); - map_offset = bios_32_ptr->entry_point - map_entry; - - return cru_detect(map_entry, map_offset); - } - } - return -ENODEV; -} - -static int detect_cru_service(void) -{ - char __iomem *p, *q; - int rc = -1; - - /* - * Search from 0x0f0000 through 0x0fffff, inclusive. - */ - p = ioremap(PCI_ROM_BASE1, ROM_SIZE); - if (p == NULL) - return -ENOMEM; - - for (q = p; q < p + ROM_SIZE; q += 16) { - rc = bios32_present(q); - if (!rc) - break; - } - iounmap(p); - return rc; -} -/* ------------------------------------------------------------------------- */ -#endif /* CONFIG_X86_32 */ -#ifdef CONFIG_X86_64 -/* --64 Bit Bios------------------------------------------------------------ */ - -#define HPWDT_ARCH 64 - -asm(".text \n\t" - ".align 4 \n\t" - ".globl asminline_call \n\t" - ".type asminline_call, @function \n\t" - "asminline_call: \n\t" - FRAME_BEGIN - "pushq %rax \n\t" - "pushq %rbx \n\t" - "pushq %rdx \n\t" - "pushq %r12 \n\t" - "pushq %r9 \n\t" - "movq %rsi, %r12 \n\t" - "movq %rdi, %r9 \n\t" - "movl 4(%r9),%ebx \n\t" - "movl 8(%r9),%ecx \n\t" - "movl 12(%r9),%edx \n\t" - "movl 16(%r9),%esi \n\t" - "movl 20(%r9),%edi \n\t" - "movl (%r9),%eax \n\t" - "call *%r12 \n\t" - "pushfq \n\t" - "popq %r12 \n\t" - "movl %eax, (%r9) \n\t" - "movl %ebx, 4(%r9) \n\t" - "movl %ecx, 8(%r9) \n\t" - "movl %edx, 12(%r9) \n\t" - "movl %esi, 16(%r9) \n\t" - "movl %edi, 20(%r9) \n\t" - "movq %r12, %rax \n\t" - "movl %eax, 28(%r9) \n\t" - "popq %r9 \n\t" - "popq %r12 \n\t" - "popq %rdx \n\t" - "popq %rbx \n\t" - "popq %rax \n\t" - FRAME_END - "ret \n\t" - ".previous"); - -/* - * dmi_find_cru - * - * Routine Description: - * This function checks whether or not a SMBIOS/DMI record is - * the 64bit CRU info or not - */ -static void dmi_find_cru(const struct dmi_header *dm, void *dummy) -{ - struct smbios_cru64_info *smbios_cru64_ptr; - unsigned long cru_physical_address; - - if (dm->type == SMBIOS_CRU64_INFORMATION) { - smbios_cru64_ptr = (struct smbios_cru64_info *) dm; - if (smbios_cru64_ptr->signature == CRU_BIOS_SIGNATURE_VALUE) { - cru_physical_address = - smbios_cru64_ptr->physical_address + - smbios_cru64_ptr->double_offset; - cru_rom_addr = ioremap(cru_physical_address, - smbios_cru64_ptr->double_length); - set_memory_x((unsigned long)cru_rom_addr & PAGE_MASK, - smbios_cru64_ptr->double_length >> PAGE_SHIFT); - } - } -} - -static int detect_cru_service(void) -{ - cru_rom_addr = NULL; - - dmi_walk(dmi_find_cru, NULL); - - /* if cru_rom_addr has been set then we found a CRU service */ - return ((cru_rom_addr != NULL) ? 0 : -ENODEV); -} -/* ------------------------------------------------------------------------- */ -#endif /* CONFIG_X86_64 */ -#endif /* CONFIG_HPWDT_NMI_DECODING */ /* * Watchdog operations @@ -486,30 +113,12 @@ static int hpwdt_my_nmi(void) */ static int hpwdt_pretimeout(unsigned int ulReason, struct pt_regs *regs) { - unsigned long rom_pl; - static int die_nmi_called; - - if (!hpwdt_nmi_decoding) - return NMI_DONE; - if ((ulReason == NMI_UNKNOWN) && !hpwdt_my_nmi()) return NMI_DONE; - spin_lock_irqsave(&rom_lock, rom_pl); - if (!die_nmi_called && !is_icru && !is_uefi) - asminline_call(&cmn_regs, cru_rom_addr); - die_nmi_called = 1; - spin_unlock_irqrestore(&rom_lock, rom_pl); - if (allow_kdump) hpwdt_stop(); - if (!is_icru && !is_uefi) { - if (cmn_regs.u1.ral == 0) { - nmi_panic(regs, "An NMI occurred, but unable to determine source.\n"); - return NMI_HANDLED; - } - } nmi_panic(regs, "An NMI occurred. Depending on your system the reason " "for the NMI is logged in any one of the following " "resources:\n" @@ -675,84 +284,11 @@ static struct miscdevice hpwdt_miscdev = { * Init & Exit */ -#ifdef CONFIG_HPWDT_NMI_DECODING -#ifdef CONFIG_X86_LOCAL_APIC -static void hpwdt_check_nmi_decoding(struct pci_dev *dev) -{ - /* - * If nmi_watchdog is turned off then we can turn on - * our nmi decoding capability. - */ - hpwdt_nmi_decoding = 1; -} -#else -static void hpwdt_check_nmi_decoding(struct pci_dev *dev) -{ - dev_warn(&dev->dev, "NMI decoding is disabled. " - "Your kernel does not support a NMI Watchdog.\n"); -} -#endif /* CONFIG_X86_LOCAL_APIC */ - -/* - * dmi_find_icru - * - * Routine Description: - * This function checks whether or not we are on an iCRU-based server. - * This check is independent of architecture and needs to be made for - * any ProLiant system. - */ -static void dmi_find_icru(const struct dmi_header *dm, void *dummy) -{ - struct smbios_proliant_info *smbios_proliant_ptr; - - if (dm->type == SMBIOS_ICRU_INFORMATION) { - smbios_proliant_ptr = (struct smbios_proliant_info *) dm; - if (smbios_proliant_ptr->misc_features & 0x01) - is_icru = 1; - if (smbios_proliant_ptr->misc_features & 0x1400) - is_uefi = 1; - } -} static int hpwdt_init_nmi_decoding(struct pci_dev *dev) { +#ifdef CONFIG_HPWDT_NMI_DECODING int retval; - - /* - * On typical CRU-based systems we need to map that service in - * the BIOS. For 32 bit Operating Systems we need to go through - * the 32 Bit BIOS Service Directory. For 64 bit Operating - * Systems we get that service through SMBIOS. - * - * On systems that support the new iCRU service all we need to - * do is call dmi_walk to get the supported flag value and skip - * the old cru detect code. - */ - dmi_walk(dmi_find_icru, NULL); - if (!is_icru && !is_uefi) { - - /* - * We need to map the ROM to get the CRU service. - * For 32 bit Operating Systems we need to go through the 32 Bit - * BIOS Service Directory - * For 64 bit Operating Systems we get that service through SMBIOS. - */ - retval = detect_cru_service(); - if (retval < 0) { - dev_warn(&dev->dev, - "Unable to detect the %d Bit CRU Service.\n", - HPWDT_ARCH); - return retval; - } - - /* - * We know this is the only CRU call we need to make so lets keep as - * few instructions as possible once the NMI comes in. - */ - cmn_regs.u1.rah = 0x0D; - cmn_regs.u1.ral = 0x02; - } - /* * Only one function can register for NMI_UNKNOWN */ @@ -780,44 +316,25 @@ error: dev_warn(&dev->dev, "Unable to register a die notifier (err=%d).\n", retval); - if (cru_rom_addr) - iounmap(cru_rom_addr); return retval; -} - -static void hpwdt_exit_nmi_decoding(void) -{ - unregister_nmi_handler(NMI_UNKNOWN, "hpwdt"); - unregister_nmi_handler(NMI_SERR, "hpwdt"); - unregister_nmi_handler(NMI_IO_CHECK, "hpwdt"); - if (cru_rom_addr) - iounmap(cru_rom_addr); -} -#else /* !CONFIG_HPWDT_NMI_DECODING */ -static void hpwdt_check_nmi_decoding(struct pci_dev *dev) -{ -} - -static int hpwdt_init_nmi_decoding(struct pci_dev *dev) -{ +#endif /* CONFIG_HPWDT_NMI_DECODING */ return 0; } static void hpwdt_exit_nmi_decoding(void) { +#ifdef CONFIG_HPWDT_NMI_DECODING + unregister_nmi_handler(NMI_UNKNOWN, "hpwdt"); + unregister_nmi_handler(NMI_SERR, "hpwdt"); + unregister_nmi_handler(NMI_IO_CHECK, "hpwdt"); +#endif } -#endif /* CONFIG_HPWDT_NMI_DECODING */ static int hpwdt_init_one(struct pci_dev *dev, const struct pci_device_id *ent) { int retval; - /* - * Check if we can do NMI decoding or not - */ - hpwdt_check_nmi_decoding(dev); - /* * First let's find out if we are on an iLO2+ server. We will * not run on a legacy ASM box. @@ -922,6 +439,6 @@ MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=" #ifdef CONFIG_HPWDT_NMI_DECODING module_param(allow_kdump, int, 0); MODULE_PARM_DESC(allow_kdump, "Start a kernel dump after NMI occurs"); -#endif /* !CONFIG_HPWDT_NMI_DECODING */ +#endif /* CONFIG_HPWDT_NMI_DECODING */ module_pci_driver(hpwdt_driver); From 317660940fd9dddd3201c2f92e25c27902c753fa Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 2 Mar 2018 07:22:30 -0800 Subject: [PATCH 16/90] perf/x86/intel/uncore: Fix Skylake UPI event format There is no event extension (bit 21) for SKX UPI, so use 'event' instead of 'event_ext'. Reported-by: Stephane Eranian Signed-off-by: Kan Liang Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Fixes: cd34cd97b7b4 ("perf/x86/intel/uncore: Add Skylake server uncore support") Link: http://lkml.kernel.org/r/1520004150-4855-1-git-send-email-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore_snbep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 6d8044ab1060..22ec65bc033a 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -3606,7 +3606,7 @@ static struct intel_uncore_type skx_uncore_imc = { }; static struct attribute *skx_upi_uncore_formats_attr[] = { - &format_attr_event_ext.attr, + &format_attr_event.attr, &format_attr_umask_ext.attr, &format_attr_edge.attr, &format_attr_inv.attr, From 9ac79ba9c77d8595157bbdc4327919f8ee062426 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 12 Feb 2018 14:55:13 +0100 Subject: [PATCH 17/90] gpio: rcar: Use wakeup_path i.s.o. explicit clock handling Since commit ab82fa7da4dce5c7 ("gpio: rcar: Prevent module clock disable when wake-up is enabled"), when a GPIO is used for wakeup, the GPIO block's module clock (if exists) is manually kept running during system suspend, to make sure the device stays active. However, this explicit clock handling is merely a workaround for a failure to properly communicate wakeup information to the device core. Instead, set the device's power.wakeup_path field, to indicate this device is part of the wakeup path. Depending on the PM Domain's active_wakeup configuration, the genpd core code will keep the device enabled (and the clock running) during system suspend when needed. This allows for the removal of all explicit clock handling code from the driver. Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Walleij --- drivers/gpio/gpio-rcar.c | 38 ++++++++++++++++---------------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c index e76de57dd617..ebaea8b1594b 100644 --- a/drivers/gpio/gpio-rcar.c +++ b/drivers/gpio/gpio-rcar.c @@ -14,7 +14,6 @@ * GNU General Public License for more details. */ -#include #include #include #include @@ -37,10 +36,9 @@ struct gpio_rcar_priv { struct platform_device *pdev; struct gpio_chip gpio_chip; struct irq_chip irq_chip; - struct clk *clk; unsigned int irq_parent; + atomic_t wakeup_path; bool has_both_edge_trigger; - bool needs_clk; }; #define IOINTSEL 0x00 /* General IO/Interrupt Switching Register */ @@ -186,13 +184,10 @@ static int gpio_rcar_irq_set_wake(struct irq_data *d, unsigned int on) } } - if (!p->clk) - return 0; - if (on) - clk_enable(p->clk); + atomic_inc(&p->wakeup_path); else - clk_disable(p->clk); + atomic_dec(&p->wakeup_path); return 0; } @@ -330,17 +325,14 @@ static int gpio_rcar_direction_output(struct gpio_chip *chip, unsigned offset, struct gpio_rcar_info { bool has_both_edge_trigger; - bool needs_clk; }; static const struct gpio_rcar_info gpio_rcar_info_gen1 = { .has_both_edge_trigger = false, - .needs_clk = false, }; static const struct gpio_rcar_info gpio_rcar_info_gen2 = { .has_both_edge_trigger = true, - .needs_clk = true, }; static const struct of_device_id gpio_rcar_of_table[] = { @@ -403,7 +395,6 @@ static int gpio_rcar_parse_dt(struct gpio_rcar_priv *p, unsigned int *npins) ret = of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, 0, &args); *npins = ret == 0 ? args.args[2] : RCAR_MAX_GPIO_PER_BANK; p->has_both_edge_trigger = info->has_both_edge_trigger; - p->needs_clk = info->needs_clk; if (*npins == 0 || *npins > RCAR_MAX_GPIO_PER_BANK) { dev_warn(&p->pdev->dev, @@ -440,16 +431,6 @@ static int gpio_rcar_probe(struct platform_device *pdev) platform_set_drvdata(pdev, p); - p->clk = devm_clk_get(dev, NULL); - if (IS_ERR(p->clk)) { - if (p->needs_clk) { - dev_err(dev, "unable to get clock\n"); - ret = PTR_ERR(p->clk); - goto err0; - } - p->clk = NULL; - } - pm_runtime_enable(dev); irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); @@ -531,11 +512,24 @@ static int gpio_rcar_remove(struct platform_device *pdev) return 0; } +static int __maybe_unused gpio_rcar_suspend(struct device *dev) +{ + struct gpio_rcar_priv *p = dev_get_drvdata(dev); + + if (atomic_read(&p->wakeup_path)) + device_set_wakeup_path(dev); + + return 0; +} + +static SIMPLE_DEV_PM_OPS(gpio_rcar_pm_ops, gpio_rcar_suspend, NULL); + static struct platform_driver gpio_rcar_device_driver = { .probe = gpio_rcar_probe, .remove = gpio_rcar_remove, .driver = { .name = "gpio_rcar", + .pm = &gpio_rcar_pm_ops, .of_match_table = of_match_ptr(gpio_rcar_of_table), } }; From 14a596a7e6fd9c5baa6b2cfc57962e2c3bda6c69 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 28 Feb 2018 20:17:35 +0100 Subject: [PATCH 18/90] fixdep: remove stale references to uml-config.h uml-config.h hasn't existed in this decade (87e299e5c750 - x86, um: get rid of uml-config.h). The few remaining UML_CONFIG instances are defined directly in terms of their real CONFIG symbol in common-offsets.h, so unlike when the symbols got defined via a sed script, anything that uses UML_CONFIG_FOO now should also automatically pick up a dependency on CONFIG_FOO via the normal fixdep mechanism (since common-offsets.h should at least recursively be a dependency). Hence I believe we should actually be able to ignore the HELLO_CONFIG_BOOM cases. Cc: Al Viro Cc: Richard Weinberger Cc: user-mode-linux-devel@lists.sourceforge.net Signed-off-by: Rasmus Villemoes Signed-off-by: Masahiro Yamada --- scripts/basic/fixdep.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/scripts/basic/fixdep.c b/scripts/basic/fixdep.c index fa3d39b6f23b..d7fbe545dd5d 100644 --- a/scripts/basic/fixdep.c +++ b/scripts/basic/fixdep.c @@ -93,14 +93,6 @@ * (Note: it'd be easy to port over the complete mkdep state machine, * but I don't think the added complexity is worth it) */ -/* - * Note 2: if somebody writes HELLO_CONFIG_BOOM in a file, it will depend onto - * CONFIG_BOOM. This could seem a bug (not too hard to fix), but please do not - * fix it! Some UserModeLinux files (look at arch/um/) call CONFIG_BOOM as - * UML_CONFIG_BOOM, to avoid conflicts with /usr/include/linux/autoconf.h, - * through arch/um/include/uml-config.h; this fixdep "bug" makes sure that - * those files will have correct dependencies. - */ #include #include @@ -286,7 +278,6 @@ static int is_ignored_file(const char *s, int len) { return str_ends_with(s, len, "include/generated/autoconf.h") || str_ends_with(s, len, "include/generated/autoksyms.h") || - str_ends_with(s, len, "arch/um/include/uml-config.h") || str_ends_with(s, len, "include/linux/kconfig.h") || str_ends_with(s, len, ".ver"); } From 5b8ad96d1a4421ffe417e647a65064aad1e84fb4 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 28 Feb 2018 20:17:36 +0100 Subject: [PATCH 19/90] fixdep: remove some false CONFIG_ matches The string CONFIG_ quite often appears after other alphanumerics, meaning that that instance cannot be referencing a Kconfig symbol. Omitting these means make has fewer files to stat() when deciding what needs to be rebuilt - for a defconfig build, this seems to remove about 2% of the (wildcard ...) lines from the .o.cmd files. Signed-off-by: Rasmus Villemoes Signed-off-by: Masahiro Yamada --- scripts/basic/fixdep.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scripts/basic/fixdep.c b/scripts/basic/fixdep.c index d7fbe545dd5d..1b21870d6e7f 100644 --- a/scripts/basic/fixdep.c +++ b/scripts/basic/fixdep.c @@ -225,8 +225,13 @@ static int str_ends_with(const char *s, int slen, const char *sub) static void parse_config_file(const char *p) { const char *q, *r; + const char *start = p; while ((p = strstr(p, "CONFIG_"))) { + if (p > start && (isalnum(p[-1]) || p[-1] == '_')) { + p += 7; + continue; + } p += 7; q = p; while (*q && (isalnum(*q) || *q == '_')) From 638e69cf2230737655fcb5ee9879c2fab7679187 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 28 Feb 2018 20:17:37 +0100 Subject: [PATCH 20/90] fixdep: do not ignore kconfig.h kconfig.h was excluded from consideration by fixdep by 6a5be57f0f00 (fixdep: fix extraneous dependencies) to avoid some false positive hits (1) include/config/.h (2) include/config/h.h (3) include/config/foo.h (1) occurred because kconfig.h contains the string CONFIG_ in a comment. However, since dee81e988674 (fixdep: faster CONFIG_ search), we have a check that the part after CONFIG_ is non-empty, so this does not happen anymore (and CONFIG_ appears by itself elsewhere, so that check is worthwhile). (2) comes from the include guard, __LINUX_KCONFIG_H. But with the previous patch, we no longer match that either. That leaves (3), which amounts to one [1] false dependency (aka stat() call done by make), which I think we can live with: We've already had one case [2] where the lack of include/linux/kconfig.h in the .o.cmd file caused a missing rebuild, and while I originally thought we should just put kconfig.h in the dependency list without parsing it for the CONFIG_ pattern, we actually do have some real CONFIG_ symbols mentioned in it, and one can imagine some translation unit that just does '#ifdef __BIG_ENDIAN' but doesn't through some other header actually depend on CONFIG_CPU_BIG_ENDIAN - so changing the target endianness could end up rebuilding the world, minus that small TU. Quoting Linus, ... when missing dependencies cause a missed re-compile, the resulting bugs can be _really_ subtle. [1] well, two, we now also have CONFIG_BOOGER/booger.h - we could change that to FOO if we care [2] https://lkml.org/lkml/2018/2/22/838 Cc: Linus Torvalds Signed-off-by: Rasmus Villemoes Signed-off-by: Masahiro Yamada --- scripts/basic/fixdep.c | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/basic/fixdep.c b/scripts/basic/fixdep.c index 1b21870d6e7f..449b68c4c90c 100644 --- a/scripts/basic/fixdep.c +++ b/scripts/basic/fixdep.c @@ -283,7 +283,6 @@ static int is_ignored_file(const char *s, int len) { return str_ends_with(s, len, "include/generated/autoconf.h") || str_ends_with(s, len, "include/generated/autoksyms.h") || - str_ends_with(s, len, "include/linux/kconfig.h") || str_ends_with(s, len, ".ver"); } From f6d3f35e006496c282ccbb67494d90b04f6cba10 Mon Sep 17 00:00:00 2001 From: Sangwon Hong Date: Mon, 12 Feb 2018 04:37:44 +0900 Subject: [PATCH 21/90] perf kallsyms: Fix the usage on the man page First, all man pages highlight only perf and subcommands except 'perf kallsyms', which includes the full usage. Fix it for commands to monopolize underlines. Second, options can be ommited when executing 'perf kallsyms', so add square brackets between