From 80a6ec7d5e1653208eb53f6738620dab98f6f50e Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wens@csie.org>
Date: Thu, 6 Sep 2018 21:25:37 +0800
Subject: [PATCH 01/95] clk: sunxi-ng: sun4i: Set VCO and PLL bias current to
 lowest setting

The default mid-level PLL bias current setting interferes with sigma
delta modulation. This manifests as decreased audio quality at lower
sampling rates, which sounds like radio broadcast quality, and
distortion noises at sampling rates at 48 kHz or above.

Changing the bias current settings to the lowest gets rid of the
noise.

Fixes: de3448519194 ("clk: sunxi-ng: sun4i: Use sigma-delta modulation
		      for audio PLL")
Cc: <stable@vger.kernel.org> # 4.15.x
Signed-off-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
---
 drivers/clk/sunxi-ng/ccu-sun4i-a10.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/clk/sunxi-ng/ccu-sun4i-a10.c b/drivers/clk/sunxi-ng/ccu-sun4i-a10.c
index ffa5dac221e4..129ebd2588fd 100644
--- a/drivers/clk/sunxi-ng/ccu-sun4i-a10.c
+++ b/drivers/clk/sunxi-ng/ccu-sun4i-a10.c
@@ -1434,8 +1434,16 @@ static void __init sun4i_ccu_init(struct device_node *node,
 		return;
 	}
 
-	/* Force the PLL-Audio-1x divider to 1 */
 	val = readl(reg + SUN4I_PLL_AUDIO_REG);
+
+	/*
+	 * Force VCO and PLL bias current to lowest setting. Higher
+	 * settings interfere with sigma-delta modulation and result
+	 * in audible noise and distortions when using SPDIF or I2S.
+	 */
+	val &= ~GENMASK(25, 16);
+
+	/* Force the PLL-Audio-1x divider to 1 */
 	val &= ~GENMASK(29, 26);
 	writel(val | (1 << 26), reg + SUN4I_PLL_AUDIO_REG);
 

From 4da402597c2b75c4b636ab2416baf921b363b325 Mon Sep 17 00:00:00 2001
From: Li RongQing <lirongqing@baidu.com>
Date: Sun, 30 Sep 2018 15:06:06 +0800
Subject: [PATCH 02/95] xfrm: fix gro_cells leak when remove virtual xfrm
 interfaces

The device gro_cells has been initialized, it should be freed,
otherwise it will be leaked

Fixes: f203b76d78092faf2 ("xfrm: Add virtual xfrm interfaces")
Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
Signed-off-by: Li RongQing <lirongqing@baidu.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_interface.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index 31acc6f33d98..6f05e831a73e 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -116,6 +116,9 @@ static void xfrmi_unlink(struct xfrmi_net *xfrmn, struct xfrm_if *xi)
 
 static void xfrmi_dev_free(struct net_device *dev)
 {
+	struct xfrm_if *xi = netdev_priv(dev);
+
+	gro_cells_destroy(&xi->gro_cells);
 	free_percpu(dev->tstats);
 }
 

From 0711a43b6d84ff9189adfbf83c8bbf56eef794bf Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Tue, 2 Oct 2018 23:29:11 +0800
Subject: [PATCH 03/95] drm/edid: Add 6 bpc quirk for BOE panel in HP Pavilion
 15-n233sl

There's another panel that reports "DFP 1.x compliant TMDS" but it
supports 6bpc instead of 8 bpc.

Apply 6 bpc quirk for the panel to fix it.

BugLink: https://bugs.launchpad.net/bugs/1794387
Cc: <stable@vger.kernel.org> # v4.8+
Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20181002152911.4370-1-kai.heng.feng@canonical.com
---
 drivers/gpu/drm/drm_edid.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 3c9fc99648b7..1e2b9407c8d0 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -113,6 +113,9 @@ static const struct edid_quirk {
 	/* AEO model 0 reports 8 bpc, but is a 6 bpc panel */
 	{ "AEO", 0, EDID_QUIRK_FORCE_6BPC },
 
+	/* BOE model on HP Pavilion 15-n233sl reports 8 bpc, but is a 6 bpc panel */
+	{ "BOE", 0x78b, EDID_QUIRK_FORCE_6BPC },
+
 	/* CPT panel of Asus UX303LA reports 8 bpc, but is a 6 bpc panel */
 	{ "CPT", 0x17df, EDID_QUIRK_FORCE_6BPC },
 

From 5f78aec0d7e9f239d64b2cd1c9e671345ff92f94 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Thu, 4 Oct 2018 07:21:48 +0200
Subject: [PATCH 04/95] MAINTAINERS: Remove net/core/flow.c

net/core/flow.c does not exist anymore, so remove it
from the IPSEC NETWORKING section of the MAINTAINERS
file.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index dcb0191c4f54..4ff21dac9b45 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10130,7 +10130,6 @@ L:	netdev@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec.git
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next.git
 S:	Maintained
-F:	net/core/flow.c
 F:	net/xfrm/
 F:	net/key/
 F:	net/ipv4/xfrm*

From db05c481977599236f12a85e55de9f5ab37b0a2c Mon Sep 17 00:00:00 2001
From: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
Date: Wed, 3 Oct 2018 19:45:38 +0300
Subject: [PATCH 05/95] drm: fb-helper: Reject all pixel format changing
 requests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

drm fbdev emulation doesn't support changing the pixel format at all,
so reject all pixel format changing requests.

Cc: stable@vger.kernel.org
Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20181003164538.5534-1-Eugeniy.Paltsev@synopsys.com
---
 drivers/gpu/drm/drm_fb_helper.c | 91 ++++++++++-----------------------
 1 file changed, 26 insertions(+), 65 deletions(-)

diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 515a7aec57ac..9628dd617826 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -1580,6 +1580,25 @@ unlock:
 }
 EXPORT_SYMBOL(drm_fb_helper_ioctl);
 
+static bool drm_fb_pixel_format_equal(const struct fb_var_screeninfo *var_1,
+				      const struct fb_var_screeninfo *var_2)
+{
+	return var_1->bits_per_pixel == var_2->bits_per_pixel &&
+	       var_1->grayscale == var_2->grayscale &&
+	       var_1->red.offset == var_2->red.offset &&
+	       var_1->red.length == var_2->red.length &&
+	       var_1->red.msb_right == var_2->red.msb_right &&
+	       var_1->green.offset == var_2->green.offset &&
+	       var_1->green.length == var_2->green.length &&
+	       var_1->green.msb_right == var_2->green.msb_right &&
+	       var_1->blue.offset == var_2->blue.offset &&
+	       var_1->blue.length == var_2->blue.length &&
+	       var_1->blue.msb_right == var_2->blue.msb_right &&
+	       var_1->transp.offset == var_2->transp.offset &&
+	       var_1->transp.length == var_2->transp.length &&
+	       var_1->transp.msb_right == var_2->transp.msb_right;
+}
+
 /**
  * drm_fb_helper_check_var - implementation for &fb_ops.fb_check_var
  * @var: screeninfo to check
@@ -1590,7 +1609,6 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var,
 {
 	struct drm_fb_helper *fb_helper = info->par;
 	struct drm_framebuffer *fb = fb_helper->fb;
-	int depth;
 
 	if (var->pixclock != 0 || in_dbg_master())
 		return -EINVAL;
@@ -1610,72 +1628,15 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var,
 		return -EINVAL;
 	}
 
-	switch (var->bits_per_pixel) {
-	case 16:
-		depth = (var->green.length == 6) ? 16 : 15;
-		break;
-	case 32:
-		depth = (var->transp.length > 0) ? 32 : 24;
-		break;
-	default:
-		depth = var->bits_per_pixel;
-		break;
-	}
-
-	switch (depth) {
-	case 8:
-		var->red.offset = 0;
-		var->green.offset = 0;
-		var->blue.offset = 0;
-		var->red.length = 8;
-		var->green.length = 8;
-		var->blue.length = 8;
-		var->transp.length = 0;
-		var->transp.offset = 0;
-		break;
-	case 15:
-		var->red.offset = 10;
-		var->green.offset = 5;
-		var->blue.offset = 0;
-		var->red.length = 5;
-		var->green.length = 5;
-		var->blue.length = 5;
-		var->transp.length = 1;
-		var->transp.offset = 15;
-		break;
-	case 16:
-		var->red.offset = 11;
-		var->green.offset = 5;
-		var->blue.offset = 0;
-		var->red.length = 5;
-		var->green.length = 6;
-		var->blue.length = 5;
-		var->transp.length = 0;
-		var->transp.offset = 0;
-		break;
-	case 24:
-		var->red.offset = 16;
-		var->green.offset = 8;
-		var->blue.offset = 0;
-		var->red.length = 8;
-		var->green.length = 8;
-		var->blue.length = 8;
-		var->transp.length = 0;
-		var->transp.offset = 0;
-		break;
-	case 32:
-		var->red.offset = 16;
-		var->green.offset = 8;
-		var->blue.offset = 0;
-		var->red.length = 8;
-		var->green.length = 8;
-		var->blue.length = 8;
-		var->transp.length = 8;
-		var->transp.offset = 24;
-		break;
-	default:
+	/*
+	 * drm fbdev emulation doesn't support changing the pixel format at all,
+	 * so reject all pixel format changing requests.
+	 */
+	if (!drm_fb_pixel_format_equal(var, &info->var)) {
+		DRM_DEBUG("fbdev emulation doesn't support changing the pixel format\n");
 		return -EINVAL;
 	}
+
 	return 0;
 }
 EXPORT_SYMBOL(drm_fb_helper_check_var);

From 064253c1c0625efd0362a0b7ecdbe8bee2a2904d Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ti.com>
Date: Mon, 17 Sep 2018 14:00:54 +0300
Subject: [PATCH 06/95] drm: fix use of freed memory in drm_mode_setcrtc
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

drm_mode_setcrtc() retries modesetting in case one of the functions it
calls returns -EDEADLK. connector_set, mode and fb are freed before
retrying, but they are not set to NULL. This can cause
drm_mode_setcrtc() to use those variables.

For example: On the first try __drm_mode_set_config_internal() returns
-EDEADLK. connector_set, mode and fb are freed. Next retry starts, and
drm_modeset_lock_all_ctx() returns -EDEADLK, and we jump to 'out'. The
code will happily try to release all three again.

This leads to crashes of different kinds, depending on the sequence the
EDEADLKs happen.

Fix this by setting the three variables to NULL at the start of the
retry loop.

Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20180917110054.4053-1-tomi.valkeinen@ti.com
---
 drivers/gpu/drm/drm_crtc.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index bae43938c8f6..9cbe8f5c9aca 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -567,9 +567,9 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data,
 	struct drm_mode_crtc *crtc_req = data;
 	struct drm_crtc *crtc;
 	struct drm_plane *plane;
-	struct drm_connector **connector_set = NULL, *connector;
-	struct drm_framebuffer *fb = NULL;
-	struct drm_display_mode *mode = NULL;
+	struct drm_connector **connector_set, *connector;
+	struct drm_framebuffer *fb;
+	struct drm_display_mode *mode;
 	struct drm_mode_set set;
 	uint32_t __user *set_connectors_ptr;
 	struct drm_modeset_acquire_ctx ctx;
@@ -598,6 +598,10 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data,
 	mutex_lock(&crtc->dev->mode_config.mutex);
 	drm_modeset_acquire_init(&ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE);
 retry:
+	connector_set = NULL;
+	fb = NULL;
+	mode = NULL;
+
 	ret = drm_modeset_lock_all_ctx(crtc->dev, &ctx);
 	if (ret)
 		goto out;

From 262f9d811c7608f1e74258ceecfe1fa213bdf912 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 5 Oct 2018 19:38:46 -0700
Subject: [PATCH 07/95] bpf: do not blindly change rlimit in reuseport net
 selftest

If the current process has unlimited RLIMIT_MEMLOCK,
we should should leave it as is.

Fixes: 941ff6f11c02 ("bpf: fix rlimit in reuseport net selftest")
Signed-off-by: John Sperbeck <jsperbeck@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/net/reuseport_bpf.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c
index cad14cd0ea92..b5277106df1f 100644
--- a/tools/testing/selftests/net/reuseport_bpf.c
+++ b/tools/testing/selftests/net/reuseport_bpf.c
@@ -437,14 +437,19 @@ void enable_fastopen(void)
 	}
 }
 
-static struct rlimit rlim_old, rlim_new;
+static struct rlimit rlim_old;
 
 static  __attribute__((constructor)) void main_ctor(void)
 {
 	getrlimit(RLIMIT_MEMLOCK, &rlim_old);
-	rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20);
-	rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20);
-	setrlimit(RLIMIT_MEMLOCK, &rlim_new);
+
+	if (rlim_old.rlim_cur != RLIM_INFINITY) {
+		struct rlimit rlim_new;
+
+		rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20);
+		rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20);
+		setrlimit(RLIMIT_MEMLOCK, &rlim_new);
+	}
 }
 
 static __attribute__((destructor)) void main_dtor(void)

From ff5d1a42096cd96e70546c97d92f01d10cbddc2d Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 8 Oct 2018 08:46:51 -0700
Subject: [PATCH 08/95] sunvdc: Remove VLA usage

In the quest to remove all stack VLA usage from the kernel[1], this moves
the math for cookies calculation into macros and allocates a fixed size
array for the maximum number of cookies and adds a runtime sanity check.
(Note that the size was always fixed, but just hidden from the compiler.)

[1] https://lkml.kernel.org/r/CA+55aFzCG-zNmZwX4A2FQpadafLfEzK6CC=qPXydAacU1RqZWA@mail.gmail.com

Cc: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/block/sunvdc.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 5ca56bfae63c..f68e9baffad7 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -36,6 +36,10 @@ MODULE_VERSION(DRV_MODULE_VERSION);
 #define VDC_TX_RING_SIZE	512
 #define VDC_DEFAULT_BLK_SIZE	512
 
+#define MAX_XFER_BLKS		(128 * 1024)
+#define MAX_XFER_SIZE		(MAX_XFER_BLKS / VDC_DEFAULT_BLK_SIZE)
+#define MAX_RING_COOKIES	((MAX_XFER_BLKS / PAGE_SIZE) + 2)
+
 #define WAITING_FOR_LINK_UP	0x01
 #define WAITING_FOR_TX_SPACE	0x02
 #define WAITING_FOR_GEN_CMD	0x04
@@ -450,7 +454,7 @@ static int __send_request(struct request *req)
 {
 	struct vdc_port *port = req->rq_disk->private_data;
 	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
-	struct scatterlist sg[port->ring_cookies];
+	struct scatterlist sg[MAX_RING_COOKIES];
 	struct vdc_req_entry *rqe;
 	struct vio_disk_desc *desc;
 	unsigned int map_perm;
@@ -458,6 +462,9 @@ static int __send_request(struct request *req)
 	u64 len;
 	u8 op;
 
+	if (WARN_ON(port->ring_cookies > MAX_RING_COOKIES))
+		return -EINVAL;
+
 	map_perm = LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_IO;
 
 	if (rq_data_dir(req) == READ) {
@@ -984,9 +991,8 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 		goto err_out_free_port;
 
 	port->vdisk_block_size = VDC_DEFAULT_BLK_SIZE;
-	port->max_xfer_size = ((128 * 1024) / port->vdisk_block_size);
-	port->ring_cookies = ((port->max_xfer_size *
-			       port->vdisk_block_size) / PAGE_SIZE) + 2;
+	port->max_xfer_size = MAX_XFER_SIZE;
+	port->ring_cookies = MAX_RING_COOKIES;
 
 	err = vio_ldc_alloc(&port->vio, &vdc_ldc_cfg, port);
 	if (err)

From d7782145e1ad537df4ce74e58c50f1f732a1462d Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sat, 6 Oct 2018 10:56:11 -0700
Subject: [PATCH 09/95] filesystem-dax: Fix dax_layout_busy_page() livelock

In the presence of multi-order entries the typical
pagevec_lookup_entries() pattern may loop forever:

	while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
				min(end - index, (pgoff_t)PAGEVEC_SIZE),
				indices)) {
		...
		for (i = 0; i < pagevec_count(&pvec); i++) {
			index = indices[i];
			...
		}
		index++; /* BUG */
	}

The loop updates 'index' for each index found and then increments to the
next possible page to continue the lookup. However, if the last entry in
the pagevec is multi-order then the next possible page index is more
than 1 page away. Fix this locally for the filesystem-dax case by
checking for dax-multi-order entries. Going forward new users of
multi-order entries need to be similarly careful, or we need a generic
way to report the page increment in the radix iterator.

Fixes: 5fac7408d828 ("mm, fs, dax: handle layout changes to pinned dax...")
Cc: <stable@vger.kernel.org>
Cc: Ross Zwisler <zwisler@kernel.org>
Cc: Matthew Wilcox <willy@infradead.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/dax.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 4becbf168b7f..0fb270f0a0ef 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -666,6 +666,8 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
 	while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
 				min(end - index, (pgoff_t)PAGEVEC_SIZE),
 				indices)) {
+		pgoff_t nr_pages = 1;
+
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *pvec_ent = pvec.pages[i];
 			void *entry;
@@ -680,8 +682,15 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
 
 			xa_lock_irq(&mapping->i_pages);
 			entry = get_unlocked_mapping_entry(mapping, index, NULL);
-			if (entry)
+			if (entry) {
 				page = dax_busy_page(entry);
+				/*
+				 * Account for multi-order entries at
+				 * the end of the pagevec.
+				 */
+				if (i + 1 >= pagevec_count(&pvec))
+					nr_pages = 1UL << dax_radix_order(entry);
+			}
 			put_unlocked_mapping_entry(mapping, index, entry);
 			xa_unlock_irq(&mapping->i_pages);
 			if (page)
@@ -696,7 +705,7 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
 		 */
 		pagevec_remove_exceptionals(&pvec);
 		pagevec_release(&pvec);
-		index++;
+		index += nr_pages;
 
 		if (page)
 			break;

From ec876f4b252c4084acad259ce3e65ad97f44f040 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 4 Oct 2018 09:35:35 +0100
Subject: [PATCH 10/95] ARM: KVM: Correctly order SGI register entries in the
 cp15 array

The ICC_ASGI1R and ICC_SGI0R register entries in the cp15 array
are not correctly ordered, leading to a BUG() at boot time.

Move them to their natural location.

Fixes: 3e8a8a50c7ef ("KVM: arm: vgic-v3: Add support for ICC_SGI0R and ICC_ASGI1R accesses")
Reported-by: Florian Fainelli <f.fainelli@gmail.com>
Tested-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm/kvm/coproc.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 450c7a4fbc8a..cb094e55dc5f 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -478,15 +478,15 @@ static const struct coproc_reg cp15_regs[] = {
 
 	/* ICC_SGI1R */
 	{ CRm64(12), Op1( 0), is64, access_gic_sgi},
-	/* ICC_ASGI1R */
-	{ CRm64(12), Op1( 1), is64, access_gic_sgi},
-	/* ICC_SGI0R */
-	{ CRm64(12), Op1( 2), is64, access_gic_sgi},
 
 	/* VBAR: swapped by interrupt.S. */
 	{ CRn(12), CRm( 0), Op1( 0), Op2( 0), is32,
 			NULL, reset_val, c12_VBAR, 0x00000000 },
 
+	/* ICC_ASGI1R */
+	{ CRm64(12), Op1( 1), is64, access_gic_sgi},
+	/* ICC_SGI0R */
+	{ CRm64(12), Op1( 2), is64, access_gic_sgi},
 	/* ICC_SRE */
 	{ CRn(12), CRm(12), Op1( 0), Op2(5), is32, access_gic_sre },
 

From 853c110982eaff0d99dace3f66f1ba58b5bfd9d5 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 9 Oct 2018 18:35:29 +0200
Subject: [PATCH 11/95] KVM: x86: support CONFIG_KVM_AMD=y with
 CONFIG_CRYPTO_DEV_CCP_DD=m

SEV requires access to the AMD cryptographic device APIs, and this
does not work when KVM is builtin and the crypto driver is a module.
Actually the Kconfig conditions for CONFIG_KVM_AMD_SEV try to disable
SEV in that case, but it does not work because the actual crypto
calls are not culled, only sev_hardware_setup() is.

This patch adds two CONFIG_KVM_AMD_SEV checks that gate all the remaining
SEV code; it fixes this particular configuration, and drops 5 KiB of
code when CONFIG_KVM_AMD_SEV=n.

Reported-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d96092b35936..61ccfb13899e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -436,14 +436,18 @@ static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm)
 
 static inline bool svm_sev_enabled(void)
 {
-	return max_sev_asid;
+	return IS_ENABLED(CONFIG_KVM_AMD_SEV) ? max_sev_asid : 0;
 }
 
 static inline bool sev_guest(struct kvm *kvm)
 {
+#ifdef CONFIG_KVM_AMD_SEV
 	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 
 	return sev->active;
+#else
+	return false;
+#endif
 }
 
 static inline int sev_get_asid(struct kvm *kvm)

From 4628a64591e6cee181237060961e98c615c33966 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 9 Oct 2018 12:19:17 +0200
Subject: [PATCH 12/95] mm: Preserve _PAGE_DEVMAP across mprotect() calls

Currently _PAGE_DEVMAP bit is not preserved in mprotect(2) calls. As a
result we will see warnings such as:

BUG: Bad page map in process JobWrk0013  pte:800001803875ea25 pmd:7624381067
addr:00007f0930720000 vm_flags:280000f9 anon_vma:          (null) mapping:ffff97f2384056f0 index:0
file:457-000000fe00000030-00000009-000000ca-00000001_2001.fileblock fault:xfs_filemap_fault [xfs] mmap:xfs_file_mmap [xfs] readpage:          (null)
CPU: 3 PID: 15848 Comm: JobWrk0013 Tainted: G        W          4.12.14-2.g7573215-default #1 SLE12-SP4 (unreleased)
Hardware name: Intel Corporation S2600WFD/S2600WFD, BIOS SE5C620.86B.01.00.0833.051120182255 05/11/2018
Call Trace:
 dump_stack+0x5a/0x75
 print_bad_pte+0x217/0x2c0
 ? enqueue_task_fair+0x76/0x9f0
 _vm_normal_page+0xe5/0x100
 zap_pte_range+0x148/0x740
 unmap_page_range+0x39a/0x4b0
 unmap_vmas+0x42/0x90
 unmap_region+0x99/0xf0
 ? vma_gap_callbacks_rotate+0x1a/0x20
 do_munmap+0x255/0x3a0
 vm_munmap+0x54/0x80
 SyS_munmap+0x1d/0x30
 do_syscall_64+0x74/0x150
 entry_SYSCALL_64_after_hwframe+0x3d/0xa2
...

when mprotect(2) gets used on DAX mappings. Also there is a wide variety
of other failures that can result from the missing _PAGE_DEVMAP flag
when the area gets used by get_user_pages() later.

Fix the problem by including _PAGE_DEVMAP in a set of flags that get
preserved by mprotect(2).

Fixes: 69660fd797c3 ("x86, mm: introduce _PAGE_DEVMAP")
Fixes: ebd31197931d ("powerpc/mm: Add devmap support for ppc64")
Cc: <stable@vger.kernel.org>
Signed-off-by: Jan Kara <jack@suse.cz>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 4 ++--
 arch/x86/include/asm/pgtable_types.h         | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 2fdc865ca374..2a2486526d1f 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -114,7 +114,7 @@
  */
 #define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
 			 _PAGE_ACCESSED | H_PAGE_THP_HUGE | _PAGE_PTE | \
-			 _PAGE_SOFT_DIRTY)
+			 _PAGE_SOFT_DIRTY | _PAGE_DEVMAP)
 /*
  * user access blocked by key
  */
@@ -132,7 +132,7 @@
  */
 #define _PAGE_CHG_MASK	(PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
 			 _PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE |	\
-			 _PAGE_SOFT_DIRTY)
+			 _PAGE_SOFT_DIRTY | _PAGE_DEVMAP)
 
 #define H_PTE_PKEY  (H_PTE_PKEY_BIT0 | H_PTE_PKEY_BIT1 | H_PTE_PKEY_BIT2 | \
 		     H_PTE_PKEY_BIT3 | H_PTE_PKEY_BIT4)
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index b64acb08a62b..106b7d0e2dae 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -124,7 +124,7 @@
  */
 #define _PAGE_CHG_MASK	(PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT |		\
 			 _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY |	\
-			 _PAGE_SOFT_DIRTY)
+			 _PAGE_SOFT_DIRTY | _PAGE_DEVMAP)
 #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)
 
 /*

From 7c26701a77ec4569d9054b334d70724a06ad62f9 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 9 Oct 2018 16:52:38 -0700
Subject: [PATCH 13/95] sparc: Wire up io_pgetevents system call.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/include/uapi/asm/unistd.h | 3 ++-
 arch/sparc/kernel/systbls_32.S       | 2 +-
 arch/sparc/kernel/systbls_64.S       | 4 ++--
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/sparc/include/uapi/asm/unistd.h b/arch/sparc/include/uapi/asm/unistd.h
index 09acf0ddec10..45b4bf1875e6 100644
--- a/arch/sparc/include/uapi/asm/unistd.h
+++ b/arch/sparc/include/uapi/asm/unistd.h
@@ -427,8 +427,9 @@
 #define __NR_preadv2		358
 #define __NR_pwritev2		359
 #define __NR_statx		360
+#define __NR_io_pgetevents	361
 
-#define NR_syscalls		361
+#define NR_syscalls		362
 
 /* Bitmask values returned from kern_features system call.  */
 #define KERN_FEATURE_MIXED_MODE_STACK	0x00000001
diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S
index 12bee14b552c..621a363098ec 100644
--- a/arch/sparc/kernel/systbls_32.S
+++ b/arch/sparc/kernel/systbls_32.S
@@ -90,4 +90,4 @@ sys_call_table:
 /*345*/	.long sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
 /*350*/	.long sys_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
 /*355*/	.long sys_setsockopt, sys_mlock2, sys_copy_file_range, sys_preadv2, sys_pwritev2
-/*360*/	.long sys_statx
+/*360*/	.long sys_statx, sys_io_pgetevents
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index 387ef993880a..bb68c805b891 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -91,7 +91,7 @@ sys_call_table32:
 	.word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
 /*350*/	.word sys32_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
 	.word compat_sys_setsockopt, sys_mlock2, sys_copy_file_range, compat_sys_preadv2, compat_sys_pwritev2
-/*360*/	.word sys_statx
+/*360*/	.word sys_statx, compat_sys_io_pgetevents
 
 #endif /* CONFIG_COMPAT */
 
@@ -173,4 +173,4 @@ sys_call_table:
 	.word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
 /*350*/	.word sys64_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
 	.word sys_setsockopt, sys_mlock2, sys_copy_file_range, sys_preadv2, sys_pwritev2
-/*360*/	.word sys_statx
+/*360*/	.word sys_statx, sys_io_pgetevents

From 073c1a781e4a1217d572506621434cd6d750969b Mon Sep 17 00:00:00 2001
From: Paul Moore <paul@paul-moore.com>
Date: Wed, 10 Oct 2018 01:50:15 -0400
Subject: [PATCH 14/95] MAINTAINERS: update the SELinux mailing list location

Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 67e4c4f92ba9..fd060218baa8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12775,7 +12775,7 @@ SELINUX SECURITY MODULE
 M:	Paul Moore <paul@paul-moore.com>
 M:	Stephen Smalley <sds@tycho.nsa.gov>
 M:	Eric Paris <eparis@parisplace.org>
-L:	selinux@tycho.nsa.gov (moderated for non-subscribers)
+L:	selinux@vger.kernel.org
 W:	https://selinuxproject.org
 W:	https://github.com/SELinuxProject
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/selinux.git

From fd7e848077c1a466b9187537adce16658f7cb94b Mon Sep 17 00:00:00 2001
From: Talat Batheesh <talatb@mellanox.com>
Date: Thu, 30 Aug 2018 16:31:52 +0300
Subject: [PATCH 15/95] net/mlx5: Fix memory leak when setting fpga ipsec caps

Allocated memory for context should be freed once
finished working with it.

Fixes: d6c4f0298cec ("net/mlx5: Refactor accel IPSec code")
Signed-off-by: Talat Batheesh <talatb@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
index 5645a4facad2..b8ee9101c506 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -245,7 +245,7 @@ static void *mlx5_fpga_ipsec_cmd_exec(struct mlx5_core_dev *mdev,
 		return ERR_PTR(res);
 	}
 
-	/* Context will be freed by wait func after completion */
+	/* Context should be freed by the caller after completion. */
 	return context;
 }
 
@@ -418,10 +418,8 @@ static int mlx5_fpga_ipsec_set_caps(struct mlx5_core_dev *mdev, u32 flags)
 	cmd.cmd = htonl(MLX5_FPGA_IPSEC_CMD_OP_SET_CAP);
 	cmd.flags = htonl(flags);
 	context = mlx5_fpga_ipsec_cmd_exec(mdev, &cmd, sizeof(cmd));
-	if (IS_ERR(context)) {
-		err = PTR_ERR(context);
-		goto out;
-	}
+	if (IS_ERR(context))
+		return PTR_ERR(context);
 
 	err = mlx5_fpga_ipsec_cmd_wait(context);
 	if (err)
@@ -435,6 +433,7 @@ static int mlx5_fpga_ipsec_set_caps(struct mlx5_core_dev *mdev, u32 flags)
 	}
 
 out:
+	kfree(context);
 	return err;
 }
 

From a48bc513159d4767f9988f0d857b2b0c38a4d614 Mon Sep 17 00:00:00 2001
From: Huy Nguyen <huyn@mellanox.com>
Date: Tue, 11 Sep 2018 14:58:22 -0500
Subject: [PATCH 16/95] net/mlx5: Take only bit 24-26 of wqe.pftype_wq for page
 fault type

The HW spec defines only bits 24-26 of pftype_wq as the page fault type,
use the required mask to ensure that.

Fixes: d9aaed838765 ("{net,IB}/mlx5: Refactor page fault handling")
Signed-off-by: Huy Nguyen <huyn@mellanox.com>
Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 48864f4988a4..c1e1a16a9b07 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -273,7 +273,7 @@ static void eq_pf_process(struct mlx5_eq *eq)
 		case MLX5_PFAULT_SUBTYPE_WQE:
 			/* WQE based event */
 			pfault->type =
-				be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24;
+				(be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7;
 			pfault->token =
 				be32_to_cpu(pf_eqe->wqe.token);
 			pfault->wqe.wq_num =

From 37fdffb217a45609edccbb8b407d031143f551c0 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Tue, 21 Aug 2018 14:41:41 +0300
Subject: [PATCH 17/95] net/mlx5: WQ, fixes for fragmented WQ buffers API

mlx5e netdevice used to calculate fragment edges by a call to
mlx5_wq_cyc_get_frag_size(). This calculation did not give the correct
indication for queues smaller than a PAGE_SIZE, (broken by default on
PowerPC, where PAGE_SIZE == 64KB).  Here it is replaced by the correct new
calls/API.

Since (TX/RX) Work Queues buffers are fragmented, here we introduce
changes to the API in core driver, so that it gets a stride index and
returns the index of last stride on same fragment, and an additional
wrapping function that returns the number of physically contiguous
strides that can be written contiguously to the work queue.

This obsoletes the following API functions, and their buggy
usage in EN driver:
* mlx5_wq_cyc_get_frag_size()
* mlx5_wq_cyc_ctr2fragix()

The new API improves modularity and hides the details of such
calculation for mlx5e netdevice and mlx5_ib rdma drivers.

New calculation is also more efficient, and improves performance
as follows:

Packet rate test: pktgen, UDP / IPv4, 64byte, single ring, 8K ring size.

Before: 16,477,619 pps
After:  17,085,793 pps

3.7% improvement

Fixes: 3a2f70331226 ("net/mlx5: Use order-0 allocations for all WQ types")
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_rx.c   | 12 +++++-----
 .../net/ethernet/mellanox/mlx5/core/en_tx.c   | 22 +++++++++----------
 .../ethernet/mellanox/mlx5/core/ipoib/ipoib.h |  5 ++---
 drivers/net/ethernet/mellanox/mlx5/core/wq.c  |  5 -----
 drivers/net/ethernet/mellanox/mlx5/core/wq.h  | 11 +++++-----
 include/linux/mlx5/driver.h                   |  8 +++++++
 6 files changed, 31 insertions(+), 32 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 15d8ae28c040..00172dee5339 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -432,10 +432,9 @@ static inline u16 mlx5e_icosq_wrap_cnt(struct mlx5e_icosq *sq)
 
 static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq,
 					      struct mlx5_wq_cyc *wq,
-					      u16 pi, u16 frag_pi)
+					      u16 pi, u16 nnops)
 {
 	struct mlx5e_sq_wqe_info *edge_wi, *wi = &sq->db.ico_wqe[pi];
-	u8 nnops = mlx5_wq_cyc_get_frag_size(wq) - frag_pi;
 
 	edge_wi = wi + nnops;
 
@@ -454,15 +453,14 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 	struct mlx5_wq_cyc *wq = &sq->wq;
 	struct mlx5e_umr_wqe *umr_wqe;
 	u16 xlt_offset = ix << (MLX5E_LOG_ALIGNED_MPWQE_PPW - 1);
-	u16 pi, frag_pi;
+	u16 pi, contig_wqebbs_room;
 	int err;
 	int i;
 
 	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-	frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);
-
-	if (unlikely(frag_pi + MLX5E_UMR_WQEBBS > mlx5_wq_cyc_get_frag_size(wq))) {
-		mlx5e_fill_icosq_frag_edge(sq, wq, pi, frag_pi);
+	contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+	if (unlikely(contig_wqebbs_room < MLX5E_UMR_WQEBBS)) {
+		mlx5e_fill_icosq_frag_edge(sq, wq, pi, contig_wqebbs_room);
 		pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
 	}
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index ae73ea992845..6dacaeba2fbf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -290,10 +290,9 @@ dma_unmap_wqe_err:
 
 static inline void mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq,
 					   struct mlx5_wq_cyc *wq,
-					   u16 pi, u16 frag_pi)
+					   u16 pi, u16 nnops)
 {
 	struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
-	u8 nnops = mlx5_wq_cyc_get_frag_size(wq) - frag_pi;
 
 	edge_wi = wi + nnops;
 
@@ -348,8 +347,8 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 	struct mlx5e_tx_wqe_info *wi;
 
 	struct mlx5e_sq_stats *stats = sq->stats;
+	u16 headlen, ihs, contig_wqebbs_room;
 	u16 ds_cnt, ds_cnt_inl = 0;
-	u16 headlen, ihs, frag_pi;
 	u8 num_wqebbs, opcode;
 	u32 num_bytes;
 	int num_dma;
@@ -386,9 +385,9 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 	}
 
 	num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
-	frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);
-	if (unlikely(frag_pi + num_wqebbs > mlx5_wq_cyc_get_frag_size(wq))) {
-		mlx5e_fill_sq_frag_edge(sq, wq, pi, frag_pi);
+	contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+	if (unlikely(contig_wqebbs_room < num_wqebbs)) {
+		mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
 		mlx5e_sq_fetch_wqe(sq, &wqe, &pi);
 	}
 
@@ -636,7 +635,7 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 	struct mlx5e_tx_wqe_info *wi;
 
 	struct mlx5e_sq_stats *stats = sq->stats;
-	u16 headlen, ihs, pi, frag_pi;
+	u16 headlen, ihs, pi, contig_wqebbs_room;
 	u16 ds_cnt, ds_cnt_inl = 0;
 	u8 num_wqebbs, opcode;
 	u32 num_bytes;
@@ -672,13 +671,14 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 	}
 
 	num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
-	frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);
-	if (unlikely(frag_pi + num_wqebbs > mlx5_wq_cyc_get_frag_size(wq))) {
+	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+	contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+	if (unlikely(contig_wqebbs_room < num_wqebbs)) {
+		mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
 		pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-		mlx5e_fill_sq_frag_edge(sq, wq, pi, frag_pi);
 	}
 
-	mlx5i_sq_fetch_wqe(sq, &wqe, &pi);
+	mlx5i_sq_fetch_wqe(sq, &wqe, pi);
 
 	/* fill wqe */
 	wi       = &sq->db.wqe_info[pi];
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
index 08eac92fc26c..0982c579ec74 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
@@ -109,12 +109,11 @@ struct mlx5i_tx_wqe {
 
 static inline void mlx5i_sq_fetch_wqe(struct mlx5e_txqsq *sq,
 				      struct mlx5i_tx_wqe **wqe,
-				      u16 *pi)
+				      u16 pi)
 {
 	struct mlx5_wq_cyc *wq = &sq->wq;
 
-	*pi  = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-	*wqe = mlx5_wq_cyc_get_wqe(wq, *pi);
+	*wqe = mlx5_wq_cyc_get_wqe(wq, pi);
 	memset(*wqe, 0, sizeof(**wqe));
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
index 68e7f8df2a6d..ddca327e8950 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -39,11 +39,6 @@ u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq)
 	return (u32)wq->fbc.sz_m1 + 1;
 }
 
-u16 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq)
-{
-	return wq->fbc.frag_sz_m1 + 1;
-}
-
 u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq)
 {
 	return wq->fbc.sz_m1 + 1;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
index 3a1a170bb2d7..b1293d153a58 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
@@ -80,7 +80,6 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		       void *wqc, struct mlx5_wq_cyc *wq,
 		       struct mlx5_wq_ctrl *wq_ctrl);
 u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq);
-u16 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq);
 
 int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		      void *qpc, struct mlx5_wq_qp *wq,
@@ -140,11 +139,6 @@ static inline u16 mlx5_wq_cyc_ctr2ix(struct mlx5_wq_cyc *wq, u16 ctr)
 	return ctr & wq->fbc.sz_m1;
 }
 
-static inline u16 mlx5_wq_cyc_ctr2fragix(struct mlx5_wq_cyc *wq, u16 ctr)
-{
-	return ctr & wq->fbc.frag_sz_m1;
-}
-
 static inline u16 mlx5_wq_cyc_get_head(struct mlx5_wq_cyc *wq)
 {
 	return mlx5_wq_cyc_ctr2ix(wq, wq->wqe_ctr);
@@ -160,6 +154,11 @@ static inline void *mlx5_wq_cyc_get_wqe(struct mlx5_wq_cyc *wq, u16 ix)
 	return mlx5_frag_buf_get_wqe(&wq->fbc, ix);
 }
 
+static inline u16 mlx5_wq_cyc_get_contig_wqebbs(struct mlx5_wq_cyc *wq, u16 ix)
+{
+	return mlx5_frag_buf_get_idx_last_contig_stride(&wq->fbc, ix) - ix + 1;
+}
+
 static inline int mlx5_wq_cyc_cc_bigger(u16 cc1, u16 cc2)
 {
 	int equal   = (cc1 == cc2);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 66d94b4557cf..88a041b73abf 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1032,6 +1032,14 @@ static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc,
 		((fbc->frag_sz_m1 & ix) << fbc->log_stride);
 }
 
+static inline u32
+mlx5_frag_buf_get_idx_last_contig_stride(struct mlx5_frag_buf_ctrl *fbc, u32 ix)
+{
+	u32 last_frag_stride_idx = (ix + fbc->strides_offset) | fbc->frag_sz_m1;
+
+	return min_t(u32, last_frag_stride_idx - fbc->strides_offset, fbc->sz_m1);
+}
+
 int mlx5_cmd_init(struct mlx5_core_dev *dev);
 void mlx5_cmd_cleanup(struct mlx5_core_dev *dev);
 void mlx5_cmd_use_events(struct mlx5_core_dev *dev);

From cee271678d0e3177a25d0fcb2fa5e051d48e4262 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= <bjorn.topel@intel.com>
Date: Mon, 8 Oct 2018 19:40:16 +0200
Subject: [PATCH 18/95] xsk: do not call synchronize_net() under RCU read lock
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The XSKMAP update and delete functions called synchronize_net(), which
can sleep. It is not allowed to sleep during an RCU read section.

Instead we need to make sure that the sock sk_destruct (xsk_destruct)
function is asynchronously called after an RCU grace period. Setting
the SOCK_RCU_FREE flag for XDP sockets takes care of this.

Fixes: fbfc504a24f5 ("bpf: introduce new bpf AF_XDP map type BPF_MAP_TYPE_XSKMAP")
Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/xskmap.c | 10 ++--------
 net/xdp/xsk.c       |  2 ++
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
index 9f8463afda9c..47147c9e184d 100644
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
@@ -192,11 +192,8 @@ static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
 	sock_hold(sock->sk);
 
 	old_xs = xchg(&m->xsk_map[i], xs);
-	if (old_xs) {
-		/* Make sure we've flushed everything. */
-		synchronize_net();
+	if (old_xs)
 		sock_put((struct sock *)old_xs);
-	}
 
 	sockfd_put(sock);
 	return 0;
@@ -212,11 +209,8 @@ static int xsk_map_delete_elem(struct bpf_map *map, void *key)
 		return -EINVAL;
 
 	old_xs = xchg(&m->xsk_map[k], NULL);
-	if (old_xs) {
-		/* Make sure we've flushed everything. */
-		synchronize_net();
+	if (old_xs)
 		sock_put((struct sock *)old_xs);
-	}
 
 	return 0;
 }
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 4e937cd7c17d..661504042d30 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -744,6 +744,8 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
 	sk->sk_destruct = xsk_destruct;
 	sk_refcnt_debug_inc(sk);
 
+	sock_set_flag(sk, SOCK_RCU_FREE);
+
 	xs = xdp_sk(sk);
 	mutex_init(&xs->mutex);
 	spin_lock_init(&xs->tx_completion_lock);

From 9f7e43da6ae4862b48bac233838ba808c1167a0d Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Tue, 9 Oct 2018 09:59:36 -0700
Subject: [PATCH 19/95] net/xfrm: fix out-of-bounds packet access

BUG: KASAN: slab-out-of-bounds in _decode_session6+0x1331/0x14e0
net/ipv6/xfrm6_policy.c:161
Read of size 1 at addr ffff8801d882eec7 by task syz-executor1/6667
Call Trace:
  __dump_stack lib/dump_stack.c:77 [inline]
  dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113
  print_address_description+0x6c/0x20b mm/kasan/report.c:256
  kasan_report_error mm/kasan/report.c:354 [inline]
  kasan_report.cold.7+0x242/0x30d mm/kasan/report.c:412
  __asan_report_load1_noabort+0x14/0x20 mm/kasan/report.c:430
  _decode_session6+0x1331/0x14e0 net/ipv6/xfrm6_policy.c:161
  __xfrm_decode_session+0x71/0x140 net/xfrm/xfrm_policy.c:2299
  xfrm_decode_session include/net/xfrm.h:1232 [inline]
  vti6_tnl_xmit+0x3c3/0x1bc1 net/ipv6/ip6_vti.c:542
  __netdev_start_xmit include/linux/netdevice.h:4313 [inline]
  netdev_start_xmit include/linux/netdevice.h:4322 [inline]
  xmit_one net/core/dev.c:3217 [inline]
  dev_hard_start_xmit+0x272/0xc10 net/core/dev.c:3233
  __dev_queue_xmit+0x2ab2/0x3870 net/core/dev.c:3803
  dev_queue_xmit+0x17/0x20 net/core/dev.c:3836

Reported-by: syzbot+acffccec848dc13fe459@syzkaller.appspotmail.com
Reported-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv6/xfrm6_policy.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index ef3defaf43b9..d35bcf92969c 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -146,8 +146,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
 	fl6->daddr = reverse ? hdr->saddr : hdr->daddr;
 	fl6->saddr = reverse ? hdr->daddr : hdr->saddr;
 
-	while (nh + offset + 1 < skb->data ||
-	       pskb_may_pull(skb, nh + offset + 1 - skb->data)) {
+	while (nh + offset + sizeof(*exthdr) < skb->data ||
+	       pskb_may_pull(skb, nh + offset + sizeof(*exthdr) - skb->data)) {
 		nh = skb_network_header(skb);
 		exthdr = (struct ipv6_opt_hdr *)(nh + offset);
 

From 9dffff200fd178f11dd50eb1fd8ccd0650c9284e Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 10 Oct 2018 18:02:21 +0200
Subject: [PATCH 20/95] xfrm: policy: use hlist rcu variants on insert

bydst table/list lookups use rcu, so insertions must use rcu versions.

Fixes: a7c44247f704e ("xfrm: policy: make xfrm_policy_lookup_bytype lockless")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_policy.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index f094d4b3520d..119a427d9b2b 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -632,9 +632,9 @@ static void xfrm_hash_rebuild(struct work_struct *work)
 				break;
 		}
 		if (newpos)
-			hlist_add_behind(&policy->bydst, newpos);
+			hlist_add_behind_rcu(&policy->bydst, newpos);
 		else
-			hlist_add_head(&policy->bydst, chain);
+			hlist_add_head_rcu(&policy->bydst, chain);
 	}
 
 	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
@@ -774,9 +774,9 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 			break;
 	}
 	if (newpos)
-		hlist_add_behind(&policy->bydst, newpos);
+		hlist_add_behind_rcu(&policy->bydst, newpos);
 	else
-		hlist_add_head(&policy->bydst, chain);
+		hlist_add_head_rcu(&policy->bydst, chain);
 	__xfrm_policy_link(policy, dir);
 
 	/* After previous checking, family can either be AF_INET or AF_INET6 */

From 5e65a20341e02df637d1c16cd487858d2c6a876a Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Thu, 11 Oct 2018 15:29:30 -0400
Subject: [PATCH 21/95] blk-wbt: wake up all when we scale up, not down

Tetsuo brought to my attention that I screwed up the scale_up/scale_down
helpers when I factored out the rq-qos code.  We need to wake up all the
waiters when we add slots for requests to make, not when we shrink the
slots.  Otherwise we'll end up things waiting forever.  This was a
mistake and simply puts everything back the way it was.

cc: stable@vger.kernel.org
Fixes: a79050434b45 ("blk-rq-qos: refactor out common elements of blk-wbt")
eported-by: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-wbt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 8e20a0677dcf..8ac93fcbaa2e 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -310,6 +310,7 @@ static void scale_up(struct rq_wb *rwb)
 	rq_depth_scale_up(&rwb->rq_depth);
 	calc_wb_limits(rwb);
 	rwb->unknown_cnt = 0;
+	rwb_wake_all(rwb);
 	rwb_trace_step(rwb, "scale up");
 }
 
@@ -318,7 +319,6 @@ static void scale_down(struct rq_wb *rwb, bool hard_throttle)
 	rq_depth_scale_down(&rwb->rq_depth, hard_throttle);
 	calc_wb_limits(rwb);
 	rwb->unknown_cnt = 0;
-	rwb_wake_all(rwb);
 	rwb_trace_step(rwb, "scale down");
 }
 

From 34d1b82cd198379545ca634119e296fce06252aa Mon Sep 17 00:00:00 2001
From: Stephen Boyd <swboyd@chromium.org>
Date: Mon, 8 Oct 2018 07:55:20 -0700
Subject: [PATCH 22/95] i2c: Fix kerneldoc for renamed i2c dma put function

This function was renamed in commit 82fe39a6bc7b ("i2c: refactor
function to release a DMA safe buffer") but this kernel doc wasn't
updated to point at the new function. Rename it.

Fixes: 82fe39a6bc7b ("i2c: refactor function to release a DMA safe buffer")
Signed-off-by: Stephen Boyd <swboyd@chromium.org>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/i2c-core-base.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index 9ee9a15e7134..9200e349f29e 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -2270,7 +2270,7 @@ EXPORT_SYMBOL(i2c_put_adapter);
  *
  * Return: NULL if a DMA safe buffer was not obtained. Use msg->buf with PIO.
  *	   Or a valid pointer to be used with DMA. After use, release it by
- *	   calling i2c_release_dma_safe_msg_buf().
+ *	   calling i2c_put_dma_safe_msg_buf().
  *
  * This function must only be called from process context!
  */

From d91680e687f47984ffd3200c8e5d587903e7bd11 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 11 Oct 2018 11:29:14 +0100
Subject: [PATCH 23/95] arm64: Fix /proc/iomem for reserved but not memory
 regions

We describe ranges of 'reserved' memory to userspace via /proc/iomem.
Commit 50d7ba36b916 ("arm64: export memblock_reserve()d regions via
/proc/iomem") updated the logic to export regions that were reserved
because their contents should be preserved. This allowed kexec-tools
to tell the difference between 'reserved' memory that must be
preserved and not overwritten, (e.g. the ACPI tables), and 'nomap'
memory that must not be touched without knowing the memory-attributes
(e.g. RAS CPER regions).

The above commit wrongly assumed that memblock_reserve() would not
be used to reserve regions that aren't memory. It turns out this is
exactly what early_init_dt_reserve_memory_arch() will do if it finds
a DT reserved-memory that was also carved out of the memory node, which
results in a WARN_ON_ONCE() and the region being reserved instead of
ignored. The ramoops description on hikey and dragonboard-410c both do
this, so we can't simply write this configuration off as "buggy firmware".

Avoid this issue by rewriting reserve_memblock_reserved_regions() so
that only the portions of reserved regions which overlap with mapped
memory are actually reserved.

Fixes: 50d7ba36b916 ("arm64: export memblock_reserve()d regions via /proc/iomem")
Reported-by: John Stultz <john.stultz@linaro.org>
Reported-by: Paolo Pisati <p.pisati@gmail.com>
CC: Akashi Takahiro <takahiro.akashi@linaro.org>
CC: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: James Morse <james.morse@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/kernel/setup.c | 52 +++++++++++++++++++--------------------
 1 file changed, 25 insertions(+), 27 deletions(-)

diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 5b4fac434c84..b3354ff94e79 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -64,6 +64,9 @@
 #include <asm/xen/hypervisor.h>
 #include <asm/mmu_context.h>
 
+static int num_standard_resources;
+static struct resource *standard_resources;
+
 phys_addr_t __fdt_pointer __initdata;
 
 /*
@@ -206,14 +209,19 @@ static void __init request_standard_resources(void)
 {
 	struct memblock_region *region;
 	struct resource *res;
+	unsigned long i = 0;
 
 	kernel_code.start   = __pa_symbol(_text);
 	kernel_code.end     = __pa_symbol(__init_begin - 1);
 	kernel_data.start   = __pa_symbol(_sdata);
 	kernel_data.end     = __pa_symbol(_end - 1);
 
+	num_standard_resources = memblock.memory.cnt;
+	standard_resources = alloc_bootmem_low(num_standard_resources *
+					       sizeof(*standard_resources));
+
 	for_each_memblock(memory, region) {
-		res = alloc_bootmem_low(sizeof(*res));
+		res = &standard_resources[i++];
 		if (memblock_is_nomap(region)) {
 			res->name  = "reserved";
 			res->flags = IORESOURCE_MEM;
@@ -243,36 +251,26 @@ static void __init request_standard_resources(void)
 
 static int __init reserve_memblock_reserved_regions(void)
 {
-	phys_addr_t start, end, roundup_end = 0;
-	struct resource *mem, *res;
-	u64 i;
+	u64 i, j;
 
-	for_each_reserved_mem_region(i, &start, &end) {
-		if (end <= roundup_end)
-			continue; /* done already */
+	for (i = 0; i < num_standard_resources; ++i) {
+		struct resource *mem = &standard_resources[i];
+		phys_addr_t r_start, r_end, mem_size = resource_size(mem);
 
-		start = __pfn_to_phys(PFN_DOWN(start));
-		end = __pfn_to_phys(PFN_UP(end)) - 1;
-		roundup_end = end;
-
-		res = kzalloc(sizeof(*res), GFP_ATOMIC);
-		if (WARN_ON(!res))
-			return -ENOMEM;
-		res->start = start;
-		res->end = end;
-		res->name  = "reserved";
-		res->flags = IORESOURCE_MEM;
-
-		mem = request_resource_conflict(&iomem_resource, res);
-		/*
-		 * We expected memblock_reserve() regions to conflict with
-		 * memory created by request_standard_resources().
-		 */
-		if (WARN_ON_ONCE(!mem))
+		if (!memblock_is_region_reserved(mem->start, mem_size))
 			continue;
-		kfree(res);
 
-		reserve_region_with_split(mem, start, end, "reserved");
+		for_each_reserved_mem_region(j, &r_start, &r_end) {
+			resource_size_t start, end;
+
+			start = max(PFN_PHYS(PFN_DOWN(r_start)), mem->start);
+			end = min(PFN_PHYS(PFN_UP(r_end)) - 1, mem->end);
+
+			if (start > mem->end || end < mem->start)
+				continue;
+
+			reserve_region_with_split(mem, start, end, "reserved");
+		}
 	}
 
 	return 0;

From ca2b497253ad01c80061a1f3ee9eb91b5d54a849 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 5 Oct 2018 13:24:36 +0100
Subject: [PATCH 24/95] arm64: perf: Reject stand-alone CHAIN events for PMUv3

It doesn't make sense for a perf event to be configured as a CHAIN event
in isolation, so extend the arm_pmu structure with a ->filter_match()
function to allow the backend PMU implementation to reject CHAIN events
early.

Cc: <stable@vger.kernel.org>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/kernel/perf_event.c | 7 +++++++
 drivers/perf/arm_pmu.c         | 8 +++++++-
 include/linux/perf/arm_pmu.h   | 1 +
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 8e38d5267f22..e213f8e867f6 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -966,6 +966,12 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
 	return 0;
 }
 
+static int armv8pmu_filter_match(struct perf_event *event)
+{
+	unsigned long evtype = event->hw.config_base & ARMV8_PMU_EVTYPE_EVENT;
+	return evtype != ARMV8_PMUV3_PERFCTR_CHAIN;
+}
+
 static void armv8pmu_reset(void *info)
 {
 	struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
@@ -1114,6 +1120,7 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu)
 	cpu_pmu->stop			= armv8pmu_stop,
 	cpu_pmu->reset			= armv8pmu_reset,
 	cpu_pmu->set_event_filter	= armv8pmu_set_event_filter;
+	cpu_pmu->filter_match		= armv8pmu_filter_match;
 
 	return 0;
 }
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 7f01f6f60b87..d0b7dd8fb184 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -485,7 +485,13 @@ static int armpmu_filter_match(struct perf_event *event)
 {
 	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	unsigned int cpu = smp_processor_id();
-	return cpumask_test_cpu(cpu, &armpmu->supported_cpus);
+	int ret;
+
+	ret = cpumask_test_cpu(cpu, &armpmu->supported_cpus);
+	if (ret && armpmu->filter_match)
+		return armpmu->filter_match(event);
+
+	return ret;
 }
 
 static ssize_t armpmu_cpumask_show(struct device *dev,
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 10f92e1d8e7b..bf309ff6f244 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -99,6 +99,7 @@ struct arm_pmu {
 	void		(*stop)(struct arm_pmu *);
 	void		(*reset)(void *);
 	int		(*map_event)(struct perf_event *event);
+	int		(*filter_match)(struct perf_event *event);
 	int		num_events;
 	bool		secure_access; /* 32-bit ARM only */
 #define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40

From fee5150c484c75c473dc1e2d07cb6151384ef85f Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Wed, 10 Oct 2018 21:18:18 +0100
Subject: [PATCH 25/95] gfs2: Fix iomap buffered write support for journaled
 files (2)

It turns out that the fix in commit 6636c3cc56 is bad; the assertion
that the iomap code no longer creates buffer heads is incorrect for
filesystems that set the IOMAP_F_BUFFER_HEAD flag.

Instead, what's happening is that gfs2_iomap_begin_write treats all
files that have the jdata flag set as journaled files, which is
incorrect as long as those files are inline ("stuffed").  We're handling
stuffed files directly via the page cache, which is why we ended up with
pages without buffer heads in gfs2_page_add_databufs.

Fix this by handling stuffed journaled files correctly in
gfs2_iomap_begin_write.

This reverts commit 6636c3cc5690c11631e6366cf9a28fb99c8b25bb.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/bmap.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 3c159a7f9a9e..84544a4f012d 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -975,10 +975,6 @@ static void gfs2_iomap_journaled_page_done(struct inode *inode, loff_t pos,
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 
-	if (!page_has_buffers(page)) {
-		create_empty_buffers(page, inode->i_sb->s_blocksize,
-				     (1 << BH_Dirty)|(1 << BH_Uptodate));
-	}
 	gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied);
 }
 
@@ -1061,7 +1057,7 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
 		}
 	}
 	release_metapath(&mp);
-	if (gfs2_is_jdata(ip))
+	if (!gfs2_is_stuffed(ip) && gfs2_is_jdata(ip))
 		iomap->page_done = gfs2_iomap_journaled_page_done;
 	return 0;
 

From cfdc3170d214046b9509183fe9b9544dc644d40b Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 12 Oct 2018 10:31:58 -0700
Subject: [PATCH 26/95] sparc: Fix single-pcr perf event counter management.

It is important to clear the hw->state value for non-stopped events
when they are added into the PMU.  Otherwise when the event is
scheduled out, we won't read the counter because HES_UPTODATE is still
set.  This breaks 'perf stat' and similar use cases, causing all the
events to show zero.

This worked for multi-pcr because we make explicit sparc_pmu_start()
calls in calculate_multiple_pcrs().  calculate_single_pcr() doesn't do
this because the idea there is to accumulate all of the counter
settings into the single pcr value.  So we have to add explicit
hw->state handling there.

Like x86, we use the PERF_HES_ARCH bit to track truly stopped events
so that we don't accidently start them on a reload.

Related to all of this, sparc_pmu_start() is missing a userpage update
so add it.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/perf_event.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index d3149baaa33c..a4cc26bd89a2 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -927,6 +927,8 @@ static void read_in_all_counters(struct cpu_hw_events *cpuc)
 			sparc_perf_event_update(cp, &cp->hw,
 						cpuc->current_idx[i]);
 			cpuc->current_idx[i] = PIC_NO_INDEX;
+			if (cp->hw.state & PERF_HES_STOPPED)
+				cp->hw.state |= PERF_HES_ARCH;
 		}
 	}
 }
@@ -959,10 +961,12 @@ static void calculate_single_pcr(struct cpu_hw_events *cpuc)
 
 		enc = perf_event_get_enc(cpuc->events[i]);
 		cpuc->pcr[0] &= ~mask_for_index(idx);
-		if (hwc->state & PERF_HES_STOPPED)
+		if (hwc->state & PERF_HES_ARCH) {
 			cpuc->pcr[0] |= nop_for_index(idx);
-		else
+		} else {
 			cpuc->pcr[0] |= event_encoding(enc, idx);
+			hwc->state = 0;
+		}
 	}
 out:
 	cpuc->pcr[0] |= cpuc->event[0]->hw.config_base;
@@ -988,6 +992,9 @@ static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc)
 
 		cpuc->current_idx[i] = idx;
 
+		if (cp->hw.state & PERF_HES_ARCH)
+			continue;
+
 		sparc_pmu_start(cp, PERF_EF_RELOAD);
 	}
 out:
@@ -1079,6 +1086,8 @@ static void sparc_pmu_start(struct perf_event *event, int flags)
 	event->hw.state = 0;
 
 	sparc_pmu_enable_event(cpuc, &event->hw, idx);
+
+	perf_event_update_userpage(event);
 }
 
 static void sparc_pmu_stop(struct perf_event *event, int flags)
@@ -1371,9 +1380,9 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags)
 	cpuc->events[n0] = event->hw.event_base;
 	cpuc->current_idx[n0] = PIC_NO_INDEX;
 
-	event->hw.state = PERF_HES_UPTODATE;
+	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
 	if (!(ef_flags & PERF_EF_START))
-		event->hw.state |= PERF_HES_STOPPED;
+		event->hw.state |= PERF_HES_ARCH;
 
 	/*
 	 * If group events scheduling transaction was started,

From 455adb3174d2c8518cef1a61140c211f6ac224d2 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 12 Oct 2018 10:33:20 -0700
Subject: [PATCH 27/95] sparc: Throttle perf events properly.

Like x86 and arm, call perf_sample_event_took() in perf event
NMI interrupt handler.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/perf_event.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index a4cc26bd89a2..67b3e6b3ce5d 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -24,6 +24,7 @@
 #include <asm/cpudata.h>
 #include <linux/uaccess.h>
 #include <linux/atomic.h>
+#include <linux/sched/clock.h>
 #include <asm/nmi.h>
 #include <asm/pcr.h>
 #include <asm/cacheflush.h>
@@ -1612,6 +1613,8 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
 	struct perf_sample_data data;
 	struct cpu_hw_events *cpuc;
 	struct pt_regs *regs;
+	u64 finish_clock;
+	u64 start_clock;
 	int i;
 
 	if (!atomic_read(&active_events))
@@ -1625,6 +1628,8 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
 		return NOTIFY_DONE;
 	}
 
+	start_clock = sched_clock();
+
 	regs = args->regs;
 
 	cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -1663,6 +1668,10 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
 			sparc_pmu_stop(event, 0);
 	}
 
+	finish_clock = sched_clock();
+
+	perf_sample_event_took(finish_clock - start_clock);
+
 	return NOTIFY_STOP;
 }
 

From 1cff514a5101d514900f0e94613402d8a18359e6 Mon Sep 17 00:00:00 2001
From: zhong jiang <zhongjiang@huawei.com>
Date: Fri, 12 Oct 2018 21:34:26 -0700
Subject: [PATCH 28/95] ocfs2: fix a GCC warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the following compile warning:

fs/ocfs2/dlmglue.c:99:30: warning: lockdep_keys defined but not used [-Wunused-variable]
 static struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES];

Link: http://lkml.kernel.org/r/1536938148-32110-1-git-send-email-zhongjiang@huawei.com
Signed-off-by: zhong jiang <zhongjiang@huawei.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ocfs2/dlmglue.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 8e712b614e6e..933aac5da193 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -96,7 +96,9 @@ struct ocfs2_unblock_ctl {
 };
 
 /* Lockdep class keys */
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
 static struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES];
+#endif
 
 static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
 					int new_level);

From 7aa867dd89526e9cfd9714d8b9b587c016eaea34 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Fri, 12 Oct 2018 21:34:32 -0700
Subject: [PATCH 29/95] mm/mmap.c: don't clobber partially overlapping VMA with
 MAP_FIXED_NOREPLACE

Daniel Micay reports that attempting to use MAP_FIXED_NOREPLACE in an
application causes that application to randomly crash.  The existing check
for handling MAP_FIXED_NOREPLACE looks up the first VMA that either
overlaps or follows the requested region, and then bails out if that VMA
overlaps *the start* of the requested region.  It does not bail out if the
VMA only overlaps another part of the requested region.

Fix it by checking that the found VMA only starts at or after the end of
the requested region, in which case there is no overlap.

Test case:

user@debian:~$ cat mmap_fixed_simple.c
#include <sys/mman.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#ifndef MAP_FIXED_NOREPLACE
#define MAP_FIXED_NOREPLACE 0x100000
#endif

int main(void) {
  char *p;

  errno = 0;
  p = mmap((void*)0x10001000, 0x4000, PROT_NONE,
MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED_NOREPLACE, -1, 0);
  printf("p1=%p err=%m\n", p);

  errno = 0;
  p = mmap((void*)0x10000000, 0x2000, PROT_READ,
MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED_NOREPLACE, -1, 0);
  printf("p2=%p err=%m\n", p);

  char cmd[100];
  sprintf(cmd, "cat /proc/%d/maps", getpid());
  system(cmd);

  return 0;
}
user@debian:~$ gcc -o mmap_fixed_simple mmap_fixed_simple.c
user@debian:~$ ./mmap_fixed_simple
p1=0x10001000 err=Success
p2=0x10000000 err=Success
10000000-10002000 r--p 00000000 00:00 0
10002000-10005000 ---p 00000000 00:00 0
564a9a06f000-564a9a070000 r-xp 00000000 fe:01 264004
  /home/user/mmap_fixed_simple
564a9a26f000-564a9a270000 r--p 00000000 fe:01 264004
  /home/user/mmap_fixed_simple
564a9a270000-564a9a271000 rw-p 00001000 fe:01 264004
  /home/user/mmap_fixed_simple
564a9a54a000-564a9a56b000 rw-p 00000000 00:00 0                          [heap]
7f8eba447000-7f8eba5dc000 r-xp 00000000 fe:01 405885
  /lib/x86_64-linux-gnu/libc-2.24.so
7f8eba5dc000-7f8eba7dc000 ---p 00195000 fe:01 405885
  /lib/x86_64-linux-gnu/libc-2.24.so
7f8eba7dc000-7f8eba7e0000 r--p 00195000 fe:01 405885
  /lib/x86_64-linux-gnu/libc-2.24.so
7f8eba7e0000-7f8eba7e2000 rw-p 00199000 fe:01 405885
  /lib/x86_64-linux-gnu/libc-2.24.so
7f8eba7e2000-7f8eba7e6000 rw-p 00000000 00:00 0
7f8eba7e6000-7f8eba809000 r-xp 00000000 fe:01 405876
  /lib/x86_64-linux-gnu/ld-2.24.so
7f8eba9e9000-7f8eba9eb000 rw-p 00000000 00:00 0
7f8ebaa06000-7f8ebaa09000 rw-p 00000000 00:00 0
7f8ebaa09000-7f8ebaa0a000 r--p 00023000 fe:01 405876
  /lib/x86_64-linux-gnu/ld-2.24.so
7f8ebaa0a000-7f8ebaa0b000 rw-p 00024000 fe:01 405876
  /lib/x86_64-linux-gnu/ld-2.24.so
7f8ebaa0b000-7f8ebaa0c000 rw-p 00000000 00:00 0
7ffcc99fa000-7ffcc9a1b000 rw-p 00000000 00:00 0                          [stack]
7ffcc9b44000-7ffcc9b47000 r--p 00000000 00:00 0                          [vvar]
7ffcc9b47000-7ffcc9b49000 r-xp 00000000 00:00 0                          [vdso]
ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0
  [vsyscall]
user@debian:~$ uname -a
Linux debian 4.19.0-rc6+ #181 SMP Wed Oct 3 23:43:42 CEST 2018 x86_64 GNU/Linux
user@debian:~$

As you can see, the first page of the mapping at 0x10001000 was clobbered.

Link: http://lkml.kernel.org/r/20181010152736.99475-1-jannh@google.com
Fixes: a4ff8e8620d3 ("mm: introduce MAP_FIXED_NOREPLACE")
Signed-off-by: Jann Horn <jannh@google.com>
Reported-by: Daniel Micay <danielmicay@gmail.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: John Hubbard <jhubbard@nvidia.com>
Acked-by: Kees Cook <keescook@chromium.org>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/mmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/mmap.c b/mm/mmap.c
index 5f2b2b184c60..f7cd9cb966c0 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1410,7 +1410,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
 	if (flags & MAP_FIXED_NOREPLACE) {
 		struct vm_area_struct *vma = find_vma(mm, addr);
 
-		if (vma && vma->vm_start <= addr)
+		if (vma && vma->vm_start < addr + len)
 			return -EEXIST;
 	}
 

From bfba8e5cf28f413aa05571af493871d74438979f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= <jglisse@redhat.com>
Date: Fri, 12 Oct 2018 21:34:36 -0700
Subject: [PATCH 30/95] mm/thp: fix call to mmu_notifier in
 set_pmd_migration_entry() v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Inside set_pmd_migration_entry() we are holding page table locks and thus
we can not sleep so we can not call invalidate_range_start/end()

So remove call to mmu_notifier_invalidate_range_start/end() because they
are call inside the function calling set_pmd_migration_entry() (see
try_to_unmap_one()).

Link: http://lkml.kernel.org/r/20181012181056.7864-1-jglisse@redhat.com
Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
Reported-by: Andrea Arcangeli <aarcange@redhat.com>
Reviewed-by: Zi Yan <zi.yan@cs.rutgers.edu>
Acked-by: Michal Hocko <mhocko@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: David Nellans <dnellans@nvidia.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/huge_memory.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 00704060b7f7..58269f8ba7c4 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2885,9 +2885,6 @@ void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw,
 	if (!(pvmw->pmd && !pvmw->pte))
 		return;
 
-	mmu_notifier_invalidate_range_start(mm, address,
-			address + HPAGE_PMD_SIZE);
-
 	flush_cache_range(vma, address, address + HPAGE_PMD_SIZE);
 	pmdval = *pvmw->pmd;
 	pmdp_invalidate(vma, address, pvmw->pmd);
@@ -2900,9 +2897,6 @@ void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw,
 	set_pmd_at(mm, address, pvmw->pmd, pmdswp);
 	page_remove_rmap(page, true);
 	put_page(page);
-
-	mmu_notifier_invalidate_range_end(mm, address,
-			address + HPAGE_PMD_SIZE);
 }
 
 void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)

From ac081c3be3fae6d0cc3e1862507fca3862d30b67 Mon Sep 17 00:00:00 2001
From: Khazhismel Kumykov <khazhy@google.com>
Date: Fri, 12 Oct 2018 21:34:40 -0700
Subject: [PATCH 31/95] fs/fat/fatent.c: add cond_resched() to
 fat_count_free_clusters()

On non-preempt kernels this loop can take a long time (more than 50 ticks)
processing through entries.

Link: http://lkml.kernel.org/r/20181010172623.57033-1-khazhy@google.com
Signed-off-by: Khazhismel Kumykov <khazhy@google.com>
Acked-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/fat/fatent.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index defc2168de91..f58c0cacc531 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -682,6 +682,7 @@ int fat_count_free_clusters(struct super_block *sb)
 			if (ops->ent_get(&fatent) == FAT_ENT_FREE)
 				free++;
 		} while (fat_ent_next(sbi, &fatent));
+		cond_resched();
 	}
 	sbi->free_clusters = free;
 	sbi->free_clus_valid = 1;

From f8ccb14fd6c9f58ef766062b7e3929c423580f09 Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Sat, 13 Oct 2018 10:18:41 +0200
Subject: [PATCH 32/95] ubifs: Fix WARN_ON logic in exit path

ubifs_assert() is not WARN_ON(), so we have to invert
the checks.
Randy faced this warning with UBIFS being a module, since
most users use UBIFS as builtin because UBIFS is the rootfs
nobody noticed so far. :-(
Including me.

Reported-by: Randy Dunlap <rdunlap@infradead.org>
Fixes: 54169ddd382d ("ubifs: Turn two ubifs_assert() into a WARN_ON()")
Signed-off-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ubifs/super.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index bf000c8aeffb..fec62e9dfbe6 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -2337,8 +2337,8 @@ late_initcall(ubifs_init);
 
 static void __exit ubifs_exit(void)
 {
-	WARN_ON(list_empty(&ubifs_infos));
-	WARN_ON(atomic_long_read(&ubifs_clean_zn_cnt) == 0);
+	WARN_ON(!list_empty(&ubifs_infos));
+	WARN_ON(atomic_long_read(&ubifs_clean_zn_cnt) != 0);
 
 	dbg_debugfs_exit();
 	ubifs_compressors_exit();

From 5f8bb004bca43f7b5043c02b4aa5b9b43e15fe50 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Thu, 11 Oct 2018 12:03:12 +0200
Subject: [PATCH 33/95] KVM: vmx: hyper-v: don't pass EPT configuration info to
 vmx_hv_remote_flush_tlb()

I'm observing random crashes in multi-vCPU L2 guests running on KVM on
Hyper-V. I bisected the issue to the commit 877ad952be3d ("KVM: vmx: Add
tlb_remote_flush callback support"). Hyper-V TLFS states:

"AddressSpace specifies an address space ID (an EPT PML4 table pointer)"

So apparently, Hyper-V doesn't expect us to pass naked EPTP, only PML4
pointer should be used. Strip off EPT configuration information before
calling into vmx_hv_remote_flush_tlb().

Fixes: 877ad952be3d ("KVM: vmx: Add tlb_remote_flush callback support")
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 612fd17be635..e665aa7167cf 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1572,8 +1572,12 @@ static int vmx_hv_remote_flush_tlb(struct kvm *kvm)
 		goto out;
 	}
 
+	/*
+	 * FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE hypercall needs the address of the
+	 * base of EPT PML4 table, strip off EPT configuration information.
+	 */
 	ret = hyperv_flush_guest_mapping(
-			to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer);
+			to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer & PAGE_MASK);
 
 out:
 	spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);

From b3e1eb8e7ac9aaa283989496651d99267c4cad6c Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 14 Oct 2018 20:19:31 -0700
Subject: [PATCH 34/95] sparc64: Make proc_id signed.

So that when it is unset, ie. '-1', userspace can see it
properly.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/include/asm/cpudata_64.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/sparc/include/asm/cpudata_64.h b/arch/sparc/include/asm/cpudata_64.h
index 666d6b5c0440..9c3fc03abe9a 100644
--- a/arch/sparc/include/asm/cpudata_64.h
+++ b/arch/sparc/include/asm/cpudata_64.h
@@ -28,7 +28,7 @@ typedef struct {
 	unsigned short	sock_id;	/* physical package */
 	unsigned short	core_id;
 	unsigned short  max_cache_id;	/* groupings of highest shared cache */
-	unsigned short	proc_id;	/* strand (aka HW thread) id */
+	signed short	proc_id;	/* strand (aka HW thread) id */
 } cpuinfo_sparc;
 
 DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data);

From d1f1f98c6d1708a90436e1a3b2aff5e93946731b Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 14 Oct 2018 20:22:28 -0700
Subject: [PATCH 35/95] sparc64: Set %l4 properly on trap return after handling
 signals.

If we did some signal processing, we have to reload the pt_regs
tstate register because it's value may have changed.

In doing so we also have to extract the %pil value contained in there
anre load that into %l4.

This value is at bit 20 and thus needs to be shifted down before we
later write it into the %pil register.

Most of the time this is harmless as we are returning to userspace
and the %pil is zero for that case.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/rtrap_64.S | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S
index f6528884a2c8..4073e2b87dd0 100644
--- a/arch/sparc/kernel/rtrap_64.S
+++ b/arch/sparc/kernel/rtrap_64.S
@@ -84,8 +84,9 @@ __handle_signal:
 		ldx			[%sp + PTREGS_OFF + PT_V9_TSTATE], %l1
 		sethi			%hi(0xf << 20), %l4
 		and			%l1, %l4, %l4
+		andn			%l1, %l4, %l1
 		ba,pt			%xcc, __handle_preemption_continue
-		 andn			%l1, %l4, %l1
+		 srl			%l4, 20, %l4
 
 		/* When returning from a NMI (%pil==15) interrupt we want to
 		 * avoid running softirqs, doing IRQ tracing, preempting, etc.

From 35a7f35ad1b150ddf59a41dcac7b2fa32982be0e Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 15 Oct 2018 07:20:24 +0200
Subject: [PATCH 36/95] Linux 4.19-rc8

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index e8b599b4dcde..bf3786e4ffec 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 4
 PATCHLEVEL = 19
 SUBLEVEL = 0
-EXTRAVERSION = -rc7
+EXTRAVERSION = -rc8
 NAME = Merciless Moray
 
 # *DOCUMENTATION*

From f0a7d1883d9f78ae7bf15fc258bf9a2b20f35b76 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 15 Oct 2018 12:43:02 +0100
Subject: [PATCH 37/95] afs: Fix clearance of reply

The recent patch to fix the afs_server struct leak didn't actually fix the
bug, but rather fixed some of the symptoms.  The problem is that an
asynchronous call that holds a resource pointed to by call->reply[0] will
find the pointer cleared in the call destructor, thereby preventing the
resource from being cleaned up.

In the case of the server record leak, the afs_fs_get_capabilities()
function in devel code sets up a call with reply[0] pointing at the server
record that should be altered when the result is obtained, but this was
being cleared before the destructor was called, so the put in the
destructor does nothing and the record is leaked.

Commit f014ffb025c1 removed the additional ref obtained by
afs_install_server(), but the removal of this ref is actually used by the
garbage collector to mark a server record as being defunct after the record
has expired through lack of use.

The offending clearance of call->reply[0] upon completion in
afs_process_async_call() has been there from the origin of the code, but
none of the asynchronous calls actually use that pointer currently, so it
should be safe to remove (note that synchronous calls don't involve this
function).

Fix this by the following means:

 (1) Revert commit f014ffb025c1.

 (2) Remove the clearance of reply[0] from afs_process_async_call().

Without this, afs_manage_servers() will suffer an assertion failure if it
sees a server record that didn't get used because the usage count is not 1.

Fixes: f014ffb025c1 ("afs: Fix afs_server struct leak")
Fixes: 08e0e7c82eea ("[AF_RXRPC]: Make the in-kernel AFS filesystem use AF_RXRPC.")
Signed-off-by: David Howells <dhowells@redhat.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/afs/rxrpc.c  | 2 --
 fs/afs/server.c | 2 --
 2 files changed, 4 deletions(-)

diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 35f2ae30f31f..77a83790a31f 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -690,8 +690,6 @@ static void afs_process_async_call(struct work_struct *work)
 	}
 
 	if (call->state == AFS_CALL_COMPLETE) {
-		call->reply[0] = NULL;
-
 		/* We have two refs to release - one from the alloc and one
 		 * queued with the work item - and we can't just deallocate the
 		 * call because the work item may be queued again.
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 2f306c0cc4ee..1d329e6981d5 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -199,11 +199,9 @@ static struct afs_server *afs_install_server(struct afs_net *net,
 
 	write_sequnlock(&net->fs_addr_lock);
 	ret = 0;
-	goto out;
 
 exists:
 	afs_get_server(server);
-out:
 	write_sequnlock(&net->fs_lock);
 	return server;
 }

From c994b12945a65cd892ca2cf7ddcf0b7d8b25cdec Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Mon, 15 Oct 2018 16:23:08 -0400
Subject: [PATCH 38/95] test_ida: Fix lockdep warning

The IDA was declared on the stack instead of statically, so lockdep
triggered a warning that it was improperly initialised.

Reported-by: 0day bot
Tested-by: Rong Chen <rong.a.chen@intel.com>
Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 lib/test_ida.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/test_ida.c b/lib/test_ida.c
index 2d1637d8136b..b06880625961 100644
--- a/lib/test_ida.c
+++ b/lib/test_ida.c
@@ -150,10 +150,10 @@ static void ida_check_conv(struct ida *ida)
 	IDA_BUG_ON(ida, !ida_is_empty(ida));
 }
 
+static DEFINE_IDA(ida);
+
 static int ida_checks(void)
 {
-	DEFINE_IDA(ida);
-
 	IDA_BUG_ON(&ida, !ida_is_empty(&ida));
 	ida_check_alloc(&ida);
 	ida_check_destroy(&ida);

From a309d5db58793d4d91f5a346e3fa58aa6bf60b12 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Mon, 15 Oct 2018 16:28:21 -0400
Subject: [PATCH 39/95] idr: Change documentation license

This documentation was inadvertently released under the CC-BY-SA-4.0
license.  It was intended to be released under GPL-2.0 or later.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 Documentation/core-api/idr.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/core-api/idr.rst b/Documentation/core-api/idr.rst
index d351e880a2f6..a2738050c4f0 100644
--- a/Documentation/core-api/idr.rst
+++ b/Documentation/core-api/idr.rst
@@ -1,4 +1,4 @@
-.. SPDX-License-Identifier: CC-BY-SA-4.0
+.. SPDX-License-Identifier: GPL-2.0+
 
 =============
 ID Allocation

From a06ecbfe784ceb22e56ae3cd75fe77138e7cfa0b Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 15 Oct 2018 18:32:54 -0700
Subject: [PATCH 40/95] Revert "sparc: Convert to using %pOFn instead of
 device_node.name"

This reverts commit 0b9871a3a8cc7234c285b5d9bf66cc6712cfee7c.

Causes crashes with qemu, interacts badly with commit commit
6d0a70a284be ("vsprintf: print OF node name using full_name")
etc.

Reported-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/auxio_64.c |  4 +--
 arch/sparc/kernel/power.c    |  4 +--
 arch/sparc/kernel/prom_32.c  | 26 +++++++-------
 arch/sparc/kernel/prom_64.c  | 68 ++++++++++++++++++------------------
 4 files changed, 51 insertions(+), 51 deletions(-)

diff --git a/arch/sparc/kernel/auxio_64.c b/arch/sparc/kernel/auxio_64.c
index cc42225c20f3..4e8f56c3793c 100644
--- a/arch/sparc/kernel/auxio_64.c
+++ b/arch/sparc/kernel/auxio_64.c
@@ -115,8 +115,8 @@ static int auxio_probe(struct platform_device *dev)
 		auxio_devtype = AUXIO_TYPE_SBUS;
 		size = 1;
 	} else {
-		printk("auxio: Unknown parent bus type [%pOFn]\n",
-		       dp->parent);
+		printk("auxio: Unknown parent bus type [%s]\n",
+		       dp->parent->name);
 		return -ENODEV;
 	}
 	auxio_register = of_ioremap(&dev->resource[0], 0, size, "auxio");
diff --git a/arch/sparc/kernel/power.c b/arch/sparc/kernel/power.c
index d941875dd718..92627abce311 100644
--- a/arch/sparc/kernel/power.c
+++ b/arch/sparc/kernel/power.c
@@ -41,8 +41,8 @@ static int power_probe(struct platform_device *op)
 
 	power_reg = of_ioremap(res, 0, 0x4, "power");
 
-	printk(KERN_INFO "%pOFn: Control reg at %llx\n",
-	       op->dev.of_node, res->start);
+	printk(KERN_INFO "%s: Control reg at %llx\n",
+	       op->dev.of_node->name, res->start);
 
 	if (has_button_interrupt(irq, op->dev.of_node)) {
 		if (request_irq(irq,
diff --git a/arch/sparc/kernel/prom_32.c b/arch/sparc/kernel/prom_32.c
index 17c87d29ff20..b51cbb9e87dc 100644
--- a/arch/sparc/kernel/prom_32.c
+++ b/arch/sparc/kernel/prom_32.c
@@ -68,8 +68,8 @@ static void __init sparc32_path_component(struct device_node *dp, char *tmp_buf)
 		return;
 
 	regs = rprop->value;
-	sprintf(tmp_buf, "%pOFn@%x,%x",
-		dp,
+	sprintf(tmp_buf, "%s@%x,%x",
+		dp->name,
 		regs->which_io, regs->phys_addr);
 }
 
@@ -84,8 +84,8 @@ static void __init sbus_path_component(struct device_node *dp, char *tmp_buf)
 		return;
 
 	regs = prop->value;
-	sprintf(tmp_buf, "%pOFn@%x,%x",
-		dp,
+	sprintf(tmp_buf, "%s@%x,%x",
+		dp->name,
 		regs->which_io,
 		regs->phys_addr);
 }
@@ -104,13 +104,13 @@ static void __init pci_path_component(struct device_node *dp, char *tmp_buf)
 	regs = prop->value;
 	devfn = (regs->phys_hi >> 8) & 0xff;
 	if (devfn & 0x07) {
-		sprintf(tmp_buf, "%pOFn@%x,%x",
-			dp,
+		sprintf(tmp_buf, "%s@%x,%x",
+			dp->name,
 			devfn >> 3,
 			devfn & 0x07);
 	} else {
-		sprintf(tmp_buf, "%pOFn@%x",
-			dp,
+		sprintf(tmp_buf, "%s@%x",
+			dp->name,
 			devfn >> 3);
 	}
 }
@@ -127,8 +127,8 @@ static void __init ebus_path_component(struct device_node *dp, char *tmp_buf)
 
 	regs = prop->value;
 
-	sprintf(tmp_buf, "%pOFn@%x,%x",
-		dp,
+	sprintf(tmp_buf, "%s@%x,%x",
+		dp->name,
 		regs->which_io, regs->phys_addr);
 }
 
@@ -167,8 +167,8 @@ static void __init ambapp_path_component(struct device_node *dp, char *tmp_buf)
 		return;
 	device = prop->value;
 
-	sprintf(tmp_buf, "%pOFn:%d:%d@%x,%x",
-		dp, *vendor, *device,
+	sprintf(tmp_buf, "%s:%d:%d@%x,%x",
+		dp->name, *vendor, *device,
 		*intr, reg0);
 }
 
@@ -201,7 +201,7 @@ char * __init build_path_component(struct device_node *dp)
 	tmp_buf[0] = '\0';
 	__build_path_component(dp, tmp_buf);
 	if (tmp_buf[0] == '\0')
-		snprintf(tmp_buf, sizeof(tmp_buf), "%pOFn", dp);
+		strcpy(tmp_buf, dp->name);
 
 	n = prom_early_alloc(strlen(tmp_buf) + 1);
 	strcpy(n, tmp_buf);
diff --git a/arch/sparc/kernel/prom_64.c b/arch/sparc/kernel/prom_64.c
index 6220411ce8fc..baeaeed64993 100644
--- a/arch/sparc/kernel/prom_64.c
+++ b/arch/sparc/kernel/prom_64.c
@@ -82,8 +82,8 @@ static void __init sun4v_path_component(struct device_node *dp, char *tmp_buf)
 
 	regs = rprop->value;
 	if (!of_node_is_root(dp->parent)) {
-		sprintf(tmp_buf, "%pOFn@%x,%x",
-			dp,
+		sprintf(tmp_buf, "%s@%x,%x",
+			dp->name,
 			(unsigned int) (regs->phys_addr >> 32UL),
 			(unsigned int) (regs->phys_addr & 0xffffffffUL));
 		return;
@@ -97,17 +97,17 @@ static void __init sun4v_path_component(struct device_node *dp, char *tmp_buf)
 		const char *prefix = (type == 0) ? "m" : "i";
 
 		if (low_bits)
-			sprintf(tmp_buf, "%pOFn@%s%x,%x",
-				dp, prefix,
+			sprintf(tmp_buf, "%s@%s%x,%x",
+				dp->name, prefix,
 				high_bits, low_bits);
 		else
-			sprintf(tmp_buf, "%pOFn@%s%x",
-				dp,
+			sprintf(tmp_buf, "%s@%s%x",
+				dp->name,
 				prefix,
 				high_bits);
 	} else if (type == 12) {
-		sprintf(tmp_buf, "%pOFn@%x",
-			dp, high_bits);
+		sprintf(tmp_buf, "%s@%x",
+			dp->name, high_bits);
 	}
 }
 
@@ -122,8 +122,8 @@ static void __init sun4u_path_component(struct device_node *dp, char *tmp_buf)
 
 	regs = prop->value;
 	if (!of_node_is_root(dp->parent)) {
-		sprintf(tmp_buf, "%pOFn@%x,%x",
-			dp,
+		sprintf(tmp_buf, "%s@%x,%x",
+			dp->name,
 			(unsigned int) (regs->phys_addr >> 32UL),
 			(unsigned int) (regs->phys_addr & 0xffffffffUL));
 		return;
@@ -138,8 +138,8 @@ static void __init sun4u_path_component(struct device_node *dp, char *tmp_buf)
 		if (tlb_type >= cheetah)
 			mask = 0x7fffff;
 
-		sprintf(tmp_buf, "%pOFn@%x,%x",
-			dp,
+		sprintf(tmp_buf, "%s@%x,%x",
+			dp->name,
 			*(u32 *)prop->value,
 			(unsigned int) (regs->phys_addr & mask));
 	}
@@ -156,8 +156,8 @@ static void __init sbus_path_component(struct device_node *dp, char *tmp_buf)
 		return;
 
 	regs = prop->value;
-	sprintf(tmp_buf, "%pOFn@%x,%x",
-		dp,
+	sprintf(tmp_buf, "%s@%x,%x",
+		dp->name,
 		regs->which_io,
 		regs->phys_addr);
 }
@@ -176,13 +176,13 @@ static void __init pci_path_component(struct device_node *dp, char *tmp_buf)
 	regs = prop->value;
 	devfn = (regs->phys_hi >> 8) & 0xff;
 	if (devfn & 0x07) {
-		sprintf(tmp_buf, "%pOFn@%x,%x",
-			dp,
+		sprintf(tmp_buf, "%s@%x,%x",
+			dp->name,
 			devfn >> 3,
 			devfn & 0x07);
 	} else {
-		sprintf(tmp_buf, "%pOFn@%x",
-			dp,
+		sprintf(tmp_buf, "%s@%x",
+			dp->name,
 			devfn >> 3);
 	}
 }
@@ -203,8 +203,8 @@ static void __init upa_path_component(struct device_node *dp, char *tmp_buf)
 	if (!prop)
 		return;
 
-	sprintf(tmp_buf, "%pOFn@%x,%x",
-		dp,
+	sprintf(tmp_buf, "%s@%x,%x",
+		dp->name,
 		*(u32 *) prop->value,
 		(unsigned int) (regs->phys_addr & 0xffffffffUL));
 }
@@ -221,7 +221,7 @@ static void __init vdev_path_component(struct device_node *dp, char *tmp_buf)
 
 	regs = prop->value;
 
-	sprintf(tmp_buf, "%pOFn@%x", dp, *regs);
+	sprintf(tmp_buf, "%s@%x", dp->name, *regs);
 }
 
 /* "name@addrhi,addrlo" */
@@ -236,8 +236,8 @@ static void __init ebus_path_component(struct device_node *dp, char *tmp_buf)
 
 	regs = prop->value;
 
-	sprintf(tmp_buf, "%pOFn@%x,%x",
-		dp,
+	sprintf(tmp_buf, "%s@%x,%x",
+		dp->name,
 		(unsigned int) (regs->phys_addr >> 32UL),
 		(unsigned int) (regs->phys_addr & 0xffffffffUL));
 }
@@ -257,8 +257,8 @@ static void __init i2c_path_component(struct device_node *dp, char *tmp_buf)
 	/* This actually isn't right... should look at the #address-cells
 	 * property of the i2c bus node etc. etc.
 	 */
-	sprintf(tmp_buf, "%pOFn@%x,%x",
-		dp, regs[0], regs[1]);
+	sprintf(tmp_buf, "%s@%x,%x",
+		dp->name, regs[0], regs[1]);
 }
 
 /* "name@reg0[,reg1]" */
@@ -274,11 +274,11 @@ static void __init usb_path_component(struct device_node *dp, char *tmp_buf)
 	regs = prop->value;
 
 	if (prop->length == sizeof(u32) || regs[1] == 1) {
-		sprintf(tmp_buf, "%pOFn@%x",
-			dp, regs[0]);
+		sprintf(tmp_buf, "%s@%x",
+			dp->name, regs[0]);
 	} else {
-		sprintf(tmp_buf, "%pOFn@%x,%x",
-			dp, regs[0], regs[1]);
+		sprintf(tmp_buf, "%s@%x,%x",
+			dp->name, regs[0], regs[1]);
 	}
 }
 
@@ -295,11 +295,11 @@ static void __init ieee1394_path_component(struct device_node *dp, char *tmp_buf
 	regs = prop->value;
 
 	if (regs[2] || regs[3]) {
-		sprintf(tmp_buf, "%pOFn@%08x%08x,%04x%08x",
-			dp, regs[0], regs[1], regs[2], regs[3]);
+		sprintf(tmp_buf, "%s@%08x%08x,%04x%08x",
+			dp->name, regs[0], regs[1], regs[2], regs[3]);
 	} else {
-		sprintf(tmp_buf, "%pOFn@%08x%08x",
-			dp, regs[0], regs[1]);
+		sprintf(tmp_buf, "%s@%08x%08x",
+			dp->name, regs[0], regs[1]);
 	}
 }
 
@@ -361,7 +361,7 @@ char * __init build_path_component(struct device_node *dp)
 	tmp_buf[0] = '\0';
 	__build_path_component(dp, tmp_buf);
 	if (tmp_buf[0] == '\0')
-		snprintf(tmp_buf, sizeof(tmp_buf), "%pOFn", dp);
+		strcpy(tmp_buf, dp->name);
 
 	n = prom_early_alloc(strlen(tmp_buf) + 1);
 	strcpy(n, tmp_buf);

From d49c88d7677ba737e9d2759a87db0402d5ab2607 Mon Sep 17 00:00:00 2001
From: Jian-Hong Pan <jian-hong@endlessm.com>
Date: Thu, 27 Sep 2018 12:09:48 +0800
Subject: [PATCH 41/95] r8169: Enable MSI-X on RTL8106e

Originally, we have an issue where r8169 MSI-X interrupt is broken after
S3 suspend/resume on RTL8106e of ASUS X441UAR.

02:00.0 Ethernet controller [0200]: Realtek Semiconductor Co., Ltd.
RTL8101/2/6E PCI Express Fast/Gigabit Ethernet controller [10ec:8136]
(rev 07)
	Subsystem: ASUSTeK Computer Inc. RTL810xE PCI Express Fast
Ethernet controller [1043:200f]
	Flags: bus master, fast devsel, latency 0, IRQ 16
	I/O ports at e000 [size=256]
	Memory at ef100000 (64-bit, non-prefetchable) [size=4K]
	Memory at e0000000 (64-bit, prefetchable) [size=16K]
	Capabilities: [40] Power Management version 3
	Capabilities: [50] MSI: Enable- Count=1/1 Maskable- 64bit+
	Capabilities: [70] Express Endpoint, MSI 01
	Capabilities: [b0] MSI-X: Enable+ Count=4 Masked-
	Capabilities: [d0] Vital Product Data
	Capabilities: [100] Advanced Error Reporting
	Capabilities: [140] Virtual Channel
	Capabilities: [160] Device Serial Number 01-00-00-00-36-4c-e0-00
	Capabilities: [170] Latency Tolerance Reporting
	Kernel driver in use: r8169
	Kernel modules: r8169

We found the all of the values in PCI BAR=4 of the ethernet adapter
become 0xFF after system resumes.  That breaks the MSI-X interrupt.
Therefore, we can only fall back to MSI interrupt to fix the issue at
that time.

However, there is a commit which resolves the drivers getting nothing in
PCI BAR=4 after system resumes.  It is 04cb3ae895d7 "PCI: Reprogram
bridge prefetch registers on resume" by Daniel Drake.

After apply the patch, the ethernet adapter works fine before suspend
and after resume.  So, we can revert the workaround after the commit
"PCI: Reprogram bridge prefetch registers on resume" is merged into main
tree.

This patch reverts commit 7bb05b85bc2d1a1b647b91424b2ed4a18e6ecd81
"r8169: don't use MSI-X on RTL8106e".

Buglink: https://bugzilla.kernel.org/show_bug.cgi?id=201181
Fixes: 7bb05b85bc2d ("r8169: don't use MSI-X on RTL8106e")
Signed-off-by: Jian-Hong Pan <jian-hong@endlessm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 3a5e6160bf0d..f4df367fb894 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -7093,20 +7093,17 @@ static int rtl_alloc_irq(struct rtl8169_private *tp)
 {
 	unsigned int flags;
 
-	switch (tp->mac_version) {
-	case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06:
+	if (tp->mac_version <= RTL_GIGA_MAC_VER_06) {
 		RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
 		RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~MSIEnable);
 		RTL_W8(tp, Cfg9346, Cfg9346_Lock);
 		flags = PCI_IRQ_LEGACY;
-		break;
-	case RTL_GIGA_MAC_VER_39 ... RTL_GIGA_MAC_VER_40:
+	} else if (tp->mac_version == RTL_GIGA_MAC_VER_40) {
 		/* This version was reported to have issues with resume
 		 * from suspend when using MSI-X
 		 */
 		flags = PCI_IRQ_LEGACY | PCI_IRQ_MSI;
-		break;
-	default:
+	} else {
 		flags = PCI_IRQ_ALL_TYPES;
 	}
 

From 2bb3207dbbd4d30e96dd0e1c8e013104193bd59c Mon Sep 17 00:00:00 2001
From: Wenwen Wang <wang6495@umn.edu>
Date: Tue, 9 Oct 2018 08:15:38 -0500
Subject: [PATCH 42/95] ethtool: fix a missing-check bug

In ethtool_get_rxnfc(), the eth command 'cmd' is compared against
'ETHTOOL_GRXFH' to see whether it is necessary to adjust the variable
'info_size'. Then the whole structure of 'info' is copied from the
user-space buffer 'useraddr' with 'info_size' bytes. In the following
execution, 'info' may be copied again from the buffer 'useraddr' depending
on the 'cmd' and the 'info.flow_type'. However, after these two copies,
there is no check between 'cmd' and 'info.cmd'. In fact, 'cmd' is also
copied from the buffer 'useraddr' in dev_ethtool(), which is the caller
function of ethtool_get_rxnfc(). Given that 'useraddr' is in the user
space, a malicious user can race to change the eth command in the buffer
between these copies. By doing so, the attacker can supply inconsistent
data and cause undefined behavior because in the following execution 'info'
will be passed to ops->get_rxnfc().

This patch adds a necessary check on 'info.cmd' and 'cmd' to confirm that
they are still same after the two copies in ethtool_get_rxnfc(). Otherwise,
an error code EINVAL will be returned.

Signed-off-by: Wenwen Wang <wang6495@umn.edu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/ethtool.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 0762aaf8e964..192f2f76b7bd 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1015,6 +1015,9 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
 			return -EINVAL;
 	}
 
+	if (info.cmd != cmd)
+		return -EINVAL;
+
 	if (info.cmd == ETHTOOL_GRXCLSRLALL) {
 		if (info.rule_cnt > 0) {
 			if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32))

From 58f5bbe331c566f49c9559568f982202a278aa78 Mon Sep 17 00:00:00 2001
From: Wenwen Wang <wang6495@umn.edu>
Date: Mon, 8 Oct 2018 10:49:35 -0500
Subject: [PATCH 43/95] ethtool: fix a privilege escalation bug

In dev_ethtool(), the eth command 'ethcmd' is firstly copied from the
use-space buffer 'useraddr' and checked to see whether it is
ETHTOOL_PERQUEUE. If yes, the sub-command 'sub_cmd' is further copied from
the user space. Otherwise, 'sub_cmd' is the same as 'ethcmd'. Next,
according to 'sub_cmd', a permission check is enforced through the function
ns_capable(). For example, the permission check is required if 'sub_cmd' is
ETHTOOL_SCOALESCE, but it is not necessary if 'sub_cmd' is
ETHTOOL_GCOALESCE, as suggested in the comment "Allow some commands to be
done by anyone". The following execution invokes different handlers
according to 'ethcmd'. Specifically, if 'ethcmd' is ETHTOOL_PERQUEUE,
ethtool_set_per_queue() is called. In ethtool_set_per_queue(), the kernel
object 'per_queue_opt' is copied again from the user-space buffer
'useraddr' and 'per_queue_opt.sub_command' is used to determine which
operation should be performed. Given that the buffer 'useraddr' is in the
user space, a malicious user can race to change the sub-command between the
two copies. In particular, the attacker can supply ETHTOOL_PERQUEUE and
ETHTOOL_GCOALESCE to bypass the permission check in dev_ethtool(). Then
before ethtool_set_per_queue() is called, the attacker changes
ETHTOOL_GCOALESCE to ETHTOOL_SCOALESCE. In this way, the attacker can
bypass the permission check and execute ETHTOOL_SCOALESCE.

This patch enforces a check in ethtool_set_per_queue() after the second
copy from 'useraddr'. If the sub-command is different from the one obtained
in the first copy in dev_ethtool(), an error code EINVAL will be returned.

Fixes: f38d138a7da6 ("net/ethtool: support set coalesce per queue")
Signed-off-by: Wenwen Wang <wang6495@umn.edu>
Reviewed-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/ethtool.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 192f2f76b7bd..aeabc4831fca 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -2472,13 +2472,17 @@ roll_back:
 	return ret;
 }
 
-static int ethtool_set_per_queue(struct net_device *dev, void __user *useraddr)
+static int ethtool_set_per_queue(struct net_device *dev,
+				 void __user *useraddr, u32 sub_cmd)
 {
 	struct ethtool_per_queue_op per_queue_opt;
 
 	if (copy_from_user(&per_queue_opt, useraddr, sizeof(per_queue_opt)))
 		return -EFAULT;
 
+	if (per_queue_opt.sub_command != sub_cmd)
+		return -EINVAL;
+
 	switch (per_queue_opt.sub_command) {
 	case ETHTOOL_GCOALESCE:
 		return ethtool_get_per_queue_coalesce(dev, useraddr, &per_queue_opt);
@@ -2849,7 +2853,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 		rc = ethtool_get_phy_stats(dev, useraddr);
 		break;
 	case ETHTOOL_PERQUEUE:
-		rc = ethtool_set_per_queue(dev, useraddr);
+		rc = ethtool_set_per_queue(dev, useraddr, sub_cmd);
 		break;
 	case ETHTOOL_GLINKSETTINGS:
 		rc = ethtool_get_link_ksettings(dev, useraddr);

From e331473fee3d500bb0d2582a1fe598df3326d8cd Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Wed, 10 Oct 2018 22:00:58 +0200
Subject: [PATCH 44/95] net/sched: cls_api: add missing validation of netlink
 attributes

Similarly to what has been done in 8b4c3cdd9dd8 ("net: sched: Add policy
validation for tc attributes"), fix classifier code to add validation of
TCA_CHAIN and TCA_KIND netlink attributes.

tested with:
 # ./tdc.py -c filter

v2: Let sch_api and cls_api share nla_policy they have in common, thanks
    to David Ahern.
v3: Avoid EXPORT_SYMBOL(), as validation of those attributes is not done
    by TC modules, thanks to Cong Wang.
    While at it, restore the 'Delete / get qdisc' comment to its orginal
    position, just above tc_get_qdisc() function prototype.

Fixes: 5bc1701881e39 ("net: sched: introduce multichain support for filters")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_api.c | 13 ++++++++-----
 net/sched/sch_api.c |  8 ++++----
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 0a75cb2e5e7b..70f144ac5e1d 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -31,6 +31,8 @@
 #include <net/pkt_sched.h>
 #include <net/pkt_cls.h>
 
+extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
+
 /* The list of all installed classifier types */
 static LIST_HEAD(tcf_proto_base);
 
@@ -1211,7 +1213,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 replay:
 	tp_created = 0;
 
-	err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack);
+	err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
 	if (err < 0)
 		return err;
 
@@ -1360,7 +1362,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
 
-	err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack);
+	err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
 	if (err < 0)
 		return err;
 
@@ -1475,7 +1477,7 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 	void *fh = NULL;
 	int err;
 
-	err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack);
+	err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
 	if (err < 0)
 		return err;
 
@@ -1838,7 +1840,7 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n,
 		return -EPERM;
 
 replay:
-	err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack);
+	err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
 	if (err < 0)
 		return err;
 
@@ -1949,7 +1951,8 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
 	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
 		return skb->len;
 
-	err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL);
+	err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
+			  NULL);
 	if (err)
 		return err;
 
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 85e73f48e48f..6684641ea344 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1307,10 +1307,6 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
 	return 0;
 }
 
-/*
- * Delete/get qdisc.
- */
-
 const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
 	[TCA_KIND]		= { .type = NLA_STRING },
 	[TCA_OPTIONS]		= { .type = NLA_NESTED },
@@ -1323,6 +1319,10 @@ const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
 	[TCA_EGRESS_BLOCK]	= { .type = NLA_U32 },
 };
 
+/*
+ * Delete/get qdisc.
+ */
+
 static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
 			struct netlink_ext_ack *extack)
 {

From 5a8e7aea953bdb6d4da13aff6f1e7f9c62023499 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Thu, 11 Oct 2018 11:15:13 -0700
Subject: [PATCH 45/95] llc: set SOCK_RCU_FREE in llc_sap_add_socket()

WHen an llc sock is added into the sk_laddr_hash of an llc_sap,
it is not marked with SOCK_RCU_FREE.

This causes that the sock could be freed while it is still being
read by __llc_lookup_established() with RCU read lock. sock is
refcounted, but with RCU read lock, nothing prevents the readers
getting a zero refcnt.

Fix it by setting SOCK_RCU_FREE in llc_sap_add_socket().

Reported-by: syzbot+11e05f04c15e03be5254@syzkaller.appspotmail.com
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/llc/llc_conn.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index c0ac522b48a1..4ff89cb7c86f 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -734,6 +734,7 @@ void llc_sap_add_socket(struct llc_sap *sap, struct sock *sk)
 	llc_sk(sk)->sap = sap;
 
 	spin_lock_bh(&sap->sk_lock);
+	sock_set_flag(sk, SOCK_RCU_FREE);
 	sap->sk_count++;
 	sk_nulls_add_node_rcu(sk, laddr_hb);
 	hlist_add_head(&llc->dev_hash_node, dev_hb);

From 4af00f4cc1ba34da4654ac31830843cae871642d Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Thu, 11 Oct 2018 22:02:29 +0200
Subject: [PATCH 46/95] tipc: initialize broadcast link stale counter correctly

In the commit referred to below we added link tolerance as an additional
criteria for declaring broadcast transmission "stale" and resetting the
unicast links to the affected node.

Unfortunately, this 'improvement' introduced two bugs, which each and
one alone cause only limited problems, but combined lead to seemingly
stochastic unicast link resets, depending on the amount of broadcast
traffic transmitted.

The first issue, a missing initialization of the 'tolerance' field of
the receiver broadcast link, was recently fixed by commit 047491ea334a
("tipc: set link tolerance correctly in broadcast link").

Ths second issue, where we omit to reset the 'stale_cnt' field of
the same link after a 'stale' period is over, leads to this counter
accumulating over time, and in the absence of the 'tolerance' criteria
leads to the above described symptoms. This commit adds the missing
initialization.

Fixes: a4dc70d46cf1 ("tipc: extend link reset criteria for stale packet retransmission")
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/link.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/tipc/link.c b/net/tipc/link.c
index f6552e4f4b43..201c3b5bc96b 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1041,6 +1041,7 @@ static int tipc_link_retrans(struct tipc_link *l, struct tipc_link *r,
 	if (r->last_retransm != buf_seqno(skb)) {
 		r->last_retransm = buf_seqno(skb);
 		r->stale_limit = jiffies + msecs_to_jiffies(r->tolerance);
+		r->stale_cnt = 0;
 	} else if (++r->stale_cnt > 99 && time_after(jiffies, r->stale_limit)) {
 		link_retransmit_failure(l, skb);
 		if (link_is_bc_sndlink(l))

From d7b4c24f45d2efe51b8f213da4593fefd49240ba Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 11 Oct 2018 22:32:31 +0100
Subject: [PATCH 47/95] rxrpc: Fix an uninitialised variable

Fix an uninitialised variable introduced by the last patch.  This can cause
a crash when a new call comes in to a local service, such as when an AFS
fileserver calls back to the local cache manager.

Fixes: c1e15b4944c9 ("rxrpc: Fix the packet reception routine")
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/call_accept.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 652e314de38e..8079aacaecac 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -337,7 +337,7 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local,
 {
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 	struct rxrpc_connection *conn;
-	struct rxrpc_peer *peer;
+	struct rxrpc_peer *peer = NULL;
 	struct rxrpc_call *call;
 
 	_enter("");

From d6672a5a97918f92bf2f3a2591f25d02bb0897a4 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Thu, 11 Oct 2018 22:32:39 +0100
Subject: [PATCH 48/95] rxrpc: use correct kvec num when sending BUSY response
 packet

Fixes gcc '-Wunused-but-set-variable' warning:

net/rxrpc/output.c: In function 'rxrpc_reject_packets':
net/rxrpc/output.c:527:11: warning:
 variable 'ioc' set but not used [-Wunused-but-set-variable]

'ioc' is the correct kvec num when sending a BUSY (or an ABORT) response
packet.

Fixes: ece64fec164f ("rxrpc: Emit BUSY packets when supposed to rather than ABORTs")
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/output.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index e8fb8922bca8..a141ee3ab812 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -572,7 +572,8 @@ void rxrpc_reject_packets(struct rxrpc_local *local)
 			whdr.flags	^= RXRPC_CLIENT_INITIATED;
 			whdr.flags	&= RXRPC_CLIENT_INITIATED;
 
-			ret = kernel_sendmsg(local->socket, &msg, iov, 2, size);
+			ret = kernel_sendmsg(local->socket, &msg,
+					     iov, ioc, size);
 			if (ret < 0)
 				trace_rxrpc_tx_fail(local->debug_id, 0, ret,
 						    rxrpc_tx_point_reject);

From 64bd9c8135751b561f27edaaffe93d07093f81af Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 11 Oct 2018 15:06:33 -0700
Subject: [PATCH 49/95] net: bcmgenet: Poll internal PHY for GENETv5

On GENETv5, there is a hardware issue which prevents the GENET hardware
from generating a link UP interrupt when the link is operating at
10Mbits/sec. Since we do not have any way to configure the link
detection logic, fallback to polling in that case.

Fixes: 421380856d9c ("net: bcmgenet: add support for the GENETv5 hardware")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/genet/bcmmii.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index 4241ae928d4a..34af5f1569c8 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -321,9 +321,12 @@ int bcmgenet_mii_probe(struct net_device *dev)
 	phydev->advertising = phydev->supported;
 
 	/* The internal PHY has its link interrupts routed to the
-	 * Ethernet MAC ISRs
+	 * Ethernet MAC ISRs. On GENETv5 there is a hardware issue
+	 * that prevents the signaling of link UP interrupts when
+	 * the link operates at 10Mbps, so fallback to polling for
+	 * those versions of GENET.
 	 */
-	if (priv->internal_phy)
+	if (priv->internal_phy && !GENET_IS_V5(priv))
 		dev->phydev->irq = PHY_IGNORE_INTERRUPT;
 
 	return 0;

From f547fac624be53ad8b07e9ebca7654a7827ba61b Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Fri, 12 Oct 2018 16:22:47 +0200
Subject: [PATCH 50/95] ipv6: rate-limit probes for neighbourless routes

When commit 270972554c91 ("[IPV6]: ROUTE: Add Router Reachability
Probing (RFC4191).") introduced router probing, the rt6_probe() function
required that a neighbour entry existed. This neighbour entry is used to
record the timestamp of the last probe via the ->updated field.

Later, commit 2152caea7196 ("ipv6: Do not depend on rt->n in rt6_probe().")
removed the requirement for a neighbour entry. Neighbourless routes skip
the interval check and are not rate-limited.

This patch adds rate-limiting for neighbourless routes, by recording the
timestamp of the last probe in the fib6_info itself.

Fixes: 2152caea7196 ("ipv6: Do not depend on rt->n in rt6_probe().")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_fib.h |  4 ++++
 net/ipv6/route.c      | 12 ++++++------
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 3d4930528db0..2d31e22babd8 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -159,6 +159,10 @@ struct fib6_info {
 	struct rt6_info * __percpu	*rt6i_pcpu;
 	struct rt6_exception_bucket __rcu *rt6i_exception_bucket;
 
+#ifdef CONFIG_IPV6_ROUTER_PREF
+	unsigned long			last_probe;
+#endif
+
 	u32				fib6_metric;
 	u8				fib6_protocol;
 	u8				fib6_type;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a366c05a239d..abcb5ae77319 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -520,10 +520,11 @@ static void rt6_probe_deferred(struct work_struct *w)
 
 static void rt6_probe(struct fib6_info *rt)
 {
-	struct __rt6_probe_work *work;
+	struct __rt6_probe_work *work = NULL;
 	const struct in6_addr *nh_gw;
 	struct neighbour *neigh;
 	struct net_device *dev;
+	struct inet6_dev *idev;
 
 	/*
 	 * Okay, this does not seem to be appropriate
@@ -539,15 +540,12 @@ static void rt6_probe(struct fib6_info *rt)
 	nh_gw = &rt->fib6_nh.nh_gw;
 	dev = rt->fib6_nh.nh_dev;
 	rcu_read_lock_bh();
+	idev = __in6_dev_get(dev);
 	neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
 	if (neigh) {
-		struct inet6_dev *idev;
-
 		if (neigh->nud_state & NUD_VALID)
 			goto out;
 
-		idev = __in6_dev_get(dev);
-		work = NULL;
 		write_lock(&neigh->lock);
 		if (!(neigh->nud_state & NUD_VALID) &&
 		    time_after(jiffies,
@@ -557,11 +555,13 @@ static void rt6_probe(struct fib6_info *rt)
 				__neigh_set_probe_once(neigh);
 		}
 		write_unlock(&neigh->lock);
-	} else {
+	} else if (time_after(jiffies, rt->last_probe +
+				       idev->cnf.rtr_probe_interval)) {
 		work = kmalloc(sizeof(*work), GFP_ATOMIC);
 	}
 
 	if (work) {
+		rt->last_probe = jiffies;
 		INIT_WORK(&work->work, rt6_probe_deferred);
 		work->target = *nh_gw;
 		dev_hold(dev);

From 7ec8dc96e1cb45693f28f1287802ef6f2888dae0 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 12 Oct 2018 16:38:36 +0100
Subject: [PATCH 51/95] rxrpc: Fix incorrect conditional on IPV6

The udpv6_encap_enable() function is part of the ipv6 code, and if that is
configured as a loadable module and rxrpc is built in then a build failure
will occur because the conditional check is wrong:

  net/rxrpc/local_object.o: In function `rxrpc_lookup_local':
  local_object.c:(.text+0x2688): undefined reference to `udpv6_encap_enable'

Use the correct config symbol (CONFIG_AF_RXRPC_IPV6) in the conditional
check rather than CONFIG_IPV6 as that will do the right thing.

Fixes: 5271953cad31 ("rxrpc: Use the UDP encap_rcv hook")
Reported-by: kbuild-all@01.org
Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/local_object.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index cad0691c2bb4..0906e51d3cfb 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -139,7 +139,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
 	udp_sk(usk)->gro_complete = NULL;
 
 	udp_encap_enable();
-#if IS_ENABLED(CONFIG_IPV6)
+#if IS_ENABLED(CONFIG_AF_RXRPC_IPV6)
 	if (local->srx.transport.family == AF_INET6)
 		udpv6_encap_enable();
 #endif

From d3092b2efca1cd1d492d0b08499a2066c5ca8cec Mon Sep 17 00:00:00 2001
From: Tung Nguyen <tung.q.nguyen@dektech.com.au>
Date: Fri, 12 Oct 2018 22:46:55 +0200
Subject: [PATCH 52/95] tipc: fix unsafe rcu locking when accessing publication
 list

The binding table's 'cluster_scope' list is rcu protected to handle
races between threads changing the list and those traversing the list at
the same moment. We have now found that the function named_distribute()
uses the regular list_for_each() macro to traverse the said list.
Likewise, the function tipc_named_withdraw() is removing items from the
same list using the regular list_del() call. When these two functions
execute in parallel we see occasional crashes.

This commit fixes this by adding the missing _rcu() suffixes.

Signed-off-by: Tung Nguyen <tung.q.nguyen@dektech.com.au>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/name_distr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 51b4b96f89db..3cfeb9df64b0 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -115,7 +115,7 @@ struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ)
 	struct sk_buff *buf;
 	struct distr_item *item;
 
-	list_del(&publ->binding_node);
+	list_del_rcu(&publ->binding_node);
 
 	if (publ->scope == TIPC_NODE_SCOPE)
 		return NULL;
@@ -147,7 +147,7 @@ static void named_distribute(struct net *net, struct sk_buff_head *list,
 			ITEM_SIZE) * ITEM_SIZE;
 	u32 msg_rem = msg_dsz;
 
-	list_for_each_entry(publ, pls, binding_node) {
+	list_for_each_entry_rcu(publ, pls, binding_node) {
 		/* Prepare next buffer: */
 		if (!skb) {
 			skb = named_prepare_buf(net, PUBLICATION, msg_rem,

From dc012f3628eaecfb5ba68404a5c30ef501daf63d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 12 Oct 2018 18:58:53 -0700
Subject: [PATCH 53/95] ipv6: mcast: fix a use-after-free in inet6_mc_check

syzbot found a use-after-free in inet6_mc_check [1]

The problem here is that inet6_mc_check() uses rcu
and read_lock(&iml->sflock)

So the fact that ip6_mc_leave_src() is called under RTNL
and the socket lock does not help us, we need to acquire
iml->sflock in write mode.

In the future, we should convert all this stuff to RCU.

[1]
BUG: KASAN: use-after-free in ipv6_addr_equal include/net/ipv6.h:521 [inline]
BUG: KASAN: use-after-free in inet6_mc_check+0xae7/0xb40 net/ipv6/mcast.c:649
Read of size 8 at addr ffff8801ce7f2510 by task syz-executor0/22432

CPU: 1 PID: 22432 Comm: syz-executor0 Not tainted 4.19.0-rc7+ #280
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x1c4/0x2b4 lib/dump_stack.c:113
 print_address_description.cold.8+0x9/0x1ff mm/kasan/report.c:256
 kasan_report_error mm/kasan/report.c:354 [inline]
 kasan_report.cold.9+0x242/0x309 mm/kasan/report.c:412
 __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433
 ipv6_addr_equal include/net/ipv6.h:521 [inline]
 inet6_mc_check+0xae7/0xb40 net/ipv6/mcast.c:649
 __raw_v6_lookup+0x320/0x3f0 net/ipv6/raw.c:98
 ipv6_raw_deliver net/ipv6/raw.c:183 [inline]
 raw6_local_deliver+0x3d3/0xcb0 net/ipv6/raw.c:240
 ip6_input_finish+0x467/0x1aa0 net/ipv6/ip6_input.c:345
 NF_HOOK include/linux/netfilter.h:289 [inline]
 ip6_input+0xe9/0x600 net/ipv6/ip6_input.c:426
 ip6_mc_input+0x48a/0xd20 net/ipv6/ip6_input.c:503
 dst_input include/net/dst.h:450 [inline]
 ip6_rcv_finish+0x17a/0x330 net/ipv6/ip6_input.c:76
 NF_HOOK include/linux/netfilter.h:289 [inline]
 ipv6_rcv+0x120/0x640 net/ipv6/ip6_input.c:271
 __netif_receive_skb_one_core+0x14d/0x200 net/core/dev.c:4913
 __netif_receive_skb+0x2c/0x1e0 net/core/dev.c:5023
 netif_receive_skb_internal+0x12c/0x620 net/core/dev.c:5126
 napi_frags_finish net/core/dev.c:5664 [inline]
 napi_gro_frags+0x75a/0xc90 net/core/dev.c:5737
 tun_get_user+0x3189/0x4250 drivers/net/tun.c:1923
 tun_chr_write_iter+0xb9/0x154 drivers/net/tun.c:1968
 call_write_iter include/linux/fs.h:1808 [inline]
 do_iter_readv_writev+0x8b0/0xa80 fs/read_write.c:680
 do_iter_write+0x185/0x5f0 fs/read_write.c:959
 vfs_writev+0x1f1/0x360 fs/read_write.c:1004
 do_writev+0x11a/0x310 fs/read_write.c:1039
 __do_sys_writev fs/read_write.c:1112 [inline]
 __se_sys_writev fs/read_write.c:1109 [inline]
 __x64_sys_writev+0x75/0xb0 fs/read_write.c:1109
 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x457421
Code: 75 14 b8 14 00 00 00 0f 05 48 3d 01 f0 ff ff 0f 83 34 b5 fb ff c3 48 83 ec 08 e8 1a 2d 00 00 48 89 04 24 b8 14 00 00 00 0f 05 <48> 8b 3c 24 48 89 c2 e8 63 2d 00 00 48 89 d0 48 83 c4 08 48 3d 01
RSP: 002b:00007f2d30ecaba0 EFLAGS: 00000293 ORIG_RAX: 0000000000000014
RAX: ffffffffffffffda RBX: 000000000000003e RCX: 0000000000457421
RDX: 0000000000000001 RSI: 00007f2d30ecabf0 RDI: 00000000000000f0
RBP: 0000000020000500 R08: 00000000000000f0 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000293 R12: 00007f2d30ecb6d4
R13: 00000000004c4890 R14: 00000000004d7b90 R15: 00000000ffffffff

Allocated by task 22437:
 save_stack+0x43/0xd0 mm/kasan/kasan.c:448
 set_track mm/kasan/kasan.c:460 [inline]
 kasan_kmalloc+0xc7/0xe0 mm/kasan/kasan.c:553
 __do_kmalloc mm/slab.c:3718 [inline]
 __kmalloc+0x14e/0x760 mm/slab.c:3727
 kmalloc include/linux/slab.h:518 [inline]
 sock_kmalloc+0x15a/0x1f0 net/core/sock.c:1983
 ip6_mc_source+0x14dd/0x1960 net/ipv6/mcast.c:427
 do_ipv6_setsockopt.isra.9+0x3afb/0x45d0 net/ipv6/ipv6_sockglue.c:743
 ipv6_setsockopt+0xbd/0x170 net/ipv6/ipv6_sockglue.c:933
 rawv6_setsockopt+0x59/0x140 net/ipv6/raw.c:1069
 sock_common_setsockopt+0x9a/0xe0 net/core/sock.c:3038
 __sys_setsockopt+0x1ba/0x3c0 net/socket.c:1902
 __do_sys_setsockopt net/socket.c:1913 [inline]
 __se_sys_setsockopt net/socket.c:1910 [inline]
 __x64_sys_setsockopt+0xbe/0x150 net/socket.c:1910
 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe

Freed by task 22430:
 save_stack+0x43/0xd0 mm/kasan/kasan.c:448
 set_track mm/kasan/kasan.c:460 [inline]
 __kasan_slab_free+0x102/0x150 mm/kasan/kasan.c:521
 kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528
 __cache_free mm/slab.c:3498 [inline]
 kfree+0xcf/0x230 mm/slab.c:3813
 __sock_kfree_s net/core/sock.c:2004 [inline]
 sock_kfree_s+0x29/0x60 net/core/sock.c:2010
 ip6_mc_leave_src+0x11a/0x1d0 net/ipv6/mcast.c:2448
 __ipv6_sock_mc_close+0x20b/0x4e0 net/ipv6/mcast.c:310
 ipv6_sock_mc_close+0x158/0x1d0 net/ipv6/mcast.c:328
 inet6_release+0x40/0x70 net/ipv6/af_inet6.c:452
 __sock_release+0xd7/0x250 net/socket.c:579
 sock_close+0x19/0x20 net/socket.c:1141
 __fput+0x385/0xa30 fs/file_table.c:278
 ____fput+0x15/0x20 fs/file_table.c:309
 task_work_run+0x1e8/0x2a0 kernel/task_work.c:113
 tracehook_notify_resume include/linux/tracehook.h:193 [inline]
 exit_to_usermode_loop+0x318/0x380 arch/x86/entry/common.c:166
 prepare_exit_to_usermode arch/x86/entry/common.c:197 [inline]
 syscall_return_slowpath arch/x86/entry/common.c:268 [inline]
 do_syscall_64+0x6be/0x820 arch/x86/entry/common.c:293
 entry_SYSCALL_64_after_hwframe+0x49/0xbe

The buggy address belongs to the object at ffff8801ce7f2500
 which belongs to the cache kmalloc-192 of size 192
The buggy address is located 16 bytes inside of
 192-byte region [ffff8801ce7f2500, ffff8801ce7f25c0)
The buggy address belongs to the page:
page:ffffea000739fc80 count:1 mapcount:0 mapping:ffff8801da800040 index:0x0
flags: 0x2fffc0000000100(slab)
raw: 02fffc0000000100 ffffea0006f6e548 ffffea000737b948 ffff8801da800040
raw: 0000000000000000 ffff8801ce7f2000 0000000100000010 0000000000000000
page dumped because: kasan: bad access detected

Memory state around the buggy address:
 ffff8801ce7f2400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
 ffff8801ce7f2480: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
>ffff8801ce7f2500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
                         ^
 ffff8801ce7f2580: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
 ffff8801ce7f2600: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/mcast.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 4ae54aaca373..dbab62e3f0d7 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -2436,17 +2436,17 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
 {
 	int err;
 
-	/* callers have the socket lock and rtnl lock
-	 * so no other readers or writers of iml or its sflist
-	 */
+	write_lock_bh(&iml->sflock);
 	if (!iml->sflist) {
 		/* any-source empty exclude case */
-		return ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0);
+		err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0);
+	} else {
+		err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode,
+				iml->sflist->sl_count, iml->sflist->sl_addr, 0);
+		sock_kfree_s(sk, iml->sflist, IP6_SFLSIZE(iml->sflist->sl_max));
+		iml->sflist = NULL;
 	}
-	err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode,
-		iml->sflist->sl_count, iml->sflist->sl_addr, 0);
-	sock_kfree_s(sk, iml->sflist, IP6_SFLSIZE(iml->sflist->sl_max));
-	iml->sflist = NULL;
+	write_unlock_bh(&iml->sflock);
 	return err;
 }
 

From fbe1222c63b805e946c3af29b0bfbfee4c2fbeff Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Sat, 13 Oct 2018 16:48:25 +0100
Subject: [PATCH 54/95] qed: fix spelling mistake "Ireelevant" -> "Irrelevant"

Trivial fix to spelling mistake in DP_INFO message

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_int.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c
index af3a28ec04eb..0f0aba793352 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.c
@@ -228,7 +228,7 @@ static int qed_grc_attn_cb(struct qed_hwfn *p_hwfn)
 		attn_master_to_str(GET_FIELD(tmp, QED_GRC_ATTENTION_MASTER)),
 		GET_FIELD(tmp2, QED_GRC_ATTENTION_PF),
 		(GET_FIELD(tmp2, QED_GRC_ATTENTION_PRIV) ==
-		 QED_GRC_ATTENTION_PRIV_VF) ? "VF" : "(Ireelevant)",
+		 QED_GRC_ATTENTION_PRIV_VF) ? "VF" : "(Irrelevant)",
 		GET_FIELD(tmp2, QED_GRC_ATTENTION_VF));
 
 out:

From ec20a63aa8b8ec3223fb25cdb2a49f9f9dfda88c Mon Sep 17 00:00:00 2001
From: Fugang Duan <fugang.duan@nxp.com>
Date: Mon, 15 Oct 2018 05:19:00 +0000
Subject: [PATCH 55/95] net: fec: don't dump RX FIFO register when not
 available

Commit db65f35f50e0 ("net: fec: add support of ethtool get_regs") introduce
ethool "--register-dump" interface to dump all FEC registers.

But not all silicon implementations of the Freescale FEC hardware module
have the FRBR (FIFO Receive Bound Register) and FRSR (FIFO Receive Start
Register) register, so we should not be trying to dump them on those that
don't.

To fix it we create a quirk flag, FEC_QUIRK_HAS_RFREG, and check it before
dump those RX FIFO registers.

Signed-off-by: Fugang Duan <fugang.duan@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/fec.h      |  4 ++++
 drivers/net/ethernet/freescale/fec_main.c | 16 ++++++++++++----
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index 4778b663653e..bf80855dd0dd 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -452,6 +452,10 @@ struct bufdesc_ex {
  * initialisation.
  */
 #define FEC_QUIRK_MIB_CLEAR		(1 << 15)
+/* Only i.MX25/i.MX27/i.MX28 controller supports FRBR,FRSR registers,
+ * those FIFO receive registers are resolved in other platforms.
+ */
+#define FEC_QUIRK_HAS_FRREG		(1 << 16)
 
 struct bufdesc_prop {
 	int qid;
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index bf9b9fd6d2a0..7b98bb75ba8a 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -91,14 +91,16 @@ static struct platform_device_id fec_devtype[] = {
 		.driver_data = 0,
 	}, {
 		.name = "imx25-fec",
-		.driver_data = FEC_QUIRK_USE_GASKET | FEC_QUIRK_MIB_CLEAR,
+		.driver_data = FEC_QUIRK_USE_GASKET | FEC_QUIRK_MIB_CLEAR |
+			       FEC_QUIRK_HAS_FRREG,
 	}, {
 		.name = "imx27-fec",
-		.driver_data = FEC_QUIRK_MIB_CLEAR,
+		.driver_data = FEC_QUIRK_MIB_CLEAR | FEC_QUIRK_HAS_FRREG,
 	}, {
 		.name = "imx28-fec",
 		.driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME |
-				FEC_QUIRK_SINGLE_MDIO | FEC_QUIRK_HAS_RACC,
+				FEC_QUIRK_SINGLE_MDIO | FEC_QUIRK_HAS_RACC |
+				FEC_QUIRK_HAS_FRREG,
 	}, {
 		.name = "imx6q-fec",
 		.driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT |
@@ -2164,7 +2166,13 @@ static void fec_enet_get_regs(struct net_device *ndev,
 	memset(buf, 0, regs->len);
 
 	for (i = 0; i < ARRAY_SIZE(fec_enet_register_offset); i++) {
-		off = fec_enet_register_offset[i] / 4;
+		off = fec_enet_register_offset[i];
+
+		if ((off == FEC_R_BOUND || off == FEC_R_FSTART) &&
+		    !(fep->quirks & FEC_QUIRK_HAS_FRREG))
+			continue;
+
+		off >>= 2;
 		buf[off] = readl(&theregs[off]);
 	}
 }

From d805397c3822d57ca3884d4bea37b2291fc40992 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 15 Oct 2018 19:58:29 +0800
Subject: [PATCH 56/95] sctp: use the pmtu from the icmp packet to update
 transport pathmtu

Other than asoc pmtu sync from all transports, sctp_assoc_sync_pmtu
is also processing transport pmtu_pending by icmp packets. But it's
meaningless to use sctp_dst_mtu(t->dst) as new pmtu for a transport.

The right pmtu value should come from the icmp packet, and it would
be saved into transport->mtu_info in this patch and used later when
the pmtu sync happens in sctp_sendmsg_to_asoc or sctp_packet_config.

Besides, without this patch, as pmtu can only be updated correctly
when receiving a icmp packet and no place is holding sock lock, it
will take long time if the sock is busy with sending packets.

Note that it doesn't process transport->mtu_info in .release_cb(),
as there is no enough information for pmtu update, like for which
asoc or transport. It is not worth traversing all asocs to check
pmtu_pending. So unlike tcp, sctp does this in tx path, for which
mtu_info needs to be atomic_t.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h | 2 ++
 net/sctp/associola.c       | 3 ++-
 net/sctp/input.c           | 1 +
 net/sctp/output.c          | 6 ++++++
 4 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 28a7c8e44636..a11f93790476 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -876,6 +876,8 @@ struct sctp_transport {
 	unsigned long sackdelay;
 	__u32 sackfreq;
 
+	atomic_t mtu_info;
+
 	/* When was the last time that we heard from this transport? We use
 	 * this to pick new active and retran paths.
 	 */
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 297d9cf960b9..a827a1f562bf 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1450,7 +1450,8 @@ void sctp_assoc_sync_pmtu(struct sctp_association *asoc)
 	/* Get the lowest pmtu of all the transports. */
 	list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) {
 		if (t->pmtu_pending && t->dst) {
-			sctp_transport_update_pmtu(t, sctp_dst_mtu(t->dst));
+			sctp_transport_update_pmtu(t,
+						   atomic_read(&t->mtu_info));
 			t->pmtu_pending = 0;
 		}
 		if (!pmtu || (t->pathmtu < pmtu))
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 9bbc5f92c941..5c36a99882ed 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -395,6 +395,7 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc,
 		return;
 
 	if (sock_owned_by_user(sk)) {
+		atomic_set(&t->mtu_info, pmtu);
 		asoc->pmtu_pending = 1;
 		t->pmtu_pending = 1;
 		return;
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 7f849b01ec8e..67939ad99c01 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -120,6 +120,12 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag,
 			sctp_assoc_sync_pmtu(asoc);
 	}
 
+	if (asoc->pmtu_pending) {
+		if (asoc->param_flags & SPP_PMTUD_ENABLE)
+			sctp_assoc_sync_pmtu(asoc);
+		asoc->pmtu_pending = 0;
+	}
+
 	/* If there a is a prepend chunk stick it on the list before
 	 * any other chunks get appended.
 	 */

From 1890fea7936ad9be0b7caf6a94146b0d905c4b60 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 15 Oct 2018 22:37:21 +0100
Subject: [PATCH 57/95] rxrpc: Fix a missing rxrpc_put_peer() in the
 error_report handler

Fix a missing call to rxrpc_put_peer() on the main path through the
rxrpc_error_report() function.  This manifests itself as a ref leak
whenever an ICMP packet or other error comes in.

In commit f334430316e7, the hand-off of the ref to a work item was removed
and was not replaced with a put.

Fixes: f334430316e7 ("rxrpc: Fix error distribution")
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/peer_event.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index 05b51bdbdd41..bd2fa3b7caa7 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -195,6 +195,7 @@ void rxrpc_error_report(struct sock *sk)
 	rxrpc_store_error(peer, serr);
 	rcu_read_unlock();
 	rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
+	rxrpc_put_peer(peer);
 
 	_leave("");
 }

From 8913806f166e47c6b3fe8253e9cfb9caabe64341 Mon Sep 17 00:00:00 2001
From: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Date: Mon, 15 Oct 2018 16:52:23 -0700
Subject: [PATCH 58/95] nfp: flower: fix pedit set actions for multiple partial
 masks

Previously we did not correctly change headers when using multiple
pedit actions with partial masks. We now take this into account and
no longer just commit the last pedit action.

Fixes: c0b1bd9a8b8a ("nfp: add set ipv4 header action flower offload")
Signed-off-by: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/netronome/nfp/flower/action.c    | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c
index 46ba0cf257c6..91de7a9b0190 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/action.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/action.c
@@ -429,12 +429,14 @@ nfp_fl_set_ip4(const struct tc_action *action, int idx, u32 off,
 
 	switch (off) {
 	case offsetof(struct iphdr, daddr):
-		set_ip_addr->ipv4_dst_mask = mask;
-		set_ip_addr->ipv4_dst = exact;
+		set_ip_addr->ipv4_dst_mask |= mask;
+		set_ip_addr->ipv4_dst &= ~mask;
+		set_ip_addr->ipv4_dst |= exact & mask;
 		break;
 	case offsetof(struct iphdr, saddr):
-		set_ip_addr->ipv4_src_mask = mask;
-		set_ip_addr->ipv4_src = exact;
+		set_ip_addr->ipv4_src_mask |= mask;
+		set_ip_addr->ipv4_src &= ~mask;
+		set_ip_addr->ipv4_src |= exact & mask;
 		break;
 	default:
 		return -EOPNOTSUPP;
@@ -451,8 +453,9 @@ static void
 nfp_fl_set_ip6_helper(int opcode_tag, int idx, __be32 exact, __be32 mask,
 		      struct nfp_fl_set_ipv6_addr *ip6)
 {
-	ip6->ipv6[idx % 4].mask = mask;
-	ip6->ipv6[idx % 4].exact = exact;
+	ip6->ipv6[idx % 4].mask |= mask;
+	ip6->ipv6[idx % 4].exact &= ~mask;
+	ip6->ipv6[idx % 4].exact |= exact & mask;
 
 	ip6->reserved = cpu_to_be16(0);
 	ip6->head.jump_id = opcode_tag;

From d08c9e589300b015e72e5b41ff4dfed6eb8e7421 Mon Sep 17 00:00:00 2001
From: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Date: Mon, 15 Oct 2018 16:52:24 -0700
Subject: [PATCH 59/95] nfp: flower: fix multiple keys per pedit action

Previously we only allowed a single header key per pedit action to
change the header. This used to result in the last header key in the
pedit action to overwrite previous headers. We now keep track of them
and allow multiple header keys per pedit action.

Fixes: c0b1bd9a8b8a ("nfp: add set ipv4 header action flower offload")
Fixes: 354b82bb320e ("nfp: add set ipv6 source and destination address")
Fixes: f8b7b0a6b113 ("nfp: add set tcp and udp header action flower offload")
Signed-off-by: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/netronome/nfp/flower/action.c   | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c
index 91de7a9b0190..c39d7fdf73e6 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/action.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/action.c
@@ -544,7 +544,7 @@ nfp_fl_pedit(const struct tc_action *action, struct tc_cls_flower_offload *flow,
 	struct nfp_fl_set_eth set_eth;
 	enum pedit_header_type htype;
 	int idx, nkeys, err;
-	size_t act_size;
+	size_t act_size = 0;
 	u32 offset, cmd;
 	u8 ip_proto = 0;
 
@@ -602,7 +602,9 @@ nfp_fl_pedit(const struct tc_action *action, struct tc_cls_flower_offload *flow,
 		act_size = sizeof(set_eth);
 		memcpy(nfp_action, &set_eth, act_size);
 		*a_len += act_size;
-	} else if (set_ip_addr.head.len_lw) {
+	}
+	if (set_ip_addr.head.len_lw) {
+		nfp_action += act_size;
 		act_size = sizeof(set_ip_addr);
 		memcpy(nfp_action, &set_ip_addr, act_size);
 		*a_len += act_size;
@@ -610,10 +612,12 @@ nfp_fl_pedit(const struct tc_action *action, struct tc_cls_flower_offload *flow,
 		/* Hardware will automatically fix IPv4 and TCP/UDP checksum. */
 		*csum_updated |= TCA_CSUM_UPDATE_FLAG_IPV4HDR |
 				nfp_fl_csum_l4_to_flag(ip_proto);
-	} else if (set_ip6_dst.head.len_lw && set_ip6_src.head.len_lw) {
+	}
+	if (set_ip6_dst.head.len_lw && set_ip6_src.head.len_lw) {
 		/* TC compiles set src and dst IPv6 address as a single action,
 		 * the hardware requires this to be 2 separate actions.
 		 */
+		nfp_action += act_size;
 		act_size = sizeof(set_ip6_src);
 		memcpy(nfp_action, &set_ip6_src, act_size);
 		*a_len += act_size;
@@ -626,6 +630,7 @@ nfp_fl_pedit(const struct tc_action *action, struct tc_cls_flower_offload *flow,
 		/* Hardware will automatically fix TCP/UDP checksum. */
 		*csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto);
 	} else if (set_ip6_dst.head.len_lw) {
+		nfp_action += act_size;
 		act_size = sizeof(set_ip6_dst);
 		memcpy(nfp_action, &set_ip6_dst, act_size);
 		*a_len += act_size;
@@ -633,13 +638,16 @@ nfp_fl_pedit(const struct tc_action *action, struct tc_cls_flower_offload *flow,
 		/* Hardware will automatically fix TCP/UDP checksum. */
 		*csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto);
 	} else if (set_ip6_src.head.len_lw) {
+		nfp_action += act_size;
 		act_size = sizeof(set_ip6_src);
 		memcpy(nfp_action, &set_ip6_src, act_size);
 		*a_len += act_size;
 
 		/* Hardware will automatically fix TCP/UDP checksum. */
 		*csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto);
-	} else if (set_tport.head.len_lw) {
+	}
+	if (set_tport.head.len_lw) {
+		nfp_action += act_size;
 		act_size = sizeof(set_tport);
 		memcpy(nfp_action, &set_tport, act_size);
 		*a_len += act_size;

From 140b6abac26d799f75d772ab5e969b34ad8d68f1 Mon Sep 17 00:00:00 2001
From: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Date: Mon, 15 Oct 2018 16:52:25 -0700
Subject: [PATCH 60/95] nfp: flower: use offsets provided by pedit instead of
 index for ipv6

Previously when populating the set ipv6 address action, we incorrectly
made use of pedit's key index to determine which 32bit word should be
set. We now calculate which word has been selected based on the offset
provided by the pedit action.

Fixes: 354b82bb320e ("nfp: add set ipv6 source and destination address")
Signed-off-by: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/netronome/nfp/flower/action.c    | 26 +++++++++++--------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c
index c39d7fdf73e6..7a1e9cd9cc62 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/action.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/action.c
@@ -450,12 +450,12 @@ nfp_fl_set_ip4(const struct tc_action *action, int idx, u32 off,
 }
 
 static void
-nfp_fl_set_ip6_helper(int opcode_tag, int idx, __be32 exact, __be32 mask,
+nfp_fl_set_ip6_helper(int opcode_tag, u8 word, __be32 exact, __be32 mask,
 		      struct nfp_fl_set_ipv6_addr *ip6)
 {
-	ip6->ipv6[idx % 4].mask |= mask;
-	ip6->ipv6[idx % 4].exact &= ~mask;
-	ip6->ipv6[idx % 4].exact |= exact & mask;
+	ip6->ipv6[word].mask |= mask;
+	ip6->ipv6[word].exact &= ~mask;
+	ip6->ipv6[word].exact |= exact & mask;
 
 	ip6->reserved = cpu_to_be16(0);
 	ip6->head.jump_id = opcode_tag;
@@ -468,6 +468,7 @@ nfp_fl_set_ip6(const struct tc_action *action, int idx, u32 off,
 	       struct nfp_fl_set_ipv6_addr *ip_src)
 {
 	__be32 exact, mask;
+	u8 word;
 
 	/* We are expecting tcf_pedit to return a big endian value */
 	mask = (__force __be32)~tcf_pedit_mask(action, idx);
@@ -476,17 +477,20 @@ nfp_fl_set_ip6(const struct tc_action *action, int idx, u32 off,
 	if (exact & ~mask)
 		return -EOPNOTSUPP;
 
-	if (off < offsetof(struct ipv6hdr, saddr))
+	if (off < offsetof(struct ipv6hdr, saddr)) {
 		return -EOPNOTSUPP;
-	else if (off < offsetof(struct ipv6hdr, daddr))
-		nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_SRC, idx,
+	} else if (off < offsetof(struct ipv6hdr, daddr)) {
+		word = (off - offsetof(struct ipv6hdr, saddr)) / sizeof(exact);
+		nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_SRC, word,
 				      exact, mask, ip_src);
-	else if (off < offsetof(struct ipv6hdr, daddr) +
-		       sizeof(struct in6_addr))
-		nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_DST, idx,
+	} else if (off < offsetof(struct ipv6hdr, daddr) +
+		       sizeof(struct in6_addr)) {
+		word = (off - offsetof(struct ipv6hdr, daddr)) / sizeof(exact);
+		nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_DST, word,
 				      exact, mask, ip_dst);
-	else
+	} else {
 		return -EOPNOTSUPP;
+	}
 
 	return 0;
 }

From cf8afe5c53621eda385be961bc72329a2adcd767 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Tue, 16 Oct 2018 08:21:48 +0200
Subject: [PATCH 61/95] parisc: Fix uninitialized variable usage in unwind.c

As noticed by Dave Anglin, the last commit introduced a small bug where
the potentially uninitialized r struct is used instead of the regs
pointer as input for unwind_frame_init(). Fix it.

Signed-off-by: Helge Deller <deller@gmx.de>
Reported-by: John David Anglin <dave.anglin@bell.net>
---
 arch/parisc/kernel/unwind.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c
index f329b466e68f..2d14f17838d2 100644
--- a/arch/parisc/kernel/unwind.c
+++ b/arch/parisc/kernel/unwind.c
@@ -426,7 +426,7 @@ void unwind_frame_init_task(struct unwind_frame_info *info,
 			r.gr[30] = get_parisc_stackpointer();
 			regs = &r;
 		}
-		unwind_frame_init(info, task, &r);
+		unwind_frame_init(info, task, regs);
 	} else {
 		unwind_frame_init_from_blocked_task(info, task);
 	}

From 9068e02f58740778d8270840657f1e250a2cc60f Mon Sep 17 00:00:00 2001
From: Clint Taylor <clinton.a.taylor@intel.com>
Date: Fri, 5 Oct 2018 14:52:15 -0700
Subject: [PATCH 62/95] drm/edid: VSDB yCBCr420 Deep Color mode bit definitions

HDMI Forum VSDB YCBCR420 deep color capability bits are 2:0. Correct
definitions in the header for the mask to work correctly.

Fixes: e6a9a2c3dc43 ("drm/edid: parse ycbcr 420 deep color information")
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107893
Cc: <stable@vger.kernel.org> # v4.14+
Signed-off-by: Clint Taylor <clinton.a.taylor@intel.com>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Shashank Sharma <shashank.sharma@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1538776335-12569-1-git-send-email-clinton.a.taylor@intel.com
---
 drivers/gpu/drm/drm_edid.c | 2 +-
 include/drm/drm_edid.h     | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 1e2b9407c8d0..ff0bfc65a8c1 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -4282,7 +4282,7 @@ static void drm_parse_ycbcr420_deep_color_info(struct drm_connector *connector,
 	struct drm_hdmi_info *hdmi = &connector->display_info.hdmi;
 
 	dc_mask = db[7] & DRM_EDID_YCBCR420_DC_MASK;
-	hdmi->y420_dc_modes |= dc_mask;
+	hdmi->y420_dc_modes = dc_mask;
 }
 
 static void drm_parse_hdmi_forum_vsdb(struct drm_connector *connector,
diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h
index b25d12ef120a..e3c404833115 100644
--- a/include/drm/drm_edid.h
+++ b/include/drm/drm_edid.h
@@ -214,9 +214,9 @@ struct detailed_timing {
 #define DRM_EDID_HDMI_DC_Y444             (1 << 3)
 
 /* YCBCR 420 deep color modes */
-#define DRM_EDID_YCBCR420_DC_48		  (1 << 6)
-#define DRM_EDID_YCBCR420_DC_36		  (1 << 5)
-#define DRM_EDID_YCBCR420_DC_30		  (1 << 4)
+#define DRM_EDID_YCBCR420_DC_48		  (1 << 2)
+#define DRM_EDID_YCBCR420_DC_36		  (1 << 1)
+#define DRM_EDID_YCBCR420_DC_30		  (1 << 0)
 #define DRM_EDID_YCBCR420_DC_MASK (DRM_EDID_YCBCR420_DC_48 | \
 				    DRM_EDID_YCBCR420_DC_36 | \
 				    DRM_EDID_YCBCR420_DC_30)

From 0295e39595e1146522f2722715dba7f7fba42217 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Tue, 16 Oct 2018 16:32:40 +0200
Subject: [PATCH 63/95] IB/ucm: Fix Spectre v1 vulnerability

hdr.cmd can be indirectly controlled by user-space, hence leading to
a potential exploitation of the Spectre variant 1 vulnerability.

This issue was detected with the help of Smatch:

drivers/infiniband/core/ucm.c:1127 ib_ucm_write() warn: potential
spectre issue 'ucm_cmd_table' [r] (local cap)

Fix this by sanitizing hdr.cmd before using it to index
ucm_cmd_table.

Notice that given that speculation windows are large, the policy is
to kill the speculation on the first load and not worry if it can be
completed with a dependent load/store [1].

[1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2

Cc: stable@vger.kernel.org
Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/ucm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index faa9e6116b2f..73332b9a25b5 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -46,6 +46,8 @@
 #include <linux/mutex.h>
 #include <linux/slab.h>
 
+#include <linux/nospec.h>
+
 #include <linux/uaccess.h>
 
 #include <rdma/ib.h>
@@ -1120,6 +1122,7 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf,
 
 	if (hdr.cmd >= ARRAY_SIZE(ucm_cmd_table))
 		return -EINVAL;
+	hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucm_cmd_table));
 
 	if (hdr.in + sizeof(hdr) > len)
 		return -EINVAL;

From a3671a4f973ee9d9621d60166cc3b037c397d604 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Tue, 16 Oct 2018 16:59:01 +0200
Subject: [PATCH 64/95] RDMA/ucma: Fix Spectre v1 vulnerability

hdr.cmd can be indirectly controlled by user-space, hence leading to
a potential exploitation of the Spectre variant 1 vulnerability.

This issue was detected with the help of Smatch:

drivers/infiniband/core/ucma.c:1686 ucma_write() warn: potential
spectre issue 'ucma_cmd_table' [r] (local cap)

Fix this by sanitizing hdr.cmd before using it to index
ucm_cmd_table.

Notice that given that speculation windows are large, the policy is
to kill the speculation on the first load and not worry if it can be
completed with a dependent load/store [1].

[1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2

Cc: stable@vger.kernel.org
Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/ucma.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 21863ddde63e..01d68ed46c1b 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -44,6 +44,8 @@
 #include <linux/module.h>
 #include <linux/nsproxy.h>
 
+#include <linux/nospec.h>
+
 #include <rdma/rdma_user_cm.h>
 #include <rdma/ib_marshall.h>
 #include <rdma/rdma_cm.h>
@@ -1676,6 +1678,7 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf,
 
 	if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table))
 		return -EINVAL;
+	hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucma_cmd_table));
 
 	if (hdr.in + sizeof(hdr) > len)
 		return -EINVAL;

From 0ac1077e3a549bf8d35971613e2be05bdbb41a00 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 16 Oct 2018 15:52:02 +0800
Subject: [PATCH 65/95] sctp: get pr_assoc and pr_stream all status with
 SCTP_PR_SCTP_ALL instead

According to rfc7496 section 4.3 or 4.4:

   sprstat_policy:  This parameter indicates for which PR-SCTP policy
      the user wants the information.  It is an error to use
      SCTP_PR_SCTP_NONE in sprstat_policy.  If SCTP_PR_SCTP_ALL is used,
      the counters provided are aggregated over all supported policies.

We change to dump pr_assoc and pr_stream all status by SCTP_PR_SCTP_ALL
instead, and return error for SCTP_PR_SCTP_NONE, as it also said "It is
an error to use SCTP_PR_SCTP_NONE in sprstat_policy. "

Fixes: 826d253d57b1 ("sctp: add SCTP_PR_ASSOC_STATUS on sctp sockopt")
Fixes: d229d48d183f ("sctp: add SCTP_PR_STREAM_STATUS sockopt for prsctp")
Reported-by: Ying Xu <yinxu@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/sctp.h | 1 +
 net/sctp/socket.c         | 8 ++++----
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index b479db5c71d9..34dd3d497f2c 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -301,6 +301,7 @@ enum sctp_sinfo_flags {
 	SCTP_SACK_IMMEDIATELY	= (1 << 3), /* SACK should be sent without delay. */
 	/* 2 bits here have been used by SCTP_PR_SCTP_MASK */
 	SCTP_SENDALL		= (1 << 6),
+	SCTP_PR_SCTP_ALL	= (1 << 7),
 	SCTP_NOTIFICATION	= MSG_NOTIFICATION, /* Next message is not user msg but notification. */
 	SCTP_EOF		= MSG_FIN,  /* Initiate graceful shutdown process. */
 };
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index f73e9d38d5ba..e25a20fc629a 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -7100,14 +7100,14 @@ static int sctp_getsockopt_pr_assocstatus(struct sock *sk, int len,
 	}
 
 	policy = params.sprstat_policy;
-	if (policy & ~SCTP_PR_SCTP_MASK)
+	if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL)))
 		goto out;
 
 	asoc = sctp_id2assoc(sk, params.sprstat_assoc_id);
 	if (!asoc)
 		goto out;
 
-	if (policy == SCTP_PR_SCTP_NONE) {
+	if (policy & SCTP_PR_SCTP_ALL) {
 		params.sprstat_abandoned_unsent = 0;
 		params.sprstat_abandoned_sent = 0;
 		for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) {
@@ -7159,7 +7159,7 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len,
 	}
 
 	policy = params.sprstat_policy;
-	if (policy & ~SCTP_PR_SCTP_MASK)
+	if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL)))
 		goto out;
 
 	asoc = sctp_id2assoc(sk, params.sprstat_assoc_id);
@@ -7175,7 +7175,7 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len,
 		goto out;
 	}
 
-	if (policy == SCTP_PR_SCTP_NONE) {
+	if (policy == SCTP_PR_SCTP_ALL) {
 		params.sprstat_abandoned_unsent = 0;
 		params.sprstat_abandoned_sent = 0;
 		for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) {

From 9c0be3f6b5d776dfe3ed249862c244a4486414dc Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Sat, 13 Oct 2018 15:10:50 -0400
Subject: [PATCH 66/95] tracepoint: Fix tracepoint array element size mismatch

commit 46e0c9be206f ("kernel: tracepoints: add support for relative
references") changes the layout of the __tracepoint_ptrs section on
architectures supporting relative references. However, it does so
without turning struct tracepoint * const into const int elsewhere in
the tracepoint code, which has the following side-effect:

Setting mod->num_tracepoints is done in by module.c:

    mod->tracepoints_ptrs = section_objs(info, "__tracepoints_ptrs",
                                         sizeof(*mod->tracepoints_ptrs),
                                         &mod->num_tracepoints);

Basically, since sizeof(*mod->tracepoints_ptrs) is a pointer size
(rather than sizeof(int)), num_tracepoints is erroneously set to half the
size it should be on 64-bit arch. So a module with an odd number of
tracepoints misses the last tracepoint due to effect of integer
division.

So in the module going notifier:

        for_each_tracepoint_range(mod->tracepoints_ptrs,
                mod->tracepoints_ptrs + mod->num_tracepoints,
                tp_module_going_check_quiescent, NULL);

the expression (mod->tracepoints_ptrs + mod->num_tracepoints) actually
evaluates to something within the bounds of the array, but miss the
last tracepoint if the number of tracepoints is odd on 64-bit arch.

Fix this by introducing a new typedef: tracepoint_ptr_t, which
is either "const int" on architectures that have PREL32 relocations,
or "struct tracepoint * const" on architectures that does not have
this feature.

Also provide a new tracepoint_ptr_defer() static inline to
encapsulate deferencing this type rather than duplicate code and
ugly idefs within the for_each_tracepoint_range() implementation.

This issue appears in 4.19-rc kernels, and should ideally be fixed
before the end of the rc cycle.

Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Jessica Yu <jeyu@kernel.org>
Link: http://lkml.kernel.org/r/20181013191050.22389-1-mathieu.desnoyers@efficios.com
Link: http://lkml.kernel.org/r/20180704083651.24360-7-ard.biesheuvel@linaro.org
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morris <james.morris@microsoft.com>
Cc: James Morris <jmorris@namei.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Nicolas Pitre <nico@linaro.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: "Serge E. Hallyn" <serge@hallyn.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Thomas Garnier <thgarnie@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/module.h          |  3 ++-
 include/linux/tracepoint-defs.h |  6 ++++++
 include/linux/tracepoint.h      | 36 +++++++++++++++++++++------------
 kernel/tracepoint.c             | 24 ++++++++--------------
 4 files changed, 39 insertions(+), 30 deletions(-)

diff --git a/include/linux/module.h b/include/linux/module.h
index f807f15bebbe..e19ae08c7fb8 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -20,6 +20,7 @@
 #include <linux/export.h>
 #include <linux/rbtree_latch.h>
 #include <linux/error-injection.h>
+#include <linux/tracepoint-defs.h>
 
 #include <linux/percpu.h>
 #include <asm/module.h>
@@ -430,7 +431,7 @@ struct module {
 
 #ifdef CONFIG_TRACEPOINTS
 	unsigned int num_tracepoints;
-	struct tracepoint * const *tracepoints_ptrs;
+	tracepoint_ptr_t *tracepoints_ptrs;
 #endif
 #ifdef HAVE_JUMP_LABEL
 	struct jump_entry *jump_entries;
diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h
index 22c5a46e9693..49ba9cde7e4b 100644
--- a/include/linux/tracepoint-defs.h
+++ b/include/linux/tracepoint-defs.h
@@ -35,6 +35,12 @@ struct tracepoint {
 	struct tracepoint_func __rcu *funcs;
 };
 
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+typedef const int tracepoint_ptr_t;
+#else
+typedef struct tracepoint * const tracepoint_ptr_t;
+#endif
+
 struct bpf_raw_event_map {
 	struct tracepoint	*tp;
 	void			*bpf_func;
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 041f7e56a289..538ba1a58f5b 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -99,6 +99,29 @@ extern void syscall_unregfunc(void);
 #define TRACE_DEFINE_ENUM(x)
 #define TRACE_DEFINE_SIZEOF(x)
 
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
+{
+	return offset_to_ptr(p);
+}
+
+#define __TRACEPOINT_ENTRY(name)					\
+	asm("	.section \"__tracepoints_ptrs\", \"a\"		\n"	\
+	    "	.balign 4					\n"	\
+	    "	.long 	__tracepoint_" #name " - .		\n"	\
+	    "	.previous					\n")
+#else
+static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
+{
+	return *p;
+}
+
+#define __TRACEPOINT_ENTRY(name)					 \
+	static tracepoint_ptr_t __tracepoint_ptr_##name __used		 \
+	__attribute__((section("__tracepoints_ptrs"))) =		 \
+		&__tracepoint_##name
+#endif
+
 #endif /* _LINUX_TRACEPOINT_H */
 
 /*
@@ -253,19 +276,6 @@ extern void syscall_unregfunc(void);
 		return static_key_false(&__tracepoint_##name.key);	\
 	}
 
-#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
-#define __TRACEPOINT_ENTRY(name)					\
-	asm("	.section \"__tracepoints_ptrs\", \"a\"		\n"	\
-	    "	.balign 4					\n"	\
-	    "	.long 	__tracepoint_" #name " - .		\n"	\
-	    "	.previous					\n")
-#else
-#define __TRACEPOINT_ENTRY(name)					 \
-	static struct tracepoint * const __tracepoint_ptr_##name __used	 \
-	__attribute__((section("__tracepoints_ptrs"))) =		 \
-		&__tracepoint_##name
-#endif
-
 /*
  * We have no guarantee that gcc and the linker won't up-align the tracepoint
  * structures, so we create an array of pointers that will be used for iteration
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index bf2c06ef9afc..a3be42304485 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -28,8 +28,8 @@
 #include <linux/sched/task.h>
 #include <linux/static_key.h>
 
-extern struct tracepoint * const __start___tracepoints_ptrs[];
-extern struct tracepoint * const __stop___tracepoints_ptrs[];
+extern tracepoint_ptr_t __start___tracepoints_ptrs[];
+extern tracepoint_ptr_t __stop___tracepoints_ptrs[];
 
 DEFINE_SRCU(tracepoint_srcu);
 EXPORT_SYMBOL_GPL(tracepoint_srcu);
@@ -371,25 +371,17 @@ int tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data)
 }
 EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
 
-static void for_each_tracepoint_range(struct tracepoint * const *begin,
-		struct tracepoint * const *end,
+static void for_each_tracepoint_range(
+		tracepoint_ptr_t *begin, tracepoint_ptr_t *end,
 		void (*fct)(struct tracepoint *tp, void *priv),
 		void *priv)
 {
+	tracepoint_ptr_t *iter;
+
 	if (!begin)
 		return;
-
-	if (IS_ENABLED(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS)) {
-		const int *iter;
-
-		for (iter = (const int *)begin; iter < (const int *)end; iter++)
-			fct(offset_to_ptr(iter), priv);
-	} else {
-		struct tracepoint * const *iter;
-
-		for (iter = begin; iter < end; iter++)
-			fct(*iter, priv);
-	}
+	for (iter = begin; iter < end; iter++)
+		fct(tracepoint_ptr_deref(iter), priv);
 }
 
 #ifdef CONFIG_MODULES

From 12ad0cb2123aed30241a14792ef5bef9efcccbcd Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Mon, 15 Oct 2018 23:31:42 -0400
Subject: [PATCH 67/95] tracing: Use trace_clock_local() for looping in
 preemptirq_delay_test.c

The preemptirq_delay_test module is used for the ftrace selftest code that
tests the latency tracers. The problem is that it uses ktime for the delay
loop, and then checks the tracer to see if the delay loop is caught, but the
tracer uses trace_clock_local() which uses various different other clocks to
measure the latency. As ktime uses the clock cycles, and the code then
converts that to nanoseconds, it causes rounding errors, and the preemptirq
latency tests are failing due to being off by 1 (it expects to see a delay
of 500000 us, but the delay is only 499999 us). This is happening due to a
rounding error in the ktime (which is totally legit). The purpose of the
test is to see if it can catch the delay, not to test the accuracy between
trace_clock_local() and ktime_get(). Best to use apples to apples, and have
the delay loop use the same clock as the latency tracer does.

Cc: stable@vger.kernel.org
Fixes: f96e8577da102 ("lib: Add module for testing preemptoff/irqsoff latency tracers")
Acked-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/preemptirq_delay_test.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c
index f704390db9fc..d8765c952fab 100644
--- a/kernel/trace/preemptirq_delay_test.c
+++ b/kernel/trace/preemptirq_delay_test.c
@@ -5,12 +5,12 @@
  * Copyright (C) 2018 Joel Fernandes (Google) <joel@joelfernandes.org>
  */
 
+#include <linux/trace_clock.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/kernel.h>
 #include <linux/kthread.h>
-#include <linux/ktime.h>
 #include <linux/module.h>
 #include <linux/printk.h>
 #include <linux/string.h>
@@ -25,13 +25,13 @@ MODULE_PARM_DESC(test_mode, "Mode of the test such as preempt or irq (default ir
 
 static void busy_wait(ulong time)
 {
-	ktime_t start, end;
-	start = ktime_get();
+	u64 start, end;
+	start = trace_clock_local();
 	do {
-		end = ktime_get();
+		end = trace_clock_local();
 		if (kthread_should_stop())
 			break;
-	} while (ktime_to_ns(ktime_sub(end, start)) < (time * 1000));
+	} while ((end - start) < (time * 1000));
 }
 
 static int preemptirq_delay_run(void *data)

From 776ca1543b5fe673aaf1beb244fcc2429d378083 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 17 Oct 2018 21:28:01 -0700
Subject: [PATCH 68/95] sparc: Fix syscall fallback bugs in VDSO.

First, the trap number for 32-bit syscalls is 0x10.

Also, only negate the return value when syscall error is indicated by
the carry bit being set.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/vdso/vclock_gettime.c | 12 +++++++++++-
 tools/perf/util/event.c          |  5 +++++
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/arch/sparc/vdso/vclock_gettime.c b/arch/sparc/vdso/vclock_gettime.c
index 3feb3d960ca5..75dca9aab737 100644
--- a/arch/sparc/vdso/vclock_gettime.c
+++ b/arch/sparc/vdso/vclock_gettime.c
@@ -33,9 +33,19 @@
 #define	TICK_PRIV_BIT	(1ULL << 63)
 #endif
 
+#ifdef	CONFIG_SPARC64
 #define SYSCALL_STRING							\
 	"ta	0x6d;"							\
-	"sub	%%g0, %%o0, %%o0;"					\
+	"bcs,a	1f;"							\
+	" sub	%%g0, %%o0, %%o0;"					\
+	"1:"
+#else
+#define SYSCALL_STRING							\
+	"ta	0x10;"							\
+	"bcs,a	1f;"							\
+	" sub	%%g0, %%o0, %%o0;"					\
+	"1:"
+#endif
 
 #define SYSCALL_CLOBBERS						\
 	"f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7",			\
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 0cd42150f712..651cd6f6762c 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -1081,6 +1081,7 @@ void *cpu_map_data__alloc(struct cpu_map *map, size_t *size, u16 *type, int *max
 	}
 
 	*size += sizeof(struct cpu_map_data);
+	*size = PERF_ALIGN(*size, sizeof(u64));
 	return zalloc(*size);
 }
 
@@ -1560,7 +1561,9 @@ struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr,
 
 		return NULL;
 	}
+#if 0
 try_again:
+#endif
 	al->map = map_groups__find(mg, al->addr);
 	if (al->map == NULL) {
 		/*
@@ -1572,6 +1575,7 @@ try_again:
 		 * "[vdso]" dso, but for now lets use the old trick of looking
 		 * in the whole kernel symbol list.
 		 */
+#if 0
 		if (cpumode == PERF_RECORD_MISC_USER && machine &&
 		    mg != &machine->kmaps &&
 		    machine__kernel_ip(machine, al->addr)) {
@@ -1579,6 +1583,7 @@ try_again:
 			load_map = true;
 			goto try_again;
 		}
+#endif
 	} else {
 		/*
 		 * Kernel maps might be changed when loading symbols so loading

From 7463e4f9b99c089cc962033b46349ff29f466e40 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Fri, 12 Oct 2018 23:53:58 +0200
Subject: [PATCH 69/95] geneve, vxlan: Don't check skb_dst() twice

Commit f15ca723c1eb ("net: don't call update_pmtu unconditionally") avoids
that we try updating PMTU for a non-existent destination, but didn't clean
up cases where the check was already explicit. Drop those redundant checks.

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Reviewed-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/geneve.c | 15 ++++-----------
 drivers/net/vxlan.c  | 12 ++----------
 2 files changed, 6 insertions(+), 21 deletions(-)

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 6acb6b5718b9..61c4bfbeb41c 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -830,12 +830,8 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
 	if (IS_ERR(rt))
 		return PTR_ERR(rt);
 
-	if (skb_dst(skb)) {
-		int mtu = dst_mtu(&rt->dst) - GENEVE_IPV4_HLEN -
-			  info->options_len;
-
-		skb_dst_update_pmtu(skb, mtu);
-	}
+	skb_dst_update_pmtu(skb, dst_mtu(&rt->dst) -
+				 GENEVE_IPV4_HLEN - info->options_len);
 
 	sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
 	if (geneve->collect_md) {
@@ -876,11 +872,8 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
 	if (IS_ERR(dst))
 		return PTR_ERR(dst);
 
-	if (skb_dst(skb)) {
-		int mtu = dst_mtu(dst) - GENEVE_IPV6_HLEN - info->options_len;
-
-		skb_dst_update_pmtu(skb, mtu);
-	}
+	skb_dst_update_pmtu(skb, dst_mtu(dst) -
+				 GENEVE_IPV6_HLEN - info->options_len);
 
 	sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
 	if (geneve->collect_md) {
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 2b8da2b7e721..22e0ce592e07 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2194,11 +2194,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 		}
 
 		ndst = &rt->dst;
-		if (skb_dst(skb)) {
-			int mtu = dst_mtu(ndst) - VXLAN_HEADROOM;
-
-			skb_dst_update_pmtu(skb, mtu);
-		}
+		skb_dst_update_pmtu(skb, dst_mtu(ndst) - VXLAN_HEADROOM);
 
 		tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
 		ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
@@ -2235,11 +2231,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 				goto out_unlock;
 		}
 
-		if (skb_dst(skb)) {
-			int mtu = dst_mtu(ndst) - VXLAN6_HEADROOM;
-
-			skb_dst_update_pmtu(skb, mtu);
-		}
+		skb_dst_update_pmtu(skb, dst_mtu(ndst) - VXLAN6_HEADROOM);
 
 		tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
 		ttl = ttl ? : ip6_dst_hoplimit(ndst);

From 6b4f92af3d59e882d3ba04c44a815266890d188f Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Fri, 12 Oct 2018 23:53:59 +0200
Subject: [PATCH 70/95] geneve, vxlan: Don't set exceptions if skb->len < mtu

We shouldn't abuse exceptions: if the destination MTU is already higher
than what we're transmitting, no exception should be created.

Fixes: 52a589d51f10 ("geneve: update skb dst pmtu on tx path")
Fixes: a93bf0ff4490 ("vxlan: update skb dst pmtu on tx path")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Reviewed-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/geneve.c |  7 +++----
 drivers/net/vxlan.c  |  4 ++--
 include/net/dst.h    | 10 ++++++++++
 3 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 61c4bfbeb41c..493cd382b8aa 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -830,8 +830,8 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
 	if (IS_ERR(rt))
 		return PTR_ERR(rt);
 
-	skb_dst_update_pmtu(skb, dst_mtu(&rt->dst) -
-				 GENEVE_IPV4_HLEN - info->options_len);
+	skb_tunnel_check_pmtu(skb, &rt->dst,
+			      GENEVE_IPV4_HLEN + info->options_len);
 
 	sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
 	if (geneve->collect_md) {
@@ -872,8 +872,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
 	if (IS_ERR(dst))
 		return PTR_ERR(dst);
 
-	skb_dst_update_pmtu(skb, dst_mtu(dst) -
-				 GENEVE_IPV6_HLEN - info->options_len);
+	skb_tunnel_check_pmtu(skb, dst, GENEVE_IPV6_HLEN + info->options_len);
 
 	sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
 	if (geneve->collect_md) {
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 22e0ce592e07..27bd586b94b0 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2194,7 +2194,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 		}
 
 		ndst = &rt->dst;
-		skb_dst_update_pmtu(skb, dst_mtu(ndst) - VXLAN_HEADROOM);
+		skb_tunnel_check_pmtu(skb, ndst, VXLAN_HEADROOM);
 
 		tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
 		ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
@@ -2231,7 +2231,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 				goto out_unlock;
 		}
 
-		skb_dst_update_pmtu(skb, dst_mtu(ndst) - VXLAN6_HEADROOM);
+		skb_tunnel_check_pmtu(skb, ndst, VXLAN6_HEADROOM);
 
 		tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
 		ttl = ttl ? : ip6_dst_hoplimit(ndst);
diff --git a/include/net/dst.h b/include/net/dst.h
index 7f735e76ca73..6cf0870414c7 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -527,4 +527,14 @@ static inline void skb_dst_update_pmtu(struct sk_buff *skb, u32 mtu)
 		dst->ops->update_pmtu(dst, NULL, skb, mtu);
 }
 
+static inline void skb_tunnel_check_pmtu(struct sk_buff *skb,
+					 struct dst_entry *encap_dst,
+					 int headroom)
+{
+	u32 encap_mtu = dst_mtu(encap_dst);
+
+	if (skb->len > encap_mtu - headroom)
+		skb_dst_update_pmtu(skb, encap_mtu - headroom);
+}
+
 #endif /* _NET_DST_H */

From 8c3bf9b62b667456a57aefcf1689e826df146159 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <natechancellor@gmail.com>
Date: Fri, 12 Oct 2018 19:14:58 -0700
Subject: [PATCH 71/95] net: qla3xxx: Remove overflowing shift statement

Clang currently warns:

drivers/net/ethernet/qlogic/qla3xxx.c:384:24: warning: signed shift
result (0xF00000000) requires 37 bits to represent, but 'int' only has
32 bits [-Wshift-overflow]
                    ((ISP_NVRAM_MASK << 16) | qdev->eeprom_cmd_data));
                      ~~~~~~~~~~~~~~ ^  ~~
1 warning generated.

The warning is certainly accurate since ISP_NVRAM_MASK is defined as
(0x000F << 16) which is then shifted by 16, resulting in 64424509440,
well above UINT_MAX.

Given that this is the only location in this driver where ISP_NVRAM_MASK
is shifted again, it seems likely that ISP_NVRAM_MASK was originally
defined without a shift and during the move of the shift to the
definition, this statement wasn't properly removed (since ISP_NVRAM_MASK
is used in the statenent right above this). Only the maintainers can
confirm this since this statment has been here since the driver was
first added to the kernel.

Link: https://github.com/ClangBuiltLinux/linux/issues/127
Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qla3xxx.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c
index b48f76182049..10b075bc5959 100644
--- a/drivers/net/ethernet/qlogic/qla3xxx.c
+++ b/drivers/net/ethernet/qlogic/qla3xxx.c
@@ -380,8 +380,6 @@ static void fm93c56a_select(struct ql3_adapter *qdev)
 
 	qdev->eeprom_cmd_data = AUBURN_EEPROM_CS_1;
 	ql_write_nvram_reg(qdev, spir, ISP_NVRAM_MASK | qdev->eeprom_cmd_data);
-	ql_write_nvram_reg(qdev, spir,
-			   ((ISP_NVRAM_MASK << 16) | qdev->eeprom_cmd_data));
 }
 
 /*

From 62d6f3b7b85e9ba79c96bfd36f501f180908e623 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 13 Oct 2018 13:26:53 +0300
Subject: [PATCH 72/95] sparc: vDSO: Silence an uninitialized variable warning

Smatch complains that "val" would be uninitialized if kstrtoul() fails.

Fixes: 9a08862a5d2e ("vDSO for sparc")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/vdso/vma.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/sparc/vdso/vma.c b/arch/sparc/vdso/vma.c
index f51595f861b8..5eaff3c1aa0c 100644
--- a/arch/sparc/vdso/vma.c
+++ b/arch/sparc/vdso/vma.c
@@ -262,7 +262,9 @@ static __init int vdso_setup(char *s)
 	unsigned long val;
 
 	err = kstrtoul(s, 10, &val);
+	if (err)
+		return err;
 	vdso_enabled = val;
-	return err;
+	return 0;
 }
 __setup("vdso=", vdso_setup);

From efa61c8cf2950ab5c0e66cff3cabe2a2b24e81ba Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Tue, 16 Oct 2018 15:06:41 +0200
Subject: [PATCH 73/95] ptp: fix Spectre v1 vulnerability

pin_index can be indirectly controlled by user-space, hence leading
to a potential exploitation of the Spectre variant 1 vulnerability.

This issue was detected with the help of Smatch:

drivers/ptp/ptp_chardev.c:253 ptp_ioctl() warn: potential spectre issue
'ops->pin_config' [r] (local cap)

Fix this by sanitizing pin_index before using it to index
ops->pin_config, and before passing it as an argument to
function ptp_set_pinfunc(), in which it is used to index
info->pin_config.

Notice that given that speculation windows are large, the policy is
to kill the speculation on the first load and not worry if it can be
completed with a dependent load/store [1].

[1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2

Cc: stable@vger.kernel.org
Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/ptp_chardev.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
index 01b0e2bb3319..2012551d93e0 100644
--- a/drivers/ptp/ptp_chardev.c
+++ b/drivers/ptp/ptp_chardev.c
@@ -24,6 +24,8 @@
 #include <linux/slab.h>
 #include <linux/timekeeping.h>
 
+#include <linux/nospec.h>
+
 #include "ptp_private.h"
 
 static int ptp_disable_pinfunc(struct ptp_clock_info *ops,
@@ -248,6 +250,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
 			err = -EINVAL;
 			break;
 		}
+		pin_index = array_index_nospec(pin_index, ops->n_pins);
 		if (mutex_lock_interruptible(&ptp->pincfg_mux))
 			return -ERESTARTSYS;
 		pd = ops->pin_config[pin_index];
@@ -266,6 +269,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
 			err = -EINVAL;
 			break;
 		}
+		pin_index = array_index_nospec(pin_index, ops->n_pins);
 		if (mutex_lock_interruptible(&ptp->pincfg_mux))
 			return -ERESTARTSYS;
 		err = ptp_set_pinfunc(ptp, pin_index, pd.func, pd.chan);

From 84258438e8ce12d6888b68a1238bba9cb25307e2 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Wed, 17 Oct 2018 00:35:10 +0900
Subject: [PATCH 74/95] net: bpfilter: use get_pid_task instead of pid_task

pid_task() dereferences rcu protected tasks array.
But there is no rcu_read_lock() in shutdown_umh() routine so that
rcu_read_lock() is needed.
get_pid_task() is wrapper function of pid_task. it holds rcu_read_lock()
then calls pid_task(). if task isn't NULL, it increases reference count
of task.

test commands:
   %modprobe bpfilter
   %modprobe -rv bpfilter

splat looks like:
[15102.030932] =============================
[15102.030957] WARNING: suspicious RCU usage
[15102.030985] 4.19.0-rc7+ #21 Not tainted
[15102.031010] -----------------------------
[15102.031038] kernel/pid.c:330 suspicious rcu_dereference_check() usage!
[15102.031063]
	       other info that might help us debug this:

[15102.031332]
	       rcu_scheduler_active = 2, debug_locks = 1
[15102.031363] 1 lock held by modprobe/1570:
[15102.031389]  #0: 00000000580ef2b0 (bpfilter_lock){+.+.}, at: stop_umh+0x13/0x52 [bpfilter]
[15102.031552]
               stack backtrace:
[15102.031583] CPU: 1 PID: 1570 Comm: modprobe Not tainted 4.19.0-rc7+ #21
[15102.031607] Hardware name: To be filled by O.E.M. To be filled by O.E.M./Aptio CRB, BIOS 5.6.5 07/08/2015
[15102.031628] Call Trace:
[15102.031676]  dump_stack+0xc9/0x16b
[15102.031723]  ? show_regs_print_info+0x5/0x5
[15102.031801]  ? lockdep_rcu_suspicious+0x117/0x160
[15102.031855]  pid_task+0x134/0x160
[15102.031900]  ? find_vpid+0xf0/0xf0
[15102.032017]  shutdown_umh.constprop.1+0x1e/0x53 [bpfilter]
[15102.032055]  stop_umh+0x46/0x52 [bpfilter]
[15102.032092]  __x64_sys_delete_module+0x47e/0x570
[ ... ]

Fixes: d2ba09c17a06 ("net: add skeleton of bpfilter kernel module")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bpfilter/bpfilter_kern.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c
index b64e1649993b..94e88f510c5b 100644
--- a/net/bpfilter/bpfilter_kern.c
+++ b/net/bpfilter/bpfilter_kern.c
@@ -23,9 +23,11 @@ static void shutdown_umh(struct umh_info *info)
 
 	if (!info->pid)
 		return;
-	tsk = pid_task(find_vpid(info->pid), PIDTYPE_PID);
-	if (tsk)
+	tsk = get_pid_task(find_vpid(info->pid), PIDTYPE_PID);
+	if (tsk) {
 		force_sig(SIGKILL, tsk);
+		put_task_struct(tsk);
+	}
 	fput(info->pipe_to_umh);
 	fput(info->pipe_from_umh);
 	info->pid = 0;

From 9675931e6b65d160d16bcc9472c1acef15524def Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Tue, 16 Oct 2018 19:35:17 +0200
Subject: [PATCH 75/95] r8169: re-enable MSI-X on RTL8168g

Similar to d49c88d7677b ("r8169: Enable MSI-X on RTL8106e") after
e9d0ba506ea8 ("PCI: Reprogram bridge prefetch registers on resume")
we can safely assume that this also fixes the root cause of
the issue worked around by 7c53a722459c ("r8169: don't use MSI-X on
RTL8168g"). So let's revert it.

Fixes: 7c53a722459c ("r8169: don't use MSI-X on RTL8168g")
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index f4df367fb894..28184b984a44 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -7098,11 +7098,6 @@ static int rtl_alloc_irq(struct rtl8169_private *tp)
 		RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~MSIEnable);
 		RTL_W8(tp, Cfg9346, Cfg9346_Lock);
 		flags = PCI_IRQ_LEGACY;
-	} else if (tp->mac_version == RTL_GIGA_MAC_VER_40) {
-		/* This version was reported to have issues with resume
-		 * from suspend when using MSI-X
-		 */
-		flags = PCI_IRQ_LEGACY | PCI_IRQ_MSI;
 	} else {
 		flags = PCI_IRQ_ALL_TYPES;
 	}

From b336decab22158937975293aea79396525f92bb3 Mon Sep 17 00:00:00 2001
From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Tue, 16 Oct 2018 15:18:17 -0300
Subject: [PATCH 76/95] sctp: fix race on sctp_id2asoc

syzbot reported an use-after-free involving sctp_id2asoc.  Dmitry Vyukov
helped to root cause it and it is because of reading the asoc after it
was freed:

        CPU 1                       CPU 2
(working on socket 1)            (working on socket 2)
	                         sctp_association_destroy
sctp_id2asoc
   spin lock
     grab the asoc from idr
   spin unlock
                                   spin lock
				     remove asoc from idr
				   spin unlock
				   free(asoc)
   if asoc->base.sk != sk ... [*]

This can only be hit if trying to fetch asocs from different sockets. As
we have a single IDR for all asocs, in all SCTP sockets, their id is
unique on the system. An application can try to send stuff on an id
that matches on another socket, and the if in [*] will protect from such
usage. But it didn't consider that as that asoc may belong to another
socket, it may be freed in parallel (read: under another socket lock).

We fix it by moving the checks in [*] into the protected region. This
fixes it because the asoc cannot be freed while the lock is held.

Reported-by: syzbot+c7dd55d7aec49d48e49a@syzkaller.appspotmail.com
Acked-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index e25a20fc629a..bba877a0205b 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -271,11 +271,10 @@ struct sctp_association *sctp_id2assoc(struct sock *sk, sctp_assoc_t id)
 
 	spin_lock_bh(&sctp_assocs_id_lock);
 	asoc = (struct sctp_association *)idr_find(&sctp_assocs_id, (int)id);
+	if (asoc && (asoc->base.sk != sk || asoc->base.dead))
+		asoc = NULL;
 	spin_unlock_bh(&sctp_assocs_id_lock);
 
-	if (!asoc || (asoc->base.sk != sk) || asoc->base.dead)
-		return NULL;
-
 	return asoc;
 }
 

From c863850ce22e1b0bb365d49cadf51f4765153ae4 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Wed, 17 Oct 2018 03:06:12 +0800
Subject: [PATCH 77/95] sctp: not free the new asoc when sctp_wait_for_connect
 returns err

When sctp_wait_for_connect is called to wait for connect ready
for sp->strm_interleave in sctp_sendmsg_to_asoc, a panic could
be triggered if cpu is scheduled out and the new asoc is freed
elsewhere, as it will return err and later the asoc gets freed
again in sctp_sendmsg.

[  285.840764] list_del corruption, ffff9f0f7b284078->next is LIST_POISON1 (dead000000000100)
[  285.843590] WARNING: CPU: 1 PID: 8861 at lib/list_debug.c:47 __list_del_entry_valid+0x50/0xa0
[  285.846193] Kernel panic - not syncing: panic_on_warn set ...
[  285.846193]
[  285.848206] CPU: 1 PID: 8861 Comm: sctp_ndata Kdump: loaded Not tainted 4.19.0-rc7.label #584
[  285.850559] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
[  285.852164] Call Trace:
...
[  285.872210]  ? __list_del_entry_valid+0x50/0xa0
[  285.872894]  sctp_association_free+0x42/0x2d0 [sctp]
[  285.873612]  sctp_sendmsg+0x5a4/0x6b0 [sctp]
[  285.874236]  sock_sendmsg+0x30/0x40
[  285.874741]  ___sys_sendmsg+0x27a/0x290
[  285.875304]  ? __switch_to_asm+0x34/0x70
[  285.875872]  ? __switch_to_asm+0x40/0x70
[  285.876438]  ? ptep_set_access_flags+0x2a/0x30
[  285.877083]  ? do_wp_page+0x151/0x540
[  285.877614]  __sys_sendmsg+0x58/0xa0
[  285.878138]  do_syscall_64+0x55/0x180
[  285.878669]  entry_SYSCALL_64_after_hwframe+0x44/0xa9

This is a similar issue with the one fixed in Commit ca3af4dd28cf
("sctp: do not free asoc when it is already dead in sctp_sendmsg").
But this one can't be fixed by returning -ESRCH for the dead asoc
in sctp_wait_for_connect, as it will break sctp_connect's return
value to users.

This patch is to simply set err to -ESRCH before it returns to
sctp_sendmsg when any err is returned by sctp_wait_for_connect
for sp->strm_interleave, so that no asoc would be freed due to
this.

When users see this error, they will know the packet hasn't been
sent. And it also makes sense to not free asoc because waiting
connect fails, like the second call for sctp_wait_for_connect in
sctp_sendmsg_to_asoc.

Fixes: 668c9beb9020 ("sctp: implement assign_number for sctp_stream_interleave")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index bba877a0205b..c1c1bda334a4 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1945,8 +1945,10 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
 		if (sp->strm_interleave) {
 			timeo = sock_sndtimeo(sk, 0);
 			err = sctp_wait_for_connect(asoc, &timeo);
-			if (err)
+			if (err) {
+				err = -ESRCH;
 				goto err;
+			}
 		} else {
 			wait_connect = true;
 		}

From 9b3bc7db759e64c33471025721817467f8c3ecd4 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Wed, 17 Oct 2018 08:05:45 +0000
Subject: [PATCH 78/95] mlxsw: core: Fix use-after-free when flashing firmware
 during init

When the switch driver (e.g., mlxsw_spectrum) determines it needs to
flash a new firmware version it resets the ASIC after the flashing
process. The bus driver (e.g., mlxsw_pci) then registers itself again
with mlxsw_core which means (among other things) that the device
registers itself again with the hwmon subsystem again.

Since the device was registered with the hwmon subsystem using
devm_hwmon_device_register_with_groups(), then the old hwmon device
(registered before the flashing) was never unregistered and was
referencing stale data, resulting in a use-after free.

Fix by removing reliance on device managed APIs in mlxsw_hwmon_init().

Fixes: c86d62cc410c ("mlxsw: spectrum: Reset FW after flash")
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reported-by: Alexander Petrovskiy <alexpe@mellanox.com>
Tested-by: Alexander Petrovskiy <alexpe@mellanox.com>
Reviewed-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/core.c      |  2 ++
 drivers/net/ethernet/mellanox/mlxsw/core.h      |  4 ++++
 .../net/ethernet/mellanox/mlxsw/core_hwmon.c    | 17 +++++++++++------
 3 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index 81533d7f395c..937d0ace699a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -1055,6 +1055,7 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
 err_driver_init:
 	mlxsw_thermal_fini(mlxsw_core->thermal);
 err_thermal_init:
+	mlxsw_hwmon_fini(mlxsw_core->hwmon);
 err_hwmon_init:
 	if (!reload)
 		devlink_unregister(devlink);
@@ -1088,6 +1089,7 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core,
 	if (mlxsw_core->driver->fini)
 		mlxsw_core->driver->fini(mlxsw_core);
 	mlxsw_thermal_fini(mlxsw_core->thermal);
+	mlxsw_hwmon_fini(mlxsw_core->hwmon);
 	if (!reload)
 		devlink_unregister(devlink);
 	mlxsw_emad_fini(mlxsw_core);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index 655ddd204ab2..c35be477856f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -359,6 +359,10 @@ static inline int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core,
 	return 0;
 }
 
+static inline void mlxsw_hwmon_fini(struct mlxsw_hwmon *mlxsw_hwmon)
+{
+}
+
 #endif
 
 struct mlxsw_thermal;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
index f6cf2896d337..e04e8162aa14 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
@@ -303,8 +303,7 @@ int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core,
 	struct device *hwmon_dev;
 	int err;
 
-	mlxsw_hwmon = devm_kzalloc(mlxsw_bus_info->dev, sizeof(*mlxsw_hwmon),
-				   GFP_KERNEL);
+	mlxsw_hwmon = kzalloc(sizeof(*mlxsw_hwmon), GFP_KERNEL);
 	if (!mlxsw_hwmon)
 		return -ENOMEM;
 	mlxsw_hwmon->core = mlxsw_core;
@@ -321,10 +320,9 @@ int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core,
 	mlxsw_hwmon->groups[0] = &mlxsw_hwmon->group;
 	mlxsw_hwmon->group.attrs = mlxsw_hwmon->attrs;
 
-	hwmon_dev = devm_hwmon_device_register_with_groups(mlxsw_bus_info->dev,
-							   "mlxsw",
-							   mlxsw_hwmon,
-							   mlxsw_hwmon->groups);
+	hwmon_dev = hwmon_device_register_with_groups(mlxsw_bus_info->dev,
+						      "mlxsw", mlxsw_hwmon,
+						      mlxsw_hwmon->groups);
 	if (IS_ERR(hwmon_dev)) {
 		err = PTR_ERR(hwmon_dev);
 		goto err_hwmon_register;
@@ -337,5 +335,12 @@ int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core,
 err_hwmon_register:
 err_fans_init:
 err_temp_init:
+	kfree(mlxsw_hwmon);
 	return err;
 }
+
+void mlxsw_hwmon_fini(struct mlxsw_hwmon *mlxsw_hwmon)
+{
+	hwmon_device_unregister(mlxsw_hwmon->hwmon_dev);
+	kfree(mlxsw_hwmon);
+}

From 84dad55951b0d009372ec21760b650634246e144 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Wed, 17 Oct 2018 11:44:04 +0200
Subject: [PATCH 79/95] udp6: fix encap return code for resubmitting

The commit eb63f2964dbe ("udp6: add missing checks on edumux packet
processing") used the same return code convention of the ipv4 counterpart,
but ipv6 uses the opposite one: positive values means resubmit.

This change addresses the issue, using positive return value for
resubmitting. Also update the related comment, which was broken, too.

Fixes: eb63f2964dbe ("udp6: add missing checks on edumux packet processing")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/udp.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 28c4aa5078fc..b36694b6716e 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -766,11 +766,9 @@ static int udp6_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb,
 
 	ret = udpv6_queue_rcv_skb(sk, skb);
 
-	/* a return value > 0 means to resubmit the input, but
-	 * it wants the return to be -protocol, or 0
-	 */
+	/* a return value > 0 means to resubmit the input */
 	if (ret > 0)
-		return -ret;
+		return ret;
 	return 0;
 }
 

From 05c998b738fdd3e5d6a257bcacc8f34b6284d795 Mon Sep 17 00:00:00 2001
From: Ake Koomsin <ake@igel.co.jp>
Date: Wed, 17 Oct 2018 19:44:12 +0900
Subject: [PATCH 80/95] virtio_net: avoid using netif_tx_disable() for
 serializing tx routine

Commit 713a98d90c5e ("virtio-net: serialize tx routine during reset")
introduces netif_tx_disable() after netif_device_detach() in order to
avoid use-after-free of tx queues. However, there are two issues.

1) Its operation is redundant with netif_device_detach() in case the
   interface is running.
2) In case of the interface is not running before suspending and
   resuming, the tx does not get resumed by netif_device_attach().
   This results in losing network connectivity.

It is better to use netif_tx_lock_bh()/netif_tx_unlock_bh() instead for
serializing tx routine during reset. This also preserves the symmetry
of netif_device_detach() and netif_device_attach().

Fixes commit 713a98d90c5e ("virtio-net: serialize tx routine during reset")
Signed-off-by: Ake Koomsin <ake@igel.co.jp>
Acked-by: Jason Wang <jasowang@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/virtio_net.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index dab504ec5e50..ddfa3f24204c 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2218,8 +2218,9 @@ static void virtnet_freeze_down(struct virtio_device *vdev)
 	/* Make sure no work handler is accessing the device */
 	flush_work(&vi->config_work);
 
+	netif_tx_lock_bh(vi->dev);
 	netif_device_detach(vi->dev);
-	netif_tx_disable(vi->dev);
+	netif_tx_unlock_bh(vi->dev);
 	cancel_delayed_work_sync(&vi->refill);
 
 	if (netif_running(vi->dev)) {
@@ -2255,7 +2256,9 @@ static int virtnet_restore_up(struct virtio_device *vdev)
 		}
 	}
 
+	netif_tx_lock_bh(vi->dev);
 	netif_device_attach(vi->dev);
+	netif_tx_unlock_bh(vi->dev);
 	return err;
 }
 

From 5660b9d9d6a29c2c3cc12f62ae44bfb56b0a15a9 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Wed, 17 Oct 2018 21:11:27 +0800
Subject: [PATCH 81/95] sctp: fix the data size calculation in sctp_data_size

sctp data size should be calculated by subtracting data chunk header's
length from chunk_hdr->length, not just data header.

Fixes: 668c9beb9020 ("sctp: implement assign_number for sctp_stream_interleave")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 5ef1bad81ef5..9e3d32746430 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -347,7 +347,7 @@ static inline __u16 sctp_data_size(struct sctp_chunk *chunk)
 	__u16 size;
 
 	size = ntohs(chunk->chunk_hdr->length);
-	size -= sctp_datahdr_len(&chunk->asoc->stream);
+	size -= sctp_datachk_len(&chunk->asoc->stream);
 
 	return size;
 }

From 06a36ecb5d0ee4b379845a5687f83084d3187521 Mon Sep 17 00:00:00 2001
From: Gregory CLEMENT <gregory.clement@bootlin.com>
Date: Wed, 17 Oct 2018 17:26:35 +0200
Subject: [PATCH 82/95] net: mscc: ocelot: Fix comment in
 ocelot_vlant_wait_for_completion()

The ocelot_vlant_wait_for_completion() function is very similar to the
ocelot_mact_wait_for_completion(). It seemed to have be copied but the
comment was not updated, so let's fix it.

Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mscc/ocelot.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 1a4f2bb48ead..ed4e298cd823 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -133,9 +133,9 @@ static inline int ocelot_vlant_wait_for_completion(struct ocelot *ocelot)
 {
 	unsigned int val, timeout = 10;
 
-	/* Wait for the issued mac table command to be completed, or timeout.
-	 * When the command read from ANA_TABLES_MACACCESS is
-	 * MACACCESS_CMD_IDLE, the issued command completed successfully.
+	/* Wait for the issued vlan table command to be completed, or timeout.
+	 * When the command read from ANA_TABLES_VLANACCESS is
+	 * VLANACCESS_CMD_IDLE, the issued command completed successfully.
 	 */
 	do {
 		val = ocelot_read(ocelot, ANA_TABLES_VLANACCESS);

From eddf016b910486d2123675a6b5fd7d64f77cdca8 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Wed, 17 Oct 2018 22:34:34 +0300
Subject: [PATCH 83/95] net: ipmr: fix unresolved entry dumps

If the skb space ends in an unresolved entry while dumping we'll miss
some unresolved entries. The reason is due to zeroing the entry counter
between dumping resolved and unresolved mfc entries. We should just
keep counting until the whole table is dumped and zero when we move to
the next as we have a separate table counter.

Reported-by: Colin Ian King <colin.king@canonical.com>
Fixes: 8fb472c09b9d ("ipmr: improve hash scalability")
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipmr_base.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
index 1ad9aa62a97b..eab8cd5ec2f5 100644
--- a/net/ipv4/ipmr_base.c
+++ b/net/ipv4/ipmr_base.c
@@ -296,8 +296,6 @@ int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
 next_entry:
 			e++;
 		}
-		e = 0;
-		s_e = 0;
 
 		spin_lock_bh(lock);
 		list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {

From 19e6420e4170acce7a8651dfb87195dff5adbe72 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 18 Oct 2018 08:22:39 +0200
Subject: [PATCH 84/95] LICENSES: Remove CC-BY-SA-4.0 license text

Using non-GPL licenses for our documentation is rather problematic,
as it can directly include other files, which generally are GPLv2
licensed and thus not compatible.

Remove this license now that the only user (idr.rst) is gone to avoid
people semi-accidentally using it again.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 LICENSES/other/CC-BY-SA-4.0 | 397 ------------------------------------
 1 file changed, 397 deletions(-)
 delete mode 100644 LICENSES/other/CC-BY-SA-4.0

diff --git a/LICENSES/other/CC-BY-SA-4.0 b/LICENSES/other/CC-BY-SA-4.0
deleted file mode 100644
index f9158e831e79..000000000000
--- a/LICENSES/other/CC-BY-SA-4.0
+++ /dev/null
@@ -1,397 +0,0 @@
-Valid-License-Identifier: CC-BY-SA-4.0
-SPDX-URL: https://spdx.org/licenses/CC-BY-SA-4.0
-Usage-Guide:
-  To use the Creative Commons Attribution Share Alike 4.0 International
-  license put the following SPDX tag/value pair into a comment according to
-  the placement guidelines in the licensing rules documentation:
-    SPDX-License-Identifier: CC-BY-SA-4.0
-License-Text:
-
-Creative Commons Attribution-ShareAlike 4.0 International
-
-Creative Commons Corporation ("Creative Commons") is not a law firm and
-does not provide legal services or legal advice. Distribution of Creative
-Commons public licenses does not create a lawyer-client or other
-relationship. Creative Commons makes its licenses and related information
-available on an "as-is" basis. Creative Commons gives no warranties
-regarding its licenses, any material licensed under their terms and
-conditions, or any related information. Creative Commons disclaims all
-liability for damages resulting from their use to the fullest extent
-possible.
-
-Using Creative Commons Public Licenses
-
-Creative Commons public licenses provide a standard set of terms and
-conditions that creators and other rights holders may use to share original
-works of authorship and other material subject to copyright and certain
-other rights specified in the public license below. The following
-considerations are for informational purposes only, are not exhaustive, and
-do not form part of our licenses.
-
-Considerations for licensors: Our public licenses are intended for use by
-those authorized to give the public permission to use material in ways
-otherwise restricted by copyright and certain other rights. Our licenses
-are irrevocable. Licensors should read and understand the terms and
-conditions of the license they choose before applying it. Licensors should
-also secure all rights necessary before applying our licenses so that the
-public can reuse the material as expected. Licensors should clearly mark
-any material not subject to the license. This includes other CC-licensed
-material, or material used under an exception or limitation to
-copyright. More considerations for licensors :
-wiki.creativecommons.org/Considerations_for_licensors
-
-Considerations for the public: By using one of our public licenses, a
-licensor grants the public permission to use the licensed material under
-specified terms and conditions. If the licensor's permission is not
-necessary for any reason - for example, because of any applicable exception
-or limitation to copyright - then that use is not regulated by the
-license. Our licenses grant only permissions under copyright and certain
-other rights that a licensor has authority to grant. Use of the licensed
-material may still be restricted for other reasons, including because
-others have copyright or other rights in the material. A licensor may make
-special requests, such as asking that all changes be marked or described.
-
-Although not required by our licenses, you are encouraged to respect those
-requests where reasonable. More considerations for the public :
-wiki.creativecommons.org/Considerations_for_licensees
-
-Creative Commons Attribution-ShareAlike 4.0 International Public License
-
-By exercising the Licensed Rights (defined below), You accept and agree to
-be bound by the terms and conditions of this Creative Commons
-Attribution-ShareAlike 4.0 International Public License ("Public
-License"). To the extent this Public License may be interpreted as a
-contract, You are granted the Licensed Rights in consideration of Your
-acceptance of these terms and conditions, and the Licensor grants You such
-rights in consideration of benefits the Licensor receives from making the
-Licensed Material available under these terms and conditions.
-
-Section 1 - Definitions.
-
-    a. Adapted Material means material subject to Copyright and Similar
-       Rights that is derived from or based upon the Licensed Material and
-       in which the Licensed Material is translated, altered, arranged,
-       transformed, or otherwise modified in a manner requiring permission
-       under the Copyright and Similar Rights held by the Licensor. For
-       purposes of this Public License, where the Licensed Material is a
-       musical work, performance, or sound recording, Adapted Material is
-       always produced where the Licensed Material is synched in timed
-       relation with a moving image.
-
-    b. Adapter's License means the license You apply to Your Copyright and
-       Similar Rights in Your contributions to Adapted Material in
-       accordance with the terms and conditions of this Public License.
-
-    c. BY-SA Compatible License means a license listed at
-       creativecommons.org/compatiblelicenses, approved by Creative Commons
-       as essentially the equivalent of this Public License.
-
-    d. Copyright and Similar Rights means copyright and/or similar rights
-       closely related to copyright including, without limitation,
-       performance, broadcast, sound recording, and Sui Generis Database
-       Rights, without regard to how the rights are labeled or
-       categorized. For purposes of this Public License, the rights
-       specified in Section 2(b)(1)-(2) are not Copyright and Similar
-       Rights.
-
-    e. Effective Technological Measures means those measures that, in the
-       absence of proper authority, may not be circumvented under laws
-       fulfilling obligations under Article 11 of the WIPO Copyright Treaty
-       adopted on December 20, 1996, and/or similar international
-       agreements.
-
-    f. Exceptions and Limitations means fair use, fair dealing, and/or any
-       other exception or limitation to Copyright and Similar Rights that
-       applies to Your use of the Licensed Material.
-
-    g. License Elements means the license attributes listed in the name of
-       a Creative Commons Public License. The License Elements of this
-       Public License are Attribution and ShareAlike.
-
-    h. Licensed Material means the artistic or literary work, database, or
-       other material to which the Licensor applied this Public License.
-
-    i. Licensed Rights means the rights granted to You subject to the terms
-       and conditions of this Public License, which are limited to all
-       Copyright and Similar Rights that apply to Your use of the Licensed
-       Material and that the Licensor has authority to license.
-
-    j. Licensor means the individual(s) or entity(ies) granting rights
-       under this Public License.
-
-    k. Share means to provide material to the public by any means or
-       process that requires permission under the Licensed Rights, such as
-       reproduction, public display, public performance, distribution,
-       dissemination, communication, or importation, and to make material
-       available to the public including in ways that members of the public
-       may access the material from a place and at a time individually
-       chosen by them.
-
-    l. Sui Generis Database Rights means rights other than copyright
-       resulting from Directive 96/9/EC of the European Parliament and of
-       the Council of 11 March 1996 on the legal protection of databases,
-       as amended and/or succeeded, as well as other essentially equivalent
-       rights anywhere in the world.  m. You means the individual or entity
-       exercising the Licensed Rights under this Public License. Your has a
-       corresponding meaning.
-
-Section 2 - Scope.
-
-    a. License grant.
-
-        1. Subject to the terms and conditions of this Public License, the
-           Licensor hereby grants You a worldwide, royalty-free,
-           non-sublicensable, non-exclusive, irrevocable license to
-           exercise the Licensed Rights in the Licensed Material to:
-
-            A. reproduce and Share the Licensed Material, in whole or in part; and
-
-            B. produce, reproduce, and Share Adapted Material.
-
-        2. Exceptions and Limitations. For the avoidance of doubt, where
-           Exceptions and Limitations apply to Your use, this Public
-           License does not apply, and You do not need to comply with its
-           terms and conditions.
-
-        3. Term. The term of this Public License is specified in Section 6(a).
-
-        4. Media and formats; technical modifications allowed. The Licensor
-           authorizes You to exercise the Licensed Rights in all media and
-           formats whether now known or hereafter created, and to make
-           technical modifications necessary to do so. The Licensor waives
-           and/or agrees not to assert any right or authority to forbid You
-           from making technical modifications necessary to exercise the
-           Licensed Rights, including technical modifications necessary to
-           circumvent Effective Technological Measures. For purposes of
-           this Public License, simply making modifications authorized by
-           this Section 2(a)(4) never produces Adapted Material.
-
-        5. Downstream recipients.
-
-            A. Offer from the Licensor - Licensed Material. Every recipient
-               of the Licensed Material automatically receives an offer
-               from the Licensor to exercise the Licensed Rights under the
-               terms and conditions of this Public License.
-
-            B. Additional offer from the Licensor - Adapted Material. Every
-               recipient of Adapted Material from You automatically
-               receives an offer from the Licensor to exercise the Licensed
-               Rights in the Adapted Material under the conditions of the
-               Adapter's License You apply.
-
-            C. No downstream restrictions. You may not offer or impose any
-               additional or different terms or conditions on, or apply any
-               Effective Technological Measures to, the Licensed Material
-               if doing so restricts exercise of the Licensed Rights by any
-               recipient of the Licensed Material.
-
-        6. No endorsement. Nothing in this Public License constitutes or
-           may be construed as permission to assert or imply that You are,
-           or that Your use of the Licensed Material is, connected with, or
-           sponsored, endorsed, or granted official status by, the Licensor
-           or others designated to receive attribution as provided in
-           Section 3(a)(1)(A)(i).
-
-    b. Other rights.
-
-        1. Moral rights, such as the right of integrity, are not licensed
-           under this Public License, nor are publicity, privacy, and/or
-           other similar personality rights; however, to the extent
-           possible, the Licensor waives and/or agrees not to assert any
-           such rights held by the Licensor to the limited extent necessary
-           to allow You to exercise the Licensed Rights, but not otherwise.
-
-        2. Patent and trademark rights are not licensed under this Public
-           License.
-
-        3. To the extent possible, the Licensor waives any right to collect
-           royalties from You for the exercise of the Licensed Rights,
-           whether directly or through a collecting society under any
-           voluntary or waivable statutory or compulsory licensing
-           scheme. In all other cases the Licensor expressly reserves any
-           right to collect such royalties.
-
-Section 3 - License Conditions.
-
-Your exercise of the Licensed Rights is expressly made subject to the
-following conditions.
-
-    a. Attribution.
-
-        1. If You Share the Licensed Material (including in modified form),
-           You must:
-
-            A. retain the following if it is supplied by the Licensor with
-               the Licensed Material:
-
-                i. identification of the creator(s) of the Licensed
-                   Material and any others designated to receive
-                   attribution, in any reasonable manner requested by the
-                   Licensor (including by pseudonym if designated);
-
-                ii. a copyright notice;
-
-                iii. a notice that refers to this Public License;
-
-                iv. a notice that refers to the disclaimer of warranties;
-
-                v. a URI or hyperlink to the Licensed Material to the extent reasonably practicable;
-
-            B. indicate if You modified the Licensed Material and retain an
-               indication of any previous modifications; and
-
-            C. indicate the Licensed Material is licensed under this Public
-            License, and include the text of, or the URI or hyperlink to,
-            this Public License.
-
-        2. You may satisfy the conditions in Section 3(a)(1) in any
-           reasonable manner based on the medium, means, and context in
-           which You Share the Licensed Material. For example, it may be
-           reasonable to satisfy the conditions by providing a URI or
-           hyperlink to a resource that includes the required information.
-
-        3. If requested by the Licensor, You must remove any of the
-           information required by Section 3(a)(1)(A) to the extent
-           reasonably practicable.  b. ShareAlike.In addition to the
-           conditions in Section 3(a), if You Share Adapted Material You
-           produce, the following conditions also apply.
-
-           1. The Adapter's License You apply must be a Creative Commons
-              license with the same License Elements, this version or
-              later, or a BY-SA Compatible License.
-
-           2. You must include the text of, or the URI or hyperlink to, the
-              Adapter's License You apply. You may satisfy this condition
-              in any reasonable manner based on the medium, means, and
-              context in which You Share Adapted Material.
-
-           3. You may not offer or impose any additional or different terms
-              or conditions on, or apply any Effective Technological
-              Measures to, Adapted Material that restrict exercise of the
-              rights granted under the Adapter's License You apply.
-
-Section 4 - Sui Generis Database Rights.
-
-Where the Licensed Rights include Sui Generis Database Rights that apply to
-Your use of the Licensed Material:
-
-    a. for the avoidance of doubt, Section 2(a)(1) grants You the right to
-       extract, reuse, reproduce, and Share all or a substantial portion of
-       the contents of the database;
-
-    b. if You include all or a substantial portion of the database contents
-       in a database in which You have Sui Generis Database Rights, then
-       the database in which You have Sui Generis Database Rights (but not
-       its individual contents) is Adapted Material, including for purposes
-       of Section 3(b); and
-
-    c. You must comply with the conditions in Section 3(a) if You Share all
-       or a substantial portion of the contents of the database.
-
-    For the avoidance of doubt, this Section 4 supplements and does not
-    replace Your obligations under this Public License where the Licensed
-    Rights include other Copyright and Similar Rights.
-
-Section 5 - Disclaimer of Warranties and Limitation of Liability.
-
-    a. Unless otherwise separately undertaken by the Licensor, to the
-       extent possible, the Licensor offers the Licensed Material as-is and
-       as-available, and makes no representations or warranties of any kind
-       concerning the Licensed Material, whether express, implied,
-       statutory, or other. This includes, without limitation, warranties
-       of title, merchantability, fitness for a particular purpose,
-       non-infringement, absence of latent or other defects, accuracy, or
-       the presence or absence of errors, whether or not known or
-       discoverable. Where disclaimers of warranties are not allowed in
-       full or in part, this disclaimer may not apply to You.
-
-    b. To the extent possible, in no event will the Licensor be liable to
-       You on any legal theory (including, without limitation, negligence)
-       or otherwise for any direct, special, indirect, incidental,
-       consequential, punitive, exemplary, or other losses, costs,
-       expenses, or damages arising out of this Public License or use of
-       the Licensed Material, even if the Licensor has been advised of the
-       possibility of such losses, costs, expenses, or damages. Where a
-       limitation of liability is not allowed in full or in part, this
-       limitation may not apply to You.
-
-    c. The disclaimer of warranties and limitation of liability provided
-       above shall be interpreted in a manner that, to the extent possible,
-       most closely approximates an absolute disclaimer and waiver of all
-       liability.
-
-Section 6 - Term and Termination.
-
-    a. This Public License applies for the term of the Copyright and
-       Similar Rights licensed here. However, if You fail to comply with
-       this Public License, then Your rights under this Public License
-       terminate automatically.
-
-    b. Where Your right to use the Licensed Material has terminated under
-       Section 6(a), it reinstates:
-
-        1. automatically as of the date the violation is cured, provided it
-           is cured within 30 days of Your discovery of the violation; or
-
-        2. upon express reinstatement by the Licensor.
-
-    c. For the avoidance of doubt, this Section 6(b) does not affect any
-       right the Licensor may have to seek remedies for Your violations of
-       this Public License.
-
-    d. For the avoidance of doubt, the Licensor may also offer the Licensed
-       Material under separate terms or conditions or stop distributing the
-       Licensed Material at any time; however, doing so will not terminate
-       this Public License.
-
-    e. Sections 1, 5, 6, 7, and 8 survive termination of this Public License.
-
-Section 7 - Other Terms and Conditions.
-
-    a. The Licensor shall not be bound by any additional or different terms
-       or conditions communicated by You unless expressly agreed.
-
-    b. Any arrangements, understandings, or agreements regarding the
-       Licensed Material not stated herein are separate from and
-       independent of the terms and conditions of this Public License.
-
-Section 8 - Interpretation.
-
-    a. For the avoidance of doubt, this Public License does not, and shall
-       not be interpreted to, reduce, limit, restrict, or impose conditions
-       on any use of the Licensed Material that could lawfully be made
-       without permission under this Public License.
-
-    b. To the extent possible, if any provision of this Public License is
-       deemed unenforceable, it shall be automatically reformed to the
-       minimum extent necessary to make it enforceable. If the provision
-       cannot be reformed, it shall be severed from this Public License
-       without affecting the enforceability of the remaining terms and
-       conditions.
-
-    c. No term or condition of this Public License will be waived and no
-       failure to comply consented to unless expressly agreed to by the
-       Licensor.
-
-    d. Nothing in this Public License constitutes or may be interpreted as
-       a limitation upon, or waiver of, any privileges and immunities that
-       apply to the Licensor or You, including from the legal processes of
-       any jurisdiction or authority.
-
-Creative Commons is not a party to its public licenses. Notwithstanding,
-Creative Commons may elect to apply one of its public licenses to material
-it publishes and in those instances will be considered the "Licensor." The
-text of the Creative Commons public licenses is dedicated to the public
-domain under the CC0 Public Domain Dedication. Except for the limited
-purpose of indicating that material is shared under a Creative Commons
-public license or as otherwise permitted by the Creative Commons policies
-published at creativecommons.org/policies, Creative Commons does not
-authorize the use of the trademark "Creative Commons" or any other
-trademark or logo of Creative Commons without its prior written consent
-including, without limitation, in connection with any unauthorized
-modifications to any of its public licenses or any other arrangements,
-understandings, or agreements concerning use of licensed material. For the
-avoidance of doubt, this paragraph does not form part of the public
-licenses.
-
-Creative Commons may be contacted at creativecommons.org.

From eb66ae030829605d61fbef1909ce310e29f78821 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 12 Oct 2018 15:22:59 -0700
Subject: [PATCH 85/95] mremap: properly flush TLB before releasing the page

Jann Horn points out that our TLB flushing was subtly wrong for the
mremap() case.  What makes mremap() special is that we don't follow the
usual "add page to list of pages to be freed, then flush tlb, and then
free pages".  No, mremap() obviously just _moves_ the page from one page
table location to another.

That matters, because mremap() thus doesn't directly control the
lifetime of the moved page with a freelist: instead, the lifetime of the
page is controlled by the page table locking, that serializes access to
the entry.

As a result, we need to flush the TLB not just before releasing the lock
for the source location (to avoid any concurrent accesses to the entry),
but also before we release the destination page table lock (to avoid the
TLB being flushed after somebody else has already done something to that
page).

This also makes the whole "need_flush" logic unnecessary, since we now
always end up flushing the TLB for every valid entry.

Reported-and-tested-by: Jann Horn <jannh@google.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Tested-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/huge_mm.h |  2 +-
 mm/huge_memory.c        | 10 ++++------
 mm/mremap.c             | 30 +++++++++++++-----------------
 3 files changed, 18 insertions(+), 24 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 99c19b06d9a4..fdcb45999b26 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -43,7 +43,7 @@ extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			unsigned char *vec);
 extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 			 unsigned long new_addr, unsigned long old_end,
-			 pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush);
+			 pmd_t *old_pmd, pmd_t *new_pmd);
 extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			unsigned long addr, pgprot_t newprot,
 			int prot_numa);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 58269f8ba7c4..deed97fba979 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1780,7 +1780,7 @@ static pmd_t move_soft_dirty_pmd(pmd_t pmd)
 
 bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 		  unsigned long new_addr, unsigned long old_end,
-		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
+		  pmd_t *old_pmd, pmd_t *new_pmd)
 {
 	spinlock_t *old_ptl, *new_ptl;
 	pmd_t pmd;
@@ -1811,7 +1811,7 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 		if (new_ptl != old_ptl)
 			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
 		pmd = pmdp_huge_get_and_clear(mm, old_addr, old_pmd);
-		if (pmd_present(pmd) && pmd_dirty(pmd))
+		if (pmd_present(pmd))
 			force_flush = true;
 		VM_BUG_ON(!pmd_none(*new_pmd));
 
@@ -1822,12 +1822,10 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 		}
 		pmd = move_soft_dirty_pmd(pmd);
 		set_pmd_at(mm, new_addr, new_pmd, pmd);
-		if (new_ptl != old_ptl)
-			spin_unlock(new_ptl);
 		if (force_flush)
 			flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
-		else
-			*need_flush = true;
+		if (new_ptl != old_ptl)
+			spin_unlock(new_ptl);
 		spin_unlock(old_ptl);
 		return true;
 	}
diff --git a/mm/mremap.c b/mm/mremap.c
index 5c2e18505f75..a9617e72e6b7 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -115,7 +115,7 @@ static pte_t move_soft_dirty_pte(pte_t pte)
 static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		unsigned long old_addr, unsigned long old_end,
 		struct vm_area_struct *new_vma, pmd_t *new_pmd,
-		unsigned long new_addr, bool need_rmap_locks, bool *need_flush)
+		unsigned long new_addr, bool need_rmap_locks)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pte_t *old_pte, *new_pte, pte;
@@ -163,15 +163,17 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 
 		pte = ptep_get_and_clear(mm, old_addr, old_pte);
 		/*
-		 * If we are remapping a dirty PTE, make sure
+		 * If we are remapping a valid PTE, make sure
 		 * to flush TLB before we drop the PTL for the
-		 * old PTE or we may race with page_mkclean().
+		 * PTE.
 		 *
-		 * This check has to be done after we removed the
-		 * old PTE from page tables or another thread may
-		 * dirty it after the check and before the removal.
+		 * NOTE! Both old and new PTL matter: the old one
+		 * for racing with page_mkclean(), the new one to
+		 * make sure the physical page stays valid until
+		 * the TLB entry for the old mapping has been
+		 * flushed.
 		 */
-		if (pte_present(pte) && pte_dirty(pte))
+		if (pte_present(pte))
 			force_flush = true;
 		pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
 		pte = move_soft_dirty_pte(pte);
@@ -179,13 +181,11 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 	}
 
 	arch_leave_lazy_mmu_mode();
+	if (force_flush)
+		flush_tlb_range(vma, old_end - len, old_end);
 	if (new_ptl != old_ptl)
 		spin_unlock(new_ptl);
 	pte_unmap(new_pte - 1);
-	if (force_flush)
-		flush_tlb_range(vma, old_end - len, old_end);
-	else
-		*need_flush = true;
 	pte_unmap_unlock(old_pte - 1, old_ptl);
 	if (need_rmap_locks)
 		drop_rmap_locks(vma);
@@ -198,7 +198,6 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 {
 	unsigned long extent, next, old_end;
 	pmd_t *old_pmd, *new_pmd;
-	bool need_flush = false;
 	unsigned long mmun_start;	/* For mmu_notifiers */
 	unsigned long mmun_end;		/* For mmu_notifiers */
 
@@ -229,8 +228,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 				if (need_rmap_locks)
 					take_rmap_locks(vma);
 				moved = move_huge_pmd(vma, old_addr, new_addr,
-						    old_end, old_pmd, new_pmd,
-						    &need_flush);
+						    old_end, old_pmd, new_pmd);
 				if (need_rmap_locks)
 					drop_rmap_locks(vma);
 				if (moved)
@@ -246,10 +244,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 		if (extent > next - new_addr)
 			extent = next - new_addr;
 		move_ptes(vma, old_pmd, old_addr, old_addr + extent, new_vma,
-			  new_pmd, new_addr, need_rmap_locks, &need_flush);
+			  new_pmd, new_addr, need_rmap_locks);
 	}
-	if (need_flush)
-		flush_tlb_range(vma, old_end-len, old_addr);
 
 	mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
 

From 169b803397499be85bdd1e3d07d6f5e3d4bd669e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 17 Oct 2018 15:23:26 +0100
Subject: [PATCH 86/95] cachefiles: fix the race between
 cachefiles_bury_object() and rmdir(2)

the victim might've been rmdir'ed just before the lock_rename();
unlike the normal callers, we do not look the source up after the
parents are locked - we know it beforehand and just recheck that it's
still the child of what used to be its parent.  Unfortunately,
the check is too weak - we don't spot a dead directory since its
->d_parent is unchanged, dentry is positive, etc.  So we sail all
the way to ->rename(), with hosting filesystems _not_ expecting
to be asked renaming an rmdir'ed subdirectory.

The fix is easy, fortunately - the lock on parent is sufficient for
making IS_DEADDIR() on child safe.

Cc: stable@vger.kernel.org
Fixes: 9ae326a69004 (CacheFiles: A cache that backs onto a mounted filesystem)
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/cachefiles/namei.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index af2b17b21b94..95983c744164 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -343,7 +343,7 @@ try_again:
 	trap = lock_rename(cache->graveyard, dir);
 
 	/* do some checks before getting the grave dentry */
-	if (rep->d_parent != dir) {
+	if (rep->d_parent != dir || IS_DEADDIR(d_inode(rep))) {
 		/* the entry was probably culled when we dropped the parent dir
 		 * lock */
 		unlock_rename(cache->graveyard, dir);

From 1ff22883b0b2f7a73eb2609ffe879c9fd96f6328 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 17 Oct 2018 15:23:45 +0100
Subject: [PATCH 87/95] fscache: Fix incomplete initialisation of inline key
 space

The inline key in struct rxrpc_cookie is insufficiently initialized,
zeroing only 3 of the 4 slots, therefore an index_key_len between 13 and 15
bytes will end up hashing uninitialized memory because the memcpy only
partially fills the last buf[] element.

Fix this by clearing fscache_cookie objects on allocation rather than using
the slab constructor to initialise them.  We're going to pretty much fill
in the entire struct anyway, so bringing it into our dcache writably
shouldn't incur much overhead.

This removes the need to do clearance in fscache_set_key() (where we aren't
doing it correctly anyway).

Also, we don't need to set cookie->key_len in fscache_set_key() as we
already did it in the only caller, so remove that.

Fixes: ec0328e46d6e ("fscache: Maintain a catalogue of allocated cookies")
Reported-by: syzbot+a95b989b2dde8e806af8@syzkaller.appspotmail.com
Reported-by: Eric Sandeen <sandeen@redhat.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/fscache/cookie.c   | 23 ++++-------------------
 fs/fscache/internal.h |  1 -
 fs/fscache/main.c     |  4 +---
 3 files changed, 5 insertions(+), 23 deletions(-)

diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index 83bfe04456b6..b52f1dcd5dea 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -69,19 +69,6 @@ void fscache_free_cookie(struct fscache_cookie *cookie)
 	}
 }
 
-/*
- * initialise an cookie jar slab element prior to any use
- */
-void fscache_cookie_init_once(void *_cookie)
-{
-	struct fscache_cookie *cookie = _cookie;
-
-	memset(cookie, 0, sizeof(*cookie));
-	spin_lock_init(&cookie->lock);
-	spin_lock_init(&cookie->stores_lock);
-	INIT_HLIST_HEAD(&cookie->backing_objects);
-}
-
 /*
  * Set the index key in a cookie.  The cookie struct has space for a 12-byte
  * key plus length and hash, but if that's not big enough, it's instead a
@@ -95,8 +82,6 @@ static int fscache_set_key(struct fscache_cookie *cookie,
 	u32 *buf;
 	int i;
 
-	cookie->key_len = index_key_len;
-
 	if (index_key_len > sizeof(cookie->inline_key)) {
 		buf = kzalloc(index_key_len, GFP_KERNEL);
 		if (!buf)
@@ -104,9 +89,6 @@ static int fscache_set_key(struct fscache_cookie *cookie,
 		cookie->key = buf;
 	} else {
 		buf = (u32 *)cookie->inline_key;
-		buf[0] = 0;
-		buf[1] = 0;
-		buf[2] = 0;
 	}
 
 	memcpy(buf, index_key, index_key_len);
@@ -161,7 +143,7 @@ struct fscache_cookie *fscache_alloc_cookie(
 	struct fscache_cookie *cookie;
 
 	/* allocate and initialise a cookie */
-	cookie = kmem_cache_alloc(fscache_cookie_jar, GFP_KERNEL);
+	cookie = kmem_cache_zalloc(fscache_cookie_jar, GFP_KERNEL);
 	if (!cookie)
 		return NULL;
 
@@ -192,6 +174,9 @@ struct fscache_cookie *fscache_alloc_cookie(
 	cookie->netfs_data	= netfs_data;
 	cookie->flags		= (1 << FSCACHE_COOKIE_NO_DATA_YET);
 	cookie->type		= def->type;
+	spin_lock_init(&cookie->lock);
+	spin_lock_init(&cookie->stores_lock);
+	INIT_HLIST_HEAD(&cookie->backing_objects);
 
 	/* radix tree insertion won't use the preallocation pool unless it's
 	 * told it may not wait */
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index f83328a7f048..d6209022e965 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -51,7 +51,6 @@ extern struct fscache_cache *fscache_select_cache_for_object(
 extern struct kmem_cache *fscache_cookie_jar;
 
 extern void fscache_free_cookie(struct fscache_cookie *);
-extern void fscache_cookie_init_once(void *);
 extern struct fscache_cookie *fscache_alloc_cookie(struct fscache_cookie *,
 						   const struct fscache_cookie_def *,
 						   const void *, size_t,
diff --git a/fs/fscache/main.c b/fs/fscache/main.c
index 7dce110bf17d..30ad89db1efc 100644
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -143,9 +143,7 @@ static int __init fscache_init(void)
 
 	fscache_cookie_jar = kmem_cache_create("fscache_cookie_jar",
 					       sizeof(struct fscache_cookie),
-					       0,
-					       0,
-					       fscache_cookie_init_once);
+					       0, 0, NULL);
 	if (!fscache_cookie_jar) {
 		pr_notice("Failed to allocate a cookie jar\n");
 		ret = -ENOMEM;

From fa520c47eaa15b9baa8ad66ac18da4a31679693b Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Wed, 17 Oct 2018 15:23:59 +0100
Subject: [PATCH 88/95] fscache: Fix out of bound read in long cookie keys

fscache_set_key() can incur an out-of-bounds read, reported by KASAN:

 BUG: KASAN: slab-out-of-bounds in fscache_alloc_cookie+0x5b3/0x680 [fscache]
 Read of size 4 at addr ffff88084ff056d4 by task mount.nfs/32615

and also reported by syzbot at https://lkml.org/lkml/2018/7/8/236

  BUG: KASAN: slab-out-of-bounds in fscache_set_key fs/fscache/cookie.c:120 [inline]
  BUG: KASAN: slab-out-of-bounds in fscache_alloc_cookie+0x7a9/0x880 fs/fscache/cookie.c:171
  Read of size 4 at addr ffff8801d3cc8bb4 by task syz-executor907/4466

This happens for any index_key_len which is not divisible by 4 and is
larger than the size of the inline key, because the code allocates exactly
index_key_len for the key buffer, but the hashing loop is stepping through
it 4 bytes (u32) at a time in the buf[] array.

Fix this by calculating how many u32 buffers we'll need by using
DIV_ROUND_UP, and then using kcalloc() to allocate a precleared allocation
buffer to hold the index_key, then using that same count as the hashing
index limit.

Fixes: ec0328e46d6e ("fscache: Maintain a catalogue of allocated cookies")
Reported-by: syzbot+a95b989b2dde8e806af8@syzkaller.appspotmail.com
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/fscache/cookie.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index b52f1dcd5dea..c550512ce335 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -70,7 +70,7 @@ void fscache_free_cookie(struct fscache_cookie *cookie)
 }
 
 /*
- * Set the index key in a cookie.  The cookie struct has space for a 12-byte
+ * Set the index key in a cookie.  The cookie struct has space for a 16-byte
  * key plus length and hash, but if that's not big enough, it's instead a
  * pointer to a buffer containing 3 bytes of hash, 1 byte of length and then
  * the key data.
@@ -80,10 +80,13 @@ static int fscache_set_key(struct fscache_cookie *cookie,
 {
 	unsigned long long h;
 	u32 *buf;
+	int bufs;
 	int i;
 
+	bufs = DIV_ROUND_UP(index_key_len, sizeof(*buf));
+
 	if (index_key_len > sizeof(cookie->inline_key)) {
-		buf = kzalloc(index_key_len, GFP_KERNEL);
+		buf = kcalloc(bufs, sizeof(*buf), GFP_KERNEL);
 		if (!buf)
 			return -ENOMEM;
 		cookie->key = buf;
@@ -98,7 +101,8 @@ static int fscache_set_key(struct fscache_cookie *cookie,
 	 */
 	h = (unsigned long)cookie->parent;
 	h += index_key_len + cookie->type;
-	for (i = 0; i < (index_key_len + sizeof(u32) - 1) / sizeof(u32); i++)
+
+	for (i = 0; i < bufs; i++)
 		h += buf[i];
 
 	cookie->key_hash = h ^ (h >> 32);

From 27faeebd00813cd52a140e8944b6b32729b50e61 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 18 Oct 2018 11:32:29 -0700
Subject: [PATCH 89/95] sparc: Revert unintended perf changes.

Some local debugging hacks accidently slipped into the VDSO commit.

Sorry!

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/perf/util/event.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 651cd6f6762c..0cd42150f712 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -1081,7 +1081,6 @@ void *cpu_map_data__alloc(struct cpu_map *map, size_t *size, u16 *type, int *max
 	}
 
 	*size += sizeof(struct cpu_map_data);
-	*size = PERF_ALIGN(*size, sizeof(u64));
 	return zalloc(*size);
 }
 
@@ -1561,9 +1560,7 @@ struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr,
 
 		return NULL;
 	}
-#if 0
 try_again:
-#endif
 	al->map = map_groups__find(mg, al->addr);
 	if (al->map == NULL) {
 		/*
@@ -1575,7 +1572,6 @@ try_again:
 		 * "[vdso]" dso, but for now lets use the old trick of looking
 		 * in the whole kernel symbol list.
 		 */
-#if 0
 		if (cpumode == PERF_RECORD_MISC_USER && machine &&
 		    mg != &machine->kmaps &&
 		    machine__kernel_ip(machine, al->addr)) {
@@ -1583,7 +1579,6 @@ try_again:
 			load_map = true;
 			goto try_again;
 		}
-#endif
 	} else {
 		/*
 		 * Kernel maps might be changed when loading symbols so loading

From 6b839b6cf9eada30b086effb51e5d6076bafc761 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 18 Oct 2018 19:56:01 +0200
Subject: [PATCH 90/95] r8169: fix NAPI handling under high load

rtl_rx() and rtl_tx() are called only if the respective bits are set
in the interrupt status register. Under high load NAPI may not be
able to process all data (work_done == budget) and it will schedule
subsequent calls to the poll callback.
rtl_ack_events() however resets the bits in the interrupt status
register, therefore subsequent calls to rtl8169_poll() won't call
rtl_rx() and rtl_tx() - chip interrupts are still disabled.

Fix this by calling rtl_rx() and rtl_tx() independent of the bits
set in the interrupt status register. Both functions will detect
if there's nothing to do for them.

Fixes: da78dbff2e05 ("r8169: remove work from irq handler.")
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 28184b984a44..2c350099b83c 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -6549,17 +6549,15 @@ static int rtl8169_poll(struct napi_struct *napi, int budget)
 	struct rtl8169_private *tp = container_of(napi, struct rtl8169_private, napi);
 	struct net_device *dev = tp->dev;
 	u16 enable_mask = RTL_EVENT_NAPI | tp->event_slow;
-	int work_done= 0;
+	int work_done;
 	u16 status;
 
 	status = rtl_get_events(tp);
 	rtl_ack_events(tp, status & ~tp->event_slow);
 
-	if (status & RTL_EVENT_NAPI_RX)
-		work_done = rtl_rx(dev, tp, (u32) budget);
+	work_done = rtl_rx(dev, tp, (u32) budget);
 
-	if (status & RTL_EVENT_NAPI_TX)
-		rtl_tx(dev, tp);
+	rtl_tx(dev, tp);
 
 	if (status & tp->event_slow) {
 		enable_mask &= ~tp->event_slow;

From 3c53ed8fef6881a864f0ee8240ed2793ef73ad0d Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Thu, 18 Oct 2018 10:34:26 +0200
Subject: [PATCH 91/95] net: sched: Fix for duplicate class dump

When dumping classes by parent, kernel would return classes twice:

| # tc qdisc add dev lo root prio
| # tc class show dev lo
| class prio 8001:1 parent 8001:
| class prio 8001:2 parent 8001:
| class prio 8001:3 parent 8001:
| # tc class show dev lo parent 8001:
| class prio 8001:1 parent 8001:
| class prio 8001:2 parent 8001:
| class prio 8001:3 parent 8001:
| class prio 8001:1 parent 8001:
| class prio 8001:2 parent 8001:
| class prio 8001:3 parent 8001:

This comes from qdisc_match_from_root() potentially returning the root
qdisc itself if its handle matched. Though in that case, root's classes
were already dumped a few lines above.

Fixes: cb395b2010879 ("net: sched: optimize class dumps")
Signed-off-by: Phil Sutter <phil@nwl.cc>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 6684641ea344..3dc0acf54245 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -2059,7 +2059,8 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
 
 	if (tcm->tcm_parent) {
 		q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
-		if (q && tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
+		if (q && q != root &&
+		    tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
 			return -1;
 		return 0;
 	}

From b6168562c8ce2bd5a30e213021650422e08764dc Mon Sep 17 00:00:00 2001
From: Wenwen Wang <wang6495@umn.edu>
Date: Thu, 18 Oct 2018 09:36:46 -0500
Subject: [PATCH 92/95] net: socket: fix a missing-check bug

In ethtool_ioctl(), the ioctl command 'ethcmd' is checked through a switch
statement to see whether it is necessary to pre-process the ethtool
structure, because, as mentioned in the comment, the structure
ethtool_rxnfc is defined with padding. If yes, a user-space buffer 'rxnfc'
is allocated through compat_alloc_user_space(). One thing to note here is
that, if 'ethcmd' is ETHTOOL_GRXCLSRLALL, the size of the buffer 'rxnfc' is
partially determined by 'rule_cnt', which is actually acquired from the
user-space buffer 'compat_rxnfc', i.e., 'compat_rxnfc->rule_cnt', through
get_user(). After 'rxnfc' is allocated, the data in the original user-space
buffer 'compat_rxnfc' is then copied to 'rxnfc' through copy_in_user(),
including the 'rule_cnt' field. However, after this copy, no check is
re-enforced on 'rxnfc->rule_cnt'. So it is possible that a malicious user
race to change the value in the 'compat_rxnfc->rule_cnt' between these two
copies. Through this way, the attacker can bypass the previous check on
'rule_cnt' and inject malicious data. This can cause undefined behavior of
the kernel and introduce potential security risk.

This patch avoids the above issue via copying the value acquired by
get_user() to 'rxnfc->rule_cn', if 'ethcmd' is ETHTOOL_GRXCLSRLALL.

Signed-off-by: Wenwen Wang <wang6495@umn.edu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/socket.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/net/socket.c b/net/socket.c
index 01f3f8f32d6f..390a8ecef4bf 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2875,9 +2875,14 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
 		    copy_in_user(&rxnfc->fs.ring_cookie,
 				 &compat_rxnfc->fs.ring_cookie,
 				 (void __user *)(&rxnfc->fs.location + 1) -
-				 (void __user *)&rxnfc->fs.ring_cookie) ||
-		    copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
-				 sizeof(rxnfc->rule_cnt)))
+				 (void __user *)&rxnfc->fs.ring_cookie))
+			return -EFAULT;
+		if (ethcmd == ETHTOOL_GRXCLSRLALL) {
+			if (put_user(rule_cnt, &rxnfc->rule_cnt))
+				return -EFAULT;
+		} else if (copy_in_user(&rxnfc->rule_cnt,
+					&compat_rxnfc->rule_cnt,
+					sizeof(rxnfc->rule_cnt)))
 			return -EFAULT;
 	}
 

From b06f9d9f1a907dd03f203e2ce9e27e318c22ba01 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Thu, 18 Oct 2018 17:38:29 +0200
Subject: [PATCH 93/95] tipc: fix info leak from kernel tipc_event

We initialize a struct tipc_event allocated on the kernel stack to
zero to avert info leak to user space.

Reported-by: syzbot+057458894bc8cada4dee@syzkaller.appspotmail.com
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/group.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/tipc/group.c b/net/tipc/group.c
index e82f13cb2dc5..06fee142f09f 100644
--- a/net/tipc/group.c
+++ b/net/tipc/group.c
@@ -666,6 +666,7 @@ static void tipc_group_create_event(struct tipc_group *grp,
 	struct sk_buff *skb;
 	struct tipc_msg *hdr;
 
+	memset(&evt, 0, sizeof(evt));
 	evt.event = event;
 	evt.found_lower = m->instance;
 	evt.found_upper = m->instance;

From d4d576f5ab7edcb757bb33e6a5600666a0b1232d Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Thu, 18 Oct 2018 21:25:07 +0200
Subject: [PATCH 94/95] ip6_tunnel: Fix encapsulation layout

Commit 058214a4d1df ("ip6_tun: Add infrastructure for doing
encapsulation") added the ip6_tnl_encap() call in ip6_tnl_xmit(), before
the call to ipv6_push_frag_opts() to append the IPv6 Tunnel Encapsulation
Limit option (option 4, RFC 2473, par. 5.1) to the outer IPv6 header.

As long as the option didn't actually end up in generated packets, this
wasn't an issue. Then commit 89a23c8b528b ("ip6_tunnel: Fix missing tunnel
encapsulation limit option") fixed sending of this option, and the
resulting layout, e.g. for FoU, is:

.-------------------.------------.----------.-------------------.----- - -
| Outer IPv6 Header | UDP header | Option 4 | Inner IPv6 Header | Payload
'-------------------'------------'----------'-------------------'----- - -

Needless to say, FoU and GUE (at least) won't work over IPv6. The option
is appended by default, and I couldn't find a way to disable it with the
current iproute2.

Turn this into a more reasonable:

.-------------------.----------.------------.-------------------.----- - -
| Outer IPv6 Header | Option 4 | UDP header | Inner IPv6 Header | Payload
'-------------------'----------'------------'-------------------'----- - -

With this, and with 84dad55951b0 ("udp6: fix encap return code for
resubmitting"), FoU and GUE work again over IPv6.

Fixes: 058214a4d1df ("ip6_tun: Add infrastructure for doing encapsulation")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_tunnel.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index a0b6932c3afd..a9d06d4dd057 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1184,11 +1184,6 @@ route_lookup:
 	}
 	skb_dst_set(skb, dst);
 
-	if (encap_limit >= 0) {
-		init_tel_txopt(&opt, encap_limit);
-		ipv6_push_frag_opts(skb, &opt.ops, &proto);
-	}
-
 	if (hop_limit == 0) {
 		if (skb->protocol == htons(ETH_P_IP))
 			hop_limit = ip_hdr(skb)->ttl;
@@ -1210,6 +1205,11 @@ route_lookup:
 	if (err)
 		return err;
 
+	if (encap_limit >= 0) {
+		init_tel_txopt(&opt, encap_limit);
+		ipv6_push_frag_opts(skb, &opt.ops, &proto);
+	}
+
 	skb_push(skb, sizeof(struct ipv6hdr));
 	skb_reset_network_header(skb);
 	ipv6h = ipv6_hdr(skb);

From 48995423143a097527802e28d7add20e5a27677a Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 19 Oct 2018 10:45:08 -0700
Subject: [PATCH 95/95] Revert "bond: take rcu lock in netpoll_send_skb_on_dev"

This reverts commit 6fe9487892b32cb1c8b8b0d552ed7222a527fe30.

It is causing more serious regressions than the RCU warning
it is fixing.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netpoll.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index de1d1ba92f2d..3ae899805f8b 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -312,7 +312,6 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
 	/* It is up to the caller to keep npinfo alive. */
 	struct netpoll_info *npinfo;
 
-	rcu_read_lock_bh();
 	lockdep_assert_irqs_disabled();
 
 	npinfo = rcu_dereference_bh(np->dev->npinfo);
@@ -357,7 +356,6 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
 		skb_queue_tail(&npinfo->txq, skb);
 		schedule_delayed_work(&npinfo->tx_work,0);
 	}
-	rcu_read_unlock_bh();
 }
 EXPORT_SYMBOL(netpoll_send_skb_on_dev);